def read(self):
    '''Load pressure, temperature and time data from in_filename.

    The table is read as two comma-separated columns ('a' and 'b'),
    starting one line before the first line that consists of four digits,
    a comma and four digits. Only rows that have a value in column 'b'
    are used. Column 'a' is converted with uc.USGS_PROTOTYPE_V_TO_DBAR
    into self.pressure_data (numpy array) and column 'b' with
    uc.USGS_PROTOTYPE_V_TO_C into self.temperature_data (list).

    The first line of the file that starts with a date-like pattern is
    parsed as the start time; self.utc_millisecond_data is generated from
    it using the number of pressure samples and self.frequency. If no
    such line exists, self.utc_millisecond_data is left untouched.
    '''
    first_match = find_first(self.in_filename, '^[0-9]{4},[0-9]{4}$')
    frame = pd.read_table(self.in_filename, skiprows=first_match - 1,
                          header=None, engine='c', sep=',',
                          names=('a', 'b'))

    # Rows lacking a second column are not sample rows; drop them once.
    complete_rows = frame[frame.b.notnull()]
    self.pressure_data = np.array(
        [uc.USGS_PROTOTYPE_V_TO_DBAR(np.float64(volts))
         for volts in complete_rows.a])
    self.temperature_data = [
        uc.USGS_PROTOTYPE_V_TO_C(np.float64(volts))
        for volts in complete_rows.b]

    with open(self.in_filename, 'r') as wavelog:
        for line in wavelog:
            if not re.match('^[0-9]{4}.[0-9]{2}.[0-9]{2}', line):
                continue
            # second arg has extra space that is unnecessary
            start_ms = uc.datestring_to_ms(line, self.date_format_string)
            self.utc_millisecond_data = uc.generate_ms(
                start_ms, len(self.pressure_data), self.frequency)
            # Only the first timestamp line is needed.
            break
def read(self):
    '''Load the pressure data from in_filename.

    Only two timestamps are parsed (rows labelled 1 and 2 of the first
    column); the sampling frequency is derived from their difference and
    the full time axis is generated from it, which is much faster than
    parsing every date string.

    Sets self.data_start, self.data_start2, self.frequency,
    self.utc_millisecond_data and self.pressure_data (column 3 scaled by
    uc.PSI_TO_DBAR).
    '''
    self.get_serial()
    header_index = find_first(self.in_filename, 'Date and Time,Seconds')
    table = pd.read_table(self.in_filename, skiprows=header_index,
                          header=None, engine='c', sep=',',
                          usecols=(0, 1, 2, 3))
    stamps = table[0]

    def to_ms(date_text):
        # Convert one date string using this instrument's settings.
        return uc.datestring_to_ms(date_text, self.date_format_string,
                                   self.tz_info, self.daylight_savings)

    self.data_start = to_ms(stamps[1])
    self.data_start2 = to_ms(stamps[2])

    # Timestamps are in milliseconds; the frequency is per second.
    interval_seconds = (self.data_start2 - self.data_start) / 1000
    self.frequency = 1 / interval_seconds

    self.utc_millisecond_data = uc.generate_ms(self.data_start,
                                               len(stamps),
                                               self.frequency)
    self.pressure_data = table[3].values * uc.PSI_TO_DBAR
def wind_data(file_name, mode='netCDF'):
    '''Write a randomly generated wind time series to file_name.

    Generates 1440 samples at a frequency of 1/900 (one every fifteen
    minutes) starting at millisecond timestamp 1404647999870, together
    with random wind direction and wind speed series of the same length.

    file_name -- path of the file to create
    mode -- 'netCDF' (default) writes a NETCDF4_CLASSIC file with 'time',
            'wind_speed' and 'wind_direction' variables on a 'time'
            dimension; any other value writes a CSV file instead
    '''
    # every fifteen minutes maybe not necessary
    frequency = 1 / 900
    series_length = 1440
    time = unit_conversion.generate_ms(1404647999870, series_length,
                                       frequency)
    wind_direction = get_rand_circular_data(series_length, 15, 360)
    wind_speed = get_rand_discrete_data(series_length, 2, 5, 0)

    if mode == 'netCDF':
        ds = Dataset(file_name, 'w', format="NETCDF4_CLASSIC")
        # Close the dataset even when a write fails so the file handle
        # is not leaked.
        try:
            ds.createDimension('time', len(time))
            series = (('time', time),
                      ('wind_speed', wind_speed),
                      ('wind_direction', wind_direction))
            for var_name, values in series:
                # ('time',) is a real one-element tuple; ('time') is
                # just a parenthesised string.
                variable = ds.createVariable(var_name, 'f8', ('time',))
                variable[:] = values
        finally:
            ds.close()
    else:
        wind_frame = pd.DataFrame({
            'Time': time,
            'Wind Speed in m/s': wind_speed,
            'Wind Direction in degrees': wind_direction,
        })
        wind_frame.to_csv(path_or_buf=file_name)

    print('total:', len(time), len(wind_direction), len(wind_speed))
def wind_data(file_name, mode='netCDF'):
    '''Generate a random wind series and save it to file_name.

    The series has 1440 samples at a frequency of 1/900 (fifteen minute
    spacing) beginning at millisecond timestamp 1404647999870. Wind
    direction and wind speed are produced by the random data helpers.

    file_name -- path of the output file
    mode -- 'netCDF' (default) creates a NETCDF4_CLASSIC dataset with
            'time', 'wind_speed' and 'wind_direction' variables; any
            other value writes the same three series as CSV
    '''
    # every fifteen minutes maybe not necessary
    frequency = 1 / 900
    series_length = 1440
    time = unit_conversion.generate_ms(1404647999870, series_length,
                                       frequency)
    wind_direction = get_rand_circular_data(series_length, 15, 360)
    wind_speed = get_rand_discrete_data(series_length, 2, 5, 0)

    if mode == 'netCDF':
        ds = Dataset(file_name, 'w', format="NETCDF4_CLASSIC")
        # try/finally guarantees the dataset is closed when a write
        # raises part-way through.
        try:
            ds.createDimension('time', len(time))

            # Dimensions are given as a proper one-element tuple.
            time_var = ds.createVariable('time', 'f8', ('time',))
            time_var[:] = time

            wind_speed_var = ds.createVariable('wind_speed', 'f8',
                                               ('time',))
            wind_speed_var[:] = wind_speed

            wind_direction_var = ds.createVariable('wind_direction', 'f8',
                                                   ('time',))
            wind_direction_var[:] = wind_direction
        finally:
            ds.close()
    else:
        csv_frame = pd.DataFrame({
            'Time': time,
            'Wind Speed in m/s': wind_speed,
            'Wind Direction in degrees': wind_direction,
        })
        csv_frame.to_csv(path_or_buf=file_name)

    print('total:', len(time), len(wind_direction), len(wind_speed))
def read(self):
    '''Load the pressure data from in_filename.

    Only three timestamps are parsed (rows labelled 3, 4 and 0 of column
    3, each with its first character stripped); the rest of the time axis
    is generated from the derived sampling frequency, which is much
    faster than parsing every date string.

    Parsing is attempted with self.date_format_string first and, if
    anything in that attempt raises, repeated with
    self.date_format_string2. An error in the second attempt propagates.

    Sets self.data_start, self.frequency, self.pressure_data (column 5
    scaled by uc.PSI_TO_DBAR) and self.utc_millisecond_data.
    '''
    self.get_serial()
    skip_index = find_first(self.in_filename, '^ID') - 1
    # for skipping lines in case there is calibration header data
    df = pd.read_table(self.in_filename, skiprows=skip_index + 1,
                       header=None, engine='c', sep=',',
                       usecols=[3, 4, 5])

    def load_with(date_format):
        # Parse the reference timestamps with date_format, store the
        # derived attributes on self and return the series start time.
        def to_ms(row):
            return uc.datestring_to_ms(df[3][row][1:], date_format,
                                       self.tz_info, self.daylight_savings)

        self.data_start = to_ms(3)
        second_stamp = to_ms(4)
        # Timestamps are in milliseconds; the frequency is per second.
        self.frequency = 1000 / (second_stamp - self.data_start)
        self.pressure_data = df[5].values * uc.PSI_TO_DBAR
        return to_ms(0)

    try:
        start_ms = load_with(self.date_format_string)
    except Exception:
        # Fall back to the alternate date format. Unlike a bare except,
        # this lets KeyboardInterrupt and SystemExit through.
        start_ms = load_with(self.date_format_string2)

    self.utc_millisecond_data = uc.generate_ms(start_ms, df.shape[0],
                                               self.frequency)
def read(self):
    '''Load the pressure data from in_filename.

    Only the first timestamps are parsed; the rest of the time axis is
    generated from the derived sampling frequency, which is much faster
    than parsing every date string.

    The header is located by searching for '"#"' and, if that is not
    found, for '#'. Columns 1 (date string) and 2 (pressure) are read and
    rows with missing values are dropped. Each parse is attempted with
    self.date_format_string first and, if that raises, repeated with
    self.date_format_string2.

    Sets self.frequency, self.utc_millisecond_data and self.pressure_data
    (column 2 scaled by uc.PSI_TO_DBAR).
    '''
    self.get_serial()

    skip_index = find_first(self.in_filename, '"#"')
    if skip_index is None:
        # Some files carry the header marker without quotes.
        skip_index = find_first(self.in_filename, '#')

    df = pd.read_table(self.in_filename, skiprows=skip_index, header=None,
                       engine='c', sep=',', usecols=(1, 2))
    df = df.dropna()

    def first_two_stamps(date_format):
        # Parse the first two remaining rows (by position) with date_format.
        return tuple(
            uc.datestring_to_ms(df.values[position][0], date_format,
                                self.tz_info, self.daylight_savings)
            for position in (0, 1))

    def series_start(date_format):
        # Parse the date string in the row labelled 0 with date_format.
        return uc.datestring_to_ms(df[1][0], date_format,
                                   self.tz_info, self.daylight_savings)

    # except Exception (not a bare except) keeps the format fallback
    # while letting KeyboardInterrupt and SystemExit through.
    try:
        first_stamp, second_stamp = first_two_stamps(self.date_format_string)
    except Exception:
        first_stamp, second_stamp = first_two_stamps(self.date_format_string2)

    # Timestamps are in milliseconds; the frequency is per second.
    self.frequency = 1000 / (second_stamp - first_stamp)

    try:
        start_ms = series_start(self.date_format_string)
    except Exception:
        start_ms = series_start(self.date_format_string2)

    self.utc_millisecond_data = uc.generate_ms(start_ms, df.shape[0],
                                               self.frequency)
    self.pressure_data = df[2].values * uc.PSI_TO_DBAR
def read(self):
    '''Load the pressure data from in_filename.

    Only the initial datetime is parsed (columns 0 and 1 of the first
    row joined by a space); the rest of the time axis is generated from
    self.frequency, which is much faster than parsing every date string.

    The file is read as whitespace-delimited text starting at the first
    line that begins with a date such as 01-Jan-2014. The final row is
    excluded from both the time axis and the pressure data.

    Sets self.datestart, self.utc_millisecond_data and self.pressure_data
    (numpy array built from column 2).
    '''
    skip_index = find_first(self.in_filename,
                            '^[0-9]{2}-[A-Z]{1}[a-z]{2,8}-[0-9]{4}')
    # sep=r'\s+' is the documented equivalent of the deprecated
    # delim_whitespace=True and is still handled by the C engine.
    df = pd.read_csv(self.in_filename, skiprows=skip_index, sep=r'\s+',
                     header=None, engine='c', usecols=[0, 1, 2])

    self.datestart = uc.datestring_to_ms('%s %s' % (df[0][0], df[1][0]),
                                         self.date_format_string)

    # The last row is left out of the series.
    sample_count = df.shape[0] - 1
    self.utc_millisecond_data = uc.generate_ms(self.datestart,
                                               sample_count,
                                               self.frequency)
    self.pressure_data = np.array(list(df[2][:-1]))
def quick_dirty_wind_data(in_file_name, out_file_name):
    '''Convert a headerless wind CSV into a NETCDF4_CLASSIC file.

    Column 6 of the CSV is written as 'wind_direction' and column 8 as
    'wind_speed'. The 'time' variable is generated rather than read: 1213
    samples at a frequency of 1/360 (six minute spacing) starting at
    millisecond timestamp 1453503600000.0.

    in_file_name -- path of the CSV file to read (no header row)
    out_file_name -- path of the netCDF file to create
    '''
    df = pd.read_csv(in_file_name, header=None)

    # generate 6 minute utc millisecond data
    # millisecond is a time stamp for Fri Jan 22 2016 23:00:00
    time = unit_conversion.generate_ms(1453503600000.0, 1213, 1 / 360)

    wind_direction = df[6]
    wind_speed = df[8]

    ds = Dataset(out_file_name, 'w', format="NETCDF4_CLASSIC")
    # Close the dataset even when a write fails (for example on a length
    # mismatch) so the file handle is not leaked.
    try:
        ds.createDimension('time', len(time))

        # ('time',) is a real one-element tuple; ('time') is a string.
        time_var = ds.createVariable('time', 'f8', ('time',))
        time_var[:] = time

        wind_speed_var = ds.createVariable('wind_speed', 'f8', ('time',))
        wind_speed_var[:] = wind_speed.values

        wind_direction_var = ds.createVariable('wind_direction', 'f8',
                                               ('time',))
        wind_direction_var[:] = wind_direction.values
    finally:
        ds.close()
def quick_dirty_wind_data(in_file_name, out_file_name):
    '''Write wind columns from a headerless CSV to a netCDF file.

    Reads in_file_name without a header row, takes column 6 as wind
    direction and column 8 as wind speed, and stores them in a
    NETCDF4_CLASSIC file alongside a generated 'time' variable (1213
    samples at a frequency of 1/360 starting at millisecond timestamp
    1453503600000.0).

    in_file_name -- path of the CSV file to read
    out_file_name -- path of the netCDF file to create
    '''
    df = pd.read_csv(in_file_name, header=None)

    # generate 6 minute utc millisecond data
    # millisecond is a time stamp for Fri Jan 22 2016 23:00:00
    time = unit_conversion.generate_ms(1453503600000.0, 1213, 1 / 360)

    wind_direction = df[6]
    wind_speed = df[8]

    ds = Dataset(out_file_name, 'w', format="NETCDF4_CLASSIC")
    # try/finally guarantees the dataset is closed when a write raises.
    try:
        ds.createDimension('time', len(time))
        series = (('time', lambda: time),
                  ('wind_speed', lambda: wind_speed.values),
                  ('wind_direction', lambda: wind_direction.values))
        for var_name, get_values in series:
            # Dimensions are given as a proper one-element tuple.
            variable = ds.createVariable(var_name, 'f8', ('time',))
            variable[:] = get_values()
    finally:
        ds.close()
def read(self):
    '''Load the pressure data from in_filename.

    Two timestamps from the first column (rows labelled 1 and 2) are the
    only date strings parsed. Their spacing gives the sampling frequency,
    and the whole time axis is generated from it, which is much faster
    than parsing every row.

    Sets self.data_start, self.data_start2, self.frequency,
    self.utc_millisecond_data and self.pressure_data (column 3 scaled by
    uc.PSI_TO_DBAR).
    '''
    self.get_serial()

    rows_to_skip = find_first(self.in_filename, 'Date and Time,Seconds')
    readings = pd.read_table(self.in_filename, skiprows=rows_to_skip,
                             header=None, engine='c', sep=',',
                             usecols=(0, 1, 2, 3))
    date_column = readings[0]

    self.data_start = uc.datestring_to_ms(
        date_column[1], self.date_format_string,
        self.tz_info, self.daylight_savings)
    self.data_start2 = uc.datestring_to_ms(
        date_column[2], self.date_format_string,
        self.tz_info, self.daylight_savings)

    # Millisecond gap between the two samples, expressed in seconds.
    step_seconds = (self.data_start2 - self.data_start) / 1000
    self.frequency = 1 / step_seconds

    sample_count = len(date_column)
    self.utc_millisecond_data = uc.generate_ms(self.data_start,
                                               sample_count,
                                               self.frequency)
    self.pressure_data = readings[3].values * uc.PSI_TO_DBAR
def read(self):
    '''Load pressure, temperature and time data from in_filename.

    Two comma-separated columns ('a' and 'b') are read, beginning one
    line before the first line made up of four digits, a comma and four
    digits. Rows without a value in column 'b' are ignored. Column 'a'
    goes through uc.USGS_PROTOTYPE_V_TO_DBAR into self.pressure_data
    (numpy array); column 'b' goes through uc.USGS_PROTOTYPE_V_TO_C into
    self.temperature_data (list).

    The start time comes from the first line in the file that begins
    with a date-like pattern; self.utc_millisecond_data is generated from
    it, the pressure sample count and self.frequency. When no line
    matches, self.utc_millisecond_data is not assigned.
    '''
    data_line = find_first(self.in_filename, '^[0-9]{4},[0-9]{4}$')
    skip_index = data_line - 1
    raw = pd.read_table(self.in_filename, skiprows=skip_index, header=None,
                        engine='c', sep=',', names=('a', 'b'))

    # Build the "has both columns" mask a single time and reuse it.
    has_both = raw.b.notnull()
    pressure_volts = raw[has_both].a
    temperature_volts = raw[has_both].b

    self.pressure_data = np.array(
        [uc.USGS_PROTOTYPE_V_TO_DBAR(np.float64(v)) for v in pressure_volts])
    self.temperature_data = [
        uc.USGS_PROTOTYPE_V_TO_C(np.float64(v)) for v in temperature_volts]

    with open(self.in_filename, 'r') as wavelog:
        for text in wavelog:
            # second arg has extra space that is unnecessary
            if re.match('^[0-9]{4}.[0-9]{2}.[0-9]{2}', text):
                start_ms = uc.datestring_to_ms(text, self.date_format_string)
                sample_count = len(self.pressure_data)
                self.utc_millisecond_data = uc.generate_ms(
                    start_ms, sample_count, self.frequency)
                # Stop after the first timestamp line.
                break
def change_netCDFTime(in_file_name, out_file_name, start_ms, frequency=1 / 900):
    '''Copy a netCDF file and replace its time variable.

    in_file_name is copied to out_file_name; the copy's 'time' variable
    is then overwritten with a newly generated series that starts at
    start_ms and has as many entries as the existing time data.

    in_file_name -- path of the source file (left unmodified)
    out_file_name -- path of the copy that receives the new time data
    start_ms -- first timestamp of the new series, passed to
                unit_conversion.generate_ms
    frequency -- sampling frequency passed to unit_conversion.generate_ms;
                 defaults to 1/900, the value that was previously
                 hard-coded
    '''
    shutil.copy(in_file_name, out_file_name)
    # Keep the same number of samples as the copied file already has.
    time_len = len(nc.get_time(out_file_name))
    new_time = unit_conversion.generate_ms(start_ms, time_len, frequency)
    nc.set_variable_data(out_file_name, 'time', new_time)
def change_netCDFTime(in_file_name, out_file_name, start_ms, frequency=1 / 900):
    '''Write a copy of a netCDF file with a regenerated time variable.

    The source file is copied to out_file_name and only the copy is
    changed: its 'time' variable is replaced by a generated series of
    the same length that begins at start_ms.

    in_file_name -- path of the file to copy
    out_file_name -- destination path; its 'time' variable is rewritten
    start_ms -- starting timestamp handed to unit_conversion.generate_ms
    frequency -- sampling frequency handed to
                 unit_conversion.generate_ms; the default of 1/900
                 matches the value that used to be fixed in the code
    '''
    shutil.copy(in_file_name, out_file_name)

    # The new series must be as long as the one it replaces.
    existing_time = nc.get_time(out_file_name)
    new_time = unit_conversion.generate_ms(start_ms, len(existing_time),
                                           frequency)

    nc.set_variable_data(out_file_name, 'time', new_time)