def classify_by_boat(indir, outdir, pstatus):
    """Sort raw logger CSV files into one folder per vessel and month.

    Every ``*.csv`` directly inside ``indir`` is matched against the
    telemetry-status table by logger serial number and by date, and each
    match is copied to ``outdir/<Boat>/<YYYYMM>/<file name>`` (spaces in
    the boat name and file name are replaced by underscores).

    File names are parsed by splitting on ``_``: the second field is the
    logger serial number, the third and fourth fields are the GMT date
    (``YYYYMMDD``) and time (``HHMMSS``) of the file.

    Parameters
    ----------
    indir : str
        Directory holding the raw CSV files to classify.
    outdir : str
        Directory that receives the per-vessel folders; created if missing.
    pstatus : str
        Telemetry-status file, read with ``rdm.read_telemetrystatus``.  The
        columns ``Boat``, ``Lowell-SN`` and ``logger_change`` are used;
        ``Lowell-SN`` and ``logger_change`` hold comma-separated lists.

    Notes
    -----
    * This code is suitable for matching data recorded after the year 2000.
    * The j-th serial number of a vessel is considered valid from its j-th
      ``logger_change`` date up to the next one (open-ended for the last).
    * A file name with fewer than four ``_``-separated fields raises
      ``IndexError``.
    * Pressing Ctrl-C while a file is being handled exits the program.
    """
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Read the telemetry-status table.
    df = rdm.read_telemetrystatus(pstatus)

    # Collect (row label, serial numbers, logger-change dates) for every
    # vessel that has both pieces of information.  The parsed dates are kept
    # in a local list instead of being written back into the DataFrame:
    # storing a list in a cell through chained indexing is unreliable in
    # pandas and does nothing at all under copy-on-write.
    sn_missing = df['Lowell-SN'].isnull()
    change_missing = df['logger_change'].isnull()
    probes = []
    for row in df.index:
        if sn_missing[row] or change_missing[row]:
            continue
        # Entries of four characters or fewer are kept as they are; longer
        # ones are normalised with zl.transform_date so they can be compared
        # with the "%Y%m%d" string derived from the file name.
        change_dates = [
            zl.transform_date(entry) if len(entry) > 4 else entry
            for entry in df['logger_change'][row].split(',')
        ]
        probes.append((row, df['Lowell-SN'][row].split(','), change_dates))

    # Classify the files.
    for file in glob.glob(os.path.join(indir, '*.csv')):
        # os.path.basename works with either path separator, unlike a
        # hard-coded split on '\\'.
        fname = os.path.basename(file)
        name_fields = fname.split('_')
        stem_fields = fname.split('.')[0].split('_')
        file_sn = name_fields[1]

        # Time conversion: GMT time in the file name to local (eastern) date.
        time_str = stem_fields[2] + ' ' + stem_fields[3]
        time_local = zl.gmt_to_eastern(
            time_str[0:4] + '-' + time_str[4:6] + '-' + time_str[6:8] + ' ' +
            time_str[9:11] + ':' + time_str[11:13] + ':' + time_str[13:15]
        ).strftime("%Y%m%d")

        # Match the serial number and the date.
        for row, serial_numbers, change_dates in probes:
            for j, serial_number in enumerate(serial_numbers):
                # The file name may carry only the tail of the full serial
                # number, so compare against the end of the table entry.
                if not serial_number.endswith(file_sn):
                    continue

                # Build the destination from its parts rather than by string
                # replacement on the source path, which produced a wrong
                # target whenever ``indir`` had a trailing separator.  Only
                # the generated components are sanitised so that ``outdir``
                # itself is never rewritten.
                dstfile = os.path.join(
                    outdir,
                    df['Boat'][row].replace(' ', '_'),
                    name_fields[2][:6],
                    fname.replace(' ', '_'),
                )
                try:
                    if j < len(change_dates) - 1:
                        in_service = (change_dates[j] <= time_local
                                      <= change_dates[j + 1])
                    else:
                        # Last logger of the vessel (or more serial numbers
                        # than dates, which raises IndexError below).
                        in_service = change_dates[j] <= time_local
                    if in_service:
                        zl.copyfile(file, dstfile)
                except KeyboardInterrupt:
                    sys.exit()
                except Exception:
                    # Best effort: report the problem and go on with the
                    # remaining files.
                    print(
                        'NOTE: ' + fname +
                        ' does not have all the info it needs like date of last change.'
                    )
                    print("Please check telemetry status for this probe.")
def _widen_record_extent(record_df, row, lat_low, lat_high, lon_low, lon_high):
    """Grow the min/max lat/lon stored in ``record_df`` at ``row`` to cover a box.

    A bound is overwritten when it is still empty (NaN) or when the
    candidate value lies outside of it.
    """
    candidates = (
        ('min_lat', lat_low, True),
        ('max_lat', lat_high, False),
        ('min_lon', lon_low, True),
        ('max_lon', lon_high, False),
    )
    for column, candidate, is_lower_bound in candidates:
        current = record_df.at[row, column]
        if pd.isnull(current):
            record_df.at[row, column] = candidate
        elif is_lower_bound and candidate < current:
            record_df.at[row, column] = candidate
        elif not is_lower_bound and candidate > current:
            record_df.at[row, column] = candidate


def _add_match_to_record(record_df, row, diff_temp, diff_depth):
    """Fold one raw/telemetry match into the running statistics at ``row``.

    Updates the match counter plus the sum, maximum and minimum of the
    temperature and depth differences.
    """
    first_match = pd.isnull(record_df.at[row, 'matched_number'])
    if first_match:
        record_df.at[row, 'matched_number'] = 1
    else:
        record_df.at[row, 'matched_number'] = record_df.at[row, 'matched_number'] + 1

    for kind, diff in (('temp', diff_temp), ('depth', diff_depth)):
        sum_col = 'sum_diff_' + kind
        max_col = 'max_diff_' + kind
        min_col = 'min_diff_' + kind
        if first_match:
            record_df.at[row, sum_col] = diff
            record_df.at[row, max_col] = diff
            record_df.at[row, min_col] = diff
            continue
        record_df.at[row, sum_col] = record_df.at[row, sum_col] + diff
        if record_df.at[row, max_col] < diff:
            record_df.at[row, max_col] = diff
        if record_df.at[row, min_col] > diff:
            record_df.at[row, min_col] = diff


def match_tele_raw(
        input_dir,
        path_save,
        telemetry_status,
        start_time,
        end_time,
        telemetry_path='https://www.nefsc.noaa.gov/drifter/emolt.dat',
        accept_minutes_diff=20,
        acceptable_distance_diff=2,
        dpi=300):
    """Match raw logger files against telemetry records and write statistics.

    For every vessel in the telemetry-status table this counts the raw
    files, the telemetry records inside the requested period and the
    raw/telemetry pairs that agree in time and position, and it tracks the
    minimum, maximum and average temperature and depth differences of the
    matched pairs as well as the lat/lon extent seen.  The per-vessel table
    is saved as ``<start_time>_<end_time> statistics.csv`` in ``path_save``.

    Parameters
    ----------
    input_dir : str
        Directory searched (through ``zl.list_all_files``) for raw ``.csv``
        files.  File names are split on ``_``; the third and fourth fields
        are read as GMT date ``YYYYMMDD`` and time ``HHMMSS``.
    path_save : str
        Directory that receives the statistics CSV.
    telemetry_status : str
        Telemetry-status file, read with ``read_telemetrystatus``; the
        columns ``Boat`` and ``Vessel#`` are used.
    start_time, end_time : str
        Local period ``[start, end)`` in ``'%Y-%m-%d'`` format.
    telemetry_path : str
        Location of the telemetry data, read with ``read_telemetry``.
    accept_minutes_diff : int or float
        Largest time difference, in minutes, for a raw file and a telemetry
        record to be considered the same haul.
    acceptable_distance_diff : int or float
        Largest distance for a match, in the unit returned by ``zl.dist``
        (NOTE(review): presumably kilometres - confirm against zl.dist).
    dpi : int
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    tuple
        ``(raw_dict, tele_dict, record_file_df, index, start_time_local,
        end_time_local, path_save)`` where ``raw_dict`` / ``tele_dict`` map
        each boat name to a DataFrame of raw-file / telemetry means,
        ``record_file_df`` is the statistics table and ``index`` is the
        ``Boat`` column of the telemetry-status table.

    Notes
    -----
    * The program exits (``sys.exit()``) when no telemetry record falls in
      the period or no raw file is found.
    * Row labels of the telemetry-status table are used to address the
      statistics table, exactly as before.
    * A raw file without a "vessel number" header entry, or without any
      record left after filtering, is reported and skipped.  Previously the
      first case reused the vessel number of the previous file (or raised
      ``NameError``) and the second one raised ``ValueError``.
    """
    raw_columns = ['time', 'filename', 'mean_temp', 'mean_depth',
                   'mean_lat', 'mean_lon']
    tele_columns = ['time', 'mean_temp', 'mean_depth', 'mean_lat', 'mean_lon']

    # Read the telemetry-status table.
    telemetrystatus_df = read_telemetrystatus(telemetry_status)

    # Per-vessel record: min/max/average of the depth and temperature
    # differences plus the numbers of files, telemetry records and matches.
    record_file_df = telemetrystatus_df.loc[:, ['Boat', 'Vessel#']].reindex(
        columns=['Boat', 'Vessel#', 'matched_number', 'file_number',
                 'tele_num', 'max_diff_depth', 'min_diff_depth',
                 'average_diff_depth', 'max_diff_temp', 'min_diff_temp',
                 'average_diff_temp', 'sum_diff_depth', 'sum_diff_temp',
                 'min_lat', 'max_lat', 'min_lon', 'max_lon'],
        fill_value=None)

    # Convert the period limits from string to datetime.
    start_time_local = datetime.strptime(start_time, '%Y-%m-%d')
    end_time_local = datetime.strptime(end_time, '%Y-%m-%d')

    file_lists = [path for path in zl.list_all_files(input_dir)
                  if path.endswith('.csv')]

    # Download the telemetry data and keep the records inside the period.
    # Rows are gathered in a plain list: DataFrame.append was removed in
    # pandas 2.0 and made the loop quadratic.
    tele_df = read_telemetry(telemetry_path)
    period_start_utc = zl.local2utc(start_time_local)
    period_end_utc = zl.local2utc(end_time_local)
    tele_records = []
    for i in range(len(tele_df)):
        tele_time = datetime.strptime(
            str(tele_df['year'].iloc[i]) + '-' +
            str(tele_df['month'].iloc[i]) + '-' +
            str(tele_df['day'].iloc[i]) + ' ' +
            str(tele_df['Hours'].iloc[i]) + ':' +
            str(tele_df['minates'].iloc[i]) + ':' + '00',
            '%Y-%m-%d %H:%M:%S')
        if period_start_utc <= tele_time < period_end_utc:
            tele_records.append({
                'vessel_n': tele_df['vessel_n'].iloc[i],
                'esn': tele_df['esn'].iloc[i],
                'time': tele_time,
                'lon': tele_df['lon'].iloc[i],
                'lat': tele_df['lat'].iloc[i],
                'depth': tele_df['depth'].iloc[i],
                'temp': tele_df['temp'].iloc[i],
            })

    # Stop when there is nothing to compare.
    if not tele_records and not file_lists:
        print(
            'please check the data website of telementry and the directory of raw_data is exist!'
        )
        sys.exit()
    elif not tele_records:
        print('please check the data website of telementry!')
        sys.exit()
    elif not file_lists:
        print('please check the directory raw_data is exist!')
        sys.exit()

    index = telemetrystatus_df['Boat']  # boat names, the keys of both dicts
    raw_rows = {boat: [] for boat in index}
    tele_rows = {boat: [] for boat in index}
    accept_time_diff = timedelta(minutes=accept_minutes_diff)

    for file in file_lists:  # loop over the raw files
        fname = os.path.basename(file)

        # Read the header and the data of the file.
        header_df = zl.nrows_len_to(file, 2, name=['key', 'value'])
        data_df = zl.skip_len_to(file, 2)

        # Vessel number of the file; reset for every file so that a missing
        # header entry can never inherit the value of the previous file.
        vessel_number = None
        for key, value in zip(header_df['key'], header_df['value']):
            if key.lower() == 'vessel number':
                vessel_number = int(value)
                break
        if vessel_number is None:
            print('NOTE: ' + fname + ' has no vessel number in its header; skipped.')
            continue

        # Keep the records deeper than 85% of the mean depth.
        depth = data_df['Depth(m)']
        value_data_df = data_df.loc[depth > 0.85 * mean(depth)]
        # Drop the records labelled 0 and 1 to let the temperature sensor
        # settle.  ``.loc`` replaces ``.ix`` (removed in pandas 1.0), which
        # was label based on an integer index.
        # NOTE(review): assumes zl.skip_len_to returns an integer index - confirm.
        value_data_df = value_data_df.loc[2:]
        # Exclude gross errors: keep temperatures within 3 standard deviations.
        temperature = value_data_df['Temperature(C)']
        temp_mean = mean(temperature)
        temp_spread = 3 * std(temperature)
        value_data_df = value_data_df.loc[
            (temperature > temp_mean - temp_spread) &
            (temperature < temp_mean + temp_spread)]
        value_data_df = value_data_df.reset_index(drop=True)
        if value_data_df.empty:
            print('NOTE: ' + fname + ' has no usable record after filtering; skipped.')
            continue

        # Convert the positions with cv.dm2dd.  Whole columns are assigned
        # because element-wise chained assignment does nothing under pandas
        # copy-on-write.
        converted = [cv.dm2dd(lat_dm, lon_dm) for lat_dm, lon_dm in
                     zip(value_data_df['Lat'], value_data_df['Lon'])]
        value_data_df['Lat'] = [pair[0] for pair in converted]
        value_data_df['Lon'] = [pair[1] for pair in converted]

        lat_values = value_data_df['Lat'].values
        lon_values = value_data_df['Lon'].values
        min_lat, max_lat = min(lat_values), max(lat_values)
        min_lon, max_lon = min(lon_values), max(lon_values)
        mean_lat = float(round(mean(lat_values), 4))
        mean_lon = float(round(mean(lon_values), 4))
        # The mean temperature leaves out the first remaining record.
        mean_temp = float(round(mean(value_data_df['Temperature(C)'].iloc[1:]), 2))
        mean_depth = float(abs(int(round(mean(value_data_df['Depth(m)'].values)))))

        # Rows of the status/record tables that belong to this vessel.
        vessel_rows = record_file_df.index[
            record_file_df['Vessel#'] == vessel_number]

        # Time of the file: GMT from the file name, also converted to local.
        stem_fields = fname.split('.')[0].split('_')
        time_str = stem_fields[2] + ' ' + stem_fields[3]
        time_local = zl.gmt_to_eastern(
            time_str[0:4] + '-' + time_str[4:6] + '-' + time_str[6:8] + ' ' +
            time_str[9:11] + ':' + time_str[11:13] + ':' + time_str[13:15])
        time_gmt = datetime.strptime(time_str, "%Y%m%d %H%M%S")

        # Count the file, widen the extent and store the file means for
        # every table row of this vessel.
        for row in vessel_rows:
            if pd.isnull(record_file_df.at[row, 'file_number']):
                record_file_df.at[row, 'file_number'] = 1
            else:
                record_file_df.at[row, 'file_number'] = (
                    record_file_df.at[row, 'file_number'] + 1)
            _widen_record_extent(record_file_df, row,
                                 min_lat, max_lat, min_lon, max_lon)
            raw_rows[telemetrystatus_df.at[row, 'Boat']].append(
                [time_local, fname, mean_temp, mean_depth, mean_lat, mean_lon])

        if len(vessel_rows) == 0:
            continue  # vessel not listed: nowhere to record matches

        # Position of the last remaining record, used for the distance test.
        lat = value_data_df['Lat'].iloc[-1]
        lon = value_data_df['Lon'].iloc[-1]
        vessel_label = str(vessel_number)
        # Matches are credited to the first table row of the vessel only.
        match_row = vessel_rows[0]
        for tele in tele_records:
            if (tele['vessel_n'].split('_')[1] == vessel_label
                    and abs(tele['time'] - time_gmt) <= accept_time_diff
                    and zl.dist(lat1=lat, lon1=lon,
                                lat2=float(tele['lat']),
                                lon2=float(tele['lon']))
                    <= acceptable_distance_diff):
                diff_temp = round(mean_temp - float(tele['temp']), 4)
                diff_depth = round(mean_depth - float(tele['depth']), 4)
                _add_match_to_record(record_file_df, match_row,
                                     diff_temp, diff_depth)

    # Count the telemetry records per boat, widen the extent with their
    # positions and store their values for tele_dict.
    for tele in tele_records:
        tele_vessel = int(tele['vessel_n'].split('_')[1])
        tele_vessel_rows = telemetrystatus_df.index[
            telemetrystatus_df['Vessel#'] == tele_vessel]
        for row in tele_vessel_rows:
            if pd.isnull(record_file_df.at[row, 'tele_num']):
                record_file_df.at[row, 'tele_num'] = 1
            else:
                record_file_df.at[row, 'tele_num'] = (
                    record_file_df.at[row, 'tele_num'] + 1)
            # Convert once so that the extent is always compared as numbers.
            tele_lat = float(tele['lat'])
            tele_lon = float(tele['lon'])
            _widen_record_extent(record_file_df, row,
                                 tele_lat, tele_lat, tele_lon, tele_lon)
            tele_rows[telemetrystatus_df.at[row, 'Boat']].append(
                [tele['time'], float(tele['temp']), float(tele['depth']),
                 tele_lat, tele_lon])
    print("finish the calculate of min_lat and min_lon!")

    # Averages for the matched vessels, zero counts for the others.
    for row in record_file_df.index:
        matched_number = record_file_df.at[row, 'matched_number']
        if pd.isnull(matched_number):
            record_file_df.at[row, 'matched_number'] = 0
        else:
            record_file_df.at[row, 'average_diff_depth'] = round(
                record_file_df.at[row, 'sum_diff_depth'] / matched_number, 4)
            record_file_df.at[row, 'average_diff_temp'] = round(
                record_file_df.at[row, 'sum_diff_temp'] / matched_number, 4)
        if pd.isnull(record_file_df.at[row, 'tele_num']):
            record_file_df.at[row, 'tele_num'] = 0
        if pd.isnull(record_file_df.at[row, 'file_number']):
            record_file_df.at[row, 'file_number'] = 0

    # Build the per-boat tables; the raw-file table is ordered by time.
    raw_dict = {}
    tele_dict = {}
    for boat in index:
        raw_boat_df = pd.DataFrame(
            raw_rows[boat], columns=raw_columns).sort_values(by=['time'])
        raw_boat_df.index = range(len(raw_boat_df))
        raw_dict[boat] = raw_boat_df
        tele_dict[boat] = pd.DataFrame(tele_rows[boat], columns=tele_columns)

    # The running sums are only needed for the averages.
    record_file_df = record_file_df.drop(
        ['sum_diff_depth', 'sum_diff_temp'], axis=1)
    # Save the record file.
    record_file_df.to_csv(
        os.path.join(path_save,
                     start_time + '_' + end_time + ' statistics.csv'),
        index=False)
    return (raw_dict, tele_dict, record_file_df, index, start_time_local,
            end_time_local, path_save)
date_logger_change[j] ) #use the transform_date(date) to fix the date telemetry_status_df['logger_change'][i] = date_logger_change else: continue #get the path and name of the file that need to match file_lists = glob.glob(os.path.join(input_dir, '*.csv')) #match the file for file in file_lists: #time conversion, GMT time to local time time_str = file.split('/')[len(file.split('/')) - 1:][0].split( '.')[0].split('_')[2] + ' ' + file.split( '/')[len(file.split('/')) - 1:][0].split('.')[0].split('_')[3] #GMT time to local time of file time_local = zl.gmt_to_eastern(time_str[0:4] + '-' + time_str[4:6] + '-' + time_str[6:8] + ' ' + time_str[9:11] + ':' + time_str[11:13] + ':' + time_str[13:15]).strftime("%Y%m%d") #math the SN and date for i in range(len(telemetry_status_df)): if not telemetry_status_df['Lowell-SN'].isnull( )[i] and not telemetry_status_df['logger_change'].isnull( )[i]: #we will enter the next line if SN or date is not exist for j in range(len( telemetry_status_df['Lowell-SN'][i].split(','))): fname_len_SN = len( file.split('/')[len(file.split('/')) - 1:][0].split('_') [1]) #the length of SN in the file name len_SN = len( telemetry_status_df['Lowell-SN'][i].split(',')[j] ) #the length of SN in the culumn of the Lowell-SN inthe file of the telemetry_status.csv
def classify_by_boat(input_dir, output_dir, telemetry_status_path_name):
    """Find which boat produced each raw CSV file and copy it to that boat's folder.

    Every ``*.csv`` directly inside ``input_dir`` is matched against the
    telemetry-status table by logger serial number and by date, and each
    match is copied to ``output_dir/<Boat>/<file name>``.

    File names are parsed by splitting on ``_``: the second field is the
    logger serial number, the third and fourth fields are the GMT date
    (``YYYYMMDD``) and time (``HHMMSS``) of the file.

    Parameters
    ----------
    input_dir : str
        Directory holding the raw CSV files to classify.
    output_dir : str
        Destination directory; created if missing.  It has to be empty,
        otherwise a message is printed and the program exits.
    telemetry_status_path_name : str
        Telemetry-status file, read with ``read_telemetrystatus``.  The
        columns ``Boat``, ``Lowell-SN`` and ``logger_change`` are used;
        ``Lowell-SN`` and ``logger_change`` hold comma-separated lists.

    Notes
    -----
    * This code is suitable for matching data recorded after the year 2000.
    * The j-th serial number of a boat is considered valid from its j-th
      ``logger_change`` date up to the next one (open-ended for the last).
    * A file name with fewer than four ``_``-separated fields raises
      ``IndexError``.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    if os.listdir(output_dir):
        print('please input a empty directory!')
        sys.exit()

    # Read the telemetry-status table.
    df = read_telemetrystatus(telemetry_status_path_name)

    # Collect (row label, serial numbers, logger-change dates) for every
    # boat that has both pieces of information.  The parsed dates are kept
    # in a local list instead of being written back into the DataFrame:
    # storing a list in a cell through chained indexing is unreliable in
    # pandas and does nothing at all under copy-on-write.
    sn_missing = df['Lowell-SN'].isnull()
    change_missing = df['logger_change'].isnull()
    probes = []
    for row in df.index:
        if sn_missing[row] or change_missing[row]:
            continue
        # Entries of four characters or fewer are kept as they are; longer
        # ones are normalised with zl.transform_date so they can be compared
        # with the "%Y%m%d" string derived from the file name.
        change_dates = [
            zl.transform_date(entry) if len(entry) > 4 else entry
            for entry in df['logger_change'][row].split(',')
        ]
        probes.append((row, df['Lowell-SN'][row].split(','), change_dates))

    # Match the files.
    for file in glob.glob(os.path.join(input_dir, '*.csv')):
        # os.path.basename works with either path separator, unlike a
        # hard-coded split on '/'.
        fname = os.path.basename(file)
        stem_fields = fname.split('.')[0].split('_')
        file_sn = fname.split('_')[1]

        # Time conversion: GMT time in the file name to local (eastern) date.
        time_str = stem_fields[2] + ' ' + stem_fields[3]
        time_local = zl.gmt_to_eastern(
            time_str[0:4] + '-' + time_str[4:6] + '-' + time_str[6:8] + ' ' +
            time_str[9:11] + ':' + time_str[11:13] + ':' + time_str[13:15]
        ).strftime("%Y%m%d")

        # Match the serial number and the date.
        for row, serial_numbers, change_dates in probes:
            for j, serial_number in enumerate(serial_numbers):
                # The file name may carry only the tail of the full serial
                # number, so compare against the end of the table entry.
                if not serial_number.endswith(file_sn):
                    continue

                # Build the destination from its parts rather than by string
                # replacement on the source path, which produced a wrong
                # target whenever ``input_dir`` had a trailing separator.
                dstfile = os.path.join(output_dir, df['Boat'][row], fname)

                # Copy the file to the destination folder.
                try:
                    if j < len(change_dates) - 1:
                        in_service = (change_dates[j] <= time_local
                                      <= change_dates[j + 1])
                    else:
                        # Last logger of the boat (or more serial numbers
                        # than dates, which raises IndexError below).
                        in_service = change_dates[j] <= time_local
                    if in_service:
                        zl.copyfile(file, dstfile)
                except Exception:
                    # Best effort: report and go on.  ``Exception`` (rather
                    # than a bare ``except``) lets Ctrl-C and sys.exit()
                    # stop the run.
                    print("please check the data of telemetry status!")