def classify_by_boat(telemetry_status, start_time, end_time, dict):
    """Sort new telemetry by boat and pair each observation with model values.

    Telemetry rows returned by ``read_telemetry()`` whose timestamp lies in
    ``(start_time, end_time]`` are assigned to the boats listed in the
    telemetry status table (matched on the number after the first ``_`` in
    ``vessel_n`` against the ``Vessel#`` column).  For every matched row the
    Doppio, GoMOFS, FVCOM, bathymetry and climatology helpers of ``mm`` are
    queried; a lookup that fails is recorded as NaN.

    Parameters
    ----------
    telemetry_status : passed unchanged to ``read_telemetrystatus``
        (path and file name of the telemetry status table).
    start_time, end_time : datetime.datetime
        Window of observations to process.  ``start_time`` is replaced by
        ``dict['end_time']`` when a complete previous result is supplied.
    dict : dict
        Result of a previous call, or anything else (e.g. ``{}``) to start
        from scratch.  The name shadows the builtin; it is kept because it is
        part of the public signature.

    Returns
    -------
    dict
        Keys ``'tele_dict'``, ``'Doppio'``, ``'GoMOLFs'``, ``'FVCOM'`` and
        ``'CrmClim'`` each map boat name -> DataFrame with the columns
        ``time, temp, depth, lat, lon``; ``'end_time'`` holds the time of the
        last telemetry row that fell inside the window (set only when at
        least one row did).

    Notes
    -----
    A KeyboardInterrupt raised during a model lookup is turned into
    ``sys.exit()``, as before.

    NOTE(review): a second ``classify_by_boat`` with a different signature is
    defined later in this source; if both live in the same module, that one
    replaces this one at import time -- confirm which is intended.
    """
    row_columns = ['time', 'lat', 'lon', 'temp', 'depth']
    frame_columns = ['time', 'temp', 'depth', 'lat', 'lon']

    def pair_or_nan(fetch):
        """Return the two values produced by fetch(), or (nan, nan) if it fails."""
        try:
            first, second = fetch()
        except KeyboardInterrupt:
            sys.exit()
        except Exception:
            # Best effort: a model that cannot answer must not stop the run.
            return np.nan, np.nan
        return first, second

    def value_or_nan(fetch):
        """Return fetch(), or nan if it fails."""
        try:
            return fetch()
        except KeyboardInterrupt:
            sys.exit()
        except Exception:
            return np.nan

    def extend(frame, rows):
        """Return `frame` with `rows` added at the end (index renumbered)."""
        if not rows:
            return frame
        fresh = pd.DataFrame(data=rows, columns=row_columns)
        if len(frame) == 0:
            # Keep the stored column order without concatenating an empty
            # frame (which would only contribute object dtypes).
            ordered = list(frame.columns) + [
                name for name in fresh.columns if name not in frame.columns
            ]
            return fresh.reindex(columns=ordered)
        return pd.concat([frame, fresh], ignore_index=True, sort=False)

    # Resume from a previous result only when it is complete.  The stored end
    # time is adopted *after* every store has been found, so a partial
    # dictionary can no longer move the window forward while the stores are
    # being reset.
    result = dict
    try:
        tele_dict = result['tele_dict']      # observations of every vessel
        Doppio_dict = result['Doppio']       # Doppio model values
        GoMOLFs_dict = result['GoMOLFs']     # GoMOFS model values
        FVCOM_dict = result['FVCOM']         # FVCOM model values
        CrmClim_dict = result['CrmClim']     # climatology temp / bathymetry depth
        resume_time = result['end_time']
    except (KeyError, TypeError):
        # Empty, incomplete or non-dictionary input: start from scratch.
        result = {}
        tele_dict, Doppio_dict, GoMOLFs_dict, FVCOM_dict, CrmClim_dict = {}, {}, {}, {}, {}
    else:
        start_time = resume_time
    stores = (tele_dict, Doppio_dict, GoMOLFs_dict, FVCOM_dict, CrmClim_dict)

    telemetrystatus_df = read_telemetrystatus(telemetry_status)
    # Telemetered data; per the original note it comes from
    # 'https://www.nefsc.noaa.gov/drifter/emolt.dat', so avoid calling this
    # while that file is being updated.
    tele_df = read_telemetry()

    # Keep the rows inside the window, grouped by vessel number so that each
    # boat can pick up its rows with a single lookup.
    by_vessel = {}
    last_time = None
    if len(tele_df) > 0:
        for year, month, day, hour, minute, vessel, lat, lon, depth, temp in zip(
                tele_df['year'], tele_df['month'], tele_df['day'],
                tele_df['Hours'], tele_df['minutes'], tele_df['vessel_n'],
                tele_df['lat'], tele_df['lon'], tele_df['depth'], tele_df['temp']):
            tele_time = datetime.strptime(
                '{}-{}-{} {}:{}:00'.format(year, month, day, hour, minute),
                '%Y-%m-%d %H:%M:%S')
            if start_time < tele_time <= end_time:
                # pandas.Timestamp is what the previous DataFrame round trip
                # handed to the model helpers and stored as 'end_time'.
                ptime = pd.Timestamp(tele_time)
                vessel_no = int(vessel.split('_')[1])
                by_vessel.setdefault(vessel_no, []).append((ptime, lat, lon, depth, temp))
                last_time = ptime
    if last_time is not None:
        result['end_time'] = last_time

    for boat, status_no in zip(telemetrystatus_df['Boat'], telemetrystatus_df['Vessel#']):
        # A boat seen for the first time gets an empty frame in every store.
        for store in stores:
            if boat not in store:
                store[boat] = pd.DataFrame(data=None, columns=frame_columns)

        tele_nu, dop_nu, gmf_nu, fvc_nu, crmclim_nu = [], [], [], [], []
        # pop(): rows are handed out once, so a later boat carrying the same
        # vessel number does not receive them again.
        for ptime, lat, lon, depth, temp in by_vessel.pop(status_no, []):
            latpt = float(lat)
            lonpt = float(lon)
            depthpt = float(depth)
            tele_nu.append([ptime, latpt, lonpt, float(temp), depthpt])

            dpo_temp, dpo_depth = pair_or_nan(lambda: mm.get_doppio(
                latp=latpt, lonp=lonpt, depth=depthpt, dtime=ptime, fortype='tempdepth'))
            dop_nu.append([ptime, latpt, lonpt, dpo_temp, dpo_depth])

            gmf_temp, gmf_depth = pair_or_nan(lambda: mm.get_gomofs(
                dtime=ptime, latp=latpt, lonp=lonpt, depth=depthpt, fortype='tempdepth'))
            gmf_nu.append([ptime, latpt, lonpt, gmf_temp, gmf_depth])

            FV_temp, FV_depth = pair_or_nan(lambda: mm.get_FVCOM_temp(
                latp=latpt, lonp=lonpt, dtime=ptime, depth=depthpt, fortype='tempdepth'))
            fvc_nu.append([ptime, latpt, lonpt, FV_temp, FV_depth])

            crm_depth = value_or_nan(lambda: mm.get_depth_bathy(loni=lonpt, lati=latpt))
            climtemp = value_or_nan(lambda: mm.getclim(lat1=latpt, lon1=lonpt, dtime=ptime))
            crmclim_nu.append([ptime, latpt, lonpt, climtemp, crm_depth])

        tele_dict[boat] = extend(tele_dict[boat], tele_nu)
        Doppio_dict[boat] = extend(Doppio_dict[boat], dop_nu)
        GoMOLFs_dict[boat] = extend(GoMOLFs_dict[boat], gmf_nu)
        FVCOM_dict[boat] = extend(FVCOM_dict[boat], fvc_nu)
        CrmClim_dict[boat] = extend(CrmClim_dict[boat], crmclim_nu)

    result['tele_dict'] = tele_dict
    result['Doppio'] = Doppio_dict
    result['GoMOLFs'] = GoMOLFs_dict
    result['FVCOM'] = FVCOM_dict
    result['CrmClim'] = CrmClim_dict

    # A window that overlaps an earlier run yields repeated timestamps; keep
    # the most recent entry for each time and renumber the rows.
    # NOTE(review): the whitespace of the original was lost; all five stores
    # are assumed to be de-duplicated only for boats that have observations.
    for boat in list(tele_dict):
        if len(tele_dict[boat]) == 0:
            continue
        for store in stores:
            store[boat] = store[boat].drop_duplicates(
                subset=['time'], keep='last').reset_index(drop=True)
    return result
def classify_by_boat(telemetry_status, start_time, end_time, dictionary, climpath):
    """Collect new eMOLT observations per boat together with model values.

    Observations returned by ``read_emolt(start=..., end=...)`` are assigned
    to the boats listed in the telemetry status table (matched on the number
    after the first ``_`` in ``vessel_n`` against the ``Vessel#`` column).
    For every matched observation the Doppio, GoMOFS, FVCOM, bathymetry and
    climatology helpers of ``mm`` are queried; a lookup that fails is
    recorded as NaN.  Each observation is handed to ``store_data`` under the
    key ``str(time)``.

    Parameters
    ----------
    telemetry_status : passed unchanged to ``read_telemetrystatus``
        (file path and file name of the telemetry status table).
    start_time, end_time : datetime.datetime
        Window of observations to request.  When ``dictionary['end_time']``
        holds a ``'%Y-%m-%d %H:%M:%S'`` string, that time replaces
        ``start_time``; otherwise the argument is used as given.
    dictionary : dict
        Result of a previous call, or an empty dictionary.  It is updated in
        place.
    climpath : forwarded to ``mm.getclim`` as ``path``.

    Returns
    -------
    dict
        The same ``dictionary``: one entry per boat name (created with
        ``create_storedictionary`` and filled by ``store_data``) plus
        ``'end_time'``, the time of the last observation received, as a
        string (set only when at least one observation was received).

    Notes
    -----
    The list passed to ``store_data`` is ordered: lat, lon, observed temp,
    observed depth, Doppio temp, Doppio depth, GoMOFS temp, GoMOFS depth,
    FVCOM temp, FVCOM depth, climatology temp, bathymetry depth.
    A KeyboardInterrupt raised during a model lookup is turned into
    ``sys.exit()``, as before.
    """

    def pair_or_nan(fetch):
        """Return the two values produced by fetch(), or (nan, nan) if it fails."""
        try:
            first, second = fetch()
        except KeyboardInterrupt:
            sys.exit()
        except Exception:
            # Best effort: a model that cannot answer must not stop the run.
            return np.nan, np.nan
        return first, second

    def value_or_nan(fetch):
        """Return fetch(), or nan if it fails."""
        try:
            return fetch()
        except KeyboardInterrupt:
            sys.exit()
        except Exception:
            return np.nan

    # Resume where the previous run stopped.  A missing, non-string or
    # unparsable 'end_time' leaves the caller's start_time untouched.
    try:
        start_time = datetime.strptime(dictionary['end_time'], '%Y-%m-%d %H:%M:%S')
    except (KeyError, TypeError, ValueError):
        pass

    telemetrystatus_df = read_telemetrystatus(telemetry_status)
    # eMOLT data; per the original note it comes from
    # 'https://www.nefsc.noaa.gov/drifter/emolt.dat', so avoid calling this
    # while that file is being updated.
    emolt_df = read_emolt(start=start_time, end=end_time)

    # Group the observations by vessel number once, so that each boat picks
    # up its rows with a single lookup instead of rescanning the whole frame.
    by_vessel = {}
    if len(emolt_df) > 0:
        dictionary['end_time'] = str(emolt_df['time'].iloc[-1])
        for vessel, ptime, lat, lon, depth, temp in zip(
                emolt_df['vessel_n'], emolt_df['time'], emolt_df['lat'],
                emolt_df['lon'], emolt_df['depth'], emolt_df['temp']):
            vessel_no = int(vessel.split('_')[1])
            by_vessel.setdefault(vessel_no, []).append((ptime, lat, lon, depth, temp))

    for vessel_name, status_no in zip(telemetrystatus_df['Boat'],
                                      telemetrystatus_df['Vessel#']):
        if vessel_name not in dictionary:
            # NOTE(review): the whitespace of the original was lost; the
            # sub-dictionaries are assumed to be created only for a boat seen
            # for the first time, so existing data is never re-initialised.
            dictionary[vessel_name] = create_storedictionary({})

        # pop(): rows are handed out once, so a later boat carrying the same
        # vessel number does not receive them again.
        for ptime, lat, lon, depth, temp in by_vessel.pop(status_no, []):
            latpt = float(lat)
            lonpt = float(lon)
            depthpt = float(depth)
            temppt = float(temp)

            dpo_temp, dpo_depth = pair_or_nan(lambda: mm.get_doppio(
                latp=latpt, lonp=lonpt, depth=depthpt, dtime=ptime, fortype='tempdepth'))
            gmf_temp, gmf_depth = pair_or_nan(lambda: mm.get_gomofs(
                dtime=ptime, latp=latpt, lonp=lonpt, depth=depthpt, fortype='tempdepth'))
            FV_temp, FV_depth = pair_or_nan(lambda: mm.get_FVCOM_temp(
                latp=latpt, lonp=lonpt, dtime=ptime, depth=depthpt, fortype='tempdepth'))
            ngdc_depth = value_or_nan(lambda: mm.get_depth_bathy(loni=lonpt, lati=latpt))
            climtemp = value_or_nan(lambda: mm.getclim(
                lat1=latpt, lon1=lonpt, path=climpath, dtime=ptime))

            data_list = [
                latpt, lonpt, temppt, depthpt,
                float(dpo_temp), float(dpo_depth),
                float(gmf_temp), float(gmf_depth),
                float(FV_temp), float(FV_depth),
                float(climtemp), float(ngdc_depth),
            ]
            dictionary[vessel_name] = store_data(
                key=str(ptime), data_list=data_list,
                dictionary=dictionary[vessel_name])
    return dictionary