def prepare_data(self):
    # load data
    pbname = self.hparams.pbname
    fdf, edf, m = loadall(pbname)
    m = fdf.numMeasurements.max()
    # subtract the first timestamp (looping backwards so that stimestamp0 is
    # zeroed last) in order to have a lossless conversion to float32
    for i in range(m - 1, -1, -1):
        edf.loc[:, "stimestamp" + str(i)] = \
            edf.loc[:, "stimestamp" + str(i)].values - edf.loc[:, "stimestamp0"].values
    # define data used as input of pytorch
    self.dgroup = {}
    self.dgroup['xyz'] = ['x', 'y', 'z']
    self.dgroup['times'] = ['stimestamp' + str(i) for i in range(m)]
    self.dgroup['sensors'] = ['sensor' + str(i) for i in range(m)]
    self.dgroup['samesensors'] = ['samesensor' + str(i) for i in range(m - 1)]
    self.Batch = namedtuple('Batch', list(self.dgroup))
    self.dico = {'sensors': torch.LongTensor}
    # keep only points with known aircraft positions
    self.nfts = edf.query('longitude==longitude').reset_index(drop=True)
    del edf
    # add x, y, z variables computed from the lat, lon, alt variables
    self.nfts = addxyz(self.nfts, "geoAltitude")
    # compute same-sensor indicators
    for j in range(1, m):
        self.nfts.loc[:, "samesensor" + str(j - 1)] = \
            self.nfts.loc[:, "sensor" + str(j - 1)].values != self.nfts.loc[:, "sensor" + str(j)].values
    # define the sensor parameters to be estimated
    loc = load_sensors(pbname, self.hparams.load_sensorsparams)
    self.loc_sensors = loc["loc"]
    self.alt_sensors = loc["alt"]
    self.shift_sensors = loc["shift"]
    self.C = loc["C"]
    print("initial values")
    self.print_sensorsparams()
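# Why the reference timestamp is subtracted above: a minimal, self-contained
# illustration (hypothetical values). Absolute nanosecond-scale timestamps
# exceed float32's 24-bit mantissa, so casting them directly loses the small
# time differences that multilateration needs; the deltas survive intact.
def _demo_float32_timestamps():
    import numpy as np
    t0 = np.int64(1_559_347_200_000_000)  # absolute timestamp
    t1 = t0 + 12_345                      # a nearby measurement
    print(np.float32(t1) - np.float32(t0))  # precision lost: not 12345.0
    print(np.float32(t1 - t0))              # delta is exact: 12345.0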
def main():
    parser = ArgumentParser()
    add_args(parser)
    args = parser.parse_args()
    dsmoothedTraj = pickle.load(open(args.inputfile, 'rb'))
    learnfilter.addsmooth(dsmoothedTraj)
    fdf, edf, m = common.loadall(args.pbname)
    if args.model == '' or args.coverage is None:
        # compute valid time intervals for each aircraft, without filtering
        dicot = {aircraft: [(np.min(smo.trajff.timeAtServer), np.max(smo.trajff.timeAtServer))]
                 for aircraft, smo in dsmoothedTraj.items()}
    else:
        # compute valid time intervals for each aircraft, filtering with the model
        with open(args.model, 'rb') as f:
            model = pickle.load(f)
        vdf = pd.read_csv("./Data/{0}_result/{0}_result.csv".format(args.pbname))
        tokeep = ceil(vdf.shape[0] * args.coverage)
        print("# of points to keep", tokeep)
        dicot = compute_dicot_from_model(
            edf, {k: smo for (k, smo) in dsmoothedTraj.items() if smo.trajff.shape[0] > 0},
            model, tokeep, args.min_continuous_to_keep)
    print("compute prediction")
    pred = build_predictionfile(edf, dsmoothedTraj, dicot)
    print("compute distance")
    d = common.haversine_distance(pred.loc[:, latn].values, pred.loc[:, lonn].values,
                                  pred.latitude.values, pred.longitude.values)
    print(d.shape[0], common.rmse(d), common.rmse90(d))
    if args.outputfile != '':
        print("writing prediction file")
        pred = pred.drop(columns=["longitude", "latitude"])
        df = merge_with_result(pred, args.pbname)
        print("actual coverage", df.query("longitude==longitude").shape[0] / df.shape[0])
        df.to_csv(args.outputfile, float_format="%.12f", index=False)
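# For reference, `dicot` maps each aircraft to a list of (tmin, tmax) intervals
# during which the smoothed trajectory is trusted. A minimal sketch (the helper
# name `in_valid_interval` is hypothetical, not part of the codebase) of how
# such an interval dictionary can gate which points receive a prediction:
def in_valid_interval(dicot, aircraft, t):
    # True if time t falls inside one of the aircraft's valid intervals
    return any(t0 <= t <= t1 for (t0, t1) in dicot.get(aircraft, []))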
def prepare_data(self):
    # load data
    self.pbname = self.hparams.pbname
    fdf, edf, m = loadall(self.pbname)
    # prepare data input for pytorch
    self.dgroup = {}
    self.dgroup['baroalt'] = ['baroAltitude']
    self.dgroup['times'] = ['stimestamp' + str(i) for i in range(m)]
    self.dgroup['timeAtServer'] = ['timeAtServer']
    self.dgroup['sensors'] = ['sensor' + str(i) for i in range(m)]
    self.dgroup['id'] = ['id']
    self.dgroup['samesensors'] = ['samesensor' + str(i) for i in range(m - 1)]
    self.Batch = namedtuple('Batch', list(self.dgroup))
    self.dico = {'sensors': torch.LongTensor, 'id': torch.LongTensor}
    if self.hparams.ts:
        # training set: keep points with a known position
        self.nfts = edf.query('longitude==longitude').sort_values(
            by=["aircraft", "timeAtServer"]).reset_index(drop=True)
    else:
        # test set: keep points with an unknown position
        self.nfts = edf.query('longitude!=longitude').sort_values(
            by=["aircraft", "timeAtServer"]).reset_index(drop=True)
    del edf
    print("#aircraft", self.nfts.aircraft.nunique())
    print("self.nfts.head()", self.nfts.head())
    print(self.nfts.numMeasurements.describe())
    # detect and discard repeated measurements sharing the same timeAtServer
    self.norepeat = np.concatenate(
        (np.diff(self.nfts.timeAtServer.values) > 0., np.array([True])))
    print("detected repeated measurements",
          self.norepeat.shape[0] - self.norepeat.sum())
    self.nfts = self.nfts.iloc[self.norepeat].reset_index(drop=True)
    assert not np.any(np.diff(self.nfts.timeAtServer.values) == 0.)
    # compute same-sensor indicators
    for j in range(1, m):
        self.nfts.loc[:, "samesensor" + str(j - 1)] = \
            self.nfts.loc[:, "sensor" + str(j - 1)].values != self.nfts.loc[:, "sensor" + str(j)].values
    # load the sensor parameters
    loc = load_sensors(self.pbname, self.hparams.load_sensorsparams)
    self.loc_sensors = loc["loc"].cpu()
    self.alt_sensors = loc["alt"].cpu()
    self.shift_sensors = loc["shift"].cpu()
    self.C = loc['C']
    # freeze them
    freeze(self.loc_sensors)
    freeze(self.alt_sensors)
    freeze(self.shift_sensors)
    self.C = self.C.cpu()
    self.C.requires_grad = False
    # detect close sensors
    lclose_sensors = get_close_sensors(self.loc_sensors,
                                       self.hparams.close_sensor,
                                       fdf.sensor.unique())
    # define the aircraft position parameters
    self.latlon = torch.nn.Embedding(int(self.nfts.id.max()) + 1, 2)

    def count(dataset):
        c = 1
        m = int(dataset.numMeasurements.max())
        for i in range(m - 1):
            c += dataset.loc[:, "sensor" + str(i)].values != dataset.loc[:, "sensor" + str(i + 1)].values
        return c

    # measurement count
    self.nfts.loc[:, "countmeasure"] = count(self.nfts)
    self.nfts.loc[:, "countmeasurecorrected"] = self.nfts.loc[:, "countmeasure"].values

    def isamongsensor(dataset, s):
        c = 0
        for i in range(m - 1):
            c = np.maximum(c, dataset.loc[:, "sensor" + str(i)].values == s)
        return c

    # correct the measurement count by subtracting pairs of close sensors
    for (i, j) in lclose_sensors:
        self.nfts.loc[:, "countmeasurecorrected"] = \
            self.nfts.loc[:, "countmeasurecorrected"].values \
            - isamongsensor(self.nfts, i) * isamongsensor(self.nfts, j)
    # only estimate the aircraft positions that have enough measurements
    self.nfts = self.nfts.query("countmeasurecorrected>=4").reset_index(drop=True)

    # initialize aircraft positions with the barycenter of their sensors
    def init_weights():
        prevpt = None
        for i, line in self.nfts.iterrows():
            if line.countmeasure != 0:
                mean = tuple(self.loc_sensors.weight[int(line["sensor" + str(k)]), :]
                             for k in range(line.countmeasure))
                prevpt = sum(mean) / len(mean)
            else:
                assert prevpt is not None
            self.latlon.weight[int(line.id), :] = prevpt

    with torch.no_grad():
        init_weights()
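# A minimal sketch of the `freeze` helper assumed above (its actual definition
# lives elsewhere in the codebase): the sensor parameters were estimated in a
# previous step, so gradients are disabled on them and only the aircraft
# positions held in self.latlon are optimized.
def freeze(module):
    for p in module.parameters():
        p.requires_grad = False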
def main():
    parser = ArgumentParser()
    add_args(parser)
    args = parser.parse_args()
    ds = pickle.load(open(args.inputfile, 'rb'))
    aircrafts = ds.aircraft.unique()
    if args.pbname != '':
        fdf, edf, m = common.loadall(args.pbname)
        vdftrue = None if args.ts else pd.read_csv(
            "./Data/{}_result/{}_result.csv".format(args.pbname, args.pbname))
    d = {}
    ld = []
    for aircraft in aircrafts:
        print("aircraft", aircraft)
        traj = ds.query("aircraft==" + str(aircraft)).reset_index(drop=True)
        if traj.shape[0] > MIN_REQUIRED_NB:
            error = traj.error.values
            # discard points with a large multilateration error
            filterror = filter_error(error, args.thr_error)
            trajf = traj.loc[filterror]
            # keep the longest sequence satisfying the speed constraints
            if trajf.shape[0] > MIN_REQUIRED_NB:
                filtspeed = filter_speedlimit(trajf.nnpredlatitude.values,
                                              trajf.nnpredlongitude.values,
                                              trajf.timeAtServer.values, 0.,
                                              args.speed_limit)
                trajff = trajf.loc[filtspeed]
                drawtrue = common.haversine_distance(
                    trajff.latitude, trajff.longitude,
                    trajff.nnpredlatitude.values, trajff.nnpredlongitude.values)
                smoothedtraj = SmoothedTraj(trajff, args.smooth)
                t = trajff.timeAtServer.values
                slat, slon = smoothedtraj.predict(t)
                dsmoothraw = common.haversine_distance(
                    slat, slon, trajff.nnpredlatitude.values,
                    trajff.nnpredlongitude.values)
                tmin = np.min(t)
                tmax = np.max(t)
                if args.pbname != '':
                    traje = edf.query("aircraft==" + str(aircraft)).query(
                        str(tmin) + "<=timeAtServer").query(
                            "timeAtServer<=" + str(tmax)).reset_index(drop=True)
                    dsmoothtrue = comparewithtrue(traje, smoothedtraj, vdftrue)
                    ld.append(dsmoothtrue)
                    print(common.rmse(ld[-1]), common.rmse90(ld[-1]),
                          common.rmse50(ld[-1]))
                print(traj.shape, trajff.shape)
                d[aircraft] = smoothedtraj
    if len(ld) > 0:
        dsmoothtrue = np.concatenate(ld)
        print(dsmoothtrue.shape[0], common.rmse(dsmoothtrue),
              common.rmse90(dsmoothtrue))
        e = np.sort(dsmoothtrue, axis=None)[:int(dsmoothtrue.shape[0] * 0.6) + 1]
        print(e.shape[0], common.rmse(e), common.rmse90(e))
    if args.outputfile != '':
        # save dict[aircraft] = SmoothedTraj
        with open(args.outputfile, 'wb') as f:
            pickle.dump(d, f)
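# A hedged sketch of what SmoothedTraj plausibly wraps, judging from the
# attributes used downstream (slat/slon splines supporting .derivative(), and
# a predict() method); the real class is defined elsewhere and may differ:
from scipy.interpolate import UnivariateSpline

class SmoothedTrajSketch:
    def __init__(self, trajff, smooth):
        t = trajff.timeAtServer.values
        self.trajff = trajff
        # one smoothing spline per coordinate, parameterized by server time
        self.slat = UnivariateSpline(t, trajff.nnpredlatitude.values, s=smooth)
        self.slon = UnivariateSpline(t, trajff.nnpredlongitude.values, s=smooth)

    def predict(self, t):
        return self.slat(t), self.slon(t)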
def main():
    parser = ArgumentParser()
    add_args(parser)
    args = parser.parse_args()
    dsmoothedTraj = pickle.load(open(args.inputfile, 'rb'))
    fdf, edf, m = common.loadall(args.pbname)
    edf = edf.sort_values(by=["aircraft", "timeAtServer"]).reset_index(drop=True)
    lbandwidth = list(range(1, 20)) + list(range(20, 100, 20))
    for aircraft in dsmoothedTraj:
        trajedf = edf.query('aircraft==' + str(aircraft))
        smo = dsmoothedTraj[aircraft]
        trajedf = trajedf.query(
            'timeAtServer<=' + str(np.max(smo.trajff.timeAtServer.values))).query(
                str(np.min(smo.trajff.timeAtServer.values)) + '<=timeAtServer')
        slat, slon = smo.predict(trajedf.timeAtServer.values)
        dist2derror = common.haversine_distance(slat, slon,
                                                trajedf.latitude.values,
                                                trajedf.longitude.values)
        trajedf = trajedf.assign(smoothedlatitude=slat, smoothedlongitude=slon,
                                 dist2derror=dist2derror)
        dle = {i: [] for i in lbandwidth}
        ln = []
        le = []
        lt0 = []
        lt1 = []
        ls = {'mean': [], 'max': [], 'min': []}
        lc = {'mean': [], 'max': [], 'min': []}
        n = smo.trajff.timeAtServer.values.shape[0]
        ddslat = smo.slat.derivative(nu=2)
        ddslon = smo.slon.derivative(nu=2)
        dslat = smo.slat.derivative(nu=1)
        dslon = smo.slon.derivative(nu=1)

        def update(d, v):
            d['mean'].append(np.mean(v))
            d['min'].append(np.min(v))
            d['max'].append(np.max(v))

        # Several points (>2) of trajedf fall between trajff.timeAtServer.values[i]
        # and trajff.timeAtServer.values[i+1]. For trajff[i], we compute statistics
        # on all the points between t0 and t1; these statistics become new features
        # for the point trajff[i].
        for i in range(n):
            t0 = (smo.trajff.timeAtServer.values[max(i - 1, 0)]
                  + smo.trajff.timeAtServer.values[i]) / 2
            t1 = (smo.trajff.timeAtServer.values[min(i + 1, n - 1)]
                  + smo.trajff.timeAtServer.values[i]) / 2
            trajedft0t1 = trajedf.query(str(t0) + "<=timeAtServer").query(
                "timeAtServer<=" + str(t1))
            t = trajedft0t1.timeAtServer.values
            lat = smo.slat(t)
            lon = smo.slon(t)
            dlat = dslat(t)
            dlon = dslon(t)
            ddlat = ddslat(t)
            ddlon = ddslon(t)
            h = trajedft0t1.baroAltitude.values
            speed, c = common.speed_curvature(lat, lon, dlat, dlon, ddlat, ddlon, h)
            update(ls, speed)
            update(lc, c)
            ln.append(trajedft0t1.shape[0])
            lt0.append(t0)
            lt1.append(t1)
            le.append(np.mean(trajedft0t1.dist2derror.values))
            draw = smo.trajff.timeAtServer.values - smo.trajff.timeAtServer.values[i]
            for bandwidth in lbandwidth:
                d = (draw / bandwidth) ** 2
                dle[bandwidth].append(np.sum(np.exp(-d)))
        sbaroalt = barosmooth(smo.trajff, args.altsmooth)
        sdbaroalt = sbaroalt.derivative(nu=1)
        smo.trajff.loc[:, "smoothedbaroAltitude"] = sbaroalt(smo.trajff.timeAtServer.values)
        smo.trajff.loc[:, "dbaroAltitude"] = sdbaroalt(smo.trajff.timeAtServer.values)
        # error between the true trajectory and the smoothed one on points between t0 and t1
        smo.trajff.loc[:, "smoothedtrueerror"] = np.array(le)
        slat, slon = smo.predict(smo.trajff.timeAtServer.values)
        # distance between the smoothed and raw trajectories: an indication of
        # how spread out the raw points are
        smo.trajff.loc[:, "smoothedrawerror"] = common.haversine_distance(
            slat, slon, smo.trajff.nnpredlatitude.values,
            smo.trajff.nnpredlongitude.values)
        # number of points between t0 and t1
        smo.trajff.loc[:, "nb"] = np.array(ln)
        smo.trajff.loc[:, "t0"] = np.array(lt0)
        smo.trajff.loc[:, "t1"] = np.array(lt1)
        # min, mean and max speed between t0 and t1
        smo.trajff.loc[:, "speedmin"] = np.array(ls['min'])
        smo.trajff.loc[:, "speedmean"] = np.array(ls['mean'])
        smo.trajff.loc[:, "speedmax"] = np.array(ls['max'])
        # min, mean and max curvature between t0 and t1
        smo.trajff.loc[:, "curvaturemin"] = np.array(lc['min'])
        smo.trajff.loc[:, "curvaturemean"] = np.array(lc['mean'])
        smo.trajff.loc[:, "curvaturemax"] = np.array(lc['max'])
        smo.trajff.loc[:, "dt01"] = smo.trajff.loc[:, "t1"].values - smo.trajff.loc[:, "t0"].values
        smo.trajff.loc[:, "dspeed"] = smo.trajff.speedmax.values - smo.trajff.speedmin.values
        smo.trajff.loc[:, "dspeeddt01"] = smo.trajff.dspeed.values / smo.trajff.dt01.values
        for bandwidth in lbandwidth:
            # density of measurements in trajff across timeAtServer: the more
            # points per unit of time, the more precise the prediction should be
            smo.trajff.loc[:, "density" + str(bandwidth)] = np.array(dle[bandwidth])
    if args.outputfile != '':
        with open(args.outputfile, 'wb') as f:
            pickle.dump(dsmoothedTraj, f)