def prepare_data(self):
    # load data
    pbname = self.hparams.pbname
    fdf, edf, m = loadall(pbname)
    m = fdf.numMeasurements.max()
    # subtract the min timestamp so that the conversion to float32 is lossless
    # (iterate downward so that stimestamp0 is subtracted from itself last)
    for i in range(m - 1, -1, -1):
        edf.loc[:, "stimestamp" + str(i)] = edf.loc[:, "stimestamp" + str(i)].values - edf.loc[:, "stimestamp0"].values
    # define the column groups used as pytorch inputs
    self.dgroup = {}
    self.dgroup['xyz'] = ['x', 'y', 'z']
    self.dgroup['times'] = ['stimestamp' + str(i) for i in range(m)]
    self.dgroup['sensors'] = ['sensor' + str(i) for i in range(m)]
    self.dgroup['samesensors'] = ['samesensor' + str(i) for i in range(m - 1)]
    self.Batch = namedtuple('Batch', list(self.dgroup))
    self.dico = {'sensors': torch.LongTensor}
    # keep rows with a known aircraft position ('longitude==longitude' filters out NaN)
    self.nfts = edf.query('longitude==longitude').reset_index(drop=True)
     del edf
    # add x, y, z columns computed from latitude, longitude and altitude
    self.nfts = addxyz(self.nfts, "geoAltitude")
    # indicator: True when consecutive measurements come from different sensors
    for j in range(1, m):
        self.nfts.loc[:, "samesensor" + str(j - 1)] = self.nfts.loc[:, "sensor" + str(j - 1)].values != self.nfts.loc[:, "sensor" + str(j)].values
    # define the sensor parameters to be estimated
    loc = load_sensors(self.hparams.pbname, self.hparams.load_sensorsparams)
    self.loc_sensors = loc["loc"]
    self.alt_sensors = loc["alt"]
    self.shift_sensors = loc["shift"]
    self.C = loc["C"]
    print("initial values")
    self.print_sensorsparams()
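
# A minimal usage sketch (assumed, not part of the original source) of the
# dgroup/Batch/dico structures built above: each column group is extracted
# from a dataframe and converted to a tensor, with self.dico overriding the
# default constructor (e.g. LongTensor for integer sensor ids). The method
# name make_batch is hypothetical.
def make_batch(self, rows):
    tensors = {}
    for name, cols in self.dgroup.items():
        ctor = self.dico.get(name, torch.Tensor)  # default: float32 tensor
        tensors[name] = ctor(rows.loc[:, cols].values)
    return self.Batch(**tensors)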
def main():
    parser = ArgumentParser()
    add_args(parser)
    args = parser.parse_args()
    dsmoothedTraj = pickle.load(open(args.inputfile, 'rb'))
    learnfilter.addsmooth(dsmoothedTraj)
    fdf, edf, m = common.loadall(args.pbname)
    if args.model=='' or args.coverage is None:
        # compute valid time intervals for each aircraft, without filtering
        dicot = {
            aircraft: [(np.min(smo.trajff.timeAtServer), np.max(smo.trajff.timeAtServer))]
            for aircraft, smo in dsmoothedTraj.items()
        }
    else:
        # compute valid time intervals for each aircraft, filtering with the model
        with open(args.model,'rb') as f:
            model = pickle.load(f)
        vdf = pd.read_csv("./Data/{0}_result/{0}_result.csv".format(args.pbname))
        tokeep = ceil(vdf.shape[0]*args.coverage)
        print("# of points to keep",tokeep)
        dicot = compute_dicot_from_model(
            edf, {k: smo for (k, smo) in dsmoothedTraj.items() if smo.trajff.shape[0] > 0},
            model, tokeep, args.min_continuous_to_keep)
    print("compute prediction")
    pred = build_predictionfile(edf, dsmoothedTraj, dicot)
    print("compute distance")
    d = common.haversine_distance(pred.loc[:, latn].values, pred.loc[:, lonn].values,
                                  pred.latitude.values, pred.longitude.values)
    print(d.shape[0], common.rmse(d), common.rmse90(d))
    if args.outputfile != '':
        print("writing prediction file")
        pred = pred.drop(columns=["longitude", "latitude"])
        df = merge_with_result(pred, args.pbname)
        print("actual coverage", df.query("longitude==longitude").shape[0] / df.shape[0])
        df.to_csv(args.outputfile, float_format="%.12f", index=False)
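
# A minimal sketch (assumed, not part of the original source) of how the
# dicot structure built above can be queried: dicot maps an aircraft id to a
# list of (tmin, tmax) intervals on which its smoothed trajectory is valid.
# The helper name in_valid_interval is hypothetical.
def in_valid_interval(dicot, aircraft, t):
    # True if timestamp t falls inside one of the aircraft's valid intervals
    return any(tmin <= t <= tmax for (tmin, tmax) in dicot.get(aircraft, []))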
    def prepare_data(self):
        # load data
        self.pbname = self.hparams.pbname
        fdf, edf, m = loadall(self.pbname)
        # prepare data input for pytorch
        self.dgroup = {}
        self.dgroup['baroalt'] = ['baroAltitude']
        self.dgroup['times'] = ['stimestamp' + str(i) for i in range(m)]
        self.dgroup['timeAtServer'] = ['timeAtServer']
        self.dgroup['sensors'] = ['sensor' + str(i) for i in range(m)]
        self.dgroup['id'] = ['id']
        self.dgroup['samesensors'] = [
            'samesensor' + str(i) for i in range(m - 1)
        ]
        self.Batch = namedtuple('Batch', list(self.dgroup))
        self.dico = {'sensors': torch.LongTensor, 'id': torch.LongTensor}
        if self.hparams.ts:  # if training set
            self.nfts = edf.query('longitude==longitude').sort_values(
                by=["aircraft", "timeAtServer"]).reset_index(drop=True)
        else:  # if test set
            self.nfts = edf.query('longitude!=longitude').sort_values(
                by=["aircraft", "timeAtServer"]).reset_index(drop=True)
        del edf
        print("#aircraft", self.nfts.aircraft.nunique())
        print("self.nfts.head()", self.nfts.head())
        print(self.nfts.numMeasurements.describe())
        # detect and discard repeated measurements (same timeAtServer)
        self.norepeat = np.concatenate(
            (np.diff(self.nfts.timeAtServer.values) > 0., np.array([True])))
        print("detected repeated measurements",
              self.norepeat.shape[0] - self.norepeat.sum())
        self.nfts = self.nfts.iloc[self.norepeat].reset_index(drop=True)
        assert (not np.any(np.diff(self.nfts.timeAtServer.values) == 0.))
        # indicator: True when consecutive measurements come from different sensors
        for j in range(1, m):
            self.nfts.loc[:, "samesensor" + str(j - 1)] = (
                self.nfts.loc[:, "sensor" + str(j - 1)].values
                != self.nfts.loc[:, "sensor" + str(j)].values)
            # alternative indicator that also compares timestamps:
            # np.logical_or(self.nfts.loc[:, "sensor" + str(j - 1)].values != self.nfts.loc[:, "sensor" + str(j)].values,
            #               self.nfts.loc[:, "stimestamp" + str(j - 1)].values != self.nfts.loc[:, "stimestamp" + str(j)].values)
        # load sensorsparams
        loc = load_sensors(self.pbname, self.hparams.load_sensorsparams)
        self.loc_sensors = loc["loc"].cpu()
        self.alt_sensors = loc["alt"].cpu()
        self.shift_sensors = loc["shift"].cpu()
        self.C = loc['C']
        # freeze them
        freeze(self.loc_sensors)
        freeze(self.alt_sensors)
        freeze(self.shift_sensors)
        self.C = self.C.cpu()  # .cpu() returns a copy, so reassign
        self.C.requires_grad = False
        # detect close sensors
        lclose_sensors = get_close_sensors(self.loc_sensors,
                                           self.hparams.close_sensor,
                                           fdf.sensor.unique())
        # define the aircraft position parameters (one (lat, lon) entry per measurement id)
        self.latlon = torch.nn.Embedding(int(self.nfts.id.max()) + 1, 2)

        def count(dataset):
            # number of distinct consecutive sensors per row (at least 1)
            c = 1
            m = int(dataset.numMeasurements.max())
            for i in range(m - 1):
                c += dataset.loc[:, "sensor" + str(i)].values != dataset.loc[:, "sensor" + str(i + 1)].values
            return c

        # measurement count per row
        self.nfts.loc[:, "countmeasure"] = count(self.nfts)
        self.nfts.loc[:, "countmeasurecorrected"] = self.nfts.loc[:, "countmeasure"].values

        def isamongsensor(dataset, s):
            # 1 if sensor s appears among the row's sensors, else 0
            c = 0
            for i in range(m - 1):
                c = np.maximum(c, dataset.loc[:, "sensor" + str(i)].values == s)
            return c

        # update the measurement count by subtracting pairs of close sensors
        for (i, j) in lclose_sensors:
            self.nfts.loc[:, "countmeasurecorrected"] = (
                self.nfts.loc[:, "countmeasurecorrected"].values
                - isamongsensor(self.nfts, i) * isamongsensor(self.nfts, j))
        # only estimate the aircraft positions that have enough measurements
        self.nfts = self.nfts.query("countmeasurecorrected>=4").reset_index(drop=True)

        # initialize aircraft positions with the barycenter of their sensors
        def init_weights():
            prevpt = None
            for _, line in self.nfts.iterrows():
                if line.countmeasure != 0:
                    pts = tuple(self.loc_sensors.weight[int(line["sensor" + str(k)]), :]
                                for k in range(line.countmeasure))
                    prevpt = sum(pts) / len(pts)
                else:
                    # fall back to the previous row's position
                    assert prevpt is not None
                self.latlon.weight[int(line.id), :] = prevpt

        with torch.no_grad():
            init_weights()
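
# A minimal sketch (assumed, not the project's actual implementation) of what
# get_close_sensors used above might compute: pairs of sensor ids whose
# estimated (lat, lon) embeddings lie within `threshold` of each other.
# The function name get_close_sensors_sketch is hypothetical; torch is assumed
# to be imported, as in the code above.
def get_close_sensors_sketch(loc_sensors, threshold, sensor_ids):
    from itertools import combinations
    pairs = []
    with torch.no_grad():
        for i, j in combinations(sensor_ids, 2):
            d = torch.norm(loc_sensors.weight[int(i)] - loc_sensors.weight[int(j)])
            if d.item() < threshold:
                pairs.append((i, j))
    return pairs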
def main():
    parser = ArgumentParser()
    add_args(parser)
    args = parser.parse_args()
    ds = pickle.load(open(args.inputfile, 'rb'))
    aircrafts = ds.aircraft.unique()
    if args.pbname != '':
        fdf, edf, m = common.loadall(args.pbname)
        vdftrue = None if args.ts else pd.read_csv(
            "./Data/{}_result/{}_result.csv".format(args.pbname, args.pbname))
    d = {}
    ld = []
    for aircraft in aircrafts:
        print("aircraft", aircraft)
        traj = ds.query("aircraft==" + str(aircraft)).reset_index(drop=True)
        if traj.shape[0] > MIN_REQUIRED_NB:
            error = traj.error.values
            # discard points with a large multilateration error
            filterror = filter_error(error, args.thr_error)
            trajf = traj.loc[filterror]
            # keep the longest sequence satisfying speed constraints
            if trajf.shape[0] > MIN_REQUIRED_NB:
                filtspeed = filter_speedlimit(trajf.nnpredlatitude.values,
                                              trajf.nnpredlongitude.values,
                                              trajf.timeAtServer.values, 0.,
                                              args.speed_limit)
                trajff = trajf.loc[filtspeed]
                drawtrue = common.haversine_distance(
                    trajff.latitude, trajff.longitude,
                    trajff.nnpredlatitude.values,
                    trajff.nnpredlongitude.values)
                smoothedtraj = SmoothedTraj(trajff, args.smooth)
                t = trajff.timeAtServer.values
                slat, slon = smoothedtraj.predict(t)
                dsmoothraw = common.haversine_distance(
                    slat, slon, trajff.nnpredlatitude.values,
                    trajff.nnpredlongitude.values)
                tmin = np.min(t)
                tmax = np.max(t)
                if args.pbname != '':
                    traje = edf.query("aircraft==" + str(aircraft)).query(
                        str(tmin) +
                        "<=timeAtServer").query("timeAtServer<=" +
                                                str(tmax)).reset_index(
                                                    drop=True)
                    dsmoothtrue = comparewithtrue(traje, smoothedtraj,
                                                  vdftrue)  #[300:-300]
                    ld.append(dsmoothtrue)
                    print(common.rmse(ld[-1]), common.rmse90(ld[-1]),
                          common.rmse50(ld[-1]))
                print(traj.shape, trajff.shape)
                d[aircraft] = smoothedtraj
    if len(ld) > 0:
        dsmoothtrue = np.concatenate(ld)
        print(dsmoothtrue.shape[0], common.rmse(dsmoothtrue),
              common.rmse90(dsmoothtrue))
        e = np.sort(dsmoothtrue,
                    axis=None)[:int(dsmoothtrue.shape[0] * 0.6) + 1]
        print(e.shape[0], common.rmse(e), common.rmse90(e))
    if args.outputfile != '':
        # save dict[aircraft]=SmoothedTraj
        with open(args.outputfile, 'wb') as f:
            pickle.dump(d, f)
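
# A minimal sketch (assumed, not the project's actual class) of the
# SmoothedTraj interface used above and in the next example: smoothing
# B-splines of latitude/longitude as functions of time. The attributes
# slat/slon and the predict() method match how smo is used elsewhere in this
# document (e.g. smo.slat.derivative(nu=2)); everything else is an assumption.
from scipy.interpolate import BSpline, splrep


class SmoothedTrajSketch:
    def __init__(self, trajff, smooth):
        self.trajff = trajff
        t = trajff.timeAtServer.values  # assumed strictly increasing
        # the smoothing factor `smooth` trades fidelity against smoothness
        self.slat = BSpline(*splrep(t, trajff.nnpredlatitude.values, s=smooth))
        self.slon = BSpline(*splrep(t, trajff.nnpredlongitude.values, s=smooth))

    def predict(self, t):
        # smoothed latitude/longitude at times t
        return self.slat(t), self.slon(t)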
def main():
    parser = ArgumentParser()
    add_args(parser)
    args = parser.parse_args()
    dsmoothedTraj = pickle.load(open(args.inputfile, 'rb'))
    fdf, edf, m = common.loadall(args.pbname)
    edf = edf.sort_values(by=["aircraft", "timeAtServer"]).reset_index(drop=True)
    lbandwidth = list(range(1, 20)) + list(range(20, 100, 20))
    for aircraft in dsmoothedTraj:
        trajedf = edf.query('aircraft==' + str(aircraft))  # .reset_index(drop=True)
        smo = dsmoothedTraj[aircraft]
        # restrict to the time span covered by the smoothed trajectory
        tmin = np.min(smo.trajff.timeAtServer.values)
        tmax = np.max(smo.trajff.timeAtServer.values)
        trajedf = trajedf.query('timeAtServer<=' + str(tmax)).query(str(tmin) + '<=timeAtServer')
        slat, slon = smo.predict(trajedf.timeAtServer.values)
        dist2derror = common.haversine_distance(slat, slon, trajedf.latitude.values, trajedf.longitude.values)
        trajedf = trajedf.assign(smoothedlatitude=slat, smoothedlongitude=slon, dist2derror=dist2derror)
        dle = {i: [] for i in lbandwidth}
        ln = []
        le = []
        lt0 = []
        lt1 = []
        ls = {'mean': [], 'max': [], 'min': []}
        lc = {'mean': [], 'max': [], 'min': []}
        n = smo.trajff.timeAtServer.values.shape[0]
        # first and second derivatives of the latitude/longitude splines
        ddslat = smo.slat.derivative(nu=2)
        ddslon = smo.slon.derivative(nu=2)
        dslat = smo.slat.derivative(nu=1)
        dslon = smo.slon.derivative(nu=1)
        def update(d, v):
            d['mean'].append(np.mean(v))
            d['min'].append(np.min(v))
            d['max'].append(np.max(v))

        # Several (>2) points of trajedf fall between trajff.timeAtServer.values[i]
        # and trajff.timeAtServer.values[i+1]. For each point trajff[i] we therefore
        # compute statistics over all the trajedf points between t0 and t1; these
        # statistics become new features for trajff[i].
        for i in range(n):
            t0 = (smo.trajff.timeAtServer.values[max(i - 1, 0)] + smo.trajff.timeAtServer.values[i]) / 2
            t1 = (smo.trajff.timeAtServer.values[min(i + 1, n - 1)] + smo.trajff.timeAtServer.values[i]) / 2
            trajedft0t1 = trajedf.query(str(t0) + "<=timeAtServer").query("timeAtServer<=" + str(t1))
            t = trajedft0t1.timeAtServer.values
            lat = smo.slat(t)
            lon = smo.slon(t)
            dlat = dslat(t)
            dlon = dslon(t)
            ddlat = ddslat(t)
            ddlon = ddslon(t)
            h = trajedft0t1.baroAltitude.values
            speed, c = common.speed_curvature(lat, lon, dlat, dlon, ddlat, ddlon, h)
            update(ls, speed)
            update(lc, c)
            ln.append(trajedft0t1.shape[0])
            lt0.append(t0)
            lt1.append(t1)
            le.append(np.mean(trajedft0t1.dist2derror.values))
            # Gaussian kernel density of trajff timestamps around point i, one value per bandwidth
            draw = smo.trajff.timeAtServer.values - smo.trajff.timeAtServer.values[i]
            for bandwidth in lbandwidth:
                d = (draw / bandwidth) ** 2
                dle[bandwidth].append(np.sum(np.exp(-d)))
        # smooth the barometric altitude and compute its derivative
        sbaroalt = barosmooth(smo.trajff, args.altsmooth)
        sdbaroalt = sbaroalt.derivative(nu=1)
        smo.trajff.loc[:, "smoothedbaroAltitude"] = sbaroalt(smo.trajff.timeAtServer.values)
        smo.trajff.loc[:, "dbaroAltitude"] = sdbaroalt(smo.trajff.timeAtServer.values)
        # error between the true trajectory and the smoothed one on the points between t0 and t1
        smo.trajff.loc[:, "smoothedtrueerror"] = np.array(le)
        slat, slon = smo.predict(smo.trajff.timeAtServer.values)
        # distance between the smoothed and raw trajectories: a proxy for how spread out the raw points are
        smo.trajff.loc[:, "smoothedrawerror"] = common.haversine_distance(
            slat, slon, smo.trajff.nnpredlatitude.values, smo.trajff.nnpredlongitude.values)
        # number of points between t0 and t1, and the interval bounds themselves
        smo.trajff.loc[:, "nb"] = np.array(ln)
        smo.trajff.loc[:, "t0"] = np.array(lt0)
        smo.trajff.loc[:, "t1"] = np.array(lt1)
        # min/mean/max speed between t0 and t1
        smo.trajff.loc[:, "speedmin"] = np.array(ls['min'])
        smo.trajff.loc[:, "speedmean"] = np.array(ls['mean'])
        smo.trajff.loc[:, "speedmax"] = np.array(ls['max'])
        # min/mean/max curvature between t0 and t1
        smo.trajff.loc[:, "curvaturemin"] = np.array(lc['min'])
        smo.trajff.loc[:, "curvaturemean"] = np.array(lc['mean'])
        smo.trajff.loc[:, "curvaturemax"] = np.array(lc['max'])
        smo.trajff.loc[:, "dt01"] = smo.trajff.loc[:, "t1"].values - smo.trajff.loc[:, "t0"].values
        smo.trajff.loc[:, "dspeed"] = smo.trajff.speedmax.values - smo.trajff.speedmin.values
        smo.trajff.loc[:, "dspeeddt01"] = smo.trajff.dspeed.values / smo.trajff.dt01.values
        for bandwidth in lbandwidth:
            # density of trajff measurements over timeAtServer: the more points
            # per unit of time, the more precise the prediction should be
            smo.trajff.loc[:, "density" + str(bandwidth)] = np.array(dle[bandwidth])
    if args.outputfile != '':
        with open(args.outputfile, 'wb') as f:
            pickle.dump(dsmoothedTraj, f)
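
# A minimal sketch (assumed, not part of the original source) of the kernel
# density feature computed in the loop above, in vectorized form: for each
# point i and bandwidth b, sum the Gaussian kernel exp(-((t_j - t_i) / b)^2)
# over all trajectory timestamps t_j. The function name is hypothetical.
def kernel_density_features(times, lbandwidth):
    diff = times[None, :] - times[:, None]  # pairwise time differences
    return {b: np.exp(-(diff / b) ** 2).sum(axis=1) for b in lbandwidth}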