def save_canvas(self, canvas, sub_dir=None, name=None, print_names=True, show=True):
    if sub_dir is None:
        sub_dir = self.SaveDir if hasattr(self, 'SaveDir') else ''
    canvas.Update()
    file_name = canvas.GetName() if name is None else name
    file_path = join(self.ResultsDir, sub_dir, '{typ}', file_name)
    ftypes = ['root', 'png', 'pdf', 'eps']
    out = 'Saving plots: {nam}'.format(nam=file_name)
    set_root_output(show)
    gROOT.ProcessLine('gErrorIgnoreLevel = kError;')  # suppress ROOT info messages while saving
    for f in ftypes:
        ensure_dir(file_path.format(typ=f))
        out_file = '{fname}.{typ}'.format(fname=file_path.format(typ=f), typ=f)
        canvas.SaveAs(out_file)
    if print_names:
        log_message(out)
    set_root_output(True)
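# ensure_dir is used by nearly every snippet in this section but never defined
# here (later snippets import it from Utils). A minimal sketch consistent with
# how it is called, on file paths whose parent directory may not yet exist, is
# the classic dirname-based helper; the real Utils.ensure_dir may differ:
import os


def ensure_dir(path):
    directory = os.path.dirname(path)
    if directory and not os.path.exists(directory):
        os.makedirs(directory)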
def __init__(self, analysis):
    self.Dir = join(analysis.ProgramDir, 'pickles')
    self.RunNumber = analysis.RunNumber if hasattr(analysis, 'RunNumber') else None
    ensure_dir(self.Dir)
    self.TestCampaign = ''
    self.Path = None
def configure(self):
    # check if the log directories exist and create them if not
    ensure_dir(self.LoggingDir)
    ensure_dir(self.LogFileDir)
    file_name = '{hv}_{dev}_{mod}_{t}.log'.format(hv=self.Name, dev=self.DeviceName, mod=self.ModelName, t=strftime('%Y_%m_%d_%H_%M_%S'))
    file_path = join(self.LogFileDir, file_name)
    log_info('Creating LOGFILE: {}'.format(file_path))
    self.Logger.removeHandler(self.FileHandler)
    self.FileHandler = FileHandler(file_path)
    self.FileHandler.setLevel(INFO)
    self.FileHandler.setFormatter(Formatter('%(asctime)s %(message)s', '%H:%M:%S'))
    self.Logger.addHandler(self.FileHandler)
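# The configure() method above assumes these imports: FileHandler, Formatter
# and INFO match the stdlib logging names, strftime the stdlib time module, and
# log_info presumably comes from the project's Utils module (an assumption):
from logging import FileHandler, Formatter, INFO
from os.path import join
from time import strftime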
def set_path(self, sub_dir, name=None, run='', ch=None, suf=None, camp=None):
    ensure_dir(join(self.Dir, sub_dir))
    name = name if name is not None else ''
    campaign = self.TestCampaign if camp is None else camp
    run = str(self.RunNumber).zfill(3) if self.RunNumber is not None else run
    ch = str(ch) if ch is not None else ''
    suf = str(suf) if suf is not None else ''
    tot_name = '_'.join([item for item in [name, campaign, run, ch, suf] if item])
    self.Path = join(self.Dir, sub_dir, '{n}.pickle'.format(n=tot_name))
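# Hypothetical usage of the two pickler methods above; the class name Pickler
# and the argument values are illustrative, not taken from this section:
import pickle
from os.path import isfile

pickler = Pickler(analysis)
pickler.set_path('Signal', name='pulse_height', ch=0, suf='fit')
# -> <ProgramDir>/pickles/Signal/pulse_height_<campaign>_<run>_0_fit.pickle
if isfile(pickler.Path):
    with open(pickler.Path, 'rb') as f:
        cached = pickle.load(f)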
def prepare(time, f, target, url):
    ensure_dir(f)
    ensure_dir(target)
    p = pathlib.Path(target)
    if p.exists():
        os.remove(target)
    # only fetch data dated after 2015-01-18 and no later than today
    if (int(time.utc_datetime().strftime('%Y%m%d')) > 20150118) \
            and (int(time.utc_datetime().strftime('%Y%m%d')) <= int(now().utc_datetime().strftime('%Y%m%d'))):
        p = pathlib.Path(f)
        if not p.exists():
            with urllib.request.urlopen(url) as response:
                content = response.read().decode('utf-8')
            with open(f, 'w') as out:
                out.write(content)
    if int(time.utc_datetime().strftime('%H%M')) == 1800:
        os.symlink(f, target)
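# now() in prepare() is not defined in this section. Since time.utc_datetime()
# matches skyfield's Time API, a plausible sketch (an assumption, not confirmed
# by the source) is:
from skyfield.api import load

ts = load.timescale()


def now():
    return ts.now()  # current time as a skyfield Time, which exposes .utc_datetime()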
def build_main_db():
    ensure_dir(main_geolifedb)
    connectionOrig = apsw.Connection(main_geolifedb)
    curs1orig = connectionOrig.cursor()
    create_table_SQL = 'CREATE TABLE geolife(person INT, traj INT, latitude REAL, longitude REAL, datetime TEXT);'
    curs1orig.execute(create_table_SQL)
    mainDirectory = geolife_zipfile_data_dir
    persons = os.listdir(mainDirectory)
    for person in persons:
        print("person n " + person)
        files = os.listdir(mainDirectory + '/' + person + '/Trajectory')
        sqlList = []
        for trajectoryFile in files:
            id_trajectory = trajectoryFile.split('.')[0]
            print("\t" + id_trajectory)
            with open(mainDirectory + '/' + person + '/Trajectory/' + trajectoryFile, 'r') as f:
                ct = 0
                for line in f:
                    # the first 6 lines of each trajectory file are header lines
                    if ct >= 6:
                        data = line.split(',')
                        sqlList.append((person, id_trajectory, data[0], data[1],
                                        data[-2] + ' ' + data[-1].replace('\n', '').replace('\r', '')))
                    ct += 1
        curs1orig.execute('PRAGMA journal_mode = OFF;')  # turn off journalling for speed
        curs1orig.execute('BEGIN')  # disable autocommit for speed, bundling the inserts into a single commit
        curs1orig.executemany('INSERT INTO geolife VALUES(?,?,?,?,?)', sqlList)
        curs1orig.execute('END')
    connectionOrig.close()
    print("Done")
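# Design note on the BEGIN/END pairing above: unlike the stdlib sqlite3 module,
# apsw does no implicit transaction management, so without an explicit
# transaction every INSERT would be committed individually. An equivalent, more
# idiomatic apsw form uses the connection as a context manager; a sketch with
# an illustrative helper name:
import apsw


def bulk_insert(db_path, rows):
    conn = apsw.Connection(db_path)
    with conn:  # BEGIN ... COMMIT, or ROLLBACK on exception
        conn.cursor().executemany('INSERT INTO geolife VALUES(?,?,?,?,?)', rows)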
def blend(base, overlay, amount):
    # composite `overlay` onto `base` in place via ImageMagick, at the given blend percentage
    call('convert {base} {overlay} -alpha on -compose Blend -define compose:args={amount} -composite {base}'.format(
        base=base, overlay=overlay, amount=amount), shell=True)


def run_convert_images():
    wave = Settings().wave
    ensure_dir(Config().tmp_dir)
    ensure_dir(Config().output_dir)
    image_earth = Config().image_earth(wave)
    image_night = Config().image_night(wave)
    t1, t2, t3 = gregorian.from_jd(wave.time.tt)
    month = date(t1, t2, t3).strftime('%m')
    if Settings().draw_earth:
        copyfile(Config().image_earth_month(month), image_earth)
        if Settings().draw_waves:
            blend(image_earth, Config().image_waves_original(wave), 20)
        if Settings().draw_temperature:
            blend(image_earth, Config().image_temperature_original(wave), 30)
        if Settings().draw_constellations:
            blend(image_earth, Config().image_constellations, 40)
        if Settings().draw_flightpaths:
            blend(image_earth, Config().image_flightpaths, 50)
        call('convert -brightness-contrast 20x20 {img} {img}'.format(img=image_earth), shell=True)
    if Settings().draw_night:
        copyfile(Config().image_night_month(month), image_night)
        if Settings().draw_waves:
            blend(image_night, Config().image_waves_original(wave), 20)
        if Settings().draw_temperature:
            blend(image_night, Config().image_temperature_original(wave), 5)
        if Settings().draw_constellations:
            blend(image_night, Config().image_constellations, 50)
        if Settings().draw_flightpaths:
            blend(image_night, Config().image_flightpaths, 50)
        call('convert -brightness-contrast 15x15 {img} {img}'.format(img=image_night), shell=True)
    if not (Settings().draw_night and Settings().draw_earth):
        if Settings().draw_constellations:
            copyfile(Config().image_constellations, image_night)
            copyfile(Config().image_constellations, image_earth)
    if Settings().draw_clouds:
        call('convert -brightness-contrast 5x0 {src} {dst}'.format(
            src=Config().image_cloud_original(wave), dst=Config().image_cloud(wave)), shell=True)
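# For reference, each blend step above shells out an ImageMagick command of the
# following shape (file names illustrative):
#
#   convert earth.png waves.png -alpha on -compose Blend \
#       -define compose:args=20 -composite earth.png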
def buildPreprocessingTable(spatialRes, temporalRes, nest=True,
                            personsIds=(0, 1, 2, 3, 4, 5, 7, 9, 12, 13, 14, 15, 16, 17, 22, 24, 153, 28, 30,
                                        35, 36, 38, 39, 40, 43, 44, 50, 179, 52, 55, 68, 71, 82, 84, 85, 92,
                                        96, 101, 104, 167, 119, 126)):
    data = []
    nside = ComputeNside(spatialRes)
    # connection to the database:
    connectionOrig = apsw.Connection(main_geolifedb)
    # build the preprocessed table in memory:
    writingConn = apsw.Connection(":memory:")
    curs1 = connectionOrig.cursor()
    curs2 = connectionOrig.cursor()
    curs3 = connectionOrig.cursor()
    writingCurs = writingConn.cursor()
    # create the table:
    writingCurs.execute("CREATE TABLE preproc (person INT, traj INT, idxPix INT, datetime TEXT)")

    def getIdxPix(longitude, latitude):
        return hp.ang2pix(nside, (90 - latitude) * np.pi / 180, longitude * np.pi / 180, nest)

    fmt = '%Y-%m-%d %H:%M:%S'
    if isinstance(personsIds, str) and personsIds.lower() == 'all':
        sql = "SELECT distinct person FROM geolife GROUP BY person"
        personsIds = [x[0] for x in curs3.execute(sql)]
    for pId in personsIds:
        print('Considering s: {} t: {} Person {}'.format(spatialRes, temporalRes, pId))
        person = pId
        sql = "SELECT distinct traj FROM geolife WHERE person = {} AND NOT datetime = '' GROUP BY traj".format(person)
        trajectories = curs2.execute(sql)
        prev = 0
        t_ct = 0
        # for each trajectory
        for t in trajectories:
            # select the first element of the tuple given by sqlite
            traj = t[0]
            sql = "SELECT longitude, latitude, datetime FROM geolife WHERE person = {} AND NOT datetime = '' AND traj = {} ORDER BY datetime".format(person, traj)
            rows = curs1.execute(sql)
            ct = 0
            points = []
            for row in rows:
                actualTime = dt.strptime(row[2], fmt)
                # if this is the first point of the trajectory, set the time origin
                if ct == 0:
                    # nextTime is the next time a location has to be taken
                    if t_ct == 0:
                        # first trajectory of the person: set the time origin at the first point
                        nextTime = actualTime
                        points.append((person, traj, int(getIdxPix(row[0], row[1])), nextTime.strftime(fmt)))
                        nextTime += temporalRes
                    else:
                        # determine the number of symbols missing
                        nb_loc_missing = int((actualTime - nextTime).total_seconds() // temporalRes.total_seconds() + 1)
                        # determine the next date where a location has to be taken
                        nextTime = nb_loc_missing * temporalRes + nextTime
                        # (an unimplemented idea: if the endpoints of a GPS gap fall in the
                        # same pixel, treat the user as dwelling there for the whole gap)
                # else, if the time is past nextTime, we have to record a point
                elif actualTime > nextTime:
                    # check which point is closer: this one...
                    if abs(nextTime - actualTime) < abs(nextTime - dt.strptime(prev[2], fmt)):
                        # record this point
                        points.append((person, traj, int(getIdxPix(row[0], row[1])), nextTime.strftime(fmt)))
                    # ...or the previous one?
                    else:
                        # record the previous point
                        points.append((person, traj, int(getIdxPix(prev[0], prev[1])), nextTime.strftime(fmt)))
                    nextTime += temporalRes
                ct += 1
                prev = row
            t_ct += 1
            if len(points) > 0:
                data.append(points)
            if len(points) > 1:
                writingCurs.executemany("INSERT INTO preproc (person,traj,idxPix,datetime) VALUES (?,?,?,?)", points)
        print("S: {} T: {} Person {}: {} trajectories processed".format(spatialRes, temporalRes, person, t_ct))
    # write the information about the sample
    writingCurs.execute("CREATE TABLE infoSample (nside INT)")
    writingCurs.execute("INSERT INTO infoSample VALUES ({})".format(nside))
    # create indices
    print("Creating index...")
    sql = """CREATE INDEX idx_person_preproc ON preproc(person);
             CREATE INDEX idx_person_traj_preproc ON preproc(person,traj);
             CREATE INDEX idx_traj_preproc ON preproc(traj);
             CREATE INDEX idx_traj_datetime_preproc ON preproc(traj,datetime);"""
    writingCurs.execute(sql)
    print("Cleaning up...")
    writingCurs.execute("vacuum")
    writingCurs.close()
    print("Writing out the database file...")
    # create the (empty) database file
    filename = "S{}T{}.sqlite".format(spatialRes, temporalRes)
    path = preprocessing_dir + "/" + filename
    ensure_dir(path)
    open(path, 'w').close()
    # now write the in-memory database back out to the file in one go
    connection = apsw.Connection(path)
    with connection.backup("main", writingConn, "main") as backup:
        backup.step()  # copy the whole database in one go
    print("Done")
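# ComputeNside is called above but not defined in this section. Since getIdxPix
# quantises GPS points onto a HEALPix grid, a plausible sketch (an assumption
# about the real helper, including the unit of spatialRes) picks the smallest
# power-of-two nside whose pixel size is finer than the requested resolution:
import healpy as hp


def ComputeNside(spatialRes, earth_radius_m=6371000.0):
    for k in range(30):
        nside = 2 ** k
        # hp.nside2resol gives the typical pixel size in radians
        if hp.nside2resol(nside) * earth_radius_m <= spatialRes:
            return nside
    raise ValueError('spatial resolution too fine for HEALPix')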
def run( group = "All",scale = None, output_dir = './ResultsLoP_replication/final_graphs', bulk_build_preprocessing = False): """ Generates a single heatmap for a given list of Geolife ids, for a given method of computing the upper bound on the upper limit of predictability. :param group: ["id_str",[list of ids in the geolife dataset]] :type group: Nested list :param scale: [min_z, max_z, step] Set the scale of the heatmap z :type scale: Float array """ t = time.time() #Group setting if(group == "All"): suffix = "All" persons = "All" else: suffix = "Grp{}".format(group[0]) persons = group[1] if not output_dir[-1] == '/': output_dir = output_dir + '/' file_name = "{}Heatmap_{}".format(output_dir,suffix) ensure_dir(file_name) print "Calculing the LoP for {}".format(suffix) if bulk_build_preprocessing: # will attempt to bulk build the cache using multiple CPU cores # will skip caches if already built. # if this option is not specified and a cache does not exist # it will be built when required, using a single CPU core. GeolifeSymbolisation.bulk_build_resolution_cache(listSpatialRes, listTemporalRes) mlab.openPool() failed_ids = set() LoP_RL = [] LoP_DL = [] LoP_failed_ct = [] passed_norm_test = [] for spatialRes in listSpatialRes: LoP_RL.append([]) LoP_DL.append([]) LoP_failed_ct.append([]) passed_norm_test.append([]) for temporalRes in listTemporalRes: #Compute data #--------------------------------------------- #Load data from an existing preproc database, this will have been created # earlier if it did not exist. data, person_ids = get_geolife_data(spatialRes, temporalRes,persons) #--------------------------------------------- # Sanity check on loading for person in data: if len(person) == 0: raise Exception("One or more person's trajectory was not loaded/created correctly.") # End sanity check S_RL, N_RL = empiricalEntropyRate(data,'RL') S_DL, N_DL = empiricalEntropyRate(data,'DL') #Save the average: tmpG_RL = list(mlab.ParLoP(S_RL, N_RL)[0]) tmpG_DL = list(mlab.ParLoP(S_DL, N_DL)[0]) #-88 real fail in solve #-99 known fail in solve when S > log2(N) # See the Matlab script (ParLoP.m) for more details if (np.asarray(tmpG_RL)==-88).any(): raise Exception("ERROR: (RL) Matlab failed the solve, but the entropy was in the correct range. Therefore an unknown error has occured.") if (np.asarray(tmpG_DL)==-88).any(): raise Exception("ERROR: (DL) Matlab failed the solve, but the entropy was in the correct range. Therefore an unknown error has occured.") # Replace known solve fails. These are the cases when an entropy is found that is to high. # This means the LZ entropy rate estimate is wrong (the estimator has failed to converge) # There is no way to correct this, without collecting more data from the individual. # While excluding the individual is not ideal it is better than including a value that is # *known* to be erroneous. Therefore we discard the individual. tmpG_RL = np.asarray(tmpG_RL) tmpG_DL = np.asarray(tmpG_DL) tmpG_RL_known_fail_mask = tmpG_RL < -1 tmpG_DL_known_fail_mask = tmpG_DL < -1 # To be comparable we must arrive at a consistent set of individuals from which to compare both # methods. tmpG_known_fail_mask = np.asarray(tmpG_RL_known_fail_mask) | np.asarray(tmpG_DL_known_fail_mask) #print tmpG_known_fail_mask failed_ct = len(tmpG_RL[tmpG_known_fail_mask]) for p in np.asarray(person_ids)[tmpG_known_fail_mask]: failed_ids.add(p) # Filter out known solve fails. 
tmpG_RL = list(np.asarray(tmpG_RL)[~tmpG_known_fail_mask]) tmpG_DL = list(np.asarray(tmpG_DL)[~tmpG_known_fail_mask]) if not len(tmpG_RL) == len(tmpG_DL): raise Exception("SHOULD NOT OCCUR 5g4dfg65") if (np.asarray(tmpG_RL) < 0).any(): raise Exception("ERROR. lsdkfal") LoP_RL[-1].append(np.average(tmpG_RL)) LoP_DL[-1].append(np.average(tmpG_DL)) LoP_failed_ct[-1].append( failed_ct ) mlab.closePool() save_results( file_name, LoP_RL, 'RL') save_results( file_name, LoP_DL, 'DL') f2 = file(file_name + "_failed_ct.csv", 'w') print 'failed_ids = {}.'.format( failed_ids ) np.savetxt(f2, LoP_failed_ct,fmt ="%.5f") f2.close() print "Done in {} seconds".format(time.time() - t)
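# save_results is called above but not shown. Judging from the companion
# "<file_name>_failed_ct.csv" output written with np.savetxt, a minimal sketch
# (an assumption about the real helper) is:
import numpy as np


def save_results(file_name, lop_matrix, label):
    with open('{}_{}.csv'.format(file_name, label), 'w') as out:
        np.savetxt(out, np.asarray(lop_matrix), fmt='%.5f')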
def run(group="All", scale=None, output_dir='./ResultsLoP_replication/final_graphs', bulk_build_preprocessing=False): """ Generates a single heatmap for a given list of Geolife ids, for a given method of computing the upper bound on the upper limit of predictability. :param group: ["id_str",[list of ids in the geolife dataset]] :type group: Nested list :param scale: [min_z, max_z, step] Set the scale of the heatmap z :type scale: Float array """ t = time.time() #Group setting if (group == "All"): suffix = "All" persons = "All" else: suffix = "Grp{}".format(group[0]) persons = group[1] if not output_dir[-1] == '/': output_dir = output_dir + '/' file_name = "{}Heatmap_{}".format(output_dir, suffix) ensure_dir(file_name) print "Calculing the LoP for {}".format(suffix) if bulk_build_preprocessing: # will attempt to bulk build the cache using multiple CPU cores # will skip caches if already built. # if this option is not specified and a cache does not exist # it will be built when required, using a single CPU core. GeolifeSymbolisation.bulk_build_resolution_cache( listSpatialRes, listTemporalRes) mlab.openPool() failed_ids = set() LoP_RL = [] LoP_DL = [] LoP_failed_ct = [] passed_norm_test = [] for spatialRes in listSpatialRes: LoP_RL.append([]) LoP_DL.append([]) LoP_failed_ct.append([]) passed_norm_test.append([]) for temporalRes in listTemporalRes: #Compute data #--------------------------------------------- #Load data from an existing preproc database, this will have been created # earlier if it did not exist. data, person_ids = get_geolife_data(spatialRes, temporalRes, persons) #--------------------------------------------- # Sanity check on loading for person in data: if len(person) == 0: raise Exception( "One or more person's trajectory was not loaded/created correctly." ) # End sanity check S_RL, N_RL = empiricalEntropyRate(data, 'RL') S_DL, N_DL = empiricalEntropyRate(data, 'DL') #Save the average: tmpG_RL = list(mlab.ParLoP(S_RL, N_RL)[0]) tmpG_DL = list(mlab.ParLoP(S_DL, N_DL)[0]) #-88 real fail in solve #-99 known fail in solve when S > log2(N) # See the Matlab script (ParLoP.m) for more details if (np.asarray(tmpG_RL) == -88).any(): raise Exception( "ERROR: (RL) Matlab failed the solve, but the entropy was in the correct range. Therefore an unknown error has occured." ) if (np.asarray(tmpG_DL) == -88).any(): raise Exception( "ERROR: (DL) Matlab failed the solve, but the entropy was in the correct range. Therefore an unknown error has occured." ) # Replace known solve fails. These are the cases when an entropy is found that is to high. # This means the LZ entropy rate estimate is wrong (the estimator has failed to converge) # There is no way to correct this, without collecting more data from the individual. # While excluding the individual is not ideal it is better than including a value that is # *known* to be erroneous. Therefore we discard the individual. tmpG_RL = np.asarray(tmpG_RL) tmpG_DL = np.asarray(tmpG_DL) tmpG_RL_known_fail_mask = tmpG_RL < -1 tmpG_DL_known_fail_mask = tmpG_DL < -1 # To be comparable we must arrive at a consistent set of individuals from which to compare both # methods. tmpG_known_fail_mask = np.asarray( tmpG_RL_known_fail_mask) | np.asarray(tmpG_DL_known_fail_mask) #print tmpG_known_fail_mask failed_ct = len(tmpG_RL[tmpG_known_fail_mask]) for p in np.asarray(person_ids)[tmpG_known_fail_mask]: failed_ids.add(p) # Filter out known solve fails. 
tmpG_RL = list(np.asarray(tmpG_RL)[~tmpG_known_fail_mask]) tmpG_DL = list(np.asarray(tmpG_DL)[~tmpG_known_fail_mask]) if not len(tmpG_RL) == len(tmpG_DL): raise Exception("SHOULD NOT OCCUR 5g4dfg65") if (np.asarray(tmpG_RL) < 0).any(): raise Exception("ERROR. lsdkfal") LoP_RL[-1].append(np.average(tmpG_RL)) LoP_DL[-1].append(np.average(tmpG_DL)) LoP_failed_ct[-1].append(failed_ct) mlab.closePool() save_results(file_name, LoP_RL, 'RL') save_results(file_name, LoP_DL, 'DL') f2 = file(file_name + "_failed_ct.csv", 'w') print 'failed_ids = {}.'.format(failed_ids) np.savetxt(f2, LoP_failed_ct, fmt="%.5f") f2.close() print "Done in {} seconds".format(time.time() - t)
from datetime import datetime as dt
from datetime import timedelta
from multiprocessing import Pool, cpu_count
import os

from Utils import ensure_dir

fmt = '%Y-%m-%d %H:%M:%S'

# ========
# PATHS
# ========
main_geolifedb = '../DataGeolife/geolife.sqlite'  # will be created if it doesn't exist
preprocessing_dir = '../DataGeolife/Preproc'  # folder for semi-temporary databases for specific quantisations, created if it doesn't exist
geolife_zipfile_data_dir = '~/Downloads/Geolife Trajectories 1.3/Data'  # folder for the original data, used to build the above databases if they don't exist

ensure_dir(main_geolifedb)
ensure_dir(preprocessing_dir)
if '~' in geolife_zipfile_data_dir:
    geolife_zipfile_data_dir = os.path.expanduser(geolife_zipfile_data_dir)
# ========


def build_specific_cache(spatialRes_temporalRes_pair):
    spatialRes, temporalRes = spatialRes_temporalRes_pair
    print('Processing spatial res {}, temporal res {}'.format(spatialRes, temporalRes))
    if not os.path.exists("{}/S{}T{}.sqlite".format(preprocessing_dir, spatialRes, temporalRes)):