def verify_time_homogeniety(users, time_delta, db_session):
    """Verify that each user's homogenized records are spaced exactly
    ``time_delta`` apart.

    Walks every user's HomogenizedRecord times in ascending order and raises
    AssertionError on the first consecutive pair whose gap differs from the
    expected delta.

    Parameters:
        users: iterable of user ids to check (must support ``len()``).
        time_delta: ``datetime.timedelta`` giving the expected gap.
        db_session: SQLAlchemy session used for the queries.

    Raises:
        AssertionError: if any consecutive pair of records for a user is not
            exactly ``time_delta`` apart.
    """
    logger.info("Verifying time homogeneity between {0} users".format(
        len(users)
    ))
    eta = ETACalculator(len(users), "Time Homogeneity Verification")
    # Hoist the loop-invariant expected gap out of the per-record loop.
    expected_seconds = time_delta.total_seconds()
    for u in users:
        result_set = db_session.query(HomogenizedRecord.time)\
            .filter(HomogenizedRecord.user == u)\
            .order_by(HomogenizedRecord.time)
        previous = None
        # enumerate() replaces the original hand-rolled `i = 0; i += 1` counter.
        for i, (r,) in enumerate(result_set):
            if previous is not None:
                diff = timeDifferenceSeconds(r, previous)
                # Explicit raise instead of `assert`: the check must survive
                # `python -O`, which strips assert statements. AssertionError
                # is kept so existing callers observe the same exception type.
                if diff != expected_seconds:
                    raise AssertionError(
                        "Time homogeniety was not preserved for user {user}, record #{record}.\n"
                        "Expected time delta: {exp}\n"
                        "Actual time delta: {act}".format(
                            user=u, record=i, exp=time_delta, act=diff
                        ))
            previous = r
        eta.checkpoint()
        logger.info(eta.eta())
def verify_time_homogeniety(users, time_delta, db_session):
    """Check that consecutive HomogenizedRecord times for every user differ
    by exactly ``time_delta``; asserts on the first violation found.

    Parameters:
        users: iterable of user ids (must support ``len()``).
        time_delta: ``datetime.timedelta`` with the expected spacing.
        db_session: SQLAlchemy session used to fetch record times.
    """
    logger.info("Verifying time homogeneity between {0} users".format(
        len(users)))
    eta = ETACalculator(len(users), "Time Homogeneity Verification")
    for current_user in users:
        # All record times for this user, oldest first.
        ordered_times = (db_session.query(HomogenizedRecord.time)
                         .filter(HomogenizedRecord.user == current_user)
                         .order_by(HomogenizedRecord.time))
        prev_time = None
        record_index = 0
        for (current_time,) in ordered_times:
            if prev_time is not None:
                gap = timeDifferenceSeconds(current_time, prev_time)
                assert gap == time_delta.total_seconds(), (
                    "Time homogeniety was not preserved for user {user}, record #{record}.\n"
                    "Expected time delta: {exp}\n"
                    "Actual time delta: {act}".format(
                        user=current_user, record=record_index,
                        exp=time_delta, act=gap))
            prev_time = current_time
            record_index += 1
        eta.checkpoint()
        logger.info(eta.eta())
def __init__(self, users):
    """Preload, for each user, that user's earliest WRecord.

    Populates ``self.most_recent_record`` as {user_id: WRecord-or-None}.
    NOTE(review): despite the attribute name, the query orders by time
    ascending and takes ``.first()``, so the stored row is the user's
    EARLIEST record (``None`` when the user has no records).

    Parameters:
        users: iterable of user ids (must support ``len()``).
    """
    self.most_recent_record = {}
    s = Session()
    # try/finally guarantees the session is released even when a query or
    # logging call raises; the original leaked the session on any error.
    try:
        logger.info("Preloading RecentUserRecord object")
        records = s.query(WRecord).order_by(WRecord.time)
        eta = ETACalculator(len(users), name="Earliest User Records")
        for u in users:
            r = records.filter(WRecord.user == u).first()
            self.most_recent_record[u] = r
            logger.info("First record for user {0}: {1}".format(u, r))
            eta.checkpoint()
            logger.info(eta.eta())
    finally:
        s.close()
def createCentroids(session):
    """Write one centroid-of-movement row per user to ``centroids.csv``.

    For each user id in ``users`` (a free variable -- presumably a
    module-level sequence of user ids; TODO confirm against the enclosing
    file), averages the lat/long of that user's RecordsOnOneDay rows and
    appends ``user,lat,long`` to the CSV.

    Parameters:
        session: SQLAlchemy session used for the aggregate queries.
    """
    eta = ETACalculator(len(users), "User iteration")
    # newline='' is required by the csv module when writing; without it the
    # writer emits doubled row separators on Windows.
    with open('centroids.csv', 'w', newline='') as csvfile:
        fieldnames = ['user', 'lat', 'long']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for u in users:
            # Centroid = mean latitude / mean longitude over the user's rows.
            centroid_of_movement = session.query(
                func.avg(RecordsOnOneDay.c.lat).label('lat'),
                func.avg(RecordsOnOneDay.c.long).label('long'),
            ).filter(RecordsOnOneDay.c.new_user_id == u).first()
            print("User #{0} has centroid {1}".format(u, centroid_of_movement))
            writer.writerow({
                'user': u,
                'lat': centroid_of_movement.lat,
                'long': centroid_of_movement.long,
            })
            eta.checkpoint()
            logger.info(eta.eta())
def drange(start, stop, step):
    """Yield values from ``start`` (inclusive) up to ``stop`` (exclusive),
    advancing by ``step``. Works with floats as well as ints; like any
    float accumulation, values may drift slightly for non-representable steps.
    """
    value = start
    while value < stop:
        yield value
        value += step


import csv

if __name__ == "__main__":
    initialize_table(engine)
    session = Session()
    # Count the latitude steps without materializing a throwaway list
    # (the original built `[l for l in lat_values]` just to take len()).
    n = sum(1 for _ in drange(config.BEIJING["south"],
                              config.BEIJING["north"],
                              STEP))
    eta_til_completed_day = ETACalculator(n, "Latitudes")
    # newline='' per the csv module docs: avoids doubled row separators
    # on Windows.
    with open("latitude_histogram.csv", "w", newline='') as csv_file:
        histogram_file = csv.DictWriter(csv_file, ["latitude", "count"])
        histogram_file.writeheader()
        # Fresh generator: the counting pass above exhausted the first one.
        for lat in drange(config.BEIJING["south"],
                          config.BEIJING["north"],
                          STEP):
            # Records whose latitude falls within [lat, lat + STEP).
            number_of_records = session.query(WRecord).filter(
                WRecord.latitude >= lat,
                WRecord.latitude < lat + STEP,
            ).count()
            histogram_file.writerow({
                "latitude": lat,
                "count": number_of_records
            })
            eta_til_completed_day.checkpoint()
# NOTE(review): chunk of a larger ingest script -- the enclosing scope and any
# continuation of the final loop are outside this view; indentation below is
# reconstructed and should be confirmed against the full file.
logger.info("Weekday: {0}".format(record.WEEKDAY_STRINGS[weekday]))
logger.info("Source: {0}".format(directory))

# Bind a session factory to the configured engine.
engine = config.getEngine()
Session = sessionmaker()
Session.configure(bind=engine)
session = Session()

logger.info("-"*50)
logger.info("Database will be created and populated from files"
    " in {0}".format(directory))
record.initialize_table(engine)
logger.info("Table initialized")

timer = ETACalculator(iterations=geolife.get_num_files(directory))
# user_id -> number of weekday occurrences accumulated across files.
user_weekday_counts = defaultdict(int)
for u in user.from_directory(directory):
    logger.info("Beginning yielding of records from user {0.id}".format(u))
    for f in u.files:
        # Drop records outside the requested weekday / area of interest
        # before insertion.
        f.restrictRecordsTo(weekday=weekday, aoi=config.BEIJING_80)
        if weekday is None or f.occursOn(weekday):
            session.add_all(f)
            session.commit()
            # Now that this file has been read, the user-date-file information
            # has been populated.
            synthesized_users_in_file = 0
            for user_id in f.weekday_counts:
                user_weekday_counts[user_id] += f.weekday_counts[user_id]
                synthesized_users_in_file += 1
# NOTE(review): the first two statements are the tail of a get_arguments()-style
# function whose def line is outside this view; indentation is reconstructed.
    args = parser.parse_args()
    return args


import csv

if __name__ == "__main__":
    args = get_arguments()
    weekday = args.weekday
    delta = args.time_delta
    initialize_table(engine)
    session = Session()
    users = get_users_present_on(weekday)
    # Number of time buckets of width `delta` covering a full day.
    n = num_elements_in_time_range(start=time.min, end=time.max, step=delta)
    eta_til_completed_day = ETACalculator(n, "Synthesis")
    with open("time_histogram.csv", "w") as csv_file:
        histogram_file = csv.DictWriter(csv_file, ["time", "count"])
        histogram_file.writeheader()
        for t in timerange(time.min, time.max, delta):
            logger.debug("="*60)
            logger.debug("Querying for time {0}".format(t))
            # Records of the selected users in the bucket [t, t + delta).
            number_of_records = session.query(WRecord).filter(
                WRecord.time >= t,
                WRecord.time < timeAdd(t, delta),
                WRecord.user.in_(users)
            ).order_by(WRecord.time).count()
            # NOTE(review): `i = 0` is a dead assignment -- never read here.
            i = 0
            histogram_file.writerow({"time": t, "count": number_of_records})
# NOTE(review): near-duplicate of another time-histogram script in this batch,
# here with ETA checkpoint/logging at the end of each bucket. The first two
# statements are the tail of a function whose def is outside this view.
    args = parser.parse_args()
    return args


import csv

if __name__ == "__main__":
    args = get_arguments()
    weekday = args.weekday
    delta = args.time_delta
    initialize_table(engine)
    session = Session()
    users = get_users_present_on(weekday)
    # Number of time buckets of width `delta` covering a full day.
    n = num_elements_in_time_range(start=time.min, end=time.max, step=delta)
    eta_til_completed_day = ETACalculator(n, "Synthesis")
    with open("time_histogram.csv", "w") as csv_file:
        histogram_file = csv.DictWriter(csv_file, ["time", "count"])
        histogram_file.writeheader()
        for t in timerange(time.min, time.max, delta):
            logger.debug("=" * 60)
            logger.debug("Querying for time {0}".format(t))
            # Records of the selected users in the bucket [t, t + delta).
            number_of_records = session.query(WRecord).filter(
                WRecord.time >= t,
                WRecord.time < timeAdd(t, delta),
                WRecord.user.in_(users)).order_by(WRecord.time).count()
            # NOTE(review): `i = 0` is a dead assignment -- never read here.
            i = 0
            histogram_file.writerow({"time": t, "count": number_of_records})
            eta_til_completed_day.checkpoint()
            logger.info(eta_til_completed_day.eta())
# NOTE(review): fragment of an export-to-ONE-movement script; the trailing
# f.write(...).format( call continues past this view, so the fragment ends
# mid-expression. Indentation is reconstructed.
logger.info("Time delta between records: {0}".format(delta))
logger.info("Written movement file: {0}".format(one_movement_filepath))

# Qualifying users: tracked for more than 7200 s (2 h) with more than 500
# records on the day.
users = [u for u, in session.query(DayUser.id).filter(and_(
    DayUser.duration>7200,
    DayUser.count>500,
)).all()]

# Cap the output at num_users randomly sampled users when requested;
# otherwise export everyone who qualified.
if num_users is not None and len(users) >= num_users:
    users = random.sample(users, num_users)
else:
    num_users = len(users)
logger.info("Number of users to be written out: {0}".format(num_users))

n = num_elements_in_time_range(start=time.min, end=time.max, step=delta)
eta_til_completed = ETACalculator(n, "DB to ONE output")
# Converts DB records into the ONE simulator's scaled grid coordinates.
converter = ExternalMovementReaderConverter(
    extent=BOUNDS,
    decimal_degree_scaling_factor=DECIMAL_DEGREES_TO_GRID_SCALE,
    users=users
)
with open(one_movement_filepath, "w") as f:
    logger.info("Writing converted and normalized records to {0}".format(
        one_movement_filepath
    ))
    # Header line: world bounds in time, x, y, z (continues past this view).
    f.write(
        "{minTime} {maxTime} "
        "{minX} {maxX} "
        "{minY} {maxY} "
        "{minZ} {maxZ}\n".format(
def main():
    """For each time step across a day, compute node-density statistics and
    append one row per step to ``density_stats.csv``.

    For every bucket ``t`` produced by ``timerange``, loads the homogenized
    records at exactly that time, computes the centroid of their (lon, lat)
    positions, each record's scaled distance to that centroid, and then the
    density / mean distance / circle area for circles around the centroid
    containing 10/20/40/60/80/100% of the nodes.
    """
    session = Session()
    args = get_arguments()
    delta = args.time_delta
    # NOTE(review): output_directory is read from args but unused below.
    output_directory = args.output_directory
    n = num_elements_in_time_range(start=time.min, end=time.max, step=delta)
    eta_til_completed = ETACalculator(n, "Geographic distribution over time")

    # Percentage thresholds and the CSV column-name suffix for each; driving
    # both the fieldnames and the per-step stats from this table replaces the
    # original's six copy-pasted calls with md1..md6 / a1..a6 temporaries.
    percentages = (0.1, 0.2, 0.4, 0.6, 0.8, 1.)
    suffixes = ('10', '20', '40', '60', '80', '100')

    with open("density_stats.csv", 'w') as output_file:
        fieldnames = (['time']
                      + ['d' + s for s in suffixes]
                      + ['distance' + s for s in suffixes]
                      + ['area' + s for s in suffixes]
                      + ['centroid_lat', 'centroid_lon'])
        writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        writer.writeheader()
        for t in timerange(time.min, time.max, delta):
            locations = []
            x_coords_of_records = []
            y_coords_of_records = []
            records = session.query(HomogenizedRecord).filter(
                HomogenizedRecord.time == t,
            )
            for r in records:
                locations.append({'lat': r.latitude, 'lon': r.longitude})
                x_coords_of_records.append(r.longitude)
                y_coords_of_records.append(r.latitude)
            # Centroid as (x=longitude, y=latitude).
            centroid = numpy.array((numpy.mean(x_coords_of_records),
                                    numpy.mean(y_coords_of_records)))
            # Scaled Euclidean distance from each record to the centroid.
            for loc in locations:
                point = numpy.array((loc['lon'], loc['lat']))
                loc['distance'] = (numpy.linalg.norm(point - centroid)
                                   * DECIMAL_DEGREES_TO_GRID_SCALE)

            row = {
                'time': t,
                'centroid_lat': centroid[1],
                'centroid_lon': centroid[0],
            }
            densities = []
            for pct, suffix in zip(percentages, suffixes):
                density, mean_distance, area = \
                    densityWithinCircleAroundCentroidContainingPercentageOfNodes(
                        percentage=pct, locations=locations)
                row['d' + suffix] = density
                row['distance' + suffix] = mean_distance
                row['area' + suffix] = area
                densities.append(density)

            print("Densities: {0}, {1}, {2}, {3}, {4}, {5}".format(*densities))
            writer.writerow(row)
            eta_til_completed.checkpoint()
            logger.info(eta_til_completed.eta())
# NOTE(review): chunk of a larger ingest script (near-duplicate of another in
# this batch); the enclosing scope and any continuation of the final loop are
# outside this view. Indentation below is reconstructed.
logger.info("Weekday: {0}".format(record.WEEKDAY_STRINGS[weekday]))
logger.info("Source: {0}".format(directory))

# Bind a session factory to the configured engine.
engine = config.getEngine()
Session = sessionmaker()
Session.configure(bind=engine)
session = Session()

logger.info("-" * 50)
logger.info("Database will be created and populated from files"
    " in {0}".format(directory))
record.initialize_table(engine)
logger.info("Table initialized")

timer = ETACalculator(iterations=geolife.get_num_files(directory))
# user_id -> number of weekday occurrences accumulated across files.
user_weekday_counts = defaultdict(int)
for u in user.from_directory(directory):
    logger.info("Beginning yielding of records from user {0.id}".format(u))
    for f in u.files:
        # Drop records outside the requested weekday / area of interest
        # before insertion.
        f.restrictRecordsTo(weekday=weekday, aoi=config.BEIJING_80)
        if weekday is None or f.occursOn(weekday):
            session.add_all(f)
            session.commit()
            # Now that this file has been read, the user-date-file information
            # has been populated.
            synthesized_users_in_file = 0
            for user_id in f.weekday_counts:
                user_weekday_counts[user_id] += f.weekday_counts[user_id]
                synthesized_users_in_file += 1
# NOTE(review): fragment -- opens midway through a parser.add_argument(...)
# call inside a get_arguments()-style function whose def is outside this view,
# and the final loop may continue past it. Indentation is reconstructed.
        '--input_directory',
        dest="input_directory",
        help=
        'Directory containing PLT files (default: current working directory)',
        type=geolife.find_geolife_root,
        required=True,
    )
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    args = get_arguments()
    directory = args.input_directory
    eta = ETACalculator(iterations=geolife.get_num_files(directory))
    # date -> set of user ids seen that day; date -> total record count.
    unique_users_per_day = defaultdict(set)
    unique_records_per_day = defaultdict(int)
    for u in user.from_directory(directory):
        logger.info("Beginning yielding of records from user {0.id}".format(u))
        for f in u.files:
            for r in f:
                user_id = r.user
                date = r.date
                # Add the user id to the set of unique users per day.
                unique_users_per_day[date].add(user_id)
                unique_records_per_day[date] += 1
# NOTE(review): fragment of a centroids script -- the writerow(...) dict
# literal at the end continues past this view. Indentation is reconstructed.
if __name__ == "__main__":
    args = get_arguments()
    dry_run = args.dry_run
    # Only users for a particular day will be selected.
    # If this argument is not specified, then all users will be selected.
    weekday = args.weekday
    session = Session()
    users = get_users_present_on(weekday)
    logger.debug("#" * 80)
    logger.debug("Users selected: {0}".format(users))
    eta = ETACalculator(len(users), "User iteration")
    with open('centroids.csv', 'w') as csvfile:
        fieldnames = ['user', 'lat', 'long']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for u in users:
            # Centroid of a user's movement = mean latitude / mean longitude
            # over all of that user's records.
            centroid_of_movement = session.query(
                func.avg(WRecord.latitude).label('lat'),
                func.avg(WRecord.longitude).label('long'),
            ).filter(WRecord.user == u).first()
            print("User #{0} has centroid {1}".format(u, centroid_of_movement))
            writer.writerow({
                'user': u,
# NOTE(review): the first statements are the tail of a get_arguments()-style
# function whose def is outside this view. Indentation is reconstructed.
    parser.add_argument(
        '-o',
        '--output_directory',
        dest="output_directory",
        help='Directory to store created files (default: ./out)',
        default="./out",
        type=os.path.abspath,
    )
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    session = Session()
    args = get_arguments()
    delta = args.time_delta
    # NOTE(review): output_directory is parsed but unused below.
    output_directory = args.output_directory
    n = num_elements_in_time_range(start=time.min, end=time.max, step=delta)
    eta_til_completed = ETACalculator(n, "Geographic distribution over time")
    for t in timerange(time.min, time.max, delta):
        # Count homogenized records at exactly this time step and print it.
        records = session.query(HomogenizedRecord).filter(
            HomogenizedRecord.time == t,
        ).count()
        print(records)
        eta_til_completed.checkpoint()
        logger.info(eta_til_completed.eta())