Code example #1
def verify_time_homogeniety(users, time_delta, db_session):
  logger.info("Verifying time homogeneity between {0} users".format(
    len(users)
  ))
  eta = ETACalculator(len(users), "Time Homogeneity Verification")
  for u in users:
    result_set = db_session.query(HomogenizedRecord.time)\
                           .filter(HomogenizedRecord.user == u)\
                           .order_by(HomogenizedRecord.time)
    previous = None
    i = 0
    for r, in result_set:
      if previous is not None:
        diff = timeDifferenceSeconds(r, previous)
        assert diff == time_delta.total_seconds(), (
          "Time homogeniety was not preserved for user {user}, record #{record}.\n"
          "Expected time delta: {exp}\n"
          "Actual time delta:   {act}".format(
            user=u, record=i, exp=time_delta, act=diff
        ))

      previous = r
      i += 1
    eta.checkpoint()
    logger.info(eta.eta())
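
Every example on this page drives an ETACalculator through the same checkpoint()/eta() cycle, but the class itself never appears. The following is a minimal sketch of an interface consistent with these call sites; the constructor signature, method names, and time source are assumptions, not the actual GeoLifeReader implementation:

from time import monotonic

class ETACalculator:
    def __init__(self, iterations, name=""):
        self.iterations = iterations   # total units of work expected
        self.name = name
        self.completed = 0
        self.start = monotonic()

    def checkpoint(self):
        # Record that one unit of work has finished.
        self.completed += 1

    def eta(self):
        # Estimate the time remaining from the average pace so far.
        if self.completed == 0:
            return "{0}: no progress yet".format(self.name)
        pace = (monotonic() - self.start) / self.completed
        remaining = pace * (self.iterations - self.completed)
        return "{0}: {1}/{2} done, ~{3:.0f}s remaining".format(
            self.name, self.completed, self.iterations, remaining)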
Code example #2
def verify_time_homogeniety(users, time_delta, db_session):
    logger.info("Verifying time homogeneity between {0} users".format(
        len(users)))
    eta = ETACalculator(len(users), "Time Homogeneity Verification")
    for u in users:
        result_set = db_session.query(HomogenizedRecord.time)\
                               .filter(HomogenizedRecord.user == u)\
                               .order_by(HomogenizedRecord.time)
        previous = None
        i = 0
        for r, in result_set:
            if previous is not None:
                diff = timeDifferenceSeconds(r, previous)
                assert diff == time_delta.total_seconds(), (
                    "Time homogeniety was not preserved for user {user}, record #{record}.\n"
                    "Expected time delta: {exp}\n"
                    "Actual time delta:   {act}".format(user=u,
                                                        record=i,
                                                        exp=time_delta,
                                                        act=diff))

            previous = r
            i += 1
        eta.checkpoint()
        logger.info(eta.eta())
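
Both variants above also rely on a timeDifferenceSeconds helper that is not shown. Assuming HomogenizedRecord.time holds datetime.time values, one plausible implementation (an assumption, not the project's code) anchors both values on the same date so they can be subtracted:

from datetime import date, datetime

def timeDifferenceSeconds(later, earlier):
    # datetime.time objects cannot be subtracted directly, so combine
    # each with a fixed date and diff the resulting datetimes.
    anchor = date.min
    return (datetime.combine(anchor, later)
            - datetime.combine(anchor, earlier)).total_seconds()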
Code example #3
  def __init__(self, users):
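    # NOTE: despite the attribute name, this maps each user to their
    # EARLIEST record; the query below sorts ascending by time and
    # takes the first row per user.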
    self.most_recent_record = {}
    
    s = Session()

    logger.info("Preloading RecentUserRecord object")
    records = s.query(WRecord).order_by(WRecord.time)
    eta = ETACalculator(len(users), name="Earliest User Records")

    for u in users:
      r = records.filter(WRecord.user == u).first()
      self.most_recent_record[u] = r
      logger.info("First record for user {0}: {1}".format(u, r))
      eta.checkpoint()
      logger.info(eta.eta())
    s.close()
Code example #4
    def __init__(self, users):
        self.most_recent_record = {}

        s = Session()

        logger.info("Preloading RecentUserRecord object")
        records = s.query(WRecord).order_by(WRecord.time)
        eta = ETACalculator(len(users), name="Earliest User Records")

        for u in users:
            r = records.filter(WRecord.user == u).first()
            self.most_recent_record[u] = r
            logger.info("First record for user {0}: {1}".format(u, r))
            eta.checkpoint()
            logger.info(eta.eta())
        s.close()
Code example #5
File: centroids.py Project: catLyg/GeoLifeReader
def createCentroids(session):
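    # NOTE: 'users' is not a parameter here; the original centroids.py
    # presumably defines it at module scope.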
    eta = ETACalculator(len(users), "User iteration")

    with open('centroids.csv', 'w') as csvfile:
        fieldnames = ['user', 'lat', 'long']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for u in users:
            centroid_of_movement = session.query(
                func.avg(RecordsOnOneDay.c.lat).label('lat'),
                func.avg(RecordsOnOneDay.c.long).label('long'),
            ).filter(RecordsOnOneDay.c.new_user_id == u).first()

            print("User #{0} has centroid {1}".format(u, centroid_of_movement))
            writer.writerow({
                'user': u,
                'lat': centroid_of_movement.lat,
                'long': centroid_of_movement.long,
            })

            eta.checkpoint()
            logger.info(eta.eta())
Code example #6
def drange(start, stop, step):
    r = start
    while r < stop:
        yield r
        r += step


import csv
if __name__ == "__main__":
    initialize_table(engine)
    session = Session()

    lat_values = drange(config.BEIJING["south"], config.BEIJING["north"], STEP)
    n = len([l for l in lat_values])
    lat_values = drange(config.BEIJING["south"], config.BEIJING["north"], STEP)
    eta_til_completed_day = ETACalculator(n, "Latitudes")
    with open("latitude_histogram.csv", "w") as csv_file:
        histogram_file = csv.DictWriter(csv_file, ["latitude", "count"])
        histogram_file.writeheader()
        for l in lat_values:
            number_of_records = session.query(WRecord).filter(
                WRecord.latitude >= l,
                WRecord.latitude < l + STEP,
            ).count()

            histogram_file.writerow({
                "latitude": l,
                "count": number_of_records
            })

            eta_til_completed_day.checkpoint()
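
Note that lat_values has to be rebuilt after counting: materializing the generator with len([l for l in lat_values]) drains it. The count can also be computed arithmetically without the throwaway pass; a sketch, assuming step > 0 and stop > start:

import math

def count_steps(start, stop, step):
    # Number of values drange(start, stop, step) yields.
    return int(math.ceil((stop - start) / step))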
Code example #7
File: raw2db.py Project: Oracleli/GeoLifeReader
    logger.info("Weekday: {0}".format(record.WEEKDAY_STRINGS[weekday]))

  logger.info("Source:  {0}".format(directory))
  
  engine = config.getEngine()

  Session = sessionmaker()
  Session.configure(bind=engine)
  session = Session()
  logger.info("-"*50)
  logger.info("Database will be created and populated from files"
              " in {0}".format(directory))
  record.initialize_table(engine)
  logger.info("Table initialized")

  timer = ETACalculator(iterations=geolife.get_num_files(directory))
  user_weekday_counts = defaultdict(int)
  for u in user.from_directory(directory):
    logger.info("Beginning yielding of records from user {0.id}".format(u))
    for f in u.files:
      f.restrictRecordsTo(weekday=weekday, aoi=config.BEIJING_80)
      if weekday is None or f.occursOn(weekday):
        session.add_all(f)
        session.commit()

        # Now that this file has been read, the user-date-file information
        #  has been populated.
        synthesized_users_in_file = 0
        for user_id in f.weekday_counts:
          user_weekday_counts[user_id] += f.weekday_counts[user_id]
          synthesized_users_in_file += 1
Code example #8
File: histogram.py Project: Oracleli/GeoLifeReader
  args = parser.parse_args()
  return args


import csv
if __name__ == "__main__":
  args = get_arguments()
  weekday = args.weekday
  delta = args.time_delta

  initialize_table(engine)
  session = Session()

  users = get_users_present_on(weekday)
  n = num_elements_in_time_range(start=time.min, end=time.max, step=delta)
  eta_til_completed_day = ETACalculator(n, "Synthesis")
  with open("time_histogram.csv", "w") as csv_file:
    histogram_file = csv.DictWriter(csv_file, ["time", "count"])
    histogram_file.writeheader()
    for t in timerange(time.min, time.max, delta):
      logger.debug("="*60)
      logger.debug("Querying for time {0}".format(t))
      number_of_records = session.query(WRecord).filter(
        WRecord.time >= t,
        WRecord.time < timeAdd(t, delta),
        WRecord.user.in_(users)
      ).order_by(WRecord.time).count()
      
      i = 0
      histogram_file.writerow({"time": t, "count": number_of_records})
Code example #9
File: histogram.py Project: catLyg/GeoLifeReader
    args = parser.parse_args()
    return args


import csv
if __name__ == "__main__":
    args = get_arguments()
    weekday = args.weekday
    delta = args.time_delta

    initialize_table(engine)
    session = Session()

    users = get_users_present_on(weekday)
    n = num_elements_in_time_range(start=time.min, end=time.max, step=delta)
    eta_til_completed_day = ETACalculator(n, "Synthesis")
    with open("time_histogram.csv", "w") as csv_file:
        histogram_file = csv.DictWriter(csv_file, ["time", "count"])
        histogram_file.writeheader()
        for t in timerange(time.min, time.max, delta):
            logger.debug("=" * 60)
            logger.debug("Querying for time {0}".format(t))
            number_of_records = session.query(WRecord).filter(
                WRecord.time >= t, WRecord.time < timeAdd(t, delta),
                WRecord.user.in_(users)).order_by(WRecord.time).count()

            i = 0
            histogram_file.writerow({"time": t, "count": number_of_records})

            eta_til_completed_day.checkpoint()
            logger.info(eta_til_completed_day.eta())
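
The helpers timerange, timeAdd, and num_elements_in_time_range shared by these histogram scripts are not shown. A minimal sketch consistent with their usage over datetime.time values (assumed behavior, not the actual GeoLifeReader code):

from datetime import date, datetime

def timeAdd(t, delta):
    # Shift a time-of-day by a timedelta within a single day.
    return (datetime.combine(date.min, t) + delta).time()

def timerange(start, stop, step):
    # Yield times from start up to (but excluding) stop in fixed steps.
    t = start
    while t < stop:
        yield t
        shifted = datetime.combine(date.min, t) + step
        if shifted.date() != date.min:
            break  # stepped past midnight; stop iterating
        t = shifted.time()

def num_elements_in_time_range(start, end, step):
    # How many values timerange(start, end, step) will yield.
    return sum(1 for _ in timerange(start, end, step))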
Code example #10
  logger.info("Time delta between records: {0}".format(delta))
  logger.info("Written movement file: {0}".format(one_movement_filepath))

  users = [u for u, in session.query(DayUser.id).filter(and_(
    DayUser.duration > 7200,
    DayUser.count > 500,
  )).all()]
  if num_users is not None and len(users) >= num_users:
    users = random.sample(users, num_users)
  else:
    num_users = len(users)

  logger.info("Number of users to be written out: {0}".format(num_users))
  n = num_elements_in_time_range(start=time.min, end=time.max, step=delta)
  eta_til_completed = ETACalculator(n, "DB to ONE output")
  converter = ExternalMovementReaderConverter(
    extent=BOUNDS,
    decimal_degree_scaling_factor=DECIMAL_DEGREES_TO_GRID_SCALE,
    users=users
  )

  with open(one_movement_filepath, "w") as f:
    logger.info("Writing converted and normalized records to {0}".format(
      one_movement_filepath
    ))
    f.write(
      "{minTime} {maxTime} "
      "{minX} {maxX} "
      "{minY} {maxY} "
      "{minZ} {maxZ}\n".format(
Code example #11
def main():
    session = Session()

    args = get_arguments()
    delta = args.time_delta
    output_directory = args.output_directory

    n = num_elements_in_time_range(start=time.min, end=time.max, step=delta)
    eta_til_completed = ETACalculator(n, "Geographic distribution over time")

    with open("density_stats.csv", 'w') as output_file:
        fieldnames = [
            'time', 'd10', 'd20', 'd40', 'd60', 'd80', 'd100', 'distance10',
            'distance20', 'distance40', 'distance60', 'distance80',
            'distance100', 'area10', 'area20', 'area40', 'area60', 'area80',
            'area100', 'centroid_lat', 'centroid_lon'
        ]
        writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        writer.writeheader()

        for t in timerange(time.min, time.max, delta):
            location_info_for_current_time_period = []
            x_coords_of_records = []
            y_coords_of_records = []
            records = session.query(HomogenizedRecord).filter(
                HomogenizedRecord.time == t)
            for r in records:
                location_info_for_current_time_period.append({
                    'lat': r.latitude,
                    'lon': r.longitude,
                })
                x_coords_of_records.append(r.longitude)
                y_coords_of_records.append(r.latitude)

            centroid = numpy.array((numpy.mean(x_coords_of_records),
                                    numpy.mean(y_coords_of_records)))

            distances = []
            for r in location_info_for_current_time_period:
                p = numpy.array((r['lon'], r['lat']))
                d = numpy.linalg.norm(p - centroid) * DECIMAL_DEGREES_TO_GRID_SCALE
                r['distance'] = d

            locations = location_info_for_current_time_period
            density10, md1, a1 = densityWithinCircleAroundCentroidContainingPercentageOfNodes(
                percentage=0.1, locations=locations)
            density20, md2, a2 = densityWithinCircleAroundCentroidContainingPercentageOfNodes(
                percentage=0.2, locations=locations)
            density40, md3, a3 = densityWithinCircleAroundCentroidContainingPercentageOfNodes(
                percentage=0.4, locations=locations)
            density60, md4, a4 = densityWithinCircleAroundCentroidContainingPercentageOfNodes(
                percentage=0.6, locations=locations)
            density80, md5, a5 = densityWithinCircleAroundCentroidContainingPercentageOfNodes(
                percentage=0.8, locations=locations)
            density100, md6, a6 = densityWithinCircleAroundCentroidContainingPercentageOfNodes(
                percentage=1., locations=locations)

            print("Densities: {0}, {1}, {2}, {3}, {4}, {5}".format(
                density10, density20, density40, density60, density80,
                density100))

            writer.writerow({
                "time": t,
                "d10": density10,
                'distance10': md1,
                'area10': a1,
                'd20': density20,
                'distance20': md2,
                'area20': a2,
                'd40': density40,
                'distance40': md3,
                'area40': a3,
                'd60': density60,
                'distance60': md4,
                'area60': a4,
                'd80': density80,
                'distance80': md5,
                'area80': a5,
                'd100': density100,
                'distance100': md6,
                'area100': a6,
                'centroid_lat': centroid[1],
                'centroid_lon': centroid[0],
            })

            # Calculate the average x and y coordinate
            eta_til_completed.checkpoint()
            logger.info(eta_til_completed.eta())
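
densityWithinCircleAroundCentroidContainingPercentageOfNodes is likewise not shown. Given that every location dict has already been assigned a 'distance' from the centroid, one plausible reading (an assumption about the intended behavior, and requiring a non-empty locations list) returns the node density, radius, and area of the smallest centroid-centered circle holding the requested fraction of nodes:

import math

def densityWithinCircleAroundCentroidContainingPercentageOfNodes(
        percentage, locations):
    # Sort nodes by distance and keep the closest k = ceil(p * n).
    distances = sorted(loc['distance'] for loc in locations)
    k = max(1, int(math.ceil(percentage * len(distances))))
    radius = distances[k - 1]  # circle just enclosing those k nodes
    area = math.pi * radius ** 2
    density = k / area if area > 0 else float('inf')
    return density, radius, area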
Code example #12
        logger.info("Weekday: {0}".format(record.WEEKDAY_STRINGS[weekday]))

    logger.info("Source:  {0}".format(directory))

    engine = config.getEngine()

    Session = sessionmaker()
    Session.configure(bind=engine)
    session = Session()
    logger.info("-" * 50)
    logger.info("Database will be created and populated from files"
                " in {0}".format(directory))
    record.initialize_table(engine)
    logger.info("Table initialized")

    timer = ETACalculator(iterations=geolife.get_num_files(directory))
    user_weekday_counts = defaultdict(int)
    for u in user.from_directory(directory):
        logger.info("Beginning yielding of records from user {0.id}".format(u))
        for f in u.files:
            f.restrictRecordsTo(weekday=weekday, aoi=config.BEIJING_80)
            if weekday is None or f.occursOn(weekday):
                session.add_all(f)
                session.commit()

                # Now that this file has been read, the user-date-file information
                #  has been populated.
                synthesized_users_in_file = 0
                for user_id in f.weekday_counts:
                    user_weekday_counts[user_id] += f.weekday_counts[user_id]
                    synthesized_users_in_file += 1
Code example #13
        '--input_directory',
        dest="input_directory",
        help='Directory containing PLT files (default: current working directory)',
        type=geolife.find_geolife_root,
        required=True,
    )

    args = parser.parse_args()
    return args


if __name__ == "__main__":
    args = get_arguments()
    directory = args.input_directory
    eta = ETACalculator(iterations=geolife.get_num_files(directory))

    unique_users_per_day = defaultdict(set)
    unique_records_per_day = defaultdict(int)
    for u in user.from_directory(directory):

        logger.info("Beginning yielding of records from user {0.id}".format(u))
        for f in u.files:
            for r in f:
                user_id = r.user
                date = r.date

                # Add the user id to the set of unique users per day.
                unique_users_per_day[date].add(user_id)
                unique_records_per_day[date] += 1
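
    # The snippet ends before the tallies are used; one way the per-day
    # counts might be reported afterwards (illustrative, not part of the
    # original file):
    for day in sorted(unique_users_per_day):
        logger.info("{0}: {1} unique users, {2} records".format(
            day, len(unique_users_per_day[day]),
            unique_records_per_day[day]))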
Code example #14
File: centroids.py Project: scpei/GeoLifeReader
if __name__ == "__main__":
    args = get_arguments()
    dry_run = args.dry_run

    # Only users for a particular day will be selected.
    # If this argument is not specified, then all users will be selected.
    weekday = args.weekday

    session = Session()

    users = get_users_present_on(weekday)

    logger.debug("#" * 80)
    logger.debug("Users selected: {0}".format(users))

    eta = ETACalculator(len(users), "User iteration")

    with open('centroids.csv', 'w') as csvfile:
        fieldnames = ['user', 'lat', 'long']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for u in users:
            centroid_of_movement = session.query(
                func.avg(WRecord.latitude).label('lat'),
                func.avg(WRecord.longitude).label('long'),
            ).filter(WRecord.user == u).first()

            print("User #{0} has centroid {1}".format(u, centroid_of_movement))
            writer.writerow({
                'user': u,
Code example #15
  parser.add_argument(
    '-o', '--output_directory',
    dest="output_directory",
    help='Directory to store created files (default: ./out)',
    default="./out",
    type=os.path.abspath,
  )

  args = parser.parse_args()
  return args


if __name__ == "__main__":
  session = Session()

  args = get_arguments()
  delta = args.time_delta
  output_directory = args.output_directory

  n = num_elements_in_time_range(start=time.min, end=time.max, step=delta)
  eta_til_completed = ETACalculator(n, "Geographic distribution over time")

  for t in timerange(time.min, time.max, delta):
    records = session.query(HomogenizedRecord).filter(
      HomogenizedRecord.time == t,
    ).count()
    print(records)
    eta_til_completed.checkpoint()
    logger.info(eta_til_completed.eta())