Code example #1
def verify_time_homogeniety(users, time_delta, db_session):
  logger.info("Verifying time homogeneity between {0} users".format(
    len(users)
  ))
  eta = ETACalculator(len(users), "Time Homogeneity Verification")
  for u in users:
    result_set = db_session.query(HomogenizedRecord.time)\
                           .filter(HomogenizedRecord.user == u)\
                           .order_by(HomogenizedRecord.time)
    previous = None
    i = 0
    for r, in result_set:
      if previous is not None:
        diff = timeDifferenceSeconds(r, previous)
        assert diff == time_delta.total_seconds(), (
          "Time homogeniety was not preserved for user {user}, record #{record}.\n"
          "Expected time delta: {exp}\n"
          "Actual time delta:   {act}".format(
            user=u, record=i, exp=time_delta, act=diff
        ))

      previous = r
      i += 1
    eta.checkpoint()
    logger.info(eta.eta())
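The ETACalculator class itself does not appear in any of these excerpts. The following is a minimal hypothetical sketch, inferred only from how it is called above (a constructor taking an expected item count and a task name, checkpoint() invoked once per processed item, and eta() returning a string passed to logger.info); the project's actual implementation may differ.

import time


class ETACalculator(object):
    """Hypothetical sketch: estimate time remaining from per-item checkpoints."""

    def __init__(self, iterations, name=""):
        self.iterations = iterations  # total number of items expected
        self.name = name              # label included in the progress string
        self.completed = 0            # items processed so far
        self.start = time.time()      # wall-clock start of the task

    def checkpoint(self):
        # Record that one more item has been processed.
        self.completed += 1

    def eta(self):
        # Extrapolate the average seconds-per-item over the remaining items.
        elapsed = time.time() - self.start
        per_item = elapsed / max(self.completed, 1)
        remaining = per_item * (self.iterations - self.completed)
        return "{0}: {1}/{2} done, about {3:.0f}s remaining".format(
            self.name, self.completed, self.iterations, remaining)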
Code example #2
def verify_time_homogeniety(users, time_delta, db_session):
    logger.info("Verifying time homogeneity between {0} users".format(
        len(users)))
    eta = ETACalculator(len(users), "Time Homogeneity Verification")
    for u in users:
        result_set = db_session.query(HomogenizedRecord.time)\
                               .filter(HomogenizedRecord.user == u)\
                               .order_by(HomogenizedRecord.time)
        previous = None
        i = 0
        for r, in result_set:
            if previous is not None:
                diff = timeDifferenceSeconds(r, previous)
                assert diff == time_delta.total_seconds(), (
                    "Time homogeniety was not preserved for user {user}, record #{record}.\n"
                    "Expected time delta: {exp}\n"
                    "Actual time delta:   {act}".format(user=u,
                                                        record=i,
                                                        exp=time_delta,
                                                        act=diff))

            previous = r
            i += 1
        eta.checkpoint()
        logger.info(eta.eta())
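The timeDifferenceSeconds helper used above (and again in example #10) is not shown in these excerpts. A hypothetical sketch, assuming its arguments are datetime.time values and that it returns the absolute difference in seconds, could be:

from datetime import date, datetime


def timeDifferenceSeconds(t1, t2):
    # Hypothetical helper: anchor both time-of-day values to the same date and
    # return the absolute difference between them in seconds.
    d1 = datetime.combine(date.min, t1)
    d2 = datetime.combine(date.min, t2)
    return abs((d1 - d2).total_seconds())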
Code example #3
  def __init__(self, users):
    self.most_recent_record = {}
    
    s = Session()

    logger.info("Preloading RecentUserRecord object")
    records = s.query(WRecord).order_by(WRecord.time)
    eta = ETACalculator(len(users), name="Earliest User Records")

    for u in users:
      r = records.filter(WRecord.user == u).first()
      # Despite the attribute name, this stores each user's earliest record:
      # the base query is ordered by ascending time.
      self.most_recent_record[u] = r
      logger.info("First record for user {0}: {1}".format(u, r))
      eta.checkpoint()
      logger.info(eta.eta())
    s.close()
Code example #4
    def __init__(self, users):
        self.most_recent_record = {}

        s = Session()

        logger.info("Preloading RecentUserRecord object")
        records = s.query(WRecord).order_by(WRecord.time)
        eta = ETACalculator(len(users), name="Earliest User Records")

        for u in users:
            r = records.filter(WRecord.user == u).first()
            self.most_recent_record[u] = r
            logger.info("First record for user {0}: {1}".format(u, r))
            eta.checkpoint()
            logger.info(eta.eta())
        s.close()
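The log message suggests this __init__ belongs to a class named RecentUserRecord, which preloads each user's earliest WRecord into a dictionary keyed by user. Hypothetical usage, assuming that class name, might look like:

# Hypothetical usage; RecentUserRecord is the class name implied by the log message.
recent = RecentUserRecord(users)
for u in users:
    first_record = recent.most_recent_record[u]  # earliest WRecord, or None if the user has no records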
Code example #5
File: centroids.py Project: catLyg/GeoLifeReader
def createCentroids(session):
    # `users` is not defined in this excerpt; it is presumably available at
    # module level in centroids.py.
    eta = ETACalculator(len(users), "User iteration")

    with open('centroids.csv', 'w') as csvfile:
        fieldnames = ['user', 'lat', 'long']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for u in users:
            centroid_of_movement = session.query(
                func.avg(RecordsOnOneDay.c.lat).label('lat'),
                func.avg(RecordsOnOneDay.c.long).label('long'),
            ).filter(RecordsOnOneDay.c.new_user_id == u).first()

            print("User #{0} has centroid {1}".format(u, centroid_of_movement))
            writer.writerow({
                'user': u,
                'lat': centroid_of_movement.lat,
                'long': centroid_of_movement.long,
            })

            eta.checkpoint()
            logger.info(eta.eta())
Code example #6
    with open("latitude_histogram.csv", "w") as csv_file:
        histogram_file = csv.DictWriter(csv_file, ["latitude", "count"])
        histogram_file.writeheader()
        for l in lat_values:
            number_of_records = session.query(WRecord).filter(
                WRecord.latitude >= l,
                WRecord.latitude < l + STEP,
            ).count()

            histogram_file.writerow({
                "latitude": l,
                "count": number_of_records
            })

            eta_til_completed_day.checkpoint()
            logger.info(eta_til_completed_day.eta())

    long_values = drange(config.BEIJING["west"], config.BEIJING["east"], STEP)
    # Counting the elements apparently consumes the drange iterator, so it is
    # re-created before iterating over the longitude bins below.
    n = len([l for l in long_values])
    long_values = drange(config.BEIJING["west"], config.BEIJING["east"], STEP)
    eta_til_completed_day = ETACalculator(n, "Longitude")
    with open("longitude_histogram.csv", "w") as csv_file:
        histogram_file = csv.DictWriter(csv_file, ["longitude", "count"])
        histogram_file.writeheader()
        for l in long_values:
            number_of_records = session.query(WRecord).filter(
                WRecord.longitude >= l,
                WRecord.longitude < l + STEP,
            ).count()

            histogram_file.writerow({
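The drange helper is not shown here; from its use above it appears to be a float analogue of range() that yields its values lazily, which would explain why the code re-creates it after counting its elements. A hypothetical sketch:

def drange(start, stop, step):
    # Hypothetical helper: like range(), but for floats. Yields start,
    # start + step, ... for as long as the value stays below stop.
    value = start
    while value < stop:
        yield value
        value += step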
Code example #7
File: raw2db.py Project: Oracleli/GeoLifeReader
          session.commit()

          # Now that this file has been read, the user-date-file information
          #  has been populated.
          synthesized_users_in_file = 0
          for user_id in f.weekday_counts:
            user_weekday_counts[user_id] += f.weekday_counts[user_id]
            synthesized_users_in_file += 1
          logger.info("File {0} has {1} synthesized users, who will be summarized in db".format(
            os.path.basename(f.url), synthesized_users_in_file
          ))

          timer.checkpoint()
          logger.info("="*60)
          logger.info("File {0}".format(os.path.basename(f.url)))
          logger.info(timer.eta())

  # Create an index on the time values
  logger.info("Creating index on raw record time columns")
  Index('raw_time', record.WRecord.__table__.c.time).create(engine)

  # Store the user-date-count information.
  user_weekday_counts_db = []
  for key in user_weekday_counts:
    weekday_counts = user_weekday_counts[key]
    user_id, weekday = key
    user_weekday_counts_db.append(record.GeoLifeUser(
      id=user_id,
      count=weekday_counts,
      weekday=weekday,
    ))
Code example #8
File: histogram.py Project: Oracleli/GeoLifeReader
import csv
if __name__ == "__main__":
  args = get_arguments()
  weekday = args.weekday
  delta = args.time_delta

  initialize_table(engine)
  session = Session()

  users = get_users_present_on(weekday)
  n = num_elements_in_time_range(start=time.min, end=time.max, step=delta)
  eta_til_completed_day = ETACalculator(n, "Synthesis")
  with open("time_histogram.csv", "w") as csv_file:
    histogram_file = csv.DictWriter(csv_file, ["time", "count"])
    histogram_file.writeheader()
    for t in timerange(time.min, time.max, delta):
      logger.debug("="*60)
      logger.debug("Querying for time {0}".format(t))
      number_of_records = session.query(WRecord).filter(
        WRecord.time >= t,
        WRecord.time < timeAdd(t, delta),
        WRecord.user.in_(users)
      ).order_by(WRecord.time).count()
      
      histogram_file.writerow({"time": t, "count": number_of_records})

      eta_til_completed_day.checkpoint()
      logger.info(eta_til_completed_day.eta())
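Several of the examples pair num_elements_in_time_range (to size the ETACalculator) with timerange (to walk the day in fixed steps) and timeAdd (to compute each bin's upper bound). None of these helpers appear in the excerpts; a hypothetical sketch, assuming the endpoints are datetime.time values and the step is a datetime.timedelta, might be:

from datetime import date, datetime


def timeAdd(t, delta):
    # Hypothetical helper: add a timedelta to a time-of-day by routing through
    # a full datetime on a fixed, arbitrary date.
    return (datetime.combine(date.min, t) + delta).time()


def timerange(start, end, delta):
    # Hypothetical helper: yield time-of-day values from start up to end.
    current = start
    while current < end:
        yield current
        nxt = timeAdd(current, delta)
        if nxt <= current:  # stepping past midnight wrapped around; stop
            break
        current = nxt


def num_elements_in_time_range(start, end, step):
    # Hypothetical helper: count how many values timerange() will yield, so the
    # ETACalculator can be constructed before the loop begins.
    return sum(1 for _ in timerange(start, end, step))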

Code example #9
    ))
    f.write(
      "{minTime} {maxTime} "
      "{minX} {maxX} "
      "{minY} {maxY} "
      "{minZ} {maxZ}\n".format(
      **converter.getHeader()
    ))
    for t in timerange(time.min, time.max, delta):
      records = session.query(HomogenizedRecord).filter(
        HomogenizedRecord.time == t,
        HomogenizedRecord.user.in_(users),
      )
      write_to_file(records, f, converter)
      eta_til_completed.checkpoint()
      logger.info(eta_til_completed.eta())

  # Create message files and configuration files.
  leaf_directory = os.path.dirname(one_movement_filepath)

  # Convert centroid files to show ONE user addresses, not the original
  #  addresses.
  centroidFileUrl = os.path.join(leaf_directory, "centroids.csv")
  with open(centroidFileUrl, 'w') as finalizedCentroidFile:
    fieldnames = ['user', 'lat', 'long']
    writer = csv.DictWriter(finalizedCentroidFile, fieldnames=fieldnames)
    writer.writeheader()

    user_info = session.query(DayUser).filter(
      DayUser.id.in_(users)
    )
Code example #10
File: db2one.py Project: Oracleli/GeoLifeReader
    ))
    f.write(
      "{minTime} {maxTime} "
      "{minX} {maxX} "
      "{minY} {maxY} "
      "{minZ} {maxZ}\n".format(
      **converter.getHeader()
    ))
    for t in timerange(time.min, time.max, delta):
      records = session.query(HomogenizedRecord).filter(
        HomogenizedRecord.time == t,
        HomogenizedRecord.user.in_(users),
      )
      write_to_file(records, f, converter)
      eta_til_completed.checkpoint()
      logger.info(eta_til_completed.eta())

  # Create message files and configuration files.
  duration = int(timeDifferenceSeconds(time.max, time.min))
  leaf_directory = os.path.dirname(one_movement_filepath)

  # Convert centroid files to show ONE user addresses, not the original
  #  addresses.
  centroidFileUrl = os.path.join(leaf_directory, "centroids.csv")
  with open(centroidFileUrl, 'w') as finalizedCentroidFile:
    fieldnames = ['user', 'lat', 'long']
    writer = csv.DictWriter(finalizedCentroidFile, fieldnames=fieldnames)
    writer.writeheader()

    with open("centroids.csv") as originalCentroidFile:
      reader = csv.DictReader(originalCentroidFile)
Code example #11
def main():
    session = Session()

    args = get_arguments()
    delta = args.time_delta
    output_directory = args.output_directory

    n = num_elements_in_time_range(start=time.min, end=time.max, step=delta)
    eta_til_completed = ETACalculator(n, "Geographic distribution over time")

    with open("density_stats.csv", 'w') as output_file:
        fieldnames = [
            'time', 'd10', 'd20', 'd40', 'd60', 'd80', 'd100', 'distance10',
            'distance20', 'distance40', 'distance60', 'distance80',
            'distance100', 'area10', 'area20', 'area40', 'area60', 'area80',
            'area100', 'centroid_lat', 'centroid_lon'
        ]
        writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        writer.writeheader()

        for t in timerange(time.min, time.max, delta):
            location_info_for_current_time_period = []
            x_coords_of_records = []
            y_coords_of_records = []
            records = session.query(HomogenizedRecord).filter(
                HomogenizedRecord.time == t)
            for r in records:
                location_info_for_current_time_period.append({
                    'lat': r.latitude,
                    'lon': r.longitude,
                })
                x_coords_of_records.append(r.longitude)
                y_coords_of_records.append(r.latitude)

            centroid = numpy.array((numpy.mean(x_coords_of_records),
                                    numpy.mean(y_coords_of_records)))

            distances = []
            for r in location_info_for_current_time_period:
                p = numpy.array((r['lon'], r['lat']))
                # Distance from the centroid, scaled from decimal degrees to grid units.
                d = numpy.linalg.norm(p - centroid) * DECIMAL_DEGREES_TO_GRID_SCALE
                r['distance'] = d

            locations = location_info_for_current_time_period
            density10, md1, a1 = densityWithinCircleAroundCentroidContainingPercentageOfNodes(
                percentage=0.1, locations=locations)
            density20, md2, a2 = densityWithinCircleAroundCentroidContainingPercentageOfNodes(
                percentage=0.2, locations=locations)
            density40, md3, a3 = densityWithinCircleAroundCentroidContainingPercentageOfNodes(
                percentage=0.4, locations=locations)
            density60, md4, a4 = densityWithinCircleAroundCentroidContainingPercentageOfNodes(
                percentage=0.6, locations=locations)
            density80, md5, a5 = densityWithinCircleAroundCentroidContainingPercentageOfNodes(
                percentage=0.8, locations=locations)
            density100, md6, a6 = densityWithinCircleAroundCentroidContainingPercentageOfNodes(
                percentage=1., locations=locations)

            print("Densities: {0}, {1}, {2}, {3}, {4}, {5}".format(
                density10, density20, density40, density60, density80,
                density100))

            writer.writerow({
                "time": t,
                "d10": density10,
                'distance10': md1,
                'area10': a1,
                'd20': density20,
                'distance20': md2,
                'area20': a2,
                'd40': density40,
                'distance40': md3,
                'area40': a3,
                'd60': density60,
                'distance60': md4,
                'area60': a4,
                'd80': density80,
                'distance80': md5,
                'area80': a5,
                'd100': density100,
                'distance100': md6,
                'area100': a6,
                'centroid_lat': centroid[1],
                'centroid_lon': centroid[0],
            })

            # Calculate the average x and y coordinate
            eta_til_completed.checkpoint()
            logger.info(eta_til_completed.eta())
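densityWithinCircleAroundCentroidContainingPercentageOfNodes is not shown in this excerpt. Judging from its call sites, each location dict already carries a 'distance' from the centroid, and the function returns a (density, radius, area) triple for the circle that encloses the given fraction of nodes. A hypothetical sketch under those assumptions:

import math


def densityWithinCircleAroundCentroidContainingPercentageOfNodes(percentage, locations):
    # Hypothetical sketch: take the closest `percentage` of nodes, use the
    # largest of their centroid distances as the circle radius, and report
    # nodes-per-unit-area, that radius, and the circle's area.
    if not locations:
        return 0.0, 0.0, 0.0
    n = max(int(math.ceil(len(locations) * percentage)), 1)
    nearest = sorted(loc['distance'] for loc in locations)[:n]
    radius = nearest[-1]
    area = math.pi * radius ** 2
    density = n / area if area > 0 else float('inf')
    return density, radius, area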
Code example #12
                # Now that this file has been read, the user-date-file information
                #  has been populated.
                synthesized_users_in_file = 0
                for user_id in f.weekday_counts:
                    user_weekday_counts[user_id] += f.weekday_counts[user_id]
                    synthesized_users_in_file += 1
                logger.info(
                    "File {0} has {1} synthesized users, who will be summarized in db"
                    .format(os.path.basename(f.url),
                            synthesized_users_in_file))

                timer.checkpoint()
                logger.info("=" * 60)
                logger.info("File {0}".format(os.path.basename(f.url)))
                logger.info(timer.eta())

    # Create an index on the time values
    logger.info("Creating index on raw record time columns")
    Index('raw_time', record.WRecord.__table__.c.time).create(engine)

    # Store the user-date-count information.
    user_weekday_counts_db = []
    for key in user_weekday_counts:
        weekday_counts = user_weekday_counts[key]
        user_id, weekday = key
        user_weekday_counts_db.append(
            record.GeoLifeUser(
                id=user_id,
                count=weekday_counts,
                weekday=weekday,
Code example #13
    for u in user.from_directory(directory):

        logger.info("Beginning yielding of records from user {0.id}".format(u))
        for f in u.files:
            for r in f:
                user_id = r.user
                date = r.date

                # Add the user id to the set of unique users per day.
                unique_users_per_day[date].add(user_id)
                unique_records_per_day[date] += 1

            eta.checkpoint()
            logger.info("=" * 60)
            logger.info("File {0}".format(os.path.basename(f.url)))
            logger.info(eta.eta())
            u.num_records += f.num_records

    print("Number of unique users per day:")
    with open("unique_users_per_day.csv", "w") as outfile:
        for d in unique_users_per_day:
            print("{0}\t=>\t{1}".format(d, len(unique_users_per_day[d])))
            outfile.write("{0}\t{1}\n".format(d, len(unique_users_per_day[d])))

    print("#" * 80)
    print("Number of unique records per day:")
    with open("unique_records_per_day.csv", "w") as numrecordsfile:
        for d in unique_records_per_day:
            print("{0}\t=>\t{1}".format(d, unique_records_per_day[d]))
            numrecordsfile.write("{0}\t{1}\n".format(
                d, unique_records_per_day[d]))
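The accumulators unique_users_per_day and unique_records_per_day are not initialized in this excerpt; given that one receives .add() calls and the other is incremented with +=, they were presumably created as defaultdicts, for example:

from collections import defaultdict

# Presumed initialization, based on how the accumulators are used above.
unique_users_per_day = defaultdict(set)    # date -> set of user ids seen that day
unique_records_per_day = defaultdict(int)  # date -> number of records that day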