logger = logging.getLogger("geolife") #stdout = logging.StreamHandler() #stdout.setLevel(logging.INFO) #logger.addHandler(stdout) from utils import ETACalculator from schema import get_users from schema import RecordsOnOneDay import argparse from sqlalchemy.sql import func from sqlalchemy import Index import csv from raw import record import config engine = config.getEngine() from sqlalchemy.orm import sessionmaker Session = sessionmaker() Session.configure(bind=engine) def get_arguments(): parser = argparse.ArgumentParser( description='Extract centroids of user movements.') parser.add_argument( '-t', '--dry-run', dest="dry_run", action="store_true", help=('Boolean indicator controlling if data should be added to the'
if __name__ == "__main__": args = get_arguments() directory = args.input_directory logger.info("Loading database with raw Geolife records") if args.weekday is None: logger.info("All weekdays") weekday = None else: weekday = int(args.weekday) logger.info("Weekday: {0}".format(record.WEEKDAY_STRINGS[weekday])) logger.info("Source: {0}".format(directory)) engine = config.getEngine() Session = sessionmaker() Session.configure(bind=engine) session = Session() logger.info("-"*50) logger.info("Database will be created and populated from files" " in {0}".format(directory)) record.initialize_table(engine) logger.info("Table initialized") timer = ETACalculator(iterations=geolife.get_num_files(directory)) user_weekday_counts = defaultdict(int) for u in user.from_directory(directory): logger.info("Beginning yielding of records from user {0.id}".format(u)) for f in u.files:
# Schema definitions: a reflected `day_records_view` relation plus the
# declarative mapping for the `time_homogenized` table.
from sqlalchemy import (
    BigInteger,
    Column,
    Float,
    Integer,
    MetaData,
    Table,
    Time,
)
from sqlalchemy.ext.declarative import declarative_base

import config

metadata = MetaData()

# Column definitions are loaded from the database when this module is
# imported, using the engine supplied by the project's config module.
RecordsOnOneDay = Table(
    'day_records_view',
    metadata,
    autoload=True,
    autoload_with=config.getEngine(),
)

Base = declarative_base()


class HomogenizedRecord(Base):
    """ORM mapping for one row of the ``time_homogenized`` table.

    Each row stores a user identifier, a latitude/longitude pair and a
    time-of-day value.
    """

    __tablename__ = "time_homogenized"

    id = Column(Integer, primary_key=True)
    user = Column(BigInteger)  # , index=True)
    latitude = Column(Float)
    longitude = Column(Float)
    time = Column(Time)

    def __repr__(self):
        """Return a debug string showing user, coordinates and time."""
        template = "<HomogenizedTimeRecord(name={0}, (x,y)=({1}, {2}), time={3})>"
        fields = (self.user, self.latitude, self.longitude, self.time)
        return template.format(*fields)
# Teardown script: removes the GeoLife tables and the `day_records_view`
# view from the database returned by `config.getEngine()`.
#
# Every step tolerates an object that is already missing, so the script can
# be re-run safely after a partial or repeated teardown.
from raw.record import GeoLifeUser
from raw.record import RawRecord
from schema import HomogenizedRecord
from schema import HomogenizedGeoLifeUser
from sqlalchemy import create_engine
from config import getEngine

engine = getEngine()

GeoLifeUser.__table__.drop(engine, checkfirst=True)

# `IF EXISTS` is the raw-SQL counterpart of `checkfirst=True` used for the
# table drops: without it a missing view raises and aborts the script before
# the remaining tables are dropped.
engine.execute("DROP VIEW IF EXISTS day_records_view;")

RawRecord.__table__.drop(engine, checkfirst=True)
HomogenizedGeoLifeUser.__table__.drop(engine, checkfirst=True)
HomogenizedRecord.__table__.drop(engine, checkfirst=True)
import argparse from utils import timerange from utils import ETACalculator from utils import num_elements_in_time_range from datetime import time from datetime import timedelta from sqlalchemy.orm import sessionmaker import os from one import ExternalMovementReaderConverter from utils import timeDifferenceSeconds import messages import pystache import csv Session = sessionmaker() engine = getEngine() Session.configure(bind=engine) # Parse the command-line arguments. def get_arguments(): parser = argparse.ArgumentParser( description='Write out files for simulation.' ) parser.add_argument( '-d', '--time-delta', dest='time_delta', help="Number of seconds that should be between any two consecutive records", type=lambda x: timedelta(seconds=int(x)), default=timedelta(seconds=5), )