Example #1
 def create_tables(self, *tables, force=False):
     if not force:
         exists = self.database.table_exists(tables)
         if len(exists):
             cond = pr.print(f'Tables "{exists}" already exist in database '
                 f'"{self.database.db}". Drop and continue? [Y/n] ', 
                 inquiry=True, time=True, force=True)
             if not cond:
                 # the user declined the drop, so abort rather than clobber
                 pr.print('User chose to terminate process.')
                 raise RuntimeError('tables already exist')
     for table in tables:
         self.database.drop_table(table)
Example #2
    def upload(self, local_path, dest_path):
        file_size = os.path.getsize(local_path)

        CHUNK_SIZE = 4 * 1024 * 1024

        # a with-block ensures the handle closes even if an upload call fails
        with open(local_path, 'rb') as local_file:
            if file_size <= CHUNK_SIZE:
                self.dbx.files_upload(local_file.read(), dest_path)
            else:
                pr.print('building session')
                session = self.dbx.files_upload_session_start(
                    local_file.read(CHUNK_SIZE))
                pr.print('creating cursor')
                cursor = dropbox.files.UploadSessionCursor(
                    session_id=session.session_id, offset=local_file.tell())
                pr.print('building commit')
                commit = dropbox.files.CommitInfo(path=dest_path)
                pr.print('iterating')
                while local_file.tell() < file_size:
                    if file_size - local_file.tell() <= CHUNK_SIZE:
                        self.dbx.files_upload_session_finish(
                            local_file.read(CHUNK_SIZE), cursor, commit)
                    else:
                        self.dbx.files_upload_session_append(
                            local_file.read(CHUNK_SIZE), cursor.session_id,
                            cursor.offset)
                        cursor.offset = local_file.tell()
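Note: `files_upload_session_append` with an explicit session id and offset is the older form of the call; current releases of the `dropbox` SDK prefer `files_upload_session_append_v2`, which takes the cursor directly. A minimal sketch of the same flow with the v2 call (the standalone `chunked_upload` is a hypothetical rewrite, assuming a constructed `dbx` client):

import os
import dropbox

CHUNK_SIZE = 4 * 1024 * 1024

def chunked_upload(dbx, local_path, dest_path):
    # stream a large file through an upload session; the cursor tracks
    # how many bytes the server has received so far
    file_size = os.path.getsize(local_path)
    with open(local_path, 'rb') as f:
        if file_size <= CHUNK_SIZE:
            dbx.files_upload(f.read(), dest_path)
            return
        session = dbx.files_upload_session_start(f.read(CHUNK_SIZE))
        cursor = dropbox.files.UploadSessionCursor(
            session_id=session.session_id, offset=f.tell())
        commit = dropbox.files.CommitInfo(path=dest_path)
        while f.tell() < file_size:
            if file_size - f.tell() <= CHUNK_SIZE:
                dbx.files_upload_session_finish(
                    f.read(CHUNK_SIZE), cursor, commit)
            else:
                dbx.files_upload_session_append_v2(f.read(CHUNK_SIZE), cursor)
                cursor.offset = f.tell()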
Example #3
 def get_hist(self, db, tbl, col, bin_count=20, bin_size=0):
     # with an explicit bin_size, cap the number of rows at bin_count;
     # otherwise derive a bin_size spanning the column in bin_count bins
     limit = f'LIMIT {bin_count}' if bin_size else ''
     if not bin_size:
         low, high = self.get_stats(db, tbl, col)
         bin_size = math.ceil((high - low) / bin_count)
     pr.print(f'Fetching histogram data for {db}.{tbl}.{col}.', time=True)
     query = f'''
         SELECT
             ROUND(({col}) / {bin_size}) * {bin_size} AS bin,
             COUNT(*) AS freq
         FROM {db}.{tbl}
         GROUP BY bin
         ORDER BY bin
         {limit}
     '''
     self.cursor.execute(query)
     return list(zip(*self.cursor.fetchall()))
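For reference, `ROUND((col) / bin_size) * bin_size` snaps each value to the nearest multiple of `bin_size`, so the returned `bin` labels are bin centers rather than left edges. A small Python check of the arithmetic (MySQL's ROUND rounds halves away from zero while Python's round() rounds halves to even, so exact-half inputs can differ):

bin_size = 300
for value in (0, 149, 151, 450, 899):
    print(value, round(value / bin_size) * bin_size)
# 0 -> 0, 149 -> 0, 151 -> 300, 450 -> 600, 899 -> 900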
Example #4
 def shell(self, key=None):
     self.local_dir = os.getcwd()
     if key is not None:
         self.connect(key)
     if self.dbx is not None:
         while True:
             try:
                 if not self.exec_command(self.read_command()):
                     break
             except KeyboardInterrupt:
                 continue
             except EOFError:
                 pr.print('goodbye')
                 exit()
     else:
         pr.print('not connected to dropbox API; shell request terminated')
Example #5
 def format_lls(self, target):
     table = []
     with os.scandir(target) as dir_entries:
         for entry in dir_entries:
             stats = entry.stat()
             table.append(
                 (stats.st_uid, *self.decode_size(stats.st_size),
                  self.decode_time(datetime.fromtimestamp(stats.st_mtime)),
                  entry.name))
     align = ['l', 'r', 'l', 'r', 'l']
     pad = [2, 0, 2, 2, 2]
     return pr.table(table, align=align, pad=pad)
Example #6
 def format_ls(self, data):
     output = []
     for entry in data:
         # folder entries lack modified_by, size, and server_modified
         user = (self.decode_user(entry.sharing_info.modified_by)
                 if hasattr(entry.sharing_info, 'modified_by') else 'folder')
         size = (self.decode_size(entry.size)
                 if hasattr(entry, 'size') else ('4096', ''))
         modified = (self.decode_time(entry.server_modified)
                     if hasattr(entry, 'server_modified') else '-')
         output.append((user, *size, modified, entry.name))
     align = ['l', 'r', 'l', 'r', 'l']
     pad = [2, 0, 2, 2, 2]
     return pr.table(output, align=align, pad=pad)
Example #7
    def compare_travel_time(self,
                            savepath,
                            bin_size=20,
                            bin_count=0,
                            bin_start=None,
                            bin_end=None,
                            percent=True):

        fig, axs = plt.subplots(1, 2, tight_layout=True)

        bins, vals = self.database.get_hist('abm',
                                            'trips',
                                            'trav_time',
                                            bin_count=bin_count,
                                            bin_size=bin_size)

        pr.print('Graphing histogram.', time=True)
        pos = tuple(range(len(bins)))
        tick = max(1, len(bins) // 4)  # guard against a zero slice step
        axs[0].bar(pos, vals, width=0.9, color='b')
        if percent:
            axs[0].yaxis.set_major_formatter(PercentFormatter(xmax=sum(vals)))
        axs[0].set_xticks(pos[0::tick])
        axs[0].set_xticklabels(bins[0::tick])
        axs[0].set_title('2015 ABM Data')
        axs[0].set_ylabel('frequency (%)' if percent else 'frequency')
        axs[0].set_xlabel('trip duration (secs)')

        # note: the 2018 query derives duration from arrive_time - depart_time
        # and uses fixed binning rather than this method's arguments
        bins, vals = self.database.get_hist('abm2018',
                                            'trips',
                                            'arrive_time - depart_time',
                                            bin_count=20,
                                            bin_size=5)
        pr.print('Graphing histogram.', time=True)
        pos = tuple(range(len(bins)))
        tick = max(1, len(bins) // 4)  # guard against a zero slice step
        axs[1].bar(pos, vals, width=0.9, color='r')
        if percent:
            axs[1].yaxis.set_major_formatter(PercentFormatter(xmax=sum(vals)))
        axs[1].set_xticks(pos[0::tick])
        axs[1].set_xticklabels(bins[0::tick])
        axs[1].set_title('2018 ABM Data')
        axs[1].set_xlabel('trip duration (secs)')

        pr.print('Saving histogram.', time=True)
        fig.savefig(savepath)
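`PercentFormatter(xmax=sum(vals))` is what turns raw bin counts into percentage labels: each tick value y is rendered as y / xmax * 100. A self-contained sketch with made-up frequencies:

import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter

vals = [5, 15, 30, 10]               # raw frequencies, total 60
fig, ax = plt.subplots()
ax.bar(range(len(vals)), vals, width=0.9)
# a bar of height 30 is labeled 50%, since 30 / 60 * 100 = 50
ax.yaxis.set_major_formatter(PercentFormatter(xmax=sum(vals)))
fig.savefig('hist.png')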
Example #8
    def parse_mazs(self, filepath, bin_size=10000):
        pr.print(f'Beginning network MAZ parsing from {filepath}.', time=True)
        pr.print('MAZ Parsing Progress',
                 persist=True,
                 replace=True,
                 frmt='bold',
                 progress=0)

        parser = shapefile.Reader(filepath)
        target = len(parser)
        mazs = []
        count = 0

        for item in parser:
            mazs.append(
                (item.record.MAZ_ID_10, item.record.TAZ_2015,
                 item.record.Sq_miles, self.encode_poly(item.shape.points)))
            count += 1
            if count % bin_size == 0:
                pr.print(f'Pushing {bin_size} MAZs to database.', time=True)
                self.database.push_mazs(mazs)
                mazs = []
                pr.print('Resuming MAZ parsing.', time=True)
                pr.print('MAZ Parsing Progress',
                         persist=True,
                         replace=True,
                         frmt='bold',
                         progress=count / target)

        if mazs:
            pr.print(f'Pushing {count % bin_size} MAZs to database.', time=True)
            self.database.push_mazs(mazs)
        pr.print('MAZ Parsing Progress',
                 persist=True,
                 replace=True,
                 frmt='bold',
                 progress=1)
        pr.push()
        pr.print('Network MAZ parsing complete.', time=True)
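The accumulate-push-reset loop above is the batching pattern these modules share: buffer `bin_size` rows, flush them in one bulk write, then flush the remainder at the end. A generic sketch of the idea, with `push` standing in for any bulk-write callable such as `push_mazs`:

def batched_push(rows, push, bin_size=10000):
    # buffer rows and flush every bin_size items to bound memory use
    batch = []
    for count, row in enumerate(rows, start=1):
        batch.append(row)
        if count % bin_size == 0:
            push(batch)
            batch = []
    if batch:                        # remainder smaller than a full batch
        push(batch)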
Example #9
    def parse(self, config):
        pr.print('Preallocating process files and tables.', time=True)
        force = config['run']['force']
        self.create_tables('agents', force=force)

        pr.print(f'Loading process metadata and resources.', time=True)
        agents_path = config['run']['agents_file']
        bin_size = config['run']['bin_size']

        with open(agents_path, 'r') as count_file:
            target = sum(1 for _ in count_file) - 1
        agentsfile = open(agents_path, 'r', newline='')
        parser = csv.reader(agentsfile, delimiter=',', quotechar='"')
        top = next(parser)
        cols = {key: idx for idx, key in enumerate(top)}

        agents = []
        agent_id = 0

        pr.print('Starting agents CSV file iteration.', time=True)
        pr.print('Agents Parsing Progress',
                 persist=True,
                 replace=True,
                 frmt='bold',
                 progress=agent_id / target)

        for agent in parser:
            agents.append(
                (agent_id, int(agent[cols['hhid']]), int(agent[cols['pnum']]),
                 float(agent[cols['pumsSerialNo']]),
                 int(agent[cols['persType']]),
                 int(agent[cols['persTypeDetailed']]), int(agent[cols['age']]),
                 int(agent[cols['gender']]), int(agent[cols['industry']]),
                 int(agent[cols['schlGrade']]), int(agent[cols['educLevel']]),
                 int(agent[cols['workPlaceType']]),
                 int(agent[cols['workPlaceTaz']]),
                 int(agent[cols['workPlaceMaz']]),
                 int(agent[cols['schoolType']]), int(agent[cols['schoolTaz']]),
                 int(agent[cols['schoolMaz']]),
                 int(agent[cols['campusBusinessTaz']]),
                 int(agent[cols['campusBusinessMaz']]),
                 int(agent[cols['dailyActivityPattern']])))
            agent_id += 1

            if agent_id % bin_size == 0:
                pr.print(f'Pushing {bin_size} agents to database.', time=True)
                self.database.write_agents(agents)

                pr.print('Resuming agent CSV file parsing.', time=True)
                pr.print('Agents Parsing Progress',
                         persist=True,
                         replace=True,
                         frmt='bold',
                         progress=agent_id / target)
                agents = []

        if agents:
            pr.print(f'Pushing {agent_id % bin_size} agents to database.',
                     time=True)
            self.database.write_agents(agents)

        pr.print('Agents Parsing Progress',
                 persist=True,
                 replace=True,
                 frmt='bold',
                 progress=1)
        pr.push()
        pr.print('ABM agent data parsing complete.', time=True)

        if config['run']['create_idxs']:
            pr.print(
                f'Creating all indexes in database '
                f'{self.database.db}.',
                time=True)
            self.create_idxs()
            pr.print('Index creation complete.', time=True)
Example #10
parser = ArgumentParser(
    prog='AgentsParser',
    description='Parse ABM agents CSV file into a table in a SQL database.')
parser.add_argument(
    '--config',
    type=str,
    dest='config',
    default=resource_filename('icarus', 'network/parse/mazs/config.json'),
    help=('Specify a config file location; default is "config.json" in '
          'the current module directory.'))
parser.add_argument(
    '--log',
    type=str,
    dest='log',
    help='specify a log file location; by default the log will not be saved',
    default=None)
args = parser.parse_args()

if args.log is not None:
    pr.log(args.log)

try:
    with open(args.config) as handle:
        config = json.load(handle)
except FileNotFoundError as err:
    pr.print(f'Config file {args.config} not found.', time=True)
    raise err
except json.JSONDecodeError as err:
    pr.print(f'Config file {args.config} is not valid JSON.', time=True)
    raise err
except KeyError as err:
    pr.print(f'Config file {args.config} is not a valid config file.', time=True)
    raise err

database = config['database']
Example #11
parser.add_argument(
    '--config',
    type=str,
    dest='config',
    default=resource_filename('icarus', 'abm/validate/config.json'),
    help=('Specify a config file location; default is "config.json" in '
          'the current module directory.'))
parser.add_argument(
    '--log',
    type=str,
    dest='log',
    help='specify a log file location; by default the log will not be saved',
    default=None)
args = parser.parse_args()

if args.log is not None:
    pr.log(args.log)

try:
    with open(args.config) as handle:
        config = json.load(handle)
except FileNotFoundError as err:
    pr.print(f'Config file {args.config} not found.', time=True)
    raise err
except json.JSONDecodeError as err:
    pr.print(f'Config file {args.config} is not valid JSON.', time=True)
    raise err
except KeyError as err:
    pr.print(f'Config file {args.config} is not a valid config file.', time=True)
    raise err

database = config['database']
Example #12
parser.add_argument(
    '--specs',
    type=str,
    dest='specs',
    default=resource_filename('icarus', 'abm/parse/households/specs.json'),
    help=('Specify a specs file location; default is "specs.json" in '
          'the current module directory.'))
parser.add_argument(
    '--log',
    type=str,
    dest='log',
    help='specify a log file location; by default the log will not be saved',
    default=None)
args = parser.parse_args()

pr.print('Running ABM households parser module.', time=True)
pr.print('Validating configuration file.', time=True)
config = HouseholdsParser.validate_config(args.config, args.specs)

if args.log is not None:
    log = args.log
elif config['run']['log'] not in (None, ''):
    log = config['run']['log']
else:
    log = None
if log is not None:
    pr.log(log)
    pr.print(f'Process log being saved to {log}.', time=True)

database = config['database']
Example #13
    def run(self, config):
        pr.print('Preallocating process files and tables.', time=True)
        force = config['run']['force']
        bin_size = config['run']['bin_size']
        self.database.create_temp()
        self.create_tables(*self.database.tables, force=force)

        pr.print('Parsing parcel shapefile data.', time=True)
        shapes = {}
        parser = shapefile.Reader(config['run']['shapefile_file'])
        for item in parser:
            if len(item.shape.points):
                shapes[item.record['APN']] = self.encode_poly(item.shape.points)

        pr.print('Parsing residential parcel data.', time=True)

        parser = shapefile.Reader(config['run']['residence_file'])
        parcels = []
        parcel_id = 0
        n = 1

        for record in parser.iterRecords():
            apn = record['APN']
            if apn in shapes:
                parcels.append((
                    parcel_id,
                    apn,
                    shapes[apn],
                    shapes[apn]))
                parcel_id += 1

                if parcel_id % bin_size == 0:
                    self.database.write_residences(parcels)
                    parcels = []

                if parcel_id == n:
                    pr.print(f'Found residential parcel {parcel_id}.', time=True)
                    n <<= 1

        self.database.write_residences(parcels)

        if parcel_id != (n >> 1):
            pr.print(f'Found residential parcel {parcel_id}.', time=True)
            
        pr.print('Residential parcel data parsing complete.', time=True)
        pr.print('Parsing commercial parcel data.', time=True)

        parser = shapefile.Reader(config['run']['commerce_file'])
        parcels = []
        parcel_id = 0
        n = 1

        for record in parser.iterRecords():
            apn = record['APN']
            if apn in shapes:
                parcels.append((
                    parcel_id,
                    apn,
                    shapes[apn],
                    shapes[apn]))
                parcel_id += 1

                if parcel_id % bin_size == 0:
                    self.database.write_commerces(parcels)
                    parcels = []

                if parcel_id == n:
                    pr.print(f'Found commercial parcel {parcel_id}.', time=True)
                    n <<= 1

        self.database.write_commerces(parcels)
        
        if parcel_id != (n >> 1):
            pr.print(f'Found commercial parcel {parcel_id}.', time=True)

        pr.print('Joining parcel and MAZ data.', time=True)

        self.database.create_all_idxs('temp_residences')
        self.database.create_all_idxs('temp_commerces')
        self.database.drop_table('commerces')
        self.database.drop_table('residences')
        self.database.join_commerces()
        self.database.join_residences()
        self.database.drop_table('temp_residences')
        self.database.drop_table('temp_commerces')
        del self.database.tables['temp_residences']
        del self.database.tables['temp_commerces']

        if config['run']['create_idxs']:
            pr.print('Beginning index creation on generated tables.', time=True)
            for table in self.database.tables:
                self.database.create_all_idxs(table)
            pr.print('Index creation complete.', time=True)

        pr.print('Parcel data parsing complete.', time=True)
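The `n <<= 1` counter in both loops implements exponential progress reporting: a message prints at parcels 1, 2, 4, 8, and so on, so log output grows with the logarithm of the input size. The trick in isolation:

n = 1
count = 0
for _ in range(1_000_000):           # stand-in for the shapefile records
    count += 1
    if count == n:
        print(f'Found record {count}.')
        n <<= 1                      # next report at double the count
if count != (n >> 1):                # report the final total once
    print(f'Found record {count}.')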
Example #14
 def exec_command(self, cmd):
     args = shlex.split(cmd)
     cmd = args.pop(0)
     if cmd == 'exit':
         pr.print('goodbye')
         return False
     elif cmd == 'help':
         pr.print(
             pr.table((
                 ('cd', 'changes dropbox directory to specified directory'),
                 ('dir', 'displays working directories'),
                 ('exit', 'exits the dropbox API shell'),
                 ('help', 'lists all valid commands and their usage'),
                 ('get', 'downloads file from dropbox to local filesystem'),
                 ('lcd', 'changes local directory to specified directory'),
                 ('lls', 'lists all the files and folders in working local '
                  'directory'),
                 ('ls', 'lists all the files and folders in working dropbox '
                  'directory'),
                 ('put', 'uploads file from local filesystem to dropbox'))))
     elif cmd == 'ls':
         if len(args) < 2:
             try:
                 target = (self.drop_dir if len(args) == 0 else
                           self.decode_dir(self.drop_dir, args[0]))
                 pr.print(target)
                 files = self.dbx.files_list_folder(
                     target if target != '/' else '').entries
                 pr.print(self.format_ls(files))
             except Exception:
                 pr.print('invalid target directory')
         else:
             pr.print(
                 f'command "ls" expected zero or one arguments but got {len(args)}'
             )
     elif cmd == 'lls':
         if len(args) < 2:
             target = (self.local_dir if len(args) == 0 else
                       self.decode_dir(self.local_dir, args[0]))
             if os.path.isdir(target):
                 pr.print(target)
                 pr.print(self.format_lls(target))
             else:
                 pr.print('invalid target directory')
         else:
             pr.print(
                 f'command "lls" expected zero or one arguments but got {len(args)}'
             )
     elif cmd == 'cd':
         if len(args) == 1:
             try:
                 target = self.decode_dir(self.drop_dir, args[0])
                 if target != '/':
                     self.dbx.files_get_metadata(target)
                 self.drop_dir = target
             except Exception:
                 pr.print('invalid target directory')
         else:
             pr.print(
                 f'command "cd" expected exactly one argument but got {len(args)}'
             )
     elif cmd == 'lcd':
         if len(args) == 1:
             target = self.decode_dir(self.local_dir, args[0])
             if os.path.isdir(target):
                 self.local_dir = target
             else:
                 pr.print('invalid target directory')
         else:
             pr.print(
                 f'command "lcd" expected exactly one argument but got {len(args)}'
             )
     elif cmd == 'dir':
         pr.print(f'drop:  {self.drop_dir}')
         pr.print(f'local: {self.local_dir}')
     elif cmd == 'put':
         if len(args) == 2:
             # resolve against the tracked local directory before checking
             local = self.decode_dir(self.local_dir, args[0])
             if os.path.isfile(local):
                 try:
                     drop = self.decode_dir(self.drop_dir, args[1])
                     pr.print('uploading file to dropbox')
                     self.upload(local, drop)
                 except Exception as err:
                     pr.print('invalid dropbox file path')
                     raise err
             else:
                 pr.print('invalid local file path')
         else:
             pr.print(
                 f'command "put" expected exactly two arguments but got {len(args)}'
             )
     elif cmd == 'get':
         if len(args) == 2:
             # resolve the local path; its parent directory must exist
             local = self.decode_dir(self.local_dir, args[0])
             if os.path.isdir(os.path.dirname(local)):
                 try:
                     drop = self.decode_dir(self.drop_dir, args[1])
                     pr.print('downloading file from dropbox')
                     self.download(local, drop)
                 except Exception:
                     pr.print('invalid dropbox file path')
             else:
                 pr.print('invalid local directory')
         else:
             pr.print(
                 f'command "get" expected exactly two arguments but got {len(args)}'
             )
     else:
         pr.print('invalid command; type "help" for list of valid commands')
     return True
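The `if`/`elif` chain above works, but the same shell could be table-driven, which keeps each command's argument handling in its own method. A sketch of that alternative shape (the `cmd_*` handlers are hypothetical, assuming the same `pr` print utility):

def exec_command(self, cmd):
    args = shlex.split(cmd)
    name = args.pop(0) if args else ''
    handlers = {
        'exit': self.cmd_exit, 'help': self.cmd_help,
        'ls': self.cmd_ls, 'lls': self.cmd_lls,
        'cd': self.cmd_cd, 'lcd': self.cmd_lcd,
        'dir': self.cmd_dir, 'put': self.cmd_put, 'get': self.cmd_get}
    handler = handlers.get(name)
    if handler is None:
        pr.print('invalid command; type "help" for list of valid commands')
        return True
    return handler(args)             # each handler returns False to quit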
Example #15
    def run(self, config):
        pr.print('Preallocating process files and tables.', time=True)
        force = config['run']['force']
        self.create_tables('links', 'nodes', force=force)

        pr.print(f'Loading process metadata and resources.', time=True)
        network_path = config['run']['network_file']
        bin_size = config['run']['bin_size']

        if network_path.endswith('.gz'):
            network_file = gzip.open(network_path, mode='rb')
        else:
            network_file = open(network_path, mode='rb')

        parser = iter(iterparse(network_file, events=('start', 'end')))
        evt, root = next(parser)

        links = []
        nodes = []
        count = 0

        for evt, elem in parser:
            if evt == 'start':
                if elem.tag == 'nodes':
                    pr.print('Starting road node parsing.', time=True)
                elif elem.tag == 'links':
                    pr.print(
                        f'Pushing {count % bin_size} nodes to the '
                        'database.',
                        time=True)
                    self.database.write_nodes(nodes)
                    nodes = []
                    root.clear()
                    count = 0
                    pr.print('Starting road link parsing.', time=True)
            elif evt == 'end':
                if elem.tag == 'node':
                    nodes.append((str(elem.get('id')),
                                  f'POINT({elem.get("x")} {elem.get("y")})'))
                    count += 1
                    if count % bin_size == 0:
                        pr.print(
                            f'Pushing {bin_size} nodes to '
                            'the database.',
                            time=True)
                        self.database.write_nodes(nodes)
                        nodes = []
                        root.clear()
                        pr.print('Continuing node parsing.', time=True)
                elif elem.tag == 'link':
                    links.append(
                        (str(elem.get('id')), str(elem.get('from')),
                         str(elem.get('to')), float(elem.get('length')),
                         float(elem.get('freespeed')),
                         float(elem.get('capacity')),
                         float(elem.get('permlanes')), int(elem.get('oneway')),
                         str(elem.get('modes'))))
                    count += 1
                    if count % bin_size == 0:
                        pr.print(
                            f'Pushing {bin_size} links to '
                            'the database.',
                            time=True)
                        self.database.write_links(links)
                        links = []
                        root.clear()
                        pr.print(f'Continuing link parsing.', time=True)

        if count % bin_size != 0:
            pr.print(f'Pushing {count % bin_size} links to the database.',
                     time=True)
            self.database.write_links(links)
            links = []
            root.clear()

        network_file.close()

        pr.print('Network road parsing complete.', time=True)

        if config['run']['create_idxs']:
            pr.print(f'Creating indexes for module tables.', time=True)
            self.create_idxs()
            pr.print(f'Index creation complete.', time=True)
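The periodic `root.clear()` calls are what keep `iterparse` memory-bounded: without them every parsed `<node>` and `<link>` element stays attached to the document root. The pattern in isolation:

from xml.etree.ElementTree import iterparse

def stream_tag(path, tag):
    # yield matching elements from a large XML file one at a time
    parser = iter(iterparse(path, events=('start', 'end')))
    _, root = next(parser)           # first event yields the root element
    for event, elem in parser:
        if event == 'end' and elem.tag == tag:
            yield elem
            root.clear()             # release already-parsed children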
Example #16
    def run(self, config):
        pr.print('Preallocating process files and tables.', time=True)
        force = config['run']['force']
        self.create_tables('trips', 'temp_trips', force=force)

        pr.print('Creating temporary tables.', time=True)
        self.database.create_temp()

        pr.print(f'Loading process metadata and resources.', time=True)
        trips_path = config['run']['trips_file']
        bin_size = config['run']['bin_size']

        with open(trips_path, 'r') as count_file:
            target = sum(1 for _ in count_file) - 1
        tripsfile = open(trips_path, 'r', newline='')
        parser = csv.reader(tripsfile, delimiter=',', quotechar='"')
        top = next(parser)
        cols = {key: idx for idx, key in enumerate(top)}

        trips = []
        parties = {}
        trip_id = 0
        party_id = 1
        
        pr.print('Starting trips CSV file iteration.', time=True)
        pr.print('Trips Parsing Progress', persist=True, replace=True,
            frmt='bold', progress=trip_id/target)

        household = None

        for trip in parser:
            prev_household = household

            vehicle = int(trip[cols['vehId']])
            household = int(trip[cols['hhid']])
            role = int(trip[cols['jointTripRole']])

            party_hash = self.hash_party(trip[cols['party']], 
                trip[cols['isamAdjDepMin']])

            if prev_household != household:
                parties = {}

            if party_hash is None:
                party = 0
                party_idx = 0
            else:
                if party_hash in parties:
                    party = parties[party_hash][0]
                    party_idx = parties[party_hash][1]
                    parties[party_hash][1] += 1
                else:
                    parties[party_hash] = [party_id, 2]
                    party = party_id
                    party_idx = 1
                    party_id += 1

            trips.append((
                trip_id,
                household,
                int(trip[cols['uniqueid']]),
                int(trip[cols['pnum']]),
                int(trip[cols['personTripNum']]) - 1,
                party,
                party_idx,
                role,
                int(trip[cols['origTaz']]),
                int(trip[cols['origMaz']]),
                int(trip[cols['destTaz']]),
                int(trip[cols['destMaz']]),
                int(trip[cols['origPurp']]),
                int(trip[cols['destPurp']]),
                int(trip[cols['mode']]),
                vehicle if vehicle > 0 else 0,
                self.adj_time(trip[cols['isamAdjDepMin']]) + 16200,
                self.adj_time(trip[cols['isamAdjArrMin']]) + 16200,
                self.adj_time(trip[cols['isamAdjDurMin']])))
            trip_id += 1

            if trip_id % bin_size == 0:
                pr.print(f'Pushing {bin_size} trips to the database.', time=True)
                self.database.write_trips(trips)

                pr.print('Resuming CSV file parsing.', time=True)
                pr.print('Trips Parsing Progress', persist=True, replace=True,
                    frmt='bold', progress=trip_id/target)
                trips = []

        if trips:
            pr.print(f'Pushing {trip_id % bin_size} trips to the database.',
                     time=True)
            self.database.write_trips(trips)

        pr.print('Trips Parsing Progress', persist=True, replace=True,
            frmt='bold', progress=1)
        pr.push()
        pr.print('ABM trip data parsing complete.', time=True)

        pr.print('Merging tables and dropping temporaries.', time=True)
        pr.silence()
        self.database.drop_table('trips')
        self.database.create_all_idxs('temp_trips')
        self.database.join_trips()
        self.database.drop_table('temp_trips')
        del self.database.tables['temp_trips']
        pr.unsilence()

        if config['run']['create_idxs']:
            pr.print(f'Creating all indexes in database '
                f'{self.database.db}.', time=True)
            self.create_idxs()
            pr.print('Index creation complete.', time=True)
Example #17
    def run(self, config):
        pr.print('Preallocating files/tables for module run.', time=True)
        self.create_tables('temperatures',
                           'centroids',
                           force=config['run']['force'])

        day = config['select']['day']
        steps = config['select']['steps']
        files = zip(config['run']['tmax'], config['run']['tmin'])

        centroids = []
        points = []
        temperatures = []
        temps = {}
        total = 0
        centroid_id = 0
        temperature_id = 0
        n = 1

        transformer = Transformer.from_crs('epsg:4326',
                                           'epsg:2223',
                                           always_xy=True)

        pr.print('Loading netCDF files for parsing.', time=True)
        for tmax_file, tmin_file in files:
            tmaxnc = Dataset(tmax_file, 'r')
            tminnc = Dataset(tmin_file, 'r')
            total += tmaxnc.variables['tmax'].shape[1] * \
                tmaxnc.variables['tmax'].shape[2]
            tmaxnc.close()
            tminnc.close()

        pr.print('Iterating over daymet data and parsing.', time=True)
        pr.print(f'Total centroids to parse: {total}.', time=True)
        files = zip(config['run']['tmax'], config['run']['tmin'])

        for tmax_file, tmin_file in files:
            tmaxnc = Dataset(tmax_file, 'r')
            tminnc = Dataset(tmin_file, 'r')

            lons = tmaxnc.variables['lon']
            lats = tmaxnc.variables['lat']
            shape = tmaxnc.variables['tmax'].shape

            tmaxs = tmaxnc.variables['tmax'][day]
            tmins = tminnc.variables['tmin'][day]

            for i in range(shape[1]):
                for j in range(shape[2]):
                    tmax = tmaxs[i][j]
                    tmin = tmins[i][j]

                    if tmax != -9999.0:
                        point = transformer.transform(lons[i][j], lats[i][j])
                        idx = f'{tmax}-{tmin}'

                        if idx not in temps:
                            temp = self.iterpolation(tmin, tmax, 5, 15)
                            temperatures.extend([(temperature_id, step,
                                                  int(86400 * step / steps),
                                                  temp(24 * step / steps))
                                                 for step in range(steps)])
                            temps[idx] = temperature_id
                            temperature_id += 1

                        centroids.append((centroid_id, temps[idx]))
                        points.append(point)
                        centroid_id += 1

                        if centroid_id == n:
                            pr.print(f'Found centroid {centroid_id}.',
                                     time=True)
                            n <<= 1

            tmaxnc.close()
            tminnc.close()

        del tmaxnc
        del tminnc

        if centroid_id != (n >> 1):
            pr.print(f'Found centroid {centroid_id}.', time=True)

        pr.print(
            f'Found {centroid_id} valid centroids (locations) and '
            f'{temperature_id} unique temperature profiles.',
            time=True)

        pr.print(f'Calculating Voronoi polygons from centroids.', time=True)
        vor = Voronoi(points)
        centroids = self.process_vor(vor, centroids)

        pr.print('Pushing centroids and temperatures to the database.',
                 time=True)
        self.database.write_rows(temperatures, 'temperatures')
        self.database.write_centroids(centroids)

        if config['run']['create_idxs']:
            pr.print('Creating indexes on module tables.', time=True)
            self.database.create_all_idxs('centroids')
            self.database.create_all_idxs('temperatures')
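`Voronoi` here is presumably `scipy.spatial.Voronoi`; given the centroid coordinates it returns the region and vertex arrays that a routine like `process_vor` walks to build polygons. A minimal sketch of the call:

import numpy as np
from scipy.spatial import Voronoi

points = np.array([[0, 0], [0, 1], [1, 0], [1, 1], [0.5, 0.5]])
vor = Voronoi(points)
# vor.point_region[i] indexes vor.regions for input point i; regions
# list indexes into vor.vertices, with -1 marking an unbounded edge
# that polygon-building code has to clip or skip
region = vor.regions[vor.point_region[4]]
polygon = [vor.vertices[v] for v in region if v != -1]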
Example #18
from icarus.network.parse.parcels.parser import ParcelsParser
from icarus.util.print import PrintUtil as pr


parser = ArgumentParser(prog='Network Parcel Parser',
    description='Parse Maricopa parcel data into SQL database.')
parser.add_argument('--config', type=str, dest='config',
    default=resource_filename('icarus', 'network/parse/parcels/config.json'),
    help=('Specify a config file location; default is "config.json" in '
        'the current module directory.'))
parser.add_argument('--specs', type=str, dest='specs',
    default=resource_filename('icarus', 'network/parse/parcels/specs.json'),
    help=('Specify a specs file location; default is "specs.json" in '
        'the current module directory.'))
parser.add_argument('--log', type=str, dest='log', default=None,
    help='specify a log file location; by default the log will not be saved')
args = parser.parse_args()

pr.print('Running network parcel parser module.', time=True)
pr.print('Validating configuration file.', time=True)
config = ParcelsParser.validate_config(args.config, args.specs)

if args.log is not None:
    log = args.log
elif config['run']['log'] not in (None, ''):
    log = config['run']['log']
else:
    log = None
if log is not None:
    pr.log(log)
    pr.print(f'Process log being saved to {log}.', time=True)

database = config['database']
database['password'] = pr.getpass(f'SQL password for '
Example #19
    def run(self, config):
        pr.print('Preallocating process files and tables.', time=True)
        force = config['run']['force']
        self.create_tables('households', force=force)

        pr.print(f'Loading process metadata and resources.', time=True)
        households_path = config['run']['households_file']
        bin_size = config['run']['bin_size']

        with open(households_path, 'r') as count_file:
            target = sum(1 for _ in count_file) - 1
        householdsfile = open(households_path, 'r', newline='')
        parser = csv.reader(householdsfile, delimiter=',', quotechar='"')
        top = next(parser)
        cols = {key: idx for idx, key in enumerate(top)}

        households = []
        household_id = 0
        vehicles = []
        vehicle_id = 0

        hhid = 0

        pr.print('Starting households CSV file iteration.', time=True)
        pr.print('Households Parsing Progress',
                 persist=True,
                 replace=True,
                 frmt='bold',
                 progress=hhid / target)

        for household in parser:
            household_id = int(household[cols['hhid']])
            # key spellings such as 'nunmRetired' and 'numPreshcool' mirror
            # the source CSV header
            households.append(
                (household_id, float(household[cols['pumsSerialNo']]),
                 int(household[cols['homeTaz']]),
                 int(household[cols['homeMaz']]),
                 int(household[cols['hhsize']]),
                 int(household[cols['numFtWorkers']]),
                 int(household[cols['numPtWorkers']]),
                 int(household[cols['numUnivStuds']]),
                 int(household[cols['numNonWorkers']]),
                 int(household[cols['nunmRetired']]),
                 int(household[cols['numDrivAgeStuds']]),
                 int(household[cols['numPreDrivStuds']]),
                 int(household[cols['numPreshcool']]),
                 int(household[cols['hhIncomeDollars']]),
                 int(household[cols['hhNumAutos']]),
                 int(household[cols['dwellingType']]),
                 int(household[cols['ifAvHousehold']])))
            hhid += 1

            for vehicle in range(int(household[cols['hhNumAutos']])):
                vehicles.append((vehicle_id, household_id, vehicle + 1))
                vehicle_id += 1

            if hhid % bin_size == 0:
                pr.print(f'Pushing {bin_size} households to database.',
                         time=True)
                self.database.write_households(households)
                self.database.write_vehicles(vehicles)

                pr.print('Resuming household CSV file parsing.', time=True)
                pr.print('Households Parsing Progress',
                         persist=True,
                         replace=True,
                         frmt='bold',
                         progress=hhid / target)
                households = []
                vehicles = []

        # flush the remaining partial batch, including leftover vehicles
        if households:
            pr.print(f'Pushing {hhid % bin_size} households to database.',
                     time=True)
            self.database.write_households(households)
            self.database.write_vehicles(vehicles)

        pr.print('ABM household data parsing complete.', time=True)
        pr.print('Households Parsing Progress',
                 persist=True,
                 replace=True,
                 frmt='bold',
                 progress=1)
        pr.push()

        if config['run']['create_idxs']:
            pr.print(
                f'Creating all indexes in database '
                f'{self.database.db}.',
                time=True)
            self.create_idxs()
            pr.print('Index creation complete.', time=True)