def load(self, db, keep_running=None):
    """Load all users' ratings plus the generic ratings from the database."""
    t0_all = time.time()
    usage_0 = mem_usage.get_usage_mb() if conf.debug_mem_usage else None
    g.assurt(self.graph.branch_hier)
    # Restrict the query to the branch hierarchy this graph serves.
    if len(self.graph.branch_hier) > 1:
        branch_ids = ','.join([str(x[0]) for x in self.graph.branch_hier])
        where_branch = "branch_id IN (%s)" % (branch_ids,)
    else:
        where_branch = "branch_id = %d" % (self.graph.branch_hier[0][0],)
    # Historic mode replays ratings as-of a revision; otherwise use live data.
    if not self.honor_historic:
        rats_sql = self.sql_current(db, where_branch)
    else:
        rats_sql = self.sql_historic(db, where_branch)
    if rats_sql:
        # Fetch and fold in every rating row.
        log.info('load: reading byway_rating table...')
        time_0 = time.time()
        # dont_fetchall streams rows instead of materializing the result set.
        log.verbose('load: enabling dont_fetchall')
        db.dont_fetchall = True
        rats = db.sql(rats_sql)
        log.verbose('load: disabling dont_fetchall')
        db.dont_fetchall = False
        log.info('load: read %d ratings %s in %s'
                 % (db.curs.rowcount,
                    '[Current]' if not self.last_last_modified
                    else ('[since %s]' % self.last_last_modified),
                    misc.time_format_elapsed(time_0),))
        g.check_keep_running(keep_running)
        self.cache_rats(db)
        db.curs_recycle()
    conf.debug_log_mem_usage(log, usage_0, 'ratings.load')
    log.info('load: done loading ratings in %s'
             % (misc.time_format_elapsed(t0_all),))
    self.last_last_modified = None
def cache_rats(self, db):
    """Fold fetched byway_rating rows into the nested self.ratings dict.

    Layout: ratings[byway_stack_id][username][branch_id] = value.
    A negative value means "remove this rating"; emptied sub-dicts
    are pruned so lookups stay cheap.
    """
    time_0 = time.time()
    generator = db.get_row_iter()
    for rat in generator:
        if rat['value'] >= 0:
            sid = self.ratings.setdefault(rat['byway_stack_id'], dict())
            uname = sid.setdefault(rat['username'], dict())
            uname[rat['branch_id']] = rat['value']
        else:
            # Negative value: delete the rating, then prune empty parents.
            try:
                by_user = self.ratings[rat['byway_stack_id']][rat['username']]
                by_user.pop(rat['branch_id'], None)
                if not by_user:
                    self.ratings[rat['byway_stack_id']].pop(
                        rat['username'], None)
                if not self.ratings[rat['byway_stack_id']]:
                    self.ratings.pop(rat['byway_stack_id'], None)
            except KeyError:
                # Nothing cached for this byway/user; nothing to remove.
                pass
    generator.close()
    log.info('load: cached ratings in %s'
             % (misc.time_format_elapsed(time_0),))
def byways_suss_out_facilities(self):
    """Iterate every byway (with attrs/tags) and classify its facility."""
    log.info('byways_suss_out_facilities: ready, set, suss!')
    time_0 = time.time()
    prog_log = Debug_Progress_Logger(copy_this=debug_prog_log)
    # 2013.05.12: Throughput drops from ~1250 byways/sec to ~250/sec
    # after 100,000 byways, so log every 1000 as a compromise.
    prog_log.log_freq = 1000
    prog_log.loop_max = None
    # E.g. byway.Many().search_for_items
    feat_search_fcn = 'search_for_items'
    self.qb.item_mgr.load_feats_and_attcs(
        self.qb, byway, feat_search_fcn, self.feat_suss_out_facil,
        prog_log, heavyweight=False)
    log.info('... processed %d features in %s'
             % (prog_log.progress, misc.time_format_elapsed(time_0),))
def go_main(self):
    """Run the log parser inside a single read-write db transaction."""
    log.debug('Starting')
    time_0 = time.time()
    log.debug('Starting db transaction...')
    self.qb.db.transaction_begin_rw()
    # skip_date_check means reparse everything, not just entries newer
    # than the last run.
    self.last_logged = (None if self.cli_opts.skip_date_check
                        else self.get_last_logged())
    self.parse_logs()
    # Commits (unless debugging) and closes the query builder's db.
    self.query_builder_destroy(do_commit=(not debug_skip_commit))
    log.debug('gtfsdb_build_cache: complete: %s'
              % (misc.time_format_elapsed(time_0),))
def load_and_export_items(self, feat_class):
    """Load every feature of feat_class (with attrs/tags) and export each."""
    log.info('load_and_export_items: working on type: %s'
             % (Item_Type.id_to_str(feat_class.One.item_type_id),))
    time_0 = time.time()
    prog_log = self.progr_get(log_freq=100)
    # Include ratings and item-stack columns in the exported rows.
    self.qb_src.filters.rating_special = True
    self.qb_src.filters.include_item_stack = True
    log.debug('load_and_export_items: filter_by_regions: %s'
              % (self.qb_src.filters.filter_by_regions),)
    # The merge_job set up item_mgr, which loads the byways and their
    # attrs and tags, calling feat_export on each feature.
    self.qb_src.item_mgr.load_feats_and_attcs(
        self.qb_src, feat_class, 'search_for_items',
        processing_fcn=self.feat_export, prog_log=prog_log,
        heavyweight=False)
    log.info('... exported %d features in %s'
             % (prog_log.progress, misc.time_format_elapsed(time_0),))
def load_and_export_items(self, feat_class):
    """Export all items of the given feature class via the item manager."""
    item_type_str = Item_Type.id_to_str(feat_class.One.item_type_id)
    log.info('load_and_export_items: working on type: %s'
             % (item_type_str,))
    time_0 = time.time()
    prog_log = self.progr_get(log_freq=100)
    # Ask the loader to include ratings plus the item stack columns.
    self.qb_src.filters.rating_special = True
    self.qb_src.filters.include_item_stack = True
    log.debug('load_and_export_items: filter_by_regions: %s'
              % (self.qb_src.filters.filter_by_regions),)
    # merge_job prepared item_mgr; it loads features with attrs/tags
    # and invokes feat_export on each one.
    feat_search_fcn = 'search_for_items'  # E.g. byway.Many().search_for_items
    self.qb_src.item_mgr.load_feats_and_attcs(
        self.qb_src, feat_class, feat_search_fcn,
        processing_fcn=self.feat_export, prog_log=prog_log,
        heavyweight=False)
    log.info('... exported %d features in %s'
             % (prog_log.progress, misc.time_format_elapsed(time_0),))
def go_main(self):
    """Parse GTFS logs within a read-write transaction, then commit."""
    log.debug('Starting')
    time_0 = time.time()
    log.debug('Starting db transaction...')
    self.qb.db.transaction_begin_rw()
    if self.cli_opts.skip_date_check:
        # Reparse everything from the beginning.
        self.last_logged = None
    else:
        self.last_logged = self.get_last_logged()
    self.parse_logs()
    # query_builder_destroy commits (or not, when debugging) and closes
    # the db connection.
    self.query_builder_destroy(do_commit=(not debug_skip_commit))
    log.debug('gtfsdb_build_cache: complete: %s'
              % (misc.time_format_elapsed(time_0),))
def cache_rats(self, db):
    """Merge byway_rating rows from the open cursor into self.ratings.

    Structure: {byway_stack_id: {username: {branch_id: value}}}.
    Rows with a negative value delete the entry, pruning emptied dicts.
    """
    time_0 = time.time()
    row_iter = db.get_row_iter()
    for rat in row_iter:
        bway_sid = rat['byway_stack_id']
        user = rat['username']
        branch = rat['branch_id']
        value = rat['value']
        if value >= 0:
            self.ratings.setdefault(bway_sid, dict()) \
                        .setdefault(user, dict())[branch] = value
        else:
            # Deletion marker: remove the rating and collapse empty levels.
            try:
                self.ratings[bway_sid][user].pop(branch, None)
                if not self.ratings[bway_sid][user]:
                    self.ratings[bway_sid].pop(user, None)
                if not self.ratings[bway_sid]:
                    self.ratings.pop(bway_sid, None)
            except KeyError:
                pass
    row_iter.close()
    log.info('load: cached ratings in %s'
             % (misc.time_format_elapsed(time_0),))
def loops_fin(self, callee=''):
    """Log the final loop-count/elapsed-time summary for this logger."""
    callee = callee or self.callee
    if self.info_print_speed_enable and self.info_print_speed_finish:
        self.loops_info(callee)
    log.info('%s%d loops took %s'
             % (('%s: ' % callee) if callee else '',
                self.progress,
                misc.time_format_elapsed(self.time_0),))
def loops_fin(self, callee=''):
    """Emit the final loop-timing summary; callee defaults to self.callee."""
    if not callee:
        callee = self.callee
    if self.info_print_speed_enable and self.info_print_speed_finish:
        self.loops_info(callee)
    prefix = ('%s: ' % callee) if callee else ''
    log.info('%s%d loops took %s'
             % (prefix, self.progress,
                misc.time_format_elapsed(self.time_0),))
def load_transit(self, qb, db_transit):
    """Load the GTFS transit network into the Graphserver graph.

    Reads vertex/edge triples from db_transit via GTFSGraphCompiler and
    adds them to self.gserver. When a cache register row exists, the edge
    count is cross-checked against the cached transit_nedges value.
    """
    agency_id = None
    reporter = None  # FIXME: Can we pass in log.debug somehow?
                     # I think we just need to support write()
    maxtrips = None
    g.assurt(self.gserver is not None)
    # C.f. graphserver/pygs/build/lib/graphserver/compiler.py
    #      ::graph_load_gtfsdb
    log.debug('load_transit: loading compiler')
    compiler = GTFSGraphCompiler(db_transit, conf.transitdb_agency_name,
                                 agency_id, reporter)
    nedges = 0
    if self.cache_reg is not None:
        from_and_where = self.links_cache_from_and_where(
            'gtfsdb_cache_register', qb)
        nedges = qb.db.sql("SELECT transit_nedges AS nedges %s"
                           % (from_and_where))[0]['nedges']
    nedges_str = str(nedges) if nedges > 0 else 'unknown'
    time_0 = time.time()
    log.debug('load_transit: loading vertices and edges')
    # BUG FIX: 'i' was unbound after the loop when the compiler yielded
    # no edges (NameError on the summary log). Track the count explicitly.
    n_loaded = 0
    for i, (fromv_label, tov_label, edge) in enumerate(
            compiler.gtfsdb_to_edges(maxtrips)):
        if (i % 25000) == 0:
            log.debug('load_transit: progress: on edge # %d of %s...'
                      % (i, nedges_str,))
        # NOTE: fromv_label is unicode, tov_label str, and edge
        #       graphserver.core.TripBoard/TripAlight/Etc.
        # FIXME: Why is fromv_label unicode?
        fromv_label = str(fromv_label)
        tov_label = str(tov_label)
        g.assurt(isinstance(edge, EdgePayload))
        self.gserver.add_vertex(fromv_label)
        self.gserver.add_vertex(tov_label)
        self.gserver.add_edge(fromv_label, tov_label, edge)
        n_loaded = i + 1
    # 2011.08.08: 49 seconds
    # 2011.08.09: 36 seconds with less log.debug
    log.debug('load_transit: loaded %d edges in %s'
              % (n_loaded, misc.time_format_elapsed(time_0),))
    # 2013.12.05: The cached transit_nedges count is sometimes less than
    # what we loaded, so log an error rather than assert.
    if not ((nedges == 0) or (nedges == n_loaded)):
        log.error('load_transit: nedges: %d / i: %d'
                  % (nedges, n_loaded - 1,))
def link_graphs(self, qb, db_transit):
    """Link the Cyclopath and transit graphs, preferring the fast path."""
    log.debug('link_graphs: linking Cyclopath and Transit networks')
    time_0 = time.time()
    # 2011.08.08/09: fast method ~36 secs; slow fallback ~26 minutes.
    if not self.link_graphs_fast(qb, db_transit):
        # NOTE: If you're here, consider running gtfsdb_build_cache.py.
        log.warning('link_graphs: using slow method!')
        self.link_graphs_slow(qb, db_transit)
    log.debug('link_graphs: complete: in %s'
              % (misc.time_format_elapsed(time_0),))
def link_graphs(self, qb, db_transit):
    """Join the transit network to the Cyclopath network."""
    log.debug('link_graphs: linking Cyclopath and Transit networks')
    time_0 = time.time()
    fast_worked = self.link_graphs_fast(qb, db_transit)
    if not fast_worked:
        # The slow path recomputes nearest nodes (~26 min vs ~36 sec).
        # NOTE: If you're here, consider running gtfsdb_build_cache.py.
        log.warning('link_graphs: using slow method!')
        self.link_graphs_slow(qb, db_transit)
    log.debug('link_graphs: complete: in %s'
              % (misc.time_format_elapsed(time_0),))
def cache_edges(self, db_transit):
    """Count transit edges and store the count in gtfsdb_cache_register.

    Graphserver edges are C objects and cannot be cached directly; we
    iterate them once to count, then record transit_nedges so later
    loads can cross-check.
    """
    agency_id = None
    reporter = None  # FIXME: Can we pass in log.debug somehow?
                     # I think we just need to support write()
    maxtrips = None
    # C.f. graphserver/pygs/build/lib/graphserver/compiler.py
    #      ::graph_load_gtfsdb
    log.debug('load_transit: loading compiler')
    compiler = GTFSGraphCompiler(db_transit, conf.transitdb_agency_name,
                                 agency_id, reporter)
    time_0 = time.time()
    log.debug('load_transit: loading vertices and edges')
    # BUG FIX: guard against an empty feed ('i' was unbound after the
    # loop when gtfsdb_to_edges yielded nothing).
    n_edges = 0
    for i, (fromv_label, tov_label, edge) in enumerate(
            compiler.gtfsdb_to_edges(maxtrips)):
        if (i % 25000) == 0:
            log.debug('load_transit: progress: on edge # %d of unknown...'
                      % (i,))
        n_edges = i + 1
    # Record the final count exactly once, after the loop.
    self.qb.db.sql(
        """
        UPDATE gtfsdb_cache_register
        SET transit_nedges = %d
        WHERE username = %s
          AND branch_id = %d
          AND revision_id = %d
          AND gtfs_caldate = %s
        """ % (n_edges,
               self.qb.db.quoted(self.qb.username),
               self.qb.branch_hier[0][0],
               self.revision_id,
               self.qb.db.quoted(self.tfeed_zipdate),))
    log.debug('load_transit: loaded %d edges in %s'
              % (n_edges, misc.time_format_elapsed(time_0),))
def gtfs_download(self):
    """Fetch the GTFS feed zip via wget -N; sets self.tfeed_not_retrieved.

    wget -N only downloads when the remote file is newer; we scan its
    output for the "not retrieved" message to detect a skipped download.
    """
    # E.g., wget -N --directory-prefix=$ccp/var/transit/metc/ \
    #   ftp://gisftp.metc.state.mn.us/google_transit.zip
    time_0 = time.time()
    local_dir = self.dname_gtfsdb
    remote_file = conf.transit_db_source
    g.assurt(remote_file)
    # FIXME: Instead of using wget, use internal Python functions?
    the_cmd = ('wget -N -P %s %s' % (local_dir, remote_file,))
    log.debug('gtfs_download: downloading: %s' % (the_cmd,))
    p = subprocess.Popen([the_cmd],
                         shell=True,
                         stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT,
                         close_fds=True)
    (sin, sout_err) = (p.stdin, p.stdout)
    self.tfeed_not_retrieved = False
    while not self.tfeed_not_retrieved:
        line = sout_err.readline()
        if not line:
            break
        line = line.strip()
        for regex in self.regex_not_retrieved:
            if regex.search(line):
                self.tfeed_not_retrieved = True
                break
    sin.close()
    sout_err.close()
    p.wait()
    log.debug('gtfs_download: %s in: %s'
              % ('downloaded' if not self.tfeed_not_retrieved
                 else 'not retrieved',
                 misc.time_format_elapsed(time_0),))
def vacuum_finally_maybe(self):
    """VACUUM ANALYZE the database unless we already vacuumed recently."""
    if self.recently_vacuumed:
        return
    if self.cli_opts.do_yesall:
        # --yesall: don't prompt, just do it.
        do_vacuum = True
        log.info('Auto-Vacuuming...')
    else:
        do_vacuum = yn_get('Vacuum database now?')
    if do_vacuum:
        vacuum_time_0 = time.time()
        self.pg_db.sql("VACUUM ANALYZE;")
        log.info('Vacuumed database in %s'
                 % (misc.time_format_elapsed(vacuum_time_0),))
        self.recently_vacuumed = True
def feature_classes_import(self):
    """Run the current import substage, unless the Shapefile was skipped."""
    # A Shapefile with no Cyclopath config (and no conflation) is marked
    # 'incapacitated' earlier and gets skipped here.
    if self.shpf_class != 'incapacitated':
        time_0 = time.time()
        self.substage_fcn_go()
        log.info('... done "%s" in %s'
                 % (self.mjob.wtem.latest_step.stage_name,
                    misc.time_format_elapsed(time_0),))
    else:
        # Still call stage_initialize so the stage number advances.
        self.mjob.stage_initialize('Skipping Shapefile...')
def cache_edges(self, db_transit):
    """Tally the transit edge count and persist it to the cache register.

    Edges are Graphserver C objects, so we cannot pickle them; counting
    them and storing transit_nedges is the best we can cache.
    """
    agency_id = None
    reporter = None  # FIXME: Can we pass in log.debug somehow?
                     # I think we just need to support write()
    maxtrips = None
    # C.f. graphserver/pygs/build/lib/graphserver/compiler.py
    #      ::graph_load_gtfsdb
    log.debug('load_transit: loading compiler')
    compiler = GTFSGraphCompiler(db_transit, conf.transitdb_agency_name,
                                 agency_id, reporter)
    time_0 = time.time()
    log.debug('load_transit: loading vertices and edges')
    edge_count = 0
    # BUG FIX: the loop index was unbound after the loop on an empty
    # feed; count explicitly instead.
    for i, (fromv_label, tov_label, edge) in enumerate(
            compiler.gtfsdb_to_edges(maxtrips)):
        if (i % 25000) == 0:
            log.debug('load_transit: progress: on edge # %d of unknown...'
                      % (i,))
        edge_count = i + 1
    # Store the total once, after iterating the whole feed.
    update_sql = (
        """
        UPDATE gtfsdb_cache_register
        SET transit_nedges = %d
        WHERE username = %s
          AND branch_id = %d
          AND revision_id = %d
          AND gtfs_caldate = %s
        """ % (edge_count,
               self.qb.db.quoted(self.qb.username),
               self.qb.branch_hier[0][0],
               self.revision_id,
               self.qb.db.quoted(self.tfeed_zipdate),))
    self.qb.db.sql(update_sql)
    log.debug('load_transit: loaded %d edges in %s'
              % (edge_count, misc.time_format_elapsed(time_0),))
def ccp_save_cache(self):
    """Build the transit caches: edge count and graph-link pairs."""
    time_0 = time.time()
    log.debug('ccp_save_cache: loading the transit database')
    db_transit = GTFSDatabase(conf.transitdb_filename)
    # NOTE: Edges are C objects and cannot be cached; see usages of
    #       compiler.gtfsdb_to_edges(maxtrips). We can at least count them.
    self.cache_edges(db_transit)
    log.debug('ccp_save_cache: making the transit graph link cache')
    self.cache_links(db_transit)
    log.debug('ccp_save_cache: done: %s'
              % (misc.time_format_elapsed(time_0),))
def ccp_save_cache(self):
    """Open the transit db and populate both caches (edges, then links)."""
    time_0 = time.time()
    log.debug('ccp_save_cache: loading the transit database')
    db_transit = GTFSDatabase(conf.transitdb_filename)
    # Edges themselves can't be pickled (Graphserver C objects), so the
    # edge cache is just a count.
    self.cache_edges(db_transit)
    log.debug('ccp_save_cache: making the transit graph link cache')
    self.cache_links(db_transit)
    log.debug('ccp_save_cache: done: %s'
              % (misc.time_format_elapsed(time_0),))
def gtfs_download(self):
    """Download the transit feed with wget, noting if it was skipped."""
    # E.g., wget -N --directory-prefix=$ccp/var/transit/metc/ \
    #   ftp://gisftp.metc.state.mn.us/google_transit.zip
    time_0 = time.time()
    local_dir = self.dname_gtfsdb
    remote_file = conf.transit_db_source
    g.assurt(remote_file)
    # FIXME: Instead of using wget, use internal Python functions?
    the_cmd = ('wget -N -P %s %s' % (local_dir, remote_file,))
    log.debug('gtfs_download: downloading: %s' % (the_cmd,))
    p = subprocess.Popen([the_cmd],
                         shell=True,
                         stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT,
                         close_fds=True)
    (sin, sout_err) = (p.stdin, p.stdout)
    # wget -N prints a "not retrieved" message when the remote file is
    # unchanged; watch for it so callers know nothing new arrived.
    self.tfeed_not_retrieved = False
    done = False
    while not done:
        line = sout_err.readline()
        if not line:
            done = True
        else:
            line = line.strip()
            for regex in self.regex_not_retrieved:
                if regex.search(line):
                    self.tfeed_not_retrieved = True
                    done = True
                    break
    sin.close()
    sout_err.close()
    p.wait()
    log.debug('gtfs_download: %s in: %s'
              % ('downloaded' if not self.tfeed_not_retrieved
                 else 'not retrieved',
                 misc.time_format_elapsed(time_0),))
def go(self):
    """Parse CLI options and run go_go, always logging elapsed time."""
    time_0 = time.time()
    parser = Log_Jammin_ArgParser()
    self.cli_opts = parser.get_opts()
    if not parser.handled:
        try:
            self.go_go()
        except AssertionError:
            if self.cli_opts.interactive:
                # Dump to a python shell and make the user investigate.
                log.warning('Fatal error. Please debug!')
                # FIXME: I don't think the traceback prints useful info.
                log.warning(traceback.format_exc())
                import pdb
                pdb.set_trace()
            raise
        finally:
            # Always report runtime and stats, even on failure.
            log.info('Script complete! Ran in %s'
                     % (misc.time_format_elapsed(time_0),))
            self.print_stats()
def load_make_graph_add_transit(self, qb):
    """Load the transit network and link it with the Cyclopath graph.

    Returns True on success; raises GWIS_Error when no transit data is
    configured. Does not call the base class implementation.
    """
    # FIXME: What happens here on update? We reload all, don't we?
    # FIXME: For p2, only do this on load, not on update.
    # BUG nnnn: For p2, start a new route finder instance, point
    #           routed_ports at it, then kill the existing one.
    time_0 = time.time()
    usage_0 = mem_usage.get_usage_mb() if conf.debug_mem_usage else None
    log.debug('load: adding transit...')
    loaded = False
    if conf.transitdb_filename:
        self.cache_reg = self.links_get_cache_reg(qb)
        log.debug('load: loading the transit database')
        db_transit = GTFSDatabase(conf.transitdb_filename)
        log.debug('load: making the transit graph')
        self.load_transit(qb, db_transit)
        # Link the two graphs.
        log.debug('load: linking the two graphs')
        self.link_graphs(qb, db_transit)
        loaded = True
    # else, using Graphserver, but no public transit data to load.
    if loaded:
        log.info('load: added transit: in %s'
                 % (misc.time_format_elapsed(time_0),))
    else:
        # MAYBE: Let devs test without loading transit.
        raise GWIS_Error(
            'Unable to load route finder: no transit info found.')
    conf.debug_log_mem_usage(log, usage_0, 'tgraph.load / transit')
    return loaded
def search_calculate_reactions(self, qb):
    """Compute like/dislike/comment counts for a reaction post's thread.

    BUG_FALL_2013: Dead code -- see Many.sql_polarity in thread.py.
    Counting thread polarity for a single post makes no sense: posts
    with polarity (1/-1) have no body, and posts with a body have
    polarity 0, so the counts are meaningless at post granularity.
    """
    g.assurt(False)  # FIXME: Delete this fcn. and the commented-out
                     # code that called it.
    g.assurt(self.owning_thread is not None)
    if self.owning_thread.thread_type_id == Thread_Type.reaction:
        # MAYBE: Does this sql take a while?
        time_0 = time.time()
        rsql = thread.Many.sql_polarity(qb, self.owning_thread.stack_id)
        rres = qb.db.sql(rsql)
        self.reac_data = etree.Element('reac_data')
        misc.xa_set(self.reac_data, 'likes', rres[0]['likes'])
        misc.xa_set(self.reac_data, 'dislikes', rres[0]['dislikes'])
        misc.xa_set(self.reac_data, 'comments', rres[0]['comments'])
        log.debug(
            'srch_calc_reacts: likes: %d / disls: %d / cmmnts: %d / %s'
            % (rres[0]['likes'], rres[0]['dislikes'], rres[0]['comments'],
               misc.time_format_elapsed(time_0),))
def load_make_graph_add_transit(self, qb):
    """Add the public-transit network to the route graph, if configured.

    Raises GWIS_Error when conf.transitdb_filename is unset; otherwise
    loads and links the transit graph and returns True.
    """
    # Not calling base class fcn.
    # FIXME: What happens here on update? We reload all, don't we?
    # BUG nnnn: For p2, spin up a fresh route finder and swap
    #           routed_ports over before killing the old instance.
    time_0 = time.time()
    usage_0 = None
    if conf.debug_mem_usage:
        usage_0 = mem_usage.get_usage_mb()
    log.debug('load: adding transit...')
    loaded = False
    # Load the transit network, maybe (if we have data for it).
    if conf.transitdb_filename:
        self.cache_reg = self.links_get_cache_reg(qb)
        log.debug('load: loading the transit database')
        db_transit = GTFSDatabase(conf.transitdb_filename)
        log.debug('load: making the transit graph')
        self.load_transit(qb, db_transit)
        log.debug('load: linking the two graphs')
        self.link_graphs(qb, db_transit)
        loaded = True
    if not loaded:
        # MAYBE: Let devs test without loading transit.
        raise GWIS_Error(
            'Unable to load route finder: no transit info found.')
    log.info('load: added transit: in %s'
             % (misc.time_format_elapsed(time_0),))
    conf.debug_log_mem_usage(log, usage_0, 'tgraph.load / transit')
    return loaded
def db_vacuum(self, full_vacuum=False):
    """Vacuum the database: geometry-column only by default, or all tables."""
    time_0 = time.time()
    log.info('Vacuuming...')
    if not full_vacuum:
        db = db_glue.new(use_transaction=False)
        db.sql("VACUUM ANALYZE minnesota.geofeature (geometry);")
        db.close()
    else:
        # EXPLAIN: ANALYZE vs. VACUUM ANALYZE vs. VACUUM FULL vs. CLUSTER.
        # See also: vacuum analyze verbose.
        # NOTE: Connect as database owner, lest some tables go unvacced.
        pg_db = db_glue.new(conf.db_owner, use_transaction=False)
        pg_db.sql("VACUUM ANALYZE;")
        pg_db.close()
    log.info('... Vacuum took %s' % (misc.time_format_elapsed(time_0),))
def go(self):
    '''Parse the command line arguments. If the command line parser didn't
       handle a --help or --version command, call the command processor.'''
    time_0 = time.time()
    # Read the CLI args.
    self.cli_args = self.argparser()
    self.cli_opts = self.cli_args.get_opts()
    if not self.cli_args.handled:
        log.info('Welcome to the %s!' % (self.cli_args.script_name,))
        # Prepare the query builder object.
        if not self.skip_query_builder:
            self.query_builder_prepare()
        if self.cli_opts.instance_master:
            # A master script waits to commit the revision until all
            # workers complete: a human operator sends Ctrl-C, which
            # triggers the event, and the master cleans up (commits and
            # releases the revision table lock).
            Ccp_Script_Base.master_event = threading.Event()
            signal.signal(signal.SIGINT, Ccp_Script_Base.ctrl_c_handler)
        # Call the derived class's go function.
        self.go_main()
        # FIXME: Where is self.cli_args.close_query() ??
        log.info('Script completed in %s'
                 % (misc.time_format_elapsed(time_0),))
    # If we run as a script, be sure to return happy exit code.
    return 0
def search_calculate_reactions(self, qb):
    """Attach like/dislike/comment counts to a reaction post (dead code).

    BUG_FALL_2013: [lb] thinks this fcn. can be deleted (see
    Many.sql_polarity in thread.py): polarity posts (1/-1) never have a
    body and body posts have polarity 0, so per-post thread counts are
    never meaningful. Guarded by assurt(False) until removed.
    """
    g.assurt(False)  # FIXME: Delete this fcn. and its commented-out callers.
    g.assurt(self.owning_thread is not None)
    if self.owning_thread.thread_type_id == Thread_Type.reaction:
        # MAYBE: Does this sql take a while?
        time_0 = time.time()
        polarity_sql = thread.Many.sql_polarity(
            qb, self.owning_thread.stack_id)
        rows = qb.db.sql(polarity_sql)
        self.reac_data = etree.Element('reac_data')
        misc.xa_set(self.reac_data, 'likes', rows[0]['likes'])
        misc.xa_set(self.reac_data, 'dislikes', rows[0]['dislikes'])
        misc.xa_set(self.reac_data, 'comments', rows[0]['comments'])
        log.debug(
            'srch_calc_reacts: likes: %d / disls: %d / cmmnts: %d / %s'
            % (rows[0]['likes'], rows[0]['dislikes'], rows[0]['comments'],
               misc.time_format_elapsed(time_0),))
def go(self):
    '''Parse the command line arguments. If the command line parser didn't
       handle a --help or --version command, call the command processor.'''
    time_0 = time.time()
    self.cli_args = self.argparser()
    self.cli_opts = self.cli_args.get_opts()
    if self.cli_args.handled:
        # --help/--version already handled; nothing to run.
        return 0
    log.info('Welcome to the %s!' % (self.cli_args.script_name,))
    if not self.skip_query_builder:
        self.query_builder_prepare()
    # Create the Ctrl-C event if we're the master script: the master
    # holds the revision lock until all workers finish, and a human
    # operator's Ctrl-C triggers cleanup/commit via this handler.
    if self.cli_opts.instance_master:
        Ccp_Script_Base.master_event = threading.Event()
        signal.signal(signal.SIGINT, Ccp_Script_Base.ctrl_c_handler)
    # Hand off to the derived class.
    self.go_main()
    # FIXME: Where is self.cli_args.close_query() ??
    log.info('Script completed in %s'
             % (misc.time_format_elapsed(time_0),))
    return 0
def sql_apply_query_filters(self, qb, where_clause="", conjunction=""):
    """Apply supported query filters to the group_revision WHERE clause.

    We build custom SQL in sql_context_user (and don't use
    item_user_access's search_get_sql()), so only filters that merely
    extend the WHERE clause are supported -- anything that edits
    qb.sql_clauses is not.
    """
    g.assurt((not where_clause) and (not conjunction))
    g.assurt((not conjunction) or (conjunction == "AND"))
    g.assurt(not (qb.filters.filter_by_creator_include
                  or qb.filters.filter_by_creator_exclude
                  or qb.filters.stack_id_table_ref
                  or qb.filters.use_stealth_secret
                  or qb.filters.include_item_stack
                  # There are lots more filters we don't support...
                  # but there's no reason to list them all.
                  ))
    if qb.filters.filter_by_username:
        # [lb] is curious why we'd use regex search (~*) when we want
        # exact match. Exact match is fastest; username is already lower.
        filter_by_username = qb.filters.filter_by_username.lower()
        where_clause += (
            """
            %s (rev.username = %s)
            """ % (conjunction, qb.db.quoted(filter_by_username),))
        conjunction = "AND"
    # Find items the user is watching and only those items' revisions.
    if qb.filters.filter_by_watch_item or qb.filters.only_stack_ids:
        time_0 = time.time()
        # Clone the qb but keep the db: we want to make a temp table.
        watchers_qb = qb.clone(skip_clauses=True, skip_filtport=True)
        qfs = Query_Filters(req=None)
        qfs.filter_by_watch_item = qb.filters.filter_by_watch_item
        qfs.only_stack_ids = qb.filters.only_stack_ids
        watchers_qb.filters = qfs
        watchers_qb.finalize_query()
        watchers_qb.sql_clauses = (
            item_user_watching.Many.sql_clauses_cols_all.clone())
        watchers_many = item_user_watching.Many()
        inner_sql = watchers_many.search_get_sql(watchers_qb)
        # No: watchers_qb.db.close()
        self.revision_id_table_ref = 'temp__filter_rids'
        # MAYBE: These are the group_item_access record's rids; what we
        #        really want is item_versioned's valid_start/until_rid,
        #        though they usually match since access records are
        #        updated whenever an item is edited... [lb] thinks.
        thurrito_sql = (
            """
            SELECT valid_start_rid, valid_until_rid
            INTO TEMPORARY TABLE temp__both_rids
            FROM (%s) AS foo_grv_1
            """ % (inner_sql,))
        rows = qb.db.sql(thurrito_sql)
        rid_union_sql = (
            """
            SELECT DISTINCT (filter_rid)
            INTO TEMPORARY TABLE %s
            FROM ((SELECT valid_start_rid AS filter_rid
                   FROM temp__both_rids)
                  UNION
                  (SELECT valid_until_rid AS filter_rid
                   FROM temp__both_rids)) AS foo_grv_2
            """ % (self.revision_id_table_ref,))
        rows = qb.db.sql(rid_union_sql)
        log.debug('sql_apply_qry_fltrs: %s in %s'
                  % ('filter_by_watch_item or only_stack_ids',
                     misc.time_format_elapsed(time_0),))
    if qb.filters.filter_by_watch_feat:
        # This filter only makes sense for attachment item types.
        log.warning('group_revision does not support filter_by_watch_feat')
        raise GWIS_Nothing_Found()
    # FIXME: Show only changes to watched items...
    # 2013.03.29: To support CcpV1 feature, see search_for_geom and get a
    #             bbox. Maybe: in CcpV2 you can watch non-regions, so we
    #             could collect watched stack IDs and return revisions
    #             containing changes to them...
    return grac_record.Many.sql_apply_query_filters(
        self, qb, where_clause, conjunction)
def execute_problem_solver(self, qb, edge_weight_attr):
    """Run the configured shortest-path algorithm over the route graph.

    Dispatches on self.route.p3_spalgorithm ('as*', 'asp', 'dij', 'sho',
    'ssd') and returns the node walk from beg_nid to fin_nid. Raises
    GWIS_Error if no path is produced (should be unreachable thanks to
    the is_disconnected guarantee on endpoints).
    """
    walk_path = None
    log.debug(
        'exec_prob_slvr: wattr: %s / rat sprd: %s / fac brdn: %s / spalg: %s'
        % (edge_weight_attr, self.route.p3_rating_pump,
           self.route.p3_burden_pump, self.route.p3_spalgorithm,))
    if self.route.p3_spalgorithm == 'as*':
        time_0 = time.time()
        # A* search. MAYBE: We don't need a heuristic, do we? Would one
        # (e.g. distance to the finish) speed up the search?
        ast_path = networkx.astar_path(
            self.tgraph.graph_di, self.route.beg_nid, self.route.fin_nid,
            # MAYBE: Send pload... networkx hack
            heuristic=None, weight=edge_weight_attr, pload=self)
        log.debug('exec_prob_slvr: astar_path: no. edges: %d / in %s'
                  % (len(ast_path), misc.time_format_elapsed(time_0),))
        walk_path = ast_path
    elif self.route.p3_spalgorithm == 'asp':
        time_0 = time.time()
        # MAYBE: Hack networkx and add pload param to support edge wght fcns.
        all_paths = networkx.all_shortest_paths(
            self.tgraph.graph_di, self.route.beg_nid, self.route.fin_nid,
            weight=edge_weight_attr)
        log.debug(
            'exec_prob_slvr: all_shortest_paths: returned generator in %s'
            % (misc.time_format_elapsed(time_0),))
        time_0 = time.time()
        path_num = 1
        for a_path in all_paths:
            log.debug(
                'solve_pb: all_shortest_paths: path #%d: len: %d / in %s'
                % (path_num, len(a_path),
                   misc.time_format_elapsed(time_0),))
            path_num += 1
            time_0 = time.time()
            if walk_path is None:
                walk_path = a_path
            # FIXME/EXPLAIN: In practice, all_shortest_paths probably
            # always returns just one result for Cyclopath (ties in
            # cost seem unlikely).
    elif self.route.p3_spalgorithm == 'dij':
        time_0 = time.time()
        # Dijkstra's algorithm.
        # MAYBE: Hack networkx and add pload param to support edge wght fcns.
        dij_path = networkx.dijkstra_path(
            self.tgraph.graph_di, self.route.beg_nid, self.route.fin_nid,
            weight=edge_weight_attr)
        log.debug('exec_prob_slvr: dijkstra_path: no. edges: %d / in %s'
                  % (len(dij_path), misc.time_format_elapsed(time_0),))
        walk_path = dij_path
    elif self.route.p3_spalgorithm == 'sho':
        time_0 = time.time()
        # Generic NetworkX shortest path search.
        # MAYBE: Hack networkx and add pload param to support edge wght fcns.
        asp_path = networkx.shortest_path(
            self.tgraph.graph_di, self.route.beg_nid, self.route.fin_nid,
            weight=edge_weight_attr)
        log.debug('exec_prob_slvr: shortest_path: no. edges: %d / in %s'
                  % (len(asp_path), misc.time_format_elapsed(time_0),))
        walk_path = asp_path
    elif self.route.p3_spalgorithm == 'ssd':
        time_0 = time.time()
        # single_source_dijkstra returns (distances, paths) dicts keyed
        # by node -- not ideal, since it builds paths to every reachable
        # node, not just fin_nid.
        # MAYBE: Hack networkx and add pload param to support edge wght fcns.
        dij_distance2, dij_path2 = networkx.single_source_dijkstra(
            self.tgraph.graph_di, self.route.beg_nid, self.route.fin_nid,
            weight=edge_weight_attr)
        log.debug(
            'exec_prob_slvr: single_source_dijkstra: no. edges: %d / in %s'
            % (len(dij_path2), misc.time_format_elapsed(time_0),))
        walk_path = dij_path2[self.route.fin_nid]
    else:
        # Unreachable: an unknown algorithm would have raised earlier.
        g.assurt(False)
    if not walk_path:
        # NOTE: Probably unreachable -- the is_disconnected attribute
        #       guarantees origin and destination sit on well-connected
        #       nodes.
        log.error('exec_prob_slvr: no route?: %s' % (self,))
        raise GWIS_Error(Problem_Base.error_msg_basic)
    return walk_path
def cache_links(self, db_transit):
    """Link each transit stop to its nearest Cyclopath byway node.

    For every stop in db_transit, finds the closest rateable byway,
    derives its nearest node ID, and inserts a row into the
    gtfsdb_cache_links table. Stops without a usable node are logged
    and skipped.
    """
    log.debug('cache_links: caching transit node/cyclopath node pairs')
    time_0 = time.time()
    # NOTE: We load all byways into the graph, including those tagged
    # 'prohibited' and 'closed', but we only ever link those not tagged as
    # such with transit stops.
    # MAGIC HACK ALERT
    #revision = revision.Historic(self.revision_id)
    #model = ratings.Predictor(self.qb.branch_hier[0][0],
    #                          #self.revision_id)
    #                          revision)
    #model.load(self.qb)
    #
    route_daemon = None
    ccp_graph = planner.routed_p2.tgraph.Trans_Graph(
        route_daemon, self.qb.username, self.qb.branch_hier,
        self.qb.revision)
    ccp_graph.load(self.qb.db)
    #
    tagprefs = {}
    tagprefs['prohibited'] = ratings.t_avoid
    tagprefs['closed'] = ratings.t_avoid
    #
    rating_func = ccp_graph.ratings.rating_func(self.qb.username,
                                                tagprefs, ccp_graph)
    # MAGIC NUMBER: Min rating.
    rating_min = 0.5
    # The transit data is lat,lon, as opposed to SRID-encoded x,y.
    is_latlon = True
    n_stops = db_transit.count_stops()
    # NOTE: 2011.06.26: This loops takes a while. For me [lb], 55 secs.
    # NOTE: 2011.08.08: Find nearest node using GRAC SQL is time consuming!
    #       On the order of minutes and minutes...
    #       Can you cache the nearest nodes, maybe? At least for
    #       anon user in public branch and current revision?
    #       byway.transit_stops ?? list of transit IDs?
    for i, (stop_id, stop_name, stop_lat, stop_lon) in enumerate(
            db_transit.stops()):
        # Every once in a while, print a debug message
        # FIXME: Replace with prog logger
        if i and ((i % 25) == 0):
            log.debug('link_graphs: progress: on stop # %d of %d...'
                      % (i, n_stops,))
        #log.debug('   >> id: %s / name: %s / lat: %s / lon: %s'
        #          % (stop_id, stop_name, stop_lat, stop_lon,))
        # NOTE: The (x,y) point is lon first, then lat.
        stop_xy = (stop_lon, stop_lat,)
        # 2012.03.05: This is taking wayyy tooooo long.
        nearest_byway = route.One.byway_closest_xy(
            self.qb, stop_name, stop_xy, rating_func, rating_min,
            is_latlon)
        nearest_node = nearest_byway.nearest_node_id()
        # FIXME: What if the node is on a one-way? What if the node is
        #        tagged with something that the user marks 'avoid'? In
        #        both cases, transit stop might be unreachable.
        # 2012.01.09: Get m-value and send to client.
        if nearest_node is not None:
            node_id = str(nearest_node)
            # NOTE: If we don't cast to string, it's unicode, and
            #       db.insert doesn't quote it.
            stop_id = 'sta-%s' % (str(stop_id),)
            if node_id != '':
                self.qb.db.insert(
                    'gtfsdb_cache_links',
                    {'username': self.qb.username,
                     'branch_id': self.qb.branch_hier[0][0],
                     'revision_id': self.revision_id,
                     'gtfs_caldate': self.tfeed_zipdate,
                     # FIXME: This is a string? See above...
                     'node_stack_id': node_id,
                     ## Bug nnnn:
                     # 'byway_m_value':
                     'transit_stop_id': stop_id,
                     }, {})
            else:
                log.warning(
                    'link_graphs: no node name?!: node_id: %s / stop_id: %s'
                    % (node_id, stop_id,))
        else:
            # BUG FIX: This branch used to interpolate node_id, which is
            # only assigned in the if-branch above and so is unbound the
            # first time we land here -- that raised NameError instead of
            # logging. Log the values actually in scope instead.
            log.warning(
                'link_graphs: no nearest node?!: nearest_node: %s / stop_id: %s'
                % (nearest_node, stop_id,))
            log.warning(' >> lat, lon: (%s, %s)' % (stop_lat, stop_lon,))
    nlinks = self.cache_count_sql('gtfsdb_cache_links')
    # NOTE(review): this assurt fails if any stop was skipped above, so
    # presumably every stop always links in practice -- confirm.
    g.assurt(nlinks == n_stops)
    # 2011.08.08: 1570.29 secs (26 minutes)
    # 2011.08.09: 1270.86 secs (21 minutes)
    log.debug('link_graphs: linked: %d transit stops in %s'
              % (n_stops, misc.time_format_elapsed(time_0),))
def execute_problem_solver(self, qb, edge_weight_attr):
    """Run the configured shortest-path algorithm over the transport
    graph and return the walk (list of node IDs) from route.beg_nid to
    route.fin_nid.

    The algorithm is chosen by self.route.p3_spalgorithm:
      'as*' -- A* search (via a locally patched networkx; see pload note)
      'asp' -- all_shortest_paths (keeps the first path yielded)
      'dij' -- Dijkstra
      'sho' -- generic networkx shortest_path
      'ssd' -- single_source_dijkstra (extracts the destination's path)
    Raises GWIS_Error if no path was found (believed unreachable).
    """
    walk_path = None
    log.debug(
        'exec_prob_slvr: wattr: %s / rat sprd: %s / fac brdn: %s / spalg: %s'
        % (edge_weight_attr,
           self.route.p3_rating_pump,
           self.route.p3_burden_pump,
           self.route.p3_spalgorithm,))
    # ***
    if self.route.p3_spalgorithm == 'as*':
        time_0 = time.time()
        # Calculate path using A* search.
        # MAYBE: We don't need a heuristic, do we??
        #        Would it speed up the search?
        #        The h fcn. would just be the dist from the curnode to the
        #        finish? Or does [lb] still not know how graph search works...
        # NOTE(review): pload is not a stock networkx keyword; this assumes
        # a locally patched networkx.astar_path -- confirm.
        ast_path = networkx.astar_path(
            self.tgraph.graph_di, self.route.beg_nid, self.route.fin_nid,
            # MAYBE: Send pload... networkx hack
            #heuristic=None, weight=edge_weight_attr)
            heuristic=None, weight=edge_weight_attr, pload=self)
        log.debug('exec_prob_slvr: astar_path: no. edges: %d / in %s'
                  % (len(ast_path), misc.time_format_elapsed(time_0),))
        walk_path = ast_path
    # ***
    elif self.route.p3_spalgorithm == 'asp':
        time_0 = time.time()
        # MAYBE: Hack networkx and add pload param to support edge wght fcns.
        all_paths = networkx.all_shortest_paths(
            self.tgraph.graph_di, self.route.beg_nid, self.route.fin_nid,
            weight=edge_weight_attr)
        log.debug(
            'exec_prob_slvr: all_shortest_paths: returned generator in %s'
            % (misc.time_format_elapsed(time_0),))
        time_0 = time.time()
        path_num = 1
        # Walk the generator: log every path but keep only the first one.
        for a_path in all_paths:
            log.debug('solve_pb: all_shortest_paths: path #%d: len: %d / in %s'
                      % (path_num, len(a_path),
                         misc.time_format_elapsed(time_0),))
            path_num += 1
            time_0 = time.time()
            if walk_path is None:
                walk_path = a_path
            #break
        # FIXME/EXPLAIN: Find an O/D pair that results in multiple paths...
        #   or would the cost have to be equal? That doesn't sound very
        #   likey -- in practicality, all_shortest_paths would probably
        #   always just return one result for Cyclopath.
    # ***
    elif self.route.p3_spalgorithm == 'dij':
        time_0 = time.time()
        # Calculate path using Dijkstra's algorithm.
        # MAYBE: Hack networkx and add pload param to support edge wght fcns.
        dij_path = networkx.dijkstra_path(
            self.tgraph.graph_di, self.route.beg_nid, self.route.fin_nid,
            weight=edge_weight_attr)
        log.debug('exec_prob_slvr: dijkstra_path: no. edges: %d / in %s'
                  % (len(dij_path), misc.time_format_elapsed(time_0),))
        walk_path = dij_path
    # ***
    elif self.route.p3_spalgorithm == 'sho':
        time_0 = time.time()
        # Calculate path using generic NetworkX shortest path search.
        # MAYBE: Hack networkx and add pload param to support edge wght fcns.
        asp_path = networkx.shortest_path(
            self.tgraph.graph_di, self.route.beg_nid, self.route.fin_nid,
            weight=edge_weight_attr)
        log.debug('exec_prob_slvr: shortest_path: no. edges: %d / in %s'
                  % (len(asp_path), misc.time_format_elapsed(time_0),))
        walk_path = asp_path
    # ***
    elif self.route.p3_spalgorithm == 'ssd':
        time_0 = time.time()
        # This Dijkstra fcn. "returns a tuple of two dictionaries
        # keyed by node. The first dictionary stores distance from
        # the source. The second stores the path from the source to
        # that node."
        # So... this isn't an ideal fcn. to use, since the second
        # dictionary contains paths for every pair of nodes.
        # MAYBE: Hack networkx and add pload param to support edge wght fcns.
        dij_distance2, dij_path2 = networkx.single_source_dijkstra(
            self.tgraph.graph_di, self.route.beg_nid, self.route.fin_nid,
            weight=edge_weight_attr)
        log.debug(
            'exec_prob_slvr: single_source_dijkstra: no. edges: %d / in %s'
            % (len(dij_path2), misc.time_format_elapsed(time_0),))
        # Keep just the path that ends at the destination node.
        walk_path = dij_path2[self.route.fin_nid]
    # ***
    else:
        # This code should be unreachable, as we would've raised by now.
        g.assurt(False)
    # ***
    if not walk_path:
        # NOTE: This code is probably unreachable. With the new
        #       is_disconnected attribute, there should no longer
        #       be a possibility that we won't find a route, since
        #       we guarantee that the origin and destination are
        #       on nodes that are well-connected.
        log.error('exec_prob_slvr: no route?: %s' % (self,))
        raise GWIS_Error(Problem_Base.error_msg_basic)
    # ***
    return walk_path
# NOTE(review): this except/finally pair is the tail of a try block that
# begins above this chunk; shown here as found.
except Exception, e:
    # Log the full traceback, then bail with a nonzero exit status so the
    # caller (e.g. cron) sees the failure.
    log.info('Exception: "%s" / %s' % (e, traceback.format_exc(),))
    sys.exit(1)
finally:
    # Always roll back any half-finished transaction and release both
    # database connections, whether we succeeded or bailed.
    if self.db.transaction_in_progress():
        self.db.transaction_rollback()
    self.db.close()
    self.pg_db.close()
# NOTE(review): placement of this final log line relative to the finally
# block is reconstructed -- confirm against the original file.
log.info('Ran %d schema upgrade scripts; exiting. Took: %s'
         % (script_ct, misc.time_format_elapsed(time_0),))

#
def setup_next_script(self):
    # Reset per-script bookkeeping before the next upgrade script is chosen.
    self.last_script = self.next_script
    self.next_script = None
    self.orig_script = None
    # Ideally, we'd do some table locking to ensure that no one else is
    # manipulating the database. But this isn't manageable -- we invoke the
    # SQL scripts as separate processes, so we can't have table locks or we
    # might cause a script to deadlock (e.g., if we lock the 'revision' table
    # and then a script tries to update that table). Usually, we'll be fine:
    # we use transactions, so, worst case scenario, either the script fails
def really_purge_branch_really(self):
    """Irreversibly delete all database rows belonging to this branch.

    Acquires the revision table lock, then brute-force DELETEs from
    every branch-scoped table, in an order meant to respect foreign key
    constraints (see the caveats below). There is no undo.
    """
    time_0 = time.time()
    log.info('For real purging all traces of the branch!')
    log.warning('FIXME: This fcn. is not tested all that well.')
    log.debug('purge_branch_: Acquiring revision table lock...')
    # Serialize with other writers before the destructive deletes.
    revision.Revision.revision_lock_dance(
        self.qb.db, caller='make_new_branch.py')
    # MAYBE: 2012.08.03: This code brute-forces the removal. It's from
    # some copy-n-paste code [lb] has been using (i.e., from a psql
    # command line) so it's not all that well tested. One concern is that
    # there might be violations of foreign key constraints...
    tables = [
        #
        'new_item_policy',
        #
        'node_byway',
        'node_endpoint',
        #
        # FIXME: Delete from route_id where route_id...
        'route',
        #
        'attribute',
        'tag', # Should be empty for leafy branches, anyway...
        'post',
        'thread',
        'annotation',
        # NOTE(review): 'attribute' appears twice in this list (see a few
        # entries above); the second DELETE is a harmless no-op, but one of
        # the two is probably redundant -- confirm.
        'attribute',
        'attachment',
        'geofeature',
        'link_value',
        #
        'tag_preference',
        #
        'merge_job',
        'route_analysis_job',
        # FIXME: Delete from work_item_step where work_item_id...
        'work_item',
        #
        # MAYBE: delete from item_event_read where item_id = ...
        # MAYBE: delete from item_event_alert
        #
        'gtfsdb_cache_links',
        'gtfsdb_cache_register',
        #
        'group_revision',
        'group_membership',
        'group_',
        #
        'group_item_access',
        # FIXME: Delete from branch_conflict where branch_system_id...
        'branch',
        'item_versioned',
        #
        # MAYBE:
        # delete from revert_event where rid_reverting/rid_victim
        #
        'revision',
        #
        # MAYBE:
        # delete from track_point where track_id...
        # 'track',
        #
        # MAYBE: tilecache tables...
        ]
    for table in tables:
        # Table names come from the hard-coded list above and branch_id is
        # an int, so %-interpolation here is not an injection risk.
        self.qb.db.sql("DELETE FROM %s WHERE branch_id = %d"
                       % (table, self.qb.branch_hier[0][0],))
    log.debug('purge_branch_: delete from %d tables in %s'
              % (len(tables), misc.time_format_elapsed(time_0),))
def commit_qbs(self, do_commit, commit_msg, skip_vacuum=False,
               skip_geometry=False):
    """Finalize the working query-builder: save the pending revision (if
    one was claimed), commit or roll back the transaction, and update
    the revision's geometry summary.

    Parameters:
      do_commit -- True to commit the db transaction; False to roll back.
      commit_msg -- changenote text for the new revision.
      skip_vacuum -- skip the post-commit vacuum (currently forced on;
        see the FIXME below).
      skip_geometry -- forwarded to Revision.geosummary_update.
    """
    g.assurt(self.qb_cur is not None)
    # BUG FIX: groups_ids used to be assigned only inside the
    # revision-saving branch below, but it's also referenced by the
    # geometry-update branch at the bottom -- which raised NameError when
    # rid_new was None and do_commit was True. Default it here so that
    # path passes an empty group list instead of crashing.
    groups_ids = []
    if (self.qb_cur.item_mgr.rid_new is not None) and (self.target_groups):
        # Both of these operations take a little bit of time.
        #
        # To make the new revision, we call cp_revision_geosummary_update,
        # which takes a while. And to commit everything, well, e.g.,
        # committing five hundred thousand rows takes a while.
        time_0 = time.time()
        log.info('Saving new revision: %s...'
                 % (self.qb_cur.item_mgr.rid_new,))
        # NOTE: We could use the host or IP address of the machine when the
        #       user submitted the work job, but we also have a
        #       non-anonymous username, so setting the host to 'localhost'
        #       doesn't really matter.
        host = 'localhost'
        # Make group_revisions for the public group and the shared group.
        for grp_mship, acl_id in self.target_groups.iteritems():
            groups_ids.append(grp_mship.group_id)
        g.assurt(len(groups_ids) > 0)
        # FIXME: Make sure the new revision is marked not revertable:
        #        that would be crazy-silly if someone was able to issue a
        #        revert request on it!
        Item_Manager.revision_save(
            self.qb_cur,
            self.qb_cur.item_mgr.rid_new,
            self.qb_cur.branch_hier,
            host,
            self.mjob.wtem.created_by,
            commit_msg,
            groups_ids,
            activate_alerts=False,
            processed_items=None,
            reverted_revs=None,
            skip_geometry_calc=False,
            skip_item_alerts=False)
        # Claim the new revision ID.
        revision.Revision.revision_claim(self.qb_cur.db,
                                         self.qb_cur.item_mgr.rid_new)
        # Claim the new stack IDs.
        self.qb_cur.item_mgr.finalize_seq_vals(self.qb_cur.db)
        log.info('... new revision took %s'
                 % (misc.time_format_elapsed(time_0),))
    g.assurt(self.qb_cur is not None)
    time_0 = time.time()
    if do_commit:
        log.debug('Committing the database transaction.')
        # BUG 2688: Use transaction_retryable?
        self.qb_cur.db.transaction_commit()
    elif self.qb_cur is not None:
        log.debug('Rolling back the database!!')
        self.qb_cur.db.transaction_rollback()
    log.info('... %s took %s'
             % ('Commit' if do_commit else 'Rollback',
                misc.time_format_elapsed(time_0),))
    # FIXME: Put in debug? And then delete this...
    skip_vacuum = True
    if do_commit and not skip_vacuum:
        self.db_vacuum()
    if do_commit:
        time_0 = time.time()
        # Update the revision's geometry approximation.
        # FIXME: Hopefully, this is faster after a vacuum.
        db = db_glue.new()
        db.transaction_begin_rw()
        branch_hier = self.qb_cur.branch_hier
        revision.Revision.geosummary_update(db,
                                            self.qb_cur.item_mgr.rid_new,
                                            branch_hier,
                                            groups_ids,
                                            skip_geometry)
        db.transaction_commit()
        db.close()
        log.info('... Rev Geom took %s'
                 % (misc.time_format_elapsed(time_0),))
    # FIXME: Test this: 2013.10.30: Do like commit.py, and call
    #        do_post_commit (which just tickles Mr. Do!) and called
    #        routed_hup (which sends an interrupt to the route daemon).
    if do_commit:
        log.debug('commit_qbs: signalling Mr. Do!')
        self.qb_cur.item_mgr.do_post_commit(self.qb_cur)
        log.debug('commit_qbs: signalling route daemon')
        command_base.Op_Handler.routed_hup(self.qb_cur.db)
    self.qb_cur.item_mgr.rid_new = None
def load(self, db, keep_running=None):
    """Read byway ratings from the database and cache them in memory.

    Builds a branch filter from the graph's branch hierarchy, picks the
    current or historic ratings SQL, streams the result rows through
    cache_rats(), and clears last_last_modified when finished.
    """
    load_started = time.time()
    mem_at_start = None
    if conf.debug_mem_usage:
        mem_at_start = mem_usage.get_usage_mb()
    # Restrict the query to this graph's branch, or branch stack.
    g.assurt(self.graph.branch_hier)
    hier_ids = [str(tier[0]) for tier in self.graph.branch_hier]
    if len(hier_ids) == 1:
        where_branch = "branch_id = %d" % (self.graph.branch_hier[0][0],)
    else:
        where_branch = "branch_id IN (%s)" % (','.join(hier_ids),)
    # Historic graphs replay ratings as-of a revision; otherwise fetch
    # just the latest values.
    if self.honor_historic:
        rats_sql = self.sql_historic(db, where_branch)
    else:
        rats_sql = self.sql_current(db, where_branch)
    if rats_sql:
        # Stream the ratings instead of fetching them all up front.
        log.info('load: reading byway_rating table...')
        query_started = time.time()
        log.verbose('load: enabling dont_fetchall')
        db.dont_fetchall = True
        rats = db.sql(rats_sql)
        log.verbose('load: disabling dont_fetchall')
        db.dont_fetchall = False
        log.info('load: read %d ratings %s in %s'
                 % (db.curs.rowcount,
                    '[Current]' if not self.last_last_modified
                    else ('[since %s]' % self.last_last_modified),
                    misc.time_format_elapsed(query_started),))
        g.check_keep_running(keep_running)
        self.cache_rats(db)
        db.curs_recycle()
    # Uncomment for remote debugging...
    #g.assurt(False)
    conf.debug_log_mem_usage(log, mem_at_start, 'ratings.load')
    log.info('load: done loading ratings in %s'
             % (misc.time_format_elapsed(load_started),))
    self.last_last_modified = None
def go_main(self):
    """Top-level driver: either (re)create the gtfsdb cache tables, or
    refresh the transit feed, rebuild the gtfs/graphserver databases as
    needed, and repopulate the Cyclopath link cache.
    """
    log.debug('Starting')
    time_0 = time.time()
    if (self.cli_opts.cache_table_drop
        or self.cli_opts.cache_table_create):
        # Table-maintenance mode: just drop and/or create, then commit.
        if self.cli_opts.cache_table_drop:
            self.gtfsdb_cache_delete()
        if self.cli_opts.cache_table_create:
            self.gtfsdb_cache_create()
        log.info('Committing transaction [go_main]')
        self.qb.db.transaction_commit()
    else:
        os.chdir(self.dname_gtfsdb)
        self.tools_check()
        # Download the transit archive.
        self.gtfs_download()
        # Get the date. There are three ways we can get it (well, there are
        # up to three different dates we can get). We use the date to
        # compare against saved archives, to know if we really need to
        # update our cache and restart the route planner (i.e., if the
        # archive hasn't changed, we can do nothing).
        self.gtfs_get_feed_dates()
        # If we really downloaded the archive, keep a copy of it.
        if not self.tfeed_not_retrieved:
            self.gtfs_archive()
        self.cache_prepare()
        # If a new transit feed was downloaded, or if the gtfs database or
        # the graphserver database are missing, rebuild the gtfs and
        # gserver dbs.
        if ((not self.tfeed_not_retrieved)
            or (not os.path.exists(conf.transitdb_filename))
            or (not os.path.exists(self.fname_transit_gdb))):
            self.gtfsdb_compile()
            self.graphserver_import()
            self.graphserver_inspect()
        else:
            log.debug('Transit feed up-to-date; skipping compile.')
        self.files_fixperms()
        if not self.cache_up_to_date:
            self.ccp_cache_populate()
            #
            # Vacuum on its own connection: VACUUM can't run inside a
            # transaction block.
            log.info('Vacuuming the database')
            db = db_glue.new(use_transaction=False)
            db.sql("VACUUM ANALYZE;")
            db.close()
        else:
            log.debug('Transit cache up-to-date; skipping cache.')
    log.debug('gtfsdb_build_cache: complete: %s'
              % (misc.time_format_elapsed(time_0),))
def really_purge_branch_really(self):
    """Permanently remove every trace of this branch from the database.

    Takes the revision table lock first, then issues one DELETE per
    branch-scoped table, in an order intended to keep foreign key
    constraints happy. There is no undo.
    """
    purge_started = time.time()
    log.info('For real purging all traces of the branch!')
    log.warning('FIXME: This fcn. is not tested all that well.')
    log.debug('purge_branch_: Acquiring revision table lock...')
    revision.Revision.revision_lock_dance(
        self.qb.db, caller='make_new_branch.py')
    # MAYBE: 2012.08.03: This code brute-forces the removal. It's from
    # some copy-n-paste code [lb] has been using (i.e., from a psql
    # command line) so it's not all that well tested. One concern is that
    # there might be violations of foreign key constraints...
    # NOTE: The deletion order below matters: children before parents.
    branched_tables = [
        'new_item_policy',
        'node_byway',
        'node_endpoint',
        # FIXME: Delete from route_id where route_id...
        'route',
        'attribute',
        'tag', # Should be empty for leafy branches, anyway...
        'post',
        'thread',
        'annotation',
        'attribute',
        'attachment',
        'geofeature',
        'link_value',
        'tag_preference',
        'merge_job',
        'route_analysis_job',
        # FIXME: Delete from work_item_step where work_item_id...
        'work_item',
        # MAYBE: delete from item_event_read where item_id = ...
        # MAYBE: delete from item_event_alert
        'gtfsdb_cache_links',
        'gtfsdb_cache_register',
        'group_revision',
        'group_membership',
        'group_',
        'group_item_access',
        # FIXME: Delete from branch_conflict where branch_system_id...
        'branch',
        'item_versioned',
        # MAYBE: delete from revert_event where rid_reverting/rid_victim
        'revision',
        # MAYBE: delete from track_point where track_id... then 'track',
        # MAYBE: tilecache tables...
        ]
    # Table names come from the hard-coded list and branch_id is an int,
    # so %-interpolation here is not an injection risk.
    branch_id = self.qb.branch_hier[0][0]
    for table_name in branched_tables:
        self.qb.db.sql("DELETE FROM %s WHERE branch_id = %d"
                       % (table_name, branch_id,))
    log.debug('purge_branch_: delete from %d tables in %s'
              % (len(branched_tables),
                 misc.time_format_elapsed(purge_started),))
def load_really(self, qb_curr, keep_running=None):
    '''Load the transport network from the database.

    Loads (or incrementally updates) ratings, byways with their attrs
    and tags, and transit links. Historic revisions trigger a full
    load; Updated revisions only refresh what changed. Periodically
    checks keep_running so a shutdown request can abort the load.
    '''
    g.check_keep_running(keep_running)
    t0_all = time.time()
    usage_0 = None
    if conf.debug_mem_usage:
        usage_0 = mem_usage.get_usage_mb()
        log.info('load: mem_usage: beg: %.2f Mb' % (usage_0,))
    # Load ratings.
    # NOTE: To find its usage, search graph.ratings.
    if self.ratings is None:
        # First load: only expected while replaying a Historic revision.
        g.assurt(isinstance(qb_curr.revision, revision.Historic))
        self.ratings = ratings.Predictor(self)
    # Load all ratings or just update what's changed since we last checked.
    self.ratings.load(qb_curr.db, keep_running=keep_running)
    # Load byways, and attrs and tags.
    try:
        # Optional CLI region filter; route_daemon may lack cli_opts.
        if self.route_daemon.cli_opts.regions:
            qb_curr.filters.filter_by_regions = (
                self.route_daemon.cli_opts.regions)
    except AttributeError:
        pass
    log.debug('load: calling load_feats_and_attcs...')
    prog_log = Debug_Progress_Logger(log_freq=25000)
    if isinstance(qb_curr.revision, revision.Historic):
        # Full load as-of the historic revision.
        qb_curr.item_mgr.load_feats_and_attcs(qb_curr, byway,
            'search_by_network', self.add_byway_loaded, prog_log,
            heavyweight=False, fetch_size=0, keep_running=keep_running)
    else:
        # Incremental update: only items changed since the last load.
        g.assurt(isinstance(qb_curr.revision, revision.Updated))
        qb_curr.item_mgr.update_feats_and_attcs(qb_curr, byway,
            'search_by_network', self.add_byway_updated, prog_log,
            heavyweight=False, fetch_size=0, keep_running=keep_running)
    # Add transit.
    self.load_make_graph_add_transit(qb_curr)
    # All done loading.
    conf.debug_log_mem_usage(log, usage_0, 'tgraph_base.load_really')
    log.info(
        '/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\')
    log.info('load: complete: for %s in %s'
             % (qb_curr.revision.short_name(),
                misc.time_format_elapsed(t0_all),))
    log.info(
        '/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\')
    qb_curr.definalize()
    qb_curr = None
def load_really(self, qb_curr, keep_running=None):
    '''Load the transport network from the database.

    Pulls in ratings, then byways with their attachments (attrs/tags),
    then transit links. A Historic revision means a full load; an
    Updated revision means an incremental refresh. keep_running lets a
    shutdown request abort mid-load.
    '''
    g.check_keep_running(keep_running)
    t0_all = time.time()
    usage_0 = None
    if conf.debug_mem_usage:
        usage_0 = mem_usage.get_usage_mb()
        log.info('load: mem_usage: beg: %.2f Mb' % (usage_0, ))
    # Load ratings.
    # NOTE: To find its usage, search graph.ratings.
    if self.ratings is None:
        # First-time load: only expected for a Historic revision replay.
        g.assurt(isinstance(qb_curr.revision, revision.Historic))
        self.ratings = ratings.Predictor(self)
    # Load all ratings or just update what's changed since we last checked.
    self.ratings.load(qb_curr.db, keep_running=keep_running)
    # Load byways, and attrs and tags.
    try:
        # Optional CLI region filter; route_daemon may not have cli_opts.
        if self.route_daemon.cli_opts.regions:
            qb_curr.filters.filter_by_regions = (
                self.route_daemon.cli_opts.regions)
    except AttributeError:
        pass
    log.debug('load: calling load_feats_and_attcs...')
    prog_log = Debug_Progress_Logger(log_freq=25000)
    if isinstance(qb_curr.revision, revision.Historic):
        # Full load as-of the historic revision.
        qb_curr.item_mgr.load_feats_and_attcs(qb_curr, byway,
            'search_by_network', self.add_byway_loaded, prog_log,
            heavyweight=False, fetch_size=0, keep_running=keep_running)
    else:
        # Incremental update: only items changed since the last load.
        g.assurt(isinstance(qb_curr.revision, revision.Updated))
        qb_curr.item_mgr.update_feats_and_attcs(qb_curr, byway,
            'search_by_network', self.add_byway_updated, prog_log,
            heavyweight=False, fetch_size=0, keep_running=keep_running)
    # Add transit.
    self.load_make_graph_add_transit(qb_curr)
    # All done loading.
    conf.debug_log_mem_usage(log, usage_0, 'tgraph_base.load_really')
    log.info(
        '/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\'
        )
    log.info('load: complete: for %s in %s'
             % (qb_curr.revision.short_name(),
                misc.time_format_elapsed(t0_all), ))
    log.info(
        '/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\'
        )
    qb_curr.definalize()
    qb_curr = None
def cache_links(self, db_transit):
    """Link each transit stop to its nearest Cyclopath byway node.

    For every stop in db_transit, finds the closest rateable byway,
    derives its nearest node ID, and inserts a row into the
    gtfsdb_cache_links table. Stops without a usable node are logged
    and skipped.
    """
    log.debug('cache_links: caching transit node/cyclopath node pairs')
    time_0 = time.time()
    # NOTE: We load all byways into the graph, including those tagged
    # 'prohibited' and 'closed', but we only ever link those not tagged as
    # such with transit stops.
    # MAGIC HACK ALERT
    #revision = revision.Historic(self.revision_id)
    #model = ratings.Predictor(self.qb.branch_hier[0][0],
    #                          #self.revision_id)
    #                          revision)
    #model.load(self.qb)
    #
    route_daemon = None
    ccp_graph = planner.routed_p2.tgraph.Trans_Graph(route_daemon,
        self.qb.username, self.qb.branch_hier, self.qb.revision)
    ccp_graph.load(self.qb.db)
    #
    tagprefs = {}
    tagprefs['prohibited'] = ratings.t_avoid
    tagprefs['closed'] = ratings.t_avoid
    #
    rating_func = ccp_graph.ratings.rating_func(self.qb.username,
                                                tagprefs, ccp_graph)
    # MAGIC NUMBER: Min rating.
    rating_min = 0.5
    # The transit data is lat,lon, as opposed to SRID-encoded x,y.
    is_latlon = True
    n_stops = db_transit.count_stops()
    # NOTE: 2011.06.26: This loops takes a while. For me [lb], 55 secs.
    # NOTE: 2011.08.08: Find nearest node using GRAC SQL is time consuming!
    #       On the order of minutes and minutes...
    #       Can you cache the nearest nodes, maybe? At least for
    #       anon user in public branch and current revision?
    #       byway.transit_stops ?? list of transit IDs?
    for i, (stop_id, stop_name, stop_lat, stop_lon) in enumerate(
            db_transit.stops()):
        # Every once in a while, print a debug message
        # FIXME: Replace with prog logger
        if i and ((i % 25) == 0):
            log.debug('link_graphs: progress: on stop # %d of %d...'
                      % (i, n_stops,))
        #log.debug('   >> id: %s / name: %s / lat: %s / lon: %s'
        #          % (stop_id, stop_name, stop_lat, stop_lon,))
        # NOTE: The (x,y) point is lon first, then lat.
        stop_xy = (stop_lon, stop_lat,)
        # 2012.03.05: This is taking wayyy tooooo long.
        nearest_byway = route.One.byway_closest_xy(
            self.qb, stop_name, stop_xy, rating_func, rating_min,
            is_latlon)
        nearest_node = nearest_byway.nearest_node_id()
        # FIXME: What if the node is on a one-way? What if the node is
        #        tagged with something that the user marks 'avoid'? In
        #        both cases, transit stop might be unreachable.
        # 2012.01.09: Get m-value and send to client.
        if nearest_node is not None:
            node_id = str(nearest_node)
            # NOTE: If we don't cast to string, it's unicode, and
            #       db.insert doesn't quote it.
            stop_id = 'sta-%s' % (str(stop_id),)
            if node_id != '':
                self.qb.db.insert(
                    'gtfsdb_cache_links',
                    {'username': self.qb.username,
                     'branch_id': self.qb.branch_hier[0][0],
                     'revision_id': self.revision_id,
                     'gtfs_caldate': self.tfeed_zipdate,
                     # FIXME: This is a string? See above...
                     'node_stack_id': node_id,
                     ## Bug nnnn:
                     # 'byway_m_value':
                     'transit_stop_id': stop_id,
                     }, {})
            else:
                log.warning(
                    'link_graphs: no node name?!: node_id: %s / stop_id: %s'
                    % (node_id, stop_id,))
        else:
            # BUG FIX: This branch used to interpolate node_id, which is
            # only assigned in the if-branch above and so is unbound the
            # first time we land here -- that raised NameError instead of
            # logging. Log the values actually in scope instead.
            log.warning(
                'link_graphs: no nearest node?!: nearest_node: %s / stop_id: %s'
                % (nearest_node, stop_id,))
            log.warning(' >> lat, lon: (%s, %s)' % (stop_lat, stop_lon,))
    nlinks = self.cache_count_sql('gtfsdb_cache_links')
    # NOTE(review): this assurt fails if any stop was skipped above, so
    # presumably every stop always links in practice -- confirm.
    g.assurt(nlinks == n_stops)
    # 2011.08.08: 1570.29 secs (26 minutes)
    # 2011.08.09: 1270.86 secs (21 minutes)
    log.debug('link_graphs: linked: %d transit stops in %s'
              % (n_stops, misc.time_format_elapsed(time_0),))
def sql_apply_query_filters(self, qb, where_clause="", conjunction=""):
    """Apply the supported query filters to the group_revision search.

    Adds an exact username condition to the WHERE clause when requested
    and, for watched-item/stack-id filters, materializes a temporary
    table of matching revision IDs whose name is stashed in
    self.revision_id_table_ref for the outer query to join against.
    Finishes by delegating to grac_record.Many.sql_apply_query_filters.
    Raises GWIS_Nothing_Found for the unsupported filter_by_watch_feat.
    """
    g.assurt((not where_clause) and (not conjunction))
    g.assurt((not conjunction) or (conjunction == "AND"))
    # We build custom SQL in sql_context_user (and don't use
    # item_user_access's search_get_sql(), so we cannot support
    # filters that edit qb.sql_clauses. We only support filters
    # that only modify the where clause.
    g.assurt(not (qb.filters.filter_by_creator_include
                  or qb.filters.filter_by_creator_exclude
                  or qb.filters.stack_id_table_ref
                  or qb.filters.use_stealth_secret
                  or qb.filters.include_item_stack
                  # There are lots more filters we don't support...
                  # but there's no reason to list them all.
                  ))
    if qb.filters.filter_by_username:
        # %s (LOWER(rev.username) ~ LOWER(%s))
        # %s (rev.username ~* %s)
        # [lb] is curious why we use regex search when we want exact match.
        # MAYBE: Full Text Search @@ is faster than exact match = which is
        #        faster than LOWER() = LOWER() which is faster than regex ~.
        # NOTE: Do not need to lower username (already lower)
        filter_by_username = qb.filters.filter_by_username.lower()
        # qb.db.quoted() escapes the value, so this interpolation is safe.
        where_clause += ("""
            %s (rev.username = %s)
            """ % (conjunction,
                   qb.db.quoted(filter_by_username),))
        conjunction = "AND"
    # Find items the user is watching and only those items' revisions.
    if (qb.filters.filter_by_watch_item or qb.filters.only_stack_ids):
        time_0 = time.time()
        # Clone the qb but keep the db: we want to make a temp table.
        watchers_qb = qb.clone(skip_clauses=True, skip_filtport=True)
        qfs = Query_Filters(req=None)
        qfs.filter_by_watch_item = qb.filters.filter_by_watch_item
        qfs.only_stack_ids = qb.filters.only_stack_ids
        watchers_qb.filters = qfs
        watchers_qb.finalize_query()
        watchers_qb.sql_clauses = (
            item_user_watching.Many.sql_clauses_cols_all.clone())
        watchers_many = item_user_watching.Many()
        inner_sql = watchers_many.search_get_sql(watchers_qb)
        # No: watchers_qb.db.close()
        self.revision_id_table_ref = 'temp__filter_rids'
        # MAYBE: The valid_start_rid and valid_until_rid are the
        #        group_item_access record's. These usually match
        #        the item_versioned rids, right? Whenever an item
        #        is edited, we update the access records... [lb] thinks.
        #        Otherwise, what we'd really want here is item_versioned's
        #        valid_start_rid and valid_until_rid.
        thurrito_sql = ("""
            SELECT valid_start_rid, valid_until_rid
            INTO TEMPORARY TABLE temp__both_rids
            FROM (%s) AS foo_grv_1
            """ % (inner_sql,))
        rows = qb.db.sql(thurrito_sql)
        # Union the start and until rids into the temp table that the
        # outer revision query will join against.
        rid_union_sql = ("""
            SELECT DISTINCT (filter_rid)
            INTO TEMPORARY TABLE %s
            FROM ((SELECT valid_start_rid AS filter_rid
                   FROM temp__both_rids)
                  UNION
                  (SELECT valid_until_rid AS filter_rid
                   FROM temp__both_rids)) AS foo_grv_2
            """ % (self.revision_id_table_ref,))
        rows = qb.db.sql(rid_union_sql)
        log.debug('sql_apply_qry_fltrs: %s in %s'
                  % ('filter_by_watch_item or only_stack_ids',
                     misc.time_format_elapsed(time_0),))
    if qb.filters.filter_by_watch_feat:
        # This filter only makes sense for attachment item types.
        log.warning('group_revision does not support filter_by_watch_feat')
        raise GWIS_Nothing_Found()
    # FIXME: Show only changes to watched items...
    #qfs.filter_by_watch_geom = wr;
    #qfs.filter_by_watch_item = 0 or 1 or 2 ...
    # 2013.03.29: To support CcpV1 feature, see search_for_geom and get a
    #             bbox. Maybe: in CcpV2, you can watch non-regions, so we
    #             could get a list of stack_ids the user is watching and
    #             return revisions containing changes to those stack IDs...
    return grac_record.Many.sql_apply_query_filters(
        self, qb, where_clause, conjunction)
def run_script(self):
    """Run (or revert) the pending upgrade script on the chosen schema,
    record the outcome in upgrade_event, commit, and vacuum when this
    was the last schema for the script.

    Schema selection: defaults to 'public' unless the script contains
    the '@once-per-instance' marker, in which case the first configured
    instance schema is used instead.
    """
    g.assurt(self.next_script)
    if self.next_schema is None:
        self.next_schema = 'public'
        # Scripts opt in to per-instance execution via a marker line.
        for line in open(self.next_script):
            if '@once-per-instance' in line:
                self.next_schema = self.schemas[0]
                break
    # Confirm
    if ((self.next_schema != 'public')
        and (self.next_schema not in conf.server_instances)):
        # Target schema isn't configured on this server; skip the script.
        log.info("Not running '%s' on missing schema: '%s'"
                 % (self.next_script, self.next_schema,))
        self.db.transaction_rollback()
    elif self.cli_opts.do_yesall:
        log.info("Auto-running %s on '%s' on '%s'\n"
                 % (self.next_script, self.next_schema,
                    db_glue.DB.db_name,))
        self.run_script_()
    elif not yn_get("Run %s on '%s' on '%s' now?"
                    % (self.next_script, self.next_schema,
                       db_glue.DB.db_name,)):
        self.db.transaction_rollback()
        raise Exception('Aborting.')
    else:
        self.run_script_()
    # Record that we ran it
    if not self.cli_opts.do_revert:
        self.db.sql(
            """
            INSERT INTO upgrade_event (script_name, schema)
            VALUES (%s, %s)
            """, (self.next_script, self.next_schema))
        log.info("Recorded successful run of %s on '%s' schema."
                 % (self.next_script, self.next_schema,))
    else:
        # Revert mode: forget that the original script ever ran.
        self.db.sql(
            """
            DELETE FROM upgrade_event
            WHERE (script_name = %s AND schema = %s)
            """, (self.orig_script, self.next_schema,))
        log.info("Recorded successful revert of %s on '%s' schema."
                 % (self.orig_script, self.next_schema,))
    # BUG nnnn: Fixed: Record successful runs of each schema script.
    # Make sure we record a successful run of each schema script.
    # This commits and recycles the cursor. We don't have to worry
    # about use_transaction because that's set on the connection.
    self.db.transaction_commit()
    # Always vacuum and analyze the database after an update.
    # NOTE(review): the TERM != 'xterm' test presumably means "vacuum by
    # default when run non-interactively (e.g. cron)" -- confirm.
    if (((self.cli_opts.do_dovacu)
         or (os.environ.get("TERM") != 'xterm'))
        and (not self.cli_opts.do_novacu)
        and ((self.next_schema == self.schemas[-1])
             or (self.next_schema == 'public'))):
        log.info('Vacuuming...')
        vacuum_time_0 = time.time()
        self.pg_db.sql("VACUUM ANALYZE;")
        log.info('Vacuumed database in %s'
                 % (misc.time_format_elapsed(vacuum_time_0),))
        self.recently_vacuumed = True
def run_script_(self):
    """Pipe self.next_script through sed (substituting the instance
    schema for @@@instance@@@) into psql, scanning psql's combined
    output for fatal ERROR:/WARNING: lines.

    Raises Exception if an unignorable error is detected (and do_noerrs
    is not set), or if the operator answers no to the confirmation.
    """
    # Record the time for each script.
    instance_time_0 = time.time()
    db_user = conf.db_user
    # Scripts tagged @run-as-superuser run as the database owner.
    # Context manager closes the file handle promptly (the original
    # 'for line in open(...)' leaked it until GC).
    with open(self.next_script) as script_f:
        for line in script_f:
            if '@run-as-superuser' in line:
                db_user = conf.db_owner
                break
    # Run the script
    the_cmd = ('sed s/@@@instance@@@/%s/g %s | psql -U %s -d %s'
               % (self.next_schema, self.next_script, db_user,
                  db_glue.DB.db_name,))
    # Why grep the output instead of checking the exit status: if the
    # SQL fails but yesall is enabled, we'd keep running scripts, which
    # leaves the upgrade in an unknown state. For the V1->V2 upgrade
    # scripts, this means restarting a cron job that takes 12 hours to
    # run. The SQL outputs ERROR: and WARNING: when things are awry.
    # Errors are always show-stoppers; Warnings not necessarily so.
    # os.system returns 0 if the cmd returns 0, otherwise something
    # nonzero and undefined — and for the sed|psql pipeline above, the
    # exit_status is always 0. popen2.popen4 would let us read the
    # output, but it's deprecated (DeprecationWarning: The popen2
    # module is deprecated. Use the subprocess module.) — so we use
    # subprocess and grep the combined stdout/stderr for problems.
    log.debug('run_script: Popen: the_cmd: %s'
              % (the_cmd.replace('\n', ' || '),))
    p = subprocess.Popen([the_cmd],
                         shell=True,
                         # bufsize=bufsize,
                         stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT,
                         close_fds=True)
    (sin, sout_err) = (p.stdin, p.stdout)
    error_detected = False
    while True:
        line = sout_err.readline()
        if not line:
            break
        else:
            line = line.strip()
            # Don't print to stdout (interleaving issues, and logcheck
            # spews the gunk); log.debug also timestamps the SQL lines.
            log.debug(line)
            if not error_detected:
                # See if the line matches an ERROR: or WARNING:.
                for regex_in in fatal_error_re_in:
                    #log.verbose('regex_in: %s' % (regex_in,))
                    if regex_in.search(line):
                        error_detected = True
                        # See if the WARNING: isn't fatal.
                        for regex_ex in fatal_error_re_ex:
                            #log.verbose('regex_ex: %s' % (regex_ex,))
                            if regex_ex.search(line):
                                # Ignore this false-positive
                                error_detected = False
                                break
                        # If a fatal ERROR: or WARNING:, bail.
                        if error_detected:
                            # error_text: presumably a module-level
                            # message constant — confirm.
                            log.error(error_text)
                            break
    sin.close()
    sout_err.close()
    p.wait()
    # Display the time it took to run the script
    log.info('Ran script %s on instance %s in %s'
             % (self.next_script, self.next_schema,
                misc.time_format_elapsed(instance_time_0),))
    # Ask if all is well.
    # FIXME: If the grepping for errors above works, maybe get
    #        rid of yesall and asking yn_get?
    if ((error_detected and not self.cli_opts.do_noerrs)
            or (not self.cli_opts.do_yesall
                and not yn_get(
                    'Continue (i.e., was everything OK above)?'))):
        log.error('ERROR: Script failed!')
        log.error('(Check last script output for ERROR or WARNING.)')
        log.error(big_warning)
        raise Exception('Aborting: Script failed!')
def load_transit(self, qb, db_transit):
    """Load transit vertices and edges from the GTFS database into the
    graphserver graph (self.gserver)."""
    agency_id = None
    reporter = None  # sys.stdout
    # FIXME: Can we pass in log.debug somehow?
    #        I think we just need to support write()
    maxtrips = None
    g.assurt(self.gserver is not None)
    # C.f. graphserver/pygs/build/lib/graphserver/compiler.py
    #      ::graph_load_gtfsdb
    log.debug('load_transit: loading compiler')
    compiler = GTFSGraphCompiler(db_transit, conf.transitdb_agency_name,
                                 agency_id, reporter)
    # Look up the expected edge count from the cache register, if any,
    # so progress logging can show "x of y".
    nedges = 0
    if self.cache_reg is not None:
        from_and_where = self.links_cache_from_and_where(
            'gtfsdb_cache_register', qb)
        nedges = qb.db.sql("SELECT transit_nedges AS nedges %s"
                           % (from_and_where,))[0]['nedges']
    if nedges > 0:
        nedges_str = str(nedges)
    else:
        nedges_str = 'unknown'
    time_0 = time.time()
    log.debug('load_transit: loading vertices and edges')
    # BUGFIX: initialize i so the post-loop logging and count check
    # don't raise NameError when the compiler yields zero edges
    # (previously i was only bound inside the loop).
    i = -1
    for i, (fromv_label, tov_label, edge) in enumerate(
            compiler.gtfsdb_to_edges(maxtrips)):
        if (i % 25000) == 0:
            log.debug('load_transit: progress: on edge # %d of %s...'
                      % (i, nedges_str,))
        #log.debug(' >> fromv_label: %s / tov_label: %s / edge: %s'
        #          % (fromv_label, tov_label, edge,))
        # NOTE: fromv_label is unicode, tov_label str, and edge
        #       graphserver.core.TripBoard/TripAlight/Etc.
        # FIXME: Why is fromv_label unicode?
        fromv_label = str(fromv_label)
        tov_label = str(tov_label)
        g.assurt(isinstance(edge, EdgePayload))
        self.gserver.add_vertex(fromv_label)
        self.gserver.add_vertex(tov_label)
        self.gserver.add_edge(fromv_label, tov_label, edge)
    # 2011.08.08: 49 seconds
    # 2011.08.09: 36 seconds with less log.debug
    log.debug('load_transit: loaded %d edges in %s'
              % (i + 1, misc.time_format_elapsed(time_0),))
    # 2013.12.05: This is firing. It looks like the count of
    # transit_nedges from gtfsdb_cache_register is less than what
    # we've loaded... so log instead of asserting.
    #g.assurt((nedges == 0) or (nedges == (i + 1)))
    if not ((nedges == 0) or (nedges == (i + 1))):
        log.error('load_transit: nedges: %d / i: %d' % (nedges, i,))
def go_main(self):
    """Entry point: either manage the cache tables (drop/create) or
    refresh the transit feed, rebuild the gtfs/graphserver databases
    as needed, and repopulate the cache."""
    log.debug('Starting')
    started_at = time.time()
    managing_tables = (self.cli_opts.cache_table_drop
                       or self.cli_opts.cache_table_create)
    if managing_tables:
        # Table-maintenance mode: drop and/or create, then commit.
        if self.cli_opts.cache_table_drop:
            self.gtfsdb_cache_delete()
        if self.cli_opts.cache_table_create:
            self.gtfsdb_cache_create()
        log.info('Committing transaction [go_main]')
        self.qb.db.transaction_commit()
    else:
        os.chdir(self.dname_gtfsdb)
        self.tools_check()
        # Download the transit archive.
        self.gtfs_download()
        # The feed date (up to three of them) is compared against saved
        # archives to decide whether the cache and route planner really
        # need updating (unchanged archive means nothing to do).
        self.gtfs_get_feed_dates()
        retrieved_new_feed = not self.tfeed_not_retrieved
        if retrieved_new_feed:
            # We really downloaded the archive, so keep a copy of it.
            self.gtfs_archive()
        self.cache_prepare()
        # Rebuild the gtfs and gserver dbs when a new feed arrived or
        # either database file is missing.
        needs_rebuild = (retrieved_new_feed
                         or not os.path.exists(conf.transitdb_filename)
                         or not os.path.exists(self.fname_transit_gdb))
        if needs_rebuild:
            self.gtfsdb_compile()
            self.graphserver_import()
            self.graphserver_inspect()
        else:
            log.debug('Transit feed up-to-date; skipping compile.')
        self.files_fixperms()
        if self.cache_up_to_date:
            log.debug('Transit cache up-to-date; skipping cache.')
        else:
            self.ccp_cache_populate()
            log.info('Vacuuming the database')
            db = db_glue.new(use_transaction=False)
            db.sql("VACUUM ANALYZE;")
            db.close()
    log.debug('gtfsdb_build_cache: complete: %s'
              % (misc.time_format_elapsed(started_at),))