Example #1
   def load(self, db, keep_running=None):

     # Load ratings from the database.

      t0_all = time.time()

      usage_0 = None
      if conf.debug_mem_usage:
         usage_0 = mem_usage.get_usage_mb()

      # Load all users' ratings and the generic ratings.

      g.assurt(self.graph.branch_hier)
      if len(self.graph.branch_hier) > 1:
         branch_ids = ','.join([str(x[0]) for x in self.graph.branch_hier])
         where_branch = "branch_id IN (%s)" % (branch_ids,)
      else:
         where_branch = "branch_id = %d" % (self.graph.branch_hier[0][0],)

      if not self.honor_historic:
         rats_sql = self.sql_current(db, where_branch)
      else:
         rats_sql = self.sql_historic(db, where_branch)

      if rats_sql:

         # Get and process all ratings

         log.info('load: reading byway_rating table...')
         time_0 = time.time()

         log.verbose('load: enabling dont_fetchall')
         db.dont_fetchall = True

         rats = db.sql(rats_sql)

         log.verbose('load: disabling dont_fetchall')
         db.dont_fetchall = False

         log.info('load: read %d ratings %s in %s'
            % (db.curs.rowcount,
               '[Current]' if not self.last_last_modified
                  else ('[since %s]' % self.last_last_modified),
               misc.time_format_elapsed(time_0),))

         g.check_keep_running(keep_running)

         self.cache_rats(db)

         db.curs_recycle()

         # Uncomment for remote debugging...
         #g.assurt(False)

         conf.debug_log_mem_usage(log, usage_0, 'ratings.load')

         log.info('load: done loading ratings in %s'
                  % (misc.time_format_elapsed(t0_all),))

      self.last_last_modified = None
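
Every example on this page follows the same timing pattern: grab a start timestamp with time.time(), do the work, then hand that timestamp to misc.time_format_elapsed() for the log message. The helper itself is not shown on this page; the sketch below is only a hypothetical stand-in (the real Cyclopath implementation may differ) that turns a start time into a readable duration string.

import time

def time_format_elapsed_sketch(time_then):
   # Hypothetical stand-in for misc.time_format_elapsed: format the
   # seconds elapsed since time_then as a human-readable string.
   secs = time.time() - time_then
   if secs < 60.0:
      return '%.2f secs.' % (secs,)
   elif secs < 3600.0:
      return '%.2f mins.' % (secs / 60.0,)
   else:
      return '%.2f hours' % (secs / 3600.0,)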
Example #2
   def cache_rats(self, db):

      #
      log.info('load: caching byway_rating table...')
      time_0 = time.time()

      generator = db.get_row_iter()
      for rat in generator:
         if (rat['value'] >= 0):
            sid = self.ratings.setdefault(rat['byway_stack_id'], dict())
            #sid['username'] = rat['value']
            uname = sid.setdefault(rat['username'], dict())
            uname[rat['branch_id']] = rat['value']
         else:
            try:
               #self.ratings[rat['byway_stack_id']].pop(rat['username'], None)
               #if not self.ratings[rat['byway_stack_id']]:
               #   self.ratings.pop(rat['byway_stack_id'], None)
               self.ratings[rat['byway_stack_id']] \
                           [rat['username']]       \
                           .pop(rat['branch_id'], None)
               if not self.ratings[rat['byway_stack_id']][rat['username']]:
                  self.ratings[rat['byway_stack_id']].pop(rat['username'],
                                                          None)
               if not self.ratings[rat['byway_stack_id']]:
                  self.ratings.pop(rat['byway_stack_id'], None)
            except KeyError:
               pass
      generator.close()

      log.info('load: cached ratings in %s'
               % (misc.time_format_elapsed(time_0),))
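
cache_rats builds a three-level dictionary keyed by byway stack ID, then username, then branch ID. A non-negative rating value inserts or updates an entry; a negative value acts as a tombstone that removes the entry and prunes any now-empty parent dicts. A minimal standalone sketch of that update rule, using plain dicts and a hypothetical helper name:

def apply_rating(ratings, stack_id, username, branch_id, value):
   # Mirrors the cache_rats logic above on plain dicts.
   if value >= 0:
      by_user = ratings.setdefault(stack_id, {})
      by_user.setdefault(username, {})[branch_id] = value
   else:
      try:
         ratings[stack_id][username].pop(branch_id, None)
         if not ratings[stack_id][username]:
            ratings[stack_id].pop(username, None)
         if not ratings[stack_id]:
            ratings.pop(stack_id, None)
      except KeyError:
         pass

ratings = {}
apply_rating(ratings, 1234, 'alice', 1, 2.5)   # {1234: {'alice': {1: 2.5}}}
apply_rating(ratings, 1234, 'alice', 1, -1.0)  # prunes back to {}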
Example #3
   def byways_suss_out_facilities(self):

      log.info('byways_suss_out_facilities: ready, set, suss!')

      time_0 = time.time()

      prog_log = Debug_Progress_Logger(copy_this=debug_prog_log)
      # 2013.05.12: Weird. At first, you'll see 1250 byways being processed
      # each second, but later in the processing, after 100,000 byways, you'll
      # see 250 byways being processed every one or two seconds.
      #prog_log.log_freq = 250
      #prog_log.log_freq = 2500
      prog_log.log_freq = 1000
      prog_log.loop_max = None

      feat_class = byway
      feat_search_fcn = 'search_for_items' # E.g. byway.Many().search_for_items
      processing_fcn = self.feat_suss_out_facil
      self.qb.item_mgr.load_feats_and_attcs(
            self.qb, feat_class, feat_search_fcn,
            processing_fcn, prog_log, heavyweight=False)

      log.info('... processed %d features in %s'
               % (prog_log.progress,
                  misc.time_format_elapsed(time_0),))
Example #4
    def go_main(self):

        log.debug('Starting')

        time_0 = time.time()

        log.debug('Starting db transaction...')

        #db = db_glue.new()
        self.qb.db.transaction_begin_rw()

        if not self.cli_opts.skip_date_check:
            self.last_logged = self.get_last_logged()
        else:
            self.last_logged = None

        #prog_log = Debug_Progress_Logger(copy_this=debug_prog_log)
        #prog_log.loop_max = len(rev_rows)
        #prog_log.log_freq = 25

        self.parse_logs()

        #prog_log.loops_fin()

        #      if not debug_skip_commit:
        #         log.info('Committing transaction...')
        #         self.qb.db.transaction_commit()
        #      else:
        #         db.transaction_rollback()
        #      self.qb.db.close()
        self.query_builder_destroy(do_commit=(not debug_skip_commit))

        log.debug('gtfsdb_build_cache: complete: %s' %
                  (misc.time_format_elapsed(time_0), ))
Example #5
    def load_and_export_items(self, feat_class):

        log.info('load_and_export_items: working on type: %s' %
                 (Item_Type.id_to_str(feat_class.One.item_type_id), ))

        time_0 = time.time()

        prog_log = self.progr_get(log_freq=100)

        self.qb_src.filters.rating_special = True

        self.qb_src.filters.include_item_stack = True

        log.debug(
            'load_and_export_items: filter_by_regions: %s' %
            (self.qb_src.filters.filter_by_regions), )

        # The merge_job set up the item_mgr, which we now use to load the byways
        # and their attrs and tags.
        feat_search_fcn = 'search_for_items'  # E.g. byway.Many().search_for_items

        self.qb_src.item_mgr.load_feats_and_attcs(
            self.qb_src,
            feat_class,
            feat_search_fcn,
            processing_fcn=self.feat_export,
            prog_log=prog_log,
            heavyweight=False)

        log.info('... exported %d features in %s' % (
            prog_log.progress,
            misc.time_format_elapsed(time_0),
        ))
Example #6
   def load_and_export_items(self, feat_class):

      log.info('load_and_export_items: working on type: %s'
               % (Item_Type.id_to_str(feat_class.One.item_type_id),))

      time_0 = time.time()

      prog_log = self.progr_get(log_freq=100)

      self.qb_src.filters.rating_special = True

      self.qb_src.filters.include_item_stack = True

      log.debug('load_and_export_items: filter_by_regions: %s'
                % (self.qb_src.filters.filter_by_regions),)

      # The merge_job set up the item_mgr, which we now use to load the byways
      # and their attrs and tags.
      feat_search_fcn = 'search_for_items' # E.g. byway.Many().search_for_items
      
      self.qb_src.item_mgr.load_feats_and_attcs(
            self.qb_src, feat_class, feat_search_fcn,
            processing_fcn=self.feat_export,
            prog_log=prog_log,
            heavyweight=False)

      log.info('... exported %d features in %s'
               % (prog_log.progress,
                  misc.time_format_elapsed(time_0),))
Example #7
   def go_main(self):

      log.debug('Starting')

      time_0 = time.time()

      log.debug('Starting db transaction...')

      #db = db_glue.new()
      self.qb.db.transaction_begin_rw()

      if not self.cli_opts.skip_date_check:
         self.last_logged = self.get_last_logged()
      else:
         self.last_logged = None

      #prog_log = Debug_Progress_Logger(copy_this=debug_prog_log)
      #prog_log.loop_max = len(rev_rows)
      #prog_log.log_freq = 25

      self.parse_logs()

      #prog_log.loops_fin()

#      if not debug_skip_commit:
#         log.info('Committing transaction...')
#         self.qb.db.transaction_commit()
#      else:
#         db.transaction_rollback()
#      self.qb.db.close()
      self.query_builder_destroy(do_commit=(not debug_skip_commit))

      log.debug('gtfsdb_build_cache: complete: %s'
                % (misc.time_format_elapsed(time_0),))
Example #8
    def cache_rats(self, db):

        #
        log.info('load: caching byway_rating table...')
        time_0 = time.time()

        generator = db.get_row_iter()
        for rat in generator:
            if (rat['value'] >= 0):
                sid = self.ratings.setdefault(rat['byway_stack_id'], dict())
                #sid['username'] = rat['value']
                uname = sid.setdefault(rat['username'], dict())
                uname[rat['branch_id']] = rat['value']
            else:
                try:
                    #self.ratings[rat['byway_stack_id']].pop(rat['username'], None)
                    #if not self.ratings[rat['byway_stack_id']]:
                    #   self.ratings.pop(rat['byway_stack_id'], None)
                    self.ratings[rat['byway_stack_id']] \
                                [rat['username']]       \
                                .pop(rat['branch_id'], None)
                    if not self.ratings[rat['byway_stack_id']][
                            rat['username']]:
                        self.ratings[rat['byway_stack_id']].pop(
                            rat['username'], None)
                    if not self.ratings[rat['byway_stack_id']]:
                        self.ratings.pop(rat['byway_stack_id'], None)
                except KeyError:
                    pass
        generator.close()

        log.info('load: cached ratings in %s' %
                 (misc.time_format_elapsed(time_0), ))
Example #9
 def loops_fin(self, callee=''):
    if not callee:
       callee = self.callee
    if self.info_print_speed_enable and self.info_print_speed_finish:
       self.loops_info(callee)
    log.info('%s%d loops took %s'
       % (('%s: ' % callee) if callee else '', self.progress,
          misc.time_format_elapsed(self.time_0),))
Example #10
 def loops_fin(self, callee=''):
     if not callee:
         callee = self.callee
     if self.info_print_speed_enable and self.info_print_speed_finish:
         self.loops_info(callee)
     log.info('%s%d loops took %s' % (
         ('%s: ' % callee) if callee else '',
         self.progress,
         misc.time_format_elapsed(self.time_0),
     ))
Example #11
   def load_transit(self, qb, db_transit):

      # load the transit info
      agency_id = None
      reporter = None # sys.stdout # FIXME: Can we pass in log.debug somehow?
                                   #    I think we just need to support write()
      maxtrips = None
      g.assurt(self.gserver is not None)

      # C.f. graphserver/pygs/build/lib/graphserver/compiler.py
      #         ::graph_load_gtfsdb
      log.debug('load_transit: loading compiler')
      compiler = GTFSGraphCompiler(db_transit, conf.transitdb_agency_name,
                                   agency_id, reporter)

      nedges = 0
      if self.cache_reg is not None:
         from_and_where = self.links_cache_from_and_where(
                                       'gtfsdb_cache_register', qb)
         nedges = qb.db.sql("SELECT transit_nedges AS nedges %s"
                            % (from_and_where))[0]['nedges']
      if nedges > 0:
         nedges_str = str(nedges)
      else:
         nedges_str = 'unknown'

      time_0 = time.time()
      log.debug('load_transit: loading vertices and edges')
     #for (fromv_label, tov_label, edge) in compiler.gtfsdb_to_edges(maxtrips):
      for i, (fromv_label, tov_label, edge) in enumerate(
                                          compiler.gtfsdb_to_edges(maxtrips)):
         if (i % 25000) == 0:
            log.debug('load_transit: progress: on edge # %d of %s...'
                      % (i, nedges_str,))
            #log.debug(' >> fromv_label: %s / tov_label: %s / edge: %s'
            #          % (fromv_label, tov_label, edge,))
            # NOTE: fromv_label is unicode, tov_label str, and edge
            #       graphserver.core.TripBoard/TripAlight/Etc.
            # FIXME: Why is fromv_label unicode?
         fromv_label = str(fromv_label)
         tov_label = str(tov_label)
         g.assurt(isinstance(edge, EdgePayload))
         self.gserver.add_vertex(fromv_label)
         self.gserver.add_vertex(tov_label)
         self.gserver.add_edge(fromv_label, tov_label, edge)
      # 2011.08.08: 49 seconds
      # 2011.08.09: 36 seconds with less log.debug
      log.debug('load_transit: loaded %d edges in %s'
                % (i + 1,
                   misc.time_format_elapsed(time_0),))
      # 2013.12.05: This is firing. It looks like the count of transit_nedges
      # from gtfsdb_cache_register is less than what we've loaded...
      #g.assurt((nedges == 0) or (nedges == (i + 1)))
      if not ((nedges == 0) or (nedges == (i + 1))):
         log.error('load_transit: nedges: %d / i: %d' % (nedges, i,))
Example #12
 def link_graphs(self, qb, db_transit):
    log.debug('link_graphs: linking Cyclopath and Transit networks')
    time_0 = time.time()
    # 2011.08.08: Slow method taking 1570.29 secs (26 minutes).
    # 2011.08.09: Made fast method and it took 36 seconds.
    if not self.link_graphs_fast(qb, db_transit):
       # 2011.08.08: 1570.29 secs (26 minutes)
       # NOTE: If you're here, consider running gtfsdb_build_cache.py.
       log.warning('link_graphs: using slow method!')
       self.link_graphs_slow(qb, db_transit)
    log.debug('link_graphs: complete: in %s'
              % (misc.time_format_elapsed(time_0),))
Example #13
 def link_graphs(self, qb, db_transit):
     log.debug('link_graphs: linking Cyclopath and Transit networks')
     time_0 = time.time()
     # 2011.08.08: Slow method taking 1570.29 secs (26 minutes).
     # 2011.08.09: Made fast method and it took 36 seconds.
     if not self.link_graphs_fast(qb, db_transit):
         # 2011.08.08: 1570.29 secs (26 minutes)
         # NOTE: If you're here, consider running gtfsdb_build_cache.py.
         log.warning('link_graphs: using slow method!')
         self.link_graphs_slow(qb, db_transit)
     log.debug('link_graphs: complete: in %s' %
               (misc.time_format_elapsed(time_0), ))
Example #14
    def cache_edges(self, db_transit):

        # load the transit info
        agency_id = None
        reporter = None  # sys.stdout # FIXME: Can we pass in log.debug somehow?
        #    I think we just need to support write()
        maxtrips = None

        # C.f. graphserver/pygs/build/lib/graphserver/compiler.py
        #         ::graph_load_gtfsdb
        log.debug('load_transit: loading compiler')
        compiler = GTFSGraphCompiler(db_transit, conf.transitdb_agency_name,
                                     agency_id, reporter)

        time_0 = time.time()
        log.debug('load_transit: loading vertices and edges')
        #for (fromv_label, tov_label, edge) in compiler.gtfsdb_to_edges(maxtrips):
        for i, (fromv_label, tov_label,
                edge) in enumerate(compiler.gtfsdb_to_edges(maxtrips)):
            if (i % 25000) == 0:
                log.debug(
                    'load_transit: progress: on edge # %d of unknown...' %
                    (i, ))
                #log.debug(' >> fromv_label: %s / tov_label: %s / edge: %s'
                #          % (fromv_label, tov_label, edge,))
                # NOTE: fromv_label is unicode, tov_label str, and edge
                #       graphserver.core.TripBoard/TripAlight/Etc.
                # FIXME: Why is fromv_label unicode?

        self.qb.db.sql("""
         UPDATE
            gtfsdb_cache_register
         SET
            transit_nedges = %d
         WHERE
            username = %s
            AND branch_id = %d
            AND revision_id = %d
            AND gtfs_caldate = %s
         """ % (
            i + 1,
            self.qb.db.quoted(self.qb.username),
            self.qb.branch_hier[0][0],
            self.revision_id,
            self.qb.db.quoted(self.tfeed_zipdate),
        ))

        log.debug('load_transit: loaded %d edges in %s' % (
            i + 1,
            misc.time_format_elapsed(time_0),
        ))
Example #15
    def gtfs_download(self):

        # E.g., wget -N --directory-prefix=$ccp/var/transit/metc/ \
        #            ftp://gisftp.metc.state.mn.us/google_transit.zip

        time_0 = time.time()

        local_dir = self.dname_gtfsdb
        remote_file = conf.transit_db_source
        g.assurt(remote_file)

        # FIXME: Instead of using wget, use internal Python functions?
        the_cmd = ('wget -N -P %s %s' % (
            local_dir,
            remote_file,
        ))

        log.debug('gtfs_download: downloading: %s' % (the_cmd, ))
        p = subprocess.Popen(
            [the_cmd],
            shell=True,
            # bufsize=bufsize,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            close_fds=True)
        (sin, sout_err) = (p.stdin, p.stdout)

        self.tfeed_not_retrieved = False
        while not self.tfeed_not_retrieved:
            line = sout_err.readline()
            if not line:
                break
            else:
                line = line.strip()
                #log.debug(line)
                for regex in self.regex_not_retrieved:
                    if regex.search(line):
                        self.tfeed_not_retrieved = True
                        break
                if self.tfeed_not_retrieved:
                    break
        sin.close()
        sout_err.close()
        p.wait()

        log.debug('gtfs_download: %s in: %s' % (
            'downloaded' if not self.tfeed_not_retrieved else 'not retrieved',
            misc.time_format_elapsed(time_0),
        ))
Example #16
   def vacuum_finally_maybe(self):

      if not self.recently_vacuumed:
         do_vacuum = False
         if self.cli_opts.do_yesall:
            do_vacuum = True
            log.info('Auto-Vacuuming...')
         else:
            do_vacuum = yn_get('Vacuum database now?')
         if do_vacuum:
            vacuum_time_0 = time.time()
            self.pg_db.sql("VACUUM ANALYZE;")
            log.info('Vacuumed database in %s'
               % (misc.time_format_elapsed(vacuum_time_0),))
            self.recently_vacuumed = True
Example #17
 def feature_classes_import(self):
     # If we start loading a Shapefile and don't find a Cyclopath config, and
     # if we're not conflating, we skip it.
     if self.shpf_class != 'incapacitated':
         time_0 = time.time()
         # Call the substage fcn.
         self.substage_fcn_go()
         # Print elapsed time.
         log.info('... done "%s" in %s' % (
             self.mjob.wtem.latest_step.stage_name,
             misc.time_format_elapsed(time_0),
         ))
     else:
         # We still have to call stage_initialize to bump the stage num.
         self.mjob.stage_initialize('Skipping Shapefile...')
Example #18
   def cache_edges(self, db_transit):

      # load the transit info
      agency_id = None
      reporter = None # sys.stdout # FIXME: Can we pass in log.debug somehow?
                                   #    I think we just need to support write()
      maxtrips = None

      # C.f. graphserver/pygs/build/lib/graphserver/compiler.py
      #         ::graph_load_gtfsdb
      log.debug('load_transit: loading compiler')
      compiler = GTFSGraphCompiler(db_transit, conf.transitdb_agency_name,
                                   agency_id, reporter)

      time_0 = time.time()
      log.debug('load_transit: loading vertices and edges')
     #for (fromv_label, tov_label, edge) in compiler.gtfsdb_to_edges(maxtrips):
      for i, (fromv_label, tov_label, edge) in enumerate(
                                          compiler.gtfsdb_to_edges(maxtrips)):
         if (i % 25000) == 0:
            log.debug('load_transit: progress: on edge # %d of unknown...'
                      % (i,))
            #log.debug(' >> fromv_label: %s / tov_label: %s / edge: %s'
            #          % (fromv_label, tov_label, edge,))
            # NOTE: fromv_label is unicode, tov_label str, and edge
            #       graphserver.core.TripBoard/TripAlight/Etc.
            # FIXME: Why is fromv_label unicode?

      self.qb.db.sql(
         """
         UPDATE
            gtfsdb_cache_register
         SET
            transit_nedges = %d
         WHERE
            username = %s
            AND branch_id = %d
            AND revision_id = %d
            AND gtfs_caldate = %s
         """ % (i + 1,
                self.qb.db.quoted(self.qb.username),
                self.qb.branch_hier[0][0],
                self.revision_id,
                self.qb.db.quoted(self.tfeed_zipdate),))

      log.debug('load_transit: loaded %d edges in %s'
                % (i + 1, misc.time_format_elapsed(time_0),))
Example #19
   def ccp_save_cache(self):

      time_0 = time.time()

      log.debug('ccp_save_cache: loading the transit database')
      db_transit = GTFSDatabase(conf.transitdb_filename)

      # NOTE: Cannot cache edges, since they are C-objects. See usages of
      #       compiler.gtfsdb_to_edges(maxtrips). We can, however, at least
      #       count the edges....
      self.cache_edges(db_transit)

      log.debug('ccp_save_cache: making the transit graph link cache')
      self.cache_links(db_transit)

      log.debug('ccp_save_cache: done: %s'
                % (misc.time_format_elapsed(time_0),))
Example #20
    def ccp_save_cache(self):

        time_0 = time.time()

        log.debug('ccp_save_cache: loading the transit database')
        db_transit = GTFSDatabase(conf.transitdb_filename)

        # NOTE: Cannot cache edges, since they are C-objects. See usages of
        #       compiler.gtfsdb_to_edges(maxtrips). We can, however, at least
        #       count the edges....
        self.cache_edges(db_transit)

        log.debug('ccp_save_cache: making the transit graph link cache')
        self.cache_links(db_transit)

        log.debug('ccp_save_cache: done: %s' %
                  (misc.time_format_elapsed(time_0), ))
Example #21
   def gtfs_download(self):

      # E.g., wget -N --directory-prefix=$ccp/var/transit/metc/ \
      #            ftp://gisftp.metc.state.mn.us/google_transit.zip

      time_0 = time.time()

      local_dir = self.dname_gtfsdb
      remote_file = conf.transit_db_source
      g.assurt(remote_file)

      # FIXME: Instead of using wget, use internal Python functions?
      the_cmd = ('wget -N -P %s %s' % (local_dir, remote_file,))

      log.debug('gtfs_download: downloading: %s' % (the_cmd,))
      p = subprocess.Popen([the_cmd],
                           shell=True,
                           # bufsize=bufsize,
                           stdin=subprocess.PIPE,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.STDOUT,
                           close_fds=True)
      (sin, sout_err) = (p.stdin, p.stdout)

      self.tfeed_not_retrieved = False
      while not self.tfeed_not_retrieved:
         line = sout_err.readline()
         if not line:
            break
         else:
            line = line.strip()
            #log.debug(line)
            for regex in self.regex_not_retrieved:
               if regex.search(line):
                  self.tfeed_not_retrieved = True
                  break
            if self.tfeed_not_retrieved:
               break
      sin.close()
      sout_err.close()
      p.wait()

      log.debug('gtfs_download: %s in: %s'
          % ('downloaded' if not self.tfeed_not_retrieved else 'not retrieved',
             misc.time_format_elapsed(time_0),))
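
Both gtfs_download variants shell out to wget and scrape its output for failure messages; the inline FIXME asks whether internal Python functions could be used instead. A minimal sketch of that idea using only the Python 2 standard library (hypothetical function name; it does not reproduce wget -N's timestamp check or the regex-based "not retrieved" detection):

import os
import urllib

def gtfs_download_sketch(local_dir, remote_file):
   # Fetch the GTFS feed with urllib instead of spawning wget.
   # Unlike 'wget -N', this always re-downloads the file.
   local_path = os.path.join(local_dir, os.path.basename(remote_file))
   urllib.urlretrieve(remote_file, local_path)
   return local_path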
Example #22
 def go(self):
    time_0 = time.time()
    parser = Log_Jammin_ArgParser()
    self.cli_opts = parser.get_opts()
    if not parser.handled:
       try:
          self.go_go()
       except AssertionError:
          if self.cli_opts.interactive:
             # Dump to a python shell and make the user investigate.
             log.warning('Fatal error. Please debug!')
             # FIXME: I don't think the traceback prints useful info.
             log.warning(traceback.format_exc())
             import pdb; pdb.set_trace()
          raise
       finally:
          log.info('Script complete! Ran in %s'
                   % (misc.time_format_elapsed(time_0),))
       self.print_stats()
Example #23
    def load_make_graph_add_transit(self, qb):
        # Not calling base class fcn.

        # FIXME: What happens here on update? We reload all, don't we?
        # FIXME: For p2, only do this on load, not on update.
        # BUG nnnn: For p2, start new instance of route finder and then
        #           just change routed_ports to use that one, then kill
        #           the existing one.

        time_0 = time.time()
        usage_0 = None
        if conf.debug_mem_usage:
            usage_0 = mem_usage.get_usage_mb()
        log.debug('load: adding transit...')

        loaded = False

        # Load the transit network, maybe (if we have data for it).
        if conf.transitdb_filename:
            self.cache_reg = self.links_get_cache_reg(qb)
            log.debug('load: loading the transit database')
            db_transit = GTFSDatabase(conf.transitdb_filename)
            log.debug('load: making the transit graph')
            self.load_transit(qb, db_transit)
            # Link the two graphs
            log.debug('load: linking the two graphs')
            self.link_graphs(qb, db_transit)
            loaded = True
        # else, using Graphserver, but no public transit data to load.

        if loaded:
            log.info('load: added transit: in %s' %
                     (misc.time_format_elapsed(time_0), ))
        else:
            # MAYBE: Let devs test without loading transit.
            raise GWIS_Error(
                'Unable to load route finder: no transit info found.')

        conf.debug_log_mem_usage(log, usage_0, 'tgraph.load / transit')

        return loaded
Example #24
    def search_calculate_reactions(self, qb):

        # BUG_FALL_2013: Delete this fcn.

        # See polarity_sql = Many.sql_polarity in thread.py: [lb] thinks this
        # fcn. can be deleted. We calculate the likes and dislikes of a thread,
        # but it doesn't make sense to count it for just one post... especially
        # since a post with polarity has no comment, so it doesn't make sense
        # that we'd hydrate such a post. And posts with comments have no polarity
        # so the likes and dislikes will be zero...
        g.assurt(False)  # FIXME: Delete this fcn. and the commented-out code
        #        above that calls it.

        g.assurt(self.owning_thread is not None)

        if (self.owning_thread.thread_type_id == Thread_Type.reaction):

            # SELECT polarity, body FROM post WHERE body IS NULL;
            # ==> polarity is 1 or -1, body is never set.
            # SELECT polarity, body FROM post WHERE body IS NOT NULL;
            # ==> polarity is 0 and body is set.

            # MAYBE: Does this sql take a while?
            time_0 = time.time()

            rsql = thread.Many.sql_polarity(qb, self.owning_thread.stack_id)

            rres = qb.db.sql(rsql)

            self.reac_data = etree.Element('reac_data')
            misc.xa_set(self.reac_data, 'likes', rres[0]['likes'])
            misc.xa_set(self.reac_data, 'dislikes', rres[0]['dislikes'])
            misc.xa_set(self.reac_data, 'comments', rres[0]['comments'])

            log.debug(
                'srch_calc_reacts: likes: %d / disls: %d / cmmnts: %d / %s' % (
                    rres[0]['likes'],
                    rres[0]['dislikes'],
                    rres[0]['comments'],
                    misc.time_format_elapsed(time_0),
                ))
Example #25
   def load_make_graph_add_transit(self, qb):
      # Not calling base class fcn.

# FIXME: What happens here on update? We reload all, don't we?
# FIXME: For p2, only do this on load, not on update.
# BUG nnnn: For p2, start new instance of route finder and then
#           just change routed_ports to use that one, then kill
#           the existing one.

      time_0 = time.time()
      usage_0 = None
      if conf.debug_mem_usage:
         usage_0 = mem_usage.get_usage_mb()
      log.debug('load: adding transit...')

      loaded = False

      # Load the transit network, maybe (if we have data for it).
      if conf.transitdb_filename:
         self.cache_reg = self.links_get_cache_reg(qb)
         log.debug('load: loading the transit database')
         db_transit = GTFSDatabase(conf.transitdb_filename)
         log.debug('load: making the transit graph')
         self.load_transit(qb, db_transit)
         # Link the two graphs
         log.debug('load: linking the two graphs')
         self.link_graphs(qb, db_transit)
         loaded = True
      # else, using Graphserver, but no public transit data to load.

      if loaded:
         log.info('load: added transit: in %s'
                  % (misc.time_format_elapsed(time_0),))
      else:
         # MAYBE: Let devs test without loading transit.
         raise GWIS_Error(
            'Unable to load route finder: no transit info found.')

      conf.debug_log_mem_usage(log, usage_0, 'tgraph.load / transit')

      return loaded
Example #26
   def db_vacuum(self, full_vacuum=False):

      time_0 = time.time()

      log.info('Vacuuming...')

      if not full_vacuum:
         db = db_glue.new(use_transaction=False)
         db.sql("VACUUM ANALYZE minnesota.geofeature (geometry);")
         db.close()
      else:
         # Vacuum and analyze.
         # EXPLAIN: ANALYZE vs. VACUUM ANALYZE vs. VACUUM FULL vs. CLUSTER.
         #          See also: vacuum analyze verbose.
         # NOTE: Should be database owner, lest some tables go unvacced.
         pg_db = db_glue.new(conf.db_owner, use_transaction=False)
         pg_db.sql("VACUUM ANALYZE;")
         pg_db.close()

      log.info('... Vacuum took %s'
               % (misc.time_format_elapsed(time_0),))
Example #27
   def go(self):
      '''Parse the command line arguments. If the command line parser didn't
         handle a --help or --version command, call the command processor.'''

      time_0 = time.time()

      # Read the CLI args
      self.cli_args = self.argparser()
      self.cli_opts = self.cli_args.get_opts()

      if not self.cli_args.handled:

         log.info('Welcome to the %s!'
                  % (self.cli_args.script_name,))

         # Prepare the query builder object.
         if not self.skip_query_builder:
            self.query_builder_prepare()

         # Create the Ctrl-C event if we're the master script.
         if self.cli_opts.instance_master:
            # A master script waits to commit the revision until all workers
            # complete. Currently, a human operator will then send the master
            # script a Ctrl-C, which triggers the event, and then the master
            # script will cleanup (and probably commit to the database and
            # release the revision table lock).
            Ccp_Script_Base.master_event = threading.Event()
            signal.signal(signal.SIGINT, Ccp_Script_Base.ctrl_c_handler)

         # Call the derived class's go function.
         self.go_main()

         # FIXME: Where is self.cli_args.close_query() ??

      log.info('Script completed in %s'
               % (misc.time_format_elapsed(time_0),))

      # If we run as a script, be sure to return happy exit code.
      return 0
Example #28
   def search_calculate_reactions(self, qb):

# BUG_FALL_2013: Delete this fcn.

      # See polarity_sql = Many.sql_polarity in thread.py: [lb] thinks this
      # fcn. can be deleted. We calculate the likes and dislikes of a thread,
      # but it doesn't make sense to count it for just one post... especially
      # since a post with polarity has no comment, so it doesn't make sense
      # that we'd hydrate such a post. And posts with comments have no polarity
      # so the likes and dislikes will be zero...
      g.assurt(False) # FIXME: Delete this fcn. and the commented-out code
                      #        above that calls it.

      g.assurt(self.owning_thread is not None)

      if (self.owning_thread.thread_type_id == Thread_Type.reaction):

         # SELECT polarity, body FROM post WHERE body IS NULL;
         # ==> polarity is 1 or -1, body is never set.
         # SELECT polarity, body FROM post WHERE body IS NOT NULL;
         # ==> polarity is 0 and body is set.

         # MAYBE: Does this sql take a while?
         time_0 = time.time()

         rsql = thread.Many.sql_polarity(qb, self.owning_thread.stack_id)

         rres = qb.db.sql(rsql)

         self.reac_data = etree.Element('reac_data')
         misc.xa_set(self.reac_data, 'likes', rres[0]['likes'])
         misc.xa_set(self.reac_data, 'dislikes', rres[0]['dislikes'])
         misc.xa_set(self.reac_data, 'comments', rres[0]['comments'])

         log.debug(
            'srch_calc_reacts: likes: %d / disls: %d / cmmnts: %d / %s'
            % (rres[0]['likes'], rres[0]['dislikes'], rres[0]['comments'],
               misc.time_format_elapsed(time_0),))
Example #29
    def go(self):
        '''Parse the command line arguments. If the command line parser didn't
         handle a --help or --version command, call the command processor.'''

        time_0 = time.time()

        # Read the CLI args
        self.cli_args = self.argparser()
        self.cli_opts = self.cli_args.get_opts()

        if not self.cli_args.handled:

            log.info('Welcome to the %s!' % (self.cli_args.script_name, ))

            # Prepare the query builder object.
            if not self.skip_query_builder:
                self.query_builder_prepare()

            # Create the Ctrl-C event if we're the master script.
            if self.cli_opts.instance_master:
                # A master script waits to commit the revision until all workers
                # complete. Currently, a human operator will then send the master
                # script a Ctrl-C, which triggers the event, and then the master
                # script will cleanup (and probably commit to the database and
                # release the revision table lock).
                Ccp_Script_Base.master_event = threading.Event()
                signal.signal(signal.SIGINT, Ccp_Script_Base.ctrl_c_handler)

            # Call the derived class's go function.
            self.go_main()

            # FIXME: Where is self.cli_args.close_query() ??

        log.info('Script completed in %s' %
                 (misc.time_format_elapsed(time_0), ))

        # If we run as a script, be sure to return happy exit code.
        return 0
Example #30
   def sql_apply_query_filters(self, qb, where_clause="", conjunction=""):

      g.assurt((not where_clause) and (not conjunction))
      g.assurt((not conjunction) or (conjunction == "AND"))

      # We build custom SQL in sql_context_user (and don't use
      # item_user_access's search_get_sql()), so we cannot support
      # filters that edit qb.sql_clauses. We only support filters
      # that modify the where clause.
      g.assurt(not (   qb.filters.filter_by_creator_include
                    or qb.filters.filter_by_creator_exclude
                    or qb.filters.stack_id_table_ref
                    or qb.filters.use_stealth_secret
                    or qb.filters.include_item_stack
                    # There are lots more filters we don't support...
                    # but there's no reason to list them all.
                    ))

      if qb.filters.filter_by_username:
         # %s (LOWER(rev.username) ~ LOWER(%s))
         # %s (rev.username ~* %s)
         # [lb] is curious why we use regex search when we want exact match.
         # MAYBE: Full Text Search @@ is faster than exact match = which is
         #        faster than LOWER() = LOWER() which is faster than regex ~.
         # NOTE: Do not need to lower username (already lower)
         filter_by_username = qb.filters.filter_by_username.lower()
         where_clause += (
            """
            %s (rev.username = %s)
            """ % (conjunction,
                   qb.db.quoted(filter_by_username),))
         conjunction = "AND"

      # Find items the user is watching and only those items' revisions.
      if (qb.filters.filter_by_watch_item
          or qb.filters.only_stack_ids):
         time_0 = time.time()
         # Clone the qb but keep the db: we want to make a temp table.
         watchers_qb = qb.clone(skip_clauses=True, skip_filtport=True)
         qfs = Query_Filters(req=None)
         qfs.filter_by_watch_item = qb.filters.filter_by_watch_item
         qfs.only_stack_ids = qb.filters.only_stack_ids
         watchers_qb.filters = qfs
         watchers_qb.finalize_query()
         watchers_qb.sql_clauses = (
            item_user_watching.Many.sql_clauses_cols_all.clone())
         watchers_many = item_user_watching.Many()
         inner_sql = watchers_many.search_get_sql(watchers_qb)
         # No: watchers_qb.db.close()
         self.revision_id_table_ref = 'temp__filter_rids'
         # MAYBE: The valid_start_rid and valid_until_rid are the
         #        group_item_access record's. These usually match
         #        the item_versioned rids, right? Whenever an item
         #        is edited, we update the access records... [lb] thinks.
         #        Otherwise, what we'd really want here is item_versioned's
         #        valid_start_rid and valid_until_rid.
         thurrito_sql = (
            """
            SELECT
               valid_start_rid,
               valid_until_rid
            INTO TEMPORARY TABLE
               temp__both_rids
            FROM
               (%s) AS foo_grv_1
            """ % (inner_sql,))
         rows = qb.db.sql(thurrito_sql)
         rid_union_sql = (
            """
            SELECT
               DISTINCT (filter_rid)
            INTO TEMPORARY TABLE
               %s
            FROM
               ((SELECT valid_start_rid AS filter_rid FROM temp__both_rids)
                UNION
                (SELECT valid_until_rid AS filter_rid FROM temp__both_rids))
               AS foo_grv_2
            """ % (self.revision_id_table_ref,))
         rows = qb.db.sql(rid_union_sql)
         log.debug('sql_apply_qry_fltrs: %s in %s'
                   % ('filter_by_watch_item or only_stack_ids',
                      misc.time_format_elapsed(time_0),))

      if qb.filters.filter_by_watch_feat:
         # This filter only makes sense for attachment item types.
         log.warning('group_revision does not support filter_by_watch_feat')
         raise GWIS_Nothing_Found()

      # FIXME: Show only changes to watched items...
      #qfs.filter_by_watch_geom = wr;
      #qfs.filter_by_watch_item = 0 or 1 or 2 ...
# 2013.03.29: To support CcpV1 feature, see search_for_geom and get a bbox.
#             Maybe: in CcpV2, you can watch non-regions, so we could get
#             a list of stack_ids the user is watching and return revisions
#             containing changes to those stack IDs...

      return grac_record.Many.sql_apply_query_filters(
                  self, qb, where_clause, conjunction)
Example #31
    def execute_problem_solver(self, qb, edge_weight_attr):

        walk_path = None

        log.debug(
            'exec_prob_slvr: wattr: %s / rat sprd: %s / fac brdn: %s / spalg: %s'
            % (
                edge_weight_attr,
                self.route.p3_rating_pump,
                self.route.p3_burden_pump,
                self.route.p3_spalgorithm,
            ))

        # ***

        if self.route.p3_spalgorithm == 'as*':

            time_0 = time.time()

            # Calculate path using A* search.
            # MAYBE: We don't need a heuristic, do we??
            #        Would it speed up the search?
            #        The h fcn. would just be the dist from the curnode to the
            #        finish? Or does [lb] still not know how graph search works...
            ast_path = networkx.astar_path(
                self.tgraph.graph_di,
                self.route.beg_nid,
                self.route.fin_nid,
                # MAYBE: Send pload... networkx hack
                #heuristic=None, weight=edge_weight_attr)
                heuristic=None,
                weight=edge_weight_attr,
                pload=self)

            log.debug('exec_prob_slvr: astar_path: no. edges: %d / in %s' % (
                len(ast_path),
                misc.time_format_elapsed(time_0),
            ))

            walk_path = ast_path

        # ***

        elif self.route.p3_spalgorithm == 'asp':

            time_0 = time.time()

            # MAYBE: Hack networkx and add pload param to support edge wght fcns.
            all_paths = networkx.all_shortest_paths(self.tgraph.graph_di,
                                                    self.route.beg_nid,
                                                    self.route.fin_nid,
                                                    weight=edge_weight_attr)

            log.debug(
                'exec_prob_slvr: all_shortest_paths: returned generator in %s'
                % (misc.time_format_elapsed(time_0), ))

            time_0 = time.time()

            path_num = 1
            for a_path in all_paths:

                log.debug(
                    'solve_pb: all_shortest_paths: path #%d: len: %d / in %s' %
                    (
                        path_num,
                        len(a_path),
                        misc.time_format_elapsed(time_0),
                    ))
                path_num += 1
                time_0 = time.time()

                if walk_path is None:
                    walk_path = a_path
                    #break

                # FIXME/EXPLAIN: Find an O/D pair that results in multiple paths...
                # or would the cost have to be equal? That doesn't sound very
                # likely -- in practice, all_shortest_paths would probably
                # always just return one result for Cyclopath.

        # ***

        elif self.route.p3_spalgorithm == 'dij':

            time_0 = time.time()

            # Calculate path using Dijkstra's algorithm.
            # MAYBE: Hack networkx and add pload param to support edge wght fcns.
            dij_path = networkx.dijkstra_path(self.tgraph.graph_di,
                                              self.route.beg_nid,
                                              self.route.fin_nid,
                                              weight=edge_weight_attr)

            log.debug('exec_prob_slvr: dijkstra_path: no. edges: %d / in %s' %
                      (
                          len(dij_path),
                          misc.time_format_elapsed(time_0),
                      ))

            walk_path = dij_path

        # ***

        elif self.route.p3_spalgorithm == 'sho':

            time_0 = time.time()

            # Calculate path using generic NetworkX shortest path search.
            # MAYBE: Hack networkx and add pload param to support edge wght fcns.
            asp_path = networkx.shortest_path(self.tgraph.graph_di,
                                              self.route.beg_nid,
                                              self.route.fin_nid,
                                              weight=edge_weight_attr)

            log.debug('exec_prob_slvr: shortest_path: no. edges: %d / in %s' %
                      (
                          len(asp_path),
                          misc.time_format_elapsed(time_0),
                      ))

            walk_path = asp_path

        # ***

        elif self.route.p3_spalgorithm == 'ssd':

            time_0 = time.time()

            # This Dijkstra fcn. "returns a tuple of two dictionaries
            # keyed by node. The first dictionary stores distance from
            # the source. The second stores the path from the source to
            # that node."
            # So... this isn't an ideal fcn. to use, since the second
            # dictionary contains paths for every pair of nodes.
            # MAYBE: Hack networkx and add pload param to support edge wght fcns.
            dij_distance2, dij_path2 = networkx.single_source_dijkstra(
                self.tgraph.graph_di,
                self.route.beg_nid,
                self.route.fin_nid,
                weight=edge_weight_attr)

            log.debug(
                'exec_prob_slvr: single_source_dijkstra: no. edges: %d / in %s'
                % (
                    len(dij_path2),
                    misc.time_format_elapsed(time_0),
                ))

            walk_path = dij_path2[self.route.fin_nid]

        # ***

        else:

            # This code should be unreachable, as we would've raised by now.
            g.assurt(False)

        # ***

        if not walk_path:
            # NOTE: This code is probably unreachable. With the new
            #       is_disconnected attribute, there should no longer
            #       be a possibility that we won't find a route, since
            #       we guarantee that the origin and destination are
            #       on nodes that are well-connected.
            log.error('exec_prob_slvr: no route?: %s' % (self, ))
            raise GWIS_Error(Problem_Base.error_msg_basic)

        # ***

        return walk_path
Example #32
    def cache_links(self, db_transit):

        log.debug('cache_links: caching transit node/cyclopath node pairs')

        time_0 = time.time()

        # NOTE: We load all byways into the graph, including those tagged
        # 'prohibited' and 'closed', but we only ever link those not tagged as
        # such with transit stops.

        # MAGIC HACK ALERT
        #revision = revision.Historic(self.revision_id)
        #model = ratings.Predictor(self.qb.branch_hier[0][0],
        #                          #self.revision_id)
        #                          revision)
        #model.load(self.qb)
        #
        route_daemon = None
        ccp_graph = planner.routed_p2.tgraph.Trans_Graph(
            route_daemon, self.qb.username, self.qb.branch_hier,
            self.qb.revision)
        ccp_graph.load(self.qb.db)
        #
        tagprefs = {}
        tagprefs['prohibited'] = ratings.t_avoid
        tagprefs['closed'] = ratings.t_avoid
        #
        rating_func = ccp_graph.ratings.rating_func(self.qb.username, tagprefs,
                                                    ccp_graph)
        # MAGIC NUMBER: Min rating.
        rating_min = 0.5
        # The transit data is lat,lon, as opposed to SRID-encoded x,y.
        is_latlon = True

        n_stops = db_transit.count_stops()

        # NOTE: 2011.06.26: This loop takes a while. For me [lb], 55 secs.
        # NOTE: 2011.08.08: Find nearest node using GRAC SQL is time consuming!
        #                   On the order of minutes and minutes...
        #                   Can you cache the nearest nodes, maybe? At least for
        #                   anon user in public branch and current revision?
        #       byway.transit_stops ?? list of transit IDs?

        for i, (stop_id, stop_name, stop_lat,
                stop_lon) in enumerate(db_transit.stops()):
            # Every once in a while, print a debug message
            # FIXME: Replace with prog logger
            if i and ((i % 25) == 0):
                log.debug('link_graphs: progress: on stop # %d of %d...' % (
                    i,
                    n_stops,
                ))
                #log.debug(' >> id: %s / name: %s / lat: %s / lon: %s'
                #          % (stop_id, stop_name, stop_lat, stop_lon,))
                #log.debug(' >> id: %s / name: %s / lat: %s / lon: %s'
                #          % (type(stop_id), type(stop_name), type(stop_lat),
                #             type(stop_lon),))
            # NOTE: The (x,y) point is lon first, then lat.
            stop_xy = (
                stop_lon,
                stop_lat,
            )
            # 2012.03.05: This is taking wayyy tooooo long.
            nearest_byway = route.One.byway_closest_xy(self.qb, stop_name,
                                                       stop_xy, rating_func,
                                                       rating_min, is_latlon)
            nearest_node = nearest_byway.nearest_node_id()
            # FIXME: What if the node is on a one-way? What if the node is
            # tagged with something that the user marks 'avoid'? In both
            # cases, the transit stop might be unreachable.
            # 2012.01.09: Get m-value and send to client.
            if nearest_node is not None:
                node_id = str(nearest_node)
                # NOTE: If we don't cast to string, it's unicode, and db.insert
                # doesn't quote it.
                stop_id = 'sta-%s' % (str(stop_id), )
                if node_id != '':
                    self.qb.db.insert(
                        'gtfsdb_cache_links',
                        {
                            'username': self.qb.username,
                            'branch_id': self.qb.branch_hier[0][0],
                            'revision_id': self.revision_id,
                            'gtfs_caldate': self.tfeed_zipdate,

                            # FIXME: This is a string? See above...
                            'node_stack_id': node_id,
                            ## Bug nnnn:
                            #                     'byway_m_value':
                            'transit_stop_id': stop_id,
                        },
                        {})
                else:
                    log.warning(
                        'link_graphs: no node name?!: node_id: %s / stop_id: %s'
                        % (
                            node_id,
                            stop_id,
                        ))
            else:
                log.warning(
                    'link_graphs: no nearest node?!: nearest_node: %s / stop_id: %s'
                    % (
                        nearest_node,
                        stop_id,
                    ))
                log.warning(' >> lat, lon: (%s, %s)' % (stop_lat, stop_lon))
        nlinks = self.cache_count_sql('gtfsdb_cache_links')
        g.assurt(nlinks == n_stops)
        # 2011.08.08: 1570.29 secs (26 minutes)
        # 2011.08.09: 1270.86 secs (21 minutes)
        log.debug('link_graphs: linked: %d transit stops in %s' % (
            n_stops,
            misc.time_format_elapsed(time_0),
        ))
Example #33
   def execute_problem_solver(self, qb, edge_weight_attr):

      walk_path = None

      log.debug(
         'exec_prob_slvr: wattr: %s / rat sprd: %s / fac brdn: %s / spalg: %s'
         % (edge_weight_attr,
            self.route.p3_rating_pump,
            self.route.p3_burden_pump,
            self.route.p3_spalgorithm,))

      # ***

      if self.route.p3_spalgorithm == 'as*':

         time_0 = time.time()

         # Calculate path using A* search.
         # MAYBE: We don't need a heuristic, do we??
         #        Would it speed up the search?
         #        The h fcn. would just be the dist from the curnode to the
         #        finish? Or does [lb] still not know how graph search works...
         ast_path = networkx.astar_path(
            self.tgraph.graph_di, self.route.beg_nid, self.route.fin_nid,
            # MAYBE: Send pload... networkx hack
            #heuristic=None, weight=edge_weight_attr)
            heuristic=None, weight=edge_weight_attr, pload=self)

         log.debug('exec_prob_slvr: astar_path: no. edges: %d / in %s'
                   % (len(ast_path),
                      misc.time_format_elapsed(time_0),))

         walk_path = ast_path

      # ***

      elif self.route.p3_spalgorithm == 'asp':

         time_0 = time.time()

         # MAYBE: Hack networkx and add pload param to support edge wght fcns.
         all_paths = networkx.all_shortest_paths(
            self.tgraph.graph_di, self.route.beg_nid, self.route.fin_nid,
            weight=edge_weight_attr)

         log.debug(
            'exec_prob_slvr: all_shortest_paths: returned generator in %s'
            % (misc.time_format_elapsed(time_0),))

         time_0 = time.time()

         path_num = 1
         for a_path in all_paths:

            log.debug('solve_pb: all_shortest_paths: path #%d: len: %d / in %s'
                      % (path_num, len(a_path),
                         misc.time_format_elapsed(time_0),))
            path_num += 1
            time_0 = time.time()

            if walk_path is None:
               walk_path = a_path
               #break

            # FIXME/EXPLAIN: Find an O/D pair that results in multiple paths...
            # or would the cost have to be equal? That doesn't sound very
            # likely -- in practice, all_shortest_paths would probably
            # always just return one result for Cyclopath.

      # ***

      elif self.route.p3_spalgorithm == 'dij':

         time_0 = time.time()

         # Calculate path using Dijkstra's algorithm.
         # MAYBE: Hack networkx and add pload param to support edge wght fcns.
         dij_path = networkx.dijkstra_path(
            self.tgraph.graph_di, self.route.beg_nid, self.route.fin_nid,
            weight=edge_weight_attr)

         log.debug('exec_prob_slvr: dijkstra_path: no. edges: %d / in %s'
                   % (len(dij_path),
                      misc.time_format_elapsed(time_0),))

         walk_path = dij_path

      # ***

      elif self.route.p3_spalgorithm == 'sho':

         time_0 = time.time()

         # Calculate path using generic NetworkX shortest path search.
         # MAYBE: Hack networkx and add pload param to support edge wght fcns.
         asp_path = networkx.shortest_path(
            self.tgraph.graph_di, self.route.beg_nid, self.route.fin_nid,
            weight=edge_weight_attr)

         log.debug('exec_prob_slvr: shortest_path: no. edges: %d / in %s'
                   % (len(asp_path),
                      misc.time_format_elapsed(time_0),))

         walk_path = asp_path

      # ***

      elif self.route.p3_spalgorithm == 'ssd':

         time_0 = time.time()

         # This Dijkstra fcn. "returns a tuple of two dictionaries
         # keyed by node. The first dictionary stores distance from
         # the source. The second stores the path from the source to
         # that node."
         # So... this isn't an ideal fcn. to use, since the second
         # dictionary contains paths for every pair of nodes.
         # MAYBE: Hack networkx and add pload param to support edge wght fcns.
         dij_distance2, dij_path2 = networkx.single_source_dijkstra(
            self.tgraph.graph_di, self.route.beg_nid, self.route.fin_nid,
            weight=edge_weight_attr)

         log.debug(
            'exec_prob_slvr: single_source_dijkstra: no. edges: %d / in %s'
            % (len(dij_path2),
               misc.time_format_elapsed(time_0),))

         walk_path = dij_path2[self.route.fin_nid]

      # ***

      else:

         # This code should be unreachable, as we would've raised by now.
         g.assurt(False)

      # ***

      if not walk_path:
         # NOTE: This code is probably unreachable. With the new
         #       is_disconnected attribute, there should no longer
         #       be a possibility that we won't find a route, since
         #       we guarantee that the origin and destination are
         #       on nodes that are well-connected.
         log.error('exec_prob_slvr: no route?: %s' % (self,))
         raise GWIS_Error(Problem_Base.error_msg_basic)

      # ***

      return walk_path
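
execute_problem_solver dispatches among several NetworkX shortest-path routines over the same directed graph and origin/destination node IDs. The toy graph below (made-up node IDs and weights) shows the dijkstra_path and astar_path calls in isolation; the pload= argument used above is a local NetworkX modification and is omitted here.

import networkx

graph_di = networkx.DiGraph()
graph_di.add_edge(1, 2, weight=1.0)
graph_di.add_edge(2, 3, weight=1.0)
graph_di.add_edge(1, 3, weight=5.0)

# Dijkstra and A* (with no heuristic) agree on the cheapest path, 1 -> 2 -> 3.
dij_path = networkx.dijkstra_path(graph_di, 1, 3, weight='weight')
ast_path = networkx.astar_path(graph_di, 1, 3, heuristic=None, weight='weight')
assert dij_path == ast_path == [1, 2, 3]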
Example #34
      except Exception, e:

         log.info('Exception: "%s" / %s' % (e, traceback.format_exc(),))
         sys.exit(1)

      finally:

         if self.db.transaction_in_progress():
            self.db.transaction_rollback()

         self.db.close()
         self.pg_db.close()

      log.info('Ran %d schema upgrade scripts; exiting. Took: %s'
               % (script_ct,
                  misc.time_format_elapsed(time_0),))

   #
   def setup_next_script(self):

      self.last_script = self.next_script

      self.next_script = None
      self.orig_script = None

      # Ideally, we'd do some table locking to ensure that no one else is
      # manipulating the database. But this isn't manageable -- we invoke the
      # SQL scripts as separate processes, so we can't have table locks or we
      # might cause a script to deadlock (e.g., if we lock the 'revision' table
      # and then a script tries to update that table). Usually, we'll be fine:
      # we use transactions, so, worst case scenario, either the script fails
Example #35
   def really_purge_branch_really(self):

      time_0 = time.time()

      log.info('For real purging all traces of the branch!')

      log.warning('FIXME: This fcn. is not tested all that well.')

      log.debug('purge_branch_: Acquiring revision table lock...')
      revision.Revision.revision_lock_dance(
         self.qb.db, caller='make_new_branch.py')

      # MAYBE: 2012.08.03: This code brute-forces the removal. It's from
      # some copy-n-paste code [lb] has been using (i.e., from a psql
      # command line) so it's not all that well tested. One concern is that
      # there might be violations of foreign key constraints...
      tables = [
         #
         'new_item_policy',
         #
         'node_byway',
         'node_endpoint',
         #
         # FIXME: Delete from route_id where route_id...
         'route',
         #
         'attribute',
         'tag', # Should be empty for leafy branches, anyway...
         'post',
         'thread',
         'annotation',
         'attribute',
         'attachment',
         'geofeature',
         'link_value',
         #
         'tag_preference',
         #
         'merge_job',
         'route_analysis_job',
         # FIXME: Delete from work_item_step where work_item_id...
         'work_item',
         #
         # MAYBE: delete from item_event_read where item_id = ...
         # MAYBE: delete from item_event_alert
         #
         'gtfsdb_cache_links',
         'gtfsdb_cache_register',
         #
         'group_revision',
         'group_membership',
         'group_',
         #
         'group_item_access',
         # FIXME: Delete from branch_conflict where branch_system_id...
         'branch',
         'item_versioned',
         #
         # MAYBE:
         #  delete from revert_event where rid_reverting/rid_victim
         #  'revision',
         #
         # MAYBE:
         #  delete from track_point where track_id...
         #  'track',
         #
         # MAYBE: tilecache tables...
         ]

      for table in tables:
         self.qb.db.sql("DELETE FROM %s WHERE branch_id = %d"
                        % (table, self.qb.branch_hier[0][0],))

      log.debug('purge_branch_: delete from %d tables in %s'
                % (len(tables),
                   misc.time_format_elapsed(time_0),))
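
The deletion loop above interpolates table names straight into the SQL string, which is workable for a hard-coded list but worth doing with proper identifier quoting and a single transaction if the list ever comes from elsewhere. A minimal sketch with plain psycopg2 (the DSN, branch id, and table subset are placeholders; this bypasses the project's qb.db wrapper):

   import psycopg2
   from psycopg2 import sql

   conn = psycopg2.connect('dbname=ccpv2')  # placeholder DSN
   branch_id = 12345                        # placeholder branch stack id
   tables = ['new_item_policy', 'node_byway', 'node_endpoint']  # subset, for illustration

   with conn:                 # one transaction: all deletes commit or none do
      with conn.cursor() as curs:
         for table in tables:
            # sql.Identifier quotes the table name; branch_id is bound as
            # a parameter instead of %d string interpolation.
            curs.execute(
               sql.SQL("DELETE FROM {} WHERE branch_id = %s").format(
                  sql.Identifier(table)),
               (branch_id,))
   conn.close()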
Example #36
0
   def commit_qbs(self, do_commit, commit_msg, skip_vacuum=False,
                                               skip_geometry=False):

      g.assurt(self.qb_cur is not None)

      if (self.qb_cur.item_mgr.rid_new is not None) and (self.target_groups):

         # Both of these operations take a little bit of time.
         #
         # To make the new revision, we call cp_revision_geosummary_update,
         # which takes a while. And to commit everything, well, e.g.,
         # committing five hundred thousand rows takes a while.
         time_0 = time.time()

         log.info('Saving new revision: %s...'
                  % (self.qb_cur.item_mgr.rid_new,))

         # NOTE: We could use the host or IP address of the machine when the
         # user submitted the work job, but we also have a non-anonymous
         # username, so setting the host to 'localhost' doesn't really matter.
         host = 'localhost'

         # Make group_revisions for the public group and the shared group.
         groups_ids = []
         for grp_mship, acl_id in self.target_groups.iteritems():
            groups_ids.append(grp_mship.group_id)
         g.assurt(len(groups_ids) > 0)
         # FIXME: Make sure the new revision is marked not revertable:
         #        that would be crazy-silly if someone was able to issue a
         #        revert request on it!
         Item_Manager.revision_save(
            self.qb_cur,
            self.qb_cur.item_mgr.rid_new,
            self.qb_cur.branch_hier,
            host,
            self.mjob.wtem.created_by,
            commit_msg,
            groups_ids,
            activate_alerts=False,
            processed_items=None,
            reverted_revs=None,
            skip_geometry_calc=False,
            skip_item_alerts=False)

         # Claim the new revision ID.
         revision.Revision.revision_claim(self.qb_cur.db,
                                          self.qb_cur.item_mgr.rid_new)

         # Claim the new stack IDs.
         self.qb_cur.item_mgr.finalize_seq_vals(self.qb_cur.db)

         log.info('... new revision took %s'
                  % (misc.time_format_elapsed(time_0),))

      g.assurt(self.qb_cur is not None)

      time_0 = time.time()
      if do_commit:
         log.debug('Committing the database transaction.')
         # BUG 2688: Use transaction_retryable?
         self.qb_cur.db.transaction_commit()
      elif self.qb_cur is not None:
         log.debug('Rolling back the database!!')
         self.qb_cur.db.transaction_rollback()
      log.info('... %s took %s'
         % ('Commit' if do_commit else 'Rollback',
            misc.time_format_elapsed(time_0),))

# FIXME: Put in debug? And then delete this...
      skip_vacuum = True

      if do_commit and not skip_vacuum:
         self.db_vacuum()

      if do_commit:
         time_0 = time.time()
         # Update the revision's geometry approximation.
         # FIXME: Hopefully, this is faster after a vacuum.

         db = db_glue.new()
         db.transaction_begin_rw()
         branch_hier = self.qb_cur.branch_hier
         revision.Revision.geosummary_update(db, self.qb_cur.item_mgr.rid_new,
                                                 branch_hier, groups_ids,
                                                 skip_geometry)
         db.transaction_commit()
         db.close()
         log.info('... Rev Geom took %s'
                  % (misc.time_format_elapsed(time_0),))


# FIXME: Test this: 2013.10.30: Do like commit.py, and call do_post_commit
#        (which just tickles Mr. Do!) and called routed_hup (which sends
#        an interrupt to the route daemon).
      if do_commit:
         log.debug('commit_qbs: signalling Mr. Do!')
         self.qb_cur.item_mgr.do_post_commit(self.qb_cur)
         log.debug('commit_qbs: signalling route daemon')
         command_base.Op_Handler.routed_hup(self.qb_cur.db)

      self.qb_cur.item_mgr.rid_new = None
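
commit_qbs above ends with a timed commit-or-rollback on the work-item queries. The same shape, detached from the qb machinery, is just a try/commit/rollback wrapper with elapsed-time logging; a minimal sketch, assuming any DB-API connection object:

   import logging
   import time

   log = logging.getLogger('commit_helper')

   def commit_or_rollback(conn, do_commit):
      # Commit the open transaction if do_commit is set, else roll it back,
      # and log how long the operation took (mirrors the pattern above).
      time_0 = time.time()
      try:
         if do_commit:
            conn.commit()
         else:
            conn.rollback()
      except Exception:
         conn.rollback()
         raise
      finally:
         log.info('... %s took %.2fs',
                  'Commit' if do_commit else 'Rollback',
                  time.time() - time_0)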
Example #37
0
    def load(self, db, keep_running=None):

        # Load ratings from the database.

        t0_all = time.time()

        usage_0 = None
        if conf.debug_mem_usage:
            usage_0 = mem_usage.get_usage_mb()

        # Load all users' ratings and the generic ratings.

        g.assurt(self.graph.branch_hier)
        if len(self.graph.branch_hier) > 1:
            branch_ids = ','.join([str(x[0]) for x in self.graph.branch_hier])
            where_branch = "branch_id IN (%s)" % (branch_ids, )
        else:
            where_branch = "branch_id = %d" % (self.graph.branch_hier[0][0], )

        if not self.honor_historic:
            rats_sql = self.sql_current(db, where_branch)
        else:
            rats_sql = self.sql_historic(db, where_branch)

        if rats_sql:

            # Get and process all ratings

            log.info('load: reading byway_rating table...')
            time_0 = time.time()

            log.verbose('load: enabling dont_fetchall')
            db.dont_fetchall = True

            rats = db.sql(rats_sql)

            log.verbose('load: disabling dont_fetchall')
            db.dont_fetchall = False

            log.info('load: read %d ratings %s in %s' % (
                db.curs.rowcount,
                '[Current]' if not self.last_last_modified else
                ('[since %s]' % self.last_last_modified),
                misc.time_format_elapsed(time_0),
            ))

            g.check_keep_running(keep_running)

            self.cache_rats(db)

            db.curs_recycle()

            # Uncomment for remote debugging...
            #g.assurt(False)

            conf.debug_log_mem_usage(log, usage_0, 'ratings.load')

            log.info('load: done loading ratings in %s' %
                     (misc.time_format_elapsed(t0_all), ))

        self.last_last_modified = None
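
The dont_fetchall flag on the project's db wrapper appears to stream the ratings result set instead of buffering it all in memory before cache_rats() iterates it. With plain psycopg2, the analogous tool is a named (server-side) cursor; a minimal sketch, assuming a byway_rating table with the columns the cache code reads (DSN and column names are assumptions, not confirmed schema):

   import psycopg2

   conn = psycopg2.connect('dbname=ccpv2')  # placeholder DSN

   # A named cursor is a server-side cursor: rows are fetched from the
   # backend in batches of itersize instead of all at once.
   curs = conn.cursor(name='ratings_stream')
   curs.itersize = 10000
   curs.execute(
      "SELECT byway_stack_id, username, branch_id, value FROM byway_rating")

   n_rats = 0
   for byway_stack_id, username, branch_id, value in curs:
      n_rats += 1    # cache_rats()-style processing would go here

   curs.close()
   conn.close()
   print('streamed %d ratings' % (n_rats,))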
    def go_main(self):

        log.debug('Starting')

        time_0 = time.time()

        if (self.cli_opts.cache_table_drop
                or self.cli_opts.cache_table_create):

            if self.cli_opts.cache_table_drop:
                self.gtfsdb_cache_delete()
            if self.cli_opts.cache_table_create:
                self.gtfsdb_cache_create()

            log.info('Committing transaction [go_main]')
            self.qb.db.transaction_commit()

        else:

            os.chdir(self.dname_gtfsdb)

            self.tools_check()

            # Download the transit archive.
            self.gtfs_download()
            # Get the date. There are three ways we can get it (well, there are
            # up to three different dates we can get). We use the date to compare
            # against saved archives, to know if we really need to update our
            # cache and restart the route planner (i.e., if the archive hasn't
            # changed, we can do nothing).
            self.gtfs_get_feed_dates()
            # If we really downloaded the archive, keep a copy of it.
            if not self.tfeed_not_retrieved:
                self.gtfs_archive()

            self.cache_prepare()

            # If a new transit feed was downloaded, or if the gtfs database or the
            # graphserver database are missing, rebuild the gtfs and gserver dbs.
            if ((not self.tfeed_not_retrieved)
                    or (not os.path.exists(conf.transitdb_filename))
                    or (not os.path.exists(self.fname_transit_gdb))):
                self.gtfsdb_compile()
                self.graphserver_import()
                self.graphserver_inspect()
            else:
                log.debug('Transit feed up-to-date; skipping compile.')

            self.files_fixperms()

            if not self.cache_up_to_date:
                self.ccp_cache_populate()
                #
                log.info('Vacuuming the database')
                db = db_glue.new(use_transaction=False)
                db.sql("VACUUM ANALYZE;")
                db.close()
            else:
                log.debug('Transit cache up-to-date; skipping cache.')

            log.debug('gtfsdb_build_cache: complete: %s' %
                      (misc.time_format_elapsed(time_0), ))
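
go_main above runs VACUUM ANALYZE through db_glue.new(use_transaction=False); with plain psycopg2 the equivalent constraint is that VACUUM cannot run inside a transaction block, so the connection must be switched to autocommit first. A minimal sketch (placeholder DSN):

   import psycopg2

   conn = psycopg2.connect('dbname=ccpv2')  # placeholder DSN
   # VACUUM refuses to run inside a transaction block, so enable
   # autocommit before issuing it.
   conn.autocommit = True
   curs = conn.cursor()
   curs.execute("VACUUM ANALYZE;")
   curs.close()
   conn.close()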
Example #40
0
   def load_really(self, qb_curr, keep_running=None):
      '''Load the transport network from the database.'''

      g.check_keep_running(keep_running)

      t0_all = time.time()

      usage_0 = None
      if conf.debug_mem_usage:
         usage_0 = mem_usage.get_usage_mb()
         log.info('load: mem_usage: beg: %.2f Mb' % (usage_0,))

      # Load ratings.

      # NOTE: To find its usage, search graph.ratings.
      if self.ratings is None:
         g.assurt(isinstance(qb_curr.revision, revision.Historic))
         self.ratings = ratings.Predictor(self)
      # Load all ratings or just update what's changed since we last checked.
      self.ratings.load(qb_curr.db, keep_running=keep_running)

      # Load byways, and attrs and tags.

      try:
         if self.route_daemon.cli_opts.regions:
            qb_curr.filters.filter_by_regions = (
                  self.route_daemon.cli_opts.regions)
      except AttributeError:
         pass

      log.debug('load: calling load_feats_and_attcs...')
      prog_log = Debug_Progress_Logger(log_freq=25000)

      if isinstance(qb_curr.revision, revision.Historic):
         qb_curr.item_mgr.load_feats_and_attcs(qb_curr, byway,
            'search_by_network', self.add_byway_loaded, prog_log,
            heavyweight=False, fetch_size=0, keep_running=keep_running)
      else:
         g.assurt(isinstance(qb_curr.revision, revision.Updated))
         qb_curr.item_mgr.update_feats_and_attcs(qb_curr, byway,
            'search_by_network', self.add_byway_updated, prog_log,
            heavyweight=False, fetch_size=0, keep_running=keep_running)

      # Add transit.

      self.load_make_graph_add_transit(qb_curr)

      # All done loading.

      conf.debug_log_mem_usage(log, usage_0, 'tgraph_base.load_really')

      log.info(
   '/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\')
      log.info('load: complete: for %s in %s'
               % (qb_curr.revision.short_name(),
                  misc.time_format_elapsed(t0_all),))
      log.info(
   '/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\')

      qb_curr.definalize()
      qb_curr = None
Example #41
0
    def load_really(self, qb_curr, keep_running=None):
        '''Load the transport network from the database.'''

        g.check_keep_running(keep_running)

        t0_all = time.time()

        usage_0 = None
        if conf.debug_mem_usage:
            usage_0 = mem_usage.get_usage_mb()
            log.info('load: mem_usage: beg: %.2f Mb' % (usage_0, ))

        # Load ratings.

        # NOTE: To find its usage, search graph.ratings.
        if self.ratings is None:
            g.assurt(isinstance(qb_curr.revision, revision.Historic))
            self.ratings = ratings.Predictor(self)
        # Load all ratings or just update what's changed since we last checked.
        self.ratings.load(qb_curr.db, keep_running=keep_running)

        # Load byways, and attrs and tags.

        try:
            if self.route_daemon.cli_opts.regions:
                qb_curr.filters.filter_by_regions = (
                    self.route_daemon.cli_opts.regions)
        except AttributeError:
            pass

        log.debug('load: calling load_feats_and_attcs...')
        prog_log = Debug_Progress_Logger(log_freq=25000)

        if isinstance(qb_curr.revision, revision.Historic):
            qb_curr.item_mgr.load_feats_and_attcs(qb_curr,
                                                  byway,
                                                  'search_by_network',
                                                  self.add_byway_loaded,
                                                  prog_log,
                                                  heavyweight=False,
                                                  fetch_size=0,
                                                  keep_running=keep_running)
        else:
            g.assurt(isinstance(qb_curr.revision, revision.Updated))
            qb_curr.item_mgr.update_feats_and_attcs(qb_curr,
                                                    byway,
                                                    'search_by_network',
                                                    self.add_byway_updated,
                                                    prog_log,
                                                    heavyweight=False,
                                                    fetch_size=0,
                                                    keep_running=keep_running)

        # Add transit.

        self.load_make_graph_add_transit(qb_curr)

        # All done loading.

        conf.debug_log_mem_usage(log, usage_0, 'tgraph_base.load_really')

        log.info(
            '/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\'
        )
        log.info('load: complete: for %s in %s' % (
            qb_curr.revision.short_name(),
            misc.time_format_elapsed(t0_all),
        ))
        log.info(
            '/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\/*\\'
        )

        qb_curr.definalize()
        qb_curr = None
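
Both load_really variants hand load_feats_and_attcs a Debug_Progress_Logger with log_freq=25000, i.e., a counter that emits a log line every N items processed. The class itself isn't shown here; the following is a minimal stand-in for that behavior (interface guessed from usage, not the project's actual implementation):

   import logging

   logging.basicConfig(level=logging.DEBUG)
   log = logging.getLogger('prog_log')

   class Progress_Logger(object):
      # Emits one log line every log_freq items processed.
      def __init__(self, log_freq=25000):
         self.log_freq = log_freq
         self.count = 0

      def loops_inc(self):
         self.count += 1
         if (self.count % self.log_freq) == 0:
            log.debug('progress: processed %d items...' % (self.count,))

   prog = Progress_Logger(log_freq=3)
   for item in range(10):
      prog.loops_inc()   # logs after items 3, 6, and 9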
   def cache_links(self, db_transit):

      log.debug('cache_links: caching transit node/cyclopath node pairs')

      time_0 = time.time()

      # NOTE: We load all byways into the graph, including those tagged
      # 'prohibited' and 'closed', but we only ever link those not tagged as
      # such with transit stops.

      # MAGIC HACK ALERT
      #revision = revision.Historic(self.revision_id)
      #model = ratings.Predictor(self.qb.branch_hier[0][0],
      #                          #self.revision_id)
      #                          revision)
      #model.load(self.qb)
      #
      route_daemon = None
      ccp_graph = planner.routed_p2.tgraph.Trans_Graph(route_daemon,
         self.qb.username, self.qb.branch_hier, self.qb.revision)
      ccp_graph.load(self.qb.db)
      #
      tagprefs = {}
      tagprefs['prohibited'] = ratings.t_avoid
      tagprefs['closed'] = ratings.t_avoid
      #
      rating_func = ccp_graph.ratings.rating_func(self.qb.username, tagprefs,
                                                  ccp_graph)
      # MAGIC NUMBER: Min rating.
      rating_min = 0.5
      # The transit data is lat,lon, as opposed to SRID-encoded x,y.
      is_latlon = True

      n_stops = db_transit.count_stops()

      # NOTE: 2011.06.26: This loop takes a while. For me [lb], 55 secs.
      # NOTE: 2011.08.08: Find nearest node using GRAC SQL is time consuming!
      #                   On the order of minutes and minutes...
      #                   Can you cache the nearest nodes, maybe? At least for
      #                   anon user in public branch and current revision?
      #       byway.transit_stops ?? list of transit IDs?

      for i, (stop_id, stop_name, stop_lat, stop_lon) in enumerate(
                                                           db_transit.stops()):
         # Every once in a while, print a debug message
         # FIXME: Replace with prog logger
         if i and ((i % 25) == 0):
            log.debug('link_graphs: progress: on stop # %d of %d...'
                      % (i, n_stops,))
            #log.debug(' >> id: %s / name: %s / lat: %s / lon: %s'
            #          % (stop_id, stop_name, stop_lat, stop_lon,))
            #log.debug(' >> id: %s / name: %s / lat: %s / lon: %s'
            #          % (type(stop_id), type(stop_name), type(stop_lat),
            #             type(stop_lon),))
         # NOTE: The (x,y) point is lon first, then lat.
         stop_xy = (stop_lon, stop_lat,)
# 2012.03.05: This is taking wayyy tooooo long.
         nearest_byway = route.One.byway_closest_xy(
            self.qb, stop_name, stop_xy, rating_func, rating_min, is_latlon)
         nearest_node = nearest_byway.nearest_node_id()
# FIXME: What if the node is on a one-way? What if the node is tagged with
# something that the user marks 'avoid'? In both cases, transit stop might be
# Unreachable.
# 2012.01.09: Get m-value and send to client.
         if nearest_node is not None:
            node_id = str(nearest_node)
            # NOTE: If we don't cast to string, it's unicode, and db.insert
            # doesn't quote it.
            stop_id = 'sta-%s' % (str(stop_id),)
            if node_id != '':
               self.qb.db.insert(
                  'gtfsdb_cache_links', {
                     'username': self.qb.username,
                     'branch_id': self.qb.branch_hier[0][0],
                     'revision_id': self.revision_id,
                     'gtfs_caldate': self.tfeed_zipdate,

# FIXME: This is a string? See above...
                     'node_stack_id': node_id,
## Bug nnnn:
#                     'byway_m_value':

                     'transit_stop_id': stop_id,
                  }, {})
            else:
               log.warning(
                  'link_graphs: no node name?!: node_id: %s / stop_id: %s'
                  % (node_id, stop_id,))
         else:
            log.warning(
               'link_graphs: no nearest node?!: stop_id: %s'
               % (stop_id,))
            log.warning(' >> lat, lon: (%s, %s)' % (stop_lat, stop_lon))
      nlinks = self.cache_count_sql('gtfsdb_cache_links')
      g.assurt(nlinks == n_stops)
      # 2011.08.08: 1570.29 secs (26 minutes)
      # 2011.08.09: 1270.86 secs (21 minutes)
      log.debug('link_graphs: linked: %d transit stops in %s'
                % (n_stops, misc.time_format_elapsed(time_0),))
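
cache_links above calls byway_closest_xy once per transit stop, which the comments note takes on the order of minutes. One common way to get a nearest byway per stop directly in PostGIS is the KNN distance operator with ORDER BY ... LIMIT 1, which can use the table's spatial index. A minimal sketch, assuming a geofeature table with a PostGIS geometry column named geometry and SRID 4326 lat/lon input (the table, column names, and DSN are guesses, not confirmed project schema):

   import psycopg2

   conn = psycopg2.connect('dbname=ccpv2')  # placeholder DSN
   curs = conn.cursor()

   def nearest_byway_stack_id(stop_lon, stop_lat):
      # The <-> operator orders rows by distance to the point and can use
      # the table's GiST index, so LIMIT 1 returns the closest row without
      # scanning every byway.
      curs.execute("""
         SELECT stack_id
           FROM geofeature
          ORDER BY geometry <-> ST_SetSRID(ST_MakePoint(%s, %s), 4326)
          LIMIT 1
         """, (stop_lon, stop_lat))
      row = curs.fetchone()
      return row[0] if row is not None else None

   print(nearest_byway_stack_id(-93.265, 44.978))  # illustrative coordinates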
Example #43
0
    def sql_apply_query_filters(self, qb, where_clause="", conjunction=""):

        g.assurt((not where_clause) and (not conjunction))
        g.assurt((not conjunction) or (conjunction == "AND"))

        # We build custom SQL in sql_context_user (and don't use
        # item_user_access's search_get_sql(), so we cannot support
        # filters that edit qb.sql_clauses. We only support filters
        # that only modify the where clause.
        g.assurt(not (
            qb.filters.filter_by_creator_include
            or qb.filters.filter_by_creator_exclude
            or qb.filters.stack_id_table_ref
            or qb.filters.use_stealth_secret
            or qb.filters.include_item_stack
            # There are lots more filters we don't support...
            # but there's no reason to list them all.
        ))

        if qb.filters.filter_by_username:
            # %s (LOWER(rev.username) ~ LOWER(%s))
            # %s (rev.username ~* %s)
            # [lb] is curious why we use regex search when we want exact match.
            # MAYBE: Full Text Search @@ is faster than exact match = which is
            #        faster than LOWER() = LOWER() which is faster than regex ~.
            # NOTE: Do not need to lower username (already lower)
            filter_by_username = qb.filters.filter_by_username.lower()
            where_clause += ("""
            %s (rev.username = %s)
            """ % (
                conjunction,
                qb.db.quoted(filter_by_username),
            ))
            conjunction = "AND"

        # Find items the user is watching and only those items' revisions.
        if (qb.filters.filter_by_watch_item or qb.filters.only_stack_ids):
            time_0 = time.time()
            # Clone the qb but keep the db: we want to make a temp table.
            watchers_qb = qb.clone(skip_clauses=True, skip_filtport=True)
            qfs = Query_Filters(req=None)
            qfs.filter_by_watch_item = qb.filters.filter_by_watch_item
            qfs.only_stack_ids = qb.filters.only_stack_ids
            watchers_qb.filters = qfs
            watchers_qb.finalize_query()
            watchers_qb.sql_clauses = (
                item_user_watching.Many.sql_clauses_cols_all.clone())
            watchers_many = item_user_watching.Many()
            inner_sql = watchers_many.search_get_sql(watchers_qb)
            # No: watchers_qb.db.close()
            self.revision_id_table_ref = 'temp__filter_rids'
            # MAYBE: The valid_start_rid and valid_until_rid are the
            #        group_item_access record's. These usually match
            #        the item_versioned rids, right? Whenever an item
            #        is edited, we update the access records... [lb] thinks.
            #        Otherwise, what we'd really want here is item_versioned's
            #        valid_start_rid and valid_until_rid.
            thurrito_sql = ("""
            SELECT
               valid_start_rid,
               valid_until_rid
            INTO TEMPORARY TABLE
               temp__both_rids
            FROM
               (%s) AS foo_grv_1
            """ % (inner_sql, ))
            rows = qb.db.sql(thurrito_sql)
            rid_union_sql = ("""
            SELECT
               DISTINCT (filter_rid)
            INTO TEMPORARY TABLE
               %s
            FROM
               ((SELECT valid_start_rid AS filter_rid FROM temp__both_rids)
                UNION
                (SELECT valid_until_rid AS filter_rid FROM temp__both_rids))
               AS foo_grv_2
            """ % (self.revision_id_table_ref, ))
            rows = qb.db.sql(rid_union_sql)
            log.debug('sql_apply_qry_fltrs: %s in %s' % (
                'filter_by_watch_item or only_stack_ids',
                misc.time_format_elapsed(time_0),
            ))

        if qb.filters.filter_by_watch_feat:
            # This filter only makes sense for attachment item types.
            log.warning('group_revision does not support filter_by_watch_feat')
            raise GWIS_Nothing_Found()

        # FIXME: Show only changes to watched items...
        #qfs.filter_by_watch_geom = wr;
        #qfs.filter_by_watch_item = 0 or 1 or 2 ...
# 2013.03.29: To support CcpV1 feature, see search_for_geom and get a bbox.
#             Maybe: in CcpV2, you can watch non-regions, so we could get
#             a list of stack_ids the user is watching and return revisions
#             containing changes to those stack IDs...

        return grac_record.Many.sql_apply_query_filters(
            self, qb, where_clause, conjunction)
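
The watch-item filter above materializes a subquery into a temporary table and then unions its two revision-id columns into a second temp table that the main query later joins against. The same two-step shape in plain psycopg2, stripped of the Cyclopath-specific inner query (the DSN, source table, and item filter are illustrative only):

   import psycopg2

   conn = psycopg2.connect('dbname=ccpv2')  # placeholder DSN
   curs = conn.cursor()

   # Step 1: capture both rid columns of some inner query in a temp table.
   curs.execute("""
      SELECT valid_start_rid, valid_until_rid
        INTO TEMPORARY TABLE temp__both_rids
        FROM group_item_access
       WHERE item_id = %s
      """, (12345,))   # placeholder stack id

   # Step 2: collapse the two columns into one DISTINCT set of revision ids,
   # which the outer query can then join or IN() against.
   curs.execute("""
      SELECT DISTINCT (filter_rid)
        INTO TEMPORARY TABLE temp__filter_rids
        FROM ((SELECT valid_start_rid AS filter_rid FROM temp__both_rids)
              UNION
              (SELECT valid_until_rid AS filter_rid FROM temp__both_rids))
             AS both_rids
      """)

   curs.execute("SELECT count(*) FROM temp__filter_rids")
   print(curs.fetchone()[0])
   conn.close()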
   def run_script(self):

      g.assurt(self.next_script)

      if self.next_schema is None:
         self.next_schema = 'public'
         for line in open(self.next_script):
            if '@once-per-instance' in line:
               self.next_schema = self.schemas[0]
               break

      # Confirm
      if ((self.next_schema != 'public')
          and (self.next_schema not in conf.server_instances)):
         log.info("Not running '%s' on missing schema: '%s'"
                  % (self.next_script, self.next_schema,))
         self.db.transaction_rollback()
      elif self.cli_opts.do_yesall:
         log.info("Auto-running %s on '%s' on '%s'\n"
            % (self.next_script, self.next_schema, db_glue.DB.db_name,))
         self.run_script_()
      elif not yn_get("Run %s on '%s' on '%s' now?"
                  % (self.next_script, self.next_schema, db_glue.DB.db_name,)):
         self.db.transaction_rollback()
         raise Exception('Aborting.')
      else:
         self.run_script_()

      # Record that we ran it
      if not self.cli_opts.do_revert:
         self.db.sql(
            """
            INSERT INTO upgrade_event (script_name, schema)
            VALUES (%s, %s)
            """, (self.next_script, self.next_schema))
         log.info("Recorded successful run of %s on '%s' schema."
                  % (self.next_script, self.next_schema,))
      else:
         self.db.sql(
            """
            DELETE FROM upgrade_event
            WHERE (script_name = %s AND schema = %s)
            """, (self.orig_script, self.next_schema,))
         log.info("Recorded successful revert of %s on '%s' schema."
                  % (self.orig_script, self.next_schema,))

      # BUG nnnn: Fixed: Record successful runs of each schema script.
      # Make sure we record a successful run of each schema script.
      # This commits and recycles the cursor. We don't have to worry
      # about use_transaction because that's set on the connection.
      self.db.transaction_commit()

      # Always vacuum and analyze the database after an update.
      if (((self.cli_opts.do_dovacu)
           or (os.environ.get("TERM") != 'xterm'))
          and (not self.cli_opts.do_novacu)
          and ((self.next_schema == self.schemas[-1])
               or (self.next_schema == 'public'))):
         log.info('Vacuuming...')
         vacuum_time_0 = time.time()
         self.pg_db.sql("VACUUM ANALYZE;")
         log.info('Vacuumed database in %s'
                  % (misc.time_format_elapsed(vacuum_time_0),))
         self.recently_vacuumed = True
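
run_script records each successful run in upgrade_event (and deletes the row on revert), which is what makes the upgrade driver resumable. A minimal sketch of the corresponding "has this script already run?" check, assuming the same two-column upgrade_event table and a plain DB-API cursor:

   def script_already_ran(curs, script_name, schema):
      # upgrade_event(script_name, schema) holds one row per successful run,
      # so a simple existence check tells us whether to skip the script.
      curs.execute(
         """
         SELECT 1 FROM upgrade_event
          WHERE script_name = %s AND schema = %s
         """, (script_name, schema))
      return curs.fetchone() is not None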
   def run_script_(self):

      # Record the time for each script.
      instance_time_0 = time.time()

      db_user = conf.db_user
      for line in open(self.next_script):
         if '@run-as-superuser' in line:
            db_user = conf.db_owner
            break

      # Run the script
      the_cmd = ('sed s/@@@instance@@@/%s/g %s | psql -U %s -d %s'
              % (self.next_schema,
                 self.next_script,
                 db_user,
                 db_glue.DB.db_name,))
      # If the SQL fails but yesall is enabled, we'll keep running scripts,
      # which leaves the upgrade in an unknown state. For the V1->V2 upgrade
      # scripts, this means you have to restart a cron job that takes 12
      # hours to run. So how do you detect errors?  The SQL outputs ERROR:
      # and WARNING: when things are awry.  Errors are always show-stoppers;
      # Warnings not necessarily so.  Note that os.system returns 0 if the cmd
      # returns 0, otherwise something nonzero and undefined. At least that's
      # been my [lb's] observation testing a python script. As for running the
      # sed command above, the exit_status is always 0.  Fortunately, popen4
      # lets us read the output, so we can grep for problems and say no to
      # yesall. The os.system method:
      #    exit_status = os.system(the_cmd)
      #    log.info('exit_status: %s' % (exit_status,))
      # The popen4 method:
      #   (sin, sout_err) = popen2.popen4(the_cmd)
      # Which I forgot is an Oops:
      #   DeprecationWarning: The popen2 module is deprecated.
      #   Use the subprocess module.
      # So use the subprocess module:
      log.debug('run_script: Popen: the_cmd: %s'
                % (the_cmd.replace('\n', ' || '),))
      p = subprocess.Popen([the_cmd],
                           shell=True,
                           # bufsize=bufsize,
                           stdin=subprocess.PIPE,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.STDOUT,
                           close_fds=True)
      (sin, sout_err) = (p.stdin, p.stdout)

      error_detected = False
      while True:
         line = sout_err.readline()
         if not line:
            break
         else:
            line = line.strip()
            # Print to stdout. Don't use log, which prepends output
            #print line
            # [lb] is having interleaving issues, so trying log.
            # Also, logcheck is spewing all of this gunk. Grr.
            # But, I do like that the SQL lines are now timestamped.
            log.debug(line)
            if not error_detected:
               # See if the line matches an ERROR: or WARNING:.
               for regex_in in fatal_error_re_in:
                  #log.verbose('regex_in: %s' % (regex_in,))
                  if regex_in.search(line):
                     error_detected = True
                     # See if the WARNING: isn't fatal.
                     for regex_ex in fatal_error_re_ex:
                        #log.verbose('regex_ex: %s' % (regex_ex,))
                        if regex_ex.search(line):
                           # Ignore this false-positive
                           error_detected = False
                           break
                     # If a fatal ERROR: or WARNING:, bail.
                     if error_detected:
                        log.error(error_text)
                        break
      sin.close()
      sout_err.close()
      p.wait()
      # Display the time it took to run the script
      log.info('Ran script %s on instance %s in %s'
          % (self.next_script,
             self.next_schema,
             misc.time_format_elapsed(instance_time_0),))
      # Ask if all is well.
      # FIXME: If the grepping for errors above works, maybe get
      #        rid of yesall and asking yn_get?
      if ((error_detected and not self.cli_opts.do_noerrs)
          or (not self.cli_opts.do_yesall
              and not yn_get(
                  'Continue (i.e., was everything OK above)?'))):
         log.error('ERROR: Script failed!')
         log.error('(Check last script output for ERROR or WARNING.)')
         log.error(big_warning)
         raise Exception('Aborting: Script failed!')
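
The long comment in run_script_ explains why the driver moved from os.system and popen2 to subprocess: it needs to stream psql's output and scan it for ERROR/WARNING lines. The same idea in a compact, standalone form (the fatal/ignore patterns here are examples, not the module's actual fatal_error_re_in/_ex lists, and the command in the usage note is a placeholder):

   import re
   import subprocess

   FATAL_RE = [re.compile(r'\bERROR:'), re.compile(r'\bWARNING:')]
   IGNORE_RE = [re.compile(r'WARNING:\s+there is no transaction in progress')]

   def run_sql_script(cmd):
      # Run the shell command, stream its combined stdout/stderr, and flag
      # any line matching a fatal pattern but none of the ignore patterns.
      proc = subprocess.Popen(cmd, shell=True,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT)
      error_detected = False
      for raw in proc.stdout:
         line = raw.decode('utf-8', 'replace').strip()
         print(line)
         if not error_detected and any(r.search(line) for r in FATAL_RE):
            if not any(r.search(line) for r in IGNORE_RE):
               error_detected = True
      proc.wait()
      return (proc.returncode, error_detected)

   # Illustrative call; a sed | psql pipeline like the_cmd above would go here.
   # rc, failed = run_sql_script('psql -U cycling -d ccpv2 -f 201-schema.sql')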
Example #46
0
    def load_transit(self, qb, db_transit):

        # load the transit info
        agency_id = None
        reporter = None  # sys.stdout # FIXME: Can we pass in log.debug somehow?
        #    I think we just need to support write()
        maxtrips = None
        g.assurt(self.gserver is not None)

        # C.f. graphserver/pygs/build/lib/graphserver/compiler.py
        #         ::graph_load_gtfsdb
        log.debug('load_transit: loading compiler')
        compiler = GTFSGraphCompiler(db_transit, conf.transitdb_agency_name,
                                     agency_id, reporter)

        nedges = 0
        if self.cache_reg is not None:
            from_and_where = self.links_cache_from_and_where(
                'gtfsdb_cache_register', qb)
            nedges = qb.db.sql("SELECT transit_nedges AS nedges %s" %
                               (from_and_where))[0]['nedges']
        if nedges > 0:
            nedges_str = str(nedges)
        else:
            nedges_str = 'unknown'

        time_0 = time.time()
        log.debug('load_transit: loading vertices and edges')
        #for (fromv_label, tov_label, edge) in compiler.gtfsdb_to_edges(maxtrips):
        for i, (fromv_label, tov_label,
                edge) in enumerate(compiler.gtfsdb_to_edges(maxtrips)):
            if (i % 25000) == 0:
                log.debug('load_transit: progress: on edge # %d of %s...' % (
                    i,
                    nedges_str,
                ))
                #log.debug(' >> fromv_label: %s / tov_label: %s / edge: %s'
                #          % (fromv_label, tov_label, edge,))
                # NOTE: fromv_label is unicode, tov_label str, and edge
                #       graphserver.core.TripBoard/TripAlight/Etc.
                # FIXME: Why is fromv_label unicode?
            fromv_label = str(fromv_label)
            tov_label = str(tov_label)
            g.assurt(isinstance(edge, EdgePayload))
            self.gserver.add_vertex(fromv_label)
            self.gserver.add_vertex(tov_label)
            self.gserver.add_edge(fromv_label, tov_label, edge)
        # 2011.08.08: 49 seconds
        # 2011.08.09: 36 seconds with less log.debug
        log.debug('load_transit: loaded %d edges in %s' % (
            i + 1,
            misc.time_format_elapsed(time_0),
        ))
        # 2013.12.05: This is firing. It looks like the count of transit_nedges
        # from gtfsdb_cache_register is less than what we've loaded...
        #g.assurt((nedges == 0) or (nedges == (i + 1)))
        if not ((nedges == 0) or (nedges == (i + 1))):
            log.error('load_transit: nedges: %d / i: %d' % (
                nedges,
                i,
            ))
   def go_main(self):

      log.debug('Starting')

      time_0 = time.time()

      if (self.cli_opts.cache_table_drop
          or self.cli_opts.cache_table_create):

         if self.cli_opts.cache_table_drop:
            self.gtfsdb_cache_delete()
         if self.cli_opts.cache_table_create:
            self.gtfsdb_cache_create()

         log.info('Committing transaction [go_main]')
         self.qb.db.transaction_commit()

      else:

         os.chdir(self.dname_gtfsdb)

         self.tools_check()

         # Download the transit archive.
         self.gtfs_download()
         # Get the date. There are three ways we can get it (well, there are
         # up to three different dates we can get). We use the date to compare
         # against saved archives, to know if we really need to update our
         # cache and restart the route planner (i.e., if the archive hasn't
         # changed, we can do nothing).
         self.gtfs_get_feed_dates()
         # If we really downloaded the archive, keep a copy of it.
         if not self.tfeed_not_retrieved:
            self.gtfs_archive()

         self.cache_prepare()

         # If a new transit feed was downloaded, or if the gtfs database or the
         # graphserver database are missing, rebuild the gtfs and gserver dbs.
         if ((not self.tfeed_not_retrieved)
             or (not os.path.exists(conf.transitdb_filename))
             or (not os.path.exists(self.fname_transit_gdb))):
            self.gtfsdb_compile()
            self.graphserver_import()
            self.graphserver_inspect()
         else:
            log.debug('Transit feed up-to-date; skipping compile.')

         self.files_fixperms()

         if not self.cache_up_to_date:
            self.ccp_cache_populate()
            #
            log.info('Vacuuming the database')
            db = db_glue.new(use_transaction=False)
            db.sql("VACUUM ANALYZE;")
            db.close()
         else:
            log.debug('Transit cache up-to-date; skipping cache.')

         log.debug('gtfsdb_build_cache: complete: %s'
                   % (misc.time_format_elapsed(time_0),))