def ccp_cache_populate(self):
        """Rebuild the gtfsdb cache tables inside a locked transaction.

        Expects the caller to have cleared self.qb.db beforehand; gets a
        fresh handle, takes the gtfsdb_cache_links table lock, registers
        this cache build, clears and re-saves the cache, then commits
        and releases the handle (setting self.qb.db back to None).
        """

        # We cleared the db handle earlier, so get a new one, and lock it.
        g.assurt_soft(self.qb.db is None)
        self.qb.db = db_glue.new()
        # FIXME: What's gtfsdb_cache_edges? Or don't we care?
        #self.qb.db.transaction_begin_rw('gtfsdb_cache_edges',
        #                                'gtfsdb_cache_links')
        # EXPLAIN: Who are we competing with? Just other instances of this
        #          script?
        locked = self.qb.db.transaction_lock_try('gtfsdb_cache_links',
                                                 caller='gtfsdb_build_cache')
        # Hard assert: if another process holds the lock we cannot
        # safely rebuild the cache.
        g.assurt(locked)

        # Record who built this cache and against which branch/revision
        # and GTFS feed date.
        self.qb.db.insert(
            'gtfsdb_cache_register', {
                'username': self.qb.username,
                'branch_id': self.qb.branch_hier[0][0],
                'revision_id': self.revision_id,
                'gtfs_caldate': self.tfeed_zipdate,
            }, {})

        # Drop stale cache rows, then repopulate.
        self.ccp_clear_cache()

        self.ccp_save_cache()

        log.info('Committing transaction [ccp_cache_populate]')
        self.qb.db.transaction_commit()
        self.qb.db.close()
        self.qb.db = None
# Example #2
   def save_rstop(self, qb, route, stop_number):
      """Persist one route stop row for the given route.

      Records the route's system/stack IDs and version on this stop,
      resolves the stop's node ID from its (x, y) endpoint when the
      client omitted it, and inserts the row via save_insert.
      """

      log.debug('save_rstop: route: %s / stop_number: %s'
                % (route, stop_number,))

      #item_helper.One.save_core(self, qb)
      self.route_id = route.system_id
      self.stop_number = stop_number
      # FIXME: 2012.09.24: Drop the stack_id and version, eh?
      self.route_stack_id = route.stack_id
      self.route_version = route.version

      # 2014.09.13: There's been a bug until now wherein the user drags a new
      # route in the client, and when the route is saved, the client does not
      # send the intermediate stops' node IDs.
      if not self.node_id:

         pt_xy = (self.x, self.y,)
         nodes_byway = node_byway.Many()
         nodes_byway.search_by_endpoint_xy(qb, pt_xy, internals_ok=False,
                                                      limit_one=True)
         if len(nodes_byway) == 1:
            self.node_id = nodes_byway[0].node_stack_id
         else:
            # Zero matches is tolerable (soft assert); more than one
            # would mean a corrupt endpoint index.
            g.assurt_soft(len(nodes_byway) == 0)
            # BUG FIX: message previously read 'endpoint no found'.
            log.warning('save_rstop: endpoint not found: %s' % (pt_xy,))
            g.assurt_soft(False)

      self.save_insert(qb, One.item_type_table, One.psql_defns)
   def ccp_cache_populate(self):
      """Rebuild the gtfsdb cache tables inside a locked transaction.

      Expects the caller to have cleared self.qb.db beforehand; gets a
      fresh handle, takes the gtfsdb_cache_links table lock, registers
      this cache build, clears and re-saves the cache, then commits and
      releases the handle (setting self.qb.db back to None).
      """

      # We cleared the db handle earlier, so get a new one, and lock it.
      g.assurt_soft(self.qb.db is None)
      self.qb.db = db_glue.new()
      # FIXME: What's gtfsdb_cache_edges? Or don't we care?
      #self.qb.db.transaction_begin_rw('gtfsdb_cache_edges',
      #                                'gtfsdb_cache_links')
      # EXPLAIN: Who are we competing with? Just other instances of this
      #          script?
      locked = self.qb.db.transaction_lock_try('gtfsdb_cache_links',
                                               caller='gtfsdb_build_cache')
      # Hard assert: if another process holds the lock we cannot
      # safely rebuild the cache.
      g.assurt(locked)

      # Record who built this cache and against which branch/revision
      # and GTFS feed date.
      self.qb.db.insert(
         'gtfsdb_cache_register', {
            'username': self.qb.username,
            'branch_id': self.qb.branch_hier[0][0],
            'revision_id': self.revision_id,
            'gtfs_caldate': self.tfeed_zipdate,
         }, {})

      # Drop stale cache rows, then repopulate.
      self.ccp_clear_cache()

      self.ccp_save_cache()

      log.info('Committing transaction [ccp_cache_populate]')
      self.qb.db.transaction_commit()
      self.qb.db.close()
      self.qb.db = None
# Example #4
 def wire_link_tag(self, qb, lval_tag):
    # Sync self.tagged with this tag link-value: a live link adds the
    # tag's name to the set, a deleted link drops it.
    g.assurt(qb.item_mgr.cache_tags is not None)
    tag_sid = lval_tag.lhs_stack_id
    try:
       the_tag = qb.item_mgr.cache_tags[tag_sid]
    except KeyError:
       # The tag is absent from the item manager's cache; complain.
       log.warning('wire_link_tag: missing tag! item_mgr: %s / stack_id: %d'
                   % (qb.item_mgr, tag_sid,))
       return
    if not the_tag.name:
       # A nameless tag is unexpected.
       g.assurt_soft(False)
    elif lval_tag.deleted:
       # discard() tolerates a name that was never added -- same net
       # effect as remove() with KeyError swallowed.
       self.tagged.discard(the_tag.name)
    else:
       self.tagged.add(the_tag.name)
# Example #5
   def __eq__(self, other):
      """Attribute-wise equality for items.

      Compares each slot in item_base.One.__slots__, then each column
      listed in attr_defns, rounding values to the column's declared
      precision so floating point jitter doesn't break equality.
      """

      attrs_equal = True
      # MAYBE: We only compare item_base.One's __slots__.
      #        Derived classes should really override this fcn.
      #        and compare whatever __slots__ they use and care about.
      if attrs_equal:
         # NOTE: self.__slots__ is the derived class's.
         #       MAYBE: If derived classes care, override this fcn. and check
         #       their own One.__slots__
         #for key in self.__slots__:
         for key in One.__slots__:
            if (getattr(self, key, None)
                != getattr(other, key, None)):
               #import rpdb2;rpdb2.start_embedded_debugger('password',
               #                                           fAllowRemote=True)
               attrs_equal = False
               break
      if attrs_equal:
         for attr_defn in self.attr_defns:
            attr_name = attr_defn[One.scol_pyname]
            try:
               # If this is a floating point number, round it,
               # otherwise __eq__ is liable to be not.
               self_value = round(getattr(self, attr_name, None),
                                  attr_defn[One.scol_precision])
               other_value = round(getattr(other, attr_name, None),
                                   attr_defn[One.scol_precision])
               if self_value != other_value:
                  attrs_equal = False
                  break
               else:
                  # Rounded values match: skip the raw comparison below.
                  continue
            except IndexError:
               # No precision entry for this column; fall through to
               # the raw comparison below.
               pass
            except TypeError:
               # round() failed -- the value is None or not a number
               # (e.g., the attribute is missing). Flag it, then fall
               # back to the raw comparison.
               g.assurt_soft(False)
               pass
            if (getattr(self, attr_name, None)
                != getattr(other, attr_name, None)):
               attrs_equal = False
               break

      return attrs_equal
# Example #6
    def __eq__(self, other):
        """Attribute-wise equality for items.

        Compares each slot in item_base.One.__slots__, then each column
        listed in attr_defns, rounding values to the column's declared
        precision so floating point jitter doesn't break equality.
        """

        attrs_equal = True
        # MAYBE: We only compare item_base.One's __slots__.
        #        Derived classes should really override this fcn.
        #        and compare whatever __slots__ they use and care about.
        if attrs_equal:
            # NOTE: self.__slots__ is the derived class's.
            #       MAYBE: If derived classes care, override this fcn. and check
            #       their own One.__slots__
            #for key in self.__slots__:
            for key in One.__slots__:
                if (getattr(self, key, None) != getattr(other, key, None)):
                    #import rpdb2;rpdb2.start_embedded_debugger('password',
                    #                                           fAllowRemote=True)
                    attrs_equal = False
                    break
        if attrs_equal:
            for attr_defn in self.attr_defns:
                attr_name = attr_defn[One.scol_pyname]
                try:
                    # If this is a floating point number, round it,
                    # otherwise __eq__ is liable to be not.
                    self_value = round(getattr(self, attr_name, None),
                                       attr_defn[One.scol_precision])
                    other_value = round(getattr(other, attr_name, None),
                                        attr_defn[One.scol_precision])
                    if self_value != other_value:
                        attrs_equal = False
                        break
                    else:
                        # Rounded values match: skip the raw compare below.
                        continue
                except IndexError:
                    # No precision entry for this column; fall through
                    # to the raw comparison below.
                    pass
                except TypeError:
                    # round() failed -- the value is None or not a
                    # number (e.g., the attribute is missing). Flag it,
                    # then fall back to the raw comparison.
                    g.assurt_soft(False)
                    pass
                if (getattr(self, attr_name, None) != getattr(
                        other, attr_name, None)):
                    attrs_equal = False
                    break

        return attrs_equal
# Example #7
 def wire_link_tag(self, qb, lval_tag):
     # Keep the self.tagged name set in step with this tag link-value:
     # add the tag's name for a live link, drop it for a deleted one.
     g.assurt(qb.item_mgr.cache_tags is not None)
     try:
         the_tag = qb.item_mgr.cache_tags[lval_tag.lhs_stack_id]
     except KeyError:
         # Tag not in the item manager's cache; log and give up.
         log.warning(
             'wire_link_tag: missing tag! item_mgr: %s / stack_id: %d' % (
                 qb.item_mgr,
                 lval_tag.lhs_stack_id,
             ))
         return
     if the_tag.name:
         if lval_tag.deleted:
             # Same net effect as remove() with KeyError swallowed.
             self.tagged.discard(the_tag.name)
         else:
             self.tagged.add(the_tag.name)
     else:
         # A nameless tag is unexpected.
         g.assurt_soft(False)
# Example #8
 def __init__(self, byway, forward):
     """Wrap a Cyclopath byway as a Graphserver edge payload.

     byway -- the byway whose geometry length and endpoint elevations
              feed the grade calculation.
     forward -- True to traverse lhs->rhs, False for rhs->lhs.
     """
     GenericPyPayload.__init__(self)
     # NOTE: graphserver.core crashes if you don't set self.type
     self.type = LGSTypes.ENUM_edgepayload_t.PL_EXTERNVALUE
     self.rise = 0.0
     #self.fall = 0.0
     self.slog = 1.0
     # BUG FIX: average_grade was previously left unset when
     # byway.geometry_len was not > 0, so readers could hit
     # AttributeError; default it alongside rise/slog.
     self.average_grade = 0.0
     self.byway = byway
     self.forward = forward
     # Calculate the slope of the street.
     # Graphserver's Street defines a rise and a fall. The rise is the number
     # of meters of elevation as you travel the line segment, and the fall is
     # the number of meters of descending. Since Cyclopath only stores the
     # elevation at the endpoints, we can only supply one of those values, and
     # it might be less than its true value.
     if byway.geometry_len > 0.0:
         try:
             # Elevation gained from start to end in the direction of
             # travel (negative when descending).
             if forward:
                 elevation_delta = (float(byway.node_rhs_elevation_m) -
                                    float(byway.node_lhs_elevation_m))
             else:
                 elevation_delta = (float(byway.node_lhs_elevation_m) -
                                    float(byway.node_rhs_elevation_m))
             self.average_grade = elevation_delta / byway.geometry_len
             if elevation_delta > 0.0:
                 self.rise = elevation_delta
         # BUG FIX: was Python 2-only 'except TypeError, e'; the 'as'
         # form works on both Python 2.6+ and Python 3.
         except TypeError as e:
             log.error('TypeError: %s' % (str(e), ))
             log.error('byway: %s' % (str(byway), ))
             log.error('beg_node_id: %s' % (byway.beg_node_id, ))
             log.error('fin_node_id: %s' % (byway.fin_node_id, ))
             log.error('node_lhs_elevation_m: %s' %
                       (byway.node_lhs_elevation_m, ))
             log.error('node_rhs_elevation_m: %s' %
                       (byway.node_rhs_elevation_m, ))
             self.average_grade = 0.0
             #self.rise = 0.0
             # Don't raise, so that we keep loading, even if we can't figure
             # out the elevation.
             # Nope: raise
             g.assurt_soft(False)
# Example #9
 def __init__(self, byway, forward):
    """Wrap a Cyclopath byway as a Graphserver edge payload.

    byway -- the byway whose geometry length and endpoint elevations
             feed the grade calculation.
    forward -- True to traverse lhs->rhs, False for rhs->lhs.
    """
    GenericPyPayload.__init__(self)
    # NOTE: graphserver.core crashes if you don't set self.type
    self.type = LGSTypes.ENUM_edgepayload_t.PL_EXTERNVALUE
    self.rise = 0.0
    #self.fall = 0.0
    self.slog = 1.0
    # BUG FIX: average_grade was previously left unset when
    # byway.geometry_len was not > 0, so readers could hit
    # AttributeError; default it alongside rise/slog.
    self.average_grade = 0.0
    self.byway = byway
    self.forward = forward
    # Calculate the slope of the street.
    # Graphserver's Street defines a rise and a fall. The rise is the number
    # of meters of elevation as you travel the line segment, and the fall is
    # the number of meters of descending. Since Cyclopath only stores the
    # elevation at the endpoints, we can only supply one of those values, and
    # it might be less than its true value.
    if byway.geometry_len > 0.0:
       try:
          # Elevation gained from start to end in the direction of
          # travel (negative when descending).
          if forward:
             elevation_delta = (float(byway.node_rhs_elevation_m)
                                - float(byway.node_lhs_elevation_m))
          else:
             elevation_delta = (float(byway.node_lhs_elevation_m)
                                - float(byway.node_rhs_elevation_m))
          self.average_grade = elevation_delta / byway.geometry_len
          if elevation_delta > 0.0:
             self.rise = elevation_delta
       # BUG FIX: was Python 2-only 'except TypeError, e'; the 'as'
       # form works on both Python 2.6+ and Python 3.
       except TypeError as e:
          log.error('TypeError: %s' % (str(e),))
          log.error('byway: %s' % (str(byway),))
          log.error('beg_node_id: %s' % (byway.beg_node_id,))
          log.error('fin_node_id: %s' % (byway.fin_node_id,))
          log.error('node_lhs_elevation_m: %s'
                    % (byway.node_lhs_elevation_m,))
          log.error('node_rhs_elevation_m: %s'
                    % (byway.node_rhs_elevation_m,))
          self.average_grade = 0.0
          #self.rise = 0.0
          # Don't raise, so that we keep loading, even if we can't figure
          # out the elevation.
          # Nope: raise
          g.assurt_soft(False)
# Example #10
    def save_rstop(self, qb, route, stop_number):
        """Persist one route stop row for the given route.

        Records the route's system/stack IDs and version on this stop,
        resolves the stop's node ID from its (x, y) endpoint when the
        client omitted it, and inserts the row via save_insert.
        """

        log.debug('save_rstop: route: %s / stop_number: %s' % (
            route,
            stop_number,
        ))

        #item_helper.One.save_core(self, qb)
        self.route_id = route.system_id
        self.stop_number = stop_number
        # FIXME: 2012.09.24: Drop the stack_id and version, eh?
        self.route_stack_id = route.stack_id
        self.route_version = route.version

        # 2014.09.13: There's been a bug until now wherein the user drags a new
        # route in the client, and when the route is saved, the client does not
        # send the intermediate stops' node IDs.
        if not self.node_id:

            pt_xy = (
                self.x,
                self.y,
            )
            nodes_byway = node_byway.Many()
            nodes_byway.search_by_endpoint_xy(qb,
                                              pt_xy,
                                              internals_ok=False,
                                              limit_one=True)
            if len(nodes_byway) == 1:
                self.node_id = nodes_byway[0].node_stack_id
            else:
                # Zero matches is tolerable (soft assert); more than one
                # would mean a corrupt endpoint index.
                g.assurt_soft(len(nodes_byway) == 0)
                # BUG FIX: message previously read 'endpoint no found'.
                log.warning('save_rstop: endpoint not found: %s' % (pt_xy, ))
                g.assurt_soft(False)

        self.save_insert(qb, One.item_type_table, One.psql_defns)
# Example #11
   def sql_apply_query_filter_by_text_tc(self, qb, table_col,
                                                   stop_words,
                                                   where_clause,
                                                   conjunction,
                                                   use_outer,
                                                   outer_where):
      """Append text-match predicates for one table column to the query.

      Applies whichever of qb.filters' text filters are set --
      filter_by_text_exact, filter_by_names_exact, filter_by_text_loose,
      and filter_by_text_smart/filter_by_text_full (full text search) --
      OR'ing each predicate onto where_clause (or onto outer_where when
      use_outer is set; loose filter only). Full text search also
      enables the outer select and orders results by ts_rank_cd
      relevance.

      Returns the tuple (where_clause, conjunction, outer_where,) so
      the caller can keep chaining filters.
      """

      # Only select items whose name matches the user's search query.
      # But if multiple search columns or search filters are specified,
      # just OR them all together (this is so, e.g., search threads
      # looks in both the thread name and the post body).

      # See below for a bunch of comments about the different postgres
      # string comparison operators (=, ~/~*, and @@).

      if qb.filters.filter_by_text_exact:
         filter_by_text_exact_lower = qb.filters.filter_by_text_exact.lower()
         where_clause += (
            """
            %s (LOWER(%s) = %s)
            """ % (conjunction,
                   table_col,
                   # qb.db.quoted(qb.filters.filter_by_text_exact),
                   #    %s (LOWER(%s) = LOWER(%s))
                   qb.db.quoted(filter_by_text_exact_lower),))
         conjunction = "OR"

      # This is like the previous filter but allows the user to specify a list.
      if qb.filters.filter_by_names_exact:
         item_names = [x.strip().lower()
                       for x in qb.filters.filter_by_names_exact.split(',')]
         name_clauses = []
         for item_name in item_names:
            # item_name is the empty string if input contained ,,
            if item_name:
               name_clauses.append("(LOWER(gia.name) = %s)"
                                   % (qb.db.quoted(item_name),))
         name_clauses = " OR ".join(name_clauses)
         where_clause += (
            """
            %s (%s)
            """ % (conjunction,
                   name_clauses,))
         conjunction = "OR"

      if qb.filters.filter_by_text_loose:
         # NOTE: ~* does case-insensitive regex matching. This is slower than
         #       using =, but this is how we get a loose search. Consider
         #         select 'a' ~ 'a b c'; ==> false
         #         select 'a b c' ~ 'a'; ==> true
         #       meaning if the user searches 'lake' they get all the lakes.
         if not use_outer:
            where_clause += (
               """
               %s (%s ~* %s)
               """ % (conjunction, 
                      table_col,
                      qb.db.quoted(qb.filters.filter_by_text_loose),))
            conjunction = "OR"
         else:
            sub_where = (" (%s ~* %s) "
                         % (table_col,
                            qb.db.quoted(qb.filters.filter_by_text_loose),))
            if not outer_where:
               outer_where = sub_where
            else:
               outer_where = (" (%s OR (%s ~* %s)) "
                              % (outer_where,
                                 table_col,
                                 qb.db.quoted(qb.filters.filter_by_text_loose),
                                 ))

      # For filter_by_text_smart and filter_by_text_full:
      tsquery = None

      # Callers should only specify columns that are properly indexed for
      # full text search, since that's the column we really want (if we use
      # the normal-named column, Postgres does an inline index on the text).
      # NOTE: table_col is rebound here to the tsvect_* companion column.
      (table, column) = table_col.split('.', 1)
      table_col = '%s.tsvect_%s' % (table, column,)
      # FIXME: l18n. Hard-coding 'english' for now.
      # NOTE: Avoid plainto_tsquery, which applies & and not |.
      #       Or is that what we want??
      #where_clause += (
      #   """
      #   %s (%s @@ plainto_tsquery('english', %s))
      #   """ % (conjunction, 
      #          table_col,
      #          qb.db.quoted(qb.filters.filter_by_text_smart),))

      if qb.filters.filter_by_text_smart:

         # Get a list of "quoted phrases".
         query_text = qb.filters.filter_by_text_smart
         query_terms = re.findall(r'\"[^\"]*\"', query_text)
         # Remove the quotes from each multi-word term.
         raw_terms = [t.strip('"').strip() for t in query_terms]
         # Cull the "quoted phrases" we just extracted from the query string.
         (remainder, num_subs) = re.subn(r'\"[^\"]*\"', r' ', query_text)
         # Add the remaining single-word terms.
         raw_terms.extend(remainder.split())

         # Remove all non-alphanums and search just clean words.
         clean_terms = set()
         for raw_term in raw_terms:
            cleaned = re.sub(r'\W', ' ', raw_term).split()
            for clean_word in cleaned:
               if (not stop_words) or (clean_word not in stop_words.lookup):
                  clean_terms.add(clean_word)
            # Add the original string-term, too.
            if cleaned and ((len(cleaned) > 1) or (cleaned[0] != raw_term)):
               if (not stop_words) or (raw_term not in stop_words.lookup):
                  # 2014.08.19: Watch out for, e.g., "Ruttger's" (as in,
                  # (Ruttger's Resort), which splits on the \W to "Ruttger s"
                  # but whose raw term remains "Ruttger's": the single quote
                  # is special to full text search so remove 'em all.
                  raw_sans_single_quote = re.sub("'", '', raw_term)
                  clean_terms.add(raw_sans_single_quote)

         approved_terms = []
         for clean_term in clean_terms:
            # MAGIC_NUMBER: Short terms are okay when &'ed to another term, but
            # on their own, they're not very meaningful. E.g., searching 'st'
            # would return half the byways. And [lb] cannot think of any one-
            # or two-letter words that would be important to search on their
            # own.
            if len(clean_term) > 2:
               approved_terms.append(clean_term)
         if not approved_terms:
            nothing_to_query = True
         else:
            # Special Case: Check if query is all stop words.
            sql_tsquery = ("SELECT to_tsquery('%s')"
                           % ('|'.join(approved_terms),))
            # Temporarily force fetchall so we can inspect the one row.
            dont_fetchall = qb.db.dont_fetchall
            qb.db.dont_fetchall = False
            rows = qb.db.sql(sql_tsquery)
            qb.db.dont_fetchall = dont_fetchall
            g.assurt(len(rows) == 1)
            nothing_to_query = not rows[0]['to_tsquery']
         if nothing_to_query:
            approved_terms = []
            log.info(
               'sql_apply_query_filter_by_text_tc: only stop words: %s'
               % (qb.filters.filter_by_text_smart,))
            # Stop processing the request now.
            #raise GWIS_Warning(
            #   'Too vague: Please try using more specific search terms.')

         # Quote each and everything.
         # NOTE(review): list.sort() returns None, so the comparison below
         # is always None != None, i.e., False -- the quoted raw-phrase
         # branch never runs (and both lists get sorted in place as a side
         # effect). Probably intended:
         #   sorted(raw_terms) != sorted(approved_terms)
         # Left as-is pending confirmation.
         if raw_terms and (raw_terms.sort() != approved_terms.sort()):
            quoted_terms = ["'%s'" % (' '.join([x for x in raw_terms]),),]
         else:
            quoted_terms = []
         quoted_terms.extend([
            "'%s'" % (qb.db.quoted(term),) for term in approved_terms])
         tsquery = "|".join(quoted_terms)

      if qb.filters.filter_by_text_full:

         # This is only used internally. It's a ready-to-go string, like
         # ''123 main st''|''minneapolis''|''main st''
         tsquery = qb.filters.filter_by_text_full

      if tsquery and (qb.filters.filter_by_text_smart
                   or qb.filters.filter_by_text_full):

         where_clause += (
            """
            %s (%s @@ to_tsquery('english', '%s'))
            """ % (conjunction, table_col, tsquery,))
         conjunction = "OR"
         # Sort the full text results by relevance.
         if True:
            # The ts_rank_cd function returns a number from 0 to whatever,
            # adding 0.1 for every matching word. E.g.,
            #  select ts_rank_cd(
            #    to_tsvector('english', 'route|route|hello|hello'),
            #    to_tsquery('english', 'hello|route'));
            # returns 0.4 and not just 0.2 because the query includes
            # the same words twice... so if the user includes a search
            # term multiple times, any results with that term will be
            # ranked even higher.
            qb.sql_clauses.outer.enabled = True

            # An example of how one might use debuggery_print_next_sql:
            #  conf.debuggery_print_next_sql += 1

            qb.sql_clauses.inner.shared += (
               """
               , %s
               """ % (table_col,))
            qb.sql_clauses.outer.select += (
               """
               , ts_rank_cd(group_item.tsvect_%s,
                            to_tsquery('english', '%s'))
                  AS fts_rank_%s
               """ % (column,
                      tsquery,
                      column,))
            qb.sql_clauses.outer.group_by += (
               """
               , fts_rank_%s
               """ % (column,))
            qb.sql_clauses.outer.order_by_enable = True
            # Route will add edited_date DESC, which we don't want when
            # ranking by text.
            comma_maybe = ', ' if qb.sql_clauses.outer.order_by else ''
            if comma_maybe:
               check_ordering = qb.sql_clauses.outer.order_by.strip()
               if check_ordering == 'edited_date DESC':
                  qb.sql_clauses.outer.order_by = ''
                  comma_maybe = ''
               # When searching multiple columns, we'll order by each of
               # them, e.g., when geocoding, we'll search text in the
               # item name and also look for item comments, so the order-
               # by is a collection of fts_rank_*, e.g.,
               #   ORDER BY fts_rank_name DESC, fts_rank_comments DESC
               # not that ordering by the second column probably does much.
               elif not check_ordering.startswith('fts_rank_'):
                  log.warning(
                     'sql_apply_query_filter_by_text_tc: check_ordering: %s'
                     % (check_ordering,))
                  log.warning(
                     'sql_apply_query_filter_by_text_tc: qb: %s' % (qb,))
                  g.assurt_soft(False)
            qb.sql_clauses.outer.order_by += (
               """
               %s fts_rank_%s DESC
               """ % (comma_maybe,
                      column,))
         if False:
            # We could use levenshtein distance, but that doesn't work well
            # when terms are scrambled, e.g., comparing "my favorite route"
            # to "route favorite my" is same as comparing "my favorite route"
            # to "completely different", i.e., not a good way to rank results
            # (but good for looking for duplicate line segments by name, like
            # how geofeature_io.py works).
            #   sudo apt-get install postgresql-contrib
            #   psql -U postgres ccpv3_lite
            #   ccpv3_lite=# create extension fuzzystrmatch;
            qb.sql_clauses.outer.enabled = True
            qb.sql_clauses.outer.select += (
               """
               , levenshtein(LOWER(group_item.name), %s) AS leven_dist
               """ % (qb.db.quoted(qb.filters.filter_by_text_smart),))
            qb.sql_clauses.outer.group_by += (
               """
               , leven_dist
               """)
            qb.sql_clauses.outer.order_by_enable = True
            comma_maybe = ', ' if qb.sql_clauses.outer.order_by else ''
            qb.sql_clauses.outer.order_by += (
               """
               %s leven_dist ASC
               """ % (comma_maybe,))

      return (where_clause, conjunction, outer_where,)
# Example #12
 def save_core(self, qb):
    """Save the tag: run the base attachment save, then insert the
    row backing the 'tag' table itself."""
    # A tag is expected to have a name by the time it is saved.
    g.assurt_soft(self.name)
    attachment.One.save_core(self, qb)
    # Save to the 'tag' table.
    self.save_insert(qb, One.item_type_table, One.psql_defns)
# Example #13
   def geocode_mapquest_process(json_in, geocoded):
      """Translate a MapQuest geocoder JSON reply into Address objects.

      json_in -- decoded MapQuest response; since we send a single
                 (non-bulk) query, 'results' has at most one entry,
                 whose 'locations' may hold many candidate matches.
      geocoded -- list to which one address.Address is appended per
                  candidate match.
      """

      # We only sent one address (i.e., not a bulk query) so 'results' will
      # have length of 0 or 1, and 'locations' within it might have many.
      g.assurt_soft(len(json_in['results']) <= 1)

      log.debug('geocode_mapquest_proc: received %s results'
                % (str(len(json_in['results'][0]['locations']))
                   if json_in['results'] else 'zero',))

      for matches in json_in['results']:
         for result in matches['locations']:

            addr_g = address.Address()

            # Join the non-empty address parts into one display string.
            addr_g.text = ', '.join(
               [x for x in [result['street'], # House/Street or Xsct
                            result['adminArea5'], # City
                            result['adminArea3'], # State
                            result['postalCode'],] # ZIP(r)
                if x])

            # MAYBE: This includes house number or intersection... should be
            #        fine?
            addr_g.street = result['street']

            addr_g.city = result['adminArea5']
            g.assurt_soft(result['adminArea5Type'] == 'City')

            addr_g.state = result['adminArea3']
            g.assurt_soft(result['adminArea3Type'] == 'State')

            addr_g.county = result['adminArea4']
            g.assurt_soft(result['adminArea4Type'] == 'County')

            addr_g.country = result['adminArea1']
            g.assurt_soft(result['adminArea1Type'] == 'Country')

            addr_g.zip = result['postalCode']

            # EXPLAIN: Is there a difference btw. displayLatLng and latLng?
            addr_g.y = float(result['latLng']['lat'])
            addr_g.x = float(result['latLng']['lng'])

            # See: http://www.mapquestapi.com/geocoding/geocodequality.html
            if (result['geocodeQuality']
                in Geocode.mapquest_confident_categories):
               addr_g.gc_confidence = 100
            else:
               # From MapQuest docs: "The geocodeQualityCode value in a
               #   Geocode Response is a five character string which
               #   describes the quality of the geocoding results.
               #      Character Position 1 2 3 4 5
               #                   Value G S F A P
               #   where:      G = Granularity Code
               #               S = Granularity Sub-Code
               #               F = Full Street Name Confidence Level
               #               A = Administrative Area Confidence Level
               #               P = Postal Code Confidence Level
               #
               # We handle granularities (see mapquest_confident_categories)
               # and administrative and postal codes specially, so
               # all we want to check out is the full street confidence.
               # It's one of: 'A', 'B', 'C', 'X'.
               #   http://www.mapquestapi.com/geocoding/geocodequality.html
               # MAGIC_NUMBER: 2 is Full Street Name Confidence Level Index.
               #               That is, no street name confidence, 0 overall.
               try:
                  addr_g.gc_confidence = Geocode.mapquest_confidence_lookup[
                                             result['geocodeQualityCode'][2]]
               # BUG FIX: was Python 2-only 'except Exception, e'; the
               # 'as' form works on both Python 2.6+ and Python 3.
               except Exception as e:
                  # BUG FIX: pprint.pprint() prints to stdout and returns
                  # None (so this log arg was always 'None');
                  # pprint.pformat() returns the formatted string.
                  log.warning('geocode_mapquest_proc: what gQC?: %s / %s / %s'
                              % (result['geocodeQualityCode'][2],
                                 pprint.pformat(json_in),
                                 str(e),))
                  addr_g.gc_confidence = 0

            addr_g.gc_fulfiller = 'mapq'

            log.debug('geocode_mapquest_proc: adding: %s' % (addr_g,))

            geocoded.append(addr_g)
# Example #14
    def sql_apply_query_filter_by_text_tc(self, qb, table_col, stop_words,
                                          where_clause, conjunction, use_outer,
                                          outer_where):

        # Only select items whose name matches the user's search query.
        # But if multiple search columns or search filters are specified,
        # just OR them all together (this is so, e.g., search threads
        # looks in both the thread name and the post body).

        # See below for a bunch of comments are the different postgres
        # string comparison operators (=, ~/~*, and @@).

        if qb.filters.filter_by_text_exact:
            filter_by_text_exact_lower = qb.filters.filter_by_text_exact.lower(
            )
            where_clause += (
                """
            %s (LOWER(%s) = %s)
            """ % (
                    conjunction,
                    table_col,
                    # qb.db.quoted(qb.filters.filter_by_text_exact),
                    #    %s (LOWER(%s) = LOWER(%s))
                    qb.db.quoted(filter_by_text_exact_lower),
                ))
            conjunction = "OR"

        # This is like the previous filter but allows the user to specify a list.
        if qb.filters.filter_by_names_exact:
            item_names = [
                x.strip().lower()
                for x in qb.filters.filter_by_names_exact.split(',')
            ]
            name_clauses = []
            for item_name in item_names:
                # item_name is the empty string if input contained ,,
                if item_name:
                    name_clauses.append("(LOWER(gia.name) = %s)" %
                                        (qb.db.quoted(item_name), ))
            name_clauses = " OR ".join(name_clauses)
            where_clause += ("""
            %s (%s)
            """ % (
                conjunction,
                name_clauses,
            ))
            conjunction = "OR"

        if qb.filters.filter_by_text_loose:
            # NOTE: ~* does case-insensitive regex matching. This is slower than
            #       using =, but this is how we get a loose search. Consider
            #         select 'a' ~ 'a b c'; ==> false
            #         select 'a b c' ~ 'a'; ==> true
            #       meaning if the user searches 'lake' they get all the lakes.
            if not use_outer:
                where_clause += ("""
               %s (%s ~* %s)
               """ % (
                    conjunction,
                    table_col,
                    qb.db.quoted(qb.filters.filter_by_text_loose),
                ))
                conjunction = "OR"
            else:
                sub_where = (" (%s ~* %s) " % (
                    table_col,
                    qb.db.quoted(qb.filters.filter_by_text_loose),
                ))
                if not outer_where:
                    outer_where = sub_where
                else:
                    outer_where = (" (%s OR (%s ~* %s)) " % (
                        outer_where,
                        table_col,
                        qb.db.quoted(qb.filters.filter_by_text_loose),
                    ))

        # For filter_by_text_smart and filter_by_text_full:
        tsquery = None

        # Callers should only specify columns that are properly indexed for
        # full text search, since that's the column we really want (if we use
        # the normal-named column, Postgres does an inline index on the text).
        (table, column) = table_col.split('.', 1)
        table_col = '%s.tsvect_%s' % (
            table,
            column,
        )
        # FIXME: i18n. Hard-coding 'english' for now.
        # NOTE: Avoid plainto_tsquery, which applies & and not |.
        #       Or is that what we want??
        #where_clause += (
        #   """
        #   %s (%s @@ plainto_tsquery('english', %s))
        #   """ % (conjunction,
        #          table_col,
        #          qb.db.quoted(qb.filters.filter_by_text_smart),))

        if qb.filters.filter_by_text_smart:

            # Get a list of "quoted phrases".
            query_text = qb.filters.filter_by_text_smart
            query_terms = re.findall(r'\"[^\"]*\"', query_text)
            # Remove the quotes from each multi-word term.
            raw_terms = [t.strip('"').strip() for t in query_terms]
            # Cull the "quoted phrases" we just extracted from the query string.
            (remainder, num_subs) = re.subn(r'\"[^\"]*\"', r' ', query_text)
            # Add the remaining single-word terms.
            raw_terms.extend(remainder.split())

            # Remove all non-alphanums and search just clean words.
            clean_terms = set()
            for raw_term in raw_terms:
                cleaned = re.sub(r'\W', ' ', raw_term).split()
                for clean_word in cleaned:
                    if (not stop_words) or (clean_word
                                            not in stop_words.lookup):
                        clean_terms.add(clean_word)
                # Add the original string-term, too.
                if cleaned and ((len(cleaned) > 1) or
                                (cleaned[0] != raw_term)):
                    if (not stop_words) or (raw_term not in stop_words.lookup):
                        # 2014.08.19: Watch out for, e.g., "Ruttger's" (as in
                        # Ruttger's Resort), which splits on the \W to "Ruttger s"
                        # but whose raw term remains "Ruttger's": the single quote
                        # is special to full text search so remove 'em all.
                        raw_sans_single_quote = re.sub("'", '', raw_term)
                        clean_terms.add(raw_sans_single_quote)

            approved_terms = []
            for clean_term in clean_terms:
                # MAGIC_NUMBER: Short terms are okay when &'ed to another term, but
                # on their own, they're not very meaningful. E.g., searching 'st'
                # would return half the byways. And [lb] cannot think of any one-
                # or two-letter words that would be important to search on their
                # own.
                if len(clean_term) > 2:
                    approved_terms.append(clean_term)
            if not approved_terms:
                nothing_to_query = True
            else:
                # Special Case: Check if query is all stop words.
                sql_tsquery = ("SELECT to_tsquery('%s')" %
                               ('|'.join(approved_terms), ))
                dont_fetchall = qb.db.dont_fetchall
                qb.db.dont_fetchall = False
                rows = qb.db.sql(sql_tsquery)
                qb.db.dont_fetchall = dont_fetchall
                g.assurt(len(rows) == 1)
                nothing_to_query = not rows[0]['to_tsquery']
            if nothing_to_query:
                approved_terms = []
                log.info(
                    'sql_apply_query_filter_by_text_tc: only stop words: %s' %
                    (qb.filters.filter_by_text_smart, ))
                # Stop processing the request now.
                #raise GWIS_Warning(
                #   'Too vague: Please try using more specific search terms.')

            # Quote each and everything.
            if raw_terms and (raw_terms.sort() != approved_terms.sort()):
                quoted_terms = [
                    "'%s'" % (' '.join([x for x in raw_terms]), ),
                ]
            else:
                quoted_terms = []
            quoted_terms.extend(
                ["'%s'" % (qb.db.quoted(term), ) for term in approved_terms])
            tsquery = "|".join(quoted_terms)

        if qb.filters.filter_by_text_full:

            # This is only used internally. It's a ready-to-go string, like
            # ''123 main st''|''minneapolis''|''main st''
            tsquery = qb.filters.filter_by_text_full

        if tsquery and (qb.filters.filter_by_text_smart
                        or qb.filters.filter_by_text_full):

            where_clause += ("""
            %s (%s @@ to_tsquery('english', '%s'))
            """ % (
                conjunction,
                table_col,
                tsquery,
            ))
            conjunction = "OR"
            # Sort the full text results by relevance.
            if True:
                # The ts_rank_cd function returns a number from 0 to whatever,
                # adding 0.1 for every matching word. E.g.,
                #  select ts_rank_cd(
                #    to_tsvector('english', 'route|route|hello|hello'),
                #    to_tsquery('english', 'hello|route'));
                # returns 0.4 and not just 0.2 because the query includes
                # the same words twice... so if the user includes a search
                # term multiple times, any results with that term will be
                # ranked even higher.
                qb.sql_clauses.outer.enabled = True

                # An example of how one might use debuggery_print_next_sql:
                #  conf.debuggery_print_next_sql += 1

                qb.sql_clauses.inner.shared += ("""
               , %s
               """ % (table_col, ))
                qb.sql_clauses.outer.select += ("""
               , ts_rank_cd(group_item.tsvect_%s,
                            to_tsquery('english', '%s'))
                  AS fts_rank_%s
               """ % (
                    column,
                    tsquery,
                    column,
                ))
                qb.sql_clauses.outer.group_by += ("""
               , fts_rank_%s
               """ % (column, ))
                qb.sql_clauses.outer.order_by_enable = True
                # Route will add edited_date DESC, which we don't want when
                # ranking by text.
                comma_maybe = ', ' if qb.sql_clauses.outer.order_by else ''
                if comma_maybe:
                    check_ordering = qb.sql_clauses.outer.order_by.strip()
                    if check_ordering == 'edited_date DESC':
                        qb.sql_clauses.outer.order_by = ''
                        comma_maybe = ''
                    # When searching multiple columns, we'll order by each of
                    # them, e.g., when geocoding, we'll search text in the
                    # item name and also look for item comments, so the order-
                    # by is a collection of fts_rank_*, e.g.,
                    #   ORDER BY fts_rank_name DESC, fts_rank_comments DESC
                    # not that ordering by the second column probably does much.
                    elif not check_ordering.startswith('fts_rank_'):
                        log.warning(
                            'sql_apply_query_filter_by_text_tc: check_ordering: %s'
                            % (check_ordering, ))
                        log.warning(
                            'sql_apply_query_filter_by_text_tc: qb: %s' %
                            (qb, ))
                        g.assurt_soft(False)
                qb.sql_clauses.outer.order_by += ("""
               %s fts_rank_%s DESC
               """ % (
                    comma_maybe,
                    column,
                ))
            if False:
                # We could use levenshtein distance, but that doesn't work well
                # when terms are scrambled, e.g., comparing "my favorite route"
                # to "route favorite my" is same as comparing "my favorite route"
                # to "completely different", i.e., not a good way to rank results
                # (but good for looking for duplicate line segments by name, like
                # how geofeature_io.py works).
                #   sudo apt-get install postgresql-contrib
                #   psql -U postgres ccpv3_lite
                #   ccpv3_lite=# create extension fuzzystrmatch;
                qb.sql_clauses.outer.enabled = True
                qb.sql_clauses.outer.select += ("""
               , levenshtein(LOWER(group_item.name), %s) AS leven_dist
               """ % (qb.db.quoted(qb.filters.filter_by_text_smart), ))
                qb.sql_clauses.outer.group_by += ("""
               , leven_dist
               """)
                qb.sql_clauses.outer.order_by_enable = True
                comma_maybe = ', ' if qb.sql_clauses.outer.order_by else ''
                qb.sql_clauses.outer.order_by += ("""
               %s leven_dist ASC
               """ % (comma_maybe, ))

        return (
            where_clause,
            conjunction,
            outer_where,
        )
Beispiel #15
0
 def save_core(self, qb):
     """Persist this tag: shared attachment state, then the tag-specific row.

     Args:
        qb: the query builder / db context passed through to the save helpers.
     """
     # A tag without a name is bogus; soft-assert so the problem is logged
     # without aborting the whole save.
     g.assurt_soft(self.name)
     # Let the attachment base class persist the shared item columns first.
     attachment.One.save_core(self, qb)
     # Save to the 'tag' table.
     self.save_insert(qb, One.item_type_table, One.psql_defns)