def load_make_graph_insert_new(self, new_byway):
    """Cache a byway's node locations and its directed route step(s).

    Makes sure self.edges has nested dicts for both orderings of the
    byway's two node IDs, stores each node's x,y in self.node_xys, and then
    builds one route_step.One per allowed direction of travel. Each step is
    stored in self.edges[from][to] keyed by its byway_stack_id and is also
    handed to self.step_lookup_append.

    new_byway -- read for beg_node_id, fin_node_id, one_way and the WKT
                 point attributes beg_point, beg2_point, fin_point and
                 fin2_point (presumably the endpoints and the vertices
                 adjacent to them -- confirm against the byway class).
    """
    beg_nid = new_byway.beg_node_id
    fin_nid = new_byway.fin_node_id

    # Prime the two-level edge dicts for both orderings of the node pair.
    self.edges.setdefault(beg_nid, dict()).setdefault(fin_nid, dict())
    self.edges.setdefault(fin_nid, dict()).setdefault(beg_nid, dict())

    # Parse the WKT points; they feed the node cache and the direction
    # calculations below.
    # FIXME: Does it matter that these values do not always match
    #        node_endpoint, because of endpoint drift? (There's a bug about
    #        endpoints with the same stack ID not sharing the same x,y.)
    #        2012.07.30: Probably not; the points seem to be used only for
    #        distances and directions between nodes.
    xy_beg = geometry.wkt_point_to_xy(new_byway.beg_point)
    xy_beg2 = geometry.wkt_point_to_xy(new_byway.beg2_point)
    xy_fin = geometry.wkt_point_to_xy(new_byway.fin_point)
    xy_fin2 = geometry.wkt_point_to_xy(new_byway.fin2_point)

    # Remember where each node is.
    # FIXME: Does this leak when node IDs are orphaned?
    # BUG nnnn: Byways whose node endpoints have the same IDs but different
    #           geometry overwrite each other's value here.
    self.node_xys[beg_nid] = xy_beg
    self.node_xys[fin_nid] = xy_fin

    # MAGIC NUMBER: one_way values 1 and -1 indicate the direction of a
    # one-way. Each row is: (forward flag, the one_way value that rules
    # this direction out, from node, to node, v_dir args for the entry
    # direction, v_dir args for the exit direction).
    travel_plans = (
        (True, -1, beg_nid, fin_nid, (xy_beg, xy_beg2), (xy_fin2, xy_fin)),
        (False, 1, fin_nid, beg_nid, (xy_fin, xy_fin2), (xy_beg2, xy_beg)),
    )
    for plan in travel_plans:
        (is_forward, ruled_out_by, from_nid, to_nid, entry_pts, exit_pts) = plan
        if new_byway.one_way != ruled_out_by:
            step = route_step.One()
            step.travel_mode = Travel_Mode.bicycle
            step.init_from_byway(new_byway)
            step.forward = is_forward
            step.dir_entry = geometry.v_dir(*entry_pts)
            step.dir_exit = geometry.v_dir(*exit_pts)
            self.edges[from_nid][to_nid][step.byway_stack_id] = step
            # Bug 2641: Poor Python Memory Management.
            # BUG nnnn: Directional attrs/tags. Maybe self.step_lookup
            #           should always hold a tuple with two entries, one
            #           for each direction?
            self.step_lookup_append(step.byway_stack_id, step)
def save_related_maybe(self, qb, rid):
    """Save related rows, then create or sanity-check the node_endpt_xy row.

    Always delegates to permissions_free.One.save_related_maybe first. For
    a first-version item (self.version == 1) it then looks up
    node_endpt_xy for self.stack_id. If no row exists, one is inserted
    using self.endpoint_wkt. If a row exists, its geometry is compared to
    ours at conf.node_precision and a mismatch is only logged as a warning.

    qb  -- query builder; qb.db.sql() runs the SQL.
    rid -- passed straight through to the parent implementation.
    """
    permissions_free.One.save_related_maybe(self, qb, rid)

    # Only a brand-new node_endpoint can need a node_endpt_xy entry.
    if self.version != 1:
        return

    g.assurt(self.endpoint_wkt)

    # For the public basemap, a new node_endpoint means a new
    # node_endpt_xy. For branches that is not necessarily so: if another
    # branch uses the same node_endpoint, the node_endpt_xy row should
    # already exist. So look before inserting.
    lookup_sql = (
        " SELECT ST_AsText(endpoint_xy) AS existing_xy_wkt"
        " FROM node_endpt_xy"
        " WHERE node_stack_id = %d "
        % (self.stack_id,))
    found = qb.db.sql(lookup_sql)

    if not found:
        # This endpoint truly is new.
        insert_sql = (
            " INSERT INTO node_endpt_xy"
            " (node_stack_id, endpoint_xy)"
            " VALUES (%d, '%s') "
            % (self.stack_id, self.endpoint_wkt,))
        qb.db.sql(insert_sql)
        return

    # The endpoint is being used, or was previously used, by another
    # branch. Just check whether the stored geometry agrees with ours.
    g.assurt(len(found) == 1)
    theirs = geometry.wkt_point_to_xy(
        found[0]['existing_xy_wkt'], precision=conf.node_precision)
    ours = geometry.wkt_point_to_xy(
        self.endpoint_wkt, precision=conf.node_precision)
    if theirs != ours:
        log.warning(
            'save_related_maybe: endpt_xy unequal: theirs: %s / ours: %s'
            % (theirs, ours,))
def load_make_graph_insert_new(self, new_byway):
    """Add a byway's vertices and directed edge(s) to self.gserver.

    Both node IDs are added as string-named vertices. For each allowed
    direction of travel a Payload_Byway is built, given its entry and exit
    directions, added as an edge, and passed to self.step_lookup_append
    under the byway's stack ID.
    (C.f. planner.routed_p1.tgraph.load_make_graph_insert_new.)

    new_byway -- must not be None; read for beg_node_id, fin_node_id,
                 stack_id, one_way and the WKT point attributes beg_point,
                 beg2_point, fin_point and fin2_point.
    """
    g.assurt(new_byway is not None)

    # Parse the WKT points used to work out the travel directions.
    xy_beg = geometry.wkt_point_to_xy(new_byway.beg_point)
    xy_beg2 = geometry.wkt_point_to_xy(new_byway.beg2_point)
    xy_fin = geometry.wkt_point_to_xy(new_byway.fin_point)
    xy_fin2 = geometry.wkt_point_to_xy(new_byway.fin2_point)

    # Vertices are named by the string form of the node IDs.
    beg_vertex = str(new_byway.beg_node_id)
    fin_vertex = str(new_byway.fin_node_id)

    # FIXME: Byways share vertices, so should add_vertex really be an
    #        add_vertex_maybe? Or does the graph handle that?
    self.gserver.add_vertex(beg_vertex)
    self.gserver.add_vertex(fin_vertex)
    # FIXME: Use remove_vertex when a byway is deleted -- or maybe not:
    #        there's no remove_edge -- or is remove_edge implied?
    # FIXME: Guarantee that node IDs are unique to revision and branch?
    #        Are they already (at least, are node IDs unique across
    #        revision changes)?

    # MAGIC NUMBER: one_way values 1 and -1 indicate the direction of a
    # one-way. Each row is: (forward flag, the one_way value that rules
    # this direction out, from vertex, to vertex, v_dir args for the entry
    # direction, v_dir args for the exit direction).
    travel_plans = (
        (True, -1, beg_vertex, fin_vertex,
         (xy_beg, xy_beg2), (xy_fin2, xy_fin)),
        (False, 1, fin_vertex, beg_vertex,
         (xy_fin, xy_fin2), (xy_beg2, xy_beg)),
    )
    for plan in travel_plans:
        (is_forward, ruled_out_by, from_v, to_v, entry_pts, exit_pts) = plan
        if new_byway.one_way != ruled_out_by:
            payload = Payload_Byway(new_byway, is_forward)
            payload.dir_entry = geometry.v_dir(*entry_pts)
            payload.dir_exit = geometry.v_dir(*exit_pts)
            self.gserver.add_edge(from_v, to_v, payload)
            self.step_lookup_append(new_byway.stack_id, payload)
def insert_elevations(db, clean):
    """Insert an elevation for every byway node missing from node_attribute.

    db    -- database handle; db.sql() runs the query and returns the rows.
    clean -- when truthy, a TRUNCATE of node_attribute is prepended to the
             query so that every node is loaded afresh.
    """
    reset_sql = ""
    if clean:
        info('Truncating node_attribute table.')
        reset_sql = "TRUNCATE node_attribute; "

    info('Loading byway nodes into memory.')
    # This query is slow (as it is actually 4 queries), but it saves time
    # in the end by retrieving only the nodes that need to be updated.
    nodes_sql = (
        " SELECT node_id, nodes.geometry"
        " FROM ("
        " (SELECT beg_node_id AS node_id,"
        " ST_AsText(StartPoint(geometry)) AS geometry"
        " FROM geofeature )"
        " UNION"
        " (SELECT end_node_id as node_id,"
        " ST_AsText(EndPoint(geometry)) AS geometry"
        " FROM geofeature)"
        " ) AS nodes"
        " WHERE geometry IS NOT NULL"
        " AND node_id NOT IN (SELECT node_id FROM node_attribute)"
        " GROUP BY node_id, geometry "
    )
    rows = db.sql(reset_sql + nodes_sql)
    info('DONE.')

    progress = misc.Progress_Bar(len(rows))
    for row in rows:
        node_id = row['node_id']
        xy = geometry.wkt_point_to_xy(row['geometry'])
        if xy is None:
            error('Could not parse point')
        else:
            elevation.node_elevation_insert(db, node_id, xy)
        # NOTE(review): the flattened source does not show nesting; the
        # progress tick is assumed to be once per row -- confirm.
        progress.inc()
    # Python 2 print statement: writes a bare newline (presumably to end
    # the progress bar's output line).
    print
def geocode_external(qb, results_latlon):
    """Reproject geocoded lat/lon results, keeping those near the branch.

    NOTE: the first statement is g.assurt(False) and the original comment
    says "Not called", so this function is retired; the body is kept for
    reference.

    For each address, its x,y is transformed from conf.srid_latlon to
    conf.default_srid, and the address is kept only if the transformed
    point lies within conf.geocode_buffer of the branch's coverage_area.
    Kept addresses have their x, y and gc_fulfiller overwritten in place.

    qb             -- query builder; uses qb.db.sql() and
                      qb.branch_hier[0][0] (used as the branch stack ID).
    results_latlon -- iterable of address objects with x and y attributes.

    Returns the list of addresses that were kept.
    """
    g.assurt(False)  # Not called.

    reproject_sql = (
        " SELECT ST_AsText(geom.g) AS xy"
        " FROM (SELECT (ST_Transform("
        " ST_GeomFromEWKT("
        " 'SRID=%(srid_latlon)d;POINT(%(x)f %(y)f)'),"
        " %(srid_default)d)) AS g) AS geom"
        " WHERE ST_DWithin("
        " (SELECT coverage_area FROM branch"
        " JOIN item_versioned USING (system_id)"
        " WHERE branch.stack_id = %(branch_sid)d"
        " AND item_versioned.valid_until_rid = %(rid_inf)d),"
        " geom.g,"
        " %(buffer)d); "
    )

    kept = []
    for addr in results_latlon:
        sql_args = {
            'x': addr.x,
            'y': addr.y,
            'srid_latlon': conf.srid_latlon,
            'srid_default': conf.default_srid,
            'branch_sid': qb.branch_hier[0][0],
            'rid_inf': conf.rid_inf,
            'buffer': conf.geocode_buffer,
        }
        rows = qb.db.sql(reproject_sql % sql_args)
        if not rows:
            # The result from the geocode service is outside of our map,
            # so we can safely ignore it (see Bug 1985).
            continue
        addr.x, addr.y = geometry.wkt_point_to_xy(rows[0]['xy'])
        addr.gc_fulfiller = 'This fcn not called'
        kept.append(addr)
    return kept
def store_result_item(self, db, item):
    """Populate this new search result from an exactly-matching item.

    Copies the item's name, type and stack ID onto self, marks the match
    as full confidence, and -- for byways, regions and waypoints -- looks
    up the item's center via SQL and appends a Search_Result_Geofeature to
    self.result_gfs. Other item types are only logged.

    db   -- database handle; db.sql() runs the center query.
    item -- the matched item; read for name, item_type_id, stack_id,
            geometry, geometry_svg and (when present) beg_node_id and
            fin_node_id.
    """
    # This fcn. is called on a new object only.
    g.assurt(not self.result_gfs)

    # This fcn. assumes the item is an exact match.
    self.gc_confidence = 100
    self.gc_fulfiller = 'ccp_gf'

    self.gf_name = item.name
    self.gf_type_id = item.item_type_id
    self.gf_stack_id = item.stack_id

    # MAGIC_NUMBER: See Full_Text_Query_Part.vect_stop_words.
    self.ts_include['name'] = True

    try:
        self.node_ids.add(item.beg_node_id)
        self.node_ids.add(item.fin_node_id)
    except AttributeError:
        # Not a byway.
        pass

    result = Search_Result_Geofeature(self)
    result.stack_id = item.stack_id

    # MAYBE: Make a qb.filters to add the center SQL in the original item
    #        query.
    # BUG nnnn/LOW PRIORITY/MEH: Handle other item types, like ROUTE.
    type_id = self.gf_type_id
    if type_id == Item_Type.BYWAY:
        # Use the SVG geometry here, not item.geometry.
        result.geometry = item.geometry_svg
        center_expr = byway.One.search_center_sql(geom=item.geometry)
    elif type_id == Item_Type.REGION:
        # Use the SVG geometry here, not item.geometry.
        result.geometry = item.geometry_svg
        center_expr = region.One.search_center_sql(geom=item.geometry)
    elif type_id == Item_Type.WAYPOINT:
        # Geometry is skipped: it would be the same as 'center', so it is
        # left unset.
        center_expr = waypoint.One.search_center_sql(geom=item.geometry)
    else:
        # This happens if you, e.g., put a route stack ID in the search
        # box and search on that. We'll find the route item, but we're
        # currently not wired to return it. Raising GWIS_Error here was
        # tried and judged wrong, so just log it.
        # BUG nnnn: Search by stack ID for any item type and open that
        #           item (load its panel, find it on the map, etc.).
        log.info('store_result_item: Unexpected obj. type: %s / %s'
                 % (self.gf_type_id, item,))
        center_expr = None

    # NOTE(review): the flattened source does not show whether the
    # width/height/append statements below sit inside the original
    # `if center_sql:` branch. They are treated as inside it, matching the
    # "not wired to return it" comment above -- confirm against the
    # unmangled file.
    if not center_expr:
        return

    rows = db.sql("SELECT %s AS center" % (center_expr,))
    g.assurt(len(rows) == 1)
    result.center = rows[0]['center']
    (result.x, result.y,) = geometry.wkt_point_to_xy(result.center)

    # Width and height are never used. They are set to -1 just to mess up
    # any future code that tries to use them without coming here, reading
    # this comment and fixing this code.
    result.width = -1
    result.height = -1

    self.result_gfs.append(result)
def store_result_raw(self, row, raw_queries, search_map_obj): # Only call this fcn. on a new object, i.e., one that's not a group yet. g.assurt(not self.result_gfs) # Rather than use the item's real name, use the one that might include # the name of the region in parantheses. # Instead of: self.gf_name = row['gf_name'] self.gf_name = row['name_enclosed'] # Since we searched using item_user_access, we had to do a little extra # magic to get the item_type_id of the result. self.gf_type_id = row['real_item_type_id'] # Finally, a straight forward value to consume. self.gf_stack_id = row['stack_id'] # Consume all the ts_in/ts_ex_* values into two lookups. for vect_name in search_full_text.Full_Text_Query_Part.ts_vects: if vect_name != 'all': self.ts_include[vect_name] = bool(int(row['ts_in_' + vect_name])) self.ts_exclude[vect_name] = bool(int(row['ts_ex_' + vect_name])) # Remember byway node IDs so we can assemble connected byways into a # single group of results. try: self.node_ids.add(row['beg_node_id']) self.node_ids.add(row['fin_node_id']) except AttributeError: pass # Not a byway. # Setup the first, and maybe only, result geometry object. gf_res = Search_Result_Geofeature(self) gf_res.stack_id = row['stack_id'] if ((self.gf_type_id == Item_Type.BYWAY) or (self.gf_type_id == Item_Type.REGION)): gf_res.geometry = row['geometry_svg'] elif self.gf_type_id == Item_Type.WAYPOINT: # Skipping: geometry; would be same as 'center', so just leave None. pass else: raise GWIS_Error('Unexpected object type: %s.' % (self.gf_type_id,)) gf_res.center = row['center'] (gf_res.x, gf_res.y,) = geometry.wkt_point_to_xy(gf_res.center) gf_res.width = row['width'] gf_res.height = row['height'] # 2014.06.20: [lb] retiring use of ts_rank and switching to Levenshtein. # # What we're trying to do is to order matches better. 
# # Consider searching 'dupont ave s': the 's' is a full text stop word, # so the results will include anything with 'dupont' and 'ave', e.g., # 'dupont ave s' and 'dupont ave n'. # # The problem with text search ranking -- or maybe it's how I've been # using it -- is that it doesn't really normalize well, i.e., two # separate comparisons on two different sets of equivalent terms # produces two different values. # # E.g, # SELECT ts_rank_cd(to_tsvector('english', 'dupont|ave|s'), # to_tsquery('english', 'dupont|ave|s')); # returns 0.2, even though the search term and the target exactly match. # Basically, for every non-stop word that matches ('dupont' and 'ave'), # the ts_rank_cd function increases the return value by 0.1 (starting # from 0). # # As another example, consider a four word term that's mostly stop words. # # SELECT to_tsvector('english', 'what&is&my&name'); # SELECT to_tsquery('english', 'what&is&my&name'); # both return # 'name':4 # i.e., the other words are stop words. # and so we can expect the rank to be 0.1: # SELECT ts_rank_cd(to_tsvector('english', 'what&is&my&name'), # to_tsquery('english', 'what&is&my&name')); # # What we want is a value from 0 to 1, where 1 means there's a strong # match between the strings, and 0 means no match. So I don't get why # ts_rank_cd basically just returns a count of words that match, but # it doesn't scale it according to the total number of terms (e.g., so # a one-word match for a one-word query returns a value of 1). # # The ts_rank and ts_rank_cd functions accept a normalization parameter, # but it doesn't seem to help. They also accept a weights parameter, but # [lb] doesn't really want to bother trying to figure out how it works, # 'cause I'm not sure it would help. 
# "The weight arrays specify how heavily to weigh each category of word, # in the order: # {D-weight, C-weight, B-weight, A-weight} # If no weights are provided, then these defaults are used: # {0.1, 0.2, 0.4, 1.0}" # # Anyway, one last comment about ts_rank and then back to Levenshtein. # # The weights or word category might be how full text search judges # the strength of each term, e.g., the more unique and less-often used # a word is in English, the greater the value of the match for that term. # # SELECT to_tsvector('english','name|building|wonderful|downhill|party'); # 'build':2 'downhil':4 'name':1 'parti':5 'wonder':3 # # SELECT to_tsvector('english','name&building&wonderful&downhill&party'); # 'build':2 'downhil':4 'name':1 'parti':5 'wonder':3 # # SELECT to_tsquery('english', 'name|building|wonderful|downhill|party'); # ( ( ( 'name' | 'build' ) | 'wonder' ) | 'downhil' ) | 'parti' # # SELECT to_tsquery('english', 'name&building&wonderful&downhill&party'); # 'name' & 'build' & 'wonder' & 'downhil' & 'parti' # # And here's a cross-reference of all the ts_rank(_cd) values. # # Note that the normalization integer is a bit flag from 0 to 32. # see: http://www.postgresql.org/docs/8.3/static/textsearch-controls.html # # Query: a|b|c|d|e: 'name|building|wonderful|downhill|party' # Query: a&b&c&d&e: 'name&building&wonderful&downhill&party' # # ts_rank ts_rank_cd ts_rank ts_rank_cd # normalization a|b|c|d|e a|b|c|d|e a&b&c&d&e a&b&c&d&e # ts_rank 0 0.0607927 0.5 0.644239 0.1 # ts_rank 1 0.0235178 0.279055 0.249226 0.0558111 # ts_rank 2 0.0121585 0.1 0.128848 0.02 # ts_rank 4 0.0607927 0.4 0.644239 0.1 # ts_rank 8 0.0121585 0.1 0.128848 0.02 # ts_rank 16 0.0235178 0.193426 0.249226 0.0386853 # ts_rank 32 0.0573088 0.333333 0.391816 0.0909091 # # Although, one problem with Levenshtein is that it compares edit # distance, and it doesn't consider words. 
Consider: # # >>> Levenshtein.ratio('drinking gateway', 'drinking gateway') # 1.0 # >>> Levenshtein.ratio('gateway fountain', 'fountain gateway') # 0.5 # # compared to: # # SELECT ts_rank(to_tsvector('english', 'gateway|fountain'), # to_tsquery('english', 'gateway|fountain')); # SELECT ts_rank(to_tsvector('english', 'gateway|fountain'), # to_tsquery('english', 'fountain|gateway')); # which both return # ts_rank # ----------- # 0.0607927 # # and # SELECT ts_rank(to_tsvector('english', 'gateway|fountain'), # to_tsquery('english', 'gateway&fountain')); # SELECT ts_rank(to_tsvector('english', 'gateway|fountain'), # to_tsquery('english', 'fountain&gateway')); # which both return # ts_rank # ----------- # 0.0991032 # # where # # SELECT to_tsvector('english', 'gateway|fountain'); # to_tsvector # -------------------------- # 'fountain':2 'gateway':1 # # so maybe Levenshtein is still better. But it also feels like I'm # missing something about ts_rank... like how to take advantage of # the word weights better?.... # # Interesting: # >>> Levenshtein.ratio('999999 dupont ave s, mpls, mn', 'dupont ave s') # 0.5853658536585366 # >>> Levenshtein.ratio('999999 dupont ave s, mpls, mn', 'dupont ave n') # 0.5853658536585366 # # >>> Levenshtein.ratio('dupont ave s mpls mn', 'dupont ave s') # 0.75 # >>> Levenshtein.ratio('dupont ave s mpls mn', 'dupont ave n') # 0.75 # >>> Levenshtein.ratio('dupont ave s', 'dupont ave s') # 1.0 # >>> Levenshtein.ratio('dupont ave s', 'dupont ave n') # 0.9166666666666666 # The raw_queries list is: [clean_query, full_street, full_street2,], # so rank a clean_query match higher. # Also, self.gf_name is row['name_enclosed'], which might include # a region name in parantheses, so here we use row['gf_name'] (which # then means that we don't rank points-in-region higher artifically, # e.g., searching for "Minneapolis" finds all points in Minneapolis... # I think... # TESTME: Search neighborhood name: Do you get all points within? 
try: gf_basename = row['name'].lower() gf_name_len = len(row['name'].split()) except Exception, e: # Unnamed. gf_basename = '' gf_name_len = 0