def getAddressJson(eid): # json with all geocoded data j = {} with db_old.getCursor() as cur: cur = db_old.execute(cur, "SELECT json FROM entities WHERE eid=%s", [eid]) row = cur.fetchone() if row is None: return None j = json.loads(row["json"]) # TODO: do not duplicate this with code in verejne/ def getComponent(json, typeName): try: for component in json[0]["address_components"]: if typeName in component["types"]: return component["long_name"] return "" except: return "" # types description: https://developers.google.com/maps/documentation/geocoding/intro#Types # street / city can be defined in multiple ways address = { "street": ( getComponent(j, "street_address") + getComponent(j, "route") + getComponent(j, "intersection") + " " + getComponent(j, "street_number") ), "city": getComponent(j, "locality"), "zip": getComponent(j, "postal_code"), "country": getComponent(j, "country"), } return address
def get(self): # Parse input try: firstname = self.request.GET["firstname"].encode("utf8") surname = self.request.GET["surname"].encode("utf8") address = self.request.GET["address"].encode("utf8") except: self.abort(400, detail="Unable to parse input text") # Carry-out the logic q = """ SELECT DISTINCT eid AS eid FROM entities WHERE to_tsvector('unaccent', entity_name) @@ plainto_tsquery('unaccent', %s) AND to_tsvector('unaccent', address) @@ plainto_tsquery('unaccent', %s) LIMIT 20; """ with db_old.getCursor() as cur: cur = db_old.execute(cur, q, (firstname + ' ' + surname, address)) result = [] for row in cur: try: result.append({"eid": row["eid"]}) except: pass self.returnJSON(result)
def get(self): try: eid = int(self.request.GET["eid"]) except: self.abort(400, detail="Could not parse parameter 'eid' as an integer") # Copy total sum of contracts if present result = {} entities = webapp2.get_app().registry['entities'] if eid in entities.eid_to_index: entity = entities.entities[entities.eid_to_index[eid]] result["total_contracts"] = entity.contracts # load info data from individual tables data_sources = webapp2.get_app().registry['data_sources'] for table in data_sources: print "Processing table", table columns = ",".join(data_sources[table]) sql = "SELECT DISTINCT " + columns + " FROM entities" if (table != "entities"): sql += " JOIN " + table + " ON " + "entities.id=" + table + ".id" sql += " WHERE entities.eid=%s" with db_old.getCursor() as cur: cur = db_old.execute(cur, sql, [eid]) current = [] for row in cur: # TODO: is this needed? r = {key: row[key] for key in row} current.append(r) result[table] = current result["related"] = entities.getRelated(eid) self.returnJSON(result)
def getForTable(table): sql = "SELECT eid FROM " + table + \ " JOIN entities ON entities.id = " + table + ".id" + \ " WHERE ico = %s" + \ " LIMIT 1" with db_old.getCursor() as cur: cur = db_old.execute(cur, sql, [ico]) row = cur.fetchone() if row is None: return None return row["eid"]
def IcoToLatLngMap(): output_map = {} for table in ["orsresd_data", "firmy_data", "new_orsr_data"]: with db_old.getCursor() as cur: sql = "SELECT ico, lat, lng FROM " + table + \ " JOIN entities on entities.id = " + table + ".id" + \ " WHERE ico IS NOT NULL" db_old.execute(cur, sql) for row in cur: output_map[int(row["ico"])] = (row["lat"], row["lng"]) return output_map
def loadExtra(self, sql, column, attribute, mapping=None): log("loadExtra " + column + ", " + attribute) with db.getCursor() as cur: cur = db.execute(cur, sql) for row in cur: eid = row["eid"] if not eid in self.eid_to_index: continue value = row[column] if mapping is not None: value = mapping(value) setattr(self.entities[self.eid_to_index[eid]], attribute, value)
def getRelated(self, eid): with db.getCursor() as cur: indices = set() cur = db.execute(cur, "(select distinct eid1 as eid from related where eid2=%s and " \ + "eid2!=eid1) union (select distinct eid2 as eid from related where eid1=%s and eid2!=eid1)", [eid, eid]) for row in cur: indices.add(row["eid"]) result = [ RawJson(self.entities[self.eid_to_index[index]].json) for index in indices if index in self.eid_to_index ] return result
def getColumnForTableIco(table, column, ico): sql = "SELECT " + column + " FROM " + table + \ " JOIN entities ON entities.id = " + table + ".id" + \ " WHERE ico = %s" + \ " LIMIT 1" with db_old.getCursor() as cur: try: cur = db_old.execute(cur, sql, [ico]) row = cur.fetchone() if row is None: return None return row[column] except: return None
def get(self): try: text = self.request.GET["text"].encode("utf8") except: self.abort(400, detail="Unable to parse input text") with db_old.getCursor() as cur: sql = "SELECT DISTINCT eid AS eid FROM entities " + \ "WHERE to_tsvector('unaccent', entity_name) @@ plainto_tsquery('unaccent', %s) " + \ "LIMIT 20" cur = db_old.execute(cur, sql, [text]) result = [] for row in cur: try: result.append({"eid": row["eid"]}) except: pass self.returnJSON(result)
def loadEntities(self, batch_size): cur = db.getCursor() db.execute(cur, "SELECT eid, entity_name, lat, lng, json, address " + "FROM entities " + "WHERE (entity_name IS NOT NULL) AND " + " (entity_name != '') AND " + " (has_data IS NOT NULL)" + \ "LIMIT %s", [batch_size]) for row in cur: # if multiple rows share common eid, use only the first one # TODO: use the most recent address if (row["eid"] in self.eid_to_index): continue j = json.loads(row["json"]) entity = Entity() country = getComponent(j, "country") entity.country = country entity.slovakia = (entity.country == "Slovakia") entity.eid = row["eid"] entity.lat = float(row["lat"]) entity.lng = float(row["lng"]) entity.name = row["entity_name"].decode("utf8") address = row["address"].decode("utf8") if country is not None: address = address.replace(", " + country, "") psc = getComponent(j, "postal_code") if psc is not None: address = address.replace(psc + " ", "") entity.address = address entity.origin = 0 entity.size = 1 try: city = getComponent(j, "locality") entity.city = city except: entity.city = None self.entities.append(entity) num_entities = len(self.entities) # Debug output entity json if (num_entities % 30000 == 0): print json.dumps(json.loads(row["json"]), indent=4) self.eid_to_index[entity.eid] = num_entities cur.close()