def test_prepare_doc(self):
    """Relates to issue#62"""
    fake_results = mock.MagicMock()
    fake_results.results = [{}]
    document = {'type': 'test/foo'}
    # merge_docs must be invoked with the first existing result, the new
    # document and the given precedence
    with mock.patch('moxie.places.importers.helpers.merge_docs') as merge_docs_mock:
        prepare_document(document, fake_results, 1)
        merge_docs_mock.assert_called_with(fake_results.results[0], document, 1)
def run(self):
    """Import rail stations from the SBB SQLite database.

    Reads every row of the ``station`` table, builds a search document per
    station and indexes the documents in batches of 400 through
    ``self.indexer`` (skipped entirely when no indexer is configured).
    """
    conn = sqlite3.connect(self.sbb_db)
    try:
        conn.row_factory = dict_factory
        db = conn.cursor()
        if self.indexer:
            docs = []
            sql = "SELECT * FROM station"
            for i, row in enumerate(db.execute(sql).fetchall()):
                data = {}
                data['id'] = "stoparea:%s" % str(row['id'])
                # record both identifier forms so existing documents indexed
                # under either spelling can be found and merged
                data[self.identifier_key] = [
                    str(row['id']),
                    'sbb:%s' % row['id']
                ]
                data['location'] = "%s,%s" % (row['x'], row['y'])
                data['name'] = row['name']
                data['name_sort'] = row['name']
                data['type'] = "/transport/rail-station"
                data['tags'] = []
                search_results = self.indexer.search_for_ids(
                    self.identifier_key, data[self.identifier_key])
                docs.append(
                    prepare_document(data, search_results, self.precedence))
                # flush every full batch of 400 documents
                if not (i + 1) % 400:
                    self.indexer.index(docs)
                    self.indexer.commit()
                    docs = []
            # fix: only index the trailing partial batch when it is
            # non-empty (previously index([]) was always issued)
            if docs:
                self.indexer.index(docs)
            self.indexer.commit()
    finally:
        # fix: the sqlite connection was previously never closed
        conn.close()
def run(self):
    """Incrementally parse the NaPTAN XML file and index the stops.

    Feeds the file to the SAX handler in ``buffer_size`` chunks, then (when
    an indexer is configured) prepares one document per stop area and per
    stop point and indexes them all in a single batch.
    """
    parser = make_parser(["xml.sax.IncrementalParser"])
    parser.setContentHandler(self.handler)
    while True:
        chunk = self.naptan_file.read(self.buffer_size)
        if not chunk:
            break
        parser.feed(chunk)
    parser.close()
    if not self.indexer:
        return
    docs = []
    for area in self.handler.stop_areas.values():
        hits = self.indexer.search_for_ids(self.identifier_key,
                                           area[self.identifier_key])
        docs.append(prepare_document(area, hits, self.precedence))
    for stop_point in self.handler.stop_points.values():
        hits = self.indexer.search_for_ids(self.identifier_key,
                                           stop_point[self.identifier_key])
        docs.append(prepare_document(stop_point, hits, self.precedence))
    self.indexer.index(docs)
    self.indexer.commit()
def process_type(self, rdf_type, defined_type):
    """Browse the graph for a certain type and process found subjects

    :param rdf_type: RDF type to find
    :param defined_type: type defining subjects found
    :return list of documents
    """
    documents = []
    for subj in self.graph.subjects(RDF.type, rdf_type):
        try:
            processed = self.process_subject(subj, defined_type)
            if not processed:
                continue
            hits = self.indexer.search_for_ids(self.identifier_key,
                                               processed[self.identifier_key])
            documents.append(
                prepare_document(processed, hits, self.precedence))
        except Exception:
            # one bad subject must not abort the whole import run
            logger.warning('Could not process subject', exc_info=True,
                           extra={'data': {'subject': subj.toPython()}})
    return documents
def index_library(self, lib):
    """Enrich the indexed document matching *lib* with library data.

    Looks up the existing document by its library-data identifier; when
    found, copies opening hours, subjects and (optional) access policies
    onto it and returns the prepared document, otherwise returns None.
    """
    ident = "{key}:{value}".format(key=self.lib_data_identifier, value=lib['id'])
    search_results = self.indexer.search_for_ids(self.identifier_key, [ident])
    if not search_results.results:
        logger.info('No results for {ident}'.format(ident=ident))
        return None
    doc = search_results.results[0]
    # fields copied verbatim from the library record
    for field in ('opening_hours_termtime',
                  'opening_hours_vacation',
                  'opening_hours_closed'):
        doc[self.prefix_index_key + field] = lib[field]
    doc[self.prefix_index_key + 'subject'] = lib['subjects']
    # access policies are optional; copy only those present
    for policy in ('academic', 'other', 'postgraduate', 'undergraduate'):
        if policy in lib['policies']:
            doc[self.prefix_index_key + 'policy_' + policy] = lib['policies'][policy]
    return prepare_document(doc, search_results, self.precedence)
def endElement(self, element_type):
    """SAX end-of-element hook: turn a finished OSM node/way into a POI doc.

    Computes the element's location (the node's own location, or the centre
    of a way's bounding box), filters out disused/unwanted elements, builds
    a search document from the accumulated ``self.tags`` and appends the
    prepared document to ``self.pois``.

    NOTE(review): relies on handler state (``self.id``, ``self.tags``,
    ``self.nodes`` …) set during startElement — presumably reset per
    element by the caller; confirm in the handler's start logic.
    Uses ``iterkeys`` so this is Python 2 code.
    """
    if element_type == 'node':
        location = self.node_location
    elif element_type == 'way':
        # bounding box of all the way's member nodes; location is its centre
        min_, max_ = (float('inf'), float('inf')), (float('-inf'), float('-inf'))
        for lat, lon in [self.node_locations[n] for n in self.nodes]:
            min_ = min(min_[0], lat), min(min_[1], lon)
            max_ = max(max_[0], lat), max(max_[1], lon)
        location = (min_[0] + max_[0]) / 2, (min_[1] + max_[1]) / 2
    try:
        # skip elements not currently in use (life_cycle defaults to in_use)
        if self.tags.get('life_cycle', 'in_use') != 'in_use':
            return
        for key in self.tags.iterkeys():
            if 'disused' in key:
                # e.g. disused:amenity=restaurant
                # http://wiki.openstreetmap.org/wiki/Key:disused
                return
        # only index nodes/ways carrying at least one of the tags we care about
        if element_type in ['way', 'node'] and any([x in self.tags for x in self.element_tags]):
            result = {}
            osm_id = 'osm:%s' % self.id
            atco_id = self.tags.get('naptan:AtcoCode', None)
            result[self.identifier_key] = [osm_id]
            # if it has an ATCO ID, we set the ATCO ID as the main ID for this document
            # instead of the OSM ID
            if atco_id:
                result['id'] = atco_id
                result[self.identifier_key].append('atco:%s' % atco_id)
            else:
                result['id'] = osm_id
            # collect human-readable tags from the configured indexed tags;
            # OSM values are ';'-separated and use '_' for spaces
            result['tags'] = []
            for it in self.indexed_tags:
                doc_tags = [t.replace('_', ' ').strip() for t in self.tags.get(it, '').split(';')]
                if doc_tags and doc_tags != ['']:
                    result['tags'].extend(doc_tags)
            # Filter elements depending on amenity / shop tags
            if 'amenity' in self.tags:
                if self.tags['amenity'] in AMENITIES:
                    # special case for Park and Rides where amenity=parking
                    # and park_ride=bus/yes/... except no
                    # TODO we should be able to handle this kind of case in a better way
                    if self.tags['amenity'] == "parking" and self.tags.get('park_ride', 'no') != 'no':
                        result['type'] = PARK_AND_RIDE
                    else:
                        result['type'] = AMENITIES[self.tags['amenity']]
                else:
                    # amenity we do not index
                    return
            elif 'shop' in self.tags:
                if self.tags['shop'] in SHOPS:
                    result['type'] = SHOPS[self.tags['shop']]
                else:
                    # shop we do not index
                    return
            else:
                # neither amenity nor shop: not a POI we index
                return
            # if the element doesn't have a name, it will be an empty string
            result['name'] = self.tags.get('name', self.tags.get('operator', ''))
            result['name_sort'] = result['name']
            # assemble the address from the addr:* tags, collapsing the
            # whitespace left by missing parts
            address = "{0} {1} {2} {3}".format(self.tags.get("addr:housename", ""), self.tags.get("addr:housenumber", ""), self.tags.get("addr:street", ""), self.tags.get("addr:postcode", ""))
            result['address'] = " ".join(address.split())
            if 'phone' in self.tags:
                result['phone'] = format_uk_telephone(self.tags['phone'])
            # 'website' wins over 'url' when both are present
            if 'url' in self.tags:
                result['website'] = self.tags['url']
            if 'website' in self.tags:
                result['website'] = self.tags['website']
            if 'opening_hours' in self.tags:
                result['opening_hours'] = self.tags['opening_hours']
            if 'collection_times' in self.tags:
                result['collection_times'] = self.tags['collection_times']
            result['location'] = "%s,%s" % location
            # merge with any existing document sharing one of our identifiers
            search_results = self.indexer.search_for_ids(
                self.identifier_key, result[self.identifier_key])
            self.pois.append(prepare_document(result, search_results, self.precedence))
    except Exception as e:
        # best-effort import: log and carry on with the next element
        logger.warning("Couldn't index a POI.", exc_info=True)