# Smoke test: load the French Wikipedia page for Paris, parse it, and verify
# the resulting document. Prints '.' per passing check and 'E' per failure.
# The first three imports are assumed module paths; adjust to the project layout.
from lib.config import Config
from lib.factory.documents import DocFactory
from lib.factory.loaders import LoaderFactory
from lib.parser.wiki.France import France as WikiParser

config = Config('./config/config.yml')
document_factory = DocFactory(config.get('mongodb'))

url = 'https://fr.wikipedia.org/wiki/Paris'
headers = {'User-Agent': 'Mozilla/5.0'}

loader = LoaderFactory.loader_with_mongodb(config.get('mongodb'))
content, code = loader.load(url, headers=headers)
parser = WikiParser(content)

doc = document_factory.wiki(url)
print('.' if doc.is_new() else 'E', end='')

document = doc.get_document()
print('.' if 'code' in document else 'E', end='')

doc.update(parser.as_dictionary())
dic = doc.get_document()
print('.' if dic.get('name') == 'Paris' else 'E', end='')
print('.' if dic.get('type') == 'commune' else 'E', end='')
print('.' if len(dic.get('admin_hierarchy')) == 4 else 'E', end='')
print('.' if dic.get('admin_hierarchy')[0].get('name') == 'France' else 'E', end='')
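# --- Illustration only: a minimal in-memory stand-in for the document factory.
# The real DocFactory is MongoDB-backed; this sketch assumes nothing beyond the
# interface the test above exercises (wiki(), is_new(), get_document(),
# update()) and is not part of the project code.

class InMemoryDoc:
    """Hypothetical stand-in mirroring the Doc interface used above."""

    _store = {}  # shared fake collection: url -> document dict

    def __init__(self, url):
        self.url = url
        self._new = url not in self._store
        if self._new:
            # assumed: 'code' is the key the smoke test expects on a new doc
            self._store[url] = {'code': url}

    def is_new(self):
        return self._new

    def get_document(self):
        return dict(self._store[self.url])

    def update(self, fields):
        self._store[self.url].update(fields)


class InMemoryDocFactory:
    """Drop-in replacement for DocFactory when no MongoDB is available."""

    def wiki(self, url):
        return InMemoryDoc(url)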
# Italy import excerpt: geocode each administrative level of the address
# (region, then provincia) and create the matching gmaps/ISTAT documents.
print(index)  # debug: current row index (defined in the enclosing loop, not shown)
try:
    new_address = 'Italia, '
    if row[region_index]:
        new_address += row[region_index]
        if new_address not in lst_address:
            # First time this region-level address is seen: geocode it.
            lst_address.append(new_address)
            gmap = gmap_by_address(new_address)
            if gmap.get('code'):
                gmap_obj = doc_factory.gmaps(gmap.get('code'))
                gmap_obj.update(gmap)
            else:
                gmap_obj = doc_factory.gmaps('dummy')
            # 'hash' is a project helper class (it shadows the built-in);
            # it derives a stable ISTAT key from the address parts.
            istat_code = hash().make(str(['Italia', row[region_index]]))
            istat_obj = doc_factory.istat(istat_code)
            wiki_obj = doc_factory.wiki('dummy')
            internal_obj = make_internal(row, istat_obj, {}, wiki_obj, gmap, gmap_obj)
    new_address += ', '
    if row[provincia_index]:
        new_address += row[provincia_index]
        if new_address not in lst_address:
            lst_address.append(new_address)
            gmap = gmap_by_address(new_address)
            if gmap.get('code'):
                gmap_obj = doc_factory.gmaps(gmap.get('code'))
                gmap_obj.update(gmap)
            else:
                gmap_obj = doc_factory.gmaps('dummy')
            istat_code = hash().make(str(['Italia', row[region_index], row[provincia_index]]))
            istat_obj = doc_factory.istat(istat_code)
except Exception as err:
    # Assumed handler: the excerpt is truncated before the original except clause.
    print('E', err)
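# --- Illustration only: plausible shapes for the two helpers used above.
# Neither is shown in this excerpt, so both are assumptions: gmap_by_address()
# presumably wraps the Google Geocoding web service, and hash().make()
# presumably derives a deterministic key from the serialized address parts.

import hashlib

import requests


def gmap_by_address(address, api_key='YOUR_KEY'):
    """Hypothetical geocoding helper: returns a dict with a 'code' on success."""
    resp = requests.get(
        'https://maps.googleapis.com/maps/api/geocode/json',
        params={'address': address, 'key': api_key},
    )
    results = resp.json().get('results', [])
    if not results:
        return {}
    top = results[0]
    return {
        'code': top.get('place_id'),  # assumed: 'code' is the place id
        'address': top.get('formatted_address'),
        'location': top.get('geometry', {}).get('location'),
    }


class hash:  # mirrors the project name seen above; deliberately shadows the built-in
    """Hypothetical stable-key helper matching the hash().make(...) calls."""

    def make(self, text):
        return hashlib.md5(text.encode('utf-8')).hexdigest()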
# Italian Wikipedia import excerpt: resolve a page passed on the command line,
# following disambiguation pages when needed.

def update_meta(url, request, document):
    # Reconstructed wrapper: the source opens mid-function, and these statements
    # match the update_meta(...) keyword calls below. Reading the stored
    # document via get_document() is an assumption based on the Doc interface.
    actual_doc = document.get_document()
    # Requests are stored as tuples so set() can deduplicate them
    # (lists are unhashable).
    added_requests = [tuple(x) for x in actual_doc.get('requests', ())]
    added_requests.append(request)
    actual_doc.update(requests=list(set(added_requests)))
    document.update(actual_doc)


try:
    if use_link:
        log.add(message_format.format(custom_link), log.INFO)
        content, code = loader.load(custom_link, headers=headers)
        parser = WikiIt(content)
        if parser.is_many_answers():
            # Disambiguation page: visit every candidate link.
            urls = parser.get_answers_links()
            for url in urls:
                doc = document_factory.wiki(url)
                if doc.is_new() or force_update:
                    page, code = loader.load(url, headers=headers)
                    page_parser = WikiIt(page)
                    if page_parser.is_location_page():
                        doc.update(page_parser.as_dictionary())
                        update_meta(url=url, request=custom_link, document=doc)
        elif parser.is_location_page():
            doc = document_factory.wiki(custom_link)
            if doc.is_new() or force_update:
                doc.update(parser.as_dictionary())
                update_meta(url=custom_link, request=custom_link, document=doc)
    else:
        log.add('Wrong command', log.ERROR)
        print('use parameters like -l link to wiki page')
except Exception as err:
    # Assumed handler: the excerpt is truncated before the original except clause.
    log.add(str(err), log.ERROR)
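# --- Illustration only: why the stored requests are converted to tuples before
# deduplication. Lists coming back from MongoDB are unhashable, so they cannot
# go into a set; tuples can. A standalone run of the same idea:

stored = [['https://it.wikipedia.org/wiki/Roma', 'search'],
          ['https://it.wikipedia.org/wiki/Roma', 'search']]
as_tuples = [tuple(x) for x in stored]
as_tuples.append(('https://it.wikipedia.org/wiki/Milano', 'link'))
deduped = list(set(as_tuples))  # duplicates collapse; order is not preserved
print(deduped)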
# Spanish Wikipedia import excerpt: search for each municipio and match the
# candidate pages against the source name. The statement below opens
# mid-expression in the source: the assignment target and the base search URL
# are truncated, so 'url =' and the grouping are assumed from the
# loader.load(url, ...) call that follows.
url = (
    adress.replace(' ', '')
    + '&title=Sp%C3%A9cial:Recherche&profile=default&fulltext=1'
      '&searchengineselect=mediawiki&searchToken=ac9zaxa1lggzxpdhc5ukg06t6'
)
# adress = str(row[1] + ' , ' + row[3])
content, code = loader.load(url, headers=headers)
parser = WikiES(content)
print(adress,
      '=====================================================================================',
      url)
# print(url)
# Spanish article markers that may trail a place name: (LA) (LAS) (EL)
if parser.is_many_answers():
    urls = parser.get_answers_links()
    for answer_url in urls:
        print(answer_url)
        doc = document_factory.wiki(answer_url)
        page, code = loader.load(answer_url, headers=headers)
        page_parser = WikiES(page)
        # print(code)
        # print(page_parser.as_dictionary())
        data = page_parser.as_dictionary()
        print(data['name'])
        # The article markers are matched in lowercase because the name has
        # already been lowercased at this point.
        if point[-1].lower().lstrip().replace('(la)', '').replace(
                '(las)', '').replace('(el)', '') in data['name'].lower().lstrip():
            print(data, 'YEEEESSSS')
            data['Municipio_Name'] = row[3]
            data['Collective_Entity_Code'] = row[4]
            data['Collective_Entity_Name'] = row[5]
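# --- Illustration only: the article-stripping comparison above, factored into
# a hypothetical helper so the normalization can be tested on its own.

def strip_article_markers(name):
    """Lowercase a place name and drop the (la)/(las)/(el) article markers."""
    out = name.lower().lstrip()
    for marker in ('(la)', '(las)', '(el)'):
        out = out.replace(marker, '')
    return out.strip()


# e.g. strip_article_markers('Palmas (LAS)') == 'palmas', which the substring
# check above would then find inside 'las palmas de gran canaria'.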