Ejemplo n.º 1
0
from lib.parser.wiki.France import France as WikiParser


# Smoke test for the French Wikipedia parser: load the "Paris" page, store the
# parsed result in a wiki document and print one character per check
# ('.' = check passed, 'E' = check failed), all on a single line.


def _report(passed):
    """Print '.' when *passed* is truthy and 'E' otherwise, without a newline."""
    print('.' if passed else 'E', end='')


config = Config('./config/config.yml')
document_factory = DocFactory(config.get('mongodb'))

url = 'https://fr.wikipedia.org/wiki/Paris'
headers = {'User-Agent': 'Mozilla/5.0'}

loader = LoaderFactory.loader_with_mongodb(config.get('mongodb'))

content, code = loader.load(url, headers=headers)

parser = WikiParser(content)

doc = document_factory.wiki(url)

# Check 1: the document object reports itself as new.
_report(doc.is_new())

document = doc.get_document()

# Check 2: the document exposes a 'code' key before any update.
_report('code' in document)

doc.update(parser.as_dictionary())

dic = doc.get_document()

# Checks 3-4: parsed name and type.
_report(dic.get('name') == 'Paris')
_report(dic.get('type') == 'commune')

# Checks 5-6: administrative hierarchy. Fall back to an empty list so that a
# missing or empty 'admin_hierarchy' is reported as 'E' instead of aborting
# the script with TypeError / IndexError.
admin_hierarchy = dic.get('admin_hierarchy') or []
_report(len(admin_hierarchy) == 4)
_report(bool(admin_hierarchy) and admin_hierarchy[0].get('name') == 'France')
    # Excerpt from the body of a per-row loop (the loop header and the handler
    # of the `try` below are outside this excerpt). For each row it builds an
    # "Italia, <region>[, <province>]" address and, the first time an address
    # is seen, geocodes it and creates the related documents.
    print(index)
    try:
        new_address = 'Italia, '
        if row[region_index]:
            # --- Region level -------------------------------------------------
            new_address += row[region_index]
            # lst_address holds the addresses already handled, so each distinct
            # address is geocoded only once.
            if new_address not in lst_address:
                lst_address.append(new_address)
                gmap = gmap_by_address(new_address)
                if gmap.get('code'):
                    # A geocoding result with a code: fetch its gmaps document
                    # and store the result in it.
                    gmap_obj = doc_factory.gmaps(gmap.get('code'))
                    gmap_obj.update(gmap)
                else:
                    # No code in the result: use the 'dummy' gmaps document.
                    gmap_obj = doc_factory.gmaps('dummy')
                # NOTE(review): `hash` is called with no arguments, so it must
                # be a project-defined name shadowing the builtin - confirm.
                istat_code = hash().make(str(['Italia', row[region_index]]))
                istat_obj = doc_factory.istat(istat_code)
                wiki_obj = doc_factory.wiki('dummy')
                # NOTE(review): internal_obj is not used within this excerpt.
                internal_obj = make_internal(row, istat_obj, {}, wiki_obj, gmap, gmap_obj)

            new_address += ', '
            if row[provincia_index]:
                # --- Province level (same steps as the region level) ----------
                new_address += row[provincia_index]
                if new_address not in lst_address:
                    lst_address.append(new_address)
                    gmap = gmap_by_address(new_address)
                    if gmap.get('code'):
                        gmap_obj = doc_factory.gmaps(gmap.get('code'))
                        gmap_obj.update(gmap)
                    else:
                        gmap_obj = doc_factory.gmaps('dummy')
                    # The ISTAT code is derived from the country/region/province path.
                    istat_code = hash().make(str(['Italia', row[region_index], row[provincia_index]]))
                    istat_obj = doc_factory.istat(istat_code)
Ejemplo n.º 3
0
    # Tail of a function whose header is outside this excerpt: merge `request`
    # into the 'requests' list of `actual_doc` and write the result back into
    # `document`.
    # Entries are converted to tuples so they are hashable for the set() below.
    # NOTE(review): if stored entries are plain strings, tuple(x) splits them
    # into single characters - confirm the stored shape of 'requests'.
    added_requests = [tuple(x) for x in actual_doc.get('requests', ())]
    added_requests.append(request)
    # set() removes duplicates; the resulting list order is not preserved.
    actual_doc.update(requests=list(set(added_requests)))
    document.update(actual_doc)


# Main flow of the script: when `use_link` is set, load `custom_link`, parse it
# with WikiIt and store the resulting page(s) as wiki documents.
# NOTE: the handler of this `try` lies outside this excerpt.
try:
    if use_link:
        log.add(message_format.format(custom_link), log.INFO)
        content, code = loader.load(custom_link, headers=headers)
        parser = WikiIt(content)

        if parser.is_many_answers():
            # The link led to a page with several answers: process every
            # linked page individually.
            urls = parser.get_answers_links()
            for url in urls:
                doc = document_factory.wiki(url)
                # Only download and parse pages that are new, unless an update
                # is forced.
                if doc.is_new() or force_update:
                    page, code = loader.load(url, headers=headers)
                    page_parser = WikiIt(page)
                    if page_parser.is_location_page():
                        doc.update(page_parser.as_dictionary())
                # Metadata is updated for every answer link, whether or not the
                # document itself was refreshed above.
                update_meta(url=url, request=custom_link, document=doc)
        elif parser.is_location_page():
            # The link is itself a location page: reuse the already parsed
            # content instead of loading it again.
            doc = document_factory.wiki(custom_link)
            if doc.is_new() or force_update:
                doc.update(parser.as_dictionary())
            update_meta(url=custom_link, request=custom_link, document=doc)

    else:
        # No link was supplied: log the error and print a usage hint.
        log.add('Wrong command', log.ERROR)
        print('use parameters like -l link to wiki page')
Ejemplo n.º 4
0
                adress.replace(' ', '')
            ) + '&title=Sp%C3%A9cial:Recherche&profile=default&fulltext=1&searchengineselect=mediawiki&searchToken=ac9zaxa1lggzxpdhc5ukg06t6'
            # adress = str(row[1]+' , '+row[3])
            # Load the search-result page built above and parse it.
            content, code = loader.load(url, headers=headers)
            parser = WikiES(content)
            print(
                adress,
                '=====================================================================================',
                url)

            if parser.is_many_answers():
                urls = parser.get_answers_links()
                for answer_url in urls:
                    print(answer_url)
                    doc = document_factory.wiki(answer_url)
                    page, code = loader.load(answer_url, headers=headers)
                    page_parser = WikiES(page)
                    data = page_parser.as_dictionary()
                    print(data['name'])
                    # NOTE(review): `doc` was already created a few lines above
                    # and is not used in between; the second call is kept in
                    # case the factory has side effects - confirm and drop one.
                    doc = document_factory.wiki(answer_url)

                    # Normalise the expected name: lower-case it, drop the
                    # article markers and trim surrounding whitespace.
                    # The markers are matched in lower case because the text is
                    # lower-cased first; the previous upper-case markers could
                    # never match, so the article suffix was never removed.
                    expected_name = point[-1].lower()
                    for article_marker in ('(la)', '(las)', '(el)'):
                        expected_name = expected_name.replace(article_marker, '')
                    # strip() rather than lstrip(): removing a trailing marker
                    # leaves trailing whitespace that would break the
                    # substring test below.
                    expected_name = expected_name.strip()

                    if expected_name in data['name'].lower().lstrip():
                        print(data, 'YEEEESSSS')
                        data['Municipio_Name'] = row[3]
                        data['Collective_Entity_Code'] = row[4]
                        data['Collective_Entity_Name'] = row[5]