Esempio n. 1
0
def matching_france(region=None):
    """Render the France matching admin page.

    With a ``region`` the per-region list template is rendered directly.
    Without one, the internal collection is aggregated: documents whose
    ``name`` exists and is not an empty array and whose level-1 admin unit
    is named 'France' are grouped by their level-2 admin unit name and
    counted, and the region list template is rendered with that result.

    The ``mode`` query argument (default ``'none'``) is passed to both
    templates.
    """
    mode = request.args.get('mode', 'none')

    if region is not None:
        return render_template('admin/matching-france/list.html',
                               region=region,
                               mode=mode)

    config = Config('./config/config.yml')
    internal = DocFactory(config.get('mongodb')).internal_collection()

    france_with_name = {
        'name': {'$exists': True, '$not': {'$size': 0}},
        '$and': [{'admin_hierarchy.ADMIN_LEVEL_1.name': 'France'}],
    }
    count_per_region = {
        '_id': '$admin_hierarchy.ADMIN_LEVEL_2.name',
        'count': {'$sum': 1},
    }
    objects = internal.aggregate([
        {'$match': france_with_name},
        {'$group': count_per_region},
    ])
    return render_template('admin/matching-france/region-list.html',
                           data=objects,
                           mode=mode)
Esempio n. 2
0
def by_place_id(list_places, city_id):
    """Fetch Google Maps documents by place id and store the new ones.

    Each entry of ``list_places`` is looked up with
    ``spider.get_gmap_place_id``; the first returned object's document is
    tagged with ``city_id`` and inserted into ``db.ukraine_city_sublocal``
    unless a document with the same ``city_id`` and ``code`` is already
    stored. Every inserted document is printed.

    The process exits through ``sys.exit()`` when ``Keygen`` yields no
    geocoding key. An error while handling a single place is printed and
    the loop continues with the next place.
    """
    config = Config('./config/config.yml')
    key_api = Keygen().get_key_geocode()
    if not key_api:
        sys.exit()

    config.set({'googlemaps': {'geocoding': {'key': key_api}}})
    language = 'uk'
    doc_factory = DocFactory(config.get('mongodb'))

    spider = Spider(loader_factory=LoaderFactory,
                    gmap_parser=MapFactory.spain,
                    doc_factory=doc_factory,
                    language=language,
                    config=config,
                    use_cache=True)
    for loc in list_places:
        objects = spider.get_gmap_place_id(loc)
        try:
            gmap = objects[0].get_document()
            gmap["city_id"] = city_id
            # find_one/insert_one replace Cursor.count()/Collection.save(),
            # which no longer exist in PyMongo 4.
            already_stored = db.ukraine_city_sublocal.find_one(
                {"city_id": city_id, "code": gmap['code']},
                {'_id': 1},
            )
            if already_stored is None:
                # Drop the source _id so MongoDB assigns a fresh one.
                gmap.pop('_id', None)
                db.ukraine_city_sublocal.insert_one(gmap)
                print(gmap)
        except Exception as e:
            # Best effort per place: report the problem and keep going.
            print(str(e))
Esempio n. 3
0
    def _get_common_data(self, list_name, force=False):
        """Build the shared task options and the job storage.

        Returns a ``(options, storage)`` tuple. ``options`` holds the cached
        Google Maps loader, the document factory, the ``MapFactory`` parser
        named after ``self._country`` and the ``force_update`` flag.
        ``storage`` is a ``MongoDB`` instance named
        ``'<list_name>_<country>'``.
        """
        config = Config('./config/config.yml')

        gmaps_loader = LoaderFactory.loader_gmaps_with_cache(
            gmaps_config=config.get('googlemaps'),
            storage_config=config.get('mongodb'))

        options = dict(
            loader=gmaps_loader,
            doc_factory=DocFactory(config.get('mongodb')),
            parser=getattr(MapFactory, self._country),
            force_update=force,
        )

        storage_name = '{}_{}'.format(list_name, self._country)
        return options, MongoDB(storage_name, config.get('mongodb'))
Esempio n. 4
0
# Interactive helper for registering a user: reads the MongoDB host and port
# from ./config/config.yml, opens the ``local`` database and prompts for a
# phone number (used as the document ``_id``) and a password.
# NOTE(review): the ``password = input(...)`` line below looks redacted in
# this copy (``******``). The password hashing, the opening ``try:`` and the
# insert call that ``except DuplicateKeyError`` belongs to are not visible
# here - restore them from the original source before running.
def main():

    config = Config('./config/config.yml')
    mongo_config = config.get('mongodb')
    connection = MongoClient(mongo_config['host'], mongo_config['port'])
    db = connection.local
    user = input("Enter your phone (format: 38063000000): ")
    password = input("Enter your password: "******"_id": user, "password": pass_hash})
        print("User created.")
    except DuplicateKeyError:
        # Reported when a document with the same ``_id`` is already stored.
        print("User already present in DB.")
Esempio n. 5
0
import csv
from lib.config.Yaml import Yaml as Config
import pandas as pd
import json
import requests
from pymongo import MongoClient
from lib.keygen.gmap_keygen import Keygen
import requests
import sys
# from pymongo import Connection
config = Config('./config/config.yml')
mongo_config = config.get('mongodb')
conn = MongoClient(mongo_config['host'], mongo_config['port'])
db = conn.location
coll = db.SPAININE

try:
    for row in db.internal.find({'25_SNIG_TIPO': {'$exists': True}}):

        if 'sinoptik_db_id' in row:

            print('+')
            if row['25_SNIG_TIPO'] == 'Municipio' or row[
                    '25_SNIG_TIPO'] == 'Entidad colectiva':
                print('+!!!!!!!!!!!!!', row['sinoptik_db_id'])
                data = {
                    "event": "Delete",
                    "id": row['sinoptik_db_id'],
                }
                r = requests.post(
                    'https://55-devsin.ukr.net/admin/api_settle.php',
from lib.factory.Loader import Loader as LoaderFactory
from lib.parser.map.google.GMapFactory import GMapFactory as MapFactory
from lib.config.Yaml import Yaml as Config


# Smoke check: query the cached Google Maps loader for a fixed position and
# run the answer through the Italy parser. Each step prints '.' when its
# result is non-empty and 'E' otherwise.
config = Config('./config/config.yml')

loader = LoaderFactory.loader_gmaps_with_cache(
    config.get('googlemaps'),
    config.get('mongodb'),
)

position_content = loader.by_position(lat=41.900, lng=12.500)
print(position_content)
print('E' if len(position_content) == 0 else '.', end='')

objects = MapFactory.italy(position_content)
print('E' if len(objects) == 0 else '.', end='')
from lib.factory.StorageLocation import StorageLocation as DocFactory
from lib.factory.Loader import Loader as LoaderFactory
from lib.config.Yaml import Yaml as Config
from lib.parser.wiki.France import France as WikiParser


def _mark(passed):
    """Print '.' when the check passed and 'E' otherwise, without a newline."""
    print('.' if passed else 'E', end='')


# Smoke check: load the French Wikipedia page for Paris, parse it and push
# the parsed data into the wiki document created for that URL.
url = 'https://fr.wikipedia.org/wiki/Paris'
headers = {'User-Agent': 'Mozilla/5.0'}

config = Config('./config/config.yml')
document_factory = DocFactory(config.get('mongodb'))
loader = LoaderFactory.loader_with_mongodb(config.get('mongodb'))

content, code = loader.load(url, headers=headers)
parser = WikiParser(content)

doc = document_factory.wiki(url)
_mark(doc.is_new())

document = doc.get_document()
_mark('code' in document)

doc.update(parser.as_dictionary())
dic = doc.get_document()
_mark(dic.get('name') == 'Paris')
lst_address = []

# Index constants, listed in ascending order.
# NOTE(review): presumably column positions of the input rows - confirm
# against the code that reads them.
codreg_index = 0
region_index = 1
codpro_index = 2
provincia_index = 3
codcom_index = 4
comune_index = 5
procom_index = 6
loc2011_index = 7
codloc_index = 8
localita_index = 9
altitude_index = 13

config = Config('./config/config.yml')
doc_factory = DocFactory(config.get('mongodb'))
language = 'it'

# Spider wired with the Italian Google Maps and Wikipedia parsers.
spider_options = {
    'loader_factory': LoaderFactory,
    'gmap_parser': MapFactory.italy,
    'wiki_parser': ParserItalyWiki,
    'doc_factory': doc_factory,
    'language': language,
    'config': config,
    'use_cache': True,
}
spider = Spider(**spider_options)

def gmap_by_address(address):
    objects = spider.get_gmap_address(address)
Esempio n. 9
0
import math
import sys
import hashlib
from lib.logger.File import File as FileLog
from argparse import ArgumentParser
import sys
import json
import requests
import pymongo
from bson.json_util import dumps
from lib.keygen.gmap_keygen import Keygen

# from lib.parser.wiki.Spain import Spain as ParserSpain
# Module-level setup: configuration, MongoDB connection, geocoding key,
# document factory and the MongoDB-backed page loader.
country = 'Spain'
config = Config('./config/config.yml')
mongo_config = config.get('mongodb')
conn = pymongo.MongoClient(mongo_config['host'], mongo_config['port'])
Key = Keygen()
# NOTE(review): the key is stored under a top-level 'geocoding' entry, while
# the commented-out print below reads it from 'googlemaps' -> 'geocoding' ->
# 'key'. Confirm that config.set() puts it where the loaders look for it.
cnf = {'geocoding': {'key': Key.get_key_geocode()}}
config.set(cnf)
db = conn.location
# NOTE(review): the collection is named 'sinoplik_romania' although country
# is 'Spain' - confirm this is the intended collection.
coll = db.sinoplik_romania
# print(config.get('googlemaps').get('geocoding').get('key'))
doc_factory = DocFactory(config.get('mongodb'))
# Disabled: CSV input with an optional number of rows to skip taken from
# the first command-line argument.
# try:
# 	skiprows = sys.argv[1]
# except Exception as e:
# 	skiprows = 0
# df = pd.read_csv('./data/spain/Spain_notDublicate.csv',  skiprows=int(skiprows), low_memory=False)
loader = Loader.loader_with_mongodb(config.get('mongodb'))
# Headers sent with page requests.
headers = {'User-Agent': 'Mozilla/5.0'}
Esempio n. 10
0
def insee_code_unit(id):
    """Render the unit page for the INSEE document whose ``code`` is ``id``.

    The template receives whatever ``find_one`` returns as ``data``.
    """
    mongo_settings = Config('./config/config.yml').get('mongodb')
    insee_docs = DocFactory(mongo_settings).insee_collection()
    return render_template('admin/other/unit.html',
                           data=insee_docs.find_one({'code': id}))
Esempio n. 11
0
def _find_by_source_code(collection, item, source_name):
    """Return the document referenced by ``item['source'][source_name]``.

    An empty dict is returned both when the item carries no code for that
    source and when the lookup finds nothing, so callers can always call
    ``.get`` on the result.
    """
    code = (item.get('source') or {}).get(source_name)
    if not code:
        return {}
    # find_one returns None when nothing matches; normalise to {}.
    return collection.find_one({'code': code}) or {}


def _compare_sources(insee_res, wiki_res, gmap_res):
    """Build the mismatch flags (1 = differs, 0 = equal) between the sources."""
    max_meters_in_distance = 5000
    try:
        too_far = Comparison.by_distance(
            wiki_res.get('center'),
            gmap_res.get('center')) > max_meters_in_distance
    except Exception:
        # Coordinates that cannot be compared count as a mismatch.
        too_far = True

    return {
        'insee_code!=wiki_code': int(
            insee_res.get('InseeXls_CodeCommune')
            != wiki_res.get('commune_codes')),
        'insee_name!=wiki_name': int(
            insee_res.get('InseeXls_NameCommune') != wiki_res.get('name')),
        'wiki_name!=gmaps_name': int(
            wiki_res.get('true_name', wiki_res.get('name'))
            != gmap_res.get('true_name', gmap_res.get('name'))),
        'wiki_post!=gmaps_post': int(
            str(wiki_res.get('postal_codes'))
            != str(gmap_res.get('postal_code'))),
        'wiki_admin!=gmaps_admin': int(
            str(wiki_res.get('admin_hierarchy'))
            != str(gmap_res.get('admin_hierarchy'))),
        # The key spelling is kept as-is: it is part of the data handed to
        # the template.
        'wiki_posinion>gmaps_position': int(too_far),
    }


def matching_france_js(region):
    """Render the JS list of France matching items for one region.

    ``region`` is URL-unquoted first. Every internal document with a
    ``name`` that exists and is not an empty array, located in France
    (level 1) and in ``region`` (level 2), is paired with the wiki, gmap
    and insee documents referenced in its ``source`` entry (``{}`` when a
    reference is absent or points to no stored document) and with the
    mismatch flags computed between them. All matching items are returned;
    no ``mode`` filtering is applied.
    """
    region = unquote_plus(region)
    config = Config('./config/config.yml')

    factory = DocFactory(config.get('mongodb'))
    internal = factory.internal_collection()
    wiki = factory.wiki_collection()
    gmap = factory.gmaps_collection()
    insee = factory.insee_collection()
    objects = internal.find({
        'name': {
            '$exists': True,
            '$not': {
                '$size': 0
            }
        },
        '$and': [{
            'admin_hierarchy.ADMIN_LEVEL_1.name': 'France'
        }, {
            'admin_hierarchy.ADMIN_LEVEL_2.name': region
        }],
    })

    result = []
    for item in objects:
        wiki_res = _find_by_source_code(wiki, item, 'wiki')
        gmap_res = _find_by_source_code(gmap, item, 'gmap')
        insee_res = _find_by_source_code(insee, item, 'insee')
        result.append({
            'internal': item,
            'wiki': wiki_res,
            'gmap': gmap_res,
            'insee': insee_res,
            'compare': _compare_sources(insee_res, wiki_res, gmap_res),
        })

    return render_template('admin/matching-france/list.js',
                           e=escape,
                           items=result)
from lib.factory.StorageLocation import StorageLocation as DocFactory
from lib.factory.Loader import Loader
from lib.config.Yaml import Yaml as Config
from lib.parser.map.google.GMapFactory import GMapFactory as MapFactory
from lib.factory.Loader import Loader as LoaderFactory
from lib.parser.wiki.France import France as ParserFranceWiki
from lib.spider.Spider import Spider

config = Config('./config/config.yml')

doc_factory = DocFactory(config.get('mongodb'))

internal_docs = doc_factory.internal_collection()
gmap_docs = doc_factory.gmaps_collection()
wiki_docs = doc_factory.wiki_collection()

language = 'fr'

gmap_config = config.get('googlemaps')
gmap_config.update(language=language)

gmap_loader = Loader.loader_gmaps_with_cache(
    gmaps_config=gmap_config, storage_config=config.get('mongodb'))

document_filter = {
    'name': {
        '$exists': True,
        '$not': {
            '$size': 0
        }
    },
Esempio n. 13
0
from lib.factory.Loader import Loader as Factory
from lib.config.Yaml import Yaml as Config

# Smoke check: query the plain (uncached) Google Maps loader for a fixed
# position with French as the language.
config = Config('./config/config.yml')

# NOTE(review): this writes the geocoding API key to stdout.
geocoding_settings = config.get('googlemaps').get('geocoding')
print(geocoding_settings.get('key'))

gmaps_config = config.get('googlemaps')
gmaps_config.update(language='fr')
loader = Factory.loader_gmaps(gmaps_config)

lat = 48.861077
lng = 2.344552
position_content = loader.by_position(lat=lat, lng=lng)

print(position_content)
# '.' for a non-empty answer, 'E' for an empty one.
print('E' if len(position_content) == 0 else '.', end='')
Esempio n. 14
0
country = 'Italy'

# Use the title given on the command line, or a timestamped default.
title = opts.t
if not title:
    title = 'italy_recursive_{}'.format(
        datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S'))

print('START {}'.format(title))

force = True

config = Config('./config/config.yml')

loader = LoaderFactory.loader_with_mongodb(
    storage_config=config.get('mongodb'))
doc_factory = DocFactory(config.get('mongodb'))

# Options collected for the recursive Italian Wikipedia page task.
options = {
    'loader': loader,
    'doc_factory': doc_factory,
    'force_update': force,
    'parser': Italy,
    'host': 'it.wikipedia.org',
    'headers': {'User-Agent': 'Mozilla/5.0'},
}

storage = Storage(job_name=PageRecursiveTask.TYPE,
                  storage_config=config.get('mongodb'))

# Call without arguments, kept from the original flow; it adds nothing.
options.update()
Esempio n. 15
0
from lib.factory.Loader import Loader as Factory
from lib.config.Yaml import Yaml as Config

# Smoke check: query the plain Google Maps loader for a fixed Italian
# address.
address = 'Italia, Piemonte, Torino, Agliè, Madonna delle Grazie'

config = Config('./config/config.yml')

gmaps_config = config.get('googlemaps')
gmaps_config.update(language='it')
loader = Factory.loader_gmaps(gmaps_config)

address_content = loader.by_address(address=address)
print(address_content)

# '.' for a non-empty answer, 'E' for an empty one.
print('E' if len(address_content) == 0 else '.', end='')
Esempio n. 16
0
from lib.factory.Loader import Loader as Loader
from lib.config.Yaml import Yaml as Config


# Smoke check: a page loaded through the MongoDB-backed loader must have the
# same length as the copy the loader then returns from its cache.
url = 'https://fr.wikipedia.org/wiki/Paris'
headers = {'User-Agent': 'Mozilla/5.0'}

config = Config('./config/config.yml')
loader = Loader.loader_with_mongodb(config.get('mongodb'))

content, code = loader.load(url, headers=headers)
content_from_storage = loader.from_cache(url, headers=headers)

same_length = len(content) == len(content_from_storage)
print('.' if same_length else 'E', end='')

# Remove the entry from the loader's storage again.
loader._storage.remove(url, headers=headers)
Esempio n. 17
0
from lib.factory.Loader import Loader as LoaderFactory
from lib.parser.map.google.GMapFactory import GMapFactory as MapFactory
from lib.config.Yaml import Yaml as Config

# Smoke check: query the cached Google Maps loader by components (country
# and postal code) and run the answer through the France parser.
components = {'country': 'FR', 'postal_code': '25320'}

config = Config('./config/config.yml')

gmap_config = config.get('googlemaps')
gmap_config.update(language='fr')

loader = LoaderFactory.loader_gmaps_with_cache(
    gmaps_config=gmap_config,
    storage_config=config.get('mongodb'),
)

components_content = loader.by_component(components=components)
print(components_content)
# '.' for a non-empty result, 'E' for an empty one.
print('E' if len(components_content) == 0 else '.', end='')

objects = MapFactory.france(components_content)
print('E' if len(objects) == 0 else '.', end='')
Esempio n. 18
0
import hashlib
from lib.logger.File import File as FileLog
from argparse import ArgumentParser
import sys
import json
import requests
import pymongo
from bson.json_util import dumps
from lib.keygen.gmap_keygen import Keygen



# from lib.parser.wiki.Spain import Spain as ParserSpain
country = 'Spain'
language = 'es'
headers = {'User-Agent': 'Mozilla/5.0'}

config = Config('./config/config.yml')

# MongoDB handles.
mongo_config = config.get('mongodb')
conn = pymongo.MongoClient(mongo_config['host'], mongo_config['port'])
db = conn.location
coll = db.SPAININE

# NOTE(review): this writes the geocoding API key to stdout.
print(config.get('googlemaps').get('geocoding').get('key'))
doc_factory = DocFactory(config.get('mongodb'))

# The optional first command-line argument is the number of CSV rows to
# skip; without it nothing is skipped.
skiprows = sys.argv[1] if len(sys.argv) > 1 else 0
df = pd.read_csv(
    './data/spain/Spain_notDublicate.csv',
    skiprows=int(skiprows),
    low_memory=False,
)
loader = Loader.loader_with_mongodb(config.get('mongodb'))
Esempio n. 19
0
from lib.factory.Loader import Loader as LoaderFactory
from lib.factory.StorageLocation import StorageLocation as DocFactory
from lib.parser.wiki.Italy import Italy
from lib.logger.MongoDB import MongoDB as Log
from time import sleep

force = True

config = Config('./config/config.yml')

country = 'Italia'

options = {}

loader = LoaderFactory.loader_with_mongodb(
    storage_config=config.get('mongodb'))
options.update(loader=loader)

doc_factory = DocFactory(config.get('mongodb'))
options.update(doc_factory=doc_factory)

options.update(force_update=force)

options.update(parser=Italy)
options.update(headers={'User-Agent': 'Mozilla/5.0'})
storage = Storage(job_name=PageTask.get_name(country),
                  storage_config=config.get('mongodb'))

log = Log(log_name=PageTask.get_name(country), config=config.get('mongodb'))

task_list = TaskListMongoDB(task_type=PageTask.get_name(country),
Esempio n. 20
0
from lib.factory.Loader import Loader as LoaderFactory
from lib.factory.StorageLocation import StorageLocation as DocFactory
from lib.parser.map.google.Italy import Italy
from lib.logger.MongoDB import MongoDB as Log
from lib.Counter.CounterMongoDB import CounterMongoDB
from pymongo import MongoClient

force = True

config = Config('./config/config.yml')

country = 'Italy'

options = {}

gmaps_config = config.get('googlemaps')
gmaps_config.update(language='it')

loader = LoaderFactory.loader_gmaps_with_cache(
    gmaps_config=gmaps_config, storage_config=config.get('mongodb'))
options.update(loader=loader)

doc_factory = DocFactory(config.get('mongodb'))

mongo_config = config.get('mongodb')

connection = MongoClient(mongo_config['host'], mongo_config['port'])

counter = CounterMongoDB(counter_name='gmap',
                         start=1,
                         end=gmaps_config.get('geocoding').get('limit'),
Esempio n. 21
0
from lib.job.storage.MongoDB import MongoDB as Storage
from lib.job.wiki.RequestTask import RequestTask
from lib.config.Yaml import Yaml as Config
import pandas as pd

# Queue one request job per row of the tab-separated base file; the job is
# named after the value in the row's first column.
config = Config('./config/config.yml')

country = 'France'

job_list = Storage(RequestTask.get_name(country), config.get('mongodb'))

df = pd.read_csv('./WorkBaseFile/BaseCommuneInInseeFR', delimiter="\t")
for index, row in df.iterrows():
    # Take the first column by position explicitly: ``row[0]`` on a Series
    # with string labels relies on deprecated positional fallback.
    insee = row.iloc[0]
    job_list.add("insee+{insee}".format(insee=insee))
Esempio n. 22
0
from lib.factory.Loader import Loader as Factory
import sys
from lib.config.Yaml import Yaml as Config
from lib.parser.wiki.Italy import Italy as WikiParser

# Smoke check: load an Italian Wikipedia page, read it back from the cache
# and report how the Italy wiki parser classifies it.
config = Config('./config/config.yml')
loader = Factory.loader_with_mongodb(config.get('mongodb'))

# Alternative page:
# https://it.wikipedia.org/wiki/Variazioni_amministrative_della_Calabria
url = 'https://it.wikipedia.org/wiki/Provincia_di_Biella'
headers = {'User-Agent': 'Mozilla/5.0'}

content, code = loader.load(url, headers=headers)
content = loader.from_cache(url, headers=headers)

# Stop unless the load answered 200 and the cached copy is non-empty.
if code != 200 or len(content) == 0:
    print('E')
    sys.exit()
print('.')

parser = WikiParser(content)
dic = parser.as_dictionary()

location_answer = 'yes' if parser.is_location_page() else 'no'
print('is location: {}'.format(location_answer))
list_answer = 'yes' if parser.is_many_answers() else 'no'
print('is list: {}'.format(list_answer))
Esempio n. 23
0
from lib.factory.Loader import Loader as LoaderFactory
from lib.parser.map.google.GMapFactory import GMapFactory as MapFactory
from lib.config.Yaml import Yaml as Config
from lib.logger.File import File as FileLog
from lib.factory.StorageLocation import StorageLocation as DocFactory
from argparse import ArgumentParser

arg_parser = ArgumentParser(description='Download data from gmaps by address')
arg_parser.add_argument('-f', help='turn on the force mode')
arg_parser.add_argument('-a', help='address')
opts = arg_parser.parse_args()

config = Config('./config/config.yml')

loader = LoaderFactory.loader_gmaps_with_cache(
    gmaps_config=config.get('googlemaps'),
    storage_config=config.get('mongodb'))
document_factory = DocFactory(config.get('mongodb'))
log = FileLog('./log/gmaps_address_france_{date}.log'.format(
    date=datetime.datetime.now().strftime('%Y-%m-%d')))
log.add('Start', log.INFO)
log.add('Params: [{0}]'.format(repr(opts).encode('utf-8')), log.INFO)

use_address = bool(opts.a)
address = opts.a if use_address else ''
force_update = opts.f


def update_meta(request, document):
    actual_doc = document.get_document()
    added_requests = [(tuple(x) if isinstance(x, list) else x)
Esempio n. 24
0
from lib.factory.StorageLocation import StorageLocation as DocFactory
from lib.parser.map.google.Italy import Italy
from lib.logger.MongoDB import MongoDB as Log
from lib.Counter.CounterMongoDB import CounterMongoDB
from pymongo import MongoClient


force = True
country = 'Italy'

config = Config('./config/config.yml')

# Google Maps settings with Italian as the language.
gmaps_config = config.get('googlemaps')
gmaps_config.update(language='it')

options = {}

loader = LoaderFactory.loader_gmaps_with_cache(
    gmaps_config=gmaps_config, storage_config=config.get('mongodb'))
options['loader'] = loader

doc_factory = DocFactory(config.get('mongodb'))
options['doc_factory'] = doc_factory

# Direct MongoDB connection built from the same settings.
mongo_config = config.get('mongodb')
connection = MongoClient(mongo_config['host'], mongo_config['port'])
Esempio n. 25
0
from lib.job.storage.MongoDB import MongoDB as Storage
from lib.job.wiki.RequestTask import RequestTask
from lib.config.Yaml import Yaml as Config
import pandas as pd
from lib.job.wiki.PageRecursiveTask import PageRecursiveTask

# Queue one recursive page job per row of the tab-separated URL list; each
# job carries the link from the row's first column and the dig level.
config = Config('./config/config.yml')

country = 'Italy'

max_dig_level = 4

job_list = Storage(PageRecursiveTask.get_name(country), config.get('mongodb'))

df = pd.read_csv('./WorkBaseFile/ItalyUrlMainList', delimiter="\t")
for index, row in df.iterrows():
    # Take the first column by position explicitly: ``row[0]`` on a Series
    # with string labels relies on deprecated positional fallback.
    link = row.iloc[0]
    job_list.add({'link': link, 'level': max_dig_level})