def matching_france(region=None):
    """Render the France matching admin pages.

    With a ``region`` the per-region list template is rendered.  Without one,
    the internal collection is aggregated into one row per
    ``admin_hierarchy.ADMIN_LEVEL_2.name`` (with a document count) and the
    region-list template is rendered.  The ``mode`` query-string argument
    (default ``'none'``) is forwarded to the template in both cases.
    """
    mode = request.args.get('mode', 'none')

    if region is not None:
        return render_template(
            'admin/matching-france/list.html', region=region, mode=mode)

    config = Config('./config/config.yml')
    factory = DocFactory(config.get('mongodb'))
    internal = factory.internal_collection()

    # Stage 1: documents with a non-empty name located in France.
    only_named_in_france = {
        '$match': {
            'name': {'$exists': True, '$not': {'$size': 0}},
            '$and': [{'admin_hierarchy.ADMIN_LEVEL_1.name': 'France'}],
        },
    }
    # Stage 2: one bucket per second-level admin name, counting documents.
    count_per_level_2 = {
        '$group': {
            '_id': '$admin_hierarchy.ADMIN_LEVEL_2.name',
            'count': {'$sum': 1},
        },
    }
    objects = internal.aggregate([only_named_in_france, count_per_level_2])
    return render_template(
        'admin/matching-france/region-list.html', data=objects, mode=mode)
def by_place_id(list_places, city_id):
    """Fetch Google Maps documents by place id and store the new ones.

    For every entry of ``list_places`` the spider is asked for the matching
    objects; the first object's document is tagged with ``city_id`` and saved
    to ``db.ukraine_city_sublocal`` unless a document with the same
    ``city_id`` and ``code`` is already there.  Any error raised while
    handling one place is printed and the loop moves on to the next place.

    The process exits (``sys.exit()``) when no geocoding key is available.
    """
    config = Config('./config/config.yml')
    keygen = Keygen()
    api_key = keygen.get_key_geocode()
    if not api_key:
        sys.exit()
    config.set({'googlemaps': {'geocoding': {'key': api_key}}})

    spider = Spider(
        loader_factory=LoaderFactory,
        gmap_parser=MapFactory.spain,
        doc_factory=DocFactory(config.get('mongodb')),
        language='uk',
        config=config,
        use_cache=True,
    )

    for place_id in list_places:
        found = spider.get_gmap_place_id(place_id)
        try:
            record = found[0].get_document()
            record["city_id"] = city_id
            already_stored = db.ukraine_city_sublocal.find({
                "city_id": city_id,
                "code": record['code'],
            }).count()
            if already_stored >= 1:
                continue
            # Drop any existing _id before saving the document.
            record.pop('_id', None)
            db.ukraine_city_sublocal.save(record)
            print(record)
        except Exception as error:
            print(str(error))
def main(): config = Config('./config/config.yml') mongo_config = config.get('mongodb') connection = MongoClient(mongo_config['host'], mongo_config['port']) db = connection.local user = input("Enter your phone (format: 38063000000): ") password = input("Enter your password: "******"_id": user, "password": pass_hash}) print("User created.") except DuplicateKeyError: print("User already present in DB.")
def _get_common_data(self, list_name, force=False):
    """Return the ``(options, storage)`` pair used by this object's jobs.

    ``options`` holds the loader built by ``loader_gmaps_with_cache``, a
    document factory, the ``MapFactory`` attribute named after
    ``self._country`` and the ``force_update`` flag.  ``storage`` is a
    ``MongoDB`` instance named ``'<list_name>_<country>'``.
    """
    config = Config('./config/config.yml')
    gmaps_loader = LoaderFactory.loader_gmaps_with_cache(
        gmaps_config=config.get('googlemaps'),
        storage_config=config.get('mongodb'),
    )
    options = dict(
        loader=gmaps_loader,
        doc_factory=DocFactory(config.get('mongodb')),
        parser=getattr(MapFactory, self._country),
        force_update=force,
    )
    storage_name = '{}_{}'.format(list_name, self._country)
    return options, MongoDB(storage_name, config.get('mongodb'))
import csv from lib.config.Yaml import Yaml as Config import pandas as pd import json import requests from pymongo import MongoClient from lib.keygen.gmap_keygen import Keygen import requests import sys # from pymongo import Connection config = Config('./config/config.yml') mongo_config = config.get('mongodb') conn = MongoClient(mongo_config['host'], mongo_config['port']) db = conn.location coll = db.SPAININE try: for row in db.internal.find({'25_SNIG_TIPO': {'$exists': True}}): if 'sinoptik_db_id' in row: print('+') if row['25_SNIG_TIPO'] == 'Municipio' or row[ '25_SNIG_TIPO'] == 'Entidad colectiva': print('+!!!!!!!!!!!!!', row['sinoptik_db_id']) data = { "event": "Delete", "id": row['sinoptik_db_id'], } r = requests.post( 'https://55-devsin.ukr.net/admin/api_settle.php',
from lib.factory.Loader import Loader as LoaderFactory
from lib.parser.map.google.GMapFactory import GMapFactory as MapFactory
from lib.config.Yaml import Yaml as Config

# Smoke check: load Google Maps content for a fixed position with the loader
# built by ``loader_gmaps_with_cache`` and hand it to ``MapFactory.italy``.
# Each step prints '.' for a non-empty result and 'E' for an empty one.
config = Config('./config/config.yml')
loader = LoaderFactory.loader_gmaps_with_cache(
    config.get('googlemaps'),
    config.get('mongodb'),
)

position_content = loader.by_position(lat=41.900, lng=12.500)
print(position_content)
print('E' if len(position_content) == 0 else '.', end='')

objects = MapFactory.italy(position_content)
print('E' if len(objects) == 0 else '.', end='')
from lib.factory.StorageLocation import StorageLocation as DocFactory
from lib.factory.Loader import Loader as LoaderFactory
from lib.config.Yaml import Yaml as Config
from lib.parser.wiki.France import France as WikiParser


def _report(passed):
    """Print '.' when ``passed`` is truthy and 'E' otherwise, without newline."""
    print('.' if passed else 'E', end='')


# Smoke check: load the French Wikipedia page for Paris, parse it, and push
# the parsed dictionary into the wiki document created for the same URL.
config = Config('./config/config.yml')
document_factory = DocFactory(config.get('mongodb'))

url = 'https://fr.wikipedia.org/wiki/Paris'
headers = {'User-Agent': 'Mozilla/5.0'}

loader = LoaderFactory.loader_with_mongodb(config.get('mongodb'))
content, code = loader.load(url, headers=headers)
parser = WikiParser(content)

doc = document_factory.wiki(url)
_report(doc.is_new())

document = doc.get_document()
_report('code' in document)

doc.update(parser.as_dictionary())
dic = doc.get_document()
_report(dic.get('name') == 'Paris')
country = 'Italy'
lst_address = []

# Numeric positions of the individual fields.
# NOTE(review): presumably column indexes of the source table that is read
# elsewhere in this script — confirm against the reader code.
region_index = 1
provincia_index = 3
comune_index = 5
localita_index = 9
altitude_index = 13
codloc_index = 8
loc2011_index = 7
procom_index = 6
codcom_index = 4
codpro_index = 2
codreg_index = 0

config = Config('./config/config.yml')
doc_factory = DocFactory(config.get('mongodb'))
language='it'

# Module-level spider configured with the Italian Google Maps and Wikipedia
# parsers; caching is switched on through ``use_cache=True``.
spider = Spider(
    loader_factory=LoaderFactory,
    gmap_parser=MapFactory.italy,
    wiki_parser=ParserItalyWiki,
    doc_factory=doc_factory,
    language=language,
    config=config,
    use_cache=True
)


def gmap_by_address(address):
    # Ask the module-level spider for the Google Maps objects of ``address``.
    objects = spider.get_gmap_address(address)
from lib.factory.Loader import Loader
import math
import sys
import hashlib
from lib.logger.File import File as FileLog
from argparse import ArgumentParser
import sys
import json
import requests
import pymongo
from bson.json_util import dumps
from lib.keygen.gmap_keygen import Keygen
# from lib.parser.wiki.Spain import Spain as ParserSpain

country = 'Spain'
config = Config('./config/config.yml')
mongo_config = config.get('mongodb')
conn = pymongo.MongoClient(mongo_config['host'], mongo_config['port'])

# Put a geocoding key obtained from Keygen into the configuration.
# NOTE(review): the key is set under a top-level 'geocoding' entry here —
# confirm this is the path the Google Maps loader actually reads.
Key = Keygen()
cnf = {'geocoding': {'key': Key.get_key_geocode()}}
config.set(cnf)

db = conn.location
# NOTE(review): the collection is named 'sinoplik_romania' while ``country``
# above is 'Spain' — confirm this is the intended collection.
coll = db.sinoplik_romania
# print(config.get('googlemaps').get('geocoding').get('key'))
doc_factory = DocFactory(config.get('mongodb'))

# Disabled CSV input (kept for reference):
# try:
#     skiprows = sys.argv[1]
# except Exception as e:
#     skiprows = 0
# df = pd.read_csv('./data/spain/Spain_notDublicate.csv', skiprows=int(skiprows), low_memory=False)
loader = Loader.loader_with_mongodb(config.get('mongodb'))
def insee_code_unit(id):
    """Render the unit page for the INSEE document whose ``code`` is ``id``.

    The template receives the result of ``find_one`` as ``data``.
    """
    mongo_settings = Config('./config/config.yml').get('mongodb')
    insee_docs = DocFactory(mongo_settings).insee_collection()
    return render_template(
        'admin/other/unit.html',
        data=insee_docs.find_one({'code': id}),
    )
def _source_document(collection, code):
    """Return the document of ``collection`` whose ``code`` matches, or ``{}``.

    ``find_one`` returns ``None`` when nothing matches; that is normalised to
    an empty dict so callers can always use ``.get()`` on the result.
    """
    if not code:
        return {}
    return collection.find_one({'code': code}) or {}


def _compare_sources(insee_res, wiki_res, gmap_res,
                     max_meters_in_distance=5000):
    """Return the mismatch flags between the INSEE, wiki and gmap documents.

    Every flag is ``1`` when the two compared values differ and ``0`` when
    they are equal.  The position flag is ``1`` when the distance between the
    two centers exceeds ``max_meters_in_distance`` or cannot be computed.
    """
    wiki_name = wiki_res.get('true_name', wiki_res.get('name'))
    gmap_name = gmap_res.get('true_name', gmap_res.get('name'))
    compare_res = {
        'insee_code!=wiki_code': 1 if insee_res.get(
            'InseeXls_CodeCommune') != wiki_res.get('commune_codes') else 0,
        'insee_name!=wiki_name': 1 if insee_res.get(
            'InseeXls_NameCommune') != wiki_res.get('name') else 0,
        'wiki_name!=gmaps_name': 1 if wiki_name != gmap_name else 0,
        # Compared as strings because the two sources may store these values
        # in different container types.
        'wiki_post!=gmaps_post': 1 if str(wiki_res.get(
            'postal_codes')) != str(gmap_res.get('postal_code')) else 0,
        'wiki_admin!=gmaps_admin': 1 if str(wiki_res.get(
            'admin_hierarchy')) != str(gmap_res.get('admin_hierarchy')) else 0,
    }
    try:
        too_far = Comparison.by_distance(
            wiki_res.get('center'), gmap_res.get('center')
        ) > max_meters_in_distance
    except Exception:
        # A missing or unusable center counts as a position mismatch.
        too_far = True
    # The key spelling ('posinion') is kept as is: it is part of the data
    # handed to the template.
    compare_res['wiki_posinion>gmaps_position'] = 1 if too_far else 0
    return compare_res


def matching_france_js(region):
    """Render the JS list comparing the data sources for one French region.

    ``region`` is URL-decoded with ``unquote_plus`` and used to select the
    named internal documents located in France / that region.  For each of
    them the linked wiki, gmap and INSEE documents are looked up by the codes
    stored under ``source`` (an empty dict is used when a code is absent or
    no document matches) and a dict of mismatch flags is attached under
    ``compare``.
    """
    region = unquote_plus(region)
    config = Config('./config/config.yml')
    factory = DocFactory(config.get('mongodb'))
    internal = factory.internal_collection()
    wiki = factory.wiki_collection()
    gmap = factory.gmaps_collection()
    insee = factory.insee_collection()

    objects = internal.find({
        'name': {'$exists': True, '$not': {'$size': 0}},
        '$and': [
            {'admin_hierarchy.ADMIN_LEVEL_1.name': 'France'},
            {'admin_hierarchy.ADMIN_LEVEL_2.name': region},
        ],
    })

    result = []
    for item in objects:
        # ``or {}`` also covers documents that store ``source: None``.
        source = item.get('source') or {}
        wiki_res = _source_document(wiki, source.get('wiki'))
        gmap_res = _source_document(gmap, source.get('gmap'))
        insee_res = _source_document(insee, source.get('insee'))
        result.append({
            'internal': item,
            'wiki': wiki_res,
            'gmap': gmap_res,
            'insee': insee_res,
            'compare': _compare_sources(insee_res, wiki_res, gmap_res),
        })
    return render_template(
        'admin/matching-france/list.js', e=escape, items=result)
from lib.factory.StorageLocation import StorageLocation as DocFactory from lib.factory.Loader import Loader from lib.config.Yaml import Yaml as Config from lib.parser.map.google.GMapFactory import GMapFactory as MapFactory from lib.factory.Loader import Loader as LoaderFactory from lib.parser.wiki.France import France as ParserFranceWiki from lib.spider.Spider import Spider config = Config('./config/config.yml') doc_factory = DocFactory(config.get('mongodb')) internal_docs = doc_factory.internal_collection() gmap_docs = doc_factory.gmaps_collection() wiki_docs = doc_factory.wiki_collection() language = 'fr' gmap_config = config.get('googlemaps') gmap_config.update(language=language) gmap_loader = Loader.loader_gmaps_with_cache( gmaps_config=gmap_config, storage_config=config.get('mongodb')) document_filter = { 'name': { '$exists': True, '$not': { '$size': 0 } },
from lib.factory.Loader import Loader as Factory
from lib.config.Yaml import Yaml as Config

# Smoke check: load Google Maps content for a fixed position with the loader
# built by ``loader_gmaps`` (language set to 'fr').
config = Config('./config/config.yml')

# NOTE(review): this writes the geocoding key to stdout.
geocoding_settings = config.get('googlemaps').get('geocoding')
print(geocoding_settings.get('key'))

gmaps_config = config.get('googlemaps')
gmaps_config.update(language='fr')
loader = Factory.loader_gmaps(gmaps_config)

lat = 48.861077
lng = 2.344552
position_content = loader.by_position(lat=lat, lng=lng)
print(position_content)
# '.' for a non-empty answer, 'E' for an empty one.
print('E' if len(position_content) == 0 else '.', end='')
from argparse import ArgumentParser

# Command line: the optional -t argument gives this run a title.
arg_parser = ArgumentParser(description='Worker with recursive parse in wiki')
arg_parser.add_argument('-t', help='title of history')
opts = arg_parser.parse_args()
country = 'Italy'
# Without -t the title falls back to 'italy_recursive_<current timestamp>'.
title = opts.t if opts.t else 'italy_recursive_{}'.format(
    datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S'))
print('START {}'.format(title))
force = True
config = Config('./config/config.yml')

# The options dict is filled key by key; its consumer is outside this excerpt.
options = {}
loader = LoaderFactory.loader_with_mongodb(
    storage_config=config.get('mongodb'))
options.update(loader=loader)
doc_factory = DocFactory(config.get('mongodb'))
options.update(doc_factory=doc_factory)
options.update(force_update=force)
options.update(parser=Italy)
options.update(host='it.wikipedia.org')
options.update(headers={'User-Agent': 'Mozilla/5.0'})
from lib.factory.Loader import Loader as Factory
from lib.config.Yaml import Yaml as Config

# Smoke check: load Google Maps content for a fixed Italian address with the
# loader built by ``loader_gmaps`` (language set to 'it').
config = Config('./config/config.yml')
gmaps_config = config.get('googlemaps')
gmaps_config.update(language='it')
loader = Factory.loader_gmaps(gmaps_config)

address = 'Italia, Piemonte, Torino, Agliè, Madonna delle Grazie'
address_content = loader.by_address(address=address)
print(address_content)

# '.' for a non-empty answer, 'E' for an empty one.
marker = '.' if len(address_content) > 0 else 'E'
print(marker, end='')
from lib.factory.Loader import Loader as Loader
from lib.config.Yaml import Yaml as Config

# Smoke check: load a page, read it back with ``from_cache`` and compare the
# two content lengths ('.' when equal, 'E' otherwise).
config = Config('./config/config.yml')
loader = Loader.loader_with_mongodb(config.get('mongodb'))

url = 'https://fr.wikipedia.org/wiki/Paris'
headers = {'User-Agent': 'Mozilla/5.0'}

content, code = loader.load(url, headers=headers)
content_from_storage = loader.from_cache(url, headers=headers)

same_length = len(content) == len(content_from_storage)
print('.' if same_length else 'E', end='')

# Remove the entry for this URL from the loader's storage.
loader._storage.remove(url, headers=headers)
from lib.factory.Loader import Loader as LoaderFactory
from lib.parser.map.google.GMapFactory import GMapFactory as MapFactory
from lib.config.Yaml import Yaml as Config


def _report(size):
    """Print '.' for a non-zero ``size`` and 'E' for zero, without newline."""
    print('.' if size else 'E', end='')


# Smoke check: load Google Maps content by components (country + postal code)
# and hand the answer to ``MapFactory.france``.
config = Config('./config/config.yml')
gmap_config = config.get('googlemaps')
gmap_config.update(language='fr')
loader = LoaderFactory.loader_gmaps_with_cache(
    gmaps_config=gmap_config,
    storage_config=config.get('mongodb'),
)

components = {'country': 'FR', 'postal_code': '25320'}
components_content = loader.by_component(components=components)
print(components_content)
_report(len(components_content))

objects = MapFactory.france(components_content)
_report(len(objects))
import sys
import hashlib
from lib.logger.File import File as FileLog
from argparse import ArgumentParser
import sys
import json
import requests
import pymongo
from bson.json_util import dumps
from lib.keygen.gmap_keygen import Keygen
# from lib.parser.wiki.Spain import Spain as ParserSpain

country = 'Spain'
config = Config('./config/config.yml')
mongo_config = config.get('mongodb')
conn = pymongo.MongoClient(mongo_config['host'], mongo_config['port'])
db = conn.location
coll = db.SPAININE
# NOTE(review): this writes the geocoding key to stdout — confirm it is
# intended outside of debugging.
print(config.get('googlemaps').get('geocoding').get('key'))
doc_factory = DocFactory(config.get('mongodb'))

# The first command-line argument, when present, is the number of CSV rows to
# skip; without it nothing is skipped.
try:
    skiprows = sys.argv[1]
except Exception as e:
    skiprows = 0
df = pd.read_csv('./data/spain/Spain_notDublicate.csv', skiprows=int(skiprows), low_memory=False)
loader = Loader.loader_with_mongodb(config.get('mongodb'))
headers = {'User-Agent': 'Mozilla/5.0'}
from lib.job.TaskListMongoDB import TaskListMongoDB from lib.job.wiki.PageTask import PageTask from lib.job.Executor import Executor from lib.job.storage.MongoDB import MongoDB as Storage from lib.config.Yaml import Yaml as Config from lib.factory.Loader import Loader as LoaderFactory from lib.factory.StorageLocation import StorageLocation as DocFactory from lib.parser.wiki.Italy import Italy from lib.logger.MongoDB import MongoDB as Log from time import sleep force = True config = Config('./config/config.yml') country = 'Italia' options = {} loader = LoaderFactory.loader_with_mongodb( storage_config=config.get('mongodb')) options.update(loader=loader) doc_factory = DocFactory(config.get('mongodb')) options.update(doc_factory=doc_factory) options.update(force_update=force) options.update(parser=Italy) options.update(headers={'User-Agent': 'Mozilla/5.0'}) storage = Storage(job_name=PageTask.get_name(country),
from lib.job.TaskListMongoDB import TaskListMongoDB
from lib.job.map.google.AddressTask import AddressTask
from lib.job.ExecutorWithLimit import ExecutorWithLimit
from lib.job.storage.MongoDB import MongoDB as Storage
from lib.config.Yaml import Yaml as Config
from lib.factory.Loader import Loader as LoaderFactory
from lib.factory.StorageLocation import StorageLocation as DocFactory
from lib.parser.map.google.Italy import Italy
from lib.logger.MongoDB import MongoDB as Log
from lib.Counter.CounterMongoDB import CounterMongoDB
from pymongo import MongoClient

force = True
config = Config('./config/config.yml')
country = 'Italy'

# The options dict is filled step by step; its consumer is outside this
# excerpt.
options = {}
# The Google Maps settings get language 'it' before the loader is built.
gmaps_config = config.get('googlemaps')
gmaps_config.update(language='it')
loader = LoaderFactory.loader_gmaps_with_cache(
    gmaps_config=gmaps_config,
    storage_config=config.get('mongodb'))
options.update(loader=loader)
doc_factory = DocFactory(config.get('mongodb'))
mongo_config = config.get('mongodb')
from lib.job.storage.MongoDB import MongoDB as Storage
from lib.job.wiki.RequestTask import RequestTask
from lib.config.Yaml import Yaml as Config
import pandas as pd

# Fill the France request-task list with one "insee+<code>" entry per row of
# the tab-separated base file; the code is the first field of each row.
config = Config('./config/config.yml')
country = 'France'
job_list = Storage(RequestTask.get_name(country), config.get('mongodb'))

df = pd.read_csv('./WorkBaseFile/BaseCommuneInInseeFR', delimiter="\t")
for index, row in df.iterrows():
    insee = row[0]
    job_name = "insee+{insee}".format(insee=insee)
    job_list.add(job_name)
from lib.factory.Loader import Loader as Factory
import sys
from lib.config.Yaml import Yaml as Config
from lib.parser.wiki.Italy import Italy as WikiParser


def _yes_no(flag):
    """Return 'yes' for a truthy ``flag`` and 'no' otherwise."""
    return 'yes' if flag else 'no'


# Smoke check: load an Italian Wikipedia page, read it back with
# ``from_cache`` and report what the wiki parser makes of it.
config = Config('./config/config.yml')
loader = Factory.loader_with_mongodb(config.get('mongodb'))

#url = 'https://it.wikipedia.org/wiki/Variazioni_amministrative_della_Calabria'
url = 'https://it.wikipedia.org/wiki/Provincia_di_Biella'
headers = {'User-Agent': 'Mozilla/5.0'}

content, code = loader.load(url, headers=headers)
content = loader.from_cache(url, headers=headers)

# Stop early unless the load answered 200 and the content is non-empty.
if code != 200 or len(content) == 0:
    print('E')
    sys.exit()
print('.')

#print(content)
parser = WikiParser(content)
dic = parser.as_dictionary()
print('is location: {}'.format(_yes_no(parser.is_location_page())))
print('is list: {}'.format(_yes_no(parser.is_many_answers())))
import datetime import sys from lib.factory.Loader import Loader as LoaderFactory from lib.parser.map.google.GMapFactory import GMapFactory as MapFactory from lib.config.Yaml import Yaml as Config from lib.logger.File import File as FileLog from lib.factory.StorageLocation import StorageLocation as DocFactory from argparse import ArgumentParser arg_parser = ArgumentParser(description='Download data from gmaps by address') arg_parser.add_argument('-f', help='turn on the force mode') arg_parser.add_argument('-a', help='address') opts = arg_parser.parse_args() config = Config('./config/config.yml') loader = LoaderFactory.loader_gmaps_with_cache( gmaps_config=config.get('googlemaps'), storage_config=config.get('mongodb')) document_factory = DocFactory(config.get('mongodb')) log = FileLog('./log/gmaps_address_france_{date}.log'.format( date=datetime.datetime.now().strftime('%Y-%m-%d'))) log.add('Start', log.INFO) log.add('Params: [{0}]'.format(repr(opts).encode('utf-8')), log.INFO) use_address = bool(opts.a) address = opts.a if use_address else '' force_update = opts.f def update_meta(request, document):
from lib.job.TaskListMongoDB import TaskListMongoDB
from lib.job.map.google.PositionTask import PositionTask
from lib.job.ExecutorWithLimit import ExecutorWithLimit
from lib.job.storage.MongoDB import MongoDB as Storage
from lib.config.Yaml import Yaml as Config
from lib.factory.Loader import Loader as LoaderFactory
from lib.factory.StorageLocation import StorageLocation as DocFactory
from lib.parser.map.google.Italy import Italy
from lib.logger.MongoDB import MongoDB as Log
from lib.Counter.CounterMongoDB import CounterMongoDB
from pymongo import MongoClient

force = True
config = Config('./config/config.yml')
country = 'Italy'

# The options dict is filled step by step; its consumer is outside this
# excerpt.
options = {}
# The Google Maps settings get language 'it' before the loader is built.
gmaps_config = config.get('googlemaps')
gmaps_config.update(language='it')
loader = LoaderFactory.loader_gmaps_with_cache(
    gmaps_config=gmaps_config,
    storage_config=config.get('mongodb')
)
options.update(loader=loader)
doc_factory = DocFactory(config.get('mongodb'))
from lib.job.storage.MongoDB import MongoDB as Storage
from lib.job.wiki.RequestTask import RequestTask
from lib.config.Yaml import Yaml as Config
import pandas as pd
from lib.job.wiki.PageRecursiveTask import PageRecursiveTask

# Fill the Italy recursive-page task list: one job per row of the
# tab-separated URL list, each carrying the link (first field of the row)
# and the dig level it starts with.
config = Config('./config/config.yml')
country = 'Italy'
max_dig_level = 4
job_list = Storage(PageRecursiveTask.get_name(country), config.get('mongodb'))

df = pd.read_csv('./WorkBaseFile/ItalyUrlMainList', delimiter="\t")
for index, row in df.iterrows():
    link = row[0]
    job = {'link': link, 'level': max_dig_level}
    job_list.add(job)