# Smoke test for the Italian Wikipedia loader and parser: fetch the page for
# Roma, read it back through the cache, then check a few parsed fields.
# Prints '.' for every passed check and 'E' for every failed one.
from lib.factory.Loader import Loader as Factory
import sys
from lib.config.Yaml import Yaml as Config
from lib.parser.wiki.Italy import Italy as WikiParser


def _mark(passed):
    """Return '.' when the check passed, 'E' otherwise."""
    if passed:
        return '.'
    return 'E'


config = Config('./config/config.yml')
loader = Factory.loader_with_mongodb(config.get('mongodb'))

url = 'https://it.wikipedia.org/wiki/Roma'
headers = {'User-Agent': 'Mozilla/5.0'}

# The content from the direct load is deliberately replaced by the cached
# copy; only the status code of the direct load is kept.
content, code = loader.load(url, headers=headers)
content = loader.from_cache(url, headers=headers)

# Guard clause: stop the script when the page could not be obtained.
if code != 200 or len(content) <= 0:
    print('E')
    sys.exit()
print('.')

parser = WikiParser(content)
dic = parser.as_dictionary()

# Each check is evaluated immediately before it is printed, so a failure in
# a later lookup does not suppress the markers already emitted.
print(_mark(dic.get('name') == 'Roma'), end='')
print(_mark(dic.get('type') == 'comune'), end='')
print(_mark(len(dic.get('admin_hierarchy')) == 3), end='')
print(_mark(dic.get('admin_hierarchy')[0].get('name') == 'Italia'), end='')
# Script fragment (collapsed onto one line): set-up for downloading Google
# Maps data.
#   * Declares the CLI options -f, --lat and --lng and parses them.
#   * Loads ./config/config.yml, builds a cached gmaps loader from the
#     'googlemaps' and 'mongodb' sections, and a document factory from the
#     'mongodb' section.
#   * Opens a log file whose name carries today's date and logs 'Start' plus
#     the parsed options.
#   * latitude/longitude are taken from the options only when BOTH --lat and
#     --lng are given; otherwise both are the empty string.
#   * force_update is the raw value of -f.
# NOTE(review): LoaderFactory and datetime are used here but no import for
#   them is visible in this fragment -- confirm they are imported elsewhere.
# NOTE(review): '-f' is declared without an action, so argparse will expect a
#   value after it; the help text suggests a flag -- confirm the intent.
# NOTE(review): repr(opts).encode('utf-8') is bytes; formatting bytes into a
#   str on Python 3 logs the b'...' form -- confirm this is wanted.
# The trailing `def update_meta(...)` is cut off in this view and is left
# untouched.
from lib.parser.map.google.GMapFactory import GMapFactory as MapFactory from lib.config.Yaml import Yaml as Config from lib.logger.File import File as FileLog from lib.factory.StorageLocation import StorageLocation as DocFactory from argparse import ArgumentParser arg_parser = ArgumentParser(description='Download data from gmaps by address') arg_parser.add_argument('-f', help='turn on the force mode') arg_parser.add_argument('--lat', help='latitude') arg_parser.add_argument('--lng', help='longitude') opts = arg_parser.parse_args() config = Config('./config/config.yml') loader = LoaderFactory.loader_gmaps_with_cache( gmaps_config=config.get('googlemaps'), storage_config=config.get('mongodb')) document_factory = DocFactory(config.get('mongodb')) log = FileLog('./log/gmaps_position_france_{date}.log'.format( date=datetime.datetime.now().strftime('%Y-%m-%d'))) log.add('Start', log.INFO) log.add('Params: [{0}]'.format(repr(opts).encode('utf-8')), log.INFO) use_position = opts.lat and opts.lng latitude = opts.lat if use_position else '' longitude = opts.lng if use_position else '' force_update = opts.f def update_meta(request, document): actual_doc = document.get_document()
# Smoke test for the cached Google Maps loader: look up 'Italia, Roma' by
# address, dump the raw response, then feed it to the Italy map factory.
# Prints '.' for every non-empty result and 'E' for every empty one.
from lib.factory.Loader import Loader as LoaderFactory
from lib.parser.map.google.GMapFactory import GMapFactory as MapFactory
from lib.config.Yaml import Yaml as Config


def _report(result):
    """Print '.' when *result* has a non-zero length, 'E' otherwise."""
    marker = 'E'
    if len(result):
        marker = '.'
    print(marker, end='')


config = Config('./config/config.yml')

# Sections are read in the same order as the original positional arguments.
gmaps_section = config.get('googlemaps')
mongodb_section = config.get('mongodb')
loader = LoaderFactory.loader_gmaps_with_cache(gmaps_section, mongodb_section)

address = 'Italia, Roma'
address_content = loader.by_address(address=address)
print(address_content)
_report(address_content)

objects = MapFactory.italy(address_content)
_report(objects)
# Smoke test for the (uncached) Google Maps loader: reverse-geocode a fixed
# coordinate with the response language switched to French, dump the raw
# response and print '.' when it is non-empty, 'E' otherwise.
from lib.factory.Loader import Loader as Factory
from lib.config.Yaml import Yaml as Config

config = Config('./config/config.yml')

# NOTE(review): this echoes the geocoding API key to stdout -- fine for a
# local smoke test, but confirm the output never ends up in shared logs.
geocoding_settings = config.get('googlemaps').get('geocoding')
print(geocoding_settings.get('key'))

# The 'googlemaps' section is modified in place before the loader is built.
gmaps_config = config.get('googlemaps')
gmaps_config.update(language='fr')
loader = Factory.loader_gmaps(gmaps_config)

lat = 48.861077
lng = 2.344552
position_content = loader.by_position(lat=lat, lng=lng)
print(position_content)

if len(position_content):
    print('.', end='')
else:
    print('E', end='')
# Script fragment (collapsed onto one line): set-up for the Spain run.
#   * Loads ./config/config.yml and opens a MongoDB client from the 'mongodb'
#     section (host/port); `coll` is the SPAININE collection of the
#     `location` database.
#   * Prints the Google Maps geocoding key from the config.
#   * skiprows is taken from the first command-line argument and falls back
#     to 0 when reading it raises any exception; it is converted with int()
#     when passed to pandas.
#   * Reads ./data/spain/Spain_notDublicate.csv into `df`.
#   * Builds a MongoDB-backed loader and a Spider configured for language
#     'es' with the Spain map parser and caching enabled.
# NOTE(review): Config, pymongo, DocFactory, sys, pd, Loader, LoaderFactory,
#   MapFactory, Spider and Keygen are bound outside this fragment -- confirm
#   the imports at the top of the file.
# NOTE(review): `loader` and `headers` are assigned here but not used in the
#   visible portion; the bound exception `e` is unused.
# NOTE(review): the geocoding key is echoed to stdout -- confirm this is
#   acceptable for where the script runs.
# The trailing `def getDistance(...)` is cut off mid-expression in this view
# and is left untouched.
country = 'Spain' config = Config('./config/config.yml') mongo_config = config.get('mongodb') conn = pymongo.MongoClient(mongo_config['host'], mongo_config['port']) db = conn.location coll = db.SPAININE print(config.get('googlemaps').get('geocoding').get('key')) doc_factory = DocFactory(config.get('mongodb')) try: skiprows = sys.argv[1] except Exception as e: skiprows = 0 df = pd.read_csv('./data/spain/Spain_notDublicate.csv', skiprows=int(skiprows), low_memory=False) loader = Loader.loader_with_mongodb(config.get('mongodb')) headers = {'User-Agent': 'Mozilla/5.0'} language = 'es' spider = Spider(loader_factory=LoaderFactory, gmap_parser=MapFactory.spain, wiki_parser=True, doc_factory=doc_factory, language=language, config=config, use_cache=True) def getDistance(lat1, lon1, lat2, lon2): Key = Keygen() url = 'https://maps.googleapis.com/maps/api/distancematrix/json?units=imperial&mode=walking&origins=' + str(
# Set-up for the recursive Italian Wikipedia job: resolve the job title,
# load the configuration and assemble the option dictionary for the task.
opts = arg_parser.parse_args()
country = 'Italy'

# Use the title given on the command line, or fall back to a timestamped one.
title = opts.t or 'italy_recursive_{}'.format(
    datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S'))
print('START {}'.format(title))

force = True
config = Config('./config/config.yml')

loader = LoaderFactory.loader_with_mongodb(
    storage_config=config.get('mongodb'))
doc_factory = DocFactory(config.get('mongodb'))

# Keys are listed in the order the original script inserted them.
options = {
    'loader': loader,
    'doc_factory': doc_factory,
    'force_update': force,
    'parser': Italy,
    'host': 'it.wikipedia.org',
    'headers': {'User-Agent': 'Mozilla/5.0'},
}

storage = Storage(job_name=PageRecursiveTask.TYPE,
                  storage_config=config.get('mongodb'))

# NOTE(review): update() without arguments leaves `options` unchanged, and
# `storage` is not added to it in the visible code -- confirm whether an
# argument is missing here.
options.update()