예제 #1
0
from lib.factory.Loader import Loader as Factory
import sys
from lib.config.Yaml import Yaml as Config
from lib.parser.wiki.Italy import Italy as WikiParser

# Smoke test: load the Italian Wikipedia page for Rome through the loader
# built by Factory.loader_with_mongodb, re-read it via from_cache, parse it
# and check a few fields. Each check prints '.' on success and 'E' on failure.
config = Config('./config/config.yml')

loader = Factory.loader_with_mongodb(config.get('mongodb'))

url = 'https://it.wikipedia.org/wiki/Roma'
headers = {'User-Agent': 'Mozilla/5.0'}

# The status code comes from the direct load ...
content, code = loader.load(url, headers=headers)

# ... while the content that gets checked and parsed is the from_cache result.
content = loader.from_cache(url, headers=headers)

if code == 200 and len(content) > 0:
    print('.')
else:
    print('E')
    # Exit with a non-zero status so a calling shell/CI sees the failure.
    sys.exit(1)

parser = WikiParser(content)

dic = parser.as_dictionary()

# A missing or empty hierarchy must be reported as a failed check ('E'),
# not crash the script with TypeError/IndexError.
admin_hierarchy = dic.get('admin_hierarchy') or []

print('.' if dic.get('name') == 'Roma' else 'E', end='')
print('.' if dic.get('type') == 'comune' else 'E', end='')
print('.' if len(admin_hierarchy) == 3 else 'E', end='')
print('.' if admin_hierarchy and admin_hierarchy[0].get('name') == 'Italia'
      else 'E',
      end='')
예제 #2
0
from lib.parser.map.google.GMapFactory import GMapFactory as MapFactory
from lib.config.Yaml import Yaml as Config
from lib.logger.File import File as FileLog
from lib.factory.StorageLocation import StorageLocation as DocFactory
from argparse import ArgumentParser

# Command-line options for the gmaps download script.
arg_parser = ArgumentParser(description='Download data from gmaps by address')
# nargs='?' + const=True lets a bare `-f` switch force mode on, while the
# previous `-f VALUE` form keeps working (absent flag still yields None).
arg_parser.add_argument('-f', nargs='?', const=True,
                        help='turn on the force mode')
arg_parser.add_argument('--lat', help='latitude')
arg_parser.add_argument('--lng', help='longitude')
opts = arg_parser.parse_args()

config = Config('./config/config.yml')

loader = LoaderFactory.loader_gmaps_with_cache(
    gmaps_config=config.get('googlemaps'),
    storage_config=config.get('mongodb'))
document_factory = DocFactory(config.get('mongodb'))
# One log file per calendar day.
log = FileLog('./log/gmaps_position_france_{date}.log'.format(
    date=datetime.datetime.now().strftime('%Y-%m-%d')))
log.add('Start', log.INFO)
# Format the text repr directly: encoding it to bytes first would make
# Python 3 log a b'...' literal instead of the options themselves.
log.add('Params: [{0}]'.format(repr(opts)), log.INFO)

# A position is only used when both coordinates were supplied.
use_position = opts.lat and opts.lng
latitude = opts.lat if use_position else ''
longitude = opts.lng if use_position else ''
force_update = opts.f


def update_meta(request, document):
    actual_doc = document.get_document()
예제 #3
0
from lib.factory.Loader import Loader as LoaderFactory
from lib.parser.map.google.GMapFactory import GMapFactory as MapFactory
from lib.config.Yaml import Yaml as Config

# Smoke test: geocode a fixed address through the cached gmaps loader and
# feed the response to MapFactory.italy. Each check prints '.' when the
# result has a non-zero length and 'E' otherwise.
config = Config('./config/config.yml')

gmaps_settings = config.get('googlemaps')
mongo_settings = config.get('mongodb')
loader = LoaderFactory.loader_gmaps_with_cache(gmaps_settings, mongo_settings)

address = 'Italia, Roma'

address_content = loader.by_address(address=address)

# Dump the raw response for manual inspection.
print(address_content)

address_mark = '.' if len(address_content) else 'E'
print(address_mark, end='')

objects = MapFactory.italy(address_content)

objects_mark = '.' if len(objects) else 'E'
print(objects_mark, end='')
예제 #4
0
from lib.factory.Loader import Loader as Factory
from lib.config.Yaml import Yaml as Config

# Smoke test: reverse-geocode a fixed coordinate pair with the gmaps loader
# configured for French, then print '.' if the response has a non-zero
# length and 'E' otherwise.
config = Config('./config/config.yml')

# NOTE(review): this writes the configured geocoding key to stdout.
geocoding_section = config.get('googlemaps').get('geocoding')
print(geocoding_section.get('key'))

gmaps_config = config.get('googlemaps')
gmaps_config.update(language='fr')

loader = Factory.loader_gmaps(gmaps_config)

lat = 48.861077
lng = 2.344552

position_content = loader.by_position(lat=lat, lng=lng)

# Dump the raw response for manual inspection.
print(position_content)

position_mark = '.' if len(position_content) else 'E'
print(position_mark, end='')
예제 #5
0
# Module-level setup for the Spain run: configuration, MongoDB handles,
# the input CSV and the Spider instance.
country = 'Spain'
config = Config('./config/config.yml')
mongo_config = config.get('mongodb')
conn = pymongo.MongoClient(mongo_config['host'], mongo_config['port'])
db = conn.location
coll = db.SPAININE
# NOTE(review): this writes the configured geocoding key to stdout; confirm
# that is intended outside of local debugging.
print(config.get('googlemaps').get('geocoding').get('key'))
doc_factory = DocFactory(config.get('mongodb'))
# Optional first CLI argument: how many CSV rows to skip. Only a missing
# argument is expected here, so catch exactly that instead of every Exception.
try:
    skiprows = sys.argv[1]
except IndexError:
    skiprows = 0
df = pd.read_csv('./data/spain/Spain_notDublicate.csv',
                 skiprows=int(skiprows),
                 low_memory=False)
loader = Loader.loader_with_mongodb(config.get('mongodb'))
headers = {'User-Agent': 'Mozilla/5.0'}

language = 'es'
spider = Spider(loader_factory=LoaderFactory,
                gmap_parser=MapFactory.spain,
                wiki_parser=True,
                doc_factory=doc_factory,
                language=language,
                config=config,
                use_cache=True)


def getDistance(lat1, lon1, lat2, lon2):
    Key = Keygen()
    url = 'https://maps.googleapis.com/maps/api/distancematrix/json?units=imperial&mode=walking&origins=' + str(
예제 #6
0
# Module-level setup for the recursive Italy job: parse options, pick a job
# title, and assemble the options dict plus the job storage.
opts = arg_parser.parse_args()

country = 'Italy'

# Fall back to a timestamped title when opts.t is empty or unset.
title = opts.t
if not title:
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    title = 'italy_recursive_{}'.format(timestamp)

print('START {}'.format(title))

force = True

config = Config('./config/config.yml')

loader = LoaderFactory.loader_with_mongodb(
    storage_config=config.get('mongodb'))
doc_factory = DocFactory(config.get('mongodb'))

# Same keys, in the same insertion order, as the step-by-step updates.
options = {
    'loader': loader,
    'doc_factory': doc_factory,
    'force_update': force,
    'parser': Italy,
    'host': 'it.wikipedia.org',
    'headers': {'User-Agent': 'Mozilla/5.0'},
}

storage = Storage(job_name=PageRecursiveTask.TYPE,
                  storage_config=config.get('mongodb'))

# Called without arguments, so it adds nothing to `options` as written.
options.update()