コード例 #1
0
ファイル: GMapTask.py プロジェクト: andriynichik/sincity
    def _get_common_data(self, list_name, force=False):
        """Build the shared task options and the job-list storage.

        :param list_name: prefix of the job list; the storage is named
            ``<list_name>_<country>`` using ``self._country``.
        :param force: value passed through as the ``force_update`` option.
        :return: tuple ``(options, storage)``.
        """
        settings = Config('./config/config.yml')

        # ``settings.get`` is called separately for each consumer.
        gmaps_loader = LoaderFactory.loader_gmaps_with_cache(
            gmaps_config=settings.get('googlemaps'),
            storage_config=settings.get('mongodb'),
        )
        docs = DocFactory(settings.get('mongodb'))

        # The parser is looked up on MapFactory by the country name.
        country_parser = getattr(MapFactory, self._country)

        job_storage = MongoDB(
            '{name}_{country}'.format(name=list_name, country=self._country),
            settings.get('mongodb'),
        )

        task_options = dict(
            loader=gmaps_loader,
            doc_factory=docs,
            parser=country_parser,
            force_update=force,
        )
        return task_options, job_storage
コード例 #2
0
from lib.job.storage.MongoDB import MongoDB as Storage
from lib.job.map.google.AddressTask import AddressTask
from lib.config.Yaml import Yaml as Config
import pandas as pd

# Country name used to derive the address job-list name.
country = 'Italy'
# Addresses already queued; the loop below checks it before adding a job.
lst_address = []

# Positional column indexes into each CSV row. Only region_index is used in
# the visible part of the loop.
# NOTE(review): names suggest region / province / municipality / locality
# columns -- confirm against the CSV header.
region_index = 1
provincia_index = 3
comune_index = 5
localita_index = 9

# Only the 'mongodb' section of the YAML config is kept.
config = Config('./config/config.yml').get('mongodb')

# Job list whose name is produced by AddressTask.get_name(country).
job_list = Storage(AddressTask.get_name(country), config)

# Semicolon-delimited, ISO-8859-1 encoded input. skiprows=[1] drops the
# second line of the file (the first line after the header row).
df = pd.read_csv('./data/italy/indicatori_2011_localita.csv',
                 delimiter=";",
                 skiprows=[1],
                 encoding='ISO-8859-1')

for index, row in df.iterrows():
    print(index)
    try:
        new_address = 'Italia, '
        if row[region_index]:
            new_address += row[region_index]
            if new_address not in lst_address:
                lst_address.append(new_address)
                job_list.add(new_address)
コード例 #3
0
ファイル: wiki_request.py プロジェクト: andriynichik/sincity
from lib.job.storage.MongoDB import MongoDB as Storage
from lib.job.wiki.RequestTask import RequestTask
from lib.config.Yaml import Yaml as Config
import pandas as pd

# Queue one wiki request job per row of the base commune file.
config = Config('./config/config.yml')

country = 'France'

# Job list named by RequestTask.get_name(country); Storage is configured with
# the 'mongodb' section of the config.
job_list = Storage(RequestTask.get_name(country), config.get('mongodb'))

# Tab-separated input; the first column of each row is used as the INSEE code.
df = pd.read_csv('./WorkBaseFile/BaseCommuneInInseeFR', delimiter="\t")
for index, row in df.iterrows():
    # Explicit positional access: ``row[0]`` on a Series labelled by column
    # names relies on pandas' deprecated integer-key fallback.
    insee = row.iloc[0]
    job_list.add("insee+{insee}".format(insee=insee))
コード例 #4
0
from lib.job.storage.MongoDB import MongoDB as Storage
from lib.job.map.google.PositionTask import PositionTask
from lib.config.Yaml import Yaml as Config
from lib.factory.StorageLocation import StorageLocation as DocFactory

# Country name used both for the job-list name and in the query below.
country = 'Italia'

# Only the 'mongodb' section of the YAML config is kept.
config = Config('./config/config.yml').get('mongodb')

# Job list whose name is produced by PositionTask.get_name(country).
job_list = Storage(PositionTask.get_name(country), config)

factory = DocFactory(config)
# NOTE(review): presumably a MongoDB collection of wiki documents -- confirm
# against StorageLocation.wiki_collection().
wiki = factory.wiki_collection()

# Query document passed to wiki.find().
# NOTE(review): the name `filter` shadows the Python builtin.
filter = {
    # 'name' must be present and must not be an array of size 0.
    'name': {
        '$exists': True,
        '$not': {
            '$size': 0
        }
    },
    # At least one element of 'admin_hierarchy' must have name == country.
    'admin_hierarchy': {
        '$elemMatch': {
            'name': country
        }
    }
}

objects = wiki.find(filter)
for obj in objects:
    try:
コード例 #5
0
from lib.job.wiki.RequestTask import RequestTask
from lib.config.Yaml import Yaml as Config
import pandas as pd
import urllib.parse

# Country name used to derive the request job-list name.
country = 'Italy'
# Addresses already queued; the loop below checks it before adding a job.
lst_address = []

# Positional column indexes into each CSV row. Only region_index is used in
# the visible part of the loop.
# NOTE(review): names suggest region / province / municipality / locality
# columns -- confirm against the CSV header.
region_index = 1
provincia_index = 3
comune_index = 5
localita_index = 9

# Only the 'mongodb' section of the YAML config is kept.
config = Config('./config/config.yml').get('mongodb')

# Job list whose name is produced by RequestTask.get_name(country).
# NOTE(review): no import of `Storage` is visible in this excerpt -- confirm
# it is imported at the top of the file.
job_list = Storage(RequestTask.get_name(country), config)

# Semicolon-delimited, ISO-8859-1 encoded input. skiprows=[1] drops the
# second line of the file (the first line after the header row).
df = pd.read_csv('./data/italy/indicatori_2011_localita.csv',
                 delimiter=";",
                 skiprows=[1],
                 encoding='ISO-8859-1')

for index, row in df.iterrows():
    print(index)
    try:
        new_address = 'Italia,'
        if row[1]:
            new_address += row[region_index]
            if new_address not in lst_address:
                lst_address.append(new_address)
                job_list.add(urllib.parse.quote(new_address))
コード例 #6
0
from lib.config.Yaml import Yaml as Config
from lib.factory.StorageLocation import StorageLocation as DocFactory
import pandas as pd
import urllib.parse

# Country name used to derive the page job-list name.
country = 'Italia'
# NOTE(review): lst_address and the *_index values are not used in the
# visible part of this script -- confirm whether they are still needed.
lst_address = []

region_index = 1
provincia_index = 3
comune_index = 5
localita_index = 9

# Only the 'mongodb' section of the YAML config is kept.
config = Config('./config/config.yml').get('mongodb')

# Job list whose name is produced by PageTask.get_name(country).
# NOTE(review): no import of `Storage` or `PageTask` is visible in this
# excerpt -- confirm they are imported at the top of the file.
job_list = Storage(PageTask.get_name(country), config)

factory = DocFactory(config)
# NOTE(review): presumably a MongoDB collection of wiki documents -- confirm
# against StorageLocation.wiki_collection().
wiki = factory.wiki_collection()

filter = {
    'name': {
        '$exists': True,
        '$not': {
            '$size': 0
        }
    },
    'admin_hierarchy': {
        '$elemMatch': {
            'name': country
        }
コード例 #7
0
from lib.job.storage.MongoDB import MongoDB as Storage
from lib.job.wiki.RequestTask import RequestTask
from lib.config.Yaml import Yaml as Config
import pandas as pd
from lib.job.wiki.PageRecursiveTask import PageRecursiveTask

# Queue one recursive page job per row of the Italy main URL list.
config = Config('./config/config.yml')

country = 'Italy'

# Stored as the 'level' value of every queued job.
max_dig_level = 4

# Job list named by PageRecursiveTask.get_name(country); Storage is configured
# with the 'mongodb' section of the config.
job_list = Storage(PageRecursiveTask.get_name(country), config.get('mongodb'))

# Tab-separated input; the first column of each row is used as the link.
df = pd.read_csv('./WorkBaseFile/ItalyUrlMainList', delimiter="\t")
for index, row in df.iterrows():
    # Explicit positional access: ``row[0]`` on a Series labelled by column
    # names relies on pandas' deprecated integer-key fallback.
    link = row.iloc[0]
    job_list.add({'link': link, 'level': max_dig_level})