コード例 #1
0
ファイル: __init__.py プロジェクト: Cristianf/mptracker
def assets(file_path, no_commit=False):
    """Load asset statements parsed from ``file_path`` into the database.

    Every record yielded by ``parse_assets(file_path)`` is attached to a
    person looked up by normalized name (only people joined to a mandate
    with ``year=2012`` are considered) and handed to a ``TablePatcher`` for
    ``models.AssetStatement`` keyed on ``(person_id, date)``.  All rows are
    stored with the fixed date 2012-11-01.

    :param file_path: passed unchanged to ``parse_assets``.
    :param no_commit: when true the session is rolled back at the end
        instead of being committed.
    :raises KeyError: if a record names a person missing from the lookup
        map, or lacks one of the fields read below.
    """
    from mptracker.scraper.assets import parse_assets
    from mptracker.nlp import normalize

    def total_eur(record, field):
        """Return the ``TOTAL_EUR`` entry of ``record[field]``."""
        return record[field]['TOTAL_EUR']

    asset_patcher = TablePatcher(
        models.AssetStatement,
        models.db.session,
        key_columns=['person_id', 'date'],
    )

    # normalized person name -> person id
    people_map = {
        normalize(person.name): person.id
        for person in (
            models.Person.query
            .join(models.Person.mandates)
            .filter_by(year=2012)
        )
    }

    # NOTE(review): remove=True presumably drops rows that are not re-added
    # during this run -- confirm against TablePatcher.process.
    with asset_patcher.process(remove=True) as add_asset:
        for record in parse_assets(file_path):
            person_name = normalize(record.pop('person_name'))
            person_id = people_map[person_name]
            # These keys (and 'person_name' above) are stripped before the
            # record is stored as 'raw_data'.
            del record['constituency']
            del record['county']
            add_asset({
                'person_id': person_id,
                'date': date(2012, 11, 1),
                'raw_data': record,
                'net_worth_eur': (
                    total_eur(record, 'acct_value')
                    - total_eur(record, 'debt_value')
                    + total_eur(record, 'invest_value')
                    + total_eur(record, 'valuables_value')
                ),
                'land_agri_area': record['land_agri_area'],
                'land_city_area': record['land_city_area'],
                'realty_count': (
                    record['realty_apartment_count'] +
                    record['realty_business_count'] +
                    record['realty_house_count']
                ),
                'vehicle_count': record['vehicle_count'],
                'year_income_eur': (
                    total_eur(record, 'family_income_value') +
                    total_eur(record, 'gift_value') +
                    total_eur(record, 'sales_value')
                ),
            })

    if no_commit:
        # Logger.warn is a deprecated alias; warning() is the supported name.
        logger.warning("Rolling back the transaction")
        models.db.session.rollback()

    else:
        models.db.session.commit()
コード例 #2
0
ファイル: __init__.py プロジェクト: rdragos/mptracker
def get_romania_curata():
    """Attach scraped "Romania Curata" data to the matching ``Person`` rows.

    The scraper output and a table of hand-written name exceptions are read
    from JSON files (paths are relative to the current working directory).
    For every scraped ``(name, fortune)`` pair the owner is looked up:

    * if the normalized scraped name has an entry in the exceptions table,
      the person with exactly that name is used;
    * otherwise every word order of the scraped name is compared with every
      not-yet-matched person name, and the best candidate is accepted when
      its similarity score is above 93 (out of 100).

    A matched person gets ``romania_curata`` set to the fortune lines joined
    with newlines.  The session is committed once at the end.
    """
    from os import path
    from difflib import SequenceMatcher as sm
    from itertools import permutations
    import json
    from mptracker.nlp import normalize

    # Names still available for fuzzy matching; shrinks as people are matched.
    sql_names = [person.name for person in models.Person.query.all()]

    with open(path.relpath("mptracker/scraper/scraper_curata_out.json"),
              'r', encoding='utf-8') as f:
        scraper_result = json.load(f)

    with open(path.relpath(
            'mptracker/scraper/romania_curata_exceptions.json'),
            'r', encoding='utf-8') as f:
        person_exceptions = json.load(f)

    def matching_score(first_name, second_name):
        """Return the similarity of two names as a percentage (0-100)."""
        return sm(None, first_name, second_name).ratio() * 100

    def add_person(name, fortune):
        """Store ``fortune`` on the person called ``name``.

        Returns True when such a person exists, False otherwise.
        """
        person = (
            models.Person.query
                .filter_by(name=name)
                .first()
        )
        if person is None:
            return False
        person.romania_curata = "\n".join(fortune)
        print("Found a match for ", name.encode('utf-8'))
        # The name may already have been consumed by an earlier match; a
        # second hit must not abort the whole import with ValueError.
        if name in sql_names:
            sql_names.remove(name)
        return True

    for name, fortune in scraper_result:
        name_scraper = normalize(name)

        if (name_scraper in person_exceptions
                and add_person(person_exceptions[name_scraper], fortune)):
            # The exception already resolved this name; running the fuzzy
            # search as well could assign the same fortune to a second,
            # merely similar-looking person.
            continue

        # Every word order of the scraped name, built once per scraped name
        # instead of once per database name.
        candidates = [
            " ".join(perm)
            for perm in permutations(name_scraper.split(" "))
        ]

        max_matching = (0, 0)
        for temporary_sqlname in sql_names:
            name_sql = normalize(temporary_sqlname)
            for candidate in candidates:
                current_matching = matching_score(candidate, name_sql)

                # Strict comparison: the first name reaching the best score
                # wins ties.
                if max_matching[0] < current_matching:
                    max_matching = (current_matching, temporary_sqlname)

        if max_matching[0] > 93:
            add_person(max_matching[1], fortune)

    models.db.session.commit()
コード例 #3
0
ファイル: __init__.py プロジェクト: rdragos/mptracker
def get_romania_curata():
    """Copy scraped "Romania Curata" fortunes onto ``Person`` records.

    Two JSON files are loaded (paths relative to the current working
    directory): the scraper output, iterated as ``(name, fortune)`` pairs,
    and a table of manual exceptions keyed by normalized scraped name.

    Matching strategy for each scraped name:

    1. An entry in the exceptions table wins: the person whose name equals
       the table value receives the fortune.
    2. Otherwise each word ordering of the scraped name is scored against
       each still-unmatched person name; the top-scoring name is accepted
       only if the score exceeds 93 on a 0-100 scale.

    The fortune lines are newline-joined into ``person.romania_curata`` and
    the session is committed after all pairs were processed.
    """
    from os import path
    from difflib import SequenceMatcher as sm
    from itertools import permutations
    import json
    from mptracker.nlp import normalize

    # Person names not matched yet; add_person() removes entries.
    sql_names = [person.name for person in models.Person.query.all()]

    with open(path.relpath("mptracker/scraper/scraper_curata_out.json"),
              'r',
              encoding='utf-8') as f:
        scraper_result = json.load(f)

    with open(path.relpath('mptracker/scraper/romania_curata_exceptions.json'),
              'r',
              encoding='utf-8') as f:
        person_exceptions = json.load(f)

    def matching_score(first_name, second_name):
        """Return the SequenceMatcher ratio of two names scaled to 0-100."""
        return sm(None, first_name, second_name).ratio() * 100

    def add_person(name, fortune):
        """Set the fortune on the person named ``name``.

        Returns True if the person was found, False if no such person
        exists.
        """
        person = models.Person.query.filter_by(name=name).first()
        if person is None:
            return False
        person.romania_curata = "\n".join(fortune)
        print("Found a match for ", name.encode('utf-8'))
        # An earlier match may have removed this name already; guard so a
        # repeated hit does not raise ValueError and lose the whole run.
        if name in sql_names:
            sql_names.remove(name)
        return True

    for name, fortune in scraper_result:
        name_scraper = normalize(name)

        if name_scraper in person_exceptions:
            if add_person(person_exceptions[name_scraper], fortune):
                # Resolved by hand-written exception: skip the fuzzy search,
                # which could otherwise give the same fortune to another
                # person with a similar name.
                continue

        # Word orderings depend only on the scraped name, so build them once
        # here rather than for every database name.
        orderings = [
            " ".join(perm)
            for perm in permutations(name_scraper.split(" "))
        ]

        max_matching = (0, 0)
        for temporary_sqlname in sql_names:
            name_sql = normalize(temporary_sqlname)
            for ordering in orderings:
                current_matching = matching_score(ordering, name_sql)

                # Strict "<" keeps the earliest best-scoring name on ties.
                if max_matching[0] < current_matching:
                    max_matching = (current_matching, temporary_sqlname)

        if max_matching[0] > 93:
            add_person(max_matching[1], fortune)

    models.db.session.commit()
コード例 #4
0
ファイル: models.py プロジェクト: alexef/mptracker
 def explode(self, name):
     """Break ``name`` into the set of its normalized words.

     The name is passed through ``normalize``, hyphens are then treated as
     word separators, and the whitespace-separated pieces are returned as a
     ``frozenset``.
     """
     cleaned = normalize(name)
     words = cleaned.replace('-', ' ').split()
     return frozenset(words)