Beispiel #1
0
def import_aliases(pipeline, fh):
    """Apply alias rows from a CSV source to the pipeline's project.

    No new entities are created: existing entities are re-named, or two
    entities are merged when one's name is given as an alias for the
    other.

    The canonical and alias columns are looked up in the ``mapping``
    section of ``pipeline.config``: they are the entries whose
    ``attribute`` is ``canonical`` and ``alias`` respectively. On every
    100th row (starting with the first) ``pipeline.percent_complete`` is
    refreshed and the database session is committed.
    """
    importer = CSVImporter(fh)

    # Find which CSV columns carry the canonical name and the alias.
    canonical_column = alias_column = None
    mapping = pipeline.config.get('mapping', {})
    for column, spec in mapping.items():
        attribute = spec.get('attribute')
        if attribute == 'alias':
            alias_column = column
        elif attribute == 'canonical':
            canonical_column = column

    for index, row in enumerate(importer):
        source_url = _row_source_url(pipeline, row)
        entities.apply_alias(
            pipeline.project,
            pipeline.author,
            row.get(canonical_column),
            row.get(alias_column),
            source_url=source_url,
        )

        # Only report progress and commit once per batch of 100 rows.
        if index % 100:
            continue
        fraction = float(index) / max(1, len(importer))
        pipeline.percent_complete = int(fraction * 100)
        db.session.commit()
Beispiel #2
0
def import_aliases(pipeline, fh):
    """Read alias definitions from a CSV source and apply them.

    This never creates entities. It re-names existing entities, or
    merges two entities if one's name is given as an alias for the
    other.

    Column selection comes from ``pipeline.config['mapping']``: the key
    whose value has ``attribute == 'canonical'`` names the canonical
    column, and the one with ``attribute == 'alias'`` names the alias
    column. Progress is written to ``pipeline.percent_complete`` and the
    session is committed whenever the zero-based row index is a multiple
    of 100.
    """
    importer = CSVImporter(fh)

    alias_column = None
    canonical_column = None
    for name, options in pipeline.config.get('mapping', {}).items():
        kind = options.get('attribute')
        if kind == 'alias':
            alias_column = name
        elif kind == 'canonical':
            canonical_column = name

    for row_number, row in enumerate(importer):
        source_url = _row_source_url(pipeline, row)

        entities.apply_alias(pipeline.project, pipeline.author,
                             row.get(canonical_column),
                             row.get(alias_column),
                             source_url=source_url)

        at_checkpoint = row_number % 100 == 0
        if at_checkpoint:
            # Guard against an empty/zero-length importer when dividing.
            total_rows = max(1, len(importer))
            done = float(row_number) / total_rows
            pipeline.percent_complete = int(done * 100)
            db.session.commit()
Beispiel #3
0
def import_aliases(project, author, path):
    """Import aliases from a CSV file.

    This will not create new entities, but re-name existing entities or
    merge two entities if one's name is given as an alias for the other.

    The file at ``path`` is read with ``DictReader``. Column names are
    lower-cased and stripped of surrounding whitespace before lookup, and
    each row's ``canonical`` and ``alias`` values are handed to
    ``entities.apply_alias`` along with ``project`` and ``author``. The
    database session is committed whenever the zero-based row index is a
    multiple of 1000, and once more after the last row.

    Raises:
        AssertionError: if a row has no ``canonical`` or no ``alias``
            column.
    """
    with open(path, 'r') as fh:
        reader = DictReader(fh)
        for i, row in enumerate(reader):
            # A row with more cells than the header gets its surplus cells
            # stored under the key None (csv.DictReader's default restkey;
            # assumes DictReader follows those semantics). None has no
            # .lower(), so that entry is skipped instead of crashing.
            data = {k.lower().strip(): v
                    for k, v in row.items() if k is not None}

            # Raised explicitly rather than via `assert` so the check is
            # still enforced under `python -O`; the exception type and
            # messages are the same as the asserts they replace.
            for column in ('canonical', 'alias'):
                if column not in data:
                    raise AssertionError('No "%s" column!' % column)

            entities.apply_alias(project, author, data.get('canonical'),
                                 data.get('alias'))
            if i % 1000 == 0:
                db.session.commit()
        # Flush whatever was applied since the last periodic commit.
        db.session.commit()
Beispiel #4
0
def import_aliases(project, author, path):
    """Import aliases from a CSV file.

    This will not create new entities, but re-name existing entities or
    merge two entities if one's name is given as an alias for the other.

    Rows are read from ``path`` with ``DictReader``. Header names are
    matched after lower-casing and stripping whitespace. For every row
    the ``canonical`` and ``alias`` values are passed to
    ``entities.apply_alias`` together with ``project`` and ``author``.
    The session is committed when the zero-based row index is a multiple
    of 1000 and again after the final row.

    Raises:
        AssertionError: if a row has no ``canonical`` or no ``alias``
            column.
    """
    with open(path, 'r') as fh:
        reader = DictReader(fh)
        for i, row in enumerate(reader):
            data = {}
            for k, v in row.items():
                if k is None:
                    # Surplus cells of an over-long row are collected
                    # under the key None (csv.DictReader's default
                    # restkey; assumes DictReader behaves that way).
                    # Calling .lower() on it would raise AttributeError.
                    continue
                data[k.lower().strip()] = v

            # Explicit raises instead of `assert` statements: asserts are
            # removed under `python -O`, which would let rows without the
            # required columns through. Type and message are unchanged.
            if 'canonical' not in data:
                raise AssertionError('No "canonical" column!')
            if 'alias' not in data:
                raise AssertionError('No "alias" column!')

            entities.apply_alias(project, author,
                                 data.get('canonical'),
                                 data.get('alias'))
            if i % 1000 == 0:
                db.session.commit()
        # Commit the rows applied since the last periodic commit.
        db.session.commit()