def import_aliases(pipeline, fh):
    """ Import aliases from a CSV source.

    This will not create new entities, but re-name existing entities or
    merge two entities if one's name is given as an alias for the other.

    The columns to read are taken from ``pipeline.config['mapping']``: the
    entry whose ``attribute`` is ``'canonical'`` names the canonical column
    and the entry whose ``attribute`` is ``'alias'`` names the alias column.
    Every row is handed to ``entities.apply_alias`` together with the value
    returned by ``_row_source_url`` for that row.

    :param pipeline: object providing ``config``, ``project``, ``author``
        and a writable ``percent_complete`` attribute.
    :param fh: CSV source handed to ``CSVImporter``.
    """
    importer = CSVImporter(fh)

    # Work out which CSV columns carry the canonical name and the alias.
    # Either stays None when the mapping does not mention it.
    alias_column = None
    canonical_column = None
    for column, spec in pipeline.config.get('mapping', {}).items():
        attribute = spec.get('attribute')
        if attribute == 'canonical':
            canonical_column = column
        elif attribute == 'alias':
            alias_column = column

    for index, row in enumerate(importer):
        url = _row_source_url(pipeline, row)
        entities.apply_alias(
            pipeline.project,
            pipeline.author,
            row.get(canonical_column),
            row.get(alias_column),
            source_url=url,
        )

        # Only every 100th row (starting with the first) is a checkpoint.
        if index % 100:
            continue

        # max(1, ...) guards the division against a zero-length importer.
        total = max(1, len(importer))
        pipeline.percent_complete = int(float(index) / total * 100)
        # NOTE(review): the commit is treated as part of the checkpoint, so
        # rows after the last checkpoint are not committed here; presumably
        # the caller commits - confirm.
        db.session.commit()
def import_aliases(project, author, path):
    """ Import aliases from a CSV file.

    This will not create new entities, but re-name existing entities or
    merge two entities if one's name is given as an alias for the other.

    The file must have a header row containing a ``canonical`` and an
    ``alias`` column; header names are compared lower-cased and with
    surrounding whitespace removed. Each row is passed to
    ``entities.apply_alias``. The session is committed every 1000 rows and
    once more after the last row.

    :param project: passed through to ``entities.apply_alias``.
    :param author: passed through to ``entities.apply_alias``.
    :param path: path of the CSV file to read.
    :raises AssertionError: if a row lacks the ``canonical`` or the
        ``alias`` column.
    """
    with open(path, 'r') as fh:
        reader = DictReader(fh)
        for i, row in enumerate(reader):
            # DictReader stores the surplus cells of an over-long row under
            # the key None. That is not a column name, so skip it rather
            # than crash on None.lower().
            data = {}
            for k, v in row.items():
                if k is not None:
                    data[k.lower().strip()] = v

            # Raised explicitly instead of via ``assert`` so the validation
            # is still performed under ``python -O``. The exception type and
            # messages are unchanged for existing callers.
            for required in ('canonical', 'alias'):
                if required not in data:
                    raise AssertionError('No "%s" column!' % required)

            entities.apply_alias(project, author,
                                 data.get('canonical'),
                                 data.get('alias'))

            # Periodic commit so a large import is flushed in batches.
            if i % 1000 == 0:
                db.session.commit()

    # Final commit for the rows since the last periodic commit.
    db.session.commit()