コード例 #1
0
def run_mapping(mapping_yaml):
    """Generate entities for every query in a mapping file.

    The mapping configuration is loaded from ``mapping_yaml``. For each
    dataset in it, every item yielded by
    ``keys_values(meta, 'queries', 'query')`` is handed to
    ``model.map_entities`` with the dataset name as ``key_prefix``, and
    each resulting entity is passed to ``read_entity`` together with the
    stdout text stream.

    Raises:
        click.Abort: if a ``BrokenPipeError`` occurs while processing.
        click.ClickException: carrying ``str(exc)`` for any other error.
    """
    mappings = load_mapping_file(mapping_yaml)
    out = click.get_text_stream('stdout')
    try:
        for dataset, meta in mappings.items():
            for query in keys_values(meta, 'queries', 'query'):
                for entity in model.map_entities(query, key_prefix=dataset):
                    # NOTE(review): every other call site in this file
                    # gives read_entity an *input* stream; calling it with
                    # stdout and an entity looks like it was meant to be a
                    # writer call -- confirm against the helper module.
                    read_entity(out, entity)
    except BrokenPipeError:
        raise click.Abort()
    except Exception as exc:
        raise click.ClickException(str(exc))
コード例 #2
0
ファイル: util.py プロジェクト: djoffrey/aleph
def read_entities(file_name):
    """Lazily yield the entities stored in ``file_name``.

    The file is opened in text mode and ``read_entity`` is called on it
    repeatedly; iteration stops as soon as it returns ``None``.
    """
    with open(file_name) as handle:
        entity = read_entity(handle)
        while entity is not None:
            yield entity
            entity = read_entity(handle)
コード例 #3
0
def read_entities(file_name):
    """Lazily yield entities from ``file_name``, stamped with one timestamp.

    A single ``datetime.utcnow()`` value is taken before the file is
    opened and set (quietly) as ``indexUpdatedAt`` on every entity, so all
    entities from one call share the same value. Iteration stops when
    ``read_entity`` returns ``None``.
    """
    stamp = datetime.utcnow()
    with open(file_name) as handle:
        entity = read_entity(handle)
        while entity is not None:
            entity.set('indexUpdatedAt', stamp, quiet=True)
            yield entity
            entity = read_entity(handle)
コード例 #4
0
ファイル: util.py プロジェクト: rmallof/aleph
def read_entities(file_name):
    """Lazily yield entities from ``file_name`` with an ``updated_at`` context.

    The ISO-formatted ``datetime.utcnow()`` value is computed once, before
    the file is opened, and stored under ``entity.context["updated_at"]``
    for every entity. Iteration stops when ``read_entity`` returns
    ``None``.
    """
    stamp = datetime.utcnow().isoformat()
    with open(file_name) as handle:
        entity = read_entity(handle)
        while entity is not None:
            entity.context["updated_at"] = stamp
            yield entity
            entity = read_entity(handle)
コード例 #5
0
ファイル: cli.py プロジェクト: tendai-zw/followthemoney
def load_entities(entities):
    """Save every entity read from the ``entities`` stream, then commit.

    Each entity is stored with ``Entity.save`` using ``entities.name`` as
    the second argument. A ``BrokenPipeError`` ends the loop silently;
    the session is committed afterwards in either case.
    """
    session = Session()
    try:
        entity = read_entity(entities)
        while entity is not None:
            Entity.save(session, entities.name, entity)
            entity = read_entity(entities)
    except BrokenPipeError:
        # Deliberately ignored: whatever was saved so far still gets
        # committed below.
        pass
    session.commit()
コード例 #6
0
ファイル: util.py プロジェクト: stofstar/aleph
def read_entities(file_name):
    """Return a list of all entities stored in ``file_name``.

    A single ``datetime.utcnow()`` value is taken up front and set
    (quietly) as ``indexUpdatedAt`` on each entity before it is collected.
    Reading stops when ``read_entity`` returns ``None``.
    """
    stamp = datetime.utcnow()
    collected = []
    with open(file_name) as handle:
        entity = read_entity(handle)
        while entity is not None:
            entity.set('indexUpdatedAt', stamp, quiet=True)
            collected.append(entity)
            entity = read_entity(handle)
    return collected
コード例 #7
0
def pretty(infile):
    """Write each entity from ``infile`` to stdout as indented JSON.

    Every entity is converted with ``to_dict()``, serialised with a
    two-space indent and followed by a newline.

    Raises:
        click.Abort: if a ``BrokenPipeError`` occurs.
    """
    out = click.get_text_stream('stdout')
    try:
        entity = read_entity(infile)
        while entity is not None:
            rendered = json.dumps(entity.to_dict(), indent=2)
            out.write(rendered + '\n')
            entity = read_entity(infile)
    except BrokenPipeError:
        raise click.Abort()
コード例 #8
0
def sign(infile, outfile, signature):
    """Apply a ``Namespace`` built from ``signature`` to each entity.

    Entities are read from ``infile`` until ``read_entity`` returns
    ``None``; the result of ``Namespace.apply`` for each one is written to
    ``outfile`` with ``write_object``.

    Raises:
        click.Abort: if a ``BrokenPipeError`` occurs.
    """
    namespace = Namespace(signature)
    try:
        entity = read_entity(infile)
        while entity is not None:
            write_object(outfile, namespace.apply(entity))
            entity = read_entity(infile)
    except BrokenPipeError:
        raise click.Abort()
コード例 #9
0
ファイル: sieve.py プロジェクト: openlegaldata/followthelaw
def sieve(infile, outfile, schema, property, type):
    """Run ``sieve_entity`` over each entity and write the survivors.

    Entities are read from ``infile`` until ``read_entity`` returns
    ``None``. Each is passed to ``sieve_entity`` along with ``schema``,
    ``property`` and ``type``; results that are not ``None`` are written
    to ``outfile``.

    Raises:
        click.Abort: if a ``BrokenPipeError`` occurs.
    """
    try:
        entity = read_entity(infile)
        while entity is not None:
            kept = sieve_entity(entity, schema, property, type)
            if kept is not None:
                write_object(outfile, kept)
            entity = read_entity(infile)
    except BrokenPipeError:
        raise click.Abort()
コード例 #10
0
ファイル: cli.py プロジェクト: wayne9qiu/followthemoney
def enrich(infile, outfile, enricher):
    """Write every match the enricher produces for each input entity.

    ``enricher`` is resolved through ``load_enricher``. Entities are read
    from ``infile`` until ``read_entity`` returns ``None``, and each item
    yielded by ``enrich_entity_raw`` is written to ``outfile``.

    Raises:
        click.Abort: if a ``BrokenPipeError`` occurs.
    """
    loaded = load_enricher(enricher)
    try:
        entity = read_entity(infile)
        while entity is not None:
            for match in loaded.enrich_entity_raw(entity):
                write_object(outfile, match)
            entity = read_entity(infile)
    except BrokenPipeError:
        raise click.Abort()
コード例 #11
0
ファイル: cli.py プロジェクト: wayne9qiu/followthemoney
def expand(infile, outfile, enricher):
    """Write every entity the enricher's expansion yields per input entity.

    ``enricher`` is resolved through ``load_enricher``. Entities are read
    from ``infile`` until ``read_entity`` returns ``None``, and each item
    yielded by ``expand_entity`` is written to ``outfile``.

    Raises:
        click.Abort: if a ``BrokenPipeError`` occurs.
    """
    loaded = load_enricher(enricher)
    try:
        entity = read_entity(infile)
        while entity is not None:
            # A separate name keeps the expansion results from shadowing
            # the entity that was just read.
            for expanded in loaded.expand_entity(entity):
                write_object(outfile, expanded)
            entity = read_entity(infile)
    except BrokenPipeError:
        raise click.Abort()
コード例 #12
0
def export_cypher():
    """Feed every entity read from stdin to a ``CypherGraphExport``.

    The exporter is constructed around the stdout text stream; reading
    stops when ``read_entity`` returns ``None``.

    Raises:
        click.Abort: if a ``BrokenPipeError`` occurs.
    """
    source = click.get_text_stream('stdin')
    sink = click.get_text_stream('stdout')
    exporter = CypherGraphExport(sink)
    try:
        entity = read_entity(source)
        while entity is not None:
            exporter.write(entity)
            entity = read_entity(source)
    except BrokenPipeError:
        raise click.Abort()
コード例 #13
0
def stream_mapping(mapping_yaml):
    """Map CSV records from stdin through every query in a mapping file.

    One ``StreamSource`` is built for each item yielded by
    ``keys_values(meta, 'queries', 'query')`` across all datasets in the
    mapping file. Every record produced by ``StreamSource.read_csv`` is
    then offered to each source; when ``check_filters`` accepts it, the
    values of ``source.query.map(record)`` are passed one by one to
    ``read_entity`` together with the stdout stream.

    Raises:
        click.Abort: if a ``BrokenPipeError`` occurs while streaming.
    """
    records_in = click.get_text_stream('stdin')
    out = click.get_text_stream('stdout')

    sources = []
    mappings = load_mapping_file(mapping_yaml)
    for dataset, meta in mappings.items():
        for data in keys_values(meta, 'queries', 'query'):
            query = model.make_mapping(data, key_prefix=dataset)
            sources.append(StreamSource(query, data))

    try:
        for record in StreamSource.read_csv(records_in):
            for source in sources:
                if not source.check_filters(record):
                    continue
                for entity in source.query.map(record).values():
                    # NOTE(review): every other call site in this file
                    # gives read_entity an *input* stream; calling it with
                    # stdout and an entity looks like it was meant to be a
                    # writer call -- confirm against the helper module.
                    read_entity(out, entity)
    except BrokenPipeError:
        raise click.Abort()
コード例 #14
0
def export_excel(filename):
    """Write every entity read from stdin into a workbook saved at ``filename``.

    The workbook comes from ``get_workbook()``; it is saved only after
    ``read_entity`` has returned ``None``, i.e. once all input is consumed.

    Raises:
        click.Abort: if a ``BrokenPipeError`` occurs.
    """
    source = click.get_text_stream('stdin')
    book = get_workbook()
    try:
        entity = read_entity(source)
        while entity is not None:
            write_entity(book, entity)
            entity = read_entity(source)
        book.save(filename)
    except BrokenPipeError:
        raise click.Abort()
コード例 #15
0
 def match(cls, file_path, entity):
     """Score how well this ingestor fits the file at ``file_path``.

     The parent class' score is returned unchanged when it is below 1.
     Otherwise the file is opened in binary mode and one entity is read
     from it (with a line limit of ``100 * MEGABYTE``); if that yields an
     ``EntityProxy`` whose ``id`` is set, ``cls.SCORE`` is returned.

     Any exception raised while opening or reading is logged and, like an
     unusable first entity, results in ``-1``.
     """
     inherited = super(FtMIngestor, cls).match(file_path, entity)
     if inherited < 1:
         return inherited
     try:
         with open(file_path, "rb") as handle:
             candidate = read_entity(handle, max_line=100 * MEGABYTE)
             if isinstance(candidate, EntityProxy):
                 if candidate.id is not None:
                     return cls.SCORE
     except Exception:
         log.exception("Failed to read FtM file: %r", entity)
     return -1
コード例 #16
0
def validate(infile, outfile):
    """Rebuild each input entity through the model and write the result.

    For every entity read from ``infile`` a fresh entity of the same
    schema is created with ``model.make_entity``, given the same ``id``,
    and populated by re-adding each ``(prop, value)`` pair from
    ``entity.itervalues()``. The rebuilt entity is written to ``outfile``.

    Raises:
        click.Abort: if a ``BrokenPipeError`` occurs.
    """
    try:
        entity = read_entity(infile)
        while entity is not None:
            rebuilt = model.make_entity(entity.schema)
            rebuilt.id = entity.id
            for prop, value in entity.itervalues():
                rebuilt.add(prop, value)
            write_object(outfile, rebuilt)
            entity = read_entity(infile)
    except BrokenPipeError:
        raise click.Abort()
コード例 #17
0
ファイル: dedupe.py プロジェクト: openlegaldata/followthelaw
def link(infile, outfile, matches):
    """Apply a ``Linker`` built from ``matches`` to each input entity.

    All matches returned by ``Match.from_file(model, matches)`` are added
    to the linker first and the size of ``linker.lookup`` is logged. Each
    entity read from ``infile`` is then passed through ``linker.apply``
    and written to ``outfile``.

    Raises:
        click.Abort: if a ``BrokenPipeError`` occurs at any stage.
    """
    try:
        linker = Linker(model)
        for found in Match.from_file(model, matches):
            linker.add(found)
        log.info("Linker: %s clusters.", len(linker.lookup))

        entity = read_entity(infile)
        while entity is not None:
            write_object(outfile, linker.apply(entity))
            entity = read_entity(infile)
    except BrokenPipeError:
        raise click.Abort()
コード例 #18
0
def export_csv(outdir):
    """Write each entity read from stdin through a per-schema CSV handler.

    Handlers are obtained from ``_get_csv_handler(outdir, schema,
    handlers)`` and tracked in a local dict; every handler in that dict
    is closed when the function exits, whether or not an error occurred.

    Raises:
        click.Abort: if a ``BrokenPipeError`` occurs.
    """
    source = click.get_text_stream('stdin')
    handlers = {}
    try:
        entity = read_entity(source)
        while entity is not None:
            target = _get_csv_handler(outdir, entity.schema, handlers)
            write_entity(target, entity)
            entity = read_entity(source)
    except BrokenPipeError:
        raise click.Abort()
    finally:
        # Always release the handlers collected so far.
        for opened in handlers.values():
            opened.close()
コード例 #19
0
ファイル: cli.py プロジェクト: tendai-zw/followthemoney
def load_votes(votes):
    """Save each record read from ``votes`` as a vote, tally, and commit.

    For every record, its ``match_id``, ``user`` and ``judgement`` values
    are passed to ``Vote.save``. A ``BrokenPipeError`` ends the loop
    silently; ``Match.tally`` and the commit run afterwards in either
    case.
    """
    session = Session()
    try:
        record = read_entity(votes)
        while record is not None:
            match_id = record.get('match_id')
            user = record.get('user')
            judgement = record.get('judgement')
            Vote.save(session, match_id, user, judgement)
            record = read_entity(votes)
    except BrokenPipeError:
        # Deliberately ignored: votes saved so far are still tallied and
        # committed below.
        pass
    Match.tally(session)
    session.commit()
コード例 #20
0
def expand(enricher):
    """Expand each entity read from stdin and write the result to stdout.

    ``enricher`` is resolved through ``load_enricher``. The return value
    of ``expand_entity`` for each entity is written as a single object.
    The enricher is closed when the function exits, whether or not an
    error occurred.

    Raises:
        click.Abort: if a ``BrokenPipeError`` occurs.
    """
    loaded = load_enricher(enricher)
    try:
        source = click.get_text_stream('stdin')
        sink = click.get_text_stream('stdout')
        entity = read_entity(source)
        while entity is not None:
            write_object(sink, loaded.expand_entity(entity))
            entity = read_entity(source)
    except BrokenPipeError:
        raise click.Abort()
    finally:
        loaded.close()
コード例 #21
0
def export_gexf():
    """Collect entities from stdin into a graph and write it out as GEXF.

    Entities are written into an ``NXGraphExport`` wrapping a fresh
    ``nx.MultiDiGraph``. Once ``read_entity`` returns ``None``, the lines
    produced by ``generate_gexf(graph, prettyprint=False)`` are written
    to stdout.

    Raises:
        click.Abort: if a ``BrokenPipeError`` occurs while reading
            entities (the final write loop is outside that handler).
    """
    source = click.get_text_stream('stdin')
    sink = click.get_text_stream('stdout')
    graph = nx.MultiDiGraph()
    exporter = NXGraphExport(graph)
    try:
        entity = read_entity(source)
        while entity is not None:
            exporter.write(entity)
            entity = read_entity(source)
    except BrokenPipeError:
        raise click.Abort()

    for chunk in generate_gexf(graph, prettyprint=False):
        sink.write(chunk)
コード例 #22
0
ファイル: aggregate.py プロジェクト: wayne9qiu/followthemoney
def aggregate(infile, outfile):
    """Merge entities that share an id and write one entity per id.

    Each entity read from ``infile`` is passed through
    ``Namespace(None).apply``. The first entity seen for an id is kept in
    an in-memory dict; later ones with the same id are merged into it.
    After the input is exhausted, the buffered entities are written to
    ``outfile``.

    Raises:
        click.Abort: if a ``BrokenPipeError`` occurs.
    """
    merged = {}
    namespace = Namespace(None)
    try:
        entity = read_entity(infile)
        while entity is not None:
            entity = namespace.apply(entity)
            existing = merged.get(entity.id)
            if existing is None:
                merged[entity.id] = entity
            else:
                existing.merge(entity)
            entity = read_entity(infile)

        for combined in merged.values():
            write_object(outfile, combined)
    except BrokenPipeError:
        raise click.Abort()
コード例 #23
0
def aggregate():
    """Merge stdin entities that share an id and write one per id to stdout.

    Each entity read from stdin is passed through
    ``Namespace(None).apply``. The first entity seen for an id is kept in
    an in-memory dict; later ones with the same id are merged into it.
    The stdout stream is only obtained after the input is exhausted, and
    the buffered entities are then written to it.

    Raises:
        click.Abort: if a ``BrokenPipeError`` occurs.
    """
    merged = {}
    namespace = Namespace(None)
    try:
        source = click.get_text_stream('stdin')
        entity = read_entity(source)
        while entity is not None:
            entity = namespace.apply(entity)
            existing = merged.get(entity.id)
            if existing is None:
                merged[entity.id] = entity
            else:
                existing.merge(entity)
            entity = read_entity(source)

        sink = click.get_text_stream('stdout')
        for combined in merged.values():
            write_object(sink, combined)
    except BrokenPipeError:
        raise click.Abort()
コード例 #24
0
def apply_recon(recon):
    """Apply reconciliation matches to entities streamed from stdin.

    Every item returned by ``Recon.from_file(recon)`` whose judgement is
    ``Recon.MATCH`` is added to an ``EntityLinker`` as a
    (subject, canonical) pair, and the size of ``linker.clusters`` is
    logged. Each entity read from stdin is then passed through
    ``NS.apply`` and ``linker.apply``; when the linker returns an entity
    with a different id, the original id is added to it (quietly) as
    ``sameAs``. The result is written to stdout.

    Raises:
        click.Abort: if a ``BrokenPipeError`` occurs at any stage.
    """
    try:
        linker = EntityLinker()
        # A distinct loop name avoids shadowing the ``recon`` argument.
        for item in Recon.from_file(recon):
            if item.judgement == Recon.MATCH:
                linker.add(item.subject, item.canonical)
        log.info("Linker: %s clusters.", len(linker.clusters))

        source = click.get_text_stream('stdin')
        sink = click.get_text_stream('stdout')
        entity = read_entity(source)
        while entity is not None:
            entity = NS.apply(entity)
            linked = linker.apply(entity)
            if linked.id != entity.id:
                linked.add('sameAs', entity.id, quiet=True)
            write_object(sink, linked)
            entity = read_entity(source)
    except BrokenPipeError:
        raise click.Abort()