예제 #1
0
def sieve(infile, outfile, schema, property, type):
    """Pass every entity from ``infile`` through ``sieve_entity``.

    Each entity is handed to ``sieve_entity`` together with ``schema``,
    ``property`` and ``type``. Results that are ``None`` are skipped; all
    others are written to ``outfile``. A ``BrokenPipeError`` raised while
    streaming is converted into ``click.Abort``.
    """
    try:
        for original in read_entities(infile):
            sieved = sieve_entity(original, schema, property, type)
            if sieved is None:
                # Nothing left to emit for this entity.
                continue
            write_object(outfile, sieved)
    except BrokenPipeError:
        raise click.Abort()
예제 #2
0
def import_cellebrite(infile, outfile, owner, country):
    """Convert ``infile`` with ``CellebriteConverter`` and write the entities.

    Entities yielded by ``convert()`` whose ``id`` is ``None`` are skipped;
    the rest are written to ``outfile``. A ``BrokenPipeError`` is converted
    into ``click.Abort``.
    """
    try:
        converted = CellebriteConverter(infile, owner, country).convert()
        for entity in converted:
            if entity.id is None:
                continue
            write_object(outfile, entity)
    except BrokenPipeError:
        raise click.Abort()
예제 #3
0
def enrich(infile, outfile, enricher):
    """Write the raw enrichment matches for every entity in ``infile``.

    ``enricher`` is resolved through ``load_enricher``; every item yielded by
    its ``enrich_entity_raw`` for an input entity is written to ``outfile``.
    A ``BrokenPipeError`` is converted into ``click.Abort``.
    """
    loaded = load_enricher(enricher)
    try:
        for entity in read_entities(infile):
            matches = loaded.enrich_entity_raw(entity)
            for match in matches:
                write_object(outfile, match)
    except BrokenPipeError:
        raise click.Abort()
예제 #4
0
def sign(infile, outfile, signature):
    """Apply ``Namespace(signature)`` to each entity and write the result.

    Entities are read from ``infile`` and the outcome of ``Namespace.apply``
    is written to ``outfile``. A ``BrokenPipeError`` is converted into
    ``click.Abort``.
    """
    namespace = Namespace(signature)
    try:
        for entity in read_entities(infile):
            write_object(outfile, namespace.apply(entity))
    except BrokenPipeError:
        raise click.Abort()
예제 #5
0
def expand(infile, outfile, enricher):
    """Write everything ``expand_entity`` yields for each input entity.

    ``enricher`` is resolved through ``load_enricher``. For every entity read
    from ``infile``, each item produced by ``expand_entity`` is written to
    ``outfile``. A ``BrokenPipeError`` is converted into ``click.Abort``.
    """
    loaded = load_enricher(enricher)
    try:
        for entity in read_entities(infile):
            results = loaded.expand_entity(entity)
            for item in results:
                write_object(outfile, item)
    except BrokenPipeError:
        raise click.Abort()
예제 #6
0
def match_decide(infile, outfile, threshold):
    """Set ``decision = True`` on undecided matches scoring above ``threshold``.

    Matches are loaded with ``Match.from_file(model, infile)``. A match whose
    ``decision`` is ``None`` and whose ``score`` is not ``None`` and strictly
    greater than ``threshold`` gets ``decision = True``. Every match, changed
    or not, is written to ``outfile``. A ``BrokenPipeError`` is converted
    into ``click.Abort``.
    """
    try:
        for match in Match.from_file(model, infile):
            undecided = match.decision is None
            if undecided and match.score is not None and match.score > threshold:
                match.decision = True
            write_object(outfile, match)
    except BrokenPipeError:
        raise click.Abort()
예제 #7
0
def validate(infile, outfile):
    """Rebuild each entity from ``infile`` value by value and write it out.

    Entities are read with ``cleaned=False``. For each one a new entity of
    the same schema is created via ``model.make_entity``, the ``id`` is
    copied, and every ``(prop, value)`` pair from ``itervalues()`` is passed
    to ``add`` on the new entity. A ``BrokenPipeError`` is converted into
    ``click.Abort``.
    """
    try:
        for raw in read_entities(infile, cleaned=False):
            rebuilt = model.make_entity(raw.schema)
            rebuilt.id = raw.id
            # NOTE(review): presumably add() cleans/validates each value;
            # confirm against the entity implementation.
            for prop, val in raw.itervalues():
                rebuilt.add(prop, val)
            write_object(outfile, rebuilt)
    except BrokenPipeError:
        raise click.Abort()
예제 #8
0
def expand(infile, outfile, enricher):
    """Write everything ``expand_entity`` yields for each input entity.

    Entities are pulled one at a time with ``read_entity`` until it returns
    ``None``. Each item produced by the loaded enricher's ``expand_entity``
    is written to ``outfile``. A ``BrokenPipeError`` is converted into
    ``click.Abort``.
    """
    loaded = load_enricher(enricher)
    try:
        source = read_entity(infile)
        while source is not None:
            for expanded in loaded.expand_entity(source):
                write_object(outfile, expanded)
            source = read_entity(infile)
    except BrokenPipeError:
        raise click.Abort()
예제 #9
0
def enrich(infile, outfile, enricher):
    """Write the raw enrichment matches for every entity in ``infile``.

    Entities are pulled one at a time with ``read_entity`` until it returns
    ``None``. Each item yielded by the loaded enricher's
    ``enrich_entity_raw`` is written to ``outfile``. A ``BrokenPipeError``
    is converted into ``click.Abort``.
    """
    loaded = load_enricher(enricher)
    try:
        current = read_entity(infile)
        while current is not None:
            for match in loaded.enrich_entity_raw(current):
                write_object(outfile, match)
            current = read_entity(infile)
    except BrokenPipeError:
        raise click.Abort()
예제 #10
0
def sign(infile, outfile, signature):
    """Apply ``Namespace(signature)`` to each entity and write the result.

    Entities are pulled one at a time with ``read_entity`` until it returns
    ``None``. A ``BrokenPipeError`` is converted into ``click.Abort``.
    """
    namespace = Namespace(signature)
    try:
        current = read_entity(infile)
        while current is not None:
            write_object(outfile, namespace.apply(current))
            current = read_entity(infile)
    except BrokenPipeError:
        raise click.Abort()
예제 #11
0
def link(infile, outfile, matches):
    """Apply a ``Linker`` built from ``matches`` to every entity in ``infile``.

    All matches from ``Match.from_file(model, matches)`` are added to a
    ``Linker`` first. The size of ``linker.lookup`` is logged, then each
    entity from ``infile`` is passed through ``linker.apply`` and written to
    ``outfile``. A ``BrokenPipeError`` is converted into ``click.Abort``.
    """
    try:
        linker = Linker(model)
        for decided in Match.from_file(model, matches):
            linker.add(decided)
        cluster_count = len(linker.lookup)
        log.info("Linker: %s clusters.", cluster_count)
        for entity in read_entities(infile):
            write_object(outfile, linker.apply(entity))
    except BrokenPipeError:
        raise click.Abort()
예제 #12
0
def match_entities(infile, outfile, all):
    """Write the entities referenced by the matches in ``infile``.

    Matches are loaded with ``Match.from_file(model, infile)``. Unless
    ``all`` is truthy, only matches whose ``decision`` is ``True`` are used.
    For each selected match, ``canonical`` and then ``entity`` are written
    to ``outfile`` when they are not ``None``. A ``BrokenPipeError`` is
    converted into ``click.Abort``.
    """
    try:
        for match in Match.from_file(model, infile):
            selected = all or match.decision is True
            if not selected:
                continue
            canonical = match.canonical
            if canonical is not None:
                write_object(outfile, canonical)
            entity = match.entity
            if entity is not None:
                write_object(outfile, entity)
    except BrokenPipeError:
        raise click.Abort()
예제 #13
0
def run_mapping(outfile, mapping_yaml):
    """Run every query mapping in ``mapping_yaml`` and write the entities.

    The file is loaded with ``load_mapping_file``. For each dataset entry,
    the mappings returned by ``keys_values(meta, 'queries', 'query')`` are
    passed to ``model.map_entities`` with the dataset name as
    ``key_prefix``, and each resulting entity is written to ``outfile``.

    Raises:
        click.Abort: if a ``BrokenPipeError`` occurs while mapping/writing.
        click.ClickException: for any other exception raised while
            mapping/writing, carrying ``str(exc)`` as the message.
    """
    config = load_mapping_file(mapping_yaml)
    try:
        for dataset_name, settings in config.items():
            for mapping in keys_values(settings, 'queries', 'query'):
                for entity in model.map_entities(mapping, key_prefix=dataset_name):
                    write_object(outfile, entity)
    except BrokenPipeError:
        raise click.Abort()
    except Exception as error:
        raise click.ClickException(str(error))
예제 #14
0
def make_entities(db_path, outfile):
    """Load a SQLite database into a temporary store and dump its entities.

    The database at ``db_path`` is opened via ``dataset.connect``. Edges,
    addresses and the ``entity``, ``intermediary`` and ``officer`` tables
    are pushed through a bulk writer on a ``Dataset`` named ``"temp"``
    (database URI ``sqlite://``). Everything the store then iterates over
    is written to ``outfile``.
    """
    source_db = dataset.connect("sqlite:///%s" % db_path)
    staging = Dataset("temp", database_uri="sqlite://")
    bulk = staging.bulk()

    write_edges(bulk, source_db)
    write_addresses(bulk, source_db)
    # Only the "entity" table is given an explicit third argument.
    write_nodes(bulk, source_db["entity"], "Company")
    write_nodes(bulk, source_db["intermediary"])
    write_nodes(bulk, source_db["officer"])

    # NOTE(review): no explicit flush of the bulk writer is visible here;
    # presumably the write_* helpers flush it. Confirm before relying on it.
    for entity in staging.iterate():
        write_object(outfile, entity)
예제 #15
0
def import_ocds(infile, outfile):
    """Convert line-delimited JSON records from ``infile`` into entities.

    Lines are read with ``readline`` until an empty result signals the end
    of input. Each line is parsed with ``json.loads`` and passed to
    ``convert_record``; entities whose ``id`` is not ``None`` are written to
    ``outfile``. A ``BrokenPipeError`` is converted into ``click.Abort``.
    """
    try:
        raw = infile.readline()
        while raw:
            for entity in convert_record(json.loads(raw)):
                if entity.id is None:
                    continue
                write_object(outfile, entity)
            raw = infile.readline()
    except BrokenPipeError:
        raise click.Abort()
예제 #16
0
def result_entities():
    """Write the entities of every result read from standard input.

    Results are pulled with ``read_result`` from click's ``stdin`` text
    stream until it returns ``None``. Each item in ``result.entities`` is
    written to click's ``stdout`` text stream. A ``BrokenPipeError`` is
    converted into ``click.Abort``.
    """
    try:
        source = click.get_text_stream('stdin')
        sink = click.get_text_stream('stdout')
        result = read_result(source)
        while result is not None:
            for entity in result.entities:
                write_object(sink, entity)
            result = read_result(source)
    except BrokenPipeError:
        raise click.Abort()
예제 #17
0
def validate(infile, outfile):
    """Rebuild each entity from ``infile`` value by value and write it out.

    Entities are pulled one at a time with ``read_entity`` until it returns
    ``None``. For each one a new entity of the same schema is created via
    ``model.make_entity``, the ``id`` is copied, and every ``(prop, value)``
    pair from ``itervalues()`` is passed to ``add`` on the new entity.
    A ``BrokenPipeError`` is converted into ``click.Abort``.
    """
    try:
        raw = read_entity(infile)
        while raw is not None:
            rebuilt = model.make_entity(raw.schema)
            rebuilt.id = raw.id
            for prop, val in raw.itervalues():
                rebuilt.add(prop, val)
            write_object(outfile, rebuilt)
            raw = read_entity(infile)
    except BrokenPipeError:
        raise click.Abort()
예제 #18
0
def auto_match(threshold):
    """Emit a ``Recon.MATCH`` record for each result scoring above ``threshold``.

    Results are pulled with ``read_result`` from click's ``stdin`` text
    stream until it returns ``None``. For a result whose ``score`` is
    strictly greater than ``threshold``, a ``Recon`` built from its
    ``subject`` and ``candidate`` is written to click's ``stdout`` text
    stream. A ``BrokenPipeError`` is converted into ``click.Abort``.
    """
    try:
        source = click.get_text_stream('stdin')
        sink = click.get_text_stream('stdout')
        result = read_result(source)
        while result is not None:
            if result.score > threshold:
                judgement = Recon(result.subject, result.candidate, Recon.MATCH)
                write_object(sink, judgement)
            result = read_result(source)
    except BrokenPipeError:
        raise click.Abort()
예제 #19
0
def import_ocds():
    """Convert line-delimited JSON records from standard input into entities.

    Lines are read with ``readline`` from click's ``stdin`` text stream
    until an empty result signals the end of input. Each line is parsed
    with ``json.loads`` and passed to ``convert_record``; entities whose
    ``id`` is not ``None`` are written to click's ``stdout`` text stream.
    A ``BrokenPipeError`` is converted into ``click.Abort``.
    """
    source = click.get_text_stream('stdin')
    sink = click.get_text_stream('stdout')
    try:
        raw = source.readline()
        while raw:
            for entity in convert_record(json.loads(raw)):
                if entity.id is None:
                    continue
                write_object(sink, entity)
            raw = source.readline()
    except BrokenPipeError:
        raise click.Abort()
예제 #20
0
def run_mapping(outfile, mapping_yaml, sign=True):
    """Run every query mapping in ``mapping_yaml`` and write the entities.

    The file is loaded with ``load_mapping_file``. For each dataset entry,
    the mappings returned by ``keys_values(meta, "queries", "query")`` are
    passed to ``model.map_entities`` with the dataset name as
    ``key_prefix``. When ``sign`` is truthy each entity is first passed
    through ``Namespace(dataset).apply``; the result is written to
    ``outfile``.

    Raises:
        click.Abort: if a ``BrokenPipeError`` occurs while mapping/writing.
        click.ClickException: for any other exception raised while
            mapping/writing, carrying ``str(exc)`` as the message.
    """
    config = load_mapping_file(mapping_yaml)
    try:
        for dataset_name, settings in config.items():
            namespace = Namespace(dataset_name)
            for mapping in keys_values(settings, "queries", "query"):
                for entity in model.map_entities(mapping, key_prefix=dataset_name):
                    outgoing = namespace.apply(entity) if sign else entity
                    write_object(outfile, outgoing)
    except BrokenPipeError:
        raise click.Abort()
    except Exception as error:
        raise click.ClickException(str(error))
예제 #21
0
def expand(enricher):
    """Write the ``expand_entity`` result for each entity on standard input.

    Entities are pulled with ``read_entity`` from click's ``stdin`` text
    stream until it returns ``None``; whatever ``expand_entity`` returns is
    written as a single object to click's ``stdout`` text stream.
    A ``BrokenPipeError`` is converted into ``click.Abort``. The loaded
    enricher's ``close()`` is always called on the way out.
    """
    loaded = load_enricher(enricher)
    try:
        source = click.get_text_stream('stdin')
        sink = click.get_text_stream('stdout')
        entity = read_entity(source)
        while entity is not None:
            write_object(sink, loaded.expand_entity(entity))
            entity = read_entity(source)
    except BrokenPipeError:
        raise click.Abort()
    finally:
        loaded.close()
예제 #22
0
def aggregate(infile, outfile):
    """Merge entities sharing an ID and write one entity per ID.

    Each entity from ``infile`` is passed through ``Namespace(None).apply``
    and collected in memory keyed by ``id``; a later entity with an
    already-seen ID is merged into the stored one via ``merge``. Output to
    ``outfile`` starts only after all input has been read.
    A ``BrokenPipeError`` is converted into ``click.Abort``.
    """
    merged = {}
    namespace = Namespace(None)
    try:
        for incoming in read_entities(infile):
            incoming = namespace.apply(incoming)
            existing = merged.get(incoming.id)
            if existing is None:
                merged[incoming.id] = incoming
            else:
                existing.merge(incoming)

        for entity in merged.values():
            write_object(outfile, entity)
    except BrokenPipeError:
        raise click.Abort()
예제 #23
0
    def export(self):
        """Write the dataset's entities to ``entities.ftm.json``.

        Calls ``self.bind()``, resolves the output path with
        ``get_artifact_path``, creates its parent directories if needed and
        writes every entity in ``self.dataset.store`` to the file with
        ``write_object``. ``self.close()`` is always called, even when the
        export fails.
        """
        try:
            self.bind()

            target = self.get_artifact_path("entities.ftm.json")
            target.parent.mkdir(parents=True, exist_ok=True)
            entity_count = len(self.dataset.store)
            self.log.info(
                "Writing entities to line-based JSON",
                path=target,
                entities=entity_count,
            )
            with open(target, "w") as handle:
                for entity in self.dataset.store:
                    write_object(handle, entity)
        finally:
            self.close()
예제 #24
0
def stream_mapping(infile, outfile, mapping_yaml):
    """Map CSV records from ``infile`` with the queries in ``mapping_yaml``.

    One ``StreamSource`` is built for each query definition returned by
    ``keys_values(meta, 'queries', 'query')``, using the dataset name as
    ``key_prefix``. Every record from ``StreamSource.read_csv(infile)`` is
    offered to each source; when ``check_filters`` accepts it, the values of
    ``source.query.map(record)`` are written to ``outfile``.
    A ``BrokenPipeError`` is converted into ``click.Abort``.
    """
    config = load_mapping_file(mapping_yaml)
    sources = [
        StreamSource(model.make_mapping(data, key_prefix=dataset), data)
        for dataset, meta in config.items()
        for data in keys_values(meta, 'queries', 'query')
    ]

    try:
        for record in StreamSource.read_csv(infile):
            for source in sources:
                if not source.check_filters(record):
                    continue
                mapped = source.query.map(record)
                for entity in mapped.values():
                    write_object(outfile, entity)
    except BrokenPipeError:
        raise click.Abort()
예제 #25
0
def filter_results(recon):
    """Pass through results whose candidate ID was judged a match.

    The ``subject`` of every ``Recon.MATCH`` judgement in
    ``Recon.from_file(recon)`` is collected. Results are then pulled with
    ``read_result`` from click's ``stdin`` text stream until it returns
    ``None``. Results without a candidate are skipped; a result is written
    to click's ``stdout`` text stream when the ``id`` of
    ``NS.apply(result.candidate)`` is among the collected subjects.
    A ``BrokenPipeError`` is converted into ``click.Abort``.
    """
    try:
        matches = {
            judged.subject
            for judged in Recon.from_file(recon)
            if judged.judgement == Recon.MATCH
        }
        source = click.get_text_stream('stdin')
        sink = click.get_text_stream('stdout')
        result = read_result(source)
        while result is not None:
            if result.candidate is not None:
                candidate = NS.apply(result.candidate)
                if candidate.id in matches:
                    write_object(sink, result)
            result = read_result(source)
    except BrokenPipeError:
        raise click.Abort()
예제 #26
0
def aggregate():
    """Merge entities sharing an ID and write one entity per ID.

    Entities are pulled with ``read_entity`` from click's ``stdin`` text
    stream until it returns ``None``. Each is passed through
    ``Namespace(None).apply`` and collected in memory keyed by ``id``; a
    later entity with an already-seen ID is merged into the stored one via
    ``merge``. Output to click's ``stdout`` text stream starts only after
    all input has been read. A ``BrokenPipeError`` is converted into
    ``click.Abort``.
    """
    merged = {}
    namespace = Namespace(None)
    try:
        source = click.get_text_stream('stdin')
        incoming = read_entity(source)
        while incoming is not None:
            incoming = namespace.apply(incoming)
            existing = merged.get(incoming.id)
            if existing is None:
                merged[incoming.id] = incoming
            else:
                existing.merge(incoming)
            incoming = read_entity(source)

        sink = click.get_text_stream('stdout')
        for entity in merged.values():
            write_object(sink, entity)
    except BrokenPipeError:
        raise click.Abort()
예제 #27
0
def apply_recon(recon):
    """Apply an ``EntityLinker`` built from ``recon`` to entities on stdin.

    Every ``Recon.MATCH`` judgement in ``Recon.from_file(recon)`` adds its
    ``(subject, canonical)`` pair to the linker, and the size of
    ``linker.clusters`` is logged. Entities are then pulled with
    ``read_entity`` from click's ``stdin`` text stream until it returns
    ``None``. Each is passed through ``NS.apply`` and ``linker.apply``;
    when the linked entity's ``id`` differs, the pre-link ID is added to it
    as ``sameAs``. The linked entity is written to click's ``stdout`` text
    stream. A ``BrokenPipeError`` is converted into ``click.Abort``.
    """
    try:
        linker = EntityLinker()
        for judged in Recon.from_file(recon):
            if judged.judgement == Recon.MATCH:
                linker.add(judged.subject, judged.canonical)
        log.info("Linker: %s clusters.", len(linker.clusters))
        source = click.get_text_stream('stdin')
        sink = click.get_text_stream('stdout')
        raw = read_entity(source)
        while raw is not None:
            signed = NS.apply(raw)
            linked = linker.apply(signed)
            if linked.id != signed.id:
                # Keep a pointer from the linked entity back to the pre-link ID.
                linked.add('sameAs', signed.id, quiet=True)
            write_object(sink, linked)
            raw = read_entity(source)
    except BrokenPipeError:
        raise click.Abort()
예제 #28
0
def stream_mapping(infile, outfile, mapping_yaml, sign=True):
    """Map CSV records from ``infile`` with the queries in ``mapping_yaml``.

    One ``StreamSource`` is built for each query definition returned by
    ``keys_values(meta, "queries", "query")``, using the dataset name as
    ``key_prefix``. Every record from ``StreamSource.read_csv(infile)`` is
    offered to each source; when ``check_filters`` accepts it, the values of
    ``source.query.map(record)`` are written to ``outfile``. When ``sign``
    is truthy each entity is first passed through the ``apply`` method of
    the dataset's ``Namespace``.

    Raises:
        click.Abort: if a ``BrokenPipeError`` occurs while streaming.
    """
    sources = []
    config = load_mapping_file(mapping_yaml)
    for dataset, meta in config.items():
        # The namespace depends only on the dataset name, so build it once
        # here instead of once per (record, source) pair in the loop below.
        ns = Namespace(dataset)
        for data in keys_values(meta, "queries", "query"):
            query = model.make_mapping(data, key_prefix=dataset)
            sources.append((ns, StreamSource(query, data)))

    try:
        for record in StreamSource.read_csv(infile):
            for ns, source in sources:
                if not source.check_filters(record):
                    continue
                entities = source.query.map(record)
                for entity in entities.values():
                    if sign:
                        entity = ns.apply(entity)
                    write_object(outfile, entity)
    except BrokenPipeError:
        raise click.Abort()
예제 #29
0
def stream_mapping(infile, outfile, mapping_yaml, sign=True):
    """Map CSV records from ``infile`` with the queries in ``mapping_yaml``.

    For each query definition returned by
    ``keys_values(meta, "queries", "query")``, the ``database`` key is
    dropped and ``csv_url`` is set to ``/dev/null`` before it is passed to
    ``model.make_mapping`` with the dataset name as ``key_prefix``. Every
    record from ``CSVSource.read_csv(infile)`` is offered to each query;
    when ``query.source.check_filters`` accepts it, the values of
    ``query.map(record)`` are written to ``outfile``. When ``sign`` is
    truthy each entity is first passed through the ``apply`` method of the
    dataset's ``Namespace``.

    Raises:
        click.Abort: if a ``BrokenPipeError`` occurs while streaming.
    """
    queries: List[Tuple[Namespace, QueryMapping]] = []
    config = load_mapping_file(mapping_yaml)
    for dataset, meta in config.items():
        # The namespace depends only on the dataset name, so build it once
        # here instead of once per (record, query) pair in the loop below.
        ns = Namespace(dataset)
        for data in keys_values(meta, "queries", "query"):
            # NOTE(review): presumably forces a CSV-backed source that never
            # opens a real database or file, since records come from
            # ``infile`` -- confirm against make_mapping.
            data.pop("database", None)
            data["csv_url"] = "/dev/null"
            query = model.make_mapping(data, key_prefix=dataset)
            queries.append((ns, query))

    try:
        for record in CSVSource.read_csv(infile):
            for ns, query in queries:
                if not query.source.check_filters(record):
                    continue
                entities = query.map(record)
                for entity in entities.values():
                    if sign:
                        entity = ns.apply(entity)
                    write_object(outfile, entity)
    except BrokenPipeError:
        raise click.Abort()
예제 #30
0
파일: manage.py 프로젝트: djoffrey/aleph
def dump_entities(foreign_id, outfile):
    """Export FtM entities for the given collection."""
    # The collection is looked up by its foreign ID.
    collection = get_collection(foreign_id)
    # 'text' is passed as an exclude to iter_proxies.
    proxies = iter_proxies(collection_id=collection.id, excludes=['text'])
    for proxy in proxies:
        write_object(outfile, proxy)