def produce_ttl(dataset, target_dir, gaf_path, ontology_graph):
    """Convert a GAF file into a Turtle (ttl) RDF file next to the source GAF.

    Args:
        dataset: name used to build the output filename ("<dataset>_cam.ttl").
        target_dir: unused here; kept for signature compatibility with the
            other produce_* commands.
        gaf_path: path to the source GAF file to transform.
        ontology_graph: ontology used to configure the GAF parser.

    Returns:
        The path of the ttl file written to disk.
    """
    gafparser = GafParser()
    gafparser.config = assocparser.AssocParserConfig(ontology=ontology_graph)

    # Pre-count the lines so the progress bar below has a known length.
    with open(gaf_path) as sg:
        lines = sum(1 for line in sg)

    # The ttl is written alongside the source GAF, not into target_dir.
    ttl_path = os.path.join(os.path.split(gaf_path)[0], "{}_cam.ttl".format(dataset))
    click.echo("Producing ttl: {}".format(ttl_path))
    rdf_writer = assoc_rdfgen.TurtleRdfWriter()
    transformer = assoc_rdfgen.CamRdfTransform(writer=rdf_writer)

    with open(gaf_path) as gf:
        with click.progressbar(iterable=gafparser.association_generator(file=gf), length=lines) as associations:
            for association in associations:
                # Skip header entries; translate only real association rows.
                if "header" not in association or not association["header"]:
                    transformer.provenance()
                    transformer.translate(association)

    # Serialize the accumulated graph in one pass at the end.
    with open(ttl_path, "wb") as ttl:
        click.echo("Writing ttl to disk")
        rdf_writer.serialize(destination=ttl)

    return ttl_path
def produce_gpi(dataset, target_dir, gaf_path, ontology_graph):
    """Derive a GPI file from a GAF file, writing it next to the source GAF.

    Args:
        dataset: name used to build the output filename ("<dataset>.gpi").
        target_dir: unused here; kept for signature compatibility with the
            other produce_* commands.
        gaf_path: path to the source GAF file.
        ontology_graph: ontology used to configure the GAF parser.

    Returns:
        The path of the gpi file written to disk.
    """
    gafparser = GafParser()
    gafparser.config = assocparser.AssocParserConfig(ontology=ontology_graph)

    # Pre-count the lines so the progress bar below has a known length.
    with open(gaf_path) as sg:
        lines = sum(1 for line in sg)

    gpi_path = os.path.join(os.path.split(gaf_path)[0], "{}.gpi".format(dataset))
    with open(gaf_path) as gf, open(gpi_path, "w") as gpi:
        click.echo("Using {} as the gaf to build gpi with".format(gaf_path))
        bridge = gafgpibridge.GafGpiBridge()
        gpiwriter = entitywriter.GpiWriter(file=gpi)
        # Multiple associations can map to the same entity; cache what we have
        # already emitted so each entity is written at most once.
        gpi_cache = set()
        with click.progressbar(iterable=gafparser.association_generator(file=gf), length=lines) as associations:
            for association in associations:
                entity = bridge.convert_association(association)
                # Guard against None first so the set lookup only ever sees
                # real entities.
                if entity is not None and entity not in gpi_cache:
                    gpi_cache.add(entity)
                    gpiwriter.write_entity(entity)

    return gpi_path
def produce_gaf(dataset, source_gaf, ontology_graph, gpipath=None, paint=False, group="unknown"):
    """Validate a source GAF, splitting it into validated and IEA-filtered files.

    Also writes markdown and JSON validation reports next to the source GAF.

    Args:
        dataset: name used to build the output filenames.
        source_gaf: path to the GAF file to validate.
        ontology_graph: ontology used to configure the parser.
        gpipath: optional path to a GPI authority file for the parser config.
        paint: whether PAINT handling is enabled in the parser config.
        group: group identifier passed through to the GafParser.

    Returns:
        [validated_gaf_path, filtered_gaf_path] — the "<dataset>_valid.gaf"
        and "<dataset>_noiea.gaf" paths.
    """
    base_dir = os.path.split(source_gaf)[0]
    filtered_gaf_path = os.path.join(base_dir, "{}_noiea.gaf".format(dataset))
    validated_gaf_path = os.path.join(base_dir, "{}_valid.gaf".format(dataset))

    # Pre-count the lines so the progress bar below has a known length.
    with open(source_gaf) as sg:
        lines = sum(1 for line in sg)

    # Context managers ensure both output files are closed even if parsing
    # raises (the originals were only closed on the success path).
    with open(filtered_gaf_path, "w") as filtered_associations, \
            open(validated_gaf_path, "w") as outfile:
        config = assocparser.AssocParserConfig(
            ontology=ontology_graph,
            filter_out_evidence=["IEA"],
            filtered_evidence_file=filtered_associations,
            gpi_authority_path=gpipath,
            paint=paint
        )
        gafwriter = GafWriter(file=outfile)
        click.echo("Validating source GAF: {}".format(source_gaf))
        parser = GafParser(config=config, group=group, dataset=dataset)
        with open(source_gaf) as gaf:
            with click.progressbar(iterable=parser.association_generator(file=gaf), length=lines) as associations:
                for assoc in associations:
                    gafwriter.write_assoc(assoc)

    # Emit the validation reports accumulated by the parser.
    with open(os.path.join(base_dir, "{}.report.md".format(dataset)), "w") as report_md:
        report_md.write(parser.report.to_markdown())
    with open(os.path.join(base_dir, "{}.report.json".format(dataset)), "w") as report_json:
        report_json.write(json.dumps(parser.report.to_report_json(), indent=4))

    return [validated_gaf_path, filtered_gaf_path]
def make_products(dataset, target_dir, gaf_path, products, ontology_graph):
    """Produce the requested downstream products (gpad and/or ttl) from a GAF.

    Args:
        dataset: name used to build the output filenames.
        target_dir: unused here; kept for signature compatibility with the
            other product-building commands.
        gaf_path: path to the source GAF file.
        products: dict with boolean flags under the "gpad" and "ttl" keys
            selecting which products to build.
        ontology_graph: ontology used to configure the GAF parser.

    Returns:
        The list of paths of products actually written (possibly empty).
    """
    # Bail before opening any output files: the original opened them first,
    # leaking two handles and leaving spurious empty files behind.
    if not products["gpad"] and not products["ttl"]:
        return []

    gafparser = GafParser()
    gafparser.config = assocparser.AssocParserConfig(
        ontology=ontology_graph,
        paint=True
    )

    # Pre-count the lines so the progress bar below has a known length.
    with open(gaf_path) as sg:
        lines = sum(1 for line in sg)

    product_files = {
        "gpad": open(os.path.join(os.path.split(gaf_path)[0], "{}.gpad".format(dataset)), "w"),
        "ttl": open(os.path.join(os.path.split(gaf_path)[0], "{}_cam.ttl".format(dataset)), "wb")
    }

    try:
        with open(gaf_path) as gf:
            click.echo("Using {} as the gaf to build data products with".format(gaf_path))
            if products["ttl"]:
                click.echo("Setting up {}".format(product_files["ttl"].name))
                rdf_writer = assoc_rdfgen.TurtleRdfWriter(label=os.path.split(product_files["ttl"].name)[1])
                transformer = assoc_rdfgen.CamRdfTransform(writer=rdf_writer)
            if products["gpad"]:
                click.echo("Setting up {}".format(product_files["gpad"].name))
                gpadwriter = GpadWriter(file=product_files["gpad"])

            click.echo("Making products...")
            with click.progressbar(iterable=gafparser.association_generator(file=gf), length=lines) as associations:
                for association in associations:
                    if products["ttl"]:
                        # Skip header entries; translate only real rows.
                        if "header" not in association or not association["header"]:
                            transformer.provenance()
                            transformer.translate(association)
                    if products["gpad"]:
                        gpadwriter.write_assoc(association)

            # The ttl graph is accumulated in memory and serialized once at the end.
            if products["ttl"]:
                click.echo("Writing ttl to disk")
                rdf_writer.serialize(destination=product_files["ttl"])
    finally:
        # Close output files even if parsing/serialization raises.
        for f in product_files.values():
            f.close()

    return [product_files[prod].name for prod in sorted(product_files.keys()) if products[prod]]
def gafparser_generator(ontology_graph: ontol.Ontology, gaf_file):
    """Build a GAF parser for *ontology_graph* and return an association
    generator over *gaf_file*, skipping the file header."""
    parser_config = assocparser.AssocParserConfig(ontology=ontology_graph)
    gaf_parser = GafParser(config=parser_config)
    return gaf_parser.association_generator(gaf_file, skipheader=True)