def produce_ttl(dataset, target_dir, gaf_path, ontology_graph):
    """Convert a GAF file into a Turtle (TTL) RDF file.

    Parses the GAF at ``gaf_path`` (validated against ``ontology_graph``),
    transforms each non-header association into RDF, and serializes the
    result to ``<dataset>_cam.ttl`` next to the input GAF.

    :param dataset: dataset name used to build the output filename
    :param target_dir: NOTE(review): accepted but unused here — output is
        written alongside ``gaf_path``; confirm intent with callers
    :param gaf_path: path to the input GAF file
    :param ontology_graph: ontology used to configure the GAF parser
    :return: path of the written TTL file
    """
    gafparser = GafParser()
    gafparser.config = assocparser.AssocParserConfig(ontology=ontology_graph)

    # Count lines up front so the progress bar has a known length.
    with open(gaf_path) as sg:
        lines = sum(1 for line in sg)

    ttl_path = os.path.join(os.path.split(gaf_path)[0], "{}_cam.ttl".format(dataset))
    click.echo("Producing ttl: {}".format(ttl_path))
    rdf_writer = assoc_rdfgen.TurtleRdfWriter()
    transformer = assoc_rdfgen.CamRdfTransform(writer=rdf_writer)
    # (removed: a second, unused AssocParserConfig was constructed here)

    with open(gaf_path) as gf:
        with click.progressbar(iterable=gafparser.association_generator(file=gf), length=lines) as associations:
            for association in associations:
                # Header entries are metadata, not associations — skip them.
                if "header" not in association or not association["header"]:
                    transformer.provenance()
                    transformer.translate(association)

    with open(ttl_path, "wb") as ttl:
        click.echo("Writing ttl to disk")
        rdf_writer.serialize(destination=ttl)

    return ttl_path
def produce_gpi(dataset, target_dir, gaf_path, ontology_graph):
    """Derive a GPI (gene product information) file from a GAF.

    Parses the GAF at ``gaf_path``, converts each association to a gene
    product entity, and writes each distinct entity once to
    ``<dataset>.gpi`` next to the input GAF.

    :param dataset: dataset name used to build the output filename
    :param target_dir: NOTE(review): accepted but unused here — output is
        written alongside ``gaf_path``; confirm intent with callers
    :param gaf_path: path to the input GAF file
    :param ontology_graph: ontology used to configure the GAF parser
    :return: path of the written GPI file
    """
    gafparser = GafParser()
    gafparser.config = assocparser.AssocParserConfig(ontology=ontology_graph)

    # Count lines up front so the progress bar has a known length.
    with open(gaf_path) as sg:
        lines = sum(1 for line in sg)

    gpi_path = os.path.join(os.path.split(gaf_path)[0], "{}.gpi".format(dataset))
    with open(gaf_path) as gf, open(gpi_path, "w") as gpi:
        click.echo("Using {} as the gaf to build gpi with".format(gaf_path))
        bridge = gafgpibridge.GafGpiBridge()
        gpiwriter = entitywriter.GpiWriter(file=gpi)
        # De-duplicate entities: many associations map to one gene product.
        gpi_cache = set()

        with click.progressbar(iterable=gafparser.association_generator(file=gf), length=lines) as associations:
            for association in associations:
                entity = bridge.convert_association(association)
                # Test for None BEFORE the cache lookup so a null entity
                # short-circuits and is never hashed into the set.
                if entity is not None and entity not in gpi_cache:
                    gpi_cache.add(entity)
                    gpiwriter.write_entity(entity)

    return gpi_path
def make_products(dataset, target_dir, gaf_path, products, ontology_graph):
    """Produce the requested data products (GPAD and/or TTL) from a GAF.

    Streams through the GAF once, writing a GPAD file and/or accumulating
    an RDF graph (serialized to TTL at the end) depending on the flags in
    ``products``.

    :param dataset: dataset name used to build the output filenames
    :param target_dir: NOTE(review): accepted but unused here — outputs are
        written alongside ``gaf_path``; confirm intent with callers
    :param gaf_path: path to the input GAF file
    :param products: dict with boolean flags for keys "gpad" and "ttl"
    :param ontology_graph: ontology used to configure the GAF parser
    :return: list of paths of the products actually requested (sorted by
        product key), or [] when nothing was requested
    """
    # Bail BEFORE touching the filesystem: the original opened (and leaked)
    # both output handles, creating empty files, even when no product was
    # requested.
    if not products["gpad"] and not products["ttl"]:
        return []

    gafparser = GafParser()
    gafparser.config = assocparser.AssocParserConfig(
        ontology=ontology_graph,
        paint=True
    )

    # Count lines up front so the progress bar has a known length.
    with open(gaf_path) as sg:
        lines = sum(1 for line in sg)

    product_files = {
        "gpad": open(os.path.join(os.path.split(gaf_path)[0], "{}.gpad".format(dataset)), "w"),
        "ttl": open(os.path.join(os.path.split(gaf_path)[0], "{}_cam.ttl".format(dataset)), "wb")
    }

    try:
        with open(gaf_path) as gf:
            click.echo("Using {} as the gaf to build data products with".format(gaf_path))
            if products["ttl"]:
                click.echo("Setting up {}".format(product_files["ttl"].name))
                rdf_writer = assoc_rdfgen.TurtleRdfWriter(label=os.path.split(product_files["ttl"].name)[1])
                transformer = assoc_rdfgen.CamRdfTransform(writer=rdf_writer)
            if products["gpad"]:
                click.echo("Setting up {}".format(product_files["gpad"].name))
                gpadwriter = GpadWriter(file=product_files["gpad"])

            click.echo("Making products...")
            with click.progressbar(iterable=gafparser.association_generator(file=gf), length=lines) as associations:
                for association in associations:
                    if products["ttl"]:
                        # Header entries are metadata, not associations.
                        if "header" not in association or not association["header"]:
                            transformer.provenance()
                            transformer.translate(association)
                    if products["gpad"]:
                        gpadwriter.write_assoc(association)

            # post ttl steps: serialize the accumulated RDF graph
            if products["ttl"]:
                click.echo("Writing ttl to disk")
                rdf_writer.serialize(destination=product_files["ttl"])
    finally:
        # Close output handles even if parsing/serialization raises.
        for f in product_files.values():
            f.close()

    return [product_files[prod].name for prod in sorted(product_files.keys()) if products[prod]]
def main():
    """Command-line wrapper for association (GAF/GPAD/HPOA/GPI) parsing.

    Builds the argument parser, loads the ontology named by ``--resource``,
    dispatches to the chosen subcommand (validate / filter / convert /
    map2slim), and writes the parser report to ``--messagefile`` or stdout.
    """
    parser = argparse.ArgumentParser(
        description='Wrapper for obographs assocmodel library'
        """
        By default, ontologies and assocs are cached locally and synced from a remote sparql endpoint
        """,
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-r', '--resource', type=str, required=False,
                        help='Name of ontology')
    parser.add_argument('-f', '--file', type=str, required=False,
                        help='Name of input file for associations - currently GAF is assumed')
    parser.add_argument('-F', '--format', type=str, required=False,
                        help='Format of assoc file. One of GAF, GPAD or HPOA')
    parser.add_argument('-o', '--outfile', type=str, required=False,
                        help='Path to output file')
    parser.add_argument('-m', '--messagefile', type=str, required=False,
                        help='Path to messages (report) markdown file')
    parser.add_argument('-t', '--to', type=str, required=False,
                        help='Output to (tree, dot, ...)')
    parser.add_argument("--filter-out", nargs="+", required=False, default=[],
                        metavar="EVIDENCE",
                        help="List of any evidence codes to filter out of the GAF. E.G. \n--filter-out IEA IMP")
    parser.add_argument("--filtered-file", required=False, default=None,
                        metavar="FILTERED_FILE",
                        help="File to write the filtered out evidence GAF to")
    parser.add_argument('-T', '--taxon', nargs='*', required=False,
                        help='valid taxon (NCBITaxon ID) - validate against this')
    parser.add_argument('--subject_prefix', nargs='*', required=False,
                        help='E.g PomBase - validate against this')
    parser.add_argument('--object_prefix', nargs='*', required=False,
                        help='E.g GO - validate against this')
    parser.add_argument('-v', '--verbosity', default=0, action='count',
                        help='Increase output verbosity')

    subparsers = parser.add_subparsers(dest='subcommand', help='sub-command help')

    parser_n = subparsers.add_parser('validate', help='Validate associations')
    parser_n.set_defaults(function=validate_assocs)

    parser_n = subparsers.add_parser('filter', help='Filter associations')
    parser_n.set_defaults(function=filter_assocs)

    parser_n = subparsers.add_parser('convert', help='Convert associations')
    parser_n.set_defaults(function=convert_assocs)
    parser_n.add_argument('-t', '--to', type=str, required=True,
                          help='Format to convert to')

    parser_n = subparsers.add_parser('map2slim', help='Map to a subset/slim')
    parser_n.set_defaults(function=map2slim)
    parser_n.add_argument('-p', '--properties', nargs='*', type=str, required=False,
                          help='Properties')
    parser_n.add_argument('-s', '--subset', type=str, required=True,
                          help='subset (e.g. \nmap2slim)')

    args = parser.parse_args()

    if args.verbosity >= 2:
        logging.basicConfig(level=logging.DEBUG)
    elif args.verbosity == 1:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARNING)

    logging.info("Welcome!")

    # `function` is only set via subparser set_defaults; without a subcommand
    # `args.function` would raise AttributeError. Exit with usage instead.
    if not hasattr(args, 'function'):
        parser.error("a subcommand is required (validate, filter, convert, map2slim)")

    handle = args.resource

    # Ontology Factory
    ofactory = OntologyFactory()
    logging.info("Creating ont object from: {} {}".format(handle, ofactory))
    ont = ofactory.create(handle)
    logging.info("ont: {}".format(ont))

    func = args.function

    # Upper case all evidence codes
    args.filter_out = [code.upper() for code in args.filter_out]

    # set configuration
    filtered_evidence_file = open(args.filtered_file, "w") if args.filtered_file else None
    config = assocparser.AssocParserConfig(
        valid_taxa=args.taxon,
        ontology=ont,
        class_idspaces=args.object_prefix,
        entity_idspaces=args.subject_prefix,
        filter_out_evidence=args.filter_out,
        filtered_evidence_file=filtered_evidence_file)

    # Default to GAF when no format was given.
    fmt = 'gaf' if args.format is None else args.format.lower()

    # TODO: use a factory
    p = None
    if fmt == 'gaf':
        p = GafParser()
    elif fmt == 'gpad':
        p = GpadParser()
    elif fmt == 'hpoa':
        p = HpoaParser()
    elif fmt == "gpi":
        p = entityparser.GpiParser()
        func = validate_entity
    else:
        # Previously an unrecognized format fell through and crashed with
        # AttributeError on `p.config`; fail with a clear usage error.
        parser.error("unrecognized format: {}".format(fmt))
    p.config = config

    outfh = None
    if args.outfile is not None:
        two_mb = 2097152  # large write buffer for bulk output
        outfh = open(args.outfile, "w", buffering=two_mb)
    try:
        func(ont, args.file, outfh, p, args)
    finally:
        # Close output handles even if the subcommand raises.
        if filtered_evidence_file:
            filtered_evidence_file.close()
        if outfh is not None:
            outfh.close()

    if args.messagefile is not None:
        with open(args.messagefile, "w") as mfh:
            mfh.write(p.report.to_markdown())
    else:
        print(p.report.to_markdown())