def produce_gpi(dataset, target_dir, gaf_path, ontology_graph):
    """Build a GPI file from the associations found in a GAF file.

    Every association parsed from ``gaf_path`` is converted to an entity
    through ``GafGpiBridge``; each distinct non-``None`` entity is written
    exactly once to ``<dataset>.gpi``, which is created in the same
    directory as ``gaf_path``.

    Args:
        dataset: Name used as the stem of the generated ``.gpi`` file.
        target_dir: Not used by this function; kept in the signature for
            callers that pass it.
        gaf_path: Path of the GAF file to read.
        ontology_graph: Ontology handed to the parser configuration.

    Returns:
        The path of the GPI file that was written.
    """
    parser = GafParser()
    parser.config = assocparser.AssocParserConfig(ontology=ontology_graph)

    # Count the lines up front so the progress bar can be given a length.
    with open(gaf_path) as counted:
        total_lines = sum(1 for _ in counted)

    gaf_directory = os.path.split(gaf_path)[0]
    gpi_path = os.path.join(gaf_directory, "{}.gpi".format(dataset))

    with open(gaf_path) as gaf_file, open(gpi_path, "w") as gpi_file:
        click.echo("Using {} as the gaf to build gpi with".format(gaf_path))
        converter = gafgpibridge.GafGpiBridge()
        writer = entitywriter.GpiWriter(file=gpi_file)
        seen_entities = set()

        with click.progressbar(
            iterable=parser.association_generator(file=gaf_file),
            length=total_lines,
        ) as associations:
            for association in associations:
                entity = converter.convert_association(association)
                # Skip entities already written and associations that
                # produced no entity.
                if entity in seen_entities or entity is None:
                    continue
                seen_entities.add(entity)
                writer.write_entity(entity)

    return gpi_path
def test_gaf_gpi_bridge():
    """Check that a parsed GAF row converts to an entity of type ``gene``."""
    # A single 17-field GAF row, already split into its columns.
    gaf_columns = [
        "MGI",
        "MGI:1923503",
        "0610006L08Rik",
        "enables",
        "GO:0003674",
        "MGI:MGI:2156816|GO_REF:0000015",
        "ND",
        "",
        "F",
        "RIKEN cDNA 0610006L08 gene",
        "",
        "gene",
        "taxon:10090",
        "20120430",
        "MGI",
        "",
        "",
    ]

    parse_result = gafparser.to_association(
        gaf_columns, qualifier_parser=assocparser.Qualifier2_2()
    )
    first_association = parse_result.associations[0]

    converter = gafgpibridge.GafGpiBridge()
    converted = converter.convert_association(first_association)

    assert converted.get("type") == ["gene"]