def process_vivo(graph_filename, vclass, format):
    """Group the labelled members of an RDF class by normalized label.

    The file at ``graph_filename`` is parsed into a ``Graph`` using the
    serialization named by ``format``. A SPARQL query then selects every
    distinct (subject, label) pair where the subject is typed as ``vclass``
    and carries an ``rdfs:label``.

    Args:
        graph_filename: Source handed to ``Graph.parse``.
        vclass: IRI of the class to select; it is wrapped in angle brackets
            and substituted for the ``VCLASS`` placeholder in the query.
        format: Serialization name forwarded to ``Graph.parse``. The name
            shadows the ``format`` builtin but is part of the signature.

    Returns:
        A ``defaultdict(list)`` mapping ``text_normalize(label)`` to the list
        of matching subjects, each converted with ``toPython()``.
    """
    graph = Graph()
    graph.parse(graph_filename, format=format)

    # The class IRI is spliced in textually, so the query is built per call.
    query_template = """ SELECT DISTINCT ?org ?label WHERE { ?org a VCLASS . ?org rdfs:label ?label . } """
    class_term = '<{}>'.format(vclass)
    sparql = query_template.replace('VCLASS', class_term)

    subjects_by_label = defaultdict(list)
    for subject, label in graph.query(sparql):
        key = text_normalize(label.toPython())
        # Distinct subjects whose labels normalize alike share one bucket.
        subjects_by_label[key].append(subject.toPython())
    return subjects_by_label
def process_orgref(fname):
    """Group the record IDs from a CSV file by normalized name.

    Rows come from ``read_oref_csv(fname)``. Each row's ``Name`` attribute is
    passed through ``text_normalize`` and its ``ID`` attribute is appended to
    the bucket for that normalized name.

    Args:
        fname: Passed unchanged to ``read_oref_csv``.

    Returns:
        A ``defaultdict(list)`` mapping each normalized name to the list of
        ``ID`` values that share it, in the order the rows were read.
    """
    out = defaultdict(list)
    # No row counter is needed, so iterate the rows directly.
    for row in read_oref_csv(fname):
        clean_name = text_normalize(row.Name)
        out[clean_name].append(row.ID)
    return out