def __init__(self, inputFileName, namespace='molecular_function'):
    """Constructor, reads and parses the ontology OBO file.

    The file is split into blank-line-separated stanzas. Only ``[Term]``
    stanzas whose ``namespace`` tag equals *namespace* and that are not
    marked ``is_obsolete: true`` are kept.

    Args:
        inputFileName: Path (string or path-like) of the OBO file to read.
        namespace: Value of the ``namespace`` tag to keep. The term whose
            ``name`` equals this value becomes ``self.root``.

    Attributes set:
        root: ``id`` of the term whose name equals *namespace*, or None
            when no such term was found.
        namespace: The *namespace* argument.
        inputFileName: ``Path`` built from *inputFileName*.
        ontology: Plain dict mapping term id to a per-term mapping with
            the keys ``name``, ``parents``, ``children``, ``node`` and
            ``clf``. Ids that only ever appear as an ``is_a`` target get
            an entry holding just ``children``.
        associations: Initialised to None.
        geneFactory: A fresh ``GeneFactory`` instance.

    Raises:
        AssertionError: If more than one kept term is named like the
            namespace (only while assertions are enabled).
        IndexError: If a line inside a ``[Term]`` stanza has no ``': '``
            separator.
    """
    debug("Reading ontology file %s... " % inputFileName)
    self.root = None
    self.namespace = namespace
    self.inputFileName = Path(inputFileName)
    ontology = defaultdict(lambda: defaultdict(list))

    # Tags stored as one string; every other tag is kept as a list.
    nonlists = ('id', 'def', 'name', 'namespace', 'is_obsolete')

    with self.inputFileName.open() as go:
        lines = go.read().splitlines()

    # Runs of non-empty lines are stanzas; runs of empty lines separate them.
    for nonblank, stanza in groupby(lines, lambda line: line != ''):
        stanza = list(stanza)
        if not nonblank or stanza[0] != '[Term]':
            continue

        # Collect "tag: value" pairs. Values are accumulated per tag over
        # the whole stanza so that a repeated tag (e.g. is_a) is not lost
        # when its occurrences are separated by a different tag.
        term = defaultdict(list)
        for line in stanza[1:]:
            pieces = line.split(': ', 1)
            term[pieces[0]].append(pieces[1])
        for tag in nonlists:
            # Membership test does not create the key, so absent tags
            # still read back as an empty list afterwards.
            if tag in term:
                term[tag] = term[tag][0]

        # Filter terms by namespace, discard obsolete terms
        if term['namespace'] != namespace or term['is_obsolete'] == 'true':
            continue

        term_id = term['id']

        # Decide root node
        if term['name'] == namespace:
            assert self.root is None
            self.root = term_id

        # Save the term to ontology
        # FIXME: if this misbehaves, remove the replace
        ontology[term_id]['name'] = term['name'].replace('_', ' ')

        for ref in term['is_a']:
            # The value looks like "<id> ! <name>"; the trailing comment is
            # optional, so take everything before the first " ! " instead
            # of unpacking exactly two parts.
            refid = ref.partition(' ! ')[0]
            ontology[refid]['children'].append(term_id)
            ontology[term_id]['parents'].append(refid)

        # This is used by Bayes nets
        ontology[term_id]['node'] = defaultdict(dict)  # fold : clfName : node
        ontology[term_id]['clf'] = defaultdict(dict)  # fold : clfName : Classifier

    # Freeze the outer level into a plain dict so that looking up an
    # unknown term id raises KeyError instead of creating an entry.
    self.ontology = {**ontology}
    self.associations = None
    self.geneFactory = GeneFactory()
    debug("Initialized ontology for file %s... " % inputFileName)