def inverseAssoc(self, gene):
    """Yield each term whose associated gene collection contains ``gene``.

    The gene name is first normalised with ``Gene.canonicalName``. Terms are
    taken from iterating ``self.associations`` and the genes of a term are
    looked up through ``self[term]``. This is a generator, so nothing is
    evaluated until the result is iterated.
    """
    canonical = Gene.canonicalName(gene)
    for term in self.associations:
        # Guard clause: skip terms that do not list the gene.
        if canonical not in self[term]:
            continue
        yield term
def fromFile(cls, inputFileName, taxons=None, dataset=None):
    """Decide the file type from its name and read the gene associations.

    Files whose name ends in ``.pickle`` or ``.pickle_reserved`` are loaded
    as a pickled ``(associations, alltaxons)`` pair; ``taxons`` and
    ``dataset`` are not used to filter the pickled associations.

    Any other file is read as UTF-8, tab-separated text. Lines starting with
    ``!`` and blank lines are skipped. For every remaining line, column
    index 2 is the gene name (normalised with ``Gene.canonicalName``),
    column index 4 is the term and column index 12 is a ``|``-separated list
    of ``<prefix>:<integer>`` taxon entries.

    Args:
        inputFileName: Path of the file to read.
        taxons: Optional iterable of integer taxon ids. When given, a line
            is kept only if at least one of its taxon ids is in ``taxons``.
            Only applied to text files.
        dataset: Optional container of canonical gene names. When given, a
            line is kept only if its gene is in ``dataset``. Only applied as
            a filter to text files; always forwarded to ``cls``.

    Returns:
        ``cls(associations, alltaxons, dataset)`` where ``associations``
        maps term -> set of genes and ``alltaxons`` is the set of every
        taxon id seen in the file (before filtering).

    Raises:
        IndexError: If a non-blank data line has fewer than 13 columns or a
            taxon entry has no ``:`` separator.
        ValueError: If the part after ``:`` in a taxon entry is not an
            integer.
    """
    debug("Reading gene associations file %s...%s" % (inputFileName,
          ("" if dataset is None else " Dataset size is %d." % len(dataset))))
    if inputFileName.endswith(('.pickle', '.pickle_reserved')):
        # Serialized data = much faster.
        # NOTE(security): unpickling can execute arbitrary code, so only
        # pickle files from a trusted source should be passed in here.
        with open(inputFileName, 'rb') as f:
            associations, alltaxons = pickle.load(f)
    else:
        associations = defaultdict(set)
        alltaxons = set()
        # Normalise once so that any iterable (list, tuple, set, generator)
        # is accepted, not only objects providing ``.intersection``.
        wantedTaxons = None if taxons is None else frozenset(taxons)
        with open(inputFileName, 'rb') as associationFile:
            content = associationFile.read().decode('utf8')
        for line in content.splitlines():
            # Blank lines carry no record and would otherwise fail with an
            # IndexError on the column lookups below.
            if line.startswith('!') or not line.strip():
                continue
            columns = line.split('\t')
            taxon = {int(x.split(':')[1]) for x in columns[12].split('|')}
            alltaxons.update(taxon)
            gene = Gene.canonicalName(columns[2])
            term = columns[4]
            taxonMatches = (wantedTaxons is None or
                            not wantedTaxons.isdisjoint(taxon))
            if taxonMatches and (dataset is None or gene in dataset):
                associations[term].add(gene)
    debug("Finished reading gene associations file %s... " % inputFileName)
    return cls(associations, alltaxons, dataset)
def fromFile(cls, inputFileName, taxons=None, dataset=None):
    """Decide the file type from its name and read the gene associations.

    Files whose name ends in ``.pickle`` or ``.pickle_reserved`` are loaded
    as a pickled ``(associations, alltaxons)`` pair; ``taxons`` and
    ``dataset`` are not used to filter the pickled associations.

    Any other file is read as UTF-8, tab-separated text. Lines starting with
    ``!`` and blank lines are skipped. For every remaining line, column
    index 2 is the gene name (normalised with ``Gene.canonicalName``),
    column index 4 is the term and column index 12 is a ``|``-separated list
    of ``<prefix>:<integer>`` taxon entries.

    Args:
        inputFileName: Path of the file to read.
        taxons: Optional iterable of integer taxon ids. When given, a line
            is kept only if at least one of its taxon ids is in ``taxons``.
            Only applied to text files.
        dataset: Optional container of canonical gene names. When given, a
            line is kept only if its gene is in ``dataset``. Only applied as
            a filter to text files; always forwarded to ``cls``.

    Returns:
        ``cls(associations, alltaxons, dataset)`` where ``associations``
        maps term -> set of genes and ``alltaxons`` is the set of every
        taxon id seen in the file (before filtering).

    Raises:
        IndexError: If a non-blank data line has fewer than 13 columns or a
            taxon entry has no ``:`` separator.
        ValueError: If the part after ``:`` in a taxon entry is not an
            integer.
    """
    debug("Reading gene associations file %s...%s" % (inputFileName,
          ("" if dataset is None else " Dataset size is %d." % len(dataset))))
    if inputFileName.endswith(('.pickle', '.pickle_reserved')):
        # Serialized data = much faster.
        # NOTE(security): unpickling can execute arbitrary code, so only
        # pickle files from a trusted source should be passed in here.
        with open(inputFileName, 'rb') as f:
            associations, alltaxons = pickle.load(f)
    else:
        associations = defaultdict(set)
        alltaxons = set()
        # Normalise once so that any iterable (list, tuple, set, generator)
        # is accepted, not only objects providing ``.intersection``.
        wantedTaxons = None if taxons is None else frozenset(taxons)
        with open(inputFileName, 'rb') as associationFile:
            content = associationFile.read().decode('utf8')
        for line in content.splitlines():
            # Blank lines carry no record and would otherwise fail with an
            # IndexError on the column lookups below.
            if line.startswith('!') or not line.strip():
                continue
            columns = line.split('\t')
            taxon = {int(x.split(':')[1]) for x in columns[12].split('|')}
            alltaxons.update(taxon)
            gene = Gene.canonicalName(columns[2])
            term = columns[4]
            taxonMatches = (wantedTaxons is None or
                            not wantedTaxons.isdisjoint(taxon))
            if taxonMatches and (dataset is None or gene in dataset):
                associations[term].add(gene)
    debug("Finished reading gene associations file %s... " % inputFileName)
    return cls(associations, alltaxons, dataset)