def getKMERsForName(libname = 'mouse', tissue_term = None, **kwargs):
    '''
    Calls "parse" on the fasta file referred to as libname, going through
    the mem cache (mem.getOrSet).

    Optionally, specify a tissue term that will serve as a regex filter
    on the FASTA record descriptors. Specifying a term such as "brain"
    will do a case insensitive search on records in the library to
    return only kmers from records matching "term".

    kwargs
      restored: when present, its value is handed to mem.getOrSet as
                "update" (presumably to overwrite the cached entry with
                an already computed value -- confirm against mem).
      Any other keyword is forwarded unchanged through mem.rc.

    The cache entry is named libname, or "<libname>_tissue=<tissue_term>"
    when a tissue term is given.

    returns the result of mem.getOrSet, on both the plain and the
    "restored" path.
    '''
    def setKMERsForName(**kwargs):
        # Compute step used by the cache: parse receives every keyword.
        return parse(**kwargs)

    if tissue_term is None:
        name = libname
    else:
        name = '{0}_tissue={1}'.format(libname, tissue_term)

    overrides = dict(libname = libname, tissue_term = tissue_term, name = name)
    # "in" replaces dict.has_key, which only exists on Python 2.
    if 'restored' in kwargs:
        overrides['update'] = kwargs['restored']

    # A single return so the "restored" path no longer yields None.
    return mem.getOrSet(setKMERsForName, **mem.rc(kwargs, **overrides))
def getTrackChrGenes(**kwargs):
    '''
    Read every record of one chromosome from a bed file, going through
    the mem cache (mem.getOrSet with onfail = 'compute').

    kwargs
      num:   chromosome number (defaults to 1)
      fname: bedfile path (uses the global bedfile as the default)

    The cache entry is named "<fname>_<num>"; when no fname is given the
    basename of the global bedfile is used in its place.

    returns a list holding one dict per record, built by pairing the
    record's keys() with its data.
    '''
    def setTrackChrGenes(**kwargs):
        bed_path = kwargs.get('fname', bedfile)
        chrom_num = kwargs.get('num', 1)
        # Keep the track handle bound for the whole read, as the records
        # may be produced lazily from it.
        handle = track.load(bed_path)
        records = handle.read('chr{0}'.format(chrom_num))
        genes = []
        for record in records:
            genes.append(dict(zip(record.keys(), record.data)))
        return genes

    fallback_label = os.path.basename(bedfile)
    cache_name = '{0}_{1}'.format(kwargs.get('fname', fallback_label),
                                  kwargs.get('num', 1))
    cache_args = mem.rc(kwargs, onfail = 'compute', name = cache_name)
    return mem.getOrSet(setTrackChrGenes, **cache_args)
def getTranslatedForName(libname, **kwargs):
    '''
    Translate kMERs to a numerical array for downstream analysis.

    The kmers come from getKMERsForName(libname, ...); the result is
    cached through mem.getOrSet under the name libname, or
    "<libname>_tissue=<tissue_term>" when kwargs carries a tissue_term.

    Reads two module-level globals:
      k:           number of columns of the translated array
      translation: mapping from a kmer letter to its numeric code;
                   letters missing from it are encoded as 4.

    returns a tuple (idxed_mers, translated, occurrences)
      idxed_mers:  dict mapping row index -> kmer
      translated:  array of shape (number of kmers, k), one row of codes
                   per kmer
      occurrences: array with the value stored for each kmer in the kmer
                   mapping, in row order
    '''
    def setTranslatedForName(**kwargs):
        # k and translation are only read, so no "global" statement is
        # needed. The loops below deliberately avoid "k" as a variable
        # name: with "global k" in effect, a Python 2 list comprehension
        # looping over "k" overwrote the module-level kmer length.
        libname = kwargs.get('libname')
        counts = getKMERsForName(**mem.sr(kwargs, libname = libname))

        mers = list(counts.keys())
        idxed_mers = dict(enumerate(mers))
        translated = zeros((len(mers), k))
        occurrences = array([counts[mer] for mer in mers])

        for row, kmer in enumerate(mers):
            translated[row] = [translation.get(letter, 4) for letter in kmer]
        return idxed_mers, translated, occurrences

    tissue_term = kwargs.get('tissue_term', None)
    if tissue_term is None:
        name = libname
    else:
        name = '{0}_tissue={1}'.format(libname, tissue_term)

    return mem.getOrSet(setTranslatedForName,
                        **mem.rc(kwargs, libname = libname, name = name))
def getBandCollectionAliases(**kwargs):
    '''
    Gather the aliases of every band in a named collection, going through
    the mem cache (mem.getOrSet).

    kwargs
      name: key into the global band_collectionnames mapping (required;
            a missing key raises KeyError). It is also passed to mem.rc
            as the cache entry name.

    For each band name in the collection an MQL query is sent to the
    Freebase service for "/music/musical_group" topics whose name matches
    it, and the alias values of each matching band's members and of the
    band itself are collected.

    returns a flat list of alias values; for each matching band the
    member aliases come first, followed by the band aliases.
    '''
    def setBandCollectionAliases(name = None, **kwargs):
        assert name != None
        service = discovery.build('freebase', 'v1', developerKey=DEVELOPER_KEY)
        collected = []

        for band_name in band_collectionnames[name]:
            query = [{
                "name~=": "{0}".format(band_name),
                "type": "/music/musical_group",
                "/common/topic/alias": [{
                    "value": None
                }],
                "/music/musical_group/member": [{
                    "member": {
                        "/common/topic/alias": [{
                            "value": None
                        }]
                    }
                }],
            }]
            request = service.mqlread(query=json.dumps(query))
            reply = json.loads(request.execute())

            for band in reply['result']:
                # Member aliases first, then the band's own aliases.
                for entry in band["/music/musical_group/member"]:
                    for alias in entry['member']["/common/topic/alias"]:
                        collected.append(alias['value'])
                for alias in band["/common/topic/alias"]:
                    collected.append(alias['value'])

        return collected

    collection_key = kwargs['name']
    cache_args = mem.rc(kwargs, name = collection_key)
    return mem.getOrSet(setBandCollectionAliases, **cache_args)