Beispiel #1
0
def getKMERsForName(libname = 'mouse',tissue_term = None, **kwargs):
    '''
    Calls "parse" on the fasta file referred to as libname.

    Optionally, specify a tissue term that will serve as a regex filter on the
    FASTA record descriptors. Specifying a term such as "brain" will do
    a case insensitive search on records in the library to return only kmers
    from records matching "term".

    kwargs
    restored: optional value; when present it is handed to mem.getOrSet as
              "update" (presumably to store it in the cache instead of
              recomputing -- confirm against mem.getOrSet).
    Any other keyword is passed on through mem.rc.

    returns
    whatever mem.getOrSet returns for this library / tissue term.
    '''
    def setKMERsForName(**kwargs):
        # Fail with a KeyError (as before) when no library name arrives.
        if 'libname' not in kwargs:
            raise KeyError('libname')
        return parse(**kwargs)

    # Cache entries are named after the library, plus the tissue filter
    # when one is given.
    if tissue_term is None:
        name = libname
    else:
        name = '{0}_tissue={1}'.format(libname, tissue_term)

    overrides = dict(libname = libname,
                     tissue_term = tissue_term,
                     name = name)
    # "in" works on both Python 2 and 3; dict.has_key is Python 2 only.
    if 'restored' in kwargs:
        overrides['update'] = kwargs['restored']

    # Both paths return the result; previously the "restored" path
    # dropped it and returned None.
    return mem.getOrSet(setKMERsForName, **mem.rc(kwargs, **overrides))
Beispiel #2
0
def getTrackChrGenes(**kwargs):
    '''
    Collect every gene record on one chromosome of a bed file.

    kwargs
    num:   chromosome number (defaults to 1)
    fname: bedfile path (defaults to the global "bedfile")

    returns
    a list with one dict per record read from the track, built by pairing
    the record's keys() with its data.
    '''
    def setTrackChrGenes(**kwargs):
        bed_path = kwargs.get('fname', bedfile)
        chrom_num = kwargs.get('num', 1)
        loaded = track.load(bed_path)
        genes = []
        for record in loaded.read('chr{0}'.format(chrom_num)):
            genes.append(dict(zip(record.keys(), record.data)))
        return genes

    # NOTE(review): the cache name uses the full "fname" when one is given
    # but only the basename of the default bedfile -- confirm this is intended.
    cache_name = '{0}_{1}'.format(
        kwargs.get('fname', os.path.basename(bedfile)),
        kwargs.get('num', 1))
    settings = mem.rc(kwargs, onfail = 'compute', name = cache_name)
    return mem.getOrSet(setTrackChrGenes, **settings)
Beispiel #3
0
def getTranslatedForName(libname, **kwargs):
    '''
    Translate kMERs to a numerical array for downstream analysis.

    Fetches the kmers for libname through getKMERsForName and encodes each
    one as a row of numbers using the module-level "translation" mapping.
    Letters that are missing from "translation" are encoded as 4.

    kwargs
    tissue_term: optional term used here to build the cache name; it also
                 stays in the kwargs that are passed on through mem.rc.

    returns
    a tuple (idxed_mers, translated, occurrences):
    idxed_mers:  dict mapping row index -> kmer
    translated:  array with one row per kmer and k columns
    occurrences: array holding, per row, the value stored for that kmer
    '''
    def setTranslatedForName(**kwargs):
        # "k" and "translation" are module-level names that are only read
        # here, so no "global" declaration is needed.  No local variable may
        # be called "k": the old comprehension used it as a loop variable
        # and, under Python 2 scoping with "global k", overwrote the
        # module-level value with a kmer string.
        libname = kwargs.get('libname')
        kmer_counts = getKMERsForName(**mem.sr(kwargs, libname = libname))
        kmers = list(kmer_counts.keys())

        idxed_mers = dict(enumerate(kmers))
        occurrences = array([kmer_counts[mer] for mer in kmers])

        # Each row assignment requires the kmer to have exactly k letters.
        translated = zeros((len(kmers), k))
        for row, mer in enumerate(kmers):
            translated[row] = [translation.get(letter, 4) for letter in mer]
        return idxed_mers, translated, occurrences

    tissue_term = kwargs.get('tissue_term', None)
    if tissue_term is None:
        name = libname
    else:
        name = '{0}_tissue={1}'.format(libname, tissue_term)

    return mem.getOrSet(setTranslatedForName,
                        **mem.rc(kwargs,
                                 libname = libname,
                                 name = name))
Beispiel #4
0
def getBandCollectionAliases(**kwargs):
    '''
    Gather the aliases of every band in a named collection, together with
    the aliases of the bands' members.

    kwargs
    name: key into the module-level "band_collectionnames" mapping, whose
          value is iterated as a list of band names (required; a missing
          key raises KeyError).

    returns
    whatever mem.getOrSet returns for the computation below, which builds a
    flat list of alias values: for each matching band, the member aliases
    first, then the band's own aliases.
    '''
    def setBandCollectionAliases(name = None, **kwargs):
        assert name is not None
        all_aliases = []
        # NOTE(review): this relies on the Freebase MQL read service being
        # reachable -- confirm it is still available.
        freebase = discovery.build('freebase', 'v1', developerKey=DEVELOPER_KEY)
        for band_name in band_collectionnames[name]:
            # Ask for musical groups whose name matches, plus the aliases
            # of the group and of each of its members.
            q = [{
                    "name~=":"{0}".format(band_name),
                    "type":  "/music/musical_group",

                    "/common/topic/alias": [{
                            "value": None
                            }],
                    "/music/musical_group/member": [{
                            "member": {
                                "/common/topic/alias": [{
                                        "value": None
                                        }]
                                }
                            }],
                    }]
            responses = json.loads(freebase.mqlread(query=json.dumps(q)).execute())
            for band in responses['result']:
                # Member aliases are appended before the band's own aliases.
                for entry in band["/music/musical_group/member"]:
                    all_aliases.extend(
                        alias['value']
                        for alias in entry['member']["/common/topic/alias"])
                all_aliases.extend(
                    alias['value'] for alias in band["/common/topic/alias"])

        return all_aliases

    name = kwargs['name']
    return mem.getOrSet(setBandCollectionAliases, **mem.rc(kwargs,
                                                           name = name))