Python load_data Beispiele, compbio.utils.bsub_utils.load_data Python Beispiele

Beispiel #1

0

Datei anzeigen

def setFamData(rfid = None, ftype = None,**kwargs):
    """Load the structure and tree outputs stored for one family.

    Args:
        rfid: Family identifier; must be truthy.
        ftype: Family type; must be truthy.  ``'all'`` selects the ``FA``
            prefix, any other value selects ``RS``.
        **kwargs: Accepted and ignored.

    Returns:
        Tuple ``(sdat, tdat)``: whatever ``bsu.load_data`` returns for the
        ``<prefix>_<rfid>`` and ``<prefix>_tree_<rfid>`` output records.
    """
    assert rfid
    assert ftype

    if ftype == 'all':
        prefix = 'FA'
    else:
        prefix = 'RS'

    struct_key = '{0}_{1}'.format(prefix, rfid)
    struct_data = bsu.load_data(struct_key, 'output')

    tree_key = '{0}_tree_{1}'.format(prefix, rfid)
    tree_data = bsu.load_data(tree_key, 'output')

    return struct_data, tree_data

Beispiel #2

0

Datei anzeigen

Datei: run_mcmc.py Projekt: bh0085/compbio

def run_single(run_id):
    '''
    Run the ``run_mcmc`` routine for one stored parameter set.

    The ``'input'`` record saved under ``run_id`` is loaded through
    ``butils.load_data`` and passed, together with ``run_id``, to
    ``bsm.runmat``; whatever ``bsm.runmat`` returns is returned.
    '''
    params = butils.load_data(run_id, 'input')
    result = bsm.runmat('run_mcmc', params, run_id)
    return result

Beispiel #3

0

Datei anzeigen

Datei: scripts.py Projekt: bh0085/projects

def run(run_id):
    """Load the input record for ``run_id`` and compute its consensus.

    The record's ``'ofs'`` entry is passed to ``get_consensus`` with
    ``reset=True``; the result of that call is returned.
    """
    inputs = bsu.load_data(run_id, 'input')
    return get_consensus(inputs['ofs'], run_id=run_id, reset=True)

Beispiel #4

0

Datei anzeigen

Datei: view_outputs.py Projekt: bh0085/compbio

def view2():
    """Scatter every numeric input parameter against ``improve_ratio``.

    Lists ``batch/outputs`` (resolved through ``cfg.dataPath``), keeps the
    entries whose name contains ``"mcmc"``, uses the first ten characters of
    every tenth entry as a run id, and loads the matching ``"input"`` and
    ``"output"`` records with ``butils.load_data``.  One horizontal axes
    strip is added to figure 1 per key of the first input record;
    string-valued parameters keep an empty strip.  The figure is saved to
    ``figs/soheil/broad_run0_psplits.ps``.

    Returns:
        The list of loaded input records (empty when no run matched, in
        which case no figure is produced).
    """
    files = [l for l in os.listdir(cfg.dataPath("batch/outputs")) if "mcmc" in l]
    # Subsample: only every tenth run id is inspected.
    ids = [name[0:10] for name in files][::10]

    inps = [butils.load_data(i, "input") for i in ids]
    outs = [butils.load_data(i, "output") for i in ids]
    if not inps:
        # Nothing to plot; the original crashed on inps[0] here.
        return inps

    params = inps[0].keys()
    n_params = len(params)

    f = myplots.fignum(1, (8, 8))

    for i, p in enumerate(params):
        ax = f.add_axes([0.05, i * (1.0 / n_params), 0.9, 1.0 / n_params], title=p)

        xvals = [elt.get(p) for elt in inps]
        if isinstance(xvals[0], str):
            continue
        yvals = [elt.get("improve_ratio") for elt in outs]

        # Jitter each axis by up to 1/50 of its range.  The array is the LEFT
        # operand on purpose: `some_list += ndarray` would extend the list
        # with the noise values instead of adding them element-wise.
        y_jitter = random.rand(*shape(yvals)) * (max(yvals) - min(yvals)) / 50
        x_jitter = random.rand(*shape(xvals)) * (max(xvals) - min(xvals)) / 50
        ax.scatter(x_jitter + xvals, y_jitter + yvals)

        ax.annotate(p, [0, 0], xycoords="axes fraction", ha="left", va="bottom")

    f.savefig(cfg.dataPath("figs/soheil/broad_run0_psplits.ps"))

    # A leftover debugging `raise Exception()` used to sit here and made the
    # return below unreachable.
    return inps

Beispiel #5

0

Datei anzeigen

Datei: score_utils.py Projekt: bh0085/projects

def get_tree(rfid, fam_type='all'):
    """Try to load the structure and tree outputs stored for family *rfid*.

    Args:
        rfid: Family identifier used to build the stored run ids.
        fam_type: ``'riboswitch'`` or ``'all'``.  Both currently resolve to
            the same ``FA_<rfid>`` / ``FA_tree_<rfid>`` ids.
            NOTE(review): ``check_trees`` uses an ``RS_`` prefix for
            riboswitch families -- confirm ``FA_`` is intended here.

    Returns:
        ``None`` on every path visible here (after a failed load, and also
        after a successful one).

    Raises:
        ValueError: if *fam_type* is not one of the two supported values
            (previously this surfaced as an unbound-local error).
    """
    if fam_type not in ('riboswitch', 'all'):
        raise ValueError('Unknown fam_type: {0!r}'.format(fam_type))

    rname = rfid
    si = 'FA_{0}'.format(rfid)
    ti = 'FA_tree_{0}'.format(rfid)

    print('Loading family for {0}'.format(rname))

    try:
        structs = bsu.load_data(si, 'output')
        trees = bsu.load_data(ti, 'output')
        print('Success! Analyzing tree output')
    except Exception:
        # Deliberately best-effort: any load problem is reported, not raised.
        print('Failure! Did I make a booboo?')
        return None

Beispiel #6

0

Datei anzeigen

Datei: score_utils.py Projekt: bh0085/projects

def get_tree(rfid, fam_type = 'all'):
    """Attempt to load the stored structure and tree records of a family.

    Args:
        rfid: Family identifier; inserted into the run ids ``FA_<rfid>``
            and ``FA_tree_<rfid>``.
        fam_type: Either ``'riboswitch'`` or ``'all'``.  The two values are
            handled identically in this function.
            NOTE(review): ``check_trees`` loads riboswitch families from
            ``RS_``-prefixed ids -- verify the ``FA_`` prefix is wanted.

    Returns:
        ``None``; the visible code never returns the loaded records.

    Raises:
        ValueError: for any other *fam_type* (the original failed later
            with an unbound-local error on ``rname``).
    """
    supported = ('riboswitch', 'all')
    if fam_type not in supported:
        raise ValueError('Unknown fam_type: {0!r}'.format(fam_type))

    rname = rfid
    si = 'FA_{0}'.format(rfid)
    ti = 'FA_tree_{0}'.format(rfid)

    print('Loading family for {0}'.format(rname))

    try:
        structs = bsu.load_data(si, 'output')
        trees = bsu.load_data(ti, 'output')
        print('Success! Analyzing tree output')
    except Exception:
        # Best-effort loader: report the failure and give up quietly.
        print('Failure! Did I make a booboo?')
        return None

Beispiel #7

0

Datei anzeigen

Datei: bsub_clusters.py Projekt: bh0085/projects

def test_bsubfun(run_id):
    """
    Sample job function: run the ``ap_frompy`` matlab script for ``run_id``.

    The ``"input"`` record stored under ``run_id`` is loaded with
    ``butils.load_data`` and handed to ``bsm.runmat``.  According to the
    original notes the function is meant to be launched through the
    ``eyeball`` class of utils/bsub.py.

    inputs (keys of the stored record, per the original notes):
      similarities:    a similarity matrix for the input points
      self_similarity: a single self-similarity value; controls cluster size

    outputs:
      whatever ``bsm.runmat`` returns (originally described as a dict with
      ``indexes``, the cluster exemplar indices)
    """
    job_input = butils.load_data(run_id, "input")
    job_output = bsm.runmat("ap_frompy", job_input, run_id)
    return job_output

Beispiel #8

0

Datei anzeigen

def test_bsubfun(run_id):
    '''
    Demonstrate calling a matlab script (``ap_frompy``) from python.

    Loads the ``'input'`` record saved for ``run_id`` and passes it, with
    the run id, to ``bsm.runmat``.  The original notes say it is designed
    to be called via the ``eyeball`` class from utils/bsub.py.

    inputs (expected keys of the stored record, per the original notes):
      similarities:    similarity matrix for the input points
      self_similarity: single self-similarity value; controls cluster size

    outputs:
      the value returned by ``bsm.runmat`` (described originally as a dict
      holding ``indexes``, the cluster exemplar indices)
    '''
    stored_input = butils.load_data(run_id, 'input')
    outcome = bsm.runmat('ap_frompy', stored_input, run_id)
    return outcome

Beispiel #9

0

Datei anzeigen

Datei: view_outputs.py Projekt: bh0085/compbio

def _accumulate_modules(modules, model, tfnames, tgnames, coefs, fpath, inp):
    """Fold every term of one model matrix into *modules* (mutated in place)."""
    for j, mod in enumerate(model):
        terms = list(it.chain(*mod))
        if not any(len(t) for t in terms):
            continue
        for k, t in enumerate(terms):
            # Term entries are shifted by one before indexing tfnames.
            key = tuple([tfnames[i] for i in sorted(t - 1)])
            entry = modules.setdefault(
                key, dict(genes=[], coefs=[], fpaths=[], clust_fpaths=[]))
            entry["genes"].append(tgnames[j])
            entry["coefs"].append(coefs[j][k])
            entry["clust_fpaths"].append(inp["filename"])
            entry["fpaths"].append(fpath)


def setModules(**kwargs):
    """Group regression terms from every MCMC result file by TF combination.

    Every entry of ``batch/tmp`` whose name contains ``"mcmc"`` is loaded
    with ``sio.loadmat``; its matching input record (run id = first ten
    characters of the file name) supplies the ``"filename"`` stored as
    ``clust_fpaths``.

    Args:
        **kwargs: Accepted and ignored.

    Returns:
        Tuple ``(modules, lin_modules)``.  Each maps a tuple of TF names to
        a dict with parallel lists ``genes``, ``coefs``, ``fpaths`` and
        ``clust_fpaths``; the first is built from ``data["model"]``, the
        second from ``data["model_linear"]``.
    """
    tmp_dir = cfg.dataPath("batch/tmp")
    files = [l for l in os.listdir(tmp_dir) if "mcmc" in l]
    fpaths = [os.path.join(tmp_dir, f) for f in files]
    ids = [l[0:10] for l in files]
    inps = [butils.load_data(i, "input") for i in ids]

    modules = {}
    lin_modules = {}
    for f, inp in zip(fpaths, inps):
        print("Getting module info for: {0}".format(f))
        data = sio.loadmat(f)
        tfnames = [d[0][0] for d in data["tf_names"]]
        tgnames = [d[0][0] for d in data["gene_names"]]
        coefs = [d[0][0] for d in data["coefs_dic_nonlinear"]]

        _accumulate_modules(modules, data["model"], tfnames, tgnames, coefs, f, inp)
        # NOTE(review): the linear pass has always used the *nonlinear*
        # coefficients (the original read "coefs_dic_nonlinear" a second
        # time into an unused variable).  Kept as is; confirm whether a
        # linear coefficient key was intended.
        _accumulate_modules(lin_modules, data["model_linear"], tfnames, tgnames, coefs, f, inp)
    return modules, lin_modules

Beispiel #10

0

Datei anzeigen

def remote_make_tests(run_id, test=False):
    '''
    Queue the batch jobs for ``run_id``, submit them through ``bsub.eyeball``
    and drive the job object through launch / await / package / complete.

    inputs:
      run_id: id whose stored 'input' record holds the per-job input dicts.
      test:   when true, ignore the stored record and build three input
              dicts from the ``miRNA.mat`` file next to this module
              (negative squared distances between the rows of its
              ``expression`` matrix, with three ``self_similarity``
              percentiles).  Defaults to False, the previously hard-coded
              value.

    output:
      None.  NOTE(review): the original notes promised the data path of the
      thread outputs, but nothing is returned.
    '''
    this_file = inspect.stack()[0][1]

    if test:
        mirna = sio.loadmat(os.path.join(os.path.dirname(this_file), 'miRNA.mat'))
        expr = mirna['expression']
        e_norms = sum(expr**2, 1)
        cluster_dists = e_norms[:, newaxis] + e_norms[newaxis, :] \
            - 2 * dot(expr, expr.T)
        sims = -cluster_dists
        inp_dicts = [
            dict(similarities=sims,
                 self_similarity=percentile(sims.flatten(), p))
            for p in logspace(-2, 1.99, 3)
        ]
    else:
        inp_dicts = butils.load_data(run_id, 'input')

    eyeball = bsub.eyeball(run_id,
                           os.path.abspath(this_file),
                           inp_dicts,
                           func='test_bsubfun',
                           name=run_id + '_test_',
                           mem=2)

    eyeball.launch()
    # `await` is a reserved word from Python 3.7 on, so the method is looked
    # up by name; on Python 2 this is the same call as `eyeball.await()`.
    getattr(eyeball, 'await')()
    eyeball.package()
    eyeball.complete()

Beispiel #11

0

Datei anzeigen

Datei: bsub_clusters.py Projekt: bh0085/projects

def bic_clustering(run_id):
    """
    Run the ``ap_max_bic`` matlab routine on the input stored for ``run_id``.

    The ``"input"`` record is loaded with ``butils.load_data`` and passed to
    ``bsm.runmat``; its return value is returned unchanged.

    inputs (stored record, per the original notes):
      similarities: a similarity matrix

    outputs (per the original notes):
      inds:                cluster exemplar indices     (max BIC)
      self_similarity:     float, self similarity       (max BIC)
      inds_[#], self_similarity_[#], bic_[#]: the same for all BIC values
    """
    stored = butils.load_data(run_id, "input")
    clustering = bsm.runmat("ap_max_bic", stored, run_id)
    return clustering

Beispiel #12

0

Datei anzeigen

def bic_clustering(run_id):
    '''
    Compute the BIC-maximal clustering for a stored similarity matrix.

    Loads the 'input' record of ``run_id`` via ``butils.load_data`` and
    forwards it to the ``ap_max_bic`` routine through ``bsm.runmat``.

    inputs (stored record, per the original notes):
      similarities: a similarity matrix

    outputs (per the original notes):
      inds:                cluster exemplar indices     (max BIC)
      self_similarity:     float, self similarity       (max BIC)
      inds_[#], self_similarity_[#], bic_[#]: the same for all BIC values
    '''
    job_input = butils.load_data(run_id, 'input')
    job_result = bsm.runmat('ap_max_bic', job_input, run_id)
    return job_result

Beispiel #13

0

Datei anzeigen

Datei: view_outputs.py Projekt: bh0085/compbio

def fetch_genes():
    """Collect summary fields from the MCMC result files in ``batch/tmp``.

    For every entry of ``batch/tmp`` whose name contains ``"mcmc"`` the
    matching input record (run id = first ten characters of the name) is
    loaded; runs whose ``out_iter_num`` equals 2 are skipped.  For the
    others the cluster name is parsed from the input's ``filename`` and
    stored with ``stay_same``, ``improve_ratio`` and ``error_test`` from
    the ``.mat`` file, keyed by the run's position in the listing.

    Returns:
        ``None``.
        NOTE(review): the collected dict is never returned in the visible
        code -- the snippet may be truncated; confirm before relying on it.
    """
    tmp_dir = cfg.dataPath("batch/tmp")
    files = [l for l in os.listdir(tmp_dir) if "mcmc" in l]
    fpaths = [os.path.join(tmp_dir, f) for f in files]
    ids = [l[0:10] for l in files]

    inps = [butils.load_data(i, "input") for i in ids]
    l_info = {}
    for l, (f, inp) in enumerate(zip(fpaths, inps)):
        if inp["out_iter_num"] == 2:
            continue
        print(inp["filename"])
        # Raw string: same pattern as before, without the invalid "\." escape.
        clustname = re.search(r"_([^_]+)\.mat", inp["filename"]).group(1)
        l_info[l] = {}
        l_info[l]["cname"] = clustname
        l_info[l]["filename"] = inp["filename"]

        data = sio.loadmat(f)
        l_info[l]["stay_same"] = data["stay_same"]
        l_info[l]["improve_ratio"] = data["improve_ratio"]
        l_info[l]["error_test"] = data["error_test"]

Beispiel #14

0

Datei anzeigen

Datei: view_outputs.py Projekt: bh0085/compbio

def errors():
    """Gather error statistics from the MCMC result files in ``batch/tmp``.

    Entries of ``batch/tmp`` whose name contains ``"mcmc"`` are considered;
    a file is kept when the ``out_iter_num`` of its input record (run id =
    first ten characters of the name) is greater than -1.

    Returns:
        Tuple ``(errors, staysames, improves, gnames)``: three lists with
        the ``error``, ``stay_same`` and ``improve_ratio`` entries of every
        kept file, plus the ``gene_names`` entry of the last kept file
        (``None`` when no file was kept; the original raised an
        unbound-local error in that case).
    """
    tmp_dir = cfg.dataPath("batch/tmp")
    files = [l for l in os.listdir(tmp_dir) if "mcmc" in l]
    fpaths = [os.path.join(tmp_dir, f) for f in files]
    ids = [l[0:10] for l in files]

    inps = [butils.load_data(i, "input") for i in ids]

    idxs_good = nonzero(greater([elt.get("out_iter_num") for elt in inps], -1))[0]
    fpaths = [fpaths[i] for i in idxs_good]

    errs, staysames, improves = [], [], []
    gnames = None
    for f in fpaths:
        data = sio.loadmat(f)
        errs.append(data["error"])
        staysames.append(data["stay_same"])
        improves.append(data["improve_ratio"])
        gnames = data["gene_names"]

    return errs, staysames, improves, gnames

Beispiel #15

0

Datei anzeigen

Datei: bsub_clusters.py Projekt: bh0085/projects

def remote_make_tests(run_id, test=False):
    """
    Submit the batch jobs of ``run_id`` through ``bsub.eyeball`` and step the
    job object through launch / await / package / complete.

    inputs:
      run_id: id whose stored "input" record holds the per-job input dicts.
      test:   if true, build three input dicts from the ``miRNA.mat`` file
              located next to this module instead of loading the stored
              record (similarities are negative squared distances between
              the rows of its ``expression`` matrix).  Defaults to False,
              which was the hard-coded value before.

    output:
      None.  NOTE(review): the original notes mention the data path of the
      thread outputs, but no value is returned.
    """
    module_path = inspect.stack()[0][1]

    if test:
        mirnaf = os.path.join(os.path.dirname(module_path), "miRNA.mat")
        expr = sio.loadmat(mirnaf)["expression"]
        e_norms = sum(expr ** 2, 1)
        cluster_dists = e_norms[:, newaxis] + e_norms[newaxis, :] - 2 * dot(expr, expr.T)
        sims = -cluster_dists
        inp_dicts = [
            dict(similarities=sims, self_similarity=percentile(sims.flatten(), p))
            for p in logspace(-2, 1.99, 3)
        ]
    else:
        inp_dicts = butils.load_data(run_id, "input")

    eyeball = bsub.eyeball(
        run_id, os.path.abspath(module_path), inp_dicts, func="test_bsubfun", name=run_id + "_test_", mem=2
    )

    eyeball.launch()
    # `eyeball.await()` does not parse on Python 3.7+ (`await` is reserved);
    # the name-based lookup performs the identical call on every version.
    getattr(eyeball, "await")()
    eyeball.package()
    eyeball.complete()

Beispiel #16

0

Datei anzeigen

def eval_seq_group(gap_seqs, rfid, run_id, inp_run_id, reset = True,
                   draw_alis = draw_all_easy,
                   clade_alignment_method = clade_alignment_method,
                   max_structs = 5):
    """Score a group of gapped sequences against stored structures.

    Args:
        gap_seqs: Gapped sequences; each is passed to ``utils.ungapped_seq``
            with a generated name ``N0000``, ``N0001``, ...
        rfid: Family id; used as ``run_id`` and in the cache register names
            of the profile / alignment steps.
        run_id: Prefix of the run ids handed to the tree-building and
            ancestor-reconstruction helpers.
        inp_run_id: Run id whose ``'output'`` record provides ``structs``,
            ``energies`` and ``seq``.
        reset: Forwarded to the cached profile / alignment steps and to the
            optional drawing step.
        draw_alis: When truthy, ``draw_cm_muscle_congruencies`` is called.
        clade_alignment_method: Only ``'cm'`` is implemented.
        max_structs: Number of structures kept, taken from the top of the
            descending ``argsort`` of ``energies``.

    Returns:
        Dict with ``'seqs'`` (the given ``gap_seqs``) and ``'structs'``, a
        list holding one dict per kept structure with the keys ``ref``,
        ``pairs``, ``ali``, ``muts``, ``times``, ``gaps`` and
        ``irresolvables``.

    Raises:
        Exception: if ``clade_alignment_method`` is not ``'cm'``.
    """
    data = butils.load_data(inp_run_id, 'output')
    energies = data['energies']
    s_inds = argsort(energies)[::-1][:max_structs]
    structs = [data['structs'][i] for i in s_inds]

    refseq = data['seq']

    names = ['N{0:04}'.format(idx) for idx in range(len(gap_seqs))]
    seqs = [utils.ungapped_seq(gap, name) for gap, name in zip(gap_seqs, names)]

    profiles = mem.getOrSet(setProfiles,
                            **mem.rc({},
                                     seq = refseq, structs = structs, run_id = rfid,
                                     reset = reset,
                                     on_fail = 'compute',
                                     register = 'tuprof_{0}'.format(rfid)))

    if draw_alis:
        draw_cm_muscle_congruencies(seqs, profiles,
                                    run_id, reset = reset)

    if clade_alignment_method == 'cm':
        alis, refs, all_pairs  =\
            mem.getOrSet(setAlignments,
                         **mem.rc({},
                                  seqs = seqs, profiles = profiles,
                                  run_id = rfid, ali_type = 'struct',
                                  reset = reset,
                                  on_fail = 'compute',
                                  register = 'tuali_struct_{0}'.format(rfid)))
    else:
        raise Exception('No methods besides cm are yet implemented')

    seq_group_data = {}
    seq_group_data['seqs'] = gap_seqs
    seq_group_data['structs'] = []
    for struct_idx in range(len(structs)):
        ali = alis[struct_idx]
        ref = refs[struct_idx]
        pairs = all_pairs[struct_idx]

        # Each alignment element may carry its own pair set; every routine
        # so far uses a single one, hence ref[0] / pairs[0] throughout.
        struct_data = dict(ref = ref[0],
                           pairs = pairs[0],
                           ali = ali)

        rid = '{0}_{1}'.format(run_id, struct_idx)

        # clade_tree_method / clade_ancestor_method are module-level settings.
        if clade_tree_method == 'bionj':
            tree = phyml.tree(ali, run_id = rid, bionj = True)
        else:
            # NOTE(review): this branch passes run_id, not the per-structure
            # rid -- unchanged from the original; confirm it is intended.
            tree = get_phase_tree(ali, pairs[0], run_id)

        # The original enumerated the terminals with `i`, clobbering the
        # structure index used for the run ids below; no index is needed.
        for leaf in tree.get_terminals():
            seq = [rec for rec in ali if rec.id == leaf.name][0]
            leaf.m = {'seq': seq,
                      'probs': array([1 for j in range(len(seq))])}

        if clade_ancestor_method == 'independent':
            ml_tree = get_ml_ancestor_tree(
                tree, ali, '{0}_paml{1}'.format(run_id, struct_idx))
        else:
            ml_tree = get_structure_ancestor_tree(
                tree, ali, '{0}_stree{1}'.format(run_id, struct_idx))

        muts, times, gaps, irresolvables = tree_conservation.count_struct(ml_tree, pairs[0])

        struct_data.update(muts = muts, times = times,
                           gaps = gaps, irresolvables = irresolvables)
        seq_group_data['structs'].append(struct_data)

    return seq_group_data

Beispiel #17

0

Datei anzeigen

Datei: view_outputs.py Projekt: bh0085/compbio

def view3():
    """Exploratory scatter plot of model terms for the first usable MCMC run.

    Looks at the entries of ``batch/tmp`` whose name contains "mcmc", keeps
    those whose input record has ``out_iter_num`` greater than 2, and for the
    first kept file builds a factor-membership matrix, clusters its rows
    with ``hcluster.fclusterdata`` and scatters the terms ordered by cluster.

    This is debugging code: it never returns normally.  A bare ``Exception``
    is raised right after the first scatter plot, or after the loop when no
    file was processed.
    """

    files = [l for l in os.listdir(cfg.dataPath("batch/tmp")) if "mcmc" in l]
    fpaths = [os.path.join(cfg.dataPath("batch/tmp"), f) for f in files]
    # Run ids are the first ten characters of the file names.
    ids = [l[0:10] for l in files]

    inps = [butils.load_data(i, "input") for i in ids]
    idxs_good = nonzero(greater([elt.get("out_iter_num") for elt in inps], 2))[0]
    inps = [inps[i] for i in idxs_good]
    fpaths = [fpaths[i] for i in idxs_good]

    fig = myplots.fignum(3, (35, 15))
    ax = fig.add_axes([0, 0, 1, 1])

    for f, inp in zip(fpaths, inps):
        # The filter above already requires out_iter_num > 2, so for numeric
        # values this guard cannot trigger.
        if inp["out_iter_num"] == 2:
            continue
        print inp["filename"]

        data = sio.loadmat(f)

        import compbio.utils.colors as mycolors

        ct = mycolors.getct(len(data["gene_names"]))

        # term_list[i]: the terms of model row i; fac_list[i]: all factors of
        # row i flattened across its terms.
        term_list = [list(it.chain(*mod)) for mod in data["model"]]
        fac_list = [list(it.chain(*t)) for t in term_list]

        xvals, yvals, colors, rads = [], [], [], []
        for i, terms in enumerate(term_list):
            for j, term in enumerate(terms):
                for k, fact in enumerate(term):
                    # One point per member of the term for every factor, with
                    # radii decreasing along the term.
                    xvals.extend([i] * len(term))
                    yvals.extend([fact] * len(term))
                    colors.extend([ct[c] for c in sorted(term)])
                    rads.extend(((arange(1, len(term) + 1) ** 2) * 50)[::-1])

        # Membership matrix: vecs[i, f] == 1 when row i uses factor f.
        # NOTE(review): the matrix is square in the number of rows, so this
        # assumes factor indices are smaller than len(fac_list) -- confirm.
        vecs = zeros((len(fac_list), len(fac_list)))
        for i, fl in enumerate(fac_list):
            # Rebinds `f` (the file path of the outer loop); the path is not
            # used again in this iteration.
            for f in fl:
                vecs[i, f] = 1

        # plt.imshow(vecs)

        # ax1 = fig.add_subplot(121)
        # ax2 = fig.add_subplot(122)
        import hcluster

        clusters = hcluster.fclusterdata(vecs, 1.1, criterion="inconsistent", method="complete")

        # ax1.imshow(vecs)
        # ax2.imshow(vecs[argsort(clusters)])

        # raise Exception()

        # Rank of every row when rows are ordered by cluster label.
        csrt = argsort(argsort(clusters))
        xvals2 = [csrt[x] for x in xvals]

        # raise Exception()
        plt.scatter(xvals2, yvals, rads, color=colors)
        # Debug halt: stop after the first processed file.
        raise Exception()

    # Debug halt: reached only when no file was processed in the loop.
    raise Exception()

Beispiel #18

0

Datei anzeigen

Datei: score_utils.py Projekt: bh0085/projects

def ribo_struct_outfile(rfid):
    """Return the stored ``'output'`` record of the ``RS_<rfid>`` run."""
    run_key = 'RS_{0}'.format(rfid)
    record = bsu.load_data(run_key, 'output')
    return record

Beispiel #19

0

Datei anzeigen

Datei: score_utils.py Projekt: bh0085/projects

def check_trees(fam_type = 'riboswitch'):
    """Summarise mutation and time statistics of the stored family trees.

    For every family the structure record (``RS_<rfid>`` or ``FA_<rfid>``)
    and the tree record (``RS_tree_<rfid>`` / ``FA_tree_<rfid>``) are loaded
    with ``bsu.load_data``; families that fail to load are reported and
    skipped.  Structures are reordered by descending ``argsort`` of their
    energies, then per-subtree, per-structure ratios are derived from the
    ``times`` and ``muts`` entries of each tree.

    Args:
        fam_type: ``'riboswitch'`` (families from ``rutils.switch_dicts()``)
            or ``'all'`` (``RF00000`` .. ``RF01492``).

    Returns:
        ``None``.
        NOTE(review): the three per-family dicts built here are not returned
        in the visible code -- the snippet may be truncated.

    Raises:
        ValueError: for any other *fam_type* (previously a NameError).
    """
    if fam_type == 'riboswitch':
        sdicts = rutils.switch_dicts()
        rfids = ['RF{0:05}'.format(n) for n in sdicts.values()]
        names = sdicts.keys()
        prefix = 'RS'
    elif fam_type == 'all':
        rfids = ['RF{0:05}'.format(n) for n in range(0, 1493)]
        names = rfids
        prefix = 'FA'
    else:
        raise ValueError('Unknown fam_type: {0!r}'.format(fam_type))

    struct_ids = ['{0}_{1}'.format(prefix, rfid) for rfid in rfids]
    tree_ids = ['{0}_tree_{1}'.format(prefix, rfid) for rfid in rfids]

    switch_muts = {}
    switch_times = {}
    switch_structs = {}

    # Walk names and ids in lockstep (the original rebuilt the zipped list
    # on every iteration just to index it).
    for rname, si, ti in zip(names, struct_ids, tree_ids):
        print('Loading family for {0}'.format(rname))
        try:
            structs = bsu.load_data(si, 'output')
            trees = bsu.load_data(ti, 'output')
            print('Success! Analyzing tree output')
        except Exception:
            # Best-effort: a family that cannot be loaded is skipped.
            print('Failure! Did I make a booboo?')
            continue

        # Sort structures in decreasing order of energy (match trees).
        str_esrt = argsort(structs['energies'])[::-1]
        structs['structs'] = [structs['structs'][j] for j in str_esrt]
        structs['energies'] = [structs['energies'][j] for j in str_esrt]

        mc, tc = {}, {}
        sc = dict(energies = structs['energies'],
                  structs = structs['structs'],
                  seq = structs['seq'])
        for j, t in enumerate(trees):
            if t is None:
                continue
            mc[j] = {}
            tc[j] = {}
            for idx, t_info in enumerate(t['structs']):
                t_times = t_info['times']
                t_muts = t_info['muts']

                # NOTE(review): `/` is left untouched; under Python 2 it
                # truncates for integer operands -- confirm the dtypes.
                frac_resolved = t_times['total'] /\
                    (t_times['total_incl_unresolved'])
                frac_paired = t_times['paired'] /\
                    (t_times['unpaired'] + t_times['paired'])

                n_2cons = t_muts['comp']
                n_1cons = t_muts['wob']
                n_0cons = t_muts['ucom']
                n_pluscons = t_muts['reco']
                n_nocons = t_muts['bbad']

                total_muts = (n_0cons + n_nocons +
                              n_2cons + n_1cons + n_pluscons)
                total_silent = (n_2cons + n_1cons)
                total_pair_mutants = (n_2cons + n_1cons + n_0cons)

                frac_silent = (n_2cons + n_1cons + n_pluscons) /\
                    (n_0cons + n_nocons +
                     n_2cons + n_1cons + n_pluscons)
                frac_double = (n_2cons) /\
                    (n_2cons + n_1cons)
                frac_destructive = (n_0cons) /\
                    (n_2cons + n_1cons + n_0cons)

                tc[j][idx] = dict(
                    frac_resolved = frac_resolved,
                    frac_paired = frac_paired,
                    total_time = array([t_times['total_incl_unresolved']] *
                                       len(t_times['total'])),
                    total_time_res = t_times['total'])
                mc[j][idx] = dict(frac_silent = frac_silent,
                                  frac_double = frac_double,
                                  frac_destructive = frac_destructive,
                                  total_muts = total_muts,
                                  total_silent = total_silent,
                                  total_pair_mutants = total_pair_mutants)

        print('''Done!
Results:
  {0} subtrees computed.

'''.format(len(mc)))

        switch_muts[rname] = mc
        switch_times[rname] = tc
        switch_structs[rname] = sc

Beispiel #20

0

Datei anzeigen

def run(run_id):
    """Compute the consensus for the ``'ofs'`` entry stored under ``run_id``.

    The ``'input'`` record is loaded with ``bsu.load_data`` and its
    ``'ofs'`` value is passed to ``get_consensus`` (with ``reset=True``),
    whose result is returned.
    """
    stored = bsu.load_data(run_id, 'input')
    consensus = get_consensus(stored['ofs'], run_id=run_id, reset=True)
    return consensus

Beispiel #21

0

Datei anzeigen

Datei: score_utils.py Projekt: bh0085/projects

def ribo_struct_outfile(rfid):
    """Load the ``'output'`` record saved under the id ``RS_<rfid>``."""
    struct_id = 'RS_{0}'.format(rfid)
    loaded = bsu.load_data(struct_id, 'output')
    return loaded

Beispiel #22

0

Datei anzeigen

Datei: view_outputs.py Projekt: bh0085/compbio

def view4():
    """Export nonlinear regression weights as .sif files and collect plot data.

    Every entry of ``batch/tmp`` whose name contains "mcmc" and whose input
    record has ``out_iter_num`` greater than -1 (and different from 2) is
    loaded with ``sio.loadmat``.  For each such file two tab-separated
    files ``nw_<position>.sif`` are written under
    ``network/network_predmodel/regressionwts/``: ``nonlinear_sing`` gets
    one line per single-factor term, ``nonlinear_all`` one line per
    distinct factor of every term.

    Returns:
        Tuple ``(cnames, xvals, gvals, yvals, colors, rads, l_info, tfs,
        coefs)``.  ``coefs`` is the ``coefs_dic_nonlinear`` entry of the
        LAST processed file only, or ``None`` when no file was processed
        (the original raised an unbound-local error then).
        NOTE(review): the original also accumulated the coefficients of
        every file in a list it never returned -- confirm which is wanted.

    Raises:
        Exception: when one of the hard-coded model rows has a positive sum
            (kept from the original; presumably a sanity check -- confirm).
    """
    import compbio.utils.colors as mycolors

    tmp_dir = cfg.dataPath("batch/tmp")
    files = [l for l in os.listdir(tmp_dir) if "mcmc" in l]
    fpaths = [os.path.join(tmp_dir, f) for f in files]
    ids = [l[0:10] for l in files]
    inps = [butils.load_data(i, "input") for i in ids]

    idxs_good = nonzero(greater([elt.get("out_iter_num") for elt in inps], -1))[0]
    inps = [inps[i] for i in idxs_good]
    fpaths = [fpaths[i] for i in idxs_good]

    cnames, xvals, gvals, yvals = [], [], [], []
    colors, rads, tfs = [], [], []
    l_info = {}
    coefs = None

    for l, (f, inp) in enumerate(zip(fpaths, inps)):
        if inp["out_iter_num"] == 2:
            continue
        print(inp["filename"])
        # Raw string: same pattern as before, without the invalid "\." escape.
        clustname = re.search(r"_([^_]+)\.mat", inp["filename"]).group(1)
        cnames.append(clustname)
        l_info[l] = {}
        l_info[l]["cname"] = clustname
        l_info[l]["filename"] = inp["filename"]

        data = sio.loadmat(f)
        l_info[l]["stay_same"] = data["stay_same"]
        l_info[l]["improve_ratio"] = data["improve_ratio"]
        l_info[l]["error_test"] = data["error_test"]

        ct = mycolors.getct(len(data["gene_names"]))
        term_list = [list(it.chain(*mod)) for mod in data["model"]]
        coefs = data["coefs_dic_nonlinear"]
        tfnames = data["tf_names"]
        tgnames = data["gene_names"]

        all_path = cfg.dataPath(
            "network/network_predmodel/regressionwts/nonlinear_all/nw_{0}.sif".format(l))
        sing_path = cfg.dataPath(
            "network/network_predmodel/regressionwts/nonlinear_sing/nw_{0}.sif".format(l))

        # Context managers close both files even if a row below raises.
        with open(all_path, "w") as nlcof_all, open(sing_path, "w") as nlcof_sing:
            for i, terms in enumerate(term_list):
                if i in (5, 49, 53, 30, 17, 8, 38):
                    if sum(terms) > 0:
                        raise Exception()
                # Shift the term entries by one before indexing tfnames.
                terms = [t - 1 for t in terms]
                for j, term in enumerate(terms):
                    wt = coefs[i][0][0][j]
                    if len(term) == 1:
                        # NOTE(review): tgnames[i][0] here vs tgnames[i][0][0]
                        # below -- kept as in the original; confirm.
                        nlcof_sing.write("{0}\t{1}\t{2}\n".format(tfnames[term][0][0], tgnames[i][0], wt))

                    for fact in set(term):
                        nlcof_all.write("{0}\t{1}\t{2}\n".format(tfnames[fact][0][0], tgnames[i][0][0], wt))

                        gvals.append([i] * (len(term) + 1))
                        yvals.append([fact] * (len(term) + 1))
                        colors.append([ct[c] for c in sorted(term)] + [1, 1, 1])
                        tfs.append([c for c in sorted(term)])
                        rads.append(((arange(1, len(term) + 2) ** 2) * 50)[::-1])
                        xvals.append([l] * (len(term) + 1))

    return cnames, xvals, gvals, yvals, colors, rads, l_info, tfs, coefs

Beispiel #23

0

Datei anzeigen

Datei: score_utils.py Projekt: bh0085/projects

def check_trees(fam_type='riboswitch'):
    """Derive per-family mutation and time ratios from the stored trees.

    Each family's structure record and tree record are loaded through
    ``bsu.load_data`` (ids ``RS_<rfid>`` / ``RS_tree_<rfid>`` for
    riboswitches, ``FA_<rfid>`` / ``FA_tree_<rfid>`` for all families).
    Families whose records cannot be loaded are reported and skipped.  The
    structures are reordered by descending ``argsort`` of their energies
    and, for every non-``None`` tree entry, ratios are computed from the
    ``times`` and ``muts`` of each of its structures.

    Args:
        fam_type: ``'riboswitch'`` (families listed by
            ``rutils.switch_dicts()``) or ``'all'`` (``RF00000`` through
            ``RF01492``).

    Returns:
        ``None``.
        NOTE(review): the accumulated per-family dicts are never returned
        in the visible code; the snippet may be cut short.

    Raises:
        ValueError: if *fam_type* is neither supported value (the original
            failed with a NameError instead).
    """
    if fam_type == 'riboswitch':
        sdicts = rutils.switch_dicts()
        rfids = ['RF{0:05}'.format(n) for n in sdicts.values()]
        names = sdicts.keys()
        struct_ids = ['RS_{0}'.format(rfid) for rfid in rfids]
        tree_ids = ['RS_tree_{0}'.format(rfid) for rfid in rfids]
    elif fam_type == 'all':
        rfids = ['RF{0:05}'.format(n) for n in range(0, 1493)]
        names = rfids
        struct_ids = ['FA_{0}'.format(rfid) for rfid in rfids]
        tree_ids = ['FA_tree_{0}'.format(rfid) for rfid in rfids]
    else:
        raise ValueError('Unknown fam_type: {0!r}'.format(fam_type))

    switch_muts = {}
    switch_times = {}
    switch_structs = {}

    # Iterate names and ids together instead of re-zipping per iteration.
    for rname, si, ti in zip(names, struct_ids, tree_ids):
        print('Loading family for {0}'.format(rname))
        try:
            structs = bsu.load_data(si, 'output')
            trees = bsu.load_data(ti, 'output')
            print('Success! Analyzing tree output')
        except Exception:
            # Best-effort: skip families whose records cannot be loaded.
            print('Failure! Did I make a booboo?')
            continue

        # Sort structures in decreasing order of energy (match trees).
        str_esrt = argsort(structs['energies'])[::-1]
        structs['structs'] = [structs['structs'][j] for j in str_esrt]
        structs['energies'] = [structs['energies'][j] for j in str_esrt]

        mc, tc, sc = {}, {}, {}
        sc['energies'] = structs['energies']
        sc['structs'] = structs['structs']
        sc['seq'] = structs['seq']
        for j, t in enumerate(trees):
            if t is None:
                continue
            mc[j] = {}
            tc[j] = {}
            t_infos = t['structs']
            for idx in range(len(t_infos)):
                t_times = t_infos[idx]['times']
                t_muts = t_infos[idx]['muts']

                # NOTE(review): `/` is unchanged; with integer operands it
                # truncates under Python 2 -- confirm the stored dtypes.
                frac_resolved = t_times['total'] /\
                    (t_times['total_incl_unresolved'])
                frac_paired = t_times['paired'] /\
                    (t_times['unpaired'] + t_times['paired'])

                n_2cons = t_muts['comp']
                n_1cons = t_muts['wob']
                n_0cons = t_muts['ucom']
                n_pluscons = t_muts['reco']
                n_nocons = t_muts['bbad']

                frac_silent = (n_2cons + n_1cons + n_pluscons) /\
                    (n_0cons + n_nocons +
                     n_2cons + n_1cons + n_pluscons)

                frac_double = (n_2cons) /\
                    (n_2cons + n_1cons)

                frac_destructive = (n_0cons) /\
                    (n_2cons + n_1cons + n_0cons)

                total_muts = (n_0cons + n_nocons +
                              n_2cons + n_1cons + n_pluscons)
                total_silent = (n_2cons + n_1cons)
                total_pair_mutants = (n_2cons + n_1cons + n_0cons)

                tc[j][idx] = dict(
                    frac_resolved=frac_resolved,
                    frac_paired=frac_paired,
                    total_time=array([t_times['total_incl_unresolved']] *
                                     len(t_times['total'])),
                    total_time_res=t_times['total'])
                mc[j][idx] = dict(frac_silent=frac_silent,
                                  frac_double=frac_double,
                                  frac_destructive=frac_destructive,
                                  total_muts=total_muts,
                                  total_silent=total_silent,
                                  total_pair_mutants=total_pair_mutants)

        print('''Done!
Results:
  {0} subtrees computed.

'''.format(len(mc)))

        switch_muts[rname] = mc
        switch_times[rname] = tc
        switch_structs[rname] = sc