Beispiel #1
0
def setFamData(rfid = None, ftype = None,**kwargs):
    """Load the stored structure and tree outputs for one family.

    rfid   -- family identifier, used as the suffix of both data keys;
              must be truthy.
    ftype  -- 'all' selects the 'FA' key prefix, any other truthy value
              selects 'RS'.
    kwargs -- accepted and ignored.

    Returns the pair (structure data, tree data) as returned by
    bsu.load_data for the '<prefix>_<rfid>' and '<prefix>_tree_<rfid>'
    keys of the 'output' store.
    Raises AssertionError when rfid or ftype is falsy.
    """
    assert rfid
    assert ftype

    if ftype == 'all':
        prefix = 'FA'
    else:
        prefix = 'RS'

    struct_key = '{0}_{1}'.format(prefix, rfid)
    tree_key = '{0}_tree_{1}'.format(prefix, rfid)

    struct_data = bsu.load_data(struct_key, 'output')
    tree_data = bsu.load_data(tree_key, 'output')
    return struct_data, tree_data
Beispiel #2
0
def run_single(run_id):
    '''
    Run the 'run_mcmc' matlab routine on the stored input of one run.

    The input dictionary (a single parameter set) saved under run_id is
    loaded from the 'input' store and handed to bsm.runmat together with
    the run_id. Whatever bsm.runmat returns is returned unchanged.
    '''
    return bsm.runmat('run_mcmc', butils.load_data(run_id, 'input'), run_id)
Beispiel #3
0
def run(run_id):
    """Run get_consensus on the 'ofs' entry of a stored run input.

    Loads the 'input' data saved under run_id, takes its 'ofs' entry and
    passes it to get_consensus with the same run_id and reset=True.
    Returns the value produced by get_consensus.
    """
    ofs = bsu.load_data(run_id, 'input')['ofs']
    return get_consensus(ofs, run_id=run_id, reset=True)
Beispiel #4
0
def view2():
    """Scatter-plot every numeric input parameter against 'improve_ratio'.

    Every tenth file of the batch/outputs data directory whose name
    contains "mcmc" is selected; the first ten characters of its name are
    used as the run id to load the matching 'input' and 'output' records.
    One horizontal axes strip per key of the first input record is added
    to figure 1. Parameters whose first value is a string get an empty
    strip; for all others the jittered (parameter value, improve_ratio)
    pairs are drawn. The figure is saved to
    figs/soheil/broad_run0_psplits.ps.

    NOTE(review): after saving, the function raises a bare Exception
    unconditionally, which looks like a leftover debugging stop. It is
    kept because callers currently never receive a return value; the
    trailing ``return inps`` only becomes reachable once it is removed.
    """
    out_dir = cfg.dataPath("batch/outputs")
    files = [name for name in os.listdir(out_dir) if "mcmc" in name]
    ids = [name[0:10] for name in files][::10]

    inps = [butils.load_data(run_id, "input") for run_id in ids]
    outs = [butils.load_data(run_id, "output") for run_id in ids]

    params = inps[0].keys()
    strip = 1.0 / len(params)

    f = myplots.fignum(1, (8, 8))

    for i, p in enumerate(params):
        ax = f.add_axes([0.05, i * strip, 0.9, strip], title=p)

        xvals = [elt.get(p) for elt in inps]
        if isinstance(xvals[0], str):
            continue
        yvals = [elt.get("improve_ratio") for elt in outs]

        # Jitter each coordinate by up to 1/50 of its value range so that
        # coincident points separate. The noise array is deliberately the
        # LEFT operand of a plain `+`: `some_list += ndarray` would extend
        # the list with the noise values instead of adding them
        # element-wise, doubling the number of plotted points.
        # (`random` / `shape` are presumably numpy's -- confirm the import.)
        yvals = random.rand(*shape(yvals)) * (max(yvals) - min(yvals)) / 50 + yvals
        xvals = random.rand(*shape(xvals)) * (max(xvals) - min(xvals)) / 50 + xvals
        ax.scatter(xvals, yvals)

        ax.annotate(p, [0, 0], xycoords="axes fraction", ha="left", va="bottom")

    f.savefig(cfg.dataPath("figs/soheil/broad_run0_psplits.ps"))
    raise Exception()

    return inps
Beispiel #5
0
def get_tree(rfid, fam_type='all'):
    """Try to load the stored structure and tree outputs for one family.

    rfid     -- family identifier; shown in the progress message and used
                as the suffix of the 'FA_<rfid>' and 'FA_tree_<rfid>' keys.
    fam_type -- 'riboswitch' or 'all'.

    Prints a progress line followed by a success or failure line. As
    written, the loaded data is not handed back: the function returns None
    whether or not loading succeeded.
    Raises ValueError for any other fam_type (previously this surfaced as
    an unbound-local error on the first use of the family name).
    """
    if fam_type not in ('riboswitch', 'all'):
        raise ValueError('unknown fam_type: {0!r}'.format(fam_type))

    # NOTE(review): both family types resolve to the 'FA_' keys here, while
    # check_trees reads 'RS_' keys for riboswitch families -- confirm which
    # prefix the riboswitch case is meant to use.
    rname = rfid
    si = 'FA_{0}'.format(rfid)
    ti = 'FA_tree_{0}'.format(rfid)

    print('Loading family for {0}'.format(rname))

    try:
        bsu.load_data(si, 'output')
        bsu.load_data(ti, 'output')
    except Exception:
        # Best-effort probe: any load failure is reported, not propagated.
        print('Failure! Did I make a booboo?')
        return None

    print('Success! Analyzing tree output')
    return None
Beispiel #6
0
def get_tree(rfid, fam_type = 'all'):
    """Try to load the stored structure and tree outputs for one family.

    rfid     -- family identifier; shown in the progress message and used
                as the suffix of the 'FA_<rfid>' and 'FA_tree_<rfid>' keys.
    fam_type -- 'riboswitch' or 'all'.

    Prints a progress line followed by a success or failure line. As
    written, the loaded data is not handed back: the function returns None
    whether or not loading succeeded.
    Raises ValueError for any other fam_type (previously this surfaced as
    an unbound-local error on the first use of the family name).
    """
    if fam_type not in ('riboswitch', 'all'):
        raise ValueError('unknown fam_type: {0!r}'.format(fam_type))

    # NOTE(review): both family types resolve to the 'FA_' keys here, while
    # check_trees reads 'RS_' keys for riboswitch families -- confirm which
    # prefix the riboswitch case is meant to use.
    rname = rfid
    si = 'FA_{0}'.format(rfid)
    ti = 'FA_tree_{0}'.format(rfid)

    print('Loading family for {0}'.format(rname))

    try:
        bsu.load_data(si, 'output')
        bsu.load_data(ti, 'output')
    except Exception:
        # Best-effort probe: any load failure is reported, not propagated.
        print('Failure! Did I make a booboo?')
        return None

    print('Success! Analyzing tree output')
    return None
Beispiel #7
0
def test_bsubfun(run_id):
    """
    Sample job showing how a matlab script (here ap_frompy) is called
    from python.

    The input dictionary stored under run_id is loaded from the 'input'
    store and passed, with the run_id, to bsm.runmat. The function is
    meant to be launched through the 'eyeball' class of utils/bsub.py.

    inputs (contents of the stored dictionary):
      similarities:    a similarity matrix for the input points
      self_similarity: a single value for the self similarity of
                       datapoints; controls cluster size

    outputs:
      whatever bsm.runmat returns; described by the original author as a
      dictionary {indexes: cluster exemplar indices}
    """
    stored_input = butils.load_data(run_id, "input")
    result = bsm.runmat("ap_frompy", stored_input, run_id)
    return result
Beispiel #8
0
def test_bsubfun(run_id):
    '''
    Sample job showing how a matlab script (here ap_frompy) is called
    from python.

    The input dictionary stored under run_id is loaded from the 'input'
    store and passed, with the run_id, to bsm.runmat. The function is
    meant to be launched through the 'eyeball' class of utils/bsub.py.

    inputs (contents of the stored dictionary):
      similarities:    a similarity matrix for the input points
      self_similarity: a single value for the self similarity of
                       datapoints; controls cluster size

    outputs:
      whatever bsm.runmat returns; described by the original author as a
      dictionary {indexes: cluster exemplar indices}
    '''
    stored_input = butils.load_data(run_id, 'input')
    result = bsm.runmat('ap_frompy', stored_input, run_id)
    return result
Beispiel #9
0
def _collect_module_terms(table, term_rows, coefs, tfnames, tgnames, mat_path, inp):
    """Fold the model terms of one .mat file into table.

    table maps a tuple of TF names (one regulatory module) to a dict of
    parallel lists: 'genes', 'coefs', 'fpaths' and 'clust_fpaths'.
    term_rows[j] holds the terms of target gene j; each term is an array
    of 1-based TF indices.
    """
    for j, terms in enumerate(term_rows):
        # Targets without any factor contribute nothing.
        if sum([len(t) for t in terms]) == 0:
            continue
        for k, t in enumerate(terms):
            key = tuple([tfnames[i] for i in sorted(t - 1)])
            entry = table.setdefault(
                key, dict(genes=[], coefs=[], fpaths=[], clust_fpaths=[]))
            entry["genes"].append(tgnames[j])
            entry["coefs"].append(coefs[j][k])
            entry["clust_fpaths"].append(inp["filename"])
            entry["fpaths"].append(mat_path)


def setModules(**kwargs):
    """Index the regulatory modules found in the stored mcmc result files.

    Every file in the batch/tmp data directory whose name contains "mcmc"
    is read with sio.loadmat; the first ten characters of the file name
    are used as the run id to load the matching 'input' record. The
    "model" and "model_linear" entries of each file are folded into two
    separate tables keyed by tuples of TF names.

    kwargs are accepted and ignored.

    Returns (modules, lin_modules), each mapping a TF-name tuple to a dict
    with the parallel lists 'genes', 'coefs', 'fpaths' and 'clust_fpaths'.
    """
    files = [name for name in os.listdir(cfg.dataPath("batch/tmp")) if "mcmc" in name]
    fpaths = [os.path.join(cfg.dataPath("batch/tmp"), name) for name in files]
    ids = [name[0:10] for name in files]
    inps = [butils.load_data(run_id, "input") for run_id in ids]

    modules = {}
    lin_modules = {}
    for mat_path, inp in zip(fpaths, inps):
        print("Getting module info for: {0}".format(mat_path))
        data = sio.loadmat(mat_path)
        tfnames = [d[0][0] for d in data["tf_names"]]
        tgnames = [d[0][0] for d in data["gene_names"]]
        coefs = [d[0][0] for d in data["coefs_dic_nonlinear"]]

        nonlinear_rows = [list(it.chain(*mod)) for mod in data["model"]]
        _collect_module_terms(modules, nonlinear_rows, coefs,
                              tfnames, tgnames, mat_path, inp)

        # NOTE(review): lin_coefs is read from the same
        # "coefs_dic_nonlinear" entry as coefs and is never used; the
        # linear pass below stores the non-linear coefficients. Confirm
        # whether a linear coefficient entry was intended here.
        lin_coefs = [d[0][0] for d in data["coefs_dic_nonlinear"]]
        linear_rows = [list(it.chain(*mod)) for mod in data["model_linear"]]
        _collect_module_terms(lin_modules, linear_rows, coefs,
                              tfnames, tgnames, mat_path, inp)

    return modules, lin_modules
Beispiel #10
0
def remote_make_tests(run_id):
    '''
    Queue the batch jobs of one run through bsub and wait for them.

    The list of input dictionaries stored under run_id is loaded and
    handed to a bsub.eyeball instance that runs 'test_bsubfun' from this
    very file. The eyeball is launched, awaited, packaged and completed,
    in that order.

    inputs:
      run_id

    output:
      None; the results are handled by the eyeball object.
    '''
    this_file = inspect.stack()[0][1]

    # Hard-wired switch: when turned on, a small built-in example is
    # generated from miRNA.mat (next to this file) instead of loading the
    # stored inputs.
    use_builtin_example = False
    if not use_builtin_example:
        inp_dicts = butils.load_data(run_id, 'input')
    else:
        mirna = sio.loadmat(os.path.join(os.path.dirname(this_file),
                                         'miRNA.mat'))
        expr = mirna['expression']
        sq_norms = sum(expr**2, 1)
        # Squared euclidean distances, negated to obtain similarities.
        sims = -(sq_norms[:, newaxis] + sq_norms[newaxis, :]
                 - 2 * dot(expr, expr.T))
        inp_dicts = [dict(similarities=sims,
                          self_similarity=percentile(sims.flatten(), p))
                     for p in logspace(-2, 1.99, 3)]

    watcher = bsub.eyeball(run_id,
                           os.path.abspath(this_file),
                           inp_dicts,
                           func='test_bsubfun',
                           name=run_id + '_test_',
                           mem=2)

    watcher.launch()
    # `await` is a reserved word from Python 3.7 on; looking the method up
    # by name calls exactly the same attribute and parses everywhere.
    getattr(watcher, 'await')()
    watcher.package()
    watcher.complete()
Beispiel #11
0
def bic_clustering(run_id):
    """
    Matlab/bsub job computing the BIC-maximal clustering of a stored
    similarity matrix.

    The input dictionary saved under run_id is loaded from the 'input'
    store and passed, with the run_id, to the 'ap_max_bic' matlab routine
    through bsm.runmat.

    inputs (contents of the stored dictionary):
      similarities: a similarity matrix

    outputs (as described by the original author):
      inds:                 cluster exemplar indices  (MAX BIC)
      self_similarity:      float, self similarity    (MAX BIC)
      inds_[#]:             same as above, for every BIC evaluated
      self_similarity_[#]:  (...)
      bic_[#]:              (...)
    """
    stored_input = butils.load_data(run_id, "input")
    result = bsm.runmat("ap_max_bic", stored_input, run_id)
    return result
Beispiel #12
0
def bic_clustering(run_id):
    '''
    Matlab/bsub job computing the BIC-maximal clustering of a stored
    similarity matrix.

    The input dictionary saved under run_id is loaded from the 'input'
    store and passed, with the run_id, to the 'ap_max_bic' matlab routine
    through bsm.runmat.

    inputs (contents of the stored dictionary):
      similarities: a similarity matrix

    outputs (as described by the original author):
      inds:                 cluster exemplar indices  (MAX BIC)
      self_similarity:      float, self similarity    (MAX BIC)
      inds_[#]:             same as above, for every BIC evaluated
      self_similarity_[#]:  (...)
      bic_[#]:              (...)
    '''
    stored_input = butils.load_data(run_id, 'input')
    result = bsm.runmat('ap_max_bic', stored_input, run_id)
    return result
Beispiel #13
0
def fetch_genes():
    """Gather per-run summary entries from the stored mcmc result files.

    Every file in the batch/tmp data directory whose name contains "mcmc"
    is paired with the 'input' record stored under the first ten
    characters of its name. Runs whose input has out_iter_num == 2 are
    skipped. For every other run the input file name is printed and a
    record holding the cluster name (the part of the input file name
    between the last underscore and ".mat"), the input file name and the
    "stay_same", "improve_ratio" and "error_test" entries of the result
    file is stored under the run's position.

    The visible code does not return the collected records (returns None).
    """
    files = [name for name in os.listdir(cfg.dataPath("batch/tmp")) if "mcmc" in name]
    fpaths = [os.path.join(cfg.dataPath("batch/tmp"), name) for name in files]
    ids = [name[0:10] for name in files]
    inps = [butils.load_data(run_id, "input") for run_id in ids]

    cluster_pattern = re.compile(r"_([^_]+)\.mat")
    l_info = {}
    for l, (mat_path, inp) in enumerate(zip(fpaths, inps)):
        if inp["out_iter_num"] == 2:
            continue
        print(inp["filename"])
        clustname = cluster_pattern.search(inp["filename"]).group(1)

        record = {}
        l_info[l] = record
        record["cname"] = clustname
        record["filename"] = inp["filename"]

        data = sio.loadmat(mat_path)
        for key in ("stay_same", "improve_ratio", "error_test"):
            record[key] = data[key]
Beispiel #14
0
def errors():
    """Collect error and acceptance statistics of the stored mcmc runs.

    Every file in the batch/tmp data directory whose name contains "mcmc"
    is paired with the 'input' record stored under the first ten
    characters of its name; only runs whose input has out_iter_num > -1
    are kept and read with sio.loadmat.

    Returns (errors, staysames, improves, gnames): three lists with one
    item per kept file holding that file's "error", "stay_same" and
    "improve_ratio" entries, plus the "gene_names" entry of the last file
    read. gnames is None when no file was kept (this case used to fail
    with an unbound local at the return statement).
    """
    tmp_dir = cfg.dataPath("batch/tmp")
    files = [name for name in os.listdir(tmp_dir) if "mcmc" in name]
    fpaths = [os.path.join(tmp_dir, name) for name in files]
    ids = [name[0:10] for name in files]

    inps = [butils.load_data(run_id, "input") for run_id in ids]

    idxs_good = nonzero(greater([elt.get("out_iter_num") for elt in inps], -1))[0]
    fpaths = [fpaths[i] for i in idxs_good]

    error_list, staysames, improves = [], [], []
    gnames = None
    for mat_path in fpaths:
        data = sio.loadmat(mat_path)
        error_list.append(data["error"])
        staysames.append(data["stay_same"])
        improves.append(data["improve_ratio"])
        gnames = data["gene_names"]

    return error_list, staysames, improves, gnames
Beispiel #15
0
def remote_make_tests(run_id):
    """
    Queue the batch jobs of one run through bsub and wait for them.

    The list of input dictionaries stored under run_id is loaded and
    handed to a bsub.eyeball instance that runs "test_bsubfun" from this
    very file. The eyeball is launched, awaited, packaged and completed,
    in that order.

    inputs:
      run_id

    output:
      None; the results are handled by the eyeball object.
    """
    this_file = inspect.stack()[0][1]

    # Hard-wired switch: when turned on, a small built-in example is
    # generated from miRNA.mat (next to this file) instead of loading the
    # stored inputs.
    use_builtin_example = False
    if not use_builtin_example:
        inp_dicts = butils.load_data(run_id, "input")
    else:
        mirna = sio.loadmat(os.path.join(os.path.dirname(this_file), "miRNA.mat"))
        expr = mirna["expression"]
        sq_norms = sum(expr ** 2, 1)
        # Squared euclidean distances, negated to obtain similarities.
        sims = -(sq_norms[:, newaxis] + sq_norms[newaxis, :] - 2 * dot(expr, expr.T))
        inp_dicts = [
            dict(similarities=sims, self_similarity=percentile(sims.flatten(), p))
            for p in logspace(-2, 1.99, 3)
        ]

    watcher = bsub.eyeball(
        run_id,
        os.path.abspath(this_file),
        inp_dicts,
        func="test_bsubfun",
        name=run_id + "_test_",
        mem=2,
    )

    watcher.launch()
    # `await` is a reserved word from Python 3.7 on; looking the method up
    # by name calls exactly the same attribute and parses everywhere.
    getattr(watcher, "await")()
    watcher.package()
    watcher.complete()
Beispiel #16
0
def eval_seq_group(gap_seqs, rfid, run_id, inp_run_id, reset = True,
                   draw_alis = draw_all_easy,
                   clade_alignment_method = clade_alignment_method,
                   max_structs = 5):
    """Evaluate a group of gapped sequences against the top stored structures.

    gap_seqs    -- gapped sequences of the group; each is ungapped with
                   utils.ungapped_seq under a generated name 'N0000', ...
    rfid        -- family id; used as run_id / register suffix for the
                   cached profiles and alignments.
    run_id      -- id from which the per-structure tree / ancestor run ids
                   are derived.
    inp_run_id  -- id of the stored 'output' record providing 'structs',
                   'energies' and 'seq'.
    reset       -- forwarded to the cached computations and the drawing.
    draw_alis   -- when truthy, draw_cm_muscle_congruencies is called.
    clade_alignment_method -- only 'cm' is implemented.
    max_structs -- number of structures kept (those with the largest
                   energies, in decreasing order).

    The tree and ancestor methods are taken from the module-level names
    clade_tree_method and clade_ancestor_method.

    Returns a dict with 'seqs' (gap_seqs as given) and 'structs', a list
    with one dict per kept structure holding 'ref', 'pairs', 'ali',
    'muts', 'times', 'gaps' and 'irresolvables'.
    Raises Exception when clade_alignment_method is not 'cm'.
    """
    data = butils.load_data(inp_run_id, 'output')
    structs = data['structs']
    s_inds = argsort(data['energies'])[::-1][:max_structs]
    structs = [structs[i] for i in s_inds]

    refseq = data['seq']

    names = ['N{0:04}'.format(idx) for idx in range(len(gap_seqs))]
    seqs = [utils.ungapped_seq(gap_seq, name)
            for gap_seq, name in zip(gap_seqs, names)]

    profiles = mem.getOrSet(setProfiles,
                            **mem.rc({},
                                     seq = refseq, structs = structs, run_id = rfid,
                                     reset = reset,
                                     on_fail = 'compute',
                                     register = 'tuprof_{0}'.format(rfid)))

    if draw_alis:
        draw_cm_muscle_congruencies(seqs, profiles,
                                    run_id, reset = reset)

    if clade_alignment_method == 'cm':
        alis, refs, all_pairs  =\
            mem.getOrSet(setAlignments,
                         **mem.rc({},
                                  seqs = seqs, profiles = profiles,
                                  run_id = rfid, ali_type = 'struct',
                                  reset = reset,
                                  on_fail = 'compute',
                                  register = 'tuali_struct_{0}'.format(rfid)))
    else:
        raise Exception('No methods besides cm are yet implemented')

    seq_group_data = {}
    seq_group_data['seqs'] = gap_seqs
    seq_group_data['structs'] = []
    for i in range(len(structs)):
        struct_data = {}
        ali = alis[i]
        ref = refs[i]
        pairs = all_pairs[i]

        #NOTE THAT DUE TO AN AWKWARD SYNTAX DECISION,
        #I AM ALLOWING FOR THE POSSIBILITY THAT EACH
        #ALI ELT HAS DIFFERENT PAIRS.
        #
        #ALL OF MY ROUTINES SO FAR ONLY USE A SINGLE
        #PAIR SET AND SO I USE PAIRS[0] EXCLUSIVELY
        struct_data.update(ref = ref[0],
                           pairs = pairs[0],
                           ali = ali)

        rid = '{0}_{1}'.format(run_id, i)

        if clade_tree_method ==  'bionj':
            tree = phyml.tree(ali, run_id = rid, bionj = True)
        else:
            tree = get_phase_tree(ali, pairs[0], run_id)

        # Attach to every leaf its aligned sequence and a certainty of 1
        # per position. The loop variable must not be called `i`: the
        # structure index is still needed for the run ids below (the
        # original loop overwrote it with the last leaf index).
        for leaf in tree.get_terminals():
            seq = [rec for rec in ali if rec.id == leaf.name][0]
            leaf.m = {'seq':seq,
                      'probs':array([1] * len(seq))}

        if clade_ancestor_method == 'independent':
            ml_tree = get_ml_ancestor_tree(tree, ali,
                                           '{0}_paml{1}'.format(run_id, i))
        else:
            ml_tree = get_structure_ancestor_tree(\
                tree, ali,'{0}_stree{1}'.format(run_id, i))

        muts, times, gaps, irresolvables = tree_conservation.count_struct(ml_tree, pairs[0])

        struct_data.update(muts = muts, times = times,
                        gaps = gaps, irresolvables = irresolvables)
        seq_group_data['structs'].append(struct_data)

    return seq_group_data
Beispiel #17
0
def view3():
    """Debug view: scatter the model terms of one stored mcmc result.

    Files in the batch/tmp data directory whose name contains "mcmc" are
    paired with the 'input' record stored under the first ten characters
    of their name; only runs with out_iter_num > 2 are kept.

    For the first kept file the factors of every model term are collected,
    the targets are ordered by a hierarchical clustering of their factor
    membership vectors and the result is drawn with plt.scatter.

    The function never returns: it raises a bare Exception right after the
    first scatter call, or after the loop when no file was kept.
    NOTE(review): these raises look like debugging stops -- confirm.
    """

    files = [l for l in os.listdir(cfg.dataPath("batch/tmp")) if "mcmc" in l]
    fpaths = [os.path.join(cfg.dataPath("batch/tmp"), f) for f in files]
    # The run id is the first ten characters of the file name.
    ids = [l[0:10] for l in files]

    inps = [butils.load_data(i, "input") for i in ids]
    # Keep only the runs whose out_iter_num is greater than 2.
    idxs_good = nonzero(greater([elt.get("out_iter_num") for elt in inps], 2))[0]
    inps = [inps[i] for i in idxs_good]
    fpaths = [fpaths[i] for i in idxs_good]

    fig = myplots.fignum(3, (35, 15))
    # Single axes covering the whole figure.
    ax = fig.add_axes([0, 0, 1, 1])

    for f, inp in zip(fpaths, inps):
        # Cannot trigger after the "> 2" filter above; kept as written.
        if inp["out_iter_num"] == 2:
            continue
        print inp["filename"]

        data = sio.loadmat(f)

        import compbio.utils.colors as mycolors

        # One colour per entry of "gene_names".
        ct = mycolors.getct(len(data["gene_names"]))

        # term_list[i]: the terms of model row i; fac_list[i]: all factors
        # of row i flattened into one list.
        term_list = [list(it.chain(*mod)) for mod in data["model"]]
        fac_list = [list(it.chain(*t)) for t in term_list]

        xvals, yvals, colors, rads = [], [], [], []
        for i, terms in enumerate(term_list):
            for j, term in enumerate(terms):
                for k, fact in enumerate(term):
                    # For each factor, len(term) stacked markers: same
                    # position, one colour per term member, decreasing size.
                    xvals.extend([i] * len(term))
                    yvals.extend([fact] * len(term))
                    colors.extend([ct[c] for c in sorted(term)])
                    rads.extend(((arange(1, len(term) + 1) ** 2) * 50)[::-1])

        # Square 0/1 matrix: vecs[i, f] == 1 when factor f occurs in row i.
        # presumably factor indices are smaller than the number of rows --
        # TODO confirm.
        vecs = zeros((len(fac_list), len(fac_list)))
        for i, fl in enumerate(fac_list):
            # NOTE: this rebinds the outer loop variable `f` (the file path).
            for f in fl:
                vecs[i, f] = 1

        # plt.imshow(vecs)

        # ax1 = fig.add_subplot(121)
        # ax2 = fig.add_subplot(122)
        import hcluster

        clusters = hcluster.fclusterdata(vecs, 1.1, criterion="inconsistent", method="complete")

        # ax1.imshow(vecs)
        # ax2.imshow(vecs[argsort(clusters)])

        # raise Exception()

        # Rank of every row when the rows are sorted by cluster label; used
        # as the x position so rows of one cluster sit next to each other.
        csrt = argsort(argsort(clusters))
        xvals2 = [csrt[x] for x in xvals]

        # raise Exception()
        plt.scatter(xvals2, yvals, rads, color=colors)
        # Stops after the first file.
        raise Exception()

    # Reached only when the loop body never ran to its own raise.
    raise Exception()
Beispiel #18
0
def ribo_struct_outfile(rfid):
    """Return the stored 'output' data saved under the key 'RS_<rfid>'."""
    return bsu.load_data('RS_{0}'.format(rfid), 'output')
Beispiel #19
0
def check_trees(fam_type = 'riboswitch'):
    """Tabulate mutation and time statistics for every stored family tree.

    fam_type -- 'riboswitch': families come from rutils.switch_dicts()
                (name -> number) and use the 'RS_' / 'RS_tree_' keys;
                'all': families RF00000 .. RF01492 with the 'FA_' /
                'FA_tree_' keys.

    For every family the structure and tree outputs are loaded (families
    that fail to load are reported and skipped), the structures are
    reordered by decreasing energy, and per subtree / per structure the
    resolved and paired time fractions and the mutation-class fractions
    and totals are computed.

    The results are accumulated per family name in three dictionaries
    (mutations, times, structures); the visible code does not return them
    (returns None).
    Raises ValueError for any other fam_type (previously a NameError).
    """
    if fam_type == 'riboswitch':
        sdicts = rutils.switch_dicts()
        rfids = ['RF{0:05}'.format(n) for n in sdicts.values()]
        names = sdicts.keys()
        prefix = 'RS'
    elif fam_type == 'all':
        rfids = ['RF{0:05}'.format(n) for n in range(0, 1493)]
        names = rfids
        prefix = 'FA'
    else:
        raise ValueError('unknown fam_type: {0!r}'.format(fam_type))

    struct_ids = ['{0}_{1}'.format(prefix, rfid) for rfid in rfids]
    tree_ids = ['{0}_tree_{1}'.format(prefix, rfid) for rfid in rfids]

    switch_muts = {}
    switch_times = {}
    switch_structs = {}

    for rname, si, ti in zip(names, struct_ids, tree_ids):
        print('Loading family for {0}'.format(rname))
        try:
            structs = bsu.load_data(si, 'output')
            trees = bsu.load_data(ti, 'output')
        except Exception:
            # Best effort: families without stored output are skipped.
            print('Failure! Did I make a booboo?')
            continue
        print('Success! Analyzing tree output')

        #SORT STRUCTURES IN DECREASING ORDER OF ENERGY (MATCH TREES)
        str_esrt = argsort(structs['energies'])[::-1]
        structs['structs'] = [structs['structs'][j] for j in str_esrt]
        structs['energies'] = [structs['energies'][j] for j in str_esrt]

        sc = {'energies': structs['energies'],
              'structs': structs['structs'],
              'seq': structs['seq']}
        mc, tc = {}, {}
        for j, t in enumerate(trees):
            if t is None:
                continue
            mc[j] = {}
            tc[j] = {}
            for idx, t_info in enumerate(t['structs']):
                t_times = t_info['times']
                t_muts = t_info['muts']

                # NOTE(review): plain `/` -- if these values are Python
                # ints under Python 2 the ratios truncate; they look like
                # arrays (len() is taken of t_times['total']) -- confirm.
                frac_resolved = t_times['total'] / t_times['total_incl_unresolved']
                frac_paired = t_times['paired'] / \
                    (t_times['unpaired'] + t_times['paired'])

                n_2cons = t_muts['comp']
                n_1cons = t_muts['wob']
                n_0cons = t_muts['ucom']
                n_pluscons = t_muts['reco']
                n_nocons = t_muts['bbad']

                total_muts = n_0cons + n_nocons + n_2cons + n_1cons + n_pluscons
                total_silent = n_2cons + n_1cons
                total_pair_mutants = n_2cons + n_1cons + n_0cons

                frac_silent = (n_2cons + n_1cons + n_pluscons) / \
                    (n_0cons + n_nocons + n_2cons + n_1cons + n_pluscons)
                frac_double = n_2cons / (n_2cons + n_1cons)
                frac_destructive = n_0cons / (n_2cons + n_1cons + n_0cons)

                tc[j][idx] = dict(
                    frac_resolved = frac_resolved,
                    frac_paired = frac_paired,
                    total_time = array([t_times['total_incl_unresolved']] *
                                       len(t_times['total'])),
                    total_time_res = t_times['total'])
                mc[j][idx] = dict(
                    frac_silent = frac_silent,
                    frac_double = frac_double,
                    frac_destructive = frac_destructive,
                    total_muts = total_muts,
                    total_silent = total_silent,
                    total_pair_mutants = total_pair_mutants)

        print('Done!\nResults:\n  {0} subtrees computed.\n\n'.format(len(mc)))

        switch_muts[rname] = mc
        switch_times[rname] = tc
        switch_structs[rname] = sc
Beispiel #20
0
def run(run_id):
    """Run get_consensus on the 'ofs' entry of a stored run input.

    Loads the 'input' data saved under run_id, takes its 'ofs' entry and
    passes it to get_consensus with the same run_id and reset=True.
    Returns the value produced by get_consensus.
    """
    ofs = bsu.load_data(run_id, 'input')['ofs']
    return get_consensus(ofs, run_id=run_id, reset=True)
Beispiel #21
0
def ribo_struct_outfile(rfid):
    """Return the stored 'output' data saved under the key 'RS_<rfid>'."""
    return bsu.load_data('RS_{0}'.format(rfid), 'output')
Beispiel #22
0
def view4():
    """Export the non-linear regression weights of the stored mcmc runs.

    Files in the batch/tmp data directory whose name contains "mcmc" are
    paired with the 'input' record stored under the first ten characters
    of their name; runs with out_iter_num > -1 are kept and runs with
    out_iter_num == 2 are skipped.

    For every processed run (position l among the kept runs) two
    tab-separated .sif files are written below
    network/network_predmodel/regressionwts/: nonlinear_all/nw_<l>.sif
    with one line per (factor, target) of every term, and
    nonlinear_sing/nw_<l>.sif with one line per single-factor term.
    Both files are closed even when processing fails part-way.

    Returns (cnames, xvals, gvals, yvals, colors, rads, l_info, tfs, coefs):
    the cluster names, six parallel per-factor lists of plotting data,
    the per-run summary dict, and the "coefs_dic_nonlinear" entry of the
    LAST processed file (None when no file was processed).
    NOTE(review): only the last file's coefficients are returned --
    confirm whether a per-file collection was intended.
    """
    import compbio.utils.colors as mycolors

    tmp_dir = cfg.dataPath("batch/tmp")
    files = [name for name in os.listdir(tmp_dir) if "mcmc" in name]
    fpaths = [os.path.join(tmp_dir, name) for name in files]
    ids = [name[0:10] for name in files]
    inps = [butils.load_data(i, "input") for i in ids]

    idxs_good = nonzero(greater([elt.get("out_iter_num") for elt in inps], -1))[0]
    inps = [inps[i] for i in idxs_good]
    fpaths = [fpaths[i] for i in idxs_good]

    cnames, xvals, gvals, yvals, colors, rads, tfs = [], [], [], [], [], [], []
    l_info = {}
    coefs = None
    cluster_pattern = re.compile(r"_([^_]+)\.mat")

    for l, (f, inp) in enumerate(zip(fpaths, inps)):
        if inp["out_iter_num"] == 2:
            continue
        print(inp["filename"])
        clustname = cluster_pattern.search(inp["filename"]).group(1)
        cnames.append(clustname)

        data = sio.loadmat(f)
        l_info[l] = {
            "cname": clustname,
            "filename": inp["filename"],
            "stay_same": data["stay_same"],
            "improve_ratio": data["improve_ratio"],
            "error_test": data["error_test"],
        }

        # One colour per entry of "gene_names".
        ct = mycolors.getct(len(data["gene_names"]))

        term_list = [list(it.chain(*mod)) for mod in data["model"]]
        coefs = data["coefs_dic_nonlinear"]
        tfnames = data["tf_names"]
        tgnames = data["gene_names"]

        all_path = cfg.dataPath(
            "network/network_predmodel/regressionwts/nonlinear_all/nw_{0}.sif".format(l)
        )
        sing_path = cfg.dataPath(
            "network/network_predmodel/regressionwts/nonlinear_sing/nw_{0}.sif".format(l)
        )

        # `with` guarantees both files are closed when the guard below (or
        # any write) raises.
        with open(all_path, "w") as nlcof_all, open(sing_path, "w") as nlcof_sing:
            for i, terms in enumerate(term_list):
                # Hard-coded target indices that must not carry any term;
                # presumably a debugging guard -- TODO confirm.
                if i in (5, 49, 53, 30, 17, 8, 38):
                    if sum(terms) > 0:
                        raise Exception()
                # Stored factor indices are 1-based.
                terms = [t - 1 for t in terms]
                for j, term in enumerate(terms):
                    if len(term) == 1:
                        wt = coefs[i][0][0][j]
                        # NOTE(review): the target name is indexed [i][0]
                        # here but [i][0][0] below -- confirm which is meant.
                        nlcof_sing.write("{0}\t{1}\t{2}\n".format(tfnames[term][0][0], tgnames[i][0], wt))

                    for fact in set(term):
                        wt = coefs[i][0][0][j]
                        nlcof_all.write("{0}\t{1}\t{2}\n".format(tfnames[fact][0][0], tgnames[i][0][0], wt))

                        gvals.append([i] * (len(term) + 1))
                        yvals.append([fact] * (len(term) + 1))
                        colors.append([ct[c] for c in sorted(term)] + [1, 1, 1])
                        tfs.append([c for c in sorted(term)])
                        rads.append(((arange(1, len(term) + 2) ** 2) * 50)[::-1])
                        xvals.append([l] * (len(term) + 1))

    return cnames, xvals, gvals, yvals, colors, rads, l_info, tfs, coefs
Beispiel #23
0
def check_trees(fam_type='riboswitch'):
    """Tabulate mutation and time statistics for every stored family tree.

    fam_type -- 'riboswitch': families come from rutils.switch_dicts()
                (name -> number) and use the 'RS_' / 'RS_tree_' keys;
                'all': families RF00000 .. RF01492 with the 'FA_' /
                'FA_tree_' keys.

    For every family the structure and tree outputs are loaded (families
    that fail to load are reported and skipped), the structures are
    reordered by decreasing energy, and per subtree / per structure the
    resolved and paired time fractions and the mutation-class fractions
    and totals are computed.

    The results are accumulated per family name in three dictionaries
    (mutations, times, structures); the visible code does not return them
    (returns None).
    Raises ValueError for any other fam_type (previously a NameError).
    """
    if fam_type == 'riboswitch':
        sdicts = rutils.switch_dicts()
        rfids = ['RF{0:05}'.format(n) for n in sdicts.values()]
        names = sdicts.keys()
        prefix = 'RS'
    elif fam_type == 'all':
        rfids = ['RF{0:05}'.format(n) for n in range(0, 1493)]
        names = rfids
        prefix = 'FA'
    else:
        raise ValueError('unknown fam_type: {0!r}'.format(fam_type))

    struct_ids = ['{0}_{1}'.format(prefix, rfid) for rfid in rfids]
    tree_ids = ['{0}_tree_{1}'.format(prefix, rfid) for rfid in rfids]

    switch_muts = {}
    switch_times = {}
    switch_structs = {}

    for rname, si, ti in zip(names, struct_ids, tree_ids):
        print('Loading family for {0}'.format(rname))
        try:
            structs = bsu.load_data(si, 'output')
            trees = bsu.load_data(ti, 'output')
        except Exception:
            # Best effort: families without stored output are skipped.
            print('Failure! Did I make a booboo?')
            continue
        print('Success! Analyzing tree output')

        #SORT STRUCTURES IN DECREASING ORDER OF ENERGY (MATCH TREES)
        str_esrt = argsort(structs['energies'])[::-1]
        structs['structs'] = [structs['structs'][j] for j in str_esrt]
        structs['energies'] = [structs['energies'][j] for j in str_esrt]

        sc = {'energies': structs['energies'],
              'structs': structs['structs'],
              'seq': structs['seq']}
        mc, tc = {}, {}
        for j, t in enumerate(trees):
            if t is None:
                continue
            mc[j] = {}
            tc[j] = {}
            for idx, t_info in enumerate(t['structs']):
                t_times = t_info['times']
                t_muts = t_info['muts']

                # NOTE(review): plain `/` -- if these values are Python
                # ints under Python 2 the ratios truncate; they look like
                # arrays (len() is taken of t_times['total']) -- confirm.
                frac_resolved = t_times['total'] / t_times['total_incl_unresolved']
                frac_paired = t_times['paired'] / \
                    (t_times['unpaired'] + t_times['paired'])

                n_2cons = t_muts['comp']
                n_1cons = t_muts['wob']
                n_0cons = t_muts['ucom']
                n_pluscons = t_muts['reco']
                n_nocons = t_muts['bbad']

                total_muts = n_0cons + n_nocons + n_2cons + n_1cons + n_pluscons
                total_silent = n_2cons + n_1cons
                total_pair_mutants = n_2cons + n_1cons + n_0cons

                frac_silent = (n_2cons + n_1cons + n_pluscons) / \
                    (n_0cons + n_nocons + n_2cons + n_1cons + n_pluscons)
                frac_double = n_2cons / (n_2cons + n_1cons)
                frac_destructive = n_0cons / (n_2cons + n_1cons + n_0cons)

                tc[j][idx] = dict(
                    frac_resolved=frac_resolved,
                    frac_paired=frac_paired,
                    total_time=array([t_times['total_incl_unresolved']] *
                                     len(t_times['total'])),
                    total_time_res=t_times['total'])
                mc[j][idx] = dict(
                    frac_silent=frac_silent,
                    frac_double=frac_double,
                    frac_destructive=frac_destructive,
                    total_muts=total_muts,
                    total_silent=total_silent,
                    total_pair_mutants=total_pair_mutants)

        print('Done!\nResults:\n  {0} subtrees computed.\n\n'.format(len(mc)))

        switch_muts[rname] = mc
        switch_times[rname] = tc
        switch_structs[rname] = sc