Esempio n. 1
0
def write_ensemble_metrics(row, id_field, n_init=14):
    """Compute unpaired-probability summary metrics for one record.

    The base-pair probability matrix is cached in the current working
    directory as a gzip-compressed ``.npy`` file named
    ``bpps_<id>.npy.gz``. When that file exists it is loaded; otherwise the
    matrix is computed with ``bpps(..., package='eternafold')`` and cached.

    Args:
        row: Mapping-like record. ``row[id_field]`` names the cache file and
            ``row['sequence']`` is passed to ``bpps`` on a cache miss.
        id_field: Key of the identifier inside ``row``.
        n_init: Number of leading positions summed into ``sup_init``.
            Defaults to 14, the value that used to be hard-coded.

    Returns:
        Tuple ``(aup, sup_init)``: the mean of the per-position unpaired
        probabilities, and their sum over the first ``n_init`` positions.
    """
    seq_id = row[id_field]
    bp_filename = 'bpps_%s.npy.gz' % str(seq_id)

    if os.path.exists(bp_filename):
        # Cache hit: the context manager closes the gzip handle, which the
        # previous implementation leaked on this branch.
        with gzip.GzipFile(bp_filename, 'r') as f:
            bpp_mat = np.load(f)
        print('loaded ', seq_id)
    else:
        # Cache miss: compute, then persist for the next call.
        bpp_mat = bpps(row['sequence'], package='eternafold')
        with gzip.GzipFile(bp_filename, 'w') as f:
            np.save(file=f, arr=bpp_mat)
        # Report only once the cache file has actually been written.
        print("wrote ", seq_id)

    # Unpaired probability per position: one minus the column sum of the
    # pairing probabilities.
    punp_vector = 1 - np.sum(bpp_mat, axis=0)

    aup = np.mean(punp_vector)
    sup_init = np.sum(punp_vector[:n_init])

    return aup, sup_init
Esempio n. 2
0
def threshknot_util(sequence, package='vienna_2', theta=0):
    '''
    Select mutually-best base pairs from a base pair probability matrix.

    Inputs:
    sequence: RNA sequence
    package: folding package passed through to bpps
    theta: probabilities less than or equal to this value are zeroed before
           selection. Leave theta = 0 to skip filtering.

    Returns: N x N matrix of base pair probabilities in which only the
    selected pairs keep their (nonzero) probability; every other entry is 0.
    The selected pairs may describe a pseudoknotted structure.
    '''

    n_bases = len(sequence)
    probs = bpps(sequence, package=package)

    # Optional cutoff: discard weak pairs up front.
    probs[np.nonzero(probs <= theta)] = 0

    # Step 1: flag every entry that equals the maximum of its column.
    column_best = np.zeros([n_bases, n_bases])
    column_best[np.nonzero(probs == probs.max(axis=0))] = 1

    # Step 2: keep (i, j) only when it is flagged in both orientations,
    # i.e. flag + transposed flag == 2, which maps to 1 after the shift/clip.
    mutual_best = np.clip(column_best + column_best.T - 1, 0, 1)

    # Zero out everything that was not selected as a final base pair.
    probs[np.nonzero(mutual_best == 0)] = 0

    return probs
def get_secstruct_mea(int_start, int_end, ref_seq, secstruct_interval=20):
    """Pick the best-scoring MEA structure for an interval plus flanking context.

    The window ``[int_start - secstruct_interval, int_end + secstruct_interval)``
    is cut from ``ref_seq`` (clamped to the bounds of ``ref_seq``), its base
    pair probabilities are computed with ``bpps(..., package='contrafold_2')``,
    and ``MEA`` models are built for gamma in 10**-2 .. 10**1. The structure
    whose third ``score_expected()`` value (the expected MCC, going by the
    variable it is unpacked into) is largest is kept.

    Args:
        int_start: Start index of the interval of interest in ``ref_seq``.
        int_end: End index (exclusive) of the interval of interest.
        ref_seq: Reference sequence to slice the window from.
        secstruct_interval: Number of flanking positions requested on each
            side of the interval.

    Returns:
        Tuple ``(best_struct, best_mcc, conserved_str, sequence)``.
        ``best_struct`` is ``''`` and ``best_mcc`` is ``0`` when no model
        scores above zero. ``conserved_str`` marks the interval with ``*``
        and the flanks with ``.``, aligned with ``sequence``.
    """
    # Clamp the window start: a negative slice start would be interpreted by
    # Python as an offset from the END of ref_seq and silently return the
    # wrong (often empty) window. The slice end is clamped by slicing itself.
    win_start = max(0, int_start - secstruct_interval)
    sequence = ref_seq[win_start:(int_end + secstruct_interval)]
    bp_matrix = bpps(sequence, package='contrafold_2')

    best_struct = ''
    best_mcc = 0
    for log_gamma in range(-2, 2):
        mea_mdl = MEA(bp_matrix, gamma=10**log_gamma)
        # Only the third of the four returned scores is used for selection.
        _, _, exp_mcc, _ = mea_mdl.score_expected()
        if exp_mcc > best_mcc:
            best_struct = mea_mdl.structure
            best_mcc = exp_mcc

    # Build the annotation from the flanks that were actually obtained so it
    # stays aligned with `sequence` when the window hits either end of
    # ref_seq. For an untruncated window both flanks equal secstruct_interval.
    left_flank = int_start - win_start
    core = int_end - int_start
    right_flank = max(0, len(sequence) - left_flank - core)
    conserved_str = '.'*left_flank + '*'*core + '.'*right_flank
    return (best_struct, best_mcc, conserved_str, sequence)
Esempio n. 4
0
from arnie.utils import write_vector_to_file

# Command-line entry point: for every input sequence file, compute the
# per-position unpaired probabilities and write them to <out_dir>/<id>.unp.
if __name__=='__main__':
    p = argparse.ArgumentParser(description=
        """Write unpaired posterior probabilities to files.
        """)

    # NOTE: despite the argument name, every value is opened as a sequence
    # file below, not listed as a directory.
    p.add_argument("seq_dir", nargs='+',
                   help="path to dir of *.seq files")
    # Default to the current directory so that omitting -o no longer targets
    # a directory literally named "None".
    p.add_argument("-o", default='.',
                   help="name of output dir (default: current directory)")
    p.add_argument("-p", "--package", default='vienna_2', help="Package to use")

    if len(sys.argv)==1:
        p.print_help(sys.stderr)
        sys.exit(1)

    args = p.parse_args()

    # Create exactly the directory that is written to below. Previously
    # './<o>' was created while files went to '<o>', which diverged for
    # absolute paths.
    out_dir = args.o
    os.makedirs(out_dir, exist_ok=True)

    for seqfile in args.seq_dir:
        print(seqfile)
        # The sequence is taken from the last line of the file.
        with open(seqfile, 'r') as seq_fh:
            seq = seq_fh.readlines()[-1].rstrip()
        seq_id = os.path.basename(seqfile).replace('.seq','')

        # Unpaired probability: one minus the column sum of the base pair
        # probability matrix.
        unp_vector = 1-np.sum(bpps.bpps(seq, package=args.package),axis=0)

        with open("%s/%s.unp" % (out_dir, seq_id),'w') as f:
            write_vector_to_file(unp_vector, f)
Esempio n. 5
0
from arnie.utils import write_matrix_to_file

# Command-line entry point: for every input sequence file, compute the base
# pair probability matrix and write it to <out_dir>/<id>.bpps.
if __name__=='__main__':
    p = argparse.ArgumentParser(description=
        """
        Write base pairing probability matrices to files.
        """)

    # NOTE: despite the argument name, every value is opened as a sequence
    # file below, not listed as a directory.
    p.add_argument("seq_dir", nargs='+',
                   help="path to dir of *.seq files")
    # Default to the current directory so that omitting -o no longer targets
    # a directory literally named "None".
    p.add_argument("-o", default='.',
                   help="name of output dir (default: current directory)")
    p.add_argument("-p", "--package", default='vienna_2',
                   help="Package to use")

    if len(sys.argv)==1:
        p.print_help(sys.stderr)
        sys.exit(1)

    args = p.parse_args()

    # Create exactly the directory that is written to below. Previously
    # './<o>' was created while files went to '<o>', which diverged for
    # absolute paths.
    out_dir = args.o
    os.makedirs(out_dir, exist_ok=True)

    for seqfile in args.seq_dir:
        print(seqfile)
        # The sequence is taken from the last line of the file.
        with open(seqfile, 'r') as seq_fh:
            seq = seq_fh.readlines()[-1].rstrip()
        seq_id = os.path.basename(seqfile).replace('.seq','')
        bp_matrix = bpps.bpps(seq, package=args.package)
        with open("%s/%s.bpps" % (out_dir, seq_id),'w') as f:
            write_matrix_to_file(bp_matrix, f)
Esempio n. 6
0
def test_bpps(pkg):
    """Smoke-check ``bpps`` for one package.

    Computes the matrix for the module-level ``sample_seq`` with the given
    package, then prints a header line followed by the first row of the
    result. Returns nothing.
    """
    matrix = bpps(sample_seq, package=pkg)
    header = 'test bpps %s' % pkg
    first_row = matrix[0]
    print(header)
    print(first_row)
Esempio n. 7
0
def calc_bpp(seq):
    """Return ``bpps`` for *seq* using the settings held in the global ``args``.

    ``args.package`` is forwarded as ``package`` and ``args.temp`` as ``T``
    (presumably the folding temperature; confirm against ``bpps``).
    """
    settings = {'package': args.package, 'T': args.temp}
    return bpps(seq, **settings)