#print 'subspace:'
#subSpace.display()

#print 'Measurements in set'
#print expandedSpace.listMeasurementsInSet()

# loop over measurements and load them into Measurements
fileName = 'None'
for msrmtInSet in expandedSpace.listMeasurementsInSet():
     if fileName != allMeasurements[msrmtInSet][0]:
	fileName = allMeasurements[msrmtInSet][0]
	rdatFileName = 'ETERNA'+fileName[3:12]+'.rdat'
        rdat = RDATFile()
        rdat.load(open('/home/qmac/projects/testdir/'+rdatFileName))
        offset=0
     constructs = rdat.constructs.values()[0]
    # pdb.set_trace()
     dsection = constructs.data[allMeasurements[msrmtInSet][1]]
     if dsection.annotations['sequence'][0] != expandedSpace.sequences[msrmtInSet]:
	print 'Error, sequences not the same!'
	sys.exit()
     seq=dsection.annotations['sequence'][0] 

     countZeros = 0
     rdatLength = len(dsection.values)
     for j in range(1,rdatLength):
	if dsection.values[-j] != 0.0:
	    break
	else:
Exemple #2
0
from matplotlib.pylab import *
from rdatkit.datahandlers import RDATFile
from rdatkit.view import VARNA
from rdatkit.secondary_structure import fold
from rdatkit.mapping import MappingData, normalize
from analysis import eigen_reactivities
import sys

# Load the RDAT file named on the command line. The handle is closed as soon
# as parsing is finished instead of being left open for the whole run.
rdat = RDATFile()
with open(sys.argv[1]) as rdat_handle:
    rdat.load(rdat_handle)

# Take the first value set in the file, normalize it row by row, and derive
# the eigen-reactivities from the normalized matrix.
vals = array(rdat.values.values()[0])
for i in xrange(shape(vals)[0]):
    vals[i,:] = normalize(vals[i,:])
eigenrs = eigen_reactivities(vals)

# Show both matrices; show() is called before the folding loop below.
matshow(vals)
matshow(eigenrs)
show()

# Fold the first construct's sequence once per eigen-reactivity (first 35 at
# most) and render each resulting structure with VARNA.
construct = rdat.constructs.values()[0]
sequence = construct.sequence
for i, e in enumerate(eigenrs[:35]):
    md = MappingData(data=e, seqpos=[s - construct.offset - 1 for s in construct.seqpos])
    # Fold once and reuse the result for both the printout and the image;
    # the original ran the same fold twice per iteration.
    folded = fold(sequence, mapping_data=md)
    print(folded)
    structure = folded[0].dbn
    VARNA.cmd(sequence, structure, 'test_results/eigen_struct%s.png' % i)



from matplotlib.pylab import *
from rdatkit.datahandlers import RDATFile
from rdatkit.view import VARNA
from rdatkit.secondary_structure import fold
from rdatkit.mapping import MappingData, normalize
from analysis import eigen_reactivities
import sys

# Parse the RDAT file given as the first command-line argument; the context
# manager guarantees the handle is released even if parsing fails.
rdat = RDATFile()
with open(sys.argv[1]) as rdat_handle:
    rdat.load(rdat_handle)

# First value set of the file, normalized one row at a time, then reduced to
# eigen-reactivities.
vals = array(rdat.values.values()[0])
for i in xrange(shape(vals)[0]):
    vals[i, :] = normalize(vals[i, :])
eigenrs = eigen_reactivities(vals)

# Display the normalized data and the eigen-reactivities.
matshow(vals)
matshow(eigenrs)
show()

# For each of the first 35 eigen-reactivities, fold the first construct's
# sequence with that profile as mapping data and draw the top structure.
construct = rdat.constructs.values()[0]
sequence = construct.sequence
for i, e in enumerate(eigenrs[:35]):
    md = MappingData(
        data=e, seqpos=[s - construct.offset - 1 for s in construct.seqpos])
    # A single fold call feeds both the printout and the drawing; previously
    # the identical fold was computed twice.
    folded = fold(sequence, mapping_data=md)
    print(folded)
    structure = folded[0].dbn
    VARNA.cmd(sequence, structure, 'test_results/eigen_struct%s.png' % i)
Exemple #4
0
args = parser.parse_args()

fragtypes = ['all', 'helices', 'interiorloops', 'hairpins', 'dangles', 'bulges',\
        '2wayjunctions', '3wayjunctions', '4wayjunctions', '5wayjunctions', 'unpaired', 'edgepairs', 'internalpairs']
db = {}
dberrors = {}
dbidx = {}
for t in fragtypes:
    db[t] = []
    dberrors[t] = []
    dbidx[t] = {}
for filename in os.listdir(args.rdatdir):
    if not os.path.isdir(args.rdatdir+'/'+filename):
        print filename
    rdat = RDATFile()
    rdat.load(open(args.rdatdir+'/'+filename))
    for cname in rdat.constructs:
        construct = rdat.constructs[cname]
        struct = SecondaryStructure(construct.structure)
        frags = struct.explode()
        for data in construct.data:
            if (('mutation' not in data.annotations) or \
                    ('mutation' in data.annotations and \
                    'WT' in data.annotations['mutation'])):
                if 'modifier' in data.annotations:
                    if args.normalize:
                        normvals = normalize(data.values)
                    else:
                        normvals = data.values
                        iqr = scoreatpercentile(normvals, 75) - scoreatpercentile(normvals, 25)
            for fragtype in frags:
Exemple #5
0
def parse_rdat_data(request, is_get_file):
    """Extract sequences, structures and reactivity bonuses from an RDAT file.

    The RDAT content is read either from the upload in
    ``request.FILES['rdatfile']`` (``is_get_file`` true) or from the stored
    file of the entry named by ``request.POST['rmdbid']`` at its current
    version (``is_get_file`` false).

    Only data sections covered by a ``modifier`` annotation (file-level or on
    the section itself) contribute to the result.  When ``'clipsequence'`` is
    present in ``request.POST``, each construct's sequence and structure are
    reduced to the positions listed in its SEQPOS; if SEQPOS does not fit the
    sequence or structure, a warning is added to ``messages`` and a fallback
    is used instead.

    Returns the 10-tuple ``(messages, valerrors, bonuses_1d, bonuses_2d,
    titles, modifiers, offset_seqpos, temperature, sequences, refstruct)``.
    ``valerrors`` is never filled here.  ``bonuses_1d`` and ``bonuses_2d`` are
    re-initialised for every construct, so they describe only the last
    construct of the file (and are empty lists if the file has none).
    ``temperature`` defaults to 37 and is overridden by a construct's
    ``temperature`` annotation.  ``refstruct`` is built from the first
    construct that is processed.

    Raises ``KeyError`` if the expected ``request.POST`` / ``request.FILES``
    entries are missing.
    """
    sequences, titles, modifiers, messages, valerrors, offset_seqpos = (
        [], [], [], [], [], [])
    temperature = 37
    # Bound up front so the return statement is valid even when the file
    # contains no constructs (previously a NameError).
    bonuses_1d = []
    bonuses_2d = []
    rdatfile = RDATFile()
    refstruct = secondary_structure.SecondaryStructure()

    if len(request.POST['sequences']):
        messages.append(
            'WARNING: Using sequences and/or structures from received RDAT file content. Original input in fields were overwritten.'
        )

    if is_get_file:
        uploadfile = request.FILES['rdatfile']
        # NOTE(review): the client-supplied file name is used to build the
        # temp path; confirm write_temp_file sanitises it.
        rf = write_temp_file('/tmp/%s' % uploadfile.name)
    else:
        rmdbid = request.POST['rmdbid'].strip()
        version = RMDBEntry.get_current_version(rmdbid)
        rf = open(
            PATH.DATA_DIR['FILE_DIR'] + '/%s/%s_%s.rdat' %
            (rmdbid, rmdbid, version), 'r')
    try:
        rdatfile.load(rf)
    finally:
        # Release the handle even when the RDAT content fails to parse.
        rf.close()

    is_modified = 'modifier' in rdatfile.annotations
    if is_modified:
        modifier = ','.join(rdatfile.annotations['modifier'])

    clip_sequence = 'clipsequence' in request.POST

    for cname in rdatfile.constructs:
        c = rdatfile.constructs[cname]

        if 'temperature' in c.annotations:
            temperature = c.annotations['temperature']

        seq = ''
        bonuses_1d = []
        bonuses_2d = []
        # NOTE(review): taken before SEQPOS may be replaced by the fallback
        # below, so the clipping offset keeps the original minimum.
        seqpos_min = min(c.seqpos)

        if clip_sequence:
            # The highest index read is max(seqpos) - offset - 1, which must
            # be strictly smaller than the string length.  The original ">="
            # test let an index equal to the length through and crashed with
            # an IndexError instead of emitting the warning.
            if len(c.sequence) > max(c.seqpos) - c.offset - 1:
                seq_clipped = ''.join(
                    [c.sequence[i - c.offset - 1] for i in sorted(c.seqpos)])
            else:
                messages.append(
                    'WARNING: SEQUENCE and SEQPOS mismatch for construct %s in RDAT file. SEQPOS ignored.'
                    % c.name)
                c.seqpos = [(i + 1) for i in range(len(c.sequence))]
                seq_clipped = c.sequence

            if len(c.structure) > max(c.seqpos) - c.offset - 1:
                struct_clipped = ''.join(
                    [c.structure[i - c.offset - 1] for i in sorted(c.seqpos)])
            else:
                messages.append(
                    'WARNING: STRUCTURE and SEQPOS mismatch for construct %s in RDAT file. STRUCTURE ignored.'
                    % c.name)
                struct_clipped = '.' * (max(c.seqpos) - c.offset - 1)
                c.structure = struct_clipped

            seq = seq_clipped
            struct = struct_clipped
        else:
            seq = c.sequence
            struct = c.structure

        if len(refstruct) == 0:
            refstruct = secondary_structure.SecondaryStructure(dbn=struct)

        for d in c.data:
            if not (is_modified or ('modifier' in d.annotations)):
                continue

            s = seq
            is_2d = False
            if 'mutation' in d.annotations:
                for mut in d.annotations['mutation']:
                    label = mut.strip()
                    if label == 'WT':
                        break
                    is_2d = True
                    idx = int(label[1:-1])
                    # Read the new base from the stripped label so trailing
                    # whitespace in the annotation is never spliced into the
                    # sequence.
                    base = label[-1]
                    # NOTE(review): this splices at idx - offset while the
                    # clipping above indexes with pos - offset - 1; confirm
                    # which convention is intended.
                    s = s[:idx - c.offset] + base + s[idx - c.offset + 1:]
                titles.append(';'.join(d.annotations['mutation']))
            else:
                titles.append(cname)
            sequences.append(s)

            bonuses_1d.append([str(x) for x in d.values])

            offset = seqpos_min if clip_sequence else c.offset + 1
            offset_seqpos.append([i - offset for i in c.seqpos])

            if is_2d:
                if len(bonuses_2d) == 0:
                    bonuses_2d = zeros([len(seq), len(seq)])
                # idx is the position of the last mutation parsed above.
                for i, pos in enumerate(c.seqpos):
                    bonuses_2d[pos - offset, idx - offset] = d.values[i]

            if is_modified:
                modifiers.append(modifier)
            else:
                modifiers.append(','.join(d.annotations['modifier']))

    return (messages, valerrors, bonuses_1d, bonuses_2d, titles, modifiers,
            offset_seqpos, temperature, sequences, refstruct)
Exemple #6
0
def get_constructs_from_rdats(dir):
    """
    Parse every RDAT file in a directory into Construct objects.

    Only paths ending in ``.rdat`` are read; everything else in the directory
    is skipped.  For each file, the data sections of its first construct are
    examined.  A section is dropped when it has no ``signal_to_noise``
    annotation, when that annotation starts with ``weak``, or when either of
    its first two ``MAPseq`` entries contains "Mutate and Map" (the target
    structure is not valid for mutated sequences).

    :params dir: directory with rdat files
    :type dir: str
    :returns: List of Construct Objects, built from each kept section's
        ``sequence`` and ``structure`` annotations and the third
        colon-separated field of its first ``EteRNA`` annotation
    :raises ValueError: if the directory contains no ``.rdat`` file

    """
    # Test the real extension; comparing only the last four characters also
    # accepted names such as "foordat".
    rdat_files = [path for path in glob.glob(dir+"/*") if path.endswith(".rdat")]

    if not rdat_files:
        raise ValueError("no rdat files in directory "+dir+" files must have rdat extension to be recognized")

    construct_objs = []
    mm = re.compile("Mutate and Map")

    # Iterate over the filtered list: the original looped over every globbed
    # path, so non-RDAT files were handed to the parser as well.
    for path in rdat_files:
        r = RDATFile()
        with open(path) as handle:
            r.load(handle)

        # list() keeps this working whether values() is a list or a view.
        data_sections = list(r.constructs.values())[0].data

        for section in data_sections:
            annotations = section.annotations

            # some data entries don't have a signal_to_noise annotation
            if 'signal_to_noise' not in annotations:
                continue

            # don't want to include weak data
            if annotations['signal_to_noise'][0].split(":")[0] == "weak":
                continue

            name = annotations['MAPseq'][0]
            project_name = annotations['MAPseq'][1]

            # mutate-and-map data is not useful since the target structure is
            # not correct with the mutation
            if mm.search(name) or mm.search(project_name):
                continue

            score_fields = annotations['EteRNA'][0].split(":")

            construct_objs.append(
                Construct(seq=annotations['sequence'][0],
                          ss=annotations['structure'][0],
                          score=score_fields[2]))

    return construct_objs
import matplotlib.pyplot as plt
import numpy as np
import rdatkit.secondary_structure as ss
import sys
import pickle
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("infile",help="input file name, please end with .rdat")
parser.add_argument("outfile",help="output file name no extition needed")
args = parser.parse_args()


#import rdat data
rdat = RDATFile()
rdat.load(open('/home/qmac/projects/testdir/'+args.infile))
offset=0
constructs = rdat.constructs.values()[0]
competing_pairs = []
sequences_included=[]
msrmtsNumbers=[]
print 'lenth of constructs.data', len(constructs.data)

for count in range(0,len(constructs.data)):
     dsection = constructs.data[count]
     seq=dsection.annotations['sequence'][0]
     #structs=ss.fold(seq,nstructs=2)
     struct_energy_list =[(struct.dbn, energy) for struct, energy in zip(*ss.subopt(seq,nstructs=100,fraction=0.075,energies=True))]
     struct_energy_list_unique = list(set(struct_energy_list))
     struct_energy_list_unique = sorted(struct_energy_list_unique, key=lambda x: x[1])
     #print struct_energy_list_unique 
args = parser.parse_args()

fragtypes = ['all', 'helices', 'interiorloops', 'hairpins', 'dangles', 'bulges',\
        '2wayjunctions', '3wayjunctions', '4wayjunctions', '5wayjunctions', 'unpaired', 'edgepairs', 'internalpairs']
db = {}
dberrors = {}
dbidx = {}
for t in fragtypes:
    db[t] = []
    dberrors[t] = []
    dbidx[t] = {}
for filename in os.listdir(args.rdatdir):
    if not os.path.isdir(args.rdatdir + '/' + filename):
        print filename
    rdat = RDATFile()
    rdat.load(open(args.rdatdir + '/' + filename))
    for cname in rdat.constructs:
        construct = rdat.constructs[cname]
        struct = SecondaryStructure(construct.structure)
        frags = struct.explode()
        for data in construct.data:
            if (('mutation' not in data.annotations) or \
                    ('mutation' in data.annotations and \
                    'WT' in data.annotations['mutation'])):
                if 'modifier' in data.annotations:
                    if args.normalize:
                        normvals = normalize(data.values)
                    else:
                        normvals = data.values
                        iqr = scoreatpercentile(
                            normvals, 75) - scoreatpercentile(normvals, 25)