#print 'subspace:' #subSpace.display() #print 'Measurements in set' #print expandedSpace.listMeasurementsInSet() # loop over measurements and load them into Measurements fileName = 'None' for msrmtInSet in expandedSpace.listMeasurementsInSet(): if fileName != allMeasurements[msrmtInSet][0]: fileName = allMeasurements[msrmtInSet][0] rdatFileName = 'ETERNA'+fileName[3:12]+'.rdat' rdat = RDATFile() rdat.load(open('/home/qmac/projects/testdir/'+rdatFileName)) offset=0 constructs = rdat.constructs.values()[0] # pdb.set_trace() dsection = constructs.data[allMeasurements[msrmtInSet][1]] if dsection.annotations['sequence'][0] != expandedSpace.sequences[msrmtInSet]: print 'Error, sequences not the same!' sys.exit() seq=dsection.annotations['sequence'][0] countZeros = 0 rdatLength = len(dsection.values) for j in range(1,rdatLength): if dsection.values[-j] != 0.0: break else:
"""Fold secondary structures guided by the eigen-reactivities of an RDAT file.

Usage: the RDAT file path is taken from the first command-line argument.
Every row of the first value matrix in the file is normalized, the
eigen-reactivities of that matrix are computed, and both matrices are shown.
Each of the first 35 eigen-reactivity rows is then used as mapping data for a
fold of the first construct's sequence, and the first returned structure is
rendered by VARNA to ``test_results/eigen_struct<i>.png``.
"""
import sys

from matplotlib.pylab import *
from rdatkit.datahandlers import RDATFile
from rdatkit.view import VARNA
from rdatkit.secondary_structure import fold
from rdatkit.mapping import MappingData, normalize
from analysis import eigen_reactivities

rdat = RDATFile()
# Close the input file as soon as it has been parsed instead of leaking the
# handle for the lifetime of the process.
with open(sys.argv[1]) as rdat_handle:
    rdat.load(rdat_handle)

# list(...) keeps the "first entry" lookup valid whether values() returns a
# list (Python 2) or a view (Python 3).
vals = array(list(rdat.values.values())[0])
for i in range(shape(vals)[0]):
    vals[i, :] = normalize(vals[i, :])
eigenrs = eigen_reactivities(vals)
matshow(vals)
#mshow(vals, cmap=get_cmap('Greys'), vmin=0, vmax=vals.mean(), aspect='auto', interpolation='nearest')
matshow(eigenrs)
#imshow(eigenrs, cmap=get_cmap('Greys'), vmin=eigenrs.min(), vmax=eigenrs.mean(), aspect='auto', interpolation='nearest')
show()

construct = list(rdat.constructs.values())[0]
sequence = construct.sequence
for i, e in enumerate(eigenrs[:35]):
    md = MappingData(
        data=e,
        seqpos=[s - construct.offset - 1 for s in construct.seqpos])
    # Fold once and reuse the result for both the printout and the picture;
    # the original ran the same fold twice per iteration.
    structures = fold(sequence, mapping_data=md)
    # Single-argument print(...) prints the same text on Python 2 and 3.
    print(structures)
    VARNA.cmd(sequence, structures[0].dbn,
              'test_results/eigen_struct%s.png' % i)
"""Fold secondary structures guided by the eigen-reactivities of an RDAT file.

Usage: the RDAT file path is taken from the first command-line argument.
Every row of the first value matrix in the file is normalized, the
eigen-reactivities of that matrix are computed, and both matrices are shown.
Each of the first 35 eigen-reactivity rows is then used as mapping data for a
fold of the first construct's sequence, and the first returned structure is
rendered by VARNA to ``test_results/eigen_struct<i>.png``.
"""
import sys

from matplotlib.pylab import *
from rdatkit.datahandlers import RDATFile
from rdatkit.view import VARNA
from rdatkit.secondary_structure import fold
from rdatkit.mapping import MappingData, normalize
from analysis import eigen_reactivities

rdat = RDATFile()
# Close the input file as soon as it has been parsed instead of leaking the
# handle for the lifetime of the process.
with open(sys.argv[1]) as rdat_handle:
    rdat.load(rdat_handle)

# list(...) keeps the "first entry" lookup valid whether values() returns a
# list (Python 2) or a view (Python 3).
vals = array(list(rdat.values.values())[0])
for i in range(shape(vals)[0]):
    vals[i, :] = normalize(vals[i, :])
eigenrs = eigen_reactivities(vals)
matshow(vals)
#mshow(vals, cmap=get_cmap('Greys'), vmin=0, vmax=vals.mean(), aspect='auto', interpolation='nearest')
matshow(eigenrs)
#imshow(eigenrs, cmap=get_cmap('Greys'), vmin=eigenrs.min(), vmax=eigenrs.mean(), aspect='auto', interpolation='nearest')
show()

construct = list(rdat.constructs.values())[0]
sequence = construct.sequence
for i, e in enumerate(eigenrs[:35]):
    md = MappingData(
        data=e,
        seqpos=[s - construct.offset - 1 for s in construct.seqpos])
    # Fold once and reuse the result for both the printout and the picture;
    # the original ran the same fold twice per iteration.
    structures = fold(sequence, mapping_data=md)
    # Single-argument print(...) prints the same text on Python 2 and 3.
    print(structures)
    VARNA.cmd(sequence, structures[0].dbn,
              'test_results/eigen_struct%s.png' % i)
args = parser.parse_args() fragtypes = ['all', 'helices', 'interiorloops', 'hairpins', 'dangles', 'bulges',\ '2wayjunctions', '3wayjunctions', '4wayjunctions', '5wayjunctions', 'unpaired', 'edgepairs', 'internalpairs'] db = {} dberrors = {} dbidx = {} for t in fragtypes: db[t] = [] dberrors[t] = [] dbidx[t] = {} for filename in os.listdir(args.rdatdir): if not os.path.isdir(args.rdatdir+'/'+filename): print filename rdat = RDATFile() rdat.load(open(args.rdatdir+'/'+filename)) for cname in rdat.constructs: construct = rdat.constructs[cname] struct = SecondaryStructure(construct.structure) frags = struct.explode() for data in construct.data: if (('mutation' not in data.annotations) or \ ('mutation' in data.annotations and \ 'WT' in data.annotations['mutation'])): if 'modifier' in data.annotations: if args.normalize: normvals = normalize(data.values) else: normvals = data.values iqr = scoreatpercentile(normvals, 75) - scoreatpercentile(normvals, 25) for fragtype in frags:
def parse_rdat_data(request, is_get_file):
    """Collect sequences, titles and reactivity bonuses from an RDAT file.

    The RDAT content is read either from the uploaded file
    ``request.FILES['rdatfile']`` or from the stored file of the entry named
    by ``request.POST['rmdbid']`` (at its current version).

    Args:
        request: Request object exposing ``POST`` (reads ``'sequences'``,
            ``'rmdbid'`` and the optional ``'clipsequence'`` flag) and
            ``FILES`` (reads ``'rdatfile'``).
        is_get_file: Truthy to parse the uploaded file, falsy to parse the
            stored file for ``rmdbid``.

    Returns:
        A 10-tuple ``(messages, valerrors, bonuses_1d, bonuses_2d, titles,
        modifiers, offset_seqpos, temperature, sequences, refstruct)``.
        ``valerrors`` is always returned empty by this function.
        ``bonuses_1d`` / ``bonuses_2d`` are reset for every construct, so
        they hold the values of the last construct in the file only.
    """
    sequences, titles, modifiers, messages, valerrors, offset_seqpos = (
        [], [], [], [], [], [])
    # Bound up front so the return statement is valid even when the file
    # contains no constructs (previously an UnboundLocalError).
    bonuses_1d = []
    bonuses_2d = []
    temperature = 37
    rdatfile = RDATFile()
    refstruct = secondary_structure.SecondaryStructure()

    if len(request.POST['sequences']):
        messages.append(
            'WARNING: Using sequences and/or structures from received RDAT file content. Original input in fields were overwritten.'
        )

    if is_get_file:
        uploadfile = request.FILES['rdatfile']
        # NOTE(review): the client-supplied file name is interpolated into
        # the path; confirm it is sanitized before it reaches this point.
        rf = write_temp_file('/tmp/%s' % uploadfile.name)
    else:
        rmdbid = request.POST['rmdbid'].strip()
        version = RMDBEntry.get_current_version(rmdbid)
        rf = open(
            PATH.DATA_DIR['FILE_DIR'] + '/%s/%s_%s.rdat' %
            (rmdbid, rmdbid, version), 'r')
    try:
        rdatfile.load(rf)
    finally:
        # Release the handle even when parsing fails.
        rf.close()

    clip_sequence = 'clipsequence' in request.POST
    is_modified = 'modifier' in rdatfile.annotations
    if is_modified:
        modifier = ','.join(rdatfile.annotations['modifier'])

    for cname in rdatfile.constructs:
        c = rdatfile.constructs[cname]
        if 'temperature' in c.annotations:
            temperature = c.annotations['temperature']
        bonuses_1d = []
        bonuses_2d = []
        # NOTE(review): taken before c.seqpos may be rewritten below, so it
        # can be stale after a SEQPOS mismatch -- confirm this is intended.
        seqpos_min = min(c.seqpos)

        if clip_sequence:
            # The largest index read below is max(seqpos) - offset - 1, so
            # the string must be strictly longer than that. The former ">="
            # let an out-of-range index through and raised IndexError
            # instead of reporting the mismatch.
            if len(c.sequence) > max(c.seqpos) - c.offset - 1:
                seq_clipped = ''.join(
                    [c.sequence[i - c.offset - 1] for i in sorted(c.seqpos)])
            else:
                messages.append(
                    'WARNING: SEQUENCE and SEQPOS mismatch for construct %s in RDAT file. SEQPOS ignored.'
                    % c.name)
                c.seqpos = [(i + 1) for i in range(len(c.sequence))]
                seq_clipped = c.sequence

            if len(c.structure) > max(c.seqpos) - c.offset - 1:
                struct_clipped = ''.join(
                    [c.structure[i - c.offset - 1] for i in sorted(c.seqpos)])
            else:
                messages.append(
                    'WARNING: STRUCTURE and SEQPOS mismatch for construct %s in RDAT file. STRUCTURE ignored.'
                    % c.name)
                struct_clipped = '.' * (max(c.seqpos) - c.offset - 1)
                c.structure = struct_clipped

            seq = seq_clipped
            struct = struct_clipped
        else:
            seq = c.sequence
            struct = c.structure

        # The first construct seen provides the reference structure.
        if len(refstruct) == 0:
            refstruct = secondary_structure.SecondaryStructure(dbn=struct)

        for d in c.data:
            if not (is_modified or ('modifier' in d.annotations)):
                continue

            s = seq
            is_2d = False
            if 'mutation' in d.annotations:
                for mut in d.annotations['mutation']:
                    if 'WT' == mut.strip():
                        break
                    is_2d = True
                    # The position is read from everything between the
                    # first and last characters; the last character is the
                    # replacement base.
                    idx = int(mut.strip()[1:-1])
                    base = mut[-1]
                    # NOTE(review): this replaces index idx - c.offset while
                    # the clipping code above uses i - c.offset - 1; confirm
                    # which convention the mutation labels follow.
                    s = s[:idx - c.offset] + base + s[idx - c.offset + 1:]
                titles.append(';'.join(d.annotations['mutation']))
            else:
                titles.append(cname)
            sequences.append(s)
            bonuses_1d.append([str(x) for x in d.values])

            offset = seqpos_min if clip_sequence else c.offset + 1
            offset_seqpos.append([i - offset for i in c.seqpos])

            if is_2d:
                # len() rather than truthiness: bonuses_2d becomes an array
                # after the first 2D entry.
                if len(bonuses_2d) == 0:
                    bonuses_2d = zeros([len(seq), len(seq)])
                for i, pos in enumerate(c.seqpos):
                    bonuses_2d[pos - offset, idx - offset] = d.values[i]

            if is_modified:
                modifiers.append(modifier)
            else:
                modifiers.append(','.join(d.annotations['modifier']))

    return (messages, valerrors, bonuses_1d, bonuses_2d, titles, modifiers,
            offset_seqpos, temperature, sequences, refstruct)
def get_constructs_from_rdats(dir):
    """
    using rdatkit parse all RDAT files in the directory specified and parse
    each data entry's sequence, structure and score into construct objects.
    ONLY files with the .rdat extension are recognized as RDAT files, other
    files are skipped. Only the first construct of each file is read.

    Data entries are skipped when they have no 'signal_to_noise' annotation,
    when that annotation starts with "weak", or when either 'MAPseq' field
    mentions "Mutate and Map".

    :params dir: directory with rdat files
    :type dir: str
    :returns: List of Construct Objects
    :raises ValueError: if the directory holds no file ending in .rdat
    """
    # `dir` shadows the builtin but is part of the public signature, so the
    # name is kept for callers that pass it by keyword.
    rdat_files = [path for path in glob.glob(dir+"/*")
                  if path.endswith(".rdat")]
    if not rdat_files:
        raise ValueError("no rdat files in directory "+dir+" files must have rdat extension to be recognized")

    mm = re.compile("Mutate and Map")
    construct_objs = []
    # Iterate the filtered list: the original looped over every globbed file
    # and therefore tried to parse non-RDAT files as well.
    for path in rdat_files:
        r = RDATFile()
        with open(path) as handle:
            r.load(handle)
        first_construct = list(r.constructs.values())[0]

        for entry in first_construct.data:
            annotations = entry.annotations
            #some data entries dont have signal_to_noise variable, skip over
            #them
            if 'signal_to_noise' not in annotations:
                continue
            #dont want to include weak data
            if annotations['signal_to_noise'][0].split(":")[0] == "weak":
                continue

            name = annotations['MAPseq'][0]
            project_name = annotations['MAPseq'][1]
            #mutate and map data wont be useful since target structure is not
            #correct with the mutation
            if mm.search(name) or mm.search(project_name):
                continue

            # The score is the third colon-separated field of the first
            # 'EteRNA' annotation.
            score = annotations['EteRNA'][0].split(":")[2]
            construct_objs.append(
                Construct(seq=annotations['sequence'][0],
                          ss=annotations['structure'][0],
                          score=score))

    return construct_objs
import matplotlib.pyplot as plt
import numpy as np
import rdatkit.secondary_structure as ss
# RDATFile is used below but no import for it is visible in this script.
# NOTE(review): drop this line if the name is already imported elsewhere.
from rdatkit.datahandlers import RDATFile
import sys
import pickle
import argparse

# Command line: an input RDAT file name (resolved against a fixed directory
# below) and an output file name.
parser = argparse.ArgumentParser()
parser.add_argument("infile",help="input file name, please end with .rdat")
parser.add_argument("outfile",help="output file name no extition needed")
args = parser.parse_args()

#import rdat data
rdat = RDATFile()
# Close the input file once it has been parsed instead of leaking the handle.
with open('/home/qmac/projects/testdir/'+args.infile) as rdat_handle:
    rdat.load(rdat_handle)
offset=0
# list(...) keeps the "first construct" lookup valid whether values()
# returns a list (Python 2) or a view (Python 3).
constructs = list(rdat.constructs.values())[0]
competing_pairs = []
sequences_included=[]
msrmtsNumbers=[]
# Formatted into one string so the output is the same on Python 2 and 3.
print('lenth of constructs.data %d' % len(constructs.data))
for count, dsection in enumerate(constructs.data):
    seq=dsection.annotations['sequence'][0]
    #structs=ss.fold(seq,nstructs=2)
    # Pair every suboptimal structure's dot-bracket string with its energy.
    struct_energy_list = [
        (struct.dbn, energy)
        for struct, energy in zip(
            *ss.subopt(seq, nstructs=100, fraction=0.075, energies=True))
    ]
    # Drop duplicate (structure, energy) pairs, then order by energy.
    struct_energy_list_unique = sorted(set(struct_energy_list), key=lambda x: x[1])
    #print struct_energy_list_unique
args = parser.parse_args() fragtypes = ['all', 'helices', 'interiorloops', 'hairpins', 'dangles', 'bulges',\ '2wayjunctions', '3wayjunctions', '4wayjunctions', '5wayjunctions', 'unpaired', 'edgepairs', 'internalpairs'] db = {} dberrors = {} dbidx = {} for t in fragtypes: db[t] = [] dberrors[t] = [] dbidx[t] = {} for filename in os.listdir(args.rdatdir): if not os.path.isdir(args.rdatdir + '/' + filename): print filename rdat = RDATFile() rdat.load(open(args.rdatdir + '/' + filename)) for cname in rdat.constructs: construct = rdat.constructs[cname] struct = SecondaryStructure(construct.structure) frags = struct.explode() for data in construct.data: if (('mutation' not in data.annotations) or \ ('mutation' in data.annotations and \ 'WT' in data.annotations['mutation'])): if 'modifier' in data.annotations: if args.normalize: normvals = normalize(data.values) else: normvals = data.values iqr = scoreatpercentile( normvals, 75) - scoreatpercentile(normvals, 25)