def precalculate_structures(entry):
    """Predict and store a secondary structure for each data section of an entry.

    For every ``ConstructSection`` of ``entry`` and every ``DataSection`` of
    that construct, the bonuses returned by ``correct_rx_bonus`` are used to
    fold the construct sequence.  If the first fold yields no structures the
    fold is retried with ``norm=True`` mapping data.  The dot-bracket string
    of the first structure (or ``'NA'`` when both folds come back empty) is
    written to ``d.structure`` and the data section is saved.

    Prints a FATAL message and returns when the entry has no constructs.
    """
    try:
        constructs = ConstructSection.objects.filter(entry=entry)
        # filter() yields an empty queryset instead of raising DoesNotExist,
        # so the "no constructs" case has to be detected explicitly.
        if not constructs:
            print('FATAL! There are no constructs for entry %s' % entry.rmdb_id)
            return
        for c in constructs:
            for d in DataSection.objects.filter(construct_section=c):
                bonuses = correct_rx_bonus(d, c)
                # Rewrite -1.0 entries to -999 in place (presumably the
                # "no data" marker expected by the folding code -- confirm).
                for i, b in enumerate(bonuses):
                    if b == -1.0:
                        bonuses[i] = -999
                structs = secondary_structure.fold(
                    c.sequence, mapping_data=mapping.MappingData(data=bonuses))
                if not structs:
                    # No non-trivial structures found, try with data normalization
                    structs = secondary_structure.fold(
                        c.sequence,
                        mapping_data=mapping.MappingData(data=bonuses, norm=True))
                d.structure = structs[0].dbn if structs else 'NA'
                d.save()
    except ConstructSection.DoesNotExist:
        # Kept for callees inside the try block that may still raise it.
        print('FATAL! There are no constructs for entry %s' % entry.rmdb_id)
def by_nbs_addition(sequence, data, database='default.db.dists', use_data=False, nstructs=20):
    """Weight an ensemble of folded structures by their data likelihood.

    Folds ``sequence`` into up to ``nstructs`` structures (passing ``data`` as
    mapping data to the fold only when ``use_data`` is true), computes
    ``-log(likelihood)`` of ``data`` for each structure against the pickled
    database ``<settings.MAPPING_DATABASE_PATH>/models/<database>``, and then
    solves, for every prefix of the ensemble, the linear program

        minimize  sum(c_i * struct_lh_i)
        subject to 0 <= c_i <= 1 and sum(c_i) == 1

    Only the solution of the last (largest) problem is returned.

    Returns a tuple ``(structures, opt, currsol)``: the structures used, the
    weights found by the solver as an array, and the weighted sum of the
    negative log-likelihoods.  When folding yields no structures the result
    is ``([], array([]), 0.)``.
    """
    # NOTE(security): pickle.load can execute arbitrary code; the database
    # file must come from a trusted location.
    db_path = '%s/models/%s' % (settings.MAPPING_DATABASE_PATH, database)
    with open(db_path) as db_file:
        db = pickle.load(db_file)

    if use_data:
        structures = secondary_structure.fold(sequence, mapping_data=data, nstructs=nstructs)
    else:
        structures = secondary_structure.fold(sequence, nstructs=nstructs)
    if nstructs > len(structures):
        print('WARNING: Found %d non trivial structures in the ensemble, not intended %d' % (len(structures), nstructs))

    # Negative log-likelihood of the data under each structure.
    struct_lh = []
    for struct in structures:
        _, lh = struct.likelihood(data, db_obj=db)
        struct_lh.append(-log(lh))

    # Defaults returned when there is nothing to optimise; previously `opt`
    # was unbound in that case and the return statement raised.
    opt = array([])
    currsol = 0.
    for numstructs in range(1, len(structures) + 1):
        # Solve: minimize -sum(ci*log(p(Mi|D))) over models Mi, given data D,
        # with 0 <= ci <= 1 for all i; the -log(p(Mi|D)) are in struct_lh.
        dim = numstructs
        p = cvxopt.matrix(struct_lh[:dim])
        c = variable(dim, 'c')
        C1 = (c <= 1)
        C2 = (c >= 0)
        C3 = (sum(c) == 1)
        lp = op(min(p.trans() * c), [C1, C2, C3])
        lp.solve()
        print('Status is %s' % lp.status)
        opt = array(c.value)
        print('Solution %s' % opt)
        print('Struct lh %s' % struct_lh[:numstructs])
        currsol = sum(array([opt[i] * struct_lh[i] for i in range(numstructs)]))
    numstructs = len(structures)
    print('Finished...')
    print('Used %d of %d structures' % (numstructs, len(structures)))
    return structures[:numstructs], opt, currsol
from matplotlib.pylab import *
from rdatkit.datahandlers import RDATFile
from rdatkit.view import VARNA
from rdatkit.secondary_structure import fold
from rdatkit.mapping import MappingData, normalize
from analysis import eigen_reactivities
import sys

# Script: load the RDAT file named by the first command-line argument,
# normalize each row of its first value set, compute "eigen reactivities",
# display both matrices, then fold the construct sequence once per eigen
# reactivity profile and render the first structure of each fold with VARNA.

rdat = RDATFile()
# Close the input file as soon as it has been loaded.
with open(sys.argv[1]) as rdat_file:
    rdat.load(rdat_file)

vals = array(rdat.values.values()[0])
for i in xrange(shape(vals)[0]):
    vals[i, :] = normalize(vals[i, :])
eigenrs = eigen_reactivities(vals)

matshow(vals)
#mshow(vals, cmap=get_cmap('Greys'), vmin=0, vmax=vals.mean(), aspect='auto', interpolation='nearest')
matshow(eigenrs)
#imshow(eigenrs, cmap=get_cmap('Greys'), vmin=eigenrs.min(), vmax=eigenrs.mean(), aspect='auto', interpolation='nearest')
show()

construct = rdat.constructs.values()[0]
# The sequence and the shifted sequence positions do not change between
# iterations, so compute them once.
sequence = construct.sequence
seqpos = [s - construct.offset - 1 for s in construct.seqpos]
for i, e in enumerate(eigenrs[:35]):
    md = MappingData(data=e, seqpos=seqpos)
    # Fold once and reuse the result for both the printout and the picture.
    structures = fold(sequence, mapping_data=md)
    print(structures)
    if not structures:
        # Nothing to draw for this profile; the empty list was printed above.
        continue
    VARNA.cmd(sequence, structures[0].dbn, 'test_results/eigen_struct%s.png' % i)
from rdatkit.view import VARNA
from rdatkit.secondary_structure import fold
from rdatkit.mapping import MappingData, normalize
from analysis import eigen_reactivities
import sys

# Script: load the RDAT file named by the first command-line argument,
# normalize each row of its first value set, compute "eigen reactivities",
# display both matrices, then fold the construct sequence once per eigen
# reactivity profile and render the first structure of each fold with VARNA.
# NOTE(review): RDATFile, array, shape, matshow and show are not imported in
# this span; they are expected to be in scope from earlier in the file.

rdat = RDATFile()
# Close the input file as soon as it has been loaded.
with open(sys.argv[1]) as rdat_file:
    rdat.load(rdat_file)

vals = array(rdat.values.values()[0])
for i in xrange(shape(vals)[0]):
    vals[i, :] = normalize(vals[i, :])
eigenrs = eigen_reactivities(vals)

matshow(vals)
#mshow(vals, cmap=get_cmap('Greys'), vmin=0, vmax=vals.mean(), aspect='auto', interpolation='nearest')
matshow(eigenrs)
#imshow(eigenrs, cmap=get_cmap('Greys'), vmin=eigenrs.min(), vmax=eigenrs.mean(), aspect='auto', interpolation='nearest')
show()

construct = rdat.constructs.values()[0]
# The sequence and the shifted sequence positions do not change between
# iterations, so compute them once.
sequence = construct.sequence
seqpos = [s - construct.offset - 1 for s in construct.seqpos]
for i, e in enumerate(eigenrs[:35]):
    md = MappingData(data=e, seqpos=seqpos)
    # Fold once and reuse the result for both the printout and the picture.
    structures = fold(sequence, mapping_data=md)
    print(structures)
    if not structures:
        # Nothing to draw for this profile; the empty list was printed above.
        continue
    VARNA.cmd(sequence, structures[0].dbn, 'test_results/eigen_struct%s.png' % i)
def predict_run_2D(request, sequences, titles, structures, other_options, messages):
    """Fold the first sequence using a 2D bonus matrix taken from the request.

    Reads ``slope_2d``, ``intercept_2d`` and ``bonuses_2d`` from
    ``request.POST``.  ``bonuses_2d`` is parsed as one whitespace-separated
    row per line into a square matrix whose side is ``len(sequences[0])``;
    on any size mismatch an error is appended to ``messages`` and an all-zero
    matrix is used instead.  When ``applyzscores`` is posted the matrix is
    normalized and converted to non-positive z-score bonuses before folding.
    If ``nbootstraps`` is posted and non-empty, bootstrap annotations are
    computed as well.

    Parameters:
        request: object exposing the submitted form as ``request.POST``.
        sequences: non-empty list; only the first item is used.
        titles: unused; kept for interface compatibility.
        structures: list of structures; only the first (if any) is kept.
            When it is empty the predicted structure is appended to the
            caller's list in place.
        other_options: extra option string appended to the fold options.
        messages: list that receives error messages (mutated in place).

    Returns:
        Tuple ``(sequences, structures, messages, base_annotations)``.

    Raises:
        ValueError: if slope, intercept, a bonus item or ``nbootstraps`` is
            not numeric.
    """
    slope = float(request.POST['slope_2d'])
    intercept = float(request.POST['intercept_2d'])
    bonus_options = ' -xs %s -xo %s ' % (slope, intercept)

    # A 2D run only ever uses the first sequence and the first structure.
    seq = sequences[0]
    sequences = [seq]
    if len(structures) > 0:
        structures = [structures[0]]

    size = len(seq)
    size_error = 'ERROR: 2D BONUS and (first) SEQUENCE size mismatch. No BONUS applied.'
    data = zeros([size, size])
    rows = request.POST['bonuses_2d'].split('\n')
    if len(rows) != size:
        messages.append(size_error)
    else:
        for i, row in enumerate(rows):
            items = row.split()
            if len(items) != size:
                messages.append(size_error)
                # Discard the rows parsed so far: no bonus is applied.
                data = zeros([size, size])
                break
            for j, item in enumerate(items):
                data[i, j] = float(item)

    if 'applyzscores' in request.POST:
        # Rows that contain at least one non-zero entry.  The previous test,
        # len(data[i, :] != 0) > 0, measured the length of the boolean mask
        # and therefore accepted every row.
        bins = [i for i in range(data.shape[0]) if (data[i, :] != 0).any()]
        data = quick_norm(data, bins=bins[10:-10])
        zdata = zscores_by_row(data, slope, intercept)
        means = array(
            [data[i, data[i, :] != 0].mean() for i in range(data.shape[0])])
        # Zero out rows whose mean non-zero value exceeds 0.2, drop negative
        # z-scores, and make every remaining value non-positive.
        zdata[means > 0.2, :] = 0
        zdata[zdata < 0] = 0
        zdata = -abs(zdata)
    else:
        zdata = data
    zdata = zdata.T

    fold_opts = bonus_options + other_options
    predstructs = secondary_structure.fold(seq.sequence,
                                           mapping_data=zdata,
                                           fold_opts=fold_opts,
                                           bonus2d=True)
    if predstructs:
        struct = predstructs[0]
    else:
        # Fall back to a fully unpaired structure.
        struct = secondary_structure.SecondaryStructure(dbn='.' * size)

    base_annotations = []
    # Use .get so that a missing field is treated like an empty one.
    nbootstraps = request.POST.get('nbootstraps')
    if nbootstraps:
        ba = bootstrap_annotations(seq, zdata, int(nbootstraps), fold_opts, True)
        base_annotations.append(ba)

    structures.append(struct)
    return (sequences, structures, messages, base_annotations)
def predict_run_1D_NN(request, sequences, mapping_data, structures, other_options, messages):
    """Fold each sequence, optionally applying 1D bonuses from the request.

    When ``request.POST['predtype']`` is ``'1D'`` the ``bonuses_1d`` text is
    parsed into ``MappingData`` objects appended to ``mapping_data``: blank
    lines are skipped, a line starting with ``#`` closes the current group,
    and every other line holds ``<position> <value>`` (position is 1-based
    in the input and stored 0-based).  With ``raw_bonuses`` posted, values
    are converted with ``exp((value - intercept) / slope) - 1``.  Any parsing
    failure appends an error to ``messages`` and disables the bonuses.

    Sequences and bonus sets are paired by index; the longer list is
    truncated with a warning.  Each sequence is then folded, and when
    bonuses are applied and ``nbootstraps`` is posted, bootstrap annotations
    are computed too (falling back to 100 bootstraps, with a warning, when
    the value is empty or not an integer).

    Parameters:
        request: object exposing the submitted form as ``request.POST``.
        sequences: list of sequence objects to fold.
        mapping_data: list extended in place with the parsed bonus sets.
        structures: list extended in place with one structure per sequence.
        other_options: extra option string appended to the fold options.
        messages: list that receives errors and warnings (mutated in place).

    Returns:
        Tuple ``(base_annotations, structures, mapping_data, messages)``.
    """
    is_1D = (request.POST['predtype'] == '1D')
    is_apply_bonus = is_1D
    slope = float(request.POST['slope_1d'])
    intercept = float(request.POST['intercept_1d'])
    bonus_options = ' -sm %s -si %s ' % (slope, intercept)

    if is_1D:
        modtype = request.POST['modtype']
        use_raw = 'raw_bonuses' in request.POST
        parsed_data = []
        parsed_seqpos = []
        try:
            for line in request.POST['bonuses_1d'].split('\n'):
                if len(line.strip()) == 0:
                    continue
                if line[0] == '#':
                    # A comment line separates consecutive bonus sets.
                    if len(parsed_data) > 0:
                        mapping_data.append(
                            mapping.MappingData(data=parsed_data,
                                                seqpos=parsed_seqpos))
                    parsed_data = []
                    parsed_seqpos = []
                else:
                    items = line.split()
                    if len(items) > 2:
                        raise ValueError('Invalid input')
                    parsed_seqpos.append(int(items[0]) - 1)
                    value = float(items[-1])
                    if use_raw:
                        value = exp((value - intercept) / slope) - 1
                    parsed_data.append(value)
            # Flush the last (or only) bonus set.
            mapping_data.append(
                mapping.MappingData(data=parsed_data, seqpos=parsed_seqpos))
        except Exception:
            # Request boundary: any malformed input just disables the bonus.
            messages.append(
                'ERROR: Invalid bonus input format. No BONUS applied.')
            is_apply_bonus = False

        numitems = min(len(mapping_data), len(sequences))
        # The two warnings were previously attached to the opposite
        # conditions; each now describes the truncation actually performed.
        if numitems != len(mapping_data) and is_apply_bonus:
            messages.append(
                'WARNING: SEQUENCE and BONUS (more) number mismatch. Only BONUS with available SEQUENCE were used.'
            )
            mapping_data = mapping_data[:numitems]
        if numitems != len(sequences) and is_apply_bonus:
            messages.append(
                'WARNING: SEQUENCE (more) and BONUS number mismatch. Only SEQUENCE with available BONUS were used.'
            )
            sequences = sequences[:numitems]

    base_annotations = []
    for i, s in enumerate(sequences):
        if is_apply_bonus:
            predstructs = secondary_structure.fold(
                s.sequence,
                modifier=modtype,
                mapping_data=mapping_data[i],
                fold_opts=bonus_options + other_options)
        else:
            predstructs = secondary_structure.fold(s.sequence)
        if predstructs:
            struct = predstructs[0]
        else:
            # Fall back to a fully unpaired structure.
            struct = secondary_structure.SecondaryStructure(dbn='.' * len(s))
        structures.append(struct)

        if ('nbootstraps' in request.POST) and is_apply_bonus:
            # An empty or non-integer value falls back to the default
            # instead of raising from int().
            try:
                nbootstraps = int(request.POST['nbootstraps'])
            except ValueError:
                nbootstraps = 100
                messages.append(
                    'WARNING: invalid BOOTSTRAP number. Used default 100 instead.'
                )
            # NOTE(review): only other_options is passed here, without
            # bonus_options -- confirm this is intended.
            ba = bootstrap_annotations(s, mapping_data[i], nbootstraps,
                                       other_options, False)
            base_annotations.append(ba)
    return (base_annotations, structures, mapping_data, messages)
def by_nbs_addition(sequence, data, database='default.db.dists', use_data=False, nstructs=20):
    """Weight an ensemble of folded structures by their data likelihood.

    ``sequence`` is folded into up to ``nstructs`` structures; ``data`` is
    passed to the fold as mapping data only when ``use_data`` is true.  Each
    structure's likelihood of ``data`` is evaluated against the pickled
    database ``<settings.MAPPING_DATABASE_PATH>/models/<database>`` and
    stored as ``-log(likelihood)``.  For every prefix of the ensemble the
    linear program

        minimize  sum(c_i * struct_lh_i)
        subject to 0 <= c_i <= 1 and sum(c_i) == 1

    is solved; only the solution of the final, full-size problem is kept.

    Returns a tuple ``(structures, opt, currsol)``: the structures used, the
    solver weights as an array, and the weighted sum of the negative
    log-likelihoods.  When folding yields no structures the result is
    ``([], array([]), 0.)``.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only trusted
    # database files may be loaded here.
    with open('%s/models/%s' % (settings.MAPPING_DATABASE_PATH, database)) as db_file:
        db = pickle.load(db_file)

    if use_data:
        structures = secondary_structure.fold(sequence,
                                              mapping_data=data,
                                              nstructs=nstructs)
    else:
        structures = secondary_structure.fold(sequence, nstructs=nstructs)
    total = len(structures)
    if nstructs > total:
        print('WARNING: Found %d non trivial structures in the ensemble, not intended %d' % (total, nstructs))

    # Negative log-likelihood of the data under each structure.
    struct_lh = []
    for struct in structures:
        _, lh = struct.likelihood(data, db_obj=db)
        struct_lh.append(-log(lh))

    # Initialised up front so that an empty ensemble returns cleanly instead
    # of failing on an unbound `opt`.
    opt = array([])
    currsol = 0.
    numstructs = 1
    while numstructs <= total:
        # Solve: minimize -sum(ci*log(p(Mi|D))) over models Mi, given data D,
        # with 0 <= ci <= 1 for all i; the -log(p(Mi|D)) are in struct_lh.
        dim = numstructs
        p = cvxopt.matrix(struct_lh[:dim])
        c = variable(dim, 'c')
        C1 = (c <= 1)
        C2 = (c >= 0)
        C3 = (sum(c) == 1)
        lp = op(min(p.trans() * c), [C1, C2, C3])
        lp.solve()
        print('Status is %s' % lp.status)
        opt = array(c.value)
        print('Solution %s' % opt)
        print('Struct lh %s' % struct_lh[:numstructs])
        currsol = sum(
            array([opt[i] * struct_lh[i] for i in range(numstructs)]))
        numstructs += 1
    # The loop leaves the counter one past the number of structures used.
    numstructs -= 1
    print('Finished...')
    print('Used %d of %d structures' % (numstructs, total))
    return structures[:numstructs], opt, currsol