Example #1
0
def precalculate_structures(entry):
    """Fold every data section of ``entry`` and store the resulting structure.

    For each ``ConstructSection`` belonging to ``entry`` and each
    ``DataSection`` of that construct, the bonuses returned by
    ``correct_rx_bonus`` are wrapped in ``mapping.MappingData`` and passed to
    ``secondary_structure.fold`` together with the construct sequence.  When
    that yields no structures the fold is retried with ``norm=True``.  The
    ``dbn`` of the first structure is saved on the data section, or ``'NA'``
    when both attempts come back empty.

    A message is printed (and nothing is saved) when the entry has no
    constructs.  Returns ``None``.
    """
    missing_msg = 'FATAL! There are no constructs for entry %s'
    try:
        constructs = ConstructSection.objects.filter(entry=entry)
        # filter() hands back an empty queryset rather than raising
        # DoesNotExist, so the "no constructs" case must be checked
        # explicitly; otherwise the message below is never printed.
        if not constructs:
            print(missing_msg % entry.rmdb_id)
            return
        for c in constructs:
            for d in DataSection.objects.filter(construct_section=c):
                bonuses = correct_rx_bonus(d, c)
                # Swap the -1.0 marker for -999 in place (presumably both
                # flag positions without usable data -- confirm against
                # correct_rx_bonus / mapping.MappingData).
                for i, b in enumerate(bonuses):
                    if b == -1.0:
                        bonuses[i] = -999
                structs = secondary_structure.fold(
                    c.sequence,
                    mapping_data=mapping.MappingData(data=bonuses))
                if len(structs) == 0:
                    # No non-trivial structures found, try again with data
                    # normalization.
                    structs = secondary_structure.fold(
                        c.sequence,
                        mapping_data=mapping.MappingData(data=bonuses,
                                                         norm=True))
                if len(structs) == 0:
                    d.structure = 'NA'
                else:
                    d.structure = structs[0].dbn
                d.save()
    except ConstructSection.DoesNotExist:
        # Kept so that a DoesNotExist raised by any helper called above is
        # still reported the same way as before.
        print(missing_msg % entry.rmdb_id)
Example #2
0
def by_nbs_addition(sequence, data, database='default.db.dists',
                    use_data=False, nstructs=20):
    """Weight an ensemble of folded structures by their data likelihood.

    The pickled model ``database`` is loaded from
    ``settings.MAPPING_DATABASE_PATH/models``.  ``sequence`` is folded into up
    to ``nstructs`` structures (passing ``data`` as mapping data only when
    ``use_data`` is true), and ``-log(likelihood)`` of ``data`` is computed
    for each structure.  A linear program is then solved for every prefix of
    the structure list (sizes 1 .. len(structures)).

    Returns a tuple ``(structures, opt, currsol)`` where ``opt`` is the
    solution of the last program solved and ``currsol`` is
    ``sum(opt[i] * struct_lh[i])`` for that solution.  When folding yields no
    structures, ``([], array([]), 0.)`` is returned instead of failing on an
    unbound ``opt``.
    """
    db_path = '%s/models/%s' % (settings.MAPPING_DATABASE_PATH, database)
    # NOTE: unpickling can execute arbitrary code; ``database`` must always
    # name a trusted file.  The ``with`` block closes the handle, which the
    # bare open() call used to leak.
    with open(db_path) as db_file:
        db = pickle.load(db_file)

    if use_data:
        structures = secondary_structure.fold(sequence, mapping_data=data,
                                              nstructs=nstructs)
    else:
        structures = secondary_structure.fold(sequence, nstructs=nstructs)
    if nstructs > len(structures):
        print('WARNING: Found %d non trivial structures in the ensemble, '
              'not intended %d' % (len(structures), nstructs))

    # Negative log-likelihood of the data under each structure; the
    # per-position probabilities returned alongside are not needed here.
    struct_lh = []
    for struct in structures:
        _, lh = struct.likelihood(data, db_obj=db)
        struct_lh.append(-log(lh))

    currsol = 0.
    opt = array([])  # stays empty when there is nothing to optimise
    # NOTE(review): every prefix size is solved but only the last solution is
    # returned; this looks like an unfinished early-stopping scheme -- confirm
    # before simplifying, since the per-step output below would change.
    for dim in range(1, len(structures) + 1):
        # Solve
        #    minimize sum(c_i * struct_lh[i])  for i < dim
        #    subject to 0 <= c_i <= 1 and sum(c) == 1
        p = cvxopt.matrix(struct_lh[:dim])
        c = variable(dim, 'c')
        lp = op(min(p.trans() * c), [c <= 1, c >= 0, sum(c) == 1])
        lp.solve()
        print('Status is %s' % lp.status)
        opt = array(c.value)
        print('Solution %s' % opt)
        print('Struct lh %s' % struct_lh[:dim])
        currsol = sum(array([opt[i] * struct_lh[i] for i in range(dim)]))

    numstructs = len(structures)
    print('Finished...')
    print('Used %d of %d structures' % (numstructs, len(structures)))
    return structures[:numstructs], opt, currsol
Example #3
0
from matplotlib.pylab import *
from rdatkit.datahandlers import RDATFile
from rdatkit.view import VARNA
from rdatkit.secondary_structure import fold
from rdatkit.mapping import MappingData, normalize
from analysis import eigen_reactivities
import sys

# Load the RDAT file named on the command line, normalize every row of its
# first value set and compute the eigen-reactivities of the result.
rdat = RDATFile()
rdat.load(open(sys.argv[1]))
vals = array(rdat.values.values()[0])
for i in xrange(shape(vals)[0]):
    vals[i, :] = normalize(vals[i, :])
eigenrs = eigen_reactivities(vals)

# Show both matrices; show() is called before any folding starts.
matshow(vals)
#imshow(vals, cmap=get_cmap('Greys'), vmin=0, vmax=vals.mean(), aspect='auto', interpolation='nearest')
matshow(eigenrs)
#imshow(eigenrs, cmap=get_cmap('Greys'), vmin=eigenrs.min(), vmax=eigenrs.mean(), aspect='auto', interpolation='nearest')
show()

# Fold the first construct once per eigen-reactivity (first 35 at most) and
# render the first structure of each fold with VARNA.
construct = rdat.constructs.values()[0]
sequence = construct.sequence
for i, e in enumerate(eigenrs[:35]):
    md = MappingData(
        data=e, seqpos=[s - construct.offset - 1 for s in construct.seqpos])
    # Fold once and reuse the result for both the printout and the drawing
    # (the fold used to be run twice per iteration).
    structs = fold(sequence, mapping_data=md)
    print(structs)
    if len(structs) == 0:
        # Nothing to draw for this component; the empty list printed above
        # already records it.
        continue
    VARNA.cmd(sequence, structs[0].dbn,
              'test_results/eigen_struct%s.png' % i)
Example #4
0
from rdatkit.view import VARNA
from rdatkit.secondary_structure import fold
from rdatkit.mapping import MappingData, normalize
from analysis import eigen_reactivities
import sys

# Load the RDAT file named on the command line, normalize every row of its
# first value set and compute the eigen-reactivities of the result.
rdat = RDATFile()
rdat.load(open(sys.argv[1]))
vals = array(rdat.values.values()[0])
for i in xrange(shape(vals)[0]):
    vals[i, :] = normalize(vals[i, :])
eigenrs = eigen_reactivities(vals)

# Show both matrices; show() is called before any folding starts.
matshow(vals)
#imshow(vals, cmap=get_cmap('Greys'), vmin=0, vmax=vals.mean(), aspect='auto', interpolation='nearest')
matshow(eigenrs)
#imshow(eigenrs, cmap=get_cmap('Greys'), vmin=eigenrs.min(), vmax=eigenrs.mean(), aspect='auto', interpolation='nearest')
show()

# Fold the first construct once per eigen-reactivity (first 35 at most) and
# render the first structure of each fold with VARNA.
construct = rdat.constructs.values()[0]
sequence = construct.sequence
for i, e in enumerate(eigenrs[:35]):
    md = MappingData(
        data=e, seqpos=[s - construct.offset - 1 for s in construct.seqpos])
    # Fold once and reuse the result for both the printout and the drawing
    # (the fold used to be run twice per iteration).
    structs = fold(sequence, mapping_data=md)
    print(structs)
    if len(structs) == 0:
        # Nothing to draw for this component; the empty list printed above
        # already records it.
        continue
    VARNA.cmd(sequence, structs[0].dbn,
              'test_results/eigen_struct%s.png' % i)





Example #5
0
def predict_run_2D(request, sequences, titles, structures, other_options,
                   messages):
    """Predict a structure for the first sequence using 2D bonuses.

    Reads ``slope_2d``, ``intercept_2d`` and ``bonuses_2d`` from
    ``request.POST``.  ``bonuses_2d`` is split on newlines and whitespace into
    a ``len(seq) x len(seq)`` matrix; on any row/column count mismatch an
    error is appended to ``messages`` and an all-zero matrix is used.  When
    ``applyzscores`` is present in the POST data the matrix is normalized and
    converted to z-scores before folding.  The transposed matrix is passed to
    ``secondary_structure.fold`` with ``bonus2d=True``; if no structure comes
    back an all-dots structure is used.  Bootstrap annotations are computed
    when ``nbootstraps`` is present and non-empty.

    Only the first element of ``sequences`` (and of ``structures``, if any)
    is kept.  ``titles`` is accepted for interface compatibility but does not
    influence the result.

    Returns ``(sequences, structures, messages, base_annotations)``.
    Raises ``ValueError`` when a slope, intercept, bonus or bootstrap value is
    not numeric.
    """
    size_error = (
        'ERROR: 2D BONUS and (first) SEQUENCE size mismatch. No BONUS applied.'
    )
    slope = float(request.POST['slope_2d'])
    intercept = float(request.POST['intercept_2d'])
    bonus_options = ' -xs %s -xo %s ' % (slope, intercept)

    seq = sequences[0]
    sequences = [seq]
    if len(structures) > 0:
        structures = [structures[0]]

    n = len(seq)
    data = zeros([n, n])
    rows = request.POST['bonuses_2d'].split('\n')
    if len(rows) != n:
        messages.append(size_error)
    else:
        for i, row in enumerate(rows):
            items = row.split()
            if len(items) != n:
                messages.append(size_error)
                # Discard the rows parsed so far: no bonus is applied at all.
                data = zeros([n, n])
                break
            for j, item in enumerate(items):
                data[i, j] = float(item)

    if 'applyzscores' in request.POST:
        # NOTE(review): len(data[i, :] != 0) is the length of the boolean
        # mask, so this condition holds for every row and ``bins`` is simply
        # all row indices.  "(data[i, :] != 0).any()" was probably intended;
        # left unchanged because it alters what quick_norm receives.
        bins = [i for i in range(data.shape[0]) if len(data[i, :] != 0) > 0]
        data = quick_norm(data, bins=bins[10:-10])
        zdata = zscores_by_row(data, slope, intercept)
        # Mean of the non-zero entries of each row.
        means = array(
            [data[i, data[i, :] != 0].mean() for i in range(data.shape[0])])
        zdata[means > 0.2, :] = 0
        zdata[zdata < 0] = 0
        zdata = -abs(zdata)
    else:
        zdata = data
    zdata = zdata.T

    predstructs = secondary_structure.fold(seq.sequence,
                                           mapping_data=zdata,
                                           fold_opts=bonus_options +
                                           other_options,
                                           bonus2d=True)
    if predstructs:
        struct = predstructs[0]
    else:
        struct = secondary_structure.SecondaryStructure(dbn='.' * n)

    base_annotations = []
    # .get() so that a request without the field skips bootstrapping instead
    # of raising KeyError, matching the key check in predict_run_1D_NN.
    nbootstraps = request.POST.get('nbootstraps')
    if nbootstraps:
        ba = bootstrap_annotations(seq, zdata, int(nbootstraps),
                                   bonus_options + other_options, True)
        base_annotations.append(ba)
    structures.append(struct)

    return (sequences, structures, messages, base_annotations)
Example #6
0
def predict_run_1D_NN(request, sequences, mapping_data, structures,
                      other_options, messages):
    """Predict one structure per sequence, optionally with 1D bonuses.

    When ``request.POST['predtype']`` is ``'1D'``, ``bonuses_1d`` is parsed
    into ``mapping.MappingData`` objects that are appended to
    ``mapping_data``: blank lines are skipped, a line starting with ``#``
    closes the current data set, and every other line holds at most two
    whitespace-separated items (1-based position first, value last).  With
    ``raw_bonuses`` in the POST data each value ``v`` is converted to
    ``exp((v - intercept) / slope) - 1``.  Any parsing failure appends an
    error to ``messages`` and disables the bonuses.

    With bonuses enabled, ``sequences`` and ``mapping_data`` are truncated to
    the shorter of the two (with a warning) and each sequence is folded with
    its mapping data; otherwise each sequence is folded without data.  An
    all-dots structure is used when folding returns nothing.  Each resulting
    structure is appended to ``structures``.  If ``nbootstraps`` is in the
    POST data and bonuses are enabled, bootstrap annotations are computed per
    sequence; an empty or non-integer value falls back to 100 with a warning.

    Returns ``(base_annotations, structures, mapping_data, messages)``.
    """
    is_1D = (request.POST['predtype'] == '1D')
    is_apply_bonus = is_1D
    slope = float(request.POST['slope_1d'])
    intercept = float(request.POST['intercept_1d'])
    bonus_options = ' -sm %s -si %s ' % (slope, intercept)

    if is_1D:
        modtype = request.POST['modtype']
        use_raw = 'raw_bonuses' in request.POST
        parsed_data = []
        parsed_seqpos = []
        try:
            for line in request.POST['bonuses_1d'].split('\n'):
                if len(line.strip()) == 0:
                    continue
                if line[0] == '#':
                    # Header line: flush the data set collected so far.
                    if len(parsed_data) > 0:
                        mapping_data.append(
                            mapping.MappingData(data=parsed_data,
                                                seqpos=parsed_seqpos))
                    parsed_data = []
                    parsed_seqpos = []
                    continue
                items = line.split()
                if len(items) > 2:
                    raise ValueError('Invalid input')
                # Positions are 1-based in the input, 0-based internally.
                parsed_seqpos.append(int(items[0]) - 1)
                value = float(items[-1])
                if use_raw:
                    value = exp((value - intercept) / slope) - 1
                parsed_data.append(value)
            mapping_data.append(
                mapping.MappingData(data=parsed_data, seqpos=parsed_seqpos))
        except Exception:
            # Deliberately broad: any malformed input is reported to the
            # user and prediction continues without bonuses.
            messages.append(
                'ERROR: Invalid bonus input format. No BONUS applied.')
            is_apply_bonus = False

    numitems = min(len(mapping_data), len(sequences))
    if numitems != len(mapping_data) and is_apply_bonus:
        messages.append(
            'WARNING: SEQUENCE (more) and BONUS number mismatch. Only SEQUENCE with available BONUS were used.'
        )
        mapping_data = mapping_data[:numitems]
    if numitems != len(sequences) and is_apply_bonus:
        messages.append(
            'WARNING: SEQUENCE and BONUS (more) number mismatch. Only BONUS with available SEQUENCE were used.'
        )
        sequences = sequences[:numitems]

    # Resolve the bootstrap count once, up front.  An empty or non-integer
    # value falls back to 100, as the warning text promises; this used to
    # raise ValueError for non-numeric input and repeated the warning for
    # every sequence.
    nbootstraps = None
    if (is_apply_bonus and 'nbootstraps' in request.POST
            and len(sequences) > 0):
        try:
            nbootstraps = int(request.POST['nbootstraps'])
        except ValueError:
            nbootstraps = 100
            messages.append(
                'WARNING: invalid BOOTSTRAP number. Used default 100 instead.'
            )

    base_annotations = []

    for i, s in enumerate(sequences):
        if is_apply_bonus:
            predstructs = secondary_structure.fold(
                s.sequence,
                modifier=modtype,
                mapping_data=mapping_data[i],
                fold_opts=bonus_options + other_options)
        else:
            predstructs = secondary_structure.fold(s.sequence)
        if predstructs:
            struct = predstructs[0]
        else:
            struct = secondary_structure.SecondaryStructure(dbn='.' * len(s))
        structures.append(struct)

        if nbootstraps is not None:
            # NOTE(review): only other_options is forwarded here, whereas the
            # fold above uses bonus_options + other_options -- confirm that
            # dropping the slope/intercept options is intended.
            ba = bootstrap_annotations(s, mapping_data[i], nbootstraps,
                                       other_options, False)
            base_annotations.append(ba)

    return (base_annotations, structures, mapping_data, messages)
Example #7
0
def by_nbs_addition(sequence,
                    data,
                    database='default.db.dists',
                    use_data=False,
                    nstructs=20):
    """Weight an ensemble of folded structures by their data likelihood.

    The pickled model ``database`` is loaded from
    ``settings.MAPPING_DATABASE_PATH/models``.  ``sequence`` is folded into up
    to ``nstructs`` structures (passing ``data`` as mapping data only when
    ``use_data`` is true), and ``-log(likelihood)`` of ``data`` is computed
    for each structure.  A linear program is then solved for every prefix of
    the structure list (sizes 1 .. len(structures)).

    Returns a tuple ``(structures, opt, currsol)`` where ``opt`` is the
    solution of the last program solved and ``currsol`` is
    ``sum(opt[i] * struct_lh[i])`` for that solution.  When folding yields no
    structures, ``([], array([]), 0.)`` is returned instead of failing on an
    unbound ``opt``.
    """
    db_path = '%s/models/%s' % (settings.MAPPING_DATABASE_PATH, database)
    # NOTE: unpickling can execute arbitrary code; ``database`` must always
    # name a trusted file.  The ``with`` block closes the handle, which the
    # bare open() call used to leak.
    with open(db_path) as db_file:
        db = pickle.load(db_file)

    if use_data:
        structures = secondary_structure.fold(sequence,
                                              mapping_data=data,
                                              nstructs=nstructs)
    else:
        structures = secondary_structure.fold(sequence, nstructs=nstructs)
    if nstructs > len(structures):
        print('WARNING: Found %d non trivial structures in the ensemble, '
              'not intended %d' % (len(structures), nstructs))

    # Negative log-likelihood of the data under each structure; the
    # per-position probabilities returned alongside are not needed here.
    struct_lh = []
    for struct in structures:
        _, lh = struct.likelihood(data, db_obj=db)
        struct_lh.append(-log(lh))

    currsol = 0.
    opt = array([])  # stays empty when there is nothing to optimise
    # NOTE(review): every prefix size is solved but only the last solution is
    # returned; this looks like an unfinished early-stopping scheme -- confirm
    # before simplifying, since the per-step output below would change.
    for dim in range(1, len(structures) + 1):
        # Solve
        #    minimize sum(c_i * struct_lh[i])  for i < dim
        #    subject to 0 <= c_i <= 1 and sum(c) == 1
        p = cvxopt.matrix(struct_lh[:dim])
        c = variable(dim, 'c')
        lp = op(min(p.trans() * c), [c <= 1, c >= 0, sum(c) == 1])
        lp.solve()
        print('Status is %s' % lp.status)
        opt = array(c.value)
        print('Solution %s' % opt)
        print('Struct lh %s' % struct_lh[:dim])
        currsol = sum(array([opt[i] * struct_lh[i] for i in range(dim)]))

    numstructs = len(structures)
    print('Finished...')
    print('Used %d of %d structures' % (numstructs, len(structures)))
    return structures[:numstructs], opt, currsol