def __init__(self, pastafile, reffile, seqfile, verbose=False):
    """
    Read the three input files and reset every result slot.

    A FileHandler is used to read the residue list, the reference
    presets and the sequence. All matrix/assignment attributes start
    as None and all timing attributes start at 0.

    @param pastafile: filename for residue list
    @type pastafile: string
    @param reffile: filename for reference list (eg. BMRB ASCII file)
    @type reffile: string
    @param seqfile: filename for FASTA sequence file
    @type seqfile: string
    @param verbose: not referenced in the constructor body shown here
    @type verbose: bool
    """
    reader = FileHandler()

    ## list of Residue.PastaResidue objects
    self.residues = reader.read_pasta(pastafile, reffile)
    ## list of Residue.AminoAcid objects
    self.amino_acids = reader.read_preset(reffile)
    ## list of Residue.AminoAcid objects
    self.seq = reader.read_seq(seqfile, reffile)

    ## numpy.ndarray for typing posterior probabilities
    self.P = None
    ## numpy.ndarray for linking constraints
    self.L = None
    ## numpy.ndarray for aa type in sequence
    self.S = None
    ## list of assignments and respective similarity score
    self.A = None

    ## ILP STUFF
    ## list of assignments and respective costs
    self.B = None
    ## cost matrix ILP
    self.C = None
    ## assignment matrices from solution pool
    self.Xs = None
    ## Linking Matrix of ILP
    self.ILP_L = None

    ## timing fields (judging by their names), all initialised to 0;
    ## chained targets are assigned left to right
    self.typing_elapsed = self.linking_elapsed = 0
    self.mapping_elapsed = self.full_running_time = 0
if __name__ == '__main__':
    # Manual check: compute the linking matrix of one residue list twice,
    # once with conservative=True and once with conservative=False, and
    # display both matrices.
    from FileHandler import FileHandler
    from Linking import Linking
    import pylab as pl

    fh = FileHandler()

    # input / output locations
    statsfile = 'shiftPresets/bmrb.shift'
    folder = "Datasets/Ubiquitin/"
    fn = folder + "residue_lists/Ub_opt_relabeled.pasta"
    seqfile = folder + "sequences/Ub.fasta"
    result_folder = folder + "Results/"

    # linking settings shared by both runs
    tol, strat = .6, "Joint"

    residues = fh.read_pasta(fn, statsfile)
    link = Linking(residues)

    # The generator is consumed in order, so the conservative matrix is
    # computed first, exactly as with two separate statements.
    L, L2 = (link.linking_matrix(strat, tolerance=tol, conservative=flag)
             for flag in (True, False))

    for matrix in (L, L2):
        pl.matshow(matrix)
    pl.show()

    ## link.check_pos_constraints(L,residues,tol)
    ## link.check_neg_constraints(L,residues,tol)
    #
    # A = fh.assignment_matrix_from_assigned(fn, seqfile, statsfile).astype("i")
if __name__ == "__main__":
    # Manual check: compute the likelihood matrix of a residue list against
    # the amino acid presets and show it as an image with a colour bar.
    import pylab as pl
    from numpy import max, mean, take, zeros
    from Residue import PastaResidue, AminoAcid
    from MaxLikelihood import Likelihood
    from FileHandler import FileHandler

    fh = FileHandler()

    # The reference list doubles as the statistics file for read_pasta.
    fn = ("/is/ei/jhooge/EclipseWorkspaces/PASTA/PyPASTA/GMM/src/"
          "Classification/tests/reference_lists/bmrb.shift")
    statsfile = fn

    # pastalist = 'tests/residue_lists/all_singles.pasta'
    pastalist = ("Datasets/Ubiquitin/residue_lists/"
                 "incompleteData/Ub_bmrb_missing_shifts_0.50.pasta")

    amino_acids = fh.read_preset(statsfile)
    residues = fh.read_pasta(pastalist, statsfile)

    toAA = AbstractMapping.create("on_amino_acid")
    toRes = AbstractMapping.create("on_residue")

    # aas is a second name for the very same object as amino_acids, so the
    # deletion below also drops the entry at index 1 from amino_acids,
    # which is what calc_likelihoods receives.
    aas = amino_acids
    del aas[1]

    L = Likelihood()
    A = L.calc_likelihoods(residues, amino_acids, previous=False)

    pl.matshow(A)
    pl.colorbar()
    pl.show()
x[i] = ones_like(x[i]) i += 1 return x if __name__ == '__main__': from FileHandler import FileHandler from numpy import argmax, mean, max import pylab as pl fh = FileHandler() L = Likelihood() # pastafile = 'multiple_test_files/Ubiquitin/Ub_new.pasta' pastafile = 'tests/residue_lists/all_singles.pasta' statsfile = 'shiftPresets/bmrb.shift' residues = fh.read_pasta(pastafile, statsfile) amino_acids = fh.read_preset(statsfile) print ' '.join([aa.three_let for aa in amino_acids]) # r = residues[1] # print r.name # p11 = L.calc_likelihoods(residues, amino_acids, previous=False, summarize=mean) # p12 = L.calc_likelihoods(residues, amino_acids, previous=True, summarize=mean) p21 = L.calc_likelihoods(residues, amino_acids, previous=False, summarize=max) p22 = L.calc_likelihoods(residues, amino_acids, previous=True, summarize=max) print p21 print p22 # pl.matshow(p11) # pl.colorbar()
from FileHandler import FileHandler
from Mapping import Mapping
from Mapping2 import AbstractMapping
from Definitions import three2One
from MaxLikelihood import Likelihood
from numpy import array,mean, max
import pylab as pl

fh = FileHandler()

# test fixtures: residue lists, matching sequences and the reference list
pairs = 'tests/residue_lists/all_pairs.pasta'
singles = 'tests/residue_lists/all_singles.pasta'
seqfile_pairs = 'tests/sequences/all_pairs.fasta'
seqfile_singles = 'tests/sequences/all_singles.fasta'
statsfile = 'tests/reference_lists/bmrb.shift'

single_residues = fh.read_pasta(singles, statsfile)
amino_acids = fh.read_seq(seqfile_singles, statsfile)

# Atom label -> index table; each label's index is its position in
# this list (0 for 'CO' up to 16 for "CH2").
atom_labels = ['CO', 'CA', 'CB', 'CG', 'CG1', 'CG2', 'CD', 'CD1', 'CD2',
               'CE', 'CE1', 'CE2', 'CE3', 'CZ', 'CZ2', 'CZ3', "CH2"]
atoms = dict((label, index) for index, label in enumerate(atom_labels))

# one-letter codes of the sequence entries, concatenated into one string
aa_names = ''.join([aa.one_let for aa in amino_acids])

# k = aa_names.index("F")
# print "Residue: ",res.name_im1, res.get_carbons(previous=True)
# print "Amino Acid: ",aa.three_let, aa.get_carbons()
mapper1 = Mapping(aa_mapping=True)
res= get_rnd_residue(residues,i) swap_rnd_atom(res,previous) i = get_indices(residues, previous) m = float(get_no_of_ambiguous_keys(residues)) return residues if __name__ == '__main__': fh = FileHandler() folder = "Datasets/Ubiquitin/" pastafn = "residue_lists/Ub_bmrb_unassigned.pasta" pastafn2 = "residue_lists/Ub_opt_unambiguous_unassigned.pasta" presetfn = 'shiftPresets/bmrb.shift' seqfn = folder + "sequences/Ub_bmrb.fasta" result_folder = folder + "Results/" residues = fh.read_pasta(folder + pastafn, presetfn) residues2 = fh.read_pasta(folder + pastafn2, presetfn) ## ADD NOISE # noise = linspace(0,3,31) # for n in noise: # print residues[0].shifts_i.values() # new_res = add_noise(deepcopy(residues), n) # outfolder = folder + "UbqBaxNoise=%.1f.pasta" %n # fh.write_pasta(outfolder, new_res) # print outfolder, "written" ## INTRODUCE AMBIGUOUS SHITS # old_res = deepcopy(residues) # for p in arange(.0, .55, .05): # new_res = make_ambiguous(old_res,p)