def __init__(self, badStrs=None, trueStrs=None, falseStrs=None):
    """Build the lower-cased sets of bad, true and false strings.

    Inputs:
    - badStrs: string(s) stored lower-cased in self.badStrs;
      None means no bad strings.
    - trueStrs: string(s) stored lower-cased in self.trueStrs;
      None means use every entry of _TrueValues that has a ``lower``
      method, with any bad strings removed.
    - falseStrs: string(s) stored lower-cased in self.falseStrs;
      None means use every entry of _FalseValues that has a ``lower``
      method, with any bad strings removed.

    Explicit arguments are passed through SeqUtil.asCollection before
    being iterated (presumably so a single string is accepted as well
    as a collection -- confirm against SeqUtil).

    Raises ValueError if explicitly supplied true or false strings
    overlap the bad strings, or if the resulting true and false sets
    overlap each other.
    """
    if badStrs is None:
        self.badStrs = set()
    else:
        self.badStrs = set(
            s.lower() for s in SeqUtil.asCollection(badStrs))

    if trueStrs is None:
        # Defaults yield to the bad strings instead of raising.
        self.trueStrs = set(
            v.lower() for v in _TrueValues if hasattr(v, "lower"))
        self.trueStrs -= self.badStrs
    else:
        self.trueStrs = set(
            s.lower() for s in SeqUtil.asCollection(trueStrs))
        if self.trueStrs & self.badStrs:
            raise ValueError(
                "One or more bad values and true values overlap")

    if falseStrs is None:
        # Defaults yield to the bad strings instead of raising.
        self.falseStrs = set(
            v.lower() for v in _FalseValues if hasattr(v, "lower"))
        self.falseStrs -= self.badStrs
    else:
        self.falseStrs = set(
            s.lower() for s in SeqUtil.asCollection(falseStrs))
        if self.falseStrs & self.badStrs:
            # This message used to say "true values" (copy-paste slip).
            raise ValueError(
                "One or more bad values and false values overlap")

    if self.trueStrs & self.falseStrs:
        raise ValueError("One or more true and false values overlap")
def test_findlonglen_returns_longest_length(self):
    """SeqUtil.findlonglen must return the length of the key 'length100'.

    'length100' is the longest key and carries the smallest value, so a
    result derived from the values instead of the key lengths would not
    match.
    """
    sample = dict(length1=15, length10=150, length100=1)
    expected = len('length100')
    self.assertEqual(expected, SeqUtil.findlonglen(sample))
def __init__(self, badStrs=None, trueStrs=None, falseStrs=None):
    """Build the lower-cased sets of bad, true and false strings.

    Inputs:
    - badStrs: string(s) stored lower-cased in self.badStrs;
      None means no bad strings.
    - trueStrs: string(s) stored lower-cased in self.trueStrs;
      None means use every entry of _TrueValues that has a ``lower``
      method, with any bad strings removed.
    - falseStrs: string(s) stored lower-cased in self.falseStrs;
      None means use every entry of _FalseValues that has a ``lower``
      method, with any bad strings removed.

    Explicit arguments are passed through SeqUtil.asCollection before
    being iterated (presumably so a single string is accepted as well
    as a collection -- confirm against SeqUtil).

    Raises ValueError if explicitly supplied true or false strings
    overlap the bad strings, or if the resulting true and false sets
    overlap each other.
    """
    if badStrs is None:
        self.badStrs = set()
    else:
        self.badStrs = set(
            s.lower() for s in SeqUtil.asCollection(badStrs))

    if trueStrs is None:
        # Defaults yield to the bad strings instead of raising.
        self.trueStrs = set(
            v.lower() for v in _TrueValues if hasattr(v, "lower"))
        self.trueStrs -= self.badStrs
    else:
        self.trueStrs = set(
            s.lower() for s in SeqUtil.asCollection(trueStrs))
        if self.trueStrs & self.badStrs:
            raise ValueError(
                "One or more bad values and true values overlap")

    if falseStrs is None:
        # Defaults yield to the bad strings instead of raising.
        self.falseStrs = set(
            v.lower() for v in _FalseValues if hasattr(v, "lower"))
        self.falseStrs -= self.badStrs
    else:
        self.falseStrs = set(
            s.lower() for s in SeqUtil.asCollection(falseStrs))
        if self.falseStrs & self.badStrs:
            # This message used to say "true values" (copy-paste slip).
            raise ValueError(
                "One or more bad values and false values overlap")

    if self.trueStrs & self.falseStrs:
        raise ValueError("One or more true and false values overlap")
# input/output locations (relative paths are resolved after the chdir below)
inputFile = './data/5.leaderboard_data-1.txt'
#inputFile = 'C:/Users/Ashis/Downloads/dataset_102_4 (1).txt'
outputFile = './results/5.leaderboard.txt'
aminoAcidMassMapFile = './data/integer_mass_table.txt'

# switch to the configured working directory
os.chdir(curDir)

# input file: line 1 is the integer N, line 2 the space-separated spectrum
with open(inputFile) as infile:
    N = int(infile.readline().strip())
    spectrumLine = infile.readline().strip()
spectrum = list(map(int, spectrumLine.split(" ")))

# make the SeqUtil module under ./code importable and reload it
sys.path.append('code')
import SeqUtil
importlib.reload(SeqUtil)

### original work ####
# load the amino acid mass table
AAMassMap = SeqUtil.loadAminoAcidMass(aminoAcidMassMapFile)

# generate sequence from spectrum using the masses of the table
seq = SeqUtil.leaderboardCyclopeptideSequencing(spectrum, N, AAMassMap.values())

# output: the elements of seq joined by "-"
with open(outputFile, "w") as outfile:
    outfile.write("-".join(map(str, seq)))
# input/output locations (relative paths are resolved after the chdir below)
inputFile = './data/3.theoretical-spectrum-data-1.txt'
#inputFile = 'C:/Users/Ashis/Downloads/dataset_98_3.txt'
outputFile = './results/3.theoretical-spectrum.txt'
aminoAcidMassMapFile = './data/integer_mass_table.txt'

# switch to the configured working directory
os.chdir(curDir)

# input file: the peptide is the first line, surrounding whitespace removed
with open(inputFile) as infile:
    peptide = infile.readline().strip()

# make the SeqUtil module under ./code importable and reload it
sys.path.append('code')
import SeqUtil
importlib.reload(SeqUtil)

### original work ####
# load the amino acid mass table
AAMassMap = SeqUtil.loadAminoAcidMass(aminoAcidMassMapFile)

# generate the spectrum of the peptide
spectrum = SeqUtil.cyclospectrum(peptide, AAMassMap)

# output: the elements of spectrum separated by single spaces
with open(outputFile, "w") as outfile:
    outfile.write(" ".join(map(str, spectrum)))
def __init__(self, badStrs=("NaN", "?")):
    """Store the bad strings, lower-cased, as the set self.badStrs.

    badStrs may be one string or a collection of strings; which of the
    two it is gets decided by SeqUtil.isCollection.
    """
    if SeqUtil.isCollection(badStrs):
        candidates = badStrs
    else:
        # wrap a lone value so both cases share the same code path
        candidates = (badStrs,)
    self.badStrs = set(item.lower() for item in candidates)
# Pipeline script for the 'bac-' data set: align with prank, pick models via
# SeqUtil.bestmod, write and run a MrBayes ('mb') input file, then build a
# report. Usage (from sys.argv): <out> <query>.
import os, sys
import SeqUtil, Report

# make sure the output directories exist
if not os.path.exists('aligns'):
    os.mkdir('aligns')
if not os.path.exists('Bayes'):
    os.mkdir('Bayes')
#if not os.path.exists('ML'):
#    os.mkdir('ML')

# command-line arguments: out is used in every file name below, query is
# only passed on to the report
out= sys.argv[1]
query = sys.argv[2]

SeqUtil.rename('Data/bac-'+out+'.fas')

# skip the external prank run when its output file is already present
# NOTE(review): bayesinNex is assumed to belong under this guard as well — confirm
if not os.path.exists('aligns/bac-'+out+'.best.nex'):
    os.system('prank -d=Data/bac-'+out+' -o=aligns/bac-'+out+' -f=nexus -quiet')
    SeqUtil.bayesinNex('aligns/bac-'+out+'.best.nex')
#SeqUtil.splicealign('aligns/bac-'+out+'.best.nex','Bayes/bac-'+out+'-mod.nxs')
#models=SeqUtil.bestmod('Bayes/bac-'+out+'-mod.nxs')
models_ori=SeqUtil.bestmod('aligns/bac-'+out+'.best.nex')

# skip writing the MrBayes input when it already exists
# NOTE(review): the 'mb' call is assumed to belong under this guard as well — confirm
if not os.path.exists('Bayes/bac-'+out+'-bayes.nxs'):
    SeqUtil.bayesfile('aligns/bac-'+out+'.best.nex',models_ori,'Bayes/bac-'+out+'-bayes.nxs')
    #SeqUtil.bayesfile('Bayes/bac-'+out+'-mod.nxs',models,'Bayes/bac-'+out+'-bayes.nxs')
    os.system('mb Bayes/bac-'+out+'-bayes.nxs')
#SeqUtil.pamlseqnex('Bayes/bac-'+out+'-mod.nxs','ML/bac-'+out)
#for mod in models.keys():
#    SeqUtil.pamlinput('ML/bac-'+out,'ML/bac-'+out+'.out','ML/bac-'+out+'.ctl',{models.keys()[mod].split('+')[0]:models[models.keys()[mod]][1]})
#    os.system('codeml ML/bac-'+out+'.ctl')
#    SeqUtil.extractMLtree('ML/bac-'+out+'.out')

# final report for this run
Report.generateReport(out,query,models_ori,'bac')
# settings: working directory plus input/output locations relative to it
curDir = 'E:/Copy/Coursera/Bioinformatics Algorithms (part-I)/MyPrograms/week2'
#curDir = 'D:/Copy/Coursera/Bioinformatics Algorithms (part-I)/MyPrograms/week2'
inputFile = './data/3.theoretical-spectrum-data-1.txt'
#inputFile = 'C:/Users/Ashis/Downloads/dataset_98_3.txt'
outputFile = './results/3.theoretical-spectrum.txt'
aminoAcidMassMapFile = './data/integer_mass_table.txt'

# switch to the configured working directory
os.chdir(curDir)

# input file: the peptide is the first line, surrounding whitespace removed
with open(inputFile) as infile:
    peptide = infile.readline().strip()

# make the SeqUtil module under ./code importable and reload it
sys.path.append('code')
import SeqUtil
importlib.reload(SeqUtil)

### original work ####
# load the amino acid mass table
AAMassMap = SeqUtil.loadAminoAcidMass(aminoAcidMassMapFile)

# generate the spectrum of the peptide
spectrum = SeqUtil.cyclospectrum(peptide, AAMassMap)

# output: the elements of spectrum separated by single spaces
with open(outputFile, "w") as outfile:
    outfile.write(" ".join(map(str, spectrum)))
aminoAcidMassMapFile = './data/integer_mass_table.txt'

# switch to the configured working directory
os.chdir(curDir)

# input file: the first line is the space-separated integer spectrum
with open(inputFile) as infile:
    spectrumLine = infile.readline().strip()
spectrum = list(map(int, spectrumLine.split(" ")))

# make the SeqUtil module under ./code importable and reload it
sys.path.append('code')
import SeqUtil
importlib.reload(SeqUtil)

### original work ####
# load the amino acid mass table (not used further in this part of the script)
AAMassMap = SeqUtil.loadAminoAcidMass(aminoAcidMassMapFile)

# spectral convolution of the spectrum, sorted in ascending order
conv = sorted(SeqUtil.spectralConvolution(spectrum))

# output: the sorted values separated by single spaces
with open(outputFile, "w") as outfile:
    outfile.write(" ".join(map(str, conv)))
aminoAcidMassMapFile = './data/integer_mass_table.txt'

# switch to the configured working directory
os.chdir(curDir)

# input file: line 1 is the integer N, line 2 the space-separated spectrum
with open(inputFile) as infile:
    N = int(infile.readline().strip())
    spectrumLine = infile.readline().strip()
spectrum = list(map(int, spectrumLine.split(" ")))

# make the SeqUtil module under ./code importable and reload it
sys.path.append('code')
import SeqUtil
importlib.reload(SeqUtil)

### original work ####
# load the amino acid mass table
AAMassMap = SeqUtil.loadAminoAcidMass(aminoAcidMassMapFile)

# generate sequence from spectrum using the masses of the table
seq = SeqUtil.leaderboardCyclopeptideSequencing(spectrum, N, AAMassMap.values())

# output: the elements of seq joined by "-"
with open(outputFile, "w") as outfile:
    outfile.write("-".join(map(str, seq)))
# switch to the configured working directory
os.chdir(curDir)

# input file: the first line is the space-separated integer spectrum
with open(inputFile) as infile:
    spectrumLine = infile.readline().strip()
spectrum = list(map(int, spectrumLine.split(" ")))

# make the SeqUtil module under ./code importable and reload it
sys.path.append('code')
import SeqUtil
importlib.reload(SeqUtil)

### original work ####
# load the amino acid mass table
AAMassMap = SeqUtil.loadAminoAcidMass(aminoAcidMassMapFile)

# generate the candidate sequences from the spectrum
sequences = SeqUtil.cyclopeptideSequencing(spectrum, AAMassMap.values())

# output: each sequence as "-"-joined masses, sequences separated by spaces
with open(outputFile, "w") as outfile:
    seqStrings = ["-".join(map(str, seq)) for seq in sequences]
    outfile.write(" ".join(seqStrings))
# input/output locations (relative paths are resolved after the chdir below)
inputFile = './data/7.convolutional_seq_data-1.txt'
#inputFile = 'C:/Users/Ashis/Downloads/dataset_104_7.txt'
outputFile = './results/7.convolutional_seq.txt'

# switch to the configured working directory
os.chdir(curDir)

# input file: line 1 is the integer M, line 2 the integer N,
# line 3 the space-separated spectrum
with open(inputFile) as infile:
    M = int(infile.readline().strip())
    N = int(infile.readline().strip())
    spectrumLine = infile.readline().strip()
spectrum = list(map(int, spectrumLine.split(" ")))

# make the SeqUtil module under ./code importable and reload it
sys.path.append('code')
import SeqUtil
importlib.reload(SeqUtil)

### original work ####
# generate sequence from spectrum
seq = SeqUtil.convolutionCyclopeptideSequencing(spectrum, M, N)

# output: the elements of seq joined by "-"
with open(outputFile, "w") as outfile:
    outfile.write("-".join(map(str, seq)))
# load SeqUtil methods sys.path.append('code') import SeqUtil importlib.reload(SeqUtil) lenTargetPeptide = len(target) lenTargetDna = lenTargetPeptide*3 foundDnas = [] # storage of target dan sequences ## search amino acids in forward direction for start in range(0,3): # loop for reading frame start curDna = dna[start:] curRna = SeqUtil.dna2rna(curDna) peptides = SeqUtil.rna2peptide(curRna, mapFile='./data/RNA_codon_table_1.txt') pepStartIndex = 0 for pep in peptides: # find positions of target in peptide targetPositions = [pos for pos in range(0, len(pep)) if pep[pos:(pos+lenTargetPeptide)]==target] # get corresponding dna sequence and save targetDnas = [curDna[(pepStartIndex+pos*3):(pepStartIndex+pos*3+lenTargetDna)] for pos in targetPositions] foundDnas.extend(targetDnas) # update pepStartIndex for the next peptide pepStartIndex = pepStartIndex + (len(pep)+1)*3
if not os.path.exists('aligns'): os.mkdir('aligns') if not os.path.exists('Bayes'): os.mkdir('Bayes') #if not os.path.exists('ML'): # os.mkdir('ML') out=sys.argv[1] query=sys.argv[2] try: paml=sys.argv[3] paml= paml=='-y' except IndexError: paml=False print "Beginning alignment" SeqUtil.rename('Data/all-'+out+'.fas') os.system('prank -d=Data/all-'+out+' -o=aligns/all-'+out+' -f=nexus -quiet') SeqUtil.bayesinNex('aligns/all-'+out+'.best.nex') #SeqUtil.splicealign('aligns/all-'+out+'.best.nex','Bayes/all-'+out+'-mod.nxs') print "Alignment complete.\nCalculating best model for tree finding" models_ori=SeqUtil.bestmod('aligns/all-'+out+'.best.nex') #models=SeqUtil.bestmod('Bayes/all-'+out+'-mod.nxs') #print models_ori, models if paml: for mod in models.keys(): SeqUtil.pamlseqnex('Bayes/all-'+out+'-mod.nxs','ML/all-'+out+mod.split('+')[0]) if models[mod][0]=='0' and models[mod][1]=='0': os.system('phyml -i ML/all-'+out+mod.split('+')[0]+' -d aa -b 100 -m '+mod.split('+')[0]+ ' -f e -s BEST -u aligns/all-'+out+'.ed.2.dnd -o tl') elif models[mod][0]=='0': os.system('phyml -i '+'ML/all-'+out+mod.split('+')[0]+' -d aa -b 100 -m '+mod.split('+')[0]+