def test_writeFastaFile(self):
    """Test method to test the correct writing of a fasta file.

    Two cases are checked: a file name that already ends in ".fas" and a
    file name without extension. In both cases the written content is read
    back from "testWriteFasta.fas" and compared line by line.
    """
    fastaPath = "testWriteFasta.fas"
    sequence = [["ACGT", "ACGTAATTA"]]
    expectedReadSequence = [
        ">Alignment 0 sequence 0", "ACGT",
        ">Alignment 0 sequence 1", "ACGTAATTA",
    ]

    # Start from a clean slate in case an earlier run left the file behind.
    if os.path.exists(fastaPath):
        os.remove(fastaPath)

    # first test case: filename with extension
    # second test case: filename without extension
    for fileName in ("testWriteFasta.fas", "testWriteFasta"):
        try:
            io().writeFastaFile(sequence, fileName)
            # The context manager closes the handle even when reading fails.
            with open(fastaPath) as testInputFile:
                readInputSequence = [line.strip("\n") for line in testInputFile]
            self.assertEqual(expectedReadSequence, readInputSequence)
        finally:
            # Remove the file even when the assertion fails, so that one
            # failing case cannot influence the next case or the next run.
            if os.path.exists(fastaPath):
                os.remove(fastaPath)
def test_writeFastaFile(self):
    """Test method to test the correct writing of a fasta file.

    Two cases are checked: a file name that already ends in ".fas" and a
    file name without extension. In both cases the written content is read
    back from "testWriteFasta.fas" and compared line by line.
    """
    fastaPath = "testWriteFasta.fas"
    sequence = [["ACGT", "ACGTAATTA"]]
    expectedReadSequence = [
        ">Alignment 0 sequence 0", "ACGT",
        ">Alignment 0 sequence 1", "ACGTAATTA",
    ]

    # Start from a clean slate in case an earlier run left the file behind.
    if os.path.exists(fastaPath):
        os.remove(fastaPath)

    # first test case: filename with extension
    # second test case: filename without extension
    for fileName in ("testWriteFasta.fas", "testWriteFasta"):
        try:
            io().writeFastaFile(sequence, fileName)
            # The context manager closes the handle even when reading fails.
            with open(fastaPath) as testInputFile:
                readInputSequence = [line.strip("\n") for line in testInputFile]
            self.assertEqual(expectedReadSequence, readInputSequence)
        finally:
            # Remove the file even when the assertion fails, so that one
            # failing case cannot influence the next case or the next run.
            if os.path.exists(fastaPath):
                os.remove(fastaPath)
def nussinov(sequence, outputFile): """Executes the RNA-folding algorithm from Nussinov. sequence: The RNA-sequnce as a list. outputFile: The name of the output file.""" print "\nThe following sequence is given:" print sequence[0] print "\n" nussinov = Nussinov(sequence[0]) nussinov.execute() print "\nDot-bracket: " io().writeRnaDotBracketNotation(sequence[0], nussinov.pairedBases, outputFile) print "The result was also written to: ", os.path.abspath(outputFile)
def needlemanWunschN3(sequences, weightFunction="weightFunctionDifference", outputFile="nw3.fas"): """Executes the Needleman-Wunsch algorithm with three sequences""" print "\nThe following sequences are given:" for i in sequences: print i print "\nComputing solution...\n\n" nw3 = NW3(sequences[0], sequences[1], sequences[2], weightFunction) result = nw3.execute() io().writeFastaFile(result, outputFile) print "\nScore: ", nw3.computation_matrix[-1][-1][-1] print "Number of optimal solutions: ", len(result) print "\nOne solution is:\n", result[0][0], "\n", result[0][1], "\n", result[0][2] print "\nFor more solutions look in the file \"nw3.fas\" in the bin directory.\n"
def needlemanWunsch(sequences, scoreFunction, outputFile, numberOfSolutions): """Executes the Needleman-Wunsch algorithm with a default score function defined as: a == b -> 0 and a !=b --> 1.\n Stores the alignments per default in file needlemanWunsch.fas. To change the score function define a function in class PairwiseAligmentHelper and define the name as an input paramter. scoreFunction: The name of the weigh function which is defined in class PairwiseAligmentHelper. outputFile: The path to the output file. numberOfSolutions: Maximal number of optimal solutions which should be computed.""" print "\nThe following sequences are given:" for i in sequences: print i print "\nComputing solution...\n\n" result = nw().compute(sequences, scoreFunction, int(numberOfSolutions), scoringValue=True) print "\nScore: ", result[1] print "Number of optimal solutions: ", len(result[0]) print "\nOne solution is:\n", result[0][0][0], "\n", result[0][0][1] print "\nFor more solutions look in the file \"needlemanWunsch.fas\" in the bin directory.\n" io().writeFastaFile(result[0], outputFile)
def test_readFastaFile(self):
    """Test method to test the correct reading of a fasta file.

    The file "testReadFasta.fas" is written with writeFastaFile and read
    back with readFastaFile, with and without multipleSequenceAlignment.
    """
    fastaPath = "testReadFasta.fas"
    # Start from a clean slate in case an earlier run left the file behind.
    if os.path.exists(fastaPath):
        os.remove(fastaPath)

    try:
        # first test case: two sequences
        sequenceToWrite = [["ACGT", "ACGTAATTA"]]
        expectedSequence = ["ACGT", "ACGTAATTA"]
        io().writeFastaFile(sequenceToWrite, fastaPath)
        readSequence = io().readFastaFile(fastaPath, multipleSequenceAlignment=False)
        self.assertEqual(expectedSequence, readSequence)

        # second test case: three sequences are stored, but the file is
        # read without multipleSequenceAlignment, so the result must not
        # equal the full list
        sequenceToWrite = [["ACGT", "ACGTAATTA", "AGTTG"]]
        expectedSequence = ["ACGT", "ACGTAATTA", "AGTTG"]
        io().writeFastaFile(sequenceToWrite, fastaPath)
        readSequence = io().readFastaFile(fastaPath, multipleSequenceAlignment=False)
        self.assertNotEqual(expectedSequence, readSequence)

        # third test case: multiple sequences
        readSequence = io().readFastaFile(fastaPath, multipleSequenceAlignment=True)
        self.assertEqual(expectedSequence, readSequence)
    finally:
        # Clean up even when an assertion fails, so that no stale file is
        # left behind for later runs.
        if os.path.exists(fastaPath):
            os.remove(fastaPath)
def fengDoolittle(sequences, weightFunction, similarityScore, outputFile): """Executes the heuristic multiple sequence alignment by Feng and Doolittle. sequences: All input sequnces to align. weightFunction: The weight function defined in class PairwiseAlignmentHelper for the Needleman-Wunsch algorithm to compute the optimal local alignment. similarityScore: Name of a similarity score defined in class PairwiseAligmentHelper. outputFile: The output file name.""" fd = FengDoolittle(sequences, weightFunction, similarityScore) alignmentDict = fd.computeMultipleAlignment() alignment = [[]] for i in alignmentDict: alignment[0].append(alignmentDict[i]) io().writeFastaFile(alignment, outputFile) print "Input sequences:\n" for i in sequences: print i print "\nAlignment:" for i in alignmentDict: print alignmentDict[i] print sumOfPairs(alignment[0], weightFunction)
def gotoh(sequences, scoreFunction="weightFunctionDifference", costFunction="gapCost", outputFile="gotoh.fas"): """Executes the Gotoh algorithm with a default score function defined as: a == b -> 0 and a !=b --> 1 and a cost function defined as: g(x) = 2 + k.\n Stores the alignments per default in file gotoh.fas. To change the score or cost function define a function in class PairwiseAligmentHelper and define the name as an input paramter. scoreFunction: The name of the weigh function which is defined in class PairwiseAligmentHelper. costFunction: The name of the gap cost function which is defined in class PairwiseAligmentHelper. outputFile: The path to the output file. """ print "The following sequences are given:" for i in sequences: print i print "Computing solution..." gotoh = Gotoh(sequences[0], sequences[1], scoreFunction, costFunction) result = gotoh.compute() io().writeFastaFile(result, outputFile) print "Number of solutions: ", len(result) print "Score:", max(gotoh.computationMatrix[0][-1][-1], max(gotoh.computationMatrix[1][-1][-1], gotoh.computationMatrix[2][-1][-1])) print "One solution is:\n", result[0][0], "\n", result[0][1] print "For more solutions look in the file \"gotoh.fas\" in the bin directory."
def upgmaWpgma(upgmaWpgma, sequences, outputFile, fileFormat): """Executes the a phylogenetic clustering with a upgm or wpgm weighting. sequences: All defined input sequences as a list. outputFile: The name of the output file fileFormat: The file format of the output file""" #create print "The following sequences are given:" for i in sequences: print i print "Computing clustering..." data = mah().createDataForUpgmaWpgma(sequences) if upgmaWpgma: upgma = UpgmaWpgma(data[0], len(data[1])) upgma.compute_clustering() if not fileFormat: outputFile += ".graphML" io().writeGraphMLFile(upgma.mapping, outputFile) print "Clustering written as graphML file: ", os.path.abspath(outputFile) else: outputFile += ".newickTree" cluster = upgma.get_newick_tree(with_edge_weights=True) io().writeNewickTree(cluster, outputFile) print "Computed upgma cluster: ", cluster print "The clustering was also written to: ", os.path.abspath(outputFile) else: wpgma = UpgmaWpgma(data[0], len(data[1]), False, data[2]) wpgma.compute_clustering() if not fileFormat: outputFile += ".graphML" io().writeGraphMLFile(wpgma.mapping, outputFile) print "Clustering written as graphML file: ", os.path.abspath(outputFile) else: outputFile += ".newickTree" cluster = wpgma.get_newick_tree(with_edge_weights=True) io().writeNewickTree(cluster, outputFile) print "Computed wpgma cluster: ", cluster print "The clustering was also written to: ", os.path.abspath(outputFile)
def getSequencesFromFile(inputFile):
    """Read the input sequences from a fasta file.

    inputFile: A fasta format file with the input sequences.

    Returns whatever io().readFastaFile yields for that file.
    """
    return io().readFastaFile(inputFile)