def test_writeFastaFile(self):
        """Test method to test the correct writing of a fasta file.

        One alignment with two sequences is written twice: once with a file
        name that already carries the ".fas" extension and once without it.
        In both cases the content is read back from "testWriteFasta.fas" and
        compared line by line with the expected header/sequence records.
        """
        fastaPath = "testWriteFasta.fas"
        # Start from a clean state in case an earlier run left the file behind.
        if os.path.exists(fastaPath):
            os.remove(fastaPath)
        sequence = [["ACGT", "ACGTAATTA"]]
        expectedReadSequence = [
            ">Alignment 0 sequence 0", "ACGT", ">Alignment 0 sequence 1",
            "ACGTAATTA"
        ]

        # first case: filename with extension, second case: without extension;
        # both are read back from the same ".fas" path.
        for fileName in (fastaPath, "testWriteFasta"):
            try:
                io().writeFastaFile(sequence, fileName)
                # The context manager closes the handle even when the
                # assertion below fails.
                with open(fastaPath) as testInputFile:
                    readInputSequence = [
                        line.strip("\n") for line in testInputFile
                    ]
                self.assertEqual(expectedReadSequence, readInputSequence)
            finally:
                # Never leave the temporary file behind, pass or fail.
                if os.path.exists(fastaPath):
                    os.remove(fastaPath)
    def test_writeFastaFile(self):
        """Test method to test the correct writing of a fasta file.

        One alignment with two sequences is written twice: once with a file
        name that already carries the ".fas" extension and once without it.
        In both cases the content is read back from "testWriteFasta.fas" and
        compared line by line with the expected header/sequence records.
        """
        fastaPath = "testWriteFasta.fas"
        # Start from a clean state in case an earlier run left the file behind.
        if os.path.exists(fastaPath):
            os.remove(fastaPath)
        sequence = [["ACGT", "ACGTAATTA"]]
        expectedReadSequence = [">Alignment 0 sequence 0", "ACGT", ">Alignment 0 sequence 1", "ACGTAATTA"]

        # first case: filename with extension, second case: without extension;
        # both are read back from the same ".fas" path.
        for fileName in (fastaPath, "testWriteFasta"):
            try:
                io().writeFastaFile(sequence, fileName)
                # The context manager closes the handle even when the
                # assertion below fails.
                with open(fastaPath) as testInputFile:
                    readInputSequence = [line.strip("\n") for line in testInputFile]
                self.assertEqual(expectedReadSequence, readInputSequence)
            finally:
                # Never leave the temporary file behind, pass or fail.
                if os.path.exists(fastaPath):
                    os.remove(fastaPath)
Esempio n. 3
0
def nussinov(sequence, outputFile):
    """Executes the RNA-folding algorithm from Nussinov.
        sequence:   The RNA-sequnce as a list.
        outputFile: The name of the output file."""
    print "\nThe following sequence is given:"
    print sequence[0]
    print "\n"
    nussinov = Nussinov(sequence[0])
    nussinov.execute()
    print "\nDot-bracket: "
    io().writeRnaDotBracketNotation(sequence[0], nussinov.pairedBases, outputFile)
    print "The result was also written to: ", os.path.abspath(outputFile)
Esempio n. 4
0
def needlemanWunschN3(sequences, weightFunction="weightFunctionDifference", outputFile="nw3.fas"):
    """Executes the Needleman-Wunsch algorithm with three sequences"""
    print "\nThe following sequences are given:"
    for i in sequences:
        print i
    print "\nComputing solution...\n\n"
    nw3 = NW3(sequences[0], sequences[1], sequences[2], weightFunction)
    result = nw3.execute()

    io().writeFastaFile(result, outputFile)
    print "\nScore: ", nw3.computation_matrix[-1][-1][-1]
    print "Number of optimal solutions: ", len(result)
    print "\nOne solution is:\n", result[0][0], "\n", result[0][1], "\n", result[0][2]
    print "\nFor more solutions look in the file \"nw3.fas\" in the bin directory.\n"
Esempio n. 5
0
def needlemanWunsch(sequences, scoreFunction, outputFile, numberOfSolutions):
    """Executes the Needleman-Wunsch algorithm with a default score function defined as: a == b -> 0 and a !=b --> 1.\n
    Stores the alignments per default in file needlemanWunsch.fas.
    To change the score function define a function in class PairwiseAligmentHelper and define the name as an input paramter.
        scoreFunction:      The name of the weigh function which is defined in class PairwiseAligmentHelper.
        outputFile:         The path to the output file.
        numberOfSolutions:  Maximal number of optimal solutions which should be computed."""
    print "\nThe following sequences are given:"
    for i in sequences:
        print i
    print "\nComputing solution...\n\n"
    result = nw().compute(sequences, scoreFunction, int(numberOfSolutions), scoringValue=True)
    print "\nScore: ", result[1]
    print "Number of optimal solutions: ", len(result[0])
    print "\nOne solution is:\n", result[0][0][0], "\n", result[0][0][1]
    print "\nFor more solutions look in the file \"needlemanWunsch.fas\" in the bin directory.\n"
    io().writeFastaFile(result[0], outputFile)
    def test_readFastaFile(self):
        """Test method to test the correct reading of a fasta file.

        Sequences are written with writeFastaFile and read back with
        readFastaFile, once in pairwise mode and once in multiple sequence
        alignment mode. The temporary file is removed even if an assertion
        fails.
        """
        fastaPath = "testReadFasta.fas"
        # Start from a clean state in case an earlier run left the file behind.
        if os.path.exists(fastaPath):
            os.remove(fastaPath)

        try:
            # first test case: two sequences
            sequenceToWrite = [["ACGT", "ACGTAATTA"]]
            expectedSequence = ["ACGT", "ACGTAATTA"]
            io().writeFastaFile(sequenceToWrite, fastaPath)
            readSequence = io().readFastaFile(fastaPath,
                                              multipleSequenceAlignment=False)
            self.assertEqual(expectedSequence, readSequence)

            # second test case: three sequences in the file, read in pairwise
            # mode, so the result must differ from the full list
            sequenceToWrite = [["ACGT", "ACGTAATTA", "AGTTG"]]
            expectedSequence = ["ACGT", "ACGTAATTA", "AGTTG"]
            io().writeFastaFile(sequenceToWrite, fastaPath)
            readSequence = io().readFastaFile(fastaPath,
                                              multipleSequenceAlignment=False)
            self.assertNotEqual(expectedSequence, readSequence)

            # third test case: multiple sequences
            readSequence = io().readFastaFile(fastaPath,
                                              multipleSequenceAlignment=True)
            self.assertEqual(expectedSequence, readSequence)
        finally:
            # Never leave the temporary file behind, pass or fail.
            if os.path.exists(fastaPath):
                os.remove(fastaPath)
Esempio n. 7
0
def fengDoolittle(sequences, weightFunction, similarityScore, outputFile):
    """Executes the heuristic multiple sequence alignment by Feng and Doolittle.
        sequences:          All input sequnces to align.
        weightFunction:     The weight function defined in class PairwiseAlignmentHelper for the Needleman-Wunsch algorithm to compute the optimal local alignment.
        similarityScore:    Name of a similarity score defined in class PairwiseAligmentHelper.
        outputFile:         The output file name."""
    fd = FengDoolittle(sequences, weightFunction, similarityScore)
    alignmentDict = fd.computeMultipleAlignment()
    alignment = [[]]
    for i in alignmentDict:
        alignment[0].append(alignmentDict[i])
    io().writeFastaFile(alignment, outputFile)
    print "Input sequences:\n"
    for i in sequences:
        print i
    print "\nAlignment:"
    for i in alignmentDict:
        print alignmentDict[i]
    print sumOfPairs(alignment[0], weightFunction)
Esempio n. 8
0
def gotoh(sequences, scoreFunction="weightFunctionDifference", costFunction="gapCost", outputFile="gotoh.fas"):
    """Executes the Gotoh algorithm with a default score function defined as: a == b -> 0 and a !=b --> 1 and a cost function defined as: g(x) = 2 + k.\n
    Stores the alignments per default in file gotoh.fas.
    To change the score or cost function define a function in class PairwiseAligmentHelper and define the name as an input paramter.
        scoreFunction:  The name of the weigh function which is defined in class PairwiseAligmentHelper.
        costFunction:   The name of the gap cost function which is defined in class PairwiseAligmentHelper.
        outputFile:     The path to the output file.
        """
    print "The following sequences are given:"
    for i in sequences:
        print i
    print "Computing solution..."
    gotoh = Gotoh(sequences[0], sequences[1], scoreFunction, costFunction)
    result = gotoh.compute()
    io().writeFastaFile(result, outputFile)
    print "Number of solutions: ", len(result)
    print "Score:", max(gotoh.computationMatrix[0][-1][-1], max(gotoh.computationMatrix[1][-1][-1], gotoh.computationMatrix[2][-1][-1]))
    print "One solution is:\n", result[0][0], "\n", result[0][1]
    print "For more solutions look in the file \"gotoh.fas\" in the bin directory."
Esempio n. 9
0
def upgmaWpgma(upgmaWpgma, sequences, outputFile, fileFormat):
    """Executes the a phylogenetic clustering with a upgm or wpgm weighting.
        sequences:  All defined input sequences as a list.
        outputFile: The name of the output file
        fileFormat: The file format of the output file"""
    #create
    print "The following sequences are given:"
    for i in sequences:
        print i
    print "Computing clustering..."
    data = mah().createDataForUpgmaWpgma(sequences)
    if upgmaWpgma:
        upgma = UpgmaWpgma(data[0], len(data[1]))
        upgma.compute_clustering()
        if not fileFormat:
            outputFile += ".graphML"
            io().writeGraphMLFile(upgma.mapping, outputFile)
            print "Clustering written as graphML file: ", os.path.abspath(outputFile)
        else:
            outputFile += ".newickTree"
            cluster = upgma.get_newick_tree(with_edge_weights=True)
            io().writeNewickTree(cluster, outputFile)
            print "Computed upgma cluster: ", cluster
            print "The clustering was also written to: ", os.path.abspath(outputFile)
    else:
        wpgma = UpgmaWpgma(data[0], len(data[1]), False, data[2])
        wpgma.compute_clustering()
        if not fileFormat:
            outputFile += ".graphML"
            io().writeGraphMLFile(wpgma.mapping, outputFile)
            print "Clustering written as graphML file: ", os.path.abspath(outputFile)
        else:
            outputFile += ".newickTree"
            cluster = wpgma.get_newick_tree(with_edge_weights=True)
            io().writeNewickTree(cluster, outputFile)
            print "Computed wpgma cluster: ", cluster
            print "The clustering was also written to: ", os.path.abspath(outputFile)
    def test_readFastaFile(self):
        """Test method to test the correct reading of a fasta file.

        Sequences are written with writeFastaFile and read back with
        readFastaFile, once in pairwise mode and once in multiple sequence
        alignment mode. The temporary file is removed even if an assertion
        fails.
        """
        fastaPath = "testReadFasta.fas"
        # Start from a clean state in case an earlier run left the file behind.
        if os.path.exists(fastaPath):
            os.remove(fastaPath)

        try:
            # first test case: two sequences
            sequenceToWrite = [["ACGT", "ACGTAATTA"]]
            expectedSequence = ["ACGT", "ACGTAATTA"]
            io().writeFastaFile(sequenceToWrite, fastaPath)
            readSequence = io().readFastaFile(fastaPath, multipleSequenceAlignment=False)
            self.assertEqual(expectedSequence, readSequence)

            # second test case: three sequences in the file, read in pairwise
            # mode, so the result must differ from the full list
            sequenceToWrite = [["ACGT", "ACGTAATTA", "AGTTG"]]
            expectedSequence = ["ACGT", "ACGTAATTA", "AGTTG"]
            io().writeFastaFile(sequenceToWrite, fastaPath)
            readSequence = io().readFastaFile(fastaPath, multipleSequenceAlignment=False)
            self.assertNotEqual(expectedSequence, readSequence)

            # third test case: multiple sequences
            readSequence = io().readFastaFile(fastaPath, multipleSequenceAlignment=True)
            self.assertEqual(expectedSequence, readSequence)
        finally:
            # Never leave the temporary file behind, pass or fail.
            if os.path.exists(fastaPath):
                os.remove(fastaPath)
Esempio n. 11
0
def getSequencesFromFile(inputFile):
    """Read the input sequences from a file.

    Thin wrapper that returns the result of ``readFastaFile`` unchanged.

            inputFile:  A fasta format file with the input sequences."""
    return io().readFastaFile(inputFile)