def test_should_print_class_name(self):
    """Check that each score reports its own class name through ``get_name()``."""
    # All three names are collected before the first assertion runs, so
    # every score class is instantiated regardless of which check fails.
    name_checks = [
        ("SumOfPairs", SumOfPairs().get_name()),
        ("Star", Star().get_name()),
        ("Entropy", Entropy().get_name()),
    ]

    for expected_name, reported_name in name_checks:
        self.assertEqual(expected_name, reported_name)
def test_get_score_of_column_with_only_gaps(self):
    """Column 0 of an alignment made of three single-gap rows must score 3."""
    gap_only_alignment = MSA(['-', '-', '-'])
    scorer = SumOfPairs(gap_only_alignment)

    column_score = scorer.get_column_score(0)

    self.assertEqual(3, column_score)
def test_only_gaps_with_BLOSUM62(self):
    """Two rows of three gaps each must score 3 when Blosum62 is supplied."""
    gap_only_alignment = MSA(['---', '---'])
    scorer = SumOfPairs(gap_only_alignment, Blosum62())

    total_score = scorer.compute()

    self.assertEqual(3, total_score)
def test_basic_score_with_gaps_BLOSUM62(self):
    """The rows 'FA' and 'A-' must score -10 when Blosum62 is supplied."""
    alignment = MSA(['FA', 'A-'])
    scorer = SumOfPairs(alignment, Blosum62())

    total_score = scorer.compute()

    self.assertEqual(-10, total_score)
def test_basic_score_of_12_with_PAM250(self):
    """Three identical 'AA' rows must score 12 when PAM250 is supplied."""
    alignment = MSA(['AA', 'AA', 'AA'])
    scorer = SumOfPairs(alignment, PAM250())

    total_score = scorer.compute()

    self.assertEqual(12, total_score)
def run_all_scores(msa: list, chains: list = None) -> None:
    """Compute every score for one alignment and print each result.

    :param msa: Sequence of pairs. Element 0 of each pair is passed on as the
        sequence identifier and element 1 as the aligned sequence.
    :param chains: Chain identifiers forwarded to the STRIKE score. When
        omitted, ``['A', 'E', 'A', 'A']`` is used, which is the value this
        function previously hard-coded.
    :return: None. Results are written to standard output.
    """
    # None sentinel instead of a list default, so the default list is never
    # shared between calls.
    if chains is None:
        chains = ['A', 'E', 'A', 'A']

    align_sequences = [pair[1] for pair in msa]
    sequences_id = [pair[0] for pair in msa]

    # Percentage of non-gaps and totally conserved columns
    non_gaps = PercentageOfNonGaps()
    totally_conserved_columns = PercentageOfTotallyConservedColumns()

    percentage = non_gaps.compute(align_sequences)
    conserved = totally_conserved_columns.compute(align_sequences)
    print("Percentage of non-gaps: {0} %".format(percentage))
    print("Percentage of totally conserved columns: {0}".format(conserved))

    # Entropy
    value = Entropy().compute(align_sequences=align_sequences)
    print("Entropy score: {0}".format(value))

    # Sum of pairs
    value = SumOfPairs(Blosum62()).compute(align_sequences=align_sequences)
    print("SumOfPairs score (Blosum62): {0}".format(value))

    value = SumOfPairs(PAM250()).compute(align_sequences=align_sequences)
    print("SumOfPairs score (PAM250): {0}".format(value))

    # The matrix file name is passed as a bare relative name.
    value = SumOfPairs(FileMatrix('PAM380.txt')).compute(align_sequences=align_sequences)
    print("SumOfPairs score (PAM380): {0}".format(value))

    # Star
    value = Star(Blosum62()).compute(align_sequences=align_sequences)
    print("Star score (Blosum62): {0}".format(value))

    value = Star(PAM250()).compute(align_sequences=align_sequences)
    print("Star score (PAM250): {0}".format(value))

    # STRIKE
    value = Strike().compute(align_sequences=align_sequences,
                             sequences_id=sequences_id,
                             chains=chains)
    print("STRIKE score: {0}".format(value))
def test_get_score_of_an_alignment(self):
    """A four-row alignment must score 24 when PAM250 is supplied."""
    aligned_rows = [
        '---GKGDPKKPRGKMSSYAFFVQTSREEHKKKHPDASVNFSEFSKKCSERWKTMSAKEKGKFEDMAKADKARYEREMKTYI------PPKGE----',
        '------MQDRVKRPMNAFIVWSRDQRRKMALENPRMR-NS-EISKQLGYQWKMLTEAEKWPFFQEAQKLQAMHREKYPNYKYRP---RRKAKMLPK',
        'MKKLKKHPDFPKKPLTPYFRFFMEKRAKYAKLHPEMS-NL-DLTKILSKKYKELPEKKKMKYIQDFQREKQEFERNLARFREDH---PDLIQNAKK',
        '--------MHIKKPLNAFMLYMKEMRANVVAES-TLK-ESAAINQILGRRWHALSREEQAKYYELARKERQLHMQLYPGWSARDNYGKKKKRKREK',
    ]
    alignment = MSA(aligned_rows)
    scorer = SumOfPairs(alignment, PAM250())

    total_score = scorer.compute()

    self.assertEqual(24, total_score)
from sequoya.util.solution import get_representative_set from sequoya.util.visualization import MSAPlot if __name__ == '__main__': # setup Dask client (web interface will be initialized at http://127.0.0.1:8787/workers) cluster = LocalCluster(n_workers=4, processes=True) client = Client(cluster) ncores = sum(client.ncores().values()) print(f'{ncores} cores available') # creates the problem problem = BAliBASE( instance='BB20019', path='../resources', score_list=[SumOfPairs(), PercentageOfTotallyConservedColumns()]) # creates the algorithm max_evaluations = 200000 reference_point = [-175000, -1.35] algorithm = DistributedNSGAII( problem=problem, population_size=100, mutation=ShiftClosedGapGroups(probability=0.3), crossover=SPXMSA(probability=0.7), termination_criterion=StoppingByEvaluations( max_evaluations=max_evaluations), dominance_comparator=GDominanceComparator(reference_point), number_of_cores=ncores,
def setUp(self):
    """Build the two SumOfPairs fixtures: one with PAM250, one with Blosum62."""
    pam250_matrix = PAM250()
    self.sumofpairs_PAM250 = SumOfPairs(pam250_matrix)

    blosum62_matrix = Blosum62()
    self.sumofpairs_Blosum62 = SumOfPairs(blosum62_matrix)
class SumOfPairsTestCases(unittest.TestCase):
    """Checks SumOfPairs results obtained with the PAM250 and Blosum62 matrices."""

    def setUp(self):
        """Build one SumOfPairs scorer per substitution matrix."""
        pam250_matrix = PAM250()
        self.sumofpairs_PAM250 = SumOfPairs(pam250_matrix)

        blosum62_matrix = Blosum62()
        self.sumofpairs_Blosum62 = SumOfPairs(blosum62_matrix)

    def test_basic_score_of_12_with_PAM250(self):
        """Three identical 'AA' rows must score 12 with the PAM250 scorer."""
        alignment = ['AA', 'AA', 'AA']

        score = self.sumofpairs_PAM250.compute(alignment)

        self.assertEqual(12, score)

    def test_basic_score_of_12_with_BLOSUM62(self):
        """Three identical 'AA' rows must score 24 with the Blosum62 scorer."""
        # NOTE(review): the method name says 12, but the asserted value is 24.
        alignment = ['AA', 'AA', 'AA']

        score = self.sumofpairs_Blosum62.compute(alignment)

        self.assertEqual(24, score)

    def test_basic_score_with_gaps_BLOSUM62(self):
        """The rows 'FA' and 'A-' must score -10 with the Blosum62 scorer."""
        alignment = ['FA', 'A-']

        score = self.sumofpairs_Blosum62.compute(alignment)

        self.assertEqual(-10, score)

    def test_only_gaps_with_BLOSUM62(self):
        """Two rows of three gaps each must score 3 with the Blosum62 scorer."""
        alignment = ['---', '---']

        score = self.sumofpairs_Blosum62.compute(alignment)

        self.assertEqual(3, score)

    def test_get_score_of_column_with_only_gaps(self):
        """A column holding three gaps must score 3 with the Blosum62 scorer."""
        gap_column = ['-', '-', '-']

        score = self.sumofpairs_Blosum62.get_score_of_k_column(gap_column)

        self.assertEqual(3, score)

    def test_get_score_of_an_alignment(self):
        """A four-row alignment must score 24 with the PAM250 scorer."""
        alignment = [
            '---GKGDPKKPRGKMSSYAFFVQTSREEHKKKHPDASVNFSEFSKKCSERWKTMSAKEKGKFEDMAKADKARYEREMKTYI------PPKGE----',
            '------MQDRVKRPMNAFIVWSRDQRRKMALENPRMR-NS-EISKQLGYQWKMLTEAEKWPFFQEAQKLQAMHREKYPNYKYRP---RRKAKMLPK',
            'MKKLKKHPDFPKKPLTPYFRFFMEKRAKYAKLHPEMS-NL-DLTKILSKKYKELPEKKKMKYIQDFQREKQEFERNLARFREDH---PDLIQNAKK',
            '--------MHIKKPLNAFMLYMKEMRANVVAES-TLK-ESAAINQILGRRWHALSREEQAKYYELARKERQLHMQLYPGWSARDNYGKKKKRKREK',
        ]

        score = self.sumofpairs_PAM250.compute(alignment)

        self.assertEqual(24, score)
from sequoya.operator import SPXMSA, ShiftClosedGapGroups
from sequoya.problem import BAliBASE
from sequoya.util.solution import get_representative_set
from sequoya.util.visualization import MSAPlot

if __name__ == '__main__':
    # setup Dask client (web interface will be initialized at http://127.0.0.1:8787/workers)
    cluster = LocalCluster(processes=True)
    client = Client(cluster)

    # Total core count: the sum of the values returned by client.ncores().
    ncores = sum(client.ncores().values())
    print(f'{ncores} cores available')

    # creates the problem
    # Two scores are supplied: SumOfPairs and PercentageOfTotallyConservedColumns.
    problem = BAliBASE(instance='BB50011', path='../resources',
                       score_list=[SumOfPairs(), PercentageOfTotallyConservedColumns()])

    # creates the algorithm
    max_evaluations = 25000

    # The algorithm receives the same client and core count set up above.
    algorithm = DistributedNSGAII(
        problem=problem,
        population_size=100,
        mutation=ShiftClosedGapGroups(probability=0.4),
        crossover=SPXMSA(probability=0.7),
        termination_criterion=StoppingByEvaluations(max_evaluations=max_evaluations),
        number_of_cores=ncores,
        client=client
    )

    # The progress bar maximum is the same evaluation budget used by the
    # termination criterion.
    algorithm.observable.register(observer=ProgressBarObserver(max=max_evaluations))