def test_init(self):
     """Initialization functions as expected with varied input types
     """
     SequenceCollection(self.seqs1)
     SequenceCollection(self.seqs2)
     SequenceCollection(self.seqs3)
     SequenceCollection([])
 def test_init_validate(self):
     """initialization with validation functions as expected
     """
     SequenceCollection(self.seqs1, validate=True)
     SequenceCollection(self.seqs1, validate=True)
     # can't validate self.seqs2 as a DNASequence
     self.assertRaises(SequenceCollectionError,
                       SequenceCollection,
                       self.invalid_s1,
                       validate=True)
Exemple #3
0
    def __call__(self,
                 seq_path,
                 result_path=None,
                 log_path=None,
                 failure_path=None):
        # load candidate sequences
        seq_file = open(seq_path, 'U')
        candidate_sequences = parse_fasta(seq_file)

        # load template sequences
        template_alignment = []
        template_alignment_fp = self.Params['template_filepath']
        for seq_id, seq in parse_fasta(open(template_alignment_fp)):
            # replace '.' characters with '-' characters
            template_alignment.append((seq_id, seq.replace('.', '-').upper()))
        template_alignment = Alignment.from_fasta_records(template_alignment,
                                                          DNASequence,
                                                          validate=True)

        # initialize_logger
        logger = NastLogger(log_path)

        # get function for pairwise alignment method
        pairwise_alignment_f = pairwise_alignment_methods[
            self.Params['pairwise_alignment_method']]

        pynast_aligned, pynast_failed = pynast_seqs(
            candidate_sequences,
            template_alignment,
            min_pct=self.Params['min_pct'],
            min_len=self.Params['min_len'],
            align_unaligned_seqs_f=pairwise_alignment_f,
            logger=logger,
            temp_dir=get_qiime_temp_dir())

        logger.record(str(self))

        for i, seq in enumerate(pynast_failed):
            skb_seq = DNASequence(str(seq), id=seq.Name)
            pynast_failed[i] = skb_seq
        pynast_failed = SequenceCollection(pynast_failed)

        for i, seq in enumerate(pynast_aligned):
            skb_seq = DNASequence(str(seq), id=seq.Name)
            pynast_aligned[i] = skb_seq
        pynast_aligned = Alignment(pynast_aligned)

        if failure_path is not None:
            fail_file = open(failure_path, 'w')
            fail_file.write(pynast_failed.to_fasta())
            fail_file.close()

        if result_path is not None:
            result_file = open(result_path, 'w')
            result_file.write(pynast_aligned.to_fasta())
            result_file.close()
            return None
        else:
            return pynast_aligned
    def test_ne(self):
        """inequality operator functions as expected
        """
        self.assertFalse(self.s1 != self.s1)
        self.assertTrue(self.s1 != self.s2)

        # SequenceCollections with different number of sequences are not equal
        self.assertTrue(self.s1 != SequenceCollection([self.d1]))

        class FakeSequenceCollection(SequenceCollection):
            pass

        # SequenceCollections of different types are not equal
        self.assertTrue(self.s1 != FakeSequenceCollection([self.d1, self.d2]))
        self.assertTrue(self.s1 != Alignment([self.d1, self.d2]))

        # SequenceCollections with different sequences are not equal
        self.assertTrue(self.s1 != SequenceCollection([self.d1, self.r1]))
    def setUp(self):
        """Initialize values to be used in tests
        """
        self.d1 = DNASequence('GATTACA', id="d1")
        self.d2 = DNASequence('TTG', id="d2")
        self.d1_lower = DNASequence('gattaca', id="d1")
        self.d2_lower = DNASequence('ttg', id="d2")
        self.r1 = RNASequence('GAUUACA', id="r1")
        self.r2 = RNASequence('UUG', id="r2")
        self.r3 = RNASequence('U-----UGCC--', id="r3")

        self.i1 = DNASequence('GATXACA', id="i1")

        self.seqs1 = [self.d1, self.d2]
        self.seqs1_lower = [self.d1_lower, self.d2_lower]
        self.seqs2 = [self.r1, self.r2, self.r3]
        self.seqs3 = self.seqs1 + self.seqs2

        self.seqs1_t = [('d1', 'GATTACA'), ('d2', 'TTG')]
        self.seqs2_t = [('r1', 'GAUUACA'), ('r2', 'UUG'),
                        ('r3', 'U-----UGCC--')]
        self.seqs3_t = self.seqs1_t + self.seqs2_t

        self.s1 = SequenceCollection(self.seqs1)
        self.s1_lower = SequenceCollection(self.seqs1_lower)
        self.s2 = SequenceCollection(self.seqs2)
        self.s3 = SequenceCollection(self.seqs3)
        self.empty = SequenceCollection([])

        self.invalid_s1 = SequenceCollection([self.i1])