Example #1
0
    def dipeptide_composition(self, num_segments, feature_ids=False):
        """Return per-segment dipeptide composition, or the matching labels.

        Args:
            num_segments: Number of segments the protein sequence is split
                into; the composition is computed for each segment.
            feature_ids: When true, no composition is computed and the
                feature ids and names are returned instead.

        Returns:
            If ``feature_ids`` is false: the result of
            ``sequtil.diletter_composition`` on the whole sequence when
            ``num_segments == 1``, otherwise the per-segment results joined
            with ``numpy.concatenate``.
            If ``feature_ids`` is true: a ``(feat_ids, feat_names)`` tuple of
            lists holding one entry per (segment, letter pair), with
            segments numbered from 1.
        """
        alph = sequtil.aa_unambiguous_alph

        if feature_ids:
            pairs = sequtil.ordered_alph_pairs(alph)

            feat_ids = []
            feat_names = []

            # range (not xrange) keeps this runnable on Python 2 and 3.
            for si in range(1, num_segments + 1):
                for p in pairs:
                    feat_ids.append('%s%i' % (p, si))
                    feat_names.append('%s, segment %i' % (p, si))

            return (feat_ids, feat_names)

        seq = self.protein_sequence

        # NOTE(review): the trailing 1 is presumably the distance between the
        # two letters of a pair -- confirm against sequtil.
        if num_segments == 1:
            return sequtil.diletter_composition(seq, alph, 1)

        seqs = sequtil.segment(seq, num_segments)
        return numpy.concatenate(
            [sequtil.diletter_composition(s, alph, 1) for s in seqs])
Example #2
0
    def sa_composition(self, num_segments, feature_ids=False):
        """Return per-segment letter composition of ``self.sa_sequence``.

        Args:
            num_segments: Number of segments the sequence is split into; the
                composition is computed for each segment.
            feature_ids: When true, no composition is computed and the
                feature ids and names are returned instead.

        Returns:
            If ``feature_ids`` is false: the result of
            ``sequtil.letter_composition`` over ``sequtil.sa_alph`` for the
            whole sequence when ``num_segments == 1``, otherwise the
            per-segment results joined with ``numpy.concatenate``.
            If ``feature_ids`` is true: a ``(feat_ids, feat_names)`` tuple of
            lists holding one entry per (segment, alphabet letter), with
            segments numbered from 1. Ids use the letters of
            ``sequtil.sa_alph``; names use the entries of
            ``sequtil.sa_name`` paired with them by position.
        """
        alph = sequtil.sa_alph

        if feature_ids:
            feat_ids = []
            feat_names = []

            # range (not xrange) keeps this runnable on Python 2 and 3.
            for si in range(1, num_segments + 1):
                for sa_id, sa_name in zip(alph, sequtil.sa_name):
                    feat_ids.append('%s%i' % (sa_id, si))
                    feat_names.append('%s, segment %i' % (sa_name, si))

            return (feat_ids, feat_names)

        seq = self.sa_sequence

        if num_segments == 1:
            return sequtil.letter_composition(seq, alph)

        seqs = sequtil.segment(seq, num_segments)
        return numpy.concatenate(
            [sequtil.letter_composition(s, alph) for s in seqs])
Example #3
0
    def amino_acid_composition(self, num_segments, feature_ids=False):
        """Return per-segment letter composition of ``self.protein_sequence``.

        Args:
            num_segments: Number of segments the sequence is split into; the
                composition is computed for each segment.
            feature_ids: When true, no composition is computed and the
                feature ids and names are returned instead.

        Returns:
            If ``feature_ids`` is false: the result of
            ``sequtil.letter_composition`` over
            ``sequtil.aa_unambiguous_alph`` for the whole sequence when
            ``num_segments == 1``, otherwise the per-segment results joined
            with ``numpy.concatenate``.
            If ``feature_ids`` is true: a ``(feat_ids, feat_names)`` tuple of
            lists holding one entry per (segment, alphabet letter), with
            segments numbered from 1.
        """
        alph = sequtil.aa_unambiguous_alph

        if feature_ids:
            feat_ids = []
            feat_names = []

            # range (not xrange) keeps this runnable on Python 2 and 3.
            for si in range(1, num_segments + 1):
                for aa in alph:
                    feat_ids.append('%s%i' % (aa, si))
                    feat_names.append('%s, segment %i' % (aa, si))

            return (feat_ids, feat_names)

        seq = self.protein_sequence

        if num_segments == 1:
            return sequtil.letter_composition(seq, alph)

        seqs = sequtil.segment(seq, num_segments)
        return numpy.concatenate(
            [sequtil.letter_composition(s, alph) for s in seqs])
Example #4
0
    def sa_composition(self, num_segments, feature_ids=False):
        """Compute the segmented letter composition of ``self.sa_sequence``.

        Args:
            num_segments: How many segments to split the sequence into; one
                composition is computed per segment.
            feature_ids: If true, skip the computation and return the
                feature ids and names instead.

        Returns:
            With ``feature_ids`` false: the ``sequtil.letter_composition``
            result over ``sequtil.sa_alph`` for the full sequence when
            ``num_segments == 1``; otherwise the per-segment results
            combined by ``numpy.concatenate``.
            With ``feature_ids`` true: a ``(feat_ids, feat_names)`` tuple of
            lists, one entry per (segment, alphabet letter), segments
            counted from 1. Ids are built from ``sequtil.sa_alph`` letters
            and names from the positionally matching ``sequtil.sa_name``
            entries.
        """
        alph = sequtil.sa_alph

        if feature_ids:
            feat_ids = []
            feat_names = []

            # xrange is Python 2 only; range behaves the same here and also
            # works on Python 3.
            for si in range(1, num_segments + 1):
                for sa_id, sa_name in zip(alph, sequtil.sa_name):
                    feat_ids.append('%s%i' % (sa_id, si))
                    feat_names.append('%s, segment %i' % (sa_name, si))

            return (feat_ids, feat_names)

        seq = self.sa_sequence

        if num_segments == 1:
            return sequtil.letter_composition(seq, alph)

        seqs = sequtil.segment(seq, num_segments)
        return numpy.concatenate(
            [sequtil.letter_composition(s, alph) for s in seqs])
Example #5
0
    def dipeptide_composition(self, num_segments, feature_ids=False):
        """Compute the segmented dipeptide composition, or its labels.

        Args:
            num_segments: How many segments to split the protein sequence
                into; one composition is computed per segment.
            feature_ids: If true, skip the computation and return the
                feature ids and names instead.

        Returns:
            With ``feature_ids`` false: the ``sequtil.diletter_composition``
            result for the full sequence when ``num_segments == 1``;
            otherwise the per-segment results combined by
            ``numpy.concatenate``.
            With ``feature_ids`` true: a ``(feat_ids, feat_names)`` tuple of
            lists, one entry per (segment, letter pair), segments counted
            from 1.
        """
        alph = sequtil.aa_unambiguous_alph

        if feature_ids:
            pairs = sequtil.ordered_alph_pairs(alph)

            feat_ids = []
            feat_names = []

            # xrange is Python 2 only; range behaves the same here and also
            # works on Python 3.
            for si in range(1, num_segments + 1):
                for p in pairs:
                    feat_ids.append('%s%i' % (p, si))
                    feat_names.append('%s, segment %i' % (p, si))

            return (feat_ids, feat_names)

        seq = self.protein_sequence

        # NOTE(review): the trailing 1 is presumably the distance between the
        # two letters of a pair -- confirm against sequtil.
        if num_segments == 1:
            return sequtil.diletter_composition(seq, alph, 1)

        seqs = sequtil.segment(seq, num_segments)
        return numpy.concatenate(
            [sequtil.diletter_composition(s, alph, 1) for s in seqs])
Example #6
0
    def amino_acid_composition(self, num_segments, feature_ids=False):
        """Compute the segmented letter composition of the protein sequence.

        Args:
            num_segments: How many segments to split
                ``self.protein_sequence`` into; one composition is computed
                per segment.
            feature_ids: If true, skip the computation and return the
                feature ids and names instead.

        Returns:
            With ``feature_ids`` false: the ``sequtil.letter_composition``
            result over ``sequtil.aa_unambiguous_alph`` for the full
            sequence when ``num_segments == 1``; otherwise the per-segment
            results combined by ``numpy.concatenate``.
            With ``feature_ids`` true: a ``(feat_ids, feat_names)`` tuple of
            lists, one entry per (segment, alphabet letter), segments
            counted from 1.
        """
        alph = sequtil.aa_unambiguous_alph

        if feature_ids:
            feat_ids = []
            feat_names = []

            # xrange is Python 2 only; range behaves the same here and also
            # works on Python 3.
            for si in range(1, num_segments + 1):
                for aa in alph:
                    feat_ids.append('%s%i' % (aa, si))
                    feat_names.append('%s, segment %i' % (aa, si))

            return (feat_ids, feat_names)

        seq = self.protein_sequence

        if num_segments == 1:
            return sequtil.letter_composition(seq, alph)

        seqs = sequtil.segment(seq, num_segments)
        return numpy.concatenate(
            [sequtil.letter_composition(s, alph) for s in seqs])
Example #7
0
    def test_segment(self):

        # split the length-100 fixture sequence into two parts
        parts = sequtil.segment(self.seq100, 2)

        # exactly two parts must come back ...
        self.assertEqual(len(parts), 2)

        # ... and each one must hold half of the input
        every_part_is_half = all(len(part) == 50 for part in parts)
        self.assertTrue(every_part_is_half)