def dipeptide_composition(self, num_segments, feature_ids=False):
    """Return the dipeptide composition of the protein sequence, or its labels.

    Args:
        num_segments: Number of segments the sequence is split into with
            ``sequtil.segment`` before the composition is computed.
        feature_ids: If true, no composition is computed; the feature
            ids and feature names are returned instead.

    Returns:
        With ``feature_ids`` false: the result of
        ``sequtil.diletter_composition`` for the whole sequence when
        ``num_segments == 1``, otherwise the per-segment results joined
        with ``numpy.concatenate``.
        With ``feature_ids`` true: a ``(feat_ids, feat_names)`` tuple of
        lists holding one entry per (segment, letter pair), with segments
        numbered from 1.
    """
    alph = sequtil.aa_unambiguous_alph

    if feature_ids:
        pairs = sequtil.ordered_alph_pairs(alph)
        feat_ids = []
        feat_names = []
        # range instead of the Python 2-only xrange, so this branch also
        # runs on Python 3; the iteration itself is identical.
        for si in range(1, num_segments + 1):
            for p in pairs:
                feat_ids.append('%s%i' % (p, si))
                feat_names.append('%s, segment %i' % (p, si))
        return (feat_ids, feat_names)

    seq = self.protein_sequence
    # NOTE(review): the trailing 1 is passed straight through to
    # sequtil.diletter_composition; presumably the distance between the
    # two letters of a pair -- confirm against sequtil.
    if num_segments == 1:
        return sequtil.diletter_composition(seq, alph, 1)

    seqs = sequtil.segment(seq, num_segments)
    return numpy.concatenate(
        [sequtil.diletter_composition(s, alph, 1) for s in seqs])
def sa_composition(self, num_segments, feature_ids=False):
    """Return the letter composition of ``self.sa_sequence``, or its labels.

    Args:
        num_segments: Number of segments the sequence is split into with
            ``sequtil.segment`` before the composition is computed.
        feature_ids: If true, no composition is computed; the feature
            ids and feature names are returned instead.

    Returns:
        With ``feature_ids`` false: the result of
        ``sequtil.letter_composition`` over ``sequtil.sa_alph`` for the
        whole sequence when ``num_segments == 1``, otherwise the
        per-segment results joined with ``numpy.concatenate``.
        With ``feature_ids`` true: a ``(feat_ids, feat_names)`` tuple of
        lists with one entry per (segment, letter); ids are built from
        the letters of ``sequtil.sa_alph`` and names from the matching
        entries of ``sequtil.sa_name``, with segments numbered from 1.
    """
    alph = sequtil.sa_alph

    if feature_ids:
        feat_ids = []
        feat_names = []
        # range instead of the Python 2-only xrange, so this branch also
        # runs on Python 3; the iteration itself is identical.
        for si in range(1, num_segments + 1):
            # zip is rebuilt per segment: on Python 3 it is a one-shot
            # iterator and could not be reused across segments.
            for sa_id, sa_name in zip(alph, sequtil.sa_name):
                feat_ids.append('%s%i' % (sa_id, si))
                feat_names.append('%s, segment %i' % (sa_name, si))
        return (feat_ids, feat_names)

    seq = self.sa_sequence
    if num_segments == 1:
        return sequtil.letter_composition(seq, alph)

    seqs = sequtil.segment(seq, num_segments)
    return numpy.concatenate(
        [sequtil.letter_composition(s, alph) for s in seqs])
def amino_acid_composition(self, num_segments, feature_ids=False):
    """Return the letter composition of the protein sequence, or its labels.

    Args:
        num_segments: Number of segments the sequence is split into with
            ``sequtil.segment`` before the composition is computed.
        feature_ids: If true, no composition is computed; the feature
            ids and feature names are returned instead.

    Returns:
        With ``feature_ids`` false: the result of
        ``sequtil.letter_composition`` over
        ``sequtil.aa_unambiguous_alph`` for the whole sequence when
        ``num_segments == 1``, otherwise the per-segment results joined
        with ``numpy.concatenate``.
        With ``feature_ids`` true: a ``(feat_ids, feat_names)`` tuple of
        lists with one entry per (segment, alphabet letter), with
        segments numbered from 1.
    """
    alph = sequtil.aa_unambiguous_alph

    if feature_ids:
        feat_ids = []
        feat_names = []
        # range instead of the Python 2-only xrange, so this branch also
        # runs on Python 3; the iteration itself is identical.
        for si in range(1, num_segments + 1):
            for aa in alph:
                feat_ids.append('%s%i' % (aa, si))
                feat_names.append('%s, segment %i' % (aa, si))
        return (feat_ids, feat_names)

    seq = self.protein_sequence
    if num_segments == 1:
        return sequtil.letter_composition(seq, alph)

    seqs = sequtil.segment(seq, num_segments)
    return numpy.concatenate(
        [sequtil.letter_composition(s, alph) for s in seqs])
def sa_composition(self, num_segments, feature_ids=False):
    """Compute the ``sa_sequence`` letter composition, or describe its features.

    Args:
        num_segments: How many segments ``sequtil.segment`` splits the
            sequence into; each segment contributes its own composition.
        feature_ids: When true, return the feature labels instead of the
            composition values.

    Returns:
        If ``feature_ids`` is false: whatever
        ``sequtil.letter_composition`` returns for the full sequence
        when ``num_segments == 1``; otherwise the per-segment
        compositions joined by ``numpy.concatenate``.
        If ``feature_ids`` is true: the tuple ``(feat_ids, feat_names)``,
        two lists with one entry per segment (numbered from 1) and per
        pair drawn from ``zip(sequtil.sa_alph, sequtil.sa_name)``.
    """
    alph = sequtil.sa_alph

    if feature_ids:
        feat_ids = []
        feat_names = []
        # xrange only exists on Python 2; range iterates the same values
        # and is available on both Python 2 and Python 3.
        for si in range(1, num_segments + 1):
            # Rebuild the zip for every segment, since a Python 3 zip
            # object is exhausted after one pass.
            for sa_id, sa_name in zip(alph, sequtil.sa_name):
                feat_ids.append('%s%i' % (sa_id, si))
                feat_names.append('%s, segment %i' % (sa_name, si))
        return (feat_ids, feat_names)

    seq = self.sa_sequence
    if num_segments == 1:
        return sequtil.letter_composition(seq, alph)

    seqs = sequtil.segment(seq, num_segments)
    return numpy.concatenate(
        [sequtil.letter_composition(s, alph) for s in seqs])
def dipeptide_composition(self, num_segments, feature_ids=False):
    """Compute the protein's dipeptide composition, or describe its features.

    Args:
        num_segments: How many segments ``sequtil.segment`` splits the
            sequence into; each segment contributes its own composition.
        feature_ids: When true, return the feature labels instead of the
            composition values.

    Returns:
        If ``feature_ids`` is false: whatever
        ``sequtil.diletter_composition`` returns for the full sequence
        when ``num_segments == 1``; otherwise the per-segment
        compositions joined by ``numpy.concatenate``.
        If ``feature_ids`` is true: the tuple ``(feat_ids, feat_names)``,
        two lists with one entry per segment (numbered from 1) and per
        item of ``sequtil.ordered_alph_pairs(alph)``.
    """
    alph = sequtil.aa_unambiguous_alph

    if feature_ids:
        pairs = sequtil.ordered_alph_pairs(alph)
        feat_ids = []
        feat_names = []
        # xrange only exists on Python 2; range iterates the same values
        # and is available on both Python 2 and Python 3.
        for si in range(1, num_segments + 1):
            for p in pairs:
                feat_ids.append('%s%i' % (p, si))
                feat_names.append('%s, segment %i' % (p, si))
        return (feat_ids, feat_names)

    seq = self.protein_sequence
    # NOTE(review): the meaning of the literal 1 handed to
    # sequtil.diletter_composition is not visible here -- presumably the
    # gap between the paired letters; verify in sequtil.
    if num_segments == 1:
        return sequtil.diletter_composition(seq, alph, 1)

    seqs = sequtil.segment(seq, num_segments)
    return numpy.concatenate(
        [sequtil.diletter_composition(s, alph, 1) for s in seqs])
def amino_acid_composition(self, num_segments, feature_ids=False):
    """Compute the protein's letter composition, or describe its features.

    Args:
        num_segments: How many segments ``sequtil.segment`` splits the
            sequence into; each segment contributes its own composition.
        feature_ids: When true, return the feature labels instead of the
            composition values.

    Returns:
        If ``feature_ids`` is false: whatever
        ``sequtil.letter_composition`` returns for the full sequence
        when ``num_segments == 1``; otherwise the per-segment
        compositions joined by ``numpy.concatenate``.
        If ``feature_ids`` is true: the tuple ``(feat_ids, feat_names)``,
        two lists with one entry per segment (numbered from 1) and per
        letter of ``sequtil.aa_unambiguous_alph``.
    """
    alph = sequtil.aa_unambiguous_alph

    if feature_ids:
        feat_ids = []
        feat_names = []
        # xrange only exists on Python 2; range iterates the same values
        # and is available on both Python 2 and Python 3.
        for si in range(1, num_segments + 1):
            for aa in alph:
                feat_ids.append('%s%i' % (aa, si))
                feat_names.append('%s, segment %i' % (aa, si))
        return (feat_ids, feat_names)

    seq = self.protein_sequence
    if num_segments == 1:
        return sequtil.letter_composition(seq, alph)

    seqs = sequtil.segment(seq, num_segments)
    return numpy.concatenate(
        [sequtil.letter_composition(s, alph) for s in seqs])
def test_segment(self):
    """Check that sequtil.segment splits self.seq100 into two 50-long parts."""
    # Split the fixture sequence (length 100, per its name) into 2 segments.
    parts = sequtil.segment(self.seq100, 2)

    # Exactly two segments must come back ...
    self.assertEqual(len(parts), 2)

    # ... and every one of them must be 50 items long.
    has_expected_length = [len(part) == 50 for part in parts]
    self.assertTrue(all(has_expected_length))