Example #1
0
 def test_chop_maxsize_divisor(self):
     """Check coord_chop in "maxsize_divisor" mode.

     The number of pairs must be at least length / size, and neither the
     first nor the last pair may span more than size.
     """
     chunks = sequence_ops.coord_chop(
         self.length, self.size, "maxsize_divisor")
     head = chunks[0]
     tail = chunks[-1]
     # span of a pair = second element minus first element
     head_span = head[1] - head[0]
     tail_span = tail[1] - tail[0]
     min_count = self.length / self.size
     self.assertGreaterEqual(len(chunks), min_count)
     self.assertLessEqual(head_span, self.size)
     self.assertLessEqual(tail_span, self.size)
Example #2
0
 def test_chop_exactsize(self):
     """Check coord_chop in "exact_size" mode.

     The number of pairs must be at least length / size, the first pair
     must span exactly size, and the last pair may be shorter but never
     longer than the first.
     """
     pair_list = sequence_ops.coord_chop(self.length, self.size, "exact_size")
     # span of a pair = second element minus first element
     first_pair_len = pair_list[0][1] - pair_list[0][0]
     last_pair_len = pair_list[-1][1] - pair_list[-1][0]
     self.assertGreaterEqual(len(pair_list), self.length / self.size)
     # assertEqual, not the deprecated assertEquals alias
     self.assertEqual(first_pair_len, self.size)
     self.assertLessEqual(last_pair_len, self.size)
     self.assertGreaterEqual(first_pair_len, last_pair_len)
Example #3
0
def seq_subset_load(infile, subset_mode, subset_args):
    """Load a subset of sequence segments from a sequence file.

    Args:
        infile: path of the sequence file to read.
        subset_mode: how the segments are selected; one of
            'flatfile'    -- infile is loaded with load_multifasta and
                             returned as-is (it MUST be multifasta);
            'coordinates' -- coordinate pairs are read from a separate file;
            'features'    -- coordinates are derived from features of infile;
            'size'        -- coordinates are generated with coord_chop.
        subset_args: mode-specific arguments:
            'flatfile'    -- unused;
            'coordinates' -- mapping with keys 'file', 'header' and
                             'columns', handed to adaptive_list_load;
            'features'    -- handed unchanged to feat_collect as its mode;
            'size'        -- mapping with keys 'size' and 'chop_mode',
                             handed to coord_chop.

    Returns:
        A (subset, subset_file) tuple. In 'flatfile' mode subset_file is
        infile itself; otherwise the subset is written to
        '<seq_record.id>_subset.fas' and that name is returned.

    Raises:
        Whatever the underlying loaders/writers raise; exceptions are not
        intercepted here. Missing keys in subset_args raise KeyError.
    """
    from analysis.sequence_ops import feat_collect, feature_coords, \
        coord_chop, get_seq_subset_by_coords
    from analysis.seqfile_ops import load_multifasta, surefmt_load, \
        write_fasta
    from analysis.text_manipulation import adaptive_list_load

    # NOTE: modes are compared with ==, not `is`; identity comparison of
    # strings only works by accident of interning.
    if subset_mode == 'flatfile':
        # in this case the sequence file MUST be multifasta
        subset = load_multifasta(infile)
        print("set of %s sequence segments" % (len(subset),))
        return subset, infile

    # load the query single sequence file (convert format if necessary)
    seq_record = surefmt_load(infile, 'fasta', 'generic_dna')
    print("query sequence loaded from %s" % (infile,))

    # load or generate coordinate pairs for target segments
    if subset_mode == 'coordinates':
        coords_file = subset_args['file']
        header = subset_args['header']
        columns = subset_args['columns']
        coords_list = adaptive_list_load(coords_file, header, columns)
        # report the file the coordinates were actually read from
        print("%s segments loaded from %s" % (len(coords_list), coords_file))
    elif subset_mode == 'features':
        feat_mode = subset_args
        features = feat_collect(infile, feat_mode)
        coords_list = feature_coords(features)
        print(coords_list)
        print("%s features loaded from %s" % (len(coords_list), infile))
    elif subset_mode == 'size':
        size = subset_args['size']
        chop_mode = subset_args['chop_mode']
        coords_list = coord_chop(len(seq_record.seq), size, chop_mode)
        print("%s segments generated to fit %s" % (len(coords_list), size))
    else:
        # NOTE(review): an unknown mode only prints a message and falls
        # through with coords_list = None; whether the call below copes
        # with None depends on get_seq_subset_by_coords -- confirm.
        print("ERROR: A mode MUST be specified.")
        coords_list = None

    # collect subset of sequence segments using resulting coords_list
    subset = get_seq_subset_by_coords(seq_record, coords_list)
    print("subset of %s sequence segments" % (len(subset),))

    # save subset to multifasta file for later use or reference
    subset_file = seq_record.id + '_subset.fas'
    write_fasta(subset_file, subset)
    print("subset written to fasta file %s" % (subset_file,))
    return subset, subset_file
Example #4
0
 def test_chop_none(self):
     """Check coord_chop with no size and no mode.

     Passing None for both arguments must yield exactly one pair whose
     span equals the full length.
     """
     pair_list = sequence_ops.coord_chop(self.length, None, None)
     # span of a pair = second element minus first element
     first_pair_len = pair_list[0][1] - pair_list[0][0]
     # compare the count by value; assertIs on an int tests object identity
     self.assertEqual(len(pair_list), 1)
     self.assertEqual(first_pair_len, self.length)
Example #5
0
 def test_chop_count_divisor(self):
     """Check coord_chop in "count_divisor" mode.

     The number of pairs may differ from the requested count by at most
     one, and the first pair must span at least as much as the last.
     """
     chunks = sequence_ops.coord_chop(
         self.length, self.count, "count_divisor")
     head = chunks[0]
     tail = chunks[-1]
     # span of a pair = second element minus first element
     head_span = head[1] - head[0]
     tail_span = tail[1] - tail[0]
     count_delta = abs(len(chunks) - self.count)
     self.assertLessEqual(count_delta, 1)
     self.assertGreaterEqual(head_span, tail_span)