Beispiel #1
0
 def test_surefmt_load_gbk2fas(self):
     """Check that a GenBank file loaded as 'fasta' keeps the record id.

     Writes ``self.record`` to ``self.gbk_filename`` with
     ``seqfile_ops.write_genbank`` and reloads that file through
     ``seqfile_ops.surefmt_load`` while requesting the 'fasta' format.
     """
     count = seqfile_ops.write_genbank(self.gbk_filename, self.record)
     # Compare by value: an identity check on an int only succeeds because
     # CPython happens to cache small integers.
     # NOTE(review): count is presumably the number of records written.
     self.assertEqual(count, 1)
     fas_record = seqfile_ops.surefmt_load(self.gbk_filename, 'fasta',
                                           generic_dna)
     self.assertEqual(fas_record.id, self.record.id)
Beispiel #2
0
def seq_subset_load(infile, subset_mode, subset_args):
    """Load a subset of sequence segments from a sequence file.

    Parameters:
        infile      -- path of the sequence file to read.
        subset_mode -- selects how the segments are obtained:
            'flatfile'    : infile is loaded with load_multifasta and used
                            as the subset directly (subset_args is unused).
            'coordinates' : subset_args is a mapping with the keys 'file',
                            'header' and 'columns', which are handed to
                            adaptive_list_load to obtain the coordinates.
            'features'    : subset_args is passed unchanged to feat_collect
                            as the feature mode; coordinates are derived
                            with feature_coords.
            'size'        : subset_args is a mapping with the keys 'size'
                            and 'chop_mode', handed to coord_chop together
                            with the length of the loaded sequence.
        subset_args -- mode-specific arguments, see above.

    Returns:
        A (subset, subset_file) tuple. In 'flatfile' mode subset_file is
        infile itself; otherwise the subset is written with write_fasta to
        '<record id>_subset.fas' and that file name is returned.

    Exceptions raised by the helper functions or by missing subset_args
    keys are propagated unchanged. Progress messages are printed to stdout.
    """
    from analysis.sequence_ops import feat_collect, feature_coords, \
        coord_chop, get_seq_subset_by_coords
    from analysis.seqfile_ops import load_multifasta, surefmt_load, \
        write_fasta
    from analysis.text_manipulation import adaptive_list_load

    # Modes are compared with ==, not 'is': string identity depends on
    # interning, so an equal mode string built at runtime could otherwise
    # be treated as unknown.
    if subset_mode == 'flatfile':
        # in this case the sequence file MUST be multifasta
        subset = load_multifasta(infile)
        print("set of %s sequence segments" % (len(subset),))
        return subset, infile

    # load the query single sequence file (convert format if necessary)
    # NOTE(review): the alphabet is given as the string 'generic_dna' here;
    # confirm that surefmt_load accepts a string for this argument.
    seq_record = surefmt_load(infile, 'fasta', 'generic_dna')
    print("query sequence loaded from %s" % (infile,))

    # load or generate coordinate pairs for target segments
    if subset_mode == 'coordinates':
        coords_file = subset_args['file']
        header = subset_args['header']
        columns = subset_args['columns']
        coords_list = adaptive_list_load(coords_file, header, columns)
        # NOTE(review): the message names infile although the coordinates
        # come from coords_file -- confirm which one is intended.
        print("%s segments loaded from %s" % (len(coords_list), infile))
    elif subset_mode == 'features':
        features = feat_collect(infile, subset_args)
        coords_list = feature_coords(features)
        # NOTE(review): looks like leftover debug output; kept so that the
        # printed output stays the same.
        print(coords_list)
        print("%s features loaded from %s" % (len(coords_list), infile))
    elif subset_mode == 'size':
        size = subset_args['size']
        chop_mode = subset_args['chop_mode']
        coords_list = coord_chop(len(seq_record.seq), size, chop_mode)
        print("%s segments generated to fit %s" % (len(coords_list), size))
    else:
        # Unknown mode: report it and carry on with no coordinates, so the
        # outcome is decided by get_seq_subset_by_coords as before.
        print("ERROR: A mode MUST be specified.")
        coords_list = None

    # collect subset of sequence segments using resulting coords_list
    subset = get_seq_subset_by_coords(seq_record, coords_list)
    print("subset of %s sequence segments" % (len(subset),))

    # save subset to multifasta file for later use or reference
    subset_file = seq_record.id + '_subset.fas'
    write_fasta(subset_file, subset)
    print("subset written to fasta file %s" % (subset_file,))
    return subset, subset_file
Beispiel #3
0
 def test_surefmt_load_gbk2gbk(self):
     """Check that a GenBank file reloaded as 'genbank' keeps id and feature.

     Writes ``self.record`` to ``self.gbk_filename`` with
     ``seqfile_ops.write_genbank`` and reloads that file through
     ``seqfile_ops.surefmt_load`` while requesting the 'genbank' format.
     """
     count = seqfile_ops.write_genbank(self.gbk_filename, self.record)
     # Compare by value: an identity check on an int only succeeds because
     # CPython happens to cache small integers.
     # NOTE(review): count is presumably the number of records written.
     self.assertEqual(count, 1)
     gbk_record = seqfile_ops.surefmt_load(self.gbk_filename, 'genbank',
                                           generic_dna)
     self.assertEqual(gbk_record.id, self.record.id)
     # check features: only the first feature's type is compared
     # NOTE(review): extend this if every feature should be verified.
     self.assertEqual(gbk_record.features[0].type,
                      self.record.features[0].type)
Beispiel #4
0
 def test_surefmt_load_fas2gbk(self):
     """Check that a FASTA file loaded as 'genbank' keeps the record id.

     Writes ``self.record`` to ``self.fas_filename`` with
     ``seqfile_ops.write_fasta`` and reloads that file through
     ``seqfile_ops.surefmt_load`` while requesting the 'genbank' format.
     """
     count = seqfile_ops.write_fasta(self.fas_filename, self.record)
     # Compare by value: an identity check on an int only succeeds because
     # CPython happens to cache small integers.
     # NOTE(review): count is presumably the number of records written.
     self.assertEqual(count, 1)
     gbk_record = seqfile_ops.surefmt_load(self.fas_filename, 'genbank',
                                           generic_dna)
     self.assertEqual(gbk_record.id, self.record.id)