def seq_subset_load(infile, subset_mode, subset_args):
    """Load a subset of sequence segments from a sequence file.

    Arguments:
    infile -- path of the sequence file. In 'flatfile' mode it MUST be a
        multifasta file; in every other mode it is loaded as a single
        query sequence through surefmt_load (requested as 'fasta').
    subset_mode -- selects how the segments are obtained:
        'flatfile'    : the records of infile are the subset as-is
        'coordinates' : coordinate pairs are read from a separate file
        'features'    : coordinates are derived from features of infile
        'size'        : coordinates are generated by chopping the sequence
    subset_args -- mode-specific settings:
        'flatfile'    : not used
        'coordinates' : mapping with the keys 'file', 'header' and
                        'columns', handed to adaptive_list_load
        'features'    : handed unchanged to feat_collect as its feat_mode
        'size'        : mapping with the keys 'size' and 'chop_mode',
                        handed to coord_chop with the sequence length

    Returns a (subset, subset_file) tuple. In 'flatfile' mode subset_file
    is infile itself; otherwise the subset is written with write_fasta to
    '<seq_record.id>_subset.fas' and that file name is returned.

    Exceptions raised by the helper functions, or by a missing key in
    subset_args, propagate to the caller unchanged.
    """
    from analysis.sequence_ops import feat_collect, feature_coords, \
        coord_chop, get_seq_subset_by_coords
    from analysis.seqfile_ops import load_multifasta, surefmt_load, \
        write_fasta
    from analysis.text_manipulation import adaptive_list_load

    # Modes are compared with == (value equality). Identity comparison
    # against a literal only matches when the caller happens to pass the
    # very same interned string object.
    if subset_mode == 'flatfile':
        # in this case the sequence file MUST be multifasta
        subset = load_multifasta(infile)
        print("set of %d sequence segments" % len(subset))
        return subset, infile

    # load the query single sequence file (convert format if necessary)
    seq_record = surefmt_load(infile, 'fasta', 'generic_dna')
    print("query sequence loaded from %s" % (infile,))

    # load or generate coordinate pairs for target segments
    if subset_mode == 'coordinates':
        coords_file = subset_args['file']
        header = subset_args['header']
        columns = subset_args['columns']
        coords_list = adaptive_list_load(coords_file, header, columns)
        # the coordinates come from coords_file, not from the sequence file
        print("%d segments loaded from %s" % (len(coords_list), coords_file))
    elif subset_mode == 'features':
        features = feat_collect(infile, subset_args)
        coords_list = feature_coords(features)
        print("%d features loaded from %s" % (len(coords_list), infile))
    elif subset_mode == 'size':
        size = subset_args['size']
        chop_mode = subset_args['chop_mode']
        coords_list = coord_chop(len(seq_record.seq), size, chop_mode)
        print("%d segments generated to fit %s" % (len(coords_list), size))
    else:
        print("ERROR: A mode MUST be specified.")
        # NOTE(review): None is passed on to get_seq_subset_by_coords as
        # before; presumably that call fails -- consider raising a
        # ValueError here once callers have been checked.
        coords_list = None

    # collect subset of sequence segments using resulting coords_list
    subset = get_seq_subset_by_coords(seq_record, coords_list)
    print("subset of %d sequence segments" % len(subset))

    # save subset to multifasta file for later use or reference
    subset_file = seq_record.id + '_subset.fas'
    write_fasta(subset_file, subset)
    print("subset written to fasta file %s" % (subset_file,))

    return subset, subset_file
def test_feat_collect_gene_by_product(self):
    # Collect 'gene' features restricted to two locus_tag values and
    # expect exactly one match. Despite the test name, the filter used
    # here is the locus_tag qualifier.
    # Integers are compared with assertEqual: assertIs checks object
    # identity, which only holds for ints by interpreter caching.
    self.assertEqual(self.count, 1)
    # NOTE(review): ("gene") is a plain string, not a one-element tuple;
    # confirm feat_collect is meant to receive a string here.
    feat_mode = {"types": ("gene"),
                 "tags": {"locus_tag": ["locustag 1", "locustag 4"]}}
    collected = sequence_ops.feat_collect(self.filename, feat_mode)
    self.assertEqual(len(collected), 1)
def test_feat_collect_mixed(self):
    # Collect 'CDS' and 'gene' features filtered on both a locus_tag and
    # a product value, and expect two matches.
    # Integers are compared with assertEqual: assertIs checks object
    # identity, which only holds for ints by interpreter caching.
    self.assertEqual(self.count, 1)
    # NOTE(review): ("locustag 3") and (("product 2")) are plain strings,
    # not tuples; confirm feat_collect is meant to receive strings here.
    feat_mode = {"types": ("CDS", "gene"),
                 "tags": {"locus_tag": ("locustag 3"),
                          "product": (("product 2"))}}
    collected = sequence_ops.feat_collect(self.filename, feat_mode)
    self.assertEqual(len(collected), 2)
def test_feat_collect_all_genes(self):
    # Collect features with an empty tag filter and expect two matches.
    # Integers are compared with assertEqual: assertIs checks object
    # identity, which only holds for ints by interpreter caching.
    self.assertEqual(self.count, 1)
    # NOTE(review): ("genes") is the plain string "genes", not a tuple,
    # and is spelled differently from the "gene" type used by the sibling
    # tests; confirm how feat_collect matches it before changing it.
    feat_mode = {"types": ("genes"), "tags": {}}
    collected = sequence_ops.feat_collect(self.filename, feat_mode)
    self.assertEqual(len(collected), 2)