def process(self):
    """Group the record's single-region CDS features by their tag.

    For each feature of type 'CDS' whose location has exactly one region,
    the feature's sequence is extracted (upper-cased, and
    reverse-complemented when the region is flagged as complement) and
    split on ``self.enzyme``.  The feature id is then stored in
    ``self.tags`` under:

    * ``''`` when the split yields a single part, i.e. the enzyme site
      does not occur in the sequence;
    * ``self.originalEnzyme`` plus the first ``self.tag_length``
      characters that follow the last enzyme site otherwise.

    A warning is written to stderr for sequences that do not start with
    'ATG'; such features are still processed.  ``self._genes_c`` is
    incremented once per processed feature.
    """
    for feature in self.record.features():
        if feature.type != 'CDS':
            continue
        regions = feature.location.regions
        if len(regions) != 1:
            # Features spanning several regions are skipped.
            continue
        region = regions[0]

        feat_id = self._get_feature_id(feature)
        # NOTE(review): "start - 1" suggests region coordinates are
        # 1-based and inclusive -- confirm against the genbank module.
        sequence = self.record.sequence[region.start - 1:region.end].upper()
        if region.complement:
            sequence = genbank.reverseComplement(sequence)

        if not sequence.startswith('ATG'):
            # Same text (and trailing newline) the print statement emitted.
            sys.stderr.write(
                "Feature %s don't start with ATG. It starts with %s\n"
                % (str(feat_id), sequence[0:3]))

        # isinstance() also accepts str subclasses, which the previous
        # exact-type comparison sent down the wrong branch.
        if isinstance(self.enzyme, str):
            parts = sequence.split(self.enzyme)
        else:
            # Presumably a compiled regular expression; only its
            # split(sequence) method is relied upon here.
            parts = self.enzyme.split(sequence)

        if len(parts) == 1:
            key = ''
        else:
            key = self.originalEnzyme + parts[-1][0:self.tag_length]
        insert_in_dict_with_list(self.tags, key, feat_id)
        self._genes_c += 1
def main(args):
    """Export a GenBank record and its gene sequences as FASTA files.

    The record is read from a file (``-r``) or obtained from a gi
    (``-g``).  Two files are written through ``safeOFW``:

    * ``<base>.fasta``        -- the output of ``record.fasta()``;
    * ``<base>.genes.fasta``  -- one entry per single-region 'gene'
      feature, titled with the value of the requested qualifier.

    ``<base>`` is the ``-o`` value when given, otherwise the record file
    name, otherwise the gi.

    Args:
        args: Command-line argument list.  The qualifier name is read
            from index 1 of the positional arguments, so index 0 is
            presumably the program name (e.g. ``sys.argv``).

    Exits with status 1 when neither ``-r`` nor ``-g`` is supplied, or
    when the qualifier name is missing.
    """
    parser = OptionParser(('usage: %prog [options] '
                           '<qualifier to use as gene name>'))
    parser.add_option('-g', '--gi', dest='gi', type='int',
                      help='gi of the genbank record to analyze.')
    parser.add_option('-r', '--record', dest='record', type='string',
                      help='read genbank record from FILE', metavar='FILE')
    parser.add_option('-o', '--output', dest="output",
                      help='name of output file.')
    parser.add_option('-c', '--clobber', dest="clobber",
                      help='Clobber the output files.',
                      action='store_true', default=False)
    (options, args) = parser.parse_args(args)

    if not options.gi and not options.record:
        sys.stderr.write('Usage error: a record file (-r) or '
                         'a gi (-g) is required\n')
        # The extra newline mirrors what the print statement appended.
        sys.stderr.write(parser.format_help() + '\n')
        sys.exit(1)

    if options.output:
        filename_out = options.output
    elif options.record:
        filename_out = '%s' % options.record
    else:
        filename_out = '%s' % options.gi

    if len(args) < 2:
        sys.stderr.write('Qualifier name is required\n')
        sys.stderr.write(parser.format_help() + '\n')
        sys.exit(1)
    qualifier_name = args[1]

    record_fh = None
    try:
        if options.record:
            record_fh = open(options.record)
            record = genbank.Record(record_fh)
        else:
            record = giInfo.GiRecord(options.gi, True)

        fasta_fh = safeOFW('%s.fasta' % filename_out,
                           clobber=options.clobber)
        try:
            fasta_fh.write('%s\n' % (record.fasta(),))
        finally:
            fasta_fh.close()

        genes_fh = safeOFW('%s.genes.fasta' % filename_out,
                           clobber=options.clobber)
        try:
            for feature in record.features():
                if len(feature.location.regions) != 1:
                    continue
                if feature.type != 'gene':
                    continue
                region = feature.location.regions[0]
                seq = record.sequence[region.start - 1:region.end].upper()
                if region.complement:
                    seq = genbank.reverseComplement(seq)

                if qualifier_name not in feature.qualifiers:
                    sys.stderr.write(
                        "Feature doesn't have qualifier "
                        "'%s'.\nQualifiers: %s\n"
                        % (qualifier_name, feature.qualifiers))
                    continue
                feat_name = feature.qualifiers[qualifier_name]

                # Each entry is followed by one empty line.
                entry = fasta.Record(title=feat_name, sequence=seq)
                genes_fh.write('%s\n\n' % (entry,))
        finally:
            genes_fh.close()
    finally:
        # Closed only at the very end: the record may still read from
        # the handle while its features are being iterated.
        if record_fh is not None:
            record_fh.close()