Example #1
0
    def process(self):
        """Group the record's single-region CDS features by their tag key.

        For each ``CDS`` feature whose location has exactly one region, the
        coding sequence is sliced out of ``self.record.sequence``,
        upper-cased, reverse-complemented when ``region.complement`` is set,
        and split with ``self.enzyme``.  The feature id is then stored in
        ``self.tags`` through ``insert_in_dict_with_list``:

        * under the key ``''`` when the split yields a single piece (the
          enzyme did not cut the sequence);
        * otherwise under ``self.originalEnzyme`` followed by the first
          ``self.tag_length`` characters of the last piece.

        ``self._genes_c`` is incremented once per feature handled.  A warning
        is written to ``sys.stderr`` for every sequence that does not begin
        with ``ATG``; such features are still processed.
        """
        for feature in self.record.features():
            if feature.type != 'CDS':
                continue
            regions = feature.location.regions
            if len(regions) != 1:
                # Locations made of several regions are skipped.
                continue

            region = regions[0]
            # NOTE(review): the "- 1" suggests 1-based inclusive coordinates
            # being converted to a Python slice -- confirm against genbank.
            start = region.start - 1
            end = region.end
            feat_id = self._get_feature_id(feature)
            sequence = self.record.sequence[start:end].upper()
            if region.complement:
                sequence = genbank.reverseComplement(sequence)

            if not sequence.startswith('ATG'):
                msg = ('Feature %s don\'t start with ATG. It starts '
                       'with %s') % (str(feat_id), sequence[0:3])
                # write() instead of the Python-2-only "print >>" chevron;
                # the emitted text is the same.
                sys.stderr.write(msg + '\n')

            if isinstance(self.enzyme, str):
                parts = sequence.split(self.enzyme)
            else:
                # Any non-string enzyme must provide split(sequence)
                # (presumably a compiled regular expression -- confirm).
                parts = self.enzyme.split(sequence)

            if len(parts) == 1:
                key = ''
            else:
                key = self.originalEnzyme + parts[-1][0:self.tag_length]
            insert_in_dict_with_list(self.tags, key, feat_id)
            self._genes_c += 1
def main(args):
    """Export a GenBank record and its gene features as FASTA files.

    ``args`` is an argv-style list.  After option parsing the qualifier name
    is taken from positional index 1, so index 0 is ignored (presumably the
    program name, i.e. ``sys.argv`` passed whole -- confirm with the caller).

    The record is read from the file given with ``-r``; otherwise it is
    created with ``giInfo.GiRecord`` from the gi given with ``-g``.

    Two files are opened through ``safeOFW``:

    * ``<base>.fasta`` receives ``record.fasta()``;
    * ``<base>.genes.fasta`` receives one ``fasta.Record`` followed by an
      empty line for every single-region ``gene`` feature that carries the
      requested qualifier.  Features lacking it are reported on stderr and
      skipped.

    ``<base>`` is the ``-o`` value, else the record file name, else the gi.

    Exits with status 1, after printing the usage text to stderr, when
    neither ``-g`` nor ``-r`` is supplied or when the qualifier name is
    missing.
    """
    parser = OptionParser(('usage: %prog [options] '
                           '<qualifier to use as gene name>'))
    parser.add_option('-g', '--gi', dest='gi', type='int',
                      help='gi of the genbank record to analyze.')
    parser.add_option('-r', '--record', dest='record', type='string',
                      help='read genbank record from FILE',
                      metavar='FILE')
    parser.add_option('-o', '--output', dest="output",
                      help='name of output file.')
    parser.add_option('-c', '--clobber', dest="clobber",
                      help='Clobber the output files.',
                      action='store_true', default=False)
    (options, args) = parser.parse_args(args)

    if not options.gi and not options.record:
        sys.stderr.write('Usage error: a record file (-r) or '
                         'a gi (-g) is required\n')
        sys.stderr.write(parser.format_help() + '\n')
        sys.exit(1)

    if len(args) < 2:
        sys.stderr.write('Qualifier name is required\n')
        sys.stderr.write(parser.format_help() + '\n')
        sys.exit(1)

    qualifierName = args[1]

    if options.output:
        filenameOut = options.output
    elif options.record:
        filenameOut = '%s' % options.record
    else:
        filenameOut = '%s' % options.gi

    recordFh = None
    try:
        if options.record:
            # Kept open until the very end in case the record reads lazily.
            recordFh = open(options.record)
            record = genbank.Record(recordFh)
        else:
            record = giInfo.GiRecord(options.gi, True)

        fastaFh = safeOFW('%s.fasta' % filenameOut,
                          clobber=options.clobber)
        try:
            fastaFh.write('%s\n' % (record.fasta(),))
        finally:
            fastaFh.close()

        genesFh = safeOFW('%s.genes.fasta' % filenameOut,
                          clobber=options.clobber)
        try:
            for feature in record.features():
                if len(feature.location.regions) != 1:
                    continue

                if feature.type != 'gene':
                    continue

                region = feature.location.regions[0]
                seq = record.sequence[region.start - 1:region.end]
                seq = seq.upper()
                if region.complement:
                    seq = genbank.reverseComplement(seq)

                if qualifierName not in feature.qualifiers:
                    sys.stderr.write(("Feature doesn't have qualifier "
                                      "'%s'.\nQualifiers: %s\n" %
                                      (qualifierName, feature.qualifiers)))
                    continue

                feat_name = feature.qualifiers[qualifierName]
                entry = fasta.Record(title=feat_name, sequence=seq)
                # The entry, its newline, then one blank separator line.
                genesFh.write('%s\n\n' % (entry,))
        finally:
            genesFh.close()
    finally:
        if recordFh is not None:
            recordFh.close()