コード例 #1
0
    def form_alleles(self, regions, qrySeq, qryQual, genome_id, accepted, argument) :
        """Reduce candidate regions to at most one allele record per locus.

        Regions are visited best-first (see the two sorts below).  The first
        region seen for a locus becomes that locus' allele record; later
        regions of the same locus are either attached to it as 'secondary'
        fragments or cause it to be marked as duplicated.  The region dicts
        are modified in place and are the very objects stored in the result.

        Bits this method sets on ``region['accepted']``:
            1    no negative flanking value and the sequence is pure A/C/G/T
            2    ``accepted == 0`` or ``self.get_qual(...) < 10`` (only tested
                 for the first region of a locus); bit 1 is cleared whenever
                 bit 2 is present
            32   '{Duplicated}'  - a second non-fragmented region hit the locus
            64   '{Fragmented}'  - the region failed the test for bit 1
            256  '{Low identities:...}' - identity below ``argument['min_iden']``
        Whatever ``self.lookForORF`` returns is OR-ed in as well.

        Args:
            regions: list of dicts; the keys read here are 'identity',
                'flanking' (a list), 'CIGAR', 'accepted', 'coordinates',
                'locus' and 'status'.  The list is re-sorted in place.
            qrySeq: query sequences, handed to ``self.lookForORF`` and
                ``self.get_seq``.
            qryQual: query qualities, handed to ``self.get_qual``.
            genome_id: string appended to 'MLSType:' to form 'reference'.
            accepted: when equal to 0, every first-per-locus region gets bit 2.
            argument: dict; optional 'ORF' / 'CDS' switches plus the
                'min_frag_prop' and 'min_iden' thresholds.

        Returns:
            dict mapping locus -> region dict of the retained allele.
        """
        alleles = {}
        # list.sort is stable, so after both sorts regions are ordered by
        # min(flanking + [0]) descending with identity (descending) as tie-break.
        regions.sort(key=lambda x:x['identity'], reverse=True)
        regions.sort(key=lambda x:min(x['flanking'] + [0]), reverse=True)
        # Loop-invariant: ORF lookup is only attempted when one of the switches is on.
        look_for_orf = argument.get('ORF', False) or argument.get('CDS', False)

        for region in regions:
            if sum(region['flanking']) >= -30 and look_for_orf and region['CIGAR'] != 'intergenic' :
                # Call first, then read 'accepted', in case lookForORF touches the region.
                flag = self.lookForORF(qrySeq, region)
                region['accepted'] = region['accepted'] | flag
            region['seq'] = self.get_seq(qrySeq, *region['coordinates'])
            region['id'] = ''
            region['value_md5'] = get_md5(region['seq'])
            if min(region['flanking']) >= 0 and re.search(r'[^ACGT]', region['seq']) is None :  ## add proportional check
                region['accepted'] = region['accepted'] | 1
            else :
                region['status'] += '{Fragmented}'
                region['accepted'] = region['accepted'] | 64
                region['allele_id'] = -1

            locus = region['locus']
            if locus in alleles :
                primary = alleles[locus]
                extra_hit = dict(coordinates=region['coordinates'], seq=region['seq'], identity=region['identity'])
                if region['accepted'] & 64 :
                    # A fragment is only kept when the stored allele is itself fragmented.
                    if primary['accepted'] & 64 :
                        primary.setdefault('secondary', []).append(extra_hit)
                elif not (primary['accepted'] & 32) :
                    # Second non-fragmented hit for this locus: flag the stored
                    # allele as duplicated (done once; bit 32 guards repeats).
                    primary['status'] += '{Duplicated}'
                    primary['seq'] = 'DUPLICATED'
                    primary['value_md5'] = get_md5('DUPLICATED')
                    primary['accepted'] = (primary['accepted'] | 32) & (~1)
                    primary['allele_id'] = -1
                    primary.setdefault('secondary', []).append(extra_hit)
            else :
                if accepted == 0 or self.get_qual(qryQual, *region['coordinates']) < 10:
                    region['accepted'] = region['accepted'] | 2
                region['reference'] = 'MLSType:'+genome_id
                alleles[locus] = region
            if region['accepted'] & 2 :
                region['accepted'] = region['accepted'] & (~1)

        # Iterate over a snapshot: loci are removed from `alleles` inside the
        # loop, and mutating a dict while iterating its live view raises
        # RuntimeError on Python 3.
        for locus, allele in list(alleles.items()) :
            if allele['accepted'] & 65 == 64 :
                # Fragmented (64) and not intact (1): total span of all pieces.
                allele_len = allele['coordinates'][2] - allele['coordinates'][1] + 1
                allele_len += sum(ale['coordinates'][2] - ale['coordinates'][1] + 1
                                  for ale in allele.get('secondary', ()))
                # NOTE(review): the key name suggests a proportion but it is
                # compared with a summed coordinate span - confirm the unit.
                if allele_len < argument['min_frag_prop'] :
                    alleles.pop(locus)
            if 'identity' in allele and allele['identity'] < argument['min_iden'] :
                allele['allele_id'] = -1
                allele['accepted'] = (allele['accepted'] & (~1)) | 256
                allele['status'] += '{Low identities:'+str(allele['identity'])+'}'
                # 224 == 32 | 64 | 128: low-identity alleles carrying any of
                # those bits are dropped (may already be gone, hence the default).
                if allele['accepted'] & 224 > 0 :
                    alleles.pop(locus, None)
        return alleles
コード例 #2
0
def MLSTdb(args):
    """Build the reference-allele file and the allele lookup table.

    Steps, as far as visible here:
      1. Read alleles from ``alleleFasta`` (a path to an existing file, or
         otherwise the FASTA text itself) and keep only those whose
         'value_id' is a positive integer and whose 'fieldname' has no '/'.
      2. If ``refset`` is not None, pick references (from ``refstrain`` if
         given, else the first kept allele of every 'fieldname'), run
         ``buildReference`` and, when ``refset`` is a non-empty name, write
         the reference alleles to that file.
      3. If ``database`` is set, write a header-less CSV to it with one row
         per allele: md5 of the allele 'value' as index, then fieldname and
         integer value_id.

    Args:
        args: passed unchanged to ``getParams``; the resulting mapping must
            provide 'database', 'refset', 'alleleFasta', 'refstrain',
            'max_iden', 'min_iden', 'coverage', 'paralog' and 'relaxEnd'.

    Returns:
        tuple ``(allele_text, refAlleles)`` as produced by ``buildReference``,
        or ``('', '')`` when ``refset`` is None and no reference was built.
    """
    params = getParams(args)
    (database, refset, alleleFasta, refstrain, max_iden, min_iden,
     coverage, paralog, relaxEnd) = [params[key] for key in (
         'database', 'refset', 'alleleFasta', 'refstrain', 'max_iden',
         'min_iden', 'coverage', 'paralog', 'relaxEnd')]

    def _read_fasta(source):
        # A value naming an existing file is read from disk; anything else is
        # parsed as in-memory FASTA text.
        if os.path.isfile(source):
            return readFasta(uopen(source))
        return readFasta(StringIO(source))

    alleles = [allele for allele in _read_fasta(alleleFasta)
               if allele['value_id'].isdigit() and int(allele['value_id']) > 0
               and allele['fieldname'].find('/') < 0]

    # Both results default to empty so the final return is always defined,
    # even when no reference set was requested (refset is None).
    # NOTE(review): '' mirrors the original default of refAlleles; confirm
    # callers are happy with an empty allele_text in that case.
    allele_text, refAlleles = '', ''
    if refset is not None:
        if refstrain:
            references = _read_fasta(refstrain)
        else:
            # No explicit reference strain: the first allele of each locus
            # (in input order) stands in as reference.
            seen_loci, references = set(), []
            for allele in alleles:
                if allele['fieldname'] not in seen_loci:
                    seen_loci.add(allele['fieldname'])
                    references.append(allele)

        allele_text, refAlleles = buildReference(alleles, references, max_iden,
                                                 min_iden, coverage, paralog,
                                                 relaxEnd)
        if refset:
            with open(str(refset), 'w') as fout:
                fout.write(refAlleles + '\n')
            # Only report the file when it was actually written.
            logger('A file of reference alleles has been generated:  {0}'.format(
                refset))
    if database:
        md5_index = [get_md5(allele['value']) for allele in alleles]
        rows = [[allele['fieldname'], int(allele['value_id'])]
                for allele in alleles]
        # to_csv creates/truncates the file itself; no separate open() needed.
        conversion = pd.DataFrame(rows, index=md5_index)
        conversion.to_csv(database, header=False)
        logger('A lookup table of all alleles has been generated:  {0}'.format(
            database))
    return allele_text, refAlleles