def form_alleles(self, regions, qrySeq, qryQual, genome_id, accepted, argument):
    """Reduce the aligned regions of one genome to one allele record per locus.

    Regions are ranked with two stable sorts: regions without a negative
    flanking value come first and, among equals, higher identity wins. The
    first region seen for a locus becomes its allele record. Later regions
    of the same locus are either attached to that record as 'secondary'
    fragments or cause the record to be flagged as duplicated.

    Bits written to ``accepted`` by this method:
        1   -- no negative flanking value and only A/C/G/T in the sequence
        2   -- ``accepted == 0`` or ``self.get_qual(...)`` below 10 (clears 1)
        32  -- '{Duplicated}'
        64  -- '{Fragmented}'
        256 -- identity below ``argument['min_iden']``
    Whatever ``self.lookForORF`` returns is OR-ed in as well.

    NOTE(review): the nesting of this body was reconstructed from a
    whitespace-flattened source; confirm it against the formatted original.

    Args:
        regions: list of region dicts; sorted and mutated in place. Keys read
            are 'identity', 'flanking', 'CIGAR', 'coordinates', 'accepted',
            'status' and 'locus'.
        qrySeq: query sequences, forwarded to ``self.get_seq`` and
            ``self.lookForORF``.
        qryQual: query qualities, forwarded to ``self.get_qual``.
        genome_id: string appended to 'MLSType:' to form 'reference'.
        accepted: when equal to 0, every newly stored allele gets bit 2.
        argument: options mapping; reads 'ORF', 'CDS', 'min_frag_prop' and
            'min_iden'.

    Returns:
        dict mapping locus -> the region dict kept for that locus.
    """
    alleles = {}
    # list.sort is stable, so the second key dominates and the first
    # (identity) only breaks ties.
    regions.sort(key=lambda x: x['identity'], reverse=True)
    regions.sort(key=lambda x: min(x['flanking'] + [0]), reverse=True)

    check_orf = argument.get('ORF', False) or argument.get('CDS', False)

    for region in regions:
        if sum(region['flanking']) >= -30 and check_orf and region['CIGAR'] != 'intergenic':
            # lookForORF receives the region itself, so call it before
            # re-reading region['accepted'].
            flag = self.lookForORF(qrySeq, region)
            region['accepted'] = region['accepted'] | flag

        region['seq'] = self.get_seq(qrySeq, *region['coordinates'])
        region['id'] = ''
        region['value_md5'] = get_md5(region['seq'])

        if min(region['flanking']) >= 0 and re.search(r'[^ACGT]', region['seq']) is None:  ## add proportional check
            region['accepted'] = region['accepted'] | 1
        else:
            region['status'] += '{Fragmented}'
            region['accepted'] = region['accepted'] | 64
            region['allele_id'] = -1

        locus = region['locus']
        if locus in alleles:
            primary = alleles[locus]
            extra = dict(coordinates=region['coordinates'], seq=region['seq'], identity=region['identity'])
            if region['accepted'] & 64 > 0:
                # A fragment is only recorded next to another fragment; a
                # fragment arriving after a non-fragmented record is ignored.
                if primary['accepted'] & 64 > 0:
                    primary.setdefault('secondary', []).append(extra)
            elif primary['accepted'] & 32 == 0:
                # Second non-fragmented hit for this locus: flag the stored
                # record as duplicated (done once; further copies are ignored).
                primary['status'] += '{Duplicated}'
                primary['seq'] = 'DUPLICATED'
                primary['value_md5'] = get_md5('DUPLICATED')
                primary['accepted'] = (primary['accepted'] | 32) & (~1)
                primary['allele_id'] = -1
                primary.setdefault('secondary', []).append(extra)
        else:
            if accepted == 0 or self.get_qual(qryQual, *region['coordinates']) < 10:
                region['accepted'] = region['accepted'] | 2
            region['reference'] = 'MLSType:' + genome_id
            alleles[locus] = region
            if region['accepted'] & 2 > 0:
                region['accepted'] = region['accepted'] & (~1)

    # Iterate over a snapshot: entries are popped inside the loop, and
    # removing keys while iterating the live items() view raises
    # RuntimeError on Python 3.
    for locus, allele in list(alleles.items()):
        if allele['accepted'] & 65 == 64:
            # Bit 64 set and bit 1 clear: add up the primary and all
            # secondary pieces.
            # NOTE(review): this length in bases is compared directly with
            # 'min_frag_prop'; confirm the option is an absolute length.
            pieces = [allele] + list(allele.get('secondary', []))
            allele_len = sum(p['coordinates'][2] - p['coordinates'][1] + 1 for p in pieces)
            if allele_len < argument['min_frag_prop']:
                alleles.pop(locus)

        if 'identity' in allele and allele['identity'] < argument['min_iden']:
            allele['allele_id'] = -1
            allele['accepted'] = (allele['accepted'] & (~1)) | 256
            allele['status'] += '{Low identities:' + str(allele['identity']) + '}'
            if allele['accepted'] & 224 > 0:
                # The locus may already have been removed by the length check.
                alleles.pop(locus, None)

    return alleles
def MLSTdb(args):
    """Build the reference allele set and the allele lookup table.

    Parameters are obtained from ``getParams(args)``; the keys read are
    'database', 'refset', 'alleleFasta', 'refstrain', 'max_iden', 'min_iden',
    'coverage', 'paralog' and 'relaxEnd'.

    Steps:
      1. Read alleles from ``alleleFasta``, which is opened as a file when it
         names an existing file and otherwise parsed as FASTA text. Only
         alleles whose 'value_id' is a positive integer string and whose
         'fieldname' contains no '/' are kept.
      2. When ``refset`` is not None, pick references (from ``refstrain`` if
         given, else the first kept allele of each 'fieldname'), call
         ``buildReference`` and, if ``refset`` is non-empty, write the
         reference alleles to that path.
      3. When ``database`` is set, write a header-less CSV indexed by the MD5
         of each allele sequence, with columns fieldname and integer value_id.

    Returns:
        tuple ``(allele_text, refAlleles)``. Both are empty strings when
        ``refset`` is None.
    """
    params = getParams(args)
    database = params['database']
    refset = params['refset']
    alleleFasta = params['alleleFasta']
    refstrain = params['refstrain']
    max_iden = params['max_iden']
    min_iden = params['min_iden']
    coverage = params['coverage']
    paralog = params['paralog']
    relaxEnd = params['relaxEnd']

    if os.path.isfile(alleleFasta):
        alleles = readFasta(uopen(alleleFasta))
    else:
        alleles = readFasta(StringIO(alleleFasta))
    alleles = [
        allele for allele in alleles
        if allele['value_id'].isdigit()
        and int(allele['value_id']) > 0
        and '/' not in allele['fieldname']
    ]

    # Bind both results up front so the final return is defined even when
    # refset is None and buildReference is never called.
    # NOTE(review): '' for allele_text mirrors refAlleles; confirm that
    # buildReference's first return value is text.
    allele_text, refAlleles = '', ''

    if refset is not None:
        if refstrain:
            if os.path.isfile(refstrain):
                references = readFasta(uopen(refstrain))
            else:
                references = readFasta(StringIO(refstrain))
        else:
            # No reference strain supplied: use the first allele of each locus.
            seen_loci, references = set(), []
            for allele in alleles:
                if allele['fieldname'] not in seen_loci:
                    seen_loci.add(allele['fieldname'])
                    references.append(allele)

        allele_text, refAlleles = buildReference(
            alleles, references, max_iden, min_iden, coverage, paralog, relaxEnd)

        if refset:
            with open(str(refset), 'w') as fout:
                fout.write(refAlleles + '\n')
            logger('A file of reference alleles has been generated: {0}'.format(
                refset))

    if database:
        md5_index = [get_md5(allele['value']) for allele in alleles]
        rows = [[allele['fieldname'], int(allele['value_id'])] for allele in alleles]
        # to_csv creates/truncates the file itself; no separate open() needed.
        conversion = pd.DataFrame(rows, index=md5_index)
        conversion.to_csv(database, header=False)
        logger('A lookup table of all alleles has been generated: {0}'.format(
            database))

    return allele_text, refAlleles