def armiparser(self):
    """Creates a dictionary that can be put into Mike's ARMI decipher function

    The method takes no arguments; it reads the following attributes of the instance:
    self.seqdict: dictionary keyed by strain; ["targets"][analysistype] yields the target file paths and
        ["targetSequences"][analysistype][target]["allele"][allele] yields the allele whose length is used
    self.parseddict: dictionary of parsed results, indexed as [strain][target][allele][pos][matches]
    self.analysistype: string of the current analysis type
    self.identitycutoff: percent identity an allele must reach (>=) for its target to be called present
    self.reportfolder: folder in which reports are stored ("/geneSippr" is appended before it is passed on)
    :return: targetdict, in which targetdict[strain][targetname] is ["+"] for every target called present
    """
    from ARMICARD import decipher
    # Initialise a variable to store the target path used in creating the dictionary of antimicrobial resistances
    targetpath = ""
    # Initialise the dictionary
    targetdict = defaultdict(bamPysamStats.make_dict)
    # Iterate through the strains in the analysis
    for strain in self.seqdict:
        # Iterate through all the targets
        for target in sorted(self.seqdict[strain]["targets"][self.analysistype]):
            # Initialise targetpresent as false - this will change only if the percent identity of at least one
            # allele reaches the identity cutoff
            targetpresent = False
            # Create the targetname variable from the target
            targetname = os.path.basename(target).split(".")[0]
            targetpath = os.path.split(target)[0]
            # Iterate through all the alleles for each target in parseddict
            alleles = self.parseddict[strain][target]
            for allele in alleles:
                # Retrieve the length of the allele
                contiglength = len(
                    self.seqdict[strain]["targetSequences"][self.analysistype][target]["allele"][allele])
                # Each entry stored under [pos][matches] counts as one non-SNP (a match to the reference)
                positions = alleles[allele]
                nonsnps = 0
                for pos in positions:
                    for matches in positions[pos]:
                        nonsnps += len(positions[pos][matches])
                # Calculate the total percent identity, rounded to two decimal places
                currentidentity = float("%.2f" % (float(nonsnps) / contiglength * 100))
                # A single pre-formatted string prints the same text under Python 2 and Python 3
                print("%s %s" % (allele, currentidentity))
                # If this identity reaches the cutoff, the target is present in the strain
                if currentidentity >= self.identitycutoff:
                    targetpresent = True
            # If the target is present, add a plus to Dict
            if targetpresent:
                targetdict[strain][targetname] = ["+"]
    # The resistance dictionary is read from the folder of the last target processed; targetpath is still ""
    # if no target was seen. The context manager closes the file once it has been parsed
    with open("%s/aro3.json" % targetpath) as arofile:
        antidict = json.load(arofile)
    # Send the dictionaries, and report locations to the decipher function
    decipher(targetdict, antidict, self.reportfolder + "/geneSippr")
    return targetdict
def helper(genes, targets, out, cuttoff, aro, threads):
    """Run GeneSeekr on the supplied genes and targets, save the results as JSON, and decipher them

    :param genes: path of the ARMI genes file; must be an existing file
    :param targets: path of a single target file, or of a folder whose "*.fa*" files are all used
    :param out: existing folder in which the time-stamped results JSON file is written
    :param cuttoff: value handed unchanged to the mpblast method of the GeneSeekr object
    :param aro: path of the antibiotic JSON file; must be an existing file
    :param threads: integer handed to GeneSeekr
    :raises AssertionError: if out, genes, aro or threads fail the checks below (the checks are assert
        statements, so they are skipped when Python runs with -O)
    """
    from glob import glob
    assert os.path.isdir(out), u'Output location is not a valid directory {0!r:s}'.format(out)
    assert os.path.isfile(genes), u'ARMI-genes.fa not valid {0!r:s}'.format(genes)
    assert os.path.isfile(aro), u'Antibiotic JSON not valid {0!r:s}'.format(aro)
    assert isinstance(threads, int)

    def ispath(x):
        # A folder stands for every "*.fa*" file it contains; anything else is used as a single path
        return glob(x + "/*.fa*") if os.path.isdir(x) else [x]

    genes = ispath(genes)
    targets = ispath(targets)
    result = GeneSeekr(genes, targets, threads)
    result.mpblast(cuttoff)
    # Write the results to a time-stamped file; the context manager flushes and closes it before decipher runs
    resultsfile = "%s/ARMI-gene_results_%s.json" % (out, time.strftime("%Y.%m.%d.%H.%M.%S"))
    with open(resultsfile, 'w') as resultshandle:
        json.dump(result.plus, resultshandle, sort_keys=True, indent=4, separators=(',', ': '))
    # Load the antibiotic dictionary, closing the file as soon as it has been parsed
    with open(aro) as arohandle:
        antidict = json.load(arohandle)
    decipher(result.plus, antidict, out)
def parseDict(antidictPath="/media/nas0/Jackson/ARMI_Docker/ARMI/aro3.json",
              reportPath="/media/nas/akoziol/Pipeline_development/GeneSipperV2/baitTest/results/armi70_5",
              identityCutoff=70, depthCutoff=4):
    """Populate plusdict with a presence call for every aro entry and send it to the decipher function

    Reads the module-level names seqDict (seqDict[gene][aros] is used as a length), holdingDict
    (holdingDict[gene] maps each recorded position to its depth) and strain, and writes the calls into the
    module-level plusdict. None of these names is rebound, so no global statement is required.

    :param antidictPath: path of the JSON file loaded and passed to decipher; defaults to the original location
    :param reportPath: report location passed to decipher; defaults to the original location
    :param identityCutoff: percent identity that must be exceeded (strictly) for a "+" call
    :param depthCutoff: minimum depth that must be exceeded (strictly) for a "+" call
    """
    for gene in seqDict:
        for aros in seqDict[gene]:
            length = seqDict[gene][aros]
            coverage = holdingDict[gene]
            # Every recorded position counts as one match
            matches = len(coverage)
            # Lowest depth over the recorded positions; the starting value of 10 caps the result
            minDepth = 10
            for presence in coverage:
                if coverage[presence] < minDepth:
                    minDepth = coverage[presence]
            # The fraction is rounded to two decimal places before being scaled to a percentage
            percentID = float("%0.2f" % (float(matches) / float(length))) * 100
            if percentID > identityCutoff and minDepth > depthCutoff:
                plusdict[strain][aros] = ["+"]
            else:
                plusdict[strain][aros] = []
    # Load the resistance dictionary, closing the file as soon as it has been parsed
    with open(antidictPath) as arofile:
        antidict = json.load(arofile)
    decipher(plusdict, antidict, reportPath)
def armiparser(parseddict, seqdict, analysistype, reportfolder):
    """Creates a dictionary that can be put into Mike's ARMI decipher function
    :param parseddict: dictionary of parsed results, indexed as [strain][targetname][allele]; iterating the
        innermost level yields the percent identities compared against the cutoff
    :param seqdict: dictionary containing import paths and file names; ["cutoff"][analysistype] and
        ["targets"][analysistype] are read for every strain
    :param analysistype: string of the current analysis type
    :param reportfolder: folder in which reports are stored ("/geneSippr" is appended before it is passed on)
    :return: targetdict, in which targetdict[strain][targetname] is ["+"] for every target called present
    """
    # Initialise a variable to store the target path used in creating the dictionary of antimicrobial resistances
    targetpath = ""
    # Initialise the dictionary
    targetdict = defaultdict(make_dict)
    # Iterate through the strains in the analysis
    for strain in seqdict:
        # Get the identity cutoff from seqdict
        identitycutoff = seqdict[strain]["cutoff"][analysistype]
        # Iterate through all the targets
        for target in seqdict[strain]["targets"][analysistype]:
            # Create the targetname variable from the target
            targetname = os.path.basename(target).split(".")[0]
            targetpath = os.path.split(target)[0]
            # The target is present as soon as any percent identity of any allele reaches the cutoff
            alleles = parseddict[strain][targetname]
            targetpresent = any(percentidentity >= identitycutoff
                                for allele in alleles
                                for percentidentity in alleles[allele])
            # If the target is present, add a plus to Dict
            if targetpresent:
                targetdict[strain][targetname] = ["+"]
    # The resistance dictionary is read from the folder of the last target processed; targetpath is still ""
    # if no target was seen. The context manager closes the file once it has been parsed
    with open("%s/aro3.json" % targetpath) as arofile:
        antidict = json.load(arofile)
    # Send the dictionaries, and report locations to the decipher function
    decipher(targetdict, antidict, reportfolder + "/geneSippr")
    return targetdict
def blaster(path, targets, out, threshold, db, aro):
    """Select the marker files for the requested database(s), run ARMISeekr on them, and decipher the results

    :param path: folder searched for marker files matching "*.fa*"
    :param targets: passed to ARMISeekr.blaster; also used as the prefix of the "plusdict.json" output file
    :param out: passed to ARMISeekr.blaster and to decipher
    :param threshold: accepted for interface compatibility; not used by this function
    :param db: "ardb", "card" or "both"; any other value leaves the marker list unfiltered
    :param aro: path of the JSON file loaded and passed to decipher
    """
    import os
    if db == "both":
        db = ['ardb', 'card']
    else:
        db = [db]
    jsonfile = '%splusdict.json' % targets
    # Build a new list rather than removing entries from the list being iterated, which skips elements
    markers = []
    for marker in glob(path + "/*.fa*"):
        # The anchored pattern can never match a full glob path (it always contains "/" and ".fa"), so it is
        # applied to the file name without its extension.
        # NOTE(review): assumes CARD markers are the files named by a seven-digit number - confirm
        markername = os.path.basename(marker).split(".")[0]
        cardcheck = match(r"^\d{7}$", markername)
        # db is a list at this point, so it is compared against single-entry lists rather than bare strings
        if db == ['ardb'] and cardcheck is not None:
            continue
        if db == ['card'] and cardcheck is None:
            continue
        markers.append(marker)
    plusdict = ARMISeekr.blaster(markers, targets, out, 'ARMI2')
    # Save the results; the context manager flushes and closes the file
    with open(jsonfile, 'w') as jsonhandle:
        json.dump(plusdict, jsonhandle, sort_keys=True, indent=4, separators=(',', ': '))
    # Printing a single string gives the same output under Python 2 and Python 3
    print(json.dumps(plusdict, sort_keys=True, indent=4, separators=(',', ': ')))
    with open(aro) as arohandle:
        antidict = json.load(arohandle)
    decipher(plusdict, antidict, out)
def armiparser(self):
    """Creates a dictionary that can be put into Mike's ARMI decipher function

    The method takes no arguments; it reads the following attributes of the instance:
    self.seqdict: dictionary keyed by strain; ["targets"][analysistype] yields the target file paths and
        ["targetSequences"][analysistype][target]["allele"][allele] yields the allele whose length is used
    self.parseddict: dictionary of parsed results, indexed as [strain][target][allele][pos][matches]
    self.analysistype: string of the current analysis type
    self.identitycutoff: percent identity an allele must reach (>=) for its target to be called present
    self.reportfolder: folder in which reports are stored ("/geneSippr" is appended before it is passed on)
    :return: targetdict, in which targetdict[strain][targetname] is ["+"] for every target called present
    """
    from ARMICARD import decipher
    # Initialise a variable to store the target path used in creating the dictionary of antimicrobial resistances
    targetpath = ""
    # Initialise the dictionary
    targetdict = defaultdict(bamPysamStats.make_dict)
    # Iterate through the strains in the analysis
    for strain in self.seqdict:
        # Iterate through all the targets
        for target in sorted(self.seqdict[strain]["targets"][self.analysistype]):
            # Initialise targetpresent as false - this will change only if the percent identity of at least one
            # allele reaches the identity cutoff
            targetpresent = False
            # Create the targetname variable from the target
            targetname = os.path.basename(target).split(".")[0]
            targetpath = os.path.split(target)[0]
            # Iterate through all the alleles for each target in parseddict
            alleles = self.parseddict[strain][target]
            for allele in alleles:
                # Retrieve the length of the allele
                contiglength = len(
                    self.seqdict[strain]["targetSequences"][self.analysistype][target]["allele"][allele])
                # Each entry stored under [pos][matches] counts as one non-SNP (a match to the reference)
                positions = alleles[allele]
                nonsnps = 0
                for pos in positions:
                    for matches in positions[pos]:
                        nonsnps += len(positions[pos][matches])
                # Calculate the total percent identity, rounded to two decimal places
                currentidentity = float("%.2f" % (float(nonsnps) / contiglength * 100))
                # A single pre-formatted string prints the same text under Python 2 and Python 3
                print("%s %s" % (allele, currentidentity))
                # If this identity reaches the cutoff, the target is present in the strain
                if currentidentity >= self.identitycutoff:
                    targetpresent = True
            # If the target is present, add a plus to Dict
            if targetpresent:
                targetdict[strain][targetname] = ["+"]
    # The resistance dictionary is read from the folder of the last target processed; targetpath is still ""
    # if no target was seen. The context manager closes the file once it has been parsed
    with open("%s/aro3.json" % targetpath) as arofile:
        antidict = json.load(arofile)
    # Send the dictionaries, and report locations to the decipher function
    decipher(targetdict, antidict, self.reportfolder + "/geneSippr")
    return targetdict