def armiparser(self):
    """Creates a dictionary that can be put into Mike's ARMI decipher function

    Takes no arguments; the following attributes of the object are read instead:
    parseddict - nested dictionary indexed as parseddict[strain][target][allele][pos][matches]; the innermost
        level maps mismatches to depth of coverage
    seqdict - dictionary providing, per strain, the "targets" and "targetSequences" entries of each analysis type
    analysistype - string of the current analysis type
    identitycutoff - minimum percent identity an allele needs for its target to be called present
    reportfolder - folder in which reports are stored
    :return: targetdict - targetdict[strain][targetname] is ["+"] for every target called present
    """
    from ARMICARD import decipher
    # Folder of the last target processed - aro3.json is loaded from this folder below
    targetpath = ""
    # Initialise the dictionary
    targetdict = defaultdict(bamPysamStats.make_dict)
    # Iterate through the strains in the analysis
    for strain in self.seqdict:
        # Iterate through all the targets
        for target in sorted(self.seqdict[strain]["targets"][self.analysistype]):
            # The target is only present if at least one allele reaches the identity cutoff
            targetpresent = False
            # Create the targetname variable from the target
            targetname = os.path.basename(target).split(".")[0]
            targetpath = os.path.split(target)[0]
            alleles = self.parseddict[strain][target]
            for allele in alleles:
                # Retrieve the length of the allele
                contiglength = len(self.seqdict[strain]["targetSequences"][self.analysistype]
                                   [target]["allele"][allele])
                # Each recorded (position, matches, mismatches) entry represents a non-SNP due to pre-filtering,
                # so counting the innermost entries gives the number of matches to the reference
                nonsnps = sum(len(mismatchdepths)
                              for positiondata in alleles[allele].values()
                              for mismatchdepths in positiondata.values())
                # Calculate the total percent identity, rounded to two decimal places
                currentidentity = float("%.2f" % (float(nonsnps) / contiglength * 100))
                # Single pre-formatted argument: prints identically under Python 2 and Python 3
                print("%s %s" % (allele, currentidentity))
                if currentidentity >= self.identitycutoff:
                    targetpresent = True
            # If the target is present, add a plus to the dictionary
            if targetpresent:
                targetdict[strain][targetname] = ["+"]
    # Load the resistance dictionary; the with block closes the file handle once it has been parsed
    with open("%s/aro3.json" % targetpath) as arofile:
        antidict = json.load(arofile)
    # Send the dictionaries, and report locations to the decipher function
    decipher(targetdict, antidict, self.reportfolder + "/geneSippr")
    return targetdict
Esempio n. 2
0
def helper(genes, targets, out, cuttoff, aro, threads):
    """Run GeneSeekr on the genes and targets, save the hits as JSON and send them to decipher

    :param genes: path of the ARMI genes file; must be an existing file
    :param targets: a single target file, or a folder that is searched for files matching *.fa*
    :param out: existing folder that receives the timestamped JSON results; also passed on to decipher
    :param cuttoff: cutoff value passed unchanged to GeneSeekr.mpblast
    :param aro: path of the antibiotic JSON file; must be an existing file
    :param threads: integer passed to GeneSeekr
    :raises AssertionError: if out is not a folder, genes or aro is not a file, or threads is not an int
    """
    from glob import glob
    assert os.path.isdir(out), u'Output location is not a valid directory {0!r:s}'.format(out)
    assert os.path.isfile(genes), u'ARMI-genes.fa not valid {0!r:s}'.format(genes)
    assert os.path.isfile(aro), u'Antibiotic JSON not valid {0!r:s}'.format(aro)
    assert isinstance(threads, int)

    def expand(location):
        """Return the *.fa* files inside a folder, or the location itself wrapped in a list"""
        if os.path.isdir(location):
            return glob(location + "/*.fa*")
        return [location]

    genes = expand(genes)
    targets = expand(targets)
    result = GeneSeekr(genes, targets, threads)
    result.mpblast(cuttoff)
    resultfile = "%s/ARMI-gene_results_%s.json" % (out, time.strftime("%Y.%m.%d.%H.%M.%S"))
    # Context managers make sure the results are flushed to disk and both handles are closed
    with open(resultfile, 'w') as handle:
        json.dump(result.plus, handle, sort_keys=True, indent=4, separators=(',', ': '))
    with open(aro) as handle:
        antidict = json.load(handle)
    decipher(result.plus, antidict, out)
Esempio n. 3
0
def parseDict(aropath="/media/nas0/Jackson/ARMI_Docker/ARMI/aro3.json",
              reportpath="/media/nas/akoziol/Pipeline_development/GeneSipperV2/baitTest/results/armi70_5"):
    """Score every gene/ARO pair of the current strain and send the presence dictionary to decipher

    Works on the module-level holdingDict, seqDict, plusdict and strain variables. None of them is rebound,
    so no global statement is required. seqDict[gene] maps each ARO entry to a length, and holdingDict[gene]
    maps each recorded key (presumably a position - confirm against the code that fills it) to a depth.
    plusdict[strain][aro] is set to ["+"] when the percent identity is above 70 and no recorded depth is 4 or
    lower; otherwise it is set to an empty list.
    :param aropath: path of the antibiotic JSON file; defaults to the previously hard-coded location
    :param reportpath: report location passed to decipher; defaults to the previously hard-coded location
    """
    for gene, arolengths in seqDict.items():
        # Nothing to score for this gene - skip it without touching holdingDict
        if not arolengths:
            continue
        # Coverage only depends on the gene, so summarise it once rather than once per ARO entry
        depths = list(holdingDict[gene].values())
        matches = len(depths)
        # 10 is the starting value of the minimum: it applies when nothing is recorded and caps higher depths
        minDepth = min([10] + depths)
        for aros in arolengths:
            length = float(arolengths[aros])
            # Convert to a percentage BEFORE rounding. Rounding the ratio first discarded everything below one
            # percent, so e.g. 70.4 % became 70.0 and failed the "greater than 70" test
            percentID = float("%0.2f" % (matches / length * 100))
            if percentID > 70 and minDepth > 4:
                plusdict[strain][aros] = ["+"]
            else:
                plusdict[strain][aros] = []
    # The with block closes the file handle once the resistance dictionary has been parsed
    with open(aropath) as arofile:
        antidict = json.load(arofile)
    decipher(plusdict, antidict, reportpath)
Esempio n. 4
0
def armiparser(parseddict, seqdict, analysistype, reportfolder):
    """Creates a dictionary that can be put into Mike's ARMI decipher function
    :param parseddict: dictionary of parsed results - parseddict[strain][targetname][allele] is iterated to
        obtain the percent identities of that allele
    :param seqdict: dictionary containing import paths and file names - seqdict[strain] provides the "cutoff"
        and "targets" entries of each analysis type
    :param analysistype: string of the current analysis type
    :param reportfolder: folder in which reports are stored
    :return: targetdict - targetdict[strain][targetname] is ["+"] for every target called present
    """
    # Folder of the last target processed - aro3.json is loaded from this folder below
    targetpath = ""
    # Initialise the dictionary
    targetdict = defaultdict(make_dict)
    # Iterate through the strains in the analysis
    for strain in seqdict:
        # Get the identity cutoff from seqdict
        identitycutoff = seqdict[strain]["cutoff"][analysistype]
        # Iterate through all the targets
        for target in seqdict[strain]["targets"][analysistype]:
            # Create the targetname variable from the target
            targetname = os.path.basename(target).split(".")[0]
            targetpath = os.path.split(target)[0]
            alleles = parseddict[strain][targetname]
            # The target is present as soon as any allele has an identity at or above the cutoff
            targetpresent = any(percentidentity >= identitycutoff
                                for allele in alleles
                                for percentidentity in alleles[allele])
            # If the target is present, add a plus to the dictionary
            if targetpresent:
                targetdict[strain][targetname] = ["+"]
    # Load the resistance dictionary; the with block closes the file handle once it has been parsed
    with open("%s/aro3.json" % targetpath) as arofile:
        antidict = json.load(arofile)
    # Send the dictionaries, and report locations to the decipher function
    decipher(targetdict, antidict, reportfolder + "/geneSippr")
    return targetdict
def armiparser(parseddict, seqdict, analysistype, reportfolder):
    """Creates a dictionary that can be put into Mike's ARMI decipher function
    :param parseddict: dictionary of parsed results - parseddict[strain][targetname][allele] is iterated to
        obtain the percent identities of that allele
    :param seqdict: dictionary containing import paths and file names - seqdict[strain] provides the "cutoff"
        and "targets" entries of each analysis type
    :param analysistype: string of the current analysis type
    :param reportfolder: folder in which reports are stored
    :return: targetdict - targetdict[strain][targetname] is ["+"] for every target called present
    """
    # Folder of the last target processed - aro3.json is loaded from this folder below
    targetpath = ""
    # Initialise the dictionary
    targetdict = defaultdict(make_dict)
    # Iterate through the strains in the analysis
    for strain in seqdict:
        # Get the identity cutoff from seqdict
        identitycutoff = seqdict[strain]["cutoff"][analysistype]
        # Iterate through all the targets
        for target in seqdict[strain]["targets"][analysistype]:
            # Create the targetname variable from the target
            targetname = os.path.basename(target).split(".")[0]
            targetpath = os.path.split(target)[0]
            alleles = parseddict[strain][targetname]
            # The target is present as soon as any allele has an identity at or above the cutoff
            targetpresent = any(percentidentity >= identitycutoff
                                for allele in alleles
                                for percentidentity in alleles[allele])
            # If the target is present, add a plus to the dictionary
            if targetpresent:
                targetdict[strain][targetname] = ["+"]
    # Load the resistance dictionary; the with block closes the file handle once it has been parsed
    with open("%s/aro3.json" % targetpath) as arofile:
        antidict = json.load(arofile)
    # Send the dictionaries, and report locations to the decipher function
    decipher(targetdict, antidict, reportfolder + "/geneSippr")
    return targetdict
Esempio n. 6
0
def blaster(path, targets, out, threshold, db, aro):
    """Select the marker files of the requested database, run ARMISeekr on them and decipher the hits

    :param path: folder searched for marker files matching *.fa*
    :param targets: passed to ARMISeekr.blaster; also used verbatim as the prefix of the plusdict.json file
    :param out: output location passed to ARMISeekr.blaster and to decipher
    :param threshold: not used by this function; kept so existing callers keep working
    :param db: 'ardb' drops the CARD markers, 'card' keeps only the CARD markers, any other value
        (including 'both') keeps every marker
    :param aro: path of the antibiotic JSON file
    """
    # Imported locally because this function did not previously depend on os
    import os

    def iscard(marker):
        """Return True when the marker file name, without folder and extension, is a seven-digit number"""
        # NOTE(review): assumes CARD markers are the files named after a seven-digit number - confirm.
        # The pattern is applied to the bare name; against the full glob path it could never match
        return match(r"^\d{7}$", os.path.basename(marker).split(".")[0]) is not None

    jsonfile = '%splusdict.json' % targets
    markers = glob(path + "/*.fa*")
    # db is compared as the string that was passed in, and a new list is built rather than removing items
    # from the list being iterated (which silently skips the element following every removal)
    if db == 'ardb':
        markers = [marker for marker in markers if not iscard(marker)]
    elif db == 'card':
        markers = [marker for marker in markers if iscard(marker)]

    plusdict = ARMISeekr.blaster(markers, targets, out, 'ARMI2')
    # Serialise once; the same text is written to the file and printed
    report = json.dumps(plusdict, sort_keys=True, indent=4, separators=(',', ': '))
    with open(jsonfile, 'w') as handle:
        handle.write(report)
    print(report)
    # The with block closes the file handle once the resistance dictionary has been parsed
    with open(aro) as handle:
        antidict = json.load(handle)
    decipher(plusdict, antidict, out)
Esempio n. 7
0
 def armiparser(self):
     """Creates a dictionary that can be put into Mike's ARMI decipher function

     Takes no arguments; the following attributes of the object are read instead:
     parseddict - nested dictionary indexed as
         parseddict[strain][target][allele][pos][matches]; the innermost
         level maps mismatches to depth of coverage
     seqdict - dictionary providing, per strain, the "targets" and
         "targetSequences" entries of each analysis type
     analysistype - string of the current analysis type
     identitycutoff - minimum percent identity an allele needs for its
         target to be called present
     reportfolder - folder in which reports are stored
     :return: targetdict - targetdict[strain][targetname] is ["+"] for every
         target called present
     """
     from ARMICARD import decipher
     # Folder of the last target processed - aro3.json is loaded from this
     # folder below
     targetpath = ""
     # Initialise the dictionary
     targetdict = defaultdict(bamPysamStats.make_dict)
     # Iterate through the strains in the analysis
     for strain in self.seqdict:
         # Iterate through all the targets
         for target in sorted(
                 self.seqdict[strain]["targets"][self.analysistype]):
             # The target is only present if at least one allele reaches the
             # identity cutoff
             targetpresent = False
             # Create the targetname variable from the target
             targetname = os.path.basename(target).split(".")[0]
             targetpath = os.path.split(target)[0]
             alleles = self.parseddict[strain][target]
             for allele in alleles:
                 # Retrieve the length of the allele
                 contiglength = len(self.seqdict[strain]["targetSequences"][
                     self.analysistype][target]["allele"][allele])
                 # Each recorded (position, matches, mismatches) entry
                 # represents a non-SNP due to pre-filtering, so counting the
                 # innermost entries gives the number of matches to the
                 # reference
                 nonsnps = sum(
                     len(mismatchdepths)
                     for positiondata in alleles[allele].values()
                     for mismatchdepths in positiondata.values())
                 # Calculate the total percent identity, rounded to two
                 # decimal places
                 currentidentity = float(
                     "%.2f" % (float(nonsnps) / contiglength * 100))
                 # Single pre-formatted argument: prints identically under
                 # Python 2 and Python 3
                 print("%s %s" % (allele, currentidentity))
                 if currentidentity >= self.identitycutoff:
                     targetpresent = True
             # If the target is present, add a plus to the dictionary
             if targetpresent:
                 targetdict[strain][targetname] = ["+"]
     # Load the resistance dictionary; the with block closes the file handle
     # once it has been parsed
     with open("%s/aro3.json" % targetpath) as arofile:
         antidict = json.load(arofile)
     # Send the dictionaries, and report locations to the decipher function
     decipher(targetdict, antidict, self.reportfolder + "/geneSippr")
     return targetdict