Python checkFile Examples, MODApy.utils.checkFile Python Examples

Example #1

0

Show file

 def diffvcf(self):
     """Compare two patient VCF files and write the differences to Excel.

     The two patient paths are read from ``argv[2]`` and ``argv[3]``. Both
     are passed through ``checkFile`` with the ``.vcf`` extension, parsed
     with ``vcfmgr.ParsedVCF.mp_parser`` and compared with ``duos``. The
     result is written to
     ``<cfg.resultsPath>Diffs/<name>/<name>.xlsx``, where ``<name>`` is the
     result name with every ``:`` replaced by ``_``.

     Returns:
         Always ``0``. Failures are logged (with traceback) rather than
         raised.
     """
     try:
         patient1 = argv[2]
         patient2 = argv[3]
         # Checks file existence and type for patients
         checkFile(patient1, ".vcf")
         checkFile(patient2, ".vcf")
         logger.info(
             "Evaluating differences between %s and %s", patient1, patient2
         )
         pvcfs = vcfmgr.ParsedVCF.mp_parser(patient1, patient2)
         result = pvcfs[0].duos(pvcfs[1])
         # ':' is replaced so the result name can be used as a path component.
         safe_name = result.name.replace(":", "_")
         outpath = (
             cfg.resultsPath + "Diffs/" + safe_name + "/" + safe_name + ".xlsx"
         )
         logger.info("Writing Result File")
         result.vcf_to_excel(outpath)
         logger.info("Diff Analisis Complete")
         logger.info("File available at:%s", outpath)
     except Exception:
         # Report the failure at ERROR level and keep the traceback so the
         # cause (missing argv entry, parse error, write error...) is visible.
         logger.exception("Diff Analisis Failed")
     return 0

Example #2

0

Show file

def single(patient, panel):
    """Run a panel on a single patient VCF and write the result to Excel.

    Args:
        patient: Path of the patient file; checked against ``.vcf``.
        panel: Panel file; checked against ``.xlsx``. It is also embedded
            verbatim in the output file name.

    Returns:
        The path of the Excel file that was written.

    Raises:
        Exception: If any step fails. The original error is attached as
            ``__cause__`` and included in the message.
    """
    try:
        checkFile(patient, ".vcf")
        checkFile(panel, ".xlsx")
        logger.info("Running %s on patient %s", panel, patient)
        result = vcfmgr.ParsedVCF.from_vcf(patient).panel(panel)
        # NOTE(review): `panel` is concatenated as given; if callers pass a
        # full path (rather than a bare panel name) the output path will
        # contain that whole path -- confirm against callers.
        outpath = (
            cfg.patientPath
            + result.name
            + "/Panels/"
            + result.name
            + "_"
            + panel
            + ".xlsx"
        )
        os.makedirs(os.path.dirname(outpath), exist_ok=True)
        result.vcf_to_excel(outpath)
        logger.info("Single Analisis Complete")
        logger.info("File available at:%s", outpath)
        return outpath
    except Exception as err:
        logger.error("Single analysis Failed")
        logger.debug("Error was: %s", err, exc_info=True)
        # Chain the original error so callers do not receive an empty,
        # context-free Exception.
        raise Exception(f"Single analysis Failed: {err}") from err

Example #3

0

Show file

def trios(patient1,
          patient2,
          patient3,
          VennPlace=None,
          Filter=None,
          Panel=None):
    """Run a trios study on three patient VCFs and write the result to Excel.

    Args:
        patient1: Path of the first patient file; checked against ``.vcf``.
        patient2: Path of the second patient file; checked against ``.vcf``.
        patient3: Path of the third patient file; checked against ``.vcf``.
        VennPlace: Optional value forwarded to ``duos`` as ``VENNPLACE``;
            when given it is also appended to the output file name.
        Filter: Optional list of ``"COLUMN TEXT"`` strings. Rows whose
            COLUMN contains TEXT are dropped; with TEXT ``"Empty"`` rows
            whose COLUMN is the empty string are dropped. ``None`` (the
            default) or a list starting with ``None`` applies no filter.
        Panel: Optional panel file (checked against ``.xlsx``) to run on the
            result; also appended verbatim to the output file name.

    Raises:
        SystemExit: With code 1 when a filter entry does not consist of
            exactly two whitespace-separated words.
        Exception: If any other step fails; the original error is attached
            as ``__cause__``.
    """
    try:
        for patient in (patient1, patient2, patient3):
            checkFile(patient, ".vcf")
        logger.info("Running Trios Study on %s, %s and %s",
                    patient1, patient2, patient3)
        pvcfs = vcfmgr.ParsedVCF.mp_parser(patient1, patient2, patient3)
        result = pvcfs[0].duos(pvcfs[1]).duos(pvcfs[2], VENNPLACE=VennPlace)
        # Kept so the name can be re-applied after each operation that
        # builds a new result object.
        resultname = result.name
        safe_name = resultname.replace(":", "_")
        outpath = cfg.resultsPath + "Trios/" + safe_name + "/" + safe_name
        if VennPlace is not None:
            outpath = outpath + "_Venn" + VennPlace.replace(":", "_")
        # check if there is a Panel Requested
        if Panel:
            logger.info("Running panel {}".format(Panel))
            checkFile(Panel, ".xlsx")
            result = result.panel(Panel)
            result.name = resultname
            outpath = outpath + "_Panel" + Panel
        # check if there is a Filter Requested. `Filter` defaults to None,
        # so guard before indexing it.
        if Filter and Filter[0] is not None:
            for entry in Filter:
                parts = entry.split()
                if len(parts) != 2:
                    logger.error(
                        "--Filter accepts only two arguments. "
                        "Usage: --Filter COLUMN_NAME TEXT_TO_FILTER")
                    # SystemExit is not an Exception subclass, so it is not
                    # intercepted by the handler below.
                    raise SystemExit(1)
                column, text = parts
                if text == "Empty":
                    result = result[result[column] != ""]
                else:
                    result = result[~result[column].str.contains(text)]
                result.name = resultname
                outpath = outpath + "_Filter" + column + text
        outpath = outpath + ".xlsx"
        result.vcf_to_excel(outpath)
        logger.info("Trios Analisis Complete")
        logger.info("File available at:%s", outpath)
    except Exception as err:
        logger.error("Trios Analisis Failed")
        logger.debug("Error was: %s", err, exc_info=True)
        # Chain the original error so callers do not receive an empty,
        # context-free Exception.
        raise Exception(f"Trios Analisis Failed: {err}") from err

Example #4

0

Show file

 def single(self):
     """Command-line entry: run a panel on a single patient.

     Parses ``-Panel`` and ``-Patient`` from ``argv[2:]``, builds the panel
     path as ``cfg.panelsPath + Panel + ".xlsx"`` and the patient path as
     ``cfg.patientPath + Patient``, runs the panel and writes the result to
     ``<cfg.patientPath><name>/Panels/<name>_<Panel>.xlsx``.

     Returns:
         Always ``0``. Analysis failures are logged (with traceback) rather
         than raised. ``SystemExit`` (e.g. argparse usage errors or ``-h``)
         and ``KeyboardInterrupt`` are allowed to propagate.
     """
     # Description for panel usage
     parser = argparse.ArgumentParser(description="Run study on a single patient")
     parser.add_argument(
         "-Panel", required=True, help="File name of Panel inside Panels folder"
     )
     parser.add_argument(
         "-Patient",
         required=True,
         help="Patient File Path - It needs to match exactly to the one found inside Patients folder",
     )
     try:
         # ignore first argument
         args = parser.parse_args(argv[2:])
         panel = cfg.panelsPath + args.Panel + ".xlsx"
         patient = cfg.patientPath + args.Patient
         checkFile(patient, ".vcf")
         checkFile(panel, ".xlsx")
         logger.info("Running %s on patient %s", args.Panel, args.Patient)
         result = vcfmgr.ParsedVCF.from_vcf(patient).panel(panel)
         outpath = (
             cfg.patientPath
             + result.name
             + "/Panels/"
             + result.name
             + "_"
             + args.Panel
             + ".xlsx"
         )
         os.makedirs(os.path.dirname(outpath), exist_ok=True)
         result.vcf_to_excel(outpath)
         logger.info("Single Analisis Complete")
         logger.info("File available at:%s", outpath)
     except Exception:
         # Only real errors are handled here; a bare `except:` would also
         # swallow SystemExit and KeyboardInterrupt. The traceback is logged
         # so the cause of the failure is not lost.
         logger.exception("Single Analisis Failed")
     return 0

Example #5

0

Show file

 def trios(self):
     """Command-line entry: run a trios study on three patients.

     Parses ``-Patient1``/``-Patient2``/``-Patient3`` and the optional
     ``--Panel``, ``--Filter`` (repeatable) and ``--VennPlace`` from
     ``argv[2:]``. Patient paths are built as ``cfg.patientPath + name`` and
     the panel path as ``cfg.panelsPath + Panel + ".xlsx"``. The result is
     written under ``<cfg.resultsPath>Trios/<name>/`` with suffixes
     describing the Venn place, panel and filters that were applied.

     Returns:
         Always ``0``. Analysis failures are logged (with traceback) rather
         than raised. ``SystemExit`` (argparse usage errors, ``-h``, or an
         invalid ``--Filter`` value) and ``KeyboardInterrupt`` propagate.
     """
     parser = argparse.ArgumentParser(description="Run Trios Study on three patients")
     parser.add_argument(
         "-Patient1",
         required=True,
         help="Patient 1 File Path - It needs to match exactly to the one found inside Patients folder",
     )
     parser.add_argument(
         "-Patient2",
         required=True,
         help="Patient 2 File Path - It needs to match exactly to the one found inside Patients folder",
     )
     parser.add_argument(
         "-Patient3",
         required=True,
         help="Patient 3 File Path - It needs to match exactly to the one found inside Patients folder",
     )
     parser.add_argument(
         "--Panel", nargs="?", const=None, help="Panel to run on Trios study"
     )
     parser.add_argument(
         "--Filter",
         nargs="?",
         const=None,
         help="Filter to apply. This function will filter out every row that includes the given text"
         " in the given column. For filtering Empty data, TEXT keyword is 'Empty'",
         metavar=("COLUMN TEXT"),
         action="append",
     )
     parser.add_argument(
         "--VennPlace",
         default=None,
         const=None,
         nargs="?",
         choices=["A", "B", "C", "A:B", "A:C", "B:C", "A:B:C", "ALL"],
         help="Place in a Venn Diagram to obtain variants from",
     )
     try:
         # ignore first argument
         args = parser.parse_args(argv[2:])
         patient1 = cfg.patientPath + args.Patient1
         patient2 = cfg.patientPath + args.Patient2
         patient3 = cfg.patientPath + args.Patient3
         # Checks file existence and type for patients
         for patient in (patient1, patient2, patient3):
             checkFile(patient, ".vcf")
         logger.info(
             "Running Trios Study on %s, %s and %s",
             args.Patient1,
             args.Patient2,
             args.Patient3,
         )
         pvcfs = vcfmgr.ParsedVCF.mp_parser(patient1, patient2, patient3)
         result = pvcfs[0].duos(pvcfs[1]).duos(pvcfs[2], VENNPLACE=args.VennPlace)
         # Kept so the name can be re-applied after each operation that
         # builds a new result object.
         resultname = result.name
         safe_name = resultname.replace(":", "_")
         outpath = cfg.resultsPath + "Trios/" + safe_name + "/" + safe_name
         if args.VennPlace is not None:
             outpath = outpath + "_Venn" + args.VennPlace.replace(":", "_")
         # check if there is a Panel Requested
         if args.Panel:
             logger.info("Running panel {}".format(args.Panel))
             panel = cfg.panelsPath + args.Panel + ".xlsx"
             checkFile(panel, ".xlsx")
             result = result.panel(panel)
             result.name = resultname
             outpath = outpath + "_Panel" + args.Panel
         # check if there is a Filter Requested. With action="append" and no
         # --Filter on the command line, args.Filter is None, so guard
         # before indexing it.
         if args.Filter and args.Filter[0] is not None:
             for entry in args.Filter:
                 parts = entry.split()
                 if len(parts) != 2:
                     logger.error(
                         "--Filter accepts only two arguments. Usage: --Filter COLUMN_NAME TEXT_TO_FILTER"
                     )
                     # SystemExit is not an Exception subclass, so it is not
                     # intercepted by the handler below.
                     raise SystemExit(1)
                 column, text = parts
                 if text == "Empty":
                     result = result[result[column] != ""]
                 else:
                     result = result[~result[column].str.contains(text)]
                 result.name = resultname
                 outpath = outpath + "_Filter" + column + text
         outpath = outpath + ".xlsx"
         result.vcf_to_excel(outpath)
         logger.info("Trios Analisis Complete")
         logger.info("File available at:%s", outpath)
     except Exception:
         # Only real errors are handled here; a bare `except:` would also
         # swallow SystemExit and KeyboardInterrupt. The traceback is logged
         # so the cause of the failure is not lost.
         logger.exception("Trios Analisis Failed")
     return 0

Example #6

0

Show file

    def pipeline(self):
        """Command-line entry: run a pipeline from FASTQ to VCF.

        Parses ``-Pipeline``, one or two ``-FQ`` values, ``-keeptmp``,
        ``-startStep`` and ``-endStep`` from ``argv[2:]``. The pipeline file
        is looked up as ``cfg.pipelinesPath + Pipeline`` and loaded with
        ``pipeline.Pipeline.from_json``; each FASTQ is looked up as
        ``cfg.patientPath + FQ``. One FASTQ runs the pipeline with a single
        input, two FASTQs run it with both.

        Returns:
            ``0`` after the pipeline has run.

        Raises:
            SystemExit: With code 1 when more than two ``-FQ`` values are
                given (argparse itself also exits on usage errors).
        """
        # Description for pipeline usage
        parser = argparse.ArgumentParser(description="Run a Pipeline from FASTQ to VCF")
        parser.add_argument(
            "-Pipeline",
            required=True,
            help="File name of the Pipeline inside Pipelines folder",
        )
        parser.add_argument(
            "-FQ",
            required=True,
            help="Patient FastQ1 File Path - It needs to match exactly "
            "the filename found inside Patients folder. Only this one is needed for Single End. "
            "Two FastQs will be needed for Paired End (usage: -FQ Fastq1 -FQ Fastq2)",
            action="append",
        )
        parser.add_argument(
            "-keeptmp",
            action="store_true",
            default=False,
            help="Keep Temp files, otherwise just creates annotated vcf file.",
        )
        parser.add_argument(
            "-startStep",
            default=0,
            type=int,
            help="Defines step to start running pipeline.",
        )
        parser.add_argument(
            "-endStep",
            default=0,
            type=int,
            help="Defines step to end running pipeline.",
        )

        # ignore first argument
        args = parser.parse_args(argv[2:])
        pipe = cfg.pipelinesPath + args.Pipeline

        # Pass the extension with its leading dot, as every other checkFile
        # call does (".vcf", ".xlsx", "." + fastq extension). A name with no
        # dot yields an empty extension.
        # NOTE(review): checkFile is defined elsewhere -- confirm it expects
        # the dotted form here as well.
        _, dot, extension = args.Pipeline.rpartition(".")
        checkFile(pipe, dot + extension if dot else "")

        newpipe = pipeline.Pipeline.from_json(pipe)

        if len(args.FQ) > 2:
            # The %s placeholder is required: extra logging arguments with
            # no placeholder make the log record fail to format.
            logger.error(
                "Only Two FASTQ files allowed. The Input for FastQ Files was: %s",
                args.FQ,
            )
            raise SystemExit(1)

        # -FQ is required with action="append", so there are one or two here.
        fastqs = [cfg.patientPath + name for name in args.FQ]
        for fastq in fastqs:
            checkFile(fastq, "." + fastq.split(".")[-1])

        run_options = {"startStep": args.startStep, "endStep": args.endStep}
        if args.keeptmp:
            # Only passed when requested so runpipeline's own default
            # applies otherwise.
            run_options["keeptmp"] = True
        newpipe.runpipeline(*fastqs, **run_options)
        return 0