예제 #1
0
def get_name_prefix(args):
    """Return the report prefix taken from the first listed GWAS file.

    The folder ``args.gwas_folder`` is listed through
    ``Utilities.contentsWithRegexpFromFolder``; a compiled
    ``args.gwas_file_pattern`` is handed to it when a pattern is set,
    ``None`` otherwise. The prefix is the part of the first entry's last
    ``/``-separated component that precedes its first ``.``.
    """
    pattern = args.gwas_file_pattern
    regexp = None
    if pattern:
        regexp = re.compile(pattern)

    candidates = Utilities.contentsWithRegexpFromFolder(args.gwas_folder, regexp)
    first = candidates[0]
    # Drop any leading path, then cut at the first dot.
    file_name = first.rpartition("/")[2]
    return file_name.partition(".")[0]
예제 #2
0
    def run(self):
        """Build betas for every GWAS file listed from ``self.gwas_folder``.

        The weight model is loaded only when ``self.args.weight_db_path`` is
        set; otherwise ``None`` is passed on to ``self.buildBetas``. The
        output folder is created if it does not exist yet.

        Raises:
            Exceptions.ReportableException: if the folder listing is empty.
        """
        weight_db_logic = None
        if self.args.weight_db_path:
            logging.info("Loading weight model")
            weight_db_logic = WeightDBUtilities.WeightDBEntryLogic(
                self.weight_db_path)

        names = Utilities.contentsWithRegexpFromFolder(self.gwas_folder,
                                                       self.gwas_regexp)

        if not os.path.exists(self.output_folder):
            os.makedirs(self.output_folder)

        if not names:
            # NOTE(review): gwas_regexp is assigned outside this method. If it
            # can be None (no pattern configured), reading `.pattern` directly
            # would raise AttributeError and hide the real problem, so fall
            # back to None in the message instead.
            pattern = getattr(self.gwas_regexp, "pattern", None)
            raise Exceptions.ReportableException(
                "No GWAS files found on %s with pattern %s" % (
                    self.gwas_folder,
                    pattern,
                ))

        for name in names:
            try:
                self.buildBetas(weight_db_logic, name)
            except Exceptions.BadFilename as e:
                # The folder may hold files that are not GWAS input; they are
                # reported and skipped rather than aborting the whole run.
                logging.info("Wrong file name: %s, skipping", e.msg)
예제 #3
0
def readGWAS(args):
    """Read every GWAS file of ``args.gwas_folder`` into a single DataFrame.

    Files are listed through ``Utilities.contentsWithRegexpFromFolder``
    (with a compiled ``args.gwas_file_pattern`` when one is set), sorted by
    name, parsed one by one with ``build_betas`` and concatenated.

    Returns:
        pandas.DataFrame holding the rows of all parsed files, in file order.

    Raises:
        Exceptions.ReportableException: if no file is listed.
    """
    start = timer()
    validate(args)
    regexp = re.compile(args.gwas_file_pattern) if args.gwas_file_pattern else None
    names = Utilities.contentsWithRegexpFromFolder(args.gwas_folder, regexp)
    names.sort()  # cosmetic, because different filesystems/OS yield folders in different order

    if not names:
        msg = "No GWAS files found on %s with pattern %s" % (args.gwas_folder, args.gwas_file_pattern,)
        raise Exceptions.ReportableException(msg)

    # Call form (not the Python 2 statement) so the module parses on
    # Python 3 as well; with a single argument the output is the same on both.
    print("INFO: Reading GWAS data")
    gwas_format = GWASUtilities.gwas_format_from_args(args)
    GWAS.validate_format_basic(gwas_format)
    GWAS.validate_format_for_strict(gwas_format)
    # No prediction model is loaded in this entry point; build_betas gets None.
    model = None

    # Collect the per-file frames and concatenate once instead of re-copying
    # the accumulated frame on every iteration. The list is seeded with an
    # empty frame so the result matches the former accumulate-from-empty loop.
    frames = [pandas.DataFrame()]
    frames.extend(build_betas(args, model, gwas_format, name) for name in names)
    r = pandas.concat(frames)

    end = timer()
    elapsed_message = "Successfully parsed input gwas in %s seconds" % (str(end - start))
    logging.info(elapsed_message)
    print(elapsed_message)
    return r
예제 #4
0
def run(args):
    """Turn GWAS input files into beta tables, on disk or in memory.

    Input selection: every file of ``args.gwas_folder`` listed by
    ``Utilities.contentsWithRegexpFromFolder`` (sorted by name) when a
    folder is given, otherwise the single file ``args.gwas_file``.

    Output: when ``args.output_folder`` is set, one gzip-compressed,
    tab-separated file per input is written into that folder and nothing is
    returned; inputs whose output file already exists are skipped.
    Otherwise the per-file results are concatenated and returned.

    Returns:
        pandas.DataFrame when ``args.output_folder`` is not set, else None.

    Raises:
        Exceptions.ReportableException: if folder mode lists no GWAS file.
    """
    start = timer()
    validate(args)

    if args.gwas_folder:
        regexp = re.compile(
            args.gwas_file_pattern) if args.gwas_file_pattern else None
        names = Utilities.contentsWithRegexpFromFolder(args.gwas_folder,
                                                       regexp)
        # cosmetic, because different filesystems/OS yield folders in different order
        names.sort()

        if not names:
            msg = "No GWAS files found on %s with pattern %s" % (
                args.gwas_folder,
                args.gwas_file_pattern,
            )
            raise Exceptions.ReportableException(msg)
    else:
        names = [args.gwas_file]

    gwas_format = GWASUtilities.gwas_format_from_args(args)
    GWAS.validate_format_basic(gwas_format)
    GWAS.validate_format_for_strict(gwas_format)
    model = PredictionModel.load_model(
        args.model_db_path,
        args.model_db_snp_key) if args.model_db_path else None

    if args.output_folder:
        if not os.path.exists(args.output_folder):
            os.makedirs(args.output_folder)

        for name in names:
            # Only the file name is used for the output: `name` may carry
            # directories (args.gwas_file), and joining an absolute path
            # would discard the output folder altogether.
            output_path = os.path.join(args.output_folder,
                                       os.path.basename(name))
            # Test the extension itself, not any ".gz" substring of the path.
            if not output_path.endswith(".gz"):
                output_path += ".gz"
            if os.path.exists(output_path):
                logging.info(
                    "%s already exists, delete it if you want it to be done again",
                    output_path)
                continue

            b = build_betas(args, model, gwas_format, name)
            # The output name always ends in ".gz", so always compress;
            # previously an uncompressed input produced a plain-text "*.gz".
            b.to_csv(output_path, sep="\t", index=False, compression="gzip")
        end = timer()
        logging.info("Successfully ran GWAS input processing in %s seconds" %
                     (str(end - start)))
    else:
        # One concat over all frames instead of re-copying the accumulated
        # frame per file; seeded with an empty frame to match the result of
        # the former accumulate-from-empty loop.
        frames = [pandas.DataFrame()]
        frames.extend(
            build_betas(args, model, gwas_format, name) for name in names)
        r = pandas.concat(frames)
        end = timer()
        logging.info("Successfully parsed input gwas in %s seconds" %
                     (str(end - start)))

        return r
예제 #5
0
def get_name_prefix(args):
    """Return the result prefix for the configured GWAS input.

    Without ``args.gwas_folder`` the prefix is computed from
    ``args.gwas_file``. With a folder, the folder is listed through
    ``Utilities.contentsWithRegexpFromFolder`` and the first entry is used.
    In both cases the prefix itself comes from ``get_result_prefix``.
    """
    # Single-file mode: nothing to list.
    if not args.gwas_folder:
        return get_result_prefix(args, args.gwas_file)

    pattern = args.gwas_file_pattern
    regexp = re.compile(pattern) if pattern else None
    listing = Utilities.contentsWithRegexpFromFolder(args.gwas_folder, regexp)
    return get_result_prefix(args, listing[0])
예제 #6
0
def get_name_prefix(args):
    """Compute the report prefix from the GWAS file that represents the run.

    The representative file is the first entry listed from
    ``args.gwas_folder`` when a folder is configured, and ``args.gwas_file``
    otherwise; ``get_result_prefix`` turns it into the prefix.
    """
    if args.gwas_folder:
        regexp = None
        if args.gwas_file_pattern:
            regexp = re.compile(args.gwas_file_pattern)
        found = Utilities.contentsWithRegexpFromFolder(args.gwas_folder, regexp)
        source_name = found[0]
    else:
        source_name = args.gwas_file

    return get_result_prefix(args, source_name)
예제 #7
0
    def run(self):
        """Load the weight model and build betas for each listed GWAS file.

        Files come from ``self.gwas_folder`` via
        ``Utilities.contentsWithRegexpFromFolder``; the output folder is
        created when missing. Entries rejected with
        ``Exceptions.BadFilename`` are logged and skipped.
        """
        logging.info("Loading weight model")
        weights = WeightDBUtilities.WeightDBEntryLogic(self.weight_db_path)

        gwas_files = Utilities.contentsWithRegexpFromFolder(
            self.gwas_folder, self.gwas_regexp)

        target_folder = self.output_folder
        if not os.path.exists(target_folder):
            os.makedirs(target_folder)

        for gwas_file in gwas_files:
            try:
                self.buildBetas(weights, gwas_file)
            except Exceptions.BadFilename as bad_name:
                # The folder may contain unrelated files; those are ignored.
                logging.info("Wrong file name: %s, skipping", bad_name.msg)
예제 #8
0
def run(args):
    """Process the configured GWAS input and either save or return the betas.

    The inputs are the sorted listing of ``args.gwas_folder`` (through
    ``Utilities.contentsWithRegexpFromFolder``) or, when no folder is
    configured, just ``args.gwas_file``. Each input is parsed with
    ``build_betas``.

    With ``args.output_folder`` set, each result is written there as a
    gzip-compressed, tab-separated file named after the input; inputs whose
    output already exists are skipped, and the function returns None.
    Without it, all results are concatenated into one DataFrame.

    Returns:
        pandas.DataFrame of all parsed rows, or None when writing to disk.

    Raises:
        Exceptions.ReportableException: if the folder listing is empty.
    """
    start = timer()
    validate(args)

    if args.gwas_folder:
        regexp = re.compile(args.gwas_file_pattern) if args.gwas_file_pattern else None
        names = Utilities.contentsWithRegexpFromFolder(args.gwas_folder, regexp)
        names.sort()  # cosmetic, because different filesystems/OS yield folders in different order

        if not names:
            msg = "No GWAS files found on %s with pattern %s" % (args.gwas_folder, args.gwas_file_pattern,)
            raise Exceptions.ReportableException(msg)
    else:
        names = [args.gwas_file]

    gwas_format = GWASUtilities.gwas_format_from_args(args)
    GWAS.validate_format_basic(gwas_format)
    GWAS.validate_format_for_strict(gwas_format)
    model = PredictionModel.load_model(args.model_db_path, args.model_db_snp_key) if args.model_db_path else None

    if args.output_folder:
        if not os.path.exists(args.output_folder):
            os.makedirs(args.output_folder)

        for name in names:
            # Keep the output inside output_folder even when `name` carries a
            # directory part (os.path.join drops the folder for absolute names).
            file_name = os.path.basename(name)
            output_path = os.path.join(args.output_folder, file_name)
            # Decide on the actual extension, not on a ".gz" substring that
            # could come from the folder name or the middle of the file name.
            if not output_path.endswith(".gz"):
                output_path += ".gz"
            if os.path.exists(output_path):
                logging.info("%s already exists, delete it if you want it to be done again", output_path)
                continue

            b = build_betas(args, model, gwas_format, name)
            # Always gzip: the file is always named "*.gz", and writing plain
            # text under that name (as happened for uncompressed inputs)
            # produces a file that gzip-aware readers cannot open.
            b.to_csv(output_path, sep="\t", index=False, compression="gzip")
        end = timer()
        logging.info("Successfully ran GWAS input processing in %s seconds" % (str(end - start)))
    else:
        # Gather first, concatenate once: avoids copying the growing frame on
        # every file. The empty seed frame keeps the result identical to the
        # previous accumulate-from-empty loop.
        frames = [pandas.DataFrame()]
        for name in names:
            frames.append(build_betas(args, model, gwas_format, name))
        r = pandas.concat(frames)
        end = timer()
        logging.info("Successfully parsed input gwas in %s seconds" % (str(end - start)))

        return r
예제 #9
0
    def run(self):
        """Build betas for all GWAS files using the configured weight model.

        Steps, in order: load the weight model from ``self.weight_db_path``,
        list ``self.gwas_folder`` with ``self.gwas_regexp``, make sure
        ``self.output_folder`` exists, then call ``self.buildBetas`` per file.
        """
        logging.info("Loading weight model")
        model_logic = WeightDBUtilities.WeightDBEntryLogic(self.weight_db_path)

        listing = Utilities.contentsWithRegexpFromFolder(
            self.gwas_folder,
            self.gwas_regexp,
        )

        folder_missing = not os.path.exists(self.output_folder)
        if folder_missing:
            os.makedirs(self.output_folder)

        for entry in listing:
            try:
                self.buildBetas(model_logic, entry)
            except Exceptions.BadFilename as error:
                # A bad file name only means the directory holds something
                # that is not GWAS input, so it is logged and skipped.
                logging.info("Wrong file name: %s, skipping", error.msg)
예제 #10
0
    def run(self):
        """Build betas for each GWAS file found in ``self.gwas_folder``.

        A weight model is loaded only if ``self.args.weight_db_path`` is set;
        without it ``self.buildBetas`` receives ``None``. The output folder is
        created when it does not exist.

        Raises:
            Exceptions.ReportableException: if no GWAS file is listed.
        """
        if self.args.weight_db_path:
            logging.info("Loading weight model")
            weight_db_logic = WeightDBUtilities.WeightDBEntryLogic(self.weight_db_path)
        else:
            weight_db_logic = None

        names = Utilities.contentsWithRegexpFromFolder(self.gwas_folder, self.gwas_regexp)

        if not os.path.exists(self.output_folder):
            os.makedirs(self.output_folder)

        if not names:
            # NOTE(review): gwas_regexp is set elsewhere; should it be None
            # (no pattern given), `self.gwas_regexp.pattern` would raise
            # AttributeError and mask this error. Report None in that case.
            pattern = getattr(self.gwas_regexp, "pattern", None)
            raise Exceptions.ReportableException(
                "No GWAS files found on %s with pattern %s" % (self.gwas_folder, pattern,))

        for name in names:
            try:
                self.buildBetas(weight_db_logic, name)
            except Exceptions.BadFilename as e:
                # Extra, non-GWAS content in the directory is skipped on
                # purpose instead of failing the run.
                logging.info("Wrong file name: %s, skipping", e.msg)
예제 #11
0
    def buildBetas(self, db_filename):
        """Run the beta and z-score steps for a single weight database.

        File names for the covariance and the output are derived from the
        database file name (its base name without ``.db``) and stored on
        ``self.args``. Betas are built for every listed entry of
        ``self.args.gwas_folder`` into a per-database sub-folder of
        ``self.args.output_folder``; ``M04_zscores.CalculateZScores`` is then
        run with that sub-folder as its beta folder.
        """
        args = self.args
        db_tag = os.path.basename(db_filename).replace(".db", "")
        base_output = args.output_folder
        logging.info("Processing betas for %s" % (db_filename))

        # Point the shared argument object at this database's files.
        args.weight_db_path = os.path.abspath(db_filename)
        args.covariance = os.path.join(args.covariance_directory, db_tag) + ".cov.txt.gz"
        args.output_file = os.path.join(args.output_directory, db_tag) + ".csv"

        logging.info("Loading weight model")
        weights = WeightDBUtilities.WeightDBEntryLogic(args.weight_db_path)

        beta_step = M03_betas.GetBetas(args)
        gwas_files = Utilities.contentsWithRegexpFromFolder(args.gwas_folder, beta_step.gwas_regexp)

        if not os.path.exists(args.output_folder):
            os.makedirs(args.output_folder)
        # Each database writes its betas into its own sub-folder.
        beta_step.output_folder = os.path.join(base_output, db_tag)
        if not os.path.exists(beta_step.output_folder):
            os.makedirs(beta_step.output_folder)

        for gwas_file in gwas_files:
            try:
                beta_step.buildBetas(weights, gwas_file)
            except Exceptions.BadFilename as bad_name:
                # Unrelated files in the GWAS folder are logged and ignored.
                logging.info("Wrong file name: %s, skipping", bad_name.msg)

        # ZScores
        logging.info("Calculating ZScores for %s" % (db_tag))
        zscore_step = M04_zscores.CalculateZScores(args)
        zscore_step.folder_beta = beta_step.output_folder
        zscore_step.run()
예제 #12
0
    def buildBetas(self, db_filename):
        """Run the beta and z-score steps for a single weight database.

        Side effects on ``self.args``: ``weight_db_path``, ``covariance``,
        ``output_folder`` and ``output_file`` are set for this database
        before the beta (``M03_betas.GetBetas``) and z-score
        (``M04_zscores.CalculateZScores``) steps are created from it.

        The covariance path is ``<cov dir>/<db base name><suffix>``. When
        ``self.args.covariance_suffix`` has the form ``<cov ext>..<db ext>``,
        ``<db ext>`` is removed from the database file name and ``<cov ext>``
        appended; otherwise ``.db`` is removed and the whole suffix appended.
        A covariance directory of ``SAME`` (any case) means the directory of
        the weight database.

        Raises:
            Exceptions.ReportableException: if the GWAS folder listing is
                empty (no report prefix can be derived).
        """
        filebase = os.path.basename(db_filename).replace(".db", "")
        output_folder = os.path.abspath(self.args.output_directory)

        logging.info("Processing betas for %s" % (db_filename))
        self.args.weight_db_path = os.path.abspath(db_filename)
        cov_directory = self.args.covariance_directory
        if cov_directory.upper() == "SAME":
            cov_directory = "/".join(self.args.weight_db_path.split("/")[0:-1])

        extComponents = self.args.covariance_suffix.split("..")

        if len(extComponents) > 1:
            covext = "..".join(extComponents[0:-1])
            dbext = extComponents[-1]
            filebase = db_filename.replace(dbext, "")
            self.args.covariance = "%s/%s%s" % (cov_directory, filebase.split("/")[-1], covext)
        else:
            # split, not strip: strip("/")[-1] indexes into the string and
            # yields only the last character of the name, producing a
            # covariance path such as "<dir>/x<suffix>".
            self.args.covariance = "%s/%s%s" % (
                cov_directory, filebase.split("/")[-1], self.args.covariance_suffix)
        file_prefix = filebase.split("/")[-1].split(".")[0]
        beta_output = os.path.join(output_folder, file_prefix)
        logging.info("Writing betas to %s" % (beta_output))

        self.args.output_folder = beta_output

        logging.info("Loading weight model")
        weight_db_logic = WeightDBUtilities.WeightDBEntryLogic(self.args.weight_db_path)

        betaScript = M03_betas.GetBetas(self.args)
        names = Utilities.contentsWithRegexpFromFolder(self.args.gwas_folder, betaScript.gwas_regexp)

        if not names:
            # Without any GWAS file there is no report prefix; fail with a
            # readable message instead of a TypeError when the output file
            # name is assembled further down.
            raise Exceptions.ReportableException(
                "No GWAS files found on %s with pattern %s" % (
                    self.args.gwas_folder,
                    getattr(betaScript.gwas_regexp, "pattern", None),
                ))

        # beta_output is also the beta step's output folder, so one
        # existence check covers both.
        if not os.path.exists(beta_output):
            os.makedirs(beta_output)
        betaScript.output_folder = beta_output

        report_prefix = None
        for name in names:
            # NOTE(review): ".gz" is appended to every listed name before it
            # is handed to the beta step; kept as-is, confirm this matches
            # what betaScript.buildBetas expects.
            name = name + ".gz"
            if report_prefix is None:
                # The first file names the report: its base name up to the
                # first dot.
                report_prefix = name.split("/")[-1].split(".")[0]
            try:
                betaScript.buildBetas(weight_db_logic, name)
            except Exceptions.BadFilename as e:
                # Unrelated files in the GWAS folder are logged and ignored.
                logging.info("Wrong file name: %s, skipping", e.msg)

        suffix = ".csv"
        self.args.output_file = os.path.join(output_folder,
                                             report_prefix + "-" + file_prefix + suffix)

        # ZScores
        logging.info("Calculating ZScores for %s" % (filebase))
        zscoreScript = M04_zscores.CalculateZScores(self.args)
        zscoreScript.folder_beta = betaScript.output_folder
        zscoreScript.run()
예제 #13
0
 def testContentsWithRegexpFromFolder(self):
     # Listing the dosage fixture folder with the ".*sample" pattern is
     # expected to yield exactly the one sample file.
     folder = "tests/_td/dosage_set_1"
     pattern = re.compile(".*sample")
     expected = ["set.sample"]
     listed = Utilities.contentsWithRegexpFromFolder(folder, pattern)
     self.assertEqual(listed, expected)
예제 #14
0
 def testContentsWithRegexpFromFolder(self):
     # The fixture folder must list only "set.sample" for the ".*sample" regexp.
     sample_regexp = re.compile(".*sample")
     self.assertEqual(
         Utilities.contentsWithRegexpFromFolder(
             "tests/_td/dosage_set_1",
             sample_regexp,
         ),
         ["set.sample"],
     )
예제 #15
0
def get_name_prefix(args):
    """Derive the report prefix from the first file listed in the GWAS folder.

    ``args.gwas_file_pattern`` is compiled and passed to
    ``Utilities.contentsWithRegexpFromFolder`` when set (``None`` is passed
    otherwise). From the first listed entry, the text after the last ``/``
    and before the first ``.`` is returned.
    """
    if args.gwas_file_pattern:
        regexp = re.compile(args.gwas_file_pattern)
    else:
        regexp = None

    listing = Utilities.contentsWithRegexpFromFolder(args.gwas_folder, regexp)
    base_name = listing[0].rsplit("/", 1)[-1]
    # Everything from the first dot on (extensions) is not part of the prefix.
    return base_name.split(".", 1)[0]