def get_name_prefix(args):
    """Return the report prefix taken from the first matching GWAS file.

    The folder ``args.gwas_folder`` is listed through
    ``Utilities.contentsWithRegexpFromFolder`` using the compiled
    ``args.gwas_file_pattern`` (or ``None`` when no pattern is set).  The
    prefix is the text before the first "." of the last "/"-separated
    component of the first listed name.

    Raises IndexError when the listing is empty.
    """
    pattern = args.gwas_file_pattern
    if pattern:
        regexp = re.compile(pattern)
    else:
        regexp = None

    candidates = Utilities.contentsWithRegexpFromFolder(args.gwas_folder, regexp)
    first = candidates[0]

    # Drop any leading directories, then everything from the first dot on.
    file_name = first.rpartition("/")[2]
    return file_name.partition(".")[0]
def run(self):
    """Build betas for every GWAS file found in ``self.gwas_folder``.

    The weight model is loaded only when ``self.args.weight_db_path`` is
    set; otherwise ``None`` is handed to ``self.buildBetas``.  The output
    folder is created if it does not exist yet.

    Raises:
        Exceptions.ReportableException: when no GWAS file matches.
    """
    if self.args.weight_db_path:
        logging.info("Loading weight model")
        weight_db_logic = WeightDBUtilities.WeightDBEntryLogic(self.weight_db_path)
    else:
        weight_db_logic = None

    names = Utilities.contentsWithRegexpFromFolder(self.gwas_folder, self.gwas_regexp)

    if not os.path.exists(self.output_folder):
        os.makedirs(self.output_folder)

    if not names:
        # The regexp may be None when no file pattern was configured; reading
        # ``.pattern`` off None would raise AttributeError and hide the real
        # problem, so fall back to None in the message.
        pattern = getattr(self.gwas_regexp, "pattern", None)
        raise Exceptions.ReportableException(
            "No GWAS files found on %s with pattern %s" % (self.gwas_folder, pattern,))

    for name in names:
        try:
            self.buildBetas(weight_db_logic, name)
        except Exceptions.BadFilename as e:
            # This just means that there is some extra stuff inside that
            # directory, so it is ignored on purpose.
            logging.info("Wrong file name: %s, skipping", e.msg)
def readGWAS(args):
    """Read every GWAS file in ``args.gwas_folder`` into one data frame.

    The arguments are validated, the folder is listed (optionally filtered
    by ``args.gwas_file_pattern``), and the result of ``build_betas`` for
    each file is concatenated in sorted file-name order.  No prediction
    model is used: ``build_betas`` always receives ``model=None``.

    Returns:
        The concatenated pandas DataFrame.

    Raises:
        Exceptions.ReportableException: when no GWAS file matches.
    """
    start = timer()
    validate(args)

    regexp = re.compile(args.gwas_file_pattern) if args.gwas_file_pattern else None
    names = Utilities.contentsWithRegexpFromFolder(args.gwas_folder, regexp)
    # Cosmetic: different filesystems/OS yield folder contents in different order.
    names.sort()

    if not names:
        msg = "No GWAS files found on %s with pattern %s" % (args.gwas_folder, args.gwas_file_pattern,)
        raise Exceptions.ReportableException(msg)

    # Parenthesised form prints the same text under Python 2 and also parses
    # under Python 3 (the bare ``print "..."`` statement does not).
    print("INFO: Reading GWAS data")

    gwas_format = GWASUtilities.gwas_format_from_args(args)
    GWAS.validate_format_basic(gwas_format)
    GWAS.validate_format_for_strict(gwas_format)

    # Betas are built without a prediction model here.
    model = None

    # Collect the per-file frames and concatenate once instead of re-copying
    # the accumulated frame on every iteration.  The empty seed frame mirrors
    # what the incremental loop started from.
    frames = [pandas.DataFrame()]
    for name in names:
        frames.append(build_betas(args, model, gwas_format, name))
    r = pandas.concat(frames)

    end = timer()
    logging.info("Successfully parsed input gwas in %s seconds"%(str(end-start)))
    print("Successfully parsed input gwas in %s seconds"%(str(end-start)))
    return r
def run(args):
    """Process GWAS input into beta tables.

    Input is either every file in ``args.gwas_folder`` (optionally filtered
    by ``args.gwas_file_pattern``, processed in sorted order) or the single
    file ``args.gwas_file``.

    When ``args.output_folder`` is set, one tab-separated file per input is
    written there (inputs whose output already exists are skipped) and
    nothing is returned.  Otherwise the per-file results are concatenated
    and returned as a single pandas DataFrame.

    Raises:
        Exceptions.ReportableException: when a folder is given and no GWAS
            file matches.
    """
    start = timer()
    validate(args)

    if args.gwas_folder:
        regexp = re.compile(args.gwas_file_pattern) if args.gwas_file_pattern else None
        names = Utilities.contentsWithRegexpFromFolder(args.gwas_folder, regexp)
        # Cosmetic: different filesystems/OS yield folder contents in different order.
        names.sort()
        if not names:
            msg = "No GWAS files found on %s with pattern %s" % (
                args.gwas_folder,
                args.gwas_file_pattern,
            )
            raise Exceptions.ReportableException(msg)
    else:
        names = [args.gwas_file]

    gwas_format = GWASUtilities.gwas_format_from_args(args)
    GWAS.validate_format_basic(gwas_format)
    GWAS.validate_format_for_strict(gwas_format)
    model = PredictionModel.load_model(
        args.model_db_path, args.model_db_snp_key) if args.model_db_path else None

    if args.output_folder:
        if not os.path.exists(args.output_folder):
            os.makedirs(args.output_folder)

        for name in names:
            # ``args.gwas_file`` may carry directories; os.path.join would
            # then nest the output in a missing sub-folder or, for an
            # absolute path, discard the output folder entirely.  Only the
            # file name is used for the output.
            file_name = os.path.basename(name)
            output_path = os.path.join(args.output_folder, file_name)
            compression = "gzip" if ".gz" in file_name else None
            if ".gz" not in output_path:
                output_path += ".gz"
                # The file is now named *.gz, so it must really be gzipped.
                compression = "gzip"
            if os.path.exists(output_path):
                logging.info(
                    "%s already exists, delete it if you want it to be done again",
                    output_path)
                continue

            b = build_betas(args, model, gwas_format, name)
            b.to_csv(output_path, sep="\t", index=False, compression=compression)
        end = timer()
        logging.info("Successfully ran GWAS input processing in %s seconds" %
                     (str(end - start)))
    else:
        # Concatenate once instead of re-copying the accumulated frame on
        # every iteration; the empty seed frame mirrors the incremental loop.
        frames = [pandas.DataFrame()]
        for name in names:
            frames.append(build_betas(args, model, gwas_format, name))
        r = pandas.concat(frames)
        end = timer()
        logging.info("Successfully parsed input gwas in %s seconds" %
                     (str(end - start)))
        return r
def get_name_prefix(args):
    """Return the result prefix for the GWAS input described by ``args``.

    With ``args.gwas_folder`` set, the folder is listed (filtered by the
    compiled ``args.gwas_file_pattern`` when one is given) and the first
    listed name is passed to ``get_result_prefix``; otherwise
    ``args.gwas_file`` is passed.

    Raises IndexError when a folder is given but the listing is empty.
    """
    if not args.gwas_folder:
        return get_result_prefix(args, args.gwas_file)

    pattern = args.gwas_file_pattern
    if pattern:
        regexp = re.compile(pattern)
    else:
        regexp = None
    candidates = Utilities.contentsWithRegexpFromFolder(args.gwas_folder, regexp)
    first = candidates[0]
    return get_result_prefix(args, first)
def run(self):
    """Load the weight model and build betas for each listed GWAS file.

    The output folder is created when missing.  Files for which
    ``self.buildBetas`` raises ``Exceptions.BadFilename`` are logged and
    skipped.
    """
    logging.info("Loading weight model")
    weight_model = WeightDBUtilities.WeightDBEntryLogic(self.weight_db_path)

    gwas_files = Utilities.contentsWithRegexpFromFolder(self.gwas_folder, self.gwas_regexp)

    folder_missing = not os.path.exists(self.output_folder)
    if folder_missing:
        os.makedirs(self.output_folder)

    for gwas_file in gwas_files:
        try:
            self.buildBetas(weight_model, gwas_file)
        except Exceptions.BadFilename as bad_name:
            # Extra, unrelated content in the directory is tolerated.
            logging.info("Wrong file name: %s, skipping", bad_name.msg)
def run(args):
    """Process GWAS input into beta tables.

    Input is either every file in ``args.gwas_folder`` (optionally filtered
    by ``args.gwas_file_pattern``, processed in sorted order) or the single
    file ``args.gwas_file``.

    When ``args.output_folder`` is set, one tab-separated file per input is
    written there (inputs whose output already exists are skipped) and
    nothing is returned.  Otherwise the per-file results are concatenated
    and returned as a single pandas DataFrame.

    Raises:
        Exceptions.ReportableException: when a folder is given and no GWAS
            file matches.
    """
    start = timer()
    validate(args)

    if args.gwas_folder:
        regexp = re.compile(args.gwas_file_pattern) if args.gwas_file_pattern else None
        names = Utilities.contentsWithRegexpFromFolder(args.gwas_folder, regexp)
        # Cosmetic: different filesystems/OS yield folder contents in different order.
        names.sort()
        if not names:
            msg = "No GWAS files found on %s with pattern %s" % (args.gwas_folder, args.gwas_file_pattern,)
            raise Exceptions.ReportableException(msg)
    else:
        names = [args.gwas_file]

    gwas_format = GWASUtilities.gwas_format_from_args(args)
    GWAS.validate_format_basic(gwas_format)
    GWAS.validate_format_for_strict(gwas_format)
    model = PredictionModel.load_model(args.model_db_path, args.model_db_snp_key) if args.model_db_path else None

    if args.output_folder:
        if not os.path.exists(args.output_folder):
            os.makedirs(args.output_folder)

        for name in names:
            # ``args.gwas_file`` may carry directories; os.path.join would
            # then nest the output in a missing sub-folder or, for an
            # absolute path, discard the output folder entirely.  Only the
            # file name is used for the output.
            file_name = os.path.basename(name)
            output_path = os.path.join(args.output_folder, file_name)
            compression = "gzip" if ".gz" in file_name else None
            if ".gz" not in output_path:
                output_path += ".gz"
                # The file is now named *.gz, so it must really be gzipped.
                compression = "gzip"
            if os.path.exists(output_path):
                logging.info("%s already exists, delete it if you want it to be done again", output_path)
                continue

            b = build_betas(args, model, gwas_format, name)
            b.to_csv(output_path, sep="\t", index=False, compression=compression)
        end = timer()
        logging.info("Successfully ran GWAS input processing in %s seconds" %(str(end - start)))
    else:
        # Concatenate once instead of re-copying the accumulated frame on
        # every iteration; the empty seed frame mirrors the incremental loop.
        frames = [pandas.DataFrame()]
        for name in names:
            frames.append(build_betas(args, model, gwas_format, name))
        r = pandas.concat(frames)
        end = timer()
        logging.info("Successfully parsed input gwas in %s seconds"%(str(end-start)))
        return r
def run(self):
    """Build betas for all GWAS files in ``self.gwas_folder``.

    Loads the weight model from ``self.weight_db_path``, makes sure
    ``self.output_folder`` exists, and calls ``self.buildBetas`` once per
    listed file.  A file that triggers ``Exceptions.BadFilename`` is
    reported in the log and skipped.
    """
    logging.info("Loading weight model")
    model_logic = WeightDBUtilities.WeightDBEntryLogic(self.weight_db_path)

    listing = Utilities.contentsWithRegexpFromFolder(self.gwas_folder, self.gwas_regexp)

    if os.path.exists(self.output_folder):
        pass
    else:
        os.makedirs(self.output_folder)

    for entry in listing:
        try:
            self.buildBetas(model_logic, entry)
        except Exceptions.BadFilename as err:
            # Stray files in the directory are deliberately ignored.
            logging.info("Wrong file name: %s, skipping", err.msg)
def run(self):
    """Build betas for every GWAS file found in ``self.gwas_folder``.

    The weight model is loaded only when ``self.args.weight_db_path`` is
    set; otherwise ``None`` is handed to ``self.buildBetas``.  The output
    folder is created if it does not exist yet.

    Raises:
        Exceptions.ReportableException: when no GWAS file matches.
    """
    if self.args.weight_db_path:
        logging.info("Loading weight model")
        weight_db_logic = WeightDBUtilities.WeightDBEntryLogic(self.weight_db_path)
    else:
        weight_db_logic = None

    names = Utilities.contentsWithRegexpFromFolder(self.gwas_folder, self.gwas_regexp)

    if not os.path.exists(self.output_folder):
        os.makedirs(self.output_folder)

    if not names:
        # The regexp may be None when no file pattern was configured; reading
        # ``.pattern`` off None would raise AttributeError and hide the real
        # problem, so fall back to None in the message.
        pattern = getattr(self.gwas_regexp, "pattern", None)
        raise Exceptions.ReportableException("No GWAS files found on %s with pattern %s" %(self.gwas_folder, pattern,))

    for name in names:
        try:
            self.buildBetas(weight_db_logic,name)
        except Exceptions.BadFilename as e:
            # This just means that there is some extra stuff inside that
            # directory, so it is ignored on purpose.
            logging.info("Wrong file name: %s, skipping", e.msg)
def buildBetas(self, db_filename):
    """Run the beta step and then the z-score step for one weight database.

    ``self.args`` is updated in place (``weight_db_path``, ``covariance``
    and ``output_file``) before the ``M03_betas.GetBetas`` and
    ``M04_zscores.CalculateZScores`` scripts are constructed from it.
    Betas are written under ``<args.output_folder>/<db file base>``.
    GWAS files rejected with ``Exceptions.BadFilename`` are logged and
    skipped.
    """
    args = self.args
    db_stem = os.path.basename(db_filename).replace(".db", "")
    base_output = args.output_folder

    logging.info("Processing betas for %s" % (db_filename))

    # The scripts below read their configuration from args, so these
    # assignments must happen before they are instantiated.
    args.weight_db_path = os.path.abspath(db_filename)
    covariance_base = os.path.join(args.covariance_directory, db_stem)
    args.covariance = covariance_base + ".cov.txt.gz"
    report_base = os.path.join(args.output_directory, db_stem)
    args.output_file = report_base + ".csv"

    logging.info("Loading weight model")
    weight_model = WeightDBUtilities.WeightDBEntryLogic(args.weight_db_path)

    beta_step = M03_betas.GetBetas(args)
    gwas_files = Utilities.contentsWithRegexpFromFolder(args.gwas_folder, beta_step.gwas_regexp)

    if not os.path.exists(args.output_folder):
        os.makedirs(args.output_folder)

    beta_step.output_folder = os.path.join(base_output, db_stem)
    if not os.path.exists(beta_step.output_folder):
        os.makedirs(beta_step.output_folder)

    for gwas_file in gwas_files:
        try:
            beta_step.buildBetas(weight_model, gwas_file)
        except Exceptions.BadFilename as bad_name:
            # Extra, unrelated content in the directory is tolerated.
            logging.info("Wrong file name: %s, skipping", bad_name.msg)

    # ZScores
    logging.info("Calculating ZScores for %s" % (db_stem))
    zscore_step = M04_zscores.CalculateZScores(args)
    zscore_step.folder_beta = beta_step.output_folder
    zscore_step.run()
def buildBetas(self, db_filename):
    """Run the beta step and then the z-score step for one weight database.

    ``self.args`` is updated in place (``weight_db_path``, ``covariance``,
    ``output_folder`` and ``output_file``) before the ``M03_betas.GetBetas``
    and ``M04_zscores.CalculateZScores`` scripts are constructed from it.

    Covariance file location:
      * directory: ``args.covariance_directory``, or the directory of the
        weight database when that option is "SAME" (case-insensitive);
      * file name: when ``args.covariance_suffix`` contains "..", the text
        after the last ".." is removed from ``db_filename`` and the text
        before it is appended; otherwise the suffix is appended to the
        database file base (file name without ".db").

    Betas are written to ``<output_directory>/<file prefix>`` and the
    z-score report to
    ``<output_directory>/<report prefix>-<file prefix>.csv``, where the
    report prefix comes from the first listed GWAS file name.  GWAS files
    rejected with ``Exceptions.BadFilename`` are logged and skipped.
    """
    filebase = os.path.basename(db_filename).replace(".db", "")
    output_folder = os.path.abspath(self.args.output_directory)
    logging.info("Processing betas for %s" % (db_filename))
    self.args.weight_db_path = os.path.abspath(db_filename)

    cov_directory = self.args.covariance_directory
    if cov_directory.upper() == "SAME":
        # Look for the covariance next to the weight database.
        cov_directory = "/".join(self.args.weight_db_path.split("/")[0:-1])

    extComponents = self.args.covariance_suffix.split("..")
    if len(extComponents) > 1:
        covext = "..".join(extComponents[0:-1])
        dbext = extComponents[-1]
        filebase = db_filename.replace(dbext, "")
        self.args.covariance = "%s/%s%s" % (cov_directory, filebase.split("/")[-1], covext)
    else:
        # Use the last path component of the file base.  The previous
        # ``strip("/")[-1]`` picked the last *character* of the string, which
        # produced a wrong covariance file name.
        self.args.covariance = "%s/%s%s" % (
            cov_directory, filebase.split("/")[-1], self.args.covariance_suffix)

    file_prefix = filebase.split("/")[-1].split(".")[0]
    beta_output = os.path.join(output_folder, file_prefix)
    logging.info("Writing betas to %s" % (beta_output))
    self.args.output_folder = beta_output

    logging.info("Loading weight model")
    weight_db_logic = WeightDBUtilities.WeightDBEntryLogic(self.args.weight_db_path)

    betaScript = M03_betas.GetBetas(self.args)
    names = Utilities.contentsWithRegexpFromFolder(self.args.gwas_folder, betaScript.gwas_regexp)

    if not os.path.exists(beta_output):
        os.makedirs(beta_output)
    betaScript.output_folder = beta_output
    if not os.path.exists(betaScript.output_folder):
        os.makedirs(betaScript.output_folder)

    # NOTE: when no GWAS file is listed, report_prefix stays None and the
    # output_file concatenation below raises TypeError.
    report_prefix = None
    for name in names:
        # NOTE(review): ".gz" is appended to every listed name before it is
        # handed to the beta script -- presumably the script expects
        # compressed names; confirm against M03_betas.
        name = name + ".gz"
        if report_prefix is None:
            report_prefix = name.split("/")[-1].split(".")[0]
        try:
            betaScript.buildBetas(weight_db_logic, name)
        except Exceptions.BadFilename as e:
            # This just means that there is some extra stuff inside that
            # directory, so it is ignored on purpose.
            logging.info("Wrong file name: %s, skipping", e.msg)

    suffix = ".csv"
    self.args.output_file = os.path.join(output_folder, report_prefix + "-" + file_prefix + suffix)

    # ZScores
    logging.info("Calculating ZScores for %s" % (filebase))
    zscoreScript = M04_zscores.CalculateZScores(self.args)
    zscoreScript.folder_beta = betaScript.output_folder
    zscoreScript.run()
def testContentsWithRegexpFromFolder(self):
    """Listing the dosage test folder with ".*sample" yields only set.sample."""
    sample_pattern = re.compile(".*sample")
    found = Utilities.contentsWithRegexpFromFolder(
        "tests/_td/dosage_set_1", sample_pattern)
    expected = ["set.sample"]
    self.assertEqual(found, expected)
def testContentsWithRegexpFromFolder(self):
    """The regexp-filtered listing of the dosage folder is exactly ["set.sample"]."""
    folder = "tests/_td/dosage_set_1"
    matcher = re.compile(".*sample")
    listed = Utilities.contentsWithRegexpFromFolder(folder, matcher)
    self.assertEqual(listed, ["set.sample"])
def get_name_prefix(args):
    """Derive the report prefix from the first GWAS file in the folder.

    ``args.gwas_folder`` is listed via
    ``Utilities.contentsWithRegexpFromFolder``, filtered by the compiled
    ``args.gwas_file_pattern`` when one is set (``None`` otherwise).  The
    first listed name is reduced to its last "/"-separated component and
    cut at the first ".".

    Raises IndexError when the listing is empty.
    """
    regexp = None
    if args.gwas_file_pattern:
        regexp = re.compile(args.gwas_file_pattern)

    listing = Utilities.contentsWithRegexpFromFolder(args.gwas_folder, regexp)
    leading_name = listing[0]

    path_parts = leading_name.split("/")
    name_parts = path_parts[-1].split(".")
    return name_parts[0]