def run(args):
    """Run the beta step and then the z-score step for one invocation.

    Nothing is run, and ``None`` is returned, when either:
      * ``args.overwrite`` is falsy and ``args.output_file`` names a path
        that already exists, or
      * ``args.model_db_path`` is empty.

    Otherwise ``args.output_folder`` is reset to ``None``, ``M03_betas.run``
    is called, and its return value is passed on to ``M04_zscores.run``.

    Args:
        args: Namespace-like object providing ``overwrite``, ``output_file``
            and ``model_db_path``. It is mutated (``output_folder``) and
            forwarded to both steps.
    """
    # An existing output only matters when overwriting is not allowed.
    existing_target = None if args.overwrite else args.output_file
    if existing_target and os.path.exists(existing_target):
        logging.info("%s already exists, move it or delete it if you want it done again", existing_target)
        return

    if not args.model_db_path:
        logging.info("Need to provide a model database file path")
        return

    # NOTE(review): presumably keeps the beta step from writing intermediate
    # files to disk - confirm against M03_betas.
    args.output_folder = None
    betas_result = M03_betas.run(args)
    M04_zscores.run(args, betas_result)
def run(args):
    """Run the beta step and then the z-score step for one invocation.

    Nothing is run, and ``None`` is returned, when either:
      * ``args.overwrite`` is falsy and ``args.output_file`` names a path
        that already exists, or
      * ``args.model_db_path`` is empty.

    Otherwise ``args.output_folder`` is reset to ``None``, ``M03_betas.run``
    is called, and its return value is passed on to ``M04_zscores.run``.

    Args:
        args: Namespace-like object providing ``overwrite``, ``output_file``
            and ``model_db_path``. It is mutated (``output_folder``) and
            forwarded to both steps.
    """
    # The truthiness check on output_file matters: os.path.exists(None)
    # raises TypeError, so an unset output file must skip the lookup.
    if not args.overwrite and args.output_file and os.path.exists(args.output_file):
        logging.info("%s already exists, move it or delete it if you want it done again", args.output_file)
        return

    if not args.model_db_path:
        logging.info("Need to provide a model database file path")
        return

    args.output_folder = None
    g = M03_betas.run(args)
    M04_zscores.run(args, g)
def buildWork(self):
    """Assemble the beta and z-score works from this object's current settings.

    Two argument bundles are built from ``self`` (option values are read
    through their ``.get()`` accessors), handed to ``M03_betas.GetBetas`` and
    ``M04_zscores.CalculateZScores``, and the resulting works are wrapped so
    they can be run as a single unit.

    Returns:
        An object whose ``run()`` executes the beta work and then the z-score
        work. An exception raised by a work is logged and stops the works
        that have not run yet; it is not propagated.
    """
    def _optional(enabled, value):
        # Value of the option when its toggle is on, otherwise None.
        return value.get() if enabled.get() else None

    def _non_empty(value):
        # Value of the option, or None when it is empty.
        text = value.get()
        return text if len(text) else None

    class BetaWorkArgs(object):
        """Argument bundle passed to ``M03_betas.GetBetas``."""

        def __init__(self, source):
            self.verbosity = "10"
            self.weight_db_path = source.weight_db_path
            self.gwas_folder = source.gwas_folder
            self.output_folder = source.beta_folder
            self.snp_column = source.snp_value.get()
            self.a1_column = source.a1_value.get()
            self.a2_column = source.a2_value.get()
            self.or_column = _optional(source.or_on, source.or_value)
            self.beta_column = _optional(source.beta_on, source.beta_value)
            self.beta_sign_column = _optional(source.beta_sign_on, source.beta_sign_value)
            self.beta_zscore_column = _optional(source.beta_z_on, source.beta_z_value)
            self.frequency_column = _optional(source.frequency_on, source.frequency_value)
            self.se_column = _optional(source.se_on, source.se_value)
            self.pvalue_column = _optional(source.p_on, source.p_value)
            self.compressed = source.compressed_on.get()
            self.gwas_file_pattern = _non_empty(source.gwas_file_pattern_value)
            self.separator = _non_empty(source.separator_value)
            self.scheme = GWASUtilities.BETA_P
            # TODO: implement this
            self.skip_until_header = None

    beta_args = BetaWorkArgs(source=self)
    beta_work = M03_betas.GetBetas(beta_args)

    class ZScoresWorkArgs(object):
        """Argument bundle passed to ``M04_zscores.CalculateZScores``."""

        def __init__(self, source):
            self.verbosity = "10"
            self.keep_ens_version = False
            self.beta_folder = source.beta_folder
            self.weight_db_path = source.weight_db_path
            self.output_file = source.output_path
            self.covariance = source.covariance_file
            self.zscore_scheme = ZScoreCalculation.BETA_Z_SIGMA_REF
            self.normalization_scheme = Normalization.NONE
            self.input_format = Formats.FlatFile
            self.selected_dosage_folder = "intermediate/filtered_1000GP_Phase3"

    zscore_args = ZScoresWorkArgs(source=self)
    zscore_work = M04_zscores.CalculateZScores(zscore_args)

    #TODO: maybe connect stuff together so that M03 passes stuff to M04
    class WorkWrapper(object):
        """Runs the wrapped works in order, dropping each one once it is done."""

        def __init__(self, works):
            # Private copy: run() consumes the list and must not touch the caller's.
            self.works = list(works)

        def run(self):
            try:
                # Delete as we go so each finished work can be reclaimed before
                # the next one starts. Consuming from the front keeps the
                # original order, works on Python 2 and 3 (no xrange), and lets
                # a second run() resume at the work that failed.
                while self.works:
                    self.works[0].run()
                    del self.works[0]
            except Exception as e:
                logging.info("Exception when running task: %s", str(e))

    work = WorkWrapper([beta_work, zscore_work])
    return work
def buildZScores(self):
    """Announce the z-score stage in the log and run it with ``self.args``."""
    logging.info("Calculating ZScores!")
    calculate = M04_zscores.run
    calculate(self.args)
def buildBetas(self, db_filename):
    """Compute betas and then z-scores for a single weight database.

    ``self.args`` is updated in place with the paths derived from
    ``db_filename`` (``weight_db_path``, ``covariance``, ``output_folder``,
    ``output_file``) before being handed to ``M03_betas.GetBetas`` and
    ``M04_zscores.CalculateZScores``.

    The covariance file name is derived from ``self.args.covariance_suffix``:
      * ``"<cov-ext>..<db-ext>"``: ``<db-ext>`` is removed from
        ``db_filename`` and ``<cov-ext>`` is appended to what is left;
      * otherwise the suffix is appended to the database base name with
        ``".db"`` removed.
    A ``covariance_directory`` of ``"SAME"`` (any case) means the directory
    that holds the weight database.

    Args:
        db_filename: Path of the weight database to process.

    Returns:
        None. When no GWAS file is listed for the run, the z-score step is
        skipped and the method returns after logging.
    """
    filebase = os.path.basename(db_filename).replace(".db", "")
    output_folder = os.path.abspath(self.args.output_directory)
    logging.info("Processing betas for %s", db_filename)

    self.args.weight_db_path = os.path.abspath(db_filename)

    cov_directory = self.args.covariance_directory
    if cov_directory.upper() == "SAME":
        # Covariance lives next to the weight database.
        cov_directory = "/".join(self.args.weight_db_path.split("/")[0:-1])

    extComponents = self.args.covariance_suffix.split("..")
    if len(extComponents) > 1:
        covext = "..".join(extComponents[0:-1])
        dbext = extComponents[-1]
        filebase = db_filename.replace(dbext, "")
        self.args.covariance = "%s/%s%s" % (cov_directory, filebase.split("/")[-1], covext)
    else:
        # Use the last path component. The previous strip("/")[-1] picked
        # the last *character* of the name and produced a bogus path.
        self.args.covariance = "%s/%s%s" % (
            cov_directory, filebase.split("/")[-1], self.args.covariance_suffix)

    file_prefix = filebase.split("/")[-1].split(".")[0]
    beta_output = os.path.join(output_folder, file_prefix)
    logging.info("Writing betas to %s", beta_output)
    self.args.output_folder = beta_output

    logging.info("Loading weight model")
    weight_db_logic = WeightDBUtilities.WeightDBEntryLogic(self.args.weight_db_path)

    betaScript = M03_betas.GetBetas(self.args)
    names = Utilities.contentsWithRegexpFromFolder(self.args.gwas_folder, betaScript.gwas_regexp)

    if not os.path.exists(beta_output):
        os.makedirs(beta_output)
    betaScript.output_folder = beta_output
    # NOTE(review): normally the same path as beta_output; re-checked in case
    # GetBetas normalises the folder on assignment - confirm and simplify.
    if not os.path.exists(betaScript.output_folder):
        os.makedirs(betaScript.output_folder)

    report_prefix = None
    for name in names:
        # NOTE(review): the listing apparently yields names without the
        # ".gz" extension - confirm against contentsWithRegexpFromFolder.
        name = name + ".gz"
        if report_prefix is None:
            # The first listed file names the report.
            report_prefix = name.split("/")[-1].split(".")[0]
        try:
            betaScript.buildBetas(weight_db_logic, name)
        except Exceptions.BadFilename as e:
            # This just means that there is some extra stuff inside that
            # directory, so we ignore it.
            logging.info("Wrong file name: %s, skipping", e.msg)

    if report_prefix is None:
        # No input at all: there is nothing to name the report after and
        # nothing to compute z-scores from.
        logging.info("No GWAS files found in %s, skipping ZScores for %s",
                     self.args.gwas_folder, filebase)
        return

    self.args.output_file = os.path.join(output_folder, report_prefix + "-" + file_prefix + ".csv")

    # ZScores
    logging.info("Calculating ZScores for %s", filebase)
    zscoreScript = M04_zscores.CalculateZScores(self.args)
    zscoreScript.folder_beta = betaScript.output_folder
    zscoreScript.run()