def test_run_to_file(self):
    """Run M03_betas with an output folder and check the first file it leaves there.

    The run is pointed at the scratch folder ``.kk_test``; the first entry
    listed in that folder is read back with ``pandas.read_table`` and checked
    with ``assert_beta_pb``.
    """
    op = ".kk_test"
    # Start from a clean slate in case an earlier run left the folder behind.
    if os.path.exists(op):
        shutil.rmtree(op)

    args = base_args()
    args.pvalue_column = "P"
    args.or_column = "OR"
    args.output_folder = op

    try:
        M03_betas.run(args)
        n = os.path.join(op, os.listdir(op)[0])
        r = pandas.read_table(n)
        assert_beta_pb(self, r)
    finally:
        # Clean up even when the run or an assertion fails, so a failing test
        # does not leave the scratch folder in the working directory.
        shutil.rmtree(op, ignore_errors=True)
def test_run_split(self):
    """Run M03_betas on the ``scz2b`` folder with a file pattern and check the result.

    Uses the P and OR columns and validates the returned value with
    ``assert_beta_pb``.
    """
    split_args = base_args("tests/_td/GWAS/scz2b")
    settings = (
        ("gwas_file_pattern", ".*gz"),
        ("pvalue_column", "P"),
        ("or_column", "OR"),
    )
    for attribute, value in settings:
        setattr(split_args, attribute, value)

    betas = M03_betas.run(split_args)
    assert_beta_pb(self, betas)
def test_run_skip(self):
    """Run M03_betas on ``scz2c`` with ``skip_until_header`` set to a tab-joined header.

    Uses the P and OR columns and validates the returned value with
    ``assert_beta_pb``.
    """
    header_fields = [
        "HG19CHRC", "SNPID", "A1", "A2", "BP", "INFO", "OR",
        "SE", "P", "NGT", "BETA", "ZSCORE", "BETA_SIGN",
    ]

    skip_args = base_args("tests/_td/GWAS/scz2c")
    skip_args.pvalue_column = "P"
    skip_args.or_column = "OR"
    # The header line is given as the column names separated by tabs.
    skip_args.skip_until_header = "\t".join(header_fields)

    betas = M03_betas.run(skip_args)
    assert_beta_pb(self, betas)
def test_with_model(self):
    """Run M03_betas with a model database path set and check the result.

    Uses the default ``base_args()`` with the P and OR columns and validates
    the returned value with ``assert_model_beta_pb``.
    """
    model_args = base_args()
    for attribute, value in (
        ("pvalue_column", "P"),
        ("or_column", "OR"),
        ("model_db_path", "tests/_td/dbs/test_3.db"),
    ):
        setattr(model_args, attribute, value)

    betas = M03_betas.run(model_args)
    assert_model_beta_pb(self, betas)
def test_split_with_model(self):
    """Run M03_betas on ``scz2b`` with a file pattern and a model database path.

    Uses the P and OR columns and validates the returned value with
    ``assert_model_beta_pb``.
    """
    split_args = base_args("tests/_td/GWAS/scz2b")
    overrides = (
        ("gwas_file_pattern", ".*gz"),
        ("pvalue_column", "P"),
        ("or_column", "OR"),
        ("model_db_path", "tests/_td/dbs/test_3.db"),
    )
    for attribute, value in overrides:
        setattr(split_args, attribute, value)

    betas = M03_betas.run(split_args)
    assert_model_beta_pb(self, betas)
def test_run_to_files(self):
    """Run M03_betas on ``scz2b`` with an output folder and check all files written.

    Every entry of the scratch folder ``.kk_test`` is read back in sorted
    name order with ``pandas.read_table``; the tables are concatenated and
    checked with ``assert_beta_pb``.
    """
    op = ".kk_test"
    # Start from a clean slate in case an earlier run left the folder behind.
    if os.path.exists(op):
        shutil.rmtree(op)

    args = base_args("tests/_td/GWAS/scz2b")
    args.gwas_file_pattern = ".*gz"
    args.pvalue_column = "P"
    args.or_column = "OR"
    args.output_folder = op

    try:
        M03_betas.run(args)
        # Read everything first and concatenate once, instead of growing a
        # frame from an empty seed on every iteration.
        frames = [pandas.read_table(os.path.join(op, n)) for n in sorted(os.listdir(op))]
        # pandas.concat rejects an empty list; keep the empty-frame result then.
        r = pandas.concat(frames) if frames else pandas.DataFrame()
        assert_beta_pb(self, r)
    finally:
        # Clean up even when the run or an assertion fails, so a failing test
        # does not leave the scratch folder in the working directory.
        shutil.rmtree(op, ignore_errors=True)
def test_fix_pvalue(self):
    """Run M03_betas on ``scz2d`` with a file pattern and check with ``assert_beta_pb_fix``.

    Uses the P and OR columns.
    """
    scratch_dir = ".kk_test"
    # Only clears a leftover folder: this test never sets ``output_folder``.
    if os.path.exists(scratch_dir):
        shutil.rmtree(scratch_dir)

    fix_args = base_args("tests/_td/GWAS/scz2d")
    for attribute, value in (
        ("gwas_file_pattern", ".*gz"),
        ("pvalue_column", "P"),
        ("or_column", "OR"),
    ):
        setattr(fix_args, attribute, value)

    betas = M03_betas.run(fix_args)
    assert_beta_pb_fix(self, betas)
def run(args):
    """Run ``M03_betas.run`` and feed its result to ``M04_zscores.run``.

    Returns early (with an info log and no work done) when:
      * ``args.overwrite`` is false and ``args.output_file`` names an existing path;
      * ``args.model_db_path`` is not set.

    Args:
        args: object providing ``overwrite``, ``output_file`` and
            ``model_db_path``; it is also passed on to both stages.
            ``args.output_folder`` is overwritten with ``None``.

    Returns:
        None.
    """
    # output_file may be unset; os.path.exists(None) raises TypeError, so only
    # probe the filesystem when there is a path to probe.
    if not args.overwrite and args.output_file and os.path.exists(args.output_file):
        logging.info("%s already exists, move it or delete it if you want it done again", args.output_file)
        return

    if not args.model_db_path:
        logging.info("Need to provide a model database file path")
        return

    # NOTE(review): presumably clearing output_folder makes M03_betas.run hand
    # its result back instead of writing it to disk; confirm against M03_betas.
    args.output_folder = None
    g = M03_betas.run(args)
    M04_zscores.run(args, g)
def run(args):
    """Run ``M03_betas.run`` and pass its result to ``M04_zscores.run``.

    Nothing is run (only an info message is logged) when overwriting is
    disabled and ``args.output_file`` is set and already exists, or when
    ``args.model_db_path`` is not set. ``args.output_folder`` is overwritten
    with ``None`` before the first stage runs.
    """
    if not args.overwrite:
        # Short-circuit on an unset output file before touching the filesystem.
        if args.output_file and os.path.exists(args.output_file):
            logging.info("%s already exists, move it or delete it if you want it done again", args.output_file)
            return

    if not args.model_db_path:
        logging.info("Need to provide a model database file path")
        return

    args.output_folder = None
    betas = M03_betas.run(args)
    M04_zscores.run(args, betas)
def test_split_with_model(self):
    """Run M03_betas on ``scz2b`` with a file pattern and a model database path.

    Uses the P and OR columns and validates the returned value with
    ``assert_model_beta_pb``.
    """
    scratch_dir = ".kk_test"
    # Only clears a leftover folder: this test never sets ``output_folder``.
    if os.path.exists(scratch_dir):
        shutil.rmtree(scratch_dir)

    split_args = base_args("tests/_td/GWAS/scz2b")
    overrides = (
        ("gwas_file_pattern", ".*gz"),
        ("pvalue_column", "P"),
        ("or_column", "OR"),
        ("model_db_path", "tests/_td/dbs/test_3.db"),
    )
    for attribute, value in overrides:
        setattr(split_args, attribute, value)

    betas = M03_betas.run(split_args)
    assert_model_beta_pb(self, betas)
def testWrongArguments(self, patch_validate_basic, patch_validate_strict):
    """Check that M03_betas.run raises for bare arguments and for failing validators.

    Three cases are exercised in order:
      1. a fresh ``DummyArgs()`` must raise some exception;
      2. with ``gwas_folder`` set and ``patch_validate_basic`` raising
         ``RuntimeError``, the run must raise ``RuntimeError``;
      3. with ``patch_validate_basic`` reset and ``patch_validate_strict``
         raising ``RuntimeError``, the run must raise ``RuntimeError``.

    NOTE(review): the two ``patch_*`` parameters are presumably mocks injected
    by decorators outside this block; confirm at the definition site.
    """
    wrong_args = DummyArgs()
    self.assertRaises(Exception, M03_betas.run, wrong_args)

    wrong_args.gwas_folder = "tests/_td/GWAS/scz2"
    patch_validate_basic.side_effect = RuntimeError("k")
    self.assertRaises(RuntimeError, M03_betas.run, wrong_args)

    patch_validate_basic.side_effect = None
    patch_validate_strict.side_effect = RuntimeError()
    self.assertRaises(RuntimeError, M03_betas.run, wrong_args)
def run_folder_or_file(self, folder, file):
    """Run M03_betas over several column combinations for one folder/file input.

    Each scenario starts from a fresh ``base_args(folder, file)``, sets the
    listed column attributes, runs ``M03_betas.run`` and validates the result
    with the matching ``assert_beta_*`` helper. The last scenario passes the
    ``BETA`` column as the odds-ratio column and is expected to raise.
    """
    def scenario(**columns):
        # Fresh arguments per scenario so settings never carry over.
        prepared = base_args(folder, file)
        for attribute, column in columns.items():
            setattr(prepared, attribute, column)
        return prepared

    outcome = M03_betas.run(scenario(pvalue_column="P", or_column="OR"))
    assert_beta_pb(self, outcome)

    outcome = M03_betas.run(scenario(pvalue_column="P", beta_column="BETA"))
    assert_beta_pb(self, outcome)

    outcome = M03_betas.run(scenario(pvalue_column="P", beta_sign_column="BETA_SIGN"))
    assert_beta_p(self, outcome)

    outcome = M03_betas.run(scenario(beta_column="BETA", se_column="SE"))
    assert_beta_bse(self, outcome)

    # NOTE(review): this scenario repeats the previous one unchanged.
    outcome = M03_betas.run(scenario(beta_column="BETA", se_column="SE"))
    assert_beta_bse(self, outcome)

    outcome = M03_betas.run(scenario(or_column="OR", se_column="SE"))
    assert_beta_bse(self, outcome)

    # Should fail
    failing = scenario(or_column="BETA", se_column="SE")
    with self.assertRaises(Exception):
        M03_betas.run(failing)
def buildWork(self):
    """Assemble a two-stage job from this object's current settings.

    Builds an argument holder for ``M03_betas.GetBetas`` and one for
    ``M04_zscores.CalculateZScores``, instantiates both work items, and wraps
    them so they run one after the other.

    Returns:
        An object whose ``run()`` executes the betas work item and then the
        z-scores work item, logging (not raising) any exception.
    """
    class BetaWorkArgs(object):
        """Argument holder for ``M03_betas.GetBetas``, filled from *source*."""

        def __init__(self, source):
            self.verbosity = "10"
            self.weight_db_path = source.weight_db_path
            self.gwas_folder = source.gwas_folder
            self.output_folder = source.beta_folder
            self.snp_column = source.snp_value.get()
            self.a1_column = source.a1_value.get()
            self.a2_column = source.a2_value.get()
            # Optional columns are forwarded only when their ``*_on`` switch is set.
            self.or_column = source.or_value.get() if source.or_on.get() else None
            self.beta_column = source.beta_value.get() if source.beta_on.get() else None
            self.beta_sign_column = source.beta_sign_value.get() if source.beta_sign_on.get() else None
            self.beta_zscore_column = source.beta_z_value.get() if source.beta_z_on.get() else None
            self.frequency_column = source.frequency_value.get() if source.frequency_on.get() else None
            self.se_column = source.se_value.get() if source.se_on.get() else None
            self.pvalue_column = source.p_value.get() if source.p_on.get() else None
            self.compressed = source.compressed_on.get()
            # An empty entry is treated as "not specified".
            self.gwas_file_pattern = source.gwas_file_pattern_value.get() or None
            self.separator = source.separator_value.get() or None
            self.scheme = GWASUtilities.BETA_P
            # TODO: implement this
            self.skip_until_header = None

    beta_args = BetaWorkArgs(source=self)
    beta_work = M03_betas.GetBetas(beta_args)

    class ZScoresWorkArgs(object):
        """Argument holder for ``M04_zscores.CalculateZScores``, filled from *source*."""

        def __init__(self, source):
            self.verbosity = "10"
            self.keep_ens_version = False
            self.beta_folder = source.beta_folder
            self.weight_db_path = source.weight_db_path
            self.output_file = source.output_path
            self.covariance = source.covariance_file
            self.zscore_scheme = ZScoreCalculation.BETA_Z_SIGMA_REF
            self.normalization_scheme = Normalization.NONE
            self.input_format = Formats.FlatFile
            self.selected_dosage_folder = "intermediate/filtered_1000GP_Phase3"

    zscore_args = ZScoresWorkArgs(source=self)
    zscore_work = M04_zscores.CalculateZScores(zscore_args)

    #TODO: maybe connect stuff together so that M03 passes stuff to M04
    class WorkWrapper(object):
        """Runs work items in the order given, dropping each one once it completes."""

        def __init__(self, works):
            self.works = works

        def run(self):
            try:
                # Keep the pending items reversed so the next one to run is
                # always last; deleting it after it completes drops our
                # reference to it before the following item starts.
                self.works = list(reversed(self.works))
                while self.works:
                    self.works[-1].run()
                    del self.works[-1]
            except Exception as e:
                # Best effort: a failing item stops the sequence and is only logged.
                logging.info("Exception when running task: %s", str(e))

    work = WorkWrapper([beta_work, zscore_work])
    return work
def buildBetas(self):
    """Point the stored arguments' output folder at their beta folder and run M03_betas.

    Sets ``self.args.output_folder`` to ``self.args.beta_folder`` and then
    calls ``M03_betas.run(self.args)``.
    """
    logging.info("Processing betas!")
    # Read the folder from this object's own arguments rather than from a bare
    # ``args`` name, which is not defined inside this method.
    self.args.output_folder = self.args.beta_folder
    M03_betas.run(self.args)
def test_fail_incompatible_arguments(self):
    """Check that passing both a folder and a file to ``base_args`` makes the run fail.

    ``M03_betas.run`` is expected to raise ``Exceptions.InvalidArguments``.
    """
    gwas_folder = "tests/_td/GWAS/scz2"
    gwas_file = "tests/_td/GWAS/scz2/scz2.gwas.results.txt.gz"
    conflicting_args = base_args(gwas_folder, gwas_file)

    self.assertRaises(Exceptions.InvalidArguments, M03_betas.run, conflicting_args)
def buildBetas(self, db_filename):
    """Build betas and then z-scores for one weight database file.

    Steps, all driven through ``self.args``:
      1. store the absolute database path in ``weight_db_path``;
      2. derive the covariance file path from ``covariance_directory``
         (``"SAME"``, case-insensitive, means the database's own directory)
         and ``covariance_suffix``;
      3. run ``M03_betas.GetBetas.buildBetas`` on every name returned by
         ``Utilities.contentsWithRegexpFromFolder`` (with ``".gz"`` appended),
         skipping names that raise ``Exceptions.BadFilename``;
      4. set ``output_file`` and run ``M04_zscores.CalculateZScores``.

    Args:
        db_filename: path to the weight database file.
    """
    filebase = os.path.basename(db_filename).replace(".db", "")
    output_folder = os.path.abspath(self.args.output_directory)
    logging.info("Processing betas for %s", db_filename)

    self.args.weight_db_path = os.path.abspath(db_filename)

    cov_directory = self.args.covariance_directory
    if cov_directory.upper() == "SAME":
        cov_directory = "/".join(self.args.weight_db_path.split("/")[0:-1])

    # A suffix containing ".." is split into "<covariance ext>..<db ext>": the
    # db extension is removed from the database name and the covariance
    # extension appended in its place.
    extComponents = self.args.covariance_suffix.split("..")
    if len(extComponents) > 1:
        covext = "..".join(extComponents[0:-1])
        dbext = extComponents[-1]
        filebase = db_filename.replace(dbext, "")
        self.args.covariance = "%s/%s%s" % (cov_directory, filebase.split("/")[-1], covext)
    else:
        # Use the file name component, as the branch above does
        # (``strip("/")[-1]`` would yield only its last character).
        self.args.covariance = "%s/%s%s" % (
            cov_directory, filebase.split("/")[-1], self.args.covariance_suffix)

    file_prefix = filebase.split("/")[-1].split(".")[0]
    beta_output = os.path.join(output_folder, file_prefix)
    logging.info("Writing betas to %s", beta_output)
    self.args.output_folder = beta_output

    logging.info("Loading weight model")
    weight_db_logic = WeightDBUtilities.WeightDBEntryLogic(self.args.weight_db_path)

    betaScript = M03_betas.GetBetas(self.args)
    names = Utilities.contentsWithRegexpFromFolder(self.args.gwas_folder, betaScript.gwas_regexp)

    betaScript.output_folder = beta_output
    if not os.path.exists(beta_output):
        os.makedirs(beta_output)

    # The report is named after the first GWAS file name.
    # NOTE(review): if ``names`` is empty, report_prefix stays None and the
    # concatenation further down raises TypeError; confirm whether that case
    # needs explicit handling.
    report_prefix = None
    for name in names:
        name = name + ".gz"
        if report_prefix is None:
            report_prefix = name.split("/")[-1].split(".")[0]
        try:
            betaScript.buildBetas(weight_db_logic, name)
        except Exceptions.BadFilename as e:
            # This just means that there is some extra stuff inside that
            # directory, so it is ignored.
            logging.info("Wrong file name: %s, skipping", e.msg)

    suffix = ".csv"
    self.args.output_file = os.path.join(output_folder, report_prefix + "-" + file_prefix + suffix)

    # ZScores
    logging.info("Calculating ZScores for %s", filebase)
    zscoreScript = M04_zscores.CalculateZScores(self.args)
    zscoreScript.folder_beta = betaScript.output_folder
    zscoreScript.run()