def buildBetas(self, weight_db_logic, name):
    """Process the GWAS file `name` and write the result to a ".gz" file in the output folder.

    The output path is `name` joined onto `self.output_folder`, with ".gz"
    appended unless the path already ends with it. If that file already
    exists, nothing is done.

    Args:
        weight_db_logic: when falsy, the input is handed to
            `GWASUtilities.loadGWASAndStream` together with the output path;
            otherwise it is loaded through `GWASUtilities.GWASDosageFileLoader`
            and the returned columns are written here as tab-separated text.
        name: file name, relative to both `self.gwas_folder` (input) and
            `self.output_folder` (output).

    Returns:
        None.
    """
    output_path = os.path.join(self.output_folder, name)
    # Look at the suffix only: a substring test would also match ".gz"
    # appearing in the output folder name and skip the extension.
    if not output_path.endswith(".gz"):
        output_path += ".gz"

    if os.path.exists(output_path):
        logging.info(
            "%s already exists, delete it if you want it to be done again",
            output_path)
        return

    logging.info("Building beta for %s and %s", name, self.weight_db_path)

    input_path = os.path.join(self.gwas_folder, name)
    file_format = GWASUtilities.GWASFileFormat.fileFormatFromArgs(
        input_path, self.args)
    scheme = MethodGuessing.chooseGWASProcessingScheme(self.args, input_path)
    callback = MethodGuessing.chooseGWASCallback(
        file_format, scheme, weight_db_logic)

    if not weight_db_logic:
        GWASUtilities.loadGWASAndStream(
            input_path, output_path, self.compressed_gwas,
            self.args.separator, self.args.skip_until_header, callback)
    else:
        dosage_loader = GWASUtilities.GWASDosageFileLoader(
            input_path, self.compressed_gwas, self.args.separator,
            self.args.skip_until_header, callback)
        results, column_order = dosage_loader.load()

        # An empty result gets its own log message instead of an empty file.
        if results:
            # Resolve every column before opening the output, so a failed
            # lookup cannot leave behind a stub file that would make the
            # "already exists" check above skip the next run.
            columns = [results[c] for c in column_order]
            row_count = len(columns[0])
            with gzip.open(output_path, "wb") as out:
                out.write("\t".join(column_order) + "\n")
                for i in xrange(row_count):
                    out.write("\t".join(str(col[i]) for col in columns) + "\n")
        else:
            logging.info(
                "No snps from the tissue model found in the GWAS file")

    logging.info("Successfully ran GWAS input processing")
def run(args):
    """Load the weight db, covariance data and betas, then optionally preprocess one gene.

    Reads `args.weight_db_path`, `args.covariance`, `args.beta_folder`,
    `args.gene_name` and `args.interactive`.

    When `args.gene_name` is set, the gene's data is fetched with
    `get_gene_data` and passed through `ZScoreCalculation.preProcess`.
    `embed()` is called afterwards if `args.interactive` is set, and always
    when that processing raises.

    NOTE(review): `embed` is presumably IPython's interactive shell; confirm
    against the file's imports. Several locals below are never read again in
    this function; they are kept under their original names on the assumption
    that the embedded shell exposes them.

    Returns:
        None.
    """
    logging.info("Loading weight db")
    weight_db_logic = WeightDBUtilities.WeightDBEntryLogic(args.weight_db_path)

    logging.info("Loading covariance file")
    covariance_contents = MatrixUtilities.loadMatrixFromFile(args.covariance)

    logging.info("Choosing method")
    beta_contents = Utilities.contentsWithPatternsFromFolder(args.beta_folder, [])
    zscore_calculation, normalization = MethodGuessing.chooseZscoreSchemeFromFiles(
        args.beta_folder, beta_contents, covariance_contents, weight_db_logic)

    logging.info("Processing")
    betas = {}
    for content in beta_contents:
        logging.info("Loading betas")
        beta_path = os.path.join(args.beta_folder, content)
        beta_sets = KeyedDataSet.KeyedDataSetFileUtilities.loadDataSetsFromCompressedFile(
            beta_path, header="")
        # Index the loaded sets by their name.
        beta_sets = {beta_set.name: beta_set for beta_set in beta_sets}
        betas[content] = beta_sets

    if args.gene_name:
        try:
            gene_data, weights, covariance_matrix, valid_rsids, beta_sets = get_gene_data(
                args.gene_name, weight_db_logic, covariance_contents, betas)
            weight_values, variances = ZScoreCalculation.preProcess(
                covariance_matrix, valid_rsids, weights, beta_sets)
            if args.interactive:
                embed()
            logging.info("Processed gene data")
        except Exception as e:  # `e` stays bound for inspection from the shell below
            # Record the traceback; the message alone gave no hint of the cause.
            logging.exception("Couldn't get gene data")
            embed()
def buildBetas(self, weight_db_logic, name):
    """Build the beta output for GWAS file `name`, unless that output already exists.

    The input is read from `self.gwas_folder` and the output is written to
    `self.output_folder`, both under the same `name`.

    Args:
        weight_db_logic: forwarded to the processing-scheme chooser and to the
            `GWASWeightDBFilteredBetaLineCollector` callback.
        name: file name of the GWAS input, reused for the output.

    Returns:
        None.
    """
    target = os.path.join(self.output_folder, name)
    if os.path.exists(target):
        logging.info(
            "%s already exists, delete it if you want it to be done again",
            target)
        return

    logging.info("Building beta for %s and %s", name, self.weight_db_path)

    source = os.path.join(self.gwas_folder, name)
    gwas_format = GWASUtilities.GWASFileFormat.fileFormatFromArgs(
        source, self.args)
    processing_scheme = MethodGuessing.chooseGWASProcessingScheme(
        gwas_format, weight_db_logic, self.args, source)
    collector = GWASUtilities.GWASWeightDBFilteredBetaLineCollector(
        gwas_format, processing_scheme, weight_db_logic)
    loader = GWASUtilities.GWASDosageFileLoader(
        source, self.compressed, self.args.separator,
        self.args.skip_until_header, collector)
    collected = loader.load()

    # "saveSetsToCompressedFile" performs an emptiness check of its own; this
    # one is here only so that an empty result gets a distinct log message.
    if len(collected) == 0:
        logging.info("No snps from the tissue model found in the GWAS file")
        return

    KeyedDataSet.KeyedDataSetFileUtilities.saveSetsToCompressedFile(
        target, collected, "rsid")
def selectMethod(self, folder, beta_contents, covariance_entries, weight_db_logic):
    """Pick the z-score calculation and normalization objects to use.

    If `self.zscore_scheme` is set, it is turned into a
    `ZScoreCalculation.ZScoreScheme` and `self.normalization_scheme` must be
    set too. Otherwise both values come from
    `MethodGuessing.chooseZscoreSchemeFromFiles`. In either case a set
    `self.normalization_scheme` replaces the normalization with the result of
    `Normalization.normalizationScheme`.

    Returns:
        A `(zscore_calculation, normalization)` tuple.

    Raises:
        Exception: `self.zscore_scheme` is set but `self.normalization_scheme`
            is not.
    """
    if not self.zscore_scheme:
        # Nothing was requested explicitly: defer to MethodGuessing for both.
        zscore, norm = MethodGuessing.chooseZscoreSchemeFromFiles(
            folder, beta_contents, covariance_entries, weight_db_logic)
    else:
        zscore = ZScoreCalculation.ZScoreScheme(self.zscore_scheme)
        norm = None
        if not self.normalization_scheme:
            raise Exception("Normalization scheme is required")

    if self.normalization_scheme:
        norm = Normalization.normalizationScheme(
            self.normalization_scheme, covariance_entries, weight_db_logic)

    return zscore, norm
def buildBetas(self, weight_db_logic, name):
    """Process the GWAS file `name` and write the result to a ".gz" file in the output folder.

    The output path is `name` joined onto `self.output_folder`, with ".gz"
    appended unless the path already ends with it. If that file already
    exists, nothing is done.

    Args:
        weight_db_logic: when falsy, the input is handed to
            `GWASUtilities.loadGWASAndStream` together with the output path;
            otherwise it is loaded through `GWASUtilities.GWASDosageFileLoader`
            and the returned columns are written here as tab-separated text.
        name: file name, relative to both `self.gwas_folder` (input) and
            `self.output_folder` (output).

    Returns:
        None.
    """
    output_path = os.path.join(self.output_folder, name)
    # Look at the suffix only: a substring test would also match ".gz"
    # appearing in the output folder name and skip the extension.
    if not output_path.endswith(".gz"):
        output_path += ".gz"

    if os.path.exists(output_path):
        logging.info("%s already exists, delete it if you want it to be done again", output_path)
        return

    logging.info("Building beta for %s and %s", name, self.weight_db_path or "no database")

    input_path = os.path.join(self.gwas_folder, name)
    file_format = GWASUtilities.GWASFileFormat.fileFormatFromArgs(input_path, self.args)
    scheme = MethodGuessing.chooseGWASProcessingScheme(self.args, input_path)
    callback = MethodGuessing.chooseGWASCallback(file_format, scheme, weight_db_logic)

    if not weight_db_logic:
        GWASUtilities.loadGWASAndStream(
            input_path, output_path, self.compressed_gwas,
            self.args.separator, self.args.skip_until_header, callback)
    else:
        dosage_loader = GWASUtilities.GWASDosageFileLoader(
            input_path, self.compressed_gwas, self.args.separator,
            self.args.skip_until_header, callback)
        results, column_order = dosage_loader.load()

        # An empty result gets its own log message instead of an empty file.
        if results:
            # Resolve every column before opening the output, so a failed
            # lookup cannot leave behind a stub file that would make the
            # "already exists" check above skip the next run.
            columns = [results[c] for c in column_order]
            row_count = len(columns[0])
            with gzip.open(output_path, "wb") as out:
                out.write("\t".join(column_order) + "\n")
                for i in xrange(row_count):
                    out.write("\t".join(str(col[i]) for col in columns) + "\n")
        else:
            logging.info("No snps from the tissue model found in the GWAS file")

    logging.info("Successfully ran GWAS input processing")
def selectMethod(self, folder, beta_contents, covariance_entries, weight_db_logic):
    """Return the `(zscore_calculation, normalization)` pair to use.

    An explicit `self.zscore_scheme` is wrapped in
    `ZScoreCalculation.ZScoreScheme` and requires `self.normalization_scheme`
    to be set as well. Without it, both members of the pair are taken from
    `MethodGuessing.chooseZscoreSchemeFromFiles`. A set
    `self.normalization_scheme` then overrides the normalization with the
    result of `Normalization.normalizationScheme`.

    Raises:
        Exception: `self.zscore_scheme` is set but `self.normalization_scheme`
            is not.
    """
    chosen_normalization = None
    if self.zscore_scheme:
        chosen_zscore = ZScoreCalculation.ZScoreScheme(self.zscore_scheme)
        if not self.normalization_scheme:
            raise Exception("Normalization scheme is required")
    else:
        guessed = MethodGuessing.chooseZscoreSchemeFromFiles(
            folder, beta_contents, covariance_entries, weight_db_logic)
        chosen_zscore, chosen_normalization = guessed

    # No explicit normalization requested: keep whatever was chosen above.
    if not self.normalization_scheme:
        return chosen_zscore, chosen_normalization

    chosen_normalization = Normalization.normalizationScheme(
        self.normalization_scheme, covariance_entries, weight_db_logic)
    return chosen_zscore, chosen_normalization
def buildBetas(self, weight_db_logic, name):
    """Create the beta output for GWAS file `name` if it is not there yet.

    Input comes from `self.gwas_folder` and output goes to
    `self.output_folder`, both under the same `name`.

    Args:
        weight_db_logic: forwarded to the processing-scheme chooser and to the
            `GWASWeightDBFilteredBetaLineCollector` callback.
        name: file name of the GWAS input, reused for the output.

    Returns:
        None.
    """
    destination = os.path.join(self.output_folder, name)
    already_built = os.path.exists(destination)
    if already_built:
        logging.info("%s already exists, delete it if you want it to be done again", destination)
        return

    logging.info("Building beta for %s and %s", name, self.weight_db_path)

    gwas_path = os.path.join(self.gwas_folder, name)
    gwas_format = GWASUtilities.GWASFileFormat.fileFormatFromArgs(gwas_path, self.args)
    chosen_scheme = MethodGuessing.chooseGWASProcessingScheme(
        gwas_format, weight_db_logic, self.args, gwas_path)
    line_collector = GWASUtilities.GWASWeightDBFilteredBetaLineCollector(
        gwas_format, chosen_scheme, weight_db_logic)
    loaded_sets = GWASUtilities.GWASDosageFileLoader(
        gwas_path, self.compressed, self.args.separator, line_collector).load()

    # "saveSetsToCompressedFile" checks for emptiness as well; the check is
    # repeated here only to log a different message.
    if len(loaded_sets) > 0:
        KeyedDataSet.KeyedDataSetFileUtilities.saveSetsToCompressedFile(
            destination, loaded_sets, "rsid")
    else:
        logging.info("No snps from the tissue model found in the GWAS file")