def buildFiles(self, weight_db_logic):
    """Generate the correlation and/or covariance output files.

    An output is generated only when its path attribute
    (``self.correlation_output`` / ``self.covariance_output``) is not None
    and nothing exists at that path yet. For each output to generate, the
    parent folder is created if needed and ``self.writeFileHeader`` is called
    on the path. Then, for every name returned by
    ``Utilities.dosageNamesFromFolder(self.data_folder)``, the SNPs are
    fetched with ``self.getSNPS`` and handed to ``self.addToCorrelationFile``
    and/or ``self.addToCovarianceFile``.

    Args:
        weight_db_logic: passed through unchanged to ``getSNPS`` and the
            ``addTo...File`` methods.

    Returns:
        None. Returns early when neither output has to be generated.
    """
    def _prepare_output(path):
        # Tell whether `path` must be generated; if so, set up its folder and header.
        if path is None:
            return False
        if os.path.exists(path):
            logging.info("%s already exists, delete it if you want it figured out again", path)
            return False
        folder = os.path.dirname(path)
        # dirname() is "" for a bare file name, and os.makedirs("") raises,
        # so only create the folder when there actually is one to create.
        if folder and not os.path.exists(folder):
            os.makedirs(folder)
        self.writeFileHeader(path)
        return True

    # Correlation is prepared before covariance, matching the original order.
    do_correlations = _prepare_output(self.correlation_output)
    do_covariances = _prepare_output(self.covariance_output)
    if not (do_correlations or do_covariances):
        return

    names = Utilities.dosageNamesFromFolder(self.data_folder)
    for name in names:
        snps, snps_by_rsid = self.getSNPS(name, weight_db_logic)
        if do_correlations:
            self.addToCorrelationFile(weight_db_logic, name, snps, snps_by_rsid)
        if do_covariances:
            self.addToCovarianceFile(weight_db_logic, name, snps, snps_by_rsid)
def run(self):
    """Collect findings for chromosomes chr1..chr22 and write them as CSV.

    Does nothing (apart from logging) when ``self.output_file`` already
    exists. Otherwise, for each chromosome, the matching dosage file from
    ``self.data_folder_gwas_dosage`` is loaded and processed, followed by the
    matching legend name from ``self.data_folder_phase``. The accumulated
    findings are written to ``self.output_file`` after the
    ``AlleleStats.CSVHeader()`` header, sorted by their ``chromosome``
    attribute.
    """
    if os.path.exists(self.output_file):
        logging.info("File %s already exists, delete it if you want it calculated again", self.output_file)
        return

    # NOTE(review): the message reports self.weight_db while the database is
    # opened from self.db_path -- confirm both attributes refer to the same file.
    logging.info("Opening %s", self.weight_db)
    weight_db_logic = WeightDBUtilities.WeightDBEntryLogic(self.db_path)

    chromosome_labels = ["chr%d" % number for number in xrange(1, 23)]
    dosage_names = Utilities.dosageNamesFromFolder(self.data_folder_gwas_dosage)
    legend_names = Utilities.legendNamesFromFolder(self.data_folder_phase)

    findings = {}
    for chromosome in chromosome_labels:
        logging.info("Processing chromosome %s", chromosome)

        # The trailing "." is part of the pattern handed to the helper.
        dosage_name = Utilities.removeNameWithPatterns(dosage_names, [chromosome + "."])
        dosage = self.loadDosageFile(self.data_folder_gwas_dosage, dosage_name)
        self.processDosage(chromosome, weight_db_logic, dosage, findings)

        legend_name = Utilities.removeNameEndingWith(legend_names, chromosome)
        self.processLegendName(chromosome, weight_db_logic, dosage, findings, legend_name)

    with open(self.output_file, "w") as output:
        output.write(AlleleStats.CSVHeader())
        ordered = sorted(findings.values(), key=lambda entry: entry.chromosome)
        for entry in ordered:
            output.write(entry.toCSVLine())
def testDosageNamesFromFolder(self):
    # Checks that Utilities.dosageNamesFromFolder yields an empty list for
    # the "tests/_td/dosage_set_1" fixture folder.
    # NOTE(review): an empty result for a folder named "dosage_set_1" is what
    # the original asserts -- confirm this is the intended expectation.
    self.assertEqual(Utilities.dosageNamesFromFolder("tests/_td/dosage_set_1"), [])