def parse_folder(folder, db, gencode_file):
    """Load the per-gene weight data found in *folder* and insert it into *db*.

    Each entry of ``folder`` is treated as a gene name. The gencode file is
    scanned to find, for every such name, its ``ensemble_version``; entries
    without a gencode match are skipped. For the remaining entries the
    ``.wgt.map`` SNP list and the weights are loaded through ``TWASFormat``
    and turned into one row per SNP.

    Args:
        folder: directory whose entries are named after genes.
        db: handle forwarded untouched to ``Utilities.insert_entries``.
        gencode_file: path handed to ``GencodeFile.parse_gencode_file``.
    """
    entries = os.listdir(folder)

    logging.info("Processing gencode file")
    wanted = set(entries)
    gene_names = {}

    def record_gene(gencode):
        # Keep only the annotations whose name matches a folder entry.
        if gencode.name in wanted:
            gene_names[gencode.name] = gencode.ensemble_version

    GencodeFile.parse_gencode_file(gencode_file, record_gene)

    logging.info("processing folder")
    mtf = TWASFormat.MTF
    genes = {}
    for entry in entries:
        if entry not in gene_names:
            # Level 9 sits just below logging.DEBUG (10): very verbose tracing.
            logging.log(9, "Gene %s not in gencode", entry)
            continue

        base_path = TWASFormat.build_subpaths(folder, entry)
        snps = TWASFormat.load_map(base_path + ".wgt.map")
        weights = TWASFormat.build_weights(base_path)
        gene_id = gene_names[entry]

        # One row per SNP: (rsid, gene id, gene name, weight, a1, a2).
        # weights is indexed by SNP position in the map file.
        genes[entry] = [
            (snp[mtf.snp], gene_id, entry, weights[index], snp[mtf.a1], snp[mtf.a2])
            for index, snp in enumerate(snps)
        ]

    Utilities.insert_entries(db, genes)
def build_twas_gene_folder(weight_folder, working_folder, gwas_results, content):
    """Create a per-gene working folder with weight files aligned to GWAS results.

    Reads the gene's ``.wgt.map`` and ``.wgt.cor`` files from *weight_folder*,
    matches every SNP against *gwas_results*, and writes into
    ``<working_folder>/<content>/``:

    * ``<content>.wgt.cor``  - the correlations, zeroed for SNPs that are
      missing from the GWAS results or whose alleles do not match;
    * ``<content>.wgt.ld``   - copied verbatim from the weight folder;
    * ``<content>.wgt.map``  - copied verbatim from the weight folder;
    * ``<content>.zscore``   - one line per SNP with a non-zero z-score.

    Args:
        weight_folder: folder holding the source weight files.
        working_folder: parent folder in which the gene folder is created.
        gwas_results: mapping from SNP id to a GWAS record indexed by
            ``GTF.a1``, ``GTF.a2`` and ``GTF.z``.
        content: gene name; used for the sub folder and file prefixes.

    Returns:
        Path of the newly created working folder for this gene.

    Raises:
        OSError: if the gene's working folder already exists
            (``os.makedirs`` is called without tolerating existing paths).
    """
    mtf = TWASFormat.MTF

    sub_path = TWASFormat.build_subpaths(weight_folder, content)
    map_path = sub_path + ".wgt.map"
    cor_path = sub_path + ".wgt.cor"
    ld_path = sub_path + ".wgt.ld"

    snps = TWASFormat.load_map(map_path)
    cors = TWASFormat.load_cor(cor_path)

    zscores = []
    for i, snp in enumerate(snps):
        zscore = 0
        rsid = snp[mtf.snp]
        a1 = snp[mtf.a1]
        a2 = snp[mtf.a2]

        if rsid in gwas_results:
            result = gwas_results[rsid]
            if result[GTF.a1] == a1 and result[GTF.a2] == a2:
                zscore = result[GTF.z]
            elif result[GTF.a1] == a2 and result[GTF.a2] == a1:
                # Alleles are swapped relative to the weights: flip the sign.
                zscore = -1.0 * result[GTF.z]
            else:
                # Alleles are incompatible: drop this SNP's contribution.
                cors[i] = 0
        else:
            # SNP absent from the GWAS: drop its contribution.
            cors[i] = 0

        # SNPs without a usable (non-zero) z-score are left out of the
        # z-score file.
        if zscore == 0:
            continue

        pos = snp[mtf.pos]
        zscores.append((rsid, pos, a1, a2, str(zscore)))

    working_path = os.path.join(working_folder, content)
    os.makedirs(working_path)
    sub_working_path = os.path.join(working_path, content)

    working_cor = sub_working_path + ".wgt.cor"
    with open(working_cor, "w") as cor_file:
        for cor in cors:
            cor_file.write(str(cor) + "\n")

    working_ld_path = sub_working_path + ".wgt.ld"
    shutil.copy(ld_path, working_ld_path)

    working_map_path = sub_working_path + ".wgt.map"
    shutil.copy(map_path, working_map_path)

    zscore_path = sub_working_path + ".zscore"
    with open(zscore_path, "w") as zscore_file:
        zscore_file.write("SNP_ID SNP_Pos Ref_Allele Alt_Allele Z-score\n")
        for z in zscores:
            zscore_file.write(" ".join(z) + "\n")

    return working_path