コード例 #1
0
ファイル: CollectTWASWeight.py プロジェクト: Heroico/CStuff
def parse_folder(folder, db, gencode_file):
    """Collect per-gene SNP weight rows from a TWAS weight folder into ``db``.

    Every entry name returned by ``os.listdir(folder)`` is treated as a
    candidate gene name.  The gencode file is scanned once to find, for each
    of those names, the value of the record's ``ensemble_version`` attribute.
    Entries with no matching gencode record are logged (level 9) and skipped.

    For each remaining entry the ``.wgt.map`` file and the weights are loaded
    through ``TWASFormat`` and combined positionally into rows of
    ``(snp, gene_id, gene_name, weight, a1, a2)``.  The resulting
    ``{gene_name: rows}`` mapping is handed to ``Utilities.insert_entries``.

    Args:
        folder: directory whose entries are named after genes.
        db: passed through unchanged to ``Utilities.insert_entries``.
        gencode_file: passed through to ``GencodeFile.parse_gencode_file``.
    """
    entries = os.listdir(folder)

    logging.info("Processing gencode file")
    wanted = set(entries)
    ensembl_ids = {}

    def remember_gene(gencode):
        # Invoked by the gencode parser for each record; keep only the
        # records whose name matches an entry of the folder.
        name = gencode.name
        if name in wanted:
            ensembl_ids[name] = gencode.ensemble_version

    GencodeFile.parse_gencode_file(gencode_file, remember_gene)

    logging.info("processing folder")
    rows_by_gene = {}
    for entry in entries:
        if entry not in ensembl_ids:
            logging.log(9, "Gene %s not in gencode", entry)
            continue

        prefix = TWASFormat.build_subpaths(folder, entry)
        snps = TWASFormat.load_map(prefix + ".wgt.map")
        weights = TWASFormat.build_weights(prefix)

        gene_id = ensembl_ids[entry]
        fields = TWASFormat.MTF
        # weights are matched to SNPs by position in the map file.
        rows_by_gene[entry] = [
            (snp[fields.snp], gene_id, entry, weights[index],
             snp[fields.a1], snp[fields.a2])
            for index, snp in enumerate(snps)
        ]

    Utilities.insert_entries(db, rows_by_gene)
コード例 #2
0
ファイル: ProcessTWAS.py プロジェクト: Heroico/CStuff
def build_twas_gene_folder(weight_folder, working_folder, gwas_results, content):
    """Build a per-gene working folder with TWAS inputs matched to GWAS results.

    Loads the gene's ``.wgt.map`` and ``.wgt.cor`` files from the weight
    folder, aligns each SNP against ``gwas_results`` and writes, under
    ``<working_folder>/<content>/``:

    * ``<content>.wgt.cor``  - the correlations, one per line, with entries
      zeroed for SNPs that are missing from ``gwas_results`` or whose alleles
      match neither orientation;
    * ``<content>.wgt.ld``   - copied unchanged from the weight folder;
    * ``<content>.wgt.map``  - copied unchanged from the weight folder;
    * ``<content>.zscore``   - a header line plus one space-separated line
      per SNP that has a usable z-score.

    Args:
        weight_folder: folder passed to ``TWASFormat.build_subpaths``.
        working_folder: parent directory for the new per-gene directory.
        gwas_results: container supporting ``in`` and indexing by SNP id;
            each value is indexed with ``GTF.a1``, ``GTF.a2`` and ``GTF.z``.
        content: gene name; used for the directory and the file prefix.

    Returns:
        The path of the newly created per-gene working directory.

    Raises:
        OSError: from ``os.makedirs`` if the per-gene directory already
            exists.
    """
    sub_path = TWASFormat.build_subpaths(weight_folder, content)

    map_path = sub_path + ".wgt.map"
    snps = TWASFormat.load_map(map_path)

    cor_path = sub_path + ".wgt.cor"
    cors = TWASFormat.load_cor(cor_path)

    ld_path = sub_path + ".wgt.ld"

    zscores = []
    for i, snp in enumerate(snps):
        zscore = 0
        rsid = snp[TWASFormat.MTF.snp]
        a1 = snp[TWASFormat.MTF.a1]
        a2 = snp[TWASFormat.MTF.a2]
        if rsid in gwas_results:
            result = gwas_results[rsid]
            if result[GTF.a1] == a1 and result[GTF.a2] == a2:
                zscore = result[GTF.z]
            elif result[GTF.a1] == a2 and result[GTF.a2] == a1:
                # Alleles are swapped relative to the map: flip the sign.
                zscore = -1.0 * result[GTF.z]
            else:
                # Alleles are incompatible: drop this SNP's correlation.
                cors[i] = 0
        else:
            # SNP not present in the GWAS results: drop its correlation.
            cors[i] = 0

        # 0 doubles as the "no usable z-score" marker, so a GWAS z-score
        # that is exactly 0 is also left out of the z-score file (its
        # correlation entry is kept as loaded).
        if zscore == 0:
            continue
        zscore = str(zscore)
        pos = snp[TWASFormat.MTF.pos]
        zscores.append((rsid, pos, a1, a2, zscore))

    working_path = os.path.join(working_folder, content)
    os.makedirs(working_path)

    # All output files share the "<working_path>/<content>" prefix.
    sub_working_path = os.path.join(working_path, content)

    working_cor = sub_working_path + ".wgt.cor"
    with open(working_cor, "w") as cor_file:
        for cor in cors:
            cor_file.write(str(cor) + "\n")

    working_ld_path = sub_working_path + ".wgt.ld"
    shutil.copy(ld_path, working_ld_path)

    working_map_path = sub_working_path + ".wgt.map"
    shutil.copy(map_path, working_map_path)

    zscore_path = sub_working_path + ".zscore"
    with open(zscore_path, "w") as zscore_file:
        zscore_file.write("SNP_ID SNP_Pos Ref_Allele Alt_Allele Z-score\n")
        for z in zscores:
            zscore_file.write(" ".join(z) + "\n")

    return working_path