Exemple #1
0
'''
Created on May 5, 2020

@author: ywkim
'''

from sys import argv
from src.germline_analyses import get_list_from_csv, enrichment_test


if __name__ == '__main__':
    # Command-line entry point: compare two gene lists and report their overlap.
    #
    # Expects two positional arguments, each the path of a tab-separated file
    # that has a 'Gene' column.
    if len(argv) < 3:
        # Fail with a readable message instead of a bare IndexError.
        raise SystemExit('usage: python <script> <gene_list_file_1> <gene_list_file_2>')

    input_file1 = argv[1]
    input_file2 = argv[2]

    l1 = get_list_from_csv(input_file1, 'Gene', sep='\t')
    l2 = get_list_from_csv(input_file2, 'Gene', sep='\t')

    # Fixed total passed as the third argument of enrichment_test.
    # NOTE(review): presumably the number of genes in the background set --
    # confirm against enrichment_test. An earlier revision considered using
    # the size of the union of both lists instead: len(list(set(l1 + l2))).
    n_total = 17039
    n_overlap, p, overlap = enrichment_test(l1, l2, n_total)

    print(len(l1), len(l2))
    print(n_overlap, p)
    print(overlap)

import matplotlib.pyplot as plt

if __name__ == '__main__':
    # Script entry point: load a gene list and the per-group matrices
    # restricted to those genes.
    #
    # The output folder is built from Path().absolute(), i.e. it is relative
    # to the current working directory (assuming Path is pathlib.Path -- its
    # import is not visible in this part of the file).
    output_folder = str(Path().absolute()) + '/output_ADSP_discovery/'

    # Active configuration: which gene list to read and the comparison label.
    gene_file = output_folder + 'iDEAL_genelist.txt'
    comparison = 'APOE2_AD_v_APOE4_HC'

    # Alternative configuration (disabled): pathogenic gene list.
    # gene_file = output_folder + 'pathogenic.txt'
    # comparison = 'APOE2_ADvHC_pathogenic'

    # Alternative configuration (disabled): protective gene list.
    # gene_file = output_folder + 'protective.txt'
    # comparison = 'APOE4_HCvAD_protective'

    # Read the 'Gene' column of the tab-separated gene list file.
    gene_list = get_list_from_csv(gene_file, 'Gene', sep='\t')

    # these are gene x patient matrices
    # NOTE(review): get_matrix_subset is defined elsewhere; presumably it reads
    # the named TSV from output_folder and keeps only the rows for gene_list,
    # forwarding sep/index_col to the reader -- confirm against its definition.
    ADe2 = get_matrix_subset(output_folder,
                             'sum_ea_matrix_ADe2.tsv',
                             gene_list,
                             sep='\t',
                             index_col=0)
    ADe4 = get_matrix_subset(output_folder,
                             'sum_ea_matrix_ADe4.tsv',
                             gene_list,
                             sep='\t',
                             index_col=0)
    HCe4 = get_matrix_subset(output_folder,
                             'sum_ea_matrix_HCe4.tsv',
                             gene_list,
Exemple #3
0
    except HTTPError:
        time.sleep(5)
        Entrez.email = '*****@*****.**'
        handle = Entrez.esearch(db='pubmed',
                                sort='relevance',
                                retstart=rs,
                                retmax='100000',
                                retmode='xml',
                                term=term_)
        results = Entrez.read(handle)

    return results


if __name__ == '__main__':
    # Script entry point: for every drug in the input list, run a PubMed
    # search and write the matching PMIDs to a tab-separated report.
    #
    # Both folders are built from Path().absolute(), so they are relative to
    # the current working directory.
    input_folder = str(Path().absolute()) + '/input/'
    output_folder = str(Path().absolute()) + '/output_ADSP_discovery/'

    # Read the 'Drug' column of the tab-separated drug list.
    drugs_file = input_folder + 'iDEAL_drugs_list.txt'
    drugs_list = get_list_from_csv(drugs_file, 'Drug', sep='\t')

    outputFile = output_folder + 'iDEAL_drugs_pubmed.txt'
    # newline='' is required when handing a file to csv.writer: the writer
    # emits its own '\r\n' terminators, and without it platforms that
    # translate newlines (Windows) would produce '\r\r\n'.
    with open(outputFile, 'w', newline='') as f:
        writer = csv.writer(f, delimiter='\t')
        writer.writerow(['Drug', 'PMID_count', 'PMID'])
        for drug in drugs_list:
            # Progress output: one line per drug being queried.
            print(drug)
            # search(...) returns a mapping; 'IdList' holds the matching IDs.
            # NOTE(review): the second argument is presumably the result
            # offset (retstart) -- confirm against search().
            id_list = search(drug, 0)['IdList']

            # The whole ID list goes into a single cell; csv.writer
            # stringifies it with str().
            writer.writerow([drug, len(id_list), id_list])
from src.germline_analyses import get_list_from_csv

def venn_diagram(list1, list2, label1, label2, title, filename):
    """Draw a two-set Venn diagram of the given lists and save it as a PNG.

    Duplicates within each list are ignored because the lists are converted
    to sets before plotting.

    :param list1: items of the first (left, red) set
    :param list2: items of the second (right, sky-blue) set
    :param label1: caption for the first set
    :param label2: caption for the second set
    :param title: figure title
    :param filename: output file name without extension; '.png' is appended

    The figure is written to ``new_ideal_folder + filename + '.png'``.
    ``new_ideal_folder`` is a module-level name that must be defined before
    this function is called (the script's ``__main__`` section sets it).
    The current figure is cleared afterwards so consecutive calls do not
    draw on top of each other.
    """
    # Build each set once and reuse it for both the filled diagram and the
    # circle outlines.
    set1 = set(list1)
    set2 = set(list2)

    venn = venn2([set1, set2], set_labels=(label1, label2), set_colors=('red', 'skyblue'), alpha=0.7)
    venn2_circles([set1, set2])

    # matplotlib-venn stores None in place of a label for a region that does
    # not exist (e.g. disjoint lists or one list containing the other), so
    # skip those entries instead of failing with AttributeError.
    for text in venn.set_labels:
        if text is not None:
            text.set_fontsize(14)
    for text in venn.subset_labels:
        if text is not None:
            text.set_fontsize(16)

    plt.title(title)
    plt.savefig(new_ideal_folder + filename + '.png', dpi=200, transparent=True)
    plt.clf()


if __name__ == '__main__':
    # Script entry point: compare the old and new gene lists and save one
    # Venn diagram per gene category.

    # Folder with the updated results. venn_diagram() reads this module-level
    # name to decide where the figures are written.
    new_ideal_folder = '/Users/ywkim/Desktop/Projects/GermlineProject/ADSP/updated_iDEAL/'
    # Folder with the earlier results used as the comparison baseline.
    old_ideal_folder = ('/Users/ywkim/Desktop/Projects/GermlineProject/ADSP/RVEA/new2018/RVEA_BaylorPass_'
                        'nonHisWhite_2v4_h5py_STARTLOSS100/')

    # Gene lists come from the 'Gene' column of tab-separated files; the new
    # run uses a .tsv extension, the old run .txt.
    new_pathogenic_list = get_list_from_csv(new_ideal_folder + 'pathogenic.tsv', 'Gene', sep='\t')
    new_protective_list = get_list_from_csv(new_ideal_folder + 'protective.tsv', 'Gene', sep='\t')
    old_pathogenic_list = get_list_from_csv(old_ideal_folder + 'pathogenic.txt', 'Gene', sep='\t')
    old_protective_list = get_list_from_csv(old_ideal_folder + 'protective.txt', 'Gene', sep='\t')

    # One entry per figure: (old genes, new genes, plot title, output name).
    comparisons = (
        (old_pathogenic_list, new_pathogenic_list, 'APOE2-AD genes', 'APOE2_AD_genes_comparison'),
        (old_protective_list, new_protective_list, 'APOE4-HC genes', 'APOE4_HC_genes_comparison'),
    )
    for old_genes, new_genes, plot_title, png_name in comparisons:
        venn_diagram(old_genes, new_genes, 'Old ver', 'New ver', plot_title, png_name)
Exemple #5
0
    cases = ADe2 + ADe3 + ADe4
    controls = HCe2 + HCe3 + HCe4

    trauma_folder = '/media/vision/ExtraDrive1/Exome/ADSP_discovery/all_short_name/'
    output_folder = '/home/vision/Documents/GermlineProject/ADSP/iDEAL_updated_test_2019/'

    EAListCase = []
    EAListControl = []

    # gene_file = output_folder + 'iDEAL_genelist.txt'
    # candidate = get_list_from_csv(gene_file, 'Gene', sep='\t')

    pathogenic_file = output_folder + 'pathogenic.txt'
    protective_file = output_folder + 'protective.txt'

    pathogenic_gene = get_list_from_csv(pathogenic_file, 'Gene', sep='\t')
    protective_gene = get_list_from_csv(protective_file, 'Gene', sep='\t')

    ptCounter = 0
    for filename in os.listdir(trauma_folder):
        ptFile = (os.path.join(trauma_folder, filename))
        if filename in cases:
            processGermlineFile(ptFile, EAListCase, pathogenic_gene)
        elif filename in controls:
            processGermlineFile(ptFile, EAListControl, pathogenic_gene)
        ptCounter += 1
        print(ptCounter)

    getHistogram(EAListCase, EAListControl, 'pathogenic_CaseControl')

    file1 = open(output_folder + 'pathogenic_case_control_kstest.txt', 'w')
        for idx, output_list in enumerate(output_lists):
            gene = gene_list[idx]
            variant = variant_list[idx]
            # info = [gene, variant] + output_list
            info = output_list
            writer.writerow(info)


if __name__ == '__main__':
    input_folder = str(Path().absolute()) + '/input/'
    output_folder = str(Path().absolute()) + '/output_ADSP_discovery/'

    variant_file = input_folder + 'top_protective_variants.txt'
    # variant_file = input_folder + 'top_pathogenic_variants.txt'
    gene_list = get_list_from_csv(variant_file, 'Gene', sep='\t')
    variant_list = get_list_from_csv(variant_file, 'Sub', sep='\t')

    trauma_folder = '/media/vision/ExtraDrive1/Exome/ADSP_discovery/all_short_name/'

    sift_list = []
    pph2_list = []
    cadd_list = []
    for idx, gene in enumerate(gene_list):
        variant = variant_list[idx]
        flag = 0
        print(gene, variant)
        pt_counter = 0
        for filename in os.listdir(trauma_folder):
            ptFile = (os.path.join(trauma_folder, filename))
            flag = get_variant_info(ptFile, gene, variant, flag)