# Example no. 1
0
def main():
    """Write the thresholded gene correlation network for one data set.

    The single command-line argument is either the literal "mouse" or a
    non-negative integer index into the list returned by
    file_operations.get_tcga_disease_list(). The expression values of the
    high-std genes are turned into a matrix, corrcoef() is run on it, and
    every gene pair (upper triangle only, so each pair appears once) whose
    coefficient reaches the threshold is written as a
    "gene_a<TAB>gene_b<TAB>coefficient" line to
    ./data/<data_type>_data/high_std_network.txt.

    Prints the usage message and exits with status 1 when the argument is
    missing or is neither "mouse" nor all digits.
    """
    usage = "Usage:python %s mouse/tcga_cancer_index" % sys.argv[0]
    if len(sys.argv) != 2:
        print(usage)
        sys.exit(1)

    data_type = sys.argv[1]
    # Explicit check instead of assert: asserts are stripped under -O.
    if data_type != "mouse" and not data_type.isdigit():
        print(usage)
        sys.exit(1)
    if data_type.isdigit():
        data_type = file_operations.get_tcga_disease_list()[int(data_type)]

    # TCGA coefficients are worse, so that data gets a looser cutoff.
    pcc_threshold = 0.9 if data_type == "mouse" else 0.5

    # Read in the tsv file.
    gene_exp_dct = file_operations.get_gene_expression_dct(data_type)
    high_std_genes = file_operations.get_high_std_genes(data_type)

    gene_exp_matrix = create_gene_exp_matrix(gene_exp_dct, high_std_genes)

    # NOTE(review): the second return value looks like per-pair p-values;
    # it is currently unused because p-value filtering is disabled.
    pcc_matrix, _p_values = corrcoef(gene_exp_matrix)

    out_path = "./data/%s_data/high_std_network.txt" % data_type
    # The context manager closes the file even if writing fails part-way.
    with open(out_path, "w") as out:
        for row_idx, row in enumerate(pcc_matrix):
            gene_a = high_std_genes[row_idx]
            # Only columns right of the diagonal: skips self-pairs and the
            # mirrored duplicates of a symmetric matrix.
            for col_idx in range(row_idx + 1, len(row)):
                pcc = row[col_idx]
                # "not >=" (rather than "<") also rejects NaN coefficients,
                # which compare false against everything. Coefficients of
                # exactly 1 are skipped as well.
                if not (pcc >= pcc_threshold) or pcc == 1:
                    continue
                # Write out gene information.
                gene_b = high_std_genes[col_idx]
                out.write("%s\t%s\t%f\n" % (gene_a, gene_b, abs(pcc)))