Esempio n. 1
0
def get_labels_and_correlation(
        bw_files,
        bin_size=10000,
        method='pearson',
        fileset_name='result',
        blacklist=None,
        labels=bw_labels,
        output_dir=BASE_DIR):
    """Bin bigwig files, correlate the bins, and plot a correlation heatmap.

    The per-bin scores returned by ``score_bw.getScorePerBin`` are saved to
    ``<fileset_name>_<method>[_blacklisted].npz`` inside ``output_dir``, fed
    to ``Correlation``, and the resulting matrix is plotted to a ``.png``
    with the same base name. The flattened correlation values are also
    written to ``corrScores.txt`` in the current working directory.

    Args:
        bw_files: Paths of the bigwig files to compare.
        bin_size: Bin size passed to ``score_bw.getScorePerBin``.
        method: Correlation method, ``'pearson'`` or ``'spearman'``.
        fileset_name: Base name used for the output files and plot title.
        blacklist: Optional blacklist file name passed to
            ``getScorePerBin`` as ``blackListFileName``.
        labels: One label per file; when empty or ``None`` the labels are
            derived from the file names (base name up to the first ``.``).
        output_dir: Directory that receives the ``.npz`` and ``.png`` files.

    Returns:
        A tuple ``(image_path, labels, scores)`` where ``scores`` is the
        correlation matrix flattened into a single list.

    Raises:
        AssertionError: If ``method`` is not ``'pearson'`` or ``'spearman'``.
    """
    # Kept as an assert so callers see the same exception type as before.
    assert method in ('pearson', 'spearman'), 'Invalid correlation method'

    # Autogenerate labels from the file names if none were provided.
    if not labels:
        labels = [path.split('/')[-1].split('.')[0] for path in bw_files]

    # Generate a name for this unique combination of inputs.
    test_name = fileset_name + '_' + method
    if blacklist:
        # The trailing space keeps "Blacklisted" and the method name apart
        # in the plot title.
        blacklist_title = 'Blacklisted '
        test_name += '_blacklisted'
    else:
        blacklist_title = ''

    # os.path.join tolerates an output_dir with or without a trailing
    # separator; plain string concatenation silently produced a wrong path.
    npz_path = os.path.join(output_dir, test_name + '.npz')
    image_path = os.path.join(output_dir, test_name + '.png')

    # Bin the bigwig data (bin_size defaults to 10 kb).
    num_reads_per_bin = score_bw.getScorePerBin(
        bw_files,
        bin_size,
        blackListFileName=blacklist)

    # Persist the binned scores; Correlation is given this file's path.
    with open(npz_path, "wb") as f:
        np.savez_compressed(f, matrix=num_reads_per_bin, labels=labels)

    # Compute the correlations.
    corr = Correlation(npz_path, method, labels=labels)
    np_array = corr.compute_correlation()

    def _flatten(nested):
        """Return the leaves of an arbitrarily nested list, in order."""
        flat = []
        for item in nested:
            if isinstance(item, list):
                flat.extend(_flatten(item))
            else:
                flat.append(item)
        return flat

    correlation_scores = _flatten(np_array.tolist())

    # Written to the current working directory, not to output_dir.
    with open("corrScores.txt", "w") as f:
        f.write(str(correlation_scores))

    # Create a png file of the correlation heatmap.
    plot_title = '{}{} Correlation of {}'.format(blacklist_title,
                                                 method.capitalize(),
                                                 fileset_name)
    corr.plot_correlation(image_path, plot_title=plot_title)

    return image_path, labels, correlation_scores
Esempio n. 2
0
def get_labels_and_correlation(
        bw_files,
        bin_size=10000,
        method='pearson',
        fileset_name='result',
        blacklist=None,
        labels=bw_labels,
        output_dir='/Users/baditya02/Downloads/treatment-data/graphs/test/'
):
    """Bin bigwig files, correlate the bins, and plot a correlation heatmap.

    The per-bin scores returned by ``score_bw.getScorePerBin`` are saved to
    ``<fileset_name>_<method>[_blacklisted].npz`` inside ``output_dir``, fed
    to ``Correlation``, and the resulting matrix is plotted to a ``.png``
    with the same base name.

    Args:
        bw_files: Paths of the bigwig files to compare.
        bin_size: Bin size passed to ``score_bw.getScorePerBin``.
        method: Correlation method, ``'pearson'`` or ``'spearman'``.
        fileset_name: Base name used for the output files and plot title.
        blacklist: Optional blacklist file name passed to
            ``getScorePerBin`` as ``blackListFileName``.
        labels: One label per file; when empty or ``None`` the labels are
            derived from the file names (base name up to the first ``.``).
        output_dir: Directory that receives the ``.npz`` and ``.png`` files.
            NOTE(review): the default is a machine-specific absolute path;
            callers on other machines should pass this explicitly.

    Returns:
        A tuple ``(image_path, labels, scores)`` where ``scores`` is the
        correlation matrix flattened into a single list.

    Raises:
        AssertionError: If ``method`` is not ``'pearson'`` or ``'spearman'``.
    """
    # Local import: only this function needs os, for path joining.
    import os

    # Kept as an assert so callers see the same exception type as before.
    assert method in ('pearson', 'spearman'), 'Invalid correlation method'

    # Autogenerate labels from the file names if none were provided.
    if not labels:
        labels = [path.split('/')[-1].split('.')[0] for path in bw_files]

    # Generate a name for this unique combination of inputs.
    test_name = fileset_name + '_' + method
    if blacklist:
        blacklist_title = 'Blacklisted '
        test_name += '_blacklisted'
    else:
        blacklist_title = ''

    # os.path.join tolerates an output_dir with or without a trailing
    # separator; plain string concatenation silently produced a wrong path.
    npz_path = os.path.join(output_dir, test_name + '.npz')
    image_path = os.path.join(output_dir, test_name + '.png')

    # Bin the bigwig data (bin_size defaults to 10 kb).
    num_reads_per_bin = score_bw.getScorePerBin(
        bw_files,
        bin_size,
        blackListFileName=blacklist
    )

    # Persist the binned scores; Correlation is given this file's path.
    with open(npz_path, "wb") as f:
        np.savez_compressed(f, matrix=num_reads_per_bin, labels=labels)

    # Compute the correlations.
    corr = Correlation(npz_path, method, labels=labels)
    np_array = corr.compute_correlation()

    def _flatten(nested):
        """Return the leaves of an arbitrarily nested list, in order."""
        flat = []
        for item in nested:
            if isinstance(item, list):
                flat.extend(_flatten(item))
            else:
                flat.append(item)
        return flat

    correlation_scores = _flatten(np_array.tolist())

    # Create a png file of the correlation heatmap.
    plot_title = '{}{} Correlation of {}'.format(
        blacklist_title,
        method.capitalize(),
        fileset_name
    )
    corr.plot_correlation(image_path, plot_title=plot_title)

    return image_path, labels, correlation_scores