def main(args=None):
    """Run the plotCorrelation command-line workflow.

    Parses the arguments, builds a ``Correlation`` object from
    ``args.corData`` and then writes a plot to ``args.plotFile`` and/or the
    correlation matrix to ``args.outFileCorMatrix``.

    Args:
        args: Forwarded unchanged to ``parse_arguments().parse_args``.
            Defaults to ``None``.

    Raises:
        SystemExit: If neither ``--plotFile`` nor ``--outFileCorMatrix`` was
            given, or if ``args.colorMap`` is rejected by
            ``plt.get_cmap`` (exit status 1).
    """
    args = parse_arguments().parse_args(args)

    if args.plotFile is None and args.outFileCorMatrix is None:
        sys.exit("At least one of --plotFile and --outFileCorMatrix must be specified!\n")

    corr = Correlation(args.corData,
                       args.corMethod,
                       labels=args.labels,
                       remove_outliers=args.removeOutliers,
                       skip_zeros=args.skipZeros)

    if args.corMethod == 'pearson':
        # Test if there are outliers and write a message recommending the
        # removal. len() is used instead of truthiness because the result
        # may be an array, whose truth value would be ambiguous.
        outliers = corr.get_outlier_indices(np.asarray(corr.matrix).flatten())
        if len(outliers) > 0:
            if args.removeOutliers:
                sys.stderr.write("\nOutliers were detected in the data. They "
                                 "will be removed to avoid bias "
                                 "in the pearson correlation.\n")
            else:
                sys.stderr.write("\nOutliers were detected in the data. Consider "
                                 "using the --removeOutliers parameter to avoid a bias "
                                 "in the pearson correlation.\n")

    if args.colorMap:
        # Validate the colormap name up front so that a bad name fails
        # before any plotting work is done.
        try:
            plt.get_cmap(args.colorMap)
        except ValueError as error:
            sys.stderr.write(
                "A problem was found. Message: {}\n".format(error))
            # sys.exit rather than the site-provided exit() helper, and a
            # non-zero status so callers can detect the failure.
            sys.exit(1)

    if args.plotFile is not None:
        if args.whatToPlot == 'scatterplot':
            corr.plot_scatter(args.plotFile,
                              plot_title=args.plotTitle,
                              image_format=args.plotFileFormat,
                              xRange=args.xRange,
                              yRange=args.yRange,
                              log1p=args.log1p)
        else:
            corr.plot_correlation(args.plotFile,
                                  vmax=args.zMax,
                                  vmin=args.zMin,
                                  colormap=args.colorMap,
                                  plot_title=args.plotTitle,
                                  image_format=args.plotFileFormat,
                                  plot_numbers=args.plotNumbers,
                                  plotWidth=args.plotWidth,
                                  plotHeight=args.plotHeight)

    if args.outFileCorMatrix:
        # The context manager closes the file even if saving the matrix fails.
        with open(args.outFileCorMatrix, "w") as matrix_file:
            matrix_file.write("#plotCorrelation --outFileCorMatrix\n")
            corr.save_corr_matrix(matrix_file)
def main(args=None):
    """Run the plotCorrelation command-line workflow.

    Parses the arguments, builds a ``Correlation`` object from
    ``args.corData`` and then writes a plot to ``args.plotFile`` and/or the
    correlation matrix to ``args.outFileCorMatrix``.

    Args:
        args: Forwarded unchanged to ``parse_arguments().parse_args``.
            Defaults to ``None``.

    Raises:
        SystemExit: If neither ``--plotFile`` nor ``--outFileCorMatrix`` was
            given, or if ``args.colorMap`` is rejected by
            ``plt.get_cmap`` (exit status 1).
    """
    args = parse_arguments().parse_args(args)

    if args.plotFile is None and args.outFileCorMatrix is None:
        sys.exit("At least one of --plotFile and --outFileCorMatrix must be specified!\n")

    corr = Correlation(args.corData,
                       args.corMethod,
                       labels=args.labels,
                       remove_outliers=args.removeOutliers,
                       skip_zeros=args.skipZeros)

    if args.corMethod == 'pearson':
        # Test if there are outliers and write a message recommending the
        # removal. len() is used instead of truthiness because the result
        # may be an array, whose truth value would be ambiguous.
        outliers = corr.get_outlier_indices(np.asarray(corr.matrix).flatten())
        if len(outliers) > 0:
            if args.removeOutliers:
                sys.stderr.write("\nOutliers were detected in the data. They "
                                 "will be removed to avoid bias "
                                 "in the pearson correlation.\n")
            else:
                sys.stderr.write("\nOutliers were detected in the data. Consider "
                                 "using the --removeOutliers parameter to avoid a bias "
                                 "in the pearson correlation.\n")

    if args.colorMap:
        # Validate the colormap name up front so that a bad name fails
        # before any plotting work is done.
        try:
            plt.get_cmap(args.colorMap)
        except ValueError as error:
            sys.stderr.write(
                "A problem was found. Message: {}\n".format(error))
            # sys.exit rather than the site-provided exit() helper, and a
            # non-zero status so callers can detect the failure.
            sys.exit(1)

    if args.plotFile is not None:
        if args.whatToPlot == 'scatterplot':
            corr.plot_scatter(args.plotFile,
                              plot_title=args.plotTitle,
                              image_format=args.plotFileFormat,
                              xRange=args.xRange,
                              yRange=args.yRange,
                              log1p=args.log1p)
        else:
            corr.plot_correlation(args.plotFile,
                                  vmax=args.zMax,
                                  vmin=args.zMin,
                                  colormap=args.colorMap,
                                  plot_title=args.plotTitle,
                                  image_format=args.plotFileFormat,
                                  plot_numbers=args.plotNumbers,
                                  plotWidth=args.plotWidth,
                                  plotHeight=args.plotHeight)

    if args.outFileCorMatrix:
        # The context manager closes the file even if saving the matrix fails.
        with open(args.outFileCorMatrix, "w") as matrix_file:
            matrix_file.write("#plotCorrelation --outFileCorMatrix\n")
            corr.save_corr_matrix(matrix_file)
def get_labels_and_correlation(
        bw_files,
        bin_size=10000,
        method='pearson',
        fileset_name='result',
        blacklist=None,
        labels=bw_labels,
        output_dir=BASE_DIR):
    """Correlate binned bigwig scores and plot the result as a heatmap.

    The per-bin scores are written to ``<fileset_name>_<method>.npz`` inside
    ``output_dir``, the correlation is computed from that file, the flattened
    correlation values are written to ``corrScores.txt`` in the current
    working directory, and a heatmap is saved as a ``.png`` next to the
    ``.npz`` file.

    Args:
        bw_files: Iterable of bigwig file paths passed to
            ``score_bw.getScorePerBin``.
        bin_size: Bin size passed to ``score_bw.getScorePerBin``.
        method: Correlation method, ``'pearson'`` or ``'spearman'``.
        fileset_name: Name used in the output file names and the plot title.
        blacklist: Optional blacklist file name; when truthy the output names
            gain a ``_blacklisted`` suffix and the title a ``Blacklisted``
            prefix.
        labels: Sample labels. When falsy, labels are derived from the file
            names (base name up to the first ``.``).
        output_dir: Directory receiving the ``.npz`` and ``.png`` files; a
            trailing path separator is optional.

    Returns:
        A tuple ``(image_path, labels, values)`` where ``values`` is the
        correlation matrix flattened into a plain list.

    Raises:
        AssertionError: If ``method`` is not a supported correlation method.
    """
    assert method in ('pearson', 'spearman'), 'Invalid correlation method'

    # Autogenerate labels from filename if not provided
    if not labels:
        labels = [path.split('/')[-1].split('.')[0] for path in bw_files]

    # Generate a name for the unique combination
    test_name = fileset_name + '_' + method
    if blacklist:
        # The trailing space keeps the prefix apart from the method name in
        # the plot title.
        blacklist_title = 'Blacklisted '
        test_name += '_blacklisted'
    else:
        blacklist_title = ''

    # Bin the bigwig data
    num_reads_per_bin = score_bw.getScorePerBin(
        bw_files,
        bin_size,
        blackListFileName=blacklist)

    # Write to npz file; Correlation is handed the file name below.
    filename = os.path.join(output_dir, test_name + '.npz')
    with open(filename, "wb") as f:
        np.savez_compressed(f, matrix=num_reads_per_bin, labels=labels)

    # Compute the correlations
    corr = Correlation(filename, method, labels=labels)
    np_array = corr.compute_correlation()

    def _flatten(nested):
        """Yield the leaf values of an arbitrarily nested list, in order."""
        for item in nested:
            if isinstance(item, list):
                yield from _flatten(item)
            else:
                yield item

    my_listUnnested = list(_flatten(np_array.tolist()))

    with open("corrScores.txt", "w") as f:
        f.write(str(my_listUnnested))

    plot_title = '{}{} Correlation of {}'.format(blacklist_title,
                                                 method.capitalize(),
                                                 fileset_name)

    # Create a png file of correlation heatmap
    image_path = os.path.join(output_dir, test_name + '.png')
    corr.plot_correlation(image_path, plot_title=plot_title)

    return image_path, labels, my_listUnnested
def get_labels_and_correlation(
        bw_files,
        bin_size=10000,
        method='pearson',
        fileset_name='result',
        blacklist=None,
        labels=bw_labels,
        output_dir='/Users/baditya02/Downloads/treatment-data/graphs/test/'):
    """Correlate binned bigwig scores and plot the result as a heatmap.

    The per-bin scores are written to ``<fileset_name>_<method>.npz`` inside
    ``output_dir``, the correlation is computed from that file, and a heatmap
    is saved as a ``.png`` next to the ``.npz`` file.

    Args:
        bw_files: Iterable of bigwig file paths passed to
            ``score_bw.getScorePerBin``.
        bin_size: Bin size passed to ``score_bw.getScorePerBin``.
        method: Correlation method, ``'pearson'`` or ``'spearman'``.
        fileset_name: Name used in the output file names and the plot title.
        blacklist: Optional blacklist file name; when truthy the output names
            gain a ``_blacklisted`` suffix and the title a ``Blacklisted``
            prefix.
        labels: Sample labels. When falsy, labels are derived from the file
            names (base name up to the first ``.``).
        output_dir: Directory receiving the ``.npz`` and ``.png`` files; a
            trailing path separator is optional. NOTE(review): the default
            is a machine-specific absolute path, so callers on other
            machines need to pass this explicitly.

    Returns:
        A tuple ``(image_path, labels, values)`` where ``values`` is the
        correlation matrix flattened into a plain list.

    Raises:
        AssertionError: If ``method`` is not a supported correlation method.
    """
    # Imported locally so the block does not rely on a module-level import.
    import os

    assert method in ('pearson', 'spearman'), 'Invalid correlation method'

    # Autogenerate labels from filename if not provided
    if not labels:
        labels = [path.split('/')[-1].split('.')[0] for path in bw_files]

    # Generate a name for the unique combination
    test_name = fileset_name + '_' + method
    if blacklist:
        blacklist_title = 'Blacklisted '
        test_name += '_blacklisted'
    else:
        blacklist_title = ''

    # Bin the bigwig data
    num_reads_per_bin = score_bw.getScorePerBin(
        bw_files,
        bin_size,
        blackListFileName=blacklist)

    # Write to npz file; Correlation is handed the file name below.
    filename = os.path.join(output_dir, test_name + '.npz')
    with open(filename, "wb") as f:
        np.savez_compressed(f, matrix=num_reads_per_bin, labels=labels)

    # Compute the correlations
    corr = Correlation(filename, method, labels=labels)
    np_array = corr.compute_correlation()

    def _flatten(nested):
        """Yield the leaf values of an arbitrarily nested list, in order."""
        for item in nested:
            if isinstance(item, list):
                yield from _flatten(item)
            else:
                yield item

    my_listUnnested = list(_flatten(np_array.tolist()))

    plot_title = '{}{} Correlation of {}'.format(blacklist_title,
                                                 method.capitalize(),
                                                 fileset_name)

    # Create a png file of correlation heatmap
    image_path = os.path.join(output_dir, test_name + '.png')
    corr.plot_correlation(image_path, plot_title=plot_title)

    return image_path, labels, my_listUnnested