def main(args=None):
    """Plot a PCA of the correlation data and optionally dump the components.

    The parsed options supply the input (``corData``), the labels, the plot
    target (``plotFile``) and an optional, already opened text handle
    (``outFileNameData``).  When that handle is given, a tab separated table
    is written to it: a header line followed by one line per label holding
    the 1-based component number, row ``i`` of ``Wt`` and entry ``i`` of
    ``s`` from the ``matplotlib.mlab.PCA`` result.

    :param args: optional list of command line arguments, passed unchanged
                 to ``parse_args``.
    """
    args = parse_arguments().parse_args(args)

    corr = Correlation(args.corData, labels=args.labels)

    # plot_pca only receives the file *name*, so the handle stored in
    # args.plotFile is closed before plotting.
    args.plotFile.close()
    corr.plot_pca(args.plotFile.name,
                  plot_title=args.plotTitle,
                  image_format=args.plotFileFormat)

    if args.outFileNameData is not None:
        # Import the submodule explicitly: a bare ``import matplotlib`` does
        # not guarantee that ``matplotlib.mlab`` has been loaded.
        # NOTE(review): ``mlab.PCA`` was removed in Matplotlib 3.1 -- confirm
        # the Matplotlib version this tool is pinned to.
        import matplotlib.mlab

        of = args.outFileNameData
        try:
            mlab_pca = matplotlib.mlab.PCA(corr.matrix)
            of.write("Component\t{}\tEigenvalue\n".format("\t".join(corr.labels)))
            # ``range`` instead of ``xrange`` so the loop also runs on Python 3.
            for i in range(len(corr.labels)):
                fields = [i + 1] + list(mlab_pca.Wt[i, :]) + [mlab_pca.s[i]]
                of.write("\t".join("{}".format(field) for field in fields) + "\n")
        finally:
            # Close the handle even when the PCA or a write fails.
            of.close()
def main(args=None):
    """Plot a PCA of the correlation data and optionally dump the components.

    Builds a ``Correlation`` from the parsed options, copies the
    ``rowCenter`` option onto it and writes the PCA plot to the path of
    ``args.plotFile``.  If ``args.outFileNameData`` is set, a tab separated
    table is written to that handle: a header line, then one line per label
    with the 1-based component number, row ``i`` of ``Wt`` and entry ``i`` of
    ``s`` taken from the ``matplotlib.mlab.PCA`` result.

    :param args: optional list of command line arguments, passed unchanged
                 to ``parse_args``.
    """
    args = parse_arguments().parse_args(args)

    corr = Correlation(args.corData, labels=args.labels)

    # Only the file name is passed on to plot_pca, so release the handle first.
    args.plotFile.close()

    corr.rowCenter = args.rowCenter
    corr.plot_pca(args.plotFile.name,
                  plot_title=args.plotTitle,
                  image_format=args.plotFileFormat)

    if args.outFileNameData is None:
        return

    # Import the submodule explicitly: a bare ``import matplotlib`` does not
    # guarantee that ``matplotlib.mlab`` has been loaded.
    # NOTE(review): ``mlab.PCA`` was removed in Matplotlib 3.1 -- confirm the
    # Matplotlib version this tool is pinned to.
    import matplotlib.mlab

    of = args.outFileNameData
    try:
        mlab_pca = matplotlib.mlab.PCA(corr.matrix)
        of.write("Component\t{}\tEigenvalue\n".format("\t".join(corr.labels)))
        for i in range(len(corr.labels)):
            fields = [i + 1] + list(mlab_pca.Wt[i, :]) + [mlab_pca.s[i]]
            of.write("\t".join("{}".format(field) for field in fields) + "\n")
    finally:
        # Guarantee the output handle is closed even if the PCA or a write
        # raises.
        of.close()
def main(args=None):
    """Entry point: draw the PCA plot for the given correlation data.

    The options are parsed from ``args``, a ``Correlation`` object is built
    from ``corData`` and ``labels``, and the plot is written to the path of
    the ``plotFile`` option.

    :param args: optional list of command line arguments, passed unchanged
                 to ``parse_args``.
    """
    options = parse_arguments().parse_args(args)
    correlation = Correlation(options.corData, labels=options.labels)

    # plot_pca is given the file name only; close the handle beforehand.
    plot_handle = options.plotFile
    plot_handle.close()

    correlation.plot_pca(
        plot_handle.name,
        plot_title=options.plotTitle,
        image_format=options.plotFileFormat,
    )
def main(args=None):
    """Plot a correlation scatterplot/heatmap and/or save the matrix.

    At least one of ``--plotFile`` and ``--outFileCorMatrix`` has to be
    given, otherwise the program exits with an error message.  For the
    pearson method a note about detected outliers is written to stderr.
    An unknown colormap name is reported on stderr and terminates the
    program with a non-zero status.

    :param args: optional list of command line arguments, passed unchanged
                 to ``parse_args``.
    :raises SystemExit: if neither output was requested or the colormap is
                        rejected by ``plt.get_cmap``.
    """
    args = parse_arguments().parse_args(args)

    if args.plotFile is None and args.outFileCorMatrix is None:
        sys.exit("At least one of --plotFile and --outFileCorMatrix must be specified!\n")

    corr = Correlation(args.corData,
                       args.corMethod,
                       labels=args.labels,
                       remove_outliers=args.removeOutliers,
                       skip_zeros=args.skipZeros)

    if args.corMethod == 'pearson':
        # test if there are outliers and write a message recommending the removal
        if len(corr.get_outlier_indices(np.asarray(corr.matrix).flatten())) > 0:
            if args.removeOutliers:
                sys.stderr.write("\nOutliers were detected in the data. They "
                                 "will be removed to avoid bias "
                                 "in the pearson correlation.\n")
            else:
                sys.stderr.write("\nOutliers were detected in the data. Consider "
                                 "using the --removeOutliers parameter to avoid a bias "
                                 "in the pearson correlation.\n")

    if args.colorMap:
        # Validate the colormap name up front so a typo fails before plotting.
        try:
            plt.get_cmap(args.colorMap)
        except ValueError as error:
            sys.stderr.write(
                "A problem was found. Message: {}\n".format(error))
            # Use sys.exit with a non-zero status: the builtin ``exit()`` is
            # only provided by the ``site`` module and reported success (0).
            sys.exit(1)

    if args.plotFile is not None:
        if args.whatToPlot == 'scatterplot':
            corr.plot_scatter(args.plotFile,
                              plot_title=args.plotTitle,
                              image_format=args.plotFileFormat,
                              xRange=args.xRange,
                              yRange=args.yRange,
                              log1p=args.log1p)
        else:
            corr.plot_correlation(args.plotFile,
                                  vmax=args.zMax,
                                  vmin=args.zMin,
                                  colormap=args.colorMap,
                                  plot_title=args.plotTitle,
                                  image_format=args.plotFileFormat,
                                  plot_numbers=args.plotNumbers,
                                  plotWidth=args.plotWidth,
                                  plotHeight=args.plotHeight)

    if args.outFileCorMatrix:
        # The context manager closes the file even if saving the matrix fails.
        with open(args.outFileCorMatrix, "w") as o:
            o.write("#plotCorrelation --outFileCorMatrix\n")
            corr.save_corr_matrix(o)
def main(args=None):
    """Plot a PCA and/or write the components to a tab separated file.

    The options are validated first: at least one of ``--plotFile`` and
    ``--outFileNameData`` is required, ``--ntop`` must not be negative and
    the two requested principal components must differ and be at least 1.
    The values returned by ``plot_pca`` (``Wt`` and the eigenvalues) are
    written to ``--outFileNameData`` when that option is given: one line per
    eigenvalue with the 1-based component number, row ``i`` of ``Wt`` and
    the eigenvalue.

    :param args: optional list of command line arguments, passed unchanged
                 to ``parse_args``.
    :raises SystemExit: if one of the option checks fails.
    """
    args = parse_arguments().parse_args(args)

    if args.plotFile is None and args.outFileNameData is None:
        sys.exit(
            "At least one of --plotFile and --outFileNameData must be specified!\n"
        )

    if args.ntop < 0:
        sys.exit("The value specified for --ntop must be >= 0!\n")

    if args.PCs[0] == args.PCs[1]:
        sys.exit("You must specify different principal components!\n")

    if args.PCs[0] <= 0 or args.PCs[1] <= 0:
        sys.exit("The specified principal components must be at least 1!\n")

    corr = Correlation(
        args.corData,
        labels=args.labels,
    )

    # These options are set as attributes on the Correlation object before
    # plot_pca is called.
    corr.rowCenter = args.rowCenter
    corr.transpose = args.transpose
    corr.ntop = args.ntop
    corr.log2 = args.log2

    Wt, eigenvalues = corr.plot_pca(args.plotFile,
                                    PCs=args.PCs,
                                    plot_title=args.plotTitle,
                                    image_format=args.plotFileFormat,
                                    plotWidth=args.plotWidth,
                                    plotHeight=args.plotHeight,
                                    cols=args.colors,
                                    marks=args.markers)

    if args.outFileNameData is not None:
        # ``with`` closes the file even if formatting or writing a row fails.
        with open(args.outFileNameData, "w") as of:
            of.write("#plotPCA --outFileNameData\n")
            of.write("Component\t{}\tEigenvalue\n".format("\t".join(corr.labels)))
            for i in range(eigenvalues.shape[0]):
                loadings = "\t".join("{}".format(x) for x in Wt[i, :])
                of.write("{}\t{}\t{}\n".format(i + 1, loadings, eigenvalues[i]))
def main(args=None):
    """Entry point: optionally save the correlation matrix, then plot a PCA.

    A ``Correlation`` object is built from the ``corData`` and ``labels``
    options.  If ``outFileCorMatrix`` is set it is handed to
    ``save_corr_matrix``; afterwards the PCA plot is written to the path of
    the ``plotFile`` option.

    :param args: optional list of command line arguments, passed unchanged
                 to ``parse_args``.
    """
    options = parse_arguments().parse_args(args)
    correlation = Correlation(options.corData, labels=options.labels)

    matrix_target = options.outFileCorMatrix
    if matrix_target:
        correlation.save_corr_matrix(matrix_target)

    # plot_pca is given the file name only; close the handle beforehand.
    plot_handle = options.plotFile
    plot_handle.close()

    correlation.plot_pca(
        plot_handle.name,
        plot_title=options.plotTitle,
        image_format=options.plotFileFormat,
    )
def main(args=None):
    """Plot a PCA and/or write the components to a tab separated file.

    Exits with an error message unless at least one of ``--plotFile`` and
    ``--outFileNameData`` is given, ``--ntop`` is non-negative and the two
    requested principal components are different and at least 1.  When
    ``--outFileNameData`` is given, the ``Wt`` matrix and the eigenvalues
    returned by ``plot_pca`` are written to that file, one line per
    eigenvalue.

    :param args: optional list of command line arguments, passed unchanged
                 to ``parse_args``.
    :raises SystemExit: if one of the option checks fails.
    """
    args = parse_arguments().parse_args(args)

    if args.plotFile is None and args.outFileNameData is None:
        sys.exit("At least one of --plotFile and --outFileNameData must be specified!\n")

    if args.ntop < 0:
        sys.exit("The value specified for --ntop must be >= 0!\n")

    if args.PCs[0] == args.PCs[1]:
        sys.exit("You must specify different principal components!\n")

    if args.PCs[0] <= 0 or args.PCs[1] <= 0:
        sys.exit("The specified principal components must be at least 1!\n")

    corr = Correlation(args.corData, labels=args.labels)

    # Options are passed to the Correlation object as attributes before the
    # plot_pca call.
    corr.rowCenter = args.rowCenter
    corr.transpose = args.transpose
    corr.ntop = args.ntop
    corr.log2 = args.log2

    Wt, eigenvalues = corr.plot_pca(args.plotFile,
                                    PCs=args.PCs,
                                    plot_title=args.plotTitle,
                                    image_format=args.plotFileFormat,
                                    plotWidth=args.plotWidth,
                                    plotHeight=args.plotHeight,
                                    cols=args.colors,
                                    marks=args.markers)

    if args.outFileNameData is None:
        return

    # The context manager guarantees the file is closed even when a write
    # raises part-way through the table.
    with open(args.outFileNameData, "w") as of:
        of.write("#plotPCA --outFileNameData\n")
        of.write("Component\t{}\tEigenvalue\n".format("\t".join(corr.labels)))
        for i in range(eigenvalues.shape[0]):
            of.write("{}\t{}\t{}\n".format(
                i + 1,
                "\t".join("{}".format(x) for x in Wt[i, :]),
                eigenvalues[i]))
def get_labels_and_correlation(
        bw_files,
        # chrs_to_skip,
        bin_size=10000,
        method='pearson',
        fileset_name='result',
        blacklist=None,
        labels=bw_labels,
        output_dir=BASE_DIR):
    """Correlate binned bigwig scores and draw the correlation heatmap.

    The per-bin scores of ``bw_files`` are stored as
    ``<fileset_name>_<method>[_blacklisted].npz`` in ``output_dir``, the
    correlation is computed from that file, its values are written (as the
    ``str`` of a flat list) to ``corrScores.txt`` in the current working
    directory and a heatmap is saved next to the ``.npz`` file with a
    ``.png`` extension.

    :param bw_files: paths of the input files handed to ``getScorePerBin``.
    :param bin_size: bin size passed to ``getScorePerBin``.
    :param method: ``'pearson'`` or ``'spearman'``.
    :param fileset_name: used in the output file names and the plot title.
    :param blacklist: optional blacklist file name; when set, the outputs
                      are tagged as blacklisted.
    :param labels: one label per file; if empty, each file's base name up
                   to its first ``.`` is used.
    :param output_dir: directory that receives the ``.npz`` and ``.png``.
    :return: tuple ``(image_path, labels, flat_correlation_values)``.
    :raises AssertionError: if ``method`` is not supported.
    """
    assert method in ('pearson', 'spearman'), 'Invalid correlation method'

    # Autogenerate labels from filename if not provided
    if not labels:
        labels = [
            filename.split('/')[-1].split('.')[0] for filename in bw_files
        ]

    # Generate a name for the unique combination
    test_name = fileset_name + '_' + method
    if blacklist:
        # The trailing space keeps "Blacklisted" and the method name apart
        # in the plot title.
        blacklist_title = 'Blacklisted '
        test_name += '_blacklisted'
    else:
        blacklist_title = ''

    # Bin the bigwig data
    num_reads_per_bin = score_bw.getScorePerBin(
        bw_files,
        bin_size,
        # chrsToSkip=chrs_to_skip,
        blackListFileName=blacklist)

    # Write to npz file; os.path.join also copes with an output_dir that
    # has no trailing separator.
    filename = os.path.join(output_dir, test_name + '.npz')
    with open(filename, "wb") as f:
        np.savez_compressed(f, matrix=num_reads_per_bin, labels=labels)

    # Compute the correlations
    corr = Correlation(filename, method, labels=labels)
    np_array = corr.compute_correlation()

    def iter_leaves(nested):
        # Walk arbitrarily nested lists depth-first and yield the non-list
        # items in order.
        for item in nested:
            if isinstance(item, list):
                for leaf in iter_leaves(item):
                    yield leaf
            else:
                yield item

    flat_values = list(iter_leaves(np_array.tolist()))

    with open("corrScores.txt", "w") as f:
        f.write(str(flat_values))

    plot_title = '{}{} Correlation of {}'.format(blacklist_title,
                                                 method.capitalize(),
                                                 fileset_name)

    # Create a png file of correlation heatmap
    image_path = os.path.join(output_dir, test_name + '.png')
    corr.plot_correlation(image_path, plot_title=plot_title)

    return image_path, labels, flat_values
def get_labels_and_correlation(
    bw_files,
    # chrs_to_skip,
    bin_size=10000,
    method='pearson',
    fileset_name='result',
    blacklist=None,
    labels=bw_labels,
    output_dir='/Users/baditya02/Downloads/treatment-data/graphs/test/'
):
    """Correlate binned bigwig scores and draw the correlation heatmap.

    The per-bin scores of ``bw_files`` are stored as
    ``<fileset_name>_<method>[_blacklisted].npz`` under ``output_dir``, the
    correlation is computed from that file and a heatmap with the same base
    name and a ``.png`` extension is saved alongside it.

    :param bw_files: paths of the input files handed to ``getScorePerBin``.
    :param bin_size: bin size passed to ``getScorePerBin``.
    :param method: ``'pearson'`` or ``'spearman'``.
    :param fileset_name: used in the output file names and the plot title.
    :param blacklist: optional blacklist file name; when set, the outputs
                      are tagged as blacklisted.
    :param labels: one label per file; if empty, each file's base name up
                   to its first ``.`` is used.
    :param output_dir: output location; it is concatenated directly with
                       the file name, so it must end with a path separator.
                       NOTE(review): the default is a machine-specific
                       absolute path -- callers should pass their own.
    :return: tuple ``(image_path, labels, flat_correlation_values)``.
    :raises AssertionError: if ``method`` is not supported.
    """
    assert method in ('pearson', 'spearman'), 'Invalid correlation method'

    # Autogenerate labels from filename if not provided
    if not labels:
        labels = [filename.split('/')[-1].split('.')[0] for filename in bw_files]

    # Generate a name for the unique combination
    test_name = fileset_name + '_' + method
    if blacklist:
        blacklist_title = 'Blacklisted '
        test_name += '_blacklisted'
    else:
        blacklist_title = ''

    # Bin the bigwig data
    num_reads_per_bin = score_bw.getScorePerBin(
        bw_files,
        bin_size,
        # chrsToSkip=chrs_to_skip,
        blackListFileName=blacklist
    )

    # Write to npz file
    filename = output_dir + test_name + '.npz'
    with open(filename, "wb") as f:
        np.savez_compressed(f, matrix=num_reads_per_bin, labels=labels)

    # Compute the correlations
    corr = Correlation(filename, method, labels=labels)
    np_array = corr.compute_correlation()

    def iter_leaves(nested):
        # Walk arbitrarily nested lists depth-first and yield the non-list
        # items in order.
        for item in nested:
            if isinstance(item, list):
                for leaf in iter_leaves(item):
                    yield leaf
            else:
                yield item

    flat_values = list(iter_leaves(np_array.tolist()))

    plot_title = '{}{} Correlation of {}'.format(
        blacklist_title,
        method.capitalize(),
        fileset_name
    )

    # Create a png file of correlation heatmap
    image_path = output_dir + test_name + '.png'
    corr.plot_correlation(image_path, plot_title=plot_title)

    return image_path, labels, flat_values