def load_deepblue_files(regionsFileName, scoreFileName, args=None):
    """Preload deepBlue-hosted score files into local temporary files.

    Every entry of ``scoreFileName`` recognised by ``db.isDeepBlue`` is
    fetched (in parallel when more than one file and more than one
    processor are available) and the corresponding entry of
    ``scoreFileName`` is replaced IN PLACE with the temporary file path.

    Parameters
    ----------
    regionsFileName : str
        Regions (e.g. BED) file passed to ``db.makeRegions`` to restrict
        what is fetched.
    scoreFileName : list of str
        Score file names/URIs; modified in place.
    args : argparse.Namespace, optional
        Parsed options; must provide at least ``numberOfProcessors`` and
        whatever ``db.makeRegions``/``db.preloadWrapper`` consume.
        Required whenever at least one deepBlue file is present.
        (BUG FIX: the original body read ``args`` as a free variable,
        raising NameError unless a global of that name happened to exist.)

    Returns
    -------
    list
        ``[originalFileName, index]`` pairs, one per deepBlue file, so the
        caller can later delete (or report) the temporary files.  An empty
        list when no deepBlue files were found.
    """
    deepBlueFiles = []
    for idx, fname in enumerate(scoreFileName):
        if db.isDeepBlue(fname):
            deepBlueFiles.append([fname, idx])
    if len(deepBlueFiles) > 0:
        sys.stderr.write(
            "Preloading the following deepBlue files: {}\n".format(",".join(
                [x[0] for x in deepBlueFiles])))
        regs = db.makeRegions(regionsFileName, args)
        for x in deepBlueFiles:
            x.extend([args, regs])
        if len(deepBlueFiles) > 1 and args.numberOfProcessors > 1:
            pool = multiprocessing.Pool(args.numberOfProcessors)
            res = pool.map_async(db.preloadWrapper, deepBlueFiles).get(9999999)
        else:
            res = list(map(db.preloadWrapper, deepBlueFiles))

        # substitute the file names with the temp files
        for (ftuple, r) in zip(deepBlueFiles, res):
            scoreFileName[ftuple[1]] = r
        deepBlueFiles = [[x[0], x[1]] for x in deepBlueFiles]
        del regs
    # Consistent return on both paths (empty list when nothing was preloaded).
    return deepBlueFiles
def main(args=None):
    """
    1. get read counts at different positions either
    all of same length or from genomic regions from the BED file

    2. compute the scores
    """
    args = process_args(args)

    # Optional BED restricting the analysis to specific regions.
    if 'BED' in args:
        bed_regions = args.BED
    else:
        bed_regions = None

    if len(args.bwfiles) == 1 and not args.outRawCounts:
        sys.stderr.write("You've input a single bigWig file and not specified "
                         "--outRawCounts. The resulting output will NOT be "
                         "useful with any deepTools program!\n")

    # Preload deepBlue files, which need to then be deleted
    deepBlueFiles = []
    for idx, fname in enumerate(args.bwfiles):
        if db.isDeepBlue(fname):
            deepBlueFiles.append([fname, idx])
    if len(deepBlueFiles) > 0:
        sys.stderr.write(
            "Preloading the following deepBlue files: {}\n".format(",".join(
                [x[0] for x in deepBlueFiles])))
        if 'BED' in args:
            regs = db.makeRegions(args.BED, args)
        else:
            # No BED: derive tiles from the first deepBlue file's chromosomes.
            foo = db.deepBlue(deepBlueFiles[0][0], url=args.deepBlueURL,
                              userKey=args.userKey)
            regs = db.makeTiles(foo, args)
            del foo
        for x in deepBlueFiles:
            x.extend([args, regs])
        if len(deepBlueFiles) > 1 and args.numberOfProcessors > 1:
            pool = multiprocessing.Pool(args.numberOfProcessors)
            res = pool.map_async(db.preloadWrapper, deepBlueFiles).get(9999999)
        else:
            res = list(map(db.preloadWrapper, deepBlueFiles))

        # substitute the file names with the temp files
        for (ftuple, r) in zip(deepBlueFiles, res):
            args.bwfiles[ftuple[1]] = r
        deepBlueFiles = [[x[0], x[1]] for x in deepBlueFiles]
        del regs

    num_reads_per_bin = score_bw.getScorePerBin(
        args.bwfiles,
        args.binSize,
        blackListFileName=args.blackListFileName,
        numberOfProcessors=args.numberOfProcessors,
        stepSize=args.binSize + args.distanceBetweenBins,
        verbose=args.verbose,
        region=args.region,
        bedFile=bed_regions,
        chrsToSkip=args.chromosomesToSkip,
        out_file_for_raw_data=args.outRawCounts,
        allArgs=args)

    sys.stderr.write("Number of bins "
                     "found: {}\n".format(num_reads_per_bin.shape[0]))

    if num_reads_per_bin.shape[0] < 2:
        # sys.exit over the site-provided exit() builtin
        sys.exit("ERROR: too few non zero bins found.\n"
                 "If using --region please check that this "
                 "region is covered by reads.\n")

    # 'with' guarantees the handle is closed even if savez raises
    # (original opened/closed manually and kept a large dead commented-out
    # writer block, both removed here).
    with open(args.outFileName, "wb") as f:
        np.savez_compressed(f, matrix=num_reads_per_bin, labels=args.labels)

    if args.outRawCounts:
        # prepend the label header to the already-written raw-counts file
        header = "#'chr'\t'start'\t'end'\t"
        header += "'" + "'\t'".join(args.labels) + "'\n"
        with open(args.outRawCounts, "r+") as f:
            content = f.read()
            f.seek(0, 0)
            f.write(header + content)

    # Clean up temporary bigWig files, if applicable
    if not args.deepBlueKeepTemp:
        for k, v in deepBlueFiles:
            os.remove(args.bwfiles[v])
    else:
        for k, v in deepBlueFiles:
            print("{} is stored in {}".format(k, args.bwfiles[v]))
def main(args=None):
    """Compare two bigWig files and write the per-bin ratio track.

    Parses the command line, preloads any deepBlue-hosted inputs into
    local temporary files, runs getRatio over every tile via
    writeBedGraph, then removes (or reports) the temporary files.
    """
    args = parse_arguments().parse_args(args)

    # Per-file scale factors, given as "a:b" on the command line.
    if args.scaleFactors:
        scale_factors = [float(v) for v in args.scaleFactors.split(":")]
    else:
        scale_factors = [1, 1]

    # getRatio is called for each tile with these keyword arguments.
    compare_func = getRatio
    compare_kwargs = {'valueType': args.ratio,
                      'scaleFactors': scale_factors,
                      'pseudocount': args.pseudocount}

    # Preload deepBlue files, which need to then be deleted
    deepBlueFiles = [[fname, idx]
                     for idx, fname in enumerate([args.bigwig1, args.bigwig2])
                     if db.isDeepBlue(fname)]
    if deepBlueFiles:
        sys.stderr.write("Preloading the following deepBlue files: {}\n".format(",".join([x[0] for x in deepBlueFiles])))
        foo = db.deepBlue(deepBlueFiles[0][0], url=args.deepBlueURL, userKey=args.userKey)
        regs = db.makeChromTiles(foo)
        for entry in deepBlueFiles:
            entry.extend([args, regs])
        if len(deepBlueFiles) > 1 and args.numberOfProcessors > 1:
            pool = multiprocessing.Pool(args.numberOfProcessors)
            res = pool.map_async(db.preloadWrapper, deepBlueFiles).get(9999999)
        else:
            res = list(map(db.preloadWrapper, deepBlueFiles))

        # substitute the file names with the temp files
        for entry, temp_fname in zip(deepBlueFiles, res):
            if entry[1] == 0:
                args.bigwig1 = temp_fname
            else:
                args.bigwig2 = temp_fname
        deepBlueFiles = [[entry[0], entry[1]] for entry in deepBlueFiles]
        del regs

    writeBedGraph_bam_and_bw.writeBedGraph(
        [(args.bigwig1, getType(args.bigwig1)),
         (args.bigwig2, getType(args.bigwig2))],
        args.outFileName, 0, compare_func,
        compare_kwargs, tileSize=args.binSize, region=args.region,
        blackListFileName=args.blackListFileName,
        verbose=args.verbose,
        numberOfProcessors=args.numberOfProcessors,
        format=args.outFileFormat,
        smoothLength=False,
        missingDataAsZero=not args.skipNonCoveredRegions,
        extendPairedEnds=False)

    # Clean up temporary bigWig files, if applicable
    if not args.deepBlueKeepTemp:
        for k, v in deepBlueFiles:
            os.remove(args.bigwig1 if v == 0 else args.bigwig2)
    else:
        for k, v in deepBlueFiles:
            stored = args.bigwig2 if v == 1 else args.bigwig1
            print("{} is stored in {}".format(k, stored))
def main(args=None):
    """Build a score matrix over regions (computeMatrix-style entry point).

    Preloads any deepBlue score files, computes the matrix with
    heatmapper, optionally sorts the groups, saves the matrix (plus
    optional raw values and sorted-regions BED), then deletes or reports
    the deepBlue temporary files.
    """
    args = process_args(args)
    parameters = {'upstream': args.beforeRegionStartLength,
                  'downstream': args.afterRegionStartLength,
                  'body': args.regionBodyLength,
                  'bin size': args.binSize,
                  'ref point': args.referencePoint,
                  'verbose': args.verbose,
                  'bin avg type': args.averageTypeBins,
                  'missing data as zero': args.missingDataAsZero,
                  'min threshold': args.minThreshold,
                  'max threshold': args.maxThreshold,
                  'scale': args.scale,
                  'skip zeros': args.skipZeros,
                  'nan after end': args.nanAfterEnd,
                  'proc number': args.numberOfProcessors,
                  'sort regions': args.sortRegions,
                  'sort using': args.sortUsing,
                  'unscaled 5 prime': args.unscaled5prime,
                  'unscaled 3 prime': args.unscaled3prime
                  }

    hm = heatmapper.heatmapper()

    # Preload deepBlue files, which need to then be deleted
    deepBlueFiles = []
    for idx, fname in enumerate(args.scoreFileName):
        if db.isDeepBlue(fname):
            deepBlueFiles.append([fname, idx])
    if len(deepBlueFiles) > 0:
        sys.stderr.write("Preloading the following deepBlue files: {}\n".format(",".join([x[0] for x in deepBlueFiles])))
        regs = db.makeRegions(args.regionsFileName, args)
        for x in deepBlueFiles:
            x.extend([args, regs])
        if len(deepBlueFiles) > 1 and args.numberOfProcessors > 1:
            pool = multiprocessing.Pool(args.numberOfProcessors)
            res = pool.map_async(db.preloadWrapper, deepBlueFiles).get(9999999)
        else:
            res = list(map(db.preloadWrapper, deepBlueFiles))

        # substitute the file names with the temp files
        for (ftuple, r) in zip(deepBlueFiles, res):
            args.scoreFileName[ftuple[1]] = r
        deepBlueFiles = [[x[0], x[1]] for x in deepBlueFiles]
        del regs

    scores_file_list = args.scoreFileName
    hm.computeMatrix(scores_file_list, args.regionsFileName, parameters,
                     blackListFileName=args.blackListFileName,
                     verbose=args.verbose, allArgs=args)
    if args.sortRegions not in ['no', 'keep']:
        sortUsingSamples = []
        if args.sortUsingSamples is not None:
            for i in args.sortUsingSamples:
                # command-line sample indices are 1-based (the `i > 0` check)
                if (i > 0 and i <= hm.matrix.get_num_samples()):
                    sortUsingSamples.append(i - 1)
                else:
                    # BUG FIX: report the offending value `i`, not the whole
                    # list; also use sys.exit over the interactive-only exit().
                    sys.exit("The value {0} for --sortUsingSamples is not valid. Only values from 1 to {1} are allowed.".format(i, hm.matrix.get_num_samples()))
            print('Samples used for ordering within each group: ', sortUsingSamples)

        hm.matrix.sort_groups(sort_using=args.sortUsing,
                              sort_method=args.sortRegions,
                              sample_list=sortUsingSamples)
    elif args.sortRegions == 'keep':
        # preserve the input order, but record the grouping metadata
        hm.parameters['group_labels'] = hm.matrix.group_labels
        hm.parameters["group_boundaries"] = hm.matrix.group_boundaries
        cmo.sortMatrix(hm, args.regionsFileName, args.transcriptID,
                       args.transcript_id_designator, verbose=not args.quiet)

    hm.save_matrix(args.outFileName)

    if args.outFileNameMatrix:
        hm.save_matrix_values(args.outFileNameMatrix)

    if args.outFileSortedRegions:
        hm.save_BED(args.outFileSortedRegions)

    # Clean up temporary bigWig files, if applicable
    if not args.deepBlueKeepTemp:
        for k, v in deepBlueFiles:
            os.remove(args.scoreFileName[v])
    else:
        for k, v in deepBlueFiles:
            print("{} is stored in {}".format(k, args.scoreFileName[v]))
# NOTE(review): this function is a byte-for-byte duplicate of the main()
# defined earlier in this file — the file looks like a concatenation of
# several scripts; consider keeping only one copy.
def main(args=None):
    """
    1. get read counts at different positions either
    all of same length or from genomic regions from the BED file

    2. compute the scores
    """
    args = process_args(args)

    # Optional BED restricting the analysis to specific regions.
    if 'BED' in args:
        bed_regions = args.BED
    else:
        bed_regions = None

    if len(args.bwfiles) == 1 and not args.outRawCounts:
        sys.stderr.write("You've input a single bigWig file and not specified "
                         "--outRawCounts. The resulting output will NOT be "
                         "useful with any deepTools program!\n")

    # Preload deepBlue files, which need to then be deleted
    deepBlueFiles = []
    for idx, fname in enumerate(args.bwfiles):
        if db.isDeepBlue(fname):
            deepBlueFiles.append([fname, idx])
    if len(deepBlueFiles) > 0:
        sys.stderr.write(
            "Preloading the following deepBlue files: {}\n".format(",".join(
                [x[0] for x in deepBlueFiles])))
        if 'BED' in args:
            regs = db.makeRegions(args.BED, args)
        else:
            # No BED: derive tiles from the first deepBlue file's chromosomes.
            foo = db.deepBlue(deepBlueFiles[0][0], url=args.deepBlueURL,
                              userKey=args.userKey)
            regs = db.makeTiles(foo, args)
            del foo
        for x in deepBlueFiles:
            x.extend([args, regs])
        if len(deepBlueFiles) > 1 and args.numberOfProcessors > 1:
            pool = multiprocessing.Pool(args.numberOfProcessors)
            res = pool.map_async(db.preloadWrapper, deepBlueFiles).get(9999999)
        else:
            res = list(map(db.preloadWrapper, deepBlueFiles))

        # substitute the file names with the temp files
        for (ftuple, r) in zip(deepBlueFiles, res):
            args.bwfiles[ftuple[1]] = r
        deepBlueFiles = [[x[0], x[1]] for x in deepBlueFiles]
        del regs

    num_reads_per_bin = score_bw.getScorePerBin(
        args.bwfiles,
        args.binSize,
        blackListFileName=args.blackListFileName,
        numberOfProcessors=args.numberOfProcessors,
        stepSize=args.binSize + args.distanceBetweenBins,
        verbose=args.verbose,
        region=args.region,
        bedFile=bed_regions,
        chrsToSkip=args.chromosomesToSkip,
        out_file_for_raw_data=args.outRawCounts,
        allArgs=args)

    sys.stderr.write("Number of bins "
                     "found: {}\n".format(num_reads_per_bin.shape[0]))

    if num_reads_per_bin.shape[0] < 2:
        # sys.exit over the site-provided exit() builtin
        sys.exit("ERROR: too few non zero bins found.\n"
                 "If using --region please check that this "
                 "region is covered by reads.\n")

    # 'with' guarantees the handle is closed even if savez raises
    # (original opened/closed manually and kept a large dead commented-out
    # writer block, both removed here).
    with open(args.outFileName, "wb") as f:
        np.savez_compressed(f, matrix=num_reads_per_bin, labels=args.labels)

    if args.outRawCounts:
        # prepend the label header to the already-written raw-counts file
        header = "#'chr'\t'start'\t'end'\t"
        header += "'" + "'\t'".join(args.labels) + "'\n"
        with open(args.outRawCounts, "r+") as f:
            content = f.read()
            f.seek(0, 0)
            f.write(header + content)

    # Clean up temporary bigWig files, if applicable
    if not args.deepBlueKeepTemp:
        for k, v in deepBlueFiles:
            os.remove(args.bwfiles[v])
    else:
        for k, v in deepBlueFiles:
            print("{} is stored in {}".format(k, args.bwfiles[v]))