Code example #1
File: computeMatrix.py, Project: idelvalle/deepTools
def main(args=None):

    args = process_args(args)

    # if more than one bed file is given, they are concatenated into one file.
    if len(args.regionsFileName) > 1:
        bed_file = open(deeptools.utilities.getTempFileName(suffix='.bed'), 'w+t')
        for bed in args.regionsFileName:
            bed.close()
            # concatenate all intermediate tempfiles into one
            shutil.copyfileobj(open(bed.name, 'U'), bed_file)
            # append hash and label based on the file name
            label = os.path.basename(bed.name)
            if label.endswith(".bed"):
                label = label[:-4]
            bed_file.write("# {}\n".format(label))
        bed_file.seek(0)
    else:
        bed_file = args.regionsFileName[0]

    parameters = {'upstream': args.beforeRegionStartLength,
                  'downstream': args.afterRegionStartLength,
                  'body': args.regionBodyLength,
                  'bin size': args.binSize,
                  'ref point': args.referencePoint,
                  'verbose': args.verbose,
                  'bin avg type': args.averageTypeBins,
                  'missing data as zero': args.missingDataAsZero,
                  'min threshold': args.minThreshold,
                  'max threshold': args.maxThreshold,
                  'scale': args.scale,
                  'skip zeros': args.skipZeros,
                  'nan after end': args.nanAfterEnd,
                  'proc number': args.numberOfProcessors,
                  'sort regions': args.sortRegions,
                  'sort using': args.sortUsing
                  }

    hm = heatmapper.heatmapper()

    scores_file_list = args.scoreFileName
    hm.computeMatrix(scores_file_list, bed_file, parameters, verbose=args.verbose)
    if args.sortRegions != 'no':
        hm.matrix.sort_groups(sort_using=args.sortUsing, sort_method=args.sortRegions)

    hm.save_matrix(args.outFileName)
    bed_file.close()

    if len(args.regionsFileName) > 1:
        os.remove(bed_file.name)

    if args.outFileNameMatrix:
        hm.save_matrix_values(args.outFileNameMatrix)

    # TODO This isn't implemented
    # if args.outFileNameData:
    #    hm.saveTabulatedValues(args.outFileNameData)

    if args.outFileSortedRegions:
        hm.save_BED(args.outFileSortedRegions)
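Stripped of the argparse and temp-file plumbing above, the heatmapper calls themselves are short. The sketch below mirrors the parameter keys and call signature used in this example; the file names and parameter values are placeholders rather than anything from the original code, and the exact form of the regions argument varies between deepTools versions (a path, an open handle, or a list all appear in the examples on this page).

from deeptools import heatmapper

scores = ["sample1.bw"]    # placeholder bigWig file(s)
regions = "regions.bed"    # placeholder regions file

# Same keys as the parameters dict above; the values here are only illustrative.
parameters = {'upstream': 1000, 'downstream': 1000, 'body': 0,
              'bin size': 50, 'ref point': 'TSS', 'verbose': False,
              'bin avg type': 'mean', 'missing data as zero': False,
              'min threshold': None, 'max threshold': None,
              'scale': 1, 'skip zeros': False, 'nan after end': False,
              'proc number': 4, 'sort regions': 'descend', 'sort using': 'mean'}

hm = heatmapper.heatmapper()
hm.computeMatrix(scores, regions, parameters, verbose=False)
hm.matrix.sort_groups(sort_using='mean', sort_method='descend')
hm.save_matrix("matrix.gz")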
Code example #2
def __compute_matrix(regions, bigwigs, configfile, parameters, refIndex=None):
    """
    computing the corresponding matrix using deeptools/computeMatrix
    """
    hm = heatmapper()

    if refIndex:
        bigwigs = [bigwigs[int(i) - 1] for i in refIndex]

    index = 1 if not refIndex else 0

    matrix_args = argparse.Namespace()
    matrix_args.transcriptID = configfile['transcriptID'][index]
    matrix_args.exonID = configfile['exonID'][index]
    matrix_args.transcript_id_designator = configfile[
        'transcript_id_designator'][index]
    matrix_args.samplesLabel = configfile['samplesLabel']
    matrix_args.exonID = configfile['exonID'][index]
    hm.computeMatrix(score_file_list=bigwigs,
                     regions_file=regions,
                     parameters=parameters,
                     blackListFileName=configfile["blackListFileName"][index],
                     verbose=configfile["verbose"][index],
                     allArgs=matrix_args)

    return hm
Code example #3
def main():
    """
    Either the closest genes are foune and a deeptools-like matrix is created,
    if annotation file is provided, or a deeptools-like matrix directly from
    a provided enriched regions name-based files. In either case the output
    matrix is ordered and is appended to the input deeptools matrix.
    """
    parser = parse_args()
    args = parser.parse_args()

    # Check if the feature names are consistent between all the tables
    __read_tables_columns(args.tables, args.Features)

    hm = heatmapper()
    hm.read_matrix_file(args.deeptoolsMatrix)
    regions = parseMatrixRegions(hm.matrix.get_regions())
    # Using bedtool closest to map annotation and regions
    if args.annotation:
        closestMapping = find_closest_genes(
            regions, args.annotation, args.annotationFeature,
            args.annotationOutput, args.referencePoint, args.closestGenesOutput
        )  # XXX instead of all these arguments i can simply add args.
        # paste an extra column per table to the input matrix
        extract_ge_folchange_per_peak(regions, args.tables, closestMapping,
                                      args.Features, args.idcolumn, hm)

    else:  # No closest gene is involved in this case; each enrichment id is individually checked and values are updated.
        update_matrix_values(regions, args.tables, args.Features,
                             args.idcolumn, hm)

    # save the joint matrix obtained from either of cases
    hm.save_matrix(os.path.join(args.outputMatrix))
Code example #4
def main(args=None):
    args = process_args(args)
    hm = heatmapper.heatmapper()
    matrix_file = args.matrixFile.name
    args.matrixFile.close()
    hm.read_matrix_file(matrix_file)

    if args.kmeans is not None:
        hm.matrix.hmcluster(args.kmeans, method='kmeans')
    else:
        if args.hclust is not None:
            print("Performing hierarchical clustering."
                  "Please note that it might be very slow for large datasets.\n")
            hm.matrix.hmcluster(args.hclust, method='hierarchical')

    group_len_ratio = np.diff(hm.matrix.group_boundaries) / float(len(hm.matrix.regions))
    if np.any(group_len_ratio < 5.0 / 1000):
        problem = np.flatnonzero(group_len_ratio < 5.0 / 1000)
        sys.stderr.write("WARNING: Group '{}' is too small for plotting, you might want to remove it. \n".format(hm.matrix.group_labels[problem[0]]))

    if args.regionsLabel:
        hm.matrix.set_group_labels(args.regionsLabel)

    if args.samplesLabel and len(args.samplesLabel):
        hm.matrix.set_sample_labels(args.samplesLabel)

    if args.outFileNameData:
        hm.save_tabulated_values(args.outFileNameData, reference_point_label=args.refPointLabel,
                                 start_label=args.startLabel,
                                 end_label=args.endLabel,
                                 averagetype=args.averageType)

    if args.outFileSortedRegions:
        hm.save_BED(args.outFileSortedRegions)

    prof = Profile(hm, args.outFileName,
                   plot_title=args.plotTitle,
                   y_axis_label=args.yAxisLabel,
                   y_min=args.yMin, y_max=args.yMax,
                   averagetype=args.averageType,
                   reference_point_label=args.refPointLabel,
                   start_label=args.startLabel,
                   end_label=args.endLabel,
                   plot_height=args.plotHeight,
                   plot_width=args.plotWidth,
                   per_group=args.perGroup,
                   plot_type=args.plotType,
                   image_format=args.plotFileFormat,
                   color_list=args.colors,
                   legend_location=args.legendLocation,
                   plots_per_row=args.numPlotsPerRow,
                   dpi=args.dpi)

    if args.plotType == 'heatmap':
        prof.plot_heatmap()
    elif args.plotType == 'overlapped_lines':
        prof.plot_hexbin()
    else:
        prof.plot_profile()
Code example #5
def main(args=None):
    if len(sys.argv) == 1:
        args = ["-h"]
    if len(sys.argv) == 2:
        args = [sys.argv[1], "-h"]
    args = parse_arguments().parse_args(args)

    hm = heatmapper.heatmapper()
    if not isinstance(args.matrixFile, list):
        hm.read_matrix_file(args.matrixFile)

    if args.command == 'info':
        printInfo(hm)
    elif args.command == 'subset':
        sIdx = getSampleBounds(args, hm)
        gIdx, gBounds = getGroupBounds(args, hm)

        # groups
        hm.matrix.regions = subsetRegions(hm, gIdx)
        # matrix
        hm.matrix.matrix = hm.matrix.matrix[gIdx, :]
        hm.matrix.matrix = hm.matrix.matrix[:, sIdx]
        # boundaries
        if args.samples is None:
            args.samples = hm.matrix.sample_labels
        hm.matrix.sample_boundaries = hm.matrix.sample_boundaries[
            0:len(args.samples) + 1]
        hm.matrix.group_boundaries = gBounds.tolist()
        # special params
        keepIdx = set()
        for _, sample in enumerate(hm.matrix.sample_labels):
            if sample in args.samples:
                keepIdx.add(_)
        for param in hm.special_params:
            hm.parameters[param] = [
                v for k, v in enumerate(hm.parameters[param]) if k in keepIdx
            ]
        # labels
        hm.matrix.sample_labels = args.samples
        if args.groups is None:
            args.groups = hm.matrix.group_labels
        hm.matrix.group_labels = args.groups
        # save
        hm.save_matrix(args.outFileName)
    elif args.command == 'filterStrand':
        filterHeatmap(hm, args)
        hm.save_matrix(args.outFileName)
    elif args.command == 'rbind':
        rbindMatrices(hm, args)
        hm.save_matrix(args.outFileName)
    elif args.command == 'cbind':
        cbindMatrices(hm, args)
        hm.save_matrix(args.outFileName)
    elif args.command == 'sort':
        sortMatrix(hm, args.regionsFileName, args.transcriptID,
                   args.transcript_id_designator)
        hm.save_matrix(args.outFileName)
    else:
        sys.exit("Unknown command {0}!\n".format(args.command))
Code example #6
def main(args=None):
    args = process_args(args)
    hm = heatmapper.heatmapper()
    matrix_file = args.matrixFile.name
    args.matrixFile.close()
    hm.read_matrix_file(matrix_file)

    if args.kmeans is not None:
        hm.matrix.hmcluster(args.kmeans, method='kmeans')
    else:
        if args.hclust is not None:
            print "Performing hierarchical clustering." \
                  "Please note that it might be very slow for large datasets.\n"
            hm.matrix.hmcluster(args.hclust, method='hierarchical')

    group_len_ratio = np.diff(hm.matrix.group_boundaries) / len(hm.matrix.regions)
    if np.any(group_len_ratio < 5.0 / 1000):
        problem = np.flatnonzero(group_len_ratio < 5.0 / 1000)
        sys.stderr.write("WARNING: Group '{}' is too small for plotting, you might want to remove it. "
                         "There will likely be an error message from matplotlib regarding this below.\n".format(hm.matrix.group_labels[problem[0]]))

    if args.regionsLabel:
        hm.matrix.set_group_labels(args.regionsLabel)

    if args.samplesLabel and len(args.samplesLabel):
        hm.matrix.set_sample_labels(args.samplesLabel)

    if args.sortRegions != 'no':
        hm.matrix.sort_groups(sort_using=args.sortUsing,
                              sort_method=args.sortRegions)

    if args.outFileNameMatrix:
        hm.save_matrix_values(args.outFileNameMatrix)

    if args.outFileSortedRegions:
        hm.save_BED(args.outFileSortedRegions)

    colormap_dict = {'colorMap': args.colorMap,
                     'colorList': args.colorList,
                     'colorNumber': args.colorNumber,
                     'missingDataColor': args.missingDataColor}

    plotMatrix(hm,
               args.outFileName,
               colormap_dict, args.plotTitle,
               args.xAxisLabel, args.yAxisLabel, args.regionsLabel,
               args.zMin, args.zMax,
               args.yMin, args.yMax,
               args.averageTypeSummaryPlot,
               args.refPointLabel,
               args.startLabel,
               args.endLabel,
               args.heatmapHeight,
               args.heatmapWidth,
               args.perGroup,
               args.whatToShow,
               image_format=args.plotFileFormat,
               legend_location=args.legendLocation)
Code example #7
def zMn_set(f):
    if zMinTest:
        zMn = 0
        matrixFlatten = None
        content = heatmapper.heatmapper()
        content.readMatrixFile(f)
        matrixFlatten = flattenMatrix(content.matrixDict)
        zMn = np.percentile(matrixFlatten, 1.0)
    return f, zMn
Code example #8
def cbindMatrices(hm, args):
    """
    Bind columns from different matrices according to the group and region names

    Missing regions are left as NA
    """
    hm2 = heatmapper.heatmapper()

    # Make a dict of region name:row associations
    hm.read_matrix_file(args.matrixFile[0])
    d = dict({x: dict() for x in hm.parameters["group_labels"]})
    for idx, group in enumerate(hm.parameters["group_labels"]):
        s = hm.parameters["group_boundaries"][idx]
        e = hm.parameters["group_boundaries"][idx + 1]
        for idx2, reg in enumerate(hm.matrix.regions[s:e]):
            d[group][reg[2]] = idx2 + s

    # Iterate through the other matrices
    for idx in range(1, len(args.matrixFile)):
        hm2.read_matrix_file(args.matrixFile[idx])
        # Add the sample labels
        hm.parameters['sample_labels'].extend(hm2.parameters['sample_labels'])
        # Add the sample boundaries
        lens = [
            x + hm.parameters['sample_boundaries'][-1]
            for x in hm2.parameters['sample_boundaries']
        ][1:]
        hm.parameters['sample_boundaries'].extend(lens)

        # Add on additional NA initialized columns
        ncol = hm.matrix.matrix.shape[1]
        hm.matrix.matrix = np.hstack(
            (hm.matrix.matrix, np.empty(hm2.matrix.matrix.shape)))
        hm.matrix.matrix[:, ncol:] = np.NAN

        # Update the values
        for idx2, group in enumerate(hm2.parameters["group_labels"]):
            if group not in d:
                continue
            s = hm2.parameters["group_boundaries"][idx2]
            e = hm2.parameters["group_boundaries"][idx2 + 1]
            for idx3, reg in enumerate(hm2.matrix.regions[s:e]):
                if reg[2] not in d[group]:
                    continue
                hm.matrix.matrix[d[group][reg[2]],
                                 ncol:] = hm2.matrix.matrix[s + idx3, :]

        # Append the special params
        for s in hm.special_params:
            hm.parameters[s].extend(hm2.parameters[s])

    # Update the sample parameters
    hm.matrix.sample_labels = hm.parameters['sample_labels']
    hm.matrix.sample_boundaries = hm.parameters['sample_boundaries']
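The column-binding step above grows the matrix with np.hstack and then marks the freshly added columns as missing before filling in matched rows. That pattern in isolation, with made-up shapes:

import numpy as np

existing = np.arange(6, dtype=float).reshape(3, 2)   # 3 regions x 2 existing columns
incoming_shape = (3, 4)                              # shape of the matrix being column-bound

ncol = existing.shape[1]
existing = np.hstack((existing, np.empty(incoming_shape)))  # widen by the incoming width
existing[:, ncol:] = np.nan                                 # new columns start out as NA

# A region found in the incoming matrix then overwrites its NA slice:
existing[0, ncol:] = np.array([1.0, 2.0, 3.0, 4.0])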
Code example #9
def zMx_set(f):
    if zMaxTest:
        zMx = 0
        matrixFlatten = None
        content = heatmapper.heatmapper()
        content.readMatrixFile(f)
        matrixFlatten = flattenMatrix(content.matrixDict)
        # try to avoid outliers by using np.percentile
        zMx = np.percentile(matrixFlatten, 98.0)
        print np.shape(content.matrixDict["genes"])[0]

    return f, zMx
Code example #10
def main(args=None):
    if len(sys.argv) == 1:
        args = ["-h"]
    if len(sys.argv) == 2:
        args = [sys.argv[1], "-h"]
    args = parse_arguments().parse_args(args)

    hm = heatmapper.heatmapper()
    if not isinstance(args.matrixFile, list):
        hm.read_matrix_file(args.matrixFile)

    if args.command == 'info':
        printInfo(hm)
    elif args.command == 'subset':
        sIdx = getSampleBounds(args, hm)
        gIdx, gBounds = getGroupBounds(args, hm)

        # groups
        hm.matrix.regions = subsetRegions(hm, gIdx)
        # matrix
        hm.matrix.matrix = hm.matrix.matrix[gIdx, :]
        hm.matrix.matrix = hm.matrix.matrix[:, sIdx]
        # boundaries
        if args.samples is None:
            args.samples = hm.matrix.sample_labels
        hm.matrix.sample_boundaries = hm.matrix.sample_boundaries[0:len(args.samples) + 1]
        hm.matrix.group_boundaries = gBounds.tolist()
        # labels
        hm.matrix.sample_labels = args.samples
        if args.groups is None:
            args.groups = hm.matrix.group_labels
        hm.matrix.group_labels = args.groups
        # save
        hm.save_matrix(args.outFileName)
    elif args.command == 'filterStrand':
        filterHeatmap(hm, args)
        hm.save_matrix(args.outFileName)
    elif args.command == 'rbind':
        rbindMatrices(hm, args)
        hm.save_matrix(args.outFileName)
    elif args.command == 'cbind':
        cbindMatrices(hm, args)
        hm.save_matrix(args.outFileName)
    elif args.command == 'sort':
        sortMatrix(hm, args.regionsFileName, args.transcriptID, args.transcript_id_designator)
        hm.save_matrix(args.outFileName)
    else:
        sys.exit("Unknown command {0}!\n".format(args.command))
Code example #11
def main(args=None):

    args = process_args(args)

    parameters = {'upstream': args.beforeRegionStartLength,
                  'downstream': args.afterRegionStartLength,
                  'body': args.regionBodyLength,
                  'bin size': args.binSize,
                  'ref point': args.referencePoint,
                  'verbose': args.verbose,
                  'bin avg type': args.averageTypeBins,
                  'missing data as zero': args.missingDataAsZero,
                  'min threshold': args.minThreshold,
                  'max threshold': args.maxThreshold,
                  'scale': args.scale,
                  'skip zeros': args.skipZeros,
                  'nan after end': args.nanAfterEnd,
                  'proc number': args.numberOfProcessors,
                  'sort regions': args.sortRegions,
                  'sort using': args.sortUsing,
                  'unscaled 5 prime': args.unscaled5prime,
                  'unscaled 3 prime': args.unscaled3prime
                  }

    hm = heatmapper.heatmapper()

    scores_file_list = args.scoreFileName
    hm.computeMatrix(scores_file_list, args.regionsFileName, parameters, blackListFileName=args.blackListFileName, verbose=args.verbose, allArgs=args)
    if args.sortRegions != 'no':

        sortUsingSamples = []
        if args.sortUsingSamples is not None:
            for i in args.sortUsingSamples:
                if (i > 0 and i <= hm.matrix.get_num_samples()):
                    sortUsingSamples.append(i - 1)
                else:
                    exit("The value {0} for --sortUsingSamples is not valid. Only values from 1 to {1} are allowed.".format(args.sortUsingSamples, hm.matrix.get_num_samples()))
            print('Samples used for ordering within each group: ', sortUsingSamples)

        hm.matrix.sort_groups(sort_using=args.sortUsing, sort_method=args.sortRegions, sample_list=sortUsingSamples)

    hm.save_matrix(args.outFileName)

    if args.outFileNameMatrix:
        hm.save_matrix_values(args.outFileNameMatrix)

    if args.outFileSortedRegions:
        hm.save_BED(args.outFileSortedRegions)
Code example #12
File: plotProfile.py, Project: pombredanne/deepTools
def main(args=None):
    args = process_args(args)
    hm = heatmapper.heatmapper()
    matrix_file = args.matrixFile.name
    args.matrixFile.close()
    hm.read_matrix_file(matrix_file, default_group_name=args.regionsLabel)

    if args.kmeans is not None:
        hm.matrix.hmcluster(args.kmeans, method='kmeans')

    if len(args.regionsLabel):
        hm.matrix.set_group_labels(args.regionsLabel)

    if args.samplesLabel and len(args.samplesLabel):
        hm.matrix.set_sample_labels(args.samplesLabel)

    # if args.outFileNameData:
    #    hm.saveTabulatedValues(args.outFileNameData)

    if args.outFileSortedRegions:
        hm.save_BED(args.outFileSortedRegions)

    prof = Profile(hm,
                   args.outFileName,
                   plot_title=args.plotTitle,
                   y_axis_label=args.yAxisLabel,
                   y_min=args.yMin,
                   y_max=args.yMax,
                   averagetype=args.averageType,
                   reference_point_label=args.refPointLabel,
                   start_label=args.startLabel,
                   end_label=args.endLabel,
                   plot_height=args.plotHeight,
                   plot_width=args.plotWidth,
                   per_group=args.perGroup,
                   plot_type=args.plotType,
                   image_format=args.plotFileFormat,
                   color_list=args.colors,
                   legend_location=args.legendLocation,
                   plots_per_row=args.numPlotsPerRow)

    if args.plotType == 'heatmap':
        prof.plot_heatmap()
    elif args.plotType == 'overlapped_lines':
        prof.plot_hexbin()
    else:
        prof.plot_profile()
Code example #13
def rbindMatrices(hm, args):
    """
    This only supports a single group at this point

    It's assumed that the same samples are present in both and in the exact same order
    """
    hm2 = heatmapper.heatmapper()
    hm.read_matrix_file(args.matrixFile[0])
    for idx in range(1, len(args.matrixFile)):
        hm2.read_matrix_file(args.matrixFile[idx])
        for idx, group in enumerate(hm2.parameters["group_labels"]):
            if group in hm.parameters["group_labels"]:
                insertMatrix(hm, hm2, group)
            else:
                appendMatrix(hm, hm2, group)

    # Update the group boundaries attribute
    hm.matrix.group_labels = hm.parameters['group_labels']
    hm.matrix.group_boundaries = hm.parameters['group_boundaries']
Code example #14
def main(args):
    parameters = {
        'upstream': args.beforeRegionStartLength,
        'downstream': args.afterRegionStartLength,
        'body': args.regionBodyLength,
        'bin size': args.binSize,
        'ref point': args.referencePoint,
        'verbose': args.verbose,
        'bin avg type': args.averageTypeBins,
        'missing data as zero': args.missingDataAsZero,
        'min threshold': args.minThreshold,
        'max threshold': args.maxThreshold,
        'scale': args.scale,
        'skip zeros': args.skipZeros,
        'nan after end': args.nanAfterEnd,
        'proc number': args.numberOfProcessors,
    }

    hm = heatmapper.heatmapper()

    hm.computeMatrix(args.scoreFileName.name,
                     args.regionsFileName,
                     parameters,
                     verbose=args.verbose)
    if args.sortRegions != 'no':
        hm.sortMatrix(sort_using=args.sortUsing, sort_method=args.sortRegions)

    hm.saveMatrix(args.outFileName)

    if args.outFileNameMatrix:
        hm.saveMatrixValues(args.outFileNameMatrix)

    if args.outFileNameData:
        hm.saveTabulatedValues(args.outFileNameData)

    if args.outFileSortedRegions:
        hm.saveBED(args.outFileSortedRegions)
Code example #15
File: plotHeatmap.py, Project: mblue9/deepTools
def main(args=None):
    args = process_args(args)
    hm = heatmapper.heatmapper()
    matrix_file = args.matrixFile.name
    args.matrixFile.close()
    hm.read_matrix_file(matrix_file)

    if args.sortRegions == 'keep':
        args.sortRegions = 'no'  # These are the same thing

    if args.kmeans is not None:
        hm.matrix.hmcluster(args.kmeans, method='kmeans')
    else:
        if args.hclust is not None:
            print(
                "Performing hierarchical clustering."
                "Please note that it might be very slow for large datasets.\n")
            hm.matrix.hmcluster(args.hclust, method='hierarchical')

    group_len_ratio = np.diff(hm.matrix.group_boundaries) / len(
        hm.matrix.regions)
    if np.any(group_len_ratio < 5.0 / 1000):
        problem = np.flatnonzero(group_len_ratio < 5.0 / 1000)
        sys.stderr.write(
            "WARNING: Group '{}' is too small for plotting, you might want to remove it. "
            "There will likely be an error message from matplotlib regarding this "
            "below.\n".format(hm.matrix.group_labels[problem[0]]))

    if args.regionsLabel:
        hm.matrix.set_group_labels(args.regionsLabel)

    if args.samplesLabel and len(args.samplesLabel):
        hm.matrix.set_sample_labels(args.samplesLabel)

    if args.sortRegions != 'no':
        sortUsingSamples = []
        if args.sortUsingSamples is not None:
            for i in args.sortUsingSamples:
                if (i > 0 and i <= hm.matrix.get_num_samples()):
                    sortUsingSamples.append(i - 1)
                else:
                    exit(
                        "The value {0} for --sortSamples is not valid. Only values from 1 to {1} are allowed."
                        .format(args.sortUsingSamples,
                                hm.matrix.get_num_samples()))
            print('Samples used for ordering within each group: ',
                  sortUsingSamples)

        hm.matrix.sort_groups(sort_using=args.sortUsing,
                              sort_method=args.sortRegions,
                              sample_list=sortUsingSamples)

    if args.outFileNameMatrix:
        hm.save_matrix_values(args.outFileNameMatrix)

    if args.outFileSortedRegions:
        hm.save_BED(args.outFileSortedRegions)

    colormap_dict = {
        'colorMap': args.colorMap,
        'colorList': args.colorList,
        'colorNumber': args.colorNumber,
        'missingDataColor': args.missingDataColor,
        'alpha': args.alpha
    }

    plotMatrix(hm,
               args.outFileName,
               colormap_dict,
               args.plotTitle,
               args.xAxisLabel,
               args.yAxisLabel,
               args.regionsLabel,
               args.zMin,
               args.zMax,
               args.yMin,
               args.yMax,
               args.averageTypeSummaryPlot,
               args.refPointLabel,
               args.startLabel,
               args.endLabel,
               args.heatmapHeight,
               args.heatmapWidth,
               args.perGroup,
               args.whatToShow,
               image_format=args.plotFileFormat,
               legend_location=args.legendLocation,
               box_around_heatmaps=args.boxAroundHeatmaps,
               label_rotation=args.label_rotation,
               dpi=args.dpi,
               interpolation_method=args.interpolationMethod)
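Since main() hands its argument list straight to process_args(), the whole plotting pipeline above can be driven from Python with CLI-style tokens. A minimal, hypothetical call; the file names are placeholders and the flag spellings assume the usual plotHeatmap options (-m/--matrixFile, -o/--outFileName, --kmeans):

main(["-m", "matrix.gz", "-o", "heatmap.png", "--kmeans", "3"])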
Code example #16
def main(args=None):
    args = process_args(args)
    hm = heatmapper.heatmapper()
    matrix_file = args.matrixFile.name
    args.matrixFile.close()
    hm.read_matrix_file(matrix_file)

    if args.kmeans is not None:
        hm.matrix.hmcluster(args.kmeans, method='kmeans')
    else:
        if args.hclust is not None:
            print("Performing hierarchical clustering."
                  "Please note that it might be very slow for large datasets.\n")
            hm.matrix.hmcluster(args.hclust, method='hierarchical')

    group_len_ratio = np.diff(hm.matrix.group_boundaries) / len(hm.matrix.regions)
    if np.any(group_len_ratio < 5.0 / 1000):
        problem = np.flatnonzero(group_len_ratio < 5.0 / 1000)
        sys.stderr.write("WARNING: Group '{}' is too small for plotting, you might want to remove it. "
                         "There will likely be an error message from matplotlib regarding this "
                         "below.\n".format(hm.matrix.group_labels[problem[0]]))

    if args.regionsLabel:
        hm.matrix.set_group_labels(args.regionsLabel)

    if args.samplesLabel and len(args.samplesLabel):
        hm.matrix.set_sample_labels(args.samplesLabel)

    if args.sortRegions != 'no':
        sortUsingSamples = []
        if args.sortUsingSamples is not None:
            for i in args.sortUsingSamples:
                if (i > 0 and i <= hm.matrix.get_num_samples()):
                    sortUsingSamples.append(i - 1)
                else:
                    exit("The value {0} for --sortSamples is not valid. Only values from 1 to {1} are allowed.".format(args.sortUsingSamples, hm.matrix.get_num_samples()))
            print('Samples used for ordering within each group: ', sortUsingSamples)

        hm.matrix.sort_groups(sort_using=args.sortUsing,
                              sort_method=args.sortRegions,
                              sample_list=sortUsingSamples)

    if args.outFileNameMatrix:
        hm.save_matrix_values(args.outFileNameMatrix)

    if args.outFileSortedRegions:
        hm.save_BED(args.outFileSortedRegions)

    colormap_dict = {'colorMap': args.colorMap,
                     'colorList': args.colorList,
                     'colorNumber': args.colorNumber,
                     'missingDataColor': args.missingDataColor,
                     'alpha': args.alpha}

    plotMatrix(hm,
               args.outFileName,
               colormap_dict, args.plotTitle,
               args.xAxisLabel, args.yAxisLabel, args.regionsLabel,
               args.zMin, args.zMax,
               args.yMin, args.yMax,
               args.averageTypeSummaryPlot,
               args.refPointLabel,
               args.startLabel,
               args.endLabel,
               args.heatmapHeight,
               args.heatmapWidth,
               args.perGroup,
               args.whatToShow,
               image_format=args.plotFileFormat,
               legend_location=args.legendLocation,
               box_around_heatmaps=args.boxAroundHeatmaps,
               dpi=args.dpi)
Code example #17
def main(args=None):
    args = process_args(args)
    hm = heatmapper.heatmapper()
    matrix_file = args.matrixFile.name
    args.matrixFile.close()
    hm.read_matrix_file(matrix_file, default_group_name=args.regionsLabel)

    if args.kmeans is not None:
        hm.matrix.hmcluster(args.kmeans, method='kmeans')
    else:
        if args.hclust is not None:
            print "Performing hierarchical clustering." \
                  "Please note that it might be very slow for large datasets.\n"
            hm.matrix.hmcluster(args.hclust, method='hierarchical')

    group_len_ratio = np.diff(hm.matrix.group_boundaries) / len(
        hm.matrix.regions)
    if np.any(group_len_ratio < 5.0 / 1000):
        problem = np.flatnonzero(group_len_ratio < 5.0 / 1000)
        group_len = np.diff(hm.matrix.group_boundaries)
        print "Group '{}' contains too few regions {}. It can't "\
            "be plotted. Try removing this group.\n".format(hm.matrix.group_labels[problem[0]],
                                                            group_len[problem])
        if args.outFileSortedRegions:
            hm.save_BED(args.outFileSortedRegions)
            print 'Clustered output written in : ' + args.outFileSortedRegions.name
        else:
            print "No Output file defined for sorted regions. Please re-run "\
                  "heatmapper with --outFileSortedRegions to save the clustered output. "
        exit(1)

    if len(args.regionsLabel):
        hm.matrix.set_group_labels(args.regionsLabel)

    if args.samplesLabel and len(args.samplesLabel):
        hm.matrix.set_sample_labels(args.samplesLabel)

    if args.sortRegions != 'no':
        hm.matrix.sort_groups(sort_using=args.sortUsing,
                              sort_method=args.sortRegions)

    if args.outFileNameMatrix:
        hm.save_matrix_values(args.outFileNameMatrix)

    # if args.outFileNameData:
    #    hm.saveTabulatedValues(args.outFileNameData)

    if args.outFileSortedRegions:
        hm.save_BED(args.outFileSortedRegions)

    colormap_dict = {
        'colorMap': args.colorMap,
        'colorList': args.colorList,
        'colorNumber': args.colorNumber,
        'missingDataColor': args.missingDataColor
    }

    plotMatrix(hm,
               args.outFileName,
               colormap_dict,
               args.plotTitle,
               args.xAxisLabel,
               args.yAxisLabel,
               args.regionsLabel,
               args.zMin,
               args.zMax,
               args.yMin,
               args.yMax,
               args.averageTypeSummaryPlot,
               args.refPointLabel,
               args.startLabel,
               args.endLabel,
               args.heatmapHeight,
               args.heatmapWidth,
               args.perGroup,
               args.whatToShow,
               image_format=args.plotFileFormat,
               legend_location=args.legendLocation)
Code example #18
stats = {}
sample = ""
for filename in snakemake.input.profiles:
    kind = kind_from_filename(filename)
    sample = sample_from_filename(filename, kind)
    # if the file is empty, we need to decide what to do with it. For now, just
    # continue
    if os.stat(filename).st_size == 0:
        continue

    # use deeptools' parsing to handle loading the matrix. They actually
    # support some pretty complex features (concatenating upstream, gene body,
    # downstream; concatenating multiple sets of features). The snakefile is
    # running the simpler mode of a single set of features. So we can simply
    # grab the (0, 0) matrix.
    h = heatmapper()
    h.read_matrix_file(filename)

    # `matrix` is a numpy array, with one row per feature (in this context, one
    # row per peak) and one column for each bin
    matrix = h.matrix.get_matrix(0, 0)['matrix']

    # take the mean of each column
    y = matrix.mean(axis=0)

    # deeptools.Heatmapper.read_matrix_file also parses the parameters, which
    # is pretty nice. We can use that to build an x-axis.

    if snakemake.wildcards.scaling == 'reference-point':
        x = np.linspace(
            -h.parameters['upstream'],
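The snippet above breaks off inside the np.linspace call. A hedged guess at how the x axis is usually finished, reusing the names (np, h, matrix, y, stats, sample) already in scope above; the right-hand endpoint and the one-point-per-bin choice are assumptions, not recovered from the original:

n_bins = matrix.shape[1]
x = np.linspace(-h.parameters['upstream'],   # left edge of the window
                h.parameters['downstream'],  # assumed right edge
                n_bins)                      # assumed: one x value per bin
stats[sample] = (x, y)                       # hypothetical bookkeeping, one profile per sample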
Code example #19
def compute_matrix(args):

    args.samplesLabel = [
        scoreFname.replace(args.scoreFileNamePlusSuffix, '')
        for scoreFname in args.scoreFileNamePlus
    ]
    parameters = {
        'upstream': args.beforeRegionStartLength,
        'downstream': args.afterRegionStartLength,
        'body': args.regionBodyLength,
        'bin size': args.binSize,
        'ref point': args.referencePoint,
        'verbose': args.verbose,
        'bin avg type': args.averageTypeBins,
        'missing data as zero': args.missingDataAsZero,
        'min threshold': args.minThreshold,
        'max threshold': args.maxThreshold,
        'scale': args.scale,
        'skip zeros': args.skipZeros,
        'nan after end': args.nanAfterEnd,
        'proc number': args.numberOfProcessors,
        'sort regions': args.sortRegions,
        'sort using': args.sortUsing,
        'unscaled 5 prime': args.unscaled5prime,
        'unscaled 3 prime': args.unscaled3prime
    }

    # Preload deepBlue files (@MS: any file .wiggle, .wig or .bedgraph in deeptools context afaics), which need to then be deleted
    #deepBlueFilesPlus = load_deepblue_files(args.regionsFileNamePlus, args.scoreFileNamePlus)
    #deepBlueFilesMinus = load_deepblue_files(args.regionsFileNameMinus, args.scoreFileNameMinus)

    hm = heatmapper.heatmapper()
    hm.computeMatrix(args.scoreFileNamePlus,
                     args.regionsFileNamePlus,
                     parameters,
                     blackListFileName=args.blackListFileName,
                     verbose=args.verbose,
                     allArgs=args)
    hm.matrix.group_labels = [
        grp_label.replace(args.regionFileNamePlusSuffix, '')
        for grp_label in hm.matrix.group_labels
    ]

    hm_minus = heatmapper.heatmapper()
    hm_minus.computeMatrix(args.scoreFileNameMinus,
                           args.regionsFileNameMinus,
                           parameters,
                           blackListFileName=args.blackListFileName,
                           verbose=args.verbose,
                           allArgs=args)
    hm_minus.matrix.group_labels = [
        grp_label.replace(args.regionFileNameMinusSuffix, '')
        for grp_label in hm_minus.matrix.group_labels
    ]

    hm = rbindMatrices(hm, hm_minus)

    if args.sortRegions not in ['no', 'keep']:

        sortUsingSamples = []
        if args.sortUsingSamples is not None:
            for i in args.sortUsingSamples:
                if (i > 0 and i <= hm.matrix.get_num_samples()):
                    sortUsingSamples.append(i - 1)
                else:
                    exit(
                        "The value {0} for --sortUsingSamples is not valid. Only values from 1 to {1} are allowed."
                        .format(args.sortUsingSamples,
                                hm.matrix.get_num_samples()))
            print('Samples used for ordering within each group: ',
                  sortUsingSamples)

        hm.matrix.sort_groups(sort_using=args.sortUsing,
                              sort_method=args.sortRegions,
                              sample_list=sortUsingSamples)

    elif args.sortRegions == 'keep':
        hm.parameters['group_labels'] = hm.matrix.group_labels
        hm.parameters["group_boundaries"] = hm.matrix.group_boundaries
        # cmo.sortMatrix(hm, args.regionsFileName, args.transcriptID, args.transcript_id_designator)

    # Clean up temporary bigWig files, if applicable
    # if not args.deepBlueKeepTemp:
    #     for k, v in deepBlueFilesPlus:
    #         os.remove(args.scoreFileNamePlus[v])
    #     for k, v in deepBlueFilesMinus:
    #         os.remove(args.scoreFileNameMinus[v])
    # else:
    #     for k, v in deepBlueFilesPlus:
    #         print("{} is stored in {}".format(k, args.scoreFileNamePlus[v]))
    #     for k, v in deepBlueFilesMinus:
    #         print("{} is stored in {}".format(k, args.scoreFileNameMinus[v]))

    return hm
Code example #20
def tabbed_BED_region_to_deeptools(region):
    '''
    tab-separated BED region to the list form used by deeptools
    '''
    return [region[0], region[1], region[2], region[3]]

def deeptools_region_str(region):
    '''
    region (dict or list form) to its deeptools string representation
    '''
    if isinstance(region, dict):
        return region['chrom']+':'+str(region['start'])+'-'+str(region['end'])+'('+region['strand']+')_'+region['name']
    elif isinstance(region, list):
        return region[0] + ':' + str(region[1][0][0]) + '-' + str(region[1][-1][1]) + '(' + region[4] + ')_' + \
               region[2]



if __name__ == '__main__':

    args = parse_arguments().parse_args(sys.argv[1:])
    hm = heatmapper.heatmapper()
    hm.read_matrix_file(args.matrixFile)

    perform_operations(args, hm.matrix)

    hm.parameters['group_labels'] = hm.matrix.group_labels
    hm.parameters['group_boundaries'] = hm.matrix.group_boundaries

    hm.save_matrix(args.outFileName)
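The two region encodings handled by deeptools_region_str above are easiest to see side by side. A small usage sketch with made-up coordinates; both forms render to the same string:

region_dict = {'chrom': 'chr1', 'start': 100, 'end': 200, 'strand': '+', 'name': 'geneA'}
print(deeptools_region_str(region_dict))   # chr1:100-200(+)_geneA

# list form: [chrom, [(start, end), ...], name, <unused here>, strand]
region_list = ['chr1', [(100, 150), (180, 200)], 'geneA', 200, '+']
print(deeptools_region_str(region_list))   # chr1:100-200(+)_geneA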
Code example #21
def __clustering(hm, indexList, configfile):
    """
    """
    for index, i in enumerate(indexList):
        indexList[index] = i - 1
    if hm.parameters['min threshold'] is not None or\
            hm.parameters['max threshold'] is not None:
        hm.filterHeatmapValues(hm.parameters['min threshold'],
                               hm.parameters['max threshold'])

    if configfile["kmeans"] is not None:
        hm.matrix.hmcluster(configfile["kmeans"], method='kmeans')
    else:
        if configfile["hclust"] is not None:
            print("Performing hierarchical clustering."
                  "Please note that it might be very slow for large "
                  "datasets.\n")
            hm.matrix.hmcluster(configfile["hclust"], method='hierarchical')

    group_len_ratio = np.diff(hm.matrix.group_boundaries) /\
        len(hm.matrix.regions)
    if np.any(group_len_ratio < 5.0 / 1000):
        problem = np.flatnonzero(group_len_ratio < 5.0 / 1000)
        sys.stderr.write("WARNING: Group '{}' is too small for plotting,"
                         "you might want to remove it. "
                         "There will likely be an error message from "
                         "matplotlib regarding this "
                         "below.\n".format(hm.matrix.group_labels[problem[0]]))
    # TODO set sample & region labels!!
    if configfile["sortRegions"][0] != 'keep':
        hm.matrix.sort_groups(sort_using=configfile["sortUsing"][0],
                              sort_method=configfile["sortRegions"][0],
                              sample_list=indexList)
    outputMatrix_path = ""
    if configfile["outputReferenceMatrix"] is not None:
        outputMatrix_path = os.path.join(configfile["outputReferenceMatrix"])
        hm.save_matrix(outputMatrix_path)
    """ TODO: figure out how to do it directly from hm. For the moment when I
        use hm some parameters have incomapatible types. for example upstream
        is a value if it is read directly but is a list if it is read from a
        file."""
    if configfile["plotOutput"] is not None:
        if configfile["outputReferenceMatrix"] is None:
            outputMatrix_path = os.path.dirname(
                os.path.abspath(configfile["matrixOutput"]))
            outputMatrix_path += "/outputReferenceMatrix.gz"
            hm.save_matrix(outputMatrix_path)

        hm1 = heatmapper()
        hm1.read_matrix_file(outputMatrix_path)
        color_dict = {
            'colorMap': ['RdYlBu'],
            'colorList': None,
            'colorNumber': int(256),
            'missingDataColor': 'black',
            'alpha': float(1.0)
        }
        plotMatrix(hm1,
                   os.path.join(configfile["plotOutput"]),
                   colorMapDict=color_dict)

    assert (configfile["outFileSortedRegions"])
    hm.save_BED(open(configfile["outFileSortedRegions"], "w"))
Code example #22
def main(args=None):

    args = process_args(args)

    # if more than one bed file is given, they are concatenated into one file.
    if len(args.regionsFileName) > 1:
        bed_file = open(deeptools.utilities.getTempFileName(suffix='.bed'),
                        'w+t')
        for bed in args.regionsFileName:
            bed.close()
            # concatenate all intermediate tempfiles into one
            print "appending {} file".format(bed.name)
            shutil.copyfileobj(open(bed.name, 'U'), bed_file)
            # append hash and label based on the file name
            label = os.path.basename(bed.name)
            if label.endswith(".bed"):
                label = label[:-4]
            bed_file.write("# {}\n".format(label))
        bed_file.seek(0)
    else:
        bed_file = args.regionsFileName[0]

    parameters = {
        'upstream': args.beforeRegionStartLength,
        'downstream': args.afterRegionStartLength,
        'body': args.regionBodyLength,
        'bin size': args.binSize,
        'ref point': args.referencePoint,
        'verbose': args.verbose,
        'bin avg type': args.averageTypeBins,
        'missing data as zero': args.missingDataAsZero,
        'min threshold': args.minThreshold,
        'max threshold': args.maxThreshold,
        'scale': args.scale,
        'skip zeros': args.skipZeros,
        'nan after end': args.nanAfterEnd,
        'proc number': args.numberOfProcessors,
        'sort regions': args.sortRegions,
        'sort using': args.sortUsing
    }

    hm = heatmapper.heatmapper()

    scores_file_list = [x.name for x in args.scoreFileName]
    hm.computeMatrix(scores_file_list,
                     bed_file,
                     parameters,
                     verbose=args.verbose)
    if args.sortRegions != 'no':
        hm.matrix.sort_groups(sort_using=args.sortUsing,
                              sort_method=args.sortRegions)

    hm.save_matrix(args.outFileName)
    bed_file.close()

    if len(args.regionsFileName) > 1:
        os.remove(bed_file.name)

    if args.outFileNameMatrix:
        hm.save_matrix_values(args.outFileNameMatrix)

    # TODO This isn't implemented
    # if args.outFileNameData:
    #    hm.saveTabulatedValues(args.outFileNameData)

    if args.outFileSortedRegions:
        hm.save_BED(args.outFileSortedRegions)
Code example #23
File: plotHeatmap.py, Project: steffenheyne/deepTools
def main(args=None):
    args = process_args(args)
    hm = heatmapper.heatmapper()
    matrix_file = args.matrixFile.name
    args.matrixFile.close()
    hm.read_matrix_file(matrix_file, default_group_name=args.regionsLabel)

    if args.kmeans is not None:
        hm.matrix.hmcluster(args.kmeans, method='kmeans')
    else:
        if args.hclust is not None:
            print "Performing hierarchical clustering." \
                  "Please note that it might be very slow for large datasets.\n"
            hm.matrix.hmcluster(args.hclust, method='hierarchical')

    group_len_ratio = np.diff(hm.matrix.group_boundaries) / len(hm.matrix.regions)
    if np.any(group_len_ratio < 5.0 / 1000):
        problem = np.flatnonzero(group_len_ratio < 5.0 / 1000)
        group_len = np.diff(hm.matrix.group_boundaries)
        print "Group '{}' contains too few regions {}. It can't "\
            "be plotted. Try removing this group.\n".format(hm.matrix.group_labels[problem[0]],
                                                            group_len[problem])
        if args.outFileSortedRegions:
            hm.save_BED(args.outFileSortedRegions)
            print 'Clustered output written in : ' + args.outFileSortedRegions.name
        else:
            print "No Output file defined for sorted regions. Please re-run "\
                  "heatmapper with --outFileSortedRegions to save the clustered output. "
        exit(1)

    if len(args.regionsLabel):
        hm.matrix.set_group_labels(args.regionsLabel)

    if args.samplesLabel and len(args.samplesLabel):
        hm.matrix.set_sample_labels(args.samplesLabel)

    if args.sortRegions != 'no':
        hm.matrix.sort_groups(sort_using=args.sortUsing,
                              sort_method=args.sortRegions)

    if args.outFileNameMatrix:
        hm.save_matrix_values(args.outFileNameMatrix)

    # if args.outFileNameData:
    #    hm.saveTabulatedValues(args.outFileNameData)

    if args.outFileSortedRegions:
        hm.save_BED(args.outFileSortedRegions)

    colormap_dict = {'colorMap': args.colorMap,
                     'colorList': args.colorList,
                     'colorNumber': args.colorNumber,
                     'missingDataColor': args.missingDataColor}

    plotMatrix(hm,
               args.outFileName,
               colormap_dict, args.plotTitle,
               args.xAxisLabel, args.yAxisLabel, args.regionsLabel,
               args.zMin, args.zMax,
               args.yMin, args.yMax,
               args.averageTypeSummaryPlot,
               args.refPointLabel,
               args.startLabel,
               args.endLabel,
               args.heatmapHeight,
               args.heatmapWidth,
               args.perGroup,
               args.whatToShow,
               image_format=args.plotFileFormat,
               legend_location=args.legendLocation)
Code example #24
def main(args=None):

    args = process_args(args)

    parameters = {'upstream': args.beforeRegionStartLength,
                  'downstream': args.afterRegionStartLength,
                  'body': args.regionBodyLength,
                  'bin size': args.binSize,
                  'ref point': args.referencePoint,
                  'verbose': args.verbose,
                  'bin avg type': args.averageTypeBins,
                  'missing data as zero': args.missingDataAsZero,
                  'min threshold': args.minThreshold,
                  'max threshold': args.maxThreshold,
                  'scale': args.scale,
                  'skip zeros': args.skipZeros,
                  'nan after end': args.nanAfterEnd,
                  'proc number': args.numberOfProcessors,
                  'sort regions': args.sortRegions,
                  'sort using': args.sortUsing,
                  'unscaled 5 prime': args.unscaled5prime,
                  'unscaled 3 prime': args.unscaled3prime
                  }

    hm = heatmapper.heatmapper()

    # Preload deepBlue files, which need to then be deleted
    deepBlueFiles = []
    for idx, fname in enumerate(args.scoreFileName):
        if db.isDeepBlue(fname):
            deepBlueFiles.append([fname, idx])
    if len(deepBlueFiles) > 0:
        sys.stderr.write("Preloading the following deepBlue files: {}\n".format(",".join([x[0] for x in deepBlueFiles])))
        regs = db.makeRegions(args.regionsFileName, args)
        for x in deepBlueFiles:
            x.extend([args, regs])
        if len(deepBlueFiles) > 1 and args.numberOfProcessors > 1:
            pool = multiprocessing.Pool(args.numberOfProcessors)
            res = pool.map_async(db.preloadWrapper, deepBlueFiles).get(9999999)
        else:
            res = list(map(db.preloadWrapper, deepBlueFiles))

        # substitute the file names with the temp files
        for (ftuple, r) in zip(deepBlueFiles, res):
            args.scoreFileName[ftuple[1]] = r
        deepBlueFiles = [[x[0], x[1]] for x in deepBlueFiles]
        del regs

    scores_file_list = args.scoreFileName
    hm.computeMatrix(scores_file_list, args.regionsFileName, parameters, blackListFileName=args.blackListFileName, verbose=args.verbose, allArgs=args)
    if args.sortRegions not in ['no', 'keep']:
        sortUsingSamples = []
        if args.sortUsingSamples is not None:
            for i in args.sortUsingSamples:
                if (i > 0 and i <= hm.matrix.get_num_samples()):
                    sortUsingSamples.append(i - 1)
                else:
                    exit("The value {0} for --sortUsingSamples is not valid. Only values from 1 to {1} are allowed.".format(args.sortUsingSamples, hm.matrix.get_num_samples()))
            print('Samples used for ordering within each group: ', sortUsingSamples)

        hm.matrix.sort_groups(sort_using=args.sortUsing, sort_method=args.sortRegions, sample_list=sortUsingSamples)
    elif args.sortRegions == 'keep':
        hm.parameters['group_labels'] = hm.matrix.group_labels
        hm.parameters["group_boundaries"] = hm.matrix.group_boundaries
        cmo.sortMatrix(hm, args.regionsFileName, args.transcriptID, args.transcript_id_designator, verbose=not args.quiet)

    hm.save_matrix(args.outFileName)

    if args.outFileNameMatrix:
        hm.save_matrix_values(args.outFileNameMatrix)

    if args.outFileSortedRegions:
        hm.save_BED(args.outFileSortedRegions)

    # Clean up temporary bigWig files, if applicable
    if not args.deepBlueKeepTemp:
        for k, v in deepBlueFiles:
            os.remove(args.scoreFileName[v])
    else:
        for k, v in deepBlueFiles:
            print("{} is stored in {}".format(k, args.scoreFileName[v]))