Beispiel #1
0
def reorder_pov_by_cluster(pov, mutIDs, config):
    """Reorder rows and columns of ``pov`` so co-clustered mutations are adjacent.

    The cluster assignment is read from
    ``os.path.join(config.working_dir, config.mutation_to_cluster_assignment)``.
    Mutations are ordered by the cluster they are assigned to; within one
    cluster they keep the order returned by ``read_cluster_assignments``.

    Args:
        pov: 2-D numpy array whose rows and columns are both addressed by
            position in ``mutIDs``.
        mutIDs: sequence of mutation IDs labelling the rows/columns of
            ``pov`` (assumed hashable, e.g. strings -- TODO confirm).
        config: object exposing ``working_dir`` and
            ``mutation_to_cluster_assignment``.

    Returns:
        ``(sortedPov, sortedmutIDs)``: a new float array of shape
        ``pov.shape`` and a tuple of mutation IDs in cluster order.  When
        the assignment file does not exist, or lists no mutation at all,
        ``(pov, mutIDs)`` is returned unchanged.

    Raises:
        ValueError: if a clustered mutation is not present in ``mutIDs``.
    """
    clusterPath = os.path.join(config.working_dir,
                               config.mutation_to_cluster_assignment)
    if not os.path.exists(clusterPath):
        # if no cluster assignment available
        # return the matrix and ids as is
        return pov, mutIDs

    cluster2mut = read_cluster_assignments(clusterPath)
    pairs = []
    for cl in cluster2mut:
        pairs.extend((mut, cl) for mut in cluster2mut[cl])
    if not pairs:
        # an empty assignment gives nothing to reorder by
        return pov, mutIDs

    # the sort is stable, so mutations keep their order inside each cluster
    pairs.sort(key=lambda pair: pair[1])
    sortedmutIDs = tuple(mut for mut, _ in pairs)

    # First-occurrence position of every ID (same semantics as list.index)
    # so the reordering below does not rescan the sequences for every cell.
    srcIndex = {}
    for position, mutID in enumerate(mutIDs):
        srcIndex.setdefault(mutID, position)
    dstIndex = {}
    for position, mutID in enumerate(sortedmutIDs):
        dstIndex.setdefault(mutID, position)

    missing = [mutID for mutID in dstIndex if mutID not in srcIndex]
    if missing:
        raise ValueError('%r is not in list' % (missing[0],))

    dst = []
    src = []
    for mutID, position in dstIndex.items():
        dst.append(position)
        src.append(srcIndex[mutID])

    # cells that no clustered mutation maps to stay at zero
    sortedPov = np.zeros(pov.shape)
    sortedPov[np.ix_(dst, dst)] = pov[np.ix_(src, src)]
    return sortedPov, sortedmutIDs
Beispiel #2
0
def reorder_pov_by_cluster(pov, mutIDs, config):
    """Group mutations of the same cluster together in the ``pov`` matrix.

    Reads the mutation-to-cluster assignment from
    ``os.path.join(config.working_dir, config.mutation_to_cluster_assignment)``
    and permutes the rows and columns of ``pov`` so that mutations are
    ordered by cluster.  Inside a cluster the order delivered by
    ``read_cluster_assignments`` is preserved.

    Args:
        pov: 2-D numpy array; row ``i`` and column ``i`` both belong to
            ``mutIDs[i]``.
        mutIDs: sequence of mutation IDs for the rows/columns of ``pov``
            (assumed hashable, e.g. strings -- TODO confirm).
        config: object providing ``working_dir`` and
            ``mutation_to_cluster_assignment``.

    Returns:
        ``(sortedPov, sortedmutIDs)`` where ``sortedPov`` is a new float
        array of shape ``pov.shape`` and ``sortedmutIDs`` is a tuple of IDs
        in cluster order.  ``(pov, mutIDs)`` is returned untouched when the
        assignment file is missing or contains no mutation.

    Raises:
        ValueError: if a clustered mutation does not occur in ``mutIDs``.
    """
    clusterPath = os.path.join(config.working_dir,
                               config.mutation_to_cluster_assignment)
    if not os.path.exists(clusterPath):
        # if no cluster assignment available
        # return the matrix and ids as is
        return pov, mutIDs

    cluster2mut = read_cluster_assignments(clusterPath)
    pairs = [(mut, cl) for cl in cluster2mut for mut in cluster2mut[cl]]
    if not pairs:
        # nothing is assigned to any cluster, so there is no order to apply
        return pov, mutIDs

    # sorted() is stable: per-cluster mutation order is kept
    pairs = sorted(pairs, key=lambda pair: pair[1])
    sortedmutIDs = tuple(pair[0] for pair in pairs)

    # Map each ID to its first position (what list.index would return);
    # this avoids a linear scan per matrix cell.
    oldPosition = {}
    for position, mutID in enumerate(mutIDs):
        oldPosition.setdefault(mutID, position)
    newPosition = {}
    for position, mutID in enumerate(sortedmutIDs):
        newPosition.setdefault(mutID, position)

    for mutID in newPosition:
        if mutID not in oldPosition:
            raise ValueError('%r is not in list' % (mutID,))

    targets = []
    sources = []
    for mutID, position in newPosition.items():
        targets.append(position)
        sources.append(oldPosition[mutID])

    # entries without a clustered mutation remain zero
    sortedPov = np.zeros(pov.shape)
    sortedPov[np.ix_(targets, targets)] = pov[np.ix_(sources, sources)]
    return sortedPov, sortedmutIDs
Beispiel #3
0
def plot_mut_clust_cellularity(args):
    """Plot estimated cellularity of mutations and clusters across samples.

    For every cluster a ribbon spanning cellularity -/+ sigma and a dotted
    trace through the ribbon midpoint are drawn; the cellularities of the
    individual mutations are overlaid as '+' markers in the colour of their
    cluster.  The figure is written to
    ``<working_dir>/<output_prefix>.cellularity.png``.

    Args:
        args: object with a ``config_file`` attribute, passed to ``Config``.

    Returns:
        None.  If matplotlib cannot be imported a message is written to
        stderr and the function returns without plotting.

    Raises:
        SystemExit: (status 1) when there are more clusters than colours in
            the palette.
    """
    try:
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt
    except ImportError:
        sys.stderr.write(
            'Plotting mutation/cluster cellularities requires matplotlib \n' +
            'module. Failed to import matplotlib. Please install \n' +
            'matplotlib package and try again\n')
        return
    config = Config(args.config_file)
    #--------------------------------------
    # figure out path to input data files
    if config.cellularity_estimation == 'schism':
        mutCellularityPath = os.path.join(
            config.working_dir, config.output_prefix + '.mutation.cellularity')
    else:
        mutCellularityPath = os.path.join(
            config.working_dir, config.mutation_cellularity_input)

    clustCellularityPath = os.path.join(
        config.working_dir, config.output_prefix + '.cluster.cellularity')
    clusterPath = os.path.join(config.working_dir,
                               config.mutation_to_cluster_assignment)
    cluster2mut = read_cluster_assignments(clusterPath)
    #--------------------------------------
    # read in cluster cellularity data
    cl2index, samples = read_input_samples(clustCellularityPath)
    samples = sorted(samples, key=lambda sample: sample.name)
    x = list(range(len(samples)))
    # per cluster: x positions, lower bound and upper bound for each sample;
    # 'NA' marks samples whose cellularity is reported as -1
    clBounds = {}
    for cl, clIndex in cl2index.items():
        lowerBound = ['NA'] * len(samples)
        upperBound = ['NA'] * len(samples)
        for sampleIndex, sample in enumerate(samples):
            cellularity = sample.mutCellularity[clIndex]
            if cellularity != -1:
                sigma = sample.mutSigma[clIndex]
                lowerBound[sampleIndex] = cellularity - sigma
                upperBound[sampleIndex] = cellularity + sigma
        clBounds[cl] = [x, lowerBound, upperBound]

    # visualize cluster cellularity estimates and standard error
    # as ribbon plot
    colorPalette = ['#323f7b', '#cb3245', '#638e4d', '#9a336d', '#e2a86a',
                    '#246c8f', '#7d303d', '#734d85', '#077783', '#9c7688',
                    '#b48b73', '#7da1bf', '#4b6b6c', '#7b7282', '#263246']

    if len(cl2index) > len(colorPalette):
        sys.stderr.write(
            'Cellularity plot not supported for more than %d clusters.\n'
            % len(colorPalette))
        # non-zero status so callers can tell that no plot was produced
        sys.exit(1)

    # The grid is enabled on ``ax`` further down; calling plt.grid() before
    # the figure exists would only create an extra, never-saved figure.
    fig, ax = plt.subplots(1, 1)
    for cl, index in cl2index.items():
        # NOTE(review): np.mean runs before bound_fix, so bounds that still
        # hold 'NA' reach it as strings -- confirm how bound_fix is meant to
        # handle missing samples.
        trace = np.mean(np.array([clBounds[cl][1], clBounds[cl][2]]), 0)
        trace = [bound_fix(value) for value in trace]

        clBounds[cl][1] = [bound_fix(value) for value in clBounds[cl][1]]
        clBounds[cl][2] = [bound_fix(value) for value in clBounds[cl][2]]

        ax.fill_between(clBounds[cl][0], clBounds[cl][1], clBounds[cl][2],
                        color=colorPalette[index], alpha=0.5)
        ax.plot(clBounds[cl][0], trace,
                color=colorPalette[index], label=cl,
                linestyle="dotted", marker="o", linewidth=1)
    #-----------------------------------------#
    # visualize cluster cellularity for individual mutations as
    # scatter plot overlay
    mut2index, samples = read_input_samples(mutCellularityPath)
    samples = sorted(samples, key=lambda sample: sample.name)
    for cl in cluster2mut:
        coords = []
        for mut in cluster2mut[cl]:
            mutIndex = mut2index[mut]
            for sampleIndex, sample in enumerate(samples):
                value = sample.mutCellularity[mutIndex]
                # -1 marks a missing estimate; leave it off the plot
                if value != -1:
                    coords.append((sampleIndex, value))
        if not coords:
            # nothing to draw for this cluster
            continue
        xs, ys = zip(*coords)
        ax.scatter(xs, ys, color=colorPalette[cl2index[cl]],
                   marker='+', s=20.0)
    #-----------------------------------------#
    # adjust visual properties of the plot
    handles, labels = ax.get_legend_handles_labels()
    if handles:
        # legend entries ordered by label
        handles, labels = zip(*sorted(zip(handles, labels),
                                      key=lambda pair: pair[1]))
        ax.legend(handles, labels)

    ax.set_xticks(list(range(len(samples))))
    ax.set_yticks([tick * 0.1 for tick in range(11)])

    # half a unit of padding on both sides, whatever the number of samples
    ax.set_xlim((-0.5, len(samples) - 0.5))
    ax.set_ylim((-0.1, 1.1))

    ax.set_xticklabels([sample.name for sample in samples])

    ax.yaxis.grid(True)
    outputPath = os.path.join(config.working_dir,
                              config.output_prefix + '.cellularity.png')
    fig.savefig(outputPath)
    # release the figure so repeated calls do not accumulate open figures
    plt.close(fig)
    return
Beispiel #4
0
def plot_mut_clust_cellularity(args):
    """Plot estimated cellularity of mutations and clusters across samples.

    Each cluster is drawn as a ribbon from cellularity - sigma to
    cellularity + sigma with a dotted trace through the ribbon midpoint.
    Cellularities of the individual mutations are overlaid as '+' markers
    coloured by cluster.  The result is saved to
    ``<working_dir>/<output_prefix>.cellularity.png``.

    Args:
        args: object with a ``config_file`` attribute, passed to ``Config``.

    Returns:
        None.  When matplotlib cannot be imported, a message is written to
        stderr and nothing is plotted.

    Raises:
        SystemExit: (status 1) when the number of clusters exceeds the
            number of colours in the palette.
    """
    try:
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt
    except ImportError:
        sys.stderr.write(
            'Plotting mutation/cluster cellularities requires matplotlib \n' +
            'module. Failed to import matplotlib. Please install \n' +
            'matplotlib package and try again\n')
        return
    config = Config(args.config_file)
    #--------------------------------------
    # figure out path to input data files
    if config.cellularity_estimation == 'schism':
        mutCellularityPath = os.path.join(
            config.working_dir, config.output_prefix + '.mutation.cellularity')
    else:
        mutCellularityPath = os.path.join(
            config.working_dir, config.mutation_cellularity_input)

    clustCellularityPath = os.path.join(
        config.working_dir, config.output_prefix + '.cluster.cellularity')
    clusterPath = os.path.join(config.working_dir,
                               config.mutation_to_cluster_assignment)
    cluster2mut = read_cluster_assignments(clusterPath)
    #--------------------------------------
    # read in cluster cellularity data
    cl2index, samples = read_input_samples(clustCellularityPath)
    samples = sorted(samples, key=lambda sample: sample.name)
    x = list(range(len(samples)))
    # per cluster: x positions, lower bound and upper bound for each sample;
    # 'NA' marks samples whose cellularity is reported as -1
    clBounds = {}
    for cl, clIndex in cl2index.items():
        lowerBound = ['NA'] * len(samples)
        upperBound = ['NA'] * len(samples)
        for sampleIndex, sample in enumerate(samples):
            cellularity = sample.mutCellularity[clIndex]
            if cellularity != -1:
                sigma = sample.mutSigma[clIndex]
                lowerBound[sampleIndex] = cellularity - sigma
                upperBound[sampleIndex] = cellularity + sigma
        clBounds[cl] = [x, lowerBound, upperBound]

    # visualize cluster cellularity estimates and standard error
    # as ribbon plot
    colorPalette = ["#E62E41", "#0A893D", "#455593", "#F19131",
                    "#F375A0", "#874B2C", "#C894F1", "#C3401E",
                    "#91430F", "#F65348"]
    if len(cl2index) > len(colorPalette):
        # report the real limit, i.e. the number of colours available
        sys.stderr.write(
            'Cellularity plot not supported for more than %d clusters.\n'
            % len(colorPalette))
        # non-zero status so callers can tell that no plot was produced
        sys.exit(1)

    # The grid is enabled on ``ax`` further down; calling plt.grid() before
    # the figure exists would only create an extra, never-saved figure.
    fig, ax = plt.subplots(1, 1)
    for cl, index in cl2index.items():
        # NOTE(review): np.mean runs before bound_fix, so bounds that still
        # hold 'NA' reach it as strings -- confirm how bound_fix is meant to
        # handle missing samples.
        trace = np.mean(np.array([clBounds[cl][1], clBounds[cl][2]]), 0)
        trace = [bound_fix(value) for value in trace]

        clBounds[cl][1] = [bound_fix(value) for value in clBounds[cl][1]]
        clBounds[cl][2] = [bound_fix(value) for value in clBounds[cl][2]]

        ax.fill_between(clBounds[cl][0], clBounds[cl][1], clBounds[cl][2],
                        color=colorPalette[index], alpha=0.5)
        ax.plot(clBounds[cl][0], trace,
                color=colorPalette[index], label=cl,
                linestyle="dotted", marker="o", linewidth=1)
    #-----------------------------------------#
    # visualize cluster cellularity for individual mutations as
    # scatter plot overlay
    mut2index, samples = read_input_samples(mutCellularityPath)
    samples = sorted(samples, key=lambda sample: sample.name)
    for cl in cluster2mut:
        coords = []
        for mut in cluster2mut[cl]:
            mutIndex = mut2index[mut]
            for sampleIndex, sample in enumerate(samples):
                value = sample.mutCellularity[mutIndex]
                # -1 marks a missing estimate; leave it off the plot
                if value != -1:
                    coords.append((sampleIndex, value))
        if not coords:
            # nothing to draw for this cluster
            continue
        xs, ys = zip(*coords)
        ax.scatter(xs, ys, color=colorPalette[cl2index[cl]],
                   marker='+', s=20.0)
    #-----------------------------------------#
    # adjust visual properties of the plot
    handles, labels = ax.get_legend_handles_labels()
    if handles:
        # legend entries ordered by label
        handles, labels = zip(*sorted(zip(handles, labels),
                                      key=lambda pair: pair[1]))
        ax.legend(handles, labels)

    ax.set_xticks(list(range(len(samples))))
    ax.set_yticks([tick * 0.1 for tick in range(11)])

    # half a unit of padding on both sides, whatever the number of samples
    ax.set_xlim((-0.5, len(samples) - 0.5))
    ax.set_ylim((-0.1, 1.1))

    ax.set_xticklabels([sample.name for sample in samples])

    ax.yaxis.grid(True)
    outputPath = os.path.join(config.working_dir,
                              config.output_prefix + '.cellularity.png')
    fig.savefig(outputPath)
    # release the figure so repeated calls do not accumulate open figures
    plt.close(fig)
    return