Esempio n. 1
0
def run_iter(args):
    solvers, solver_options, general_options, seed = args

    for op in solver_options:
        op['seed'] = seed

    times = []
    for s, s_ops in zip(solvers, solver_options):
        tstart = time.time()
        print "{} START TIME: {}".format(s, tstart)
        madhype.simulate_run([s], [s_ops], seed=seed, **general_options)
        tstop = time.time()
        times.append(tstop - tstart)

    return times
Esempio n. 2
0
def collect_data():
    # Set up run parameters
    solvers = ['madhype', 'alphabetr']
    solver_options = [{'fdr': 0.05}, {'estimate_frequencies': False}]

    # Set up parameters that apply to all solvers/simulations
    general_options = {
        'num_cells': 3000,
        'cell_freq_constant': 2,
        'cell_freq_max': 0.05,
        'alpha_sharing_probs': None,
        'beta_sharing_probs': None,
        'num_wells': (96, ),
        'cpw': (300, ),
        'visual': False,
        'fdr': 0.05,
    }

    repeats = 20

    madhype_results = []
    alphabetr_results = []

    for rep in xrange(repeats):
        seed = random.randint(0, 1e6)
        print "Iteration {}: SEED {}".format(rep, seed)

        solver_options[1]['seed'] = seed
        _, results = madhype.simulate_run(solvers,
                                          solver_options,
                                          seed=seed,
                                          **ops)

        madhype_results.append(results[0])
        alphabetr_results.append(results[1])
Esempio n. 3
0
def collect_data():
    # Set up run parameters
    solvers = ['madhype', 'alphabetr']
    solver_options = [{
        'silent': True
    }, {
        'silent': True,
        'estimate_frequencies': False
    }]

    # Set up parameters that apply to all solvers/simulations
    general_options = {
        'num_cells': 1000,
        'cell_freq_constant': 2,
        'cell_freq_max': 0.01,
        'alpha_sharing_probs': None,
        'beta_sharing_probs': None,
        'visual': False,
        'silent': True,
    }

    total_cells = 9600
    total_wells = 96

    w_range = np.arange(12, 96, 12)
    c_range = np.ceil(np.logspace(0, np.log10(total_cells / total_wells),
                                  11)).astype(np.int)
    repeats = 10

    madhype_results = {(c, w): [] for c, w in it.product(c_range, w_range)}
    alphabetr_results = {(c, w): [] for c, w in it.product(c_range, w_range)}

    print "Running simulations with the following settings:"
    print "  Well partitions:", w_range
    print "  CPW:", c_range
    print "  # reps/condition", repeats

    for i, c in enumerate(c_range):
        for j, w in enumerate(w_range):
            num_wells = (w, total_wells - w)
            cpw = (c, int((total_cells - c * w) / (total_wells - w)))
            print "Number of wells: {}, cells per well: {}".format(
                num_wells, cpw)
            for rep in xrange(repeats):
                seed = random.randint(0, 1e6)
                print "Iteration {}: SEED {}".format(rep, seed)

                _, results = madhype.simulate_run(solvers,
                                                  solver_options,
                                                  num_wells=num_wells,
                                                  cpw=cpw,
                                                  seed=seed,
                                                  **general_options)

                madhype_results[(c, w)].append(results[0]['frac_repertoire'])
                alphabetr_results[(c, w)].append(results[1]['frac_repertoire'])

    return madhype_results, alphabetr_results
Esempio n. 4
0
def main(*args, **kwargs):
    """Simulate two peripheral-blood cohorts and show them in one figure.

    One `madhype.simulate_run` call is made per cohort label.  Each call
    receives that cohort's simulation parameters plus shared plotting
    settings, including the axes of its own column in a 1 x N figure (passed
    under `plot_repertoire`).  The figure is shown without blocking and is
    closed once the user presses enter.
    """
    labels = ['Peripheral Blood (9-25 y)', 'Peripheral Blood (61-66 y)']

    cohort_settings = {
        # ranges 0.1% to 3%
        labels[0]: {
            'cell_freq_max': 0.0025,
            'cell_freq_constant': 1. + 1.21,
            'num_cells': 10000,
            'threshold': 2.0,
            'block': False,
            'num_wells': (48, 48),
            'cpw': (500, 10000)
        },
        labels[1]: {
            'cell_freq_max': 0.086,
            'cell_freq_constant': 1. + 1.15,
            'num_cells': 10000,
            'threshold': 2.0,
            'block': True,
            'num_wells': (48, 48),
            'cpw': (500, 10000)
        },
    }

    # One column per cohort.
    fig, axes = plt.subplots(nrows=1, ncols=len(labels), figsize=(15, 4))

    # Solver setup shared by every run.
    solvers = ['madhype']
    solver_options = [{}]

    for column, cohort in enumerate(labels):
        run_settings = cohort_settings[cohort]

        # Layer the shared plotting/output settings over the cohort's own.
        run_settings.update({
            'plot_repertoire': {
                'ax': axes[column],
                'fig': fig,
            },
            'visual': True,
            'silent': False,
            'legend': bool(column),  # False for the first column only
            'save': True,
            'savename': 'figS1ab_{}.png',  # where plots are saved
            'alpha_dual_prob': 0.33,
            'beta_dual_prob': 0.33,
        })

        _data, _results = madhype.simulate_run(solvers, solver_options,
                                               **run_settings)

    plt.show(block=False)
    raw_input('Press enter to close...')
    plt.close()
Esempio n. 5
0
def main(*args, **kwargs):
    """Sweep the dual-clone probability and plot the results as boxplots.

    For each dual-clone probability the simulation is repeated `repeats`
    times (seeded 0..repeats-1), once with the chain sharing probabilities
    forced to 0.0 and once with them left as None.  Two quantities are kept
    per run: `'frac_repertoire'`, and `'positives'` divided by
    `num_cells * (1 + 2*v + v**2)`.  The 2 x 2 figure (columns: no sharing /
    sharing, rows: matches / coverage) is shown and then saved as
    `fig_S5.png` after the user presses enter.
    """
    modifications = {
        'dual_prob': [.0, .1, .2, .3, .4, .5],
    }
    labels = ['0%', '10%', '20%', '30%', '40%', '50%']
    repeats = 10

    settings = default_settings()
    for key, value in (
        ('cell_freq_max', 0.05),
        ('num_cells', 1000),
        ('cpw', (50, 1000)),
        ('num_wells', (48, 48)),
        ('chain_deletion_prob', 0.1),
        ('chain_misplacement_prob', 0.0),
    ):
        settings[key] = value

    all_coverage = {}
    all_matches = {}

    solvers = ['madhype']
    solver_options = [{}]

    for first_mod, values in modifications.items():
        for chain_sharing in [False, True]:

            mod = 'Chain sharing' if chain_sharing else 'No chain sharing'
            # None leaves the sharing probabilities unset; 0.0 is used for
            # the "no sharing" condition.
            sharing_prob = None if chain_sharing else 0.0

            all_coverage[mod] = []
            all_matches[mod] = []

            for v in values:
                all_results = []

                # iterate across system
                for r in xrange(repeats):
                    specific_settings = copy.copy(settings)

                    if first_mod == 'dual_prob':
                        # One sweep value drives both chains.
                        specific_settings['alpha_dual_prob'] = v
                        specific_settings['beta_dual_prob'] = v
                    else:
                        specific_settings[first_mod] = v

                    specific_settings['alpha_sharing_probs'] = sharing_prob
                    specific_settings['beta_sharing_probs'] = sharing_prob
                    specific_settings['seed'] = r

                    _, results = simulate_run(solvers, solver_options,
                                              **specific_settings)
                    all_results += results

                all_coverage[mod].append(
                    [entry['frac_repertoire'] for entry in all_results])

                normaliser = settings['num_cells'] * (1 + 2 * v + v**2)
                all_matches[mod].append([
                    float(entry['positives']) / normaliser
                    for entry in all_results
                ])

    # plot/display settings
    fs = 18
    boxprops = dict(linewidth=3.0, zorder=1)
    meanlineprops = dict(linestyle='-', linewidth=2, color='black', zorder=0)
    plt.rcParams['xtick.labelsize'] = fs - 4

    # figure specific properties
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 12), sharey=False)
    plt.subplots_adjust(left=0.15, right=0.9, hspace=0.3, wspace=0.4)

    # set border for figure
    for axes_row in axes:
        for panel in axes_row:
            for spine in panel.spines.itervalues():
                spine.set_linewidth(3)

    # (axes, data, title or None, y label), in drawing order.
    panels = [
        (axes[0][0], all_matches['No chain sharing'], 'No chain sharing',
         'Clonal Matches (%)'),
        (axes[1][0], all_coverage['No chain sharing'], None,
         'Repertoire Coverage'),
        (axes[0][1], all_matches['Chain sharing'], 'Chain sharing',
         'Clonal Matches (%)'),
        (axes[1][1], all_coverage['Chain sharing'], None,
         'Repertoire Coverage'),
    ]

    for panel, samples, title, y_label in panels:
        panel.boxplot(samples,
                      labels=labels,
                      boxprops=boxprops,
                      meanprops=meanlineprops,
                      widths=0.6,
                      meanline=True,
                      showmeans=True)
        if title is not None:
            panel.set_title(title, fontweight='bold', fontsize=fs)
        label_figure(panel, 'Dual Clone Probability (%)', y_label, fs=fs)

    plt.show(block=False)
    raw_input('Press enter to close...')
    plt.savefig('fig_S5.png', format='png', dpi=300)
    plt.close()
Esempio n. 6
0
def main(*args, **kwargs):

    mod_range = [.1, .15, .2, .25, .3, .35, .4]
    #mod_range = [.1,.4]
    #mod_range = [.0,.05,.1,.15,.2,.25]
    labels = ['{}%'.format(int(100 * m)) for m in mod_range]

    modifications = {
        'chain_deletion_prob': mod_range,
    }

    repeats = 10
    match_limit_low = 750
    match_limit = 250

    settings = default_settings()
    settings['cell_freq_max'] = 0.01
    settings['num_cells'] = 1000
    settings['cpw'] = (300, )
    settings['chain_deletion_prob'] = 0.1
    settings['chain_misplacement_prob'] = 0.0
    settings['alpha_sharing_probs'] = None
    settings['beta_sharing_probs'] = None

    all_cm_fdr = {}
    all_rep_fdr = {}

    for sn in ['madhype', 'alphabetr']:

        if sn == 'madhype':
            solver_options = [{
                'threshold':
                10.,  # minimum ratio accepted by match_probability
            }]

        elif sn == 'alphabetr':
            solver_options = [{}]

        solvers = [sn]
        all_cm_fdr[sn] = {}
        all_rep_fdr[sn] = {}

        for mod, values in modifications.items():

            all_cm_fdr[sn][mod] = []
            all_rep_fdr[sn][mod] = []

            for i, v in enumerate(values):

                all_results = []

                # iterate across system
                for r in xrange(repeats):

                    specific_settings = copy.copy(settings)

                    specific_settings[mod] = v
                    specific_settings['seed'] = r

                    data, results = simulate_run(solvers, solver_options,
                                                 **specific_settings)

                    results[0]['fdr_for_cm'] = _get_fdr_for_match_limit(
                        data, results, match_limit)
                    results[0]['fdr_for_rep'] = _get_fdr_for_match_limit(
                        data, results, match_limit_low)

                    print 'FDR (cm):', results[0]['fdr_for_cm']
                    print 'FDR (low cm):', results[0]['fdr_for_rep']

                    all_results += results

                all_cm_fdr[sn][mod].append(
                    [results['fdr_for_cm'] for results in all_results])
                all_rep_fdr[sn][mod].append(
                    [results['fdr_for_rep'] for results in all_results])

    # plot/display settings
    fs = 18
    boxprops = dict(linewidth=3.0, zorder=1)
    meanlineprops = dict(linestyle='-', linewidth=2, color='black', zorder=0)
    plt.rcParams['xtick.labelsize'] = fs - 4

    # figure specific properties
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 12), sharey=False)
    plt.subplots_adjust(left=0.15, right=0.9, hspace=0.3, wspace=0.5)

    # set border for figure
    for axe in axes:
        for ax in axe:
            [i.set_linewidth(3) for i in ax.spines.itervalues()]

    bp = axes[0][0].boxplot(all_cm_fdr['madhype']['chain_deletion_prob'],
                            labels=labels,
                            boxprops=boxprops,
                            meanprops=meanlineprops,
                            widths=0.6,
                            meanline=True,
                            showmeans=True)

    axes[0][0].set_title('MAD-HYPE', fontweight='bold', fontsize=fs)
    label_figure(axes[0][0], 'Chain Deletion Probability', 'FDR (%)', fs=fs)

    bp = axes[0][1].boxplot(all_cm_fdr['alphabetr']['chain_deletion_prob'],
                            labels=labels,
                            boxprops=boxprops,
                            meanprops=meanlineprops,
                            widths=0.6,
                            meanline=True,
                            showmeans=True)

    axes[0][1].set_title('ALPHABETR', fontweight='bold', fontsize=fs)
    label_figure(axes[0][1], 'Chain Deletion Probability', 'FDR (%)', fs=fs)

    bp = axes[1][0].boxplot(all_rep_fdr['madhype']['chain_deletion_prob'],
                            labels=labels,
                            boxprops=boxprops,
                            meanprops=meanlineprops,
                            widths=0.6,
                            meanline=True,
                            showmeans=True)

    axes[1][0].set_title('MAD-HYPE', fontweight='bold', fontsize=fs)
    label_figure(axes[1][0], 'Chain Deletion Probability', 'FDR (%)', fs=fs)

    bp = axes[1][1].boxplot(all_rep_fdr['alphabetr']['chain_deletion_prob'],
                            labels=labels,
                            boxprops=boxprops,
                            meanprops=meanlineprops,
                            widths=0.6,
                            meanline=True,
                            showmeans=True)

    axes[1][1].set_title('ALPHABETR', fontweight='bold', fontsize=fs)
    label_figure(axes[1][1], 'Chain Deletion Probability', 'FDR (%)', fs=fs)

    plt.show(block=False)
    raw_input('Press enter to close...')
    plt.savefig('fig_S7.png', format='png', dpi=300)
    plt.close()
Esempio n. 7
0
        'seed': 1,
    }

    id_map = np.zeros((len(cpw_range), options['num_cells']))

    for ii, num_wells in enumerate(num_wells_range):
        for i, cpw in enumerate(cpw_range):
            for seed in xrange(repeats):

                specific_options = copy.copy(options)

                specific_options['cpw'] = (cpw, )
                specific_options['num_wells'] = (num_wells, )
                specific_options['seed'] = seed

                _, results = simulate_run(solvers, solver_options,
                                          **specific_options)

                id_map[i, :] += results[0]['pattern']

            print 'Finished cpw = {}!'.format(cpw)

        # display graph
        c_labels = [
            v for v in [1, 10, 100, 1000, 10000] if v <= max(cpw_range)
        ]
        c_inds = [
            min(range(len(cpw_range)), key=lambda i: abs(cpw_range[i] - v))
            for v in c_labels
        ]

        fig, ax = plt.subplots()
Esempio n. 8
0
def main():
    """Build Figure 3: a match-probability map plus a simulation run.

    The top-left panel shows, for every `(w_i, w_j)` in 0..24 x 0..24, the
    value `1 / (1 + 10**(-s))`, where `s` is the first element returned by
    `match_probability` for the fixed well layout in `well_data`.  The other
    two panels are handed to `madhype.simulate_run` through the
    `plot_auroc_options` and `plot_repertoire_options` settings.  The figure
    is saved as `Figure3.png`.
    """
    fig = plt.figure(figsize=(12, 10))

    layout = plt.GridSpec(2, 2)
    ax1 = plt.subplot(layout[0, 0])
    ax2 = plt.subplot(layout[0, 1])
    ax3 = plt.subplot(layout[1, :])  # bottom panel spans both columns

    plt.subplots_adjust(wspace=0.45, hspace=0.3)

    # Spanning parameters
    w_i_lo, w_i_hi = 0, 24
    w_j_lo, w_j_hi = 0, 24
    prior = 1e-5

    # Coordinate grids covering both ranges, end points included
    X, Y = np.mgrid[w_i_lo:w_i_hi + 1, w_j_lo:w_j_hi + 1]

    # Preallocate data matrix space
    Z = np.zeros((X.shape[0], Y.shape[1]))

    # Define default well distribution parameters
    well_data = {
        'w_ij': (24, ),
        'w_o': (48, ),
        'w_tot': (96, ),
        'cpw': (10, ),
        'alpha': 2
    }

    plt.sca(ax1)

    for row, col in np.ndindex(X.shape[0], Y.shape[1]):
        # Only the two single-chain well counts change between cells.
        well_data['w_i'] = (X[row, col], )
        well_data['w_j'] = (Y[row, col], )
        score = match_probability(well_data, prior)[0]
        val = 10**(-score)
        Z[row, col] = 1. / (1. + val)

    matplotlib.rcParams['xtick.labelsize'] = 16
    matplotlib.rcParams['ytick.labelsize'] = 16

    heatmap = ax1.pcolor(X,
                         Y,
                         Z,
                         norm=colors.Normalize(vmin=0, vmax=1),
                         cmap='PuBu_r')

    cbar = fig.colorbar(heatmap, ax=ax1)
    cbar.ax.tick_params(labelsize=16)
    cbar.set_ticks([0, .5, 1])
    cbar.set_ticklabels(['0%', '50%', '100%'])

    ax1.tick_params(width=3, length=8, labelsize=18)
    plt.xticks([0, 10, 20])
    plt.yticks([0, 10, 20])

    plt.xlabel('$w_{i}$', fontsize=20)
    plt.ylabel('$w_{j}$', fontsize=20)

    # Simulation run that fills the remaining two panels
    solvers = ['madhype']
    solver_options = [{}]
    settings = {
        'plot_auroc': True,
        'plot_auroc_options': {
            'ax': ax2,
            'fig': fig,
        },
        'plot_repertoire': True,
        'plot_repertoire_options': {
            'ax': ax3,
            'fig': fig,
        },
        'visual': True,
        'silent': False,
    }

    _data, _results = madhype.simulate_run(solvers, solver_options,
                                           **settings)

    plt.savefig('Figure3.png')
Esempio n. 9
0
def main(*args, **kwargs):
    """Sweep chain deletion and misplacement probabilities and plot them.

    Each probability value of each swept setting is simulated `repeats`
    times (seeded 0..repeats-1) with every other setting held at its base
    value.  `'positives'` and `'frac_repertoire'` of every result entry are
    collected and drawn as a 2 x 2 grid of boxplots (rows: swept setting,
    columns: matches / coverage).  The figure is shown and saved as
    `figS2.png` after the user presses enter.
    """
    sweep = [.0, .1, .2, .3, .4, .5, .75]
    modifications = {
        'chain_deletion_prob': list(sweep),
        'chain_misplacement_prob': list(sweep),
    }

    repeats = 10

    settings = default_settings()
    for key, value in (
        ('cell_freq_max', 0.01),
        ('num_cells', 1000),
        ('cpw', (100, )),
        ('chain_deletion_prob', 0.0),
        ('chain_misplacement_prob', 0.0),
    ):
        settings[key] = value

    all_coverage = {}
    all_matches = {}

    solvers = ['madhype']
    solver_options = [{}]

    for mod, values in modifications.items():

        all_coverage[mod] = []
        all_matches[mod] = []

        for v in values:

            # Results are gathered per value, so each box holds one value.
            all_results = []

            # iterate across system
            for r in xrange(repeats):
                specific_settings = copy.copy(settings)
                specific_settings[mod] = v
                specific_settings['seed'] = r

                _, results = simulate_run(solvers, solver_options,
                                          **specific_settings)
                all_results += results

            all_coverage[mod].append(
                [entry['frac_repertoire'] for entry in all_results])
            all_matches[mod].append(
                [entry['positives'] for entry in all_results])

    # plot/display settings
    fs = 18
    boxprops = dict(linewidth=3.0, zorder=1)
    meanlineprops = dict(linestyle='-', linewidth=2, color='black', zorder=0)
    plt.rcParams['xtick.labelsize'] = fs - 4

    # figure specific properties
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 12), sharey=False)
    plt.subplots_adjust(left=0.15, right=0.9, hspace=0.3, wspace=0.4)

    # set border for figure
    for axes_row in axes:
        for panel in axes_row:
            for spine in panel.spines.itervalues():
                spine.set_linewidth(3)

    labels = ['0%', '10%', '20%', '30%', '40%', '50%', '75%']

    # (axes, data, x label, y label), in drawing order.
    panels = [
        (axes[0][0], all_matches['chain_deletion_prob'],
         'Chain Deletion Probability', 'Clonal Matches (#)'),
        (axes[0][1], all_coverage['chain_deletion_prob'],
         'Chain Deletion Probability', 'Repertoire Coverage'),
        (axes[1][0], all_matches['chain_misplacement_prob'],
         'Chain Misplacement Probability', 'Clonal Matches (#)'),
        (axes[1][1], all_coverage['chain_misplacement_prob'],
         'Chain Misplacement Probability', 'Repertoire Coverage'),
    ]

    for panel, samples, x_label, y_label in panels:
        panel.boxplot(samples,
                      labels=labels,
                      boxprops=boxprops,
                      meanprops=meanlineprops,
                      widths=0.6,
                      meanline=True,
                      showmeans=True)
        label_figure(panel, x_label, y_label, fs=fs)

    plt.show(block=False)
    raw_input('Press enter to close...')
    plt.savefig('figS2.png', format='png', dpi=300)
    plt.close()
Esempio n. 10
0
def main(*args, **kwargs):

    modifications = {'prior_alpha': [.1, 0.5, 1.0, 2.0, 10]}

    repeats = 2

    settings = default_settings()
    settings['cell_freq_max'] = 0.01
    settings['num_cells'] = 100
    settings['cpw'] = (100, )
    settings['chain_deletion_prob'] = 0.1
    settings['chain_misplacement_prob'] = 0.0

    if os.path.isfile('figS3A_data.p'):
        (all_coverage, all_matches) = pickle.load(open('figS3A_data.p', 'rb'))
    else:
        all_coverage = {}
        all_matches = {}

    solvers = ['madhype']
    solver_options = [{}]

    for mod, values in modifications.items():

        all_results = []

        try:
            all_coverage[mod]
            all_matches[mod]
            print 'Skipping {}!'.format(mod)
            continue

        except KeyError:
            all_coverage[mod] = []
            all_matches[mod] = []

        for i, v in enumerate(values):

            # iterate across system
            for r in xrange(repeats):

                specific_settings = copy.copy(settings)

                specific_settings[mod] = v
                specific_settings['seed'] = r

                _, results = simulate_run(solvers, solver_options,
                                          **specific_settings)

                all_results += results

            all_coverage[mod].append(
                [results['frac_repertoire'] for results in all_results])
            all_matches[mod].append(
                [results['positives'] for results in all_results])

        pickle.dump((all_coverage, all_matches), open('figS3A_data.p', 'wb'))

    # plot/display settings
    fs = 18
    boxprops = dict(linewidth=3.0, zorder=1)
    meanlineprops = dict(linestyle='-', linewidth=2, color='black', zorder=0)
    plt.rcParams['xtick.labelsize'] = fs - 4

    # figure specific properties
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 12), sharey=False)
    plt.subplots_adjust(left=0.15, right=0.9, hspace=0.3, wspace=0.5)

    # set border for figure
    for axe in axes:
        for ax in axe:
            [i.set_linewidth(3) for i in ax.spines.itervalues()]

    labels = ['0.1', '0.5', '1', '2', '10']

    bp = axes[0][0].boxplot(all_matches['prior_alpha'],
                            labels=labels,
                            boxprops=boxprops,
                            meanprops=meanlineprops,
                            widths=0.6,
                            meanline=True,
                            showmeans=True)

    label_figure(axes[0][0], r'Prior $\alpha$', 'Clonal Matches (#)', fs=fs)

    bp = axes[0][1].boxplot(all_coverage['prior_alpha'],
                            labels=labels,
                            boxprops=boxprops,
                            meanprops=meanlineprops,
                            widths=0.6,
                            meanline=True,
                            showmeans=True)

    label_figure(axes[0][1], r'Prior $\alpha$', 'Repertoire Coverage', fs=fs)

    ##### HEATMAPS #####

    print 'Start heatmaps...'

    settings = default_settings()
    settings['chain_deletion_prob'] = 0.1
    settings['chain_misplacement_prob'] = 0.0

    num_wells_range = [(48, ), (96, )]
    cpw_range = np.logspace(0, 4, 5, dtype=int)
    freq_range = np.logspace(-4, -1, 13)
    #cpw_range = np.logspace(0,3,4,dtype=int)
    #freq_range = np.logspace(-2,-1,3)

    repeats = 3
    fs = 18

    pickle_files = ['figS3C_data.p', 'figS3D_data.p']

    for plot_ind, num_wells in enumerate(num_wells_range):

        if os.path.isfile(pickle_files[plot_ind]):
            id_map = pickle.load(open(pickle_files[plot_ind], 'rb'))
        else:
            id_map = np.zeros((len(freq_range), len(cpw_range)))

        for i, freq in enumerate(freq_range):

            for j, cpw in enumerate(cpw_range):

                print 'Starting f = {} / cpw = {}...'.format(freq, cpw)
                val = []

                print 'Val:', id_map[i][j]
                if id_map[i][j] != 0.:
                    print 'Skipping! {} found...'.format(id_map[i][j])
                    continue

                for r in xrange(repeats):

                    if int(1. / freq) >= 5000 and cpw >= 1000:
                        threshold = 2.0
                    else:
                        threshold = 0.1

                    specific_settings = copy.copy(settings)

                    specific_settings['num_wells'] = num_wells
                    specific_settings['cpw'] = (cpw, )
                    specific_settings[
                        'cell_freq_distro'] = 'uniform'  # forces uniform
                    #specific_settings['cell_freq_max'] = 0.0 # forces uniform
                    specific_settings['num_cells'] = int(
                        1. / freq)  # forces uniform
                    specific_settings['threshold'] = threshold
                    specific_settings['seed'] = r

                    _, results = simulate_run(solvers, solver_options,
                                              **specific_settings)

                    val.append(results[0]['frac_repertoire'])

                id_map[i][j] = np.mean(val)
                pickle.dump(id_map, open(pickle_files[plot_ind], 'wb'))

        axes[1][plot_ind].imshow(id_map, interpolation='nearest')
        axes[1][plot_ind].set_aspect(aspect='auto')

        # X axis
        c_labels = [
            v for v in [1, 10, 100, 1000, 10000]
            if v <= max(cpw_range) and v >= min(cpw_range)
        ]
        c_inds = [
            min(range(len(cpw_range)), key=lambda i: abs(cpw_range[i] - v))
            for v in c_labels
        ]
        axes[1][plot_ind].set_xticks(c_inds)
        axes[1][plot_ind].set_xticklabels(c_labels)

        # Y axis
        c_labels = [
            v for v in [1e-4, 1e-3, 1e-2, 1e-1]
            if v <= max(freq_range) and v >= min(freq_range)
        ]
        c_inds = [
            min(range(len(freq_range)), key=lambda i: abs(freq_range[i] - v))
            for v in c_labels
        ]
        axes[1][plot_ind].set_yticks(c_inds)
        axes[1][plot_ind].set_yticklabels(c_labels)

        plt.title('Identification of clones with {} wells'.format(
            num_wells[0]))
        axes[1][plot_ind].set_xlabel('Cells/Well', fontsize=fs)
        axes[1][plot_ind].set_ylabel('Clonal Frequency', fontsize=fs)

    # plot figure
    plt.show(block=False)
    raw_input('Press enter to close...')
    plt.savefig('Figure S3.png', format='png', dpi=300)
    plt.close()
Esempio n. 11
0
# Script body: run the configured solvers once per cells-per-well setting,
# draw each comparison into its own row of a shared figure, and print a short
# summary per solver.
# NOTE(review): `solvers`, `solver_options` and `plot_comparison` are not
# defined in this excerpt; they are assumed to come from earlier in the file.

# One cells-per-well configuration per subplot row
cpws = [(100, ), (30, )]

# Set up parameters that apply to all solvers/simulations
general_options = {
    'num_wells': (96, ),
}

# One subplot row per entry in cpws
fig, ax = plt.subplots(2, 1, figsize=(10, 10))

# The figure is shared by every run, so it is stored once up front
general_options['fig'] = fig

for i, cpw in enumerate(cpws):

    # set the number of cells per well and the axes for this iteration
    # (general_options is updated in place and reused for the next one)
    general_options['cpw'] = cpw
    general_options['ax'] = ax[i]

    # Run the simulation for every solver with the current options
    data, results = madhype.simulate_run(solvers, solver_options,
                                         **general_options)

    # The same option dict (including 'fig' and 'ax') goes to the plot helper
    plot_comparison(results, **general_options)

    # Print out results, pairing each solver name with its result entry
    for solver, result in zip(solvers, results):

        print "{} Results:".format(solver)
        print "  Total # Cells:", result['total']
        print "  Chain pairs identified:", result['positives']
        print "  Chain pairs not identified:", result['negatives']
Esempio n. 12
0
def main():
    """Run three plate layouts and save their repertoire/frequency plots.

    Each entry of `settings_list` (a `num_wells` / `cpw` pair) is simulated
    once with the shared base settings.  For run `i` the `'ax'` entries of
    the `plot_repertoire` and `plot_frequency_estimation` option dicts are
    pointed at `ax1[i]` and `ax2[i]` before `madhype.simulate_run` is called.
    The two figures are saved as `Figure 5B.png` and `Figure 5C.png`, then
    shown until the user presses enter.
    """
    fig1, ax1 = plt.subplots(3, 1, figsize=(8, 12))
    fig2, ax2 = plt.subplots(1, 3, figsize=(15, 4))

    # Adjusts the pyplot current figure (no figure is selected explicitly).
    plt.subplots_adjust(wspace=0.45, bottom=0.2)

    # some default settings
    solvers = ['madhype']
    solver_options = [{}]

    # Shared axis limits for the frequency estimation plot.
    freq_limits = ((2.) * (10.**-4), (6. / 5) * (10.**-2))

    repertoire_options = {
        'ax': ax1,
        'fig': fig1,
    }
    frequency_options = {
        'ax': ax2,
        'fig': fig2,
        'figsize': (12, 9),
        'xlim': freq_limits,
        'ylim': freq_limits,
    }

    base_settings = {
        'plot_repertoire': repertoire_options,
        'plot_frequency_estimation': frequency_options,
        'num_cells': 1000,
        'cell_freq_max': 0.01,  # 0.01
        'cell_freq_constant': 2,
        'visual': True,
        'silent': False,
    }

    settings_list = [
        {
            'num_wells': (42, 54),
            'cpw': (25, 1758),
        },
        {
            'num_wells': (60, 36),
            'cpw': (159, 2402),
        },
        {
            'num_wells': (96, ),
            'cpw': (1000, ),
        },
    ]

    for index in xrange(len(settings_list)):

        # Shallow copy: the nested plot option dicts stay shared, so their
        # 'ax' entries are re-pointed on every pass.
        specific_options = dict(base_settings)
        specific_options.update(settings_list[index])

        specific_options['plot_repertoire']['ax'] = ax1[index]
        specific_options['plot_frequency_estimation']['ax'] = ax2[index]

        _, _results = madhype.simulate_run(solvers, solver_options,
                                           **specific_options)

    fig1.savefig('Figure 5B.png', format='png', dpi=300)
    fig2.savefig('Figure 5C.png', format='png', dpi=300)

    plt.show(block=False)
    raw_input('Press enter to close...')
    plt.close()
Esempio n. 13
0
def main(*args, **kwargs):
    """Fit a frequency-distribution exponent per sample, then simulate it.

    For every sample in a fixed table this reads one number per line from
    ``./figures/<file>``, normalises the values so they sum to 1, and
    minimises the module-level ``_error`` function with Nelder-Mead starting
    from 1.0.  The fitted value ``x`` is turned into ``1 + 1/x`` and passed to
    ``madhype.simulate_run`` (solver ``'alphabetr'``) as
    ``cell_freq_constant``, with the largest normalised value as
    ``cell_freq_max``.  Afterwards the current pyplot figure is saved to
    ``figure_S1cd.png``.

    ``*args`` and ``**kwargs`` are accepted but not used.
    """
    fnames = {
        'Adjacent Tissue': 'PTC.txt',
        'Tumor Tissue': 'TTC.txt',
    }

    # Sample-specific hyperparameters, merged over the defaults below.
    specific_settings = {
        'Adjacent Tissue': {
            'num_cells': 1000,
            'num_wells': (48, 48),
            'cpw': (50, 1000),
        },
        'Tumor Tissue': {
            'num_cells': 1000,
            'num_wells': (48, 48),
            'cpw': (50, 1000),
        },
    }

    solvers = ['alphabetr']
    solver_options = [{}]

    # iterate across sample/data sets
    for sample, fname in fnames.items():

        with open('./figures/' + fname) as f:
            # Skip blank lines (e.g. a trailing newline) instead of letting
            # float('') raise.
            counts = [float(line) for line in f if line.strip()]

        # Normalise to frequencies; the total is computed once rather than
        # once per element.
        total = sum(counts)
        data = [x / total for x in counts]

        # SciPy's Nelder-Mead option is spelled 'maxiter'; a misspelled key is
        # ignored and the default iteration cap stays in force.
        fit = minimize(_error, 1.0, args=(data,), method='Nelder-Mead',
                       tol=1e-12, options={'maxiter': 10000})

        # fit.x is the solution array returned by SciPy, so alpha is an array
        # as well (one element for this 1-D problem).
        alpha = 1. + 1. / fit.x

        print('Sample: {} / Alpha: {}'.format(sample, alpha))

        # Default settings.  Keep this literal free of stray string literals:
        # a triple-quoted "comment" placed before a key is concatenated with
        # that key and silently replaces it.
        settings = {
            'cell_freq_constant': alpha,
            'cell_freq_max': max(data),
            'visual': False,
            'display': False,
            'silent': False,
        }

        # update settings with sample specific hyperparameters
        settings.update(specific_settings[sample])

        madhype.simulate_run(solvers, solver_options, **settings)

    plt.subplots_adjust(left=0.125, right=0.9, top=0.9, bottom=0.1,
                        hspace=0.4, wspace=0.4)

    plt.savefig('figure_S1cd.png', dpi=300)
Esempio n. 14
0
def main():
    """Compare solvers over several cells-per-well settings and plot fig4C.

    For each value in ``cpws`` this runs ``madhype.simulate_run``
    ``num_simulations`` times with both solvers and records each result's
    ``'positives'`` and ``'frac_repertoire'`` entries.  The collected values
    are drawn as two box plots (matches on top, repertoire coverage below),
    saved to ``fig4C.png``, and shown until the user presses enter.
    """
    # Set up run parameters
    solvers = ['madhype', 'alphabetr']
    solver_options = [{}, {}]  # don't change default parameters

    # variants
    cpws = [(10, ), (30, )]
    num_simulations = 5

    # Figure handed to every simulation through the 'fig' option; its axes
    # are not used directly here.
    sim_fig, _ = plt.subplots(2, 1, figsize=(10, 10))

    # Set up parameters that apply to all solvers/simulations
    general_options = {
        'num_wells': (96, ),
        'fig': sim_fig,
    }

    matches_by_cpw = {}
    coverage_by_cpw = {}

    for cpw in cpws:

        print('Running simulations with {} cpw...'.format(cpw))

        # set the number of cells per well
        general_options['cpw'] = cpw
        matches_by_cpw[cpw] = {s: [] for s in solvers}
        coverage_by_cpw[cpw] = {s: [] for s in solvers}

        for index in xrange(num_simulations):

            print('Starting simulation {}/{}...'.format(
                index + 1, num_simulations))

            # Run with default solver parameters
            _, results = madhype.simulate_run(solvers, solver_options,
                                              **general_options)

            # results are matched to solvers by position
            for method_name, result in zip(solvers, results):
                matches_by_cpw[cpw][method_name].append(result['positives'])
                coverage_by_cpw[cpw][method_name].append(
                    result['frac_repertoire'])

    ### START FIGURE ###

    # settings
    boxprops = dict(linewidth=3.0, zorder=1)
    meanlineprops = dict(linestyle='-', linewidth=2, color='black', zorder=0)
    fs = 18

    # figure specific properties
    box_fig, axes = plt.subplots(nrows=len(cpws),
                                 ncols=1,
                                 figsize=(2 * len(cpws) + 1, 12),
                                 sharey=False)
    plt.subplots_adjust(left=0.3, right=0.9, hspace=1.0, wspace=1.0)

    # set border for figure
    for axis in axes:
        for spine in axis.spines.values():
            spine.set_linewidth(3)

    # Box order is solver-major: every cpw for the first solver, then every
    # cpw for the second.
    matches = [matches_by_cpw[c][s] for s in solvers for c in cpws]
    coverage = [coverage_by_cpw[c][s] for s in solvers for c in cpws]

    # One label per box, repeated for each solver (was hard-coded to 2).
    labels = ['$N$ = {}'.format(c) for _ in solvers for c in cpws]

    # NOTE(review): the tick positions (1.5, 3.5), the divider at x=2.5 and
    # the tick labels below assume exactly four boxes, and the labels
    # ('100', '1000') match neither the current cpws (10, 30) nor the
    # solver-major box order -- confirm the intended grouping.

    # boxplot matches
    bp = axes[0].boxplot(matches,
                         labels=labels,
                         boxprops=boxprops,
                         meanprops=meanlineprops,
                         widths=0.6,
                         meanline=True,
                         showmeans=True)
    axes[0].plot((2.5, 2.5), (0, 1000), linestyle='--', color='k')
    setBoxColors(bp)

    axes[0].set_xticks((1.5, 3.5))
    axes[0].set_xticklabels(('100', '1000'), fontsize=fs)
    axes[0].set_xlabel('Cells/well (#)', fontsize=fs)

    axes[0].set_ylim((0, 1000))
    axes[0].set_yticks((0, 250, 500, 750, 1000))
    axes[0].set_yticklabels((0, 250, 500, 750, 1000), fontsize=fs)
    axes[0].set_ylabel('Clonal Matches (#)', fontsize=fs)

    # boxplot coverage
    bp = axes[1].boxplot(coverage,
                         labels=labels,
                         boxprops=boxprops,
                         meanprops=meanlineprops,
                         widths=0.6,
                         meanline=True,
                         showmeans=True)
    axes[1].plot((2.5, 2.5), (0, 1), linestyle='--', color='k')
    setBoxColors(bp)

    axes[1].set_xticks((1.5, 3.5))
    axes[1].set_xticklabels(('N = 100', 'N = 1000'), fontsize=fs)
    axes[1].set_xlabel('Cells/well', fontsize=fs)

    axes[1].set_ylim((0., 1.))
    axes[1].set_yticks((0., .5, 1.))
    axes[1].set_yticklabels(('0%', '50%', '100%'), fontsize=fs)
    axes[1].set_ylabel('Repertoire Coverage', fontsize=fs)

    # Save through the figure handle *before* the interactive prompt: if the
    # window is closed while waiting, a later plt.savefig() would write a
    # fresh, empty figure instead.
    box_fig.savefig('fig4C.png', format='png', dpi=200)

    plt.show(block=False)
    raw_input('Press enter to close...')
    plt.close(box_fig)