Beispiel #1
0
def test_other():
    """Draw the comparison plots for the stored test model and yield image checks.

    This is a generator test: the plotting code only runs once the test runner
    starts iterating, and every callable produced by ``_image_comparison`` is
    yielded back to the runner.

    The model is not re-optimized here. Its parameters are restored from
    ``test_data_model.npz``; the alternative embeddings come from
    ``test_data_others.npz``. Both files are looked up in ``basedir``.

    Raises:
        IOError: if one of the fixture files cannot be opened.
    """
    np.random.seed(111)
    import matplotlib
    # Reset to matplotlib's defaults so the local rc configuration is not used.
    matplotlib.rcParams.update(matplotlib.rcParamsDefault)
    matplotlib.rcParams[u'text.usetex'] = False
    import warnings
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        # np.load keeps an .npz archive open until the returned object is
        # closed, so read every needed array up front and close the archive.
        # NOTE(review): if 'labels' is stored as an object array, this load
        # needs allow_pickle=True as well -- confirm against the fixture.
        with np.load(os.path.join(basedir, 'test_data_model.npz')) as test_data:
            Y = test_data['Y']
            labels = test_data['labels']
            model_params = test_data['model_params']
            pt = test_data['t']

        # 'dims' is unpacked with .tolist() and then indexed by method name,
        # i.e. it is a pickled Python object. np.load refuses to unpickle
        # unless allow_pickle=True is passed explicitly.
        with np.load(os.path.join(basedir, 'test_data_others.npz'),
                     allow_pickle=True) as test_init:
            dims = test_init['dims'].tolist()
            X_init = test_init['X_init']

        # Build the model without initializing it, inject the stored
        # parameters, then let GPy finish the setup.
        m = GPy.models.BayesianGPLVM(Y.copy(),
                                     2,
                                     num_inducing=25,
                                     initialize=False)
        m.param_array[:] = model_params
        m.initialize_parameter()

        from topslam import ManifoldCorrectionKNN
        mc = ManifoldCorrectionKNN(m, 10)

        from topslam.plotting import plot_comparison, plot_dist_hist, plot_labels_other, plot_landscape_other
        plot_comparison(mc, X_init, dims, labels, np.unique(labels), 0)

        plot_dist_hist(Y)

        # Landscape and labels of the t-SNE embedding share one axes.
        X = X_init[:, dims['t-SNE']]
        fig, ax = plt.subplots()
        plot_landscape_other(X, pt, labels, ax=ax)
        plot_labels_other(X, pt, labels, ax=ax)

    baseline_images = [
        'other_{}'.format(sub)
        for sub in ("comparison", "dist_hist", "landscape_labs")
    ]
    for do_test in _image_comparison(baseline_images=baseline_images,
                                     extensions=extensions):
        yield do_test
Beispiel #2
0
def test_other():
    """Draw the comparison plots for the stored test model and yield image checks.

    This is a generator test: the plotting code only runs once the test runner
    starts iterating, and every callable produced by ``_image_comparison`` is
    yielded back to the runner.

    The model is not re-optimized here. Its parameters are restored from
    ``test_data_model.npz``; the alternative embeddings come from
    ``test_data_others.npz``. Both files are looked up in ``basedir``.

    Raises:
        IOError: if one of the fixture files cannot be opened.
    """
    np.random.seed(111)
    import matplotlib

    # Reset to matplotlib's defaults so the local rc configuration is not used.
    matplotlib.rcParams.update(matplotlib.rcParamsDefault)
    matplotlib.rcParams[u"text.usetex"] = False
    import warnings

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        # np.load keeps an .npz archive open until the returned object is
        # closed, so read every needed array up front and close the archive.
        # NOTE(review): if "labels" is stored as an object array, this load
        # needs allow_pickle=True as well -- confirm against the fixture.
        with np.load(os.path.join(basedir, "test_data_model.npz")) as test_data:
            Y = test_data["Y"]
            labels = test_data["labels"]
            model_params = test_data["model_params"]
            pt = test_data["t"]

        # "dims" is unpacked with .tolist() and then indexed by method name,
        # i.e. it is a pickled Python object. np.load refuses to unpickle
        # unless allow_pickle=True is passed explicitly.
        with np.load(os.path.join(basedir, "test_data_others.npz"), allow_pickle=True) as test_init:
            dims = test_init["dims"].tolist()
            X_init = test_init["X_init"]

        # Build the model without initializing it, inject the stored
        # parameters, then let GPy finish the setup.
        m = GPy.models.BayesianGPLVM(Y.copy(), 2, num_inducing=25, initialize=False)
        m.param_array[:] = model_params
        m.initialize_parameter()

        from topslam import ManifoldCorrectionKNN

        mc = ManifoldCorrectionKNN(m, 10)

        from topslam.plotting import plot_comparison, plot_dist_hist, plot_labels_other, plot_landscape_other

        plot_comparison(mc, X_init, dims, labels, np.unique(labels), 0)

        plot_dist_hist(Y)

        # The landscape plot hands back the axes it drew on; the labels are
        # drawn onto that same axes.
        X = X_init[:, dims["t-SNE"]]
        ax = plot_landscape_other(X, pt, labels)
        plot_labels_other(X, pt, labels, ax=ax)

    baseline_images = ["other_{}".format(sub) for sub in ("comparison", "dist_hist", "landscape_labs")]
    for do_test in _image_comparison(baseline_images=baseline_images, extensions=extensions):
        yield do_test
Beispiel #3
0
def example_deng(optimize=True, plot=True):
    """Build a model of the Deng single-cell RNA-seq data set.

    The filtered expression matrix is cached as
    ``~/tmp/Deng/filtered_expression_values.csv``. If that file exists it is
    read back; otherwise the data is fetched through
    ``GPy.util.datasets.singlecell_rna_seq_deng``, filtered with
    ``filter_RNASeq`` and written to the cache.

    The values are standardized with the global mean and standard deviation.
    The 8-cell and 16-cell stages are split into regular and 'split' samples,
    and every stage gets a short display label. The model is created from the
    embeddings returned by ``run_methods``.

    Args:
        optimize: if true, run ``optimize_model`` on the model.
        plot: if true, draw the comparison plot for the model.

    Returns:
        The model, with ``Ymean``, ``Ystd``, ``data_labels``,
        ``data_ulabels``, ``data``, ``X_init`` and ``dims`` attached.
    """
    import os
    import pandas as pd
    import GPy
    import numpy as np
    from topslam.filtering import filter_RNASeq
    # Reproducibility: BGPLVM has local optima.
    np.random.seed(42)

    # Stages to keep, in the order the rows are arranged when the data is
    # loaded from scratch.
    stage_order = ['Zygote',
                   '2-cell embryo',
                   'Early 2-cell blastomere', 'Mid 2-cell blastomere', 'Late 2-cell blastomere',
                   '4-cell blastomere', '8-cell blastomere', '16-cell blastomere',
                   'Early blastocyst cell', 'Mid blastocyst cell', 'Late blastocyst cell',
                   'fibroblast',
                   'adult liver',
                  ]

    folder_path = os.path.expanduser('~/tmp/Deng')
    csv_file = os.path.join(folder_path, 'filtered_expression_values.csv')

    if os.path.exists(csv_file):
        print('Loading previous filtered data: {}'.format(csv_file))
        Y_bgplvm = pd.read_csv(csv_file, index_col=[0,1,2], header=0)
    else:
        print('Loading data:')
        data = GPy.util.datasets.singlecell_rna_seq_deng()
        if not os.path.exists(folder_path):
            # makedirs also creates a missing '~/tmp' parent directory;
            # os.mkdir would fail in that case.
            os.makedirs(folder_path)
        Ydata = data['Y'].copy()
        Ydata.columns = Ydata.columns.to_series().apply(str.upper)
        # Build a three-level row index with the stage label first, then
        # keep only the stages listed above, in that order.
        Ydata = Ydata.reset_index().set_index('index', append=True)
        Ydata['labels'] = data['labels'].values
        Ydata = Ydata.set_index('labels', append=True)
        Ydata = Ydata.reorder_levels([0,2,1])
        Ydata = Ydata.reset_index([0,2]).loc[stage_order].set_index(['level_0', 'index'], append=True)

        Y = Ydata.copy()
        # Drop everything after the first '.' of each column name.
        Y.columns = [c.split('.')[0] for c in Y.columns]
        Y_bgplvm = filter_RNASeq(Y)
        print('\nSaving data to tmp file: {}'.format(csv_file))
        Y_bgplvm.to_csv(csv_file)

    labels = Y_bgplvm.index.get_level_values(0).values
    Ymean = Y_bgplvm.values.mean()
    Ystd = Y_bgplvm.values.std()

    # NOTE(review): `.values` may be a view of the frame's data, in which
    # case the in-place operations below also standardize `Y_bgplvm` (stored
    # as `m.data` further down) -- confirm that this is intended.
    Y_m = Y_bgplvm.values
    Y_m -= Ymean
    Y_m /= Ystd

    # Get the labels right for the split experiments: within the 8- and
    # 16-cell stages a sample counts as 'split' when the part of its name
    # before the first '-' contains 'split'.
    for stage, short in (('8-cell blastomere', '8'), ('16-cell blastomere', '16')):
        sample_names = Y_bgplvm.loc[stage].index.get_level_values(1)
        labels[labels==stage] = [
            short + ' split' if 'split' in name.split('-')[0] else short
            for name in sample_names
        ]

    # Unique labels in order of first appearance.
    ulabels = []
    for lab in labels:
        if lab not in ulabels:
            ulabels.append(lab)

    # Short display names, keyed by label so the result does not depend on
    # the order in which the labels occur in the data.
    short_names = {
        'Zygote': 'Z',
        '2-cell embryo': 'E',
        'Early 2-cell blastomere': '2',
        'Mid 2-cell blastomere': '2',
        'Late 2-cell blastomere': '2',
        '4-cell blastomere': '4',
        '8': '8',
        '8 split': '8 s',
        '16': '16',
        '16 split': '16 s',
        'Early blastocyst cell': 'Bz',
        'Mid blastocyst cell': 'Bz',
        'Late blastocyst cell': 'Bz',
        'fibroblast': 'F',
        'adult liver': 'L',
    }

    short_labels = labels.copy()
    short_ulabels = []
    for lab in ulabels:
        # Labels without a short name are kept unchanged.
        nlab = short_names.get(lab, lab)
        short_labels[labels==lab] = nlab
        if nlab not in short_ulabels:
            short_ulabels.append(nlab)

    from topslam.optimization import run_methods, methods, create_model, optimize_model
    X_init, dims = run_methods(Y_m, methods)

    m = create_model(Y_m, X_init, num_inducing=25)
    # Keep everything needed to interpret the model next to it.
    m.Ymean = Ymean
    m.Ystd = Ystd
    m.data_labels = short_labels
    m.data_ulabels = short_ulabels
    m.data = Y_bgplvm

    m.X_init = X_init
    m.dims = dims

    if optimize:
        optimize_model(m)
    if plot:
        # NOTE(review): ManifoldCorrectionTree and plot_comparison are not
        # imported in this function; they are presumably available at module
        # level -- confirm.
        mc = ManifoldCorrectionTree(m)
        plot_comparison(mc, X_init, dims, m.data_labels, m.data_ulabels, 0)

    return m
Beispiel #4
0
def example_deng(optimize=True, plot=True):
    """Build a model of the Deng single-cell RNA-seq data set.

    The filtered expression matrix is cached as
    ``~/tmp/Deng/filtered_expression_values.csv``. If that file exists it is
    read back; otherwise the data is fetched through
    ``GPy.util.datasets.singlecell_rna_seq_deng``, filtered with
    ``filter_RNASeq`` and written to the cache.

    The values are standardized with the global mean and standard deviation.
    The 8-cell and 16-cell stages are split into regular and 'split' samples,
    and every stage gets a short display label. The model is created from the
    embeddings returned by ``run_methods``.

    Args:
        optimize: if true, run ``optimize_model`` on the model.
        plot: if true, draw the comparison plot for the model.

    Returns:
        The model, with ``Ymean``, ``Ystd``, ``data_labels``,
        ``data_ulabels``, ``data``, ``X_init`` and ``dims`` attached.
    """
    import os
    import pandas as pd
    import GPy
    import numpy as np
    from topslam.filtering import filter_RNASeq
    # Reproducibility: BGPLVM has local optima.
    np.random.seed(42)

    # Stages to keep, in the order the rows are arranged when the data is
    # loaded from scratch.
    stage_order = [
        'Zygote',
        '2-cell embryo',
        'Early 2-cell blastomere',
        'Mid 2-cell blastomere',
        'Late 2-cell blastomere',
        '4-cell blastomere',
        '8-cell blastomere',
        '16-cell blastomere',
        'Early blastocyst cell',
        'Mid blastocyst cell',
        'Late blastocyst cell',
        'fibroblast',
        'adult liver',
    ]

    folder_path = os.path.expanduser('~/tmp/Deng')
    csv_file = os.path.join(folder_path, 'filtered_expression_values.csv')

    if os.path.exists(csv_file):
        print('Loading previous filtered data: {}'.format(csv_file))
        Y_bgplvm = pd.read_csv(csv_file, index_col=[0, 1, 2], header=0)
    else:
        print('Loading data:')
        data = GPy.util.datasets.singlecell_rna_seq_deng()
        if not os.path.exists(folder_path):
            # makedirs also creates a missing '~/tmp' parent directory;
            # os.mkdir would fail in that case.
            os.makedirs(folder_path)
        Ydata = data['Y'].copy()
        Ydata.columns = Ydata.columns.to_series().apply(str.upper)
        # Build a three-level row index with the stage label first, then
        # keep only the stages listed above, in that order.
        Ydata = Ydata.reset_index().set_index('index', append=True)
        Ydata['labels'] = data['labels'].values
        Ydata = Ydata.set_index('labels', append=True)
        Ydata = Ydata.reorder_levels([0, 2, 1])
        Ydata = Ydata.reset_index([0, 2]).loc[stage_order].set_index(
            ['level_0', 'index'], append=True)

        Y = Ydata.copy()
        # Drop everything after the first '.' of each column name.
        Y.columns = [c.split('.')[0] for c in Y.columns]
        Y_bgplvm = filter_RNASeq(Y)
        print('\nSaving data to tmp file: {}'.format(csv_file))
        Y_bgplvm.to_csv(csv_file)

    labels = Y_bgplvm.index.get_level_values(0).values
    Ymean = Y_bgplvm.values.mean()
    Ystd = Y_bgplvm.values.std()

    # NOTE(review): `.values` may be a view of the frame's data, in which
    # case the in-place operations below also standardize `Y_bgplvm` (stored
    # as `m.data` further down) -- confirm that this is intended.
    Y_m = Y_bgplvm.values
    Y_m -= Ymean
    Y_m /= Ystd

    # Get the labels right for the split experiments: within the 8- and
    # 16-cell stages a sample counts as 'split' when the part of its name
    # before the first '-' contains 'split'.
    split_stages = (
        ('8-cell blastomere', '8'),
        ('16-cell blastomere', '16'),
    )
    for stage, short in split_stages:
        stage_labels = []
        for sample_name in Y_bgplvm.loc[stage].index.get_level_values(1):
            prefix = sample_name.split('-')[0]
            if 'split' in prefix:
                stage_labels.append(short + ' split')
            else:
                stage_labels.append(short)
        labels[labels == stage] = stage_labels

    # Unique labels in order of first appearance.
    ulabels = []
    for lab in labels:
        if lab not in ulabels:
            ulabels.append(lab)

    # Short display names, keyed by label so the result does not depend on
    # the order in which the labels occur in the data.
    short_names = {
        'Zygote': 'Z',
        '2-cell embryo': 'E',
        'Early 2-cell blastomere': '2',
        'Mid 2-cell blastomere': '2',
        'Late 2-cell blastomere': '2',
        '4-cell blastomere': '4',
        '8': '8',
        '8 split': '8 s',
        '16': '16',
        '16 split': '16 s',
        'Early blastocyst cell': 'Bz',
        'Mid blastocyst cell': 'Bz',
        'Late blastocyst cell': 'Bz',
        'fibroblast': 'F',
        'adult liver': 'L',
    }

    short_labels = labels.copy()
    short_ulabels = []
    for lab in ulabels:
        # Labels without a short name are kept unchanged.
        nlab = short_names.get(lab, lab)
        short_labels[labels == lab] = nlab
        if nlab not in short_ulabels:
            short_ulabels.append(nlab)

    from topslam.optimization import run_methods, methods, create_model, optimize_model
    X_init, dims = run_methods(Y_m, methods)

    m = create_model(Y_m, X_init, num_inducing=25)
    # Keep everything needed to interpret the model next to it.
    m.Ymean = Ymean
    m.Ystd = Ystd
    m.data_labels = short_labels
    m.data_ulabels = short_ulabels
    m.data = Y_bgplvm

    m.X_init = X_init
    m.dims = dims

    if optimize:
        optimize_model(m)
    if plot:
        # NOTE(review): ManifoldCorrectionTree and plot_comparison are not
        # imported in this function; they are presumably available at module
        # level -- confirm.
        mc = ManifoldCorrectionTree(m)
        plot_comparison(mc, X_init, dims, m.data_labels, m.data_ulabels, 0)

    return m