Example #1
0
def experiment(data_file, data_type, dim, distance, npoints, n_neighbors,
               noise_std, target_dim, max_turns):
    """Embed a dataset with Isomap and plot it next to the original points.

    When ``data_file`` is given, points are loaded with
    ``multidimensional.common.load_embeddings`` and handed to the
    ``datagen.DataBuilder``; otherwise the builder receives ``None`` for the
    points. The builder's output is then passed, together with a
    ``manifold.Isomap`` estimator, to the ``isomap`` helper (defined elsewhere).

    Args:
        data_file: Path of an embeddings file, or ``None``.
        data_type: Forwarded to ``DataBuilder.with_type``.
        dim: Forwarded to ``DataBuilder.with_dim``.
        distance: Forwarded to ``DataBuilder.with_distance``.
        npoints: Forwarded to ``DataBuilder.with_npoints``.
        n_neighbors: Neighbourhood size for both the builder and Isomap.
        noise_std: Forwarded to ``DataBuilder.with_noise``.
        target_dim: ``n_components`` of the Isomap estimator.
        max_turns: ``max_iter`` of the Isomap estimator.

    Returns:
        None. The function only displays a figure.
    """
    xs = None
    if data_file is not None:
        xs = multidimensional.common.load_embeddings(data_file)

    xs, _d_goal, color = (datagen.DataBuilder()
                          .with_dim(dim)
                          .with_distance(distance)
                          .with_noise(noise_std)
                          .with_npoints(npoints)
                          .with_neighbors(n_neighbors)
                          .with_points(xs)
                          .with_type(data_type)
                          .build())
    m = manifold.Isomap(n_components=target_dim,
                        n_neighbors=n_neighbors,
                        max_iter=max_turns)
    x = isomap(m, xs)

    fig = plt.figure()

    # Give each plot its own cell. Two full-figure axes created through
    # plt.axes() on the same figure would sit on top of each other.
    ax = fig.add_subplot(1, 2, 1, projection='3d')
    ax.scatter(xs[:, 0], xs[:, 1], xs[:, 2], c=color, cmap=plt.cm.Spectral)
    ax.set_title("Original data")

    # Only the first two embedding columns are drawn, so a plain 2-D axes
    # is the right canvas for the projection.
    ax = fig.add_subplot(1, 2, 2)
    ax.scatter(x[:, 0], x[:, 1], c=color, cmap=plt.cm.Spectral)
    ax.set_title('Projected data')
    plt.show()
Example #2
0
def experiment(data_file, data_type, dim, distance, npoints, n_neighbors,
               noise_std, target_dim, point_filter, radius_update,
               radius_barrier, explore_dim_percent, starting_radius, max_turns,
               _run):
    """Fit the project's MDS model and report its history through ``_run``.

    Points are optionally loaded from ``data_file`` and passed through
    ``datagen.DataBuilder``; the resulting dissimilarities are fitted with
    ``multidimensional.mds.MDS``. The per-step ``error`` and ``radius``
    histories are logged as scalars, and the first/last point files, the
    first/last images (when any exist) and the animation (when not ``None``)
    are attached as artifacts.

    Args:
        data_file: Path of an embeddings file, or ``None``.
        data_type, dim, distance, npoints, n_neighbors, noise_std:
            Forwarded to the matching ``DataBuilder.with_*`` setters.
        target_dim, point_filter, radius_update: Positional MDS arguments.
        radius_barrier, explore_dim_percent, starting_radius, max_turns:
            Keyword MDS arguments of the same name.
        _run: Object providing ``log_scalar`` and ``add_artifact``
            (presumably a Sacred run -- confirm against the caller).

    Returns:
        The last entry of the model's ``error`` history.
    """
    loaded = (multidimensional.common.load_embeddings(data_file)
              if data_file is not None else None)

    builder = datagen.DataBuilder().with_dim(dim).with_distance(distance)
    builder = builder.with_noise(noise_std).with_npoints(npoints)
    builder = builder.with_neighbors(n_neighbors).with_points(loaded)
    _points, d_goal, color = builder.with_type(data_type).build()

    mds_options = {
        'starting_radius': starting_radius,
        'radius_barrier': radius_barrier,
        'max_turns': max_turns,
        'explore_dim_percent': explore_dim_percent,
        'keep_history': KEEP_HISTORY,
        'history_color': color,
        'history_path': EXPERIMENT_NAME,
        'dissimilarities': 'precomputed',
    }
    model = multidimensional.mds.MDS(target_dim, point_filter, radius_update,
                                     **mds_options)
    model.fit(d_goal)
    model.plot_history()

    history = model.history_observer.history

    # Scalars: every error value first, then every radius value; steps are
    # numbered from 1.
    for metric, key in (('mds.mse.error', 'error'), ('mds.step', 'radius')):
        for step, value in enumerate(history[key], start=1):
            _run.log_scalar(metric, value, step)

    # Artifacts: first and last snapshot of the point files.
    point_files = history['xs_files']
    _run.add_artifact(point_files[0], name='points_start')
    _run.add_artifact(point_files[-1], name='points_end')

    images = history['xs_images']
    if len(images) > 0:
        _run.add_artifact(images[0], name='points_image_start')
        _run.add_artifact(images[-1], name='points_image_end')

    animation = history['animation']
    if animation is not None:
        _run.add_artifact(animation, name='animation')

    return model.history_observer.history['error'][-1]
Example #3
0
def experiment(
        data_file, data_type, dim, distance, npoints, n_neighbors,
        noise_std, target_dim, max_turns, _run):
    """Fit the SMACOF MDS variant and report its history through ``_run``.

    Points are optionally loaded from ``data_file`` and passed through
    ``datagen.DataBuilder``. The resulting dissimilarities are fitted via the
    ``smacof_fit`` helper (defined elsewhere) on a ``smacof.MDS`` model, whose
    history observer is then plotted and exported: ``error`` and ``radius``
    as scalars, point files / images / animation as artifacts.

    Args:
        data_file: Path of an embeddings file, or ``None``.
        data_type, dim, distance, npoints, n_neighbors, noise_std:
            Forwarded to the matching ``DataBuilder.with_*`` setters.
        target_dim: ``n_components`` of the model; also passed to the
            observer's ``plot``.
        max_turns: ``max_iter`` of the model.
        _run: Object providing ``log_scalar`` and ``add_artifact``
            (presumably a Sacred run -- confirm against the caller).

    Returns:
        The last entry of the model's ``error`` history.
    """
    loaded = None
    if data_file is not None:
        loaded = multidimensional.common.load_embeddings(data_file)

    builder = datagen.DataBuilder()
    for setter, value in (('with_dim', dim),
                          ('with_distance', distance),
                          ('with_noise', noise_std),
                          ('with_npoints', npoints),
                          ('with_neighbors', n_neighbors),
                          ('with_points', loaded),
                          ('with_type', data_type)):
        builder = getattr(builder, setter)(value)
    _points, d_goal, _color = builder.build()

    model = smacof.MDS(n_components=target_dim,
                       n_init=1,
                       max_iter=max_turns,
                       verbose=2,
                       dissimilarity='precomputed',
                       history_path=EXPERIMENT_NAME)
    smacof_fit(model, d_goal)
    model.history_observer.plot(model.n_iter_, target_dim)

    history = model.history_observer.history

    # Scalars are logged with 1-based step numbers: errors, then radii.
    step = 0
    for error in history['error']:
        step += 1
        _run.log_scalar('mds.mse.error', error, step)
    step = 0
    for radius in history['radius']:
        step += 1
        _run.log_scalar('mds.step', radius, step)

    # First and last snapshots go out as artifacts.
    files = history['xs_files']
    _run.add_artifact(files[0], name='points_start')
    _run.add_artifact(files[-1], name='points_end')

    snapshots = history['xs_images']
    if len(snapshots) > 0:
        _run.add_artifact(snapshots[0], name='points_image_start')
        _run.add_artifact(snapshots[-1], name='points_image_end')

    if history['animation'] is not None:
        _run.add_artifact(history['animation'], name='animation')

    return model.history_observer.history['error'][-1]
def experiment(data_type, dim, distance, npoints, n_neighbors, noise_std,
               target_dim, point_filter, radius_update, radius_barrier,
               explore_dim_percent, starting_radius, max_turns, _run):
    """Compare the proposed MDS with other dimensionality-reduction methods.

    A dataset is generated with ``datagen.DataBuilder``. Each method in the
    comparison list is run with ``fit_transform`` and the first two columns
    of its output are drawn in one cell of a 3x3 figure (cell 1 holds the
    original 3-D points). A method that raises is reported in its cell as
    "did not run" instead of aborting the experiment. The figure is saved to
    ``RESULT_IMAGE`` and shown, and the ``error``/``radius`` histories of the
    proposed MDS and of SMACOF are logged through ``_run``.

    Args:
        data_type, dim, distance, npoints, n_neighbors, noise_std:
            Forwarded to the matching ``DataBuilder.with_*`` setters;
            ``n_neighbors`` is also used by the neighbour-based estimators.
        target_dim: Output dimensionality for every method.
        point_filter, radius_update, radius_barrier, explore_dim_percent,
            starting_radius: Forwarded to ``multidimensional.mds.MDS``.
        max_turns: Iteration limit for both MDS implementations.
        _run: Object providing ``log_scalar`` (presumably a Sacred run --
            confirm against the caller).

    Returns:
        The last entry of the proposed MDS model's ``error`` history.
    """
    xs, d_goal, color = (datagen.DataBuilder()
                         .with_dim(dim)
                         .with_distance(distance)
                         .with_noise(noise_std)
                         .with_npoints(npoints)
                         .with_neighbors(n_neighbors)
                         .with_points(None)
                         .with_type(data_type)
                         .build())
    dim_reduction = namedtuple('dim_reduction', 'name method data')

    def lle(variant):
        # Keyword arguments only: scikit-learn estimators no longer accept
        # these parameters positionally.
        return manifold.LocallyLinearEmbedding(n_neighbors=n_neighbors,
                                               n_components=target_dim,
                                               eigen_solver='auto',
                                               method=variant)

    def log_history(prefix, history):
        # Steps are 1-based; all errors are logged before any radius.
        for step, error in enumerate(history['error'], start=1):
            _run.log_scalar(prefix + '.mse.error', error, step)
        for step, radius in enumerate(history['radius'], start=1):
            _run.log_scalar(prefix + '.step', radius, step)

    mds_proposed = dim_reduction(
        'MDS (proposed)',
        multidimensional.mds.MDS(target_dim,
                                 point_filter,
                                 radius_update,
                                 starting_radius=starting_radius,
                                 radius_barrier=radius_barrier,
                                 max_turns=max_turns,
                                 explore_dim_percent=explore_dim_percent,
                                 keep_history=KEEP_HISTORY,
                                 history_color=color,
                                 history_path=EXPERIMENT_NAME +
                                 '_mds_proposed',
                                 dissimilarities='precomputed'), d_goal)
    lle_standard = dim_reduction('LLE', lle('standard'), xs)
    ltsa = dim_reduction('LTSA', lle('ltsa'), xs)
    pca = dim_reduction('Truncated SVD',
                        decomposition.TruncatedSVD(n_components=target_dim),
                        xs)
    hessian_lle = dim_reduction('HessianLLE', lle('hessian'), xs)
    modified_lle = dim_reduction('ModifiedLLE', lle('modified'), xs)
    isomap_method = dim_reduction(
        'Isomap',
        manifold.Isomap(n_neighbors=n_neighbors, n_components=target_dim),
        xs)
    mds_smacof = dim_reduction(
        'MDS SMACOF',
        multidimensional.smacof.MDS(n_components=target_dim,
                                    n_init=1,
                                    max_iter=max_turns,
                                    verbose=2,
                                    dissimilarity='precomputed',
                                    history_path=EXPERIMENT_NAME +
                                    '_mds_smacof'), d_goal)
    # Constructed but not part of the comparison below; the 3x3 grid has
    # room for the original data plus eight methods only.
    spectral_embedding = dim_reduction(
        'SpectralEmbedding',
        manifold.SpectralEmbedding(n_components=target_dim,
                                   n_neighbors=n_neighbors), xs)
    tsne = dim_reduction(
        'tSNE',
        manifold.TSNE(n_components=target_dim, init='pca', random_state=0), xs)

    methods = [
        mds_proposed, mds_smacof, pca, isomap_method, lle_standard,
        hessian_lle, modified_lle, ltsa
    ]
    fig = plt.figure(figsize=(20, 20))

    # NOTE(review): a numeric ``aspect`` on a 3-D axes is not accepted by
    # every Matplotlib release -- confirm against the pinned version.
    ax = fig.add_subplot(3, 3, 1, projection='3d', aspect=1)
    ax.scatter(xs[:, 0], xs[:, 1], xs[:, 2], c=color, cmap=plt.cm.Spectral)
    ax.set_title("Original Manifold", fontsize=32)

    for i, entry in enumerate(methods):
        print("Running {}".format(entry.name))
        ax = None
        try:
            t0 = time.time()
            x = entry.method.fit_transform(entry.data)
            # Stop the clock before any printing/plotting so the reported
            # time covers the fit only.
            t1 = time.time()
            if entry.name == 'LTSA':
                print(x)
            # Integer position spec; string specs such as "332" are
            # rejected by newer Matplotlib releases.
            ax = fig.add_subplot(3, 3, i + 2, aspect=1)
            # Plot the 2 dimensions.
            ax.scatter(x[:, 0], x[:, 1], c=color, cmap=plt.cm.Spectral)
            ax.set_title(entry.name + "(%.2g sec)" % (t1 - t0), fontsize=32)
            ax.axis('tight')
        except Exception as e:
            # With high noise level, some of the models fail; keep going
            # and mark the cell instead of aborting the comparison.
            print(e)
            if ax is None:
                # Reuse the cell if it was already created before the
                # failure rather than stacking a second axes on it.
                ax = fig.add_subplot(3, 3, i + 2, aspect=1)
            ax.set_title(entry.name + " did not run", fontsize=32)
            ax.axis('tight')

    # Address the figure explicitly so the result does not depend on which
    # figure pyplot currently considers active.
    fig.tight_layout()
    fig.savefig(RESULT_IMAGE)
    plt.show()

    log_history('mds', mds_proposed.method.history_observer.history)
    log_history('smacof', mds_smacof.method.history_observer.history)

    return mds_proposed.method.history_observer.history['error'][-1]
Example #5
0
def experiment(ground_truth_men, ground_truth_simlex, data_file,
               data_file_small, data_type, dim, distance, npoints, n_neighbors,
               noise_std, target_dim, point_filter, radius_update,
               radius_barrier, explore_dim_percent, starting_radius, max_turns,
               dataset, _run):
    """Benchmark reduced embeddings on a semantic-similarity task.

    Word embeddings are loaded from ``data_file`` (``load_pkl`` for paths
    ending in ``pkl``, otherwise ``multidimensional.common.load_embeddings``)
    and optionally a second set from ``data_file_small``. Each benchmarked
    method is run with ``fit_transform`` and scored with
    ``semantic_similarity`` against the MEN ground truth when ``dataset`` is
    ``'men'`` and against SimLex otherwise. A failing method is printed and
    skipped. Scores are dumped to
    ``semantic_similarity_simlex_only_bench.json``. The ``error``,
    ``radius`` and ``epoch_time`` histories of the three MDS variants are
    logged through ``_run``.

    Args:
        ground_truth_men: Argument for ``load_ground_truth`` (MEN).
        ground_truth_simlex: Argument for ``load_ground_truth`` (SimLex).
        data_file: Path of the main embeddings file. The code reads
            ``xs.shape`` unconditionally, so ``None`` fails early.
        data_file_small: Path of a second embeddings file, or ``None``.
        data_type, dim, distance, npoints, n_neighbors, noise_std:
            Forwarded to the matching ``DataBuilder.with_*`` setters (noise
            is not applied to the small set).
        target_dim: Output dimensionality for every reduction method.
        point_filter, radius_update, radius_barrier, explore_dim_percent,
            starting_radius, max_turns: MDS hyper-parameters.
        dataset: ``'men'`` selects MEN; any other value selects SimLex.
        _run: Object providing ``log_scalar`` (presumably a Sacred run --
            confirm against the caller).

    Returns:
        The last entry of the proposed MDS model's ``error`` history.
    """
    xs = None
    xs_small = None
    words = None
    if data_file is not None:
        if data_file.endswith('pkl'):
            words, xs = load_pkl(data_file)
        else:
            words, xs = multidimensional.common.load_embeddings(data_file)

    if data_file_small is not None:
        _, xs_small = multidimensional.common.load_embeddings(data_file_small)
    men = load_ground_truth(ground_truth_men)
    simlex = load_ground_truth(ground_truth_simlex)

    print(xs.shape)

    xs, d_goal, color = (datagen.DataBuilder()
                         .with_dim(dim)
                         .with_distance(distance)
                         .with_noise(noise_std)
                         .with_npoints(npoints)
                         .with_neighbors(n_neighbors)
                         .with_points(xs)
                         .with_type(data_type)
                         .build())

    # Keep the small set's colours separate: the MDS models below are fitted
    # on ``d_goal`` and must receive the colours built alongside it.
    xs_small, _d_goal_small, _color_small = (datagen.DataBuilder()
                                             .with_dim(dim)
                                             .with_distance(distance)
                                             .with_npoints(npoints)
                                             .with_neighbors(n_neighbors)
                                             .with_points(xs_small)
                                             .with_type(data_type)
                                             .build())

    dim_reduction = namedtuple('dim_reduction', 'name method data')

    def lle(variant):
        # Keyword arguments only: scikit-learn estimators no longer accept
        # these parameters positionally.
        return manifold.LocallyLinearEmbedding(n_neighbors=n_neighbors,
                                               n_components=target_dim,
                                               eigen_solver='auto',
                                               method=variant)

    def log_history(prefix, history):
        # Steps are 1-based; each series is logged in full before the next.
        for step, error in enumerate(history['error'], start=1):
            _run.log_scalar(prefix + '.mse.error', error, step)
        for step, radius in enumerate(history['radius'], start=1):
            _run.log_scalar(prefix + '.step', radius, step)
        for step, seconds in enumerate(history['epoch_time'], start=1):
            _run.log_scalar(prefix + '.epoch_time', seconds, step)

    mds_proposed = dim_reduction(
        'MDS proposed',
        multidimensional.mds.MDS(target_dim,
                                 point_filter,
                                 radius_update,
                                 starting_radius=starting_radius,
                                 radius_barrier=radius_barrier,
                                 max_turns=max_turns,
                                 explore_dim_percent=explore_dim_percent,
                                 keep_history=KEEP_HISTORY,
                                 history_color=color,
                                 history_path=EXPERIMENT_NAME + 'mds_proposed',
                                 dissimilarities='precomputed'), d_goal)

    mds_monotonic = dim_reduction(
        'MDS (monotonic)',
        multidimensional.monotonic.MDS(target_dim,
                                       point_filter,
                                       radius_update,
                                       starting_radius=starting_radius,
                                       radius_barrier=radius_barrier,
                                       max_turns=max_turns,
                                       explore_dim_percent=explore_dim_percent,
                                       keep_history=KEEP_HISTORY,
                                       history_color=color,
                                       history_path=EXPERIMENT_NAME +
                                       '_mds_monotonic',
                                       dissimilarities='precomputed'), d_goal)

    lle_standard = dim_reduction('LLE', lle('standard'), xs)
    ltsa = dim_reduction('LTSA', lle('ltsa'), xs)
    hessian_lle = dim_reduction('HessianLLE', lle('hessian'), xs)
    modified_lle = dim_reduction('ModifiedLLE', lle('modified'), xs)
    isomap_method = dim_reduction(
        'Isomap',
        manifold.Isomap(n_neighbors=n_neighbors, n_components=target_dim),
        xs)
    mds_smacof = dim_reduction(
        'MDS SMACOF',
        multidimensional.smacof.MDS(n_components=target_dim,
                                    n_init=1,
                                    max_iter=max_turns,
                                    verbose=2,
                                    dissimilarity='precomputed',
                                    history_path=EXPERIMENT_NAME +
                                    '_mds_smacof'), d_goal)

    pca = dim_reduction('Truncated SVD',
                        decomposition.TruncatedSVD(n_components=target_dim),
                        xs)

    original = dim_reduction('Original Embeddings 300d', Identity(), xs)

    # The next three are constructed but not part of the benchmark list.
    original_small = dim_reduction('Original Embeddings 50d', Identity(),
                                   xs_small)
    spectral_embedding = dim_reduction(
        'SpectralEmbedding',
        manifold.SpectralEmbedding(n_components=target_dim,
                                   n_neighbors=n_neighbors), xs)
    tsne = dim_reduction(
        'tSNE',
        manifold.TSNE(n_components=target_dim, init='pca', random_state=0), xs)

    methods = [
        mds_proposed, mds_monotonic, mds_smacof, original, isomap_method,
        pca, lle_standard, modified_lle, ltsa, hessian_lle
    ]
    res = {}
    for entry in methods:
        try:
            x = entry.method.fit_transform(entry.data)

            # The entry is created before scoring, so a method whose scoring
            # fails still appears (empty) in the JSON output.
            res[entry.name] = {}

            if dataset == 'men':
                res[entry.name]['men'] = semantic_similarity(
                    entry.name, men, words, x)
            else:
                res[entry.name]['simlex'] = semantic_similarity(
                    entry.name, simlex, words, x)

            print("Model: {}\t result:{}".format(entry.name, res))
        except Exception as e:
            # Best effort: one failing method must not abort the benchmark.
            print(e)

    with open('semantic_similarity_simlex_only_bench.json', 'w') as fd:
        json.dump(res, fd, indent=4, sort_keys=True)

    log_history('mds', mds_proposed.method.history_observer.history)
    log_history('smacof', mds_smacof.method.history_observer.history)
    log_history('mds_monotonic', mds_monotonic.method.history_observer.history)

    return mds_proposed.method.history_observer.history['error'][-1]
Example #6
0
def experiment(data_type, dim, distance, npoints, n_neighbors, noise_std,
               target_dim, point_filter, radius_update, radius_barrier,
               explore_dim_percent, starting_radius, max_turns, _run):
    """Compare the proposed MDS for three different starting radii.

    A dataset is generated with ``datagen.DataBuilder``. Three
    ``multidimensional.mds.MDS`` models are fitted on the same
    dissimilarities with a starting radius of ``starting_radius``, ``1`` and
    ``65536``. The first two columns of each embedding are drawn in a 1x4
    figure (cell 1 holds the original 3-D points). A model that raises is
    reported in its cell as "did not run". The figure is saved to
    ``RESULT_IMAGE`` and shown, and every model's ``error``/``radius``
    history is logged through ``_run``.

    Args:
        data_type, dim, distance, npoints, n_neighbors, noise_std:
            Forwarded to the matching ``DataBuilder.with_*`` setters.
        target_dim, point_filter, radius_update, radius_barrier,
            explore_dim_percent, max_turns: Shared MDS hyper-parameters.
        starting_radius: Starting radius of the first model, which is
            labelled ``'r0=16'`` regardless of this value.
        _run: Object providing ``log_scalar`` (presumably a Sacred run --
            confirm against the caller).

    Returns:
        The last entry of the first model's ``error`` history.
    """
    xs, d_goal, color = (datagen.DataBuilder()
                         .with_dim(dim)
                         .with_distance(distance)
                         .with_noise(noise_std)
                         .with_npoints(npoints)
                         .with_neighbors(n_neighbors)
                         .with_points(None)
                         .with_type(data_type)
                         .build())
    dim_reduction = namedtuple('dim_reduction', 'name method data')

    def make_mds(label, radius):
        # NOTE(review): all three models share one history_path; if the
        # model writes files there they may overwrite each other -- confirm.
        model = multidimensional.mds.MDS(
            target_dim,
            point_filter,
            radius_update,
            starting_radius=radius,
            radius_barrier=radius_barrier,
            max_turns=max_turns,
            explore_dim_percent=explore_dim_percent,
            keep_history=KEEP_HISTORY,
            history_color=color,
            history_path=EXPERIMENT_NAME + '_mds_proposed',
            dissimilarities='precomputed')
        return dim_reduction(label, model, d_goal)

    def log_history(prefix, history):
        # Steps are 1-based; all errors are logged before any radius.
        for step, error in enumerate(history['error'], start=1):
            _run.log_scalar(prefix + '.mse.error', error, step)
        for step, radius in enumerate(history['radius'], start=1):
            _run.log_scalar(prefix + '.step', radius, step)

    mds = make_mds('r0=16', starting_radius)
    mds_pess = make_mds('r0=1', 1)
    mds_opt = make_mds('r0=65536', 65536)

    methods = [mds, mds_pess, mds_opt]
    fig = plt.figure(figsize=(20, 10))

    # NOTE(review): a numeric ``aspect`` on a 3-D axes is not accepted by
    # every Matplotlib release -- confirm against the pinned version.
    ax = fig.add_subplot(1, 4, 1, projection='3d', aspect=1)
    ax.scatter(xs[:, 0], xs[:, 1], xs[:, 2], c=color, cmap=plt.cm.Spectral)
    ax.set_title("Original Manifold", fontsize=32)

    for i, entry in enumerate(methods):
        print("Running {}".format(entry.name))
        ax = None
        try:
            x = entry.method.fit_transform(entry.data)
            # Integer position spec; string specs such as "142" are
            # rejected by newer Matplotlib releases.
            ax = fig.add_subplot(1, 4, i + 2, aspect=1)
            # Plot the 2 dimensions.
            ax.scatter(x[:, 0], x[:, 1], c=color, cmap=plt.cm.Spectral)
            ax.set_title(entry.name)
            ax.axis('tight')
        except Exception as e:
            # With high noise level, some of the models fail; keep going
            # and mark the cell instead of aborting the comparison.
            print(e)
            if ax is None:
                # Same 1x4 grid as the success path, so the failure notice
                # lands in the method's own cell.
                ax = fig.add_subplot(1, 4, i + 2, aspect=1)
            ax.set_title(entry.name + " did not run", fontsize=32)
            ax.axis('tight')

    # Address the figure explicitly so the result does not depend on which
    # figure pyplot currently considers active.
    fig.tight_layout()
    fig.savefig(RESULT_IMAGE)
    plt.show()

    log_history('mds', mds.method.history_observer.history)
    log_history('pessimistic', mds_pess.method.history_observer.history)
    log_history('optimistic', mds_opt.method.history_observer.history)

    return mds.method.history_observer.history['error'][-1]