Ejemplo n.º 1
0
def largevis(edgelist_filename, outdim=2, alpha=-1):
    """ Use LargeVis for embedding

        This function is Clustit's interface to the LargeVis model
        for embedding large-scale and high-dimensional data.

        :param edgelist_filename: The filename of the edgelist with the similarities or distances
        :type edgelist_filename: string

        :param outdim: The number of output dimensions, default is 2.
        :type outdim: int

        :param alpha: The learning rate forwarded to LargeVis, default is -1
            (presumably "let LargeVis pick its own default" -- confirm against LargeVis).
        :type alpha: float

        :returns: The resulting embedding of the input data
        :rtype: pandas.DataFrame
    """
    LargeVis.loadgraph(edgelist_filename)
    # alpha has to be passed by keyword: the second positional parameter of
    # _run_largevis is `threads`, so a positional alpha would be used as the
    # thread count and the learning rate would never be set.
    _run_largevis(outdim, alpha=alpha)

    #get output data from LargeVis
    # The result is written to a temporary text file and parsed back with
    # pandas; the first column of that file becomes the DataFrame index.
    temp_file = "/tmp/largevis_tempfile.txt"
    try:
        LargeVis.save(temp_file)
        data_frame = pandas.read_csv(temp_file, sep=" ", index_col=0)
    finally:
        # Always remove the temporary file, even when saving or parsing fails.
        delete_temp_file(temp_file)

    return data_frame
Ejemplo n.º 2
0
def experiment_five(data_path,
                    output_path,
                    dataset_name='MNIST',
                    n_neighbors=80):
    """ Run LargeVis five times on one dataset, save the embeddings and print timings

        Each repetition resolves the input file through data_path_finder(),
        loads it into LargeVis and times only the LargeVis.run() call.
        The five embeddings are stacked into one array and saved with np.save
        under output_path; the mean and the individual run times are printed.

        :param data_path: Location handed to data_path_finder() together with dataset_name
        :param output_path: Directory prefix for the saved array
        :param dataset_name: Name of the dataset, default is 'MNIST'
        :param n_neighbors: Passed to LargeVis.run() as the last argument; three times
            this value is passed as the eighth argument
    """
    method = 'LargeVis'
    embeddings = []
    durations = []
    for _ in range(5):
        source_file = data_path_finder(data_path, dataset_name)
        LargeVis.loadfile(source_file)
        began = time()
        # -1 means default value
        embedding = LargeVis.run(2, 16, -1, -1, -1, -1, -1,
                                 3 * n_neighbors, -1, n_neighbors)
        elapsed = time() - began
        embeddings.append(embedding)
        durations.append(elapsed)

    X_lows = np.array(embeddings)
    total_times = np.array(durations)
    avg_time = np.mean(total_times)

    file_stem = '/{dataset_name}_{method}_{n_neighbors}'.format(
        dataset_name=dataset_name, method=method, n_neighbors=n_neighbors)
    np.save(output_path + file_stem, X_lows)

    summary = 'Total time for method {method} on {dataset_name} with {n_neighbors} is {avg_time}'
    print(summary.format(method=method,
                         dataset_name=dataset_name,
                         avg_time=avg_time,
                         n_neighbors=n_neighbors))
    print('Detailed time is {total_times}'.format(total_times=total_times))
Ejemplo n.º 3
0
def _run_largevis(outdim=-1, threads=-1, samples=-1, prop=-1, alpha=-1.0, trees=-1, neg=-1, neigh=-1, gamma=-1.0, perp=-1.0):
    """ Call LargeVis.run() with named, optional parameters

        Every parameter has a default, so callers only need to name the
        settings they want to change. As with LargeVis.run() itself,
        LargeVis.loadfile() or LargeVis.loadgraph() must have been called first.
        The arguments are forwarded positionally, in signature order.

        :param outdim: output dimensionality
        :type outdim: int
        :param threads: number of training threads
        :type threads: int
        :param samples: number of training mini-batches
        :type samples: int
        :param prop: number of propagations
        :type prop: int
        :param alpha: learning rate
        :type alpha: float
        :param trees: number of rp-trees
        :type trees: int
        :param neg: number of negative samples
        :type neg: int
        :param neigh: number of neighbors in the NN-graph
        :type neigh: int
        :param gamma: weight assigned to negative edges
        :type gamma: float
        :param perp: perplexity for the NN-graph
        :type perp: float

    """
    # Keep the tuple in exactly the order LargeVis.run() is called with.
    run_args = (
        outdim, threads, samples, prop, alpha,
        trees, neg, neigh, gamma, perp,
    )
    LargeVis.run(*run_args)
Ejemplo n.º 4
0
def largevis(edgelist_filename, outdim=2, alpha=-1.0):
    """ Use LargeVis for embedding

        This function is Clustit's interface to the LargeVis model
        for embedding large-scale and high-dimensional data.

        :param edgelist_filename: The filename of the edgelist with the similarities or distances
        :type edgelist_filename: string

        :param outdim: The number of output dimensions, default is 2.
        :type outdim: int

        :param alpha: The learning rate forwarded to LargeVis, default is -1.0
            (presumably "let LargeVis pick its own default" -- confirm against LargeVis).
        :type alpha: float

        :returns: The resulting embedding of the input data
        :rtype: pandas.DataFrame
    """
    LargeVis.loadgraph(edgelist_filename)
    # alpha has to be passed by keyword: the second positional parameter of
    # _run_largevis is `threads`, so a positional alpha would be used as the
    # thread count and the learning rate would never be set.
    _run_largevis(outdim, alpha=alpha)

    #get output data from LargeVis
    temp_file = "/tmp/largevis_tempfile.txt"
    LargeVis.save(temp_file)
    names = get_column_names(outdim)
    # header=0 makes pandas consume the first line of the file as a header
    # row and replace it with the column labels given in `names`.
    data_frame = pandas.read_csv(temp_file, sep=" ", names=names, header=0)
    # NOTE(review): the temporary file is left on disk; the delete_temp_file()
    # cleanup was disabled in this version -- confirm whether that is intended.

    return data_frame
Ejemplo n.º 5
0
def main():
    """ Embed the 'mammoth' dataset with LargeVis, save the result and print the run time

        The input file is resolved through data_path_finder(), loaded into
        LargeVis, and only the LargeVis.run() call is timed. The embedding is
        written with np.save to a fixed output location.
    """
    dataset_name = 'mammoth'
    method = 'LargeVis'
    n_neighbors = 125

    LargeVis.loadfile(data_path_finder(dataset_name))

    began = time()
    # -1 means default value
    X_low = LargeVis.run(2, 16, -1, -1, -1, -1, -1,
                         3 * n_neighbors, -1, n_neighbors)
    total_time = time() - began

    target = '/home/home1/hh219/PaCMAP/output_{dataset_name}_{method}_{n_neighbors}'.format(
        dataset_name=dataset_name, method=method, n_neighbors=n_neighbors)
    np.save(target, X_low)

    report = 'Total time for method {method} on {dataset_name} is {total_time}'
    print(report.format(method=method,
                        dataset_name=dataset_name,
                        total_time=total_time))
Ejemplo n.º 6
0
def _run_largevis(outdim=-1, threads=-1, samples=-1, prop=-1, alpha=-1.0,
                  trees=-1, neg=-1, neigh=-1, gamma=-1.0, perp=-1.0):
    """ Call LargeVis.run() with named, optional parameters

        Every parameter has a default, so callers only need to name the
        settings they want to change. As with LargeVis.run() itself,
        LargeVis.loadfile() or LargeVis.loadgraph() must have been called first.
        Integer settings are converted to numpy int32 and floating-point
        settings to numpy float32 before being forwarded.

        :param outdim: output dimensionality
        :type outdim: int
        :param threads: number of training threads
        :type threads: int
        :param samples: number of training mini-batches
        :type samples: int
        :param prop: number of propagations
        :type prop: int
        :param alpha: learning rate
        :type alpha: float
        :param trees: number of rp-trees
        :type trees: int
        :param neg: number of negative samples
        :type neg: int
        :param neigh: number of neighbors in the NN-graph
        :type neigh: int
        :param gamma: weight assigned to negative edges
        :type gamma: float
        :param perp: perplexity for the NN-graph
        :type perp: float

    """
    as_int = np.int32
    as_float = np.float32
    # Arguments stay in the positional order LargeVis.run() is called with.
    LargeVis.run(
        as_int(outdim),
        as_int(threads),
        as_int(samples),
        as_int(prop),
        as_float(alpha),
        as_int(trees),
        as_int(neg),
        as_int(neigh),
        as_float(gamma),
        as_float(perp),
    )
Ejemplo n.º 7
0
def largevisproc(i_file, o_file, sim):
    """ Run LargeVis on one input file and save the result

        :param i_file: Input file; loaded with LargeVis.loadgraph() when sim is
            truthy, otherwise with LargeVis.loadfile()
        :param o_file: File the result is written to via LargeVis.save()
        :param sim: Selects which of the two LargeVis loaders is used
    """
    import LargeVis

    load_input = LargeVis.loadgraph if sim else LargeVis.loadfile
    load_input(i_file)

    # Positional arguments, in order: outdim, threads, samples, prop, alpha,
    # trees, neg, neigh, gamma, perp. Only outdim (2) and threads (24) are
    # set explicitly; every other setting is passed as -1.
    LargeVis.run(2, 24, -1, -1, -1, -1, -1, -1, -1, -1)

    LargeVis.save(o_file)
Ejemplo n.º 8
0
import LargeVis
import argparse

# Command-line front end: parse the options, load the input, run LargeVis
# and save the result to the requested output file.
parser = argparse.ArgumentParser()
parser.add_argument('-fea', type=int, default=1,
                    help='whether to visualize high-dimensional feature vectors or networks')
parser.add_argument('-input', default='', help='input file')
parser.add_argument('-output', default='', help='output file')

# (flag, value type, help text) for the LargeVis settings; each one defaults
# to -1. Registration order is kept so the --help listing is unchanged.
_LARGEVIS_OPTIONS = (
    ('-outdim', int, 'output dimensionality'),
    ('-threads', int, 'number of training threads'),
    ('-samples', int, 'number of training mini-batches'),
    ('-prop', int, 'number of propagations'),
    ('-alpha', float, 'learning rate'),
    ('-trees', int, 'number of rp-trees'),
    ('-neg', int, 'number of negative samples'),
    ('-neigh', int, 'number of neighbors in the NN-graph'),
    ('-gamma', float, 'weight assigned to negative edges'),
    ('-perp', float, 'perplexity for the NN-grapn'),
)
for _flag, _kind, _help in _LARGEVIS_OPTIONS:
    parser.add_argument(_flag, default=-1, type=_kind, help=_help)

args = parser.parse_args()

# -fea 1 loads the input with loadfile(); any other value uses loadgraph().
_load_input = LargeVis.loadfile if args.fea == 1 else LargeVis.loadgraph
_load_input(args.input)

Y = LargeVis.run(
    args.outdim, args.threads, args.samples, args.prop, args.alpha,
    args.trees, args.neg, args.neigh, args.gamma, args.perp,
)

LargeVis.save(args.output)
Ejemplo n.º 9
0
#!/usr/bin/env python

import LargeVis
import argparse

# Command-line front end: parse the options, load the input, run LargeVis
# and save the result to the requested output file.
parser = argparse.ArgumentParser()
parser.add_argument('--fea', type=int, default=1,
                    help='whether to visualize high-dimensional feature vectors or networks')
parser.add_argument('--input', default='', help='input file', required=True)
parser.add_argument('--output', default='', help='output file', required=True)

# (flag, value type, help text) for the LargeVis settings; each one defaults
# to -1. Registration order is kept so the --help listing is unchanged.
_LARGEVIS_OPTIONS = (
    ('--outdim', int, 'output dimensionality'),
    ('--threads', int, 'number of training threads'),
    ('--samples', int, 'number of training mini-batches'),
    ('--prop', int, 'number of propagations'),
    ('--alpha', float, 'learning rate'),
    ('--trees', int, 'number of rp-trees'),
    ('--neg', int, 'number of negative samples'),
    ('--neigh', int, 'number of neighbors in the NN-graph'),
    ('--gamma', float, 'weight assigned to negative edges'),
    ('--perp', float, 'perplexity for the NN-grapn'),
)
for _flag, _kind, _help in _LARGEVIS_OPTIONS:
    parser.add_argument(_flag, default=-1, type=_kind, help=_help)

args = parser.parse_args()

# --fea 1 loads the input with loadfile(); any other value uses loadgraph().
_load_input = LargeVis.loadfile if args.fea == 1 else LargeVis.loadgraph
_load_input(args.input)

Y = LargeVis.run(
    args.outdim, args.threads, args.samples, args.prop, args.alpha,
    args.trees, args.neg, args.neigh, args.gamma, args.perp,
)

LargeVis.save(args.output)
Ejemplo n.º 10
0
        random_indices = np.concatenate(random_indices)
        idx_filename = '.indices_year_{}.npy'.format(args.sample_size)
        np.save(args.input+idx_filename,random_indices)
        features = features[random_indices]

    # now we write the data to file in the required LargeVis format (which requires a header 
    # with the number of items and the dimensionality of the feature vectors)
    with open(args.temp+'lv_format.txt','w') as out:
        out.write("{}\t{}\n".format(*features.shape))
        for row in tq(features):
            out.write('\t'.join(row.astype(str))+'\n')
    del features

    # now run Large Vis! (in 2D mode)

    LargeVis.loadfile(args.temp+"lv_format.txt")

    # samples only matters for graph layout
    samples = -1
    gamma = -1
    Y = LargeVis.run(2, args.threads, samples, args.prop, args.alpha, args.trees, args.neg, args.neigh, gamma, args.perp)
    if args.sampling == 'by_year':
        filename = '.{}.year_lv_coords'.format(args.sample_size)
    else:
        filename = '.{}.lv_coords'.format(args.sample_size)
    LargeVis.save(args.input+filename)

    donestring = """
    -----PROCESSING COMPLETE-----
    2D Embedding saved as: {}
    """.format(args.input+filename)