def largevis(edgelist_filename, outdim=2, alpha=-1):
    """ Use LargeVis for embedding

        This function is Clustit's interface to the LargeVis model
        for embedding large-scale and high-dimensional data.

        The edgelist is loaded with ``LargeVis.loadgraph``, the model is run
        through ``_run_largevis``, and the result is written to a temporary
        file with ``LargeVis.save`` and read back with ``pandas.read_csv``.

        :param edgelist_filename: The filename of the edgelist with the similarities or distances
        :type edgelist_filename: string

        :param outdim: The number of output dimensions, default is 2.
        :type outdim: int

        :param alpha: Forwarded unchanged to ``_run_largevis``. Presumably the
            LargeVis learning rate, with -1 selecting the library default
            (TODO: confirm against ``_run_largevis``).
        :type alpha: float

        :returns: The resulting embedding of the input data
        :rtype: pandas.DataFrame
    """
    # Imported locally so this function does not depend on the module's
    # top-level import block.
    import os
    import tempfile

    LargeVis.loadgraph(edgelist_filename)
    _run_largevis(outdim, alpha)

    # Get output data from LargeVis.
    # Use a uniquely named file instead of a fixed path in the shared temp
    # directory: a fixed name lets concurrent runs overwrite each other's
    # output and is not portable to systems without /tmp.
    file_descriptor, temp_file = tempfile.mkstemp(prefix="largevis_", suffix=".txt")
    # LargeVis.save() is given the path only, so the descriptor returned by
    # mkstemp is closed straight away.
    os.close(file_descriptor)
    try:
        LargeVis.save(temp_file)
        data_frame = pandas.read_csv(temp_file, sep=" ", index_col=0)
    finally:
        # Always clean up, even if saving or parsing failed.
        delete_temp_file(temp_file)

    return data_frame
Beispiel #2
0
def largevis(edgelist_filename, outdim=2, alpha=-1.0):
    """ Use LargeVis for embedding

        This function is Clustit's interface to the LargeVis model
        for embedding large-scale and high-dimensional data.

        The edgelist is loaded with ``LargeVis.loadgraph``, the model is run
        through ``_run_largevis``, and the result is written to a temporary
        file with ``LargeVis.save`` and read back with ``pandas.read_csv``,
        using the column names supplied by ``get_column_names(outdim)``.

        :param edgelist_filename: The filename of the edgelist with the similarities or distances
        :type edgelist_filename: string

        :param outdim: The number of output dimensions, default is 2.
        :type outdim: int

        :param alpha: Forwarded unchanged to ``_run_largevis``. Presumably the
            LargeVis learning rate, with -1.0 selecting the library default
            (TODO: confirm against ``_run_largevis``).
        :type alpha: float

        :returns: The resulting embedding of the input data
        :rtype: pandas.DataFrame
    """
    # Imported locally so this function does not depend on the module's
    # top-level import block.
    import os
    import tempfile

    LargeVis.loadgraph(edgelist_filename)
    _run_largevis(outdim, alpha)

    # Get output data from LargeVis.
    # Use a uniquely named file instead of a fixed path in the shared temp
    # directory: a fixed name lets concurrent runs overwrite each other's
    # output and is not portable to systems without /tmp.
    file_descriptor, temp_file = tempfile.mkstemp(prefix="largevis_", suffix=".txt")
    # LargeVis.save() is given the path only, so the descriptor returned by
    # mkstemp is closed straight away.
    os.close(file_descriptor)
    try:
        LargeVis.save(temp_file)
        names = get_column_names(outdim)
        # header=0 makes pandas consume the first line of the file as a
        # header row, which is then replaced by the explicit column names.
        data_frame = pandas.read_csv(temp_file, sep=" ", names=names, header=0)
    finally:
        # Remove the temporary file so repeated calls do not leak files.
        # Cleanup is best effort: a failure to delete must not mask an
        # exception raised while saving or parsing.
        try:
            os.remove(temp_file)
        except OSError:
            pass

    return data_frame
Beispiel #3
0
def largevisproc(i_file, o_file, sim):
    """Run LargeVis on an input file and save the result.

    :param i_file: Path of the input file handed to LargeVis.
    :param o_file: Path the result is written to via ``LargeVis.save``.
    :param sim: When truthy the input is loaded with ``LargeVis.loadgraph``,
        otherwise with ``LargeVis.loadfile``.
    :returns: None
    """
    import LargeVis

    # Positional arguments for LargeVis.run, in call order. The -1 values are
    # presumably "use the library default" sentinels (TODO: confirm against
    # the LargeVis documentation).
    run_args = (
        2,   # outdim
        24,  # threads
        -1,  # samples
        -1,  # prop
        -1,  # alpha
        -1,  # trees
        -1,  # neg
        -1,  # neigh
        -1,  # gamma
        -1,  # perp
    )

    # Pick the loader matching the kind of input, then load the file.
    loader = LargeVis.loadgraph if sim else LargeVis.loadfile
    loader(i_file)

    # The value returned by run() is not used here; the result is written
    # out by save() below.
    _embedding = LargeVis.run(*run_args)

    LargeVis.save(o_file)
Beispiel #4
0
import LargeVis
import argparse

# Command-line front end for LargeVis: parse the options, load the input,
# run the layout and save the result.
# Every numeric option defaults to -1, which is passed to LargeVis.run
# unchanged (presumably "use the library default" -- confirm against the
# LargeVis documentation).
parser = argparse.ArgumentParser()
parser.add_argument('-fea', default=1, type=int, help='whether to visualize high-dimensional feature vectors or networks')
parser.add_argument('-input', default='', help='input file')
parser.add_argument('-output', default='', help='output file')
parser.add_argument('-outdim', default=-1, type=int, help='output dimensionality')
parser.add_argument('-threads', default=-1, type=int, help='number of training threads')
parser.add_argument('-samples', default=-1, type=int, help='number of training mini-batches')
parser.add_argument('-prop', default=-1, type=int, help='number of propagations')
parser.add_argument('-alpha', default=-1, type=float, help='learning rate')
parser.add_argument('-trees', default=-1, type=int, help='number of rp-trees')
parser.add_argument('-neg', default=-1, type=int, help='number of negative samples')
parser.add_argument('-neigh', default=-1, type=int, help='number of neighbors in the NN-graph')
parser.add_argument('-gamma', default=-1, type=float, help='weight assigned to negative edges')
parser.add_argument('-perp', default=-1, type=float, help='perplexity for the NN-graph')

args = parser.parse_args()

# -fea 1 loads the input with loadfile (feature vectors); any other value
# loads it with loadgraph (network / edgelist).
if args.fea == 1:
    LargeVis.loadfile(args.input)
else:
    LargeVis.loadgraph(args.input)

# The returned value is kept in Y but not used further in this script; the
# result is written out through LargeVis.save.
Y = LargeVis.run(
    args.outdim, args.threads, args.samples, args.prop, args.alpha,
    args.trees, args.neg, args.neigh, args.gamma, args.perp,
)

LargeVis.save(args.output)
Beispiel #5
0
#!/usr/bin/env python

import LargeVis
import argparse

# Command-line front end for LargeVis: parse the options, load the input,
# run the layout and save the result.
# Every numeric option defaults to -1, which is passed to LargeVis.run
# unchanged (presumably "use the library default" -- confirm against the
# LargeVis documentation).
parser = argparse.ArgumentParser()
parser.add_argument('--fea', default=1, type=int, help='whether to visualize high-dimensional feature vectors or networks')
# --input and --output are mandatory; argparse rejects the command line
# when either is missing.
parser.add_argument('--input', default='', help='input file', required=True)
parser.add_argument('--output', default='', help='output file', required=True)
parser.add_argument('--outdim', default=-1, type=int, help='output dimensionality')
parser.add_argument('--threads', default=-1, type=int, help='number of training threads')
parser.add_argument('--samples', default=-1, type=int, help='number of training mini-batches')
parser.add_argument('--prop', default=-1, type=int, help='number of propagations')
parser.add_argument('--alpha', default=-1, type=float, help='learning rate')
parser.add_argument('--trees', default=-1, type=int, help='number of rp-trees')
parser.add_argument('--neg', default=-1, type=int, help='number of negative samples')
parser.add_argument('--neigh', default=-1, type=int, help='number of neighbors in the NN-graph')
parser.add_argument('--gamma', default=-1, type=float, help='weight assigned to negative edges')
parser.add_argument('--perp', default=-1, type=float, help='perplexity for the NN-graph')

args = parser.parse_args()

# --fea 1 loads the input with loadfile (feature vectors); any other value
# loads it with loadgraph (network / edgelist).
if args.fea == 1:
    LargeVis.loadfile(args.input)
else:
    LargeVis.loadgraph(args.input)

# The returned value is kept in Y but not used further in this script; the
# result is written out through LargeVis.save.
Y = LargeVis.run(
    args.outdim, args.threads, args.samples, args.prop, args.alpha,
    args.trees, args.neg, args.neigh, args.gamma, args.perp,
)

LargeVis.save(args.output)
Beispiel #6
0
        out.write("{}\t{}\n".format(*features.shape))
        for row in tq(features):
            out.write('\t'.join(row.astype(str))+'\n')
    del features

    # now run Large Vis! (in 2D mode)

    LargeVis.loadfile(args.temp+"lv_format.txt")

    # samples only matters for graph layout
    samples = -1
    gamma = -1
    Y = LargeVis.run(2, args.threads, samples, args.prop, args.alpha, args.trees, args.neg, args.neigh, gamma, args.perp)
    if args.sampling == 'by_year':
        filename = '.{}.year_lv_coords'.format(args.sample_size)
    else:
        filename = '.{}.lv_coords'.format(args.sample_size)
    LargeVis.save(args.input+filename)

    donestring = """
    -----PROCESSING COMPLETE-----
    2D Embedding saved as: {}
    """.format(args.input+filename)
    if args.sampling is not None:
        donestring += "\nRandom indices saves as as: {}".format(args.input+idx_filename)
    print(donestring)