def main(args):
    """Project token vectors to 2-D with t-SNE and write them out as TSV.

    Each input line holds an optional leading token followed by float cells.
    Cells are tab-separated unless ``whitespace_cells`` is set, in which case
    any run of whitespace separates them. When ``no_tokens`` is set the
    1-based line number is used as the token instead.

    Output lines have the form ``token<TAB>x<TAB>y`` with both coordinates
    rescaled onto the range [0, 1].

    Args:
        args: Full argument vector; ``args[0]`` (the program name) is skipped
            before the rest is handed to the argument parser.

    Returns:
        0 on success (including empty input, which produces no output) and
        -1 when an input line cannot be parsed.
    """
    argp = _argparser().parse_args(args[1:])

    ### Build the numpy array to be used by the library
    # ``None`` makes ``str.split`` break on any run of whitespace.
    cell_sep = None if argp.whitespace_cells else '\t'

    # Keep the tokens along to align them later
    toks = []
    toks_vals = []
    stripped_lines = (l.rstrip('\n') for l in argp.input)
    for line_num, line in enumerate(stripped_lines, start=1):
        try:
            if not argp.no_tokens:
                tok, vals_str = line.split(cell_sep, 1)
            else:
                # No token column: identify the row by its line number.
                tok, vals_str = line_num, line
            tok_vals = [float(v) for v in vals_str.split(cell_sep)]
        except ValueError:
            # Raised both by a failed two-way unpack (separator missing) and
            # by a cell that is not a valid float.
            stderr.write('ERROR: Failed to read input line %s "%s"\n'
                    % (line_num, line, ))
            stderr.write('ERROR: Is the input perhaps separated by '
                    'spaces instead of tabs? If so, try the -w flag\n')
            return -1
        toks.append(tok)
        toks_vals.append(tok_vals)

    if not toks_vals:
        # Nothing to project: emit no rows rather than handing t-SNE an
        # empty matrix and calling max()/min() on an empty sequence.
        return 0

    toks_vals_array = array(toks_vals)
    del toks_vals

    ### Perform t-SNE (this is heavy stuff)
    # TODO: A majority of the below arguments could be program arguments
    tsne_output = tsne(toks_vals_array, no_dims=2, perplexity=30,
            initial_dims=30)
    del toks_vals_array

    ### Re-format and produce TSV output
    # Normalise the dimensions onto the range [0, 1]
    xs = [x for x, _ in tsne_output]
    ys = [y for _, y in tsne_output]
    min_x, max_x = min(xs), max(xs)
    min_y, max_y = min(ys), max(ys)
    # max >= min, so the plain difference already is the interval length.
    # A degenerate axis (all points share the coordinate) has length 0; fall
    # back to 1.0 so those points map to 0.0 instead of dividing by zero.
    x_interval = (max_x - min_x) or 1.0
    y_interval = (max_y - min_y) or 1.0
    normalised_tsne_output = (
            ((x - min_x) / x_interval, (y - min_y) / y_interval)
            for x, y in tsne_output)

    ### And write it all out
    for token, point in izip(toks, normalised_tsne_output):
        argp.output.write('{0}\t{1}\t{2}\n'.format(token, point[0], point[1]))

    return 0
def main(argv=None):
    """Load one or two data files, embed them with t-SNE and plot the result.

    Rows from ``args.file`` are drawn in green; rows from the optional
    ``args.file2`` are stacked underneath them and drawn in gray. The figure
    is saved to ``args.out`` when that is set, otherwise it is shown.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.
    """
    if argv is None:
        argv = sys.argv[1:]
    args = parser.parse_args(argv)
    log.info('start parameters: ' + str(args))

    log.info('loading data')
    titles, x = getData(args.file)
    # Remember where the first file ends so the groups can be split again
    # after the joint embedding.
    n_first = len(titles)
    second_given = args.file2 is not None
    if second_given:
        extra_titles, extra_x = getData(args.file2)
        titles.extend(extra_titles)
        x = np.vstack((x, extra_x))

    # NOTE: scaling the data (scaleData) is currently disabled.

    log.info('performing t-SNE')
    embedding = tsne(x, no_dims=2, perplexity=30, initial_dims=100,
                     use_pca=False)

    def labelled(names, coords):
        # Pair every title with the two coordinates of its embedded point.
        return [(name, xy[0], xy[1]) for name, xy in zip(names, coords)]

    groups = [('green', labelled(titles[:n_first], embedding[:n_first, :]))]
    if second_given:
        groups.append(
            ('gray', labelled(titles[n_first:], embedding[n_first:, :])))

    # NOTE: PCA and MDS projections were tried here as alternatives and are
    # currently disabled.

    log.info('rendering result')
    render_points(groups, 20, 20)

    if not args.out:
        plt.show()
    else:
        plt.savefig(args.out, dpi=600)

    log.info('finished')
def main(argv=None):
    """Run the t-SNE plotting pipeline: parse, load, embed, render.

    The primary file's points are coloured green. If a second file is
    supplied its rows are appended to the first before embedding and its
    points are coloured gray. Output goes to ``args.out`` if set, else to an
    interactive window.

    Args:
        argv: Command-line arguments without the program name. ``None``
            means "use ``sys.argv[1:]``".
    """
    if argv is None:
        argv = sys.argv[1:]
    args = parser.parse_args(argv)
    start_message = 'start parameters: ' + str(args)
    log.info(start_message)

    # --- load -------------------------------------------------------------
    log.info('loading data')
    titles, x = getData(args.file)
    file_size1 = len(titles)
    if args.file2 is not None:
        second = getData(args.file2)
        titles.extend(second[0])
        x = np.vstack((x, second[1]))

    # --- embed (data scaling is disabled) -----------------------------------
    log.info('performing t-SNE')
    out = tsne(x, no_dims=2, perplexity=30, initial_dims=100, use_pca=False)

    # --- group points by source file ----------------------------------------
    green = []
    for title, point in zip(titles[:file_size1], out[:file_size1, :]):
        green.append((title, point[0], point[1]))
    points = [('green', green)]

    if args.file2 is not None:
        gray = []
        for title, point in zip(titles[file_size1:], out[file_size1:, :]):
            gray.append((title, point[0], point[1]))
        points.append(('gray', gray))

    # --- render (PCA / MDS alternatives are disabled) ------------------------
    log.info('rendering result')
    render_points(points, 20, 20)

    destination = args.out
    if destination:
        plt.savefig(destination, dpi=600)
    else:
        plt.show()

    log.info('finished')
#!/usr/bin/env python
# Read the gzipped Turian English word embeddings, project them to 2-D with
# t-SNE and render the labelled points to a PNG image.

import gzip
import string  # no longer used below; kept in case other code relies on it

import numpy

# Imported up front so a missing module is reported before the (slow) data
# load. An alternative implementation is available via `from tsne import tsne`.
from calc_tsne import tsne
import render

titles, x = [], []
# The context manager closes the gzip handle even if parsing fails.
with gzip.open("testdata/english-embeddings.turian.txt.gz", "rb") as o:
    for l in o:
        # str.split() replaces the deprecated string.split() function and
        # splits on any run of whitespace in the same way.
        toks = l.split()
        if not toks:
            # Skip blank lines instead of failing on toks[0].
            continue
        titles.append(toks[0])
        x.append([float(f) for f in toks[1:]])
x = numpy.array(x)

# Variants passing USE_PCA=False / use_pca=False were tried previously and
# are not used here.
out = tsne(x, no_dims=2, perplexity=30, initial_dims=30)

render.render([(title, point[0], point[1])
               for title, point in zip(titles, out)],
              "test-output.rendered.png", width=3000, height=1800)