def run(args):
    edges, adj_lists, adj_array, features_array, labels = load_data(
        args.data_dir, args.dataset, normalized=args.normalized)

    # Frozen embedding table holding the raw node features
    features = nn.Embedding(features_array.shape[0], features_array.shape[1])
    features.weight = nn.Parameter(torch.FloatTensor(features_array),
                                   requires_grad=False)
    if args.cuda:
        features = features.cuda()

    model = infomaxANE(adj_lists, adj_array, features, args)
    print_params(model)
    optimizer = torch.optim.SGD(
        filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr)
    if args.cuda:
        model.cuda()

    batch_nums = len(edges) // args.batch_size + 1
    total_time = 0
    for epoch in range(args.epoch):
        start_time = time.time()
        random.shuffle(edges)
        epoch_loss = 0
        for batch in range(batch_nums):
            batch_edges = edges[batch * args.batch_size:
                                (batch + 1) * args.batch_size]
            optimizer.zero_grad()
            loss = model(batch_edges)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        epoch_time = time.time() - start_time
        total_time += epoch_time
        print('epoch:%d, loss:%.6f, time:%.2f' % (epoch, epoch_loss, epoch_time))

        # Periodically evaluate and optionally save the learned embeddings
        if (epoch + 1) % args.print_epoch == 0:
            emb = model.get_all_embs()
            if args.cuda:
                emb = emb.cpu()
            evaluate(emb, labels)
            cluster(emb, labels)
            if args.save:
                np.save(args.savedir + '/' + args.dataset +
                        '/infomaxANE_' + args.dataset + '.npy', emb)
    print('time_per_epoch:%.2f' % (total_time / args.epoch))
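# Note: evaluate() and cluster() above are project evaluation hooks whose
# bodies are not part of this collection. A plausible, assumed sketch of
# the clustering evaluation (not the project's actual code): run KMeans
# on the embeddings and score the grouping against the true labels via NMI.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import normalized_mutual_info_score

def cluster(emb, labels):
    emb = np.asarray(emb)
    pred = KMeans(n_clusters=len(set(labels))).fit_predict(emb)
    print('NMI: %.4f' % normalized_mutual_info_score(labels, pred))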
def test(points, value):
    """Score how well utils.cluster recovers the original grouping."""
    K = len(points)
    flat_points = []
    for sl in points:
        flat_points.extend(sl)

    # Fraction of clusters preserved: 1 - |symmetric difference| / |ls1|
    def verify(ls1, ls2):
        diff = [x for x in ls1 if x not in ls2] + [x for x in ls2 if x not in ls1]
        return (len(ls1) - len(diff)) / len(ls1)

    return verify(points, utils.cluster(flat_points, value=value, K=K))
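# The project-local utils.cluster(points, value=..., K=...) is not shown in
# this collection. A minimal assumed sketch matching the call signature:
# 1-D k-means over the scalar produced by `value`, returning K lists of
# points. The real helper may differ.
import random

def cluster(points, value, K):
    points = list(points)
    keys = [value(p) for p in points]
    centers = random.sample(keys, K)  # initial 1-D centers
    for _ in range(20):               # fixed iteration budget
        groups = [[] for _ in range(K)]
        for p, k in zip(points, keys):
            nearest = min(range(K), key=lambda i: abs(k - centers[i]))
            groups[nearest].append(p)
        # Recompute each center as the mean key of its group
        centers = [sum(value(p) for p in g) / len(g) if g else centers[i]
                   for i, g in enumerate(groups)]
    return groups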
from functools import reduce


def to_handmodel(hand_crop, direction='up'):
    handcontour = utils.skindetect(hand_crop)
    hand = utils.cut(hand_crop, handcontour)
    handskeleton = utils.skeletonize(hand)
    fingerlines = utils.linedetect(handskeleton)

    # Keep the endpoint of each line that faces the fingertips
    if direction == 'dn':
        handmodel = [[l[2], l[3]] for l in fingerlines]
    else:
        handmodel = [[l[0], l[1]] for l in fingerlines]

    # More than four candidate tips: cluster down to four fingers,
    # then collapse each cluster to the midpoint of its members
    if len(handmodel) > 4:
        handmodel = utils.cluster(handmodel,
                                  value=lambda p: p[0]**2 + p[1]**2,
                                  K=4)
        combine = lambda p1, p2: [(p1[0] + p2[0]) / 2, (p1[1] + p2[1]) / 2]
        handmodel = [reduce(combine, l) for l in handmodel]
    return handmodel
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(epochs):
        _, _ = sess.run([train, loss],
                        feed_dict={timeseries_x: x, timeseries_y: y})
        if i % 500 == 0:
            validation_loss, validation_dim_reduction = sess.run(
                [loss, encoded],
                feed_dict={timeseries_x: val_x, timeseries_y: val_y})
            print('Validation loss: {}'.format(validation_loss))
            score = utils.cluster_compare(validate_labels,
                                          validation_dim_reduction)
            print('Adjusted Rand Index: {}'.format(score))

    encoded_timeseries = sess.run(encoded,
                                  feed_dict={timeseries_x: timeseries,
                                             timeseries_y: timeseries})
    pred_timeseries_y = utils.cluster(encoded_timeseries)
    print(pred_timeseries_y.labels_)
    utils.save(pred_timeseries_y.labels_)
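# The returned object exposes .labels_, which matches a fitted scikit-learn
# clusterer. A minimal sketch of such a utils.cluster, assuming KMeans;
# the n_clusters value is a hypothetical placeholder:
from sklearn.cluster import KMeans

def cluster(encoded, n_clusters=8):
    """Fit KMeans on the encoded time series and return the fitted model."""
    return KMeans(n_clusters=n_clusters, random_state=0).fit(encoded)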
from functools import reduce


def get_guitar(frame):
    guitar["available"] = False

    # Get all lines in current frame that are long enough to be strings
    lines = utils.linedetect(frame, minLineLength=180)
    # Return if no lines
    if len(lines) < 1:
        return guitar
    # Convert to line model
    lines = list(map(lambda l: utils.tolinemodel(l), lines))

    # Get average angle of all lines
    string_angle = sum(lm["angle"] for lm in lines) / len(lines)

    # Filter down to all lines within delta angle of the average
    delta_angle = 2  # degrees
    lines_near_avg = list(
        filter(lambda lm: abs(lm["angle"] - string_angle) < delta_angle, lines))
    # If the lines that fit the average are not the majority, exit
    if len(lines_near_avg) < len(lines) / 2:
        return guitar

    # Search for strings and append/update string locations
    string_lines = utils.cluster(lines_near_avg,
                                 value=lambda lm: lm["origin"], K=NUM_STRINGS)
    if string_lines is None:
        return guitar
    string_lines = [reduce(utils.combine_linemodel, sl) for sl in string_lines]
    guitar["available"] = True

    # Update string locations
    if len(guitar["locations"]["strings"]) < 4 or \
       len(string_lines) == NUM_STRINGS:
        # Update all strings
        guitar["locations"]["strings"] = list(
            map(lambda lm: lm["line"], string_lines))
    else:
        # TODO: Collectively move strings by average displacement
        guitar["locations"]["strings"] = list(
            map(lambda lm: lm["line"], string_lines))

    # Bounding box is x, y coordinates of both ends for first and last strings
    bounding_box = []
    bounding_box.append(guitar["locations"]["strings"][0][:2])
    bounding_box.append(guitar["locations"]["strings"][0][2:])
    bounding_box.append(guitar["locations"]["strings"][-1][:2])
    bounding_box.append(guitar["locations"]["strings"][-1][2:])
    print("Bounding box: {}".format(bounding_box))

    # Search for frets and append/update fret locations
    lines = utils.linedetect(frame, minLineLength=20, maxLineGap=20)
    # Return if no lines
    if len(lines) < 1:
        return guitar
    # Convert to line model
    lines = list(map(lambda l: utils.tolinemodel(l), lines))

    # Frets are 90 degrees to the strings
    fret_angle = string_angle + 90
    # Filter down to all lines within twice the delta angle of the fret angle
    lines_near_avg = list(filter(
        lambda lm: abs(lm["angle"] - fret_angle) < 2 * delta_angle, lines))
    # Filter down to lines that lie partially inside space made by strings
    print("Frets Found:\n{}".format(
        list(map(lambda lm: lm["line"], lines_near_avg))))

    # TODO: Figure this out
    def box_check(lm):
        return True

    lines_near_avg = list(filter(box_check, lines_near_avg))
    # If there aren't enough lines to support this operation, return
    if len(lines_near_avg) < 1:  # NUM_FRETS/2:
        return guitar

    fret_lines = utils.cluster(lines_near_avg,
                               value=lambda lm: lm["origin"], K=NUM_STRINGS)
    if fret_lines is None:
        return guitar
    fret_lines = [reduce(utils.combine_linemodel, sl) for sl in fret_lines]

    guitar["locations"]["frets"] = list(map(lambda lm: lm["line"], fret_lines))
    guitar["available"] = True
    return guitar
# Save the original image overlaid with the estimated heatmap
cv2.imwrite(os.path.join(args.out,
                         'intermediate', 'estimated_map',
                         dictionaries[0]['filename']),
            orig_img_w_heatmap_origsize.transpose((1, 2, 0))[:, :, ::-1])

# Tensor -> int
est_count_int = int(round(est_count.item()))

# The estimated map must be thresholded to obtain estimated points
for t, tau in enumerate(args.taus):
    if tau != -2:
        mask, _ = utils.threshold(est_map_np_origsize, tau)
    else:
        mask, _, mix = utils.threshold(est_map_np_origsize, tau)
        bmm_tracker.feed(mix)
    centroids_wrt_orig = utils.cluster(mask, est_count_int,
                                       max_mask_pts=args.max_mask_pts)

    # Save thresholded map to disk
    os.makedirs(os.path.join(args.out,
                             'intermediate',
                             'estimated_map_thresholded',
                             f'tau={round(tau, 4)}'),
                exist_ok=True)
    cv2.imwrite(os.path.join(args.out,
                             'intermediate',
                             'estimated_map_thresholded',
                             f'tau={round(tau, 4)}',
                             dictionaries[0]['filename']),
                mask)

    # Paint red dots if user asked for it
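# Sketch of the mask -> centroids step, assuming utils.cluster runs k-means
# over the coordinates of active mask pixels (a hypothetical reimplementation,
# not the repository's code):
import numpy as np
from sklearn.cluster import KMeans

def cluster(mask, n_pts, max_mask_pts=np.inf):
    coords = np.argwhere(mask > 0)      # (row, col) of active pixels
    if len(coords) == 0 or n_pts < 1:
        return np.empty((0, 2))
    if len(coords) > max_mask_pts:      # subsample for speed
        idx = np.random.choice(len(coords), max_mask_pts, replace=False)
        coords = coords[idx]
    n_pts = min(n_pts, len(coords))     # no more centroids than points
    return KMeans(n_clusters=n_pts).fit(coords).cluster_centers_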
def extract_experiments(data, datapath, resultspath, metric='area', method='raw'):
    '''Extracts the Mass Spec data, when multiple experiments are stored
    in a single file.'''
    NORMMETHOD = 'l1'  # 'l2' destroys signal from low throughput

    # Get data and group by experiment
    df = pd.read_csv('{}/IPMSData.txt'.format(datapath), delimiter='\t')

    # Replace missing iBAQ values with the smallest observed nonzero value
    min_value = min(df[df['iBAQ'] != 0]['iBAQ'])
    print(min_value)  # 3.02044e-05
    df['iBAQ'] = df['iBAQ'].fillna(min_value)
    print(len(df['iBAQ']), "original length")

    df = df[df['GeneName'] != 'EGFR']

    # Map each experiment to its time point, set, and usability flag
    lines = open('{}/ExperimentsKey.csv'.format(datapath)).read().replace(
        '\r', '\n').split('\n')
    expmapping = {}
    useexp = {}
    experimentset = {}
    for line in lines[1:]:
        if line == '':
            continue
        line = line.strip().split(',')
        exp = int(line[0])
        expmapping[exp] = float(line[1])
        useexp[exp] = int(line[3])
        experimentset[exp] = int(line[2])
    df['Set'] = [experimentset[x] for x in df['Experiment']]
    df['Use'] = [useexp[x] for x in df['Experiment']]

    # Get experiments and all genes in IPMSs
    experiments = sorted(set(df['Experiment']))
    experiments = [x for x in experiments if useexp[x]]
    allgenes = sorted(set(df['GeneName']))
    print('total genes', len(allgenes))

    # Get unique experiments
    uniqueexperiments = sorted(set(expmapping[x] for x in experiments))

    # Initialize a matrix for each experiment set to store the IPMS data
    # for each experiment on every gene
    mats = []
    raw_mats = []
    for expset in set(experimentset.values()):
        data = np.full((len(uniqueexperiments), len(allgenes)), min_value)
        rawdata = np.full((len(uniqueexperiments), len(allgenes)), min_value)
        labels = [str(x) for x in uniqueexperiments]
        times = uniqueexperiments

        toaverage = []
        toaverageind = []
        tmpdf = df[df['Set'] == expset]
        tmpdf = tmpdf[tmpdf['Use'] == 1]
        gb = tmpdf.groupby('Experiment')
        tmpexperiments = list(set(tmpdf['Experiment']))

        # Fill matrix
        for i in range(len(tmpexperiments)):
            experiment = tmpexperiments[i]
            dfa = gb.get_group(experiment)
            genes = list(dfa['GeneName'])
            ind = [allgenes.index(x) for x in genes]
            pair = (labels.index(str(expmapping[experiment])), ind)
            if metric == 'area':
                if pair in toaverage:
                    data[pair] = data[pair] + dfa['iBAQ']
                    print('HERE')
                    exit()
                else:
                    data[pair] = dfa['iBAQ']
                    rawdata[pair] = dfa['iBAQ']
                    toaverage.append(pair)
                    for p in pair[1]:
                        toaverageind.append((pair[0], p))
            else:
                print('Please choose "area"')
                sys.exit()
        toaverage = list(set(toaverageind))

        # Normalization
        data[data == 0] = min_value
        rawdata[rawdata == 0] = min_value
        if method == 'norm':
            data = normalize(data, norm=NORMMETHOD, axis=0)  # Normalize each gene
        elif 'gradient' in method:
            if 'norm' in method:
                data = normalize(data, norm=NORMMETHOD, axis=1)
                data = normalize(data, norm=NORMMETHOD, axis=0)  # Normalize each gene
            # Finite-difference gradient between consecutive time points
            new = np.zeros_like(data)
            for i in range(1, len(uniqueexperiments)):
                new[i, :] = (data[i, :] - data[i - 1, :]) / (times[i] - times[i - 1])
            data = new
            data = data[1:, :]
        else:
            print('defaulting to no normalization')
        mats.append(data)
        raw_mats.append(rawdata)

    # Average over experiment sets
    raw = np.mean(np.array(raw_mats), axis=0)
    data = np.mean(np.array(mats), axis=0)
    if 'gradient' in method:
        labels = labels[1:]
        times = times[1:]

    # Write matrix to file
    f = open('{}/Matrix_{}_{}.txt'.format(resultspath, metric, method), 'w')
    w, h = data.shape  # @UnusedVariable
    f.write('Genes\t{}\n'.format('\t'.join(labels)))
    for i in range(h):
        s = '\t'.join(str(x) for x in data[:, i])
        f.write("{}\t{}\n".format(allgenes[i], s))
    f.close()

    # Visualize changes as heatmap
    heatmap(data, labels, allgenes,
            'noclusters_{}_{}'.format(metric, method), resultspath)

    # Plot all gene trends together
    plt.figure()
    plt.plot(times, np.mean(data, axis=1), '--', lw=3)
    for i in range(len(allgenes)):
        plt.plot(times, data[:, i], alpha=0.01)
    plt.savefig("{}/allchanges_{}_{}.pdf".format(resultspath, metric, method))
    plt.xscale('linear')
    plt.close()

    # Cluster
    clusters, dev = cluster(data, labels, allgenes, times, resultspath,
                            metric + '_' + method)
    return data, allgenes, labels, (clusters, dev), raw
import sys
import time
import random

from eventlet.green import socket
from eventlet.green import zmq
from eventlet.hubs import use_hub
from zmq import devices
import memcache

import utils

use_hub('zeromq')

task = sys.argv[1]
nodes = utils.cluster(sys.argv[2])
ctx = zmq.Context()
messages = []


def stopped(count, td):
    # Report message count, elapsed time, and throughput, then exit
    print(count)
    print(td)
    print(1 / (td / count))
    sys.exit(0)


def enqueuer(n):
    frontend = ctx.socket(zmq.REQ)
    for node in nodes:
        frontend.connect('tcp://%s:7000' % node)
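# Here "cluster" means a machine cluster, not a clustering algorithm:
# utils.cluster(name) yields the node hostnames to connect to. An assumed
# sketch; the hostfile path and format are hypothetical:

def cluster(name):
    with open('clusters/%s.txt' % name) as f:
        return [line.strip() for line in f if line.strip()]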
from collections import defaultdict


def dict_sum(ds):
    """Merge a list of count dicts by summing values per key."""
    rval = defaultdict(int)
    for d in ds:
        for key in d:
            rval[key] += d[key]
    return rval


aspects_cluster2names = defaultdict(dfint)
aspects_cluster2opinions = defaultdict(dfint)

ii = open("result/aspects_cluster.txt", 'r')
oo = open("result/aspect_opinion_cluster.txt", 'w')
for line in ii:
    oo.write(line)
    parts = line.strip().split(':')
    cluster_name = parts[0]
    aspects_names = [asp.split('~')[0]
                     for asp in ':'.join(parts[1:]).split('/')]
    print(cluster_name)
    print('/'.join(aspects_names))

    opinions = dict_sum([aspect2opinion[asp.split('~')[0]]
                         for asp in aspects_names
                         if asp in aspect2opinion])
    #aspects_cluster2names[cluster_name]=aspects_names
    #aspects_cluster2opinions[cluster_name]=opinions
    print(len(opinions))

    # Split opinions by sentiment polarity and cluster each side
    good_keys = [key for key in opinions
                 if key in senti_dict and senti_dict[key] == '1']
    labels = cluster(good_keys, 5)
    oo.write("good_opinions:\n")
    output_cluster(oo, labels, good_keys)

    bad_keys = [key for key in opinions
                if key in senti_dict and senti_dict[key] == '-1']
    labels = cluster(bad_keys, 5)
    oo.write("bad_opinions:\n")
    output_cluster(oo, labels, bad_keys)
def work(names, path, count):
    labels = cluster(names, num=25)
    names = [name + "~" + str(count[name]) for name in names]
    oo = open("result/{}_cluster.txt".format(path), 'w')
    output_cluster(oo, labels, names)
from __future__ import print_function
import glob
import json
import os

import numpy as np

import utils

# Folder Directory
mdir = '../data/'
folders = os.listdir(mdir)
np.random.seed(seed=0)

for folder in folders:
    print(folder + '/' + str(len(folders)))
    fdir = folder + '/'
    j_out = '../out/'
    json_out = j_out + folder + '.json'
    images = glob.glob(mdir + fdir + '*.jpg')

    # Algorithm: pairwise distances -> similarity graph -> clusters
    distM = utils.getdistanceM(images, eq=True)
    dic = utils.isSimilar(distM, thresh=5)
    dic_out = utils.cluster(dic, images, distM)
    with open(json_out, 'w') as fp:
        json.dump(list(dic_out.values()), fp)