def dbscan(self, D, eps, MinPts):
    self.dataSet = D
    C = -1
    Noise = cluster('Noise')
    fig = plt.figure()
    ax = plt.axes(projection='3d')
    ax.set_title('dbscaned data')
    for point in D:
        if point not in self.visited:
            self.visited.append(point)
            NeighbourPoints = self.regionQuery(point, eps)
            if len(NeighbourPoints) < MinPts:
                Noise.addPoint(point)
            else:
                name = 'Cluster' + str(self.count)
                C = cluster(name)
                self.count += 1
                self.expandCluster(point, NeighbourPoints, C, eps, MinPts)
                ax.plot(C.getX(), C.getY(), C.getZ(), '.', label=name)
    if len(Noise.getPoints()) != 0:
        ax.plot(Noise.getX(), Noise.getY(), Noise.getZ(), '.', label='Noise')
    plt.legend(loc='lower left')
    #plt.show()
    plt.savefig('dbscaned.png')
def dbscan(self, D, eps, MinPts):
    self.dataSet = D
    title(r'DBSCAN Algorithm', fontsize=18)
    xlabel(r'Dim 1', fontsize=17)
    ylabel(r'Dim 2', fontsize=17)
    C = -1
    Noise = cluster('Noise')
    for point in D:
        if point not in self.visited:
            self.visited.append(point)
            NeighbourPoints = self.regionQuery(point, eps)
            if len(NeighbourPoints) < MinPts:
                Noise.addPoint(point)
            else:
                name = 'Cluster' + str(self.count)
                C = cluster(name)
                self.count += 1
                self.expandCluster(point, NeighbourPoints, C, eps, MinPts)
                plot(C.getX(), C.getY(), 'o', label=name)
                hold(True)
    if len(Noise.getPoints()) != 0:
        plot(Noise.getX(), Noise.getY(), 'x', label='Noise')
    hold(False)
    legend(loc='lower left')
    grid(True)
    show()
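# The two DBSCAN variants above call into a `cluster` point container that is
# never shown in these snippets. Below is a minimal sketch of what the call
# sites require (addPoint, getPoints, getX/getY/getZ); the bodies are
# assumptions, assuming each point is an indexable (x, y[, z]) sequence.
class cluster(object):
    def __init__(self, name):
        self.name = name
        self.points = []

    def addPoint(self, point):
        self.points.append(point)

    def getPoints(self):
        return self.points

    # Coordinate accessors used only for plotting.
    def getX(self):
        return [p[0] for p in self.points]

    def getY(self):
        return [p[1] for p in self.points]

    def getZ(self):
        return [p[2] for p in self.points]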
def execute2(parametersobject):
    parametersobject.read_derived()
    if not parametersobject.parameterdic['Replot_only']:
        analyse_trjs(parametersobject)
    if parametersobject.parameterdic['Plot_energy']:
        get_energies(parametersobject)
    contactmap_getdata(parametersobject)
    cluster(parametersobject)
    graph_angles(parametersobject)
    contactmap_draw(parametersobject)
    cluster_dotplot(parametersobject)
def main():
    screen_size = [1120, 630]
    screen = display.set_mode(screen_size)
    display.set_caption("Antivirus")
    main_scr = image.load('data/main.png').convert()
    main_bar = image.load('data/bar/bar.png').convert()
    main_btn = [image.load('data/buttons/main.png').convert(),
                image.load('data/buttons/main1.png').convert()]
    share_btn = [image.load('data/buttons/share.png').convert(),
                 image.load('data/buttons/share1.png').convert()]
    bar_btn = [image.load('data/bar/btn.png').convert(),
               image.load('data/bar/btn1.png').convert(),
               image.load('data/bar/btno.png').convert()]
    bar_btn1 = [image.load('data/bar/btn10.png').convert(),
                image.load('data/bar/btn11.png').convert(),
                image.load('data/bar/btn1o.png').convert()]
    inp_box = [image.load('data/input_box/input_box.png').convert(),
               image.load('data/input_box/input_box1.png').convert(),
               image.load('data/input_box/input_box_o.png').convert(),
               image.load('data/input_box/browse.png').convert(),
               image.load('data/input_box/browse1.png').convert()]
    sett = [image.load('data/bar/sett.png').convert(),
            image.load('data/bar/sett1.png').convert(),
            image.load('data/bar/setto.png').convert()]
    rp = [image.load('data/buttons/radio_buttons/panelc.png').convert(),
          image.load('data/buttons/radio_buttons/panelc1.png').convert(),
          image.load('data/buttons/radio_buttons/panelo.png').convert()]
    rb = [image.load('data/buttons/radio_buttons/btn.png').convert(),
          image.load('data/buttons/radio_buttons/btn2.png').convert(),
          image.load('data/buttons/radio_buttons/btn1.png').convert(),
          image.load('data/buttons/radio_buttons/btn3.png').convert()]
    x = radio_panel([radio_button([383, 310, 300, 30], "Full Scan", screen, rb),
                     radio_button([383, 340, 300, 30], "Only Executable Scan", screen, rb)],
                    [383, 300, 300, 30], screen, rp)
    c1 = cluster([button([443, 240, 193, 50], func.test, screen, main_btn), x],
                 [input_box([250, 200, 580, 30], screen, inp_box)],
                 [5, 5, 100, 40], screen, bar_btn)
    c2 = cluster([button([443, 240, 193, 50], func.ps, screen, share_btn)],
                 [input_box([250, 200, 580, 30], screen, inp_box)],
                 [110, 5, 100, 40], screen, bar_btn1)
    c3 = cluster([], [], [1075, 5, 40, 40], screen, sett)
    c = cluster_panel([c1, c2, c3])
    screen_opened = True
    fps = time.Clock()
    while screen_opened:
        screen.blit(main_scr, [0, 0])
        screen.blit(main_bar, [0, 0])
        c.draw()
        display.flip()
        for cur in event.get():
            if cur.type == MOUSEBUTTONUP:
                if cur.button == 1:
                    c.click()
            c.input(cur)
            if cur.type == QUIT:
                screen_opened = False
                quit()
        fps.tick(30)
def gamma2(x, h, N):
    """Computes the second moment of the cluster size for a given DGFF
    sample and threshold h."""
    y = levelset(x, h)
    z = cluster(y, N)
    clusterarray = np.bincount(np.bincount(z)[:-1])
    gamma = (clusterarray * np.arange(len(clusterarray))**2).sum()
    return gamma
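# The double np.bincount in gamma2 is easy to misread: the inner call counts
# the size of each labelled cluster, the outer call counts how many clusters
# have each size, so the weighted sum is sum_s s^2 * n(s). A self-contained
# sketch with made-up labels; the [:-1] slice is assumed to drop the last
# label's count (presumably a background label in the original).
import numpy as np

z = np.array([0, 0, 1, 1, 1, 2, 2, 2])  # hypothetical cluster labels for 8 sites
sizes = np.bincount(z)                   # [2, 3, 3]: sizes of clusters 0, 1, 2
counts = np.bincount(sizes[:-1])         # [0, 0, 1, 1]: one cluster of size 2, one of size 3
second_moment = (counts * np.arange(len(counts))**2).sum()  # 2**2 + 3**2 = 13
print(second_moment)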
def searchBK(self, file_in, lmsi):
    file_in.seek(0)
    candidate = []
    while True:
        line1 = file_in.readline()
        if not line1:
            break
        line2 = file_in.readline().strip()
        line1_list = line1.split('\t')
        line2_list = line2.split('\t')
        readseq1 = line1_list[9].upper()
        readseq2 = line2_list[9].upper()
        pairedread = pairedRead(line1_list[0], line1_list[1], line1_list[2], line1_list[3],
                                line1_list[4], line1_list[5], line1_list[6], line1_list[7],
                                line1_list[8], readseq1,
                                line2_list[0], line2_list[1], line2_list[2], line2_list[3],
                                line2_list[4], line2_list[5], line2_list[6], line2_list[7],
                                line2_list[8], readseq2)
        if (pairedread.read1.CIGAR == '100M' and pairedread.read2.CIGAR != '100M' and
                pairedread.read2.isRepete() is False and
                int(pairedread.read1.POS) < int(pairedread.read2.POS)):
            bkun = pairedread.read2.getbkunit()
            if bkun[0] and bkun[1]:
                candidate.append(bkun)
    cpos = cluster(candidate, 100, 0)
    print len(cpos)
    pos = []
    for i in range(len(cpos)):
        pos.append(cpos[i][0])
    return pos
def noise_removal(pcd_list, ratio=0.75):
    """
    Remove noise by keeping the largest clusters after clustering with DBScan.

    First, we keep the largest cluster C1, then we check whether the size of
    the second largest cluster C2 is bigger than C1.size * ratio. If it is,
    we also keep C2 and repeat the test with C3, and so on.
    """
    # Cluster the points
    clusters = cluster(pcd_list, 'dbscan', {'eps': 0.00005})
    # Get the cluster sizes
    sizes = clustersSize(clusters)
    # Keep the largest cluster
    largest_c = getCluster(clusters, sizes[0][0])
    # For each remaining cluster size
    for s in sizes[1:]:
        # Is the current cluster bigger than the largest cluster times the ratio?
        if s[1] >= sizes[0][1] * ratio:
            # Keep it if so
            largest_c += getCluster(clusters, s[0])
        else:
            break
    return largest_c
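# noise_removal leans on two helpers, clustersSize and getCluster, that are
# not shown. A minimal sketch under the assumption that `clusters` maps each
# point index to an integer label (as scikit-learn's DBSCAN would produce);
# the signatures match the call sites, the bodies are guesses.
from collections import Counter

def clustersSize(clusters):
    # (label, size) pairs sorted by size, largest first; label -1
    # (DBSCAN noise) is assumed to be excluded.
    counts = Counter(label for label in clusters if label != -1)
    return counts.most_common()

def getCluster(clusters, label):
    # Indices of the points assigned to `label`.
    return [i for i, l in enumerate(clusters) if l == label]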
def __init__(self, set_name):
    self.job_root = "job_configs/%s" % set_name
    self.job_files = glob.glob(r'job_configs/%s/job*.json' % set_name)
    self.num_jobs = len(self.job_files)
    self.job_set = []
    self.load_job_set()
    self.clust = cluster(CLUSTER)
def muL_ss():
    """
    Run the search with multiple processes; each process calls start_search.
    :return:
    """
    tbw_dict = cluster()  # {bid1: [(title1, blog1, word1), (title2, blog2, word2)], bid2: [(title4, blog4, word4), (title5, blog5, word5)]}
    processes = []
    for (bid, tbw_list) in tbw_dict.items():
        # One search process per bid; start them all before joining so they
        # actually run concurrently (joining inside the loop serializes them).
        search_process = multiprocessing.Process(target=start_search, args=(bid, tbw_list))
        search_process.start()
        processes.append(search_process)
    for search_process in processes:
        search_process.join()
def k_means(data, k, iterations):
    """
    Actual K-means algorithm.
    :param data: the data to run the algorithm on. Should be a numpy array.
    :param k: the number of clusters you want to use.
    :return: the list of clusters, each holding its center
    """
    # Get min and max of data
    minima, maxima = np.min(data, axis=0), np.max(data, axis=0)
    # Draw cluster centers from a uniform distribution
    centers_x = np.random.uniform(minima[0], maxima[0], k)
    centers_y = np.random.uniform(minima[1], maxima[1], k)
    # Initialise the clusters
    coordinates = [np.array([centers_x[i], centers_y[i]]) for i in range(k)]
    clusters = []
    for center in coordinates:
        clusters.append(cluster(center))
        print(center)
    # Keep track of the cluster index for each point
    point_to_cluster = [-1] * data.shape[0]
    # Main loop
    for it in range(iterations):
        # Assign each point to its closest cluster
        for pt_idx in range(data.shape[0]):
            point = data[pt_idx, :]
            closest = get_closest(point, clusters)
            if closest != -1:
                clusters[closest].add_member(pt_idx, point)
                # Remove the point from its previous cluster,
                # keyed by the point index (not the cluster index)
                old_idx = point_to_cluster[pt_idx]
                if old_idx != -1:
                    clusters[old_idx].remove_member(pt_idx)
                # Update the record of the point's cluster
                point_to_cluster[pt_idx] = closest
        # Now that each point has been assigned to its nearest cluster,
        # update the cluster centers (loop variable renamed so it does
        # not shadow the cluster class)
        for clust in clusters:
            clust.update_center()
    for clust in clusters:
        print(clust.center)
    return clusters
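# k_means assumes a `cluster` class with add_member, remove_member,
# update_center, and a center attribute, plus a get_closest helper; none are
# shown. A minimal sketch consistent with the call sites — the bookkeeping of
# members by point index is an assumption.
import numpy as np

class cluster(object):
    def __init__(self, center):
        self.center = center
        self.members = {}  # point index -> coordinates

    def add_member(self, pt_idx, point):
        self.members[pt_idx] = point

    def remove_member(self, pt_idx):
        self.members.pop(pt_idx, None)

    def update_center(self):
        # Move the center to the mean of the member points, if any.
        if self.members:
            self.center = np.mean(list(self.members.values()), axis=0)

def get_closest(point, clusters):
    # Index of the cluster whose center is nearest to `point` (-1 if none).
    if not clusters:
        return -1
    dists = [np.linalg.norm(point - c.center) for c in clusters]
    return int(np.argmin(dists))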
def transportBench(numObjects, objectSize, transport, uncached=False):
    f = partial(runTransportBench, numObjects, objectSize, uncached)
    stats = cluster(f, numBackups=0, replicas=0, transport=transport, timeout=240)
    for line in open('%s/mcp.%s.log' % (stats['run'], hosts[0][0])):
        m = re.match('.*METRICS: (.*)$', line)
        if m:
            stats.update(eval(m.group(1)))
    return stats
def bk_unit(self, file_in):
    print "Getting_breakpoint&unit>>>>>>>>>>>>>>>>>>>>>>>"
    file_in.seek(0)
    bklist = []
    while True:
        line1 = file_in.readline()
        if not line1:
            break
        line2 = file_in.readline().strip()
        line1_list = line1.split('\t')
        line2_list = line2.split('\t')
        readseq1 = line1_list[9].upper()
        readseq2 = line2_list[9].upper()
        pairedread = pairedRead(line1_list[0], line1_list[1], line1_list[2], line1_list[3],
                                line1_list[4], line1_list[5], line1_list[6], line1_list[7],
                                line1_list[8], readseq1,
                                line2_list[0], line2_list[1], line2_list[2], line2_list[3],
                                line2_list[4], line2_list[5], line2_list[6], line2_list[7],
                                line2_list[8], readseq2)
        '''readlen1 = len(pairedread.read1.SEQ)
        readlen2 = len(pairedread.read2.SEQ)
        if pairedread.read1.CIGAR == str(readlen1) + "M" and pairedread.read2.CIGAR != str(readlen2) + "M" and \
                pairedread.read2.CIGAR != "*" and int(pairedread.read1.POS) < int(pairedread.read2.POS):
            # if pairedread.read1.verify(ref) and pairedread.read2.getmsiunit():
            if pairedread.read2.getmsiunit()[0]:
                bkun = pairedread.read2.getbkunit()  # returns (breakpoint, unit)
                bklist.append(bkun)
        elif pairedread.read2.CIGAR == str(readlen2) + "M" and pairedread.read1.CIGAR != str(readlen1) + "M" and \
                pairedread.read1.CIGAR != "*" and int(pairedread.read2.POS) < int(pairedread.read1.POS):
            # if pairedread.read2.verify(ref) and pairedread.read1.getmsiunit():
            if pairedread.read1.getmsiunit()[0]:
                bkun = pairedread.read1.getbkunit()  # returns (breakpoint, unit)
                bklist.append(bkun)
        else:
            continue'''
        if pairedread.read1.isMap() and not pairedread.read2.isMap() and pairedread.read2.CIGAR != "*" and \
                int(pairedread.read1.POS) < int(pairedread.read2.POS) and pairedread.read2.getmsiunit()[0]:
            bkun = pairedread.read2.getbkunit()  # returns (unit, breakpoint)
            bklist.append(bkun)
        elif not pairedread.read1.isMap() and pairedread.read2.CIGAR != "*" and pairedread.read2.isMap() and \
                int(pairedread.read2.POS) < int(pairedread.read1.POS) and pairedread.read1.getmsiunit()[0]:
            bkun = pairedread.read1.getbkunit()  # returns (unit, breakpoint)
            bklist.append(bkun)
        else:
            continue
    rebu = cluster(bklist, 30, 1, alpha=0.1)
    bklist = []
    for r in rebu:
        bklist.append(r[0])
    self.bklist = dict(bklist)
    self.unitdic = list(self.bklist.keys())
    return self.bklist, self.unitdic
def do_classify(self, num, count):
    self.count = count
    self.jihe = cluster(self.pure_messages, self.count)
    self.jihe.update_bytime(num)
    for i in range(count):
        message_temp = []
        for r in self.jihe.clus[i]:
            temp = RawMessage(r.contain)
            message_temp.append(temp)
        symbol = Symbol(messages=message_temp)
        Format.splitAligned(symbol, doInternalSlick=True)
        self.pure_symbols.append(symbol)
from datetime import datetime

# `when` is referenced but never defined in the original snippet; it is
# assumed to be the timestamp of the remembered event, set elsewhere.
when = datetime.utcnow()

def recall():
    # step into the cluster
    cluster()
    while True:
        now = datetime.utcnow()
        # spawn a timedelta object
        t = now - when
        # establish the threshold in milliseconds
        threshold = t.total_seconds() * 1000
        # what did I - just - perceive?
        if threshold < 5:
            print now
            print 'Threshold crossed'
            print threshold
        else:
            print 'threshold not crossed'
        # who else perceived it?

# live and die gracefully.
if __name__ == '__main__':
    try:
        recall()
    except KeyboardInterrupt:
        print 'stop memories'
def add_tensorboard(originData, target, feature_I, feature_V, pretrain_model_time_dir, iterations):
    print "cluster_thread: begin"
    F_I_cluster_np, F_V_cluster_np = cluster(originData, target, feature_I, feature_V,
                                             pretrain_model_time_dir, iterations)
    summary = sess.run(summary_merge_cluster_img, feed_dict={
        F_I_cluster_img: np.expand_dims(F_I_cluster_np, axis=0),
        F_V_cluster_img: np.expand_dims(F_V_cluster_np, axis=0)
    })
    training_writer.add_summary(summary, tf.train.global_step(sess, global_step))
    print "cluster_thread: added img to summary"
def __init__(self, set_name):
    self.job_root = "job_configs/%s" % set_name
    self.job_files = glob.glob(r'job_configs/%s/job*.json' % set_name)
    self.num_jobs = len(self.job_files)
    self.job_set = [[] for i in range(ARRIVAL_MAX)]  # simulate one day of 1440 minutes
    self.load_job_set()
    self.job_queue = []
    self.starve_queue = []
    self.job_running = []
    self.clust = cluster(CLUSTER)
    # statistical variable
    self.total_time = 0
def dbscan(D, eps, MinPts):
    dataSet = D
    C = -1
    # local bookkeeping: noise container, visited points, cluster counter
    Noise = cluster('Noise')
    visited = []
    count = 0
    for point in D:
        if point not in visited:
            visited.append(point)
            NeighbourPoints = _regionQuery(point, eps)
            if len(NeighbourPoints) < MinPts:
                Noise.addPoint(point)
            else:
                name = 'Cluster' + str(count)
                C = cluster(name)
                count += 1
                expandCluster(point, NeighbourPoints, C, eps, MinPts)
def dbscan(self, D, ids, eps, MinPts):
    self.dataSet = ids
    self.Distance_values = D
    #print self.Distance_values
    C = -1
    for file in ids:
        if file not in self.visited:
            #self.visited.append(file)
            NeighbourPoints = self.regionQuery(file, eps)
            if len(NeighbourPoints) < MinPts:
                print "noise"
            else:
                name = 'Cluster' + str(self.count)  # modify the name to filename
                C = cluster(name)
                self.count += 1
                self.expandCluster(file, NeighbourPoints, C, eps, MinPts)
def main(args=None):
    model = Maze(9, 9, [(4, 0), (4, 1), (4, 2), (4, 3),
                        (4, 5), (4, 6), (4, 7), (4, 8)])
    learner = QLearner(model)
    for i in range(10):
        g = makeLocalGraph(learner)
        (nodes, v) = cluster(g)
        learner.plan(nodes, v)
    # visualization
    tab = [[(x, y) in model.walls for y in range(model.w)] for x in range(model.h)]
    for (node, val) in zip(sorted(g.nodes()), v):
        tab[node[0]][node[1]] = val  # (val * val.conjugate()).real**.5
    tab = NP.asarray(tab)  # a plain list does not support elementwise shifts
    tab += NP.min(tab)
    tab /= NP.max(tab)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.imshow(tab, cmap=cm.coolwarm, interpolation='nearest')
    plt.show()
def returnDataBackend(df, per=.2, nf=0):
    def Mbyt(w):
        # w is a list of tokens; a trailing 'M'/'G' token marks the unit of w[-2]
        if w[-1] == 'M':
            return float(w[-2])
        elif w[-1] == 'G':
            return float(w[-2]) * 1000
        else:
            try:
                return float(w[-1])
            except:
                print w

    with open(df) as f:
        ls = f.readlines()
    # preallocate when a fixed count is requested, otherwise grow a list
    if nf > 0:
        z = [(0.0, 1, 1)] * nf
    else:
        z = []
    q = 0
    for i, line in enumerate(ls):
        if i == 0:
            continue
        if ':' in line:
            break
        if nf > 0 and q == nf:
            break
        w = line.split()
        tmp = float(w[-1])
        if nf > 0:
            z[q] = (int(w[0]), int(w[1]), tmp)
        else:
            z.append((int(w[0]), int(w[1]), tmp))
        q = q + 1
    t = sorted(z[0:q], reverse=True, key=lambda x: x[2])
    if nf == 0:
        l = int(i * per)
        t = t[0:l]
    else:
        t = t[0:q]
    I = mySparse(t)
    #~ dat = '/Users/kshadi/Documents/Cisco_kamal/zDATA3/2016/11/16/t10.csv'
    #~ print 'SQ...'
    #~ save_sqlite(t,dat)
    #~ raw_input('database created>')
    print 'Clustering ...'
    C = cluster(t, I, '_res_1', gradient=False)
    C.cluster()
def __init__(self, set_name, cluster_dict, schedule_conf):
    self.job_root = "job_configs/%s" % set_name
    self.set_name = set_name
    self.schedule_conf = schedule_conf
    # statistical variable
    self.total_time = 0
    self.task_dist = []
    self.turn_on_dist = []
    self.clust = cluster(cluster_dict)
    if "online" in self.set_name:
        self.ARRIVAL_MAX = 1440
    else:
        self.ARRIVAL_MAX = 1
    self.job_set = [[] for i in range(self.ARRIVAL_MAX)]  # simulate one day of 1440 minutes
    self.load()
def analyze(self, time_dependent=False):
    # A: look for core points
    for pt in self.points:
        if pt.visited:
            continue
        pt.visited = True
        neighbors = self.queryRegion(pt, time_dependent)
        if len(neighbors) < self.minPts:
            # mark noise temporarily
            self.noise.append(pt)
            pt.classification = "NOISE"
        else:
            # mark core points
            c = cluster("cluster_" + str(self.cluster_count))
            self.cluster_count += 1
            pt.classification = "CORE"
            self.expandCluster(pt, neighbors, c, time_dependent)
    # B: reiterate and look for reachable points in the noise
    new_noise = []
    for pt in self.noise:
        for neighbor in self.queryRegion(pt, time_dependent):
            if neighbor.classification == "CORE":
                # if this point is reachable, then add it to the cluster
                pt.classification = "REACHABLE"
                c = self.findCluster(neighbor.cluster_name)
                c.addPoint(pt)
                break
        if pt.classification == "NOISE":
            new_noise.append(pt)
    self.noise = new_noise
    # C: return the list of cluster centroids
    centroids = []
    for c in self.clusters:
        centroids.append(c.getCentroid())
    return centroids
def incrementalAdd(self, p, eps, Minpts):
    self.num = self.num + 1
    print("\nADDING point " + str(self.num))
    self.visited = []
    self.newCores = []
    UpdSeedIns = []
    foundClusters = []
    NeighbourPoints = self.regionQuery(p, eps)
    if len(NeighbourPoints) >= Minpts:
        self.newCores.append(p)
    self.visited.append(p)
    for pt in NeighbourPoints:
        if pt not in self.visited:
            self.visited.append(pt)
            np = self.regionQuery(pt, eps)
            if len(np) >= Minpts:
                for n in np:
                    if n not in NeighbourPoints:
                        NeighbourPoints.append(n)
                if pt not in self.curCores:
                    self.newCores.append(pt)
    for core in self.newCores:
        corehood = self.regionQuery(core, eps)
        for elem in corehood:
            # compare the neighbourhood *size* against Minpts
            if len(self.regionQuery(elem, eps)) >= Minpts:
                if elem not in UpdSeedIns:
                    UpdSeedIns.append(elem)
    if len(UpdSeedIns) < 1:
        self.Noise.addPoint(p)
    else:
        findCount = 0
        for seed in UpdSeedIns:
            for clust in self.Clusters:
                if clust.has(seed):
                    findCount += 1
                    if clust.name not in foundClusters:
                        foundClusters.append(clust.name)
                    break
        if len(foundClusters) == 0:
            name = 'Cluster' + str(self.count)
            C = cluster(name)
            self.count += 1
            self.expandCluster(UpdSeedIns[0], self.regionQuery(UpdSeedIns[0], eps), C, eps, Minpts)
        elif len(foundClusters) == 1:
            originalCluster = -1
            newCluster = -1
            for c in self.Clusters:
                if c.name == foundClusters[0]:
                    originalCluster = c
                    newCluster = c
            newCluster.addPoint(p)
            if len(UpdSeedIns) > findCount:
                for seed in UpdSeedIns:
                    if not newCluster.has(seed):
                        newCluster.addPoint(seed)
            self.Clusters.remove(originalCluster)
            self.Clusters.append(newCluster)
        else:
            masterCluster = -1
            originalCluster = -1
            for c in self.Clusters:
                if c.name == foundClusters[0]:
                    masterCluster = c
                    originalCluster = c
            for clusname in foundClusters:
                for clus in self.Clusters:
                    if clus.name == clusname:
                        for cluspoints in clus.getPoints():
                            if not masterCluster.has(cluspoints):
                                masterCluster.addPoint(cluspoints)
            if len(UpdSeedIns) > findCount:
                for seed in UpdSeedIns:
                    if not masterCluster.has(seed):
                        masterCluster.addPoint(seed)
            self.Clusters.remove(originalCluster)
            self.Clusters.append(masterCluster)
# import all defined functions
from cluster import *

# ============ Parameters ================
slo = False  # titles and labels in Slovenian or English language

# make analysis
ks = [5, 8]  # number of clusters
logscale = [True]

# ============ Run functions using these parameters ============
print("\n\t10: clustering and analysis of clusters on logarithmic scale")
# cluster(folder=10, ks=ks, slo=slo, logscale=logscale)

print("\n\t100: clustering and analysis of clusters on logarithmic scale")
cluster(folder=100, ks=ks, slo=slo, logscale=logscale)

print("\n\t1000: clustering and analysis of clusters on logarithmic scale")
# cluster(folder=1000, ks=ks, slo=slo, logscale=logscale)

print("\n\t10000: clustering and analysis of clusters on linear scale")
cluster(folder=10000, ks=ks, slo=slo, logscale=logscale)

# size of marker ... log(N)
def process_clusters(pIDarr, xarr, yarr, valarr, avg3arr, avg5arr, pmarr, eIDarr,
                     etimearr, elonarr, elatarr, epAvgarr, epStdarr, eorxarr,
                     eoryarr, eorzarr, xIDarr, xtimearr, xL1arr, xL2arr, xdrparr):
    totclarr = []
    # Get huge indexed clustered array for whole run
    for i in range(0, len(xarr)):
        totclarr.append(cluster(xarr[i], yarr[i]))
    pIDclu = []
    pxclu = []
    pyclu = []
    pvalclu = []
    pavg3clu = []
    pavg5clu = []
    pmclu = []
    eIDclu = []
    etimeclu = []
    elonclu = []
    elatclu = []
    epavgclu = []
    epstdclu = []
    eorxclu = []
    eoryclu = []
    eorzclu = []
    xIDclu = []
    xtimeclu = []
    xL1clu = []
    xL2clu = []
    xdrpclu = []
    totclucnt = []
    # Sort out the x, y, and vals by their respective clusters now
    for i in range(0, len(totclarr)):
        tpID, tpx, tpy, tpval, tpavg3, tpavg5, tpm, teID, tetime, telon, telat, \
            tepAvg, tepStd, teorx, teory, teorz, txID, txtime, txL1, txL2, txdrp = cluster_merge(
                pIDarr[i], xarr[i], yarr[i], valarr[i], avg3arr[i], avg5arr[i],
                pmarr[i], eIDarr[i], etimearr[i], elonarr[i], elatarr[i],
                epAvgarr[i], epStdarr[i], eorxarr[i], eoryarr[i], eorzarr[i],
                xIDarr[i], xtimearr[i], xL1arr[i], xL2arr[i], xdrparr[i], totclarr[i])
        # Used for histogram
        xval = []
        yval = []
        for n in range(0, len(xarr[i])):
            for m in range(0, int(valarr[i][n])):
                xval.append(xarr[i][n])
                yval.append(yarr[i][n])
        # If desired, can output individual event image
        # Need to manually input num and set range
        '''
        num = 712
        if i == num:
            # Create an image for each cluster in the frame
            for f in xrange(len(tmpx)):
                # Create a zoomed-in image for each individual cluster
                hval, xedges, yedges = np.histogram2d(xval, yval, bins=40,
                    range=[[tmpx[f][0]-19, tmpx[f][0]+20], [tmpy[f][0]-19, tmpy[f][0]+20]])
                extent = [yedges[0], yedges[-1], xedges[-1], xedges[0]]
                plt.figure()
                plt.imshow(hval, extent=extent, interpolation='nearest')
                plt.gca().invert_yaxis()
                plt.colorbar()
                plt.xlabel("pixel x")
                plt.ylabel("pixel y")
                plt.title('Pixel Value (Cluster: %s)' % (f+1))
            hval, xedges, yedges = np.histogram2d(xval, yval, bins=350,
                range=[[0, 350], [0, 350]])
            extent = [yedges[0], yedges[-1], xedges[-1], xedges[0]]
            plt.figure()
            plt.imshow(hval, extent=extent, interpolation='nearest')
            plt.gca().invert_yaxis()
            plt.colorbar()
            plt.xlabel("pixel x")
            plt.ylabel("pixel y")
            plt.title('Pixel Value (Frame: %s)' % (i))
            #plt.show()
        #'''
        # Make full run cluster x, y, val, and count arrays for each event
        pIDclu.append(tpID)
        pxclu.append(tpx)
        pyclu.append(tpy)
        pvalclu.append(tpval)
        pavg3clu.append(tpavg3)
        pavg5clu.append(tpavg5)
        pmclu.append(tpm)
        eIDclu.append(teID)
        etimeclu.append(tetime)
        elonclu.append(telon)
        elatclu.append(telat)
        epavgclu.append(tepAvg)
        epstdclu.append(tepStd)
        eorxclu.append(teorx)
        eoryclu.append(teory)
        eorzclu.append(teorz)
        xIDclu.append(txID)
        xtimeclu.append(txtime)
        xL1clu.append(txL1)
        xL2clu.append(txL2)
        xdrpclu.append(txdrp)
        totclucnt.append(len(tpx))
    # Get all of the cluster lengths for each individual frame
    # and throw them into an array of arrays
    totclulen = []
    # Find the cluster lengths for each cluster in the run
    for i in xrange(len(pxclu)):
        length = cluster_length(pxclu[i], pyclu[i])
        for j in xrange(len(length)):
            totclulen.append(length[j])
    return (pIDclu, pxclu, pyclu, pvalclu, pavg3clu, pavg5clu, pmclu, eIDclu,
            etimeclu, elonclu, elatclu, epavgclu, epstdclu, eorxclu, eoryclu,
            eorzclu, xIDclu, xtimeclu, xL1clu, xL2clu, xdrpclu, totclucnt, totclulen)
# an argv-count guard is implied by the usage message that opens this snippet
if len(sys.argv) != 4:
    print 'Usage:', sys.argv[0], ' (0: reset 1: append) index_file video_list'
    exit(-1)

IS_APPEND = int(sys.argv[1])
index_file = sys.argv[2]
video_list_file = sys.argv[3]
video_list = open(video_list_file).read().splitlines()
index = {}
if IS_APPEND:
    with open(index_file, 'rb') as handle:
        prev_index = pickle.load(handle)
    print prev_index
    # carry the previously indexed videos forward so the membership
    # check below actually skips them
    index.update(prev_index)
for video_name in video_list:
    video_name = video_name.split('.')[0]
    print video_name
    if IS_APPEND:
        if video_name in index:
            continue
    gt_nodes = load_turker_labels(video_name)
    clusters, linkage_matrix = cluster(gt_nodes)
    index[video_name] = clusters
with open(index_file, 'wb') as handle:
    pickle.dump(index, handle)
def main(args):
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    np.save(f'{args.dataset}_labels', labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    cluster_interval = args.cluster_interval
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        device = features.device
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
    # graph preprocess and calculate normalization factor
    g = data.graph
    # add self loop
    if not args.no_self_loop:
        print('add self-loop')
        g.remove_edges_from(nx.selfloop_edges(g))
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    g = DGLGraph(g)
    n_edges = g.number_of_edges()
    # normalization
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)
    # create GCN model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = create_model(args.arch, g,
                         num_layers=args.num_layers,
                         in_dim=in_feats,
                         num_hidden=args.num_hidden,
                         num_classes=n_classes,
                         heads=heads,
                         activation=F.elu,
                         feat_drop=args.in_drop,
                         attn_drop=args.attn_drop,
                         negative_slope=args.negative_slope,
                         residual=args.residual)
    print(model)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()
    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    # Step 1. initialization with GCN
    # init graph feat
    dur = []
    centroid_emb, hidden_emb, cluster_ids = [], [], []
    att = []
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # cluster
        # forward
        if epoch < args.init_feat_epoch:
            # logits = model(features)
            logits, hidden_h = model(features)
        else:
            if epoch == args.init_feat_epoch or epoch % cluster_interval == 0:
                cluster_ids_x, cluster_centers = cluster(
                    X=hidden_h.detach(),
                    num_clusters=args.cluster_number,
                    distance='cosine',
                    method=args.cluster_method
                )
                # TODO: fix zero norm embedding
                centroid_emb.append(cluster_centers.detach().cpu().numpy())
                hidden_emb.append(hidden_h.detach().cpu().numpy())
                cluster_ids.append(cluster_ids_x.detach().cpu().numpy())
            logits, hidden_h = model(features, cluster_ids_x, cluster_centers, att)
            # logits, hidden_h = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        # loss.backward(retain_graph=True)
        loss.backward(retain_graph=False)
        optimizer.step()
        if epoch >= 3:
            dur.append(time.time() - t0)
        if args.fastmode:
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
        if args.early_stop:
            if stopper.step(val_acc, model):
                break
        # acc = evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                            val_acc, n_edges / np.mean(dur) / 1000))
    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
    prefix = 'embedding'
    np.save(Path(prefix, f'{args.dataset}_centroid_emb'), np.array(centroid_emb))
    np.save(Path(prefix, f'{args.dataset}_hidden_emb'), np.array(hidden_emb))
    np.save(Path(prefix, f'{args.dataset}_att'), np.array(att))
    np.save(Path(prefix, f'{args.dataset}_cluster_ids'), np.array(cluster_ids))
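# The cluster() call in the training loop above takes X, num_clusters,
# distance, and method keywords and returns (cluster_ids, cluster_centers)
# tensors; the signature resembles kmeans_pytorch's kmeans(). A minimal torch
# sketch of such a function, purely as an assumption about the missing
# implementation (the `method` argument is accepted but only plain Lloyd
# iterations are sketched).
import torch
import torch.nn.functional as Fn

def cluster(X, num_clusters, distance='cosine', method='kmeans', iters=20):
    # Initialise centers from random rows of the 2-D input tensor X.
    centers = X[torch.randperm(X.shape[0])[:num_clusters]].clone()
    for _ in range(iters):
        if distance == 'cosine':
            # cosine similarity via normalized dot products
            sim = Fn.normalize(X, dim=1) @ Fn.normalize(centers, dim=1).t()
        else:
            sim = -torch.cdist(X, centers)  # negative euclidean distance
        ids = sim.argmax(dim=1)
        # Move each center to the mean of its assigned points.
        for k in range(num_clusters):
            mask = ids == k
            if mask.any():
                centers[k] = X[mask].mean(dim=0)
    return ids, centers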
#
# 14-Feb-14: Version 1.0: Updated
# 22-Jan-14: Version 1.0: Created
#
################################################################################
################################################################################

import sys
from cluster import *

if __name__ == "__main__":
    if len(sys.argv) == 2:
        _Directory = sys.argv[1]
        _Cluster = cluster(_Directory)
        _Cluster.build_dictionary()
        #print "Dictionary size: " + str(_Cluster.dictionary_size())
        #_Cluster.print_dictionary()
        _Cluster.build_cluster()
        #print "Unique Hashes in Dictionary " + str(_Cluster.unique_hash_count())
        #_Cluster.print_cluster()
        _Cluster._write_cluster()
from cluster import *

tftp_one = rdpcap("final_last.pcap")
start_str = []
for t in tftp_one:
    ss = str(t)
    ss1 = ss[54:]
    if len(ss1) > 0:
        start_str.append(ss1)
for s in start_str:
    print repr(s)
    print "\r\n"
start_list = []
for s in start_str:
    nn = t_node(0, s)
    start_list.append(nn)
jihe = cluster(start_list, 5)
jihe.update_bytime(20)
print "kkk"
i = 0
file_object = open('thefile_six.txt', 'w+')
while i < 5:
    print i
    file_object.write(repr(i))
    file_object.write("\r\n")
    print repr(jihe.cores[i].contain)
    file_object.write(repr(jihe.cores[i].contain))
    file_object.write("\r\n")
    print "clui"
    for r in jihe.clus[i]:
        file_object.write(repr(r.contain))
        file_object.write("\r\n")
    i += 1  # advance to the next cluster so the loop terminates
corrRightVal, _ = randomPickRight(start, end, teX, teY, indexTableVal)
X_left = teX[start:end].reshape([-1, 28, 28, 1]) / 255
F_V_matrix, F_I_matrix = sess.run(
    [F_V_left, F_I_left],
    feed_dict={
        image_real_left: X_left,
        image_real_right: corrRightVal.reshape([-1, 28, 28, 1]) / 255
    })
if start == 0:
    image_real_left_agg = X_left
    F_V_matrix_agg = F_V_matrix
    F_I_matrix_agg = F_I_matrix
else:
    image_real_left_agg = np.concatenate((image_real_left_agg, X_left), axis=0)
    F_V_matrix_agg = F_V_matrix = np.concatenate((F_V_matrix_agg, F_V_matrix), axis=0)
    F_I_matrix_agg = F_I_matrix = np.concatenate((F_I_matrix_agg, F_I_matrix), axis=0)
iterations += 1
cluster(image_real_left_agg, teY[0:len(teY)], F_I_matrix_agg, F_V_matrix_agg,
        args.pretrain_model_time_dir, iterations, is_tensorboard=False)
# cluster(image_real_left_agg, teY[0:len(teY)], F_I_matrix_agg, F_V_matrix_agg,
#         args.pretrain_model_time_dir, iterations)
import sys
from utils import *
from prepare import *
from model import *
from train import *
from evaluate import *
from cluster import *

if __name__ == "__main__":
    action = sys.argv[1]      # prepare, train, eval, predict
    data_dir = sys.argv[2]    # file of train, test data
    csv_file = sys.argv[3]    # csv file of info OR output file name
    model_path = sys.argv[4]  # model path save or load OR dataloader
    # TODO: add batch_size, num_epochs as args
    if action == 'prepare':
        prepare(action, data_dir, csv_file, model_path)
    elif action == 'train':
        dataloader, data_size = prepare(action, data_dir, csv_file)
        train(model_path, dataloader, data_size)
    elif action == 'eval':
        dataloader, data_size = prepare(action, data_dir, csv_file)
        evaluate(model_path, dataloader, data_size)
    elif action == 'cluster':
        dataloader, data_size = prepare(action, data_dir, csv_file)
        cluster(model_path, dataloader, data_size)
    elif action == 'predict':
        # TODO:
        dataloader = prepare(action, data_dir)
        predict(model_path, dataloader)
def __init__(self):
    self.Noise = cluster('Noise')
def incrementalDelete(self, p, eps, Minpts):
    print "\nPoint to Delete : " + str(p)
    self.newCores = []
    obsoleteCores = []
    UpdSeedDel = []
    Neighbourhood = self.regionQuery(p, eps)
    Neighbourhood.remove(p)
    self.dataSet.remove(p)
    if p in self.curCores:
        self.curCores.remove(p)
        obsoleteCores.append(p)
    for core in self.curCores:
        np = self.regionQuery(core, eps)
        if len(np) >= Minpts:
            self.newCores.append(core)
        else:
            obsoleteCores.append(core)
    for core in obsoleteCores:
        np = self.regionQuery(core, eps)
        for point in np:
            if len(self.regionQuery(point, eps)) >= Minpts and cmp(point, p) != 0:
                UpdSeedDel.append(point)
    print "\nUpdSeedDel:" + str(UpdSeedDel) + "\nCurCores:" + str(self.curCores) + \
        "\nNewCores:" + str(self.newCores)
    if len(UpdSeedDel) <= 0:
        removePts = []
        for pt in Neighbourhood:
            if len(self.regionQuery(pt, eps)) < Minpts:
                removePts.append(pt)
        for clust in self.Clusters:
            if clust.has(p):
                clust.remPoint(p)
                if len(clust.getPoints()) == 0:
                    self.Clusters.remove(clust)
                else:
                    if len(Neighbourhood) == len(removePts):
                        for poin in clust.getPoints():
                            self.Noise.addPoint(poin)
                        self.Clusters.remove(clust)
                    else:
                        for poin in removePts:
                            clust.remPoint(poin)
                            self.Noise.addPoint(poin)
                break
        if self.Noise.has(p):
            self.Noise.remPoint(p)
    else:
        directlyConnected = True
        np = self.regionQuery(UpdSeedDel[0], eps)
        for Seed in UpdSeedDel:
            if Seed not in np:
                directlyConnected = False
        if directlyConnected:
            print "\nProcedure Reached"
            for point in Neighbourhood:
                isNoise = True
                neighbour = self.regionQuery(point, eps)
                for pt in neighbour:
                    if pt in self.newCores:
                        isNoise = False
                        break
                if isNoise:
                    print "\nFound Noise:" + str(point)
                    for clust in self.Clusters:
                        if clust.has(point):
                            clust.remPoint(point)
                            if len(clust.getPoints()) == 0:
                                self.Clusters.remove(clust)
                            break
                    if not self.Noise.has(point):
                        self.Noise.addPoint(point)
            for clust in self.Clusters:
                if clust.has(p):
                    clust.remPoint(p)
                    if len(clust.getPoints()) == 0:
                        self.Clusters.remove(clust)
                    break
        else:
            C = -1
            self.visited = []
            visitedSeeds = []
            newCluster = -1
            for clust in self.Clusters:
                if clust.has(p):
                    C = clust
                    break
            if C != -1:
                self.Clusters.remove(C)
            for seed in UpdSeedDel:
                neighbour = []
                if seed not in visitedSeeds:
                    name = 'Cluster' + str(self.count)
                    self.count += 1
                    newCluster = cluster(name)
                    visitedSeeds.append(seed)
                    if seed not in self.visited:
                        self.visited.append(seed)
                    newCluster.addPoint(seed)
                    neighbour = self.regionQuery(seed, eps)
                    for pt in neighbour:
                        if pt not in self.visited:
                            self.visited.append(pt)
                            if pt in UpdSeedDel:
                                if pt not in visitedSeeds:
                                    visitedSeeds.append(pt)
                            np = self.regionQuery(pt, eps)
                            if len(np) >= Minpts:
                                for poin in np:
                                    if poin not in self.visited:
                                        neighbour.append(poin)
                            if not newCluster.has(pt):
                                newCluster.addPoint(pt)
                    if len(visitedSeeds) == len(UpdSeedDel):
                        self.Clusters.append(newCluster)
                        break
                    else:
                        self.Clusters.append(newCluster)
    self.curCores = list(self.newCores)
def Query(q):
    contexts = question_analysis(q)
    sorted_d = []
    table = []
    d2 = dict()
    d3 = dict()
    # run the query
    cl = SphinxClient()
    cl.SetServer(host, port)
    cl.SetWeights([100, 1])
    cl.SetMatchMode(mode)
    if filtervals:
        cl.SetFilter(filtercol, filtervals)
    if groupby:
        cl.SetGroupBy(groupby, SPH_GROUPBY_ATTR, groupsort)
    if sortby:
        cl.SetSortMode(SPH_SORT_EXTENDED, sortby)
    if limit:
        cl.SetLimits(0, limit, max(limit, 1000))
    res = cl.Query(q, index)
    log_f = open("retrieve.log", "w")
    log_c = open("candidate.log", "w")
    if not res:
        print 'query failed: %s' % cl.GetLastError()
        sys.exit(1)
    if cl.GetLastWarning():
        print 'WARNING: %s\n' % cl.GetLastWarning()
    print >> log_f, 'Query \'%s\' retrieved %d of %d matches in %s sec' % (q, res['total'], res['total_found'], res['time'])
    print >> log_f, 'Query stats:'
    if res.has_key('words'):
        for info in res['words']:
            print >> log_f, '\t\'%s\' found %d times in %d documents' % (info['word'], info['hits'], info['docs'])
    if res.has_key('matches'):
        n = 1
        print >> log_f, '\nMatches:'
        for match in res['matches']:
            filePath = d[str(match['id'])]
            tree = ET.ElementTree(file=filePath)
            match = 'article[@id="' + str(match['id']) + '"]'
            for elem in tree.iterfind(match):
                print >> log_f, elem[0].text.encode('utf-8'), elem[1].text.encode('utf-8')
                cuttest(elem[0].text.encode('utf-8').strip(), d2, d3, n)
                lines = elem[1].text.encode('utf-8').strip().splitlines()
                for line in lines:
                    sents = line.strip().split('。')
                    for sen in sents:
                        cuttest(sen.strip(), d2, d3, n)
            n += 1
        # sort candidate words by term frequency, descending
        sorted_d = sorted(d2.iteritems(), key=operator.itemgetter(1), reverse=True)
        #test = open('test.txt','w')
        i = 1
        for w in sorted_d:
            if i > 600:
                break
            name = w[0].encode('utf-8')
            c = cluster(name)
            tf = max(d3[w[0]].values())
            s = len(d3[w[0]])
            # tf-idf over the n-1 retrieved documents; parenthesised so the
            # division happens inside the log, and floated to avoid Python 2
            # integer division
            tfidf = tf * log(float(n - 1) / s)
            prob = 1
            for y in contexts:
                if len(y) > 5:
                    print q, name, y[0], y[1], y[2], y[3], y[4], y[5], probability_clusterInContext_givenCluster(c, y)
                else:
                    print q, name, y[0], y[1], y[2], probability_clusterInContext_givenCluster(c, y)
                prob *= probability_clusterInContext_givenCluster(c, y)
            if prob == 1:
                prob = 0
            print >> log_c, name, '\t', '1:' + str(w[1]), '\t', '2:' + str(tf), '\t', '3:' + str(prob)
            if prob > 0:
                table.append([w[0], '1:' + str(w[1]), '2:' + str(tf), '3:' + str(prob)])
            i += 1
    else:
        print >> log_c, "no result"
    return table