def Split(L, left, right):
    # partition L[left..right] around pivot = L[left]
    pivot = L[left]
    l, r = left + 1, right
    while l <= r:
        if pivot <= L[l]:
            L[l], L[r] = L[r], L[l]
            r = r - 1
        else:
            l = l + 1
    # move the pivot into its final position
    L[left], L[l - 1] = L[l - 1], L[left]
    return l - 1


def SORT2(L, left, right):
    if left < right:
        i = Split(L, left, right)
        SORT2(L, left, i - 1)
        SORT2(L, i + 1, right)


import util

L = util.random_list(20, 0, 50)
print(L)
SORT2(L, 0, len(L) - 1)
print(L)

# Time complexity (average case, assuming a good split):
# T(n) = 2n + 2T(n/2) is in Theta(n log n)
# Space complexity:
# S(n) = n + 2S(n/2) is in Theta(n log n)   (if each call copies its halves)
# S(n) = c + 2S(n/2) is in Theta(n)         (in place, constant extra per call)
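# To see why T(n) = 2n + 2T(n/2) is in Theta(n log n), unroll the recurrence;
# each of the log2(n) levels of recursion contributes 2n work in total:
#   T(n) = 2n + 2T(n/2)
#        = 2n + 2n + 4T(n/4)
#        = ...
#        = k*2n + 2^k * T(n/2^k)
# With k = log2(n) this gives T(n) = 2n*log2(n) + n*T(1), i.e. Theta(n log n).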
def training_process(self, x, labels, batch_size, node_batch, node_epoch,
                     eval_interval, out_c, USE_GPU, LR, save_path,
                     logfile=None, dropout_r=0.1, svm_flag=False,
                     use_pairwise=True, use_momentum=False, criterion='iforest'):
    if svm_flag:
        x_ori = x.toarray()
    else:
        x_ori = x
    labels_ori = labels
    # x_level[i] > 0 means point i was filtered out at some earlier level
    x_level = np.zeros(x_ori.shape[0])
    for level in range(1, self.tree_depth + 1):
        # form x and labels from the points not yet filtered out
        keep_pos = np.where(x_level == 0)
        x = x_ori[keep_pos]
        labels = labels_ori[keep_pos]
        group_num = int(x.shape[0] / batch_size) + 1
        batch_x = np.array_split(x, group_num)
        model = RDP_Model(in_c=x.shape[1], out_c=out_c, USE_GPU=USE_GPU,
                          LR=LR, logfile=logfile, dropout_r=dropout_r)
        best_auc = best_epoch = 0
        for epoch in range(0, node_epoch):
            if not is_batch_replace:
                random.shuffle(batch_x)
                batch_cnt = 0
                for batch_i in batch_x:
                    gap_loss = model.train_model(batch_i, epoch,
                                                 use_pairwise=use_pairwise,
                                                 use_momentum=use_momentum)
                    # print("epoch ", epoch, "loss: ", loss)
                    batch_cnt += 1
                    if batch_cnt >= node_batch:
                        break
            else:
                # random sampling with replacement
                for batch_i in range(node_batch):
                    random_pos = random_list(0, x.shape[0] - 1, batch_size)
                    batch_data = x[random_pos]
                    gap_loss = model.train_model(batch_data, epoch,
                                                 use_pairwise=use_pairwise,
                                                 use_momentum=use_momentum)
            if epoch % eval_interval == 0:
                # print("epoch ", epoch, "gap_loss:", gap_loss, " recon_loss:", recon_loss)
                # if logfile:
                #     logfile.write("epoch " + str(epoch) + " gap_loss: " + str(gap_loss) +
                #                   " recon_loss: " + str(recon_loss) + '\n')
                print("tree_id:", self.t_id, "level:", level)
                print("keep_pos.size ==", keep_pos[0].size)
                if logfile:
                    logfile.write("tree_id: " + str(self.t_id) +
                                  " level: " + str(level) +
                                  " keep_pos.size == " + str(keep_pos[0].size) + '\n')
                print("epoch ", epoch, "gap_loss:", gap_loss)
                if logfile:
                    logfile.write("epoch " + str(epoch) +
                                  " gap_loss: " + str(gap_loss) + '\n')
                model.save_model(save_path + 't' + str(self.t_id) +
                                 '_l' + str(level) + '_latest.h5')
                scores = model.eval_model(x, criterion=criterion)
                # eval
                if is_eval:
                    try:
                        roc_auc, ap = aucPerformance(scores, labels, logfile)
                        if roc_auc > best_auc:
                            best_auc = roc_auc
                            best_epoch = epoch
                        print("Best AUC-ROC: %.4f" % best_auc)
                        if logfile:
                            logfile.write("Best AUC-ROC: %.4f\n" % best_auc)
                        print("Best Epoch %d\n" % best_epoch)
                        if logfile:
                            logfile.write("Best Epoch %d\n\n" % best_epoch)
                    except ValueError:
                        print("Only one class present in y_true. "
                              "ROC AUC score is not defined in that case.")
                if logfile:
                    logfile.flush()
        # epoch for

        # filter anomalous elements: the higher the score, the more abnormal
        ranking_scores = scores
        score_ranking_idx = np.argsort(ranking_scores)
        filter_num = int(self.filter_ratio * score_ranking_idx.size)
        filter_idx = score_ranking_idx[score_ranking_idx.size - filter_num:]
        x_level[keep_pos[0][filter_idx]] = self.tree_depth + 1 - level
        self.thresh.append(
            ranking_scores[score_ranking_idx[score_ranking_idx.size - filter_num]])
    # level for

    # save self.thresh
    filename = save_path + 'threshList_t' + str(self.t_id) + '.txt'
    list_save(self.thresh, filename, 'w')
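# A minimal sketch (hypothetical toy values) of the argsort-based filtering step
# above: the indices of the `filter_num` highest scores are marked in x_level so
# that later levels skip the points already judged most anomalous.
#
# import numpy as np
# scores = np.array([0.1, 0.9, 0.3, 0.7])
# score_ranking_idx = np.argsort(scores)    # ascending order: [0, 2, 3, 1]
# filter_num = 2
# filter_idx = score_ranking_idx[score_ranking_idx.size - filter_num:]
# print(filter_idx)                         # [3, 1] -> the two highest scores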
"""快速排序 nlogn nlogn 不稳定""" from util import random_list def quick(array): length = len(array) if length <= 1: return array left = cursor = 0 right = length - 1 while cursor <= right: if array[cursor] == array[left]: cursor += 1 elif array[cursor] > array[left]: array[cursor], array[right] = array[right], array[cursor] right -= 1 else: array[cursor], array[left] = array[left], array[cursor] left += 1 cursor += 1 return quick(array[:left]) + array[left:cursor] + quick(array[cursor:]) if __name__ == '__main__': l = random_list() print(quick(l))
def main():
    shutil.rmtree(save_path)
    os.mkdir(save_path)

    if data_path == '20newsgroups':
        newsgroups_data = fetch_20newsgroups_vectorized(subset='all')
        x = newsgroups_data.data.toarray()
        labels = newsgroups_data.target
        n_clusters = 20
    elif data_path == 'r8':
        df = pd.read_csv('data/r8-all-stemmed.txt')
        labels_idx = ['acq', 'crude', 'earn', 'grain', 'interest',
                      'money-fx', 'ship', 'trade']
        labels = df['class'].values
        labels = [labels_idx.index(ele) for ele in labels]
        labels = np.asarray(labels, dtype=np.int64)
        x_df = df.drop(['class'], axis=1)
        corpus = np.squeeze(x_df.values)
        is_TfidfVectorizer = True
        if is_TfidfVectorizer:
            vectorizer = TfidfVectorizer()
        else:
            vectorizer = CountVectorizer()
        x = vectorizer.fit_transform(corpus).toarray()
        n_clusters = 8
    elif data_path == 'olivetti_faces':
        data = fetch_olivetti_faces()
        x = data.data
        labels = data.target
        n_clusters = 40
    elif data_path == 'rcv1':
        # data = fetch_rcv1()
        # x = data.data.toarray()
        # labels = data.target.toarray()
        # n_clusters = 103
        x, labels = get_data_from_svmlight_file('data/rcv1_train.binary')
        x = x.toarray()
        n_clusters = 2
    elif data_path == 'sector':
        x, labels = get_data_from_svmlight_file('data/sector.scale.all')
        x = x.toarray()
        n_clusters = 105
    else:
        raise Exception("Invalid data path!")

    print("Data shape: (%d, %d)" % x.shape)
    data_size = labels.size

    # build model
    model = RDP_Model(in_c=x.shape[1], out_c=out_c, USE_GPU=USE_GPU, LR=LR,
                      logfile=logfile, dropout_r=dropout_r)
    best_nmi = best_epoch = 0
    loss = 0
    for epoch in range(0, total_epoch):
        # random sampling with replacement
        for batch_i in range(epoch_batch):
            random_pos = random_list(0, data_size - 1, batch_size)
            batch_data = x[random_pos]
            loss = model.train_model(batch_data, epoch)
        if epoch % eval_interval == 0:
            print("epoch ", epoch, "loss:", loss)
            if logfile:
                logfile.write("epoch " + str(epoch) + " loss: " + str(loss) + '\n')
            model.save_model(save_path + 'model_latest.h5')
            # eval
            if is_eval:
                gap_dims = model.eval_model(x)
                kmeans_results = KMeans(n_clusters=n_clusters,
                                        random_state=0).fit(gap_dims)
                # Match each learned cluster label with the true labels found in them
                y_pred = kmeans_results.labels_
                labels_pred = np.zeros_like(y_pred)
                for i in range(n_clusters):
                    mask = (y_pred == i)
                    labels_pred[mask] = mode(labels[mask])[0]
                # evaluations
                nmi_scores = normalized_mutual_info_score(labels, labels_pred)
                print("nmi_scores:", nmi_scores)
                if logfile:
                    logfile.write("nmi_scores: %.4f\n" % nmi_scores)
                fscores = f1_score(labels, labels_pred, average='macro')
                print("fscores_macro:", fscores)
                if logfile:
                    logfile.write("fscores_macro: %.4f\n" % fscores)
                fscores = f1_score(labels, labels_pred, average='micro')
                print("fscores_micro:", fscores)
                if logfile:
                    logfile.write("fscores_micro: %.4f\n" % fscores)
                fscores = f1_score(labels, labels_pred, average='weighted')
                print("fscores_weighted:", fscores)
                if logfile:
                    logfile.write("fscores_weighted: %.4f\n" % fscores)
                RI_scores = adjusted_rand_score(labels, labels_pred)
                print("RI_scores:", RI_scores)
                if logfile:
                    logfile.write("RI_scores: %.4f\n" % RI_scores)
                if best_nmi < nmi_scores:
                    best_nmi = nmi_scores
                    best_epoch = epoch
                print("Best NMI: %.4f" % best_nmi)
                print("Best Epoch %d\n" % best_epoch)
                if logfile:
                    logfile.write("Best NMI: %.4f\n" % best_nmi)
                    logfile.write("Best Epoch %d\n\n" % best_epoch)
                    logfile.flush()
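# A minimal sketch (hypothetical toy values) of the cluster-to-label matching
# used above: each KMeans cluster is relabeled with the majority ground-truth
# label of its members before NMI / F1 / ARI are computed.
#
# import numpy as np
# from scipy.stats import mode
# y_pred = np.array([0, 0, 1, 1, 1])   # cluster ids from KMeans
# labels = np.array([2, 2, 5, 5, 9])   # ground-truth labels
# labels_pred = np.zeros_like(y_pred)
# for i in range(2):
#     mask = (y_pred == i)
#     labels_pred[mask] = mode(labels[mask])[0]
# print(labels_pred)                   # [2 2 5 5 5]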
    return pair_up(L, new_L)


def majority_pair_up(L):
    return pair_up(L, L)


# print(votes)
# a = majority_pair_up(votes)
# b = brute_force(votes)
# print(a, b)

import util
import time

for m in range(100):
    votes = util.random_list(100, 0, 2)
    start1 = time.time()
    a = brute_force(votes)
    end1 = time.time()
    start2 = time.time()
    b = majority_pair_up(votes)
    end2 = time.time()
    # print(votes)
    print("brute-force: ans=%s, time=%s  pair-up: ans=%s, time=%s" %
          (a, end1 - start1, b, end2 - start2))
    if a != b:
        print("Answers don't match!")
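# brute_force is called above but its definition is cut off in this file.
# A plausible O(n^2) reference implementation (an assumption, not the original):
#
# def brute_force(V):
#     # count each candidate's votes; return the one held by more than half
#     for v in V:
#         if V.count(v) > len(V) // 2:
#             return v
#     return None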
    ))

print("Init tic time.")
tic_time()

# training process
for i in range(args.forest_Tnum):
    # random sampling with replacement
    if random_size < 0:
        warnings.warn(f'Using full size {args.data_size} by default...')
        random_size = data_size  # fall back to the full data set, as warned
    if random_size > data_size:
        raise ValueError(
            f'`random_size` {args.random_size} exceeds data size {args.data_size}!')
    random_pos = random_list(0, data_size - 1, random_size)
    # random sampling without replacement
    # random_pos = random.sample(range(0, data_size), random_size)

    # to form x and labels
    x = x_ori[random_pos]
    if svm_flag:
        labels = labels_ori[random_pos]
    else:
        labels = labels_ori[random_pos].values
    print("tree id:", i, "tic time.")
    tic_time()
    forest[i].training_process(x=x,
                               labels=labels,
def main():
    global random_size
    shutil.rmtree(save_path)
    os.mkdir(save_path)

    svm_flag = False
    if 'svm' in data_path:
        svm_flag = True
        from util import get_data_from_svmlight_file
        x_ori, labels_ori = get_data_from_svmlight_file(data_path)
        random_size = 1024
    else:
        x_ori, labels_ori = dataLoading(data_path, logfile)
    data_size = labels_ori.size

    # build forest
    forest = []
    for i in range(forest_Tnum):
        forest.append(
            RDPTree(
                t_id=i + 1,
                tree_depth=tree_depth,
                filter_ratio=filter_ratio,
            ))

    print("Init tic time.")
    tic_time()

    # training process
    for i in range(forest_Tnum):
        # random sampling with replacement
        random_pos = random_list(0, data_size - 1, random_size)
        # random sampling without replacement
        # random_pos = random.sample(range(0, data_size), random_size)

        # to form x and labels
        x = x_ori[random_pos]
        if svm_flag:
            labels = labels_ori[random_pos]
        else:
            labels = labels_ori[random_pos].values
        print("tree id:", i, "tic time.")
        tic_time()
        forest[i].training_process(
            x=x,
            labels=labels,
            batch_size=batch_size,
            node_batch=node_batch,
            node_epoch=node_epoch,
            eval_interval=eval_interval,
            out_c=out_c,
            USE_GPU=USE_GPU,
            LR=LR,
            save_path=save_path,
            logfile=logfile,
            dropout_r=dropout_r,
            svm_flag=svm_flag,
        )
        print("tree id:", i, "tic time end.")
        tic_time()
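# random_list is imported from util in these scripts; its body is not shown here.
# Given how it is called -- random_list(0, data_size - 1, random_size) -- it
# plausibly draws `size` indices uniformly with replacement. A sketch under that
# assumption, not the original helper:
#
# import random
#
# def random_list(start, stop, size):
#     # `size` independent draws from [start, stop], duplicates allowed
#     return [random.randint(start, stop) for _ in range(size)]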
def maxsum(L):
    # base case: a single element is its own maximum subarray
    if len(L) == 1:
        return L[0]

    i = len(L) // 2
    Left, Right = L[:i], L[i:]

    # compute max_sum on Left half                     # T(n/2)
    ms_left = maxsum(Left)
    # compute max_sum on Right half                    # T(n/2)
    ms_right = maxsum(Right)

    # third case: ms overlapping Left and Right        # c*n
    ms_overlap_ending_at_im1 = L[i - 1]
    cur_sum = L[i - 1]
    for j in range(i - 2, -1, -1):
        cur_sum = cur_sum + L[j]
        if cur_sum > ms_overlap_ending_at_im1:
            ms_overlap_ending_at_im1 = cur_sum
    ms_overlap_ending_at_i = L[i]
    cur_sum = L[i]
    for j in range(i + 1, len(L)):
        cur_sum = cur_sum + L[j]
        if cur_sum > ms_overlap_ending_at_i:
            ms_overlap_ending_at_i = cur_sum
    ms_overlap = ms_overlap_ending_at_im1 + ms_overlap_ending_at_i

    m = max(ms_left, ms_right, ms_overlap)
    return m


import util

profits = util.random_list(8, -5, 6)
print(profits, maxsum(profits))
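# Cross-check sketch: Kadane's linear-time algorithm computes the same maximum
# subarray sum, so it can validate the divide-and-conquer version above
# (like maxsum, it assumes a non-empty list).
#
# def kadane(L):
#     best = cur = L[0]
#     for v in L[1:]:
#         cur = max(v, cur + v)      # extend the current run or start fresh at v
#         best = max(best, cur)
#     return best
#
# assert kadane(profits) == maxsum(profits)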
    if j < N and theoretical_w < cap:
        theoretical_best += ((cap - theoretical_w) / weight[j]) * calories[j]
    if theoretical_best < best:
        return False
    return True


# W = [6,3,4,2]
# C = [3000,1400,1600,900]

import util
import time

best, best_weight = None, 0
for N in range(15, 25):
    W = util.random_list(N, 10, 20)
    C = util.random_list(N, 2000, 3000)
    Cap = W[0] * len(W) * 0.4

    start_time = time.time()
    Config = [None] * N
    best, best_weight = None, 0
    Knapsack(Cap, W, C, -1, 0)
    end_time = time.time()
    print(N, end_time - start_time, best, Cap, total(best, W), total(best, C))

    start_time = time.time()
    Config = [None] * N
    best, best_weight = None, 0
    Knapsack2(Cap, W, C, -1, 0, 0)
    end_time = time.time()
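# total() is used above but defined in the part of this file that was cut off.
# A plausible stand-in (an assumption about its semantics): sum the entries of
# `vals` selected by a 0/1 configuration list such as `best`.
#
# def total(config, vals):
#     if config is None:             # no feasible solution recorded yet
#         return 0
#     return sum(v for c, v in zip(config, vals) if c)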
def LIS(L, i):
    # LIS(L, i) == length of the longest increasing subsequence ending at L[i]
    if i not in Table:
        if i == 0:
            Table[i] = 1
        m = 1
        for j in range(i):
            if (L[j] < L[i]) and (m < LIS(L, j) + 1):
                m = LIS(L, j) + 1
        Table[i] = m
    return Table[i]


def longest_increasing_subseq(L):
    return max([LIS(L, i) for i in range(len(L))])


# T[i] == Table[i] == LIS(L, i)
def iLIS(L):
    T = [1] * len(L)
    for i in range(len(L)):
        for j in range(i):
            if (L[j] < L[i]) and (T[i] < T[j] + 1):
                T[i] = T[j] + 1
    return max(T)


import util

for n in range(20, 1000, 10):
    items = util.random_list(n, 0, 100)
    Table = {}
    print(len(items), longest_increasing_subseq(items), iLIS(items))
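# Cross-check sketch: the classic O(n log n) patience-sorting variant computes
# the same LIS length with bisect, which is handy for validating LIS/iLIS on
# large inputs (bisect_left matches the strict L[j] < L[i] comparison above).
#
# from bisect import bisect_left
#
# def fast_lis(L):
#     tails = []                     # tails[k] = smallest tail of an IS of length k+1
#     for v in L:
#         k = bisect_left(tails, v)  # replace the first tail >= v
#         if k == len(tails):
#             tails.append(v)
#         else:
#             tails[k] = v
#     return len(tails)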
'''This program creates a "real" network in which every node is placed at a
unique random coordinate and the nodes are joined in a cycle.'''

import random

import matplotlib.pyplot as plt
import networkx as nx

import util

table = {}
for n in range(0, 100, 1):
    A = util.random_list(n, 0, 99)


def real_network(A):
    G = nx.Graph()
    list_of_nodes = []
    coordinates = []
    for i in range(100):
        # draw a coordinate pair, retrying until it is unused
        x = random.choice(range(100))
        y = random.choice(range(100))
        while (x, y) in coordinates:
            x = random.choice(range(100))
            y = random.choice(range(100))
        coordinates = coordinates + [(x, y)]
        G.add_node(i, posxy=(coordinates[i][0], coordinates[i][1]))
        list_of_nodes = list_of_nodes + [i]
    nx.add_cycle(G, list_of_nodes)  # Graph.add_cycle was removed in networkx 2.x
    # B = assign_neighbors()
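# Usage sketch (assumes real_network is extended to return G): draw the cycle
# at the stored coordinates using the 'posxy' node attribute set above.
#
# G = real_network(A)
# pos = nx.get_node_attributes(G, 'posxy')
# nx.draw(G, pos, node_size=30)
# plt.show()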
def majority(V, L, R):
    """Return the majority element of V[L..R] if one exists, else None."""
    if L == R:
        return V[L]
    mid = (L + R) // 2
    m1 = majority(V, L, mid)
    m2 = majority(V, mid + 1, R)
    if m1 == m2:
        return m1
    # count both candidates across the whole range
    count1, count2 = 0, 0
    for i in range(L, R + 1):
        if V[i] == m1:
            count1 += 1
        if V[i] == m2:
            count2 += 1
    if count1 > (R - L + 1) / 2:
        return m1
    if count2 > (R - L + 1) / 2:
        return m2
    return None


import util
import time

for n in range(5000, 100000, 1000):
    votes = util.random_list(n, 0, 2)
    start = time.time()
    # brute_force(votes)
    majority(votes, 0, len(votes) - 1)
    end = time.time()
    print(n, end - start)
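# Comparison sketch: the Boyer-Moore voting algorithm finds the same majority
# element in O(n) time and O(1) extra space; the final verification pass is
# needed because the candidate is only guaranteed correct when a majority exists.
#
# def boyer_moore_majority(V):
#     candidate, count = None, 0
#     for v in V:
#         if count == 0:
#             candidate = v
#         count += 1 if v == candidate else -1
#     # verify, since V may have no majority element
#     return candidate if V.count(candidate) > len(V) / 2 else None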