pivot = L[left]
	l, r = left+1, right
	while l <= r:
		if pivot <= L[l]:
			L[l], L[r] = L[r], L[l]
			r = r - 1
		else:
			l = l + 1
	L[left], L[l-1] = L[l-1], L[left]
	return l-1

def SORT2(L, left, right):
	"""Sort the slice L[left..right] (both ends inclusive) recursively.

	Split is asked for a partition index over the range; the sub-ranges on
	either side of that index are then sorted by two recursive calls.
	A range holding fewer than two elements is left untouched.
	Nothing is returned.
	"""
	if not left < right:
		return
	pivot_pos = Split(L, left, right)
	SORT2(L, left, pivot_pos - 1)
	SORT2(L, pivot_pos + 1, right)
		

import util
# Demo: build a random list, print it, sort it with SORT2 over its full
# index range, then print it again.
# NOTE(review): presumably 20 values between 0 and 50 -- confirm against
# util.random_list's signature.
L = util.random_list(20,0,50)
print(L)
SORT2(L, 0, len(L)-1)
print(L)

# (average case, assuming a good split) Time complexity:
# T(n) =  2n + 2T(n/2) is in Theta(n log n)
# Space complexity
# S(n) = n + 2S(n/2) is in Theta(n log n)
# S(n) = c + 2S(n/2) is in Theta(n)

Beispiel #2
0
    def training_process(self,
                         x,
                         labels,
                         batch_size,
                         node_batch,
                         node_epoch,
                         eval_interval,
                         out_c,
                         USE_GPU,
                         LR,
                         save_path,
                         logfile=None,
                         dropout_r=0.1,
                         svm_flag=False,
                         use_pairwise=True,
                         use_momentum=False,
                         criterion='iforest'):
        """Train one RDP_Model per tree level, filtering the top-scoring rows after each level.

        For every level from 1 to ``self.tree_depth`` the method keeps the
        rows that no earlier level has filtered out, trains a fresh
        ``RDP_Model`` on them for ``node_epoch`` epochs, scores them with
        ``model.eval_model`` and then marks the ``self.filter_ratio`` fraction
        with the highest scores as filtered. The lowest score among the
        filtered rows is appended to ``self.thresh``; after the last level
        that list is written to ``<save_path>threshList_t<t_id>.txt``.

        Args:
            x: Training data, indexed by row. When ``svm_flag`` is true it is
                converted with ``x.toarray()`` first.
            labels: Per-row labels, only used for the AUC evaluation.
            batch_size: Rows per training batch.
            node_batch: Maximum number of batches trained per epoch.
            node_epoch: Number of epochs trained at each level.
            eval_interval: Evaluate/save every ``eval_interval`` epochs.
            out_c, USE_GPU, LR, dropout_r: Forwarded to ``RDP_Model``.
            save_path: Prefix for saved files; it is concatenated directly
                with the file name, so it must end with a path separator.
            logfile: Optional writable file object that mirrors the console
                output.
            svm_flag: Whether ``x`` needs ``toarray()`` before row indexing.
            use_pairwise, use_momentum: Forwarded to ``model.train_model``.
            criterion: Forwarded to ``model.eval_model``.

        Reads the module-level flags ``is_batch_replace`` and ``is_eval``.

        NOTE(review): ``scores`` is only assigned inside the evaluation
        branch, so the filtering step uses the scores of the last *evaluated*
        epoch, and it would be unbound if ``node_epoch`` were 0.
        NOTE(review): if ``filter_num`` evaluates to 0 the threshold lookup
        indexes one past the end of ``score_ranking_idx`` -- confirm that
        ``filter_ratio`` and the data size always make it at least 1.
        """
        if svm_flag:
            x_ori = x.toarray()
        else:
            x_ori = x
        labels_ori = labels
        # x_level[i] == 0 means row i has not been filtered out by any level yet.
        x_level = np.zeros(x_ori.shape[0])
        for level in range(1, self.tree_depth + 1):

            # form x and labels
            keep_pos = np.where(x_level == 0)
            x = x_ori[keep_pos]
            labels = labels_ori[keep_pos]
            # The +1 makes the chunks no larger than batch_size even when the
            # row count is not a multiple of it.
            group_num = int(x.shape[0] / batch_size) + 1
            batch_x = np.array_split(x, group_num)
            # A new model is built for every level.
            model = RDP_Model(in_c=x.shape[1],
                              out_c=out_c,
                              USE_GPU=USE_GPU,
                              LR=LR,
                              logfile=logfile,
                              dropout_r=dropout_r)
            best_auc = best_epoch = 0

            for epoch in range(0, node_epoch):
                if not is_batch_replace:
                    # Shuffle the order of the pre-split chunks and train on
                    # at most node_batch of them.
                    random.shuffle(batch_x)
                    batch_cnt = 0
                    for batch_i in batch_x:
                        gap_loss = model.train_model(batch_i,
                                                     epoch,
                                                     use_pairwise=use_pairwise,
                                                     use_momentum=use_momentum)
                        # print("epoch ", epoch, "loss: ", loss)
                        batch_cnt += 1
                        if batch_cnt >= node_batch:
                            break

                else:
                    # random sampling with replacement
                    for batch_i in range(node_batch):
                        random_pos = random_list(0, x.shape[0] - 1, batch_size)
                        batch_data = x[random_pos]
                        gap_loss = model.train_model(batch_data,
                                                     epoch,
                                                     use_pairwise=use_pairwise,
                                                     use_momentum=use_momentum)

                if epoch % eval_interval == 0:
                    # print("epoch ", epoch, "gap_loss:", gap_loss, " recon_loss:", recon_loss)
                    # if logfile:
                    #     logfile.write("epoch " + str(epoch) + " gap_loss: " + str(gap_loss) +
                    #                   " recon_loss: " + str(recon_loss) + '\n')

                    print("tree_id:", self.t_id, "level:", level)
                    print("keep_pos.size ==", keep_pos[0].size)
                    if logfile:
                        logfile.write("tree_id: " + str(self.t_id) +
                                      " level: " + str(level) +
                                      "keep_pos.size == " +
                                      str(keep_pos[0].size) + '\n')
                    print("epoch ", epoch, "gap_loss:", gap_loss)
                    if logfile:
                        logfile.write("epoch " + str(epoch) + " gap_loss: " +
                                      str(gap_loss) + '\n')
                    # The same "latest" file is overwritten at every evaluation.
                    model.save_model(save_path + 't' + str(self.t_id) + '_l' +
                                     str(level) + '_latest.h5')

                    scores = model.eval_model(x, criterion=criterion)

                    # eval
                    if is_eval:
                        try:
                            roc_auc, ap = aucPerformance(
                                scores, labels, logfile)
                            if roc_auc > best_auc:
                                best_auc = roc_auc
                                best_epoch = epoch

                            print("Best AUC-ROC: %.4f" % best_auc)
                            if logfile:
                                logfile.write("Best AUC-ROC: %.4f\n" %
                                              best_auc)
                            print("Best Epoch %d\n" % best_epoch)
                            if logfile:
                                logfile.write("Best Epoch %d\n\n" % best_epoch)
                        except ValueError:
                            print(
                                "Only one class present in y_true. ROC AUC score is not defined in that case."
                            )

                    if logfile:
                        logfile.flush()

            # filter anomaly elements. the higher the scores are, the more abnormal
            ranking_scores = scores
            # argsort is ascending, so the tail of the index array holds the
            # highest-scoring rows.
            score_ranking_idx = np.argsort(ranking_scores)
            filter_num = int(self.filter_ratio * score_ranking_idx.size)
            filter_idx = score_ranking_idx[score_ranking_idx.size -
                                           filter_num:]
            # Map positions within the kept subset back to original row
            # indices; rows filtered at earlier levels get larger values.
            x_level[keep_pos[0][filter_idx]] = self.tree_depth + 1 - level
            # Threshold for this level: the smallest score among the filtered rows.
            self.thresh.append(
                ranking_scores[score_ranking_idx[score_ranking_idx.size -
                                                 filter_num]])

            # epoch for
        # level for

        # save self.thresh
        filename = save_path + 'threshList_t' + str(self.t_id) + '.txt'
        list_save(self.thresh, filename, 'w')
Beispiel #3
0
"""Quick sort: n log n / n log n, not stable."""
from util import random_list


def quick(array):
    """Return the elements of ``array`` in ascending order (quick sort).

    Uses a three-way partition around the first element: smaller values are
    moved to the front, larger values to the back, and values equal to the
    pivot end up in between. The partitioning swaps elements of ``array``
    itself; the two outer parts are then sorted recursively on copies and
    the result is a new list. A list with at most one element is returned
    as-is (the same object).
    """
    if len(array) <= 1:
        return array

    lo = 0                # start of the run of pivot-equal values
    i = 0                 # next element to classify
    hi = len(array) - 1   # last element not yet classified

    while i <= hi:
        current, pivot = array[i], array[lo]
        if current == pivot:
            i += 1
        elif current > pivot:
            # Send it to the back; the element swapped in is still unclassified,
            # so ``i`` does not advance.
            array[i], array[hi] = array[hi], current
            hi -= 1
        else:
            # Send it to the front, which shifts the pivot run right by one.
            array[i], array[lo] = pivot, current
            lo += 1
            i += 1

    smaller = array[:lo]
    equal = array[lo:i]
    larger = array[i:]
    return quick(smaller) + equal + quick(larger)


if __name__ == '__main__':
    # Demo: sort a list produced by random_list (called with its defaults)
    # and print the sorted result.
    l = random_list()
    print(quick(l))
Beispiel #4
0
def main():
    """Train an RDP_Model on one of several datasets and evaluate it by clustering.

    The dataset is chosen by the module-level ``data_path``; the remaining
    settings (``save_path``, ``out_c``, ``USE_GPU``, ``LR``, ``logfile``,
    ``dropout_r``, ``total_epoch``, ``epoch_batch``, ``batch_size``,
    ``eval_interval``, ``is_eval``) are also read from module scope.

    Every ``eval_interval`` epochs the model is saved and, when ``is_eval`` is
    set, the model output for the whole dataset is clustered with KMeans and
    compared against the true labels (NMI, macro/micro/weighted F1, adjusted
    Rand score). The best NMI and its epoch are tracked and reported.

    Raises:
        Exception: if ``data_path`` is not one of the supported dataset names.

    NOTE(review): ``shutil.rmtree(save_path)`` wipes the output directory on
    every run and raises if it does not exist yet -- confirm this is intended.
    """

    # Start from an empty output directory.
    shutil.rmtree(save_path)
    os.mkdir(save_path)

    if data_path == '20newsgroups':
        newsgroups_data = fetch_20newsgroups_vectorized(subset='all')
        x = newsgroups_data.data.toarray()
        labels = newsgroups_data.target
        n_clusters = 20
    elif data_path == 'r8':
        df = pd.read_csv('data/r8-all-stemmed.txt')
        labels_idx = [
            'acq', 'crude', 'earn', 'grain', 'interest', 'money-fx', 'ship',
            'trade'
        ]
        # Encode each class name as its position in labels_idx.
        labels = df['class'].values
        labels = [labels_idx.index(ele) for ele in labels]
        labels = np.asarray(labels, dtype=np.int64)
        x_df = df.drop(['class'], axis=1)
        corpus = np.squeeze(x_df.values)

        # Hard-coded switch between TF-IDF and raw count features.
        is_TfidfVectorizer = True
        if is_TfidfVectorizer:
            vectorizer = TfidfVectorizer()
            x = vectorizer.fit_transform(corpus).toarray()
        else:
            vectorizer = CountVectorizer()
            x = vectorizer.fit_transform(corpus).toarray()
        n_clusters = 8
    elif data_path == 'olivetti_faces':
        data = fetch_olivetti_faces()
        x = data.data
        labels = data.target
        n_clusters = 40
    elif data_path == 'rcv1':
        # data = fetch_rcv1()
        # x = data.data.toarray()
        # labels = data.target.toarray()
        # n_clusters = 103

        x, labels = get_data_from_svmlight_file('data/rcv1_train.binary')
        x = x.toarray()
        n_clusters = 2
    elif data_path == 'sector':
        x, labels = get_data_from_svmlight_file('data/sector.scale.all')
        x = x.toarray()
        n_clusters = 105
    else:
        raise Exception("Invalid data path!")
    print("Data shape: (%d, %d)" % x.shape)
    data_size = labels.size

    # build model
    model = RDP_Model(in_c=x.shape[1],
                      out_c=out_c,
                      USE_GPU=USE_GPU,
                      LR=LR,
                      logfile=logfile,
                      dropout_r=dropout_r)

    best_nmi = best_epoch = 0
    loss = 0

    for epoch in range(0, total_epoch):

        # random sampling with replacement
        for batch_i in range(epoch_batch):
            random_pos = random_list(0, data_size - 1, batch_size)
            batch_data = x[random_pos]
            loss = model.train_model(batch_data, epoch)

        if epoch % eval_interval == 0:
            # Only the loss of the last batch of this epoch is reported.
            print("epoch ", epoch, "loss:", loss)
            if logfile:
                logfile.write("epoch " + str(epoch) + " loss: " + str(loss) +
                              '\n')

            model.save_model(save_path + 'model_latest.h5')

            # eval
            if is_eval:
                gap_dims = model.eval_model(x)

                kmeans_results = KMeans(n_clusters=n_clusters,
                                        random_state=0).fit(gap_dims)
                # Match each learned cluster label with the true labels found in them
                y_pred = kmeans_results.labels_
                labels_pred = np.zeros_like(y_pred)
                for i in range(n_clusters):
                    mask = (y_pred == i)
                    # NOTE(review): presumably scipy.stats.mode, i.e. the most
                    # common true label inside cluster i -- confirm the import.
                    labels_pred[mask] = mode(labels[mask])[0]

                # evaluations
                nmi_scores = normalized_mutual_info_score(labels, labels_pred)
                print("nmi_scores:", nmi_scores)
                if logfile:
                    logfile.write("nmi_scores: %.4f\n" % nmi_scores)

                fscores = f1_score(labels, labels_pred, average='macro')
                print("fscores_macro:", fscores)
                if logfile:
                    logfile.write("fscores_macro: %.4f\n" % fscores)

                fscores = f1_score(labels, labels_pred, average='micro')
                print("fscores_micro:", fscores)
                if logfile:
                    logfile.write("fscores_micro: %.4f\n" % fscores)

                fscores = f1_score(labels, labels_pred, average='weighted')
                print("fscores_weighted:", fscores)
                if logfile:
                    logfile.write("fscores_weighted: %.4f\n" % fscores)

                # Despite the "RI" name this is the *adjusted* Rand score.
                RI_scores = adjusted_rand_score(labels, labels_pred)
                print("RI_scores:", RI_scores)
                if logfile:
                    logfile.write("RI_scores: %.4f\n" % RI_scores)

                if best_nmi < nmi_scores:
                    best_nmi = nmi_scores
                    best_epoch = epoch

                print("Best NMI: %.4f" % best_nmi)
                print("Best Epoch %d\n" % best_epoch)
                if logfile:
                    logfile.write("Best NMI: %.4f\n" % best_nmi)
                    logfile.write("Best Epoch %d\n\n" % best_epoch)
                    logfile.flush()
Beispiel #5
0
    return pair_up(L, new_L)


def majority_pair_up(L):
    """Run pair_up on L, passing L itself as both of its arguments.

    Returns whatever pair_up returns.
    """
    initial = L
    return pair_up(L, initial)


# print(votes)
# a = majority_pair_up(votes)
# b = brute_force(votes)

# print(a,b)

import util
import time
# Run 100 trials: solve the same random vote list with brute_force and with
# majority_pair_up, timing each call with wall-clock time.
for m in range(100):
    votes = util.random_list(100, 0, 2)
    start1 = time.time()
    a = brute_force(votes)
    end1 = time.time()

    start2 = time.time()
    b = majority_pair_up(votes)
    end2 = time.time()

    #print(votes)
    print("brute-force:ans=%s,time=%s  pair-up:ans=%s,time=%s" %
          (a, end1 - start1, b, end2 - start2))

    # Both implementations should agree; report any trial where they do not.
    if a != b:
        print('Answers dont match!')
Beispiel #6
0
            ))

    print("Init tic time.")
    tic_time()

    # training process
    for i in range(args.forest_Tnum):
        # random sampling with replacement
        if random_size < 0:
            warnings.warn(f'Using full size {args.data_size} by default...')
        if random_size > data_size:
            raise ValueError(
                f'`random_size` {args.random_size} exceeds data size {args.data_size}!'
            )

        random_pos = random_list(0, data_size - 1, random_size)
        # random sampling without replacement
        # random_pos = random.sample(range(0, data_size), random_size)

        # to form x and labels
        x = x_ori[random_pos]
        if svm_flag:
            labels = labels_ori[random_pos]
        else:
            labels = labels_ori[random_pos].values

        print("tree id:", i, "tic time.")
        tic_time()

        forest[i].training_process(x=x,
                                   labels=labels,
Beispiel #7
0
def main():
    """Build a forest of RDPTree objects and train each tree on a random sample.

    Configuration (``save_path``, ``data_path``, ``logfile``, ``forest_Tnum``,
    ``tree_depth``, ``filter_ratio``, ``random_size``, ``batch_size``,
    ``node_batch``, ``node_epoch``, ``eval_interval``, ``out_c``, ``USE_GPU``,
    ``LR``, ``dropout_r``) is read from module scope.

    NOTE(review): ``shutil.rmtree(save_path)`` wipes the output directory on
    every run and raises if it does not exist yet -- confirm this is intended.
    """
    # Declared global because the svmlight branch below reassigns it.
    global random_size

    # Start from an empty output directory.
    shutil.rmtree(save_path)
    os.mkdir(save_path)

    # Any data path containing "svm" is loaded as an svmlight file.
    svm_flag = False
    if 'svm' in data_path:
        svm_flag = True
        from util import get_data_from_svmlight_file
        x_ori, labels_ori = get_data_from_svmlight_file(data_path)
        random_size = 1024
    else:
        x_ori, labels_ori = dataLoading(data_path, logfile)
    data_size = labels_ori.size

    # build forest
    forest = []
    for i in range(forest_Tnum):
        forest.append(
            RDPTree(
                t_id=i + 1,
                tree_depth=tree_depth,
                filter_ratio=filter_ratio,
            ))

    print("Init tic time.")
    tic_time()

    # training process
    for i in range(forest_Tnum):

        # random sampling with replacement
        random_pos = random_list(0, data_size - 1, random_size)
        # random sampling without replacement
        # random_pos = random.sample(range(0, data_size), random_size)

        # to form x and labels
        x = x_ori[random_pos]
        if svm_flag:
            labels = labels_ori[random_pos]
        else:
            # NOTE(review): presumably a pandas object here, hence .values -- confirm.
            labels = labels_ori[random_pos].values

        # Printed id is the 0-based loop index, while the tree's t_id is i + 1.
        print("tree id:", i, "tic time.")
        tic_time()

        forest[i].training_process(
            x=x,
            labels=labels,
            batch_size=batch_size,
            node_batch=node_batch,
            node_epoch=node_epoch,
            eval_interval=eval_interval,
            out_c=out_c,
            USE_GPU=USE_GPU,
            LR=LR,
            save_path=save_path,
            logfile=logfile,
            dropout_r=dropout_r,
            svm_flag=svm_flag,
        )

        print("tree id:", i, "tic time end.")
        tic_time()
Beispiel #8
0
    ms_left = maxsum(Left)

    # compute max_sum on Right half
    # T(n/2)
    ms_right = maxsum(Right)

    # third case: ms overlaping Left and Right
    #c*n
    i = Len(L) // 2
    ms_overlap_ending_at_im1 = L[i - 1]
    cur_sum = L[i - 1]
    for j in range(i - 2, -1, -1):
        cur_sum = cur_sum + L[j]
        if cur_sum > ms_overlap_ending_at_im1:
            ms_overlap_ending_at_im1 = cur_sum

    ms_overlap_ending_at_i = L[i]
    cur_sum = l[i]
    for j in range(i + 1, len(L)):
        cur_sum = cur_sum + L[j]
        if cur_sum > ms_overlap_ending_at_i:
            ms_overlap_ending_at_i = cur_sum

    ms_overlap = ms_overlap_ending_at_im1 + ms_overlap_ending_at_i

    m = max(ms_left, ms_right, ms_overlap)
    return m


# Demo: run maxsum on a small random list and print the input next to the result.
# NOTE(review): presumably 8 values between -5 and 6 -- confirm against
# util.random_list's signature.
profits = util.random_list(8, -5, 6)
print(profits, maxsum(profits))
Beispiel #9
0
    if j < N and theoretical_w < cap:
        theoretical_best += ((cap - theoretical_w) / weight[j]) * calories[j]
    if theoretical_best < best:
        return False
    return True


# W = [6,3,4,2]
# C = [3000,1400,1600,900]

import util
import time
# Module-level search state, reset before every solver run below.
# NOTE(review): presumably read and updated as globals by Knapsack /
# Knapsack2 (together with Config and N) -- confirm in their definitions.
best, best_weight = None, 0

# Time both solvers on random instances with N items, for N = 15..24.
for N in range(15, 25):
    W = util.random_list(N, 10, 20)
    C = util.random_list(N, 2000, 3000)
    # Capacity is 40% of (first item's weight * number of items).
    Cap = W[0] * len(W) * 0.4

    start_time = time.time()
    Config = [None] * N
    best, best_weight = None, 0
    Knapsack(Cap, W, C, -1, 0)
    end_time = time.time()
    print(N, end_time - start_time, best, Cap, total(best, W), total(best, C))

    # Same instance again with the second solver, from a clean state.
    start_time = time.time()
    Config = [None] * N
    best, best_weight = None, 0
    Knapsack2(Cap, W, C, -1, 0, 0)
    end_time = time.time()
Beispiel #10
0
    if i not in Table:
        if i == 0:
            Table[i] = 1
        m = 1
        for j in range(i):
            if (L[j] < L[i]) and (m < LIS(L, j) + 1):
                m = LIS(L, j) + 1
        Table[i] = m
    return Table[i]


def longest_increasing_subseq(L):
    """Return the length of the longest strictly increasing subsequence of L.

    The answer is the largest value of ``LIS(L, i)`` over every index ``i``
    of ``L``. An empty ``L`` yields 0 (a bare ``max()`` over no values would
    raise ``ValueError``).
    """
    # A generator avoids building a throwaway list; ``default`` covers len(L) == 0.
    return max((LIS(L, i) for i in range(len(L))), default=0)


# T[i] == Table{i} == LIS(L,i)
def iLIS(L):
    """Return the length of the longest strictly increasing subsequence of L.

    Iterative, bottom-up version: ``T[i]`` is the length of the longest
    strictly increasing subsequence that ends at ``L[i]``. Every ``T[i]``
    starts at 1 (the element alone) and is extended from any earlier, smaller
    element. Runs in O(n^2) comparisons.

    Returns 0 for an empty ``L``.
    """
    if len(L) == 0:
        # max() of an empty table would raise ValueError.
        return 0
    T = [1] * len(L)
    for i, current in enumerate(L):
        for j in range(i):
            if L[j] < current and T[i] < T[j] + 1:
                T[i] = T[j] + 1
    return max(T)


import util
# Cross-check the memoized and the iterative implementations on random lists
# of growing length; the two printed results should match on every line.
for n in range(20, 1000, 10):
    items = util.random_list(n, 0, 100)
    # LIS caches results per index in the global Table, so it has to be
    # cleared before each new list.
    Table = {}
    print(len(items), longest_increasing_subseq(items), iLIS(items))
Beispiel #11
0
'''
This program creates a real network wherein every node is 
'''

table = {}
import util
# NOTE(review): A is overwritten on every iteration, so only the list from
# the last iteration (n == 99) is left after the loop -- confirm the intent.
for n in range(0, 100, 1):
    A = util.random_list(n, 0, 99)

import networkx as nx
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt2
import random


def real_network(A):
    """Build a 100-node graph whose nodes sit on distinct random grid points.

    Each node ``i`` in ``0..99`` receives a ``posxy`` attribute holding an
    ``(x, y)`` pair, both coordinates drawn from ``0..99``, that no other
    node uses. The nodes are then connected into one cycle in index order.

    NOTE(review): ``A`` is not used by the code visible here and the graph is
    not returned -- the definition may continue beyond this view; confirm
    against the full file.
    """
    G = nx.Graph()
    list_of_nodes = []
    coordinates = []
    taken = set()  # same points as ``coordinates``, for O(1) duplicate checks
    for i in range(100):
        x = random.choice(range(100))
        y = random.choice(range(100))
        # Redraw until the point is not already occupied by another node.
        while (x, y) in taken:
            x = random.choice(range(100))
            y = random.choice(range(100))
        taken.add((x, y))
        coordinates.append((x, y))
        G.add_node(i, posxy=(x, y))
        list_of_nodes.append(i)
    # Graph.add_cycle was removed in NetworkX 2.4; the module-level function
    # is the supported way to add a cycle.
    nx.add_cycle(G, list_of_nodes)

    #B = assign_neighbors()
Beispiel #12
0
def majority(V, L, R):
    """Return the majority element of V[L..R] (inclusive), or None if there is none.

    A majority element occurs in strictly more than half of the positions in
    the range. Divide and conquer: a majority of the whole range must also be
    a majority of at least one half, so only the two halves' results need to
    be counted over the full range.

    Args:
        V: Indexable sequence of comparable values.
        L: First index of the range.
        R: Last index of the range. ``R < L`` denotes an empty range.

    Returns:
        The majority value, or None when no value exceeds half the range
        (including the empty range).
    """
    if L > R:
        # Empty range: without this guard the recursion never terminates.
        return None
    if L == R:
        return V[L]

    mid = (L + R) // 2
    m1 = majority(V, L, mid)
    m2 = majority(V, mid + 1, R)
    if m1 == m2:
        return m1

    # The halves disagree: count both candidates over the whole range.
    size = R - L + 1
    count1 = count2 = 0
    for i in range(L, R + 1):
        if V[i] == m1:
            count1 += 1
        if V[i] == m2:
            count2 += 1

    # Integer form of "count > size / 2".
    if 2 * count1 > size:
        return m1
    if 2 * count2 > size:
        return m2
    return None


import util
import time
# Time the divide-and-conquer majority on random vote lists of growing size
# and print "<size> <seconds>" for each run.
for n in range(5000, 100000, 1000):
    votes = util.random_list(n, 0, 2)
    start = time.time()
    # brute_force(votes)
    majority(votes, 0, len(votes) - 1)
    end = time.time()
    print(n, end - start)