import csv
import math
import random
from itertools import product

import numpy as np
import tensorflow as tf
import xlwt

# Project-local modules, inferred from the call sites below; the
# boundary_remaining module name is assumed (it must provide balancingPoint).
import formula
import testing_function
import util
import boundary_remaining as br


def testingPoint(formu, dimension, number, lowerbound, largebound, path,
                 category):
    """Write a labelled grid of roughly `number` test points to a CSV file."""
    with open(path, 'w', newline='') as csvfile:
        # Points per axis so that the full grid has about `number` points.
        numberOfPoint = int(round(math.pow(number, 1.0 / dimension)))
        step = (largebound - lowerbound) / float(numberOfPoint)
        pointList = [lowerbound + i * step for i in range(numberOfPoint)]
        output = list(product(pointList, repeat=dimension))
        test = csv.writer(csvfile)
        for point in output:
            point = list(point)
            if category == formula.POLYHEDRON:
                flag = testing_function.polycircleModel(formu[0], formu[1],
                                                        point)
            else:
                flag = testing_function.polynomialModel(formu[:-1], point,
                                                        formu[-1])
            # Label 0 if the point satisfies the formula, 1 otherwise.
            point.insert(0, 0.0 if flag else 1.0)
            test.writerow(point)
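# A minimal usage sketch with hypothetical values: lay a ~4000-point 2-D grid
# over [-1.5, 1.5]^2 and label it against a polynomial with coefficients
# [1, 2, 3] and threshold 4 (the exact encoding is whatever
# testing_function.polynomialModel expects). Assumes ./dataset exists.
# testingPoint([1, 2, 3, 4], 2, 4000, -1.5, 1.5,
#              "./dataset/test_poly.csv", formula.POLYNOMIAL)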
def PolyhedronPoint(formu, dimension, number, path, category):
    """Grid-sample labelled test points in a box around each sphere center."""
    with open(path, 'w', newline='') as csvfile:
        test = csv.writer(csvfile)
        # Split the point budget across the centers, then take the
        # per-axis count for a grid of that size.
        numberOfPoint = int(
            round(math.pow(number // len(formu[0]), 1.0 / dimension)))
        # numberOfPoint = 300
        for j in range(len(formu[0])):  # j-th center point
            largebound = formu[1][j]
            step = (2 * largebound) / float(numberOfPoint)
            pointList = [-largebound + i * step for i in range(numberOfPoint)]
            output = list(product(pointList, repeat=dimension))
            for point in output:
                point = list(point)
                # Shift the grid so it is centred on the j-th center.
                for d in range(len(point)):
                    point[d] += formu[0][j][d]
                flag = testing_function.polycircleModel(formu[0], formu[1],
                                                        point)
                point.insert(0, 0.0 if flag else 1.0)
                test.writerow(point)
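# Usage sketch, reusing the two-circle formula shown in randomCircle below
# (centers (12, 0) and (-12, 0), radii 4):
# PolyhedronPoint([[[12, 0], [-12, 0]], [4, 4]], 2, 4000,
#                 "./dataset/test4_4.csv", formula.POLYHEDRON)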
def randomCircle(formu):  # e.g. [[[12, 0], [-12, 0]], [4, 4]]
    """Generate a random labelled training set and a grid test set."""
    dim = len(formu[0][0])
    print(dim)
    trainname = "train" + "_".join(str(x) for x in formu[1]) + ".csv"
    testname = "test" + "_".join(str(x) for x in formu[1]) + ".csv"
    train_path = "./dataset/" + trainname
    test_path = "./dataset/" + testname
    with open(train_path, 'w', newline='') as csvfile:
        train = csv.writer(csvfile)
        for k in range(700):
            generated_point = []
            if k % 3 == 0:
                # Every third point is drawn near a randomly chosen center.
                center = random.randint(0, len(formu[0]) - 1)
                for i in range(dim):
                    generated_point.append(
                        random.uniform(int(formu[0][center][i]) - 10,
                                       int(formu[0][center][i]) + 10))
            else:
                for i in range(dim):
                    generated_point.append(random.uniform(-1000, 1000))
            flag = testing_function.polycircleModel(formu[0], formu[1],
                                                    generated_point)
            data_point = [0.0 if flag else 1.0] + generated_point
            train.writerow(data_point)
    # The test set is written separately (PolyhedronPoint opens test_path
    # itself, so it must not be held open here).
    PolyhedronPoint(formu, dim, 4000, test_path, formula.POLYHEDRON)
    return train_path, test_path
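# Usage sketch, taken from the example in the signature comment above:
# train_path, test_path = randomCircle([[[12, 0], [-12, 0]], [4, 4]])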
def generate_accuracy(train_path, test_path, formu, category):
    """Gradient-based active learning: move the highest-gradient training
    points toward the decision boundary and query their true labels."""
    # Parameters
    learning_rate = 0.1
    training_epochs = 100
    step = 8  # probe distance when moving a point along its gradient
    pointsRatio = 0.25  # fraction of highest-gradient points to move
    active_learning_iteration = 10

    test_set_X = []
    test_set_Y = []
    train_set_X = []
    train_set_Y = []
    util.preprocess(train_set_X, train_set_Y, test_set_X, test_set_Y,
                    train_path, test_path, read_next=True)

    # Network parameters
    n_hidden_1 = 10  # 1st layer number of neurons
    n_hidden_2 = 10  # 2nd layer number of neurons
    n_input = len(train_set_X[0])  # input dimension of the data set
    n_classes = 1  # binary label, one output unit

    random_seed = 0
    random.seed(random_seed)
    np.random.seed(random_seed)
    tf.set_random_seed(random_seed)

    # tf Graph input
    X = tf.placeholder("float", [None, n_input])
    Y = tf.placeholder("float", [None, n_classes])

    weights = {
        'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], mean=0)),
        'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], mean=0)),
        'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes], mean=0))
    }
    biases = {
        'b1': tf.Variable(tf.random_normal([n_hidden_1])),
        'b2': tf.Variable(tf.random_normal([n_hidden_2])),
        'out': tf.Variable(tf.random_normal([n_classes]))
    }

    # Construct model
    logits = util.multilayer_perceptron(X, weights, biases)

    # Define loss and optimizer
    loss_op = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=Y))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    # optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op)

    # Initializing the variables
    init = tf.global_variables_initializer()
    newgrads = tf.gradients(logits, X)  # gradient of the output w.r.t. input

    train_acc_list = []
    test_acc_list = []
    result = []
    wb = xlwt.Workbook()
    ws = wb.add_sheet("farcircle_gradient")

    for i in range(active_learning_iteration):
        print("*******", i, "th loop:")
        print("training set size", len(train_set_X))
        with tf.Session() as sess:
            sess.run(init)

            for epoch in range(training_epochs):
                _, c = sess.run([train_op, loss_op],
                                feed_dict={
                                    X: train_set_X,
                                    Y: train_set_Y
                                })
            g = sess.run(newgrads, feed_dict={X: train_set_X, Y: train_set_Y})

            train_y = sess.run(logits, feed_dict={X: train_set_X})
            test_y = sess.run(logits, feed_dict={X: test_set_X})
            train_acc = util.calculateAccuracy(train_y, train_set_Y, False)
            test_acc = util.calculateAccuracy(test_y, test_set_Y, False)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)

            new_train_set_X = []
            new_train_set_Y = []
            gradientList = g[0].tolist()

            # Rank points by gradient magnitude; the cutoff keeps the top
            # pointsRatio of them.
            g_list = []
            for p in range(len(g[0])):
                grad = 0
                for q in range(n_input):
                    grad += g[0][p][q] * g[0][p][q]
                g_list.append(math.sqrt(grad))
            util.quickSort(g_list)
            threshold = g_list[int(-len(gradientList) * pointsRatio)]
            print(threshold)

            # (An alternative strategy moves all coordinates at once using
            # decide_gradient; the second generate_accuracy below keeps a
            # version of it.)

            # Decide new points dimension by dimension.
            for j in range(len(train_set_X)):
                grad = 0
                for k in range(n_input):
                    grad += g[0][j][k] * g[0][j][k]
                g_total = math.sqrt(grad)
                if g_total <= threshold:
                    continue
                new = []
                for k in range(n_input):
                    # Probe one step along +/- gradient in dimension k only.
                    tmp1 = list(train_set_X[j])
                    tmp1[k] = tmp1[k] + g[0][j][k] * (step / g_total)
                    tmp2 = list(train_set_X[j])
                    tmp2[k] = tmp2[k] - g[0][j][k] * (step / g_total)
                    new_pointsX = [tmp1, tmp2, train_set_X[j]]
                    new_pointsY = sess.run(logits, feed_dict={X: new_pointsX})
                    original_y = new_pointsY[-1]
                    distances = list(new_pointsY)[:-1]
                    # Keep the probe whose prediction moves toward the other
                    # class, i.e. toward the decision boundary.
                    if original_y < 0.5:
                        ans = max(distances)
                    else:
                        ans = min(distances)
                    one_position = new_pointsX[distances.index(ans)]
                    if one_position == tmp1:
                        new.append(tmp1[k])
                    else:
                        new.append(tmp2[k])

                if new not in train_set_X:
                    new_train_set_X.append(new)
                    # Query the true label of the generated point.
                    if category == formula.POLYHEDRON:
                        flag = testing_function.polycircleModel(
                            formu[0], formu[1], new)
                    else:
                        flag = testing_function.polynomialModel(
                            formu[:-1], new, formu[-1])
                    new_train_set_Y.append([0] if flag else [1])

            # (A legacy 2-D experiment here measured how often a point moved
            # along its gradient kept its true label, reported separately for
            # small and large gradients.)

            train_set_X = train_set_X + new_train_set_X
            train_set_Y = train_set_Y + new_train_set_Y

            # (The per-iteration results can optionally be dumped to
            # "train_results.xls" via ws/wb.)

    result.append(train_acc_list)
    result.append(test_acc_list)
    return result
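# Usage sketch (note that a second generate_accuracy with the same name is
# defined below; if both definitions live in one module, the later one
# shadows this one):
# train_path, test_path = randomCircle([[[12, 0], [-12, 0]], [4, 4]])
# result = generate_accuracy(train_path, test_path,
#                            [[[12, 0], [-12, 0]], [4, 4]],
#                            formula.POLYHEDRON)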
def balancingPoint(inflag, points, gradient, length_added, formu, category,
                   std_dev):
    """Push minority-label points one step along their gradients, keeping
    only those whose true label is unchanged, until length_added points are
    collected or too many attempts (100) have been made."""
    times = 0
    outputX = []
    added = 0
    flag = False
    count = 0.0
    wrong = 0.0
    step = std_dev
    while True:
        for i in range(len(points)):
            grad = 0
            for k in range(len(points[0])):
                grad += gradient[i][k] * gradient[i][k]
            g_total = math.sqrt(grad)

            # Move the point one step along its normalised gradient.
            tmpList = []
            for j in range(len(points[i])):
                tmpValue = points[i][j] + gradient[i][j] * (step / g_total)
                tmpList.append(tmpValue)

            if category == formula.POLYHEDRON:
                pointflag = testing_function.polycircleModel(
                    formu[0], formu[1], tmpList)
            elif category == formula.POLYNOMIAL:
                pointflag = testing_function.polynomialModel(
                    formu[:-1], tmpList, formu[-1])
            count += 1

            # Reject the moved point if its true label flipped.
            if inflag == 1 and pointflag:
                times += 1
                wrong += 1
                if times > 100:
                    flag = True
                    break
                continue
            if inflag == 0 and not pointflag:
                times += 1
                wrong += 1
                if times > 100:
                    flag = True
                    break
                continue

            outputX.append(tmpList)
            times += 1
            if times > 100:
                flag = True
                break
            added += 1
            if added == length_added:
                flag = True
                break
        if flag:
            break
    print(count, wrong)
    print("points added \n", outputX)
    print("Boundary remaining accuracy: ", float((count - wrong) / count))
    return outputX
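# Usage sketch with hypothetical values (the points, gradients, and step are
# made up): push five label-1 points along their gradients for the two-circle
# formula, requesting up to five kept points.
# label_1 = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
# gra1 = [[0.1, 0.2]] * 5
# balancingPoint(1, label_1, gra1, 5, [[[12, 0], [-12, 0]], [4, 4]],
#                formula.POLYHEDRON, 1.0)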
def generate_accuracy(train_data_file, test_data_file, formu, category):
    """Active learning that queries mid-points of well-separated
    opposite-label pairs, with gradient-based boundary remaining to
    rebalance the labels when they drift apart."""
    # Parameters
    learning_rate = 0.1
    training_epochs = 100
    pointsNumber = 10
    active_learning_iteration = 10
    threshold = 5  # minimum distance between an opposite-label pair
    step = 8  # gradient probe distance (same setting as the version above)

    test_set_X = []
    test_set_Y = []
    train_set_X = []
    train_set_Y = []
    util.preprocess(train_set_X, train_set_Y, test_set_X, test_set_Y,
                    train_data_file, test_data_file, read_next=True)

    # Network parameters
    n_hidden_1 = 10  # 1st layer number of neurons
    n_hidden_2 = 10  # 2nd layer number of neurons
    n_input = len(train_set_X[0])  # input dimension of the data set
    n_classes = 1  # binary label, one output unit

    random_seed = 0
    random.seed(random_seed)
    np.random.seed(random_seed)
    tf.set_random_seed(random_seed)

    train_acc_list = []
    test_acc_list = []
    result = []

    # tf Graph input
    X = tf.placeholder("float", [None, n_input])
    Y = tf.placeholder("float", [None, n_classes])

    weights = {
        'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], mean=0)),
        'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], mean=0)),
        'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes], mean=0))
    }
    biases = {
        'b1': tf.Variable(tf.random_normal([n_hidden_1])),
        'b2': tf.Variable(tf.random_normal([n_hidden_2])),
        'out': tf.Variable(tf.random_normal([n_classes]))
    }

    # Construct model
    logits = util.multilayer_perceptron(X, weights, biases)

    # Define loss and optimizer
    loss_op = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=Y))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    # optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op)

    # Initializing the variables
    init = tf.global_variables_initializer()
    newgrads = tf.gradients(logits, X)

    for i in range(active_learning_iteration):
        print("*******", i, "th loop:")
        print("training set size", len(train_set_X))
        pointsNumber = 10
        with tf.Session() as sess:
            sess.run(init)
            label_0, label_1 = util.data_partition(train_set_X, train_set_Y)
            print(len(label_0), len(label_1))
            if len(label_1) == 0 or len(label_0) == 0:
                raise Exception("Cannot be classified")

            # Collect distances between all opposite-label pairs that are
            # further apart than `threshold`.
            distanceList = []
            point_pairList = {}
            for m in label_0:
                for n in label_1:
                    distance = 0
                    for d in range(n_input):
                        distance += (m[d] - n[d]) * (m[d] - n[d])
                    distance = math.sqrt(distance)
                    if distance > threshold:
                        # The key interleaves the coordinates of both points.
                        key = ()
                        for h in range(n_input):
                            key = key + (m[h], n[h])
                        if key not in point_pairList:
                            point_pairList[key] = distance
                        distanceList.append(distance)
            util.quickSort(distanceList)

            # Select mostly close pairs, some medium, a few far ones.
            selectedList = []
            length = len(distanceList)
            index1 = length // 3
            index2 = length // 3 * 2
            pointer = 0
            for p in range(3):
                if pointer < index1:
                    num = int(pointsNumber * 0.6)
                    util.addPoints(num, distanceList, selectedList, pointer)
                    pointer = index1
                elif pointer < index2:
                    num = int(pointsNumber * 0.3)
                    util.addPoints(num, distanceList, selectedList, pointer)
                    pointer = index2
                else:
                    num = int(pointsNumber * 0.1)
                    util.addPoints(num, distanceList, selectedList, pointer)

            # Query the true label of the mid-point of each selected pair.
            for m in selectedList:
                for k, v in point_pairList.items():
                    if m == v:
                        point_0 = []
                        point_1 = []
                        for b in range(len(k)):
                            if b % 2 == 0:
                                point_0.append(k[b])
                            else:
                                point_1.append(k[b])
                        middlepoint = []
                        for b in range(n_input):
                            middlepoint.append((point_0[b] + point_1[b]) / 2.0)
                        if category == formula.POLYHEDRON:
                            flag = testing_function.polycircleModel(
                                formu[0], formu[1], middlepoint)
                        elif category == formula.POLYNOMIAL:
                            flag = testing_function.polynomialModel(
                                formu[:-1], middlepoint, formu[-1])
                        if middlepoint not in train_set_X:
                            train_set_X.append(middlepoint)
                            train_set_Y.append([0] if flag else [1])

            label_0, label_1 = util.data_partition(train_set_X, train_set_Y)
            length_0 = len(label_0) + 0.0
            length_1 = len(label_1) + 0.0
            print("label 0", length_0, "label 1", length_1)

            # If the two labels are already roughly balanced, just retrain.
            if (0.7 < length_0 / length_1 < 1) or \
                    (0.7 < length_1 / length_0 < 1):
                for epoch in range(training_epochs):
                    _, c = sess.run([train_op, loss_op],
                                    feed_dict={
                                        X: train_set_X,
                                        Y: train_set_Y
                                    })
                train_y = sess.run(logits, feed_dict={X: train_set_X})
                test_y = sess.run(logits, feed_dict={X: test_set_X})
                print("new train size after mid point", len(train_set_X),
                      len(train_set_Y))
                train_acc = util.calculateAccuracy(train_y, train_set_Y,
                                                   False)
                test_acc = util.calculateAccuracy(test_y, test_set_Y, False)
                train_acc_list.append(train_acc)
                test_acc_list.append(test_acc)
                continue

            # Otherwise pick the minority label for boundary remaining.
            label_selected = []
            gradient_selected = []
            length_added = 0
            label_flag = 0
            g = sess.run(newgrads, feed_dict={X: train_set_X, Y: train_set_Y})
            label_0, label_1, label_0_gradient, label_1_gradient = \
                util.data_partition_gradient(train_set_X, train_set_Y, g[0])
            if length_0 / length_1 < 0.7:
                label_selected = label_0
                gradient_selected = label_0_gradient
                length_added = length_1 - length_0
                label_flag = 0
            elif length_1 / length_0 < 0.7:
                label_selected = label_1
                gradient_selected = label_1_gradient
                length_added = length_0 - length_1
                label_flag = 1
            else:
                continue

            # For each minority point, find the sign pattern (direction) that
            # moves its prediction away from the decision boundary.
            gradient_list = []
            decision = decide_gradient(len(label_selected[0]))
            for j in range(len(label_selected)):
                grad = 0
                for k in range(len(label_selected[0])):
                    grad += gradient_selected[j][k] * gradient_selected[j][k]
                g_total = math.sqrt(grad)
                if g_total == 0:
                    # Zero gradient: fall back to the all-ones direction.
                    gradient_list.append([1] * len(label_selected[0]))
                    continue
                new_pointsX = []
                for k in range(len(decision)):
                    tmp = []
                    for h in range(len(label_selected[0])):
                        if decision[k][h]:
                            tmp.append(label_selected[j][h] -
                                       gradient_selected[j][h] *
                                       (step / g_total))
                        else:
                            tmp.append(label_selected[j][h] +
                                       gradient_selected[j][h] *
                                       (step / g_total))
                    new_pointsX.append(tmp)
                new_pointsX.append(label_selected[j])
                new_pointsY = sess.run(logits, feed_dict={X: new_pointsX})
                original_y = new_pointsY[-1]
                distances = list(new_pointsY)[:-1]
                if original_y < 0.5:
                    ans = min(distances)
                else:
                    ans = max(distances)
                direction = decision[distances.index(ans)]

                # (An unfinished per-dimension variant of this probe,
                # analogous to the first generate_accuracy, was sketched here
                # but its result was never used.)

                return_value = []
                for k in range(len(direction)):
                    if direction[k]:
                        return_value.append(-gradient_selected[j][k])
                    else:
                        return_value.append(gradient_selected[j][k])
                gradient_list.append(return_value)

            # Use the standard deviation of pairwise training-point distances
            # as the boundary-remaining step size.
            point_distance_list = []
            for p in range(len(train_set_X) - 1):
                for q in range(p + 1, len(train_set_X)):
                    distance = 0
                    for d in range(n_input):
                        distance += (train_set_X[p][d] - train_set_X[q][d]) * (
                            train_set_X[p][d] - train_set_X[q][d])
                    point_distance_list.append(math.sqrt(distance))
            std_dev = np.std(point_distance_list)
            print("standard deviation", std_dev)

            newX = br.balancingPoint(label_flag, label_selected,
                                     gradient_list, length_added, formu,
                                     category, std_dev)
            for point in newX:
                if category == formula.POLYHEDRON:
                    flag = testing_function.polycircleModel(
                        formu[0], formu[1], point)
                elif category == formula.POLYNOMIAL:
                    flag = testing_function.polynomialModel(
                        formu[:-1], point, formu[-1])
                train_set_X.append(point)
                train_set_Y.append([0] if flag else [1])
            print("new training size after boundary remaining",
                  len(train_set_X), len(train_set_Y))

            # Retrain on the augmented set and record accuracy.
            for epoch in range(training_epochs):
                _, c = sess.run([train_op, loss_op],
                                feed_dict={
                                    X: train_set_X,
                                    Y: train_set_Y
                                })
            train_y = sess.run(logits, feed_dict={X: train_set_X})
            test_y = sess.run(logits, feed_dict={X: test_set_X})
            train_acc = util.calculateAccuracy(train_y, train_set_Y, False)
            test_acc = util.calculateAccuracy(test_y, test_set_Y, False)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)

    result.append(train_acc_list)
    result.append(test_acc_list)
    return result
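# Usage sketch, mirroring the gradient-based version above:
# train_path, test_path = randomCircle([[[12, 0], [-12, 0]], [4, 4]])
# result = generate_accuracy(train_path, test_path,
#                            [[[12, 0], [-12, 0]], [4, 4]],
#                            formula.POLYHEDRON)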