def quality_evaluate(error_rate):
    # For each error rate, corrupt that fraction of labels by shuffling them,
    # then measure information cleanliness on the corrupted labels.
    xtrain = cm.x_train
    ytrain = cm.y_index_train
    ICs = []
    real_error = []
    for error in error_rate:
        x = xtrain
        y = np.array(ytrain)
        y_error = y[:int(len(y) * error)]  # view into y: shuffling it corrupts y too
        y_keep = y[int(len(y) * error):]
        random.shuffle(y_error)
        yerrorlist = list(y_error)
        ykeeplist = list(y_keep)
        yerrorlist.extend(ykeeplist)
        y_new = np.array(yerrorlist)
        # fraction of labels actually changed (shuffling can map a label to itself)
        real_error.append(1 - sum(y == ytrain) / len(y))
        print()
        label_ris, IC = information_cleanliness(x, y_new, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
        ICs.append(IC)
        # print(label_ris)
        print(IC)
    dr.save_data(ICs, 'data_evaluation_minst/error_label_detection.csv')
    dr.save_data(real_error, 'data_evaluation_minst/real_error_detection.csv')
    plt.plot(real_error, ICs)
    plt.show()
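# The sequence-analysis helpers below sort checkpoint lists with a
# module-level sort_key that this excerpt never defines. A minimal sketch,
# assuming the '{epoch}E{batch}b.h5' naming used by every save call in this
# file; the multiplier only has to exceed the number of batches per epoch.
def sort_key(e):
    epoch_str = e.split('E')
    batch_str = epoch_str[1].split('b')
    return int(epoch_str[0]) * 1000 + int(batch_str[0])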
def single_accuracy_test(fs, specific_test=[-1]):
    for file_address in fs:
        file_ls = os.listdir(file_address)
        file_ls.sort(key=sort_key)
        losses = []
        accuracies = []
        for file in file_ls:
            extend = os.path.splitext(file)[-1][1:]
            if extend != 'h5':
                continue
            cm.model.load_weights(file_address + '/' + file)
            print(file)
            loss, ac = accuracy(cm.model, specific_test=specific_test)
            print(file)
            losses.append(loss)
            accuracies.append(ac)
            # if i > 10:
            #     break
        # list = []
        # list.append(eds)
        # list.append(wds)
        # name_list = ['ed', 'wd']
        # draw_plot(list, name_list)
        plt.plot(range(len(losses)), losses)
        dr.save_data(losses, 'record/' + file_address + '/losses.csv')
        plt.savefig('record/' + file_address + '/losses.png')
        plt.close()
        plt.plot(range(len(accuracies)), accuracies)
        dr.save_data(accuracies, 'record/' + file_address + '/accuracy.csv')
        plt.savefig('record/' + file_address + '/accuracy.png')
        plt.close()
def compare_two_sequence(path1, path2, metric, metric_name):
    file_address = 'record/' + path1 + '_' + path2
    if not os.path.exists(file_address):
        # file_address already carries the record/ prefix, so create it directly
        os.makedirs(file_address)
        print('--- new folder... ---')
    else:
        print('--- There is this folder! ---')
    file_ls1 = os.listdir(path1)
    file_ls1.sort(key=sort_key)
    file_ls2 = os.listdir(path2)
    file_ls2.sort(key=sort_key)
    distance = []
    for file in file_ls1:
        if file not in file_ls2:
            break
        extend = os.path.splitext(file)[-1][1:]
        if extend != 'h5':
            continue
        print(path1 + '/' + file, ' ', path2 + '/' + file)
        cm.model.load_weights(path1 + '/' + file)
        vcurrent1 = resize_model(cm.model.get_weights())
        cm.model.load_weights(path2 + '/' + file)
        vcurrent3 = resize_model(cm.model.get_weights())
        dis = metric(np.array(vcurrent1), np.array(vcurrent3))
        print(dis)
        distance.append(dis)
    dr.save_data(distance, file_address + '/' + metric_name + '.csv')
def analyse_sequence(path_set, target_path_set, metric, metric_name):
    for i in range(len(path_set)):
        path = path_set[i]
        target_path = target_path_set[i]
        file_address = 'record/' + path
        file_ls1 = os.listdir(path)
        file_ls1.sort(key=sort_key)
        distance = []
        cm.model.load_weights(target_path)
        targetv = resize_model(cm.model.get_weights())
        for file in file_ls1:
            extend = os.path.splitext(file)[-1][1:]
            if extend != 'h5':
                continue
            cm.model.load_weights(path + '/' + file)
            vcurrent1 = resize_model(cm.model.get_weights())
            dis = metric(vcurrent1, targetv)
            distance.append(dis)
            print(dis)
        dr.save_data(distance, file_address + '/' + metric_name + '.csv')
        plt.plot(range(len(distance)), distance)
        plt.savefig(file_address + '/' + metric_name + '.png')
        plt.close()
def calculate_velocity_datapoints(input_path, output_path, model):
    weight_list = os.listdir(input_path + '/weights/' + '0')

    def sort_key(e):
        epoch_str = e.split('E')
        batch_str = epoch_str[1].split('b')
        return int(epoch_str[0]) * 500 + int(batch_str[0])

    weight_list.sort(key=sort_key)
    folder_list = os.listdir(input_path + '/weights')
    v_square = []
    for weight in weight_list:
        weight_set = []
        for folder in folder_list:
            # load the same checkpoint name from every run folder
            weight_path = input_path + '/weights/' + folder + '/' + weight
            print(weight_path)
            model.load_weights(weight_path)
            v1 = resize_model(model.get_weights())
            weight_set.append(v1)
        array_list = np.array(weight_set)
        # per-parameter variance across runs, averaged into one scalar
        var = np.var(array_list, axis=0, keepdims=True)[0]
        avg = np.average(var)
        v_square.append(avg)
    dr.save_data(v_square, output_path + '/v_square.csv')
    plt.plot(range(len(weight_list)), v_square, label='v_square')
    plt.legend()
    plt.savefig(output_path + '/v_square.png')
    plt.close()
def mds_analysis(matrix, dim=3, opath='', d_opath=''):
    weight_set = np.array(matrix, dtype=float)
    embedding = MDS(n_components=dim)
    X_transformed = embedding.fit_transform(weight_set)
    normal = np.linalg.norm(X_transformed, axis=1)
    print(sum(normal))
    draw_scatter3D(X_transformed, opath=opath)
    dr.save_data(X_transformed, d_opath)
def error_label_shift(error_set, standard_init='Yes', standard_training='Yes', gaps=0):
    for error in error_set:
        x_train = cm.x_train
        y_train = cm.y_train
        file_address = 'error_label' + str(error)
        if not os.path.exists(file_address):
            # create the folder (and its record/ mirror) if it does not exist
            os.makedirs(file_address)
            os.makedirs('record/' + file_address)
            print('--- new folder... ---')
            print('--- OK ---')
        else:
            print('--- There is this folder! ---')
        if standard_init == 'Yes':
            print('standard_init using')
            cm.model.load_weights('cnn_check_standard_init.h5')
        if standard_training == 'Yes':
            training_order = np.array(
                dr.read_csv('cifar10_training_order.csv'), dtype='int32')[:, 0]
        else:
            length = x_train.shape[0]
            training_order = list(range(length))
            random.shuffle(training_order)
            dr.save_data(training_order, 'templete_training_order.csv')
        print('training_config:', file_address, 'standard training order:',
              training_order, 'standard init:', standard_init)
        # global_shuffle(array, rate):
        error_rate = error / 100
        y_train_ori = np.copy(y_train)
        # corrupt the first error_rate fraction of one-hot labels by shuffling
        error_array = y_train[:int(len(y_train) * error_rate)]
        random.shuffle(error_array)
        y_train[:int(len(y_train) * error_rate)] = error_array
        print('global error rate:' + str(error_onehotlabel_rate(y_train, y_train_ori)))
        gap = 0
        for epoch in range(cm.epochs):
            for b in range(x_train.shape[0] // cm.batch_size):
                idx = training_order[b * cm.batch_size:(b + 1) * cm.batch_size]
                x = x_train[idx]
                y = y_train[idx]
                loss, acc = cm.model.train_on_batch(x, y)
                print('local error rate:' + str(error_onehotlabel_rate(y, y_train_ori[idx])))
                print('loss: ' + str(loss) + ' acc: ' + str(acc))
                if gap == gaps:
                    name = file_address + '/' + str(epoch) + 'E' + str(b) + 'b.h5'
                    cm.model.save(name)
                    print(name)
                    gap = 0
                else:
                    gap += 1
def average_num(output_address):
    ri_list = []
    for i in range(10):
        idx = cm.y_index_train == i
        test = cm.x_train[idx]
        # per-sample relative information for class i
        RI = reletive_information(test) / len(test)
        print(RI)
        ri_list.append(RI)
    dr.save_data(ri_list, output_address)
    return ri_list
def calculate_colormap_edis(path_acc, path_loss):
    acc = np.array(dr.read_csv(path_acc), dtype=float)
    acc_map = np.array(dr.read_csv('radius_test/acc_map.csv'), dtype=float)[:, 0]
    loss = np.array(dr.read_csv(path_loss), dtype=float)
    loss_map = np.array(dr.read_csv('radius_test/loss_map.csv'), dtype=float)[:, 0]
    acc_mean = np.mean(acc, axis=1)
    loss_mean = np.mean(loss, axis=1)
    acc_ecolor = set_mapping(acc_map, acc_mean)
    dr.save_data(acc_ecolor, 'radius_test/keep/acc_color.csv')
    # loss branch mirroring the accuracy branch above (the original loaded
    # loss_map and computed loss_mean but stopped short; output name assumed)
    loss_ecolor = set_mapping(loss_map, loss_mean)
    dr.save_data(loss_ecolor, 'radius_test/keep/loss_color.csv')
def extract_layers_parameters(input_path, model=create_network(channals=10)):
    weight_list = os.listdir(input_path + '/weights/' + '0')

    def sort_key(e):
        epoch_str = e.split('E')
        batch_str = epoch_str[1].split('b')
        return int(epoch_str[0]) * 500 + int(batch_str[0])

    weight_list.sort(key=sort_key)
    folder_list = os.listdir(input_path + '/weights')
    output_path = input_path + '/layer_wise_data'
    if not os.path.exists(output_path):
        # create the output folder if it does not exist
        os.makedirs(output_path)
    for weight in weight_list:
        for folder in folder_list:
            weight_path = input_path + '/weights/' + folder + '/' + weight
            print(weight_path)
            model.load_weights(weight_path)
            # first convolution kernel and its bias
            c1 = model.get_weights()[0]
            b1 = model.get_weights()[1]
            data = reshape_convolutional_kernal(c1, b1)
            file_path = output_path + '/' + folder
            if not os.path.exists(file_path):
                os.makedirs(file_path)
            dr.save_data(data, file_path + '/' + weight + 'c1.csv')


# test_mass('mass_test/cnn_channels')

# path = 'F:/information_effectiveness/1channel/all_random'
# train_velocity_samples(path, channels=1, usesameinit=False, usersameorder=False, epochs=5)
# calculate_velocity(path, path, create_network(channals=1))

# path = 'F:/information_effectiveness/1channel/sameinit_randomorder'
# train_velocity_samples(path, channels=1, usesameinit=True, usersameorder=False, epochs=5)
# calculate_velocity(path, path, create_network(channals=1))

# path = 'F:/information_effectiveness/1channel/sameinit_sameorder'
# train_velocity_samples(path, channels=1, usesameinit=True, usersameorder=True, epochs=20)
# calculate_velocity(path, path, create_network(channals=1))

# path = 'F:/information_effectiveness/1channel/randominit_sameorder'
# train_velocity_samples(path, channels=1, usesameinit=False, usersameorder=True, epochs=5)
# calculate_velocity(path, path, create_network(channals=1))

# path = 'F:/chaotic_similarity/randominit_sameorder'
# train_velocity_samples(path, channels=10, usesameinit=False, usersameorder=True, epochs=20, gap=200, loop=100)
# path = 'F:/chaotic_similarity/sameinit_sameorder'
# train_velocity_samples(path, channels=10, usesameinit=True, usersameorder=True, epochs=20, gap=200, loop=100)

# extract_layers_parameters('F:/chaotic_similarity/randominit_sameorder')
def train_velocity_samples(output_path, usesameinit=True, usersameorder=True,
                           channels=10, loop=5, epochs=1, gap=0):
    x_train, x_test, y_train, y_test = create_data_set(list(range(10)), 4000)
    bs = 100
    length = x_train.shape[0]
    if not os.path.exists(output_path):
        # create the output folder if it does not exist
        os.makedirs(output_path)
    if usersameorder:
        # reuse the saved training order so every run sees the same batches
        if os.path.exists(output_path + '/training_order.csv'):
            training_order = np.array(dr.read_csv(output_path + '/training_order.csv'),
                                      dtype='int32')[:, 0]
        else:
            training_order = list(range(length))
            random.shuffle(training_order)
            dr.save_data(training_order, output_path + '/training_order.csv')
    else:
        # no shared order requested: draw a fresh random order
        training_order = list(range(length))
        random.shuffle(training_order)
    if not os.path.exists(output_path + '/weights/'):
        os.makedirs(output_path + '/weights/')
    for i in range(loop):
        model = create_network(channals=channels)
        if usesameinit:
            # share one initialization across runs
            if os.path.exists(output_path + '/0E0b.h5'):
                model.load_weights(output_path + '/0E0b.h5')
            else:
                model.save_weights(output_path + '/0E0b.h5')
        if not os.path.exists(output_path + '/weights/' + str(i)):
            os.makedirs(output_path + '/weights/' + str(i))
        count = 0
        for epoch in range(epochs):
            for b in range(x_train.shape[0] // bs):
                idx = training_order[b * bs:(b + 1) * bs]
                x = x_train[idx]
                y = y_train[idx]
                model.train_on_batch(x, y)
                count += 1
                if count > gap:
                    name = output_path + '/weights/' + str(i) + '/' + str(epoch) + 'E' + str(b) + 'b.h5'
                    model.save(name)
                    print(name)
                    count = 0
def compare_two_sequence_accuracy(path1, path2):
    file_address = 'record/' + path1 + '_' + path2
    os.makedirs(file_address, exist_ok=True)
    file_ls1 = os.listdir(path1)
    file_ls1.sort(key=sort_key)
    file_ls2 = os.listdir(path2)
    file_ls2.sort(key=sort_key)
    loss_1 = []
    loss_2 = []
    ac_1 = []
    ac_2 = []
    for file in file_ls1:
        if file not in file_ls2:
            break
        extend = os.path.splitext(file)[-1][1:]
        if extend != 'h5':
            continue
        print(path1 + '/' + file, ' ', path2 + '/' + file)
        cm.model.load_weights(path1 + '/' + file)
        loss1, accuracy1 = accuracy(cm.model)
        print(loss1, ' ', accuracy1)
        cm.model.load_weights(path2 + '/' + file)
        loss2, accuracy2 = accuracy(cm.model)
        print(loss2, ' ', accuracy2)
        loss_1.append(loss1)
        loss_2.append(loss2)
        ac_1.append(accuracy1)
        ac_2.append(accuracy2)
    dr.save_data(ac_1, file_address + '/accuracy_1.csv')
    dr.save_data(ac_2, file_address + '/accuracy_2.csv')
    dr.save_data(loss_1, file_address + '/loss1.csv')
    dr.save_data(loss_2, file_address + '/loss2.csv')
def single_test(model, file_address, output):
    x_train, x_test, y_train, y_test = create_data_set(list(range(10)), 4000)
    file_ls = os.listdir(file_address)
    file_ls.sort(key=sort_key)
    model.load_weights(file_address + '/' + file_ls[0])  # end of normal
    vmodel = model.get_weights()
    target = kl.resize_model(vmodel)
    wds = []
    eds = []
    loss_set = []
    acc_set = []
    i = 0
    for file in file_ls:
        extend = os.path.splitext(file)[-1][1:]
        if extend != 'h5':
            continue
        model.load_weights(file_address + '/' + file)
        print(file)
        vcurrent = kl.resize_model(model.get_weights())
        # dis = kl.KL_div_sigmoid(target, vcurrent)  # inverse
        dis = kl.KL_div_sigmoid(target, vcurrent)
        E_dis = np.linalg.norm(np.array(target) - np.array(vcurrent), ord=2)
        loss, acc = model.evaluate(x_test, y_test)
        eds.append(E_dis)
        wds.append(dis)
        acc_set.append(acc)
        loss_set.append(loss)
        print(acc)
        print(loss)
        print(file)
        i = i + 1
        # if i > 100:
        #     break
        print('*********************************')
    plt.plot(range(len(wds)), wds)
    dr.save_data(wds, output + '/dis_limitation_e.csv')
    plt.savefig(output + '/dis_limitation_inverse_e.png')
    plt.close()
    plt.plot(range(len(eds)), eds)
    dr.save_data(eds, output + '/dis_limitation_eul.csv')
    plt.savefig(output + '/dis_limitation_inverse_eul.png')
    plt.close()
    plt.plot(range(len(loss_set)), loss_set)
    dr.save_data(loss_set, output + '/dis_limitation_loss.csv')
    plt.savefig(output + '/dis_limitation_inverse_loss.png')
    plt.close()
    plt.plot(range(len(acc_set)), acc_set)
    dr.save_data(acc_set, output + '/dis_limitation_acc.csv')
    plt.savefig(output + '/dis_limitation_inverse_acc.png')
    plt.close()
def batch_kl_test(path_d1, path_d2, opath):
    file_ls = os.listdir(path_d1)

    def sort_key(e):
        epoch_str = e.split('.')
        return int(epoch_str[0])

    output = []
    file_ls.sort(key=sort_key)
    for file in file_ls:
        array_d1 = np.array(dr.read_csv(path_d1 + '/' + file), dtype=float)[:, 0]
        array_d2 = np.array(dr.read_csv(path_d2 + '/' + file), dtype=float)[:, 0]
        output.append(kl.KL_div(array_d1, array_d2, activation=False))
    dr.save_data(output, opath)
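# batch_kl_test pairs same-named CSV files from two folders, so it composes
# with functions below that write numbered '<i>.csv' files (for example
# ratio_simulation's <o_name>/data folder). A hypothetical driver; the run
# folders are placeholders:
# batch_kl_test('runA/data', 'runB/data', 'record/runA_vs_runB_kl.csv')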
def mixed_RI(label_set, test_length, output_address):
    xtrains = []
    ytrains = []
    xtrain_mix = []
    ytrain_mix = []
    for label in label_set:
        idx1 = (cm.y_index_train == label)
        x_train1 = cm.x_train[idx1][:test_length, :, :]
        y_train1 = cm.y_train[idx1][:test_length, :]
        xtrains.append(x_train1)
        ytrains.append(y_train1)
        # mix everything collected so far into one dataset per step
        x_mix, y_mix = mix_dataset(xtrains, ytrains, test_length, len(xtrains))
        xtrain_mix.append(x_mix)
        ytrain_mix.append(y_mix)
    RIs = []
    for dataset in xtrain_mix:
        ri = reletive_information(dataset)
        RIs.append(ri)
    dr.save_data(RIs, output_address)
def test_mass(output_path):
    x_train, x_test, y_train, y_test = create_data_set(list(range(10)), 4000)
    edis_set = []
    kldis_set = []
    loss_set = []
    ac_set = []
    loop = 100
    bs = 100
    epochs = 1
    for i in range(loop):
        # grow the network by one channel per iteration
        model = create_network(channals=i + 1)
        model.save_weights(output_path + '/init.h5')
        model.fit(x_train, y_train,
                  batch_size=bs,
                  epochs=epochs,
                  verbose=1,
                  validation_data=(x_train, y_train))
        score = model.evaluate(x_test, y_test, verbose=0)
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])
        model.save_weights(output_path + '/end.h5')
        e_dis = distance(output_path + '/init.h5', output_path + '/end.h5', model, E_dis)
        kl_dis = distance(output_path + '/init.h5', output_path + '/end.h5', model, KL_div)
        loss_set.append(score[0])
        ac_set.append(score[1])
        edis_set.append(e_dis)
        kldis_set.append(kl_dis)
    dr.save_data(loss_set, output_path + '/loss.csv')
    dr.save_data(ac_set, output_path + '/ac.csv')
    dr.save_data(edis_set, output_path + '/edis.csv')
    dr.save_data(kldis_set, output_path + '/kldis.csv')
    plt.plot(range(loop), loss_set, label='loss')
    plt.plot(range(loop), ac_set, label='accuracy')
    plt.legend()
    plt.savefig(output_path + '/performance_evaluation.png')
    plt.close()
    plt.plot(range(loop), edis_set, label='dis')
    plt.legend()
    plt.savefig(output_path + '/dis.png')
    plt.close()
    plt.plot(range(loop), kldis_set, label='dis')
    plt.legend()
    plt.savefig(output_path + '/kldis.png')
    plt.close()
def mds_single_analysis(model, path_set, opath, dim=2):
    weight_set = []
    for path in path_set:
        print(path)
        model.load_weights(path)
        vcurrent = resize_model(model.get_weights())
        weight_set.append(vcurrent)
    weight_set = np.array(weight_set, dtype=float)
    embedding = MDS(n_components=dim)
    X_transformed = embedding.fit_transform(weight_set)
    dr.save_data(X_transformed, opath)
    if dim == 3:
        plt.rcParams['figure.figsize'] = [4, 4]
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(X_transformed[:, 0], X_transformed[:, 1], X_transformed[:, 2])
        plt.show()
    elif dim == 2:
        plt.scatter(X_transformed[:, 0], X_transformed[:, 1])
        plt.show()
def test_layer_design(output_path):
    x_train, x_test, y_train, y_test = create_data_set(list(range(10)), 200)
    RI = reletive_information(x_train)
    dis_set = []
    effectiveness_set = []
    loss_set = []
    ac_set = []
    loop = 100
    bs = 2
    for i in range(loop):
        model = create_network()
        model.load_weights(output_path + '/init.h5')  # assumes a saved init exists
        model.fit(x_train, y_train,
                  batch_size=bs,
                  epochs=epochs,
                  verbose=0,
                  validation_data=(x_test, y_test))
        score = model.evaluate(x_test, y_test, verbose=0)
        bs = bs + 2  # grow the batch size each iteration
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])
        print(str(i) + '************************')
        model.save_weights(output_path + '/end.h5')
        dis, effectiveness = ef.effectiveness(output_path + '/init.h5',
                                              output_path + '/end.h5',
                                              model, kl.KL_div, RI)
        loss_set.append(score[0])
        ac_set.append(score[1])
        dis_set.append(dis)
        effectiveness_set.append(effectiveness)
    dr.save_data(loss_set, output_path + '/loss.csv')
    dr.save_data(ac_set, output_path + '/ac.csv')
    dr.save_data(dis_set, output_path + '/dis.csv')
    dr.save_data(effectiveness_set, output_path + '/ef.csv')
    plt.plot(range(loop), loss_set, label='loss')
    plt.plot(range(loop), ac_set, label='accuracy')
    plt.legend()
    plt.savefig(output_path + '/performance_evaluation.png')
    plt.close()
    plt.plot(range(loop), dis_set, label='dis')
    plt.legend()
    plt.savefig(output_path + '/dis.png')
    plt.show()
def analyse_sequence(path_set, target_path_set, metric, metric_name,
                     output_path_set=[], test_range=[]):
    # Extended variant of analyse_sequence above; being defined later, it is
    # the one callers actually get.
    for i in range(len(path_set)):
        if len(test_range) == 0:
            x_train, x_test, y_train, y_test = cm.create_data_set()
        else:
            x_train, x_test, y_train, y_test = cm.create_data_set(
                range(test_range[i][0]), test_range[i][1])
        path = path_set[i]
        target_path = target_path_set[i]
        if len(output_path_set) == 0:
            file_address = 'record/' + path
        else:
            file_address = output_path_set[i]
        file_ls1 = os.listdir(path)
        file_ls1.sort(key=sort_key)
        distance = []
        for m in range(len(metric)):
            distance.append([])
        loss_set = []
        acc_set = []
        cm.model.load_weights(target_path)
        targetv = resize_model(cm.model.get_weights())
        for file in file_ls1:
            extend = os.path.splitext(file)[-1][1:]
            if extend != 'h5':
                continue
            cm.model.load_weights(path + '/' + file)
            vcurrent1 = resize_model(cm.model.get_weights())
            for m in range(len(metric)):
                dis = metric[m](vcurrent1, targetv)
                distance[m].append(dis)
                print(metric_name[m] + ' ' + str(dis))
            loss, acc = cm.model.evaluate(x_test, y_test)
            loss_set.append(loss)
            acc_set.append(acc)
            print('loss' + ' ' + str(loss) + ' acc' + ' ' + str(acc))
        for m in range(len(metric)):
            dr.save_data(distance[m], file_address + '/' + metric_name[m] + '.csv')
            plt.plot(range(len(distance[m])), distance[m])
            plt.savefig(file_address + '/' + metric_name[m] + '.png')
            plt.close()
        dr.save_data(acc_set, file_address + '/acc.csv')
        plt.plot(range(len(acc_set)), acc_set)
        plt.savefig(file_address + '/acc.png')
        plt.close()
        dr.save_data(loss_set, file_address + '/loss.csv')
        plt.plot(range(len(loss_set)), loss_set)
        plt.savefig(file_address + '/loss.png')
        plt.close()
def batch_distance_to_distribution(path, opath):
    file_ls = os.listdir(path)

    def sort_key(e):
        epoch_str = e.split('.')
        return int(epoch_str[0])

    file_ls.sort(key=sort_key)
    for file in file_ls:
        array = np.array(dr.read_csv(path + '/' + file), dtype=float)[:, 0]
        # 'fanwei' is pinyin for 'range'
        dis, normal_dis, fanwei = distribution_test(array)
        dr.save_data(dis, opath + '/distribution/' + file)
        dr.save_data(normal_dis, opath + '/normal_distribution/' + file)
        dr.save_data(fanwei, opath + '/fanwei/' + file)
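# batch_distance_to_distribution assumes its three output subfolders already
# exist. A small hypothetical setup helper mirroring the save calls above:
def prepare_distribution_dirs(opath):
    for sub in ('distribution', 'normal_distribution', 'fanwei'):
        os.makedirs(opath + '/' + sub, exist_ok=True)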
def single_training(file_address):
    x_train = cm.x_train
    y_train = cm.y_train
    if not os.path.exists(file_address):
        # create the folder (and its record/ mirror) if it does not exist
        os.makedirs(file_address)
        os.makedirs('record/' + file_address)
        print('--- new folder... ---')
        print('--- OK ---')
    else:
        print('--- There is this folder! ---')
    cm.model.load_weights('standard_init.h5')
    training_order = np.array(dr.read_csv('standard_order.csv'), dtype='int32')[:, 0]
    loss = []
    acc = []
    for epoch in range(cm.epochs):
        for b in range(x_train.shape[0] // cm.batch_size):
            idx = training_order[b * cm.batch_size:(b + 1) * cm.batch_size]
            x = x_train[idx]
            y = y_train[idx]
            l = cm.model.train_on_batch(x, y)
            name = file_address + '/' + str(epoch) + 'E' + str(b) + 'b.h5'
            cm.model.save(name)
            print(name + ' ' + str(l))
            loss.append(l[0])
            acc.append(l[1])
            if l[1] > 0.99:
                # early stop once training accuracy passes 0.99
                dr.save_data(acc, 'radius_test/acc_map.csv')
                dr.save_data(loss, 'radius_test/loss_map.csv')
                return loss, acc
    dr.save_data(acc, 'radius_test/acc_map.csv')
    dr.save_data(loss, 'radius_test/loss_map.csv')
    return loss, acc
def mds_analysis(model, path_set, opath_set, dim=2):
    # Note: this redefines mds_analysis from above with a different signature;
    # as written, this later definition shadows the matrix-based one.
    weight_set = []
    length_set = []
    gredient_set = []
    vpre = []
    for path in path_set:
        file_ls = os.listdir(path)
        length_set.append(len(file_ls))

        def sort_key_multi_label(e):
            epoch_str = e.split('E')
            batch_str = epoch_str[1].split('b')
            return int(epoch_str[0]) * 1000 + int(batch_str[0])

        file_ls.sort(key=sort_key_multi_label)
        for file in file_ls:
            print(path + '/' + file)
            model.load_weights(path + '/' + file)
            vcurrent = resize_model(model.get_weights())
            weight_set.append(vcurrent)
            vcurrent = np.array(vcurrent, dtype=float)
            if len(vpre) != 0:
                gredient_set.append(vcurrent - vpre)
            vpre = vcurrent
    weight_set = np.array(weight_set, dtype=float)
    embedding = MDS(n_components=dim)
    X_transformed = embedding.fit_transform(weight_set)
    gredient_set = np.array(gredient_set, dtype=float)
    G_transformed = embedding.fit_transform(gredient_set)
    pre_len = 0
    for i in range(len(opath_set)):
        dr.save_data(X_transformed[pre_len:pre_len + length_set[i]],
                     opath_set[i] + '.csv')
        if pre_len == 0:
            dr.save_data(G_transformed[pre_len:pre_len + length_set[i] - 1],
                         opath_set[i] + '_gredient.csv')
        else:
            dr.save_data(G_transformed[pre_len - 1:pre_len + length_set[i] - 1],
                         opath_set[i] + '_gredient.csv')
        pre_len += length_set[i]
def test_learning_rate(init_path, output_path):
    x_train, x_test, y_train, y_test = create_data_set(list(range(10)), 4000)
    dis_set = []
    loss_set = []
    ac_set = []
    loop = 100
    bs = 100
    epochs = 10
    for i in range(loop):
        model = create_network(10 * i)
        model.load_weights(init_path + '/init.h5')
        # model.save_weights(init_path + '/init.h5')
        model.fit(x_train, y_train,
                  batch_size=bs,
                  epochs=epochs,
                  verbose=1,
                  validation_data=(x_train, y_train))
        score = model.evaluate(x_test, y_test, verbose=0)
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])
        model.save_weights(output_path + '/end.h5')
        dis = distance(init_path + '/init.h5', output_path + '/end.h5', model, KL_div)
        loss_set.append(score[0])
        ac_set.append(score[1])
        dis_set.append(dis)
    dr.save_data(loss_set, output_path + '/loss.csv')
    dr.save_data(ac_set, output_path + '/ac.csv')
    dr.save_data(dis_set, output_path + '/dis.csv')
    plt.plot(range(loop), loss_set, label='loss')
    plt.plot(range(loop), ac_set, label='accuracy')
    plt.legend()
    plt.savefig(output_path + '/performance_evaluation.png')
    plt.close()
    plt.plot(range(loop), dis_set, label='dis')
    plt.legend()
    plt.savefig(output_path + '/dis.png')
    plt.show()
def add_noise_batch(path, loop=1, samples=10, upper=1000):
    # evaluate accuracy/loss after perturbing the weights at `samples` evenly
    # spaced noise norms between 0 and `upper`, `loop` trials per norm
    gap = upper / samples
    norms = np.array(range(samples)) * gap
    dr.save_data(norms, 'radius_test/keep/norms.csv')
    acc_records = []
    loss_records = []
    for norm in norms:
        acc_record = []
        loss_record = []
        for l in range(loop):
            add_edis_noise(path, norm)
            loss, acc = cm.model.evaluate(x_test, y_test, verbose=0)
            acc_record.append(acc)
            loss_record.append(loss)
        print(np.mean(acc_record), np.std(acc_record))
        acc_records.append(acc_record)
        loss_records.append(loss_record)
    dr.save_data(acc_records, 'radius_test/keep/acc_map.csv')
    dr.save_data(loss_records, 'radius_test/keep/loss_map.csv')
    print('end')
def analysis_mds(path_array, opath=''):
    stds = []
    avgs = []
    cvs = []
    file_ls = os.listdir(path_array)

    def sort_key(e):
        epoch_str = e.split('.')
        return int(epoch_str[0])

    file_ls.sort(key=sort_key)
    for file in file_ls:
        mat = np.array(dr.read_csv(path_array + '/' + file), dtype=float)
        mn = np.linalg.norm(mat, axis=1)
        std = np.std(mn)
        avg = np.mean(mn)
        cv = std / avg  # coefficient of variation
        avgs.append(avg)
        stds.append(std)
        cvs.append(cv)
    print('ok')
    if len(opath) != 0:
        dr.save_data(stds, opath + '/stds.csv')
        dr.save_data(avgs, opath + '/avgs.csv')
        dr.save_data(cvs, opath + '/cvs.csv')
        plt.plot(range(len(stds)), stds)
        plt.savefig(opath + '/stds.png')
        plt.close()
        plt.plot(range(len(avgs)), avgs)
        plt.savefig(opath + '/avgs.png')
        plt.close()
        plt.plot(range(len(cvs)), cvs)
        plt.savefig(opath + '/cvs.png')
        plt.close()
def test_layer_design():
    # Note: redefines test_layer_design above; this later, fixed-architecture
    # variant is the one callers get.
    x_train, x_test, y_train, y_test = create_data_set(list(range(10)), 20)
    RI = de.reletive_information(x_train)
    dis_set = []
    effectiveness_set = []
    loss_set = []
    ac_set = []
    loop = 50
    bs = 128
    for i in range(loop):
        x_train, x_test, y_train, y_test = create_data_set(list(range(10)), 1000)
        model = Sequential()
        model.add(Conv2D(1, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
        # model.add(Conv2D(10 + i * 5, (3, 3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))
        # extra layer
        model.add(Flatten())
        # model.add(Dense(128, activation='relu'))
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(num_classes, activation='softmax'))
        model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=keras.optimizers.Adadelta(),
                      metrics=['accuracy'])
        model.save_weights('Final_experiment/batch_size/init.h5')
        model.fit(x_train, y_train,
                  batch_size=bs,
                  epochs=epochs,
                  verbose=1,
                  validation_data=(x_test, y_test))
        score = model.evaluate(x_test, y_test, verbose=0)
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])
        bs = bs - 2  # shrink the batch size each iteration
        model.save_weights('Final_experiment/batch_size/end.h5')
        dis, effectiveness = ef.effectiveness('Final_experiment/batch_size/init.h5',
                                              'Final_experiment/batch_size/end.h5',
                                              model, kl.KL_div, RI)
        loss_set.append(score[0])
        ac_set.append(score[1])
        dis_set.append(dis)
        effectiveness_set.append(effectiveness)
    dr.save_data(loss_set, 'Final_experiment/batch_size/loss.csv')
    dr.save_data(ac_set, 'Final_experiment/batch_size/ac.csv')
    dr.save_data(dis_set, 'Final_experiment/batch_size/dis.csv')
    dr.save_data(effectiveness_set, 'Final_experiment/batch_size/ef.csv')
    plt.plot(range(loop), loss_set, label='loss')
    plt.plot(range(loop), ac_set, label='accuracy')
    plt.legend()
    plt.savefig('Final_experiment/batch_size/performance_evaluation.png')
    plt.close()
    plt.plot(range(loop), dis_set, label='dis')
    plt.legend()
    plt.savefig('Final_experiment/batch_size/dis.png')
    plt.show()
def ratio_simulation(universe, o_name, gap, epoch):
    mean_output = np.ones(epoch)
    mean_support = np.ones(epoch)
    std_output = np.ones(epoch)
    std_support = np.ones(epoch)
    rest_index = list(range(len(universe)))
    samples = []
    distribute_opath = o_name + '/distribute'
    data_opath = o_name + '/data'
    # create the output folders if they do not exist
    if not os.path.exists(o_name):
        os.makedirs(o_name)
        print('--- new folder... ---')
    else:
        print('--- There is this folder! ---')
    if not os.path.exists(distribute_opath):
        os.makedirs(distribute_opath)
        print('--- new folder... ---')
    else:
        print('--- There is this folder! ---')
    if not os.path.exists(data_opath):
        os.makedirs(data_opath)
        print('--- new folder... ---')
    else:
        print('--- There is this folder! ---')
    for i in range(epoch):
        # move `gap` random points from the support set into the sample set
        rest_index, removed_index = remove_samples(rest_index, gap)
        if len(rest_index) == 0:
            break
        select_bool = index_to_bool(removed_index, len(universe))
        rest_bool = index_to_bool(rest_index, len(universe))
        support_set = universe[rest_bool]
        samples_new = universe[select_bool]
        samples.extend(samples_new)
        # print(len(samples))
        shortest_dis = shortest_distance(samples, support_set)
        dr.save_data(shortest_dis, data_opath + '/' + str(i) + '.csv')
        plt.hist(shortest_dis, bins=100, density=True, alpha=0.5, label='Distance')
        plt.savefig(distribute_opath + '/' + str(i) + '.png')
        plt.close()
        mean_support[i] = np.mean(shortest_dis)
        std_support[i] = np.std(shortest_dis)
        # sampled points contribute zero distance to the overall statistics
        all_dis = np.r_[shortest_dis, np.zeros(len(samples))]
        mean_output[i] = np.mean(all_dis)
        std_output[i] = np.std(all_dis)
        print(i)
    dr.save_data(mean_output, o_name + '/mean.csv')
    dr.save_data(std_output, o_name + '/std.csv')
    dr.save_data(mean_support, o_name + '/mean_support.csv')
    dr.save_data(std_support, o_name + '/std_support.csv')
    plt.plot(range(len(mean_output)), mean_output)
    plt.savefig(o_name + '/mean.png')
    plt.close()
    plt.plot(range(len(std_output)), std_output)
    plt.savefig(o_name + '/std.png')
    plt.close()
    plt.plot(range(len(mean_support)), mean_support)
    plt.savefig(o_name + '/mean_support.png')
    plt.close()
    plt.plot(range(len(std_support)), std_support)
    plt.savefig(o_name + '/std_support.png')
    plt.close()
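# ratio_simulation leans on two helpers that this excerpt does not define.
# Minimal sketches consistent with how they are called above: remove_samples
# draws `gap` indices at random, and index_to_bool turns an index list into
# a boolean mask over the universe.
def remove_samples(rest_index, gap):
    removed = random.sample(rest_index, min(gap, len(rest_index)))
    removed_set = set(removed)
    rest = [i for i in rest_index if i not in removed_set]
    return rest, removed


def index_to_bool(index, length):
    mask = np.zeros(length, dtype=bool)
    mask[index] = True
    return mask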
def figure3D_test_datacenter(datalist, accuracy, datapath):
    sample_rate = pow(0.01, accuracy)
    print(sample_rate)
    test_center = []
    test_center_w = figure_grid(len(datalist), 1, sample_rate)
    # average_w = np.ones(len(datalist)) * (1 / len(datalist))  # add average weight
    # test_center_w.append(average_w.tolist())
    # print(test_center_w)
    print(len(test_center_w))
    index = 0
    # build one candidate center per weight vector
    for weights in test_center_w:
        output = np.zeros(datalist[0].shape)
        for i in range(len(datalist)):
            output += datalist[i] * weights[i]
        test_center.append(output)
    RI = []
    for center in test_center:
        RI.append(reletive_information_centerize(datalist, center))
        print(index)
        index += 1
    # compare
    print('centerget')
    # for i in range(len(test_center)):
    #     for j in range(0, len(test_center) - i - 1):
    #         if RI[j] > RI[j + 1]:
    #             temp = RI[j]
    #             RI[j] = RI[j + 1]
    #             RI[j + 1] = temp
    #             temp_weights = test_center_w[j]
    #             test_center_w[j] = test_center_w[j + 1]
    #             test_center_w[j + 1] = temp_weights
    # for i in range(5):
    #     print(RI[i])
    #     print(test_center_w[i])
    RI_min = RI[0]
    RI_max = RI[-1]
    dr.save_data(RI, datapath + '/RI.csv')
    dr.save_data(test_center_w, datapath + '/test_center_w.csv')
    print(RI[0])
    print(test_center_w[0])
    zz = np.array(RI, dtype='float')
    dim = list(range(len(datalist)))
    dim_set = permutations(dim, 2)
    for dim_select in dim_set:
        key = []
        for w in test_center_w:
            w_d1 = optimzie_key(w[dim_select[0]], accuracy + 1)
            w_d2 = optimzie_key(w[dim_select[1]], accuracy + 1)
            mark1 = 'x' + str(w_d1)
            mark2 = 'y' + str(w_d2)
            key.append(mark1 + mark2)
        dic = dict(zip(key, zz))
        print(len(dic))
        xx = np.arange(0, 1 + sample_rate, sample_rate)
        yy = np.arange(0, 1 + sample_rate, sample_rate)
        X, Y = np.meshgrid(xx, yy)
        Z = np.zeros(X.shape)
        for i in range(len(xx)):
            for j in range(len(xx)):
                w_d1 = optimzie_key(X[i, j], accuracy + 1)
                w_d2 = optimzie_key(Y[i, j], accuracy + 1)
                mark1 = 'x' + str(w_d1)
                mark2 = 'y' + str(w_d2)
                print(mark1 + mark2)
                Z[i, j] = dic[mark1 + mark2]
        fig = plt.figure()
        # create a new 3D axes
        ax3 = plt.axes(projection='3d')
        ax3.plot_surface(X, Y, Z, cmap='rainbow')
        ax3.set_xlabel('sample ' + str(dim_select[0]) + ' weight', fontsize=10, rotation=150)
        ax3.set_ylabel('sample ' + str(dim_select[1]) + ' weights')
        ax3.set_zlabel('RI')
        plt.savefig(datapath + str(dim_select[0]) + '_' + str(dim_select[1]) + '.png')
        plt.close()
        # ax3.contour(X, Y, Z, offset=-2, colors='black')  # contour lines; offset sets their height
        # draw 10 evenly spaced contour levels across the z range
        ratio = Z.max() - Z.min()
        rag = np.arange(0, 10, 1) * ratio / 10 + Z.min()
        C = plt.contour(X, Y, Z, rag, cmap='rainbow')
        plt.clabel(C, inline=True, fontsize=10)  # label each contour with its z value
        # ax3.set_zlim(-1, 1)  # set the z range
        plt.savefig(datapath + str(dim_select[0]) + '_' + str(dim_select[1]) + 'contour.png')
        plt.close()
def frame_test_datacenter(datalist, accuracy, datapath):
    sample_rate = pow(0.01, accuracy)
    test_center = []
    test_center_w = test_datacenter_weights(len(datalist), 1, sample_rate)
    # average_w = np.ones(len(datalist)) * (1 / len(datalist))  # add average weight
    # test_center_w.append(average_w.tolist())
    # print(test_center_w)
    for weights in test_center_w:
        output = np.zeros(datalist[0].shape)
        for i in range(len(datalist)):
            output += datalist[i] * weights[i]
        test_center.append(output)
    RI = []
    for center in test_center:
        RI.append(reletive_information_centerize(datalist, center))
    # sort centers by RI (bubble sort), keeping weights aligned
    for i in range(len(test_center)):
        for j in range(0, len(test_center) - i - 1):
            if RI[j] > RI[j + 1]:
                temp = RI[j]
                RI[j] = RI[j + 1]
                RI[j + 1] = temp
                temp_weights = test_center_w[j]
                test_center_w[j] = test_center_w[j + 1]
                test_center_w[j + 1] = temp_weights
    # for i in range(5):
    #     print(RI[i])
    #     print(test_center_w[i])
    dr.save_data(RI, datapath + '/RI.csv')
    dr.save_data(test_center_w, datapath + '/test_center_w.csv')
    print(RI[0])
    print(test_center_w[0])
    zz = np.array(RI, dtype='float')
    dim = list(range(len(datalist)))
    dim_set = permutations(dim, 2)
    for dim_select in dim_set:
        key = []
        for w in test_center_w:
            w_d1 = optimzie_key(w[dim_select[0]], accuracy + 1)
            w_d2 = optimzie_key(w[dim_select[1]], accuracy + 1)
            mark1 = 'x' + str(w_d1)
            mark2 = 'y' + str(w_d2)
            key.append(mark1 + mark2)
        dic = dict(zip(key, zz))
        print(len(dic))
        xx = np.arange(0, 1 + sample_rate, sample_rate)
        yy = np.arange(0, 1 + sample_rate, sample_rate)
        X, Y = np.meshgrid(xx, yy)
        Z = np.zeros(X.shape)
        for i in range(len(xx)):
            for j in range(len(xx)):
                if X[i, j] + Y[i, j] > 1:
                    # weight pairs summing past 1 have no corresponding center
                    Z[i, j] = 0
                else:
                    w_d1 = optimzie_key(X[i, j], accuracy + 1)
                    w_d2 = optimzie_key(Y[i, j], accuracy + 1)
                    mark1 = 'x' + str(w_d1)
                    mark2 = 'y' + str(w_d2)
                    print(mark1 + mark2)
                    Z[i, j] = dic[mark1 + mark2]
        fig = plt.figure()
        # create a new 3D axes
        ax3 = plt.axes(projection='3d')
        ax3.plot_surface(X, Y, Z, cmap='rainbow')
        ax3.set_xlabel('sample ' + str(dim_select[0]) + ' weight', fontsize=10, rotation=150)
        ax3.set_ylabel('sample ' + str(dim_select[1]) + ' weights')
        ax3.set_zlabel('RI')
        plt.savefig(datapath + str(dim_select[0]) + '_' + str(dim_select[1]) + '.png')
        plt.close()
def compare_two_model_batch(begin_path, end_path, out_path, measure):
    output = []
    for i in range(len(begin_path)):
        output.append(compare_two_model(begin_path[i], end_path[i], measure))
    dr.save_data(output, out_path)
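# Hypothetical driver for compare_two_model_batch: each begin/end checkpoint
# pair is scored with one measure and the scalars land in a single CSV. The
# paths and the E_dis measure are placeholders for local checkpoints:
# compare_two_model_batch(['run0/0E0b.h5', 'run1/0E0b.h5'],
#                         ['run0/9E499b.h5', 'run1/9E499b.h5'],
#                         'record/init_vs_end_edis.csv', E_dis)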