def Get_vector_of_difference_between_clusters(data_path, result_path, step,
                                              n_features, Embedding):
    # data_path=FLAGS.data_path
    # result_path=FLAGS.result_dir
    # step=FLAGS.step
    # n_features=FLAGS.n_features_embedding
    data1 = GetData(data_path + str(2 * step) + "/")
    print(data_path + str(2 * step) + "/")
    cluster1_mean = np.zeros(n_features)
    for d in data1:
        cluster1_mean += Embedding(d)
    cluster1_mean /= len(data1)

    data2 = GetData(data_path + str(2 * step + 1) + "/")
    cluster2_mean = np.zeros(n_features)
    for d in data2:
        cluster2_mean += Embedding(d)
    cluster2_mean /= len(data2)

    if not os.path.exists(result_path):
        os.makedirs(result_path)
    np.savetxt(result_path + str(2 * step) + "_" + str(2 * step + 1) + ".txt",
               cluster2_mean - cluster1_mean)
    np.savetxt(result_path + str(2 * step) + ".txt", cluster1_mean)
    np.savetxt(result_path + str(2 * step + 1) + ".txt", cluster2_mean)
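# A minimal usage sketch for the function above, assuming cluster folders "0/"
# and "1/" exist under data_path for step 0. The paths and the Embedding
# callable are hypothetical placeholders, not part of the original code.
Get_vector_of_difference_between_clusters(
    data_path="clusters/",     # expects subfolders "0/" and "1/" for step 0
    result_path="directions/", # "0_1.txt" will hold cluster2_mean - cluster1_mean
    step=0,
    n_features=128,            # must match the embedding dimensionality
    Embedding=lambda path: np.zeros(128),  # placeholder embedding function
)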
def test_invest_business(self):
    r = random.random()
    phone_num = '13812345698'
    img_code = '8888'
    phone_code = '666666'
    pwd = 'admin123'
    real_name = '张三'
    card_id = '330102199003073332'
    dy_server = 'on'
    try:
        # Get the image captcha
        self.reg_login.get_img_code(r)
        # Get the SMS verification code
        self.reg_login.get_note_code(phone_num, img_code)
        # Register
        reg = self.reg_login.register(phone_num, pwd, img_code, phone_code, dy_server)
        print(reg.json())
        # Log in
        self.reg_login.login(phone_num, pwd)
        # Real-name verification
        self.account.approve_real_name(real_name, card_id)
        # Open an account
        r1 = self.recharge.set_account()
        # Fetch the third-party form
        res1 = GetData.get_html_data(r1)
        third1 = self.session.post(url=res1[0], data=res1[1])
        logging.info('Third-party response text: {}'.format(third1.text))
        self.assertIn('OK', third1.text)
        # Get the recharge verification code
        self.recharge.get_charge_verify_code(r)
        # Recharge
        r2 = self.recharge.recharge('chinapnrTrust', '2000', 'reForm', '8888')
        # Third party
        res2 = GetData.get_html_data(r2)
        third2 = self.session.post(url=res2[0], data=res2[1])
        logging.info('Third-party response text: {}'.format(third2.text))
        self.assertIn('OK', third2.text)
        # Fetching the investment product detail succeeds
        self.invest.get_invest_detail(842)
        # Invest
        r3 = self.invest.invest(842, 1000)
        # Third party
        res3 = GetData.get_html_data(r3)
        third3 = self.session.post(url=res3[0], data=res3[1])
        logging.info('Third-party response text: {}'.format(third3.text))
        self.assertIn('OK', third3.text)
    except Exception as e:
        logging.error(e)
        raise
def main():
    EXPERIMENT.set_code(CodeLogger(CODE_FILES))
    EXPERIMENT.add_tag("FC")

    dataset_x, dataset_y = GetData(is_train=True)
    train_data = GetDataloader(dataset_x, dataset_y, BATCH_SIZE)
    dataset_x, dataset_y = GetData(is_train=False)
    test_data = GetDataloader(dataset_x, dataset_y, BATCH_SIZE)

    model = MyNet()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    # StepLR takes an integer step_size; the decay factor belongs in gamma.
    # The original passed 0.5 positionally as step_size. scheduler.step() is
    # called manually every 10 epochs below, so step_size=1 halves the
    # learning rate on each of those calls.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.5)
    model_saver = ModelSaver(os.path.join("Models", "FC"))

    for epoch in range(EPOCHS):
        loss, accuracy, f1score, precision, recall = train(
            train_data, model, optimizer, EXPERIMENT)
        print("Training loss @ epoch", epoch, "=", loss)
        print("Training accuracy @ epoch", epoch, "=", accuracy)
        print("Training F1 score @ epoch", epoch, "=", f1score)
        print("Training precision @ epoch", epoch, "=", precision)
        print("Training recall @ epoch", epoch, "=", recall)
        with EXPERIMENT.train():
            EXPERIMENT.log_metric("Batch loss", loss, step=epoch)
            EXPERIMENT.log_metric("Batch accuracy", accuracy, step=epoch)
            EXPERIMENT.log_metric("Batch F1 score", f1score, step=epoch)
            EXPERIMENT.log_metric("Batch precision", precision, step=epoch)
            EXPERIMENT.log_metric("Batch recall", recall, step=epoch)

        loss, accuracy, f1score, precision, recall = test(
            test_data, model, EXPERIMENT)
        print("Validation loss @ epoch", epoch, "=", loss)
        print("Validation accuracy @ epoch", epoch, "=", accuracy)
        print("Validation F1 score @ epoch", epoch, "=", f1score)
        print("Validation precision @ epoch", epoch, "=", precision)
        print("Validation recall @ epoch", epoch, "=", recall)
        with EXPERIMENT.test():
            EXPERIMENT.log_metric("Batch loss", loss, step=epoch)
            EXPERIMENT.log_metric("Batch accuracy", accuracy, step=epoch)
            EXPERIMENT.log_metric("Batch F1 score", f1score, step=epoch)
            EXPERIMENT.log_metric("Batch precision", precision, step=epoch)
            EXPERIMENT.log_metric("Batch recall", recall, step=epoch)

        print("###############################################\n")
        model_saver.Save("epoch" + str(epoch) + ".pt", model, optimizer, scheduler)
        if epoch % 10 == 0:
            scheduler.step()
def BuildPairsFromRealImages(data_path1, data_path2, result, result_dir1,
                             result_dir2, Embedding):
    if not os.path.exists(result):
        os.makedirs(result)
        os.makedirs(result_dir1)
        os.makedirs(result_dir2)
    n_clusters = 20
    data1 = GetData(data_path1)
    random.shuffle(data1)
    data2 = GetData(data_path2)
    images_indexes1, clusters1 = ClusterData(data1, FLAGS.n_features_embedding,
                                             Embedding, n_clusters,
                                             result + "0_test/", FLAGS.step)
    images_indexes2, clusters2 = ClusterData(data2, FLAGS.n_features_embedding,
                                             Embedding, n_clusters,
                                             result + "1_test/", FLAGS.step)
    print("VARIATION")
    print(np.mean(clusters1.cluster_centers_, axis=0),
          np.mean(clusters2.cluster_centers_, axis=0))
    print(
        np.linalg.norm(clusters1.cluster_centers_ -
                       np.mean(clusters1.cluster_centers_, axis=0)),
        np.linalg.norm(clusters2.cluster_centers_ -
                       np.mean(clusters2.cluster_centers_, axis=0)))
    index = 0
    # For each cluster of the first set, pair its representative image with the
    # image from each cluster of the second set closest to the same center.
    for cl in range(n_clusters):
        data1_ = [data1[i] for i in images_indexes1[cl]]
        im = GetTheClosestPoint(clusters1.cluster_centers_[cl], data1_, Embedding)
        for cl1 in range(n_clusters):
            data2_ = [data2[i] for i in images_indexes2[cl1]]
            im1 = GetTheClosestPoint(clusters1.cluster_centers_[cl], data2_, Embedding)
            subprocess.call(["cp", im, result_dir1 + str(index) + ".jpg"])
            subprocess.call(["cp", im1, result_dir2 + str(index) + ".jpg"])
            index += 1
    # Repeat in the other direction, anchoring on the second set's centers.
    for cl in range(n_clusters):
        data2_ = [data2[i] for i in images_indexes2[cl]]
        im1 = GetTheClosestPoint(clusters2.cluster_centers_[cl], data2_, Embedding)
        for cl1 in range(n_clusters):
            data1_ = [data1[i] for i in images_indexes1[cl1]]
            im = GetTheClosestPoint(clusters2.cluster_centers_[cl], data1_, Embedding)
            subprocess.call(["cp", im, result_dir1 + str(index) + ".jpg"])
            subprocess.call(["cp", im1, result_dir2 + str(index) + ".jpg"])
            index += 1
def AutoEncoderLatent(data_path, feature_dimension_embedding, Embedding):
    data = GetData(data_path)
    random.shuffle(data)
    latent = np.zeros([len(data), feature_dimension_embedding])
    for d in range(len(data)):
        latent[d] = Embedding(data[d])
    return data, latent
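# A minimal sketch of calling AutoEncoderLatent; the directory and the
# Embedding callable are hypothetical placeholders. The returned latent matrix
# has shape (n_images, feature_dimension_embedding) and pairs row-for-row with
# the shuffled file list in data.
data, latent = AutoEncoderLatent("images/", 3, lambda path: np.zeros(3))
print(len(data), latent.shape)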
def test01_set_account(self):
    r = self.recharge.set_account()
    print(r.text)
    common_assert(self, r, status_code=200, status=200, desc='form')
    h = GetData.get_html_data(r)
    r2 = self.session.post(url=h[0], data=h[1])
    common_assert(self, r2, status_code=200, status=None)
def GIST_latent(data_path, feature_dimension_embedding):
    data = GetData(data_path)
    random.shuffle(data)
    latent = np.zeros([len(data), feature_dimension_embedding])
    for d in range(len(data)):
        latent[d] = Gist(data[d])
    return data, latent
def CheckProbabilityLabel(meta_data, cluster, n_features):
    res = np.zeros(n_features)
    data = GetData(cluster)
    for d in data:
        # Key: the trailing "_"-separated token of the filename, minus its
        # 4-character extension (e.g. "x_0001.jpg" -> "0001").
        res += meta_data[os.path.basename(d).split("_")[-1][:-4]]
    return res
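# A minimal sketch of how CheckProbabilityLabel might be called. The meta_data
# mapping and cluster directory below are hypothetical: meta_data is assumed to
# map an image id (the trailing filename token without its extension) to a
# per-feature label vector.
meta_data = {"0001": np.array([1.0, 0.0, 0.0]),
             "0002": np.array([0.0, 1.0, 0.0])}
label_sums = CheckProbabilityLabel(meta_data, "clusters/0/", n_features=3)
print(label_sums)  # summed label vector over all images in the cluster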
def test02_invest(self):
    r = self.invest.invest(842, 1000)
    res = GetData.get_html_data(r)
    r2 = self.session.post(url=res[0], data=res[1])
    logging.info('Response data: {}'.format(r2.text))
    try:
        self.assertIn('OK', r2.text)
    except Exception as e:
        logging.error(e)
        raise
def __init__(self, data_path, n_bins, n_pictures_per_bin, n_features, Embedding,
             data_path_truth, Embedding_truth, im):
    self.data_path = data_path
    self.Embedding = Embedding
    self.image_search = ImageSearch(data_path, n_bins, n_pictures_per_bin,
                                    n_features, Embedding)
    data_truth_ = GetData(data_path_truth)
    self.data_truth = {}
    for d in data_truth_:
        self.data_truth[os.path.basename(d).split("_")[-1]] = Embedding_truth(d)
    self.chosen_picture = im
    self.truth_value = self.data_truth[
        os.path.basename(self.chosen_picture).split("_")[-1]]
    self.image_search.show_images()
def test03_recharge(self):
    self.recharge.get_charge_verify_code(random.random())
    r = self.recharge.recharge('chinapnrTrust', '500', 'reForm', '8888')
    logging.info('Response: {}'.format(r.json()))
    common_assert(self, r, desc='form')
    h = GetData.get_html_data(r)
    r2 = self.session.post(url=h[0], data=h[1])  # third-party account opening
    try:
        common_assert(self, r2, status=None)
    except Exception as e:
        logging.error(e)
        raise
def GetPicturesWithLabels(meta_data, data_path, dictionary, result_dir):
    print(dictionary)
    os.makedirs(result_dir)
    data = GetData(data_path)
    for d in data:
        basename = os.path.basename(d).split("_")
        meta_features = meta_data[basename[-1][:-4]]
        for f in dictionary.keys():
            if meta_features[dictionary[f][0]] > 0:
                subprocess.call([
                    'cp', d, result_dir + basename[0] + "_" +
                    str(dictionary[f][1]) + "_" + basename[-1]
                ])
def main(query="you know nothing", cmd="script", top_n=5):
    # Guard must require at least four argv entries, since sys.argv[3] is read
    # below (the original checked len(sys.argv) > 2 and could raise IndexError).
    if len(sys.argv) > 3:
        query = sys.argv[1]
        cmd = sys.argv[2]
        top_n = int(sys.argv[3])
    buffer = ""
    data = GetData()
    if cmd == "scene":  # search for a scene
        query = QueryPreprocess(query)
        eps = GetEps()
        vec, tfidf = GetModel(data, eps)
        sceneSet = GetEpisodeSet(data, eps, vec, tfidf, query, top_n)
        for i, (x, y, z) in enumerate(sceneSet):
            buffer += "{}. {}-<{}>:\r{}\r\r".format(i + 1, x, y, z)
    elif cmd == "script":  # search for a script line
        docs = GetScriptDataByLines()
        scriptSet = GetScriptSet(docs, data, query, top_n)
        for i, (x, y, z) in enumerate(scriptSet):
            buffer += "{}. {}-<{}>:\r{}\r\r".format(i + 1, x, y, z)
    print(buffer)
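# Usage sketch. The script filename is a hypothetical stand-in; the positional
# arguments mirror the argv handling above:
#   python search.py "winter is coming" scene 3
# Or call main() directly with its keyword parameters:
main(query="winter is coming", cmd="scene", top_n=3)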
def My_latent(data_path, svm_path, feature_dimension_embedding, result_dir):
    if not os.path.exists(result_dir):
        os.makedirs(result_dir)
    data = GetData(data_path)
    svm = []
    for i in range(feature_dimension_embedding):
        svm.append(np.genfromtxt(svm_path + str(i + 1) + ".txt"))
    for i in range(feature_dimension_embedding):
        os.makedirs(result_dir + str(i) + "/")
    for d in range(len(data)):
        g = Gist(data[d])
        latent = np.array(
            [np.dot(svm[i], g) for i in range(feature_dimension_embedding)])
        basename = os.path.basename(data[d]).split("_")[-1]
        for i in range(feature_dimension_embedding):
            subprocess.call([
                'cp', data[d], result_dir + str(i) + "/" + str(latent[i] + 10) +
                "_" + "_".join(str(f) for f in latent) + "_" + basename
            ])
    return data
    print(direction_path)
    AutoEncoderRankingByDirection(data, latent, direction_path,
                                  n_features_embedding, result_dir)

if FLAGS.check_interpretability:
    data_path = FLAGS.data_path
    n_bins = FLAGS.n_bins
    result_dir = FLAGS.result_dir
    step = FLAGS.step
    dir1 = result_dir + str(2 * step) + "/"
    dir2 = result_dir + str(2 * step + 1) + "/"
    if not os.path.exists(result_dir):
        os.makedirs(result_dir)
    for i in range(n_bins):
        os.makedirs(result_dir + str(step) + "_" + str(i))
    bins = DevideFeatureIntoBins(data_path, n_bins, 0)
    data = GetData(data_path)
    for d in data:
        basename = os.path.basename(d)
        basename = basename.split("_")
        # print(bins, basename[0], d)
        bin = 0
        while bin < n_bins and float(basename[0]) > bins[bin]:
            bin += 1
        subprocess.call([
            'cp', d, result_dir + str(step) + "_" + str(bin) + "/" +
            "_".join(basename[1:])
        ])
def main():
    # os.makedirs(os.path.join("Models", "Word2Vec"), exist_ok=True)
    data, word_frequency = GetData(TRAIN_PATH, to_lower=True,
                                   remove_proper_nouns=True)
    # Not using unk embeddings yet
    # I am doing lowercase. Think about it!

    # Load Google's embeddings
    word2vec = KeyedVectors.load_word2vec_format(
        os.path.join("Data", "GoogleNews-vectors-negative300.bin"), binary=True)
    # # Train word2vec
    # word2vec = BuildWord2Vec(os.path.join(
    #     "Data", "GoogleNews-vectors-negative300.bin"), data)

    # Get word_index dictionary
    word_index = BuildWord2Index(word_frequency.keys(), word2vec.wv.vocab.keys())
    train_loader, test_loader = GetDataLoader(data, word_index)

    # Set up the model
    model = MyNet(len(word_index), EMBEDDING_DIM)
    optimizer = th.optim.Adamax(model.GetTrainableParameters(), lr=LEARNING_RATE)
    scheduler = th.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.9)
    saver = ModelSaver(CWD)
    model.InitializeEmbeddings(word2vec, word_index, EMBEDDING_DIM)

    print("Train batches =", len(train_loader))
    print("Test batches =", len(test_loader))
    print(
        "####################$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$####################\n"
    )
    with open("out.txt", "w") as file:
        for epoch in range(EPOCHS):
            print("Epoch", epoch)
            loss, real_prob, fake_prob = train(model, train_loader, optimizer)
            print("Train loss =", loss)
            print("Train real prob =", real_prob)
            print("Train fake prob =", fake_prob)
            file.write("Train loss = " + str(loss) + "\n")
            file.write("Train real prob = " + str(real_prob) + "\n")
            file.write("Train fake prob = " + str(fake_prob) + "\n")
            scheduler.step()
            if epoch % TEST_EVERY == 0:
                loss, real_prob, fake_prob = test(model, test_loader)
                print("Test loss =", loss)
                print("Test real prob =", real_prob)
                print("Test fake prob =", fake_prob)
                file.write("Test loss = " + str(loss) + "\n")
                file.write("Test real prob = " + str(real_prob) + "\n")
                file.write("Test fake prob = " + str(fake_prob) + "\n")
            print(
                "####################$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$####################\n"
            )
            if epoch % 1 == 0:
                saver.SaveCheckpoint(model, word_index, optimizer, scheduler,
                                     str(epoch) + ".pt")
def json_data(file_name, case_name):
    return GetData.get_json_data(file_name, case_name)
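# A minimal usage sketch. The JSON file and case name are hypothetical; the
# helper simply delegates to GetData.get_json_data, which is assumed to return
# the parameter set stored under that case key.
params = json_data("recharge.json", "test03_recharge")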
import argparse

from utils import GetData

parser = argparse.ArgumentParser(
    description='Create spectrogram magnitude files in dataset.')
parser.add_argument('--audio-dir',
                    type=str,
                    dest='audio_dir',
                    help='Directory of MedleyDB Audio files',
                    required=True)

if __name__ == '__main__':
    # Parse the command line; the original referenced an undefined audio_dir
    # without ever calling parse_args().
    args = parser.parse_args()
    d = GetData(audio_dir=args.audio_dir)
    d.convert_all_wav_files()
def get_traits():
    get_data = GetData()
    data = get_data.getDataValues()
    trait_parser = ParseTraits(data)
    return trait_parser.getTraits()
import sys

import numpy as np
import tensorflow as tf
import argparse
from utils import GetData
import matplotlib.pyplot as plt

NUM_FREQ_BINS = 1025
SAMPLES_PER_FRAME = 20
SAVE_ITERATIONS = 20
CONFIDENCE = 0.5
GPU_ON = True

n_input_units = NUM_FREQ_BINS * SAMPLES_PER_FRAME
n_hidden_units_h0 = NUM_FREQ_BINS * SAMPLES_PER_FRAME
n_output_units = NUM_FREQ_BINS * SAMPLES_PER_FRAME

fetch_data = GetData()
batch_size = 10
n_channels = 1
dropout = 0.0  # No dropout
test_pred_conf = 0.5  # threshold for prediction on test

# sys is needed for this argv check; it was missing from the original imports.
if len(sys.argv) == 2:
    fetch_data = GetData(sys.argv[1])

X = tf.placeholder(tf.float32, shape=(None, n_input_units),
                   name='data_placeholder')
y = tf.placeholder(tf.float32, shape=(None, n_output_units),
                   name='labels_placeholder')
"""if __name__ == "__main__": root = tk.Tk() root.geometry('+%d+%d' % (100, 100)) ShowImages(root, "../CycleGAN_shoes/Toy/PCA_GIST/0/", 2, 5, 3, Embedding("3Features")).pack(fill="both", expand=True) #ShowImages(root, "../RankSVM_DATA/Shoes/AutoEncoder/result9/","./RankSVM_DATA/Shoes/Supervised/result/", "result/", # "../RankSVM_DATA/Shoes/AutoEncoder/", 3).pack(fill="both", expand=True) root.mainloop()""" if __name__ == "__main__": n_iterations = 7 n_images = 1000 data = GetData("../CycleGAN_shoes/Toy/shoes_boots_heels_white_black/") data_path0 = "../CycleGAN_shoes/Toy/My_interpretability/0/" data_path1 = "../CycleGAN_shoes/Toy/AutoEncoderByClusterDirection/0/" data_path2 = "../CycleGAN_shoes/Toy/PCA_AutoEncoder/0/" data_path3 = "../CycleGAN_shoes/Toy/PCA_GIST/0/" bar = progressbar.ProgressBar(maxval=n_images, \ widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()]) data_paths = [data_path0, data_path1, data_path2, data_path3] simuliators = [] for method in range(len(data_paths)): simuliators.append(UserSimuliator (data_paths[method], 2, 5, 3, Embedding("3Features"), "../CycleGAN_shoes/Toy/shoes_boots_heels_white_black/", Embedding("LabelToyShoes"), random.choice(data)))
    '600808', '600832', '600837', '600875', '600900', '601006', '601009',
    '601088', '601111', '601166', '601168', '601169', '601186', '601318',
    '601328', '601333', '601390', '601398', '601600', '601601', '601628',
    '601666', '601699', '601727', '601766', '601808', '601857', '601866',
    '601872', '601898', '601899', '601919', '601939', '601958', '601988',
    '601991', '601998'
]

x_train_list = []
y_train_list = []
x_test_list = []
y_test_list = []
i = 0
for stock in ID:
    i += 1
    print(str(i) + '\t' + stock)
    x_temp, y_temp = GetData.get_data(stock)
    # Chronological 70/30 train/test split per stock, no shuffling.
    x_train_list += x_temp[0:int(len(x_temp) * 0.7)]
    y_train_list += y_temp[0:int(len(y_temp) * 0.7)]
    x_test_list += x_temp[int(len(x_temp) * 0.7):]
    y_test_list += y_temp[int(len(y_temp) * 0.7):]

print('Training...')
print(np.array(x_train_list).shape)
x_train = np.array(x_train_list)[:, :, 0:12]
y_train = np.array(y_train_list)
x_test = np.array(x_test_list)[:, :, 0:12]
y_test = np.array(y_test_list)
print(x_train.shape)
print(x_test.shape)
if FLAGS.take_pictures_from_first_bin:
    print("AAAAAAA")
    data_path = FLAGS.data_path
    result_dir = FLAGS.result_dir
    step = FLAGS.step
    n_bins = FLAGS.n_bins
    bins = DevideFeatureIntoBins(data_path, n_bins, 0)
    data = []
    for root, subdirs, files in os.walk(data_path):
        for f in files:
            if os.path.splitext(f)[1].lower() in ('.jpg', '.jpeg'):
                data += [os.path.join(root, f)]
    print(len(data))
    data1_len = len(GetData(data_path + "0_/"))
    data2_len = len(GetData(data_path + "1_/"))
    os.makedirs(result_dir)
    if data1_len < data2_len:
        for d in data:
            basename = os.path.basename(d)
            basename = basename.split("_")
            if float(basename[0]) < bins[0]:
                subprocess.call(['cp', d, result_dir + "_".join(basename[1:])])
    else:
        for d in data:
            basename = os.path.basename(d)
            basename = basename.split("_")
            if float(basename[0]) > bins[1]:
                subprocess.call(['cp', d, result_dir + "_".join(basename[1:])])
        s_data = line.strip().split(":\t")
        acc.append([s_data[0], float(s_data[1])])

result = sorted(acc, key=lambda x: x[1], reverse=True)
print(result)
for i in range(1):
    ID.append(result[i][0])

i = 0
x_train_list = []
y_train_list = []
x_test_list = []
y_test_list = []
for stock in ID:
    i += 1
    print(str(i) + '\t' + stock)
    x_temp, y_temp = GetData.get_data_rumor_tensor(stock)
    x_train_list += x_temp[0:int(len(x_temp) * 0.7)]
    y_train_list += y_temp[0:int(len(y_temp) * 0.7)]
    x_test_list += x_temp[int(len(x_temp) * 0.7):]
    y_test_list += y_temp[int(len(y_temp) * 0.7):]

print('Training...')
print(np.array(x_train_list).shape)
x_train = np.array(x_train_list)
y_train = np.array(y_train_list)
x_test = np.array(x_test_list)
y_test = np.array(y_test_list)
s_train = np.array(x_train_list)
s_test = np.array(x_test_list)
# print('Training...')
# -*- coding: utf-8 -*-
import numpy as np

from Model import model_pre
from utils import GetData

print('Getting Data....')
ID = GetData.get_stock_ID('../Data/stock.txt')
ID.remove('600005')

x_train_list = []
y_train_list = []
x_test_list = []
y_test_list = []
i = 0
for stock in ID:
    i += 1
    print(str(i) + '\t' + stock)
    x_temp = GetData.get_data_quota(stock)
    x_train_list += x_temp[0:int(len(x_temp) * 0.7)]
    x_test_list += x_temp[int(len(x_temp) * 0.7):]

for stock in ID:
    i += 1
    print(str(i) + '\t' + stock)
    x_temp, y_temp = GetData.get_data(stock)
    y_train_list += y_temp[0:int(len(y_temp) * 0.7)]
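# The chronological 70/30 split above recurs verbatim across these scripts.
# A small helper like the sketch below (a suggestion, not part of the original
# code) would centralize that logic:
def split_train_test(seq, ratio=0.7):
    """Split a time-ordered sequence into (train, test) without shuffling."""
    cut = int(len(seq) * ratio)
    return seq[:cut], seq[cut:]

# Example: x_tr, x_te = split_train_test(x_temp)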