def main():
    transition_cmds = [
        # './adb shell input tap 383 1203',  # click on next (Challenge mode)
        './adb shell input tap 549 156',  # tap outside to remove touch artifacts (example: developer options)
    ]
    # Thrice, so that even if we click on ads by mistake, we go back to the game
    back_cmds = ['./adb shell input keyevent 4' for _ in range(3)]
    pkl_filepath = './log.pkl'
    log = utils.load_if_pickled(pkl_filepath)
    new_in_log = 0
    while True:
        if new_in_log >= 5:
            utils.save_to_pickle(log, pkl_filepath)
            new_in_log = 0
        try:
            utils.run_cmds(transition_cmds)
            rundata = solve()
            log.append(rundata)
            new_in_log += 1
            time.sleep(4)  # Time in seconds.
        except (solver.BFSError, solver.HashError):
            utils.run_cmds(back_cmds)
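# Note: the snippets in this listing rely on small pickle helpers such as
# utils.save_to_pickle and utils.load_if_pickled whose definitions are not shown.
# Below is a minimal sketch of what they might look like, assuming the
# (data, path) argument order and the "return an empty list if nothing was
# saved yet" behavior implied by the snippet above; the actual helpers in each
# project may differ.
import os
import pickle


def save_to_pickle(data, pkl_filepath):
    """Serialize `data` to `pkl_filepath`, creating parent directories if needed."""
    dirname = os.path.dirname(pkl_filepath)
    if dirname:
        os.makedirs(dirname, exist_ok=True)
    with open(pkl_filepath, 'wb') as f:
        pickle.dump(data, f)


def load_if_pickled(pkl_filepath):
    """Return the unpickled object if the file exists, else an empty list."""
    if os.path.exists(pkl_filepath):
        with open(pkl_filepath, 'rb') as f:
            return pickle.load(f)
    return []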
def preprocess_data(train_files, config):
    x_train, y_train, x_dev, y_dev = [], [], [], []
    for file in tqdm(train_files, desc="preprocessing_data"):
        cache_file_path = file.replace("data", "cache").replace(
            ".csv", "_fw{}_pw{}_pad{}_ts{}_cache.pickle".format(
                config["feature_window_size"], config["prediction_window_size"],
                config["pad_size"], config["test_size"]))
        if os.path.exists(cache_file_path):
            print("Load cache from {}".format(cache_file_path))
            x_window_train, y_window_train, x_window_dev, y_window_dev = \
                utils.load_from_pickle(cache_file_path)
        else:
            x_train_single, y_train_single, x_dev_single, y_dev_single = \
                read_data_from_file(file, config["test_size"])
            x_window_train, y_window_train, x_window_dev, y_window_dev = \
                prepare_data(x_train_single, y_train_single, x_dev_single,
                             y_dev_single, config)
            print("Save cache to {}".format(cache_file_path))
            utils.save_to_pickle(
                (x_window_train, y_window_train, x_window_dev, y_window_dev),
                cache_file_path)
        x_train.extend(x_window_train)
        y_train.extend(y_window_train)
        x_dev.extend(x_window_dev)
        y_dev.extend(y_window_dev)
    return x_train, y_train, x_dev, y_dev
def get_bot_response():
    user_text = request.args.get('msg')
    custom_answer = find_custom_answer(user_text, threshold=COS_SIM_THRESHOLD)
    if custom_answer:
        reply_text = custom_answer
    else:
        helper_data = load_from_pickle(TMP_FILENAME_FOR_DIALOGUE_HELPER_DATA)
        restart_dialogue = helper_data["restart_dialogue"]
        chat_history_ids = helper_data["chat_history_ids"]
        if any(w == user_text.strip().lower() for w in restart_keywords):
            reply_text = "Ok, let's start from scratch, I am ready"
            restart_dialogue = True
        elif any(w == user_text.strip().lower() for w in exit_keywords):
            reply_text = "Ok, bye! Just waiting if you type something..."
            restart_dialogue = True
        else:
            reply_text, chat_history_ids = dialog_gpt(user_text,
                                                      chat_history_ids,
                                                      restart_dialogue)
            restart_dialogue = False
        helper_data = {
            "restart_dialogue": restart_dialogue,
            "chat_history_ids": chat_history_ids
        }
        save_to_pickle(helper_data, TMP_FILENAME_FOR_DIALOGUE_HELPER_DATA)
    return reply_text
def set_hsv_std_values(hsv_object):
    """
    Save given HSV object into '/pickle_files/hsv.pickle'.
    :param hsv_object: contains min and max values for hue, saturation and value.
    :return:
    """
    utils.save_to_pickle(HSV_PICKLE_PATH, _object=hsv_object)
    logging.info("Data saved to '%s.pickle'" % HSV_PICKLE_PATH)
def save_to_pickle(self, pickle_file=None):
    if not pickle_file:
        if self.is_train_data:
            pickle_file = TRAIN_DATA_PICKLE
        else:
            pickle_file = EVAL_DATA_PICKLE
    path = os.path.join(self.save_path, pickle_file)
    save_to_pickle(self.images_data, path)
    return path
def run_srgan(device, image_size, batch_size, config, run_dir, saved_dir,
              run_name, num_epochs, val_dataset, train_dataset=None,
              checkpoints=None, mode='train', gpu_num=1):
    srgan_generator = SRGenerator(device).to(device)
    srgan_discriminator = SRDiscriminator().to(device)
    summary(srgan_discriminator, (3, image_size, image_size))
    summary(srgan_generator, (3, image_size // 4, image_size // 4))

    if checkpoints is not None:
        utils.load_from_checkpoint(srgan_generator, saved_dir,
                                   checkpoints["generator"])
        utils.load_from_checkpoint(srgan_discriminator, saved_dir,
                                   checkpoints["discriminator"])

    run_name = 'SRGAN' + '_' + run_name
    if mode == 'train':
        inception_FID_scores = train_gan(num_epochs, batch_size, None, device,
                                         train_dataset, val_dataset,
                                         srgan_generator, srgan_discriminator,
                                         type='SRGAN', config=config,
                                         run_dir=run_dir, saved_dir=saved_dir,
                                         run_name=run_name, calc_IS=False)
    elif mode == 'test':
        inception_FID_scores = [
            calc_inception_FID_score(batch_size, device, val_dataset,
                                     srgan_generator, type='SRGAN')
        ]

    date_str = datetime.datetime.now().strftime("%m%d%Y%H")
    utils.save_to_pickle(
        inception_FID_scores,
        os.path.join(saved_dir, 'srgan_fid_' + date_str + ".pickle"))
    return inception_FID_scores
def model_selection(x_train, y_train, x_val, y_val, w0, epochs, eta,
                    mini_batch, lambdas):
    '''
    :param x_train: training input, NxD
    :param y_train: training output, NxK
    :param x_val: validation input, Nval x D
    :param y_val: validation output, Nval x K
    :param w0: vector of initial parameter values
    :param epochs: number of epochs for SGD
    :param eta: learning rates to be tried
    :param mini_batch: mini-batch sizes to be tried
    :param lambdas: list of regularization parameter values lambda to be tried
    :return: performs model selection and returns the tuple (best_lambda, best_w,
        best_error, best_eta, best_epochs, best_mb) describing the parameter
        values of the best selected model.
    '''
    best_lambda = 0
    best_w = w0
    best_error = 1
    best_eta = 0
    best_mb = 0
    best_epochs = 0
    for current_lamda in lambdas:
        for current_epochs in epochs:
            for current_eta in eta:
                for current_mb in mini_batch:

                    def nowa(w, x, y):
                        return regularized_logistic_cost_function(
                            w, x, y, current_lamda)

                    w = stochastic_gradient_descent(nowa, x_train, y_train, w0,
                                                    current_epochs, current_eta,
                                                    current_mb)
                    y_pred = prediction(x_val, w)
                    current_error = prediction_error(y_pred, y_val)
                    print("Lambda: {}, Eta: {}, Batch: {}, Epochs: {} → ERROR: {}"
                          .format(current_lamda, current_eta, current_mb,
                                  current_epochs, current_error))
                    if current_error < best_error:
                        best_lambda = current_lamda
                        best_w = w
                        best_error = current_error
                        best_eta = current_eta
                        best_mb = current_mb
                        best_epochs = current_epochs
    save_to_pickle('parameters-thebest.pkl', best_w)
    return best_lambda, best_w, best_error, best_eta, best_epochs, best_mb
def get_page_rank_eigen_vector(self, image_similarity_matrix, S,
                               pickle_file_name=PICKLE_FILE_NAME):
    pickle_dir = get_pickle_directory()
    interim_file_path = os.path.join(pickle_dir, pickle_file_name)
    # if os.path.exists(interim_file_path):
    #     interim = read_from_pickle(pickle_file_name)
    # else:
    interim = self.calculate_intermediate_page_rank_matrix(
        image_similarity_matrix)
    save_to_pickle(interim, pickle_file_name)
    pie = np.matmul(interim, S)
    return pie
def execute_task5(request):
    l = int(request.POST.get('number_of_layers'))
    k = int(request.POST.get('number_of_hashes_per_layer'))
    lsh = LSH(k=k, l=l)
    dbconnection = DatabaseConnection()
    if read_from_pickle('all_img_features_LSH.pickle') is not None:
        all_image_hog_features = read_from_pickle('all_img_features_LSH.pickle')
    else:
        all_image_hog_features = dbconnection.get_object_feature_matrix_from_db(
            tablename='histogram_of_gradients')
        save_to_pickle(all_image_hog_features, 'all_img_features_LSH.pickle')

    # SVD on hog features
    if read_from_pickle('svd_hog_lsh.pickle') is not None:
        svd_obj = read_from_pickle('svd_hog_lsh.pickle')
        transformed_data = svd_obj['data_matrix']
        vt = svd_obj['vt']
    else:
        svd = SingularValueDecomposition()
        transformed_data, vt = svd.get_transformed_data_copy(
            all_image_hog_features['data_matrix'], 400)
        save_to_pickle(
            {"data_matrix": transformed_data,
             "images": all_image_hog_features['images'],
             "vt": vt}, 'svd_hog_lsh.pickle')

    # index_of_query_image = (all_image_hog_features['images']).index(query_image)
    # image_vector = transformed_data[index_of_query_image]
    bit_map = lsh.generate_representation_for_all_layers(
        transformed_data, all_image_hog_features['images'])
    save_to_pickle(lsh, 'lsh_model')
    return render(request, 'task5a_output.html')
def run_dcgan(device, image_size, noise_size, batch_size, config, run_dir,
              saved_dir, run_name, num_epochs, val_dataset, train_dataset=None,
              checkpoints=None, mode='train', gpu_num=1):
    # Run DCGAN
    type = 'DCGAN'
    dcgan_generator = Generator(noise_size=noise_size,
                                image_size=image_size).to(device)
    dcgan_discriminator = Discriminator(image_size=image_size).to(device)

    # Parallelize for improved performance
    if device.type == 'cuda' and gpu_num > 1:
        dcgan_generator = nn.DataParallel(dcgan_generator, list(range(gpu_num)))
        dcgan_discriminator = nn.DataParallel(dcgan_discriminator,
                                              list(range(gpu_num)))

    # Print networks
    print('Discriminator')
    summary(dcgan_discriminator, (3, image_size, image_size))
    print('Generator')
    summary(dcgan_generator, (noise_size, 1, 1))

    if checkpoints is not None:
        utils.load_from_checkpoint(dcgan_generator, saved_dir,
                                   checkpoints["generator"])
        utils.load_from_checkpoint(dcgan_discriminator, saved_dir,
                                   checkpoints["discriminator"])

    run_name = 'DCGAN' + '_' + run_name

    # We train the model in the train phase and only calculate scores in test mode
    if mode == 'train':
        inception_FID_scores, inception_scores = train_gan(
            num_epochs, batch_size, noise_size, device, train_dataset,
            val_dataset, dcgan_generator, dcgan_discriminator, type='DCGAN',
            config=config, run_dir=run_dir, saved_dir=saved_dir,
            run_name=run_name)
    elif mode == 'test':
        inception_FID_scores = [
            calc_inception_FID_score(batch_size, device, val_dataset,
                                     dcgan_generator, type, noise_size)
        ]
        inception_scores = [
            calc_inception_score(device, noise_size, dcgan_generator,
                                 eval_size=len(val_dataset))
        ]

    # Return the lists of all scores accumulated over the epochs
    date_str = datetime.datetime.now().strftime("%m%d%Y%H")
    save_to_pickle(
        inception_FID_scores,
        os.path.join(saved_dir, 'dcgan_fid_' + run_name + date_str + ".pickle"))
    save_to_pickle(
        inception_scores,
        os.path.join(saved_dir, 'dcgan_IS_' + run_name + date_str + ".pickle"))
    return inception_FID_scores, inception_scores
def save_loss_gradients(loss_gradients, n_samples, filename, savedir):
    save_to_pickle(data=loss_gradients, path=TESTS + savedir,
                   filename=filename + "_samp=" + str(n_samples) + "_lossGrads.pkl")
def stage_i(A_val, y_batch_val, hparams, hid_i, init_obj, early_stop, bs,
            optim, recovered=False):
    model_def = globals()['model_def']
    m_loss1_batch_dict = {}
    m_loss2_batch_dict = {}
    zp_loss_batch_dict = {}
    total_loss_dict = {}
    x_hat_batch_dict = {}
    model_selection = ModelSelect(hparams)
    hid_i = int(hid_i)
    # print('Matrix norm is {}'.format(np.linalg.norm(A_val)))
    # hparams.eps = hparams.eps * np.linalg.norm(A_val)

    # Get a session
    sess = tf.Session()

    # Set up placeholders
    A = tf.placeholder(tf.float32,
                       shape=(hparams.n_input, hparams.num_measurements),
                       name='A')
    y_batch = tf.placeholder(tf.float32,
                             shape=(hparams.batch_size, hparams.num_measurements),
                             name='y_batch')

    # Create the generator
    model_hparams = model_def.Hparams()
    model_hparams.n_z = hparams.n_z
    model_hparams.stdv = hparams.stdv
    model_hparams.mean = hparams.mean
    model_hparams.grid = copy.deepcopy(hparams.grid)
    model_selection.setup_dim(hid_i, model_hparams)
    if not hparams.model_types[0] == 'vae-flex-alt' and 'alt' in hparams.model_types[0]:
        model_def.ignore_grid = next(
            (j for j in model_selection.dim_list if j >= hid_i), None)

    # Set up the initialization
    print('The initialization is: {}'.format(init_obj.mode))
    if init_obj.mode == 'random':
        z_batch = model_def.get_z_var(model_hparams, hparams.batch_size, hid_i)
    elif init_obj.mode in ['previous-and-random', 'only-previous']:
        z_batch = model_def.get_z_var(model_hparams, hparams.batch_size, hid_i)
        init_op_par = tf.assign(
            z_batch, truncate_val(model_hparams, hparams, hid_i, init_obj, stdv=0))
    else:
        z_batch = truncate_val(model_hparams, hparams, hid_i, init_obj, stdv=0.1)
    _, x_hat_batch, _ = model_def.generator_i(model_hparams, z_batch, 'gen',
                                              hparams.bol, hid_i, relative=False)
    x_hat_batch_dict[hid_i] = x_hat_batch

    # Measure the estimate
    if hparams.measurement_type == 'project':
        y_hat_batch = tf.identity(x_hat_batch, name='y_hat_batch')
    else:
        y_hat_batch = tf.matmul(x_hat_batch, A, name='y_hat_batch')

    # Define all losses
    m_loss1_batch = tf.reduce_mean(tf.abs(y_batch - y_hat_batch), 1)
    m_loss2_batch = tf.reduce_mean((y_batch - y_hat_batch)**2, 1)
    if hparams.stdv > 0:
        norm_val = 1 / (hparams.stdv**2)
    else:
        norm_val = 1e+20
    zp_loss_batch = tf.reduce_sum(
        (z_batch - tf.ones(tf.shape(z_batch)) * hparams.mean)**2 * norm_val,
        1)  # added normalization

    # Define total loss
    total_loss_batch = hparams.mloss1_weight * m_loss1_batch \
                       + hparams.mloss2_weight * m_loss2_batch \
                       + hparams.zprior_weight * zp_loss_batch
    total_loss = tf.reduce_mean(total_loss_batch)
    total_loss_dict[hid_i] = total_loss

    # Compute means for logging
    m_loss1 = tf.reduce_mean(m_loss1_batch)
    m_loss2 = tf.reduce_mean(m_loss2_batch)
    zp_loss = tf.reduce_mean(zp_loss_batch)
    m_loss1_batch_dict[hid_i] = m_loss1
    m_loss2_batch_dict[hid_i] = m_loss2
    zp_loss_batch_dict[hid_i] = zp_loss

    # Set up gradient descent
    var_list = [z_batch]
    if recovered:
        global_step = tf.Variable(hparams.optim.global_step, trainable=False,
                                  name='global_step')
    else:
        global_step = tf.Variable(0, trainable=False, name='global_step')
    learning_rate = utils.get_learning_rate(global_step, hparams)
    opt = utils.get_optimizer(learning_rate, hparams)
    update_op = opt.minimize(total_loss, var_list=var_list,
                             global_step=global_step, name='update_op')
    opt_reinit_op = utils.get_opt_reinit_op(opt, var_list, global_step)

    # Initialize and restore model parameters
    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    # Restore the setting
    if 'alt' in hparams.model_types[0]:
        factor = 1
    else:
        factor = len(hparams.grid)
    # Changes the object (call by reference); necessary, since the call of
    # generator_i might change the batch size.
    model_def.batch_size = hparams.batch_size * factor
    model_selection.restore(sess, hid_i)

    if recovered:
        best_keeper = hparams.optim.best_keeper
    else:
        best_keeper = utils.BestKeeper(hparams, logg_z=True)
    if hparams.measurement_type == 'project':
        feed_dict = {y_batch: y_batch_val}
    else:
        feed_dict = {A: A_val, y_batch: y_batch_val}

    flag = False
    for i in range(init_obj.num_random_restarts):
        if recovered and i <= hparams.optim.i:
            # Losing the optimizer's state; a keras implementation may be better
            if i < hparams.optim.i:
                continue
            else:
                sess.run(utils.get_opt_reinit_op(opt, [], global_step))
                sess.run(tf.assign(z_batch, hparams.optim.z_batch))
        else:
            sess.run(opt_reinit_op)
        if i < 1 and init_obj.mode in ['previous-and-random', 'only-previous']:
            print('Using previous outcome as starting point')
            sess.run(init_op_par)
        for j in range(hparams.max_update_iter):
            if recovered and j < hparams.optim.j:
                continue
            _, lr_val, total_loss_val, \
                m_loss1_val, \
                m_loss2_val, \
                zp_loss_val = sess.run([update_op, learning_rate, total_loss,
                                        m_loss1, m_loss2, zp_loss],
                                       feed_dict=feed_dict)
            if hparams.gif and ((j % hparams.gif_iter) == 0):
                images = sess.run(x_hat_batch, feed_dict=feed_dict)
                for im_num, image in enumerate(images):
                    save_dir = '{0}/{1}/{2}/'.format(hparams.gif_dir, hid_i, im_num)
                    utils.set_up_dir(save_dir)
                    save_path = save_dir + '{0}.png'.format(j)
                    image = image.reshape(hparams.image_shape)
                    save_image(image, save_path)
            if j % 100 == 0 and early_stop:
                x_hat_batch_val = sess.run(x_hat_batch, feed_dict=feed_dict)
                if check_tolerance(hparams, A_val, x_hat_batch_val, y_batch_val)[1]:
                    flag = True
                    print('Early stopping')
                    break
            if j % 25 == 0:  # Do not log on every iteration
                logging_format = ('hid {} rr {} iter {} lr {} total_loss {} '
                                  'm_loss1 {} m_loss2 {} zp_loss {}')
                print(logging_format.format(hid_i, i, j, lr_val, total_loss_val,
                                            m_loss1_val, m_loss2_val, zp_loss_val))
            if j % 100 == 0:
                x_hat_batch_val, total_loss_batch_val, z_batch_val = sess.run(
                    [x_hat_batch, total_loss_batch, z_batch], feed_dict=feed_dict)
                best_keeper.report(x_hat_batch_val, total_loss_batch_val,
                                   z_val=z_batch_val)
                optim.global_step = sess.run(global_step)
                optim.A = A_val
                optim.y_batch = y_batch_val
                optim.i = i
                optim.j = j
                optim.z_batch = z_batch_val
                optim.best_keeper = best_keeper
                optim.bs = bs
                optim.init_obj = init_obj
                utils.save_to_pickle(
                    optim,
                    utils.get_checkpoint_dir(hparams, hparams.model_types[0])
                    + 'tmp/optim.pkl')
                print('Checkpoint of optimization created')
        hparams.optim.j = 0
        x_hat_batch_val, total_loss_batch_val, z_batch_val = sess.run(
            [x_hat_batch, total_loss_batch, z_batch], feed_dict=feed_dict)
        best_keeper.report(x_hat_batch_val, total_loss_batch_val,
                           z_val=z_batch_val)
        if flag:
            break
    tf.reset_default_graph()
    return best_keeper.get_best()
from keras.datasets import mnist
import matplotlib.pyplot as plt
import random

import utils
import config

(x_train, y_train), (x_test, y_test) = mnist.load_data()
train_data = []
test_data = []
for i in range(10):
    train_data.append(x_train[y_train == i])
    test_data.append(x_test[y_test == i])
utils.save_to_pickle(train_data, config.TRAIN_DATA_PATH)
utils.save_to_pickle(test_data, config.TEST_DATA_PATH)

# Check data
train_data = utils.load_pickle(config.TRAIN_DATA_PATH)
plt.figure(figsize=(5, 10))
for i in range(10):
    data_single = train_data[i]
    show_sample_index = random.sample(range(len(data_single)), 5)
    for j in range(5):
        plt.subplot(10, 5, 5 * i + j + 1)
        plt.imshow(data_single[show_sample_index[j]])
        title = '%s' % (i)
        plt.title(title)
plt.show()
sess.run(train_step, feed_dict={x: state_batch,
                                y: target_reward_batch,
                                a: action_batch})
# Update the replay memory
replay, next_state = utils.update_replay(game_state, replay, esplion, x,
                                         prediction, sess, curr_state)
# Update curr_state
curr_state = next_state
# Update step
step += 1
# Decay epsilon as training proceeds, so the probability of taking a random
# action gets lower and lower
if esplion > config.FINIAL_ESPLION:
    esplion -= (config.ESPLION - config.FINIAL_ESPLION) / config.EXPLORE
if step % 1000 == 0:
    train_loss = sess.run(loss, feed_dict={x: state_batch,
                                           y: target_reward_batch,
                                           a: action_batch})
    duration = time.time() - start_time
    logger.info("step %d: loss is %g, esplion is %g (%0.3f sec)" %
                (step, train_loss, esplion, duration))
    start_time = time.time()
if step % 10000 == 0:
    saver.save(sess, config.CHECKFILE, global_step=step)
    utils.save_to_pickle([game_state, replay, curr_state, esplion],
                         config.SAVEFILE)
    print('writing checkpoint at step %s' % step)
app = Flask(__name__)


@app.route("/")
def home():
    return render_template("maintemplate.html")


restart_dialogue = True
chat_history_ids = None
helper_data = {
    "restart_dialogue": restart_dialogue,
    "chat_history_ids": chat_history_ids
}
save_to_pickle(helper_data, TMP_FILENAME_FOR_DIALOGUE_HELPER_DATA)

restart_keywords = load_keywords_from_csv(RESTART_KEYWORDS_PATH)
exit_keywords = load_keywords_from_csv(EXIT_KEYWORDS_PATH)


@app.route("/get")
def get_bot_response():
    user_text = request.args.get('msg')
    custom_answer = find_custom_answer(user_text, threshold=COS_SIM_THRESHOLD)
    if custom_answer:
        reply_text = custom_answer
    else:
        helper_data = load_from_pickle(TMP_FILENAME_FOR_DIALOGUE_HELPER_DATA)
        restart_dialogue = helper_data["restart_dialogue"]
        chat_history_ids = helper_data["chat_history_ids"]
def execute_task6(request):
    query_image = request.POST.get('query_image')
    most_similar_images = int(request.POST.get('most_similar_images'))
    query_image_folder_name = request.POST.get('query_image_folder_name')
    relevance_feedback = request.POST.get('relevance_feedback')
    lsh = read_from_pickle('lsh_model')
    db_connection = DatabaseConnection()
    image_vector = db_connection.get_feature_data_for_image(
        'histogram_of_gradients', query_image)
    image_vector = np.asarray(image_vector.flatten())

    if read_from_pickle('all_img_features_LSH.pickle') is not None:
        all_image_hog_features = read_from_pickle('all_img_features_LSH.pickle')
    else:
        all_image_hog_features = db_connection.get_object_feature_matrix_from_db(
            tablename='histogram_of_gradients')
        save_to_pickle(all_image_hog_features, 'all_img_features_LSH.pickle')

    # SVD on hog features
    if read_from_pickle('svd_hog_lsh.pickle') is not None:
        svd_obj = read_from_pickle('svd_hog_lsh.pickle')
        transformed_data = svd_obj['data_matrix']
        vt = svd_obj['vt']
    else:
        svd = SingularValueDecomposition()
        transformed_data, vt = svd.get_transformed_data_copy(
            all_image_hog_features['data_matrix'], 400)
        save_to_pickle(
            {
                "data_matrix": transformed_data,
                "images": all_image_hog_features['images'],
                "vt": vt
            }, 'svd_hog_lsh.pickle')

    if query_image_folder_name != '':
        table_name = convert_folder_path_to_table_name(
            query_image_folder_name, 'histogram_of_gradients')
        image_vector = db_connection.get_feature_data_for_image(
            table_name, query_image)
    image_vector = np.dot(image_vector.astype(float), np.transpose(vt))

    new_obj = {}
    new_obj['data_matrix'] = transformed_data
    new_obj['images'] = all_image_hog_features['images']
    (sorted_k_values,
     result_stats) = lsh.find_ksimilar_images(k=most_similar_images,
                                              image_vector=image_vector,
                                              all_image_hog_features=new_obj)

    # Now getting a bigger test dataset for relevance feedback
    if relevance_feedback == "Probabilistic":
        (test_dataset, result_stats) = lsh.find_ksimilar_images(
            k=10 + most_similar_images, image_vector=image_vector,
            all_image_hog_features=new_obj)
    else:
        (test_dataset, result_stats) = lsh.find_ksimilar_images(
            k=200 + most_similar_images, image_vector=image_vector,
            all_image_hog_features=new_obj)
    save_to_pickle(test_dataset, 'test_dataset.pickle')

    print(sorted_k_values[:most_similar_images])
    return render(
        request, 'visualize_images.html', {
            'images': sorted_k_values[:most_similar_images],
            "from_task": "task5",
            'rel_type': relevance_feedback,
            "q": query_image,
            "t": most_similar_images,
            "num_total": result_stats['total'],
            "num_unique": result_stats['unique']
        })
def make_dicts(reset_all, make_val):
    # Create the meta directory, if it doesn't already exist, for all dictionaries generated below
    os.makedirs(meta_dir, exist_ok=True)

    train_data = dict([(img, whale)
                       for (_, img, whale) in read_csv(train_csv).to_records()])
    test_data = [img for (_, img, _) in read_csv(sample_csv).to_records()]

    # Load the whale_to_imgs dictionary if it exists, or create it otherwise
    if isfile(whale2imgs_file) and not reset_all:
        whale2imgs = load_pickle_file(whale2imgs_file)
    else:
        whale2imgs = {}
        for img, whale in tqdm(train_data.items()):
            if whale not in whale2imgs:
                whale2imgs[whale] = []
            if img not in whale2imgs[whale]:
                whale2imgs[whale].append(img)
        save_to_pickle(whale2imgs_file, whale2imgs)

    if not isfile(img2whale_file) or reset_all:
        # Find elements from the training set other than 'new_whale'
        img2whale = {}
        for img, whale in tqdm(train_data.items()):
            if whale != 'new_whale':
                if img not in img2whale:
                    img2whale[img] = whale
        train_known = sorted(list(img2whale.keys()))
        save_to_pickle(img2whale_file, img2whale)
        save_to_pickle(train_known_file, train_known)
        save_to_pickle(train_submit_file, test_data)

    if not (isfile(train_examples_file) and isfile(validation_examples_file)
            and reset_all == False):
        train_examples = []
        validation_examples = []
        lonely = []
        new_whale = []
        val_match = []

        lonely_count = len([x for x in whale2imgs.values() if len(x) == 1])  # 2073
        couple_count = len([x for x in whale2imgs.values() if len(x) == 2])  # 1285
        new_count = len([x for x in train_data.values() if x == 'new_whale'])  # 9664
        # Additional matching-whale count needed for creating a balanced validation
        # dataset (same number of matching and unmatching examples)
        extra_count = lonely_count - couple_count  # 2073 - 1285 = 788

        val_known = []
        val_submit = []
        matching_count = 0
        small_train_examples = []
        small_count = 0

        if make_val:
            for whale, imgs in tqdm(whale2imgs.items()):
                if whale == 'new_whale':
                    new_whale += imgs
                elif len(imgs) == 1:
                    lonely += imgs
                    val_known += imgs
                elif len(imgs) == 2:
                    val_match.append((imgs[0], imgs[1], 1))
                    val_known.append(imgs[1])
                    val_submit.append((imgs[0], whale))
                elif len(imgs) >= 4 and matching_count < extra_count:
                    val_match.append((imgs[0], imgs[1], 1))
                    val_known.append(imgs[0])
                    val_submit.append((imgs[1], whale))
                    matching_count += 1
                    train_examples += imgs[2:]
                    if (small_count + 2) % 10 < 2:
                        small_train_examples += imgs[2:]
                        small_count += 2
                else:
                    train_examples += imgs
                    if (small_count + len(imgs)) % 10 < len(imgs):
                        small_train_examples += imgs
                        small_count += len(imgs)
        else:
            for whale, imgs in tqdm(whale2imgs.items()):
                if whale == 'new_whale':
                    new_whale += imgs
                elif len(imgs) == 1:
                    lonely += imgs
                    val_known += imgs
                elif len(imgs) == 2:
                    val_match.append((imgs[0], imgs[1], 1))
                    val_known.append(imgs[1])
                    val_submit.append((imgs[0], whale))
                    train_examples += imgs
                    if (small_count + 2) % 10 < 2:
                        small_train_examples += imgs
                        small_count += 2
                elif len(imgs) >= 4 and matching_count < extra_count:
                    val_match.append((imgs[0], imgs[1], 1))
                    val_known.append(imgs[0])
                    val_submit.append((imgs[1], whale))
                    matching_count += 1
                    train_examples += imgs
                    if (small_count + len(imgs)) % 10 < len(imgs):
                        small_train_examples += imgs
                        small_count += len(imgs)
                else:
                    train_examples += imgs
                    if (small_count + len(imgs)) % 10 < len(imgs):
                        small_train_examples += imgs
                        small_count += len(imgs)

        print('lonely whales count: ', lonely_count)
        print('new whales count: ', new_count)
        print('couple whales count: ', couple_count)
        print('extra whales count: ', extra_count)

        random.shuffle(lonely)
        val_unmatch = list(
            zip(lonely,
                np.random.choice(new_whale, size=lonely_count, replace=False),
                np.zeros(lonely_count, dtype=np.int8)))
        validation_examples = val_match + val_unmatch
        random.shuffle(validation_examples)
        random.shuffle(train_examples)

        # small_train_size = len(train_examples) // 10
        # small_train_examples = train_examples[:small_train_size]
        small_validation_size = len(validation_examples) // 10
        small_validation_examples = validation_examples[:small_validation_size]

        # print('TRAIN')
        # print(train_examples[:10])
        # print('VALIDATION')
        # print(validation_examples[:10])
        print('Train size: ', len(train_examples))
        print('Validation size: ', len(validation_examples))
        print('val_known size: ', len(val_known))
        print('val_submit size: ', len(val_submit))

        save_to_pickle(train_examples_file, train_examples)
        save_to_pickle(validation_examples_file, validation_examples)
        save_to_pickle(train_examples_small_file, small_train_examples)
        save_to_pickle(validation_examples_small_file, small_validation_examples)
        save_to_pickle(val_known_file, val_known)
        save_to_pickle(val_submit_file, val_submit)
def run_sagan(device, image_size, noise_size, batch_size, config, run_dir,
              saved_dir, run_name, num_epochs, val_dataset, train_dataset=None,
              checkpoints=None, mode='train', gpu_num=1):
    type = 'SAGAN'
    sagan_generator = SAGenerator(noise_size=noise_size,
                                  image_size=image_size).to(device)
    sagan_discriminator = SADiscriminator(image_size=image_size).to(device)

    # Parallelize for improved performance
    if (device.type == 'cuda') and (gpu_num > 1):
        sagan_generator = nn.DataParallel(sagan_generator, list(range(gpu_num)))
        sagan_discriminator = nn.DataParallel(sagan_discriminator,
                                              list(range(gpu_num)))

    # Print networks
    print('Discriminator')
    summary(sagan_discriminator, (3, image_size, image_size))
    print('Generator')
    summary(sagan_generator, (noise_size, 1, 1))

    if checkpoints is not None:
        utils.load_from_checkpoint(sagan_generator, saved_dir,
                                   checkpoints["generator"])
        utils.load_from_checkpoint(sagan_discriminator, saved_dir,
                                   checkpoints["discriminator"])

    run_name = 'SAGAN' + '_' + run_name
    if mode == 'train':
        inception_FID_scores, inception_scores = train_gan(
            num_epochs, batch_size, noise_size, device, train_dataset,
            val_dataset, sagan_generator, sagan_discriminator, type='SAGAN',
            config=config, run_dir=run_dir, saved_dir=saved_dir,
            run_name=run_name)
    elif mode == 'test':
        inception_FID_scores = [
            calc_inception_FID_score(batch_size, device, val_dataset,
                                     sagan_generator, type, noise_size)
        ]
        inception_scores = [
            calc_inception_score(device, noise_size, sagan_generator,
                                 eval_size=len(val_dataset))
        ]

    date_str = datetime.datetime.now().strftime("%m%d%Y%H")
    utils.save_to_pickle(
        inception_FID_scores,
        os.path.join(saved_dir, 'sagan_fid_' + run_name + date_str + ".pickle"))
    utils.save_to_pickle(
        inception_scores,
        os.path.join(saved_dir, 'sagan_IS_' + run_name + date_str + ".pickle"))
    return inception_FID_scores, inception_scores
def run(group, problem, alpha, file_prefix, total_time, simulation_time):
    regret_min = []
    regret_avg = []
    r = []
    individual_regret = np.zeros([group.size, problem.iterations])
    start_time = time.time()

    ########## RUN PERCEPTRON AVERAGE ###############
    for counter in range(problem.iterations - 1):
        weights = group.weights
        x = utils.model(weights, problem)
        print('System recommendation using the average perceptron: ', x.object)
        regret = group.utility_star_x_star - x.get_utility(group.avg_weights_star)
        r.append(regret)

        # Calculate the regret for every individual user
        for i in range(0, group.size):
            usr = group.users[i]
            reg = usr.get_regret(x.phi)
            individual_regret[i, counter] = reg

        if regret == 0.0:
            final_time = (time.time() - start_time) + total_time
            file = utils.save_to_pickle(x.object, r, regret_min, regret_avg,
                                        regret_min, 'average', file_prefix,
                                        problem, group, simulation_time,
                                        final_time, counter, individual_regret)
            return x.object, simulation_time, file

        # phi_y_bar, s_t = average(group, phi_y_list, alpha)
        improvements = []
        for u in group.users:
            sim_start_time = time.time()
            x_bar = u.step(x.phi, x.object, alpha)
            bar = [
                np.array(x_bar.phi[0]),
                np.array(x_bar.phi[1]),
                np.array(x_bar.phi[2]),
                np.array(x_bar.phi[3])
            ]
            improvements.append(bar)
            # y, improvements[group.get_index(u)] = u.step(phi_y, alpha)
            temp_simulation_time = time.time() - sim_start_time
            simulation_time += temp_simulation_time
            print('Improvement user ', group.get_index(u), ': ', x_bar.object)

        phi_x_bar = utils.avg_phi_list(improvements, group.problem)
        group.update_weights(phi_x_bar, x.phi)

    x = utils.model(weights, problem)
    regret = group.utility_star_x_star - x.get_utility(group.avg_weights_star)
    r.append(regret)

    # Calculate the regret for every individual user
    for i in range(0, group.size):
        usr = group.users[i]
        reg = usr.get_regret(x.phi)
        individual_regret[i, counter + 1] = reg

    final_time = (time.time() - start_time) + total_time
    # y, regret_avg, regret_min, regret_su_avg, regret_su_min, strategy,
    # file_prefix, problem, group, sim_time, runtime, counter
    file = utils.save_to_pickle(x.object, r, regret_min, regret_avg, regret_min,
                                'average', file_prefix, problem, group,
                                simulation_time, final_time, problem.iterations,
                                individual_regret)
    return x.object, simulation_time, file
def run(strategy, group, problem, alpha, file_prefix, total_time,
        simulation_time, avg=False):
    regret_min = []
    regret_avg = []
    regret_su_min = []
    regret_su_avg = []
    individual_regret = np.zeros([group.size, problem.iterations])
    start_time = time.time()

    ########## RUN PERCEPTRON AVERAGE ###############
    extra_time = 0
    for counter in range(problem.iterations - 1):
        print('Iteration: (', counter, '/', problem.iterations, ')')

        ###### USER SELECTION #######
        if strategy == strategy.LEAST_MISERY:
            user, x_group = utils.get_least_misery_user(group, problem, avg)
        elif strategy == strategy.RANDOM:
            user, x_group, extra_t = utils.get_random_user(group, problem, avg)
            extra_time += extra_t
        ############################

        # Find the object to present to this user
        x = utils.model(user.current_weights, problem)
        print('System recommendation using the ', strategy.value, ' strategy: ',
              x.object)

        # Calculate regret for the group recommendation
        # note: x_group is used because we want to know the group-wide regret;
        # x is generated using individual weights
        rgrt_avg = utils.get_regret(x_group, group, aggregation_function='avg')
        regret_avg.append(rgrt_avg)
        rgrt_min = utils.get_regret(x_group, group, aggregation_function='min')
        regret_min.append(rgrt_min)

        # Calculate the regret for the picked user
        rgrt_avg = utils.get_regret(x, group, aggregation_function='avg')
        regret_su_avg.append(rgrt_avg)
        rgrt_min = utils.get_regret(x, group, aggregation_function='min')
        regret_su_min.append(rgrt_min)

        # Calculate the regret for every individual user
        for i in range(0, group.size):
            usr = group.users[i]
            reg = usr.get_regret(x_group.phi)
            individual_regret[i, counter] = reg

        # The algorithm ends when the ideal object is found
        if rgrt_min == 0.0 and not avg:
            final_time = ((time.time() - start_time) + total_time) - extra_time
            file = utils.save_to_pickle(x_group.object, regret_avg, regret_min,
                                        regret_su_avg, regret_su_min,
                                        strategy.value, file_prefix, problem,
                                        group, simulation_time, final_time,
                                        counter, individual_regret)
            return x_group.object, simulation_time, file
        if rgrt_avg == 0.0 and avg:
            final_time = ((time.time() - start_time) + total_time) - extra_time
            file = utils.save_to_pickle(x_group.object, regret_avg, regret_min,
                                        regret_su_avg, regret_su_min,
                                        strategy.value, file_prefix, problem,
                                        group, simulation_time, final_time,
                                        counter, individual_regret)
            return x_group.object, simulation_time, file

        # start_time = time.time()
        start_sim_time = time.time()
        # Simulate a step from the chosen user
        x_bar = user.step(x.phi, x.object, alpha)
        temp_simulation_time = time.time() - start_sim_time
        print('Improvement user: '******': ', x_bar.object)
        simulation_time += temp_simulation_time

        # Update weights for the chosen user
        user.update_weights(x_bar.phi, x.phi)

    x_final = utils.get_aggregation_object(group, problem, avg)
    regret_avg.append(
        utils.get_regret(x_final, group, aggregation_function='avg'))
    regret_min.append(
        utils.get_regret(x_final, group, aggregation_function='min'))
    regret_su_avg.append(
        utils.get_regret(x_final, group, aggregation_function='avg'))
    regret_su_min.append(
        utils.get_regret(x_final, group, aggregation_function='min'))

    for i in range(0, group.size):
        usr = group.users[i]
        reg = usr.get_regret(x_final.phi)
        individual_regret[i, counter + 1] = reg

    final_time = ((time.time() - start_time) + total_time) - extra_time
    file = utils.save_to_pickle(x_final, regret_avg, regret_min, regret_su_avg,
                                regret_su_min, strategy.value, file_prefix,
                                problem, group, simulation_time, final_time,
                                problem.iterations, individual_regret)
    return x_final, simulation_time, file
def run_pubmed():
    data_path = "pubmed_adr/data/ADE-Corpus-V2/DRUG-AE.rel"
    final_data, idx2word, idx2label, maxlen, vocsize, nclasses, tok_senc_adr, \
        train_lex, test_lex, train_y, test_y, saved_data = \
        data_processing(data_path)

    test_toks = []
    test_tok_senc_adr = tok_senc_adr[TRAIN_NUMBER:]
    for i in test_tok_senc_adr:
        test_toks.append(i[0])

    train_toks = []
    train_tok_senc_adr = tok_senc_adr[:TRAIN_NUMBER]
    for i in train_tok_senc_adr:
        train_toks.append(i[0])

    # Char embedding
    char_per_word = []
    char_word = []
    char_senc = []
    maxlen_char_word = 0
    a = []  # save max_len_char_word
    for s in (train_toks + test_toks):
        for w in s:
            for c in w.lower():
                char_per_word.append(c)
            if len(char_per_word) > 37:
                a.append(char_per_word)
                char_per_word = char_per_word[:37]
            if len(char_per_word) > maxlen_char_word:
                maxlen_char_word = len(char_per_word)
            char_word.append(char_per_word)
            char_per_word = []
        char_senc.append(char_word)
        char_word = []

    charcounts = collections.Counter()
    for senc in char_senc:
        for word in senc:
            for charac in word:
                charcounts[charac] += 1
    chars = [
        charcount[0]
        for charcount in charcounts.most_common(MAX_CHAR_VOCAB_SIZE)
        if charcount[1] >= MIN_CHAR_VOCAB_COUNT
    ]
    # Same as with words, we keep an 'UNK' token
    char2idx = {c: i + 2 for i, c in enumerate(chars)}

    char_word_lex = []
    char_lex = []
    char_word = []
    for senc in char_senc:
        for word in senc:
            for charac in word:
                char_word_lex.append(
                    [char2idx[charac] if charac in char2idx else 1])
            char_word.append(char_word_lex)
            char_word_lex = []
        char_lex.append(char_word)
        char_word = []

    char_per_word = []
    char_per_senc = []
    char_senc = []
    for s in char_lex:
        for w in s:
            for c in w:
                for e in c:
                    char_per_word.append(e)
            char_per_senc.append(char_per_word)
            char_per_word = []
        char_senc.append(char_per_senc)
        char_per_senc = []

    pad_char_all = []
    for senc in char_senc:
        while len(senc) < maxlen:
            senc.insert(0, [])
        pad_senc = pad_sequences(senc, maxlen=maxlen_char_word)
        pad_char_all.append(pad_senc)
        pad_senc = []
    pad_char_all = np.array(pad_char_all)

    pad_train_lex = pad_char_all[:TRAIN_NUMBER]
    pad_test_lex = pad_char_all[TRAIN_NUMBER:]

    idx2char = dict((k, v) for v, k in char2idx.items())
    idx2char[0] = 'PAD'
    idx2char[1] = 'UNK'
    charsize = max(idx2char.keys()) + 1

    # Load word embeddings
    print('Loading word embeddings...')
    _ = glove2word2vec(glove_300d_path, glove_300d_tmp_path)
    w2v = KeyedVectors.load_word2vec_format(glove_300d_tmp_path, binary=False,
                                            unicode_errors='ignore')
    print('word embeddings loading done!')

    # Build the model
    print('Building the model...')
    model = build_model(maxlen, maxlen_char_word, idx2word, w2v, vocsize,
                        charsize, embed_dim, char_embed_dim, nclasses)
    model.compile(optimizer='adam', loss='categorical_crossentropy')

    # Model-saving callback
    cp_callback = keras.callbacks.ModelCheckpoint(filepath=MODEL_OUTPUT_PATH,
                                                  verbose=1,
                                                  save_weights_only=True,
                                                  period=2)

    print('Training...')
    history = model.fit([train_lex, pad_train_lex], train_y,
                        batch_size=BATCH_SIZE, validation_split=0.1,
                        epochs=NUM_EPOCHS, callbacks=[cp_callback])

    # Prediction results
    predir = 'pubmed_adr/model_output/predictions'
    fileprefix = 'embedding_level_attention_'
    scores = predict_score(model, [test_lex, pad_test_lex], test_toks, test_y,
                           predir, idx2label, maxlen, fileprefix=fileprefix)

    saved_data["char2idx"] = char2idx
    saved_data["max_char_len"] = maxlen_char_word
    saved_data["vocsize"] = vocsize
    saved_data["charsize"] = charsize
    saved_data["embed_dim"] = embed_dim
    saved_data["char_embed_dim"] = char_embed_dim
    saved_data["num_classes"] = nclasses
    saved_data["idx2label"] = idx2label

    utils.save_to_pickle(test_lex, "test_lex.pickle")
    utils.save_to_pickle(pad_test_lex, "pad_test_lex.pickle")
    utils.save_to_pickle(test_toks, "test_toks.pickle")

    with open(META_PATH, "w") as f:
        json.dump(saved_data, f)
def _save(self):
    """
    Save all robot objects, using pickle.
    """
    ### XXX better just save the factory itself instead?
    # Send a list of objects to save
    robot_list = [self.robots[k] for k in self.robots.keys()]
    save_to_pickle(robot_list)