Example no. 1
def main():
    transition_cmds = [
        # './adb shell input tap 383 1203',  # click on next (Challenge mode)
        './adb shell input tap 549 156',  # tap outside to remove touch artifacts (example: developer options)
    ]

    # Press back three times so that even if we tap an ad by mistake, we return to the game
    back_cmds = ['./adb shell input keyevent 4' for _ in range(3)]

    pkl_filepath = './log.pkl'
    log = utils.load_if_pickled(pkl_filepath)

    new_in_log = 0
    while True:
        if new_in_log >= 5:
            utils.save_to_pickle(log, pkl_filepath)
            new_in_log = 0

        try:
            utils.run_cmds(transition_cmds)
            rundata = solve()
            log.append(rundata)
            new_in_log += 1
            time.sleep(4)  # Time in seconds.
        except (solver.BFSError, solver.HashError):
            utils.run_cmds(back_cmds)
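
Example no. 1 above relies on pickle helpers from utils that are not shown in the snippet. A minimal sketch of what load_if_pickled and save_to_pickle might look like, inferred only from how they are called here (names, signatures, and behaviour are assumptions, not the original implementation):

import os
import pickle


def load_if_pickled(pkl_filepath):
    # Assumed behaviour: return the previously pickled log, or an empty list
    # when no pickle file exists yet (the caller appends run data to it).
    if os.path.exists(pkl_filepath):
        with open(pkl_filepath, 'rb') as f:
            return pickle.load(f)
    return []


def save_to_pickle(data, pkl_filepath):
    # Assumed behaviour: overwrite the pickle file with the current log.
    with open(pkl_filepath, 'wb') as f:
        pickle.dump(data, f)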
Example no. 2
def preprocess_data(train_files, config):
    x_train, y_train, x_dev, y_dev = [], [], [], []
    for file in tqdm(train_files, desc="preprocessing_data"):
        cache_file_path = file.replace("data", "cache").replace(
            ".csv", "_fw{}_pw{}_pad{}_ts{}_cache.pickle".format(
                config["feature_window_size"],
                config["prediction_window_size"], config["pad_size"],
                config["test_size"]))
        if os.path.exists(cache_file_path):
            print("Load cache from {}".format(cache_file_path))
            x_window_train, y_window_train, x_window_dev, y_window_dev = utils.load_from_pickle(
                cache_file_path)
        else:
            x_train_single, y_train_single, x_dev_single, y_dev_single = read_data_from_file(
                file, config["test_size"])
            x_window_train, y_window_train, x_window_dev, y_window_dev = \
                prepare_data(x_train_single, y_train_single, x_dev_single, y_dev_single, config)
            print("Save cache to {}".format(cache_file_path))
            utils.save_to_pickle(
                (x_window_train, y_window_train, x_window_dev, y_window_dev),
                cache_file_path)
        x_train.extend(x_window_train)
        y_train.extend(y_window_train)
        x_dev.extend(x_window_dev)
        y_dev.extend(y_window_dev)
    return x_train, y_train, x_dev, y_dev
Example no. 3
def get_bot_response():
    user_text = request.args.get('msg')

    custom_answer = find_custom_answer(user_text, threshold=COS_SIM_THRESHOLD)
    if custom_answer:
        reply_text = custom_answer
    else:
        helper_data = load_from_pickle(TMP_FILENAME_FOR_DIALOGUE_HELPER_DATA)
        restart_dialogue = helper_data["restart_dialogue"]
        chat_history_ids = helper_data["chat_history_ids"]

        if any(w == user_text.strip().lower() for w in restart_keywords):
            reply_text = "Ok, let's start from scratch, I am ready"
            restart_dialogue = True
        elif any(w == user_text.strip().lower() for w in exit_keywords):
            reply_text = "Ok, bye! Just waiting if you type something..."
            restart_dialogue = True
        else:
            reply_text, chat_history_ids = dialog_gpt(user_text,
                                                      chat_history_ids,
                                                      restart_dialogue)
            restart_dialogue = False

        helper_data = {
            "restart_dialogue": restart_dialogue,
            "chat_history_ids": chat_history_ids
        }
        save_to_pickle(helper_data, TMP_FILENAME_FOR_DIALOGUE_HELPER_DATA)

    return reply_text
Example no. 4
def set_hsv_std_values(hsv_object):
    """
        Save given HSV object into '/pickle_files/hsv.pickle'.

        :param hsv_object: contains min and max values for hue, saturation and value.
        :return:
    """
    utils.save_to_pickle(HSV_PICKLE_PATH, _object=hsv_object)
    logging.info("Data saved to '%s.pickle'" % HSV_PICKLE_PATH)
Example no. 5
 def save_to_pickle(self, pickle_file=None):
     if not pickle_file:
         if self.is_train_data:
             pickle_file = TRAIN_DATA_PICKLE
         else:
             pickle_file = EVAL_DATA_PICKLE
     path = os.path.join(self.save_path, pickle_file)
     save_to_pickle(self.images_data, path)
     return path
Example no. 6
def run_srgan(device,
              image_size,
              batch_size,
              config,
              run_dir,
              saved_dir,
              run_name,
              num_epochs,
              val_dataset,
              train_dataset=None,
              checkpoints=None,
              mode='train',
              gpu_num=1):

    srgan_generator = SRGenerator(device).to(device)
    srgan_discriminator = SRDiscriminator().to(device)
    summary(srgan_discriminator, (3, image_size, image_size))
    summary(srgan_generator, (3, image_size // 4, image_size // 4))

    if checkpoints is not None:
        utils.load_from_checkpoint(srgan_generator, saved_dir,
                                   checkpoints["generator"])
        utils.load_from_checkpoint(srgan_discriminator, saved_dir,
                                   checkpoints["discriminator"])

    run_name = 'SRGAN' + '_' + run_name
    if mode == 'train':
        inception_FID_scores = train_gan(num_epochs,
                                         batch_size,
                                         None,
                                         device,
                                         train_dataset,
                                         val_dataset,
                                         srgan_generator,
                                         srgan_discriminator,
                                         type='SRGAN',
                                         config=config,
                                         run_dir=run_dir,
                                         saved_dir=saved_dir,
                                         run_name=run_name,
                                         calc_IS=False)
    elif mode == 'test':
        inception_FID_scores = [
            calc_inception_FID_score(batch_size,
                                     device,
                                     val_dataset,
                                     srgan_generator,
                                     type='SRGAN')
        ]

    date_str = datetime.datetime.now().strftime("%m%d%Y%H")
    utils.save_to_pickle(
        inception_FID_scores,
        os.path.join(saved_dir, 'srgan_fid_' + date_str + ".pickle"))

    return inception_FID_scores
Example no. 7
def model_selection(x_train, y_train, x_val, y_val, w0, epochs, eta,
                    mini_batch, lambdas):
    '''
    :param x_train: training input set, N x D
    :param y_train: training target set, N x K
    :param x_val: validation input set, Nval x D
    :param y_val: validation target set, Nval x K
    :param w0: vector of initial parameter values
    :param epochs: numbers of SGD epochs to be tried
    :param eta: learning rates to be tried
    :param mini_batch: mini-batch sizes to be tried
    :param lambdas: list of regularization parameter (lambda) values to be tried
    :return: performs model selection. Returns a tuple (best_lambda, best_w, best_error, best_eta, best_epochs, best_mb)
            with the parameter values of the best selected model.
    '''

    best_lambda = 0
    best_w = w0
    best_error = 1
    best_eta = 0
    best_mb = 0
    best_epochs = 0

    for current_lamda in lambdas:
        for current_epochs in epochs:
            for current_eta in eta:
                for current_mb in mini_batch:

                    def nowa(w, x, y):
                        return regularized_logistic_cost_function(
                            w, x, y, current_lamda)

                    w = stochastic_gradient_descent(nowa, x_train, y_train, w0,
                                                    current_epochs,
                                                    current_eta, current_mb)
                    y_pred = prediction(x_val, w)
                    current_error = prediction_error(y_pred, y_val)
                    print(
                        "Lambda: {}, Eta: {}, Batch: {}, Epochs: {} → ERROR: {}"
                        .format(current_lamda, current_eta, current_mb,
                                current_epochs, current_error))
                    if (current_error < best_error):
                        best_lambda = current_lamda
                        best_w = w
                        best_error = current_error
                        best_eta = current_eta
                        best_mb = current_mb
                        best_epochs = current_epochs
                        save_to_pickle('parameters-thebest.pkl', best_w)

    return best_lambda, best_w, best_error, best_eta, best_epochs, best_mb
Example no. 8
 def get_page_rank_eigen_vector(self,
                                image_similarity_matrix,
                                S,
                                pickle_file_name=PICKLE_FILE_NAME):
     pickle_dir = get_pickle_directory()
     interim_file_path = os.path.join(pickle_dir, pickle_file_name)
     # if(os.path.exists(interim_file_path)):
     #     interim = read_from_pickle(pickle_file_name)
     # else:
     interim = self.calculate_intermediate_page_rank_matrix(
         image_similarity_matrix)
     save_to_pickle(interim, pickle_file_name)
     pie = np.matmul(interim, S)
     return pie
Example no. 9
def execute_task5(request):
    l = int(request.POST.get('number_of_layers'))
    k = int(request.POST.get('number_of_hashes_per_layer'))
    lsh = LSH(k=k, l=l)
    dbconnection = DatabaseConnection()

    if read_from_pickle('all_img_features_LSH.pickle') is not None:
        all_image_hog_features = read_from_pickle('all_img_features_LSH.pickle')
    else:
        all_image_hog_features = dbconnection.get_object_feature_matrix_from_db(
            tablename='histogram_of_gradients')
        save_to_pickle(all_image_hog_features, 'all_img_features_LSH.pickle')
    # SVD on hog features
    if read_from_pickle('svd_hog_lsh.pickle') is not None:
        svd_obj = read_from_pickle('svd_hog_lsh.pickle')
        transformed_data = svd_obj['data_matrix']
        vt = svd_obj['vt']
    else:
        svd = SingularValueDecomposition()
        transformed_data, vt = svd.get_transformed_data_copy(
            all_image_hog_features['data_matrix'], 400)
        save_to_pickle(
            {
                "data_matrix": transformed_data,
                "images": all_image_hog_features['images'],
                "vt": vt
            }, 'svd_hog_lsh.pickle')

    # index_of_query_image = (all_image_hog_features['images']).index(query_image)
    # image_vector = transformed_data[index_of_query_image]
    bit_map = lsh.generate_representation_for_all_layers(transformed_data,all_image_hog_features['images'])

    save_to_pickle(lsh, 'lsh_model')
    return render(request, 'task5a_output.html')
Example no. 10
def run_dcgan(device,
              image_size,
              noise_size,
              batch_size,
              config,
              run_dir,
              saved_dir,
              run_name,
              num_epochs,
              val_dataset,
              train_dataset=None,
              checkpoints=None,
              mode='train',
              gpu_num=1):

    #Run DCGAN
    type = 'DCGAN'
    dcgan_generator = Generator(noise_size=noise_size,
                                image_size=image_size).to(device)
    dcgan_discriminator = Discriminator(image_size=image_size).to(device)
    #Parallel for improved performance
    if device.type == 'cuda' and gpu_num > 1:
        dcgan_generator = nn.DataParallel(dcgan_generator,
                                          list(range(gpu_num)))
        dcgan_discriminator = nn.DataParallel(dcgan_discriminator,
                                              list(range(gpu_num)))

    #Print networks
    print('Discriminator')
    summary(dcgan_discriminator, (3, image_size, image_size))
    print('Generator')
    summary(dcgan_generator, (noise_size, 1, 1))

    if checkpoints is not None:
        utils.load_from_checkpoint(dcgan_generator, saved_dir,
                                   checkpoints["generator"])
        utils.load_from_checkpoint(dcgan_discriminator, saved_dir,
                                   checkpoints["discriminator"])

    run_name = 'DCGAN' + '_' + run_name
    #We train the model in train phase and only calculate scores in test mode
    if mode == 'train':
        inception_FID_scores, inception_scores = train_gan(num_epochs,
                                                           batch_size,
                                                           noise_size,
                                                           device,
                                                           train_dataset,
                                                           val_dataset,
                                                           dcgan_generator,
                                                           dcgan_discriminator,
                                                           type='DCGAN',
                                                           config=config,
                                                           run_dir=run_dir,
                                                           saved_dir=saved_dir,
                                                           run_name=run_name)
    elif mode == 'test':
        inception_FID_scores = [
            calc_inception_FID_score(batch_size, device, val_dataset,
                                     dcgan_generator, type, noise_size)
        ]
        inception_scores = [
            calc_inception_score(device,
                                 noise_size,
                                 dcgan_generator,
                                 eval_size=len(val_dataset))
        ]
    #Return the list of all scores accumulated over the epochs

    date_str = datetime.datetime.now().strftime("%m%d%Y%H")
    save_to_pickle(
        inception_FID_scores,
        os.path.join(saved_dir,
                     'dcgan_fid_' + run_name + date_str + ".pickle"))
    save_to_pickle(
        inception_scores,
        os.path.join(saved_dir, 'dcgan_IS_' + run_name + date_str + ".pickle"))

    return inception_FID_scores, inception_scores
Example no. 11
def save_loss_gradients(loss_gradients, n_samples, filename, savedir):
    save_to_pickle(data=loss_gradients,
                   path=TESTS + savedir,
                   filename=filename + "_samp=" + str(n_samples) +
                   "_lossGrads.pkl")
Example no. 12
def stage_i(A_val,y_batch_val,hparams,hid_i,init_obj,early_stop,bs,optim,recovered=False):
    model_def = globals()['model_def']
    m_loss1_batch_dict = {}
    m_loss2_batch_dict = {}
    zp_loss_batch_dict = {}
    total_loss_dict = {}
    x_hat_batch_dict = {}
    model_selection = ModelSelect(hparams) 
    hid_i=int(hid_i)
#        print('Matrix norm is {}'.format(np.linalg.norm(A_val)))
#        hparams.eps = hparams.eps * np.linalg.norm(A_val)
   
    # Get a session
    sess = tf.Session()

    # Set up placeholders
    A = tf.placeholder(tf.float32, shape=(hparams.n_input, hparams.num_measurements), name='A')
   
    y_batch = tf.placeholder(tf.float32, shape=(hparams.batch_size, hparams.num_measurements), name='y_batch')
    # Create the generator
    model_hparams = model_def.Hparams()
    model_hparams.n_z = hparams.n_z
    model_hparams.stdv = hparams.stdv
    model_hparams.mean = hparams.mean
    model_hparams.grid = copy.deepcopy(hparams.grid)
    model_selection.setup_dim(hid_i,model_hparams)
    
    if not hparams.model_types[0] == 'vae-flex-alt' and 'alt' in hparams.model_types[0]:
        model_def.ignore_grid = next((j for  j in model_selection.dim_list if j >= hid_i), None)
    
    #set up the initialization            
    print('The initialization is: {}'.format(init_obj.mode))
    if init_obj.mode=='random':
        z_batch = model_def.get_z_var(model_hparams,hparams.batch_size,hid_i)
    elif init_obj.mode in ['previous-and-random','only-previous']:
        z_batch = model_def.get_z_var(model_hparams,hparams.batch_size,hid_i)
        init_op_par = tf.assign(z_batch, truncate_val(model_hparams,hparams,hid_i,init_obj,stdv=0))
    else:
        z_batch = truncate_val(model_hparams,hparams,hid_i,init_obj,stdv=0.1)
    _, x_hat_batch, _ = model_def.generator_i(model_hparams, z_batch, 'gen', hparams.bol,hid_i,relative=False)
    x_hat_batch_dict[hid_i] = x_hat_batch


    # measure the estimate
    if hparams.measurement_type == 'project':
        y_hat_batch = tf.identity(x_hat_batch, name='y_hat_batch')
    else:
        y_hat_batch = tf.matmul(x_hat_batch, A, name='y_hat_batch')

    # define all losses
    m_loss1_batch = tf.reduce_mean(tf.abs(y_batch - y_hat_batch), 1)
    m_loss2_batch = tf.reduce_mean((y_batch - y_hat_batch)**2, 1)
    
    if hparams.stdv>0:
        norm_val = 1/(hparams.stdv**2)
    else:
        norm_val = 1e+20
    
    zp_loss_batch = tf.reduce_sum((z_batch-tf.ones(tf.shape(z_batch))*hparams.mean)**2*norm_val, 1) #added normalization       
    
    # define total loss    
    total_loss_batch = hparams.mloss1_weight * m_loss1_batch \
                     + hparams.mloss2_weight * m_loss2_batch \
                     + hparams.zprior_weight * zp_loss_batch
    total_loss = tf.reduce_mean(total_loss_batch)
    total_loss_dict[hid_i] = total_loss
    
    # Compute means for logging
    m_loss1 = tf.reduce_mean(m_loss1_batch)
    m_loss2 = tf.reduce_mean(m_loss2_batch)
    zp_loss = tf.reduce_mean(zp_loss_batch)
    
    m_loss1_batch_dict[hid_i] = m_loss1
    m_loss2_batch_dict[hid_i] = m_loss2
    zp_loss_batch_dict[hid_i] = zp_loss

    # Set up gradient descent
    var_list = [z_batch]
    if recovered:
        global_step = tf.Variable(hparams.optim.global_step, trainable=False, name='global_step')
    else:
        global_step = tf.Variable(0, trainable=False, name='global_step')
    learning_rate = utils.get_learning_rate(global_step, hparams)
    opt = utils.get_optimizer(learning_rate, hparams)
    update_op = opt.minimize(total_loss, var_list=var_list, global_step=global_step, name='update_op')
    opt_reinit_op = utils.get_opt_reinit_op(opt, var_list, global_step)

    # Initialize and restore model parameters
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    #restore the setting
    if 'alt' in hparams.model_types[0]:
        factor = 1
    else:
        factor = len(hparams.grid)
    model_def.batch_size = hparams.batch_size*factor #changes object (call by reference), necessary, since call of generator_i might change batch size.
    model_selection.restore(sess,hid_i)        

    if recovered:
        best_keeper = hparams.optim.best_keeper
    else:
        best_keeper = utils.BestKeeper(hparams,logg_z=True)
    if hparams.measurement_type == 'project':
        feed_dict = {y_batch: y_batch_val}
    else:
        feed_dict = {A: A_val, y_batch: y_batch_val}
    flag = False
    for i in range(init_obj.num_random_restarts):
        if recovered and i <= hparams.optim.i: # Losing the optimizer's state; a keras implementation might handle this better
            if i < hparams.optim.i:
                continue
            else:
                sess.run(utils.get_opt_reinit_op(opt, [], global_step))
                sess.run(tf.assign(z_batch,hparams.optim.z_batch))              
        else:            
            sess.run(opt_reinit_op)
            if i<1 and init_obj.mode in ['previous-and-random','only-previous']:
                print('Using previous outcome as starting point')
                sess.run(init_op_par)            
        for j in range(hparams.max_update_iter):
            if recovered and j < hparams.optim.j:
                continue
            _, lr_val, total_loss_val, \
            m_loss1_val, \
            m_loss2_val, \
            zp_loss_val = sess.run([update_op, learning_rate, total_loss,
                                    m_loss1,
                                    m_loss2,
                                    zp_loss], feed_dict=feed_dict)         

            if hparams.gif and ((j % hparams.gif_iter) == 0):
                images = sess.run(x_hat_batch, feed_dict=feed_dict)
                for im_num, image in enumerate(images):
                    save_dir = '{0}/{1}/{2}/'.format(hparams.gif_dir, hid_i,im_num)
                    utils.set_up_dir(save_dir)
                    save_path = save_dir + '{0}.png'.format(j)
                    image = image.reshape(hparams.image_shape)
                    save_image(image, save_path)
            if j%100==0 and early_stop:
                x_hat_batch_val = sess.run(x_hat_batch, feed_dict=feed_dict)
                if check_tolerance(hparams,A_val,x_hat_batch_val,y_batch_val)[1]:
                    flag = True
                    print('Early stopping')
                    break
            if j%25==0:#Now not every turn                
                logging_format = 'hid {} rr {} iter {} lr {} total_loss {} m_loss1 {} m_loss2 {} zp_loss {}'
                print( logging_format.format(hid_i, i, j, lr_val, total_loss_val,
                                            m_loss1_val,
                                            m_loss2_val,
                                            zp_loss_val)) 
            if j%100==0:
                x_hat_batch_val, total_loss_batch_val, z_batch_val = sess.run([x_hat_batch, total_loss_batch,z_batch], feed_dict=feed_dict)
                best_keeper.report(x_hat_batch_val, total_loss_batch_val,z_val=z_batch_val)
                optim.global_step = sess.run(global_step)
                optim.A = A_val
                optim.y_batch = y_batch_val
                optim.i=i
                optim.j=j
                optim.z_batch= z_batch_val
                optim.best_keeper=best_keeper
                optim.bs=bs
                optim.init_obj = init_obj
                utils.save_to_pickle(optim,utils.get_checkpoint_dir(hparams, hparams.model_types[0])+'tmp/optim.pkl')
                print('Checkpoint of optimization created')

        hparams.optim.j = 0                
        x_hat_batch_val, total_loss_batch_val, z_batch_val = sess.run([x_hat_batch, total_loss_batch,z_batch], feed_dict=feed_dict)
        best_keeper.report(x_hat_batch_val, total_loss_batch_val,z_val=z_batch_val)
        if flag:
            break
    tf.reset_default_graph()
    return best_keeper.get_best()
Example no. 13
from keras.datasets import mnist
import matplotlib.pyplot as plt
import random

import utils
import config

(x_train, y_train), (x_test, y_test) = mnist.load_data()

train_data = []
test_data = []
for i in range(10):
    train_data.append(x_train[y_train==i])
    test_data.append(x_test[y_test==i])

utils.save_to_pickle(train_data,config.TRAIN_DATA_PATH)
utils.save_to_pickle(test_data, config.TEST_DATA_PATH)

# check data
train_data = utils.load_pickle(config.TRAIN_DATA_PATH)

plt.figure(figsize=(5, 10))
for i in range(10):
    data_single = train_data[i]
    show_sample_index = random.sample(range(len(data_single)), 5)
    for j in range(5):
        plt.subplot(10, 5, 5 * i + j + 1)
        plt.imshow(data_single[show_sample_index[j]])
    title = '%s' % (i)
    plt.title(title)
plt.show()
Example no. 14
        sess.run(train_step,feed_dict={x:state_batch, y:target_reward_batch, a:action_batch})

        # Update the replay memory
        replay,next_state = utils.update_replay(game_state,replay,esplion,x,prediction,sess,curr_state)
        # Update curr_state
        curr_state = next_state
        # Increment step
        step+=1

        # esplion (epsilon) decays with the number of training steps, so the probability of taking a random action keeps decreasing
        if esplion>config.FINIAL_ESPLION:
            esplion -= (config.ESPLION - config.FINIAL_ESPLION)/config.EXPLORE

        if step%1000==0:
            train_loss = sess.run(loss,feed_dict={x:state_batch, y:target_reward_batch, a:action_batch})
            duration = time.time() - start_time
            logger.info("step %d: loss is %g, esplion is %g (%0.3f sec)" % (step, train_loss,esplion, duration))
            start_time = time.time()
        if step%10000==0:
            saver.save(sess, config.CHECKFILE, global_step=step)
            utils.save_to_pickle([game_state,replay,curr_state,esplion],config.SAVEFILE)
            print('writing checkpoint at step %s' % step)

Example no. 15
app = Flask(__name__)


@app.route("/")
def home():
    return render_template("maintemplate.html")


restart_dialogue = True
chat_history_ids = None
helper_data = {
    "restart_dialogue": restart_dialogue,
    "chat_history_ids": chat_history_ids
}
save_to_pickle(helper_data, TMP_FILENAME_FOR_DIALOGUE_HELPER_DATA)
restart_keywords = load_keywords_from_csv(RESTART_KEYWORDS_PATH)
exit_keywords = load_keywords_from_csv(EXIT_KEYWORDS_PATH)


@app.route("/get")
def get_bot_response():
    user_text = request.args.get('msg')

    custom_answer = find_custom_answer(user_text, threshold=COS_SIM_THRESHOLD)
    if custom_answer:
        reply_text = custom_answer
    else:
        helper_data = load_from_pickle(TMP_FILENAME_FOR_DIALOGUE_HELPER_DATA)
        restart_dialogue = helper_data["restart_dialogue"]
        chat_history_ids = helper_data["chat_history_ids"]
Example no. 16
def execute_task6(request):
    query_image = request.POST.get('query_image')
    most_similar_images = int(request.POST.get('most_similar_images'))
    query_image_folder_name = request.POST.get('query_image_folder_name')
    relevance_feedback = request.POST.get('relevance_feedback')
    lsh = read_from_pickle('lsh_model')
    db_connection = DatabaseConnection()
    image_vector = db_connection.get_feature_data_for_image(
        'histogram_of_gradients', query_image)
    image_vector = np.asarray(image_vector.flatten())

    if read_from_pickle('all_img_features_LSH.pickle') is not None:
        all_image_hog_features = read_from_pickle(
            'all_img_features_LSH.pickle')
    else:
        all_image_hog_features = db_connection.get_object_feature_matrix_from_db(
            tablename='histogram_of_gradients')
        save_to_pickle(all_image_hog_features, 'all_img_features_LSH.pickle')
    #SVD on hog features
    if read_from_pickle('svd_hog_lsh.pickle') is not None:
        svd_obj = read_from_pickle('svd_hog_lsh.pickle')
        transformed_data = svd_obj['data_matrix']
        vt = svd_obj['vt']
    else:
        svd = SingularValueDecomposition()
        transformed_data, vt = svd.get_transformed_data_copy(
            all_image_hog_features['data_matrix'], 400)
        save_to_pickle(
            {
                "data_matrix": transformed_data,
                "images": all_image_hog_features['images'],
                "vt": vt
            }, 'svd_hog_lsh.pickle')

    if (query_image_folder_name != ''):
        table_name = convert_folder_path_to_table_name(
            query_image_folder_name, 'histogram_of_gradients')
        image_vector = db_connection.get_feature_data_for_image(
            table_name, query_image)

    image_vector = np.dot(image_vector.astype(float), np.transpose(vt))

    new_obj = {}
    new_obj['data_matrix'] = transformed_data
    new_obj['images'] = all_image_hog_features['images']
    (sorted_k_values,
     result_stats) = lsh.find_ksimilar_images(k=most_similar_images,
                                              image_vector=image_vector,
                                              all_image_hog_features=new_obj)

    # Now getting a bigger test dataset for relevance feedback
    if relevance_feedback == "Probabilistic":
        (test_dataset, result_stats) = lsh.find_ksimilar_images(
            k=10 + most_similar_images,
            image_vector=image_vector,
            all_image_hog_features=new_obj)
    else:
        (test_dataset, result_stats) = lsh.find_ksimilar_images(
            k=200 + most_similar_images,
            image_vector=image_vector,
            all_image_hog_features=new_obj)

    save_to_pickle(test_dataset, 'test_dataset.pickle')
    print(sorted_k_values[:most_similar_images])
    return render(
        request, 'visualize_images.html', {
            'images': sorted_k_values[:most_similar_images],
            "from_task": "task5",
            'rel_type': relevance_feedback,
            "q": query_image,
            "t": most_similar_images,
            "num_total": result_stats['total'],
            "num_unique": result_stats['unique']
        })
Example no. 17
def make_dicts(reset_all, make_val):

    # Create the meta directory, if it doesn't already exist, for all dictionaries generated below
    os.makedirs(meta_dir, exist_ok=True)

    train_data = dict([
        (img, whale) for (_, img, whale) in read_csv(train_csv).to_records()
    ])
    test_data = [img for (_, img, _) in read_csv(sample_csv).to_records()]

    # Load whale_to_imgs dictionary if exists, or create it otherwise
    if isfile(whale2imgs_file) and not reset_all:
        whale2imgs = load_pickle_file(whale2imgs_file)
    else:
        whale2imgs = {}
        for img, whale in tqdm(train_data.items()):
            if whale not in whale2imgs:
                whale2imgs[whale] = []
            if img not in whale2imgs[whale]:
                whale2imgs[whale].append(img)

        save_to_pickle(whale2imgs_file, whale2imgs)

    if not isfile(img2whale_file) or reset_all:
        # Find elements from the training set other than 'new_whale'
        img2whale = {}
        for img, whale in tqdm(train_data.items()):
            if whale != 'new_whale':
                if img not in img2whale:
                    img2whale[img] = whale
        train_known = sorted(list(img2whale.keys()))

        save_to_pickle(img2whale_file, img2whale)
        save_to_pickle(train_known_file, train_known)
        save_to_pickle(train_submit_file, test_data)

    if not (isfile(train_examples_file) and isfile(validation_examples_file)
            and reset_all == False):
        train_examples = []
        validation_examples = []
        lonely = []
        new_whale = []
        val_match = []
        lonely_count = len([x for x in whale2imgs.values()
                            if len(x) == 1])  # 2073
        couple_count = len([x for x in whale2imgs.values()
                            if len(x) == 2])  # 1285
        new_count = len([x for x in train_data.values()
                         if x == 'new_whale'])  # 9664
        # additional matching whale count needed for creating a balanced validation dataset (same number of matching and non-matching examples)
        extra_count = lonely_count - couple_count  # 2073 - 1285 = 788

        val_known = []
        val_submit = []
        matching_count = 0
        small_train_examples = []
        small_count = 0

        if make_val:
            for whale, imgs in tqdm(whale2imgs.items()):
                if whale == 'new_whale':
                    new_whale += imgs
                elif len(imgs) == 1:
                    lonely += imgs
                    val_known += imgs
                elif len(imgs) == 2:
                    val_match.append((imgs[0], imgs[1], 1))
                    val_known.append(imgs[1])
                    val_submit.append((imgs[0], whale))
                elif len(imgs) >= 4 and matching_count < extra_count:
                    val_match.append((imgs[0], imgs[1], 1))
                    val_known.append(imgs[0])
                    val_submit.append((imgs[1], whale))
                    matching_count += 1
                    train_examples += imgs[2:]
                    if (small_count + 2) % 10 < 2:
                        small_train_examples += imgs[2:]
                        small_count += 2
                else:
                    train_examples += imgs
                    if (small_count + len(imgs)) % 10 < len(imgs):
                        small_train_examples += imgs
                        small_count += len(imgs)
        else:
            for whale, imgs in tqdm(whale2imgs.items()):
                if whale == 'new_whale':
                    new_whale += imgs
                elif len(imgs) == 1:
                    lonely += imgs
                    val_known += imgs
                elif len(imgs) == 2:
                    val_match.append((imgs[0], imgs[1], 1))
                    val_known.append(imgs[1])
                    val_submit.append((imgs[0], whale))
                    train_examples += imgs
                    if (small_count + 2) % 10 < 2:
                        small_train_examples += imgs
                        small_count += 2
                elif len(imgs) >= 4 and matching_count < extra_count:
                    val_match.append((imgs[0], imgs[1], 1))
                    val_known.append(imgs[0])
                    val_submit.append((imgs[1], whale))
                    matching_count += 1
                    train_examples += imgs
                    if (small_count + len(imgs)) % 10 < len(imgs):
                        small_train_examples += imgs
                        small_count += len(imgs)
                else:
                    train_examples += imgs
                    if (small_count + len(imgs)) % 10 < len(imgs):
                        small_train_examples += imgs
                        small_count += len(imgs)

        print('lonely whales count: ', lonely_count)
        print('new whales count: ', new_count)
        print('couple whales count: ', couple_count)
        print('extra whales count: ', extra_count)

        random.shuffle(lonely)
        val_unmatch = list(
            zip(lonely,
                np.random.choice(new_whale, size=lonely_count, replace=False),
                np.zeros(lonely_count, dtype=np.int8)))
        validation_examples = val_match + val_unmatch
        random.shuffle(validation_examples)
        random.shuffle(train_examples)

        # small_train_size = len(train_examples) // 10
        # small_train_examples = train_examples[:small_train_size]

        small_validation_size = len(validation_examples) // 10
        small_validation_examples = validation_examples[:small_validation_size]

        # print('TRAIN')
        # print(train_examples[:10])
        # print('VALIDATION')
        # print(validation_examples[:10])

        print('Train size: ', len(train_examples))
        print('Validation size: ', len(validation_examples))

        print('val_known size: ', len(val_known))
        print('val_submit size: ', len(val_submit))

        save_to_pickle(train_examples_file, train_examples)
        save_to_pickle(validation_examples_file, validation_examples)

        save_to_pickle(train_examples_small_file, small_train_examples)
        save_to_pickle(validation_examples_small_file,
                       small_validation_examples)

        save_to_pickle(val_known_file, val_known)
        save_to_pickle(val_submit_file, val_submit)
Example no. 18
def run_sagan(device,
              image_size,
              noise_size,
              batch_size,
              config,
              run_dir,
              saved_dir,
              run_name,
              num_epochs,
              val_dataset,
              train_dataset=None,
              checkpoints=None,
              mode='train',
              gpu_num=1):
    type = 'SAGAN'
    sagan_generator = SAGenerator(noise_size=noise_size,
                                  image_size=image_size).to(device)
    sagan_discriminator = SADiscriminator(image_size=image_size).to(device)
    # Parallel for improved performance
    if ((device.type == 'cuda') and (gpu_num > 1)):
        sagan_generator = nn.DataParallel(sagan_generator,
                                          list(range(gpu_num)))
        sagan_discriminator = nn.DataParallel(sagan_discriminator,
                                              list(range(gpu_num)))
    # Print networks
    print('Discriminator')
    summary(sagan_discriminator, (3, image_size, image_size))
    print('Generator')
    summary(sagan_generator, (noise_size, 1, 1))
    if checkpoints is not None:
        utils.load_from_checkpoint(sagan_generator, saved_dir,
                                   checkpoints["generator"])
        utils.load_from_checkpoint(sagan_discriminator, saved_dir,
                                   checkpoints["discriminator"])

    run_name = 'SAGAN' + '_' + run_name
    if mode == 'train':
        inception_FID_scores, inception_scores = train_gan(num_epochs,
                                                           batch_size,
                                                           noise_size,
                                                           device,
                                                           train_dataset,
                                                           val_dataset,
                                                           sagan_generator,
                                                           sagan_discriminator,
                                                           type='SAGAN',
                                                           config=config,
                                                           run_dir=run_dir,
                                                           saved_dir=saved_dir,
                                                           run_name=run_name)
    elif mode == 'test':
        inception_FID_scores = [
            calc_inception_FID_score(batch_size, device, val_dataset,
                                     sagan_generator, type, noise_size)
        ]
        inception_scores = [
            calc_inception_score(device,
                                 noise_size,
                                 sagan_generator,
                                 eval_size=len(val_dataset))
        ]

    date_str = datetime.datetime.now().strftime("%m%d%Y%H")
    utils.save_to_pickle(
        inception_FID_scores,
        os.path.join(saved_dir,
                     'sagan_fid_' + run_name + date_str + ".pickle"))
    utils.save_to_pickle(
        inception_scores,
        os.path.join(saved_dir, 'sagan_IS_' + run_name + date_str + ".pickle"))

    return inception_FID_scores, inception_scores
Example no. 19
def run(group, problem, alpha, file_prefix, total_time, simulation_time):
    regret_min = []
    regret_avg = []
    r = []
    individual_regret = np.zeros([group.size, problem.iterations])

    start_time = time.time()
    ##########RUN PERCEPTRON AVERAGE###############
    for counter in range(problem.iterations - 1):

        weights = group.weights

        x = utils.model(weights, problem)

        print('System recommendation using the average perceptron: ', x.object)

        regret = group.utility_star_x_star - x.get_utility(
            group.avg_weights_star)
        r.append(regret)

        # Calculate the regret for every individual user
        #
        for i in range(0, group.size):
            usr = group.users[i]
            reg = usr.get_regret(x.phi)
            individual_regret[i, counter] = reg

        if (regret == 0.0):
            final_time = (time.time() - start_time) + total_time
            file = utils.save_to_pickle(x.object, r, regret_min, regret_avg,
                                        regret_min, 'average', file_prefix,
                                        problem, group, simulation_time,
                                        final_time, counter, individual_regret)
            return x.object, simulation_time, file

        # phi_y_bar, s_t = average(group, phi_y_list, alpha)

        improvements = []

        for u in group.users:
            sim_start_time = time.time()
            x_bar = u.step(x.phi, x.object, alpha)
            bar = [
                np.array(x_bar.phi[0]),
                np.array(x_bar.phi[1]),
                np.array(x_bar.phi[2]),
                np.array(x_bar.phi[3])
            ]
            improvements.append(bar)

            # y, improvements[group.get_index(u)] = u.step(phi_y, alpha)
            temp_simulation_time = time.time() - sim_start_time
            simulation_time += temp_simulation_time
            print('Improvement user ', group.get_index(u), ': ', x_bar.object)

        phi_x_bar = utils.avg_phi_list(improvements, group.problem)

        group.update_weights(phi_x_bar, x.phi)

    x = utils.model(weights, problem)
    regret = group.utility_star_x_star - x.get_utility(group.avg_weights_star)
    r.append(regret)

    # Calculate the regret for every individual user
    #
    for i in range(0, group.size):
        usr = group.users[i]
        reg = usr.get_regret(x.phi)
        individual_regret[i, counter + 1] = reg

    final_time = (time.time() - start_time) + total_time
    #y, regret_avg, regret_min, regret_su_avg, regret_su_min, strategy, file_prefix, problem, group, sim_time, runtime, counter
    file = utils.save_to_pickle(x.object, r, regret_min, regret_avg,
                                regret_min, 'average', file_prefix, problem,
                                group, simulation_time, final_time,
                                problem.iterations, individual_regret)
    return x.object, simulation_time, file
Example no. 20
def run(strategy,
        group,
        problem,
        alpha,
        file_prefix,
        total_time,
        simulation_time,
        avg=False):
    regret_min = []
    regret_avg = []
    regret_su_min = []
    regret_su_avg = []
    individual_regret = np.zeros([group.size, problem.iterations])
    start_time = time.time()
    ##########RUN PERCEPTRON AVERAGE###############
    extra_time = 0
    for counter in range(problem.iterations - 1):
        print('Iteration: (', counter, '/', problem.iterations, ')')
        ######USER SELECTION#######
        if (strategy == strategy.LEAST_MISERY):
            user, x_group = utils.get_least_misery_user(group, problem, avg)

        elif (strategy == strategy.RANDOM):
            user, x_group, extra_t = utils.get_random_user(group, problem, avg)
            extra_time += extra_t
        ############################

        #Find object to present to this user
        x = utils.model(user.current_weights, problem)
        print('System recommendation using the ', strategy.value,
              ' strategy: ', x.object)

        #Calculate regret for the group recommendation
        #note: I use x_group because we want to know group wide regret, x is generated using individual weights
        rgrt_avg = utils.get_regret(x_group, group, aggregation_function='avg')
        regret_avg.append(rgrt_avg)
        rgrt_min = utils.get_regret(x_group, group, aggregation_function='min')
        regret_min.append(rgrt_min)

        #Calculate the regret for the picked user
        rgrt_avg = utils.get_regret(x, group, aggregation_function='avg')
        regret_su_avg.append(rgrt_avg)
        rgrt_min = utils.get_regret(x, group, aggregation_function='min')
        regret_su_min.append(rgrt_min)

        #Calculate the regret for every individual user
        #
        for i in range(0, group.size):
            usr = group.users[i]
            reg = usr.get_regret(x_group.phi)
            individual_regret[i, counter] = reg

        #Algorithm ends when the ideal object is found
        if (rgrt_min == 0.0 and not avg):
            final_time = ((time.time() - start_time) + total_time) - extra_time
            file = utils.save_to_pickle(x_group.object, regret_avg, regret_min,
                                        regret_su_avg, regret_su_min,
                                        strategy.value, file_prefix, problem,
                                        group, simulation_time, final_time,
                                        counter, individual_regret)
            return x_group.object, simulation_time, file
        if (rgrt_avg == 0.0 and avg):
            final_time = ((time.time() - start_time) + total_time) - extra_time
            file = utils.save_to_pickle(x_group.object, regret_avg, regret_min,
                                        regret_su_avg, regret_su_min,
                                        strategy.value, file_prefix, problem,
                                        group, simulation_time, final_time,
                                        counter, individual_regret)
            return x_group.object, simulation_time, file
        # start_time = time.time()

        start_sim_time = time.time()
        #Simulate a step from the chosen user
        x_bar = user.step(x.phi, x.object, alpha)

        temp_simulation_time = time.time() - start_sim_time
        print('Improvement user: '******': ', x_bar.object)
        simulation_time += temp_simulation_time

        #update weights for the chosen user
        user.update_weights(x_bar.phi, x.phi)

    x_final = utils.get_aggregation_object(group, problem, avg)

    regret_avg.append(
        utils.get_regret(x_final, group, aggregation_function='avg'))
    regret_min.append(
        utils.get_regret(x_final, group, aggregation_function='min'))
    regret_su_avg.append(
        utils.get_regret(x_final, group, aggregation_function='avg'))
    regret_su_min.append(
        utils.get_regret(x_final, group, aggregation_function='min'))

    for i in range(0, group.size):
        usr = group.users[i]
        reg = usr.get_regret(x_final.phi)
        individual_regret[i, counter + 1] = reg

    final_time = ((time.time() - start_time) + total_time) - extra_time
    file = utils.save_to_pickle(x_final, regret_avg, regret_min, regret_su_avg,
                                regret_su_min, strategy.value, file_prefix,
                                problem, group, simulation_time, final_time,
                                problem.iterations, individual_regret)
    return x_final, simulation_time, file
Example no. 21
def run_pubmed():
    data_path = "pubmed_adr/data/ADE-Corpus-V2/DRUG-AE.rel"
    final_data, idx2word, idx2label, maxlen, vocsize, nclasses, tok_senc_adr, train_lex, test_lex, train_y, test_y, saved_data = \
        data_processing(data_path)

    test_toks = []
    test_tok_senc_adr = tok_senc_adr[TRAIN_NUMBER:]
    for i in test_tok_senc_adr:
        test_toks.append(i[0])

    train_toks = []
    train_tok_senc_adr = tok_senc_adr[:TRAIN_NUMBER]
    for i in train_tok_senc_adr:
        train_toks.append(i[0])

    # Char embedding
    char_per_word = []
    char_word = []
    char_senc = []
    maxlen_char_word = 0
    a = []

    # Compute maxlen_char_word (each word is capped at 37 characters)
    for s in (train_toks + test_toks):
        for w in s:
            for c in w.lower():
                char_per_word.append(c)

            if len(char_per_word) > 37:
                a.append(char_per_word)
                char_per_word = char_per_word[:37]
            if len(char_per_word) > maxlen_char_word:
                maxlen_char_word = len(char_per_word)

            char_word.append(char_per_word)
            char_per_word = []

        char_senc.append(char_word)
        char_word = []

    charcounts = collections.Counter()
    for senc in char_senc:
        for word in senc:
            for charac in word:
                charcounts[charac] += 1
    chars = [
        charcount[0]
        for charcount in charcounts.most_common(MAX_CHAR_VOCAB_SIZE)
        if charcount[1] >= MIN_CHAR_VOCAB_COUNT
    ]
    char2idx = {c: i + 2
                for i, c in enumerate(chars)
                }  # as with words, indices 0 and 1 are reserved for the 'PAD' and 'UNK' tokens

    char_word_lex = []
    char_lex = []
    char_word = []
    for senc in char_senc:
        for word in senc:
            for charac in word:
                char_word_lex.append(
                    [char2idx[charac] if charac in char2idx else 1])

            char_word.append(char_word_lex)
            char_word_lex = []

        char_lex.append(char_word)
        char_word = []

    char_per_word = []
    char_per_senc = []
    char_senc = []
    for s in char_lex:
        for w in s:
            for c in w:
                for e in c:
                    char_per_word.append(e)
            char_per_senc.append(char_per_word)
            char_per_word = []
        char_senc.append(char_per_senc)
        char_per_senc = []

    pad_char_all = []
    for senc in char_senc:
        while len(senc) < maxlen:
            senc.insert(0, [])
        pad_senc = pad_sequences(senc, maxlen=maxlen_char_word)
        pad_char_all.append(pad_senc)
        pad_senc = []

    pad_char_all = np.array(pad_char_all)

    pad_train_lex = pad_char_all[:TRAIN_NUMBER]
    pad_test_lex = pad_char_all[TRAIN_NUMBER:]

    idx2char = dict((k, v) for v, k in char2idx.items())
    idx2char[0] = 'PAD'
    idx2char[1] = 'UNK'
    charsize = max(idx2char.keys()) + 1

    # Load word embeddings
    print('Loading word embeddings...')
    _ = glove2word2vec(glove_300d_path, glove_300d_tmp_path)
    w2v = KeyedVectors.load_word2vec_format(glove_300d_tmp_path,
                                            binary=False,
                                            unicode_errors='ignore')
    print('word embeddings loading done!')

    # Build the model
    print('Building the model...')

    model = build_model(maxlen, maxlen_char_word, idx2word, w2v, vocsize,
                        charsize, embed_dim, char_embed_dim, nclasses)
    model.compile(optimizer='adam', loss='categorical_crossentropy')

    # Save model call back
    cp_callback = keras.callbacks.ModelCheckpoint(filepath=MODEL_OUTPUT_PATH,
                                                  verbose=1,
                                                  save_weights_only=True,
                                                  period=2)

    print('Training...')
    history = model.fit([train_lex, pad_train_lex],
                        train_y,
                        batch_size=BATCH_SIZE,
                        validation_split=0.1,
                        epochs=NUM_EPOCHS,
                        callbacks=[cp_callback])

    # Prediction results
    predir = 'pubmed_adr/model_output/predictions'
    fileprefix = 'embedding_level_attention_'

    scores = predict_score(model, [test_lex, pad_test_lex],
                           test_toks,
                           test_y,
                           predir,
                           idx2label,
                           maxlen,
                           fileprefix=fileprefix)

    saved_data["char2idx"] = char2idx
    saved_data["max_char_len"] = maxlen_char_word
    saved_data["vocsize"] = vocsize
    saved_data["charsize"] = charsize
    saved_data["embed_dim"] = embed_dim
    saved_data["char_embed_dim"] = char_embed_dim
    saved_data["num_classes"] = nclasses
    saved_data["idx2label"] = idx2label

    utils.save_to_pickle(test_lex, "test_lex.pickle")
    utils.save_to_pickle(pad_test_lex, "pad_test_lex.pickle")
    utils.save_to_pickle(test_toks, "test_toks.pickle")

    with open(META_PATH, "w") as f:
        json.dump(saved_data, f)
Example no. 22
    def _save(self):
        """ Save all robot objects, using pickle. """ ### XXX better just save the factory itself instead?

        # Send a list of objects to save
        robot_list = [self.robots[k] for k in self.robots.keys()]
        save_to_pickle(robot_list)