Example No. 1
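    # Note: this snippet is a method of a larger trainer class; the original
    # omits the class definition and its imports (keras, TensorBoard,
    # create_hyper_parameters, hack_loss) as well as the _read_data and
    # _build_model helpers.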
    def _setup(self):
        self.val_acc = 0
        self.main_epoch = 0
        self.hparams = create_hyper_parameters()

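        # Override the default hyper-parameters with values from self.config
        # (this looks like part of a hyper-parameter search harness that
        # supplies the config dict; an assumption based on the _setup pattern).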
        self.hparams.learning_rate = self.config['learning_rate']
        self.hparams.memn2n_rnn_dim = int(self.config['memn2n_rnn_dim'])
        self.hparams.hops = int(self.config['hops'])
        self.hparams.amp_val = int(self.config['amp'])

        self._read_data()
        model = self._build_model()

        # tensorboard = TensorBoard(log_dir="logs/{}".format(time()))
        tensorboard = TensorBoard(log_dir='./Graph', histogram_freq=0,
                                  write_graph=True, write_images=True)
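        # Note: the TensorBoard callback is created here but never attached to
        # a fit() call in this snippet.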
        # checkpointer =\
        # ModelCheckpoint(filepath='./dual_encoder_checkpoint.h5', verbose=1, save_best_only=True)
        # optim = keras.optimizers.SGD(lr=hparams.learning_rate, momentum=0.0, decay=0.0, nesterov=False)
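        # Adam with gradient-norm clipping; the decay term is tied to the
        # initial learning rate (lr * 1e-4).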
        optim = keras.optimizers.Adam(lr=self.hparams.learning_rate,
                                      clipnorm=self.hparams.clip_norm,
                                      decay=self.hparams.learning_rate * 0.0001)
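        # Only the 'probs' output is actually trained: the attention outputs
        # are exposed for later inspection, so each gets a dummy hack_loss and
        # a loss weight of 0 (hack_loss presumably returns a constant zero).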
        model.compile(
            loss={
                'probs': 'sparse_categorical_crossentropy',
                'context_attention': hack_loss,
                'responses_attention': hack_loss,
                'responses_dot': hack_loss
            },  # alternatives tried: custom_loss(probs=probs), {'probs': custom_loss}
            optimizer=optim,
            loss_weights={
                'probs': 1.0,
                'context_attention': 0.0,
                'responses_attention': 0.0,
                'responses_dot': 0.0
            },
            metrics=['accuracy'])  # also tried: top2acc, top5acc, top10acc, top50acc
        model.summary()
        self.model = model
Example No. 2
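# Imports needed by this snippet (the original omits them). The modules that
# provide create_hyper_parameters, memLstm_custom_model and hack_loss are
# project-local, so the commented paths below are guesses:
import random

import numpy as np
import keras
from keras.layers import Input
from keras.models import Model
from keras.callbacks import TensorBoard

# from hparams import create_hyper_parameters                # assumed path
# from memLstm import memLstm_custom_model, hack_loss        # assumed path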
def main():

    ############################# Load Configurations #############################    
    hparams = create_hyper_parameters()


    ############### Load the model and shape its inputs & outputs ###############
    context = Input(shape=(hparams.max_context_len,))
    # context_speaker = Input(shape=(hparams.max_context_len, 2))
    context_mask = Input(shape=(hparams.max_context_len,))
    utterances = Input(shape=(hparams.num_utterance_options, hparams.max_utterance_len))
    # profile = Input(shape=(hparams.max_profile_len,))

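    # Knowledge-base (KB) inputs, judging by their names: candidate KB entries,
    # a per-position KB flag over the context, and a mask over the KB options.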
    kb = Input(shape=(hparams.num_kb_options, hparams.max_kb_len))
    kb_flag = Input(shape=(hparams.max_context_len, 2))
    kb_mask = Input(shape=(hparams.num_kb_options,))



    inputs = [context,
              context_mask,
              utterances,
              kb,
              kb_flag,
              kb_mask]
    # inputs = [context, context_speaker, utterances, profile]
    

    (probs,
     context_attention,
     responses_attention,
     responses_dot) = memLstm_custom_model(hparams,
                                           context, context_mask,
                                           utterances,
                                           kb, kb_flag, kb_mask)
    # probs = memLstm_model(hparams, context, context_speaker, utterances, profile)


    model = Model(inputs=inputs, outputs=[probs,
                                          context_attention,
                                          responses_attention,
                                          responses_dot])
    print("Model loaded")


    # tensorboard = TensorBoard(log_dir="logs/{}".format(time()))
    tensorboard = TensorBoard(log_dir='./Graph', histogram_freq=0,
                              write_graph=True, write_images=True)
    # checkpointer =\
    # ModelCheckpoint(filepath='./dual_encoder_checkpoint.h5', verbose=1, save_best_only=True)
    # optim = keras.optimizers.SGD(lr=hparams.learning_rate, momentum=0.0, decay=0.0, nesterov=False)
    optim = keras.optimizers.Adam(lr=hparams.learning_rate,
                                  clipnorm=hparams.clip_norm)  # decay=0.001 was also tried
    model.compile(loss={'probs': 'sparse_categorical_crossentropy',
                        'context_attention': hack_loss,
                        'responses_attention': hack_loss,
                        'kb_attention': hack_loss},  # alternatives tried: custom_loss(probs=probs), {'probs': custom_loss}
                  optimizer=optim,
                  loss_weights={'probs': 1.0,
                                'context_attention': 0.0,
                                'responses_attention': 0.0,
                                'kb_attention': 0.0},
                  metrics=['accuracy'])  # also tried: top2acc, top5acc, top10acc, top50acc
    model.summary()


    ############################# Load Data #############################
    print("Loading validation data")
    valid_context = np.load(hparams.valid_context_path)
    # valid_context_speaker =np.load(hparams.valid_context_speaker_path)
    valid_context_mask = np.load(hparams.valid_context_mask_path)
    # valid_context_len = np.load(hparam.valid_context_len_path)
    valid_target = np.load(hparams.valid_target_path)
    valid_target = valid_target.astype('i4')
    valid_options = np.load(hparams.valid_options_path)
    # valid_options_len = np.load(hparam.valid_context_path)
    # valid_profile = np.load(hparams.valid_profile_path)
    valid_kb = np.load(hparams.valid_kb_path)
    valid_kb_flag = np.load(hparams.valid_kb_flag_path)
    valid_kb_mask = np.load(hparams.valid_kb_mask_path)


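    # The 0/1 masks are scaled by a large negative constant (hparams.neg_inf),
    # presumably so that masked positions are suppressed by a softmax inside
    # the model.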
    valid_context_mask = hparams.neg_inf * valid_context_mask
    valid_kb_mask = hparams.neg_inf * valid_kb_mask
    
    valid_X = [valid_context, valid_context_mask,
               valid_options,
               valid_kb, valid_kb_flag, valid_kb_mask]
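    # Dummy all-zero targets for the three auxiliary outputs; their loss
    # weights are 0, so only the shapes matter. 5000 is presumably the
    # validation-set size.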
    valid_Y = [valid_target, np.zeros((5000, 1, 1), dtype='i4'),
               np.zeros((5000, 1, 1, 1, 1), dtype='i4'),
               np.zeros((5000, 1, 1, 1, 1), dtype='i4')]


    ############################# TRAIN #############################
    print("Loading training data")
    train_context = np.load(hparams.train_context_path)
    # train_context_speaker =np.load(hparams.train_context_speaker_path)
    train_context_mask = np.load(hparams.train_context_mask_path)
    # train_context_len = np.load(hparam.train_context_len_path)
    train_target = np.load(hparams.train_target_path)
    train_target = train_target.astype('i4')
    train_options = np.load(hparams.train_options_path)
    # train_options_len = np.load(hparam.train_context_path)
    # train_profile = np.load(hparams.train_profile_path)
    train_kb = np.load(hparams.train_kb_path)
    train_kb_flag = np.load(hparams.train_kb_flag_path)
    train_kb_mask = np.load(hparams.train_kb_mask_path)

    train_context_mask = hparams.neg_inf * train_context_mask
    train_kb_mask = hparams.neg_inf * train_kb_mask


    # train_X = [train_context, train_context_speaker, train_options]
    # train_X = [train_context, train_context_speaker, train_options, train_profile]
    # train_Y = train_target

    ### model.fit(train_X, train_Y, batch_size=hparams.batch_size,
    ###             epochs=hparams.num_epochs,validation_data=(valid_X, valid_Y), verbose=1)#, callbacks=[tensorboard])#, callbacks=[checkpointer])
   
    # Number of "actual" training epochs
    val_acc = 0
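    # 10 "main" epochs: each reshuffles all 100,000 training indices, then runs
    # 10 sub-epochs of one Keras epoch over a 10,000-sample chunk each
    # (100,000 is presumably the training-set size).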
    for i in range(10):
        print('\nMAIN EPOCH:', i+1)
        print('==================================================================================================')
        idx = random.sample(range(100000), 100000)
        for j in range(10):
            print('Sub epochs:', j+1)
            sub_idx = idx[j*10000 : (j+1)*10000]
            train_X = [np.take(train_context, sub_idx, axis=0),
                       # np.take(train_context_speaker, sub_idx, axis=0),
                       np.take(train_context_mask, sub_idx, axis=0),
                       np.take(train_options, sub_idx, axis=0),
                       # np.take(train_profile, sub_idx, axis=0),
                       np.take(train_kb, sub_idx, axis=0),
                       np.take(train_kb_flag, sub_idx, axis=0),
                       np.take(train_kb_mask, sub_idx, axis=0)]
            train_Y = np.take(train_target, sub_idx, axis=0)

            A = model.fit(train_X, [train_Y,
                                    np.zeros((10000, 1, 1), dtype='i4'),
                                    np.zeros((10000, 1, 1, 1, 1), dtype='i4'),
                                    np.zeros((10000, 1, 1, 1, 1), dtype='i4')],
                          batch_size=hparams.batch_size,
                          epochs=1, validation_data=(valid_X, valid_Y),
                          verbose=1)  # callbacks=[checkpointer] was also tried
            # for key in A.history.keys():
            #     print(key)
            
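            # Checkpoint only when the validation accuracy of 'probs' improves
            # and clears a floor of 0.16; the filename encodes hops, learning
            # rate, RNN dim and the score.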
            if A.history['val_probs_acc'][0] > val_acc:
                val_acc = A.history['val_probs_acc'][0]
                if val_acc >= 0.16:
                    model.save_weights(hparams.weights_path +
                                       'ubuntu_track1_' +
                                       str(int(hparams.hops)) + 'hops_' +
                                       str(int(hparams.learning_rate * 10000)) + 'lr_' +
                                       str(int(hparams.memn2n_rnn_dim)) + 'rnn_' +
                                       str(int(val_acc * 10000)) + '_' +
                                       str(i) + '_' + str(j) + '.h5',
                                       overwrite=True)




    print('Best acc:', val_acc)
Example No. 3
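# Same assumed imports as Example No. 2: random, numpy, keras (Input, Model,
# TensorBoard) plus the project-local create_hyper_parameters,
# memLstm_custom_model and hack_loss.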
def main():

    ############################# Load Configurations #############################    
    hparams = create_hyper_parameters()


    ############### Load the model and shape its inputs & outputs ###############
    context = Input(shape=(hparams.max_context_len,))
    # context_speaker = Input(shape=(hparams.max_context_len, 2))
    context_mask = Input(shape=(hparams.max_context_len,))
    utterances = Input(shape=(hparams.num_utterance_options, hparams.max_utterance_len))
    # profile = Input(shape=(hparams.max_profile_len,))


    inputs = [context, context_mask, utterances]
    # inputs = [context, context_speaker, utterances, profile]
    

    (probs,
     context_attention,
     responses_attention,
     responses_dot) = memLstm_custom_model(hparams, context, context_mask, utterances)
    # probs = memLstm_model(hparams, context, context_speaker, utterances, profile)


    model = Model(inputs=inputs, outputs=[probs,
                                          context_attention,
                                          responses_attention,
                                          responses_dot])
    print("Model loaded")


    # tensorboard = TensorBoard(log_dir="logs/{}".format(time()))
    tensorboard = TensorBoard(log_dir='./Graph', histogram_freq=0,
                              write_graph=True, write_images=True)
    # checkpointer =\
    # ModelCheckpoint(filepath='./dual_encoder_checkpoint.h5', verbose=1, save_best_only=True)
    # optim = keras.optimizers.SGD(lr=hparams.learning_rate, momentum=0.0, decay=0.0, nesterov=False)
    optim = keras.optimizers.Adam(lr=hparams.learning_rate,
                                  clipnorm=hparams.clip_norm)  # decay=0.001 was also tried
    model.compile(loss={'probs': 'sparse_categorical_crossentropy',
                        'context_attention': hack_loss,
                        'responses_attention': hack_loss,
                        'responses_dot': hack_loss},  # alternatives tried: custom_loss(probs=probs), {'probs': custom_loss}
                  optimizer=optim,
                  loss_weights={'probs': 1.0,
                                'context_attention': 0.0,
                                'responses_attention': 0.0,
                                'responses_dot': 0.0},
                  metrics=['accuracy'])  # also tried: top2acc, top5acc, top10acc, top50acc
    model.summary()


    ############################# Load Validation Data #############################
    print("Loading validation data")
    valid_context = np.load(hparams.valid_context_path)
    # valid_context_speaker =np.load(hparams.valid_context_speaker_path)
    valid_context_mask = np.load(hparams.valid_context_mask_path)
    # valid_context_len = np.load(hparam.valid_context_len_path)
    valid_target = np.load(hparams.valid_target_path)
    valid_target = valid_target.astype('i4')
    valid_options = np.load(hparams.valid_options_path)
    # valid_options_len = np.load(hparam.valid_context_path)
    # valid_profile = np.load(hparams.valid_profile_path)


    valid_context_mask = hparams.neg_inf * valid_context_mask

    
    valid_X = [valid_context, valid_context_mask, valid_options]
    # valid_X = [valid_context, valid_context_speaker, valid_options, valid_profile]
    valid_Y = [valid_target, np.zeros((5000, 1, 1), dtype='i4'),
               np.zeros((5000, 1, 1, 1, 1), dtype='i4'),
               np.zeros((5000, 1, 1, 1, 1), dtype='i4')]

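    # Everything from here to the matching ''' below is disabled (wrapped in a
    # bare string literal), so only the EVALUATE section of this example runs.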
    '''
    ############################# TRAIN #############################
    print("Loading training data")
    train_context = np.load(hparams.train_context_path)
    # train_context_speaker =np.load(hparams.train_context_speaker_path)
    train_context_mask = np.load(hparams.train_context_mask_path)
    # train_context_len = np.load(hparam.train_context_len_path)
    train_target = np.load(hparams.train_target_path)
    train_target = train_target.astype('i4')
    train_options = np.load(hparams.train_options_path)
    # train_options_len = np.load(hparam.train_context_path)
    # train_profile = np.load(hparams.train_profile_path)


    train_context_mask = hparams.neg_inf * train_context_mask


    # train_X = [train_context, train_context_speaker, train_options]
    # train_X = [train_context, train_context_speaker, train_options, train_profile]
    # train_Y = train_target

    ### model.fit(train_X, train_Y, batch_size=hparams.batch_size,
    ###             epochs=hparams.num_epochs,validation_data=(valid_X, valid_Y), verbose=1)#, callbacks=[tensorboard])#, callbacks=[checkpointer])
   
    # Number of "actual" training epochs
    val_acc = 0
    final_model = None
    k=0
    l=0
    for i in range(10):
        print('\nMAIN EPOCH:', i+1)
        print('==================================================================================================')
        idx = random.sample(range(100000), 100000)
        for j in range(10):
            print('Sub epochs:', j+1)
            sub_idx = idx[j*10000 : (j+1)*10000]
            train_X = [np.take(train_context, sub_idx, axis=0),
                       # np.take(train_context_speaker, sub_idx, axis=0),
                       np.take(train_context_mask, sub_idx, axis=0),
                       np.take(train_options, sub_idx, axis=0)]
                       # np.take(train_profile, sub_idx, axis=0)
            train_Y = np.take(train_target, sub_idx, axis=0)

            A = model.fit(train_X, [train_Y,
                                    np.zeros((10000, 1, 1), dtype='i4'),
                                    np.zeros((10000, 1, 1, 1, 1), dtype='i4'),
                                    np.zeros((10000, 1, 1, 1, 1), dtype='i4')],
                          batch_size=hparams.batch_size,
                          epochs=1, validation_data=(valid_X, valid_Y),
                          verbose=1)  # callbacks=[checkpointer] was also tried
            # for key in A.history.keys():
            #     print(key)
            
            if A.history['val_probs_acc'][0] > val_acc:
                val_acc = A.history['val_probs_acc'][0]
                if val_acc >= 0.18:
                    model.save_weights(hparams.weights_path + '_' + str(i) + '_' + str(j) +
                                       '_' + str(int(val_acc*10000)) + '_amp2.h5',
                                       overwrite=True)


    print('Best acc:', val_acc)
    # import pdb; pdb.set_trace()

    '''
    ############################# EVALUATE #############################
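    # Rebuild the vocabulary (one token per line) so token ids can be mapped
    # back to words when printing attention below.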
    with open(hparams.vocab_path, 'r') as f:
        vocab = f.read().split('\n')

    # model.load_weights('weights/memLstm2_bicon2_profile/2hops_3_5_1180.h5')
    # model.load_weights('weights/memLstm2_bicon3_ubuntu_shrink/9hops_2_9_1880.h5')
    # model.load_weights('weights/memLstm2_bicon4_amp/2hops_3_9_1933_amp5.h5')
    # model.load_weights('weights/memLstm2_bicon4_bisum/2hops_2_8_2066_bisum.h5')
    model.load_weights('/ext2/dstc7/weights/memLstm2_bicon4_amp2/1hops_3_9_2020_amp2.h5')
    score = model.evaluate(valid_X, valid_Y)
    print(score)


    predict_X = valid_X
    target_X = valid_target

    (predict_Y,
     context_attention,
     responses_attention,
     responses_dot) = model.predict(predict_X, batch_size=50, verbose=1)
    predict_target = np.argmax(predict_Y, axis=-1)

    print(predict_Y.shape)
    print(context_attention.shape)
    print(responses_attention.shape)

    # context_argmax = [np.argsort(context_attention[i])[::-1] for i in range(len(predict_Y))]
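    # Keep the 5 highest-attention context positions per direction: argsort
    # ascending, slice the tail, then flip to descending order.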
    context_argmax = np.argsort(context_attention, axis=-1)
    # context_argmax = context_argmax[:, :, hparams.max_context_len - hparams.hops:]
    context_argmax = context_argmax[:, :, hparams.max_context_len - 5:]
    context_argmax = np.flip(context_argmax, axis=-1)
    print('context_argmax:', context_argmax.shape)
    responses_attention = np.swapaxes(responses_attention, 1, 3)
    responses_argmax = np.argmax(responses_attention, axis=-1)

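    # Rank the candidate responses for each sample by predicted probability and
    # keep the 10 best.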
    sorted_predict_Y = [np.argsort(predict_Y[i])[::-1] for i in range(len(predict_Y))]
    prediction_set = [(target_X[i], sorted_predict_Y[i][:10]) for i in range(len(predict_Y))]
    # with open('valid_predict10_result.pickle','wb') as f:
    #     pickle.dump(prediction_set, f)    

    correct_sample = 0
    wrong_sample = 0
    for idx, value in enumerate(prediction_set):
        if value[0] == value[1][0]:
            correct_sample += 1
        else:
            wrong_sample += 1

    print("Among {} samples, model predicted {} samples correct, {} samples wrong.".format(
        len(predict_Y), correct_sample, wrong_sample))
    
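    # For each correctly predicted sample, print the top attended context
    # positions and tokens for the forward / backward attention directions.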
    for i in range(valid_target.shape[0]):
        context = valid_context[i]
        context_att = context_attention[i]
        context_arg = context_argmax[i]
        responses_arg = responses_argmax[i]
        if valid_target[i] == predict_target[i]:
            correct_sample += 1
            print(i, 'sample: (attention on Forward / Backward)')
            # print(context_argmax[i][0])
            # print(valid_context[i][ context_argmax[i][0] ])
            # print(vocab[ valid_context[i][context_argmax[i][0]] ])
            print('Attention: ',
                  [context_att[0][context_arg[0][j]] for j in range(len(context_arg[0]))],
                  ' / ',
                  [context_att[1][context_arg[1][j]] for j in range(len(context_arg[1]))],
                  ' / ')
            print('Context :',
                  [vocab[context[context_arg[0][j]] - 1] + '(' + str(context_arg[0][j]) + ')'
                   for j in range(len(context_arg[0]))],
                  ' / ',
                  [vocab[context[context_arg[1][j]] - 1] + '(' + str(context_arg[1][j]) + ')'
                   for j in range(len(context_arg[1]))])
            print('Response:',
                  [vocab[context[responses_arg[predict_target[i]][0][j]] - 1] +
                   '(' + str(responses_arg[predict_target[i]][0][j]) + ')'
                   for j in range(hparams.hops)],
                  ' / ',
                  [vocab[context[responses_arg[predict_target[i]][1][j]] - 1] +
                   '(' + str(responses_arg[predict_target[i]][1][j]) + ')'
                   for j in range(hparams.hops)], '\n')

    # import pdb; pdb.set_trace()
    np.save('context_attention.npy', context_attention)
    np.save('responses_attention.npy', responses_attention)
    np.save('responses_dot.npy', responses_dot)