Example #1
def test_matchzoo():
    # Assumes module-level imports for Params, qa, models and units,
    # as shown in the other examples on this page.
    params = Params()
    config_file = 'config/qalocal.ini'    # define dataset in the config
    params.parse_config(config_file)
    params.network_type = "anmm.ANMM"
    
    reader = qa.setup(params)
    qdnn = models.setup(params)
    model = qdnn.getModel()
    
    
    model.compile(loss = params.loss,
                optimizer = units.getOptimizer(name=params.optimizer,lr=params.lr),
                metrics=['accuracy'])
    model.summary()
    
#    generators = [reader.getTrain(iterable=False) for i in range(params.epochs)]
#    q,a,score = reader.getPointWiseSamples()
#    model.fit(x = [q,a],y = score,epochs = 1,batch_size =params.batch_size)
    
    def gen():
        while True:
            for sample in reader.getPointWiseSamples(iterable = True):
                yield sample
    model.fit_generator(gen(),epochs = 2,steps_per_epoch=1000)
def test_rep():
    # Params and units are assumed to be imported at module level,
    # as in the other examples on this page.
    import models.representation as models
    params = Params()
    config_file = 'config/local.ini'    # define dataset in the config
    params.parse_config(config_file)
    import dataset
    reader = dataset.setup(params)
    params = dataset.process_embedding(reader,params)
    qdnn = models.setup(params)
    model = qdnn.getModel()
    
    model.compile(loss = params.loss,
                optimizer = units.getOptimizer(name=params.optimizer,lr=params.lr),
                metrics=['accuracy'])
    model.summary()
    (train_x, train_y),(test_x, test_y),(val_x, val_y) = reader.get_processed_data()
#    print(train_x.shape,train_y.shape)
    history = model.fit(x=train_x, y = train_y, batch_size = params.batch_size, epochs= params.epochs,validation_data= (test_x, test_y))

    evaluation = model.evaluate(x = val_x, y = val_y)
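
Both functions above read every hyperparameter they use (loss, optimizer, lr, batch_size, epochs, network_type, ...) from an INI file through params.parse_config. The Params class itself does not appear on this page; the snippet below is only a minimal sketch of what such a parser could look like using the standard library. The section name "COMMON" and the automatic type conversion are assumptions, not the project's actual behaviour.

import configparser

class ParamsSketch:
    """Minimal stand-in for the project's Params class (illustration only)."""

    def parse_config(self, config_file, section="COMMON"):
        parser = configparser.ConfigParser()
        parser.read(config_file)
        for key, value in parser.items(section):
            # Crude type guessing: try int, then float, otherwise keep the string.
            for cast in (int, float):
                try:
                    value = cast(value)
                    break
                except ValueError:
                    pass
            setattr(self, key, value)

# Usage: params = ParamsSketch(); params.parse_config('config/qalocal.ini')
# afterwards params.lr, params.batch_size, params.epochs, ... are plain attributes.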
Example #3
def run(params, reader):
    # dataset, models, units, logger and save_experiment are assumed to be
    # imported at module level.
    params = dataset.process_embedding(reader, params)
    qdnn = models.setup(params)
    model = qdnn.getModel()
    
    model.compile(loss = params.loss,
              optimizer = units.getOptimizer(name=params.optimizer,lr=params.lr),
              metrics=['accuracy'])
    
    model.summary()    
    (train_x, train_y),(test_x, test_y),(val_x, val_y) = reader.get_processed_data()
    
    #pretrain_x, pretrain_y = dataset.get_sentiment_dic_training_data(reader,params)
    #model.fit(x=pretrain_x, y = pretrain_y, batch_size = params.batch_size, epochs= 3,validation_data= (test_x, test_y))
    
    history = model.fit(x=train_x, y = train_y, batch_size = params.batch_size, epochs= params.epochs,
                        validation_data= (test_x, test_y),callbacks=[logger.getCSVLogger()])
    
    evaluation = model.evaluate(x = val_x, y = val_y)
    save_experiment(model, params, evaluation, history, reader)
    #save_experiment(model, params, evaluation, history, reader, config_file)

    return history,evaluation
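
save_experiment is called at the end of run but is not defined anywhere on this page. The sketch below shows one plausible shape for such a helper, persisting the model, the parameter string, the evaluation scores and the training history; the directory layout and file names are invented for illustration.

import json
import os
import time

def save_experiment_sketch(model, params, evaluation, history, reader):
    # `reader` is accepted to match the call above but is not used in this sketch.
    # One time-stamped directory per run (layout is an assumption).
    out_dir = os.path.join("experiments", time.strftime("%Y%m%d-%H%M%S"))
    os.makedirs(out_dir, exist_ok=True)

    model.save(os.path.join(out_dir, "model.h5"))  # full Keras model
    with open(os.path.join(out_dir, "params.txt"), "w") as f:
        f.write(params.to_string())  # to_string() is used elsewhere on this page

    scores = evaluation if isinstance(evaluation, (list, tuple)) else [evaluation]
    with open(os.path.join(out_dir, "results.json"), "w") as f:
        json.dump({"evaluation": [float(s) for s in scores],
                   "history": {k: [float(v) for v in vs]
                               for k, vs in history.history.items()}},
                  f, indent=2)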
Example #4
def run(params):
    # models, loss, units, logger, pd (pandas), K (the Keras backend) and
    # batch_softmax_with_first_item are assumed to be imported at module level.
    evaluation = []
    #    params=dataset.classification.process_embedding(reader,params)
    qdnn = models.setup(params)
    model = qdnn.getModel()
    model.summary()
    if hasattr(loss.pairwise_loss, params.loss):
        loss_func = getattr(loss.pairwise_loss, params.loss)
    else:
        loss_func = params.loss
    optimizer = units.getOptimizer(name=params.optimizer, lr=params.lr)
    #

    #    test_data = [to_array(i,params.max_sequence_length) for i in test_data]
    if hasattr(loss.pairwise_loss, params.metric_type):
        metric_func = getattr(loss.pairwise_loss, params.metric_type)
    else:
        metric_func = params.metric_type

    model.compile(
        loss=loss_func,
        optimizer=optimizer,
        metrics=[metric_func])
    # pairwise:
    # loss = identity_loss
    # metric = precision_batch

    # pointwise:
    # loss = categorical_hinge or mean_squared_error
    # metric = acc or mean_squared_error

    # classification:
    # loss = mean_squared_error
    # metric = acc

    if params.dataset_type == 'qa':
        test_x, test_y = params.reader.get_test_2(iterable=False)
        #        train_x,train_y = params.reader.get_train_2(iterable = False, sampling_per_question = True)
        #        model.fit(x=train_x, y = train_y, batch_size = params.batch_size, epochs= params.epochs,validation_data= (test_x, test_y))
        #        print(model.evaluate(x = test_x, y =test_y))

        #
        for i in range(params.epochs):
            model.fit_generator(
                params.reader.get_train_2(iterable=True,
                                          sampling_per_question=True).__iter__(),
                epochs=1,
                steps_per_epoch=int(params.reader.num_samples /
                                    params.reader.batch_size),
                verbose=True)
            y_pred = model.predict(x=test_x)
            score = batch_softmax_with_first_item(
                y_pred)[:, 1] if params.onehot else y_pred
            metric = params.reader.evaluate(score, mode="test")
            evaluation.append(metric)
            print(metric)
            logger.info(metric)
        df = pd.DataFrame(evaluation, columns=["map", "mrr", "p1"])


#        generator = params.reader.get_train_2(iterable = True, sampling_per_question = False,need_balanced=True,always=True,balance_temperature=0.5)
#        model.fit_generator(generator, epochs= params.epochs,validation_data= (test_x, test_y),steps_per_epoch=100)
#
#
#        test_data = params.reader.get_test(iterable = False)
#        y_pred = model.predict(x = test_data)
#        score = batch_softmax_with_first_item(y_pred)[:,1]  if params.onehot else y_pred
#
#        metric = params.reader.evaluate(score, mode = "test",acc=True)
#        evaluation.append(metric)
#        print(metric)
#
#        model.evaluate(x = test_x, y = test_y)

#        for i in range(params.epochs):
#            model.fit_generator(params.reader.get_train_2(iterable = True,sampling_per_question = False).__iter__(),epochs = 1,steps_per_epoch = int(reader.num_samples/reader.batch_size),verbose = True)
#
#            print(model.evaluate(x = test_x, y =test_y))

#        from models.match import keras as models
#        for i in range(params.epochs):
##            model.fit_generator(params.reader.batch_gen(params.reader.get_train(iterable = True)),epochs = 1,steps_per_epoch=int(len(reader.datas["train"])/reader.batch_size),verbose = True)
#            model.fit_generator(params.reader.get_train_2(iterable = True),epochs = 1)
#            y_pred = model.predict(x = test_x)
#            score = batch_softmax_with_first_item(y_pred)[:,1]  if params.onehot else y_pred
#
#            metric = params.reader.evaluate(score, mode = "test")
#            evaluation.append(metric)
#            print(metric)
#            logger.info(metric)

    elif params.dataset_type == 'classification':
        #        from models import representation as models

        #    model.summary()
        #        train_data = params.reader.get_train(iterable = False)
        #        test_data = params.reader.get_test(iterable = False)
        #        val_data =params.reader.get_val(iterable = False)
        #    #    (train_x, train_y),(test_x, test_y),(val_x, val_y) = reader.get_processed_data()
        #        train_x, train_y = train_data
        #        test_x, test_y = test_data
        #        val_x, val_y = val_data
        train_x, train_y = params.reader.get_train(iterable=False)
        test_x, test_y = params.reader.get_test(iterable=False)
        val_x, val_y = params.reader.get_val(iterable=False)

        history = model.fit(x=train_x,
                            y=train_y,
                            batch_size=params.batch_size,
                            epochs=params.epochs,
                            validation_data=(test_x, test_y))

        metric = model.evaluate(
            x=val_x, y=val_y)  # !!!!!! change the order to val and test

        evaluation.append(metric)
        logger.info(metric)
        print(history)
        print(metric)

        df = pd.DataFrame(evaluation, columns=["map", "mrr", "p1"])

    logger.info("\n".join(
        [params.to_string(), "score: " + str(df.max().to_dict())]))

    K.clear_session()
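
Examples #4 and #6 resolve the loss and the metric with the same pattern: look the configured name up on loss.pairwise_loss first and fall back to the plain string so that Keras can interpret built-in names such as 'categorical_hinge' or 'acc'. The repeated hasattr/getattr pairs can be folded into one small helper; this is only a refactoring sketch and the helper name is not part of the project.

def resolve_keras_objective(name, module):
    """Return a callable from `module` if one matches `name`, otherwise return
    the name unchanged so that Keras resolves it as a built-in."""
    return getattr(module, name) if hasattr(module, name) else name

# Mirrors the compile calls above:
# loss_func = resolve_keras_objective(params.loss, loss.pairwise_loss)
# metric_func = resolve_keras_objective(params.metric_type, loss.pairwise_loss)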
Example #5
def test_match():  # placeholder name; the original def line is missing from this snippet
    from loss import *

    from models.match import keras as models
    from params import Params
    params = Params()

    config_file = 'config/qalocal.ini'  # define dataset in the config
    params.parse_config(config_file)

    reader = qa.setup(params)
    qdnn = models.setup(params)
    model = qdnn.getModel()

    from loss import *
    model.compile(loss=rank_hinge_loss({'margin': 0.2}),
                  optimizer=units.getOptimizer(name=params.optimizer,
                                               lr=params.lr),
                  metrics=['accuracy'])
    model.summary()

    #    generators = [reader.getTrain(iterable=False) for i in range(params.epochs)]
    #    [q,a,score] = reader.getPointWiseSamples()
    #    model.fit(x = [q,a,a],y = [q,a,q],epochs = 10,batch_size =params.batch_size)

    #    def gen():
    #        while True:
    #            for sample in reader.getTrain(iterable = True):
    #                yield sample
    model.fit_generator(reader.getPointWiseSamples4Keras(),
                        epochs=20,
                        steps_per_epoch=1000)
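
rank_hinge_loss is pulled in by `from loss import *` and constructed here with a margin of 0.2, but its body is not shown on this page. The sketch below gives the usual form of a pairwise rank hinge loss, assuming the batch interleaves positive and negative candidates (even rows positive, odd rows negative), which is how MatchZoo-style losses are commonly written; the dict-style argument of the project's version is replaced by a plain keyword, and tf.keras is assumed for the backend.

from tensorflow.keras import backend as K

def rank_hinge_loss_sketch(margin=0.2):
    # y_true is ignored, as is typical for pairwise ranking losses; the model's
    # scores for a positive and its paired negative are compared directly.
    def loss(y_true, y_pred):
        pos = y_pred[0::2, :]   # scores of positive candidates (assumed layout)
        neg = y_pred[1::2, :]   # scores of the paired negatives
        return K.mean(K.maximum(0.0, margin + neg - pos))
    return loss

# model.compile(loss=rank_hinge_loss_sketch(margin=0.2), ...)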
Example #6
def run(params):
    # `reader` is assumed to be a module-level dataset reader (also reachable as
    # params.reader); loss, units, logger, pd, K, to_array and
    # batch_softmax_with_first_item are assumed module-level imports as well.
    if "bert" in params.network_type.lower():
        # BERT-style encoders use a fixed maximum sequence length here.
        params.max_sequence_length = 512
        reader.max_sequence_length = 512
    evaluation = []
#    params=dataset.classification.process_embedding(reader,params)
    qdnn = models.setup(params)
    model = qdnn.getModel()
    model.summary()
    if hasattr(loss.pairwise_loss, params.loss):
        loss_func = getattr(loss.pairwise_loss, params.loss)
    else:
        loss_func = params.loss
    optimizer = units.getOptimizer(name=params.optimizer,lr=params.lr)
    
    test_data = params.reader.get_test(iterable = False)
    test_data = [to_array(i,reader.max_sequence_length) for i in test_data]
    if hasattr(loss.pairwise_loss, params.metric_type):
        metric_func = getattr(loss.pairwise_loss, params.metric_type)
    else:
        metric_func = params.metric_type
    
    model.compile(loss=loss_func,
                  optimizer=optimizer,
                  metrics=[metric_func])
    # pairwise:
    # loss = identity_loss
    # metric = precision_batch

    # pointwise:
    # loss = categorical_hinge or mean_squared_error
    # metric = acc or mean_squared_error
    
    # classification:
    # loss = mean_squared_error
    # metric = acc
      
    if params.dataset_type == 'qa':
#        from models.match import keras as models   
        for i in range(params.epochs):
            model.fit_generator(reader.batch_gen(reader.get_train(iterable = True)),epochs = 1,steps_per_epoch=int(len(reader.datas["train"])/reader.batch_size),verbose = True)        
            y_pred = model.predict(x = test_data) 
            score = batch_softmax_with_first_item(y_pred)[:,1]  if params.onehot else y_pred
                
            metric = reader.evaluate(score, mode = "test")
            evaluation.append(metric)
            print(metric)
            logger.info(metric)
        df=pd.DataFrame(evaluation,columns=["map","mrr","p1"]) 

            
    elif params.dataset_type == 'classification':
#        from models import representation as models   
        
    #    model.summary()    
        train_data = params.reader.get_train(iterable = False)
        test_data = params.reader.get_test(iterable = False)
        val_data =params.reader.get_val(iterable = False)
    #    (train_x, train_y),(test_x, test_y),(val_x, val_y) = reader.get_processed_data()
        train_x, train_y = train_data
        test_x, test_y = test_data
        val_x, val_y = val_data
        if "bert" in params.network_type.lower() :
            train_x, train_x_mask = to_array(train_x,reader.max_sequence_length,use_mask=True) 
            test_x,test_x_mask =  to_array(test_x,reader.max_sequence_length,use_mask=True)
            val_x,val_x_mask =  to_array(val_x,reader.max_sequence_length,use_mask=True)
                #pretrain_x, pretrain_y = dataset.get_sentiment_dic_training_data(reader,params)
            #model.fit(x=pretrain_x, y = pretrain_y, batch_size = params.batch_size, epochs= 3,validation_data= (test_x, test_y))
        
            history = model.fit(x=[train_x,train_x_mask], y = train_y, batch_size = params.batch_size, epochs= params.epochs,validation_data= ([test_x,test_x_mask], test_y))
        
            metric = model.evaluate(x = [val_x,val_x_mask], y = val_y)   # !!!!!! change the order to val and test
        else:
            train_x = to_array(train_x,reader.max_sequence_length,use_mask=False) 
            test_x =  to_array(test_x,reader.max_sequence_length,use_mask=False)
            val_x =  to_array(val_x,reader.max_sequence_length,use_mask=False)
            #pretrain_x, pretrain_y = dataset.get_sentiment_dic_training_data(reader,params)
            #model.fit(x=pretrain_x, y = pretrain_y, batch_size = params.batch_size, epochs= 3,validation_data= (test_x, test_y))
        
            history = model.fit(x=train_x, y = train_y, batch_size = params.batch_size, epochs= params.epochs,validation_data= (test_x, test_y))
        
            metric = model.evaluate(x = val_x, y = val_y)   # !!!!!! change the order to val and test
            
        evaluation.append(metric)
        logger.info(metric)
        print(metric)

        df=pd.DataFrame(evaluation,columns=["map","mrr","p1"])  
        
    logger.info("\n".join([params.to_string(),"score: "+str(df.max().to_dict())]))

    K.clear_session()
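
to_array is used throughout Example #6 to turn the reader's variable-length token-id sequences into fixed-size arrays and, for BERT-style models, an accompanying attention mask, but its implementation is not shown on this page. A plausible minimal version is sketched below; the padding value 0 and the exact return convention are assumptions.

import numpy as np

def to_array_sketch(sequences, max_sequence_length, use_mask=False):
    # Pad or truncate each integer token-id sequence to max_sequence_length.
    x = np.zeros((len(sequences), max_sequence_length), dtype="int32")
    mask = np.zeros((len(sequences), max_sequence_length), dtype="int32")
    for i, seq in enumerate(sequences):
        seq = list(seq)[:max_sequence_length]
        x[i, :len(seq)] = seq
        mask[i, :len(seq)] = 1  # 1 marks real tokens, 0 marks padding
    return (x, mask) if use_mask else x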