Example 1
import os

import torch


def main():
    # get_args, init, do_train and do_eval are defined elsewhere in this module.
    args = get_args()
    f = os.path.normpath  # shorthand for normalizing user-supplied paths
    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    if args.do_init:
        # Build the word and tag dictionaries from the raw data files.
        init(f(args.train_file), f(args.dev_file), f(args.test_file),
             f(args.word_dict_path), f(args.tag_dict_path))
        return
    if args.do_train:
        do_train(f(args.train_file),
                 f(args.output_dir), f(args.word_dict_path),
                 f(args.tag_dict_path), args.max_seq_len, args.embed_dim,
                 args.hidden_dim, args.lr, args.batch_size, args.epochs,
                 args.print_step, device)
    if args.do_eval:
        do_eval(f(args.test_file), f(args.word_dict_path),
                f(args.tag_dict_path), args.max_seq_len,
                args.embed_dim, args.hidden_dim, f(args.output_dir),
                f(args.eval_log_dir), device)
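
The get_args helper is not shown above. A minimal sketch of what it might look like, assuming an argparse parser whose flag names match the attributes used in main() (all default values here are hypothetical):

import argparse

def get_args():
    # Hypothetical reconstruction: only the flags referenced in main() above.
    parser = argparse.ArgumentParser()
    parser.add_argument("--train_file", default="data/train.txt")
    parser.add_argument("--dev_file", default="data/dev.txt")
    parser.add_argument("--test_file", default="data/test.txt")
    parser.add_argument("--word_dict_path", default="data/word.dict")
    parser.add_argument("--tag_dict_path", default="data/tag.dict")
    parser.add_argument("--output_dir", default="output")
    parser.add_argument("--eval_log_dir", default="logs")
    parser.add_argument("--max_seq_len", type=int, default=128)
    parser.add_argument("--embed_dim", type=int, default=128)
    parser.add_argument("--hidden_dim", type=int, default=256)
    parser.add_argument("--lr", type=float, default=1e-3)
    parser.add_argument("--batch_size", type=int, default=32)
    parser.add_argument("--epochs", type=int, default=10)
    parser.add_argument("--print_step", type=int, default=100)
    parser.add_argument("--no_cuda", action="store_true")
    parser.add_argument("--do_init", action="store_true")
    parser.add_argument("--do_train", action="store_true")
    parser.add_argument("--do_eval", action="store_true")
    return parser.parse_args()
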
Example 2
def epoch_end(self, run_context):
    """Run evaluation at the end of an epoch."""
    cb_param = run_context.original_args()
    cur_epoch = cb_param.cur_epoch_num
    # Evaluate every eval_epoch_frq epochs, and always on the final epoch.
    if cur_epoch % self.eval_epoch_frq == 0 or cur_epoch == self.epoch_size:
        # result = do_eval(self.eval_network, self.ds_val, self.metrics, self.rank_id, cur_epoch=cur_epoch)
        result = do_eval(self.ds_val, self.eval_network)
        if self.best_result is None or self.best_result < result:
            self.best_result = result
        if self.rank_id == 0:
            print(f"best evaluation result = {self.best_result}",
                  flush=True)
        # Accumulate a per-metric history when a result dict is tracked.
        if isinstance(self.result_evaluation, dict):
            for k, v in result.items():
                self.result_evaluation.setdefault(k, []).append(v)
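
This method follows the MindSpore callback protocol: run_context.original_args() exposes training state such as cur_epoch_num. A minimal sketch of the class such a method might live on and how it could be attached to training, assuming the constructor arguments below (all hypothetical):

from mindspore.train.callback import Callback

class EvalCallback(Callback):
    # The epoch_end method shown above would be defined on this class.
    def __init__(self, eval_network, ds_val, eval_epoch_frq, epoch_size, rank_id=0):
        self.eval_network = eval_network
        self.ds_val = ds_val
        self.eval_epoch_frq = eval_epoch_frq
        self.epoch_size = epoch_size
        self.rank_id = rank_id
        self.best_result = None
        self.result_evaluation = {}

# Typical wiring: model.train(epoch_size, ds_train,
#                             callbacks=[EvalCallback(net, ds_val, 1, epoch_size)])
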
Example 3
import paddle
import paddle.fluid as fluid

from eval import do_eval
from train import do_train
from predict import do_predict
from inference_model import do_save_inference_model

from dgu.utils.configure import PDConfig

if __name__ == "__main__":

    args = PDConfig(yaml_file="./data/config/dgu.yaml")
    args.build()
    args.Print()

    if args.do_train:
        do_train(args)

    if args.do_predict:
        do_predict(args)

    if args.do_eval:
        do_eval(args)

    if args.do_save_inference_model:
        do_save_inference_model(args)

# vim: set ts=4 sw=4 sts=4 tw=100:
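
Examples 3 and 5 share the same pattern: boolean flags on the config object select which stage runs, and the stages are independent, so several can run in one invocation. A minimal sketch of the same logic written as an explicit dispatch table, assuming args exposes the boolean attributes used above:

# Hypothetical restructuring of the flag checks into a dispatch table.
ACTIONS = [
    ("do_train", do_train),
    ("do_predict", do_predict),
    ("do_eval", do_eval),
    ("do_save_inference_model", do_save_inference_model),
]

for flag, action in ACTIONS:
    if getattr(args, flag, False):
        action(args)
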
Example 4
import json
import logging
import os

import numpy as np
import pandas as pd
from keras.callbacks import EarlyStopping
from sklearn.metrics import (accuracy_score, average_precision_score,
                             f1_score, precision_score, r2_score,
                             recall_score, roc_auc_score)

# common, Config, build_model, load_sparse_csr, load_data_preprocesed,
# load_data_hf5, load_data_hf5_memory, batch_block_generator,
# obtain_predictions and do_eval are project-local helpers.


def process(params, with_predict=True, with_eval=True):
    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)
    params['cnn']['n_out'] = int(params['dataset']['dim'])
    #params['cnn']['n_frames'] =  int(params['dataset']['window'] * SR / float(HR))
    with_metadata = params['dataset']['with_metadata']
    only_metadata = params['dataset']['only_metadata']
    metadata_source = params['dataset']['meta-suffix']
    def meta_path(suffix, ext='npy'):
        # Path to the per-dataset training matrix for one metadata source.
        return common.TRAINDATA_DIR + '/X_train_%s_%s.%s' % (
            suffix, params['dataset']['dataset'], ext)

    # Load the side-information ("metadata") matrices and record their
    # dimensionality so the network input layers can be sized accordingly.
    if with_metadata:
        sparse_meta = ('w2v' not in metadata_source
                       and 'model' not in metadata_source
                       and params['dataset']['sparse'])
        if 'w2v' in metadata_source:
            X_meta = np.load(meta_path(metadata_source))[:, :int(params['cnn']['sequence_length'])]
            params['cnn']['n_metafeatures'] = len(X_meta[0])
        elif not sparse_meta:
            X_meta = np.load(meta_path(metadata_source))
            params['cnn']['n_metafeatures'] = len(X_meta[0])
        else:
            X_meta = load_sparse_csr(meta_path(metadata_source, 'npz')).todense()
            params['cnn']['n_metafeatures'] = X_meta.shape[1]
        # Extra metadata sources (meta-suffix2..4) follow the same pattern;
        # only their dimensionality is recorded here.
        for n in (2, 3, 4):
            key = 'meta-suffix%d' % n
            if key in params['dataset']:
                if sparse_meta:
                    X_extra = load_sparse_csr(meta_path(params['dataset'][key], 'npz'))
                    params['cnn']['n_metafeatures%d' % n] = X_extra.shape[1]
                else:
                    X_extra = np.load(meta_path(params['dataset'][key]))
                    params['cnn']['n_metafeatures%d' % n] = len(X_extra[0])
        print(X_meta.shape)
    else:
        X_meta = None

    config = Config(params)
    model_dir = os.path.join(common.MODELS_DIR, config.model_id)
    common.ensure_dir(common.MODELS_DIR)
    common.ensure_dir(model_dir)
    model_file = os.path.join(model_dir, config.model_id + common.MODEL_EXT)
    logging.debug("Building Network...")
    model = build_model(config)
    model.summary()  # summary() prints directly; wrapping it in print() would emit "None"
    #plot(model, to_file='model2.png', show_shapes=True)
    trained_model = config.get_dict()

    # Save model
    #plot(model, to_file=os.path.join(model_dir, config.model_id + PLOT_EXT))
    common.save_model(model, model_file)

    logging.debug(trained_model["model_id"])

    logging.debug("Loading Data...")

    with_generator = True  # stream training batches from disk instead of preloading

    if only_metadata:
        X_train, Y_train, X_val, Y_val, X_test, Y_test = \
            load_data_preprocesed(params, config.x_path, config.y_path,
                                  params['dataset']['dataset'],
                                  config.training_params["validation"],
                                  config.training_params["test"],
                                  config.dataset_settings["nsamples"],
                                  with_metadata, only_metadata, metadata_source)
        if 'meta-suffix2' in params['dataset']:
            X_train2, Y_train2, X_val2, Y_val2, X_test2, Y_test2 = \
                load_data_preprocesed(params, config.x_path, config.y_path,
                                      params['dataset']['dataset'],
                                      config.training_params["validation"],
                                      config.training_params["test"],
                                      config.dataset_settings["nsamples"],
                                      with_metadata, only_metadata,
                                      params['dataset']['meta-suffix2'])
            X_train = [X_train, X_train2]
            X_val = [X_val, X_val2]
            X_test = [X_test, X_test2]
            print("X_train bi", len(X_train))
        if 'meta-suffix3' in params['dataset']:
            X_train3, Y_train3, X_val3, Y_val3, X_test3, Y_test3 = \
                load_data_preprocesed(params, config.x_path, config.y_path,
                                      params['dataset']['dataset'],
                                      config.training_params["validation"],
                                      config.training_params["test"],
                                      config.dataset_settings["nsamples"],
                                      with_metadata, only_metadata,
                                      params['dataset']['meta-suffix3'])
            X_train.append(X_train3)
            X_val.append(X_val3)
            X_test.append(X_test3)
            print("X_train tri", len(X_train))
        if 'meta-suffix4' in params['dataset']:
            X_train4, Y_train4, X_val4, Y_val4, X_test4, Y_test4 = \
                load_data_preprocesed(params, config.x_path, config.y_path,
                                      params['dataset']['dataset'],
                                      config.training_params["validation"],
                                      config.training_params["test"],
                                      config.dataset_settings["nsamples"],
                                      with_metadata, only_metadata,
                                      params['dataset']['meta-suffix4'])
            X_train.append(X_train4)
            X_val.append(X_val4)
            X_test.append(X_test4)
            print("X_train four", len(X_train))
    else:
        if with_generator:
            factors = np.load(common.DATASETS_DIR + '/y_train_' + config.y_path + '.npy')
            index_factors = open(common.DATASETS_DIR + '/items_index_train_'
                                 + params['dataset']['dataset'] + '.tsv').read().splitlines()
            # Map item ids to their ground-truth factor vectors.
            id2gt = dict(zip(index_factors, factors))
            X_val, Y_val, X_test, Y_test, N_train = load_data_hf5_memory(
                params, config.training_params["validation"],
                config.training_params["test"], config.y_path, id2gt, X_meta,
                config.training_params["val_from_file"])
            if params['dataset']['nsamples'] != 'all':
                N_train = min(N_train, params['dataset']['nsamples'])

        else:
            X_train, Y_train, X_val, Y_val, X_test, Y_test, N_train = load_data_hf5(
                params, config.training_params["validation"],
                config.training_params["test"])

    trained_model["whiten_scaler"] = common.TRAINDATA_DIR+'/scaler_%s.pk' % config.x_path
    logging.debug("Training...")

    if config.model_arch["final_activation"] == 'softmax':
        monitor_metric = 'val_categorical_accuracy'
    else:
        monitor_metric = 'val_loss'
    early_stopping = EarlyStopping(monitor=monitor_metric, patience=4)

    if only_metadata:
        # Keras 1 API: nb_epoch was renamed to epochs in Keras 2.
        epochs = model.fit(X_train, Y_train,
                           batch_size=config.training_params["n_minibatch"],
                           #shuffle='batch',
                           nb_epoch=config.training_params["n_epochs"],
                           verbose=2, validation_data=(X_val, Y_val),
                           callbacks=[early_stopping])
    else:
        if with_generator:
            print(N_train)
            # Keras 1 API: fit_generator / samples_per_epoch became
            # fit / steps_per_epoch in Keras 2.
            epochs = model.fit_generator(
                batch_block_generator(params, config.y_path, N_train, id2gt,
                                      X_meta,
                                      config.training_params["val_from_file"]),
                samples_per_epoch=N_train - (N_train % config.training_params["n_minibatch"]),
                nb_epoch=config.training_params["n_epochs"],
                verbose=2,
                validation_data=(X_val, Y_val),
                callbacks=[early_stopping])
        else:
            epochs = model.fit(X_train, Y_train,
                               batch_size=config.training_params["n_minibatch"],
                               shuffle='batch',
                               nb_epoch=config.training_params["n_epochs"],
                               verbose=2,
                               validation_data=(X_val, Y_val),
                               callbacks=[early_stopping])

    model.save_weights(os.path.join(model_dir, config.model_id + common.WEIGHTS_EXT))
    logging.debug("Saving trained model %s in %s..." %
                  (trained_model["model_id"], common.DEFAULT_TRAINED_MODELS_FILE))
    common.save_trained_model(common.DEFAULT_TRAINED_MODELS_FILE, trained_model)

    logging.debug("Evaluating...")

    print(X_test[0].shape, X_test[1].shape)
    preds = model.predict(X_test)
    print(preds.shape)
    if params["dataset"]["evaluation"] in ['binary', 'multiclass']:
        # Threshold the predicted probabilities at 0.5 to get hard labels.
        y_pred = (preds > 0.5).astype('int32')
        acc = accuracy_score(Y_test, y_pred)
        prec = precision_score(Y_test, y_pred, average='macro')
        recall = recall_score(Y_test, y_pred, average='macro')
        f1 = f1_score(Y_test, y_pred, average='macro')
        print('Accuracy', acc)
        print("%.3f\t%.3f\t%.3f" % (prec, recall, f1))
    if params["dataset"]["fact"] == 'class':
        good_classes = np.nonzero(Y_test.sum(0))[0]
        print(Y_test.shape,preds.shape)
        roc_auc=roc_auc_score(Y_test[:,good_classes],preds[:,good_classes])
        logging.debug('ROC-AUC '+str(roc_auc))
        pr_auc = average_precision_score(Y_test[:,good_classes],preds[:,good_classes])
        print('PR-AUC',pr_auc)
        r2 = roc_auc
    elif params["dataset"]["evaluation"] not in ['binary','multiclass','multilabel']:
        r2s = []
        for i,pred in enumerate(preds):
            r2 = r2_score(Y_test[i],pred)
            r2s.append(r2)
        r2 = np.asarray(r2s).mean()
        logging.debug('R2 avg '+str(r2))
    # Batch prediction
    if X_test[1].shape == Y_test[1].shape:
        score = model.evaluate(X_test, Y_test, verbose=0)
        logging.debug(score)
        logging.debug(model.metrics_names)
        print(score)
        trained_model["loss_score"] = score[0]
        trained_model["mse"] = score[1]
        if params["dataset"]["evaluation"] not in ['binary', 'multiclass', 'multilabel']:
            trained_model["r2"] = r2

        with open(common.DATA_DIR + '/results/train_results.txt', 'a') as fw:
            fw.write(trained_model["model_id"] + '\n')
            if params["training"]["loss_func"] == 'binary_crossentropy':
                fw.write('ROC-AUC: ' + str(roc_auc) + '\n')
                print('ROC-AUC: ' + str(roc_auc))
                fw.write('Loss: ' + str(score[0]) + ' (' + config.training_params["loss_func"] + ')\n')
                fw.write('MSE: ' + str(score[1]) + '\n')
            elif params["dataset"]["evaluation"] not in ['binary', 'multiclass', 'multilabel']:
                fw.write('R2 avg: ' + str(r2) + '\n')
                print('R2 avg: ' + str(r2))
                fw.write('Loss: ' + str(score[0]) + ' (' + config.training_params["loss_func"] + ')\n')
                fw.write('MSE: ' + str(score[1]) + '\n')
            fw.write(json.dumps(epochs.history) + "\n\n")

    if with_predict:
        trained_models = pd.read_csv(common.DEFAULT_TRAINED_MODELS_FILE, sep='\t')
        model_config = trained_models[trained_models["model_id"] == trained_model["model_id"]]
        model_config = model_config.to_dict(orient="list")
        testset = open(common.DATASETS_DIR + '/items_index_test_%s.tsv'
                       % config.dataset_settings["dataset"]).read().splitlines()
        if config.training_params["val_from_file"] and not only_metadata:
            predictions, predictions_index = obtain_predictions(
                model_config, testset, trained_model["model_id"],
                config.predicting_params["trim_coeff"], model=model,
                with_metadata=with_metadata, only_metadata=only_metadata,
                metadata_source=metadata_source, with_patches=True)
        else:
            predictions, predictions_index = obtain_predictions(
                model_config, testset, trained_model["model_id"],
                config.predicting_params["trim_coeff"], model=model,
                with_metadata=with_metadata, only_metadata=only_metadata,
                metadata_source=metadata_source)
        print("Predictions created")

    if with_eval:
        # Note: this assumes with_predict was True, so that `predictions`
        # and `predictions_index` are defined.
        do_eval(trained_model["model_id"], get_roc=True, get_map=True,
                get_p=True, predictions=predictions,
                predictions_index=predictions_index)
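
The load_sparse_csr helper used in the sparse branch above is not shown. A minimal sketch, assuming the matrix was saved with np.savez in the common data/indices/indptr/shape layout (the original project's save format may differ):

import numpy as np
from scipy.sparse import csr_matrix

def load_sparse_csr(filename):
    # Rebuild a CSR matrix from the arrays stored in an .npz archive.
    loader = np.load(filename)
    return csr_matrix((loader['data'], loader['indices'], loader['indptr']),
                      shape=loader['shape'])
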
Example 5
import sys
import numpy as np
import paddle
import paddle.fluid as fluid

from arg_config import ArgConfig, print_arguments

from train import do_train
from predict import do_predict
from eval import do_eval
from inference_model import do_save_inference_model

if __name__ == "__main__":

    args = ArgConfig()
    args = args.build_conf()
    print_arguments(args)

    if args.do_train:
        do_train(args)

    if args.do_predict:
        predictions = do_predict(args)

        # Evaluation consumes the predictions, so it only runs after do_predict.
        if args.do_eval:
            acc = do_eval(args, predictions)
            print("evaluation accuracy %.3f percent" % (acc * 100))

    if args.do_save_inference_model:
        do_save_inference_model(args)
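
The eval.do_eval imported above is not shown. A minimal sketch of an accuracy computation with the same call signature, assuming predictions pairs predicted and gold labels (both the pairing and the body are hypothetical):

def do_eval(args, predictions):
    # Hypothetical accuracy computation; assumes each element of
    # `predictions` is a (predicted_label, gold_label) pair.
    correct = sum(1 for pred, gold in predictions if pred == gold)
    return correct / len(predictions)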