def main():
    """CLI entry point: dispatch to init / train / eval based on parsed flags."""
    args = get_args()
    norm = os.path.normpath

    # Prefer CUDA unless it is unavailable or explicitly disabled.
    use_cuda = torch.cuda.is_available() and not args.no_cuda
    device = torch.device("cuda" if use_cuda else "cpu")

    # Initialization is exclusive: build the dictionaries and stop.
    if args.do_init:
        init(norm(args.train_file), norm(args.dev_file), norm(args.test_file),
             norm(args.word_dict_path), norm(args.tag_dict_path))
        return

    if args.do_train:
        do_train(norm(args.train_file), norm(args.output_dir),
                 norm(args.word_dict_path), norm(args.tag_dict_path),
                 args.max_seq_len, args.embed_dim, args.hidden_dim,
                 args.lr, args.batch_size, args.epochs,
                 args.print_step, device)

    if args.do_eval:
        do_eval(norm(args.test_file), norm(args.word_dict_path),
                norm(args.tag_dict_path), args.max_seq_len,
                args.embed_dim, args.hidden_dim,
                norm(args.output_dir), norm(args.eval_log_dir), device)
def epoch_end(self, run_context):
    """Run validation at the end of qualifying epochs and track the best result."""
    cur_epoch = run_context.original_args().cur_epoch_num

    # Evaluate every `eval_epoch_frq` epochs, and always on the final epoch.
    if cur_epoch % self.eval_epoch_frq != 0 and cur_epoch != self.epoch_size:
        return

    result = do_eval(self.ds_val, self.eval_network)

    # Keep the best result seen so far.
    if self.best_result is None or self.best_result < result:
        self.best_result = result

    # Only rank 0 prints, presumably to avoid duplicate logs in
    # multi-device runs — confirm against the launcher setup.
    if self.rank_id == 0:
        print(f"best evaluation result = {self.best_result}", flush=True)

    # When a collector dict is provided, append each metric value
    # to its per-metric history list (creating the list on first use).
    if isinstance(self.result_evaluation, dict):
        for name, value in result.items():
            self.result_evaluation.setdefault(name, []).append(value)
import paddle
import paddle.fluid as fluid
from eval import do_eval
from train import do_train
from predict import do_predict
from inference_model import do_save_inference_model
from dgu.utils.configure import PDConfig

if __name__ == "__main__":
    # All settings come from the YAML config; build() resolves and
    # finalizes them, Print() echoes the effective configuration.
    args = PDConfig(yaml_file="./data/config/dgu.yaml")
    args.build()
    args.Print()

    # Run each requested stage; the flags are independent and the
    # order (train -> predict -> eval -> export) is preserved.
    stages = (
        (args.do_train, do_train),
        (args.do_predict, do_predict),
        (args.do_eval, do_eval),
        (args.do_save_inference_model, do_save_inference_model),
    )
    for enabled, run in stages:
        if enabled:
            run(args)

# vim: set ts=4 sw=4 sts=4 tw=100:
def process(params, with_predict=True, with_eval=True):
    """Build, train and evaluate a model described by ``params``.

    Loads (optional) metadata feature matrices, builds the network, trains
    it (in-memory or via a generator), evaluates on the test split, appends
    a results summary to ``train_results.txt``, and optionally produces and
    evaluates test-set predictions.

    Args:
        params: nested configuration dict with 'dataset', 'cnn' and
            'training' sections.
        with_predict: when True, run obtain_predictions() on the test set.
        with_eval: when True, run do_eval() on those predictions.
            NOTE(review): do_eval reads ``predictions``/``predictions_index``,
            which are only bound when with_predict is True; calling with
            with_eval=True and with_predict=False raises NameError.
            Behavior preserved — confirm intended flag combinations.
    """
    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)
    params['cnn']['n_out'] = int(params['dataset']['dim'])
    #params['cnn']['n_frames'] = int(params['dataset']['window'] * SR / float(HR))
    with_metadata = params['dataset']['with_metadata']
    only_metadata = params['dataset']['only_metadata']
    metadata_source = params['dataset']['meta-suffix']

    # --- Metadata loading: up to four feature sources; each source's width
    # is recorded in params['cnn'] so build_model() can size its inputs.
    if with_metadata:
        if 'w2v' in metadata_source:
            # w2v features: the primary matrix is truncated to the
            # configured sequence length along axis 1.
            X_meta = np.load(common.TRAINDATA_DIR+'/X_train_%s_%s.npy' % (metadata_source, params['dataset']['dataset']))[:, :int(params['cnn']['sequence_length'])]
            params['cnn']['n_metafeatures'] = len(X_meta[0])
            if 'meta-suffix2' in params['dataset']:
                X_meta2 = np.load(common.TRAINDATA_DIR+'/X_train_%s_%s.npy' % (params['dataset']['meta-suffix2'], params['dataset']['dataset']))
                params['cnn']['n_metafeatures2'] = len(X_meta2[0])
            if 'meta-suffix3' in params['dataset']:
                X_meta3 = np.load(common.TRAINDATA_DIR+'/X_train_%s_%s.npy' % (params['dataset']['meta-suffix3'], params['dataset']['dataset']))
                params['cnn']['n_metafeatures3'] = len(X_meta3[0])
            if 'meta-suffix4' in params['dataset']:
                X_meta4 = np.load(common.TRAINDATA_DIR+'/X_train_%s_%s.npy' % (params['dataset']['meta-suffix4'], params['dataset']['dataset']))
                params['cnn']['n_metafeatures4'] = len(X_meta4[0])
        elif 'model' in metadata_source or not params['dataset']['sparse']:
            # Dense .npy features.
            X_meta = np.load(common.TRAINDATA_DIR+'/X_train_%s_%s.npy' % (metadata_source, params['dataset']['dataset']))
            params['cnn']['n_metafeatures'] = len(X_meta[0])
            if 'meta-suffix2' in params['dataset']:
                X_meta2 = np.load(common.TRAINDATA_DIR+'/X_train_%s_%s.npy' % (params['dataset']['meta-suffix2'], params['dataset']['dataset']))
                params['cnn']['n_metafeatures2'] = len(X_meta2[0])
            if 'meta-suffix3' in params['dataset']:
                X_meta3 = np.load(common.TRAINDATA_DIR+'/X_train_%s_%s.npy' % (params['dataset']['meta-suffix3'], params['dataset']['dataset']))
                params['cnn']['n_metafeatures3'] = len(X_meta3[0])
            if 'meta-suffix4' in params['dataset']:
                X_meta4 = np.load(common.TRAINDATA_DIR+'/X_train_%s_%s.npy' % (params['dataset']['meta-suffix4'], params['dataset']['dataset']))
                params['cnn']['n_metafeatures4'] = len(X_meta4[0])
        else:
            # Sparse .npz features: only the primary matrix is densified
            # (it is handed to the batch generator below).
            X_meta = load_sparse_csr(common.TRAINDATA_DIR+'/X_train_%s_%s.npz' % (metadata_source, params['dataset']['dataset'])).todense()
            params['cnn']['n_metafeatures'] = X_meta.shape[1]
            if 'meta-suffix2' in params['dataset']:
                X_meta2 = load_sparse_csr(common.TRAINDATA_DIR+'/X_train_%s_%s.npz' % (params['dataset']['meta-suffix2'], params['dataset']['dataset']))
                params['cnn']['n_metafeatures2'] = X_meta2.shape[1]
            if 'meta-suffix3' in params['dataset']:
                X_meta3 = load_sparse_csr(common.TRAINDATA_DIR+'/X_train_%s_%s.npz' % (params['dataset']['meta-suffix3'], params['dataset']['dataset']))
                # FIX: use shape[1] like the meta-suffix2 case above —
                # len() on a scipy sparse row raises TypeError.
                params['cnn']['n_metafeatures3'] = X_meta3.shape[1]
            if 'meta-suffix4' in params['dataset']:
                X_meta4 = load_sparse_csr(common.TRAINDATA_DIR+'/X_train_%s_%s.npz' % (params['dataset']['meta-suffix4'], params['dataset']['dataset']))
                # FIX: this previously wrote into n_metafeatures3,
                # clobbering the third source's width and never setting
                # the fourth; also shape[1] instead of len() on sparse.
                params['cnn']['n_metafeatures4'] = X_meta4.shape[1]
        print(X_meta.shape)
    else:
        X_meta = None

    # --- Build the network and persist its architecture.
    config = Config(params)
    model_dir = os.path.join(common.MODELS_DIR, config.model_id)
    common.ensure_dir(common.MODELS_DIR)
    common.ensure_dir(model_dir)
    model_file = os.path.join(model_dir, config.model_id + common.MODEL_EXT)
    logging.debug("Building Network...")
    model = build_model(config)
    print(model.summary())
    trained_model = config.get_dict()
    common.save_model(model, model_file)
    logging.debug(trained_model["model_id"])

    # --- Load training/validation/test data.
    logging.debug("Loading Data...")
    with_generator = True
    if only_metadata:
        # Everything fits in memory; load each metadata source and stack
        # the inputs into a list for multi-input models.
        X_train, Y_train, X_val, Y_val, X_test, Y_test = \
            load_data_preprocesed(params, config.x_path, config.y_path,
                                  params['dataset']['dataset'],
                                  config.training_params["validation"],
                                  config.training_params["test"],
                                  config.dataset_settings["nsamples"],
                                  with_metadata, only_metadata, metadata_source)
        if 'meta-suffix2' in params['dataset']:
            X_train2, Y_train2, X_val2, Y_val2, X_test2, Y_test2 = \
                load_data_preprocesed(params, config.x_path, config.y_path,
                                      params['dataset']['dataset'],
                                      config.training_params["validation"],
                                      config.training_params["test"],
                                      config.dataset_settings["nsamples"],
                                      with_metadata, only_metadata,
                                      params['dataset']['meta-suffix2'])
            X_train = [X_train, X_train2]
            X_val = [X_val, X_val2]
            X_test = [X_test, X_test2]
            print("X_train bi", len(X_train))
        if 'meta-suffix3' in params['dataset']:
            # NOTE(review): appending assumes meta-suffix2 was also set
            # (X_train must already be a list) — confirm configs guarantee it.
            X_train3, Y_train3, X_val3, Y_val3, X_test3, Y_test3 = \
                load_data_preprocesed(params, config.x_path, config.y_path,
                                      params['dataset']['dataset'],
                                      config.training_params["validation"],
                                      config.training_params["test"],
                                      config.dataset_settings["nsamples"],
                                      with_metadata, only_metadata,
                                      params['dataset']['meta-suffix3'])
            X_train.append(X_train3)
            X_val.append(X_val3)
            X_test.append(X_test3)
            print("X_train tri", len(X_train))
        if 'meta-suffix4' in params['dataset']:
            X_train4, Y_train4, X_val4, Y_val4, X_test4, Y_test4 = \
                load_data_preprocesed(params, config.x_path, config.y_path,
                                      params['dataset']['dataset'],
                                      config.training_params["validation"],
                                      config.training_params["test"],
                                      config.dataset_settings["nsamples"],
                                      with_metadata, only_metadata,
                                      params['dataset']['meta-suffix4'])
            X_train.append(X_train4)
            X_val.append(X_val4)
            X_test.append(X_test4)
            print("X_train four", len(X_train))
    else:
        if with_generator:
            # Stream audio patches from HDF5; only val/test are kept in memory.
            id2gt = dict()
            factors = np.load(common.DATASETS_DIR+'/y_train_'+config.y_path+'.npy')
            index_factors = open(common.DATASETS_DIR+'/items_index_train_'+params['dataset']['dataset']+'.tsv').read().splitlines()
            id2gt = dict((index, factor) for (index, factor) in zip(index_factors, factors))
            X_val, Y_val, X_test, Y_test, N_train = load_data_hf5_memory(
                params, config.training_params["validation"],
                config.training_params["test"], config.y_path, id2gt, X_meta,
                config.training_params["val_from_file"])
            if params['dataset']['nsamples'] != 'all':
                N_train = min(N_train, params['dataset']['nsamples'])
        else:
            X_train, Y_train, X_val, Y_val, X_test, Y_test, N_train = \
                load_data_hf5(params, config.training_params["validation"],
                              config.training_params["test"])
    trained_model["whiten_scaler"] = common.TRAINDATA_DIR+'/scaler_%s.pk' % config.x_path

    # --- Train with early stopping on the appropriate validation metric.
    logging.debug("Training...")
    if config.model_arch["final_activation"] == 'softmax':
        monitor_metric = 'val_categorical_accuracy'
    else:
        monitor_metric = 'val_loss'
    early_stopping = EarlyStopping(monitor=monitor_metric, patience=4)
    if only_metadata:
        epochs = model.fit(X_train, Y_train,
                           batch_size=config.training_params["n_minibatch"],
                           #shuffle='batch',
                           nb_epoch=config.training_params["n_epochs"],
                           verbose=2,
                           validation_data=(X_val, Y_val),
                           callbacks=[early_stopping])
    else:
        if with_generator:
            print(N_train)
            # samples_per_epoch is truncated to a whole number of minibatches.
            epochs = model.fit_generator(
                batch_block_generator(params, config.y_path, N_train, id2gt,
                                      X_meta,
                                      config.training_params["val_from_file"]),
                samples_per_epoch=N_train - (N_train % config.training_params["n_minibatch"]),
                nb_epoch=config.training_params["n_epochs"],
                verbose=2,
                validation_data=(X_val, Y_val),
                callbacks=[early_stopping])
        else:
            epochs = model.fit(X_train, Y_train,
                               batch_size=config.training_params["n_minibatch"],
                               shuffle='batch',
                               nb_epoch=config.training_params["n_epochs"],
                               verbose=2,
                               validation_data=(X_val, Y_val),
                               callbacks=[early_stopping])
    model.save_weights(os.path.join(model_dir, config.model_id + common.WEIGHTS_EXT))
    logging.debug("Saving trained model %s in %s..."
                  % (trained_model["model_id"], common.DEFAULT_TRAINED_MODELS_FILE))
    common.save_trained_model(common.DEFAULT_TRAINED_MODELS_FILE, trained_model)

    # --- Evaluate on the test split.
    logging.debug("Evaluating...")
    print(X_test[0].shape, X_test[1].shape)
    preds = model.predict(X_test)
    print(preds.shape)
    if params["dataset"]["evaluation"] in ['binary', 'multiclass']:
        y_pred = (preds > 0.5).astype('int32')
        acc = accuracy_score(Y_test, y_pred)
        prec = precision_score(Y_test, y_pred, average='macro')
        recall = recall_score(Y_test, y_pred, average='macro')
        f1 = f1_score(Y_test, y_pred, average='macro')
        print('Accuracy', acc)
        print("%.3f\t%.3f\t%.3f" % (prec, recall, f1))
        if params["dataset"]["fact"] == 'class':
            # Restrict AUC computation to classes that occur in the test set,
            # since roc_auc_score is undefined for all-negative columns.
            good_classes = np.nonzero(Y_test.sum(0))[0]
            print(Y_test.shape, preds.shape)
            roc_auc = roc_auc_score(Y_test[:, good_classes], preds[:, good_classes])
            logging.debug('ROC-AUC '+str(roc_auc))
            pr_auc = average_precision_score(Y_test[:, good_classes], preds[:, good_classes])
            print('PR-AUC', pr_auc)
            r2 = roc_auc
    elif params["dataset"]["evaluation"] not in ['binary', 'multiclass', 'multilabel']:
        # Regression: average per-sample R2.
        r2s = []
        for i, pred in enumerate(preds):
            r2 = r2_score(Y_test[i], pred)
            r2s.append(r2)
        r2 = np.asarray(r2s).mean()
        logging.debug('R2 avg '+str(r2))

    # Batch prediction
    # NOTE(review): this compares the shapes of the *second elements* of
    # X_test and Y_test — possibly shape[0]/len() was intended; preserved
    # as-is, confirm against the original author's intent.
    if X_test[1].shape == Y_test[1].shape:
        score = model.evaluate(X_test, Y_test, verbose=0)
        logging.debug(score)
        logging.debug(model.metrics_names)
        print(score)
        trained_model["loss_score"] = score[0]
        trained_model["mse"] = score[1]
        if params["dataset"]["evaluation"] not in ['binary', 'multiclass', 'multilabel']:
            trained_model["r2"] = r2
        # Append a human-readable summary to the shared results log.
        fw = open(common.DATA_DIR+'/results/train_results.txt', 'a')
        fw.write(trained_model["model_id"]+'\n')
        if params["training"]["loss_func"] == 'binary_crossentropy':
            fw.write('ROC-AUC: '+str(roc_auc)+'\n')
            print('ROC-AUC: '+str(roc_auc))
            fw.write('Loss: '+str(score[0])+' ('+config.training_params["loss_func"]+')\n')
            fw.write('MSE: '+str(score[1])+'\n')
        elif params["dataset"]["evaluation"] not in ['binary', 'multiclass', 'multilabel']:
            fw.write('R2 avg: '+str(r2)+'\n')
            print('R2 avg: '+str(r2))
            fw.write('Loss: '+str(score[0])+' ('+config.training_params["loss_func"]+')\n')
            fw.write('MSE: '+str(score[1])+'\n')
        fw.write(json.dumps(epochs.history)+"\n\n")
        fw.close()

    # --- Optionally produce and evaluate test-set predictions.
    if with_predict:
        trained_models = pd.read_csv(common.DEFAULT_TRAINED_MODELS_FILE, sep='\t')
        model_config = trained_models[trained_models["model_id"] == trained_model["model_id"]]
        model_config = model_config.to_dict(orient="list")
        testset = open(common.DATASETS_DIR+'/items_index_test_%s.tsv' % (config.dataset_settings["dataset"])).read().splitlines()
        if config.training_params["val_from_file"] and not only_metadata:
            predictions, predictions_index = obtain_predictions(
                model_config, testset, trained_model["model_id"],
                config.predicting_params["trim_coeff"], model=model,
                with_metadata=with_metadata, only_metadata=only_metadata,
                metadata_source=metadata_source, with_patches=True)
        else:
            predictions, predictions_index = obtain_predictions(
                model_config, testset, trained_model["model_id"],
                config.predicting_params["trim_coeff"], model=model,
                with_metadata=with_metadata, only_metadata=only_metadata,
                metadata_source=metadata_source)
        print("Predictions created")
    if with_eval:
        do_eval(trained_model["model_id"], get_roc=True, get_map=True,
                get_p=True, predictions=predictions,
                predictions_index=predictions_index)
import sys
import numpy as np
import paddle
import paddle.fluid as fluid

from arg_config import ArgConfig, print_arguments
from train import do_train
from predict import do_predict
from eval import do_eval
from inference_model import do_save_inference_model

if __name__ == "__main__":
    # Parse/build the run configuration and echo it.
    args = ArgConfig()
    args = args.build_conf()
    print_arguments(args)

    if args.do_train:
        do_train(args)

    if args.do_predict:
        predictions = do_predict(args)

    if args.do_eval:
        # NOTE(review): `predictions` is only bound when do_predict ran;
        # running do_eval without do_predict raises NameError — confirm
        # the two flags are always enabled together.
        acc = do_eval(args, predictions)
        # FIX: corrected "accuaracy" typo in the printed report.
        print("evaluation accuracy %.3f percent" % (acc * 100))

    if args.do_save_inference_model:
        do_save_inference_model(args)