def __init__(self,
             device=None,
             use_temporal=False,   # use the frame-level CNN model by default
             num_students=1,       # number of student models; at most five can be used, at a higher computation cost
             # parameters for OpenFace feature extraction, passed to Video_Processor
             OpenFace_exe='./OpenFace/build/bin/FeatureExtraction',
             save_size=112, nomask=True, grey=False, quiet=True,
             tracked_vid=False, noface_save=False,
             # parameters for image sampling, passed to the image sampler
             length=32,
             num_workers=0,
             # minimum number of frames allowed
             min_frames=1,
             batch_size=24,
             # whether to save a csv file
             save_csv=True,
             # whether to save an annotated video
             emotion_annotated_video=False):
    self.num_workers = num_workers
    self.batch_size = batch_size
    self.length = length
    self.min_frames = min_frames
    if device is None:
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    else:
        self.device = device
    print("Using device: {}".format(self.device))
    self.video_processor = Video_Processor(save_size, nomask, grey, quiet,
                                           tracked_vid, noface_save, OpenFace_exe)
    self.model_type = 'CNN' if not use_temporal else 'CNN_RNN'
    self.ensemble, self.val_transforms = ModelFactory.get(self.device, self.model_type, num_students)
    self.save_csv = save_csv
    self.emotion_annotated_video = emotion_annotated_video
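# --- Illustrative only: the enclosing class is not shown in this excerpt, so the name
# --- `EmotionDetector` below is a placeholder. A minimal sketch of how the constructor
# --- above might be called, assuming OpenFace is built at the default path; the actual
# --- class name and entry point belong to the original project.
#
#   detector = EmotionDetector(
#       device=None,          # auto-selects cuda:0 when available, otherwise cpu
#       use_temporal=True,    # switch from the CNN ensemble to the CNN_RNN ensemble
#       num_students=3,       # up to five students; more students cost more compute
#       length=32,            # frames sampled per clip
#       batch_size=24,
#       save_csv=True,
#       emotion_annotated_video=False)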
def _pipeline_test(model, x_test, y_test, conf_load_dict, cmd_line_params,
                   network_params, meta_info_project_dict, main_logger):
    """
    Perform the test phase, if requested by the chosen step in cmd_line_params.

    Params:
    -------
    :model: model to test
    :x_test: test samples
    :y_test: test labels
    :conf_load_dict: dict, configuration for loading the dataset: it specifies the
        bins to use, the sequence type and the dataset path
    :cmd_line_params: dictionary, command line arguments
    :network_params: dictionary, model configuration depending on the neural network
        architecture used
    :meta_info_project_dict: dictionary, it contains the output path for each stage
        of the pipeline
    :main_logger: output logger
    """
    if cmd_line_params.test is True:
        if model is None:
            if cmd_line_params.pretrained_model is None:
                raise ValueError("In order to perform the test, a pretrained model "
                                 "must be specified")
        # TODO: check whether the pretrained model actually exists.

        # Get callbacks.
        base_dir: str = meta_info_project_dict['base_dir']
        results_dir = meta_info_project_dict['test_result_path']
        network_params['result_base_dir'] = results_dir

        model = ModelFactory.getModelByName(cmd_line_params.load_network, network_params)
        model.build(main_logger)

        _test(
            model,
            x_test, y_test,
            conf_load_dict,
            cmd_line_params,
            network_params,
            meta_info_project_dict,
            main_logger,
        )
    return
def loadModel(self):
    return ModelFactory(self.config).create()
def getModel(config, trainingData, validationData):
    return ModelFactory(config, trainingData=trainingData,
                        validationData=validationData).create()
def main():
    args = handleInputs()
    setRNGSeed(args.rng_seed)
    use_pyro = args.model_type != 'nmt'

    if args.combine_results is not None:
        if os.path.isdir('./.results'):
            combineResults(args.combine_results)
            return
        else:
            raise ValueError(
                ".results/ does not exist, assumed no experiments were previously run")

    # create the directory that stores experiments
    if not os.path.isdir('./.results'):
        os.mkdir('./.results')

    # create a directory for the dataset's source-to-target language pair
    exp_dir = './.results/{}_{}-{}/'.format(args.dataset, args.source, args.target)
    if not os.path.isdir(exp_dir):
        try:
            os.mkdir(exp_dir)
        except FileExistsError as e:
            logging.warning(
                "You might be trying to create {} twice (are you running several runs?)"
                .format(exp_dir))

    if use_pyro:
        args_name = 'kl-anneal_{}_{}_latents_{}_particles_{}_attn_{}/'.format(
            args.kl_anneal, args.to_anneal, args.z_dim, args.num_particles,
            args.use_attention)
        if args.use_flows:
            args_name = '{}_{}_'.format(args.flow_type, args.num_flows) + args_name
        exp_dir = exp_dir + '{}_'.format(args.model_type) + args_name
    else:
        exp_dir = exp_dir + 'RNNSearch/'

    # flag on whether this is an experiment continuation or not
    if args.opt == 'test' or args.opt == 'validate':
        # if we are testing or validating, the experiment is assumed to have been run first
        args.load_latest_epoch = True
        args.load_epoch = 1
    args.load_latest_epoch = args.load_epoch >= 0 and args.load_latest_epoch
    cont_exp = args.load_epoch >= 0 or args.load_latest_epoch

    if not os.path.isdir(exp_dir):
        os.mkdir(exp_dir)
    else:
        # there's a logic gate for this...but can't remember what it is
        if not cont_exp:
            if not args.debug:
                raise ValueError(
                    "{} already exists; if you changed another parameter, please rename the existing file"
                    .format(exp_dir))

    # keep track of all parameters used
    log_file = exp_dir + 'experiment.log'
    init_logger(log_file, cont_exp)

    if cont_exp:
        logging.info(
            "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
        )
        logging.info(
            "load_epoch ({}) set. Loading exp config (seems silly otherwise)".format(
                args.load_epoch))
        try:
            # to_pop lists the settings we may want to actually update on the experiment.
            to_pop = [
                "load_epoch", "epochs", "print_every", "decode_alg", "k",
                "length_norm", "load_latest_epoch", "opt", "bleu_score"
            ]
            args = load_args(args, exp_dir, to_pop=to_pop)
        except FileNotFoundError as e:
            logging.error(
                "could not load previous arguments, are you sure you used the same parameters as the experiment?")
            logging.error("Starting the experiment over and setting load_epoch = -1")
            args.load_epoch = -1
            args.load_latest_epoch = False
            cont_exp = False

    # whether or not we loaded arguments, presumably we should also make sure things are the same
    write_args(args, exp_dir)

    optimization_dict = get_optimization_dict(args)

    if args.use_bpe:
        logging.info("Using BPE models: {} -> {}".format(args.src_bpe, args.trg_bpe))
        tokenize_src, tokenize_trg = getBPE(args.src_bpe, args.trg_bpe)
    else:
        logging.info("Using Tokenizer: {} -> {}".format(args.source, args.target))
        tokenize_src, tokenize_trg = getTokenizer(args.source, args.target,
                                                  args.on_whitespace)

    # we include lengths to provide to the RNNs
    data_save_path = './.data/{}_data_{}_to_{}.pth'.format(args.dataset, args.source,
                                                           args.target)
    datahandler = DataHandler(tokenize_src, tokenize_trg, LOWER, EOS_TOKEN, SOS_TOKEN,
                              PAD_TOKEN, UNK_TOKEN, args.min_freq, DEVICE)
    fields = [('src', datahandler.getSRCField()), ('trg', datahandler.getTRGField())]

    try:
        # TODO: figure out how to make this work if possible, since loading is expensive
        f = torch.load(data_save_path, pickle_module=dill)
        logging.info('found previously saved train and valid data, delete if undesired')
        datahandler.load_vocabs(f['src_vocab'], f['trg_vocab'])
        train_data = data.Dataset(f['train_examples'], fields=fields, filter_pred=None)
        valid_data = data.Dataset(f['valid_examples'], fields=fields, filter_pred=None)
        test_data = data.Dataset(f['test_examples'], fields=fields, filter_pred=None)
    except FileNotFoundError as e:
        logging.warning('could not find a previously saved file, building a new one')
        if args.dataset == 'tabular':
            logging.info("Using tabular files, assumes no header in the files")
            max_len = args.max_len
            train_data, valid_data, test_data = data.TabularDataset.splits(
                path='./.data/',
                format='tsv',
                train='train-{}-{}.tsv'.format(args.source, args.target),
                validation='dev-{}-{}.tsv'.format(args.source, args.target),
                test='test-{}-{}.tsv'.format(args.source, args.target),
                skip_header=False,
                fields=fields,
                filter_pred=lambda x: filter_fn(x, max_len))
        elif args.dataset == 'IWSLT':
            logging.warning(
                "You need to create val.de-en.* and test.de-en.* by merging files beforehand")
            train_data, valid_data, test_data = datasets.IWSLT.splits(
                exts=('.' + args.source, '.' + args.target),
                fields=(datahandler.getSRCField(), datahandler.getTRGField()),
                filter_pred=lambda x: filter_fn(x, args.max_len),
                validation='val',
                test='test')
        elif args.dataset == 'WMT14':
            train_data, valid_data, test_data = datasets.WMT14.splits(
                exts=('.' + args.source, '.' + args.target),
                fields=(datahandler.getSRCField(), datahandler.getTRGField()))
        datahandler.build_vocabs(train_data, args.custom_vocab_src, args.custom_vocab_trg)
        to_save = {
            'train_examples': train_data.examples,
            'valid_examples': valid_data.examples,
            'test_examples': test_data.examples,
            'src_vocab': datahandler.getSRCVocab(),
            'trg_vocab': datahandler.getTRGVocab()
        }
        torch.save(to_save, data_save_path, pickle_module=dill)

    logging.info('Vocab Sizes: {} (SRC) {} (TRG)'.format(
        len(datahandler.getSRCVocab()), len(datahandler.getTRGVocab())))
    logging.info('Train dataset Size: {}, Validation dataset Size: {}'.format(
        len(train_data), len(valid_data)))

    train_iter = datahandler.getBucketIter(train_data,
                                           batch_size=args.batch_size,
                                           train=True,
                                           sort_within_batch=True,
                                           sort_key=lambda x: (len(x.src), len(x.trg)),
                                           repeat=False)
    valid_iter = datahandler.getIter(valid_data,
                                     batch_size=1,
                                     train=False,
                                     sort=False,
                                     repeat=False)
    test_iter = datahandler.getIter(test_data,
                                    batch_size=1,
                                    train=False,
                                    sort=False,
                                    repeat=False)

    if args.use_bpe:
        trg_bpe = spm.SentencePieceProcessor()
        trg_bpe.Load(args.trg_bpe)
        src_bpe = spm.SentencePieceProcessor()
        src_bpe.Load(args.src_bpe)
    else:
        trg_bpe = None
        src_bpe = None  # mirror trg_bpe so later references do not fail

    if args.bleu_score == 'raw':
        bleu_func = rawBLEU
    elif args.bleu_score == 'multi':
        bleu_func = get_moses_multi_bleu

    # this is where the magic starts (hopefully)
    modelfactory = ModelFactory(len(datahandler.getSRCVocab()),
                                len(datahandler.getTRGVocab()),
                                emb_size=args.emb_size,
                                hidden_size=args.hidden_size,
                                num_layers=args.num_layers,
                                dropout=args.dropout,
                                z_layer=args.z_dim,
                                pool_size=args.max_out_dim,
                                use_projection=args.use_projection)
    model = modelfactory.getModel(args.model_type, use_attention=args.use_attention)

    cond_flow_scale = 2
    if args.use_flows and args.model_type != 'nmt':
        if args.flow_type == 'planar':
            model.loadPlanarFlows(args.num_flows, z_dim=args.z_dim)
        elif args.flow_type == 'iaf':
            model.loadIAFs(args.num_flows, z_dim=args.z_dim)
        elif args.flow_type == 'cond-planar':
            model.loadConditionalPlanarFlows(args.num_flows,
                                             args.hidden_size * cond_flow_scale,
                                             z_dim=args.z_dim)
        elif args.flow_type == 'cond-planar-v2':
            model.loadConditionalPlanarFlows_v2(args.num_flows,
                                                args.hidden_size * cond_flow_scale,
                                                z_dim=args.z_dim)
        elif args.flow_type == 'cond-iaf':
            model.loadConditionalIAFFlows(args.num_flows,
                                          args.hidden_size * cond_flow_scale,
                                          z_dim=args.z_dim)

    if not cont_exp:
        logging.info("Initializing model parameters randomly with the {} scheme".format(
            args.init_type))
        model.initParameters(args.init_type)
    if not cont_exp:
        logging.info(model)

    if USE_CUDA:
        model = model.cuda()

    # some internal hacky stuff to let me do hacky things....
    model.setTrainDataSize(len(train_data))
    model.setUnkTokenIndex(datahandler.getTRGVocab().stoi[UNK_TOKEN])
    model.setSOSTokenIndex(datahandler.getSRCVocab().stoi[SOS_TOKEN])  # for gnmt
    model.setPadIndex(datahandler.getSRCVocab().stoi[PAD_TOKEN])
    model.setWordDropout(args.word_dropout)
    model.setUseMeanField("Mean" in args.elbo_type)
    model.setToAnneal(args.to_anneal)

    if 'q' not in args.to_anneal and "Mean" in args.elbo_type and args.kl_anneal > 1.0:
        msg = ("You are not annealing the variational distribution even though you "
               "requested annealing and are using mean field, which would use the "
               "analytic form and needs to anneal q")
        logging.warning(msg)
        print(msg)

    if args.model_pth is not None:
        # model.load('./model_final.pth')
        model.load(args.model_pth)

    train_translator = Translator(
        valid_data,
        valid_iter,
        model,
        max_len=args.max_len,
        sos_index=datahandler.getTRGVocab().stoi[SOS_TOKEN],
        eos_index=datahandler.getTRGVocab().stoi[EOS_TOKEN],
        pad_index=datahandler.getPadIndex(),
        use_cuda=USE_CUDA)

    trainer = Trainer(model,
                      train_iter,
                      valid_iter,
                      use_pyro,
                      datahandler.getPadIndex(),
                      train_translator,
                      bleu_func,
                      datahandler.getTRGVocab(),
                      bpe_model=trg_bpe,
                      use_cuda=USE_CUDA,
                      savedir=exp_dir,
                      optim_dict=optimization_dict,
                      kl_anneal=args.kl_anneal,
                      use_aux_loss=args.use_aux_loss,
                      load_epoch=args.load_epoch,
                      use_latest_epoch=args.load_latest_epoch)

    if args.opt == 'all' or args.opt == 'train':
        dev_perplexities = trainer.train(num_epochs=args.epochs,
                                         print_every=args.print_every)
        torch.save(dev_perplexities, exp_dir + 'perplexities.pth')
    elif args.model_pth is None:
        # get the best performing model
        logging.info("No model path provided, using the best model for evaluation")
        dev_perplexities = trainer.initDevPerplexities()
        # if dev perplexities are not in the order they were trained, this will not work
        best = {'i': -1, 'val_bleu': 0.0}
        for i, p in enumerate(dev_perplexities):
            cur_bleu = p['val_bleu']
            if cur_bleu > best['val_bleu']:
                best['i'] = i
                best['val_bleu'] = cur_bleu
        args.model_pth = trainer.getCheckpointPth(best['i'])
        try:
            check_pt = torch.load(args.model_pth)
            model.load(check_pt['model'])
            # with mutation...this is probably not necessary, but just in case....
            trainer.setModel(model)
        except Exception as e:
            logging.warning(
                "Failed to load a model...you do know you requested evaluation, right?")
    else:
        model.load(args.model_pth)

    val_or_test = (args.opt == 'all' or args.opt == 'validate'
                   or args.opt == 'test' or args.opt == 'test_lengths')
    if val_or_test:
        if args.opt == 'test' or args.opt == 'test_lengths':
            dataset = test_data
            data_iter = test_iter
        else:
            dataset = valid_data
            data_iter = valid_iter
        scores = {}
        debug = True

        if val_or_test and use_pyro and debug:
            # Test the utility of the latent variable.
            # Another way to see how useful z is: zero it out at translation time,
            # so that it gets no weight. This sort of test only makes sense if z is
            # concatenated as input at each step of decoding.
            model.setUseLatent(False)
            translator = Translator(
                dataset,
                data_iter,
                model,
                max_len=args.max_len,
                sos_index=datahandler.getTRGVocab().stoi[SOS_TOKEN],
                eos_index=datahandler.getTRGVocab().stoi[EOS_TOKEN],
                pad_index=datahandler.getPadIndex(),
                use_cuda=USE_CUDA,
                k=args.k,
                length_norm=args.length_norm)
            no_latent_bleu, hypotheses, references = translator.FullEvalTranslate(
                datahandler.getTRGVocab(),
                bleu_func,
                decodefn=args.decode_alg,
                bpe_model=trg_bpe)
            # store information
            no_latent_name = exp_dir + 'no-latent-{}.tsv'.format(args.opt)
            write_translations(no_latent_name, hypotheses, references)
            scores['{}-no_latent'.format(args.opt)] = no_latent_bleu
            # subtle, but remember we need to use it after this test
            model.setUseLatent(True)

        # TODO: Probably not gonna do this...but presumably, because of mutation...,
        # I really don't need to make another one of these...
        # Do this after the no-latent test, because the Translator at this point can be
        # used below for testing lengths.
        if debug:
            translator = Translator(
                dataset,
                data_iter,
                model,
                max_len=args.max_len,
                sos_index=datahandler.getTRGVocab().stoi[SOS_TOKEN],
                eos_index=datahandler.getTRGVocab().stoi[EOS_TOKEN],
                pad_index=datahandler.getPadIndex(),
                use_cuda=USE_CUDA,
                k=args.k,
                length_norm=args.length_norm)
            bleu, hypotheses, references = translator.FullEvalTranslate(
                datahandler.getTRGVocab(),
                bleu_func,
                decodefn=args.decode_alg,
                bpe_model=trg_bpe)
            logging.info("{} BLEU score: {} which was run using the {} opt".format(
                args.bleu_score, bleu, args.opt))
            scores[args.opt] = bleu
            translation_name = exp_dir + '{}.tsv'.format(args.opt)
            write_translations(translation_name, hypotheses, references)

        joint_modeling = isinstance(model, GenerativeEncoderDecoder) or isinstance(
            model, VanillaJointEncoderDecoder)
        if joint_modeling and debug:
            model.setDecodeTarget(False)
            lm_translator = Translator(
                dataset,
                data_iter,
                model,
                max_len=args.max_len,
                sos_index=datahandler.getSRCVocab().stoi[SOS_TOKEN],
                eos_index=datahandler.getSRCVocab().stoi[EOS_TOKEN],
                pad_index=datahandler.getPadIndex(),
                use_cuda=USE_CUDA,
                k=args.k,
                length_norm=args.length_norm,
                do_lang_model=True)
            # Do greedy decoding only for the language model. With these parameters,
            # performance isn't expected to be too amazing.
            bleu, hypotheses, references = lm_translator.FullEvalTranslate(
                datahandler.getSRCVocab(),
                bleu_func,
                decodefn='greedy',
                bpe_model=src_bpe)
            scores["lm-{}".format(args.opt)] = bleu
            translation_name = exp_dir + 'lm-{}.tsv'.format(args.opt)
            write_translations(translation_name, hypotheses, references)

        # collect validation "perplexity" for the models, mostly for the ELBO
        if joint_modeling and debug:
            def get_lm_toks():
                return trainer.model.getSRCTokCount()

            eval_perplexity = trainer.run_lvnmt_eval(
                trainer.rebatch_iter(data_iter),
                custom_tok_count=get_lm_toks,
                count_both=True)
            # calculate the perplexity of the language model
            model.setTrainMT(False)
            model.setTrainLM(True)
            lm_eval_perplexity = trainer.run_lvnmt_eval(
                trainer.rebatch_iter(data_iter), custom_tok_count=get_lm_toks)
            torch.save(lm_eval_perplexity,
                       exp_dir + '{}-lm_perplexity.pth'.format(args.opt))
        else:
            eval_perplexity = trainer.run_lvnmt_eval(trainer.rebatch_iter(data_iter))
        torch.save(eval_perplexity, exp_dir + '{}-eval_perplexity.pth'.format(args.opt))

        flow_samples = generate_flow_samples(trainer.model,
                                             trainer.rebatch_iter(data_iter),
                                             datahandler.getSRCVocab(),
                                             datahandler.getTRGVocab(),
                                             src_bpe=src_bpe,
                                             trg_bpe=trg_bpe)
        torch.save(flow_samples, exp_dir + '{}-latent_spaces.pth'.format(args.opt))

        try:
            with open(exp_dir + 'bleus-{}.json'.format(args.opt), 'r') as bleu_scores:
                prev_bleus = json.load(bleu_scores)
        except Exception as e:
            prev_bleus = {}
        with open(exp_dir + 'bleus-{}.json'.format(args.opt), 'w') as bleu_scores:
            prev_bleus[len(prev_bleus)] = scores
            json.dump(prev_bleus, bleu_scores)

        if args.opt == 'test_lengths':
            logging.info("Calculating BLEU scores based on sentence lengths")
            BLEUS = {}
            for length in range(5, 70, 5):
                references_of_length = []
                hypotheses_of_length = []
                # TODO: this is stupidly inefficient... sort the ref - hypo pairs
                for i in range(len(references)):
                    count = len(references[i].split())
                    if (length - 4) <= count and count <= length:
                        references_of_length.append(references[i])
                        hypotheses_of_length.append(hypotheses[i])
                bleu = [bleu_func(hypotheses_of_length, references_of_length)]
                BLEUS['length={}'.format(length)] = bleu
            save_name = exp_dir + args.model_pth.split('/')[-1] + "_lengths.tsv"
            pd.DataFrame.from_dict(BLEUS).to_csv(save_name, sep='\t', index=False)

    if args.opt == 'tuning':
        BLEUS = {}
        BLEUS_list = []
        for i in range(0, args.epochs):
            load_pth = exp_dir + 'checkpoints/epoch_{}.pth'.format(i)
            model.load(load_pth)
            translator = Translator(
                valid_data,
                valid_iter,
                model,
                max_len=60,
                sos_index=datahandler.getTRGVocab().stoi[SOS_TOKEN],
                eos_index=datahandler.getTRGVocab().stoi[EOS_TOKEN],
                pad_index=datahandler.getPadIndex(),
                use_cuda=USE_CUDA)
            bleu, hypotheses, references = translator.FullEvalTranslate(
                datahandler.getTRGVocab(),
                bleu_func,
                decodefn='greedy',
                bpe_model=trg_bpe)
            BLEUS['epoch_{}'.format(i)] = [bleu]
            BLEUS_list.append(bleu)
            logging.info(load_pth)
            logging.info('{} BLEU score {}'.format(args.bleu_score, bleu))
        logging.info("Best model for {} was {} with {} BLEU: {}".format(
            exp_dir, np.argmax(BLEUS_list), args.bleu_score, max(BLEUS_list)))
        save_name = exp_dir + "BLEU_scores.tsv"
        pd.DataFrame.from_dict(BLEUS).to_csv(save_name, sep='\t', index=False)
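# --- Illustrative only: one way to address the "stupidly inefficient" TODO in the
# --- test_lengths branch above: bucket each reference/hypothesis pair in a single pass
# --- instead of rescanning the full list for every length window. This helper is a
# --- sketch, not code from the original project.
def bucket_by_reference_length(hypotheses, references, low=5, high=70, step=5):
    # One (hypotheses, references) pair of lists per non-overlapping window
    # (length - step + 1) <= count <= length, matching the loop above.
    buckets = {length: ([], []) for length in range(low, high, step)}
    for hyp, ref in zip(hypotheses, references):
        count = len(ref.split())
        for length in buckets:
            if (length - step + 1) <= count <= length:
                buckets[length][0].append(hyp)
                buckets[length][1].append(ref)
                break
    return buckets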
def _holdout(
        x_train, y_train,
        x_val, y_val,
        conf_load_dict: dict,
        cmd_line_params,
        network_params: dict,
        meta_info_project_dict: dict,
        tokenizer: Tokenizer,
        logger: logging.Logger,
        message: str = 'Performing first phase (holdout)...') -> object:
    """
    It instantiates a new model and trains it for the number of epochs specified in the
    model configuration file, in order to subsequently validate it.

    Params:
    -------
    :x_train: training feature matrix X.\n
    :y_train: training class labels.\n
    :x_val: validation feature matrix X.\n
    :y_val: validation class labels.\n
    :conf_load_dict: dictionary containing information about how input data have been
        loaded from the source data directory.\n
    :cmd_line_params: command line parameters.\n
    :network_params: model configuration read from file.\n
    :meta_info_project_dict: dictionary containing information about how the project
        layout has been set up for collecting output data.\n
    :tokenizer: tokenizer for translating text to sequences.\n
    :logger: object used for logging info; it might be None, in which case it is just
        ignored by the functions that deal with it.\n
    :message: str object used to either log or display on stdout a message stating that
        the train phase is running.\n

    Returns:
    --------
    :object: the model trained by means of the holdout procedure, together with the
        number of epochs trained and a string summarizing the validation results.
    """
    # Some logs recorded.
    _log_info_message(f" [*] {message}", logger)

    train_bins, val_bins = conf_load_dict['train_bins'], conf_load_dict['val_bins']
    _log_info_message("Training on bins: {}, validation on {}".format(train_bins, val_bins), logger)

    vocabulary_len: int = len(tokenizer.index_word) + 1
    network_params['vocabulary_len'] = vocabulary_len

    base_dir: str = meta_info_project_dict['base_dir']
    results_dir = meta_info_project_dict['val_result_path']
    history_filename: str = os.path.join(base_dir, 'history.csv')
    network_params['result_base_dir'] = results_dir

    # Get Model from the ModelFactory static class.
    network_model_name: str = cmd_line_params.load_network
    model = ModelFactory.getModelByName(network_model_name, network_params)

    # Build model.
    _log_info_message(f"> build model (holdout).", logger)
    # It compiles the model and prints its summary (architecture's structure).
    model.build(logger)

    # Train model.
    _log_info_message(f"> train model (holdout)...", logger)
    history, trained_epochs = model.fit(
        X_tr=x_train,
        y_tr=y_train,
        epochs=cmd_line_params.num_epochs,
        callbacks_list=[],
        validation_data=(x_val, y_val)
    )
    _log_info_message(f"> train model (holdout): Done.", logger)
    model.save_weights()

    # Log the number of epochs trained.
    _log_info_message("Trained for {} epochs".format(trained_epochs), logger)

    # Eval model.
    _log_info_message(f"> eval model (holdout).", logger)

    # Plot graphs of loss and accuracy.
    plot_loss(history, results_dir, "Training and validation losses", "loss",
              savefig_flag=True, showfig_flag=False)
    plot_accuracy(history, results_dir, "Training and validation accuracies", "accuracy",
                  savefig_flag=True, showfig_flag=False)

    # Serialize history.
    with open(os.path.join(results_dir, "history"), 'wb') as history_pickle:
        pickle.dump(history.history, history_pickle)

    # scores contains [loss, accuracy, f1_score, precision, recall]
    results_dict: dict = model.evaluate(x_val, y_val)
    res_string = ", ".join(f'{k}:{v:.5f}' for k, v in results_dict.items())
    _log_info_message("{}".format(res_string), logger)

    _log_info_message(f" [*] {message} Done.", logger)

    return model, trained_epochs, res_string
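# --- Illustrative only: a minimal sketch of how the holdout and training phases above
# --- and below might be chained by a caller. The wrapper function name and the
# --- x_train/y_train/x_val/y_val variables are assumptions, not code from this repo.
def _example_pipeline_train(x_train, y_train, x_val, y_val, conf_load_dict,
                            cmd_line_params, network_params,
                            meta_info_project_dict, tokenizer, logger):
    # First phase: holdout validation, which also tells us how many epochs were trained.
    model, trained_epochs, res_string = _holdout(
        x_train, y_train, x_val, y_val,
        conf_load_dict, cmd_line_params, network_params,
        meta_info_project_dict, tokenizer, logger)
    # Second phase: retrain on train + validation data, either for the number of epochs
    # found above (algorithm 7.2) or until the holdout loss is reached (algorithm 7.3).
    model = _train(
        (x_train, y_train), (x_val, y_val), len(x_train),
        conf_load_dict, cmd_line_params, network_params,
        meta_info_project_dict, tokenizer, logger,
        epochs_trained=trained_epochs)
    return model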
def _train(
        subtrain,
        validation_data,
        x_subtrain_size,
        conf_load_dict: dict,
        cmd_line_params,
        network_params: dict,
        meta_info_project_dict: dict,
        tokenizer: Tokenizer,
        logger: logging.Logger,
        epochs_trained=None,
        message: str = 'Performing training phase...') -> object:
    """
    It instantiates a new model and trains it for a specified number of update steps.

    The training phase executes:
    - either `algorithm 7.2*` if the command line flag specified for this phase is
      `--early_stopping_epoch`,
    - or `algorithm 7.3**` if the command line flag specified for this phase is
      `--early_stopping_on_loss`.

    * Algorithm 7.2 (Ian Goodfellow, Yoshua Bengio, and Aaron Courville. 2016.
      Deep Learning. The MIT Press, pp. 246-250.)\n
    ** Algorithm 7.3 (Ian Goodfellow, Yoshua Bengio, and Aaron Courville. 2016.
      Deep Learning. The MIT Press, pp. 246-250.)

    Params:
    -------
    :subtrain: tuple (x_subtrain, y_subtrain) with the sub-training feature matrix and class labels.\n
    :validation_data: tuple (x_validation, y_validation) with the validation feature matrix and class labels.\n
    :x_subtrain_size: number of sub-training samples, used to compute the steps per epoch.\n
    :conf_load_dict: dictionary containing information about how input data have been
        loaded from the source data directory.\n
    :cmd_line_params: command line parameters.\n
    :network_params: model configuration read from file.\n
    :meta_info_project_dict: dictionary containing information about how the project
        layout has been set up for collecting output data.\n
    :tokenizer: tokenizer for translating text to sequences.\n
    :logger: object used for logging info; it might be None, in which case it is just
        ignored by the functions that deal with it.\n
    :epochs_trained: number of epochs found during the holdout phase (used by algorithm 7.2).\n
    :message: str object used to either log or display on stdout a message stating that
        the train phase is running.\n

    Returns:
    --------
    :object: the trained model.\n
    """
    # Some logs recorded.
    _log_info_message(f" [*] {message}", logger)

    train_bins = conf_load_dict['train_bins'] + conf_load_dict['val_bins']
    _log_info_message("Training on bins: {}".format(train_bins), logger)

    vocabulary_len: int = len(tokenizer.index_word) + 1
    network_params['vocabulary_len'] = vocabulary_len

    # Get callbacks.
    base_dir: str = meta_info_project_dict['base_dir']
    results_dir = meta_info_project_dict['train_result_path']
    # history_filename: str = os.path.join(base_dir, 'history.csv')
    network_params['result_base_dir'] = results_dir

    # Adding bin 5 (the validation bin) to the training data.
    x_subtrain, y_subtrain = subtrain
    x_validation, y_validation = validation_data
    x_train = np.concatenate((x_subtrain, x_validation), axis=0)
    y_train = np.concatenate((y_subtrain, y_validation), axis=0)

    # Get Model from the ModelFactory static class.
    network_model_name: str = cmd_line_params.load_network

    # If early stopping on the validation loss, load the model trained during holdout.
    if cmd_line_params.early_stopping_on_loss:
        # Algorithm 7.3
        _log_info_message("> loading holdout training weights", logger)
        # If training right after validation in a single run.
        if network_params['pretrained_model'] is None:
            network_params['pretrained_model'] = os.path.join(
                base_dir, cmd_line_params.output_dir,
                "results_holdout_validation/model_checkpoint_weights.h5")
        model = ModelFactory.getModelByName(network_model_name, network_params)
    else:
        # Algorithm 7.2
        model = ModelFactory.getModelByName(network_model_name, network_params)

    # Build model.
    _log_info_message(f"> build model", logger)
    model.build(logger)

    # Train for the specified amount of steps.
    # _log_info_message(f"> training model for {}".format(steps), logger)
    if cmd_line_params.early_stopping_on_loss:
        # Algorithm 7.3
        early_stopping_loss = model.evaluate(x_subtrain, y_subtrain)['loss']
        history = model.fit_early_stopping_by_loss_val(
            x_train, y_train,
            epochs=cmd_line_params.num_epochs,
            early_stopping_loss=early_stopping_loss,
            callbacks_list=[],
            validation_data=validation_data
        )
    else:
        # Algorithm 7.2
        history = model.fit_generator(
            generator=gen(x_train, y_train, batch_size=network_params['batch_size'], verbose=1),
            steps_per_epoch=np.floor(x_subtrain_size / network_params['batch_size']),
            epochs=epochs_trained,
            callbacks_list=[]
        )
    model.save_weights()

    # Plot graph of the training loss.
    plot_loss(history, results_dir, "Training loss", "loss",
              savefig_flag=True, showfig_flag=False)
    # plot_accuracy(history, results_dir, "Training and validation accuracies", "accuracy", save_fig_flag=True)

    # Serialize history.
    with open(os.path.join(results_dir, "history"), 'wb') as history_pickle:
        pickle.dump(history.history, history_pickle)

    _log_info_message(f"> train model: Done.", logger)
    _log_info_message(f" [*] {message} Done.", logger)

    return model
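# --- Illustrative only: `gen`, the batch generator passed to fit_generator above, is
# --- not defined in this excerpt. Below is a minimal sketch of what such a generator
# --- might look like, assuming it simply yields shuffled (x, y) mini-batches forever
# --- (it relies on the module-level `numpy as np` import used elsewhere in this file);
# --- the project's actual implementation may differ.
def gen_sketch(x_train, y_train, batch_size, verbose=0):
    n_samples = x_train.shape[0]
    while True:
        # Reshuffle at the start of every pass over the data.
        indices = np.random.permutation(n_samples)
        for start in range(0, n_samples - batch_size + 1, batch_size):
            batch_idx = indices[start:start + batch_size]
            if verbose:
                print("yielding batch starting at sample {}".format(start))
            yield x_train[batch_idx], y_train[batch_idx]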