def train(i_exp=0, mode='train', model_param=None, batch_size=None, sample_frac=None):
    """Run training, cross-validation, or hyperparameter search for one experiment.

    Parameters
    ----------
    i_exp : int
        Index into ``exp.EXPERIMENTS`` selecting the experiment configuration.
    mode : str
        ``'optimize'`` runs skopt-based hyperparameter search, ``'cv'`` runs
        k-fold cross-validation, anything else runs a plain training fit.
    model_param : dict or None
        Model hyperparameters; defaults to the experiment's ``HYPERPARAM``.
    batch_size : int or None
        Training batch size; defaults to ``exp.BATCH_SIZE``.
    sample_frac : float or None
        If given, load only a sampled fraction of the data and write results
        to a sample-specific output subdirectory.

    Returns
    -------
    The score returned by the selected handler routine.
    """
    # Log memory usage before training (percentage used is index 2 of the tuple).
    mem = psutil.virtual_memory()
    print('Memory:', mem[2])

    # Experiment parameters
    e = exp.EXPERIMENTS[i_exp]

    # Output directory: sampled runs get their own subdirectory so full-data
    # results are never overwritten.
    overwrite = False
    if sample_frac is None:
        outpath = exp.OUTPATH + e['NAME'] + '/'
    else:
        outpath = exp.OUTPATH + e['NAME'] + '/sample_frac_best' + str(sample_frac) + '/'
    create_dir(outpath, overwrite)

    # Load the data (optionally subsampled; sampled loads skip labels).
    inpath = exp.INPATH + 'input_NOMINAL' + '.h5'
    if sample_frac is None:
        actionshist, codes, sites = fit_handler.load_data(inpath)
    else:
        actionshist, codes, sites = fit_handler.load_data(
            inpath, load_labels=False, sample=True, sample_frac=sample_frac)

    # Setup the fit handler
    handler = fit_handler.FitHandler(
        exp.MODEL, codes, sites,
        pruning_mode=e['PRUNE'],
        callback_args=e['CALLBACK'],
        train_on_batch=False,
        path=outpath,
        verbose=2)

    # Get the count matrix
    X, y = handler.count_matrix(actionshist)
    mem = psutil.virtual_memory()
    print('Memory:', mem[2])

    # Fall back to experiment defaults where the caller gave no override.
    if model_param is None:
        model_param = e['HYPERPARAM']
    if batch_size is None:
        batch_size = exp.BATCH_SIZE

    if mode == 'optimize':
        score = handler.find_optimal_parameters(
            exp.SKOPT_DIM, model_param, X, y,
            cv=exp.CV, kfold_splits=exp.FOLDS, num_calls=exp.SKOPTCALLS,
            max_epochs=exp.MAX_EPOCHS, batch_size=batch_size)
    elif mode == 'cv':
        score = handler.kfold_val(
            X, y, model_param=model_param, kfold_splits=exp.FOLDS,
            max_epochs=exp.MAX_EPOCHS, batch_size=batch_size)
    else:
        score = handler.run_training(
            X, y, batch_size=batch_size, max_epochs=exp.MAX_EPOCHS,
            model_param=model_param)

    return score
def train(i_exp=0, model_param=None, sample_frac=None):
    """Run k-fold cross-validation for one NLP experiment.

    Selects the input file and embedding matrix based on the experiment name
    (``NOMINAL``/``AVG`` use the nominal inputs, everything else the
    variable-dimension ones), then cross-validates with the experiment's
    fold/epoch/batch settings.

    Parameters
    ----------
    i_exp : int
        Index into ``exp.experiments``.
    model_param : dict or None
        Model hyperparameters; defaults to the experiment's ``hyperparam``.
    sample_frac : float or None
        If given, load only a sampled fraction of the data.

    Returns
    -------
    The per-fold scores returned by ``handler.kfold_val``.
    """
    # Log memory usage before training (percentage used is index 2).
    mem = psutil.virtual_memory()
    print('Memory:', mem[2])

    # Experiment parameters
    e = exp.experiments[i_exp]

    # Pick input data + embedding matrix by experiment name. NOTE(review):
    # the 'AVG' branch intentionally reuses the NOMINAL files.
    if 'NOMINAL' in e.name:
        path = e.inpath + 'input_' + 'NOMINAL' + '.h5'
        e.nlp_param['embedding_matrix_path'] = e.inpath + 'embedding_matrix_' + 'NOMINAL' + '.npy'
    elif 'AVG' in e.name:
        path = e.inpath + 'input_' + 'NOMINAL' + '.h5'
        e.nlp_param['embedding_matrix_path'] = e.inpath + 'embedding_matrix_' + 'NOMINAL' + '.npy'
    else:
        path = e.inpath + 'input_' + 'VAR_DIM' + '.h5'
        e.nlp_param['embedding_matrix_path'] = e.inpath + 'embedding_matrix_' + 'VAR_DIM' + '.npy'

    if sample_frac is None:
        actionshist, codes, sites = fit_handler.load_data(path)
    else:
        actionshist, codes, sites = fit_handler.load_data(
            path, sample=True, sample_frac=sample_frac)

    # Setup the fit handler
    handler = fit_handler.FitHandler(
        e.model, codes, sites, e.max_words, e.gen_param,
        pruning_mode=e.pruning,
        model_args=e.nlp_param,
        callback_args=e.callback,
        train_on_batch=e.train_on_batch,
        verbose=1)

    if model_param is None:
        model_param = e.hyperparam

    cvscores = handler.kfold_val(
        actionshist, model_param=model_param, kfold_splits=e.folds,
        max_epochs=e.max_epochs, batch_size=e.batch_size)

    # BUGFIX: the original computed cvscores but never returned them,
    # silently discarding the result.
    return cvscores
def evaluate(o, fold=None):
    """Cross-validate one Bayesian-optimization hyperparameter suggestion.

    Pops ``hash`` and ``i_exp`` from *o* (the dict is mutated), overlays the
    remaining entries onto the experiment's default hyperparameters, runs
    k-fold cross-validation, and writes the (negated) mean score as JSON to
    ``<outpath>/<hash>[_f<fold>].json`` for the optimizer to collect.

    Parameters
    ----------
    o : dict
        Suggested parameters plus bookkeeping keys ``'hash'`` and ``'i_exp'``.
    fold : int or None
        Optional fold index, appended to the result and output filename.
    """
    hash_value = o.pop('hash')
    i_exp = int(o.pop('i_exp'))

    # Log memory usage before training (percentage used is index 2).
    mem = psutil.virtual_memory()
    print('Memory:', mem[2])

    # Experiment parameters
    e = exp.experiments[i_exp]
    out_path = e.outpath + e.name + '/'

    # Pick input data + embedding matrix by experiment name. NOTE(review):
    # the 'AVG' branch intentionally reuses the NOMINAL files.
    if 'NOMINAL' in e.name:
        path = e.inpath + 'input_' + 'NOMINAL' + '.h5'
        e.nlp_param['embedding_matrix_path'] = e.inpath + 'embedding_matrix_' + 'NOMINAL' + '.npy'
    elif 'AVG' in e.name:
        path = e.inpath + 'input_' + 'NOMINAL' + '.h5'
        e.nlp_param['embedding_matrix_path'] = e.inpath + 'embedding_matrix_' + 'NOMINAL' + '.npy'
    else:
        path = e.inpath + 'input_' + 'VAR_DIM' + '.h5'
        e.nlp_param['embedding_matrix_path'] = e.inpath + 'embedding_matrix_' + 'VAR_DIM' + '.npy'

    actionshist, codes, sites = fit_handler.load_data(path)

    # Setup the fit handler
    handler = fit_handler.FitHandler(
        e.model, codes, sites, e.max_words, e.gen_param,
        pruning_mode=e.pruning,
        model_args=e.nlp_param,
        callback_args=e.callback,
        train_on_batch=e.train_on_batch,
        verbose=2)

    # Start from the experiment defaults, then overwrite with the
    # Bayesian-optimizer suggestion.
    model_param = e.hyperparam
    for name, value in o.items():
        model_param[name] = value

    cvscores = handler.kfold_val(
        actionshist, model_param=model_param, kfold_splits=e.folds,
        max_epochs=e.max_epochs, batch_size=e.batch_size)

    # The optimizer minimizes, so negate the mean score.
    value = -1 * np.mean(cvscores)
    std_dv = np.std(cvscores)

    res = {
        'result': value,
        'params': o,
        'annotate': 'a free comment',
        'std_dv': std_dv,
    }
    print(res)
    if fold is not None:
        res['fold'] = fold

    out = out_path + hash_value
    dest = '%s.json' % out if fold is None else '%s_f%d.json' % (out, fold)
    # BUGFIX: use a context manager so the file is flushed and closed
    # deterministically (the original leaked the handle).
    with open(dest, 'w') as f:
        f.write(json.dumps(res))
def evaluate(o, fold=None):
    """Cross-validate one Bayesian-optimization hyperparameter suggestion.

    Pops ``hash`` and ``i_exp`` from *o* (the dict is mutated), overlays the
    remaining entries onto the experiment's default hyperparameters, runs
    k-fold cross-validation, and writes the (negated) mean score as JSON to
    ``<OUTPATH>/<NAME>/<hash>[_f<fold>].json`` for the optimizer to collect.

    Parameters
    ----------
    o : dict
        Suggested parameters plus bookkeeping keys ``'hash'`` and ``'i_exp'``.
    fold : int or None
        Optional fold index, appended to the result and output filename.
    """
    hash_value = o.pop('hash')
    i_exp = int(o.pop('i_exp'))

    # Log memory usage before training (percentage used is index 2).
    mem = psutil.virtual_memory()
    print('Memory:', mem[2])

    # Experiment parameters
    e = exp.EXPERIMENTS[i_exp]
    out_path = exp.OUTPATH + e['NAME'] + '/'

    # Pick input data + embedding matrix by experiment name. NOTE(review):
    # the 'AVG' branch intentionally reuses the VAR_DIM files.
    if 'VAR_LOW' in e['NAME']:
        path = exp.INPATH + 'input_' + 'VAR_LOW' + '.h5'
        e['NLP_PARAM']['embedding_matrix_path'] = exp.INPATH + 'embedding_matrix_' + 'VAR_LOW' + '.npy'
    elif 'VAR_DIM' in e['NAME']:
        path = exp.INPATH + 'input_' + 'VAR_DIM' + '.h5'
        e['NLP_PARAM']['embedding_matrix_path'] = exp.INPATH + 'embedding_matrix_' + 'VAR_DIM' + '.npy'
    elif 'AVG' in e['NAME']:
        path = exp.INPATH + 'input_' + 'VAR_DIM' + '.h5'
        e['NLP_PARAM']['embedding_matrix_path'] = exp.INPATH + 'embedding_matrix_' + 'VAR_DIM' + '.npy'
    else:
        path = exp.INPATH + 'input_' + 'NOMINAL' + '.h5'
        e['NLP_PARAM']['embedding_matrix_path'] = exp.INPATH + 'embedding_matrix_' + 'NOMINAL' + '.npy'

    actionshist, codes, sites = fit_handler.load_data(
        path, msg_only=exp.MSG_ONLY, sample=exp.SAMPLE, sample_fact=exp.SAMPLE_FACT)

    # Setup the fit handler
    handler = fit_handler.FitHandler(
        exp.MODEL, codes, sites, exp.MAX_WORDS, exp.GEN_PARAM,
        pruning_mode=exp.PRUNING,
        model_args=e['NLP_PARAM'],
        callback_args=e['CALLBACK'],
        train_on_batch=exp.TRAIN_ON_BATCH,
        verbose=2)

    # Start from the experiment defaults, then overwrite with the
    # Bayesian-optimizer suggestion.
    model_param = e['HYPERPARAM']
    for name, value in o.items():
        model_param[name] = value

    cvscores = handler.kfold_val(
        actionshist, model_param=model_param, kfold_splits=exp.FOLDS,
        max_epochs=exp.MAX_EPOCHS, batch_size=exp.BATCH_SIZE)

    # The optimizer minimizes, so negate the mean score.
    value = -1 * np.mean(cvscores)
    std_dv = np.std(cvscores)

    res = {
        'result': value,
        'params': o,
        'annotate': 'a free comment',
        'std_dv': std_dv,
    }
    print(res)
    if fold is not None:
        res['fold'] = fold

    out = out_path + hash_value
    dest = '%s.json' % out if fold is None else '%s_f%d.json' % (out, fold)
    # BUGFIX: use a context manager so the file is flushed and closed
    # deterministically (the original leaked the handle).
    with open(dest, 'w') as f:
        f.write(json.dumps(res))
def train(i_exp=0, model_param=None):
    """Run k-fold cross-validation for one experiment and return the scores.

    Selects the input file and embedding matrix based on the experiment name,
    then cross-validates with either the caller-supplied hyperparameters or
    the experiment defaults.

    Parameters
    ----------
    i_exp : int
        Index into ``exp.EXPERIMENTS``.
    model_param : dict or None
        Model hyperparameters; defaults to the experiment's ``HYPERPARAM``.

    Returns
    -------
    The per-fold scores returned by ``handler.kfold_val``.
    """
    # Log memory usage before training (percentage used is index 2).
    mem = psutil.virtual_memory()
    print('Memory:', mem[2])

    # Experiment parameters
    e = exp.EXPERIMENTS[i_exp]

    # Pick input data + embedding matrix by experiment name. NOTE(review):
    # the 'AVG' branch intentionally reuses the VAR_DIM files.
    if 'VAR_LOW' in e['NAME']:
        path = exp.INPATH + 'input_' + 'VAR_LOW' + '.h5'
        e['NLP_PARAM']['embedding_matrix_path'] = exp.INPATH + 'embedding_matrix_' + 'VAR_LOW' + '.npy'
    elif 'VAR_DIM' in e['NAME']:
        path = exp.INPATH + 'input_' + 'VAR_DIM' + '.h5'
        e['NLP_PARAM']['embedding_matrix_path'] = exp.INPATH + 'embedding_matrix_' + 'VAR_DIM' + '.npy'
    elif 'AVG' in e['NAME']:
        path = exp.INPATH + 'input_' + 'VAR_DIM' + '.h5'
        e['NLP_PARAM']['embedding_matrix_path'] = exp.INPATH + 'embedding_matrix_' + 'VAR_DIM' + '.npy'
    else:
        path = exp.INPATH + 'input_' + 'NOMINAL' + '.h5'
        e['NLP_PARAM']['embedding_matrix_path'] = exp.INPATH + 'embedding_matrix_' + 'NOMINAL' + '.npy'

    actionshist, codes, sites = fit_handler.load_data(
        path, msg_only=exp.MSG_ONLY, sample=exp.SAMPLE, sample_fact=exp.SAMPLE_FACT)

    # Setup the fit handler
    handler = fit_handler.FitHandler(
        exp.MODEL, codes, sites, exp.MAX_WORDS, exp.GEN_PARAM,
        pruning_mode=exp.PRUNING,
        model_args=e['NLP_PARAM'],
        callback_args=e['CALLBACK'],
        train_on_batch=exp.TRAIN_ON_BATCH)

    # The two original branches differed only in which hyperparameters were
    # passed, so collapse them into a single call.
    if model_param is None:
        model_param = e['HYPERPARAM']

    cvscores = handler.kfold_val(
        actionshist, model_param=model_param, kfold_splits=exp.FOLDS,
        max_epochs=exp.MAX_EPOCHS, batch_size=exp.BATCH_SIZE)

    # BUGFIX: the original ended with `return score`, but `score` was never
    # assigned (only `cvscores` was), raising NameError at runtime.
    return cvscores