Example #1
0
def train( i_exp = 0, mode = 'train', model_param = None, batch_size = None, sample_frac = None ):
    """Run one experiment in training, cross-validation or optimisation mode.

    Parameters
    ----------
    i_exp : int
        Index into ``exp.EXPERIMENTS`` selecting the experiment configuration.
    mode : str
        ``'optimize'`` calls ``handler.find_optimal_parameters``, ``'cv'``
        calls ``handler.kfold_val``; any other value calls
        ``handler.run_training``.
    model_param : optional
        Model hyper-parameters. Defaults to the experiment's ``'HYPERPARAM'``
        entry when ``None``.
    batch_size : optional
        Batch size. Defaults to ``exp.BATCH_SIZE`` when ``None``.
    sample_frac : optional
        When not ``None`` the data is loaded with ``sample=True`` and
        ``load_labels=False``, and the output goes to a
        ``sample_frac_best<sample_frac>/`` sub-directory.

    Returns
    -------
    The value returned by the handler method selected through ``mode``.
    """
    # Memory before the training (element 2 of psutil's virtual_memory tuple)
    mem = psutil.virtual_memory()
    print('Memory:', mem[2])

    # Experiment parameters
    e = exp.EXPERIMENTS[i_exp]

    # Decide once whether sampling is requested; identity comparison is the
    # correct way to test against the None singleton.
    use_sample = sample_frac is not None

    # Store path (never overwrite an existing output directory)
    outpath = exp.OUTPATH + e['NAME'] + '/'
    if use_sample:
        outpath += 'sample_frac_best' + str(sample_frac) + '/'
    create_dir(outpath, False)

    # Load the data
    inpath = exp.INPATH + 'input_NOMINAL' + '.h5'
    if use_sample:
        actionshist, codes, sites = fit_handler.load_data(
            inpath, load_labels=False, sample=True, sample_frac=sample_frac)
    else:
        actionshist, codes, sites = fit_handler.load_data(inpath)

    # Setup the fit handler
    handler = fit_handler.FitHandler(exp.MODEL, codes, sites,
                                     pruning_mode=e['PRUNE'],
                                     callback_args=e['CALLBACK'],
                                     train_on_batch=False,
                                     path=outpath,
                                     verbose=2)

    # Get the count matrix
    X, y = handler.count_matrix(actionshist)

    # Memory after the data has been loaded and transformed
    mem = psutil.virtual_memory()
    print('Memory:', mem[2])

    # Fall back to the experiment-wide defaults
    if model_param is None:
        model_param = e['HYPERPARAM']
    if batch_size is None:
        batch_size = exp.BATCH_SIZE

    if mode == 'optimize':
        return handler.find_optimal_parameters(exp.SKOPT_DIM, model_param, X, y,
                                               cv=exp.CV,
                                               kfold_splits=exp.FOLDS,
                                               num_calls=exp.SKOPTCALLS,
                                               max_epochs=exp.MAX_EPOCHS,
                                               batch_size=batch_size)
    if mode == 'cv':
        return handler.kfold_val(X, y,
                                 model_param=model_param,
                                 kfold_splits=exp.FOLDS,
                                 max_epochs=exp.MAX_EPOCHS,
                                 batch_size=batch_size)
    # Any other mode: plain training run
    return handler.run_training(X, y,
                                batch_size=batch_size,
                                max_epochs=exp.MAX_EPOCHS,
                                model_param=model_param)
Example #2
0
def train( i_exp = 0, model_param = None, sample_frac = None ):
    """Cross-validate the model of one experiment and return the fold scores.

    Parameters
    ----------
    i_exp : int
        Index into ``exp.experiments`` selecting the experiment object.
    model_param : optional
        Model hyper-parameters. Defaults to ``e.hyperparam`` when ``None``.
    sample_frac : optional
        When not ``None`` the data is loaded with ``sample=True`` and this
        fraction.

    Returns
    -------
    The value returned by ``handler.kfold_val`` (previously this result was
    computed and then silently dropped).

    Side effects
    ------------
    Sets ``e.nlp_param['embedding_matrix_path']`` on the selected experiment.
    """
    # Memory before the training (element 2 of psutil's virtual_memory tuple)
    mem = psutil.virtual_memory()
    print('Memory:', mem[2])

    # Experiment parameters
    e = exp.experiments[i_exp]

    # 'NOMINAL' and 'AVG' experiments both read the NOMINAL input files;
    # every other experiment reads the VAR_DIM ones.
    if 'NOMINAL' in e.name or 'AVG' in e.name:
        variant = 'NOMINAL'
    else:
        variant = 'VAR_DIM'
    path = e.inpath + 'input_' + variant + '.h5'
    e.nlp_param['embedding_matrix_path'] = e.inpath + 'embedding_matrix_' + variant + '.npy'

    # Load the data
    if sample_frac is None:
        actionshist, codes, sites = fit_handler.load_data(path)
    else:
        actionshist, codes, sites = fit_handler.load_data(path, sample=True, sample_frac=sample_frac)

    # Setup the fit handler
    handler = fit_handler.FitHandler(e.model, codes, sites, e.max_words,
                                     e.gen_param,
                                     pruning_mode=e.pruning,
                                     model_args=e.nlp_param,
                                     callback_args=e.callback,
                                     train_on_batch=e.train_on_batch,
                                     verbose=1)

    if model_param is None:
        model_param = e.hyperparam

    cvscores = handler.kfold_val(actionshist,
                                 model_param=model_param,
                                 kfold_splits=e.folds,
                                 max_epochs=e.max_epochs,
                                 batch_size=e.batch_size)
    return cvscores
def evaluate(o, fold=None):
    """Cross-validate one hyper-parameter suggestion and store the result as JSON.

    Parameters
    ----------
    o : dict
        Must contain ``'hash'`` (used as the output file stem) and ``'i_exp'``
        (index into ``exp.experiments``). Both keys are popped, so the
        caller's dict is modified. The remaining items are written into the
        experiment's hyper-parameters.
    fold : int, optional
        When given it is stored in the result and appended to the file name
        as ``_f<fold>``.

    Returns
    -------
    None. The result is written to ``<e.outpath><e.name>/<hash>[_f<fold>].json``.

    Side effects
    ------------
    Sets ``e.nlp_param['embedding_matrix_path']`` and updates ``e.hyperparam``
    in place on the selected experiment.
    """
    hash_value = o.pop('hash')
    i_exp = int(o.pop('i_exp'))

    # Memory before the training (element 2 of psutil's virtual_memory tuple)
    mem = psutil.virtual_memory()
    print('Memory:', mem[2])

    # Experiment parameters
    e = exp.experiments[i_exp]
    out_path = e.outpath + e.name + '/'

    # 'NOMINAL' and 'AVG' experiments both read the NOMINAL input files;
    # every other experiment reads the VAR_DIM ones.
    if 'NOMINAL' in e.name or 'AVG' in e.name:
        variant = 'NOMINAL'
    else:
        variant = 'VAR_DIM'
    path = e.inpath + 'input_' + variant + '.h5'
    e.nlp_param['embedding_matrix_path'] = e.inpath + 'embedding_matrix_' + variant + '.npy'

    # Load the data
    actionshist, codes, sites = fit_handler.load_data(path)

    # Setup the fit handler
    handler = fit_handler.FitHandler(e.model,
                                     codes,
                                     sites,
                                     e.max_words,
                                     e.gen_param,
                                     pruning_mode=e.pruning,
                                     model_args=e.nlp_param,
                                     callback_args=e.callback,
                                     train_on_batch=e.train_on_batch,
                                     verbose=2)

    # Initial hyper parameters
    # NOTE(review): this is the experiment's own object, so the overrides
    # below persist on `e.hyperparam` after the call - confirm this is intended.
    model_param = e.hyperparam
    # Overwrite with bayesian suggestion
    for name, value in o.items():
        model_param[name] = value

    cvscores = handler.kfold_val(actionshist,
                                 model_param=model_param,
                                 kfold_splits=e.folds,
                                 max_epochs=e.max_epochs,
                                 batch_size=e.batch_size)

    # The stored result is the negated mean score
    # (presumably because the optimiser minimises - confirm with the caller).
    value = -1 * np.mean(cvscores)
    std_dv = np.std(cvscores)

    res = {
        'result': value,
        'params': o,
        'annotate': 'a free comment',
        'std_dv': std_dv
    }
    # The fold is deliberately added after printing, as before.
    print(res)
    if fold is not None:
        res['fold'] = fold
    out = out_path + hash_value
    dest = '%s.json' % out if fold is None else '%s_f%d.json' % (out, fold)

    # Context manager guarantees the file is flushed and closed, instead of
    # relying on garbage collection of an anonymous file object.
    with open(dest, 'w') as result_file:
        result_file.write(json.dumps(res))
Example #4
0
def evaluate(o, fold=None):
    """Cross-validate one hyper-parameter suggestion and store the result as JSON.

    Parameters
    ----------
    o : dict
        Must contain ``'hash'`` (used as the output file stem) and ``'i_exp'``
        (index into ``exp.EXPERIMENTS``). Both keys are popped, so the
        caller's dict is modified. The remaining items are written into the
        experiment's ``'HYPERPARAM'`` mapping.
    fold : int, optional
        When given it is stored in the result and appended to the file name
        as ``_f<fold>``.

    Returns
    -------
    None. The result is written to
    ``<exp.OUTPATH><e['NAME']>/<hash>[_f<fold>].json``.

    Side effects
    ------------
    Sets ``e['NLP_PARAM']['embedding_matrix_path']`` and updates
    ``e['HYPERPARAM']`` in place on the selected experiment.
    """
    hash_value = o.pop('hash')
    i_exp = int(o.pop('i_exp'))

    # Memory before the training (element 2 of psutil's virtual_memory tuple)
    mem = psutil.virtual_memory()
    print('Memory:', mem[2])

    # Experiment parameters
    e = exp.EXPERIMENTS[i_exp]
    out_path = exp.OUTPATH + e['NAME'] + '/'

    # Pick the input variant from the experiment name. 'AVG' experiments read
    # the VAR_DIM files; anything unrecognised falls back to NOMINAL.
    exp_name = e['NAME']
    if 'VAR_LOW' in exp_name:
        variant = 'VAR_LOW'
    elif 'VAR_DIM' in exp_name or 'AVG' in exp_name:
        variant = 'VAR_DIM'
    else:
        variant = 'NOMINAL'
    path = exp.INPATH + 'input_' + variant + '.h5'
    e['NLP_PARAM']['embedding_matrix_path'] = exp.INPATH + 'embedding_matrix_' + variant + '.npy'

    # Load the data
    # NOTE(review): the keyword is spelled `sample_fact`; confirm it matches
    # the signature of fit_handler.load_data.
    actionshist, codes, sites = fit_handler.load_data(
        path,
        msg_only=exp.MSG_ONLY,
        sample=exp.SAMPLE,
        sample_fact=exp.SAMPLE_FACT)

    # Setup the fit handler
    handler = fit_handler.FitHandler(exp.MODEL,
                                     codes,
                                     sites,
                                     exp.MAX_WORDS,
                                     exp.GEN_PARAM,
                                     pruning_mode=exp.PRUNING,
                                     model_args=e['NLP_PARAM'],
                                     callback_args=e['CALLBACK'],
                                     train_on_batch=exp.TRAIN_ON_BATCH,
                                     verbose=2)

    # Initial hyper parameters
    # NOTE(review): this is the experiment's own mapping, so the overrides
    # below persist on `e['HYPERPARAM']` after the call - confirm this is intended.
    model_param = e['HYPERPARAM']
    # Overwrite with bayesian suggestion
    for name, value in o.items():
        model_param[name] = value

    cvscores = handler.kfold_val(actionshist,
                                 model_param=model_param,
                                 kfold_splits=exp.FOLDS,
                                 max_epochs=exp.MAX_EPOCHS,
                                 batch_size=exp.BATCH_SIZE)

    # The stored result is the negated mean score
    # (presumably because the optimiser minimises - confirm with the caller).
    value = -1 * np.mean(cvscores)
    std_dv = np.std(cvscores)

    res = {
        'result': value,
        'params': o,
        'annotate': 'a free comment',
        'std_dv': std_dv
    }
    # The fold is deliberately added after printing, as before.
    print(res)
    if fold is not None:
        res['fold'] = fold
    out = out_path + hash_value
    dest = '%s.json' % out if fold is None else '%s_f%d.json' % (out, fold)

    # Context manager guarantees the file is flushed and closed, instead of
    # relying on garbage collection of an anonymous file object.
    with open(dest, 'w') as result_file:
        result_file.write(json.dumps(res))
Example #5
0
def train(i_exp=0, model_param=None):
    """Cross-validate the model of one experiment and return the fold scores.

    Parameters
    ----------
    i_exp : int
        Index into ``exp.EXPERIMENTS`` selecting the experiment configuration.
    model_param : optional
        Model hyper-parameters. Defaults to the experiment's ``'HYPERPARAM'``
        entry when ``None``.

    Returns
    -------
    The value returned by ``handler.kfold_val``. The previous code returned
    the undefined name ``score`` and therefore always raised ``NameError``.

    Side effects
    ------------
    Sets ``e['NLP_PARAM']['embedding_matrix_path']`` on the selected experiment.
    """
    # Memory before the training (element 2 of psutil's virtual_memory tuple)
    mem = psutil.virtual_memory()
    print('Memory:', mem[2])

    # Experiment parameters
    e = exp.EXPERIMENTS[i_exp]

    # Pick the input variant from the experiment name. 'AVG' experiments read
    # the VAR_DIM files; anything unrecognised falls back to NOMINAL.
    exp_name = e['NAME']
    if 'VAR_LOW' in exp_name:
        variant = 'VAR_LOW'
    elif 'VAR_DIM' in exp_name or 'AVG' in exp_name:
        variant = 'VAR_DIM'
    else:
        variant = 'NOMINAL'
    path = exp.INPATH + 'input_' + variant + '.h5'
    e['NLP_PARAM']['embedding_matrix_path'] = exp.INPATH + 'embedding_matrix_' + variant + '.npy'

    # Load the data
    # NOTE(review): the keyword is spelled `sample_fact`; confirm it matches
    # the signature of fit_handler.load_data.
    actionshist, codes, sites = fit_handler.load_data(
        path,
        msg_only=exp.MSG_ONLY,
        sample=exp.SAMPLE,
        sample_fact=exp.SAMPLE_FACT)

    # Setup the fit handler
    handler = fit_handler.FitHandler(exp.MODEL,
                                     codes,
                                     sites,
                                     exp.MAX_WORDS,
                                     exp.GEN_PARAM,
                                     pruning_mode=exp.PRUNING,
                                     model_args=e['NLP_PARAM'],
                                     callback_args=e['CALLBACK'],
                                     train_on_batch=exp.TRAIN_ON_BATCH)

    # Fall back to the experiment's own hyper-parameters
    if model_param is None:
        model_param = e['HYPERPARAM']

    cvscores = handler.kfold_val(actionshist,
                                 model_param=model_param,
                                 kfold_splits=exp.FOLDS,
                                 max_epochs=exp.MAX_EPOCHS,
                                 batch_size=exp.BATCH_SIZE)

    return cvscores