Example #1
def main(config_filename):
    config = Config()
    config.load(config_filename)

    train_data, test_data = get_data(config.data_config, config.competition)
    vocabulary = train_data.get_vocabulary(config.lower_vocabulary).merge(
        test_data.get_vocabulary(config.lower_vocabulary))
    # Build a shrunken embeddings file once, keyed by (language, embedding type).
    embedding_sources = {
        ("ru", "fasttext"): ("/media/yallen/My Passport/Models/Vectors/FastText/wiki.ru.vec", 100000),
        ("ru", "w2v"): ("/media/yallen/My Passport/Models/Vectors/RDT/russian-big-w2v.txt", 100000),
        ("en", "w2v"): ("/media/yallen/My Passport/Models/Vectors/W2V/GoogleNews-vectors-negative300.vec", 150000),
        ("en", "fasttext"): ("/media/yallen/My Passport/Models/Vectors/FastText/wiki.en.vec", 150000),
    }
    if not os.path.exists(config.embeddings_filename):
        for (language, kind), (path, max_words) in embedding_sources.items():
            if config.data_config.language == language and kind in config.embeddings_filename:
                shrink_w2v(path, vocabulary, max_words, config.embeddings_filename)
                break
    char_set = train_data.get_char_set()
    print(vocabulary.size())
    print(char_set)

    targets, additionals, rev_categories, output_sizes = get_targets_additionals(train_data)
    train_model(config_filename, train_data, vocabulary, char_set, targets, additionals, output_sizes)
    predict(config_filename, test_data, vocabulary, char_set,
            targets, additionals, rev_categories)
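`shrink_w2v` is called above but not defined in the snippet; a minimal sketch of what such a helper might do, assuming the plain-text word2vec `.vec` format (a header line with counts, then one token plus its vector per line). The `vocabulary.has()` lookup is an assumed API, inferred from the `Vocabulary` object used above.

def shrink_w2v(src_path, vocabulary, max_words, dst_path):
    # Hypothetical sketch: keep at most max_words in-vocabulary vectors
    # from a large .vec file and write them to a smaller file.
    kept = []
    with open(src_path, encoding="utf-8") as src:
        dim = src.readline().split()[1]  # header: "<word_count> <dimensions>"
        for line in src:
            if vocabulary.has(line.split(" ", 1)[0]):  # assumed vocabulary API
                kept.append(line)
                if len(kept) >= max_words:
                    break
    with open(dst_path, "w", encoding="utf-8") as dst:
        dst.write("{} {}\n".format(len(kept), dim))
        dst.writelines(kept)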
Example #2
def main():
    paras = args_process()
    if paras:
        if paras.command == "detection":
            detection_msi(paras)
        elif paras.command == "train":
            train_model(paras)
        elif paras.command == "show":
            show_model(paras)
Example #3
def test_train_model(mock_cross_validate, input_df, cv_results):
    # Mock return value of cross_validate
    mock_cross_validate.return_value = cv_results

    # Train model
    df = preprocess_data(input_df)
    model, test_accuracy = train_model(df)

    # Should return an sklearn pipeline and test accuracy
    assert type(model) == Pipeline
    assert type(test_accuracy) == np.float64
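For this test to receive `mock_cross_validate`, `cross_validate` must be patched where the training module looks it up; a sketch of the wiring the test presumably relies on (the module path and fixture contents are assumptions, not from the source):

from unittest.mock import patch
import numpy as np
import pandas as pd
import pytest

@pytest.fixture
def input_df():
    # Hypothetical minimal frame; the real fixture is not shown.
    return pd.DataFrame({"feature": [1.0, 2.0, 3.0, 4.0],
                         "label": [0, 1, 0, 1]})

@pytest.fixture
def cv_results():
    # Shaped like sklearn.model_selection.cross_validate output.
    return {"test_score": np.array([0.9, 0.8, 0.85])}

# The decorator injects mock_cross_validate as the first test argument;
# "model_training" is an assumed path for the module where train_model
# imports cross_validate.
@patch("model_training.cross_validate")
def test_train_model(mock_cross_validate, input_df, cv_results):
    ...  # body as in Example #3 above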
Example #4
def get_model_loss(params_dict):
    """
    Train model and compute loss (1-AUC) on fold validation sets

    Parameters:
    params_dict (dict): Model parameters

    Returns:
    dict: Dict with loss and round status
    """

    global max_mean_val_AUC
    global best_params

    params_dict["n_estimators"] = int(params_dict["n_estimators"])

    kf = KFold(n_splits=NFOLDS)

    sum_val_AUC = 0

    for i, (train_idxs, validation_idxs) in enumerate(kf.split(X)):

        X_train, X_validation = X[train_idxs], X[validation_idxs]
        y_train, y_validation = Y[train_idxs], Y[validation_idxs]

        model, train_AUC, val_AUC = train_model(X_train=X_train,
                                                y_train=y_train,
                                                X_validation=X_validation,
                                                y_validation=y_validation,
                                                params_dict=params_dict,
                                                verbose=False)

        sum_val_AUC += val_AUC

        print("Fold {} Val AUC: {}".format(i, val_AUC))

    mean_val_AUC = sum_val_AUC / NFOLDS

    loss = 1 - mean_val_AUC

    pickle.dump(trials, open(trials_filepath, "wb"))

    if max_mean_val_AUC < mean_val_AUC:
        max_mean_val_AUC = mean_val_AUC
        best_params = params_dict

    print("Round Mean Val AUC: {}".format(mean_val_AUC))
    print("Max Mean Val AUC: {}".format(max_mean_val_AUC))
    print("Best params: {}".format(best_params))
    print("Nb trials executed: {}".format(len(trials)))

    return {"loss": loss, "status": STATUS_OK}
Example #5
    def train(self, pretrain=False):
        if pretrain:
            self.reid_network.load_weights("pretrain.h5")
            log("Loaded pretrain weights")

        generator_train = ReidGenerator(database=self.dataset,
                                        batch_size=self.batch_size,
                                        flag="train",
                                        p=0.33)
        generator_val = ReidGenerator(database=self.dataset,
                                      batch_size=self.batch_size,
                                      flag="validation")

        log("Training [reid]")
        train_model(self.reid_network,
                    generator_train=generator_train,
                    generator_val=generator_val,
                    batch_size=self.batch_size,
                    steps_per_epoch=self.steps_per_epoch,
                    epochs=self.epochs,
                    validation_steps=self.validation_steps,
                    plot_title="loss [reid training]")
def main():
    cudnn.benchmark = True
    args = get_args()

    data_loaders, num_classes = \
        get_etl2_dataloaders(args.model) if args.dataset == 'etl2' \
        else get_etl2_9g_dataloaders(args.model)

    if args.train:
        model, name = \
            vgg_model(num_classes) if args.model == "vgg11_bn" \
            else chinese_model(num_classes)
        train_model(model, data_loaders)
        torch.save(model.state_dict(),
                   f'trained_models/{args.model}_{args.dataset}.weights')
    elif args.prune:
        model, name = vgg_model(num_classes) if args.model == "vgg11_bn" \
            else chinese_model(num_classes)
        model.load_state_dict(
            torch.load(f'trained_models/{args.model}_{args.dataset}.weights'))
        finetuning_passes = 250
        prune_ratio = 0.90
        prune_model(model,
                    data_loaders,
                    prune_ratio=prune_ratio,
                    finetuning_passes=finetuning_passes)

        print(model)
        with open(f"trained_models/{args.model}_{prune_ratio}p_{finetuning_passes}it.txt",
                  'w') as f:
            print(model, file=f)
        torch.save(
            model.state_dict(),
            f'trained_models/pruned_{args.model}_{args.dataset}_finetune{finetuning_passes}.weights'
        )
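`prune_model` is not defined in this snippet; a minimal sketch of one way such a routine could look using `torch.nn.utils.prune`, with the finetuning step left as an assumed helper:

import torch.nn as nn
import torch.nn.utils.prune as prune

def prune_model(model, data_loaders, prune_ratio=0.9, finetuning_passes=250):
    # Hypothetical sketch: one-shot L1 magnitude pruning on conv/linear
    # weights, followed by a fixed number of finetuning passes.
    for module in model.modules():
        if isinstance(module, (nn.Conv2d, nn.Linear)):
            prune.l1_unstructured(module, name="weight", amount=prune_ratio)
            prune.remove(module, "weight")  # bake the mask into the weights
    for _ in range(finetuning_passes):
        finetune_one_pass(model, data_loaders["train"])  # assumed helper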
Example #7
    def pretrain_network(self):

        modellib = importlib.import_module("src.model." + self.model_name)
        reid_network, feature_network = modellib.generate_model(
            input_shape=self.input_shape, lr=self.lr, feature=True)

        generator_train = featureGenerator(database=self.dataset,
                                           batch_size=self.batch_size,
                                           flag="train")
        generator_val = featureGenerator(database=self.dataset,
                                         batch_size=self.batch_size,
                                         flag="validation")

        log("Training [features]")
        train_model(feature_network,
                    generator_train=generator_train,
                    generator_val=generator_val,
                    batch_size=self.batch_size,
                    steps_per_epoch=self.steps_per_epoch,
                    epochs=self.epochs,
                    validation_steps=self.validation_steps,
                    plot_title="loss [feature training]")

        reid_network.save_weights("pretrain.h5")
Example #8
def test_train_model_unhappy():
    df = pd.DataFrame(data_bad, columns=columns_bad)
    assert train_model(df, 0.3, "delay", 10) is None
Example #9
def test_train_model():
    df = pd.DataFrame(data, columns=columns)
    model = train_model(df, 0.3, "delay", 10)
    assert isinstance(model, sklearn.tree.DecisionTreeClassifier)
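Read together, Examples #8 and #9 pin down a contract for this `train_model`: given a DataFrame, a test-size fraction, a target column name, and a max depth, it returns a fitted `DecisionTreeClassifier`, or `None` when the data is unusable. A minimal sketch that would satisfy both tests (the error handling is an assumption):

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

def train_model(df, test_size, target, max_depth):
    # Hypothetical sketch matching the two tests above.
    try:
        X = df.drop(columns=[target])
        y = df[target]
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size)
        model = DecisionTreeClassifier(max_depth=max_depth)
        model.fit(X_train, y_train)
        return model
    except (KeyError, ValueError):
        # Bad schema or non-numeric data: the "unhappy" test expects None.
        return None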
Example #10
from src.train import train_model
from config.se101_768_config import cfg
from src.unet_plus import *
import torch.optim.lr_scheduler as lr_scheduler
from src.nadam import Nadam

model = SE_Res101UNet(6)
optimizer = Nadam(model.parameters(), lr=cfg['base_lr'])
scheduler = lr_scheduler.MultiStepLR(optimizer,
                                     cfg['milestone'],
                                     gamma=cfg['gamma'])
train_model(model, optimizer, scheduler, cfg)
Example #11
    # Load input file
    if args.input is not None:
        input = pd.read_csv(args.input)
        logger.info('Input data loaded from %s', args.input)

    # acquire data from S3 and download it to the specified folder
    if args.step == 'acquire':
        try:
            output = acquire(S3_PUBLIC_KEY, S3_SECRET_KEY, FILE_NAME,
                             BUCKET_NAME, S3_OBJECT_NAME)
            logger.info("Dataset is successfully downloaded from S3 bucket! ")
        except Exception:
            logger.error(
                "Could not open S3 connections given the input credentials! ")

    # clean the dataset
    if args.step == 'clean':
        output = clean(input, **config['transform']['clean'])
        logger.info("Finished cleaning the raw dataset!")
        if args.output is not None:
            output.to_csv(args.output, index=False, header=True)
            logger.info("Output saved to %s" % args.output)

    # train the model
    elif args.step == 'train':
        output = train_model(input, **config['train']['train_model'])
        logger.info("Finished training the model!")
        if args.output is not None:
            pickle.dump(output, open(args.output, "wb"))
            logger.info("Model saved to %s" % args.output)
Example #12
def train(event=None, _=None):
    train_model()
    return "API ONLINE v1.0st", 200
                f.write("Best n_estimators: " +
                        str(classifier.best_params_["n_estimators"]) + '\n')
                f.write("Best max_depth: " +
                        str(classifier.best_params_["max_depth"]) + '\n')
                f.write("Best subsample: " +
                        str(classifier.best_params_["subsample"]) + '\n')
                f.write("CV AUC: " + str(cv_auc) + "\n")
                f.write("CV Accuracy: " + str(cv_acc) + "\n")
                f.write("Test AUC: " + str(test_auc) + "\n")
                f.write("Test Accuracy: " + str(test_acc) + "\n")
                f.close()
            logger.info(
                "File: {} created -- hyperparameters and scoring metrics saved"
                .format(args.scores_path))
        except Exception:
            logger.error("Failed to save hyperparameters and scoring metrics")
            raise

    if args.full_model:
        try:
            trained_model = train_model(args.imputed_path, config.RANDOM_STATE,
                                        config.BEST_LR, config.BEST_NUM_EST,
                                        config.BEST_MAX_DEPTH,
                                        config.BEST_SUBSAMPLE,
                                        args.encoder_path)
            pickle.dump(trained_model, open(args.model_path, "wb"))
            logger.info("Trained model successfully created")
        except Exception:
            logger.error("Trained model was not fit successfully")
            raise
Example #14
"""Run Scripts from Command Line"""

from src.preprocess import make_dataset
from src.train import train_model
from src.evaluation import single_customer_evaluation, root_mean_squared_error

if __name__ == '__main__':
    make_dataset()
    train_model()
    freq_predictions, freq_holdout = single_customer_evaluation(time_units=243)
    rmse = root_mean_squared_error(time_units=243)

    print(f"Single Customer Predictions:"
          f"\nPrediction:"
          f"\n {freq_predictions}"
          f"\nGround Truth: "
          f"\n {freq_holdout}"
          f"\n OVERALL RMSE: {rmse}")
Example #15
def single_test(doc):
    # Build the training model
    m = train_model()
    m.train_model()
    predict(doc)
    logger.info('Prediction succeeded')
Example #16
data['TEST'] = pd.read_csv(cfg['PATHS']['TEST_SET'])

# Custom Keras callback that logs all training and validation metrics after each epoch to the current Azure run
class LogRunMetrics(Callback):
    def on_epoch_end(self, epoch, log):
        for metric_name in log:
            if 'val' in metric_name:
                run.log('validation_' + metric_name.split('_')[-1], log[metric_name])
            else:
                run.log('training_' + metric_name, log[metric_name])
        #run.log('validation_auc', log['val_auc'])

# Set model callbacks
callbacks = [EarlyStopping(monitor='val_loss', verbose=1, patience=cfg['TRAIN']['PATIENCE'], mode='min', restore_best_weights=True),
             LogRunMetrics()]

# Train a model
start_time = datetime.datetime.now()
model, test_metrics, test_generator = train_model(cfg, data, callbacks)
print("TRAINING TIME = " + str((datetime.datetime.now() - start_time).total_seconds() / 60.0) + " min")

# Log test set performance metrics, ROC, confusion matrix in Azure run
test_predictions = model.predict_generator(test_generator, verbose=0)
test_labels = test_generator.labels
for metric_name in test_metrics:
    run.log('test_' + metric_name, test_metrics[metric_name])
covid_idx = test_generator.class_indices['COVID-19']
roc_plt = plot_roc("Test set", test_generator.labels, test_predictions, class_id=covid_idx)
run.log_image("ROC", plot=roc_plt)
cm_plt = plot_confusion_matrix(test_generator.labels, test_predictions, class_id=covid_idx)
run.log_image("Confusion matrix", plot=cm_plt)
Example #17
from src.load_data import load_data, prepare_data
from src.model import classifier_model
from src.train import train_model, save_model, evaluate_model
from src.utils import plot_learning_curves

# Main file to train a model with the name defined below

model_name = "2_blocks_20_epochs"

# Load and prepare dataset
x_train, y_train, x_test, y_test = load_data()
x_train, y_train, x_test, y_test = prepare_data(x_train, y_train, x_test,
                                                y_test)

# Train model on data
model = classifier_model()
trained_model, model_history = train_model(model, x_train, y_train, x_test,
                                           y_test)

# Save model and output results
save_model(trained_model, model_name)
evaluate_model(trained_model, x_test, y_test)
plot_learning_curves(model_history, model_name)
Example #18
<p style="text-align:justify">This is a proof of concept using streamlit for a breast cancer diagnostic model, 
since one of the fundamental parts of a project is to allow the end user to have access to the product and be able to use it</p>
''', unsafe_allow_html=True)

if check_if_model_exists():
    st.write('<p style="color:blue;"><b>There is already a trained model! You can predict data now!</b></p>', unsafe_allow_html=True)
else:
    st.write('<p style="color:red;"><b>We didn\'t find a trained model! Please train a model before making a prediction</b></p>', unsafe_allow_html=True)

left_column, center_column, right_column = st.beta_columns(3)

with left_column:
    if st.button('Train Model!'):
        with st.spinner('Training the model!'):
            train_model(True)
        st.success('Model Trained and Saved!')


di = dict()
if check_if_model_exists():
    with center_column:
        di['radius_mean'] = st.number_input('Radius Mean')
        di['texture_mean'] = st.number_input('Texture Mean')
        di['perimeter_mean'] = st.number_input('Perimeter Mean')
        di['area_mean'] = st.number_input('Area Mean')
        di['smoothness_mean'] = st.number_input('Smoothness Mean')
        di['compactness_mean'] = st.number_input('Compactness Mean')
        di['concavity_mean'] = st.number_input('Concavity Mean')
        di['concave points_mean'] = st.number_input('Concave Points Mean')
        di['symmetry_mean'] = st.number_input('Symmetry Mean')
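The snippet is cut off before the prediction step; a plausible continuation, assuming a hypothetical `load_model()` counterpart to `check_if_model_exists()`:

import pandas as pd

with right_column:
    if st.button('Predict!'):
        features = pd.DataFrame([di])   # one-row frame from the inputs above
        model = load_model()            # hypothetical loader
        prediction = model.predict(features)[0]
        st.write(f'Predicted diagnosis: {prediction}')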
Example #19
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
from src.dataload import load_data
from src.train import train_model
from src.predict import predict_emotion
from src.audio_utils import record_audio, play_audio
from src.generic_utils import generate_report
print("Welcome to my Speech-Emotion-Detection project. Check out my handles:-\n\n[linkedin.com/in/aitik-gupta][github.com/aitikgupta][kaggle.com/aitikgupta]\n\n")

DATASET_PATH = "./voices"
OUTPUT_PATH = "./output"
MODEL_PATH = "./model/model.h5"

choice = int(input("1) Train the model again.\n2) Test the model on 3 random voices.\n3) Test the model by your voice.[Note: In realtime, there are lot of noises than 'just' white noise, so results may differ.] \nEnter choice: "))
if choice == 1:
    print("[INFO] Model file will be overwritten!")
    dataset, labels = load_data(DATASET_PATH, mode="dev", n_random=-1, play_runtime=False)
    train_model(dataset=dataset, labels=labels, model_path=MODEL_PATH, n_splits=5, learning_rate=0.0001, epochs=30, batch_size=64, verbose=True)
    ytrue, ypred, probabilities = predict_emotion(dataset, labels, mode="dev", model_path=MODEL_PATH, verbose=False)
    generate_report(ytrue, ypred, verbose=True, just_acc=False)
elif choice == 2:
    dataset, labels = load_data(DATASET_PATH, mode="dev", n_random=3, play_runtime=True)
    ytrue, ypred, probabilities = predict_emotion(dataset, labels, mode="dev", model_path=MODEL_PATH, verbose=True)
    generate_report(ytrue, ypred, verbose=True, just_acc=True)
else:
    recording_path = os.path.join(OUTPUT_PATH, "recording.wav")
    inp = str(input(f"Record audio again? [All voices in {OUTPUT_PATH} will be used] (y|n): ")).lower()
    if inp == "y" or inp == "yes":
        record_audio(output_path=recording_path)
    dataset, _ = load_data(OUTPUT_PATH, mode="user", n_random=-1, play_runtime=True)
    _, _ = predict_emotion(dataset, mode="user", model_path=MODEL_PATH, verbose=True)
Example #20
from src.preprocess import make_dataset
from src.folds import generate_folds
from src.train import train_model
from src.predict import test_model

import numpy as np

if __name__ == '__main__':
    # Pre-process dataset.
    train_clean, test_clean = make_dataset(raw_file_name="raw.csv")

    # Generate Folds for train_clean
    train_folds = generate_folds(file_name="train_clean.csv",
                                 fold_type="skfold",
                                 n_splits=5,
                                 save_file_name="train_folds.csv")
    # Train Model
    cv_results = train_model()
    print(f"\nTRAIN SCORES:"
          f"\nROC_AUC: {np.mean(cv_results['test_roc_auc'])}"
          f"\nPrecision: {np.mean(cv_results['test_roc_auc'])}"
          f"\nRecall: {np.mean(cv_results['test_recall'])}")

    # Test Model.
    report, conf_mx = test_model()
    print(f"\nTEST RESULTS:"
          f"\n {report}"
          f"\nTrue Positive Rate: {round(conf_mx[1][1]*100, 3)}"
          f"\nFalse Positive Rate: {round(conf_mx[0][1]*100, 3)}"
          f"\nFalse Negative Rate: {round(conf_mx[1][0]*100, 3)}")
Example #21
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from src.train import train_model

if __name__=="__main__":
    # training cyclic lr 2 hrs
    # train_model(num_epochs=35)
    
    #training constant lr
    # train_model(num_epochs=25, clr=False)
    train_model(num_epochs=35, clr=False, weighted_classes=True)