Example #1
def train(x_train, y_train, x_test, y_test):

    exp = Experiment(api_key="API_KEY",
                     project_name="PROJECT",
                     workspace="WORKSPACE")

    # log custom hyperparameters
    exp.log_parameters(params)

    # Define model
    model = build_model_graph(exp)

    model.fit(
        x_train,
        y_train,
        batch_size=exp.get_parameter('batch-size'),
        epochs=exp.get_parameter('epochs'),
        validation_data=(x_test, y_test),
    )

    score = model.evaluate(x_test, y_test, verbose=0)
    logging.info("Score %s", score)

    # finalize_model performs the following calls:
    # exp.log_confusion_matrix()
    # exp.log_image()
    # exp.log_histogram_3d()
    # exp.add_tag()
    # exp.log_model()
    utils.finalize_model(model, x_train, y_train, x_test, y_test, exp)
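Note that params is not defined in this excerpt; it is expected to exist before train() is called. A minimal sketch of a compatible dict, assuming the same hyperparameter names that are read back with exp.get_parameter() above (the values are illustrative only):

# Hypothetical hyperparameters; the keys must match the names used by
# exp.get_parameter() inside train().
params = {
    "batch-size": 64,   # read back via exp.get_parameter('batch-size')
    "epochs": 5,        # read back via exp.get_parameter('epochs')
}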
Example #2
def train(x_train, y_train, x_test, y_test):

    exp = Experiment(project_name="perception",
                     auto_histogram_gradient_logging=True)

    # log custom hyperparameters
    exp.log_parameters(params)

    # log any custom metric
    exp.log_metric('custom_metric', 0.95)

    # log a dataset hash
    exp.log_dataset_hash(x_train)

    # Define model
    model = build_model_graph(exp)

    model.fit(
        x_train,
        y_train,
        batch_size=exp.get_parameter('batch-size'),
        epochs=exp.get_parameter('epochs'),
        validation_data=(x_test, y_test),
    )

    score = model.evaluate(x_test, y_test, verbose=0)
    logging.info("Score %s", score)

    # finalize_model performs the following calls:
    # exp.log_confusion_matrix()
    # exp.log_image()
    # exp.log_histogram_3d()
    # exp.add_tag()
    # exp.log_model()
    utils.finalize_model(model, x_train, y_train, x_test, y_test, exp)
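Both train() excerpts rely on a project-specific build_model_graph(exp) helper that is not shown. A minimal sketch of what such a helper could look like, assuming it returns a compiled Keras binary classifier sized by the logged hyperparameters; the 'layer-1-size' name and the architecture are assumptions, not taken from the original project:

import tensorflow as tf

def build_model_graph(exp):
    # Read a previously logged hyperparameter back from the Comet experiment
    # and return a small compiled Keras model.
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(exp.get_parameter('layer-1-size'),
                              activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['binary_accuracy'])
    return model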
Example #3
    
    adam = Adam(lr=startlearningrate,
                beta_1=yamlparameters["Training_learning_beta1"],
                beta_2=yamlparameters["Training_learning_beta2"],
                amsgrad=True)
    
    keras_model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['binary_accuracy'])
    
    callbacks=all_callbacks(stop_patience=yamlparameters["Training_early_stopping"], 
                            initial_lr=yamlparameters["Training_learning_rate"],
                            lr_factor=yamlparameters["Training_lr_factor"],
                            lr_patience=yamlparameters["Training_lr_patience"],
                            lr_epsilon=yamlparameters["Training_lr_min_delta"], 
                            lr_cooldown=yamlparameters["Training_lr_cooldown"], 
                            lr_minimum=yamlparameters["Training_lr_minimum"],
                            Prune_begin=experiment.get_parameter("pruning_begin_epoch"),
                            Prune_end=experiment.get_parameter("pruning_end_epoch"),
                            prune_lrs=[experiment.get_parameter("pruning_lr_factor_1"),
                                       experiment.get_parameter("pruning_lr_factor_2"),
                                       experiment.get_parameter("pruning_lr_factor_3")],
                            outputDir=yamlparameters["TrainDir"])

    callbacks.callbacks.append(pruning_callbacks.UpdatePruningStep())

    with experiment.train():
    
        keras_model.fit(X_train,y_train,
                        batch_size=yamlparameters["Training_batch_size"],
                        epochs=yamlparameters["Training_epochs"],
                        callbacks=callbacks.callbacks,
                        verbose=1)
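The excerpt trains inside Comet's experiment.train() context manager. Comet also provides test() and validate() contexts, so a follow-up evaluation step could look like the sketch below; X_test and y_test are assumed to be held-out arrays that do not appear in the excerpt:

    with experiment.test():
        # Evaluate the trained model and report the held-out metrics to Comet
        # under the "test" prefix.
        loss, accuracy = keras_model.evaluate(X_test, y_test, verbose=0)
        experiment.log_metrics({'loss': loss, 'binary_accuracy': accuracy})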
Example #4
from comet_ml import Experiment  # import comet_ml before TensorFlow so auto-logging hooks in

import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_hub as hub


def main():

    exp = Experiment(project_name="movie-reviews",
                     auto_histogram_weight_logging=True)

    params = {
        'layer-1-size': 16,
        'epochs': 10,
        'batch-size': 512,
        'dropout': 0.15,
    }

    exp.log_parameters(params)

    # Load data
    train_data, test_data = tfds.load(name="imdb_reviews",
                                      split=["train", "test"],
                                      batch_size=-1,
                                      as_supervised=True)
    train_examples, train_labels = tfds.as_numpy(train_data)
    test_examples, test_labels = tfds.as_numpy(test_data)

    x_val = train_examples[:10000]
    partial_x_train = train_examples[10000:]
    y_val = train_labels[:10000]
    partial_y_train = train_labels[10000:]

    # Load a pre-trained text-embedding module from TF Hub
    model_url = "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1"
    hub_layer = hub.KerasLayer(model_url,
                               output_shape=[20],
                               input_shape=[],
                               dtype=tf.string,
                               trainable=True)
    hub_layer(train_examples[:3])

    # Build model
    model = tf.keras.Sequential()
    model.add(hub_layer)
    model.add(
        tf.keras.layers.Dense(exp.get_parameter('layer-1-size'),
                              activation='relu'))
    model.add(tf.keras.layers.Dropout(exp.get_parameter('dropout')))
    model.add(tf.keras.layers.Dense(1))

    model.compile(
        optimizer='adam',
        loss=tf.losses.BinaryCrossentropy(from_logits=True),
        metrics=[tf.metrics.BinaryAccuracy(threshold=0.0, name='accuracy')])

    # Train model
    model.fit(partial_x_train,
              partial_y_train,
              epochs=exp.get_parameter('epochs'),
              batch_size=exp.get_parameter('batch-size'),
              validation_data=(x_val, y_val),
              verbose=1)

    # log any custom metric
    exp.log_metric('custom_metric', 0.98)

    # log a dataset hash
    exp.log_dataset_hash(partial_x_train)

    # finalize_model invokes:
    #     * exp.log_confusion_matrix()
    #     * exp.log_text()
    #     * exp.log_model()
    finalize_model(model, test_examples, test_labels, exp)
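finalize_model here is a project-specific helper that is not part of the excerpt. A minimal sketch consistent with the calls listed in the comment above; the prediction threshold, the saved-model path, and the registered model name are assumptions:

def finalize_model(model, x_test, y_test, exp):
    # Class predictions from the single-logit output head built above.
    logits = model.predict(x_test)
    y_pred = (logits > 0).astype('int32').ravel()

    # Confusion matrix between true and predicted sentiment labels.
    exp.log_confusion_matrix(y_true=y_test, y_predicted=y_pred)

    # A few raw reviews for qualitative inspection.
    for example in x_test[:5]:
        exp.log_text(example.decode('utf-8', errors='replace'))

    # Save the trained model and register it with the experiment.
    model.save('imdb_model')
    exp.log_model('imdb-sentiment', 'imdb_model')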
Example #5
def main():

    experiment = Experiment(api_key="API_KEY",
                            project_name="PROJECT",
                            workspace="WORKSPACE")

    raw_df = pd.read_csv(f'{DATA}Tweets.csv')

    # Copy so the column assignment below does not trigger SettingWithCopyWarning
    df = raw_df[['tweet_id', 'text', 'airline_sentiment']].copy()

    # Preprocess text and put it in a new column
    preprocessor = PreProcessor(df, 'text')
    df['cleaned_text'] = preprocessor.full_preprocess()

    # Shuffling so we can get random tweets for the test set
    df = shuffle(df, random_state=seed)
    # Keep 1000 samples of the data as test set
    test_set = df[:1000]

    # Get training and validation data
    X_train, X_val, y_train, y_val = train_test_split(
        df['cleaned_text'][1000:],
        df['airline_sentiment'][1000:],
        test_size=0.2,
        random_state=seed)

    # Get sentiment labels for test set
    y_test = test_set['airline_sentiment']

    # Create matrix based on word frequency in tweets
    vectorizer = TfidfVectorizer()
    X_train = vectorizer.fit_transform(X_train)
    X_val = vectorizer.transform(X_val)
    X_test = vectorizer.transform(test_set['cleaned_text'])

    # Onehot encoding of target variable
    # Negative = [1,0,0], Neutral = [0,1,0], Positive = [0,0,1]

    # Initialize sklearn's one-hot encoder class
    onehot_encoder = OneHotEncoder(sparse=False)

    # One hot encoding for training set
    integer_encoded_train = np.array(y_train).reshape(len(y_train), 1)
    onehot_encoded_train = onehot_encoder.fit_transform(integer_encoded_train)

    # One hot encoding for validation set (reuse the encoder fitted on the training labels)
    integer_encoded_val = np.array(y_val).reshape(len(y_val), 1)
    onehot_encoded_val = onehot_encoder.transform(integer_encoded_val)

    # One hot encoding for test set (reuse the encoder fitted on the training labels)
    integer_encoded_test = np.array(y_test).reshape(len(y_test), 1)
    onehot_encoded_test = onehot_encoder.transform(integer_encoded_test)

    from comet_ml import Optimizer

    config = {
        "algorithm": "bayes",
        "parameters": {
            "batch_size": {
                "type": "integer",
                "min": 16,
                "max": 128
            },
            "dropout": {
                "type": "float",
                "min": 0.1,
                "max": 0.5
            },
            "lr": {
                "type": "float",
                "min": 0.0001,
                "max": 0.001
            },
            "beta1": {
                "type": "float",
                "min": 0.95,
                "max": 0.999
            },
            "beta2": {
                "type": "float",
                "min": 0.95,
                "max": 0.999
            },
            "epsilon": {
                "type": "float",
                "min": 1e-9,
                "max": 1e-7
            },
            "patience": {
                "type": "integer",
                "min": 3,
                "max": 7
            }
        },
        "spec": {
            "metric": "loss",
            "objective": "minimize",
        },
    }

    opt = Optimizer(config,
                    api_key="ERPBfa6mmwJzQnk61oiqLOCie",
                    project_name="nlp-airline",
                    workspace="demo")

    for experiment in opt.get_experiments():
        experiment.add_tag('LR-Optimizer')
        # Neural network architecture
        initializer = keras.initializers.he_normal(seed=seed)
        activation = keras.activations.elu
        optimizer = keras.optimizers.Adam(
            lr=experiment.get_parameter("lr"),
            beta_1=experiment.get_parameter("beta1"),
            beta_2=experiment.get_parameter("beta2"),
            epsilon=experiment.get_parameter('epsilon'))
        es = EarlyStopping(monitor='val_acc',
                           mode='max',
                           verbose=1,
                           patience=4)

        batch_size = experiment.get_parameter("batch_size")
        # Build model architecture
        model = Sequential()
        model.add(
            Dense(20,
                  activation=activation,
                  kernel_initializer=initializer,
                  input_dim=X_train.shape[1]))
        model.add(Dropout(experiment.get_parameter("dropout")))
        model.add(
            Dense(3, activation='softmax', kernel_initializer=initializer))
        # Three-class softmax output with one-hot targets, so use categorical
        # (not binary) cross-entropy
        model.compile(optimizer=optimizer,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        # Fit the model using the batch_generator
        hist = model.fit_generator(
            generator=batch_generator(X_train,
                                      onehot_encoded_train,
                                      batch_size=batch_size,
                                      shuffle=True),
            epochs=5,
            validation_data=(X_val, onehot_encoded_val),
            steps_per_epoch=X_train.shape[0] // batch_size,
            callbacks=[es])
        score = model.evaluate(X_test, onehot_encoded_test, verbose=0)
        logging.info("Score %s", score)
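The loop above only sends the test score to the Python logger. To make the result visible in Comet and close each run cleanly before the Optimizer starts the next one, the end of the loop body could be extended along these lines (the metric names simply reuse Keras's metrics_names):

        # Report the held-out score to Comet and end this run so the Optimizer
        # can move on to the next parameter set.
        experiment.log_metrics(dict(zip(model.metrics_names, score)))
        experiment.end()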