Ejemplo n.º 1
0
def train_model(n_iter: int = 100) -> None:
    """Train the ``ner`` pipe of ``en_core_web_sm`` on ``TRAIN_DATA``.

    Loads the pipeline, makes sure it contains an ``ner`` pipe, adds every
    entity label found in ``TRAIN_DATA`` to it, and then runs ``n_iter``
    passes over the data with every pipe outside ``pipe_exceptions``
    disabled. After each pass the accumulated losses are printed and sent
    to ``tracking.log_metrics``. The pipeline is finally saved with
    ``nlp.to_disk("custom_spacy_model")``.

    Note that ``TRAIN_DATA`` is shuffled in place on every pass.

    Args:
        n_iter: Number of passes over ``TRAIN_DATA``.
    """
    # You can load other languages if specified in the Dockerfile
    nlp = spacy.load("en_core_web_sm")

    if "ner" not in nlp.pipe_names:
        ner = nlp.create_pipe("ner")
        nlp.add_pipe(ner)
    else:
        ner = nlp.get_pipe("ner")

    # add labels; an example whose "entities" entry is missing or None
    # simply contributes no labels instead of raising TypeError
    for _, annotations in TRAIN_DATA:
        for ent in annotations.get("entities") or ():
            # ent[2] is the value registered as the label
            ner.add_label(ent[2])

    # get names of other pipes to disable them during training
    pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
    other_pipes = [
        pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions
    ]

    # only train NER
    with nlp.disable_pipes(*other_pipes):
        # NOTE(review): begin_training() is called on a loaded pipeline;
        # presumably this re-initialises the NER weights -- confirm whether
        # resume_training() is what is wanted here.
        nlp.begin_training()
        for step_n in range(n_iter):
            random.shuffle(TRAIN_DATA)
            losses = {}
            batches = minibatch(TRAIN_DATA, size=compounding(1.0, 4.0, 1.001))
            for batch in batches:
                texts, annotations = zip(*batch)
                nlp.update(
                    texts,
                    annotations,
                    drop=0.5,
                    losses=losses,
                )
            print("Losses: ", losses)
            tracking.log_metrics(step=step_n, **losses)

    nlp.to_disk("custom_spacy_model")
Ejemplo n.º 2
0
def main():
    """Exercise the tracking API step by step, then log plots and train.

    For each of ``args.steps`` steps this logs scalar metrics, a validation
    metric every ``args.validate_every`` steps, a histogram, text, images,
    HTML and audio, sleeping 0.25s between steps. Afterwards it calls the
    plotting/logging helpers with ``100`` and finally ``train_network()``.
    """
    tracking.init()

    for step in range(args.steps):
        logger.info('Step %s', step)

        # Scalars: the accuracy is derived from this step's loss.
        step_loss = get_loss(step)
        step_accuracy = get_accuracy(step_loss)

        # training metrics, but don't commit the step.
        tracking.log_metrics(step=step, loss=step_loss, accuracy=step_accuracy)

        # validation metrics, which could be reported in another part of the code
        is_validation_step = step % args.validate_every == 0
        if is_validation_step:
            tracking.log_metric(
                name='val_acc',
                value=step_accuracy - 0.05,
                step=step,
            )

        # Dist
        tracking.log_histogram('distribution', get_dist(step), 'auto', step=step)

        # Text
        tracking.log_text('text-ex', text=get_text(step), step=step)

        # images
        log_images(step)

        # HTML
        tracking.log_html('html-ex', html=get_html(step), step=step)

        # Generate sin wave as audio
        tracking.log_audio(data=get_audio(step), name='audio', step=step)

        time.sleep(0.25)

    # One-off plots/curves, each invoked with 100.
    plot_scatter(100)
    get_sin_plot(100)
    plot_mpl_figure(100)
    log_bokeh(100)
    log_altair(100)
    log_curves(100)
    log_plotly(100)
    # NOTE(review): log_curves is called a second time here -- kept as in
    # the original; confirm whether the repetition is intentional.
    log_curves(100)

    train_network()
Ejemplo n.º 3
0
    # Log a data reference for each split before it is transformed below.
    tracking.log_data_ref(content=x_train, name='x_train')
    tracking.log_data_ref(content=y_train, name='y_train')
    tracking.log_data_ref(content=x_test, name='x_test')
    tracking.log_data_ref(content=y_test, name='y_test')

    logger.info('Transforming data...')
    # The four splits are rebound to the transformed versions; args.maxlen
    # is forwarded to transform_data.
    x_train, y_train, x_test, y_test = transform_data(x_train,
                                                      y_train,
                                                      x_test,
                                                      y_test,
                                                      args.maxlen)

    logger.info('Training...')
    # All hyperparameters come straight from the parsed command-line args;
    # train() returns a (score, accuracy) pair.
    score, accuracy = train(max_features=args.max_features,
                            maxlen=args.maxlen,
                            epochs=args.epochs,
                            embedding_size=args.embedding_size,
                            pool_size=args.pool_size,
                            kernel_size=args.kernel_size,
                            filters=args.filters,
                            lstm_output_size=args.lstm_output_size,
                            batch_size=args.batch_size,
                            optimizer=args.optimizer,
                            log_learning_rate=args.log_learning_rate)

    # Polyaxon: report the final results as eval_score / eval_accuracy.
    tracking.log_metrics(eval_score=score, eval_accuracy=accuracy)

    logger.info('Test score: %s', score)
    logger.info('Test accuracy: %s', accuracy)
Ejemplo n.º 4
0
    logger.info('Loading data...')
    # imdb.load_data yields a (train, test) pair of (x, y) tuples; the
    # vocabulary size, skipped top words and seed come from the CLI args.
    (x_train, y_train), (x_test,
                         y_test) = imdb.load_data(num_words=args.max_features,
                                                  skip_top=args.skip_top,
                                                  seed=args.seed)
    logger.info('train sequences %s', len(x_train))
    logger.info('test sequences %s', len(x_test))

    # Polyaxon: log a data reference for each split before it is transformed.
    tracking.log_data_ref(content=x_train, name='x_train')
    tracking.log_data_ref(content=y_train, name='y_train')
    tracking.log_data_ref(content=x_test, name='x_test')
    tracking.log_data_ref(content=y_test, name='y_test')

    logger.info('Transforming data...')
    # The four splits are rebound to the transformed versions; args.maxlen
    # is forwarded to transform_data.
    x_train, y_train, x_test, y_test = transform_data(x_train, y_train, x_test,
                                                      y_test, args.maxlen)
    logger.info('Training...')
    # All hyperparameters come straight from the parsed command-line args.
    accuracy = train(max_features=args.max_features,
                     maxlen=args.maxlen,
                     batch_size=args.batch_size,
                     num_nodes=args.num_nodes,
                     optimizer=args.optimizer,
                     log_learning_rate=args.log_learning_rate,
                     dropout=args.dropout,
                     epochs=args.epochs)

    # Polyaxon: report the value returned by train() as `accuracy`.
    tracking.log_metrics(accuracy=accuracy)
Ejemplo n.º 5
0
    # Batch iterators over the mnist dict; only the training iterator shuffles.
    train_iter = mx.io.NDArrayIter(mnist['train_data'], mnist['train_label'], args.batch_size,
                                   shuffle=True)
    val_iter = mx.io.NDArrayIter(mnist['test_data'], mnist['test_label'], args.batch_size)

    # Polyaxon: log a data reference for each of the four mnist entries.
    tracking.log_data_ref(content=mnist['train_data'], name='x_train')
    tracking.log_data_ref(content=mnist['train_label'], name='y_train')
    tracking.log_data_ref(content=mnist['test_data'], name='x_test')
    tracking.log_data_ref(content=mnist['test_label'], name='y_test')

    # Pick GPU when NVIDIA_VISIBLE_DEVICES is set to a non-empty value,
    # otherwise CPU.
    # NOTE(review): this binds the mx.gpu / mx.cpu callable itself, not the
    # result of calling it -- presumably `model` calls it; verify.
    context = mx.gpu if os.environ.get('NVIDIA_VISIBLE_DEVICES') else mx.cpu

    metrics = model(context=context,
                    train_iter=train_iter,
                    val_iter=val_iter,
                    conv1_kernel=args.conv1_kernel,
                    conv1_filters=args.conv1_filters,
                    conv1_activation=args.conv1_activation,
                    # NOTE(review): the three conv2_* arguments below are fed
                    # from args.conv1_* -- looks like a copy-paste slip;
                    # confirm whether args.conv2_* exist and were intended.
                    conv2_kernel=args.conv1_kernel,
                    conv2_filters=args.conv1_filters,
                    conv2_activation=args.conv1_activation,
                    fc1_hidden=args.fc1_hidden,
                    fc1_activation=args.fc1_activation,
                    optimizer=args.optimizer,
                    log_learning_rate=args.log_learning_rate,
                    batch_size=args.batch_size,
                    epochs=args.epochs)

    # Polyaxon: the value returned by model() is reported under `accuracy`.
    tracking.log_metrics(accuracy=metrics)
Ejemplo n.º 6
0
                        type=int,
                        default=2000,
                        help='The maximum number of features.')
    parser.add_argument('--max_df',
                        type=float,
                        default=1.0,
                        help='the maximum document frequency.')
    parser.add_argument(
        '--C',
        type=float,
        default=1.0,
        help='Inverse of regularization strength of LogisticRegression')
    args = parser.parse_args()

    # Polyaxon
    tracking.init()

    # Train and eval the model with given parameters.
    # Polyaxon: the model file path is placed under the run's outputs path.
    output_path = os.path.join(tracking.get_outputs_path(), "model.joblib")
    # The same args.ngram value is used as both ends of ngram_range.
    metrics = train_and_eval(output=output_path,
                             ngram_range=(args.ngram, args.ngram),
                             max_features=args.max_features,
                             max_df=args.max_df,
                             C=args.C)

    # Logging metrics: fill the placeholder instead of printing a literal
    # "{}" followed by the dict.
    print("Testing metrics: {}".format(metrics))
    # Polyaxon: each key of `metrics` is logged as its own metric.
    tracking.log_metrics(**metrics)
Ejemplo n.º 7
0
    # Polyaxon
    tracking.init()

    # load_data yields a (train, test) pair of (x, y) tuples.
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

    # Polyaxon: log a data reference for each split before it is transformed.
    tracking.log_data_ref(content=x_train, name='x_train')
    tracking.log_data_ref(content=y_train, name='y_train')
    tracking.log_data_ref(content=x_test, name='x_test')
    tracking.log_data_ref(content=y_test, name='y_test')

    # The four splits are rebound to the transformed versions.
    x_train, y_train, x_test, y_test = transform_data(x_train, y_train, x_test,
                                                      y_test)
    # Flatten -> Dense(256, relu) -> Dropout(0.5) -> Dense(10, softmax).
    # NOTE(review): `model` is not passed to train() in the lines visible
    # here -- confirm whether train() picks it up some other way or whether
    # this definition is unused.
    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(256, activation=tf.keras.activations.relu),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(10, activation=tf.keras.activations.softmax)
    ])

    # All hyperparameters come straight from the parsed command-line args.
    accuracy = train(conv1_size=args.conv1_size,
                     conv2_size=args.conv2_size,
                     dropout=args.dropout,
                     hidden1_size=args.hidden1_size,
                     optimizer=args.optimizer,
                     log_learning_rate=args.log_learning_rate,
                     epochs=args.epochs)

    # Polyaxon: report the value returned by train() as eval_accuracy.
    tracking.log_metrics(eval_accuracy=accuracy)