Ejemplo n.º 1
0
    except getopt.GetoptError as err:
        print(err)
        print_usage()
        exit()
    if '--help' in opts:
        print_usage()
        exit()

    parameters = deepMirCut.load_parameters(opts)
    parameters = load_input_output_file_parameters(parameters, opts)

    validationSet = deepMirCut.readDataset(parameters["validation_file"],
                                           parameters)
    new_validationSet = deepMirCut.dropLongSequences(validationSet, parameters)
    X_vl, y_vl = deepMirCut.prepareData(new_validationSet, parameters)
    validation_labels = deepMirCut.pred2label(y_vl, parameters)
    if "ensemble" in parameters:
        validation_pred = []
        for i in range(0, len(parameters["ensemble"])):
            print("loading model %d: %s\n" % (i, parameters["ensemble"][i]))
            model = load_model(parameters["ensemble"][i],
                               custom_objects={'prox': avg_proximity_metric()})
            validation_pred.append(
                model.predict(X_vl, verbose=parameters["verbose"]))
            #model.summary()
        validation_pred_avg = apply_weights(validation_pred,
                                            w=parameters["ensemble_weights"])
        pred_labels = deepMirCut.pred2label(validation_pred_avg, parameters)
        deepMirCut.print_validation_output_file(new_validationSet, X_vl,
                                                validation_labels, pred_labels,
                                                parameters["predictions_file"],
Ejemplo n.º 2
0
def hyperopt_train_test(params):
    """Build, train and score one model for a single hyperparameter sample.

    The network is assembled from ``params`` (the sampled hyperparameters)
    together with ``dmc_parameters``, ``X_tr``, ``y_tr``, ``X_vl`` and
    ``y_vl``, none of which are defined here; they are read from the
    enclosing scope.

    Keys read from ``params``: ``epsilon_exp``, ``learning_rate``,
    ``bi_lstm1_units``, ``bi_lstm2_units`` and, only when the embedding
    layer is enabled, ``embedding_layer_output`` and ``embedding_dropout``.

    Returns:
        A ``(loss, acc, prox, f_score)`` tuple: the three values reported by
        ``model.evaluate`` on the validation data, followed by the result of
        ``f1_score`` on the validation labels versus the predicted labels.
    """
    # The search samples the exponent; Adam receives epsilon = 10**exponent.
    optimizer = optimizers.adam(lr=params['learning_rate'],
                                epsilon=10**params['epsilon_exp'])

    seq_len = dmc_parameters["max_seq_len"]
    vector_len = dmc_parameters["one_hot_vector_len"]

    # Input stage: either indices fed through an embedding (plus dropout),
    # or the (seq_len, vector_len) input used directly.
    if dmc_parameters["use_embedding_layer"]:
        inputs = Input(shape=(seq_len, ))
        embedded = Embedding(input_dim=vector_len,
                             output_dim=params['embedding_layer_output'],
                             input_length=seq_len)(inputs)
        net = Dropout(rate=params['embedding_dropout'])(embedded)
    else:
        inputs = Input(shape=(seq_len, vector_len))
        net = inputs

    # Up to two stacked bidirectional LSTM layers; a unit count that is not
    # greater than zero leaves the corresponding layer out.
    for units_key in ('bi_lstm1_units', 'bi_lstm2_units'):
        n_units = params[units_key]
        if n_units > 0:
            recurrent = CuDNNLSTM(units=n_units, return_sequences=True)
            net = Bidirectional(recurrent)(net)

    # Output stage: a CRF layer, or a per-timestep softmax classifier.
    # Each choice comes with its own loss and accuracy metric.
    if dmc_parameters["use_crf_layer"]:
        outputs = CRF(dmc_parameters["num_tags"])(net)
        loss_fn = losses.crf_loss
        accuracy_metric = metrics.crf_accuracy
    else:
        classifier = Dense(dmc_parameters["num_tags"], activation="softmax")
        outputs = TimeDistributed(classifier)(net)
        loss_fn = "categorical_crossentropy"
        accuracy_metric = "accuracy"

    model = Model(inputs, outputs)
    model.compile(optimizer=optimizer,
                  loss=loss_fn,
                  metrics=[accuracy_metric, avg_proximity_metric()])
    model.summary()

    # Early stopping watches the validation loss and restores the best
    # weights, so the evaluation below uses the best weights seen.
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0,
                                   patience=dmc_parameters["patience"],
                                   verbose=False,
                                   mode='min',
                                   restore_best_weights=True)

    batch_size = dmc_parameters['batch_size']
    val_targets = np.array(y_vl)

    model.fit(X_tr,
              np.array(y_tr),
              batch_size=batch_size,
              epochs=dmc_parameters["epochs"],
              validation_data=(X_vl, val_targets),
              verbose=False,
              shuffle=True,
              callbacks=[early_stopping])

    # evaluate() yields the loss followed by the two compiled metrics.
    loss, acc, prox = model.evaluate(x=X_vl,
                                     y=val_targets,
                                     batch_size=batch_size,
                                     verbose=False)

    # Convert both the reference targets and the raw predictions to label
    # sequences before computing the F1 score.
    true_labels = deepMirCut.pred2label(y_vl, dmc_parameters)
    raw_predictions = model.predict(X_vl, verbose=False)
    predicted_labels = deepMirCut.pred2label(raw_predictions, dmc_parameters)
    f_score = f1_score(true_labels, predicted_labels)

    return loss, acc, prox, f_score
Ejemplo n.º 3
0
        opts.update({o: a for o, a in opts_array if o not in longopts_map})
        opts["--validation_file"] = validation_file
    except getopt.GetoptError as err:
        print(err)
        print_usage()
        exit()
    if '--help' in opts:
        print_usage()
        exit()

    parameters = deepMirCut.load_parameters(opts)
    parameters = load_input_output_file_parameters(parameters, opts)

    model = load_model(parameters["model"],
                       custom_objects={
                           'CRF': CRF,
                           'crf_loss': losses.crf_loss,
                           'crf_accuracy': metrics.crf_accuracy,
                           'prox': avg_proximity_metric()
                       })

    model.summary()
    validationSet = deepMirCut.readDataset(parameters["validation_file"],
                                           parameters)
    new_validationSet = deepMirCut.dropLongSequences(validationSet, parameters)
    X_vl, y_vl = deepMirCut.prepareData(new_validationSet, parameters)
    validation_labels = deepMirCut.pred2label(y_vl, parameters)
    validation_pred = model.predict(X_vl, verbose=parameters["verbose"])
    print_cutsite_scores(validationSet, validation_labels, validation_pred,
                         parameters)