Example #1
                file.write("{}\n".format(" ".join(iob2iobes(tag))))

    ### Compute vocabulary
    build_vocab(original_path)

    ### Trim glove embeddings
    word_vocab_path = original_path + "vocab.words.txt"
    embedding_path = EMBEDDINGS_DIR + "glove.840B/glove.840B.300d.txt"
    saving_path = original_path + "glove.840B.300d"
    trim_embeddings(word_vocab_path,
                    embedding_path,
                    saving_path,
                    check_exists=False)

    ### Remap dataset
    data = load_data(original_path, scheme="iob")
    tag2idx = data["vocab"]["tag"][0]

    # Default every entity type in the tag vocabulary to "O", then override
    # the types we want to keep below.
    mapping = {
        "-".join(tag.split("-")[1:]): "O"
        for tag in tag2idx
        if tag.startswith("B")
    }

    mapping["PERSON"] = "PER"
    mapping["ORG"] = "ORG"
    mapping["LOC"] = "LOC"
    mapping["GPE"] = "LOC"
    mapping["LANGUAGE"] = "MISC"
    mapping["NORP"] = "MISC"
Example #2
    X_valid = X[num_train:]
    y_valid = y[num_train:]
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_valid = lgb.Dataset(X_valid, y_valid)

    params = study.best_params
    params['metric'] = 'l2'
    # Fit on the training split only; early stopping monitors the held-out
    # validation split (training on the full dataset would leak the rows
    # used for early stopping).
    model = lgb.train(params,
                      lgb_train,
                      valid_sets=[lgb_valid],
                      verbose_eval=10,
                      early_stopping_rounds=30)

    return model
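
# Note: the `verbose_eval` and `early_stopping_rounds` keyword arguments above
# assume LightGBM 3.x. In LightGBM >= 4.0 they were removed from lgb.train();
# an equivalent call (a sketch, same params and datasets as above) would be:
#
#     model = lgb.train(params,
#                       lgb_train,
#                       valid_sets=[lgb_valid],
#                       callbacks=[lgb.early_stopping(stopping_rounds=30),
#                                  lgb.log_evaluation(period=10)])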


X, y = load_data(trivias_list)

if __name__ == "__main__":

    model = build_model()

    content = 'ミツバチが一生かけて集める蜂蜜はティースプーン1杯程度。'
    content_df = get_features(trivias_list, content=content, mode='inference')
    output = model.predict(content_df)
    # predict() returns an array; take the single score and scale it
    hee = int(output[0] * 100)

    print(f"{content}")
    print(f"{hee}へぇ")
Example #3
    """ Check parameters and format """
    check_and_format(parameters, train=False)
    """ logger """
    print("Logging in {}".format(
        os.path.join(parameters["run_dir"],
                     "test_ood_{}.log".format(parameters["ood_dataset"]))))
    set_logger(
        os.path.join(parameters["run_dir"],
                     "test_ood_{}.log".format(parameters["ood_dataset"])))
    """ Load Data """
    logging.info("Loading data...")
    filenames = splits = ["train", "dev", "test"]

    # Load training dataset
    data_path = DATA_DIR + parameters["dataset"] + "/"
    train_data = load_data(data_path + "original/")
    train_iterators = init_iterators(train_data["data"],
                                     train_data["vocab"],
                                     batch_size=parameters["batch_size"],
                                     device=parameters["device"],
                                     shuffle_train=True)
    train_entities = extract_entities_corpus(train_data["data"]["train"])

    # Load ood test dataset
    ood_data_path = DATA_DIR + parameters["ood_dataset"] + "/"
    ood_data = load_data(ood_data_path + "remapped/",
                         embedding_path=ood_data_path +
                         "original/glove.840B.300d")

    test_vocab = deepcopy(train_data["vocab"])
    test_vocab["word"] = ood_data["vocab"]["word"]
Example #4
                file.write("{}\n".format(" ".join(iob2iobes(sent))))

    ### Compute vocabulary
    build_vocab(original_path)

    ### Trim glove embeddings
    word_vocab_path = original_path + "vocab.words.txt"
    embedding_path = EMBEDDINGS_DIR + "glove.840B/glove.840B.300d.txt"
    saving_path = original_path + "glove.840B.300d"
    trim_embeddings(word_vocab_path,
                    embedding_path,
                    saving_path,
                    check_exists=False)

    ### Remap dataset
    data = load_data(original_path)
    tag2idx = data["vocab"]["tag"][0]

    # Default every entity type in the tag vocabulary to "O", then override
    # the types we want to keep below.
    mapping = {
        "-".join(tag.split("-")[1:]): "O"
        for tag in tag2idx
        if tag.startswith("B")
    }

    mapping["corporation"] = "ORG"
    mapping["location"] = "LOC"
    mapping["person"] = "PER"

    for split, file in zip(
        ["train", "dev", "test"],
        ["wnut17train.conll", "emerging.dev.conll", "emerging.test.annotated"
Example #5

def evaluate_generation(step, num_images=16):
    """Generate `num_images` samples per genre and log them to the monitor."""
    for i in range(len(genres)):
        label = idx2label[i]
        # Condition the generator on genre `i` and feed it random noise
        labels = generate_labels(num_images, num_classes, condition=i)
        noise = tf.random.normal([num_images, noise_size])
        gen_images = gen.model.predict([noise, labels])
        monitor.image(label, gen_images, step)
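
# `generate_labels` is not defined in this snippet. A plausible sketch,
# assuming the conditional generator takes a column vector of integer class
# indices (hypothetical, not necessarily the project's implementation):
def generate_labels(num_images, num_classes, condition=None):
    if condition is None:
        # One random class index per generated image
        return tf.random.uniform([num_images, 1],
                                 maxval=num_classes,
                                 dtype=tf.int32)
    # The same fixed class for the whole batch (per-genre evaluation above)
    return tf.fill([num_images, 1], condition)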


if __name__ == '__main__':
    print("\n[info] Loading image data...\n")
    data_root = os.path.join(os.path.expanduser('~'), 'datasets', 'artworks')
    train_gen, valid_gen = load_data(data_root,
                                     batch_size=batch_size,
                                     image_width=image_size,
                                     split=.05)
    num_classes = train_gen.num_classes
    idx2label = {v: k for k, v in valid_gen.class_indices.items()}

    print(
        "\n[info] Creating generator and discriminator architectures for GAN...\n"
    )
    gen = Generator(num_classes, image_size, bn=True)
    disc = Discriminator(num_classes, image_size, min_neurons, bn_epsilon=1e-5)

    print("\n[info] Pre-training or loading pre-trained discriminator...\n")
    disc.pretrain(train_gen,
                  valid_gen,
                  pretrain_iterations,
                  pretrain_learning_rate,