file.write("{}\n".format(" ".join(iob2iobes(tag)))) ### Compute vocabulary build_vocab(original_path) ### Trim glove embeddings word_vocab_path = original_path + "vocab.words.txt" embedding_path = EMBEDDINGS_DIR + "glove.840B/glove.840B.300d.txt" saving_path = original_path + "glove.840B.300d" trim_embeddings(word_vocab_path, embedding_path, saving_path, check_exists=False) ### Remap dataset data = load_data(original_path, scheme="iob") tag2idx = data["vocab"]["tag"][0] mapping = { t: "O" for t in ["-".join(w.split("-")[1:]) for w in tag2idx.keys() if w[0] == "B"] } mapping["PERSON"] = "PER" mapping["ORG"] = "ORG" mapping["LOC"] = "LOC" mapping["GPE"] = "LOC" mapping["LANGUAGE"] = "MISC" mapping["NORP"] = "MISC"
# Tail of an unseen model-building function (presumably `build_model`): the
# split point `num_train`, the features `X`/`y`, `X_train`/`y_train`, and the
# Optuna-style `study` are all defined above this chunk, so this fragment is
# shown indented one level — TODO confirm against the full file.
    X_valid = X[num_train:]
    y_valid = y[num_train:]
    lgb_train = lgb.Dataset(X_train, y_train)  # NOTE(review): unused below
    lgb_valid = lgb.Dataset(X_valid, y_valid)
    lgb_data = lgb.Dataset(X, y)
    # Reuse the best hyper-parameters found by the study; force L2 metric.
    params = study.best_params
    params['metric'] = 'l2'
    # NOTE(review): training runs on the FULL dataset while early stopping is
    # monitored on a slice of that same data — confirm this is a deliberate
    # "retrain on everything" step and not validation leakage.
    model = lgb.train(params, lgb_data, valid_sets=lgb_valid, verbose_eval=10, early_stopping_rounds=30)
    return model

# Module-level: load the full feature/target arrays once at import time.
X, y = load_data(trivias_list)

if __name__ == "__main__":
    model = build_model()
    # Trivia sentence (Japanese): "A honeybee collects only about one teaspoon
    # of honey in its entire life."
    content = 'ミツバチが一生かけて集める蜂蜜はティースプーン1杯程度。'
    content_df = get_features(trivias_list, content=content, mode='inference')
    output = model.predict(content_df)
    # Scale the predicted score to a 0-100 "hee" count (Trivia no Izumi style).
    hee = int(output * 100)
    print(f"{content}")
    print(f"{hee}へぇ")
"""
Check parameters and format
"""
check_and_format(parameters, train=False)

"""
logger
"""
# The log file name embeds the OOD dataset name so runs against different
# OOD corpora do not clobber each other's logs.
print("Logging in {}".format(
    os.path.join(parameters["run_dir"], "test_ood_{}.log".format(parameters["ood_dataset"]))))
set_logger(
    os.path.join(parameters["run_dir"], "test_ood_{}.log".format(parameters["ood_dataset"])))

"""
Load Data
"""
logging.info("Loading data...")
# NOTE(review): `filenames` and `splits` alias the SAME list object here —
# mutating one mutates the other; confirm that is intended.
filenames = splits = ["train", "dev", "test"]

# Load training dataset
data_path = DATA_DIR + parameters["dataset"] + "/"
train_data = load_data(data_path + "original/")
train_iterators = init_iterators(train_data["data"], train_data["vocab"], batch_size=parameters["batch_size"], device=parameters["device"], shuffle_train=True)
train_entities = extract_entities_corpus(train_data["data"]["train"])

# Load ood test dataset
ood_data_path = DATA_DIR + parameters["ood_dataset"] + "/"
# The OOD corpus has been remapped (see "remapped/") to the training tag set,
# but keeps its own trimmed GloVe embeddings from "original/".
ood_data = load_data(ood_data_path + "remapped/", embedding_path=ood_data_path + "original/glove.840B.300d")
# Reuse the training vocabulary, swapping in only the OOD word vocabulary so
# OOD tokens can be looked up; all other vocab entries remain the training ones.
test_vocab = deepcopy(train_data["vocab"])
test_vocab["word"] = ood_data["vocab"]["word"]
file.write("{}\n".format(" ".join(iob2iobes(sent)))) ### Compute vocabulary build_vocab(original_path) ### Trim glove embeddings word_vocab_path = original_path + "vocab.words.txt" embedding_path = EMBEDDINGS_DIR + "glove.840B/glove.840B.300d.txt" saving_path = original_path + "glove.840B.300d" trim_embeddings(word_vocab_path, embedding_path, saving_path, check_exists=False) ### Remap dataset data = load_data(original_path) tag2idx = data["vocab"]["tag"][0] mapping = { t: "O" for t in ["-".join(w.split("-")[1:]) for w in tag2idx.keys() if w[0] == "B"] } mapping["corporation"] = "ORG" mapping["location"] = "LOC" mapping["person"] = "PER" for split, file in zip( ["train", "dev", "test"], ["wnut17train.conll", "emerging.dev.conll", "emerging.test.annotated"
def evaluate_generation(step, num_images=16):
    """Generate and log a batch of images for every genre label.

    For each class index, samples `num_images` noise vectors, runs the
    conditional generator, and sends the resulting images to `monitor`
    under the class's human-readable label.

    Relies on module-level names defined in the surrounding script:
    `genres`, `idx2label`, `num_classes`, `noise_size`, `gen`, `monitor`.

    Args:
        step: Training step used as the logging timestep.
        num_images: Number of images to generate per class.
    """
    for i in range(len(genres)):
        label = idx2label[i]
        # Conditioning labels for class i — exact encoding is defined by
        # generate_labels; verify against its definition.
        labels = generate_labels(num_images, num_classes, condition=i)
        noise = tf.random.normal([num_images, noise_size])
        gen_images = gen.model.predict([noise, labels])
        monitor.image(label, gen_images, step)


if __name__ == '__main__':
    print("\n[info] Loading image data...\n")
    data_root = os.path.join(os.path.expanduser('~'), 'datasets', 'artworks')
    # 5% of the data is held out for validation.
    train_gen, valid_gen = load_data(data_root, batch_size=batch_size, image_width=image_size, split=.05)
    num_classes = train_gen.num_classes
    # Invert the class-name -> index mapping so indices map back to names.
    idx2label = {v: k for k, v in valid_gen.class_indices.items()}
    print(
        "\n[info] Creating generator and discriminator architectures for GAN...\n"
    )
    gen = Generator(num_classes, image_size, bn=True)
    disc = Discriminator(num_classes, image_size, min_neurons, bn_epsilon=1e-5)
    print("\n[info] Pre-training or loading pre-trained discriminator...\n")
    # NOTE(review): chunk ends mid-call — the remaining pretrain() arguments
    # lie past the end of the visible source.
    disc.pretrain(train_gen, valid_gen, pretrain_iterations, pretrain_learning_rate,