def get_embedder(embed_type, embed_file):
    """Get an embedding object by type so we can evaluate one-hot vectors

    :param embed_type: (``str``) The name of the embedding in the `BASELINE_EMBEDDINGS`
    :param embed_file: (``str``) Either the file or a URL to a hub location for the model
    :return: An embeddings dict containing vocab and graph
    """
    if embed_type == 'bert' or embed_type == 'elmo':
        embed_type += '-embed'
    embed = baseline.load_embeddings('word',
                                     embed_type=embed_type,
                                     embed_file=embed_file,
                                     keep_unused=True,
                                     trainable=False,
                                     known_vocab={})
    return embed
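Calling the helper returns the dict described in the docstring, with the vocab and the embedding object under separate keys. The sketch below is only illustrative; the hub location is a hypothetical placeholder, not a real module path.

# Hypothetical usage sketch: the hub URL below is a placeholder, not a real module location
elmo = get_embedder('elmo', 'https://example.org/elmo-module')
elmo_vocab = elmo['vocab']        # vocab mapping used to index tokens
elmo_graph = elmo['embeddings']   # the embedding object wired into the graph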
valid_file = args.valid
test_file = args.test

# This builds a set of counters
vocabs, labels = reader.build_vocab([train_file, valid_file, test_file])

# This builds a set of embeddings objects; these are typically not DL-specific,
# but if they happen to be addons, they can be
embeddings = dict()
for k, v in feature_desc.items():
    embed_config = v['embed']
    embeddings_for_k = bl.load_embeddings('word',
                                          embed_file=embed_config['file'],
                                          known_vocab=vocabs[k],
                                          embed_type=embed_config.get('type', 'default'),
                                          unif=embed_config.get('unif', 0.),
                                          use_mmap=True)

    embeddings[k] = embeddings_for_k['embeddings']
    # Reset the vocab to the embeddings one
    vocabs[k] = embeddings_for_k['vocab']

X_train, y_train = to_tensors(reader.load(train_file, vocabs=vocabs, batchsz=1))
X_valid, y_valid = to_tensors(reader.load(valid_file, vocabs=vocabs, batchsz=1))
X_test, y_test = to_tensors(reader.load(test_file, vocabs=vocabs, batchsz=1))


def train_input_fn():
    dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
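    # The original excerpt ends at the line above; the rest of this function body is a
    # hedged sketch of how an Estimator-style input function typically continues.
    # The shuffle buffer and batch size below are illustrative assumptions, not values
    # taken from the original.
    dataset = dataset.shuffle(buffer_size=10000)
    dataset = dataset.batch(32)
    dataset = dataset.repeat()
    return dataset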