Code example #1
0
def get_embedder(embed_type, embed_file):
    """Load an embeddings object for evaluating one-hot vectors.

    :param embed_type: (``str``) Name of the embedding type registered in
        `BASELINE_EMBEDDINGS` (e.g. ``'bert'``, ``'elmo'``)
    :param embed_file: (``str``) A local file path or a hub URL for the model
    :return: An embeddings dict containing the vocab and the graph
    """
    # Contextual embedding types are registered under a '-embed' suffix
    if embed_type in ('bert', 'elmo'):
        embed_type = embed_type + '-embed'
    return baseline.load_embeddings(
        'word',
        embed_type=embed_type,
        embed_file=embed_file,
        keep_unused=True,
        trainable=False,
        known_vocab={},
    )
Code example #2
0
File: tf-estimator.py  Project: dpressel/baseline
valid_file = args.valid
test_file = args.test

# Build counters over every split so the vocabulary covers all of the data
vocabs, labels = reader.build_vocab([train_file, valid_file, test_file])

# Construct one embeddings object per feature; these are typically not
# DL-specific, but addon embedding types are also supported
embeddings = {}
for name, desc in feature_desc.items():
    cfg = desc['embed']
    loaded = bl.load_embeddings('word',
                                embed_file=cfg['file'],
                                known_vocab=vocabs[name],
                                embed_type=cfg.get('type', 'default'),
                                unif=cfg.get('unif', 0.),
                                use_mmap=True)

    embeddings[name] = loaded['embeddings']
    # Replace the counter-based vocab with the one attached to the embeddings
    vocabs[name] = loaded['vocab']


X_train, y_train = to_tensors(reader.load(train_file, vocabs=vocabs, batchsz=1))
X_valid, y_valid = to_tensors(reader.load(valid_file, vocabs=vocabs, batchsz=1))
X_test, y_test = to_tensors(reader.load(test_file, vocabs=vocabs, batchsz=1))
def train_input_fn():
    dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
Code example #3
0
valid_file = args.valid
test_file = args.test

# Counters over all three splits, so no split contributes out-of-vocab tokens
vocabs, labels = reader.build_vocab([train_file, valid_file, test_file])

# One embeddings object per feature key; these are typically not DL-specific
# but may come from addons as well
embeddings = {}
for feature, feature_cfg in feature_desc.items():
    econf = feature_cfg['embed']
    result = bl.load_embeddings('word',
                                embed_file=econf['file'],
                                known_vocab=vocabs[feature],
                                embed_type=econf.get('type', 'default'),
                                unif=econf.get('unif', 0.),
                                use_mmap=True)

    embeddings[feature] = result['embeddings']
    # Overwrite the raw vocab with the vocab that the embeddings produced
    vocabs[feature] = result['vocab']


X_train, y_train = to_tensors(reader.load(train_file, vocabs=vocabs, batchsz=1))
X_valid, y_valid = to_tensors(reader.load(valid_file, vocabs=vocabs, batchsz=1))
X_test, y_test = to_tensors(reader.load(test_file, vocabs=vocabs, batchsz=1))
def train_input_fn():
    dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))