Beispiel #1
0
    labels.add(label)

# Report how many distinct tokens and classes were collected above.
print("Vocabulary:", len(vocab))
print("Classes:", len(labels))

# Forward and reverse lookup tables. Token ids start at 1, so id 0 is never
# assigned to a token (presumably reserved for padding -- TODO confirm).
# Label ids start at 0.
token2id = {token: idx for idx, token in enumerate(vocab, start=1)}
label2id = {label: idx for idx, label in enumerate(labels)}
id2token = {idx: token for idx, token in enumerate(vocab, start=1)}
id2label = {idx: label for idx, label in enumerate(labels)}

# Save token and label mapping. Per the original note, the iteration order
# (and therefore the ids) is not stable across Python 3 runs, so the ids are
# only meaningful together with this file.
# NOTE: JSON object keys are always strings, so the integer keys of id2token
# and id2label are written as "1", "2", ... and come back as str on reload.
# The context manager guarantees the file is flushed and closed even if
# json.dump raises; the original left the handle open.
with open("model.map", "w") as map_file:
    json.dump([token2id, label2id, id2token, id2label], map_file)

# Read pre-trained word2vec vectors for the vocabulary. get_vectors receives
# the vocabulary and the token -> id map; presumably the rows of the result
# are indexed by token id -- TODO confirm against resources.get_vectors.
# Alternative vector file (disabled):
#vectors = resources.get_vectors(vocab, token2id, "zh-Gigaword-300.txt")
vectors = resources.get_vectors(vocab, token2id, "zh-gw300_intersect.w2v")
emb_dim = vectors.shape[1]  # Word embedding dimensions (size of the second axis of `vectors`)

# Initialize datasets. The training set is converted first; the dev, test and
# blind sets are then converted with nclasses set to the second dimension of
# the training `y` (presumably so that every split gets label arrays of the
# same width -- TODO confirm against convert_data).
X, y = convert_data(trainset)
dev_X, dev_y = convert_data(devset, nclasses=y.shape[1])
test_X, test_y = convert_data(testset, nclasses=y.shape[1])
blind_X, blind_y = convert_data(blindset, nclasses=y.shape[1])

## Define model
# Presumably the mini-batch size used by the training code further down
# (not visible here) -- TODO confirm.
batch_size = 80

# Disabled on purpose: the author flagged the following call as "cheating".
# It passed the train and dev sets through shift(...) with shuffle=True.
# was cheating: X, y, dev_X, dev_y = shift(X, y, dev_X, dev_y, val_size=None, shuffle=True)

for nexp in range(5):
    # Repeat experiment
Beispiel #2
0
	labels.add(label)

# Report how many distinct tokens and classes were collected above.
print("Vocabulary:", len(vocab))
print("Classes:", len(labels))

# Forward and reverse lookup tables. Token ids start at 1, so id 0 is never
# assigned to a token (presumably reserved for padding -- TODO confirm).
# Label ids start at 0.
token2id = {token: idx for idx, token in enumerate(vocab, start=1)}
label2id = {label: idx for idx, label in enumerate(labels)}
id2token = {idx: token for idx, token in enumerate(vocab, start=1)}
id2label = {idx: label for idx, label in enumerate(labels)}

# Save token and label mapping. Per the original note, the iteration order
# (and therefore the ids) is not stable across Python 3 runs, so the ids are
# only meaningful together with this file.
# NOTE: JSON object keys are always strings, so the integer keys of id2token
# and id2label are written as "1", "2", ... and come back as str on reload.
# The context manager guarantees the file is flushed and closed even if
# json.dump raises; the original left the handle open.
with open("model.map", "w") as map_file:
	json.dump([token2id, label2id, id2token, id2label], map_file)

# Read pre-trained word2vec vectors for the vocabulary. get_vectors receives
# the vocabulary and the token -> id map; presumably the rows of the result
# are indexed by token id -- TODO confirm against resources.get_vectors.
# Alternative vector file (disabled):
#vectors = resources.get_vectors(vocab, token2id, "zh-Gigaword-300.txt")
vectors = resources.get_vectors(vocab, token2id, "data/GoogleNews-vectors-negative300.bin")
emb_dim = vectors.shape[1] # Word embedding dimensions (size of the second axis of `vectors`)

# Initialize datasets. The training set is converted first; the dev and test
# sets are then converted with nclasses set to the second dimension of the
# training `y` (presumably so that every split gets label arrays of the same
# width -- TODO confirm against convert_data).
X, y = convert_data(trainset)
dev_X, dev_y = convert_data(devset, nclasses=y.shape[1])
test_X, test_y = convert_data(testset, nclasses=y.shape[1])

## Define model
# Presumably the mini-batch size used by the training code further down
# (not fully visible here) -- TODO confirm.
batch_size = 80

# Train and dev data are both passed through shift() with shuffle=True and
# the four results replace the original arrays.
# NOTE(review): if shift() mixes or re-splits the two sets, the dev set is no
# longer held out from training -- confirm what shift() does before trusting
# dev scores.
X, y, dev_X, dev_y = shift(X, y, dev_X, dev_y, val_size=None, shuffle=True)

for nexp in range(5):
	# Repeat experiment
	inlayer1 = Input(shape=(max_len,))