예제 #1
0
 def test_simple(self):
     """Five distinct counts must produce the one known tree shape."""
     counts = {'x': 8, 'y': 6, 'z': 5, 'w': 4, 'v': 3}
     result = functions.create_huffman_tree(counts)
     # Lowest-frequency symbols merge first: (v, w), then (z, y).
     self.assertEqual((('z', 'y'), (('v', 'w'), 'x')), result)
 def test_same_count(self):
     """Ties in counts may be broken either way; accept both layouts."""
     result = functions.create_huffman_tree({"x": 1, "y": 2, "z": 3})
     acceptable = ((("x", "y"), "z"), ("z", ("x", "y")))
     self.assertTrue(result in acceptable)
 def test_simple(self):
     """create_huffman_tree builds the expected nested-tuple tree."""
     freq = dict(x=8, y=6, z=5, w=4, v=3)
     self.assertEqual(
         (("z", "y"), (("v", "w"), "x")),
         functions.create_huffman_tree(freq))
 def test_empty(self):
     """An empty frequency dict cannot form a tree; ValueError expected."""
     self.assertRaises(ValueError, functions.create_huffman_tree, {})
예제 #5
0
# Vocabulary size comes from the word -> index mapping built upstream.
n_vocab = len(word2index)

print('n_vocab: %d' % n_vocab)
print('data length: %d' % len(dataset))

# Pick the word2vec training architecture requested on the command line.
if args.model == 'cbow':
    train_model = continuous_bow
elif args.model == 'skipgram':
    train_model = skip_gram
else:
    raise Exception('Unknown model type: {}'.format(args.model))

# Shared embedding lookup table; the output layer is attached below.
model = chainer.FunctionSet(embed=F.EmbedID(n_vocab, args.unit))

if args.out_type == 'hsm':
    # Hierarchical softmax: a Huffman tree keeps frequent words shallow.
    tree = F.create_huffman_tree(counts)
    model.l = F.BinaryHierarchicalSoftmax(args.unit, tree)
    loss_func = model.l
elif args.out_type == 'ns':
    # Negative sampling with 20 negatives, weighted by raw word counts.
    cs = [counts[i] for i in range(len(counts))]
    model.l = F.NegativeSampling(args.unit, cs, 20)
    loss_func = model.l
elif args.out_type == 'original':
    # Plain softmax over the full vocabulary via a dense output layer.
    model.l = F.Linear(args.unit, n_vocab)

    def loss_func(h, t):
        return F.softmax_cross_entropy(model.l(h), t)
else:
    raise Exception('Unknown output type: {}'.format(args.out_type))

# Move parameters to the selected GPU when a device id was given.
if args.gpu >= 0:
    model.to_gpu()
예제 #6
0
 def test_empty(self):
     """No symbols means no tree; the call must raise ValueError."""
     empty = {}
     with self.assertRaises(ValueError):
         functions.create_huffman_tree(empty)
예제 #7
0
 def test_same_count(self):
     """Equal-count ordering is unspecified, so either layout passes."""
     result = functions.create_huffman_tree({'x': 1, 'y': 2, 'z': 3})
     self.assertIn(result, [(('x', 'y'), 'z'), ('z', ('x', 'y'))])
예제 #8
0
 def test_simple(self):
     """Distinct counts give a single deterministic tree shape."""
     result = functions.create_huffman_tree(
         {'x': 8, 'y': 6, 'z': 5, 'w': 4, 'v': 3})
     self.assertEqual(result, (('z', 'y'), (('v', 'w'), 'x')))
예제 #9
0
 def test_same_count(self):
     """Tie-breaking among equal counts may go either way."""
     candidates = {(('x', 'y'), 'z'), ('z', ('x', 'y'))}
     result = functions.create_huffman_tree({'x': 1, 'y': 2, 'z': 3})
     self.assertTrue(result in candidates)
예제 #10
0
print('n_vocab: %d' % n_vocab)
print('data length: %d' % len(dataset))

# Map the CLI model flag to its training routine.
if args.model == 'skipgram':
    train_model = skip_gram
elif args.model == 'cbow':
    train_model = continuous_bow
else:
    raise Exception('Unknown model type: {}'.format(args.model))

# The embedding table shared by both architectures.
model = chainer.FunctionSet(embed=F.EmbedID(n_vocab, args.unit))

out_type = args.out_type
if out_type == 'hsm':
    # Hierarchical softmax over a count-based Huffman tree.
    tree = F.create_huffman_tree(counts)
    model.l = F.BinaryHierarchicalSoftmax(args.unit, tree)
    loss_func = model.l
elif out_type == 'ns':
    # Negative sampling: 20 negatives drawn using the raw counts.
    cs = [counts[word] for word in range(len(counts))]
    model.l = F.NegativeSampling(args.unit, cs, 20)
    loss_func = model.l
elif out_type == 'original':
    # Full-vocabulary softmax through a dense output layer.
    model.l = F.Linear(args.unit, n_vocab)

    def loss_func(h, t):
        return F.softmax_cross_entropy(model.l(h), t)
else:
    raise Exception('Unknown output type: {}'.format(args.out_type))

# Transfer parameters to the GPU if a device id was requested.
if args.gpu >= 0:
    model.to_gpu()