Beispiel #1
0
def emb():
    """Return a tiny hand-built Navec instance with fixed contents.

    The instance holds 3 vectors of dimension 6, split into 2 quantized
    parts with 3 centroids each, and a 3-word vocabulary.
    """
    meta = Meta(id='test_1B_3k_6d_2q')

    # vectors x qdim: for every vector, the centroid picked in each part.
    centroid_ids = np.array(
        [
            [0, 1],
            [1, 0],
            [2, 2],
        ],
        dtype=np.uint8,
    )

    # qdim x centroids x chunk: the centroid table of each part.
    centroid_table = np.array(
        [
            [[1, 0, 0], [0, 1, 1], [0, 0, 0]],
            [[0, 0, 0], [1, 0, 0], [0, 1, 0]],
        ],
        dtype=np.float32,
    )

    # Concatenating centroid_table[q][centroid_ids[v][q]] over q gives,
    # for v = 0, 1, 2:
    #   1 0 0 | 1 0 0
    #   0 1 1 | 0 0 0
    #   0 0 0 | 0 1 0
    pq = PQ(
        vectors=3,
        dim=6,
        qdim=2,
        centroids=3,
        indexes=centroid_ids,
        codes=centroid_table,
    )

    vocab = Vocab(words=['a', 'b', 'c'], counts=[1, 2, 3])
    return Navec(meta, vocab, pq)
Beispiel #2
0
def pack_(vocab, pq, id):
    """Load a vocab and a PQ from files and dump them as one Navec archive.

    Args:
        vocab: Location handed to ``open_bin`` to read the vocabulary.
        pq: Location handed to ``open_bin`` to read the PQ data.
        id: Identifier stored in ``Meta`` and used in the archive name.

    The archive is written to ``navec_<id>.tar``, a relative path.
    """
    meta = Meta(id)

    with open_bin(vocab) as vocab_source:
        loaded_vocab = Vocab.from_file(vocab_source)

    with open_bin(pq) as pq_source:
        loaded_pq = PQ.from_file(pq_source)

    path = 'navec_%s.tar' % id
    log_info('Dumping %s', path)

    navec = Navec(meta, loaded_vocab, loaded_pq)
    navec.dump(path)
Beispiel #3
0
def pack(args):
    """Build a Navec archive from the vocab and PQ files named in *args*.

    Args:
        args: Object exposing ``id``, ``vocab`` and ``pq`` attributes.
            ``vocab`` and ``pq`` are handed to ``open_bin``; ``id`` goes
            into ``Meta`` and into the archive name.

    The archive is written to ``navec_<id>.tar``, a relative path.
    """
    meta = Meta(args.id)

    with open_bin(args.vocab) as vocab_source:
        loaded_vocab = Vocab.from_file(vocab_source)

    with open_bin(args.pq) as pq_source:
        loaded_pq = PQ.from_file(pq_source)

    path = 'navec_%s.tar' % args.id
    log_info('Dumping %s', path)

    navec = Navec(meta, loaded_vocab, loaded_pq)
    navec.dump(path)
Beispiel #4
0
def quantize_(emb, output, subdim, sample, iterations):
    """Parse an embedding file, quantize its weights and dump the result.

    Args:
        emb: Path of the embedding file, read via ``parse_glove_emb``.
        output: Destination passed to ``Navec.dump``.
        subdim: Forwarded unchanged to ``quantize__``.
        sample: Forwarded unchanged to ``quantize__``.
        iterations: Forwarded unchanged to ``quantize__``.
    """
    # The input stays open for the whole pipeline, including the dump.
    with open(emb) as lines:
        log_info('Load %s', emb)
        parsed = parse_glove_emb(lines)
        words, weights = parsed

        log_info(
            'PQ, subdim: %d, sample: %d, iterations: %d',
            subdim, sample, iterations
        )
        quantized = quantize__(weights, subdim, sample, iterations)
        vocabulary = Vocab(words)

        log_info('Dump %s', output)
        # NOTE(review): other call sites in this file pass a Meta as the
        # first Navec argument; confirm the two-argument form is valid here.
        navec = Navec(vocabulary, quantized)
        navec.dump(output)