def test_loading_unordered_feature_lists(tmpdir):
    """Round-trip vectors whose feature lists are supplied in different orders.

    Three entries carry the same (feature, value) pairs, listed in a
    different order each time. The vectors are written to a TSV file and
    read back; the reloaded object must expose the same columns and the
    same per-entry vector as the one that was written.
    """
    # three identical vectors, differing only in the order features are listed
    features_by_entry = {
        'a/N': [('f1', 1), ('f2', 2), ('f3', 3)],
        'b/N': [('f3', 3), ('f1', 1), ('f2', 2)],
        'c/N': [('f3', 3), ('f2', 2), ('f1', 1)],
    }
    written = Vectors(features_by_entry)
    tsv_path = str(tmpdir.join('outfile.txt'))
    written.to_tsv(tsv_path)

    reloaded = written.from_tsv(tsv_path)
    # rows can be in any order, but columns need to be sorted
    assert written.columns == reloaded.columns
    for entry in features_by_entry:
        expected = written.get_vector(entry).A
        actual = reloaded.get_vector(entry).A
        assert_array_equal(expected, actual)
def write_gensim_vectors_to_tsv(model, output_path, vocab=None):
    """Collect the vectors of a word2vec-style model and write them to TSV.

    Each word's vector is turned into a list of ``(dimension_name, value)``
    pairs, where the dimension names are ``f00``, ``f01``, ... The pairs are
    wrapped in a ``Vectors`` object, which is then written to ``output_path``.

    :param model: object supporting ``model[word]`` lookup; when ``vocab`` is
        not given it must also expose a ``vocab`` mapping keyed by word.
    :param output_path: destination, passed straight to ``Vectors.to_tsv``.
    :param vocab: optional iterable of words to export. When empty or
        ``None``, every key of ``model.vocab`` is exported.
    :return: the ``Vectors`` object that was written.
    :raises ValueError: if there are no words to export.
    """
    if not vocab:
        vocab = model.vocab.keys()
    # Materialise once: peeking at the first word of a one-shot iterator
    # would otherwise consume it and silently drop that word from the export.
    vocab = list(vocab)
    if not vocab:
        raise ValueError('Cannot export vectors: the vocabulary is empty')

    dims = len(model[vocab[0]])  # vector dimensionality
    dimension_names = ['f%02d' % i for i in range(dims)]

    # NOTE: words are not filtered here. Non-DocumentFeatures are reported to
    # break to_tsv, and words with non-ascii characters may also need skipping.
    # TODO: filtering on DocumentFeature type would assume there is a PoS tag.
    #
    # list(zip(...)) rather than a bare zip: on Python 3 zip returns an
    # iterator that can be traversed only once.
    features_by_word = {
        word: list(zip(dimension_names, model[word])) for word in vocab
    }
    vectors = Vectors(features_by_word)
    vectors.to_tsv(output_path, gzipped=True,
                   enforce_word_entry_pos_format=True,
                   entry_filter=lambda _: True,
                   dense_hd5=True)
    return vectors