Ejemplo n.º 1
0
# twitter_reader.save_as_csv(join(EMBEDDINGS_DIR, 'Glove twitter/glove.twitter.27B.25d_small.csv'))
#
#
# print "Reading %s" % 'glove_reader_wiki'
# glove_reader_wiki = GloveEmbeddingReader(join(EMBEDDINGS_DIR, 'Glove wiki/glove.6B.50d.txt'))
# glove_reader_wiki.read_embedding(words=words)
# glove_reader_wiki.save_as_csv(join(EMBEDDINGS_DIR, 'Glove wiki/glove.6B.50d_small.csv'))



# For each pre-trained embedding: announce it, load only the vectors for the
# entries in `words`, then write that reduced table next to the source file
# with a `_small.csv` suffix.

# Google News word2vec (binary format).
print("Reading %s" % 'Google news W2V')
w2v_reader = W2VEmbeddingReader(
    join(EMBEDDINGS_DIR, 'Google news w2v/GoogleNews-vectors-negative300.bin'))
w2v_reader.read_embedding(words=words)
w2v_reader.save_as_csv(
    join(EMBEDDINGS_DIR, 'Google news w2v/GoogleNews-vectors-negative300_small.csv'))

# GloVe trained on Twitter.
print("Reading %s" % 'twitter_reader')
twitter_reader = GloveEmbeddingReader(
    join(EMBEDDINGS_DIR, 'Glove twitter/glove.twitter.27B.200d.txt'))
twitter_reader.read_embedding(words=words)
twitter_reader.save_as_csv(
    join(EMBEDDINGS_DIR, 'Glove twitter/glove.twitter.27B.200d_small.csv'))

# GloVe trained on Common Crawl.
print("Reading %s" % 'glove_reader_common_crawl')
glove_reader_common_crawl = GloveEmbeddingReader(
    join(EMBEDDINGS_DIR, 'Glove Common Crawl/glove.840B.300d.txt'))
glove_reader_common_crawl.read_embedding(words=words)
glove_reader_common_crawl.save_as_csv(
    join(EMBEDDINGS_DIR, 'Glove Common Crawl/glove.840B.300d_small.csv'))

# GloVe trained on Wikipedia.
print("Reading %s" % 'glove_reader_wiki')
glove_reader_wiki = GloveEmbeddingReader(
    join(EMBEDDINGS_DIR, 'Glove wiki/glove.6B.300d.txt'))
glove_reader_wiki.read_embedding(words=words)
glove_reader_wiki.save_as_csv(
    join(EMBEDDINGS_DIR, 'Glove wiki/glove.6B.300d_small.csv'))

Ejemplo n.º 2
0
        pass
        # print 'error'


def memory_usage_psutil():
    """Return the memory usage of the current process in MB.

    The value is the first field of psutil's per-process memory info
    (the resident set size according to the psutil documentation),
    converted from bytes to units of 2 ** 20 bytes.

    Returns:
        float: memory usage of this process in MB.

    Raises:
        ImportError: if psutil is not installed.
    """
    import os
    import psutil

    process = psutil.Process(os.getpid())
    # psutil 2.0 renamed Process.get_memory_info() to memory_info() and
    # psutil 3.0 removed the old name. Prefer the current API and fall back
    # to the legacy one so the helper works with either generation.
    try:
        read_memory_info = process.memory_info
    except AttributeError:
        read_memory_info = process.get_memory_info
    mem = read_memory_info()[0] / float(2 ** 20)
    return mem

# Size of the target vocabulary we try to find in the embedding files.
print(len(words))

# words = {'car':0, 'flower':0, 'truck':0, 'train':0, 'glove':0}

twitter_reader = GloveEmbeddingReader('H:/Embeddings/Glove twitter/glove.twitter.27B.25d.txt')
twitter_reader.read_embedding(words=words)
# The wiki reader gets its own name. It used to share `glove_reader` with the
# Common Crawl reader below, so its vocabulary was overwritten before it
# could take part in the coverage computation.
glove_wiki_reader = GloveEmbeddingReader('H:/Embeddings/Glove wiki/glove.6B.50d.txt')
glove_wiki_reader.read_embedding(words=words)
# NOTE(review): the file name below matches the wiki download (glove.6B.50d)
# although it lives in the Common Crawl directory -- confirm the path.
glove_reader = GloveEmbeddingReader('H:/Embeddings/Glove Common Crawl/glove.6B.50d.txt')
glove_reader.read_embedding(words=words)

print("Memory usage %1.1f MB" % memory_usage_psutil())

# Every word found in at least one of the three embeddings.
words2 = set(twitter_reader.word2index.keys()).union(
    glove_wiki_reader.word2index.keys(),
    glove_reader.word2index.keys())
print(len(words2))
# Target words that none of the loaded embeddings contain.
diff = set(words.keys()).difference(words2)

# Same {word: 0} mapping shape as `words`, restricted to the words found.
words2 = {w: 0 for w in words2}

# Show a sample of the uncovered words.
print(list(diff)[0:100])
Ejemplo n.º 3
0
def memory_usage_psutil():
    """Return the memory usage of the current process in MB.

    The value is the first field of psutil's per-process memory info
    (the resident set size according to the psutil documentation),
    converted from bytes to units of 2 ** 20 bytes.

    Returns:
        float: memory usage of this process in MB.

    Raises:
        ImportError: if psutil is not installed.
    """
    import os
    import psutil

    process = psutil.Process(os.getpid())
    # psutil 2.0 renamed Process.get_memory_info() to memory_info() and
    # psutil 3.0 removed the old name. Prefer the current API and fall back
    # to the legacy one so the helper works with either generation.
    try:
        read_memory_info = process.memory_info
    except AttributeError:
        read_memory_info = process.get_memory_info
    mem = read_memory_info()[0] / float(2 ** 20)
    return mem

# Report how many target words we are going to look up.
print(len(words))

# Read the Google News word2vec model without a `words` argument and write
# what was read out as a text file.
w2v_reader = W2VEmbeddingReader(
    'E:/Embeddings/Google news w2v/GoogleNews-vectors-negative300.bin')
w2v_reader.read_embedding()
w2v_reader.save_as_csv(
    'E:/Embeddings/Google news w2v/GoogleNews-vectors-negative300.txt')

# GloVe (Wikipedia), restricted to the target words.
print("Reading %s" % 'glove_reader_wiki')
glove_reader_wiki = GloveEmbeddingReader(
    'E:/Embeddings/Glove wiki/glove.6B.50d.txt')
glove_reader_wiki.read_embedding(words=words)
# To profile the read above instead of just running it:
#   import cProfile
#   cProfile.run('glove_reader_wiki.read_embedding(words=words)')

# GloVe (Twitter), restricted to the target words.
print("Reading %s" % 'twitter_reader')
twitter_reader = GloveEmbeddingReader(
    'E:/Embeddings/Glove twitter/glove.twitter.27B.200d.txt')
twitter_reader.read_embedding(words=words)

# Google News word2vec again, this time restricted to the target words.
# This rebinds `w2v_reader`, replacing the unrestricted reader created above.
print("Reading %s" % 'W2VEmbeddingReader')
w2v_reader = W2VEmbeddingReader(
    'E:/Embeddings/Google news w2v/GoogleNews-vectors-negative300.bin')
w2v_reader.read_embedding(words=words)

# GloVe (Common Crawl): only the reader is constructed here.
print("Reading %s" % 'glove_reader_common_crawl')
glove_reader_common_crawl = GloveEmbeddingReader(
    'E:/Embeddings/Glove Common Crawl/glove.840B.300d.txt')