Example #1
0
def main():
    """Cluster the 15 nearest neighbors of each target word into 3 groups.

    Loads a pretrained fastText model, then for every word in the target
    list fetches its 15 nearest neighbors, embeds them, and runs k-means
    (k=3) over the neighbor embeddings, printing each word's cluster.
    """
    model = FastText('model_text8.bin')

    target_words = [
        'granada', 'python', 'harmony', 'mafia', 'yoga', 'goth', 'cyberpunk',
        'nasa', 'japan', 'boolean', 'football', 'algorithm', 'china', 'usa',
        'internet', 'harvard', 'earth', 'horse', 'angel', 'rock'
    ]
    for t_word in target_words:
        # Embedding of the target word itself; its length gives the model
        # dimension (the original hard-coded 128, which breaks for any
        # model trained with a different vector size).
        target_word_embedding = model.get_numpy_vector(t_word)
        print('Target word:', t_word)

        # 15 most similar words as (word, similarity) pairs.
        closest_words = model.nearest_neighbors(t_word, k=15)

        # Stack the neighbor embeddings into one (k, dim) matrix.
        nn_word_embedding = np.zeros(
            shape=(len(closest_words), target_word_embedding.shape[0]))
        for i, (word, similarity) in enumerate(closest_words):
            nn_word_embedding[i] = model.get_numpy_vector(word)

        # Partition the neighbors into 3 clusters.
        cluster_model = KMeans(n_clusters=3, init='k-means++')
        prediction = cluster_model.fit_predict(nn_word_embedding)
        print(prediction)
        for j, (word, _similarity) in enumerate(closest_words):
            print('Word:', word, '- Cluster #%d' % (prediction[j] + 1))
Example #2
0
def text():
    """Smoke-test a Chinese fastText model: load it and print the five
    nearest neighbors of '桃'."""
    model = FastText('wiki.zh.bin')
    print('load over..')
    # Unused sample strings removed (the original assigned s1='启航',
    # s2='董启航', s3=' 董启文' but never read them).
    print(model.nearest_neighbors('桃', k=5))

#text()
Example #3
0
def main():
    """Print the embedding of 'dog' and its 15 most similar words."""
    model = FastText('model_text8.bin')

    target_word = 'dog'

    # Look up the target word's vector and show a short preview of it.
    embedding = model.get_numpy_vector(target_word)
    print('Target word:', target_word)
    print('Embedding shape:', embedding.shape)
    print('Embedding:', embedding[:10], '...')

    # List the top-15 neighbors with their similarity scores.
    for neighbor, score in model.nearest_neighbors(target_word, k=15):
        print('Word:', neighbor, 'similarity:', score)
Example #4
0
class FastTextEmbedding(Embedding):
    """Embedding backend backed by a binary fastText model file.

    The model is loaded lazily via load(); until then the vector
    dimension is unknown (vdim == -1).
    """

    def __init__(self, binfile, normalize=False):
        self.file = binfile
        self.vdim = -1
        self.normalize = normalize

    def load(self):
        """Load the fastText model from disk; returns self for chaining."""
        print('Loading fasttext model.')
        self.ftmodel = FastText()
        self.ftmodel.load_model(self.file)
        # Probe the vector size with an arbitrary word ('is').
        self.vdim = len(self.ftmodel['is'])
        print('Finished loading fasttext model.')
        return self

    def getVector(self, word):
        """Return the (optionally normalized) numpy vector for `word`."""
        return self.ftmodel.get_numpy_vector(word, normalized=self.normalize)

    def search(self, q, topk=4):
        raise NotImplementedError()

    def wordForVec(self, v):
        """Return (word, similarity) of the vocabulary entry closest to v."""
        best = self.ftmodel.words_for_vector(v)[0]
        return best[0], best[1]

    def containsWord(self, word):
        # fastText composes vectors from character n-grams, so any word
        # can be embedded.
        return True

    def vocabulary(self):
        """Return the model's word list."""
        return self.ftmodel.words

    def nearest_neighbors(self, term, n=1000):
        """Return the `n` nearest neighbors of `term`."""
        return self.ftmodel.nearest_neighbors(term, n)

    def all_nearest_neighbors(self, term):
        """Rank the entire vocabulary by similarity to `term`."""
        return self.nearest_neighbors(term, len(self.vocabulary()))

    def dim(self):
        """Vector dimension, or -1 before load()."""
        return self.vdim
Example #5
0
model.skipgram(input="./clean_corpus", output='model', epoch=100, lr=0.7)
print(model.nwords)
'''
#model = fasttext.load_model('../data/model.bin')
model = FastText('../data/model.bin')


def get_set(filepath):
    """Read a text file and return the set of its stripped lines.

    Args:
        filepath: path to a readable text file, one entry per line.

    Returns:
        set[str]: every line with surrounding whitespace removed
        (a blank line contributes the empty string).
    """
    with open(filepath, 'r') as file_in:
        return {line.strip() for line in file_in}


# Map every out-of-vocabulary word to its closest in-vocabulary neighbor
# (or "none") and write the pairs as TSV.
vocab_in = get_set("../data/vocab_in")
vocab_out = get_set("../data/vocab_out")
# 'with' guarantees the mapping file is closed even if a lookup raises
# (the original opened/closed it manually and could leak on error).
with open("../data/in2out_map", 'w') as in2out_file:
    for word in vocab_out:
        candi_list = model.nearest_neighbors(word, k=10)
        # Take the first candidate that is in-vocabulary, if any;
        # for/else replaces the original boolean 'mark' flag.
        for candidate, _score in candi_list:
            if candidate in vocab_in:
                in2out_file.write("\t".join([word, candidate]) + "\n")
                break
        else:
            in2out_file.write("\t".join([word, "none"]) + "\n")
            if word_lst[j]=='ළ':
                if len(i)<=1:
                    per_word = word_lst.copy()
                per_word[j] = 'ල'
        concat = ''
        for k in per_word:
            concat+=k
        permutated_words.append(concat)          
    return permutated_words


  
# Load the Sinhala model and build spelling suggestions for sin_word:
# neighbors that appear among the word's letter permutations are moved
# to the front of the suggestion list.
model = FastText('sinhala_all.bin')

sin_word = 'බළ්ළාට'
permutated_sin_word = NanaLala(sin_word)

neighbors = model.nearest_neighbors(sin_word, k=10000)
suggested_words = [pair[0] for pair in neighbors]

# Promote permutation matches to the head of the copy. Repeated
# remove + insert(0) means a later match ends up before an earlier one.
copy_suggested_words = suggested_words.copy()
for candidate in suggested_words:
    if candidate in permutated_sin_word:
        copy_suggested_words.remove(candidate)
        copy_suggested_words.insert(0, candidate)

print(copy_suggested_words[:5])


Example #7
0
def evaluate():
    """Pretty-print the 30 nearest neighbors of 'praxis' in debate2vec."""
    model = FastText("debate2vec.bin")
    neighbors = model.nearest_neighbors('praxis', k=30)
    pprint.pprint(neighbors)
Example #8
0
# Fill-in-the-blank evaluation: for each input line containing a [MASK]
# token, propose candidates from the fastText neighbors of the words
# surrounding the mask.
start = time.time()

model = FastText('output.bin')

with open("testDataMasked.txt", encoding="utf8") as f:
    contentData = f.readlines()

contentData = [x.strip() for x in contentData]
lineIndex = 1
with open("FastTextResult.txt", 'a', encoding="utf8") as fastTextResultFile:
    for line in contentData:
        tmpArray = line.split()
        vectors = []
        # NOTE(review): if [MASK] is the first token, maskedIndex - 1
        # is -1 and indexes the LAST token — confirm that is intended.
        maskedIndex = tmpArray.index('[MASK]')

        similiarsLeft = model.nearest_neighbors(tmpArray[maskedIndex - 1],
                                                k=1000)
        if maskedIndex < len(tmpArray) - 1:
            similiarsRight = model.nearest_neighbors(tmpArray[maskedIndex + 1],
                                                     k=1000)

        predictions = []
        if maskedIndex < len(tmpArray) - 1:
            # Intersect the left and right neighbor lists, keeping the
            # left list's order. A set membership test replaces the
            # original O(n^2) nested scan over two 1000-item lists.
            right_words = {simR[0] for simR in similiarsRight}
            for simL in similiarsLeft:
                if simL[0] in right_words:
                    predictions.append(simL[0])
        else:
            # [MASK] is the last token: only the left context exists.
            for simL in similiarsLeft:
                predictions.append(simL[0])
Example #9
0
                         output='skip_gram_model',
                         epoch=100,
                         lr=0.7)
# Inspect the trained skip-gram model: print the raw embedding of '贷款'
# (Chinese for "loan").
print(skip_gram_model['贷款'])
# print(skip_gram_model.get_numpy_vector('贷款'))
# print(skip_gram_model.get_numpy_vector('贷款', normalized=True))

# Word-analogy arithmetic: 人民币 (RMB) + 贷款 (loan) - 外币 (foreign
# currency), resolved back to the single closest vocabulary word.
var1 = skip_gram_model.get_numpy_vector('人民币')
var2 = skip_gram_model.get_numpy_vector('贷款')
var3 = skip_gram_model.get_numpy_vector('外币')
skip_gram_model.words_for_vector(var1 + var2 - var3, k=1)

# for word in skip_gram_model.words:
#    print(word, skip_gram_model[word])

# Two nearest neighbors of '贷款'.
print(skip_gram_model.nearest_neighbors('贷款', k=2))

# test data is stored inside a file, use this:
# skip_gram_model.predict_proba_file('./test.txt', k=2)

print("\n")

############################
# Train with the CBOW model #
############################
cbow_model = FastText()
cbow_model.cbow(input='./train.txt', output='cbow_model', epoch=100, lr=0.7)
print(cbow_model['贷款'])
# print(cbow_model.get_numpy_vector('贷款'))
# print(cbow_model.get_numpy_vector('贷款', normalized=True))

def lemma(girdi):
    """Look up the lemma of a word via the remote lemmatizer service.

    Args:
        girdi: the surface word to lemmatize.

    Returns:
        The lemma extracted from the service response, or the input word
        itself when the service reports "No_Lemma".
    """
    istek = {"text": girdi, "fields": "lemma"}
    lemmas = requests.post(url, istek, headers).text
    sonuc = lemmas.strip(punctuations)
    # Keep only the text after the last double quote in the response.
    cikti = re.sub(r'^.*\"', '', sonuc)
    if cikti == "No_Lemma":
        cikti = girdi
    # BUG FIX: the original had 'else: cikti == sonuc' — a no-op
    # comparison (== instead of =). Removing it preserves the actual
    # runtime behavior: the regex-extracted lemma is returned unchanged.
    return cikti


# Load model and set query key
# Query the 25 nearest neighbors of `key` (`model` and `key` are defined
# earlier in the original script — not visible in this fragment).
primary = (model.nearest_neighbors(key, k=25))

# word -> similarity mapping of the candidates.
my_dict = dict(primary)

# Empty list for first cycle
f_cycle = []
f_second_input = []
# Create first cycle data
for word, weight in my_dict.items():
    word = word.lower()
    # Format the similarity score with three decimals (becomes a str).
    weight = format(weight, '.3f')
    # Skip the key itself and words that merely extend the key.
    if word != key and not word.startswith(key):
        word = lemma(word)
        f_second_input.append(word)
        # sonuc = (f"{key},{word},{weight}")
        sonuc = key, word, weight
Example #11
0
class WordModel:
    """Uniform wrapper over two word-embedding backends.

    When model_name == 'fasttext' the calls are delegated to a pyfasttext
    FastText model; otherwise to a gensim Word2Vec model. Backend lookup
    failures (KeyError) are converted into None / empty-list / zero
    results instead of propagating.
    """

    def __init__(self):
        # Backend selector ('fasttext' or 'word2vec'), the loaded model
        # object, and the resolved path to the model file. All are set
        # externally / via the validate_* and load_model helpers.
        self.model_name = None
        self.model = None
        self.model_path = None

    def get_vector(self, word):
        """Return the embedding for `word`, or None when the backend
        raises KeyError (out-of-vocabulary)."""
        try:
            vector = self.model[word]
        except KeyError:
            vector = None

        return vector

    def get_full_path(self, relative_path):
        """Resolve `relative_path` against this module's directory."""
        pwd = os.path.dirname(os.path.abspath(__file__))
        pwd = os.path.join(pwd, relative_path)
        return pwd

    def path_exists(self, relative_path):
        """True when `relative_path` exists relative to this module."""
        full_path = self.get_full_path(relative_path)
        return os.path.exists(full_path)

    def validate_model_path(self, path):
        """Return a usable model path.

        Falls back to the bundled default model for the selected backend
        when `path` is None or does not exist; otherwise resolves `path`
        against the module directory.
        """
        if path is None or not self.path_exists(path):
            if self.model_name == 'fasttext':
                default_path = self.get_full_path(
                    'models/fasttext_skipgram_model.bin')
            else:
                default_path = self.get_full_path(
                    'models/word2vec_skipgram.w2v')

            return default_path
        else:
            path = self.get_full_path(path)

            return path

    def validate_model_name(self, name):
        """Normalize the backend name; anything other than 'fasttext' or
        'word2vec' falls back to 'fasttext'."""
        if name is not None and name in ['fasttext', 'word2vec']:
            correct_name = name
        else:
            correct_name = 'fasttext'

        return correct_name

    def get_numpy_vector(self, word):
        """Return a numpy vector for `word`.

        fastText vectors are L2-normalized; Word2Vec vectors are returned
        raw, or None when the word is out of vocabulary.
        """
        if self.model_name == 'fasttext':
            return self.model.get_numpy_vector(word, normalized=True)
        else:
            try:
                np_vector = self.model.wv[word]
            except KeyError:
                np_vector = None

            return np_vector

    def load_model(self):
        """Load the backend model from self.model_path.

        Raises:
            FileNotFoundError: when the model file does not exist.
        """
        if not os.path.exists(self.model_path):
            raise FileNotFoundError('model file not found!')
        if self.model_name == 'fasttext':
            self.model = FastText(self.model_path)
        else:
            self.model = gensim.models.Word2Vec.load(self.model_path, mmap='r')

    def get_most_similar_words(self, word, k=5):
        """Return the k most similar words as (word, score) pairs;
        empty list when Word2Vec does not know the word."""
        if self.model_name == 'fasttext':
            return self.model.nearest_neighbors(word, k=k)
        else:
            try:
                similar_words = self.model.similar_by_word(word, topn=k)
            except KeyError:
                similar_words = []

            return similar_words

    def get_words_for_vector(self, vector, k=3):
        """Return the k words whose embeddings are closest to `vector`."""
        if self.model_name == 'fasttext':
            return self.model.words_for_vector(vector, k)
        else:
            try:
                similar_words = self.model.similar_by_vector(vector, topn=k)
            except KeyError:
                similar_words = []

            return similar_words

    def similarity(self, word_1, word_2):
        """Cosine similarity of two words; 0 on a Word2Vec KeyError."""
        if self.model_name == 'fasttext':
            return self.model.similarity(word_1, word_2)
        else:
            try:
                sim = self.model.wv.similarity(word_1, word_2)
            except KeyError:
                sim = 0
            return sim

    def word_analogies(self, words, k=3):
        """Solve the analogy a:b :: c:? for a 3-word input string.

        Computes vec(c) - vec(a) + vec(b) and returns the k closest
        words. Returns None implicitly when the input is not 3 words.
        """
        word_list = words.split()
        if len(word_list) == 3:
            word_vec_1 = self.get_numpy_vector(word_list[0])
            word_vec_2 = self.get_numpy_vector(word_list[1])
            word_vec_3 = self.get_numpy_vector(word_list[2])
            word_vec_4 = word_vec_3 - word_vec_1 + word_vec_2
            return self.get_words_for_vector(word_vec_4, k)

    def odd_one_out(self, words):
        """Pick the least-related word among a 4-word input string.

        Each word is scored by the RMS of its similarity to the other
        three; the word with the lowest score is reported first, then
        all words sorted by score. Returns None implicitly when the
        input is not exactly 4 words.
        """
        word_list = words.split()
        if len(word_list) == 4:
            scores = [0.0, 0.0, 0.0, 0.0]

            for index in range(len(word_list)):
                for another_word in word_list:
                    if another_word != word_list[index]:
                        scores[index] += self.similarity(
                            word_list[index], another_word)**2
                scores[index] = sqrt(scores[index] / 3.0)

            min_index = scores.index(min(scores))

            result = f'{word_list[min_index]}\n'
            result += '\n'

            scores, word_list = zip(*sorted(zip(scores, word_list)))
            for word, score in zip(word_list, scores):
                result += f'{word}: {score}\t'

            return result
Example #12
0
def main():
    model = FastText('model_text8.bin')

    target_word = 'deep'    
    
    # get embedding
    target_word_embedding = model.get_numpy_vector(target_word)
    print('Target word:', target_word)
    print('Embedding shape:', target_word_embedding.shape)
    print('Embedding:', target_word_embedding[0:15], '...')

    # find closest words
    closest_words = model.nearest_neighbors(target_word, k=15)
    closest_word_embeddings = []
    numw = 0
    for word, similarity in closest_words:
        print('Word:', word, 'similarity:', similarity)
        closest_word_embeddings.append(model.get_numpy_vector(word))
           
    kmeans = cluster.KMeans(n_clusters=3)
    kmeans.fit(closest_word_embeddings)
    labels = kmeans.labels_
    print ('Cluster id labels for inputted data')
    print (labels)
    
    cluster1 = []
    cluster2 = []
    cluster3 = []
    
    for i in range(0,15):
      if labels[i] == 0:
          cluster1.append(closest_words[i][0]) 
          
      if labels[i] == 1:
          cluster2.append(closest_words[i][0])
          
      if labels[i] == 2:
          cluster3.append(closest_words[i][0])
      
    print("cluster #1 : ", cluster1)
    print("cluster #2 : ", cluster2)
    print("cluster #3 : ", cluster3)
      
    
        
    target_word = 'president'

    # get embedding
    target_word_embedding = model.get_numpy_vector(target_word)
    print('Target word:', target_word)
    #print('Embedding shape:', target_word_embedding.shape)
    #print('Embedding:', target_word_embedding[0:10], '...')

    # find closest words
    closest_words = model.nearest_neighbors(target_word, k=15)
    closest_word_embeddings = []
    numw = 0
    for word, similarity in closest_words:
        print('Word:', word, 'similarity:', similarity)
        closest_word_embeddings.append(model.get_numpy_vector(word))
           
    kmeans = cluster.KMeans(n_clusters=3)
    kmeans.fit(closest_word_embeddings)
    labels = kmeans.labels_
    print ('Cluster id labels for inputted data')
    print (labels)
    
    cluster1 = []
    cluster2 = []
    cluster3 = []
    
    for i in range(0,15):
      if labels[i] == 0:
          cluster1.append(closest_words[i][0]) 
          
      if labels[i] == 1:
          cluster2.append(closest_words[i][0])
          
      if labels[i] == 2:
          cluster3.append(closest_words[i][0])
      
    print("cluster #1 : ", cluster1)
    print("cluster #2 : ", cluster2)
    print("cluster #3 : ", cluster3)
      
        
    target_word = 'self'

    # get embedding
    target_word_embedding = model.get_numpy_vector(target_word)
    print('Target word:', target_word)
    #print('Embedding shape:', target_word_embedding.shape)
    #print('Embedding:', target_word_embedding[0:10], '...')

    # find closest words
    closest_words = model.nearest_neighbors(target_word, k=15)
    closest_word_embeddings = []
    numw = 0
    for word, similarity in closest_words:
        print('Word:', word, 'similarity:', similarity)
        closest_word_embeddings.append(model.get_numpy_vector(word))
           
    kmeans = cluster.KMeans(n_clusters=3)
    kmeans.fit(closest_word_embeddings)
    labels = kmeans.labels_
    print ('Cluster id labels for inputted data')
    print (labels)
    
    cluster1 = []
    cluster2 = []
    cluster3 = []
    
    for i in range(0,15):
      if labels[i] == 0:
          cluster1.append(closest_words[i][0]) 
          
      if labels[i] == 1:
          cluster2.append(closest_words[i][0])
          
      if labels[i] == 2:
          cluster3.append(closest_words[i][0])
      
    print("cluster #1 : ", cluster1)
    print("cluster #2 : ", cluster2)
    print("cluster #3 : ", cluster3)
      
        
    target_word = 'insult'

    # get embedding
    target_word_embedding = model.get_numpy_vector(target_word)
    print('Target word:', target_word)
    #print('Embedding shape:', target_word_embedding.shape)
    #print('Embedding:', target_word_embedding[0:10], '...')

    # find closest words
    closest_words = model.nearest_neighbors(target_word, k=15)
    closest_word_embeddings = []
    numw = 0
    for word, similarity in closest_words:
        print('Word:', word, 'similarity:', similarity)
        closest_word_embeddings.append(model.get_numpy_vector(word))
           
    kmeans = cluster.KMeans(n_clusters=3)
    kmeans.fit(closest_word_embeddings)
    labels = kmeans.labels_
    print ('Cluster id labels for inputted data')
    print (labels)
    
    cluster1 = []
    cluster2 = []
    cluster3 = []
    
    for i in range(0,15):
      if labels[i] == 0:
          cluster1.append(closest_words[i][0]) 
          
      if labels[i] == 1:
          cluster2.append(closest_words[i][0])
          
      if labels[i] == 2:
          cluster3.append(closest_words[i][0])
      
    print("cluster #1 : ", cluster1)
    print("cluster #2 : ", cluster2)
    print("cluster #3 : ", cluster3)
      

        
    target_word = 'general'

    # get embedding
    target_word_embedding = model.get_numpy_vector(target_word)
    print('Target word:', target_word)
    #print('Embedding shape:', target_word_embedding.shape)
    #print('Embedding:', target_word_embedding[0:10], '...')

    # find closest words
    closest_words = model.nearest_neighbors(target_word, k=15)
    closest_word_embeddings = []
    numw = 0
    for word, similarity in closest_words:
        print('Word:', word, 'similarity:', similarity)
        closest_word_embeddings.append(model.get_numpy_vector(word))
           
    kmeans = cluster.KMeans(n_clusters=3)
    kmeans.fit(closest_word_embeddings)
    labels = kmeans.labels_
    print ('Cluster id labels for inputted data')
    print (labels)
    
    cluster1 = []
    cluster2 = []
    cluster3 = []
    
    for i in range(0,15):
      if labels[i] == 0:
          cluster1.append(closest_words[i][0]) 
          
      if labels[i] == 1:
          cluster2.append(closest_words[i][0])
          
      if labels[i] == 2:
          cluster3.append(closest_words[i][0])
      
    print("cluster #1 : ", cluster1)
    print("cluster #2 : ", cluster2)
    print("cluster #3 : ", cluster3)
      
        
    target_word = 'inclined'
    # get embedding
    target_word_embedding = model.get_numpy_vector(target_word)
    print('Target word:', target_word)
    #print('Embedding shape:', target_word_embedding.shape)
    #print('Embedding:', target_word_embedding[0:10], '...')

    # find closest words
    closest_words = model.nearest_neighbors(target_word, k=15)
    closest_word_embeddings = []
    numw = 0
    for word, similarity in closest_words:
        print('Word:', word, 'similarity:', similarity)
        closest_word_embeddings.append(model.get_numpy_vector(word))
           
    kmeans = cluster.KMeans(n_clusters=3)
    kmeans.fit(closest_word_embeddings)
    labels = kmeans.labels_
    print ('Cluster id labels for inputted data')
    print (labels)
    
    cluster1 = []
    cluster2 = []
    cluster3 = []
    
    for i in range(0,15):
      if labels[i] == 0:
          cluster1.append(closest_words[i][0]) 
          
      if labels[i] == 1:
          cluster2.append(closest_words[i][0])
          
      if labels[i] == 2:
          cluster3.append(closest_words[i][0])
      
    print("cluster #1 : ", cluster1)
    print("cluster #2 : ", cluster2)
    print("cluster #3 : ", cluster3)
      
        
    target_word = 'property'

    # get embedding
    target_word_embedding = model.get_numpy_vector(target_word)
    print('Target word:', target_word)
    #print('Embedding shape:', target_word_embedding.shape)
    #print('Embedding:', target_word_embedding[0:10], '...')

    # find closest words
    closest_words = model.nearest_neighbors(target_word, k=15)
    closest_word_embeddings = []
    numw = 0
    for word, similarity in closest_words:
        print('Word:', word, 'similarity:', similarity)
        closest_word_embeddings.append(model.get_numpy_vector(word))
           
    kmeans = cluster.KMeans(n_clusters=3)
    kmeans.fit(closest_word_embeddings)
    labels = kmeans.labels_
    print ('Cluster id labels for inputted data')
    print (labels)
    
    cluster1 = []
    cluster2 = []
    cluster3 = []
    
    for i in range(0,15):
      if labels[i] == 0:
          cluster1.append(closest_words[i][0]) 
          
      if labels[i] == 1:
          cluster2.append(closest_words[i][0])
          
      if labels[i] == 2:
          cluster3.append(closest_words[i][0])
      
    print("cluster #1 : ", cluster1)
    print("cluster #2 : ", cluster2)
    print("cluster #3 : ", cluster3)
      
        
    target_word = 'international'

    # get embedding
    target_word_embedding = model.get_numpy_vector(target_word)
    print('Target word:', target_word)
    #print('Embedding shape:', target_word_embedding.shape)
    #print('Embedding:', target_word_embedding[0:10], '...')

    # find closest words
    closest_words = model.nearest_neighbors(target_word, k=15)
    closest_word_embeddings = []
    numw = 0
    for word, similarity in closest_words:
        print('Word:', word, 'similarity:', similarity)
        closest_word_embeddings.append(model.get_numpy_vector(word))
           
    kmeans = cluster.KMeans(n_clusters=3)
    kmeans.fit(closest_word_embeddings)
    labels = kmeans.labels_
    print ('Cluster id labels for inputted data')
    print (labels)
    
    cluster1 = []
    cluster2 = []
    cluster3 = []
    
    for i in range(0,15):
      if labels[i] == 0:
          cluster1.append(closest_words[i][0]) 
          
      if labels[i] == 1:
          cluster2.append(closest_words[i][0])
          
      if labels[i] == 2:
          cluster3.append(closest_words[i][0])
      
    print("cluster #1 : ", cluster1)
    print("cluster #2 : ", cluster2)
    print("cluster #3 : ", cluster3)
      
    target_word = 'many'

    # get embedding
    target_word_embedding = model.get_numpy_vector(target_word)
    print('Target word:', target_word)
    #print('Embedding shape:', target_word_embedding.shape)
    #print('Embedding:', target_word_embedding[0:10], '...')

    # find closest words
    closest_words = model.nearest_neighbors(target_word, k=15)
    closest_word_embeddings = []
    numw = 0
    for word, similarity in closest_words:
        print('Word:', word, 'similarity:', similarity)
        closest_word_embeddings.append(model.get_numpy_vector(word))
           
    kmeans = cluster.KMeans(n_clusters=3)
    kmeans.fit(closest_word_embeddings)
    labels = kmeans.labels_
    print ('Cluster id labels for inputted data')
    print (labels)
    
    cluster1 = []
    cluster2 = []
    cluster3 = []
    
    for i in range(0,15):
      if labels[i] == 0:
          cluster1.append(closest_words[i][0]) 
          
      if labels[i] == 1:
          cluster2.append(closest_words[i][0])
          
      if labels[i] == 2:
          cluster3.append(closest_words[i][0])
      
    print("cluster #1 : ", cluster1)
    print("cluster #2 : ", cluster2)
    print("cluster #3 : ", cluster3)
      
        
    target_word = 'imprisoned'

    # get embedding
    target_word_embedding = model.get_numpy_vector(target_word)
    print('Target word:', target_word)
    #print('Embedding shape:', target_word_embedding.shape)
    #print('Embedding:', target_word_embedding[0:10], '...')

    # find closest words
    closest_words = model.nearest_neighbors(target_word, k=15)
    closest_word_embeddings = []
    numw = 0
    for word, similarity in closest_words:
        print('Word:', word, 'similarity:', similarity)
        closest_word_embeddings.append(model.get_numpy_vector(word))
           
    kmeans = cluster.KMeans(n_clusters=3)
    kmeans.fit(closest_word_embeddings)
    labels = kmeans.labels_
    print ('Cluster id labels for inputted data')
    print (labels)
    
    cluster1 = []
    cluster2 = []
    cluster3 = []
    
    for i in range(0,15):
      if labels[i] == 0:
          cluster1.append(closest_words[i][0]) 
          
      if labels[i] == 1:
          cluster2.append(closest_words[i][0])
          
      if labels[i] == 2:
          cluster3.append(closest_words[i][0])
      
    print("cluster #1 : ", cluster1)
    print("cluster #2 : ", cluster2)
    print("cluster #3 : ", cluster3)
      
    target_word = 'branches'

    # get embedding
    target_word_embedding = model.get_numpy_vector(target_word)
    print('Target word:', target_word)
    #print('Embedding shape:', target_word_embedding.shape)
    #print('Embedding:', target_word_embedding[0:10], '...')

    # find closest words
    closest_words = model.nearest_neighbors(target_word, k=15)
    closest_word_embeddings = []
    numw = 0
    for word, similarity in closest_words:
        print('Word:', word, 'similarity:', similarity)
        closest_word_embeddings.append(model.get_numpy_vector(word))
           
    kmeans = cluster.KMeans(n_clusters=3)
    kmeans.fit(closest_word_embeddings)
    labels = kmeans.labels_
    print ('Cluster id labels for inputted data')
    print (labels)
    
    cluster1 = []
    cluster2 = []
    cluster3 = []
    
    for i in range(0,15):
      if labels[i] == 0:
          cluster1.append(closest_words[i][0]) 
          
      if labels[i] == 1:
          cluster2.append(closest_words[i][0])
          
      if labels[i] == 2:
          cluster3.append(closest_words[i][0])
      
    print("cluster #1 : ", cluster1)
    print("cluster #2 : ", cluster2)
    print("cluster #3 : ", cluster3)
      
        
    target_word = 'communist'

    # get embedding
    target_word_embedding = model.get_numpy_vector(target_word)
    print('Target word:', target_word)
    #print('Embedding shape:', target_word_embedding.shape)
    #print('Embedding:', target_word_embedding[0:10], '...')

    # find closest words
    closest_words = model.nearest_neighbors(target_word, k=15)
    closest_word_embeddings = []
    numw = 0
    for word, similarity in closest_words:
        print('Word:', word, 'similarity:', similarity)
        closest_word_embeddings.append(model.get_numpy_vector(word))
           
    kmeans = cluster.KMeans(n_clusters=3)
    kmeans.fit(closest_word_embeddings)
    labels = kmeans.labels_
    print ('Cluster id labels for inputted data')
    print (labels)
    
    cluster1 = []
    cluster2 = []
    cluster3 = []
    
    for i in range(0,15):
      if labels[i] == 0:
          cluster1.append(closest_words[i][0]) 
          
      if labels[i] == 1:
          cluster2.append(closest_words[i][0])
          
      if labels[i] == 2:
          cluster3.append(closest_words[i][0])
      
    print("cluster #1 : ", cluster1)
    print("cluster #2 : ", cluster2)
    print("cluster #3 : ", cluster3)
      
    target_word = 'france'

    # get embedding
    target_word_embedding = model.get_numpy_vector(target_word)
    print('Target word:', target_word)
    #print('Embedding shape:', target_word_embedding.shape)
    #print('Embedding:', target_word_embedding[0:10], '...')

    # find closest words
    closest_words = model.nearest_neighbors(target_word, k=15)
    closest_word_embeddings = []
    numw = 0
    for word, similarity in closest_words:
        print('Word:', word, 'similarity:', similarity)
        closest_word_embeddings.append(model.get_numpy_vector(word))
           
    kmeans = cluster.KMeans(n_clusters=3)
    kmeans.fit(closest_word_embeddings)
    labels = kmeans.labels_
    print ('Cluster id labels for inputted data')
    print (labels)
    
    cluster1 = []
    cluster2 = []
    cluster3 = []
    
    for i in range(0,15):
      if labels[i] == 0:
          cluster1.append(closest_words[i][0]) 
          
      if labels[i] == 1:
          cluster2.append(closest_words[i][0])
          
      if labels[i] == 2:
          cluster3.append(closest_words[i][0])
      
    print("cluster #1 : ", cluster1)
    print("cluster #2 : ", cluster2)
    print("cluster #3 : ", cluster3)
      
    target_word = 'strict'

    # get embedding
    target_word_embedding = model.get_numpy_vector(target_word)
    print('Target word:', target_word)
    #print('Embedding shape:', target_word_embedding.shape)
    #print('Embedding:', target_word_embedding[0:10], '...')

    # find closest words
    closest_words = model.nearest_neighbors(target_word, k=15)
    closest_word_embeddings = []
    numw = 0
    for word, similarity in closest_words:
        print('Word:', word, 'similarity:', similarity)
        closest_word_embeddings.append(model.get_numpy_vector(word))
           
    kmeans = cluster.KMeans(n_clusters=3)
    kmeans.fit(closest_word_embeddings)
    labels = kmeans.labels_
    print ('Cluster id labels for inputted data')
    print (labels)
    
    cluster1 = []
    cluster2 = []
    cluster3 = []
    
    for i in range(0,15):
      if labels[i] == 0:
          cluster1.append(closest_words[i][0]) 
          
      if labels[i] == 1:
          cluster2.append(closest_words[i][0])
          
      if labels[i] == 2:
          cluster3.append(closest_words[i][0])
      
    print("cluster #1 : ", cluster1)
    print("cluster #2 : ", cluster2)
    print("cluster #3 : ", cluster3)
      
        
    target_word = 'earthly'

    # get embedding
    target_word_embedding = model.get_numpy_vector(target_word)
    print('Target word:', target_word)
    #print('Embedding shape:', target_word_embedding.shape)
    #print('Embedding:', target_word_embedding[0:10], '...')

    # find closest words
    closest_words = model.nearest_neighbors(target_word, k=15)
    closest_word_embeddings = []
    numw = 0
    for word, similarity in closest_words:
        print('Word:', word, 'similarity:', similarity)
        closest_word_embeddings.append(model.get_numpy_vector(word))
           
    kmeans = cluster.KMeans(n_clusters=3)
    kmeans.fit(closest_word_embeddings)
    labels = kmeans.labels_
    print ('Cluster id labels for inputted data')
    print (labels)
    
    cluster1 = []
    cluster2 = []
    cluster3 = []
    
    for i in range(0,15):
      if labels[i] == 0:
          cluster1.append(closest_words[i][0]) 
          
      if labels[i] == 1:
          cluster2.append(closest_words[i][0])
          
      if labels[i] == 2:
          cluster3.append(closest_words[i][0])
      
    print("cluster #1 : ", cluster1)
    print("cluster #2 : ", cluster2)
    print("cluster #3 : ", cluster3)
      
    terget_word = "zero"

    # get embedding
    target_word_embedding = model.get_numpy_vector(target_word)
    print('Target word:', target_word)
    #print('Embedding shape:', target_word_embedding.shape)
    #print('Embedding:', target_word_embedding[0:10], '...')

    # find closest words
    closest_words = model.nearest_neighbors(target_word, k=15)
    closest_word_embeddings = []
    numw = 0
    for word, similarity in closest_words:
        print('Word:', word, 'similarity:', similarity)
        closest_word_embeddings.append(model.get_numpy_vector(word))
           
    kmeans = cluster.KMeans(n_clusters=3)
    kmeans.fit(closest_word_embeddings)
    labels = kmeans.labels_
    print ('Cluster id labels for inputted data')
    print (labels)
    
    cluster1 = []
    cluster2 = []
    cluster3 = []
    
    for i in range(0,15):
      if labels[i] == 0:
          cluster1.append(closest_words[i][0]) 
          
      if labels[i] == 1:
          cluster2.append(closest_words[i][0])
          
      if labels[i] == 2:
          cluster3.append(closest_words[i][0])
      
    print("cluster #1 : ", cluster1)
    print("cluster #2 : ", cluster2)
    print("cluster #3 : ", cluster3)
      
    target_word = 'feminism'

    # Embedding of the target word itself (printed for inspection only).
    target_word_embedding = model.get_numpy_vector(target_word)
    print('Target word:', target_word)

    # Fetch the 15 nearest neighbours and collect their embeddings.
    closest_words = model.nearest_neighbors(target_word, k=15)
    closest_word_embeddings = []
    numw = 0  # NOTE(review): never read in this section; kept in case later code uses it
    for word, similarity in closest_words:
        print('Word:', word, 'similarity:', similarity)
        closest_word_embeddings.append(model.get_numpy_vector(word))

    # Cluster the neighbour embeddings into 3 groups with k-means.
    kmeans = cluster.KMeans(n_clusters=3)
    kmeans.fit(closest_word_embeddings)
    labels = kmeans.labels_
    print ('Cluster id labels for inputted data')
    print (labels)

    # Group neighbour words by cluster id. Sizing the loop from the actual
    # labels (instead of a hard-coded range(0, 15)) avoids an IndexError if
    # the model ever returns fewer than k neighbours.
    cluster1 = [closest_words[i][0] for i in range(len(labels)) if labels[i] == 0]
    cluster2 = [closest_words[i][0] for i in range(len(labels)) if labels[i] == 1]
    cluster3 = [closest_words[i][0] for i in range(len(labels)) if labels[i] == 2]

    print("cluster #1 : ", cluster1)
    print("cluster #2 : ", cluster2)
    print("cluster #3 : ", cluster3)
         
    target_word = 'ideas'

    # Embedding of the target word itself (printed for inspection only).
    target_word_embedding = model.get_numpy_vector(target_word)
    print('Target word:', target_word)

    # Fetch the 15 nearest neighbours and collect their embeddings.
    closest_words = model.nearest_neighbors(target_word, k=15)
    closest_word_embeddings = []
    numw = 0  # NOTE(review): never read in this section; kept in case later code uses it
    for word, similarity in closest_words:
        print('Word:', word, 'similarity:', similarity)
        closest_word_embeddings.append(model.get_numpy_vector(word))

    # Cluster the neighbour embeddings into 3 groups with k-means.
    kmeans = cluster.KMeans(n_clusters=3)
    kmeans.fit(closest_word_embeddings)
    labels = kmeans.labels_
    print ('Cluster id labels for inputted data')
    print (labels)

    # Group neighbour words by cluster id. Sizing the loop from the actual
    # labels (instead of a hard-coded range(0, 15)) avoids an IndexError if
    # the model ever returns fewer than k neighbours.
    cluster1 = [closest_words[i][0] for i in range(len(labels)) if labels[i] == 0]
    cluster2 = [closest_words[i][0] for i in range(len(labels)) if labels[i] == 1]
    cluster3 = [closest_words[i][0] for i in range(len(labels)) if labels[i] == 2]

    print("cluster #1 : ", cluster1)
    print("cluster #2 : ", cluster2)
    print("cluster #3 : ", cluster3)
      
        
    target_word = 'theory'

    # Embedding of the target word itself (printed for inspection only).
    target_word_embedding = model.get_numpy_vector(target_word)
    print('Target word:', target_word)

    # Fetch the 15 nearest neighbours and collect their embeddings.
    closest_words = model.nearest_neighbors(target_word, k=15)
    closest_word_embeddings = []
    numw = 0  # NOTE(review): never read in this section; kept in case later code uses it
    for word, similarity in closest_words:
        print('Word:', word, 'similarity:', similarity)
        closest_word_embeddings.append(model.get_numpy_vector(word))

    # Cluster the neighbour embeddings into 3 groups with k-means.
    kmeans = cluster.KMeans(n_clusters=3)
    kmeans.fit(closest_word_embeddings)
    labels = kmeans.labels_
    print ('Cluster id labels for inputted data')
    print (labels)

    # Group neighbour words by cluster id. Sizing the loop from the actual
    # labels (instead of a hard-coded range(0, 15)) avoids an IndexError if
    # the model ever returns fewer than k neighbours.
    cluster1 = [closest_words[i][0] for i in range(len(labels)) if labels[i] == 0]
    cluster2 = [closest_words[i][0] for i in range(len(labels)) if labels[i] == 1]
    cluster3 = [closest_words[i][0] for i in range(len(labels)) if labels[i] == 2]

    print("cluster #1 : ", cluster1)
    print("cluster #2 : ", cluster2)
    print("cluster #3 : ", cluster3)
      
        
    target_word = 'writings'

    # Embedding of the target word itself (printed for inspection only).
    target_word_embedding = model.get_numpy_vector(target_word)
    print('Target word:', target_word)

    # Fetch the 15 nearest neighbours and collect their embeddings.
    closest_words = model.nearest_neighbors(target_word, k=15)
    closest_word_embeddings = []
    numw = 0  # NOTE(review): never read in this section; kept in case later code uses it
    for word, similarity in closest_words:
        print('Word:', word, 'similarity:', similarity)
        closest_word_embeddings.append(model.get_numpy_vector(word))

    # Cluster the neighbour embeddings into 3 groups with k-means.
    kmeans = cluster.KMeans(n_clusters=3)
    kmeans.fit(closest_word_embeddings)
    labels = kmeans.labels_
    print ('Cluster id labels for inputted data')
    print (labels)

    # Group neighbour words by cluster id. Sizing the loop from the actual
    # labels (instead of a hard-coded range(0, 15)) avoids an IndexError if
    # the model ever returns fewer than k neighbours.
    cluster1 = [closest_words[i][0] for i in range(len(labels)) if labels[i] == 0]
    cluster2 = [closest_words[i][0] for i in range(len(labels)) if labels[i] == 1]
    cluster3 = [closest_words[i][0] for i in range(len(labels)) if labels[i] == 2]

    print("cluster #1 : ", cluster1)
    print("cluster #2 : ", cluster2)
    print("cluster #3 : ", cluster3)