Ejemplo n.º 1
0
"can decide to use numbers as well since rent and dates etc get deleted"

#remarks=[re.sub("[^A-Za-z0-9\s]","",x) for x in remarks]

"converting sentences to list of words"
# Tokenise every remark on whitespace: `fin` holds one list of word
# strings per remark, in the same order as `remarks`.
fin = [sentence.split() for sentence in remarks]


from gensim.corpora.dictionary import Dictionary
# Build the gensim vocabulary over the tokenised remarks.
dictionary = Dictionary(fin)

# Forward lookup built from the pairs yielded by `iteritems()`; per the
# probe below, integer ids map to tokens.
dict_fin = {token_id: token for token_id, token in dictionary.iteritems()}
dict_fin[1]  # also

# Reverse lookup: the same pairs flipped, so tokens map back to their ids.
rev_dict_fin = {token: token_id for token_id, token in dictionary.iteritems()}
rev_dict_fin["also"]  # 1


# Lots of spelling mistakes in the remarks; a character-level model could be used instead.
from gensim.models import Word2Vec

# Train word embeddings on the tokenised remarks. min_count=1 keeps every
# token, including words that occur only once.
model = Word2Vec(fin, min_count=1)

# Query through `model.wv` (the trained KeyedVectors), consistent with the
# calls below: the model-level `Word2Vec.most_similar` is deprecated in
# gensim 3.x and removed in 4.0, while `wv.most_similar` works on both.
model.wv.most_similar("nyasa")

# Vocabulary size.
# NOTE(review): `wv.vocab` exists only in gensim < 4.0; on gensim 4.x the
# equivalent is `len(model.wv.key_to_index)` -- confirm the installed gensim
# version before changing this line.
len(model.wv.vocab)

# Embedding vector for a single token.
model.wv.get_vector("also")

"""