def __init__(self, dict, model_source, tag_source):
    """Register the jieba dictionary, read the tag file and load the model.

    Args:
        dict: Passed unchanged to ``utils.jieba_add_dict`` (presumably the
            path of a jieba user dictionary -- confirm against ``utils``).
            The name shadows the builtin but is kept for caller
            compatibility.
        model_source: Location handed to ``gensim.models.Word2Vec.load``.
        tag_source: Path of a text file; every line becomes one entry of
            ``self.tags_repo``.
    """
    utils.jieba_add_dict(dict)
    # NOTE(review): ``tag_repo`` is never filled in this method; it is kept
    # because code outside this block may read it -- confirm whether it is a
    # leftover of ``tags_repo``.
    self.tag_repo = []
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(tag_source) as tag_file:
        # Lines are stored verbatim, trailing newline included.
        self.tags_repo = list(tag_file)
    self.model = gensim.models.Word2Vec.load(model_source)
def __init__(self, dict, model_source, tag_source):
    """Set up jieba, collect the tag lines and load the Word2Vec model.

    Args:
        dict: Forwarded as-is to ``utils.jieba_add_dict`` (presumably a jieba
            user-dictionary path -- confirm against ``utils``). Shadows the
            builtin ``dict``; the name is kept so keyword callers still work.
        model_source: Argument for ``gensim.models.Word2Vec.load``.
        tag_source: Path of the text file whose lines populate
            ``self.tags_repo``.
    """
    utils.jieba_add_dict(dict)
    # NOTE(review): ``tag_repo`` stays empty here; it is preserved in case
    # other code reads it -- verify it is not a typo for ``tags_repo``.
    self.tag_repo = []
    self.tags_repo = []
    # The ``with`` block closes the file even if reading raises; the original
    # bare ``open()`` leaked the handle.
    with open(tag_source) as tag_file:
        # Each line is kept exactly as read (newline terminator included).
        self.tags_repo.extend(tag_file)
    self.model = gensim.models.Word2Vec.load(model_source)
def train_word_vector(source, dict, wordvec):
    """Train a Word2Vec model on the ``comment`` column of a TSV file and save it.

    Args:
        source: Tab-separated file to read; it must provide a ``comment``
            column. The first column is used as the index.
        dict: Passed unchanged to ``utils.jieba_add_dict`` before
            segmentation (presumably a jieba user-dictionary path -- confirm
            against ``utils``). Shadows the builtin; kept for compatibility.
        wordvec: Destination handed to ``model.save``.

    Returns:
        The trained ``gensim.models.Word2Vec`` model.
    """
    # Function-scope import so this block does not rely on a module-level
    # ``pandas`` name being present.
    import pandas

    utils.jieba_add_dict(dict)
    # ``DataFrame.from_csv`` was deprecated in pandas 0.21 and removed in 1.0.
    # ``read_csv`` with these arguments is its documented equivalent
    # (header row 0, first column as index, index dates parsed).
    comments_df = pandas.read_csv(
        source, sep='\t', header=0, index_col=0, parse_dates=True
    )
    # One token list per comment: strip punctuation, then segment with jieba.
    document = [
        list(jieba.cut(utils.remove_punctuation(line)))
        for line in comments_df['comment'].values
    ]
    model = gensim.models.Word2Vec(document)
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print('saving word vector model')
    model.save(wordvec)
    return model
def train_word_vector(source, dict, wordvec):
    """Build, save and return a Word2Vec model from a TSV file of comments.

    Args:
        source: Tab-separated input file containing a ``comment`` column; the
            first column is read as the index.
        dict: Forwarded as-is to ``utils.jieba_add_dict`` (presumably a jieba
            user-dictionary path -- confirm against ``utils``). The name
            shadows the builtin but is part of the public signature.
        wordvec: Target passed to ``model.save``.

    Returns:
        The ``gensim.models.Word2Vec`` instance that was trained and saved.
    """
    # Imported locally so the function works even if the module only imports
    # ``DataFrame`` from pandas.
    import pandas

    utils.jieba_add_dict(dict)
    # Replaces ``DataFrame.from_csv`` (deprecated in pandas 0.21, removed in
    # 1.0) with the equivalent ``read_csv`` call its documentation recommends.
    comments_df = pandas.read_csv(
        source, sep='\t', header=0, index_col=0, parse_dates=True
    )
    # Tokenise every comment: punctuation removed first, then jieba
    # segmentation materialised into a list.
    document = [
        list(jieba.cut(utils.remove_punctuation(line)))
        for line in comments_df['comment'].values
    ]
    model = gensim.models.Word2Vec(document)
    # ``print(...)`` with one string argument behaves the same on Python 2
    # and 3, unlike the original print statement.
    print('saving word vector model')
    model.save(wordvec)
    return model