def vectorize(docs, model):
    """Return the mean word vector of the nouns MeCab finds in *docs*.

    The text is tokenized with MeCab using ChaSen output format. A token is
    kept when its part-of-speech column (column 3) contains "名詞" (noun) and
    contains neither "接頭詞" (prefix) nor "サ変接続" (suru-verb connecting noun),
    and when it is present in the model's vocabulary. The vectors of the
    remaining tokens are averaged element-wise.

    Args:
        docs: Text to tokenize; passed directly to ``MeCab.Tagger.parse``.
        model: Object exposing ``wv.word_vec(token)`` and a vocabulary mapping
            at ``wv.key_to_index`` or ``wv.vocab`` (presumably a gensim
            Word2Vec-style model -- confirm against callers).

    Returns:
        A ``pandas.DataFrame`` built from the averaged vector, or an empty
        list when no token survives the filters or when any step fails.
    """
    # Local import: the file's import block is not part of this block.
    import logging

    try:
        tagger = MeCab.Tagger("-Ochasen")
        parsed = tagger.parse(docs)
        # Double quotes are stripped so the CSV parser does not treat them
        # as field quoting and merge several tokens into one cell.
        table = pd.read_csv(
            StringIO(parsed.replace("\"", "")), sep='\t', header=None)

        pos = table[3]
        is_target = (
            (pos.str.find("名詞") >= 0)
            & (pos.str.find("接頭詞") < 0)
            & (pos.str.find("サ変接続") < 0)
        )
        tokens = table.loc[is_target, 0]

        wv = model.wv
        # gensim >= 4 exposes the vocabulary as ``key_to_index``; older
        # releases use ``vocab``. Only the keys matter here.
        vocab = getattr(wv, "key_to_index", None)
        if vocab is None:
            vocab = wv.vocab
        tokens = tokens[tokens.isin(list(vocab))]

        # Nothing to average: return the documented fallback explicitly
        # instead of relying on a mean-of-empty failure further down.
        if tokens.empty:
            return []

        # ``.values`` is an object array of per-token vectors; the mean over
        # axis 0 therefore averages the vectors element-wise.
        mean_vec = tokens.map(wv.word_vec).values.mean(axis=0)
        return pd.DataFrame(mean_vec)
    except Exception:
        # Deliberate best-effort: callers receive [] on any failure, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        logging.getLogger(__name__).debug(
            "vectorize failed; returning []", exc_info=True)
        return []