def token_frequency(model_name, corpus_vec):
    dict_token = {}
    try:
        sep = os.sep
        file_output = DIR_EMBEDDING + 'frequency' + sep + 'frequency_' + model_name + '.csv'
        for list_tokens in corpus_vec:
            for token in list_tokens:
                if token not in [' ', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.']:
                    if token in dict_token:
                        dict_token[token] = dict_token[token] + 1
                    else:
                        dict_token[token] = 1
        list_token = [{'token': k, 'freq': v} for k, v in dict_token.items()]
        df = pd.DataFrame(list_token, columns=['token', 'freq'])
        df.to_csv(file_output, encoding="utf-8", sep=";", index=False)
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error token_frequency: {0}'.format(e))
    return dict_token
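# Illustrative aside (not used by the pipeline): the counting loop above has the
# same semantics as collections.Counter with the digit/space/dot skip list:
# >>> from collections import Counter
# >>> skip = {' ', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.'}
# >>> Counter(t for tokens in [['ba', 'ta'], ['ta', ' ']] for t in tokens if t not in skip)
# Counter({'ta': 2, 'ba': 1})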
def import_words_corpus(self):
    """
    :Version: 1.0
    :Author: Edwin Puertas
    This function imports the Spanish or English corpus from SemEval-2018
    AIT DISC, depending on self.lang.
    :rtype: list
    :return: list of corpus texts
    """
    result = []
    try:
        file_es = 'SemEval-2018_AIT_DISC_ES.csv'
        file_en = 'SemEval-2018_AIT_DISC_EN.csv'
        print('Loading.... {0} corpus'.format(file_es if self.lang == 'es' else file_en))
        if self.lang == 'es':
            corpus = self.text_analysis.import_corpus(file=file_es)
        else:
            corpus = self.text_analysis.import_corpus(file=file_en)
        result = [i[1] for i in corpus]
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error import_words_corpus: {0}'.format(e))
    return result
def get_features(self, messages, model_type='11111', binary_vad='0000'):
    try:
        # model_type bits (left to right): word, syllable, phoneme frequency,
        # one phoneme, all phonemes; e.g. '11111' enables every block.
        word_features = self.get_feature_word(messages)
        syllable_features = self.get_feature_syllable(messages)
        phoneme_frequency = self.get_frequency_phoneme(messages)
        one_syllable = self.get_feature_phoneme(messages)
        all_syllable = self.get_feature_phoneme(messages, syllable=True)
        vad_features = self.get_feature_vad(messages, binary=binary_vad)
        result = np.zeros((len(messages), 0), dtype="float32")
        # Independent ifs so that several blocks can be concatenated; an elif
        # chain would keep only the first enabled block.
        if int(model_type[0]) == 1:
            result = np.append(result, word_features, axis=1)
        if int(model_type[1]) == 1:
            result = np.append(result, syllable_features, axis=1)
        if int(model_type[2]) == 1:
            result = np.append(result, phoneme_frequency, axis=1)
        if int(model_type[3]) == 1:
            result = np.append(result, one_syllable, axis=1)
        if int(model_type[4]) == 1:
            result = np.append(result, all_syllable, axis=1)
        result = np.append(result, vad_features, axis=1)
        return result
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error get_features: {0}'.format(e))
        return None
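# Illustrative sketch of the model_type bitmask (shapes are hypothetical):
# >>> import numpy as np
# >>> blocks = [np.ones((2, 3)), np.ones((2, 2)), np.ones((2, 4))]
# >>> result = np.zeros((2, 0), dtype="float32")
# >>> for bit, block in zip('101', blocks):
# ...     if bit == '1':
# ...         result = np.append(result, block, axis=1)
# >>> result.shape
# (2, 7)
# Bits one and three are set, so the first and third blocks are concatenated.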
def __init__(self, lang='es', text_analysis=None):
    try:
        if text_analysis is None:
            self.ta = TextAnalysis(lang=lang)
        else:
            self.ta = text_analysis
        file_lexicon = DIR_INPUT + 'NRC-VAD-Lexicon.txt'
        file_word_embedding_en = DIR_MODELS + 'word_embedding_en.model'
        file_word_embedding_es = DIR_MODELS + 'word_embedding_es.model'
        file_syllable_embedding_en = DIR_MODELS + 'syllable_embedding_en.model'
        file_syllable_embedding_es = DIR_MODELS + 'syllable_embedding_es.model'
        file_phoneme_embedding_en = DIR_MODELS + 'phoneme_embedding_en.model'
        file_phoneme_embedding_es = DIR_MODELS + 'phoneme_embedding_es.model'
        print('Loading Lexicons and Embedding.....')
        if lang == 'es':
            self.epi = epitran.Epitran('spa-Latn')
            self.word_embedding = Word2Vec.load(file_word_embedding_es)
            self.syllable_embedding = Word2Vec.load(file_syllable_embedding_es)
            self.phoneme_embedding = Word2Vec.load(file_phoneme_embedding_es)
        else:
            self.epi = epitran.Epitran('eng-Latn')
            self.word_embedding = Word2Vec.load(file_word_embedding_en)
            self.syllable_embedding = Word2Vec.load(file_syllable_embedding_en)
            self.phoneme_embedding = Word2Vec.load(file_phoneme_embedding_en)
        # The lexicon load is language-aware and identical for both branches.
        self.lexicon = self.ta.import_lexicon_vad(file_lexicon, lang=lang)
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error FeatureExtraction: {0}'.format(e))
def dependency_child(self, text):
    result = []
    try:
        doc = self.analysis_pipe(text.lower())
        for token in doc:
            item = {'chunk': token.text,
                    'text': token.text,
                    'pos_': token.pos_,
                    'dep_': token.dep_,
                    'tag_': token.tag_,
                    'head_text': token.head.text,
                    'head_pos': token.head.pos_,
                    'children': None}
            if len(list(token.children)) > 0:
                item['children'] = [{'child': child,
                                     'pos_': child.pos_,
                                     'dep_': child.dep_,
                                     'tag_': child.tag_,
                                     'head.text': child.head.text,
                                     'head.pos_': child.head.pos_}
                                    for child in token.children]
            result.append(item)
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error dependency_child: {0}'.format(e))
    return result
def transform(self, list_messages):
    try:
        result = self.get_features(list_messages)
        return result
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error transform: {0}'.format(e))
def get_frequency_phoneme(self, messages):
    # Builds a bag-of-phonemes count vector per message; its dimensionality is
    # the phoneme vocabulary size, not model.vector_size.
    try:
        counter = 0
        model = self.phoneme_embedding
        index2phoneme = list(model.wv.index2word)
        num_features = len(index2phoneme)
        msg_feature_vec = np.zeros((len(messages), num_features), dtype="float32")
        for msg in tqdm(messages):
            feature_vec = np.zeros(num_features, dtype="float32")
            list_syllable = [token['syllables'] for token in self.ta.tagger(msg)
                             if token['syllables'] is not None]
            for syllable in list_syllable:
                for s in syllable:
                    syllable_phonetic = self.epi.transliterate(s, normpunc=True)
                    if syllable_phonetic in index2phoneme:
                        index = index2phoneme.index(syllable_phonetic)
                        feature_vec[index] += 1
            msg_feature_vec[counter] = feature_vec
            counter += 1
        return msg_feature_vec
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error get_frequency_phoneme: {0}'.format(e))
        return None
def get_feature_syllable(self, messages, syllable_binary='11'):
    # Note: syllable_binary is currently unused by this method.
    try:
        counter = 0
        model = self.syllable_embedding
        num_features = model.vector_size
        index2phoneme_set = set(model.wv.index2word)
        msg_feature_vec = np.zeros((len(messages), num_features), dtype="float32")
        for msg in tqdm(messages):
            num_phonemes = 1  # starts at 1 so empty messages avoid division by zero
            feature_vec = []
            list_syllable = [token['syllables'] for token in self.ta.tagger(msg)
                             if token['syllables'] is not None]
            for syllable in list_syllable:
                for s in syllable:
                    syllable_phonetic = self.epi.transliterate(s, normpunc=True)
                    if syllable_phonetic in index2phoneme_set:
                        feature_vec.append(model.wv[syllable_phonetic])
                        num_phonemes += 1
            feature_vec = np.array(feature_vec, dtype="float32")
            feature_vec = np.sum(feature_vec, axis=0)
            feature_vec = np.divide(feature_vec, num_phonemes)
            msg_feature_vec[counter] = feature_vec
            counter += 1
        return msg_feature_vec
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error get_feature_syllable: {0}'.format(e))
        return None
def get_feature_word(self, messages):
    try:
        counter = 0
        model = self.word_embedding
        num_features = model.vector_size
        index2word_set = set(model.wv.index2word)
        msg_feature_vec = np.zeros((len(messages), num_features), dtype="float32")
        for msg in tqdm(messages):
            num_words = 1  # starts at 1, so the mean divides by word count + 1
            feature_vec = []
            list_words = [token['text'] for token in self.ta.tagger(msg)]
            for word in list_words:
                if word in index2word_set:
                    feature_vec.append(model.wv[word])
                else:
                    # out-of-vocabulary words contribute a zero vector
                    feature_vec.append(np.zeros(num_features, dtype="float32"))
                num_words += 1
            feature_vec = np.array(feature_vec, dtype="float32")
            feature_vec = np.sum(feature_vec, axis=0)
            feature_vec = np.divide(feature_vec, num_words)
            msg_feature_vec[counter] = feature_vec
            counter += 1
        return msg_feature_vec
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error get_feature_word: {0}'.format(e))
        return None
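# Sketch of the pooling above (values hypothetical): word vectors are summed
# and divided by num_words, which starts at 1, so the denominator is count + 1:
# >>> import numpy as np
# >>> vecs = np.array([[1., 2.], [3., 4.]], dtype="float32")
# >>> np.divide(np.sum(vecs, axis=0), 1 + len(vecs))  # -> roughly [1.33, 2.0]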
def tagger(self, text):
    result = None
    try:
        list_tagger = []
        doc = self.analysis_pipe(text.lower())
        for token in doc:
            item = {'text': token.text,
                    'lemma': token.lemma_,
                    'stem': token._.stem,
                    'pos': token.pos_,
                    'tag': token.tag_,
                    'dep': token.dep_,
                    'shape': token.shape_,
                    'is_alpha': token.is_alpha,
                    'is_stop': token.is_stop,
                    'is_digit': token.is_digit,
                    'is_punct': token.is_punct,
                    'syllables': token._.syllables}
            list_tagger.append(item)
        result = list_tagger
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error tagger: {0}'.format(e))
    return result
def analysis_pipe(self, text):
    doc = None
    try:
        doc = self.nlp(text.lower())
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error analysis_pipe: {0}'.format(e))
    return doc
def part_embedding(self, model_name='part_embedding', size=150, min_count=10,
                   window=5, sample=6e-5, negative=20, alpha=0.03,
                   min_alpha=0.0007, syllable=True):
    """
    :Version: 1.0
    :Author: Edwin Puertas
    This function generates syllable or phoneme embeddings in Spanish and English.
    :param syllable: train on syllable units if True, on phoneme units if False
    :type syllable: bool
    :rtype: None
    :return: saves the trained model and its token frequencies to disk
    """
    try:
        start_time = time.time()
        corpus_vec = self.text_analysis.part_vector(self.part_corpus, syllable=syllable)
        model = Word2Vec(corpus_vec, cbow_mean=1, workers=self.cores - 1,
                         size=size, min_count=min_count, window=window,
                         sample=sample, negative=negative, alpha=alpha,
                         min_alpha=min_alpha, iter=10)
        model_name = model_name + '_' + self.lang
        file_name = DIR_MODELS + model_name + '.model'
        model.save(file_name)
        print('Model {0} generated successfully!'.format(file_name))
        vocabulary = list(model.wv.vocab)
        print('Vocabulary: {0}'.format(vocabulary))
        self.text_analysis.token_frequency(model_name=model_name, corpus_vec=corpus_vec)
        # Calculate processing time
        t_sec = round(time.time() - start_time)
        (t_min, t_sec) = divmod(t_sec, 60)
        (t_hour, t_min) = divmod(t_min, 60)
        time_processing = '{} hour:{} min:{} sec'.format(t_hour, t_min, t_sec)
        print('Time Processing: {}'.format(time_processing))
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error part_embedding: {0}'.format(e))
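# Usage sketch (illustrative; `emb` stands for an instance of the enclosing
# embedding class with a loaded part_corpus):
# emb.part_embedding(model_name='syllable_embedding', syllable=True)   # syllable units
# emb.part_embedding(model_name='phoneme_embedding', syllable=False)   # phoneme units
# Each call writes DIR_MODELS + '<model_name>_<lang>.model' and a frequency CSV.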
def stemming(self, text):
    try:
        tokens = word_tokenize(text)
        stemmed = [self.stemmer.stem(word) for word in tokens]
        text = ' '.join(stemmed)
        return text
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error stemming: {0}'.format(e))
        return None
def proper_encoding(text):
    result = ''
    try:
        text = unicodedata.normalize('NFD', text)
        text = text.encode('ascii', 'ignore')
        result = text.decode("utf-8")
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error proper_encoding: {0}'.format(e))
    return result
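# Doctest-style illustration of the NFD-normalize + ASCII-encode round trip:
# >>> import unicodedata
# >>> unicodedata.normalize('NFD', 'canción').encode('ascii', 'ignore').decode('utf-8')
# 'cancion'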
def __init__(self, lang='es', text_analysis=None):
    try:
        print('Load Machine Learning')
        if text_analysis is None:
            self.ta = TextAnalysis(lang=lang)
        else:
            self.ta = text_analysis
        self.features = FeatureExtraction(lang=lang, text_analysis=self.ta)
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error MachineLearning: {0}'.format(e))
def lemmatization(self, text):
    result = ''
    list_tmp = []
    try:
        # analysis_pipe is an instance method; calling it on the class would
        # pass the text as self and fail.
        doc = self.analysis_pipe(text.lower())
        for token in doc:
            list_tmp.append(str(token.lemma_))
        result = ' '.join(list_tmp)
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error lemmatization: {0}'.format(e))
    return result
def get_feature_phoneme(self, messages, syllable=False):
    try:
        counter = 0
        model = self.phoneme_embedding
        num_features = model.vector_size
        index2phoneme_set = set(model.wv.index2word)
        msg_feature_vec = np.zeros((len(messages), num_features), dtype="float32")
        for msg in tqdm(messages):
            size = 1
            feature_vec = []
            list_syllable = [token['syllables'] for token in self.ta.tagger(msg)
                             if token['syllables'] is not None]
            if syllable:
                # Use only the first phoneme of the message's first syllable.
                try:
                    first_syllable = str(list_syllable[0][0])
                    first_syllable = first_syllable[0] \
                        if (first_syllable is not None) and (len(first_syllable) > 0) else ''
                    syllable_phonetic = self.epi.transliterate(first_syllable)
                    if syllable_phonetic in index2phoneme_set:
                        feature_vec.append(model.wv[syllable_phonetic])
                    else:
                        feature_vec.append(np.zeros(num_features, dtype="float32"))
                except Exception as e_epi:
                    print('Error transliterate: {0}'.format(e_epi))
            else:
                # Use every phoneme in the message.
                list_phoneme = self.epi.trans_list(msg)
                size = len(list_phoneme)
                for phoneme in list_phoneme:
                    if phoneme in index2phoneme_set:
                        feature_vec.append(model.wv[phoneme])
                    else:
                        feature_vec.append(np.zeros(num_features, dtype="float32"))
            feature_vec = np.array(feature_vec, dtype="float32")
            feature_vec = np.sum(feature_vec, axis=0)
            feature_vec = np.divide(feature_vec, size)
            msg_feature_vec[counter] = feature_vec
            counter += 1
        return msg_feature_vec
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error get_feature_phoneme: {0}'.format(e))
        return None
def get_feature_vad(self, messages, binary='0000'):
    try:
        counter = 0
        num_features = 4
        msg_feature_vec = np.zeros((len(messages), num_features), dtype="float32")
        for msg in tqdm(messages):
            dict_vad = self.get_vad(msg)
            v = dict_vad['valence']
            a = dict_vad['arousal']
            d = dict_vad['dominance']
            vad = dict_vad['vad']
            # Each bit of `binary` gates the corresponding [v, a, d, vad] component.
            row = [value if bit == '1' else 0.0
                   for bit, value in zip(binary, (v, a, d, vad))]
            msg_feature_vec[counter] = row
            counter += 1
        return msg_feature_vec
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error get_feature_vad: {0}'.format(e))
        return None
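# Sketch of the bit-gating above (scores are hypothetical):
# >>> v, a, d, vad = 0.8, 0.5, 0.3, 0.6
# >>> [value if bit == '1' else 0.0 for bit, value in zip('1010', (v, a, d, vad))]
# [0.8, 0.0, 0.3, 0.0]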
def stopwords(text):
    result = ''
    try:
        nlp = Spanish() if TextAnalysis.lang == 'es' else English()
        doc = nlp(text)
        token_list = [token.text for token in doc]
        sentence = []
        for word in token_list:
            lexeme = nlp.vocab[word]
            if not lexeme.is_stop:
                sentence.append(word)
        result = ' '.join(sentence)
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error stopwords: {0}'.format(e))
    return result
def dependency(self, text):
    result = []
    try:
        doc = self.analysis_pipe(text.lower())
        doc_chunks = list(doc.noun_chunks)
        for chunk in doc_chunks:
            item = {'chunk': chunk,
                    'text': chunk.text,
                    'root_text': chunk.root.text,
                    'root_dep': chunk.root.dep_}
            result.append(item)
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error dependency: {0}'.format(e))
    return result
def words_embedding(self, model_name='word_embedding', size=300, min_count=50,
                    window=5, sample=6e-5, negative=20, alpha=0.03,
                    min_alpha=0.0007):
    try:
        start_time = time.time()
        corpus_vec = self.text_analysis.sentences_vector(self.corpus)
        model = Word2Vec(corpus_vec, cbow_mean=1, workers=self.cores - 1,
                         size=size, min_count=min_count, window=window,
                         sample=sample, negative=negative, alpha=alpha,
                         min_alpha=min_alpha, iter=10)
        model_name = model_name + '_' + self.lang
        file_name = DIR_MODELS + model_name + '.model'
        model.save(file_name)
        print('Model {0} generated successfully!'.format(model_name))
        vocabulary = list(model.wv.vocab)
        print('Vocabulary: {0}'.format(vocabulary))
        self.text_analysis.token_frequency(model_name=model_name, corpus_vec=corpus_vec)
        # Calculate processing time
        t_sec = round(time.time() - start_time)
        (t_min, t_sec) = divmod(t_sec, 60)
        (t_hour, t_min) = divmod(t_min, 60)
        time_processing = '{} hour:{} min:{} sec'.format(t_hour, t_min, t_sec)
        print('Time Processing: {}'.format(time_processing))
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error words_embedding: {0}'.format(e))
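# Usage sketch (illustrative; `emb` stands for an instance of the enclosing
# embedding class with a loaded corpus):
# emb.words_embedding(size=300, min_count=50)
# This writes DIR_MODELS + 'word_embedding_<lang>.model' plus a token-frequency CSV.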
def get_similarity(self, model_name):
    dict_vocabulary = {}
    try:
        file_model = "{0}{1}_{2}.model".format(DIR_MODELS, model_name, self.lang)
        model = Word2Vec.load(file_model, mmap=None)
        vocabulary = list(model.wv.vocab)
        for i in vocabulary:
            most_similar = model.wv.most_similar(i)
            dict_vocabulary[i] = most_similar
            if i != '':
                print('Token: {0}\nMost Similar:'.format(i))
                for j in most_similar:
                    print(j)
        print(vocabulary)
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error get_similarity: {0}'.format(e))
    return dict_vocabulary
def delete_special_patterns(text):
    result = ''
    try:
        text = re.sub(r'\©|\×|\⇔|\_|\»|\«|\~|\#|\$|\€|\Â|\�|\¬', ' ', text)  # remove special characters
        text = re.sub(r'\,|\;|\:|\!|\¡|\’|\‘|\”|\“|\"|\'|\`', ' ', text)  # remove punctuation
        text = re.sub(r'\}|\{|\[|\]|\(|\)|\<|\>|\?|\¿|\°|\|', ' ', text)  # remove brackets and parentheses
        text = re.sub(r'\/|\-|\+|\*|\=|\^|\%|\&|\$|\.', ' ', text)  # remove operators
        text = re.sub(r'\b\d+(?:\.\d+)?\s+', ' ', text)  # remove numbers (with optional decimal part)
        result = text.lower()
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error delete_special_patterns: {0}'.format(e))
    return result
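# Illustration (input hypothetical): punctuation, brackets, operators and
# free-standing numbers become spaces and the result is lowercased, e.g.
# '¡Hola, mundo! (prueba) 3.14 final' -> 'hola mundo prueba final' (modulo spacing).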
def part_vector(self, list_text, syllable=True, size_syllable=0):
    result = []
    try:
        for text in list_text:
            doc = self.analysis_pipe(text.lower())
            for stm in doc.sents:
                stm = str(stm).rstrip()
                stm = self.clean_text(stm)
                if stm != '':
                    print('Sentence: {0}'.format(stm))
                    if syllable:
                        list_syllable = [token['syllables'] for token in self.tagger(stm)
                                         if token['syllables'] is not None]
                        list_syllable_phonetic = []
                        # syllable_item avoids shadowing the `syllable` flag
                        for syllable_item in list_syllable:
                            n = len(syllable_item) if size_syllable == 0 else size_syllable
                            for s in syllable_item[:n]:
                                syllable_phonetic = self.epi.transliterate(s, normpunc=True)
                                # membership test: `is not` against a list
                                # literal is always True
                                if syllable_phonetic not in [' ', '', '\ufeff', '1']:
                                    list_syllable_phonetic.append(syllable_phonetic)
                        result.append(list_syllable_phonetic)
                        print('Vector: {0}'.format(list_syllable_phonetic))
                    else:
                        list_phonemes = self.epi.trans_list(stm, normpunc=True)
                        list_phonemes = [i for i in list_phonemes
                                         if i not in [' ', '', '\ufeff', '1']]
                        result.append(list_phonemes)
                        print('Vector: {0}'.format(list_phonemes))
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error part_vector: {0}'.format(e))
    return result
def dependency_all(self, text):
    result = []
    try:
        doc = self.analysis_pipe(text.lower())
        for chunk in doc.noun_chunks:
            item = {'chunk': chunk,
                    'text': chunk.root.text,
                    'pos_': chunk.root.pos_,
                    'dep_': chunk.root.dep_,
                    'tag_': chunk.root.tag_,
                    'lemma_': chunk.root.lemma_,
                    'is_stop': chunk.root.is_stop,
                    'is_punct': chunk.root.is_punct,
                    'head_text': chunk.root.head.text,
                    'head_pos': chunk.root.head.pos_,
                    'children': [{'child': child,
                                  'pos_': child.pos_,
                                  'dep_': child.dep_,
                                  'tag_': child.tag_,
                                  'lemma_': child.lemma_,
                                  'is_stop': child.is_stop,
                                  'is_punct': child.is_punct,
                                  'head.text': child.head.text,
                                  'head.pos_': child.head.pos_}
                                 for child in chunk.root.children]}
            result.append(item)
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error dependency_all: {0}'.format(e))
    return result
def import_corpus(self, file, sep=';', name_id="id", name_text="text"):
    result = []
    try:
        count = 0
        file = DIR_INPUT + file
        df = pd.read_csv(file, sep=sep)
        df.dropna(inplace=True)
        df = df[[name_id, name_text]].values.tolist()
        for row in tqdm(df):
            row_id = row[0]
            text = str(row[1])
            if text != '':
                result.append([row_id, text])
                count += 1
        print('# Sentence: {0}'.format(count))
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error import_corpus: {0}'.format(e))
    return result
def load_sapcy(self, lang):
    result = None
    try:
        if lang == 'es':
            result = spacy.load('es_core_news_md', disable=['ner'])
        else:
            result = spacy.load('en_core_web_md', disable=['ner'])
        # Initialise pipeline components
        stemmer_text = Steaming(lang)
        syllables = SpacySyllables(result)
        emoji = Emoji(result)
        result.add_pipe(syllables, after="tagger")
        result.add_pipe(emoji, first=True)
        result.add_pipe(stemmer_text, after='parser', name='stemmer')
        print('Language: {0}\nText Analysis: {1}'.format(lang, result.pipe_names))
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error load_sapcy: {0}'.format(e))
    return result
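# Usage sketch (assumes the es_core_news_md / en_core_web_md models are
# installed; `ta` is a hypothetical instance of the enclosing class):
# nlp = ta.load_sapcy('es')
# nlp.pipe_names then starts with 'emoji' (added first), with 'syllables'
# after the tagger and 'stemmer' after the parser.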
def plot(model_name, size=15):
    try:
        print('Plot {0} embedding...'.format(model_name))
        sep = os.sep
        # Create a t-SNE projection of the embedding and plot it
        file_model = DIR_MODELS + model_name + ".model"
        model = Word2Vec.load(file_model, mmap=None)
        labels = []
        tokens = []
        list_vocabulary = list(model.wv.vocab)
        for word in list_vocabulary:
            tokens.append(model.wv[word])
            labels.append(word)
        tsne_model = TSNE(perplexity=40, n_components=2, init='pca',
                          n_iter=2500, random_state=23)
        new_values = tsne_model.fit_transform(tokens)
        x = []
        y = []
        for value in new_values:
            x.append(value[0])
            y.append(value[1])
        plt.figure(figsize=(size, size))
        for i in range(len(x)):
            plt.scatter(x[i], y[i], marker='X', color='blue')
            plt.annotate(labels[i], xy=(x[i], y[i]), xytext=(5, 2),
                         textcoords='offset points', ha='right', va='bottom')
        file_output = DIR_EMBEDDING + sep + 'images' + sep + model_name + '.png'
        plt.savefig(file_output)
        plt.show()
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error plot: {0}'.format(e))
def import_dataset(file, **kwargs):
    result = None
    try:
        print('Loading dataset {0}...'.format(file))
        setting = {}
        mini_size = kwargs.get('mini_size') if type(kwargs.get('mini_size')) is int else 2
        # keep the provided separator when it is a string, otherwise default to ';'
        sep = kwargs.get('sep') if type(kwargs.get('sep')) is str else ';'
        setting['url'] = kwargs.get('url') if type(kwargs.get('url')) is bool else False
        setting['mention'] = kwargs.get('mention') if type(kwargs.get('mention')) is bool else False
        setting['emoji'] = kwargs.get('emoji') if type(kwargs.get('emoji')) is bool else False
        setting['hashtag'] = kwargs.get('hashtag') if type(kwargs.get('hashtag')) is bool else False
        setting['lemmatize'] = kwargs.get('lemmatizer') if type(kwargs.get('lemmatizer')) is bool else False
        setting['stopwords'] = kwargs.get('stopwords') if type(kwargs.get('stopwords')) is bool else False
        data = []
        file_path = DIR_INPUT + file
        raw_data = pd.read_csv(file_path, sep=sep, encoding='UTF-8')
        for i, row in raw_data.iterrows():
            text = TextAnalysis.clean_text(row['Tweet'], **setting)
            len_text = len(text.split(' '))
            if len_text > mini_size:
                tag = int(row['Intensity'])
                # map intensity to its sign: {-1, 0, 1}
                value = 1 if tag > 0 else (-1 if tag < 0 else 0)
                data.append([text, value])
        result = pd.DataFrame(data, columns=['message', 'valence'])
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error import_dataset: {0}'.format(e))
    return result
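# Usage sketch (the file name is hypothetical; the CSV must provide the
# 'Tweet' and 'Intensity' columns this function reads):
# df = import_dataset('valence_corpus.csv', sep=';', url=True, mention=True)
# df has columns ['message', 'valence'], with valence mapped to {-1, 0, 1}.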
def sentences_vector(self, list_text):
    result = []
    try:
        setting = {'url': True,
                   'mention': True,
                   'emoji': False,
                   'hashtag': True,
                   'stopwords': True}
        for text in tqdm(list_text):
            text = self.clean_text(text, **setting)
            if text is not None:
                doc = self.analysis_pipe(text)
                if doc is not None:
                    vector = [i.text for i in doc]
                    result.append(vector)
    except Exception as e:
        Utils.standard_error(sys.exc_info())
        print('Error sentences_vector: {0}'.format(e))
    return result