def neuralNet(text, model, vocabulary, classes, processed=False):
    """Predict the polarity label of a single text using the neural-net model.

    Args:
        text: The input text (raw, or pre-cleaned when ``processed`` is True).
        model: The trained model passed through to the prediction helper.
        vocabulary: Vocabulary used to vectorize the text.
        classes: Label set used to decode the prediction.
        processed: When False, the text is first normalized with sp.minimize.

    Returns:
        The decoded polarity label for the (single) input text.
    """
    clean = text if processed else sp.minimize(text)
    encoded = __vectorize([clean], vocabulary)
    raw_prediction = __predict(encoded, model)
    labels = __decodeLabel(__decodeY(raw_prediction), classes)
    # Only one text was vectorized, so the first decoded label is the answer.
    return labels[0]
def dictionary(text, processed=False):
    """Score text polarity by counting positive vs. negative stem matches.

    Args:
        text: The input text (raw, or pre-cleaned when ``processed`` is True).
        processed: When False, the text is first normalized with sp.minimize.

    Returns:
        __POSITIVE when positive-stem matches outnumber negative ones,
        __NEGATIVE otherwise (ties count as negative).
    """
    if not processed:
        text = sp.minimize(text)
    # Stem every (positive, negative) word pair from the polarity lexicon.
    stem_pairs = [
        [sp.getStem(pos_word), sp.getStem(neg_word)]
        for [pos_word, neg_word] in dl.dictionary().values
    ]
    positives = sum(text.count(pos_stem) for pos_stem, _ in stem_pairs)
    negatives = sum(text.count(neg_stem) for _, neg_stem in stem_pairs)
    return __POSITIVE if positives - negatives > 0 else __NEGATIVE
CALIFICADO = "datasets" + os.path.sep + "train-telefonia.csv"

# Read the labelled CSV once (the original parsed the same file twice,
# once per column).
datos_csv = dl.readFile(CALIFICADO)
polaridades = datos_csv["polaridad"].values
tweets = datos_csv["tweet"].values

# Reshape both 1-D arrays into column vectors so they can be concatenated
# side by side into rows of [polaridad, tweet].
polaridades = polaridades.reshape(len(polaridades), 1)
tweets = tweets.reshape(len(tweets), 1)
datos = np.concatenate((polaridades, tweets), axis=1)
print(datos)

processed = 0
with open('train-depurada.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    headers = ('polaridad', 'tweet')
    writer.writerow(headers)
    for [polaridad, texto_original] in datos:
        tweet_limpio = sp.minimize(texto_original)
        # Keep only rows whose cleaned text is long enough to be meaningful.
        if len(tweet_limpio) > 4:
            # Strip spaces from the label so e.g. " P " and "P" match.
            writer.writerow([polaridad.replace(" ", ""), tweet_limpio])
            processed += 1
            if processed % 10 == 0:
                print("{} tweet procesados".format(processed))
if __name__ == '__main__':
    model, vocabulary, classes = sm.initNeuralNet()

    # Load the tweets to classify from the Excel dataset.
    print(__DATA_COMPLETA)
    excelFile = pd.read_excel(__DATA_COMPLETA)
    tweets = excelFile['text'].values
    print(tweets)

    polaridad = []
    num = 0
    # "with" guarantees the output file is flushed and closed even if
    # classification raises (the original leaked the handle on error).
    with open(__DATA_POLARIDAD, "w", encoding="utf8") as data_conlaridad:
        for text in tweets:
            text = sp.minimize(text)
            print(text)
            # NOTE(review): the dictionary polarity is computed but unused —
            # the db.saveResult call that consumed it was commented out.
            polarityDict = sm.dictionary(text, True)
            polarityModel = sm.neuralNet(text, model, vocabulary, classes, True)
            polaridad.append(polarityModel)
            # polarityModel is exactly the value just appended at index num,
            # so write it directly instead of indexing the list.
            data_conlaridad.write("{};{}\n".format(polarityModel, text))
            num += 1
            if num % 10 == 0:
                print("{} tweets procesados".format(num))
    print("{} tweets procesados en total".format(num))