def testData(text):
    # Build the TF-IDF representation of an input text for prediction:
    # clean and lemmatize each sentence, then vectorize the whole set.
    data = []
    for s in nlp.tokenizationSentence(text):
        txt = nlp.lemmatization(nlp.cleanText(s))  # clean the sentence, not the full text
        data.append(txt)
    return nlp.tfidf(nlp.bagOfWords(data))
def tfidf():
    # Flask handler: returns the TF-IDF matrix of the posted text, one row per line.
    request_data = request.get_json()
    sentences = nlp.tokenizationSentence(request_data['text'])
    sent = []
    for s in sentences:
        sent.append(nlp.lemmatization(nlp.cleanText(s)))
    return jsonify(tfidf=str(nlp.tfidf(nlp.bagOfWords(sent))).replace('\t', ' ').splitlines())
def dataPreparation():
    # Scrape fresh articles, then build a labelled, de-duplicated, shuffled CSV dataset.
    scraping.scraping()
    with open(rootPath + "/Scraping/data/trueData.txt", "r", encoding="utf8") as trueData:
        trueTxt = trueData.read()
    with open(rootPath + "/Scraping/data/fakeData.txt", "r", encoding="utf8") as fakeData:
        fakeTxt = fakeData.read()
    data = []
    for s in nlp.tokenizationSentence(trueTxt):
        txt = nlp.lemmatization(nlp.cleanText(s))
        if len(txt) >= 20:  # drop sentences too short to be informative
            data.append([txt, 'true'])
    for s in nlp.tokenizationSentence(fakeTxt):
        txt = nlp.lemmatization(nlp.cleanText(s))
        if len(txt) >= 20:
            data.append([txt, 'false'])
    dataCsv = pd.DataFrame(data, columns=['news', 'class'])
    dataCsv.drop_duplicates(subset=None, inplace=True)
    dataCsv = dataCsv.sample(frac=1)  # shuffle the rows
    dataCsv.to_csv(rootPath + '/FakeNews/data.csv', index=False)
def cleanText():
    # Flask handler: returns the cleaned version of the posted text.
    request_data = request.get_json()
    return jsonify(text=nlp.cleanText(request_data['text']))
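# Minimal wiring sketch (not part of the original listing): one way the excerpts above
# could be assembled into a runnable module. The route paths, the rootPath definition
# and the POST method choice are illustrative assumptions; nlp and scraping are the
# project's own helper modules referenced by the functions above.
import os

import pandas as pd
from flask import Flask, request, jsonify

import nlp        # project module providing cleanText, tokenizationSentence, lemmatization, bagOfWords, tfidf
import scraping   # project module providing scraping()

rootPath = os.path.dirname(os.path.abspath(__file__))  # assumed project root
app = Flask(__name__)

# The handlers read JSON from the request body, so they are registered as POST routes.
app.add_url_rule('/tfidf', view_func=tfidf, methods=['POST'])
app.add_url_rule('/cleanText', view_func=cleanText, methods=['POST'])

if __name__ == '__main__':
    dataPreparation()   # rebuild data.csv from freshly scraped articles
    app.run(debug=True)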