Example #1
def testData(text):
    # Prepare raw input text for prediction: split it into sentences,
    # clean and lemmatize each one, then vectorize with bag-of-words + TF-IDF.
    testData = []
    for s in nlp.tokenizationSentence(text):
        txt = nlp.lemmatization(nlp.cleanText(s))
        testData.append(txt)
    testData = nlp.tfidf(nlp.bagOfWords(testData))
    return testData
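For illustration, a minimal usage sketch of testData feeding a classifier. The model object and its predict() interface are assumptions (any scikit-learn-style estimator trained on the same TF-IDF features would fit), not part of the example above:

# Hypothetical usage; "model" is an assumed, already-trained classifier.
features = testData("Some freshly scraped article text to check.")
predictions = model.predict(features)  # one label per sentence, e.g. 'true'/'false'
print(predictions)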
def tfidf():
    # Flask endpoint: read the posted JSON, preprocess each sentence,
    # and return the resulting TF-IDF matrix as a list of text lines.
    request_data = request.get_json()
    sentences = nlp.tokenizationSentence(request_data['text'])
    sent = []
    for s in sentences:
        sent.append(nlp.lemmatization(nlp.cleanText(s)))
    return jsonify(tfidf=str(nlp.tfidf(nlp.bagOfWords(sent))).replace(
        '\t', ' ').splitlines())
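Assuming this handler is registered as a POST route (the /tfidf path and host below are assumptions, not taken from the example), it could be exercised from a client with the requests library. A minimal sketch:

import requests

# Hypothetical client call; URL and route path are assumptions.
resp = requests.post("http://localhost:5000/tfidf",
                     json={"text": "First sentence. Second sentence."})
print(resp.json()["tfidf"])  # lines of the stringified TF-IDF matrix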
Example #3
def dataPreparation():
    # Scrape fresh articles, then build a labelled CSV from the true/fake corpora.
    scraping.scraping()
    with open(rootPath + "/Scraping/data/trueData.txt", "r",
              encoding="utf8") as trueData:
        trueTxt = trueData.read()
    with open(rootPath + "/Scraping/data/fakeData.txt", "r",
              encoding="utf8") as fakeData:
        fakeTxt = fakeData.read()
    data = []
    # Keep only cleaned, lemmatized sentences with at least 20 characters.
    for s in nlp.tokenizationSentence(trueTxt):
        txt = nlp.lemmatization(nlp.cleanText(s))
        if len(txt) >= 20:
            data.append([txt, 'true'])
    for s in nlp.tokenizationSentence(fakeTxt):
        txt = nlp.lemmatization(nlp.cleanText(s))
        if len(txt) >= 20:
            data.append([txt, 'false'])
    # Deduplicate, shuffle, and persist the dataset for the fake-news classifier.
    dataCsv = pd.DataFrame(data, columns=['news', 'class'])
    dataCsv.drop_duplicates(subset=None, inplace=True)
    dataCsv = dataCsv.sample(frac=1)
    dataCsv.to_csv(rootPath + '/FakeNews/data.csv', index=False)
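The CSV written above can then feed a training step. A minimal sketch, assuming pandas and scikit-learn are available and that the downstream pipeline simply splits the labelled sentences into train and test sets (the split ratio and random seed are assumptions):

import pandas as pd
from sklearn.model_selection import train_test_split

# Hypothetical follow-up step; reads the file produced by dataPreparation().
dataCsv = pd.read_csv(rootPath + '/FakeNews/data.csv')
X_train, X_test, y_train, y_test = train_test_split(
    dataCsv['news'], dataCsv['class'], test_size=0.2, random_state=42)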
def cleanText():
    # Flask endpoint: return the cleaned version of the posted text.
    request_data = request.get_json()
    return jsonify(text=nlp.cleanText(request_data['text']))