Example #1
def parserNegativeVerb(sentences):
    """Filter `sentences` down to those containing a negative verb, dump
    the matches to disk, and visualize the parses of the first 30.

    `sentences` is an iterable of raw sentence strings.
    """
    import os
    from finnish_toolkit import parser

    with open("data/negative_verbs_dump.txt", "r") as f:
        words_to_check = [x.strip() for x in f.readlines()]

    # Keep every sentence that mentions at least one negative verb.
    sentences_containing_words = []
    for sentence in sentences:
        for word in words_to_check:
            if word in sentence.lower():
                sentences_containing_words.append(sentence)
                break

    with open('data/negative_verbs_sentences_dump.txt', 'w') as f:
        f.writelines("%s\n" % line for line in sentences_containing_words)

    # Parse only the first 30 matching sentences.
    mp = parser.Parser()
    parsed_sentences = [mp.parse(s) for s in sentences_containing_words[:30]]

    dump_folder = './data/visualizations/negative_verbs'
    os.makedirs(dump_folder, exist_ok=True)

    for i, parsed_sentence in enumerate(parsed_sentences):
        parser.visualize(
            parsed_sentence, '{}/{}.html'.format(dump_folder, i))
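
A minimal usage sketch, assuming the raw sentences live in a plain-text dump with one sentence per line (the `data/sentences_dump.txt` path is hypothetical):

# Hypothetical driver for parserNegativeVerb.
with open('data/sentences_dump.txt', 'r') as f:
    sentences = [x.strip() for x in f.readlines()]
parserNegativeVerb(sentences)
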
Example #2
def parse_most_frequent_named_entity_sentences():
    """Parse the dumped disease sentences and write an HTML dependency
    visualization for each of the first 30."""
    import os
    from finnish_toolkit import parser

    with open('data/five_diseases_sentences_dump.txt', 'r') as f:
        sentences = [x.strip() for x in f.readlines()]

    # Parse only the first 30 sentences.
    mp = parser.Parser()
    parsed_sentences = [mp.parse(s) for s in sentences[:30]]

    dump_folder = './data/visualizations/five_diseases'
    os.makedirs(dump_folder, exist_ok=True)

    for i, parsed_sentence in enumerate(parsed_sentences):
        parser.visualize(
            parsed_sentence, '{}/{}.html'.format(dump_folder, i))
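
Examples #1 and #2 end with the same parse-and-visualize steps; a shared helper would remove the duplication. A sketch (`parse_and_visualize` is a hypothetical name, not part of finnish_toolkit):

def parse_and_visualize(sentences, dump_folder, limit=30):
    # Parse up to `limit` sentences and write one HTML visualization each.
    import os
    from finnish_toolkit import parser

    os.makedirs(dump_folder, exist_ok=True)
    mp = parser.Parser()
    for i, sentence in enumerate(sentences[:limit]):
        parser.visualize(
            mp.parse(sentence), '{}/{}.html'.format(dump_folder, i))
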
Example #3
def getTopicVariation():
    """Return the overall topic of each lemmatized, time-bucketed file."""
    from os import listdir

    topics = []
    for fileN in listdir("./post_process/time_lemmatized"):
        if not fileN.endswith(".txt"):
            continue
        filename = "./post_process/time_lemmatized/{}".format(fileN)

        with open(filename) as f:
            print(filename)
            sentencespre = f.read()

        topic = overallTopic(sentencespre)
        print(topic)
        topics.append(topic)

    return topics

def getSentVariation():
    """Return the overall sentiment of each time-bucketed file."""
    from os import listdir

    sentiment = []
    for fileN in listdir("./post_process/time"):
        if not fileN.endswith(".txt"):
            continue
        filename = "./post_process/time/{}".format(fileN)

        with open(filename) as f:
            print(filename)
            sentencespre = f.read()

        sentiment_item = overallSentiment(sentencespre)
        print(sentiment_item)
        sentiment.append(sentiment_item)

    return sentiment
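
Both helpers are called the same way; a minimal driver (a sketch, printing is the only output assumed here):

# Hypothetical driver: track how topic and sentiment drift across time buckets.
topics = getTopicVariation()
sentiments = getSentVariation()
for topic, sent in zip(topics, sentiments):
    print(topic, sent)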


# sent = overallSentiment(sentencespre)
# overall_sent = [overallSentiment(sentence) for sentence in sentences]
# sentences=fetchSentences()
# histogramNER()
# commoncooccurred = mostCooccuring()
# print(overallTopic(". ".join(sentences)))
# analyzeFiles()
# translationFunc()
# getTopicVariation()
# getSentVariation()
# translateDivided()
# extract_sentences_of_most_frequent_named_entities()
# parse_most_frequent_named_entity_sentences()