Example #1
0
def freq_baseline(wordcloud=False, venn=False, minimum_count=10):
    """Build frequency-based candidate aspect terms from each nouns file.

    For every file returned by check_for_nouns_files(), counts noun
    frequencies and writes the terms occurring at least `minimum_count`
    times to an 'aspect_terms_<filename>' file via pp.save_to_file.

    Args:
        wordcloud: if True, also emit a wordcloud input file per corpus.
        venn: if True, also emit a 'venn_<filename>' file with the bare terms.
        minimum_count: frequency threshold for a noun to be kept (default 10,
            matching the original hard-coded value).
    """
    basicConfig(format='%(levelname)s %(message)s', level=INFO)
    nouns_files = check_for_nouns_files()
    for filename in nouns_files:
        nouns = post_processing(filename)

        freq = FreqDist(nouns)
        frequencies = freq.most_common()

        candidate_aspect_terms = []
        for_venn = []
        # Keep only terms at or above the threshold; record both the
        # "term: count" display string and the bare term (for the venn file).
        for term, count in frequencies:
            if count >= minimum_count:
                for_venn.append(term)
                candidate_aspect_terms.append('{}: {}'.format(term, count))

        # Corpus name is the prefix of the filename before the first '_'.
        corpus = filename.split('_')[0]
        candidate_file = 'aspect_terms_{}'.format(filename)
        pp.save_to_file(candidate_file, candidate_aspect_terms)

        if venn:
            venn_file = 'venn_{}'.format(filename)
            pp.save_to_file(venn_file, for_venn)

        if wordcloud:
            words_for_wordcloud(corpus, frequencies, nouns)
Example #2
0
def words_for_wordcloud(corpus, frequencies, nouns, minimum_count=10):
    """Write a space-separated word list for a wordcloud of frequent nouns.

    Keeps every occurrence of each noun whose total count meets the
    threshold, so the wordcloud input reflects true occurrence counts,
    and saves it as 'wordcloud_<corpus>.txt' via pp.save_to_file.

    Args:
        corpus: corpus name used in the output filename.
        frequencies: iterable of (term, count) pairs (e.g. FreqDist.most_common()).
        nouns: iterable of noun tokens, with repetitions.
        minimum_count: frequency threshold (default 10, the original
            hard-coded value).
    """
    # A set makes the per-noun membership test O(1); the original list
    # made the filtering loop O(len(terms)) per noun.
    terms = {term for term, count in frequencies if count >= minimum_count}

    words = ' '.join(noun for noun in nouns if noun in terms)
    pp.save_to_file('wordcloud_{}.txt'.format(corpus), words)
Example #3
0
def write_nouns_to_file(result, corpus, tag, filename):
    """Extract tokens carrying `tag` from tagged pairs and save them.

    Args:
        result: iterable of (token, pos_tag) pairs.
        corpus: corpus name used as the output filename prefix.
        tag: POS tag to select (tokens with r[1] == tag are kept).
        filename: source filename; any directory path is stripped before
            building the output name '<corpus>_nouns_<basename>'.
    """
    words = [token for token, pos in result if pos == tag]

    print('\nwriting nouns to file...')

    # Keep only the base filename; with no '/' present, split('/')[-1]
    # returns the string unchanged, so no length check is needed.
    filename = filename.split('/')[-1]

    pp.save_to_file('{}_nouns_{}'.format(corpus, filename), words)
Example #4
0
def write_semicolon_nouns(tokens_list, tagger, corpus, corpus_tag, filename):
    """Tag each token list and save per-tweet nouns as semicolon-joined rows.

    Args:
        tokens_list: iterable of token lists, one per tweet.
        tagger: POS tagger exposing tag(tokens) -> list of (token, tag) pairs.
        corpus: corpus name used as the output filename prefix.
        corpus_tag: POS tag identifying nouns in this corpus.
        filename: suffix for the output name '<corpus>_semicolon_<filename>'.
    """
    tagged_tweets = [tagger.tag(tokens) for tokens in tokens_list]

    result_csv = []
    for tagged in tagged_tweets:
        # Compare the tag position explicitly (consistent with
        # write_nouns_to_file). The original `corpus_tag in item` tested
        # tuple membership, so a token whose TEXT equals the tag string
        # was wrongly selected as well.
        nouns = [item[0] for item in tagged if item[1] == corpus_tag]
        result_csv.append(';'.join(nouns))
    pp.save_to_file('{}_semicolon_{}'.format(corpus, filename), result_csv)