Example 1
import os

# EURO_SENTENCES, SEW_SENTENCES and TOM_SENTENCES are path constants defined
# elsewhere in the original project, as are the build_*/parser_* helpers.


def parse_all():
    """
    This method parses all used datasets whether they aren't already parsed.
    :return: None
    """
    bn2wn = build_bn2wns()

    # if the parsed Eurosense sentences don't exist yet
    if not os.path.isfile(EURO_SENTENCES):
        write_file(EURO_SENTENCES, parser_eurosense(bn2wn))

    # if the parsed SEW sentences don't exist yet
    if not os.path.isfile(SEW_SENTENCES):
        write_file(SEW_SENTENCES, parser_sew(bn2wn))

    # if the parsed TOM sentences don't exist yet
    if not os.path.isfile(TOM_SENTENCES):
        wns2bn = build_wns2bn()
        write_file(TOM_SENTENCES, parser_tom(wns2bn))
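parse_all leans on helpers defined elsewhere in the project (build_bn2wns, the parser_* functions, write_file). As a rough sketch of the piece this example depends on, write_file presumably serializes one parsed sentence per line; an illustrative version under that assumption, not the project's actual implementation:

import os

def write_file(path, sentences):
    # Illustrative sketch only: assumes `sentences` is an iterable of
    # strings and that one sentence is written per line.
    os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        for sentence in sentences:
            f.write(sentence + "\n")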
Example 4
import time

import text_utils as txu
import twitter_utils as twu

'''
Implements the first feature: produce a total count for each word and
write it to the output file ft1.txt.
'''

# Record the time when processing starts
start = time.perf_counter()

inp, outp = txu.extract_arguments()
# The input can be either a text file or Twitter API JSON
tweets = '\n'.join(twu.get_input(inp))

words = txu.extract_words(tweets)
counter = txu.get_counter(words)
txu.write_file(
    "\n".join("{} \t\t\t\t\t {}".format(k, v)
              for k, v in sorted(dict(counter).items())), outp)

# Record the time when processing ends
end = time.perf_counter()

# Print the total time taken
print("Total time taken in processing word count:", end - start)
Example 5
import csv
import time

import text_utils as txu
import twitter_utils as twu
# `st` (the sentiment-analysis helper) is imported elsewhere in the original
# project; its module name is not shown in this excerpt.


def write_csv(data, filename):
    # In Python 3 the csv module wants a text-mode file opened with newline=''
    with open(filename, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(data)


if __name__ == '__main__':
    # get the output paths for all features
    inp, urls, tags, replyats, sentiment, trends = txu.extract_arguments(n=7)
    tweets = twu.get_input(inp)
    tweets_text = "\n".join(tweets)

    # extract all URLs in the tweets and write them to a file
    start = time.perf_counter()
    txu.write_file('\n'.join(txu.get_urls(tweets_text)), urls)
    print("Time taken in extracting URLs:", time.perf_counter() - start)

    # extract all hashtags in the tweets and write them to a file
    start = time.perf_counter()
    txu.write_file('\n'.join(txu.extract_hashtags(tweets_text)), tags)
    print("Time taken in extracting hashtags:", time.perf_counter() - start)

    # extract all reply-ats (@mentions) in the tweets and write them to a file
    start = time.perf_counter()
    txu.write_file('\n'.join(txu.extract_replyat(tweets_text)), replyats)
    print("Time taken in extracting replyats:", time.perf_counter() - start)

    # get sentiments for the tweets and write them to a CSV file
    start = time.perf_counter()
    write_csv(st.get_sentiments(tweets), sentiment)
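write_csv passes the result of st.get_sentiments straight to csv.writerows, so it expects an iterable of rows. The exact columns depend on the sentiment helper; a hypothetical call with made-up rows:

rows = [
    ("just landed in rome!", "positive", 0.91),   # hypothetical row shape
    ("flight delayed again", "negative", 0.78),
]
write_csv(rows, "sentiment.csv")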