def main():
    """Expand hashtags in the SemEval training tweets and write one tweet per line.

    Reads the tweets from the raw training file via ``oldWork.readTweets``,
    replaces every hashtag token (except the ``#semst`` marker) with the result
    of ``ParseSentence``, collapses runs of whitespace, and writes each
    resulting sentence on its own line to ``../processedTweets.txt``.
    """
    wordlist = InitializeWords()
    tweetFile = '../dataset_raw/semeval2016-task6-trainingdata.txt'
    tweetTags = '../processedTweets.txt'

    # The last element returned by readTweets is not a tweet (presumably the
    # class labels -- confirm against readTweets), so it is dropped here.
    tweets = oldWork.readTweets(tweetFile)[:-1]

    # Open the output only after the input was read successfully, and use a
    # context manager so the file is flushed and closed even if a tweet fails.
    with open(tweetTags, 'w') as tweetWriter:
        for tweet in tweets:
            words = []
            for word in tweet:
                # Only real hashtags are split; '#semst' is left untouched.
                if word.startswith('#') and word != '#semst':
                    word = ParseSentence(word, wordlist)
                words.append(word)
            # split()/join() normalizes any extra whitespace that the
            # expanded hashtags or empty tokens may have introduced.
            sentence = ' '.join(' '.join(words).split())
            tweetWriter.write(sentence + '\n')
import os import sys import IntermediateProjectWork as oldWork origTweetFile = '../dataset_raw/semeval2016-task6-trainingdata.txt' origTweets = oldWork.readTweets(origTweetFile) origTweetClasses = origTweets[len(origTweets) - 1] origTweets = origTweets[:len(origTweets) - 1] origTweetReader = open(origTweetFile) taggedTweetFile = '../tweetsTagged.txt' taggedTweetReader = open(taggedTweetFile) outputFile = '../dataset_raw/semeval2016-task6-edited-trainingdata.txt' outputWriter = open(outputFile, 'w') origLine = origTweetReader.readline() outputWriter.write(origLine) # print origTweets[0] for taggedTweet in taggedTweetReader: print taggedTweet origLine = origTweetReader.readline() # print 'yo' origLineSplit = origLine.split('\t') # print origLineSplit outputWriter.write(origLineSplit[0] + '\t' + origLineSplit[1] + '\t') # print taggedTweet line = taggedTweet.strip().split('\t') tweetString = line[0]