Ejemplo n.º 1
0
	def create_sets(self):
		""" Create training/test sets as lists of indices into self.tweets.

		When self.debug is set, a previously dumped split is read from
		self.DEBUG_SETS (first entry: trainset, second entry: testset). If
		reading fails, a fresh random split is drawn instead.

		In a fresh split every index i of self.tweets is appended to the
		existing self.trainset when random.random() is below
		self.distribution[0], and to the existing self.testset otherwise.

		When self.dump is set, [trainset, testset] is written to
		self.DEBUG_SETS.
		"""
		debug = self.debug

		if debug:
			try:
				print("reading from file")
				# NOTE(review): the read goes through self.read_from_file while
				# the dump below goes through helpers.dump_to_file -- confirm
				# both use the same on-disk format.
				totallist = self.read_from_file(self.DEBUG_SETS)
				# Extract both halves before touching the instance, so a short
				# or malformed dump cannot leave a loaded trainset behind for
				# the random split below to append to.
				loaded_train, loaded_test = totallist[0], totallist[1]
			except Exception:
				# Exception instead of a bare except: Ctrl-C and SystemExit
				# must not be mistaken for an unreadable dump.
				print("! Error in reading from file debug.txt. Redo create_sets")
				debug = False
			else:
				self.trainset = loaded_train
				self.testset = loaded_test
		if not debug:
			for i in range(len(self.tweets)):
				# Index goes to the trainset when the random draw falls below
				# the trainset share of the distribution.
				if random.random() < self.distribution[0]:
					self.trainset.append(i)
				else:
					self.testset.append(i)
		if self.dump:
			helpers.dump_to_file(self.DEBUG_SETS, [self.trainset, self.testset])
Ejemplo n.º 2
0
	def create_wordpostuples(self, array):
		""" Create tokens and POS tags for tweets.

		The cache file name is the part of self.TOPICFILE before its first
		'.' followed by "_wordpos.txt". When self.debug is set, self.tuples
		is loaded from that file; if loading fails the tuples are recomputed.

		Recomputing starts the Frog server, passes every item of `array`
		through self.frog_tweets, appends each result to self.tuples, dumps
		self.tuples to the cache file and stops the server again.

		array -- iterable of items handed one by one to self.frog_tweets
		"""
		filename = self.TOPICFILE.split('.')[0]
		wordpos_filename = filename + "_wordpos.txt"

		readfromfile = self.debug
		if readfromfile:
			try:
				self.tuples = helpers.read_from_file(wordpos_filename)
			except Exception:
				# Exception instead of a bare except: Ctrl-C and SystemExit
				# must not be mistaken for an unreadable cache file.
				print("! Error in reading from file. Redo posword tuples")
				readfromfile = False

		if not readfromfile:
			self.startFrogServer('start')
			try:
				time.sleep(20)							# Time for startup server
				frogclient = FrogClient('localhost', self.PORTNUMBER)
				print("** START frog analysis.")
				print("** Creating POS tags.. (This may take a while)")
				for item in array:
					lemmapos_array = self.frog_tweets(frogclient, item)
					self.tuples.append(lemmapos_array)

				helpers.dump_to_file(wordpos_filename, self.tuples)
			finally:
				# Stop the server even when analysis or dumping raised, so a
				# failed run does not leave it running.
				self.startFrogServer('stop')
Ejemplo n.º 3
0
	def tryout_tuples(self, array):
		""" Run every item of `array` through Frog and dump the results.

		Starts the Frog server, passes each item to self.frog_tweets, appends
		each result to self.tuples, writes self.tuples to 'te-tuples.txt' and
		stops the server again.

		array -- iterable of items handed one by one to self.frog_tweets
		"""
		self.startFrogServer('start')
		try:
			time.sleep(15)							# Time for startup server
			frogclient = FrogClient('localhost', self.PORTNUMBER)

			print("** START frog analysis.")
			print("** Creating POS tags.. (This may take a while)")
			for item in array:
				wordpos_array = self.frog_tweets(frogclient, item)
				self.tuples.append(wordpos_array)

			helpers.dump_to_file('te-tuples.txt', self.tuples)
		finally:
			# Stop the server even when analysis or dumping raised, so a
			# failed run does not leave it running.
			self.startFrogServer('stop')
	def __init__(self, mode, corpusfile, referencefile):
		""" Load both tweet collections, build their dictionaries and score them.

		mode          -- container or string checked for the '--debug' flag
		corpusfile    -- passed to self.get_tweets for the corpus tweets
		referencefile -- passed to self.get_tweets for the reference tweets

		Without '--debug' the tweets are read via self.get_tweets and each
		collection is dumped to its cache file; with '--debug' both
		collections are read back from those cache files instead.
		"""
		corpus_cache = "corpusfile_lda_testing.txt"
		reference_cache = "referencefile_lda_testing.txt"

		self.load_stopword_file()

		if '--debug' not in mode:
			# Regular run: parse the input files and refresh both caches.
			self.corpusfile_tweets = self.get_tweets(corpusfile)
			helpers.dump_to_file(corpus_cache, self.corpusfile_tweets)
			self.referencefile_tweets = self.get_tweets(referencefile)
			helpers.dump_to_file(reference_cache, self.referencefile_tweets)
		else:
			# Debug run: reuse whatever an earlier run left in the caches.
			self.corpusfile_tweets = helpers.read_from_file(corpus_cache)
			self.referencefile_tweets = helpers.read_from_file(reference_cache)

		self.corpus = self.create_dictionary(self.corpusfile_tweets)
		self.referencecorpus = self.create_dictionary(self.referencefile_tweets)

		self.loglikelihood = self.calculate_loglikelihood(
			self.corpus, self.referencecorpus)
Ejemplo n.º 5
0
	def write_all_to_file(self):
		""" Dump every non-empty tweet array to its debug file.

		The stemmed, tokenized, lemmatized and POS arrays are checked in
		that order; each one that is truthy is written with
		helpers.dump_to_file to the matching DEBUG_* path.
		"""
		print("WRITE TO FILE")
		# (name of path attribute, name of data attribute), in dump order.
		targets = (
			("DEBUG_STEM", "stemmed_tweets_array"),
			("DEBUG_TOKEN", "tokenized_tweets_array"),
			("DEBUG_LEMMA", "lemmatized_tweets_array"),
			("DEBUG_POS", "pos_tweets_array"),
		)
		for path_attr, data_attr in targets:
			contents = getattr(self, data_attr)
			if not contents:
				continue
			# The path is only looked up once there is something to write.
			helpers.dump_to_file(getattr(self, path_attr), contents)
Ejemplo n.º 6
0
	def dump_classifier(self, filename):
		""" Write the (classifier, scaler) pair to `filename`.

		filename -- destination handed to helpers.dump_to_file
		"""
		classifier_and_scaler = self.classifier, self.scaler
		helpers.dump_to_file(filename, classifier_and_scaler)