Example #1
0
	def create_dictionary(self,feature_to_do):
		"""Build the per-feature vocabulary from the training base.

		``feature_to_do`` is an iterable of indexes into ``self.features``.
		For each selected feature, every value found in the tweets returned by
		``self.tweets.get_training_base()`` is collected into
		``self.dictionary[feature]`` (a set). When a tweet's value for the
		feature is a list, its elements are handled one by one and elements
		found in the stop-word collection are skipped; any other value is
		stored as-is, without stop-word filtering.

		While collecting, the number of occurrences of each
		``(feature, value)`` pair is counted and a copy of those counts is
		passed to ``self.tweets.set_previously_searched``.

		``self.dictionary`` is rebuilt from scratch on every call.
		"""
		self.dictionary = {}
		occurrences = {}

		stop_words = methods.load_stop_words()
		tweet_list = self.tweets.get_training_base()

		for i in feature_to_do:
			feature = self.features[i]
			vocabulary = set()
			self.dictionary[feature] = vocabulary
			for tweet in tweet_list:
				values = tweet.get(feature)
				if isinstance(values,list):
					# Only list-valued features are filtered against stop words.
					candidates = (item for item in values if item not in stop_words)
				else:
					candidates = (values,)
				for item in candidates:
					key = (feature,item)
					occurrences[key] = occurrences.get(key,0) + 1
					vocabulary.add(item)
		# Hand over a copy so later changes here cannot leak into the tweet store.
		self.tweets.set_previously_searched(dict(occurrences))
Example #2
0
File: tweet.py Project: pazzini/svm
	def split_text(self,text,min_tam_word):
		"""Split ``text`` into words, optionally cleaning and filtering them.

		Returns ``None`` when ``text`` is ``None``.

		When ``min_tam_word`` is greater than zero, every whitespace-separated
		word is passed through ``methods.process_text(word, min_tam_word)``;
		words for which that call returns an empty string, or whose processed
		form is in the stop-word collection, are dropped. Otherwise the raw
		result of ``text.split()`` is returned unchanged.
		"""
		if text is None:
			return None

		words = text.split()
		if min_tam_word > 0:
			# Stop words are only consulted on this path, so load them here
			# instead of paying for the load on every call.
			stop_words = methods.load_stop_words()
			filtered_text = []
			for word in words:
				processed_word = methods.process_text(word,min_tam_word)
				if processed_word != "" and processed_word not in stop_words:
					filtered_text.append(processed_word)
			return filtered_text
		return words
Example #3
0
	def create_base_list(self,tweet_list,feature_to_do):
		"""Turn tweets into parallel lists of labels and feature vectors.

		``feature_to_do`` is an iterable of indexes into ``self.features``.
		For each tweet, one weight is produced per word of
		``self.dictionary[feature]`` for every selected feature, in iteration
		order: the result of ``self.idf(...)`` when the word matches the
		tweet's value for that feature, and ``0`` otherwise. A word matches
		when it is contained in the value (if the value is a list) or equal
		to the value (anything else).

		Tweets that match no dictionary word at all are left out. For the
		others the label is ``1`` when ``get_manual_classification()`` returns
		``"important"`` and ``-1`` otherwise.

		Returns a ``(label, value)`` pair: ``label`` is the list of labels and
		``value`` the list of weight tuples, index-aligned with each other.
		"""
		label = []
		value = []
		for tweet in tweet_list:
			temp_label = 0
			temp_value = []
			for j in feature_to_do:
				feature = self.features[j]
				values = tweet.get(feature)
				values_is_list = isinstance(values,list)

				for word in self.dictionary[feature]:
					if values_is_list:
						matched = word in values
					else:
						matched = word == values

					# Reset the weight for every word, whatever the value type:
					# a non-matching word must contribute 0, never the weight
					# left over from the previous word.
					p = 0
					if matched:
						if temp_label == 0:
							# The label is decided once, on the first match.
							if tweet.get_manual_classification() == "important":
								temp_label = 1
							else:
								temp_label = -1
						p = self.idf(self.tweets.get_training_base(),tweet,word,j)
					temp_value.append(p)

			# A label of 0 means no dictionary word matched this tweet.
			if temp_label != 0:
				value.append(tuple(temp_value))
				label.append(temp_label)
		return label,value