Python Parser.load_pickled_data Examples

Programming Language: Python

Namespace/Package Name: parser

Class/Type: Parser

Method/Function: load_pickled_data

Examples at hotexamples.com: 1

Python Parser.load_pickled_data - 1 example found. This is the top-rated real-world Python example of parser.Parser.load_pickled_data extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Parser(30)

__init__(15)

parse_operand(15)

scan(6)

parse_from_re(6)

output(3)

read_next_case(3)

get_dataset(3)

open(3)

get_organization(3)

stem_sentence_porter(3)

generate_ci_model_from_xml_string(2)

search(2)

generate_values(2)

getArqs(2)

get_datasource(2)

printer(2)

processInput(2)

parse_member_declaration(2)

parse_css(2)

get_file_name(2)

compute(2)

parse_files(2)

get_links(2)

get_serverdata_for_connection(2)

get_yr_parser(2)

validate_and_parse(2)

_advance(2)

_commandRecognized(2)

get_values_for_query(2)

get_user(2)

get_today_tasks(2)

get_slices(2)

get_simple_indicators(2)

start_paser(2)

get_meta_data(2)

get_profit_rate(2)

Parse(2)

add_visitor(2)

get_observations(2)

parseFAZ(1)

parse_class_or_interface_declaration(1)

inflect(1)

parseURLText(1)

parseQuery(1)

parseMessage(1)

inputMonth(1)

is_number(1)

parseIntputString(1)

parseDataset(1)

Example #1

Show file

File: svm.py Project: taehoonl/HashtagWeather

class SVM:
    """Wrapper around a set of SVMlight models used to classify tweets.

    The class holds one "is this about weather" model, a list of time
    models and one weather model per entry of ``weather_labels`` (the two
    are paired by position in ``classify_tweets``).  Models are read from
    disk with ``svmlight.read_model``; tweets are turned into feature lists
    through ``index_map``, which maps a stemmed word to a feature id.
    """

    def __init__(self):
        """Create an empty classifier; no models or index are loaded here."""
        self.parser = Parser()
        self.weather_models = []
        self.time_models = []
        self.is_weather_model = None
        self.default_data_features = []
        self.data = None
        self.index = None
        self.index_map = None
        # Scores strictly above this value assign a tweet to a label in
        # classify_tweets().
        self.threshold = 0.7
        self.weather_labels = [
            "clouds", "cold", "dry", "hot", "humid", "hurricane",
            "I can't tell", "ice", "other", "rain", "snow", "storms",
            "sun", "tornado", "wind",
        ]

    def initialize_svm(self):
        """Load every model and, if not loaded yet, the pickled index/map.

        The relative data paths depend on whether the current working
        directory is named ``src``.
        """
        # basename() copes with the platform's own path separator, whereas
        # splitting on '/' never matches 'src' on e.g. Windows.
        in_src_dir = os.path.basename(os.getcwd()) == 'src'
        prefix = '../' if in_src_dir else ''
        index_file_path = prefix + 'data/svm/data.index'
        map_file_path = prefix + 'data/svm/data.map'
        models_file_path = prefix + 'data/svm/models/'

        self.load_all_models(models_file_path)
        if self.index is None:
            index = self.parser.load_pickled_data(index_file_path)
            index_map = self.parser.load_pickled_data(map_file_path)
            self.index = index
            self.index_map = index_map

    def load_all_models(self, path):
        """Read all model files found under ``path`` (a directory prefix).

        The model lists are rebuilt from scratch on every call, so calling
        this method (or ``initialize_svm``) more than once does not leave
        duplicated models behind.
        """
        self.is_weather_model = self.read_model(path + 's5.model0.01')

        self.time_models = [
            self.read_model(path + 'new_c_w{}.model1'.format(number))
            for number in range(1, 5)
        ]

        # One weather model per label; classify_tweets() pairs them by index.
        self.weather_models = [
            self.read_model(path + 'k{}.model0.1'.format(number))
            for number in range(1, len(self.weather_labels) + 1)
        ]

    def load_data(self, rel_path):
        """Load a dataset from an SVMLight file.

        Uses the svmlight_loader library:
        https://github.com/mblondel/svmlight-loader

        Returns a two-element list ``[dataset, labels]``.
        """
        abs_path = os.path.abspath(rel_path)
        (x_train, labels) = svml.load_svmlight_file(abs_path)
        return [x_train, labels]

    def combine_data(self, data):
        """Pair every label with its row of feature values.

        ``data`` is ``[dataset, labels]`` as returned by ``load_data``; the
        dataset must provide ``todense()``.  Returns a list of
        ``[label, row_values]`` entries and prints progress every 100 rows.
        """
        print('Combining data...')
        labels = data[1]
        rows = np.array(data[0].todense()).tolist()
        combined_data = []
        for i, label in enumerate(labels):
            combined_data.append([label, rows[i]])
            if i % 100 == 0:
                print('Combined {} data'.format(i))
        return combined_data

    def format_data(self, data):
        """Convert ``[label, row_values]`` entries into feature-tuple form.

        Each entry becomes ``(label, [(feature_id, value), ...])`` with
        1-based feature ids, one tuple per column (zeros included).  The
        number of columns is taken from the first entry.  Returns an empty
        list for empty input.
        """
        print('Formatting data...')
        if not data:
            return []

        default_data_features = [
            (feature_id, 0) for feature_id in range(1, len(data[0][1]) + 1)
        ]

        formatted_data = []
        for data_num, datum in enumerate(data):
            label, values = datum[0], datum[1]
            data_features = default_data_features[:]
            for e in np.nonzero(values)[0]:
                e = int(e)
                # Column e carries feature id e + 1 and lives in slot e.
                data_features[e] = (e + 1, values[e])

            if data_num % 100 == 0:
                print('Formatted {} data'.format(data_num))
            formatted_data.append((label, data_features))
        return formatted_data

    def format_for_svmlight(self, data):
        """Run ``combine_data`` followed by ``format_data`` on ``data``."""
        combined_data = self.combine_data(data)
        formatted_data = self.format_data(combined_data)
        return formatted_data

    def format_tweet_for_svmlight(self, tweet):
        """Turn a tokenised tweet into a one-document svmlight data list.

        ``tweet`` is an iterable of words.  Words missing from
        ``self.index_map`` are ignored (all of them when the map has not
        been loaded).  Each known word contributes exactly one
        ``(feature_id, occurrence_count)`` pair.

        Returns ``[(1, features)]`` with ``features`` sorted by feature id.
        """
        word_counts = {}
        for word in tweet:
            word_counts[word] = word_counts.get(word, 0) + 1

        index_map = self.index_map if self.index_map is not None else {}
        data_features = []
        for word, occurrences in word_counts.items():
            try:
                idx = index_map[word]
            except KeyError:
                continue
            data_features.append((idx, occurrences))

        # SVMlight's sparse representation lists features in ascending id
        # order; sorting also makes the output deterministic.
        data_features.sort()
        return [(1, data_features)]

    def read_model(self, rel_path):
        """Read and return the svmlight model stored at ``rel_path``."""
        abs_path = os.path.abspath(rel_path)
        model = svmlight.read_model(abs_path)
        return model

    def train(self, data, t=0, C=1.0):
        """Train and return a classifier model via ``svmlight.learn``.

        ``t`` and ``C`` are forwarded unchanged to ``svmlight.learn``.
        """
        model = svmlight.learn(data, type="classifier", t=t, C=C)
        return model

    def get_weather_tweets(self, tweets):
        """Return the tweets that ``is_weather_model`` scores below zero.

        ``tweets`` may be a list or a single tweet.  A running count is
        printed every 100 tweets.
        """
        # NOTE(review): a negative score is treated as "weather related";
        # confirm against how the s5 model was labelled.
        if not isinstance(tweets, list):
            tweets = [tweets]

        weather_tweets = []
        for count, tweet in enumerate(tweets, 1):
            stemmed = self.parser.stem_sentence_porter(tweet)
            formatted_tweet = self.format_tweet_for_svmlight(stemmed)
            c = svmlight.classify(self.is_weather_model, formatted_tweet)
            if count % 100 == 0:
                print(count)
            if c[0] < 0:
                weather_tweets.append(tweet)
        return weather_tweets

    def classify(self, model, data):
        """Return ``svmlight.classify(model, data)``."""
        classifications = svmlight.classify(model, data)
        return classifications

    def classify_tweet(self, tweet):
        """Score one raw tweet against every weather and time model.

        Returns ``(weather_scores, time_scores)``, or ``None`` (after
        printing a message) when the models have not been loaded or the
        classification fails.
        """
        # Empty model lists would otherwise yield ([], []) without any hint
        # that load_all_models() was never called.
        if not self.time_models or not self.weather_models:
            print('You have yet to load the models.')
            print('Please load all models with load_all_models()')
            return None

        try:
            tweet = self.parser.stem_sentence_porter(tweet)
            formatted_tweet = self.format_tweet_for_svmlight(tweet)
            time_class = [
                self.classify(model, formatted_tweet)[0]
                for model in self.time_models
            ]
            weather_class = [
                self.classify(model, formatted_tweet)[0]
                for model in self.weather_models
            ]
        except Exception as exc:
            # Best effort, as before: report and return None rather than
            # raise, but let KeyboardInterrupt/SystemExit propagate.
            print('Could not classify tweet: {}'.format(exc))
            return None
        return weather_class, time_class

    def classify_tweets(self, tweets, formatted_tweets):
        """Score many tweets against every weather model.

        ``tweets`` and ``formatted_tweets`` are parallel sequences (raw
        tweet and its svmlight-formatted form).

        Returns ``(results, tweet_dict)`` where ``results`` is a list of
        ``[sum_of_scores, label]`` per model and ``tweet_dict`` maps a label
        to the tweets whose score exceeded ``self.threshold``.
        """
        weather_class = []
        tweet_dict = {}
        for label_idx, model in enumerate(self.weather_models):
            scores = self.classify(model, formatted_tweets)
            weather_class.append(scores)
            for i, score in enumerate(scores):
                if score > self.threshold:
                    label = self.weather_labels[label_idx]
                    tweet_dict.setdefault(label, []).append(tweets[i])

        results = [
            [sum(scores), self.weather_labels[i]]
            for i, scores in enumerate(weather_class)
        ]
        return results, tweet_dict