Beispiel #1
0
	def __init__(self,articles,average,views,pattern,cycle,plotdir,c=None):
		'''Store the inputs, run the redistributor and reset the fit results.

		articles, average and pattern are kept on the instance unchanged.
		views, cycle, pattern and c are handed to Redistributor.run().
		plotdir is stored with a trailing "/" appended.
		'''
		# Inputs that are kept as they were passed in.
		self.articles, self.average, self.pattern = articles, average, pattern
		self.plotfolder = plotdir + "/"

		# Result holders, all starting out empty / zero.
		self.beta = self.gamma = 0
		self.gamma_func = [0,0]
		self.errors = {}
		self.logdict = {}

		# Build the redistributor and let it process the views right away.
		redistributor = Redistributor()
		redistributor.run(views,cycle,pattern,c)
		self.red = redistributor
Beispiel #2
0
class Analyser:
	'''Analysing the page view data.

	run() drives the whole pipeline: redistribute the views, fit the
	parameters, compute the estimates, compute and plot the errors and
	finally write a results file. Articles that cause an error on the way
	are removed from self.articles and the reason is recorded in
	self.logdict (article link title -> list of messages).
	'''

	def __init__(self,articles,average,views,pattern,cycle,plotdir,c=None):
		'''Store the inputs, run the redistributor and reset the fit results.

		articles, average and pattern are kept on the instance unchanged.
		views, cycle, pattern and c are handed to Redistributor.run().
		plotdir is stored with a trailing "/" appended.
		'''
		self.plotfolder = plotdir + "/"
		self.articles = articles
		self.average = average
		self.errors = {}
		self.pattern = pattern
		self.red = Redistributor()
		self.red.run(views,cycle,pattern,c)
		self.beta = 0
		self.gamma = 0
		self.gamma_func = [0,0]
		self.logdict = {}

	def run(self,overall=True,plot=False):
		'''
		Run all analysis steps in order and write the results file.

		overall: True if all articles have to be estimated using the same overall parameter values
		plot: passed on to plt.plot_articles (True if all articles have to be plotted)
		'''
		self.calc_red()
		self.calc_params()
		self.calc_estimates(overall)
		self.calc_plot_error()

		if self.pattern.has_gamma:
			plt.plot_gamma_corr(self.articles,self.gamma_func,self.plotfolder)

		plt.plot_articles(self.average,self.articles,self.pattern,self.plotfolder,plot)

		self.make_output(overall)

	def make_output(self,overall):
		'''Write "plots/<plotfolder>results.txt".

		overall: True reports the parameters stored on this analyser,
		False reports the ones stored on the average article.
		The parameter section is followed by every message in self.logdict.
		'''
		if overall:
			scope,source,func_attr = "articles",self,"gamma_func"
		else:
			# NOTE(review): est_gamma_func is what get_gamma_func_estimate()
			# stores (a redistributed estimate), not fit coefficients -
			# confirm that its first two entries are what should be reported.
			scope,source,func_attr = "average article",self.average,"est_gamma_func"

		# Text mode: only str is written, which a binary handle rejects on Python 3.
		with open("plots/"+self.plotfolder+"results.txt","w") as result:
			result.write("optimal c:\t{0:.4f}\n".format(self.red.c_opt))
			result.write("Number of articles for {1}:\t{0}\n".format(len(self.articles),self.pattern.title))
			result.write("Parameter values for {0}:\n".format(scope))
			result.write("Beta:\t{0:.4f}\n".format(source.beta))
			if self.pattern.has_gamma:
				result.write("Gamma:\t{0:.4f}\n".format(source.gamma))
				gamma_func = getattr(source,func_attr)
				result.write("Gamma func:\t[{0:.4f},{1:.4f}]\n".format(gamma_func[0],gamma_func[1]))

			result.write("\n\n")
			for title,messages in self.logdict.items():
				for err in messages:
					result.write("{0} {1}\n".format(title,err))

	def calc_red(self):
		'''Redistribute the views of the average and of every article.

		Articles whose redistributed view at pattern.start has no logarithm
		(math domain error) are removed via catch_error().
		'''
		self.average.redistribute_views(self.red.xtick)
		# Iterate over a snapshot: catch_error() deletes from self.articles,
		# and deleting from the list being iterated skips the next article.
		for art in list(self.articles):
			art.redistribute_views(self.red.xtick)

			### MAKING SURE THAT ARTICLES WITH red_views[self.pattern.start] = 0.0 are deleted.
			try:
				log(art.red_views[self.pattern.start])
			except ValueError as e:
				if str(e) == "math domain error":
					self.catch_error(art,"calc_redistribution")
				else:
					# Single-argument call form prints the same on Python 2 and 3.
					print(str(e))

	def _fit_article(self,art,num_vars):
		'''Fit one article; remove it on a math domain error, re-raise anything else.'''
		try:
			art.get_param(num_vars)
		except ValueError as e:
			if str(e) != "math domain error":
				raise
			self.catch_error(art,"calc_params")

	def calc_params(self):
		'''Fit the parameters of the average, of every article and the overall ones.

		Two parameters are fitted per article when the pattern has a gamma,
		one otherwise. An article raising IndexError gets its views
		redistributed once more and is then fitted again.
		'''
		if self.pattern.has_gamma:
			num_vars = 2
		else:
			num_vars = 1

		self.average.get_param(num_vars)
		# Snapshot, because _fit_article() may remove articles from the list.
		for art in list(self.articles):
			try:
				self._fit_article(art,num_vars)
			except IndexError:
				art.redistribute_views(self.red.xtick)
				self._fit_article(art,num_vars)

		self.get_param()
		if self.pattern.has_gamma:
			self.get_gamma_func()

	def calc_estimates(self,overall):
		'''Compute the estimated views for the average and for every article.

		overall: True uses the overall beta/gamma of this analyser,
		False lets every article estimate with its own parameters.
		'''
		if overall:
			self.get_estimate(self.average,self.red.xtick_back)
			if self.pattern.has_gamma:
				self.get_gamma_func_estimate(self.average,self.gamma_func,self.red.xtick_back)
			for art in self.articles:
				self.get_estimate(art,self.red.xtick_back)
				if self.pattern.has_gamma:
					self.get_gamma_func_estimate(art,self.gamma_func,self.red.xtick_back)

		else:
			self.average.get_estimate(self.red.xtick_back)
			if self.pattern.has_gamma:
				self.average.get_gamma_func_estimate(self.gamma_func,self.red.xtick_back)
			# Snapshot, because catch_error() removes articles from the list.
			for art in list(self.articles):
				try:
					art.get_estimate(self.red.xtick_back)
					if self.pattern.has_gamma:
						art.get_gamma_func_estimate(self.gamma_func,self.red.xtick_back)
				except ValueError as e:
					# NOTE(review): any other ValueError is ignored here, while
					# calc_params re-raises it - confirm this is intended.
					if str(e) == "math domain error":
						self.catch_error(art,"calc_estimates")

	def calc_plot_error(self):
		'''Compare the views of every article with its estimates and plot the errors.

		The collected errors are stored in self.errors under the keys
		"normalized" and "absolute". Articles raising IndexError are removed.
		'''
		perc_errors = {"gamma_func": [], "params": []}
		abs_errors = {"gamma_func": [], "params": []}
		# Snapshot, because catch_error() removes articles from the list.
		for art in list(self.articles):
			try:
				perc_error,abs_error = compare(art.views,art.est_params)
				perc_errors["params"].append(perc_error)
				abs_errors["params"].append(abs_error)
				if self.pattern.has_gamma:
					perc_error,abs_error = compare(art.views,art.est_gamma_func)
					perc_errors["gamma_func"].append(perc_error)
					abs_errors["gamma_func"].append(abs_error)
			except IndexError:
				self.catch_error(art,"calc_error")

		plt.plot_errors(perc_errors,abs_errors,self.plotfolder,self.pattern)
		self.errors = {"normalized": perc_errors, "absolute": abs_errors}

	def get_gamma_func(self):
		'''Fit a straight line to log(gamma) as a function of log(views[0]).

		Articles for which one of the two logarithms does not exist are left
		out and noted in self.logdict. self.gamma_func keeps its previous
		value when no article is usable.
		'''
		gamma_list = []
		v1_list = []
		for art in self.articles:
			reason = ""
			try:
				log(art.gamma)
			except ValueError as e:
				if str(e) == "math domain error":
					reason = r"$\gamma$ equals 0"

			try:
				log(art.views[0])
			except ValueError as e:
				if str(e) == "math domain error":
					if art.views[0] == 0 and reason == "":
						reason = r"$v_1$ equals 0"
					else:
						reason = r"$\gamma$ and $v_1$ equal 0"

			if reason != "":
				self._log(art.link_title,"was not included in gamma function calculation ({}).".format(reason))
			else:
				# if -4 < log(art.gamma) < 0.3:
				# 	if 5 < log(art.views[0]) < 10:
				gamma_list.append(log(art.gamma))
				v1_list.append(log(art.views[0]))

		if gamma_list:
			self.gamma_func = polyfit(v1_list,gamma_list,1).tolist()

	def get_param(self):
		'''Calculate parameters:
		minimise the fit error over all articles and store the result in
		self.beta (and self.gamma when the pattern has a gamma). The
		optimisation runs on the logarithm of the parameters.
		'''
		if self.pattern.has_gamma:
			params0 = [-.05,-1.5] # initial guess for the log of the parameters
			output = minimize(self.beta_gamma_fit,params0)
			self.beta = exp(output.x[0])
			self.gamma = exp(output.x[1])
		else:
			param0 = -.05
			output = minimize(self.beta_fit,param0)
			self.beta = exp(output.x[0])

	def beta_gamma_fit(self,params):
		'''Objective for the beta/gamma fit: summed difference over all articles.

		params: [log(beta), log(gamma)]
		'''
		start = self.pattern.start
		first_len = self.pattern.length*24
		error = 0
		for art in self.articles:
			max_length = len(art.views)
			# Same value for every hour, so it is computed once per article.
			log_v0 = log(art.red_views[start])
			log_estimate = zeros(max_length).tolist()
			# NOTE(review): this slice ends at length*24 while beta_fit ends at
			# start+length*24; with start > 0 more values are assigned than the
			# slice holds and the list grows - confirm start is 0 for gamma patterns.
			log_estimate[start:first_len] = [log_v0+params[0]*x for x in range(first_len)]
			log_estimate[first_len:max_length] = [log_v0+params[1]+params[0]*(x-1) for x in range(first_len,max_length)]

			error += calc_difference(art.red_views,log_estimate)

		return error

	def beta_fit(self,param):
		'''Objective for the beta-only fit: summed difference over all articles.

		param: log(beta)
		'''
		start = self.pattern.start
		first_len = self.pattern.length*24
		error = 0
		for art in self.articles:
			max_length = len(art.views)
			# Same value for every hour, so it is computed once per article.
			log_v0 = log(art.red_views[start])
			log_estimate = zeros(max_length).tolist()
			log_estimate[start:start+first_len] = [log_v0+param*x for x in range(first_len)]

			error += calc_difference(art.red_views,log_estimate)

		return error

	def get_estimate(self,art,ticks):
		'''Store the estimate based on the overall beta (and gamma) in art.est_params.

		art: article to estimate
		ticks: passed to redistribute() together with the estimate
		'''
		start = self.pattern.start
		first_len = self.pattern.length*24
		max_length = len(art.views)
		log_v0 = log(art.red_views[start])
		log_beta = log(self.beta)
		log_estimate = zeros(max_length).tolist()

		if self.pattern.has_gamma:
			log_gamma = log(self.gamma)
			# NOTE(review): slice end differs from the non-gamma branch, see beta_gamma_fit.
			log_estimate[start:first_len] = [log_v0+log_beta*x for x in range(first_len)]
			log_estimate[first_len:max_length] = [log_v0+log_gamma+log_beta*(x-1) for x in range(first_len,max_length)]
		else:
			log_estimate[start:start+first_len] = [log_v0+log_beta*x for x in range(first_len)]

		# Only the first max_length entries are used, even if the list is longer.
		estimate = [exp(log_estimate[x]) for x in range(max_length)]
		art.est_params = redistribute(ticks,estimate)

	def get_gamma_func_estimate(self,art,gamma_func,ticks):
		'''Store the estimate based on the gamma function in art.est_gamma_func.

		art: article to estimate
		gamma_func: [slope, intercept] applied to the log of the start value
		ticks: passed to redistribute() together with the estimate
		'''
		start = self.pattern.start
		first_len = self.pattern.length*24
		max_length = len(art.views)
		log_v0 = log(art.red_views[start])
		log_beta = log(self.beta)
		log_estimate = zeros(max_length).tolist()

		if self.pattern.has_gamma:
			# NOTE(review): slice end differs from the non-gamma branch, see beta_gamma_fit.
			log_estimate[start:first_len] = [log_v0+log_beta*x for x in range(first_len)]
			log_estimate[first_len:max_length] = [log_v0+gamma_func[1]+log_v0*gamma_func[0]+log_beta*(x-1) for x in range(first_len,max_length)]
		else:
			log_estimate[start:start+first_len] = [log_v0+log_beta*x for x in range(first_len)]

		# Only the first max_length entries are used, even if the list is longer.
		estimate = [exp(log_estimate[x]) for x in range(max_length)]
		art.est_gamma_func = redistribute(ticks,estimate)

	def _log(self,title,message):
		'''Append message to the list of log entries kept for title.'''
		self.logdict.setdefault(title,[]).append(message)

	def catch_error(self,art,location):
		'''Record a failure of art.

		location: name of the step that failed. If it contains "plot" the
		article is only noted as missing from the plots; otherwise it is
		removed from self.articles and the removal is noted.
		'''
		if "plot" in location:
			self._log(art.link_title,"is not included plots.")
		else:
			self.articles.remove(art)
			self._log(art.link_title,"is deleted due to an error in function {}.".format(location))