# Example #1 (score: 0)
	def run(self, message):
		"""
		Drive one full request/response cycle for an incoming message.

		Steps, in order: short-circuit on a memcache hit; decode the
		payload (JSON when it mentions 'interop', otherwise a stringified
		BSON byte-list literal); run every handler object in self._objs
		over the packet (twice when a union table is configured); apply
		post-processing (confidence defaults, filtering, join table,
		omit list); return early for downstream requests; optionally fan
		out to the cluster for upstream requests; finally encode and
		deliver the result to the caller's clients (with optional
		multiplexing) and store the encoded bytes back into memcache.

		NOTE(review): Python 2 only -- relies on the print statement,
		dict.iteritems(), unicode(), and dict.items() list concatenation.
		"""

		# Do nothing unless the cherrypy engine has fully started.
		if not cherrypy.engine.state == cherrypy.engine.states.STARTED:
			return

		"""
		check if we need to update the config
		"""

		# Normalize whatever arrived into a TextMessage wrapper.
		message = TextMessage(message.__str__())

		""" first check memcache """
		# The cache is keyed on the md5 hex digest of the raw message text.
		if not self.memcache is None:
			salt = hashlib.md5(message.__str__()).hexdigest()
			m = self.memcache.get(salt)

			if m:
				# Cache hit: the stored value is the already-encoded payload.
				bytes = m

				if self.multiplex:
					if self.multiplex_current == self.multiplex_amount:
						""" send the message """

						# Flush the accumulated multiplex container as one message.
						message = dict(message=self.multiplex_container)
						bytes = map(ord, bsonlib.dumps(message)).__str__()

						# NOTE(review): 'client' is not assigned until the
						# 'get-client' publish much further down, so reaching
						# this branch raises NameError -- confirm intended.
						for i in client:
							i.send(bytes)

						try:
							# Best-effort call of the caller's provider hook,
							# dug out of the parent stack frame.
							inspect.currentframe().f_back.f_locals['self'].provider(bytes)
						except:
							pass

						self.multiplex_current = 0
						self.multiplex_container = []

					else:
						""" store the message """
						# NOTE(review): the append is commented out, so on the
						# cached path the container never grows while the
						# counter still advances -- verify this is deliberate.
						#self.multiplex_container.append(m)
						self.multiplex_current += 1

				else:
					# NOTE(review): same undefined-'client' concern as above.
					for i in client:
						i.send(bytes)

					try:
						inspect.currentframe().f_back.f_locals['self'].provider(bytes)
					except:
						pass

				# Throttle to the caller's configured frequency, then stop:
				# a cache hit never re-runs the handler pipeline.
				_time.sleep(self.caller.freq)
				return


		# Wire-format detection: messages mentioning 'interop' are JSON,
		# anything else is a Python literal list of BSON byte values.
		if len(re.findall(r'interop', message.__str__())) > 0:
			m = json.loads(message.__str__())
			is_json = True

		else:
			literal = ast.literal_eval(message.__str__())

			"""
			ensure the message fits in
			"""

			m = bsonlib.loads(bytearray(literal).__str__())
			is_json = False

		# Lazily instantiate handler objects named in the packet, limited
		# to the OBJS whitelist ('' marks the not-yet-built state).
		if self._objs == '':
			self._objs = [globals()[i]() for i in m['packet'] if i in OBJS]

		p = m['packet']

		"""
		if no limit is set
		set the limit to the default
		"""
		# NOTE(review): p appears to be a dict, so dir(p) lists dict
		# methods, never its keys -- this condition is effectively always
		# true and the else branch unreachable; likely meant "'limit' in p".
		if not 'limit' in dir(p):
			m['limit'] = 20
		else:
			m['limit'] = p['limit']

		c = 0
		# Keep the undecoded input around for a possible union re-run.
		i_m = m

		# Chain each handler over the progressively transformed message.
		for i in self._objs:
			try:
				i.storage = self.storage
				i.logger = self.logger

				if c == 0:
					m = i.run(m)
				else:
					# Re-attach the original packet before chaining on.
					m = i.run(self._append(m, p))
			except:
				# NOTE(review): bare except hides handler failures, and c is
				# incremented both here and below, so a failing handler
				# advances the counter twice.
				i.log()
				c += 1

			c += 1

		# When a union table is configured, repeat the whole handler
		# pipeline against it and set-union both result lists.
		if self.storage.union_table:
			mp = m
			m = i_m

			_OL_DB = self.storage.get()['db']
			_OL_TABLE = self.storage.get()['table']
			self.storage.set('table', self.storage.union_table)

			c = 0
			for i in self._objs:
				try:
					i.storage = self.storage
					i.logger = self.logger

					if c == 0:
						m = i.run(m)
					else:
						m = i.run(self._append(m, p))
				except:
					i.log()
					c += 1

				c += 1

			m = list(set(m + mp))
		else:
			pass

		"""
		if we dont have a confidence value by now 
		append one to all before filtering
		"""
		try:
			# Default every row's confidence to 1 so _filter can rank them.
			if not 'confidence' in m[0].keys():
				m = [dict(i.items() + [('confidence', 1)]) for i in m]

			"""
			by now m should be a nodecollection.
			we must filter this to only the needed
			amount of node:ws
			"""
			if type(m) is list:
				m = self._filter(m)

		except:
			# Best effort: m may be empty or not a list at this point.
			pass


		"""
		add any join table if set
		also look for id if available
		if it isnt use "table_name"_id for 
		match
		"""
		if self.storage.join_table and type(m) is list:
			_OL_DB = self.storage.get()['db']
			_OL_TABLE = self.storage.get()['table']
			_OL_JOIN_TABLE = self.storage.join_table

			if _OL_JOIN_TABLE in getattr(_OL_DB, 'tables'):
				# Join on the explicit column when given, otherwise on a
				# column named after the current table.
				if self.storage.join_on:
					_OL_JOIN_ON = self.storage.join_on
				else:
					_OL_JOIN_ON = _OL_TABLE

				for i in range(0, len(m)):
					queries = []
					queries.append(getattr(getattr(_OL_DB, _OL_JOIN_TABLE), _OL_JOIN_ON) == m[i]['id'])
					query = reduce(lambda a,b:(a&b),queries)
					row = _OL_DB(query).select()

					try:
						# Merge the first joined row into the result row.
						row = row.as_list()[0]

						for j in row.items():
							""" prepend ambigious columns with join table name """

							# Existing columns win; joined duplicates are skipped.
							if j[0] in m[i].keys():
								continue

							m[i][j[0]] = j[1]

					except:
						# No joined row found -- leave the result row as-is.
						pass

			else:
				self.logger.append(dict(message="Could not find join table", object=self.__str__()))

		"""
		omit any fields
		that need to be
		erased
		"""
		if self.storage.omitlist:
			for i in range(0, len(m)):
				for j in self.storage.omitlist:
					# 'confidence' must survive omission; filtering relies on it.
					if j == 'confidence':
						continue

					del m[i][j]

		"""
		if this is a downstream
		request simply return it
		"""
		if self.config['downstream']:
			return m

		"""
		is this an upstream
		request?
		then bind a socket
		to the request, 
		and listen for
		the responses
		when all responses
		are fulfilled, return
		"""
		if self.config['upstream']:
			faddrs = []
			cluster = self.config['cluster']
			addr = self.config['dispatcher_address']
			timeout = int(self.config['dispatcher_timeout']);
			port = int(self.config['dispatcher_port'])
			print "Upstreaming to other servers"

			sock = socket.socket()
			sock.bind((addr, port))
			sock.listen(5)

			try:
				# Accept each cluster member exactly once, until every
				# member has answered or the timeout elapses.
				start = time_.time()
				while True:
					now = time_.time()

					if now - start > timeout:
						break

					if len(faddrs) == len(cluster):
						break

					client, addr = sock.accept()

					# NOTE(review): attribute access here vs the
					# self.config['cluster'] subscription above -- confirm
					# both resolve to the same collection.
					if not addr in self.config.cluster:
						continue

					## dont do it twice
					if addr in faddrs:
						continue

					faddrs.append(addr)
					# NOTE(review): by this point m has usually been replaced
					# by a result list, so m['packet'] may raise TypeError
					# (silently caught below) -- verify the upstream payload.
					client.send(m['packet'])

					message_ = client.recv(20024)

					literal = ast.literal_eval(message_.__str__())

					"""
					ensure the message fits in
					"""

					m_ = bsonlib.loads(bytearray(literal).__str__())

					"""
					now merge both m_ and m
					"""
					# NOTE(review): the merge described above is not
					# implemented; m_ is currently discarded.
			except:
				self.logger.append(dict(message="Unable to bind socket to upstream", object=self.__str__()))


		"""
		if results are met
		we need to run the
		provider. First get the client
		this message was received from
		"""

		client = cherrypy.engine.publish('get-client', self.caller.unique).pop()

		try:
			# Coerce all str values to unicode so encoding is uniform.
			for k in range(0, len(m)):
				if isinstance(m[k], dict):
					for k1, v1 in m[k].iteritems():
						if isinstance(m[k][k1], str):
							m[k][k1] = unicode(m[k][k1])

			m = dict(data=m, status=u'ok')
		except:
			m = dict(data=[], status=u'empty')
	
		# Respond in the same format the request arrived in.
		if is_json:
			bytes = json.dumps(m)
		else:
			bytes = map(ord, bsonlib.dumps(m)).__str__()

		if self.multiplex:
			if self.multiplex_current == self.multiplex_amount:
				""" send the message """

				# Flush the container as a single combined message.
				message = dict(message=self.multiplex_container)
				bytes = map(ord, bsonlib.dumps(message)).__str__()

				for i in client:
					i.send(bytes)

				try:
					inspect.currentframe().f_back.f_locals['self'].provider(bytes)
				except:
					pass

				self.multiplex_current = 0
				self.multiplex_container = []

			else:
				""" store the message """
				self.multiplex_container.append(m)
				self.multiplex_current += 1

		else:
			for i in client:
				i.send(bytes)

			try:
				inspect.currentframe().f_back.f_locals['self'].provider(bytes)
			except:
				pass


		""" stream output into another file """
		""" we recognize the following file types: """
		""" .php, .py, .pl, .rb, and .txt """
		""" first four will invoke their interpreter. .txt will """
		""" merely dump contents """
		""" fix for subprocess """
		""" NOTE: interop data in this case should always be JSON to  """
		""" let ease of integration. For this cause it is best to use 'JSON' as """
		""" interop when using stream_into """
		if self.config['stream_into']:
			# NOTE(review): 'type' shadows the builtin here, and 'inter'
			# stays unbound when the filename has no extension -- the
			# resulting NameError is swallowed by the bare except below.
			type = re.findall("\.(\w+)$", self.config['stream_into'])
			if len(type) > 0:
				inter = type[0]

			try:
				# SECURITY(review): config path and JSON payload are
				# interpolated into a shell command string -- prefer
				# subprocess.call with an argument list.
				if inter == 'php':
					os.system("php {0} '{1}'".format(os.path.abspath(self.config['stream_into']), json.dumps(m)))

				elif inter == 'py':
					os.system("python {0} '{1}'".format(self.config['stream_into'], json.dumps(m)))

				elif inter == 'pl':
					os.system("perl {0} '{1}'".format(self.config['stream_into'], json.dumps(m)))

				elif inter == 'rb':
					os.system("ruby {0} '{1}'".format(self.config['stream_into'], json.dumps(m)))

			except:
				# NOTE(review): "{1}" with a single format argument raises
				# IndexError, so this error handler itself can blow up --
				# should be "{0}".
				self.logger.append(dict(message="Unable to call ScriptingEngine for {1}".format(inter), object=self.__str__()))



		# Cache the encoded payload under the md5 of the (possibly
		# multiplex-replaced) message, feeding the fast path above.
		if not self.memcache is None:
			salt = hashlib.md5(message.__str__()).hexdigest()
			self.memcache.set(salt, bytes)

		_time.sleep(self.caller.freq)