예제 #1
0
	def __init__(self, job, eventdriver):
		"""Bind the pusher to one job, load its config and pick a pull strategy.

		Args:
			job: unique job name; used as the key into
				CONFIG['GLOBAL']['JOB'] and as the event name listened for.
			eventdriver: object exposing add_event_listener(); self.process
				is registered on it for events named after the job.

		Raises:
			InternalError: if the pull strategy cannot be read from the
				config or no pull_from_<strategy> attribute exists.
		"""
		# unique job name
		self.job = job

		# import data map config, such as MAP_ITORANGE
		self.config = Configuration()
		self.config.import_internal_config(job)
		self.config.print_config()

		# NOTE: the background config refresh (a daemon thread running
		# self.config.update_config) is currently disabled.

		self._lcurl = Lcurl()
		self._logger = Nlog()

		# event driver
		self._eventdriver = eventdriver
		self._eventdriver.add_event_listener(job, self.process)

		try:
			pull_strategy = self.config.CONFIG['GLOBAL']['JOB'][job]['PULL_STRATEGY']
			print('[PULL STRATEGY] %s' % pull_strategy)
			if pull_strategy is not None:
				# Look the method up by name with getattr rather than eval, so
				# a config value can never be executed as arbitrary code.
				self.pull = getattr(self, 'pull_from_' + pull_strategy)
		except Exception as e:
			raise InternalError('[pusher constructor ERROR]', e)
예제 #2
0
파일: company2b.py 프로젝트: Dkner/mr2p
 def upload_company_extend_info(self, corp_id, post_data):
     """Post extended company info for one company to COMPANY_INFO_API.

     Returns True only when the API answers with status '1'. Returns False
     when either argument is empty, when the request yields a falsy
     response, or when the status differs.
     """
     if not (corp_id and post_data):
         return False
     api_base = self.config.CONFIG['GLOBAL']['API']['COMPANY_INFO_API']
     endpoint = api_base + '/upload_company_extend_info_v2?type=200&scheme=yunying&company_id=' + corp_id
     response = Lcurl().post(url=endpoint, data=json.dumps(post_data))
     if not response:
         return False
     return response.json()['status'] == '1'
예제 #3
0
파일: company2data.py 프로젝트: Dkner/mr2p
 def upload_company_increment(self, document):
     """Post one document to COMPANY_INCREMENT_API as a JSON body.

     Returns True only when the API answers with code 0. Returns False for
     an empty document, a falsy response, or any other code.
     """
     if not document:
         return False
     endpoint = self.config.CONFIG['GLOBAL']['API']['COMPANY_INCREMENT_API']
     client = Lcurl()
     payload = json.dumps({"document": document})
     response = client.post(url=endpoint, data=payload, headers={"Content-Type": "application/json"})
     if not response:
         return False
     return response.json()['code'] == 0
예제 #4
0
파일: pusher.py 프로젝트: Dkner/mr2p
    def __init__(self, job):
        """Create a pusher for one job with its own copy of the configuration.

        Args:
            job: unique job name, passed to import_internal_config().
        """
        self.job = job

        # Work on a deep copy so the job-specific import below (data map
        # config, such as MAP_ITORANGE) does not touch the Configuration()
        # object itself -- presumably that object is shared; TODO confirm.
        self.config = copy.deepcopy(Configuration())
        self.config.import_internal_config(job)
        self.config.print_config()

        self._lcurl = Lcurl()
        # no loop yet; set to None until something assigns one
        self._loop = None
예제 #5
0
class pusher(object):
	def __init__(self, job, eventdriver):
		"""Bind the pusher to one job, load its config and pick a pull strategy.

		Args:
			job: unique job name; used as the key into
				CONFIG['GLOBAL']['JOB'] and as the event name listened for.
			eventdriver: object exposing add_event_listener(); self.process
				is registered on it for events named after the job.

		Raises:
			InternalError: if the pull strategy cannot be read from the
				config or no pull_from_<strategy> attribute exists.
		"""
		# unique job name
		self.job = job

		# import data map config, such as MAP_ITORANGE
		self.config = Configuration()
		self.config.import_internal_config(job)
		self.config.print_config()

		# NOTE: the background config refresh (a daemon thread running
		# self.config.update_config) is currently disabled.

		self._lcurl = Lcurl()
		self._logger = Nlog()

		# event driver
		self._eventdriver = eventdriver
		self._eventdriver.add_event_listener(job, self.process)

		try:
			pull_strategy = self.config.CONFIG['GLOBAL']['JOB'][job]['PULL_STRATEGY']
			print('[PULL STRATEGY] %s' % pull_strategy)
			if pull_strategy is not None:
				# Look the method up by name with getattr rather than eval, so
				# a config value can never be executed as arbitrary code.
				self.pull = getattr(self, 'pull_from_' + pull_strategy)
		except Exception as e:
			raise InternalError('[pusher constructor ERROR]', e)

	# data pull strategy
	def pull_from_mongo(self):
		"""Yield every document of the job's Mongo collection, ten per query.

		The collection is read with find().limit(step).skip(start) so only
		one small page is fetched at a time.

		Yields:
			Documents exactly as returned by the collection cursor.

		Raises:
			NetworkError: if connect_mongo() yields no client or no
				authenticated database.
			FinishedError: after the last document has been yielded; run()
				handles it as the end-of-data signal.
		"""
		job_config = self.config.CONFIG['GLOBAL']['JOB'][self.job]
		connection = self.connect_mongo(job_config['MONGO_DB'])
		# connect_mongo may return a bare False instead of a pair; normalise it
		# so the unpacking below cannot raise TypeError.
		mongo_client, mongo_database = connection if connection else (False, False)
		if not mongo_client or mongo_database is False:
			if mongo_client:
				# authentication failed: do not leak the open client
				mongo_client.close()
			raise NetworkError("cannot connect to mongo server")
		try:
			# item access replaces eval('mongo_database.' + name)
			mongo_collection = mongo_database[job_config['MONGO_COLLECTION']]
			# total number to process
			count = mongo_collection.count()
			# loop start and loop range, related to the memory consumed
			start, step = 0, 10
			while start < count:
				for record in mongo_collection.find().limit(step).skip(start):
					yield record
				start += step
		finally:
			# runs on normal exhaustion, on errors, and when the consumer
			# closes the generator early
			mongo_client.close()
		raise FinishedError('finished')

	def pull_from_redis(self):
		"""Yield JSON-decoded records popped from the job's Redis list, forever.

		Blocks on blpop() with POP_TIMEOUT; a timeout only prints a notice and
		the loop keeps waiting, so this generator never finishes by itself.

		Raises:
			NetworkError: if no Redis connection is available or ping() fails.
		"""
		queue_key = self.config.CONFIG['GLOBAL']['JOB'][self.job]['PUSH_REDIS_KEY']
		conn = self.connect_redis()
		if not (conn and conn.ping()):
			raise NetworkError("cannot connect to redis server")
		while True:
			popped = conn.blpop(queue_key, POP_TIMEOUT)
			if popped is not None:
				# element 1 of the popped item carries the JSON payload
				yield json.loads(popped[1])
			else:
				print('redis time out')

	def connect_mongo(self, dbname):
		"""Open a MongoClient and authenticate against the given database.

		Args:
			dbname: name of the database to select and authenticate on.

		Returns:
			A (client, database) pair. The pair is (False, False) when the
			MONGO config is missing or lacks host/port/user/password, and
			(client, False) when authenticate() reports False -- in that
			case the caller still owns the open client.
		"""
		mongo_config = self.config.CONFIG['GLOBAL']['MONGO']
		required_keys = ('host', 'port', 'user', 'password')
		if not mongo_config or any(key not in mongo_config for key in required_keys):
			# Always return a pair: every caller in this class unpacks the
			# result, so a bare False would raise TypeError there.
			return (False, False)
		conn = pymongo.MongoClient(mongo_config['host'], int(mongo_config['port']))
		# item access replaces eval("conn." + dbname); it also accepts names
		# that are not valid Python identifiers
		db = conn[dbname]
		ret = db.authenticate(mongo_config['user'], mongo_config['password'], dbname)
		if False == ret:
			return (conn, False)
		return (conn, db)

	def connect_redis(self):
		"""Build a redis.Redis client from CONFIG['GLOBAL']['REDIS'].

		Returns:
			The client object, or False when the REDIS config is empty or
			lacks any of 'host', 'port', 'db'.
		"""
		redis_config = self.config.CONFIG['GLOBAL']['REDIS']
		if not redis_config or any(key not in redis_config for key in ('host', 'port', 'db')):
			return False
		return redis.Redis(
			host=redis_config['host'],
			port=int(redis_config['port']),
			db=int(redis_config['db']),
			# 'password' is not part of the validation above, so read it
			# without assuming the key exists (None when absent)
			password=redis_config.get('password')
		)

	def trans(self, input, map, exception_default=''):
		"""Build an output dict by running one FUNCBOX function per mapped key.

		Args:
			input: the source record, handed to every function as ``new``.
			map: dict of output key -> (value, func_name); func_name is
				looked up on a FUNCBOX instance.
			exception_default: stored for a key whose function lookup or
				call raises.

		Returns:
			The populated dict, or False if iterating or unpacking ``map``
			itself fails (the error is printed).
		"""
		func_service = FUNCBOX()
		converted = {}
		try:
			for target_key, spec in map.items():
				source_value, func_name = spec
				call_args = dict(value=source_value, new=input, instance=self, method=func_name)
				try:
					converted[target_key] = getattr(func_service, func_name)(**call_args)
				except Exception:
					# best effort: a failing field falls back to the default
					converted[target_key] = exception_default
		except Exception as err:
			print(err)
			return False
		return converted

	@count_second
	def run(self):
		"""Pull records and hand each one to the event driver as a job event.

		Every error is reported with print() and swallowed. FinishedError is
		the end-of-data signal: the event driver is asked to stop once its
		pending work is complete.
		"""
		try:
			for record in self.pull():
				self._eventdriver.send_event(TheEvent(self.job, {'data': record}))
		except NetworkError as err:
			print('Network Error: %s' % err)
		except FinishedError as err:
			print('Data Received, processing background')
			self._eventdriver.stop_util_complete()
			print('Job Done: %s' % err)
		except InternalError as err:
			print('Internal Error: %s' % err)
		except Exception as err:
			print('Unknown Error: %s' % err)

	# replaced by pull_from_xxx
	def pull(self):
		"""Placeholder data source; does nothing and returns None.

		__init__ rebinds self.pull to a pull_from_<strategy> method when the
		job config names a PULL_STRATEGY.
		"""
		return None

	# overridden by subclasses
	def process(self, event):
		"""Default event handler: print the event and report success.

		__init__ registers this method with the event driver as the listener
		for the job's events.
		"""
		print(event)
		return True

	@count_second
	def mongo2redis(self, limit=1000):
		"""Copy documents from the job's Mongo collection onto its Redis list.

		Each document's '_id' is converted to str, the document is
		JSON-encoded and pushed with rpush onto PUSH_REDIS_KEY.

		Args:
			limit: maximum number of documents to copy. Defaults to 1000,
				the cap that used to be hard-coded; pass None to copy the
				whole collection.

		Raises:
			NetworkError: if the Redis connection or the Mongo
				client/database is unavailable.
		"""
		job_config = self.config.CONFIG['GLOBAL']['JOB'][self.job]
		redis_conn = self.connect_redis()
		if not redis_conn:
			raise NetworkError("cannot connect to redis server")
		connection = self.connect_mongo(job_config['MONGO_DB'])
		# connect_mongo may return a bare False instead of a pair
		mongo_client, mongo_database = connection if connection else (False, False)
		if not mongo_client or mongo_database is False:
			if mongo_client:
				mongo_client.close()
			raise NetworkError("cannot connect to mongo server")
		try:
			# item access replaces eval('mongo_database.' + name)
			mongo_collection = mongo_database[job_config['MONGO_COLLECTION']]
			count = mongo_collection.count()
			if limit is not None:
				# never loop past the real collection size
				count = min(count, limit)
			start, step = 0, 10
			while start < count:
				print(start)
				# the last page is shortened so the cap is respected exactly
				page = mongo_collection.find().limit(min(step, count - start)).skip(start)
				for doc in page:
					doc['_id'] = str(doc['_id'])
					redis_conn.rpush(job_config['PUSH_REDIS_KEY'], json.dumps(doc))
				start += step
		finally:
			mongo_client.close()

	def stat_by_redis(self, ret):
		"""Record one push outcome in the job's per-day Redis stat hash.

		The hash key is PUSH_REDIS_KEY + '_stat_' + today's date. An existing
		hash gets its 'success' or 'fail' counter incremented and 'last_push'
		refreshed; otherwise a new hash is written with the first count.

		Args:
			ret: truthy counts as a success, falsy as a failure.

		Returns:
			True once the stat is written, False if Redis is unreachable.
		"""
		redis_conn = self.connect_redis()
		if not (redis_conn and redis_conn.ping()):
			print("cannot connect to redis server")
			return False
		push_redis_key = self.config.CONFIG['GLOBAL']['JOB'][self.job]['PUSH_REDIS_KEY']
		today = datetime.datetime.now().strftime("%Y-%m-%d")
		stat_key = push_redis_key + '_stat_' + today
		counter = "success" if ret else "fail"
		if redis_conn.hgetall(stat_key):
			redis_conn.hincrby(stat_key, counter, 1)
			redis_conn.hset(stat_key, "last_push", int(time.time()))
			return True
		stat_map = {
			"project": self.config.NAME,
			"task": self.job,
			"success": 0,
			"fail": 0,
			"last_push": int(time.time())
		}
		stat_map[counter] = 1
		redis_conn.hmset(stat_key, stat_map)
		return True

	def write_back_mongo(self, ret, data, flag_name):
		"""Flag the source Mongo document by setting ``flag_name`` to 1.

		Args:
			ret: outcome of the preceding step; nothing is written when falsy.
			data: dict carrying the document's '_id'.
			flag_name: field to set to 1 on the document.

		Returns:
			The update_one() result, or False when ``ret`` is falsy, ``data``
			is not a dict with an '_id', or Mongo cannot be reached.
		"""
		if not ret or not isinstance(data, dict) or '_id' not in data:
			return False
		job_config = self.config.CONFIG['GLOBAL']['JOB'][self.job]
		connection = self.connect_mongo(job_config['MONGO_DB'])
		# connect_mongo may return a bare False instead of a pair
		mongo_client, mongo_database = connection if connection else (False, False)
		if not mongo_client or mongo_database is False:
			if mongo_client:
				mongo_client.close()
			return False
		try:
			# item access replaces eval('mongo_database.' + name)
			mongo_collection = mongo_database[job_config['MONGO_COLLECTION']]
			return mongo_collection.update_one({'_id': ObjectId(data['_id'])}, {"$set": {flag_name: 1}})
		finally:
			# close the client even when the update raises
			mongo_client.close()

	'''
		api func start
	'''
	def get_related_corps(self, url, source):
		"""Ask the tidy server for the corps related to a URL.

		Args:
			url: sent as 'url' in the JSON request body.
			source: sent as 'source' when truthy, otherwise omitted.

		Returns:
			The 'data' field of the reply when its 'error_no' is 0; False
			for a falsy response, an undecodable or non-dict reply, a
			non-zero 'error_no', or a missing 'data' field.
		"""
		req_body = {'url': url}
		if source:
			req_body['source'] = source
		endpoint = self.config.CONFIG['GLOBAL']['API']['TIDY_SERVER_API'] + "/tidyserver/getrelatedcorps"
		# NOTE(review): meaning of the positional True is defined by Lcurl.post
		ret = self._lcurl.post(endpoint, json.dumps(req_body), True, {"content-type":"application/json"})
		if not ret:
			return False
		try:
			# the value of .json() is decoded once more, i.e. it is expected
			# to be a JSON string itself -- TODO confirm against the server
			payload = json.loads(ret.json())
		except Exception as e:
			print('[get_related_corps ERROR] %s' % e)
			# return here: falling through would subscript False below
			return False
		if not isinstance(payload, dict) or payload.get('error_no') != 0:
			return False
		return payload.get('data', False)

	def get_nace_id(self, url, source):
		"""Ask the tidy server for the NACE tag of a URL.

		Args:
			url: sent as 'url' in the JSON request body.
			source: sent as 'source' when truthy, otherwise omitted.

		Returns:
			The 'nace_tag' field of the reply when its 'error_no' is 0;
			False for a falsy response, an undecodable or non-dict reply, a
			non-zero 'error_no', or a missing 'nace_tag' field.
		"""
		req_body = {'url': url}
		if source:
			req_body['source'] = source
		endpoint = self.config.CONFIG['GLOBAL']['API']['TIDY_SERVER_API'] + "/tidyserver/getnacetags"
		# NOTE(review): meaning of the positional True is defined by Lcurl.post
		ret = self._lcurl.post(endpoint, json.dumps(req_body), True, {"content-type":"application/json"})
		if not ret:
			return False
		try:
			# the value of .json() is decoded once more, i.e. it is expected
			# to be a JSON string itself -- TODO confirm against the server
			payload = json.loads(ret.json())
		except Exception as e:
			print('[get_nace_id ERROR] %s' % e)
			return False
		# an empty or non-dict payload (e.g. {} or null) used to fall through
		# to payload['nace_tag'] and raise
		if not isinstance(payload, dict) or payload.get('error_no') != 0:
			return False
		return payload.get('nace_tag', False)

	def get_weixin_gzh_detail(self, src_id):
		"""Fetch the detail record for ``src_id`` from the ccinfo weixin_gzh API.

		Returns the decoded JSON reply, or False when the request yields a
		falsy response.
		"""
		body = json.dumps({'weixin_gzh_id':src_id})
		response = self._lcurl.post('http://ccinfo.intsig.net/ccinfo/v2/weixin_gzh', body, True)
		return response.json() if response else False

	def get_hot_key_detail(self, hot_key):
		"""Fetch the detail record for ``hot_key`` from the ccinfo hot_words API.

		Returns the decoded JSON reply, or False when the request yields a
		falsy response.
		"""
		body = json.dumps({'word':hot_key})
		response = self._lcurl.post('http://ccinfo.intsig.net/ccinfo/v2/hot_words', body, True)
		return response.json() if response else False

	def add_push_cnt(self, id, type):
		"""Send a "push" operation for a hot word to the ccinfo modify_hot_words API.

		Args:
			id: sent as 'id' in the request body.
			type: sent as 'type' in the request body.

		Returns:
			The decoded reply when its 'ret' field is truthy, else False
			(also False when the request yields a falsy response).
		"""
		body = json.dumps({'id':id, 'type':type, 'opt':"push"})
		response = self._lcurl.post('http://ccinfo.intsig.net/ccinfo/v2/modify_hot_words', body, True)
		if not response:
			return False
		result = response.json()
		return result if result['ret'] else False

	def add_industry_push_cnt(self, industry_id):
		"""Send a "push" operation for an industry to the ccinfo modify_industry_tag API.

		Returns:
			The decoded reply when its 'ret' field is truthy, else False
			(also False when the request yields a falsy response).
		"""
		body = json.dumps({'industry_id':industry_id, 'opt':"push"})
		response = self._lcurl.post('http://ccinfo.intsig.net/ccinfo/v2/modify_industry_tag', body, True)
		if not response:
			return False
		result = response.json()
		return result if result['ret'] else False

	def add_yunying_push_target(self, post_data):
		"""Post ``post_data`` as JSON to YUNYING_PUSH_API's add_ccinfo_msg_target endpoint.

		Returns the decoded JSON reply, or False when the request yields a
		falsy response.
		"""
		endpoint = self.config.CONFIG['GLOBAL']['API']['YUNYING_PUSH_API']+'/push/add_ccinfo_msg_target'
		response = self._lcurl.post(endpoint, json.dumps(post_data), True)
		return response.json() if response else False

	def fuzzySuggestCorpName(self, keyword):
		"""Return the first advanceSearch hit for ``keyword`` from INTSIG_API.

		Returns:
			The first element of the reply's data.items when status is '1'
			and data.total is positive; False for an empty keyword, a falsy
			response, any other reply, or any error while reading the reply
			(the error is printed).
		"""
		if not keyword:
			return False
		endpoint = self.config.CONFIG['GLOBAL']['API']['INTSIG_API'] + '/user/CCAppService/enterprise/advanceSearch'
		response = self._lcurl.get(endpoint, {'keyword': keyword, 'start': 0})
		if not response:
			return False
		try:
			result = response.json()
			has_hits = result['status'] == '1' and result['data']['total'] > 0
			return result['data']['items'][0] if has_hits else False
		except Exception as err:
			print('[fuzzySuggestCorpName ERROR] %s' % err)
			return False

	def getSummaryByName(self, name):
		"""Look up a company summary by name through CORP_QUERY_API.

		Returns:
			The reply's 'data' field when status is '1'; False for an empty
			name, a falsy response, or any other status.
		"""
		if not name:
			return False
		endpoint = self.config.CONFIG['GLOBAL']['API']['CORP_QUERY_API'] + '/CCAppService/enterprise/getSummaryByName'
		response = self._lcurl.get(endpoint, {'name': name})
		if not response:
			return False
		result = response.json()
		return result['data'] if result['status'] == '1' else False

	def download_from_camfs(self, filename):
		"""Download a file by name from FILE_API's /download endpoint.

		Returns:
			The response's ``content``, or False for an empty filename or a
			falsy response.
		"""
		if not filename:
			return False
		endpoint = self.config.CONFIG['GLOBAL']['API']['FILE_API'] + '/download'
		response = self._lcurl.get(endpoint, {'filename':filename}, False)
		return response.content if response else False

	def upload_pic_2b(self, binary_data):
		"""Upload raw picture bytes through COMPANY_FILE_API's common_upload_picture endpoint.

		Returns:
			The 'url' field of the decoded reply, or False for empty data or
			a falsy response.
		"""
		if not binary_data:
			return False
		endpoint = self.config.CONFIG['GLOBAL']['API']['COMPANY_FILE_API'] + '/common_upload_picture?type=1&user_id=11111&client_id=1001'
		response = self._lcurl.post(endpoint, binary_data, False)
		return response.json()['url'] if response else False
	'''