def __init__(self, task_type: TaskType, temporary_city_str=None, number=None):
    """Prepare a base-task inserter: identifiers, logger, Mongo handle, indexes.

    :param task_type: task type this inserter works on.
    :param temporary_city_str: stored unchanged on the instance.
    :param number: stored on the instance; ``None`` is replaced by ``''``.
    """
    # Task type and the optional identifiers supplied by the caller.
    self.task_type = task_type
    if number is None:
        number = ''
    self.number = number
    self.temporary_city_str = temporary_city_str

    # Collection name, as produced by generate_collection_name().
    self.collection_name = self.generate_collection_name()

    # Logger used by this inserter.
    self.logger = get_logger("InsertBaseTask")

    # offset: data cursor offset, used to restore the cursor position when a
    # query raises.
    # pre_offset: leading cursor offset, used to restore the cursor position
    # when writing to the database.
    self.offset = self.pre_offset = 0

    mongo_client = pymongo.MongoClient(host=config.mongo_host)
    # A hotel-specific branch used to live here and is disabled:
    # if self.task_type == TaskType.hotel:
    #     self.db = client[config.hotel_base_task_db]
    # else:
    self.db = mongo_client[config.mongo_base_task_db]

    self.tasks = BaseTaskList()

    # Create the indexes as part of initialisation.
    self.create_mongo_indexes()
def __init__(self, task_type: TaskType, number=None, routine=True, is_test=False):
    """Prepare a date-task inserter: Mongo handles, per-source collections and queues.

    :param task_type: task type this inserter works on.
    :param number: stored on the instance; ``None`` is replaced by ``''``.
    :param routine: when truthy (and ``is_test`` is exactly ``False``),
        ``delete_single_slice()`` is called during construction.
    :param is_test: when truthy, the date-task database is switched to
        ``'Test_RoutineDateTask'``.
    """
    self.task_type = task_type
    self.number = '' if number is None else number

    # Logger used by this inserter.
    self.logger = get_logger("InsertBaseTask")

    mongo_client = pymongo.MongoClient(host=config.mongo_host)
    self.base_task_db = mongo_client[config.mongo_base_task_db]

    # Pick the date-task database; test runs use a dedicated database.
    date_db_name = config.mongo_date_task_db
    self.is_test = is_test
    if self.is_test:
        date_db_name = 'Test_RoutineDateTask'
    self.date_task_db = mongo_client[date_db_name]

    base_db = self.base_task_db
    self.base_collections = base_db[self.generate_base_collections()]

    # Initialise the PackageInfo helper.
    self.package_info = PackageInfo()

    # Delete the mongo documents whose slice period is 1.
    if is_test is False and routine:
        self.delete_single_slice()

    # One collection and one task queue per source.
    self.date_collections_dict = {}
    self.tasks_dict = {}
    for source in self.get_total_source():
        self.date_collections_dict[source] = self.generate_date_collections(source=source)
        # Create the indexes for this source.
        self.create_indexes(source=source)
        # Start this source with an empty task queue.
        self.tasks_dict[source] = DateTaskList()

    # offset: data cursor offset, used to restore the cursor position when a
    # query raises.
    # pre_offset: leading cursor offset, used to restore the cursor position
    # when writing to the database.
    self.offset = self.pre_offset = 0

    # Set when the previous round found no breakfast-included result, to hint
    # that the next round should fetch one more breakfast-included task.
    self.take_one_more = 0
    # self.memory_count_by_source = {}
#!/usr/bin/env python # -*- coding: utf-8 -*- # @Time : 2017/11/30 下午2:58 # @Author : Hou Rong # @Site : # @File : generate_task_info.py # @Software: PyCharm import traceback from conn_pool import base_data_pool, source_info_pool, task_db_monitor_db_pool from mysql_execute import fetchall, fetchall_ss from logger_file import get_logger from common.sql_query import flight_base_task_query, temporary_flight_base_task_query, roundflight_base_task_query,\ hotel_base_task_query,multiflight_base_task_query,rail_base_task_query,ferries_base_task_query logger = get_logger("generate_base_task") def get_rank_result(sql): rank = {} rank_list = [ (rank, sql), ] for each_rank, each_rank_sql in rank_list: for row in fetchall(base_data_pool, each_rank_sql): iata_code = row[0] inner_order = row[1] city_id = row[2] continent_id = row[3] city_name = row[4] tri_code = row[5] if city_id in each_rank:
import datetime
from urllib import parse
from queue import Queue
from rabbitmq import pika_send
from tornado.options import define
from conf import config, task_source
from concurrent.futures import ThreadPoolExecutor
from functools import partial
from rabbitmq.producter import final_distribute, insert_mongo_data, update_running
from logger_file import get_logger
from rabbitmq.consumer import insert_spider_result, feed_back_date_task, slave_take_times, task_temporary_monitor, query_temporary_task, stop_temporary_task
from model.TaskType import TaskType
from common.InsertDateTask import InsertDateTask
from common.InsertBaseTask import InsertBaseTask

logger = get_logger('server')
logger.info('aa')
port = config.server_port
define("port", default=port, help="Run server on a specific port", type=int)
tornado.options.parse_command_line()


class Executor(ThreadPoolExecutor):
    """Process-wide shared thread pool.

    Every ``Executor(...)`` call returns the same ``ThreadPoolExecutor``; the
    pool is built on the first call and later arguments are ignored.  Because
    ``__new__`` returns a plain ``ThreadPoolExecutor`` rather than an
    ``Executor`` instance, ``__init__`` is not run on the returned object.
    """

    _instance = None

    def __new__(cls, *args, **kwargs):
        """Return the shared pool, creating it on first use.

        The pool size is taken from the first positional argument, or from the
        ``max_workers`` keyword when no positional argument is given.  With
        neither, ``ThreadPoolExecutor`` picks its own default instead of this
        method failing on ``args[0]``.
        """
        if cls._instance is None:
            max_workers = args[0] if args else kwargs.get('max_workers')
            cls._instance = ThreadPoolExecutor(max_workers=max_workers)
        return cls._instance
import tornado.ioloop import tornado.web import pika from pika.adapters.tornado_connection import TornadoConnection import pymongo from conf import task_source, config from functools import partial from logger_file import get_logger from bson.objectid import ObjectId logger = get_logger('pika_send') TORNADO_PORT = 8889 # RMQ_USER = '******' # RMQ_PWD = 'password' # RMQ_HOST = 'localhost' # RMQ_PORT = 5762 # IOLOOP_TIMEOUT = 500 # holds channel objects client = pymongo.MongoClient(host=config.mongo_host) date_task_db = client[config.mongo_date_task_db] channel = None class PikaClient(object): # all the following functions precede in order starting with connect def connect(self): try:
#!/usr/bin/env python # -*- coding: utf-8 -*- # @Time : 2017/12/1 下午4:47 # @Author : Hou Rong # @Site : # @File : mysql_execute.py # @Software: PyCharm import pymysql.cursors from logger_file import get_logger logger = get_logger("mysql_executor") def fetch_count(conn_pool, sql): try: conn = conn_pool.connection() cursor = conn.cursor() cursor.execute(sql) result = cursor.fetchone() cursor.close() return result[0] except Exception as e: logger.exception(msg="[sql error]", exc_info=e) logger.info(sql) finally: if conn: conn.close() def fetchall(conn_pool, sql): try: conn = conn_pool.connection() cursor = conn.cursor()
import toolbox.Date import init_path from collections import defaultdict import common.patched_mongo_insert from model.TaskType import TaskType from logger_file import get_logger from conf import config, task_source from model.PackageInfo import PackageInfo from toolbox.Date import date_takes from model.DateTask import DateTask from logger_file import get_logger from common.TempTask import TempTask from common.generate_task_utils import sort_date_task_cursor, decide_need_hotel_task, today_date from conf.config import multiply_times, frequency logger = get_logger('InsertDateTask') logger_2 = get_logger('Insert_process') toolbox.Date.DATE_FORMAT = '%Y%m%d' INSERT_WHEN = 2000 class DateTaskList(list): def append_task(self, task: DateTask): self.append(task.to_dict()) class InsertDateTask(object): def __init__(self, task_type: TaskType, number=None, routine=True, is_test=False): self.task_type = task_type
import gevent import random import pika_send import datetime import math import time import traceback from collections import defaultdict from functools import partial from conf.config import used_times_config, used_times_by_source as _used_times_by_source, used_times_specified as _used_times_specified from logger_file import get_logger from model.TaskType import TaskType logger = get_logger('producter') used_times_specified = _used_times_specified used_times_by_source = _used_times_by_source # total_task_level_dict = {} # collection_task_level = {} # level_dict = defaultdict(list) # distribute_result = defaultdict(dict) # collection_advance_dict = {} # date_list = [] def init_variable(_task_type): global total_task_level_dict global collection_task_level global level_dict global distribute_result
#coding:utf-8 import pymongo import datetime import init_path from conf import config from collections import defaultdict from mysql_execute import update_monitor from logger_file import get_logger from rabbitmq.supervise import get_source_info, update_dead_running, query_mongo, get_average_success_count from logger_file import get_logger from model.TaskType import TaskType from rabbitmq.pika_send import date_task_db logger = get_logger('delete_task_supervise') client = pymongo.MongoClient(host=config.mongo_host) date_task_db = client[config.mongo_date_task_db] base_task_db = client[config.mongo_base_task_db] def delete_datetask_documents(task_type): ''' 每天零点定时执行一次! 刪除已发完任务的文档,不能在监控中使用,因为会把失败的任务删掉从而被绿皮过滤掉。 :return: ''' delete_documents = defaultdict(dict) # 插入分源、分package_id,所有完成反馈任务的切片最大值信息 for package_id, source, source_info in get_source_info(task_type): if source_info['record_count'] == source_info[ 'fail_count'] + source_info['success_count']:
#!/usr/bin/env python # -*- coding: utf-8 -*- # @Time : 2017/11/29 下午9:08 # @Author : Hou Rong # @Site : # @File : BaseDistribution.py # @Software: PyCharm import redis import json import datetime from Task import Task from logger_file import get_logger from collections import defaultdict logger = get_logger("BaseDistribution") MIN_FLIGHT_TASK_SIZE = 50000 MIN_TRAIN_TASK_SIZE = 10000 MIN_BUS_TASK_SIZE = 10000 MIN_HOTEL_TASK_SIZE = 50000 MIN_ROUNDFLIGHT_TASK_SIZE = 20000 MIN_MULTIFLIGHT_TASK_SIZE = 20000 ADJUST_FREQUENCY_RATE = 0.4 class BaseDistribution(object): def __init__(self): self.source_rate_map = {} self.m_feedback_source_vec = [] # queue
from pika import adapters import pika import datetime from rabbitmq import pika_send from logger_file import get_logger from mysql_execute import update_monitor, fetchall from conn_pool import task_db_monitor_db_pool from model.TaskType import TaskType logger = get_logger('server') logger2 = get_logger('consumer') content = [] def insert_spider_result(result): ''' 爬虫入库,可以增加mongo数据入库出现异常的处理(暂未) :param result: :return: ''' try: today = datetime.datetime.today().strftime('%Y%m%d') # for task_info in result: # task_info['update_time'] = datetime.datetime.now() for i in range(len(result)): result[i].update({ 'update_time': datetime.datetime.now().strftime('%Y%m%d%H%M%S') }) pika_send.client['case_result'][today].insert_many(result) # logger.info('tid:%s'%(task_info['tid']))
import time import hashlib import logging import datetime from logger_file import get_logger from bson.objectid import ObjectId from rabbitmq import pika_send from conf import config add_city_id = ''' ['60184','60185','60186','60187','60188','60189','60190','60191','60192','60193','60194','60195','60196','60197','60198','60199','60200','60201','60202','60203','60204','60205','60206','60207','60208','60209','60210','60211','60212','60213','60214','60215','60216','60217','60218','60219','60220','60221','60222','60223','60224','60225','60226','60227','60228','60229','60230','60231','60232','60233','60234','60235','60236','60237','60238','60239','60240','60241','60242','60243','60244','60245','60246','60247','60248','60249','60250','60251','60252','60253','60254','60255','60256','60257','60258','60259','60260','60261','60262','60263','60264','60265','60266','60267','60268','60269','60270','60271','60272','60273','60274','60275','60276','60277','60278','60279','60280','60281','60282','60283','60284','60285','60286','60287','60288','60289','60290','60291','60292','60293','60294','60295','60296','60297','60298','60299','60300','60301','60302','60303','60304','60305','60306','60307','60308','60309','60310','60311','60312','60313','60314','60315','60316','60317','60318','60319','60320','60321','60322','60323','60324','60325','60326','60327','60328','60329','60330','60331','60332','60333','60334','60335','60336','60337','60338','60339','60340','60341','60342','60343','60344','60345','60346','60347','60348','60349','60350','60351','60352','60353','60354','60355','60356','60357','60358','60359','60360','60361','60362','60363','60364','60365','60366','60367','60368','60369','60370','60371','60372','60373','60374','60375','60376','60377','60378','60379','60380','60381','60382','60383','60384','60385','60386','60387','60388','60389','60390','60391','60392','60393','60394','60395','60396','60397','60398','60399','60400','60401','60402','60403','60404','60405','60406','60407','60408
','60409','60410','60411','60412','60413','60414','60415','60416','60417','60418','60419','60420','60421','60422','60423','60424','60425','60426','60427','60428','60429','60430','60431','60432','60433','60434','60435','60436','60437','60438','60439','60440','60441','60442','60443','60444','60445','60446','60447','60448','60449','60450','60451','60452','60453','60454','60455','60456','60457','60458','60459','60460','60461','60462','60463','60464','60465','60466','60467','60468','60469','60470','60471','60472','60473','60474','60475','60476','60477','60478','60479','60480','60481','60482','60483','60484','60485','60486','60487','60488','60489','60490','60491','60492','60493','60494','60495','60496','60497','60498','60499','60500','60501','60502','60503','60504','60505','60506','60507','60508','60509','60510','60511','60512','60513','60514','60515','60516','60517','60518','60519','60520','60521','60522','60523','60524','60525','60526','60527','60528','60529','60530','60531','60532','60533','60534','60535','60536','60537','60538','60539','60540','60541','60542','60543','60544','60545','60546','60547','60548','60549','60550','60551','60552','60553','60554','60555','60556','60557','60558','60559','60560','60561','60562','60563','60564','60565','60566','60567','60568','60569','60570','60571','60572','60573','60574','60575','60576','60577','60578','60579','60580','60581','60582','60583','60584','60585','60586','60587','60588','60589','60590','60591','60592','60593','60594','60595','60596','60597','60598','60599','60600','60601','60602','60603','60604','60605','60606','60607','60608','60609','60610','60611','60612','60613','60614','60615','60616','60617','60618','60619','60620','60621','60622','60623','60624','60625','60626','60627','60628','60629','60630','60631','60632','60633','60634','60635','60636','60637','60638','60639','60640','60641','60642','60643','60644','60645','60646','60647','60648','60649','60650','60651','60652','60653','60654','60655','60656','60657','60658
','60659','60660','60661','60662','60663','60664','60665','60666','60667','60668','60669','60670','60671','60672','60673','60674','60675','60676','60677','60678','60679','60680','60681','60682','60683','60684','60685','60686','60687','60688','60689','60690','60691','60692','60693','60694','60695','60696','60697','60698','60699','60700','60701','60702','60703','60704','60705','60706','60707','60708','60709','60710','60711','60712','60713','60714','60715','60716','60717','60718','60719','60720','60721','60722','60723','60724','60725','60726','60727','60728','60729','60730','60731','60732','60733','60734','60735','60736','60737','60738','60739','60740','60741','60742','60743','60744','60745','60746','60747','60748','60749','60750','60751','60752','60753','60754','60755','60756','60757','60758','60759','60760','60761','60762','60763','60764','60765','60766','60767','60768','60769','60770','60771','60772','60773','60774','60775','60776','60777','60778','60779','60780','60781','60782','60783','60784','60785','60786','60787','60788','60789','60790','60791','60792','60793','60794','60795','60796','60797','60798','60799','60800','60801','60802','60803','60804','60805','60806','60807','60808','60809','60810','60811','60812','60813','60814','60815','60816','60817','60818','60819','60820','60821','60822','60823','60824','60825','60826','60827','60828','60829','60830','60831','60832','60833','60834','60835','60836','60837','60838','60839','60840','60841','60842','60843','60844','60845','60846','60847','60848','60849','60850','60851','60852','60853','60854','60855','60856','60857','60858','60859','60860','60861','60862','60863','60864','60865','60866','60867','60868','60869','60870','60871','60872','60873','60874','60875','60876','60877','60878','60879','60880','60881','60882','60883','60884','60885','60886','60887','60888','60889','60890','60891','60892','60893','60894','60895','60896','60897','60898','60899','60900','60901','60902','60903','60904','60905','60906','60907','60908
','60909','60910','60911','60912','60913','60914','60915','60916','60917','60918','60919','60920','60921','60922','60923','60924','60925','60926','60927','60928','60929','60930','60931','60932','60933','60934','60935','60936','60937','60938','60939','60940','60941','60942','60943','60944','60945','60946','60947','60948','60949','60950','60951','60952','60953','60954','60955','60956','60957','60958','60959','60960','60961','60962','60963','60964','60965','60966','60967','60968','60969','60970','60971','60972','60973','60974','60975','60976','60977','60978','60979','60980','60981','60982','60983','60984','60985','60986','60987','60988','60989','60990','60991','60992','60993','60994','60995','60996','60997','60998','60999','61000','61001','61002','61003','61004','61005','61006','61007','61008','61009','61010','61011','61012','61013','61014','61015','61016','61017','61018','61019','61020','61021','61022','61023','61024','61025','61026','61027','61028','61029','61030','61031','61032','61033','61034','61035','61036','61037','61038','61039','61040','61041','61042','61043','61044','61045','61046','61047','61048','61049','61050','61051','61052','61053','61054','61055','61056','61057','61058','61059','61060','61061','61062','61063','61064','61065','61066','61067','61068','61069','61070','61071','61072','61073','61074','61075','61076','61077','61078','61079','61080','61081','61082','61083','61084','61085','61086','61087','61088','61089','61090','61091','61092','61093','61094','61095','61096','61097','61098','61099','61100','61101','61102','61103','61104','61105','61106','61107','61108','61109','61110','61111','61112','61113','61114','61115','61116','61117','61118','61119','61120','61121','61122','61123','61124','61125','61126','61127','61128','61129','61130','61131','61132','61133','61134','61135','61136','61137','61138','61139','61140','61141','61142','61143','61144','61145','61146','61147','61148','61149','61150','61151','61152','61153','61154'] ''' add_city_id = 
'''['50696']''' add_city_id = '''['60677']''' logger = get_logger('test2') # host = 'localhost:123456' host = '10.10.239.46:12345' # host = '10.10.110.74:123456' # url = 'http://localhost:12345/workload' res = requests.post('http://10.10.239.46:123456/template_workload', data={ 'number': 555, 'operate': 'stop', 'type': 'Flight' }) for i in range(1000): url = 'http://{host}/workload'.format(host=host) try:
import pymongo import datetime import init_path from conf import config import math from model.TaskType import TaskType from common.InsertDateTask import InsertDateTask from common.InsertBaseTask import InsertBaseTask from mysql_execute import fetchall, fetchall_ss, fetch_count, update_monitor from conn_pool import base_data_pool, task_db_monitor_db_pool from common.sql_query import supervise_supplement_city_sql from logger_file import get_logger logger = get_logger('supervise_supplement_city') client = pymongo.MongoClient(host=config.mongo_host) base_task_db = client[config.mongo_base_task_db] def recreate_base_data(base_name): base_task_db[base_name].drop() insert_task = InsertBaseTask(task_type=TaskType.Flight) insert_task.insert_task() def supplement_city_count(): logger.info('开始监控本日城市数量:') flight_2_count = fetch_count( base_data_pool, supervise_supplement_city_sql['flight_package_id_2']) flight_3_count = fetch_count( base_data_pool, supervise_supplement_city_sql['flight_package_id_3'])
import pymongo import datetime import init_path from conf.config import mongo_host, mongo_base_task_db, mongo_date_task_db, used_times_config from collections import defaultdict from mysql_execute import update_monitor, update_code, fetchall from logger_file import get_logger from conf import task_source from conf.config import frequency from model.TaskType import TaskType from conn_pool import task_db_monitor_db_pool logger = get_logger('supervise') client = pymongo.MongoClient(host=mongo_host) date_task_db = client[mongo_date_task_db] base_task_db = client[mongo_base_task_db] slices_result = None package_count_list = {} package_statistic = {} update_time = datetime.datetime.now().strftime('%Y%m%d%H') + '00' update_day = datetime.datetime.now().strftime('%Y%m%d') today = datetime.datetime.today().strftime('%Y%m%d') avg_res = {} def query_mongo(task_type): ''' 分package_id,分collection查询datetask各集合中的任务状态。 :return: