Example #1
0
    def __init__(self, task_type: TaskType, temporary_city_str=None, number=None):
        """Prepare instance state, the MongoDB handle and the indexes.

        :param task_type: the kind of task this inserter works on.
        :param temporary_city_str: optional value, stored on the instance unchanged.
        :param number: optional value; ``None`` is stored as an empty string.
        """
        self.task_type = task_type
        self.number = '' if number is None else number
        self.temporary_city_str = temporary_city_str

        # The collection name is generated only after the fields above are set.
        self.collection_name = self.generate_collection_name()

        self.logger = get_logger("InsertBaseTask")

        # offset: data cursor position, kept so it can be restored when a
        #         query raises.
        # pre_offset: the preceding cursor position, kept so it can be
        #             restored when storing the data.
        self.offset = self.pre_offset = 0

        # Every task type uses the same base-task database.
        mongo_client = pymongo.MongoClient(host=config.mongo_host)
        self.db = mongo_client[config.mongo_base_task_db]

        self.tasks = BaseTaskList()

        # Index creation is the last step of initialisation.
        self.create_mongo_indexes()
Example #2
0
    def __init__(self, task_type: TaskType, number=None, routine=True, is_test=False):
        """Prepare database handles plus one collection and task queue per source.

        :param task_type: the kind of task this inserter works on.
        :param number: optional value; ``None`` is stored as an empty string.
        :param routine: when truthy (and ``is_test`` is exactly ``False``),
            ``delete_single_slice()`` is run during initialisation.
        :param is_test: when truthy, the date-task database is switched to
            ``'Test_RoutineDateTask'``.
        """
        self.task_type = task_type
        self.number = '' if number is None else number

        self.logger = get_logger("InsertBaseTask")

        mongo_client = pymongo.MongoClient(host=config.mongo_host)
        self.base_task_db = mongo_client[config.mongo_base_task_db]
        self.date_task_db = mongo_client[config.mongo_date_task_db]
        self.is_test = is_test
        if self.is_test:
            # Test runs write to a dedicated database.
            self.date_task_db = mongo_client['Test_RoutineDateTask']

        self.base_collections = self.base_task_db[self.generate_base_collections()]

        self.package_info = PackageInfo()

        # Remove the mongo documents whose slice period is 1
        # (routine, non-test runs only).
        if routine and is_test is False:
            self.delete_single_slice()

        # One date collection and one task queue per source.
        self.date_collections_dict = {}
        self.tasks_dict = {}
        for source_name in self.get_total_source():
            date_collection = self.generate_date_collections(source=source_name)
            self.date_collections_dict[source_name] = date_collection

            self.create_indexes(source=source_name)

            self.tasks_dict[source_name] = DateTaskList()

        # offset: data cursor position, kept so it can be restored when a
        #         query raises.
        # pre_offset: the preceding cursor position, kept so it can be
        #             restored when storing the data.
        # take_one_more: set when the previous round had no
        #                breakfast-included result, as a hint to fetch one
        #                more breakfast-included item next time.
        self.offset = self.pre_offset = self.take_one_more = 0

        self.memory_count_by_source = {}
Example #3
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/11/30 下午2:58
# @Author  : Hou Rong
# @Site    : 
# @File    : generate_task_info.py
# @Software: PyCharm
import traceback
from conn_pool import base_data_pool, source_info_pool, task_db_monitor_db_pool
from mysql_execute import fetchall, fetchall_ss
from logger_file import get_logger
from common.sql_query import  flight_base_task_query, temporary_flight_base_task_query, roundflight_base_task_query,\
    hotel_base_task_query,multiflight_base_task_query,rail_base_task_query,ferries_base_task_query

# Module-level logger, requested from the project's get_logger helper under
# the name "generate_base_task".
logger = get_logger("generate_base_task")


def get_rank_result(sql):
    rank = {}
    rank_list = [
        (rank, sql),
    ]
    for each_rank, each_rank_sql in rank_list:
        for row in fetchall(base_data_pool, each_rank_sql):
            iata_code = row[0]
            inner_order = row[1]
            city_id = row[2]
            continent_id = row[3]
            city_name = row[4]
            tri_code = row[5]
            if city_id in each_rank:
Example #4
0
import datetime
from urllib import parse
from queue import Queue
from rabbitmq import pika_send
from tornado.options import define
from conf import config, task_source
from concurrent.futures import ThreadPoolExecutor
from functools import partial
from rabbitmq.producter import final_distribute, insert_mongo_data, update_running
from logger_file import get_logger
from rabbitmq.consumer import insert_spider_result, feed_back_date_task, slave_take_times, task_temporary_monitor, query_temporary_task, stop_temporary_task
from model.TaskType import TaskType
from common.InsertDateTask import InsertDateTask
from common.InsertBaseTask import InsertBaseTask

# Module-level logger for the server code in this file.
logger = get_logger('server')
# NOTE(review): 'aa' looks like a leftover debug message -- confirm it is
# still wanted at import time.
logger.info('aa')
# Register a "port" command-line option whose default is the configured
# server port, then parse the command line.
port = config.server_port
define("port", default=port, help="Run server on a specific port", type=int)
# NOTE(review): this line needs the name `tornado` itself to be bound; among
# the imports visible here only `from tornado.options import define` appears.
tornado.options.parse_command_line()


class Executor(ThreadPoolExecutor):
    """Shared thread pool handed out through the constructor call.

    The first ``Executor(...)`` call builds a plain ``ThreadPoolExecutor`` and
    caches it on the class; every later call returns that same pool and its
    arguments are ignored. Because ``__new__`` returns an object that is not
    an instance of ``Executor``, ``__init__`` is never run on it.

    The pool size may be given positionally (``Executor(4)``), by keyword
    (``Executor(max_workers=4)``) or omitted, in which case
    ``ThreadPoolExecutor`` picks its own default.
    """
    _instance = None

    def __new__(cls, *args, **kwargs):
        if cls._instance is None:
            # Accept the size positionally or by keyword instead of assuming
            # that args[0] always exists.
            if args:
                max_workers = args[0]
            else:
                max_workers = kwargs.get('max_workers')
            cls._instance = ThreadPoolExecutor(max_workers=max_workers)
        return cls._instance

Example #5
0
import tornado.ioloop
import tornado.web
import pika
from pika.adapters.tornado_connection import TornadoConnection
import pymongo
from conf import task_source, config
from functools import partial
from logger_file import get_logger
from bson.objectid import ObjectId

logger = get_logger('pika_send')
# NOTE(review): presumably the port of the tornado application in this
# module; its use is not visible in this section -- confirm.
TORNADO_PORT = 8889
# RabbitMQ connection settings, currently disabled:
# RMQ_USER = '******'
# RMQ_PWD = 'password'
# RMQ_HOST = 'localhost'
# RMQ_PORT = 5762
#
# NOTE(review): unit not visible here (milliseconds?) -- confirm.
IOLOOP_TIMEOUT = 500

# Module-wide MongoDB client and the date-task database taken from it.

client = pymongo.MongoClient(host=config.mongo_host)
date_task_db = client[config.mongo_date_task_db]

# Holds the channel object; starts out as None.
channel = None


class PikaClient(object):
    # all the following functions precede in order starting with connect
    def connect(self):
        try:
Example #6
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/12/1 下午4:47
# @Author  : Hou Rong
# @Site    : 
# @File    : mysql_execute.py
# @Software: PyCharm
import pymysql.cursors
from logger_file import get_logger
# Module-level logger used by the query helpers below.
logger = get_logger("mysql_executor")

def fetch_count(conn_pool, sql):
    """Run ``sql`` and return the first column of the first result row.

    :param conn_pool: object whose ``connection()`` returns a DB-API style
        connection (``cursor()`` / ``close()``).
    :param sql: the statement to execute, passed to the cursor unchanged.
    :return: ``row[0]`` of the first fetched row, or ``None`` when anything
        fails (the error and the statement are logged, not raised).
    """
    # Bound up front so the ``finally`` clause is safe even when
    # ``conn_pool.connection()`` itself raises.
    conn = None
    try:
        conn = conn_pool.connection()
        cursor = conn.cursor()
        try:
            cursor.execute(sql)
            result = cursor.fetchone()
        finally:
            # Close the cursor on the failure path too, not only on success.
            cursor.close()
        return result[0]
    except Exception as e:
        logger.exception(msg="[sql error]", exc_info=e)
        logger.info(sql)
        return None
    finally:
        if conn is not None:
            conn.close()


def fetchall(conn_pool, sql):
    try:
        conn = conn_pool.connection()
        cursor = conn.cursor()
Example #7
0
import toolbox.Date
import init_path
from collections import defaultdict
import common.patched_mongo_insert
from model.TaskType import TaskType
from logger_file import get_logger
from conf import config, task_source
from model.PackageInfo import PackageInfo
from toolbox.Date import date_takes
from model.DateTask import DateTask
from logger_file import get_logger
from common.TempTask import TempTask
from common.generate_task_utils import sort_date_task_cursor, decide_need_hotel_task, today_date
from conf.config import multiply_times, frequency

# Two module-level loggers, requested under different names.
logger = get_logger('InsertDateTask')
logger_2 = get_logger('Insert_process')

# Rebinds the DATE_FORMAT attribute on the imported toolbox.Date module, so
# the change is seen by everything that reads it from that module.
toolbox.Date.DATE_FORMAT = '%Y%m%d'

# NOTE(review): presumably the number of queued tasks that triggers a bulk
# insert; its use is not visible in this section -- confirm.
INSERT_WHEN = 2000


class DateTaskList(list):
    """A ``list`` with a helper for adding tasks via their ``to_dict()`` form."""

    def append_task(self, task: DateTask):
        """Append the result of ``task.to_dict()`` to this list."""
        task_as_dict = task.to_dict()
        self.append(task_as_dict)


class InsertDateTask(object):
    def __init__(self, task_type: TaskType, number=None, routine=True, is_test=False):
        self.task_type = task_type
Example #8
0
import gevent
import random
import pika_send
import datetime
import math
import time
import traceback
from collections import defaultdict
from functools import partial
from conf.config import used_times_config, used_times_by_source as _used_times_by_source, used_times_specified as _used_times_specified
from logger_file import get_logger
from model.TaskType import TaskType

logger = get_logger('producter')

# Expose the two config values under module-level names without the
# leading underscore used for the import aliases.
used_times_specified = _used_times_specified
used_times_by_source = _used_times_by_source

# The names below are not created at import time; init_variable() declares
# (at least the first four of) them with `global`.
# total_task_level_dict = {}
# collection_task_level = {}
# level_dict = defaultdict(list)
# distribute_result = defaultdict(dict)
# collection_advance_dict = {}
# date_list = []


def init_variable(_task_type):
    global total_task_level_dict
    global collection_task_level
    global level_dict
    global distribute_result
Example #9
0
#coding:utf-8
import pymongo
import datetime
import init_path
from conf import config
from collections import defaultdict
from mysql_execute import update_monitor
from logger_file import get_logger
from rabbitmq.supervise import get_source_info, update_dead_running, query_mongo, get_average_success_count
from logger_file import get_logger
from model.TaskType import TaskType
from rabbitmq.pika_send import date_task_db

logger = get_logger('delete_task_supervise')

# Module-wide MongoDB client and the two database handles taken from it.
client = pymongo.MongoClient(host=config.mongo_host)
# NOTE(review): this rebinds date_task_db, replacing the object imported
# from rabbitmq.pika_send at the top of the file -- confirm which one is meant.
date_task_db = client[config.mongo_date_task_db]
base_task_db = client[config.mongo_base_task_db]


def delete_datetask_documents(task_type):
    '''
    每天零点定时执行一次!
    刪除已发完任务的文档,不能在监控中使用,因为会把失败的任务删掉从而被绿皮过滤掉。
    :return:
    '''
    delete_documents = defaultdict(dict)
    # 插入分源、分package_id,所有完成反馈任务的切片最大值信息
    for package_id, source, source_info in get_source_info(task_type):
        if source_info['record_count'] == source_info[
                'fail_count'] + source_info['success_count']:
Example #10
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/11/29 下午9:08
# @Author  : Hou Rong
# @Site    :
# @File    : BaseDistribution.py
# @Software: PyCharm
import redis
import json
import datetime
from Task import Task
from logger_file import get_logger
from collections import defaultdict

logger = get_logger("BaseDistribution")

# Size constants, one per task type, followed by a frequency adjustment rate.
# NOTE(review): how these are applied is not visible in this section --
# presumably minimum task counts per type; confirm against the users.
MIN_FLIGHT_TASK_SIZE = 50000
MIN_TRAIN_TASK_SIZE = 10000
MIN_BUS_TASK_SIZE = 10000
MIN_HOTEL_TASK_SIZE = 50000
MIN_ROUNDFLIGHT_TASK_SIZE = 20000
MIN_MULTIFLIGHT_TASK_SIZE = 20000
ADJUST_FREQUENCY_RATE = 0.4


class BaseDistribution(object):
    def __init__(self):
        self.source_rate_map = {}
        self.m_feedback_source_vec = []

        # queue
Example #11
0
from pika import adapters
import pika
import datetime
from rabbitmq import pika_send
from logger_file import get_logger
from mysql_execute import update_monitor, fetchall
from conn_pool import task_db_monitor_db_pool
from model.TaskType import TaskType

# Two loggers: one requested under the name 'server', one under 'consumer'.
logger = get_logger('server')
logger2 = get_logger('consumer')
# Module-level list; its use is not visible in this section.
content = []


def insert_spider_result(result):
    '''
    爬虫入库,可以增加mongo数据入库出现异常的处理(暂未)
    :param result:
    :return:
    '''
    try:
        today = datetime.datetime.today().strftime('%Y%m%d')
        # for task_info in result:
        #     task_info['update_time'] = datetime.datetime.now()
        for i in range(len(result)):
            result[i].update({
                'update_time':
                datetime.datetime.now().strftime('%Y%m%d%H%M%S')
            })
        pika_send.client['case_result'][today].insert_many(result)
        # logger.info('tid:%s'%(task_info['tid']))
Example #12
0
File: test2.py Project: 20113261/t
import time
import hashlib
import logging
import datetime
from logger_file import get_logger
from bson.objectid import ObjectId
from rabbitmq import pika_send
from conf import config

# Only the last of three consecutive assignments ever took effect, so the two
# overwritten values were dropped: a large multi-line list of city-id strings
# (it appeared to be the contiguous range '60184'..'61154') and "['50696']".
add_city_id = '''['60677']'''

logger = get_logger('test2')

# "host:port" used to build the workload URL in the loop that follows; the
# commented-out lines are alternative targets.
# host = 'localhost:123456'
host = '10.10.239.46:12345'
# host = '10.10.110.74:123456'

# url = 'http://localhost:12345/workload'
# POST a 'stop' operation for number 555 of type 'Flight' to the
# template_workload endpoint before the loop starts.
# NOTE(review): port 123456 is above 65535, the largest valid TCP port, while
# `host` above uses 12345 -- confirm the intended port.
# NOTE(review): `requests` is not among the imports visible in this file
# section -- confirm it is imported.
res = requests.post('http://10.10.239.46:123456/template_workload',
                    data={
                        'number': 555,
                        'operate': 'stop',
                        'type': 'Flight'
                    })
for i in range(1000):
    url = 'http://{host}/workload'.format(host=host)
    try:
Example #13
0
import pymongo
import datetime
import init_path
from conf import config
import math
from model.TaskType import TaskType
from common.InsertDateTask import InsertDateTask
from common.InsertBaseTask import InsertBaseTask
from mysql_execute import fetchall, fetchall_ss, fetch_count, update_monitor
from conn_pool import base_data_pool, task_db_monitor_db_pool
from common.sql_query import supervise_supplement_city_sql
from logger_file import get_logger

logger = get_logger('supervise_supplement_city')

# Module-wide MongoDB client and the base-task database taken from it.
client = pymongo.MongoClient(host=config.mongo_host)
base_task_db = client[config.mongo_base_task_db]


def recreate_base_data(base_name):
    """Drop the ``base_name`` collection, then run a Flight base-task insert.

    :param base_name: name of the collection in ``base_task_db`` to drop.
    """
    stale_collection = base_task_db[base_name]
    stale_collection.drop()
    # The rebuild always uses TaskType.Flight, whatever ``base_name`` was.
    InsertBaseTask(task_type=TaskType.Flight).insert_task()


def supplement_city_count():
    logger.info('开始监控本日城市数量:')
    flight_2_count = fetch_count(
        base_data_pool, supervise_supplement_city_sql['flight_package_id_2'])
    flight_3_count = fetch_count(
        base_data_pool, supervise_supplement_city_sql['flight_package_id_3'])
Example #14
0
import pymongo
import datetime
import init_path
from conf.config import mongo_host, mongo_base_task_db, mongo_date_task_db, used_times_config
from collections import defaultdict
from mysql_execute import update_monitor, update_code, fetchall
from logger_file import get_logger
from conf import task_source
from conf.config import frequency
from model.TaskType import TaskType
from conn_pool import task_db_monitor_db_pool

logger = get_logger('supervise')

# Module-wide MongoDB client and the two database handles taken from it.
client = pymongo.MongoClient(host=mongo_host)
date_task_db = client[mongo_date_task_db]
base_task_db = client[mongo_base_task_db]

# Module-level state; what fills these in is not visible in this section.
slices_result = None
package_count_list = {}
package_statistic = {}
# The three timestamps are computed once, when this module is imported:
# update_time is the current hour as YYYYmmddHH with '00' appended,
# update_day and today are both the current date as YYYYmmdd.
update_time = datetime.datetime.now().strftime('%Y%m%d%H') + '00'
update_day = datetime.datetime.now().strftime('%Y%m%d')
today = datetime.datetime.today().strftime('%Y%m%d')
avg_res = {}


def query_mongo(task_type):
    '''
    分package_id,分collection查询datetask各集合中的任务状态。
    :return: