def __init__(self):
    """Create a shared Redis connection pool and a client bound to it.

    Bug fix: the original passed ``socket_connect_timeout=5`` to
    ``StrictRedis`` together with ``connection_pool``.  In redis-py,
    per-connection keyword arguments are ignored when an explicit
    ``connection_pool`` is supplied, so the timeout never took effect.
    It now lives on the pool, where connections are actually created.
    """
    self.pool = redis.ConnectionPool(
        host=ConfManage.getString("REDIS_HOST"),
        port=ConfManage.getInt("REDIS_PORT"),
        db=ConfManage.getInt("REDIS_DB"),
        password=ConfManage.getString("REDIS_PASSWORD"),
        max_connections=ConfManage.getInt("REDIS_MAX_CONNECTIONS"),
        decode_responses=True,
        socket_keepalive=True,
        socket_connect_timeout=5,  # applied here so it is honored
    )
    self.conn = redis.StrictRedis(connection_pool=self.pool)
    self.logger = Logger.get_instance(ConfManage.getString("LOG_REQ_NAME"))
def main():
    """CLI entry point for the prediction cron: parse args, run process().

    Logs the effective arguments, delegates to ``process`` and maps the
    known failure modes (empty test data, attribute/value errors, manual
    interrupt) to log output instead of letting them propagate.
    """
    logger = Logger.get_instance(ConfManage.getString("LOG_CRON_NAME"))

    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--date', type=str, help='日期')
    parser.add_argument('-p', '--pickle', type=str, default='data', help='数据集')
    parser.add_argument('estimator', nargs='?', type=str, default='xgb', help='算法选择')
    parser.add_argument('predict_target', nargs='?', type=str, default='accept', help='目标值')
    parser.add_argument('-f', '--feature-selected', action='store_true', help='特征值选择')
    parser.add_argument('-w', '--withhold', action='store_true', help='是否保存数据到bi数据库')
    parser.add_argument('-s', '--shift_days', type=int, default=-1, help="The last few days")
    args = parser.parse_args()

    logger.info(
        'Arguments: estimator=%s, predict-target=%s, feature-selected=%r, withhold-bi-insertion=%r'
        % (args.estimator, args.predict_target, args.feature_selected, args.withhold))

    try:
        process(logger, args.pickle, args.estimator, args.predict_target,
                args.withhold, args.date, args.shift_days)
    except TestDataEmpty:
        logger.error('Test Data Empty!')
    except (AttributeError, ValueError) as err:
        logger.error(err)
        logger.error('Trace: {}'.format(traceback.format_exc()))
    except KeyboardInterrupt:
        stamp = arrow.now(tz=ConfManage.getString("ARROW_TIMEZONE")).format(loggable)
        logger.info('Process manually interupted at %s' % stamp)
    logger.info('Releasing Logger...')
from core.predictor import * from tools.cache import Cache from tools.pickler import reload_pickle_cache, init_pickle_cache from tools.po_cache_ret import POCacheRet from tools.logger import Logger if sys.version_info[:2] in [(2, 6), (2, 7)]: reload(sys) sys.setdefaultencoding('utf-8') elif sys.version_info[:2] in [(3, 6), (3, 7)]: # pylint: disable=E0401, E0611, E1101 import importlib importlib.reload(sys) logger = Logger.get_instance(ConfManage.getString("LOG_BASE_NAME")) # Import Thrift generated classes sys.path.append('gen-py') sys.path.insert(0, glob.glob('../../lib/py/build/lib*'[0])) try: from eta import EstimateTaskDurations from eta.ttypes import Duration, Range, InvalidInput, ModelMissing, Prediction, Params, \ Prediction_showup, Prediction_quote, Prediction_delivery, PredictionResult, Data except ImportError: logger.error('Cannot find thrift classes.') logger.error('Have (you run `thrift --gen py eta.thrift`?') raise def __validate_coordinates__(query): if query.showup_distance is None:
def main():
    """Obtain Information from Data-API and MySQL Database.

    Cron entry point for the data-collection pipeline.  Depending on the
    CLI flags it either (a) merges a new feature into an existing pickle,
    (b) clears all saved pickles, or (c) computes the missing date range
    relative to the already-collected data and collects it day by day.

    Bug fix: ``is_reverse`` was computed as
    ``False if args.reverse is None else True`` — but ``--reverse`` uses
    ``action='store_true'``, so ``args.reverse`` is always ``False`` or
    ``True`` and never ``None``; the expression therefore always yielded
    ``True``.  It now reflects the flag as intended.
    """
    logger = Logger.get_instance(ConfManage.getString("LOG_CRON_NAME"))
    Logger.resource_checkpoint('init')

    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--clear', help='Clear previously saved pickles.',
                        action='store_true')
    parser.add_argument('-r', '--reverse', help='whether clear previously data.',
                        action='store_true')
    parser.add_argument('-d', '--date', help='Date used for calculation.', type=str)
    parser.add_argument('-p', '--pickle', type=str, default='stream_data',
                        help='Pickle name for saving latest data-collection.')
    # NOTE(review): `type=bool` is an argparse pitfall — any non-empty value
    # (including "-u False") parses as True.  Kept for CLI compatibility;
    # consider migrating callers to action='store_true'.
    parser.add_argument('-u', '--updata', type=bool,
                        help='Merge data with new feature to data.pkl.', default=False)
    parser.add_argument('-f', '--funtion', help='Update new feature from funtion.')
    parser.add_argument('-m', '--merge_on',
                        help='Field names to join on. Must be found in both DataFrames.')
    args = parser.parse_args()

    # Merge a freshly computed feature into the existing pickle and exit.
    if args.updata:
        data = load_pickle(args.pickle)
        if args.funtion is not None and args.merge_on is not None:
            update_data(logger, args.funtion, data, args.merge_on)
        else:
            logger.error(
                'Funtion and Merge_on is None, Please provide corresponding parameters'
            )
        return

    # Drop every previously saved sub-pickle and exit.
    if args.clear:
        clear_pickles(logger)
        return

    # Whether to collect backwards (before existing data) or forwards.
    # Fixed: take the boolean flag directly (see docstring).
    is_reverse = args.reverse

    # Resolve the time window: "yesterday end-of-day" is the newest point
    # we are allowed to collect up to.
    run_time = get_run_time(None, 0, False)
    logger.info('Run-Time: %s' % run_time)
    collect_date = None if args.date is None else get_run_time(args.date)
    logger.info('Collect-Date: %s' % collect_date)
    end_time = run_time.shift(days=-1).ceil('day')
    logger.info('End-Time: %s' % end_time)

    # Inspect what is already collected to decide which gap to fill.
    pickled = load_pickle(args.pickle)
    collected_count = 0
    if pickled is not None and isinstance(
            pickled, pd.DataFrame) and 'order_time' in pickled.columns:
        order_times = pickled['order_time']
        del pickled  # Release pickle — only the order_time column is needed.
        collected_count = len(
            order_times.apply(lambda order_time: order_time.date()).unique())
        collected_start_time = order_times.min()
        logger.info('Min collected order_time Date: %s'
                    % collected_start_time.format(loggable))
        collected_end_time = order_times.max()
        logger.info('Max collected order_time Date: %s'
                    % collected_end_time.format(loggable))

        if collect_date is not None:
            # An explicit target date: position it relative to the
            # already-collected interval.
            if collect_date > end_time:
                logger.warning(
                    'collect_date can not greater then end_time {} > {}'.format(
                        collect_date.format(loggable), end_time.format(loggable)))
                return
            if collect_date < collected_start_time.floor('day'):
                # Entirely before existing data: collect up to its start.
                start_time = collect_date.floor('day')
                end_time = collected_start_time.shift(days=-1).ceil('day')
            elif collected_start_time.floor(
                    'day') <= collect_date <= collected_end_time.ceil('day'):
                # Inside existing data: trim and re-collect one side of it.
                trim_data(logger, collect_date, is_reverse)
                if is_reverse:
                    start_time = collected_start_time.floor('day')
                    end_time = collect_date.ceil('day')
                else:
                    start_time = collect_date.floor('day')
                    end_time = collected_end_time.ceil('day')
            elif collect_date > collected_end_time.ceil('day'):
                # After existing data: collect forward from its end.
                start_time = collected_end_time.shift(days=1).floor('day')
                end_time = collect_date.ceil('day')
            else:
                logger.warning('collect_data invalid. {}'.format(collect_date))
                return
        else:
            # No explicit date: extend forward from the newest collected day.
            if collected_end_time >= end_time:
                logger.info('Targeted Run-Time already in Collection-Interval')
                return
            else:
                start_time = collected_end_time.shift(days=1).floor('day')
        # Negative gap_days == number of whole days still to collect.
        gap = start_time.shift(days=-1).date() - end_time.date()
        gap_days = gap.days
    else:
        logger.info('Data empty!')
        gap_days = -ConfManage.getInt("COLLECTION_GAP")
        start_time = end_time.shift(days=gap_days + 1).floor('day')

    logger.info('Total Collection Interval: %d/%d [%s - %s]' %
                (collected_count, ConfManage.getInt("COLLECTION_INTERVAL"),
                 start_time.format(loggable), end_time.format(loggable)))
    if gap_days >= 0:
        logger.info('Targeted Run-Time already in Collection-Interval')
        return
    logger.info('Gap: %d' % gap_days)
    logger.info(
        'Gap Interval: %d [%s - %s]' %
        (gap_days, start_time.format(loggable), end_time.format(loggable)))

    try:
        # Collect day by day for gaps longer than one day.
        for i in range(-gap_days, 0, -1):
            end_time = start_time.ceil('day')
            logger.info('Collecting data in [{} - {}]'.format(
                start_time.format(loggable), end_time.format(loggable)))
            collect(logger, start_time, end_time, args.pickle)
            logger.info('Success collect data in [{} - {}] \n\n'.format(
                start_time.format(loggable), end_time.format(loggable)))
            start_time = start_time.shift(days=1)
        # Without the env var, keep only the most recent 30 days by default.
        trim_outdated(logger, run_time, args.pickle)
    except (AttributeError, ValueError) as err:
        logger.error(err)
        logger.error('Trace: {}'.format(traceback.format_exc()))
    except KeyboardInterrupt:
        logger.info('Process manually interupted at {}'.format(arrow.utcnow()))
    logger.info('Releasing Logger...')
    return 0
def __init__(self):
    """Initialize logger, cache client and cache TTL for this instance."""
    log_name = ConfManage.getString("LOG_BASE_NAME")
    self.logger = Logger.get_instance(log_name)
    self.cache = Cache().client
    # Expiry (seconds) applied to cached OSRM results.
    self.cache_expire = ConfManage.getInt("OSRM_CACHE_EXPIRE")
from models_bi.eta_metric_all import EtaMetricAll from configs import conf from tools.logger import Logger from configs.ConfManage import ConfManage import os import arrow from core.process import process from argparse import ArgumentParser from core.stream_collect import main logger = Logger.get_instance(ConfManage.getString("LOG_CRON_NAME")) ENV_APP_MODE = ConfManage.getString("APP_MODE") ENV_ZONE = ConfManage.getString("ZONE") def check_collect(): """检查collect是否采集""" pickle_prefix = os.getenv('ENV_PICKLE_PREFIX', '') if pickle_prefix == '' and ENV_APP_MODE != 'release': pickle_prefix = '%s-%s-' % (ENV_APP_MODE, ENV_ZONE) filename = '%sdata.pkl' % (pickle_prefix) file = './pickles/%s' % (filename) collect_ctimes = 3 while collect_ctimes > 0: try: datacreattime = arrow.get(os.path.getctime(file)).date() except FileNotFoundError as err: logger.info(err) main() datacreattime = arrow.get(os.path.getctime(file)).date() today = arrow.now().date() if datacreattime != today: