def start():
    """Run the data collection entry point.

    A ``BlockingScheduler`` is instantiated, but every ``add_job`` call and
    the final ``scheduler.start()`` are commented out, so nothing is
    scheduled. The only work performed is a single synchronous run of
    ``ApplicationOverallTrendService().collect_data()``.

    Any exception raised by that run is logged and passed to
    ``ex.collection_ex``; it is not re-raised.
    """
    # Constructed but currently unused (see the disabled schedule below).
    scheduler = BlockingScheduler()
    try:
        # --- Disabled: short-interval schedule used while testing ---
        # scheduler.add_job(UserOverviewService().collect_data, trigger="cron", second="*/5")
        # scheduler.add_job(ApplicationOverviewService().collect_data, trigger="cron", second="*/5")
        #
        # --- Disabled: hourly / daily cron schedule ---
        # scheduler.add_job(UserOverviewService().collect_data, trigger="cron", hour="*/1")
        # scheduler.add_job(ApplicationOverviewService().collect_data, trigger="cron", hour="*/1")
        # scheduler.add_job(ApplicationOverallTrendService().collect_data, trigger="cron", hour="*/1")
        # scheduler.add_job(RetentionRateDayService().collect_data, trigger="cron", day="*/1", hour="1")
        # scheduler.add_job(NewUsersAnalysisService().collect_data, trigger="cron", day="*/1", hour="1")
        # scheduler.add_job(ActiveUsersAnalysisService().collect_data, trigger="cron", day="*/1", hour="1")
        # scheduler.add_job(LaunchUsersAnalysisService().collect_data, trigger="cron", day="*/1", hour="1")
        # scheduler.add_job(ChannelService().collect_data, trigger="cron", day="*/1", hour="1")

        # --- One-off direct runs; only the overall-trend collector is enabled ---
        # UserOverviewService().collect_data()
        # ApplicationOverviewService().collect_data()
        ApplicationOverallTrendService().collect_data()
        # ChannelService().collect_data()
        # RetentionRateDayService().collect_data()
        # NewUsersAnalysisService().collect_data()
        # ActiveUsersAnalysisService().collect_data()
        # LaunchUsersAnalysisService().collect_data()

        # scheduler.add_job(LaunchUsersAnalysisService().collect_data, trigger="cron", second="*/100")
        # scheduler.start()
    except Exception as err:
        log.get_log().error("启动采集任务")
        ex.collection_ex(err)
def collect_data(self):
    """Collect data for every channel of every application.

    For each app returned by ``self.get_apps()`` the channels are queried
    through ``self.execute``. For each channel, ``self.name`` is temporarily
    extended with the channel name and id, the previously collected date is
    looked up, and ``self.collect_data_inner`` is called unless that date
    already equals ``date_utils.get_before_n_day(-2)`` formatted as
    ``%Y-%m-%d``.

    ``self.name`` is always restored to its original value before moving on
    to the next app, even if processing a channel raises.
    """
    apps = self.get_apps()
    for app in apps:
        app_id = app.app_id.decode()
        channels = self.execute(
            function=lambda session, app_id: channle_dao.query_channel(
                session=session, app_id=app_id),
            app_id=app_id)
        if channels is None:
            # The query helper returned nothing usable (it reports its own
            # errors); iterating None would raise TypeError, so skip this app.
            continue

        base_name = self.name
        try:
            for channel in channels:
                channel_id = channel.channel_id.decode()
                app_name = app.name.decode()
                self.name = (base_name + channel.name.decode()
                             + "[" + channel_id + "]")

                pre_collect_date = self.__get_pre_collect_date(
                    app_id=app_id, channel_id=channel_id)
                # NOTE(review): presumably the most recent date for which
                # data can exist -- confirm get_before_n_day(-2) semantics.
                latest_date = date_utils.get_before_n_day(-2).strftime(
                    "%Y-%m-%d")

                if pre_collect_date == latest_date:
                    log.get_log().info(app_name + "[" + channel_id + "]"
                                       + "采集已经完成,无需重复采集")
                    continue

                self.collect_data_inner(
                    url=self.url.format(
                        app_id=app_id,
                        start_date=self.__get_start_date(pre_collect_date),
                        end_date=date_utils.get_format_date(),
                        channel_id=channel_id),
                    name=self.name + "--" + app_name,
                    app_id=app_id,
                    channel_id=channel_id)
        finally:
            # Never leave the channel-specific suffix on self.name, even when
            # a lookup or the collection callback raises.
            self.name = base_name
def execute(self, function, **kwargs):
    """Run ``function`` against a fresh DAO session and return its result.

    Args:
        function: Callable invoked as ``function(session, **kwargs)``.
        **kwargs: Extra keyword arguments forwarded to ``function``.

    Returns:
        Whatever ``function`` returns, or ``None`` when it raised. The
        exception is logged and handed to ``ex.collection_ex``, not
        re-raised, so callers must be prepared for ``None``.

    The session is closed in all cases.
    """
    session = dao_manager.Session()
    try:
        return function(session, **kwargs)
    except Exception as e:
        # functools.partial objects and callable instances have no __name__;
        # fall back to repr() so the handler cannot itself raise and hide
        # the original error.
        label = getattr(function, "__name__", None) or repr(function)
        log.get_log().error("执行SQL[" + label + "]错误")
        ex.collection_ex(e)
        return None
    finally:
        session.close()
def execute(function, **kwargs):
    """Run ``function`` inside a DAO session, committing on success.

    Args:
        function: Callable invoked as ``function(session, **kwargs)``; its
            return value is discarded.
        **kwargs: Extra keyword arguments forwarded to ``function``.

    If ``function`` raises, the session is rolled back and the exception is
    logged and handed to ``ex.collection_ex`` (it is not re-raised).
    Otherwise the session is committed. The session is closed in all cases.
    """
    session = dao_manager.Session()
    try:
        function(session, **kwargs)
    except Exception as e:
        session.rollback()
        # functools.partial objects and callable instances have no __name__;
        # fall back to repr() so the handler cannot itself raise and hide
        # the original error.
        label = getattr(function, "__name__", None) or repr(function)
        log.get_log().error("执行SQL[" + label + "]错误")
        ex.collection_ex(e)
    else:
        session.commit()
    finally:
        session.close()
def __login_umeng():
    """Open the Umeng login page in PhantomJS and submit the credentials.

    The driver path, login URL, user name and password are read from
    ``config``. The function waits up to 30 seconds for the ``submitForm``
    element to become clickable, fills in the login form, clicks submit and
    then sleeps 5 seconds.

    Returns:
        The PhantomJS web driver. It is returned even when waiting for the
        login page failed; in that case the failure is logged and no
        credentials have been submitted.
    """
    driver = webdriver.PhantomJS(
        executable_path=config.get_global()["selenium"]["driver_path"])
    driver.get(config.get_umeng()["url"]["login"])

    try:
        WebDriverWait(driver, 30).until(
            EC.element_to_be_clickable((By.ID, 'submitForm')))
        log.get_log().info("页面加载完成")
    except Exception as err:
        log.get_log().error("等待登录页面错误错误")
        ex.collection_ex(err)
        return driver

    # Automatic login: fill in the user name and password, then submit.
    user_input = driver.find_element_by_css_selector(
        "#ump .loginForm .list input[type='text']")
    user_input.send_keys(config.get_umeng()["user_name"])

    password_input = driver.find_element_by_css_selector(
        "#ump .loginForm .list input[type='password']")
    password_input.send_keys(config.get_umeng()["password"])

    submit_button = driver.find_element_by_id('submitForm')
    submit_button.click()

    # Give the browser some time to follow the post-login redirect.
    time.sleep(5)
    return driver
def collect_data_inner(self, url, name, **kwargs):
    """Fetch ``url`` and pass the decoded JSON payload to ``self.callback``.

    Args:
        url: Address requested through the session returned by
            ``session_manager.get_request_session()``.
        name: Human-readable task name used in log messages.
        **kwargs: Extra keyword arguments forwarded to ``self.callback``.

    The payload is treated as successful when it has no ``"ret"`` key or when
    its ``"result"`` value is ``"success"``; in that case
    ``self.callback(result, **kwargs)`` is called. Request errors, non-200
    responses, undecodable bodies and unsuccessful payloads are logged and
    the method returns normally. Exceptions raised by ``self.callback``
    propagate to the caller.
    """
    log.get_log().info(name + "开始")
    session = session_manager.get_request_session()
    try:
        # NOTE(review): if this is a `requests` session the timeout is in
        # seconds, i.e. roughly 2.8 hours -- confirm this is intended.
        r = session.get(url, timeout=10000)
    except Exception as e:
        log.get_log().error(name + "失败")
        ex.collection_ex(e)
    else:
        if r.status_code == 200:
            try:
                result = r.json()
            except ValueError as e:
                # The body was not valid JSON; report it like any other
                # failed collection instead of aborting the whole run.
                log.get_log().error(name + "失败")
                ex.collection_ex(e)
            else:
                if "ret" not in result or ("result" in result and
                                           result["result"] == "success"):
                    # Forwarding empty kwargs is identical to passing none.
                    self.callback(result, **kwargs)
                else:
                    # "msg" may be missing or not a string in the payload.
                    log.get_log().error("本次[" + name + "]失败["
                                        + str(result.get("msg", "")) + "]")
        else:
            # Previously dropped silently; record the unexpected status.
            log.get_log().error("本次[" + name + "]失败[HTTP "
                                + str(r.status_code) + "]")
    log.get_log().info(name + "完成")
def collection_ex(e):
    """Log an exception at CRITICAL level.

    Three records are written: the exception object ``e``, the type and
    value reported by ``sys.exc_info()``, and the formatted traceback
    reported by ``sys.exc_info()``.

    Args:
        e: The exception object to report.
    """
    exc_type, exc_value, exc_tb = sys.exc_info()
    messages = (
        "异常对象:%s" % (e,),
        "错误类型:{0},错误详细信息:{1}".format(exc_type, exc_value),
        "".join(traceback.format_tb(exc_tb)),
    )
    for message in messages:
        log.get_log().critical(message)