def mouse_hover(self, elm):
    """
    :user : simulate a mouse hover
    :param elm: the element to hover over
    """
    logging.info(f"Simulating mouse hover over {elm}")
    ActionChains(self.driver).move_to_element(elm).perform()
def test_login(self):
    logging.info("Normal login test")
    LaunchPage(self.driver).click_login()
    data = read_yaml('test_app_data.yaml', 'test_login')
    LoginPage(self.driver).login_action(data['phone'], data['pwd'])
    self.wait()
    self.assertIsNotNone(MainPage(self.driver).check_contants_ico())
def snapshot(self, prefix, title):
    title = title.replace('/', '').replace(':', '')
    file_path = os.path.join(basedir, 'snapshots', "{}_{}.png".format(prefix, title))
    logging.info("Taking a screenshot, save path: {}".format(file_path))
    self.driver.get_screenshot_as_file(file_path)
    self.wait(0.5)
def _get_revisions(args):
    MethodArgsClass = collections.namedtuple('MethodArg',
                                             'log start end project namespace')
    users = args[0]
    state = args[1]
    arg_obj = MethodArgsClass(state[0], state[1], state[2], state[3], state[4])

    conn = um.dl.Connector(instance='slave')

    if arg_obj.log:
        logging.info('Computing revisions, PID = %s' % os.getpid())

    rev_query = bytes_added_rev_query(arg_obj.start, arg_obj.end, users,
                                      arg_obj.namespace, arg_obj.project)

    if arg_obj.log:
        logging.info(__name__ + '::Querying revisions for %(count)s users '
                                '(project = %(project)s, '
                                'namespace = %(namespace)s)... ' % {
                                    'count': len(users),
                                    'project': arg_obj.project,
                                    'namespace': arg_obj.namespace})
    try:
        return list(conn.execute_SQL(rev_query))
    except ProgrammingError:
        raise um.UserMetric.UserMetricError(
            message=str(BytesAdded) + '::Could not get revisions '
                                      'for specified user(s) - Query Failed.')
def try_find(self, element_loc):
    """Try to locate an element; used for intermittently appearing elements
    or to check whether an element exists."""
    logging.info("Trying to locate element: {}".format(element_loc))
    try:
        return self.driver.find_element(*element_loc)
    except NoSuchElementException:
        logging.warning("Element did not appear: {}".format(element_loc))
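# Usage sketch for try_find (an assumption, not part of the original suite):
# it shows how the None return can gate handling of an intermittent popup.
# The `page` object and the CLOSE_AD locator below are hypothetical.
CLOSE_AD = ('id', 'ad-close')          # locator tuple, unpacked via find_element(*loc)

popup = page.try_find(CLOSE_AD)
if popup:                              # try_find returns None when the element is absent
    popup.click()                      # dismiss the popup before continuing the test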
def async_cli(self, action, coin):
    '''
    eventually offer async_cli functions
    :param action:
    :param coin:
    :return:
    '''
    try:
        cmd = f"{self.installed_folder}/{coin.cli} -datadir={self.wallet_directory} {action}"
        logging.info(
            f"Attempting to execute command from masternode object: {cmd}")
        # need to have a threadpool and throw this in there and await the result
        result = self.connection.run(cmd, hide=False)
        logging.info(
            f"Executed {result.command} on {result.connection.host}, "
            f"got stdout:\n{result.stdout}"
        )
        return result.stdout
    except UnexpectedExit as e:
        # possibly try to start the daemon again
        logging.warning(f"{coin.cli} exited unexpectedly", exc_info=e)
        return '{"status":"restart"}'
    except Exception as e:
        logging.error(f"Could not do action on daemon at {self.getIP()}")
def time_series_worker(time_series, metric, aggregator, cohort, kwargs, q):
    """ worker thread which computes time series data for a set of points """
    log = bool(kwargs['log']) if 'log' in kwargs else False

    data = list()
    ts_s = time_series.next()
    new_kwargs = deepcopy(kwargs)

    # re-map some keyword args relating to thread counts
    if 'metric_threads' in new_kwargs:
        d = json.loads(new_kwargs['metric_threads'])
        for key in d:
            new_kwargs[key] = d[key]
        del new_kwargs['metric_threads']

    while 1:
        try:
            ts_e = time_series.next()
        except StopIteration:
            break

        if log:
            logging.info(__name__ + ' :: Processing thread %s, %s - %s ...' % (
                os.getpid(), str(ts_s), str(ts_e)))

        metric_obj = metric(date_start=ts_s, date_end=ts_e, **new_kwargs).\
            process(cohort, **new_kwargs)

        r = um.aggregator(aggregator, metric_obj, metric.header())

        if log:
            logging.info(__name__ + ' :: Processing complete %s, %s - %s ...' % (
                os.getpid(), str(ts_s), str(ts_e)))

        data.append([str(ts_s), str(ts_e)] + r.data)
        ts_s = ts_e

    q.put(data)  # add the data to the queue
def get_t_result(li):
    ret_text = ""
    for t in li:
        isbreak = False
        t.join()
        count = 0
        # Retry the command while it has produced no result, up to two times
        while t.get_result() is None:
            count += 1
            logging.info(t.cmd + ": execution failed, re-executing.")
            t.run()
            if t.get_result() is not None:
                ret_text += t.info + " " + 'retried push %d time(s), succeeded!\r\n' % count
                isbreak = True
                break
            if count == 2:
                break
        if t.get_result() is None:
            logging.info(t.info + " " + ': execution failed!\r\n')
            ret_text += t.info + " " + ': execution failed!\r\n'
        else:
            if not isbreak:
                ret_text += t.info + t.get_result() + " " + '\r\n'
    return ret_text
def start_polisd(request):
    if request.method == 'POST':
        mns = request.form.getlist('mns')
        actions = request.form.getlist('params')
        result = 'Attempted starting: ' + ', '.join(mns)

        for idx in mns:
            vps = VPS(config['masternodes'][int(idx)], Polis(config['Polis']))
            result = vps.daemon_action(Polis(config["Polis"]))
            logging.info(f"Restarted {vps.getIP()} got: {result}")

        return f"Result of polisd {actions}: {result} <br><a href=/mns/cli/masternodes/status></a>"
    else:
        # display a list of all MNs with a "start" button
        mnlist = "<form method='POST'>\n<select name=mns multiple>\n"
        idx = 0
        for masternode in config["masternodes"]:
            mnlist += f"\t<option value='{str(idx)}'>{masternode['connection_string']}</option>\n"
            idx += 1
        mnlist += "</select>\n"
        return mnlist + "<p><input type=submit value=start></form>"
def cron_read(request, mnidx):
    coin = Polis(config["Polis"])
    vps = VPS(config["masternodes"][mnidx], coin)
    result = {"result": vps.actions("view_crontab", coin).splitlines()}
    logging.info(f"Crontab requested got:\n{result}")
    return json.dumps(result)
def upgrade(request, mnidx):
    coin = Polis(config["Polis"])
    vps = VPS(config["masternodes"][mnidx], coin)
    logging.info(f"vps.upgrade called for: {vps.getIP()}")
    result = vps.upgrade(coin)
    return result
async def handle_start(self):
    # Check if user is already registered
    if self.is_invited:
        return 'You are already registered with me. Use /stop to deregister'

    # Check deeplinked start param for invite key
    invite_key = self.text[len('/start '):]
    if not invite_key:
        return 'Sorry, you must have a valid invite code to interact'

    # TODO: Call main app to validate invite key
    # For now a static invite key
    if invite_key != 'alethea':
        return 'Invalid invite code'

    # Register user
    username = self.user['username']
    id = self.user['id']
    try:
        await self.db.register_user(
            first_name=self.user.get('first_name', '-'),
            last_name=self.user.get('last_name', '-'),
            username=username,
            id=id,
            role='user')
        logging.info(f'{id} has registered')
    except Exception as e:
        logging.error(e)
        return 'Failed to register you. Try again.'

    return f'Welcome, {username}'
def add_sign():
    """Append a signature to the end of the email."""
    logging.info("")
    logging.info("")
    logging.info("")
    logging.info("---------------------------------------------------------")
    logging.info("May all the lovers in the world, and the one I like, be just like you")
def add_head():
    """Add header information to the email."""
    logging.info("From: heraldstudio < *****@*****.** >")
    logging.info("Subject: 小猴偷米 log record")
    logging.info("")
    logging.info("This email was generated automatically; please do not reply")
    logging.info("")
async def loop():
    while True:
        n = datetime.datetime.utcnow() + datetime.timedelta(0, -6000, 0)
        logging.info("start time - end time %s %s", n, datetime.datetime.utcnow())
        await findallhdmp4(n)
        print("over2")
        await asyncio.sleep(5)
    return
def my_sleep(self, s):
    """
    :user : forced wait
    :type : int
    :param s: the number of seconds to wait
    """
    logging.info("Forced wait of {} seconds".format(s))
    sleep(s)
def create_logging_header(self):
    """Write summary information to the logging file."""
    logging.info("Date: %s, total visits: %d" % (self.date, len(self.log_list)))
    logging.info("")
async def handle_stop(self):
    id = self.user['id']
    try:
        await self.db.deregister_user(id)
        logging.info(f'{id} has deregistered')
    except Exception as e:
        logging.error(e)
    return 'Goodbye'
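# Minimal dispatch sketch (assumed glue code, not from the original bot):
# it shows how handle_start / handle_stop above could be routed from the
# incoming command text. The dispatch method and command table are hypothetical.
async def dispatch(self):
    handlers = {
        '/start': self.handle_start,   # registration flow shown above
        '/stop': self.handle_stop,     # deregistration flow shown above
    }
    command = self.text.split(' ', 1)[0]   # e.g. '/start alethea' -> '/start'
    handler = handlers.get(command)
    if handler is None:
        return 'Unknown command'
    return await handler()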
def select_pull(self, elm, text):
    """
    :user : handle a drop-down list via the Select class
    :param text: the visible text of the option to select, str
    :param elm: the XPath of the drop-down element, str
    """
    logging.info(f"Selecting {text} in the {elm} drop-down")
    Select(self.driver.find_element("xpath", elm)).select_by_visible_text(text)
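# Usage sketch for select_pull (assumed example; the page object, XPath and
# option text are illustrative only): selects the option whose visible text matches.
page.select_pull("//select[@id='city']", "Beijing")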
def infer(model_path, model_fname, sample):
    # load model
    with open(os.path.join(model_path, model_fname), 'rb') as model_in:
        model = pickle.load(model_in)
    logging.info(f"{model_fname} loaded for inference")

    pred_proba = model.predict_proba(sample)
    logging.info("prediction complete")
    return pred_proba
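# Usage sketch for infer (an assumption for illustration): the directory,
# file name and feature vector below are hypothetical and must match
# whatever model was actually pickled.
import numpy as np

sample = np.array([[0.3, 1.2, 5.0, 0.0]])            # shape depends on the trained model
proba = infer('models', 'xgb_clf.pkl', sample)       # returns class probabilities
print(proba)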
def job_queue():
    """ View for listing current jobs working """

    error = get_errors(request.args)

    def error_class(em):
        return {
            'failure': 'error',
            'pending': 'warning',
            'success': 'success'
        }.get(em, '')

    p_list = list()
    p_list.append(Markup('<thead><tr><th>is_alive</th><th>PID</th><th>url'
                         '</th><th>status</th></tr></thead>\n<tbody>\n'))
    for p in processQ:
        try:
            # Pull data off of the queue and add it to the queue data
            while not p.queue.empty():
                if not queue_data.has_key(p.id):
                    queue_data[p.id] = json.loads(p.queue.get().data)
                else:
                    for k, v in queue_data[p.id].items():
                        if hasattr(v, '__iter__'):
                            queue_data[p.id][k].extend(v)

            # once a process has finished working remove it and put its
            # contents into the cache
            if not p.process.is_alive() and p.status[0] == 'pending':
                q_response = make_response(jsonify(queue_data[p.id]))
                del queue_data[p.id]
                set_data(p.request, q_response, pkl_data)
                p.status[0] = 'success'
                logging.info(__name__ + '::Completed request %s.' % p.url)

        except Exception as e:
            p.status[0] = 'failure'
            logging.error(__name__ + "::Could not update request: %s. "
                                     "Exception: %s" % (p.url, e.message))

        # Log the status of the job
        response_url = "".join(['<a href="', request.url_root,
                                p.url + '">', p.url, '</a>'])
        p_list.append(Markup('<tr class="' + error_class(p.status[0]) + '"><td>'))
        p_list.append("</td><td>".join([str(p.process.is_alive()),
                                        str(p.process.pid),
                                        escape(Markup(response_url)),
                                        p.status[0]]))
        p_list.append(Markup('</td></tr>'))
    p_list.append(Markup('\n</tbody>'))

    if error:
        return render_template('queue.html', procs=p_list, error=error)
    else:
        return render_template('queue.html', procs=p_list)
def find_all(self, element_loc):
    """Locate a group of elements."""
    logging.info("Locating a group of elements: {}".format(element_loc))
    try:
        return self.driver.find_elements(*element_loc)
    except NoSuchElementException:
        logging.error("Failed to locate the group of elements: {}".format(element_loc))
        self.driver.save_snapshot("1.png")
        raise
def js_del_element(self, element_loc, element):
    """
    :user : remove an attribute from a page element via JavaScript
    :param element_loc: a CSS selector for the target element, str
    :param element: the attribute to remove from that element, str
    """
    logging.info(f"Removing the {element} attribute via JS")
    # Quote the selector and attribute name so the generated JS is valid
    self.driver.execute_script(
        f"document.querySelector('{element_loc}').removeAttribute('{element}')"
    )
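# Usage sketch for js_del_element (assumed example): removing a readonly
# attribute from a date input so the test can type into it. The page object,
# CSS selector and attribute name are illustrative only.
page.js_del_element("#travel-date", "readonly")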
def trade(self, title):
    """
    :user : switch to another browser window
    :param title: the title of the target window, str
    """
    logging.info(f"Switching windows, switching to the {title} window")
    all_win = self.driver.window_handles
    for x in all_win:
        # Switch to each handle until the current window title matches
        self.driver.switch_to.window(x)
        if self.driver.title == title:
            break
def daemon_masternode_start(request, mn_idx, reindex):
    coin = Polis(config['Polis'])
    vps = VPS(config["masternodes"][mn_idx], coin)

    result = vps.kill_daemon(coin)
    time.sleep(10)
    logging.info(f"Killed daemon {result}")

    result = vps.daemon_action(coin, reindex)
    logging.info(f"Executed: polisd @ {mn_idx} returned: {result}")
    return result
def find(self, element_loc):  # e.g. ('id', 'kw')
    """Locate an element via an element locator."""
    logging.info("Locating element: {}".format(element_loc))
    try:
        return self.driver.find_element(*element_loc)
    except NoSuchElementException:
        logging.error("Failed to locate element: {}".format(element_loc))
        self.snapshot("locate_failed", "_".join(element_loc))  # element_loc = ('id', 'kw')
        raise
def _process_help(args):
    state = args[1]
    thread_args = NamespaceEditsArgsClass(state[0], state[1], state[2], state[3])
    user_data = args[0]
    conn = Connector(instance='slave')

    to_string = DataLoader().cast_elems_to_string
    to_csv_str = DataLoader().format_comma_separated_list

    # Format user condition
    user_cond = "rev_user in (" + to_csv_str(to_string(user_data)) + ")"

    # Format timestamp condition
    ts_cond = "rev_timestamp >= %s and rev_timestamp < %s" % (
        thread_args.date_start, thread_args.date_end)

    if thread_args.log:
        logging.info(__name__ + '::Computing namespace edits. (PID = %s)'
                     % getpid())
        logging.info(__name__ + '::From %s to %s. (PID = %s)' % (
            str(thread_args.date_start), str(thread_args.date_end), getpid()))

    sql = """
            SELECT r.rev_user, p.page_namespace, count(*) AS revs
            FROM %(project)s.revision AS r
                JOIN %(project)s.page AS p
                ON r.rev_page = p.page_id
            WHERE %(user_cond)s AND %(ts_cond)s
            GROUP BY 1,2
        """ % {
        "user_cond": user_cond,
        "ts_cond": ts_cond,
        "project": thread_args.project,
    }
    conn._cur_.execute(" ".join(sql.split('\n')))

    # Tally counts of namespace edits
    results = dict()
    for user in user_data:
        results[str(user)] = OrderedDict()
        for ns in NamespaceEdits.VALID_NAMESPACES:
            results[str(user)][str(ns)] = 0
    for row in conn._cur_:
        try:
            if row[1] in NamespaceEdits.VALID_NAMESPACES:
                results[str(row[0])][str(row[1])] = int(row[2])
        except (KeyError, IndexError):
            logging.error(__name__ + "::Could not process row: %s" % str(row))

    del conn
    return [(user, results[user]) for user in results]
def iframe_enter(self, element):
    """
    :user : switch into a frame
    :param element: the frame to switch into, by name or id
    """
    logging.info(f'Switching into frame {element}')
    try:
        self.driver.switch_to.frame(element)
    except BaseException:
        logging.error(f'Failed to switch into frame, the element is {element}')
        raise
def my_element(self, element):
    """
    :user : element locator
    :param element: a locator tuple such as ('xpath', '<element>')
    """
    logging.info("Locator, the element is: {}".format(element))
    try:
        return self.driver.find_element(*element)
    except BaseException:
        id = random.randint(10000, 99999)
        logging.error('Location failed, the element is: {}, the screenshot id is: {}'.format(element, id))
        raise
async def findlargemp4fileffmpeg(starttime, endtime):
    mp4list = []
    client = AsyncIOMotorClient(ServerParameters.mongodbpath)
    db = client.jt808
    bucket = AsyncIOMotorGridFSBucket(db, "eventuploadvideos")
    cursor = bucket.find({
        "uploadDate": {
            '$gt': starttime,
            '$lte': endtime
        },
        "filename": {
            "$regex": ".mp4$"
        }
    })
    filelist = await cursor.to_list(100000)
    ccount = 0
    for fi in filelist:
        if fi["length"] > 1000000:
            print(fi)
            if os.path.exists(fi["filename"]):
                os.remove(fi["filename"])
            # download the original file from GridFS to a local temp file
            ds = await bucket.open_download_stream(fi["_id"])
            f = open("input" + fi["filename"], 'wb')
            bbb = await ds.read()
            f.write(bbb)
            f.close()
            ds.close()
            converttstoh264("input" + fi["filename"], fi["filename"])
            if os.path.exists("input" + fi["filename"]):
                os.remove("input" + fi["filename"])

            # save the converted file back to the GridFS bucket
            try:
                if os.path.exists(fi["filename"]):
                    uf = open(fi["filename"], "rb")
                    ubbb = uf.read()
                    uf.close()
                    os.remove(fi["filename"])
                    await bucket.delete(fi["_id"])
                    uds = bucket.open_upload_stream_with_id(
                        fi["_id"], fi["filename"])
                    await uds.write(ubbb)
                    await uds.close()
                    ccount = ccount + 1
                    logging.info("convert %s %s", fi["_id"], fi["filename"])
            except BaseException as e:
                logging.error(e)
    logging.info("end findlargemp4fileffmpeg total %s convert %s",
                 len(filelist), ccount)
    return
def screenshot(self, id, data):
    """
    :user : take a screenshot
    :param id: an id used in the image file name, int or str
    :param data: a short description, str, e.g. "location failed"
    """
    logging.info('Calling the screenshot function, taking a screenshot')
    now_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    user = "******".format(id) + "{}".format(data) + \
        now_time.replace(" ", '-').replace(":", '-')
    self.driver.get_screenshot_as_file(printscreen_path + "/{}.png".format(user))
def __init__(self, device_name, class_room):
    if not device_name + '+' + class_room in TCP_CONNECTION.keys():
        logging.info(TCP_CONNECTION.keys())
        raise DeviceNotConnectException

    result = WifiDevice.select() \
        .where(WifiDevice.class_number == class_room).execute()
    self.devices = []
    for device in result:
        self.devices.append(device.device_number)

    self.device_name = device_name
    self.class_room = class_room
    self.send_data = {"device_name": device_name, "class": class_room}
async def run_schedule(self, schedule, loop):
    logging.debug('run_schedule: calling controller.stop_all()')
    self.controller.stop_all()
    # This is currently unnecessary with the processes cancelling in the
    # processes method. Which is better?

    for i, row in enumerate(schedule):
        logging.info('running {} at {}'.format(row['name'], row['start_time']))

        starting_time = row['start_time'] - config.STARTING_WARNING
        start_time = row['start_time']
        warning_time = row['start_time'] + row['talk_length'] - config.TALK_WARNING
        questions_time = row['start_time'] + row['talk_length']
        q_warning_time = (row['start_time'] + row['talk_length']
                          + row['question_length'] - config.QUESTION_WARNING)
        end_time = row['start_time'] + row['talk_length'] + row['question_length']

        if seconds_until(starting_time) > 0:
            logging.debug('nothing until {}'.format(start_time))
            self.controller.stop_all()
            self.controller.start([light_controls.stop,
                                   self.screen.stop(row['name'], row['title'], start_time)])
            await asyncio.sleep(seconds_until(starting_time), loop=loop)

        if seconds_until(start_time) > 0:  # before talk start
            logging.debug('start until {}'.format(start_time))
            self.controller.stop_all()
            self.controller.start([light_controls.starting,
                                   self.screen.starting(row['name'], row['title'], start_time)])
            await asyncio.sleep(seconds_until(start_time), loop=loop)

        if seconds_until(warning_time) > 0:  # before talk warning
            logging.debug('speaking until {}'.format(warning_time))
            self.controller.stop_all()
            self.controller.start([light_controls.speaking,
                                   self.screen.speaking(row['name'], row['title'], questions_time)])
            await asyncio.sleep(seconds_until(warning_time), loop=loop)

        if seconds_until(questions_time) > 0:  # before question time
            logging.debug('speaking warning until {}'.format(questions_time))
            self.controller.stop_all()
            self.controller.start([light_controls.speaking_warning,
                                   self.screen.speaking_warning(row['name'], row['title'], questions_time)])
            await asyncio.sleep(seconds_until(questions_time), loop=loop)

        if seconds_until(q_warning_time) > 0:  # before question warning
            logging.debug('questions until {}'.format(q_warning_time))
            self.controller.stop_all()
            self.controller.start([light_controls.questions,
                                   self.screen.questions(row['name'], row['title'], end_time)])
            await asyncio.sleep(seconds_until(q_warning_time), loop=loop)

        if seconds_until(end_time) > 0:  # before end of talk
            logging.debug('questions warning until {}'.format(end_time))
            self.controller.stop_all()
            self.controller.start([light_controls.questions_warning,
                                   self.screen.questions_warning(row['name'], row['title'], end_time)])
            await asyncio.sleep(seconds_until(end_time), loop=loop)

        logging.debug('end of talk. stopping all coroutines')
        self.controller.stop_all()

    logging.debug('empty schedule. Disco!')
    self.controller.stop_all()
    self.controller.start([light_controls.empty_schedule, self.screen.empty_schedule()])
    await asyncio.sleep(60 * 60 * 6, loop=loop)
    self.controller.stop_all()
def process(self, user_handle, **kwargs):
    self.apply_default_kwargs(kwargs, 'process')

    if not hasattr(user_handle, '__iter__'):
        user_handle = [user_handle]  # ensure the handles are iterable
    k = int(kwargs['num_threads'])
    log = bool(kwargs['log'])

    if log:
        logging.info(__name__ + "::parameters = " + str(kwargs))

    # Multiprocessing vs. single processing execution
    args = [self._project_, log, self._start_ts_, self._end_ts_]
    self._results = mpw.build_thread_pool(user_handle, _process_help, k, args)

    return self
def daemon_action(self, coin, reindex=0):
    try:
        cmd = f"{self.installed_folder}/{coin.daemon} -datadir={self.wallet_directory}"
        if reindex == 1:
            cmd += " -reindex"
        result = self.connection.run(cmd, hide=False)
        logging.info(
            f"Executed {result.command} on {result.connection.host}, "
            f"got stdout:\n{result.stdout}"
        )
        return result.stdout
    except UnexpectedExit as e:
        logging.warning(f"{coin.daemon} exited unexpectedly", exc_info=e)
        return '{"status":"restart"}'
    except Exception as e:
        logging.error(f"Could not do action on daemon at {self.getIP()}")
def _process_help(args):
    """ Used by Threshold::process() for forking.
        Should not be called externally. """

    state = args[1]
    thread_args = RevertRateArgsClass(state[0], state[1], state[2],
                                      state[3], state[4], state[5], state[6])
    user_data = args[0]

    if thread_args.log_progress:
        logging.info(__name__ + '::Computing reverts on %s users in thread %s.' % (
            len(user_data), str(os.getpid())))

    results_agg = list()
    for user in user_data:
        conn = dl.Connector(instance='slave')
        conn._cur_.execute(
            revert_rate_user_revs_query(thread_args.project,
                                        user,
                                        thread_args.date_start,
                                        thread_args.date_end))

        total_revisions = 0.0
        total_reverts = 0.0

        revisions = [rev for rev in conn._cur_]
        del conn

        results_thread = mpw.build_thread_pool(revisions, _revision_proc,
                                               thread_args.rev_threads, state)

        for r in results_thread:
            total_revisions += r[0]
            total_reverts += r[1]

        if not total_revisions:
            results_agg.append([user, 0.0, total_revisions])
        else:
            results_agg.append([user, total_reverts / total_revisions,
                                total_revisions])

    if thread_args.log_progress:
        logging.info(__name__ + '::PID %s complete.' % (str(os.getpid())))

    return results_agg
def read_schedule(fileName):
    logging.info('Reading new schedule')
    talks = []
    with open(fileName, 'r') as csvfile:
        reader = csv.DictReader(csvfile, delimiter=',')
        for row in reader:
            if row['room_code'] == config.ROOM_CODE:  # if in this room
                # field parsing
                row['start_time'] = datetime.datetime.strptime(row['start_time'], date_format)
                row['talk_length'] = datetime.timedelta(minutes=int(row['talk_length']))
                row['question_length'] = datetime.timedelta(minutes=int(row['question_length']))
                print(row['start_time'] + row['talk_length'] + row['question_length'])
                # keep the talk only if it has not finished yet
                if row['start_time'] + row['talk_length'] + row['question_length'] > datetime.datetime.today():
                    talks.append(row)
    talks = sorted(talks, key=lambda x: x['start_time'])
    if len(talks) == 0:
        logging.info('Empty schedule')
    return talks
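# Usage sketch tying read_schedule to run_schedule above (assumed glue code):
# 'schedule.csv' and `display` are placeholders for the real schedule file and
# for whatever object owns run_schedule / controller / screen.
import asyncio

loop = asyncio.get_event_loop()
talks = read_schedule('schedule.csv')      # rows for this room, sorted by start_time
loop.run_until_complete(display.run_schedule(talks, loop))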
def _process_help(args):

    # Unpack args
    state = args[1]
    thread_args = LiveAccountArgsClass(state[0], state[1], state[2], state[3],
                                       state[4], state[5])
    user_data = args[0]
    conn = Connector(instance='slave')

    # Log progress
    if thread_args.log:
        logging.info(__name__ + '::Computing live account. (PID = %s)'
                     % getpid())
        logging.info(__name__ + '::From %s to %s. (PID = %s)' % (
            str(thread_args.date_start), str(thread_args.date_end), getpid()))

    # Extract edit button click from edit_page_tracking table (namespace,
    # article title, timestamp) of first click and registration timestamps
    # (join on logging table)
    #
    # Query will return: (user id, time of registration, time of first
    # edit button click)
    la_query = live_account_query(user_data, thread_args.namespace,
                                  thread_args.project)
    conn._cur_.execute(la_query)

    # Iterate over results to determine boolean indicating whether
    # account is "live"
    results = {long(user): -1 for user in user_data}
    for row in conn._cur_:
        try:
            diff = (date_parse(row[2]) - date_parse(row[1])).total_seconds()
            diff /= 60  # get the difference in minutes
        except Exception:
            continue

        if diff <= thread_args.t:
            results[row[0]] = 1
        else:
            results[row[0]] = 0

    return [(str(key), results[key]) for key in results]
def get_users(cohort_expr):
    """ get users from cohort """

    if search(COHORT_REGEX, cohort_expr):
        logging.info(__name__ + '::Processing cohort by expression.')
        users = [user for user in parse_cohorts(cohort_expr)]
    else:
        logging.info(__name__ + '::Processing cohort by tag name.')
        conn = dl.Connector(instance='slave')
        try:
            conn._cur_.execute('select utm_id from usertags_meta '
                               'WHERE utm_name = "%s"' % str(cohort_expr))
            res = conn._cur_.fetchone()[0]
            conn._cur_.execute('select ut_user from usertags '
                               'WHERE ut_tag = "%s"' % res)
        except IndexError:
            redirect(url_for('cohorts'))
        users = [r[0] for r in conn._cur_]
        del conn
    return users
def process(self, user_handle, **kwargs):
    """ Setup metrics gathering using multiprocessing """
    self.apply_default_kwargs(kwargs, 'process')

    k = kwargs['num_threads']
    log_progress = bool(kwargs['log_progress'])
    log_frequency = int(kwargs['log_frequency'])

    if user_handle:
        if not hasattr(user_handle, '__iter__'):
            user_handle = [user_handle]

    # build the argument lists for each thread
    if not user_handle:
        sql = bytes_added_rev_user_query(self._start_ts_, self._end_ts_)
        if log_progress:
            logging.info(
                __name__ + '::Getting all distinct users: " %s "' % sql)
        user_handle = [str(row[0]) for row in
                       self._data_source_.execute_SQL(sql)]
        if log_progress:
            logging.info(
                __name__ + '::Retrieved %s users.' % len(user_handle))

    # get revisions
    args = [log_progress, self._start_ts_, self._end_ts_,
            self._project_, self._namespace_]
    revs = mpw.build_thread_pool(user_handle, _get_revisions, k, args)

    # Start worker threads and aggregate results for bytes added
    args = [log_progress, log_frequency, self._project_]
    self._results = agg.list_sum_by_group(
        mpw.build_thread_pool(revs, _process_help, k, args), 0)

    # Add any missing users - O(n)
    tallied_users = set([str(r[0]) for r in self._results])
    for user in user_handle:
        if not tallied_users.__contains__(str(user)):
            # Add a row indicating no activity for that user
            self._results.append([user, 0, 0, 0, 0, 0])

    return self
def process_metrics(p, rm):
    """ Worker process for requests - this will typically operate in a
        forked process """

    conn = dl.Connector(instance='slave')
    logging.info(__name__ + '::START JOB %s (PID = %s)' % (str(rm), os.getpid()))

    # obtain user list - handle the case where a lone user ID is passed
    if search(MW_UID_REGEX, str(rm.cohort_expr)):
        users = [rm.cohort_expr]
    # Special case where user lists are to be generated based on registered
    # user reg dates from the logging table -- see src/metrics/users.py
    elif rm.cohort_expr == 'all':
        users = MediaWikiUser(query_type=1)
    else:
        users = get_users(rm.cohort_expr)

    # unpack RequestMeta into dict using MEDIATOR
    args = {attr.metric_var: getattr(rm, attr.query_var)
            for attr in QUERY_PARAMS_BY_METRIC[rm.metric]}
    logging.info(__name__ + '::Calling %s with args = %s.' % (rm.metric,
                                                              str(args)))

    # process request
    results = mm.process_data_request(rm.metric, users, **args)

    p.put(jsonify(results))
    del conn
    logging.info(__name__ + '::END JOB %s (PID = %s)' % (str(rm), os.getpid()))
def _process_help(args):
    """ Used by Threshold::process() for forking.
        Should not be called externally. """

    ThresholdArgsClass = collections.namedtuple(
        'ThresholdArgs',
        'project namespace n t log_progress survival restrict ts_start ts_end')
    user_data = args[0]
    state = args[1]
    thread_args = ThresholdArgsClass(state[0], state[1], state[2], state[3],
                                     state[4], state[5], state[6], state[7],
                                     state[8])

    if thread_args.log_progress:
        logging.info(__name__ + ' :: Processing revision data '
                                '(%s users) by user... (PID = %s)' % (
                                    len(user_data), os.getpid()))
        logging.info(__name__ + ' :: ' + str(thread_args))

    # only proceed if there is user data
    if not len(user_data):
        return []

    conn = um.dl.Connector(instance='slave')
    results = list()
    dropped_users = 0
    for r in user_data:
        try:
            threshold_ts = um.UserMetric._get_timestamp(
                um.date_parse(r[1]) + timedelta(hours=thread_args.t))
            uid = long(r[0])
            rev_query = threshold_rev_query(uid,
                                            thread_args.survival,
                                            thread_args.namespace,
                                            thread_args.project,
                                            thread_args.restrict,
                                            thread_args.ts_start,
                                            thread_args.ts_start,
                                            threshold_ts)
            conn._cur_.execute(rev_query)
            count = int(conn._cur_.fetchone()[0])
        except (IndexError, ValueError):
            dropped_users += 1
            continue

        if count < thread_args.n:
            results.append((r[0], 0))
        else:
            results.append((r[0], 1))

    if thread_args.log_progress:
        logging.info(
            __name__ + '::Processed PID = %s. Dropped users = %s.' % (
                os.getpid(), str(dropped_users)))

    return results
def _process_help(args):
    """
        Determine the bytes added over a number of revisions for user(s).
        The parameter *user_handle* can be either a string or an integer or
        a list of these types. When the *user_handle* type is integer it is
        interpreted as a user id, and as a user_name for string input. If a
        list of users is passed to the *process* method then a dict object
        with edit rates keyed by user handles is returned.

        The flow of the request is as follows:

            #. Get all revisions for the specified users in the given
               timeframe
            #. For each parent revision get its length
            #. Compute the difference in length between each revision and
               its parent
            #. Record edit count, raw bytes added (with sign and absolute),
               amount of positive bytes added, amount of negative bytes added

        - Parameters:
            - **user_handle** - String or Integer (optionally lists). Value
              or list of values representing user handle(s).

        - Return:
            - Dictionary. key(string): user handle, value(Float): edit counts
    """
    BytesAddedArgsClass = collections.namedtuple('BytesAddedArgs',
                                                 'is_log freq project')
    revs = args[0]
    state = args[1]
    thread_args = BytesAddedArgsClass(state[0], state[1], state[2])

    conn = um.dl.Connector(instance='slave')
    bytes_added = dict()

    # Get the difference for each revision length from the parent
    # to compute bytes added
    row_count = 1
    missed_records = 0
    total_rows = len(revs)

    if thread_args.is_log:
        logging.info(
            __name__ + '::Processing revision data '
                       '(%s rows) by user... (PID = %s)' % (
                           total_rows, os.getpid()))

    for row in revs:
        try:
            user = str(row[0])
            rev_len_total = int(row[1])
            parent_rev_id = row[2]
        except (IndexError, TypeError):
            missed_records += 1
            continue

        # Produce the revision length of the parent. In case of a new
        # article, parent_rev_id = 0, no record in the db
        if parent_rev_id == 0:
            parent_rev_len = 0
        else:
            sql = bytes_added_rev_len_query(parent_rev_id,
                                            thread_args.project)
            try:
                parent_rev_len = conn.execute_SQL(sql)[0][0]
            except (IndexError, TypeError):
                missed_records += 1
                continue
            except ProgrammingError:
                raise um.UserMetric.UserMetricError(
                    message=str(BytesAdded) +
                    '::Could not produce rev diff for %s on rev_id %s.' % (
                        user, str(parent_rev_id)))

        # Update the bytes added hash - ignore revision if either rev length
        # is undetermined
        try:
            bytes_added_bit = int(rev_len_total) - int(parent_rev_len)
        except TypeError:
            missed_records += 1
            continue

        try:
            # Exception where the user does not exist. Handle this by
            # creating the key
            bytes_added[user][0] += bytes_added_bit
        except KeyError:
            bytes_added[user] = [0] * 5
            bytes_added[user][0] += bytes_added_bit

        bytes_added[user][1] += abs(bytes_added_bit)
        if bytes_added_bit > 0:
            bytes_added[user][2] += bytes_added_bit
        else:
            bytes_added[user][3] += bytes_added_bit
        bytes_added[user][4] += 1

        if thread_args.freq and row_count % thread_args.freq == 0 and \
                thread_args.is_log:
            logging.info(
                __name__ + '::Processed %s of %s records. (PID = %s)' % (
                    row_count, total_rows, os.getpid()))
        row_count += 1

    results = [[user] + bytes_added[user] for user in bytes_added]

    if thread_args.is_log:
        logging.info(
            __name__ + '::Processed %s out of %s records. (PID = %s)' % (
                total_rows - missed_records, total_rows, os.getpid()))

    return results
def build_time_series(start, end, interval, metric, aggregator, cohort,
                      **kwargs):
    """
        Builds a timeseries dataset for a given metric.

            Parameters:
                - **start**: str or datetime. date + time indicating start of
                  time series
                - **end**: str or datetime. date + time indicating end of
                  time series
                - **interval**: int. integer value in hours that defines the
                  amount of time between data-points
                - **metric**: class object. Metrics class (derived from
                  UserMetric)
                - **aggregator**: method. Aggregator method used to aggregate
                  data for time series data points
                - **cohort**: list(str). list of user IDs

        e.g.

        >>> cohort = ['156171','13234584']
        >>> metric = ba.BytesAdded
        >>> aggregator = agg.list_sum_indices

        >>> build_time_series('20120101000000', '20120112000000', 24, metric,
                aggregator, cohort, num_threads=4, num_threads_metric=2,
                log=True)
    """
    log = bool(kwargs['log']) if 'log' in kwargs else False

    # Get datetime types, and the number of threads
    start = date_parse(um.UserMetric._get_timestamp(start))
    end = date_parse(um.UserMetric._get_timestamp(end))
    k = kwargs['num_threads'] if 'num_threads' in kwargs else 1

    # Compute window size and ensure that all the conditions
    # necessary to generate a proper time series are met
    num_intervals = int((end - start).total_seconds() / (3600 * interval))
    intervals_per_thread = num_intervals / k

    # Compose the sets of time series lists
    f = lambda t, i: t + datetime.timedelta(
        hours=intervals_per_thread * interval * i)
    time_series = [_get_timeseries(f(start, i), f(start, i + 1), interval)
                   for i in xrange(k)]
    if f(start, k) < end:
        time_series.append(_get_timeseries(f(start, k), end, interval))

    data = list()
    q = Queue()
    processes = list()

    if log:
        logging.info(
            'Spawning procs, %s - %s, interval = %s, threads = %s ... ' % (
                str(start), str(end), interval, k))
    for i in xrange(len(time_series)):
        p = Process(target=time_series_worker,
                    args=(time_series[i], metric, aggregator, cohort,
                          kwargs, q))
        p.start()
        processes.append(p)

    while 1:
        # sleep before checking worker threads
        time.sleep(PROCESS_SLEEP_TIME)

        if log:
            logging.info('Process queue, %s threads.' % str(len(processes)))

        while not q.empty():
            data.extend(q.get())
        for p in processes:
            if not p.is_alive():
                p.terminate()
                processes.remove(p)

        # exit if all process have finished
        if not len(processes):
            break

    # sort
    return sorted(data, key=operator.itemgetter(0), reverse=False)
def output(cohort, metric):
    """ View corresponding to a data request -
        All of the setup and execution for a request happens here. """

    global global_id
    url = request.url.split(request.url_root)[1]

    # Check for refresh flag - drop from url
    refresh = True if 'refresh' in request.args else False
    if refresh:
        url = sub(REFRESH_REGEX, '', url)

    # Get the refresh date of the cohort
    try:
        cid = get_cohort_id(cohort)
        cohort_refresh_ts = get_cohort_refresh_datetime(cid)
    except Exception:
        cohort_refresh_ts = None
        logging.error(__name__ + '::Could not retrieve refresh '
                                 'time of cohort.')

    # Build a request. Populate with request parameters from query args.
    rm = RequestMetaFactory(cohort, cohort_refresh_ts, metric)
    for param in REQUEST_META_QUERY_STR:
        if param in request.args and hasattr(rm, param):
            if not request.args[param]:
                # Assign a value indicating presence of a query var
                setattr(rm, param, DEFAULT_QUERY_VAL)
            else:
                setattr(rm, param, request.args[param])

    # Process defaults for request parameters
    try:
        process_request_params(rm)
    except MetricsAPIError as e:
        return redirect(url_for('cohorts') + '?error=' + e.message)

    # Determine if the request maps to an existing response. If so return it.
    # Otherwise compute.
    data = get_data(rm, pkl_data)
    if data and not refresh:
        return data
    else:
        # Ensure that the job for this url is not already running
        is_pending_job = False
        for p in processQ:
            if not cmp(rm, p.request) and p.status[0] == 'pending':
                is_pending_job = True

        if not is_pending_job:
            # Queue the job
            q = mp.Queue()
            p = mp.Process(target=process_metrics, args=(q, rm))
            p.start()

            global_id += 1

            logging.info(__name__ + '::Appending request %s to the queue...'
                         % rm)
            processQ.append(QStructClass(global_id, p, rm, url, q, ['pending']))

            return render_template('processing.html', url_str=str(rm))
        else:
            return redirect(url_for('job_queue') + '?error=0')
@version:
'''
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
import xgboost as xgb
from sklearn.linear_model import LogisticRegression

from config import logging
import utils
import config


class LoadData:

    def __init__(self, aim='train'):
        self.aim = aim

    def load_data(self):
        logging.info('Starting to load order data!')
        df_order_data = utils.generate_data(column_names=config.order_data_names,
                                            aim='train',
                                            table_name='order_data')
        logging.info('Order data loaded!')

        logging.info('Starting to load POI data!')
        df_order_data = utils.generate_data(column_names=config.poi_data_names,
                                            aim='train',
                                            table_name='poi_data')
        logging.info('POI data loaded!')

        logging.info('Starting to load road congestion data!')
        df_traffic_data = utils.generate_data(column_names=config.traffic_data_names,
                                              aim='train',
                                              table_name='traffic_data')
        logging.info('Road congestion data loaded!')

        logging.info('Starting to load weather data!')
def on_created(self, event):
    logging.info('FileChangeHandler.on_created: File creation detected')
    self.process(event.src_path)
def on_modified(self, event):
    logging.info('FileChangeHandler.on_modified: File change detected')
    self.process(event.src_path)
def process(self, user_handle, **kwargs):
    """
        Process method for the "blocks" metric. Computes a list of block and
        ban events for users.

        Parameters:
            - **user_handle** - List. List of user IDs.
            - **is_id** - Boolean. Defaults to False.

        Return:
            - UserMetric::Blocks (self).
    """
    self.apply_default_kwargs(kwargs, 'process')

    rowValues = {}
    log = bool(kwargs['log_progress'])

    if not hasattr(user_handle, '__iter__'):
        user_handle = [user_handle]  # ensure the handles are iterable
    users = um.dl.DataLoader().cast_elems_to_string(user_handle)

    for i in xrange(len(users)):
        rowValues[users[i]] = {'block_count': 0, 'block_first': -1,
                               'block_last': -1, 'ban': -1}

    cursor = self._data_source_._cur_
    user_dict = dict()

    # Get usernames for user ids to detect in block events
    users = um.dl.DataLoader().cast_elems_to_string(users)
    user_str = um.dl.DataLoader().format_comma_separated_list(users)
    cursor.execute('select user_id, user_name from enwiki.user '
                   'where user_id in (%s)' % user_str)
    for r in cursor:
        user_dict[r[1]] = r[0]  # keys username on userid
    user_handle_str = um.dl.DataLoader().format_comma_separated_list(
        user_dict.keys())

    # Get blocks from the logging table
    if log:
        logging.info(__name__ + '::Processing blocks for %s users.'
                     % len(user_handle))

    sql = """
            SELECT
                log_title as user,
                IF(log_params LIKE "%%indefinite%%", "ban", "block") as type,
                count(*) as count,
                min(log_timestamp) as first,
                max(log_timestamp) as last
            FROM %(wiki)s.logging
            WHERE log_type = "block"
                AND log_action = "block"
                AND log_title in (%(user_str)s)
                AND log_timestamp >= "%(timestamp)s"
            GROUP BY 1, 2
        """ % {
        'user_str': user_handle_str,
        'timestamp': self._start_ts_,
        'user_cond': user_handle_str,
        'wiki': self._project_
    }
    sql = " ".join(sql.strip().split())
    cursor.execute(sql)

    # Process rows - extract block and ban events
    for row in cursor:
        userid = str(user_dict[row[0]])
        type = row[1]
        count = row[2]
        first = row[3]
        last = row[4]

        if type == "block":
            rowValues[userid]['block_count'] = count
            rowValues[userid]['block_first'] = first
            rowValues[userid]['block_last'] = last
        elif type == "ban":
            rowValues[userid][type] = first

    self._results = [[user,
                      rowValues.get(user)['block_count'],
                      rowValues.get(user)['block_first'],
                      rowValues.get(user)['block_last'],
                      rowValues.get(user)['ban']]
                     for user in rowValues.keys()]

    return self
def process_data_request(metric_handle, users, **kwargs):

    # create shorthand method refs
    to_string = dl.DataLoader().cast_elems_to_string

    aggregator = kwargs['aggregator'] if 'aggregator' in kwargs else None
    agg_key = get_agg_key(aggregator, metric_handle) if aggregator else None

    # Initialize the results
    results = OrderedDict()

    metric_class = metric_dict[metric_handle]
    metric_obj = metric_class(**kwargs)

    start = metric_obj.date_start
    end = metric_obj.date_end

    results['header'] = " ".join(metric_obj.header())
    for key in metric_obj.__dict__:
        if re.search(r'_.*_', key):
            results[str(key[1:-1])] = str(metric_obj.__dict__[key])
    results['metric'] = OrderedDict()

    # Parse the aggregator
    aggregator_func = None
    if agg_key in aggregator_dict.keys():
        aggregator_func = aggregator_dict[agg_key]

    # Parse the time series flag
    time_series = True if 'time_series' in kwargs and kwargs['time_series'] \
        else False

    if aggregator_func:
        if time_series:
            # interval length in hours
            interval = int(kwargs['interval'])
            total_intervals = (date_parse(end) - date_parse(start)).\
                total_seconds() / (3600 * interval)
            time_threads = max(1, int(total_intervals / INTERVALS_PER_THREAD))
            time_threads = min(MAX_THREADS, time_threads)

            logging.info('Metrics Manager: Initiating time series for '
                         '%(metric)s with %(agg)s from '
                         '%(start)s to %(end)s.' % {
                             'metric': metric_class.__name__,
                             'agg': aggregator_func.__name__,
                             'start': str(start),
                             'end': str(end),
                         })
            out = tspm.build_time_series(start, end, interval, metric_class,
                                         aggregator_func, users,
                                         num_threads=time_threads,
                                         metric_threads='{"num_threads" : '
                                         '%(user_threads)s, '
                                         '"rev_threads" : %(rev_threads)s}' % {
                                             'user_threads': USER_THREADS,
                                             'rev_threads': REVISION_THREADS},
                                         log=True)

            count = 1
            for row in out:
                results['metric'][count] = " ".join(
                    to_string([row[0][:10] + 'T' + row[0][11:13]] + row[3:]))
                count += 1
        else:
            logging.info('Metrics Manager: Initiating aggregator for '
                         '%(metric)s with %(agg)s from '
                         '%(start)s to %(end)s.' % {
                             'metric': metric_class.__name__,
                             'agg': aggregator_func.__name__,
                             'start': str(start),
                             'end': str(end),
                         })
            metric_obj.process(users, num_threads=USER_THREADS,
                               rev_threads=REVISION_THREADS, **kwargs)
            r = um.aggregator(aggregator_func, metric_obj, metric_obj.header())
            results['metric'][r.data[0]] = " ".join(to_string(r.data[1:]))
            results['header'] = " ".join(to_string(r.header))
    else:
        logging.info('Metrics Manager: Initiating user data for '
                     '%(metric)s from %(start)s to %(end)s.' % {
                         'metric': metric_class.__name__,
                         'start': str(start),
                         'end': str(end),
                     })
        metric_obj.process(users, num_threads=USER_THREADS,
                           rev_threads=REVISION_THREADS,
                           log_progress=True, **kwargs)
        for m in metric_obj.__iter__():
            results['metric'][m[0]] = " ".join(to_string(m[1:]))

    return results
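# Usage sketch for process_data_request (an assumption for illustration):
# the metric handle, aggregator name and keyword arguments below are
# hypothetical and must correspond to entries in metric_dict /
# aggregator_dict and to the chosen metric's constructor.
users = ['156171', '13234584']
results = process_data_request('bytes_added', users,
                               aggregator='sum',
                               time_series=True,
                               interval=24,
                               date_start='20120101000000',
                               date_end='20120112000000')
print(results['header'])
print(results['metric'])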