Example #1
 def mouse_hover(self, elm):
     """
     :user : simulate a mouse hover
     :param elm: the element to hover over
     """
     logging.info(f"Simulating a mouse hover over {elm}")
     ActionChains(self.driver).move_to_element(elm).perform()
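These Selenium page-object helpers log through the root logger, so the logging.info() calls only produce visible output once logging has been configured somewhere in the project. A minimal sketch of such a setup, driving the same ActionChains hover directly (the URL and locator are purely illustrative):

import logging

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
)

driver = webdriver.Chrome()
driver.get("https://example.com")                 # illustrative URL
elm = driver.find_element("css selector", "h1")   # illustrative locator
logging.info("Simulating a mouse hover over %s", elm)
ActionChains(driver).move_to_element(elm).perform()
driver.quit()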
Example #2
 def test_login(self):
     logging.info("Normal login test")
     LaunchPage(self.driver).click_login()
     data = read_yaml('test_app_data.yaml', 'test_login')
     LoginPage(self.driver).login_action(data['phone'], data['pwd'])
     self.wait()
     self.assertIsNotNone(MainPage(self.driver).check_contants_ico())
Example #3
 def snapshot(self, prefix, title):
     title = title.replace('/', '').replace(':', '')
     file_path = os.path.join(basedir, 'snapshots',
                              "{}_{}.png".format(prefix, title))
     logging.info("Screenshot, save path: {}".format(file_path))
     self.driver.get_screenshot_as_file(file_path)
     self.wait(0.5)
Example #4
def _get_revisions(args):

    MethodArgsClass = collections.namedtuple('MethodArg',
        'log start end project namespace')
    users = args[0]
    state = args[1]
    arg_obj = MethodArgsClass(state[0], state[1], state[2], state[3], state[4])
    conn = um.dl.Connector(instance='slave')

    if arg_obj.log: logging.info('Computing revisions, PID = %s' % os.getpid())

    rev_query = bytes_added_rev_query(arg_obj.start, arg_obj.end, users, arg_obj.namespace,
                          arg_obj.project)

    if arg_obj.log:
        logging.info(__name__ +
                     '::Querying revisions for %(count)s users '
                     '(project = %(project)s, '
                     'namespace = %(namespace)s)... ' % {
                  'count' : len(users),
                  'project' : arg_obj.project,
                  'namespace' : arg_obj.namespace}
        )
    try:
        return list(conn.execute_SQL(rev_query))
    except ProgrammingError:
       raise um.UserMetric.UserMetricError(
           message=str(BytesAdded) + '::Could not get revisions '
                                     'for specified users(s) - Query Failed.')
Example #5
 def try_find(self, element_loc):
     """尝试定位, 处理偶现元素, 或判断元素是否存在"""
     logging.info("尝试定位元素: {}".format(element_loc))
     try:
         return self.driver.find_element(*element_loc)
     except NoSuchElementException:
         logging.warning("Element did not appear: {}".format(element_loc))
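Because try_find swallows NoSuchElementException and implicitly returns None, callers can use the return value as an existence check, e.g. to dismiss a dialog that only appears sometimes. A standalone sketch of that pattern (URL and locator are illustrative):

import logging

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException

logging.basicConfig(level=logging.INFO)

def try_find(driver, element_loc):
    """Standalone version of the helper above: returns the element or None."""
    try:
        return driver.find_element(*element_loc)
    except NoSuchElementException:
        logging.warning("Element did not appear: %s", element_loc)

driver = webdriver.Chrome()
driver.get("https://example.com")                      # illustrative URL
close_btn = try_find(driver, ("id", "popup-close"))    # illustrative locator
if close_btn is not None:
    close_btn.click()                                  # dismiss the occasional dialog
driver.quit()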
Example #6
    def async_cli(self, action, coin):
        '''
        eventually offer async_cli functions

        :param action:
        :param coin:
        :return:
        '''
        try:
            cmd = f"{self.installed_folder}/{coin.cli} -datadir={self.wallet_directory} {action}"
            logging.info(
                f"Attempting to execute command from masternode object: {cmd}")
            '''
            need to have a threadpool and throw this in there and await the result
            '''
            result = self.connection.run(cmd, hide=False)
            logging.info(
                f"Executed {result.command} on {result.connection.host}, got stdout:\n{result.stdout}"
            )
            return result.stdout
        except UnexpectedExit as e:
            # possibly try to start the daemon again
            logging.warning(f"{coin.cli} exited unexpectedly", exc_info=e)
            return '{"status":"restart"}'
        except Exception as e:
            logging.error(f"Could not do action on daemon at {self.getIP()}")
Example #7
def time_series_worker(time_series, metric, aggregator, cohort, kwargs, q):
    """ worker thread which computes time series data for a set of points """
    log = bool(kwargs['log']) if 'log' in kwargs else False

    data = list()
    ts_s = time_series.next()
    new_kwargs = deepcopy(kwargs)

    # re-map some keyword args relating to thread counts
    if 'metric_threads' in new_kwargs:
        d = json.loads(new_kwargs['metric_threads'])
        for key in d: new_kwargs[key] = d[key]
        del new_kwargs['metric_threads']

    while 1:
        try: ts_e = time_series.next()
        except StopIteration: break

        if log: logging.info(__name__ +
                             ' :: Processing thread %s, %s - %s ...' % (
            os.getpid(), str(ts_s), str(ts_e)))

        metric_obj = metric(date_start=ts_s,date_end=ts_e,**new_kwargs).\
            process(cohort, **new_kwargs)

        r = um.aggregator(aggregator, metric_obj, metric.header())

        if log: logging.info(__name__ +
                             ' :: Processing complete %s, %s - %s ...' % (
                                 os.getpid(), str(ts_s), str(ts_e)))

        data.append([str(ts_s), str(ts_e)] + r.data)
        ts_s = ts_e
    q.put(data) # add the data to the queue
Example #8
def get_t_result(li):
    ret_text = ""
    for t in li:
        isbreak = False
        t.join()
        #if t.get_result(): ret_text += t.get_result() + '\r\n'
        count = 0
        #logging.info(t.get_result())
        while t.get_result() is None:
            #logging.info(t.get_result())
            count += 1
            logging.info(t.cmd + ": execution failed, retrying.")
            #logging.info(t.ip)
            #logging.info(t.cmd + ": execution failed, retrying.")
            t.run()
            if t.get_result() is not None:
                ret_text += t.info + " " + 'retried push %d time(s), succeeded!\r\n' % count
                isbreak = True
                break
            if count == 2: break
        if t.get_result() is None:
            logging.info(t.info + " " + ': execution failed!\r\n')
            ret_text += t.info + " " + ': execution failed!\r\n'
        else:
            if not isbreak:
                ret_text += t.info + t.get_result() + " " + '\r\n'
    return ret_text
Example #9
    def start_polisd(request):

        if request.method == 'POST':
            mns = request.form.getlist('mns')
            actions = request.form.getlist('params')

            result = 'Attempted starting: ' + ', '.join(mns)
            for idx in mns:
                vps = VPS(config['masternodes'][int(idx)],
                          Polis(config['Polis']))
                result = vps.daemon_action(Polis(config["Polis"]))
                logging.info(f"Restarted {vps.getIP()} got: {result}")

            return f"Result of polisd {actions}: {result} <br><a href=/mns/cli/masternodes/status></a>"
        else:
            # display a list of all MNs with a "start" button
            mnlist = "<form method='POST'>\n<select name=mns multiple>\n"
            idx = 0

            for masternode in config["masternodes"]:
                mnlist += f"\t<option value='{str(idx)}'>{masternode['connection_string']}</option>\n"
                idx += 1

            mnlist += "</select>\n"
            return mnlist + "<p><input type=submit value=start></form>"
Example #10
    def cron_read(request, mnidx):
        coin = Polis(config["Polis"])
        vps = VPS(config["masternodes"][mnidx], coin)
        result = {"result": vps.actions("view_crontab", coin).splitlines()}
        logging.info(f"Crontab requested got:\n{result}")

        return json.dumps(result)
Example #11
def upgrade(request, mnidx):
    coin = Polis(config["Polis"])
    vps = VPS(config["masternodes"][mnidx], coin)

    logging.info(f"vps.upgrade called ! for: {vps.getIP()}")
    result = vps.upgrade(coin)
    return result
Example #12
    async def handle_start(self):
        # Check if user is already registered
        if self.is_invited:
            return 'You are already registered with me. \
Use /stop to deregister'

        # Check deeplinked start param for invite key
        invite_key = self.text[len('/start '):]
        if not invite_key:
            return 'Sorry, you must have a valid invite code to interact'

        # TODO: Call main app to validate invite key
        # For now a static invite key
        if invite_key != 'alethea':
            return 'Invalid invite code'

        # Register user
        username = self.user['username']
        id = self.user['id']
        try:
            await self.db.register_user(
                first_name=self.user.get('first_name', '-'),
                last_name=self.user.get('last_name', '-'),
                username=username,
                id=id,
                role='user')
            logging.info(f'{id} has registered')
        except Exception as e:
            logging.error(e)
            return 'Failed to register you. Try again.'
        return f'Welcome, {username}'
Example #13
def add_sign():
    """ 邮件末尾签名
    """
    logging.info("")
    logging.info("")
    logging.info("")
    logging.info("---------------------------------------------------------")
    logging.info("May every lover in the world, and the one I like, be just like you")
Example #14
def add_head():
    """ 为邮件添加头部信息
    """
    logging.info("From: heraldstudio < *****@*****.** >")
    logging.info("Subject: 小猴偷米日志记录")
    logging.info("")
    logging.info("This email was generated automatically, please do not reply")
    logging.info("")
Example #15
async def loop():
    while True:
        n = datetime.datetime.utcnow() + datetime.timedelta(0, -6000, 0)
        logging.info("start time - end time %s %s", n, datetime.datetime.utcnow())
        await findallhdmp4(n)
        print("over2")
        await asyncio.sleep(5)
    return
Example #16
 def my_sleep(self, s):
     """
         :user : forced wait
         :type : int
         :param s: the time to wait when calling this method, in seconds
     """
     logging.info("Forced wait of {} seconds".format(s))
     sleep(s)
Example #17
    def create_logging_header(self):
        """
        Write summary information to the logging file

        """
        logging.info("Date: %s, total visits: %d" % (self.date, len(self.log_list)))

        logging.info("")
Example #18
 async def handle_stop(self):
     id = self.user['id']
     try:
         await self.db.deregister_user(id)
         logging.info(f'{id} has deregistered')
     except Exception as e:
         logging.error(e)
     return 'Goodbye'
Example #19
 def select_pull(self, elm, text):
     """
     :user : handle a drop-down selection via the Select class
     :param text: the visible text of the option to select, str
     :param elm: the xpath of the drop-down element, str
     """
     logging.info(f"Selecting {text} in the {elm} drop-down")
     Select(self.driver.find_element("xpath",
                                     elm)).select_by_visible_text(text)
Example #20
def infer(model_path, model_fname, sample):
    # load model
    with open(os.path.join(model_path, model_fname), 'rb') as model_in:
        model = pickle.load(model_in)
    logging.info(f"{model_fname} loaded for inference")

    pred_proba = model.predict_proba(sample)
    logging.info(f"prediction complete")
    return pred_proba
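A hedged sketch of the round trip infer expects: fit a small scikit-learn model (anything exposing predict_proba), pickle it, and call infer on a new sample. The path and data below are purely illustrative:

import logging
import os
import pickle

from sklearn.linear_model import LogisticRegression

logging.basicConfig(level=logging.INFO)

model_path, model_fname = ".", "clf.pkl"           # illustrative location
X, y = [[0.0], [1.0], [2.0], [3.0]], [0, 0, 1, 1]  # toy training data
clf = LogisticRegression().fit(X, y)

with open(os.path.join(model_path, model_fname), "wb") as model_out:
    pickle.dump(clf, model_out)

print(infer(model_path, model_fname, [[1.5]]))     # class probabilities for one sample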
Example #21
def job_queue():
    """ View for listing current jobs working """

    error = get_errors(request.args)

    def error_class(em):
        return {
            'failure': 'error',
            'pending': 'warning',
            'success': 'success',
        }.get(em, '')

    p_list = list()
    p_list.append(Markup('<thead><tr><th>is_alive</th><th>PID</th><th>url'
                         '</th><th>status</th></tr></thead>\n<tbody>\n'))
    for p in processQ:
        try:

            # Pull data off of the queue and add it to the queue data
            while not p.queue.empty():
                if p.id not in queue_data:
                    queue_data[p.id] = json.loads(p.queue.get().data)
                else:
                    for k, v in queue_data[p.id].items():
                        if hasattr(v, '__iter__'): queue_data[p.id][k].extend(v)

            # once a process has finished working remove it and put its
            # contents into the cache
            if not p.process.is_alive() and p.status[0] == 'pending':
                q_response = make_response(jsonify(queue_data[p.id]))
                del queue_data[p.id]
                set_data(p.request, q_response, pkl_data)

                p.status[0] = 'success'
                logging.info(__name__ + '::Completed request %s.' % p.url)

        except Exception as e:
            p.status[0] = 'failure'
            logging.error(__name__ + "::Could not update request: %s.  "
                                     "Exception: %s" % (p.url, e.message) )

        # Log the status of the job
        response_url = "".join(['<a href="',
                                request.url_root, p.url + '">', p.url, '</a>'])
	
        p_list.append(Markup('<tr class="'+ error_class(p.status[0])+'"><td>'))
        p_list.append("</td><td>".join([str(p.process.is_alive()),
                                        str(p.process.pid),
                                  escape(Markup(response_url)), p.status[0]]))
        p_list.append(Markup('</td></tr>'))

    p_list.append(Markup('\n</tbody>'))

    if error:
        return render_template('queue.html', procs=p_list, error=error)
    else:
        return render_template('queue.html', procs=p_list)
Example #22
 def find_all(self, element_loc):
     """定位一组元素"""
     logging.info("定位一组元素: {}".format(element_loc))
     try:
         return self.driver.find_elements(*element_loc)
     except NoSuchElementException:
         logging.error("Failed to locate the group of elements: {}".format(element_loc))
         self.driver.save_snapshot("1.png")
         raise
Example #23
 def js_del_element(self, element_loc, element):
     """
     :user : remove an element attribute via JavaScript
     :param element_loc: a CSS selector locating the element, str
     :param element: the attribute to remove from the element
     """
     logging.info(f"Removing the {element} attribute via JavaScript")
     self.driver.execute_script(
         f"document.querySelector('{element_loc}').removeAttribute('{element}')"
     )
Example #24
 def trade(self, title):
     """
     :user : switch to another browser window
     :param title: the title of the target window, str
     """
     logging.info(f"Switching windows, switching to the {title} window")
     all_win = self.driver.window_handles
     for x in all_win:
         self.driver.switch_to.window(x)
         if self.driver.title == title:
             break
Example #25
    def daemon_masternode_start(request, mn_idx, reindex):
        coin = Polis(config['Polis'])

        vps = VPS(config["masternodes"][mn_idx], coin)
        result = vps.kill_daemon(coin)
        time.sleep(10)
        logging.info(f"Killed daemon {result}")
        result = vps.daemon_action(coin, reindex)
        logging.info(f"Executed: polisd @ {mn_idx} returned: {result}")
        return result
Example #26
 def find(self, element_loc):  # ('id', 'kw')
     """通过元素定位器定位元素"""
     logging.info("定位元素: {}".format(element_loc))
     try:
         return self.driver.find_element(*element_loc)
     except NoSuchElementException:
         logging.error("Failed to locate element: {}".format(element_loc))
         self.snapshot("locate_failed",
                       "_".join(element_loc))  # element_loc = ('id', 'kw')
         raise
Example #27
def _process_help(args):

    state = args[1]
    thread_args = NamespaceEditsArgsClass(state[0],state[1],state[2],state[3])
    user_data = args[0]
    conn = Connector(instance='slave')

    to_string = DataLoader().cast_elems_to_string
    to_csv_str = DataLoader().format_comma_separated_list

    # Format user condition
    user_cond = "rev_user in (" + to_csv_str(to_string(user_data)) + ")"

    # Format timestamp condition
    ts_cond = "rev_timestamp >= %s and rev_timestamp < %s" % (thread_args.date_start, thread_args.date_end)

    if thread_args.log:
        logging.info(__name__ + '::Computing namespace edits. (PID = %s)' % getpid())
        logging.info(__name__ + '::From %s to %s. (PID = %s)' % (
            str(thread_args.date_start), str(thread_args.date_end), getpid()))
    sql = """
            SELECT
                r.rev_user,
                p.page_namespace,
                count(*) AS revs
            FROM %(project)s.revision AS r JOIN %(project)s.page AS p
                ON r.rev_page = p.page_id
            WHERE %(user_cond)s AND %(ts_cond)s
            GROUP BY 1,2
        """ % {
        "user_cond" : user_cond,
        "ts_cond" : ts_cond,
        "project" : thread_args.project,
    }
    conn._cur_.execute(" ".join(sql.split('\n')))

    # Tally counts of namespace edits
    results = dict()

    for user in user_data:
        results[str(user)] = OrderedDict()
        for ns in NamespaceEdits.VALID_NAMESPACES: results[str(user)][str(ns)] = 0
    for row in conn._cur_:
        try:
            if row[1] in NamespaceEdits.VALID_NAMESPACES:
                results[str(row[0])][str(row[1])] = int(row[2])
        except KeyError:
            logging.error(__name__ + "::Could not process row: %s" % str(row))
            pass
        except IndexError:
            logging.error(__name__ + "::Could not process row: %s" % str(row))
            pass

    del conn
    return [(user, results[user]) for user in results]
Example #28
 def iframe_enter(self, element):
     """
     :user : switch into a frame
     :param element: the frame to switch into, name or id
     """
     logging.info(f'Switching into frame {element}')
     try:
         self.driver.switch_to.frame(element)
     except BaseException:
         logging.error(f'Failed to switch into frame, element: {element}')
         raise
Example #29
 def my_element(self, element):
     """
     :user : element locator
     :param element: a locator tuple such as ('xpath', 'element'), tuple type
     """
     logging.info("Locator, element: {}".format(element))
     try:
         return self.driver.find_element(*element)
     except BaseException:
         id = random.randint(10000, 99999)
         logging.error('Locating failed, element: {}, screenshot id: {}'.format(element, id))
         raise
Example #30
async def findlargemp4fileffmpeg(starttime, endtime):
    #print("begin findlargemp4fileffmpeg")
    mp4list = []
    client = AsyncIOMotorClient(ServerParameters.mongodbpath)
    db = client.jt808

    bucket = AsyncIOMotorGridFSBucket(db, "eventuploadvideos")
    cursor = bucket.find({
        "uploadDate": {
            '$gt': starttime,
            '$lte': endtime
        },
        "filename": {
            "$regex": ".mp4$"
        }
    })
    filelist = await cursor.to_list(100000)

    ccount = 0
    for fi in filelist:
        if fi["length"] > 1000000:
            print(fi)
            if os.path.exists(fi["filename"]):
                os.remove(fi["filename"])
            ds = await bucket.open_download_stream(fi["_id"])
            f = open("input" + fi["filename"], 'wb')
            bbb = await ds.read()
            f.write(bbb)
            f.close()
            ds.close()
            converttstoh264("input" + fi["filename"], fi["filename"])
            if os.path.exists("input" + fi["filename"]):
                os.remove("input" + fi["filename"])
            # save back to the bucket
            try:
                if os.path.exists(fi["filename"]):
                    uf = open(fi["filename"], "rb")
                    ubbb = uf.read()
                    uf.close()
                    os.remove(fi["filename"])
                    bucket.delete(fi["_id"])
                    uds = bucket.open_upload_stream_with_id(
                        fi["_id"], fi["filename"])
                    await uds.write(ubbb)
                    uds.close()
                    ccount = ccount + 1
                    logging.info("convert %s %s", fi["_id"], fi["filename"])
            except BaseException as e:
                logging.error(e)
    logging.info("end findlargemp4fileffmpeg total %s convert %s",
                 len(filelist), ccount)
    return
Example #31
    def screenshot(self, id, data):
        """
        :user : take a screenshot
        :param id: an id used as the screenshot id, int or str
        :param data: a description, str, e.g. "locating failed"
        """
        logging.info('Screenshot function called, taking a screenshot')
        now_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        user = "******".format(id) + "{}".format(data) + \
            now_time.replace(" ", '-').replace(":", '-')

        self.driver.get_screenshot_as_file(printscreen_path +
                                           "/{}.png".format(user))
Example #32
    def __init__(self, device_name, class_room):
        if not device_name + '+' + class_room in TCP_CONNECTION.keys():
            logging.info(TCP_CONNECTION.keys())
            raise DeviceNotConnectException

        result = WifiDevice.select() \
            .where(WifiDevice.class_number == class_room).execute()
        self.devices = []
        for device in result:
            self.devices.append(device.device_number)
        self.device_name = device_name
        self.class_room = class_room
        self.send_data = {"device_name": device_name, "class": class_room}
Example #33
    async def run_schedule(self, schedule, loop):
        logging.debug('run_schedule: calling controller.stop_all()')
        self.controller.stop_all() #This is currently unnecessary with the processes cancelling in the processes method. Which is better?
        for i, row in enumerate(schedule):
            logging.info('running {} at {}'.format(row['name'], row['start_time']))
            starting_time =  row['start_time']-config.STARTING_WARNING
            start_time =     row['start_time']
            warning_time =   row['start_time'] + row['talk_length'] - config.TALK_WARNING
            questions_time = row['start_time'] + row['talk_length']
            q_warning_time = row['start_time'] + row['talk_length'] + row['question_length'] - config.QUESTION_WARNING
            end_time =       row['start_time'] + row['talk_length'] + row['question_length']
            
            if seconds_until(starting_time) > 0:
                logging.debug('nothing until {}'.format(start_time))
                self.controller.stop_all()
                self.controller.start([light_controls.stop, self.screen.stop(row['name'], row['title'], start_time)])
                await asyncio.sleep(seconds_until(starting_time), loop=loop)
            if seconds_until(start_time) > 0: #before talk start
                logging.debug('start until {}'.format(start_time))
                self.controller.stop_all()
                self.controller.start([light_controls.starting, self.screen.starting(row['name'], row['title'], start_time)])
                await asyncio.sleep(seconds_until(start_time), loop=loop)
            if seconds_until(warning_time) > 0: #before talk warning
                logging.debug('speaking until {}'.format(warning_time))
                self.controller.stop_all()
                self.controller.start([light_controls.speaking, self.screen.speaking(row['name'], row['title'], questions_time)])
                await asyncio.sleep(seconds_until(warning_time), loop=loop)
            if seconds_until(questions_time) > 0: #before question time
                logging.debug('speaking warning until {}'.format(questions_time))
                self.controller.stop_all()
                self.controller.start([light_controls.speaking_warning, self.screen.speaking_warning(row['name'], row['title'], questions_time)])
                await asyncio.sleep(seconds_until(questions_time), loop=loop)
            if seconds_until(q_warning_time) > 0: #before question warning
                logging.debug('questions until {}'.format(q_warning_time))
                self.controller.stop_all()
                self.controller.start([light_controls.questions, self.screen.questions(row['name'], row['title'], end_time)])
                await asyncio.sleep(seconds_until(q_warning_time), loop=loop)
            if seconds_until(end_time) > 0: #before end of talk
                logging.debug('questions warning until {}'.format(end_time))
                self.controller.stop_all()
                self.controller.start([light_controls.questions_warning, self.screen.questions_warning(row['name'], row['title'], end_time)])
                await asyncio.sleep(seconds_until(end_time), loop=loop)
            logging.debug('end of talk. stopping all coroutines')
            self.controller.stop_all()

        logging.debug('empty schedule. Disco!')
        self.controller.stop_all()
        self.controller.start([light_controls.empty_schedule, self.screen.empty_schedule()])
        await asyncio.sleep(60*60*6, loop=loop)
        self.controller.stop_all()
Example #34
    def process(self, user_handle, **kwargs):

        self.apply_default_kwargs(kwargs,'process')

        if not hasattr(user_handle, '__iter__'): user_handle = [user_handle] # ensure the handles are iterable
        k = int(kwargs['num_threads'])
        log = bool(kwargs['log'])

        if log: logging.info(__name__ + "::parameters = " + str(kwargs))

        # Multiprocessing vs. single processing execution
        args = [self._project_, log, self._start_ts_, self._end_ts_]
        self._results = mpw.build_thread_pool(user_handle,_process_help,k,args)

        return self
Example #35
 def daemon_action(self, coin, reindex=0):
     try:
         cmd = f"{self.installed_folder}/{coin.daemon} -datadir={self.wallet_directory}"
         if reindex == 1:
             cmd += " -reindex"
         result = self.connection.run(cmd, hide=False)
         logging.info(
             f"Executed {result.command} on {result.connection.host}, got stdout:\n{result.stdout}"
         )
         return result.stdout
     except UnexpectedExit as e:
         logging.warning(f"{coin.daemon} exited unexpectedly", exc_info=e)
         return '{"status":"restart"}'
     except Exception as e:
         logging.error(f"Could not do action on daemon at {self.getIP()}")
Example #36
def _process_help(args):
    """ Used by Threshold::process() for forking.
        Should not be called externally. """

    state = args[1]
    thread_args = RevertRateArgsClass(state[0],state[1],state[2],
                                      state[3],state[4],state[5],state[6])
    user_data = args[0]

    if thread_args.log_progress:
        logging.info(__name__ +
                    '::Computing reverts on %s users in thread %s.'
                    % (len(user_data), str(os.getpid())))
    results_agg = list()
    for user in user_data:
        conn = dl.Connector(instance='slave')
        conn._cur_.execute(
            revert_rate_user_revs_query(thread_args.project, user,
                thread_args.date_start,
                thread_args.date_end)
        )

        total_revisions = 0.0
        total_reverts = 0.0

        revisions = [rev for rev in conn._cur_]
        del conn

        results_thread = mpw.build_thread_pool(revisions, _revision_proc,
                                               thread_args.rev_threads, state)

        for r in results_thread:
            total_revisions += r[0]
            total_reverts += r[1]

        if not total_revisions:
            results_agg.append([user, 0.0, total_revisions])
        else:
            results_agg.append([user, total_reverts / total_revisions,
                                total_revisions])

    if thread_args.log_progress: logging.info(__name__ +
                                              '::PID %s complete.' %
                                              (str(os.getpid())))
    return results_agg
Example #37
def read_schedule(fileName):
    logging.info('Reading new schedule')
    talks = []
    with open(fileName, 'r') as csvfile:
        reader = csv.DictReader(csvfile, delimiter=',')
        for row in reader:
            if (row['room_code'] == config.ROOM_CODE): #if in this room
                #field parsing
                row['start_time'] = datetime.datetime.strptime(row['start_time'], date_format)
                row['talk_length'] = datetime.timedelta(minutes=int(row['talk_length']))
                row['question_length'] = datetime.timedelta(minutes=int(row['question_length']))
                print(row['start_time']+row['talk_length']+row['question_length'])
                if row['start_time']+row['talk_length']+row['question_length'] > datetime.datetime.today():#if not finished
                    talks.append(row)
    talks = sorted(talks, key=lambda x: x['start_time'])
    if len(talks) == 0:
        logging.info('Empty schedule')
    return talks
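The run_schedule coroutine shown earlier consumes the rows produced here as plain dicts. A minimal sketch of the shape one parsed row takes after read_schedule has run (field values are illustrative, and the exact CSV columns and date_format are project-specific):

import datetime

row = {
    "name": "Jane Doe",                                  # speaker name shown on screen
    "title": "Async lighting control",                   # talk title
    "room_code": "A1",                                   # must match config.ROOM_CODE
    "start_time": datetime.datetime(2024, 1, 1, 10, 0),  # parsed from the CSV
    "talk_length": datetime.timedelta(minutes=20),
    "question_length": datetime.timedelta(minutes=5),
}
talks = sorted([row], key=lambda x: x["start_time"])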
Example #38
def _process_help(args):

    # Unpack args
    state = args[1]
    thread_args = LiveAccountArgsClass(state[0],state[1],state[2],state[3],
                                        state[4],state[5])
    user_data = args[0]
    conn = Connector(instance='slave')

    # Log progress
    if thread_args.log:
        logging.info(__name__ + '::Computing live account. (PID = %s)' %
                                getpid())
        logging.info(__name__ + '::From %s to %s. (PID = %s)' % (
            str(thread_args.date_start), str(thread_args.date_end), getpid()))

    # Extract edit button click from edit_page_tracking table (namespace,
    # article title, timestamp) of first click and registration timestamps
    # (join on logging table)
    #
    # Query will return: (user id, time of registration, time of first
    # edit button click)
    la_query = live_account_query(user_data, thread_args.namespace,
                                thread_args.project)
    conn._cur_.execute(la_query)

    # Iterate over results to determine boolean indicating whether
    # account is "live"
    results = { long(user) : -1 for user in user_data}
    for row in conn._cur_:
        try:
            diff = (date_parse(row[2]) - date_parse(row[1])).total_seconds()
            diff /= 60 # get the difference in minutes
        except Exception:
            continue

        if diff <= thread_args.t:
            results[row[0]] = 1
        else:
            results[row[0]] = 0

    return [(str(key), results[key]) for key in results]
Example #39
def get_users(cohort_expr):
    """ get users from cohort """

    if search(COHORT_REGEX, cohort_expr):
        logging.info(__name__ + '::Processing cohort by expression.')
        users = [user for user in parse_cohorts(cohort_expr)]
    else:
        logging.info(__name__ + '::Processing cohort by tag name.')
        conn = dl.Connector(instance='slave')
        try:
            conn._cur_.execute('select utm_id from usertags_meta '
                               'WHERE utm_name = "%s"' % str(cohort_expr))
            res = conn._cur_.fetchone()[0]
            conn._cur_.execute('select ut_user from usertags '
                               'WHERE ut_tag = "%s"' % res)
        except IndexError:
            redirect(url_for('cohorts'))
        users = [r[0] for r in conn._cur_]
        del conn
    return users
Example #40
    def process(self, user_handle, **kwargs):
        """ Setup metrics gathering using multiprocessing """

        self.apply_default_kwargs(kwargs,'process')

        k = kwargs['num_threads']
        log_progress = bool(kwargs['log_progress'])
        log_frequency = int(kwargs['log_frequency'])

        if user_handle:
            if not hasattr(user_handle, '__iter__'):
                user_handle = [user_handle]
            # build the argument lists for each thread

        if not user_handle:
            sql = bytes_added_rev_user_query(self._start_ts_, self._end_ts_)

            if log_progress: logging.info(
                __name__ + '::Getting all distinct users: " %s "' % sql)
            user_handle = [str(row[0]) for row in
                           self._data_source_.execute_SQL(sql)]
            if log_progress: logging.info(
                __name__ + '::Retrieved %s users.' % len(user_handle))

        # get revisions
        args = [log_progress, self._start_ts_,
                self._end_ts_, self._project_, self._namespace_]
        revs = mpw.build_thread_pool(user_handle,_get_revisions,k,args)

        # Start worker threads and aggregate results for bytes added
        args = [log_progress, log_frequency, self._project_]
        self._results = agg.list_sum_by_group(
            mpw.build_thread_pool(revs,_process_help,k,args),0)

        # Add any missing users - O(n)
        tallied_users = set([str(r[0]) for r in self._results])
        for user in user_handle:
            if str(user) not in tallied_users:
                # Add a row indicating no activity for that user
                self._results.append([user,0,0,0,0,0])
        return self
Example #41
def process_metrics(p, rm):
    """ Worker process for requests -
        this will typically operate in a forked process """

    conn = dl.Connector(instance='slave')
    logging.info(__name__ + '::START JOB %s (PID = %s)' % (str(rm),
                                                           os.getpid()))

    # obtain user list - handle the case where a lone user ID is passed
    if search(MW_UID_REGEX, str(rm.cohort_expr)):
        users = [rm.cohort_expr]
    # Special case where user lists are to be generated based on registered
    # user reg dates from the logging table -- see src/metrics/users.py
    elif rm.cohort_expr == 'all':
        users = MediaWikiUser(query_type=1)
    else:
        users = get_users(rm.cohort_expr)

    # unpack RequestMeta into dict using MEDIATOR
    args = { attr.metric_var : getattr(rm, attr.query_var)
             for attr in QUERY_PARAMS_BY_METRIC[rm.metric] }
    logging.info(__name__ + '::Calling %s with args = %s.' % (rm.metric,
                                                              str(args)))

    # process request
    results = mm.process_data_request(rm.metric, users, **args)

    p.put(jsonify(results))
    del conn
    logging.info(__name__ + '::END JOB %s (PID = %s)' % (str(rm), os.getpid()))
Example #42
def _process_help(args):
    """ Used by Threshold::process() for forking.
        Should not be called externally. """

    ThresholdArgsClass = collections.namedtuple('ThresholdArgs',
        'project namespace n t log_progress survival restrict ts_start ts_end')
    user_data = args[0]
    state = args[1]
    thread_args = ThresholdArgsClass(state[0],state[1],state[2],
        state[3],state[4],state[5],state[6],state[7],state[8])

    if thread_args.log_progress:
        logging.info(__name__ + ' :: Processing revision data ' + \
        '(%s users) by user... (PID = %s)' % (len(user_data), os.getpid()))
        logging.info(__name__ + ' :: ' + str(thread_args))

    # only proceed if there is user data
    if not len(user_data): return []

    conn = um.dl.Connector(instance='slave')
    results = list()
    dropped_users = 0
    for r in user_data:
        try:
            threshold_ts = um.UserMetric._get_timestamp(um.date_parse(r[1]) +
                                              timedelta(hours=thread_args.t))
            uid = long(r[0])
            rev_query = threshold_rev_query(uid,
                                            thread_args.survival,
                                            thread_args.namespace,
                                            thread_args.project,
                                            thread_args.restrict,
                                            thread_args.ts_start,
                                            thread_args.ts_start,
                                            threshold_ts)
            conn._cur_.execute(rev_query)
            count = int(conn._cur_.fetchone()[0])
        except IndexError:
            dropped_users += 1
            continue
        except ValueError:
            dropped_users += 1
            continue

        if count < thread_args.n:
            results.append((r[0],0))
        else:
            results.append((r[0],1))

    if thread_args.log_progress: logging.info(
        __name__ + '::Processed PID = %s.  Dropped users = %s.' % (
            os.getpid(), str(dropped_users)))

    return results
Example #43
def _process_help(args):

    """
        Determine the bytes added over a number of revisions for user(s).  The
        parameter *user_handle* can be either a string or an integer or a list
        of these types.  When the *user_handle* type is integer it is
        interpreted as a user id, and as a user_name for string input.  If a
        list of users is passed to the *process* method then a dict object
        with edit rates keyed by user handles is returned.

        The flow of the request is as follows:

            #. Get all revisions for the specified users in the given
                timeframe
            #. For each parent revision get its length
            #. Compute the difference in length between each revision and its
                parent
            #. Record edit count, raw bytes added (with sign and absolute),
                amount of positive bytes added, amount of negative bytes added

        - Parameters:
            - **user_handle** - String or Integer (optionally lists).  Value
                or list of values representing user handle(s).
        - Return:
            - Dictionary. key(string): user handle, value(Float): edit counts
    """

    BytesAddedArgsClass = collections.namedtuple('BytesAddedArgs',
        'is_log freq project')
    revs = args[0]
    state = args[1]
    thread_args = BytesAddedArgsClass(state[0],state[1],state[2])

    conn = um.dl.Connector(instance='slave')
    bytes_added = dict()

    # Get the difference for each revision length from the parent
    # to compute bytes added
    row_count = 1
    missed_records = 0
    total_rows = len(revs)

    if thread_args.is_log:
        logging.info(
            __name__ + '::Processing revision data '
                       '(%s rows) by user... (PID = %s)' % (
                total_rows, os.getpid()))

    for row in revs:
        try:
            user = str(row[0])
            rev_len_total = int(row[1])
            parent_rev_id = row[2]

        except IndexError:
            missed_records += 1
            continue
        except TypeError:
            missed_records += 1
            continue

        # Produce the revision length of the parent.  In case of a new
        # article, parent_rev_id = 0, no record in the db
        if parent_rev_id == 0:
            parent_rev_len = 0
        else:
            sql = bytes_added_rev_len_query(parent_rev_id, thread_args.project)
            try:
                parent_rev_len = conn.execute_SQL(sql)[0][0]
            except IndexError:
                missed_records += 1
                continue
            except TypeError:
                missed_records += 1
                continue
            except ProgrammingError:
                raise um.UserMetric.UserMetricError(message=str(BytesAdded) +
                        '::Could not produce rev diff for %s on rev_id %s.' % (
                                user, str(parent_rev_id)))

        # Update the bytes added hash - ignore revision if either rev length
        # is undetermined
        try:
            bytes_added_bit = int(rev_len_total) - int(parent_rev_len)
        except TypeError:
            missed_records += 1
            continue

        try:
            # Exception where the user does not exist.  Handle this by
            # creating the key
            bytes_added[user][0] += bytes_added_bit
        except KeyError:
            bytes_added[user] = [0] * 5
            bytes_added[user][0] += bytes_added_bit
            pass

        bytes_added[user][1] += abs(bytes_added_bit)
        if bytes_added_bit > 0:
            bytes_added[user][2] += bytes_added_bit
        else:
            bytes_added[user][3] += bytes_added_bit
        bytes_added[user][4] += 1


        if thread_args.freq and row_count % thread_args.freq == 0 and \
           thread_args.is_log:
            logging.info(
                __name__ + '::Processed %s of %s records. (PID = %s)' % (
                    row_count, total_rows, os.getpid()))

        row_count += 1

    results = [[user] + bytes_added[user] for user in bytes_added]
    if thread_args.is_log:
        logging.info(
            __name__ + '::Processed %s out of %s records. (PID = %s)' % (
                total_rows - missed_records,total_rows, os.getpid()))

    return results
Example #44
def build_time_series(start, end, interval, metric, aggregator, cohort,
                      **kwargs):
    """
        Builds a timeseries dataset for a given metric.

            Parameters:
                - **start**: str or datetime. date + time indicating start of
                    time series
                - **end**: str or datetime. date + time indicating end of
                    time series
                - **interval**: int. integer value in hours that defines the
                    amount of time between data-points
                - **metric**: class object. Metrics class (derived from
                    UserMetric)
                - **aggregator**: method. Aggregator method used to
                    aggregate data for time series data points
                - **cohort**: list(str). list of user IDs
        e.g.

        >>> cohort = ['156171','13234584']
        >>> metric = ba.BytesAdded
        >>> aggregator = agg.list_sum_indices

        >>> build_time_series('20120101000000', '20120112000000', 24, metric,
                aggregator, cohort,
            num_threads=4, num_threads_metric=2, log=True)

    """

    log = bool(kwargs['log']) if 'log' in kwargs else False

    # Get datetime types, and the number of threads
    start = date_parse(um.UserMetric._get_timestamp(start))
    end = date_parse(um.UserMetric._get_timestamp(end))
    k = kwargs['num_threads'] if 'num_threads' in kwargs else 1

    # Compute window size and ensure that all the conditions
    # necessary to generate a proper time series are met
    num_intervals = int((end - start).total_seconds() / (3600 * interval))
    intervals_per_thread = num_intervals / k

    # Compose the sets of time series lists
    f = lambda t,i:  t + datetime.timedelta(
        hours = intervals_per_thread * interval * i)
    time_series = [_get_timeseries(f(start, i),
        f(start, i+1), interval) for i in xrange(k)]
    if f(start, k) <  end: time_series.append(
        _get_timeseries(f(start, k), end, interval))

    data = list()
    q = Queue()
    processes = list()

    if log: logging.info(
        'Spawning procs, %s - %s, interval = %s, threads = %s ... ' % (
        str(start), str(end), interval, k))
    for i in xrange(len(time_series)):
        p = Process(
            target=time_series_worker, args=(
                time_series[i], metric, aggregator, cohort, kwargs, q))
        p.start()
        processes.append(p)

    while 1:
        # sleep before checking worker threads
        time.sleep(PROCESS_SLEEP_TIME)

        if log:
            logging.info('Process queue, %s threads.' % str(len(processes)))

        while not q.empty():
            data.extend(q.get())
        for p in processes:
            if not p.is_alive():
                p.terminate()
                processes.remove(p)

        # exit if all process have finished
        if not len(processes):
            break

    # sort
    return sorted(data, key=operator.itemgetter(0), reverse=False)
Example #45
def output(cohort, metric):
    """ View corresponding to a data request -
        All of the setup and execution for a request happens here. """

    global global_id
    url = request.url.split(request.url_root)[1]

    # Check for refresh flag - drop from url
    refresh = True if 'refresh' in request.args else False
    if refresh:
        url = sub(REFRESH_REGEX,'',url)

    # Get the refresh date of the cohort
    try:
        cid = get_cohort_id(cohort)
        cohort_refresh_ts = get_cohort_refresh_datetime(cid)
    except Exception:
        cohort_refresh_ts = None
        logging.error(__name__ + '::Could not retrieve refresh '
                                 'time of cohort.')

    # Build a request. Populate with request parameters from query args.
    rm = RequestMetaFactory(cohort, cohort_refresh_ts, metric)
    for param in REQUEST_META_QUERY_STR:
        if param in request.args and hasattr(rm, param):
            if not request.args[param]:
                # Assign a value indicating presence of a query var
                setattr(rm, param, DEFAULT_QUERY_VAL)
            else:
                setattr(rm, param, request.args[param])

    # Process defaults for request parameters
    try:
        process_request_params(rm)
    except MetricsAPIError as e:
        return redirect(url_for('cohorts') + '?error=' + e.message)

    # Determine if the request maps to an existing response.  If so return it.
    # Otherwise compute.
    data = get_data(rm, pkl_data)
    if data and not refresh:
        return data
    else:

        # Ensure that the job for this url is not already running
        is_pending_job = False
        for p in processQ:
            if not cmp(rm, p.request) and p.status[0] == 'pending':
                is_pending_job = True

        if not is_pending_job: # Queue the job

            q = mp.Queue()
            p = mp.Process(target=process_metrics, args=(q, rm))
            p.start()

            global_id += 1

            logging.info(__name__ + '::Appending request %s to the queue...'
                % rm)
            processQ.append(QStructClass(global_id,p,rm,url,q,['pending']))

            return render_template('processing.html', url_str=str(rm))
        else:
            return redirect(url_for('job_queue') + '?error=0')
Example #46
'''
@version:
'''
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from config import logging
import utils
import config

class LoadData:
    def __init__(self, aim='train'):
        self.aim = aim
    def load_data(self):
        logging.info('Starting to load order data!')
        df_order_data = utils.generate_data(column_names=config.order_data_names,
                                            aim='train',
                                            table_name='order_data')
        logging.info('Order data loaded!')
        logging.info('Starting to load poi data!')
        df_order_data = utils.generate_data(column_names=config.poi_data_names,
                                            aim='train',
                                            table_name='poi_data')
        logging.info('poi data loaded!')
        logging.info('Starting to load road congestion data!')
        df_traffic_data = utils.generate_data(column_names=config.traffic_data_names,
                                              aim='train',
                                              table_name='traffic_data')
        logging.info('Road congestion data loaded!')
        logging.info('Starting to load weather data!')
 def on_created(self, event):
     logging.info('FileChangeHandler.on_created: File creation detected')
     self.process(event.src_path)
 def on_modified(self, event):
     logging.info('FileChangeHandler.on_modified: File change detected')
     self.process(event.src_path)
Example #49
    def process(self, user_handle, **kwargs):
        """
            Process method for the "blocks" metric.  Computes a list of block and ban events for users.

            Parameters:
                - **user_handle** - List.  List of user IDs.
                - **is_id** - Boolean.  Defaults to False.

            Return:
                - UserMetric::Blocks (self).

        """

        self.apply_default_kwargs(kwargs,'process')
        rowValues = {}

        log = bool(kwargs['log_progress'])

        if not hasattr(user_handle, '__iter__'): user_handle = [user_handle] # ensure the handles are iterable
        users = um.dl.DataLoader().cast_elems_to_string(user_handle)

        for i in xrange(len(users)):
            rowValues[users[i]] = {'block_count' : 0, 'block_first' : -1, 'block_last' : -1, 'ban' : -1}

        cursor = self._data_source_._cur_
        user_dict = dict()

        # Get usernames for user ids to detect in block events
        users = um.dl.DataLoader().cast_elems_to_string(users)
        user_str = um.dl.DataLoader().format_comma_separated_list(users)
        cursor.execute('select user_id, user_name from enwiki.user where user_id in (%s)' % user_str)

        for r in cursor: user_dict[r[1]] = r[0] # keys username on userid
        user_handle_str = um.dl.DataLoader().format_comma_separated_list(user_dict.keys())

        # Get blocks from the logging table
        if log: logging.info(__name__ + '::Processing blocks for %s users.' % len(user_handle))
        sql = """
				SELECT
				    log_title as user,
					IF(log_params LIKE "%%indefinite%%", "ban", "block") as type,
					count(*) as count,
					min(log_timestamp) as first,
					max(log_timestamp) as last
				FROM %(wiki)s.logging
				WHERE log_type = "block"
				AND log_action = "block"
				AND log_title in (%(user_str)s)
				AND log_timestamp >= "%(timestamp)s"
				GROUP BY 1, 2
			""" % {
            'user_str' : user_handle_str,
            'timestamp': self._start_ts_,
            'user_cond': user_handle_str,
            'wiki' : self._project_
            }

        sql = " ".join(sql.strip().split())
        cursor.execute(sql)

        # Process rows - extract block and ban events
        for row in cursor:

            userid = str(user_dict[row[0]])
            type = row[1]
            count = row[2]
            first = row[3]
            last = row[4]

            if type == "block":
                rowValues[userid]['block_count'] = count
                rowValues[userid]['block_first'] = first
                rowValues[userid]['block_last'] = last

            elif type == "ban":
                rowValues[userid][type] = first

        self._results = [[user, rowValues.get(user)['block_count'], rowValues.get(user)['block_first'], rowValues.get(user)['block_last'], rowValues.get(user)['ban']] for user in rowValues.keys()]
        return self
Example #50
def process_data_request(metric_handle, users, **kwargs):

    # create shorthand method refs
    to_string = dl.DataLoader().cast_elems_to_string

    aggregator = kwargs['aggregator'] if 'aggregator' in kwargs else None
    agg_key = get_agg_key(aggregator, metric_handle) if aggregator else None

    # Initialize the results
    results = OrderedDict()

    metric_class = metric_dict[metric_handle]
    metric_obj = metric_class(**kwargs)

    start = metric_obj.date_start
    end = metric_obj.date_end

    results['header'] = " ".join(metric_obj.header())
    for key in metric_obj.__dict__:
        if re.search(r'_.*_', key):
            results[str(key[1:-1])] = str(metric_obj.__dict__[key])
    results['metric'] = OrderedDict()

    # Parse the aggregator
    aggregator_func = None
    if agg_key in aggregator_dict.keys():
        aggregator_func = aggregator_dict[agg_key]

    # Parse the time series flag
    time_series = True if 'time_series' in kwargs and kwargs['time_series'] \
        else False

    if aggregator_func:
        if time_series:
            # interval length in hours
            interval = int(kwargs['interval'])
            total_intervals = (date_parse(end) -
                        date_parse(start)).total_seconds() / (3600 * interval)
            time_threads = max(1,int(total_intervals / INTERVALS_PER_THREAD))
            time_threads = min(MAX_THREADS, time_threads)

            logging.info('Metrics Manager: Initiating time series for '
                         '%(metric)s with %(agg)s from '
                         '%(start)s to %(end)s.' % {
                'metric' : metric_class.__name__,
                'agg' : aggregator_func.__name__,
                'start' : str(start),
                'end' : str(end),
            })
            out = tspm.build_time_series(start, end,
                interval, metric_class, aggregator_func, users,
                num_threads=time_threads,
                metric_threads='{"num_threads" : %(user_threads)s, '
                               '"rev_threads" : %(rev_threads)s}' %
                { 'user_threads' : USER_THREADS,
                  'rev_threads': REVISION_THREADS},
                log=True)

            count = 1
            for row in out:
                results['metric'][count] = " ".join(
                    to_string([row[0][:10] + 'T' + row[0][11:13]] + row[3:]))
                count += 1
        else:

            logging.info('Metrics Manager: Initiating aggregator for '
                         '%(metric)s with %(agg)s from '
                         '%(start)s to %(end)s.' % {
                'metric' : metric_class.__name__,
                'agg' : aggregator_func.__name__,
                'start' : str(start),
                'end' : str(end),
                })

            metric_obj.process(users, num_threads=USER_THREADS,
                rev_threads=REVISION_THREADS, **kwargs)
            r = um.aggregator(aggregator_func, metric_obj, metric_obj.header())
            results['metric'][r.data[0]] = " ".join(to_string(r.data[1:]))
            results['header'] = " ".join(to_string(r.header))
    else:

        logging.info('Metrics Manager: Initiating user data for '
                     '%(metric)s from %(start)s to %(end)s.' % {
            'metric' : metric_class.__name__,
            'start' : str(start),
            'end' : str(end),
            })
        metric_obj.process(users, num_threads=USER_THREADS,
            rev_threads=REVISION_THREADS, log_progress=True, **kwargs)
        for m in metric_obj.__iter__():
            results['metric'][m[0]] = " ".join(to_string(m[1:]))

    return results