Example #1
    def check(self):
        """Check our monitoring target"""

        result = False

        result_log = "{url}: {result}"

        try:
            start = time()
            contents = self._fetch_contents()
            end = time()
            elapsed = "{0:.3f}".format(end - start)

            if self._check_contents(contents, self.settings):
                log.info(result_log.format(
                    url=self.url,
                    result="GOOD {0} ms response time".format(elapsed)
                ))

                result = elapsed
            else:
                log.warning(result_log.format(
                    url=self.url,
                    result="BAD, Content mismatch, {0} ms response time".format(
                        elapsed
                    )
                ))

        except IOError as e:
            log.error(result_log.format(
                url=self.url,
                result="BAD, Connection error: {0}".format(e)
            ))

        return result
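The _fetch_contents and _check_contents helpers in Example #1 are project-specific. A minimal standalone sketch of the same timed-fetch pattern, using only the standard library (the URL handling and timeout here are illustrative assumptions, not the project's code):

    from time import time
    from urllib.request import urlopen

    def timed_fetch(url):
        # Fetch a URL, returning (contents, elapsed milliseconds as text)
        start = time()
        contents = urlopen(url, timeout=10).read()
        elapsed = "{0:.3f}".format((time() - start) * 1000)
        return contents, elapsed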
Example #2
    def start(self, args):
        """Start up all of the application components"""

        enable_log()

        # Make sure the results have some data in it before the web requests
        # come in
        self.results = App._init_results(self.config)

        if args.poll_seconds:
            self.config.override_monitor_setting(
                'poll_seconds',
                args.poll_seconds
            )

        if self.config.log_file:
            set_log_file(self.config.log_file)
            log.debug("Opening log file {}".format(self.config.log_file))

        self._start_monitoring()

        if self.config.http_port:
            log.info("Starting web frontend on port {}".format(
                self.config.http_port
            ))
            self._start_frontend()
Example #3
    def consume_item(self, event):
        log.info('uploading to Dropbox: %s -> %s' %
                 (event.source_absolute, event.target_absolute))

        self.send_progress(event.source_absolute, 0.0)
        try:
            self._upload(event, event.target_absolute)
        except IOError as e:
            # file was deleted immediately
            log.warning('upload failed: %s' % e)
            self.send_progress(event.source_absolute, 1.0)
Example #4
    def consume_item(self, event):
        log.info('uploading to GoogleDrive: %s -> %s' %
                 (event.source_absolute, event.target_absolute))

        self.send_progress(event.source_absolute, 0.0)
        # TODO handle dir/file removal
        try:
            if event.isdir:
                self._path_to_ids(event.target_absolute, create_missing=True)
            else:
                self._put_file(event.source_absolute, event.target_absolute)
        except IOError as e:
            # file was deleted immediately?
            log.warning('upload failed: %s' % e)
        finally:
            self.send_progress(event.source_absolute, 1.0)
Example #5
    def _start_monitoring(self):
        """Start monitoring the URLs"""

        for url in self.config.monitors:
            log.info("Monitoring {} every {} seconds".format(
                url,
                self.config.monitors[url]["poll_seconds"]
            ))
            self.monitors.append({
                "monitor": Monitor(url, self.config.monitors[url]),
                "nextRun": time()
            })

        def _loop():
            """Monitoring loop"""

            while True:
                now = time()

                for item in self.monitors:
                    # Is it time to run this monitor yet?
                    if item["nextRun"] < now:
                        # Run it
                        monitor = item["monitor"]
                        url = monitor.get_url()
                        result = monitor.check()

                        # Update results
                        self.results[url] = result

                        # Update next run time
                        settings = monitor.get_settings()
                        item["nextRun"] = time() + float(
                            settings["poll_seconds"])

                # Wait a while to not eat all CPU
                sleep(1)

        self.thread = Thread(target=_loop)

        # For some reason this is needed for the main process to catch
        # CTRL+C properly
        self.thread.daemon = True

        self.thread.start()
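The daemon flag in Example #5 is what keeps Ctrl+C working: the interpreter does not wait for daemon threads on exit, so the main thread stays free to receive KeyboardInterrupt. A minimal self-contained sketch of the same pattern (the loop body is illustrative):

    from threading import Thread
    from time import sleep

    def _loop():
        while True:
            sleep(1)  # background work would go here

    worker = Thread(target=_loop)
    worker.daemon = True  # daemon threads die with the main thread
    worker.start()

    try:
        while True:
            sleep(1)  # main thread stays responsive to KeyboardInterrupt
    except KeyboardInterrupt:
        print("shutting down")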
Example #6
def speedy_parameter_load(pdicts, params):
    global child_pids
    assert len(pdicts) > 4
    signal.signal(signal.SIGINT, signal_handler)
    for i in xrange(4):
        sample = pdicts[i*len(pdicts)/4 : (i+1)*len(pdicts)/4]
        cpid = os.fork()
        if cpid:
            child_pids.append(cpid)
        else:
            log.info('Child Process Launched')
            engine = create_engine(conn_string)
            Session = sessionmaker()
            Session.configure(bind=engine)
            session = Session()
            linear_parameter_load(sample, params, session)
            session.close()
            sys.exit(0)
    for i, cpid in enumerate(child_pids):
        os.waitpid(cpid, 0)
        log.info('Child %s Finished' % i)

    child_pids = []
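Example #6 fans work out to four child processes with os.fork and reaps them with os.waitpid; each child opens its own database session, since sessions and their sockets must not be shared across a fork. A self-contained sketch of the same fan-out pattern (POSIX only; the data and per-chunk work are illustrative):

    import os

    data = list(range(100))
    child_pids = []
    for i in range(4):
        chunk = data[i * len(data) // 4:(i + 1) * len(data) // 4]
        pid = os.fork()
        if pid:
            child_pids.append(pid)  # parent: remember the child pid
        else:
            # child: process its slice, then exit without re-entering the loop
            print(os.getpid(), sum(chunk))
            os._exit(0)

    for pid in child_pids:
        os.waitpid(pid, 0)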
Example #7
    def end_callback():
        log.info('\n********** wwa end **********')
        export_data.main()
Example #8
    def __parse_article_list(self, article_list, __biz, is_first_page=False):
        """
        @summary: Parse the article list
        ---------
        @param article_list: article list info, str
        ---------
        @result: True / None (True: keep crawling down; None: stop crawling)
        """

        # log.debug(tools.dumps_json(article_list))

        # Parse the article info inside the JSON content
        def parse_article_info(article_info, comm_msg_info):
            if not article_info:
                return

            # log.debug(tools.dumps_json(article_info))

            title = article_info.get("title")
            digest = article_info.get("digest")
            url = article_info.get("content_url").replace("\\", "").replace(
                "amp;", "")
            source_url = article_info.get("source_url").replace(
                "\\", "")  # link to the quoted article
            cover = article_info.get("cover").replace("\\", "")
            subtype = article_info.get("subtype")
            is_multi = article_info.get("is_multi")
            author = article_info.get("author")
            copyright_stat = article_info.get("copyright_stat")
            duration = article_info.get("duration")
            del_flag = article_info.get("del_flag")
            type = comm_msg_info.get("type")
            publish_time = tools.timestamp_to_date(
                comm_msg_info.get("datetime"))
            sn = tools.get_param(url, "sn")

            if sn:
                # Cache the article info
                article_data = {
                    "title": title,
                    "digest": digest,
                    "url": url,
                    "source_url": source_url,
                    "cover": cover,
                    "subtype": subtype,
                    "is_multi": is_multi,
                    "author": author,
                    "copyright_stat": copyright_stat,
                    "duration": duration,
                    "del_flag": del_flag,
                    "type": type,
                    "publish_time": publish_time,
                    "sn": sn,
                    "__biz": __biz,
                    "spider_time": tools.get_current_date(),
                }

                return article_data

        # log.debug(tools.dumps_json(article_list))
        article_list = tools.get_json(article_list)

        article_list_data = []
        publish_time = None
        is_need_get_more = True
        article_list = article_list.get("list", [])
        is_first_article = True
        for article in article_list:
            comm_msg_info = article.get("comm_msg_info", {})

            publish_timestamp = comm_msg_info.get("datetime")
            publish_time = tools.timestamp_to_date(publish_timestamp)

            # Record the latest publish time
            if is_first_page and is_first_article:
                self._task_manager.record_new_last_article_publish_time(
                    __biz, publish_time)
                is_first_article = False

                # On the first page, check whether the latest article is
                # recent; if nothing was published recently, treat the
                # account as a zombie account
                if publish_timestamp and self._task_manager.is_zombie_account(
                        publish_timestamp):
                    log.info("Official account {} is a zombie account; "
                             "no longer monitoring".format(__biz))
                    self._task_manager.sign_account_is_zombie(
                        __biz, publish_time)
                    is_need_get_more = False
                    break

            # Compare times; if we have reached the last crawled publish
            # time, stop
            is_reach = self._task_manager.is_reach_last_article_publish_time(
                __biz, publish_time)
            if is_reach:
                log.info("Reached the last publish time; official account "
                         "{} is done".format(__biz))
                new_last_publish_time = self._task_manager.get_new_last_article_publish_time(
                    __biz)
                self._task_manager.update_account_last_publish_time(
                    __biz, new_last_publish_time)
                is_need_get_more = False
                break

            elif is_reach is None:
                log.info(
                    "Official account {} was clicked manually at crawler "
                    "startup; skipping its history and moving on to the "
                    "accounts in the monitoring pool".format(__biz))
                return

            # 49 is the common image-and-text message type; other types
            # (text, audio, video) have inconsistent formats and are not
            # collected here
            article_type = comm_msg_info.get("type")
            if article_type != 49:
                continue

            # Check whether the article falls within the crawl time range
            publish_time_status = self._task_manager.is_in_crawl_time_range(
                publish_time)
            if publish_time_status == TaskManager.OVER_MIN_TIME_RANGE:
                log.info("Official account {} is past the crawl time range; "
                         "done".format(__biz))
                new_last_publish_time = self._task_manager.get_new_last_article_publish_time(
                    __biz)
                self._task_manager.update_account_last_publish_time(
                    __biz, new_last_publish_time)
                is_need_get_more = False
                break
            elif publish_time_status == TaskManager.NOT_REACH_TIME_RANGE:
                log.info("Official account {}: publish time {} has not yet "
                         "reached the crawl time range; skipping".format(
                             __biz, publish_time))
                continue

            # Within the crawl time range

            # A WeChat official account can publish several image-and-text
            # messages at a time; this is the first one
            app_msg_ext_info = article.get("app_msg_ext_info", {})
            article_data = parse_article_info(app_msg_ext_info, comm_msg_info)
            if article_data:
                article_list_data.append(article_data)

            # Image-and-text messages attached on the same day (default to
            # [] so a missing key does not raise TypeError)
            multi_app_msg_item_list = app_msg_ext_info.get(
                "multi_app_msg_item_list", [])
            for multi_app_msg_item in multi_app_msg_item_list:
                article_data = parse_article_info(multi_app_msg_item,
                                                  comm_msg_info)
                if article_data:
                    article_list_data.append(article_data)

        if article_list_data:
            data_pipeline.save_article_list(article_list_data)

        if is_need_get_more:
            return publish_time
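tools.get_param in Example #8 is project-specific; a rough standard-library equivalent might look like the hypothetical helper below (shown only for illustration):

    from urllib.parse import parse_qs, urlparse

    def get_param(url, key):
        # Return the first value of a query-string parameter, or None
        values = parse_qs(urlparse(url).query).get(key)
        return values[0] if values else None

    print(get_param("https://example.com/page?sn=abc123", "sn"))  # abc123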
Example #9
    def end_callback():
        log.info('\n********** template end **********')
Example #10
    def end_callback():
        log.info('\n********** spider_main end **********')
        task_status.is_doing = False
Example #11
    def end_callback():
        export_data.main()
        log.info('\n********** VA_APP end **********')
Example #12
    def get_database_data(self, host):
        log.info("从数据库获取接口信息")
        session = self.__db_engine.creat_session()

        # Get all APIs under this server
        whichService = session.query(Server.id).filter(
            or_(Server.qaURL == host, Server.devURL == host),
            and_(Server.serverStatus == 1)).first()
        if whichService is None:  # .first() returns None when nothing matches
            servername = ''
            for i in self.env.keys():
                if self.env.get(i) == host:
                    servername = i
                    break
            serverid = session.query(Server).filter(
                and_(Server.serverName == servername,
                     Server.serverStatus == 1)).first()
            if config.get('updateActionAPI') in ('DEV_API', 'DEV'):
                devurl = host
                qaurl = config.get('hosts').get('QA').get(servername)
            else:
                qaurl = host
                devurl = config.get('hosts').get('DEV').get(servername)
            if serverid is None:
                addserver = Server(serverName=servername,
                                   swaggerURI='/v2/api-docs',
                                   qaURL=qaurl,
                                   devURL=devurl,
                                   serverStatus=1)
                session.add(addserver)
            else:
                log.info('The database already has a service with this '
                         'name: %s' % servername)
            session.commit()
            whichService = session.query(Server.id).filter(
                or_(Server.qaURL == host, Server.devURL == host)).first()
        whichService = whichService.id
        apis = session.query(Interface.id, Interface.apiPath,
                             Interface.apiDesc,
                             Interface.apiRequestMethod).filter(
                                 and_(Interface.apiServerId == whichService,
                                      Interface.apiStatus == 1)).all()
        api_result = {}

        # Fetch all parameter info for each API
        for api in apis:
            api_id = api[0]
            api_path = api[1]
            api_desc = api[2]
            api_method = api[3]

            # Assemble the API request/response parameter info to return
            api_result.update({
                api_path: {
                    "desc": api_desc,
                    "method": api_method,
                    "inParameter": None,
                    "outParameter": None
                }
            })

            # Assemble the request field info
            try:
                in_parameter = session.query(
                    InputParameters.inParameter).filter(
                        and_(InputParameters.apiId == api_id,
                             InputParameters.inParameterStatus == 1)).one()
                api_result.get(api_path).update(
                    {"inParameter": json.loads(in_parameter[0])})
            except orm.exc.NoResultFound:
                api_result.get(api_path).update({"inParameter": None})

            # Assemble the response field info
            try:
                out_parameter = session.query(
                    ExtractParameters.extParameter).filter(
                        and_(ExtractParameters.apiId == api_id,
                             ExtractParameters.extParameterStatus == 1)).one()
                api_result.get(api_path).update(
                    {"outParameter": json.loads(out_parameter[0])})
            except orm.exc.NoResultFound:
                api_result.get(api_path).update({"outParameter": None})

        self.__db_engine.close_session()
        return api_result
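The or_/and_ filter combination in Example #12 is standard SQLAlchemy. A self-contained sketch of the same query shape, with an in-memory SQLite database and a stand-in model (the columns and data here are illustrative, not the project's):

    from sqlalchemy import Column, Integer, String, and_, create_engine, or_
    from sqlalchemy.ext.declarative import declarative_base
    from sqlalchemy.orm import sessionmaker

    Base = declarative_base()

    class Server(Base):
        __tablename__ = "server"
        id = Column(Integer, primary_key=True)
        qaURL = Column(String)
        devURL = Column(String)
        serverStatus = Column(Integer)

    engine = create_engine("sqlite://")
    Base.metadata.create_all(engine)
    session = sessionmaker(bind=engine)()
    session.add(Server(qaURL="qa.example", devURL="dev.example",
                       serverStatus=1))
    session.commit()

    # Rows where either URL matches the host AND the server is active
    row = session.query(Server.id).filter(
        or_(Server.qaURL == "qa.example", Server.devURL == "qa.example"),
        and_(Server.serverStatus == 1)).first()
    print(row)  # (1,)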
Example #13
def get_all_hot():
    '''
    @summary: Network-wide hot topics
    ---------
    @param :
    ---------
    @result:
    '''

    url = 'http://192.168.60.38:8001/hotspot_al/interface/getHotAnalysis?type=0'
    json = tools.get_json_by_requests(url, headers=HEADERS)
    datas = json['data']

    hot_count = 0
    # URL for fetching related news
    root_url = 'http://192.168.60.38:8001/hotspot_al/interface/getHotRelateInfo?ids=%s'
    for data in datas:
        sql = 'select sequence.nextval, SEQ_IOPM_ARTICLE.nextval from dual'
        result = oracledb.find(sql)[0]
        hot_id = result[0]
        article_id = result[1]

        def export_callback(execute_type, sql, data_json):
            if execute_type != ExportData.EXCEPTION:
                infoIds = data['infoIds']
                url = root_url % infoIds
                json = tools.get_json_by_requests(url, headers=HEADERS)
                articles = json['data']

                # "EMOTION": 'vint_3',
                # "ACCOUNT": null,
                # "WEIGHT": 0,
                # "TITLE": "str_title",
                # "URL": "str_url",
                # "MAY_INVALID": ,
                # "CLUES_IDS": "",
                # "WEBSITE_NAME": "str_site",
                # "KEYWORDS_COUNT": 1,
                # "HOST": "str_site",
                # "INFO_TYPE": 'int_type',
                # "COMMENT_COUNT": null,
                # "HOT_ID": "vint_%d"%hot_id,
                # "REVIEW_COUNT": null,
                # "UUID": "73ec16038e074530ff109e3cfad2594c",
                # "ID": 'vint_%d'%article_id,
                # "IS_VIP": null,
                # "IMAGE_URL": 'str_picture',
                # "KEYWORDS": "str_keywords",
                # "KEYWORD_CLUES_ID": "{"中央电视台":"88758"}",
                # "RELEASE_TIME": "date_pubtime",
                # "AUTHOR": "江门日报",
                # "CONTENT": "clob_content",
                # "RECORD_TIME": 'vdate_%s'%tools.get_current_date(),
                # "UP_COUNT": 'vint_null'

                key_map = {
                    'id': 'int_dataId',
                    'content': 'clob_content',
                    'url': 'str_url',
                    'website_name': 'str_site',
                    'image_url': 'str_picture',
                    'release_time': 'date_pubtime',
                    'keywords': 'str_keywords',
                    'emotion': 'str_emotion',
                    'host': 'str_site',
                    'title': 'str_title',
                    'info_type': 'int_type',
                    'hot_id': "vint_%d" % hot_id,
                    'record_time': 'vdate_%s' % tools.get_current_date()
                }

                export_data.export_to_oracle(
                    key_map=key_map,
                    aim_table='TAB_IOPM_ARTICLE_INFO',
                    unique_key='url',
                    datas=articles,
                    unique_key_mapping_source_key={'url': 'str_url'},
                    sync_to_es=True)

        # Export the nationwide hot-topic data

        key_map = {
            'id': 'vint_%d' % hot_id,
            'title': 'str_kw',
            'hot': 'int_hot',
            'hot_type': 'vint_0',
            'record_time': 'vdate_%s' % tools.get_current_date()
        }
        # print(data['kw'])

        hot_count += export_data.export_to_oracle(
            key_map=key_map,
            aim_table='TAB_IOPM_HOT_INFO',
            unique_key='title',
            datas=data,
            callback=export_callback,
            sync_to_es=True)

    log.info('''
        Exported %d network-wide hot topics in total
        ''' % (hot_count))
Example #14
    def end_callback():
        # Update the keyword status: done
        log.info('\n********** live_app end **********')
Example #15
    def begin_callback():
        log.info('\n********** live_app begin **********')
        db.delete('LiveApp_urls', {})
Example #16
    def tearDown(self):
        # Runs after each test
        self.end = time()
        log.info('Elapsed time in seconds: %s',
                 "%.5f" % (self.end - self.start))
        return super().tearDown()
Example #17
    def tearDownClass(self):
        # Runs once after all tests
        log.info('Ending %s', self.__name__)
Example #18
    def setUpClass(self):
        # Runs once before all tests
        log.info('Setting up %s', self.__name__)
        self.maxDiff = None
        # self.sortTestMethodsUsing = None
        self.client = app.test_client()
Example #19
    def add_latest_action(self):
        for key, host in self.hosts.items():
            log.info(key)
            self.create_action(key, host)
Example #20
    def handle_sync_progress(self, syncer, file, progress):
        log.info("%s: %s %s" % (syncer.name, progress, file))
        self.progress_callback(syncer, file, progress)
Example #21
def initialize_database(path=MASTER_DOC):
    global models 

    CSVModel.clear()

    csv_docs = xls_parse_from_url(path)
    log.info('Downloaded %s' % path)
    model_instances = {}
    for k, doc in csv_docs.iteritems():
        if k in ['IDMap', 'AllScenarios']:
            continue
        try:
            csv_model = CSVModel(doc).create_model(k)
            models[csv_model.__name__] = csv_model
            model_instances[k] = csv_model.from_csv(doc)
            log.info("Parsed sheet %s" % k)
        except ArgumentError:
            log.exception("Couldn't load %s" % k)
            continue
        except TypeError:
            log.exception("Couldn't load %s" % k)
            continue
    # We want a late load so that the order is preserved and deterministic
    from model.refs.parameter_ref import ParameterRef

    log.info('Dropping view')
    drop_dp_view(engine)
    drop_view(engine)
    CSVModel.drop_all(engine)
    CSVModel.create_all(engine)
    log.info('Creating view')
    initialize_view(engine)
    initialize_dp_view(engine)

    for k, v in model_instances.iteritems():
        for inst in v:
            session.add(inst)
            try:
                session.commit()
            except Exception as e:
                session.rollback()
                from traceback import print_exc
                print_exc()  # print_exc takes no exception argument
        log.info("Initialized %s" % k)
    log.info("Initializing Parameter References and Associations")
    pdicts = [(pdict.scenario, pdict.id, pdict.parameter_ids) for pdict in model_instances['ParameterDictionary']]
    log.info("Loaded ParameterDictionary into memory")
    params = {p.id : p.scenario for p in model_instances['ParameterDefs']}
    log.info("Loaded Parameters into Memory")
    if engine.name == 'postgresql':
        speedy_parameter_load(pdicts, params)
    else:
        linear_parameter_load(pdicts, params, session)
Example #22
    def data_check(self):
        # Checker helper
        def api_info_checker(key: str, database_data):
            if key == self.api_key_map_method:
                tmp_str = "request method"
                tmp_method_new = self.new_api_method.get("cmp_result")
                tmp_method_del = self.del_api_method.get("cmp_result")
                tmp_method_update = self.update_api_method.get("cmp_result")
                old_name = "old_" + key
            elif key == self.api_key_map_inParameter:
                tmp_str = "input parameters"
                tmp_method_new = self.new_in_parameter.get("cmp_result")
                tmp_method_del = self.del_in_parameter.get("cmp_result")
                tmp_method_update = self.update_in_parameter.get("cmp_result")
                old_name = "old_" + key

            elif key == self.api_key_map_outParameter:
                tmp_str = "output parameters"
                tmp_method_new = self.new_ext_parameter.get("cmp_result")
                tmp_method_del = self.del_ext_parameter.get("cmp_result")
                tmp_method_update = self.update_ext_parameter.get("cmp_result")
                old_name = "old_" + key

            elif key == self.api_key_map_desc:
                tmp_str = "description"
                tmp_method_new = self.new_api_desc.get("cmp_result")
                tmp_method_del = self.del_api_desc.get("cmp_result")
                tmp_method_update = self.update_api_desc.get("cmp_result")
                old_name = "old_" + key

            else:
                raise KeyError("invalid key argument")

            if self.swagger_data.get(check_api).get(
                    key) != self.database_data.get(check_api).get(key):
                # The value was deleted on the server side
                if self.swagger_data.get(check_api).get(key) is None:
                    log.info("API %s: %s was deleted" % (check_api, tmp_str))
                    self.check_result.get("update_info").get(check_api).get("cmp_result"). \
                        append(tmp_method_del)
                    self.check_result.get("update_info").get(check_api).update(
                        {old_name: self.database_data.get(check_api).get(key)})
                # The value was added (missing on the database side)
                elif self.database_data.get(check_api).get(key) is None:
                    log.info("API %s: %s was added" % (check_api, tmp_str))
                    self.check_result.get("update_info").get(check_api).get("cmp_result"). \
                        append(tmp_method_new)
                    self.check_result.get("update_info").get(check_api).update(
                        {old_name: self.database_data.get(check_api).get(key)})

                # The value was modified
                else:
                    log.info("API %s: %s was updated" % (check_api, tmp_str))
                    self.check_result.get("update_info").get(check_api).get("cmp_result"). \
                        append(tmp_method_update)
                    self.check_result.get("update_info").get(check_api).update(
                        {old_name: self.database_data.get(check_api).get(key)})

        log.info("检查数据库和服务器返回的数据")
        if self.database_data != self.swagger_data:

            # 以服务器返回的数据为准,进行全遍历,并将对比过的数据删除
            for check_api in list(self.swagger_data.keys()):
                # 检查接口是否在数据库中存在
                try:
                    if check_api in self.database_data.keys():

                        log.debug("检查接口%s是否有变更" % check_api)
                        if self.swagger_data.get(
                                check_api) != self.database_data.get(
                                    check_api):
                            # 接口发生变更,将变更数据记录到结果中
                            self.check_result.get("update_info").update(
                                {check_api: self.swagger_data.get(check_api)})
                            self.check_result.get("update_info").get(
                                check_api).update({"cmp_result": []})

                            # 检查接口描述
                            # api_info_checker(self.api_key_map_desc, self.database_data)
                            # 检查接口方法
                            api_info_checker(self.api_key_map_method,
                                             self.database_data)
                            # 检查接口入参
                            api_info_checker(self.api_key_map_inParameter,
                                             self.database_data)
                            # 检查接口出参
                            api_info_checker(self.api_key_map_outParameter,
                                             self.database_data)

                        # 检查完成后,将接口从数据中删除,方便下次遍历检查
                        self.swagger_data.__delitem__(check_api)
                        self.database_data.__delitem__(check_api)

                    else:
                        # 数据库中没有的接口,走新增数据流程
                        log.debug("发现有新增接口%s" % check_api)
                        self.check_result.get("new_info").update(
                            {check_api: self.swagger_data.get(check_api)})
                        self.check_result.get("new_info").get(
                            check_api).update(self.new_api)
                        self.swagger_data.__delitem__(check_api)
                except AttributeError:
                    log.error("数据库未配置swagger地址或地址错误")
                    exit(0)

            # 剩余database_data中是数据,是服务器删除的数据
            if 0 < len(self.database_data.items()):
                log.debug("发现有删除接口%s" %
                          ",".join(list(self.database_data.keys())))
                self.check_result.get("del_info").update(self.database_data)
Example #23
    def updata_database(self, keys, host):
        """
        Read the validated data and apply inserts, updates, and deletes to
        the database
        :param dict keys:
        :param str host:
        :return:
        """
        log.info('Updating API info in the database')
        session = self.__db_engine.creat_session()
        serverid = session.query(Server.id).filter(
            or_(Server.qaURL == host, Server.devURL == host),
            and_(Server.serverStatus == 1)).first()
        for status in keys.keys():
            if status == 'new_info':
                new_info = keys.get(status)
                for api in new_info.keys():
                    apiparameter = new_info.get(api)
                    desc = str(apiparameter.get('desc'))
                    method = str(apiparameter.get('method'))
                    apiId = session.query(Interface.id).filter(
                        and_(Interface.apiPath == api,
                             Interface.apiStatus == 1,
                             Interface.apiServerId == serverid.id)).first()
                    if apiId is None:
                        ifs = Interface(apiPath=api,
                                        apiServerId=serverid.id,
                                        apiDesc=desc,
                                        apiRequestMethod=method,
                                        apiStatus=1)
                        session.add(ifs)
                        session.commit()
                        apiId = session.query(Interface.id).filter(
                            and_(
                                Interface.apiPath == api,
                                Interface.apiStatus == 1,
                                Interface.apiServerId == serverid.id)).first()
                    inParameter = json.dumps(apiparameter.get('inParameter'))
                    extParameter = json.dumps(apiparameter.get('outParameter'))
                    ips = InputParameters(apiId=apiId.id,
                                          inParameter=inParameter,
                                          inParameterStatus=1)
                    eps = ExtractParameters(apiId=apiId.id,
                                            extParameter=extParameter,
                                            extParameterStatus=1)
                    session.add_all([ips, eps])
                    session.commit()
            elif status == 'update_info':
                update_info = keys.get(status)
                for api in update_info.keys():
                    apiparameter = update_info.get(api)
                    apiId = session.query(Interface.id).filter(
                        and_(Interface.apiPath == api,
                             Interface.apiStatus == 1,
                             Interface.apiServerId == serverid.id)).first()
                    inParameter = json.dumps(apiparameter.get('inParameter'))
                    extParameter = json.dumps(apiparameter.get('outParameter'))
                    ips = session.query(InputParameters).filter(
                        and_(InputParameters.apiId == apiId.id,
                             InputParameters.inParameterStatus == 1)).first()
                    ips.inParameter = inParameter
                    eps = session.query(ExtractParameters).filter(
                        and_(ExtractParameters.apiId == apiId.id,
                             ExtractParameters.extParameterStatus ==
                             1)).first()
                    eps.extParameter = extParameter
                    session.commit()
            elif status == 'del_info':
                del_info = keys.get(status)
                for api in del_info.keys():
                    apiId = session.query(Interface).filter(
                        and_(Interface.apiPath == api,
                             Interface.apiStatus == 1,
                             Interface.apiServerId == serverid.id)).first()
                    ips = session.query(InputParameters).filter(
                        and_(InputParameters.apiId == apiId.id,
                             InputParameters.inParameterStatus == 1)).first()
                    ips.inParameterStatus = 2
                    eps = session.query(ExtractParameters).filter(
                        and_(ExtractParameters.apiId == apiId.id,
                             ExtractParameters.extParameterStatus ==
                             1)).first()
                    eps.extParameterStatus = 2
                    apiId.apiStatus = 2
                    session.commit()
        self.__db_engine.close_session()
Example #24
# encoding=utf-8
import sys
sys.path.append("..")

import utils.tools as tools
from utils.log import log
from base.collector import Collector
from base.root_url import AddRootUrl
from html_parser.parser_control import PaserControl


def init():
    pass


if __name__ == '__main__':
    log.info("--------begin--------")

    addRootUrl = AddRootUrl()
    addRootUrl.start()

    coll = Collector()
    coll.start()

    paserCount = int(tools.getConfValue("html_parser", "parser_count"))
    while paserCount:
        paser = PaserControl()
        paser.start()
        paserCount = paserCount - 1
Example #25
    def begin_callback():
        log.info('\n********** VA_APP begin **********')
        db = MongoDB()
        db.delete('VAApp_urls', {})
Example #26
    def begin_callback():
        # mongo_db = MongoDB()
        # mongo_db.update('ZHEJIANG_APP_urls', {'depth': 0}, {'status': 0})
        log.info('\n********** spider_main begin **********')
Example #27
    def begin_callback():
        log.info('\n********** spider_main begin **********')
Example #28
    def end_callback():
        log.info('\n********** spider_main end **********')
Example #29
    def begin_callback():
        log.info('\n********** template begin **********')
        db.delete('op_urls', {})
        db.delete('op_content_info', {})
Example #30
    def begin_callback():
        log.info('\n********** spider_article begin **********')
Example #31
    def deal_article_list(self, req_url, text):
        """
        @summary: 获取文章列表
        分为两种
            1、第一次查看历史消息 返回的是html格式 包含公众号信息
            2、下拉显示更多时 返回json格式
        但是文章列表都是json格式 且合适相同
        抓取思路:
        1、如果是第一种格式,直接解析文章内容,拼接下一页json格式的地址
        2、如果是第二种格式,
        ---------
        @param data:
        ---------
        @result:
        """
        try:
            # Check whether the account is banned; banned accounts have no
            # article list
            __biz = tools.get_param(req_url, "__biz")

            if "list" in text:
                # 取html格式里的文章列表
                if "action=home" in req_url:
                    # 解析公众号信息
                    self.__parse_account_info(text, req_url)

                    # 解析文章列表
                    regex = "msgList = '(.*?})';"
                    article_list = tools.get_info(text, regex, fetch_one=True)
                    article_list = article_list.replace("&quot;", '"')
                    publish_time = self.__parse_article_list(
                        article_list, __biz, is_first_page=True)

                    # Check whether there are more articles; if not, move on
                    # to the next account, otherwise pull down to show more
                    regex = r"can_msg_continue = '(\d)'"
                    can_msg_continue = tools.get_info(text,
                                                      regex,
                                                      fetch_one=True)
                    if can_msg_continue == "0":  # no more articles
                        log.info("Reached the bottom of the list, no more "
                                 "articles; official account {} is "
                                 "done".format(__biz))
                        new_last_publish_time = self._task_manager.get_new_last_article_publish_time(
                            __biz)
                        if not new_last_publish_time:
                            # Mark as a zombie account
                            log.info("Official account {} is a zombie "
                                     "account; no longer "
                                     "monitoring".format(__biz))
                            self._task_manager.sign_account_is_zombie(__biz)
                        else:
                            self._task_manager.update_account_last_publish_time(
                                __biz, new_last_publish_time)

                    elif publish_time:
                        # Build the "pull down to show more history" URL
                        # Take appmsg_token from the HTML
                        regex = 'appmsg_token = "(.*?)";'
                        appmsg_token = tools.get_info(text,
                                                      regex,
                                                      fetch_one=True)

                        # Take the other parameters from the URL
                        __biz = tools.get_param(req_url, "__biz")
                        pass_ticket = tools.get_param(req_url, "pass_ticket")

                        next_page_url = "https://mp.weixin.qq.com/mp/profile_ext?action=getmsg&__biz={__biz}&f=json&offset={offset}&count=10&is_ok=1&scene=124&uin=777&key=777&pass_ticket={pass_ticket}&wxtoken=&appmsg_token={appmsg_token}&x5=0&f=json".format(
                            __biz=__biz,
                            offset=10,
                            pass_ticket=pass_ticket,
                            appmsg_token=appmsg_token,
                        )
                        return self._task_manager.get_task(
                            next_page_url,
                            tip="crawling the list, next_offset {}, reached {}".format(
                                10, publish_time),
                        )

                else:  # JSON format
                    text = tools.get_json(text)
                    article_list = text.get("general_msg_list", {})
                    publish_time = self.__parse_article_list(
                        article_list, __biz)

                    # Check whether there are more articles; if not, move on
                    # to the next account, otherwise pull down to show more
                    can_msg_continue = text.get("can_msg_continue")
                    if not can_msg_continue:  # no more articles
                        log.info("Reached the bottom of the list, no more "
                                 "articles; official account {} is "
                                 "done".format(__biz))
                        new_last_publish_time = self._task_manager.get_new_last_article_publish_time(
                            __biz)
                        self._task_manager.update_account_last_publish_time(
                            __biz, new_last_publish_time)

                    elif publish_time:
                        # Build the "pull down to show more history" URL
                        # Take the parameters from the URL
                        __biz = tools.get_param(req_url, "__biz")
                        pass_ticket = tools.get_param(req_url, "pass_ticket")
                        appmsg_token = tools.get_param(req_url, "appmsg_token")

                        # Take the offset from the JSON
                        offset = text.get("next_offset", 0)

                        next_page_url = "https://mp.weixin.qq.com/mp/profile_ext?action=getmsg&__biz={__biz}&f=json&offset={offset}&count=10&is_ok=1&scene=124&uin=777&key=777&pass_ticket={pass_ticket}&wxtoken=&appmsg_token={appmsg_token}&x5=0&f=json".format(
                            __biz=__biz,
                            offset=offset,
                            pass_ticket=pass_ticket,
                            appmsg_token=appmsg_token,
                        )
                        return self._task_manager.get_task(
                            next_page_url,
                            tip="crawling the list, next_offset {}, reached {}".format(
                                offset, publish_time),
                        )

            else:  # this __biz account has been banned
                self._task_manager.sign_account_is_zombie(__biz)

        except Exception as e:
            log.exception(e)

        return self._task_manager.get_task()
Example #32
    def end_callback():
        log.info('\n********** spider_article end **********')
Example #33
    def begin_callback():
        # db.update('WWA_app_urls', {'depth': 0}, {'status': 0})
        db.delete('WWA_search_app_urls')
        log.info('\n********** wwa begin **********')
Example #34
    ('/app/managelist', ManageAllHandler),
    ('/app/manage/changepasswd', ManageChangePasswordHandler),
    ('/app/recordlist', RecordAllHandler),
    ('/app/audio', AudioHandler),
    ('/app/audiolist', AudioAllHandler),
    ('/app/upload', UploadHandler),
    ('/app/events', EventsHandler),
    ('/server/releasealarm', ReleaseAlarmHandler),
    #('/static/(.*)', StaticHandler),
    ('/.*', RedirectHandler),
], **settings)

# chrome --allow-running-insecure-content
# usage from http://stackoverflow.com/questions/8045698/https-python-client
# openssl genrsa -out privatekey.pem 2048
# openssl req -new -key privatekey.pem -out certrequest.csr
# openssl x509 -req -in certrequest.csr -signkey privatekey.pem -out certificate.pem
if __name__ == "__main__":
    threading.Thread(target=alarm_sync).start()
    http_server = tornado.httpserver.HTTPServer(
        application,
        #ssl_options={
        #    "certfile": os.path.join("./", "certificate.pem"),
        #    "keyfile": os.path.join("./", "privatekey.pem"),
        #}
    )

    http_server.listen(SERVERPORT)
    log.info("server start")
    tornado.ioloop.IOLoop.instance().start()
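The commented-out ssl_options in Example #34 show where HTTPS would be switched on once the openssl commands above have produced the key and certificate. A hedged sketch with the options enabled (the handler, port, and file paths are assumptions based on the comments):

    import tornado.httpserver
    import tornado.ioloop
    import tornado.web

    class PingHandler(tornado.web.RequestHandler):
        def get(self):
            self.write("ok")

    application = tornado.web.Application([("/ping", PingHandler)])
    http_server = tornado.httpserver.HTTPServer(
        application,
        ssl_options={
            "certfile": "certificate.pem",
            "keyfile": "privatekey.pem",
        },
    )
    http_server.listen(8443)  # illustrative port
    tornado.ioloop.IOLoop.instance().start()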
Example #35
    test_file_name = '/home/h4ct1c/omnisync/local/test2'
    test_event = InotifyEvent(
        None,
        {'source': '/home/h4ct1c/omnisync/local/',
         'syncers': ['GoogleDrive'],
         'target': '/omniSync'},
        mask=None,
        file_name=os.path.basename(test_file_name),
        base_path=os.path.dirname(test_file_name),
        source_absolute=test_file_name,
        isdir=False
    )

    drive = GoogleDrive(
        progress_callback=lambda syncer, file, progress:
        log.info("%s: %s %s" % (syncer.name, progress, file))
    )
    drive.get_credentials()
    drive.authorize()
    for item in drive.walk():
        print(item)
    #drive.consume_item(test_event)
    #drive._create_folder('omniSync')
    #drive._path_to_ids('/omniSync/', create_missing=True)
    #drive._get_file('omnisync')

# region modline

# vim: set tabstop=4 shiftwidth=4 expandtab:
# vim: foldmethod=marker foldmarker=region,endregion:
Example #36
    def begin_callback():
        log.info('\n********** VA begin **********')
Example #37
def initialize_saf(database='data/objects_20131126_112742.xls'):
    global models

    CSVModel.clear()

    csv_docs = xls_parse_from_url(database)
    log.info('Loaded %s' % database)

    model_instances = {}
    for k, doc in csv_docs.iteritems():
        try:
            csv_model = CSVModel(doc).create_model('saf_%s' % k)
            models[csv_model.__name__] = csv_model
            model_instances[csv_model.__name__] = csv_model.from_csv(doc)
            log.info("Parsed sheet %s" % k)
        except ArgumentError:
            log.exception("Couldn't load %s" % k)
            continue
        except TypeError:
            log.exception("Couldn't load %s" % k)
            continue
    from model.refs.saf_instrument_ref import SAFInstrumentRef

    log.info("Dropping SAF Views")
    drop_saf_instrument_view(engine)
    drop_qc_view(engine)
    log.info("Dropping SAF Models")
    CSVModel.drop_all(engine)
    log.info("Creating SAF Models")
    CSVModel.create_all(engine)
    log.info("Creating SAF Views")
    initialize_saf_instrument_view(engine)
    initialize_qc_view(engine)
    
    for k, v in model_instances.iteritems():
        for inst in v:
            session.add(inst)
            try:
                session.commit()
            except Exception as e:
                session.rollback()
                from traceback import print_exc
                print_exc()  # print_exc takes no exception argument
                raise
        log.info('Initialized %s' % k)
    log.info('Initialized SAF Data instances')
    instruments = model_instances['saf_instrument']
    instruments = [(i.id, i.data_product_list) for i in instruments]
    log.info("Loaded instruments into memory")
    if engine.name == 'postgresql':
        speedy_saf_ref(instruments)
    else:
        linear_saf_ref(instruments, session)
Example #38
    def _upload(self, event, dropbox_path):
        if event.isdir:
            if event.type != 'CREATE': return
            try:
                self.client.file_create_folder(dropbox_path)
            except dropbox.rest.ErrorResponse as e:
                log.exception(e)
            finally: return

        with open(event.source_absolute, 'rb') as file:
            self._put_file(file, event.source_absolute, dropbox_path)
    # endregion

if __name__ == '__main__':
    sys.path = sys.path[1:]
    import dropbox
    remote = Dropbox(
        progress_callback=lambda syncer, path, progress:
        log.info("%s: %s %s" % (syncer.name, progress, path))
    )
    remote.init()
    remote.walk('/')


# region modline

# vim: set tabstop=4 shiftwidth=4 expandtab:
# vim: foldmethod=marker foldmarker=region,endregion:

# endregion
Example #39
def initialize_saf(database='data/objects_20131126_112742.xls'):
    global models

    CSVModel.clear()

    csv_docs = xls_parse_from_url(database)
    log.info('Loaded %s' % database)

    model_instances = {}
    for k, doc in csv_docs.iteritems():
        try:
            csv_model = CSVModel(doc).create_model('saf_%s' % k)
            models[csv_model.__name__] = csv_model
            model_instances[csv_model.__name__] = csv_model.from_csv(doc)
            log.info("Parsed sheet %s" % k)
        except ArgumentError:
            log.exception("Couldn't load %s" % k)
            continue
        except TypeError:
            log.exception("Couldn't load %s" % k)
            continue
    from model.refs.saf_instrument_ref import SAFInstrumentRef

    log.info("Dropping SAF Views")
    drop_saf_instrument_view(engine)
    drop_qc_view(engine)
    log.info("Dropping SAF Models")
    CSVModel.drop_all(engine)
    log.info("Creating SAF Models")
    CSVModel.create_all(engine)
    log.info("Creating SAF Views")
    initialize_saf_instrument_view(engine)
    initialize_qc_view(engine)

    for k, v in model_instances.iteritems():
        for inst in v:
            session.add(inst)
            try:
                session.commit()
            except Exception as e:
                session.rollback()
                from traceback import print_exc
                print_exc()  # print_exc takes no exception argument
                raise
        log.info('Initialized %s' % k)
    log.info('Initialized SAF Data instances')
    instruments = model_instances['saf_instrument']
    instruments = [(i.id, i.data_product_list) for i in instruments]
    log.info("Loaded instruments into memory")
    if engine.name == 'postgresql':
        speedy_saf_ref(instruments)
    else:
        linear_saf_ref(instruments, session)
Example #40
def initialize_database(path=MASTER_DOC):
    global models

    CSVModel.clear()

    csv_docs = xls_parse_from_url(path)
    log.info('Downloaded %s' % path)
    model_instances = {}
    for k, doc in csv_docs.iteritems():
        if k in ['IDMap', 'AllScenarios']:
            continue
        try:
            csv_model = CSVModel(doc).create_model(k)
            models[csv_model.__name__] = csv_model
            model_instances[k] = csv_model.from_csv(doc)
            log.info("Parsed sheet %s" % k)
        except ArgumentError:
            log.exception("Couldn't load %s" % k)
            continue
        except TypeError:
            log.exception("Couldn't load %s" % k)
            continue
    # We want a late load so that the order is preserved and deterministic
    from model.refs.parameter_ref import ParameterRef

    log.info('Dropping view')
    drop_dp_view(engine)
    drop_view(engine)
    CSVModel.drop_all(engine)
    CSVModel.create_all(engine)
    log.info('Creating view')
    initialize_view(engine)
    initialize_dp_view(engine)

    for k, v in model_instances.iteritems():
        for inst in v:
            session.add(inst)
            try:
                session.commit()
            except Exception as e:
                session.rollback()
                from traceback import print_exc
                print_exc()  # print_exc takes no exception argument
        log.info("Initialized %s" % k)
    log.info("Initializing Parameter References and Associations")
    pdicts = [(pdict.scenario, pdict.id, pdict.parameter_ids)
              for pdict in model_instances['ParameterDictionary']]
    log.info("Loaded ParameterDictionary into memory")
    params = {p.id: p.scenario for p in model_instances['ParameterDefs']}
    log.info("Loaded Parameters into Memory")
    if engine.name == 'postgresql':
        speedy_parameter_load(pdicts, params)
    else:
        linear_parameter_load(pdicts, params, session)
Example #41
    def end_callback():
        # Update the keyword status: done
        log.info('\n********** VA end **********')
Example #42
def telemetry_query(datatype, amount, session):
    """
    Datatype specifies the data that wants to be queried inside of TelemetryModel
    Datatype can be: "GOM", "RTC", "RPI", "GYRO", "THERMO", "PRESSURE", or "ALL"
    Prints amount number of most recent entries in the database for desired datatype
    A sqlalchemy session (session) must be passed in
    """
    try:
        if datatype == "ALL":
            entries = session.query(TelemetryModel).all()
            length = len(entries)
            for entry in range(length - amount, length):
                logger.info(entries[entry])
        elif datatype == "GOM":
            GOM_query = session.query(
                TelemetryModel.time_polled,
                TelemetryModel.GOM_vboost1,
                TelemetryModel.GOM_vboost2,
                TelemetryModel.GOM_vboost3,
                TelemetryModel.GOM_vbatt,
                TelemetryModel.GOM_curin1,
                TelemetryModel.GOM_curin2,
                TelemetryModel.GOM_curin3,
                TelemetryModel.GOM_cursun,
                TelemetryModel.GOM_cursys,
                TelemetryModel.GOM_reserved1,
                TelemetryModel.GOM_curout1,
                TelemetryModel.GOM_curout2,
                TelemetryModel.GOM_curout3,
                TelemetryModel.GOM_curout4,
                TelemetryModel.GOM_curout5,
                TelemetryModel.GOM_curout6,
                TelemetryModel.GOM_outputs,
                TelemetryModel.GOM_latchup1,
                TelemetryModel.GOM_latchup2,
                TelemetryModel.GOM_latchup3,
                TelemetryModel.GOM_latchup4,
                TelemetryModel.GOM_latchup5,
                TelemetryModel.GOM_latchup6,
                TelemetryModel.GOM_wdt_i2c_time_left,
                TelemetryModel.GOM_wdt_gnd_time_left,
                TelemetryModel.GOM_counter_wdt_i2c,
                TelemetryModel.GOM_counter_wdt_gnd,
                TelemetryModel.GOM_counter_boot,
                TelemetryModel.GOM_bootcause,
                TelemetryModel.GOM_battmode,
                TelemetryModel.GOM_temp1,
                TelemetryModel.GOM_temp2,
                TelemetryModel.GOM_temp3,
                TelemetryModel.GOM_temp4,
                TelemetryModel.GOM_pptmode,
                TelemetryModel.GOM_reserved2,
            ).all()
            length = len(GOM_query)
            for entry in range(length - amount, length):
                logger.info(GOM_query[entry])
        elif datatype == "RTC":
            RTC_query = session.query(
                TelemetryModel.time_polled,
                TelemetryModel.RTC_measurement_taken).all()
            length = len(RTC_query)
            for entry in range(length - amount, length):
                logger.info(RTC_query[entry])
        elif datatype == "RPI":
            RPI_query = session.query(
                TelemetryModel.time_polled,
                TelemetryModel.RPI_cpu,
                TelemetryModel.RPI_ram,
                TelemetryModel.RPI_dsk,
                TelemetryModel.RPI_tmp,
                TelemetryModel.RPI_boot,
                TelemetryModel.RPI_uptime,
            ).all()
            length = len(RPI_query)
            for entry in range(length - amount, length):
                logger.info(RPI_query[entry])
        elif datatype == "GYRO":
            GYRO_query = session.query(
                TelemetryModel.time_polled, TelemetryModel.GYRO_gyr_x,
                TelemetryModel.GYRO_gyr_y, TelemetryModel.GYRO_gyr_z,
                TelemetryModel.GYRO_acc_x, TelemetryModel.GYRO_acc_y,
                TelemetryModel.GYRO_acc_z, TelemetryModel.GYRO_mag_x,
                TelemetryModel.GYRO_mag_y, TelemetryModel.GYRO_mag_z,
                TelemetryModel.GYRO_temperature).all()
            length = len(GYRO_query)
            for entry in range(length - amount, length):
                logger.info(GYRO_query[entry])
        elif datatype == "THERMO":
            THERMO_query = session.query(
                TelemetryModel.time_polled,
                TelemetryModel.THERMOCOUPLE_temperature).all()
            length = len(THERMO_query)
            for entry in range(length - amount, length):
                logger.info(THERMO_query[entry])
        elif datatype == "PRESSURE":
            PRESSURE_query = session.query(
                TelemetryModel.time_polled,
                TelemetryModel.PRESSURE_pressure).all()
            length = len(PRESSURE_query)
            for entry in range(length - amount, length):
                logger.info(PRESSURE_query[entry])
    except Exception:
        logger.exception("error during telemetry_query")
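One caveat in Example #42: when amount exceeds the number of rows, range(length - amount, length) starts at a negative index, and negative indexing wraps around to the end of the list, printing tail entries twice. A slice clamps instead of wrapping, so entries[-amount:] is safer. A self-contained sketch with a stand-in model (TelemetryModel itself lives in the project's code):

    from sqlalchemy import Column, Float, Integer, create_engine
    from sqlalchemy.ext.declarative import declarative_base
    from sqlalchemy.orm import sessionmaker

    Base = declarative_base()

    class Telemetry(Base):
        __tablename__ = "telemetry"
        id = Column(Integer, primary_key=True)
        time_polled = Column(Float)

    engine = create_engine("sqlite://")
    Base.metadata.create_all(engine)
    session = sessionmaker(bind=engine)()
    session.add_all([Telemetry(time_polled=float(t)) for t in range(5)])
    session.commit()

    entries = session.query(Telemetry).all()
    amount = 3
    for entry in entries[-amount:]:  # slices clamp, unlike negative indexing
        print(entry.id, entry.time_polled)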