Example #1
    def __init__(self, event, watch_config, **kwargs):
        self._mask = getattr(event, 'mask', None)
        self.file_name = getattr(event, 'name', None)
        # path without filename/foldername
        self.base_path = getattr(event, 'path', None)
        # path with filename/foldername
        self.source_absolute = getattr(event, 'pathname', None)
        self.isdir = getattr(event, 'dir', None)
        self.type = self.get_type()
        self.__dict__.update(kwargs)
        self.target_base_dir = watch_config['target']  # target dir
        self.source_base_dir = watch_config['source']  # source dir
        self.syncers = watch_config['syncers']
        self.config = watch_config

        self.source_relative = os.path.relpath(
            self.source_absolute, self.source_base_dir
        )
        self.source_base_dir_relative = os.path.relpath(
            self.base_path, self.source_base_dir
        )
        self.target_absolute = os.path.join(
            self.target_base_dir, self.source_relative
        )
        self.target_base_dir_absolute = os.path.normpath(os.path.join(
            self.target_base_dir, self.source_base_dir_relative
        ))
        self.moved_from_path = getattr(event, 'src_pathname', None)
        log.debug('EVENT %s (%s): %s' % (
            self.type, self.syncers, self.source_absolute))
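The path bookkeeping above boils down to a couple of os.path calls; a standalone sketch of the same arithmetic with made-up paths:

import os

source_base_dir = '/data/source'                      # watch_config['source']
target_base_dir = '/data/target'                      # watch_config['target']
source_absolute = '/data/source/docs/report.txt'      # event.pathname
base_path = '/data/source/docs'                       # event.path

source_relative = os.path.relpath(source_absolute, source_base_dir)   # 'docs/report.txt'
target_absolute = os.path.join(target_base_dir, source_relative)      # '/data/target/docs/report.txt'
target_base_dir_absolute = os.path.normpath(
    os.path.join(target_base_dir, os.path.relpath(base_path, source_base_dir)))  # '/data/target/docs'
print(source_relative, target_absolute, target_base_dir_absolute)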
Example #2
 def query_data(self, sql):
     con = self.__db_pool.connection()
     cursor = con.cursor(cursor=pymysql.cursors.DictCursor)
     try:
         log.debug('executing sql: %s' % sql)
         cursor.execute(sql)
         results = cursor.fetchall()
         if results:
             # convert values that json cannot serialise
             for result in results:
                 for field in result:
                     if isinstance(result[field], datetime.datetime):
                         result[field] = result[field].strftime('%Y-%m-%d %H:%M:%S')
                     elif isinstance(result[field], datetime.date):
                         result[field] = result[field].strftime('%Y-%m-%d')
                     elif isinstance(result[field], decimal.Decimal):
                         result[field] = float(result[field])
         else:
             log.debug('sql query returned no rows')
         return results
     except Exception as e:
         log.error('error while executing sql:\n%s' % e)
     finally:
         cursor.close()
         con.close()
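datetime and Decimal values are not JSON serialisable, which is what the inner loop above works around; a standalone sketch of the same normalisation on a made-up row (no database needed):

import datetime
import decimal
import json

row = {'id': 1, 'price': decimal.Decimal('9.90'),
       'created': datetime.datetime(2024, 1, 2, 3, 4, 5)}
for field in row:
    if isinstance(row[field], datetime.datetime):
        row[field] = row[field].strftime('%Y-%m-%d %H:%M:%S')
    elif isinstance(row[field], datetime.date):
        row[field] = row[field].strftime('%Y-%m-%d')
    elif isinstance(row[field], decimal.Decimal):
        row[field] = float(row[field])
print(json.dumps(row))  # {"id": 1, "price": 9.9, "created": "2024-01-02 03:04:05"}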
Example #3
    def get(self):
        log.debug("AlarmHandler get in")
        try:
            alarm_id = int(self.get_argument("id"))
        except Exception:
            log.debug("param id is not int")
            return
        data = db.get_alarm(alarm_id)
        result = {}
        if data:
            result["result"] = "ok"
            result["data"] = {
                "id": data[0],
                "create_time": data[1],
                "zwaveid": data[2],
                "deviceid": data[3],
                "deal_progress": data[4],
                "deal_user": data[5],
            }
        else:
            result["result"] = "error"
            result["message"] = "can not find this alarm"

        self.send_data(result)
Example #4
def add_root_url(parser_params={}):
    log.debug('''
        add root url
        parser_params : %s
        ''' % str(parser_params))

    url = 'http://www.1kkk.com'
    html = tools.get_html_by_urllib(url)
    regex = '<li class="">.*?href="(.*?)" target="_parent"><span>.*?</span></a></li>'
    infos = tools.get_info(html, regex)
    china_cartoon = ['//manhua-china//']
    infos = infos + china_cartoon
    for info in infos:
        info = info[:-1]  # drop the trailing slash
        url = 'http://www.1kkk.com' + info
        url_fenye = url + '-p'  # pagination url prefix
        urls = url + '-p1'
        html = tools.get_html_by_urllib(urls)
        regex = '\.\.\.<a href=".*?">(.*?)</a><a href=".*?">下一页</a>'
        page_count = tools.get_info(html, regex)
        if not page_count:
            # no page count found: follow the "next page" link until there is none
            while url:
                html = tools.get_html_by_urllib(url)
                regex = '<div id="search_fy">.*<a href="(.*?)" style=\'padding: 5px 20px; margin: 0 8px;\'> 下一页 </a>'
                next_page = ''.join(tools.get_info(html, regex))
                if not next_page:
                    break
                url = 'http://www.1kkk.com' + next_page
                base_parser.add_url('WP_urls', SITE_ID, url)
        else:
            page_count = int(''.join(page_count))
            for page in range(1, page_count + 1):
                url = url_fenye + '%d' % page
                base_parser.add_url('WP_urls', SITE_ID, url)
Example #5
    def post(self):
        log.debug("ManageHandler post in")
        data = self.get_data()
        if data:
            log.debug(data)
            if data.has_key("ManageTelephone") and data.has_key("ManagePassword") \
                    and data.has_key("ManageName") :

                if data["ManageName"] == "":
                    log.error("ManageName is NULL")
                else:
                    db.insert_manage(data["ManageName"], data["ManageTelephone"],
                                   data["ManagePassword"])


                    result ={}
                    result["result"] = "ok"
                    self.send_data(result)
                    str = "add manage name %s telephone %s"%(data["ManageName"].encode('utf-8'),data["ManageTelephone"].encode('utf-8') )
                    save_record(self.login_user, "manage", 0, "add", str)
                    return

            else:
                log.error("Manaage data key is not right")
        else:
            log.error("data is none")

        result = {}
        result["result"] = "error"
        result["message"] = "Manaage info is error"
        self.send_data(result)
Example #6
    def login(self):
        """登录QQ空间"""

        log.run().debug("执行Like.login()")  # 打印日志

        author_info = False
        try:
            log.info().info("正在读取用户信息")  # 打印日志

            with open("config/user.json", "r", encoding="utf-8") as usr:
                infos = json.load(usr)
                account = infos['account']
                password = infos['password']

            author_info = True
            log.info().info("user info read successfully")

        except Exception as e:

            log.exception().exception(e)
            log.error().error("failed to read user info")

        if author_info:
            # login section
            log.info().info("logging in to Qzone")
            driver = webdriver.PhantomJS()
            driver.maximize_window()
            url = "https://qzone.qq.com/"
            driver.get(url)
            driver.implicitly_wait(3)

            try:
                driver.switch_to.frame("login_frame")
                try:
                    driver.find_element_by_id('switcher_plogin').click()
                except:
                    log.run().info("默认显示账号密码登录,不需要切换")

                driver.find_element_by_id('u').clear()
                driver.find_element_by_id('u').send_keys(account)
                driver.find_element_by_id('p').click()
                driver.find_element_by_id('p').send_keys(password)

                driver.find_element_by_id('login_button').click()
                time.sleep(3)
                driver.implicitly_wait(20)

                log.debug().debug("即将开始验证QQ登录")

                return self.login_on(driver)  # check whether login succeeded

            except Exception as login_01:

                log.exception().exception(login_01)
                log.error().info("failed to load the Qzone login module")

                return 'error'

        else:
            return 'error'
Example #7
def add_root_url(parser_params={}):
    log.debug('''
        add root url
        parser_params : %s
        ''' % str(parser_params))

    search_keyword1 = parser_params['search_keyword1']
    search_keyword2 = parser_params['search_keyword2']
    search_keyword3 = parser_params['search_keyword3']

    remark = parser_params

    # combine every keyword1 with every keyword2; fall back to whichever list is non-empty
    search_keywords = []
    for str_key1 in search_keyword1:
        for str_key2 in search_keyword2:
            search_keywords.append((str_key1 + str_key2).strip())
    if not search_keyword1:
        search_keywords = search_keyword2
    if not search_keyword2:
        search_keywords = search_keyword1

    for i in search_keywords:
        if not i.strip():
            continue
        for num in range(0, 760, 10):
            link = "https://www.baidu.com/s?wd=%s%s&pn=%d" % (i, ' 视频', num)
            link = tools.quote(link, safe='#/:?=&%')
            if not base_parser.add_url('VA_urls', SITE_ID, link,
                                       remark=remark):
                base_parser.update_url('VA_urls', link, Constance.TODO)
Example #8
def add_root_url(parser_params={}):
    log.debug('''
        add root url
        parser_params : %s
        ''' % str(parser_params))

    _db = base_parser.MongoDB()
    _db.set_unique_key('PROGRAM_EPISODE_info', 'episode_url')
    _db.update('PROGRAM_urls', {'depth': 0, 'site_id': SITE_ID}, {'status': 0}, multi=True)

    for page_num in range(1, 14):
        urls = [
                'http://list.youku.com/category/show/c_85_g_热门网综_s_1_d_1_p_%d.html' % page_num,
                'http://list.youku.com/category/show/c_97_g_优酷出品_s_1_d_1_p_%d.html' % page_num,
                'http://list.youku.com/category/show/c_96_g_优酷出品_s_1_d_1_p_%d.html' % page_num,
                ]
        for url in urls:
            print(url)
            print('********************************************************')
            html = tools.get_html_by_urllib(url)
            if tools.get_info(html, ['小酷没有筛选到相关视频']):  # "no related videos found" page
                continue
            links = tools.get_tag(html, 'div', {'class': 'p-thumb'})
            for link in links:
                try:
                    link = link.a['href']
                    link = tools.get_full_url('http:', link)
                    link_html = tools.get_html_by_urllib(link)
                    link = tools.get_tag(link_html, 'a', {'class': 'desc-link'}, find_all=False)
                    link = link['href']
                    link = tools.get_full_url('http:', link)
                    base_parser.add_url('PROGRAM_urls', SITE_ID, link, depth=0)
                except Exception as e:
                    log.error(e)
                    print(link_html)
Example #9
def add_root_url(parser_params={}):
    log.debug('''
        add root url
        parser_params : %s
        ''' % str(parser_params))

    def get_page_count(url):
        html, r = tools.get_html_by_requests(url)
        regex = '<div class="mod_pages" r-notemplate="true">.*>(\d*)</a>.*?</span>'
        page_count = tools.get_info(html, regex, fetch_one=True)
        print(page_count)
        return int(page_count) if page_count else 0

    def inner_add_root_url(root_url, page_url, remark):
        page_count = get_page_count(root_url)
        for page in range(0, page_count + 1):
            url = page_url % (page * 30)
            base_parser.add_url('PROGRAM_urls', SITE_ID, url, remark=remark)

    # home page - variety shows - Tencent originals
    root_url = 'http://v.qq.com/x/list/variety?offset=0&isource=2'
    page_url = 'http://v.qq.com/x/list/variety?offset=%d&isource=2'
    # inner_add_root_url(root_url, page_url, remark={'classify': '综艺'})

    # home page - TV series - Tencent originals (self-produced dramas)
    root_url = 'http://v.qq.com/x/list/tv?sort=4&offset=0&iarea=-1&iyear=-1&itype=843'
    page_url = 'http://v.qq.com/x/list/tv?sort=4&offset=%d&iarea=-1&iyear=-1&itype=843'
    inner_add_root_url(root_url, page_url, remark={'classify': '电视剧'})
Example #10
    def get(self):
        log.debug("alarmAllHandler get in")
        try:
            alarm_id = int(self.get_argument("alarm"))
        except Exception:
            log.debug("param alarm is not int")
            return


        data = db.get_audio_list(alarm_id)
        result = {}
        result["result"] = "ok"
        result["data"] =[]

        if data:
            for one in data:
                if os.path.exists("./static/audio/%s" % one[6]):
                    has_audio = 1
                else:
                    has_audio = 0
                result["data"].append({
                    "deal_user":one[2],
                    "telephone":one[3],
                    "deal_time":one[4],
                    "deal_remark":one[5],
                    "audio":one[6],
                    "has_audio":has_audio

                })

            self.send_data(result)
        else:
            self.send_data(result)
Example #11
def parser_program_url(url_info):
    log.debug('processing \n' + tools.dumps_json(url_info))

    root_url = url_info['url']
    depth = url_info['depth']
    site_id = url_info['site_id']
    remark = url_info['remark']
    classify = remark['classify']

    # parse the listing page
    html, request = tools.get_html_by_requests(root_url)
    if not html:
        base_parser.update_url('PROGRAM_urls', root_url, Constance.EXCEPTION)
        return

    program_blocks = tools.get_tag(html, 'li', {'class': "list_item"})
    for program_block in program_blocks:
        program_block = str(program_block)

        # program detail url
        regex = 'r-props="{id: \'(.*?)\''
        program_id = tools.get_info(program_block, regex, fetch_one=True)
        program_url = 'http://v.qq.com/detail/5/%s.html' % program_id
        base_parser.add_url("PROGRAM_urls",
                            site_id,
                            program_url,
                            depth=1,
                            remark={
                                'program_id': program_id,
                                'classify': classify
                            })

    base_parser.update_url("PROGRAM_urls", root_url, Constance.DONE)
Example #12
def add_root_url(search_keyword1=[], search_keyword2=[], search_keyword3=[]):
    log.debug(
        '''
        add root url
        search_keyword1 = %s
        search_keyword2 = %s
        search_keyword3 = %s
        ''' %
        (str(search_keyword1), str(search_keyword2), str(search_keyword3)))

    remark = {
        'search_keyword1': search_keyword1,
        'search_keyword2': search_keyword2,
        'search_keyword3': search_keyword3
    }

    search_keywords = search_keyword1 + search_keyword2

    for search_keyword in search_keywords:
        # get the number of result pages
        url = 'https://movie.douban.com/subject_search?start=0&search_text=%s&cat=1002' % search_keyword
        html = tools.get_html_by_urllib(url)
        regex = '<div class="paginator">.*<a href.*?>(.*?)</a><span class="next"'
        page_count = tools.get_info(html, regex)
        page_count = int(page_count[0]) if page_count else 0
        print(page_count)

        for page in range(0, page_count):
            url = 'https://movie.douban.com/subject_search?start=%d&search_text=%s&cat=1002' % (
                page * 15, search_keyword)
            if not base_parser.add_url('VA_urls', SITE_ID, url, remark=remark):
                base_parser.update_url('VA_urls', url, Constance.TODO)
Example #13
def add_site_info():
    log.debug('adding site info')
    site_id = SITE_ID
    name = NAME
    table = 'PROGRAM_site_info'
    url = "http://best.le.com/"
    base_parser.add_website_info(table, site_id, url, name)
Example #14
    def deal_request(self, name):
        web.header('Content-Type', 'text/html;charset=UTF-8')

        data_json = json.loads(json.dumps(web.input()))
        data = data_json.get('data')  # data is a str
        req_url = data_json.get('req_url')

        # log.debug('''
        #     method : %s
        #     data   :%s
        #     '''%(name, data))

        log.debug('''
            method : %s
            url   :%s
            ''' % (name, req_url))

        response = ''
        if name == 'get_article_list':
            response = self.get_article_list(data, req_url)

        elif name == 'get_article_content':
            response = self.get_article_content(data, req_url)

        elif name == 'get_read_watched_count':
            response = self.get_read_watched_count(data, req_url)

        elif name == 'get_comment':
            response = self.get_comment(data, req_url)

        # log.debug('''
        #     ---------response---------
        #     %s''' % response)

        return response  # returning '' here will not trigger the node-js http callback
Example #15
def add_site_info():
    log.debug('adding site info')
    site_id = SITE_ID
    name = NAME
    table = 'PROGRAM_site_info'
    url = "http://news.v1.cn/V1make.shtml"
    base_parser.add_website_info(table, site_id, url, name)
Example #16
def add_site_info():
    log.debug('adding site info')
    site_id = SITE_ID
    name = NAME
    table = 'site_info'
    url = 'https://www.itouchtv.cn/'
    base_parser.add_website_info(table, site_id, url, name)
Example #17
def parser(url_info):
    url_info['_id'] = str(url_info['_id'])
    log.debug('processing \n' + tools.dumps_json(url_info))

    root_url = url_info['url']
    depth = url_info['depth']
    site_id = url_info['site_id']
    remark = url_info['remark']
    offset = remark.get('offset')

    html = tools.get_html_by_webdirver(root_url)
    headers = tools.get_tag(html, 'div', {'class': 'result'}, find_all=True)
    if not headers:
        base_parser.update_url('BAIDU_NEWS_urls', root_url, Constance.DONE)
        return

    for header in headers:
        # "view more related news" link
        regex = ' <span class="c-info"><a.*?href="(.*?)".*?查看更多相关新闻'
        more_news_url = tools.get_info(str(header), regex, fetch_one=True)
        if more_news_url:
            more_news_url = tools.get_full_url('http://news.baidu.com', more_news_url)
            more_news_url = more_news_url.replace('amp;', '')
            base_parser.add_url('BAIDU_NEWS_urls', SITE_ID, more_news_url, depth=1, remark={'offset': 0})

        url = header.h3.a['href']
        article_extractor = ArticleExtractor(url)
        content = title = release_time = author = website_domain =''
        content = article_extractor.get_content()
        if content:
            title = article_extractor.get_title()
            release_time = article_extractor.get_release_time()
            author = article_extractor.get_author()
            website_domain = tools.get_domain(url)
            uuid = tools.get_uuid(title, website_domain)
            website_name = ''
            website_position = None

            log.debug('''
                uuid         %s
                title        %s
                author       %s
                release_time %s
                domain       %s
                url          %s
                content      %s
                '''%(uuid, title, author, release_time, website_domain, url, '...'))

            # save to the database
            if tools.is_have_chinese(content):
                is_continue = self_base_parser.add_news_acticle(uuid, title, author, release_time, website_name , website_domain, website_position, url, content)

                if not is_continue:
                    break
    else:
        # loop finished without break: every article on this page was stored, so queue the next page
        offset += 50
        url = tools.replace_str(root_url, 'pn=\d*', 'pn=%d' % offset)
        base_parser.add_url('BAIDU_NEWS_urls', SITE_ID, url, depth=0, remark={'offset': offset})

    base_parser.update_url('BAIDU_NEWS_urls', root_url, Constance.DONE)
Example #18
def add_site_info():
    log.debug('adding site info')

    table = 'BAIDU_NEWS_site_info'
    url = 'http://news.baidu.com'

    base_parser.add_website_info(table, site_id=SITE_ID, url=url, name=NAME)
Example #19
def add_site_info():
    log.debug('adding site info')

    table = 'VIDEO_NEWS_site_info'
    url = 'https://v.qq.com'

    base_parser.add_website_info(table, site_id=SITE_ID, url=url, name=NAME)
Example #20
 def calculate_time(*args, **kw):
     began_time = time.time()
     callfunc = func(*args, **kw)
     end_time = time.time()
     log.debug(func.__name__ + " run time  = " +
               str(end_time - began_time))
     return callfunc
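The fragment above is the inner wrapper of a timing decorator; func is supplied by the enclosing def, which is not shown. A minimal self-contained sketch of the complete decorator (the outer name timed is an assumption):

import functools
import logging
import time

log = logging.getLogger(__name__)

def timed(func):
    """Decorator that logs how long each call to func takes."""
    @functools.wraps(func)
    def calculate_time(*args, **kw):
        began_time = time.time()
        callfunc = func(*args, **kw)
        end_time = time.time()
        log.debug(func.__name__ + " run time  = " + str(end_time - began_time))
        return callfunc
    return calculate_time

@timed
def slow_add(a, b):
    time.sleep(0.1)
    return a + b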
Example #21
def add_root_url(search_keyword1=[], search_keyword2=[], search_keyword3=[]):
    log.debug(
        '''
        add root url
        search_keyword1 = %s
        search_keyword2 = %s
        search_keyword3 = %s
        ''' %
        (str(search_keyword1), str(search_keyword2), str(search_keyword3)))

    remark = {
        'search_keyword1': search_keyword1,
        'search_keyword2': search_keyword2,
        'search_keyword3': search_keyword3
    }

    search_keywords = search_keyword1 + search_keyword2

    for search_keyword in search_keywords:
        if not search_keyword:
            continue
        # the site shows at most 10 result pages
        for page in range(1, 11):
            url = 'http://weixin.sogou.com/weixin?type=2&query=' + search_keyword + '&page=%d&ie=utf8' % page
            if not base_parser.add_url('VA_urls', SITE_ID, url, remark=remark):
                base_parser.update_url('VA_urls', url, Constance.TODO)
Example #22
def main():
    while True:
        if task_status.is_doing:
            log.debug('still doing, sleeping %ss' % SLEEP_TIME)
            time.sleep(SLEEP_TIME)
            continue

        task_status.is_doing = True

        keywords = Keywords().get_keywords()

        def begin_callback():
            log.info('\n********** spider_main begin **********')

        def end_callback():
            log.info('\n********** spider_main end **********')
            task_status.is_doing = False

        # configure the spider
        spider = Spider(tab_list,
                        tab_unique_key_list,
                        tab_ensure_index_list,
                        parser_count=1,
                        site_parsers=parser_siteid_list,
                        begin_callback=begin_callback,
                        end_callback=end_callback,
                        parser_params=keywords)

        # register the parsers
        for parser in parser_list:
            spider.add_parser(parser)

        spider.start()
Example #23
    def connect(self):
        """
        Connect to the broker, define the callbacks, and subscribe
        This will also set the Last Will and Testament (LWT)
        The LWT will be published in the event of an unclean or
        unexpected disconnection.
        """
        self.is_connected = False
        # Add the callbacks
        self.mqttc.on_connect = self.on_mqtt_connect
        self.mqttc.on_disconnect = self.on_mqtt_disconnect

        # Set the Last Will and Testament (LWT) *before* connecting
        self.mqttc.will_set(self.lwt, payload="0", qos=0, retain=True)

        # Attempt to connect
        log.debug("Connecting to {}:{}...".format(self.host, self.port))
        try:
            self.mqttc.connect(self.host, self.port, 60)
            self.is_connected = True
        except Exception as e:
            log.error("Error connecting to {}:{}: {}".format(
                self.host, self.port, str(e)))
            return self.is_connected
        # Let the connection run forever
        self.mqttc.loop_start()
        return self.is_connected
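The on_mqtt_connect / on_mqtt_disconnect callbacks wired up above are not shown. A minimal, hypothetical sketch of what they might look like, assuming paho-mqtt 1.x callback signatures and a made-up topic layout; the real handlers may differ:

import logging
import paho.mqtt.client as mqtt

log = logging.getLogger(__name__)

class MqttLink:
    def __init__(self, host="localhost", port=1883, lwt="devices/demo/available"):
        self.host, self.port, self.lwt = host, port, lwt
        self.is_connected = False
        self.mqttc = mqtt.Client()
        self.mqttc.on_connect = self.on_mqtt_connect
        self.mqttc.on_disconnect = self.on_mqtt_disconnect
        # the LWT must be set before connecting, as in the example above
        self.mqttc.will_set(self.lwt, payload="0", qos=0, retain=True)

    def on_mqtt_connect(self, client, userdata, flags, rc):
        if rc == 0:
            client.publish(self.lwt, payload="1", qos=0, retain=True)  # announce "online"
            self.is_connected = True
            log.debug("MQTT connected")
        else:
            log.error("MQTT connection refused, rc=%s", rc)

    def on_mqtt_disconnect(self, client, userdata, rc):
        self.is_connected = False
        if rc != 0:
            log.error("unexpected MQTT disconnect (rc=%s); the broker will publish the LWT", rc)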
Example #24
    def start(self, args):
        """Start up all of the application components"""

        enable_log()

        # Make sure the results have some data in it before the web requests
        # come in
        self.results = App._init_results(self.config)

        if args.poll_seconds:
            self.config.override_monitor_setting(
                'poll_seconds',
                args.poll_seconds
            )

        if self.config.log_file:
            set_log_file(self.config.log_file)
            log.debug("Opening log file {}".format(self.config.log_file))

        self._start_monitoring()

        if self.config.http_port:
            log.info("Starting web frontend on port {}".format(
                self.config.http_port
            ))
            self._start_frontend()
Example #25
    def is_have_new_article(self, account_id='', account=''):
        '''
        @summary: check whether the official account has published an article today
        ---------
        @param account_id:
        @param account:
        ---------
        @result:
        '''

        account_block = self.__get_account_blocks(account_id, account)
        if account_block == constance.VERIFICATION_CODE:
            return constance.VERIFICATION_CODE

        regex = "timeConvert\('(\d*?)'\)"
        release_time = tools.get_info(account_block, regex, fetch_one=True)

        if release_time:
            release_time = int(release_time)
            release_time = tools.timestamp_to_date(release_time)
            log.debug("最近发文时间 %s" % release_time)

            if release_time >= tools.get_current_date('%Y-%m-%d'):
                return constance.UPDATE
            else:
                return constance.NOT_UPDATE

        else:
            return constance.ERROR
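The comparison above works on formatted date strings; a standalone sketch of the equivalent standard-library logic (tools.timestamp_to_date and tools.get_current_date are assumed to behave roughly like this):

import datetime
import time

def timestamp_to_date(ts, fmt='%Y-%m-%d %H:%M:%S'):
    # assumed equivalent of tools.timestamp_to_date
    return time.strftime(fmt, time.localtime(ts))

release_time = timestamp_to_date(int(time.time()))
today = datetime.datetime.now().strftime('%Y-%m-%d')
# '2024-01-02 12:34:56' >= '2024-01-02' compares lexicographically,
# so this is True exactly when the latest article is from today
print(release_time >= today)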
Example #26
    def get(self):
        log.debug("ManaageAllHandler get in")
        try:
            index = int(self.get_argument("index"))
        except:
            index = 1
        if index < 1:
            index = 1

        data = db.get_manage_list(index)
        result = {}
        if data:
            result["result"] = "ok"
            result["maxindex"] = data["maxindex"]
            result["curruntindex"] = data["curruntindex"]
            result["data"] =[]
            for one in data["data"]:
                result["data"].append({"manageid" : one[0],
                                        "managename" : one[1],
                                        "managetelephone" : one[2],
                                        "managelogintime": one[5],
                                       "managelogouttime": one[6]
                                       })

            self.send_data(result)

        else:

            result["result"] = "error"
            result["message"] = "get Manage list failed"
            self.send_data(result)
Example #27
def add_root_url(parser_params={}):
    log.debug('''
        add root url
        parser_params : %s
        ''' % str(parser_params))

    for program in parser_params:  # [[program_id, chan_name, program_name, program_type, image_url], ...]
        program_id = program[0]
        chan_name = program[1]
        program_name = program[2]
        program_type = program[3]
        image_url = program[4]
        if program_type != '其他':
            url = 'http://so.iqiyi.com/so/q_%s %s?source=input&sr=1170053009947' % (
                program_name, program_type)
        else:
            url = 'http://so.iqiyi.com/so/q_%s?source=input&sr=1170053009947' % (
                program_name)
        base_parser.add_url('mms_urls',
                            SITE_ID,
                            url,
                            remark={
                                'program_id': program_id,
                                'program_name': program_name,
                                'chan_name': chan_name,
                                'program_type': program_type,
                                'image_url': image_url
                            })
Example #28
    def __input_data(self):
        # log.debug('read_pos %d, write_pos %d buffer size %d'%(self._read_pos, self._write_pos, self.get_max_read_size()))
        # log.debug('buffer can write size = %d'%self.get_max_write_size())
        if self.get_max_write_size() == 0:
            log.debug("collector 已满 size = %d" % self.get_max_read_size())
            return

        url_count = min(self._url_count, self.get_max_write_size())

        urls_list = []
        if self._depth:
            urls_list = self._db.find(self._tab_urls, {
                "status": Constance.TODO,
                "depth": {
                    "$lte": self._depth
                }
            },
                                      limit=url_count)
        else:
            urls_list = self._db.find(self._tab_urls,
                                      {"status": Constance.TODO},
                                      limit=url_count)

        # mark the fetched urls as DOING
        for url in urls_list:
            self._db.update(self._tab_urls, url, {'status': Constance.DOING})

        # buffer the urls
        self.put_urls(urls_list)

        if self.is_all_have_done():
            self.stop()
Example #29
def parser_comment(content_id, wall_id, page=1):
    log.debug('crawling page %s of article comments, content_id = %s' % (page, content_id))
    flow_comment_url = 'http://sns-comment.iqiyi.com/v2/comment/get_comments.action?contentid={content_id}&page={page}&authcookie=null&page_size=40&wallId={wall_id}&agenttype=117&t={timestamp_m}'.format(
        content_id=content_id,
        page=page,
        wall_id=wall_id,
        timestamp_m=int(tools.get_current_timestamp() * 1000))

    comment_json = tools.get_json_by_requests(flow_comment_url)
    data = comment_json.get('data', {})

    # can be used to decide whether to fetch further pages
    total_count = data.get('totalCount', 0)
    count = data.get('count', 0)

    replies = data.get('replies', [])
    for reply in replies:
        reply_source = reply.get("replySource", {})
        if not deal_comment(reply_source):
            break

        if not deal_comment(reply):
            break

    else:
        if replies:
            parser_comment(content_id, wall_id, page + 1)
Example #30
    def add_account_info(self, account_info):
        log.debug('''
            ----- official account info -----
            %s''' % tools.dumps_json(account_info))

        WechatService._es.add('wechat_account', account_info,
                              account_info.get('__biz'))
Example #31
def deal_comment(reply):
    if not reply: return

    comment_id = reply.get('id')
    pre_id = reply.get('replyId')
    content = reply.get('content')
    article_id = reply.get('mainContentId')
    release_time = reply.get('addTime')
    release_time = tools.timestamp_to_date(release_time)
    head_url = reply.get('userInfo', {}).get('icon')
    consumer = reply.get('userInfo', {}).get('uname')
    gender = int(reply.get('userInfo', {}).get('gender'))
    up_count = reply.get('likes')

    # TODO
    emotion = random.randint(0, 2)
    hot_id = comment_id

    log.debug('''
        comment id:   %s
        parent id     %s
        article id    %s
        author:       %s
        avatar url    %s
        gender        %s
        content:      %s
        like count    %s
        publish time  %s
        ''' % (comment_id, pre_id, article_id, consumer, head_url, gender,
               content, up_count, release_time))

    return self_base_parser.add_comment(comment_id, pre_id, article_id,
                                        consumer, head_url, gender, content,
                                        up_count, release_time, emotion,
                                        hot_id)
Example #32
def add_root_url(parser_params={}):
    log.debug('''
        add root url
        parser_params : %s
        ''' % str(parser_params))

    def inner_add_url(url, remark):
        html = tools.get_html_by_urllib(url)
        regex = '<li><span></span><a  href="(.*?)">.*?</a></li>'
        infos = tools.get_info(html, regex)
        for info in infos:
            info = ''.join(info)
            type_url = 'http://shouji.baidu.com' + info
            type_html = tools.get_html_by_urllib(type_url)
            page_count = '<div class="pager">.*">(.*?)</a>.*?<li class="next">'
            page_count = tools.get_info(type_html, page_count)
            page_count = ''.join(page_count)
            if not page_count:
                page_count = '1'
            page_count = int(page_count)
            for page in range(1, page_count + 1):
                url = type_url + 'list_%d.html' % page
                if not base_parser.add_url(
                        'GameApp_urls', SITE_ID, url, remark=remark):
                    base_parser.update_url('GameApp_urls', url, Constance.TODO)

    inner_add_url('http://shouji.baidu.com/game/401/', Constance.休闲益智)
    inner_add_url('http://shouji.baidu.com/game/board_102_200/',
                  Constance.网络游戏)
    inner_add_url('http://shouji.baidu.com/game/403/', Constance.飞行射击)
    inner_add_url('http://shouji.baidu.com/game/406/', Constance.赛车竞速)
    inner_add_url('http://shouji.baidu.com/game/405/', Constance.体育竞技)
    inner_add_url('http://shouji.baidu.com/game/407/', Constance.体育竞技)
    inner_add_url('http://shouji.baidu.com/game/408/', Constance.经营策略)
    inner_add_url('http://shouji.baidu.com/game/402/', Constance.角色扮演)
Example #33
    def creat_db_pool(self, host):
        connection_config = decrypt(getenv('INTERFACE_CIPHER'))
        host = decrypt(host)

        if connection_config.get(host, None) is None:
            log.error("unknown host / domain name")
            exit(1)

        user = connection_config.get(host).get('user')
        password = connection_config.get(host).get('password')
        port = connection_config.get(host).get('port')

        log.debug('creating database connection pool: %s' % host)
        self.__db_pool = PooledDB(creator=pymysql,
                                  mincached=3,
                                  maxcached=5,
                                  maxshared=0,
                                  maxconnections=20,
                                  blocking=True,
                                  maxusage=None,
                                  setsession=None,
                                  host=host,
                                  port=port,
                                  user=user,
                                  db=None,
                                  passwd=password)
        log.debug('database connection pool created')
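For context, a minimal standalone sketch of using such a pool together with a pymysql DictCursor, as the query_data example earlier does; connection details are placeholders, and the import path assumes the current dbutils package name (older releases use DBUtils.PooledDB):

import pymysql
from dbutils.pooled_db import PooledDB

pool = PooledDB(creator=pymysql, mincached=1, maxconnections=5,
                host='127.0.0.1', port=3306, user='demo',
                passwd='secret', db='demo_db')
con = pool.connection()
cursor = con.cursor(pymysql.cursors.DictCursor)
cursor.execute('SELECT 1 AS ok')
print(cursor.fetchall())   # [{'ok': 1}]
cursor.close()
con.close()                # returns the connection to the pool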
Example #34
def add_site_info():
    log.debug('adding site info')
    site_id = SITE_ID
    name = NAME
    table = 'site_info'
    url = 'https://www.wandoujia.com/app/org.fungo.fungolive'
    base_parser.add_website_info(table, site_id, url, name)
Example #35
    def get(self):
        log.debug("EventsHandler get in")
        try:
            alarmid = int(self.get_argument("alarmid"))
        except:
            log.debug("param alarmid is not int")
            return
        data = db.get_events(alarmid)
        result = {}
        if data:

            result["result"] = "ok"
            result["data"] = []
            for one in data:
                result["data"].append({"id": one[0],
                                       "type": one[1],
                                       "eventtime": one[4],
                                       "context": one[5]
                                       })

        else:
            result["result"] = "error"
            result["message"] = "can not find this user"

        self.send_data(result)
Example #36
def get_domain(url):
    domain = ''
    try:
        domain = get_tld(url)
    except Exception as e:
        log.debug(e)
    return domain
Example #37
def add_root_url(parser_params={}):
    log.debug('''
        add root url
        parser_params : %s
        ''' % str(parser_params))

    base_parser.add_url('VAApp_urls', SITE_ID, URL, remark=NEWS_LOCAL)
Example #38
    def get(self, url):

        try:
            url = self.url + url
            log.debug(url)
            urllib2.urlopen(url)
        except Exception:
            print("get request error")
Example #39
 def releasealarm(self, zwaveid, user):
     values = {'zwavedeviceid': zwaveid, 'token': self.token, "employeename" : user}
     print values
     data = self.post("/thirdpart/zufang/unalarmdevicewarning", values)
     if data:
         if data["resultCode"] == 0:
             log.debug("release alarm ok")
         else:
             log.debug("release alarm error")
Example #40
 def run(self):
     log.debug(self.__class__.__name__ + " running")
     while True:
         item = self.queue.get()
         if item is None:  # trick to break out of while
             break
         self.consume_item(item)
         # TODO what if a file gets added again while syncing in progress?
         self.queue.task_done()
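A self-contained sketch of the same consumer pattern with a plain queue.Queue, showing how the None sentinel stops the thread; the consume_item body is a stand-in:

import logging
import queue
import threading

logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger(__name__)

class Worker(threading.Thread):
    def __init__(self, work_queue):
        super().__init__()
        self.queue = work_queue

    def consume_item(self, item):
        log.debug("syncing %s", item)

    def run(self):
        log.debug(self.__class__.__name__ + " running")
        while True:
            item = self.queue.get()
            if item is None:          # sentinel: break out of the loop
                break
            self.consume_item(item)
            self.queue.task_done()

q = queue.Queue()
worker = Worker(q)
worker.start()
q.put("/data/source/report.txt")
q.put(None)                           # ask the worker to stop
worker.join()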
Example #41
 def rm(self, path, *args, **kwargs):
     if path == '/':
         log.critical('prevented delete / (root)')
         return
     try:
         self.client.file_delete(path)
     except dropbox.rest.ErrorResponse as e:
         log.debug('Delete failed: %s (%s)' % (e.reason, path))
         if not e.reason == 'Not Found':
             raise e
Example #42
    def get_syncer_instances(filter=lambda: True):
        # Import syncers from 'syncers' package and start them.
        # Does something like: from syncers.dropbox import Dropbox
        syncer_instances = {}
        # find classes inside syncers package that have the superclass SyncBase
        available_syncers = dict(find_modules_with_super_class(syncers, SyncBase))
        log.debug("available_syncers: %s" % list(available_syncers.keys()))

        for syncer in builtins.filter(filter, available_syncers.keys()):
            syncer_instances[syncer] = getattr(import_module(available_syncers[syncer]), syncer)()
        return syncer_instances
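find_modules_with_super_class is a project helper that is not shown here. One plausible standard-library sketch, yielding (class_name, module_path) pairs for subclasses found inside a package, which matches how the result is used above; this is an assumption, not the project's actual implementation:

import inspect
import pkgutil
from importlib import import_module

def find_modules_with_super_class(package, super_class):
    """Yield (class_name, module_path) for every subclass of super_class
    defined in the given package (a guess at the helper used above)."""
    for module_info in pkgutil.iter_modules(package.__path__):
        module_path = package.__name__ + '.' + module_info.name
        module = import_module(module_path)
        for name, obj in inspect.getmembers(module, inspect.isclass):
            if issubclass(obj, super_class) and obj is not super_class:
                yield name, module_path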
Example #43
    def post(self, url, values):

        try:
            url = self.url + url
            data = urllib.urlencode(values)
            req = urllib2.Request(url, data)
            response = urllib2.urlopen(req)
            data= response.read()
            log.debug(data)
            data = json.loads(data)
            return data
        except Exception:
            print("post request error")
            return None
Example #44
 def login(self):
     token_file = os.path.expanduser(self.configuration['token_file'])
     token_dir = os.path.dirname(token_file)
     if not os.path.exists(token_dir):
         os.makedirs(token_dir)
     try:
         with open(token_file) as token:
             self.access_token = token.read()
     except IOError:
         self.access_token = None
     if not (self.access_token):
         self.authorize()
     self.client = dropbox.client.DropboxClient(self.access_token)
     log.debug('dropbox authorized: ' + self.client.account_info()['email'])
Example #45
    def put(self):
        log.debug("alarmHandler put in")
        data = self.get_data()
        if data:
            log.debug(data)
            if data.has_key("deal_progress")and data.has_key("alarmId") :
                db.update_alarm_progress(data["alarmId"],data["deal_progress"])
                result = {}
                result["result"] = "ok"
                self.send_data(result)

                return
            else:
                result = {}
                result["result"] = "error"
                result["message"] = "data is error"
                self.send_data(result)
Example #46
 def put(self):
     log.debug("ManageChangePasswordHandler put in")
     data = self.get_data()
     if data:
         log.debug(data)
         if data.has_key("ManagePassword") and data.has_key("ManageId"):
             db.update_manage_passwd(data["ManageId"],data["ManagePassword"])
             result = {}
             result["result"] = "ok"
             self.send_data(result)
             save_record(self.login_user, "manage",data["ManageId"], "change_passwd", "update manage password")
             return
         else:
             result = {}
             result["result"] = "error"
             result["message"] = "data is error"
             self.send_data(result)
Example #47
 def post(self):
     log.debug("UploadHandler post in")
     if self.request.files == {}:
         result = {}
         result["error"] = "null file upload"
         self.send_data(result)
         return
     file_metas = self.request.files['file']# 提取表单中‘name’为‘file’的文件元数据
     for meta in file_metas:
         filename = meta['filename']
         upload_path = "./static/audio"
         filepath = os.path.join(upload_path, filename)
         log.debug("write %s"%filepath)
         with open(filepath, 'wb') as up:  # 有些文件需要已二进制的形式存储,实际中可以更改
             up.write(meta['body'])
     result = {}
     result["result"] = "ok"
     self.send_data(result)
Example #48
    def post(self):
        log.debug("ReleaseAlarmHandler post in")
        data = self.get_data()
        if data:
            log.debug(data)
            if data.has_key("alarmId"):
                zwaveid = db.get_zwaveid_from_alarm(data["alarmId"])
                result = {}
                result["result"] = "ok"
                self.send_data(result)

                client.releasealarm(zwaveid, self.login_user)
                return
            else:
                result = {}
                result["result"] = "error"
                result["message"] = "data is error"
                self.send_data(result)
Example #49
    def post(self):
        log.debug("AudioHandler post in")
        data = self.get_data()
        if data:
            log.debug(data)
            if data.has_key("AlarmID") and data.has_key("AlarmRemark") \
                    and data.has_key("AlarmAudio")and data.has_key("AlarmTelephone"):
                db.insert_alarm_deal(data["AlarmID"], self.login_user, data["AlarmTelephone"],data["AlarmRemark"], "%s.wav"%data["AlarmAudio"])
                db.update_alarm_progress(data["AlarmID"],1)
                result = {}
                result["result"] = "ok"
                self.send_data(result)

                return
            else:
                result = {}
                result["result"] = "error"
                result["message"] = "data is error"
                self.send_data(result)
Example #50
    def get(self):
        log.debug("alarmAllHandler get in")
        try:
            index = int(self.get_argument("index"))
        except:
            index = 1
        if index < 1:
            index = 1

        data = db.get_alarm_list(index)
        result = {}
        if data:
            result["result"] = "ok"
            result["maxindex"] = data["maxindex"]
            result["curruntindex"] = data["curruntindex"]
            result["data"] =[]
            for one in data["data"]:
                context = ""
                eventlist = db.get_events(one[0])
                for event in eventlist:
                    context = context + event[5] + " "
                if one[5] == 0:
                    user = "******"
                else:
                    user = db.get_username_by_id(one[5])
                result["data"].append({"id" : one[0],
                                        "create_time" : one[1],
                                        "zwaveid": one[2],
                                        "deviceid":one[3],
                                        "deal_progress":one[4],
                                        "deal_user":one[5],
                                        "deal_user_name": user,
                                        "deal_context": context
                                       })

            self.send_data(result)
        else:

            result["result"] = "error"
            result["message"] = "get alarm list failed"
            self.send_data(result)
Example #51
 def put(self):
     log.debug("ManageHandler put in")
     data = self.get_data()
     if data:
         log.debug(data)
         if data.has_key("ManageTelephone") and data.has_key("ManageId") \
                 and data.has_key("ManageName"):
             db.update_manage(data["ManageId"],data["ManageName"], data["ManageTelephone"])
             result = {}
             result["result"] = "ok"
             self.send_data(result)
             str = "update manage name %s telephone %s" % (
             data["ManageName"].encode('utf-8'), data["ManageTelephone"].encode('utf-8'))
             save_record(self.login_user, "manage", data["ManageId"], "update", str)
             return
         else:
             result = {}
             result["result"] = "error"
             result["message"] = "data is error"
             self.send_data(result)
Example #52
    def get(self):
        log.debug("ManageHandler get in")
        try:
            manage_id = int(self.get_argument("id"))
        except Exception:
            log.debug("param id is not int")
            return
        data = db.get_manage(manage_id)
        result = {}
        if data:
            result["result"] = "ok"
            result["data"] = {"manageid": data[0],
                              "managename": data[1],
                              "managetelephone": data[2]
                              }

        else:
            result["result"] = "error"
            result["message"] = "can not find this user"

        self.send_data(result)
Example #53
def alarm_sync():
    db = DB()
    client.get_token()
    while True:
        try:
            time.sleep(10)
            lastid = db.get_sync_id()
            events = client.get_alarm(lastid)
            if events is None:
                client.get_token()
                continue
            if not events:
                # log.debug("no events yet")
                continue
            for one in events:
                log.debug(one)
                db.save_event(one["id"], one["type"], one["deviceid"], one["zwavedeviceid"], one["eventtime"], one["objparam"])
        except:
            import traceback
            traceback.print_exc()
            continue
Example #54
 def get(self, html):
     url = "%s/%s"%(BASEDIR,html)
     log.debug(url)
     #try :
     self.render(url)
Example #55
 def stop(self):
     self.queue.put(None)  # trick to break out of while
     log.debug(self.__class__.__name__ + " stopped")
Example #56
 def __init__(self, queue=None):
     self.queue = queue or OrderedSetQueue()
     super().__init__()
     log.debug(self.__class__.__name__ + " init")
Example #57
 def get(self):
     log.debug("UploadHandler get in")
Example #58
 def get(self):
     log.debug("RedirectHandler")
     self.redirect("/static/index.html")
     pass