コード例 #1
0
ファイル: file_watcher.py プロジェクト: jandob/omniSync
    def __init__(self, event, watch_config, **kwargs):
        """Build a sync event from a watcher event and its watch config.

        Args:
            event: Source event. Its ``mask``, ``name``, ``path``,
                ``pathname``, ``dir`` and ``src_pathname`` attributes are
                read with ``getattr`` and default to ``None`` when absent.
            watch_config: Mapping providing the ``'target'``, ``'source'``
                and ``'syncers'`` entries; also stored as ``self.config``.
            **kwargs: Extra attributes merged into the instance after the
                event attributes are copied, so they can override those.
                Attributes assigned afterwards override ``kwargs`` in turn.
        """
        self._mask = getattr(event, 'mask', None)
        self.file_name = getattr(event, 'name', None)
        # Directory that contains the changed entry (no file/folder name).
        self.base_path = getattr(event, 'path', None)
        # Full path of the changed entry (including file/folder name).
        self.source_absolute = getattr(event, 'pathname', None)
        self.isdir = getattr(event, 'dir', None)
        self.type = self.get_type()
        # Merged before the derived paths are computed, so an overridden
        # ``base_path`` / ``source_absolute`` feeds into them.
        self.__dict__.update(kwargs)

        target_root = watch_config['target']
        source_root = watch_config['source']
        self.target_base_dir = target_root
        self.source_base_dir = source_root
        self.syncers = watch_config['syncers']
        self.config = watch_config

        # Paths of the entry and of its parent, relative to the source root.
        relative_entry = os.path.relpath(self.source_absolute, source_root)
        self.source_relative = relative_entry
        relative_parent = os.path.relpath(self.base_path, source_root)
        self.source_base_dir_relative = relative_parent

        # The same two locations mirrored below the target root.
        self.target_absolute = os.path.join(target_root, relative_entry)
        mirrored_parent = os.path.join(target_root, relative_parent)
        self.target_base_dir_absolute = os.path.normpath(mirrored_parent)

        self.moved_from_path = getattr(event, 'src_pathname', None)
        log.debug('EVENT %s (%s): %s ' % (
            self.type, self.syncers, self.source_absolute))
コード例 #2
0
 def query_data(self, sql):
     """Run ``sql`` and return the fetched rows with normalised values.

     Each row is fetched through a ``DictCursor``. Values are converted
     in place: ``datetime`` -> ``'%Y-%m-%d %H:%M:%S'`` string, ``date`` ->
     ``'%Y-%m-%d'`` string, ``Decimal`` -> ``float``.

     Args:
         sql: The statement passed unchanged to ``cursor.execute``.

     Returns:
         Whatever ``cursor.fetchall()`` returned (after conversion), or
         ``None`` when any exception was raised; the exception is logged
         and not re-raised.
     """
     con = self.__db_pool.connection()
     cursor = con.cursor(cursor=pymysql.cursors.DictCursor)
     try:
         log.debug('执行sql:%s' % sql)
         cursor.execute(sql)
         results = cursor.fetchall()
         for result in results:
             for field, value in result.items():
                 # datetime must be tested first: it is a subclass of date.
                 if isinstance(value, datetime.datetime):
                     result[field] = value.strftime('%Y-%m-%d %H:%M:%S')
                 elif isinstance(value, datetime.date):
                     result[field] = value.strftime('%Y-%m-%d')
                 elif isinstance(value, decimal.Decimal):
                     result[field] = float(value)
         # Previously this message hung off the ``for`` loop's ``else`` and
         # was therefore logged after every successful query.
         if not results:
             log.debug('sql查询结果为空')
         return results
     except Exception as e:
         log.error('执行sql异常:\n%s' % e)
         return None
     finally:
         cursor.close()
         con.close()
コード例 #3
0
ファイル: alarm.py プロジェクト: ssdxiao/alarm_platform
    def get(self):
        """Send one alarm record selected by the ``id`` request argument.

        When the argument cannot be read or converted to ``int`` the
        handler only logs and returns without sending a payload. Otherwise
        it sends ``{"result": "ok", "data": {...}}`` built from the first
        six columns of the row returned by ``db.get_alarm``, or an error
        payload when no row was found.
        """
        log.debug("AlarmHandler get in")
        try:
            # Named ``alarm_id`` so the builtin ``id`` is not shadowed.
            alarm_id = int(self.get_argument("id"))
        except Exception:
            # Covers a missing argument as well as a non-numeric value,
            # without also trapping KeyboardInterrupt/SystemExit as the
            # previous bare ``except:`` did.
            log.debug("param id is not int")
            return

        data = db.get_alarm(alarm_id)
        if data:
            result = {
                "result": "ok",
                "data": {
                    "id": data[0],
                    "create_time": data[1],
                    "zwaveid": data[2],
                    "deviceid": data[3],
                    "deal_progress": data[4],
                    "deal_user": data[5],
                },
            }
        else:
            # NOTE(review): message mentions "user" although an alarm is
            # looked up; left unchanged because clients may match on it.
            result = {
                "result": "error",
                "message": "can not find this user",
            }

        self.send_data(result)
コード例 #4
0
def add_root_url(parser_params={}):
    """Queue the listing pages of every 1kkk.com category in ``WP_urls``.

    Category paths are scraped from the site's front page (plus one
    hard-coded path). For each category either the numbered pages
    ``<category>-p1 .. -pN`` are queued, when a page count can be read, or
    the "next page" links are followed one by one.

    Args:
        parser_params: Only logged; not otherwise used here.
    """
    log.debug('''
        添加根url
        parser_params : %s
        ''' % str(parser_params))

    site = 'http://www.1kkk.com'
    category_regex = '<li class="">.*?href="(.*?)" target="_parent"><span>.*?</span></a></li>'
    page_count_regex = r'\.\.\.<a href=".*?">(.*?)</a><a href=".*?">下一页</a>'
    next_page_regex = '<div id="search_fy">.*<a href="(.*?)" style=\'padding: 5px 20px; margin: 0 8px;\'> 下一页 </a>'

    html = tools.get_html_by_urllib(site)
    infos = tools.get_info(html, category_regex)
    infos = infos + ['//manhua-china//']

    for info in infos:
        # Drop the trailing character of the scraped path (a '/').
        category_url = site + info[:-1]
        html = tools.get_html_by_urllib(category_url + '-p1')
        page_count = tools.get_info(html, page_count_regex)

        if page_count:
            last_page = int(''.join(page_count))
            for page in range(1, last_page + 1):
                base_parser.add_url('WP_urls', SITE_ID,
                                    category_url + '-p%d' % page)
            continue

        # No page count on the page: walk the "next page" links instead.
        # NOTE(review): as before, the first page itself is not queued in
        # this branch, only the pages reached through "next" links.
        url = category_url
        visited = {url}
        while True:
            html = tools.get_html_by_urllib(url)
            next_path = ''.join(tools.get_info(html, next_page_regex))
            if not next_path:
                # Previously the bare site root was used as the next URL,
                # which is always truthy, so the loop never terminated.
                break
            url = site + next_path
            if url in visited:
                # Guard against a "next" link that points back at a page
                # already seen.
                break
            visited.add(url)
            base_parser.add_url('WP_urls', SITE_ID, url)
コード例 #5
0
ファイル: manage.py プロジェクト: ssdxiao/alarm_platform
    def post(self):
        """Create a manager account from the posted data.

        The payload returned by ``self.get_data()`` must contain the keys
        ``ManageTelephone``, ``ManagePassword`` and ``ManageName`` with a
        non-empty ``ManageName``. On success the manager is inserted, an
        ``ok`` result is sent and an audit record is saved; in every other
        case an error result is sent.
        """
        log.debug("ManageHandler post in")
        data = self.get_data()
        if data:
            log.debug(data)
            required_keys = ("ManageTelephone", "ManagePassword", "ManageName")
            # ``in`` replaces ``dict.has_key``, which exists only in
            # Python 2; ``in`` works on both major versions.
            if not all(key in data for key in required_keys):
                log.error("Manaage data key is not right")
            elif data["ManageName"] == "":
                log.error("ManageName is NULL")
            else:
                db.insert_manage(data["ManageName"], data["ManageTelephone"],
                                 data["ManagePassword"])

                self.send_data({"result": "ok"})
                # Local renamed from ``str`` so the builtin is not shadowed.
                record = "add manage name %s telephone %s" % (
                    data["ManageName"].encode('utf-8'),
                    data["ManageTelephone"].encode('utf-8'))
                save_record(self.login_user, "manage", 0, "add", record)
                return
        else:
            log.error("data is none")

        self.send_data({
            "result": "error",
            "message": "Manaage info is error",
        })
コード例 #6
0
    def login(self):
        """Log in to QQ Zone with the credentials from ``config/user.json``.

        Returns:
            The result of ``self.login_on(driver)`` when the login form was
            submitted, or the string ``'error'`` when the credentials could
            not be read or the login page interaction failed.
        """
        log.run().debug("执行Like.login()")  # log entry

        # Read the account credentials.
        try:
            log.info().info("正在读取用户信息")  # log entry

            with open("config/user.json", "r", encoding="utf-8") as usr:
                infos = json.load(usr)
            account = infos['account']
            password = infos['password']

            log.info().info("用户信息读取成功")  # log entry
        except Exception as e:
            log.exception().exception(e)  # log entry
            log.error().error("用户信息读取失败")  # log entry
            return 'error'

        # Login part.
        log.info().info("开始登录QQ空间")  # log entry
        driver = webdriver.PhantomJS()
        driver.maximize_window()
        url = "https://qzone.qq.com/"
        driver.get(url)
        driver.implicitly_wait(3)

        try:
            driver.switch_to.frame("login_frame")
            try:
                driver.find_element_by_id('switcher_plogin').click()
            except Exception:
                # The switcher is absent when the account/password form is
                # already shown; nothing to switch in that case.
                log.run().info("默认显示账号密码登录,不需要切换")

            driver.find_element_by_id('u').clear()
            driver.find_element_by_id('u').send_keys(account)
            driver.find_element_by_id('p').click()
            driver.find_element_by_id('p').send_keys(password)

            driver.find_element_by_id('login_button').click()
            time.sleep(3)
            driver.implicitly_wait(20)

            log.debug().debug("即将开始验证QQ登录")

            return self.login_on(driver)  # check whether login succeeded

        except Exception as login_01:
            log.exception().exception(login_01)  # log entry
            log.error().info("QQ空间登录模块获取失败")  # log entry
            # The browser is of no use after a failed login; previously it
            # was left running.
            driver.quit()
            return 'error'
コード例 #7
0
def add_root_url(parser_params={}):
    """Queue Baidu search result pages for every keyword combination.

    Args:
        parser_params: Mapping with the keys ``search_keyword1``,
            ``search_keyword2`` and ``search_keyword3`` (all three are
            read; a missing key raises ``KeyError``). The mapping itself
            is attached to every queued URL as ``remark``.
    """
    log.debug('''
        添加根url
        parser_params : %s
        ''' % str(parser_params))

    first_keywords = parser_params['search_keyword1']
    second_keywords = parser_params['search_keyword2']
    third_keywords = parser_params['search_keyword3']  # read, not used below

    # Every pairing of a first-group keyword with a second-group keyword.
    search_keywords = [
        (first + second).strip()
        for first in first_keywords
        for second in second_keywords
    ]
    # When one group is empty there are no pairings; use the other group.
    if not first_keywords:
        search_keywords = second_keywords
    if not second_keywords:
        search_keywords = first_keywords

    for keyword in search_keywords:
        if not keyword.strip():
            continue
        for offset in range(0, 760, 10):
            link = "https://www.baidu.com/s?wd=%s%s&pn=%d" % (keyword, ' 视频', offset)
            link = tools.quote(link, safe='#/:?=&%')
            added = base_parser.add_url('VA_urls', SITE_ID, link,
                                        remark=parser_params)
            if not added:
                # add_url reported nothing added: reset the entry to TODO.
                base_parser.update_url('VA_urls', link, Constance.TODO)
コード例 #8
0
def add_root_url(parser_params={}):
    """Queue Youku programme pages found on three category listings.

    Resets the status of the depth-0 URLs of this site, then walks pages
    1..13 of each listing, opens every thumbnail link and queues the
    ``desc-link`` target found on that page in ``PROGRAM_urls``.

    Args:
        parser_params: Only logged; not otherwise used here.
    """
    log.debug('''
        添加根url
        parser_params : %s
        ''' % str(parser_params))

    _db = base_parser.MongoDB()
    _db.set_unique_key('PROGRAM_EPISODE_info', 'episode_url')
    _db.update('PROGRAM_urls', {'depth': 0, 'site_id': SITE_ID}, {'status': 0}, multi=True)

    for page_num in range(1, 14):
        urls = [
            'http://list.youku.com/category/show/c_85_g_热门网综_s_1_d_1_p_%d.html' % page_num,
            'http://list.youku.com/category/show/c_97_g_优酷出品_s_1_d_1_p_%d.html' % page_num,
            'http://list.youku.com/category/show/c_96_g_优酷出品_s_1_d_1_p_%d.html' % page_num,
        ]
        for url in urls:
            print(url)
            print('********************************************************')
            html = tools.get_html_by_urllib(url)
            # The listing shows this text when the filter has no results.
            if tools.get_info(html, ['小酷没有筛选到相关视频']):
                continue
            thumbs = tools.get_tag(html, 'div', {'class': 'p-thumb'})
            for thumb in thumbs:
                # Reset per item: the handler below prints this value, and
                # it used to be unbound (first item) or stale (later items)
                # when the failure happened before the page was fetched.
                link_html = None
                try:
                    play_url = tools.get_full_url('http:', thumb.a['href'])
                    link_html = tools.get_html_by_urllib(play_url)
                    desc_link = tools.get_tag(link_html, 'a', {'class': 'desc-link'}, find_all=False)
                    program_url = tools.get_full_url('http:', desc_link['href'])
                    base_parser.add_url('PROGRAM_urls', SITE_ID, program_url, depth=0)
                except Exception as e:
                    log.error(e)
                    print(link_html)
コード例 #9
0
def add_root_url(parser_params={}):
    """Queue the v.qq.com TV-series listing pages in ``PROGRAM_urls``.

    The page count is read from the first listing page; one URL is queued
    per page index ``0..page_count`` with an offset of 30 items per page.

    Args:
        parser_params: Only logged; not otherwise used here.
    """
    log.debug('''
        添加根url
        parser_params : %s
        ''' % str(parser_params))

    # Home - variety - Tencent originals: seeding is currently disabled.
    #   http://v.qq.com/x/list/variety?offset=0&isource=2
    #   http://v.qq.com/x/list/variety?offset=%d&isource=2

    # Home - TV series - Tencent originals (self-produced series).
    root_url = 'http://v.qq.com/x/list/tv?sort=4&offset=0&iarea=-1&iyear=-1&itype=843'
    page_url = 'http://v.qq.com/x/list/tv?sort=4&offset=%d&iarea=-1&iyear=-1&itype=843'
    remark = {'classify': '电视剧'}

    # Read the number shown in the pager of the first page.
    html, _ = tools.get_html_by_requests(root_url)
    regex = '<div class="mod_pages" r-notemplate="true">.*>(\d*)</a>.*?</span>'
    page_count = tools.get_info(html, regex, fetch_one=True)
    print(page_count)
    last_page = int(page_count) if page_count else 0

    for page in range(last_page + 1):
        base_parser.add_url('PROGRAM_urls', SITE_ID, page_url % (page * 30),
                            remark=remark)
コード例 #10
0
ファイル: audio.py プロジェクト: ssdxiao/alarm_platform
    def get(self):
        """Send the audio records of the alarm given by the ``alarm`` argument.

        The payload is ``{"result": "ok", "data": [...]}``; each entry is
        built from columns 2..6 of a row returned by ``db.get_audio_list``
        plus ``has_audio`` (1 when ``./static/audio/<column 6>`` exists,
        else 0). The list is empty when no rows are returned or when the
        ``alarm`` argument is missing or not an integer.
        """
        log.debug("alarmAllHandler get in")
        result = {"result": "ok", "data": []}

        try:
            alarm_id = int(self.get_argument("alarm"))
        except Exception:
            # The old handler assigned an unrelated ``index`` variable here
            # and then failed with UnboundLocalError on ``alarm_id``.
            # NOTE(review): an empty "ok" list is sent for a bad argument;
            # confirm this is the response clients should get.
            log.debug("param alarm is not int")
            self.send_data(result)
            return

        data = db.get_audio_list(alarm_id)
        for one in data or ():
            has_audio = 1 if os.path.exists("./static/audio/%s" % one[6]) else 0
            result["data"].append({
                "deal_user": one[2],
                "telephone": one[3],
                "deal_time": one[4],
                "deal_remark": one[5],
                "audio": one[6],
                "has_audio": has_audio,
            })

        self.send_data(result)
コード例 #11
0
def parser_program_url(url_info):
    """Parse a listing page and queue one detail URL per programme found.

    Args:
        url_info: Mapping with at least ``url``, ``site_id`` and
            ``remark``; ``remark['classify']`` is copied onto every queued
            detail URL.

    The listing URL is marked ``EXCEPTION`` when no HTML could be fetched,
    otherwise ``DONE`` after all items were processed.
    """
    log.debug('处理 \n' + tools.dumps_json(url_info))

    root_url = url_info['url']
    site_id = url_info['site_id']
    classify = url_info['remark']['classify']

    # Fetch the listing page.
    html, _ = tools.get_html_by_requests(root_url)
    if not html:
        base_parser.update_url('PROGRAM_urls', root_url, Constance.EXCEPTION)
        return

    id_regex = 'r-props="{id: \'(.*?)\''
    for program_block in tools.get_tag(html, 'li', {'class': "list_item"}):
        program_id = tools.get_info(str(program_block), id_regex,
                                    fetch_one=True)
        if not program_id:
            # Without an id the detail URL would be meaningless; such an
            # item used to be queued anyway.
            continue

        # Detail page address.
        program_url = 'http://v.qq.com/detail/5/%s.html' % program_id
        base_parser.add_url("PROGRAM_urls",
                            site_id,
                            program_url,
                            depth=1,
                            remark={
                                'program_id': program_id,
                                'classify': classify
                            })

    base_parser.update_url("PROGRAM_urls", root_url, Constance.DONE)
コード例 #12
0
def add_root_url(search_keyword1=None, search_keyword2=None, search_keyword3=None):
    """Queue Douban movie search result pages for every keyword.

    Args:
        search_keyword1: List of keywords; ``None`` means an empty list.
        search_keyword2: List of keywords; ``None`` means an empty list.
        search_keyword3: List of keywords; only recorded in the remark.

    For each keyword of the first two lists the page count is read from
    the first result page, then one URL per page (15 results each) is
    queued in ``VA_urls``.
    """
    # ``None`` defaults replace the former ``[]`` defaults: those list
    # objects were shared between calls and handed out inside ``remark``.
    search_keyword1 = [] if search_keyword1 is None else search_keyword1
    search_keyword2 = [] if search_keyword2 is None else search_keyword2
    search_keyword3 = [] if search_keyword3 is None else search_keyword3

    log.debug(
        '''
        添加根url
        search_keyword1 = %s
        search_keyword2 = %s
        search_keyword3 = %s
        ''' %
        (str(search_keyword1), str(search_keyword2), str(search_keyword3)))

    remark = {
        'search_keyword1': search_keyword1,
        'search_keyword2': search_keyword2,
        'search_keyword3': search_keyword3
    }

    search_keywords = search_keyword1 + search_keyword2
    page_regex = '<div class="paginator">.*<a href.*?>(.*?)</a><span class="next"'

    for search_keyword in search_keywords:
        # Read the number of result pages from the first page.
        url = 'https://movie.douban.com/subject_search?start=0&search_text=%s&cat=1002' % search_keyword
        html = tools.get_html_by_urllib(url)
        page_count = tools.get_info(html, page_regex)
        page_count = int(page_count[0]) if page_count else 0
        print(page_count)

        for page in range(page_count):
            url = 'https://movie.douban.com/subject_search?start=%d&search_text=%s&cat=1002' % (
                page * 15, search_keyword)
            if not base_parser.add_url('VA_urls', SITE_ID, url, remark=remark):
                base_parser.update_url('VA_urls', url, Constance.TODO)
コード例 #13
0
def add_site_info():
    """Register this site (``SITE_ID`` / ``NAME``) in ``PROGRAM_site_info``."""
    log.debug('添加网站信息')
    base_parser.add_website_info(
        'PROGRAM_site_info', SITE_ID, "http://best.le.com/", NAME)
コード例 #14
0
    def deal_request(self, name):
        """Dispatch a web request to the handler method called ``name``.

        Args:
            name: One of ``get_article_list``, ``get_article_content``,
                ``get_read_watched_count`` or ``get_comment``.

        Returns:
            The selected handler's result for the request's ``data`` and
            ``req_url`` inputs, or ``''`` for any other ``name``.
        """
        web.header('Content-Type', 'text/html;charset=UTF-8')

        params = json.loads(json.dumps(web.input()))
        data = params.get('data')  # data is a str
        req_url = params.get('req_url')

        log.debug('''
            method : %s
            url   :%s
            ''' % (name, req_url))

        supported = (
            'get_article_list',
            'get_article_content',
            'get_read_watched_count',
            'get_comment',
        )
        if name not in supported:
            # Returning the empty string here does not trigger the
            # node-js http callback.
            return ''

        handler = getattr(self, name)
        return handler(data, req_url)
コード例 #15
0
def add_site_info():
    """Register this site (``SITE_ID`` / ``NAME``) in ``PROGRAM_site_info``."""
    log.debug('添加网站信息')
    base_parser.add_website_info(
        'PROGRAM_site_info', SITE_ID, "http://news.v1.cn/V1make.shtml", NAME)
コード例 #16
0
def add_site_info():
    """Register this site (``SITE_ID`` / ``NAME``) in ``site_info``."""
    log.debug('添加网站信息')
    base_parser.add_website_info(
        'site_info', SITE_ID, 'https://www.itouchtv.cn/', NAME)
コード例 #17
0
def parser(url_info):
    """Parse one Baidu News result page and store the articles it links.

    Args:
        url_info: Mapping with ``_id``, ``url`` and ``remark``;
            ``remark['offset']`` is the paging offset of this page.

    For every result block the "more related news" link (if any) is
    queued, the article is extracted and, when its content contains
    Chinese text, stored. If storing reports that crawling should not
    continue the loop stops; otherwise the next page (offset + 50) is
    queued. The page URL is marked ``DONE`` at the end.
    """
    url_info['_id'] = str(url_info['_id'])
    log.debug('处理 \n' + tools.dumps_json(url_info))

    root_url = url_info['url']
    remark = url_info['remark']
    # A missing offset used to surface as a TypeError on ``offset += 50``.
    offset = remark.get('offset') or 0

    html = tools.get_html_by_webdirver(root_url)
    headers = tools.get_tag(html, 'div', {'class': 'result'}, find_all=True)
    if not headers:
        base_parser.update_url('BAIDU_NEWS_urls', root_url, Constance.DONE)
        # Stop here: falling through ran the loop's ``else`` branch and
        # queued the next page even though this one had no results.
        return

    for header in headers:
        # "See more related news" link.
        regex = ' <span class="c-info"><a.*?href="(.*?)".*?查看更多相关新闻'
        more_news_url = tools.get_info(str(header), regex, fetch_one=True)
        if more_news_url:
            more_news_url = tools.get_full_url('http://news.baidu.com', more_news_url)
            more_news_url = more_news_url.replace('amp;', '')
            base_parser.add_url('BAIDU_NEWS_urls', SITE_ID, more_news_url, depth=1, remark={'offset': 0})

        url = header.h3.a['href']
        article_extractor = ArticleExtractor(url)
        content = article_extractor.get_content()
        if content:
            title = article_extractor.get_title()
            release_time = article_extractor.get_release_time()
            author = article_extractor.get_author()
            website_domain = tools.get_domain(url)
            article_uuid = tools.get_uuid(title, website_domain)
            website_name = ''
            website_position = None

            log.debug('''
                uuid         %s
                title        %s
                author       %s
                release_time %s
                domain       %s
                url          %s
                content      %s
                '''%(article_uuid, title, author, release_time, website_domain, url, '...'))

            # Store the article.
            if tools.is_have_chinese(content):
                is_continue = self_base_parser.add_news_acticle(
                    article_uuid, title, author, release_time, website_name,
                    website_domain, website_position, url, content)

                if not is_continue:
                    break
    else:
        # Loop finished without a break: the whole page was stored, so
        # continue with the next page.
        offset += 50
        url = tools.replace_str(root_url, r'pn=\d*', 'pn=%d' % offset)
        base_parser.add_url('BAIDU_NEWS_urls', SITE_ID, url, depth=0, remark={'offset': offset})

    base_parser.update_url('BAIDU_NEWS_urls', root_url, Constance.DONE)
コード例 #18
0
def add_site_info():
    """Register this site (``SITE_ID`` / ``NAME``) in ``BAIDU_NEWS_site_info``."""
    log.debug('添加网站信息')
    base_parser.add_website_info(
        'BAIDU_NEWS_site_info',
        site_id=SITE_ID,
        url='http://news.baidu.com',
        name=NAME,
    )
コード例 #19
0
ファイル: tencent.py プロジェクト: zhongyinhei/video_news
def add_site_info():
    """Register this site (``SITE_ID`` / ``NAME``) in ``VIDEO_NEWS_site_info``."""
    log.debug('添加网站信息')
    base_parser.add_website_info(
        'VIDEO_NEWS_site_info',
        site_id=SITE_ID,
        url='https://v.qq.com',
        name=NAME,
    )
コード例 #20
0
 def calculate_time(*args, **kw):
     """Call ``func`` with the given arguments and log the elapsed time.

     ``func`` comes from the enclosing scope (not visible in this block).
     The elapsed wall-clock seconds are logged at debug level and the
     call's return value is passed through. Nothing is logged when
     ``func`` raises.
     """
     started = time.time()
     outcome = func(*args, **kw)
     elapsed = time.time() - started
     log.debug(func.__name__ + " run time  = " + str(elapsed))
     return outcome
コード例 #21
0
def add_root_url(search_keyword1=None, search_keyword2=None, search_keyword3=None):
    """Queue Sogou WeChat search result pages for every keyword.

    Args:
        search_keyword1: List of keywords; ``None`` means an empty list.
        search_keyword2: List of keywords; ``None`` means an empty list.
        search_keyword3: List of keywords; only recorded in the remark.

    For each non-empty keyword of the first two lists, result pages 1..10
    are queued in ``VA_urls``.
    """
    # ``None`` defaults replace the former ``[]`` defaults: those list
    # objects were shared between calls and handed out inside ``remark``.
    search_keyword1 = [] if search_keyword1 is None else search_keyword1
    search_keyword2 = [] if search_keyword2 is None else search_keyword2
    search_keyword3 = [] if search_keyword3 is None else search_keyword3

    log.debug(
        '''
        添加根url
        search_keyword1 = %s
        search_keyword2 = %s
        search_keyword3 = %s
        ''' %
        (str(search_keyword1), str(search_keyword2), str(search_keyword3)))

    remark = {
        'search_keyword1': search_keyword1,
        'search_keyword2': search_keyword2,
        'search_keyword3': search_keyword3
    }

    search_keywords = search_keyword1 + search_keyword2

    for search_keyword in search_keywords:
        if not search_keyword:
            continue
        # At most 10 result pages are shown.
        for page in range(1, 11):
            url = 'http://weixin.sogou.com/weixin?type=2&query=' + search_keyword + '&page=%d&ie=utf8' % page
            if not base_parser.add_url('VA_urls', SITE_ID, url, remark=remark):
                base_parser.update_url('VA_urls', url, Constance.TODO)
コード例 #22
0
def main():
    """Run the spider in an endless loop, one crawl at a time.

    While ``task_status.is_doing`` is set the loop sleeps ``SLEEP_TIME``
    seconds. Otherwise it sets the flag, loads the keywords, builds a
    ``Spider`` with all registered parsers and starts it; the spider's end
    callback clears the flag again.
    """

    def begin_callback():
        log.info('\n********** spider_main begin **********')

    def end_callback():
        log.info('\n********** spider_main end **********')
        task_status.is_doing = False

    while True:
        if not task_status.is_doing:
            task_status.is_doing = True

            keywords = Keywords().get_keywords()

            # Configure the spider.
            spider = Spider(tab_list,
                            tab_unique_key_list,
                            tab_ensure_index_list,
                            parser_count=1,
                            site_parsers=parser_siteid_list,
                            begin_callback=begin_callback,
                            end_callback=end_callback,
                            parser_params=keywords)

            # Register the parsers.
            for parser in parser_list:
                spider.add_parser(parser)

            spider.start()
        else:
            # A crawl is still running: wait before checking again.
            log.debug('is doing sleep ...%ss' % SLEEP_TIME)
            time.sleep(SLEEP_TIME)
コード例 #23
0
ファイル: mqtt.py プロジェクト: Esiravegna/0pizero_sensors
    def connect(self):
        """
        Connect to the broker, define the callbacks, and start the loop.

        This also sets the Last Will and Testament (LWT), which will be
        published in the event of an unclean or unexpected disconnection.

        Returns:
            ``True`` when the ``connect`` call completed without raising,
            ``False`` when it raised (the error is logged, not re-raised).
        """
        self.is_connected = False
        # Add the callbacks
        self.mqttc.on_connect = self.on_mqtt_connect
        self.mqttc.on_disconnect = self.on_mqtt_disconnect

        # Set the Last Will and Testament (LWT) *before* connecting
        self.mqttc.will_set(self.lwt, payload="0", qos=0, retain=True)

        # Attempt to connect
        log.debug("Connecting to {}:{}...".format(self.host, self.port))
        try:
            self.mqttc.connect(self.host, self.port, 60)
        except Exception as e:
            log.error("Error connecting to {}:{}: {}".format(
                self.host, self.port, str(e)))
        else:
            # Only report success when connect() did not raise; the flag
            # used to be set unconditionally, even after an error.
            self.is_connected = True
        # Start the network loop in either case, as before.
        # NOTE(review): presumably this lets the client retry after a
        # failed first connect -- confirm against the paho-mqtt version.
        self.mqttc.loop_start()
        return self.is_connected
コード例 #24
0
ファイル: app.py プロジェクト: lietu/pywebstatmon
    def start(self, args):
        """Start logging, monitoring and, if configured, the web frontend.

        Args:
            args: Object with a ``poll_seconds`` attribute; a truthy value
                overrides the ``poll_seconds`` monitor setting.
        """
        enable_log()
        config = self.config

        # Populate the results before any web request can come in.
        self.results = App._init_results(config)

        poll_seconds = args.poll_seconds
        if poll_seconds:
            config.override_monitor_setting('poll_seconds', poll_seconds)

        log_file = config.log_file
        if log_file:
            set_log_file(log_file)
            log.debug("Opening log file {}".format(config.log_file))

        self._start_monitoring()

        if not config.http_port:
            return
        log.info("Starting web frontend on port {}".format(config.http_port))
        self._start_frontend()
コード例 #25
0
    def is_have_new_article(self, account_id='', account=''):
        '''
        @summary: Check whether the official account published today
        ---------
        @param account_id: passed on to the account-block lookup
        @param account: passed on to the account-block lookup
        ---------
        @result: constance.VERIFICATION_CODE when the lookup returned that
                 marker, constance.ERROR when no release timestamp was
                 found, otherwise constance.UPDATE or constance.NOT_UPDATE
        '''

        account_block = self.__get_account_blocks(account_id, account)
        if account_block == constance.VERIFICATION_CODE:
            return constance.VERIFICATION_CODE

        regex = "timeConvert\('(\d*?)'\)"
        release_time = tools.get_info(account_block, regex, fetch_one=True)
        if not release_time:
            return constance.ERROR

        release_time = tools.timestamp_to_date(int(release_time))
        log.debug("最近发文时间 %s" % release_time)

        # The release date string is compared with today's date string.
        today = tools.get_current_date('%Y-%m-%d')
        if release_time >= today:
            return constance.UPDATE
        return constance.NOT_UPDATE
コード例 #26
0
ファイル: manage.py プロジェクト: ssdxiao/alarm_platform
    def get(self):
        """Send one page of the manager list.

        The page number comes from the ``index`` request argument; a
        missing, non-integer or non-positive value falls back to page 1.
        On success the payload carries ``maxindex``, ``curruntindex`` and
        a ``data`` list built from columns 0, 1, 2, 5 and 6 of every row;
        otherwise an error payload is sent.
        """
        log.debug("ManaageAllHandler get in")
        try:
            index = int(self.get_argument("index"))
        except Exception:
            # Covers a missing argument as well as a non-numeric value,
            # without also trapping KeyboardInterrupt/SystemExit as the
            # previous bare ``except:`` did.
            index = 1
        if index <= 0:
            index = 1

        data = db.get_manage_list(index)
        if data:
            result = {
                "result": "ok",
                "maxindex": data["maxindex"],
                "curruntindex": data["curruntindex"],
                "data": [
                    {
                        "manageid": one[0],
                        "managename": one[1],
                        "managetelephone": one[2],
                        "managelogintime": one[5],
                        "managelogouttime": one[6],
                    }
                    for one in data["data"]
                ],
            }
        else:
            result = {
                "result": "error",
                "message": "get Manage list failed",
            }

        self.send_data(result)
コード例 #27
0
def add_root_url(parser_params={}):
    """Queue one iQiyi search URL per programme in ``mms_urls``.

    Args:
        parser_params: Iterable of programme rows. Indexes 0..4 of each
            row are read as programme id, channel name, programme name,
            programme type and image URL.
    """
    log.debug('''
        添加根url
        parser_params : %s
        ''' % str(parser_params))

    for program in parser_params:
        program_id, chan_name, program_name, program_type, image_url = (
            program[0], program[1], program[2], program[3], program[4])

        # The type is part of the search text unless it is the catch-all
        # '其他' type.
        if program_type == '其他':
            url = 'http://so.iqiyi.com/so/q_%s?source=input&sr=1170053009947' % program_name
        else:
            url = 'http://so.iqiyi.com/so/q_%s %s?source=input&sr=1170053009947' % (
                program_name, program_type)

        remark = {
            'program_id': program_id,
            'program_name': program_name,
            'chan_name': chan_name,
            'program_type': program_type,
            'image_url': image_url
        }
        base_parser.add_url('mms_urls', SITE_ID, url, remark=remark)
コード例 #28
0
    def __input_data(self):
        """Move a batch of TODO URLs from the database into the collector.

        Does nothing (besides logging) when no write space is left. At
        most ``self._url_count`` URLs, capped at the free write size, are
        fetched; when ``self._depth`` is truthy only URLs with a depth of
        at most that value are fetched. Every fetched URL is marked DOING
        and the batch is passed to ``put_urls``. The collector is stopped
        when ``is_all_have_done()`` reports true.
        """
        if self.get_max_write_size() == 0:
            log.debug("collector 已满 size = %d" % self.get_max_read_size())
            return

        url_count = min(self._url_count, self.get_max_write_size())

        query = {"status": Constance.TODO}
        if self._depth:
            query["depth"] = {"$lte": self._depth}
        urls_list = self._db.find(self._tab_urls, query, limit=url_count)

        # Mark the fetched URLs as being processed.
        doing = {'status': Constance.DOING}
        for url in urls_list:
            self._db.update(self._tab_urls, url, doing)

        # Store the URLs in the buffer.
        self.put_urls(urls_list)

        if self.is_all_have_done():
            self.stop()
コード例 #29
0
def parser_comment(content_id, wall_id, page=1):
    """Crawl the comments of one article, page by page.

    Args:
        content_id: Article id sent as ``contentid``.
        wall_id: Sent as ``wallId``.
        page: First page to fetch (default 1).

    Each reply and, when present, the comment it replies to
    (``replySource``) is handed to ``deal_comment``. Crawling stops when a
    page has no replies or when ``deal_comment`` returns a falsy value
    for a comment.
    """
    # Pages are walked in a loop instead of one recursive call per page,
    # so long threads cannot exhaust the recursion limit.
    while True:
        log.debug('正在爬取第 %s 页文章评论 content_id = %s' % (page, content_id))
        flow_comment_url = 'http://sns-comment.iqiyi.com/v2/comment/get_comments.action?contentid={content_id}&page={page}&authcookie=null&page_size=40&wallId={wall_id}&agenttype=117&t={timestamp_m}'.format(
            content_id=content_id,
            page=page,
            wall_id=wall_id,
            timestamp_m=int(tools.get_current_timestamp() * 1000))

        comment_json = tools.get_json_by_requests(flow_comment_url) or {}
        data = comment_json.get('data') or {}
        # data also carries 'totalCount' and 'count', which could serve as
        # a paging criterion; they are not needed here.

        replies = data.get('replies') or []
        if not replies:
            return

        for reply in replies:
            reply_source = reply.get("replySource", {})
            # Only handle the quoted comment when there is one: for an
            # empty value deal_comment returns None, which used to be
            # taken as "stop" and ended the crawl at the first plain
            # comment.
            if reply_source and not deal_comment(reply_source):
                return

            if not deal_comment(reply):
                return

        page += 1
コード例 #30
0
    def add_account_info(self, account_info):
        """Log the account info and add it to ``wechat_account``.

        Args:
            account_info: Mapping describing the official account; its
                ``__biz`` entry is passed as the third argument to
                ``WechatService._es.add``.
        """
        log.debug('''
            -----公众号信息-----
            %s''' % tools.dumps_json(account_info))

        biz = account_info.get('__biz')
        WechatService._es.add('wechat_account', account_info, biz)
コード例 #31
0
def deal_comment(reply):
    """Extract the fields of one comment and store it.

    Args:
        reply: Mapping describing a comment; a falsy value is ignored.

    Returns:
        ``None`` for a falsy ``reply``, otherwise the return value of
        ``self_base_parser.add_comment``.
    """
    if not reply:
        return

    user_info = reply.get('userInfo', {})

    comment_id = reply.get('id')
    pre_id = reply.get('replyId')
    content = reply.get('content')
    article_id = reply.get('mainContentId')
    release_time = tools.timestamp_to_date(reply.get('addTime'))
    head_url = user_info.get('icon')
    consumer = user_info.get('uname')
    gender = int(user_info.get('gender'))
    up_count = reply.get('likes')

    # TODO: emotion is a random placeholder value in 0..2.
    emotion = random.randint(0, 2)
    hot_id = comment_id

    log.debug('''
        评论id:  %s
        父id      %s
        文章id    %s
        发布人:  %s
        头像地址  %s
        性别      %s
        内容:    %s
        点赞量    %s
        发布时间  %s
        ''' % (comment_id, pre_id, article_id, consumer, head_url, gender,
               content, up_count, release_time))

    return self_base_parser.add_comment(
        comment_id, pre_id, article_id, consumer, head_url, gender, content,
        up_count, release_time, emotion, hot_id)
def add_root_url(parser_params={}):
    """Queue the list pages of several Baidu game categories in ``GameApp_urls``.

    :param parser_params: only written to the debug log here; it is never
        modified by this function.
    """
    log.debug('''
        添加根url
        parser_params : %s
        ''' % str(parser_params))

    site_root = 'http://shouji.baidu.com'
    category_regex = '<li><span></span><a  href="(.*?)">.*?</a></li>'
    pager_regex = '<div class="pager">.*">(.*?)</a>.*?<li class="next">'

    def inner_add_url(url, remark):
        # Fetch one category page, follow each sub-category link found on it
        # and register every "list_<n>.html" page of that sub-category.
        category_html = tools.get_html_by_urllib(url)
        for match in tools.get_info(category_html, category_regex):
            type_url = site_root + ''.join(match)
            type_html = tools.get_html_by_urllib(type_url)

            # Text captured from the pager (presumably the last page number);
            # when nothing matches, a single page is assumed.
            pager_text = ''.join(tools.get_info(type_html, pager_regex))
            page_total = int(pager_text) if pager_text else 1

            for page in range(1, page_total + 1):
                page_url = type_url + 'list_%d.html' % page
                added = base_parser.add_url(
                    'GameApp_urls', SITE_ID, page_url, remark=remark)
                if not added:
                    # add_url reported failure: reset the existing row to TODO.
                    base_parser.update_url(
                        'GameApp_urls', page_url, Constance.TODO)

    inner_add_url('http://shouji.baidu.com/game/401/', Constance.休闲益智)
    inner_add_url('http://shouji.baidu.com/game/board_102_200/',
                  Constance.网络游戏)
    inner_add_url('http://shouji.baidu.com/game/403/', Constance.飞行射击)
    inner_add_url('http://shouji.baidu.com/game/406/', Constance.赛车竞速)
    # NOTE(review): 405 and 407 are both tagged with the same category
    # constant - confirm that 407 is not meant to use a different one.
    inner_add_url('http://shouji.baidu.com/game/405/', Constance.体育竞技)
    inner_add_url('http://shouji.baidu.com/game/407/', Constance.体育竞技)
    inner_add_url('http://shouji.baidu.com/game/408/', Constance.经营策略)
    inner_add_url('http://shouji.baidu.com/game/402/', Constance.角色扮演)
コード例 #33
0
    def creat_db_pool(self, host):
        """Create the pymysql connection pool for ``host`` and keep it on self.

        The connection settings come from the decrypted ``INTERFACE_CIPHER``
        environment variable, looked up by the decrypted host name. When the
        host has no entry, an error is logged and the process exits via
        ``exit(0)``.

        :param host: encrypted host name; it is passed through ``decrypt``.
        """
        connection_config = decrypt(getenv('INTERFACE_CIPHER'))
        host = decrypt(host)

        host_config = connection_config.get(host, None)
        if host_config is None:
            log.error("IP域名错误")
            exit(0)

        pool_options = {
            'creator': pymysql,
            'mincached': 3,
            'maxcached': 5,
            'maxshared': 0,
            'maxconnections': 20,
            'blocking': True,
            'maxusage': None,
            'setsession': None,
            'host': host,
            'port': host_config.get('port'),
            'user': host_config.get('user'),
            'db': None,
            'passwd': host_config.get('password'),
        }

        log.debug('创建数据库连接池:%s' % host)
        self.__db_pool = PooledDB(**pool_options)
        log.debug('创建数据库连接池完成!')
コード例 #34
0
ファイル: quanming_tv.py プロジェクト: yantoumu/guangdong_app
def add_site_info():
    """Register this site (``SITE_ID`` / ``NAME``) through ``base_parser``."""
    log.debug('添加网站信息')
    # Positional arguments, in order: table name, site id, site url, site name.
    base_parser.add_website_info(
        'site_info',
        SITE_ID,
        'https://www.wandoujia.com/app/org.fungo.fungolive',
        NAME)
コード例 #35
0
ファイル: alarm.py プロジェクト: ssdxiao/alarm_platform
    def get(self):
        """Send the events stored for the alarm given by the ``alarmid`` argument.

        If ``alarmid`` is missing or not an integer, the problem is logged
        and the method returns without calling ``send_data``. Otherwise it
        sends ``{"result": "ok", "data": [...]}`` when ``db.get_events``
        returns rows, or an error payload when it returns nothing.
        """
        log.debug("EventsHandler get in")
        try:
            alarmid = int(self.get_argument("alarmid"))
        except Exception:
            # Catch Exception rather than everything, so SystemExit and
            # KeyboardInterrupt are not swallowed by parameter validation.
            log.debug("param alarmid is not int")
            return

        data = db.get_events(alarmid)
        if data:
            # Each row is indexed positionally; columns 0, 1, 4 and 5 are
            # exposed under the names below.
            result = {
                "result": "ok",
                "data": [
                    {
                        "id": one[0],
                        "type": one[1],
                        "eventtime": one[4],
                        "context": one[5],
                    }
                    for one in data
                ],
            }
        else:
            result = {
                "result": "error",
                "message": "can not find this user",
            }

        self.send_data(result)
コード例 #36
0
def get_domain(url):
    """Return ``get_tld(url)``, or ``''`` when ``get_tld`` raises.

    The exception is written to the debug log and not re-raised.
    """
    try:
        return get_tld(url)
    except Exception as e:
        log.debug(e)
        return ''
コード例 #37
0
def add_root_url(parser_params={}):
    """Register the single root ``URL`` of this site in ``VAApp_urls``.

    :param parser_params: only written to the debug log here; it is never
        modified by this function.
    """
    message = '''
        添加根url
        parser_params : %s
        ''' % str(parser_params)
    log.debug(message)

    base_parser.add_url('VAApp_urls', SITE_ID, URL, remark=NEWS_LOCAL)
コード例 #38
0
ファイル: httpclient.py プロジェクト: ssdxiao/alarm_platform
    def get(self, url):
        """Issue a GET request to ``self.url + url`` and ignore the response.

        Any error raised while building or sending the request is reported
        on stdout and not re-raised; the method always returns ``None``.

        :param url: path appended to the client's base ``self.url``.
        """
        try:
            url = self.url + url
            log.debug(url)
            urllib2.urlopen(url)
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt
            # and SystemExit still propagate.
            print("get request error")
コード例 #39
0
ファイル: httpclient.py プロジェクト: ssdxiao/alarm_platform
 def releasealarm(self, zwaveid, user):
     """Post an "unalarm" request for a device and log the outcome.

     :param zwaveid: sent as ``zwavedeviceid``.
     :param user: sent as ``employeename``.
     """
     payload = {'zwavedeviceid': zwaveid, 'token': self.token, "employeename" : user}
     # NOTE(review): this prints the request payload, token included, to stdout.
     print(payload)
     reply = self.post("/thirdpart/zufang/unalarmdevicewarning", payload)
     if not reply:
         # post() gave nothing back; there is no result code to inspect.
         return
     if reply["resultCode"] == 0:
         log.debug("release alarm ok")
     else:
         log.debug("release alarm error")
コード例 #40
0
ファイル: sync_api.py プロジェクト: jandob/omniSync
 def run(self):
     """Consume items from ``self.queue`` until a ``None`` sentinel arrives.

     Every non-sentinel item is passed to ``consume_item`` and then marked
     done on the queue. ``task_done`` is not called for the sentinel itself.
     """
     log.debug("%s running" % self.__class__.__name__)
     item = self.queue.get()
     while item is not None:  # None is the stop sentinel
         self.consume_item(item)
         # TODO what if a file gets added again while syncing in progress?
         self.queue.task_done()
         item = self.queue.get()
コード例 #41
0
ファイル: dropbox_sync.py プロジェクト: jandob/omniSync
 def rm(self, path, *args, **kwargs):
     """Delete ``path`` through the Dropbox client.

     Deleting ``'/'`` is refused and logged as critical. An
     ``ErrorResponse`` whose reason is ``'Not Found'`` is only logged; any
     other ``ErrorResponse`` is logged and re-raised. The extra positional
     and keyword arguments are accepted and ignored.
     """
     if path == '/':
         log.critical('prevented delete / (root)')
         return

     try:
         self.client.file_delete(path)
     except dropbox.rest.ErrorResponse as e:
         log.debug('Delete failed: %s (%s)' % (e.reason, path))
         already_gone = e.reason == 'Not Found'
         if not already_gone:
             raise e
コード例 #42
0
ファイル: sync_api.py プロジェクト: jandob/omniSync
    def get_syncer_instances(filter=lambda name: True):
        """Instantiate the syncer classes found in the ``syncers`` package.

        Does something like ``from syncers.dropbox import Dropbox`` for every
        class with the superclass ``SyncBase`` and calls it with no arguments.

        NOTE(review): there is no ``self`` parameter - presumably this is
        decorated as a static method outside the visible snippet; confirm.

        :param filter: predicate called with each syncer class name; only
            names for which it returns a true value are instantiated. The
            default accepts every name. (The previous default took no
            arguments, so relying on it raised ``TypeError`` as soon as one
            syncer was available.)
        :return: dict mapping syncer class name to a new instance.
        """
        # find classes inside syncers package that have the superclass SyncBase
        available_syncers = dict(find_modules_with_super_class(syncers, SyncBase))
        log.debug("available_syncers: %s" % list(available_syncers.keys()))

        # The parameter shadows the builtin, hence the explicit builtins.filter.
        return {
            syncer: getattr(import_module(available_syncers[syncer]), syncer)()
            for syncer in builtins.filter(filter, available_syncers.keys())
        }
コード例 #43
0
ファイル: httpclient.py プロジェクト: ssdxiao/alarm_platform
    def post(self, url, values):
        """POST form-encoded ``values`` to ``self.url + url``; return the JSON reply.

        :param url: path appended to the client's base ``self.url``.
        :param values: mapping that is url-encoded into the request body.
        :return: the decoded JSON response, or ``None`` when anything in the
            request / read / decode sequence raises (reported on stdout).
        """
        try:
            url = self.url + url
            body = urllib.urlencode(values)
            req = urllib2.Request(url, body)
            response = urllib2.urlopen(req)
            raw = response.read()
            log.debug(raw)
            return json.loads(raw)
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt
            # and SystemExit still propagate.
            print("post request error")
            return None
コード例 #44
0
ファイル: dropbox_sync.py プロジェクト: jandob/omniSync
 def login(self):
     """Load the stored access token (authorizing if needed) and build the client.

     The token is read from the file named by ``configuration['token_file']``;
     the file's directory is created when missing. If the file cannot be
     read or is empty, ``authorize()`` is called before the client is created.
     """
     token_path = os.path.expanduser(self.configuration['token_file'])
     parent_dir = os.path.dirname(token_path)
     if not os.path.exists(parent_dir):
         os.makedirs(parent_dir)

     try:
         with open(token_path) as handle:
             stored_token = handle.read()
     except IOError:
         stored_token = None
     self.access_token = stored_token

     if not stored_token:
         self.authorize()

     self.client = dropbox.client.DropboxClient(self.access_token)
     log.debug('dropbox authorized: ' + self.client.account_info()['email'])
コード例 #45
0
ファイル: alarm.py プロジェクト: ssdxiao/alarm_platform
    def put(self):
        """Update the handling progress of an alarm from the request payload.

        Expects the payload to contain ``deal_progress`` and ``alarmId``.
        Sends ``{"result": "ok"}`` after the update, or an error payload when
        either key is missing. Nothing is sent when ``get_data()`` returns
        an empty value.
        """
        log.debug("alarmHandler put in")
        data = self.get_data()
        if not data:
            return

        log.debug(data)
        # ``in`` replaces dict.has_key(), which is deprecated in Python 2
        # and was removed in Python 3.
        if "deal_progress" in data and "alarmId" in data:
            db.update_alarm_progress(data["alarmId"], data["deal_progress"])
            result = {"result": "ok"}
        else:
            result = {"result": "error", "message": "data is error"}
        self.send_data(result)
コード例 #46
0
ファイル: manage.py プロジェクト: ssdxiao/alarm_platform
 def put(self):
     """Change a manager's password from the request payload.

     Expects the payload to contain ``ManagePassword`` and ``ManageId``.
     On success the password is updated, ``{"result": "ok"}`` is sent and an
     audit record is saved; otherwise an error payload is sent. Nothing is
     sent when ``get_data()`` returns an empty value.
     """
     log.debug("ManageChangePasswordHandler put in")
     data = self.get_data()
     if not data:
         return

     # Never write the new password to the log: log a masked copy instead.
     loggable = dict(data)
     if "ManagePassword" in loggable:
         loggable["ManagePassword"] = "******"
     log.debug(loggable)

     # ``in`` replaces dict.has_key(), which is deprecated in Python 2 and
     # was removed in Python 3.
     if "ManagePassword" in data and "ManageId" in data:
         db.update_manage_passwd(data["ManageId"], data["ManagePassword"])
         self.send_data({"result": "ok"})
         save_record(self.login_user, "manage", data["ManageId"],
                     "change_passwd", "update manage password")
     else:
         self.send_data({"result": "error", "message": "data is error"})
コード例 #47
0
ファイル: upload.py プロジェクト: ssdxiao/alarm_platform
 def post(self):
     """Store the files uploaded under the form field ``file``.

     Each file is written in binary mode into ``./static/audio`` under the
     base name of the client-supplied filename. Sends ``{"result": "ok"}``
     when every file has been written, or an ``{"error": ...}`` payload when
     no file was uploaded or a filename is unusable.
     """
     log.debug("UploadHandler post in")
     # Extract the metadata of the files whose form name is 'file'. A
     # request without that field is treated like an empty upload instead
     # of raising KeyError.
     file_metas = self.request.files.get('file')
     if not file_metas:
         self.send_data({"error": "null file upload"})
         return

     upload_path = "./static/audio"
     for meta in file_metas:
         # The filename comes from the client: keep only its last path
         # component so "../" sequences or absolute paths cannot make the
         # write land outside upload_path.
         filename = os.path.basename(meta['filename'])
         if filename in ("", ".", ".."):
             self.send_data({"error": "invalid file name"})
             return
         filepath = os.path.join(upload_path, filename)
         log.debug("write %s"%filepath)
         # Binary mode, since uploads are not necessarily text.
         with open(filepath, 'wb') as up:
             up.write(meta['body'])

     self.send_data({"result": "ok"})
コード例 #48
0
ファイル: server.py プロジェクト: ssdxiao/alarm_platform
    def post(self):
        """Release the alarm named by ``alarmId`` in the request payload.

        The z-wave id is looked up from the alarm, ``{"result": "ok"}`` is
        sent, and only then is ``client.releasealarm`` called. An error
        payload is sent when ``alarmId`` is missing. Nothing is sent when
        ``get_data()`` returns an empty value.
        """
        log.debug("ReleaseAlarmHandler post in")
        data = self.get_data()
        if not data:
            return

        log.debug(data)
        # ``in`` replaces dict.has_key(), which is deprecated in Python 2
        # and was removed in Python 3.
        if "alarmId" not in data:
            self.send_data({"result": "error", "message": "data is error"})
            return

        zwaveid = db.get_zwaveid_from_alarm(data["alarmId"])
        # The response goes out before the release request is issued.
        self.send_data({"result": "ok"})
        client.releasealarm(zwaveid, self.login_user)
コード例 #49
0
ファイル: audio.py プロジェクト: ssdxiao/alarm_platform
    def post(self):
        """Record how an alarm was handled and set its progress to 1.

        Expects the payload to contain ``AlarmID``, ``AlarmRemark``,
        ``AlarmAudio`` and ``AlarmTelephone``. Sends ``{"result": "ok"}``
        after both database calls, or an error payload when a key is missing.
        Nothing is sent when ``get_data()`` returns an empty value.
        """
        log.debug("AudioHandler post in")
        data = self.get_data()
        if not data:
            return

        log.debug(data)
        required = ("AlarmID", "AlarmRemark", "AlarmAudio", "AlarmTelephone")
        # ``in`` replaces dict.has_key(), which is deprecated in Python 2
        # and was removed in Python 3.
        if all(key in data for key in required):
            db.insert_alarm_deal(data["AlarmID"], self.login_user,
                                 data["AlarmTelephone"], data["AlarmRemark"],
                                 "%s.wav" % data["AlarmAudio"])
            db.update_alarm_progress(data["AlarmID"], 1)
            result = {"result": "ok"}
        else:
            result = {"result": "error", "message": "data is error"}
        self.send_data(result)
コード例 #50
0
ファイル: alarm.py プロジェクト: ssdxiao/alarm_platform
    def get(self):
        """Send one page of the alarm list, selected by the ``index`` argument.

        ``index`` falls back to 1 when it is missing, not an integer, or not
        positive. For every alarm row the contexts of its events are joined
        into ``deal_context`` and the handling user's name is resolved.
        Sends an error payload when ``db.get_alarm_list`` returns nothing.
        """
        log.debug("alarmAllHandler get in")
        try:
            index = int(self.get_argument("index"))
        except Exception:
            # Catch Exception rather than everything, so SystemExit and
            # KeyboardInterrupt are not swallowed by parameter parsing.
            index = 1
        if index <= 0:
            index = 1

        data = db.get_alarm_list(index)
        if not data:
            self.send_data({"result": "error",
                            "message": "get alarm list failed"})
            return

        result = {
            "result": "ok",
            "maxindex": data["maxindex"],
            "curruntindex": data["curruntindex"],
            "data": [],
        }
        for one in data["data"]:
            # Column 5 of each event row is its context text; every entry
            # is followed by a single space, as before.
            context = "".join(event[5] + " " for event in db.get_events(one[0]))
            # Column 5 of the alarm row is the handling user's id; 0 is
            # shown with a fixed placeholder instead of a looked-up name.
            if one[5] == 0:
                user = "******"
            else:
                user = db.get_username_by_id(one[5])
            result["data"].append({
                "id": one[0],
                "create_time": one[1],
                "zwaveid": one[2],
                "deviceid": one[3],
                "deal_progress": one[4],
                "deal_user": one[5],
                "deal_user_name": user,
                "deal_context": context,
            })

        self.send_data(result)
コード例 #51
0
ファイル: manage.py プロジェクト: ssdxiao/alarm_platform
 def put(self):
     """Update a manager's name and telephone from the request payload.

     Expects the payload to contain ``ManageTelephone``, ``ManageId`` and
     ``ManageName``. On success the row is updated, ``{"result": "ok"}`` is
     sent and an audit record describing the change is saved; otherwise an
     error payload is sent. Nothing is sent when ``get_data()`` returns an
     empty value.
     """
     log.debug("ManageHandler put in")
     data = self.get_data()
     if not data:
         return

     log.debug(data)
     required = ("ManageTelephone", "ManageId", "ManageName")
     # ``in`` replaces dict.has_key(), which is deprecated in Python 2 and
     # was removed in Python 3.
     if all(key in data for key in required):
         db.update_manage(data["ManageId"], data["ManageName"],
                          data["ManageTelephone"])
         self.send_data({"result": "ok"})
         # Named ``record`` so the local no longer shadows the builtin str.
         record = "update manage name %s telephone %s" % (
             data["ManageName"].encode('utf-8'),
             data["ManageTelephone"].encode('utf-8'))
         save_record(self.login_user, "manage", data["ManageId"], "update",
                     record)
     else:
         self.send_data({"result": "error", "message": "data is error"})
コード例 #52
0
ファイル: manage.py プロジェクト: ssdxiao/alarm_platform
    def get(self):
        """Send the manager record selected by the ``id`` request argument.

        If ``id`` is missing or not an integer, the problem is logged and the
        method returns without calling ``send_data``. Otherwise it sends the
        manager's id, name and telephone, or an error payload when
        ``db.get_manage`` returns nothing.
        """
        log.debug("ManageHandler get in")
        try:
            # Named ``manage_id`` so the local does not shadow the builtin id.
            manage_id = int(self.get_argument("id"))
        except Exception:
            # Catch Exception rather than everything, so SystemExit and
            # KeyboardInterrupt are not swallowed by parameter validation.
            log.debug("param id is not int")
            return

        data = db.get_manage(manage_id)
        if data:
            result = {
                "result": "ok",
                "data": {
                    "manageid": data[0],
                    "managename": data[1],
                    "managetelephone": data[2],
                },
            }
        else:
            result = {
                "result": "error",
                "message": "can not find this user",
            }

        self.send_data(result)
コード例 #53
0
ファイル: server.py プロジェクト: ssdxiao/alarm_platform
def alarm_sync():
    """Poll the remote service forever and store new alarm events locally.

    Every 10 seconds the events after the last synced id are fetched and
    saved one by one. When ``client.get_alarm`` returns ``None`` the token
    is requested again and the round is skipped. Errors inside a round are
    printed with their traceback and the loop carries on.
    """
    import traceback

    db = DB()
    client.get_token()
    while True:
        try:
            time.sleep(10)
            lastid = db.get_sync_id()
            events = client.get_alarm(lastid)
            if events is None:
                # NOTE(review): None presumably means the token is no
                # longer valid - confirm against client.get_alarm.
                client.get_token()
                continue
            # An empty result simply runs zero iterations.
            for one in events:
                log.debug(one)
                db.save_event(one["id"], one["type"], one["deviceid"],
                              one["zwavedeviceid"], one["eventtime"],
                              one["objparam"])
        except Exception:
            # Catch Exception rather than everything: a bare except in this
            # endless loop would also swallow KeyboardInterrupt and
            # SystemExit.
            traceback.print_exc()
コード例 #54
0
ファイル: static.py プロジェクト: ssdxiao/alarm_platform
 def get(self, html):
     """Render the page ``html`` located under ``BASEDIR``.

     :param html: path fragment taken from the request; it is appended to
         ``BASEDIR`` with a ``/`` and passed to ``self.render``.
     """
     page_path = "%s/%s" % (BASEDIR, html)
     log.debug(page_path)
     self.render(page_path)
コード例 #55
0
ファイル: sync_api.py プロジェクト: jandob/omniSync
 def stop(self):
     """Put the ``None`` sentinel on the queue and log the stop request."""
     # None is the sentinel that makes the consuming loop break out.
     self.queue.put(None)
     log.debug("%s stopped" % self.__class__.__name__)
コード例 #56
0
ファイル: sync_api.py プロジェクト: jandob/omniSync
 def __init__(self, queue=None):
     """Attach the work queue, then run the parent initialiser.

     :param queue: queue to consume from; any falsy value (including the
         default ``None``) is replaced by a new ``OrderedSetQueue``.
     """
     if not queue:
         queue = OrderedSetQueue()
     self.queue = queue
     super().__init__()
     log.debug("%s init" % self.__class__.__name__)
コード例 #57
0
ファイル: upload.py プロジェクト: ssdxiao/alarm_platform
 def get(self):
     """Handle GET on the upload endpoint: only a debug line is logged."""
     log.debug("UploadHandler get in")
コード例 #58
0
ファイル: server.py プロジェクト: ssdxiao/alarm_platform
 def get(self):
     """Redirect the request to the static index page."""
     log.debug("RedirectHandler")
     self.redirect("/static/index.html")