Exemple #1
0
    def api(sql=None, method='first'):
        """Run ``sql`` on the shared ``mysql_session`` and return its result.

        ``method`` selects how the result of ``mysql_session.execute(sql)``
        is consumed:

        * ``'first'``    -- ``.first()``; a non-``None`` row is converted
          with ``dict()``.
        * ``'scalar'``   -- ``.scalar()``.
        * ``'fetchall'`` -- ``.fetchall()``.
        * ``'execute'``  -- run the statement, commit the session and
          return the result's ``lastrowid``.

        If no case matches, nothing is executed and ``None`` is returned.

        Exceptions are never propagated: on any failure the session is
        rolled back, the statement and traceback are logged, and ``None``
        is returned. A caller therefore cannot distinguish "no row" from
        "query failed" by the return value alone.
        """
        try:
            data = None
            for case in switch(method):
                if case('first'):
                    data = mysql_session.execute(sql).first()
                    if data is not None:
                        data = dict(data)
                    break
                if case('scalar'):
                    data = mysql_session.execute(sql).scalar()
                    break
                if case('fetchall'):
                    data = mysql_session.execute(sql).fetchall()
                    break
                if case('execute'):
                    data = mysql_session.execute(sql)
                    mysql_session.commit()
                    data = data.lastrowid
                    break

            # Optionally log the statement that was just run.
            if Config.MYSQL_DEBUG:
                service_logger.warn("sql:api", {"sql": sql})

            return data
        except Exception:
            # ``except Exception:`` is valid on Python 2 and 3; the old
            # ``except Exception, err`` form is Python-2-only and bound a
            # name that was never used.
            mysql_session.rollback()
            service_logger.error("sql:error", {
                "sql": sql,
                "data": traceback.format_exc()
            })

            return None
Exemple #2
0
    def update(self, up_str='', id=0):
        """Apply the update fragment ``up_str`` to the task row ``id``.

        Builds the statement with
        ``SqlService.sql(SqlService.TASK_UPDATE, up_str, id)`` and runs it
        through ``SqlService.api(sql, 'execute')``.

        Returns ``False`` without touching the database when ``up_str`` is
        empty (the problem is logged); otherwise returns ``True`` once the
        statement has been handed to ``SqlService.api``. The result of
        ``SqlService.api`` is not inspected.
        """
        if up_str == '':
            service_logger.error({"msg": "up_str error", "task_id": id})
            # Previously execution fell through here and still issued an
            # UPDATE built from an empty fragment, then reported success.
            return False

        sql = SqlService.sql(SqlService.TASK_UPDATE, up_str, id)
        SqlService.api(sql, 'execute')

        return True
Exemple #3
0
def tengxun_detail(url, links):
    """Fetch and import every article in ``links`` for the list page ``url``.

    ``links`` is an iterable of dicts that provide at least the keys
    ``'link'`` and ``'image'``. The category attached to each article is
    derived from substrings of ``url``. For each link that
    ``ImportService.check_url`` does not flag, the page is fetched through
    ``Tengxun`` after a random 4-10 second pause, its image is patched up,
    and the record is passed to ``ImportService.insert_handle``.

    Per-link failures are logged and do not stop the loop. Nothing is
    returned. When ``links`` is empty the function only prints it and
    leaves the list file alone.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(json.dumps(links))

    if 'tech' in url:
        cate = ['科技']
    elif 'finance' in url:
        cate = ['财经']
    elif 'edu' in url:
        cate = ['教育']
    elif 'house' in url:
        cate = ['房产']
    elif 'visit' in url:
        cate = ['旅游']
    elif 'internet' in url or 'tcctit' in url or 'ai' in url:
        cate = ["互联网"]
    else:
        cate = []

    if not links:
        return

    for vo in links:
        # TODO: check the link. Flagged links are skipped entirely
        # (presumably already imported -- confirm against ImportService).
        if ImportService.check_url(vo['link']):
            continue

        # Throttle: wait a random 4-10 seconds before each fetch.
        time.sleep(random.randint(4, 10))

        try:
            page = Tengxun(vo['link'])
            # Fill in supplementary data.
            page.set_category(cate)
            data = page.get_content()
            # An image supplied by the list entry overrides the page's own.
            if vo['image'] != '':
                data['image'] = vo['image']
            # Protocol-relative image URLs ("//host/...") get an explicit scheme.
            if data['image'] != '' and data['image'][0:2] == '//':
                data['image'] = 'http:' + data['image']

            # Log the extracted record.
            service_logger.warn(data=data)
            if data['send_time'] == '' or data['title'] == '':
                # Incomplete record: not saved. Note that this ``continue``
                # also skips the delete_file call below for this link.
                continue

            # TODO: save the data.
            ImportService.insert_handle(data)
        except Exception:
            service_logger.error("tengxun-exception", {
                "msg": traceback.format_exc(),
                "link": vo['link']
            })

        # Delete the file associated with this link.
        delete_file(vo['link'])

    # Delete the list file associated with the list page.
    delete_file(url, ext='.list')
Exemple #4
0
    def handle(self):
        """Run ``self._handle()`` and log any failure instead of raising.

        ``self.url`` is logged before the work starts. If ``_handle``
        raises, the traceback and URL are logged under ``"task-exception"``
        and the exception is swallowed, so this method always returns
        ``None``.
        """
        service_logger.log(self.url)

        try:
            self._handle()
        except Exception:
            # ``except Exception:`` works on Python 2 and 3; the previous
            # ``except Exception, err`` was Python-2-only and never used
            # ``err``.
            service_logger.error("task-exception", {
                "msg": traceback.format_exc(),
                "url": self.url
            })
Exemple #5
0
def toutiao_detail(url, links):
    """Fetch and import every article in ``links`` for the list page ``url``.

    ``links`` is an iterable of dicts that provide at least the keys
    ``'link'`` and ``'image'``. A category is derived from substrings of
    ``url`` and is applied to the page only when one was found. For each
    link that ``ImportService.check_url`` does not flag, the page is
    fetched through ``Toutiao`` after a random 4-10 second pause, its
    image is patched up, and the record is passed to
    ``ImportService.insert_handle``.

    Per-link failures are logged and do not stop the loop. Nothing is
    returned. When ``links`` is empty the function only prints it and
    leaves the list file alone.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(json.dumps(links))

    if 'news_baby' in url:
        cate = ['教育']
    elif 'news_travel' in url:
        cate = ['旅游']
    elif '人工智能' in url or '大数据' in url:
        cate = ['技术']
    else:
        cate = []

    if not links:
        return

    for vo in links:
        # TODO: check the link. Flagged links are skipped entirely
        # (presumably already imported -- confirm against ImportService).
        if ImportService.check_url(vo['link']):
            continue

        # Throttle: wait a random 4-10 seconds before each fetch.
        time.sleep(random.randint(4, 10))

        try:
            page = Toutiao(vo['link'])
            # Fill in supplementary data, but only when a category matched.
            if cate:
                page.set_category(cate)

            data = page.get_content()
            # An image supplied by the list entry overrides the page's own.
            if vo['image'] != '':
                data['image'] = vo['image']
            # Protocol-relative image URLs ("//host/...") get an explicit scheme.
            if data['image'] != '' and data['image'][0:2] == '//':
                data['image'] = 'http:' + data['image']

            # Log the extracted record.
            service_logger.warn(data=data)
            if data['send_time'] == '' or data['title'] == '':
                # Incomplete record: not saved. Note that this ``continue``
                # also skips the delete_file call below for this link.
                continue

            # TODO: save the data.
            ImportService.insert_handle(data)
        except Exception:
            service_logger.error("toutiao-exception", {
                "msg": traceback.format_exc(),
                "link": vo['link']
            })

        # Delete the file associated with this link.
        delete_file(vo['link'])

    # Delete the list file associated with the list page.
    delete_file(url, ext='.list')
Exemple #6
0
def dytt_detail(url, links):
    """Fetch and import every movie page in ``links`` for the list ``url``.

    ``links`` is an iterable of dicts that provide at least the key
    ``'link'``. The category attached to each page is derived from
    substrings of ``url``. For each link that ``ImportService.check_url``
    does not flag, the page is fetched through ``Dytt`` after a random
    4-10 second pause and the record is passed to
    ``ImportService.insert_handle(data, 'video')``.

    Per-link failures are logged and do not stop the loop. Nothing is
    returned. When ``links`` is empty the function only prints it and
    leaves the list file alone.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(json.dumps(links))

    if 'jddy' in url:
        cate = ['综合电影']
    elif 'oumei' in url:
        cate = ['欧美电影']
    elif 'china' in url:
        cate = ['国内电影']
    elif 'rihan' in url:
        cate = ['日韩电影']
    elif 'dyzz' in url:
        cate = ['最新电影']
    else:
        cate = []

    if not links:
        return

    for vo in links:
        print(vo['link'])
        # TODO: check the link. Flagged links are skipped entirely
        # (presumably already imported -- confirm against ImportService).
        if ImportService.check_url(vo['link']):
            continue

        # Throttle: wait a random 4-10 seconds before each fetch.
        time.sleep(random.randint(4, 10))

        try:
            page = Dytt(vo['link'])
            # Fill in supplementary data.
            page.set_category(cate)

            data = page.get_content(flag=False)
            # Log the extracted record.
            service_logger.warn(data=data)
            if data['send_time'] == '' or data['title'] == '':
                # Incomplete record: not saved. Note that this ``continue``
                # also skips the delete_file call below for this link.
                continue

            # TODO: save the data.
            ImportService.insert_handle(data, 'video')
        except Exception:
            service_logger.error("dytt-exception", {
                "msg": traceback.format_exc(),
                "link": vo['link']
            })

        # Delete the file associated with this link.
        delete_file(vo['link'])

    # Delete the list file associated with the list page.
    delete_file(url, ext='.list')
Exemple #7
0
    def upload_image(image, iscut=False, w=300, h=200):
        """Download the image at URL ``image`` into ``Config.DIR_PATH``.

        A file name is generated from the current local time plus a random
        five-digit number; its extension is whatever follows the last ``.``
        in the final path segment of ``image`` (``jpg`` when there is
        none). The directory ``Config.IMAGE_PATH/<year>/<month>`` is
        created if it does not exist yet.

        Returns ``None`` when ``image`` is empty. Returns ``''`` when the
        HTTP status is not 200, when the body looks like an HTML page, or
        when any exception occurs (which is logged). On a successful
        download the visible body falls off the end and returns ``None``.

        NOTE(review): ``iscut``, ``w``, ``h`` and ``newfile`` are not used
        in this body -- confirm whether a resize/move step is missing.
        """
        if image == '':
            return

        # Sample the clock once so the file name and the year/month
        # directory can never disagree across a month boundary.
        now = time.localtime()
        stem = time.strftime("%Y%m%d%H%M%S", now) + '_' + str(
            random.randint(10000, 99999))

        name_parts = image.split('/')[-1].split('.')
        ext = name_parts[-1] if len(name_parts) > 1 else 'jpg'

        filename = stem + '.' + ext
        filepath = Config.IMAGE_PATH + '/' + time.strftime("%Y", now) \
            + '/' + time.strftime("%m", now)
        if not os.path.isdir(filepath):
            # ``0o775`` is accepted by Python 2.6+ and 3; the bare ``0775``
            # literal is a syntax error on Python 3.
            os.makedirs(filepath, 0o775)

        newfile = filepath + '/' + filename
        oldfile = Config.DIR_PATH + filename

        try:
            # Store the original image.
            headers = {
                "User-Agent":
                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36"
            }
            response = requests.get(image, headers=headers)
            if response.status_code != 200:
                return ''
            body = response.content
            # ``response.content`` is bytes; byte literals keep this check
            # working on Python 3 and are plain ``str`` on Python 2.
            if b'<!DOCTYPE' in body or b'<iframe' in body:
                return ''
            with open(oldfile, "wb") as f:
                f.write(body)
        except Exception:
            service_logger.error("task-exception", {
                "msg": traceback.format_exc(),
                "image": image
            })
            return ''
Exemple #8
0
def error_handle(msg='', data=None):
    """Log ``msg`` together with ``data`` and abort with ``ApiException``.

    The pair is sent to ``service_logger.error`` as a single dict under the
    keys ``"msg"`` and ``"data"``. This function never returns normally: it
    always raises ``ApiException(msg)``.
    """
    payload = {"msg": msg, "data": data}
    service_logger.error(data=payload)
    raise ApiException(msg)