コード例 #1
0
def set_task_status_99(id_, redo, status=-99):
    '''Lock a task row and decide whether the caller may process it.

    The Task row is selected with an update lock.  When the task may be
    processed, its status column is rewritten and the transaction is
    committed.

    Args:
        id_: primary key of the Task row.
        redo: when true, the task is accepted whatever its current status is
            and its status is reset to 0.
        status: kept for backward compatibility; the passed value is ignored.
            NOTE(review): the historical summary said the status is set to
            -99, but the value written is 0 (redo) or the current status --
            confirm whether -99 was meant to be written here.

    Returns:
        A ``(ok, status)`` tuple.  ``ok`` is True when the row was updated
        and committed, and also when no row with ``id_`` exists (nothing is
        updated in that case).  ``ok`` is False when the task is not
        processable (not redo and status outside ``[0, WEIXIN_TRY_TIME)``)
        or when the update failed (rolled back and logged).  ``status`` is 0
        for redo, otherwise the task's current status, or -88 when no row
        was found.

    Raises:
        SystemExit: exit code 0 when, without redo, the task already has a
            positive status and ``ret == 0`` (done successfully).
    '''
    result = session.query(Task).filter(Task.id==id_).with_lockmode('update').all()
    # ``redo and 0 or x`` always evaluates to ``x`` because 0 is falsy, so a
    # real conditional expression is needed for redo to select 0.
    status = 0 if redo else -88

    if not result:
        # No such task: nothing to update; report the placeholder status.
        return True, status

    task = result[0]
    status = 0 if redo else task.status

    processable = redo or (task.status < WEIXIN_TRY_TIME and task.status >= 0)
    if not processable:
        return False, status

    if not redo and (task.status > 0 and task.ret == 0):
        # NOTE: Task has been done, and the result is success, so we
        # just exit
        logger.debug('Task has been done, exit task %s' % id_)
        sys.exit(0)

    # We hold the row lock: write the new status.
    try:
        session.query(Task).filter(Task.id==id_).update({Task.status: status,})
        session.commit()
    except Exception as e:
        session.rollback()
        logger.error('Can not update task(%s) status msg: %s' % (id_, str(e)))
        return False, status

    return True, status
コード例 #2
0
def set_task_status_99(id_, redo, status=-99):
    '''Lock a task row and decide whether the caller may process it.

    The Task row is selected with an update lock.  When the task may be
    processed, its status column is rewritten and the transaction is
    committed.

    Args:
        id_: primary key of the Task row.
        redo: when true, the task is accepted whatever its current status is
            and its status is reset to 0.
        status: kept for backward compatibility; the passed value is ignored.
            NOTE(review): the historical summary said the status is set to
            -99, but the value written is 0 (redo) or the current status --
            confirm whether -99 was meant to be written here.

    Returns:
        A ``(ok, status)`` tuple.  ``ok`` is True when the row was updated
        and committed, and also when no row with ``id_`` exists (nothing is
        updated in that case).  ``ok`` is False when the task is not
        processable (not redo and status outside ``[0, WEIXIN_TRY_TIME)``)
        or when the update failed (rolled back and logged).  ``status`` is 0
        for redo, otherwise the task's current status, or -88 when no row
        was found.

    Raises:
        SystemExit: exit code 0 when, without redo, the task already has a
            positive status and ``ret == 0`` (done successfully).
    '''
    result = session.query(Task).filter(
        Task.id == id_).with_lockmode('update').all()
    # ``redo and 0 or x`` always evaluates to ``x`` because 0 is falsy, so a
    # real conditional expression is needed for redo to select 0.
    status = 0 if redo else -88

    if not result:
        # No such task: nothing to update; report the placeholder status.
        return True, status

    task = result[0]
    status = 0 if redo else task.status

    processable = redo or (task.status < WEIXIN_TRY_TIME
                           and task.status >= 0)
    if not processable:
        return False, status

    if not redo and (task.status > 0 and task.ret == 0):
        # NOTE: Task has been done, and the result is success, so we
        # just exit
        logger.debug('Task has been done, exit task %s' % id_)
        sys.exit(0)

    # We hold the row lock: write the new status.
    try:
        session.query(Task).filter(Task.id == id_).update({
            Task.status: status,
        })
        session.commit()
    except Exception as e:
        session.rollback()
        logger.error('Can not update task(%s) status msg: %s' %
                     (id_, str(e)))
        return False, status

    return True, status
コード例 #3
0
def set_task_ret(id_, status, ret, retmsg):
    '''Record the outcome of a task while holding its row lock.

    The Task row is read with an update lock.  The new ``status``, ``ret``
    and ``retmsg`` values are written and committed only when the row exists
    and its current status lies in ``[0, WEIXIN_TRY_TIME)``.

    Returns:
        True when the update was committed; False when the row is missing,
        its status is out of range, or the update failed (the session is
        rolled back and the error logged).
    '''
    locked_rows = session.query(Task).filter(Task.id==id_).with_lockmode('update').all()
    if not locked_rows:
        return False

    # Only tasks whose status is still inside the retry window are writable.
    if locked_rows[0].status >= WEIXIN_TRY_TIME or locked_rows[0].status < 0:
        return False

    # We locked the item
    try:
        new_values = {Task.status: status, Task.ret: ret, Task.retmsg: retmsg}
        session.query(Task).filter(Task.id==id_).update(new_values)
        session.commit()
    except Exception as err:
        session.rollback()
        logger.error('Can not update task(%s) status msg: %s' % (id_, str(err)))
        return False
    else:
        return True
コード例 #4
0
def set_task_ret(id_, status, ret, retmsg):
    '''Record the outcome of a task while holding its row lock.

    The Task row is read with an update lock.  The new ``status``, ``ret``
    and ``retmsg`` values are written and committed only when the row exists
    and its current status lies in ``[0, WEIXIN_TRY_TIME)``.

    Returns:
        True when the update was committed; False when the row is missing,
        its status is out of range, or the update failed (the session is
        rolled back and the error logged).
    '''
    task_query = session.query(Task).filter(Task.id == id_)
    locked_rows = task_query.with_lockmode('update').all()
    if not locked_rows:
        return False

    # Only tasks whose status is still inside the retry window are writable.
    if locked_rows[0].status >= WEIXIN_TRY_TIME or locked_rows[0].status < 0:
        return False

    # We locked the item
    try:
        new_values = {
            Task.status: status,
            Task.ret: ret,
            Task.retmsg: retmsg,
        }
        session.query(Task).filter(Task.id == id_).update(new_values)
        session.commit()
    except Exception as err:
        session.rollback()
        logger.error('Can not update task(%s) status msg: %s' %
                     (id_, str(err)))
        return False
    else:
        return True
コード例 #5
0
    def text(message):
        '''Handle an incoming text message and return the reply text.

        The sender is looked up in the User table by ``message.FromUserName``
        (matched against ``User.openid``).  For an unknown sender, a message
        starting with ``id:`` registers the user through
        ``Home.insert_user_info``; any other text yields a request to
        register.  For a known sender the message text is validated as a URL
        with Django's ``URLValidator`` and an error reply is returned when
        validation fails.

        NOTE(review): the lines visible here end after URL validation;
        ``user_id`` and ``user_groupid`` are assigned but not used in them, and
        a valid URL falls off the end of the visible body.  The ``except X, e``
        form is Python 2 only syntax.
        '''
        logger.debug(str(message))
        # Find the user's information in the database; if it does not exist,
        # ask the user to send it.
        result = session.query(User).filter(
            User.openid == message.FromUserName).scalar()
        #sql = u'select id, openid from wordp_user where openid = "%s"' % message.FromUserName
        logger.debug('get user info from database %s' % str(result))
        # Original note: this text will be inserted into the database, so
        # special characters must be escaped.  The MySQLdb-based escaping
        # below is commented out; the raw content is used.
        #import MySQLdb
        #content = MySQLdb.escape_string(message.Content)
        content = message.Content
        if not result:
            if content.startswith('id:'):
                # Insert the user info into the database and send a hello
                # message.  A non-None return value is treated as success.
                ret = Home.insert_user_info(message.FromUserName,
                                            content[len('id:'):].strip())
                if ret is not None:
                    return _(
                        '''Success insert your information, and you are at \
default group to post articles to Linuxfans, and if you want to \
post articles to HackOS, please contact with administrator %s''') % ADMIN_MAIL
                else:
                    return _(
                        '''Can not insert your information, please contact \
with administrator %s''') % ADMIN_MAIL
            else:
                # NOTE(review): unlike the other replies, this message is not
                # passed through _().
                return (
                    '''We have no information about you, please SEND me your \
information at least your username with format id:<username> \
thanks.''')
        user_id = result.id
        user_groupid = result.groupid
        # The sender is known.  Original intent: insert the URL into the task
        # list; first the URL format is checked.
        from django.core.validators import URLValidator
        from django.core.exceptions import ValidationError
        val = URLValidator()
        try:
            val(content)
        except ValidationError, e:
            return _(
                '''Your URL format is malformed, please give me a correct URL.'''
            )
コード例 #6
0
ファイル: views.py プロジェクト: zy-sunshine/wordprocessor
    def text(message):
        '''Handle an incoming text message and return the reply text.

        The sender is looked up in the User table by ``message.source``
        (matched against ``User.openid``).  For an unknown sender, a message
        starting with ``id:`` registers the user through
        ``Home.insert_user_info``; any other text yields a request to
        register.  For a known sender the message text is validated as a URL
        with Django's ``URLValidator`` and an error reply is returned when
        validation fails.

        NOTE(review): the lines visible here end after URL validation;
        ``user_id`` and ``user_groupid`` are assigned but not used in them, and
        a valid URL falls off the end of the visible body.  The ``except X, e``
        form is Python 2 only syntax.
        '''
        logger.debug(str(message))
        # Find the user's information in the database; if it does not exist,
        # ask the user to send it.  The next three lines only dump the
        # message's attributes, sender and recipient to the debug log.
        logger.debug(dir(message))
        logger.debug(message.source)
        logger.debug(message.target)
        result = session.query(User).filter(User.openid == message.source).scalar()
        #sql = u'select id, openid from wordp_user where openid = "%s"' % message.source
        logger.debug('get user info from database %s' % str(result))
        # Original note: this text will be inserted into the database, so
        # special characters must be escaped.  The MySQLdb-based escaping
        # below is commented out; the raw content is used.
        #import MySQLdb
        #content = MySQLdb.escape_string(message.Content)
        content = message.content
        if not result:
            if content.startswith('id:'):
                # Insert the user info into the database and send a hello
                # message.  A non-None return value is treated as success.
                ret = Home.insert_user_info(message.source, content[len('id:'):].strip())
                if ret is not None:
                    return _('''Success insert your information, and you are at \
default group to post articles to Linuxfans, and if you want to \
post articles to HackOS, please contact with administrator %s''') % ADMIN_MAIL
                else:
                    return _('''Can not insert your information, please contact \
with administrator %s''') % ADMIN_MAIL
            else:
                # NOTE(review): unlike the other replies, this message is not
                # passed through _().
                return ('''We have no information about you, please SEND me your \
information at least your username with format id:<username> \
thanks.''')
        user_id = result.id
        user_groupid = result.groupid
        # The sender is known.  Original intent: insert the URL into the task
        # list; first the URL format is checked.
        from django.core.validators import URLValidator
        from django.core.exceptions import ValidationError
        val = URLValidator()
        try:
            val(content)
        except ValidationError, e:
            return _('''Your URL format is malformed, please give me a correct URL.''')
コード例 #7
0
def main():
    '''Process the crawler task whose id is given on the command line.

    Usage: ``<script> <task_id> [redo]``.  The optional second argument
    ``redo`` forces the task to be processed again.

    The outcome is reported through module globals instead of a return
    value:

    * ``g_id_``: the task id parsed from ``sys.argv[1]``.
    * ``g_ret``: 0 on success, -1 when the content could not be fetched,
      -2 on ``CanNotUploadException``, -3 when the task could not be
      obtained, -4 on any other exception.
    * ``g_ret_msg``: message describing the failure ('' on success).
    * ``g_oldstatus``: status value returned by ``set_task_status_99``.

    Exits the process with -1 when no argument is given, and with 0 when
    the task belongs to a different client.  A non-integer task id raises
    ``ValueError`` from ``int()``.
    '''
    # An explicit length check replaces a bare ``except:`` around
    # ``sys.argv[1]``, which would also have swallowed unrelated exceptions.
    if len(sys.argv) < 2:
        sys.stderr.write("Please input crawler url\n")
        sys.exit(-1)

    redo = len(sys.argv) > 2 and sys.argv[2].strip() == 'redo'

    ### Get task from database, and determine the task's status is unfinished(0)
    #   That status(1,2,3) represent try execute time, There is an option in
    #   database to limit the retry time.
    global g_id_
    global g_ret
    global g_ret_msg
    global g_oldstatus
    g_id_ = int(sys.argv[1].strip())
    g_ret = 0
    g_ret_msg = ''
    g_oldstatus = -1

    # Defaults; author, delivery_name and url are replaced from the task row.
    author = 'testuser'
    class_ = CRAWLER_CLASS
    delivery_url = 'http://weixin.qq.com'
    delivery_name = author
    url = None

    ret = session.query(Task).filter(Task.id == g_id_).scalar()
    if ret and ret.client_name != CLIENT_NAME:
        logger.error('We got a error request, let me do task %s, but this is %s\'s task' % (g_id_, ret.client_name))
        # NOTE: we do not record this error in database, just exit.
        sys.exit(0)

    # Lock the task row and find out whether we are allowed to process it.
    ret, g_oldstatus = set_task_status_99(g_id_, redo)
    if not ret:
        g_ret_msg = 'Get task(%s) failed' % g_id_
        logger.error(g_ret_msg)
        g_ret = -3
        return

    # Fetch the task together with the nickname of the user who owns it.
    result = session.query(Task, User.nickname).join(User, User.id == Task.uid).filter(Task.id==g_id_).all()
    if result:
        url = result[0].Task.param1.strip()
        author = result[0].nickname.strip()
        delivery_name = author
    else:
        g_ret_msg = 'Can not get the task %s' % g_id_
        logger.error(g_ret_msg)
        g_ret = -3
        return

    logger.debug('Get task(%s) information author(%s) url(%s)' % (g_id_, author, url))

    try:
        ret, msg = get_content_from_url(url)
        if not ret:
            # On failure ``msg`` carries the error text.
            g_ret_msg = msg
            g_ret = -1
            return

        processor_name, proc = process_content(msg, url)

        # Work files go into a per-day directory below ``tmpdir``.
        today_tmpdir = os.path.join(tmpdir, time.strftime("%Y-%m-%d", time.localtime()))
        if not os.path.exists(today_tmpdir):
            os.makedirs(today_tmpdir)
        # Notice, please input all parameter use unicode
        try:
            post_article(proc.title, str(proc.content), proc.first_img,
                author, url, proc.name_cn, delivery_url, delivery_name, class_, today_tmpdir)
            g_ret = 0
            return
        except CanNotUploadException as e:
            g_ret = -2
            g_ret_msg = 'CanNotUploadException %s' % str(e)
            return

    except Exception as e:
        import traceback
        msg = 'generic exception: ' + traceback.format_exc()
        logger.error(msg)
        g_ret_msg = 'Unknown error %s \nError: %s' % (url, str(e))
        logger.error(g_ret_msg)
        g_ret = -4
        return
コード例 #8
0
ファイル: views.py プロジェクト: zy-sunshine/wordprocessor
    def assign_content_to_client(user_id, user_groupid, client_name, content):
        # Save url to taks list in database
        # Frist, we should find url in database, which user insert in 7
        # days, if find same url, we reject the request.(There have some
        # `status` and `ret` check)
        #sql = 'SELECT * from wordp_task where param1=\'%s\' and add_time>=%s \
        #and uid=%s' \
        #% (content, int(time.time())-3600*24*7, user_id)
        t_before_7_day = int(time.time())-3600*24*7
        result = session.query(Task).filter(Task.uid==user_id).filter(Task.add_time>=t_before_7_day).filter(Task.param1==content).filter(Task.client_name==client_name).scalar()
        if result:
            def common_resend():
                # NOTE: send signal to clients
                ret = Home.send_task_request(client_name, result.id)
                if ret:
                    return _('Your request have some error(status%s ret%s), \
and we have send the process request') % (result.status, result.ret)
                else:
                    return _('Can not deal with your request, (Something about client manager \
error), please inform administrator %s. Thanks.') % ADMIN_MAIL

            if (result.status == 0):
                return common_resend()

            elif (result.status > 0 and result.status < WEIXIN_RETRY_TIME):
                # Resend the request
                if result.ret != 0:
                    return common_resend()
                else:
                    # process successfully
                    return _('''Do not submit request duplicated(status%s ret%s), your \
request been processed successfully.''') % (result.status, result.ret)
            elif (result.status >= WEIXIN_RETRY_TIME):
                # The task has been processed out of retry time, and we
                # will not process it again
                if result.status == -99:
                    return _('''Your request is being processed, please wait.''')
                if result.ret != 0:
                    return _('''Your request has been processed, but not \
success, is %s, please contact with administrator %s''') % (result.ret, ADMIN_MAIL)
                else:
                    return _('''Do not submit request duplicated(status%s ret%s), your \
request been processed successfully.''') % (result.status, result.ret)

            return _('''ERROR, duplicated URL.''')

        # This is a new URL request, we should insert it into task list.
        #sql = 'INSERT INTO wordp_task (uid, status, param1, param2, param3, \
        #add_time) VALUES (%s, %s, \'%s\', \'%s\', \'%s\', %s)' % (user_id, 0,
        #content, '', '', int(time.time()))
        try:
            task = Task(uid=user_id, status=0, param1=content, param2='', 
                        param3='', add_time=int(time.time()), ret=0, retmsg='', client_name=client_name)
            session.add(task)
            session.commit()
        except Exception as e:
            session.rollback()
            logger.error('Can not insert URL into database %s' % content)
            logger.error(str(e))
            return _('Can not deal with your request, (Something about database \
error), please inform administrator %s. Thanks.') % ADMIN_MAIL
        else:
            # NOTE: send signal to clients
            ret = Home.send_task_request(client_name, task.id)
            if ret:
                return _('We have received your URL request, please wait to processing (taskid%s).') % task.id
            else:
                return _('Can not deal with your request, (Something about client manager \
error), please inform administrator %s. Thanks.') % ADMIN_MAIL
コード例 #9
0
    def assign_content_to_client(user_id, user_groupid, client_name, content):
        # Save url to taks list in database
        # Frist, we should find url in database, which user insert in 7
        # days, if find same url, we reject the request.(There have some
        # `status` and `ret` check)
        #sql = 'SELECT * from wordp_task where param1=\'%s\' and add_time>=%s \
        #and uid=%s' \
        #% (content, int(time.time())-3600*24*7, user_id)
        t_before_7_day = int(time.time()) - 3600 * 24 * 7
        result = session.query(Task).filter(Task.uid == user_id).filter(
            Task.add_time >= t_before_7_day).filter(
                Task.param1 == content).filter(
                    Task.client_name == client_name).scalar()
        if result:

            def common_resend():
                # NOTE: send signal to clients
                ret = Home.send_task_request(client_name, result.id)
                if ret:
                    return _('Your request have some error(status%s ret%s), \
and we have send the process request') % (result.status, result.ret)
                else:
                    return _(
                        'Can not deal with your request, (Something about client manager \
error), please inform administrator %s. Thanks.') % ADMIN_MAIL

            if (result.status == 0):
                return common_resend()

            elif (result.status > 0 and result.status < WEIXIN_RETRY_TIME):
                # Resend the request
                if result.ret != 0:
                    return common_resend()
                else:
                    # process successfully
                    return _(
                        '''Do not submit request duplicated(status%s ret%s), your \
request been processed successfully.''') % (result.status, result.ret)
            elif (result.status >= WEIXIN_RETRY_TIME):
                # The task has been processed out of retry time, and we
                # will not process it again
                if result.status == -99:
                    return _(
                        '''Your request is being processed, please wait.''')
                if result.ret != 0:
                    return _('''Your request has been processed, but not \
success, is %s, please contact with administrator %s''') % (result.ret,
                                                            ADMIN_MAIL)
                else:
                    return _(
                        '''Do not submit request duplicated(status%s ret%s), your \
request been processed successfully.''') % (result.status, result.ret)

            return _('''ERROR, duplicated URL.''')

        # This is a new URL request, we should insert it into task list.
        #sql = 'INSERT INTO wordp_task (uid, status, param1, param2, param3, \
        #add_time) VALUES (%s, %s, \'%s\', \'%s\', \'%s\', %s)' % (user_id, 0,
        #content, '', '', int(time.time()))
        try:
            task = Task(uid=user_id,
                        status=0,
                        param1=content,
                        param2='',
                        param3='',
                        add_time=int(time.time()),
                        ret=0,
                        retmsg='',
                        client_name=client_name)
            session.add(task)
            session.commit()
        except Exception as e:
            session.rollback()
            logger.error('Can not insert URL into database %s' % content)
            logger.error(str(e))
            return _(
                'Can not deal with your request, (Something about database \
error), please inform administrator %s. Thanks.') % ADMIN_MAIL
        else:
            # NOTE: send signal to clients
            ret = Home.send_task_request(client_name, task.id)
            if ret:
                return _(
                    'We have received your URL request, please wait to processing (taskid%s).'
                ) % task.id
            else:
                return _(
                    'Can not deal with your request, (Something about client manager \
error), please inform administrator %s. Thanks.') % ADMIN_MAIL
コード例 #10
0
def main():
    '''Process the crawler task whose id is given on the command line.

    Usage: ``<script> <task_id> [redo]``.  The optional second argument
    ``redo`` forces the task to be processed again.

    The outcome is reported through module globals instead of a return
    value:

    * ``g_id_``: the task id parsed from ``sys.argv[1]``.
    * ``g_ret``: 0 on success, -1 when the content could not be fetched,
      -2 on ``CanNotUploadException``, -3 when the task could not be
      obtained, -4 on any other exception.
    * ``g_ret_msg``: message describing the failure ('' on success).
    * ``g_oldstatus``: status value returned by ``set_task_status_99``.

    Exits the process with -1 when no argument is given, and with 0 when
    the task belongs to a different client.  A non-integer task id raises
    ``ValueError`` from ``int()``.
    '''
    # An explicit length check replaces a bare ``except:`` around
    # ``sys.argv[1]``, which would also have swallowed unrelated exceptions.
    if len(sys.argv) < 2:
        sys.stderr.write("Please input crawler url\n")
        sys.exit(-1)

    redo = len(sys.argv) > 2 and sys.argv[2].strip() == 'redo'

    ### Get task from database, and determine the task's status is unfinished(0)
    #   That status(1,2,3) represent try execute time, There is an option in
    #   database to limit the retry time.
    global g_id_
    global g_ret
    global g_ret_msg
    global g_oldstatus
    g_id_ = int(sys.argv[1].strip())
    g_ret = 0
    g_ret_msg = ''
    g_oldstatus = -1

    # Defaults; author, delivery_name and url are replaced from the task row.
    author = 'testuser'
    class_ = CRAWLER_CLASS
    delivery_url = 'http://weixin.qq.com'
    delivery_name = author
    url = None

    ret = session.query(Task).filter(Task.id == g_id_).scalar()
    if ret and ret.client_name != CLIENT_NAME:
        logger.error(
            'We got a error request, let me do task %s, but this is %s\'s task'
            % (g_id_, ret.client_name))
        # NOTE: we do not record this error in database, just exit.
        sys.exit(0)

    # Lock the task row and find out whether we are allowed to process it.
    ret, g_oldstatus = set_task_status_99(g_id_, redo)
    if not ret:
        g_ret_msg = 'Get task(%s) failed' % g_id_
        logger.error(g_ret_msg)
        g_ret = -3
        return

    # Fetch the task together with the nickname of the user who owns it.
    result = session.query(Task, User.nickname).join(
        User, User.id == Task.uid).filter(Task.id == g_id_).all()
    if result:
        url = result[0].Task.param1.strip()
        author = result[0].nickname.strip()
        delivery_name = author
    else:
        g_ret_msg = 'Can not get the task %s' % g_id_
        logger.error(g_ret_msg)
        g_ret = -3
        return

    logger.debug('Get task(%s) information author(%s) url(%s)' %
                 (g_id_, author, url))

    try:
        ret, msg = get_content_from_url(url)
        if not ret:
            # On failure ``msg`` carries the error text.
            g_ret_msg = msg
            g_ret = -1
            return

        processor_name, proc = process_content(msg, url)

        # Work files go into a per-day directory below ``tmpdir``.
        today_tmpdir = os.path.join(
            tmpdir, time.strftime("%Y-%m-%d", time.localtime()))
        if not os.path.exists(today_tmpdir):
            os.makedirs(today_tmpdir)
        # Notice, please input all parameter use unicode
        try:
            post_article(proc.title, str(proc.content), proc.first_img, author,
                         url, proc.name_cn, delivery_url, delivery_name,
                         class_, today_tmpdir)
            g_ret = 0
            return
        except CanNotUploadException as e:
            g_ret = -2
            g_ret_msg = 'CanNotUploadException %s' % str(e)
            return

    except Exception as e:
        import traceback
        msg = 'generic exception: ' + traceback.format_exc()
        logger.error(msg)
        g_ret_msg = 'Unknown error %s \nError: %s' % (url, str(e))
        logger.error(g_ret_msg)
        g_ret = -4
        return