コード例 #1
0
ファイル: views.py プロジェクト: knightsss/tencent_qzone
def qzone_friend(request):
    """Render the QQ-friend crawler page with refreshed thread statuses.

    For every ``Thread_qq_friend`` row registered under this machine's IP,
    asks ``ThreadControl`` whether the named thread is alive, stores the
    answer in ``thread_status`` (1 = alive, 0 = not alive) and saves the row.

    Args:
        request: the incoming request object (not read by this view).

    Returns:
        The response produced by rendering ``qzone_friend.html``.
    """
    # Flag used by the front-end page to show this section as active.
    friend_thread = True
    # IP of the local machine.
    local_ip = get_ip()
    thread_list = Thread_qq_friend.objects.filter(thread_ip=local_ip)
    for thread in thread_list:
        controller = ThreadControl()
        # Only the liveness lookup is guarded: a failing lookup means the
        # thread is treated as not running, while a failing save() is a
        # real database error and must not be hidden or retried.
        try:
            alive = controller.is_alive(thread.thread_name)
        except Exception:
            alive = False
        thread.thread_status = 1 if alive else 0
        thread.save()
    return render_to_response("qzone_friend.html", {
        "thread_list": thread_list,
        "friend_thread": friend_thread
    })
コード例 #2
0
ファイル: views.py プロジェクト: cash2one/spider_tencent
def index(request):
    """Render the index page with refreshed message-thread statuses.

    For every ``ThreadMsg`` row registered under this machine's IP, asks
    ``ThreadControl`` whether the named thread is alive, stores the answer
    in ``thread_status`` (1 = alive, 0 = not alive) and saves the row.

    Args:
        request: the incoming request object (not read by this view).

    Returns:
        The response produced by rendering ``index.html``.
    """
    thread1_status = False
    msg_active = True
    local_ip = get_ip()
    thread_list = ThreadMsg.objects.filter(thread_ip=local_ip)
    for thread in thread_list:
        controller = ThreadControl()
        # Only the liveness lookup is guarded: a failing lookup means the
        # thread is treated as not running, while a failing save() is a
        # real database error and must not be hidden or retried.
        try:
            alive = controller.is_alive(thread.thread_name)
        except Exception:
            alive = False
        thread.thread_status = 1 if alive else 0
        thread.save()
    return render_to_response(
        'index.html', {
            'thread1_status': thread1_status,
            "msg_active": msg_active,
            "thread_list": thread_list
        })
コード例 #3
0
ファイル: views.py プロジェクト: cash2one/spider_tencent
def auditor(request):
    """Render the auditor crawler page with refreshed thread statuses.

    For every ``Threadauditor`` row registered under this machine's IP,
    asks ``ThreadControl`` whether the named thread is alive, stores the
    answer in ``thread_status`` (1 = alive, 0 = not alive) and saves the row.

    Args:
        request: the incoming request object (not read by this view).

    Returns:
        The response produced by rendering ``auditor.html``.
    """
    auditor_thread = True
    local_ip = get_ip()
    thread_list = Threadauditor.objects.filter(thread_ip=local_ip)
    for thread in thread_list:
        controller = ThreadControl()
        # Only the liveness lookup is guarded: a failing lookup means the
        # thread is treated as not running, while a failing save() is a
        # real database error and must not be hidden or retried.
        try:
            alive = controller.is_alive(thread.thread_name)
        except Exception:
            alive = False
        thread.thread_status = 1 if alive else 0
        thread.save()
    return render_to_response("auditor.html", {
        "thread_list": thread_list,
        "auditor_thread": auditor_thread
    })
コード例 #4
0
def qzone_info(request):
    """Render the Qzone-info crawler page with refreshed thread statuses.

    For every ``ThreadQzoneInfo`` row registered under this machine's IP,
    asks ``ThreadControl`` whether the named thread is alive, stores the
    answer in ``thread_status`` (1 = alive, 0 = not alive) and saves the row.

    Args:
        request: the incoming request object (not read by this view).

    Returns:
        The response produced by rendering ``qzone_info.html``.
    """
    info_active = True
    local_ip = get_ip()
    thread_list = ThreadQzoneInfo.objects.filter(thread_ip=local_ip)
    for thread in thread_list:
        controller = ThreadControl()
        # Only the liveness lookup is guarded: a failing lookup means the
        # thread is treated as not running, while a failing save() is a
        # real database error and must not be hidden or retried.
        try:
            alive = controller.is_alive(thread.thread_name)
        except Exception:
            alive = False
        thread.thread_status = 1 if alive else 0
        thread.save()
    return render_to_response('qzone_info.html', {
        "info_active": info_active,
        "thread_list": thread_list
    })
コード例 #5
0
def loaddata(c_thread, thread_num, interval):
    """Worker loop: pop URLs from redis, fetch each one, store it in MongoDB.

    The loop runs until ``c_thread.thread_stop`` is true, the queue returns
    ``None``, or a MongoDB write fails.  Afterwards the browser is shut
    down, an RTX notification is sent and the ``ThreadMsg`` row named
    ``thread_num`` gets ``thread_status = 0``.

    Args:
        c_thread: controlling object exposing a ``thread_stop`` flag.
        thread_num: name of this worker; used in log/notification text and
            to look up its ``ThreadMsg`` row.
        interval: accepted for interface compatibility; not used here.

    Returns:
        0 when ``qq_login()`` yields no driver, otherwise None.
    """
    log_name_title = "tencent_wb_msg_"
    # str() so that non-string thread names can be concatenated safely.
    thread_label = str(thread_num)
    ip = get_ip()
    base_date = time.strftime("%Y%m%d", time.localtime())
    log = log_setting(log_name_title + base_date + ".log")
    log.info("run......")
    driver = qq_login()
    time.sleep(3)

    if driver is None:
        log.info("phantomjs error!quit")
        return 0

    conn_redis = redis_connect()
    conn_mongo = connect_mongodb()

    if conn_redis == 0 or conn_mongo == 0:
        log.info("redis or mongodb connect error")
        # The browser was already started; shut it down instead of leaking it.
        driver.quit()
        return

    log.info("connect redis ok")
    log.info("connect mongodb ok")
    try:
        while not c_thread.thread_stop:
            current_date = time.strftime("%Y%m%d", time.localtime())
            if current_date != base_date:
                # The date changed: continue in a log file for the new day.
                base_date = current_date
                log = log_setting(log_name_title + base_date + ".log")
            log.info('Thread:(%s)' % (thread_num,))
            # Dequeue the next URL.
            url = pop_redis_list(conn_redis)
            if url is None:
                # Queue is empty.
                log.info("msg queue is NULL")
                break
            # Fetch the detailed information for this URL.
            msg = get_msg(driver, url, log)
            try:
                load_mongodb(conn_mongo, url, msg)
            except Exception:
                rtx('ip', ip + "机器mongodb失败")
                log.info('ip' + ip + "机器mongodb失败")
                log.info("mongodb error")
                break
    finally:
        # Runs on normal exit and when an unexpected error escapes the
        # loop, so the browser process is always released.
        log.info(thread_label + "quit phantomjs")
        driver.quit()

    # RTX notification that this worker has stopped.
    rtx('ip', ip + "机器" + thread_label + "停止运行")
    log.info('ip' + ip + "机器" + thread_label + "停止运行")
    # Mark the thread as stopped in the database, looked up by thread name.
    log.info("更新数据库线程状态")
    thread = ThreadMsg.objects.get(thread_name=thread_num)
    thread.thread_status = 0
    thread.save()
コード例 #6
0
ファイル: views.py プロジェクト: cash2one/spider_tencent
def control_thread(request):
    """Start or stop the crawler thread named in the POST data.

    Reads ``id`` (the thread name) and ``control`` (``'start'`` or
    ``'stop'``) from ``request.POST``, drives ``ThreadControl``
    accordingly, updates the matching ``ThreadMsg`` row and re-renders
    ``index.html`` with the threads registered for this machine's IP.

    Args:
        request: request whose POST data carries ``id`` and ``control``.

    Returns:
        The response produced by rendering ``index.html``.
    """
    th_name = request.POST['id']
    control = request.POST['control']
    print("thread_name is  %s" % (th_name,))
    # Flag used by the page to show this section as active.
    msg_active = True
    thread = ThreadMsg.objects.get(thread_name=th_name)
    if control == 'start':
        rtx('ip', '进程' + str(th_name) + '  开始采集标签信息')
        c = ThreadControl()
        # Only the liveness lookup is guarded.  An error there means the
        # thread does not exist yet, so it has to be started.  A failing
        # start() is no longer caught and blindly retried.
        try:
            status = c.is_alive(th_name)
        except Exception:
            print("thread is not alive start!!!")
            status = False
        else:
            print("thread is alive?  %s" % (status,))
            if status:
                print("thread is alive,caonot start twice!")
            else:
                print("start ..........thread1")
        if not status:
            c.start(th_name, 1)
        thread.thread_status = 1
        thread.save()
    elif control == 'stop':
        rtx('ip', '进程' + str(th_name) + '  采集标签信息即将停止')
        c = ThreadControl()
        try:
            c.stop(th_name)
        except Exception:
            print("not thread alive")
        else:
            # Record the stop only when stop() succeeded; a database error
            # here is not masked as "not thread alive".
            thread.thread_status = 0
            thread.save()

    local_ip = get_ip()
    thread_list = ThreadMsg.objects.filter(thread_ip=local_ip)
    return render_to_response(
        'index.html', {
            "thread_name": th_name,
            "control": control,
            "thread_list": thread_list,
            "msg_active": msg_active
        })
コード例 #7
0
def loaddata(c_thread, thread_num, interval):
    """Worker loop: pop QQ numbers from redis and store their Qzone profile.

    For each QQ number popped from ``tencent_qzone_qq_info`` the profile
    page is fetched with ``get_info``.  A result of 0 or 1 pushes the
    number onto ``tencent_qzone_forbid_qq`` (0 additionally triggers a
    fresh login); any other result is written to MongoDB.  The loop runs
    until ``c_thread.thread_stop`` is true, the queue returns ``None``, a
    re-login fails, or a MongoDB write fails.  Afterwards the browser is
    shut down, an RTX notification is sent and the ``ThreadQzoneInfo`` row
    named ``thread_num`` gets ``thread_status = 0``.

    Args:
        c_thread: controlling object exposing a ``thread_stop`` flag.
        thread_num: name of this worker; used in log/notification text and
            to look up its ``ThreadQzoneInfo`` row.
        interval: accepted for interface compatibility; not used here.

    Returns:
        0 when the initial ``qzone_login()`` yields no driver, otherwise
        None.
    """
    # str() once, so every message below can concatenate the name safely.
    thread_label = str(thread_num)
    log_name_title = thread_label + "_tencent_qzone_info_"
    ip = get_ip()
    base_date = time.strftime("%Y%m%d", time.localtime())
    log = log_setting(log_name_title + base_date + ".log")
    log.info(thread_label + "run......")
    driver = qzone_login()
    time.sleep(3)

    if driver is None:
        log.info("phantomjs error!quit")
        return 0

    conn_redis = redis_connect()
    conn_mongo = connect_mongodb()
    # Redis list to pop from, and the list that receives QQ numbers whose
    # profile could not be read.
    redis_list_pop_name = "tencent_qzone_qq_info"
    redis_list_push_qzone_forbid_name = "tencent_qzone_forbid_qq"

    if conn_redis == 0 or conn_mongo == 0:
        log.info("redis or mongodb connect error")
        # The browser was already started; shut it down instead of leaking it.
        driver.quit()
        return

    log.info("connect redis ok")
    log.info("connect mongodb ok")
    try:
        while not c_thread.thread_stop:
            current_date = time.strftime("%Y%m%d", time.localtime())
            if current_date != base_date:
                # The date changed: continue in a log file for the new day.
                base_date = current_date
                log = log_setting(log_name_title + base_date + ".log")

            print('Thread:(%s) Time:%s\n' % (thread_num, time.ctime()))
            qq = pop_redis_list(conn_redis, redis_list_pop_name)
            log.info('Thread:(%s) QQ:%s' % (thread_num, qq))
            if qq is None:
                # Queue is empty.
                log.info("queue is NULL")
                break

            # Fetch the detailed profile information.
            url = "http://user.qzone.qq.com/" + str(qq) + "/profile"
            info_list = get_info(driver, url, log)

            if info_list == 0:
                # Request failed: park the QQ number on the forbidden queue
                # and log in again with a fresh browser.
                push_redis_list_tmp(conn_redis,
                                    redis_list_push_qzone_forbid_name, qq)
                log.info(str(qq) + "请求失败,入队禁止访问消息队列")
                # Release the old browser before replacing it; a failure to
                # quit must not stop the worker.
                stale_driver, driver = driver, None
                try:
                    stale_driver.quit()
                except Exception:
                    log.info("quit phantomjs before re-login failed")
                driver = qzone_login()
                if driver is None:
                    # Without a browser nothing more can be fetched.
                    log.info("phantomjs error!quit")
                    break
            elif info_list == 1:
                # Park the QQ number on the forbidden queue.
                push_redis_list_tmp(conn_redis,
                                    redis_list_push_qzone_forbid_name, qq)
                log.info(str(qq) + "入队禁止访问消息队列")
            else:
                # Store the profile in MongoDB.
                log.info("load to mongodb")
                try:
                    load_mongodb_qzone_info(conn_mongo, qq, info_list)
                except Exception:
                    rtx('ip', ip + "机器mongodb失败")
                    log.info('ip' + ip + "机器mongodb失败")
                    log.info("mongodb error")
                    break
    finally:
        # Runs on normal exit and when an unexpected error escapes the
        # loop; the driver is None only after a failed re-login.
        log.info(thread_label + "quit phantomjs")
        if driver is not None:
            driver.quit()

    # RTX notification that this worker has stopped.
    rtx('ip', ip + "机器" + thread_label + "停止运行")
    log.info('ip' + ip + "机器" + thread_label + "停止运行")
    # Mark the thread as stopped in the database, looked up by thread name.
    log.info("更新数据库线程状态")
    thread = ThreadQzoneInfo.objects.get(thread_name=thread_num)
    thread.thread_status = 0
    thread.save()
コード例 #8
0
def loaddata(c_thread, thread_num, interval):
    """Worker loop: collect the QQ numbers found on a user's Qzone mood page.

    For each QQ number popped from ``tencent_qzone_qq_test`` the mood page
    is opened, the QQ numbers linked from ``comments_content`` elements are
    collected, the (qq, friend) pairs are inserted into MySQL, and the QQ
    number plus every collected entry are pushed onto
    ``tencent_qzone_qq_tmp_test``.  The friend list is ``['-1']`` when the
    content frame cannot be reached and ``['0']`` when no comments are
    found.  When the loop ends the browser is shut down, an RTX
    notification is sent and the ``Thread_qq_friend`` row named
    ``thread_num`` gets ``thread_status = 0``.

    Args:
        c_thread: controlling object exposing a ``thread_stop`` flag.
        thread_num: name of this worker; used in printed/notification text
            and to look up its ``Thread_qq_friend`` row.
        interval: accepted for interface compatibility; not used here.

    Returns:
        0 when ``qzone_login()`` yields no driver, otherwise None.
    """
    print("run......")
    driver = qzone_login()
    time.sleep(3)

    if driver is None:
        # The original had this message as a bare string that was never
        # printed.
        print("phantomjs error!quit")
        return 0

    # Connect to redis.
    conn_redis = redis_connect()
    redis_list_name_pop = "tencent_qzone_qq_test"
    redis_list_name_push = "tencent_qzone_qq_tmp_test"
    print("conn_redis %s" % (conn_redis,))
    if conn_redis is None:
        print("redis connect error")
        # The browser was already started; shut it down instead of leaking it.
        driver.quit()
        return

    try:
        while not c_thread.thread_stop:
            print('qzone_qq_friend Thread:(%s) Time:%s\n' %
                  (thread_num, time.ctime()))
            qq = pop_redis_list(conn_redis, redis_list_name_pop)
            if qq is None:
                print("queue is NULL")
                break

            url = "http://user.qzone.qq.com/" + qq + "/mood"
            print("url %s" % (url,))
            driver.get(url)
            try:
                # Wait for the page to load its content frame.
                WebDriverWait(driver, 3).until(
                    EC.presence_of_element_located(
                        (By.ID, "app_canvas_frame")))
                print("find frame id")
                driver.switch_to.frame('app_canvas_frame')
                try:
                    # Wait for the comment elements inside the frame.
                    WebDriverWait(driver, 10).until(
                        EC.presence_of_element_located(
                            (By.CLASS_NAME, "comments_content")))
                    print("find conment")
                    html = driver.page_source
                    soup = BeautifulSoup(html)
                    print("======")
                    my_set = set()
                    for i in soup.find_all(class_='comments_content'):
                        # The QQ number is sliced out of the link target at
                        # fixed offsets.
                        friend_qq = str(i.find('a')['href'])[25:-6]
                        print(friend_qq)
                        if friend_qq != qq:
                            my_set.add(friend_qq)
                    print(my_set)
                    friend_qq_list = list(my_set)
                    print(friend_qq_list)
                except Exception:
                    print("not found conment")
                    friend_qq_list = ['0']
            except Exception:
                # Reported as "no permission to access".
                print("没有权限访问")
                friend_qq_list = ['-1']

            print(friend_qq_list)
            # ---- store in MySQL ----
            print("insert mysql")
            # Build the (qq, friend_qq) tuples to insert.
            tmp_tuple = get_tuple(qq, friend_qq_list)
            print("insert into table ")
            mysql_conn = mysql_connect_local_qq()
            try:
                insert_mysql_qq(mysql_conn, tmp_tuple)
            finally:
                # Close the connection even when the insert fails.
                mysql_conn.close()

            # ---- push onto the temporary redis list ----
            print("put mid redis")
            push_redis_list_tmp(conn_redis, redis_list_name_push, qq)
            print("put auditor mid redis")
            for friend_qq in friend_qq_list:
                push_redis_list_tmp(conn_redis, redis_list_name_push,
                                    friend_qq)
    finally:
        # Runs on normal exit and when an unexpected error escapes the
        # loop, so the browser process is always released.
        print("%s quit phantomjs" % (thread_num,))
        driver.quit()

    # RTX notification that this worker has stopped.
    ip = get_ip()
    rtx('ip', ip + "机器" + str(thread_num) + "停止运行")
    # Mark the thread as stopped in the database, looked up by thread name.
    print("更新数据库线程状态")
    thread = Thread_qq_friend.objects.get(thread_name=thread_num)
    thread.thread_status = 0
    thread.save()
コード例 #9
0
def loaddata(c_thread, thread_num, interval):
    """Worker loop: collect the accounts a Tencent Weibo user listens to.

    For each id popped from redis the user's page ``http://t.qq.com/<mid>``
    is resolved to its "listening" pages and then to the listened-to ids.
    The (mid, auditor_mid) pairs are inserted into MySQL and all ids are
    pushed onto the temporary redis list.  If the page list cannot be
    obtained the browser is restarted via ``qq_login()``.  When the loop
    ends the browser is shut down, an RTX notification is sent and the
    ``Threadauditor`` row named ``thread_num`` gets ``thread_status = 0``.

    Args:
        c_thread: controlling object exposing a ``thread_stop`` flag.
        thread_num: name of this worker; used in log/notification text and
            to look up its ``Threadauditor`` row.
        interval: accepted for interface compatibility; not used here.

    Returns:
        0 when the initial ``qq_login()`` yields no driver, otherwise None.
    """
    log_name_title = "tencent_wb_auditor_"
    # str() once, so every message below can concatenate the name safely.
    thread_label = str(thread_num)
    base_date = time.strftime("%Y%m%d", time.localtime())
    log = log_setting(log_name_title + base_date + ".log")
    log.info(thread_label + "run......")
    driver = qq_login()
    time.sleep(3)

    if driver is None:
        log.info("phantomjs error!quit")
        return 0

    # Connect to redis.
    conn_redis = redis_connect()
    if conn_redis is None:
        log.info("redis connect error")
        # The browser was already started; shut it down instead of leaking it.
        driver.quit()
        return

    log.info("connect redis ok")
    ip = get_ip()
    try:
        while not c_thread.thread_stop:
            current_date = time.strftime("%Y%m%d", time.localtime())
            if current_date != base_date:
                # The date changed: continue in a log file for the new day.
                base_date = current_date
                log = log_setting(log_name_title + base_date + ".log")
            log.info('Thread:(%s)' % (thread_num,))
            mid = pop_redis_list(conn_redis)
            if mid is None:
                log.info("queue is NULL")
                break

            url = "http://t.qq.com/" + str(mid)
            log.info("url is: " + url)
            time.sleep(3)
            # From the user's home page, get every "listening" page URL.
            auditor_page_url_list = get_auditor_page_url_via_url(driver, url)
            if auditor_page_url_list is None:
                log.info("page is not personal,login again")
                # Drop the reference first so the cleanup below never
                # touches a browser that has already been quit.
                stale_driver, driver = driver, None
                stale_driver.quit()
                driver = qq_login()
                if driver is None:
                    break
                continue

            # From the listening pages, get the ids of the listened-to users.
            mid_list = get_auditor_main_url(driver, auditor_page_url_list)
            if mid_list is None:
                continue

            # ---- store in MySQL ----
            try:
                log.info("insert mysql")
                # Build the (mid, auditor_mid) tuples to insert.
                tmp_tuple = get_tuple(mid, mid_list)
                print("insert into table ")
                mysql_conn = mysql_connect()
                try:
                    insert_mysql(mysql_conn, tmp_tuple)
                finally:
                    # Close the connection even when the insert fails.
                    mysql_conn.close()
            except Exception:
                rtx('ip', ip + "机器mysql出错")
                log.info('ip' + ip + "机器mysql出错")
                log.info("insert mysql error")
            # ---- push onto the temporary redis list ----
            try:
                log.info("put mid redis")
                push_redis_list_tmp(conn_redis, mid)
                log.info("put auditor mid redis")
                for auditor_mid in mid_list:
                    push_redis_list_tmp(conn_redis, auditor_mid)
            except Exception:
                rtx('ip', ip + "机器redis出错")
                log.info('ip' + ip + "机器redis出错")
                log.info("insert redis error")
    finally:
        # Runs on normal exit and when an unexpected error escapes the
        # loop.  The driver is None after a failed re-login; calling
        # quit() on it used to raise and skip the status update below.
        log.info(thread_label + "quit phantomjs")
        if driver is not None:
            driver.quit()

    # RTX notification that this worker has stopped.
    rtx('ip', ip + "机器" + thread_label + "停止运行")
    log.info('ip' + ip + "机器" + thread_label + "停止运行")
    # Mark the thread as stopped in the database, looked up by thread name.
    log.info("更新数据库线程状态")
    thread = Threadauditor.objects.get(thread_name=thread_num)
    thread.thread_status = 0
    thread.save()
コード例 #10
0
def loaddata(c_thread, thread_num, interval):
    """Worker loop: collect the QQ numbers found on a user's Qzone mood page.

    For each QQ number popped from ``tencent_qzone_qq`` the mood page is
    opened, the QQ numbers linked from ``comments_content`` elements are
    collected, the (qq, friend) pairs are inserted into MySQL, and the QQ
    number plus every collected entry are pushed onto
    ``tencent_qzone_qq_transfer``.  The friend list is ``['-1']`` when the
    content frame cannot be reached and ``['0']`` when no comments are
    found.  MySQL and redis failures are reported via RTX and the log, and
    the loop continues.  When the loop ends the browser is shut down, an
    RTX notification is sent and the ``Thread_qq_friend`` row named
    ``thread_num`` gets ``thread_status = 0``.

    Args:
        c_thread: controlling object exposing a ``thread_stop`` flag.
        thread_num: name of this worker; used in log/notification text and
            to look up its ``Thread_qq_friend`` row.
        interval: accepted for interface compatibility; not used here.

    Returns:
        0 when ``qzone_login()`` yields no driver, otherwise None.
    """
    # NOTE(review): this prefix says "wb_auditor" although the worker
    # crawls Qzone friends -- looks copy-pasted; confirm before renaming,
    # since it determines the log file name.
    log_name_title = "tencent_wb_auditor_"
    # str() once, so every message below can concatenate the name safely.
    thread_label = str(thread_num)
    base_date = time.strftime("%Y%m%d", time.localtime())
    log = log_setting(log_name_title + base_date + ".log")
    log.info(thread_label + "run......")
    driver = qzone_login()
    time.sleep(3)

    if driver is None:
        log.info("phantomjs error!quit")
        return 0

    # Connect to redis.
    conn_redis = redis_connect()
    redis_list_name_pop = "tencent_qzone_qq"
    redis_list_name_push = "tencent_qzone_qq_transfer"
    print("conn_redis %s" % (conn_redis,))
    if conn_redis is None:
        log.info("redis connect error")
        # The browser was already started; shut it down instead of leaking it.
        driver.quit()
        return

    log.info("redis connect ok")
    ip = get_ip()
    try:
        while not c_thread.thread_stop:
            log.info('qzone_qq_friend Thread:(%s) Time:%s' %
                     (thread_num, time.ctime()))
            qq = pop_redis_list(conn_redis, redis_list_name_pop)
            if qq is None:
                log.info("queue is NULL")
                break

            url = "http://user.qzone.qq.com/" + qq + "/mood"
            log.info("url" + url)
            driver.get(url)
            try:
                # Wait for the page to load its content frame.
                WebDriverWait(driver, 3).until(
                    EC.presence_of_element_located(
                        (By.ID, "app_canvas_frame")))
                log.info("find frame id")
                driver.switch_to.frame('app_canvas_frame')
                try:
                    # Wait for the comment elements inside the frame.
                    WebDriverWait(driver, 10).until(
                        EC.presence_of_element_located(
                            (By.CLASS_NAME, "comments_content")))
                    log.info("find conment")
                    html = driver.page_source
                    soup = BeautifulSoup(html)
                    print("======")
                    my_set = set()
                    for i in soup.find_all(class_='comments_content'):
                        # The QQ number is sliced out of the link target at
                        # fixed offsets.
                        friend_qq = str(i.find('a')['href'])[25:-6]
                        print(friend_qq)
                        if friend_qq != qq:
                            my_set.add(friend_qq)
                    print(my_set)
                    friend_qq_list = list(my_set)
                    print(friend_qq_list)
                except Exception:
                    log.info("not found conment")
                    friend_qq_list = ['0']
            except Exception:
                # Reported as "no permission to access".
                log.info("没有权限访问")
                friend_qq_list = ['-1']

            print(friend_qq_list)
            # ---- store in MySQL ----
            try:
                log.info("insert mysql")
                # Build the (qq, friend_qq) tuples to insert.
                tmp_tuple = get_tuple(qq, friend_qq_list)
                print("insert into table ")
                mysql_conn = mysql_connect_qq()
                try:
                    insert_mysql_qq(mysql_conn, tmp_tuple)
                finally:
                    # Close the connection even when the insert fails.
                    mysql_conn.close()
            except Exception:
                rtx('ip', ip + "机器QQ空间关系链采集mysql出错")
                log.info('ip' + ip + "机器QQ空间关系链采集mysql出错")
            # ---- push onto the temporary redis list ----
            try:
                log.info("put mid redis")
                push_redis_list_tmp(conn_redis, redis_list_name_push, qq)
                log.info("put auditor mid redis")
                for friend_qq in friend_qq_list:
                    push_redis_list_tmp(conn_redis, redis_list_name_push,
                                        friend_qq)
            except Exception:
                rtx('ip', ip + "机器QQ空间关系链采集redis入队出错")
                log.info('ip' + ip + "机器QQ空间关系链采集redis入队出错")
    finally:
        # Runs on normal exit and when an unexpected error escapes the
        # loop, so the browser process is always released.
        log.info(thread_label + "quit phantomjs")
        driver.quit()

    # RTX notification that this worker has stopped.
    rtx('ip', ip + "机器" + thread_label + "停止运行")
    log.info('ip' + ip + "机器" + thread_label + "停止运行")
    # Mark the thread as stopped in the database, looked up by thread name.
    log.info("更新数据库线程状态")
    thread = Thread_qq_friend.objects.get(thread_name=thread_num)
    thread.thread_status = 0
    thread.save()