def allocateJobs(tiebaname,avpages,onlinecount):
    """Send every stored crawler the page range it has to crawl.

    The crawler list is read with ``getData(TZDS.DATA_CRAWLER_LIST)``.  In
    list order, each crawler is sent a ``TZDS.JOB_CONFIRM`` command whose
    arguments are ``[tiebaname, first_page, first_page + avpages]``;
    ``first_page`` starts at 0 and grows by ``avpages`` per crawler.

    Args:
        tiebaname: tieba name, passed through into every command.
        avpages: number of pages handed to each crawler.
        onlinecount: crawler count printed in the final log line.  The loop
            itself iterates the stored crawler list, not this number.
    """
    clist = getData(TZDS.DATA_CRAWLER_LIST)
    total = len(clist)
    first_page = 0
    for index, crawler in enumerate(clist, start=1):
        print("\t\t\tAllocating for #",index," / ",total," crawler...")
        cmd = TZDF.makeUpCommand(TZDS.JOB_CONFIRM,[tiebaname,first_page,first_page + avpages])
        first_page += avpages
        # The fourth field of a crawler entry is used as its connection.
        crawler[3].sendall(cmd.encode("utf-8"))
    print("TZ TaskManager: Jobs has been allocate to ",onlinecount,"crawlers")
# --- Example 2 ---
def Interactive(conn, ct, nct, crawlist):
    """Run the receive/reply loop for one client connection, then clean up.

    A banner is sent first and the crawler status slot for ``nct`` is reset
    to 0.0.  Every received message is resolved into a command; the loop
    ends on a message shorter than 3 characters, on ``TZDS.FINISH``, on
    ``TZDS.OFFLINE`` (when this client was found in ``crawlist``) or on
    ``TZDS.ADMIN_SHUTDOWN``.  All other commands are handed to
    ``TZIF.autoInteract`` and its reply is sent back to the client.

    A message that cannot be resolved is reported, answered with a
    ``TZDS.ERROR`` command and skipped, so the loop never acts on an
    unbound or stale command.

    On exit (normal or by exception) the connection is closed, ``t[nct]``
    is deleted and ``count[0]`` is decremented; ``t``, ``count`` and
    ``SERVER_SHUTDOWN`` are module-level objects defined outside this
    function.

    Args:
        conn: client connection handed to the ``TZIC`` send/recv/close helpers.
        ct: client index; shown in log messages as ``ct + 1``.
        nct: id of this client; used as the crawler-status key, as the key
            into ``t`` and matched against the first field of crawler entries.
        crawlist: list of crawler entries whose first field is the crawler ID.
    """
    # NOTE(review): the address in the banner reads "[email protected]", which
    # looks like a redaction placeholder - restore the real text if known.
    TZIC.clientInterreactiveSend(
        conn,
        "TiebaZhuaqu TaskManager ver1.0 by Kanch\[email protected]\nhttp://akakanch.com\n"
    )
    TZIF.setDate(TZDS.DATA_CRAWLER_STATUS, 0.0, int(nct))
    # NOTE(review): FIRSTRUN is never cleared, so the admin check below is
    # applied to every message, not only the first one - confirm the intent.
    FIRSTRUN = True
    ADMINCONVERSITION = False
    IS_ADMIN = False  # used to switch off the echo
    try:
        while True:
            data = TZIC.clientInterreactiveRecv(conn)
            if len(data) < 3:
                if ADMINCONVERSITION == True:
                    print("Admin conversition offline.")
                else:
                    print("conversiton interrupted!")
                break
            try:
                # Result is not needed; the call is kept because a failure
                # here must stop the message from being processed.
                TZDF.findMatchCommand(data)
                relcmd = TZDF.resolveCommand(data)
                cmd_head = int(relcmd[0])
                IS_ADMIN = TZIF.CheckAdmin(cmd_head)
            except Exception as e:
                # The helpers are opaque from here, so the catch stays broad,
                # but the failure is reported and the message is dropped.
                print("\t\t\tInteractive:Client #" + str(ct + 1) +
                      ": message can not be resolved:", e)
                TZIC.clientInterreactiveSend(
                    conn,
                    str(TZDF.makeUpCommand(TZDS.ERROR,
                                           ["command can not be resolved"])))
                continue
            # CheckAdmin's return type is not visible here, so the explicit
            # comparisons with True/False are kept as they were.
            if FIRSTRUN == True:
                if IS_ADMIN == True:
                    ADMINCONVERSITION = True
            if (cmd_head != TZDS.ADMIN_JOBCREATE and IS_ADMIN == False):
                showMsg("\tInteractive:Client #" + str(ct + 1) + ":" + str(data))
                print("\t\t\tInteractive:relcmd", relcmd)
            else:
                print("JOB RECIVED!")
            if cmd_head == TZDS.FINISH:
                break
            elif cmd_head == TZDS.OFFLINE:
                removed = False
                for index, item in enumerate(crawlist):  # ID,IP,PORT
                    if int(item[0]) == int(nct):
                        # Safe while iterating: the loop stops right after.
                        del crawlist[index]
                        removed = True
                        break
                TZIF.delData(TZDS.DATA_CRAWLER_STATUS, int(nct))
                TZIF.setDate(TZDS.DATA_CRAWLER_LIST, crawlist)
                if removed:
                    break
                # Not found in the list: fall through to autoInteract.
            elif cmd_head == TZDS.ADMIN_SHUTDOWN:
                SERVER_SHUTDOWN[0] = True
                break
            cmd, CRLCHANGE, CRLCHANGE_LIST = TZIF.autoInteract(
                relcmd, conn, nct, crawlist)
            if CRLCHANGE == True:
                # NOTE(review): only the local list is replaced; a
                # module-level crawler list would need a `global` statement.
                crawlist = CRLCHANGE_LIST
            TZIC.clientInterreactiveSend(conn, str(cmd))
    finally:
        # Release the connection and the bookkeeping slots even when the
        # loop above was left through an exception.
        TZIC.closeConnection(conn)
        del t[nct]
        count[0] -= 1
        if (IS_ADMIN == False):
            showMsg(
                "*********************\r\n***\t***Client #" + str(ct + 1) +
                ": connection closed,resource cleaned***\r\n*********************")
    except Exception as e:
        print(e)  
print('>>>>>服务器连接成功!')
# Register this crawler with the TaskManager server.
# `s` is created before the visible part of this script.
localIP = socket.gethostbyname(socket.gethostname())    # get the local IP
LOCAL_PORT = 50006
print ("本地地址:",localIP)
# First message from the server is printed as-is.
data=s.recv(1024)  
print("TaskManager:\n",data.decode("utf-8"))
ID = -1
registcmd = "101," + str(localIP) + ",50006"  # send the new-crawler registration command to the server
s.sendall(registcmd.encode("utf-8"))  
# The second field of the server's reply is read as the crawler ID.
data=s.recv(1024)  
data = data.decode("utf-8")
CID = TZDF.resolveCommand(data)[1]
CID = int(CID)   # the ID that was allocated
if CID < 0 :
    # A negative ID is treated as a failed registration and ends the program.
    print("TaskManager服务器无法完成爬虫注册:爬虫ID分配失败---->程序退出!")
    exit()
print("爬虫ID:",CID)
#====Initialization complete========================================================
#====Enter the message loop and wait for a job to be allocated======================
# Only when the server sends back the JOB_COMFIRM (503) status code
# does the message loop exit so that the job information can be read.
data=s.recv(1024)
data = data.decode("utf-8")
TMCMD = int(TZDF.resolveCommand(data)[0])
print ("等待任务分配中...")
#s.sendall("700".encode("utf-8"))    # send the job test command
while TMCMD != TZDS.JOB_CONFIRM:
def _formatCrawlerList(crawlerlist):
    """Flatten the first three fields of every crawler entry into one '@'-separated string."""
    entries = [[item[0],item[1],item[2]] for item in crawlerlist]
    # One pass: "," becomes "@", and "[", "]", "'" and " " are deleted.
    return str(entries).translate(str.maketrans(",","@","[]' "))


def _probeOnlineCrawlers(clist,probecmd):
    """Send ``probecmd`` to every crawler in ``clist``; return those whose send succeeded."""
    payload = probecmd.encode("utf-8")
    total = len(clist)
    online = []
    for index, crawler in enumerate(clist, start=1):
        print("\t\t\tchecking online  for #",index," / ",total," crawler...",end="\t")
        try:
            crawler[3].sendall(payload)
        except Exception:
            # Best effort: any failure to send counts as "offline".
            print("\tFAILED")
        else:
            online.append(crawler)
            print("\tOK")
    return online


def autoInteract(relcmd,conn,crawlerid,crawlerlist):
    """Handle one resolved command and build the reply for it.

    ``relcmd[0]`` is the numeric command head; the remaining fields depend
    on the command.  Some branches also send a command on ``conn``
    themselves (registration, file transfer, job creation) in addition to
    returning one.

    Args:
        relcmd: resolved command fields, head first.
        conn: connection of the client that sent the command.
        crawlerid: id of that client.
        crawlerlist: list of crawler entries ``[id, ip, port, conn]``;
            a new entry is appended on registration.

    Returns:
        The reply command built by ``TZDF.makeUpCommand``, or ``123`` when
        the command head matches none of the branches.
    """
    cmd = 123
    cmd_head = int(relcmd[0])
    print("cmd_head=",cmd_head)
    if cmd_head == TZDS.FINISH:    # interaction finished, temporary disconnect
        cmd = TZDF.makeUpCommand(TZDS.OKCLOSE,["crawler has been temparily disconnect to the server"])
    elif cmd_head == TZDS.REGISTE:  # register the crawler with the server
        crawlerlist.append([str(crawlerid),str(relcmd[1]),int(relcmd[2]),conn])
        setDate(TZDS.DATA_CRAWLER_LIST,crawlerlist)
        cmd = TZDF.makeUpCommand(TZDS.ONLINE_ECHO,[crawlerid,"crawler has been registe to server"])
        # The reply is sent here and is also returned to the caller.
        TZIC.clientInterreactiveSend(conn,cmd)
    elif cmd_head == TZDS.JOBSTATUS:    # progress report from a crawler: CODE,ID,POCESS RATE
        pstatus = str(relcmd[2])
        setDate(TZDS.DATA_CRAWLER_STATUS,pstatus,crawlerid=crawlerid)
        showMsg("recive pocess rate:" + str(pstatus),crawlerid)
        cmd = TZDF.makeUpCommand(TZDS.OK,["crawler job status recived"])
    elif cmd_head == TZDS.JOBTRANSFER:  # crawler uploads its result file to the server
        cmd = TZDF.makeUpCommand(TZDS.OK,["ready to transfer"])
        TZIC.clientInterreactiveSend(conn,cmd)
        recvFile(conn,crawlerid=crawlerid)
    elif cmd_head == TZDS.ADMIN_STATUS: # report the overall progress to the admin client
        Updata()
        total_status = getData(TZDS.DATA_TOTAL_AVERAGE_STATUS)
        cmd = TZDF.makeUpCommand(TZDS.OK,[str(total_status),"server total task status sended"])
    elif cmd_head == TZDS.ADMIN_CRAWLER_LIST:   # admin asks for the list of online crawlers
        strIDList = _formatCrawlerList(crawlerlist)
        print("\t\t\t",strIDList)
        cmd = TZDF.makeUpCommand(TZDS.CRAWLER_LIST,[strIDList,"crawler list has been sended"])
    elif cmd_head == TZDS.ADMIN_JOBCREATE:  # job created by the admin client: CODE,ADMINID,TIEBANAME,PAGE
        if str(relcmd[1]) == TZDS.ESSEN_ADMIN_CODE:
            TiebaName = str(relcmd[2])
            Pages = int(relcmd[3])
            setDate(TZDS.DATA_POCESS_TIEBA_NAME,TiebaName)
            setDate(TZDS.DATA_POCESS_PAGES_TO,Pages)
            rr = "TiebaName=" + TiebaName +";pages to pocess=" + str(Pages)
            print("\t\t\twaiting for Admin confirm job...")
            cmd = TZDF.makeUpCommand(TZDS.JOB_CONFIRM,[rr,"confirm job ?"])
            TZIC.clientInterreactiveSend(conn,cmd)
            got,_ = TZDF.getPerferResponse(TZDS.OK,conn)   # preferred reply: OK
            if got == True:
                # One online check: crawlers that cannot be reached are dropped
                # from the stored list before the pages are divided.
                cmd = TZDF.makeUpCommand(TZDS.OK,["tiebaname,aledpages,","aledpages + avpages"])
                onlineclist = _probeOnlineCrawlers(getData(TZDS.DATA_CRAWLER_LIST),cmd)
                setDate(TZDS.DATA_CRAWLER_LIST,onlineclist)
                onlinecount = len(onlineclist)
                if onlinecount == 0:
                    # Nothing to divide the pages by; tell the admin instead
                    # of failing with a division by zero.
                    print("\t\t\tno crawler online, job can not be allocated.")
                    cmd = TZDF.makeUpCommand(TZDS.ERROR,["no crawler online, job not allocated"])
                else:
                    print("\t\t\tJob Confirmed by Admin.")
                    # NOTE(review): the remainder of this division is dropped,
                    # so trailing pages are not assigned to any crawler.
                    avergepage = Pages // onlinecount
                    print("\t\tallocate job...")
                    allocateJobs(TiebaName,avergepage,onlinecount)
                    print("\t\t\tjob allocate done!")
                    # cmd is still the OK command built for the online check;
                    # that is what gets returned to the caller.
            else:
                cmd = TZDF.makeUpCommand(TZDS.OKCLOSE,["Job allocate interrupt by admin"])
        else:
            cmd = TZDF.makeUpCommand(TZDS.ERROR,["ADMIN IDENTIFY FAILED!"])
    elif cmd_head == TZDS.ADMIN_JOBTRANSFER:    # send all gathered results to the admin client
        gatherSubjobs()
        sendFile(conn,"/../tieba-zhuaqu/reciveCache/tresult.txt")
        cmd = TZDF.makeUpCommand(TZDS.START_TRANSFER,["transfer done"])
    elif cmd_head == TZDS.ADMIN_SHUTDOWN:   # shut the task manager server down
        cmd = TZDF.makeUpCommand(TZDS.OKCLOSE,["task server is going offline"])
    elif cmd_head == TZDS.ONLINE_ECHO:  # connectivity test
        cmd = TZDF.makeUpCommand(TZDS.ONLINE_ECHO,[" online - connection is ok"])
    elif cmd_head == TZDS.ADMIN_ONLINE:
        cmd = TZDF.makeUpCommand(TZDS.ONLINE_ECHO,[" Admin is online"])
    elif cmd_head == TZDS.FACTORY_TEST:
        cmd = TZDF.makeUpCommand(TZDS.JOB_CONFIRM,["成都信息工程大学","0","8"])
        print("--TEST MODE---")
    return cmd
        print(e)

# Client start-up: greet the server, register, and read the allocated ID.
# `s` is created before the visible part of this script.
localIP = socket.gethostbyname(socket.gethostname())  # get the local IP
LOCAL_PORT = 50006
os.system('cls')
print("local ip:", localIP)
# First message from the server is printed as-is.
data = s.recv(1024)
print("Remote Server(TM):", data.decode("utf-8"))
cmd = "123"
STATUS = 0  #0=ok,1=file send mode ,2 = regist
ID = -1
# Registration command: code 101 followed by this host's IP and port.
registcmd = "101," + str(localIP) + ",50006"
s.sendall(registcmd.encode("utf-8"))
# The second field of the server's reply is read as the client ID.
data = s.recv(1024)
data = data.decode("utf-8")
CID = TZDF.resolveCommand(data)[1]
CID = int(CID)  # the ID that was allocated
if CID < 0:
    # A negative ID is treated as a failed allocation and ends the program.
    print("remote server cannot  allocate ID,applicaiton will exit!")
    exit()
print("ID=", CID)
while cmd != "SHUTDOWN":
    data = s.recv(1024)
    if (STATUS == 0):
        print("Remote Server(TM):", data.decode("utf-8"))
    elif (STATUS == 1):
        print("Remote Server(TM):", data.decode("utf-8"))
        TZIF.sendFile(s, crawlerid=ID)
        STATUS = 0
    elif (STATUS == 2):
        data = data.decode("utf-8")