Esempio n. 1
0
def acqMainItems():
    """Scan listing pages and queue the game links that are not stored yet.

    Repeatedly takes the next value from ``mySpiderCfgMain.iter``, downloads
    it with ``acqHtml`` and, for every ``.nr3 dd a`` anchor on the page, puts
    the anchor's ``href`` on ``mainItems`` when ``wrapper.goalClass`` has no
    row whose ``gameUrl`` equals that ``href``.

    Returns:
        None. The loop ends when the iterator is exhausted (prints ``done``)
        or when fetching/decoding a page fails (prints the error).
    """
    while True:
        try:
            page = tz.decodeForThisSys(acqHtml(mySpiderCfgMain.iter.next()))
        except StopIteration:
            # Normal end of the listing: nothing left to fetch.
            print('done')
            return
        except Exception as exc:
            # Any other failure still stops this loop, but it is reported as
            # an error instead of being mistaken for a clean finish.
            # KeyboardInterrupt / SystemExit are deliberately not caught.
            print('acqMainItems stopped on error: %r' % (exc,))
            return

        # NOTE(review): `page` was already passed through decodeForThisSys
        # above; the second call is kept unchanged -- confirm it is intended.
        items = pq(tz.decodeForThisSys(page)).find('.nr3 dd a')

        for item in items:
            href = pq(item).attr('href')

            # Only queue links that have no stored row yet.
            if wrapper.goalClass.selectBy(gameUrl=href).count() == 0:
                mainItems.put(href)
Esempio n. 2
0
def _acqScore(q):
    """Fetch the vote summary for the game described by parsed page *q*.

    Returns:
        A ``(score, commentCount)`` pair. ``(-1.0, -1)`` is returned when the
        page has no ``#softid`` value or the vote data cannot be fetched or
        parsed as JSON.
    """
    softId = q.find('#softid').val()
    if softId is None:
        return -1.0, -1

    url = 'http://dy.www.yxdown.com/open/op.ashx?action=/soft/votes/data.json&sid=%s' % softId
    try:
        votes = json.loads(acqHtml(url))
    except Exception:
        # Best effort: report which URL failed and fall back to the sentinel.
        print(url)
        return -1.0, -1

    return votes['Score'], votes['Normal'] + votes['DOWN'] + votes['UP']


def _acqImg(q):
    """Return what ``acqHtml`` yields for the page's cover image, or ``''``.

    The image URL is the ``src`` of ``div.dl>dl>dd>img``; an empty/None URL
    (per ``tz.emptyOrNoneAll``) gives ``''`` without any download.
    """
    url = q.find('div.dl>dl>dd>img').attr('src')
    if tz.emptyOrNoneAll(url):
        return ''
    return acqHtml(url)


def _acqDate(text):
    """Parse a ``%Y/%m/%d`` string into a ``datetime.date``.

    Returns ``datetime.date(1949, 10, 1)`` as a placeholder when *text* is
    empty/None (per ``tz.emptyOrNoneAll``) or cannot be parsed.
    """
    fallback = datetime.date(1949, 10, 1)
    if tz.emptyOrNoneAll(text):
        return fallback

    try:
        return datetime.datetime.strptime(text, '%Y/%m/%d').date()
    except Exception:
        # Kept broad on purpose: any parsing failure yields the placeholder.
        return fallback


def _acqCommentAll(q):
    """Download the hot comments for the game on parsed page *q*.

    Returns:
        One line per comment, formatted ``city,ip:content @datetime``, joined
        into a single string. ``''`` when the page has no ``#softid`` value or
        the comment feed cannot be fetched or parsed.
    """
    softId = q.find('#softid').val()
    if softId is None:
        return ''

    url = 'http://pl.yxdown.com/ping.ashx/hot.js?key=soft&vote=6&sid=%s&count=10&callback=window.Pinglun.GetHotCommentsCallback()&encoding=gb2312' % softId

    # Bound before the try so the error handler can always print it, even
    # when the download itself is what failed.
    raw = ''
    try:
        raw = acqHtml(url)

        # The response is JavaScript; cut out the object literal between
        # '= {' and the trailing ';window.Pinglun.GetHot...' call.
        payload = raw[raw.index('= {') + 2:raw.index(';window.Pinglun.GetHot')]

        comments = json.loads(payload)['comments']
        return ''.join(
            '%s,%s:%s @%s\n' % (c['city'], c['ip'], c['content'], c['datetime'])
            for c in comments
        )
    except Exception:
        # Best effort: show the URL that was actually requested and whatever
        # was received, then carry on without comments.
        print(url)
        print(raw)
        return ''


def dealCertainItem():
    """Consume game URLs from ``mainItems`` and publish scraped records.

    For each URL taken from ``mainItems`` the page is downloaded and parsed
    with ``pq``. Pages whose ``h1[itemprop=name]`` text is empty are skipped.
    Otherwise a dict of scraped fields (plus score, image and comments
    obtained through the helper functions) is put on ``gameItems`` and
    ``mySpiderCfgMain.countT`` is incremented.

    The loop has no exit condition of its own.
    NOTE(review): ``mainItems.get()`` presumably blocks while the queue is
    empty -- confirm against how ``mainItems`` is created.
    """
    while True:
        # An early exit on an empty queue was disabled by the original author:
        # if mainItems.empty():
        #     time.sleep(5)
        #     if mainItems.empty():
        #         return

        urlT = mainItems.get()

        q = pq(tz.decodeForThisSys(acqHtml(urlT)))

        # Skip pages without a game name before doing any further requests.
        name = q.find('h1[itemprop=name]').text().strip()
        if name == '':
            continue

        score, commentCount = _acqScore(q)

        gameObj = {
            'name': name,
            'softwareVersion': q.find('span[itemprop=softwareVersion]').text().strip(),
            'ename': q.find('span.ename').text().strip(),
            'img': _acqImg(q),
            'gameType': q.find('div.dl>dl>dt>span:eq(0)>b:eq(0)>a').text(),
            'inLanguage': q.find('div.dl>dl>dt>span:eq(0)>b:eq(1)>em').text(),
            'fileSize': q.find('div.dl>dl>dt>span:eq(1)>b:eq(0)>em').text(),
            'fileComany': q.find('div.dl>dl>dt>span:eq(1)>b:eq(1)>em').text(),
            'startingTime': _acqDate(q.find('div.dl>dl>dt>span:eq(2)>b:eq(0)>em').text()),
            'dateModified': _acqDate(q.find('div.dl>dl>dt>span:eq(2)>b:eq(1)>em').text()),
            'operatingSystem': q.find('div.dl>dl>dt>span:eq(3) a').text(),
            'tag': q.find('div.dl>dl>dt>span:eq(4)>em').text(),
            'zt_text': q.find('div.dl>dl>dt>span:eq(5)>em').text(),
            'pf_score': score,
            'commentCount': commentCount,
            'pf_score_des': q.find('div.pinja_box').text().strip(),
            'game_des': mySpiderTools.myDecodeHtml(q.find('li.yx1>span').text().strip()),
            'gameUrl': urlT,
            'commentAll': _acqCommentAll(q),
        }

        gameItems.put(gameObj)
        print('now index is %d,put %s' % (mySpiderCfgMain.countT, name))

        # The counter is updated without a lock; the original author left the
        # locking code commented out:
        # lockT = thread.allocate_lock()
        # lockT.acquire()
        mySpiderCfgMain.countT += 1