Example #1
0
def movie_from_div(div):
    """
    Extract one movie's information from a single div.

    The div is wrapped with ``pq`` and queried with CSS selectors; each
    result is stored on a freshly created ``Movie``, which is returned.
    """
    doc = pq(div)
    movie = Movie()

    # Attributes whose value is simply the text of a selector match.
    text_fields = (
        ('name', '.title'),
        ('other', '.other'),
        ('score', '.rating_num'),
        ('quote', '.inq'),
    )
    for attr, selector in text_fields:
        setattr(movie, attr, doc(selector).text())

    # The cover comes from the image's src attribute, not from its text.
    movie.cover_url = doc('img').attr('src')
    # The ranking is the text of the <em> found under the .pic element.
    movie.ranking = doc('.pic').find('em').text()
    return movie
Example #2
0
# Multi-valued IMDb fields that are stored on the Movie as one '$'-separated
# string: (IMDb key, Movie attribute, by_name).  When by_name is True each
# entry is indexed with ['name'] before joining; otherwise the entries are
# joined as they are.
_JOINED_FIELDS = (
    ('genres', 'genres', False),
    ('color info', 'color_info', False),
    ('director', 'director', True),
    ('countries', 'countries', False),
    ('languages', 'languages', False),
    ('writer', 'writer', True),
    ('editor', 'editor', True),
    ('cinematographer', 'cinematographer', True),
    ('art direction', 'art_director', True),
    ('costume designer', 'costume_designer', True),
    ('original music', 'original_music', True),
    ('sound mix', 'sound_mix', False),
    ('production companies', 'production_companies', True),
)

# Movie attributes receiving the first, second and third cast member names.
_CAST_ATTRS = ('cast_1st', 'cast_2nd', 'cast_3rd')


def _copy_joined_fields(mvIN, movie):
    """Copy every field of _JOINED_FIELDS present in mvIN onto movie."""
    for key, attr, by_name in _JOINED_FIELDS:
        # has_key is kept deliberately instead of `in`.
        # NOTE(review): on IMDbPY Movie objects `in` may test for a
        # person/company rather than a data key - confirm before changing.
        if not mvIN.has_key(key):
            continue
        entries = mvIN.get(key)
        if by_name:
            entries = [entry['name'] for entry in entries]
        setattr(movie, attr, '$'.join(entries))


def _copy_cast(mvIN, movie):
    """Store the names of up to the first three cast members on movie."""
    # A title without cast information yields None here; treat it as empty
    # so the movie is still collected instead of failing as a whole.
    cast = mvIN.get('cast') or []
    for attr, person in zip(_CAST_ATTRS, cast):
        setattr(movie, attr, person['name'])


def _parse_runtime(mvIN):
    """Return the first 'runtimes' entry as an int, or 0 if unavailable."""
    if not mvIN.has_key('runtimes'):
        return 0
    try:
        first = mvIN.get('runtimes')[0]
        text = str(first)
        if ':' in text:
            # Entries containing ':' carry the number after the first colon.
            return int(text.split(':')[1])
        return int(first)
    except Exception:
        # Best effort by design: an unparsable runtime must not discard
        # the rest of the movie's data.
        return 0


def _build_movie(mvID):
    """Fetch the data for mvID and return it as a populated Movie."""
    # The original comment says this data comes from IMDbPY.
    mvIN = imdb_access.get_movie(mvID)

    movie = Movie()
    movie.id = mvID
    movie.title = mvIN.get('title')
    movie.cover_url = mvIN.get('cover url')
    movie.giant_cover_url = mvIN.get('full-size cover url')

    _copy_joined_fields(mvIN, movie)
    _copy_cast(mvIN, movie)

    movie.year = mvIN.get('year') if mvIN.has_key('year') else 0
    movie.runtimes = _parse_runtime(mvIN)

    # Ratings are fetched separately only in "old" mode.
    if mode == "old":
        movie.number_of_votes = get_rating(mvID)
    return movie


def get_info():
    """
    Turn the movie IDs queued in ``mvIDQ`` into Movie objects on ``mvINQ``.

    Sleeps 5 seconds, then loops while ``mvIDQ`` is non-empty or the
    module-level ``stage`` equals 0.  Each ID is fetched and converted by
    ``_build_movie``; the resulting Movie is put on ``mvINQ``.  If that
    fails, the error is printed and the bare ID is put on ``mvINQ``
    instead.  Every ID taken from ``mvIDQ`` is acknowledged with
    ``task_done()`` whether or not it could be processed.  The loop pauses
    1 second between IDs.
    """
    time.sleep(5)
    print("-CRAWLER- Start to get movie feature...")
    while (not mvIDQ.empty()) or stage == 0:
        # Fetch outside the try block so the handler below never reports an
        # unbound or stale ID.
        mvID = mvIDQ.get()
        try:
            mvINQ.put(_build_movie(mvID))
        except Exception as e:
            message = '-CRAWLER- An {} exception occured!'.format(e)
            print('{} {}'.format(message, mvID))
            # Hand the bare ID over in place of a Movie.
            # NOTE(review): presumably the consumer of mvINQ recognises
            # this as a failure marker - confirm.
            mvINQ.put(mvID)
        finally:
            # One task_done() per get(), even on failure; otherwise a
            # mvIDQ.join() elsewhere would block forever.
            mvIDQ.task_done()
        time.sleep(1)