コード例 #1
0
ファイル: parser_movies.py プロジェクト: xpad/ityao
def parserDaily():
    """Parse every unparsed DailyLink and store the movies found in it.

    For each DailyLink with ``parsed=False`` the page HTML is taken from the
    record's cached ``raw_desc`` or, when that is empty, fetched with
    ``getHTML(daily.link)`` and cached back onto the record. The HTML is fed
    to a ``DailyCollectionParser`` and every entry of its ``all_movies`` is
    saved as a ``MovieLink`` unless a row with the same digest key already
    exists. The digest key is the SHA-256 hex digest of the ';'-joined
    download links.

    The daily record is flagged as parsed and saved on every iteration,
    including when no HTML could be obtained.
    """
    unparsed_dailys = DailyLink.objects.filter(parsed=False)
    for daily in unparsed_dailys:
        HTML = daily.raw_desc
        if not HTML:
            # Nothing cached yet: fetch the page and keep it on the record so
            # it is persisted by the daily.save() below.
            HTML = getHTML(daily.link)
            daily.raw_desc = HTML

        if HTML:
            dcp = DailyCollectionParser()
            dcp.feed(HTML)
            for movie in dcp.all_movies:
                title = ''
                desc = ''
                if movie.desc:
                    # The first description line doubles as the title.
                    title = movie.desc[0]
                    desc = '\r\n'.join(movie.desc).strip()
                images = ';'.join(movie.imgs)
                downloadlink = ';'.join(movie.links)

                # hashlib hashes bytes; encode text explicitly so non-ASCII
                # links do not raise. For ASCII text this yields the same
                # digest as the implicit conversion did.
                if isinstance(downloadlink, bytes):
                    digest_input = downloadlink
                else:
                    digest_input = downloadlink.encode('utf-8')
                digestkey = hashlib.sha256(digest_input).hexdigest()

                # exists() asks the database for a yes/no answer instead of
                # loading every matching row just to count them.
                if MovieLink.objects.filter(digestkey=digestkey).exists():
                    continue

                ml = MovieLink(title=title,
                               raw_desc=desc,
                               digestkey=digestkey,
                               daily_link=daily,
                               images=images,
                               downloadlink=downloadlink)
                ml.save()

        daily.parsed = True
        daily.save()
コード例 #2
0
ファイル: parser_movies.py プロジェクト: xpad/ityao
def parserDaily():
    """Parse every unparsed DailyLink and store the movies found in it.

    For each DailyLink with ``parsed=False`` the page HTML is taken from the
    record's cached ``raw_desc`` or, when that is empty, fetched with
    ``getHTML(daily.link)`` and cached back onto the record. The HTML is fed
    to a ``DailyCollectionParser`` and every entry of its ``all_movies`` is
    saved as a ``MovieLink`` unless a row with the same digest key already
    exists. The digest key is the SHA-256 hex digest of the ';'-joined
    download links.

    The daily record is flagged as parsed and saved on every iteration,
    including when no HTML could be obtained.
    """
    unparsed_dailys = DailyLink.objects.filter(parsed=False)
    for daily in unparsed_dailys:
        HTML = daily.raw_desc
        if not HTML:
            # Nothing cached yet: fetch the page and keep it on the record so
            # it is persisted by the daily.save() below.
            HTML = getHTML(daily.link)
            daily.raw_desc = HTML

        if HTML:
            dcp = DailyCollectionParser()
            dcp.feed(HTML)
            for movie in dcp.all_movies:
                title = ''
                desc = ''
                if movie.desc:
                    # The first description line doubles as the title.
                    title = movie.desc[0]
                    desc = '\r\n'.join(movie.desc).strip()
                images = ';'.join(movie.imgs)
                downloadlink = ';'.join(movie.links)

                # hashlib hashes bytes; encode text explicitly so non-ASCII
                # links do not raise. For ASCII text this yields the same
                # digest as the implicit conversion did.
                if isinstance(downloadlink, bytes):
                    digest_input = downloadlink
                else:
                    digest_input = downloadlink.encode('utf-8')
                digestkey = hashlib.sha256(digest_input).hexdigest()

                # exists() asks the database for a yes/no answer instead of
                # loading every matching row just to count them.
                if MovieLink.objects.filter(digestkey=digestkey).exists():
                    continue

                ml = MovieLink(title=title, raw_desc=desc, digestkey=digestkey,
                               daily_link=daily, images=images,
                               downloadlink=downloadlink)
                ml.save()

        daily.parsed = True
        daily.save()
コード例 #3
0
ファイル: parser_movies.py プロジェクト: xpad/ityao
def debugdaily(daily_id=4, out_path='bt_parser.txt'):
    """Dump the movies parsed from one DailyLink to a text file.

    Args:
        daily_id: id of the DailyLink whose cached ``raw_desc`` is parsed.
            Defaults to 4, the value that used to be hard-coded.
        out_path: path of the file to (over)write with the dump. Defaults to
            'bt_parser.txt', the value that used to be hard-coded.

    For every entry of the parser's ``all_movies`` the file receives the
    UTF-8 encoded description lines joined by ' ^ ', then ``movie.imgs``
    joined by ';', then ``movie.links`` joined by ';', each part followed by
    a separator line.
    """
    dl = DailyLink.objects.get(id=daily_id)

    dcp = DailyCollectionParser()
    dcp.feed(dl.raw_desc)
    # The with-block closes the file even if a write or encode step raises.
    with open(out_path, 'w') as f:
        for movie in dcp.all_movies:
            encoded_desc = [d.encode('utf-8') for d in movie.desc]
            f.write(' ^ '.join(encoded_desc))
            f.write('\r\n=======================\r\n')
            f.write(';'.join(movie.imgs))
            f.write('\r\n=======================\r\n')
            f.write(';'.join(movie.links))
            f.write("\r\n\r\n***********************\r\n\r\n")
コード例 #4
0
ファイル: parser_movies.py プロジェクト: xpad/ityao
def debugdaily(daily_id=4, out_path='bt_parser.txt'):
    """Dump the movies parsed from one DailyLink to a text file.

    Args:
        daily_id: id of the DailyLink whose cached ``raw_desc`` is parsed.
            Defaults to 4, the value that used to be hard-coded.
        out_path: path of the file to (over)write with the dump. Defaults to
            'bt_parser.txt', the value that used to be hard-coded.

    For every entry of the parser's ``all_movies`` the file receives the
    UTF-8 encoded description lines joined by ' ^ ', then ``movie.imgs``
    joined by ';', then ``movie.links`` joined by ';', each part followed by
    a separator line.
    """
    dl = DailyLink.objects.get(id=daily_id)

    dcp = DailyCollectionParser()
    dcp.feed(dl.raw_desc)
    # The with-block closes the file even if a write or encode step raises.
    with open(out_path, 'w') as f:
        for movie in dcp.all_movies:
            encoded_desc = [d.encode('utf-8') for d in movie.desc]
            f.write(' ^ '.join(encoded_desc))
            f.write('\r\n=======================\r\n')
            f.write(';'.join(movie.imgs))
            f.write('\r\n=======================\r\n')
            f.write(';'.join(movie.links))
            f.write("\r\n\r\n***********************\r\n\r\n")