예제 #1
0
def get_book(start, end, cat_pk, task_target):
    """Send chapter file names of not-yet-present books to ``task_target``.

    Books whose category id equals ``int(cat_pk)`` are ordered by primary
    key and sliced with ``[start:end]``. A book is handled only when the
    path ``BOOK_PATH/<book.name>`` does not exist. For such a book the last
    ``/``-separated component of every item ``content`` value is collected
    into a set; when the set is non-empty the tuple
    ``(book.name, book.author, names)`` is passed to ``task_target.send``.

    The original note on this function reads: target is 'get_task'.
    """
    books = Book.objects.filter(category__id=int(cat_pk)).order_by('pk')
    for book in books[start:end]:
        book_dir = os.path.join(BOOK_PATH, book.name).encode('utf-8')
        if os.path.exists(book_dir):
            # A directory (or file) for this book already exists: skip it.
            continue
        book_pk = book.pk
        item_model = get_bookitem_model(book_pk)
        contents = item_model.objects.values_list(
            'content', flat=True
        ).filter(book__id=book_pk).order_by('pk')
        # Keep only the final path component of each stored content value.
        file_names = set(value.rsplit('/', 1)[-1] for value in contents)
        if file_names:
            task_target.send((book.name, book.author, file_names))
예제 #2
0
파일: crawl_re.py 프로젝트: pymmrd/ireader
def get_book():
    """Yield, per book directory, the chapter files not yet on disk.

    Every entry name in ``BOOK_PATH`` is treated as a book name: the entry
    is listed as a directory and the ``Book`` with that name is fetched.
    The last ``/``-separated component of each item ``content`` value is
    compared with the directory listing.

    Yields:
        ``(book.name, book.author, reserved)`` where ``reserved`` is the
        non-empty set of content file names absent from the directory.
        Books with nothing missing yield nothing.

    Errors from ``os.listdir`` or ``Book.objects.get`` are not caught here
    and propagate to the caller.
    """
    for name in os.listdir(BOOK_PATH):
        book_dir = os.path.join(BOOK_PATH, name)
        crawled = set(os.listdir(book_dir))
        book = Book.objects.get(name=name)
        book_pk = book.pk
        item_model = get_bookitem_model(book_pk)
        contents = item_model.objects.values_list(
            'content', flat=True
        ).filter(book__id=book_pk).order_by('pk')
        expected = set(value.rsplit('/', 1)[-1] for value in contents)
        reserved = expected.difference(crawled)
        if reserved:
            # The previous version also built an md5 key from name+author
            # here, but never used it; that dead computation is removed.
            yield (book.name, book.author, reserved)
예제 #3
0
def load_full_detail(data_path):
    """Load one book's detail record and store it on the matching ``Book``.

    The record returned by ``get_data(data_path)`` is read for the keys
    ``name``, ``intro`` and ``content``. The book is looked up by name, its
    ``intro`` is replaced, and ``content`` is handed to
    ``process_has_part`` when it has more than one key, otherwise to
    ``process_null_part``. Finally ``book.has_part`` is set accordingly and
    the book is saved.
    """
    detail = get_data(data_path)
    book = Book.objects.get(name=detail.get('name', ''))
    item_model = get_bookitem_model(book.pk)
    book.intro = detail.get('intro', '')
    # NOTE(review): the '' fallback has no .keys(), so a record without
    # 'content' fails on the next statement -- confirm this is intended.
    content = detail.get('content', '')
    multi_part = len(content.keys()) > 1
    if multi_part:
        process_has_part(content, item_model, book)
    else:
        process_null_part(content, item_model, book)
    book.has_part = multi_part
    book.save()
예제 #4
0
def print_miss():
    """Append the names of books that have no item rows to text files.

    For every ``Book`` (only ``id`` and ``name`` are fetched) the item
    model for that id is queried; books with at least one item are
    skipped. For the rest:

    * if the path ``DATA_PATTERN % name`` exists, the name is appended to
      ``<id % 10>.part``;
    * otherwise the name is appended to ``lost.txt``.

    Names are written one per line, UTF-8 encoded.
    """
    for row in Book.objects.values('id', 'name'):
        pk = row['id']
        item_model = get_bookitem_model(pk)
        if item_model.objects.filter(book__id=pk).count() != 0:
            continue
        name = row['name']
        if os.path.exists(DATA_PATTERN % name):
            target = '%s.part' % (pk % 10)
        else:
            target = 'lost.txt'
        # Both branches now encode. The '.part' branch used to call
        # name.decode('utf-8'), which on a unicode value triggers an
        # implicit ASCII encode under Python 2 and fails for non-ASCII
        # names. Assumes name is unicode, as the 'lost.txt' branch already
        # did -- TODO confirm.
        with open(target, 'a') as f:
            f.write('%s\n' % name.encode('utf-8'))
예제 #5
0
파일: craw_co.py 프로젝트: pymmrd/ireader
def get_book(start, end, filename, task_target):
    """Send the missing chapter files of the books listed in ``filename``.

    Lines ``start`` through ``end`` (inclusive, as numbered by
    ``linecache.getline``) of ``filename`` are read; each stripped line is
    taken as a book name. For every name the directory
    ``BOOK_PATH/<name>`` is listed, the ``Book`` with that name is fetched,
    and the last ``/``-separated component of each item ``content`` value
    is compared with the listing. When some are absent from the directory,
    ``(book.name, book.author, reserved)`` is passed to
    ``task_target.send``.

    Blank lines, and line numbers outside the file, are skipped.
    Errors from ``os.listdir`` or ``Book.objects.get`` propagate.

    The original note on this function reads: target is 'get_task'.
    """
    for lineno in range(start, end + 1):
        name = linecache.getline(filename, lineno).strip()
        if not name:
            # linecache.getline returns '' for a missing line. Without this
            # guard the code would list BOOK_PATH itself and then look up
            # a Book with an empty name.
            continue
        book_dir = os.path.join(BOOK_PATH, name)
        crawled = set(os.listdir(book_dir))
        book = Book.objects.get(name=name)
        book_pk = book.pk
        item_model = get_bookitem_model(book_pk)
        contents = item_model.objects.values_list(
            'content', flat=True
        ).filter(book__id=book_pk).order_by('pk')
        expected = set(value.rsplit('/', 1)[-1] for value in contents)
        reserved = expected.difference(crawled)
        if reserved:
            task_target.send((book.name, book.author, reserved))
예제 #6
0
def load_signle_detail(pk, sleep):
    """Load the detail JSON of the book with id ``pk`` and store it.

    The file ``<book.name>.json`` under the fixed detail directory is read
    through ``get_data``. The book's ``intro`` is replaced, and the
    record's ``content`` is handed, together with the item model, the book
    and ``sleep``, to ``process_has_part`` when it has more than one key,
    otherwise to ``process_null_part``. ``book.has_part`` is then set
    accordingly and the book is saved.
    """
    data_dir = "/var/www/wwwroot/ireader/pybook/detail/"
    book = Book.objects.get(pk=pk)
    item_model = get_bookitem_model(pk)
    json_name = '%s.json' % book.name.encode('utf-8')
    detail = get_data(os.path.join(data_dir, json_name))
    book.intro = detail.get('intro', '')
    # NOTE(review): the '' fallback has no .keys(), so a record without
    # 'content' fails on the next statement -- confirm this is intended.
    content = detail.get('content', '')
    multi_part = len(content.keys()) > 1
    handler = process_has_part if multi_part else process_null_part
    handler(content, item_model, book, sleep)
    book.has_part = multi_part
    book.save()
예제 #7
0
def print_lost():
    """Log a URL for every book item whose content file is on neither disk.

    For each ``Book`` an md5 hex digest of its UTF-8 encoded name and
    author is used as the key into the module-level ``adict`` to obtain a
    URL prefix (presumably the book's base URL -- verify against where
    ``adict`` is built). Each item's ``content`` path is checked under
    ``/mnt/data1/book`` and ``/data``; when it exists under neither, a
    tab-separated line ``name, sub, url`` is appended to ``lost_url.log``.

    ``content`` must consist of exactly three ``/``-separated parts
    (``sub/name/pk``); any other shape raises ``ValueError`` from the
    unpacking.
    """
    dir1 = '/mnt/data1/book'
    dir2 = '/data'
    # The context manager closes the log even if a query or a malformed
    # content path raises part-way through; it was previously closed only
    # on the success path.
    with open('lost_url.log', 'a') as f:
        for b in Book.objects.all():
            cls = get_bookitem_model(b.pk)
            s = '%s%s' % (b.name.encode('utf-8'), b.author.encode('utf-8'))
            key = md5(s).hexdigest()
            prefix = adict.get(key)
            for item in cls.objects.filter(book__id=b.pk):
                content = item.content
                path1 = os.path.join(dir1, content)
                path2 = os.path.join(dir2, content)
                if os.path.exists(path1) or os.path.exists(path2):
                    continue
                sub, name, pk = content.split('/')
                suffix = '%s.html' % pk
                url = urlparse.urljoin(prefix, suffix)
                f.write('%s\t%s\t%s\n' % (
                    name.encode('utf-8'),
                    sub.encode('utf-8'),
                    url.encode("utf-8"),
                ))
예제 #8
0
def get_book(pk):
    """Append to ``new<pk>.txt`` the names of books with missing chapters.

    Every entry name in ``BOOK_PATH`` is treated as a book name: the entry
    is listed as a directory, the ``Book`` with that name is fetched, and
    the last ``/``-separated component of each item ``content`` value is
    compared with the listing. When at least one is absent from the
    directory, the entry name followed by ``os.linesep`` is appended to
    the output file.

    Processing is best-effort: an entry that raises an ordinary exception
    is skipped without any report.

    Args:
        pk: Used only to build the output file name ``new<pk>.txt``.
    """
    out_name = 'new%s.txt' % pk
    for name in os.listdir(BOOK_PATH):
        try:
            book_dir = os.path.join(BOOK_PATH, name)
            crawled = set(os.listdir(book_dir))
            book = Book.objects.get(name=name)
            # Separate local name so the ``pk`` argument is not rebound.
            book_pk = book.pk
            item_model = get_bookitem_model(book_pk)
            contents = item_model.objects.values_list(
                'content', flat=True
            ).filter(book__id=book_pk).order_by('pk')
            expected = set(value.rsplit('/', 1)[-1] for value in contents)
            reserved = expected.difference(crawled)
            if reserved:
                with open(out_name, 'a') as f:
                    f.write('%s%s' % (name, os.linesep))
        except Exception:
            # Still skips bad entries, but no longer swallows
            # KeyboardInterrupt / SystemExit as the bare ``except`` did.
            continue
예제 #9
0
파일: utils.py 프로젝트: pymmrd/ireader
def get_book_chapters(pk):
    """Collect the chapter listing and related data for the book ``pk``.

    Returns a 4-tuple ``(book, object_list, partition, recom_list)``:

    * ``book`` -- the value returned by ``get_single_book(pk)``;
    * ``object_list`` -- a list of single-key dicts. When the book's
      ``has_part`` entry is truthy there is one dict per ``BookPart`` of
      the book (ordered by pk), mapping the part name to the part's
      chapters; otherwise there is one dict mapping ``'A'`` to all
      chapters of the book. Chapters are ``values('id', 'name')``
      querysets ordered by pk;
    * ``partition`` -- ``pk % settings.BOOKITEM_PARTITION``;
    * ``recom_list`` -- the value returned by ``get_recommand_books(pk)``.
    """
    fields = ('id', 'name')
    book = get_single_book(pk)
    split_into_parts = book.get('has_part', False)
    partition = pk % settings.BOOKITEM_PARTITION
    item_model = get_bookitem_model(pk)
    if split_into_parts:
        parts = BookPart.objects.filter(book__id=pk).order_by('pk')
        object_list = [
            {
                part.name: item_model.objects.values(*fields).filter(
                    part__id=part.pk
                ).order_by('pk')
            }
            for part in parts
        ]
    else:
        all_chapters = item_model.objects.values(*fields).filter(
            book__id=pk
        ).order_by('pk')
        object_list = [{'A': all_chapters}]
    recom_list = get_recommand_books(pk)
    return book, object_list, partition, recom_list