def fetch_all_posts(use_pool=True, max_count=999999,
                    list_file=None, output=OUTPUT):
    """Call ``fetch_post`` for the URLs stored in a list file.

    :param use_pool: when true, hand all URLs to ``commons.run_in_pool``;
        otherwise fetch them one at a time with a random pause in between.
    :param max_count: upper bound on how many URLs (from the start of the
        list) are processed.
    :param list_file: path of the URL list; defaults to ``all.txt`` inside
        ``output``.
    :param output: base directory; a falsy value falls back to ``OUTPUT``.
    """
    print('Fetch all posts use_pool=%s' % use_pool)
    if not output:
        output = OUTPUT
    if not list_file:
        list_file = os.path.join(output, 'all.txt')
    # A falsy result from read_list is treated as "no URLs".
    targets = (read_list(list_file) or [])[:max_count]
    if use_pool:
        commons.run_in_pool(fetch_post, targets)
        return
    for target in targets:
        fetch_post(target)
        # Random 0-2 second pause after each sequential fetch.
        time.sleep(random.randint(0, 2))
def fetch_all_urls(start=0, end=5, list_file=None, output=OUTPUT):
    """Collect URLs for pages ``start`` .. ``end - 1`` and record them.

    ``fetch_page_urls`` is run through ``commons.run_in_pool`` for every
    page index in ``range(start, end)``. When that yields a truthy result,
    the flattened URLs are appended to the ones already stored in
    ``list_file`` and two files are written (both passed through
    ``fix_urls`` first): the full list back to ``list_file`` and only the
    newly fetched URLs to ``new_<start>_<end>.txt`` in ``output``.

    :param start: first page index (inclusive).
    :param end: last page index (exclusive).
    :param list_file: path of the URL list; defaults to ``all.txt`` inside
        ``output``.
    :param output: base directory; a falsy value falls back to ``OUTPUT``.
    """
    if not output:
        output = OUTPUT
    if not list_file:
        list_file = os.path.join(output, 'all.txt')
    known_urls = read_list(list_file) or []

    # Make sure the "list" sub-directory exists before fetching.
    page_list_dir = os.path.join(output, 'list')
    if not os.path.exists(page_list_dir):
        os.makedirs(page_list_dir)

    fetched = commons.run_in_pool(fetch_page_urls, range(start, end))
    if not fetched:
        return

    fetched = flatten_list(fetched)
    print('fetch %s new urls' % len(fetched))
    known_urls.extend(fetched)
    write_list(list_file, fix_urls(known_urls))
    batch_name = 'new_%s_%s.txt' % (start, end)
    write_list(os.path.join(output, batch_name), fix_urls(fetched))
Example #3
0
def create_html_from_text(text_file, dst=None):
    """Create ``<name>.html`` from a text file, unless it already exists.

    Each entry returned by ``read_list(text_file)`` is wrapped in a
    ``<p>`` element; the joined paragraphs are inserted into the template
    read from ``CHAPTER_TEMPLATE`` via ``str.format(name, name, body)``.
    Lines are inserted as-is (no HTML escaping is applied here).

    :param text_file: path of the source text file, as text or as UTF-8
        encoded bytes.
    :param dst: directory for the HTML file; defaults to the directory of
        ``text_file``. It is created when missing.
    :return: ``(html_file, name)`` where ``name`` is the base name of
        ``text_file`` without its extension.
    """
    # ``bytes`` is ``str`` on Python 2, so this decodes exactly the inputs
    # the former ``unicode`` check did, and also works on Python 3 where
    # the ``unicode`` builtin does not exist.
    if isinstance(text_file, bytes):
        text_file = text_file.decode('utf-8')
    output = dst or os.path.dirname(text_file)
    # ``output`` is '' for a bare filename (current directory);
    # os.makedirs('') would raise, so only create non-empty paths.
    if output and not os.path.exists(output):
        os.makedirs(output)
    name, _ = os.path.splitext(os.path.basename(text_file))
    html_file = os.path.join(output, '%s.html' % name)
    if os.path.exists(html_file):
        # Already generated earlier: reuse it instead of overwriting.
        return html_file, name
    paragraphs = ['<p>%s</p>' % line for line in read_list(text_file)]
    body_str = '\n'.join(paragraphs)
    html_tpl = read_file(CHAPTER_TEMPLATE)
    html_str = html_tpl.format(name, name, body_str)
    write_file(html_file, html_str)
    print('create_chapter to %s' % html_file)
    return html_file, name
Example #4
0
def upload_photos_to_album(album_id, photos):
    """Upload image files to an album, remembering which ones are done.

    Progress is tracked in ``<album_id>_done.txt`` (after ``album_id`` has
    been passed through ``_compat_album_id``), stored next to the photos.
    File names listed there are skipped; every successful upload is
    appended and the list is written back immediately, so an interrupted
    run can be resumed.

    :param album_id: album identifier handed to ``api.photo_upload``.
    :param photos: path of a single image file, or of a directory whose
        entries are uploaded. Only ``.jpg``, ``.png`` and ``.gif`` files
        (case-insensitive) are considered.
    :raises KeyboardInterrupt: re-raised after logging when it occurs
        during an upload. Other exceptions are logged and counted; the
        loop stops once more than 5 have occurred.
    """
    album_id = _compat_album_id(album_id)
    logger.info('upload_photos_to_album %s' % album_id)
    if os.path.isfile(photos):
        files = [os.path.basename(photos)]
        output = os.path.dirname(photos)
    else:
        files = os.listdir(photos)
        output = photos
    done_file = os.path.join(output, '%s_done.txt' % album_id)
    # Guard against a falsy result (e.g. no done-file yet) so membership
    # tests and append() below always operate on a list.
    finished = read_list(done_file) or []
    image_exts = ('.jpg', '.png', '.gif')
    error_count = 0
    for f in files:
        image = os.path.join(output, f)
        if os.path.splitext(f)[1].lower() not in image_exts:
            # Not an image (or no extension at all): ignore silently.
            continue
        if f in finished:
            print('Skip %s' % image)
            continue
        try:
            logger.info('upload_photos_to_album uploading %s' % image)
            api.photo_upload(album_id, image, f)
            finished.append(f)
            # Persist after every upload so progress survives a crash.
            write_list(done_file, finished)
            time.sleep(random.randint(1, 3))
        except KeyboardInterrupt:
            logger.warning("upload_photos_to_album user interrupt, quit.")
            raise
        except Exception as e:
            logger.warning(
                "upload_photos_to_album error:%s on uploading :%s" % (e, image))
            traceback.print_exc()
            error_count += 1
            if error_count > 5:
                break
            # Back off longer after each additional failure.
            time.sleep(error_count * 10)
    write_list(done_file, finished)
Example #5
0
def upload_photos_to_album(album_id, photos):
    """Upload image files to an album, remembering which ones are done.

    Progress is tracked in ``<album_id>_done.txt`` (after ``album_id`` has
    been passed through ``_compat_album_id``), stored next to the photos.
    File names listed there are skipped; every successful upload is
    appended and the list is written back immediately, so an interrupted
    run can be resumed.

    :param album_id: album identifier handed to ``api.photo_upload``.
    :param photos: path of a single image file, or of a directory whose
        entries are uploaded. Only ``.jpg``, ``.png`` and ``.gif`` files
        (case-insensitive) are considered.
    :raises KeyboardInterrupt: re-raised after logging when it occurs
        during an upload. Other exceptions are logged and counted; the
        loop stops once more than 5 have occurred.
    """
    album_id = _compat_album_id(album_id)
    logger.info('upload_photos_to_album %s' % album_id)
    if os.path.isfile(photos):
        files = [os.path.basename(photos)]
        output = os.path.dirname(photos)
    else:
        files = os.listdir(photos)
        output = photos
    done_file = os.path.join(output, '%s_done.txt' % album_id)
    # Guard against a falsy result (e.g. no done-file yet) so membership
    # tests and append() below always operate on a list.
    finished = read_list(done_file) or []
    image_exts = ('.jpg', '.png', '.gif')
    error_count = 0
    for f in files:
        image = os.path.join(output, f)
        if os.path.splitext(f)[1].lower() not in image_exts:
            # Not an image (or no extension at all): ignore silently.
            continue
        if f in finished:
            print('Skip %s' % image)
            continue
        try:
            logger.info('upload_photos_to_album uploading %s' % image)
            api.photo_upload(album_id, image, f)
            finished.append(f)
            # Persist after every upload so progress survives a crash.
            write_list(done_file, finished)
            time.sleep(random.randint(1, 3))
        except KeyboardInterrupt:
            logger.warning("upload_photos_to_album user interrupt, quit.")
            raise
        except Exception as e:
            logger.warning("upload_photos_to_album error:%s on uploading :%s" %
                           (e, image))
            traceback.print_exc()
            error_count += 1
            if error_count > 5:
                break
            # Back off longer after each additional failure.
            time.sleep(error_count * 10)
    write_list(done_file, finished)
def fix_list_file():
    """Rewrite ``all.txt`` under ``OUTPUT`` with ``unquote_url`` applied.

    Every entry read from the list file is passed through ``unquote_url``
    and the result is written back to the same file. A falsy result from
    ``read_list`` is treated as an empty list.
    """
    list_file = os.path.join(OUTPUT, 'all.txt')
    stored_urls = read_list(list_file) or []
    cleaned_urls = []
    for stored in stored_urls:
        cleaned_urls.append(unquote_url(stored))
    write_list(list_file, cleaned_urls)