def fetch_all_posts(use_pool=True, max_count=999999, list_file=None, output=OUTPUT):
    """Fetch every post whose URL appears in the url list file.

    :param use_pool: when True, fetch concurrently via commons.run_in_pool;
        otherwise fetch sequentially with a short random pause between posts.
    :param max_count: upper bound on how many URLs to process.
    :param list_file: path of the url list; defaults to <output>/all.txt.
    :param output: working directory; falls back to module-level OUTPUT.
    """
    print('Fetch all posts use_pool=%s' % use_pool)
    output = output or OUTPUT
    list_file = list_file or os.path.join(output, 'all.txt')
    # read_list may return None for a missing/empty file
    targets = (read_list(list_file) or [])[:max_count]
    if use_pool:
        commons.run_in_pool(fetch_post, targets)
        return
    for target in targets:
        fetch_post(target)
        # be polite to the remote site between sequential requests
        time.sleep(random.randint(0, 2))
def fetch_all_urls(start=0, end=5, list_file=None, output=OUTPUT):
    """Crawl listing pages [start, end) for post URLs and merge them into the
    master url list.

    Newly discovered URLs are also written to a separate new_<start>_<end>.txt
    file so a single crawl's results can be inspected on their own.

    :param start: first page index (inclusive).
    :param end: last page index (exclusive).
    :param list_file: master url list path; defaults to <output>/all.txt.
    :param output: working directory; falls back to module-level OUTPUT.
    """
    output = output or OUTPUT
    list_file = list_file or os.path.join(output, 'all.txt')
    known_urls = read_list(list_file) or []
    pl_output = os.path.join(output, 'list')
    if not os.path.exists(pl_output):
        os.makedirs(pl_output)
    fetched = commons.run_in_pool(fetch_page_urls, range(start, end))
    if not fetched:
        return
    fetched = flatten_list(fetched)
    print('fetch %s new urls' % len(fetched))
    known_urls.extend(fetched)
    # master list gets the merged set; the new_* file only this crawl's finds
    write_list(list_file, fix_urls(known_urls))
    new_list_file = os.path.join(output, 'new_%s_%s.txt' % (start, end))
    write_list(new_list_file, fix_urls(fetched))
def create_html_from_text(text_file, dst=None):
    """Convert a plain-text chapter file into an HTML chapter page.

    Each text line becomes a <p> element and the result is rendered through
    CHAPTER_TEMPLATE. Conversion is skipped when the target file exists.

    :param text_file: path of the source text file (bytes are decoded as UTF-8).
    :param dst: destination directory; defaults to the text file's directory.
    :return: tuple of (html_file path, chapter name).
    """
    if not isinstance(text_file, unicode):
        text_file = text_file.decode('utf-8')
    output = dst or os.path.dirname(text_file)
    if not os.path.exists(output):
        os.makedirs(output)
    name = os.path.splitext(os.path.basename(text_file))[0]
    html_file = os.path.join(output, '%s.html' % name)
    # already converted — reuse the existing chapter
    if os.path.exists(html_file):
        return html_file, name
    # first line as title, h2
    paragraphs = ['<p>%s</p>' % line for line in read_list(text_file)]
    body_str = '\n'.join(paragraphs)
    html_tpl = read_file(CHAPTER_TEMPLATE)
    write_file(html_file, html_tpl.format(name, name, body_str))
    print('create_chapter to %s' % html_file)
    return html_file, name
def upload_photos_to_album(album_id, photos):
    """Upload image files to a remote album, resuming from a per-album done-file.

    :param album_id: album identifier (normalized via _compat_album_id).
    :param photos: either a single image file path or a directory of images.

    Only .jpg/.png/.gif files are uploaded. Progress is checkpointed to
    <output>/<album_id>_done.txt after every successful upload so an
    interrupted run can resume without re-uploading. Aborts after more than
    5 consecutive-run errors, backing off 10s per accumulated error.
    """
    album_id = _compat_album_id(album_id)
    logger.info('upload_photos_to_album %s' % album_id)
    if os.path.isfile(photos):
        files = [os.path.basename(photos)]
        output = os.path.dirname(photos)
    else:
        files = os.listdir(photos)
        output = photos
    done_file = os.path.join(output, '%s_done.txt' % album_id)
    # FIX: read_list returns None when the done-file does not exist yet
    # (first run for this album); without the fallback, `f not in finished`
    # raises TypeError. Other callers in this file already guard with `or []`.
    finished = read_list(done_file) or []
    error_count = 0
    for f in files:
        image = os.path.join(output, f)
        _, ext = os.path.splitext(f)
        if not ext or ext.lower() not in ['.jpg', '.png', '.gif']:
            # print('Invalid %s' % image)
            continue
        try:
            if f not in finished:
                logger.info('upload_photos_to_album uploading %s' % image)
                api.photo_upload(album_id, image, f)
                finished.append(f)
                # checkpoint after each success so a crash loses nothing
                write_list(done_file, finished)
                time.sleep(random.randint(1, 3))
            else:
                print('Skip %s' % image)
        except KeyboardInterrupt:
            logger.warning("upload_photos_to_album user interrupt, quit.")
            raise
        except Exception as e:
            logger.warning(
                "upload_photos_to_album error:%s on uploading :%s" % (e, image))
            traceback.print_exc()
            error_count += 1
            if error_count > 5:
                break
            # linear back-off: the more errors, the longer the pause
            time.sleep(error_count * 10)
    write_list(done_file, finished)
# NOTE(review): this is a verbatim duplicate of an earlier upload_photos_to_album
# definition in this file; being defined later, this copy is the one that wins.
# One of the two copies should be deleted — confirm with the file's owner.
def upload_photos_to_album(album_id, photos):
    """Upload image files to a remote album, resuming from a per-album done-file.

    :param album_id: album identifier (normalized via _compat_album_id).
    :param photos: either a single image file path or a directory of images.

    Only .jpg/.png/.gif files are uploaded. Progress is checkpointed to
    <output>/<album_id>_done.txt after every successful upload so an
    interrupted run can resume without re-uploading. Aborts after more than
    5 consecutive-run errors, backing off 10s per accumulated error.
    """
    album_id = _compat_album_id(album_id)
    logger.info('upload_photos_to_album %s' % album_id)
    if os.path.isfile(photos):
        files = [os.path.basename(photos)]
        output = os.path.dirname(photos)
    else:
        files = os.listdir(photos)
        output = photos
    done_file = os.path.join(output, '%s_done.txt' % album_id)
    # FIX: read_list returns None when the done-file does not exist yet
    # (first run for this album); without the fallback, `f not in finished`
    # raises TypeError. Other callers in this file already guard with `or []`.
    finished = read_list(done_file) or []
    error_count = 0
    for f in files:
        image = os.path.join(output, f)
        _, ext = os.path.splitext(f)
        if not ext or ext.lower() not in ['.jpg', '.png', '.gif']:
            # print('Invalid %s' % image)
            continue
        try:
            if f not in finished:
                logger.info('upload_photos_to_album uploading %s' % image)
                api.photo_upload(album_id, image, f)
                finished.append(f)
                # checkpoint after each success so a crash loses nothing
                write_list(done_file, finished)
                time.sleep(random.randint(1, 3))
            else:
                print('Skip %s' % image)
        except KeyboardInterrupt:
            logger.warning("upload_photos_to_album user interrupt, quit.")
            raise
        except Exception as e:
            logger.warning(
                "upload_photos_to_album error:%s on uploading :%s" % (e, image))
            traceback.print_exc()
            error_count += 1
            if error_count > 5:
                break
            # linear back-off: the more errors, the longer the pause
            time.sleep(error_count * 10)
    write_list(done_file, finished)
def fix_list_file():
    """Rewrite the master url list with every URL percent-unquoted in place."""
    list_file = os.path.join(OUTPUT, 'all.txt')
    urls = read_list(list_file) or []
    write_list(list_file, [unquote_url(u) for u in urls])