def run(self, args):
    """Download the videos of a video course, or only list their URLs.

    ``args[0]`` is the course id. The following flags are read from
    ``args[1:]``:

    * ``--out-dir=<dir>``: base output directory (default ``./mp4``).
      Files are written to a sub-directory named after the course title.
    * ``--url-only``: write the hd and sd URL of every post to
      ``<course title>.mp4.txt`` and return without downloading.
    * ``--hd-only``: prefer the hd stream, falling back to sd when a post
      has no hd URL.

    Raises:
        Exception: if the course's ``column_type`` is not 3.
    """
    course_id = args[0]
    options = args[1:]
    url_only = '--url-only' in options
    hd_only = '--hd-only' in options

    out_dir = './mp4'
    for arg in options:
        if '--out-dir=' in arg:
            out_dir = arg.split('--out-dir=')[1] or './mp4'
            break
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    dc = DataClient()
    course_data = dc.get_course_intro(course_id)
    if int(course_data['column_type']) != 3:
        raise Exception('该课程不是视频课程:%s' % course_data['column_title'])

    out_dir = os.path.join(out_dir, course_data['column_title'])
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    data = dc.get_course_content(course_id)

    if url_only:
        entries = []
        for post in data:
            # Parse the media description once per post.
            media = json.loads(post['video_media'])
            entries.append("{}:\n{}\n{}\n\n".format(
                post['article_title'],
                media.get('hd', {}).get('url'),
                media.get('sd', {}).get('url')))
        url_file = os.path.join(
            out_dir, '%s.mp4.txt' % course_data['column_title'])
        with open(url_file, 'w') as f:
            f.write('\n'.join(entries))
        print("download mp4 url done: " + course_data['column_title'])
        return

    suffix = '.hd' if hd_only else '.sd'
    for post in data:
        file_name = format_path(post['article_title'] + suffix)
        # A previous download is detected through its '.ts' file.
        if os.path.isfile(os.path.join(out_dir, file_name) + '.ts'):
            print(file_name + ' exists')
            continue

        media = json.loads(post['video_media'])
        sd_url = media.get('sd', {}).get('url')
        if hd_only:
            # some posts only have an sd stream
            url = media.get('hd', {}).get('url') or sd_url
        else:
            url = sd_url

        if not url:
            # Nothing to fetch; do not hand an empty URL to the downloader.
            print('no video url, skipped: ' + file_name)
            continue

        dl = Downloader(3)
        dl.run(url, dir=out_dir, file_name=file_name)
        print('download mp4 done: ' + file_name)
def run(self, args):
    """Run the parent ebook command for several courses.

    With ``--all`` in ``args``, every subscribed course (``had_sub``) found
    in groups ``'1'`` and ``'2'`` of the course list is processed; a failure
    for one course is printed and the loop moves on to the next course.

    Otherwise ``args[0]`` is a comma-separated list of course ids and each
    one is processed in turn; an exception there propagates to the caller.

    In both modes the full ``args`` list is appended after the course id
    when delegating to the parent ``run``.
    """
    build_one = super(EbookBatch, self).run

    if '--all' not in args:
        for raw_id in args[0].split(','):
            build_one([raw_id.strip()] + args)
            print('\n')
        return

    course_list = DataClient().get_course_list()
    for group in ('1', '2'):
        for course in course_list[group]['list']:
            if not course['had_sub']:
                continue
            try:
                build_one([str(course['id'])] + args)
                print('\n')
            except Exception as e:
                print(e)
def run(self, args):
    """Render the source files of a text course and build its mobi ebook.

    ``args[0]`` is the course id. The following flags are read from
    ``args[1:]``:

    * ``--out-dir=<dir>``: output directory (default ``./ebook``).
    * ``--force``: forwarded as ``force=True`` to the content fetch and to
      the source-file rendering.
    * ``--enable-comments``: append the rendered comment html to every
      post's ``article_content``.
    * ``--comment-count=<n>``: count handed to the comment renderer
      (default 10).
    * ``--source-only``: stop after the source files; skip the mobi build.

    Raises:
        Exception: if the course's ``column_type`` is neither 1 nor 2.
        ValueError: if the ``--comment-count`` value is not an integer.
    """
    course_id = args[0]
    options = args[1:]

    out_dir = './ebook'
    for arg in options:
        if '--out-dir=' in arg:
            out_dir = arg.split('--out-dir=')[1] or './ebook'
            break

    force = '--force' in options
    enable_comments = '--enable-comments' in options
    source_only = '--source-only' in options

    comment_count = 10
    for arg in options:
        if '--comment-count=' in arg:
            # The flag value arrives as text; convert it so the count has
            # the same type (int) as the default.
            comment_count = int(arg.split('--comment-count=')[1] or 10)
            break

    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    dc = DataClient()
    course_data = dc.get_course_intro(course_id)
    if int(course_data['column_type']) not in (1, 2):
        raise Exception('该课程不提供文本:%s' % course_data['column_title'])

    # data
    data = dc.get_course_content(course_id, force=force)
    if enable_comments:
        for post in data:
            post['article_content'] += self._render_comment_html(
                post['comments'], comment_count)

    # source file
    course_data['column_title'] = maker.format_file_name(
        course_data['column_title'])
    self.render_column_source_files(course_data, data, out_dir, force=force)

    # ebook
    if not source_only:
        # A course marked '全集' whose mobi already exists is not rebuilt.
        if course_data['update_frequency'] == '全集' and os.path.isfile(
                os.path.join(out_dir, self._title(course_data)) + '.mobi'):
            print("{} exists ".format(self._title(course_data)))
        else:
            make_mobi(
                source_dir=os.path.join(out_dir,
                                        course_data['column_title']),
                output_dir=out_dir)
def run(self, args):
    """Download the audio of a course, or only list the audio URLs.

    ``args[0]`` is the course id. ``--out-dir=<dir>`` (default ``./mp3``)
    selects the base directory; files go into a sub-directory named after
    the course title. With ``--url-only`` the title and audio URL of every
    post are written to ``<course title>.mp3.txt`` and nothing is
    downloaded.

    Raises:
        Exception: if the course's ``column_type`` is not 1.
    """
    course_id = args[0]
    flags = args[1:]
    url_only = '--url-only' in flags

    out_dir = './mp3'
    for flag in flags:
        if '--out-dir=' in flag:
            out_dir = flag.split('--out-dir=')[1] or './mp3'
            break
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    dc = DataClient()
    course_data = dc.get_course_intro(course_id)
    if int(course_data['column_type']) != 1:
        raise Exception('该课程不提供音频:%s' % course_data['column_title'])

    title = course_data['column_title']
    out_dir = os.path.join(out_dir, title)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    data = dc.get_course_content(course_id)

    if url_only:
        listing_path = os.path.join(out_dir, '%s.mp3.txt' % title)
        with open(listing_path, 'w') as f:
            # TODO alignment
            rows = []
            for post in data:
                rows.append("{}:\t\t{}".format(
                    post['article_title'], post['audio_download_url']))
            f.write('\n'.join(rows))
        return

    dl = Downloader()
    for post in data:
        file_name = format_path(post['article_title'] + '.mp3')
        if os.path.isfile(os.path.join(out_dir, file_name)):
            print(file_name + ' exists')
            continue

        audio_url = post['audio_download_url']
        if not audio_url:
            # Posts without an audio URL are skipped silently.
            continue
        dl.run(audio_url, out_file=file_name, out_dir=out_dir)
        print('download mp3 done: ' + file_name)
def run(self, args):
    """Run the parent mp3 command for several courses.

    With ``--all`` in ``args`` the ids of all subscribed courses
    (``had_sub``) in group ``'1'`` of the course list are used; otherwise
    ``args[0]`` is split on commas. The full ``args`` list is appended
    after each course id when delegating to the parent ``run``.
    """
    if '--all' in args:
        catalog = DataClient().get_course_list()
        cid_list = [
            str(course['id'])
            for course in catalog['1']['list']
            if course['had_sub']
        ]
    else:
        cid_list = args[0].split(',')

    download_one = super(Mp3Batch, self).run
    for cid in cid_list:
        download_one([cid.strip()] + args)
def dc() -> DataClient:
    """Yield a DataClient built on a FakeGk and an in-memory TinyDB.

    Code after the ``yield`` closes the client's database once the consumer
    is done with it.
    """
    memory_db = TinyDB(storage=MemoryStorage)
    client = DataClient(FakeGk(), memory_db)
    yield client
    client.db.close()
def run(self, args):
    """Run the parent ebook command for several courses.

    With ``--all`` in ``args``, every subscribed course (``had_sub``) in
    groups ``'1'`` and ``'2'`` of the course list is processed: courses
    whose ``update_frequency`` is ``'全集'`` get the full run, all other
    subscribed courses are run with ``--source-only``. Courses that are
    not subscribed are skipped.

    Otherwise ``args[0]`` is a comma-separated list of course ids and each
    one is processed with the given ``args``.
    """
    build_one = super(EbookBatch, self).run

    if '--all' in args:
        dc = DataClient()
        data = dc.get_course_list()
        for c in data['1']['list'] + data['2']['list']:
            if not c['had_sub']:
                # Previously these fell through to the --source-only
                # branch; an unsubscribed course should not be built.
                continue
            if c['update_frequency'] == '全集':
                build_one([str(c['id'])] + args)
            else:
                build_one([str(c['id']), '--source-only'] + args)
    else:
        for cid in args[0].split(','):
            build_one([cid.strip()] + args)
def run(self, args):
    """Render the source files of a text course.

    ``args[0]`` is the course id. The following flags are read from
    ``args[1:]``:

    * ``--out-dir=<dir>``: output directory (default ``./ebook``).
    * ``--force``: forwarded as ``force=True`` to the content fetch and to
      the source-file rendering.
    * ``--enable-comments``: append the rendered comment html to every
      post's ``article_content``.
    * ``--comment-count=<n>``: count handed to the comment renderer
      (default 10).

    Raises:
        Exception: if the course's ``column_type`` is neither 1 nor 2.
        ValueError: if the ``--comment-count`` value is not an integer.
    """
    course_id = args[0]
    options = args[1:]

    out_dir = './ebook'
    for arg in options:
        if '--out-dir=' in arg:
            out_dir = arg.split('--out-dir=')[1] or './ebook'
            break

    force = '--force' in options
    enable_comments = '--enable-comments' in options
    # NOTE(review): these two flags are parsed but not used anywhere in
    # this function as shown -- confirm whether the ebook/push steps are
    # missing here.
    source_only = '--source-only' in options
    push = '--push' in options

    comment_count = 10
    for arg in options:
        if '--comment-count=' in arg:
            # The flag value arrives as text; convert it so the count has
            # the same type (int) as the default.
            comment_count = int(arg.split('--comment-count=')[1] or 10)
            break

    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    dc = DataClient()
    course_data = dc.get_course_intro(course_id)
    if int(course_data['column_type']) not in (1, 2):
        raise Exception('该课程不提供文本:%s' % course_data['column_title'])

    # data
    data = dc.get_course_content(course_id, force=force)
    if enable_comments:
        for post in data:
            post['article_content'] += self._render_comment_html(
                post['comments'], comment_count)

    # source file
    course_data['column_title'] = maker.format_file_name(
        course_data['column_title'])
    self.render_column_source_files(course_data, data, out_dir, force=force)
def test_force(dc: DataClient):
    """``force=True`` re-reads the course intro and updates the stored row.

    ``access_count`` is expected to stay at 1 for a repeated plain read and
    to become 2 after a forced read, both in the returned intro and in the
    single row kept in the ``course`` table.
    """
    course_id = 212
    course = Query()

    def stored_rows():
        return dc.db.table('course').search(course.id == course_id)

    # first read: served by the gk api
    intro = dc.get_course_intro(course_id)
    assert intro['access_count'] == 1

    # the result has been written to local storage
    rows = stored_rows()
    assert len(rows) == 1
    assert rows[0]['access_count'] == 1

    # second read: served from local storage
    cached_intro = dc.get_course_intro(course_id)
    assert cached_intro['access_count'] == 1

    # forced read: goes to the gk api again
    forced_intro = dc.get_course_intro(course_id, force=True)
    assert forced_intro['access_count'] == 2

    # local storage holds the refreshed row, still exactly one
    rows = stored_rows()
    assert len(rows) == 1
    assert rows[0]['access_count'] == 2
def get_all_course_ids(self, dc: DataClient, type_: str) -> List[int]:
    """Return the ids of the courses in groups '1' and '2' selected by ``type_``.

    * ``'all'``: every course.
    * ``'all-sub'``: courses with a truthy ``had_sub``.
    * ``'all-done'``: courses with a truthy ``had_sub`` for which
      ``self.is_course_finished`` returns a truthy value.

    Any other ``type_`` selects nothing and yields an empty list.
    """
    def selected(course) -> bool:
        if type_ == 'all':
            return True
        if type_ == 'all-sub':
            return bool(course['had_sub'])
        if type_ == 'all-done':
            return bool(course['had_sub']
                        and self.is_course_finished(course))
        return False

    listing = dc.get_course_list()
    candidates = listing['1']['list'] + listing['2']['list']
    return [int(course['id']) for course in candidates if selected(course)]
def run(self, args):
    """Print the course list, grouped into the four sections '1' to '4'.

    A hint is printed first when the data client has no cookies. Each
    section shows its name, a header row, and one row per course with its
    id, subscription state, title and update frequency.
    """
    dc = DataClient()
    if not dc.cookies:
        print("尚未登录, 可以先 geektime login 以便查看更详细的信息")

    data = dc.get_course_list()
    section_names = {'1': '专栏', '2': '微课', '3': '视频', '4': '其他'}

    pieces = []
    for key in ('1', '2', '3', '4'):
        columns = data[key]['list']
        pieces.append(section_names[key] + '\n')
        pieces.append("\t{:<12}{:<10}{}\t\t{}\n".format(
            '课程ID', '已订阅', '课程标题', '更新频率/课时·时长'))
        for c in columns:
            subscribed = '是' if c['had_sub'] else '否'
            pieces.append("\t{:<15}{:<10}{}\t({})\n".format(
                str(c['id']), subscribed, c['column_title'],
                c['update_frequency'] or None))

    print(''.join(pieces))
def test_get_course_list(dc: DataClient):
    """The course list is returned as a dict."""
    course_list = dc.get_course_list()
    assert isinstance(course_list, dict)
def run(self, args):
    """Render a text course's source files and optionally mail its mobi.

    ``args[0]`` is the course id. The following flags are read from
    ``args[1:]``:

    * ``--out-dir=<dir>``: output directory (default ``./ebook``).
    * ``--force``: forwarded as ``force=True`` to the content fetch and to
      the source-file rendering (the course intro is always fetched with
      ``force=True``).
    * ``--enable-comments``: append the rendered comment html to every
      post's ``article_content``.
    * ``--comment-count=<n>``: count handed to the comment renderer
      (default 10).
    * ``--source-only``: stop after the source files.
    * ``--push``: mail ``<title>.mobi`` from the output directory using the
      settings in ``smtp.conf``; files larger than 50M are not sent.

    Raises:
        Exception: if the course's ``column_type`` is neither 1 nor 2.
        ValueError: if the ``--comment-count`` value is not an integer.
    """
    course_id = args[0]
    options = args[1:]

    out_dir = './ebook'
    for arg in options:
        if '--out-dir=' in arg:
            out_dir = arg.split('--out-dir=')[1] or './ebook'
            break

    force = '--force' in options
    enable_comments = '--enable-comments' in options
    source_only = '--source-only' in options
    push = '--push' in options

    comment_count = 10
    for arg in options:
        if '--comment-count=' in arg:
            # The flag value arrives as text; convert it so the count has
            # the same type (int) as the default.
            comment_count = int(arg.split('--comment-count=')[1] or 10)
            break

    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    dc = DataClient()
    course_data = dc.get_course_intro(course_id, force=True)
    if int(course_data['column_type']) not in (1, 2):
        raise Exception('该课程不提供文本:%s' % course_data['column_title'])

    # data
    data = dc.get_course_content(course_id, force=force)
    if enable_comments:
        for post in data:
            post['article_content'] += self._render_comment_html(
                post['comments'], comment_count)

    # source file
    course_data['column_title'] = maker.format_file_name(
        course_data['column_title'])
    self.render_column_source_files(course_data, data, out_dir, force=force)

    # ebook
    if not source_only:
        if course_data['update_frequency'] == '全集' and os.path.isfile(
                os.path.join(out_dir, self._title(course_data)) + '.mobi'):
            print("{} exists ".format(self._title(course_data)))
        else:
            print(
                os.path.join(out_dir, course_data['column_title']) + " -> " +
                course_data['column_title'].replace("html", "pdf"))
            # NOTE(review): the mobi build below is disabled, so this
            # branch only prints the paths; --push then depends on a mobi
            # file that already exists in out_dir.
            # make_mobi(source_dir=os.path.join(out_dir, course_data['column_title']), output_dir=out_dir)

        if push:
            mobi_name = "{}.mobi".format(self._title(course_data))
            fn = os.path.join(out_dir, mobi_name)
            if os.path.getsize(fn) / 1024.0 / 1024 > 50:
                print("电子书大小超过50M")
                return

            # Context managers close both files even if reading fails.
            with open(fn, 'rb') as mobi_file:
                d = mobi_file.read()
            with open('smtp.conf') as conf_file:
                smtp_conf = json.load(conf_file)

            m = MailServer(host=smtp_conf['host'],
                           port=smtp_conf['port'],
                           user=smtp_conf['user'],
                           password=smtp_conf['password'],
                           encryption=smtp_conf['encryption'])
            message = m.build_email(email_to=smtp_conf['email_to'],
                                    subject='convert',
                                    body='',
                                    attachments=[(mobi_name, d)])
            m.send_email(message)
            print("push to kindle done")
def test_get_course_intro(dc: DataClient):
    """Fetching the intro of course 212 gives a truthy result."""
    intro = dc.get_course_intro(212)
    assert intro
def test_get_post_content(dc: DataClient):
    """Fetching the content of post 333 gives a truthy result."""
    post = dc.get_post_content(333)
    assert post
def test_get_course_content(dc: DataClient):
    """Fetching the content of course 212 gives a truthy result."""
    content = dc.get_course_content(212)
    assert content
def test_local_storage(dc: DataClient):
    """Reading a course intro leaves a matching row in the 'course' table."""
    course_id = 212
    dc.get_course_intro(course_id)

    course = Query()
    rows = dc.db.table('course').search(course.id == course_id)
    assert rows[0]