Exemple #1
0
    def run(self, args):
        """Download the videos of a video course, or only list their URLs.

        ``args[0]`` is the course id; the remaining items are flags:

        * ``--url-only``: write the hd and sd URL of every post to
          ``<column_title>.mp4.txt`` and return without downloading.
        * ``--hd-only``: prefer the hd stream, falling back to sd when a
          post has no hd URL.
        * ``--out-dir=DIR``: base output directory (default ``./mp4``).

        Raises:
            Exception: if the course's ``column_type`` is not 3.
        """
        course_id = args[0]
        flags = args[1:]
        url_only = '--url-only' in flags
        hd_only = '--hd-only' in flags

        # First matching flag wins; an empty value falls back to the default.
        out_dir = next(
            (flag.split('--out-dir=')[1]
             for flag in flags if '--out-dir=' in flag),
            '') or './mp4'
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        dc = DataClient()

        course_data = dc.get_course_intro(course_id)

        if int(course_data['column_type']) != 3:
            raise Exception('该课程不是视频课程:%s' % course_data['column_title'])

        out_dir = os.path.join(out_dir, course_data['column_title'])
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        data = dc.get_course_content(course_id)

        def stream_urls(post):
            """Return ``(hd_url, sd_url)``, decoding ``video_media`` once."""
            media = json.loads(post['video_media'])
            # ``or {}`` also covers a quality key that is present but null.
            return ((media.get('hd') or {}).get('url'),
                    (media.get('sd') or {}).get('url'))

        if url_only:
            entries = []
            for post in data:
                hd_url, sd_url = stream_urls(post)
                entries.append("{}:\n{}\n{}\n\n".format(
                    post['article_title'], hd_url, sd_url))

            listing = os.path.join(
                out_dir, '%s.mp4.txt' % course_data['column_title'])
            with open(listing, 'w') as f:
                f.write('\n'.join(entries))
            print("download mp4 url done: " + course_data['column_title'])
            return

        for post in data:
            file_name = format_path(post['article_title'] +
                                    ('.hd' if hd_only else '.sd'))
            if os.path.isfile(os.path.join(out_dir, file_name) + '.ts'):
                print(file_name + ' exists')
                continue

            hd_url, sd_url = stream_urls(post)
            # some posts have sd mp4 only, so hd falls back to sd
            url = (hd_url or sd_url) if hd_only else sd_url
            if not url:
                # Nothing to fetch; do not hand None to the downloader.
                print('no video url, skip: ' + file_name)
                continue

            dl = Downloader(3)
            dl.run(url, dir=out_dir, file_name=file_name)
            print('download mp4 done: ' + file_name)
Exemple #2
0
    def run(self, args):
        """Run the parent ebook command for several courses.

        With ``--all`` in ``args`` every course under keys ``'1'`` and
        ``'2'`` of the course list whose ``had_sub`` is truthy is processed;
        an exception from one course is printed and the batch continues.
        Otherwise ``args[0]`` is a comma-separated list of course ids and
        exceptions propagate to the caller.
        """
        if '--all' not in args:
            for raw_id in args[0].split(','):
                super(EbookBatch, self).run([raw_id.strip()] + args)
                print('\n')
            return

        catalog = DataClient().get_course_list()
        for group_key in ('1', '2'):
            for course in catalog[group_key]['list']:
                if not course['had_sub']:
                    continue
                # Every subscribed course is converted, whatever its
                # update frequency.
                try:
                    super(EbookBatch, self).run([str(course['id'])] + args)
                    print('\n')
                except Exception as err:
                    print(err)
Exemple #3
0
    def run(self, args):
        """Render a text course to source files and, optionally, a mobi.

        ``args[0]`` is the course id; the remaining items are flags:

        * ``--out-dir=DIR``: output directory (default ``./ebook``).
        * ``--force``: forwarded as ``force=True`` to the content fetch and
          the source-file rendering.
        * ``--enable-comments``: append rendered comments to each post.
        * ``--comment-count=N``: number of comments to render (default 10).
        * ``--source-only``: stop after the source files, skip the mobi.

        Raises:
            Exception: if the course's ``column_type`` is not 1 or 2.
            ValueError: if ``--comment-count=`` is not an integer.
        """
        course_id = args[0]
        flags = args[1:]

        # First matching flag wins; an empty value falls back to the default.
        out_dir = next(
            (flag.split('--out-dir=')[1]
             for flag in flags if '--out-dir=' in flag),
            '') or './ebook'

        force = '--force' in flags
        enable_comments = '--enable-comments' in flags
        source_only = '--source-only' in flags

        # The CLI value arrives as a string; convert it so the count has the
        # same type as the integer default.
        comment_count = int(next(
            (flag.split('--comment-count=')[1]
             for flag in flags if '--comment-count=' in flag),
            '') or 10)

        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        dc = DataClient()

        course_data = dc.get_course_intro(course_id)

        if int(course_data['column_type']) not in (1, 2):
            raise Exception('该课程不提供文本:%s' % course_data['column_title'])

        # data
        data = dc.get_course_content(course_id, force=force)

        if enable_comments:
            for post in data:
                post['article_content'] += self._render_comment_html(
                    post['comments'], comment_count)

        # source file
        course_data['column_title'] = maker.format_file_name(
            course_data['column_title'])
        self.render_column_source_files(course_data,
                                        data,
                                        out_dir,
                                        force=force)

        # ebook
        if source_only:
            return

        title = self._title(course_data)
        # A course marked '全集' whose mobi already exists is not rebuilt.
        if course_data['update_frequency'] == '全集' and os.path.isfile(
                os.path.join(out_dir, title) + '.mobi'):
            print("{} exists ".format(title))
        else:
            make_mobi(source_dir=os.path.join(out_dir,
                                              course_data['column_title']),
                      output_dir=out_dir)
Exemple #4
0
    def run(self, args):
        """Download the audio of a course, or only list the mp3 URLs.

        ``args[0]`` is the course id. ``--url-only`` writes one
        ``title:<TAB><TAB>url`` line per post to ``<column_title>.mp3.txt``
        and returns; ``--out-dir=DIR`` selects the base output directory
        (default ``./mp3``). Posts without an ``audio_download_url`` and
        files that already exist are skipped.

        Raises:
            Exception: if the course's ``column_type`` is not 1.
        """
        course_id = args[0]
        flags = args[1:]
        url_only = '--url-only' in flags

        out_dir = './mp3'
        for flag in flags:
            if '--out-dir=' in flag:
                out_dir = flag.split('--out-dir=')[1] or './mp3'
                break
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        dc = DataClient()
        course_data = dc.get_course_intro(course_id)
        if int(course_data['column_type']) != 1:
            raise Exception('该课程不提供音频:%s' % course_data['column_title'])

        # Each course gets its own sub-directory named after its title.
        out_dir = os.path.join(out_dir, course_data['column_title'])
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        data = dc.get_course_content(course_id)

        if url_only:
            listing_path = os.path.join(
                out_dir, '%s.mp3.txt' % course_data['column_title'])
            with open(listing_path, 'w') as listing:
                # TODO alignment
                rows = [
                    "{}:\t\t{}".format(post['article_title'],
                                       post['audio_download_url'])
                    for post in data
                ]
                listing.write('\n'.join(rows))

            return

        dl = Downloader()
        for post in data:
            file_name = format_path(post['article_title'] + '.mp3')
            if os.path.isfile(os.path.join(out_dir, file_name)):
                print(file_name + ' exists')
                continue
            if not post['audio_download_url']:
                continue
            dl.run(post['audio_download_url'],
                   out_file=file_name,
                   out_dir=out_dir)
            print('download mp3 done: ' + file_name)
Exemple #5
0
    def run(self, args):
        """Run the parent mp3 command once per course id.

        With ``--all`` in ``args`` the ids are those of the courses under
        key ``'1'`` of the course list whose ``had_sub`` is truthy;
        otherwise ``args[0]`` is split on commas. The original ``args`` are
        appended after each id.
        """
        if '--all' in args:
            catalog = DataClient().get_course_list()
            cid_list = [
                str(course['id'])
                for course in catalog['1']['list']
                if course['had_sub']
            ]
        else:
            cid_list = args[0].split(',')

        for raw_id in cid_list:
            super(Mp3Batch, self).run([raw_id.strip()] + args)
Exemple #6
0
def dc() -> DataClient:
    """Yield a DataClient wired to an in-memory TinyDB and a FakeGk.

    The database is closed once the generator is resumed after the yield.
    NOTE(review): presumably registered as a pytest fixture; the decorator
    is not visible here.
    """
    memory_db = TinyDB(storage=MemoryStorage)
    client = DataClient(FakeGk(), memory_db)
    yield client

    client.db.close()
Exemple #7
0
    def run(self, args):
        """Run the parent ebook command for several courses.

        With ``--all`` in ``args`` every course under keys ``'1'`` and
        ``'2'`` of the course list is processed: courses that are subscribed
        and whose ``update_frequency`` is ``'全集'`` get the full run, all
        others are run with ``--source-only``. Otherwise ``args[0]`` is a
        comma-separated list of course ids.
        """
        if '--all' not in args:
            for raw_id in args[0].split(','):
                super(EbookBatch, self).run([raw_id.strip()] + args)
            return

        catalog = DataClient().get_course_list()
        for course in catalog['1']['list'] + catalog['2']['list']:
            complete = (course['had_sub']
                        and course['update_frequency'] == '全集')
            extra = [] if complete else ['--source-only']
            super(EbookBatch, self).run([str(course['id'])] + extra + args)
Exemple #8
0
    def run(self, args):
        """Fetch a text course and render its source files.

        ``args[0]`` is the course id; the remaining items are flags:

        * ``--out-dir=DIR``: output directory (default ``./ebook``).
        * ``--force``: forwarded as ``force=True`` to the content fetch and
          the source-file rendering.
        * ``--enable-comments``: append rendered comments to each post.
        * ``--comment-count=N``: number of comments to render (default 10).

        Raises:
            Exception: if the course's ``column_type`` is not 1 or 2.
            ValueError: if ``--comment-count=`` is not an integer.
        """
        course_id = args[0]
        flags = args[1:]

        # First matching flag wins; an empty value falls back to the default.
        out_dir = next(
            (flag.split('--out-dir=')[1]
             for flag in flags if '--out-dir=' in flag),
            '') or './ebook'

        force = '--force' in flags
        enable_comments = '--enable-comments' in flags
        # NOTE(review): the next two flags are parsed but not consumed in the
        # visible part of this method - confirm whether code is missing.
        source_only = '--source-only' in flags
        push = '--push' in flags

        # The CLI value arrives as a string; convert it so the count has the
        # same type as the integer default.
        comment_count = int(next(
            (flag.split('--comment-count=')[1]
             for flag in flags if '--comment-count=' in flag),
            '') or 10)

        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        dc = DataClient()

        course_data = dc.get_course_intro(course_id)

        if int(course_data['column_type']) not in (1, 2):
            raise Exception('该课程不提供文本:%s' % course_data['column_title'])

        # data
        data = dc.get_course_content(course_id, force=force)

        if enable_comments:
            for post in data:
                post['article_content'] += self._render_comment_html(
                    post['comments'], comment_count)

        # source file
        course_data['column_title'] = maker.format_file_name(
            course_data['column_title'])
        self.render_column_source_files(course_data, data, out_dir,
                                        force=force)
def test_force(dc: DataClient):
    """``force=True`` re-reads the course intro and refreshes the stored row."""
    course_id = 212

    def stored_rows():
        # Rows of the 'course' table whose id matches the course under test.
        return dc.db.table('course').search(Query().id == course_id)

    # First read comes from the gk api.
    intro = dc.get_course_intro(course_id)
    assert intro['access_count'] == 1
    # The result has been written to local storage.
    rows = stored_rows()
    assert len(rows) == 1
    assert rows[0]['access_count'] == 1

    # Second read is served from local storage, so the count is unchanged.
    intro = dc.get_course_intro(course_id)
    assert intro['access_count'] == 1
    # A forced read goes back to the gk api.
    intro = dc.get_course_intro(course_id, force=True)
    assert intro['access_count'] == 2
    # Local storage still holds a single row, now with the new count.
    rows = stored_rows()
    assert len(rows) == 1
    assert rows[0]['access_count'] == 2
Exemple #10
0
    def get_all_course_ids(self, dc: DataClient, type_: str) -> List[int]:
        """Return the ids of the courses under keys '1' and '2' selected by ``type_``.

        ``'all'`` selects every course, ``'all-sub'`` those whose ``had_sub``
        is truthy, and ``'all-done'`` those that are subscribed and for which
        ``self.is_course_finished`` is truthy. Any other value selects
        nothing.
        """

        def selected(course) -> bool:
            if type_ == 'all':
                return True
            if type_ == 'all-sub':
                return bool(course['had_sub'])
            if type_ == 'all-done':
                return bool(course['had_sub']
                            and self.is_course_finished(course))
            return False

        catalog = dc.get_course_list()
        courses = catalog['1']['list'] + catalog['2']['list']
        return [int(course['id']) for course in courses if selected(course)]
Exemple #11
0
    def run(self, args):
        """Print every course, grouped under keys '1' to '4' of the course list.

        Each group gets a heading and a column header, followed by one row
        per course: id, subscription marker, title and update frequency.
        A hint is printed first when the client has no cookies.
        """
        dc = DataClient()
        if not dc.cookies:
            print("尚未登录, 可以先 geektime login 以便查看更详细的信息")

        data = dc.get_course_list()

        group_titles = {
            '1': '专栏',
            '2': '微课',
            '3': '视频',
            '4': '其他'
        }
        header = "\t{:<12}{:<10}{}\t\t{}\n".format(
            '课程ID', '已订阅', '课程标题', '更新频率/课时·时长')

        parts = []
        for key in ('1', '2', '3', '4'):
            columns = data[key]['list']
            parts.append(group_titles[key] + '\n')
            parts.append(header)
            parts.extend(
                "\t{:<15}{:<10}{}\t({})\n".format(
                    str(c['id']), '是' if c['had_sub'] else '否',
                    c['column_title'], c['update_frequency'] or None)
                for c in columns)

        print(''.join(parts))
def test_get_course_list(dc: DataClient):
    """The course list is returned as a dict."""
    course_list = dc.get_course_list()
    assert isinstance(course_list, dict)
Exemple #13
0
    def run(self, args):
        """Render a text course to source files and optionally mail the mobi.

        ``args[0]`` is the course id; the remaining items are flags:

        * ``--out-dir=DIR``: output directory (default ``./ebook``).
        * ``--force``: forwarded as ``force=True`` to the content fetch and
          the source-file rendering (the course intro is always re-read).
        * ``--enable-comments``: append rendered comments to each post.
        * ``--comment-count=N``: number of comments to render (default 10).
        * ``--source-only``: skip the ebook step.
        * ``--push``: mail ``<title>.mobi`` from the output directory using
          the settings in ``smtp.conf``; files over 50 MiB are not sent.

        Raises:
            Exception: if the course's ``column_type`` is not 1 or 2.
            ValueError: if ``--comment-count=`` is not an integer.
        """
        course_id = args[0]
        flags = args[1:]

        # First matching flag wins; an empty value falls back to the default.
        out_dir = next(
            (flag.split('--out-dir=')[1]
             for flag in flags if '--out-dir=' in flag),
            '') or './ebook'

        force = '--force' in flags
        enable_comments = '--enable-comments' in flags
        source_only = '--source-only' in flags
        push = '--push' in flags

        # The CLI value arrives as a string; convert it so the count has the
        # same type as the integer default.
        comment_count = int(next(
            (flag.split('--comment-count=')[1]
             for flag in flags if '--comment-count=' in flag),
            '') or 10)

        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        dc = DataClient()
        course_data = dc.get_course_intro(course_id, force=True)

        if int(course_data['column_type']) not in (1, 2):
            raise Exception('该课程不提供文本:%s' % course_data['column_title'])

        # data
        data = dc.get_course_content(course_id, force=force)

        if enable_comments:
            for post in data:
                post['article_content'] += self._render_comment_html(
                    post['comments'], comment_count)

        # source file
        course_data['column_title'] = maker.format_file_name(
            course_data['column_title'])
        self.render_column_source_files(course_data,
                                        data,
                                        out_dir,
                                        force=force)

        # ebook
        if not source_only:
            if course_data['update_frequency'] == '全集' and os.path.isfile(
                    os.path.join(out_dir, self._title(course_data)) + '.mobi'):
                print("{} exists ".format(self._title(course_data)))
            else:
                # The mobi conversion is currently disabled; only the
                # intended source -> target mapping is reported.
                print(
                    os.path.join(out_dir, course_data['column_title']) +
                    " -> " +
                    course_data['column_title'].replace("html", "pdf"))
                # make_mobi(source_dir=os.path.join(out_dir, course_data['column_title']), output_dir=out_dir)
        if push:

            fn = os.path.join(out_dir,
                              "{}.mobi".format(self._title(course_data)))
            if os.path.getsize(fn) / 1024.0 / 1024 > 50:
                print("电子书大小超过50M")
                return
            # The context manager closes the file even if the read fails.
            with open(fn, 'rb') as f:
                d = f.read()

            with open('smtp.conf') as f:
                smtp_conf = json.loads(f.read())
            m = MailServer(host=smtp_conf['host'],
                           port=smtp_conf['port'],
                           user=smtp_conf['user'],
                           password=smtp_conf['password'],
                           encryption=smtp_conf['encryption'])
            message = m.build_email(email_to=smtp_conf['email_to'],
                                    subject='convert',
                                    body='',
                                    attachments=[("{}.mobi".format(
                                        self._title(course_data)), d)])
            m.send_email(message)
            print("push to kindle done")
def test_get_course_intro(dc: DataClient):
    """Fetching the intro of course 212 yields a truthy result."""
    intro = dc.get_course_intro(212)
    assert intro
def test_get_post_content(dc: DataClient):
    """Fetching the content of post 333 yields a truthy result."""
    post = dc.get_post_content(333)
    assert post
def test_get_course_content(dc: DataClient):
    """Fetching the content of course 212 yields a truthy result."""
    content = dc.get_course_content(212)
    assert content
def test_local_storage(dc: DataClient):
    """Reading a course intro leaves a matching row in the 'course' table."""
    course_id = 212
    dc.get_course_intro(course_id)

    matches_id = Query().id == course_id
    stored = dc.db.table('course').search(matches_id)
    assert stored[0]