def make_column_ebook(column_id, column_title, output_dir):
    """Crawl one column, render its source files and build the ebook.

    The spider is given ``<output_dir>/sqlite3.db`` as its backend database
    and is seeded with the column's article-list and intro endpoints.  After
    the crawl, source files are rendered into ``<output_dir>/<column_id>``
    and that directory is passed to ``make_mobi``.
    """
    cid = str(column_id)
    db_url = os.path.join(output_dir, 'sqlite3.db')
    source_dir = os.path.join(output_dir, cid)

    # Both requests are sent with the same headers.
    request_headers = {
        'Content-Type': 'application/json',
        'Referer': 'https://time.geekbang.org/column/{}'.format(column_id)
    }
    articles_query = {"cid": cid, "size": 1000, "prev": 0, "order": "newest"}

    # start spider
    geektime_spider = spider.get_spider(
        backend_db_url=db_url,
        start_url='https://time.geekbang.org/serv/v1/column/articles',
        headers=request_headers,
        json=articles_query)
    geektime_spider.add_url(
        url='https://time.geekbang.org/serv/v1/column/intro',
        headers=request_headers,
        json={'cid': cid})
    geektime_spider.start_crawl()

    # generate source files
    render_column_source_files(
        column_id, column_title, source_dir, source_db_path=db_url)

    # generate ebook
    make_mobi(source_dir=source_dir, output_dir=output_dir)
Beispiel #2
0
    def run(self, cfg: dict) -> None:
        """Build, and optionally push, an ebook for every requested course.

        Keys read from ``cfg``: ``course_ids``, ``force``, ``comments_count``
        and ``push``.  The whole mapping is also handed to the output-folder,
        data-client and send-to-kindle helpers.

        A course whose intro cannot be fetched (``GkApiError``) or whose
        ``column_type`` is not 1 or 2 is reported on stderr and skipped; the
        remaining courses are still processed.
        """
        course_ids = self.parse_course_ids(cfg['course_ids'])
        output_folder = self._format_output_folder(cfg)

        dc = self.get_data_client(cfg)

        for course_id in course_ids:
            try:
                course_intro = dc.get_course_intro(course_id, force=True)
            except GkApiError as e:
                sys.stderr.write('{}\n\n'.format(e))
                continue
            if int(course_intro['column_type']) not in (1, 2):
                # Terminate the line: this runs inside a loop, so without the
                # newline the next message would be glued onto this one.
                sys.stderr.write("ERROR: 该课程不提供文本:{}\n".format(
                    course_intro['column_title']))
                continue
            # The title is reused below as a directory name.
            course_intro['column_title'] = Render.format_file_name(
                course_intro['column_title'])

            # fetch raw data
            print(
                colored(
                    '开始制作电子书:{}-{}'.format(course_id,
                                           course_intro['column_title']),
                    'green'))
            pbar_desc = '数据爬取中:{}'.format(course_intro['column_title'][:10])
            data = dc.get_course_content(course_id,
                                         force=cfg['force'],
                                         pbar_desc=pbar_desc)
            if cfg['comments_count'] > 0:
                # Append the rendered comments to each article body.
                for post in data:
                    post['article_content'] += self._render_comment_html(
                        post['comments'], cfg['comments_count'])

            # source file
            self._render_source_files(course_intro,
                                      data,
                                      output_folder,
                                      force=cfg['force'])

            # ebook: rebuilt when forced, when the course is not finished,
            # or when the .mobi file does not exist yet
            ebook_name = self._format_title(course_intro)
            fn = os.path.join(output_folder, ebook_name) + '.mobi'
            if (not cfg['force'] and self.is_course_finished(course_intro)
                    and os.path.isfile(fn)):
                sys.stdout.write("{} exists\n".format(ebook_name))
            else:
                src_dir = os.path.join(output_folder,
                                       course_intro['column_title'])
                make_mobi(source_dir=src_dir, output_dir=output_folder)

            # push to kindle
            if cfg['push']:
                self._send_to_kindle(cfg, fn)
                sys.stdout.write("{} 已推送到 kindle\n\n".format(ebook_name))
Beispiel #3
0
    def run(self, args):
        """Build the ebook for one course from command-line style arguments.

        ``args[0]`` is the course id.  The remaining items may contain
        ``--out-dir=<dir>`` (default ``./ebook``), ``--comment-count=<n>``
        (default 10) and the flags ``--force``, ``--enable-comments`` and
        ``--source-only``.

        Raises:
            ValueError: if the ``--comment-count`` value is not an integer.
            Exception: if the course's ``column_type`` is not 1 or 2.
        """
        course_id = args[0]
        options = args[1:]

        for arg in options:
            if '--out-dir=' in arg:
                out_dir = arg.split('--out-dir=')[1] or './ebook'
                break
        else:
            out_dir = './ebook'

        force = '--force' in options
        enable_comments = '--enable-comments' in options
        source_only = '--source-only' in options

        for arg in options:
            if '--comment-count=' in arg:
                # Command-line values are strings; convert so the count has
                # the same type as the integer default below.
                comment_count = int(arg.split('--comment-count=')[1] or 10)
                break
        else:
            comment_count = 10

        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        dc = DataClient()

        course_data = dc.get_course_intro(course_id)

        if int(course_data['column_type']) not in (1, 2):
            raise Exception('该课程不提供文本:%s' % course_data['column_title'])

        # data
        data = dc.get_course_content(course_id, force=force)

        if enable_comments:
            # Append the rendered comments to each article body.
            for post in data:
                post['article_content'] += self._render_comment_html(
                    post['comments'], comment_count)

        # source file
        # The title is reused below as a directory name.
        course_data['column_title'] = maker.format_file_name(
            course_data['column_title'])
        self.render_column_source_files(course_data,
                                        data,
                                        out_dir,
                                        force=force)

        # ebook
        if not source_only:
            # Skip the build only when the course is marked '全集' and the
            # .mobi file is already present.
            if course_data['update_frequency'] == '全集' and os.path.isfile(
                    os.path.join(out_dir, self._title(course_data)) + '.mobi'):
                print("{} exists ".format(self._title(course_data)))
            else:
                make_mobi(source_dir=os.path.join(out_dir,
                                                  course_data['column_title']),
                          output_dir=out_dir)
Beispiel #4
0
    def run(self, cfg: dict) -> None:
        """Build, and optionally push, the ebook for the configured course.

        Keys read from ``cfg``: ``course_id``, ``output_folder``, ``force``,
        ``enable_comments``, ``comments_count``, ``source_only`` and ``push``.
        The whole mapping is also passed to ``get_data_client`` and
        ``send_to_kindle``.

        Failures that the method handles itself (missing course id, output
        folder cannot be created, data client cannot be obtained, course has
        no text, push failed) are reported on stderr and the method returns.
        """
        course_id = cfg['course_id']
        if not course_id:
            sys.stderr.write("ERROR: couldn't find the target course id\n")
            return
        out_dir = os.path.join(cfg['output_folder'], 'ebook')
        if not os.path.isdir(out_dir):
            try:
                os.makedirs(out_dir)
            except OSError:
                sys.stderr.write("ERROR: couldn't create the output folder {}\n".format(out_dir))
                return
        try:
            dc = get_data_client(cfg)
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt and SystemExit must
            # propagate instead of being reported as a login problem.
            sys.stderr.write("ERROR: invalid geektime account or password\n"
                             "Use '%s <command> login --help' for  help.\n" % sys.argv[0].split(os.path.sep)[-1])
            return

        course_data = dc.get_course_intro(course_id, force=True)
        if int(course_data['column_type']) not in (1, 2):
            # Newline-terminated like every other message written here.
            sys.stderr.write("ERROR: 该课程不提供文本:%s\n" % course_data['column_title'])
            return

        # data
        data = dc.get_course_content(course_id, force=cfg['force'])

        if cfg['enable_comments']:
            # Append the rendered comments to each article body.
            for post in data:
                post['article_content'] += self._render_comment_html(post['comments'], cfg['comments_count'])

        # source file
        # The title is reused below as a directory name.
        course_data['column_title'] = maker.format_file_name(course_data['column_title'])
        self._render_column_source_files(course_data, data, out_dir, force=cfg['force'])

        # ebook
        if not cfg['source_only']:
            # Skip the build only when the course is marked '全集' and the
            # .mobi file is already present.
            if course_data['update_frequency'] == '全集' and os.path.isfile(os.path.join(out_dir, self._title(course_data)) + '.mobi'):
                sys.stdout.write("{} exists\n".format(self._title(course_data)))
            else:
                make_mobi(source_dir=os.path.join(out_dir, course_data['column_title']), output_dir=out_dir)

        # push to kindle
        if cfg['push'] and not cfg['source_only']:
            fn = os.path.join(out_dir, "{}.mobi".format(self._title(course_data)))
            try:
                send_to_kindle(fn, cfg)
                sys.stdout.write("push to kindle done\n")
            except Exception as e:
                # Best effort: a failed push is reported, not raised.
                sys.stderr.write("ERROR: push to kindle failed, e={}\n".format(e))
Beispiel #5
0
def test_make_ebook():
    """make_mobi on the example sources must return the path of a real file."""
    source_dir, output_dir = './examples/source', './examples/'
    ebook_path = make_mobi(source_dir, output_dir)
    assert os.path.isfile(ebook_path)

    # Remove the generated file again.
    os.remove(ebook_path)
def test_make_ebook():
    """make_mobi on the example sources must return the path of a real file.

    The ``examples`` directory is located relative to this file, two levels
    up, rather than relative to the current working directory.
    """
    examples_dir = pathlib.Path(__file__).parent.parent / 'examples'
    ebook_path = make_mobi(str(examples_dir / 'source'), str(examples_dir))
    assert os.path.isfile(ebook_path)

    # Remove the generated file again.
    os.remove(ebook_path)
Beispiel #7
0
    def run(self, args):
        """Build the ebook for one course and optionally mail it out.

        ``args[0]`` is the course id.  The remaining items may contain
        ``--out-dir=<dir>`` (default ``./ebook``), ``--comment-count=<n>``
        (default 10) and the flags ``--force``, ``--enable-comments``,
        ``--source-only`` and ``--push``.

        With ``--push`` the generated ``.mobi`` file is e-mailed as an
        attachment using the SMTP settings read from ``smtp.conf`` in the
        current working directory; files above 50 MB are not sent.

        Raises:
            ValueError: if the ``--comment-count`` value is not an integer.
            Exception: if the course's ``column_type`` is not 1 or 2.
        """
        course_id = args[0]
        options = args[1:]

        for arg in options:
            if '--out-dir=' in arg:
                out_dir = arg.split('--out-dir=')[1] or './ebook'
                break
        else:
            out_dir = './ebook'

        force = '--force' in options
        enable_comments = '--enable-comments' in options
        source_only = '--source-only' in options
        push = '--push' in options

        for arg in options:
            if '--comment-count=' in arg:
                # Command-line values are strings; convert so the count has
                # the same type as the integer default below.
                comment_count = int(arg.split('--comment-count=')[1] or 10)
                break
        else:
            comment_count = 10

        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        dc = DataClient()

        course_data = dc.get_course_intro(course_id)

        if int(course_data['column_type']) not in (1, 2):
            raise Exception('该课程不提供文本:%s' % course_data['column_title'])

        # data
        data = dc.get_course_content(course_id, force=force)

        if enable_comments:
            # Append the rendered comments to each article body.
            for post in data:
                post['article_content'] += self._render_comment_html(
                    post['comments'], comment_count)

        # source file
        # The title is reused below as a directory name.
        course_data['column_title'] = maker.format_file_name(
            course_data['column_title'])
        self.render_column_source_files(course_data,
                                        data,
                                        out_dir,
                                        force=force)

        # ebook
        if not source_only:
            # Skip the build only when the course is marked '全集' and the
            # .mobi file is already present.
            if course_data['update_frequency'] == '全集' and os.path.isfile(
                    os.path.join(out_dir, self._title(course_data)) + '.mobi'):
                print("{} exists ".format(self._title(course_data)))
            else:
                make_mobi(source_dir=os.path.join(out_dir,
                                                  course_data['column_title']),
                          output_dir=out_dir)
        if push:

            fn = os.path.join(out_dir,
                              "{}.mobi".format(self._title(course_data)))
            # Size is compared in megabytes (bytes / 1024 / 1024).
            if os.path.getsize(fn) / 1024.0 / 1024 > 50:
                print("电子书大小超过50M")
                return
            # Context manager closes the handle even if read() raises.
            with open(fn, 'rb') as f:
                d = f.read()

            with open('smtp.conf') as f:
                smtp_conf = json.loads(f.read())
            m = MailServer(host=smtp_conf['host'],
                           port=smtp_conf['port'],
                           user=smtp_conf['user'],
                           password=smtp_conf['password'],
                           encryption=smtp_conf['encryption'])
            message = m.build_email(email_to=smtp_conf['email_to'],
                                    subject='convert',
                                    body='',
                                    attachments=[("{}.mobi".format(
                                        self._title(course_data)), d)])
            m.send_email(message)
            print("push to kindle done")
Beispiel #8
0
    def run(self, cfg: dict) -> None:
        """Build, and optionally push, the ebook for the configured course.

        Keys read from ``cfg``: ``course_id``, ``output_folder``, ``force``,
        ``enable_comments``, ``comments_count``, ``source_only`` and ``push``.
        The whole mapping is also passed to ``get_data_client`` and
        ``send_to_kindle``.

        Failures that the method handles itself (missing course id, output
        folder cannot be created, data client cannot be obtained, course has
        no text, push failed) are reported on stderr and the method returns.
        """
        course_id = cfg['course_id']
        if not course_id:
            sys.stderr.write("ERROR: couldn't find the target course id\n")
            return
        out_dir = os.path.join(cfg['output_folder'], 'ebook')
        # Allow a leading "~" in the configured folder.
        out_dir = os.path.expanduser(out_dir)
        if not os.path.isdir(out_dir):
            try:
                os.makedirs(out_dir)
            except OSError:
                sys.stderr.write(
                    "ERROR: couldn't create the output folder {}\n".format(
                        out_dir))
                return
        try:
            dc = get_data_client(cfg)
        except Exception:
            sys.stderr.write("ERROR: invalid geektime account or password\n"
                             "Use '{} login --help' for  help.\n".format(
                                 sys.argv[0].split(os.path.sep)[-1]))
            return

        course_data = dc.get_course_intro(course_id, force=True)
        if int(course_data['column_type']) not in (1, 2):
            # Newline-terminated like every other message written here.
            sys.stderr.write("ERROR: 该课程不提供文本:{}\n".format(
                course_data['column_title']))
            return

        # data
        sys.stdout.write('doing ......\n')
        data = dc.get_course_content(course_id, force=cfg['force'])
        if cfg['enable_comments']:
            # Append the rendered comments to each article body.
            for post in data:
                post['article_content'] += self._render_comment_html(
                    post['comments'], cfg['comments_count'])

        # source file
        # The title is reused below as a directory name.
        course_data['column_title'] = Render.format_file_name(
            course_data['column_title'])
        self._render_source_files(course_data,
                                  data,
                                  out_dir,
                                  force=cfg['force'])

        # ebook
        ebook_name = self._title(course_data)
        if not cfg['source_only']:
            fn = os.path.join(out_dir, ebook_name) + '.mobi'
            # Skip the build only when the course is flagged as finished and
            # the .mobi file is already present.
            if course_data['is_finish'] and os.path.isfile(fn):
                sys.stdout.write("{} exists\n".format(ebook_name))
            else:
                src_dir = os.path.join(out_dir, course_data['column_title'])
                make_mobi(source_dir=src_dir, output_dir=out_dir)

        # push to kindle
        if cfg['push'] and not cfg['source_only']:
            fn = os.path.join(out_dir, "{}.mobi".format(ebook_name))
            try:
                send_to_kindle(fn, cfg)
                sys.stdout.write("push to kindle done\n")
            except Exception as e:
                # Best effort: a failed push is reported, not raised.
                sys.stderr.write(
                    "ERROR: push to kindle failed, e={}\n".format(e))
Beispiel #9
0
import os
from kindle_maker import make_mobi


def main():
    """List each chapter directory under the root and the entries inside it.

    Unfinished stub: the root and destination paths are empty strings and
    the collected listings are not used yet.
    """
    root_dir = r""
    dest_dir = r""
    chapter_to_lectures = {}
    sources = []
    for chapter_name in os.listdir(root_dir):
        lecture_names = os.listdir(os.path.join(root_dir, chapter_name))


if __name__ == "__main__":
    # Script entry point. The call to main() is disabled; make_mobi is
    # instead invoked directly with two hard-coded Windows paths (first the
    # source directory, then the output directory).
    # main()
    make_mobi(r"E:\24-Java并发编程实战(完结)\03-第一部分:并发理论基础 (13讲)",
              r"E:\24-Java并发编程实战(完结)\Test")