Beispiel #1
0
def render_article_index_page(site=None, iid=None):
    """Render the paginated article index as an HTML response.

    Paging, layout, weight/status ranges, ordering and column filters are
    read from ``get_request_query_args()``.

    Args:
        site: optional site key; restricts the listing to that site and
            contributes to the page title.
        iid: optional index id; restricts the listing to that index and
            contributes to the page title.

    Returns:
        The response built by ``make_response`` from ``template_page``, with
        the generated markup (or an error paragraph) as ``content``.
    """
    # Local import so the block does not depend on a file-level import.
    import html

    handle_client_ip()

    (page, count_per_page, colspan, is_preview, toc, weight_from, weight_to,
     status_from, status_to, order1, order2,
     filters) = get_request_query_args()

    # colspan is used as a divisor and a modulus below; a request for zero
    # (or negative) columns would otherwise raise ZeroDivisionError.
    colspan = max(1, colspan or 1)

    # Comparison operators accepted in user-supplied filters.
    # NOTE(review): allow-list chosen by the reviewer; extend it if the
    # filter form offers other operators.
    allowed_ops = {
        '=', '==', '!=', '<>', '<', '<=', '>', '>=',
        'like', 'not like', 'ilike', 'not ilike', 'glob', 'regexp',
    }

    context = {"encoding": 'utf-8'}
    db = Database()

    last_updated_row = handle_rate_form(db=db)

    try:
        ta = db.DB_t_article

        # Build the page title from the site name and/or the index text.
        title = ''
        if site:
            title += SiteSchemas.get(site, {}).get(SSK.NAME, '')
        if iid:
            ti = db.DB_t_index
            stmt = select([ti.c.site + ' - ' + ti.c.text])
            stmt = stmt.where(ti.c.id == iid)
            itext = db.conn.execute(stmt).scalar() or ('index %s' % iid)
            if title:
                title += ' - '
            title += itext
        if not title:
            title = '全部小说'

        cellwidth = 100 / colspan
        i = 0  # number of article cells rendered so far
        row = 0
        odd = True
        sb = []

        sb.append('<h3 align="center">%s</h3>' % title)
        render_pagination_and_filters(sb, return_link='/', return_text='返回首页')
        sb.append('<table align="center" width="95%" cellpadding="5">')

        stmt = select([
            ta.c.id, ta.c.site, ta.c.name, ta.c.author, ta.c.category,
            ta.c.length, ta.c.status, ta.c.desc, ta.c.cover, ta.c.recommends,
            ta.c.favorites, ta.c.recommends_month, ta.c.done, ta.c.update_on,
            ta.c.chapter_table, ta.c.weight, ta.c.url, ta.c.timestamp
        ])
        if site:
            stmt = stmt.where(ta.c.site == site)
        if iid:
            stmt = stmt.where(ta.c.iid == iid)
        stmt = stmt.where(between(ta.c.weight, weight_from, weight_to))
        stmt = stmt.where(between(ta.c.status, status_from, status_to))

        # Filters come from the request, so never splice them into SQL
        # verbatim: the column must exist on the article table, the operator
        # must be allow-listed, and the value is sent as a bound parameter.
        for n, (fc, fo, fv) in enumerate(filters):
            if not (fc and fo and fv):
                continue
            op = ' '.join(fo.lower().split())
            if fc not in ta.c or op not in allowed_ops:
                log.warning('Ignoring unsupported filter: %r %r' % (fc, fo))
                continue
            # One distinct parameter name per filter so values cannot clash.
            param = 'filter_%d' % n
            clause = text('%s %s :%s' % (ta.c[fc].name, op, param))
            stmt = stmt.where(clause.bindparams(**{param: fv}))

        # NOTE(review): order1/order2 also originate from the request and
        # are still passed to text() unvalidated; restrict them to known
        # column names once the set of valid sort keys is confirmed.
        stmt = stmt.order_by(
            text(order1[1:] + ' desc' if order1.startswith('-') else order1),
            text(order2[1:] + ' desc' if order2.startswith('-') else order2))
        stmt = stmt.offset(count_per_page * page).limit(count_per_page)
        rs = db.conn.execute(stmt)
        for r in rs:

            # In preview mode fetch the first `toc` chapters of the article.
            rs1 = None
            if is_preview:
                chapter_table, tc, table_alone = db.get_chapter_table_name_def_alone(
                    r)

                if chapter_table and db.exist_table(chapter_table):
                    toc_stmt = select([tc.c.id, tc.c.name, tc.c.is_section])
                    if not table_alone:
                        toc_stmt = toc_stmt.where(tc.c.aid == r[ta.c.id])
                    toc_stmt = toc_stmt.order_by(tc.c.id).limit(toc)
                    rs1 = db.conn.execute(toc_stmt)

            if i % colspan == 0:
                sb.append('<tr id="r%s" style="%s;">' %
                          (row, 'background-color:#eee' if odd else ''))
                odd = not odd
                row += 1

            if not is_preview:
                sb.append(
                    '<td align="right"><img src="%s/%s/%s" width="%dpx" height="%dpx"></td>'
                    % (album_url_path, site_for_url(
                        r[ta.c.site]), r[ta.c.cover] or '', album_w, album_h))

            sb.append('<td id="a%s" width="%d%%">' % (r[ta.c.id], cellwidth))
            if is_preview:

                sb.append('<h4>%(id)s <a href="/%(id)s/">%(name)s</a></h4>' %
                          r)
                sb.append('<div style="word-wrap:break-word;">%s</div>' %
                          (r[ta.c.desc] or ''))
                render_rate_form(sb,
                                 aweight=r[ta.c.weight],
                                 astatus=r[ta.c.status],
                                 aid=r[ta.c.id],
                                 row=row - 1)
                sb.append(
                    '<a href="/cache/%s/%s/%s?aid=%s">原目录页缓存</a>' %
                    (base64.standard_b64encode(r[ta.c.site].encode()).decode(),
                     base64.standard_b64encode(r[ta.c.url].encode()).decode(),
                     Spiders.TOC, r[ta.c.id]))
                sb.append(
                    ' | <a href="%s">原文</a>' %
                    (SiteSchemas.get(r[ta.c.site], {}).get(SSK.URL.code, '') +
                     r[ta.c.url]))
                if rs1 is not None:
                    sb.append('<ul>')
                    for r1 in rs1:
                        sb.append('<li><a href="/%s/%s">%s</a></li>' %
                                  (r[ta.c.id], r1[tc.c.id], r1[tc.c.name]))
                    rs1.close()
                    sb.append('</ul>')
            else:
                sb.append(
                    '<h4>%(id)s <a title="%(desc)s" href="/%(id)s/">%(name)s</a></h4>'
                    % r)
                sb.append(
                    '<ul><li>作者: <a href="/author/%(author)s/">%(author)s</a></li>'
                    % r)
                sb.append(
                    '<li>类型: %(category)s</li><li>%(length)s 字</li><li>状态:%(status)s 完成:%(done)s</li>'
                    % r)
                updated = r[ta.c.update_on]
                # The cell is closed once, after the filter values below;
                # closing it here too pushed those values out of the cell.
                sb.append('<li>%s</li></ul>' %
                          (updated.strftime('%x') if updated else ''))

            # Show the value of every filtered column inside the cell.
            filter_items = [
                '<li>%s: %s</li>' % (fc, r[fc]) for fc, fo, fv in filters
                if fc
            ]
            if filter_items:
                sb.append('<ul>')
                sb.extend(filter_items)
                sb.append('</ul>')

            sb.append('</td>')
            i += 1
            if i % colspan == 0:
                sb.append('</tr>')

        # Close a partially filled last row.
        if i % colspan != 0:
            sb.append('</tr>')

        # rowcount is not dependable for SELECT statements, so rely on the
        # number of cells actually rendered.
        if i == 0:
            sb.append('<tr style="background-color:#eee"><td> 还未上传 </td></tr>')

        sb.append('</table>')

        render_pagination_and_filters(sb, return_link='/', return_text='返回首页')

        # After a rating was posted, jump back to the row that was edited.
        if last_updated_row:
            sb.append('''
            <script>
                window.location = window.location.protocol + '//' + window.location.host + 
                    window.location.pathname + window.location.search + '#r%s';
            </script>
            ''' % last_updated_row)

        sb.append('<hr><center>-- Page %s END --</center>' % page)
        content = '\n'.join(sb)
    except Exception as err:
        # The message may echo request-supplied text; escape it for HTML.
        content = '<p class="error">' + html.escape(str(err)) + '</p>'
        log.exception(err)

    context['content'] = content
    resp = make_response(render_template_string(template_page, **context))
    resp.headers['Content-Type'] = 'text/html; charset=utf-8'
    return resp
Beispiel #2
0
class CopyDB(object):
    """Copy article and chapter tables from a source database to a target.

    Progress is written to a JSON state file after every batch, so an
    interrupted run can be resumed: call :meth:`read_state` first, then the
    ``copy_*`` methods, and finally :meth:`finish_state`.
    """

    base_path = os.path.dirname(os.path.dirname(__file__))
    state_file_path = os.path.join(base_path, 'log', 'copy_db_state.json')

    # (pattern, replacement) pairs applied in order to strip HTML markup.
    # The fourth pattern originally read "[\\r|\\n]", which also deleted
    # literal "|" characters from the text.
    _HTML_CLEANUP_RULES = (
        ("\\</?div.*?\\>", ""),
        ("\\<p.*?\\>.*\\</p\\>", ""),
        ("\\<a.*?\\>.*\\</a\\>", ""),
        ("[\\r\\n]", ""),
        ("\\<br\\>", "\r\n"),
    )

    def __init__(self, src_conn_str, target_conn_str):
        """Open both databases and load the text-fixing plugins.

        Args:
            src_conn_str: connection string of the database to read from.
            target_conn_str: connection string of the database to write to.
        """
        self.src_conn_str = src_conn_str
        self.target_conn_str = target_conn_str
        self.src_db = Database(conn_str=src_conn_str)
        self.target_db = Database(conn_str=target_conn_str)
        self.state = None

        # Previously referenced undefined names (src_conn / target_conn).
        log.info('Copying data from "%s" to "%s" ...' %
                 (src_conn_str, target_conn_str))

        self._re_search = plugin_source.load_plugin('re_search')
        self._re_replace = plugin_source.load_plugin('re_replace')
        self._html2md = plugin_source.load_plugin('html2md')

    def _fix_author(self, author):
        """Return ``author`` reduced to the part matched by ``([^&]*)``."""
        val = author
        if not val:
            return val

        if self._re_search:
            val = self._re_search.perform(val, "([^&]*)")

        return val

    def _strip_html(self, val):
        """Apply the HTML clean-up rules to ``val``.

        The value is returned unchanged when it is empty, when either the
        ``html2md`` or ``re_replace`` plugin is unavailable, or when it
        already starts with the plugin's ``MD_PREFIX``.
        """
        if not val:
            return val
        if not self._html2md or not self._re_replace:
            return val
        if val.startswith(self._html2md.MD_PREFIX):
            return val

        for pattern, replacement in self._HTML_CLEANUP_RULES:
            val = self._re_replace.perform(val, pattern, replacement)
        return val

    def _fix_desc(self, val):
        """Clean up an article description before it is copied."""
        return self._strip_html(val)

    def _fix_content(self, val):
        """Clean up chapter content before it is copied."""
        return self._strip_html(val)

    def _copy_table(self, t, limit=1000):
        """Copy all rows of table ``t`` to the target database in batches.

        Args:
            t: table definition to read from the source and insert into the
                target.
            limit: number of rows fetched and inserted per batch.
        """
        assert self.state is not None, 'Must load state before start coping.'

        if t.name not in self.state:
            self.state[t.name] = {'offset': 0}
            self.save_state()

        stat = self.state[t.name]
        offset = stat.get('offset', 0)
        if stat.get('done', False):
            log.info('Skip %s due to its done already (records:%d).' % (t.name, offset))
            return

        # OFFSET/LIMIT paging needs a stable order; without one a resumed or
        # multi-batch copy may skip or duplicate rows.
        pk_columns = list(t.primary_key.columns)

        while True:
            stmt = select(t.c)
            if pk_columns:
                stmt = stmt.order_by(*pk_columns)
            stmt = stmt.offset(offset).limit(limit)
            rs = self.src_db.execute(stmt)

            records = []
            for r in rs:
                record = {c.name: r[c] for c in t.c}
                if 'author' in record:
                    record['author'] = self._fix_author(record['author'])
                if 'desc' in record:
                    record['desc'] = self._fix_desc(record['desc'])
                if 'content' in record:
                    record['content'] = self._fix_content(record['content'])
                records.append(record)

            if records:
                self.target_db.conn.execute(t.insert(), records)

            offset += len(records)
            stat['offset'] = offset
            self.save_state()

            # A short batch means the source table is exhausted.
            if len(records) < limit:
                break

        stat['done'] = True
        self.save_state()

    def copy_major_tables(self):
        """Create every table of the target metadata and copy its rows."""
        self.target_db.meta.bind = self.target_db.engine
        self.target_db.meta.create_all()
        for t in self.target_db.meta.tables.values():
            self._copy_table(t)

    def copy_site_chapter_tables(self):
        """Copy the per-site chapter tables, one site at a time."""
        assert self.state is not None, 'Must load state before start coping.'
        stat_key = '_site_chapter_tables_'

        if stat_key not in self.state:
            self.state[stat_key] = {'offset': 0}
            self.save_state()

        stat = self.state[stat_key]
        offset = stat.get('offset', 0)
        if stat.get('done', False):
            log.info('Skip %s due to its done already (records:%d).' % (stat_key, offset))
            return

        # Sites are processed in sorted order so the stored offset always
        # refers to the same site; the first `offset` sites are already done.
        for site in sorted(SiteSchemas.keys())[offset:]:
            tname = self.src_db.gen_site_chapter_table_name(site)
            t = self.src_db.get_db_t_site_chapter(tname)

            if self.src_db.exist_table(tname):
                if not self.target_db.exist_table(tname):
                    t.create(bind=self.target_db.conn)
                self._copy_table(t)

            offset += 1
            stat['offset'] = offset
            self.save_state()

        stat['done'] = True
        self.save_state()

    def copy_individual_chapter_tables(self):
        """Copy the chapter tables referenced by individual articles."""
        assert self.state is not None, 'Must load state before start coping.'

        ta = Database.DB_t_article
        chapter_tables_state_key = '_individual_chapter_tables_'
        if chapter_tables_state_key not in self.state:
            self.state[chapter_tables_state_key] = {'offset': 0}
            self.save_state()

        limit = 10
        stat = self.state[chapter_tables_state_key]
        offset = stat.get('offset', 0)
        if stat.get('done', False):
            log.info('Skip %s due to its done already (records:%d).' % (chapter_tables_state_key, offset))
            return

        # NOTE(review): this binds the *source* metadata to the *target*
        # engine; kept as in the original - confirm it is intentional.
        self.src_db.meta.bind = self.target_db.engine

        while True:
            stmt = select([ta.c.id, ta.c.chapter_table]).where(
                ta.c.chapter_table.isnot(None))
            stmt = stmt.order_by(ta.c.id).offset(offset).limit(limit)
            rs = self.src_db.execute(stmt)
            count = 0
            for r in rs:
                tname = r[ta.c.chapter_table]
                t = self.src_db.get_db_t_chapter(tname)
                if self.src_db.exist_table(tname):
                    if not self.target_db.exist_table(tname):
                        t.create(bind=self.target_db.conn)
                    self._copy_table(t)

                count += 1
                offset += 1
                stat['offset'] = offset
                self.save_state()

            # A short page means every article has been visited.
            if count < limit:
                break

        stat['done'] = True
        self.save_state()

    def save_state(self):
        """Write the progress state to the state file as JSON.

        The containing directory is created when missing. The data is first
        written to a temporary file and then moved into place, so an
        interrupted write does not leave a truncated state file behind.
        """
        state_dir = os.path.dirname(self.state_file_path)
        if state_dir:
            os.makedirs(state_dir, exist_ok=True)

        tmp_path = self.state_file_path + '.tmp'
        with open(tmp_path, 'wt', encoding='utf-8') as fp:
            json.dump(self.state, fp, indent=2, ensure_ascii=False)
        os.replace(tmp_path, self.state_file_path)

    def read_state(self):
        """Load the saved progress state, or start and save an empty one."""
        try:
            with open(self.state_file_path, 'rt', encoding='utf-8') as fp:
                self.state = json.load(fp)
            log.info('State file found. Continue last copying.')
        except FileNotFoundError:
            log.info('No state file found. Start new copying.')
            self.state = {}
            self.save_state()

    def finish_state(self):
        """Save the state and rename the file with a timestamp suffix."""
        suffix = datetime.now().strftime('%Y-%m-%d-%H-%M')

        # e.g. copy_db_state.json -> copy_db_state-2020-01-31-12-00.json
        root, ext = os.path.splitext(self.state_file_path)
        archived_path = root + '-' + suffix + ext

        self.save_state()
        try:
            os.rename(self.state_file_path, archived_path)
        except FileNotFoundError:
            pass
Beispiel #3
0
def article_page(aid):
    """Render the detail page of one article with its chapter list.

    Chapters are laid out in a table with ``col`` cells per row (request
    argument, default 5); rows flagged ``is_section`` are rendered as
    full-width heading rows.

    Args:
        aid: id of the article to display.

    Returns:
        The response built by ``make_response`` from ``template_page``, with
        the generated markup (or an error message) as ``content``.
    """
    # Local import so the block does not depend on a file-level import.
    import html

    # TODO: add button to fix 'desc' and 'chapter' content. e.g. markdown, regex replace, encoding, zip/unzip ...

    handle_client_ip()

    # colspan is used as a modulus below; guard against ?col=0 or negatives.
    colspan = max(1, request.args.get('col', default=5, type=int))

    context = {"encoding": 'utf-8'}
    db = Database()
    ta = db.DB_t_article

    # POST
    handle_rate_form(aid, db)

    #  GET
    try:

        # get article information
        stmt = select([
            ta.c.id, ta.c.site, ta.c.iid, ta.c.name, ta.c.author,
            ta.c.category, ta.c.length, ta.c.status, ta.c.desc,
            ta.c.recommends, ta.c.favorites, ta.c.recommends_month,
            ta.c.update_on, ta.c.weight, ta.c.chapter_table, ta.c.timestamp
        ]).where(ta.c.id == aid).order_by(ta.c.id)
        ra = db.conn.execute(stmt).fetchone()
        if ra is None:
            # Fail with a readable message, not an obscure formatting
            # error further down.
            raise LookupError('article %s not found' % aid)

        chapter_table, tc, table_alone = db.get_chapter_table_name_def_alone(
            ra)

        if chapter_table and db.exist_table(chapter_table):
            # get article toc
            stmt = select([tc.c.id, tc.c.name, tc.c.is_section])
            if not table_alone:
                stmt = stmt.where(tc.c.aid == aid)
            stmt = stmt.order_by(tc.c.id)
            toc_rs = db.conn.execute(stmt)
        else:
            toc_rs = None

        i = 0  # chapter cells in the current run since the last section
        total = 0  # toc rows rendered (sections included)
        odd = True
        row_open = False  # whether a chapter <tr> is currently open
        sb = []

        sb.append('<h3 align="center">%(name)s</h3>' % ra)
        sb.append('<p align="center"><a href="/i/%(iid)s">返回索引</a></p>\n' % ra)
        render_rate_form(sb, aweight=ra[ta.c.weight], astatus=ra[ta.c.status])
        sb.append('<table align="center" width="95%">')
        if toc_rs is not None:
            for r in toc_rs:
                total += 1

                if r['is_section']:
                    # A section heading gets its own row; close any chapter
                    # row that is still open first.
                    if row_open:
                        sb.append('</tr>')
                        row_open = False
                    sb.append(
                        '<tr style="background-color:silver"><td colspan="%s" align="center">%s</td></tr>'
                        % (colspan, r['name'] or ''))
                    i = 0
                    continue

                if not row_open:
                    # Parenthesised on purpose: without the parentheses the
                    # conditional applied to the whole formatted string and
                    # every other row lost its opening <tr>.
                    sb.append('<tr style="%s">' %
                              ('background-color:#eee' if odd else ''))
                    odd = not odd
                    row_open = True

                sb.append('<td><a href="/%s/%s/">%s</a></td>' %
                          (aid, r['id'], r['name'] or '阅读'))
                i += 1
                if i % colspan == 0:
                    sb.append('</tr>')
                    row_open = False

        # Close a partially filled last row.
        if row_open:
            sb.append('</tr>')

        # rowcount is not dependable for SELECT statements, so rely on the
        # number of rows actually rendered.
        if total == 0:
            sb.append('<tr style="background-color:#eee"><td> 还未上传 </td></tr>')

        sb.append('</table>')

        content = '\n'.join(sb)
    except Exception as err:
        # The message may echo request-supplied text; escape it for HTML.
        content = ('<h1>Error</h1><p class="error">' +
                   html.escape(str(err)) + '</p>')

    context['content'] = content
    resp = make_response(render_template_string(template_page, **context))
    resp.headers['Content-Type'] = 'text/html; charset=utf-8'
    return resp