# Example no. 1
    def test_copy_data_files02(self):
        """###.html => copy ###.html to <ID>/*"""
        with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.meta({
  "20200101000000000": {
    "index": "20200101000000000.html",
    "type": ""
  }
})""")

        with open(os.path.join(self.test_input, '20200101000000000.html'),
                  'w',
                  encoding='UTF-8') as fh:
            fh.write('page content')
        with open(os.path.join(self.test_input, 'page.html'),
                  'w',
                  encoding='UTF-8') as fh:
            fh.write('dummy')

        for info in wsb2sb.run(self.test_input, self.test_output):
            pass

        oid = util.datetime_to_id_legacy(
            util.id_to_datetime('20200101000000000'))
        self.assertEqual(
            set(os.listdir(os.path.join(self.test_output, 'data'))),
            {oid},
        )
        self.assertEqual(
            set(os.listdir(os.path.join(self.test_output, 'data', oid))),
            {'index.html'},
        )
    def test_meta_icon05(self):
        """Item folder => mapped item folder"""
        with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.meta({
  "20200101000000000": {
    "index": "20200101000000000/index.html",
    "type": "",
    "icon": "favicon.bmp"
  }
})""")
        icon_file = os.path.join(self.test_input, '20200101000000000',
                                 'favicon.bmp')
        os.makedirs(os.path.dirname(icon_file), exist_ok=True)
        with open(icon_file, 'wb') as fh:
            fh.write(
                b64decode(
                    'Qk08AAAAAAAAADYAAAAoAAAAAQAAAAEAAAABACAAAAAAAAYAAAASCwAAEgsAAAAAAAAAAAAAAP8AAAAA'
                ))

        for info in wsb2sb.run(self.test_input, self.test_output):
            pass

        with open(self.test_output_rdf, 'rb') as fh:
            tree = etree.parse(fh)

        ts = util.datetime_to_id_legacy(
            util.id_to_datetime('20200101000000000'))
        self.assertEqual(
            tree.find(f'{RDF}Description').attrib[f'{NS1}icon'],
            f'resource://scrapbook/data/{ts}/favicon.bmp')
        self.assertTrue(
            os.path.isfile(
                os.path.join(self.test_output, 'data', ts, 'favicon.bmp')))
    def test_copy_data_files05(self):
        """###.maff => copy nothing if no page"""
        with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.meta({
  "20200101000000000": {
    "index": "20200101000000000.maff",
    "type": ""
  }
})""")

        with zipfile.ZipFile(
                os.path.join(self.test_input, '20200101000000000.maff'),
                'w') as zh:
            zh.writestr('index.html', 'dummy')

        for info in wsb2sb.run(self.test_input, self.test_output):
            pass

        oid = util.datetime_to_id_legacy(
            util.id_to_datetime('20200101000000000'))
        self.assertEqual(
            set(
                glob.iglob(os.path.join(self.test_output, '**'),
                           recursive=True)), {
                               os.path.join(self.test_output, ''),
                               os.path.join(self.test_output, 'scrapbook.rdf'),
                           })
# Example no. 4
    def test_meta_separator(self):
        """A sample of typical WebScrapBook separator item."""
        with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.meta({
  "20200101000000000": {
    "type": "separator",
    "title": "Hello 中文",
    "create": "20200102000000000",
    "modify": "20200103000000000"
  }
})""")

        for info in wsb2sb.run(self.test_input, self.test_output):
            pass

        with open(self.test_output_rdf, 'rb') as fh:
            tree = etree.parse(fh)

        oid = util.datetime_to_id_legacy(
            util.id_to_datetime('20200101000000000'))
        self.assertEqual(
            dict(tree.find(f'{NC}BookmarkSeparator').attrib), {
                f'{RDF}about':
                f'urn:scrapbook:item{oid}',
                f'{NS1}id':
                oid,
                f'{NS1}type':
                'separator',
                f'{NS1}title':
                'Hello 中文',
                f'{NS1}create':
                util.datetime_to_id_legacy(
                    util.id_to_datetime('20200102000000000')),
                f'{NS1}modify':
                util.datetime_to_id_legacy(
                    util.id_to_datetime('20200103000000000')),
                f'{NS1}source':
                '',
                f'{NS1}icon':
                '',
                f'{NS1}comment':
                '',
                f'{NS1}chars':
                '',
            })
    def test_copy_data_files06(self):
        """foo.bar => copy it and create meta refresh"""
        with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.meta({
  "20200101000000000": {
    "index": "中文#1.xhtml",
    "type": ""
  }
})""")

        with open(os.path.join(self.test_input, '中文#1.xhtml'),
                  'w',
                  encoding='UTF-8') as fh:
            fh.write("""\
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <title>Title of document</title>
</head>
<body>
some content
</body>
</html>
""")

        for info in wsb2sb.run(self.test_input, self.test_output):
            pass

        oid = util.datetime_to_id_legacy(
            util.id_to_datetime('20200101000000000'))
        self.assertEqual(
            set(
                glob.iglob(os.path.join(self.test_output, '**'),
                           recursive=True)),
            {
                os.path.join(self.test_output, ''),
                os.path.join(self.test_output, 'scrapbook.rdf'),
                os.path.join(self.test_output, 'data'),
                os.path.join(self.test_output, 'data', oid),
                os.path.join(self.test_output, 'data', oid, 'index.html'),
                os.path.join(self.test_output, 'data', oid, '中文#1.xhtml'),
            })
        self.assertEqual(
            util.parse_meta_refresh(
                os.path.join(self.test_output, 'data', oid,
                             'index.html')).target,
            './%E4%B8%AD%E6%96%87%231.xhtml')
# Example no. 6
    def test_id_mapping02(self):
        """If conflict, increment by 1 from timestamp"""
        # Three WSB IDs that differ only in the millisecond part, so they
        # all collapse to the same second-granularity legacy timestamp.
        with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.meta({
  "20200101000000000": {
    "type": "folder"
  },
  "20200101000000001": {
    "type": "folder"
  },
  "20200101000000010": {
    "type": "folder"
  }
})""")
        with open(self.test_input_toc, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.toc({
  "root": [
    "20200101000000000",
    "20200101000000001",
    "20200101000000010"
  ]
})""")

        # Exhaust the generator to run the conversion to completion.
        for info in wsb2sb.run(self.test_input, self.test_output):
            pass

        with open(self.test_output_rdf, 'rb') as fh:
            tree = etree.parse(fh)

        # Conflicting legacy IDs are resolved by bumping the timestamp one
        # second at a time: ...000000000/001/010 (ms) => :00, :01, :02 (s).
        self.assertEqual([
            node.attrib[f'{NS1}id']
            for node in tree.findall(f'{RDF}Description')
        ],
                         [
                             util.datetime_to_id_legacy(
                                 util.id_to_datetime('20200101000000000')),
                             util.datetime_to_id_legacy(
                                 util.id_to_datetime('20200101000001000')),
                             util.datetime_to_id_legacy(
                                 util.id_to_datetime('20200101000002000')),
                         ])
        # The TOC sequence must reference the remapped IDs in order.
        self.assertEqual([
            node.attrib[f'{RDF}resource']
            for node in tree.findall(f'{RDF}Seq/{RDF}li')
        ], [
            'urn:scrapbook:item' + util.datetime_to_id_legacy(
                util.id_to_datetime('20200101000000000')),
            'urn:scrapbook:item' + util.datetime_to_id_legacy(
                util.id_to_datetime('20200101000001000')),
            'urn:scrapbook:item' + util.datetime_to_id_legacy(
                util.id_to_datetime('20200101000002000')),
        ])
    def test_copy_data_files01(self):
        """###/index.html => copy ###/* to <ID>/*"""
        with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.meta({
  "20200101000000000": {
    "index": "20200101000000000/index.html",
    "type": ""
  }
})""")

        index_dir = os.path.join(self.test_input, '20200101000000000')
        os.makedirs(index_dir, exist_ok=True)
        with open(os.path.join(index_dir, 'index.html'), 'w',
                  encoding='UTF-8') as fh:
            fh.write('page content')
        with open(os.path.join(index_dir, 'page.html'), 'w',
                  encoding='UTF-8') as fh:
            fh.write('dummy')
        os.makedirs(os.path.join(self.test_input, '20200101000000001'),
                    exist_ok=True)
        with open(os.path.join(self.test_input, 'other.html'),
                  'w',
                  encoding='UTF-8') as fh:
            fh.write('dummy')

        for info in wsb2sb.run(self.test_input, self.test_output):
            pass

        oid = util.datetime_to_id_legacy(
            util.id_to_datetime('20200101000000000'))
        self.assertEqual(
            set(
                glob.iglob(os.path.join(self.test_output, '**'),
                           recursive=True)),
            {
                os.path.join(self.test_output, ''),
                os.path.join(self.test_output, 'scrapbook.rdf'),
                os.path.join(self.test_output, 'data'),
                os.path.join(self.test_output, 'data', oid),
                os.path.join(self.test_output, 'data', oid, 'index.html'),
                os.path.join(self.test_output, 'data', oid, 'page.html'),
            })
    def test_meta_type01(self):
        """postit => note"""
        with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.meta({
  "20200101000000000": {
    "index": "20200101000000000/index.html",
    "type": "postit"
  }
})""")

        index_file = os.path.join(self.test_input, '20200101000000000',
                                  'index.html')
        os.makedirs(os.path.dirname(index_file), exist_ok=True)
        with open(index_file, 'w', encoding='UTF-8') as fh:
            fh.write("""\
<!DOCTYPE html><html><head>\
<meta charset="UTF-8">\
<meta name="viewport" content="width=device-width">\
<style>pre { white-space: pre-wrap; overflow-wrap: break-word; }</style>\
</head><body><pre>
postit page content
</pre></body></html>""")

        for info in wsb2sb.run(self.test_input, self.test_output):
            pass

        with open(self.test_output_rdf, 'rb') as fh:
            tree = etree.parse(fh)

        self.assertEqual(
            tree.find(f'{RDF}Description').attrib[f'{NS1}type'], 'note')

        # check output legacy note format
        oid = util.datetime_to_id_legacy(
            util.id_to_datetime('20200101000000000'))
        with open(os.path.join(self.test_output, 'data', oid, 'index.html'),
                  encoding='UTF-8') as fh:
            self.assertEqual(
                fh.read(), """\
<html><head><meta http-equiv="Content-Type" content="text/html;Charset=UTF-8"></head><body><pre>
postit page content
</pre></body></html>""")
    def test_copy_data_files04(self):
        """###.maff => copy internal files of first topdir to <ID>/*"""
        with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.meta({
  "20200101000000000": {
    "index": "20200101000000000.maff",
    "type": ""
  }
})""")

        with zipfile.ZipFile(
                os.path.join(self.test_input, '20200101000000000.maff'),
                'w') as zh:
            zh.writestr('20200101000000000/index.html', 'page content')
            zh.writestr('20200101000000000/page.html', 'dummy')
            zh.writestr('20200101000000000/subdir/page2.html', 'dummy2')
            zh.writestr('20200101000000001/index.html', 'page content 2')

        for info in wsb2sb.run(self.test_input, self.test_output):
            pass

        oid = util.datetime_to_id_legacy(
            util.id_to_datetime('20200101000000000'))
        self.assertEqual(
            set(
                glob.iglob(os.path.join(self.test_output, '**'),
                           recursive=True)),
            {
                os.path.join(self.test_output, ''),
                os.path.join(self.test_output, 'scrapbook.rdf'),
                os.path.join(self.test_output, 'data'),
                os.path.join(self.test_output, 'data', oid),
                os.path.join(self.test_output, 'data', oid, 'index.html'),
                os.path.join(self.test_output, 'data', oid, 'page.html'),
                os.path.join(self.test_output, 'data', oid, 'subdir'),
                os.path.join(self.test_output, 'data', oid, 'subdir',
                             'page2.html'),
            })
# Example no. 10
    def test_copy_data_files03(self):
        """###.htz => copy internal files to <ID>/*"""
        with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.meta({
  "20200101000000000": {
    "index": "20200101000000000.htz",
    "type": ""
  }
})""")

        with zipfile.ZipFile(
                os.path.join(self.test_input, '20200101000000000.htz'),
                'w') as zh:
            zh.writestr('index.html', 'page content')
            zh.writestr('page.html', 'dummy')
            zh.writestr('subdir/page2.html', 'dummy2')

        for info in wsb2sb.run(self.test_input, self.test_output):
            pass

        oid = util.datetime_to_id_legacy(
            util.id_to_datetime('20200101000000000'))
        self.assertEqual(
            set(os.listdir(os.path.join(self.test_output, 'data'))),
            {oid},
        )
        self.assertEqual(
            set(os.listdir(os.path.join(self.test_output, 'data', oid))),
            {'index.html', 'page.html', 'subdir'},
        )
        self.assertEqual(
            set(
                os.listdir(
                    os.path.join(self.test_output, 'data', oid, 'subdir'))),
            {'page2.html'},
        )
# Example no. 11
    def test_basic01(self):
        """Test exporting a common */index.html
        """
        with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.meta({
  "20200101000000000": {
    "type": "folder",
    "title": "item0",
    "index": "20200101000000000/index.html",
    "create": "20200102000000000",
    "modify": "20200103000000000",
    "source": "http://example.com",
    "icon": "favicon.bmp"
  }
})""")
        with open(self.test_input_toc, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.toc({
  "root": [
    "20200101000000000"
  ]
})""")
        index_file = os.path.join(self.test_input, '20200101000000000',
                                  'index.html')
        os.makedirs(os.path.dirname(index_file))
        with open(index_file, 'w', encoding='UTF-8') as fh:
            fh.write('ABC123')

        for info in wsb_exporter.run(self.test_input, self.test_output):
            pass

        with os.scandir(self.test_output) as entries:
            files = sorted(entries, key=lambda x: x.path)

        # files are exported in depth-first order
        with zipfile.ZipFile(files[0]) as zh:
            with zh.open('meta.json') as fh:
                data = json.load(fh)
            with zh.open('export.json') as fh:
                export_info = json.load(fh)
            with zh.open('data/20200101000000000/index.html') as fh:
                index_data = fh.read().decode('UTF-8')

        self.assertEqual(
            data, {
                'id': '20200101000000000',
                'type': 'folder',
                'title': 'item0',
                'index': '20200101000000000/index.html',
                'create': '20200102000000000',
                'modify': '20200103000000000',
                'source': 'http://example.com',
                'icon': 'favicon.bmp',
            })

        self.assertEqual(export_info['version'], 1)
        self.assertAlmostEqual(util.id_to_datetime(
            export_info['id']).timestamp(),
                               datetime.now(timezone.utc).timestamp(),
                               delta=3)
        self.assertEqual(export_info['timestamp'], export_info['id'])
        self.assertEqual(
            export_info['timezone'],
            datetime.now().astimezone().utcoffset().total_seconds())
        self.assertEqual(export_info['path'], [{'id': 'root', 'title': ''}])

        self.assertEqual(index_data, 'ABC123')
# Example no. 12
    def test_basic02(self):
        """Test exporting a common *.htz
        """
        with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.meta({
  "20200101000000000": {
    "type": "folder",
    "title": "item0",
    "index": "20200101000000000.htz",
    "create": "20200102000000000",
    "modify": "20200103000000000",
    "source": "http://example.com",
    "icon": ".wsb/tree/favicon/dbc82be549e49d6db9a5719086722a4f1c5079cd.bmp"
  }
})""")
        with open(self.test_input_toc, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.toc({
  "root": [
    "20200101000000000"
  ]
})""")
        index_file = os.path.join(self.test_input, '20200101000000000.htz')
        with zipfile.ZipFile(index_file, 'w') as zh:
            zh.writestr('index.html', 'ABC123')
        favicon_file = os.path.join(
            self.test_input_tree, 'favicon',
            'dbc82be549e49d6db9a5719086722a4f1c5079cd.bmp')
        os.makedirs(os.path.dirname(favicon_file))
        with open(favicon_file, 'wb') as fh:
            fh.write(
                b64decode(
                    'Qk08AAAAAAAAADYAAAAoAAAAAQAAAAEAAAABACAAAAAAAAYAAAASCwAAEgsAAAAAAAAAAAAAAP8AAAAA'
                ))

        for info in wsb_exporter.run(self.test_input, self.test_output):
            pass

        with os.scandir(self.test_output) as entries:
            files = sorted(entries, key=lambda x: x.path)

        # files are exported in depth-first order
        with zipfile.ZipFile(files[0]) as zh:
            with zh.open('meta.json') as fh:
                data = json.load(fh)
            with zh.open('export.json') as fh:
                export_info = json.load(fh)
            with zh.open('data/20200101000000000.htz') as fh:
                fh = zip_stream(fh)
                with zipfile.ZipFile(fh) as zh2:
                    with zh2.open('index.html') as fh2:
                        index_data = fh2.read().decode('UTF-8')
            with zh.open('favicon/dbc82be549e49d6db9a5719086722a4f1c5079cd.bmp'
                         ) as fh:
                favicon_data = fh.read()

        self.assertEqual(
            data, {
                'id':
                '20200101000000000',
                'type':
                'folder',
                'title':
                'item0',
                'index':
                '20200101000000000.htz',
                'create':
                '20200102000000000',
                'modify':
                '20200103000000000',
                'source':
                'http://example.com',
                'icon':
                '.wsb/tree/favicon/dbc82be549e49d6db9a5719086722a4f1c5079cd.bmp',
            })

        self.assertEqual(export_info['version'], 1)
        self.assertAlmostEqual(util.id_to_datetime(
            export_info['id']).timestamp(),
                               datetime.now(timezone.utc).timestamp(),
                               delta=3)
        self.assertEqual(export_info['timestamp'], export_info['id'])
        self.assertEqual(
            export_info['timezone'],
            datetime.now().astimezone().utcoffset().total_seconds())
        self.assertEqual(export_info['path'], [{'id': 'root', 'title': ''}])

        self.assertEqual(index_data, 'ABC123')
        self.assertEqual(
            b64encode(favicon_data),
            b'Qk08AAAAAAAAADYAAAAoAAAAAQAAAAEAAAABACAAAAAAAAYAAAASCwAAEgsAAAAAAAAAAAAAAP8AAAAA'
        )
# Example no. 13
    def test_meta_basic(self):
        """A sample of typical WebScrapBook item."""
        with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.meta({
  "20200101000000000": {
    "index": "20200101000000000/index.html",
    "type": "",
    "title": "Hello 中文",
    "create": "20200102000000000",
    "modify": "20200103000000000",
    "source": "http://example.com",
    "icon": "favicon.bmp",
    "comment": "some comment\\nsecond line\\nthird line",
    "charset": "UTF-8",
    "locked": true
  }
})""")

        index_file = os.path.join(self.test_input, '20200101000000000',
                                  'index.html')
        os.makedirs(os.path.dirname(index_file), exist_ok=True)
        with open(index_file, 'w', encoding='UTF-8') as fh:
            fh.write('page content')

        icon_file = os.path.join(self.test_input, '20200101000000000',
                                 'favicon.bmp')
        os.makedirs(os.path.dirname(icon_file), exist_ok=True)
        with open(icon_file, 'wb') as fh:
            fh.write(
                b64decode(
                    'Qk08AAAAAAAAADYAAAAoAAAAAQAAAAEAAAABACAAAAAAAAYAAAASCwAAEgsAAAAAAAAAAAAAAP8AAAAA'
                ))

        for info in wsb2sb.run(self.test_input, self.test_output):
            pass

        with open(self.test_output_rdf, 'rb') as fh:
            tree = etree.parse(fh)

        oid = util.datetime_to_id_legacy(
            util.id_to_datetime('20200101000000000'))
        self.assertEqual(tree.getroot().tag, f'{RDF}RDF')
        self.assertEqual(
            dict(tree.find(f'{RDF}Description').attrib), {
                f'{RDF}about':
                f'urn:scrapbook:item{oid}',
                f'{NS1}id':
                oid,
                f'{NS1}type':
                '',
                f'{NS1}title':
                'Hello 中文',
                f'{NS1}create':
                util.datetime_to_id_legacy(
                    util.id_to_datetime('20200102000000000')),
                f'{NS1}modify':
                util.datetime_to_id_legacy(
                    util.id_to_datetime('20200103000000000')),
                f'{NS1}source':
                'http://example.com',
                f'{NS1}icon':
                f'resource://scrapbook/data/{oid}/favicon.bmp',
                f'{NS1}comment':
                'some comment __BR__ second line __BR__ third line',
                f'{NS1}chars':
                'UTF-8',
                f'{NS1}lock':
                'true'
            })