def test_copy_data_files02(self):
    """###.html => copy ###.html to <ID>/*"""
    with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
        fh.write("""\
scrapbook.meta({
  "20200101000000000": {
    "index": "20200101000000000.html",
    "type": ""
  }
})""")
    with open(os.path.join(self.test_input, '20200101000000000.html'),
              'w', encoding='UTF-8') as fh:
        fh.write('page content')
    # an unrelated file that must not be copied
    with open(os.path.join(self.test_input, 'page.html'),
              'w', encoding='UTF-8') as fh:
        fh.write('dummy')

    # drain the conversion generator
    for _ in wsb2sb.run(self.test_input, self.test_output):
        pass

    oid = util.datetime_to_id_legacy(
        util.id_to_datetime('20200101000000000'))
    data_dir = os.path.join(self.test_output, 'data')
    self.assertEqual(set(os.listdir(data_dir)), {oid})
    self.assertEqual(
        set(os.listdir(os.path.join(data_dir, oid))),
        {'index.html'},
    )
def test_meta_icon05(self):
    """Item folder => mapped item folder"""
    with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
        fh.write("""\
scrapbook.meta({
  "20200101000000000": {
    "index": "20200101000000000/index.html",
    "type": "",
    "icon": "favicon.bmp"
  }
})""")
    icon_file = os.path.join(
        self.test_input, '20200101000000000', 'favicon.bmp')
    os.makedirs(os.path.dirname(icon_file), exist_ok=True)
    # a minimal BMP payload for the favicon
    with open(icon_file, 'wb') as fh:
        fh.write(b64decode(
            'Qk08AAAAAAAAADYAAAAoAAAAAQAAAAEAAAABACAAAAAAAAYAAAASCwAAEgsAAAAAAAAAAAAAAP8AAAAA'
        ))

    # drain the conversion generator
    for _ in wsb2sb.run(self.test_input, self.test_output):
        pass

    with open(self.test_output_rdf, 'rb') as fh:
        tree = etree.parse(fh)

    ts = util.datetime_to_id_legacy(
        util.id_to_datetime('20200101000000000'))
    self.assertEqual(
        tree.find(f'{RDF}Description').attrib[f'{NS1}icon'],
        f'resource://scrapbook/data/{ts}/favicon.bmp')
    self.assertTrue(os.path.isfile(
        os.path.join(self.test_output, 'data', ts, 'favicon.bmp')))
def test_copy_data_files05(self):
    """###.maff => copy nothing if no page"""
    with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
        fh.write("""\
scrapbook.meta({
  "20200101000000000": {
    "index": "20200101000000000.maff",
    "type": ""
  }
})""")
    # a MAFF without any top-directory page; its content must be skipped
    maff_file = os.path.join(self.test_input, '20200101000000000.maff')
    with zipfile.ZipFile(maff_file, 'w') as zh:
        zh.writestr('index.html', 'dummy')

    # drain the conversion generator
    for _ in wsb2sb.run(self.test_input, self.test_output):
        pass

    oid = util.datetime_to_id_legacy(
        util.id_to_datetime('20200101000000000'))
    expected = {
        os.path.join(self.test_output, ''),
        os.path.join(self.test_output, 'scrapbook.rdf'),
    }
    self.assertEqual(
        set(glob.iglob(os.path.join(self.test_output, '**'),
                       recursive=True)),
        expected)
def test_meta_separator(self):
    """A sample of typical WebScrapBook separator item."""
    with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
        fh.write("""\
scrapbook.meta({
  "20200101000000000": {
    "type": "separator",
    "title": "Hello 中文",
    "create": "20200102000000000",
    "modify": "20200103000000000"
  }
})""")
    # drain the conversion generator
    for _ in wsb2sb.run(self.test_input, self.test_output):
        pass

    with open(self.test_output_rdf, 'rb') as fh:
        tree = etree.parse(fh)

    oid = util.datetime_to_id_legacy(
        util.id_to_datetime('20200101000000000'))
    expected = {
        f'{RDF}about': f'urn:scrapbook:item{oid}',
        f'{NS1}id': oid,
        f'{NS1}type': 'separator',
        f'{NS1}title': 'Hello 中文',
        f'{NS1}create': util.datetime_to_id_legacy(
            util.id_to_datetime('20200102000000000')),
        f'{NS1}modify': util.datetime_to_id_legacy(
            util.id_to_datetime('20200103000000000')),
        f'{NS1}source': '',
        f'{NS1}icon': '',
        f'{NS1}comment': '',
        f'{NS1}chars': '',
    }
    self.assertEqual(
        dict(tree.find(f'{NC}BookmarkSeparator').attrib),
        expected)
def test_copy_data_files06(self):
    """foo.bar => copy it and create meta refresh"""
    with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
        fh.write("""\
scrapbook.meta({
  "20200101000000000": {
    "index": "中文#1.xhtml",
    "type": ""
  }
})""")
    with open(os.path.join(self.test_input, '中文#1.xhtml'),
              'w', encoding='UTF-8') as fh:
        fh.write("""\
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Title of document</title>
</head>
<body>
some content
</body>
</html>
""")
    # drain the conversion generator
    for _ in wsb2sb.run(self.test_input, self.test_output):
        pass

    oid = util.datetime_to_id_legacy(
        util.id_to_datetime('20200101000000000'))
    expected = {
        os.path.join(self.test_output, ''),
        os.path.join(self.test_output, 'scrapbook.rdf'),
        os.path.join(self.test_output, 'data'),
        os.path.join(self.test_output, 'data', oid),
        os.path.join(self.test_output, 'data', oid, 'index.html'),
        os.path.join(self.test_output, 'data', oid, '中文#1.xhtml'),
    }
    self.assertEqual(
        set(glob.iglob(os.path.join(self.test_output, '**'),
                       recursive=True)),
        expected)
    # the generated index.html must redirect to the URL-encoded filename
    self.assertEqual(
        util.parse_meta_refresh(
            os.path.join(self.test_output, 'data', oid,
                         'index.html')).target,
        './%E4%B8%AD%E6%96%87%231.xhtml')
def test_id_mapping02(self):
    """If conflict, increment by 1 from timestamp"""
    with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
        fh.write("""\
scrapbook.meta({
  "20200101000000000": {
    "type": "folder"
  },
  "20200101000000001": {
    "type": "folder"
  },
  "20200101000000010": {
    "type": "folder"
  }
})""")
    with open(self.test_input_toc, 'w', encoding='UTF-8') as fh:
        fh.write("""\
scrapbook.toc({
  "root": [
    "20200101000000000",
    "20200101000000001",
    "20200101000000010"
  ]
})""")
    # drain the conversion generator
    for _ in wsb2sb.run(self.test_input, self.test_output):
        pass

    with open(self.test_output_rdf, 'rb') as fh:
        tree = etree.parse(fh)

    # the millisecond-distinct input IDs collapse to the same legacy
    # second, so successive conflicts must be bumped by one second each
    expected_ids = [
        util.datetime_to_id_legacy(
            util.id_to_datetime('20200101000000000')),
        util.datetime_to_id_legacy(
            util.id_to_datetime('20200101000001000')),
        util.datetime_to_id_legacy(
            util.id_to_datetime('20200101000002000')),
    ]
    self.assertEqual(
        [node.attrib[f'{NS1}id']
         for node in tree.findall(f'{RDF}Description')],
        expected_ids)
    self.assertEqual(
        [node.attrib[f'{RDF}resource']
         for node in tree.findall(f'{RDF}Seq/{RDF}li')],
        [f'urn:scrapbook:item{id_}' for id_ in expected_ids])
def test_copy_data_files01(self):
    """###/index.html => copy ###/* to <ID>/*"""
    with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
        fh.write("""\
scrapbook.meta({
  "20200101000000000": {
    "index": "20200101000000000/index.html",
    "type": ""
  }
})""")
    index_dir = os.path.join(self.test_input, '20200101000000000')
    os.makedirs(index_dir, exist_ok=True)
    with open(os.path.join(index_dir, 'index.html'),
              'w', encoding='UTF-8') as fh:
        fh.write('page content')
    with open(os.path.join(index_dir, 'page.html'),
              'w', encoding='UTF-8') as fh:
        fh.write('dummy')

    # entries not referenced by the meta must be left behind
    os.makedirs(os.path.join(self.test_input, '20200101000000001'),
                exist_ok=True)
    with open(os.path.join(self.test_input, 'other.html'),
              'w', encoding='UTF-8') as fh:
        fh.write('dummy')

    # drain the conversion generator
    for _ in wsb2sb.run(self.test_input, self.test_output):
        pass

    oid = util.datetime_to_id_legacy(
        util.id_to_datetime('20200101000000000'))
    expected = {
        os.path.join(self.test_output, ''),
        os.path.join(self.test_output, 'scrapbook.rdf'),
        os.path.join(self.test_output, 'data'),
        os.path.join(self.test_output, 'data', oid),
        os.path.join(self.test_output, 'data', oid, 'index.html'),
        os.path.join(self.test_output, 'data', oid, 'page.html'),
    }
    self.assertEqual(
        set(glob.iglob(os.path.join(self.test_output, '**'),
                       recursive=True)),
        expected)
def test_meta_type01(self):
    """postit => note"""
    with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
        fh.write("""\
scrapbook.meta({
  "20200101000000000": {
    "index": "20200101000000000/index.html",
    "type": "postit"
  }
})""")
    index_file = os.path.join(
        self.test_input, '20200101000000000', 'index.html')
    os.makedirs(os.path.dirname(index_file), exist_ok=True)
    # a WebScrapBook postit page (backslashes keep the head on one line)
    with open(index_file, 'w', encoding='UTF-8') as fh:
        fh.write("""\
<!DOCTYPE html><html><head>\
<meta charset="UTF-8">\
<meta name="viewport" content="width=device-width">\
<style>pre { white-space: pre-wrap; overflow-wrap: break-word; }</style>\
</head><body><pre>
postit page content
</pre></body></html>""")

    # drain the conversion generator
    for _ in wsb2sb.run(self.test_input, self.test_output):
        pass

    with open(self.test_output_rdf, 'rb') as fh:
        tree = etree.parse(fh)

    self.assertEqual(
        tree.find(f'{RDF}Description').attrib[f'{NS1}type'],
        'note')

    # check output legacy note format
    oid = util.datetime_to_id_legacy(
        util.id_to_datetime('20200101000000000'))
    output_index = os.path.join(
        self.test_output, 'data', oid, 'index.html')
    with open(output_index, encoding='UTF-8') as fh:
        self.assertEqual(fh.read(), """\
<html><head><meta http-equiv="Content-Type" content="text/html;Charset=UTF-8"></head><body><pre>
postit page content
</pre></body></html>""")
def test_copy_data_files04(self):
    """###.maff => copy internal files of first topdir to <ID>/*"""
    with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
        fh.write("""\
scrapbook.meta({
  "20200101000000000": {
    "index": "20200101000000000.maff",
    "type": ""
  }
})""")
    maff_file = os.path.join(self.test_input, '20200101000000000.maff')
    with zipfile.ZipFile(maff_file, 'w') as zh:
        # first topdir: should be extracted
        zh.writestr('20200101000000000/index.html', 'page content')
        zh.writestr('20200101000000000/page.html', 'dummy')
        zh.writestr('20200101000000000/subdir/page2.html', 'dummy2')
        # second topdir: should be ignored
        zh.writestr('20200101000000001/index.html', 'page content 2')

    # drain the conversion generator
    for _ in wsb2sb.run(self.test_input, self.test_output):
        pass

    oid = util.datetime_to_id_legacy(
        util.id_to_datetime('20200101000000000'))
    expected = {
        os.path.join(self.test_output, ''),
        os.path.join(self.test_output, 'scrapbook.rdf'),
        os.path.join(self.test_output, 'data'),
        os.path.join(self.test_output, 'data', oid),
        os.path.join(self.test_output, 'data', oid, 'index.html'),
        os.path.join(self.test_output, 'data', oid, 'page.html'),
        os.path.join(self.test_output, 'data', oid, 'subdir'),
        os.path.join(self.test_output, 'data', oid, 'subdir',
                     'page2.html'),
    }
    self.assertEqual(
        set(glob.iglob(os.path.join(self.test_output, '**'),
                       recursive=True)),
        expected)
def test_copy_data_files03(self):
    """###.htz => copy internal files to <ID>/*"""
    with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
        fh.write("""\
scrapbook.meta({
  "20200101000000000": {
    "index": "20200101000000000.htz",
    "type": ""
  }
})""")
    htz_file = os.path.join(self.test_input, '20200101000000000.htz')
    with zipfile.ZipFile(htz_file, 'w') as zh:
        zh.writestr('index.html', 'page content')
        zh.writestr('page.html', 'dummy')
        zh.writestr('subdir/page2.html', 'dummy2')

    # drain the conversion generator
    for _ in wsb2sb.run(self.test_input, self.test_output):
        pass

    oid = util.datetime_to_id_legacy(
        util.id_to_datetime('20200101000000000'))
    data_dir = os.path.join(self.test_output, 'data')
    item_dir = os.path.join(data_dir, oid)
    self.assertEqual(set(os.listdir(data_dir)), {oid})
    self.assertEqual(
        set(os.listdir(item_dir)),
        {'index.html', 'page.html', 'subdir'},
    )
    self.assertEqual(
        set(os.listdir(os.path.join(item_dir, 'subdir'))),
        {'page2.html'},
    )
def test_meta_basic(self):
    """A sample of typical WebScrapBook item."""
    with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
        fh.write("""\
scrapbook.meta({
  "20200101000000000": {
    "index": "20200101000000000/index.html",
    "type": "",
    "title": "Hello 中文",
    "create": "20200102000000000",
    "modify": "20200103000000000",
    "source": "http://example.com",
    "icon": "favicon.bmp",
    "comment": "some comment\\nsecond line\\nthird line",
    "charset": "UTF-8",
    "locked": true
  }
})""")
    index_file = os.path.join(
        self.test_input, '20200101000000000', 'index.html')
    os.makedirs(os.path.dirname(index_file), exist_ok=True)
    with open(index_file, 'w', encoding='UTF-8') as fh:
        fh.write('page content')

    # a minimal BMP payload for the favicon
    icon_file = os.path.join(
        self.test_input, '20200101000000000', 'favicon.bmp')
    os.makedirs(os.path.dirname(icon_file), exist_ok=True)
    with open(icon_file, 'wb') as fh:
        fh.write(b64decode(
            'Qk08AAAAAAAAADYAAAAoAAAAAQAAAAEAAAABACAAAAAAAAYAAAASCwAAEgsAAAAAAAAAAAAAAP8AAAAA'
        ))

    # drain the conversion generator
    for _ in wsb2sb.run(self.test_input, self.test_output):
        pass

    with open(self.test_output_rdf, 'rb') as fh:
        tree = etree.parse(fh)

    oid = util.datetime_to_id_legacy(
        util.id_to_datetime('20200101000000000'))
    self.assertEqual(tree.getroot().tag, f'{RDF}RDF')
    expected = {
        f'{RDF}about': f'urn:scrapbook:item{oid}',
        f'{NS1}id': oid,
        f'{NS1}type': '',
        f'{NS1}title': 'Hello 中文',
        f'{NS1}create': util.datetime_to_id_legacy(
            util.id_to_datetime('20200102000000000')),
        f'{NS1}modify': util.datetime_to_id_legacy(
            util.id_to_datetime('20200103000000000')),
        f'{NS1}source': 'http://example.com',
        f'{NS1}icon': f'resource://scrapbook/data/{oid}/favicon.bmp',
        f'{NS1}comment': 'some comment __BR__ second line __BR__ third line',
        f'{NS1}chars': 'UTF-8',
        f'{NS1}lock': 'true'
    }
    self.assertEqual(
        dict(tree.find(f'{RDF}Description').attrib),
        expected)