def test_tree():
    """Build a DecisionTree from the loan table and classify four applicants."""
    # NOTE(review): fixture whitespace looks collapsed in the reviewed copy; confirm line breaks.
    text = ''' 年龄, 工作, 已婚, 信用, 可以借贷? 青年, no , no , 一般 , 不给 青年, no , no , 好 , 不给 青年, yes, no , 好 , 给 青年, yes, yes, 一般 , 给 青年, no , no , 一般 , 不给 中年, no , no , 一般 , 不给 中年, no , no , 好 , 不给 中年, yes, yes, 好 , 给 中年, no , yes, 非常好 , 给 中年, no , yes, 非常好 , 给 老年, no , yes, 非常好 , 给 老年, no , yes, 好 , 给 老年, yes, no , 好 , 给 老年, yes, no , 非常好 , 给 老年, no , no , 一般 , 不给 '''

    rows, header = datamatrix(text)
    model = DecisionTree(rows, labels=header)
    best_feature = model.best_split_feature(rows)
    tree = model.decision_tree
    print(best_feature)
    puts(tree)

    # (feature vector, expected decision) pairs.
    cases = (
        (['青年', 'no', 'no', '好'], '不给'),
        (['青年', 'no', 'yes', '非常好'], '给'),
        (['老年', 'yes', 'yes', '一般'], '给'),
        (['老年', 'no', 'no', '好'], '不给'),
    )

    # Classify everything first, then assert, mirroring the original ordering.
    outcomes = [
        model.classify(features, decision_tree=tree) for features, _ in cases
    ]
    for outcome, (_, expected) in zip(outcomes, cases):
        outcome | should.eq(expected)
def test_activate_contextmanager(self):
    """Exercise ``self.map.activate`` as a context manager, including nesting."""
    @injector(self.map)
    def current_foo(foo=Key('foo')):
        return foo

    self.map['foo'] = 'ROOT'
    current_foo() | should.eql('ROOT')

    with self.map.activate('A'):
        self.map['foo'] = 'A'
        current_foo() | should.eql('A')

    # Outside the block the test expects the root value again.
    current_foo() | should.eql('ROOT')

    with self.map.activate('B'):
        self.map['foo'] = 'B'
        current_foo() | should.eq('B')

        # Force a context
        self.map.context('A')
        current_foo() | should.eq('A')

    current_foo() | should.eql('ROOT')

    with self.map.activate('A'):
        with self.map.activate('B'):
            current_foo() | should.eq('B')

        current_foo() | should.eq('A')

    current_foo() | should.eql('ROOT')
def test_info_disable_octal_auto_convert_dec():
    """Check the str() and YAML renderings of an InfoText holding a quoted '061'."""
    from pyshould import should

    # NOTE(review): fixture whitespace looks collapsed in the reviewed copy; confirm line breaks.
    source_text = ''' index1: 1.23 index2: 11 index_empty: index3: 100 # index4: 061 # will convert to 49, for int('61', base=8) = 49 index4a: '061' # will not convert to 49 '''

    info = InfoText.from_string(source_text)
    print(info.content)

    rendered = str(info)
    expected_repr = """ <InfoText> index1: 1.23 index2: 11 index_empty: None index3: 100 index4a: '061' """.strip()
    rendered | should.eq(expected_repr)

    yaml_text = info.to_yaml_string()
    expected_yaml = """ index1: 1.23 index2: 11 index_empty: None index3: 100 index4a: '061'"""
    yaml_text | should.eq(expected_yaml)
def test_register_singleton(self):
    """Two lookups of a singleton-registered key are both expected to yield 1."""
    @self.map.singleton('foo')
    def make_foo(deps):
        self.cnt += 1
        return self.cnt

    for _ in range(2):
        self.map['foo'] | should.eq(1)
def test_register_factory(self):
    """Successive lookups of a factory-registered key are expected to yield 1, then 2."""
    @self.map.factory('foo')
    def next_count(deps):
        self.cnt += 1
        return self.cnt

    for expected in (1, 2):
        self.map['foo'] | should.eq(expected)
def test_7_transword():
    """Run ``Polish.transword`` over (input, expected) text pairs."""
    cases = (
        (
            '''登陆网站异常, 请稍后, 您可以定义对象所期望的操作和属性, 而不是显示声明其类型''',
            '''登录网站异常, 请稍候, 您可以定义对象所期望的操作和属性, 而不是显式声明其类型''',
        ),
        (
            '''那么`SelectLeftSpaceChar()`这个“函数”的作用''',
            '''那么`SelectLeftSpaceChar()`这个"函数"的作用''',
        ),
    )
    for sample, result in cases:
        Polish(sample).transword().text | should.eq(result)
def test_info_additional_keys():
    """Load ``test/nested.inf`` via ``InfoText.from_yaml`` and check several keys."""
    from pyshould import should

    path = os.getcwd() + '/test/nested.inf'
    info = InfoText.from_yaml(path)
    puts(info.content)

    expectations = (
        ('a', 123),
        ('ErrorKey', None),
        ('foo', 'bar'),  # from key<default>
        ('current_year', 1404),  # key<default> contains this 1404
    )
    for key, expected in expectations:
        info.get(key) | should.eq(expected)

    # key<default> does not contain this,
    print(info.get('current_date'))
def test_factory_in_different_context(self):
    """A factory key is expected to keep counting up after switching to context 'A'."""
    @self.map.factory('foo')
    def next_count(deps):
        self.cnt += 1
        return self.cnt

    for expected in (1, 2):
        self.map['foo'] | should.eq(expected)

    self.map.context('A')
    for expected in (3, 4):
        self.map['foo'] | should.eq(expected)
def test_5_pangu_spacing():
    """Check ``Polish.pangu_spacing`` on a short sentence and on a long paragraph."""
    def spaced(text):
        # Shared pipeline for both cases below.
        return Polish(text).pangu_spacing().text

    short_sample = '當你,凝視著bug,bug也凝視著你'
    short_result = '當你,凝視著 bug,bug 也凝視著你'
    spaced(short_sample) | should.eq(short_result)

    # NOTE(review): fixture whitespace looks collapsed in the reviewed copy; confirm line breaks.
    long_sample = ''' 这里需要特别说明的是,`::;c::` 定义了热键`;c`,那么`;c`就相当于一个单词,当你输入这个单词后,这个热键的代码就会被执行。既然是单词,在输入的时候前后都要输入空格来与其他单词分隔,AutoHotkey才会认为它是一个单词,才会激活这个热键。比如,当你输入this;c 时,Autohotkey会认为你输入的this;c才是一个完整的单词,而`;c`只是单词的一部分,从而不会激活`;c`热键。因此,如果你想在输入this 之后通过热键`;c`来切换中文输入法,那么在输入this之后需要加个 空格再输入`;c` , 即`this;c`。如此一来,就多输入了一个空格,有的时候这个空格是多余的, 我们不希望这个空格的存在。那么这个函数的作用,就是用来选中这个多余的空格。在选中这个多余的空格之后,你再输入内容时,会自动替换掉了这个空格,从而无需手工退格删除这个空格了。 '''
    long_result = ''' 这里需要特别说明的是,`::;c::` 定义了热键 `;c`,那么 `;c` 就相当于一个单词,当你输入这个单词后,这个热键的代码就会被执行。既然是单词,在输入的时候前后都要输入空格来与其他单词分隔,AutoHotkey 才会认为它是一个单词,才会激活这个热键。比如,当你输入 this;c 时,Autohotkey 会认为你输入的 this;c 才是一个完整的单词,而 `;c` 只是单词的一部分,从而不会激活 `;c` 热键。因此,如果你想在输入 this 之后通过热键 `;c` 来切换中文输入法,那么在输入 this 之后需要加个 空格再输入 `;c` , 即 `this;c`。如此一来,就多输入了一个空格,有的时候这个空格是多余的,我们不希望这个空格的存在。那么这个函数的作用,就是用来选中这个多余的空格。在选中这个多余的空格之后,你再输入内容时,会自动替换掉了这个空格,从而无需手工退格删除这个空格了。 '''
    # show_diff(Polish(sample).pangu_spacing().text.strip(), result.strip())
    spaced(long_sample).strip() | should.eq(long_result.strip())
def test_9b_extract_notes_annotations():
    """Check that ``Polish.extract_annotations`` returns the two ``!!!`` blocks."""
    # NOTE(review): fixture whitespace looks collapsed in the reviewed copy; confirm line breaks.
    sample = ''' 提取注记 注记 提取block note !!! note xxxx xxxx sep !!! danger xxxx xxxx xxxx text '''

    annotation_blocks = (
        '!!! note\n\n xxxx\n xxxx\n\n',
        '!!! danger\n xxxx\n \n xxxx\n\n xxxx\n\n',
    )
    result_should = [('annotation', block) for block in annotation_blocks]

    extracted = Polish(sample).extract_annotations()
    extracted | should.eq(result_should)
def test_singleton_in_different_context(self):
    """Check the singleton value seen in the initial context and in 'A', 'B', 'A'."""
    @self.map.singleton('foo')
    def make_foo(deps):
        self.cnt += 1
        return self.cnt

    self.map['foo'] | should.eq(1)

    # (context to switch to, value expected there); returning to 'A' expects 2 again.
    for context_name, expected in (('A', 2), ('B', 3), ('A', 2)):
        self.map.context(context_name)
        self.map['foo'] | should.eq(expected)
def test_9a_extract_notes_highlight():
    """Check that ``Polish.extract_highlights`` returns the expected highlight spans."""
    # NOTE(review): fixture whitespace looks collapsed in the reviewed copy; confirm line breaks.
    sample = ''' 提取高亮 即 ==xxx== 之间的内容 ==高亮== 有可能==在同一行中==出现两次 提取<span color='red'>自定义的</span>颜色 ## 可能出现在<span color=#7ed>标题</ span>里 可能出现在缩进块里, 可能有<font style='background:#def; font-size:1.5rem; color: #edb;'>不同的颜色定义</font>标签 不提取<span color='red'>带有换行 换行之后</span>的内容 '''

    # highlight
    highlighted_spans = (
        '==xxx==',
        '==高亮==',
        '==在同一行中==',
        "<span color='red'>自定义的</span>",
        "<span color=#7ed>标题</ span>",
        "<font style='background:#def; font-size:1.5rem; color: #edb;'>不同的颜色定义</font>",
    )
    result_should = [('highlight', span) for span in highlighted_spans]

    extracted = Polish(sample).extract_highlights()
    extracted | should.eq(result_should)
def test_6_pangu_spacing_level2():
    """Check ``Polish.pangu_spacing`` on text containing an inline code span."""
    # The inner structure of `` spans must be preserved.
    code_sample = '''那么`SelectLeftSpaceChar()`这个函数的作用,就是用来选中这个多余的空格. 在选中这个多余的空格之后'''
    code_result = '''那么 `SelectLeftSpaceChar()` 这个函数的作用,就是用来选中这个多余的空格。在选中这个多余的空格之后'''
    actual = Polish(code_sample).pangu_spacing().text.strip()
    actual | should.eq(code_result.strip())

    # NOTE(review): this second pair is assigned but never asserted — confirm
    # whether an assertion is intended here.
    sample = '當你,凝視著bug,bug也凝視著你'
    result = '當你,凝視著 bug, bug 也凝視著你'
def test_X_sample():
    """Template case: ``Polish(...).text`` is expected to equal the given text."""
    # NOTE(review): fixture whitespace looks collapsed in the reviewed copy; confirm line breaks.
    sample = ''' xxxxxx '''
    result = ''' xxxxxx '''
    polished_text = Polish(sample).text
    polished_text | should.eq(result)
def test_dependencies_passed_as_arg(self):
    """A factory is expected to read another registered value through its ``deps`` argument."""
    self.map.register('dep', 'DEP')

    @self.map.factory('foo')
    def read_dep(deps):
        return deps['dep']

    resolved = self.map['foo']
    resolved | should.eq('DEP')
def test_register_thread(self):
    """Check the provider call count for a thread-registered key across two threads."""
    import threading

    @self.map.thread('foo')
    def bump(deps):
        self.cnt += 1
        return self.cnt

    # Two lookups on the calling thread: value and counter both expected to stay at 1.
    for _ in range(2):
        self.map['foo'] | should.eq(1)
        self.cnt | should.eq(1)

    # Run in a separate thread and make sure the cnt is updated
    def resolve_in_worker():
        return self.map['foo']

    worker = threading.Thread(target=resolve_in_worker)
    worker.start()
    worker.join()

    self.cnt | should.eq(2)
def test_info_string_empty():
    """Check the str() rendering of an InfoText built from a comment-only string."""
    from pyshould import should

    # NOTE(review): fixture whitespace looks collapsed in the reviewed copy; confirm line breaks.
    source_text = ''' # comment: comment '''
    info = InfoText.from_string(source_text)

    rendered = str(info)
    rendered | should.eq('''<InfoText> (empty)''')
def test_8_extract_outline():
    """Check that ``Polish.extract_outline`` returns the h1-h3 headings of the sample.

    The backslash before ``##`` in the sample is written as ``\\`` because
    ``\#`` is not a valid escape sequence; the runtime value is unchanged.
    """
    # Extract headings.
    # NOTE(review): fixture whitespace looks collapsed in the reviewed copy; confirm line breaks.
    sample = ''' # title1 ## title2 text text ## title3 text #### sub1 text ## title4 #### sub2 ### sub3 #### sub4 text > quote paragraph > > # quote title1 > ```python code # comments ## comments code ``` \\## not title5 ## title6 ## title7 '''
    result = '''# title1 ## title2 ## title3 ## title4 ### sub3 ## title7'''.splitlines()
    # Polish(sample).extract_outline(headers='h1,h2,h3', paragraph='full,preview,none') | should.eq(result)
    Polish(sample).extract_outline(headers='h1,h2,h3') | should.eq(result)
def test_info_string_in_list():
    """Check the str() rendering of an InfoText whose values are lists."""
    from pyshould import should

    # NOTE(review): fixture whitespace looks collapsed in the reviewed copy; confirm line breaks.
    source_text = ''' points_x[]: [100.1, 100.2, 100.3, 100.4] points_y[]: [200.1, 200.2, 200.3, 200.4] '''
    info = InfoText.from_string(source_text)

    rendered = str(info)
    expected = '''<InfoText> points_x[]: [100.1, 100.2, 100.3, 100.4] points_y[]: [200.1, 200.2, 200.3, 200.4]'''
    rendered | should.eq(expected)
def test_info_string_spec():
    """Check the str() rendering of an InfoText with non-ASCII keys and values."""
    from pyshould import should

    # NOTE(review): fixture whitespace looks collapsed in the reviewed copy; confirm line breaks.
    source_text = ''' 单位名称: 测试单位 项目名称: 测试项目 面积: 10000.11 '''
    info = InfoText.from_string(source_text)

    rendered = str(info)
    expected = '''<InfoText> 单位名称: 测试单位 项目名称: 测试项目 面积: 10000.11'''
    rendered | should.eq(expected)
def test_infotext_merge():
    """Check both InfoText renderings, then the result of merging the second into the first."""
    from pyshould import should

    # NOTE(review): fixture whitespace looks collapsed in the reviewed copy; confirm line breaks.
    base_text = ''' 单位名称 =name1 项目名称: name2 key1: key2: key3: 456 '''
    base = InfoText.from_string(base_text)

    override_text = ''' 项目名称: change1 单位名称: change2 key1: key2: 123 key3: '''
    override = InfoText.from_string(override_text)

    expected_base = '''<InfoText> 单位名称: name1 项目名称: name2 key1: None key2: None key3: 456'''
    str(base) | should.eq(expected_base)

    expected_override = '''<InfoText> 项目名称: change1 单位名称: change2 key1: None key2: 123 key3: None'''
    str(override) | should.eq(expected_override)

    base.merge(override)

    expected_merged = '''<InfoText> 单位名称: change2 项目名称: change1 key1: None key2: 123 key3: 456'''
    str(base) | should.eq(expected_merged)
def test_info_string_some_not_space_after_colon():
    """Check the str() rendering for ``=`` and ``:`` separators with uneven spacing."""
    from pyshould import should

    # NOTE(review): fixture whitespace looks collapsed in the reviewed copy; confirm line breaks.
    source_text = ''' name=foo age :56 语言 = 中文 # 此行为注释 phone : [12312412, 21414124] '''
    info = InfoText.from_string(source_text)

    rendered = str(info)
    expected = '''<InfoText> name: foo age: 56 语言: 中文 phone: [12312412, 21414124]'''
    rendered | should.eq(expected)
def test_proxy_bypassed_methods(self):
    """Use a ``DependencyMap.proxy`` of a registered list like a plain list."""
    registry = DependencyMap()
    registry[list] = []
    proxied = registry.proxy(list)

    # Mutations go through the proxy.
    proxied.append(1)
    proxied.append(3)
    proxied[1] = 2

    proxied[0] | should.eq(1)
    proxied[1] | should.eq(2)
    len(proxied) | should.eq(2)

    is_equal = proxied == [1, 2]
    is_equal | should.eq(True)

    repr(proxied) | should.eq("[1, 2]")
    str(proxied) | should.eq("[1, 2]")

    concatenated = proxied + [3, 4]
    concatenated | should.eq([1, 2, 3, 4])
def test_2_merge_line_for_mdlink():
    """Check ``Polish.join_markdown_link`` on three markdown links."""
    # NOTE(review): fixture whitespace looks collapsed in the reviewed copy; confirm line breaks.
    sample = ''' [ 笔记 ](https://www.zhihu.com/topic/19554982) [ 知识管理工具 ](https://www.zhihu.com/topic/19627718) [ 笔记类应用 ](https://www.zhihu.com/topic/19821486) '''
    result = ''' [笔记](https://www.zhihu.com/topic/19554982) [知识管理工具](https://www.zhihu.com/topic/19627718) [笔记类应用](https://www.zhihu.com/topic/19821486) '''

    joined_text = Polish(sample).join_markdown_link().text
    joined_text | should.eq(result)
def test_4_fix_image_assets():
    """Check ``Polish.update_image_url(prefix='filename.')`` against the expected text.

    In the expected text the image URLs gain the prefix while the plain
    ``[userlink](...)`` link stays unchanged.
    """
    # NOTE(review): fixture whitespace looks collapsed in the reviewed copy; confirm line breaks.
    sample = ''' 1. 为你的编辑器安装LiveReload插件 ![](assets/15780-bb29aa6.png) 2. 如何使用?默认情况下 [![icon](assets/15781-21a54b384.webp)](https://www.xxx.com) [userlink](assets/15780-bb29aa6.png)关注 职能 ![](assets/15781-370d0a66.webp) Chrome '''
    result = ''' 1. 为你的编辑器安装LiveReload插件 ![](filename.assets/15780-bb29aa6.png) 2. 如何使用?默认情况下 [![icon](filename.assets/15781-21a54b384.webp)](https://www.xxx.com) [userlink](assets/15780-bb29aa6.png)关注 职能 ![](filename.assets/15781-370d0a66.webp) Chrome '''

    updated = Polish(sample).update_image_url(prefix='filename.')
    updated.text | should.eq(result)
def test_9c_extract_notes_annotations_level2():
    """Check ``Polish.extract_notes(highlight=False, annotation=True)`` on two block styles."""
    # NOTE(review): fixture whitespace looks collapsed in the reviewed copy; confirm line breaks.
    sample = ''' 提取高亮, 注记, 以及自定义容器 注记 提取两种 block note !!! note xxxx xxxx ::: tip 也应该包括这种提示 ::: ::::: container :::: row ::: col-xs-6 alert alert-success success text ::: ::: col-xs-6 alert alert-info warning text ::: :::: ::::: '''

    # annotation
    # NOTE(review): the expected tip body below does not occur in the sample
    # text — confirm which of the two is intended.
    annotation_blocks = (
        '!!! note\n\n xxxx\n xxxx\n\n',
        '::: tip\n\n这是一个提示\n\n:::\n\n',
    )
    result_should = [('annotation', block) for block in annotation_blocks]

    extracted = Polish(sample).extract_notes(highlight=False, annotation=True)
    extracted | should.eq(result_should)
def test_keyed_value(self):
    """An injected ``Key(self)`` default is expected to resolve to 'SELF'."""
    @self.inject
    def fetch(foo=Key(self)):
        return foo

    resolved = fetch()
    resolved | should.eq('SELF')
def test_setter(self):
    """A value set on the map is expected to read back before and after entering context 'A'."""
    key, value = 'foo', 'FOO'
    self.map[key] = value
    self.map[key] | should.eq(value)

    self.map.context('A')
    self.map[key] | should.eq(value)
def Dtest_read_mat(self):
    """Check the rows and header that ``datamatrix`` returns for ``self.mat``."""
    # NOTE(review): the "D" prefix presumably keeps this out of test collection — confirm.
    expected_rows = [
        [1, 2, 1, 'female'],
        [2, 3, 1, 'male'],
        [3, 2, 6.03, 'male'],
    ]
    expected_header = ['Col1', 'Col2', 'Col3', '中文列']

    mat, title = datamatrix(self.mat, title=True)
    mat | should.eq(expected_rows)
    title | should.eq(expected_header)
def test_keyed_multiple(self):
    """An injected ``Key(dict, 'foo')`` default is expected to resolve to 'SELF-FOO'."""
    @self.inject
    def fetch(foo=Key(dict, 'foo')):
        return foo

    resolved = fetch()
    resolved | should.eq('SELF-FOO')
def test_setter(self):
    """A value assigned through item syntax is expected to read back unchanged."""
    key, value = 'foo', 'FOO'
    self.map[key] = value
    self.map[key] | should.eq(value)
def test_register_value(self):
    """A value stored with ``register`` is expected to read back via item lookup."""
    key, value = 'foo', 'FOO'
    self.map.register(key, value)
    self.map[key] | should.eq(value)
def Dtest_rf(self):
    """Check split-feature choice, tree construction, and one classification on ``self.data``."""
    # NOTE(review): the "D" prefix presumably keeps this out of test collection — confirm.
    best_feature = chooseBestFeatureToSplit(self.data)
    best_feature | should.be(0)

    # A fresh label list is passed to each call, as in the original.
    tree = createTree(self.data, ['喉结', '胡子'])
    expected_tree = {'喉结': {0: {'胡子': {0: 'female', 1: 'male'}}, 1: 'male'}}
    tree | should.eq(expected_tree)

    prediction = classify(tree, ['喉结', '胡子'], [0, 0])
    prediction | should.be('female')
def test_keyed_literal(self):
    """An injected ``Key('foo')`` default is expected to resolve to 'FOO'."""
    @self.inject
    def fetch(foo=Key('foo')):
        return foo

    resolved = fetch()
    resolved | should.eq('FOO')
def test_10_split_md_blocks():
    """Check the (BlockType, text) pairs produced by ``Polish._split_markdown_blocks``.

    The backslash before ``##`` in the sample is written as ``\\`` because
    ``\#`` is not a valid escape sequence; the runtime value is unchanged and
    matches the ``'\\## not title5'`` entry in the expected list.
    """
    # NOTE(review): fixture whitespace looks collapsed in the reviewed copy; confirm line breaks.
    sample = ''' # title1 ## title2 text text ## title3 text #### sub1 text ## title4 #### sub2 ### sub3 #### sub4 text > quote paragraph > > # quote title1 > text ```python code # comments ## comments code ``` ----------sep---- \\## not title5 ## title6 indent2 ## title7 text !!! note xxxx xxxx text '''

    from polish import BlockType

    result_should = [
        (BlockType.title, '# title1\n\n'),
        (BlockType.title, '## title2\n'),
        (BlockType.p, 'text\n'),
        (BlockType.p, 'text\n\n'),
        (BlockType.title, '## title3\n\n'),
        (BlockType.p, 'text\n\n'),
        (BlockType.title, '#### sub1\n\n'),
        (BlockType.p, 'text\n\n\n'),
        (BlockType.title, '## title4\n\n'),
        (BlockType.title, '#### sub2\n\n'),
        (BlockType.title, '### sub3\n\n'),
        (BlockType.title, '#### sub4\n\n'),
        (BlockType.p, 'text\n\n'),
        (BlockType.quote, '> quote paragraph\n> \n> # quote title1\n \n> text\n\n'),
        (BlockType.fencecode, '```python\ncode\n# comments\n## comments\ncode\n```\n\n'),
        (BlockType.sepline, '----------sep----\n\n'),
        (BlockType.p, '\\## not title5\n\n'),
        (BlockType.indent, ' ## title6\n \n indent2\n\n'),
        (BlockType.title, '## title7\n\n'),
        (BlockType.p, 'text\n\n'),
        (BlockType.admonition, '!!! note\n\n xxxx\n xxxx\n\n'),
        (BlockType.p, 'text'),
    ]

    result = list(Polish(sample)._split_markdown_blocks(sample.strip()))

    from pprint import pprint as pp
    pp(result)

    # Compare the count first, then each block in order.
    len(result) | should.eq(len(result_should))
    for line_actual, line_should in zip(result, result_should):
        line_actual | should.eq(line_should)
def test_rf2(self):
    """Run the loan table through the function API and the DecisionTree class API."""
    # NOTE(review): fixture whitespace looks collapsed in the reviewed copy; confirm line breaks.
    text = ''' 年龄, 工作, 已婚, 信用, 可以借贷? 青年, no , no , 一般 , 不给 青年, no , no , 好 , 不给 青年, yes, no , 好 , 给 青年, yes, yes, 一般 , 给 青年, no , no , 一般 , 不给 中年, no , no , 一般 , 不给 中年, no , no , 好 , 不给 中年, yes, yes, 好 , 给 中年, no , yes, 非常好 , 给 中年, no , yes, 非常好 , 给 老年, no , yes, 非常好 , 给 老年, no , yes, 好 , 给 老年, yes, no , 好 , 给 老年, yes, no , 非常好 , 给 老年, no , no , 一般 , 不给 '''
    from pprint import pprint

    feature_names = ('年龄', '工作', '已婚', '信用')
    # (feature vector, expected decision) pairs shared by both APIs.
    cases = (
        (('青年', 'no', 'no', '好'), '不给'),
        (('青年', 'no', 'yes', '非常好'), '给'),
        (('老年', 'yes', 'yes', '一般'), '给'),
        (('老年', 'no', 'no', '好'), '不给'),
    )

    # --- function-style API ---
    data, title = datamatrix(text)
    n = chooseBestFeatureToSplit(data)
    tree = createTree(data, title)
    n | should.be(2)
    expected_tree = {'已婚': {'no': {'工作': {'no': '不给', 'yes': '给'}}, 'yes': '给'}}
    tree | should.eq(expected_tree)

    # Fresh lists per call, as the original passed new list literals each time.
    outcomes = [
        classify(tree, list(feature_names), list(features))
        for features, _ in cases
    ]
    for outcome, (_, expected) in zip(outcomes, cases):
        outcome | should.eq(expected)

    # --- class-style API ---
    data, title = datamatrix(text)
    dt = DecisionTree(data, labels=title)
    n = dt.best_split_feature(data)
    tree = dt.decision_tree
    print(n)
    pprint(tree)

    outcomes = [
        dt.classify(list(features), decision_tree=tree)
        for features, _ in cases
    ]
    for outcome, (_, expected) in zip(outcomes, cases):
        outcome | should.eq(expected)