def merged_interval_should_follow_spec(i, j, result):
    """Assert that ``i | j`` and ``j | i`` both span ``result``.

    ``i`` and ``j`` are argument sequences unpacked into ``Interval``.
    ``result`` is indexed as ``(begin, end)`` of the expected merged
    interval.
    """
    left = Interval(*i)
    right = Interval(*j)

    # The merge is checked in both operand orders, first i | j, then j | i.
    for first, second in ((left, right), (right, left)):
        merged = first | second
        the(merged.begin).should.equal(result[0])
        the(merged.end).should.equal(result[1])
def intersect_interval_should_follow_spec(i, j, result):
    """Assert that ``i & j`` and ``j & i`` both match ``result``.

    ``i`` and ``j`` are argument sequences unpacked into ``Interval``.
    When ``result`` is ``None`` the intersection itself must be ``None``;
    otherwise ``result`` is indexed as ``(begin, end)`` of the expected
    intersection.
    """
    left = Interval(*i)
    right = Interval(*j)

    # The intersection is checked in both operand orders: i & j, then j & i.
    for first, second in ((left, right), (right, left)):
        overlap = first & second
        if result is None:
            this(overlap).should.be(None)
            continue
        the(overlap.begin).should.equal(result[0])
        the(overlap.end).should.equal(result[1])
# standard library imports

# third party related imports
from pyspecs import (
    and_, as_well_as, given, it, provided, so, the, then, this, when,
)

# local library imports
from Thor.utils.Rectangle import Rectangle, TextRectangle


# Spec: a TextRectangle built positionally and one built through
# ``TextRectangle.create`` from a word dict expose the same x/y/w/h/t values.
# NOTE(review): the step nesting was reconstructed from a flattened source
# line using the customary pyspecs layout -- confirm against the original.
with given.a_TextRectangle:

    with provided.there_is_a_word_object:
        w = {'x': 1, 'y': 2, 'w': 3, 'h': 4, 't': u'ABC'}

        # (attribute name, expected value) pairs, checked in this order
        # after each way of constructing the rectangle.
        expected_fields = (
            ('x', 1),
            ('y', 2),
            ('w', 3),
            ('h', 4),
            ('t', u'ABC'),
        )

        with then.it_can_be_created_directly:
            tr = TextRectangle(w['x'], w['y'], w['w'], w['h'], w['t'])
            for attr, expected in expected_fields:
                the(getattr(tr, attr)).should.equal(expected)

        with and_.it_can_be_created_by_calling_create_method:
            tr = TextRectangle.create(w)
            for attr, expected in expected_fields:
                the(getattr(tr, attr)).should.equal(expected)

        # Drop the fixture and the loop helpers so that only ``tr`` is left
        # behind in the module namespace.
        del w, expected_fields, attr, expected
with provided.two_intervals_are_disjoint: merged_interval_should_follow_spec((0, 10), (15, 20), (0, 20)) with provided.two_intervals_are_partly_overlapping: merged_interval_should_follow_spec((0, 10), (5, 15), (0, 15)) with provided.one_interval_includes_the_other: merged_interval_should_follow_spec((0, 10), (5, 8), (0, 10)) with given.an_interval: i = Interval(0, 10) with then.the_length_should_be_correct: the(i.length).should.equal(10) with given.two_intervals: with provided.have_the_same_left_tip_but_distince_right_tip: i = Interval(0, 10) j = Interval(0, 5) this(i).should_NOT.equal(j) with provided.have_the_same_right_tip_but_distinct_left_tip: i = Interval(5, 10) j = Interval(0, 10) this(i).should_NOT.equal(j) with provided.the_same_left_right_tips: i = Interval(0, 10)
with then.bleed_box_should_be_correct: bboxes_almost_the_same(bleed_box, bboxes['bleed']) with then.trim_box_should_be_correct: bboxes_almost_the_same(trim_box, bboxes['trim']) with then.art_box_should_be_correct: bboxes_almost_the_same(art_box, bboxes['art']) sample_pdf = os.path.join(curr_dir, 'fixture', 'test2.pdf') with when.extract_texts_from_it: with then.default_is_to_extract_from_all_pages: pages = PDFPage.extract_texts(sample_pdf) the(len(pages)).should.equal(4) for ix, page in enumerate(pages): the(page.page_num).should.equal(ix + 1) with and_.it_can_extract_specified_pages_as_well: pages = PDFPage.extract_texts(sample_pdf, (1, 3,)) the(len(pages)).should.equal(2) the(pages[0].page_num).should.equal(1) the(pages[1].page_num).should.equal(3) with and_.no_space_is_included: pages = PDFPage.extract_texts(sample_pdf) for page in pages: for word in page.words: the(word.t).should_NOT.be_in((' ', u'\u2003'))
with closing(open(sample_json)) as f: preprocessor = RawTextPreprocessor( sample_pdf, PDFPage.loads(f.read().decode('utf8')) ) with then.it_extracts_texts_in_content_stream_order: raw_texts = preprocessor.page.extract_raw_texts(sample_pdf, 1) with closing(open(sample_raw)) as f: expected = f.read().decode('utf8').splitlines() # XXX The last raw stream is form feed, we ignore it. for i in xrange(22): the(raw_texts[i]).should.equal(expected[i]) with then.each_word_obj_should_locate_itself_in_every_possible_raw_stream: ground_truth = ( # 0 (u'時尚雜誌', ((0, 0, 4),)), (u'國際中文版', ((1, 0, 5),)), (u'2012', ((2, 0, 4),)), (u'MAY.', ((2, 5, 9),)), (u'五月號', ((3, 0, 3),)), # 5 (u'×', ((8, 0, 1), (21, 0, 1),)), (u'iP', ((10, 0, 2),)), (u'ad', ((10, 2, 4),)), (u'version', ((11, 0, 7),)),
# standard library imports import random # third party related imports from pyspecs import and_, given, provided, the, then, this, when # local library imports from Thor.utils.Rectangle import Point, Rectangle with given.a_rectangle: r = Rectangle(0, 0, 4, 3) with then.can_calculate_its_area_correctly: the(r.area).should.equal(4 * 3) with then.its_vertices_can_be_correctly_enumerated: vertices = r.vertices the(vertices[0]).should.equal(Point(0, 0)) the(vertices[1]).should.equal(Point(4, 0)) the(vertices[2]).should.equal(Point(4, 3)) the(vertices[3]).should.equal(Point(0, 3)) with given.two_rectangles: with when.find_intersection_of_these_two_rectangles: with provided.they_share_an_edge:
game.roll(pins) return game from pyspecs import given, when, then, the, finish with given.a_game_with_all_gutter_balls: game = roll_game([0] * 20) with when.the_score_is_calculated: score = game.score() with then.the_score_should_be_zero: the(score).should.equal(0) with given.a_game_with_all_ones: game = roll_game([1] * 20) with when.the_score_is_calculated: score = game.score() with then.the_score_should_be_twenty: the(score).should.equal(20) with given.a_game_with_one_spare: game = roll_game([4, 6, 3] + [0] * 17)
) with then.it_can_extract_all_font_specs_used_by_a_pdf_page: ground_truths = [ FontSpec(size=6, color="221714"), FontSpec(size=5, color="221714"), FontSpec(size=38, color="221714"), FontSpec(size=27, color="221714"), FontSpec(size=8, color="221714"), FontSpec(size=4, color="000000"), ] ground_truths.sort(key=lambda fs: fs.size) font_specs = preprocessor.font_specs font_specs.sort(key=lambda fs: fs.size) the(len(font_specs)).should.equal(len(ground_truths)) for truth, spec in zip(ground_truths, font_specs): the(truth.size).should.equal(spec.size) the(truth.color).should.equal(spec.color) with then.it_can_figure_out_the_font_spec_of_a_textual_object: ground_truths = [ { 'top': 772 - 36.85, 'left': 28 - 36.85, 'width': 4, 'height': 9, 'text': u'9', 'font': FontSpec(size=6, color="221714") }, { 'top': 566 - 36.85, 'left': 235 - 36.85, 'width': 94, 'height': 8,
# third party realted imports from pyspecs import and_, as_well_as, given, it, provided, so, the, then, this, when # local library imports from Thor.understanding.stat import WordStatistician from Thor.utils.Rectangle import TextRectangle with given.a_WordStatistician_and_supply_it_with_some_words: words = map( lambda i: TextRectangle(x=1. * i, y=2. * i, w=3. * i, h=4. * i, t=u''), xrange(10)) ws = WordStatistician(words) with then.it_can_count_how_many_words_totally: the(ws.count).should.equal(10) with and_.it_can_calculate_average_width_of_textual_objects: the(abs(ws.avg_width - 3. * 4.5)).should.be_less_than(1.0e-3) with and_.it_can_calculate_average_height_of_textual_objects: the(abs(ws.avg_height - 4. * 4.5)).should.be_less_than(1.0e-3) with and_.it_can_calculate_variance_of_width_of_textual_objects: the(abs(ws.var_width - 3. * 3. * 8.25)).should.be_less_than(1.0e-3) with and_.it_can_calculate_variance_of_height_of_textual_objects: the(abs(ws.var_height - 4. * 4. * 8.25)).should.be_less_than(1.0e-3) with and_.it_can_calculate_median_of_width_of_textual_objects: words = [
# third party related imports
from pyspecs import (
    and_, as_well_as, given, it, provided, so, the, then, this, when,
)

# local library imports
from Thor.utils.Rectangle import Rectangle, TextRectangle


# Spec: a TextRectangle built positionally and one built through
# ``TextRectangle.create`` from a word dict expose the same x/y/w/h/t values.
# NOTE(review): the step nesting was reconstructed from a flattened source
# line using the customary pyspecs layout -- confirm against the original.
with given.a_TextRectangle:

    with provided.there_is_a_word_object:
        w = {'x': 1, 'y': 2, 'w': 3, 'h': 4, 't': u'ABC'}

        # (attribute name, expected value) pairs, checked in this order
        # after each way of constructing the rectangle.
        expected_fields = (
            ('x', 1),
            ('y', 2),
            ('w', 3),
            ('h', 4),
            ('t', u'ABC'),
        )

        with then.it_can_be_created_directly:
            tr = TextRectangle(w['x'], w['y'], w['w'], w['h'], w['t'])
            for attr, expected in expected_fields:
                the(getattr(tr, attr)).should.equal(expected)

        with and_.it_can_be_created_by_calling_create_method:
            tr = TextRectangle.create(w)
            for attr, expected in expected_fields:
                the(getattr(tr, attr)).should.equal(expected)

        # Drop the fixture and the loop helpers so that only ``tr`` is left
        # behind in the module namespace.
        del w, expected_fields, attr, expected
preprocessor = FontSpecPreprocessor(sample_pdf, PDFPage.loads(pdf_json)) with then.it_can_extract_all_font_specs_used_by_a_pdf_page: ground_truths = [ FontSpec(size=6, color="221714"), FontSpec(size=5, color="221714"), FontSpec(size=38, color="221714"), FontSpec(size=27, color="221714"), FontSpec(size=8, color="221714"), FontSpec(size=4, color="000000"), ] ground_truths.sort(key=lambda fs: fs.size) font_specs = preprocessor.font_specs font_specs.sort(key=lambda fs: fs.size) the(len(font_specs)).should.equal(len(ground_truths)) for truth, spec in zip(ground_truths, font_specs): the(truth.size).should.equal(spec.size) the(truth.color).should.equal(spec.color) with then.it_can_figure_out_the_font_spec_of_a_textual_object: ground_truths = [ { 'top': 772 - 36.85, 'left': 28 - 36.85, 'width': 4, 'height': 9, 'text': u'9', 'font': FontSpec(size=6, color="221714") }, {
with given.a_NaivePreprocessor: with when.it_normalizes_text_blocks_to_width_1000px: words = map(lambda i: dict(x=1 * i, y=2 * i, w=3 * i, h=4 * i, t=''), xrange(10)) preprocessor = NaivePreprocessor( 'test.pdf', PDFPage(page_num=1, width=200, height=200, words=map(PDFText.create_from_dict, words))) preprocessor._scale_words(1000 / 200.) with then.each_word_is_scaled_correctly: for ix, word in enumerate(preprocessor.words): the(word['x']).should.equal(5 * 1 * ix) the(word['y']).should.equal(5 * 2 * ix) the(word['w']).should.equal(5 * 3 * ix) the(word['h']).should.equal(5 * 4 * ix) del preprocessor, words with when.it_classifies_each_word_into_three_types_of_orientation: words = [{ 'x': 0, 'y': 0, 'w': 200, 'h': 100, 't': u'麗' }, {
with then.bleed_box_should_be_correct: bboxes_almost_the_same(bleed_box, bboxes['bleed']) with then.trim_box_should_be_correct: bboxes_almost_the_same(trim_box, bboxes['trim']) with then.art_box_should_be_correct: bboxes_almost_the_same(art_box, bboxes['art']) sample_pdf = os.path.join(curr_dir, 'fixture', 'test2.pdf') with when.extract_texts_from_it: with then.default_is_to_extract_from_all_pages: pages = PDFPage.extract_texts(sample_pdf) the(len(pages)).should.equal(4) for ix, page in enumerate(pages): the(page.page_num).should.equal(ix + 1) with and_.it_can_extract_specified_pages_as_well: pages = PDFPage.extract_texts(sample_pdf, ( 1, 3, )) the(len(pages)).should.equal(2) the(pages[0].page_num).should.equal(1) the(pages[1].page_num).should.equal(3) with and_.no_space_is_included: pages = PDFPage.extract_texts(sample_pdf) for page in pages:
from pyspecs import given, when, then, and_, the, this, finish


# Spec: adding and subtracting two small integers.
# NOTE(review): the step nesting was reconstructed from a flattened source
# line using the customary pyspecs layout -- confirm against the original.
with given.two_operands:
    a, b = 2, 3

    with when.supplied_to_the_add_function:
        total = a + b

        with then.the_total_should_be_mathmatically_correct:
            the(total).should.equal(5)

        with and_.the_total_should_be_greater_than_either_operand:
            the(total).should.be_greater_than(a)
            the(total).should.be_greater_than(b)

    with when.supplied_to_the_subtract_function:
        difference = b - a

        with then.the_difference_should_be_mathmatically_correct:
            the(difference).should.equal(1)

    # cleanup is just based on scope
    del a, b, total, difference


# Spec: a step whose body raises (integer division by zero).
# NOTE(review): any follow-up steps of this scenario lie outside the visible
# source; only the visible part is reproduced here.
with given.an_error_prone_situation:

    with when.an_error_occurs:
        result = 1 / 0
from Thor.pdf.page import PDFPage from Thor.understanding.docspace import DocumentSpace from Thor.utils.Rectangle import Rectangle, TextRectangle with given.a_DocumentSpace: with then.it_can_determines_the_mainly_reading_direction: curr_dir = os.path.abspath(os.path.dirname(__file__)) sample_json = os.path.join(curr_dir, 'fixture', 'test1.json') with closing(open(sample_json)) as f: sample = ujson.loads(f.read().decode('utf8')) words = map(TextRectangle.create, sample['data']) ds = DocumentSpace(words) the(ds.reading_direction).should.equal(DocumentSpace.LEFT_TO_RIGHT) sample_json = os.path.join(curr_dir, 'fixture', 'test2.json') with closing(open(sample_json)) as f: sample = ujson.loads(f.read().decode('utf8')) words = map(TextRectangle.create, sample['data']) ds = DocumentSpace(words) the(ds.reading_direction).should.equal(DocumentSpace.TOP_TO_BOTTOM) with when.it_tries_to_divide_itself_into_two_subspaces: words = map(TextRectangle.create, [ {'x': 0, 'y': 0, 'w': 100, 'h': 50, 't': ''}, {'x': 10, 'y': 100, 'w': 50, 'h': 50, 't': ''}, {'x': 500, 'y': 0, 'w': 100, 'h': 25, 't': ''},
from pyspecs import and_, given, provided, the, then, this, when # local library imports from Thor.utils.Point import Point with given.two_random_points: p1 = Point(random.randint(0, 65536), random.randint(0, 65536)) p2 = Point(random.randint(0, 65536), random.randint(0, 65536)) with when.one_point_adds_the_other: p3 = p1 + p2 with then.x_coordinate_should_be_correctly_calculated: the(p3.x).should.equal(p1.x + p2.x) with then.y_coordinate_should_be_correctly_calculated: the(p3.y).should.equal(p1.y + p2.y) with when.one_point_subtracts_the_other: p3 = p1 - p2 with then.x_coordinate_should_be_correctly_calculated: the(p3.x).should.equal(p1.x - p2.x) with then.y_coordinate_should_be_correctly_calculated: the(p3.y).should.equal(p1.y - p2.y) with when.negative_a_point: p3 = -p1
) with testing.testConfig(request=request, settings=settings) as config: # Mock ScrapydJobHelper to isolate the test. with mock.patch('web_runner.views.ScrapydJobHelper') \ as ScrapydJobHelperMock: with mock.patch('web_runner.db.DbInterface') as DbMock: helper_mock = ScrapydJobHelperMock.return_value helper_mock.start_job.return_value = "XXX" # Pyramid testing doesn't configure resources. request.route_path = mock.MagicMock() with when.starting_a_spider: with then.it_should_redirect_to_pending_state: the(partial(views.spider_start_view, request)).should.raise_an(exc.HTTPFound) with given.a_configuration_with_one_command_and_spider: settings = { 'command._names': "cmd_cfg", 'command.cmd_cfg.cmd': "command line '{spider 0}'", 'command.cmd_cfg.resource': 'command_resource', 'command.cmd_cfg.content_type': 'application/x-ldjson', 'command.cmd_cfg.crawl.0.spider_config_name': 'spider_cfg', 'spider._names': 'spider_cfg', 'spider._scrapyd.base_url': 'http://localhost:6800/', 'spider._result.base_url': 'http://localhost:8000/', 'spider.spider_cfg.resource': 'spider_resource', 'spider.spider_cfg.spider_name': 'spider_name', 'spider.spider_cfg.project_name': 'spider_project_name', 'db_filename': ":memory:",
# standard library imports
import random

# third party related imports
from pyspecs import and_, given, provided, the, then, this, when

# local library imports
from Thor.utils.Rectangle import Point, Rectangle


# Spec: area and vertex enumeration of ``Rectangle(0, 0, 4, 3)``.
# NOTE(review): the step nesting was reconstructed from a flattened source
# line using the customary pyspecs layout -- confirm against the original.
with given.a_rectangle:
    r = Rectangle(0, 0, 4, 3)

    with then.can_calculate_its_area_correctly:
        the(r.area).should.equal(4 * 3)

    with then.its_vertices_can_be_correctly_enumerated:
        vertices = r.vertices
        # The expected corner order is (0, 0), (4, 0), (4, 3), (0, 3).
        the(vertices[0]).should.equal(Point(0, 0))
        the(vertices[1]).should.equal(Point(4, 0))
        the(vertices[2]).should.equal(Point(4, 3))
        the(vertices[3]).should.equal(Point(0, 3))


# Spec: intersection of two rectangles.
# NOTE(review): this scenario continues beyond the visible source; only the
# visible part is reproduced here.
with given.two_rectangles:

    with when.find_intersection_of_these_two_rectangles:

        with provided.they_share_an_edge:
            r1 = Rectangle(0, 0, 5, 5)
# local library imports from Thor.pdf.page import PDFPage from Thor.understanding.docspace import DocumentSpace from Thor.utils.Rectangle import Rectangle, TextRectangle with given.a_DocumentSpace: with then.it_can_determines_the_mainly_reading_direction: curr_dir = os.path.abspath(os.path.dirname(__file__)) sample_json = os.path.join(curr_dir, 'fixture', 'test1.json') with closing(open(sample_json)) as f: sample = ujson.loads(f.read().decode('utf8')) words = map(TextRectangle.create, sample['data']) ds = DocumentSpace(words) the(ds.reading_direction).should.equal(DocumentSpace.LEFT_TO_RIGHT) sample_json = os.path.join(curr_dir, 'fixture', 'test2.json') with closing(open(sample_json)) as f: sample = ujson.loads(f.read().decode('utf8')) words = map(TextRectangle.create, sample['data']) ds = DocumentSpace(words) the(ds.reading_direction).should.equal(DocumentSpace.TOP_TO_BOTTOM) with when.it_tries_to_divide_itself_into_two_subspaces: words = map(TextRectangle.create, [ { 'x': 0, 'y': 0, 'w': 100,
with given.a_NaivePreprocessor: with when.it_normalizes_text_blocks_to_width_1000px: words = map(lambda i: dict(x=1 * i, y=2 * i, w=3 * i, h=4 * i, t=''), xrange(10)) preprocessor = NaivePreprocessor( 'test.pdf', PDFPage(page_num=1, width=200, height=200, words=map(PDFText.create_from_dict, words)) ) preprocessor._scale_words(1000 / 200.) with then.each_word_is_scaled_correctly: for ix, word in enumerate(preprocessor.words): the(word['x']).should.equal(5 * 1 * ix) the(word['y']).should.equal(5 * 2 * ix) the(word['w']).should.equal(5 * 3 * ix) the(word['h']).should.equal(5 * 4 * ix) del preprocessor, words with when.it_classifies_each_word_into_three_types_of_orientation: words = [ {'x': 0, 'y': 0, 'w': 200, 'h': 100, 't': u'麗'}, {'x': 0, 'y': 0, 'w': 100, 'h': 100, 't': u'麗寶生活家'}, {'x': 0, 'y': 0, 'w': 200, 'h': 100, 't': u'麗寶生活家'}, {'x': 0, 'y': 0, 'w': 100, 'h': 200, 't': u'麗寶生活家'} ]
# third party related imports from pyspecs import and_, given, provided, the, then, this, when # local library imports from Thor.utils.Point import Point with given.two_random_points: p1 = Point(random.randint(0, 65536), random.randint(0, 65536)) p2 = Point(random.randint(0, 65536), random.randint(0, 65536)) with when.one_point_adds_the_other: p3 = p1 + p2 with then.x_coordinate_should_be_correctly_calculated: the(p3.x).should.equal(p1.x + p2.x) with then.y_coordinate_should_be_correctly_calculated: the(p3.y).should.equal(p1.y + p2.y) with when.one_point_subtracts_the_other: p3 = p1 - p2 with then.x_coordinate_should_be_correctly_calculated: the(p3.x).should.equal(p1.x - p2.x) with then.y_coordinate_should_be_correctly_calculated: the(p3.y).should.equal(p1.y - p2.y) with when.negative_a_point: p3 = -p1
# third party related imports
from pyspecs import and_, given, the, then, when

# local library imports
from Thor.utils.Interval import Interval, IntervalList


# Spec: IntervalList merges the intervals it is constructed from.
# NOTE(review): the step nesting was reconstructed from a flattened source
# line using the customary pyspecs layout -- confirm against the original.
with given.some_intervals:

    # [0, 10] and [10, 20] touch at 10 and are expected to collapse into a
    # single interval running from 0 to 20.
    with when.two_intervals_are_joint:
        i = Interval(0, 10)
        j = Interval(10, 20)
        interval_list = IntervalList(i, j)

        with then.two_intervals_can_be_merged:
            the(len(interval_list)).should.equal(1)

        with and_.the_begin_of_merged_interval_is_correct:
            the(interval_list[0].begin).should.equal(0)

        with and_.the_end_of_merged_interval_is_correct:
            the(interval_list[0].end).should.equal(20)

    # [9, 11] overlaps [0, 10]; [15, 20] is kept apart, so two intervals are
    # expected and the first one is [0, 11].
    with when.intervals_can_be_merged:
        i = Interval(0, 10)
        j = Interval(15, 20)
        k = Interval(9, 11)
        interval_list = IntervalList(i, j, k)
        the(len(interval_list)).should.equal(2)
        the(interval_list[0]).should.equal(Interval(0, 11))
with given.a_configuration_of_a_spider: settings = { 'spider._names': 'spider_cfg', 'spider._scrapyd.base_url': 'http://localhost:6800/', 'spider._result.base_url': 'http://localhost:8000/', 'spider.spider_cfg.resource': 'spider_resource', 'spider.spider_cfg.spider_name': 'spider_name', 'spider.spider_cfg.project_name': 'spider_project_name', } with when.searching_for_that_resource: config = find_spider_config_from_path(settings, '/spider_resource/') with then.the_configuration_should_be_found: the(config).should.equal( SpiderConfig('spider_name', 'spider_project_name')) with when.searching_for_an_unexistant_resource: config = partial(find_command_config_from_path, settings, '/unexistant/') config.__name__ = "find_command_config_from_path" with then.it_should_raise_not_found: the(config).should.raise_an(exc.HTTPNotFound) with given.a_configuration_of_a_command_with_one_spider: settings = { 'spider._names': 'test_spider', 'spider.test_spider.resource': '/spider/resource', 'spider.test_spider.spider_name': 'spider name', 'spider.test_spider.project_name': 'spider project',
sample_raw = os.path.join(curr_dir, 'fixture', 'test1.rtxt') sample_pdf = os.path.join(curr_dir, 'fixture', 'test1.pdf') with closing(open(sample_json)) as f: preprocessor = RawTextPreprocessor( sample_pdf, PDFPage.loads(f.read().decode('utf8'))) with then.it_extracts_texts_in_content_stream_order: raw_texts = preprocessor.page.extract_raw_texts(sample_pdf, 1) with closing(open(sample_raw)) as f: expected = f.read().decode('utf8').splitlines() # XXX The last raw stream is form feed, we ignore it. for i in xrange(22): the(raw_texts[i]).should.equal(expected[i]) with then.each_word_obj_should_locate_itself_in_every_possible_raw_stream: ground_truth = ( # 0 (u'時尚雜誌', ((0, 0, 4), )), (u'國際中文版', ((1, 0, 5), )), (u'2012', ((2, 0, 4), )), (u'MAY.', ((2, 5, 9), )), (u'五月號', ((3, 0, 3), )), # 5 (u'×', ( (8, 0, 1), (21, 0, 1), )),
# TODO: make tests independent of one another (the test framework does not have a before())
# TODO: refactor tests following "effective unit testing" best practices
from Navigator import *
from Rover import *

# The four directions a rover may face.
CARDINAL_POINTS = ('N', 'S', 'E', 'W')

# Spec: a freshly constructed Rover reports the position and orientation it
# was created with.
# NOTE(review): given/when/then/and_/the are not imported in the visible
# source; presumably they come from an earlier line or the star imports --
# confirm. The step nesting was reconstructed from a flattened source line.
with given.a_rover:
    x = 0
    y = 0
    starting_point = {'x': x, 'y': y}
    initial_direction = 'N'
    rover = Rover(starting_point, initial_direction)
    the(isinstance(rover, Rover)).should.be(True)

    with when.supplied_the_starting_point:

        with then.the_starting_point_should_have_two_axis:
            the(starting_point).should.contain('x')
            the(starting_point).should.contain('y')

        with then.the_initial_direction_should_belong_to_NSEW:
            # Membership assertion: a failure reports the offending
            # direction instead of a bare "False is not True".
            the(initial_direction).should.be_in(CARDINAL_POINTS)

    with when.supplied_with_a_character_command:

        with and_.the_rover_should_give_current_position:
            # NOTE(review): this passes only if the rover hands back the
            # very dict it was given (``be`` looks like an identity check);
            # switch to ``equal`` if a copy is acceptable -- confirm intent.
            the(rover.position).should.be(starting_point)

        with and_.the_rover_should_give_current_orientation:
            # Compare by value: an identity check against a string literal
            # would only pass thanks to interpreter string interning.
            the(rover.orientation).should.equal('N')