Ejemplo n.º 1
0
def merged_interval_should_follow_spec(i, j, result):
    """Assert that the union of two intervals matches ``result``.

    ``i`` and ``j`` are argument sequences unpacked into ``Interval``;
    ``result`` is indexed as ``(begin, end)`` of the expected union.  The
    ``|`` operator is checked with the operands in both orders.
    """
    first = Interval(*i)
    second = Interval(*j)

    for lhs, rhs in ((first, second), (second, first)):
        merged = lhs | rhs
        the(merged.begin).should.equal(result[0])
        the(merged.end).should.equal(result[1])
Ejemplo n.º 2
0
def merged_interval_should_follow_spec(i, j, result):
    """Assert that the union of two intervals matches ``result``.

    ``i`` and ``j`` are argument sequences unpacked into ``Interval``;
    ``result`` is indexed as ``(begin, end)`` of the expected union.  The
    ``|`` operator is checked with the operands in both orders.
    """
    first = Interval(*i)
    second = Interval(*j)

    for lhs, rhs in ((first, second), (second, first)):
        merged = lhs | rhs
        the(merged.begin).should.equal(result[0])
        the(merged.end).should.equal(result[1])
Ejemplo n.º 3
0
def intersect_interval_should_follow_spec(i, j, result):
    """Assert that intersecting two intervals matches ``result``.

    ``i`` and ``j`` are argument sequences unpacked into ``Interval``.  When
    ``result`` is None, both ``i & j`` and ``j & i`` must be None; otherwise
    ``result`` is indexed as ``(begin, end)`` of the expected intersection.
    The ``&`` operator is checked with the operands in both orders.
    """
    first = Interval(*i)
    second = Interval(*j)
    operand_orders = ((first, second), (second, first))

    if result is None:
        for lhs, rhs in operand_orders:
            this(lhs & rhs).should.be(None)
        return

    for lhs, rhs in operand_orders:
        overlap = lhs & rhs
        the(overlap.begin).should.equal(result[0])
        the(overlap.end).should.equal(result[1])
Ejemplo n.º 4
0
def intersect_interval_should_follow_spec(i, j, result):
    """Assert that intersecting two intervals matches ``result``.

    ``i`` and ``j`` are argument sequences unpacked into ``Interval``.  When
    ``result`` is None, both ``i & j`` and ``j & i`` must be None; otherwise
    ``result`` is indexed as ``(begin, end)`` of the expected intersection.
    The ``&`` operator is checked with the operands in both orders.
    """
    first = Interval(*i)
    second = Interval(*j)
    operand_orders = ((first, second), (second, first))

    if result is None:
        for lhs, rhs in operand_orders:
            this(lhs & rhs).should.be(None)
        return

    for lhs, rhs in operand_orders:
        overlap = lhs & rhs
        the(overlap.begin).should.equal(result[0])
        the(overlap.end).should.equal(result[1])
Ejemplo n.º 5
0
# standard library imports

# third party related imports
from pyspecs import and_, as_well_as, given, it, provided, so, the, then, this, when

# local library imports
from Thor.utils.Rectangle import Rectangle, TextRectangle

with given.a_TextRectangle:

    with provided.there_is_a_word_object:
        # Word in dictionary form, keyed by 'x', 'y', 'w', 'h' and 't'.
        w = {'x': 1, 'y': 2, 'w': 3, 'h': 4, 't': u'ABC'}

        with then.it_can_be_created_directly:
            # Positional construction from the individual dictionary values.
            tr = TextRectangle(w['x'], w['y'], w['w'], w['h'], w['t'])
            for field, value in (('x', 1), ('y', 2), ('w', 3), ('h', 4),
                                 ('t', u'ABC')):
                the(getattr(tr, field)).should.equal(value)

        with and_.it_can_be_created_by_calling_create_method:
            # Construction from the whole dictionary at once.
            tr = TextRectangle.create(w)
            for field, value in (('x', 1), ('y', 2), ('w', 3), ('h', 4),
                                 ('t', u'ABC')):
                the(getattr(tr, field)).should.equal(value)

        del w
Ejemplo n.º 6
0
    # Disjoint operands: the expected union (0, 20) also covers the gap
    # between 10 and 15.
    with provided.two_intervals_are_disjoint:
        merged_interval_should_follow_spec((0, 10), (15, 20), (0, 20))

    # Partial overlap: the expected union runs from the lowest first value
    # to the highest second value.
    with provided.two_intervals_are_partly_overlapping:
        merged_interval_should_follow_spec((0, 10), (5, 15), (0, 15))

    # Containment: the expected union equals the enclosing interval.
    with provided.one_interval_includes_the_other:
        merged_interval_should_follow_spec((0, 10), (5, 8), (0, 10))

# An interval built from (0, 10) must report a length of 10.
with given.an_interval:

    i = Interval(0, 10)

    with then.the_length_should_be_correct:
        the(i.length).should.equal(10)

# Equality checks between pairs of intervals.
with given.two_intervals:

    # Same first argument (0), different second argument (10 vs 5):
    # the intervals must not compare equal.
    with provided.have_the_same_left_tip_but_distince_right_tip:
        i = Interval(0, 10)
        j = Interval(0, 5)
        this(i).should_NOT.equal(j)

    # Same second argument (10), different first argument (5 vs 0):
    # the intervals must not compare equal.
    with provided.have_the_same_right_tip_but_distinct_left_tip:
        i = Interval(5, 10)
        j = Interval(0, 10)
        this(i).should_NOT.equal(j)

    with provided.the_same_left_right_tips:
        i = Interval(0, 10)
Ejemplo n.º 7
0
        with then.bleed_box_should_be_correct:
            bboxes_almost_the_same(bleed_box, bboxes['bleed'])

        with then.trim_box_should_be_correct:
            bboxes_almost_the_same(trim_box, bboxes['trim'])

        with then.art_box_should_be_correct:
            bboxes_almost_the_same(art_box, bboxes['art'])

    sample_pdf = os.path.join(curr_dir, 'fixture', 'test2.pdf')

    # Specs for PDFPage.extract_texts, run against the file in sample_pdf.
    with when.extract_texts_from_it:

        with then.default_is_to_extract_from_all_pages:
            # Without a page selection, 4 pages are expected; page_num must
            # count up from 1 in list order.
            pages = PDFPage.extract_texts(sample_pdf)
            the(len(pages)).should.equal(4)
            for ix, page in enumerate(pages):
                the(page.page_num).should.equal(ix + 1)

        with and_.it_can_extract_specified_pages_as_well:
            # Requesting (1, 3) must yield exactly those two pages, in order.
            pages = PDFPage.extract_texts(sample_pdf, (1, 3,))
            the(len(pages)).should.equal(2)
            the(pages[0].page_num).should.equal(1)
            the(pages[1].page_num).should.equal(3)

        with and_.no_space_is_included:
            # No word's text may be exactly a plain space or U+2003 (EM SPACE).
            pages = PDFPage.extract_texts(sample_pdf)
            for page in pages:
                for word in page.words:
                    the(word.t).should_NOT.be_in((' ', u'\u2003'))
    with closing(open(sample_json)) as f:
        preprocessor = RawTextPreprocessor(
            sample_pdf,
            PDFPage.loads(f.read().decode('utf8'))
        )

    # Compare the raw texts extracted from page 1 with the sample_raw
    # fixture, entry by entry.
    with then.it_extracts_texts_in_content_stream_order:

        raw_texts = preprocessor.page.extract_raw_texts(sample_pdf, 1)
        # The fixture is decoded as UTF-8 and split into one expected entry
        # per line (str.decode / xrange below are Python 2 idioms).
        with closing(open(sample_raw)) as f:
            expected = f.read().decode('utf8').splitlines()

        # XXX The last raw stream is form feed, we ignore it.
        # Only the first 22 entries are compared.
        for i in xrange(22):
            the(raw_texts[i]).should.equal(expected[i])

    with then.each_word_obj_should_locate_itself_in_every_possible_raw_stream:

        ground_truth = (
            # 0
            (u'時尚雜誌', ((0, 0, 4),)),
            (u'國際中文版', ((1, 0, 5),)),
            (u'2012', ((2, 0, 4),)),
            (u'MAY.', ((2, 5, 9),)),
            (u'五月號', ((3, 0, 3),)),
            # 5
            (u'×', ((8, 0, 1), (21, 0, 1),)),
            (u'iP', ((10, 0, 2),)),
            (u'ad', ((10, 2, 4),)),
            (u'version', ((11, 0, 7),)),
Ejemplo n.º 9
0
# standard library imports
import random

# third party related imports
from pyspecs import and_, given, provided, the, then, this, when

# local library imports
from Thor.utils.Rectangle import Point, Rectangle


with given.a_rectangle:

    # Rectangle constructed from (0, 0, 4, 3); the checks below expect an
    # area of 4 * 3 and vertices at (0, 0), (4, 0), (4, 3), (0, 3).
    r = Rectangle(0, 0, 4, 3)

    with then.can_calculate_its_area_correctly:
        the(r.area).should.equal(4 * 3)

    with then.its_vertices_can_be_correctly_enumerated:
        vertices = r.vertices

        # Expected corner coordinates, in the order r.vertices must list them.
        for index, (px, py) in enumerate(((0, 0), (4, 0), (4, 3), (0, 3))):
            the(vertices[index]).should.equal(Point(px, py))


with given.two_rectangles:

    with when.find_intersection_of_these_two_rectangles:

        with provided.they_share_an_edge:
Ejemplo n.º 10
0
        game.roll(pins)

    return game


from pyspecs import given, when, then, the, finish


# A game of 20 rolls knocking down 0 pins each must score 0.
with given.a_game_with_all_gutter_balls:
    game = roll_game([0] * 20)

    with when.the_score_is_calculated:
        score = game.score()

        with then.the_score_should_be_zero:
            the(score).should.equal(0)


# A game of 20 rolls knocking down 1 pin each must score 20.
with given.a_game_with_all_ones:
    game = roll_game([1] * 20)

    with when.the_score_is_calculated:
        score = game.score()

        with then.the_score_should_be_twenty:
            the(score).should.equal(20)


with given.a_game_with_one_spare:
    game = roll_game([4, 6, 3] + [0] * 17)
    )

    with then.it_can_extract_all_font_specs_used_by_a_pdf_page:
        # Expected font specs; both lists are ordered by size below so they
        # can be compared pairwise.
        ground_truths = [
            FontSpec(size=6, color="221714"),
            FontSpec(size=5, color="221714"),
            FontSpec(size=38, color="221714"),
            FontSpec(size=27, color="221714"),
            FontSpec(size=8, color="221714"),
            FontSpec(size=4, color="000000"),
        ]
        ground_truths.sort(key=lambda fs: fs.size)

        # NOTE: list.sort() reorders the object returned by
        # preprocessor.font_specs in place.
        font_specs = preprocessor.font_specs
        font_specs.sort(key=lambda fs: fs.size)
        # Lengths are checked first because zip() stops at the shorter list.
        the(len(font_specs)).should.equal(len(ground_truths))
        for truth, spec in zip(ground_truths, font_specs):
            the(truth.size).should.equal(spec.size)
            the(truth.color).should.equal(spec.color)


    with then.it_can_figure_out_the_font_spec_of_a_textual_object:
        ground_truths = [
            {
                'top': 772 - 36.85, 'left': 28 - 36.85,
                'width': 4, 'height': 9,
                'text': u'9', 'font': FontSpec(size=6, color="221714")
            },
            {
                'top': 566 - 36.85, 'left': 235 - 36.85,
                'width': 94, 'height': 8,
Ejemplo n.º 12
0
# third party related imports
from pyspecs import and_, as_well_as, given, it, provided, so, the, then, this, when

# local library imports
from Thor.understanding.stat import WordStatistician
from Thor.utils.Rectangle import TextRectangle

with given.a_WordStatistician_and_supply_it_with_some_words:

    words = map(
        lambda i: TextRectangle(x=1. * i, y=2. * i, w=3. * i, h=4. * i, t=u''),
        xrange(10))
    ws = WordStatistician(words)

    with then.it_can_count_how_many_words_totally:
        the(ws.count).should.equal(10)

    with and_.it_can_calculate_average_width_of_textual_objects:
        the(abs(ws.avg_width - 3. * 4.5)).should.be_less_than(1.0e-3)

    with and_.it_can_calculate_average_height_of_textual_objects:
        the(abs(ws.avg_height - 4. * 4.5)).should.be_less_than(1.0e-3)

    with and_.it_can_calculate_variance_of_width_of_textual_objects:
        the(abs(ws.var_width - 3. * 3. * 8.25)).should.be_less_than(1.0e-3)

    with and_.it_can_calculate_variance_of_height_of_textual_objects:
        the(abs(ws.var_height - 4. * 4. * 8.25)).should.be_less_than(1.0e-3)

    with and_.it_can_calculate_median_of_width_of_textual_objects:
        words = [
Ejemplo n.º 13
0
# third party related imports
from pyspecs import and_, as_well_as, given, it, provided, so, the, then, this, when

# local library imports
from Thor.utils.Rectangle import Rectangle, TextRectangle


with given.a_TextRectangle:

    with provided.there_is_a_word_object:
        # Word in dictionary form, keyed by 'x', 'y', 'w', 'h' and 't'.
        w = {'x': 1, 'y': 2, 'w': 3, 'h': 4, 't': u'ABC'}

        with then.it_can_be_created_directly:
            # Positional construction from the individual dictionary values.
            tr = TextRectangle(w['x'], w['y'], w['w'], w['h'], w['t'])
            for field, value in (('x', 1), ('y', 2), ('w', 3), ('h', 4),
                                 ('t', u'ABC')):
                the(getattr(tr, field)).should.equal(value)

        with and_.it_can_be_created_by_calling_create_method:
            # Construction from the whole dictionary at once.
            tr = TextRectangle.create(w)
            for field, value in (('x', 1), ('y', 2), ('w', 3), ('h', 4),
                                 ('t', u'ABC')):
                the(getattr(tr, field)).should.equal(value)

        del w
Ejemplo n.º 14
0
    preprocessor = FontSpecPreprocessor(sample_pdf, PDFPage.loads(pdf_json))

    with then.it_can_extract_all_font_specs_used_by_a_pdf_page:
        # Expected font specs; both lists are ordered by size below so they
        # can be compared pairwise.
        ground_truths = [
            FontSpec(size=6, color="221714"),
            FontSpec(size=5, color="221714"),
            FontSpec(size=38, color="221714"),
            FontSpec(size=27, color="221714"),
            FontSpec(size=8, color="221714"),
            FontSpec(size=4, color="000000"),
        ]
        ground_truths.sort(key=lambda fs: fs.size)

        # NOTE: list.sort() reorders the object returned by
        # preprocessor.font_specs in place.
        font_specs = preprocessor.font_specs
        font_specs.sort(key=lambda fs: fs.size)
        # Lengths are checked first because zip() stops at the shorter list.
        the(len(font_specs)).should.equal(len(ground_truths))
        for truth, spec in zip(ground_truths, font_specs):
            the(truth.size).should.equal(spec.size)
            the(truth.color).should.equal(spec.color)

    with then.it_can_figure_out_the_font_spec_of_a_textual_object:
        ground_truths = [
            {
                'top': 772 - 36.85,
                'left': 28 - 36.85,
                'width': 4,
                'height': 9,
                'text': u'9',
                'font': FontSpec(size=6, color="221714")
            },
            {
Ejemplo n.º 15
0
with given.a_NaivePreprocessor:

    with when.it_normalizes_text_blocks_to_width_1000px:
        # Ten word dicts whose x/y/w/h are 1x/2x/3x/4x the index i.
        words = map(lambda i: dict(x=1 * i, y=2 * i, w=3 * i, h=4 * i, t=''),
                    xrange(10))
        preprocessor = NaivePreprocessor(
            'test.pdf',
            PDFPage(page_num=1,
                    width=200,
                    height=200,
                    words=map(PDFText.create_from_dict, words)))
        # Scale factor is 1000 / 200. == 5.0 (the page above has width 200).
        preprocessor._scale_words(1000 / 200.)

        with then.each_word_is_scaled_correctly:
            # Every field must be 5 times the value it was created with.
            for ix, word in enumerate(preprocessor.words):
                the(word['x']).should.equal(5 * 1 * ix)
                the(word['y']).should.equal(5 * 2 * ix)
                the(word['w']).should.equal(5 * 3 * ix)
                the(word['h']).should.equal(5 * 4 * ix)

        # Unbind the names created for this step.
        del preprocessor, words

    with when.it_classifies_each_word_into_three_types_of_orientation:

        words = [{
            'x': 0,
            'y': 0,
            'w': 200,
            'h': 100,
            't': u'麗'
        }, {
Ejemplo n.º 16
0
        with then.bleed_box_should_be_correct:
            bboxes_almost_the_same(bleed_box, bboxes['bleed'])

        with then.trim_box_should_be_correct:
            bboxes_almost_the_same(trim_box, bboxes['trim'])

        with then.art_box_should_be_correct:
            bboxes_almost_the_same(art_box, bboxes['art'])

    sample_pdf = os.path.join(curr_dir, 'fixture', 'test2.pdf')

    with when.extract_texts_from_it:

        with then.default_is_to_extract_from_all_pages:
            pages = PDFPage.extract_texts(sample_pdf)
            the(len(pages)).should.equal(4)
            for ix, page in enumerate(pages):
                the(page.page_num).should.equal(ix + 1)

        with and_.it_can_extract_specified_pages_as_well:
            pages = PDFPage.extract_texts(sample_pdf, (
                1,
                3,
            ))
            the(len(pages)).should.equal(2)
            the(pages[0].page_num).should.equal(1)
            the(pages[1].page_num).should.equal(3)

        with and_.no_space_is_included:
            pages = PDFPage.extract_texts(sample_pdf)
            for page in pages:
Ejemplo n.º 17
0
from pyspecs import given, when, then, and_, the, this, finish


# Arithmetic example: checks the sum and the difference of 2 and 3.
with given.two_operands:
    a = 2
    b = 3

    with when.supplied_to_the_add_function:
        total = a + b

        with then.the_total_should_be_mathmatically_correct:
            the(total).should.equal(5)

        with and_.the_total_should_be_greater_than_either_operand:
            the(total).should.be_greater_than(a)
            the(total).should.be_greater_than(b)

    with when.supplied_to_the_subtract_function:
        # Note the operand order: b - a, i.e. 3 - 2.
        difference = b - a

        with then.the_difference_should_be_mathmatically_correct:
            the(difference).should.equal(1)

    # cleanup is just based on scope
    del a, b, total, difference


with given.an_error_prone_situation:
    with when.an_error_occurs:
        result = 1 / 0
Ejemplo n.º 18
0
from Thor.pdf.page import PDFPage
from Thor.understanding.docspace import DocumentSpace
from Thor.utils.Rectangle import Rectangle, TextRectangle


with given.a_DocumentSpace:

    with then.it_can_determines_the_mainly_reading_direction:
        # Fixtures are read from the 'fixture' directory next to this file.
        curr_dir = os.path.abspath(os.path.dirname(__file__))

        # Words loaded from test1.json must be detected as LEFT_TO_RIGHT.
        sample_json = os.path.join(curr_dir, 'fixture', 'test1.json')
        with closing(open(sample_json)) as f:
            sample = ujson.loads(f.read().decode('utf8'))
            words = map(TextRectangle.create, sample['data'])
            ds = DocumentSpace(words)
            the(ds.reading_direction).should.equal(DocumentSpace.LEFT_TO_RIGHT)

        # Words loaded from test2.json must be detected as TOP_TO_BOTTOM.
        sample_json = os.path.join(curr_dir, 'fixture', 'test2.json')
        with closing(open(sample_json)) as f:
            sample = ujson.loads(f.read().decode('utf8'))
            words = map(TextRectangle.create, sample['data'])
            ds = DocumentSpace(words)
            the(ds.reading_direction).should.equal(DocumentSpace.TOP_TO_BOTTOM)


    with when.it_tries_to_divide_itself_into_two_subspaces:

        words = map(TextRectangle.create, [
            {'x': 0, 'y': 0, 'w': 100, 'h': 50, 't': ''},
            {'x': 10, 'y': 100, 'w': 50, 'h': 50, 't': ''},
            {'x': 500, 'y': 0, 'w': 100, 'h': 25, 't': ''},
Ejemplo n.º 19
0
from pyspecs import and_, given, provided, the, then, this, when

# local library imports
from Thor.utils.Point import Point


# Point arithmetic on two points whose coordinates are random integers in
# the inclusive range [0, 65536].
with given.two_random_points:

    p1 = Point(random.randint(0, 65536), random.randint(0, 65536))
    p2 = Point(random.randint(0, 65536), random.randint(0, 65536))

    # Addition is expected to be coordinate-wise.
    with when.one_point_adds_the_other:
        p3 = p1 + p2

        with then.x_coordinate_should_be_correctly_calculated:
            the(p3.x).should.equal(p1.x + p2.x)

        with then.y_coordinate_should_be_correctly_calculated:
            the(p3.y).should.equal(p1.y + p2.y)

    # Subtraction is expected to be coordinate-wise.
    with when.one_point_subtracts_the_other:
        p3 = p1 - p2

        with then.x_coordinate_should_be_correctly_calculated:
            the(p3.x).should.equal(p1.x - p2.x)

        with then.y_coordinate_should_be_correctly_calculated:
            the(p3.y).should.equal(p1.y - p2.y)

    with when.negative_a_point:
        p3 = -p1
Ejemplo n.º 20
0
    )

    # Run the view under a Pyramid test configuration with both
    # ScrapydJobHelper and DbInterface patched out.
    with testing.testConfig(request=request, settings=settings) as config:
        # Mock ScrapydJobHelper to isolate the test.
        with mock.patch('web_runner.views.ScrapydJobHelper') \
                as ScrapydJobHelperMock:
            with mock.patch('web_runner.db.DbInterface') as DbMock:
                # start_job on the patched helper instance returns the
                # placeholder value "XXX".
                helper_mock = ScrapydJobHelperMock.return_value
                helper_mock.start_job.return_value = "XXX"

                # Pyramid testing doesn't configure resources.
                request.route_path = mock.MagicMock()

                with when.starting_a_spider:
                    with then.it_should_redirect_to_pending_state:
                        # The view is handed over as a callable (via partial)
                        # instead of being called here.
                        the(partial(views.spider_start_view,
                                    request)).should.raise_an(exc.HTTPFound)

with given.a_configuration_with_one_command_and_spider:
    settings = {
        'command._names': "cmd_cfg",
        'command.cmd_cfg.cmd': "command line '{spider 0}'",
        'command.cmd_cfg.resource': 'command_resource',
        'command.cmd_cfg.content_type': 'application/x-ldjson',
        'command.cmd_cfg.crawl.0.spider_config_name': 'spider_cfg',
        'spider._names': 'spider_cfg',
        'spider._scrapyd.base_url': 'http://localhost:6800/',
        'spider._result.base_url': 'http://localhost:8000/',
        'spider.spider_cfg.resource': 'spider_resource',
        'spider.spider_cfg.spider_name': 'spider_name',
        'spider.spider_cfg.project_name': 'spider_project_name',
        'db_filename': ":memory:",
Ejemplo n.º 21
0
# standard library imports
import random

# third party related imports
from pyspecs import and_, given, provided, the, then, this, when

# local library imports
from Thor.utils.Rectangle import Point, Rectangle

with given.a_rectangle:

    # Rectangle constructed from (0, 0, 4, 3); the checks below expect an
    # area of 4 * 3 and vertices at (0, 0), (4, 0), (4, 3), (0, 3).
    r = Rectangle(0, 0, 4, 3)

    with then.can_calculate_its_area_correctly:
        the(r.area).should.equal(4 * 3)

    with then.its_vertices_can_be_correctly_enumerated:
        vertices = r.vertices

        # Expected corner coordinates, in the order r.vertices must list them.
        for index, (px, py) in enumerate(((0, 0), (4, 0), (4, 3), (0, 3))):
            the(vertices[index]).should.equal(Point(px, py))

with given.two_rectangles:

    with when.find_intersection_of_these_two_rectangles:

        with provided.they_share_an_edge:
            r1 = Rectangle(0, 0, 5, 5)
Ejemplo n.º 22
0
# local library imports
from Thor.pdf.page import PDFPage
from Thor.understanding.docspace import DocumentSpace
from Thor.utils.Rectangle import Rectangle, TextRectangle

with given.a_DocumentSpace:

    with then.it_can_determines_the_mainly_reading_direction:
        # Fixtures are read from the 'fixture' directory next to this file.
        curr_dir = os.path.abspath(os.path.dirname(__file__))

        # Words loaded from test1.json must be detected as LEFT_TO_RIGHT.
        sample_json = os.path.join(curr_dir, 'fixture', 'test1.json')
        with closing(open(sample_json)) as f:
            sample = ujson.loads(f.read().decode('utf8'))
            words = map(TextRectangle.create, sample['data'])
            ds = DocumentSpace(words)
            the(ds.reading_direction).should.equal(DocumentSpace.LEFT_TO_RIGHT)

        # Words loaded from test2.json must be detected as TOP_TO_BOTTOM.
        sample_json = os.path.join(curr_dir, 'fixture', 'test2.json')
        with closing(open(sample_json)) as f:
            sample = ujson.loads(f.read().decode('utf8'))
            words = map(TextRectangle.create, sample['data'])
            ds = DocumentSpace(words)
            the(ds.reading_direction).should.equal(DocumentSpace.TOP_TO_BOTTOM)

    with when.it_tries_to_divide_itself_into_two_subspaces:

        words = map(TextRectangle.create, [
            {
                'x': 0,
                'y': 0,
                'w': 100,
Ejemplo n.º 23
0
with given.a_NaivePreprocessor:

    with when.it_normalizes_text_blocks_to_width_1000px:
        # Ten word dicts whose x/y/w/h are 1x/2x/3x/4x the index i.
        words = map(lambda i: dict(x=1 * i, y=2 * i, w=3 * i, h=4 * i, t=''),
                    xrange(10))
        preprocessor = NaivePreprocessor(
            'test.pdf',
            PDFPage(page_num=1, width=200, height=200,
                    words=map(PDFText.create_from_dict, words))
        )
        # Scale factor is 1000 / 200. == 5.0 (the page above has width 200).
        preprocessor._scale_words(1000 / 200.)

        with then.each_word_is_scaled_correctly:
            # Every field must be 5 times the value it was created with.
            for ix, word in enumerate(preprocessor.words):
                the(word['x']).should.equal(5 * 1 * ix)
                the(word['y']).should.equal(5 * 2 * ix)
                the(word['w']).should.equal(5 * 3 * ix)
                the(word['h']).should.equal(5 * 4 * ix)

        # Unbind the names created for this step.
        del preprocessor, words


    with when.it_classifies_each_word_into_three_types_of_orientation:

        words = [
            {'x': 0, 'y': 0, 'w': 200, 'h': 100, 't': u'麗'},
            {'x': 0, 'y': 0, 'w': 100, 'h': 100, 't': u'麗寶生活家'},
            {'x': 0, 'y': 0, 'w': 200, 'h': 100, 't': u'麗寶生活家'},
            {'x': 0, 'y': 0, 'w': 100, 'h': 200, 't': u'麗寶生活家'}
        ]
Ejemplo n.º 24
0
    # Disjoint operands: the expected union (0, 20) also covers the gap
    # between 10 and 15.
    with provided.two_intervals_are_disjoint:
        merged_interval_should_follow_spec((0, 10), (15, 20), (0, 20))

    # Partial overlap: the expected union runs from the lowest first value
    # to the highest second value.
    with provided.two_intervals_are_partly_overlapping:
        merged_interval_should_follow_spec((0, 10), (5, 15), (0, 15))

    # Containment: the expected union equals the enclosing interval.
    with provided.one_interval_includes_the_other:
        merged_interval_should_follow_spec((0, 10), (5, 8), (0, 10))

# An interval built from (0, 10) must report a length of 10.
with given.an_interval:

    i = Interval(0, 10)

    with then.the_length_should_be_correct:
        the(i.length).should.equal(10)

# Equality checks between pairs of intervals.
with given.two_intervals:

    # Same first argument (0), different second argument (10 vs 5):
    # the intervals must not compare equal.
    with provided.have_the_same_left_tip_but_distince_right_tip:
        i = Interval(0, 10)
        j = Interval(0, 5)
        this(i).should_NOT.equal(j)

    # Same second argument (10), different first argument (5 vs 0):
    # the intervals must not compare equal.
    with provided.have_the_same_right_tip_but_distinct_left_tip:
        i = Interval(5, 10)
        j = Interval(0, 10)
        this(i).should_NOT.equal(j)

    with provided.the_same_left_right_tips:
        i = Interval(0, 10)
Ejemplo n.º 25
0
# third party related imports
from pyspecs import and_, given, provided, the, then, this, when

# local library imports
from Thor.utils.Point import Point

# Point arithmetic on two points whose coordinates are random integers in
# the inclusive range [0, 65536].
with given.two_random_points:

    p1 = Point(random.randint(0, 65536), random.randint(0, 65536))
    p2 = Point(random.randint(0, 65536), random.randint(0, 65536))

    # Addition is expected to be coordinate-wise.
    with when.one_point_adds_the_other:
        p3 = p1 + p2

        with then.x_coordinate_should_be_correctly_calculated:
            the(p3.x).should.equal(p1.x + p2.x)

        with then.y_coordinate_should_be_correctly_calculated:
            the(p3.y).should.equal(p1.y + p2.y)

    # Subtraction is expected to be coordinate-wise.
    with when.one_point_subtracts_the_other:
        p3 = p1 - p2

        with then.x_coordinate_should_be_correctly_calculated:
            the(p3.x).should.equal(p1.x - p2.x)

        with then.y_coordinate_should_be_correctly_calculated:
            the(p3.y).should.equal(p1.y - p2.y)

    with when.negative_a_point:
        p3 = -p1
Ejemplo n.º 26
0
# third party related imports
from pyspecs import and_, given, the, then, when

# local library imports
from Thor.utils.Interval import Interval, IntervalList


# Specs for how IntervalList merges the intervals it is constructed from.
with given.some_intervals:

    # (0, 10) and (10, 20) share the value 10; the list is expected to hold
    # a single entry running from 0 to 20.
    with when.two_intervals_are_joint:
        i = Interval(0, 10)
        j = Interval(10, 20)
        interval_list = IntervalList(i, j)

        with then.two_intervals_can_be_merged:
            the(len(interval_list)).should.equal(1)

        with and_.the_begin_of_merged_interval_is_correct:
            the(interval_list[0].begin).should.equal(0)

        with and_.the_end_of_merged_interval_is_correct:
            the(interval_list[0].end).should.equal(20)

    # (9, 11) overlaps (0, 10) but not (15, 20): two entries are expected,
    # the first being Interval(0, 11).
    with when.intervals_can_be_merged:
        i = Interval(0, 10)
        j = Interval(15, 20)
        k = Interval(9, 11)
        interval_list = IntervalList(i, j, k)

        the(len(interval_list)).should.equal(2)
        the(interval_list[0]).should.equal(Interval(0, 11))
Ejemplo n.º 27
0
# Looking up a spider configuration by resource path.
with given.a_configuration_of_a_spider:
    # Flat settings mapping declaring one spider config named 'spider_cfg'.
    settings = {
        'spider._names': 'spider_cfg',
        'spider._scrapyd.base_url': 'http://localhost:6800/',
        'spider._result.base_url': 'http://localhost:8000/',
        'spider.spider_cfg.resource': 'spider_resource',
        'spider.spider_cfg.spider_name': 'spider_name',
        'spider.spider_cfg.project_name': 'spider_project_name',
    }

    with when.searching_for_that_resource:
        # The path '/spider_resource/' must resolve to the configured spider.
        config = find_spider_config_from_path(settings, '/spider_resource/')

        with then.the_configuration_should_be_found:
            the(config).should.equal(
                SpiderConfig('spider_name', 'spider_project_name'))

    with when.searching_for_an_unexistant_resource:
        # NOTE(review): this spider spec calls find_command_config_from_path,
        # not find_spider_config_from_path as in the step above -- confirm
        # that this is intended.
        config = partial(find_command_config_from_path, settings,
                         '/unexistant/')
        # functools.partial objects have no __name__ of their own, so one is
        # assigned here; presumably raise_an reads it -- TODO confirm.
        config.__name__ = "find_command_config_from_path"

        with then.it_should_raise_not_found:
            the(config).should.raise_an(exc.HTTPNotFound)

with given.a_configuration_of_a_command_with_one_spider:
    settings = {
        'spider._names': 'test_spider',
        'spider.test_spider.resource': '/spider/resource',
        'spider.test_spider.spider_name': 'spider name',
        'spider.test_spider.project_name': 'spider project',
Ejemplo n.º 28
0
    sample_raw = os.path.join(curr_dir, 'fixture', 'test1.rtxt')
    sample_pdf = os.path.join(curr_dir, 'fixture', 'test1.pdf')

    with closing(open(sample_json)) as f:
        preprocessor = RawTextPreprocessor(
            sample_pdf, PDFPage.loads(f.read().decode('utf8')))

    # Compare the raw texts extracted from page 1 with the sample_raw
    # fixture, entry by entry.
    with then.it_extracts_texts_in_content_stream_order:

        raw_texts = preprocessor.page.extract_raw_texts(sample_pdf, 1)
        # The fixture is decoded as UTF-8 and split into one expected entry
        # per line (str.decode / xrange below are Python 2 idioms).
        with closing(open(sample_raw)) as f:
            expected = f.read().decode('utf8').splitlines()

        # XXX The last raw stream is form feed, we ignore it.
        # Only the first 22 entries are compared.
        for i in xrange(22):
            the(raw_texts[i]).should.equal(expected[i])

    with then.each_word_obj_should_locate_itself_in_every_possible_raw_stream:

        ground_truth = (
            # 0
            (u'時尚雜誌', ((0, 0, 4), )),
            (u'國際中文版', ((1, 0, 5), )),
            (u'2012', ((2, 0, 4), )),
            (u'MAY.', ((2, 5, 9), )),
            (u'五月號', ((3, 0, 3), )),
            # 5
            (u'×', (
                (8, 0, 1),
                (21, 0, 1),
            )),
Ejemplo n.º 29
0
# TODO: make tests independent of one another (the test framework does not have a before())
# TODO: refactor tests following "effective unit testing" best practices

from Navigator import *
from Rover import *

CARDINAL_POINTS = ('N', 'S', 'E', 'W')

# Specs for a Rover created at {'x': 0, 'y': 0} with initial direction 'N'.
with given.a_rover:
    x = 0
    y = 0
    starting_point = {'x': x, 'y': y}
    initial_direction = 'N'
    rover = Rover(starting_point, initial_direction)

    the(isinstance(rover, Rover)).should.be(True)

    with when.supplied_the_starting_point:
        # These two steps inspect the input fixtures themselves, not the
        # rover.
        with then.the_starting_point_should_have_two_axis:
            the(starting_point).should.contain('x')
            the(starting_point).should.contain('y')

        with then.the_initial_direction_should_belong_to_NSEW:
            the(initial_direction in CARDINAL_POINTS).should.be(True)

    with when.supplied_with_a_character_command:
        # NOTE(review): if `should.be` is an identity check, the step below
        # requires rover.position to be the very dict passed to Rover() --
        # confirm against pyspecs.
        with and_.the_rover_should_give_current_position:
            the(rover.position).should.be(starting_point)

        with and_.the_rover_should_give_current_orientation:
            the(rover.orientation).should.be('N')