def test_dailydigest_I67(self):
        # The bell-prefixed I67H locator must produce the Daily Digest
        # extension span, both in the returned state and in the text written
        # to the output handle.
        import io
        from locator.dailydigest import DailyDigestInputParser

        data = b'\x07I67H'
        span_open = "<span class='bell-I67H dailydigest-extension'>"
        tables = {
            'locator_table': DailyDigestInputParser.LOCATOR_TABLE,
            'font_table': DailyDigestInputParser.FONT_TABLE,
        }

        # Part 1: inspect the last state returned for the raw data.
        current_state_stack, _ = process_lines(data, (None, b'G2'), **tables)
        current_state = current_state_stack[-1]
        self.assertEqual(
            current_state[0],
            {'end': '</span>', 'grid': b'', 'start': span_open})

        # Part 2: run the same data line by line and inspect what was written.
        out_io = io.StringIO()
        current_state = (None, b'G2')  # start as Grid 2
        parser = InputParser()
        for _page, _page_match, line in parser.makelines(data, output=out_io):
            # process_lines returns a stack of states; only its last entry is
            # the (action, grid) state that the next call takes as input.
            # Previously the whole stack was fed back in as current_state.
            current_state_stack, _ = process_lines(
                line, current_state, outputf=out_io, **tables)
            current_state = current_state_stack[-1]
            self.assertEqual(out_io.getvalue(), span_open)
    def test_dailydigest_I67(self):
        # The bell-prefixed I67H locator must produce the Daily Digest
        # extension span, both in the returned state and in the text written
        # to the output handle.
        import io
        from locator.dailydigest import DailyDigestInputParser

        data = b'\x07I67H'
        span_open = "<span class='bell-I67H dailydigest-extension'>"
        tables = {
            'locator_table': DailyDigestInputParser.LOCATOR_TABLE,
            'font_table': DailyDigestInputParser.FONT_TABLE,
        }

        # Part 1: inspect the last state returned for the raw data.
        current_state_stack, _ = process_lines(data, (None, b'G2'), **tables)
        current_state = current_state_stack[-1]
        self.assertEqual(
            current_state[0], {
                'end': '</span>',
                'grid': b'',
                'start': span_open,
            })

        # Part 2: run the same data line by line and inspect what was written.
        out_io = io.StringIO()
        current_state = (None, b'G2')  # start as Grid 2
        parser = InputParser()
        for _page, _page_match, line in parser.makelines(data, output=out_io):
            # process_lines returns a stack of states; only its last entry is
            # the (action, grid) state that the next call takes as input.
            # Previously the whole stack was fed back in as current_state.
            current_state_stack, _ = process_lines(
                line,
                current_state,
                outputf=out_io,
                **tables)
            current_state = current_state_stack[-1]
            self.assertEqual(out_io.getvalue(), span_open)
Example #3
0
    def parse_io(
            self,
            inputfile=None,
            current_state=(None, b'G2'),
            outputfile=None,
            locator_table=None,
            font_table=None,
            postfix=None):
        ''' Read inputfile, process it line by line and return the output
        handle.

        When outputfile is None a new io.StringIO is created and returned;
        otherwise the handle that was passed in is used and returned.  The
        handle is not rewound, so a caller that wants to read it back does
            result = parse_io(...)
            result.seek(0)
        locator_table and font_table fall back to self.LOCATOR_TABLE and
        self.FONT_TABLE when they are falsy.
        '''
        locator_table = locator_table or self.LOCATOR_TABLE
        font_table = font_table or self.FONT_TABLE
        out = io.StringIO() if outputfile is None else outputfile

        def page_marker(page_id):
            # Marker emitted once a page is known to be complete.
            return b"<center>[Page:" + page_id + b"] </center>"

        raw = inputfile.read().strip()
        last_page = None
        output("<html>", outf=out)
        for page, _page_match, line in self.makelines(raw, output=out):
            state_stack, _output_line = process_lines(
                line,
                current_state,
                outputf=out,
                locator_table=locator_table,
                font_table=font_table,
                postfix=postfix)
            # Logged before the update below, so this is the state that was
            # passed in for this line.
            logger.debug("Current_state:%s", current_state)
            logger.debug("Page:%s Current_page:%s", page, last_page)
            current_state = state_stack[-1]

            if not page:
                continue
            if not last_page:
                # First page seen: just remember it.
                last_page = page
            elif page != last_page:
                # Page changed: close off the previous page.
                output(page_marker(last_page), outf=out)
                last_page = page

        # The final page never sees a "page changed" event, so close it here.
        if last_page:
            output(page_marker(last_page), outf=out)
        output("</html>", outf=out)
        return out
    def test_dailydigest_I01_actions(self):
        # An I01 line run through the Daily Digest tables must end in an
        # action with <h3><em> start/end tags and grid G2.
        from locator.dailydigest import DailyDigestInputParser

        line = b'\x07I01Monday, April 18, 2016\xadD382'
        start_state = (None, b'G2')
        stack, _ = process_lines(
            line,
            start_state,
            locator_table=DailyDigestInputParser.LOCATOR_TABLE,
            font_table=DailyDigestInputParser.FONT_TABLE)
        final_state = stack[-1]

        # Checked field by field, in the same order as the expectations list.
        expectations = (
            ('end', '</em></h3>'),
            ('grid', b'G2'),
            ('start', '<h3><em>'),
        )
        for key, wanted in expectations:
            self.assertEqual(final_state[0].get(key), wanted)
 def test_I01(self):
     # An I01 line run through the Congressional Record Index tables must
     # end in an action with empty start/end tags and grid G2.
     record = b'''I01AAGENES, ALEXA'''
     start_state = (None, b'G2')
     stack, _ = process_lines(
         record,
         start_state,
         locator_table=CongressionalRecordIndexInputParser.LOCATOR_TABLE,
         font_table=CongressionalRecordIndexInputParser.FONT_TABLE)
     final_state = stack[-1]

     # Checked field by field, in the same order as the expectations list.
     expectations = (
         ('end', ''),
         ('grid', b'G2'),
         ('start', ''),
     )
     for key, wanted in expectations:
         self.assertEqual(final_state[0].get(key), wanted)
 def test_I03(self):
     # An I03 line run through the Congressional Record Index tables must
     # end in an action with <h2> start/end tags and grid G2.
     record = b''' I03Bills and resolutions cosponsored
     '''
     start_state = (None, b'G2')
     stack, _ = process_lines(
         record,
         start_state,
         locator_table=CongressionalRecordIndexInputParser.LOCATOR_TABLE,
         font_table=CongressionalRecordIndexInputParser.FONT_TABLE)
     final_state = stack[-1]

     # Checked field by field, in the same order as the expectations list.
     expectations = (
         ('end', '</h2>'),
         ('grid', b'G2'),
         ('start', '<h2>'),
     )
     for key, wanted in expectations:
         self.assertEqual(final_state[0].get(key), wanted)
Example #7
0
    def parse_io(self,
                 inputfile=None,
                 current_state=(None, b'G2'),
                 outputfile=None,
                 locator_table=None,
                 font_table=None,
                 postfix=None):
        ''' Read inputfile, process it line by line and return the output
        handle.

        When outputfile is None a new io.StringIO is created and returned;
        otherwise the handle that was passed in is used and returned.  The
        handle is not rewound, so a caller that wants to read it back does
            result = parse_io(...)
            result.seek(0)
        locator_table and font_table fall back to self.LOCATOR_TABLE and
        self.FONT_TABLE when they are falsy.
        '''
        locator_table = locator_table or self.LOCATOR_TABLE
        font_table = font_table or self.FONT_TABLE
        out = io.StringIO() if outputfile is None else outputfile

        def page_marker(page_id):
            # Marker emitted once a page is known to be complete.
            return b"<center>[Page:" + page_id + b"] </center>"

        raw = inputfile.read().strip()
        last_page = None
        output("<html>", outf=out)
        for page, _page_match, line in self.makelines(raw, output=out):
            state_stack, _output_line = process_lines(
                line,
                current_state,
                outputf=out,
                locator_table=locator_table,
                font_table=font_table,
                postfix=postfix)
            # Logged before the update below, so this is the state that was
            # passed in for this line.
            logger.debug("Current_state:%s", current_state)
            logger.debug("Page:%s Current_page:%s", page, last_page)
            current_state = state_stack[-1]

            if not page:
                continue
            if not last_page:
                # First page seen: just remember it.
                last_page = page
            elif page != last_page:
                # Page changed: close off the previous page.
                output(page_marker(last_page), outf=out)
                last_page = page

        # The final page never sees a "page changed" event, so close it here.
        if last_page:
            output(page_marker(last_page), outf=out)
        output("</html>", outf=out)
        return out
 def test_I05(self):
     # An I05 line run through the Congressional Record Index tables must
     # end in an action with <p> start/end tags and grid G2.
     from locator.congressionalrecordindex import CongressionalRecordIndexInputParser
     record = b'''I05Committee to escort Japanese Prime Minister, Shinzo Abe, into the House Chamber, H2503 [29AP]
     '''
     start_state = (None, b'G2')
     stack, _ = process_lines(
         record,
         start_state,
         locator_table=CongressionalRecordIndexInputParser.LOCATOR_TABLE,
         font_table=CongressionalRecordIndexInputParser.FONT_TABLE)
     final_state = stack[-1]

     # Checked field by field, in the same order as the expectations list.
     expectations = (
         ('end', '</p>'),
         ('grid', b'G2'),
         ('start', '<p>'),
     )
     for key, wanted in expectations:
         self.assertEqual(final_state[0].get(key), wanted)
    def test_dailydigest_I01_actions(self):
        # An I01 line run through the Daily Digest tables must end in an
        # action with <h3><em> start/end tags and grid G2.
        from locator.dailydigest import DailyDigestInputParser

        line = b'\x07I01Monday, April 18, 2016\xadD382'
        start_state = (None, b'G2')
        stack, _ = process_lines(
            line,
            start_state,
            locator_table=DailyDigestInputParser.LOCATOR_TABLE,
            font_table=DailyDigestInputParser.FONT_TABLE)
        final_state = stack[-1]

        # Checked field by field, in the same order as the expectations list.
        expectations = (
            ('end', '</em></h3>'),
            ('grid', b'G2'),
            ('start', '<h3><em>'),
        )
        for key, wanted in expectations:
            self.assertEqual(final_state[0].get(key), wanted)
Example #10
0
    def parse_io(self,
                 inputfile=None,
                 current_state=(None, b'G2'),
                 outputfile=None,
                 locator_table=None,
                 font_table=None,
                 postfix=None,
                 year=None):
        ''' Generator: process every stanza of inputfile into its own
        io.StringIO and yield it together with the stanza title.

        inputfile      object with a read() method; required despite the
                       None default.
        current_state  (action, grid) state every stanza starts from.
        outputfile     accepted but not used by this method.
        locator_table, font_table, postfix
                       passed through to process_lines unchanged.
        year           passed to self.process_stanza_title; self.year is
                       used instead when year is falsy and self.year is set.

        Yields ((name, line_name), out) once per stanza.  out is already
        rewound to the beginning.  name and line_name come from the last
        line of the stanza that carried an I01 bellcode; a stanza without
        one reuses the values of the previous stanza, and both are "" until
        the first title has been seen.
        '''

        if self.year and not year:
            year = self.year
        orig_current_state = current_state
        inputdata = inputfile.read()
        # Both are bound before the loop so the yield below cannot hit an
        # unbound line_name when a stanza has no I01 title line.
        name = ""
        line_name = ""
        for stanza in self.make_stanzas(inputdata):
            logger.debug("CRI stanza:%s", stanza)
            out = io.StringIO()
            # For every sub document in the dat file reset the state to the
            # start
            current_state = orig_current_state
            current_state_stack = []
            stanza_lines = self.makelines(stanza, output=out)
            for cnt, (_page, _page_match, line) in enumerate(stanza_lines):
                ret_current_state_stack, _output_line = process_lines(
                    line,
                    current_state,
                    outputf=out,
                    locator_table=locator_table,
                    font_table=font_table,
                    postfix=postfix)
                current_state = ret_current_state_stack[-1]
                logger.debug("Current state:%s", current_state)
                logger.debug("Previous state :%s", ret_current_state_stack[0])
                logger.debug("[%d] line:[%s] states[%s]", cnt, line,
                             ret_current_state_stack)
                current_state_stack.append((ret_current_state_stack, line))

            # check all non first items in stack if they exist and have a bellcode
            for state, stanza_line in current_state_stack:
                # first item in every state is the previous state, so skip it;
                # the length check keeps a one-item stack from raising.
                if len(state) < 2 or not state[1]:
                    continue
                for action, _grid in state[1:]:
                    if action and action.get('bellcode') == b'I01':
                        name, line_name = self.process_stanza_title(
                            stanza_line, year)

            # Close whatever element the last line of the stanza left open.
            if current_state[0] and current_state[0].get('end'):
                logger.debug("\tcurrent_state.end:%s",
                             current_state[0].get('end'))
                output(current_state[0].get('end'), outf=out)
            # rewind to the beginning now that we are finished with output.
            out.seek(0)

            # Every stanza is yielded, including ones without a title.
            yield ((name, line_name), out)
    def parse_io(
            self,
            inputfile=None,
            current_state=(None, b'G2'),
            outputfile=None,
            locator_table=None,
            font_table=None,
            postfix=None,
            year=None):
        ''' Generator: process every stanza of inputfile into its own
        io.StringIO and yield it together with the stanza title.

        inputfile      object with a read() method; required despite the
                       None default.
        current_state  (action, grid) state every stanza starts from.
        outputfile     accepted but not used by this method.
        locator_table, font_table, postfix
                       passed through to process_lines unchanged.
        year           passed to self.process_stanza_title; self.year is
                       used instead when year is falsy and self.year is set.

        Yields ((name, line_name), out) once per stanza.  out is already
        rewound to the beginning.  name and line_name come from the last
        line of the stanza that carried an I01 bellcode; a stanza without
        one reuses the values of the previous stanza, and both are "" until
        the first title has been seen.
        '''

        if self.year and not year:
            year = self.year
        orig_current_state = current_state
        inputdata = inputfile.read()
        # Both are bound before the loop so the yield below cannot hit an
        # unbound line_name when a stanza has no I01 title line.
        name = ""
        line_name = ""
        for stanza in self.make_stanzas(inputdata):
            logger.debug("CRI stanza:%s", stanza)
            out = io.StringIO()
            # For every sub document in the dat file reset the state to the
            # start
            current_state = orig_current_state
            current_state_stack = []
            stanza_lines = self.makelines(stanza, output=out)
            for cnt, (_page, _page_match, line) in enumerate(stanza_lines):
                ret_current_state_stack, _output_line = process_lines(
                    line,
                    current_state,
                    outputf=out,
                    locator_table=locator_table,
                    font_table=font_table,
                    postfix=postfix)
                current_state = ret_current_state_stack[-1]
                logger.debug("Current state:%s", current_state)
                logger.debug("Previous state :%s", ret_current_state_stack[0])
                logger.debug("[%d] line:[%s] states[%s]", cnt, line,
                             ret_current_state_stack)
                current_state_stack.append((ret_current_state_stack, line))

            # check all non first items in stack if they exist and have a bellcode
            for state, stanza_line in current_state_stack:
                # first item in every state is the previous state, so skip it;
                # the length check keeps a one-item stack from raising.
                if len(state) < 2 or not state[1]:
                    continue
                for action, _grid in state[1:]:
                    if action and action.get('bellcode') == b'I01':
                        name, line_name = self.process_stanza_title(
                            stanza_line, year)

            # Close whatever element the last line of the stanza left open.
            if current_state[0] and current_state[0].get('end'):
                logger.debug(
                    "\tcurrent_state.end:%s",
                    current_state[0].get('end'))
                output(current_state[0].get('end'), outf=out)
            # rewind to the beginning now that we are finished with output.
            out.seek(0)

            # Every stanza is yielded, including ones without a title.
            yield ((name, line_name), out)