Ejemplo n.º 1
0
    def add_content(self, toc):
        """Return a deep copy of ``toc`` with a ``'content'`` key on each entry.

        For every entry, the ``entry['range']['from']`` and
        ``entry['range']['to']`` values are collected into a set, and the
        resulting list of sets is passed to ``Miner.get_pages`` together
        with ``self.filename``. The value returned for each position is
        cleaned up and stored under ``'content'`` on the matching entry.

        The ``toc`` argument itself is not modified; all changes are made
        on the copy that is returned.

        NOTE(review): assumes ``toc`` is a list of dicts and that
        ``Miner.get_pages`` returns one string per requested page set,
        indexable by position -- confirm against ``Miner``.
        """
        enriched = deepcopy(toc)

        # One set per entry; when 'from' and 'to' are equal the set
        # naturally collapses to a single element.
        pagenums = [
            {entry['range']['from'], entry['range']['to']}
            for entry in enriched
        ]

        outputs = Miner.get_pages(pdf_path=self.filename, pagenums=pagenums)

        for idx, entry in enumerate(enriched):
            # Replace every run of characters other than ASCII letters,
            # digits and . , ? ! with a single space.
            entry['content'] = re.sub('[^A-Za-z0-9.,?!]+', ' ', outputs[idx])

        return enriched