Example #1
0
 def download(self):
     """Split the site news file into one downloaded file per news item.

     The file named by ``self.config.newsfile`` (located through
     ``self.resourceloader``) is read line by line as UTF-8. Every line
     matched by ``self.re_news_subjectline`` starts a new item; group 1
     of the match is parsed as ``%Y-%m-%d %H:%M:%S`` and the integer
     timestamp of that date becomes the item's basefile. Each item is
     first written to a temporary file and then handed to
     ``util.replace_if_different`` together with the item's downloaded
     path; an info message is logged when that call returns a true
     value.

     Lines that appear before the first subject line belong to no item
     and are skipped, and an empty news file produces no items.
     """
     ofp = temppath = path = basefile = None
     newsfile = self.resourceloader.filename(self.config.newsfile)
     try:
         with codecs.open(newsfile, encoding="utf-8") as fp:
             for line in fp:
                 m = self.re_news_subjectline(line)
                 if m:
                     if ofp:
                         # A new subject line ends the previous item.
                         ofp.close()
                         ofp = None
                         if util.replace_if_different(temppath, path):
                             self.log.info("%s: creating news item" % basefile)
                     # NOTE(review): the parsed datetime is naive, so
                     # timestamp() interprets it as local time.
                     d = datetime.strptime(m.group(1), "%Y-%m-%d %H:%M:%S")
                     basefile = str(int(d.timestamp()))
                     path = self.store.downloaded_path(basefile)
                     fileno, temppath = tempfile.mkstemp(text=True)
                     util.ensure_dir(path)
                     # Write with the same encoding the input was read
                     # with instead of the platform default.
                     ofp = os.fdopen(fileno, "w", encoding="utf-8")
                 if ofp is None:
                     # No subject line seen yet: nothing to write to.
                     continue
                 ofp.write(line)
         if ofp:
             # Finish the last item (absent when the file had no items).
             ofp.close()
             ofp = None
             if util.replace_if_different(temppath, path):
                 self.log.info("%s: download OK (creating news item)" %
                               basefile)
     finally:
         # Do not leak the handle if anything above raised.
         if ofp:
             ofp.close()
Example #2
0
    def test_replace_if_different(self):
        """Walk util.replace_if_different through four consecutive scenarios.

        The scenarios build on each other: the destination file left
        behind by one step is the starting state of the next.
        """
        src, dst = self.fname, self.fname2
        replace = util.replace_if_different
        exists = os.path.exists

        # Scenario 1: no destination yet, so the source is moved there.
        util.writefile(src, "Hello")
        self.assertTrue(replace(src, dst))
        self.assertFalse(exists(src))
        self.assertTrue(exists(dst))

        # Scenario 2: destination holds other content and is overwritten.
        changed = "Hello (different)"
        util.writefile(src, changed)
        self.assertTrue(replace(src, dst))
        self.assertFalse(exists(src))
        self.assertEqual(changed, util.readfile(dst))

        # Scenario 3: identical content; nothing is replaced, but the
        # source is still removed.
        util.writefile(src, changed)
        self.assertFalse(replace(src, dst))
        self.assertFalse(exists(src))

        # Scenario 4: differing destination is archived before being
        # replaced by the new file.
        incoming = "%s/new.txt" % self.dname
        archived = "%s/archive.txt" % self.dname
        fresh = "Hello (archiving)"
        util.writefile(incoming, fresh)
        self.assertTrue(replace(incoming, dst, archived))
        self.assertFalse(exists(incoming))
        self.assertEqual(fresh, util.readfile(dst))
        self.assertEqual(changed, util.readfile(archived))
Example #3
0
    def download(self, basefile=None):
        """Save a HTML snippet for, and then download, every listed document.

        Fetches ``self.start_url`` and looks inside the element with id
        ``mainarea`` for every text node equal to ``NUMMER``. The div
        holding that label is expected to be followed by a ``TYP`` div
        and then a ``RUBRIK`` div; entries where that is not the case
        are logged as errors and skipped. For every accepted entry the
        three divs are written as UTF-8 to a temporary file that is
        handed to ``util.replace_if_different`` together with the
        snippet path (the downloaded path with ``.pdf`` replaced by
        ``.snippet.html``), after which ``self.download_single`` is
        called with the document number.

        :param basefile: Not used by this implementation.
        """
        # Imported locally so the method does not depend on which names
        # the module happens to import at the top of the file.
        import os
        from tempfile import mkstemp

        soup = BeautifulSoup(requests.get(self.start_url).text)
        main = soup.find(id="mainarea")
        for numberlabel in main.findAll(text="NUMMER"):
            numberdiv = numberlabel.findParent("div").parent

            typediv = numberdiv.findNextSibling()
            if typediv.find("div", "FFFSListAreaLeft").get_text(strip=True) != "TYP":
                self.log.error("Expected TYP in div, found %s" % typediv.get_text(strip=True))
                continue

            titlediv = typediv.findNextSibling()
            if titlediv.find("div", "FFFSListAreaLeft").get_text(strip=True) != "RUBRIK":
                self.log.error("Expected RUBRIK in div, found %s" % titlediv.get_text(strip=True))
                continue

            number = numberdiv.find("div", "FFFSListAreaRight").get_text(strip=True)
            snippetfile = self.store.downloaded_path(number).replace(".pdf", ".snippet.html")

            # mkstemp() creates the file atomically, avoiding the race
            # inherent in mktemp(). The descriptor is closed right away
            # and the (now existing) file reopened through codecs so the
            # bytes written are the same as before.
            fd, tmpfile = mkstemp()
            os.close(fd)
            with codecs.open(tmpfile, "w", encoding="utf-8") as fp:
                fp.write(str(numberdiv))
                fp.write(str(typediv))
                fp.write(str(titlediv))
            util.replace_if_different(tmpfile, snippetfile)

            # NOTE(review): ``usecache`` is not defined anywhere in this
            # method; unless it is a module-level name this call raises
            # NameError. Confirm against download_single's signature
            # before changing or dropping the argument.
            self.download_single(number, usecache)
Example #4
0
 def crop(self, top=0, left=0, bottom=None, right=None):
     """Remove every :py:class:`ferenda.pdfreader.Textbox` object that
     does not fit within the bounding box given by the parameters.

     The boxes returned by ``self.boundingbox(top, left, bottom, right)``
     replace the current content, with their coordinates reduced by the
     corresponding crop edge. ``self.width`` and ``self.height`` are set
     to the size of the crop area. If the file named by
     ``self.background`` exists, it is cropped to the same area with the
     external ``convert`` command.

     :param top: Top edge of the crop area.
     :param left: Left edge of the crop area.
     :param bottom: Bottom edge of the crop area; ``None`` means the
                    current ``self.height``.
     :param right: Right edge of the crop area; ``None`` means the
                   current ``self.width``.
     """
     # The arithmetic below needs numbers, so resolve the ``None``
     # defaults first. boundingbox() still receives the caller's
     # arguments untouched, leaving its own None handling as it was.
     crop_right = self.width if right is None else right
     crop_bottom = self.height if bottom is None else bottom

     newboxes = []
     for box in self.boundingbox(top, left, bottom, right):
         box.top = box.top - top
         box.left = box.left - left
         # NOTE(review): right/bottom are reduced by the right/bottom
         # crop edge rather than by left/top -- confirm this is the
         # intended coordinate adjustment.
         box.right = box.right - crop_right
         box.bottom = box.bottom - crop_bottom
         newboxes.append(box)
     self[:] = []
     self.extend(newboxes)
     self.width = crop_right - left
     self.height = crop_bottom - top
     # Crop the background image to the same area.
     if os.path.exists(self.background):
         # NOTE(review): the path is interpolated unquoted into the
         # command line; paths containing spaces or shell
         # metacharacters may break the command.
         cmdline = "convert %s -crop %dx%d+%d+%d +repage %s" % (self.background,
                                                                self.width, self.height, left, top,
                                                                self.background + ".new")
         util.runcmd(cmdline, require_success=True)
         util.replace_if_different(
             "%s.new" % self.background, self.background)
Example #5
0
 def download(self):
     """Split the site news file into one downloaded file per news item.

     The file named by ``self.config.newsfile`` (located through
     ``self.resourceloader``) is read line by line as UTF-8. Every line
     matched by ``self.re_news_subjectline`` starts a new item; group 1
     of the match is parsed as ``%Y-%m-%d %H:%M:%S`` and the integer
     timestamp of that date becomes the item's basefile. Each item is
     first written to a temporary file and then handed to
     ``util.replace_if_different`` together with the item's downloaded
     path; an info message is logged when that call returns a true
     value.

     Lines that appear before the first subject line belong to no item
     and are skipped, and an empty news file produces no items.
     """
     ofp = temppath = path = basefile = None
     newsfile = self.resourceloader.filename(self.config.newsfile)
     try:
         with codecs.open(newsfile, encoding="utf-8") as fp:
             for line in fp:
                 m = self.re_news_subjectline(line)
                 if m:
                     if ofp:
                         # A new subject line ends the previous item.
                         ofp.close()
                         ofp = None
                         if util.replace_if_different(temppath, path):
                             self.log.info("%s: creating news item" % basefile)
                     # NOTE(review): the parsed datetime is naive, so
                     # timestamp() interprets it as local time.
                     d = datetime.strptime(m.group(1), "%Y-%m-%d %H:%M:%S")
                     basefile = str(int(d.timestamp()))
                     path = self.store.downloaded_path(basefile)
                     fileno, temppath = tempfile.mkstemp(text=True)
                     util.ensure_dir(path)
                     # Write with the same encoding the input was read
                     # with instead of the platform default.
                     ofp = os.fdopen(fileno, "w", encoding="utf-8")
                 if ofp is None:
                     # No subject line seen yet: nothing to write to.
                     continue
                 ofp.write(line)
         if ofp:
             # Finish the last item (absent when the file had no items).
             ofp.close()
             ofp = None
             if util.replace_if_different(temppath, path):
                 self.log.info("%s: download OK (creating news item)" % basefile)
     finally:
         # Do not leak the handle if anything above raised.
         if ofp:
             ofp.close()