def perform(dirname, stype, convert=None, limit=None):
    """Run ``convert`` over the files found under ``dirname`` and tally results.

    Candidate files come from ``walkdir(dirname, stype, ['pdf'])``, truncated
    to at most ``limit`` entries (``None`` means no truncation).  For each
    ``(dirpath, fname)`` pair the file name is split into
    ``(stype, bbl, ext)``; the output path is ``<dirpath>/<stype>-<bbl>.txt``.

    Args:
        dirname: Root directory handed to ``walkdir``.
        stype: Expected type tag; every file name must parse to this value.
        convert: Callable invoked as ``convert(infile, outfile)``.
        limit: Maximum number of candidate files to look at, or ``None``.

    Returns:
        A ``defaultdict(list)`` of bbl values keyed by ``'seen'``, ``'skip'``
        (output already exists), ``'good'`` (converted) and ``'fail'``
        (conversion raised).

    Raises:
        ValueError: If a file name parses to a different ``stype``.
    """
    log.info("limit = %s" % limit)
    log.info("dirname = %s" % dirname)
    status = defaultdict(list)
    candidates = islice(walkdir(dirname, stype, ['pdf']), limit)
    for dirpath, fname in candidates:
        parsed_stype, bbl, _ext = split_fname(fname)
        if parsed_stype != stype:
            raise ValueError("stype mismatch")
        log.debug("stype,bbl = %s,%d .." % (parsed_stype, bbl))
        status['seen'].append(bbl)

        infile = "%s/%s" % (dirpath, fname)
        outfile = "%s/%s-%d.txt" % (dirpath, stype, bbl)

        # An existing output file means this input was handled on a prior run.
        if os.path.exists(outfile):
            log.info("SKIP %d" % bbl)
            status['skip'].append(bbl)
            continue

        # A failing conversion is recorded and logged, never propagated, so
        # one bad file does not abort the rest of the batch.
        try:
            convert(infile, outfile)
            log.info("GOOD %d" % bbl)
            status['good'].append(bbl)
        except Exception as exc:
            log.info("FAIL %d %s" % (bbl, exc))
            log.exception(exc)
            status['fail'].append(bbl)
    return status
def yield_after(page, label, offset=1):
    """Yield the line ``offset`` positions away from each ``label`` line.

    Scans ``page`` in order; for every line that starts with ``label`` the
    element at ``index + offset`` is yielded, provided that position lies
    inside ``page``.

    Args:
        page: Indexable sequence of strings (must support ``len`` and
            integer indexing).
        label: Prefix that marks the lines of interest.
        offset: Distance from the matching line to the line to yield.
            Defaults to 1 (the line directly after the match).

    Yields:
        ``page[index + offset]`` for each matching line whose target
        position is within bounds.  Out-of-range targets are skipped with
        a debug log entry.
    """
    for i, line in enumerate(page):
        if not line.startswith(label):
            continue
        target = i + offset
        # The lower bound matters: without it a negative target would be
        # interpreted as indexing from the end of the page and silently
        # yield an unrelated line.
        if 0 <= target < len(page):
            yield page[target]
        else:
            log.debug("weirdness: overflow for label='%s', offset=%d" % (label, offset))
def _walkdir():
    """Process every entry of ``targets`` and yield the expanded results.

    NOTE(review): ``targets``, ``pulldir``, ``stype``, ``status``,
    ``process_target`` and ``expand`` are not defined in this function;
    presumably they come from an enclosing scope -- confirm against the
    surrounding code.

    For each bbl, ``process_target(pulldir, stype, bbl)`` is called:

    * if it raises, the bbl is logged and recorded under ``status['fail']``;
    * if it returns a falsy value, the bbl is recorded under
      ``status['miss']``;
    * otherwise the bbl is recorded under ``status['good']`` and everything
      produced by ``expand(bbl, result)`` is yielded.
    """
    for bbl in targets:
        log.debug("bbl = %s" % bbl)
        try:
            result = process_target(pulldir, stype, bbl)
        except Exception as exc:
            # Record the failure and move on to the next target.
            log.info("FAIL %s = %s" % (bbl, exc))
            log.exception(exc)
            status['fail'] += [bbl]
            continue

        if not result:
            log.info("MISS %s" % bbl)
            status['miss'] += [bbl]
            continue

        log.info("GOOD %s" % bbl)
        status['good'] += [bbl]
        yield from expand(bbl, result)
def post(self, url, **kwargs):
    """Send a POST via ``self.s`` and log the outcome.

    Args:
        url: Target URL, passed straight to ``self.s.post``.
        **kwargs: Forwarded unchanged to ``self.s.post``.

    Returns:
        Whatever ``self.s.post`` returns (an object exposing
        ``status_code`` and ``headers``).
    """
    log.debug("url = %s" % url)
    response = self.s.post(url, **kwargs)
    log.info("POST r.status = %s" % response.status_code)
    log.debug("POST r.headers = %s" % response.headers)
    return response
def get(self, url, **kwargs):
    """Send a GET via ``self.s`` and log the outcome.

    Args:
        url: Target URL, passed straight to ``self.s.get``.
        **kwargs: Forwarded unchanged to ``self.s.get``.

    Returns:
        Whatever ``self.s.get`` returns (an object exposing
        ``status_code`` and ``headers``).
    """
    log.debug("url = %s" % url)
    response = self.s.get(url, **kwargs)
    log.info("GET status = %s" % response.status_code)
    log.debug("GET r.headers = %s" % response.headers)
    return response