예제 #1
0
def perform(dirname, stype, convert=None, limit=None):
    """Run ``convert`` over the PDF files found under ``dirname``.

    Candidate ``(dirpath, fname)`` pairs are taken from
    ``walkdir(dirname, stype, ['pdf'])`` and truncated to the first ``limit``
    pairs (``limit=None`` means no truncation).  For every pair the target
    path is ``<dirpath>/<stype>-<bbl>.txt``, where ``bbl`` is the second
    element returned by ``split_fname(fname)``.  A file whose target already
    exists is skipped; otherwise ``convert(infile, outfile)`` is invoked.

    An exception raised while converting is logged and recorded under
    'fail'; it does not abort the loop.

    Returns:
        A ``defaultdict(list)`` whose 'seen', 'skip', 'good' and 'fail'
        entries hold the ``bbl`` values that fell into each category.

    Raises:
        ValueError: if the stype parsed from a file name differs from
            ``stype``.
    """
    log.info("limit = %s" % limit)
    log.info("dirname = %s" % dirname)
    status = defaultdict(list)
    candidates = islice(walkdir(dirname, stype, ['pdf']), limit)
    for dirpath, fname in candidates:
        found_stype, bbl, _ext = split_fname(fname)
        if found_stype != stype:
            raise ValueError("stype mismatch")
        log.debug("stype,bbl = %s,%d .." % (found_stype, bbl))
        status['seen'].append(bbl)
        infile = "%s/%s" % (dirpath, fname)
        outfile = "%s/%s-%d.txt" % (dirpath, stype, bbl)
        if not os.path.exists(outfile):
            # Broad catch on purpose: one bad file must not stop the batch.
            try:
                convert(infile, outfile)
                log.info("GOOD %d" % bbl)
                status['good'].append(bbl)
            except Exception as err:
                log.info("FAIL %d %s" % (bbl, err))
                log.exception(err)
                status['fail'].append(bbl)
        else:
            # An existing output file means this input was handled before.
            log.info("SKIP %d" % bbl)
            status['skip'].append(bbl)
    return status
예제 #2
0
def yield_after(page, label, offset=1):
    """Yield the line located ``offset`` positions from each labelled line.

    Args:
        page: Sequence of strings; it is enumerated, measured with ``len``
            and indexed, so it must support all three.
        label: Prefix that marks an anchor line (tested with
            ``str.startswith``).
        offset: Distance from an anchor line to the line to yield.  May be
            zero or negative as long as the resulting index stays inside
            ``page``.

    Yields:
        ``page[i + offset]`` for every index ``i`` whose line starts with
        ``label`` and whose target index lies within ``page``.

    Targets that fall outside the page are not yielded; a debug message is
    logged for each of them instead.
    """
    for i, line in enumerate(page):
        if not line.startswith(label):
            continue
        target = i + offset
        # The lower bound matters: without it a negative target would
        # silently wrap around and return a line from the end of the page.
        if 0 <= target < len(page):
            yield page[target]
        else:
            log.debug("weirdness: overflow for label='%s', offset=%d",
                      label, offset)
예제 #3
0
 def _walkdir():
     """Yield the expansion of every target that processes successfully.

     ``targets``, ``pulldir``, ``stype`` and ``status`` are not defined in
     this function; presumably they come from the enclosing scope, which is
     not visible here.

     Each target is passed to ``process_target``.  A target that raises is
     recorded under ``status['fail']``, one with a falsy result under
     ``status['miss']``, and one with a truthy result under
     ``status['good']``, after which the items of ``expand(bbl, result)``
     are yielded.  Because this is a generator, ``status`` is only updated
     as the generator is consumed.
     """
     for bbl in targets:
         log.debug("bbl = %s" % bbl)
         try:
             found = process_target(pulldir, stype, bbl)
         except Exception as err:
             # Broad catch on purpose: a failing target must not end the walk.
             log.info("FAIL %s = %s" % (bbl, err))
             log.exception(err)
             status['fail'] += [bbl]
             continue
         if not found:
             log.info("MISS %s" % bbl)
             status['miss'] += [bbl]
             continue
         log.info("GOOD %s" % bbl)
         status['good'] += [bbl]
         yield from expand(bbl, found)
예제 #4
0
 def post(self, url, **kwargs):
     """Issue a POST through ``self.s`` and return the response object.

     ``kwargs`` are forwarded unchanged to ``self.s.post``.  The URL and the
     response headers are logged at debug level, the status code at info
     level.
     """
     log.debug("url = %s" % url)
     response = self.s.post(url, **kwargs)
     log.info("POST r.status = %s" % response.status_code)
     log.debug("POST r.headers = %s" % response.headers)
     return response
예제 #5
0
 def get(self, url, **kwargs):
     """Issue a GET through ``self.s`` and return the response object.

     ``kwargs`` are forwarded unchanged to ``self.s.get``.  The URL and the
     response headers are logged at debug level, the status code at info
     level.
     """
     log.debug("url = %s" % url)
     response = self.s.get(url, **kwargs)
     log.info("GET status = %s" % response.status_code)
     log.debug("GET r.headers = %s" % response.headers)
     return response