Ejemplo n.º 1
0
# Module-level collaborators used by the rest of the script; all three are
# constructed from the same `cfg` object.
# NOTE(review): `cfg`, Storage, Finder and Convertor are defined outside this
# view — their constructor side effects are not visible here.
db = Storage(cfg)
# Load the store before anything reads `db.data`.
# NOTE(review): presumably this populates `db.data` from persistent storage —
# confirm in Storage.load().
db.load()
finder = Finder(cfg)
convertor = Convertor(cfg)


def process_file(f):
    """Convert ``f`` unless its checksum is already recorded in the store.

    The checksum of ``f`` is computed with ``Storage.file_checksum`` and
    looked up in the module-level ``db.data``. For an unseen checksum a
    record ``{"pdf": f}`` is stored first, and only then is ``f`` handed to
    the module-level ``convertor``. For a known checksum a message is
    printed and nothing else happens.

    Args:
        f: The file to process, passed unchanged to ``Storage.file_checksum``
            and ``convertor.convert``.

    Returns:
        None.
    """
    digest = Storage.file_checksum(f)

    if digest not in db.data:
        # Unseen file: register it before kicking off the conversion.
        db.store(digest, {"pdf": f})
        convertor.convert(f)
        return

    print("file {} already processed ({})".format(f, digest))


# Run every file reported by the finder through process_file().
for f in finder.find_all():
    process_file(f)
    # Reload the store after each file.
    # NOTE(review): presumably this refreshes `db.data` so that later
    # duplicate checks see the entry just stored — confirm whether
    # Storage.store() already keeps `db.data` current.
    db.load()

# Walk the "*.html" files in the cfg["html_out"] directory and fill in a
# "title" entry for every stored record that does not have one yet.
for f in glob.glob(os.path.join(cfg["html_out"], "*.html")):
    # Derive the record key from the path by removing the ".html" suffix and
    # the "html/" directory prefix.
    # NOTE(review): the prefix is hard-coded as "html/" while the glob uses
    # cfg["html_out"]; if that setting is anything other than "html" the key
    # will still contain the directory part — confirm.
    checksum = f.replace(".html", '').replace("html/", '')

    # NOTE(review): this raises KeyError when an HTML file has no matching
    # record in the store — confirm that cannot happen.
    data = db.data[checksum]
    # Tracks whether `data` was changed in this iteration; it is not read in
    # the lines visible here (presumably consumed further down — confirm).
    dirty = False
    # print(data)

    if "title" not in db.data[checksum]:
        print("title not cached")
        dirty = True
        # Parse the title from the HTML file and add it to the record.
        title = Parser().parse(f)
        data.update({"title": title})