Esempio n. 1
0
def new_book(filename, category=None):
    """Create a book from *filename* using the parser registered for its suffix.

    If *filename* is a zip archive, the suffix is matched against the name
    returned by ``zip_get_filename(filename)``; the parser itself is still
    handed the original *filename*.

    Each ``file_types`` entry is read by index: ``[0]`` the suffixes,
    ``[1]`` the parser factory, ``[2]`` the external filter and ``[3]`` the
    encoding.

    Returns a ``(book, file_parser)`` tuple. When no registered suffix
    matches, the file is handled by ``plain_text_parser``.
    """
    import zipfile

    if zipfile.is_zipfile(filename):  # content check, not a '.zip' suffix test
        fn = zip_get_filename(filename)
    else:
        fn = filename

    def longest_suffix(file_type):
        # Length of the longest suffix registered for this file type.
        return max(map(len, file_types[file_type][0]))

    # Try the types with the longest suffixes first, so that e.g. '.html.gz'
    # is matched before '.gz'.  key=/reverse= replaces the Python-2-only cmp
    # comparator; the sort is stable, so ties keep their original order.
    ordered_types = sorted(file_types, key=longest_suffix, reverse=True)

    # Search for a parser whose suffix matches.
    for file_type in ordered_types:
        entry = file_types[file_type]
        if any(fn.endswith(suffix) for suffix in entry[0]):
            file_parser = entry[1]()
            book = file_parser.new_book(
                filename,
                category=category,
                external_filter=entry[2],
                encoding=entry[3],
            )
            return book, file_parser

    # Suffix not found -> plain text.
    file_parser = plain_text_parser()
    book = file_parser.new_book(filename, category=category)
    return book, file_parser
Esempio n. 2
0
def get_file(filename):
    """Return a path to the unpacked contents of *filename*.

    When *filename* is a zip archive, the member named by
    ``miscutils.zip_get_filename(filename)`` is extracted into a newly
    created temporary file and that file's path is returned. The temporary
    file is not deleted here; the caller owns it.

    Any other file is returned unchanged.
    """
    if not zipfile.is_zipfile(filename):  # content check, not a '.zip' suffix test
        return filename

    zip_fn = miscutils.zip_get_filename(filename)
    with zipfile.ZipFile(filename) as archive:
        data = archive.read(zip_fn)

    # NamedTemporaryFile(delete=False) creates the file atomically (unlike the
    # race-prone tempfile.mktemp) and opens it in binary mode, so the archive
    # bytes are written verbatim and the handle is closed deterministically.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp.write(data)
    return tmp.name
Esempio n. 3
0
def _write_temp_file(data):
    """Write the bytes *data* to a new temporary file and return its path."""
    # NamedTemporaryFile(delete=False) creates the file atomically (unlike the
    # race-prone tempfile.mktemp) and opens it in binary mode, so the payload
    # is stored verbatim; the file is kept after the handle is closed.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp.write(data)
    return tmp.name


def get_file(filename):
    """Return a path to the uncompressed contents of *filename*.

    * zip archive (detected from the file content): the member named by
      ``miscutils.zip_get_filename(filename)`` is extracted;
    * name ending in ``.gz`` or ``.bz2``: the file is decompressed;
    * anything else: *filename* is returned unchanged.

    In the first two cases the data is written to a newly created temporary
    file whose path is returned. The temporary file is not deleted here; the
    caller owns it.
    """
    if zipfile.is_zipfile(filename):  # content check, not a '.zip' suffix test
        zip_fn = miscutils.zip_get_filename(filename)
        with zipfile.ZipFile(filename) as archive:
            return _write_temp_file(archive.read(zip_fn))

    # FIXME: gzip/bzip2 are recognised by suffix only; this should be a
    # header test.
    if filename.endswith('.gz'):
        opener = gzip.GzipFile
    elif filename.endswith('.bz2'):
        opener = bz2.BZ2File
    else:
        return filename

    with opener(filename) as stream:
        data = stream.read()
    return _write_temp_file(data)
Esempio n. 4
0
def new_book(filename, category=None):
    """Build a book from *filename* with the parser chosen by ``detect_type``.

    For a zip archive the type is detected from the name returned by
    ``zip_get_filename(filename)``; otherwise from *filename* itself. The
    parser is always given the original *filename*.

    The matching ``file_types`` entry is read by index: ``[1]`` the parser
    factory, ``[2]`` the external filter and ``[3]`` the encoding.

    Returns a ``(book, file_parser)`` tuple; the detected type is also stored
    on the book as ``book.file_type``.
    """
    import zipfile

    # is_zipfile() looks at the file itself instead of trusting a '.zip' suffix.
    probe_name = zip_get_filename(filename) if zipfile.is_zipfile(filename) else filename
    kind = detect_type(probe_name)

    file_parser = file_types[kind][1]()
    parser_options = {
        'category': category,
        'external_filter': file_types[kind][2],
        'encoding': file_types[kind][3],
    }
    book = file_parser.new_book(filename, **parser_options)
    book.file_type = kind
    return book, file_parser