def new_book(filename, category=None):
    """Create a book from *filename* with the parser registered for its suffix.

    The suffix is looked up in the module-level ``file_types`` registry.
    Each registry entry is indexed as follows:

    * ``[0]`` -- iterable of file-name suffixes handled by the type,
    * ``[1]`` -- parser factory, called without arguments,
    * ``[2]`` -- value passed to the parser as ``external_filter``,
    * ``[3]`` -- value passed to the parser as ``encoding``.

    If no registered suffix matches, ``plain_text_parser`` is used.

    :param filename: path of the file to open.
    :param category: passed through unchanged to the parser's ``new_book``.
    :returns: a ``(book, file_parser)`` tuple.
    """
    import zipfile

    if zipfile.is_zipfile(filename):
        # Match on the name reported by zip_get_filename() rather than on
        # the archive's own name (presumably the archived member's name --
        # confirm against zip_get_filename).
        fn = zip_get_filename(filename)
    else:
        fn = filename

    def longest_suffix(file_type):
        return max(len(suffix) for suffix in file_types[file_type][0])

    # Try the types with the longest suffixes first, so that e.g.
    # '.html.gz' is matched before '.gz'.  A key-based sort replaces the
    # Python 2-only cmp-based one; reverse=True keeps the sort stable, so
    # types with equally long suffixes stay in their original order.
    ordered_types = sorted(file_types, key=longest_suffix, reverse=True)

    # Search for the parser whose suffix matches.
    for file_type in ordered_types:
        if any(fn.endswith(suffix) for suffix in file_types[file_type][0]):
            file_parser = file_types[file_type][1]()
            external_filter = file_types[file_type][2]
            encoding = file_types[file_type][3]
            book = file_parser.new_book(
                filename,
                category=category,
                external_filter=external_filter,
                encoding=encoding,
            )
            return book, file_parser

    # Suffix not found -> treat the file as plain text.
    file_parser = plain_text_parser()
    book = file_parser.new_book(filename, category=category)
    return book, file_parser
def get_file(filename):
    """Return the path of a file holding the unpacked contents of *filename*.

    If *filename* is a zip archive, the member named by
    ``miscutils.zip_get_filename(filename)`` is extracted into a newly
    created temporary file and that file's path is returned.  The temporary
    file is not removed by this function.  Any other file is returned
    unchanged.

    :param filename: path of the file to inspect.
    :returns: path of a file to read the contents from.
    """
    if not zipfile.is_zipfile(filename):
        return filename

    zip_fn = miscutils.zip_get_filename(filename)
    with zipfile.ZipFile(filename) as archive:
        data = archive.read(zip_fn)

    # Archive members are raw bytes, so the copy must be written in binary
    # mode (NamedTemporaryFile defaults to 'w+b').  Creating the file
    # atomically also avoids the race inherent in tempfile.mktemp().
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp.write(data)
    return tmp.name
def get_file(filename):
    """Return the path of a file holding the unpacked contents of *filename*.

    * A zip archive: the member named by
      ``miscutils.zip_get_filename(filename)`` is extracted.
    * A name ending in ``.gz`` or ``.bz2``: the file is decompressed with
      ``gzip`` or ``bz2`` respectively.
    * Anything else: *filename* itself is returned unchanged.

    Unpacked data is written to a newly created temporary file whose path
    is returned.  The temporary file is not removed by this function.

    :param filename: path of the file to inspect.
    :returns: path of a file to read the contents from.
    """
    if zipfile.is_zipfile(filename):
        zip_fn = miscutils.zip_get_filename(filename)
        with zipfile.ZipFile(filename) as archive:
            data = archive.read(zip_fn)
    elif filename.endswith('.gz'):
        # FIXME: should test the file header instead of the suffix.
        with gzip.GzipFile(filename) as packed:
            data = packed.read()
    elif filename.endswith('.bz2'):
        # FIXME: should test the file header instead of the suffix.
        with bz2.BZ2File(filename) as packed:
            data = packed.read()
    else:
        return filename

    # The unpacked payload is raw bytes, so it must be written in binary
    # mode (NamedTemporaryFile defaults to 'w+b').  Creating the file
    # atomically also avoids the race inherent in tempfile.mktemp().
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp.write(data)
    return tmp.name
def new_book(filename, category=None):
    """Create a book from *filename* with the parser chosen by ``detect_type``.

    The detected type selects an entry of the module-level ``file_types``
    registry: item ``[1]`` is called without arguments to build the parser,
    and items ``[2]`` and ``[3]`` are passed to the parser's ``new_book`` as
    ``external_filter`` and ``encoding``.  The detected type is stored on
    the resulting book as ``file_type``.

    :param filename: path of the file to open.
    :param category: passed through unchanged to the parser's ``new_book``.
    :returns: a ``(book, file_parser)`` tuple.
    """
    import zipfile

    # For a zip archive, detect the type from the name reported by
    # zip_get_filename() (presumably the archived member's name -- confirm
    # against zip_get_filename) instead of the archive's own name.
    name_for_detection = (
        zip_get_filename(filename) if zipfile.is_zipfile(filename) else filename
    )

    kind = detect_type(name_for_detection)
    entry = file_types[kind]

    file_parser = entry[1]()
    parser_options = {
        'external_filter': entry[2],
        'encoding': entry[3],
    }
    book = file_parser.new_book(filename, category=category, **parser_options)
    book.file_type = kind
    return book, file_parser