def parse_binary(string=None, fname=None, suffix=None, okext=OKEXT, tryagain=True, **xargs):
    """Parse binary content supplied as a string or via a file name.

    Args:
        string: Raw content to parse. When falsy, the function works from
            ``fname`` instead.
        fname: File name. With no ``string`` it is handed to
            ``parse_binary_from_file`` (if its suffix is in ``okext``).
            With a ``string`` but no ``suffix``, the text after its last
            dot is used as the suffix.
        suffix: Explicit extension for ``string``. When omitted it is
            derived from ``fname`` or, failing that, from
            ``guess_ext_from_mime(string)``.
        okext: Collection of accepted extensions.
        tryagain: When true and the first parse result is empty, retry
            once with the extension returned by ``guess_ext_from_mime``.
        **xargs: Accepted and ignored.

    Returns:
        Whatever ``parse_binary_from_file`` / ``parse_binary_from_string``
        returns, or ``None`` when the extension is not in ``okext``.

    Raises:
        ValueError: If no ``string`` is given, ``fname`` is empty, and
            ``suffix`` is in ``okext`` (there is nothing to parse).
    """
    has_string = bool(string)

    if not has_string:
        # No in-memory content: the only usable source is the file itself.
        if fname and get_file_suffixes(fname) in okext:
            return parse_binary_from_file(fname)
        if (suffix not in okext) or (fname and get_file_suffixes(fname) not in okext):
            return None
        raise ValueError('Did not provide string or fname')

    if not suffix:
        if fname:
            # NOTE(review): on Python 3 ``str.encode`` yields bytes, which
            # will only match ``okext`` if it holds bytes -- confirm which
            # interpreter / okext element type this module targets.
            suffix = auto_unicode_dang_it('.' + fname.split('.')[-1]).encode('ascii')
        else:
            suffix = guess_ext_from_mime(string)
    elif (suffix not in okext) or (fname and get_file_suffixes(fname) not in okext):
        return None
    # A string with an accepted explicit suffix now falls through to the
    # parse below.  Previously this case raised
    # ValueError('Did not provide string or fname') even though a string
    # had been provided, so an explicit suffix could never be used.

    # Guard against an empty derived suffix before calling .lower() on it.
    if not suffix or suffix.lower() not in okext:
        return None

    prsd = parse_binary_from_string(string=string, suffix=suffix)
    # Truthiness also treats a None result as "empty" instead of letting
    # len(None) raise TypeError.
    if not tryagain or prsd:
        return prsd

    # The first attempt produced nothing: retry with the mime-derived
    # extension, if there is one and it is accepted.
    try:
        extbymime = guess_ext_from_mime(string)
    except KeyError:
        extbymime = None

    if extbymime and (extbymime.lower() in okext):
        try:
            return parse_binary_from_string(string, suffix=extbymime)
        except ValueError:
            LOG.debug('body len=0, and mime ' +
                      'derived ext resulted in ValueError, giving up.\t' +
                      'Supplied ext:\t' + suffix + '\t' +
                      'Mime derived ext:\t' + str(extbymime) + '\t' +
                      'Filename:\t' + str(fname))

    return prsd
def document_to_text(filepath, okext=OKEXT):
    """Extract the text of the document at ``filepath``.

    The lower-cased result of ``get_file_suffixes(filepath)`` selects a
    handler from ``BFILEHANDLEDICT``; if a ``KeyError`` is raised,
    ``auto_textract`` is used instead.  Non-empty output is passed through
    ``auto_unicode_dang_it``.

    Returns:
        The converted text, or ``u''`` when the extension is not in
        ``okext`` or nothing was extracted.
    """
    extension = get_file_suffixes(filepath).lower()
    if extension not in okext:
        return u''

    try:
        # The handler call stays inside the ``try``: a KeyError raised by
        # the handler itself also triggers the textract fallback.
        handler = BFILEHANDLEDICT[extension]
        extracted = handler(filepath)
    except KeyError:
        extracted = auto_textract(filepath)

    return auto_unicode_dang_it(extracted) if extracted else u''
def test__get_file_suffixes__if_pathlib_is_installed():
    """A multi-part extension is returned in full ('.tar.gz')."""
    path = "/foo/bar/baz.tar.gz"
    expected = ".tar.gz"
    assert utils.get_file_suffixes(path) == expected
def test__get_file_suffixes__3():
    """A dot inside a directory name does not leak into the suffix."""
    path = "/foo/b.ar/baz.txt"
    expected = ".txt"
    assert utils.get_file_suffixes(path) == expected
def test__get_file_suffixes__2():
    """A home-relative path ('~/...') yields the plain file suffix."""
    path = "~/baz.txt"
    expected = ".txt"
    assert utils.get_file_suffixes(path) == expected