Esempio n. 1
0
def parse_binary(string=None, fname=None, suffix=None, okext=OKEXT,
                 tryagain=True, **xargs):
    """
    Parse a binary document supplied as raw content or as a file path.

    Dispatch rules:

    * No ``string`` but an ``fname`` whose suffix is in ``okext``: the file
      is handed to ``parse_binary_from_file``.
    * ``string`` without ``suffix``: the suffix is built from the last
      dot-separated component of ``fname`` when one is given, otherwise it
      comes from ``guess_ext_from_mime(string)``.
    * ``string`` with an explicit ``suffix``: the suffix is used as given,
      provided it is in ``okext`` (and, when ``fname`` is also given, the
      file name's own suffix is in ``okext`` too).

    Args:
        string: Raw content to parse. Any falsy value counts as "not given".
        fname: File name or path. Parsed directly when no ``string`` is
            given; otherwise only used to derive/validate the suffix and
            in the debug log message.
        suffix: Extension with its leading dot (the form compared against
            ``okext``).
        okext: Container of accepted extensions.
        tryagain: When true and the first parse yields an empty result,
            retry once with the extension derived from the content via
            ``guess_ext_from_mime``.
        **xargs: Accepted for call compatibility; not used.

    Returns:
        Whatever ``parse_binary_from_file`` / ``parse_binary_from_string``
        returns, or ``None`` when the extension involved is not in
        ``okext``.

    Raises:
        ValueError: If neither ``string`` nor ``fname`` is given although
            ``suffix`` is an accepted extension.
    """
    has_string = bool(string)

    if not has_string:
        if fname and get_file_suffixes(fname) in okext:
            return parse_binary_from_file(fname)
        if not fname and suffix in okext:
            # A usable suffix, but no content and no file to apply it to.
            raise ValueError('Did not provide string or fname')
        return None

    if suffix:
        # Content plus an explicit suffix is a complete request: validate
        # the suffix and go on to parse, instead of reporting the input as
        # missing.
        if suffix not in okext:
            return None
        if fname and get_file_suffixes(fname) not in okext:
            return None
    elif fname:
        suffix = auto_unicode_dang_it('.' +
                                      fname.split('.')[-1]).encode('ascii')
    else:
        suffix = guess_ext_from_mime(string)

    if suffix.lower() not in okext:
        return None

    prsd = parse_binary_from_string(string=string, suffix=suffix)
    if not tryagain or len(prsd) > 0:
        return prsd

    # Empty result: the supplied/derived suffix may be wrong, so retry once
    # with the extension guessed from the content itself.
    try:
        extbymime = guess_ext_from_mime(string)
    except KeyError:
        extbymime = None
    if extbymime and (extbymime.lower() in okext):
        try:
            return parse_binary_from_string(string, suffix=extbymime)
        except ValueError:
            LOG.debug('body len=0, and mime '
                      'derived ext resulted in ValueError, giving up.\t'
                      'Supplied ext:\t' + suffix + '\t'
                      'Mime derived ext:\t' + str(extbymime) + '\t'
                      'Filename:\t' + str(fname))
    # Retry not possible or failed: hand back the (empty) first result.
    return prsd
Esempio n. 2
0
def document_to_text(filepath, okext=OKEXT):
    """
    Extract the text of the document at ``filepath``.

    The lower-cased result of ``get_file_suffixes(filepath)`` selects a
    handler from ``BFILEHANDLEDICT``; when that lookup (or the handler
    itself) raises ``KeyError``, ``auto_textract`` is used instead.

    Returns the extracted text passed through ``auto_unicode_dang_it``, or
    ``u''`` when the suffix is not in ``okext`` or nothing was extracted.
    """
    suffix = get_file_suffixes(filepath).lower()
    if suffix not in okext:
        return u''

    try:
        extracted = BFILEHANDLEDICT[suffix](filepath)
    except KeyError:
        # No dedicated handler registered for this suffix (a KeyError
        # escaping from a handler ends up here as well).
        extracted = auto_textract(filepath)

    return auto_unicode_dang_it(extracted) if extracted else u''
Esempio n. 3
0
def test__get_file_suffixes__if_pathlib_is_installed():
    """A compound extension is returned whole, e.g. ``.tar.gz``."""
    path = "/foo/bar/baz.tar.gz"
    expected = ".tar.gz"
    assert utils.get_file_suffixes(path) == expected
Esempio n. 4
0
def test__get_file_suffixes__3():
    """A dot inside a directory name does not leak into the suffix."""
    path = "/foo/b.ar/baz.txt"
    expected = ".txt"
    assert utils.get_file_suffixes(path) == expected
Esempio n. 5
0
def test__get_file_suffixes__2():
    """A path starting with ``~`` still yields the plain file suffix."""
    path = "~/baz.txt"
    expected = ".txt"
    assert utils.get_file_suffixes(path) == expected