Ejemplo n.º 1
0
def execute(indexobj, filename):
    """Hand the ``pdftotext`` command template and *filename* to
    ``filter.execfilter`` and return whatever it produces.

    *indexobj* is accepted but not used here.
    """
    command_template = "pdftotext -enc UTF-8 %s -"
    return filter.execfilter(command_template, filename)
def execute(indexobj, filename):
    """Hand the quiet ``pdftotext`` command template and *filename* to
    ``execfilter`` and return whatever it produces.

    *indexobj* is accepted but not used here.
    """
    # The -q switch requests quiet operation (no messages, no errors).
    # Without it, poppler-utils pdftotext on Debian/Etch writes a lot to
    # stderr (e.g. 10MB), which causes problems in the current execfilter
    # implementation.
    command_template = "pdftotext -q -enc UTF-8 %s -"
    return execfilter(command_template, filename)
def execute(indexobj, filename):
    """Hand the ``catppt`` command template and *filename* to
    ``execfilter`` and return whatever it produces.

    *indexobj* is accepted but not used here.
    """
    return execfilter("catppt -dutf-8 %s", filename)
Ejemplo n.º 4
0
def execute(indexobj, filename):
    """Hand the ``antiword`` command template and *filename* to
    ``filter.execfilter`` and return whatever it produces.

    *indexobj* is accepted but not used here.
    """
    command_template = "antiword %s"
    return filter.execfilter(command_template, filename)
Ejemplo n.º 5
0
def execute(indexobj, filename):
    """Return the result of ``execfilter`` for the ``catppt`` command
    template applied to *filename*.

    *indexobj* is accepted but not used here.
    """
    return execfilter("catppt -dutf-8 %s", filename)
Ejemplo n.º 6
0
def execute(indexobj, filename):
    """Pass the ``xls2csv`` command template and *filename* to
    ``execfilter`` and return the result with CSV punctuation blanked out.

    *indexobj* is accepted but not used here.
    """
    text = execfilter("xls2csv %s", filename)
    # xls2csv separates fields with commas and wraps field content in
    # double quotes; replace both with spaces so they do not confuse the
    # indexer.
    for csv_char in (u',', u'"'):
        text = text.replace(csv_char, u' ')
    return text
Ejemplo n.º 7
0
def execute(indexobj, filename):
    """Hand the ``antiword`` command template and *filename* to
    ``execfilter`` and return whatever it produces.

    On POSIX systems the template is prefixed with ``HOME=/tmp``.
    *indexobj* is accepted but not used here.
    """
    # Without HOME, antiword complains (on Linux), so supply one on POSIX.
    env_prefix = "HOME=/tmp " if os.name == 'posix' else ""
    return execfilter(env_prefix + "antiword %s", filename)
Ejemplo n.º 8
0
def execute(indexobj, filename):
    """Hand the ``catdoc`` command template and *filename* to
    ``filter.execfilter`` and return whatever it produces.

    *indexobj* is accepted but not used here.
    """
    command_template = "catdoc %s"
    return filter.execfilter(command_template, filename)
Ejemplo n.º 9
0
def execute(indexobj, filename):
    """Return the result of ``execfilter`` for the ``xls2csv`` command
    template applied to *filename*, with commas and double quotes replaced
    by spaces.

    *indexobj* is accepted but not used here.
    """
    converted = execfilter("xls2csv %s", filename)
    # xls2csv emits comma-separated, double-quoted fields; blank out both
    # characters so they do not confuse the indexer.
    for punctuation in (u',', u'"'):
        converted = converted.replace(punctuation, u' ')
    return converted
Ejemplo n.º 10
0
def execute(indexobj, filename):
    """Hand the ``catdoc`` command template and *filename* to
    ``execfilter`` and return whatever it produces.

    *indexobj* is accepted but not used here.
    """
    command_template = "catdoc %s"
    return execfilter(command_template, filename)