def convert_csv(fname):
    """
    Convert all of the sheets in a given Excel spreadsheet to CSV files.

    Each sheet is written to "/tmp/sheet_<file name>-<sheet name>.csv",
    where <file name> is fname without its leading directories and spaces
    in the sheet name are replaced with "_SPACE_".

    The opened spreadsheet is always closed, even if exporting a sheet
    fails. The soffice process is left running.

    fname - The name of the file.
    return - A list of the names of the CSV sheet files. The list is empty
             if fname is not an Excel file.
    """

    # Make sure this is an Excel file.
    if not is_excel_file(fname):

        # Not Excel, so no sheets.
        return []

    # Run soffice in listening mode if it is not already running.
    run_soffice()

    # TODO: Make sure soffice is running in listening mode.

    # Connect to the local LibreOffice server.
    context = connect(Socket(HOST, PORT))

    # Load the Excel sheet.
    component = get_component(fname, context)

    r = []
    try:
        controller = component.getCurrentController()
        sheets = component.getSheets()
        enumeration = sheets.createEnumeration()

        # Strip leading directories from the input name. This does not
        # depend on the sheet, so do it once up front.
        short_name = fname.rsplit(os.path.sep, 1)[-1]

        # Iterate on all the sheets in the spreadsheet. An empty
        # enumeration simply skips the loop.
        while enumeration.hasMoreElements():

            # Move to next sheet.
            sheet = enumeration.nextElement()
            name = sheet.getName()

            # NOTE(review): the sheet is activated before each export,
            # presumably because the CSV filter only writes the active
            # sheet -- confirm against the LibreOffice filter docs.
            controller.setActiveSheet(sheet)

            # Set up the output URL.
            outfilename = "/tmp/sheet_%s-%s.csv" % (
                short_name, name.replace(' ', '_SPACE_'))
            r.append(outfilename)
            url = convert_path_to_url(outfilename)

            # Export the CSV.
            component.store_to_url(url, 'FilterName',
                                   'Text - txt - csv (StarCalc)')
    finally:
        # Close the spreadsheet, even when an export step raised, so the
        # document is not left open in the LibreOffice server.
        component.close(True)

    # Kill soffice after done?
    # /usr/lib/libreoffice/program/soffice.bin --headless --invisible --nocrashreport --nodefault --nofirststartwizard --nologo --norestore --accept=socket,host=127.0.0.1,port=2002,tcpNoDelay=1;urp;StarOffice.ComponentContext

    # Done.
    return r
Ejemplo n.º 2
0
def connect2Calc(file=None, port=8100, counter_max=5000):
    """Open LibreOffice and establish a connection with Calc.

    A ``soffice`` process is started listening on ``localhost:<port>`` and
    the function then polls every 0.5 seconds until a connection can be
    made.

    Args:
        file (str or pathlib.Path, optional): file to connect. If None, it will
            open a new Calc instance.
        port (int, optional): port for connection. Used both for the
            ``soffice`` listener and for the connection attempts.
        counter_max (int, optional): Max number of attempts to establish a
            connection.

    Returns:
        Calc object.

        The main methods defined for a Calc object are exemplified below:

        >>> # adds one sheet ('Sheet2') at position 1
        >>> calcObject.insert_sheets_new_by_name('Sheet2', 1)
        >>>
        >>> # adds multiple sheets ('Sheet3' and 'Sheet4) at position 2
        >>> calcObject.insert_multisheets_new_by_name(['Sheet3', 'Sheet4'], 2)
        >>>
        >>> # Get number of sheets
        >>> print(calcObject.get_sheets_count())
        4
        >>> # Remove sheets
        >>> calcObject.remove_sheets_by_name('Sheet3')
        >>> # get sheet data
        >>> sheet1 = calcObject.get_sheet_by_name('Sheet1')
        >>> sheet2 = calcObject.get_sheet_by_index(0)

        Also, use :py:func:`~backpack.figmanip.setFigurePosition`

    Raises:
        ConnectionError: if no connection could be established after
            ``counter_max`` attempts.
    """
    # open libreoffice
    subprocess.Popen([f"soffice --nodefault --accept='socket,host=localhost,port={port};urp;'"], shell=True, close_fds=True)

    # connect to libreoffice
    counter = 0
    while True:
        # Give the freshly started process some time before each attempt.
        time.sleep(0.5)
        try:
            # Connect on the requested port, i.e. the same one soffice was
            # told to listen on above.
            context = connect(Socket('localhost', port))
            break
        except Exception as err:
            # Any failure to connect counts as one attempt. Unlike a bare
            # ``except``, this lets KeyboardInterrupt/SystemExit through so
            # the (potentially long) polling loop can be aborted.
            counter += 1
            if counter >= counter_max:
                raise ConnectionError('Cannot establish connection, maybe try increasing counter_max value.') from err

    if file is None:
        return Calc(context)

    # Normalise through Path so both str and Path inputs are accepted.
    return Calc(context, convert_path_to_url(str(Path(file))))
def wait_for_uno_api():
    """
    Block until a connection to the libreoffice UNO api at HOST:PORT succeeds.

    Up to 10 connection attempts are made. After every attempt that fails
    with ConnectionError the function sleeps for 5 seconds before trying
    again. The established connection itself is discarded; only the fact
    that it could be made matters.

    Raises Exception if none of the attempts succeeds.
    """

    max_attempts = 10
    retry_delay_seconds = 5

    for _ in range(max_attempts):
        try:
            connect(Socket(HOST, PORT))
        except ConnectionError:
            # Not reachable yet: wait, then try again.
            time.sleep(retry_delay_seconds)
        else:
            # Connected, so the API is up.
            return

    raise Exception("libreoffice UNO API failed to start")
Ejemplo n.º 4
0
def convert_csv(fname):
    """Convert all of the sheets in a given Excel spreadsheet to CSV
    files. Also get the name of the currently active sheet.

    Each sheet is written to
    "/tmp/sheet_<file name>-<position>--<sheet name>.csv", where
    <position> is the 0-based position of the sheet in the enumeration.

    Once the document has been requested from LibreOffice, the document
    is closed (if it was opened) and the soffice process is sent SIGTERM
    on every exit path, including errors and the early return for
    components that have no sheets.

    @param fname (str) The name of the Excel file.

    @return (list) A list where the 1st element is the name of the
    currently active sheet ("NO_ACTIVE_SHEET" if no sheets are active)
    and the rest of the elements are the names (str) of the CSV sheet
    files. An empty list is returned if fname is not an Excel file or
    if no connection to LibreOffice could be made.

    """

    # Make sure this is an Excel file.
    if not is_excel_file(fname):

        # Not Excel, so no sheets.
        return []

    # Run soffice in listening mode if it is not already running.
    run_soffice()

    # TODO: Make sure soffice is running in listening mode.

    # Connect to the local LibreOffice server. Try up to 5 times, pausing
    # for a second after each failed attempt.
    context = None
    for _ in range(5):
        try:
            context = connect(Socket(HOST, PORT))
            break
        except ConnectionError:
            time.sleep(1)

    # Do we have a connection to the headless LibreOffice?
    if context is None:

        # Can't connect to LibreOffice. Punt.
        print("ERROR: Cannot connect to headless LibreOffice.")
        return []

    r = []
    try:
        # Load the Excel sheet.
        component = get_component(fname, context)
        try:
            # Save the currently active sheet.
            controller = component.getCurrentController()
            active_sheet = getattr(controller, "ActiveSheet", None)
            active_sheet_name = "NO_ACTIVE_SHEET"
            if active_sheet is not None:
                active_sheet_name = fix_file_name(active_sheet.getName())
            r.append(active_sheet_name)

            # Bomb out if this is not an Excel file. The cleanup in the
            # finally clauses below still runs for this early return.
            if not hasattr(component, "getSheets"):
                return r

            sheets = component.getSheets()
            enumeration = sheets.createEnumeration()

            # Strip leading directories from the input name; this does
            # not depend on the sheet, so do it once.
            short_name = fix_file_name(fname.rsplit(os.path.sep, 1)[-1])

            # Iterate on all the sheets in the spreadsheet. An empty
            # enumeration simply skips the loop.
            pos = 0
            while enumeration.hasMoreElements():

                # Move to next sheet.
                sheet = enumeration.nextElement()
                name = sheet.getName()

                # Names with more than 10 spaces get all spaces removed.
                if name.count(" ") > 10:
                    name = name.replace(" ", "")
                name = fix_file_name(name)
                controller.setActiveSheet(sheet)

                # Set up the output URL.
                outfilename = "/tmp/sheet_%s-%s--%s.csv" % (
                    short_name, str(pos), name.replace(' ', '_SPACE_'))
                pos += 1
                r.append(outfilename)
                url = convert_path_to_url(outfilename)

                # Export the CSV.
                component.store_to_url(url, 'FilterName',
                                       'Text - txt - csv (StarCalc)')
        finally:
            # Close the spreadsheet.
            component.close(True)
    finally:
        # clean up
        os.kill(get_office_proc()["pid"], signal.SIGTERM)

    # Done.
    return r
Ejemplo n.º 5
0
# -*- coding: utf-8 -*-
import sys
from os.path import join as pathjoin

from unotools import Socket, connect
from unotools.component.writer import Writer
from unotools.unohelper import convert_path_to_url


def writer_sample(args, context):
    """Create a small Writer document and export it in several formats.

    The document gets 'world\\n' set at its end and then 'hello\\n' set at
    its start. It is stored under ``args.outputdir`` with the base name
    'sample' as .odt, .doc, -writer.pdf and -writer.html, and is closed
    afterwards.

    args - parsed arguments; only ``args.outputdir`` is read here.
    context - the connection context handed to ``Writer``.
    """
    doc = Writer(context)
    doc.set_string_to_end('world\n')
    doc.set_string_to_start('hello\n')

    url_prefix = convert_path_to_url(pathjoin(args.outputdir, 'sample'))

    # (file name suffix, export filter name), in export order.
    exports = (
        ('.odt', 'writer8'),
        ('.doc', 'MS Word 97'),
        ('-writer.pdf', 'writer_pdf_Export'),
        ('-writer.html', 'HTML (StarWriter)'),
    )
    for suffix, filter_name in exports:
        doc.store_to_url(url_prefix + suffix, 'FilterName', filter_name)

    doc.close(True)


if __name__ == '__main__':
    from unotools import parse_argument

    # Everything after the script name is handed to the unotools parser.
    args = parse_argument(sys.argv[1:])

    # Connect to the office instance at the parsed host and port, then run
    # the sample against that connection.
    endpoint = Socket(args.host, args.port)
    context = connect(endpoint, option=args.option)
    writer_sample(args, context)
def convert_csv(fname):
    """
    Convert all of the sheets in a given Excel spreadsheet to CSV files.

    Each sheet is written to
    "/tmp/sheet_<file name>-<position>--<sheet name>.csv", where
    <position> is the 0-based position of the sheet in the enumeration.
    Characters not in string.printable are dropped from the output name.

    Progress messages are written to stderr when the module-level
    `verbose` flag is set.

    Once a connection exists, the spreadsheet is closed (if it was
    opened) and the soffice process is sent SIGTERM even if loading or
    exporting fails.

    fname - The name of the file.
    return - A list of the names of the CSV sheet files. The list is empty
             if fname is not an Excel file.
    """

    # Make sure this is an Excel file.
    if not is_excel_file(fname):

        # Not Excel, so no sheets.
        if verbose:
            print("NOT EXCEL", file=sys.stderr)
        return []

    # Run soffice in listening mode if it is not already running.
    run_soffice()

    # TODO: Make sure soffice is running in listening mode.

    # Connect to the local LibreOffice server.
    context = connect(Socket(HOST, PORT))

    r = []
    try:
        # Load the Excel sheet.
        component = get_component(fname, context)
        try:
            controller = component.getCurrentController()
            sheets = component.getSheets()
            enumeration = sheets.createEnumeration()

            # Strip leading directories from the input name; this does
            # not depend on the sheet, so do it once.
            short_name = fname.rsplit(os.path.sep, 1)[-1]

            # Iterate on all the sheets in the spreadsheet. An empty
            # enumeration simply skips the loop.
            pos = 0
            while enumeration.hasMoreElements():

                # Move to next sheet.
                sheet = enumeration.nextElement()
                name = sheet.getName()

                # Names with more than 10 spaces get all spaces removed.
                if name.count(" ") > 10:
                    name = name.replace(" ", "")
                if verbose:
                    print("LOOKING AT SHEET " + str(name), file=sys.stderr)
                controller.setActiveSheet(sheet)

                # Set up the output URL, keeping only printable characters.
                outfilename = "/tmp/sheet_%s-%s--%s.csv" % (
                    short_name, str(pos), name.replace(' ', '_SPACE_'))
                outfilename = ''.join(
                    ch for ch in outfilename if ch in string.printable)

                pos += 1
                r.append(outfilename)
                url = convert_path_to_url(outfilename)

                # Export the CSV.
                component.store_to_url(url, 'FilterName',
                                       'Text - txt - csv (StarCalc)')
                if verbose:
                    print("SAVED CSV to " + str(outfilename),
                          file=sys.stderr)
        finally:
            # Close the spreadsheet.
            component.close(True)
    finally:
        # clean up
        os.kill(get_office_proc()["pid"], signal.SIGTERM)
        if verbose:
            print("KILLED SOFFICE", file=sys.stderr)

    # Done.
    if verbose:
        print("DONE. RETURN " + str(r), file=sys.stderr)
    return r
Ejemplo n.º 7
0
                        help="export a string containing the document text")
arg_parser.add_argument("-f",
                        "--file",
                        action="store",
                        required=True,
                        help="path to the word doc")
args = arg_parser.parse_args()

# Make sure this is a word file.
if not is_word_file(args.file):

    # Not Word, so no text.
    exit()

# Run soffice in listening mode if it is not already running.
run_soffice()

try:
    # Connect to the local LibreOffice server.
    connection = connect(Socket(HOST, PORT))

    # Load the document using the connection
    document = get_document(args.file, connection)

    # Print either the document text or its tables as JSON. The text
    # option wins when both are given; nothing is printed when neither is.
    if args.text:
        print(get_text(document))
    elif args.tables:
        print(json.dumps(get_tables(document)))
finally:
    # clean up: terminate soffice even when connecting, loading or
    # extraction failed, so no office process is left behind.
    os.kill(get_office_proc()["pid"], signal.SIGTERM)
Ejemplo n.º 8
0
 def __enter__(self):
     """Connect to the office instance and open the input document.

     Connects to ``self.host``:``self.port``, opens ``self.input_path``
     in a ``Writer`` stored on ``self.writer``, and returns ``self``.
     """
     endpoint = Socket(self.host, self.port)
     office_context = connect(endpoint)
     document_url = convert_path_to_url(self.input_path)
     self.writer = Writer(office_context, document_url)
     return self