def convert_csv(fname): """ Convert all of the sheets in a given Excel spreadsheet to CSV files. fname - The name of the file. return - A list of the names of the CSV sheet files. """ # Make sure this is an Excel file. if (not is_excel_file(fname)): # Not Excel, so no sheets. return [] # Run soffice in listening mode if it is not already running. run_soffice() # TODO: Make sure soffice is running in listening mode. # # Connect to the local LibreOffice server. context = connect(Socket(HOST, PORT)) # Load the Excel sheet. component = get_component(fname, context) # Iterate on all the sheets in the spreadsheet. controller = component.getCurrentController() sheets = component.getSheets() enumeration = sheets.createEnumeration() r = [] if sheets.getCount() > 0: while enumeration.hasMoreElements(): # Move to next sheet. sheet = enumeration.nextElement() name = sheet.getName() controller.setActiveSheet(sheet) # Set up the output URL. short_name = fname if (os.path.sep in short_name): short_name = short_name[short_name.rindex(os.path.sep) + 1:] outfilename = "/tmp/sheet_%s-%s.csv" % (short_name, name.replace(' ', '_SPACE_')) r.append(outfilename) url = convert_path_to_url(outfilename) # Export the CSV. component.store_to_url(url,'FilterName','Text - txt - csv (StarCalc)') # Close the spreadsheet. component.close(True) # Kill soffice after done? # /usr/lib/libreoffice/program/soffice.bin --headless --invisible --nocrashreport --nodefault --nofirststartwizard --nologo --norestore --accept=socket,host=127.0.0.1,port=2002,tcpNoDelay=1;urp;StarOffice.ComponentContext # Done. return r
def connect2Calc(file=None, port=8100, counter_max=5000): """Open libreoffice and enable conection with Calc. Args: file (str or pathlib.Path, optional): file to connect. If None, it will open a new Calc instance. port (int, optional): port for connection. counter_max (int, optional): Max number of tentatives to establish a connection. Returns: Calc object. The main mathods defined for a Calc object are exemplyfied below: >>> # adds one sheet ('Sheet2') at position 1 >>> calcObject.insert_sheets_new_by_name('Sheet2', 1) >>> >>> # adds multiple sheets ('Sheet3' and 'Sheet4) at position 2 >>> calcObject.insert_multisheets_new_by_name(['Sheet3', 'Sheet4'], 2) >>> >>> # Get number of sheets >>> print(calcObject.get_sheets_count()) 4 >>> # Remove sheets >>> calcObject.remove_sheets_by_name('Sheet3') >>> # get sheet data >>> sheet1 = calcObject.get_sheet_by_name('Sheet1') >>> sheet2 = calcObject.get_sheet_by_index(0) Also, use :py:func:`~backpack.figmanip.setFigurePosition` """ # open libreoffice libreoffice = subprocess.Popen([f"soffice --nodefault --accept='socket,host=localhost,port={port};urp;'"], shell=True, close_fds=True) # connect to libreoffice connected = False counter = 0 while connected == False: time.sleep(0.5) try: context = connect(Socket('localhost', 8100)) connected = True except: counter += 1 if counter == counter_max: raise ConnectionError('Cannot establish connection, maybe try increasing counter_max value.') pass if file is None: return Calc(context) else: file = Path(file) return Calc(context, convert_path_to_url(str(file)))
def wait_for_uno_api(): """ Sleeps until the libreoffice UNO api is available by the headless libreoffice process. Takes a bit to spin up even after the OS reports the process as running. Tries several times before giving up and throwing an Exception. """ tries = 0 while tries < 10: try: connect(Socket(HOST, PORT)) return except ConnectionError: time.sleep(5) tries += 1 raise Exception("libreoffice UNO API failed to start")
def convert_csv(fname): """Convert all of the sheets in a given Excel spreadsheet to CSV files. Also get the name of the currently active sheet. @param fname (str) The name of the Excel file. @return (list) A list where the 1st element is the name of the currently active sheet ("NO_ACTIVE_SHEET" if no sheets are active) and the rest of the elements are the names (str) of the CSV sheet files. """ # Make sure this is an Excel file. if (not is_excel_file(fname)): # Not Excel, so no sheets. return [] # Run soffice in listening mode if it is not already running. run_soffice() # TODO: Make sure soffice is running in listening mode. # # Connect to the local LibreOffice server. context = None attempts = 0 while (attempts < 5): attempts += 1 try: context = connect(Socket(HOST, PORT)) break except ConnectionError: time.sleep(1) # Do we have a connection to the headless LibreOffice? if (context is None): # Can't connect to LibreOffice. Punt. print("ERROR: Cannot connect to headless LibreOffice.") return [] # Load the Excel sheet. component = get_component(fname, context) # Save the currently active sheet. r = [] controller = component.getCurrentController() active_sheet = None if hasattr(controller, "ActiveSheet"): active_sheet = controller.ActiveSheet active_sheet_name = "NO_ACTIVE_SHEET" if (active_sheet is not None): active_sheet_name = fix_file_name(active_sheet.getName()) r.append(active_sheet_name) # Bomb out if this is not an Excel file. if (not hasattr(component, "getSheets")): return r # Iterate on all the sheets in the spreadsheet. sheets = component.getSheets() enumeration = sheets.createEnumeration() pos = 0 if sheets.getCount() > 0: while enumeration.hasMoreElements(): # Move to next sheet. sheet = enumeration.nextElement() name = sheet.getName() if (name.count(" ") > 10): name = name.replace(" ", "") name = fix_file_name(name) controller.setActiveSheet(sheet) # Set up the output URL. short_name = fname if (os.path.sep in short_name): short_name = short_name[short_name.rindex(os.path.sep) + 1:] short_name = fix_file_name(short_name) outfilename = "/tmp/sheet_%s-%s--%s.csv" % ( short_name, str(pos), name.replace(' ', '_SPACE_')) pos += 1 r.append(outfilename) url = convert_path_to_url(outfilename) # Export the CSV. component.store_to_url(url, 'FilterName', 'Text - txt - csv (StarCalc)') # Close the spreadsheet. component.close(True) # clean up os.kill(get_office_proc()["pid"], signal.SIGTERM) # Done. return r
# -*- coding: utf-8 -*- import sys from os.path import join as pathjoin from unotools import Socket, connect from unotools.component.writer import Writer from unotools.unohelper import convert_path_to_url def writer_sample(args, context): writer = Writer(context) writer.set_string_to_end('world\n') writer.set_string_to_start('hello\n') base_path = convert_path_to_url(pathjoin(args.outputdir, 'sample')) writer.store_to_url(base_path + '.odt', 'FilterName', 'writer8') writer.store_to_url(base_path + '.doc', 'FilterName', 'MS Word 97') writer.store_to_url(base_path + '-writer.pdf', 'FilterName', 'writer_pdf_Export') writer.store_to_url(base_path + '-writer.html', 'FilterName', 'HTML (StarWriter)') writer.close(True) if __name__ == '__main__': from unotools import parse_argument args = parse_argument(sys.argv[1:]) context = connect(Socket(args.host, args.port), option=args.option) writer_sample(args, context)
def convert_csv(fname): """ Convert all of the sheets in a given Excel spreadsheet to CSV files. fname - The name of the file. return - A list of the names of the CSV sheet files. """ # Make sure this is an Excel file. if (not is_excel_file(fname)): # Not Excel, so no sheets. if verbose: print("NOT EXCEL", file=sys.stderr) return [] # Run soffice in listening mode if it is not already running. run_soffice() # TODO: Make sure soffice is running in listening mode. # # Connect to the local LibreOffice server. context = connect(Socket(HOST, PORT)) # Load the Excel sheet. component = get_component(fname, context) # Iterate on all the sheets in the spreadsheet. controller = component.getCurrentController() sheets = component.getSheets() enumeration = sheets.createEnumeration() r = [] pos = 0 if sheets.getCount() > 0: while enumeration.hasMoreElements(): # Move to next sheet. sheet = enumeration.nextElement() name = sheet.getName() if (name.count(" ") > 10): name = name.replace(" ", "") if verbose: print("LOOKING AT SHEET " + str(name), file=sys.stderr) controller.setActiveSheet(sheet) # Set up the output URL. short_name = fname if (os.path.sep in short_name): short_name = short_name[short_name.rindex(os.path.sep) + 1:] outfilename = "/tmp/sheet_%s-%s--%s.csv" % ( short_name, str(pos), name.replace(' ', '_SPACE_')) outfilename = ''.join( filter(lambda x: x in string.printable, outfilename)) pos += 1 r.append(outfilename) url = convert_path_to_url(outfilename) # Export the CSV. component.store_to_url(url, 'FilterName', 'Text - txt - csv (StarCalc)') if verbose: print("SAVED CSV to " + str(outfilename), file=sys.stderr) # Close the spreadsheet. component.close(True) # clean up os.kill(get_office_proc()["pid"], signal.SIGTERM) if verbose: print("KILLED SOFFICE", file=sys.stderr) # Done. if verbose: print("DONE. RETURN " + str(r), file=sys.stderr) return r
help="export a string containing the document text") arg_parser.add_argument("-f", "--file", action="store", required=True, help="path to the word doc") args = arg_parser.parse_args() # Make sure this is a word file. if (not is_word_file(args.file)): # Not Word, so no text. exit() # Run soffice in listening mode if it is not already running. run_soffice() # Connect to the local LibreOffice server. connection = connect(Socket(HOST, PORT)) # Load the document using the connection document = get_document(args.file, connection) if args.text: print(get_text(document)) elif args.tables: print(json.dumps(get_tables(document))) # clean up os.kill(get_office_proc()["pid"], signal.SIGTERM)
def __enter__(self): context = connect(Socket(self.host, self.port)) self.writer = Writer(context, convert_path_to_url(self.input_path)) return self