def getSpreadsheet(file):
    """Return the spreadsheet mapping parsed from ``file``.

    * ``file`` is None: an empty dict is returned.
    * ``file`` has a ``headers`` attribute: its Content-Type header decides
      how it is loaded. OpenOffice / OpenDocument content is handed to
      ``OOoParser.openFile``; anything else is first converted to 'ods'
      through a temporary OOo document.
    * otherwise ``file`` is passed as-is to ``OOoParser.openFromString``.

    ``context`` is not defined in this function; it has to be provided by
    the enclosing scope.
    """
    spreadsheet_parser = OOoParser()
    if file is None:
        return {}

    if not hasattr(file, 'headers'):
        spreadsheet_parser.openFromString(file)
        return spreadsheet_parser.getSpreadsheetsMapping()

    mime_type = file.headers.get('Content-Type', '')
    is_ooo_format = (
        mime_type.startswith('application/vnd.sun.xml')
        or mime_type.startswith('application/vnd.oasis.opendocument'))
    if is_ooo_format:
        spreadsheet_parser.openFile(file)
    else:
        # Not an OpenOffice format: convert it to 'ods' before parsing.
        from Products.ERP5Type.Document import newTempOOoDocument
        temp_document = newTempOOoDocument(context, file.filename)
        temp_document.edit(data=file.read(), content_type=mime_type)
        temp_document.convertToBaseFormat()
        unused_mime, ods_content = temp_document.convert('ods')
        spreadsheet_parser.openFromString(str(ods_content))
    return spreadsheet_parser.getSpreadsheetsMapping()
def getSpreadsheet(file):
    """Return the spreadsheet mapping parsed from ``file``.

    * ``file`` is None: an empty dict is returned.
    * ``file`` has a ``headers`` attribute: its Content-Type header decides
      how it is loaded. OpenOffice / OpenDocument content is handed to
      ``OOoParser.openFile``; anything else is first converted to 'ods'
      through a temporary 'OOo Document' created on ``context``.
    * otherwise ``file`` is passed as-is to ``OOoParser.openFromString``.

    ``context`` is not defined in this function; it has to be provided by
    the enclosing scope.
    """
    spreadsheet_parser = OOoParser()
    if file is None:
        return {}

    if not hasattr(file, 'headers'):
        spreadsheet_parser.openFromString(file)
        return spreadsheet_parser.getSpreadsheetsMapping()

    mime_type = file.headers.get('Content-Type', '')
    is_ooo_format = (
        mime_type.startswith('application/vnd.sun.xml')
        or mime_type.startswith('application/vnd.oasis.opendocument'))
    if is_ooo_format:
        spreadsheet_parser.openFile(file)
    else:
        # Not an OpenOffice format: convert it to 'ods' before parsing.
        temp_document = context.newContent(
            temp_object=True,
            portal_type='OOo Document',
            id=file.filename)
        temp_document.edit(data=file.read(), content_type=mime_type)
        temp_document.convertToBaseFormat()
        unused_mime, ods_content = temp_document.convert('ods')
        spreadsheet_parser.openFromString(str(ods_content))
    return spreadsheet_parser.getSpreadsheetsMapping()
def test_getSpreadSheetMappingText(self):
    """Check the cell text parsed from the 'complex_text.ods' fixture.

    The expected cells contain a leading space, a tab and a newline.
    """
    parser = OOoParser()
    # Use a context manager so the fixture file is always closed.
    with open(makeFilePath('complex_text.ods'), 'rb') as ods_file:
        parser.openFile(ods_file)
        mapping = parser.getSpreadsheetsMapping()
    sheet = mapping['Feuille1']
    self.assertEqual(['Feuille1'], list(mapping.keys()))
    self.assertEqual(sheet[0], [' leading space'])
    self.assertEqual(sheet[1], [' leading space'])
    self.assertEqual(sheet[2], ['tab\t'])
    self.assertEqual(sheet[3], ['New\nLine'])
def convert(self, filename, data=None):
    """Convert a spreadsheet into lists of property dictionaries.

    The file is obtained with ``read(self, filename, data)`` and parsed by
    ``OOoParser``. For every non-empty sheet the first row is the header:
    each header cell is turned into a property id by ``getIdFromString``.
    Each following row that is not entirely made of '' / None cells becomes
    a dict mapping property id to the UTF-8 encoded cell value. Cells whose
    generated id is '' or None are ignored, and rows producing no property
    are dropped.

    Returns the list of row dicts when exactly one sheet was collected,
    otherwise a dict mapping the UTF-8 encoded sheet name to its list.
    """
    from Products.ERP5OOo.OOoUtils import OOoParser
    parser = OOoParser()
    import_file = read(self, filename, data)

    # Extract the tables from the spreadsheet file.
    parser.openFile(import_file)
    # The returned file name is not used below; the call is kept as in the
    # historical implementation.
    parser.getFilename()
    sheet_mapping = parser.getSpreadsheetsMapping()

    result_by_sheet = {}
    for sheet_name, rows in sheet_mapping.items():
        if not rows:
            continue

        # Map each column position to the property id built from its header.
        header_ids = {}
        for position, title in enumerate(rows[0]):
            header_ids[position] = getIdFromString(title)

        row_dict_list = []
        for row in rows[1:]:
            # Skip rows containing only empty cells.
            if row.count('') + row.count(None) == len(row):
                continue
            row_dict = {}
            for position, cell in enumerate(row):
                # Emptiness is tested on the generated id because
                # getIdFromString() is more restrictive than the raw value.
                if getIdFromString(cell) not in ('', None):
                    property_id = header_ids[position]
                    # Store the value as a UTF-8 encoded string.
                    row_dict[property_id] = cell.encode('UTF-8')
            if row_dict:
                row_dict_list.append(row_dict)
        result_by_sheet[sheet_name.encode('UTF-8')] = row_dict_list

    if len(result_by_sheet) == 1:
        return row_dict_list
    return result_by_sheet
def convert(self, filename, data=None):
    """Convert a spreadsheet into lists of property dictionaries.

    The file is obtained with ``read(self, filename, data)`` and parsed by
    ``OOoParser``. For every non-empty sheet the first row is the header:
    each header cell is turned into a property id by ``getIdFromString``.
    Each following row that is not entirely made of '' / None cells becomes
    a dict mapping property id to the UTF-8 encoded cell value. Cells whose
    generated id is '' or None are ignored, and rows producing no property
    are dropped.

    Returns the list of row dicts when exactly one sheet was collected,
    otherwise a dict mapping the UTF-8 encoded sheet name to its list.
    """
    from Products.ERP5OOo.OOoUtils import OOoParser
    parser = OOoParser()
    import_file = read(self, filename, data)

    # Extract the tables from the spreadsheet file.
    parser.openFile(import_file)
    # The returned file name is not used below; the call is kept as in the
    # historical implementation.
    parser.getFilename()
    sheet_mapping = parser.getSpreadsheetsMapping()

    result_by_sheet = {}
    for sheet_name, rows in sheet_mapping.items():
        if not rows:
            continue

        # Map each column position to the property id built from its header.
        header_ids = {}
        for position, title in enumerate(rows[0]):
            header_ids[position] = getIdFromString(title)

        row_dict_list = []
        for row in rows[1:]:
            # Skip rows containing only empty cells.
            if row.count('') + row.count(None) == len(row):
                continue
            row_dict = {}
            for position, cell in enumerate(row):
                # Emptiness is tested on the generated id because
                # getIdFromString() is more restrictive than the raw value.
                if getIdFromString(cell) not in ('', None):
                    property_id = header_ids[position]
                    # Store the value as a UTF-8 encoded string.
                    row_dict[property_id] = cell.encode('UTF-8')
            if row_dict:
                row_dict_list.append(row_dict)
        result_by_sheet[sheet_name.encode('UTF-8')] = row_dict_list

    if len(result_by_sheet) == 1:
        return row_dict_list
    return result_by_sheet
def test_getSpreadSheetMappingText(self):
    """Check the cell text parsed from the 'complex_text.ods' fixture.

    The expected cells contain a leading space, a tab and a newline.
    """
    parser = OOoParser()
    # Use a context manager so the fixture file is always closed.
    with open(makeFilePath('complex_text.ods'), 'rb') as ods_file:
        parser.openFile(ods_file)
        mapping = parser.getSpreadsheetsMapping()
    sheet = mapping['Feuille1']
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(['Feuille1'], list(mapping.keys()))
    self.assertEqual(sheet[0], [' leading space'])
    self.assertEqual(sheet[1], [' leading space'])
    self.assertEqual(sheet[2], ['tab\t'])
    self.assertEqual(sheet[3], ['New\nLine'])
def test_getSpreadSheetMappingStyle(self):
    """Check the cell text parsed from 'import_data_list_with_style.ods'.

    Rows 1 to 4 of sheet 'Feuille1' are compared with their expected plain
    text values.
    """
    parser = OOoParser()
    # Use a context manager so the fixture file is always closed.
    with open(makeFilePath('import_data_list_with_style.ods'),
              'rb') as ods_file:
        parser.openFile(ods_file)
        mapping = parser.getSpreadsheetsMapping()
    sheet = mapping['Feuille1']
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(['Feuille1'], list(mapping.keys()))
    self.assertEqual(sheet[1], ['a line with style'])
    self.assertEqual(sheet[2], ['a line with multiple styles'])
    self.assertEqual(sheet[3], ['http://www.erp5.org'])
    self.assertEqual(sheet[4], ['*****@*****.**'])
def test_getSpreadSheetMappingEmptyCells(self):
    """Check how empty cells and rows of 'empty_cells.ods' are reported.

    The expected mapping has None for an empty cell inside a row and an
    empty list for an empty row.
    """
    parser = OOoParser()
    # Use a context manager so the fixture file is always closed.
    with open(makeFilePath('empty_cells.ods'), 'rb') as ods_file:
        parser.openFile(ods_file)
        mapping = parser.getSpreadsheetsMapping()
    self.assertEqual(['Feuille1'], list(mapping.keys()))
    expected_rows = [
        ['A1', None, 'C1'],
        [],
        [None, 'B3'],
    ]
    self.assertEqual(mapping['Feuille1'], expected_rows)
def test_getSpreadSheetMappingEmptyCells(self):
    """Check how empty cells and rows of 'empty_cells.ods' are reported.

    The expected mapping has None for an empty cell inside a row and an
    empty list for an empty row.
    """
    parser = OOoParser()
    # Use a context manager so the fixture file is always closed.
    with open(makeFilePath('empty_cells.ods'), 'rb') as ods_file:
        parser.openFile(ods_file)
        mapping = parser.getSpreadsheetsMapping()
    self.assertEqual(['Feuille1'], list(mapping.keys()))
    expected_rows = [
        ['A1', None, 'C1'],
        [],
        [None, 'B3'],
    ]
    self.assertEqual(mapping['Feuille1'], expected_rows)
def test_BigSpreadSheet_can_be_parsed(self):
    """Test that OOo import can parse a file with more than 40000 lines.

    Fails when the parsed mapping contains no sheet at all; otherwise every
    sheet must contain 41001 rows.
    """
    parser = OOoParser()
    # Use a context manager so the fixture file is always closed.
    with open(makeFilePath('import_big_spreadsheet.ods'), 'rb') as ods_file:
        parser.openFile(ods_file)
        mapping = parser.getSpreadsheetsMapping()
    # An empty mapping would make the loop below pass vacuously.
    if not mapping:
        self.fail('Spreadsheet not read!')
    for values in mapping.values():
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(len(values), 41001)
def test_getSpreadSheetMapping(self):
    """Check the mapping parsed from the 'import_data_list.ods' fixture.

    The file must expose a single 'Person' sheet whose rows are a list,
    starting with the header row followed by the first person row.
    """
    parser = OOoParser()
    # Use a context manager so the fixture file is always closed.
    with open(makeFilePath('import_data_list.ods'), 'rb') as ods_file:
        parser.openFile(ods_file)
        mapping = parser.getSpreadsheetsMapping()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(['Person'], list(mapping.keys()))
    person_mapping = mapping['Person']
    self.assertIsInstance(person_mapping, list)
    # The former assertTrue(102, len(...)) could never fail: 102 is always
    # true and the length was only used as the failure message.
    # NOTE(review): confirm the fixture really contains 102 rows.
    self.assertEqual(102, len(person_mapping))
    self.assertEqual(
        person_mapping[0],
        ['Title', 'First Name', 'Last Name', 'Default Email Text'])
    self.assertEqual(
        person_mapping[1],
        ['John Doe 0', 'John', 'Doe 0', '*****@*****.**'])
def test_BigSpreadSheet_can_be_parsed(self):
    """Test that OOo import can parse a file with more than 40000 lines.

    Fails when the parsed mapping contains no sheet at all; otherwise every
    sheet must contain 41001 rows.
    """
    parser = OOoParser()
    # Use a context manager so the fixture file is always closed.
    with open(makeFilePath('import_big_spreadsheet.ods'), 'rb') as ods_file:
        parser.openFile(ods_file)
        mapping = parser.getSpreadsheetsMapping()
    # An empty mapping would make the loop below pass vacuously.
    if not mapping:
        self.fail('Spreadsheet not read!')
    for values in mapping.values():
        self.assertEqual(len(values), 41001)
def test_getSpreadSheetMapping(self):
    """Check the mapping parsed from the 'import_data_list.ods' fixture.

    The file must expose a single 'Person' sheet whose rows are a list,
    starting with the header row followed by the first person row.
    """
    parser = OOoParser()
    # Use a context manager so the fixture file is always closed.
    with open(makeFilePath('import_data_list.ods'), 'rb') as ods_file:
        parser.openFile(ods_file)
        mapping = parser.getSpreadsheetsMapping()
    self.assertEqual(['Person'], list(mapping.keys()))
    person_mapping = mapping['Person']
    self.assertIsInstance(person_mapping, list)
    # The former assertTrue(102, len(...)) could never fail: 102 is always
    # true and the length was only used as the failure message.
    # NOTE(review): confirm the fixture really contains 102 rows.
    self.assertEqual(102, len(person_mapping))
    self.assertEqual(
        person_mapping[0],
        ['Title', 'First Name', 'Last Name', 'Default Email Text'])
    self.assertEqual(
        person_mapping[1],
        ['John Doe 0', 'John', 'Doe 0', '*****@*****.**'])
def test_getSpreadSheetMappingStyle(self):
    """Check the cell text parsed from 'import_data_list_with_style.ods'.

    Rows 1 to 4 of sheet 'Feuille1' are compared with their expected plain
    text values.
    """
    parser = OOoParser()
    # Use a context manager so the fixture file is always closed.
    with open(makeFilePath('import_data_list_with_style.ods'),
              'rb') as ods_file:
        parser.openFile(ods_file)
        mapping = parser.getSpreadsheetsMapping()
    sheet = mapping['Feuille1']
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(['Feuille1'], list(mapping.keys()))
    self.assertEqual(sheet[1], ['a line with style'])
    self.assertEqual(sheet[2], ['a line with multiple styles'])
    self.assertEqual(sheet[3], ['http://www.erp5.org'])
    self.assertEqual(sheet[4], ['*****@*****.**'])
def test_getSpreadSheetMappingDataTypes(self):
    """Check the values parsed from 'import_data_list_data_type.ods'.

    Every expected cell of sheet 'Feuille1' is a string: decimal numbers,
    a date, a date with time, a duration and a plain text cell.
    """
    parser = OOoParser()
    # Use a context manager so the fixture file is always closed.
    with open(makeFilePath('import_data_list_data_type.ods'),
              'rb') as ods_file:
        parser.openFile(ods_file)
        mapping = parser.getSpreadsheetsMapping()
    sheet = mapping['Feuille1']
    self.assertEqual(['Feuille1'], list(mapping.keys()))
    self.assertEqual(sheet[0], ['1234.5678'])
    self.assertEqual(sheet[1], ['1234.5678'])
    self.assertEqual(sheet[2], ['0.1'])
    self.assertEqual(sheet[3], ['2008-11-14'])
    # supported by DateTime
    self.assertEqual(sheet[4], ['2008-11-14T10:20:30'])
    # maybe not good, this is raw format
    self.assertEqual(sheet[5], ['PT12H34M56S'])
    self.assertEqual(sheet[6], ['With note'])
def test_getSpreadSheetMappingDataTypes(self):
    """Check the values parsed from 'import_data_list_data_type.ods'.

    Every expected cell of sheet 'Feuille1' is a string: decimal numbers,
    a date, a date with time, a duration and a plain text cell.
    """
    parser = OOoParser()
    # Use a context manager so the fixture file is always closed.
    with open(makeFilePath('import_data_list_data_type.ods'),
              'rb') as ods_file:
        parser.openFile(ods_file)
        mapping = parser.getSpreadsheetsMapping()
    sheet = mapping['Feuille1']
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(['Feuille1'], list(mapping.keys()))
    self.assertEqual(sheet[0], ['1234.5678'])
    self.assertEqual(sheet[1], ['1234.5678'])
    self.assertEqual(sheet[2], ['0.1'])
    self.assertEqual(sheet[3], ['2008-11-14'])
    # supported by DateTime
    self.assertEqual(sheet[4], ['2008-11-14T10:20:30'])
    # maybe not good, this is raw format
    self.assertEqual(sheet[5], ['PT12H34M56S'])
    self.assertEqual(sheet[6], ['With note'])
def test_openFromString(self):
    """Check that a document passed as a string can be parsed.

    The raw content of 'import_data_list.ods' is given to
    ``openFromString`` and the resulting mapping must contain only the
    'Person' sheet.
    """
    # Read the fixture inside a context manager so the file is closed
    # instead of being left to the garbage collector.
    with open(makeFilePath('import_data_list.ods'), 'rb') as ods_file:
        ods_content = ods_file.read()
    parser = OOoParser()
    parser.openFromString(ods_content)
    mapping = parser.getSpreadsheetsMapping()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(['Person'], list(mapping.keys()))
if hasattr(import_file, 'headers'): content_type = import_file.headers.get('Content-Type', '') if not (content_type.startswith('application/vnd.sun.xml') or content_type.startswith('application/vnd.oasis.opendocument')): tmp_ooo = context.newContent(portal_type='OOo Document', temp_object=True, data=import_file.read(), content_type=content_type) tmp_ooo.convertToBaseFormat() _, import_file_content = tmp_ooo.convert('ods') parser.openFromString(str(import_file_content)) else: parser.openFile(import_file) # Extract tables from the speadsheet file spreadsheet_list = parser.getSpreadsheetsMapping(no_empty_lines=True) for table_name in spreadsheet_list.keys(): sheet = spreadsheet_list[table_name] if not sheet: continue # Get the header of the table columns_header = sheet[0] # Get the mapping to help us know the property according a cell index property_map = {} column_index = 0 path_index = 0 for column in columns_header: column_id = getIDFromString(column) # This give us the information that the path definition has started path_def_started = 'path_0' in property_map.values()
content_type = import_file.headers.get('Content-Type', '') if not (content_type.startswith('application/vnd.sun.xml') or content_type.startswith('application/vnd.oasis.opendocument')): from Products.ERP5Type.Document import newTempOOoDocument tmp_ooo = newTempOOoDocument(context, "_") tmp_ooo.edit(data=import_file.read(), content_type=content_type) tmp_ooo.convertToBaseFormat() ignored, import_file_content = tmp_ooo.convert('ods') parser.openFromString(str(import_file_content)) else: parser.openFile(import_file) # Extract tables from the speadsheet file filename = parser.getFilename() spreadsheet_list = parser.getSpreadsheetsMapping(no_empty_lines=True) for table_name in spreadsheet_list.keys(): sheet = spreadsheet_list[table_name] if not sheet: continue # Get the header of the table columns_header = sheet[0] # Get the mapping to help us know the property according a cell index property_map = {} column_index = 0 path_index = 0 for column in columns_header: column_id = getIDFromString(column) # This give us the information that the path definition has started
def test_openFromString(self):
    """Check that a document passed as a string can be parsed.

    The raw content of 'import_data_list.ods' is given to
    ``openFromString`` and the resulting mapping must contain only the
    'Person' sheet.
    """
    # Read the fixture inside a context manager so the file is closed
    # instead of being left to the garbage collector.
    with open(makeFilePath('import_data_list.ods'), 'rb') as ods_file:
        ods_content = ods_file.read()
    parser = OOoParser()
    parser.openFromString(ods_content)
    mapping = parser.getSpreadsheetsMapping()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(['Person'], list(mapping.keys()))