Esempio n. 1
0
def getSpreadsheet(file):
  """Parse a spreadsheet and return the mapping of its sheets.

  ``file`` can be:
    * ``None``: there is nothing to parse and an empty dict is returned;
    * an object with a ``headers`` attribute: it is opened directly when its
      Content-Type starts with an OpenOffice / OpenDocument prefix, otherwise
      it is first converted to ``ods`` through a temporary OOo document;
    * anything else: it is handed to ``OOoParser.openFromString`` as is.

  Returns the result of ``OOoParser.getSpreadsheetsMapping()``.

  NOTE: ``context`` is not defined in this function; it has to be provided
  by the enclosing scope.
  """
  # Nothing to parse: answer before building a parser at all.
  if file is None:
    return {}

  ooo_parser = OOoParser()

  if hasattr(file, 'headers'):
    content_type = file.headers.get('Content-Type', '')
    if not (content_type.startswith('application/vnd.sun.xml')
       or content_type.startswith('application/vnd.oasis.opendocument')):
      # Not an open office format: convert it to ods through a temporary
      # document before parsing.
      from Products.ERP5Type.Document import newTempOOoDocument
      tmp_ooo = newTempOOoDocument(context, file.filename)
      tmp_ooo.edit(data=file.read(), content_type=content_type)
      tmp_ooo.convertToBaseFormat()
      ignored, import_file_content = tmp_ooo.convert('ods')
      ooo_parser.openFromString(str(import_file_content))
    else:
      ooo_parser.openFile(file)
  else:
    ooo_parser.openFromString(file)

  # Extract tables from the spreadsheet file
  return ooo_parser.getSpreadsheetsMapping()
Esempio n. 2
0
def getSpreadsheet(file):
    """Parse a spreadsheet and return the mapping of its sheets.

    ``file`` can be:
      * ``None``: there is nothing to parse and an empty dict is returned;
      * an object with a ``headers`` attribute: it is opened directly when
        its Content-Type starts with an OpenOffice / OpenDocument prefix,
        otherwise it is first converted to ``ods`` through a temporary
        'OOo Document';
      * anything else: it is handed to ``OOoParser.openFromString`` as is.

    Returns the result of ``OOoParser.getSpreadsheetsMapping()``.

    NOTE: ``context`` is not defined in this function; it has to be provided
    by the enclosing scope.
    """
    # Nothing to parse: answer before building a parser at all.
    if file is None:
        return {}

    ooo_parser = OOoParser()

    if hasattr(file, 'headers'):
        content_type = file.headers.get('Content-Type', '')
        if not (content_type.startswith('application/vnd.sun.xml') or
                content_type.startswith('application/vnd.oasis.opendocument')):
            # Not an open office format: convert it to ods through a
            # temporary document before parsing.
            tmp_ooo = context.newContent(temp_object=True,
                                         portal_type='OOo Document',
                                         id=file.filename)
            tmp_ooo.edit(data=file.read(), content_type=content_type)
            tmp_ooo.convertToBaseFormat()
            ignored, import_file_content = tmp_ooo.convert('ods')
            ooo_parser.openFromString(str(import_file_content))
        else:
            ooo_parser.openFile(file)
    else:
        ooo_parser.openFromString(file)

    # Extract tables from the spreadsheet file
    return ooo_parser.getSpreadsheetsMapping()
Esempio n. 3
0
 def test_getSpreadSheetMappingText(self):
   """Leading spaces, tabs and newlines of text cells are kept as is."""
   parser = OOoParser()
   # Close the fixture deterministically instead of leaking the handle.
   with open(makeFilePath('complex_text.ods'), 'rb') as ods_file:
     parser.openFile(ods_file)
     mapping = parser.getSpreadsheetsMapping()
   # list() keeps the comparison valid when keys() returns a view.
   self.assertEqual(['Feuille1'], list(mapping.keys()))
   self.assertEqual(mapping['Feuille1'][0], [' leading space'])
   self.assertEqual(mapping['Feuille1'][1], ['   leading space'])
   self.assertEqual(mapping['Feuille1'][2], ['tab\t'])
   self.assertEqual(mapping['Feuille1'][3], ['New\nLine'])
def convert(self, filename, data=None):
    """Turn the sheets of a spreadsheet into lists of property dicts.

    The file is obtained with ``read(self, filename, data)`` and parsed by
    ``OOoParser``.  For every non-empty sheet, the first row is the header:
    each header cell becomes a property id through ``getIdFromString``.
    Every following row becomes a dict mapping the property id of a column
    to the UTF-8 encoded cell.  Cells whose generated id is ``''`` or
    ``None`` are left out, and rows producing no entry are dropped.

    Returns the list of row dicts when exactly one non-empty sheet was
    found, otherwise a dict mapping each UTF-8 encoded sheet name to its
    list of row dicts.
    """
    from Products.ERP5OOo.OOoUtils import OOoParser
    parser = OOoParser()
    import_file = read(self, filename, data)

    # Extract tables from the spreadsheet file
    parser.openFile(import_file)
    filename = parser.getFilename()
    sheets = parser.getSpreadsheetsMapping()

    records_by_sheet = {}
    for sheet_name, sheet_rows in sheets.items():
        if not sheet_rows:
            continue

        # Column position -> property id, taken from the header row.
        property_ids = {}
        for position, header_cell in enumerate(sheet_rows[0]):
            property_ids[position] = getIdFromString(header_cell)

        sheet_records = []
        for row in sheet_rows[1:]:
            # Skip rows made only of '' / None cells.
            blank_cells = row.count('') + row.count(None)
            if blank_cells == len(row):
                continue

            record = {}
            for position, cell in enumerate(row):
                # Emptiness is judged on the generated id, because
                # getIdFromString() is more restrictive than the raw value.
                if getIdFromString(cell) in ('', None):
                    continue
                # NOTE(review): a row wider than the header raises KeyError
                # here -- confirm whether that is intended.
                property_id = property_ids[position]
                record[property_id] = cell.encode('UTF-8')

            if record:
                sheet_records.append(record)
        records_by_sheet[sheet_name.encode('UTF-8')] = sheet_records

    # With a single registered sheet, ``sheet_records`` still holds the list
    # of that sheet: empty sheets are skipped before it is rebound.
    if len(records_by_sheet) == 1:
        return sheet_records
    return records_by_sheet
def convert(self, filename, data=None):
  """Convert a spreadsheet into per-sheet lists of property dicts.

  The content comes from ``read(self, filename, data)`` and is parsed with
  ``OOoParser``.  In each non-empty sheet the first row gives the headers,
  turned into property ids by ``getIdFromString``; each later row yields a
  dict ``{property id: UTF-8 encoded cell}``.  Cells whose generated id is
  ``''`` or ``None`` are ignored and rows yielding nothing are skipped.

  Returns the single list of row dicts when exactly one non-empty sheet
  was found, else a dict keyed by the UTF-8 encoded sheet names.
  """
  from Products.ERP5OOo.OOoUtils import OOoParser
  ooo_parser = OOoParser()
  source_file = read(self, filename, data)

  # Extract tables from the spreadsheet file
  ooo_parser.openFile(source_file)
  filename = ooo_parser.getFilename()

  converted = {}
  for name, rows in ooo_parser.getSpreadsheetsMapping().items():
    if not rows:
      continue

    # Map each column index of the header row to its property id.
    header_ids = dict(
      (index, getIdFromString(title)) for index, title in enumerate(rows[0]))

    row_dicts = []
    for cells in rows[1:]:
      # Exclude rows where every cell is '' or None.
      if cells.count('') + cells.count(None) == len(cells):
        continue

      properties = {}
      for index, value in enumerate(cells):
        # Test the generated id rather than the raw cell, because
        # getIdFromString() is more restrictive.
        generated_id = getIdFromString(value)
        if generated_id in ('', None):
          continue
        # NOTE(review): raises KeyError when a row has more cells than the
        # header -- confirm whether that is intended.
        key = header_ids[index]
        properties[key] = value.encode('UTF-8')

      if properties:
        row_dicts.append(properties)
    converted[name.encode('UTF-8')] = row_dicts

  # When only one sheet was registered, ``row_dicts`` is still the list of
  # that sheet, since empty sheets are skipped before it is rebound.
  if len(converted) == 1:
    return row_dicts
  return converted
Esempio n. 6
0
 def test_getSpreadSheetMappingText(self):
     """Leading spaces, tabs and newlines of text cells are kept as is."""
     parser = OOoParser()
     # Close the fixture deterministically instead of leaking the handle.
     with open(makeFilePath('complex_text.ods'), 'rb') as ods_file:
         parser.openFile(ods_file)
         mapping = parser.getSpreadsheetsMapping()
     # assertEqual replaces the deprecated assertEquals alias; list() keeps
     # the comparison valid when keys() returns a view.
     self.assertEqual(['Feuille1'], list(mapping.keys()))
     self.assertEqual(mapping['Feuille1'][0], [' leading space'])
     self.assertEqual(mapping['Feuille1'][1], ['   leading space'])
     self.assertEqual(mapping['Feuille1'][2], ['tab\t'])
     self.assertEqual(mapping['Feuille1'][3], ['New\nLine'])
Esempio n. 7
0
 def test_getSpreadSheetMappingStyle(self):
     """Rows of the styled fixture come back as plain cell values."""
     parser = OOoParser()
     # Close the fixture deterministically instead of leaking the handle.
     with open(makeFilePath('import_data_list_with_style.ods'),
               'rb') as ods_file:
         parser.openFile(ods_file)
         mapping = parser.getSpreadsheetsMapping()
     # assertEqual replaces the deprecated assertEquals alias; list() keeps
     # the comparison valid when keys() returns a view.
     self.assertEqual(['Feuille1'], list(mapping.keys()))
     self.assertEqual(mapping['Feuille1'][1], ['a line with style'])
     self.assertEqual(mapping['Feuille1'][2],
                      ['a line with multiple styles'])
     self.assertEqual(mapping['Feuille1'][3], ['http://www.erp5.org'])
     self.assertEqual(mapping['Feuille1'][4], ['*****@*****.**'])
Esempio n. 8
0
 def test_getSpreadSheetMappingEmptyCells(self):
   """Empty cells are reported as None and an empty row as an empty list."""
   parser = OOoParser()
   # Close the fixture deterministically instead of leaking the handle.
   with open(makeFilePath('empty_cells.ods'), 'rb') as ods_file:
     parser.openFile(ods_file)
     mapping = parser.getSpreadsheetsMapping()
   # list() keeps the comparison valid when keys() returns a view.
   self.assertEqual(['Feuille1'], list(mapping.keys()))
   self.assertEqual(mapping['Feuille1'],
     [
       ['A1', None, 'C1'],
       [],
       [None, 'B3',],
     ])
Esempio n. 9
0
 def test_getSpreadSheetMappingEmptyCells(self):
   """Empty cells are reported as None and an empty row as an empty list."""
   parser = OOoParser()
   # Use a context manager so the fixture handle is always released.
   with open(makeFilePath('empty_cells.ods'), 'rb') as fixture:
     parser.openFile(fixture)
     mapping = parser.getSpreadsheetsMapping()
   # Materialise the keys so the check does not depend on keys() being a list.
   self.assertEqual(['Feuille1'], list(mapping.keys()))
   self.assertEqual(mapping['Feuille1'],
     [
       ['A1', None, 'C1'],
       [],
       [None, 'B3',],
     ])
Esempio n. 10
0
 def test_BigSpreadSheet_can_be_parsed(self):
     """Test that OOoimport can parse a file with more than 40000 lines."""
     parser = OOoParser()
     # Close the fixture deterministically instead of leaking the handle.
     with open(makeFilePath('import_big_spreadsheet.ods'),
               'rb') as ods_file:
         parser.openFile(ods_file)
         mapping = parser.getSpreadsheetsMapping()
     # An empty mapping means that no sheet was read at all.
     if not mapping:
         self.fail('Spreadsheet not read!')
     # values() exists on every dict flavour, unlike iteritems().
     for values in mapping.values():
         self.assertEqual(len(values), 41001)
Esempio n. 11
0
 def test_getSpreadSheetMapping(self):
   """The 'Person' sheet is returned as a list of rows, header first."""
   parser = OOoParser()
   # Close the fixture deterministically instead of leaking the handle.
   with open(makeFilePath('import_data_list.ods'), 'rb') as ods_file:
     parser.openFile(ods_file)
     mapping = parser.getSpreadsheetsMapping()
   # assertEqual replaces the deprecated assertEquals alias; list() keeps
   # the comparison valid when keys() returns a view.
   self.assertEqual(['Person'], list(mapping.keys()))
   person_mapping = mapping['Person']
   self.assertTrue(isinstance(person_mapping, list))
   # This used to be assertTrue(102, len(...)), which always passes because
   # 102 is truthy and the length was only used as the failure message.
   # NOTE(review): confirm that the fixture really holds 102 rows.
   self.assertEqual(102, len(person_mapping))
   self.assertEqual(person_mapping[0],
      ['Title', 'First Name', 'Last Name', 'Default Email Text'])
   self.assertEqual(person_mapping[1],
      ['John Doe 0', 'John', 'Doe 0', '*****@*****.**'])
Esempio n. 12
0
 def test_BigSpreadSheet_can_be_parsed(self):
   """Test that OOoimport can parse a file with more than 40000 lines."""
   parser = OOoParser()
   # Use a context manager so the fixture handle is always released.
   with open(makeFilePath('import_big_spreadsheet.ods'), 'rb') as fixture:
     parser.openFile(fixture)
     mapping = parser.getSpreadsheetsMapping()
   # No sheet at all means that the spreadsheet was not read.
   if not mapping:
     self.fail('Spreadsheet not read!')
   # values() exists on every dict flavour, unlike iteritems().
   for values in mapping.values():
     self.assertEqual(len(values), 41001)
Esempio n. 13
0
 def test_getSpreadSheetMapping(self):
   """The 'Person' sheet is returned as a list of rows, header first."""
   parser = OOoParser()
   # Use a context manager so the fixture handle is always released.
   with open(makeFilePath('import_data_list.ods'), 'rb') as fixture:
     parser.openFile(fixture)
     mapping = parser.getSpreadsheetsMapping()
   # Materialise the keys so the check does not depend on keys() being a list.
   self.assertEqual(['Person'], list(mapping.keys()))
   person_mapping = mapping['Person']
   self.assertTrue(isinstance(person_mapping, list))
   # assertTrue(102, len(...)) could never fail: 102 is always truthy and the
   # length was only passed as the message.  Compare the row count for real.
   # NOTE(review): confirm that the fixture really holds 102 rows.
   self.assertEqual(102, len(person_mapping))
   self.assertEqual(person_mapping[0],
      ['Title', 'First Name', 'Last Name', 'Default Email Text'])
   self.assertEqual(person_mapping[1],
      ['John Doe 0', 'John', 'Doe 0', '*****@*****.**'])
Esempio n. 14
0
 def test_getSpreadSheetMappingStyle(self):
   """Rows of the styled fixture come back as plain cell values."""
   parser = OOoParser()
   # Use a context manager so the fixture handle is always released.
   with open(makeFilePath('import_data_list_with_style.ods'),
             'rb') as fixture:
     parser.openFile(fixture)
     mapping = parser.getSpreadsheetsMapping()
   # assertEqual replaces the deprecated assertEquals alias; list() keeps
   # the comparison valid when keys() returns a view.
   self.assertEqual(['Feuille1'], list(mapping.keys()))
   self.assertEqual(mapping['Feuille1'][1],
                    ['a line with style'])
   self.assertEqual(mapping['Feuille1'][2],
                    ['a line with multiple styles'])
   self.assertEqual(mapping['Feuille1'][3],
                    ['http://www.erp5.org'])
   self.assertEqual(mapping['Feuille1'][4],
                    ['*****@*****.**'])
Esempio n. 15
0
 def test_getSpreadSheetMappingDataTypes(self):
     """Cells of the data type fixture are returned as strings."""
     parser = OOoParser()
     # Close the fixture deterministically instead of leaking the handle.
     with open(makeFilePath('import_data_list_data_type.ods'),
               'rb') as ods_file:
         parser.openFile(ods_file)
         mapping = parser.getSpreadsheetsMapping()
     # list() keeps the comparison valid when keys() returns a view.
     self.assertEqual(['Feuille1'], list(mapping.keys()))
     self.assertEqual(mapping['Feuille1'][0], ['1234.5678'])
     self.assertEqual(mapping['Feuille1'][1], ['1234.5678'])
     self.assertEqual(mapping['Feuille1'][2], ['0.1'])
     self.assertEqual(mapping['Feuille1'][3], ['2008-11-14'])
     self.assertEqual(mapping['Feuille1'][4],
                      ['2008-11-14T10:20:30'])  # supported by DateTime
     self.assertEqual(mapping['Feuille1'][5],
                      ['PT12H34M56S'])  # maybe not good, this is raw format
     self.assertEqual(mapping['Feuille1'][6], ['With note'])
Esempio n. 16
0
 def test_getSpreadSheetMappingDataTypes(self):
   """Cells of the data type fixture are returned as strings."""
   parser = OOoParser()
   # Use a context manager so the fixture handle is always released.
   with open(makeFilePath('import_data_list_data_type.ods'),
             'rb') as fixture:
     parser.openFile(fixture)
     mapping = parser.getSpreadsheetsMapping()
   # assertEqual replaces the deprecated assertEquals alias; list() keeps
   # the comparison valid when keys() returns a view.
   self.assertEqual(['Feuille1'], list(mapping.keys()))
   self.assertEqual(mapping['Feuille1'][0],
                    ['1234.5678'])
   self.assertEqual(mapping['Feuille1'][1],
                    ['1234.5678'])
   self.assertEqual(mapping['Feuille1'][2],
                    ['0.1'])
   self.assertEqual(mapping['Feuille1'][3],
                    ['2008-11-14'])
   self.assertEqual(mapping['Feuille1'][4],
                    ['2008-11-14T10:20:30']) # supported by DateTime
   self.assertEqual(mapping['Feuille1'][5],
                    ['PT12H34M56S']) # maybe not good, this is raw format
   self.assertEqual(mapping['Feuille1'][6],
                    ['With note'])
Esempio n. 17
0
 def test_openFromString(self):
   """A parser can be fed the raw content of an .ods file."""
   parser = OOoParser()
   # Read the fixture inside a context manager: open(...).read() alone
   # never closes the handle.
   with open(makeFilePath('import_data_list.ods'), 'rb') as ods_file:
     ods_content = ods_file.read()
   parser.openFromString(ods_content)
   mapping = parser.getSpreadsheetsMapping()
   # assertEqual replaces the deprecated assertEquals alias; list() keeps
   # the comparison valid when keys() returns a view.
   self.assertEqual(['Person'], list(mapping.keys()))
Esempio n. 18
0
# Prefer the Content-Type announced by the file itself, when it has headers.
# NOTE(review): when ``import_file`` has no ``headers``, ``content_type`` must
# already be bound before this point -- presumably earlier in the script,
# outside this excerpt; confirm.
if hasattr(import_file, 'headers'):
    content_type = import_file.headers.get('Content-Type', '')

is_open_document = (
    content_type.startswith('application/vnd.sun.xml')
    or content_type.startswith('application/vnd.oasis.opendocument'))
if is_open_document:
    # Already an OpenOffice / OpenDocument file: parse it directly.
    parser.openFile(import_file)
else:
    # Any other format goes through a temporary 'OOo Document' which is
    # converted to ods before being parsed.
    tmp_ooo = context.newContent(
        portal_type='OOo Document',
        temp_object=True,
        data=import_file.read(),
        content_type=content_type,
    )
    tmp_ooo.convertToBaseFormat()
    _, import_file_content = tmp_ooo.convert('ods')
    parser.openFromString(str(import_file_content))

# Extract tables from the spreadsheet file, asking the parser to leave out
# empty lines.
spreadsheet_list = parser.getSpreadsheetsMapping(no_empty_lines=True)

# Walk every sheet; sheets without any row are skipped.
for table_name in spreadsheet_list.keys():
    sheet = spreadsheet_list[table_name]
    if not sheet:
        continue
    # The first row of the sheet is its header.
    columns_header = sheet[0]
    # Mapping used to find the property that corresponds to a cell index.
    property_map = {}
    column_index = 0
    path_index = 0
    for column in columns_header:
        column_id = getIDFromString(column)
        # True once a 'path_0' entry has been stored in property_map, i.e.
        # the path definition has started.
        # NOTE(review): the excerpt stops here; the rest of the loop body is
        # not visible.
        path_def_started = 'path_0' in property_map.values()
  content_type = import_file.headers.get('Content-Type', '')
# Decide how to feed the parser according to the announced Content-Type.
is_open_document = (
  content_type.startswith('application/vnd.sun.xml')
  or content_type.startswith('application/vnd.oasis.opendocument'))
if is_open_document:
  # Already an OpenOffice / OpenDocument file: parse it directly.
  parser.openFile(import_file)
else:
  # Any other format goes through a temporary OOo document which is
  # converted to ods before being parsed.
  from Products.ERP5Type.Document import newTempOOoDocument
  tmp_ooo = newTempOOoDocument(context, "_")
  tmp_ooo.edit(
    data=import_file.read(),
    content_type=content_type,
  )
  tmp_ooo.convertToBaseFormat()
  ignored, import_file_content = tmp_ooo.convert('ods')
  parser.openFromString(str(import_file_content))

# Extract tables from the spreadsheet file, asking the parser to leave out
# empty lines.
filename = parser.getFilename()
spreadsheet_list = parser.getSpreadsheetsMapping(no_empty_lines=True)


# Walk every sheet; sheets without any row are skipped.
for table_name in spreadsheet_list.keys():
  sheet = spreadsheet_list[table_name]
  if not sheet:
    continue
  # The first row of the sheet is its header.
  columns_header = sheet[0]
  # Mapping used to find the property that corresponds to a cell index.
  property_map = {}
  column_index = 0
  path_index = 0
  for column in columns_header:
    column_id = getIDFromString(column)
    # This tells us whether the path definition has started.
    # NOTE(review): the excerpt stops here; the rest of the loop body is not
    # visible.
Esempio n. 20
0
 def test_openFromString(self):
     """A parser can be fed the raw content of an .ods file."""
     parser = OOoParser()
     # Read the fixture inside a context manager: open(...).read() alone
     # never closes the handle.
     with open(makeFilePath('import_data_list.ods'), 'rb') as ods_file:
         ods_content = ods_file.read()
     parser.openFromString(ods_content)
     mapping = parser.getSpreadsheetsMapping()
     # assertEqual replaces the deprecated assertEquals alias; list() keeps
     # the comparison valid when keys() returns a view.
     self.assertEqual(['Person'], list(mapping.keys()))