def from_xls(self, content, root='objects', **kwargs):
    '''Deserialize XLS workbook content.

    Only the first sheet is read.  A sheet named "error"/"errors" is
    returned as raw rows; any other sheet is converted as if it were a
    csv sheet.  When ``root`` is set the parsed data is wrapped as
    ``{ root: data }``.
    '''
    if isinstance(content, six.string_types):
        workbook = xlrd.open_workbook(file_contents=content)
    else:
        # NOTE(review): open_workbook's first positional argument is a
        # filename; passing a StringIO here looks suspect - confirm.
        workbook = xlrd.open_workbook(cStringIO.StringIO(content))

    if workbook.nsheets > 1:
        logger.warn('only first page of workbooks supported')

    # TODO: if root is specified, then get the sheet by name
    first_sheet = workbook.sheet_by_index(0)
    if first_sheet.name.lower() in ('error', 'errors'):
        return xlsutils.sheet_rows(first_sheet)

    # Workbooks are treated like sets of csv sheets, so convert the
    # sheet rows as if this were a csv sheet.
    data = csvutils.from_csv_iterate(
        xlsutils.sheet_rows(first_sheet),
        list_delimiter=LIST_DELIMITER_XLS,
        list_keys=kwargs.get('list_keys', None))
    return {root: data} if root else data
def read_xlsx(input_file):
    '''Read plate matrices from an xlsx workbook file object.

    Returns a ``(plate_matrices, errors)`` tuple, where ``errors`` maps
    sheet names (or input-level keys) to the problems encountered.
    '''
    contents = input_file.read()
    logger.info('input_file read: %d', len(contents))
    workbook = xlrd.open_workbook(file_contents=contents)
    sheets = xlsutils.workbook_sheets(workbook)
    logger.info('sheets: %r, %d', sheets, workbook.nsheets)

    plate_matrices = []
    errors = {}
    sheet_count = 0
    for sheet_count, sheet in enumerate(sheets, 1):
        logger.info('read sheet: %d, %r', sheet_count, sheet)
        (matrices, sheet_errors) = read_rows(xlsutils.sheet_rows(sheet))
        logger.info('for sheet: %r, %d matrices read',
            sheet.name, len(matrices))
        if matrices:
            plate_matrices.extend(matrices)
            logger.info('plate_matrices: %r', len(plate_matrices))
        if sheet_errors:
            logger.warn('sheet errors: %r: %r', sheet.name, sheet_errors)
            errors[sheet.name] = sheet_errors

    if sheet_count == 0:
        errors['input_file'] = 'No sheets were read'
    elif not plate_matrices:
        errors['input_sheets'] = 'No matrices were found'
    return (plate_matrices, errors)
def from_xls(self, content, root='objects', list_keys=None,
        list_delimiters=None, **kwargs):
    '''Deserialize XLS workbook content.

    Only the first sheet of the workbook is read.  A sheet named
    "error"/"errors" is returned as raw rows; otherwise the sheet is
    converted as if it were csv data.

    @param content workbook content: a string of file bytes, or an
        object acceptable to cStringIO.StringIO
    @param root if set, wrap the parsed data as { root: data }
    @param list_keys keys whose cell values are parsed as embedded lists
    @param list_delimiters delimiters used to split embedded list
        values; defaults to [LIST_DELIMITER_XLS]
    '''
    logger.info('deserialize from_xls...')
    if isinstance(content, six.string_types):
        wb = xlrd.open_workbook(file_contents=content)
    else:
        # NOTE(review): open_workbook's first positional argument is a
        # filename; passing a StringIO here looks suspect - confirm.
        wb = xlrd.open_workbook(cStringIO.StringIO(content))
    if wb.nsheets > 1:
        # TODO: concatentate all sheets?
        logger.warn('only first page of workbooks is supported')
    logger.info('read first sheet ...')
    # TODO: if root is specified, then get the sheet by name
    sheet = wb.sheet_by_index(0)
    if sheet.name.lower() in ['error', 'errors']:
        return xlsutils.sheet_rows(sheet)
    list_delimiters = list_delimiters or [LIST_DELIMITER_XLS, ]
    # Workbooks are treated like sets of csv sheets
    data = csvutils.input_spreadsheet_reader(
        xlsutils.sheet_rows(sheet),
        list_delimiters=list_delimiters,
        list_keys=list_keys)
    if root:
        return {root: data}
    else:
        return data
def from_xls(self, content, root='objects', list_keys=None,
        list_delimiters=None, **kwargs):
    '''Deserialize XLS workbook content (first sheet only).

    Sheets named "error"/"errors" are passed through as raw rows;
    otherwise the sheet is converted like a csv sheet and, when
    ``root`` is set, wrapped as ``{ root: data }``.
    '''
    logger.info('deserialize from_xls...')
    if isinstance(content, six.string_types):
        workbook = xlrd.open_workbook(file_contents=content)
    else:
        workbook = xlrd.open_workbook(cStringIO.StringIO(content))
    if workbook.nsheets > 1:
        # TODO: concatentate all sheets?
        logger.warn('only first page of workbooks is supported')
    logger.info('read first sheet ...')
    # TODO: if root is specified, then get the sheet by name
    first_sheet = workbook.sheet_by_index(0)
    if first_sheet.name.lower() in ('error', 'errors'):
        return xlsutils.sheet_rows(first_sheet)
    if not list_delimiters:
        list_delimiters = [LIST_DELIMITER_XLS, ]
    # a workbook sheet is converted exactly like a csv sheet
    data = csvutils.input_spreadsheet_reader(
        xlsutils.sheet_rows(first_sheet),
        list_delimiters=list_delimiters,
        list_keys=list_keys)
    return {root: data} if root else data
def parse_result_values(parsed_columns, sheets):
    ''' Parse the Screen Result input file format into a valid API input format:
    - Create a row generator from the result value sheets
    '''
    logger.info('parse_result_values...')
    seen_well_ids = set()
    parse_error = ParseError(errors={})

    def record_error(sheet_name, validation_error):
        # Accumulate errors per sheet, keyed by str(sheet_name)
        key = str(sheet_name)
        if key not in parse_error.errors:
            parse_error.errors[key] = {}
        parse_error.errors[key].update(validation_error.errors)

    # NOTE(review): the accumulated parse_error is never raised or
    # returned by this generator - confirm whether the caller is
    # expected to receive it.
    for sheet in sheets:
        logger.info('parse result values sheet: %r...', sheet.name)
        rows = sheet_rows(sheet)
        try:
            header_row = result_value_field_mapper(next(rows), parsed_columns)
        except ValidationError as e:
            logger.exception('error: %r', e)
            record_error(sheet.name, e)
            continue
        logger.info('output result values...')
        for row_index, row in enumerate(rows):
            try:
                row_dict = dict(zip(header_row, row))
                result = parse_result_row(row_index, parsed_columns, row_dict)
                if DEBUG_IMPORTER:
                    logger.info('parsed row: %d: %r', row_index, result)
                well_id = result['well_id']
                if well_id in seen_well_ids:
                    raise ParseError(key=well_id, msg='duplicate')
                seen_well_ids.add(well_id)
                yield result
            except ValidationError as e:
                logger.exception('parse error: %r', e)
                record_error(sheet.name, e)
def parse_result_values(parsed_columns, sheets):
    '''Generate parsed result value rows from the result value sheets.

    @param parsed_columns column definitions used to map header cells
    @param sheets iterable of workbook sheets to read
    @yield one parsed result row dict per well; a duplicate well_id
        raises a ParseError which is recorded as a sheet error
    '''
    logger.info('parse result values...')
    well_ids = set()
    # Collect per-sheet errors keyed consistently by str(sheet_name).
    # (Previously the header-error branch tested membership with
    # sheet.name but created the key as str(sheet.name) - mixed key
    # types could orphan an empty dict - and assigned "= e.errors",
    # replacing prior errors instead of merging like the row branch.)
    parse_error = ParseError(errors={})

    def add_parse_error(sheet_name, error_dict):
        # merge error_dict into the accumulated errors for this sheet
        key = str(sheet_name)
        if key not in parse_error.errors:
            parse_error.errors[key] = {}
        parse_error.errors[key].update(error_dict)

    # NOTE(review): parse_error is accumulated but never raised or
    # returned - confirm whether the caller should receive it.
    for sheet in sheets:
        logger.info('parse result values sheet: %r...', sheet.name)
        rows = sheet_rows(sheet)
        try:
            header_row = result_value_field_mapper(next(rows), parsed_columns)
        except ValidationError as e:
            logger.info('error: %r', e)
            add_parse_error(sheet.name, e.errors)
            continue
        logger.info('output result values')
        for i, row in enumerate(rows):
            try:
                result = parse_result_row(
                    i, parsed_columns, dict(zip(header_row, row)))
                if result['well_id'] in well_ids:
                    raise ParseError(key=result['well_id'], msg='duplicate')
                well_ids.add(result['well_id'])
                yield result
            except ValidationError as e:
                logger.exception('error: %r', e)
                add_parse_error(sheet.name, e.errors)