Ejemplo n.º 1
0
    def from_xls(self, content, root='objects', **kwargs):
        '''
        Deserialize XLS workbook content.

        Only the first sheet of the workbook is converted; it is treated as a
        csv sheet. A sheet named 'error'/'errors' is returned as raw rows.

        @param content workbook file contents (string) to deserialize
        @param root if set, the parsed rows are wrapped as { root: rows }
        @param kwargs may contain 'list_keys', passed through to the csv reader
        '''
        if isinstance(content, six.string_types):
            workbook = xlrd.open_workbook(file_contents=content)
        else:
            # NOTE(review): a StringIO is passed where open_workbook expects a
            # filename positional arg — confirm this branch is ever exercised
            workbook = xlrd.open_workbook(cStringIO.StringIO(content))

        if workbook.nsheets > 1:
            logger.warn('only first page of workbooks supported')

        # TODO: if root is specified, then get the sheet by name
        first_sheet = workbook.sheet_by_index(0)

        # Error sheets are passed through without csv conversion
        if first_sheet.name.lower() in ('error', 'errors'):
            return xlsutils.sheet_rows(first_sheet)

        # Workbooks are treated like sets of csv sheets: convert the first
        # sheet as if it were a csv sheet
        data = csvutils.from_csv_iterate(
            xlsutils.sheet_rows(first_sheet),
            list_delimiter=LIST_DELIMITER_XLS,
            list_keys=kwargs.get('list_keys', None))

        return {root: data} if root else data
Ejemplo n.º 2
0
def read_xlsx(input_file):
    '''
    Read plate matrices from every sheet of an xlsx workbook.

    @param input_file an open file-like object containing xlsx data
    @return a (plate_matrices, errors) tuple; errors is a dict keyed by
        sheet name (or 'input_file'/'input_sheets' for workbook-level errors)
    '''
    contents = input_file.read()
    logger.info('input_file read: %d', len(contents))
    workbook = xlrd.open_workbook(file_contents=contents)
    sheets = xlsutils.workbook_sheets(workbook)
    logger.info('sheets: %r, %d', sheets, workbook.nsheets)

    plate_matrices = []
    errors = {}
    sheet_count = 0
    for sheet in sheets:
        sheet_count += 1
        logger.info('read sheet: %d, %r', sheet_count, sheet)
        sheet_plate_matrices, sheet_errors = read_rows(
            xlsutils.sheet_rows(sheet))
        logger.info('for sheet: %r, %d matrices read', sheet.name,
                    len(sheet_plate_matrices))
        if sheet_plate_matrices:
            plate_matrices.extend(sheet_plate_matrices)
            logger.info('plate_matrices: %r', len(plate_matrices))
        if sheet_errors:
            logger.warn('sheet errors: %r: %r', sheet.name, sheet_errors)
            errors[sheet.name] = sheet_errors

    if not sheet_count:
        errors['input_file'] = 'No sheets were read'
    elif not plate_matrices:
        errors['input_sheets'] = 'No matrices were found'
    return (plate_matrices, errors)
Ejemplo n.º 3
0
    def from_xls(self,
                 content,
                 root='objects',
                 list_keys=None,
                 list_delimiters=None,
                 **kwargs):
        '''
        Deserialize XLS workbook content.

        Only the first sheet of the workbook is converted; it is treated as a
        csv sheet. A sheet named 'error'/'errors' is returned as raw rows.

        @param content workbook file contents (string) to deserialize
        @param root if set, the parsed rows are wrapped as { root: rows }
        @param list_keys keys whose values should be parsed as lists
        @param list_delimiters delimiters used to split list values;
            defaults to [LIST_DELIMITER_XLS]
        '''
        logger.info('deserialize from_xls...')
        if isinstance(content, six.string_types):
            workbook = xlrd.open_workbook(file_contents=content)
        else:
            # NOTE(review): a StringIO is passed where open_workbook expects a
            # filename positional arg — confirm this branch is ever exercised
            workbook = xlrd.open_workbook(cStringIO.StringIO(content))

        if workbook.nsheets > 1:
            # TODO: concatenate all sheets?
            logger.warn('only first page of workbooks is supported')

        logger.info('read first sheet ...')
        # TODO: if root is specified, then get the sheet by name
        first_sheet = workbook.sheet_by_index(0)

        # Error sheets are passed through without csv conversion
        if first_sheet.name.lower() in ('error', 'errors'):
            return xlsutils.sheet_rows(first_sheet)

        if not list_delimiters:
            list_delimiters = [LIST_DELIMITER_XLS, ]

        # Workbooks are treated like sets of csv sheets
        data = csvutils.input_spreadsheet_reader(
            xlsutils.sheet_rows(first_sheet),
            list_delimiters=list_delimiters,
            list_keys=list_keys)

        return {root: data} if root else data
Ejemplo n.º 4
0
    def from_xls(self, content, root='objects', list_keys=None,
                 list_delimiters=None, **kwargs):
        '''
        Deserialize XLS workbook content (first sheet only, treated as csv).

        A sheet named 'error'/'errors' is returned as raw rows instead.

        @param content workbook file contents (string) to deserialize
        @param root if set, wrap the parsed rows as { root: rows }
        @param list_keys keys whose values should be parsed as lists
        @param list_delimiters delimiters used to split list values;
            defaults to [LIST_DELIMITER_XLS]
        '''
        logger.info('deserialize from_xls...')
        if isinstance(content, six.string_types):
            wb = xlrd.open_workbook(file_contents=content)
        else:
            # NOTE(review): StringIO is passed as open_workbook's filename
            # positional arg — confirm this branch is ever exercised
            wb = xlrd.open_workbook(cStringIO.StringIO(content))

        if wb.nsheets > 1:
            # TODO: concatenate all sheets?
            logger.warn('only first page of workbooks is supported')

        logger.info('read first sheet ...')
        # TODO: if root is specified, then get the sheet by name
        sheet = wb.sheet_by_index(0)

        # Error sheets are passed through without csv conversion
        if sheet.name.lower() in ('error', 'errors'):
            return xlsutils.sheet_rows(sheet)

        delimiters = list_delimiters or [LIST_DELIMITER_XLS, ]

        # Workbooks are treated like sets of csv sheets
        data = csvutils.input_spreadsheet_reader(
            xlsutils.sheet_rows(sheet),
            list_delimiters=delimiters,
            list_keys=list_keys)

        if root:
            return {root: data}
        return data
Ejemplo n.º 5
0
def parse_result_values(parsed_columns, sheets):
    '''
    Parse the Screen Result input file format into a valid API input format:
        - generator yielding one parsed result dict per data row

    @param parsed_columns column definitions used to map and parse each row
    @param sheets iterable of workbook sheets containing result values

    NOTE(review): validation errors are accumulated in a local ParseError
    but never raised/returned here — confirm the caller surfaces them.
    '''
    logger.info('parse_result_values...')
    seen_well_ids = set()
    parse_error = ParseError(errors={})

    def add_parse_error(sheet_name, validation_error):
        # Merge the validation errors under the (stringified) sheet name
        sheet_name = str(sheet_name)
        parse_error.errors.setdefault(sheet_name, {}).update(
            validation_error.errors)

    for sheet in sheets:
        logger.info('parse result values sheet: %r...', sheet.name)

        rows = sheet_rows(sheet)
        try:
            # First row holds the column headers
            header_row = result_value_field_mapper(next(rows), parsed_columns)
        except ValidationError as e:
            logger.exception('error: %r', e)
            add_parse_error(sheet.name, e)
            continue
        logger.info('output result values...')
        for row_index, row in enumerate(rows):
            try:
                parsed = parse_result_row(
                    row_index, parsed_columns, dict(zip(header_row, row)))
                if DEBUG_IMPORTER:
                    logger.info('parsed row: %d: %r', row_index, parsed)
                well_id = parsed['well_id']
                # Each well may only appear once across all sheets
                if well_id in seen_well_ids:
                    raise ParseError(
                        key=well_id,
                        msg='duplicate')
                seen_well_ids.add(well_id)
                yield parsed
            except ValidationError as e:
                logger.exception('parse error: %r', e)
                add_parse_error(sheet.name, e)
Ejemplo n.º 6
0
def parse_result_values(parsed_columns, sheets):
    logger.info('parse result values...')
    well_ids = set()
    parse_error = None
    for sheet in sheets:
        logger.info('parse result values sheet: %r...', sheet.name)
    
        rows = sheet_rows(sheet)
        try:
            header_row = result_value_field_mapper(rows.next(), parsed_columns)
        except ValidationError, e:
            logger.info('error: %r', e)
            if not parse_error:
                parse_error = ParseError(errors={})
            if not sheet.name in parse_error.errors:
                parse_error.errors[str(sheet.name)] = {}
            parse_error.errors[sheet.name] = e.errors
            continue
        logger.info('output result values')
        for i,row in enumerate(rows):
            try:
                result = parse_result_row(
                    i,parsed_columns,dict(zip(header_row,row)))
                if result['well_id'] in well_ids:
                    raise ParseError(
                        key=result['well_id'],
                        msg='duplicate')
                well_ids.add(result['well_id'])
                yield result
            except ValidationError,e:
                logger.exception('error: %r', e)
                if not parse_error:
                    parse_error = ParseError(errors={})
                if not sheet.name in parse_error.errors:
                    parse_error.errors[sheet.name] = {}
                parse_error.errors[sheet.name].update(e.errors)