Beispiel #1
0
 def test_badoptiondict_check_f01_missing_key_die(self):
     # The call must raise when dieonbadoption=True is passed together with
     # the option key 'no_case' (presumably a key listed in the module-level
     # badoptiondict - confirm against the test module).
     bad_options = {'no_case': True}
     with self.assertRaises(Exception):
         kvmatch.badoptiondict_check(
             'test_badoptiondict_check_p01_bad_key',
             bad_options,
             badoptiondict,
             True,
             dieonbadoption=True)
Beispiel #2
0
def chgsheet_findheader(excelDict,
                        req_cols,
                        xlatdict=None,
                        optiondict=None,
                        col_aref=None,
                        data_only=True,
                        debug=False):
    """Select a sheet of an already-open workbook and locate its header row.

    Args:
        excelDict: dict describing the open workbook.  The keys read here are
            'sheetName', 'xlsfile', 'xlsxfiletype', 'wb' and 'sheetNames'.
        req_cols: required columns, handed to kvmatch.MatchRow.
        xlatdict: column translation dict, handed to kvmatch.MatchRow
            (None is treated as an empty dict).
        optiondict: options controlling the search (None is treated as an
            empty dict).  Keys read here: 'sheetname', 'sheetrow',
            'col_header', 'no_header', 'aref_result', 'start_row' (1-based),
            'save_row' and 'max_rows'.
        col_aref: optional list of column names that overrides whatever
            header was read; padded with '' up to the sheet width.
        data_only: accepted for interface compatibility; not used here.
        debug: when true, progress is also printed to stdout.

    Returns:
        The excelDict that was passed in when optiondict['sheetname'] equals
        the current sheet, otherwise a new dict describing the selected sheet
        (including 'row_header', 'header' and 'start_row').

    Raises:
        Exception: 'header not found' when the header search is exceeded or
            the rows are exhausted without a match.
    """
    # None stands in for "not passed" so no mutable default is shared
    # between calls
    if xlatdict is None:
        xlatdict = {}
    if optiondict is None:
        optiondict = {}

    # local variables
    header = None

    # debugging
    if debug:
        print('req_cols:', req_cols)
        print('xlatdict:', xlatdict)
        print('optiondict:', optiondict)
        print('col_aref:', col_aref)
    logger.debug('req_cols:%s', req_cols)
    logger.debug('xlatdict:%s', xlatdict)
    logger.debug('optiondict:%s', optiondict)
    logger.debug('col_aref:%s', col_aref)

    # check to see if we are actually changing anything - if not return what was sent in
    if ('sheetname' in optiondict
            and excelDict['sheetName'] == optiondict['sheetname']):
        logger.debug('nothing changed - return what was sent in')
        return excelDict

    # set flags
    col_header = False  # if true - the first row read is taken as the header
    no_header = False  # if true - no header is searched for in the sheet
    aref_result = False  # only recorded/logged in this function
    save_row = False  # only recorded/logged in this function

    start_row = 0  # zero-based row where the header search starts

    max_rows = 100000000

    # create the list of misconfigured solutions
    badoptiondict = {
        'startrow': 'start_row',
        'startrows': 'start_row',
        'start_rows': 'start_row',
        'colheaders': 'col_header',
        'col_headers': 'col_header',
        'noheader': 'no_header',
        'noheaders': 'no_header',
        'no_headers': 'no_header',
        'arefresult': 'aref_result',
        'arefresults': 'aref_result',
        'aref_results': 'aref_result',
        'maxrow': 'max_rows',
        'max_row': 'max_rows',
        'maxrows': 'max_rows',
        'saverow': 'save_row',
        'saverows': 'save_row',
        'save_rows': 'save_row',
        'sheetName': 'sheetname',
    }

    # check what got passed in
    # NOTE(review): the label names readxls_findheader although this is
    # chgsheet_findheader - looks like a copy/paste, confirm before changing
    kvmatch.badoptiondict_check('kvxls.readxls_findheader', optiondict,
                                badoptiondict, True)

    # pull in passed values from optiondict (after the check above has run)
    if 'col_header' in optiondict:
        col_header = optiondict['col_header']
    if 'aref_result' in optiondict:
        aref_result = optiondict['aref_result']
    if 'no_header' in optiondict:
        no_header = optiondict['no_header']
    if 'start_row' in optiondict:
        # because we are not ZERO based in the users mind
        start_row = optiondict['start_row'] - 1
    if 'save_row' in optiondict:
        save_row = optiondict['save_row']
    if 'max_rows' in optiondict:
        max_rows = optiondict['max_rows']

    # debugging
    if debug:
        print('col_header:', col_header)
        print('aref_result:', aref_result)
        print('no_header:', no_header)
        print('start_row:', start_row)
        print('save_row:', save_row)
        print('optiondict:', optiondict)
    logger.debug('col_header:%s', col_header)
    logger.debug('aref_result:%s', aref_result)
    logger.debug('no_header:%s', no_header)
    logger.debug('start_row:%s', start_row)
    logger.debug('save_row:%s', save_row)
    logger.debug('optiondict:%s', optiondict)

    # build object that will be used for record matching
    p = kvmatch.MatchRow(req_cols, xlatdict, optiondict)

    # read in values from excelDict
    xlsfile = excelDict['xlsfile']
    xlsxfiletype = excelDict['xlsxfiletype']
    wb = excelDict['wb']
    sheetNames = excelDict['sheetNames']

    # debugging
    if debug:
        print('sheetNames:', sheetNames)
    logger.debug('sheetNames:%s', sheetNames)

    # get the sheet we are going to work with:
    # explicit name first, then index into the sheet list, else the first sheet
    if 'sheetname' in optiondict:
        sheetName = optiondict['sheetname']
    elif 'sheetrow' in optiondict:
        sheetName = sheetNames[optiondict['sheetrow']]
    else:
        sheetName = sheetNames[0]

    # debugging
    if debug:
        print('sheetName:', sheetName)
    logger.debug('sheetName:%s', sheetName)

    # create a workbook sheet object - the two workbook flavours expose
    # the sheet and its dimensions through different attributes
    if xlsxfiletype:
        s = wb[sheetName]
        sheettitle = s.title
        sheetmaxrow = s.max_row
        sheetmaxcol = s.max_column
    else:
        s = wb.sheet_by_name(sheetName)
        sheettitle = s.name
        sheetmaxrow = s.nrows
        sheetmaxcol = s.ncols
    sheetminrow = 0
    sheetmincol = 0

    # debugging
    if debug:
        print('sheettitle:', sheettitle)
        print('sheetmaxrow:', sheetmaxrow)
        print('sheetmaxcol:', sheetmaxcol)
    logger.debug('sheettitle:%s', sheettitle)
    logger.debug('sheetmaxrow:%s', sheetmaxrow)
    logger.debug('sheetmaxcol:%s', sheetmaxcol)

    # check and see if we need to limit max row
    if max_rows < sheetmaxrow:
        sheetmaxrow = max_rows
        if debug:
            print('sheetmaxrow-changed:', sheetmaxrow)
        logger.debug('sheetmaxrow-changed:%s', sheetmaxrow)

    # ------------------------------- HEADER START ------------------------------

    # define the header for the records being read in
    if no_header:
        # user said we are not to look for the header in this file
        # record the row just before start_row as the header position so that
        # "header row + 1" lands on start_row
        row_header = start_row - 1

        # if no col_aref - then we must force this to aref_result
        if not col_aref:
            aref_result = True
            if debug:
                print('no_header:no col_aref:set aref_result to true')
            logger.debug('no_header:no col_aref:set aref_result to true')

        # debug
        if debug:
            print('no_header:start_row:', start_row)
        logger.debug('no_header:start_row:%d', start_row)

    else:
        # debug
        if debug:
            print('find_header:start_row:', start_row)
        logger.debug('find_header:start_row:%d', start_row)

        # look for the header in the file
        for row in range(start_row, sheetmaxrow):
            # read in a row of data
            rowdata = _extract_excel_row_into_list(xlsxfiletype, s, row,
                                                   sheetmincol, sheetmaxcol,
                                                   debug)

            # user may have specified that the first row read is the header
            if col_header:
                header = rowdata
                row_header = row
                if debug:
                    print('header_1strow:', header)
                logger.debug('header_1strow:%s', header)
                break

            # have not found the header yet - so look
            if debug:
                print('looking for header at row:', row)
            logger.debug('looking for header at row:%d', row)

            # Search to see if this row is the header
            matched = p.matchRowList(rowdata, debug=debug)

            # an exceeded search takes priority over a match
            if p.search_exceeded:
                # a bare "raise" here has no active exception to re-raise,
                # so raise an explicit error instead
                raise Exception('header not found')

            if matched:
                # set the row_header and grab the mapped header
                row_header = row
                header = p._data_mapped
                if debug:
                    print('header_found:', header)
                logger.debug('header_found:%s', header)
                break
        else:
            # rows exhausted without a header: row_header would be unbound
            # further down, so fail here with a clear message
            raise Exception('header not found')

    # ------------------------------- HEADER END ------------------------------

    # debug
    if debug:
        print('exitted header find loop')
    logger.debug('exitted header find loop')

    # user wants to define/override the column headers rather than read them in
    if col_aref:
        if debug:
            print('copying col_aref into header')
        logger.debug('copying col_aref into header')
        # copy over the values so the caller's list is not modified
        header = col_aref[:]
        # pad with empty names when fewer names than sheet columns were given
        missing = (sheetmaxcol - sheetmincol) - len(col_aref)
        if missing > 0:
            header.extend([''] * missing)

        # now pass the final information through remapped
        header = p.remappedRow(header)
        if debug:
            print('col_aref:header:', header)
        logger.debug('col_aref:header:%s', header)

    # ------------------------------- OBJECT DEFINITION ------------------------------
    excelDict = {
        'xlsfile': xlsfile,
        'xlsxfiletype': xlsxfiletype,
        'wb': wb,
        'sheetNames': sheetNames,
        'sheetName': sheetName,
        's': s,
        'sheettitle': sheettitle,
        'sheetmaxrow': sheetmaxrow,
        'sheetmaxcol': sheetmaxcol,
        'sheetminrow': sheetminrow,
        'sheetmincol': sheetmincol,
        'row_header': row_header,
        'header': header,
        'start_row': start_row,
    }

    return excelDict
Beispiel #3
0
 def test_badoptiondict_check_f01_no_bad_key(self):
     # The value returned for the option key 'nocase' must have length zero.
     check_result = kvmatch.badoptiondict_check(
         'test_badoptiondict_check_f01_no_bad_key',
         {'nocase': True},
         badoptiondict,
         True)
     self.assertEqual(len(check_result), 0)
Beispiel #4
0
def readcsv2list_findheader(csvfile,
                            req_cols,
                            xlatdict=None,
                            optiondict=None,
                            col_aref=None,
                            debug=False):
    """Read a CSV file into a list of records after locating its header.

    Args:
        csvfile: path of the CSV file to open for reading.
        req_cols: required columns, handed to kvmatch.MatchRow.
        xlatdict: column translation dict, handed to kvmatch.MatchRow
            (None is treated as an empty dict).
        optiondict: options controlling the read (None is treated as an
            empty dict).  Keys read here: 'col_header', 'no_header',
            'aref_result', 'start_row' (1-based) and 'save_row'.
        col_aref: optional list of column names that overrides whatever
            header was read.
        debug: when true, progress is also printed to stdout.

    Returns:
        A list with one entry per record after the header: a dict keyed by
        the header names, or the raw list of values when aref_result is in
        effect.  With save_row set, each record also carries its 1-based
        position among the rows yielded by csv.reader (key 'XLSRow' for
        dicts, appended as the last element for lists).

    Raises:
        Exception: 'col_aref not a list' when col_aref is given but is not a
            list; 'header not found' when the header search is exceeded.
        StopIteration: when the file has fewer rows than needed to read the
            first-row header or to skip to start_row.
    """
    # None stands in for "not passed" so no mutable default is shared
    # between calls
    if xlatdict is None:
        xlatdict = {}
    if optiondict is None:
        optiondict = {}

    # local variables
    results = []
    header = None

    # debugging
    if debug:
        print('req_cols:', req_cols)
        print('xlatdict:', xlatdict)
        print('optiondict:', optiondict)
        print('col_aref:', col_aref)
    logger.debug('req_cols:%s', req_cols)
    logger.debug('xlatdict:%s', xlatdict)
    logger.debug('optiondict:%s', optiondict)
    logger.debug('col_aref:%s', col_aref)

    # check type
    if col_aref and not isinstance(col_aref, list):
        logger.error('col_aref must be list:%s', col_aref)
        raise Exception('col_aref not a list')

    # set flags
    col_header = False  # if true - we take the first row of the file as the header
    no_header = False  # if true - no header is searched for in the file
    aref_result = False  # if true - we don't return dicts, we return a list
    save_row = False  # if true - then we append/save the row number with the record

    start_row = 0  # zero-based row where the header search starts

    # create the list of misconfigured solutions
    badoptiondict = {
        'startrow': 'start_row',
        'startrows': 'start_row',
        'start_rows': 'start_row',
        'colheaders': 'col_header',
        'col_headers': 'col_header',
        'noheader': 'no_header',
        'noheaders': 'no_header',
        'no_headers': 'no_header',
        'arefresult': 'aref_result',
        'arefresults': 'aref_result',
        'aref_results': 'aref_result',
        'saverow': 'save_row',
        'saverows': 'save_row',
        'save_rows': 'save_row',
    }

    # check what got passed in
    kvmatch.badoptiondict_check('kvcsv.readcsv2list_findheader', optiondict,
                                badoptiondict, True)

    # pull in passed values from optiondict (after the check above has run)
    if 'col_header' in optiondict:
        col_header = optiondict['col_header']
    if 'aref_result' in optiondict:
        aref_result = optiondict['aref_result']
    if 'no_header' in optiondict:
        no_header = optiondict['no_header']
    if 'start_row' in optiondict:
        # because we are not ZERO based in the users mind
        start_row = optiondict['start_row'] - 1
    if 'save_row' in optiondict:
        save_row = optiondict['save_row']

    # build object that will be used for record matching
    p = kvmatch.MatchRow(req_cols, xlatdict, optiondict)

    # the context manager closes the file on every exit path, including the
    # exceptions raised below; newline='' is what the csv module asks for so
    # that newlines inside quoted fields are interpreted correctly
    with open(csvfile, mode='r', newline='') as csv_file:
        reader = csv.reader(csv_file)

        # number of rows consumed from the reader so far (1-based position
        # of the row most recently read)
        row = 0

        # ------------------------------- HEADER START ------------------------------

        if no_header:
            # user said we are not to look for the header in this file
            # NOTE(review): start_row is not used to skip rows in this mode -
            # confirm whether that is intended

            # if no col_aref - then we must force this to aref_result
            if not col_aref:
                aref_result = True
                if debug:
                    print('no_header:no col_aref:set aref_result to true')
                logger.debug('no_header:no col_aref:set aref_result to true')

            # debug
            if debug:
                print('no_header:start_row:', start_row)
            logger.debug('no_header:start_row:%d', start_row)

        elif col_header:
            # extract the header as the first line in the file
            header = next(reader)
            row = 1
            if debug:
                print('col_header:header_1strow:', header)
            logger.debug('col_header:header_1strow:%s', header)

        else:
            # debug
            if debug:
                print('find_header:start_row:', start_row)
            logger.debug('find_header:start_row:%d', start_row)

            # get to the start_row record
            for _ in range(start_row):
                line = next(reader)
                if debug:
                    print('skipping line:', line)
                logger.debug('skipping line:%s', line)

            # the skipped rows have been consumed
            row = start_row

            # now start the search for the header
            for rowdata in reader:
                row += 1

                # have not found the header yet - so look
                if debug:
                    print('looking for header at row:', row)
                logger.debug('looking for header at row:%d', row)

                # Search to see if this row is the header
                if p.matchRowList(rowdata, debug=debug):
                    # found the header grab the output
                    header = p._data_mapped
                    if debug:
                        print('header_found:', header)
                    logger.debug('header_found:%s', header)
                    break

                # checked separately from the match so that an exceeded
                # search is reported instead of being taken for a header
                if p.search_exceeded:
                    raise Exception('header not found')

        # ------------------------------- HEADER END ------------------------------

        # debug
        if debug:
            print('exitted header find loop')
        logger.debug('exitted header find loop')

        # user wants to define/override the column headers rather than read them in
        if col_aref:
            if debug:
                print('copying col_aref into header')
            logger.debug('copying col_aref into header')
            # copy so the caller's list is left alone, then remap the names
            header = p.remappedRow(col_aref[:])
            if debug:
                print('col_aref:header:', header)
            logger.debug('col_aref:header:%s', header)

        # ------------------------------- RECORDS START ------------------------------

        # continue processing this file
        for rowdata in reader:
            # keep the counter in step so each record gets its own number
            row += 1

            if debug:
                print('rowdata:', rowdata)
            logger.debug('rowdata:%s', rowdata)

            # determine what we are returning
            if aref_result:
                # we want to return the data we read
                rowdict = rowdata
                if debug:
                    print('saving as array')
                logger.debug('saving as array')

                # optionally append the row number to this record
                if save_row:
                    rowdict.append(row)
                    if debug:
                        print('append row to record')
                    logger.debug('append row to record')

            else:
                # pair the header names with the values of this record
                rowdict = dict(zip(header, rowdata))
                if debug:
                    print('saving as dict')
                logger.debug('saving as dict')

                # optionally add the XLSRow attribute to this dictionary
                if save_row:
                    rowdict['XLSRow'] = row
                    if debug:
                        print('add column XLSRow with row to record')
                    logger.debug('add column XLSRow with row to record')

            # add this record to the results
            results.append(rowdict)
            if debug:
                print('append rowdict to results')
            logger.debug('append rowdict to results')

        # ------------------------------- RECORDS END ------------------------------

    # return the results
    return results