def test_badoptiondict_check_f01_missing_key_die(self):
    # With dieonbadoption=True the check is expected to raise for the
    # option key 'no_case' instead of returning normally.
    options_under_test = {'no_case': True}
    with self.assertRaises(Exception):
        kvmatch.badoptiondict_check(
            'test_badoptiondict_check_p01_bad_key',
            options_under_test,
            badoptiondict,
            True,
            dieonbadoption=True,
        )
def chgsheet_findheader(excelDict, req_cols, xlatdict=None, optiondict=None,
                        col_aref=None, data_only=True, debug=False):
    """Select a sheet of an already-opened workbook and locate its header row.

    Args:
        excelDict: dict describing the open workbook.  The keys read here are
            'sheetName', 'xlsfile', 'xlsxfiletype', 'wb' and 'sheetNames'.
        req_cols: required columns; handed to kvmatch.MatchRow.
        xlatdict: column translation dict; handed to kvmatch.MatchRow.
            Defaults to a fresh empty dict.
        optiondict: option dict (defaults to a fresh empty dict).  Keys read
            here: 'sheetname', 'sheetrow', 'col_header', 'no_header',
            'aref_result', 'save_row', 'start_row' (1-based from the caller's
            point of view) and 'max_rows'.  It is also handed to
            kvmatch.badoptiondict_check and kvmatch.MatchRow.
        col_aref: optional sequence of column names that overrides whatever
            header was read; it is padded with '' up to the sheet width and
            passed through MatchRow.remappedRow.
        data_only: accepted to keep the signature stable; not used here.
        debug: when true, diagnostics are also printed to stdout.

    Returns:
        The excelDict that was passed in, untouched, when
        optiondict['sheetname'] equals excelDict['sheetName'].  Otherwise a
        new dict with the keys 'xlsfile', 'xlsxfiletype', 'wb', 'sheetNames',
        'sheetName', 's', 'sheettitle', 'sheetmaxrow', 'sheetmaxcol',
        'sheetminrow', 'sheetmincol', 'row_header', 'header' and 'start_row'.

    Raises:
        RuntimeError: when no header row could be located (the matcher
            reported search_exceeded, or every scanned row failed to match).
    """
    # use fresh containers per call rather than shared mutable defaults
    if xlatdict is None:
        xlatdict = {}
    if optiondict is None:
        optiondict = {}

    # local variables
    header = None
    row_header = None  # stays None until a header position is established

    # debugging
    if debug:
        print('req_cols:', req_cols)
        print('xlatdict:', xlatdict)
        print('optiondict:', optiondict)
        print('col_aref:', col_aref)
    logger.debug('req_cols:%s', req_cols)
    logger.debug('xlatdict:%s', xlatdict)
    logger.debug('optiondict:%s', optiondict)
    logger.debug('col_aref:%s', col_aref)

    # check to see if we are actually changing anything - if not, return what
    # was sent in
    if ('sheetname' in optiondict
            and excelDict['sheetName'] == optiondict['sheetname']):
        logger.debug('nothing changed - return what was sent in')
        return excelDict

    # map of commonly mistyped option names to the real option name
    badoptiondict = {
        'startrow': 'start_row',
        'startrows': 'start_row',
        'start_rows': 'start_row',
        'colheaders': 'col_header',
        'col_headers': 'col_header',
        'noheader': 'no_header',
        'noheaders': 'no_header',
        'no_headers': 'no_header',
        'arefresult': 'aref_result',
        'arefresults': 'aref_result',
        'aref_results': 'aref_result',
        'maxrow': 'max_rows',
        'max_row': 'max_rows',
        'maxrows': 'max_rows',
        'saverow': 'save_row',
        'saverows': 'save_row',
        'save_rows': 'save_row',
        'sheetName': 'sheetname',
    }

    # check what got passed in
    kvmatch.badoptiondict_check('kvxls.readxls_findheader', optiondict,
                                badoptiondict, True)

    # flags, taken from optiondict when present
    col_header = optiondict.get('col_header', False)    # first row read is the header
    no_header = optiondict.get('no_header', False)      # there is no header row to read
    aref_result = optiondict.get('aref_result', False)  # rows are lists rather than dicts
    save_row = optiondict.get('save_row', False)        # append/save the XLSRow with the record
    max_rows = optiondict.get('max_rows', 100000000)
    if 'start_row' in optiondict:
        # callers count rows from 1; internally rows are zero based
        start_row = optiondict['start_row'] - 1
    else:
        start_row = 0

    # debugging
    if debug:
        print('col_header:', col_header)
        print('aref_result:', aref_result)
        print('no_header:', no_header)
        print('start_row:', start_row)
        print('save_row:', save_row)
        print('optiondict:', optiondict)
    logger.debug('col_header:%s', col_header)
    logger.debug('aref_result:%s', aref_result)
    logger.debug('no_header:%s', no_header)
    logger.debug('start_row:%s', start_row)
    logger.debug('save_row:%s', save_row)
    logger.debug('optiondict:%s', optiondict)

    # build object that will be used for record matching
    p = kvmatch.MatchRow(req_cols, xlatdict, optiondict)

    # read in values from excelDict
    xlsfile = excelDict['xlsfile']
    xlsxfiletype = excelDict['xlsxfiletype']
    wb = excelDict['wb']
    sheetNames = excelDict['sheetNames']

    # debugging
    if debug:
        print('sheetNames:', sheetNames)
    logger.debug('sheetNames:%s', sheetNames)

    # get the sheet we are going to work with
    if 'sheetname' in optiondict:
        sheetName = optiondict['sheetname']
    elif 'sheetrow' in optiondict:
        sheetName = sheetNames[optiondict['sheetrow']]
    else:
        sheetName = sheetNames[0]

    # debugging
    if debug:
        print('sheetName:', sheetName)
    logger.debug('sheetName:%s', sheetName)

    # create a workbook sheet object - the two workbook flavours expose the
    # sheet and its dimensions through different attributes
    if xlsxfiletype:
        s = wb[sheetName]
        sheettitle = s.title
        sheetmaxrow = s.max_row
        sheetmaxcol = s.max_column
    else:
        s = wb.sheet_by_name(sheetName)
        sheettitle = s.name
        sheetmaxrow = s.nrows
        sheetmaxcol = s.ncols
    sheetminrow = 0
    sheetmincol = 0

    # debugging
    if debug:
        print('sheettitle:', sheettitle)
        print('sheetmaxrow:', sheetmaxrow)
        print('sheetmaxcol:', sheetmaxcol)
    logger.debug('sheettitle:%s', sheettitle)
    logger.debug('sheetmaxrow:%s', sheetmaxrow)
    logger.debug('sheetmaxcol:%s', sheetmaxcol)

    # check and see if we need to limit max row
    if max_rows < sheetmaxrow:
        sheetmaxrow = max_rows
        if debug:
            print('sheetmaxrow-changed:', sheetmaxrow)
        logger.debug('sheetmaxrow-changed:%s', sheetmaxrow)

    # ------------------------------- HEADER START ------------------------------

    if no_header:
        # no header row in this sheet: report the position just before
        # start_row so that "row_header + 1" is the first data row
        row_header = start_row - 1

        # without col_aref there are no column names, so force list results
        if not col_aref:
            aref_result = True
            if debug:
                print('no_header:no col_aref:set aref_result to true')
            logger.debug('no_header:no col_aref:set aref_result to true')

        # debug
        if debug:
            print('no_header:start_row:', start_row)
        logger.debug('no_header:start_row:%d', start_row)
    else:
        # debug
        if debug:
            print('find_header:start_row:', start_row)
        logger.debug('find_header:start_row:%d', start_row)

        # look for the header in the sheet
        for row in range(start_row, sheetmaxrow):
            # read in a row of data
            rowdata = _extract_excel_row_into_list(
                xlsxfiletype, s, row, sheetmincol, sheetmaxcol, debug)

            # user may have specified that the first row read is the header
            if col_header:
                header = rowdata
                row_header = row
                if debug:
                    print('header_1strow:', header)
                logger.debug('header_1strow:%s', header)
                break

            # have not found the header yet - so look
            if debug:
                print('looking for header at row:', row)
            logger.debug('looking for header at row:%d', row)

            # search to see if this row is the header
            if p.matchRowList(rowdata, debug=debug) or p.search_exceeded:
                if p.search_exceeded:
                    # A bare ``raise`` here has no active exception and only
                    # yields an opaque RuntimeError; raise the same exception
                    # type with a message that names the actual problem.
                    raise RuntimeError('header not found')

                # found the header - remember where and grab the mapped row
                row_header = row
                header = p._data_mapped
                if debug:
                    print('header_found:', header)
                logger.debug('header_found:%s', header)
                break

    # ------------------------------- HEADER END ------------------------------

    # debug
    if debug:
        print('exitted header find loop')
    logger.debug('exitted header find loop')

    # every scanned row failed to match: fail explicitly instead of hitting an
    # unbound row_header while building the result below
    if row_header is None:
        raise RuntimeError('header not found')

    # user wants to define/override the column headers rather than read them
    if col_aref:
        if debug:
            print('copying col_aref into header')
        logger.debug('copying col_aref into header')

        # copy the values so the caller's sequence is not modified
        header = col_aref[:]

        # pad with empty names when fewer names than sheet columns were given
        missing = sheetmaxcol - sheetmincol - len(col_aref)
        if missing > 0:
            header.extend([''] * missing)

        # now pass the final information through remapped
        header = p.remappedRow(header)

        # debug
        if debug:
            print('col_aref:header:', header)
        logger.debug('col_aref:header:%s', header)

    # ------------------------------- OBJECT DEFINITION ------------------------------

    return {
        'xlsfile': xlsfile,
        'xlsxfiletype': xlsxfiletype,
        'wb': wb,
        'sheetNames': sheetNames,
        'sheetName': sheetName,
        's': s,
        'sheettitle': sheettitle,
        'sheetmaxrow': sheetmaxrow,
        'sheetmaxcol': sheetmaxcol,
        'sheetminrow': sheetminrow,
        'sheetmincol': sheetmincol,
        'row_header': row_header,
        'header': header,
        'start_row': start_row,
    }
def test_badoptiondict_check_f01_no_bad_key(self):
    # For the option key 'nocase' the check is expected to hand back an
    # empty collection.
    outcome = kvmatch.badoptiondict_check(
        'test_badoptiondict_check_f01_no_bad_key',
        {'nocase': True},
        badoptiondict,
        True,
    )
    self.assertEqual(len(outcome), 0)
def readcsv2list_findheader(csvfile, req_cols, xlatdict=None, optiondict=None,
                            col_aref=None, debug=False):
    """Read a CSV file, establish its header and return the data rows.

    Args:
        csvfile: path of the CSV file to open for reading.
        req_cols: required columns; handed to kvmatch.MatchRow.
        xlatdict: column translation dict; handed to kvmatch.MatchRow.
            Defaults to a fresh empty dict.
        optiondict: option dict (defaults to a fresh empty dict).  Keys read
            here: 'col_header', 'no_header', 'aref_result', 'save_row' and
            'start_row' (1-based from the caller's point of view).  It is
            also handed to kvmatch.badoptiondict_check and kvmatch.MatchRow.
        col_aref: optional list of column names that overrides whatever
            header was read; it is passed through MatchRow.remappedRow.
        debug: when true, diagnostics are also printed to stdout.

    Returns:
        A list with one entry per data row.  Entries are the raw row lists
        when aref_result is in effect (forced when no_header is set without
        col_aref); otherwise dicts built by zipping the header with the row.
        With save_row set, the 1-based row number of the record is appended
        to the list / stored under 'XLSRow'.  If the header search consumes
        the whole file without a match, the result is an empty list.

    Raises:
        Exception: if col_aref is given but is not a list, or if the matcher
            reports search_exceeded before a header row matched.
        StopIteration: propagated from next() when the file has fewer rows
            than the header / start_row handling needs to read.
    """
    # use fresh containers per call rather than shared mutable defaults
    if xlatdict is None:
        xlatdict = {}
    if optiondict is None:
        optiondict = {}

    # local variables
    results = []
    header = None

    # debugging
    if debug:
        print('req_cols:', req_cols)
        print('xlatdict:', xlatdict)
        print('optiondict:', optiondict)
        print('col_aref:', col_aref)
    logger.debug('req_cols:%s', req_cols)
    logger.debug('xlatdict:%s', xlatdict)
    logger.debug('optiondict:%s', optiondict)
    logger.debug('col_aref:%s', col_aref)

    # check type
    if col_aref and not isinstance(col_aref, list):
        logger.error('col_aref must be list:%s', col_aref)
        raise Exception('col_aref not a list')

    # map of commonly mistyped option names to the real option name
    badoptiondict = {
        'startrow': 'start_row',
        'startrows': 'start_row',
        'start_rows': 'start_row',
        'colheaders': 'col_header',
        'col_headers': 'col_header',
        'noheader': 'no_header',
        'noheaders': 'no_header',
        'no_headers': 'no_header',
        'arefresult': 'aref_result',
        'arefresults': 'aref_result',
        'aref_results': 'aref_result',
        'saverow': 'save_row',
        'saverows': 'save_row',
        'save_rows': 'save_row',
    }

    # check what got passed in
    kvmatch.badoptiondict_check('kvcsv.readcsv2list_findheader', optiondict,
                                badoptiondict, True)

    # flags, taken from optiondict when present
    col_header = optiondict.get('col_header', False)    # first row of the file is the header
    no_header = optiondict.get('no_header', False)      # there is no header row to read
    aref_result = optiondict.get('aref_result', False)  # return lists rather than dicts
    save_row = optiondict.get('save_row', False)        # append/save the row number with the record
    if 'start_row' in optiondict:
        # callers count rows from 1; internally rows are zero based
        start_row = optiondict['start_row'] - 1
    else:
        start_row = 0

    # build object that will be used for record matching
    p = kvmatch.MatchRow(req_cols, xlatdict, optiondict)

    # The context manager closes the file on every exit path, including
    # exceptions.  newline='' is the csv module's documented way to open a
    # file so that newlines inside quoted fields are handled correctly.
    with open(csvfile, mode='r', newline='') as csv_file:
        reader = csv.reader(csv_file)

        # number of rows consumed from the reader so far; used to report the
        # 1-based row number of each record when save_row is set
        row = 0

        # ------------------------------- HEADER START ------------------------------

        if no_header:
            # user said we are not to look for the header in this file;
            # without col_aref there are no column names, so force lists
            if not col_aref:
                aref_result = True
                if debug:
                    print('no_header:no col_aref:set aref_result to true')
                logger.debug('no_header:no col_aref:set aref_result to true')

            # debug
            if debug:
                print('no_header:start_row:', start_row)
            logger.debug('no_header:start_row:%d', start_row)

        elif col_header:
            # extract the header as the first line in the file
            header = next(reader)
            row = 1
            if debug:
                print('col_header:header_1strow:', header)
            logger.debug('col_header:header_1strow:%s', header)

        else:
            # debug
            if debug:
                print('find_header:start_row:', start_row)
            logger.debug('find_header:start_row:%d', start_row)

            # get to the start_row record
            for _ in range(start_row):
                line = next(reader)
                if debug:
                    print('skipping line:', line)
                logger.debug('skipping line:%s', line)
            row = start_row

            # now start the search for the header
            for rowdata in reader:
                row += 1

                # have not found the header yet - so look
                if debug:
                    print('looking for header at row:', row)
                logger.debug('looking for header at row:%d', row)

                if p.matchRowList(rowdata, debug=debug):
                    # found the header - grab the mapped output
                    header = p._data_mapped
                    if debug:
                        print('header_found:', header)
                    logger.debug('header_found:%s', header)
                    break

                # Checked separately from the match: folding it into the
                # condition above would treat an exceeded search as a found
                # header and make this error unreachable.
                if p.search_exceeded:
                    raise Exception('header not found')

        # ------------------------------- HEADER END ------------------------------

        # debug
        if debug:
            print('exitted header find loop')
        logger.debug('exitted header find loop')

        # user wants to define/override the column headers rather than read them
        if col_aref:
            if debug:
                print('copying col_aref into header')
            logger.debug('copying col_aref into header')

            # copy so the caller's list is not modified, then remap
            header = p.remappedRow(col_aref[:])

            # debug
            if debug:
                print('col_aref:header:', header)
            logger.debug('col_aref:header:%s', header)

        # ------------------------------- RECORDS START ------------------------------

        for rowdata in reader:
            # advance per record so each one reports its own row number
            row += 1

            if debug:
                print('rowdata:', rowdata)
            logger.debug('rowdata:%s', rowdata)

            if aref_result:
                # return the row as read
                rowdict = rowdata
                if debug:
                    print('saving as array')
                logger.debug('saving as array')

                # optionally append the row number to the record
                if save_row:
                    rowdict.append(row)
                    if debug:
                        print('append row to record')
                    logger.debug('append row to record')
            else:
                # build the record by pairing header names with values
                rowdict = dict(zip(header, rowdata))
                if debug:
                    print('saving as dict')
                logger.debug('saving as dict')

                # optionally add the XLSRow attribute to this dictionary
                if save_row:
                    rowdict['XLSRow'] = row
                    if debug:
                        print('add column XLSRow with row to record')
                    logger.debug('add column XLSRow with row to record')

            # add this record to the results
            results.append(rowdict)
            if debug:
                print('append rowdict to results')
            logger.debug('append rowdict to results')

        # ------------------------------- RECORDS END ------------------------------

    # return the results
    return results