def test_init_p01_init_optiondict_dieonbadoption(self):
    # the 'dieonbadoption' option must show up on the instance attribute
    options = {'dieonbadoption': True, 'no_warnings': True}
    matcher = kvmatch.MatchRow(['Col1'], optiondict=options)
    self.assertEqual(matcher.dieonbadoption, True)
def test_matchRowList_f01_longer_not_unique(self):
    # with unique_column on, a row that repeats a column header is rejected
    matcher = kvmatch.MatchRow(record, optiondict={'unique_column': True})

    # the record followed by a second copy of its first column
    duplicated_row = record[:] + record[:1]

    matched = matcher.matchRowList(duplicated_row, debug=False)
    self.assertFalse(matched)
    self.assertTrue(matcher.search_failed)

    expected_prefix = 'Row found with duplicate column headers:'
    self.assertTrue(matcher.error_msg.startswith(expected_prefix))
def test_init_f01_init_optiondict_warning_invalid_optiondict_die(self):
    # every entry of badoptiondict plus dieonbadoption: construction must raise
    options = dict(badoptiondict)
    options.update({'no_warnings': True, 'dieonbadoption': True})

    with self.assertRaises(Exception):
        kvmatch.MatchRow(['Col1'], optiondict=options)
def test_init_p01_init_optiondict_warning(self):
    # passing 'no_case' with warnings silenced still yields a MatchRow object
    options = {'no_case': True, 'no_warnings': True}
    matcher = kvmatch.MatchRow(['Col1'], optiondict=options)
    self.assertIsInstance(matcher, kvmatch.MatchRow)
def test_init_p01_init_optiondict_nocase(self):
    # the 'nocase' option must show up on the instance attribute
    options = {'nocase': True, 'no_warnings': True}
    matcher = kvmatch.MatchRow(['Col1'], optiondict=options)
    self.assertEqual(matcher.nocase, True)
def test_init_p01_init_optiondict_unique_column(self):
    # the 'unique_column' option must show up on the instance attribute
    options = {'unique_column': True, 'no_warnings': True}
    matcher = kvmatch.MatchRow(['Col1'], optiondict=options)
    self.assertEqual(matcher.unique_column, True)
def test_init_p01_init_optiondict_maxrows(self):
    # the 'maxrows' option must show up on the instance attribute
    options = {'maxrows': 2, 'no_warnings': True}
    matcher = kvmatch.MatchRow(['Col1'], optiondict=options)
    self.assertEqual(matcher.maxrows, 2)
def test_init_p02_init_optiondict_xlat_nocase(self):
    matcher = kvmatch.MatchRow(['Col1'], xlat_dict, optiondict={'nocase': True})

    # _xlatdict is expected to carry lower-cased keys with the values untouched
    keys_lowered = {}
    for source, target in xlat_dict.items():
        keys_lowered[source.lower()] = target
    self.assertEqual(matcher._xlatdict, keys_lowered)

    # _xlatdict_lower is expected to carry lower-cased keys and values
    all_lowered = {}
    for source, target in xlat_dict.items():
        all_lowered[source.lower()] = target.lower()
    self.assertEqual(matcher._xlatdict_lower, all_lowered)
def test_init_p02_init_optiondict_warning_returned_value(self):
    # the first recorded warning must be the bad-option message for 'no_case'
    options = {'no_case': True, 'no_warnings': True}
    matcher = kvmatch.MatchRow(['Col1'], optiondict=options)

    first_warning = matcher.warning_msg[0]
    expected_warning = kvmatch.badoption_msg('kvmatch:MatchRow:__init__',
                                             'no_case',
                                             badoptiondict['no_case'])
    self.assertEqual(first_warning, expected_warning)
def test_matchRowList_p06_find_row_in_list(self):
    matcher = kvmatch.MatchRow(kenlist, xlat_dict, {
        'nocase': True,
        'startrow': 2
    })

    # four non-matching rows, then the two candidate header rows
    rows = [nonrecord] * 4 + [kenlist, record]

    # feed rows until one is accepted; 'data' keeps the row the loop stopped on
    for data in rows:
        if matcher.matchRowList(data, debug=False):
            break

    # the row the loop stopped on must be accepted when offered again
    self.assertTrue(matcher.matchRowList(data, debug=False))
def test_matchRowList_p07_not_find_row_in_list(self):
    matcher = kvmatch.MatchRow(kenlist, xlat_dict, {
        'nocase': True,
        'maxrows': 3
    })

    # four non-matching rows come first, so maxrows=3 runs out before a match
    rows = [nonrecord] * 4 + [kenlist, record]

    for candidate in rows:
        matched = matcher.matchRowList(candidate, debug=False)
        if matched:
            print('matching record:', candidate)
            print('final header:', matcher._data_mapped)
        # stop on a match, or as soon as the matcher reports it gave up
        if matched or matcher.search_exceeded:
            break

    self.assertTrue(matcher.search_exceeded)
def test_matchRowList_p03_longer_xlat(self):
    # required columns: first two from kenlist, third taken straight from record
    required = kenlist[:2] + [record[2]]
    matcher = kvmatch.MatchRow(required, xlat_dict)

    found = matcher.matchRowList(record, debug=False)
    self.assertTrue(found)
def test_matchRowList_p02_longer(self):
    # requiring every column of record, the record itself must match
    matcher = kvmatch.MatchRow(record)
    found = matcher.matchRowList(record, debug=False)
    self.assertTrue(found)
def test_matchRowList_p01_simple(self):
    # a single required column is enough for record to match
    matcher = kvmatch.MatchRow(['Col1'])
    found = matcher.matchRowList(record, debug=False)
    self.assertTrue(found)
def test_remappedRow_p05_xlat_nocase(self):
    # lower-cased translation table + nocase: record must remap onto kenlist
    matcher = kvmatch.MatchRow(kenlist, xlat_dict_lower, {'nocase': True})
    remapped = matcher.remappedRow(record, debug=False)
    self.assertEqual(remapped, kenlist)
def test_remappedRow_p04_xlat_mismatched_case(self):
    matcher = kvmatch.MatchRow(kenlist, xlat_dict)

    # NOTE(review): record[-1:-1] is always an empty slice, so the expected
    # row is just the first three kenlist entries - confirm this is intended
    expected = kenlist[0:3] + record[-1:-1]

    remapped = matcher.remappedRow(record, debug=False)
    self.assertEqual(remapped, expected)
def test_remappedRow_p03_xlat(self):
    # with the translation table, record must remap onto kenlist
    matcher = kvmatch.MatchRow(kenlist, xlat_dict)
    remapped = matcher.remappedRow(record, debug=False)
    self.assertEqual(remapped, kenlist)
def test_remappedRow_p02_blankfld(self):
    matcher = kvmatch.MatchRow(['Col1'])

    # a trailing empty header is expected to come back named 'blank001'
    row_with_blank = record + ['']
    expected = record + ['blank001']

    remapped = matcher.remappedRow(row_with_blank, debug=False)
    self.assertEqual(remapped, expected)
def test_remappedRow_p01_nothing(self):
    # without a translation table the row must come back unchanged
    matcher = kvmatch.MatchRow(['Col1'])
    remapped = matcher.remappedRow(record)
    self.assertEqual(remapped, record)
def test_init_f01_no_req_col(self):
    # constructing without the required-columns argument must raise
    with self.assertRaises(Exception):
        kvmatch.MatchRow()
def test_init_p02_xlat(self):
    # required columns plus a translation dict builds a MatchRow
    matcher = kvmatch.MatchRow(['Col1'], xlat_dict)
    self.assertIsInstance(matcher, kvmatch.MatchRow)
def test_matchRowList_p04_longer_xlat_nocase(self):
    # translation table + nocase: record must be accepted as the kenlist header
    matcher = kvmatch.MatchRow(kenlist, xlat_dict, {'nocase': True})
    found = matcher.matchRowList(record, debug=False)
    self.assertTrue(found)
def test_init_f02_req_col_not_list(self):
    # a plain string in place of the required-columns list must raise
    not_a_list = 'Col1'
    with self.assertRaises(Exception):
        kvmatch.MatchRow(not_a_list)
def test_init_f03_xlatdict_not_dict(self):
    # a plain string in place of the translation dict must raise
    not_a_dict = 'xlatdict'
    with self.assertRaises(Exception):
        kvmatch.MatchRow(['Col1'], not_a_dict)
def test_init_f04_optiondict_not_dict(self):
    # a plain string in place of the option dict must raise
    not_a_dict = 'optiondict'
    with self.assertRaises(Exception):
        kvmatch.MatchRow(['Col1'], optiondict=not_a_dict)
def test_init_p03_init_optiondict_warning_invalid_optiondict_nodie(self):
    # every entry of badoptiondict, without dieonbadoption: the object is
    # built and one warning is recorded per bad option
    options = dict(badoptiondict)
    options.update({'no_warnings': True})

    matcher = kvmatch.MatchRow(['Col1'], optiondict=options)

    self.assertEqual(len(matcher.warning_msg), len(badoptiondict))
def chgsheet_findheader(excelDict,
                        req_cols,
                        xlatdict={},
                        optiondict={},
                        col_aref=None,
                        data_only=True,
                        debug=False):
    """Point an already-open workbook at a sheet and locate its header row.

    Args:
        excelDict: dict describing the open workbook.  The keys 'xlsfile',
            'xlsxfiletype', 'wb', 'sheetNames' and 'sheetName' are read.
        req_cols: required column names, handed to kvmatch.MatchRow.
        xlatdict: column translation dict, handed to kvmatch.MatchRow.
        optiondict: options.  Keys read here: 'sheetname', 'sheetrow',
            'col_header', 'aref_result', 'no_header', 'start_row' (1-based),
            'save_row' and 'max_rows'.  The dict is also validated with
            kvmatch.badoptiondict_check and handed to kvmatch.MatchRow.
        col_aref: optional list of column names that overrides whatever
            header was read; it is padded with '' up to the sheet width.
        data_only: not used by this function.
        debug: when true, progress is also printed to stdout.

    Returns:
        excelDict itself when optiondict['sheetname'] equals the sheet that
        is already selected; otherwise a new dict with the keys 'xlsfile',
        'xlsxfiletype', 'wb', 'sheetNames', 'sheetName', 's', 'sheettitle',
        'sheetmaxrow', 'sheetmaxcol', 'sheetminrow', 'sheetmincol',
        'row_header', 'header' and 'start_row'.

    Raises:
        Exception: 'header not found' when the matcher reports that its
            search limit was exceeded, or when the searched rows run out
            before a header row is located.
    """
    # NOTE: the mutable defaults are kept for interface compatibility; this
    # function itself never writes to xlatdict or optiondict.

    # local variables
    header = None

    # debugging
    if debug:
        print('req_cols:', req_cols)
        print('xlatdict:', xlatdict)
        print('optiondict:', optiondict)
        print('col_aref:', col_aref)
    logger.debug('req_cols:%s', req_cols)
    logger.debug('xlatdict:%s', xlatdict)
    logger.debug('optiondict:%s', optiondict)
    logger.debug('col_aref:%s', col_aref)

    # check to see if we are actually changing anything - if not return back
    # what was sent in
    if ('sheetname' in optiondict
            and excelDict['sheetName'] == optiondict['sheetname']):
        logger.debug('nothing changed - return what was sent in')
        return excelDict

    # create the list of misconfigured solutions
    badoptiondict = {
        'startrow': 'start_row',
        'startrows': 'start_row',
        'start_rows': 'start_row',
        'colheaders': 'col_header',
        'col_headers': 'col_header',
        'noheader': 'no_header',
        'noheaders': 'no_header',
        'no_headers': 'no_header',
        'arefresult': 'aref_result',
        'arefresults': 'aref_result',
        'aref_results': 'aref_result',
        'maxrow': 'max_rows',
        'max_row': 'max_rows',
        'maxrows': 'max_rows',
        'saverow': 'save_row',
        'saverows': 'save_row',
        'save_rows': 'save_row',
        'sheetName': 'sheetname',
    }

    # check what got passed in
    # NOTE(review): the label names readxls_findheader, not this function -
    # looks like a copy/paste; left untouched because it is runtime text
    kvmatch.badoptiondict_check('kvxls.readxls_findheader', optiondict,
                                badoptiondict, True)

    # set flags - defaults apply when the option was not passed in
    # if true - we take the first row read as the header
    col_header = optiondict.get('col_header', False)
    # if true - there are no headers to read
    no_header = optiondict.get('no_header', False)
    # only reported in the debug output of this function
    aref_result = optiondict.get('aref_result', False)
    save_row = optiondict.get('save_row', False)
    # upper bound on the rows considered on the sheet
    max_rows = optiondict.get('max_rows', 100000000)
    # row at which the header search starts (zero based internally)
    start_row = 0
    if 'start_row' in optiondict:
        # because we are not ZERO based in the users mind
        start_row = optiondict['start_row'] - 1

    # debugging
    if debug:
        print('col_header:', col_header)
        print('aref_result:', aref_result)
        print('no_header:', no_header)
        print('start_row:', start_row)
        print('save_row:', save_row)
        print('optiondict:', optiondict)
    logger.debug('col_header:%s', col_header)
    logger.debug('aref_result:%s', aref_result)
    logger.debug('no_header:%s', no_header)
    logger.debug('start_row:%s', start_row)
    logger.debug('save_row:%s', save_row)
    logger.debug('optiondict:%s', optiondict)

    # build object that will be used for record matching
    p = kvmatch.MatchRow(req_cols, xlatdict, optiondict)

    # read in values from excelDict
    xlsfile = excelDict['xlsfile']
    xlsxfiletype = excelDict['xlsxfiletype']
    wb = excelDict['wb']
    sheetNames = excelDict['sheetNames']

    # debugging
    if debug:
        print('sheetNames:', sheetNames)
    logger.debug('sheetNames:%s', sheetNames)

    # get the sheet we are going to work with
    if 'sheetname' in optiondict:
        sheetName = optiondict['sheetname']
    elif 'sheetrow' in optiondict:
        sheetName = sheetNames[optiondict['sheetrow']]
    else:
        sheetName = sheetNames[0]

    # debugging
    if debug:
        print('sheetName:', sheetName)
    logger.debug('sheetName:%s', sheetName)

    # create a workbook sheet object - using the name to get to the right
    # sheet; the two file types expose the sheet through different attributes
    # (presumably openpyxl for xlsx and xlrd otherwise - confirm with caller)
    if xlsxfiletype:
        s = wb[sheetName]
        sheettitle = s.title
        sheetmaxrow = s.max_row
        sheetmaxcol = s.max_column
    else:
        s = wb.sheet_by_name(sheetName)
        sheettitle = s.name
        sheetmaxrow = s.nrows
        sheetmaxcol = s.ncols
    sheetminrow = 0
    sheetmincol = 0

    # debugging
    if debug:
        print('sheettitle:', sheettitle)
        print('sheetmaxrow:', sheetmaxrow)
        print('sheetmaxcol:', sheetmaxcol)
    logger.debug('sheettitle:%s', sheettitle)
    logger.debug('sheetmaxrow:%s', sheetmaxrow)
    logger.debug('sheetmaxcol:%s', sheetmaxcol)

    # check and see if we need to limit max row
    if max_rows < sheetmaxrow:
        sheetmaxrow = max_rows
        if debug:
            print('sheetmaxrow-changed:', sheetmaxrow)
        logger.debug('sheetmaxrow-changed:%s', sheetmaxrow)

    # ------------------------------- HEADER START ------------------------------
    # define the header for the records being read in
    if no_header:
        # user said we are not to look for the header in this file
        # we need to subtract 1 here because the reader increments PAST the
        # header - so if there is no header we need to start at zero
        # ( -1 + 1 later)
        row_header = start_row - 1

        # if no col_aref - then we must force this to aref_result
        if not col_aref:
            aref_result = True
            if debug:
                print('no_header:no col_aref:set aref_result to true')
            logger.debug('no_header:no col_aref:set aref_result to true')

        # debug
        if debug:
            print('no_header:start_row:', start_row)
        logger.debug('no_header:start_row:%d', start_row)
    else:
        # debug
        if debug:
            print('find_header:start_row:', start_row)
        logger.debug('find_header:start_row:%d', start_row)

        # look for the header in the file
        for row in range(start_row, sheetmaxrow):
            # read in a row of data
            rowdata = _extract_excel_row_into_list(xlsxfiletype, s, row,
                                                   sheetmincol, sheetmaxcol,
                                                   debug)

            # user may have specified that the first row read is the header
            if col_header:
                header = rowdata
                row_header = row
                if debug:
                    print('header_1strow:', header)
                logger.debug('header_1strow:%s', header)
                break

            # have not found the header yet - so look
            if debug:
                print('looking for header at row:', row)
            logger.debug('looking for header at row:%d', row)

            # search to see if this row is the header
            matched = p.matchRowList(rowdata, debug=debug)

            # the matcher gave up - a bare 'raise' here has no active
            # exception to re-raise, so raise an explicit error instead
            if p.search_exceeded:
                raise Exception('header not found')

            if matched:
                # set the row_header and grab the mapped header
                row_header = row
                header = p._data_mapped
                if debug:
                    print('header_found:', header)
                logger.debug('header_found:%s', header)
                break
        else:
            # ran out of rows without a header: row_header was never set, so
            # fail here rather than with an unbound name further down
            raise Exception('header not found')
    # ------------------------------- HEADER END ------------------------------

    # debug
    if debug:
        print('exitted header find loop')
    logger.debug('exitted header find loop')

    # user wants to define/override the column headers rather than read them in
    if col_aref:
        # debugging
        if debug:
            print('copying col_aref into header')
        logger.debug('copying col_aref into header')

        # copy over the values (never touch the caller's list)
        header = col_aref[:]

        # make sure there is one header entry per sheet column - pad with ''
        missing = sheetmaxcol - sheetmincol - len(col_aref)
        if missing > 0:
            header.extend([''] * missing)

        # now pass the final information through remapped
        header = p.remappedRow(header)

        # debug
        if debug:
            print('col_aref:header:', header)
        logger.debug('col_aref:header:%s', header)

    # ------------------------------- OBJECT DEFINITION ------------------------------
    excelDict = {
        'xlsfile': xlsfile,
        'xlsxfiletype': xlsxfiletype,
        'wb': wb,
        'sheetNames': sheetNames,
        'sheetName': sheetName,
        's': s,
        'sheettitle': sheettitle,
        'sheetmaxrow': sheetmaxrow,
        'sheetmaxcol': sheetmaxcol,
        'sheetminrow': sheetminrow,
        'sheetmincol': sheetmincol,
        'row_header': row_header,
        'header': header,
        'start_row': start_row,
    }

    return excelDict
def test_init_p01_init_optiondict_xlat(self):
    # without nocase the translation dict is stored as given and the
    # lower-cased variant stays empty
    matcher = kvmatch.MatchRow(['Col1'], xlat_dict)

    stored = matcher._xlatdict
    self.assertEqual(stored, xlat_dict)

    stored_lower = matcher._xlatdict_lower
    self.assertEqual(stored_lower, {})
def readcsv2list_findheader(csvfile,
                            req_cols,
                            xlatdict={},
                            optiondict={},
                            col_aref=None,
                            debug=False):
    """Read a CSV file into a list of records after locating its header row.

    Args:
        csvfile: path of the CSV file to read.
        req_cols: required column names, handed to kvmatch.MatchRow.
        xlatdict: column translation dict, handed to kvmatch.MatchRow.
        optiondict: options.  Keys read here: 'col_header' (first line is the
            header), 'no_header' (file has no header), 'aref_result' (return
            lists instead of dicts), 'start_row' (1-based line where the
            header search starts) and 'save_row' (attach the row number to
            each record).  The dict is also validated with
            kvmatch.badoptiondict_check and handed to kvmatch.MatchRow.
        col_aref: optional list of column names that overrides whatever
            header was read.
        debug: when true, progress is also printed to stdout.

    Returns:
        A list with one entry per row read after the header.  Entries are
        dicts keyed by the header, or the raw row lists when 'aref_result'
        is set (or forced because 'no_header' was given without col_aref).
        With 'save_row', each entry also carries the 1-based position of the
        row among the rows produced by csv.reader ('XLSRow' key for dicts,
        appended element for lists).

    Raises:
        Exception: 'col_aref not a list' when col_aref is given but is not a
            list; 'header not found' when the matcher reports that its search
            limit was exceeded.
        StopIteration: when the file ends before the header line or the
            start row is reached (unchanged from earlier behaviour).
    """
    # NOTE: the mutable defaults are kept for interface compatibility; this
    # function itself never writes to xlatdict or optiondict.

    # local variables
    results = []
    header = None

    # debugging
    if debug:
        print('req_cols:', req_cols)
        print('xlatdict:', xlatdict)
        print('optiondict:', optiondict)
        print('col_aref:', col_aref)
    logger.debug('req_cols:%s', req_cols)
    logger.debug('xlatdict:%s', xlatdict)
    logger.debug('optiondict:%s', optiondict)
    logger.debug('col_aref:%s', col_aref)

    # check type
    if col_aref and not isinstance(col_aref, list):
        logger.error('col_aref must be list:%s', col_aref)
        raise Exception('col_aref not a list')

    # create the list of misconfigured solutions
    badoptiondict = {
        'startrow': 'start_row',
        'startrows': 'start_row',
        'start_rows': 'start_row',
        'colheaders': 'col_header',
        'col_headers': 'col_header',
        'noheader': 'no_header',
        'noheaders': 'no_header',
        'no_headers': 'no_header',
        'arefresult': 'aref_result',
        'arefresults': 'aref_result',
        'aref_results': 'aref_result',
        'saverow': 'save_row',
        'saverows': 'save_row',
        'save_rows': 'save_row',
    }

    # check what got passed in
    kvmatch.badoptiondict_check('kvcsv.readcsv2list_findheader', optiondict,
                                badoptiondict, True)

    # set flags - defaults apply when the option was not passed in
    # if true - we take the first row of the file as the header
    col_header = optiondict.get('col_header', False)
    # if true - there are no headers read
    no_header = optiondict.get('no_header', False)
    # if true - we don't return dicts, we return a list
    aref_result = optiondict.get('aref_result', False)
    # if true - then we append/save the row number with the record
    save_row = optiondict.get('save_row', False)
    # row at which the header search starts (zero based internally)
    start_row = 0
    if 'start_row' in optiondict:
        # because we are not ZERO based in the users mind
        start_row = optiondict['start_row'] - 1

    # build object that will be used for record matching
    p = kvmatch.MatchRow(req_cols, xlatdict, optiondict)

    # the with-block closes the file on every exit path, including errors;
    # newline='' is what the csv module asks for so that line endings inside
    # quoted fields are handled by the reader itself
    with open(csvfile, mode='r', newline='') as csv_file:
        reader = csv.reader(csv_file)

        # 1-based count of the rows pulled from the reader so far; it is set
        # in every branch so save_row always has a number to work with
        row = 0

        # ------------------------------- HEADER START ------------------------------
        # define the header for the records being read in
        if no_header:
            # user said we are not to look for the header in this file
            # we need to subtract 1 here because we are going to increment
            # PAST the header in the next section - so if there is no header
            # we need to start at zero ( -1 + 1 later)
            row_header = start_row - 1

            # if no col_aref - then we must force this to aref_result
            if not col_aref:
                aref_result = True
                if debug:
                    print('no_header:no col_aref:set aref_result to true')
                logger.debug('no_header:no col_aref:set aref_result to true')

            # debug
            if debug:
                print('no_header:start_row:', start_row)
            logger.debug('no_header:start_row:%d', start_row)
        elif col_header:
            # extract the header as the first line in the file
            header = next(reader)
            row_header = 0
            row = 1
            if debug:
                print('col_header:header_1strow:', header)
            logger.debug('col_header:header_1strow:%s', header)
        else:
            # debug
            if debug:
                print('find_header:start_row:', start_row)
            logger.debug('find_header:start_row:%d', start_row)

            # get to the start_row record
            for _ in range(start_row):
                line = next(reader)
                if debug:
                    print('skipping line:', line)
                logger.debug('skipping line:%s', line)

            # counting row just to provide feedback
            row = start_row

            # now start the search for the header
            for rowdata in reader:
                # increment row
                row += 1

                # have not found the header yet - so look
                if debug:
                    print('looking for header at row:', row)
                logger.debug('looking for header at row:%d', row)

                # search to see if this row is the header
                if p.matchRowList(rowdata, debug=debug):
                    # set the row_header and grab the mapped header
                    row_header = row
                    header = p._data_mapped
                    if debug:
                        print('header_found:', header)
                    logger.debug('header_found:%s', header)
                    break

                # previously this check was shadowed by an
                # "or p.search_exceeded" in the test above, so a row that
                # did not match was taken as the header once the limit hit
                if p.search_exceeded:
                    # did not find the header - raise error
                    raise Exception('header not found')
        # ------------------------------- HEADER END ------------------------------

        # debug
        if debug:
            print('exitted header find loop')
        logger.debug('exitted header find loop')

        # user wants to define/override the column headers rather than read
        # them in
        if col_aref:
            # debugging
            if debug:
                print('copying col_aref into header')
            logger.debug('copying col_aref into header')

            # copy over the values (never touch the caller's list) and pass
            # the final information through remapped
            header = p.remappedRow(col_aref[:])

            # debug
            if debug:
                print('col_aref:header:', header)
            logger.debug('col_aref:header:%s', header)

        # ------------------------------- RECORDS START ------------------------------
        # continue processing this file
        for rowdata in reader:
            # this record's 1-based position among the rows read
            row += 1

            if debug:
                print('rowdata:', rowdata)
            logger.debug('rowdata:%s', rowdata)

            # determine what we are returning
            if aref_result:
                # we want to return the data we read
                rowdict = rowdata
                if debug:
                    print('saving as array')
                logger.debug('saving as array')

                # optionally append the row number to this list
                if save_row:
                    rowdict.append(row)
                    if debug:
                        print('append row to record')
                    logger.debug('append row to record')
            else:
                # we found the header so now build up the records
                rowdict = dict(zip(header, rowdata))
                if debug:
                    print('saving as dict')
                logger.debug('saving as dict')

                # optionally add the XLSRow attribute to this dictionary
                if save_row:
                    rowdict['XLSRow'] = row
                    if debug:
                        print('add column XLSRow with row to record')
                    logger.debug('add column XLSRow with row to record')

            # add this record to the results
            results.append(rowdict)
            if debug:
                print('append rowdict to results')
            logger.debug('append rowdict to results')
        # ------------------------------- RECORDS END ------------------------------

    # return the results
    return results
def test_init_p01_simple(self):
    # a list of required columns alone is enough to build a MatchRow
    matcher = kvmatch.MatchRow(['Col1'])
    self.assertIsInstance(matcher, kvmatch.MatchRow)