예제 #1
0
def readcsv2dict_with_noheader(csvfile,
                               dictkeys,
                               header,
                               dupkeyfail=False,
                               noshowwarning=False,
                               encoding='windows-1252',
                               debug=False):
    """Read a headerless CSV file into a dictionary keyed by chosen columns.

    Every row is paired positionally with ``header`` to form a row
    dictionary (``zip`` stops at the shorter of the two, so extra columns or
    extra header names are dropped).  The record key is built from that row
    dictionary by ``kvmatch.build_multifield_key`` using ``dictkeys``.  When
    a key repeats, the later row replaces the earlier one.

    Args:
        csvfile: Path of the CSV file to read.
        dictkeys: Key column name(s) handed to
            ``kvmatch.build_multifield_key``; must be truthy.
        header: Non-empty list of column names applied to each row.
        dupkeyfail: When true, raise after reading if any key repeated.
        noshowwarning: When true, do not print the duplicate-key message
            (it is still logged).
        encoding: Text encoding used to open the file.
        debug: Accepted for signature compatibility; not used here.

    Returns:
        Tuple ``(results, header, dupcount)`` where ``results`` maps record
        key to row dictionary and ``dupcount`` counts repeated keys.

    Raises:
        Exception: If ``dictkeys`` or ``header`` is empty, or ``header`` is
            not a list.
        ValueError: If ``dupkeyfail`` is true and a key repeated.
    """
    if not dictkeys:
        logger.error('must pass in dictkeys')
        raise Exception('must pass in dictkeys')
    if not header:
        logger.error('must pass in header')
        raise Exception('must pass in header')
    if not isinstance(header, list):
        logger.error('header must be a list:%s', header)
        # Exception() does not interpolate %-args the way logger does, so
        # build the message explicitly.
        raise Exception('header must be a list:{}'.format(header))

    results = {}
    dupkeys = []
    dupcount = 0
    # newline='' lets the csv module handle line endings itself, which is
    # required for quoted fields that contain embedded newlines.
    with open(csvfile, mode='r', encoding=encoding, newline='') as csv_file:
        for row in csv.reader(csv_file):
            rowdict = dict(zip(header, row))
            reckey = kvmatch.build_multifield_key(rowdict, dictkeys)
            if reckey in results:
                # always count repeats; only remember them when they are fatal
                dupcount += 1
                if dupkeyfail:
                    dupkeys.append(reckey)
            # create/update the dictionary (last row for a key wins)
            results[reckey] = rowdict

    # dupkeys is only populated when dupkeyfail is set
    if dupkeys:
        joined = ','.join(dupkeys)
        logger.warning(
            'readcsv2dict:v%s:file:%s:duplicate key failure:keys:%s',
            AppVersion, csvfile, joined)
        # display message if the user wants this displayed
        if not noshowwarning:
            print('readcsv2dict:duplicate key failure:', joined)
        raise ValueError('Duplicate key failure')

    return results, header, dupcount
예제 #2
0
def readcsv2dict_with_header(csvfile,
                             dictkeys,
                             dupkeyfail=False,
                             noshowwarning=False,
                             headerlc=False,
                             encoding='windows-1252',
                             debug=False):
    """Read a CSV file whose first row is the header into a keyed dictionary.

    The first row supplies the column names.  Every following row is paired
    positionally with that header to form a row dictionary, and the record
    key is built from it by ``kvmatch.build_multifield_key`` using
    ``dictkeys``.  When a key repeats, the later row replaces the earlier
    one.

    Args:
        csvfile: Path of the CSV file to read.
        dictkeys: Key column name, or list of names, handed to
            ``kvmatch.build_multifield_key``.
        dupkeyfail: When true, raise after reading if any key repeated.
        noshowwarning: When true, do not print the duplicate-key message
            (it is still logged).
        headerlc: When true, lowercase both the header names and
            ``dictkeys`` before building keys.
        encoding: Text encoding used to open the file.
        debug: When true, print the header before/after lowercasing.

    Returns:
        Tuple ``(results, header, dupcount)`` where ``results`` maps record
        key to row dictionary, ``header`` is the (possibly lowercased) list
        of column names, and ``dupcount`` counts repeated keys.  An empty
        file yields ``({}, [], 0)``.

    Raises:
        ValueError: If ``dupkeyfail`` is true and a key repeated.
    """
    results = {}
    dupkeys = []
    dupcount = 0
    # newline='' lets the csv module handle line endings itself, which is
    # required for quoted fields that contain embedded newlines.
    with open(csvfile, mode='r', encoding=encoding, newline='') as csv_file:
        reader = csv.reader(csv_file)
        # An empty file has no header row; use an empty header instead of
        # letting StopIteration escape to the caller.
        header = next(reader, [])
        if debug:
            print('header-before:', header)
        logger.debug('header-before:%s', header)
        if headerlc:
            if isinstance(dictkeys, str):
                # a single column name: lowercase it whole rather than
                # iterating it character by character
                dictkeys = dictkeys.lower()
            else:
                dictkeys = [x.lower() for x in dictkeys]
            header = [x.lower() for x in header]
            if debug:
                print('header-after:', header)
            logger.debug('header-after:%s', header)
        for row in reader:
            rowdict = dict(zip(header, row))
            reckey = kvmatch.build_multifield_key(rowdict, dictkeys)
            if reckey in results:
                # always count repeats; only remember them when they are fatal
                dupcount += 1
                if dupkeyfail:
                    dupkeys.append(reckey)
            # create/update the dictionary (last row for a key wins)
            results[reckey] = rowdict

    # dupkeys is only populated when dupkeyfail is set
    if dupkeys:
        joined = ','.join(dupkeys)
        logger.warning(
            'readcsv2dict:v%s:file:%s:duplicate key failure:keys:%s',
            AppVersion, csvfile, joined)
        # display message if the user wants this displayed
        if not noshowwarning:
            print('readcsv2dict:duplicate key failure:', joined)
        raise ValueError('Duplicate key failure')

    return results, header, dupcount
예제 #3
0
 def test_build_multifield_key_f03_empty_string_dictkeys(self):
     # failure case: an empty string passed as dictkeys must raise
     empty_dictkeys = ''
     with self.assertRaises(Exception):
         kvmatch.build_multifield_key(rowdict, empty_dictkeys)
예제 #4
0
 def test_build_multifield_key_f01_missing_key(self):
     # failure case: the call must raise when 'Missing' is among the key
     # fields (presumably absent from the rowdict fixture - confirm there)
     wanted_fields = ['Company', 'Missing']
     with self.assertRaises(Exception):
         kvmatch.build_multifield_key(rowdict, wanted_fields)
예제 #5
0
 def test_build_multifield_key_p02_multiplestrings_joinchar(self):
     # pass case: two string fields combined with an explicit ':' joinchar
     key_fields = ['Company', 'Wine']
     built_key = kvmatch.build_multifield_key(rowdict, key_fields,
                                              joinchar=':')
     self.assertEqual(built_key, 'Test:Yummy')
예제 #6
0
 def test_build_multifield_key_p05_single_string_string(self):
     # pass case: dictkeys given as a bare string rather than a list
     single_field = 'Company'
     built_key = kvmatch.build_multifield_key(rowdict, single_field)
     self.assertEqual(built_key, 'Test')
예제 #7
0
 def test_build_multifield_key_p04_string_date(self):
     # pass case: a string field combined with the ProcessDate field; the
     # expected key shows the date rendered as 'YYYY-MM-DD HH:MM:SS'
     key_fields = ['Company', 'ProcessDate']
     built_key = kvmatch.build_multifield_key(rowdict, key_fields)
     self.assertEqual(built_key, 'Test|2020-01-01 00:00:00')
예제 #8
0
 def test_build_multifield_key_p03_string_number(self):
     # pass case: a string field combined with the Price field; the
     # expected key shows the number rendered as '10.0'
     key_fields = ['Company', 'Price']
     built_key = kvmatch.build_multifield_key(rowdict, key_fields)
     self.assertEqual(built_key, 'Test|10.0')
예제 #9
0
 def test_build_multifield_key_p02_multiplestrings(self):
     # pass case: two string fields with no joinchar given; the expected
     # key shows them separated by '|'
     key_fields = ['Company', 'Wine']
     built_key = kvmatch.build_multifield_key(rowdict, key_fields)
     self.assertEqual(built_key, 'Test|Yummy')
예제 #10
0
def readxls2dict_findheader(xlsfile,
                            dictkeys,
                            req_cols=None,
                            xlatdict=None,
                            optiondict=None,
                            col_aref=None,
                            debug=False,
                            dupkeyfail=False):
    """Read a spreadsheet into a dictionary keyed by one or more columns.

    The rows are loaded with ``readxls2list_findheader`` and each returned
    row is stored under the key that ``kvmatch.build_multifield_key`` builds
    from ``dictkeys``.  When a key repeats, the later row replaces the
    earlier one.

    Args:
        xlsfile: Spreadsheet to read; passed through to
            ``readxls2list_findheader``.
        dictkeys: Key column name or list of names.  A single string is
            wrapped in a list before use.
        req_cols: Passed through to ``readxls2list_findheader``; a fresh
            empty list is used when omitted.
        xlatdict: Passed through to ``readxls2list_findheader``; a fresh
            empty dict is used when omitted.
        optiondict: Passed through to ``readxls2list_findheader``; a fresh
            empty dict is used when omitted.
        col_aref: Passed through to ``readxls2list_findheader``.
        debug: When true, print progress messages and wait for Enter once
            (interactive use only).
        dupkeyfail: When true, raise after processing if any key repeated.

    Returns:
        Dictionary mapping record key to the row returned for it.

    Raises:
        RuntimeError: If ``dictkeys`` is empty, or ``dupkeyfail`` is true
            and a key repeated.
    """
    # validate we have proper input
    if not dictkeys:
        message = 'kvxls:readxls2dict_findheader:dictkeys not populated - program error'
        logger.error(message)
        print(message)
        # A bare ``raise`` here has no active exception to re-raise (and
        # would re-raise an unrelated one if the caller is inside an except
        # block).  RuntimeError is what that bare raise normally produced,
        # so existing handlers keep working.
        raise RuntimeError(message)

    # Fresh containers per call: mutable defaults in the signature would be
    # shared between calls.
    req_cols = [] if req_cols is None else req_cols
    xlatdict = {} if xlatdict is None else xlatdict
    optiondict = {} if optiondict is None else optiondict

    # keys seen more than once (only tracked when dupkeyfail is set)
    dupkeys = []

    # results defined as a dict
    results = {}

    # debugging
    logger.debug('dictkeys:%s', dictkeys)
    if debug:
        print('readxls2dict_findheader:dictkeys:', dictkeys)
        input('press enter')

    # accept a single column name as well as a list of names
    if isinstance(dictkeys, str):
        dictkeys = [dictkeys]
        if debug:
            print(
                'readxls2dict_findheader:converted dictkeys from string to list'
            )
        logger.debug('converted dictkeys from string to list')

    # debugging
    if debug:
        print('readxls2dict_findheader:reading in xls as a list first')
    logger.debug('reading in xls as a list first')

    # read in the data from the file
    resultslist = readxls2list_findheader(xlsfile,
                                          req_cols,
                                          xlatdict=xlatdict,
                                          optiondict=optiondict,
                                          col_aref=col_aref,
                                          debug=debug)

    # debugging
    if debug:
        print(
            'readxls2dict_findheader:xls data is in an array - now convert to a dictionary'
        )
        print('readxls2dict_findheader:dictkeys:', dictkeys)
    logger.debug('xls data is in an array - now convert to a dictionary')
    logger.debug('dictkeys:%s', dictkeys)

    # convert to a dictionary based on keys provided
    for rowdict in resultslist:
        if debug:
            print('rowdict:', rowdict)
            print('dictkeys:', dictkeys)
        logger.debug('rowdict:%s', rowdict)
        logger.debug('dictkeys:%s', dictkeys)
        reckey = kvmatch.build_multifield_key(rowdict, dictkeys)
        # remember repeated keys only when they are meant to be fatal
        if dupkeyfail and reckey in results:
            dupkeys.append(reckey)

        # create/update the dictionary (last row for a key wins)
        results[reckey] = rowdict

    # fail if we found dupkeys
    if dupkeys:
        joined = ','.join(dupkeys)
        logger.error('duplicate key failure:%s', joined)
        print('readxls2dict:duplicate key failure:', joined)
        # explicit exception instead of a bare ``raise`` (see note above)
        raise RuntimeError('readxls2dict:duplicate key failure:' + joined)

    # return the results
    return results
예제 #11
0
def readcsv2dict_findheader(csvfile,
                            req_cols,
                            dictkeys,
                            xlatdict=None,
                            optiondict=None,
                            col_aref=None,
                            debug=False,
                            dupkeyfail=False):
    """Read a CSV file into a dictionary keyed by one or more columns.

    The rows are loaded with ``readcsv2list_findheader`` and each returned
    row is stored under the key that ``kvmatch.build_multifield_key`` builds
    from ``dictkeys``.  When a key repeats, the later row replaces the
    earlier one.

    Args:
        csvfile: CSV file to read; passed through to
            ``readcsv2list_findheader``.
        req_cols: Passed through to ``readcsv2list_findheader``.
        dictkeys: Non-empty list of key column names.
        xlatdict: Passed through to ``readcsv2list_findheader``; a fresh
            empty dict is used when omitted.
        optiondict: Passed through to ``readcsv2list_findheader``; a fresh
            empty dict is used when omitted.  A truthy ``no_header`` needs
            ``col_aref``; a truthy ``aref_result`` is rejected.
        col_aref: Passed through to ``readcsv2list_findheader``.
        debug: When true, print the inputs, intermediate rows and result.
        dupkeyfail: When true, raise after processing if any key repeated.

    Returns:
        Dictionary mapping record key to the row returned for it.

    Raises:
        Exception: If ``dictkeys`` is empty or not a list, if the
            ``optiondict`` settings are invalid, or if ``dupkeyfail`` is
            true and a key repeated.
    """
    # check inputs
    if not dictkeys:
        raise Exception('dictkeys must be populated')
    if not isinstance(dictkeys, list):
        # Exception() does not interpolate %-args, so format explicitly
        raise Exception('dictkeys must be a list:{}'.format(dictkeys))

    # Fresh containers per call: mutable defaults in the signature would be
    # shared between calls.
    xlatdict = {} if xlatdict is None else xlatdict
    optiondict = {} if optiondict is None else optiondict

    # Reject unusable option combinations before reading the file; these
    # checks depend only on the arguments.
    if optiondict.get('no_header') and not col_aref:
        raise Exception(
            'invalid setting optiondict[no_header] and no col_aref')
    if optiondict.get('aref_result'):
        raise Exception('invalid setting optiondict[aref_result]')

    # debugging
    if debug:
        print('dictkeys:', dictkeys)

    # local variables
    dupkeys = []
    dictresults = {}

    # read in the data from the file
    results = readcsv2list_findheader(csvfile,
                                      req_cols,
                                      xlatdict=xlatdict,
                                      optiondict=optiondict,
                                      col_aref=col_aref,
                                      debug=debug)

    if debug:
        print('results:', results)

    # convert to a dictionary based on keys provided
    for rowdict in results:
        if debug:
            print('dictkeys:', dictkeys)
            print('rowdict:', rowdict)

        reckey = kvmatch.build_multifield_key(rowdict, dictkeys)
        # remember repeated keys only when they are meant to be fatal
        if dupkeyfail and reckey in dictresults:
            dupkeys.append(reckey)

        # create/update the dictionary (last row for a key wins)
        dictresults[reckey] = rowdict

    # fail if we found dupkeys
    if dupkeys:
        joined = ','.join(dupkeys)
        print('readcsv2dict:duplicate key failure:', joined)
        raise Exception('duplicate key failure:{}'.format(joined))

    if debug:
        print('dictresults:', dictresults)

    # return the results
    return dictresults