def test_read_non_unique_data_unexpected_error(self):
    key_col = 'tre'
    expected_msg = 'Unexpected non-unique columns found'
    expected_items = sorted(['lista', 'ett'])
    with self.assertRaises(MyError) as cm:
        csv_file_to_dict(self.test_infile.name, key_col,
                         self.test_header, non_unique=False)
    # ensure error message is the same
    result_msg, _, result_items = cm.exception.value.partition(':')
    result_items = strip_list_entries(result_items.split(','))
    self.assertEqual(result_msg, expected_msg)
    self.assertEqual(sorted(result_items), expected_items)
def test_read_list_data(self):
    key_col = self.test_header.split('|')[1]
    lists = ('lista', )
    expected = {
        '2': {
            'ett': '1',
            'lista': ['1', '2', '3', '4', '5'],
            'fem': '',
            'tre': '3',
            'två': '2',
            'fyra': '4'
        },
        'a2': {
            'lista': ['a1', 'a2', 'a3', 'a4', 'a5'],
            'ett': 'a1',
            'fem': 'a5',
            'tre': 'a3',
            'två': 'a2',
            'fyra': 'a4'
        }
    }
    result = csv_file_to_dict(self.test_infile.name, key_col,
                              self.test_header, lists=lists)
    self.assertDictEqual(result, expected)
def rename(base_dir, sub_cat, in_filename, log_file='move.log'):
    """
    Identify any files to replace and rename them to their commons names.

    :param base_dir: Path to directory in which replacement image files
        are found.
    :param sub_cat: The name of the subdirectory into which processed
        files should be moved.
    :param in_filename: The photoAll.csv file filtered to only contain
        the files to replace.
    :param log_file: The name of the log file to be created (in base_dir).
    """
    # Load input data
    in_filename = common.modify_path(base_dir, in_filename)
    header_check = u'PhoId|PhoObjId|PhoBeschreibungM|PhoAufnahmeortS|' \
                   u'PhoSwdS|MulId|filnamn|AdrVorNameS|AdrNameS|PhoSystematikS'
    data = csv.csv_file_to_dict(in_filename, "filnamn", header_check,
                                keep=('PhoSystematikS', 'filnamn'),
                                delimiter='|', codec='utf-16')

    # reformat the commons filenames
    url_prefix = u'https://commons.wikimedia.org/wiki/File:'
    for k, v in data.items():
        if v['PhoSystematikS'].startswith(url_prefix):
            data[k] = v['PhoSystematikS'][len(url_prefix):]
        else:
            pywikibot.output("error in indatafile: %s, %s" % (k, v))
            # drop the broken entry so it cannot be used as a rename
            # target further down
            del data[k]

    # find candidate files
    candidates = prep.find_files(base_dir, ('.tif', ), subdir=False)

    # rename the files
    sub_cat = common.modify_path(base_dir, sub_cat)
    log_file = common.modify_path(base_dir, log_file)
    common.create_dir(sub_cat)
    log = []
    for candidate in candidates:
        base_name = os.path.basename(candidate)
        if base_name not in data:
            log.append('%s not found in csv file' % base_name)
            continue
        commons_name = data.pop(base_name)
        commons_name = common.modify_path(sub_cat, commons_name)
        os.rename(candidate, commons_name)

    for k in data.keys():
        log.append('%s not found on disk' % k)
    common.open_and_write_file(log_file, '\n'.join(log), codec='utf-8')
    pywikibot.output(u'Created %s' % log_file)
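# A minimal usage sketch for rename(), assuming the directory layout and the
# filtered photoAll.csv described in the docstring; all paths and names here
# are illustrative, not taken from the project:
#
#     rename(
#         base_dir='/data/replacements',       # hypothetical directory
#         sub_cat='processed',                 # created under base_dir
#         in_filename='photoAll_filtered.csv'  # pipe-delimited, utf-16
#     )
#
# Files present in both the csv and base_dir are moved to
# base_dir/processed/<commons_name>; any unmatched file or csv entry is
# recorded in base_dir/move.log.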
def load_data(in_file):
    """
    Load the csv file and collect the distinct folder names.

    @param in_file: the path to the metadata file
    @return: dict, list
    """
    header = u'Folder|Original|Commons'
    key_col = (u'Folder', u'Original')
    data = csv_methods.csv_file_to_dict(in_file, key_col, header)
    folders = []
    for val in data.values():
        folders.append(val[u'Folder'])
    folders = list(set(folders))  # remove any duplicates
    return (data, folders)
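# An illustration of the structure load_data() returns, assuming
# csv_file_to_dict combines the compound (Folder, Original) key into a single
# key string (the exact key format depends on csv_file_to_dict); all values
# below are invented examples:
#
#     data, folders = load_data('metadata.csv')  # hypothetical path
#     # data    -> {<Folder/Original key>: {u'Folder': u'box_01',
#     #                                     u'Original': u'img_001.tif',
#     #                                     u'Commons': u'Example.tif'}, ...}
#     # folders -> [u'box_01', u'box_02']  # deduplicated, order not stable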
def load_data(self, in_file):
    """
    Load the provided data (in whichever format) and produce a dict
    with an entry per file which can be used for further processing.

    @param in_file: the path to the metadata file
    @return: dict
    """
    key_col = u'Identifikationsnr'
    lists = (u'Ämnesord', u'Material', u'Motiv-ämnesord')
    return csv_methods.csv_file_to_dict(
        in_file, key_col, EXPECTED_HEADER, non_unique=True, lists=lists,
        list_delimiter=',')
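# A sketch of the returned structure, assuming EXPECTED_HEADER contains the
# three list columns; the identifier and values are made up:
#
#     data = self.load_data('metadata.csv')  # hypothetical path
#     # data[u'NM.0123456'][u'Ämnesord'] -> [u'textil', u'dräkt']
#
# The three columns in lists are split on ',' into Python lists, and
# non_unique=True lets rows share an Identifikationsnr: clashing single
# values in merged rows are combined into lists (compare
# test_read_non_unique_data below).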
def base_load_data(self, csv_file, metadata):
    """
    Load and parse the provided csv file.

    :param csv_file: the filename to load
    :param metadata: a (fields, list_columns, key_column) tuple describing
        the file
    :return: dict of dicts with the inner keys relabelled per fields
    """
    fields, list_columns, key_column = metadata
    expected_header = self.delimiter.join(fields.keys())
    raw_dict = csv_methods.csv_file_to_dict(
        csv_file, key_column, expected_header, lists=list_columns,
        delimiter=self.delimiter, list_delimiter=self.list_delimiter)
    return common.relabel_inner_dicts(raw_dict, fields)
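# A hedged example of the metadata tuple a caller might pass, with invented
# column names and labels:
#
#     metadata = (
#         OrderedDict([(u'Identifikationsnr', u'id'),  # csv header -> label
#                      (u'Ämnesord', u'keywords')]),
#         (u'Ämnesord', ),                             # columns to split
#         u'Identifikationsnr'                         # key column
#     )
#     data = self.base_load_data('metadata.csv', metadata)
#
# An OrderedDict (or insertion-ordered dict) is assumed for fields, since
# self.delimiter.join(fields.keys()) must reproduce the csv header in column
# order.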
def test_read_data(self):
    key_col = self.test_header.split('|')[1]
    expected = {
        '2': {
            'ett': '1',
            'lista': '1;2;3;;4;5',
            'fem': '',
            'tre': '3',
            'två': '2',
            'fyra': '4'
        },
        'a2': {
            'lista': 'a1;a2; a3 ;a4;a5',
            'ett': 'a1',
            'fem': 'a5',
            'tre': 'a3',
            'två': 'a2',
            'fyra': 'a4'
        }
    }
    result = csv_file_to_dict(self.test_infile.name, key_col,
                              self.test_header)
    self.assertDictEqual(result, expected)
def test_read_non_unique_data(self):
    key_col = 'tre'
    lists = ('lista', )
    expected = {
        '3': {
            'ett': ['1', '2'],
            'lista': ['1', '2', '3', '4', '5'],
            'tre': '3',
            'fyra': '4'
        },
        'a3': {
            'ett': ['a1', 'a2'],
            'lista': ['a1', 'a2', 'a3', 'a4', 'a5', 'a5'],
            'tre': 'a3',
            'fyra': 'a4'
        }
    }
    result = csv_file_to_dict(self.test_infile.name, key_col,
                              self.test_header, lists=lists,
                              non_unique=True)
    self.assertEqual(deep_sort(result), deep_sort(expected))