    def test_read_non_unique_data_unexpected_error(self):
        key_col = 'tre'
        expected_msg = 'Unexpected non-unique columns found'
        expected_items = sorted(['lista', 'ett'])
        with self.assertRaises(MyError) as cm:
            csv_file_to_dict(self.test_infile.name,
                             key_col,
                             self.test_header,
                             non_unique=False)
        # Check both the message and the offending columns, ignoring the
        # order in which the columns are reported.
        result_msg, _, result_items = cm.exception.value.partition(':')
        result_items = strip_list_entries(result_items.split(','))
        self.assertEqual(result_msg, expected_msg)
        self.assertEqual(sorted(result_items), expected_items)
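# A minimal standalone sketch (not from the source) of the error shape the
# test above parses, assuming MyError exposes its message via .value as the
# cm.exception.value access suggests: the offending column names are appended
# after a colon and recovered with str.partition(':').
msg, _, items = 'Unexpected non-unique columns found: lista, ett'.partition(':')
columns = [entry.strip() for entry in items.split(',')]
assert (msg, sorted(columns)) == ('Unexpected non-unique columns found', ['ett', 'lista'])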
    def test_read_list_data(self):
        key_col = self.test_header.split('|')[1]
        lists = ('lista', )
        expected = {
            '2': {
                'ett': '1',
                'lista': ['1', '2', '3', '4', '5'],
                'fem': '',
                'tre': '3',
                'två': '2',
                'fyra': '4'
            },
            'a2': {
                'lista': ['a1', 'a2', 'a3', 'a4', 'a5'],
                'ett': 'a1',
                'fem': 'a5',
                'tre': 'a3',
                'två': 'a2',
                'fyra': 'a4'
            }
        }
        result = csv_file_to_dict(self.test_infile.name,
                                  key_col,
                                  self.test_header,
                                  lists=lists)
        self.assertDictEqual(result, expected)
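# A hypothetical helper (not the real implementation) illustrating the list
# handling exercised above: columns named in `lists` are split on the list
# delimiter (';' by default, judging by the raw values in test_read_data
# below), entries are stripped, and empty entries are dropped.
def split_list_column(value, delimiter=';'):
    return [entry.strip() for entry in value.split(delimiter) if entry.strip()]

assert split_list_column('1;2;3;;4;5') == ['1', '2', '3', '4', '5']
assert split_list_column('a1;a2; a3 ;a4;a5') == ['a1', 'a2', 'a3', 'a4', 'a5']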
Example #3
def rename(base_dir, sub_cat, in_filename, log_file='move.log'):
    """
    Identify any files to replace and rename them to their commons names.

    :param base_dir: Path to directory in which replacement image files are
        found.
    :param sub_cat: The name of the subdirectory into which processed files
        should be moved.
    :param in_filename: The photoAll.csv file filtered to only contain the
        files to replace.
    :param log_file: The name of the log file to be created (in base_dir).
    """
    # Load indata
    in_filename = common.modify_path(base_dir, in_filename)
    header_check = u'PhoId|PhoObjId|PhoBeschreibungM|PhoAufnahmeortS|' \
                   u'PhoSwdS|MulId|filnamn|AdrVorNameS|AdrNameS|PhoSystematikS'
    data = csv.csv_file_to_dict(in_filename,
                                "filnamn",
                                header_check,
                                keep=('PhoSystematikS', 'filnamn'),
                                delimiter='|',
                                codec='utf-16')

    # reformat the commons filenames
    url_prefix = u'https://commons.wikimedia.org/wiki/File:'
    for k, v in data.iteritems():
        if v['PhoSystematikS'].startswith(url_prefix):
            data[k] = v['PhoSystematikS'][len(url_prefix):]
        else:
            pywikibot.output("error in indatafile: %s, %s" % (k, v))

    # find candidate files
    candidates = prep.find_files(base_dir, ('.tif', ), subdir=False)

    # rename the files
    sub_cat = common.modify_path(base_dir, sub_cat)
    log_file = common.modify_path(base_dir, log_file)
    common.create_dir(sub_cat)
    log = []

    for candidate in candidates:
        base_name = os.path.basename(candidate)
        if base_name not in data.keys():
            log.append('%s not found in csv file' % base_name)
            continue

        commons_name = data.pop(base_name)
        commons_name = common.modify_path(sub_cat, commons_name)
        os.rename(candidate, commons_name)

    for k in data.keys():
        log.append('%s not found on disk' % k)

    common.open_and_write_file(log_file, '\n'.join(log), codec='utf-8')
    pywikibot.output(u'Created %s' % log_file)
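# Hypothetical invocation (all paths are made up): every .tif directly in
# base_dir whose basename appears in the filtered photoAll.csv is moved into
# the 'renamed' subdirectory under its Commons filename; a move.log listing
# unmatched files is then written to base_dir.
rename(base_dir='/data/replacements',
       sub_cat='renamed',
       in_filename='photoAll_filtered.csv')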
Example #5
def load_data(in_file):
    """Load csv file.

    @param in_file: the path to the metadata file
    @return: dict, list
    """
    header = u'Folder|Original|Commons'
    key_col = (u'Folder', u'Original')
    data = csv_methods.csv_file_to_dict(in_file, key_col, header)
    folders = []
    for val in data.values():
        folders.append(val[u'Folder'])
    folders = list(set(folders))  # remove any duplicates
    return (data, folders)
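# Hypothetical usage (the filename is made up): `data` holds one entry per
# csv row, keyed on the combined (u'Folder', u'Original') columns, while
# `folders` lists each folder name exactly once.
data, folders = load_data('replacement_metadata.csv')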
Example #6
    def load_data(self, in_file):
        """
        Load the provided data (in whichever format) and produce a dict with an
        entry per file which can be used for further processing.

        @param in_file: the path to the metadata file
        @return: dict
        """
        key_col = u'Identifikationsnr'
        lists = (u'Ämnesord', u'Material', u'Motiv-ämnesord')
        return csv_methods.csv_file_to_dict(in_file,
                                            key_col,
                                            EXPECTED_HEADER,
                                            non_unique=True,
                                            lists=lists,
                                            list_delimiter=',')
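# Hypothetical illustration (the cell value is made up) of what the lists and
# list_delimiter arguments above imply: a cell in any of the three listed
# columns is split on ',' with entries stripped, so a u'Material' value such
# as u'trä, järn' presumably ends up as a two-entry list.
print([entry.strip() for entry in u'trä, järn'.split(',')])  # ['trä', 'järn']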
Example #7
    def base_load_data(self, csv_file, metadata):
        """
        Load and parse the provided csv file.

        :param csv_file: the filename to load
        :param metadata: the metadata for the file
        """
        fields, list_columns, key_column = metadata

        expected_header = self.delimiter.join(fields.keys())
        raw_dict = csv_methods.csv_file_to_dict(
            csv_file,
            key_column,
            expected_header,
            lists=list_columns,
            delimiter=self.delimiter,
            list_delimiter=self.list_delimiter)

        return common.relabel_inner_dicts(raw_dict, fields)
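# A hypothetical sketch (not the real helper) of what common.relabel_inner_dicts
# presumably does, judging by the call above: rename the keys of every inner
# dict according to the fields mapping (raw column name -> desired label).
def relabel_inner_dicts(data, key_map):
    return {outer: {key_map[key]: val for key, val in inner.items()}
            for outer, inner in data.items()}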
    def test_read_data(self):
        key_col = self.test_header.split('|')[1]
        expected = {
            '2': {
                'ett': '1',
                'lista': '1;2;3;;4;5',
                'fem': '',
                'tre': '3',
                'två': '2',
                'fyra': '4'
            },
            'a2': {
                'lista': 'a1;a2; a3 ;a4;a5',
                'ett': 'a1',
                'fem': 'a5',
                'tre': 'a3',
                'två': 'a2',
                'fyra': 'a4'
            }
        }
        result = csv_file_to_dict(self.test_infile.name, key_col,
                                  self.test_header)
        self.assertDictEqual(result, expected)
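# A hypothetical fixture consistent with the expectations above (the real
# file is written by the test's setUp, which is not shown): with this header,
# key_col = self.test_header.split('|')[1] resolves to 'två', so the row
# below yields the entry keyed '2', keeping the empty 'fem' field as '' and
# the 'lista' field verbatim since no lists= argument is passed.
header = 'ett|två|tre|fyra|fem|lista'
row = '1|2|3|4||1;2;3;;4;5'
assert dict(zip(header.split('|'), row.split('|'))) == {
    'ett': '1', 'två': '2', 'tre': '3', 'fyra': '4', 'fem': '',
    'lista': '1;2;3;;4;5'}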
    def test_read_non_unique_data(self):
        key_col = 'tre'
        lists = ('lista', )
        expected = {
            '3': {
                'ett': ['1', '2'],
                'lista': ['1', '2', '3', '4', '5'],
                'tre': '3',
                'fyra': '4'
            },
            'a3': {
                'ett': ['a1', 'a2'],
                'lista': ['a1', 'a2', 'a3', 'a4', 'a5', 'a5'],
                'tre': 'a3',
                'fyra': 'a4'
            }
        }
        result = csv_file_to_dict(self.test_infile.name,
                                  key_col,
                                  self.test_header,
                                  lists=lists,
                                  non_unique=True)
        self.assertEqual(deep_sort(result), deep_sort(expected))
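# A hypothetical sketch (not the real helper) of the deep_sort used above:
# recursively sort every list inside a nested structure so that the
# comparison ignores the order in which merged values were collected.
# Assumes list entries are mutually comparable (plain strings here).
def deep_sort(obj):
    if isinstance(obj, dict):
        return {key: deep_sort(val) for key, val in obj.items()}
    if isinstance(obj, list):
        return sorted(deep_sort(entry) for entry in obj)
    return obj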