Exemple #1
0
def get_csv_dict(csvfile):
    """
    Read the CSV file that maps accession numbers to exhibitions.

    :param csvfile: path of the CSV file. Each row contains the accession
        number in the column specified by --col_acc, optionally the
        exhibition number in the column specified by --col_ex, and optionally
        the catalogue number in the column specified by --col_cat. If
        --exhibition is given it is used for every row and --col_ex is not
        consulted. The first --skiprows rows are discarded. An accession
        number cell is passed through expand_idnum (e.g. a range such as
        JB001-002) and every resulting number gets its own entry.
    :return: A dict with the key of the normalized accession number and the
             value being a tuple of (exhibition number, catalogue number).
             The catalogue number is None if --col_cat was not given, an int
             if the cell is numeric, otherwise the cell's text unchanged.
    :raises KeyError: if an accession number occurs more than once.
    :raises IndexError: if the exhibition column is missing from a row and
        --allow_missing was not given.
    :raises ValueError: if the exhibition cell is not an integer and
        --allow_missing was not given.
    """
    cdict = {}

    def one_accession_number(accno, row, exhibition):
        """Add one (already expanded) accession number from *row* to cdict."""
        try:
            accnum = normalize_id(accno)
        except ValueError:
            print(f"Skipping in csv: {accno}")
            return
        if accnum in cdict:
            raise KeyError(f'Duplicate accession number: {accnum}')
        cataloguenumber = None
        if _args.col_cat is not None:
            cataloguenumber = row[_args.col_cat]
            try:
                # convert "33." to 33
                cataloguenumber = int(float(cataloguenumber))
            except (ValueError, OverflowError):
                # ok, doesn't have to be an integer. OverflowError covers
                # text such as "inf", which float() accepts but int() rejects.
                pass
        cdict[accnum] = (exhibition, cataloguenumber)

    # The csv module documentation asks for files opened with newline='' so
    # that the reader, not the file object, decides where a record ends.
    with open(csvfile, encoding='utf-8-sig', newline='') as mapfile:
        reader = csv.reader(mapfile)
        for _ in range(_args.skiprows):
            next(reader)
        for row in reader:
            if not row:
                continue  # csv.reader yields [] for a completely blank line
            accnumber = row[_args.col_acc]
            if not accnumber:
                continue  # blank accession number
            if _args.exhibition:
                exhibition = _args.exhibition
            else:
                col_ex = _args.col_ex
                try:
                    exhibition = int(row[col_ex])
                except (IndexError, ValueError):
                    if _args.allow_missing:
                        trace(2, 'Missing exhibition number, skipping {}',
                              accnumber)
                        continue
                    print(f'Missing column {col_ex}, accession #: {accnumber}')
                    raise  # re-raise with the original traceback intact
            # The "accnumber" might actually be a range of accession numbers
            # in the form JB001-002:
            for accn in expand_idnum(accnumber):
                one_accession_number(accn, row, exhibition)
    trace(2, 'get_csv_dict: {}', cdict)
    return cdict
Exemple #2
0
def loadcsv():
    """
    Read the CSV file containing objectid -> location mappings, specified
    by the --mapfile argument.

    If --heading is given, the first row is treated as a heading row. Its
    --col_loc cell must match the heading text (case-insensitive) unless
    --location is also given, in which case the first row is skipped
    unchecked. Each object ID cell is upper-cased, expanded by expand_idnum
    and normalized. The location stored is --location if given, otherwise the
    stripped --col_loc cell of the row.

    The program exits with status 1 on a failed heading check or a duplicate
    object ID.

    :return: the dictionary containing the mappings, keyed by normalized
             object ID; an empty dictionary for the "validate" subcommand
    """
    location_dict = {}
    if _args.subp == 'validate':
        return location_dict
    loc_arg = _args.location
    need_heading = bool(_args.heading)
    # The csv module documentation asks for files opened with newline='' so
    # that the reader, not the file object, decides where a record ends.
    with open(_args.mapfile, encoding='utf-8-sig', newline='') as mapfile:
        for rownum, row in enumerate(csv.reader(mapfile), start=1):
            trace(3, 'row: {}', row)
            if need_heading:
                # if --location is given just skip the first row
                if not loc_arg and (row[_args.col_loc].strip().lower() !=
                                    _args.heading.lower()):
                    print(f'Fatal error: Failed heading check. '
                          f'{row[_args.col_loc].lower()} is not '
                          f'{_args.heading.lower()}.')
                    sys.exit(1)
                need_heading = False
                continue
            if not row:
                # csv.reader yields [] for a completely blank line; indexing
                # it below would raise IndexError.
                trace(2, 'Skipping empty row {}', rownum)
                continue
            objid = row[_args.col_acc].strip().upper()
            if not objid and ''.join(row):
                trace(2, 'Skipping row with blank object id: {}', row)
                continue
            for ob in expand_idnum(objid):
                nobjid = nd.normalize_id(ob)
                if not nobjid:
                    print(f'Warning: Blank object ID row {rownum}: {row}')
                    continue  # blank number
                if nobjid in location_dict:
                    print(
                        f'Fatal error: Duplicate object ID row {rownum}: {row}.'
                    )
                    sys.exit(1)
                location_dict[nobjid] = (loc_arg
                                         or row[_args.col_loc].strip())
    return location_dict
Exemple #3
0
def main():
    """
    Copy Object elements from the input XML to the output, updating those
    whose ID appears in the accession number -> (exhibition, catalogue) map.

    The map comes from --object (every expanded ID gets --exhibition and
    --catalogue) or, failing that, from the CSV file named by --mapfile.
    Objects that were updated are always written; the others only if --all
    is set. With --short, processing stops after the first updated object.
    IDs left in the map at the end are reported as missing from the XML.
    """
    outfile.write(b'<?xml version="1.0"?><Interchange>\n')
    if not _args.object:
        # acc # -> (exhibition #, catalog #)
        exmap = get_csv_dict(_args.mapfile)
    else:
        exmap = {}
        for obj in expand_idnum(_args.object):  # JB001-002 -> JB001, JB002
            # JB001 -> JB00000001
            exmap[normalize_id(obj)] = (_args.exhibition, _args.catalogue)
    exdict = get_exhibition_dict()  # exhibition # -> Exhibition tuple
    written = numupdated = 0
    for _, elem in ET.iterparse(infile):
        if elem.tag != 'Object':
            continue
        idelem = elem.find(Stmt.get_default_record_id_xpath())
        idnum = normalize_id(None if idelem is None else idelem.text)
        trace(3, 'idnum: {}', idnum)
        updated = bool(idnum) and idnum in exmap
        if updated:
            exnum, cataloguenumber = exmap[idnum]
            one_object(elem, idnum, exdict[exnum], cataloguenumber)
            # Whatever is still in exmap after the loop was not found.
            del exmap[idnum]
            numupdated += 1
        if updated or _args.all:
            outfile.write(ET.tostring(elem, encoding='utf-8'))
            written += 1
        if updated and _args.short:
            break
    outfile.write(b'</Interchange>')
    for leftover in exmap:
        trace(1, 'In CSV but not XML: "{}"', leftover)
    plural = "s" if written != 1 else ""
    trace(
        1, f'End exhibition.py. {written} object{plural} written '
        f'of which {numupdated} updated.')
Exemple #4
0
def main(argv):  # can be called either by __main__ or test_xml2csv
    """
    Extract columns from the top-level records of an XML file into a CSV file.

    The records to process are selected by config.select, using either the
    IDs expanded from --object or the include list returned by
    read_include_dict. Rows are collected in memory, sorted, the cells
    flagged for normalization are de-normalized, and the rows are then
    written to the CSV writer.

    :param argv: the argument list passed to getargs
    :return: a tuple (nlines, nwritten, notfound): the number of top-level
             records seen, the number of rows collected for output, and the
             number of columns for which one_document returned None
    """
    global _args, _logfile
    _args = getargs(argv)
    infilename = _args.infile
    outfilename = _args.outfile
    cfgfilename = _args.cfgfile
    # NOTE(review): a log file opened here is not closed in this function.
    if _args.logfile:
        _logfile = open(_args.logfile, 'w')
    else:
        _logfile = sys.stdout
    infile = openfile(infilename)
    nlines = notfound = nwritten = 0
    Config.reset_config()  # needed by test_xml2csv
    if cfgfilename:
        cfgfile = open(cfgfilename)
    else:
        cfgfile = None
        trace(
            1,
            'Warning: Config file omitted. Only accession numbers will be output.'
        )
    config = Config(cfgfile, dump=_args.verbose >= 2, logfile=_logfile)
    outcsv, outfile = opencsvwriter(outfilename, config.delimiter)
    outlist = []  # rows are buffered here so they can be sorted before writing
    titles = yaml_fieldnames(config)
    trace(1, 'Columns: {}', ', '.join(titles))
    if not _args.heading:
        trace(1, 'Heading row not written.')
    if _args.heading:
        outcsv.writerow(titles)
    # Nesting depth of record_tag elements; only the outermost one is a record.
    objectlevel = 0
    if _args.object:
        expanded = [normalize_id(obj) for obj in expand_idnum(_args.object)]
        includeset = set(expanded)  # JB001-002 -> JB001, JB002
        includes = dict.fromkeys(includeset)
    else:
        includes = read_include_dict(_args.include,
                                     _args.include_column,
                                     _args.include_skip,
                                     _args.verbose,
                                     logfile=_logfile,
                                     allow_blanks=_args.allow_blanks)
    for event, elem in ET.iterparse(infile, events=('start', 'end')):
        if event == 'start':
            if elem.tag == config.record_tag:
                objectlevel += 1
            continue
        # It's an "end" event.
        if elem.tag != config.record_tag:  # default: Object
            continue
        objectlevel -= 1
        if objectlevel:
            continue  # It's not a top level Object.
        data = []
        idelem = elem.find(config.record_id_xpath)
        idnum = idelem.text if idelem is not None else ''
        trace(3, 'idnum: {}', idnum)
        nlines += 1

        writerow = config.select(elem, includes, exclude=_args.exclude)
        if not writerow:
            # NOTE(review): this path skips the elem.clear() below.
            continue
        norm_idnum = normalize_id(idnum, _args.mdacode, verbose=_args.verbose)
        # We have selected the id but only write the row if there is something
        # to display. There will always be at least the ID number in the first
        # column unless skip_number was specified in the config.
        if config.skip_number:
            writerow = False
        else:
            # Insert the ID number as the first column.
            data.append(norm_idnum)

        for document in config.col_docs:
            text, command = one_document(document, elem, config)
            if text is None:
                notfound += 1
                trace(2, '{}: cmd: {}, "{}" is not found in XML.', idnum,
                      command, document[Stmt.TITLE])
                text = ''
            if text:
                writerow = True
            data.append(text)

        if writerow:
            nwritten += 1
            outlist.append(data)
            trace(3, '{} written.', idnum)
        elem.clear()
        # Remove the IDs that were found, so that whatever remains in
        # "includes" after the loop was in the include list but not the XML.
        if includes and not _args.exclude:
            includes.pop(norm_idnum)
        # --short: stop after the first record that passed config.select.
        if _args.short:
            break
    # Sort on the first column as an integer if requested, otherwise sort
    # the rows with the default list ordering.
    if config.sort_numeric:
        outlist.sort(key=lambda x: int(x[0]))
    else:
        outlist.sort()
    # Create a list of flags indicating whether the value needs to be
    # de-normalized.
    norm = []
    if not config.skip_number:
        norm.append(True)  # for the Serial number
    for doc in config.col_docs:
        if doc[Stmt.CMD] in Cmd.get_control_cmds():
            continue
        norm.append(Stmt.NORMALIZE in doc)
    lennorm = len(norm)
    for row in outlist:
        # Only the first lennorm cells have a flag.
        for n, cell in enumerate(row[:lennorm]):
            if norm[n]:
                row[n] = denormalize_id(cell, _args.mdacode)
        outcsv.writerow(row)
    infile.close()
    if cfgfile:
        cfgfile.close()
    outfile.close()
    # Report the include-list entries that were never popped in the loop.
    if includes and len(includes):
        trace(1, '{} items in include list not in XML.', len(includes))
        if _args.verbose > 1:
            print('In include list but not xml:', file=_logfile)
            for accnum in includes:
                print(accnum, file=_logfile)
    if not _args.bom:
        trace(1, 'BOM not written.')
    return nlines, nwritten, notfound
Exemple #5
0
# Stays True unless this file is executed as a script (see below).
called_from_sphinx = True

if __name__ == '__main__':
    called_from_sphinx = False
    assert sys.version_info >= (3, 6)
    # The first command line argument selects the subcommand.
    subcommand = sys.argv[1]
    is_diff = subcommand == 'diff'
    is_select = subcommand == 'select'
    is_update = subcommand == 'update'
    is_validate = subcommand == 'validate'
    _args = getargs(sys.argv)
    verbose = _args.verbose
    if not (is_update and _args.object):
        # No explicit object: take the object -> location map from the CSV.
        newlocs = loadcsv()
    else:
        if not _args.location:
            raise ValueError('You specified the object id. You must also '
                             'specify the location.')
        # Every object expanded from --object gets the same --location.
        objectlist = expand_idnum(_args.object)
        newlocs = {}
        for obj in objectlist:
            newlocs[nd.normalize_id(obj)] = _args.location
        trace(2, 'Object(s) specified, newlocs= {}', newlocs)
    total_in_csvfile = len(newlocs)
    total_updated = 0
    total_written = 0
    # validate only
    total_failed = 0
    total_objects = 0
    infile = open(_args.infile, encoding=_args.encoding)
    # An output file is only opened for the "update" subcommand.
    outfile = open(_args.outfile, 'wb') if is_update else None
    main()
    if is_update:
        print(f'Total Updated: {total_updated}/{total_in_csvfile}\n'
Exemple #6
0
 def test_01(self):
     """Expanding lower-case 'jb001' must not yield ['JB001']."""
     self.assertNotEqual(expand_idnum('jb001'), ['JB001'])
Exemple #7
0
 def test_19(self):
     """'SH104-5' expands to SH104 and SH105."""
     expected = ['SH104', 'SH105']
     self.assertEqual(expand_idnum('SH104-5'), expected)
Exemple #8
0
 def test_06(self):
     """'SH9-10' expands to SH9 and SH10."""
     expected = ['SH9', 'SH10']
     self.assertEqual(expand_idnum('SH9-10'), expected)
Exemple #9
0
    # Counters reported at the end; presumably updated by main() — verify
    # against its definition.
    objcount = 0
    selcount = 0
    object_number = ''
    _args = getargs()
    infile = open(_args.infile)
    if not _args.directory:
        outfile = open(_args.outfile, 'wb')
    else:
        # With --directory the output is a directory name, not an open file.
        outfile = _args.outfile
        ld = os.listdir(outfile)
        # Refuse to write into a non-empty directory unless --force is given.
        if ld and not _args.force:
            print(f'Directory {outfile} is not empty. Exiting.')
            sys.exit()
    cfgfile = open(_args.cfgfile) if _args.cfgfile else None
    config = Config(cfgfile, dump=_args.verbose >= 2)
    if not _args.object:
        includes = read_include_dict(_args.include, _args.include_column,
                                     _args.include_skip, _args.verbose)
    else:
        # JB001-002 -> JB001, JB002
        expanded = []
        for obj in expand_idnum(_args.object):
            expanded.append(normalize_id(obj))
        includeset = set(expanded)
        includes = dict.fromkeys(includeset)
    main()
    basename = os.path.basename(sys.argv[0])
    plural = "" if selcount == 1 else "s"
    print(f'{selcount} object{plural} selected from {objcount}.')
    scriptname = basename.split(".")[0]
    print(f'End {scriptname}')
Exemple #10
0
 def test_15(self):
     """'SH1-02' expands to SH1 and SH2."""
     expected = ['SH1', 'SH2']
     self.assertEqual(expand_idnum('SH1-02'), expected)
Exemple #11
0
 def test_14(self):
     """An '&' list and a comma-separated ID can be combined in one string."""
     self.assertEqual(
         expand_idnum('LDHRM.2021.2 & 17,JB001'),
         ['LDHRM.2021.2', 'LDHRM.2021.17', 'JB001'])
Exemple #12
0
 def test_11(self):
     """'LDHRM.2021.2-17' expands to LDHRM.2021.2 through LDHRM.2021.17."""
     expected = [f'LDHRM.2021.{n}' for n in range(2, 18)]
     self.assertEqual(expand_idnum('LDHRM.2021.2-17'), expected)
Exemple #13
0
 def test_10(self):
     """'JB08-1110' expands to JB08, JB09, then JB10 through JB1110."""
     expected = ['JB08', 'JB09']
     expected.extend(f'JB{n}' for n in range(10, 1111))
     self.assertEqual(expand_idnum('JB08-1110'), expected)
Exemple #14
0
 def test_09(self):
     """'JB001-002' expands to JB001 and JB002."""
     expected = ['JB001', 'JB002']
     self.assertEqual(expand_idnum('JB001-002'), expected)
Exemple #15
0
 def test_07(self):
     """'JB09-10' expands to JB09 and JB10."""
     expected = ['JB09', 'JB10']
     self.assertEqual(expand_idnum('JB09-10'), expected)
Exemple #16
0
 def test_16(self):
     """'SH1&2&3' expands to SH1, SH2 and SH3."""
     expected = ['SH1', 'SH2', 'SH3']
     self.assertEqual(expand_idnum('SH1&2&3'), expected)
Exemple #17
0
 def test_17(self):
     """'SH10-3' expands to SH10, SH11, SH12 and SH13."""
     expected = ['SH10', 'SH11', 'SH12', 'SH13']
     self.assertEqual(expand_idnum('SH10-3'), expected)
Exemple #18
0
 def test_one_idnum(self):
     """The concatenated expansions of IDNUM_TESTS equal IDNUM_RESULTS."""
     idlist = [
         expanded
         for idnum in IDNUM_TESTS
         for expanded in expand_idnum(idnum)
     ]
     self.assertEqual(idlist, IDNUM_RESULTS)
Exemple #19
0
 def test_02(self):
     """A single ID, 'JB001', expands to a one-element list holding itself."""
     self.assertEqual(expand_idnum('JB001'), ['JB001'])