def main():
    """
    Copy Object elements from infile to outfile inside an <Interchange>
    wrapper, applying conservation data (from make_conserved) to objects
    whose id starts with 'SH' (plus the special-case JB00000314).

    Uses module globals: outfile, infile, _args, and the trace helper.
    """
    outfile.write(b'<?xml version="1.0"?><Interchange>\n')
    written = 0
    numupdated = 0
    conserved, condition = make_conserved()
    for event, elem in ET.iterparse(infile):
        if elem.tag != 'Object':
            continue
        idelem = elem.find(Stmt.get_default_record_id_xpath())
        idnum = idelem.text if idelem is not None else None
        idnum = normalize_id(idnum)
        trace(3, 'idnum: {}', idnum)
        updated = False
        # SH objects (and one special JB object) get conservation data.
        if idnum.startswith('SH') or idnum == 'JB00000314':
            one_object(elem, idnum, conserved, condition)
            updated = True
            numupdated += 1
        if updated or _args.all:
            outfile.write(ET.tostring(elem, encoding='utf-8'))
            written += 1
        if updated and _args.short:
            break  # --short: stop after the first updated object
    outfile.write(b'</Interchange>')
    trace(
        1, f'End SH_acquisition.py. {written} object'
        f'{"s" if written != 1 else ""} written '
        f'of which {numupdated} updated.')
    # NOTE(review): presumably one_object removes handled ids from
    # conserved, so leftovers here were never seen in the XML — confirm.
    for idnum in conserved:
        print(f'Not processed: {idnum}')
def main():
    """
    Copy Object elements from infile to outfile, updating each object
    whose normalized id appears in the global newvals mapping (loaded
    from the CSV map file). Writes updated objects (or all objects with
    --all) inside an <Interchange> wrapper and reports CSV rows that
    never matched an XML object.
    """
    global nwritten
    outfile.write(b'<?xml version="1.0" encoding="UTF-8"?><Interchange>\n')
    for event, elem in ET.iterparse(infile):
        if elem.tag != 'Object':
            continue
        idelem = elem.find(cfg.record_id_xpath)
        idnum = idelem.text if idelem is not None else None
        nidnum = normalize_id(idnum)
        trace(3, 'idnum: {}', idnum)
        if nidnum and nidnum in newvals:
            updated = one_element(elem, nidnum)
            # NOTE(review): nidnum is already normalized; the .upper()
            # looks redundant — confirm normalize_id's contract before
            # simplifying.
            del newvals[nidnum.upper()]
        else:
            updated = False
            if _args.missing:
                trace(2, 'Not in CSV file: "{}"', idnum)
        if updated or _args.all:
            outfile.write(ET.tostring(elem, encoding='utf-8'))
            nwritten += 1
        # NOTE(review): sibling main()s break only when `updated and
        # _args.short`; here --short stops after the very first object
        # regardless of update state — verify intended.
        if _args.short:
            break
    outfile.write(b'</Interchange>')
    # Anything left in newvals never matched an object in the XML.
    for nidnum in newvals:
        trace(1, 'In CSV but not XML: "{}"', denormalize_id(nidnum))
def handle_diff(idnum, elem):
    """
    Compare the location(s) stored in an Object element against the
    expected value (--location or the newlocs CSV mapping) and trace any
    mismatch. Only the location type(s) selected by --normal / --current
    are compared; only the first matching ObjectLocation is examined.

    :param idnum: the object's accession number (denormalized form)
    :param elem: the Object element
    :return: None
    """
    if _args.all:
        # NOTE(review): newlocs appears keyed by *normalized* ids
        # elsewhere, but this membership test uses the raw idnum —
        # confirm which form the keys take.
        if idnum not in newlocs and _args.warn:
            trace(3, 'Not in CSV file: {}', idnum)
            return
    objlocs = elem.findall('./ObjectLocation')
    for ol in objlocs:
        loc = ol.get(ELEMENTTYPE)
        if (_args.normal and loc == NORMAL_LOCATION) or (
                _args.current and loc == CURRENT_LOCATION):
            location = ol.find('./Location')
            if location.text is not None:
                # Uppercase for case-insensitive comparison with the CSV.
                text = location.text.strip().upper()
            else:
                text = None
            if _args.location:
                newtext = _args.location
            else:
                nidnum = nd.normalize_id(idnum)
                newtext = newlocs.get(nidnum, None)
                if newtext is None:
                    return  # id not in the CSV; nothing to compare
                # Remove handled ids so leftovers can be reported later.
                del newlocs[nidnum]
            trace(2, 'New location for {}: {}', idnum, newtext)
            if text != newtext:
                trace(1, 'Different {}: XML: {}, CSV: {}', idnum, text,
                      newtext)
            break
def main():
    """
    Attach exhibition/catalogue data to Object elements. The accession ->
    (exhibition, catalogue) mapping comes either from --object plus
    --exhibition/--catalogue on the command line, or from the --mapfile
    CSV. Updated objects (or all with --all) are written inside an
    <Interchange> wrapper.
    """
    outfile.write(b'<?xml version="1.0"?><Interchange>\n')
    if _args.object:
        objlist = expand_idnum(_args.object)  # JB001-002 -> JB001, JB002
        exmap = {
            normalize_id(obj):  # JB001 -> JB00000001
            (_args.exhibition, _args.catalogue)
            for obj in objlist
        }
    else:
        exmap = get_csv_dict(
            _args.mapfile)  # acc # -> (exhibition #, catalog #)
    exdict = get_exhibition_dict()  # exhibition # -> Exhibition tuple
    written = 0
    numupdated = 0
    for event, elem in ET.iterparse(infile):
        if elem.tag != 'Object':
            continue
        idelem = elem.find(Stmt.get_default_record_id_xpath())
        idnum = idelem.text if idelem is not None else None
        idnum = normalize_id(idnum)
        trace(3, 'idnum: {}', idnum)
        if idnum and idnum in exmap:
            exnum, cataloguenumber = exmap[idnum]
            one_object(elem, idnum, exdict[exnum], cataloguenumber)
            # Remove the handled id so leftovers can be reported below.
            del exmap[idnum]
            updated = True
            numupdated += 1
        else:
            updated = False
        if updated or _args.all:
            outfile.write(ET.tostring(elem, encoding='utf-8'))
            written += 1
        if updated and _args.short:
            break  # --short: stop after the first updated object
    outfile.write(b'</Interchange>')
    # CSV entries that never matched an XML object.
    for idnum in exmap:
        trace(1, 'In CSV but not XML: "{}"', idnum)
    trace(
        1, f'End exhibition.py. {written} object'
        f'{"s" if written != 1 else ""} written '
        f'of which {numupdated} updated.')
def one_object(elt):
    """
    Record one Object element's normalized id in boxdict, keyed by its
    padded current location ('unknown' if absent), and its title in
    titledict.

    :param elt: the Object element
    """
    number = elt.find('./ObjectIdentity/Number').text
    locelt = elt.find(
        './ObjectLocation[@elementtype="current location"]/Location')
    where = 'unknown'
    if locelt is not None and locelt.text:
        where = pad_loc(locelt.text)
    objtitle = elt.find('./Identification/Title').text
    normnum = normalize_id(number)
    boxdict[where].append(normnum)
    titledict[normnum] = objtitle
def make_conserved() -> tuple[set[str], dict[str, str]]:
    """
    Load the set of conserved object ids and their condition text.

    Reads two module-level files:
      * CSVFILE  - one id per line; ids lacking an 'SH'/'JB' prefix get
                   'SH' prepended before normalization.
      * CONDFILE - CSV rows of (id, condition text).

    Duplicate ids in either file are reported but the last value wins.

    :return: (set of normalized conserved ids,
              dict of normalized id -> condition text)
    """
    # Fix: the return annotation was dict[str]; a dict needs key and
    # value types. Also use 'with' so the files are closed (the original
    # leaked both handles).
    conserved = set()
    with codecs.open(CSVFILE, 'r', 'utf-8-sig') as confile:
        trace(1, 'using list of conserved objects in: {}', confile.name)
        for row in confile:
            row = row.strip()
            if not (row.startswith('SH') or row.startswith('JB')):
                row = 'SH' + row  # bare numbers are SH accession numbers
            normid = normalize_id(row)
            if normid in conserved:
                print(f'Duplicate id: {normid}')
            conserved.add(normid)
    print(f'{len(conserved)=}')
    condition = dict()
    with codecs.open(CONDFILE, 'r', 'utf-8-sig') as condfile:
        trace(1, 'using condition of conserved objects in: {}',
              condfile.name)
        reader = csv.reader(condfile)
        for row in reader:
            normid = normalize_id(row[0])
            if normid in condition:
                print(f'Duplicate id: {normid}')
            condition[normid] = row[1]
    print(f'{len(condition)=}')
    return conserved, condition
def loadnewvals(allow_blanks=False):
    """
    Read the CSV file containing objectid -> new element values

    :param allow_blanks: if True, rows with a blank accession number are
        skipped. Otherwise a ValueError exception is raised.
    :return: the dictionary containing the mappings where the key is the
        objectid and the value is a list of the remaining columns
    """
    newval_dict = {}
    with codecs.open(_args.mapfile, 'r', 'utf-8-sig') as mapfile:
        reader = csv.reader(mapfile)
        skiprows = _args.skip_rows
        for n in range(skiprows):  # default = 0
            skipped = next(reader)  # skip header
            if _args.verbose >= 1:
                print(f'Skipping row in map file: {skipped}')
        if _args.heading:
            # Check that the first row in the CSV file contains the same
            # column headings as in the title statements of the YAML file.
            row = next(reader)
            row = [r.strip() for r in row]
            irow = iter(row)
            next(irow)  # skip Serial column
            for doc in cfg.col_docs:
                col = next(irow)
                title = doc[Stmt.TITLE]
                if col.lower() != title.lower():
                    print(f'Mismatch on heading: "{title}" in config !='
                          f' "{col}" in CSV file')
                    sys.exit(1)
        for row in reader:
            row = [r.strip() for r in row]
            idnum = row[0]
            if not idnum:
                if allow_blanks:
                    trace(2, 'Row with blank accession number skipped: {}',
                          row)
                    continue  # skip blank accession numbers
                else:
                    raise ValueError('Blank accession number in include file;'
                                     ' --allow_blank not selected.')
            # Strip off the accession number in the first column so that the
            # list matches the columns in the config file. Note that this
            # depends on the accession number being in the first column.
            newval_dict[normalize_id(idnum)] = row[1:]
    return newval_dict
def dir2list(jpegdir, normalize=False):
    """
    Build a sorted list of accession numbers from the .jpg files in a
    directory. An optional leading 'collection_' prefix is stripped;
    non-matching filenames are reported and skipped.

    :param jpegdir: directory to scan
    :param normalize: if True, return normalized ids (ids that fail
        normalization are kept as-is)
    :return: sorted list of accession numbers
    """
    pattern = re.compile(r'(collection_)?(.+)\.jpg')
    accessions = []
    for name in os.listdir(jpegdir):
        matched = pattern.match(name)
        if matched is None:
            print(f'dir2list skipping: {name}')
            continue
        accn = matched.group(2)
        if normalize:
            try:
                accn = normalize_id(accn, verbose=2)
            except ValueError:
                pass
        accessions.append(accn)
    return sorted(accessions)
def loadcsv():
    """
    Read the CSV file containing objectid -> location mappings, specified
    by the --mapfile argument.

    :return: the dictionary containing the mappings
    """
    rownum = 0
    location_dict = {}
    if _args.subp == 'validate':
        return location_dict  # the validate subcommand needs no mapping
    loc_arg = _args.location
    need_heading = bool(_args.heading)
    with codecs.open(_args.mapfile, 'r', 'utf-8-sig') as mapfile:
        reader = csv.reader(mapfile)
        for row in reader:
            rownum += 1
            trace(3, 'row: {}', row)
            if need_heading:
                # if --location is given just skip the first row
                if not loc_arg and (row[_args.col_loc].strip().lower()
                                    != _args.heading.lower()):
                    print(f'Fatal error: Failed heading check. '
                          f'{row[_args.col_loc].lower()} is not '
                          f'{_args.heading.lower()}.')
                    sys.exit(1)
                need_heading = False
                continue
            objid = row[_args.col_acc].strip().upper()
            # Skip rows with a blank id but some other content; a fully
            # blank row falls through (objid is '' and join is '').
            if not objid and ''.join(row):
                trace(2, 'Skipping row with blank object id: {}', row)
                continue
            objidlist = expand_idnum(objid)  # JB001-002 -> JB001, JB002
            for ob in objidlist:
                nobjid = nd.normalize_id(ob)
                if not nobjid:
                    print(f'Warning: Blank object ID row {rownum}: {row}')
                    continue  # blank number
                if nobjid in location_dict:
                    print(
                        f'Fatal error: Duplicate object ID row {rownum}: {row}.'
                    )
                    sys.exit(1)
                # --location overrides the per-row location column.
                location_dict[nobjid] = loc_arg if loc_arg else row[
                    _args.col_loc].strip()
    return location_dict
def main():
    """
    Copy Object elements from infile to outfile sorted by normalized
    accession number, dropping duplicate ids (first occurrence wins).
    """
    objdict = {}
    outfile.write(b'<?xml version="1.0" encoding="utf-8"?><Interchange>\n')
    seq = 0
    for event, elem in ET.iterparse(infile):
        if elem.tag != 'Object':
            continue
        seq += 1
        num = elem.find('./ObjectIdentity/Number').text
        num = normalize_id(num, _args.mdacode)
        if num in objdict:
            print(f'seq {seq}, ID {num} is a duplicate, ignored.')
            continue
        # Hold the serialized object; iterating sorted keys below yields
        # the objects in id order.
        objdict[num] = ET.tostring(elem, encoding='utf-8').strip()
    for num in sorted(objdict):
        outfile.write(objdict[num])
        outfile.write(b'\n')
    outfile.write(b'</Interchange>')
def onefile(imgf: str):
    """
    Register one image file in the global img_ids dict, keyed by
    normalized accession number, reporting duplicates found in other
    directories.

    :param imgf: bare filename within the global dirpath directory
    :return: None
    """
    imgf2 = imgf.removeprefix('collection_')
    prefix, suffix = os.path.splitext(imgf2)
    if suffix.lower() not in ('.jpg', '.png'):
        if _args.verbose > 1 and not imgf.startswith(
                '.'):  # ignore .DS_Store
            print('not image:', imgf)
        return
    try:
        nid = normalize_id(prefix)
    except ValueError as ve:
        print(f'Skipping {imgf}: {ve}')
        return
    if nid in img_ids:
        # Same accession number seen in a different directory.
        print(
            f'Duplicate: {prefix} in {dirpath.removeprefix(_args.imgdir)},'
            f'original in {img_ids[nid][0].removeprefix(_args.imgdir)}')
    else:
        img_ids[nid] = (imgf2, dirpath)
def onefile(imgf: str):
    """
    Register one image file in the global img_ids dict, keyed by
    normalized accession number, reporting duplicates found in other
    directories.

    :param imgf: bare filename within the global dirpath directory
    :return: None
    """
    m = re.match(r'(collection_)?(.*)', imgf)
    imgf2 = m.group(2)  # remove optional leading 'collection_'
    prefix, suffix = os.path.splitext(imgf2)
    if suffix.lower() not in ('.jpg', '.png'):
        if _args.verbose > 1:
            print('not image:', imgf)
        return
    try:
        nid = normalize_id(prefix)
    except ValueError as ve:
        # Fix: the caught exception was captured but never shown, hiding
        # the reason the file was skipped (the sibling onefile prints it).
        print(f'Skipping {imgf}: {ve}')
        return
    if nid in img_ids:
        # Same accession number seen in a different directory.
        print(
            f'Duplicate: {prefix} in {dirpath.removeprefix(_args.imgdir)},'
            f'original in {img_ids[nid][0].removeprefix(_args.imgdir)}')
    else:
        img_ids[nid] = (imgf2, dirpath)
def add_one_id(candidate):
    """
    :param candidate: filename with trailing .csv removed
    :return: None. The nonlocal candidate_set is updated if the name was
        valid.
    """
    nonlocal notinmodes
    candidate2 = candidate.removeprefix('collection_')
    try:
        normid = normalize_id(candidate2)
    except ValueError as ve:
        # Dotfiles like .DS_Store are expected; only report other
        # normalization failures.
        if not candidate2.startswith('.'):
            trace(1, '{}', ve)
        return
    if normid in valid_idnums:
        candidate_set.add(normid)
    else:
        trace(2, 'Skipping {}, not in Modes.', candidate2)
        notinmodes += 1
def main():
    """
    Display each image file in the directory named by argv[1] in
    normalized-accession-number order, waiting for each viewer window to
    close before showing the next ('open -W' — macOS only). Ctrl-C exits
    cleanly.
    """
    targetdir = sys.argv[1]
    try:
        # Pair each filename with its normalized id so e.g. JB2 sorts
        # before JB10.
        targetlist = [(normalize_id(os.path.splitext(t)[0], strict=False), t)
                      for t in os.listdir(targetdir)]
        targetlist = sorted(targetlist, key=lambda item: item[0])
        numtargets = len(targetlist)
        ntarg = 0
        for _, target in targetlist:
            ntarg += 1
            print(f'file {ntarg} of {numtargets}: {target}')
            filename, extension = os.path.splitext(target)
            if extension.lower() in ('.jpg', '.jpeg', '.png'):
                subprocess.run(['open', '-W', os.path.join(targetdir,
                                                           target)])
            else:
                print('skipping', target)
    except KeyboardInterrupt:
        print('\nExiting.')
        sys.exit(1)
def one_accession_number(accno):
    """
    Validate one accession number from the current CSV row and record its
    (exhibition, catalogue number) pair in the enclosing cdict.

    Uses row, exhibition, cdict and _args from the enclosing scope.

    :param accno: the accession number text from the CSV row
    :return: None
    :raises KeyError: if the normalized number is a duplicate
    """
    # print(f'{row=}')
    try:
        accnum = normalize_id(accno)
    except ValueError:
        print(f"Skipping in csv: {accno}")
        return
    if accnum in cdict:
        raise KeyError(f'Duplicate accession number: {accnum}')
    cataloguenumber = None
    if _args.col_cat is not None:
        cataloguenumber = row[_args.col_cat]
        try:
            # convert "33." to 33
            cataloguenumber = int(float(cataloguenumber))
        except ValueError:
            pass  # ok, doesn't have to be an integer
    # print(row)
    # print(exhibition, cataloguenumber)
    cdict[accnum] = (exhibition, cataloguenumber)
def update_normal_location(ol, idnum):
    """
    Set the normal-location text of an ObjectLocation element from the
    command line (--location) or the newlocs CSV mapping, when it differs
    from the value already in the XML.

    :param ol: the ObjectLocation element
    :param idnum: the ObjectIdentity/Number text (we've tested that idnum
        is in newlocs)
    :return: True if the object is updated, False otherwise
    """
    location = ol.find('./Location')
    text = None
    if location.text is not None:
        text = location.text.strip().upper()
    nid = nd.normalize_id(idnum)
    newtext = _args.location if _args.location else newlocs[nid]
    if text == newtext:
        trace(2, '{}: Normal location unchanged: {}', idnum, text)
        return False
    trace(2, '{}: Updated normal {} -> {}', idnum, text, newtext)
    location.text = newtext
    return True
def read_include_dict(includes_file, include_column, include_skip, verbos=1,
                      logfile=sys.stdout, allow_blanks=False):
    """
    Read the optional CSV file from the --include argument.

    Build a dict of accession IDs in upper case for use by cfgutil.select.
    The value of the dict is the row from the CSV file.

    :param includes_file: path to the CSV file, or None/'' when --include
        was not specified
    :param include_column: index of the column holding the accession number
    :param include_skip: number of leading rows (e.g. headers) to skip
    :param verbos: verbosity; >= 1 reports skipped rows and duplicate ids
    :param logfile: stream for diagnostic messages
    :param allow_blanks: if True, rows with a blank accession number are
        skipped; otherwise a ValueError is raised
    :return: a dict or None if --include was not specified
    :raises ValueError: if the file is not a .csv file or (without
        allow_blanks) a blank accession number is found
    """
    if not includes_file:
        return None
    # Fix: os.path.splitext returns the extension *with* the leading dot,
    # so the old comparison against 'csv' rejected every file.
    if os.path.splitext(includes_file)[1].lower() != '.csv':
        raise ValueError('mapfile must be a CSV file.')
    includedict: dict = dict()
    # Context manager so the file is closed even on error (the original
    # leaked the handle).
    with codecs.open(includes_file, 'r', 'utf-8-sig') as csvfile:
        includereader = csv.reader(csvfile)
        for n in range(include_skip):  # default in xml2csv = 0
            skipped = next(includereader)  # skip header
            if verbos >= 1:
                print(f'Skipping row in "include" file: {skipped}',
                      file=logfile)
        for row in includereader:
            if not row:
                continue
            idnum = row[include_column].upper()  # cfgutil.select needs uppercase
            if not idnum:
                if allow_blanks:
                    continue  # skip blank accession numbers
                else:
                    raise ValueError('Blank accession number in include file;'
                                     ' --allow_blank not selected.')
            # Expand ranges like JB001-003, then normalize each id.
            idnumlist: list[str] = [normalize_id(i)
                                    for i in expand_idnum(idnum)]
            if verbos >= 1:
                for num in idnumlist:
                    if num in includedict:
                        print(f'Warning: Duplicate id number in include '
                              f'file, {num}, ignored.', file=logfile)
            for idnum in idnumlist:
                includedict[idnum] = row
    return includedict
def main():
    """
    Copy selected Object elements from infile either to one file each in
    a directory (--directory) or into a single <Interchange> document.
    Selection is delegated to config.select with the include list.

    Updates the module globals objcount and selcount for end-of-run
    reporting.
    """
    global objcount, selcount
    if not _args.directory:
        declaration = f'<?xml version="1.0" encoding="{_args.encoding}"?>\n'
        outfile.write(bytes(declaration, encoding=_args.encoding))
        outfile.write(b'<Interchange>\n')
    objectlevel = 0
    for event, oldobject in ET.iterparse(infile, events=('start', 'end')):
        if event == 'start':
            if oldobject.tag == config.record_tag:
                objectlevel += 1
            continue
        # It's an "end" event.
        if oldobject.tag != config.record_tag:
            continue
        objectlevel -= 1
        if objectlevel:
            continue  # It's not a top level Object.
        idelem = oldobject.find(config.record_id_xpath)
        idnum = idelem.text if idelem is not None else None
        if _args.normalize:
            idnum = normalize_id(idnum)
        selected = config.select(oldobject, includes, _args.exclude)
        objcount += 1
        if selected:
            selcount += 1
            outstring = ET.tostring(oldobject, encoding=_args.encoding)
            if _args.directory:
                # One file per object, named by its id.
                objfilename = os.path.join(_args.outfile, idnum + '.xml')
                objfile = open(objfilename, 'wb')
                objfile.write(outstring)
                objfile.close()
            else:
                outfile.write(outstring)
        # Free memory held by the processed element.
        oldobject.clear()
        if _args.short:
            break
    if not _args.directory:
        outfile.write(b'</Interchange>')
def one_document(document, parent, config: Config):
    """
    Evaluate one column document from the config against an Object
    element and return the text destined for that CSV column.

    :param document: one YAML column document (dict keyed by Stmt names)
    :param parent: the Object element
    :param config: the Config instance
    :return: (text, command) where text is the extracted value, or None
        when the addressed element is absent from the XML
    """
    command = document[Stmt.CMD]
    eltstr = document.get(Stmt.XPATH)
    text = None
    if eltstr:
        element = parent.find(eltstr)
    else:
        element = None
    if element is None:
        return None, command
    if command == Cmd.ATTRIB:
        attribute = document[Stmt.ATTRIBUTE]
        text = element.get(attribute)
    elif command == Cmd.COUNT:
        count = len(list(parent.findall(eltstr)))
        text = f'{count}'
    elif command == Cmd.KEYWORD:
        value = document[Stmt.VALUE]
        # Emit the keyword only when the element's text matches the
        # configured value.
        if element.text.strip() == value:
            keyword = element.find('Keyword')
            text = keyword.text.strip()
    elif command == Cmd.MULTIPLE:
        elements = parent.findall(eltstr)
        delimiter = document[Stmt.MULTIPLE_DELIMITER]
        # print(f'{elements=}')
        # for e in elements:
        #     print(f'{e.text=}')
        text = delimiter.join([e.text for e in elements
                               if e.text is not None])
    elif element.text is None:
        text = ''
    else:
        text = element.text.strip()
    if Stmt.NORMALIZE in document:
        text = normalize_id(text, _args.mdacode)
    if Stmt.WIDTH in document:
        # Truncate to the configured column width.
        text = text[:int(document[Stmt.WIDTH])]
    return text, command
def handle_update(idnum, elem):
    """
    If the location in the newlocs dictionary is different from the
    location in the XML, update the XML, insert the date specified on the
    command line, and delete the idnum from the global "newlocs"
    dictionary.

    :param idnum: the object's accession number (denormalized form)
    :param elem: the Object element
    :return: None
    """
    global total_updated, total_written
    updated = False
    nidnum = nd.normalize_id(idnum)
    if nidnum in newlocs:  # newlocs: list returned by loadcsv()
        if not validate_locations(idnum, elem):
            trace(1, 'Failed pre-update validation.')
            sys.exit(1)
        if _args.normal:
            ol = elem.find('./ObjectLocation[@elementtype="normal location"]')
            updated = update_normal_location(ol, idnum)
        if _args.current:
            updated |= update_current_location(elem, idnum)
        if _args.previous:
            updated |= update_previous_location(elem, idnum)
        del newlocs[nidnum]
    else:
        if _args.warn:
            trace(1, '{}: Not in CSV file', idnum)
    # NOTE(review): nidnum was just deleted from newlocs above, so this
    # post-update validation can never fire for a processed object —
    # confirm whether the membership guard is intended.
    if nidnum in newlocs and not validate_locations(idnum, elem):
        trace(1, 'Failed post-update validation.')
        sys.exit(1)
    if updated:
        total_updated += 1
    if updated or _args.all:
        outfile.write(ET.tostring(elem, encoding='utf-8'))
        total_written += 1
def test_01b(self):
    """A trailing sub-number letter is uppercased and kept."""
    self.assertEqual(normalize_id('JB1a'), 'JB000001A')
def select(cfg: Config, elem, includes=None, exclude=False):
    """
    :param cfg: the Config instance
    :param elem: the Object element
    :param includes: A set or dict of id numbers of objects to be included
        in the output CSV file. The list must be all uppercase.
    :param exclude: Treat the include list as an exclude list.
    :return: selected is true if the Object element should be written out
    """
    # print('select')
    selected = True
    idelem = elem.find(cfg.record_id_xpath)
    idnum = normalize_id(idelem.text) if idelem is not None else None
    # print(f'{idnum=}')
    # First apply the include/exclude list, then the control documents.
    if idnum and exclude and includes:
        if idnum.upper() in includes:
            return False
    elif includes is not None:
        if not idnum or idnum.upper() not in includes:
            # print('select return false')
            return False
    for document in cfg.ctrl_docs:
        command = document[Stmt.CMD]
        if command == Cmd.GLOBAL:
            continue
        eltstr = document.get(Stmt.XPATH)
        if eltstr:
            element = elem.find(eltstr)
        else:
            element = None
        # print(f'{element=}')
        if element is None:
            if Stmt.REQUIRED in document:
                print(f'*** Required element {eltstr} is missing from'
                      f' {idnum}. Object excluded.', file=cfg.logfile)
                selected = False
                break
            elif command == Cmd.IFELT:
                continue  # if the element exists
        if command in (Cmd.ATTRIB, Cmd.IFATTRIB, Cmd.IFATTRIBEQ,
                       Cmd.IFATTRIBNOTEQ):
            attribute = document[Stmt.ATTRIBUTE]
            # NOTE(review): element.get() returns None for a missing
            # attribute, making .strip() raise — confirm the attribute is
            # guaranteed present for these commands.
            text = element.get(attribute).strip()
        elif element is None or element.text is None:
            text = ''
        else:
            # noinspection PyUnresolvedReferences
            text = element.text.strip()
        # print(f'{text=}')
        if text:
            if command == Cmd.IFNOT:
                selected = False
                break
        else:
            if Stmt.REQUIRED in document:
                print(f'*** Required text in {eltstr} is missing from'
                      f' {idnum}. Object excluded.', file=cfg.logfile)
            if command in (Cmd.IF, Cmd.IFATTRIB, Cmd.IFCONTAINS, Cmd.IFEQ,
                           Cmd.IFATTRIBEQ):
                selected = False
                break
        if command in (Cmd.IFEQ, Cmd.IFNOTEQ, Cmd.IFCONTAINS, Cmd.IFATTRIBEQ,
                       Cmd.IFATTRIBNOTEQ):
            value = document[Stmt.VALUE]
            textvalue = text
            # Comparisons are case-insensitive unless the document says
            # otherwise.
            if Stmt.CASESENSITIVE not in document:
                value = value.lower()
                textvalue = textvalue.lower()
            if command == Cmd.IFCONTAINS and value not in textvalue:
                selected = False
                break
            elif (command in (Cmd.IFEQ, Cmd.IFATTRIBEQ)
                    and value != textvalue):
                selected = False
                break
            elif (command in (Cmd.IFNOTEQ, Cmd.IFATTRIBNOTEQ)
                    and value == textvalue):
                selected = False
                break
        continue
    # print(f'{selected=}')
    return selected
def norm(e):
    """Key function: normalized id taken from a tuple's first element."""
    return normalize_id(e[0])
def main(argv):  # can be called either by __main__ or test_xml2csv
    """
    Convert a Modes XML file to a CSV file, one row per selected Object,
    with columns driven by the YAML config.

    :param argv: the command-line argument list
    :return: (nlines, nwritten, notfound) counts for reporting and tests
    """
    global _args, _logfile
    _args = getargs(argv)
    infilename = _args.infile
    outfilename = _args.outfile
    cfgfilename = _args.cfgfile
    if _args.logfile:
        _logfile = open(_args.logfile, 'w')
    else:
        _logfile = sys.stdout
    infile = openfile(infilename)
    nlines = notfound = nwritten = 0
    Config.reset_config()  # needed by test_xml2csv
    if cfgfilename:
        cfgfile = open(cfgfilename)
    else:
        cfgfile = None
        trace(
            1,
            'Warning: Config file omitted. Only accession numbers will be output.'
        )
    config = Config(cfgfile, dump=_args.verbose >= 2, logfile=_logfile)
    outcsv, outfile = opencsvwriter(outfilename, config.delimiter)
    outlist = []
    titles = yaml_fieldnames(config)
    trace(1, 'Columns: {}', ', '.join(titles))
    if not _args.heading:
        trace(1, 'Heading row not written.')
    if _args.heading:
        outcsv.writerow(titles)
    objectlevel = 0
    # Build the include list either from --object or from the --include
    # CSV file.
    if _args.object:
        expanded = [normalize_id(obj) for obj in expand_idnum(_args.object)]
        includeset = set(expanded)  # JB001-002 -> JB001, JB002
        includes = dict.fromkeys(includeset)
    else:
        includes = read_include_dict(_args.include, _args.include_column,
                                     _args.include_skip, _args.verbose,
                                     logfile=_logfile,
                                     allow_blanks=_args.allow_blanks)
    for event, elem in ET.iterparse(infile, events=('start', 'end')):
        # print(event)
        if event == 'start':
            # print(elem.tag)
            if elem.tag == config.record_tag:
                objectlevel += 1
            continue
        # It's an "end" event.
        if elem.tag != config.record_tag:  # default: Object
            continue
        objectlevel -= 1
        if objectlevel:
            continue  # It's not a top level Object.
        data = []
        idelem = elem.find(config.record_id_xpath)
        idnum = idelem.text if idelem is not None else ''
        trace(3, 'idnum: {}', idnum)
        nlines += 1
        writerow = config.select(elem, includes, exclude=_args.exclude)
        # print(f'{writerow=}')
        if not writerow:
            continue
        norm_idnum = normalize_id(idnum, _args.mdacode,
                                  verbose=_args.verbose)
        # We have selected the id but only write the row if there is
        # something to display. There will always be at least the ID
        # number in the first column unless skip_number was specified in
        # the config.
        if config.skip_number:
            writerow = False
        else:
            # Insert the ID number as the first column.
            data.append(norm_idnum)
        for document in config.col_docs:
            text, command = one_document(document, elem, config)
            # print(f'{command=}')
            if text is None:
                notfound += 1
                trace(2, '{}: cmd: {}, "{}" is not found in XML.', idnum,
                      command, document[Stmt.TITLE])
                text = ''
            if text:
                writerow = True
            data.append(text)
        if writerow:
            nwritten += 1
            outlist.append(data)
            trace(3, '{} written.', idnum)
        elem.clear()  # free memory held by the processed element
        if includes and not _args.exclude:
            includes.pop(norm_idnum)
        if _args.short:
            break
    if config.sort_numeric:
        outlist.sort(key=lambda x: int(x[0]))
    else:
        outlist.sort()
    # Create a list of flags indicating whether the value needs to be
    # de-normalized.
    norm = []
    if not config.skip_number:
        norm.append(True)  # for the Serial number
    for doc in config.col_docs:
        if doc[Stmt.CMD] in Cmd.get_control_cmds():
            continue
        norm.append(Stmt.NORMALIZE in doc)
    lennorm = len(norm)
    for row in outlist:
        for n, cell in enumerate(row[:lennorm]):
            if norm[n]:
                row[n] = denormalize_id(cell, _args.mdacode)
        outcsv.writerow(row)
    infile.close()
    if cfgfile:
        cfgfile.close()
    outfile.close()
    if includes and len(includes):
        trace(1, '{} items in include list not in XML.', len(includes))
        if _args.verbose > 1:
            print('In include list but not xml:', file=_logfile)
            for accnum in includes:
                print(accnum, file=_logfile)
    if not _args.bom:
        trace(1, 'BOM not written.')
    return nlines, nwritten, notfound
# Script entry: parse arguments, open input/output, build the include
# list, run main(), then report selection counts.
objcount = selcount = 0
object_number = ''
_args = getargs()
infile = open(_args.infile)
if _args.directory:
    # Output is a directory of one-object XML files; refuse to write into
    # a non-empty directory unless --force is given.
    outfile = _args.outfile
    ld = os.listdir(outfile)
    if ld and not _args.force:
        print(f'Directory {outfile} is not empty. Exiting.')
        sys.exit()
else:
    outfile = open(_args.outfile, 'wb')
if _args.cfgfile:
    cfgfile = open(_args.cfgfile)
else:
    cfgfile = None
config = Config(cfgfile, dump=_args.verbose >= 2)
# Build the include list either from --object or from the --include CSV.
if _args.object:
    expanded = [normalize_id(obj) for obj in expand_idnum(_args.object)]
    includeset = set(expanded)  # JB001-002 -> JB001, JB002
    includes = dict.fromkeys(includeset)
else:
    includes = read_include_dict(_args.include, _args.include_column,
                                 _args.include_skip, _args.verbose)
main()
basename = os.path.basename(sys.argv[0])
print(
    f'{selcount} object{"" if selcount == 1 else "s"} selected from {objcount}.'
)
print(f'End {basename.split(".")[0]}')
# Build a catalogue-number -> normalized-accession-number map from the
# CSV, keeping only accession numbers present in the Modes XML, then scan
# the image directory for files named by catalogue number.
objects = list_objects(XMLPATH)
imgs = os.listdir(IMGSPATH)
includecsvfile = open(INCLUDECSVPATH, 'w')
csvfile = codecs.open(CSVPATH, 'r', 'utf-8-sig')
reader = csv.DictReader(csvfile)
objset = set(n.normalized for n in objects)
cat2accn = {}
for row in reader:
    cat = row['Cat'].strip()  # catalogue number
    if not cat:
        continue
    ac = row['Accn. No.']
    try:
        accnum = normalize_id(ac.strip())
    except ValueError:
        print(f'bad accnum: skipping {cat}: "{ac}"')
        continue
    if accnum not in objset:
        print(f'skipping {cat}: "{accnum}"')
        continue
    cat2accn[cat] = accnum
for filename in imgs:
    # Image files are expected to start with the catalogue number.
    m = re.match(r'(\d+\w?)\.', filename)
    if m:
        cat = m.group(1)
    else:
        # NOTE(review): the message has no filename placeholder and cat is
        # never used after this point — this loop body looks truncated or
        # redacted in the source; confirm against the original script.
        print(f'skipping image: (unknown)')
        continue
def test_01a(self):
    """A plain JB number is zero-padded to eight digits."""
    self.assertEqual(normalize_id('JB1'), 'JB000001')
def test_02(self):
    """The trailing field of a dotted LDHRM id is zero-padded."""
    self.assertEqual(normalize_id('LDHRM.2018.1'), 'LDHRM.2018.000001')
def test_04(self):
    """An id with too many digits triggers an AssertionError."""
    self.assertRaises(AssertionError, normalize_id, 'JB9999999')
def test_03(self):
    """Both trailing fields of a dotted LDHRM id are zero-padded."""
    self.assertEqual(normalize_id('LDHRM.2018.1.2'),
                     'LDHRM.2018.000001.000002')