def getparser():
    """Build the location parser and its four sub-commands.

    The sub-commands are ``diff``, ``select``, ``update`` and ``validate``.
    Each sub-parser stores its handler function as the ``func`` default and
    receives its options from ``add_arguments``.

    :return: the configured ``argparse.ArgumentParser``
    """
    parser = argparse.ArgumentParser(description=nd.sphinxify(
        ' Set the normal location and/or current location to the new'
        ' location from a CSV file with rows of the format:'
        ' <object number>,<location>. If the location in the CSV file'
        ' differs from the location in the XML file, update the'
        " ``Date/DateBegin`` element to today's date unless the --date"
        ' option is specified. If a new current location is being set,'
        ' create a previous location from the existing current'
        ' location. ',
        called_from_sphinx))
    subparsers = parser.add_subparsers(dest='subp')
    diff_parser = subparsers.add_parser('diff', description=nd.sphinxify(
        ' With no options, check that the location in the object'
        ' specified by --col_loc is the same as the location specified'
        ' by -c or -n option in the XML file. ',
        called_from_sphinx))
    select_parser = subparsers.add_parser(
        'select',
        description=(' Select the objects named in the CSV file specified'
                     ' by -m and write them to the output without'
                     ' modification. '))
    update_parser = subparsers.add_parser(
        'update',
        description=(' Update the XML file from the location in the CSV'
                     ' file specified by -m. '))
    validate_parser = subparsers.add_parser(
        'validate',
        description=(' Run the validate_locations function against the'
                     ' input file. This validates all locations and'
                     ' ignores the -c, -n, and -p options. Check that'
                     ' dates exist and do not overlap. '))
    # Each sub-parser is registered under its own command name.  The
    # validate parser used to be registered as 'update', which made any
    # command-dependent option selection treat it as the update command.
    registrations = (
        (diff_parser, handle_diff, 'diff'),
        (select_parser, handle_select, 'select'),
        (update_parser, handle_update, 'update'),
        (validate_parser, handle_validate, 'validate'),
    )
    for subparser, handler, _command in registrations:
        subparser.set_defaults(func=handler)
    for subparser, _handler, command in registrations:
        add_arguments(subparser, command)
    return parser
def getargs():
    """Parse the command line of the value-counting script.

    :return: the parsed ``argparse.Namespace``
    :raises ValueError: unless exactly one of ``--cfgfile`` and ``--xpath``
        was supplied
    """
    parser = argparse.ArgumentParser(
        description=(' For a single field, display the number of'
                     ' occurances of each value. '))
    parser.add_argument(
        'infile',
        help=' The XML file saved from Modes.')
    parser.add_argument(
        '-c', '--cfgfile', type=argparse.FileType('r'),
        help=sphinxify(
            ' The YAML file describing the column path containing values'
            ' to count. The config file may contain only a single'
            ' ``column`` command. Specify this or the --xpath parameter. ',
            calledfromsphinx))
    parser.add_argument(
        '-t', '--type', action='append',
        help=sphinxify(
            ' Print the object number of all of the Object elements of'
            ' this type. Multiple --type arguments may be entered.',
            calledfromsphinx))
    parser.add_argument(
        '-v', '--verbose', type=int, default=1,
        help=(' Set the verbosity. The default is 1 which prints summary'
              ' information. '))
    parser.add_argument(
        '-w', '--width', type=int, default=50,
        help=' Set the width of the field printed. The default is 50. ')
    parser.add_argument(
        '-x', '--xpath',
        help=sphinxify(
            ' Specify the xpath of the field containing values to count.'
            ' Specify this or the --cfgfile parameter. ',
            calledfromsphinx))
    args = parser.parse_args()
    # Exactly one of the two field selectors may be present.
    have_cfgfile = bool(args.cfgfile)
    have_xpath = bool(args.xpath)
    if have_cfgfile != have_xpath:
        return args
    raise ValueError('Exactly one of the --cfgfile and --xpath parameters'
                     ' must be specified.')
def getparser():
    """Build the argument parser for the DOCX-table-to-CSV converter.

    :return: the configured ``argparse.ArgumentParser``
    """
    parser = argparse.ArgumentParser(
        description=(' Read a DOCX file, extract any tables, and convert'
                     ' them to CSV. '))
    parser.add_argument(
        'infile',
        help=' The input DOCX file')
    parser.add_argument(
        'outfile',
        help=' The output CSV file.')
    parser.add_argument(
        '-b', '--bom', action='store_true',
        help=(' Select this option to insert a BOM at the front of the'
              ' output CSV file. Use this option when the CSV file is to'
              ' be imported into Excel so that the proper character set'
              ' (UTF-8) is used. '))
    parser.add_argument(
        '--exclude_column', type=int, default=0,
        help=sphinxify(
            ' Specify the column to check for row exclusion. The default'
            ' is column 0. This argument is ignored if --exclude is not'
            ' specified. ',
            called_from_sphinx))
    parser.add_argument(
        '-i', '--index_column', type=int,
        help=sphinxify(
            ' Specify a column to generate an index in. This will'
            ' overwrite whatever is in that column ',
            called_from_sphinx))
    parser.add_argument(
        '-r', '--index_row', type=int, default=0,
        help=sphinxify(
            ' The zero-based row in which to begin generating the index.'
            ' This is ignored unless --index_column is specified. The'
            ' default is zero, that is, to start numbering from the first'
            ' row. ',
            called_from_sphinx))
    parser.add_argument(
        '-s', '--index_start', type=int, default=1,
        help=sphinxify(
            ' The first number to insert into the index column. This is'
            ' ignored unless --index_column is specified. The default is'
            ' one which is incremented for each row. ',
            called_from_sphinx))
    parser.add_argument(
        '-t', '--table', type=int, default=0,
        help=(' Select a single table to process. The default is to'
              ' process all tables. '))
    parser.add_argument(
        '-u', '--inhibit_upper', action='store_true', default=False,
        help=(' By default, the first column is converted to upper case'
              ' and white space characters are removed. If specified,'
              ' inhibit this conversion. '))
    parser.add_argument(
        '--upper', type=int,
        help=(' Convert the zero-based column to upper case. \n'
              'This is in addition to column zero unless -u is'
              ' specified. '))
    parser.add_argument(
        '-v', '--verbose', type=int, default=1,
        help=(' Set the verbosity. The default is 1 which prints summary'
              ' information. '))
    parser.add_argument(
        '-x', '--exclude',
        help=sphinxify(
            ' Exclude rows where this text appears in the column'
            ' specified by the --exclude_column argument. ',
            called_from_sphinx))
    return parser
def add_arguments(parser, command):
    """Add the location-update options to ``parser``.

    :param parser: the parser (or sub-parser) that receives the options
    :param command: the sub-command name; not consulted in this function
    """
    global is_update, is_diff, is_select, is_validate  # Needed for Sphinx
    parser.add_argument(
        'infile',
        help=' The XML file saved from Modes.')
    parser.add_argument(
        '-o', '--outfile', required=True,
        help=' The output XML file.')
    parser.add_argument(
        '--col_acc', type=int, default=0,
        help=(' The zero-based column containing the accession number of'
              ' the object to be updated. The default is column zero. '))
    parser.add_argument(
        '--col_loc', type=int, default=1,
        help=nd.sphinxify(
            ' The zero-based column containing the new location of the'
            ' object to be updated. The default is column 1. See the'
            ' --location option which sets the location for all objects'
            ' in which case this option is ignored.',
            called_from_sphinx))
    parser.add_argument(
        '-c', '--current', action='store_true',
        help=(' Update the current location and change the old current'
              ' location to a previous location. See the descrption of'
              ' "n" and "p". '))
    parser.add_argument(
        '--encoding', default='utf-8',
        help=(' Set the input encoding. Default is utf-8. Output is'
              ' always utf-8. '))
    parser.add_argument(
        '--heading',
        help=nd.sphinxify(
            ' The first row of the map file contains a column title which'
            ' must match the parameter (case insensitive) in the column'
            ' designated for the location. If a --location argument is'
            ' specified, the first row is skipped and the value, which'
            ' nevertheless must be specified, is ignored. ',
            called_from_sphinx))
    parser.add_argument(
        '-m', '--mapfile',
        help=nd.sphinxify(
            ' The CSV file mapping the object number to its new location.'
            ' By default, the accession number is in the first column'
            ' (column 0) but this can be changed by the --col_acc option.'
            ' The new location is by default in the second column'
            ' (column 1) but can be changed by the --col_loc option. This'
            ' argument is ignored if --object is specified. \n',
            called_from_sphinx))
    # NOTE(review): only --normal is treated as conditional here; confirm
    # that the options below it are meant to be registered for every
    # command.
    if is_update or is_diff:
        parser.add_argument(
            '-n', '--normal',
            help=' The new normal location')
    parser.add_argument(
        '-j', '--object',
        help=nd.sphinxify(
            ' Specify a single object to be processed. If specified, do'
            ' not specify the CSV file containing object numbers and'
            ' locations (--mapfile). ',
            called_from_sphinx))
    parser.add_argument(
        '-r', '--reason', default='',
        help=(' Insert this text as the reason for the move to the new'
              ' current location. '))
    parser.add_argument(
        '-s', '--short', action='store_true',
        help=' Only process a single object. For debugging.')
    parser.add_argument(
        '-v', '--verbose', type=int, default=1,
        help=(' Set the verbosity. The default is 1 which prints summary'
              ' information. '))
def getparser() -> argparse.ArgumentParser:
    """Build the argument parser for the CSV recoding script.

    :return: the configured ``argparse.ArgumentParser``
    """
    parser = argparse.ArgumentParser(
        description=(
            ' Read a CSV file, recode columns and write the CSV file. The'
            ' Exhibition Name and Exhibition Place columns are merged'
            ' into a "name at place" format unless the place is "HRM" in'
            " which case it's omitted. The DateBegin column (in Modes"
            ' format) is deleted and replaced by a human-friendly column'
            ' and an ISO date column. The input columns are defined in'
            ' ``cfg/website.yml`` and must match names hard-coded here.'))
    parser.add_argument(
        'incsvfile',
        help=sphinxify(
            ' The input is expected to have been produced by xml2csv.py'
            ' using the website.yml config file. You must specify the'
            ' --heading option ',
            called_from_sphinx))
    parser.add_argument(
        'outfile',
        help=' The output CSV file.')
    parser.add_argument(
        '-s', '--short', action='store_true',
        help=' Only process one object. For debugging.')
    parser.add_argument(
        '-v', '--verbose', type=int, default=1,
        help=(' Set the verbosity. The default is 1 which prints summary'
              ' information.'))
    return parser
def getargs():
    """Parse the command line of the JPG copy/shrink script.

    ``--dryrun`` raises the verbosity to at least 2 but never lowers a
    higher level that was requested explicitly.

    :return: the parsed ``argparse.Namespace``
    """
    parser = argparse.ArgumentParser(
        description=(' For every JPG file in a directory, copy it to the'
                     ' output directory or, if it is large, copy a'
                     ' shrunken version of it.'))
    parser.add_argument(
        'indir',
        help=' Input directory')
    parser.add_argument(
        'outdir',
        help=' Output directory')
    parser.add_argument(
        '--dryrun', action='store_true',
        help=sphinxify(
            " Print messages but don't do processing. Implies"
            ' --verbose=2',
            calledfromsphinx))
    parser.add_argument(
        '-m', '--maxpixels', type=int, default=DEFAULT_MAXPIXELS,
        help=(' Maximum number of pixels in either dimension.'
              + if_not_sphinx(
                  f' The default is {DEFAULT_MAXPIXELS} pixels.',
                  calledfromsphinx)))
    parser.add_argument(
        '-v', '--verbose', type=int, default=1,
        help=(' Set the verbosity. The default is 1 which prints summary'
              ' information. '))
    args = parser.parse_args()
    if args.dryrun:
        # A dry run needs at least level 2 to be useful, but must not
        # reduce a higher verbosity the user asked for.
        args.verbose = max(2, args.verbose)
    return args
def getargs():
    """Parse the command line of the candidate-to-staging copy script.

    When ``--dryrun`` is given the verbosity is raised to at least 2.

    :return: the parsed ``argparse.Namespace``
    """
    parser = argparse.ArgumentParser(
        description=(' For each file in a "candidate" folder, if that'
                     ' file is not in the "done" folder or any of its'
                     ' subfolders, copy it to a "staging" folder. Run'
                     ' list_needed.py before this script. '))
    parser.add_argument(
        '-c', '--candidate', required=True,
        help=(' Directory containing new files that may need to be'
              ' transferred'))
    parser.add_argument(
        '-d', '--done', required=True,
        help=(' Directory containing files already transferred,'
              ' including sub-directories'))
    parser.add_argument(
        '-s', '--staging', required=True,
        help=(' Directory to contain files to be transferred. We copy'
              ' files from the candidate directory to this directory. '))
    parser.add_argument(
        '--dryrun', action='store_true',
        help=sphinxify(
            ' Do not copy files. Just print info. Implies --verbose 2.',
            calledfromsphinx))
    parser.add_argument(
        '-v', '--verbose', type=int, default=1,
        help=(' Set the verbosity. The default is 1 which prints summary'
              ' information. '))
    args = parser.parse_args()
    # A dry run is only useful if it reports what it would have done.
    if args.dryrun and args.verbose < 2:
        args.verbose = 2
    return args
def getparser():
    """Build the argument parser for the update-XML-from-CSV script.

    :return: the configured ``argparse.ArgumentParser``
    """
    parser = argparse.ArgumentParser(
        description=(
            ' Read a CSV file containing two or more column_paths. The'
            ' first column is the index and the following columns are the'
            ' field(s) defined by the XPATH statement in the YAML'
            ' configuration file. Update the XML file with data from the'
            ' CSV file. If a row in the CSV file has fewer columns than'
            ' the number of columns specified in the YAML file, replace'
            ' the text with an empty string.'))
    parser.add_argument(
        'infile',
        help=' The XML file saved from Modes.')
    parser.add_argument(
        'outfile',
        help=' The output XML file.')
    parser.add_argument(
        '-a', '--all', action='store_true',
        help=(' Write all objects. The default is to only write updated'
              ' objects.'))
    parser.add_argument(
        '--allow_blanks', action='store_true',
        help=(' Skip rows in the include CSV file with blank accession'
              ' numbers. If not set, this will cause an abort. '))
    parser.add_argument(
        '-c', '--cfgfile', required=True, type=argparse.FileType('r'),
        help=' The YAML file describing the column path(s) to update')
    parser.add_argument(
        '-e', '--empty', action='store_true',
        help=sphinxify(
            ' Normally, an empty field in the CSV file means that no'
            ' action is to be taken. If -e is selected, empty values from'
            ' the CSV will overwrite the fields in the file. Another way'
            ' to do this for specific fields is to set the text to'
            ' ``{{clear}}`` in the CSV field to be emptied. --empty'
            ' implies --replace.',
            called_from_sphinx))
    parser.add_argument(
        '--missing', action='store_true',
        help=(' By default, ignore indices missing from the CSV file. If'
              ' selected, trace the missing index.'))
    parser.add_argument(
        '--heading', action='store_true',
        help=(' The first row of the map file contains a heading which'
              ' must match the value of the title statement in the'
              ' corresponding column document (case insensitive).'))
    parser.add_argument(
        '-m', '--mapfile', required=True,
        help=sphinxify(
            ' The CSV file mapping the object number to the new element'
            ' value(s). \n'
            'The first column must contain the object number and'
            ' subsequent columns must correspond to the columns in the'
            ' mapping file. If a row in the CSV file has fewer fields'
            ' than defined in the mapping file, zero-length strings will'
            ' be assumed. See --empty.',
            called_from_sphinx))
    parser.add_argument(
        '-r', '--replace', action='store_true',
        help=(' Replace existing values. If not specified only empty'
              ' elements will be updated. Existing values will be cleared'
              ' if the value in the CSV file contains the special value'
              ' ``{{clear}}``. See also --empty. If --replace is not set'
              ' a warning will be issued if the existing value is not'
              ' blank.'))
    parser.add_argument(
        '-s', '--short', action='store_true',
        help=' Only process one object. For debugging.')
    parser.add_argument(
        '--skip_rows', type=int, default=0,
        help=' Skip rows at the beginning of the CSV file.')
    parser.add_argument(
        '-v', '--verbose', type=int, default=1,
        help=(' Set the verbosity. The default is 1 which prints summary'
              ' information. '))
    return parser
def getargs():
    """Parse the command line of the XML object-subset copy script.

    :return: the parsed ``argparse.Namespace``
    """
    parser = argparse.ArgumentParser(
        description=(
            ' Copy a selected set of objects to a new XML file based on'
            ' the config and a CSV file giving explicit accessions'
            ' numbers to include or exclude. If neither parameter is'
            ' given, the entire file is copied, possibly reformatting the'
            ' text and converting ASCII to UTF-8.'))
    parser.add_argument(
        'infile',
        help=' The input XML file')
    parser.add_argument(
        'outfile',
        help=' The output XML file.')
    parser.add_argument(
        '-c', '--cfgfile',
        help=(' The config file describing the Object elements to include'
              ' in the output'))
    parser.add_argument(
        '-d', '--directory', action='store_true',
        help=sphinxify(
            ' The output file is a directory. Create files in the'
            ' directory, one per object in the XML file. The directory'
            ' must be empty (but see --force).',
            calledfromsphinx))
    parser.add_argument(
        '-e', '--encoding', default='utf-8',
        help=' Set the output encoding. The default is "utf-8". ')
    parser.add_argument(
        '-f', '--force', action='store_true',
        help=sphinxify(
            ' Allow output to a directory that is not empty. ',
            calledfromsphinx))
    parser.add_argument(
        '--include', required=False,
        help=(' A CSV file specifying the accession numbers of records to'
              ' process. If omitted, all records will be processed based'
              ' on configuration statements.'))
    # The help text names --include: this parser defines no --select
    # option, which is what the text previously referred to.
    parser.add_argument(
        '--include_column', required=False, type=int, default=0,
        help=(' The column number containing the accession number in the'
              ' file specified by the --include option. The default is 0,'
              ' the first column. '))
    parser.add_argument(
        '--include_skip', type=int, default=0,
        help=(' The number of rows to skip at the front of the include'
              ' file. The default is 0. '))
    parser.add_argument(
        '-j', '--object', required=False,
        help=' Specify a single object to copy. ')
    parser.add_argument(
        '-n', '--normalize', action='store_true',
        help=(' Normalize the accession number written to the CSV file or'
              ' used to create the output filename. \n'))
    parser.add_argument(
        '-s', '--short', action='store_true',
        help=' Only process one object.')
    parser.add_argument(
        '-v', '--verbose', type=int, default=1,
        help=(' Set the verbosity. The default is 1 which prints summary'
              ' information. '))
    parser.add_argument(
        '-x', '--exclude', action='store_true',
        help=' Treat the include list as an exclude list.')
    args = parser.parse_args()
    return args
def getparser():
    """Build the argument parser for the exhibition import script.

    ``--col_ex`` and ``--exhibition`` are placed in one mutually exclusive
    group, and ``--mapfile`` and ``--object`` in another.

    :return: the configured ``argparse.ArgumentParser``
    """
    parser = argparse.ArgumentParser(
        description=(
            ' Import exhibition information into a Modes XML file. Read a'
            ' CSV file containing one, two, or three columns containing'
            ' the accession number whose record should be updated, the'
            ' optional exhibition number, and the optional catalog'
            ' number. The exhibition number corresponds to the data in'
            ' exhibition_list.py and is used for this process. It is not'
            ' recorded in the XML file. See the parameters below for more'
            ' details. '))
    exgroup = parser.add_mutually_exclusive_group()
    objgroup = parser.add_mutually_exclusive_group()
    parser.add_argument(
        'infile',
        help=' The XML file saved from Modes.')
    parser.add_argument(
        'outfile',
        help=' The output XML file.')
    parser.add_argument(
        '-a', '--all', action='store_true',
        help=(' Write all objects. The default is to only write updated'
              ' objects.'))
    parser.add_argument(
        '--allow_missing', action='store_true',
        help=(' Skip rows with missing exhibition numbers. Otherwise'
              ' abort.'))
    parser.add_argument(
        '--col_acc',
        help=(' The zero-based column containing the accession number of'
              ' the object to be updated. The default is column zero. The'
              ' column can be a number or a spreadsheet-style letter.'))
    parser.add_argument(
        '-c', '--catalogue',
        help=(' The catalogue number. Only specify this if a single'
              ' object is specified with the -j option. '))
    parser.add_argument(
        '--col_cat',
        help=(' The zero-based column containing the catalog number of'
              ' the object in the corresponding exhibition. The default'
              ' is to not create a catalog number sub-element. The column'
              ' can be a number or a spreadsheet-style letter.'))
    exgroup.add_argument(
        '--col_ex',
        help=sphinxify(
            ' The zero-based column containing the exhibition number. Do'
            ' not specify this if --exhibition is specified. It is'
            ' mandatory otherwise. \n'
            'The column can be a number or a spreadsheet-style letter.',
            called_from_sphinx))
    parser.add_argument(
        '--delete', action='store_true',
        help=sphinxify(
            ' Delete this exhibition from all objects selected. Requires'
            ' --exhibition.',
            called_from_sphinx))
    exgroup.add_argument(
        '-e', '--exhibition', type=int,
        help=sphinxify(
            ' The exhibition number to apply to all objects in the CSV'
            ' file. Do not specify this if --col_ex is specified.',
            called_from_sphinx))
    objgroup.add_argument(
        '-m', '--mapfile',
        help=sphinxify(
            ' The CSV file mapping the accession number to the catalog'
            ' number and exhibition number. (but see --exhibition). There'
            ' is no heading row (but see --skiprows).',
            called_from_sphinx))
    objgroup.add_argument(
        '-j', '--object',
        help=sphinxify(
            ' Specify a single object to be processed. If specified, do'
            ' not specify the CSV file containing object numbers,'
            ' exhibitions and catalogue numbers (--mapfile). You must'
            ' also specify --exhibition and optionally --catalogue. ',
            called_from_sphinx))
    parser.add_argument(
        '--old_name',
        help=sphinxify(
            ' Specify the old name of the exhibition to be replaced by'
            ' the name now in ``exhibition_list.py``. You must specify'
            ' the --exhibition parameter. ',
            called_from_sphinx))
    parser.add_argument(
        '--old_place',
        help=sphinxify(
            ' Specify the old ``Place`` of the exhibition to be replaced'
            ' by the ``Place`` now in ``exhibition_list.py``. You must'
            ' specify the --old_name parameter. This is optional and only'
            ' needed if the exhibition name is not unique. ',
            called_from_sphinx))
    parser.add_argument(
        '--old_date',
        help=sphinxify(
            ' Specify the old BeginDate of the exhibition to be replaced'
            ' by the BeginDate now in ``exhibition_list.py``. This is'
            ' optional and only needed if the exhibition name is not'
            ' unique. You must specify the --old_place parameter. The'
            ' date must be in Modes format (d.m.yyyy). \n',
            called_from_sphinx))
    parser.add_argument(
        '-s', '--skiprows', type=int, default=0,
        help=' Number of lines to skip at the start of the CSV file')
    parser.add_argument(
        '--short', action='store_true',
        help=' Only process one object. For debugging.')
    parser.add_argument(
        '-v', '--verbose', type=int, default=1,
        help=(' Set the verbosity. The default is 1 which prints summary'
              ' information. '))
    return parser
def getparser():
    """Build the argument parser for the CSV-to-XML creation script.

    :return: the configured ``argparse.ArgumentParser``
    """
    parser = argparse.ArgumentParser(description=sphinxify(
        ' Read a CSV file containing two or more columns. The first'
        ' column is the index and the following columns are the field(s)'
        ' defined by the XPATH statement in the config file. The first'
        ' row in the CSV file is a heading row. The column titles must'
        ' match the document titles in the CSV file. Columns are referred'
        ' to by name so it is permissible to omit columns from the config'
        ' file. Note that this contrasts with ``update_from_csv.py``'
        ' where the heading row may be omitted and filler columns must be'
        ' included. Create an XML file with data from the CSV file based'
        ' on a template of the XML structure. ',
        calledfromsphinx))
    parser.add_argument(
        '--acc_num',
        help=(' This is the first accession number in a series to assign'
              ' to rows in the input CSV file. If specified, the column'
              ' in the CSV file containing a serial number, if one'
              ' exists, is ignored. For example, if the parameter is'
              ' "LDHRM.2021.2", the numbers assigned will be'
              ' "LDHRM.2021.2", "LDHRM.2021.3", etc. This value will be'
              ' stored in the ObjectIdentity/Number element.'))
    parser.add_argument(
        '-c', '--cfgfile', required=True, type=argparse.FileType('r'),
        help=sphinxify(
            ' The YAML file describing the column path(s) to update. The'
            ' config file may contain only ``column``, ``constant``, or'
            ' ``items`` commands. ',
            calledfromsphinx))
    parser.add_argument(
        '-i', '--incsvfile',
        help=(" The CSV file containing data to be inserted into the XML"
              " template. The CSV file must have a heading. The heading"
              " of the column containing the serial number must be"
              " 'Serial' or an alternative set by --serial parameter."
              " Subsequent columns must match the corresponding title in"
              " the configuration file. "))
    parser.add_argument(
        '-m', '--mdacode', default=DEFAULT_MDA_CODE,
        help=(' Specify the MDA code, used in normalizing the accession'
              f' number. The default is "{DEFAULT_MDA_CODE}". \n'))
    parser.add_argument(
        '-o', '--outfile',
        help=' The output XML file.')
    parser.add_argument(
        '-p', '--noprolog', action='store_true',
        help=(' Inhibit the insertion of an XML prolog at the front of'
              ' the file and an <Interchange> element as the root. This'
              ' results in an invalid XML file but is useful if the'
              ' output is to be manually edited.'))
    parser.add_argument(
        '--serial', default='Serial',
        help=sphinxify(
            ' The column containing the serial number must have a heading'
            ' with this value. This is ignored if the --acc_num parameter'
            ' is specified. '
            + if_not_sphinx(' The default value is "Serial".',
                            calledfromsphinx),
            calledfromsphinx))
    parser.add_argument(
        '-s', '--short', action='store_true',
        help=' Only process one object. For debugging.')
    # NOTE(review): this required group has --template as its only member,
    # which makes the option mandatory although its help text describes a
    # configuration-file alternative -- confirm which is intended.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        '-t', '--template',
        help=sphinxify(
            ' The XML file that is the template for creating the output'
            ' XML. Specify this or global statements in the configuration'
            ' ``template_dir``, ``template_title``, and ``templates``. ',
            calledfromsphinx))
    parser.add_argument(
        '-v', '--verbose', type=int, default=1,
        help=(' Set the verbosity. The default is 1 which prints summary'
              ' information. '))
    return parser
def add_arguments(parser, command):
    """Add the options that apply to one location sub-command.

    Which options are registered depends on the module-level flags
    ``is_update``, ``is_diff``, ``is_select`` and ``is_validate``.  When
    ``called_from_sphinx`` is true those flags are first set from
    ``command``.

    :param parser: the sub-parser that receives the options
    :param command: the sub-command name: 'update', 'diff', 'select' or
        'validate'
    """
    # NOTE(review): the grouping of options under each condition below was
    # reconstructed from what the options do; confirm it against the
    # intended layout.
    global is_update, is_diff, is_select, is_validate  # Needed for Sphinx
    if called_from_sphinx:
        is_update = command == 'update'
        is_diff = command == 'diff'
        is_select = command == 'select'
        is_validate = command == 'validate'

    update_or_diff = is_update or is_diff
    reads_mapfile = is_diff or is_select or is_update

    parser.add_argument(
        '-i', '--infile', required=True,
        help=' The XML file saved from Modes.')
    if is_update or is_select:
        parser.add_argument(
            '-o', '--outfile', required=True,
            help=' The output XML file.')
    if is_diff or is_update or is_select:
        parser.add_argument(
            '-a', '--all', action='store_true',
            help=(' Write all objects and, if -w is selected, issue a'
                  ' warning if an object is not in the detail CSV file.'
                  ' The default is to only write updated objects. In'
                  ' either case warn if an object in the CSV file is not'
                  ' in the input XML file.'))
    if update_or_diff:
        parser.add_argument(
            '--col_acc', type=int, default=0,
            help=(' The zero-based column containing the accession number'
                  ' of the object to be updated. The default is column'
                  ' zero. '))
        parser.add_argument(
            '--col_loc', type=int, default=1,
            help=nd.sphinxify(
                ' The zero-based column containing the new location of'
                ' the object to be updated. The default is column 1. See'
                ' the --location option which sets the location for all'
                ' objects in which case this option is ignored.',
                called_from_sphinx))
        parser.add_argument(
            '-c', '--current', action='store_true',
            help=(' Update the current location and change the old'
                  ' current location to a previous location. See the'
                  ' descrption of "n" and "p". '))
    if is_update:
        parser.add_argument(
            '-d', '--date', default=nd.modesdate(date.today()),
            help=(" When updating the current location, use this date as"
                  " the DateEnd value for the previous location we're"
                  " making and the DateBegin value for the new current"
                  " location we're making. The default is today's date"
                  " in Modes format (d.m.yyyy). "))
        parser.add_argument(
            '--datebegin',
            help=(' Use this string as the date to store in the new'
                  ' previous ObjectLocation date. \n'
                  'The format must be in Modes format (d.m.yyyy). '))
        parser.add_argument(
            '--dateend', default=nd.modesdate(date.today()),
            help=(' Use this string as the date to store in the new'
                  ' previous ObjectLocation date. The format must be in'
                  ' Modes format (d.m.yyyy). '))
    parser.add_argument(
        '--encoding', default='utf-8',
        help=(' Set the input encoding. Default is utf-8. Output is'
              ' always utf-8. '))
    if is_update:
        parser.add_argument(
            '-f', '--force', action='store_true',
            help=(" Write the object to the output file even if it hasn't"
                  " been updated. This only applies to objects whose ID"
                  " appears in the CSV file. -a implies -f. "))
    parser.add_argument(
        '--heading',
        help=nd.sphinxify(
            ' The first row of the map file contains a column title which'
            ' must match the parameter (case insensitive) in the column'
            ' designated for the location. If a --location argument is'
            ' specified, the first row is skipped and the value, which'
            ' nevertheless must be specified, is ignored. ',
            called_from_sphinx))
    if update_or_diff:
        parser.add_argument(
            '-j', '--object',
            help=nd.sphinxify(
                ' Specify a single object to be processed. If specified,'
                ' do not specify the CSV file containing object numbers'
                ' and locations (--mapfile). You must also specify'
                ' --location. ',
                called_from_sphinx))
        parser.add_argument(
            '-l', '--location',
            help=(' Set the location for all of the objects in the CSV'
                  ' file. In this case the CSV file only needs a single'
                  ' column containing the accession number. '))
    if reads_mapfile:
        parser.add_argument(
            '-m', '--mapfile',
            help=nd.sphinxify(
                ' The CSV file mapping the object number to its new'
                ' location. By default, the accession number is in the'
                ' first column (column 0) but this can be changed by the'
                ' --col_acc option. The new location is by default in the'
                ' second column (column 1) but can be changed by the'
                ' --col_loc option. This argument is ignored if --object'
                ' is specified. \n',
                called_from_sphinx))
    if update_or_diff:
        parser.add_argument(
            '-n', '--normal', action='store_true',
            help=(' Update the normal location. See the description for'
                  ' "p" and "c".'))
    if is_diff:
        parser.add_argument(
            '--old', action='store_true',
            help=(' The column selected is the "old" location, the one we'
                  ' are moving the object from. Warn if the value in the'
                  ' CSV file does not match the value in the XML file.'
                  ' The default is to warn if the value in the CSV file'
                  ' does match the value in the XML file which is not'
                  ' expected as the purpose is to update that value. '))
    if is_update:
        parser.add_argument(
            '--patch', action='store_true',
            help=(' Update the specified location in place without'
                  ' creating history. This is always the behavior for'
                  ' normal locations but not for current or previous. '))
        parser.add_argument(
            '-p', '--previous', action='store_true',
            help=nd.sphinxify(
                " Add a previous location. This location's start and end"
                " dates must not overlap with an existing current or"
                " previous location's date(s)."
                ' If "p" is selected, do not select "n" or "c". If "p" is'
                ' specified, you must specify --datebegin and --dateend. ',
                called_from_sphinx))
        parser.add_argument(
            '--reset_current', action='store_true',
            help=(' Only output the most recent current location element'
                  ' for each object, deleting all previous locations. '))
        parser.add_argument(
            '-r', '--reason', default='',
            help=(' Insert this text as the reason for the move to the'
                  ' new current location. '))
    parser.add_argument(
        '-s', '--short', action='store_true',
        help=' Only process a single object. For debugging.')
    parser.add_argument(
        '-v', '--verbose', type=int, default=1,
        help=(' Set the verbosity. The default is 1 which prints summary'
              ' information. '))
    if reads_mapfile:
        parser.add_argument(
            '-w', '--warn', action='store_true',
            help=(' Valid if -a is selected. Warn if an object in the XML'
                  ' file is not in the CSV file. '))