def ready_source_file(filepath, tag, to_mark, cfg):
    """
    Make sure a source file is present at `filepath`, building it from the
    template when it cannot be opened for reading.

    Parameters
    ----------
    filepath : Desired path for the file
    tag : Key for the associated script data in to_mark
    to_mark : Dict of script data for current marking task (MKH format)
    cfg : MarkingConfig for current task (specifies template etc)

    Returns
    -------
    None. A failure to create the file is logged, not raised.
    """
    try:
        # probing by opening: success means there is nothing to do
        with open(filepath, 'r'):
            return
    except OSError:
        pass  # cannot be read, so fall through and create it

    try:
        relative_tag = '../' + tag
        script_paths = [
            os.path.join(cfg.script_dir(), p) for p in to_mark[tag][0]
        ]
        page_total = mhsm.count_pdf_pages(script_paths)
        make_from_template(filepath, relative_tag, page_total, cfg)
    except Exception:
        loghelper.print_and_log(
            logger, "Failed to create new file at: {}".format(filepath))
def open_one_to_edit(cfg, sourcefile):
    '''
    Launch the editor named by `cfg.editor()` on `sourcefile` and wait for
    the editor process to exit before returning.

    Any exception raised while starting or waiting on the editor is logged
    and swallowed.
    '''
    failure_note = ("Error occurred editing document."
                    " Check that the correct appliction is"
                    " selected.")
    try:
        command = [cfg.editor(), sourcefile]
        editor_process = sp.Popen(command)
        editor_process.wait()
    except Exception:
        loghelper.print_and_log(logger, failure_note)
def cmd_build_n_check(args):
    '''
    **CLI command:** Compile every marked script and walk the user through
    previewing/editing each one so the output can be checked. Scripts that
    get through this are recorded as having their output validated.

    The command stops early (still returning True) if any script is missing
    marks or source validation, if the marking state cannot be read, or if
    compilation raises.

    Returns
    -------
    True in every case.
    '''
    question_names = input("Questions required in completed scripts "
                           "(separated by spaces): ").split()

    user_quit = False
    while not user_quit:
        print("Checking marking state...")
        try:
            # Pass 1: anything with unmarked questions (from the list) or
            # without validated source blocks the whole command
            unfinished, _ = mhsm.check_marking_state(g_config, question_names,
                                                     True, False)
            if unfinished:
                print("Some scripts missing marks or validation: ")
                print_some(unfinished)
                return True
            # Pass 2: everything is validly marked, so collect the scripts
            # whose output the user still has to check
            pending, _ = mhsm.check_marking_state(g_config, question_names,
                                                  True, True)
        except Exception:
            loghelper.print_and_log(logger, "Failed to update marking state!")
            return True

        if not pending:
            print("Checking complete!")
            break

        try:
            print("Compiling...")
            mhem.batch_compile_and_check(g_config.marking_dir(), pending,
                                         g_config)
            print("Compiling successful!")
        except Exception:
            loghelper.print_and_log(logger, "Compiling failed!")
            return True

        for tag in pending:
            print("Now checking " + tag)
            keep_going = mhem.mark_one_loop(tag, pending, g_config,
                                            question_names, True, True)
            # update marking state in file (also done for the script the
            # user quit on)
            mhsm.declare_marked(tag, pending, g_config)
            if not keep_going:
                user_quit = True
                break
    return True
def batch_compile(directory, files, compile_command, **kwargs):
    '''
    Run `compile_command` from inside `directory` once for each entry of
    `files`, then optionally let the user fix the failures by hand.

    Parameters
    ----------
    `directory` : directory in which to run compiler
    `files` : list of file paths to compile (relative to `directory`)
    `compile_command` : command line command to which file paths will be
                        appended. shlex will be run on this argument to
                        generate command line tokens
    `kwargs` : Options are
                `cfg` - MarkingConfig for current job
                `manual_fallback` - if True `cfg` must be given
                                    user will be prompted to manually
                                    compile any files that failed

    The working directory is restored before returning, including when an
    unexpected exception propagates out of the compile loop.
    '''
    failed = []  # sources whose compiler run exited non-zero
    original_dir = ".."
    try:
        original_dir = os.getcwd()
        os.chdir(directory)
        for position, source in enumerate(files, start=1):
            # single-line progress indicator, overwritten on each pass
            print("\rCompiling: {}/{}. ".format(position, len(files)),
                  end='\r')
            command = shlex.split(compile_command) + [source]
            try:
                sp.run(command,
                       check=True,
                       stdin=sp.PIPE,
                       stdout=sp.PIPE,
                       stderr=sp.PIPE)
            except sp.CalledProcessError:
                failed.append(source)
                loghelper.print_and_log(
                    logger, "Compilation failed for {}.".format(source))
                print(" Continuing...")
    finally:
        print('')  # newline to break from progress bar
        os.chdir(original_dir)

    if not kwargs.get('manual_fallback', False):
        return

    print("There are {} files to compile manually.".format(len(failed)))
    for source in failed:
        open_one_to_edit(kwargs['cfg'], os.path.join(directory, source))
        if input("Continue compiling? ('q' to quit): ") in ["q", "Q"]:
            break
def cmd_makecsv(args):
    '''
    **CLI command:** Ask for a csv filename and a list of question names,
    then write the marks recorded for those questions in each fully marked
    script to that csv file (placed in the original script directory).

    If some scripts are not validly marked for the selected questions a
    warning is printed and only the marked scripts are written.

    Returns
    -------
    True in every case.
    '''
    csv_path = os.path.join(g_config.script_dir(), input("CSV filename: "))

    # an openable file already exists -> ask before clobbering it
    try:
        with open(csv_path, 'r'):
            pass
        answer = input("File {} exists, overwrite? [y/n]: ".format(csv_path))
        if answer not in ['y', 'Y']:
            print("Operation cancelled.")
            return True
    except OSError:
        pass

    question_names = input(
        "Questions for which to extract marks (separated by spaces): "
    ).split()

    try:
        # read marking state from the configured script directory
        unfinished, finished = mhsm.check_marking_state(
            g_config, question_names, True, True)
    except Exception:
        loghelper.print_and_log(logger, "Failed to read marking state!")
        return True

    if unfinished:
        print("Selected questions may not be validly marked in some"
              " scripts. Including: ")
        print_some(unfinished)
        print("Remember to run 'check' command for final version.")

    try:
        with open(csv_path, 'w') as out:
            # header line
            out.write("Script #")
            for name in question_names:
                out.write(", Question {}".format(name))
            # one row per marked script, in sorted tag order
            for script in sorted(finished):
                out.write("\n{}".format(script))
                validated_marks = finished[script][4][1]
                for name in question_names:
                    out.write(",{}".format(validated_marks[name]))
    except Exception:
        loghelper.print_and_log(logger, "Failed to write csv file.")
    return True
def cmd_config(self, args, gethelp=False):
    '''
    Command line method prompting the user to set one section of the
    configuration, or all of it, and then saving the result to `self.path`.

    Parameters
    ----------
    args : string list
        ['all'] or ['<sec name>']
    gethelp : bool, optional
        return help strings instead of prompting. The default is False.

    Returns
    -------
    bool : True unless CLI should quit. When `gethelp` is True a two-item
           list [summary, details] is returned instead.
    '''
    if gethelp:
        section_lines = "".join("\n\t" + sec for sec in self._categories)
        details = ("Use 'config all' or 'config <section>' with "
                   "<section> being:" + section_lines)
        return [
            "Edit current configuration",
            details + "\nConfiguration options you are prompted for can be"
            " left blank to keep the current setting."
        ]

    if len(args) < 1 or (args[0] not in self._categories
                         and args[0] != 'all'):
        # reuse the help text to list the valid sections
        print("Config section not found. " + self.cmd_config([], True)[1])
        return True

    chosen = args[0]
    for cat in self._categories:
        if chosen != cat and chosen != 'all':
            continue
        for key in self._categories[cat]:
            meta = self._meta[cat][key]
            self._get_cfg_var(cat, key, meta[1], meta[0])

    try:
        with open(self.path, "w") as config_file:
            json.dump(self._categories, config_file)
    except (OSError, TypeError, ValueError):
        loghelper.print_and_log(logger, "Warning: Config not saved!")
    return True
def cmd_begin(args):
    """
    **CLI command:** Ask which questions to mark, then open each script's
    source file in turn for editing and check the user's progress on each
    script when the editor is closed.

    The outer loop re-reads the marking state after every pass and ends
    when nothing is left to mark, when the user quits, or when reading the
    state or precompiling raises.

    Returns
    -------
    True in every case.
    """
    question_names = input("Questions to mark (separated by spaces): ").split()
    answer = input("Do final validation of source file? [y/n]: ")
    source_validate = answer in ["y", "Y"]

    user_quit = False
    while not user_quit:
        print("Checking marking state...")
        try:
            # only the "still to mark" half of the state is needed here
            state = mhsm.check_marking_state(g_config, question_names,
                                             source_validate)
            pending = state[0]
        except Exception:
            loghelper.print_and_log(logger, "Failed to update marking state!")
            return True

        if not pending:
            print("Marking complete!")
            break

        try:
            print("Precompiling...")
            mhem.pre_build(pending, g_config)
            print("Precompiling successful!")
        except Exception:
            loghelper.print_and_log(logger, "Precompiling failed!")
            return True

        for tag in pending:
            print("Now marking " + tag)
            keep_going = mhem.mark_one_loop(tag, pending, g_config,
                                            question_names, source_validate,
                                            False)
            # update marking state in file (also done for the script the
            # user quit on)
            mhsm.declare_marked(tag, pending, g_config)
            if not keep_going:
                user_quit = True
                break
    return True
def get_script_list(cfg):
    '''
    Group the script files found in the script directory by tag.

    Parameters
    ----------
    cfg : MarkingConfig specifying current job. Uses `script_dir()`,
          `script_suffix()` (e.g. '.pdf') and `numsep()` (the separator
          placed before a trailing document number).

    Return
    ------
    {tag:file_list} where tag is the prefix of a collection of files in
    script_directory e.g. script1.pdf => tag = script1, and file_list is a
    list of the files (pdfs) that comprise the script. Files named
    `<tag><numsep><digits><suffix>` are grouped under `<tag>`; file lists
    are in sorted (lexicographic) order of the suffix-stripped names.
    '''
    suffix = cfg.script_suffix()
    numsep = cfg.numsep()

    # Keep only script files and strip the suffix. Slicing by explicit
    # length (not f[:-len(suffix)]) stays correct when the suffix is empty,
    # where a negative-zero slice would blank out every name.
    stems = sorted(name[:len(name) - len(suffix)]
                   for name in os.listdir(cfg.script_dir())
                   if name.endswith(suffix))
    # sorted => files with the same tag always appear in the same order

    ret = {}
    for stem in stems:
        if re.search(r"\s", stem):
            loghelper.print_and_log(
                logger,
                "Warning: filename '{}' contains whitespace.".format(stem))
        tag = stem  # default: the whole name is the tag
        sep_ind = stem.rfind(numsep)  # look for trailing number
        # A separator at index 0 would leave an empty tag, so it is ignored.
        # Skip the whole separator, which may be longer than one character.
        if sep_ind > 0 and stem[sep_ind + len(numsep):].isnumeric():
            tag = stem[:sep_ind]
        ret.setdefault(tag, []).append(stem + suffix)
    return ret
def count_pdf_pages(file_paths):
    '''
    Add up the page counts of a collection of pdf files.

    Parameter
    ---------
    file_paths : list of paths to pdf files

    Returns
    -------
    number of pages found. A file that cannot be read is logged and
    contributes nothing to the total.
    '''
    total = 0
    for pdf_path in file_paths:
        try:
            page_count = ppdf.PdfFileReader(pdf_path).getNumPages()
        except (ppdf.utils.PdfReadError, OSError):
            loghelper.print_and_log(
                logger, "Could not count pages in {}".format(pdf_path))
        else:
            total += page_count
    return total
def parse_cmd(cmd):
    '''
    Split `cmd` on whitespace and hand every token after the first to the
    handler that the first token selects in g_handlers.

    Returns
    -------
    True, or the value returned by the called handler. Blank input, an
    unknown command and a handler that raises all give True.
    '''
    tokens = cmd.split()
    if not tokens:
        return True

    name, handler_args = tokens[0], tokens[1:]
    if name not in g_handlers:
        print("Unrecognized command!")
        return True

    try:
        return g_handlers[name](handler_args)
    except Exception:
        loghelper.print_and_log(logger,
                                "Problem occured in {}".format(name))
        return True
def cmd_reset_validation(args):
    '''
    **CLI command:** Reset validation for particular file, or all files.
    Use argument 'all' to reset all validation.

    With 'all' the user must confirm first. Without 'all', or when the
    confirmation is declined, the user is asked for a single script prefix.

    Returns
    -------
    True in every case; a failed reset is logged per script.
    '''
    reset_everything = 'all' in args and input(
        "Are you sure you would like to "
        "reset validation checks in ALL scripts?"
        " [y/n]: ") in ['y', 'Y']

    if reset_everything:
        targets = mhsm.get_script_list(g_config).keys()
    else:
        targets = [
            input("Enter script prefix (e.g. 'tag' if 'tag.mkh' "
                  "needs resetting): ")
        ]

    for target in targets:
        try:
            mhsm.reset_validation(target, g_config)
        except Exception:
            loghelper.print_and_log(
                logger,
                "Warning! Failed to reset validation for {}".format(target))
    return True
def check_marking_state(cfg,
                        questions=None,
                        final_assert=True,
                        match_outhash=False):
    '''
    Check source directory for files or file sets, check which .mkh files
    exist and are up to date.

    Parameters
    ----------
    cfg : MarkingConfig specifying current job
    questions : if set to a list of question names then any scripts which
                have not had all of those questions marked will be returned
                as still to mark
    final_assert : if True then any file that has not passed final
                   validation will also be included
    match_outhash : if True, question marks are read from the saved output
                    validation data instead, and a script also counts as
                    unmarked when the saved output hash is empty or differs
                    from the hash of the actual output file

    Returns
    -------
    [to_mark, done_mark]: two dicts keyed by script tag, holding the
        scripts left to mark and those marked, in mkh format
        [files, input hash, question marks, source validate flag,
        output data]. (A script is 'marked' in this case if all requested
        questions are available and final validation reported complete,
        if final_assert == True.)

    A .mkh file that cannot be read, parsed or indexed as expected is
    logged and its script is reported as still to mark.
    '''
    script_directory = cfg.script_dir()
    if not questions:
        questions = []
    to_mark, done_mark = {}, {}
    scripts = get_script_list(cfg)
    # Scan the directory once; the listing does not change per script.
    present = set(os.listdir(script_directory))

    for tag, files in scripts.items():
        files_hash = mh_hash.hash_file_list(files, script_directory)
        # files, input hash, question marks, source validate flag,
        # output hash data
        entry = [files, files_hash, {}, False, '']
        marked = False  # file exists and all questions marked?

        mkh_name = tag + '.mkh'
        if mkh_name in present:
            try:
                with open(os.path.join(script_directory, mkh_name),
                          "r") as mkh:
                    mkh_data = json.load(mkh)
                # extract non-hash, non-path data
                entry[2:] = mkh_data[2:]
                # if hashes don't match it's not marked!
                if mkh_data[:2] == [files, files_hash]:
                    marked = mkh_data[3] or not final_assert
                    # in output validation mode check marks from
                    # validation instead
                    marklist = mkh_data[4][1] if match_outhash \
                        else mkh_data[2]
                    # make sure all questions marked too
                    if any(que not in marklist for que in questions):
                        marked = False
                    if match_outhash:
                        outhash = mh_hash.hash_file_list(
                            [tag + cfg.output_suffix()], cfg.marking_dir())
                        # outhash valid and matches saved value
                        marked = marked and outhash == \
                            mkh_data[4][0] and outhash
                else:
                    print("Warning: originals modified for script {}".format(
                        tag))
            except (OSError, TypeError, ValueError, LookupError):
                # LookupError: a truncated or mis-shaped .mkh raises
                # IndexError/KeyError when indexed; treat it like any other
                # unreadable file instead of aborting the whole scan.
                loghelper.print_and_log(
                    logger, "Error occurred checking {}".format(tag))
                marked = False

        if marked:
            done_mark[tag] = entry
        else:
            to_mark[tag] = entry
    return [to_mark, done_mark]
def make_user_mark(tag,
                   to_mark,
                   cfg,
                   questions=None,
                   final_validate_source=True,
                   final_validate_output=False):
    '''
    Prepare the source file for one script, open it in the editor and, once
    the editor closes, check what the user achieved (all listed questions
    reported marked and marks available).

    Steps, in order: ensure the marking directory and source file exist,
    reset the per-question (and optionally the final) check state in the
    source file, note the edit epoch of source/output, run the editor, then
    run the matching checks.

    Parameters
    ----------
    `tag` : prefix of script to mark (key in `to_mark`)
    `to_mark` : dict of scripts to mark in mkh format
    `cfg` : MarkingConfig object specifying details of marking job
    `questions` : list of question names to reset and then check; None or
                  empty means no per-question checks
    `final_validate_source` : if True then also check that source file
                               passes final 'all-marked' checks
    `final_validate_output` : True check also that source has been compiled
                              since saving and generate a hash value for the
                              output to return
        NB: final output validation fails automatically if source
        validation disabled

    Returns
    -------
    `[marks,success,outhash]` : where:
        *`marks={name:mark}` for all questions validly marked
        *`success` =False only if final_validate_source==True and source
            validation fails, or if one of the requested questions fails
            validation (output validation has no effect)
        *`outhash` ='' or a hash string of the final output file
            (as generated by mh_hash.hash_file_list),
            if final_validate_output==True and all tests are passed
            (including final source validation)
    '''
    if not questions:
        questions = []
    if not os.path.isdir(cfg.marking_dir()):
        # create directory if necessary
        os.mkdir(cfg.marking_dir())
    sourcefile = cfg.tag_to_sourcepath(tag)
    ready_source_file(sourcefile, tag, to_mark, cfg)
    # reset all variables to inspect later; the previously stored mark for
    # the question (or '' if none) is passed to the reset
    for q in questions:
        try:
            reset_file_q(sourcefile, q, cfg, to_mark[tag][2].get(q, ''))
        except Exception:
            loghelper.print_and_log(
                logger,
                "Failed to reset question {} in {}".format(q, sourcefile))
    if final_validate_source:
        try:
            reset_file_final_check(sourcefile, cfg)
        except Exception:
            loghelper.print_and_log(
                logger,
                "Failed to reset master assert in {}".format(sourcefile))
    # get time of last change if output file exists and newer than source
    old_edit_epoch = -1
    try:
        old_edit_epoch = mhsm.get_edit_epoch(
            [sourcefile, cfg.tag_to_outputpath(tag)])
    except OSError:
        pass  # old_edit_epoch=-1 already signifies it's not valid
    try:
        open_one_to_edit(cfg, sourcefile)
        # check state of resulting file
        # (must be called as counterpart to each reset)
    finally:
        # The checks live in `finally` so that they still run if the editor
        # call raises.
        # {question:mark} for all validly marked questions.
        # Bool indicates overall success
        ret = [{}, True, '']
        # set to indicate output checks passed
        # (but will not be returned unless source also validated)
        output_hash = ''
        if final_validate_output:
            # do this first or timestamps change
            # Default of old+1 makes the comparison below report "edited"
            # whenever the epoch cannot be read.
            new_edit_epoch = old_edit_epoch + 1
            try:
                new_edit_epoch =\
                    mhsm.get_edit_epoch([sourcefile,
                                         cfg.tag_to_outputpath(tag)])
            except OSError:
                pass  # the following will fail by default. It's okay
            if old_edit_epoch < new_edit_epoch:
                print("Files edited. They should be checked again...")
            else:
                # generate hash (also warn about page counts)
                if not mhsm.check_page_counts([
                        os.path.join(cfg.script_dir(), p)
                        for p in to_mark[tag][0]
                ], cfg.tag_to_outputpath(tag)):
                    print("Warning: page count in {} doesn't match input.".
                          format(cfg.tag_to_outputpath(tag)))
                output_hash = mh_hash.hash_file_list(
                    [tag + cfg.output_suffix()], cfg.marking_dir())
        if final_validate_source:
            try:
                ret[1] = ret[1] \
                    and do_file_final_check(sourcefile, cfg)
            except Exception:
                loghelper.print_and_log(
                    logger, "Failed to perform final source" +
                    " validation in {}".format(sourcefile))
                ret[1] = False
        # inspect selected variables: one failed or unreadable question is
        # enough to clear the overall success flag
        for q in questions:
            try:
                marked, score = do_file_q_check(sourcefile, q, cfg)
                if marked:
                    ret[0][q] = score
                else:
                    ret[1] = False
            except Exception:
                loghelper.print_and_log(
                    logger, "Failed to extract data for" +
                    " question {} in {}".format(q, sourcefile))
                ret[1] = False
        # if source validation succeeded (incl final tests) set the
        # hash of the output
        if ret[1] and final_validate_source:
            ret[2] = output_hash
    return ret
def cmd_make_merged_output(args):
    '''
    **CLI command:** Create blank pdf for each script and compile marked
    source files over the corresponding blank. Merge the output pdfs on top
    of copies of the original scripts to produce 'final merged output'.

    Nothing is produced unless every script is fully marked and validated
    for the confirmed questions. Failures on individual scripts while
    making blanks, copying or merging are logged and the command continues.

    Returns
    -------
    True in every case.
    '''
    question_names = input("Confirm questions required in completed scripts "
                           "(separated by spaces): ").split()
    print("Checking marking state...")
    try:
        # any script with unmarked questions (from the list) or without
        # validation blocks the merge
        unfinished, finished = mhsm.check_marking_state(
            g_config, question_names, True, True)
        if unfinished:
            print("Some scripts missing marks or validation: ")
            print_some(unfinished)
            print("Please ensure all marking completed before merging.")
            return True
    except Exception:
        loghelper.print_and_log(logger, "Failed to update marking state!")
        return True

    # --- make blanks ---
    print("Making blanks...")
    blankdir = g_config.merged_dir()
    newsourcedir = g_config.merged_sourcedir()
    newfinaldir = g_config.final_dir()
    for needed_dir in (blankdir, newsourcedir, newfinaldir):
        if not os.path.isdir(needed_dir):
            # create directory if necessary
            os.mkdir(needed_dir)
    for tag, record in finished.items():
        try:
            for part in record[0]:  # constituent files
                mhsm.make_blank_pdf_like(
                    os.path.join(g_config.script_dir(), part),
                    os.path.join(blankdir, part))
        except Exception:
            loghelper.print_and_log(
                logger, "Warning! Failed to make blanks for {}".format(tag))

    # --- copy source files ---
    print("Copying source files...")
    to_compile = []  # only scripts whose source was copied successfully
    for tag in finished:
        try:
            mhem.copyFile(g_config.tag_to_sourcepath(tag),
                          g_config.tag_to_mergesource(tag))
            to_compile.append(tag)
        except Exception:
            loghelper.print_and_log(
                logger,
                "Warning! Failed to copy source file for {}".format(tag))

    # --- compile source files ---
    print("Compiling...")
    mhem.batch_compile_and_check(newsourcedir, to_compile, g_config)

    # --- merge files ---
    print("Merging...")
    for tag, record in finished.items():
        try:
            mhsm.merge_pdfs(record[0], g_config.tag_to_mergeoutput(tag),
                            g_config.tag_to_mergefinal(tag),
                            g_config.script_dir())
        except Exception:
            loghelper.print_and_log(
                logger, "Warning! Failed to merge output for {}".format(tag))
    print("Merge complete.")
    return True