def test_glob(self):
    """Without ``recursive=True`` a ``**`` pattern must match exactly like ``*``."""
    top_level_names = ('a', 'b', 'c', 'd')
    expected = [path.join(self.dir, name) for name in top_level_names]

    # '**' without recursive=True: only the top-level entries are expected.
    double_star = glob.glob(path.join(self.dir, '**'))
    self.assertIsInstance(double_star, list)
    self.assertCountEqual(expected, double_star)

    # A plain '*' must give the very same list.
    single_star = glob.glob(path.join(self.dir, '*'))
    self.assertIsInstance(single_star, list)
    self.assertEqual(double_star, single_star)
def pattern_copy(what, patterns, source, target, tmp_logs):
    """Copy every file under *source* that matches one of *patterns* into *target*.

    Patterns are glob expressions evaluated relative to *source* with
    ``recursive=True``. The path of each matched file relative to *source*
    is recreated under *target*. Matches that are not regular files are
    not copied. Every action is appended to ``overall.txt`` inside
    *tmp_logs*.

    :param what: label written at the start of the log entry
    :param patterns: iterable of glob patterns, relative to ``source``
    :param source: directory the patterns are evaluated in
    :param target: directory the matched files are copied into
    :param tmp_logs: directory holding the ``overall.txt`` log file
    """
    log_path = os.path.join(tmp_logs, "overall.txt")
    with open(log_path, 'a') as log:
        print(what, " pattern copy ", patterns, " from ", source, " -> ", target, file=log)
        for pattern in patterns:
            matches = glob.glob(os.path.join(source, pattern), recursive=True)
            for match in matches:
                if not os.path.isfile(match):
                    print("skip this directory (will recurse into it later)", match, file=log)
                    continue
                # location of the match relative to the source tree
                relative = os.path.relpath(match, source)
                destination = os.path.join(target, relative)
                # make sure the parent directories exist before copying
                os.makedirs(os.path.join(target, os.path.dirname(relative)), exist_ok=True)
                shutil.copy(match, destination)
                print(" COPY ", match, " -> ", destination, file=log)
def test_glob_recursive(self):
    """With ``recursive=True`` a ``**`` pattern must return the whole tree."""
    relative_entries = (
        '',  # joins to the directory itself, with a trailing separator
        'a',
        'b',
        'c',
        'd',
        path.join('c', 'e'),
        path.join('d', 'f'),
        path.join('d', 'f', 'g'),
    )
    expected = [path.join(self.dir, entry) for entry in relative_entries]

    found = glob.glob(path.join(self.dir, '**'), recursive=True)

    self.assertIsInstance(found, list)
    self.assertCountEqual(expected, found)
def populate_queue(queue, folder):
    """
    Populate a queue with all files in folder.

    Any leftover "GRADING_*" files are logged and removed first. The
    remaining files are then put on the queue ordered by
    ``os.path.getctime`` (creation time on some platforms, time of the
    last metadata change on others, e.g. Unix).

    :param queue: multiprocessing.queues.Queue (anything with ``put`` works)
    :param folder: string representing the path to the folder to add files from
    """
    for file_path in glob.glob(os.path.join(folder, "GRADING_*")):
        grade_items_logging.log_message(message="Remove old queue file: " + file_path)
        os.remove(file_path)

    # Grab all the files currently in the folder, sorted by creation
    # time, and put them in the queue to be graded
    files = glob.glob(os.path.join(folder, "*"))
    files.sort(key=os.path.getctime)
    for f in files:
        # glob already returns each path prefixed with `folder`; joining
        # `folder` on again produced "folder/folder/name" for relative folders.
        queue.put(f)
def main():
    """Queue existing submissions for regrading, or replay a time window.

    When ``args.times`` is given, the two entries are parsed as dates and
    handed to ``replay``; nothing else is done. Otherwise every path in
    ``args.path`` is expanded into a glob pattern of the form
    ``<data>/courses/<semester>/<course>/submissions/<gradeable>/<who>/<version>``
    (missing trailing components become ``*``), and a queue file is
    written to ``to_be_graded_queue`` for each matching directory.

    :raises SystemExit: on an invalid path, an unexpected directory
        layout, when the user declines the confirmation prompt, and
        (with no message) after a replay or when no path was given.
    """
    # Function-scope import: only the permission change at the end needs it.
    import stat

    args = arg_parse()
    data_dir = os.path.join(SUBMITTY_DATA_DIR, "courses")
    data_dirs = data_dir.split(os.sep)
    base = len(data_dirs)
    grade_queue = []

    if args.times is not None:
        starttime = dateutils.read_submitty_date(args.times[0])
        endtime = dateutils.read_submitty_date(args.times[1])
        replay(starttime, endtime)
        raise SystemExit()

    if len(args.path) == 0:
        print("ERROR! Must specify at least one path")
        raise SystemExit()

    for input_path in args.path:
        print('input path', input_path)
        # handle relative path
        if input_path == '.':
            input_path = os.getcwd()
        # startswith() instead of [0] so an empty argument cannot raise IndexError
        if not input_path.startswith('/'):
            input_path = os.getcwd() + '/' + input_path
        # remove trailing slash (if any)
        input_path = input_path.rstrip('/')
        # split the path into directories
        dirs = input_path.split(os.sep)

        # must be in the known submitty base data directory
        if dirs[0:base] != data_dirs:
            print("ERROR: BAD REGRADE SUBMISSIONS PATH", input_path)
            raise SystemExit("You need to point to a directory within {}".format(data_dir))

        # Extract directories from provided pattern path (path may be incomplete)
        pattern_semester = "*"
        if len(dirs) > base:
            pattern_semester = dirs[base]
        pattern_course = "*"
        if len(dirs) > base + 1:
            pattern_course = dirs[base + 1]
        if len(dirs) > base + 2:
            if dirs[base + 2] != "submissions":
                raise SystemExit("You must specify the submissions directory within the course")
        pattern_gradeable = "*"
        if len(dirs) > base + 3:
            pattern_gradeable = dirs[base + 3]
        pattern_who = "*"
        if len(dirs) > base + 4:
            pattern_who = dirs[base + 4]
        pattern_version = "*"
        if len(dirs) > base + 5:
            pattern_version = dirs[base + 5]

        # full pattern may include wildcards!
        pattern = os.path.join(data_dir, pattern_semester, pattern_course, "submissions",
                               pattern_gradeable, pattern_who, pattern_version)
        print("pattern: ", pattern)

        # Find all matching submissions
        for d in glob.glob(pattern):
            if not os.path.isdir(d):
                continue
            my_dirs = d.split(os.sep)
            if len(my_dirs) != base + 6:
                raise SystemExit("ERROR: directory length not as expected")

            # if requested, only regrade the currently active versions
            if args.active_only and not is_active_version(d):
                continue

            print("match: ", d)
            my_semester = my_dirs[base]
            my_course = my_dirs[base + 1]
            my_gradeable = my_dirs[base + 3]

            # per-gradeable build configuration supplies the worker
            # capabilities and the grading time limit for the queue entry
            gradeable_config = os.path.join(data_dir, my_semester, my_course,
                                            "config/build/" + "build_" + my_gradeable + ".json")
            with open(gradeable_config, 'r') as build_configuration:
                datastore = json.load(build_configuration)
                required_capabilities = datastore.get('required_capabilities', 'default')
                max_grading_time = datastore.get('max_possible_grading_time', -1)

            # get the current time
            queue_time = dateutils.write_submitty_date()

            my_who = my_dirs[base + 4]
            my_version = my_dirs[base + 5]
            my_path = os.path.join(data_dir, my_semester, my_course, "submissions",
                                   my_gradeable, my_who, my_version)
            if my_path != d:
                raise SystemExit("ERROR: path reconstruction failed")

            # add them to the queue; an underscore in the name marks a team
            if '_' not in my_who:
                my_user = my_who
                my_team = ""
                my_is_team = False
            else:
                my_user = ""
                my_team = my_who
                my_is_team = True
            grade_queue.append({"semester": my_semester,
                                "course": my_course,
                                "gradeable": my_gradeable,
                                "user": my_user,
                                "team": my_team,
                                "who": my_who,
                                "is_team": my_is_team,
                                "version": my_version,
                                "required_capabilities": required_capabilities,
                                "queue_time": queue_time,
                                "regrade": True,
                                "max_possible_grading_time": max_grading_time})

    # Check before adding a very large number of systems to the queue
    if len(grade_queue) > 50 and not args.no_input:
        inp = input("Found {:d} matching submissions. Add to queue? [y/n]".format(len(grade_queue)))
        if inp.lower() not in ["yes", "y"]:
            raise SystemExit("Aborting...")

    for item in grade_queue:
        file_name = "__".join([item['semester'], item['course'], item['gradeable'],
                               item['who'], item['version']])
        file_name = os.path.join(SUBMITTY_DATA_DIR, "to_be_graded_queue", file_name)
        with open(file_name, "w") as open_file:
            json.dump(item, open_file, sort_keys=True, indent=4)
        # Equivalent of "chmod o+rw", done without a shell so that unusual
        # characters in directory names cannot be interpreted as shell syntax.
        current_mode = stat.S_IMODE(os.stat(file_name).st_mode)
        os.chmod(file_name, current_mode | stat.S_IROTH | stat.S_IWOTH)

    print("Added {:d} to the queue for regrading.".format(len(grade_queue)))
def launch_shippers(worker_status_map):
    """Start one shipper process per configured worker and monitor them.

    Stale queue/TODO/DONE files are removed, ``autograding_workers.json``
    is loaded and validated, and a ``shipper_process`` is started for
    every worker of every reachable, enabled machine. The function then
    loops once per second, logging any shipper that is no longer alive,
    until interrupted from the keyboard.

    :param worker_status_map: mapping from machine name to a status
        value; machines whose entry equals ``False`` are skipped
    :raises SystemExit: when not run as the daemon user, when the
        workers json cannot be loaded, or when it has no "primary"
        entry or no machine with the "default" capability
    """
    # verify the DAEMON_USER is running this script
    if not int(os.getuid()) == int(DAEMON_UID):
        raise SystemExit("ERROR: the grade_item.py script must be run by the DAEMON_USER")
    grade_items_logging.log_message(JOB_ID, message="grade_scheduler.py launched")

    # Clean up old files from previous shipping/autograding (any
    # partially completed work will be re-done)
    for file_path in glob.glob(os.path.join(INTERACTIVE_QUEUE, "GRADING_*")):
        grade_items_logging.log_message(JOB_ID, message="Remove old queue file: " + file_path)
        os.remove(file_path)

    # NOTE(review): "unstrusted*" looks like a typo for "untrusted*" (the
    # worker names built below start with "untrusted"); left unchanged
    # until the TODO file naming is confirmed.
    for file_path in glob.glob(os.path.join(SUBMITTY_DATA_DIR, "autograding_TODO", "unstrusted*")):
        grade_items_logging.log_message(JOB_ID, message="Remove autograding TODO file: " + file_path)
        os.remove(file_path)
    for file_path in glob.glob(os.path.join(SUBMITTY_DATA_DIR, "autograding_DONE", "*")):
        grade_items_logging.log_message(JOB_ID, message="Remove autograding DONE file: " + file_path)
        os.remove(file_path)

    # this lock will be used to edit the queue or new job event
    overall_lock = multiprocessing.Lock()

    # The names of the worker machines, the capabilities of each
    # worker machine, and the number of workers per machine are stored
    # in the autograding_workers json.
    try:
        autograding_workers_path = os.path.join(SUBMITTY_INSTALL_DIR, 'config', "autograding_workers.json")
        with open(autograding_workers_path, 'r') as infile:
            autograding_workers = json.load(infile)
    except Exception as e:
        raise SystemExit("ERROR: could not locate the autograding workers json: {0}".format(e))

    # There must always be a primary machine, it may or may not have
    # autograding workers.
    if "primary" not in autograding_workers:
        raise SystemExit("ERROR: autograding_workers.json contained no primary machine.")

    # One (or more) of the machines must accept "default" jobs.
    default_present = any("default" in machine["capabilities"]
                          for machine in autograding_workers.values())
    if not default_present:
        raise SystemExit("ERROR: autograding_workers.json contained no machine with default capabilities")

    # Launch a shipper process for every worker on the primary machine and each worker machine
    total_num_workers = 0
    processes = list()
    for name, machine in autograding_workers.items():
        if worker_status_map[name] == False:
            message = "{0} could not be reached, so we are not spinning up shipper threads.".format(name)
            print(message)
            grade_items_logging.log_message(JOB_ID, message=message)
            continue
        if 'enabled' in machine and machine['enabled'] == False:
            # the log line previously lacked .format(name) and logged a literal "{0}"
            message = "{0} is disabled, so we are not spinning up shipper threads.".format(name)
            print(message)
            grade_items_logging.log_message(JOB_ID, message=message)
            continue
        try:
            full_address = ""
            if machine["address"] != "localhost":
                if machine["username"] == "":
                    raise SystemExit("ERROR: empty username for worker machine {0} ".format(machine["address"]))
                full_address = "{0}@{1}".format(machine["username"], machine["address"])
            else:
                if not machine["username"] == "":
                    raise SystemExit('ERROR: username for primary (localhost) must be ""')
                full_address = machine['address']

            num_workers_on_machine = machine["num_autograding_workers"]
            if num_workers_on_machine < 0:
                raise SystemExit("ERROR: num_workers_on_machine for '{0}' must be non-negative.".format(machine))

            single_machine_data = {name: machine}
            single_machine_data = add_fields_to_autograding_worker_json(single_machine_data, name)
        except Exception as e:
            # SystemExit is not an Exception subclass, so the explicit
            # raises above still terminate; only other errors land here.
            message = "ERROR: autograding_workers.json entry for {0} contains an error: {1}".format(name, e)
            print(message)
            grade_items_logging.log_message(JOB_ID, message=message)
            continue

        # launch the shipper threads
        for i in range(0, num_workers_on_machine):
            u = "untrusted" + str(i).zfill(2)
            p = multiprocessing.Process(target=shipper_process,
                                        args=(name, single_machine_data, full_address, u, overall_lock))
            p.start()
            processes.append(p)
        total_num_workers += num_workers_on_machine

    # main monitoring loop
    try:
        while True:
            alive = 0
            for i in range(0, total_num_workers):
                # is_alive is a method: without the call parentheses the
                # test was always true and dead shippers went unnoticed.
                if processes[i].is_alive():
                    alive = alive + 1
                else:
                    grade_items_logging.log_message(JOB_ID, message="ERROR: process " + str(i) + " is not alive")
            if alive != total_num_workers:
                grade_items_logging.log_message(JOB_ID, message="ERROR: #shippers=" + str(total_num_workers) + " != #alive=" + str(alive))
            # print ("shippers= ",total_num_workers,"  alive=",alive)
            time.sleep(1)

    except KeyboardInterrupt:
        grade_items_logging.log_message(JOB_ID, message="grade_scheduler.py keyboard interrupt")

        # just kill everything in this group id right now
        # NOTE: this may be a bug if the grandchildren have a different group id and not be killed
        os.kill(-os.getpid(), signal.SIGKILL)

        # run this to check if everything is dead
        #    ps  xao pid,ppid,pgid,sid,comm,user  | grep untrust

        # everything's dead, including the main process so the rest of this will be ignored
        # but this was mostly working...

        # terminate the jobs
        for i in range(0, total_num_workers):
            processes[i].terminate()
        # wait for them to join
        for i in range(0, total_num_workers):
            processes[i].join()

    grade_items_logging.log_message(JOB_ID, message="grade_scheduler.py terminated")
def get_job(my_name,which_machine,my_capabilities,which_untrusted,overall_lock):
    """
    Picks a job from the queue

    Queue files are examined oldest first (by ``os.path.getctime``).
    Files already being graded and files whose required capability is
    not in ``my_capabilities`` are skipped. The oldest non-regrade job
    wins; if there is none, the oldest regrade job is taken. For the
    chosen job a ``GRADING_<job>`` marker file is written.

    :param my_name: identifier of the caller, used only in the slow-call warning
    :param which_machine: not used by this function
    :param my_capabilities: container of capabilities this worker can grade
    :param which_untrusted: value stored under "untrusted" in the marker file
    :param overall_lock: a lock on the directory containing all queue files
    :return: file name (without directory) of the chosen job, or "" if none
    """

    time_get_job_begin = dateutils.get_current_time()

    folder = INTERACTIVE_QUEUE
    my_job = ""

    # Hold the lock through a context manager so it is released even if
    # something below raises (e.g. a queue file without a
    # "required_capabilities" key); otherwise every other shipper
    # sharing this lock would block forever.
    with overall_lock:
        # Grab all the files currently in the folder, sorted by creation
        # time, and put them in the queue to be graded
        files = glob.glob(os.path.join(folder, "*"))
        # set for O(1) "is this job already being graded" checks
        existing_files = set(files)

        files_and_times = list()
        for f in files:
            try:
                my_time = os.path.getctime(f)
            except OSError:
                # the file disappeared between the glob and the stat
                continue
            files_and_times.append((f, my_time))

        files_and_times = sorted(files_and_times, key=operator.itemgetter(1))

        for full_path_file, file_time in files_and_times:
            # get the file name (without the path)
            just_file = full_path_file[len(folder)+1:]
            # skip items that are already being graded
            if just_file[0:8] == "GRADING_":
                continue
            grading_file = os.path.join(folder, "GRADING_"+just_file)
            if grading_file in existing_files:
                continue

            # found something to do
            try:
                with open(full_path_file, 'r') as infile:
                    queue_obj = json.load(infile)
            except Exception:
                # unreadable or half-written queue file: skip it for now
                continue

            # Check to make sure that we are capable of grading this submission
            required_capabilities = queue_obj["required_capabilities"]
            if required_capabilities not in my_capabilities:
                continue

            # prioritize interactive jobs over (batch) regrades
            # if you've found an interactive job, exit early (since they are sorted by timestamp)
            if "regrade" not in queue_obj or not queue_obj["regrade"]:
                my_job = just_file
                break

            # otherwise it's a regrade, and if we don't already have a
            # job, take it, but we have to search the rest of the list
            if my_job == "":
                my_job = just_file

        if not my_job == "":
            grading_file = os.path.join(folder, "GRADING_" + my_job)
            # create the grading file
            with open(os.path.join(grading_file), "w") as queue_file:
                json.dump({"untrusted": which_untrusted}, queue_file)

    time_get_job_end = dateutils.get_current_time()

    time_delta = time_get_job_end-time_get_job_begin
    if time_delta > datetime.timedelta(milliseconds=100):
        print (my_name, " WARNING: submitty_autograding shipper get_job time ", time_delta)
        grade_items_logging.log_message(JOB_ID, message=str(my_name)+" WARNING: submitty_autograding shipper get_job time "+str(time_delta))

    return (my_job)
def main():
    """Print a periodically refreshed summary of the grading queue.

    Every five seconds one status line is printed: the number of
    ``submitty_autograding_shipper.py`` processes owned by ``hwcron``
    (minus one, for the initial process the workers are forked from)
    and, when the queue directory is readable, the number of
    interactive and batch (regrade) jobs waiting and being graded.

    :raises SystemExit: once nothing is left to do, unless
        ``args.continuous`` is set
    """
    args = parse_args()
    # loop-invariant: the command line entry identifying a shipper process
    shipper_script = os.path.join(SUBMITTY_INSTALL_DIR, "bin", "submitty_autograding_shipper.py")

    while True:

        # count the processes
        pid_list = psutil.pids()
        num_procs = 0
        for pid in pid_list:
            try:
                proc = psutil.Process(pid)
                if 'hwcron' == proc.username():
                    # query the command line once instead of twice
                    cmdline = proc.cmdline()
                    if len(cmdline) >= 2 and cmdline[1] == shipper_script:
                        num_procs += 1
            except psutil.NoSuchProcess:
                pass

        # remove 1 from the count...  each worker is forked from the
        # initial process
        num_procs -= 1

        if num_procs <= 0:
            print("WARNING: No matching submitty_autograding_shipper.py processes!")
            num_procs = 0

        done = True

        print("GRADING PROCESSES:{:3d} ".format(num_procs), end="")

        if os.access(GRADING_QUEUE, os.R_OK):
            # most instructors do not have read access to the interactive queue
            files = glob.glob(os.path.join(GRADING_QUEUE, "*"))
            interactive_count = 0
            interactive_grading_count = 0
            regrade_count = 0
            regrade_grading_count = 0

            for full_path_file in files:
                json_file = full_path_file

                # get the file name (without the path)
                just_file = full_path_file[len(GRADING_QUEUE) + 1:]
                # skip items that are already being graded
                is_grading = just_file[0:8] == "GRADING_"
                is_regrade = False

                if is_grading:
                    # the job description lives in the file without the prefix
                    json_file = os.path.join(GRADING_QUEUE, just_file[8:])

                try:
                    with open(json_file, 'r') as infile:
                        queue_obj = json.load(infile)
                    if "regrade" in queue_obj:
                        is_regrade = queue_obj["regrade"]
                except Exception:
                    # Exception rather than a bare except, so Ctrl-C and
                    # SystemExit are not reported as "whoops" and swallowed.
                    print("whoops", json_file, end="")

                if is_grading:
                    if is_regrade:
                        regrade_grading_count += 1
                    else:
                        interactive_grading_count += 1
                else:
                    if is_regrade:
                        regrade_count += 1
                    else:
                        interactive_count += 1

            print("INTERACTIVE todo:{:3d} ".format(interactive_count), end="")
            if interactive_grading_count == 0:
                print(" ", end="")
            else:
                print("(grading:{:3d}) ".format(interactive_grading_count), end="")
            if interactive_count != 0:
                done = False

            print("BATCH todo:{:3d} ".format(regrade_count), end="")
            if regrade_grading_count == 0:
                print(" ", end="")
            else:
                print("(grading:{:3d}) ".format(regrade_grading_count), end="")
            if regrade_count != 0:
                done = False

        print()

        # quit when the queues are empty
        if done and not args.continuous:
            raise SystemExit()

        # pause before checking again
        time.sleep(5)
def main():
    """Queue existing submissions for regrading.

    Every path in ``args.path`` is expanded into a glob pattern of the
    form
    ``<data>/courses/<semester>/<course>/submissions/<gradeable>/<who>/<version>``
    (missing trailing components become ``*``). For each matching
    directory a queue file is written to ``to_be_graded_interactive``
    when ``args.interactive`` is set, otherwise ``to_be_graded_batch``.

    :raises SystemExit: on an invalid path, an unexpected directory
        layout, or when the user declines the confirmation prompt
    """
    # Function-scope import: only the permission change at the end needs it.
    import stat

    args = arg_parse()
    data_dir = os.path.join(SUBMITTY_DATA_DIR, "courses")
    data_dirs = data_dir.split(os.sep)
    base = len(data_dirs)
    grade_queue = []

    for input_path in args.path:
        # handle relative path
        if input_path == '.':
            input_path = os.getcwd()
        # startswith() instead of [0] so an empty argument cannot raise IndexError
        if not input_path.startswith('/'):
            input_path = os.getcwd() + '/' + input_path
        # remove trailing slash (if any)
        input_path = input_path.rstrip('/')
        # split the path into directories
        dirs = input_path.split(os.sep)

        # must be in the known submitty base data directory
        if dirs[0:base] != data_dirs:
            print("ERROR: BAD REGRADE SUBMISSIONS PATH", input_path)
            raise SystemExit("You need to point to a directory within {}".format(data_dir))

        # Extract directories from provided pattern path (path may be incomplete)
        pattern_semester = "*"
        if len(dirs) > base:
            pattern_semester = dirs[base]
        pattern_course = "*"
        if len(dirs) > base + 1:
            pattern_course = dirs[base + 1]
        if len(dirs) > base + 2:
            if dirs[base + 2] != "submissions":
                raise SystemExit("You must specify the submissions directory within the course")
        pattern_gradeable = "*"
        if len(dirs) > base + 3:
            pattern_gradeable = dirs[base + 3]
        pattern_who = "*"
        if len(dirs) > base + 4:
            pattern_who = dirs[base + 4]
        pattern_version = "*"
        if len(dirs) > base + 5:
            pattern_version = dirs[base + 5]

        # full pattern may include wildcards!
        pattern = os.path.join(data_dir, pattern_semester, pattern_course, "submissions",
                               pattern_gradeable, pattern_who, pattern_version)
        print("pattern: ", pattern)

        # Find all matching submissions
        for d in glob.glob(pattern):
            if not os.path.isdir(d):
                continue
            print("match: ", d)
            my_dirs = d.split(os.sep)
            if len(my_dirs) != base + 6:
                raise SystemExit("ERROR: directory length not as expected")
            my_semester = my_dirs[base]
            my_course = my_dirs[base + 1]
            my_gradeable = my_dirs[base + 3]
            my_who = my_dirs[base + 4]
            my_version = my_dirs[base + 5]
            my_path = os.path.join(data_dir, my_semester, my_course, "submissions",
                                   my_gradeable, my_who, my_version)
            if my_path != d:
                raise SystemExit("ERROR: path reconstruction failed")

            # add them to the queue; an underscore in the name marks a team
            if '_' not in my_who:
                my_user = my_who
                my_team = ""
                my_is_team = False
            else:
                my_user = ""
                my_team = my_who
                my_is_team = True
            grade_queue.append({"semester": my_semester,
                                "course": my_course,
                                "gradeable": my_gradeable,
                                "user": my_user,
                                "team": my_team,
                                "who": my_who,
                                "is_team": my_is_team,
                                "version": my_version})

    # Check before adding a very large number of systems to the queue
    if len(grade_queue) > 50 and not args.no_input:
        inp = input("Found {:d} matching submissions. Add to queue? [y/n]".format(len(grade_queue)))
        if inp.lower() not in ["yes", "y"]:
            raise SystemExit("Aborting...")

    which_queue = "batch"
    if args.interactive:
        which_queue = "interactive"

    for item in grade_queue:
        file_name = "__".join([item['semester'], item['course'], item['gradeable'],
                               item['who'], item['version']])
        file_name = os.path.join(SUBMITTY_DATA_DIR, "to_be_graded_"+which_queue, file_name)
        with open(file_name, "w") as open_file:
            json.dump(item, open_file)
        # Equivalent of "chmod o+rw", done without a shell so that unusual
        # characters in directory names cannot be interpreted as shell syntax.
        current_mode = stat.S_IMODE(os.stat(file_name).st_mode)
        os.chmod(file_name, current_mode | stat.S_IROTH | stat.S_IWOTH)

    print("Added {:d} to the {} queue for regrading.".format(len(grade_queue), which_queue.upper()))