Example #1
    def test_glob(self):
        expected = [path.join(self.dir, x) for x in ['a', 'b', 'c', 'd']]
        actual = glob.glob(path.join(self.dir, '**'))
        self.assertIsInstance(actual, list)
        self.assertCountEqual(expected, actual)

        actual2 = glob.glob(path.join(self.dir, '*'))
        self.assertIsInstance(actual2, list)
        self.assertEqual(actual, actual2)
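
Note: without recursive=True, glob treats '**' the same as '*' (a single
directory level), which is exactly what the second assertion checks. A minimal
standalone sketch of the same behavior (the directory is a placeholder):

import glob
from os import path

# Without recursive=True, '**' degenerates to '*': both match one level only.
print(glob.glob(path.join('/tmp', '**')) == glob.glob(path.join('/tmp', '*')))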
Example #2
import glob
import os
import shutil


def pattern_copy(what, patterns, source, target, tmp_logs):
    with open(os.path.join(tmp_logs, "overall.txt"), 'a') as f:
        print(what,
              " pattern copy ",
              patterns,
              " from ",
              source,
              " -> ",
              target,
              file=f)
        for pattern in patterns:
            for my_file in glob.glob(os.path.join(source, pattern),
                                     recursive=True):
                if os.path.isfile(my_file):
                    # grab the matched name
                    relpath = os.path.relpath(my_file, source)
                    # make the necessary directories leading to the file
                    os.makedirs(os.path.join(target, os.path.dirname(relpath)),
                                exist_ok=True)
                    # copy the file
                    shutil.copy(my_file, os.path.join(target, relpath))
                    print("    COPY ",
                          my_file,
                          " -> ",
                          os.path.join(target, relpath),
                          file=f)
                else:
                    print("skip this directory (will recurse into it later)",
                          my_file,
                          file=f)
Example #3
    def test_glob_recursive(self):
        expected = [
            '', 'a', 'b', 'c', 'd',
            path.join('c', 'e'),
            path.join('d', 'f'),
            path.join('d', 'f', 'g')
        ]
        expected = [path.join(self.dir, x) for x in expected]
        actual = glob.glob(path.join(self.dir, '**'), recursive=True)
        self.assertIsInstance(actual, list)
        self.assertCountEqual(expected, actual)
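
With recursive=True, '**' also matches the empty string, so the base directory
itself appears in the results with a trailing separator; that is why '' is in
the expected list above. A minimal standalone sketch (the directory is a
placeholder):

import glob
from os import path

# '**' with recursive=True matches the base directory itself ('/tmp/') plus
# every file and directory underneath it, at any depth.
print(glob.glob(path.join('/tmp', '**'), recursive=True))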
Example #4
def populate_queue(queue, folder):
    """
    Populate a queue with all files in folder. We first scan the folder for any
    leftover "GRADING_*" files and remove them, then add the remaining files to
    the queue, sorted by creation time.

    :param queue: multiprocessing.queues.Queue
    :param folder: string representing the path to the folder to add files from
    """

    for file_path in glob.glob(os.path.join(folder, "GRADING_*")):
        grade_items_logging.log_message(message="Remove old queue file: " + file_path)
        os.remove(file_path)

    # Grab all the files currently in the folder, sorted by creation
    # time, and put them in the queue to be graded
    files = glob.glob(os.path.join(folder, "*"))
    files.sort(key=os.path.getctime)
    for f in files:
        queue.put(f)  # glob already returns the path joined with folder
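
A hedged driver sketch for populate_queue; the folder path is a placeholder,
and grade_items_logging is assumed to be importable from the surrounding
project:

import multiprocessing

q = multiprocessing.Queue()
populate_queue(q, "/var/local/submitty/to_be_graded_queue")
while not q.empty():
    print("queued:", q.get())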
Beispiel #5
0
def main():
    args = arg_parse()
    data_dir = os.path.join(SUBMITTY_DATA_DIR, "courses")
    data_dirs = data_dir.split(os.sep)
    grade_queue = []
    if args.times is not None:
        starttime = dateutils.read_submitty_date(args.times[0])
        endtime = dateutils.read_submitty_date(args.times[1])
        replay(starttime,endtime)
        exit()
    if len(args.path) == 0:
        print ("ERROR! Must specify at least one path")
        exit()
    for input_path in args.path:
        print('input path', input_path)
        # handle relative path
        if input_path == '.':
            input_path = os.getcwd()
        if input_path[0] != '/':
            input_path = os.getcwd() + '/' + input_path
        # remove trailing slash (if any)
        input_path = input_path.rstrip('/')
        # split the path into directories
        dirs = input_path.split(os.sep)

        # must be in the known submitty base data directory
        if dirs[0:len(data_dirs)] != data_dirs:
            print("ERROR: BAD REGRADE SUBMISSIONS PATH",input_path)
            raise SystemExit("You need to point to a directory within {}".format(data_dir))

        # Extract directories from provided pattern path (path may be incomplete)
        pattern_semester="*"
        if len(dirs) > len(data_dirs):
            pattern_semester=dirs[len(data_dirs)]
        pattern_course="*"
        if len(dirs) > len(data_dirs)+1:
            pattern_course=dirs[len(data_dirs)+1]
        if len(dirs) > len(data_dirs)+2:
            if (dirs[len(data_dirs)+2] != "submissions"):
                raise SystemExit("You must specify the submissions directory within the course")
        pattern_gradeable="*"
        if len(dirs) > len(data_dirs)+3:
            pattern_gradeable=dirs[len(data_dirs)+3]
        pattern_who="*"
        if len(dirs) > len(data_dirs)+4:
            pattern_who=dirs[len(data_dirs)+4]
        pattern_version="*"
        if len(dirs) > len(data_dirs)+5:
            pattern_version=dirs[len(data_dirs)+5]

        # full pattern may include wildcards!
        pattern = os.path.join(data_dir,pattern_semester,pattern_course,"submissions",pattern_gradeable,pattern_who,pattern_version)

        print("pattern: ",pattern)

        # Find all matching submissions
        for d in glob.glob(pattern):
            if os.path.isdir(d):
                my_dirs = d.split(os.sep)
                if len(my_dirs) != len(data_dirs)+6:
                    raise SystemExit("ERROR: directory length not as expected")
                # if requested, only regrade the currently active versions
                if args.active_only and not is_active_version(d):
                    continue
                print("match: ",d)
                my_semester=my_dirs[len(data_dirs)]
                my_course=my_dirs[len(data_dirs)+1]
                my_gradeable=my_dirs[len(data_dirs)+3]
                gradeable_config = os.path.join(data_dir, my_semester, my_course, "config", "build", "build_" + my_gradeable + ".json")
                with open(gradeable_config, 'r') as build_configuration:
                    datastore = json.load(build_configuration)
                    required_capabilities = datastore.get('required_capabilities', 'default')
                    max_grading_time = datastore.get('max_possible_grading_time', -1)

                # get the current time
                queue_time = dateutils.write_submitty_date()
                my_who=my_dirs[len(data_dirs)+4]
                my_version=my_dirs[len(data_dirs)+5]
                my_path=os.path.join(data_dir,my_semester,my_course,"submissions",my_gradeable,my_who,my_version)
                if my_path != d:
                    raise SystemExit("ERROR: path reconstruction failed")
                # add them to the queue

                if '_' not in my_who:
                    my_user = my_who
                    my_team = ""
                    my_is_team = False
                else:
                    my_user = ""
                    my_team = my_who
                    my_is_team = True

                grade_queue.append({"semester": my_semester,
                                    "course": my_course,
                                    "gradeable": my_gradeable,
                                    "user": my_user,
                                    "team": my_team,
                                    "who": my_who,
                                    "is_team": my_is_team,
                                    "version": my_version,
                                    "required_capabilities" : required_capabilities,
                                    "queue_time":queue_time,
                                    "regrade":True,
                                    "max_possible_grading_time" : max_grading_time})

    # Confirm before adding a very large number of submissions to the queue
    if len(grade_queue) > 50 and not args.no_input:
        inp = input("Found {:d} matching submissions. Add to queue? [y/n]".format(len(grade_queue)))
        if inp.lower() not in ["yes", "y"]:
            raise SystemExit("Aborting...")

    for item in grade_queue:
        file_name = "__".join([item['semester'], item['course'], item['gradeable'], item['who'], item['version']])
        file_name = os.path.join(SUBMITTY_DATA_DIR, "to_be_graded_queue", file_name)
        with open(file_name, "w") as open_file:
            json.dump(item, open_file, sort_keys=True, indent=4)
        os.system("chmod o+rw {}".format(file_name))

    print("Added {:d} to the queue for regrading.".format(len(grade_queue)))
Example #6
def launch_shippers(worker_status_map):
    # verify the DAEMON_USER is running this script
    if int(os.getuid()) != int(DAEMON_UID):
        raise SystemExit("ERROR: the grade_item.py script must be run by the DAEMON_USER")
    grade_items_logging.log_message(JOB_ID, message="grade_scheduler.py launched")

    # Clean up old files from previous shipping/autograding (any
    # partially completed work will be re-done)
    for file_path in glob.glob(os.path.join(INTERACTIVE_QUEUE, "GRADING_*")):
        grade_items_logging.log_message(JOB_ID, message="Remove old queue file: " + file_path)
        os.remove(file_path)

    for file_path in glob.glob(os.path.join(SUBMITTY_DATA_DIR,"autograding_TODO","unstrusted*")):
        grade_items_logging.log_message(JOB_ID, message="Remove autograding TODO file: " + file_path)
        os.remove(file_path)
    for file_path in glob.glob(os.path.join(SUBMITTY_DATA_DIR,"autograding_DONE","*")):
        grade_items_logging.log_message(JOB_ID, message="Remove autograding DONE file: " + file_path)
        os.remove(file_path)

    # this lock will be used to edit the queue or new job event
    overall_lock = multiprocessing.Lock()

    # The names of the worker machines, the capabilities of each
    # worker machine, and the number of workers per machine are stored
    # in the autograding_workers json.
    try:
        autograding_workers_path = os.path.join(SUBMITTY_INSTALL_DIR, 'config', "autograding_workers.json")
        with open(autograding_workers_path, 'r') as infile:
            autograding_workers = json.load(infile)
    except Exception as e:
        raise SystemExit("ERROR: could not locate the autograding workers json: {0}".format(e))

    # There must always be a primary machine; it may or may not have
    # autograding workers.
    if "primary" not in autograding_workers:
        raise SystemExit("ERROR: autograding_workers.json contained no primary machine.")

    # One (or more) of the machines must accept "default" jobs.
    default_present = False
    for name, machine in autograding_workers.items():
        if "default" in machine["capabilities"]:
            default_present = True
            break
    if not default_present:
        raise SystemExit("ERROR: autograding_workers.json contained no machine with default capabilities")

    # Launch a shipper process for every worker on the primary machine and each worker machine
    total_num_workers = 0
    processes = list()
    for name, machine in autograding_workers.items():
        if not worker_status_map[name]:
            print("{0} could not be reached, so we are not spinning up shipper threads.".format(name))
            grade_items_logging.log_message(JOB_ID, message="{0} could not be reached, so we are not spinning up shipper threads.".format(name))
            continue
        if machine.get('enabled') is False:
            print("{0} is disabled, so we are not spinning up shipper threads.".format(name))
            grade_items_logging.log_message(JOB_ID, message="{0} is disabled, so we are not spinning up shipper threads.".format(name))
            continue
        try:
            full_address = ""
            if machine["address"] != "localhost":
                if machine["username"] == "":
                    raise SystemExit("ERROR: empty username for worker machine {0} ".format(machine["address"]))
                full_address = "{0}@{1}".format(machine["username"], machine["address"])
            else:
                if not machine["username"] == "":
                    raise SystemExit('ERROR: username for primary (localhost) must be ""')
                full_address = machine['address']

            num_workers_on_machine = machine["num_autograding_workers"]
            if num_workers_on_machine < 0:
                raise SystemExit("ERROR: num_autograding_workers for '{0}' must be non-negative.".format(name))

            single_machine_data = {name : machine}
            single_machine_data = add_fields_to_autograding_worker_json(single_machine_data, name)
        except Exception as e:
            print("ERROR: autograding_workers.json entry for {0} contains an error: {1}".format(name, e))
            grade_items_logging.log_message(JOB_ID, message="ERROR: autograding_workers.json entry for {0} contains an error: {1}".format(name,e))
            continue
        # launch the shipper threads
        for i in range(num_workers_on_machine):
            u = "untrusted" + str(i).zfill(2)
            p = multiprocessing.Process(target=shipper_process, args=(name, single_machine_data, full_address, u, overall_lock))
            p.start()
            processes.append(p)
        total_num_workers += num_workers_on_machine

    # main monitoring loop
    try:
        while True:
            alive = 0
            for i in range(total_num_workers):
                if processes[i].is_alive():
                    alive = alive + 1
                else:
                    grade_items_logging.log_message(JOB_ID, message="ERROR: process "+str(i)+" is not alive")
            if alive != total_num_workers:
                grade_items_logging.log_message(JOB_ID, message="ERROR: #shippers="+str(total_num_workers)+" != #alive="+str(alive))
            #print ("shippers= ",total_num_workers,"  alive=",alive)
            time.sleep(1)

    except KeyboardInterrupt:
        grade_items_logging.log_message(JOB_ID, message="grade_scheduler.py keyboard interrupt")
        # just kill everything in this group id right now
        # NOTE: this may be a bug; grandchildren with a different group id will not be killed
        os.kill(-os.getpid(), signal.SIGKILL)

        # run this to check if everything is dead
        #    ps  xao pid,ppid,pgid,sid,comm,user  | grep untrust

        # everything's dead, including the main process so the rest of this will be ignored
        # but this was mostly working...

        # terminate the jobs
        for i in range(total_num_workers):
            processes[i].terminate()
        # wait for them to join
        for i in range(total_num_workers):
            processes[i].join()

    grade_items_logging.log_message(JOB_ID, message="grade_scheduler.py terminated")
Example #7
def get_job(my_name, which_machine, my_capabilities, which_untrusted, overall_lock):
    """
    Picks a job from the queue

    :param overall_lock: a lock on the directory containing all queue files
    """

    time_get_job_begin = dateutils.get_current_time()

    overall_lock.acquire()
    folder = INTERACTIVE_QUEUE

    # Grab all the files currently in the folder, sorted by creation
    # time, and put them in the queue to be graded
    files = glob.glob(os.path.join(folder, "*"))
    files_and_times = list()
    for f in files:
        try:
            my_time = os.path.getctime(f)
        except OSError:  # the file may have been removed by another process
            continue
        tup = (f, my_time)
        files_and_times.append(tup)

    files_and_times = sorted(files_and_times, key=operator.itemgetter(1))
    my_job=""

    for full_path_file, file_time in files_and_times:
        # get the file name (without the path)
        just_file = full_path_file[len(folder)+1:]
        # skip items that are already being graded
        if just_file.startswith("GRADING_"):
            continue
        grading_file = os.path.join(folder,"GRADING_"+just_file)
        if grading_file in files:
            continue

        # found something to do
        try:
            with open(full_path_file, 'r') as infile:
                queue_obj = json.load(infile)
        except (OSError, ValueError):  # unreadable or malformed queue file
            continue

        #Check to make sure that we are capable of grading this submission
        required_capabilities = queue_obj["required_capabilities"]
        if required_capabilities not in my_capabilities:
            continue

        # prioritize interactive jobs over (batch) regrades
        # if you've found an interactive job, exit early (since they are sorted by timestamp)
        if not "regrade" in queue_obj or not queue_obj["regrade"]:
            my_job = just_file
            break

        # otherwise it's a regrade, and if we don't already have a
        # job, take it, but we have to search the rest of the list
        if my_job == "":
            my_job = just_file

    if not my_job == "":
        grading_file = os.path.join(folder, "GRADING_" + my_job)
        # create the grading file
        with open(grading_file, "w") as queue_file:
            json.dump({"untrusted": which_untrusted}, queue_file)

    overall_lock.release()

    time_get_job_end = dateutils.get_current_time()

    time_delta = time_get_job_end - time_get_job_begin
    if time_delta > datetime.timedelta(milliseconds=100):
        print(my_name, " WARNING: submitty_autograding shipper get_job time ", time_delta)
        grade_items_logging.log_message(JOB_ID, message=str(my_name)+" WARNING: submitty_autograding shipper get_job time "+str(time_delta))

    return my_job
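
A hypothetical call to get_job; the machine name, capability list, and
untrusted user are placeholders:

import multiprocessing

lock = multiprocessing.Lock()
job = get_job("primary", "localhost", ["default"], "untrusted00", lock)
print("picked job:", job)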
Example #8
def main():
    args = parse_args()
    while True:

        # count the processes
        pid_list = psutil.pids()
        num_procs = 0
        for pid in pid_list:
            try:
                proc = psutil.Process(pid)
                if proc.username() == 'hwcron':
                    if (len(proc.cmdline()) >= 2
                            and proc.cmdline()[1] == os.path.join(
                                SUBMITTY_INSTALL_DIR, "bin",
                                "submitty_autograding_shipper.py")):
                        num_procs += 1
            except psutil.NoSuchProcess:
                pass

        # subtract 1 from the count: each worker is forked from the
        # initial process
        num_procs -= 1

        if num_procs <= 0:
            print(
                "WARNING: No matching submitty_autograding_shipper.py processes!"
            )
            num_procs = 0

        done = True

        print("GRADING PROCESSES:{:3d}       ".format(num_procs), end="")

        if os.access(GRADING_QUEUE, os.R_OK):
            # most instructors do not have read access to the interactive queue

            files = glob.glob(os.path.join(GRADING_QUEUE, "*"))
            interactive_count = 0
            interactive_grading_count = 0
            regrade_count = 0
            regrade_grading_count = 0

            for full_path_file in files:
                json_file = full_path_file

                # get the file name (without the path)
                just_file = full_path_file[len(GRADING_QUEUE) + 1:]
                # skip items that are already being graded
                is_grading = just_file.startswith("GRADING_")
                is_regrade = False

                if is_grading:
                    json_file = os.path.join(GRADING_QUEUE, just_file[8:])

                try:
                    with open(json_file, 'r') as infile:
                        queue_obj = json.load(infile)
                    if "regrade" in queue_obj:
                        is_regrade = queue_obj["regrade"]
                except (OSError, ValueError):
                    print("whoops", json_file, end="")

                if is_grading:
                    if is_regrade:
                        regrade_grading_count += 1
                    else:
                        interactive_grading_count += 1
                else:
                    if is_regrade:
                        regrade_count += 1
                    else:
                        interactive_count += 1

            print("INTERACTIVE todo:{:3d} ".format(interactive_count), end="")
            if interactive_grading_count == 0:
                print("                 ", end="")
            else:
                print("(grading:{:3d})    ".format(interactive_grading_count),
                      end="")
            if interactive_count != 0:
                done = False

            print("BATCH todo:{:3d} ".format(regrade_count), end="")
            if regrade_grading_count == 0:
                print("                 ", end="")
            else:
                print("(grading:{:3d})    ".format(regrade_grading_count),
                      end="")
            if regrade_count != 0:
                done = False

        print()

        # quit when the queues are empty
        if done and not args.continuous:
            raise SystemExit()

        # pause before checking again
        time.sleep(5)
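
A minimal sketch of the "GRADING_" naming convention the monitor relies on: a
queue entry X that also has a sibling GRADING_X is currently being graded. The
queue directory is a placeholder:

import glob
import os

queue_dir = "/var/local/submitty/to_be_graded_queue"
entries = [os.path.basename(p) for p in glob.glob(os.path.join(queue_dir, "*"))]
in_progress = {e[len("GRADING_"):] for e in entries if e.startswith("GRADING_")}
waiting = [e for e in entries
           if not e.startswith("GRADING_") and e not in in_progress]
print(len(waiting), "waiting;", len(in_progress), "being graded")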
Example #9
def main():
    args = arg_parse()
    data_dir = os.path.join(SUBMITTY_DATA_DIR, "courses")
    data_dirs = data_dir.split(os.sep)
    grade_queue = []

    for input_path in args.path:
        # handle relative path
        if input_path == '.':
            input_path = os.getcwd()
        if input_path[0] != '/':
            input_path = os.getcwd() + '/' + input_path
        # remove trailing slash (if any)
        input_path = input_path.rstrip('/')
        # split the path into directories
        dirs = input_path.split(os.sep)

        # must be in the known submitty base data directory
        if dirs[0:len(data_dirs)] != data_dirs:
            print("ERROR: BAD REGRADE SUBMISSIONS PATH",input_path)
            raise SystemExit("You need to point to a directory within {}".format(data_dir))

        # Extract directories from provided pattern path (path may be incomplete)
        pattern_semester="*"
        if len(dirs) > len(data_dirs):
            pattern_semester=dirs[len(data_dirs)]
        pattern_course="*"
        if len(dirs) > len(data_dirs)+1:
            pattern_course=dirs[len(data_dirs)+1]
        if len(dirs) > len(data_dirs)+2:
            if (dirs[len(data_dirs)+2] != "submissions"):
                raise SystemExit("You must specify the submissions directory within the course")
        pattern_gradeable="*"
        if len(dirs) > len(data_dirs)+3:
            pattern_gradeable=dirs[len(data_dirs)+3]
        pattern_who="*"
        if len(dirs) > len(data_dirs)+4:
            pattern_who=dirs[len(data_dirs)+4]
        pattern_version="*"
        if len(dirs) > len(data_dirs)+5:
            pattern_version=dirs[len(data_dirs)+5]

        # full pattern may include wildcards!
        pattern = os.path.join(data_dir,pattern_semester,pattern_course,"submissions",pattern_gradeable,pattern_who,pattern_version)
        print("pattern: ",pattern)

        # Find all matching submissions
        for d in glob.glob(pattern):
            if os.path.isdir(d):
                print("match: ",d)
                my_dirs = d.split(os.sep)
                if len(my_dirs) != len(data_dirs)+6:
                    raise SystemExit("ERROR: directory length not as expected")
                my_semester=my_dirs[len(data_dirs)]
                my_course=my_dirs[len(data_dirs)+1]
                my_gradeable=my_dirs[len(data_dirs)+3]
                my_who=my_dirs[len(data_dirs)+4]
                my_version=my_dirs[len(data_dirs)+5]
                my_path=os.path.join(data_dir,my_semester,my_course,"submissions",my_gradeable,my_who,my_version)
                if my_path != d:
                    raise SystemExit("ERROR: path reconstruction failed")
                # add them to the queue

                if '_' not in my_who:
                    my_user = my_who
                    my_team = ""
                    my_is_team = False
                else:
                    my_user = ""
                    my_team = my_who
                    my_is_team = True

                grade_queue.append({"semester": my_semester, "course": my_course, "gradeable": my_gradeable,
                                    "user": my_user, "team": my_team, "who": my_who, "is_team": my_is_team, "version": my_version})

    # Confirm before adding a very large number of submissions to the queue
    if len(grade_queue) > 50 and not args.no_input:
        inp = input("Found {:d} matching submissions. Add to queue? [y/n]".format(len(grade_queue)))
        if inp.lower() not in ["yes", "y"]:
            raise SystemExit("Aborting...")

    which_queue="batch"
    if args.interactive:
        which_queue="interactive"

    for item in grade_queue:
        file_name = "__".join([item['semester'], item['course'], item['gradeable'], item['who'], item['version']])
        file_name = os.path.join(SUBMITTY_DATA_DIR, "to_be_graded_"+which_queue, file_name)
        with open(file_name, "w") as open_file:
            json.dump(item, open_file)
        os.system("chmod o+rw {}".format(file_name))

    print("Added {:d} to the {} queue for regrading.".format(len(grade_queue), which_queue.upper()))