def add_statement(task_name, language_code, statement_type, statement_file,
                  overwrite):
    """Store a statement file and attach it to a task in the database.

    task_name (string): name of the task the statement belongs to.
    language_code (string): language of the statement.
    statement_type (string|None): type of the statement; if None
        nothing is done and False is returned.
    statement_file (string): path of the file to store.
    overwrite (bool): whether to replace a statement that already
        exists for the same task, language and type.

    return (bool): True if the statement has been added, False if
        any check or the file storage failed.

    """
    logger.info(
        "Adding the statement(language: %s) of task %s "
        "in the database.", language_code, task_name)

    if statement_type is None:
        return False

    if not os.path.exists(statement_file):
        logger.error("Statement file (path: %s) does not exist.",
                     statement_file)
        return False

    with SessionGen() as session:
        task = session.query(Task)\
            .filter(Task.name == task_name).first()
        if not task:
            logger.error("No task named %s", task_name)
            return False

        try:
            file_cacher = FileCacher()
            digest = file_cacher.put_file_from_path(
                statement_file,
                "%s Statement (lang: %s) for task %s" %
                (statement_type.upper(), language_code, task_name))
        except Exception:
            logger.error("Task statement storage failed.", exc_info=True)
            # Without a digest there is nothing to attach to the task;
            # going on would fail with a NameError on `digest'.
            return False

        arr = session.query(Statement)\
            .filter(Statement.language == language_code)\
            .filter(Statement.statement_type == statement_type)\
            .filter(Statement.task == task)\
            .all()
        if arr:  # Statement already exists
            if not overwrite:
                logger.error("A statement of the given type and language "
                             "already exists. Not overwriting.")
                return False
            logger.info("Overwriting already existing statement.")
            session.delete(arr[0])
            session.commit()

        statement = Statement(language_code, statement_type, digest,
                              task=task)
        session.add(statement)
        session.commit()

    logger.info("Statement added.")
    return True
def add_statement(task_name, language_code, statement_file, overwrite):
    """Store a pdf statement and attach it to a task in the database.

    task_name (string): name of the task the statement belongs to.
    language_code (string): language of the statement.
    statement_file (string): path of the file to store; it has to
        exist and its name has to end with ".pdf".
    overwrite (bool): whether to replace a statement that already
        exists for the same task and language.

    return (bool): True if the statement has been added, False if
        any check or the file storage failed.

    """
    logger.info("Adding the statement(language: %s) of task %s "
                "in the database.", language_code, task_name)

    if not os.path.exists(statement_file):
        logger.error("Statement file (path: %s) does not exist.",
                     statement_file)
        return False
    if not statement_file.endswith(".pdf"):
        logger.error("Statement file should be a pdf file.")
        return False

    with SessionGen() as session:
        task = session.query(Task)\
            .filter(Task.name == task_name).first()
        if not task:
            logger.error("No task named %s", task_name)
            return False

        try:
            file_cacher = FileCacher()
            digest = file_cacher.put_file_from_path(
                statement_file,
                "Statement for task %s (lang: %s)" %
                (task_name, language_code))
        except Exception:
            logger.error("Task statement storage failed.", exc_info=True)
            # Without a digest there is nothing to attach to the task;
            # going on would fail with a NameError on `digest'.
            return False

        arr = session.query(Statement)\
            .filter(Statement.language == language_code)\
            .filter(Statement.task == task)\
            .all()
        if arr:  # Statement already exists
            if not overwrite:
                logger.error("A statement with given language already "
                             "exists. Not overwriting.")
                return False
            logger.info("Overwriting already existing statement.")
            session.delete(arr[0])
            session.commit()

        statement = Statement(language_code, digest, task=task)
        session.add(statement)
        session.commit()

    logger.info("Statement added.")
    return True
def test_testcases(base_dir, soluzione, language, assume=None):
    """Evaluate a solution on every testcase of the task in base_dir.

    The task and the file cacher are kept in the module-level globals
    `task' and `file_cacher' and are created only if they are None.

    base_dir (string): directory of the task.
    soluzione (string): path of the solution, relative to base_dir;
        for OutputOnly tasks it is run through call() to generate the
        outputs.
    language (string): language passed to the evaluation jobs (not
        used for OutputOnly tasks).
    assume (string|None): if not None, it is used as the answer to
        the "stop on timeout" question instead of reading stdin.

    return: zip(points, comments, info), one entry per testcase.

    """
    global task, file_cacher

    # Use a disabled FileCacher with a FSBackend in order to avoid to fill
    # the database with junk and to save up space.
    if file_cacher is None:
        file_cacher = FileCacher(
            path=os.path.join(config.cache_dir, 'cmsMake'),
            enabled=False)

    # Load the task
    if task is None:
        loader = YamlLoader(
            os.path.realpath(os.path.join(base_dir, "..")),
            file_cacher)
        # Normally we should import the contest before, but YamlLoader
        # accepts get_task() even without previous get_contest() calls
        task = loader.get_task(os.path.split(os.path.realpath(base_dir))[1])

    # Prepare the EvaluationJob
    dataset = task.active_dataset
    if dataset.task_type != "OutputOnly":
        digest = file_cacher.put_file_from_path(
            os.path.join(base_dir, soluzione),
            "Solution %s for task %s" % (soluzione, task.name))
        executables = {
            task.name: Executable(filename=task.name, digest=digest)
        }
        jobs = [(t, EvaluationJob(
            language=language,
            task_type=dataset.task_type,
            task_type_parameters=json.loads(dataset.task_type_parameters),
            managers=dict(dataset.managers),
            executables=executables,
            input=dataset.testcases[t].input,
            output=dataset.testcases[t].output,
            time_limit=dataset.time_limit,
            memory_limit=dataset.memory_limit)) for t in dataset.testcases]
        tasktype = get_task_type(dataset=dataset)
    else:
        # Run the solution once per testcase and store what it writes
        # on stdout as the submitted output file for that testcase.
        print("Generating outputs...", end='')
        files = {}
        for t in sorted(dataset.testcases.keys()):
            with file_cacher.get_file(dataset.testcases[t].input) as fin:
                with TemporaryFile() as fout:
                    print(str(t), end='')
                    call(soluzione, stdin=fin, stdout=fout, cwd=base_dir)
                    fout.seek(0)
                    digest = file_cacher.put_file_from_fobj(fout)
                    outname = "output_%s.txt" % t
                    files[outname] = File(filename=outname, digest=digest)
        jobs = [(t, EvaluationJob(
            task_type=dataset.task_type,
            task_type_parameters=json.loads(dataset.task_type_parameters),
            managers=dict(dataset.managers),
            files=files,
            input=dataset.testcases[t].input,
            output=dataset.testcases[t].output,
            time_limit=dataset.time_limit,
            memory_limit=dataset.memory_limit)) for t in dataset.testcases]
        for k, job in jobs:
            job._key = k
        tasktype = get_task_type(dataset=dataset)
        print()

    ask_again = True
    last_status = "ok"
    status = "ok"
    stop = False
    info = []
    points = []
    comments = []
    tcnames = []
    for jobinfo in sorted(jobs):
        print(jobinfo[0], end='')
        sys.stdout.flush()
        job = jobinfo[1]
        # Skip the testcase if we decide to consider everything to
        # timeout
        if stop:
            info.append("Time limit exceeded")
            points.append(0.0)
            comments.append("Timeout.")
            # Keep tcnames aligned with the other lists, otherwise the
            # zip() in the result table drops the skipped testcases.
            tcnames.append(jobinfo[0])
            continue

        # Evaluate testcase
        last_status = status
        tasktype.evaluate(job, file_cacher)
        if dataset.task_type != "OutputOnly":
            status = job.plus["exit_status"]
            info.append("Time: %5.3f Wall: %5.3f Memory: %s" %
                        (job.plus["execution_time"],
                         job.plus["execution_wall_clock_time"],
                         mem_human(job.plus["execution_memory"])))
        else:
            status = "ok"
            info.append("N/A")
        points.append(float(job.outcome))
        comments.append(format_status_text(job.text))
        tcnames.append(jobinfo[0])

        # If we saw two consecutive timeouts, ask whether we want to
        # consider everything to timeout
        if ask_again and status == "timeout" and last_status == "timeout":
            print()
            print("Want to stop and consider everything to timeout? [y/N]",
                  end='')
            if assume is not None:
                print(assume)
                tmp = assume
            else:
                tmp = raw_input().lower()
            if tmp in ['y', 'yes']:
                stop = True
            else:
                ask_again = False

    # Result pretty printing
    print()
    # The `or [0]' keeps max() from raising when there are no testcases.
    clen = max([len(c) for c in comments] or [0])
    ilen = max([len(i) for i in info] or [0])
    for (i, p, c, b) in zip(tcnames, points, comments, info):
        print("%s) %5.2lf --- %s [%s]" % (i, p, c.ljust(clen),
                                          b.center(ilen)))

    return zip(points, comments, info)
def add_submission(contest_id, username, task_name, timestamp, files):
    """Add a submission for the given user and task to the database.

    contest_id (int): id of the contest the task belongs to.
    username (string): username of the submitting user, who has to
        participate in the contest.
    task_name (string): name of the task.
    timestamp (int): time of the submission, converted with
        make_datetime.
    files ({string: string}): maps each file name of the submission
        format to the path of the file to store.

    return (bool): True if the submission has been added, False if
        any check or the file storage failed.

    """
    file_cacher = FileCacher()
    with SessionGen() as session:

        participation = session.query(Participation)\
            .join(Participation.user)\
            .filter(Participation.contest_id == contest_id)\
            .filter(User.username == username)\
            .first()
        if participation is None:
            logger.critical("User `%s' does not exists or "
                            "does not participate in the contest.",
                            username)
            return False

        task = session.query(Task)\
            .filter(Task.contest_id == contest_id)\
            .filter(Task.name == task_name)\
            .first()
        if task is None:
            logger.critical("Unable to find task `%s'.", task_name)
            return False

        # A set comprehension does not leak its loop variable in
        # Python 2, and the builtin `format' is no longer shadowed.
        elements = {element.filename for element in task.submission_format}

        for file_ in files:
            if file_ not in elements:
                logger.critical("File `%s' is not in the submission format "
                                "for the task.", file_)
                return False

        if any(element not in files for element in elements):
            logger.warning("Not all files from the submission format were "
                           "provided.")

        # files is now a subset of elements. We compute the language
        # for each file and check that they do not mix.
        language = None
        for file_ in files:
            this_language = filename_to_language(files[file_])
            if this_language is None and "%l" in file_:
                logger.critical("Cannot recognize language for file `%s'.",
                                file_)
                return False

            if language is None:
                language = this_language
            elif this_language is not None and language != this_language:
                logger.critical("Mixed-language submission detected.")
                return False

        # Store all files from the arguments, and obtain their digests.
        file_digests = {}
        try:
            for file_ in files:
                digest = file_cacher.put_file_from_path(
                    files[file_],
                    "Submission file %s sent by %s at %d."
                    % (file_, username, timestamp))
                file_digests[file_] = digest
        except Exception:
            # Not a bare except: KeyboardInterrupt and SystemExit have
            # to propagate.
            logger.critical("Error while storing submission's file.",
                            exc_info=True)
            return False

        # Create objects in the DB.
        submission = Submission(make_datetime(timestamp), language,
                                participation=participation, task=task)
        for filename, digest in file_digests.items():
            session.add(File(filename, digest, submission=submission))
        session.add(submission)
        session.commit()

    return True
def test_testcases(base_dir, soluzione, assume=None): global task, file_cacher # Use a FileCacher with a NullBackend in order to avoid to fill # the database with junk if file_cacher is None: file_cacher = FileCacher(null=True) # Load the task # TODO - This implies copying a lot of data to the FileCacher, # which is annoying if you have to do it continuously; it would be # better to use a persistent cache (although local, possibly # filesystem-based instead of database-based) and somehow detect # when the task has already been loaded if task is None: loader = YamlLoader( os.path.realpath(os.path.join(base_dir, "..")), file_cacher) # Normally we should import the contest before, but YamlLoader # accepts get_task() even without previous get_contest() calls task = loader.get_task(os.path.split(os.path.realpath(base_dir))[1]) # Prepare the EvaluationJob dataset = task.active_dataset digest = file_cacher.put_file_from_path( os.path.join(base_dir, soluzione), "Solution %s for task %s" % (soluzione, task.name)) executables = {task.name: Executable(filename=task.name, digest=digest)} jobs = [(t, EvaluationJob( task_type=dataset.task_type, task_type_parameters=json.loads(dataset.task_type_parameters), managers=dict(dataset.managers), executables=executables, input=dataset.testcases[t].input, output=dataset.testcases[t].output, time_limit=dataset.time_limit, memory_limit=dataset.memory_limit)) for t in dataset.testcases] tasktype = get_task_type(dataset=dataset) ask_again = True last_status = "ok" status = "ok" stop = False info = [] points = [] comments = [] tcnames = [] for jobinfo in sorted(jobs): print jobinfo[0], sys.stdout.flush() job = jobinfo[1] # Skip the testcase if we decide to consider everything to # timeout if stop: info.append("Time limit exceeded") points.append(0.0) comments.append("Timeout.") continue # Evaluate testcase last_status = status tasktype.evaluate(job, file_cacher) status = job.plus["exit_status"] info.append("Time: %5.3f Wall: %5.3f Memory: %s" 
% (job.plus["execution_time"], job.plus["execution_wall_clock_time"], mem_human(job.plus["execution_memory"]))) points.append(float(job.outcome)) comments.append(format_status_text(job.text)) tcnames.append(jobinfo[0]) # If we saw two consecutive timeouts, ask wether we want to # consider everything to timeout if ask_again and status == "timeout" and last_status == "timeout": print print "Want to stop and consider everything to timeout? [y/N]", if assume is not None: print assume tmp = assume else: tmp = raw_input().lower() if tmp in ['y', 'yes']: stop = True else: ask_again = False # Result pretty printing print clen = max(len(c) for c in comments) ilen = max(len(i) for i in info) for (i, p, c, b) in zip(tcnames, points, comments, info): print "%s) %5.2lf --- %s [%s]" % (i, p, c.ljust(clen), b.center(ilen)) return zip(points, comments, info)
def add_submission(contest_id, username, task_name, timestamp, files):
    """Add a submission for the given user and task to the database.

    contest_id (int): id of the contest the task belongs to.
    username (string): username of the submitting user, who has to
        participate in the contest.
    task_name (string): name of the task.
    timestamp (int): time of the submission, converted with
        make_datetime.
    files ({string: string}): maps each element of the submission
        format to the path of the file to store.

    return (bool): True if the submission has been added, False if
        any check or the file storage failed.

    """
    cacher = FileCacher()
    with SessionGen() as session:

        participation_query = session.query(Participation)\
            .join(Participation.user)\
            .filter(Participation.contest_id == contest_id)\
            .filter(User.username == username)
        participation = participation_query.first()
        if participation is None:
            logging.critical("User `%s' does not exists or "
                             "does not participate in the contest.",
                             username)
            return False

        task_query = session.query(Task)\
            .filter(Task.contest_id == contest_id)\
            .filter(Task.name == task_name)
        task = task_query.first()
        if task is None:
            logging.critical("Unable to find task `%s'.", task_name)
            return False

        expected = set(task.submission_format)

        # Every provided file has to be part of the submission format.
        for provided in files:
            if provided in expected:
                continue
            logging.critical("File `%s' is not in the submission format "
                             "for the task.", provided)
            return False

        if not all(wanted in files for wanted in expected):
            logger.warning("Not all files from the submission format were "
                           "provided.")

        # At this point files is a subset of the submission format.
        # A language is needed only if some element depends on it.
        language_name = None
        if any(".%l" in wanted for wanted in expected):
            try:
                language = language_from_submitted_files(files)
            except ValueError as e:
                logger.critical(e)
                return False
            if language is None:
                # This might happen in case not all files were provided.
                logger.critical("Unable to infer language from submission.")
                return False
            language_name = language.name

        # Put every file in the cacher, remembering its digest.
        digests = {}
        try:
            for provided in files:
                description = "Submission file %s sent by %s at %d." \
                    % (provided, username, timestamp)
                digests[provided] = cacher.put_file_from_path(
                    files[provided], description)
        except Exception as e:
            logger.critical("Error while storing submission's file: %s.", e)
            return False

        # Build the DB objects and notify once they are committed.
        submission = Submission(make_datetime(timestamp), language_name,
                                participation=participation, task=task)
        for stored_name, stored_digest in iteritems(digests):
            session.add(File(stored_name, stored_digest,
                             submission=submission))
        session.add(submission)
        session.commit()
        maybe_send_notification(submission.id)

    return True
def add_submissions(contest_name, task_name, username, items):
    """
    Add submissions from the given user to the given task
    in the given contest. Each item corresponds to a submission,
    and should contain a dictionary which maps formatted file names
    to paths. For example, in batch tasks the format is "Task.%l",
    so one submission would be {"Task.%l": "path/to/task.cpp"}.

    All items are validated before anything is stored; empty items
    are skipped.

    raise (Exception): if an item contains a file name that is not in
        the submission format, or a path that is not a file.
    """

    # We connect to evaluation service to try and notify it about
    # the new submissions. Otherwise, it will pick it up only on
    # the next sweep for missed operations.
    rs = RemoteServiceClient(ServiceCoord("EvaluationService", 0))
    rs.connect()

    # The connection is released also when validation or storage
    # raises, not only on success.
    try:
        with SessionGen() as session:
            user = get_user(session, username)
            contest = get_contest(session, contest_name)
            participation = get_participation(session, contest, user)
            task = get_task(session, task_name, contest)
            elements = set(format_element.filename
                           for format_element in task.submission_format)
            file_cacher = FileCacher()

            # We go over all submissions twice. First we validate the
            # submission format.
            for submission_dict in items:
                for (format_file_name, path) in submission_dict.iteritems():
                    if format_file_name not in elements:
                        raise Exception("Unexpected submission file: %s. "
                                        "Expected elements: %s" %
                                        (format_file_name, elements))
                    if not os.path.isfile(path):
                        raise Exception("File not found: %s" % path)

            # Now add to database.
            for submission_dict in items:
                if not submission_dict:
                    continue

                timestamp = time.time()
                file_digests = {}
                language_name = None

                for (format_file_name, path) in submission_dict.iteritems():
                    digest = file_cacher.put_file_from_path(
                        path,
                        "Submission file %s sent by %s at %d."
                        % (path, username, timestamp))
                    file_digests[format_file_name] = digest

                    # The language of the last file with a recognized
                    # language is the one stored in the submission.
                    current_language = filename_to_language(path)
                    if current_language is not None:
                        language_name = current_language.name

                submission = Submission(make_datetime(timestamp),
                                        language_name,
                                        participation=participation,
                                        task=task)
                for filename, digest in file_digests.items():
                    session.add(File(filename, digest,
                                     submission=submission))
                session.add(submission)
                session.commit()
                rs.new_submission(submission_id=submission.id)
    finally:
        rs.disconnect()
class ContestImporter(object):

    """This service imports a contest from a directory that has been
    the target of a ContestExport. The process of exporting and
    importing again should be idempotent.

    """

    def __init__(self, drop, import_source,
                 load_files, load_model, skip_generated,
                 skip_submissions, skip_user_tests):
        """Store the options of the import and create a FileCacher.

        drop (bool): whether to drop and recreate the database first.
        import_source (string): directory or archive (.zip, .tar,
            .tar.gz, .tgz, .tar.bz2, .tbz2, .tar.xz, .txz) to import.
        load_files (bool): whether to import the files.
        load_model (bool): whether to import contest.json.
        skip_generated (bool): whether to drop submission results and
            user test results.
        skip_submissions (bool): whether to drop submissions.
        skip_user_tests (bool): whether to drop user tests.

        """
        self.drop = drop
        self.load_files = load_files
        self.load_model = load_model
        self.skip_generated = skip_generated
        self.skip_submissions = skip_submissions
        self.skip_user_tests = skip_user_tests

        self.import_source = import_source
        self.import_dir = import_source

        self.file_cacher = FileCacher()

    def do_import(self):
        """Run the actual import code.

        return (bool): True if the import succeeded, False otherwise.

        """
        logger.info("Starting import.")

        temp_dir = None
        if not os.path.isdir(self.import_source):
            temp_dir = self._extract_archive()
            if temp_dir is None:
                return False

        try:
            return self._import_data()
        finally:
            # If we extracted an archive, we remove the whole temporary
            # directory, whether the import succeeded or not.
            if temp_dir is not None:
                rmtree(temp_dir)

    def _extract_archive(self):
        """Extract the archive in import_source to a temporary directory.

        On success import_dir is set to the root directory of the
        extracted dump.

        return (string|None): the temporary directory that has been
            created, or None if the archive cannot be imported.

        """
        source = self.import_source
        compressed = None

        if source.endswith(".zip"):
            archive = zipfile.ZipFile(source, "r")
            # Names, as for the tar archives below (infolist() would
            # give ZipInfo objects instead).
            file_names = archive.namelist()
        elif source.endswith((".tar.gz", ".tgz", ".tar.bz2", ".tbz2",
                              ".tar")):
            archive = tarfile.open(name=source)
            file_names = archive.getnames()
        elif source.endswith((".tar.xz", ".txz")):
            try:
                import lzma
            except ImportError:
                logger.critical("LZMA compression format not "
                                "supported. Please install package "
                                "lzma.")
                return None
            compressed = lzma.LZMAFile(source)
            archive = tarfile.open(fileobj=compressed)
            file_names = archive.getnames()
        else:
            logger.critical("Unable to import from %s.", source)
            return None

        try:
            root = find_root_of_archive(file_names)
            if root is None:
                logger.critical("Cannot find a root directory in %s.",
                                source)
                return None

            temp_dir = tempfile.mkdtemp()
            try:
                archive.extractall(temp_dir)
            except Exception:
                # Do not leave a half-extracted directory behind.
                rmtree(temp_dir)
                raise
        finally:
            archive.close()
            if compressed is not None:
                compressed.close()

        self.import_dir = os.path.join(temp_dir, root)
        return temp_dir

    def _import_data(self):
        """Import model and files from import_dir, as requested.

        return (bool): True if the import succeeded, False otherwise.

        """
        if self.drop:
            logger.info("Dropping and recreating the database.")
            try:
                if not (drop_db() and init_db()):
                    logger.critical("Unexpected error while dropping "
                                    "and recreating the database.",
                                    exc_info=True)
                    return False
            except Exception as error:
                logger.critical("Unable to access DB.\n%r", error)
                return False

        with SessionGen() as session:

            # Import the contest in JSON format.
            if self.load_model:
                imported = self._import_model(session)
                if imported is None:
                    return False
                contest_id, contest_files = imported
            else:
                contest_id = None
                contest_files = None

            # Import files.
            if self.load_files and not self._import_files(contest_files):
                return False

        if contest_id is not None:
            logger.info("Import finished (contest id: %s).",
                        ", ".join(str(id_) for id_ in contest_id))
        else:
            logger.info("Import finished.")

        return True

    def _import_model(self, session):
        """Load contest.json, update it if needed and add it to the DB.

        session (Session): the session the contests are added to and
            committed with.

        return ((list, set)|None): the ids of the imported contests
            and the files they enumerate, or None if the dump is
            newer than the current data model.

        """
        logger.info("Importing the contest from a JSON file.")

        with io.open(os.path.join(self.import_dir,
                                  "contest.json"), "rb") as fin:
            # TODO - Throughout all the code we'll assume the
            # input is correct without actually doing any
            # validations.  Thus, for example, we're not
            # checking that the decoded object is a dict...
            self.datas = json.load(fin, encoding="utf-8")

        # If the dump has been exported using a data model
        # different than the current one (that is, a previous
        # one) we try to update it.
        # If no "_version" field is found we assume it's a v1.0
        # export (before the new dump format was introduced).
        dump_version = self.datas.get("_version", 0)

        if dump_version < model_version:
            logger.warning(
                "The dump you're trying to import has been created "
                "by an old version of CMS. It may take a while to "
                "adapt it to the current data model. You can use "
                "cmsDumpUpdater to update the on-disk dump and "
                "speed up future imports.")

        if dump_version > model_version:
            logger.critical(
                "The dump you're trying to import has been created "
                "by a version of CMS newer than this one and there "
                "is no way to adapt it to the current data model. "
                "You probably need to update CMS to handle it. It's "
                "impossible to proceed with the importation.")
            return None

        for version in range(dump_version, model_version):
            # Update from version to version+1
            updater = __import__(
                "cmscontrib.updaters.update_%d" % (version + 1),
                globals(), locals(), ["Updater"]).Updater(self.datas)
            self.datas = updater.run()
            self.datas["_version"] = version + 1

        assert self.datas["_version"] == model_version

        # Keys starting with "_" are metadata, not objects. All the
        # objects are created first, so that relationships can then
        # reference any of them.
        self.objs = dict()
        for id_, data in self.datas.iteritems():
            if not id_.startswith("_"):
                self.objs[id_] = self.import_object(data)
        for id_, data in self.datas.iteritems():
            if not id_.startswith("_"):
                self.add_relationships(data, self.objs[id_])

        for k, v in list(self.objs.iteritems()):
            # Skip submissions, user_tests and generated data if
            # requested
            if (self.skip_submissions and isinstance(v, Submission)) \
                    or (self.skip_user_tests and isinstance(v, UserTest)) \
                    or (self.skip_generated and
                        isinstance(v, (SubmissionResult, UserTestResult))):
                del self.objs[k]

        contest_id = list()
        contest_files = set()

        # Add each base object and all its dependencies
        for id_ in self.datas["_objects"]:
            contest = self.objs[id_]

            # We explictly add only the contest since all child
            # objects will be automatically added by cascade.
            # Adding each object individually would also add
            # orphaned objects like the ones that depended on
            # submissions or user_tests that we (possibly)
            # removed above.
            session.add(contest)
            session.flush()

            contest_id += [contest.id]
            contest_files |= contest.enumerate_files(
                self.skip_submissions, self.skip_user_tests,
                self.skip_generated)

        session.commit()

        return contest_id, contest_files

    def _import_files(self, contest_files):
        """Put the files of the dump in the FileCacher.

        contest_files (set|None): digests needed by the imported
            contests; if None every file of the dump is imported.

        return (bool): True if all files were stored, False otherwise.

        """
        logger.info("Importing files.")

        files_dir = os.path.join(self.import_dir, "files")
        descr_dir = os.path.join(self.import_dir, "descriptions")

        files = set(os.listdir(files_dir))
        descr = set(os.listdir(descr_dir))

        # Each message is emitted by the check it actually describes.
        if not files <= descr:
            logger.warning("Some files do not have an associated "
                           "description.")
        if not descr <= files:
            logger.warning("Some descriptions do not have an "
                           "associated file.")
        if not (contest_files is None or files <= contest_files):
            # FIXME Check if it's because this is a light import
            # or because we're skipping submissions or user_tests
            logger.warning("The dump contains some files that are "
                           "not needed by the contest.")
        if not (contest_files is None or contest_files <= files):
            # The reason for this could be that it was a light
            # export that's not being reimported as such.
            logger.warning("The contest needs some files that are "
                           "not contained in the dump.")

        # Limit import to files we actually need.
        if contest_files is not None:
            files &= contest_files

        for digest in files:
            file_ = os.path.join(files_dir, digest)
            desc = os.path.join(descr_dir, digest)
            if not self.safe_put_file(file_, desc):
                logger.critical("Unable to put file `%s' in the database. "
                                "Aborting. Please remove the contest "
                                "from the database.", file_)
                # TODO: remove contest from the database.
                return False

        return True

    def import_object(self, data):
        """Import objects from the given data (without relationships).

        The given data is assumed to be a dict in the format produced by
        ContestExporter. This method reads the "_class" item and tries
        to find the corresponding class. Then it loads all column
        properties of that class (those that are present in the data)
        and uses them as keyword arguments in a call to the class
        constructor (if a required property is missing this call will
        raise an error).

        Relationships are not handled by this method, since we may not
        have all referenced objects available yet. Thus we prefer to add
        relationships in a later moment, using the add_relationships
        method.

        Note that both this method and add_relationships don't check if
        the given data has more items than the ones we understand and
        use.

        """

        cls = getattr(class_hook, data["_class"])

        args = dict()

        for prp in cls._col_props:
            if prp.key not in data:
                # We will let the __init__ of the class check if any
                # argument is missing, so it's safe to just skip here.
                continue

            col = prp.columns[0]
            col_type = type(col.type)

            val = data[prp.key]
            if col_type in [Boolean, Integer, Float, Unicode,
                            RepeatedUnicode]:
                args[prp.key] = val
            elif col_type is String:
                args[prp.key] = \
                    val.encode('latin1') if val is not None else None
            elif col_type is DateTime:
                args[prp.key] = \
                    make_datetime(val) if val is not None else None
            elif col_type is Interval:
                args[prp.key] = \
                    timedelta(seconds=val) if val is not None else None
            else:
                raise RuntimeError(
                    "Unknown SQLAlchemy column type: %s" % col_type)

        return cls(**args)

    def add_relationships(self, data, obj):
        """Add the relationships to the given object, using the given data.

        Do what we didn't in import_objects: importing relationships.
        We already now the class of the object so we simply iterate over
        its relationship properties trying to load them from the data (if
        present), checking wheter they are IDs or collection of IDs,
        dereferencing them (i.e. getting the corresponding object) and
        reflecting all on the given object.

        Note that both this method and import_object don't check if the
        given data has more items than the ones we understand and use.

        """

        cls = type(obj)

        for prp in cls._rel_props:
            if prp.key not in data:
                # Relationships are always optional
                continue

            val = data[prp.key]
            if val is None:
                setattr(obj, prp.key, None)
            elif isinstance(val, unicode):
                setattr(obj, prp.key, self.objs[val])
            elif isinstance(val, list):
                setattr(obj, prp.key, list(self.objs[i] for i in val))
            elif isinstance(val, dict):
                setattr(obj, prp.key,
                        dict((k, self.objs[v]) for k, v in val.iteritems()))
            else:
                raise RuntimeError(
                    "Unknown RelationshipProperty value: %s" % type(val))

    def safe_put_file(self, path, descr_path):
        """Put a file to FileCacher signaling every error (including
        digest mismatch).

        path (string): the path from which to load the file.
        descr_path (string): same for description.

        return (bool): True if all ok, False if something wrong.

        """

        # TODO - Probably this method could be merged in FileCacher

        # First read the description.
        try:
            with io.open(descr_path, 'rt', encoding='utf-8') as fin:
                description = fin.read()
        except IOError:
            description = ''

        # Put the file.
        try:
            digest = self.file_cacher.put_file_from_path(path, description)
        except Exception as error:
            logger.critical("File %s could not be put to file server (%r), "
                            "aborting.", path, error)
            return False

        # Then check the digest.
        calc_digest = sha1sum(path)
        if digest != calc_digest:
            logger.critical("File %s has hash %s, but the server returned "
                            "%s, aborting.", path, calc_digest, digest)
            return False

        return True
def test_testcases(base_dir, solution, language, assume=None):
    """Evaluate a solution on every testcase and print subtask scores.

    The task and the file cacher are kept in the module-level globals
    `task' and `file_cacher' and are created only if they are None.
    The total score is appended to the global `sols', and
    print_at_exit is registered with atexit on the first call.

    base_dir (string): directory of the task.
    solution (string): path of the solution, relative to base_dir.
    language (string): language passed to the evaluation jobs.
    assume (string|None): if not None, it is used as the answer to
        the "stop on timeout" question instead of reading stdin.

    return: zip(points, comments, info), one entry per testcase.

    """
    global task, file_cacher, tested_something

    # Use a FileCacher with a NullBackend in order to avoid to fill
    # the database with junk
    if file_cacher is None:
        file_cacher = FileCacher(null=True)

    cmscontrib.loaders.italy_yaml.logger = NullLogger()
    # Load the task
    # TODO - This implies copying a lot of data to the FileCacher,
    # which is annoying if you have to do it continuously; it would be
    # better to use a persistent cache (although local, possibly
    # filesystem-based instead of database-based) and somehow detect
    # when the task has already been loaded
    if task is None:
        loader = cmscontrib.loaders.italy_yaml.YamlLoader(base_dir,
                                                          file_cacher)
        task = loader.get_task(get_statement=False)

    # Prepare the EvaluationJob
    dataset = task.active_dataset
    digest = file_cacher.put_file_from_path(
        os.path.join(base_dir, solution),
        "Solution %s for task %s" % (solution, task.name))
    executables = {task.name: Executable(filename=task.name, digest=digest)}
    jobs = [(t, EvaluationJob(
        language=language,
        task_type=dataset.task_type,
        task_type_parameters=json.loads(dataset.task_type_parameters),
        managers=dict(dataset.managers),
        executables=executables,
        input=dataset.testcases[t].input,
        output=dataset.testcases[t].output,
        time_limit=dataset.time_limit,
        memory_limit=dataset.memory_limit)) for t in dataset.testcases]
    tasktype = get_task_type(dataset=dataset)

    ask_again = True
    last_status = "ok"
    status = "ok"
    stop = False
    info = []
    points = []
    comments = []
    tcnames = []
    for jobinfo in sorted(jobs):
        print(jobinfo[0])
        sys.stdout.flush()
        job = jobinfo[1]
        # Skip the testcase if we decide to consider everything to
        # timeout
        if stop:
            # NOTE(review): tcnames is not extended here, so the
            # scoring loop below hits an IndexError on skipped
            # testcases and scores their subtask 0 - confirm this is
            # the intended behaviour before changing it.
            info.append("Time limit exceeded")
            points.append(0.0)
            comments.append("Timeout.")
            move_cursor(directions.UP, erase=True)
            continue

        # Evaluate testcase
        last_status = status
        tasktype.evaluate(job, file_cacher)
        status = job.plus.get("exit_status")
        info.append((job.plus.get("execution_time"),
                     job.plus.get("execution_memory")))
        points.append(float(job.outcome))
        comments.append(format_status_text(job.text))
        tcnames.append(jobinfo[0])

        # If we saw two consecutive timeouts, ask whether we want to
        # consider everything to timeout
        if ask_again and status == "timeout" and last_status == "timeout":
            print("Want to stop and consider everything to timeout? [y/N]",
                  end='')
            if assume is not None:
                print(assume)
                tmp = assume
            else:
                tmp = raw_input().lower()
            if tmp in ['y', 'yes']:
                stop = True
            else:
                ask_again = False
            print()
        move_cursor(directions.UP, erase=True)

    # Subtasks scoring
    # Each subtask is indexed as [max score, number of testcases]; if
    # the parameters cannot be read that way, all testcases form a
    # single subtask worth 100.
    try:
        subtasks = json.loads(dataset.score_type_parameters)
        subtasks[0]
    except Exception:
        subtasks = [[100, len(info)]]

    if dataset.score_type == 'GroupMin':
        scoreFun = min
    else:
        if dataset.score_type != 'Sum':
            logger.warning("Score type %s not yet supported! Using Sum"
                           % dataset.score_type)

        def scoreFun(x):
            return sum(x) / len(x)

    pos = 0
    sts = []

    # For each subtask generate a list of testcase it owns, the score gained
    # and the highest time and memory usage.
    for i in subtasks:
        stscores = []
        stsdata = []
        worst = [0, 0]
        try:
            for _ in xrange(i[1]):
                stscores.append(points[pos])
                stsdata.append((tcnames[pos], points[pos],
                                comments[pos], info[pos]))
                if info[pos][0] > worst[0]:
                    worst[0] = info[pos][0]
                if info[pos][1] > worst[1]:
                    worst[1] = info[pos][1]
                pos += 1
            sts.append((scoreFun(stscores) * i[0], i[0], stsdata, worst))
        except Exception:
            # Not a bare except: KeyboardInterrupt and SystemExit have
            # to propagate. Any failure scores the subtask 0.
            sts.append((0, i[0], stsdata, [0, 0]))

    # Result pretty printing
    # Strips sol/ and _EVAL from the solution's name
    solution = solution[4:-5]
    print()
    # The `or [0]' keeps max() from raising when there are no testcases.
    clen = max([len(c) for c in comments] or [0])
    for st, d in enumerate(sts):
        print(
            "Subtask %d:" % st,
            add_color_to_string(
                "%5.2f/%d" % (d[0], d[1]),
                colors.RED if abs(d[0] - d[1]) > 0.01 else colors.GREEN,
                bold=True
            )
        )
        for (i, p, c, w) in d[2]:
            print(
                "%s)" % i,
                add_color_to_string(
                    "%5.2lf" % p,
                    colors.RED if abs(p - 1) > 0.01 else colors.BLACK
                ),
                "--- %s [Time:" % c.ljust(clen),
                add_color_to_string(
                    ("%5.3f" % w[0]) if w[0] is not None else "N/A",
                    colors.BLUE if w[0] is not None
                    and w[0] >= 0.95 * d[3][0] else colors.BLACK
                ),
                "Memory:",
                add_color_to_string(
                    "%5s" % mem_human(w[1]) if w[1] is not None else "N/A",
                    colors.BLUE if w[1] is not None
                    and w[1] >= 0.95 * d[3][1] else colors.BLACK,
                ),
                end="]"
            )
            move_cursor(directions.RIGHT, 1000)
            move_cursor(directions.LEFT, len(solution) - 1)
            print(add_color_to_string(solution, colors.BLACK, bold=True))
    print()

    sols.append((solution, sum([st[0] for st in sts])))

    if not tested_something:
        tested_something = True
        atexit.register(print_at_exit)

    return zip(points, comments, info)
def main():
    """Import a quiz test from a plain-text question file into the database.

    The path of the question file is the only command line argument.
    Its first line is the test description; the test name is the file
    name with ".txt" removed. The remaining lines are parsed with a
    small state machine:

    - "score": waits for a "score,wrong_score" line of two integers
      (lines that do not parse are ignored), then moves to "text";
    - "text": accumulates the question text. An empty line starts a
      new paragraph, "[[name]]" embeds the image data/name found next
      to the question file, "```" opens and "'''" closes a <pre>
      block;
    - a line starting with "---" opens the list of choices: the first
      character of each following line is a marker ("*" flags the
      correct choice), the rest is the choice text;
    - a line starting with "+++" opens the list of answers, each in
      the form "name: <comma separated JSON values>";
    - a line starting with "===" closes the current question.

    """
    if len(sys.argv) != 2:
        # Single-argument call form: prints the same text whether or
        # not print is a function.
        print("%s [file delle domande]" % sys.argv[0])
        sys.exit(0)

    # Read the whole file inside a context manager so that the handle
    # is closed deterministically instead of being leaked.
    with open(sys.argv[1]) as question_file:
        lines = question_file.readlines()

    if not lines:
        # Without a first line there is no description to read: fail
        # with a clear message rather than an IndexError traceback.
        sys.exit("%s: the question file is empty" % sys.argv[1])

    test = Test()
    test.name = os.path.basename(sys.argv[1]).replace(".txt", "")
    test.description = lines[0].strip()
    test.max_score = 0
    dirname = os.path.dirname(sys.argv[1])

    question = TestQuestion()
    question.text = "<p>\n"
    file_cacher = FileCacher()
    answers = []
    status = "score"

    for line in lines[1:]:
        line = escape(line)

        # Separators: they switch state and consume the line.
        if line.startswith('==='):
            question.text += "</p>"
            question.answers = json.dumps(answers)
            test.questions.append(question)
            status = "score"
            question = TestQuestion()
            question.text = "<p>\n"
            continue

        if line.startswith('---'):
            status = "choice"
            question.type = "choice"
            answers = []
            continue

        if line.startswith('+++'):
            status = "answer"
            answers = []
            continue

        if status == "score":
            try:
                score, wrong_score = map(int, line.split(","))
                test.max_score += score
            except ValueError:
                # Not a score line (e.g. a blank line): keep waiting.
                continue
            question.score = score
            question.wrong_score = wrong_score
            status = "text"
            continue

        # None of the branches below changes the state, so they are
        # mutually exclusive.
        if status == "text":
            if line == "\n":
                question.text += "</p><p>\n"
            elif line.startswith("[[") and line.endswith("]]\n"):
                name = line[2:-3]
                digest = file_cacher.put_file_from_path(
                    os.path.join(dirname, "data", name),
                    "Image %s for test %s" % (name, test.name))
                question.text += "<center>"
                question.text += "<img src='/files/%s/%s'/>" % (digest, name)
                question.text += "</center>\n"
                question.files.append(
                    QuestionFile(filename=name, digest=digest))
            elif line[:-1] == "```":
                question.text += "<pre>"
            elif line[:-1] == "'''":
                question.text += "</pre>"
            else:
                question.text += line
        elif status == "choice":
            answers.append([line[1:].strip(), line[0] == '*'])
        elif status == "answer":
            pos = line.index(":")
            name = line[:pos]
            value = json.loads("[" + line[pos + 1:] + "]")
            if isinstance(value[0], basestring):
                question.type = "string"
            elif not question.type:
                question.type = "number"
            answers.append([name, value])

    # NOTE(review): a trailing question is stored only when the file
    # ends in the "answer" state; a final "choice" question that is not
    # closed by "===" is dropped. Confirm whether this is intended.
    if status == "answer":
        question.text += "</p>"
        question.answers = json.dumps(answers)
        test.questions.append(question)

    with SessionGen() as session:
        test.access_level = 7
        session.add(test)
        session.commit()
class DumpImporter(object):

    """This service imports data from a directory that has been
    the target of a DumpExport. The process of exporting and
    importing again should be idempotent.

    """

    def __init__(self, drop, import_source,
                 load_files, load_model, skip_generated,
                 skip_submissions, skip_user_tests, skip_print_jobs):
        """Store the import options.

        drop (bool): whether to drop and recreate the database before
            importing.
        import_source (string): directory, or supported archive, to
            import from.
        load_files (bool): whether to import the files.
        load_model (bool): whether to import the objects described in
            contest.json.
        skip_generated (bool): whether to discard submission results
            and user test results.
        skip_submissions (bool): whether to discard submissions.
        skip_user_tests (bool): whether to discard user tests.
        skip_print_jobs (bool): whether to discard print jobs.

        """
        self.drop = drop
        self.load_files = load_files
        self.load_model = load_model
        self.skip_generated = skip_generated
        self.skip_submissions = skip_submissions
        self.skip_user_tests = skip_user_tests
        self.skip_print_jobs = skip_print_jobs

        self.import_source = import_source
        self.import_dir = import_source

        self.file_cacher = FileCacher()

    def do_import(self):
        """Run the actual import code.

        return (bool): True if the import succeeded, False otherwise.

        """
        logger.info("Starting import.")

        archive = None
        if Archive.is_supported(self.import_source):
            archive = Archive(self.import_source)
            self.import_dir = archive.unpack()

        # The unpacked copy is removed on every exit path (including
        # failures and exceptions), not only on success.
        try:
            if archive is not None:
                file_names = os.listdir(self.import_dir)
                if len(file_names) != 1:
                    logger.critical("Cannot find a root directory in %s.",
                                    self.import_source)
                    return False
                self.import_dir = os.path.join(self.import_dir,
                                               file_names[0])
            return self._import_from_dir()
        finally:
            # Clean up, if an archive was used
            if archive is not None:
                archive.cleanup()

    def _import_from_dir(self):
        """Import model and files from self.import_dir, as configured.

        return (bool): True if the import succeeded, False otherwise.

        """
        if self.drop:
            logger.info("Dropping and recreating the database.")
            try:
                if not (drop_db() and init_db()):
                    logger.critical(
                        "Unexpected error while dropping "
                        "and recreating the database.",
                        exc_info=True)
                    return False
            except Exception:
                logger.critical("Unable to access DB.", exc_info=True)
                return False

        with SessionGen() as session:

            # Import the contest in JSON format.
            if self.load_model:
                imported = self._import_model(session)
                if imported is None:
                    return False
                contest_id, contest_files = imported
            else:
                contest_id = None
                contest_files = None

            # Import files.
            if self.load_files and not self._import_files(contest_files):
                return False

        if contest_id is not None:
            logger.info("Import finished (contest id: %s).",
                        ", ".join("%d" % id_ for id_ in contest_id))
        else:
            logger.info("Import finished.")

        return True

    def _import_model(self, session):
        """Load contest.json, update it if needed and add it to session.

        session (Session): the session the objects are added to.

        return ((list, set)|None): the ids of the imported contests and
            the files they need, or None if the dump declares a data
            model newer than the current one.

        """
        logger.info("Importing the contest from a JSON file.")

        with io.open(os.path.join(self.import_dir,
                                  "contest.json"), "rb") as fin:
            # TODO - Throughout all the code we'll assume the
            # input is correct without actually doing any
            # validations.  Thus, for example, we're not
            # checking that the decoded object is a dict...
            self.datas = json.load(fin)

        # If the dump has been exported using a data model
        # different than the current one (that is, a previous
        # one) we try to update it.
        # If no "_version" field is found we assume it's a v1.0
        # export (before the new dump format was introduced).
        dump_version = self.datas.get("_version", 0)

        if dump_version < model_version:
            logger.warning(
                "The dump you're trying to import has been created "
                "by an old version of CMS (it declares data model "
                "version %d). It may take a while to adapt it to "
                "the current data model (which is version %d). You "
                "can use cmsDumpUpdater to update the on-disk dump "
                "and speed up future imports.",
                dump_version, model_version)

        elif dump_version > model_version:
            logger.critical(
                "The dump you're trying to import has been created "
                "by a version of CMS newer than this one (it "
                "declares data model version %d) and there is no "
                "way to adapt it to the current data model (which "
                "is version %d). You probably need to update CMS to "
                "handle it. It is impossible to proceed with the "
                "importation.", dump_version, model_version)
            return None

        else:
            logger.info("Importing dump with data model version %d.",
                        dump_version)

        for version in range(dump_version, model_version):
            # Update from version to version+1
            updater = __import__(
                "cmscontrib.updaters.update_%d" % (version + 1),
                globals(), locals(), ["Updater"]).Updater(self.datas)
            self.datas = updater.run()
            self.datas["_version"] = version + 1

        assert self.datas["_version"] == model_version

        # First create every object, then link them: a relationship
        # may point to an object that appears later in the dump.
        self.objs = dict()
        for id_, data in iteritems(self.datas):
            if not id_.startswith("_"):
                self.objs[id_] = self.import_object(data)
        for id_, data in iteritems(self.datas):
            if not id_.startswith("_"):
                self.add_relationships(data, self.objs[id_])

        # Iterate over a copy, since entries are deleted on the way.
        for k, v in list(iteritems(self.objs)):
            skip = (
                # Skip submissions if requested
                (self.skip_submissions and isinstance(v, Submission))
                # Skip user_tests if requested
                or (self.skip_user_tests and isinstance(v, UserTest))
                # Skip print jobs if requested
                or (self.skip_print_jobs and isinstance(v, PrintJob))
                # Skip generated data if requested
                or (self.skip_generated
                    and isinstance(v, (SubmissionResult, UserTestResult))))
            if skip:
                del self.objs[k]

        contest_id = list()
        contest_files = set()

        # We add explicitly only the top-level objects:
        # contests, and tasks and users not contained in any
        # contest. This will add on cascade all dependent
        # objects, and not add orphaned objects (like those
        # that depended on submissions or user tests that we
        # might have removed above).
        for id_ in self.datas["_objects"]:
            obj = self.objs[id_]
            session.add(obj)
            session.flush()

            if isinstance(obj, Contest):
                contest_id.append(obj.id)
                contest_files |= enumerate_files(
                    session, obj,
                    skip_submissions=self.skip_submissions,
                    skip_user_tests=self.skip_user_tests,
                    skip_print_jobs=self.skip_print_jobs,
                    skip_generated=self.skip_generated)

        session.commit()
        return contest_id, contest_files

    def _import_files(self, contest_files):
        """Store the files of the dump in the file cacher.

        contest_files (set|None): digests needed by the imported
            contests, or None to import every file in the dump.

        return (bool): True if all files were stored, False otherwise.

        """
        logger.info("Importing files.")

        files_dir = os.path.join(self.import_dir, "files")
        descr_dir = os.path.join(self.import_dir, "descriptions")

        files = set(os.listdir(files_dir))
        descr = set(os.listdir(descr_dir))

        # A description that is not among the files has no file...
        if not descr <= files:
            logger.warning("Some descriptions do not have an "
                           "associated file.")
        # ...and a file that is not among the descriptions has no
        # description.
        if not files <= descr:
            logger.warning("Some files do not have an associated "
                           "description.")
        if not (contest_files is None or files <= contest_files):
            # FIXME Check if it's because this is a light import
            # or because we're skipping submissions or user_tests
            logger.warning("The dump contains some files that are "
                           "not needed by the contest.")
        if not (contest_files is None or contest_files <= files):
            # The reason for this could be that it was a light
            # export that's not being reimported as such.
            logger.warning("The contest needs some files that are "
                           "not contained in the dump.")

        # Limit import to files we actually need.
        if contest_files is not None:
            files &= contest_files

        for digest in files:
            file_ = os.path.join(files_dir, digest)
            desc = os.path.join(descr_dir, digest)
            if not self.safe_put_file(file_, desc):
                logger.critical(
                    "Unable to put file `%s' in the DB. "
                    "Aborting. Please remove the contest "
                    "from the database.", file_)
                # TODO: remove contest from the database.
                return False

        return True

    def import_object(self, data):
        """Import objects from the given data (without relationships).

        The given data is assumed to be a dict in the format produced
        by the dump exporter. This method reads the "_class" item and
        tries to find the corresponding class. Then it loads all column
        properties of that class (those that are present in the data)
        and uses them as keyword arguments in a call to the class
        constructor (if a required property is missing this call will
        raise an error).

        Relationships are not handled by this method, since we may not
        have all referenced objects available yet. Thus we prefer to
        add relationships in a later moment, using the
        add_relationships method.

        Note that both this method and add_relationships don't check
        if the given data has more items than the ones we understand
        and use.

        data (dict): the serialized object.

        return (object): the new instance, without relationships.

        """

        cls = getattr(class_hook, data["_class"])

        args = dict()

        for prp in cls._col_props:
            if prp.key not in data:
                # We will let the __init__ of the class check if any
                # argument is missing, so it's safe to just skip here.
                continue

            col = prp.columns[0]

            val = data[prp.key]
            args[prp.key] = decode_value(col.type, val)

        return cls(**args)

    def add_relationships(self, data, obj):
        """Add the relationships to the given object, using the given data.

        Do what we didn't in import_object: importing relationships.
        We already know the class of the object so we simply iterate
        over its relationship properties trying to load them from the
        data (if present), checking whether they are IDs or collections
        of IDs, dereferencing them (i.e. getting the corresponding
        object) and reflecting all on the given object.

        Note that both this method and import_object don't check if
        the given data has more items than the ones we understand and
        use.

        data (dict): the serialized object.
        obj (object): the instance built from data by import_object.

        raise (RuntimeError): if a relationship value is not None, a
            string, a list or a dict.

        """

        cls = type(obj)

        for prp in cls._rel_props:
            if prp.key not in data:
                # Relationships are always optional
                continue

            val = data[prp.key]
            if val is None:
                setattr(obj, prp.key, None)
            elif isinstance(val, str):
                setattr(obj, prp.key, self.objs[val])
            elif isinstance(val, list):
                setattr(obj, prp.key, [self.objs[i] for i in val])
            elif isinstance(val, dict):
                setattr(obj, prp.key,
                        {k: self.objs[v] for k, v in iteritems(val)})
            else:
                raise RuntimeError("Unknown RelationshipProperty value: %s"
                                   % type(val))

    def safe_put_file(self, path, descr_path):
        """Put a file to FileCacher signaling every error (including
        digest mismatch).

        path (string): the path from which to load the file.
        descr_path (string): same for description.

        return (bool): True if all ok, False if something wrong.

        """

        # TODO - Probably this method could be merged in FileCacher

        # First read the description; a missing or unreadable one is
        # not an error, the file just gets an empty description.
        try:
            with io.open(descr_path, 'rt', encoding='utf-8') as fin:
                description = fin.read()
        except IOError:
            description = ''

        # Put the file.
        try:
            digest = self.file_cacher.put_file_from_path(path, description)
        except Exception as error:
            logger.critical(
                "File %s could not be put to file server (%r), "
                "aborting.", path, error)
            return False

        # Then check the digest.
        calc_digest = path_digest(path)
        if digest != calc_digest:
            logger.critical(
                "File %s has hash %s, but the server returned %s, "
                "aborting.", path, calc_digest, digest)
            return False

        return True
def test_testcases(base_dir, solution, language, assume=None):
    """Evaluate a solution on every testcase of the task and print a report.

    The task is loaded once and kept in the module-level `task`; the
    module-level `file_cacher` is likewise created on first use.

    base_dir (string): directory of the task, given to the YAML loader
        and used as the base of the solution path.
    solution (string): path of the solution, relative to base_dir.
    language: passed unchanged to each EvaluationJob.
    assume (string|None): if not None, used as the answer to the "stop
        after two consecutive timeouts" question instead of reading it
        from standard input.

    return: zip(points, comments, info), one item per testcase, in the
        order of the sorted jobs.

    """
    global task, file_cacher, tested_something

    # Use a FileCacher with a NullBackend in order to avoid to fill
    # the database with junk
    if file_cacher is None:
        file_cacher = FileCacher(null=True)

    cmscontrib.loaders.italy_yaml.logger = NullLogger()
    # Load the task
    # TODO - This implies copying a lot of data to the FileCacher,
    # which is annoying if you have to do it continuously; it would be
    # better to use a persistent cache (although local, possibly
    # filesystem-based instead of database-based) and somehow detect
    # when the task has already been loaded
    if task is None:
        loader = cmscontrib.loaders.italy_yaml.YamlLoader(
            base_dir, file_cacher)
        task = loader.get_task(get_statement=False)

    # Prepare the EvaluationJob
    dataset = task.active_dataset
    digest = file_cacher.put_file_from_path(
        os.path.join(base_dir, solution),
        "Solution %s for task %s" % (solution, task.name))
    executables = {task.name: Executable(filename=task.name, digest=digest)}
    jobs = [
        (t, EvaluationJob(
            operation=ESOperation(ESOperation.EVALUATION,
                                  None,
                                  dataset.id,
                                  dataset.testcases[t].codename).to_dict(),
            language=language,
            task_type=dataset.task_type,
            task_type_parameters=json.loads(dataset.task_type_parameters),
            managers=dict(dataset.managers),
            executables=executables,
            input=dataset.testcases[t].input,
            output=dataset.testcases[t].output,
            time_limit=dataset.time_limit,
            memory_limit=dataset.memory_limit))
        for t in dataset.testcases
    ]
    tasktype = get_task_type(dataset=dataset)

    ask_again = True
    last_status = "ok"
    status = "ok"
    stop = False
    info = []
    points = []
    comments = []
    tcnames = []
    for jobinfo in sorted(jobs):
        print(jobinfo[0])
        sys.stdout.flush()
        job = jobinfo[1]
        # Skip the testcase if we decide to consider everything to
        # timeout
        if stop:
            # Skipped testcases get no tcnames entry, so a subtask that
            # contains one fails in the scoring loop below and is
            # scored 0.
            info.append("Time limit exceeded")
            points.append(0.0)
            comments.append("Timeout.")
            move_cursor(directions.UP, erase=True)
            continue

        # Evaluate testcase
        last_status = status
        tasktype.evaluate(job, file_cacher)
        status = job.plus.get("exit_status")
        info.append(
            (job.plus.get("execution_time"),
             job.plus.get("execution_memory")))
        points.append(float(job.outcome))

        # Avoid printing unneeded newline
        job.text = [t.rstrip() for t in job.text]

        comments.append(format_status_text(job.text))
        tcnames.append(jobinfo[0])

        # If we saw two consecutive timeouts, ask whether we want to
        # consider everything to timeout
        if ask_again and status == "timeout" and last_status == "timeout":
            print("Want to stop and consider everything to timeout? [y/N] ",
                  end='')
            sys.stdout.flush()

            if assume is not None:
                tmp = assume
                print(tmp)
            else:
                # User input with a timeout of 5 seconds, at the end of which
                # we automatically say "n". ready will be a list of input ready
                # for reading, or an empty list if the timeout expired.
                # See: http://stackoverflow.com/a/2904057
                ready, _, _ = select.select([sys.stdin], [], [], 5)
                if ready:
                    tmp = sys.stdin.readline().strip().lower()
                else:
                    tmp = 'n'
                    print(tmp)

            if tmp in ['y', 'yes']:
                stop = True
            else:
                ask_again = False
            print()
        move_cursor(directions.UP, erase=True)

    # Subtasks scoring
    subtasks = json.loads(dataset.score_type_parameters)
    if not isinstance(subtasks, list) or len(subtasks) == 0:
        # No subtask structure: a single subtask worth 100 points that
        # owns every testcase.
        subtasks = [[100, len(info)]]

    if dataset.score_type == 'GroupMin':
        score_fun = min
    else:
        if dataset.score_type != 'Sum':
            logger.warning("Score type %s not yet supported! Using Sum",
                           dataset.score_type)

        def score_fun(x):
            return sum(x) / len(x)

    pos = 0
    sts = []

    # For each subtask generate a list of testcase it owns, the score gained
    # and the highest time and memory usage.
    for i in subtasks:
        stscores = []
        stsdata = []
        worst = [0, 0]
        try:
            for _ in xrange(i[1]):
                stscores.append(points[pos])
                stsdata.append(
                    (tcnames[pos], points[pos], comments[pos], info[pos]))
                if info[pos][0] > worst[0]:
                    worst[0] = info[pos][0]
                if info[pos][1] > worst[1]:
                    worst[1] = info[pos][1]
                pos += 1
            sts.append((score_fun(stscores) * i[0], i[0], stsdata, worst))
        except Exception:
            # Best effort: a subtask that cannot be scored counts as 0.
            # Catching Exception (rather than everything) lets
            # KeyboardInterrupt and SystemExit propagate.
            sts.append((0, i[0], stsdata, [0, 0]))

    # Result pretty printing
    # Strips sol/ and _EVAL from the solution's name
    solution = solution[4:-5]
    print()
    # The fallback keeps a dataset without testcases from crashing max().
    clen = max([len(c) for c in comments] or [0])
    for st, d in enumerate(sts):
        print(
            "Subtask %d:" % st,
            add_color_to_string(
                "%5.2f/%d" % (d[0], d[1]),
                colors.RED if abs(d[0] - d[1]) > 0.01 else colors.GREEN,
                bold=True))
        for (i, p, c, w) in d[2]:
            print("%s)" % i,
                  add_color_to_string(
                      "%5.2lf" % p,
                      colors.RED if abs(p - 1) > 0.01 else colors.BLACK),
                  "--- %s [Time:" % c.ljust(clen),
                  add_color_to_string(
                      ("%5.3f" % w[0]) if w[0] is not None else "N/A",
                      colors.BLUE if w[0] is not None
                      and w[0] >= 0.95 * d[3][0]
                      else colors.BLACK),
                  "Memory:",
                  add_color_to_string(
                      "%5s" % mem_human(w[1]) if w[1] is not None else "N/A",
                      colors.BLUE if w[1] is not None
                      and w[1] >= 0.95 * d[3][1]
                      else colors.BLACK,
                  ),
                  end="]")
            move_cursor(directions.RIGHT, 1000)
            move_cursor(directions.LEFT, len(solution) - 1)
            print(add_color_to_string(solution, colors.BLACK, bold=True))
    print()

    sols.append((solution, sum([st[0] for st in sts])))

    # Register the summary printer once, after the first tested solution.
    if not tested_something:
        tested_something = True
        atexit.register(print_at_exit)

    return zip(points, comments, info)
def add_submission(contest_id, username, task_name, timestamp, files):
    """Store a submission and its files, then notify about it.

    contest_id (int): id of the contest the submission belongs to.
    username (string): username of the submitting user, who must
        participate in the contest.
    task_name (string): name of a task of the contest.
    timestamp (int): submission time, passed to make_datetime.
    files ({string: string}): maps each filename of the task's
        submission format to the path of the file to store.

    return (bool): True if the submission was added, False otherwise.

    """
    file_cacher = FileCacher()
    with SessionGen() as session:

        participation = session.query(Participation)\
            .join(Participation.user)\
            .filter(Participation.contest_id == contest_id)\
            .filter(User.username == username)\
            .first()
        if participation is None:
            # Use the module logger, like the rest of this function,
            # rather than the root logger.
            logger.critical("User `%s' does not exists or "
                            "does not participate in the contest.",
                            username)
            return False
        task = session.query(Task)\
            .filter(Task.contest_id == contest_id)\
            .filter(Task.name == task_name)\
            .first()
        if task is None:
            logger.critical("Unable to find task `%s'.", task_name)
            return False

        elements = set(task.submission_format)

        for file_ in files:
            if file_ not in elements:
                logger.critical("File `%s' is not in the submission format "
                                "for the task.", file_)
                return False

        if any(element not in files for element in elements):
            logger.warning("Not all files from the submission format were "
                           "provided.")

        # files is now a subset of elements.
        # We ensure we can infer a language if the task requires it.
        language = None
        need_lang = any(".%l" in element for element in elements)
        if need_lang:
            try:
                language = language_from_submitted_files(files)
            except ValueError as e:
                logger.critical(e)
                return False
            if language is None:
                # This might happen in case not all files were provided.
                logger.critical("Unable to infer language from submission.")
                return False
        language_name = None if language is None else language.name

        # Store all files from the arguments, and obtain their digests..
        file_digests = {}
        try:
            for file_ in files:
                digest = file_cacher.put_file_from_path(
                    files[file_],
                    "Submission file %s sent by %s at %d."
                    % (file_, username, timestamp))
                file_digests[file_] = digest
        except Exception as e:
            logger.critical("Error while storing submission's file: %s.", e)
            return False

        # Create objects in the DB.
        submission = Submission(make_datetime(timestamp), language_name,
                                participation=participation, task=task)
        for filename, digest in file_digests.items():
            session.add(File(filename, digest, submission=submission))
        session.add(submission)
        session.commit()
        maybe_send_notification(submission.id)

    return True
def main():
    """Import a quiz test from a plain-text question file into the database.

    The path of the question file is the only command line argument.
    Its first line is the test description; the test name is the file
    name with ".txt" removed. The remaining lines are parsed with a
    small state machine:

    - "score": waits for a "score,wrong_score" line of two integers
      (lines that do not parse are ignored), then moves to "text";
    - "text": accumulates the question text. An empty line starts a
      new paragraph, "[[name]]" embeds the image data/name found next
      to the question file, "```" opens and "'''" closes a <pre>
      block;
    - a line starting with "---" opens the list of choices: the first
      character of each following line is a marker ("*" flags the
      correct choice), the rest is the choice text;
    - a line starting with "+++" opens the list of answers, each in
      the form "name: <comma separated JSON values>";
    - a line starting with "===" closes the current question.

    """
    if len(sys.argv) != 2:
        # Single-argument call form: prints the same text whether or
        # not print is a function.
        print("%s [file delle domande]" % sys.argv[0])
        sys.exit(0)

    # Read the whole file inside a context manager so that the handle
    # is closed deterministically instead of being leaked.
    with open(sys.argv[1]) as question_file:
        lines = question_file.readlines()

    if not lines:
        # Without a first line there is no description to read: fail
        # with a clear message rather than an IndexError traceback.
        sys.exit("%s: the question file is empty" % sys.argv[1])

    test = Test()
    test.name = os.path.basename(sys.argv[1]).replace(".txt", "")
    test.description = lines[0].strip()
    test.max_score = 0
    dirname = os.path.dirname(sys.argv[1])

    question = TestQuestion()
    question.text = "<p>\n"
    file_cacher = FileCacher()
    answers = []
    status = "score"

    for line in lines[1:]:
        line = escape(line)

        # Separators: they switch state and consume the line.
        if line.startswith('==='):
            question.text += "</p>"
            question.answers = json.dumps(answers)
            test.questions.append(question)
            status = "score"
            question = TestQuestion()
            question.text = "<p>\n"
            continue

        if line.startswith('---'):
            status = "choice"
            question.type = "choice"
            answers = []
            continue

        if line.startswith('+++'):
            status = "answer"
            answers = []
            continue

        if status == "score":
            try:
                score, wrong_score = map(int, line.split(","))
                test.max_score += score
            except ValueError:
                # Not a score line (e.g. a blank line): keep waiting.
                continue
            question.score = score
            question.wrong_score = wrong_score
            status = "text"
            continue

        # None of the branches below changes the state, so they are
        # mutually exclusive.
        if status == "text":
            if line == "\n":
                question.text += "</p><p>\n"
            elif line.startswith("[[") and line.endswith("]]\n"):
                name = line[2:-3]
                digest = file_cacher.put_file_from_path(
                    os.path.join(dirname, "data", name),
                    "Image %s for test %s" % (name, test.name))
                question.text += "<center>"
                question.text += "<img src='/files/%s/%s'/>" % (digest, name)
                question.text += "</center>\n"
                question.files.append(
                    QuestionFile(filename=name, digest=digest))
            elif line[:-1] == "```":
                question.text += "<pre>"
            elif line[:-1] == "'''":
                question.text += "</pre>"
            else:
                question.text += line
        elif status == "choice":
            answers.append([line[1:].strip(), line[0] == '*'])
        elif status == "answer":
            pos = line.index(":")
            name = line[:pos]
            value = json.loads("[" + line[pos + 1:] + "]")
            if isinstance(value[0], basestring):
                question.type = "string"
            elif not question.type:
                question.type = "number"
            answers.append([name, value])

    # NOTE(review): a trailing question is stored only when the file
    # ends in the "answer" state; a final "choice" question that is not
    # closed by "===" is dropped. Confirm whether this is intended.
    if status == "answer":
        question.text += "</p>"
        question.answers = json.dumps(answers)
        test.questions.append(question)

    with SessionGen() as session:
        test.access_level = 7
        session.add(test)
        session.commit()
class DumpImporter(object):

    """This service imports data from a directory that has been
    the target of a DumpExport. The process of exporting and
    importing again should be idempotent.

    """

    def __init__(self, drop, import_source,
                 load_files, load_model, skip_generated,
                 skip_submissions, skip_user_tests, skip_print_jobs):
        """Store the import options.

        drop (bool): whether to drop and recreate the database before
            importing.
        import_source (string): directory, or supported archive, to
            import from.
        load_files (bool): whether to import the files.
        load_model (bool): whether to import the objects described in
            contest.json.
        skip_generated (bool): whether to discard submission results
            and user test results.
        skip_submissions (bool): whether to discard submissions.
        skip_user_tests (bool): whether to discard user tests.
        skip_print_jobs (bool): whether to discard print jobs.

        """
        self.drop = drop
        self.load_files = load_files
        self.load_model = load_model
        self.skip_generated = skip_generated
        self.skip_submissions = skip_submissions
        self.skip_user_tests = skip_user_tests
        self.skip_print_jobs = skip_print_jobs

        self.import_source = import_source
        self.import_dir = import_source

        self.file_cacher = FileCacher()

    def do_import(self):
        """Run the actual import code.

        return (bool): True if the import succeeded, False otherwise.

        """
        logger.info("Starting import.")

        archive = None
        if Archive.is_supported(self.import_source):
            archive = Archive(self.import_source)
            self.import_dir = archive.unpack()

        # The unpacked copy is removed on every exit path (including
        # failures and exceptions), not only on success.
        try:
            if archive is not None:
                file_names = os.listdir(self.import_dir)
                if len(file_names) != 1:
                    logger.critical("Cannot find a root directory in %s.",
                                    self.import_source)
                    return False
                self.import_dir = os.path.join(self.import_dir,
                                               file_names[0])
            return self._import_from_dir()
        finally:
            # Clean up, if an archive was used
            if archive is not None:
                archive.cleanup()

    def _import_from_dir(self):
        """Import model and files from self.import_dir, as configured.

        return (bool): True if the import succeeded, False otherwise.

        """
        if self.drop:
            logger.info("Dropping and recreating the database.")
            try:
                if not (drop_db() and init_db()):
                    logger.critical("Unexpected error while dropping "
                                    "and recreating the database.",
                                    exc_info=True)
                    return False
            except Exception:
                logger.critical("Unable to access DB.", exc_info=True)
                return False

        with SessionGen() as session:

            # Import the contest in JSON format.
            if self.load_model:
                imported = self._import_model(session)
                if imported is None:
                    return False
                contest_id, contest_files = imported
            else:
                contest_id = None
                contest_files = None

            # Import files.
            if self.load_files and not self._import_files(contest_files):
                return False

        if contest_id is not None:
            logger.info("Import finished (contest id: %s).",
                        ", ".join("%d" % id_ for id_ in contest_id))
        else:
            logger.info("Import finished.")

        return True

    def _import_model(self, session):
        """Load contest.json, update it if needed and add it to session.

        session (Session): the session the objects are added to.

        return ((list, set)|None): the ids of the imported contests and
            the files they need, or None if the dump declares a data
            model newer than the current one.

        """
        logger.info("Importing the contest from a JSON file.")

        with io.open(os.path.join(self.import_dir,
                                  "contest.json"), "rb") as fin:
            # TODO - Throughout all the code we'll assume the
            # input is correct without actually doing any
            # validations.  Thus, for example, we're not
            # checking that the decoded object is a dict...
            self.datas = json.load(fin)

        # If the dump has been exported using a data model
        # different than the current one (that is, a previous
        # one) we try to update it.
        # If no "_version" field is found we assume it's a v1.0
        # export (before the new dump format was introduced).
        dump_version = self.datas.get("_version", 0)

        if dump_version < model_version:
            logger.warning(
                "The dump you're trying to import has been created "
                "by an old version of CMS (it declares data model "
                "version %d). It may take a while to adapt it to "
                "the current data model (which is version %d). You "
                "can use cmsDumpUpdater to update the on-disk dump "
                "and speed up future imports.",
                dump_version, model_version)

        elif dump_version > model_version:
            logger.critical(
                "The dump you're trying to import has been created "
                "by a version of CMS newer than this one (it "
                "declares data model version %d) and there is no "
                "way to adapt it to the current data model (which "
                "is version %d). You probably need to update CMS to "
                "handle it. It is impossible to proceed with the "
                "importation.", dump_version, model_version)
            return None

        else:
            logger.info(
                "Importing dump with data model version %d.",
                dump_version)

        for version in range(dump_version, model_version):
            # Update from version to version+1
            updater = __import__(
                "cmscontrib.updaters.update_%d" % (version + 1),
                globals(), locals(), ["Updater"]).Updater(self.datas)
            self.datas = updater.run()
            self.datas["_version"] = version + 1

        assert self.datas["_version"] == model_version

        # First create every object, then link them: a relationship
        # may point to an object that appears later in the dump.
        self.objs = dict()
        for id_, data in iteritems(self.datas):
            if not id_.startswith("_"):
                self.objs[id_] = self.import_object(data)
        for id_, data in iteritems(self.datas):
            if not id_.startswith("_"):
                self.add_relationships(data, self.objs[id_])

        # Iterate over a copy, since entries are deleted on the way.
        for k, v in list(iteritems(self.objs)):
            skip = (
                # Skip submissions if requested
                (self.skip_submissions and isinstance(v, Submission))
                # Skip user_tests if requested
                or (self.skip_user_tests and isinstance(v, UserTest))
                # Skip print jobs if requested
                or (self.skip_print_jobs and isinstance(v, PrintJob))
                # Skip generated data if requested
                or (self.skip_generated
                    and isinstance(v, (SubmissionResult, UserTestResult))))
            if skip:
                del self.objs[k]

        contest_id = list()
        contest_files = set()

        # We add explicitly only the top-level objects:
        # contests, and tasks and users not contained in any
        # contest. This will add on cascade all dependent
        # objects, and not add orphaned objects (like those
        # that depended on submissions or user tests that we
        # might have removed above).
        for id_ in self.datas["_objects"]:
            obj = self.objs[id_]
            session.add(obj)
            session.flush()

            if isinstance(obj, Contest):
                contest_id.append(obj.id)
                contest_files |= enumerate_files(
                    session, obj,
                    skip_submissions=self.skip_submissions,
                    skip_user_tests=self.skip_user_tests,
                    skip_print_jobs=self.skip_print_jobs,
                    skip_generated=self.skip_generated)

        session.commit()
        return contest_id, contest_files

    def _import_files(self, contest_files):
        """Store the files of the dump in the file cacher.

        contest_files (set|None): digests needed by the imported
            contests, or None to import every file in the dump.

        return (bool): True if all files were stored, False otherwise.

        """
        logger.info("Importing files.")

        files_dir = os.path.join(self.import_dir, "files")
        descr_dir = os.path.join(self.import_dir, "descriptions")

        files = set(os.listdir(files_dir))
        descr = set(os.listdir(descr_dir))

        # A description that is not among the files has no file...
        if not descr <= files:
            logger.warning("Some descriptions do not have an "
                           "associated file.")
        # ...and a file that is not among the descriptions has no
        # description.
        if not files <= descr:
            logger.warning("Some files do not have an associated "
                           "description.")
        if not (contest_files is None or files <= contest_files):
            # FIXME Check if it's because this is a light import
            # or because we're skipping submissions or user_tests
            logger.warning("The dump contains some files that are "
                           "not needed by the contest.")
        if not (contest_files is None or contest_files <= files):
            # The reason for this could be that it was a light
            # export that's not being reimported as such.
            logger.warning("The contest needs some files that are "
                           "not contained in the dump.")

        # Limit import to files we actually need.
        if contest_files is not None:
            files &= contest_files

        for digest in files:
            file_ = os.path.join(files_dir, digest)
            desc = os.path.join(descr_dir, digest)
            if not self.safe_put_file(file_, desc):
                logger.critical("Unable to put file `%s' in the DB. "
                                "Aborting. Please remove the contest "
                                "from the database.", file_)
                # TODO: remove contest from the database.
                return False

        return True

    def import_object(self, data):
        """Import objects from the given data (without relationships).

        The given data is assumed to be a dict in the format produced
        by DumpExporter. This method reads the "_class" item and tries
        to find the corresponding class. Then it loads all column
        properties of that class (those that are present in the data)
        and uses them as keyword arguments in a call to the class
        constructor (if a required property is missing this call will
        raise an error).

        Relationships are not handled by this method, since we may not
        have all referenced objects available yet. Thus we prefer to
        add relationships in a later moment, using the
        add_relationships method.

        Note that both this method and add_relationships don't check
        if the given data has more items than the ones we understand
        and use.

        data (dict): the serialized object.

        return (object): the new instance, without relationships.

        """

        cls = getattr(class_hook, data["_class"])

        args = dict()

        for prp in cls._col_props:
            if prp.key not in data:
                # We will let the __init__ of the class check if any
                # argument is missing, so it's safe to just skip here.
                continue

            col = prp.columns[0]

            val = data[prp.key]
            args[prp.key] = decode_value(col.type, val)

        return cls(**args)

    def add_relationships(self, data, obj):
        """Add the relationships to the given object, using the given data.

        Do what we didn't in import_object: importing relationships.
        We already know the class of the object so we simply iterate
        over its relationship properties trying to load them from the
        data (if present), checking whether they are IDs or collections
        of IDs, dereferencing them (i.e. getting the corresponding
        object) and reflecting all on the given object.

        Note that both this method and import_object don't check if
        the given data has more items than the ones we understand and
        use.

        data (dict): the serialized object.
        obj (object): the instance built from data by import_object.

        raise (RuntimeError): if a relationship value is not None, a
            string, a list or a dict.

        """

        cls = type(obj)

        for prp in cls._rel_props:
            if prp.key not in data:
                # Relationships are always optional
                continue

            val = data[prp.key]
            if val is None:
                setattr(obj, prp.key, None)
            elif isinstance(val, str):
                setattr(obj, prp.key, self.objs[val])
            elif isinstance(val, list):
                setattr(obj, prp.key, [self.objs[i] for i in val])
            elif isinstance(val, dict):
                setattr(obj, prp.key,
                        {k: self.objs[v] for k, v in iteritems(val)})
            else:
                raise RuntimeError(
                    "Unknown RelationshipProperty value: %s" % type(val))

    def safe_put_file(self, path, descr_path):
        """Put a file to FileCacher signaling every error (including
        digest mismatch).

        path (string): the path from which to load the file.
        descr_path (string): same for description.

        return (bool): True if all ok, False if something wrong.

        """

        # TODO - Probably this method could be merged in FileCacher

        # First read the description; a missing or unreadable one is
        # not an error, the file just gets an empty description.
        try:
            with io.open(descr_path, 'rt', encoding='utf-8') as fin:
                description = fin.read()
        except IOError:
            description = ''

        # Put the file.
        try:
            digest = self.file_cacher.put_file_from_path(path, description)
        except Exception as error:
            logger.critical("File %s could not be put to file server (%r), "
                            "aborting.", path, error)
            return False

        # Then check the digest.
        calc_digest = path_digest(path)
        if digest != calc_digest:
            logger.critical("File %s has hash %s, but the server returned %s, "
                            "aborting.", path, calc_digest, digest)
            return False

        return True
class ContestImporter(object):

    """This service imports a contest from a directory that has been
    the target of a ContestExport. The process of exporting and
    importing again should be idempotent.

    """

    def __init__(self, drop, import_source,
                 load_files, load_model,
                 skip_generated, skip_submissions, skip_user_tests):
        """Store the import options and create the FileCacher.

        drop: if true, the database is dropped and recreated before
            importing.
        import_source (string): path of the dump; either a directory
            or an archive (.zip, .tar, .tar.gz, .tgz, .tar.bz2, .tbz2,
            .tar.xz, .txz).
        load_files: if true, the files of the dump are imported.
        load_model: if true, the objects listed in contest.json are
            imported.
        skip_generated: if true, submission results and user test
            results are left out.
        skip_submissions: if true, submissions are left out.
        skip_user_tests: if true, user tests are left out.

        """
        self.drop = drop
        self.load_files = load_files
        self.load_model = load_model
        self.skip_generated = skip_generated
        self.skip_submissions = skip_submissions
        self.skip_user_tests = skip_user_tests

        self.import_source = import_source
        # Directory the dump is read from; replaced by the extracted
        # root directory when import_source is an archive.
        self.import_dir = import_source

        self.file_cacher = FileCacher()

    def do_import(self):
        """Run the actual import code.

        return (bool): True if the import succeeded, False otherwise.

        """
        logger.info("Starting import.")

        extraction_dir = None
        try:
            if not os.path.isdir(self.import_source):
                extraction_dir = self._extract_archive()
                if extraction_dir is None:
                    return False
            return self._import_from_dir()
        finally:
            # If we extracted an archive, we remove the whole temporary
            # directory (not just the root inside it), and we do so
            # also when the import failed.
            if extraction_dir is not None:
                rmtree(extraction_dir)

    def _extract_archive(self):
        """Unpack self.import_source into a fresh temporary directory.

        On success self.import_dir is set to the root directory of the
        archive inside the temporary directory.

        return (string|None): the temporary directory that was created
            (the caller has to remove it), or None if the archive
            cannot be handled.

        """
        source = self.import_source
        archive = None
        compressed = None
        extraction_dir = None
        try:
            if source.endswith(".zip"):
                archive = zipfile.ZipFile(source, "r")
                # The tar branches hand plain path strings to
                # find_root_of_archive, so do the same here instead of
                # passing the ZipInfo objects given by infolist().
                file_names = archive.namelist()
            elif source.endswith((".tar.gz", ".tgz",
                                  ".tar.bz2", ".tbz2", ".tar")):
                archive = tarfile.open(name=source)
                file_names = archive.getnames()
            elif source.endswith((".tar.xz", ".txz")):
                try:
                    import lzma
                except ImportError:
                    logger.critical("LZMA compression format not "
                                    "supported. Please install package "
                                    "lzma.")
                    return None
                compressed = lzma.LZMAFile(source)
                archive = tarfile.open(fileobj=compressed)
                file_names = archive.getnames()
            else:
                logger.critical("Unable to import from %s." % source)
                return None

            root = find_root_of_archive(file_names)
            if root is None:
                logger.critical("Cannot find a root directory in %s." %
                                source)
                return None

            # Extract exactly once, in a single temporary directory.
            extraction_dir = tempfile.mkdtemp()
            archive.extractall(extraction_dir)
        except Exception:
            # Do not leave a half-extracted directory behind.
            if extraction_dir is not None:
                rmtree(extraction_dir)
            raise
        finally:
            if archive is not None:
                archive.close()
            if compressed is not None:
                compressed.close()

        self.import_dir = os.path.join(extraction_dir, root)
        return extraction_dir

    def _import_from_dir(self):
        """Import the model and/or the files found in self.import_dir.

        return (bool): True if everything went well, False otherwise.

        """
        if self.drop:
            logger.info("Dropping and recreating the database.")
            try:
                if not (drop_db() and init_db()):
                    logger.critical("Unexpected error while dropping "
                                    "and recreating the database.",
                                    exc_info=True)
                    return False
            except Exception as error:
                logger.critical("Unable to access DB.\n%r" % error)
                return False

        with SessionGen() as session:

            # Import the contest in JSON format.
            if self.load_model:
                loaded = self._load_model(session)
                if loaded is None:
                    return False
                contest_id, contest_files = loaded
            else:
                contest_id = None
                contest_files = None

            # Import files.
            if self.load_files and not self._load_files(contest_files):
                return False

        if contest_id is not None:
            logger.info("Import finished (contest id: %s)." %
                        ", ".join(str(id_) for id_ in contest_id))
        else:
            logger.info("Import finished.")

        return True

    def _load_model(self, session):
        """Read contest.json and add its contests to the session.

        session: the database session the contests are added to; it
            is committed once all of them have been added.

        return (tuple|None): the list of ids of the imported contests
            and the set of files they enumerate, or None if the dump
            is newer than the current data model.

        """
        logger.info("Importing the contest from a JSON file.")

        with io.open(os.path.join(self.import_dir,
                                  "contest.json"), "rb") as fin:
            # TODO - Throughout all the code we'll assume the
            # input is correct without actually doing any
            # validations. Thus, for example, we're not
            # checking that the decoded object is a dict...
            self.datas = json.load(fin, encoding="utf-8")

        # If the dump has been exported using a data model
        # different than the current one (that is, a previous
        # one) we try to update it.
        # If no "_version" field is found we assume it's a v1.0
        # export (before the new dump format was introduced).
        dump_version = self.datas.get("_version", 0)

        if dump_version < model_version:
            logger.warning(
                "The dump you're trying to import has been created "
                "by an old version of CMS. It may take a while to "
                "adapt it to the current data model. You can use "
                "cmsDumpUpdater to update the on-disk dump and "
                "speed up future imports.")

        if dump_version > model_version:
            logger.critical(
                "The dump you're trying to import has been created "
                "by a version of CMS newer than this one and there "
                "is no way to adapt it to the current data model. "
                "You probably need to update CMS to handle it. It's "
                "impossible to proceed with the importation.")
            return None

        for version in range(dump_version, model_version):
            # Update from version to version+1
            updater = __import__(
                "cmscontrib.updaters.update_%d" % (version + 1),
                globals(), locals(), ["Updater"]).Updater(self.datas)
            self.datas = updater.run()
            self.datas["_version"] = version + 1

        assert self.datas["_version"] == model_version

        # First create all objects, then link them: a relationship
        # may refer to an object that appears later in the dump.
        self.objs = dict()
        for id_, data in self.datas.iteritems():
            if not id_.startswith("_"):
                self.objs[id_] = self.import_object(data)
        for id_, data in self.datas.iteritems():
            if not id_.startswith("_"):
                self.add_relationships(data, self.objs[id_])

        # Drop what we were asked to skip. A single combined test
        # guarantees that each key is deleted at most once.
        for k, v in list(self.objs.iteritems()):
            if (self.skip_submissions and isinstance(v, Submission)) \
                    or (self.skip_user_tests and isinstance(v, UserTest)) \
                    or (self.skip_generated and
                        isinstance(v, (SubmissionResult, UserTestResult))):
                del self.objs[k]

        contest_id = list()
        contest_files = set()

        # Add each base object and all its dependencies
        for id_ in self.datas["_objects"]:
            contest = self.objs[id_]

            # We explicitly add only the contest since all child
            # objects will be automatically added by cascade.
            # Adding each object individually would also add
            # orphaned objects like the ones that depended on
            # submissions or user_tests that we (possibly)
            # removed above.
            session.add(contest)
            session.flush()

            contest_id += [contest.id]
            contest_files |= contest.enumerate_files(
                self.skip_submissions, self.skip_user_tests,
                self.skip_generated)

        session.commit()

        return contest_id, contest_files

    def _load_files(self, contest_files):
        """Put the files of the dump in the FileCacher.

        contest_files (set|None): the digests needed by the imported
            contests, or None if the model was not loaded (in which
            case every file of the dump is imported).

        return (bool): True if all files were stored, False as soon as
            one of them could not be.

        """
        logger.info("Importing files.")

        files_dir = os.path.join(self.import_dir, "files")
        descr_dir = os.path.join(self.import_dir, "descriptions")

        files = set(os.listdir(files_dir))
        descr = set(os.listdir(descr_dir))

        # A file without description is an element of files that is
        # not in descr, and vice versa.
        if not files <= descr:
            logger.warning("Some files do not have an associated "
                           "description.")
        if not descr <= files:
            logger.warning("Some descriptions do not have an "
                           "associated file.")
        if not (contest_files is None or files <= contest_files):
            # FIXME Check if it's because this is a light import
            # or because we're skipping submissions or user_tests
            logger.warning("The dump contains some files that are "
                           "not needed by the contest.")
        if not (contest_files is None or contest_files <= files):
            # The reason for this could be that it was a light
            # export that's not being reimported as such.
            logger.warning("The contest needs some files that are "
                           "not contained in the dump.")

        # Limit import to files we actually need.
        if contest_files is not None:
            files &= contest_files

        for digest in files:
            file_ = os.path.join(files_dir, digest)
            desc = os.path.join(descr_dir, digest)
            if not self.safe_put_file(file_, desc):
                logger.critical(
                    "Unable to put file `%s' in the database. "
                    "Aborting. Please remove the contest "
                    "from the database." % file_)
                # TODO: remove contest from the database.
                return False

        return True

    def import_object(self, data):
        """Import objects from the given data (without relationships).

        The given data is assumed to be a dict in the format produced by
        ContestExporter. This method reads the "_class" item and tries
        to find the corresponding class. Then it loads all column
        properties of that class (those that are present in the data)
        and uses them as keyword arguments in a call to the class
        constructor (if a required property is missing this call will
        raise an error).

        Relationships are not handled by this method, since we may not
        have all referenced objects available yet. Thus we prefer to add
        relationships in a later moment, using the add_relationships
        method.

        Note that both this method and add_relationships don't check if
        the given data has more items than the ones we understand and
        use.

        data (dict): the dumped representation of a single object.

        return (object): a new instance of the class named by "_class".

        raise (RuntimeError): if a column has a type we cannot decode.

        """
        cls = getattr(class_hook, data["_class"])

        args = dict()

        for prp in cls._col_props:
            if prp.key not in data:
                # We will let the __init__ of the class check if any
                # argument is missing, so it's safe to just skip here.
                continue

            col = prp.columns[0]
            col_type = type(col.type)

            val = data[prp.key]
            if col_type in [Boolean, Integer, Float, Unicode,
                            RepeatedUnicode]:
                args[prp.key] = val
            elif col_type is String:
                args[prp.key] = \
                    val.encode('latin1') if val is not None else None
            elif col_type is DateTime:
                args[prp.key] = \
                    make_datetime(val) if val is not None else None
            elif col_type is Interval:
                args[prp.key] = \
                    timedelta(seconds=val) if val is not None else None
            else:
                raise RuntimeError(
                    "Unknown SQLAlchemy column type: %s" % col_type)

        return cls(**args)

    def add_relationships(self, data, obj):
        """Add the relationships to the given object, using the given data.

        Do what we didn't in import_object: importing relationships.
        We already know the class of the object so we simply iterate
        over its relationship properties trying to load them from the
        data (if present), checking whether they are IDs or collections
        of IDs, dereferencing them (i.e. getting the corresponding
        object) and reflecting all on the given object.

        Note that both this method and import_object don't check if
        the given data has more items than the ones we understand and
        use.

        data (dict): the dumped representation of obj.
        obj (object): the object, as built by import_object.

        raise (RuntimeError): if a relationship value is neither None,
            an ID, a list of IDs nor a dict of IDs.

        """
        cls = type(obj)

        for prp in cls._rel_props:
            if prp.key not in data:
                # Relationships are always optional
                continue

            val = data[prp.key]
            if val is None:
                setattr(obj, prp.key, None)
            elif isinstance(val, unicode):
                setattr(obj, prp.key, self.objs[val])
            elif isinstance(val, list):
                setattr(obj, prp.key, [self.objs[i] for i in val])
            elif isinstance(val, dict):
                setattr(obj, prp.key,
                        dict((k, self.objs[v])
                             for k, v in val.iteritems()))
            else:
                raise RuntimeError(
                    "Unknown RelationshipProperty value: %s" % type(val))

    def safe_put_file(self, path, descr_path):
        """Put a file to FileCacher signaling every error (including
        digest mismatch).

        path (string): the path from which to load the file.
        descr_path (string): same for description.

        return (bool): True if all ok, False if something wrong.

        """
        # TODO - Probably this method could be merged in FileCacher

        # First read the description; a missing or unreadable
        # description is not an error, we just store an empty one.
        try:
            with io.open(descr_path, 'rt', encoding='utf-8') as fin:
                description = fin.read()
        except IOError:
            description = ''

        # Put the file.
        try:
            digest = self.file_cacher.put_file_from_path(path, description)
        except Exception as error:
            logger.critical("File %s could not be put to file server (%r), "
                            "aborting." % (path, error))
            return False

        # Then check the digest.
        calc_digest = sha1sum(path)
        if digest != calc_digest:
            logger.critical("File %s has hash %s, but the server returned %s, "
                            "aborting." % (path, calc_digest, digest))
            return False

        return True
def test_testcases(base_dir, soluzione, language, assume=None):
    """Evaluate a solution on all testcases of a task and print a report.

    The task and the FileCacher are kept in the module-level globals
    `task` and `file_cacher`, so that they are created only on the
    first call.

    base_dir (string): directory of the task; its parent directory is
        given to the loader and its last path component is used as
        the task name.
    soluzione (string): path of the solution, relative to base_dir.
    language: the language passed to every EvaluationJob.
    assume (string|None): if not None, used as the answer to the
        "consider everything to timeout" question instead of reading
        it from standard input.

    return: zip of (points, comment, info), one item per testcase in
        sorted order, including the testcases that were skipped.

    """
    global task, file_cacher

    # Use a FileCacher with a NullBackend in order to avoid to fill
    # the database with junk
    if file_cacher is None:
        file_cacher = FileCacher(null=True)

    # Load the task
    # TODO - This implies copying a lot of data to the FileCacher,
    # which is annoying if you have to do it continuously; it would be
    # better to use a persistent cache (although local, possibly
    # filesystem-based instead of database-based) and somehow detect
    # when the task has already been loaded
    if task is None:
        loader = YamlLoader(
            os.path.realpath(os.path.join(base_dir, "..")),
            file_cacher)
        # Normally we should import the contest before, but YamlLoader
        # accepts get_task() even without previous get_contest() calls
        task = loader.get_task(os.path.split(os.path.realpath(base_dir))[1])

    # Prepare the EvaluationJob
    dataset = task.active_dataset
    digest = file_cacher.put_file_from_path(
        os.path.join(base_dir, soluzione),
        "Solution %s for task %s" % (soluzione, task.name))
    executables = {task.name: Executable(filename=task.name, digest=digest)}
    jobs = [(t, EvaluationJob(
        language=language,
        task_type=dataset.task_type,
        task_type_parameters=json.loads(dataset.task_type_parameters),
        managers=dict(dataset.managers),
        executables=executables,
        input=dataset.testcases[t].input,
        output=dataset.testcases[t].output,
        time_limit=dataset.time_limit,
        memory_limit=dataset.memory_limit)) for t in dataset.testcases]
    tasktype = get_task_type(dataset=dataset)

    ask_again = True
    last_status = "ok"
    status = "ok"
    stop = False
    info = []
    points = []
    comments = []
    tcnames = []
    for jobinfo in sorted(jobs):
        print(jobinfo[0], end='')
        sys.stdout.flush()
        job = jobinfo[1]

        # Skip the testcase if we decide to consider everything to
        # timeout
        if stop:
            info.append("Time limit exceeded")
            points.append(0.0)
            comments.append("Timeout.")
            # Record the name too, so that the four lists stay aligned
            # and the skipped testcases show up in the report.
            tcnames.append(jobinfo[0])
            continue

        # Evaluate testcase
        last_status = status
        tasktype.evaluate(job, file_cacher)
        status = job.plus["exit_status"]
        info.append("Time: %5.3f   Wall: %5.3f   Memory: %s" %
                    (job.plus["execution_time"],
                     job.plus["execution_wall_clock_time"],
                     mem_human(job.plus["execution_memory"])))
        points.append(float(job.outcome))
        comments.append(format_status_text(job.text))
        tcnames.append(jobinfo[0])

        # If we saw two consecutive timeouts, ask whether we want to
        # consider everything to timeout
        if ask_again and status == "timeout" and last_status == "timeout":
            print()
            print("Want to stop and consider everything to timeout? [y/N]",
                  end='')
            if assume is not None:
                print(assume)
                tmp = assume
            else:
                tmp = raw_input().lower()
            if tmp in ['y', 'yes']:
                stop = True
            else:
                ask_again = False

    # Result pretty printing
    print()
    # The "or [0]" keeps max() from failing when there is no testcase.
    clen = max([len(c) for c in comments] or [0])
    ilen = max([len(i) for i in info] or [0])
    for (i, p, c, b) in zip(tcnames, points, comments, info):
        print("%s) %5.2lf --- %s [%s]" % (i, p, c.ljust(clen), b.center(ilen)))

    return zip(points, comments, info)
def test_testcases(base_dir, soluzione, language, assume=None):
    """Evaluate a solution on all testcases of a task and print a report.

    The task and the FileCacher are kept in the module-level globals
    `task` and `file_cacher`, so that they are created only on the
    first call.

    For task types other than "OutputOnly" the solution file is stored
    in the FileCacher and used as the executable of every job. For
    "OutputOnly" tasks the solution is run once per testcase, with the
    testcase input on its standard input, and what it writes on
    standard output is stored as the file "output_<testcase>.txt".

    base_dir (string): directory of the task; its parent directory is
        given to the loader and its last path component is used as
        the task name. It is also the working directory in which the
        solution of an "OutputOnly" task is run.
    soluzione (string): path of the solution relative to base_dir or,
        for "OutputOnly" tasks, the command to run.
    language: the language passed to every EvaluationJob (not used
        for "OutputOnly" tasks).
    assume (string|None): if not None, used as the answer to the
        "consider everything to timeout" question instead of reading
        it from standard input.

    return: zip of (points, comment, info), one item per testcase in
        sorted order, including the testcases that were skipped.

    """
    global task, file_cacher

    # Use a disabled FileCacher with a FSBackend in order to avoid to fill
    # the database with junk and to save up space.
    if file_cacher is None:
        file_cacher = FileCacher(path=os.path.join(config.cache_dir,
                                                   'cmsMake'),
                                 enabled=False)

    # Load the task
    if task is None:
        loader = YamlLoader(
            os.path.realpath(os.path.join(base_dir, "..")),
            file_cacher)
        # Normally we should import the contest before, but YamlLoader
        # accepts get_task() even without previous get_contest() calls
        task = loader.get_task(os.path.split(os.path.realpath(base_dir))[1])

    # Prepare the EvaluationJob
    dataset = task.active_dataset
    output_only = dataset.task_type == "OutputOnly"
    if not output_only:
        digest = file_cacher.put_file_from_path(
            os.path.join(base_dir, soluzione),
            "Solution %s for task %s" % (soluzione, task.name))
        executables = {task.name: Executable(filename=task.name,
                                             digest=digest)}
        jobs = [(t, EvaluationJob(
            language=language,
            task_type=dataset.task_type,
            task_type_parameters=json.loads(dataset.task_type_parameters),
            managers=dict(dataset.managers),
            executables=executables,
            input=dataset.testcases[t].input,
            output=dataset.testcases[t].output,
            time_limit=dataset.time_limit,
            memory_limit=dataset.memory_limit)) for t in dataset.testcases]
    else:
        print("Generating outputs...", end='')
        files = {}
        for t in sorted(dataset.testcases.keys()):
            with file_cacher.get_file(dataset.testcases[t].input) as fin, \
                    TemporaryFile() as fout:
                print(str(t), end='')
                call(soluzione, stdin=fin, stdout=fout, cwd=base_dir)
                # Rewind, so that the FileCacher reads what the
                # solution has just written.
                fout.seek(0)
                digest = file_cacher.put_file_from_fobj(fout)
                outname = "output_%s.txt" % t
                files[outname] = File(filename=outname, digest=digest)
        # Every job receives the same dict with all the outputs.
        jobs = [(t, EvaluationJob(
            task_type=dataset.task_type,
            task_type_parameters=json.loads(dataset.task_type_parameters),
            managers=dict(dataset.managers),
            files=files,
            input=dataset.testcases[t].input,
            output=dataset.testcases[t].output,
            time_limit=dataset.time_limit,
            memory_limit=dataset.memory_limit)) for t in dataset.testcases]
        for k, job in jobs:
            job._key = k
        print()
    tasktype = get_task_type(dataset=dataset)

    ask_again = True
    last_status = "ok"
    status = "ok"
    stop = False
    info = []
    points = []
    comments = []
    tcnames = []
    for jobinfo in sorted(jobs):
        print(jobinfo[0], end='')
        sys.stdout.flush()
        job = jobinfo[1]

        # Skip the testcase if we decide to consider everything to
        # timeout
        if stop:
            info.append("Time limit exceeded")
            points.append(0.0)
            comments.append("Timeout.")
            # Record the name too, so that the four lists stay aligned
            # and the skipped testcases show up in the report.
            tcnames.append(jobinfo[0])
            continue

        # Evaluate testcase
        last_status = status
        tasktype.evaluate(job, file_cacher)
        if not output_only:
            status = job.plus["exit_status"]
            info.append("Time: %5.3f   Wall: %5.3f   Memory: %s" %
                        (job.plus["execution_time"],
                         job.plus["execution_wall_clock_time"],
                         mem_human(job.plus["execution_memory"])))
        else:
            status = "ok"
            info.append("N/A")
        points.append(float(job.outcome))
        comments.append(format_status_text(job.text))
        tcnames.append(jobinfo[0])

        # If we saw two consecutive timeouts, ask whether we want to
        # consider everything to timeout
        if ask_again and status == "timeout" and last_status == "timeout":
            print()
            print("Want to stop and consider everything to timeout? [y/N]",
                  end='')
            if assume is not None:
                print(assume)
                tmp = assume
            else:
                tmp = raw_input().lower()
            if tmp in ['y', 'yes']:
                stop = True
            else:
                ask_again = False

    # Result pretty printing
    print()
    # The "or [0]" keeps max() from failing when there is no testcase.
    clen = max([len(c) for c in comments] or [0])
    ilen = max([len(i) for i in info] or [0])
    for (i, p, c, b) in zip(tcnames, points, comments, info):
        print("%s) %5.2lf --- %s [%s]" % (i, p, c.ljust(clen), b.center(ilen)))

    return zip(points, comments, info)