def __init__(self, contest_ids, export_target, dump_files, dump_model,
             skip_generated, skip_submissions, skip_user_tests,
             skip_print_jobs):
    """Record what has to be exported and where the dump goes.

    contest_ids ([int]|None): ids of the contests to export; None
        selects every contest, every user and every task that is not
        attached to a contest.
    export_target (str): destination of the dump; when empty (or
        None) a name derived from today's date is used instead.
    dump_files, dump_model, skip_generated, skip_submissions,
        skip_user_tests, skip_print_jobs: stored unchanged on the
        instance.

    """
    if contest_ids is None:
        with SessionGen() as session:
            contests = session.query(Contest).all()
            self.contests_ids = [contest.id for contest in contests]
            users = session.query(User).all()
            self.users_ids = [user.id for user in users]
            tasks = session.query(Task)\
                .filter(Task.contest_id.is_(None)).all()
            self.tasks_ids = [task.id for task in tasks]
    else:
        # FIXME: this is ATM broken, because if you export a contest, you
        # then export the users who participated in it and then all of the
        # contests those users participated in.
        self.contests_ids = contest_ids
        self.users_ids = []
        self.tasks_ids = []

    self.dump_files = dump_files
    self.dump_model = dump_model
    self.skip_generated = skip_generated
    self.skip_submissions = skip_submissions
    self.skip_user_tests = skip_user_tests
    self.skip_print_jobs = skip_print_jobs
    self.export_target = export_target

    # If target is not provided, we name the dump after today's date.
    # A truthiness test (rather than len()) also copes with None.
    if not export_target:
        self.export_target = "dump_%s.tar.gz" % date.today().isoformat()
        logger.warning("export_target not given, using \"%s\"",
                       self.export_target)

    self.file_cacher = FileCacher()
def clean_files(session, dry_run):
    """Find (and optionally delete) files not referenced by the database.

    session: database session used to scan the digest columns.
    dry_run (bool): when true only report the orphans, do not delete.

    """
    filecacher = FileCacher()
    files = set(entry[0] for entry in filecacher.list())
    logger.info("A total number of %d files are present in the file store",
                len(files))
    for cls in [Attachment, Executable, File, Manager, PrintJob,
                Statement, Testcase, UserTest, UserTestExecutable,
                UserTestFile, UserTestManager, UserTestResult]:
        cols = [col for col in ("input", "output", "digest")
                if hasattr(cls, col)]
        if not cols:
            continue
        # Load the rows once per class instead of once per column.
        objects = session.query(cls).all()
        for col in cols:
            found_digests = set(getattr(obj, col) for obj in objects)
            found_digests.discard(FileCacher.TOMBSTONE_DIGEST)
            logger.info("Found %d digests while scanning %s.%s",
                        len(found_digests), cls.__name__, col)
            files -= found_digests
    logger.info("%d digests are orphan.", len(files))
    total_size = sum(filecacher.get_size(orphan) for orphan in files)
    logger.info("Orphan files take %s bytes of disk space",
                "{:,}".format(total_size))
    if not dry_run:
        # Count from 1 so the progress message reports files actually
        # deleted (the old code logged "0 files deleted" after the first).
        for count, orphan in enumerate(files, 1):
            filecacher.delete(orphan)
            if count % 100 == 0:
                logger.info("%d files deleted from the file store", count)
        logger.info("All orphan files have been deleted")
def __init__(self, contest_id, spool_dir):
    """Store the contest id and spool locations; create a FileCacher.

    contest_id: stored unchanged as self.contest_id.
    spool_dir: base directory; uploads live in its "upload" child.

    """
    self.contest_id = contest_id
    self.spool_dir = spool_dir
    self.upload_dir = os.path.join(spool_dir, "upload")

    # Filled in later; nothing is loaded at construction time.
    self.contest = None
    self.submissions = None

    self.file_cacher = FileCacher()
def setUp(self):
    """Create a fresh FileCacher and reset the per-test attributes."""
    # Alternative kept for reference:
    # FileCacher(self, path="fs-storage")
    cacher = FileCacher()
    self.file_cacher = cacher
    self.cache_base_path = cacher.file_dir

    # Nothing has been stored or generated yet.
    self.cache_path = None
    self.content = None
    self.fake_content = None
    self.digest = None
    self.file_obj = None
def __init__(self, shard):
    """Initialize the service, its file cacher, lock and counters.

    shard: forwarded to Service.__init__.

    """
    Service.__init__(self, shard)
    self.file_cacher = FileCacher(self)
    self.work_lock = gevent.coros.RLock()

    # Timing/usage bookkeeping, all starting from scratch.
    self._last_end_time = None
    self._total_free_time = 0
    self._total_busy_time = 0
    self._number_execution = 0
def __init__(self, shard, fake_worker_time=None, listen_on_address=None):
    """Initialize the service, its file cacher, lock and counters.

    shard: forwarded to Service.__init__.
    fake_worker_time: stored unchanged as self._fake_worker_time.
    listen_on_address: forwarded to Service.__init__.

    """
    Service.__init__(self, shard, listen_on_address)
    self.file_cacher = FileCacher(self)
    self.work_lock = gevent.lock.RLock()

    # Timing/usage bookkeeping, all starting from scratch.
    self._last_end_time = None
    self._total_free_time = 0
    self._total_busy_time = 0
    self._number_execution = 0

    self._fake_worker_time = fake_worker_time
def __init__(self, drop, import_source, load_files, load_model,
             skip_generated, skip_submissions, skip_user_tests):
    """Store the import options and create a FileCacher.

    import_source: where to import from; it is also the initial
        value of self.import_dir.
    All other arguments are stored unchanged on the instance.

    """
    # Where to read from.
    self.import_source = import_source
    self.import_dir = import_source

    # What to do with it.
    self.drop = drop
    self.load_files = load_files
    self.load_model = load_model
    self.skip_generated = skip_generated
    self.skip_submissions = skip_submissions
    self.skip_user_tests = skip_user_tests

    self.file_cacher = FileCacher()
def __init__(self, shard):
    """Initialize the test service and its (initially empty) state.

    shard: shard number used for the logger and the base service.

    """
    coord = ServiceCoord("TestFileCacher", shard)
    logger.initialize(coord)
    TestService.__init__(self, shard, custom_logger=logger)

    # Assume we store the cache in "./cache/fs-cache-TestFileCacher-0/"
    self.cache_base_path = os.path.join(config.cache_dir,
                                        "fs-cache-TestFileCacher-0")

    # Per-test state, populated by the individual tests.
    self.cache_path = None
    self.content = None
    self.fake_content = None
    self.digest = None
    self.file_obj = None

    self.file_cacher = FileCacher(self)
def __init__(self, rulesdir, source, output, party, wdir,
             extra=None, ignore=None, ignore_ext=None, do_copy=None):
    """Set up a latexmk invocation for the given source file.

    rulesdir: forwarded to the parent constructor.
    source: the file handed to latexmk as last argument.
    output, party, wdir: stored unchanged on the instance.
    extra ([str]|None): additional latexmk arguments; None selects
        the restricted lualatex command line below.
    ignore, ignore_ext, do_copy (set|None): copied onto the
        instance; None means an empty set.

    """
    super(SafeLaTeXRule, self).__init__(rulesdir)
    self.source = source
    self.output = output
    self.party = party
    self.wdir = wdir
    self.file_cacher = FileCacher()

    # Build the default per call: a list literal in the signature
    # would be one shared object stored on every instance.
    if extra is None:
        extra = [
            "-lualatex=lualatex --interaction=nonstopmode "
            "--shell-restricted --nosocket %O %S"
        ]
    self.extra = extra
    self.command = [
        "/usr/bin/latexmk", "-g", "-pdflua", "-deps", "-deps-out=.deps"
    ] + self.extra + [source]

    self.ignore = set() if ignore is None else copy(ignore)
    self.ignore_ext = set() if ignore_ext is None else copy(ignore_ext)
    self.do_copy = set() if do_copy is None else copy(do_copy)
def __init__(self, contest_ids, export_target, dump_files, dump_model,
             skip_generated, skip_submissions, skip_user_tests):
    """Record what has to be exported and where the dump goes.

    contest_ids ([int]|None): ids of the contests to export; None
        selects every contest, every user and every task that is not
        attached to a contest.
    export_target (str): destination of the dump; when it is the
        empty string a name derived from today's date is used.
    The remaining flags are stored unchanged on the instance.

    """
    if contest_ids is not None:
        # FIXME: this is ATM broken, because if you export a contest, you
        # then export the users who participated in it and then all of the
        # contests those users participated in.
        self.contests_ids = contest_ids
        self.users_ids = []
        self.tasks_ids = []
    else:
        with SessionGen() as session:
            self.contests_ids = [
                contest.id for contest in session.query(Contest).all()]
            self.users_ids = [
                user.id for user in session.query(User).all()]
            unattached = session.query(Task)\
                .filter(Task.contest_id.is_(None)).all()
            self.tasks_ids = [task.id for task in unattached]

    self.dump_files = dump_files
    self.dump_model = dump_model
    self.skip_generated = skip_generated
    self.skip_submissions = skip_submissions
    self.skip_user_tests = skip_user_tests
    self.export_target = export_target

    # If target is not provided, we name the dump after today's date.
    if export_target == "":
        self.export_target = "dump_%s.tar.gz" % date.today().isoformat()
        logger.warning("export_target not given, using \"%s\"",
                       self.export_target)

    self.file_cacher = FileCacher()
def __init__(self, shard, contest):
    """Configure the web server for one contest and connect to peers.

    shard (int): index into config.contest_listen_address/port.
    contest: stored unchanged as self.contest.

    raise (ConfigError): if no address/port is configured for shard.

    """
    template_dir = pkg_resources.resource_filename(
        "cms.server.contest", "templates")
    parameters = {
        "login_url": "/",
        "template_path": template_dir,
        "static_files": [("cms.server", "static"),
                         ("cms.server.contest", "static")],
        "cookie_secret": base64.b64encode(config.secret_key),
        "debug": config.tornado_debug,
        "is_proxy_used": config.is_proxy_used,
    }

    try:
        listen_address = config.contest_listen_address[shard]
        listen_port = config.contest_listen_port[shard]
    except IndexError:
        raise ConfigError("Wrong shard number for %s, or missing "
                          "address/port configuration. Please check "
                          "contest_listen_address and contest_listen_port "
                          "in cms.conf." % __name__)

    super(ContestWebServer, self).__init__(
        listen_port, HANDLERS, parameters,
        shard=shard, listen_address=listen_address)

    self.contest = contest

    # This is a dictionary (indexed by username) of pending
    # notification. Things like "Yay, your submission went
    # through.", not things like "Your question has been replied",
    # that are handled by the db. Each username points to a list
    # of tuples (timestamp, subject, text).
    self.notifications = {}

    # Retrieve the available translations.
    self.langs = {
        lang_code: wrap_translations_for_tornado(trans)
        for lang_code, trans in get_translations().iteritems()
    }

    self.file_cacher = FileCacher(self)

    self.evaluation_service = self.connect_to(
        ServiceCoord("EvaluationService", 0))
    self.scoring_service = self.connect_to(
        ServiceCoord("ScoringService", 0))

    # The optional services are mandatory only when configured.
    ranking_enabled = len(config.rankings) > 0
    self.proxy_service = self.connect_to(
        ServiceCoord("ProxyService", 0),
        must_be_present=ranking_enabled)

    printing_enabled = config.printer is not None
    self.printing_service = self.connect_to(
        ServiceCoord("PrintingService", 0),
        must_be_present=printing_enabled)
def __init__(self, shard):
    """Initialize logging, the base service, the cacher and the lock.

    shard: shard number used for the logger and the base service.

    """
    coord = ServiceCoord("Worker", shard)
    logger.initialize(coord)
    Service.__init__(self, shard, custom_logger=logger)

    self.file_cacher = FileCacher(self)
    self.work_lock = gevent.coros.RLock()

    self._ignore_job = False
def extract_complexity(task_id, file_lengther=None):
    """Extract the complexity of all submissions of the task.

    One line per analysed submission is written to the file
    task_<id>.info in the current directory.

    task_id (int): the id of the task we are interested in.
    file_lengther (type): a File-like class used to measure the
        dimension of each testcase input (forwarded to file_length).

    return (int): 0 if operation was successful, -1 if the task does
        not exist.

    """
    with SessionGen() as session:
        task = Task.get_from_id(task_id, session)
        if task is None:
            return -1

        # Extracting the length of the testcase.
        file_cacher = FileCacher()
        testcases_lengths = []
        for testcase in task.testcases:
            testcases_lengths.append(
                file_length(testcase.input, file_cacher, file_lengther))
        file_cacher.purge_cache()

        # Compute the complexity of the solutions.
        with io.open("task_%s.info" % task_id,
                     "wt", encoding="utf-8") as info:
            for submission in task.contest.get_submissions():
                # Only evaluated submissions of this very task matter.
                if submission.task_id != task_id or \
                        not submission.evaluated():
                    continue

                print(submission.participation.user.username)
                result = extract_complexity_submission(
                    testcases_lengths, submission)
                if result[1] is None:
                    continue

                info.write("Submission: %s" % submission.id)
                info.write(" - user: %15s" %
                           submission.participation.user.username)
                info.write(" - task: %s" % task.name)
                if result[0] is not None:
                    info.write(" - score: %6.2lf" % result[0])
                info.write(" - complexity: %20s" %
                           complexity_to_string(result[1]))
                if result[2] is not None:
                    info.write(" - confidence %5.1lf" % result[2])
                info.write("\n")

    return 0
def build(self):
    """Build the task in self.wdir and return its statement path(s).

    return: a list of absolute paths when self.language is "ALL";
        the absolute path of the requested language's statement (or
        None if missing) when self.language is set; otherwise the
        absolute path of the single primary statement, or None if
        there is none.

    raise (Exception): if more than one statement is primary.

    """
    cache_dir = os.path.join(self.wdir, ".cache")
    file_cacher = FileCacher(path=cache_dir)

    with chdir(self.wdir):
        relevant = self.language if self.language != "ALL" else None
        contestconfig = ContestConfig(
            os.path.join(self.wdir, ".rules"),
            "hidden contest",
            relevant_language=relevant,
            ignore_latex=self.no_latex,
            minimal=self.minimal)
        copyifnecessary(
            os.path.join(contestconfig._get_ready_dir(),
                         "contest-template.py"),
            os.path.join(self.wdir, "c.py"))
        contestconfig._readconfig("c.py")
        contestconfig._task(self.task, contestconfig.full_feedback,
                            None, self.minimal, standalone_task=True)

        if not self.minimal:
            cdb = contestconfig._makecontest()
            testuser = contestconfig._mytestuser
            test_udb = contestconfig._makeuser(testuser.username)
            test_gdb = contestconfig._makegroup(testuser.group.name, cdb)
            # We're not putting the test user on any team for testing
            # (shouldn't be needed).
            test_pdb = contestconfig._makeparticipation(
                testuser.username, cdb, test_udb, test_gdb, None)
            for t in contestconfig.tasks.values():
                tdb = t._makedbobject(cdb, file_cacher)
                t._make_test_submissions(test_pdb, tdb, self.local_test)

    # Only the first task's statements are considered.
    statements = list(contestconfig.tasks.values())[0]._statements

    if self.language == "ALL":
        return [os.path.abspath(s.file_) for s in statements.values()]

    if self.language is not None:
        if self.language not in statements:
            return None
        return os.path.abspath(statements[self.language].file_)

    primary_statements = [s for s in statements.values() if s.primary]
    if len(primary_statements) > 1:
        raise Exception("More than one primary statement")
    if not primary_statements:
        return None
    return os.path.abspath(primary_statements[0].file_)
def add_statement(task_name, language_code, statement_type,
                  statement_file, overwrite):
    """Store a statement file and attach it to a task.

    task_name (str): name of the task receiving the statement.
    language_code (str): language of the statement.
    statement_type (str|None): type of the statement; None aborts.
    statement_file (str): path of the file to store.
    overwrite (bool): replace an existing statement with the same
        language and type instead of failing.

    return (bool): True if the statement was added, False otherwise.

    """
    logger.info(
        "Adding the statement(language: %s) of task %s "
        "in the database.", language_code, task_name)

    if statement_type is None:
        return False

    if not os.path.exists(statement_file):
        logger.error("Statement file (path: %s) does not exist.",
                     statement_file)
        return False

    with SessionGen() as session:
        task = session.query(Task)\
            .filter(Task.name == task_name).first()
        if not task:
            logger.error("No task named %s", task_name)
            return False
        try:
            file_cacher = FileCacher()
            digest = file_cacher.put_file_from_path(
                statement_file, "%s Statement (lang: %s) for task %s" %
                (statement_type.upper(), language_code, task_name))
        except Exception:
            logger.error("Task statement storage failed.", exc_info=True)
            # Without a digest there is nothing to attach; falling
            # through would fail on the unbound name below.
            return False
        arr = session.query(Statement)\
            .filter(Statement.language == language_code)\
            .filter(Statement.statement_type == statement_type)\
            .filter(Statement.task == task)\
            .all()
        if arr:  # Statement already exists
            if overwrite:
                logger.info("Overwriting already existing statement.")
                session.delete(arr[0])
                session.commit()
            else:
                logger.error("A statement of the given type and language "
                             "already exists. Not overwriting.")
                return False
        statement = Statement(language_code, statement_type, digest,
                              task=task)
        session.add(statement)
        session.commit()

    logger.info("Statement added.")
    return True
def __init__(self, path, contest_id, force, loader_class, full):
    """Store the options and instantiate the loader for path.

    path: location to load from; resolved with os.path.realpath.
    contest_id: stored as self.old_contest_id.
    force, full: stored unchanged on the instance.
    loader_class: called with the resolved path and the file cacher.

    """
    cacher = FileCacher()
    self.file_cacher = cacher

    self.old_contest_id = contest_id
    self.force = force
    self.full = full

    resolved = os.path.realpath(path)
    self.loader = loader_class(resolved, cacher)
def __init__(self, path, prefix, update, no_statement, contest_id,
             loader_class):
    """Store the options and instantiate the loader for path.

    path: location to load from; made absolute with os.path.abspath.
    prefix, update, no_statement, contest_id: stored unchanged.
    loader_class: called with the absolute path and the file cacher.

    """
    cacher = FileCacher()
    self.file_cacher = cacher

    self.prefix = prefix
    self.update = update
    self.no_statement = no_statement
    self.contest_id = contest_id

    absolute = os.path.abspath(path)
    self.loader = loader_class(absolute, cacher)
def __init__(self, listen_port, handlers, parameters, shard=0,
             listen_address=""):
    """Assemble the WSGI application stack and create the server.

    listen_port, listen_address: where the WSGIServer binds.
    handlers: passed to tornado.wsgi.WSGIApplication.
    parameters (dict): application settings; the keys static_files,
        rpc_enabled, rpc_auth, auth_middleware, is_proxy_used and
        num_proxies_used are popped here, the rest is forwarded to
        the application.
    shard: forwarded to the parent constructor.

    """
    super(WebService, self).__init__(shard)

    # Options consumed here rather than by the tornado application.
    static_files = parameters.pop('static_files', [])
    rpc_enabled = parameters.pop('rpc_enabled', False)
    rpc_auth = parameters.pop('rpc_auth', None)
    auth_middleware = parameters.pop('auth_middleware', None)
    is_proxy_used = parameters.pop('is_proxy_used', None)
    num_proxies_used = parameters.pop('num_proxies_used', None)

    self.wsgi_app = tornado.wsgi.WSGIApplication(handlers, **parameters)
    self.wsgi_app.service = self

    for entry in static_files:
        # TODO If we will introduce a flag to trigger autoreload in
        # Jinja2 templates, use it to disable the cache arg here.
        self.wsgi_app = SharedDataMiddleware(
            self.wsgi_app, {"/static": entry},
            cache=True, cache_timeout=SECONDS_IN_A_YEAR,
            fallback_mimetype="application/octet-stream")

    self.file_cacher = FileCacher(self)
    self.wsgi_app = FileServerMiddleware(self.file_cacher, self.wsgi_app)

    if rpc_enabled:
        rpc_app = RPCMiddleware(self, rpc_auth)
        self.wsgi_app = DispatcherMiddleware(
            self.wsgi_app, {"/rpc": rpc_app})

    # The authentication middleware needs to be applied before the
    # ProxyFix as otherwise the remote address it gets is the one
    # of the proxy.
    if auth_middleware is not None:
        self.wsgi_app = auth_middleware(self.wsgi_app)
        self.auth_handler = self.wsgi_app

    # If we are behind one or more proxies, we'll use the content
    # of the X-Forwarded-For HTTP header (if provided) to determine
    # the client IP address, ignoring the one the request came from.
    # This allows to use the IP lock behind a proxy. Activate it
    # only if all requests come from a trusted source (if clients
    # were allowed to directly communicate with the server they
    # could fake their IP and compromise the security of IP lock).
    if num_proxies_used is None:
        num_proxies_used = 1 if is_proxy_used else 0

    if num_proxies_used > 0:
        self.wsgi_app = ProxyFix(self.wsgi_app, num_proxies_used)

    self.web_server = WSGIServer((listen_address, listen_port), self)
def extract_complexity(task_id, file_lengther=None):
    """Extract the complexity of all submissions of the task.

    One line per analysed submission is written to the file
    task_<id>.info in the current directory.

    task_id (int): the id of the task we are interested in.
    file_lengther (type): a File-like class used to measure the
        dimension of each testcase input (forwarded to file_length).

    return (int): 0 if operation was successful, -1 if the task does
        not exist.

    """
    with SessionGen() as session:
        task = Task.get_from_id(task_id, session)
        if task is None:
            return -1

        # Extracting the length of the testcase.
        file_cacher = FileCacher()
        testcases_lengths = [file_length(testcase.input,
                                         file_cacher, file_lengther)
                             for testcase in task.testcases]
        file_cacher.purge_cache()

        # Compute the complexity of the solutions.
        with open("task_%s.info" % task_id, "wt") as info:
            for submission in task.contest.get_submissions():
                if submission.task_id == task_id and \
                        submission.evaluated():
                    # Parenthesised so the line parses on Python 3 too;
                    # on Python 2 it prints exactly the same text.
                    print(submission.user.username)
                    result = extract_complexity_submission(
                        testcases_lengths, submission)
                    if result[1] is None:
                        continue
                    info.write("Submission: %s" % submission.id)
                    info.write(" - user: %15s" % submission.user.username)
                    info.write(" - task: %s" % task.name)
                    if result[0] is not None:
                        info.write(" - score: %6.2lf" % result[0])
                    info.write(" - complexity: %20s" %
                               complexity_to_string(result[1]))
                    if result[2] is not None:
                        info.write(" - confidence %5.1lf" % result[2])
                    info.write("\n")

    return 0
def add_statement(task_name, language_code, statement_file, overwrite):
    """Store a PDF statement and attach it to a task.

    task_name (str): name of the task receiving the statement.
    language_code (str): language of the statement.
    statement_file (str): path of the file to store; its name must
        end in ".pdf".
    overwrite (bool): replace an existing statement in the same
        language instead of failing.

    return (bool): True if the statement was added, False otherwise.

    """
    logger.info("Adding the statement(language: %s) of task %s "
                "in the database.", language_code, task_name)

    if not os.path.exists(statement_file):
        logger.error("Statement file (path: %s) does not exist.",
                     statement_file)
        return False
    if not statement_file.endswith(".pdf"):
        logger.error("Statement file should be a pdf file.")
        return False

    with SessionGen() as session:
        task = session.query(Task)\
            .filter(Task.name == task_name).first()
        if not task:
            logger.error("No task named %s", task_name)
            return False
        try:
            file_cacher = FileCacher()
            digest = file_cacher.put_file_from_path(
                statement_file,
                "Statement for task %s (lang: %s)" %
                (task_name, language_code))
        except Exception:
            logger.error("Task statement storage failed.", exc_info=True)
            # Without a digest there is nothing to attach; falling
            # through would fail on the unbound name below.
            return False
        arr = session.query(Statement)\
            .filter(Statement.language == language_code)\
            .filter(Statement.task == task)\
            .all()
        if arr:  # Statement already exists
            if overwrite:
                logger.info("Overwriting already existing statement.")
                session.delete(arr[0])
                session.commit()
            else:
                logger.error("A statement with given language already exists. "
                             "Not overwriting.")
                return False
        statement = Statement(language_code, digest, task=task)
        session.add(statement)
        session.commit()

    logger.info("Statement added.")
    return True
def file_length(digest, file_cacher=None, file_lengther=None):
    """Compute the length of the file identified by digest.

    digest (string): the digest of the file.
    file_cacher (FileCacher): the cacher to use, or None to create a
        new one.
    file_lengther (type): a File-like class whose instances receive
        the file content and report the dimension through tell(); None
        selects FileLengther.

    return (int): the length of the file.

    """
    cacher = FileCacher() if file_cacher is None else file_cacher
    lengther_class = \
        FileLengther if file_lengther is None else file_lengther

    # The content is streamed into the sink, which only has to keep
    # track of how much it was given.
    sink = lengther_class()
    cacher.get_file_to_fobj(digest, sink)
    return sink.tell()
def __init__(self, path, drop, test, zero_time, user_number,
             loader_class):
    """Store the options and instantiate the loader for path.

    path: location to load from; resolved with os.path.realpath.
    drop, test, zero_time, user_number: stored unchanged.
    loader_class: called with the resolved path and the file cacher.

    """
    cacher = FileCacher()
    self.file_cacher = cacher

    self.drop = drop
    self.test = test
    self.zero_time = zero_time
    self.user_number = user_number

    resolved = os.path.realpath(path)
    self.loader = loader_class(resolved, cacher)
def make(self):
    """Copy the contest into <odir>/build and build it there.

    The build directory is removed first when self.clean is set.
    The file cache created under the build directory is destroyed
    when the build ends, whether it succeeded or not.

    raise (Exception): if self.odir has no contest-config.py, or if
        self.task is set but no such task was found.

    """
    # Unset stack size limit
    resource.setrlimit(resource.RLIMIT_STACK,
                       (resource.RLIM_INFINITY, resource.RLIM_INFINITY))

    if not os.path.exists(os.path.join(self.odir, "contest-config.py")):
        raise Exception("Directory doesn't contain contest-config.py")

    self.wdir = os.path.join(self.odir, "build")
    # A clean build of a never-built contest has nothing to remove;
    # rmtree would raise on the missing directory.
    if self.clean and os.path.exists(self.wdir):
        shutil.rmtree(self.wdir)
    if not os.path.exists(self.wdir):
        os.mkdir(self.wdir)

    # We have to avoid copying the folder contest/build
    # or contest/task/build into contest/build.
    # For this reason, we ignore all files and directories named "build"
    # when copying recursively.
    copyrecursivelyifnecessary(self.odir, self.wdir, set(["build"]))
    self.wdir = os.path.abspath(self.wdir)

    file_cacher = FileCacher(path=os.path.join(self.wdir, ".cache"))
    try:
        with chdir(self.wdir):
            contestconfig = ContestConfig(
                os.path.join(self.wdir, ".rules"),
                os.path.basename(self.odir),
                ignore_latex=self.no_latex,
                onlytask=self.task)
            contestconfig._readconfig("contest-config.py")
            if self.task is not None and len(contestconfig.tasks) == 0:
                raise Exception("Task {} not found".format(self.task))
            cdb = contestconfig._makecontest()
            test_udb = contestconfig._makeuser(
                contestconfig._mytestuser.username)
            test_gdb = contestconfig._makegroup(
                contestconfig._mytestuser.group.name, cdb)
            # We're not putting the test user on any team for testing
            # (shouldn't be needed).
            test_pdb = contestconfig._makeparticipation(
                contestconfig._mytestuser.username, cdb,
                test_udb, test_gdb, None)
            for t in contestconfig.tasks.values():
                tdb = t._makedbobject(cdb, file_cacher)
                t._make_test_submissions(test_pdb, tdb, self.local_test)
    finally:
        file_cacher.destroy_cache()
def __init__(self, path, zero_time, import_tasks, update_contest,
             update_tasks, no_statements, loader_class):
    """Store the options and instantiate the loader for path.

    path: location to load from; made absolute with os.path.abspath.
    zero_time, import_tasks, update_contest, update_tasks,
        no_statements: stored unchanged on the instance.
    loader_class: called with the absolute path and the file cacher.

    """
    cacher = FileCacher()
    self.file_cacher = cacher

    self.zero_time = zero_time
    self.import_tasks = import_tasks
    self.update_contest = update_contest
    self.update_tasks = update_tasks
    self.no_statements = no_statements

    absolute = os.path.abspath(path)
    self.loader = loader_class(absolute, cacher)
def __init__(self, args):
    """Initialize the service from parsed arguments and configuration.

    args: object with a `shard` attribute; the shard is passed to the
        base service and added to the configured base port.

    """
    shard = args.shard
    Service.__init__(self, shard=shard)

    # Address and port come from the "core" configuration section.
    self.address = config.get("core", "listen_address")
    base_port = int(config.get("core", "listen_port"))
    self.port = base_port + shard

    self.file_cacher = FileCacher(self)
    self.evaluation_service = self.connect_to(
        ServiceCoord('EvaluationService', 0))

    self.wsgi_app = APIHandler(self)
def build(self):
    """Build the task in self.wdir and return its primary statement.

    The file cache created under self.wdir is destroyed when the
    build ends, whether it succeeded or not.

    return: absolute path of the single primary statement of the
        first task, or None if no statement is primary.

    raise (Exception): if more than one statement is primary.

    """
    cache_dir = os.path.join(self.wdir, ".cache")
    file_cacher = FileCacher(path=cache_dir)
    try:
        with chdir(self.wdir):
            contestconfig = ContestConfig(
                os.path.join(self.wdir, ".rules"),
                "hidden contest",
                minimal=self.minimal)
            copyifnecessary(
                os.path.join(contestconfig._get_ready_dir(),
                             "contest-template.py"),
                os.path.join(self.wdir, "c.py"))
            contestconfig._readconfig("c.py")
            contestconfig._task(
                self.task, contestconfig.full_feedback, self.minimal)

            if not self.minimal:
                cdb = contestconfig._makecontest()
                testuser = contestconfig._mytestuser
                test_udb = contestconfig._makeuser(testuser.username)
                test_gdb = contestconfig._makegroup(
                    testuser.group.name, cdb)
                # We're not putting the test user on any team for testing
                # (shouldn't be needed).
                test_pdb = contestconfig._makeparticipation(
                    testuser.username, cdb, test_udb, test_gdb, None)
                for t in contestconfig.tasks.values():
                    tdb = t._makedbobject(cdb, file_cacher)
                    t._make_test_submissions(
                        test_pdb, tdb, self.local_test)
    finally:
        file_cacher.destroy_cache()

    # Only the first task's statements are considered.
    first_task = list(contestconfig.tasks.values())[0]
    primary_statements = [
        s for s in first_task._statements.values() if s.primary]

    if len(primary_statements) > 1:
        raise Exception("More than one primary statement")
    if not primary_statements:
        return None
    return os.path.abspath(primary_statements[0].file_)
def clean_files(session, dry_run):
    """Find (and optionally delete) files not referenced by the database.

    session: database session handed to enumerate_files to collect
        the digests still in use.
    dry_run (bool): when true only report the orphans, do not delete.

    """
    filecacher = FileCacher()
    files = set(entry[0] for entry in filecacher.list())
    logger.info("A total number of %d files are present in the file store",
                len(files))
    found_digests = enumerate_files(session)
    logger.info("Found %d digests while scanning", len(found_digests))
    files -= found_digests
    logger.info("%d digests are orphan.", len(files))
    total_size = sum(filecacher.get_size(orphan) for orphan in files)
    logger.info("Orphan files take %s bytes of disk space",
                "{:,}".format(total_size))
    if not dry_run:
        # Count from 1 so the progress message reports files actually
        # deleted (the old code logged "0 files deleted" after the first).
        for count, orphan in enumerate(files, 1):
            filecacher.delete(orphan)
            if count % 100 == 0:
                logger.info("%d files deleted from the file store", count)
        logger.info("All orphan files have been deleted")
def debugSubmission(submission_id, dataset_id, testcase_codename):
    """Compile and evaluate a submission locally, keeping the sandboxes.

    submission_id (int): id of the submission to debug.
    dataset_id (int|None): dataset to run against; None selects the
        active dataset of the submission's task.
    testcase_codename: testcase to evaluate on.

    return (bool): False if the submission or dataset cannot be
        found, True otherwise (also when compilation failed).

    """
    # Sandboxes must survive the run so they can be inspected.
    config.keep_sandbox = True
    file_cacher = FileCacher()

    with SessionGen() as session:
        submission = session.query(Submission)\
            .filter(Submission.id == submission_id)\
            .first()

        if submission is None:
            logger.error("There's no submission with id %d" % submission_id)
            return False

        if dataset_id is None:
            dataset = submission.task.active_dataset
            dataset_id = submission.task.active_dataset_id
        else:
            dataset = session.query(Dataset)\
                .filter(Dataset.id == dataset_id)\
                .first()

        # Mirror the submission check: fail cleanly instead of
        # crashing while building the jobs.
        if dataset is None:
            logger.error("There's no dataset with id %s" % dataset_id)
            return False

        # Compilation
        operation = ESOperation(ESOperation.COMPILATION,
                                submission_id, dataset_id)
        comp_job = CompilationJob.from_submission(
            operation, submission, dataset)

        task_type = get_task_type(comp_job.task_type,
                                  comp_job.task_type_parameters)
        task_type.execute_job(comp_job, file_cacher)

        for sandbox_path in comp_job.sandboxes:
            logger.info("Compilation sandbox created in %s" % sandbox_path)

        # Check if the compilation is successful
        result = submission.get_result(dataset)
        if result is None or result.compilation_failed():
            logger.error("Compilation Failed")
            return True

        # Evaluation
        operation = ESOperation(ESOperation.EVALUATION,
                                submission_id, dataset_id,
                                testcase_codename)
        eval_job = EvaluationJob.from_submission(
            operation, submission, dataset)

        task_type = get_task_type(eval_job.task_type,
                                  eval_job.task_type_parameters)
        task_type.execute_job(eval_job, file_cacher)

        for sandbox_path in eval_job.sandboxes:
            logger.info("Evaluation sandbox created in %s" % sandbox_path)

    return True
def __init__(self, contest_id, export_target, dump_files, dump_model,
             skip_generated, skip_submissions, skip_user_tests):
    """Record what has to be exported and where the dump goes.

    contest_id (int): id of the contest to export.
    export_target (str): destination of the dump; when it is the
        empty string a name based on the contest's name is used.
    The remaining flags are stored unchanged on the instance.

    """
    self.contest_id = contest_id
    self.dump_files = dump_files
    self.dump_model = dump_model
    self.skip_generated = skip_generated
    self.skip_submissions = skip_submissions
    self.skip_user_tests = skip_user_tests

    if export_target != "":
        self.export_target = export_target
    else:
        # If target is not provided, we use the contest's name.
        with SessionGen() as session:
            contest = Contest.get_from_id(self.contest_id, session)
            self.export_target = "dump_%s.tar.gz" % contest.name
            logger.warning("export_target not given, using \"%s\""
                           % self.export_target)

    self.file_cacher = FileCacher()
def clean_files(session, dry_run):
    """Find (and optionally delete) files not referenced by the database.

    session: database session used to scan the digest columns.
    dry_run (bool): when true only report the orphans, do not delete.

    """
    filecacher = FileCacher()
    files = set(entry[0] for entry in filecacher.list())
    logger.info("A total number of %d files are present in the file store",
                len(files))
    for cls in [
            Attachment, Executable, File, Manager, PrintJob, Statement,
            Testcase, UserTest, UserTestExecutable, UserTestFile,
            UserTestManager, UserTestResult
    ]:
        cols = [col for col in ("input", "output", "digest")
                if hasattr(cls, col)]
        if not cols:
            continue
        # Load the rows once per class instead of once per column.
        objects = session.query(cls).all()
        for col in cols:
            found_digests = set(getattr(obj, col) for obj in objects)
            found_digests.discard(FileCacher.TOMBSTONE_DIGEST)
            logger.info("Found %d digests while scanning %s.%s",
                        len(found_digests), cls.__name__, col)
            files -= found_digests
    logger.info("%d digests are orphan.", len(files))
    total_size = sum(filecacher.get_size(orphan) for orphan in files)
    logger.info("Orphan files take %s bytes of disk space",
                "{:,}".format(total_size))
    if not dry_run:
        # Count from 1 so the progress message reports files actually
        # deleted (the old code logged "0 files deleted" after the first).
        for count, orphan in enumerate(files, 1):
            filecacher.delete(orphan)
            if count % 100 == 0:
                logger.info("%d files deleted from the file store", count)
        logger.info("All orphan files have been deleted")
def __init__(self, path, yes, zero_time, import_tasks, update_contest,
             update_tasks, no_statements, delete_stale_participations,
             loader_class):
    """Store the options and instantiate the loader for path.

    path: location to load from; made absolute with os.path.abspath.
    yes, zero_time, import_tasks, update_contest, update_tasks,
        no_statements, delete_stale_participations: stored unchanged
        on the instance.
    loader_class: called with the absolute path and the file cacher.

    """
    cacher = FileCacher()
    self.file_cacher = cacher

    self.yes = yes
    self.zero_time = zero_time
    self.import_tasks = import_tasks
    self.update_contest = update_contest
    self.update_tasks = update_tasks
    self.no_statements = no_statements
    self.delete_stale_participations = delete_stale_participations

    absolute = os.path.abspath(path)
    self.loader = loader_class(absolute, cacher)
def __init__(self, contest_id, export_target, json):
    """Store the export options and create a FileCacher.

    contest_id (int): id of the contest whose users are exported.
    export_target (str): destination file; when it is the empty
        string a name built from the contest id and today's date is
        used.
    json: stored unchanged as self.json.

    """
    self.contest_id = contest_id
    self.json = json

    target = export_target
    # If target is not provided, we use the current date.
    if export_target == "":
        target = "users_c%d_%s.html" % \
            (self.contest_id, date.today().isoformat())
    self.export_target = target
    if export_target == "":
        logger.warning("export_target not given, using \"%s\"",
                       self.export_target)

    self.file_cacher = FileCacher()
def add_testcases(archive, input_template, output_template,
                  task_name, dataset_description=None, contest_name=None,
                  public=False, overwrite=False):
    """Import testcases from an archive into a dataset of a task.

    archive: passed to import_testcases_from_zipfile.
    input_template, output_template (str): file name templates where
        "*" stands for the part captured from each file name.
    task_name (str): name of the task to add the testcases to.
    dataset_description (str|None): description of the target
        dataset; None selects the task's active dataset.
    contest_name (str|None): if given, the task must belong to the
        contest with this name.
    public, overwrite: passed to import_testcases_from_zipfile.

    return (bool): True on success, False otherwise.

    """
    with SessionGen() as session:
        task = session.query(Task)\
            .filter(Task.name == task_name).first()
        if not task:
            logger.error("No task called %s found." % task_name)
            return False

        dataset = task.active_dataset
        if dataset_description is not None:
            dataset = session.query(Dataset)\
                .filter(Dataset.task_id == task.id)\
                .filter(Dataset.description == dataset_description)\
                .first()
            if not dataset:
                logger.error("No dataset called %s found."
                             % dataset_description)
                return False

        if contest_name is not None:
            contest = session.query(Contest)\
                .filter(Contest.name == contest_name).first()
            if task.contest != contest:
                logger.error("%s is not in %s" %
                             (task_name, contest_name))
                return False

        file_cacher = FileCacher()

        # Get input/output file names templates
        input_pattern = re.escape(input_template).replace("\\*", "(.*)")
        output_pattern = re.escape(output_template).replace("\\*", "(.*)")
        input_re = re.compile(input_pattern + "$")
        output_re = re.compile(output_pattern + "$")

        try:
            outcome = import_testcases_from_zipfile(
                session, file_cacher, dataset,
                archive, input_re, output_re, overwrite, public)
            successful_subject, successful_message = outcome
        except Exception as error:
            logger.error(str(error))
            return False

        logger.info(successful_subject)
        logger.info(successful_message)

    return True
def __init__(self, shard):
    """Configure the admin web server and connect to the other services.

    shard: forwarded to the parent constructor.

    """
    template_dir = pkg_resources.resource_filename(
        "cms.server.admin", "templates")
    parameters = {
        "ui_modules": views,
        "template_path": template_dir,
        "static_files": [("cms.server", "static"),
                         ("cms.server.admin", "static")],
        "cookie_secret": base64.b64encode(config.secret_key),
        "debug": config.tornado_debug,
        "auth_middleware": AWSAuthMiddleware,
        "rpc_enabled": True,
        "rpc_auth": self.is_rpc_authorized,
        "xsrf_cookies": True,
    }
    super(AdminWebServer, self).__init__(
        config.admin_listen_port, HANDLERS, parameters,
        shard=shard, listen_address=config.admin_listen_address)

    # A list of pending notifications.
    self.notifications = []

    self.file_cacher = FileCacher(self)

    self.admin_web_server = self.connect_to(
        ServiceCoord("AdminWebServer", 0))
    self.queue_service = self.connect_to(
        ServiceCoord("QueueService", 0))
    # TODO: does it make sense to use a random one?
    self.evaluation_services = self.connect_to(
        ServiceCoord("EvaluationService", 0))
    self.scoring_service = self.connect_to(
        ServiceCoord("ScoringService", 0))

    # The proxy is mandatory only when rankings are configured.
    ranking_enabled = len(config.rankings) > 0
    self.proxy_service = self.connect_to(
        ServiceCoord("ProxyService", 0),
        must_be_present=ranking_enabled)

    # One connection per ResourceService shard.
    self.resource_services = []
    for i in range(get_service_shards("ResourceService")):
        coord = ServiceCoord("ResourceService", i)
        self.resource_services.append(self.connect_to(coord))

    self.logservice = self.connect_to(ServiceCoord("LogService", 0))
def __init__(self, shard):
    """Initialize the PrintingService.

    shard: forwarded to the parent constructor.

    The executor is registered in any case; when no printer is
    configured this is only reported in the log.

    """
    super(PrintingService, self).__init__(shard)

    cacher = FileCacher(self)
    self.file_cacher = cacher
    self.add_executor(PrintingExecutor(cacher))

    if config.printer is None:
        logger.info("Printing is disabled, so the PrintingService is "
                    "idle.")
def __init__(self, shard):
    """Configure the admin web server and connect to the other services.

    shard: forwarded to the parent constructor.

    """
    parameters = {
        "static_files": [("cms.server", "static"),
                         ("cms.server.admin", "static")],
        "cookie_secret": hex_to_bin(config.secret_key),
        "debug": config.tornado_debug,
        "auth_middleware": AWSAuthMiddleware,
        "rpc_enabled": True,
        "rpc_auth": self.is_rpc_authorized,
        "xsrf_cookies": True,
    }
    super(AdminWebServer, self).__init__(
        config.admin_listen_port, HANDLERS, parameters,
        shard=shard, listen_address=config.admin_listen_address)

    self.jinja2_environment = AWS_ENVIRONMENT

    # A list of pending notifications.
    self.notifications = []

    self.file_cacher = FileCacher(self)

    self.admin_web_server = self.connect_to(
        ServiceCoord("AdminWebServer", 0))
    self.evaluation_service = self.connect_to(
        ServiceCoord("EvaluationService", 0))
    self.scoring_service = self.connect_to(
        ServiceCoord("ScoringService", 0))

    # The proxy is mandatory only when rankings are configured.
    ranking_enabled = len(config.rankings) > 0
    self.proxy_service = self.connect_to(
        ServiceCoord("ProxyService", 0),
        must_be_present=ranking_enabled)

    # One connection per ResourceService shard.
    self.resource_services = []
    for i in range(get_service_shards("ResourceService")):
        coord = ServiceCoord("ResourceService", i)
        self.resource_services.append(self.connect_to(coord))

    self.logservice = self.connect_to(ServiceCoord("LogService", 0))
def __init__(self, path, prefix, override_name, update, no_statement,
             contest_id, loader_class):
    """Create the importer object for a task.

    path (string): the path to the file or directory to import.
    prefix (string): an optional prefix added to the task name.
    override_name (string): an optional new name for the task.
    update (bool): if the task already exists, try to update it.
    no_statement (bool): do not try to import the task statement.
    contest_id (int): if set, the new task will be tied to this
        contest.
    loader_class (type): called with the absolute path and the file
        cacher to build self.loader.

    """
    cacher = FileCacher()
    self.file_cacher = cacher

    self.prefix = prefix
    self.override_name = override_name
    self.update = update
    self.no_statement = no_statement
    self.contest_id = contest_id

    absolute = os.path.abspath(path)
    self.loader = loader_class(absolute, cacher)
def __init__(self, contest_id, export_target, dump_files, dump_model,
             light, skip_submissions, skip_user_tests):
    """Store the export options and work out the export target.

    When export_target is the empty string the target defaults to
    "dump_<contest name>.tar.gz", where the name is read from the
    database for the contest with the given id.

    """
    self.contest_id = contest_id
    self.dump_files = dump_files
    self.dump_model = dump_model
    self.light = light
    self.skip_submissions = skip_submissions
    self.skip_user_tests = skip_user_tests

    if export_target != "":
        self.export_target = export_target
    else:
        # No target given: derive one from the contest's name.
        with SessionGen(commit=False) as session:
            contest = Contest.get_from_id(self.contest_id, session)
            default_target = "dump_%s.tar.gz" % contest.name
            self.export_target = default_target
            logger.warning("export_target not given, using \"%s\""
                           % self.export_target)

    self.file_cacher = FileCacher()
def __init__(self, contest_id, export_target, dump_files, dump_model,
             skip_generated, skip_submissions, skip_user_tests):
    """Store the export options and work out the export target.

    When export_target is the empty string the target defaults to
    "dump_<contest name>.tar.gz". If no contest with the given id
    exists, a critical message is logged and self.contest_id is reset
    to None (the export target is then left empty).

    """
    self.contest_id = contest_id
    self.dump_files = dump_files
    self.dump_model = dump_model
    self.skip_generated = skip_generated
    self.skip_submissions = skip_submissions
    self.skip_user_tests = skip_user_tests
    self.export_target = export_target

    target_missing = export_target == ""
    if target_missing:
        # No target given: derive one from the contest's name.
        with SessionGen() as session:
            contest = Contest.get_from_id(self.contest_id, session)
            if contest is not None:
                self.export_target = "dump_%s.tar.gz" % contest.name
                logger.warning("export_target not given, using \"%s\""
                               % self.export_target)
            else:
                logger.critical("Please specify a valid contest id.")
                self.contest_id = None

    self.file_cacher = FileCacher()
class ContestImporter(object):

    """This service imports a contest from a directory that has been
    the target of a ContestExport. The process of exporting and
    importing again should be idempotent.

    """

    def __init__(self, drop, import_source,
                 load_files, load_model,
                 skip_generated, skip_submissions, skip_user_tests):
        """Store the import options.

        drop (bool): drop and recreate the database before importing.
        import_source (string): directory or archive to import from.
        load_files (bool): import the files into the file store.
        load_model (bool): import the objects from contest.json.
        skip_generated (bool): do not import submission/user test
            results.
        skip_submissions (bool): do not import submissions.
        skip_user_tests (bool): do not import user tests.

        """
        self.drop = drop
        self.load_files = load_files
        self.load_model = load_model
        self.skip_generated = skip_generated
        self.skip_submissions = skip_submissions
        self.skip_user_tests = skip_user_tests

        self.import_source = import_source
        # Replaced by the extraction directory when import_source
        # turns out to be an archive.
        self.import_dir = import_source

        self.file_cacher = FileCacher()

    def _open_archive(self):
        """Open self.import_source as an archive.

        return (tuple|None): the pair (archive object, list of member
            names), or None (after logging a critical message) if the
            format is unknown or unsupported.

        """
        source = self.import_source

        if source.endswith(".zip"):
            archive = zipfile.ZipFile(source, "r")
            # namelist() yields plain strings, like TarFile.getnames();
            # infolist() would yield ZipInfo objects instead.
            return archive, archive.namelist()

        if source.endswith((".tar.gz", ".tgz", ".tar.bz2", ".tbz2",
                            ".tar")):
            archive = tarfile.open(name=source)
            return archive, archive.getnames()

        if source.endswith((".tar.xz", ".txz")):
            try:
                import lzma
            except ImportError:
                logger.critical("LZMA compression format not "
                                "supported. Please install package "
                                "lzma.")
                return None
            archive = tarfile.open(fileobj=lzma.LZMAFile(source))
            return archive, archive.getnames()

        logger.critical("Unable to import from %s." % source)
        return None

    def do_import(self):
        """Run the actual import code.

        return (bool): True if the import completed, False if it was
            aborted (the reason is logged).

        """
        logger.info("Starting import.")

        # Temporary directory holding the extracted archive, if any.
        extract_dir = None

        if not os.path.isdir(self.import_source):
            opened = self._open_archive()
            if opened is None:
                return False
            archive, file_names = opened

            try:
                root = find_root_of_archive(file_names)
                if root is None:
                    logger.critical("Cannot find a root directory in %s."
                                    % self.import_source)
                    return False

                # Extract exactly once, whatever the archive format.
                extract_dir = tempfile.mkdtemp()
                archive.extractall(extract_dir)
            finally:
                archive.close()

            self.import_dir = os.path.join(extract_dir, root)

        if self.drop:
            logger.info("Dropping and recreating the database.")
            try:
                if not (drop_db() and init_db()):
                    logger.critical("Unexpected error while dropping "
                                    "and recreating the database.",
                                    exc_info=True)
                    return False
            except Exception as error:
                logger.critical("Unable to access DB.\n%r" % error)
                return False

        with SessionGen() as session:

            # Import the contest in JSON format.
            if self.load_model:
                logger.info("Importing the contest from a JSON file.")

                with io.open(os.path.join(self.import_dir,
                                          "contest.json"), "rb") as fin:
                    # TODO - Throughout all the code we'll assume the
                    # input is correct without actually doing any
                    # validations.  Thus, for example, we're not
                    # checking that the decoded object is a dict...
                    self.datas = json.load(fin, encoding="utf-8")

                # If the dump has been exported using a data model
                # different than the current one (that is, a previous
                # one) we try to update it.
                # If no "_version" field is found we assume it's a v1.0
                # export (before the new dump format was introduced).
                dump_version = self.datas.get("_version", 0)

                if dump_version < model_version:
                    logger.warning(
                        "The dump you're trying to import has been created "
                        "by an old version of CMS. It may take a while to "
                        "adapt it to the current data model. You can use "
                        "cmsDumpUpdater to update the on-disk dump and "
                        "speed up future imports.")

                if dump_version > model_version:
                    logger.critical(
                        "The dump you're trying to import has been created "
                        "by a version of CMS newer than this one and there "
                        "is no way to adapt it to the current data model. "
                        "You probably need to update CMS to handle it. It's "
                        "impossible to proceed with the importation.")
                    return False

                for version in range(dump_version, model_version):
                    # Update from version to version+1
                    updater = __import__(
                        "cmscontrib.updaters.update_%d" % (version + 1),
                        globals(), locals(), ["Updater"]).Updater(self.datas)
                    self.datas = updater.run()
                    self.datas["_version"] = version + 1

                assert self.datas["_version"] == model_version

                # First pass: create every object; second pass: link
                # them, now that all referenced objects exist.
                self.objs = dict()
                for id_, data in self.datas.items():
                    if not id_.startswith("_"):
                        self.objs[id_] = self.import_object(data)
                for id_, data in self.datas.items():
                    if not id_.startswith("_"):
                        self.add_relationships(data, self.objs[id_])

                # Iterate over a snapshot since entries get deleted.
                for k, v in list(self.objs.items()):

                    # Skip submissions if requested
                    if self.skip_submissions and isinstance(v, Submission):
                        del self.objs[k]

                    # Skip user_tests if requested
                    if self.skip_user_tests and isinstance(v, UserTest):
                        del self.objs[k]

                    # Skip generated data if requested
                    if self.skip_generated and \
                            isinstance(v, (SubmissionResult, UserTestResult)):
                        del self.objs[k]

                contest_id = list()
                contest_files = set()

                # Add each base object and all its dependencies
                for id_ in self.datas["_objects"]:
                    contest = self.objs[id_]

                    # We explictly add only the contest since all child
                    # objects will be automatically added by cascade.
                    # Adding each object individually would also add
                    # orphaned objects like the ones that depended on
                    # submissions or user_tests that we (possibly)
                    # removed above.
                    session.add(contest)
                    session.flush()

                    contest_id.append(contest.id)
                    contest_files |= contest.enumerate_files(
                        self.skip_submissions, self.skip_user_tests,
                        self.skip_generated)

                session.commit()
            else:
                contest_id = None
                contest_files = None

            # Import files.
            if self.load_files:
                logger.info("Importing files.")

                files_dir = os.path.join(self.import_dir, "files")
                descr_dir = os.path.join(self.import_dir, "descriptions")

                files = set(os.listdir(files_dir))
                descr = set(os.listdir(descr_dir))

                if not descr <= files:
                    logger.warning("Some files do not have an associated "
                                   "description.")
                if not files <= descr:
                    logger.warning("Some descriptions do not have an "
                                   "associated file.")
                if not (contest_files is None or files <= contest_files):
                    # FIXME Check if it's because this is a light import
                    # or because we're skipping submissions or user_tests
                    logger.warning("The dump contains some files that are "
                                   "not needed by the contest.")
                if not (contest_files is None or contest_files <= files):
                    # The reason for this could be that it was a light
                    # export that's not being reimported as such.
                    logger.warning("The contest needs some files that are "
                                   "not contained in the dump.")

                # Limit import to files we actually need.
                if contest_files is not None:
                    files &= contest_files

                for digest in files:
                    file_ = os.path.join(files_dir, digest)
                    desc = os.path.join(descr_dir, digest)
                    if not self.safe_put_file(file_, desc):
                        logger.critical("Unable to put file `%s' in the "
                                        "database. Aborting. Please remove "
                                        "the contest from the database."
                                        % file_)
                        # TODO: remove contest from the database.
                        return False

        if contest_id is not None:
            logger.info("Import finished (contest id: %s)."
                        % ", ".join(str(id_) for id_ in contest_id))
        else:
            logger.info("Import finished.")

        # If we extracted an archive, we remove the whole temporary
        # directory it was extracted into (not just its root entry).
        if extract_dir is not None:
            rmtree(extract_dir)

        return True

    def import_object(self, data):
        """Import objects from the given data (without relationships).

        The given data is assumed to be a dict in the format produced by
        ContestExporter. This method reads the "_class" item and tries
        to find the corresponding class. Then it loads all column
        properties of that class (those that are present in the data)
        and uses them as keyword arguments in a call to the class
        constructor (if a required property is missing this call will
        raise an error).

        Relationships are not handled by this method, since we may not
        have all referenced objects available yet. Thus we prefer to add
        relationships in a later moment, using the add_relationships
        method.

        Note that both this method and add_relationships don't check if
        the given data has more items than the ones we understand and
        use.

        data (dict): the exported representation of one object.

        return (object): a new instance of the class named in data.

        raise (RuntimeError): if a column has an unsupported type.

        """
        cls = getattr(class_hook, data["_class"])

        args = dict()

        for prp in cls._col_props:
            if prp.key not in data:
                # We will let the __init__ of the class check if any
                # argument is missing, so it's safe to just skip here.
                continue

            col = prp.columns[0]
            col_type = type(col.type)

            val = data[prp.key]
            if col_type in [Boolean, Integer, Float, Unicode,
                            RepeatedUnicode]:
                args[prp.key] = val
            elif col_type is String:
                args[prp.key] = \
                    val.encode('latin1') if val is not None else None
            elif col_type is DateTime:
                args[prp.key] = \
                    make_datetime(val) if val is not None else None
            elif col_type is Interval:
                args[prp.key] = \
                    timedelta(seconds=val) if val is not None else None
            else:
                raise RuntimeError(
                    "Unknown SQLAlchemy column type: %s" % col_type)

        return cls(**args)

    def add_relationships(self, data, obj):
        """Add the relationships to the given object, using the given data.

        Do what we didn't in import_objects: importing relationships.
        We already now the class of the object so we simply iterate over
        its relationship properties trying to load them from the data (if
        present), checking wheter they are IDs or collection of IDs,
        dereferencing them (i.e. getting the corresponding object) and
        reflecting all on the given object.

        Note that both this method and import_object don't check if the
        given data has more items than the ones we understand and use.

        data (dict): the exported representation of obj.
        obj (object): the object created by import_object for data.

        raise (RuntimeError): if a relationship value has an
            unsupported type.

        """
        cls = type(obj)

        for prp in cls._rel_props:
            if prp.key not in data:
                # Relationships are always optional
                continue

            val = data[prp.key]
            if val is None:
                setattr(obj, prp.key, None)
            elif isinstance(val, unicode):
                setattr(obj, prp.key, self.objs[val])
            elif isinstance(val, list):
                setattr(obj, prp.key, [self.objs[i] for i in val])
            elif isinstance(val, dict):
                setattr(obj, prp.key,
                        dict((k, self.objs[v]) for k, v in val.items()))
            else:
                raise RuntimeError(
                    "Unknown RelationshipProperty value: %s" % type(val))

    def safe_put_file(self, path, descr_path):
        """Put a file to FileCacher signaling every error (including
        digest mismatch).

        path (string): the path from which to load the file.
        descr_path (string): same for description.

        return (bool): True if all ok, False if something wrong.

        """

        # TODO - Probably this method could be merged in FileCacher

        # First read the description; a missing description file is
        # tolerated and replaced by an empty description.
        try:
            with io.open(descr_path, 'rt', encoding='utf-8') as fin:
                description = fin.read()
        except IOError:
            description = ''

        # Put the file.
        try:
            digest = self.file_cacher.put_file_from_path(path, description)
        except Exception as error:
            logger.critical("File %s could not be put to file server (%r), "
                            "aborting." % (path, error))
            return False

        # Then check the digest.
        calc_digest = sha1sum(path)
        if digest != calc_digest:
            logger.critical("File %s has hash %s, but the server returned %s, "
                            "aborting." % (path, calc_digest, digest))
            return False

        return True
def main():
    """Import a test (a list of questions) from a text file.

    The only command line argument is the path of the questions file.
    Its first line is the test description; the following lines are
    parsed with a small state machine:

    - a line starting with "===" closes the current question;
    - a line starting with "---" starts the list of choices;
    - a line starting with "+++" starts the list of expected answers;
    - in "score" state a line "score,wrong_score" sets the scores
      (lines that cannot be parsed are skipped);
    - in "text" state lines are appended to the question text, with
      "[[name]]" lines replaced by an image stored in the file cacher.

    The resulting Test is added to the database.

    """
    if len(sys.argv) != 2:
        print("%s [file delle domande]" % sys.argv[0])
        sys.exit(0)

    # Read everything up front and close the file right away.
    with open(sys.argv[1]) as questions_file:
        lines = questions_file.readlines()

    test = Test()
    test.name = os.path.basename(sys.argv[1]).replace(".txt", "")
    test.description = lines[0].strip()
    test.max_score = 0
    dirname = os.path.dirname(sys.argv[1])

    question = TestQuestion()
    question.text = "<p>\n"

    file_cacher = FileCacher()

    answers = []
    status = "score"

    for line in lines[1:]:
        line = escape(line)

        # End of the current question: store it and start a new one.
        if line[:3] == '===':
            question.text += "</p>"
            question.answers = json.dumps(answers)
            test.questions.append(question)
            status = "score"
            question = TestQuestion()
            question.text = "<p>\n"
            continue

        if line[:3] == '---':
            status = "choice"
            question.type = "choice"
            answers = []
            continue

        if line[:3] == '+++':
            status = "answer"
            answers = []
            continue

        if status == "score":
            try:
                score, wrong_score = map(int, line.split(","))
                test.max_score += score
            except ValueError:
                # Not a "score,wrong_score" line: keep looking.
                continue
            question.score = score
            question.wrong_score = wrong_score
            status = "text"
            continue

        if status == "text":
            if line == "\n":
                question.text += "</p><p>\n"
            elif line[:2] == "[[" and line[-3:] == "]]\n":
                # Image reference: store the file and link it.
                name = line[2:-3]
                digest = file_cacher.put_file_from_path(
                    os.path.join(dirname, "data", name),
                    "Image %s for test %s" % (name, test.name))
                question.text += "<center>"
                question.text += "<img src='/files/%s/%s'/>" % (digest, name)
                question.text += "</center>\n"
                f = QuestionFile(filename=name, digest=digest)
                question.files.append(f)
            elif line[:-1] == "```":
                question.text += "<pre>"
            elif line[:-1] == "'''":
                question.text += "</pre>"
            else:
                question.text += line

        if status == "choice":
            # A leading '*' marks the choice as correct.
            answers.append([line[1:].strip(), line[0] == '*'])

        if status == "answer":
            pos = line.index(":")
            name = line[:pos]
            value = json.loads("[" + line[pos + 1:] + "]")
            if isinstance(value[0], basestring):
                question.type = "string"
            elif not question.type:
                question.type = "number"
            answers.append([name, value])

    # Store the last question when the file ends inside an answer list.
    if status == "answer":
        question.text += "</p>"
        question.answers = json.dumps(answers)
        test.questions.append(question)

    with SessionGen() as session:
        test.access_level = 7
        session.add(test)
        session.commit()
class Worker(Service):
    """This service implement the possibility to compile and evaluate
    submissions in a sandbox. The instructions to follow for the
    operations are in the TaskType classes, while the sandbox is in
    the Sandbox module.

    """

    JOB_TYPE_COMPILATION = "compile"
    JOB_TYPE_EVALUATION = "evaluate"

    def __init__(self, shard, fake_worker_time=None):
        """Initialize the worker.

        shard: forwarded to the Service constructor.
        fake_worker_time: if not None, jobs are not executed for real;
            the worker sleeps for this amount of time and reports a
            fake success instead.

        """
        Service.__init__(self, shard)
        self.file_cacher = FileCacher(self)

        self.work_lock = gevent.lock.RLock()
        # Bookkeeping for the busy/free statistics logged by _finalize.
        self._last_end_time = None
        self._total_free_time = 0
        self._total_busy_time = 0
        self._number_execution = 0

        self._fake_worker_time = fake_worker_time

    @rpc_method
    def precache_files(self, contest_id):
        """RPC to ask the worker to precache of files in the contest.

        contest_id (int): the id of the contest

        """
        # In order to avoid a long-living connection, first fetch the
        # complete list of files and then download the files; since
        # this is just pre-caching, possible race conditions are not
        # dangerous
        logger.info("Precaching files for contest %d.", contest_id)
        with SessionGen() as session:
            contest = Contest.get_from_id(contest_id, session)
            files = enumerate_files(session,
                                    contest,
                                    skip_submissions=True,
                                    skip_user_tests=True,
                                    skip_print_jobs=True)
        for digest in files:
            try:
                self.file_cacher.load(digest, if_needed=True)
            except KeyError:
                # No problem (at this stage) if we cannot find the
                # file
                pass

        logger.info("Precaching finished.")

    @rpc_method
    def execute_job_group(self, job_group_dict):
        """Receive a group of jobs in a list format and executes them
        one by one.

        job_group_dict ({}): a JobGroup exported to dict.

        return ({}): the same JobGroup in dict format, but containing
            the results.

        raise (JobException): if the worker is already busy or if the
            execution fails.

        """
        start_time = time.time()
        job_group = JobGroup.import_from_dict(job_group_dict)

        # Non-blocking acquire: a busy worker declines the request.
        if self.work_lock.acquire(False):
            try:
                logger.info("Starting job group.")
                for job in job_group.jobs:
                    logger.info("Starting job.",
                                extra={"operation": job.info})

                    job.shard = self.shard

                    if self._fake_worker_time is None:
                        task_type = get_task_type(job.task_type,
                                                  job.task_type_parameters)
                        try:
                            task_type.execute_job(job, self.file_cacher)
                        except TombstoneError:
                            job.success = False
                            job.plus = {"tombstone": True}
                    else:
                        self._fake_work(job)

                    logger.info("Finished job.",
                                extra={"operation": job.info})

                logger.info("Finished job group.")
                return job_group.export_to_dict()

            except Exception as e:
                err_msg = "Worker failed: %s." % e
                logger.error(err_msg, exc_info=True)
                raise JobException(err_msg)

            finally:
                self._finalize(start_time)
                self.work_lock.release()

        else:
            err_msg = "Request received, but declined because of acquired " \
                "lock (Worker is busy executing another job, this should " \
                "not happen: check if there are more than one ES running, " \
                "or for bugs in ES."
            logger.warning(err_msg)
            self._finalize(start_time)
            raise JobException(err_msg)

    def _fake_work(self, job):
        """Fill the job with fake success data after waiting for some time."""
        time.sleep(self._fake_worker_time)
        job.success = True
        job.text = ["ok"]
        job.plus = {
            "execution_time": self._fake_worker_time,
            "execution_wall_clock_time": self._fake_worker_time,
            "execution_memory": 1000,
        }
        if isinstance(job, CompilationJob):
            job.compilation_success = True
        elif isinstance(job, EvaluationJob):
            job.outcome = "1.0"

    def _finalize(self, start_time):
        """Update and log the busy/free time statistics.

        start_time (float): value of time.time() taken when the
            request that is now ending was received.

        """
        end_time = time.time()
        busy_time = end_time - start_time
        free_time = 0.0
        if self._last_end_time is not None:
            free_time = start_time - self._last_end_time
        self._last_end_time = end_time
        self._total_busy_time += busy_time
        self._total_free_time += free_time

        # With a coarse clock the first request can measure no elapsed
        # time at all; avoid dividing by zero in that case.
        total_time = self._total_busy_time + self._total_free_time
        if total_time:
            ratio = self._total_busy_time * 100.0 / total_time
        else:
            ratio = 0.0

        # Averages use the number of executions completed before this
        # one (the counter is incremented just below).
        avg_free_time = 0.0
        if self._number_execution > 0:
            avg_free_time = self._total_free_time / self._number_execution
        avg_busy_time = 0.0
        if self._number_execution > 0:
            avg_busy_time = self._total_busy_time / self._number_execution
        self._number_execution += 1
        logger.info("Executed in %.3lf after free for %.3lf; "
                    "busyness is %.1lf%%; avg free time is %.3lf "
                    "avg busy time is %.3lf ",
                    busy_time, free_time, ratio, avg_free_time, avg_busy_time)
def test_testcases(base_dir, soluzione, assume=None): global task, file_cacher # Use a FileCacher with a NullBackend in order to avoid to fill # the database with junk if file_cacher is None: file_cacher = FileCacher(null=True) # Load the task # TODO - This implies copying a lot of data to the FileCacher, # which is annoying if you have to do it continuously; it would be # better to use a persistent cache (although local, possibly # filesystem-based instead of database-based) and somehow detect # when the task has already been loaded if task is None: loader = YamlLoader( os.path.realpath(os.path.join(base_dir, "..")), file_cacher) # Normally we should import the contest before, but YamlLoader # accepts get_task() even without previous get_contest() calls task = loader.get_task(os.path.split(os.path.realpath(base_dir))[1]) # Prepare the EvaluationJob dataset = task.active_dataset digest = file_cacher.put_file_from_path( os.path.join(base_dir, soluzione), "Solution %s for task %s" % (soluzione, task.name)) executables = {task.name: Executable(filename=task.name, digest=digest)} jobs = [(t, EvaluationJob( task_type=dataset.task_type, task_type_parameters=json.loads(dataset.task_type_parameters), managers=dict(dataset.managers), executables=executables, input=dataset.testcases[t].input, output=dataset.testcases[t].output, time_limit=dataset.time_limit, memory_limit=dataset.memory_limit)) for t in dataset.testcases] tasktype = get_task_type(dataset=dataset) ask_again = True last_status = "ok" status = "ok" stop = False info = [] points = [] comments = [] tcnames = [] for jobinfo in sorted(jobs): print jobinfo[0], sys.stdout.flush() job = jobinfo[1] # Skip the testcase if we decide to consider everything to # timeout if stop: info.append("Time limit exceeded") points.append(0.0) comments.append("Timeout.") continue # Evaluate testcase last_status = status tasktype.evaluate(job, file_cacher) status = job.plus["exit_status"] info.append("Time: %5.3f Wall: %5.3f Memory: %s" 
% (job.plus["execution_time"], job.plus["execution_wall_clock_time"], mem_human(job.plus["execution_memory"]))) points.append(float(job.outcome)) comments.append(format_status_text(job.text)) tcnames.append(jobinfo[0]) # If we saw two consecutive timeouts, ask wether we want to # consider everything to timeout if ask_again and status == "timeout" and last_status == "timeout": print print "Want to stop and consider everything to timeout? [y/N]", if assume is not None: print assume tmp = assume else: tmp = raw_input().lower() if tmp in ['y', 'yes']: stop = True else: ask_again = False # Result pretty printing print clen = max(len(c) for c in comments) ilen = max(len(i) for i in info) for (i, p, c, b) in zip(tcnames, points, comments, info): print "%s) %5.2lf --- %s [%s]" % (i, p, c.ljust(clen), b.center(ilen)) return zip(points, comments, info)
class TestFileCacher(TestService):
    """Service that performs automatically some tests for the
    FileCacher service.

    """

    def __init__(self, shard):
        """Set up the logger, the test state and the FileCacher.

        shard: shard number, used for the logger coordinates and
            forwarded to the TestService constructor.

        """
        logger.initialize(ServiceCoord("TestFileCacher", shard))
        TestService.__init__(self, shard, custom_logger=logger)

        # Assume we store the cache in "./cache/fs-cache-TestFileCacher-0/"
        self.cache_base_path = os.path.join(config.cache_dir,
                                            "fs-cache-TestFileCacher-0")
        # State shared between consecutive tests.
        self.cache_path = None
        self.content = None
        self.fake_content = None
        self.digest = None
        self.file_obj = None
        self.file_cacher = FileCacher(self)
        #self.file_cacher = FileCacher(self, path="fs-storage")

    @staticmethod
    def _read_binary(path):
        """Return the whole content of path, read in binary mode."""
        with open(path, "rb") as fobj:
            return fobj.read()

    def prepare(self):
        """Initialization for the test code - make sure that the cache
        is empty before testing.

        """
        logger.info("Please delete directory %s before." %
                    self.cache_base_path)

### TEST 000 ###

    def test_000(self):
        """Send a ~100B random binary file to the storage through
        FileCacher as a file-like object. FC should cache the content
        locally.

        """
        self.size = 100
        self.content = "".join(chr(random.randint(0, 255))
                               for unused_i in xrange(self.size))

        logger.info(" I am sending the ~100B binary file to FileCacher")
        try:
            data = self.file_cacher.put_file_from_fobj(StringIO(self.content),
                                                       u"Test #000")
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        cached_path = os.path.join(self.cache_base_path, data)
        if not os.path.exists(cached_path):
            self.test_end(False, "File not stored in local cache.")
        elif self._read_binary(cached_path) != self.content:
            self.test_end(False, "Local cache's content differ "
                          "from original file.")
        else:
            self.cache_path = cached_path
            self.digest = data
            self.test_end(True, "Data sent and cached without error.")

### TEST 001 ###

    def test_001(self):
        """Retrieve the file.

        """
        logger.info(" I am retrieving the ~100B binary file from FileCacher")
        # Tamper with the cached copy: if the cache is used we must
        # get the fake content back.
        self.fake_content = "Fake content.\n"
        with open(self.cache_path, "wb") as cached_file:
            cached_file.write(self.fake_content)
        try:
            data = self.file_cacher.get_file(self.digest)
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        received = data.read()
        data.close()
        if received != self.fake_content:
            if received == self.content:
                self.test_end(False,
                              "Did not use the cache even if it could.")
            else:
                self.test_end(False, "Content differ.")
        else:
            self.test_end(True, "Data object received correctly.")

### TEST 002 ###

    def test_002(self):
        """Check the size of the file.

        """
        logger.info(" I am checking the size of the ~100B binary file")
        try:
            size = self.file_cacher.get_size(self.digest)
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        if size == self.size:
            self.test_end(True, "The size is correct.")
        else:
            self.test_end(False, "The size is wrong: %d instead of %d" %
                          (size, self.size))

### TEST 003 ###

    def test_003(self):
        """Get file from FileCacher.

        """
        logger.info(" I am retrieving the file from FileCacher " +
                    "after deleting the cache.")
        os.unlink(self.cache_path)
        try:
            data = self.file_cacher.get_file(self.digest)
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        received = data.read()
        data.close()
        if received != self.content:
            self.test_end(False, "Content differ.")
        elif not os.path.exists(self.cache_path):
            self.test_end(False, "File not stored in local cache.")
        elif self._read_binary(self.cache_path) != self.content:
            self.test_end(False, "Local cache's content differ " +
                          "from original file.")
        else:
            self.test_end(True, "Content object received " +
                          "and cached correctly.")

### TEST 004 ###

    def test_004(self):
        """Delete the file through FS and tries to get it again through FC.

        """
        logger.info(" I am deleting the file from FileCacher.")
        try:
            self.file_cacher.delete(digest=self.digest)
        except Exception as error:
            self.test_end(False, "Error received: %s." % error)
            return

        else:
            logger.info(" File deleted correctly.")
            logger.info(" I am getting the file from FileCacher.")
            try:
                self.file_cacher.get_file(self.digest)
            except Exception as error:
                self.test_end(True, "Correctly received an error: %r." % error)
            else:
                self.test_end(False, "Did not receive error.")

### TEST 005 ###

    def test_005(self):
        """Get unexisting file from FileCacher.

        """
        logger.info(" I am retrieving an unexisting file from FileCacher.")
        try:
            self.file_cacher.get_file(self.digest)
        except Exception as error:
            self.test_end(True, "Correctly received an error: %r." % error)
        else:
            self.test_end(False, "Did not receive error.")

### TEST 006 ###

    def test_006(self):
        """Send a ~100B random binary file to the storage through
        FileCacher as a string. FC should cache the content locally.

        """
        self.content = "".join(chr(random.randint(0, 255))
                               for unused_i in xrange(100))

        logger.info(" I am sending the ~100B binary file to FileCacher")
        try:
            data = self.file_cacher.put_file_content(self.content,
                                                     u"Test #005")
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        cached_path = os.path.join(self.cache_base_path, data)
        if not os.path.exists(cached_path):
            self.test_end(False, "File not stored in local cache.")
        elif self._read_binary(cached_path) != self.content:
            self.test_end(False, "Local cache's content differ "
                          "from original file.")
        else:
            self.cache_path = cached_path
            self.digest = data
            self.test_end(True, "Data sent and cached without error.")

### TEST 007 ###

    def test_007(self):
        """Retrieve the file as a string.

        """
        logger.info(" I am retrieving the ~100B binary file from FileCacher "
                    "using get_file_to_string()")
        # Tamper with the cached copy: if the cache is used we must
        # get the fake content back.
        self.fake_content = "Fake content.\n"
        with open(self.cache_path, "wb") as cached_file:
            cached_file.write(self.fake_content)
        try:
            data = self.file_cacher.get_file_content(self.digest)
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        if data != self.fake_content:
            if data == self.content:
                self.test_end(False,
                              "Did not use the cache even if it could.")
            else:
                self.test_end(False, "Content differ.")
        else:
            self.test_end(True, "Data received correctly.")

### TEST 008 ###

    def test_008(self):
        """Put a ~100MB file into the storage (using a specially
        crafted file-like object).

        """
        logger.info(" I am sending the ~100MB binary file to FileCacher")
        rand_file = RandomFile(100000000)
        try:
            data = self.file_cacher.put_file_from_fobj(rand_file, u"Test #007")
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        fully_read = rand_file.dim == 0
        my_digest = rand_file.digest
        rand_file.close()

        if not fully_read:
            self.test_end(False, "The input file wasn't read completely.")
            # Report a single outcome: do not fall through to the
            # checks below.
            return

        cached_path = os.path.join(self.cache_base_path, data)
        if not os.path.exists(cached_path):
            self.test_end(False, "File not stored in local cache.")
        elif my_digest != data:
            self.test_end(False, "File received with wrong hash.")
        else:
            self.cache_path = cached_path
            self.digest = data
            self.test_end(True, "Data sent and cached without error.")

### TEST 009 ###

    def test_009(self):
        """Get the ~100MB file from FileCacher.

        """
        logger.info(" I am retrieving the ~100MB file from FileCacher " +
                    "after deleting the cache.")
        os.unlink(self.cache_path)
        hash_file = HashingFile()
        try:
            self.file_cacher.get_file_to_fobj(self.digest, hash_file)
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return
        my_digest = hash_file.digest
        hash_file.close()

        try:
            if self.digest != my_digest:
                self.test_end(False, "Content differs.")
            elif not os.path.exists(self.cache_path):
                self.test_end(False, "File not stored in local cache.")
            else:
                self.test_end(True, "Content object received " +
                              "and cached correctly.")
        finally:
            # Always remove the big file from the storage.
            self.file_cacher.delete(self.digest)
class Worker(Service):
    """This service implement the possibility to compile and evaluate
    submissions in a sandbox. The instructions to follow for the
    operations are in the TaskType classes, while the sandbox is in
    the Sandbox module.

    """

    JOB_TYPE_COMPILATION = "compile"
    JOB_TYPE_EVALUATION = "evaluate"

    def __init__(self, shard):
        """Initialize the worker.

        shard: forwarded to the Service constructor.

        """
        Service.__init__(self, shard)
        self.file_cacher = FileCacher(self)

        self.work_lock = gevent.coros.RLock()
        # Set by ignore_job() to make the running job group stop early.
        self._ignore_job = False

    @rpc_method
    def ignore_job(self):
        """RPC that inform the worker that its result for the current
        action will be discarded. The worker will try to return as
        soon as possible even if this means that the result are
        inconsistent.

        """
        # We remember to quit as soon as possible.
        logger.info("Trying to interrupt job as requested.")
        self._ignore_job = True

    @rpc_method
    def precache_files(self, contest_id):
        """RPC to ask the worker to precache of files in the contest.

        contest_id (int): the id of the contest

        """
        # Lock is not needed if the admins correctly placed cache and
        # temp directories in the same filesystem. This is what
        # usually happens since they are children of the same,
        # cms-created, directory.
        logger.info("Precaching files for contest %d." % contest_id)
        with SessionGen() as session:
            contest = Contest.get_from_id(contest_id, session)
            for digest in contest.enumerate_files(skip_submissions=True,
                                                  skip_user_tests=True):
                self.file_cacher.load(digest)
        logger.info("Precaching finished.")

    @rpc_method
    def execute_job_group(self, job_group_dict):
        """Receive a group of jobs in a dict format and executes them
        one by one.

        job_group_dict (dict): a dictionary suitable to be imported
            from JobGroup.

        return (dict): the JobGroup, exported to dict, after the jobs
            have been executed.

        raise (JobException): if the worker is already busy or if the
            execution fails.

        """
        job_group = JobGroup.import_from_dict(job_group_dict)

        # Non-blocking acquire: a busy worker declines the request.
        if self.work_lock.acquire(False):

            try:
                self._ignore_job = False

                for k, job in job_group.jobs.iteritems():
                    logger.info("Starting job.",
                                extra={"operation": job.info})
                    #self.rpc_test(job_group_dict)

                    job.shard = self.shard

                    # FIXME This is actually kind of a workaround...
                    # The only TaskType that needs it is OutputOnly.
                    job._key = k

                    # FIXME We're creating a new TaskType for each Job
                    # even if, at the moment, a JobGroup always uses
                    # the same TaskType and the same parameters. Yet,
                    # this could change in the future, so the best
                    # solution is to keep a cache of TaskTypes objects
                    # (like ScoringService does with ScoreTypes, except
                    # that we cannot index by Dataset ID here...).
                    task_type = get_task_type(job.task_type,
                                              job.task_type_parameters)
                    task_type.execute_job(job, self.file_cacher)

                    logger.info("Finished job.",
                                extra={"operation": job.info})

                    # Stop at the first failed job, or as soon as we
                    # have been told the result will be discarded.
                    if not job.success or self._ignore_job:
                        job_group.success = False
                        break
                else:
                    # Reached only when no job broke out of the loop.
                    job_group.success = True

                return job_group.export_to_dict()

            # Only ordinary errors are turned into a JobException;
            # SystemExit, KeyboardInterrupt and greenlet termination
            # propagate untouched.
            except Exception:
                err_msg = "Worker failed."
                logger.error(err_msg, exc_info=True)
                raise JobException(err_msg)

            finally:
                self.work_lock.release()

        else:
            err_msg = "Request received, but declined because of acquired " \
                "lock (Worker is busy executing another job group, this " \
                "should not happen: check if there are more than one ES " \
                "running, or for bugs in ES."
            logger.warning(err_msg)
            raise JobException(err_msg)

    @rpc_method
    def rpc_test(self, mes):
        """RPC that just logs the given message at info level."""
        logger.info(mes)
def add_submission(contest_id, username, task_name, timestamp, files):
    """Store a submission in the database on behalf of a user.

    contest_id: id of the contest the participation and the task are
        looked up in.
    username: username of the submitting user.
    task_name: name of the task.
    timestamp: time of the submission; converted with make_datetime
        and also formatted with %d in the file description.
    files (dict): maps filenames of the task's submission format to
        the paths of the files to submit.

    return (bool): True if the submission was stored, False if any
        check failed or a file could not be stored.

    """
    file_cacher = FileCacher()
    with SessionGen() as session:

        participation = session.query(Participation)\
            .join(Participation.user)\
            .filter(Participation.contest_id == contest_id)\
            .filter(User.username == username)\
            .first()
        if participation is None:
            logger.critical("User `%s' does not exists or "
                            "does not participate in the contest.",
                            username)
            return False

        task = session.query(Task)\
            .filter(Task.contest_id == contest_id)\
            .filter(Task.name == task_name)\
            .first()
        if task is None:
            logger.critical("Unable to find task `%s'.", task_name)
            return False

        elements = set(fmt.filename for fmt in task.submission_format)

        for file_ in files:
            if file_ not in elements:
                logger.critical("File `%s' is not in the submission "
                                "format for the task.", file_)
                return False

        if any(element not in files for element in elements):
            logger.warning("Not all files from the submission format were "
                           "provided.")

        # files is now a subset of elements. We compute the language
        # for each file and check that they do not mix.
        language = None
        for file_ in files:
            this_language = filename_to_language(files[file_])
            # An unknown language is an error only for files whose
            # name in the submission format depends on the language.
            if this_language is None and "%l" in file_:
                logger.critical("Cannot recognize language for file `%s'.",
                                file_)
                return False

            if language is None:
                language = this_language
            elif this_language is not None and language != this_language:
                logger.critical("Mixed-language submission detected.")
                return False

        # Store all files from the arguments, and obtain their digests.
        file_digests = {}
        try:
            for file_ in files:
                digest = file_cacher.put_file_from_path(
                    files[file_],
                    "Submission file %s sent by %s at %d."
                    % (file_, username, timestamp))
                file_digests[file_] = digest
        except Exception:
            # KeyboardInterrupt and SystemExit are deliberately left
            # to propagate instead of being reported as storage errors.
            logger.critical("Error while storing submission's file.",
                            exc_info=True)
            return False

        # Create objects in the DB.
        submission = Submission(make_datetime(timestamp), language,
                                participation=participation, task=task)
        for filename, digest in file_digests.items():
            session.add(File(filename, digest, submission=submission))
        session.add(submission)
        session.commit()

    return True
class DumpExporter(object):
    """This service exports every data that CMS knows. The process of
    exporting and importing again should be idempotent.

    """

    def __init__(self, contest_ids, export_target,
                 dump_files, dump_model, skip_generated,
                 skip_submissions, skip_user_tests):
        """Prepare the export.

        contest_ids (list|None): ids of the contests to export; None
            means every contest, every user and every task that is not
            attached to a contest.
        export_target (str): directory or archive to write to; if
            empty, a name based on today's date is used.
        dump_files (bool): whether to export the files.
        dump_model (bool): whether to export the data as JSON.
        skip_generated (bool): whether to leave out generated data.
        skip_submissions (bool): whether to leave out submissions.
        skip_user_tests (bool): whether to leave out user tests.

        """
        if contest_ids is None:
            with SessionGen() as session:
                contests = session.query(Contest).all()
                self.contests_ids = [contest.id for contest in contests]
                users = session.query(User).all()
                self.users_ids = [user.id for user in users]
                tasks = session.query(Task)\
                    .filter(Task.contest_id.is_(None)).all()
                self.tasks_ids = [task.id for task in tasks]
        else:
            # FIXME: this is ATM broken, because if you export a contest, you
            # then export the users who participated in it and then all of the
            # contests those users participated in.
            self.contests_ids = contest_ids
            self.users_ids = []
            self.tasks_ids = []
        self.dump_files = dump_files
        self.dump_model = dump_model
        self.skip_generated = skip_generated
        self.skip_submissions = skip_submissions
        self.skip_user_tests = skip_user_tests
        self.export_target = export_target

        # If target is not provided, we build a name from today's date.
        if export_target == "":
            self.export_target = "dump_%s.tar.gz" % date.today().isoformat()
            logger.warning("export_target not given, using \"%s\"",
                           self.export_target)

        self.file_cacher = FileCacher()

    def do_export(self):
        """Run the actual export code.

        return (bool): True if the export succeeded, False otherwise.

        """
        logger.info("Starting export.")

        export_dir = self.export_target
        # Set only when exporting to an archive: the data is first
        # written to a scratch directory that we must remove ourselves.
        temp_root = None
        archive_info = get_archive_info(self.export_target)

        if archive_info["write_mode"] != "":
            # We are able to write to this archive.
            if os.path.exists(self.export_target):
                logger.critical("The specified file already exists, "
                                "I won't overwrite it.")
                return False
            temp_root = tempfile.mkdtemp()
            export_dir = os.path.join(temp_root, archive_info["basename"])

        try:
            return self._export_to_dir(export_dir, archive_info)
        finally:
            # Remove the whole scratch directory (not just the exported
            # subdirectory), on failures too.
            if temp_root is not None:
                rmtree(temp_root)

    def _export_to_dir(self, export_dir, archive_info):
        """Write the export into export_dir and, if requested, pack it.

        export_dir (str): the directory to create and fill.
        archive_info (dict): the value of get_archive_info for the
            export target; its "write_mode" and "basename" items are
            used.

        return (bool): True if all went well, False otherwise.

        """
        logger.info("Creating dir structure.")
        try:
            os.mkdir(export_dir)
        except OSError:
            logger.critical("The specified directory already exists, "
                            "I won't overwrite it.")
            return False

        files_dir = os.path.join(export_dir, "files")
        descr_dir = os.path.join(export_dir, "descriptions")
        os.mkdir(files_dir)
        os.mkdir(descr_dir)

        with SessionGen() as session:
            # Export files.
            logger.info("Exporting files.")
            if self.dump_files:
                for contest_id in self.contests_ids:
                    contest = Contest.get_from_id(contest_id, session)
                    files = contest.enumerate_files(self.skip_submissions,
                                                    self.skip_user_tests,
                                                    self.skip_generated)
                    for file_ in files:
                        if not self.safe_get_file(
                                file_,
                                os.path.join(files_dir, file_),
                                os.path.join(descr_dir, file_)):
                            return False

            # Export data in JSON format.
            if self.dump_model:
                logger.info("Exporting data to a JSON file.")

                # We use strings because they'll be the keys of a JSON
                # object
                self.ids = {}
                self.queue = []

                data = dict()

                for cls, lst in [(Contest, self.contests_ids),
                                 (User, self.users_ids),
                                 (Task, self.tasks_ids)]:
                    for i in lst:
                        obj = cls.get_from_id(i, session)
                        self.get_id(obj)

                # Specify the "root" of the data graph. This has to be
                # a snapshot: self.ids keeps growing while the queue
                # is processed below.
                data["_objects"] = list(self.ids.values())

                while len(self.queue) > 0:
                    obj = self.queue.pop(0)
                    data[self.ids[obj.sa_identity_key]] = \
                        self.export_object(obj)

                data["_version"] = model_version

                with io.open(os.path.join(export_dir,
                                          "contest.json"), "wb") as fout:
                    json.dump(data, fout, encoding="utf-8",
                              indent=4, sort_keys=True)

        # If the admin requested export to file, we do that.
        if archive_info["write_mode"] != "":
            archive = tarfile.open(self.export_target,
                                   archive_info["write_mode"])
            try:
                archive.add(export_dir, arcname=archive_info["basename"])
            finally:
                archive.close()

        logger.info("Export finished.")

        return True

    def get_id(self, obj):
        """Return the export ID of obj, assigning one if needed.

        An object seen for the first time gets the next free ID and is
        appended to the queue of objects still to be exported.

        obj: an object exposing a hashable sa_identity_key.

        return (str): the ID of the object in the exported data.

        """
        obj_key = obj.sa_identity_key
        if obj_key not in self.ids:
            # We use strings because they'll be the keys of a JSON object
            self.ids[obj_key] = "%d" % len(self.ids)
            self.queue.append(obj)

        return self.ids[obj_key]

    def export_object(self, obj):
        """Export the given object, returning a JSON-encodable dict.

        The returned dict will contain a "_class" item (the name of the
        class of the given object), an item for each column property
        (with a value properly translated to a JSON-compatible type)
        and an item for each relationship property (which will be an ID
        or a collection of IDs).

        The IDs used in the exported dict aren't related to the ones
        used in the DB: they are newly generated and their scope is
        limited to the exported file only. They are shared among all
        classes (that is, two objects can never share the same ID, even
        if they are of different classes).

        If, when exporting the relationship, we find an object without
        an ID we generate a new ID, assign it to the object and append
        the object to the queue of objects to export.

        The self.skip_submissions flag controls whether we export
        submissions (and all other objects that can be reached only by
        passing through a submission) or not; self.skip_user_tests and
        self.skip_generated do the same for user tests and for
        submission/user test results.

        raise (RuntimeError): on a column or relationship value of an
            unsupported type.

        """
        cls = type(obj)

        data = {"_class": cls.__name__}

        for prp in cls._col_props:
            col, = prp.columns
            col_type = type(col.type)

            val = getattr(obj, prp.key)
            if col_type in \
                    [Boolean, Integer, Float, Unicode, RepeatedUnicode, Enum]:
                data[prp.key] = val
            elif col_type is String:
                data[prp.key] = \
                    val.decode('latin1') if val is not None else None
            elif col_type is DateTime:
                data[prp.key] = \
                    make_timestamp(val) if val is not None else None
            elif col_type is Interval:
                data[prp.key] = \
                    val.total_seconds() if val is not None else None
            else:
                raise RuntimeError("Unknown SQLAlchemy column type: %s"
                                   % col_type)

        for prp in cls._rel_props:
            other_cls = prp.mapper.class_

            # Skip submissions if requested
            if self.skip_submissions and other_cls is Submission:
                continue

            # Skip user_tests if requested
            if self.skip_user_tests and other_cls is UserTest:
                continue

            # Skip generated data if requested
            if self.skip_generated and \
                    other_cls in (SubmissionResult, UserTestResult):
                continue

            val = getattr(obj, prp.key)
            if val is None:
                data[prp.key] = None
            elif isinstance(val, other_cls):
                data[prp.key] = self.get_id(val)
            elif isinstance(val, list):
                data[prp.key] = [self.get_id(item) for item in val]
            elif isinstance(val, dict):
                data[prp.key] = \
                    dict((k, self.get_id(v)) for k, v in val.iteritems())
            else:
                raise RuntimeError("Unknown SQLAlchemy relationship type: %s"
                                   % type(val))

        return data

    def safe_get_file(self, digest, path, descr_path=None):
        """Get file from FileCacher ensuring that the digest is
        correct.

        digest (string): the digest of the file to retrieve.
        path (string): the path where to save the file.
        descr_path (string): the path where to save the description.

        return (bool): True if all ok, False if something wrong.

        """
        # TODO - Probably this method could be merged in FileCacher

        # First get the file
        try:
            self.file_cacher.get_file_to_path(digest, path)
        except Exception:
            logger.error("File %s could not retrieved from file server.",
                         digest, exc_info=True)
            return False

        # Then check the digest
        calc_digest = sha1sum(path)
        if digest != calc_digest:
            logger.critical("File %s has wrong hash %s.",
                            digest, calc_digest)
            return False

        # If applicable, retrieve also the description
        if descr_path is not None:
            with io.open(descr_path, 'wt', encoding='utf-8') as fout:
                fout.write(self.file_cacher.describe(digest))

        return True
class Worker(Service):
    """Service able to compile and evaluate submissions in a sandbox.

    What to do for each operation is defined by the TaskType classes;
    the sandbox itself lives in the Sandbox module.

    """

    JOB_TYPE_COMPILATION = "compile"
    JOB_TYPE_EVALUATION = "evaluate"

    def __init__(self, shard):
        """Create the worker for the given shard.

        shard: forwarded to Service; also stamped on executed jobs.

        """
        Service.__init__(self, shard)
        self.file_cacher = FileCacher(self)

        # Taken without blocking by execute_job: a request arriving
        # while another one is running is refused.
        self.work_lock = gevent.coros.RLock()

    @rpc_method
    def precache_files(self, contest_id):
        """RPC asking the worker to precache the files of a contest.

        contest_id (int): the id of the contest

        """
        logger.info("Precaching files for contest %d.", contest_id)

        # Keep the DB connection short-lived: collect the whole list of
        # digests first and download afterwards. Races are harmless
        # here, as this is just pre-caching.
        with SessionGen() as session:
            digests = Contest.get_from_id(contest_id, session) \
                .enumerate_files(skip_submissions=True,
                                 skip_user_tests=True)

        for digest in digests:
            try:
                self.file_cacher.load(digest, if_needed=True)
            except KeyError:
                # A missing file is not a problem at this stage.
                continue

        logger.info("Precaching finished.")

    @rpc_method
    def execute_job(self, job_dict):
        """Receive a job in dict format, execute it and return it.

        job_dict (dict): a dictionary suitable to be imported from Job.

        return (dict): the executed job, exported to a dict.

        raise (JobException): if the worker is busy with another job
            or if the execution fails.

        """
        job = Job.import_from_dict_with_type(job_dict)

        # Refuse right away when some other job holds the lock.
        if not self.work_lock.acquire(False):
            busy_msg = "Request received, but declined because of acquired " \
                "lock (Worker is busy executing another job, this should " \
                "not happen: check if there are more than one ES running, " \
                "or for bugs in ES."
            logger.warning(busy_msg)
            raise JobException(busy_msg)

        try:
            operation = {"operation": job.info}
            logger.info("Starting job.", extra=operation)

            job.shard = self.shard

            runner = get_task_type(job.task_type, job.task_type_parameters)
            runner.execute_job(job, self.file_cacher)

            logger.info("Finished job.", extra={"operation": job.info})

            return job.export_to_dict()

        except:
            failure_msg = "Worker failed."
            logger.error(failure_msg, exc_info=True)
            raise JobException(failure_msg)

        finally:
            self.work_lock.release()
def add_submission(contest_id, username, task_name, timestamp, files):
    """Store a submission in the database on behalf of a user.

    contest_id: id of the contest the participation and the task are
        looked up in.
    username: username of the submitting user.
    task_name: name of the task.
    timestamp: time of the submission; converted with make_datetime
        and also formatted with %d in the file description.
    files (dict): maps elements of the task's submission format to the
        paths of the files to submit.

    return (bool): True if the submission was stored, False if any
        check failed or a file could not be stored.

    """
    file_cacher = FileCacher()
    with SessionGen() as session:

        participation = session.query(Participation)\
            .join(Participation.user)\
            .filter(Participation.contest_id == contest_id)\
            .filter(User.username == username)\
            .first()
        if participation is None:
            logger.critical("User `%s' does not exists or "
                            "does not participate in the contest.",
                            username)
            return False

        task = session.query(Task)\
            .filter(Task.contest_id == contest_id)\
            .filter(Task.name == task_name)\
            .first()
        if task is None:
            logger.critical("Unable to find task `%s'.", task_name)
            return False

        elements = set(task.submission_format)

        for file_ in files:
            if file_ not in elements:
                logger.critical("File `%s' is not in the submission "
                                "format for the task.", file_)
                return False

        if any(element not in files for element in elements):
            logger.warning("Not all files from the submission format were "
                           "provided.")

        # files is now a subset of elements.
        # We ensure we can infer a language if the task requires it.
        language = None
        need_lang = any(".%l" in element for element in elements)
        if need_lang:
            try:
                language = language_from_submitted_files(files)
            except ValueError as e:
                logger.critical(e)
                return False
            if language is None:
                # This might happen in case not all files were provided.
                logger.critical("Unable to infer language from submission.")
                return False
        language_name = None if language is None else language.name

        # Store all files from the arguments, and obtain their digests.
        file_digests = {}
        try:
            for file_ in files:
                digest = file_cacher.put_file_from_path(
                    files[file_],
                    "Submission file %s sent by %s at %d."
                    % (file_, username, timestamp))
                file_digests[file_] = digest
        except Exception as e:
            logger.critical("Error while storing submission's file: %s.", e)
            return False

        # Create objects in the DB.
        submission = Submission(make_datetime(timestamp), language_name,
                                participation=participation, task=task)
        for filename, digest in iteritems(file_digests):
            session.add(File(filename, digest, submission=submission))
        session.add(submission)
        session.commit()
        maybe_send_notification(submission.id)

    return True
def __init__(self, shard):
    """Initialize the service and create its file cacher and lock.

    shard: forwarded unchanged to Service.__init__.

    """
    Service.__init__(self, shard)
    # The file cacher is handed this service instance.
    self.file_cacher = FileCacher(self)

    # Re-entrant lock from gevent.
    # NOTE(review): presumably used to serialize job execution; the
    # methods using it are not part of this fragment - confirm.
    self.work_lock = gevent.coros.RLock()
class SpoolExporter:
    """Exporter that lays out the results of a contest in a directory
    tree "similar" to the one used in the Italian IOI repository.

    """

    def __init__(self, contest_id, spool_dir):
        """Remember what to export and where.

        contest_id: id of the contest to export.
        spool_dir: directory to create and fill; submissions go in its
            "upload" subdirectory.

        """
        self.contest_id = contest_id
        self.spool_dir = spool_dir
        self.upload_dir = os.path.join(spool_dir, "upload")
        # Both are loaded from the database by do_export.
        self.contest = None
        self.submissions = None

        self.file_cacher = FileCacher()

    def run(self):
        """Interface to make the class do its job."""
        return self.do_export()

    def do_export(self):
        """Create the spool directory and export submissions and
        ranking into it.

        return (bool): True on success, False if the directory already
            exists or if the export raised.

        """
        logger.operation = "exporting contest %s" % self.contest_id
        logger.info("Starting export.")

        logger.info("Creating dir structure.")
        try:
            os.mkdir(self.spool_dir)
        except OSError:
            logger.critical("The specified directory already exists, "
                            "I won't overwrite it.")
            return False
        os.mkdir(self.upload_dir)

        with SessionGen() as session:
            self.contest = Contest.get_from_id(self.contest_id, session)

            query = get_submissions(session, contest_id=self.contest_id)
            query = query.filter(not_(Participation.hidden))
            self.submissions = query.order_by(Submission.timestamp).all()

            # One upload directory for each visible participant.
            for participation in self.contest.participations:
                if participation.hidden:
                    continue
                user_dir = os.path.join(self.upload_dir,
                                        participation.user.username)
                os.mkdir(user_dir)

            try:
                self.export_submissions()
                self.export_ranking()
            except Exception:
                logger.critical("Generic error.", exc_info=True)
                return False

        logger.info("Export finished.")
        logger.operation = ""

        return True

    def export_submissions(self):
        """Export the source files of the submissions, the "queue"
        index and, for evaluated submissions, the result files.

        """
        logger.info("Exporting submissions.")

        queue_path = os.path.join(self.spool_dir, "queue")
        by_time = sorted(self.submissions, key=lambda x: x.timestamp)

        with open(queue_path, "wt", encoding="utf-8") as queue_file:
            for submission in by_time:
                logger.info("Exporting submission %s.", submission.id)
                username = submission.participation.user.username
                task = submission.task.name
                timestamp = time.mktime(submission.timestamp.timetuple())

                # Get source files to the spool directory.
                language = languagemanager.get_language(submission.language)
                ext = language.source_extension
                user_dir = os.path.join(self.upload_dir, username)
                submission_dir = os.path.join(
                    user_dir, "%s.%d.%s" % (task, timestamp, ext))
                os.mkdir(submission_dir)
                for filename, file_ in submission.files.items():
                    target = os.path.join(submission_dir,
                                          filename.replace(".%l", ext))
                    self.file_cacher.get_file_to_path(file_.digest, target)

                # Point the "latest submission" link to this one.
                last_submission_dir = os.path.join(
                    user_dir, "%s.%s" % (task, ext))
                try:
                    os.unlink(last_submission_dir)
                except OSError:
                    pass
                os.symlink(os.path.basename(submission_dir),
                           last_submission_dir)
                print("./upload/%s/%s.%d.%s" %
                      (username, task, timestamp, ext), file=queue_file)

                # Write results file for the submission.
                result = submission.get_result(submission.task.active_dataset)
                if not result.evaluated():
                    continue

                stamped_path = os.path.join(
                    self.spool_dir,
                    "%d.%s.%s.%s.res" % (timestamp, username, task, ext))
                latest_path = os.path.join(
                    self.spool_dir,
                    "%s.%s.%s.res" % (username, task, ext))
                with open(stamped_path, "wt", encoding="utf-8") as res_file, \
                        open(latest_path, "wt",
                             encoding="utf-8") as res2_file:

                    def emit(text):
                        # The two result files get the same content.
                        print(text, file=res_file)
                        print(text, file=res2_file)

                    total = 0.0
                    for evaluation in result.evaluations:
                        outcome = float(evaluation.outcome)
                        total += outcome
                        emit("Executing on file with codename '%s' %s (%.4f)"
                             % (evaluation.testcase.codename,
                                evaluation.text, outcome))
                    emit("Score: %.6f" % total)

            print("", file=queue_file)

    def export_ranking(self):
        """Exports the ranking in csv and txt (human-readable) form.

        """
        logger.info("Exporting ranking.")

        tasks = self.contest.tasks
        usernames = [participation.user.username
                     for participation in self.contest.participations
                     if not participation.hidden]

        # Create the structure to store the scores.
        scores = {username: 0.0 for username in usernames}
        task_scores = {task.id: {username: 0.0 for username in usernames}
                       for task in tasks}

        is_partial = False
        for task in tasks:
            for participation in self.contest.participations:
                if participation.hidden:
                    continue
                username = participation.user.username
                score, partial = task_score(participation, task)
                if partial:
                    is_partial = True
                task_scores[task.id][username] = score
                scores[username] += score
        if is_partial:
            logger.warning("Some of the scores are not definitive.")

        # Best total first; ties broken by username, also descending.
        sorted_usernames = sorted(
            scores, key=lambda name: (scores[name], name), reverse=True)
        sorted_tasks = sorted(tasks, key=lambda task: task.num)
        n_tasks = len(sorted_tasks)
        task_names = tuple(task.name for task in sorted_tasks)

        txt_path = os.path.join(self.spool_dir, "ranking.txt")
        csv_path = os.path.join(self.spool_dir, "ranking.csv")
        with open(txt_path, "wt", encoding="utf-8") as ranking_file, \
                open(csv_path, "wt", encoding="utf-8") as ranking_csv:

            # Write rankings' header.
            print("Final Ranking of Contest `%s'" %
                  self.contest.description, file=ranking_file)
            txt_header = "%20s %10s" % ("User", "Total")
            csv_header = "%s,%s" % ("user", "total")
            print(txt_header + ((" %10s" * n_tasks) % task_names),
                  file=ranking_file)
            print(csv_header + ((",%s" * n_tasks) % task_names),
                  file=ranking_csv)

            # Write rankings' content.
            points_line = " %10.3f" * n_tasks
            csv_points_line = ",%.6f" * n_tasks
            for username in sorted_usernames:
                user_scores = tuple(task_scores[task.id][username]
                                    for task in sorted_tasks)
                total = scores[username]
                print(("%20s %10.3f" % (username, total)) +
                      (points_line % user_scores),
                      file=ranking_file)
                print(("%s,%.6f" % (username, total)) +
                      (csv_points_line % user_scores),
                      file=ranking_csv)
class SpoolExporter(object):
    """This service creates a tree structure "similar" to the one used
    in Italian IOI repository for storing the results of a contest.

    """

    def __init__(self, contest_id, spool_dir):
        """Remember what to export and where.

        contest_id: id of the contest to export.
        spool_dir: directory to create and fill; submissions go in its
            "upload" subdirectory.

        """
        self.contest_id = contest_id
        self.spool_dir = spool_dir
        self.upload_dir = os.path.join(self.spool_dir, "upload")
        # Both are loaded from the database by do_export.
        self.contest = None
        self.submissions = None

        self.file_cacher = FileCacher()

    def run(self):
        """Interface to make the class do its job."""
        return self.do_export()

    def do_export(self):
        """Run the actual export code.

        return (bool): True on success, False if the spool directory
            already exists or if the export raised.

        """
        logger.operation = "exporting contest %s" % self.contest_id
        logger.info("Starting export.")

        logger.info("Creating dir structure.")
        try:
            os.mkdir(self.spool_dir)
        except OSError:
            logger.critical("The specified directory already exists, "
                            "I won't overwrite it.")
            return False
        os.mkdir(self.upload_dir)

        with SessionGen() as session:
            self.contest = Contest.get_from_id(self.contest_id, session)
            self.submissions = sorted(
                (submission
                 for submission in self.contest.get_submissions()
                 if not submission.user.hidden),
                key=lambda submission: submission.timestamp)

            # Creating users' directory.
            for user in self.contest.users:
                if not user.hidden:
                    os.mkdir(os.path.join(self.upload_dir, user.username))

            try:
                self.export_submissions()
                self.export_ranking()
            except Exception:
                logger.critical("Generic error.", exc_info=True)
                return False

        logger.info("Export finished.")
        logger.operation = ""

        return True

    def export_submissions(self):
        """Export submissions' source files.

        Also writes the "queue" index and, for each evaluated
        submission, two result files with the same content. All files
        are closed even if the export fails midway.

        """
        logger.info("Exporting submissions.")

        queue_path = os.path.join(self.spool_dir, "queue")
        with codecs.open(queue_path, "w", encoding="utf-8") as queue_file:
            for submission in sorted(self.submissions,
                                     key=lambda x: x.timestamp):
                logger.info("Exporting submission %s.", submission.id)
                username = submission.user.username
                task = submission.task.name
                timestamp = time.mktime(submission.timestamp.timetuple())

                # Get source files to the spool directory.
                submission_dir = os.path.join(
                    self.upload_dir, username,
                    "%s.%d.%s" % (task, timestamp, submission.language))
                os.mkdir(submission_dir)
                for filename, file_ in submission.files.iteritems():
                    self.file_cacher.get_file_to_path(
                        file_.digest,
                        os.path.join(submission_dir, filename))
                last_submission_dir = os.path.join(
                    self.upload_dir, username,
                    "%s.%s" % (task, submission.language))
                # Replace the link left by an earlier submission, if any.
                try:
                    os.unlink(last_submission_dir)
                except OSError:
                    pass
                os.symlink(os.path.basename(submission_dir),
                           last_submission_dir)
                print("./upload/%s/%s.%d.%s" %
                      (username, task, timestamp, submission.language),
                      file=queue_file)

                # Write results file for the submission.
                active_dataset = submission.task.active_dataset
                result = submission.get_result(active_dataset)
                if result.evaluated():
                    res_path = os.path.join(
                        self.spool_dir,
                        "%d.%s.%s.%s.res" % (timestamp, username,
                                             task, submission.language))
                    res2_path = os.path.join(
                        self.spool_dir,
                        "%s.%s.%s.res" % (username, task,
                                          submission.language))
                    with codecs.open(res_path, "w",
                                     encoding="utf-8") as res_file, \
                            codecs.open(res2_path, "w",
                                        encoding="utf-8") as res2_file:
                        total = 0.0
                        for evaluation in result.evaluations:
                            outcome = float(evaluation.outcome)
                            total += outcome
                            line = "Executing on file with codename " \
                                "'%s' %s (%.4f)" % \
                                (evaluation.testcase.codename,
                                 evaluation.text, outcome)
                            print(line, file=res_file)
                            print(line, file=res2_file)
                        line = "Score: %.6f" % total
                        print(line, file=res_file)
                        print(line, file=res2_file)

            print(file=queue_file)

    def export_ranking(self):
        """Exports the ranking in csv and txt (human-readable) form.

        The score of a user on a task is the maximum between the best
        tokened submission and the last submission, as computed by the
        score type of the task's active dataset.

        """
        logger.info("Exporting ranking.")

        # Create the structure to store the scores.
        scores = dict((user.username, 0.0)
                      for user in self.contest.users
                      if not user.hidden)
        task_scores = dict((task.id, dict((user.username, 0.0)
                                          for user in self.contest.users
                                          if not user.hidden))
                           for task in self.contest.tasks)
        last_scores = dict((task.id, dict((user.username, 0.0)
                                          for user in self.contest.users
                                          if not user.hidden))
                           for task in self.contest.tasks)

        # Make the score type compute the scores.
        scorers = {}
        for task in self.contest.tasks:
            scorers[task.id] = get_score_type(dataset=task.active_dataset)

        for submission in self.submissions:
            active_dataset = submission.task.active_dataset
            result = submission.get_result(active_dataset)
            scorers[submission.task_id].add_submission(
                submission.id, submission.timestamp,
                submission.user.username,
                result.evaluated(),
                dict((ev.codename,
                      {"outcome": ev.outcome,
                       "text": ev.text,
                       "time": ev.execution_time,
                       "memory": ev.execution_memory})
                     for ev in result.evaluations),
                submission.tokened())

        # Put together all the scores. Submissions are visited in the
        # order of self.submissions, so last_scores ends up holding
        # the score of the last visited submission.
        for submission in self.submissions:
            task_id = submission.task_id
            username = submission.user.username
            details = scorers[task_id].pool[submission.id]
            last_scores[task_id][username] = details["score"]
            if details["tokened"]:
                task_scores[task_id][username] = max(
                    task_scores[task_id][username],
                    details["score"])

        # Merge tokened and last submissions.
        for username in scores:
            for task_id in task_scores:
                task_scores[task_id][username] = max(
                    task_scores[task_id][username],
                    last_scores[task_id][username])
            scores[username] = sum(task_scores[task_id][username]
                                   for task_id in task_scores)

        sorted_usernames = sorted(scores.keys(),
                                  key=lambda username: (scores[username],
                                                        username),
                                  reverse=True)
        sorted_tasks = sorted(self.contest.tasks,
                              key=lambda task: task.num)

        txt_path = os.path.join(self.spool_dir, "classifica.txt")
        csv_path = os.path.join(self.spool_dir, "classifica.csv")
        with codecs.open(txt_path, "w", encoding="utf-8") as ranking_file, \
                codecs.open(csv_path, "w", encoding="utf-8") as ranking_csv:

            # Write rankings' header.
            n_tasks = len(sorted_tasks)
            print("Classifica finale del contest `%s'" %
                  self.contest.description, file=ranking_file)
            points_line = " %10s" * n_tasks
            csv_points_line = ",%s" * n_tasks
            print(("%20s %10s" % ("Utente", "Totale")) +
                  (points_line % tuple([t.name for t in sorted_tasks])),
                  file=ranking_file)
            print(("%s,%s" % ("utente", "totale")) +
                  (csv_points_line % tuple([t.name for t in sorted_tasks])),
                  file=ranking_csv)

            # Write rankings' content.
            points_line = " %10.3f" * n_tasks
            csv_points_line = ",%.6f" * n_tasks
            for username in sorted_usernames:
                user_scores = [task_scores[task.id][username]
                               for task in sorted_tasks]
                print(("%20s %10.3f" % (username, scores[username])) +
                      (points_line % tuple(user_scores)),
                      file=ranking_file)
                print(("%s,%.6f" % (username, scores[username])) +
                      (csv_points_line % tuple(user_scores)),
                      file=ranking_csv)
def test_testcases(base_dir, solution, language, assume=None):
    """Evaluate a solution on all the testcases of the task and print
    a per-subtask report.

    The task and the file cacher are created on first use and kept in
    module-level globals.

    base_dir: directory of the task; given to the loader and joined
        with solution to locate the file to evaluate.
    solution: path of the solution, relative to base_dir; its
        characters [4:-5] are used as display name.
    language: forwarded to every EvaluationJob.
    assume: if not None, used as the answer to the "consider
        everything to timeout" question instead of reading stdin.

    return: zip(points, comments, info), i.e. for each testcase its
        outcome, its status text and its (time, memory) pair.

    """
    global task, file_cacher

    # Use a FileCacher with a NullBackend in order to avoid to fill
    # the database with junk
    if file_cacher is None:
        file_cacher = FileCacher(null=True)

    # Replace the loader's module-level logger with a NullLogger.
    cmscontrib.loaders.italy_yaml.logger = NullLogger()
    # Load the task
    # TODO - This implies copying a lot of data to the FileCacher,
    # which is annoying if you have to do it continuously; it would be
    # better to use a persistent cache (although local, possibly
    # filesystem-based instead of database-based) and somehow detect
    # when the task has already been loaded
    if task is None:
        loader = cmscontrib.loaders.italy_yaml.YamlLoader(base_dir,
                                                          file_cacher)
        task = loader.get_task(get_statement=False)

    # Prepare the EvaluationJob
    dataset = task.active_dataset
    digest = file_cacher.put_file_from_path(
        os.path.join(base_dir, solution),
        "Solution %s for task %s" % (solution, task.name))
    executables = {task.name: Executable(filename=task.name, digest=digest)}
    # One (testcase key, job) pair for each testcase of the dataset.
    jobs = [(t, EvaluationJob(
        language=language,
        task_type=dataset.task_type,
        task_type_parameters=json.loads(dataset.task_type_parameters),
        managers=dict(dataset.managers),
        executables=executables,
        input=dataset.testcases[t].input,
        output=dataset.testcases[t].output,
        time_limit=dataset.time_limit,
        memory_limit=dataset.memory_limit)) for t in dataset.testcases]
    tasktype = get_task_type(dataset=dataset)

    ask_again = True
    last_status = "ok"
    status = "ok"
    stop = False
    # Parallel lists, appended to once per testcase.
    info = []
    points = []
    comments = []
    tcnames = []
    # NOTE(review): the nesting of the statements in this loop was
    # reconstructed; in particular confirm that the bare print() below
    # belongs to the "two consecutive timeouts" branch.
    for jobinfo in sorted(jobs):
        print(jobinfo[0])
        sys.stdout.flush()
        job = jobinfo[1]
        # Skip the testcase if we decide to consider everything to
        # timeout
        # NOTE(review): this branch stores a string in info and adds
        # nothing to tcnames, so the lists go out of step and the
        # subtask loop below falls into its except branch - confirm
        # whether that is intended.
        if stop:
            info.append("Time limit exceeded")
            points.append(0.0)
            comments.append("Timeout.")
            move_cursor(directions.UP, erase=True)
            continue

        # Evaluate testcase
        last_status = status
        tasktype.evaluate(job, file_cacher)
        status = job.plus.get("exit_status")
        info.append((job.plus.get("execution_time"),
                     job.plus.get("execution_memory")))
        points.append(float(job.outcome))
        comments.append(format_status_text(job.text))
        tcnames.append(jobinfo[0])

        # If we saw two consecutive timeouts, ask whether we want to
        # consider everything to timeout
        if ask_again and status == "timeout" and last_status == "timeout":
            print("Want to stop and consider everything to timeout? [y/N]",
                  end='')
            if assume is not None:
                print(assume)
                tmp = assume
            else:
                tmp = raw_input().lower()
            if tmp in ['y', 'yes']:
                stop = True
            else:
                # Any other answer: do not ask again.
                ask_again = False
            print()
        move_cursor(directions.UP, erase=True)

    # Subtasks scoring
    # Entries are read below as i[0] (score multiplier) and i[1]
    # (number of testcases). If the parameters cannot be parsed or
    # indexed, fall back to a single subtask [100, len(info)].
    try:
        subtasks = json.loads(dataset.score_type_parameters)
        subtasks[0]
    except:
        subtasks = [[100, len(info)]]

    if dataset.score_type == 'GroupMin':
        scoreFun = min
    else:
        if dataset.score_type != 'Sum':
            logger.warning("Score type %s not yet supported! Using Sum"
                           % dataset.score_type)

        # Average of the outcomes, later multiplied by i[0].
        def scoreFun(x):
            return sum(x) / len(x)

    # Index of the next testcase not yet assigned to a subtask.
    pos = 0
    sts = []

    # For each subtask generate a list of testcase it owns, the score gained
    # and the highest time and memory usage.
    for i in subtasks:
        stscores = []
        stsdata = []
        worst = [0, 0]
        try:
            for _ in xrange(i[1]):
                stscores.append(points[pos])
                stsdata.append((tcnames[pos], points[pos],
                                comments[pos], info[pos]))
                if info[pos][0] > worst[0]:
                    worst[0] = info[pos][0]
                if info[pos][1] > worst[1]:
                    worst[1] = info[pos][1]
                pos += 1
            sts.append((scoreFun(stscores) * i[0], i[0], stsdata, worst))
        except:
            # Any error while collecting the subtask gives it score 0;
            # the testcases gathered so far are still reported.
            sts.append((0, i[0], stsdata, [0, 0]))

    # Result pretty printing
    # Strips sol/ and _EVAL from the solution's name
    solution = solution[4:-5]
    print()
    # Width used to pad the status texts.
    clen = max(len(c) for c in comments)
    for st, d in enumerate(sts):
        # d is (score, max score, testcases data, worst [time, memory]).
        print(
            "Subtask %d:" % st,
            add_color_to_string(
                "%5.2f/%d" % (d[0], d[1]),
                colors.RED if abs(d[0] - d[1]) > 0.01 else colors.GREEN,
                bold=True
            )
        )
        for (i, p, c, w) in d[2]:
            # Time and memory are blue when they reach at least 95% of
            # the worst value of the subtask.
            print(
                "%s)" % i,
                add_color_to_string(
                    "%5.2lf" % p,
                    colors.RED if abs(p - 1) > 0.01 else colors.BLACK
                ),
                "--- %s [Time:" % c.ljust(clen),
                add_color_to_string(
                    ("%5.3f" % w[0]) if w[0] is not None else "N/A",
                    colors.BLUE if w[0] is not None and
                    w[0] >= 0.95 * d[3][0] else colors.BLACK
                ),
                "Memory:",
                add_color_to_string(
                    "%5s" % mem_human(w[1]) if w[1] is not None else "N/A",
                    colors.BLUE if w[1] is not None and
                    w[1] >= 0.95 * d[3][1] else colors.BLACK,
                ),
                end="]"
            )
            # The line above has no newline: move the cursor and print
            # the solution name, which also ends the line.
            move_cursor(directions.RIGHT, 1000)
            move_cursor(directions.LEFT, len(solution) - 1)
            print(add_color_to_string(solution, colors.BLACK, bold=True))
    print()

    # Record the total score of this solution in the global list.
    sols.append((solution, sum([st[0] for st in sts])))

    # Register the at-exit printer only once.
    global tested_something
    if not tested_something:
        tested_something = True
        atexit.register(print_at_exit)

    return zip(points, comments, info)