예제 #1
0
파일: Worker.py 프로젝트: beyondai/cms
    def __init__(self, shard):
        """Initialize the Worker service for the given shard.

        shard: the shard identifier; it is used to build the logger
               coordinates and is forwarded to Service.__init__.

        """
        coord = ServiceCoord("Worker", shard)
        logger.initialize(coord)
        Service.__init__(self, shard, custom_logger=logger)
        self.file_cacher = FileCacher(self)

        # No job is being ignored at start-up; the lock is created
        # last since nothing above depends on it.
        self._ignore_job = False
        self.work_lock = gevent.coros.RLock()
예제 #2
0
파일: Worker.py 프로젝트: s546360316/cms
    def __init__(self, shard):
        """Initialize the Worker service for the given shard.

        shard: the shard identifier; it is used to build the logger
               coordinates and is forwarded to Service.__init__.

        """
        coord = ServiceCoord("Worker", shard)
        logger.initialize(coord)
        Service.__init__(self, shard, custom_logger=logger)
        self.file_cacher = FileCacher(self)

        # Start with the ignore flag cleared and a fresh lock.
        self.ignore_job = False
        self.work_lock = threading.Lock()
예제 #3
0
    def __init__(self, contest_id, spool_dir):
        """Store the contest id and the spool directory layout.

        contest_id: the id of the contest, stored as-is.
        spool_dir (string): the spool directory; the upload directory
                            is its "upload" child.

        """
        self.contest_id = contest_id
        self.contest = None

        self.spool_dir = spool_dir
        self.upload_dir = os.path.join(spool_dir, "upload")

        self.file_cacher = FileCacher()
예제 #4
0
def extract_complexity(task_id, file_lengther=None):
    """Extract the complexity of all evaluated submissions of a task.

    One line per analysed submission is written to the file
    task_<id>.info (relative to the current working directory).

    task_id (int): the id of the task we are interested in.
    file_lengther (class): a File-like class that tells the dimension
                           of the input; it is passed through to
                           file_length.

    return (int): 0 if the operation was successful, -1 if no task
                  with the given id was found.

    """
    with SessionGen() as session:
        task = Task.get_from_id(task_id, session)
        if task is None:
            return -1

        # Measure every testcase input once, then purge the cacher.
        cacher = FileCacher()
        lengths = []
        for testcase in task.testcases:
            lengths.append(file_length(testcase.input,
                                       cacher, file_lengther))
        cacher.purge_cache()

        # Compute the complexity of the solutions.
        info_name = "task_%s.info" % task_id
        with open(info_name, "wt") as info:
            for submission in task.contest.get_submissions():
                # Only evaluated submissions of this task are analysed.
                if submission.task_id != task_id:
                    continue
                if not submission.evaluated():
                    continue

                print(submission.user.username)
                result = extract_complexity_submission(lengths, submission)
                # Without a complexity there is nothing to report.
                if result[1] is None:
                    continue

                info.write("Submission: %s" % submission.id)
                info.write(" - user: %15s" % submission.user.username)
                info.write(" - task: %s" % task.name)
                # Score and confidence are optional fields of the line.
                if result[0] is not None:
                    info.write(" - score: %6.2lf" % result[0])
                info.write(" - complexity: %20s" %
                           complexity_to_string(result[1]))
                if result[2] is not None:
                    info.write(" - confidence %5.1lf" % result[2])
                info.write("\n")

    return 0
    def __init__(self, drop, import_source, only_files, no_files):
        """Store the import options and create the file cacher.

        drop: stored as self.drop.
        import_source: the source to import from; it is also the
                       initial value of self.import_dir.
        only_files: stored as self.only_files.
        no_files: stored as self.no_files.

        """
        # The import directory starts out equal to the source.
        self.import_source = import_source
        self.import_dir = import_source

        self.drop = drop
        self.no_files = no_files
        self.only_files = only_files

        self.file_cacher = FileCacher()
예제 #6
0
파일: Worker.py 프로젝트: Mloc/cms
    def __init__(self, shard):
        """Initialize the Worker service for the given shard.

        shard: the shard identifier; it is used to build the logger
               coordinates and is forwarded to Service.__init__.

        """
        coord = ServiceCoord("Worker", shard)
        logger.initialize(coord)
        Service.__init__(self, shard, custom_logger=logger)
        self.file_cacher = FileCacher(self)

        # No task type and no session until a job is being processed.
        self.task_type = self.session = None
        self.work_lock = threading.Lock()
def file_length(digest, file_cacher=None, file_lengther=None):
    """Compute the length of the file identified by digest.

    The file is fetched through the cacher into a freshly created
    lengther object, whose final position is the result.

    digest (string): the digest of the file.
    file_cacher (FileCacher): the cacher to use, or None to create a
                              new FileCacher.
    file_lengther (class): a File-like class that tells the dimension
                           of the input, or None to use FileLengther.

    return (int): the length of the file.

    """
    cacher = FileCacher() if file_cacher is None else file_cacher
    lengther_class = FileLengther if file_lengther is None else file_lengther

    # The cacher writes into the sink; tell() then reports its size.
    sink = lengther_class()
    cacher.get_file(digest, file_obj=sink)
    return sink.tell()
예제 #8
0
    def __init__(self, shard):
        """Initialize the TestFileCacher service for the given shard.

        shard: the shard identifier; it is used to build the logger
               coordinates and is forwarded to TestService.__init__.

        """
        coord = ServiceCoord("TestFileCacher", shard)
        logger.initialize(coord)
        TestService.__init__(self, shard, custom_logger=logger)

        # Assume we store the cache in "./cache/fs-cache-TestFileCacher-0/"
        cache_dir_name = "fs-cache-TestFileCacher-0"
        self.cache_base_path = os.path.join(config.cache_dir, cache_dir_name)

        # Per-test state, all unset at construction time.
        self.cache_path = self.content = self.fake_content = None
        self.digest = self.file_obj = None

        self.file_cacher = FileCacher(self)
예제 #9
0
    def __init__(self, drop, import_source,
                 only_files, no_files, no_submissions):
        """Store the import options, create the file cacher and
        configure the mappers.

        drop: stored as self.drop.
        import_source: the source to import from; it is also the
                       initial value of self.import_dir.
        only_files: stored as self.only_files.
        no_files: stored as self.no_files.
        no_submissions: stored as self.no_submissions.

        """
        # The import directory starts out equal to the source.
        self.import_source = import_source
        self.import_dir = import_source

        self.drop = drop
        self.no_submissions = no_submissions
        self.no_files = no_files
        self.only_files = only_files

        self.file_cacher = FileCacher()

        configure_mappers()
예제 #10
0
    def __init__(self, drop, import_source,
                 load_files, load_model, light,
                 skip_submissions, skip_user_tests):
        """Store the import options and create the file cacher.

        drop: stored as self.drop.
        import_source: the source to import from; it is also the
                       initial value of self.import_dir.
        load_files: stored as self.load_files.
        load_model: stored as self.load_model.
        light: stored as self.light.
        skip_submissions: stored as self.skip_submissions.
        skip_user_tests: stored as self.skip_user_tests.

        """
        # The import directory starts out equal to the source.
        self.import_source = import_source
        self.import_dir = import_source

        self.drop = drop
        self.light = light
        self.load_model = load_model
        self.load_files = load_files
        self.skip_user_tests = skip_user_tests
        self.skip_submissions = skip_submissions

        self.file_cacher = FileCacher()
예제 #11
0
    def __init__(self, contest_id, export_target, skip_submissions, skip_user_tests, light):
        """Store the export options and create the file cacher.

        contest_id: the id of the contest to export.
        export_target (string): where to export to; when it is the
                                empty string a name is derived from
                                the contest's name.
        skip_submissions: stored as self.skip_submissions.
        skip_user_tests: stored as self.skip_user_tests.
        light: stored as self.light.

        """
        self.contest_id = contest_id
        self.light = light
        self.skip_user_tests = skip_user_tests
        self.skip_submissions = skip_submissions

        if export_target != "":
            self.export_target = export_target
        else:
            # If target is not provided, we use the contest's name.
            with SessionGen(commit=False) as session:
                contest = Contest.get_from_id(self.contest_id, session)
                self.export_target = "dump_%s.tar.gz" % contest.name

        self.file_cacher = FileCacher()
예제 #12
0
파일: ContestExporter.py 프로젝트: Mloc/cms
    def __init__(self, contest_id, export_target,
                 dump_files, dump_model, light,
                 skip_submissions, skip_user_tests):
        """Store the export options and create the file cacher.

        contest_id: the id of the contest to export.
        export_target (string): where to export to; when it is the
                                empty string a name is derived from
                                the contest's name and a warning is
                                logged.
        dump_files: stored as self.dump_files.
        dump_model: stored as self.dump_model.
        light: stored as self.light.
        skip_submissions: stored as self.skip_submissions.
        skip_user_tests: stored as self.skip_user_tests.

        """
        self.contest_id = contest_id
        self.light = light
        self.dump_model = dump_model
        self.dump_files = dump_files
        self.skip_user_tests = skip_user_tests
        self.skip_submissions = skip_submissions

        if export_target != "":
            self.export_target = export_target
        else:
            # If target is not provided, we use the contest's name.
            with SessionGen(commit=False) as session:
                contest = Contest.get_from_id(self.contest_id, session)
                self.export_target = "dump_%s.tar.gz" % contest.name
                logger.warning("export_target not given, using \"%s\""
                               % self.export_target)

        self.file_cacher = FileCacher()
예제 #13
0
def run(contest_id):
    """Check the translation data for consistency and store it in the DB.

    Three sources, all located next to this module, are cross-checked:
    the JSON files in 'tasks', the JSON files in 'teams' and the PDF
    files in 'data'. Each of them must describe the same set of
    (task, lang, team) triples. At the first mismatch an error and the
    two set differences are printed and the function returns without
    writing anything to the database.

    When the data is consistent every PDF is stored through a
    FileCacher, a Statement is added for each of them, and the primary
    statements of the tasks and of the users are updated.

    contest_id (int): the id of the contest the statements belong to.

    """
    session = Session()
    contest = Contest.get_from_id(contest_id, session)

    task_by_team = set()
    task_by_lang = set()

    task_dir = os.path.join(os.path.dirname(__file__), "tasks")

    # Each task file lists its translations twice (grouped by team and
    # grouped by language): collect both views to compare them.
    for t in os.listdir(task_dir):
        if t.endswith('.json'):
            task = t[:-5]
            task_path = os.path.join(task_dir, t)
            with open(task_path) as task_file:
                data = json.load(task_file)
                if "teams" in data:
                    for team, v in data["teams"].iteritems():
                        for lang in v:
                            task_by_team.add((task, lang, team))
                if "langs" in data:
                    for lang, v in data["langs"].iteritems():
                        for team in v:
                            task_by_lang.add((task, lang, team))

    if task_by_team != task_by_lang:
        print("ERROR: data in 'tasks' is not self-consistent")
        print(repr(task_by_team - task_by_lang))
        print(repr(task_by_lang - task_by_team))
        return

    team_by_task = set()
    team_by_lang = set()

    team_dir = os.path.join(os.path.dirname(__file__), "teams")

    # Same double bookkeeping, this time from the teams' point of view.
    for t in os.listdir(team_dir):
        if t.endswith('.json'):
            team = t[:-5]
            team_path = os.path.join(team_dir, t)
            with open(team_path) as team_file:
                data = json.load(team_file)
                if "tasks" in data:
                    for task, v in data["tasks"].iteritems():
                        for lang in v:
                            team_by_task.add((task, lang, team))
                if "langs" in data:
                    for lang, v in data["langs"].iteritems():
                        for task in v:
                            team_by_lang.add((task, lang, team))

    if team_by_task != team_by_lang:
        print("ERROR: data in 'teams' is not self-consistent")
        print(repr(team_by_task - team_by_lang))
        print(repr(team_by_lang - team_by_task))
        return

    if task_by_team != team_by_task:
        print("ERROR: data in 'tasks' and 'teams' is different")
        print(repr(task_by_team - team_by_task))
        print(repr(team_by_task - task_by_team))
        return

    data_by_lang = set()
    data_by_team = set()

    data_dir = os.path.join(os.path.dirname(__file__), "data")

    # Both directories use "<first> (<second>).pdf" file names; a file
    # that does not match makes the [0] below raise IndexError.
    pdf_name = re.compile(r"^([A-Za-z0-9_]+) \(([A-Za-z0-9_]+)\)\.pdf$")

    for task in os.listdir(data_dir):
        if os.path.isdir(os.path.join(data_dir, task)):
            for f in os.listdir(os.path.join(data_dir, task, "by_lang")):
                # f == "lang (team).pdf"
                lang, team = pdf_name.findall(f)[0]
                data_by_lang.add((task, lang, team))
            for f in os.listdir(os.path.join(data_dir, task, "by_team")):
                # f == "team (lang).pdf"
                team, lang = pdf_name.findall(f)[0]
                data_by_team.add((task, lang, team))

    if data_by_lang != data_by_team:
        print("ERROR: PDF files in 'data' are not complete")
        print(repr(data_by_lang - data_by_team))
        print(repr(data_by_team - data_by_lang))
        return

    if task_by_team != data_by_lang:
        print("ERROR: PDF files in 'data' do not match JSON data")
        print(repr(task_by_team - data_by_lang))
        print(repr(data_by_lang - task_by_team))
        return

    print("Hooray! Data is consistent!")


    # Pick one at random: they're all equal.
    translations = task_by_team

    # Determine language codes used in CMS.
    codes = dict()

    # Read JSON files in 'tasks' again as it provides data already
    # grouped as we need it, and not simply as a list of tuples.
    for t in os.listdir(task_dir):
        if t.endswith('.json'):
            task = t[:-5]
            task_path = os.path.join(task_dir, t)
            with open(task_path) as task_file:
                data = json.load(task_file)
                if "langs" in data:
                    for lang, v in data["langs"].iteritems():
                        if len(v) == 0:
                            pass
                        elif len(v) == 1 and v[0] != official_team:
                            # A single, non-official translation gets
                            # the bare language code.
                            for team in v:
                                codes[(task, lang, team)] = "%s" % lang
                        else:
                            # Otherwise the code is qualified with the
                            # ioi_to_iso2 entry of the team.
                            for team in v:
                                codes[(task, lang, team)] = "%s_%s" % (lang, ioi_to_iso2[team])

    # Store the files as Statement objects.
    file_cacher = FileCacher()

    for task, lang, team in translations:
        if team == official_team:
            assert lang == "en"
            digest = file_cacher.put_file(
                path=os.path.join(data_dir, task, "by_lang", "%s (%s).pdf" % (lang, team)),
                description="Statement for task %s" % task)
        else:
            digest = file_cacher.put_file(
                path=os.path.join(data_dir, task, "by_lang", "%s (%s).pdf" % (lang, team)),
                description="Statement for task %s, translated into %s (%s) by %s (%s)" %
                            (task, langs[lang], lang, teams[team], team))

        s = Statement(codes[(task, lang, team)], digest, task=contest.get_task(task))

        session.add(s)

    session.commit()


    primary = dict()

    # Retrieve the statements selected by each team.
    for t in os.listdir(team_dir):
        if t.endswith('.json'):
            team = t[:-5]
            team_path = os.path.join(team_dir, t)
            with open(team_path) as team_file:
                data = json.load(team_file)

                for team2, lang, task in data.get("selected", []):
                    # A team could have selected a statement that later got removed.
                    if (task, lang, team2) in codes:
                        primary.setdefault(team, {}).setdefault(task, []).append(codes[(task, lang, team2)])

    # Add the ones they uploaded themselves.
    for task, lang, team in translations:
        # Don't worry about duplicates, CWS filters them out.
        primary.setdefault(team, {}).setdefault(task, []).append(codes[(task, lang, team)])

    # Set the primary statements for tasks (i.e. the ones of the official team)
    for task, primary2 in primary.get(official_team, {}).iteritems():
        contest.get_task(task).primary_statements = json.dumps(primary2)

    # Set the primary statements for teams. The values are passed as
    # bound parameters rather than interpolated into the SQL text, so
    # quotes or colons inside the JSON or the team name cannot corrupt
    # the statement.
    for team, primary2 in primary.iteritems():
        session.execute(
            "UPDATE users SET primary_statements = :statements "
            "WHERE username LIKE :pattern;",
            {"statements": json.dumps(primary2), "pattern": "%s%%" % team})

    session.commit()

    print("Statements stored in the DB!")
예제 #14
0
파일: Worker.py 프로젝트: invinciblejha/cms
class Worker(Service):
    """This service implements the possibility to compile and evaluate
    submissions in a sandbox. The instructions to follow for the
    operations are in the TaskType classes, while the sandbox is in
    the Sandbox module.

    """

    JOB_TYPE_COMPILATION = "compile"
    JOB_TYPE_EVALUATION = "evaluate"

    def __init__(self, shard):
        """Initialize the service and its per-job state.

        shard: the shard identifier; it is used to build the logger
               coordinates and is forwarded to Service.__init__.

        """
        logger.initialize(ServiceCoord("Worker", shard))
        Service.__init__(self, shard, custom_logger=logger)
        self.file_cacher = FileCacher(self)

        # task_type and session are only set while action() is
        # running; work_lock ensures a single job runs at a time.
        self.task_type = None
        self.work_lock = threading.Lock()
        self.session = None

    def get_submission_data(self, submission_id):
        """Given the id, returns the submission object and a new task
        type object of the correct type.

        submission_id (int): id of the submission.

        return (Submission, TaskType): corresponding objects.

        raise: JobException if id or task type not found.

        """
        submission = Submission.get_from_id(submission_id, self.session)
        if submission is None:
            err_msg = "Couldn't find submission %s " \
                      "in the database." % submission_id
            logger.critical(err_msg)
            raise JobException(err_msg)

        try:
            task_type = get_task_type(submission, self.file_cacher)
        except KeyError as error:
            err_msg = "Task type `%s' not known for " \
                "submission %s (error: %s)." % (
                submission.task.task_type, submission_id, error)
            logger.error(err_msg)
            raise JobException(err_msg)

        return (submission, task_type)

    @rpc_method
    def ignore_job(self):
        """RPC that informs the worker that its result for the current
        action will be discarded. The worker will try to return as
        soon as possible even if this means that the results are
        inconsistent.

        """
        # We inform the task_type to quit as soon as possible.
        logger.info("Trying to interrupt job as requested.")
        try:
            self.task_type.ignore_job = True
        except AttributeError:
            pass  # Job concluded right under our nose, that's ok too.

    @rpc_method
    @rpc_threaded
    def compile(self, submission_id):
        """RPC to ask the worker to compile the submission.

        submission_id (int): the id of the submission to compile.

        return: whatever action() returns for a compilation job.

        """
        return self.action(submission_id, Worker.JOB_TYPE_COMPILATION)

    @rpc_method
    @rpc_threaded
    def evaluate(self, submission_id):
        """RPC to ask the worker to evaluate the submission.

        submission_id (int): the id of the submission to evaluate.

        return: whatever action() returns for an evaluation job.

        """
        return self.action(submission_id, Worker.JOB_TYPE_EVALUATION)

    # FIXME - an earlier note here said rpc_threaded was disabled
    # because it made the call fail, yet the decorator is applied
    # below: we should investigate and settle this.
    @rpc_method
    @rpc_threaded
    def precache_files(self, contest_id):
        """RPC to ask the worker to precache the files of the contest.

        contest_id (int): the id of the contest

        """
        # Lock is not needed if the admins correctly placed cache and
        # temp directories in the same filesystem. This is what
        # usually happens since they are children of the same,
        # cms-created, directory.
        logger.info("Precaching files for contest %d." % contest_id)
        with SessionGen(commit=False) as session:
            contest = Contest.get_from_id(contest_id, session)
            for digest in contest.enumerate_files(skip_submissions=True):
                self.file_cacher.get_file(digest)
        logger.info("Precaching finished.")

    def action(self, submission_id, job_type):
        """The actual work - that can be compilation or evaluation
        (the code is pretty much the same, the differences are in
        what we ask TaskType to do).

        submission_id (int): the submission on which to act.
        job_type (string): a constant JOB_TYPE_*.

        return: the value returned by the task type's compile() or
                evaluate(), or False if the request was declined
                because another job holds the lock.

        raise: JobException if anything fails while working.

        """
        # Non-blocking acquire: a busy worker declines the request
        # instead of queueing it.
        if self.work_lock.acquire(False):

            try:
                logger.operation = "%s of submission %s" % (job_type,
                                                            submission_id)
                logger.info("Request received: %s of submission %s." %
                            (job_type, submission_id))

                with SessionGen(commit=False) as self.session:

                    # Retrieve submission and task_type.
                    unused_submission, self.task_type = \
                        self.get_submission_data(submission_id)

                    # Store in the task type the shard number.
                    self.task_type.worker_shard = self.shard

                    # Do the actual work.
                    if job_type == Worker.JOB_TYPE_COMPILATION:
                        task_type_action = self.task_type.compile
                    elif job_type == Worker.JOB_TYPE_EVALUATION:
                        task_type_action = self.task_type.evaluate
                    else:
                        raise KeyError("Unexpected job type %s." % job_type)

                    # Run the job first, so that "finished" is only
                    # logged once the work has really completed.
                    result = task_type_action()
                    logger.info("Request finished.")
                    return result

            # NOTE(review): the bare except also traps SystemExit and
            # KeyboardInterrupt; kept as-is so that callers keep
            # receiving a JobException for every failure.
            except:
                err_msg = "Worker failed on operation `%s'" % logger.operation
                logger.error("%s\n%s" % (err_msg, traceback.format_exc()))
                raise JobException(err_msg)

            finally:
                # Reset the per-job state whatever the outcome was.
                self.task_type = None
                self.session = None
                logger.operation = ""
                self.work_lock.release()

        else:
            logger.warning("Request of %s of submission %s received, "
                           "but declined because of acquired lock" %
                           (job_type, submission_id))
            return False
class ContestExporter:
    """This service exports every data about the contest that CMS
    knows. The process of exporting and importing again should be
    idempotent.

    """
    def __init__(self, contest_id, export_target,
                 skip_submissions, skip_user_tests, light):
        """Store the export options and create the file cacher.

        contest_id (int): the id of the contest to export.
        export_target (string): where to export to; when it is the
                                empty string a name is derived from
                                the contest's name.
        skip_submissions: forwarded to the contest when enumerating
                          files and exporting to dict.
        skip_user_tests: forwarded to the contest when enumerating
                         files and exporting to dict.
        light: forwarded to the contest when enumerating files.

        """
        self.contest_id = contest_id
        self.skip_submissions = skip_submissions
        self.skip_user_tests = skip_user_tests
        self.light = light

        # If target is not provided, we use the contest's name.
        if export_target == "":
            with SessionGen(commit=False) as session:
                contest = Contest.get_from_id(self.contest_id, session)
                self.export_target = "dump_%s.tar.gz" % contest.name
        else:
            self.export_target = export_target

        self.file_cacher = FileCacher()

    def run(self):
        """Interface to make the class do its job.

        return (bool): the result of do_export().

        """
        return self.do_export()

    def do_export(self):
        """Run the actual export code.

        The contest is exported to a directory; when the target is an
        archive we are able to write, the directory is created inside
        a temporary one, packed into the archive and then removed.

        return (bool): True if the export succeeded, False otherwise.

        """
        logger.operation = "exporting contest %d" % self.contest_id
        logger.info("Starting export.")

        export_dir = self.export_target
        archive_info = get_archive_info(self.export_target)
        make_archive = archive_info["write_mode"] != ""
        temp_dir = None

        if make_archive:
            # We are able to write to this archive.
            if os.path.exists(self.export_target):
                logger.error("The specified file already exists, "
                             "I won't overwrite it.")
                return False
            # Remember the temporary directory itself: removing only
            # the export subdirectory would leave it behind.
            temp_dir = tempfile.mkdtemp()
            export_dir = os.path.join(temp_dir, archive_info["basename"])

        try:
            if not self._export_to_dir(export_dir):
                return False

            # If the admin requested export to file, we do that.
            if make_archive:
                archive = tarfile.open(self.export_target,
                                       archive_info["write_mode"])
                try:
                    archive.add(export_dir, arcname=archive_info["basename"])
                finally:
                    archive.close()
        finally:
            # Best-effort cleanup of the temporary tree, on success
            # and on failure alike.
            if temp_dir is not None:
                shutil.rmtree(temp_dir, ignore_errors=True)

        logger.info("Export finished.")
        logger.operation = ""

        return True

    def _export_to_dir(self, export_dir):
        """Create export_dir and fill it with files and contest.json.

        export_dir (string): the directory to create and populate.

        return (bool): True if all ok, False if the directory could
                       not be created or a file could not be exported.

        """
        logger.info("Creating dir structure.")
        try:
            os.mkdir(export_dir)
        except OSError:
            logger.error("The specified directory already exists, "
                         "I won't overwrite it.")
            return False

        files_dir = os.path.join(export_dir, "files")
        descr_dir = os.path.join(export_dir, "descriptions")
        os.mkdir(files_dir)
        os.mkdir(descr_dir)

        with SessionGen(commit=False) as session:

            contest = Contest.get_from_id(self.contest_id, session)

            # Export files.
            logger.info("Exporting files.")
            files = contest.enumerate_files(self.skip_submissions,
                                            self.skip_user_tests,
                                            light=self.light)
            for _file in files:
                if not self.safe_get_file(_file,
                                          os.path.join(files_dir, _file),
                                          os.path.join(descr_dir, _file)):
                    return False

            # Export the contest in JSON format.
            logger.info("Exporting the contest in JSON format.")
            with open(os.path.join(export_dir, "contest.json"), 'w') as fout:
                json.dump(contest.export_to_dict(
                        self.skip_submissions,
                        self.skip_user_tests),
                          fout, indent=4)

        return True

    def safe_get_file(self, digest, path, descr_path=None):
        """Get file from FileCacher ensuring that the digest is
        correct.

        digest (string): the digest of the file to retrieve.
        path (string): the path where to save the file.
        descr_path (string): the path where to save the description.

        return (bool): True if all ok, False if something wrong.

        """
        # First get the file
        try:
            self.file_cacher.get_file(digest, path=path)
        except Exception as error:
            logger.error("File %s could not retrieved from file server (%r)." %
                         (digest, error))
            return False

        # Then check the digest
        calc_digest = sha1sum(path)
        if digest != calc_digest:
            logger.error("File %s has wrong hash %s." % (digest, calc_digest))
            return False

        # If applicable, retrieve also the description
        if descr_path is not None:
            with codecs.open(descr_path, 'w', encoding='utf-8') as fout:
                fout.write(self.file_cacher.describe(digest))

        return True
예제 #16
0
class ContestImporter:

    """This service imports a contest from a directory that has been
    the target of a ContestExport. The process of exporting and
    importing again should be idempotent.

    """

    def __init__(self, drop, import_source,
                 load_files, load_model, light,
                 skip_submissions, skip_user_tests):
        """Store the import options and create the FileCacher.

        drop: if true, do_import drops the database before creating it.
        import_source (string): directory or archive to import from.
        load_files: if true, do_import puts the files in the FileCacher.
        load_model: if true, do_import loads the model from contest.json.
        light: forwarded to the contest's enumerate_files.
        skip_submissions: if true, submissions are left out.
        skip_user_tests: if true, user tests are left out.

        """
        # Where to import from; import_dir changes only if the source
        # turns out to be an archive that has to be extracted.
        self.import_source = import_source
        self.import_dir = import_source

        # What to import and how.
        self.drop = drop
        self.load_model = load_model
        self.load_files = load_files
        self.light = light
        self.skip_submissions = skip_submissions
        self.skip_user_tests = skip_user_tests

        self.file_cacher = FileCacher()

    def do_import(self):
        """Run the actual import code.

        An archive given as import source is first extracted to a
        temporary directory, which is removed once the import is done.
        The database schema is then created (after dropping it, if
        requested) and, depending on the load_model and load_files
        flags, the contest is read from contest.json and the files are
        put in the FileCacher.

        return (bool): True if the import succeeded, False otherwise.

        """
        logger.operation = "importing contest from %s" % self.import_source
        logger.info("Starting import.")

        # Temporary directory the archive gets extracted to, if any.
        extraction_dir = None

        if not os.path.isdir(self.import_source):
            if self.import_source.endswith(".zip"):
                archive = zipfile.ZipFile(self.import_source, "r")
                # Use plain names, as getnames() gives for the tar
                # archives below; zip marks directories with a
                # trailing slash that tar names do not have.
                file_names = [name.rstrip("/")
                              for name in archive.namelist()]
            elif self.import_source.endswith(
                    (".tar.gz", ".tgz", ".tar.bz2", ".tbz2", ".tar")):
                archive = tarfile.open(name=self.import_source)
                file_names = archive.getnames()
            elif self.import_source.endswith((".tar.xz", ".txz")):
                try:
                    import lzma
                except ImportError:
                    logger.critical("LZMA compression format not "
                                    "supported. Please install package "
                                    "lzma.")
                    return False
                archive = tarfile.open(
                    fileobj=lzma.LZMAFile(self.import_source))
                file_names = archive.getnames()
            else:
                logger.critical("Unable to import from %s." %
                                self.import_source)
                return False

            try:
                root = find_root_of_archive(file_names)
                if root is None:
                    logger.critical("Cannot find a root directory in %s." %
                                    self.import_source)
                    return False

                # Extract exactly once, whatever the archive format is.
                extraction_dir = tempfile.mkdtemp()
                archive.extractall(extraction_dir)
            finally:
                archive.close()
            self.import_dir = os.path.join(extraction_dir, root)

        if self.drop:
            logger.info("Dropping and recreating the database.")
            try:
                metadata.drop_all()
            except sqlalchemy.exc.OperationalError as error:
                logger.critical("Unable to access DB.\n%r" % error)
                return False
        try:
            metadata.create_all()
        except sqlalchemy.exc.OperationalError as error:
            logger.critical("Unable to access DB.\n%r" % error)
            return False

        with SessionGen(commit=False) as session:

            # Import the contest in JSON format.
            if self.load_model:
                logger.info("Importing the contest from a JSON file.")

                with io.open(os.path.join(self.import_dir,
                                          "contest.json"), "rb") as fin:
                    # TODO - Throughout all the code we'll assume the
                    # input is correct without actually doing any
                    # validations.  Thus, for example, we're not
                    # checking that the decoded object is a dict...
                    self.datas = json.load(fin, encoding="utf-8")

                # If the dump has been exported using a data model
                # different than the current one (that is, a previous
                # one) we try to update it.
                # If no "_version" field is found we assume it's a v1.0
                # export (before the new dump format was introduced).
                dump_version = self.datas.get("_version", 0)

                if dump_version < model_version:
                    logger.warning(
                        "The dump you're trying to import has been created "
                        "by an old version of CMS. It may take a while to "
                        "adapt it to the current data model. You can use "
                        "cmsDumpUpdater to update the on-disk dump and "
                        "speed up future imports.")

                if dump_version > model_version:
                    logger.critical(
                        "The dump you're trying to import has been created "
                        "by a version of CMS newer than this one and there "
                        "is no way to adapt it to the current data model. "
                        "You probably need to update CMS to handle it. It's "
                        "impossible to proceed with the importation.")
                    return False

                for version in range(dump_version, model_version):
                    # Update from version to version+1
                    updater = __import__(
                        "cmscontrib.updaters.update_%d" % (version + 1),
                        globals(), locals(), ["Updater"]).Updater(self.datas)
                    self.datas = updater.run()
                    self.datas["_version"] = version + 1

                assert self.datas["_version"] == model_version

                # First create every object, then link them: a
                # relationship may point to an object that comes later
                # in the dump.
                self.objs = dict()
                for id_, data in self.datas.iteritems():
                    if not id_.startswith("_"):
                        self.objs[id_] = self.import_object(data)
                for id_, data in self.datas.iteritems():
                    if not id_.startswith("_"):
                        self.add_relationships(data, self.objs[id_])

                # Iterate on a copy, since we delete while looping.
                for k, v in list(self.objs.iteritems()):

                    # Skip submissions if requested
                    if self.skip_submissions and isinstance(v, Submission):
                        del self.objs[k]

                    # Skip user_tests if requested
                    if self.skip_user_tests and isinstance(v, UserTest):
                        del self.objs[k]

                contest_id = list()
                contest_files = set()

                # Add each base object and all its dependencies
                for id_ in self.datas["_objects"]:
                    contest = self.objs[id_]

                    # We explicitly add only the contest since all child
                    # objects will be automatically added by cascade.
                    # Adding each object individually would also add
                    # orphaned objects like the ones that depended on
                    # submissions or user_tests that we (possibly)
                    # removed above.
                    session.add(contest)
                    session.flush()

                    contest_id.append(contest.id)
                    contest_files |= contest.enumerate_files(
                        self.skip_submissions, self.skip_user_tests, self.light)

                session.commit()
            else:
                contest_id = None
                contest_files = None

            # Import files.
            if self.load_files:
                logger.info("Importing files.")

                files_dir = os.path.join(self.import_dir, "files")
                descr_dir = os.path.join(self.import_dir, "descriptions")

                files = set(os.listdir(files_dir))
                descr = set(os.listdir(descr_dir))

                # files <= descr fails when a file has no description,
                # descr <= files fails when a description has no file.
                if not files <= descr:
                    logger.warning("Some files do not have an associated "
                                   "description.")
                if not descr <= files:
                    logger.warning("Some descriptions do not have an "
                                   "associated file.")

                if not (contest_files is None or files <= contest_files):
                    # FIXME Check if it's because this is a light import
                    # or because we're skipping submissions or user_tests
                    logger.warning("The dump contains some files that are "
                                   "not needed by the contest.")
                if not (contest_files is None or contest_files <= files):
                    # The reason for this could be that it was a light
                    # export that's not being reimported as such.
                    logger.warning("The contest needs some files that are "
                                   "not contained in the dump.")

                # Limit import to files we actually need.
                if contest_files is not None:
                    files &= contest_files

                for digest in files:
                    file_ = os.path.join(files_dir, digest)
                    desc = os.path.join(descr_dir, digest)
                    if not self.safe_put_file(file_, desc):
                        logger.critical("Unable to put file `%s' in the database. "
                                        "Aborting. Please remove the contest "
                                        "from the database." % file_)
                        # TODO: remove contest from the database.
                        return False

        if contest_id is not None:
            logger.info("Import finished (contest id: %s)." %
                        ", ".join(str(id_) for id_ in contest_id))
        else:
            logger.info("Import finished.")
        logger.operation = ""

        # If we extracted an archive, we remove the whole temporary
        # directory (not just the root it contained).
        if extraction_dir is not None:
            rmtree(extraction_dir)

        return True

    def import_object(self, data):

        """Build an object from the given data, leaving relationships out.

        The given data is assumed to be a dict in the format produced by
        ContestExporter. The class is looked up by the "_class" item;
        every column property of that class found in the data is then
        converted and passed as a keyword argument to the class
        constructor (which is expected to raise an error if a required
        property is missing).

        Relationships are deliberately ignored, since the objects they
        refer to may not exist yet: they are set later on by the
        add_relationships method.

        Note that neither this method nor add_relationships checks
        whether the data holds more items than the ones we understand
        and use.

        """

        target_class = getattr(class_hook, data["_class"])

        kwargs = dict()

        for prp in target_class._col_props:
            key = prp.key
            if key not in data:
                # Missing arguments are left for the __init__ of the
                # class to complain about.
                continue

            column_type = type(prp.columns[0].type)
            raw = data[key]

            if column_type in [Boolean, Integer, Float, String]:
                # Plain values are stored as they are.
                kwargs[key] = raw
            elif column_type is DateTime:
                kwargs[key] = None if raw is None else make_datetime(raw)
            elif column_type is Interval:
                kwargs[key] = None if raw is None else timedelta(seconds=raw)
            else:
                raise RuntimeError("Unknown SQLAlchemy column type: %s" % column_type)

        return target_class(**kwargs)

    def add_relationships(self, data, obj):

        """Add the relationships to the given object, using the given data.

        Do what we didn't in import_objects: importing relationships.
        We already now the class of the object so we simply iterate over
        its relationship properties trying to load them from the data (if
        present), checking wheter they are IDs or collection of IDs,
        dereferencing them (i.e. getting the corresponding object) and
        reflecting all on the given object.

        Note that both this method and import_object don't check if the
        given data has more items than the ones we understand and use.

        """

        cls = type(obj)

        for prp in cls._rel_props:
            if prp.key not in data:
                # Relationships are always optional
                continue

            val = data[prp.key]
            if val is None:
                setattr(obj, prp.key, None)
            elif type(val) == str:
                setattr(obj, prp.key, self.objs[val])
            elif type(val) == list:
                setattr(obj, prp.key, list(self.objs[i] for i in val))
            elif type(val) == dict:
                setattr(obj, prp.key, dict((k, self.objs[v]) for k, v in val.iteritems()))
            else:
                raise RuntimeError("Unknown RelationshipProperty value: %s" % type(val))

    def safe_put_file(self, path, descr_path):

        """Store a file in the FileCacher, reporting every error
        (digest mismatch included).

        path (string): the path from which to load the file.
        descr_path (string): same for description.

        return (bool): True if all ok, False if something wrong.

        """

        # TODO - Probably this method could be merged in FileCacher

        # Load the description; one that cannot be read counts as empty.
        try:
            with io.open(descr_path, 'rt', encoding='utf-8') as fin:
                description = fin.read()
        except IOError:
            description = ''

        # Hand the file over to the file server.
        try:
            stored_digest = self.file_cacher.put_file(
                path=path, description=description)
        except Exception as error:
            logger.critical("File %s could not be put to file server (%r), "
                            "aborting." % (path, error))
            return False

        # The digest reported back must match the one of the local file.
        local_digest = sha1sum(path)
        if stored_digest == local_digest:
            return True

        logger.critical("File %s has hash %s, but the server returned %s, "
                        "aborting." % (path, local_digest, stored_digest))
        return False
예제 #17
0
class ContestImporter:
    """This service imports a contest from a directory that has been
    the target of a ContestExport. The process of exporting and
    importing again should be idempotent.

    """
    def __init__(self, drop, import_source,
                 only_files, no_files, no_submissions):
        """Store the import options and create the FileCacher.

        drop: if true, do_import drops the database before creating it.
        import_source (string): directory or archive to import from.
        only_files: if true, the contest is not added to the database.
        no_files: if true, the files are not imported.
        no_submissions: if true, the submissions and user tests of
                        every user are discarded.

        """
        # Where to import from; import_dir changes only if the source
        # turns out to be an archive that has to be extracted.
        self.import_source = import_source
        self.import_dir = import_source

        # What to import and how.
        self.drop = drop
        self.no_submissions = no_submissions
        self.no_files = no_files
        self.only_files = only_files

        self.file_cacher = FileCacher()

    def run(self):
        """Entry point of the class: perform the import.

        return: whatever do_import returns.

        """
        outcome = self.do_import()
        return outcome

    def do_import(self):
        """Run the actual import code.

        An archive given as import source is first extracted to a
        temporary directory, which is removed once the import is done.
        The database schema is then created (after dropping it, if
        requested), the contest is loaded from contest.json unless
        only_files is set, and the files are put in the FileCacher
        unless no_files is set.

        return (bool): True if the import succeeded, False otherwise.

        """
        logger.operation = "importing contest from %s" % self.import_source
        logger.info("Starting import.")

        # Temporary directory the archive gets extracted to, if any.
        extraction_dir = None

        if not os.path.isdir(self.import_source):
            if self.import_source.endswith(".zip"):
                archive = zipfile.ZipFile(self.import_source, "r")
                # Use plain names, as getnames() gives for the tar
                # archives below; zip marks directories with a
                # trailing slash that tar names do not have.
                file_names = [name.rstrip("/")
                              for name in archive.namelist()]
            elif self.import_source.endswith(
                    (".tar.gz", ".tgz", ".tar.bz2", ".tbz2", ".tar")):
                archive = tarfile.open(name=self.import_source)
                file_names = archive.getnames()
            else:
                logger.critical("Unable to import from %s." %
                                self.import_source)
                return False

            try:
                root = find_root_of_archive(file_names)
                if root is None:
                    logger.critical("Cannot find a root directory in %s." %
                                    self.import_source)
                    return False

                # Extract exactly once, whatever the archive format is.
                extraction_dir = tempfile.mkdtemp()
                archive.extractall(extraction_dir)
            finally:
                archive.close()
            self.import_dir = os.path.join(extraction_dir, root)

        if self.drop:
            logger.info("Dropping and recreating the database.")
            try:
                metadata.drop_all()
            except sqlalchemy.exc.OperationalError as error:
                logger.critical("Unable to access DB.\n%r" % error)
                return False
        try:
            metadata.create_all()
        except sqlalchemy.exc.OperationalError as error:
            logger.critical("Unable to access DB.\n%r" % error)
            return False

        logger.info("Reading JSON file...")
        with open(os.path.join(self.import_dir, "contest.json")) as fin:
            contest_json = json.load(fin)
        if self.no_submissions:
            for user in contest_json["users"]:
                user["submissions"] = []
                user["user_tests"] = []

        # Both stay None when only the files are imported.
        contest_id = None
        contest_files = None

        if not self.only_files:
            with SessionGen(commit=False) as session:

                # Import the contest in JSON format.
                logger.info("Importing the contest from JSON file.")
                contest = Contest.import_from_dict(contest_json)
                session.add(contest)

                # Flush to have the database assign an id to the contest.
                session.flush()
                contest_id = contest.id
                contest_files = contest.enumerate_files()
                session.commit()

        if not self.no_files:
            logger.info("Importing files.")
            files_dir = os.path.join(self.import_dir, "files")
            descr_dir = os.path.join(self.import_dir, "descriptions")
            if contest_files is None:
                # No contest was loaded to tell us which files are
                # needed, so import everything the dump contains.
                contest_files = os.listdir(files_dir)
            for digest in contest_files:
                file_ = os.path.join(files_dir, digest)
                desc = os.path.join(descr_dir, digest)
                if not os.path.exists(file_) or not os.path.exists(desc):
                    logger.error("Some files needed to the contest "
                                 "are missing in the import directory. "
                                 "The import will continue. Be aware.")
                    if not os.path.exists(file_):
                        # There is nothing to store: go on with the
                        # next file, as the message promises. A missing
                        # description alone is handled by safe_put_file.
                        continue
                if not self.safe_put_file(file_, desc):
                    logger.critical("Unable to put file `%s' in the database. "
                                    "Aborting. Please remove the contest "
                                    "from the database." % file_)
                    # TODO: remove contest from the database.
                    return False

        if contest_id is not None:
            logger.info("Import finished (contest id: %s)." % contest_id)
        else:
            logger.info("Import finished.")
        logger.operation = ""

        # If we extracted an archive, we remove the whole temporary
        # directory (not just the root it contained).
        if extraction_dir is not None:
            shutil.rmtree(extraction_dir)

        return True

    def safe_put_file(self, path, descr_path):
        """Store a file in the FileCacher, reporting every error
        (digest mismatch included).

        path (string): the path from which to load the file.
        descr_path (string): same for description.

        return (bool): True if all ok, False if something wrong.

        """
        # Load the description; one that cannot be read counts as empty.
        try:
            with open(descr_path) as fin:
                description = fin.read()
        except IOError:
            description = ''

        # Hand the file over to the file server.
        try:
            stored_digest = self.file_cacher.put_file(
                path=path, description=description)
        except Exception as error:
            logger.critical("File %s could not be put to file server (%r), "
                            "aborting." % (path, error))
            return False

        # The digest reported back must match the one of the local file.
        local_digest = sha1sum(path)
        if stored_digest == local_digest:
            return True

        logger.critical("File %s has hash %s, but the server returned %s, "
                        "aborting." % (path, local_digest, stored_digest))
        return False
예제 #18
0
파일: SpoolExporter.py 프로젝트: schlos/cms
class SpoolExporter:
    """This service creates a tree structure "similar" to the one used
    in Italian IOI repository for storing the results of a contest.

    """

    def __init__(self, contest_id, spool_dir):
        """Store the export parameters and create the FileCacher.

        contest_id: the id of the contest to export.
        spool_dir (string): the directory to create and export to.

        """
        self.contest_id = contest_id

        # Sources are saved in the "upload" subdirectory of the spool.
        self.spool_dir = spool_dir
        self.upload_dir = os.path.join(spool_dir, "upload")

        # Both are loaded from the database by do_export.
        self.contest = None
        self.submissions = None

        self.file_cacher = FileCacher()

    def run(self):
        """Entry point of the class: perform the export.

        return: whatever do_export returns.

        """
        outcome = self.do_export()
        return outcome

    def do_export(self):
        """Run the actual export code.

        Create the spool directory (refusing to reuse an existing one)
        with one upload directory per non-hidden user, then export the
        submissions of non-hidden users and the ranking.

        return (bool): True if the export succeeded, False otherwise.

        """
        logger.operation = "exporting contest %s" % self.contest_id
        logger.info("Starting export.")

        logger.info("Creating dir structure.")
        try:
            os.mkdir(self.spool_dir)
        except OSError:
            logger.critical("The specified directory already exists, " "I won't overwrite it.")
            return False
        os.mkdir(self.upload_dir)

        with SessionGen(commit=False) as session:
            self.contest = Contest.get_from_id(self.contest_id, session)
            if self.contest is None:
                # Report an unknown id instead of failing later with an
                # AttributeError on None.
                logger.critical("Contest %s not found in the database." % self.contest_id)
                return False

            # Submissions of non-hidden users, oldest first.
            self.submissions = sorted(
                (submission for submission in self.contest.get_submissions() if not submission.user.hidden),
                key=lambda submission: submission.timestamp,
            )

            # Creating users' directory.
            for user in self.contest.users:
                if not user.hidden:
                    os.mkdir(os.path.join(self.upload_dir, user.username))

            try:
                self.export_submissions()
                self.export_ranking()
            except Exception as error:
                logger.critical("Generic error. %r" % error)
                return False

        logger.info("Export finished.")
        logger.operation = ""

        return True

    def export_submissions(self):
        """Export submissions' source files.

        For each submission in self.submissions the source file is saved
        twice in the upload directory of its user (once with the
        timestamp in the name, once without) and a line is added to the
        "queue" file. Evaluated submissions also get two result files
        listing the outcome of every evaluation and their total.

        """
        logger.info("Exporting submissions.")

        queue_path = os.path.join(self.spool_dir, "queue")
        # The with statements close the files even if an export step
        # raises.
        with codecs.open(queue_path, "w", encoding="utf-8") as queue_file:
            for submission in self.submissions:
                logger.info("Exporting submission %s." % submission.id)
                username = submission.user.username
                task = submission.task.name
                timestamp = submission.timestamp
                language = submission.language

                # Get source files to the spool directory.
                file_digest = submission.files["%s.%s" % (task, "%l")].digest
                upload_filename = os.path.join(
                    self.upload_dir, username, "%s.%d.%s" % (task, timestamp, language)
                )
                self.file_cacher.get_file(file_digest, path=upload_filename)
                upload_filename = os.path.join(self.upload_dir, username, "%s.%s" % (task, language))
                self.file_cacher.get_file(file_digest, path=upload_filename)
                queue_file.write("./upload/%s/%s.%d.%s\n" % (username, task, timestamp, language))

                # Write results file for the submission.
                if submission.evaluated():
                    lines = []
                    total = 0.0
                    for num, evaluation in enumerate(submission.evaluations):
                        outcome = float(evaluation.outcome)
                        total += outcome
                        lines.append("Executing on file n. %2d %s (%.4f)" % (num, evaluation.text, outcome))
                    lines.append("Score: %.6f" % total)
                    report = "".join(line + "\n" for line in lines)

                    # The same report goes to a timestamped file and to
                    # one named after user, task and language only.
                    res_names = (
                        "%d.%s.%s.%s.res" % (timestamp, username, task, language),
                        "%s.%s.%s.res" % (username, task, language),
                    )
                    for res_name in res_names:
                        res_path = os.path.join(self.spool_dir, res_name)
                        with codecs.open(res_path, "w", encoding="utf-8") as res_file:
                            res_file.write(report)

            # The queue file ends with an empty line.
            queue_file.write("\n")

    def export_ranking(self):
        """Exports the ranking in csv and txt (human-readable) form.

        The score of a user on a task is the maximum between the score
        of the last of their submissions in self.submissions and the
        best score among their tokened ones; the total is the sum over
        all tasks. Hidden users are left out. The ranking is written to
        classifica.txt and classifica.csv in the spool directory, best
        total first.

        """
        logger.info("Exporting ranking.")

        usernames = [user.username for user in self.contest.users if not user.hidden]

        # Create the structure to store the scores.
        scores = dict((username, 0.0) for username in usernames)
        task_scores = dict(
            (task.id, dict((username, 0.0) for username in usernames))
            for task in self.contest.tasks
        )
        last_scores = dict(
            (task.id, dict((username, 0.0) for username in usernames))
            for task in self.contest.tasks
        )

        # Make the score type compute the scores.
        scorers = {}
        for task in self.contest.tasks:
            scorers[task.id] = get_score_type(dataset=task.active_dataset)

        for submission in self.submissions:
            scorers[submission.task_id].add_submission(
                submission.id,
                submission.timestamp,
                submission.user.username,
                dict((ev.num, float(ev.outcome)) for ev in submission.evaluations),
                submission.tokened(),
            )

        # Put together all the scores.
        for submission in self.submissions:
            task_id = submission.task_id
            username = submission.user.username
            details = scorers[task_id].pool[submission.id]
            # Later submissions overwrite earlier ones here.
            last_scores[task_id][username] = details["score"]
            if details["tokened"]:
                task_scores[task_id][username] = max(task_scores[task_id][username], details["score"])

        # Merge tokened and last submissions.
        for username in scores:
            for task_id in task_scores:
                task_scores[task_id][username] = max(task_scores[task_id][username], last_scores[task_id][username])
            scores[username] = sum(task_scores[task_id][username] for task_id in task_scores)

        sorted_usernames = sorted(scores.keys(), key=lambda username: (scores[username], username), reverse=True)
        sorted_tasks = sorted(self.contest.tasks, key=lambda task: task.num)

        n_tasks = len(sorted_tasks)
        task_names = tuple(t.name for t in sorted_tasks)
        txt_path = os.path.join(self.spool_dir, "classifica.txt")
        csv_path = os.path.join(self.spool_dir, "classifica.csv")

        # The with statements close both files even if writing fails.
        with codecs.open(txt_path, "w", encoding="utf-8") as ranking_file:
            with codecs.open(csv_path, "w", encoding="utf-8") as ranking_csv:
                # Write rankings' header.
                ranking_file.write("Classifica finale del contest `%s'\n" % self.contest.description)
                points_line = " %10s" * n_tasks
                csv_points_line = ",%s" * n_tasks
                ranking_file.write(("%20s %10s" % ("Utente", "Totale")) + (points_line % task_names) + "\n")
                ranking_csv.write(("%s,%s" % ("utente", "totale")) + (csv_points_line % task_names) + "\n")

                # Write rankings' content.
                points_line = " %10.3f" * n_tasks
                csv_points_line = ",%.6f" * n_tasks
                for username in sorted_usernames:
                    user_scores = tuple(task_scores[task.id][username] for task in sorted_tasks)
                    ranking_file.write(
                        ("%20s %10.3f" % (username, scores[username])) + (points_line % user_scores) + "\n"
                    )
                    ranking_csv.write(
                        ("%s,%.6f" % (username, scores[username])) + (csv_points_line % user_scores) + "\n"
                    )
예제 #19
0
class ContestExporter:
    """This service exports every data about the contest that CMS
    knows. The process of exporting and importing again should be
    idempotent.

    """
    def __init__(self, contest_id, dump, export_target, skip_submissions,
                 light):
        """Store the export options and create the FileCacher.

        contest_id: the id of the contest to export.
        dump: if true, do_export also dumps the database.
        export_target (string): where to export to; when empty, a name
                                built from the contest's name is used.
        skip_submissions: forwarded to the contest's enumerate_files
                          and export_to_dict.
        light: forwarded to the contest's enumerate_files.

        """
        self.contest_id = contest_id
        self.dump = dump
        self.skip_submissions = skip_submissions
        self.light = light

        if export_target != "":
            self.export_target = export_target
        else:
            # No target given: name the archive after the contest.
            with SessionGen(commit=False) as session:
                contest = Contest.get_from_id(self.contest_id, session)
                self.export_target = "dump_%s.tar.gz" % contest.name

        self.file_cacher = FileCacher()

    def run(self):
        """Interface to make the class do its job."""
        return self.do_export()

    def do_export(self):
        """Run the actual export code.

        The export is written to a directory. If the export target is
        an archive we are able to write, that directory is a temporary
        one, which is packed into the archive and then removed,
        whether the export succeeds or not.

        return (bool): True if the export succeeded, False otherwise.

        """
        logger.operation = "exporting contest %d" % self.contest_id
        logger.info("Starting export.")

        export_dir = self.export_target
        archive_info = get_archive_info(self.export_target)
        to_archive = archive_info["write_mode"] != ""

        # Temporary directory holding the export before archiving.
        temp_dir = None

        if to_archive:
            # We are able to write to this archive.
            if os.path.exists(self.export_target):
                logger.error("The specified file already exists, "
                             "I won't overwrite it.")
                return False
            temp_dir = tempfile.mkdtemp()
            export_dir = os.path.join(temp_dir, archive_info["basename"])

        try:
            if not self._export_contents(export_dir):
                return False

            # If the admin requested export to file, we do that.
            if to_archive:
                archive = tarfile.open(self.export_target,
                                       archive_info["write_mode"])
                try:
                    archive.add(export_dir, arcname=archive_info["basename"])
                finally:
                    archive.close()
        finally:
            # Remove the whole temporary directory, not only the
            # export directory inside it.
            if temp_dir is not None:
                shutil.rmtree(temp_dir)

        logger.info("Export finished.")
        logger.operation = ""

        return True

    def _export_contents(self, export_dir):
        """Create export_dir and write in it the files, their
        descriptions, contest.json and, if requested, the SQL dump.

        export_dir (string): the directory to create and fill.

        return (bool): True if all ok, False if something wrong.

        """
        logger.info("Creating dir structure.")
        try:
            os.mkdir(export_dir)
        except OSError:
            logger.error("The specified directory already exists, "
                         "I won't overwrite it.")
            return False

        files_dir = os.path.join(export_dir, "files")
        descr_dir = os.path.join(export_dir, "descriptions")
        os.mkdir(files_dir)
        os.mkdir(descr_dir)

        with SessionGen(commit=False) as session:

            contest = Contest.get_from_id(self.contest_id, session)
            if contest is None:
                # Report an unknown id instead of failing later with an
                # AttributeError on None.
                logger.error("Contest %s not found in the database." %
                             self.contest_id)
                return False

            # Export files.
            logger.info("Exporting files.")
            files = contest.enumerate_files(self.skip_submissions,
                                            light=self.light)
            for _file in files:
                if not self.safe_get_file(_file,
                                          os.path.join(files_dir, _file),
                                          os.path.join(descr_dir, _file)):
                    return False

            # Export the contest in JSON format.
            logger.info("Exporting the contest in JSON format.")
            with open(os.path.join(export_dir, "contest.json"), 'w') as fout:
                json.dump(contest.export_to_dict(self.skip_submissions),
                          fout, indent=4)

        if self.dump:
            if not self.dump_database(export_dir):
                return False

        return True

    def dump_database(self, export_dir):
        """Dump the whole database. This is never used; however, this
        part is retained for historical reasons.

        The dump is produced by pg_dump or sqlite3, depending on the
        engine found in config.database, and written to
        database_dump.sql.

        export_dir (string): the directory where to write the dump.

        return (bool): True if all ok, False if something wrong.

        """
        import subprocess

        # Warning: this part depends on the specific database used.
        logger.info("Dumping SQL database.")
        (engine, connection) = config.database.split(':', 1)
        db_exportfile = os.path.join(export_dir, "database_dump.sql")

        # Export procedure for PostgreSQL.
        if engine == 'postgresql':
            db_match = re.match(r'//(\w*):(\w*)@(\w*)/(\w*)', connection)
            if db_match is None:
                logger.critical("Cannot obtain parameters for "
                                "database connection.")
                return False
            username, password, host, database = db_match.groups()
            command = ['pg_dump', '-h', host, '-U', username, '-w',
                       database, '-x', '--attribute-inserts']
            # Give the password to the child only, leaving our own
            # environment untouched.
            env = dict(os.environ, PGPASSWORD=password)

        # Export procedure for SQLite.
        elif engine == 'sqlite':
            db_match = re.match(r'///(.*)', connection)
            if db_match is None:
                logger.critical("Cannot obtain parameters for "
                                "database connection.")
                return False
            dbfile, = db_match.groups()
            command = ['sqlite3', dbfile, '.dump']
            env = None

        else:
            logger.critical("Database engine not supported. :-(")
            return False

        # An argument list (no shell) keeps paths with spaces or
        # special characters intact.
        try:
            with open(db_exportfile, 'wb') as fout:
                export_res = subprocess.call(command, stdout=fout, env=env)
        except OSError:
            # Raised, for instance, when the dump program is missing.
            logger.critical("Database export failed.")
            return False

        if export_res != 0:
            logger.critical("Database export failed.")
            return False

        return True

    def safe_get_file(self, digest, path, descr_path=None):
        """Fetch a file from the FileCacher and verify its digest.

        digest (string): the digest of the file to retrieve.
        path (string): the path where to save the file.
        descr_path (string): the path where to save the description;
                             nothing is written when it is None.

        return (bool): True if all ok, False if something wrong.

        """
        # Download the file to the requested location.
        try:
            self.file_cacher.get_file(digest, path=path)
        except Exception as error:
            logger.error("File %s could not retrieved from file server (%r)." %
                         (digest, error))
            return False

        # What landed on disk must hash to the digest we asked for.
        actual_digest = sha1sum(path)
        if actual_digest != digest:
            logger.error("File %s has wrong hash %s." % (digest, actual_digest))
            return False

        # Without a description path there is nothing left to do.
        if descr_path is None:
            return True

        with open(descr_path, 'w') as fout:
            fout.write(self.file_cacher.describe(digest))
        return True
예제 #20
0
class SpoolExporter:
    """This service creates a tree structure "similar" to the one used
    in Italian IOI repository for storing the results of a contest.

    The spool directory will contain:
    - upload/<username>/: the source files of the submissions;
    - queue: the list of the exported (timestamped) source files;
    - *.res: one result file per evaluated submission;
    - classifica.txt, classifica.csv: the final ranking.

    """
    def __init__(self, contest_id, spool_dir):
        """Store the parameters of the export.

        contest_id (int): the id of the contest to export.
        spool_dir (string): the directory to create and fill; it must
                            not exist yet.

        """
        self.contest_id = contest_id
        self.spool_dir = spool_dir
        self.upload_dir = os.path.join(self.spool_dir, "upload")
        self.contest = None

        self.file_cacher = FileCacher()

    def run(self):
        """Interface to make the class do its job."""
        return self.do_export()

    def do_export(self):
        """Run the actual export code.

        return (bool): True if the export was completed, False if the
                       spool directory could not be created or the
                       contest was not found.

        """
        logger.operation = "exporting contest %s" % self.contest_id
        logger.info("Starting export.")

        logger.info("Creating dir structure.")
        try:
            os.mkdir(self.spool_dir)
        except OSError:
            logger.error("The specified directory already exists, "
                         "I won't overwrite it.")
            return False
        os.mkdir(self.upload_dir)

        with SessionGen(commit=False) as session:
            self.contest = Contest.get_from_id(self.contest_id, session)
            if self.contest is None:
                # Fail with a clear message instead of an
                # AttributeError on the first attribute access.
                logger.critical("Contest %s not found." % self.contest_id)
                return False

            # Creating users' directory.
            for user in self.contest.users:
                if not user.hidden:
                    os.mkdir(os.path.join(self.upload_dir, user.username))

            self.export_submissions()
            self.export_ranking()

        logger.info("Export finished.")
        logger.operation = ""

        return True

    def export_submissions(self):
        """Export submissions' source files.

        For each submission of a non-hidden user the source is saved
        twice in the user's upload directory: once with the timestamp
        in the name and once without. Submissions are processed in
        time-increasing order, so the file without timestamp ends up
        being the one of the latest submission.

        """
        logger.info("Exporting submissions.")

        # sorted() is stable: submissions sharing a timestamp keep the
        # order in which the contest returned them.
        submissions = sorted(self.contest.get_submissions(),
                             key=lambda submission: submission.timestamp)

        queue_path = os.path.join(self.spool_dir, "queue")
        # The with statement closes the queue even if an export fails.
        with codecs.open(queue_path, "w", encoding="utf-8") as queue_file:
            for submission in submissions:
                if submission.user.hidden:
                    continue
                logger.info("Exporting submission %s." % submission.id)
                username = submission.user.username
                task = submission.task.name
                timestamp = submission.timestamp

                # Get source files to the spool directory.
                file_digest = \
                    submission.files["%s.%s" % (task, "%l")].digest
                upload_filename = os.path.join(
                    self.upload_dir, username, "%s.%d.%s" %
                    (task, timestamp, submission.language))
                self.file_cacher.get_file(file_digest, path=upload_filename)
                upload_filename = os.path.join(
                    self.upload_dir, username, "%s.%s" %
                    (task, submission.language))
                self.file_cacher.get_file(file_digest, path=upload_filename)
                queue_file.write(
                    "./upload/%s/%s.%d.%s\n" %
                    (username, task, timestamp, submission.language))

                # Write results file for the submission.
                if submission.evaluated():
                    self._export_results(submission, username, task,
                                         timestamp)

            # The queue is terminated by an empty line.
            queue_file.write("\n")

    def _export_results(self, submission, username, task, timestamp):
        """Write the two result files of an evaluated submission.

        The same content goes in a file with the timestamp in its name
        and in one without it.

        submission (Submission): the evaluated submission.
        username (string): the username of the submission's author.
        task (string): the name of the submission's task.
        timestamp (int): the timestamp of the submission.

        """
        # Build the content first, so that no half-written file is
        # left behind if an outcome cannot be converted.
        lines = []
        total = 0.0
        for num, evaluation in enumerate(submission.evaluations):
            outcome = float(evaluation.outcome)
            total += outcome
            lines.append("Executing on file n. %2d %s (%.4f)" %
                         (num, evaluation.text, outcome))
        lines.append("Score: %.6f" % total)
        content = "".join(line + "\n" for line in lines)

        res_names = [
            "%d.%s.%s.%s.res" % (timestamp, username,
                                 task, submission.language),
            "%s.%s.%s.res" % (username, task, submission.language),
        ]
        for res_name in res_names:
            res_path = os.path.join(self.spool_dir, res_name)
            with codecs.open(res_path, "w", encoding="utf-8") as res_file:
                res_file.write(content)

    def export_ranking(self):
        """Exports the ranking in csv and txt (human-readable) form.

        Hidden users are left out; the other ones are listed by
        decreasing total score.

        """
        logger.info("Exporting ranking.")

        tasks = self.contest.tasks
        num_tasks = len(tasks)

        # Create a list of [points, username, [task_points]] to write
        # to the rankings.
        users = {}
        hidden_users = set()
        for user in self.contest.users:
            # Avoid hidden users.
            if user.hidden:
                hidden_users.add(user.username)
            else:
                users[user.username] = [0.0, user.username,
                                        [0.0] * num_tasks]
        for (username, task_num), score in \
                self.contest.ranking_view.scores.items():
            if username not in hidden_users:
                users[username][0] += score.score
                users[username][2][task_num] = score.score
        ranking = sorted(users.values(), reverse=True)

        task_names = tuple(t.name for t in tasks)
        txt_path = os.path.join(self.spool_dir, "classifica.txt")
        csv_path = os.path.join(self.spool_dir, "classifica.csv")

        # Nested with statements: both files get closed even if
        # formatting a line fails.
        with codecs.open(txt_path, "w", encoding="utf-8") as ranking_file:
            with codecs.open(csv_path, "w", encoding="utf-8") as ranking_csv:
                # Write rankings' header.
                ranking_file.write(
                    "Classifica finale del contest `%s'\n" %
                    self.contest.description)
                ranking_file.write(
                    ("%20s %10s" % ("Utente", "Totale")) +
                    ((" %10s" * num_tasks) % task_names) + "\n")
                ranking_csv.write(
                    ("%s,%s" % ("utente", "totale")) +
                    ((",%s" * num_tasks) % task_names) + "\n")

                # Write rankings' content.
                points_line = " %10.3f" * num_tasks
                csv_points_line = ",%.6f" * num_tasks
                for total, user, problems in ranking:
                    ranking_file.write(
                        ("%20s %10.3f" % (user, total)) +
                        (points_line % tuple(problems)) + "\n")
                    ranking_csv.write(
                        ("%s,%.6f" % (user, total)) +
                        (csv_points_line % tuple(problems)) + "\n")
예제 #21
0
# Name of a statement file: "<first> (<second>).pdf".
_STATEMENT_PDF_RE = re.compile(r"^([A-Za-z0-9_]+) \(([A-Za-z0-9_]+)\)\.pdf$")


def _load_json_dir(directory):
    """Decode all the '*.json' files found in a directory.

    directory (string): the directory to scan.

    return (list): a (name, data) pair for each file, where name is
                   the file name without '.json' and data its decoded
                   content.

    """
    result = []
    for filename in os.listdir(directory):
        if filename.endswith('.json'):
            with open(os.path.join(directory, filename)) as json_file:
                result.append((filename[:-5], json.load(json_file)))
    return result


def _report_mismatch(message, first, second):
    """Print an error message and the two one-sided differences.

    message (string): the description of the inconsistency.
    first (set): a set of (task, lang, team) tuples.
    second (set): the set that should have been equal to first.

    """
    print(message)
    print(repr(first - second))
    print(repr(second - first))


def _collect_statement_pdfs(data_dir):
    """Scan the PDF files stored in the 'data' directory.

    Each task directory is expected to hold "lang (team).pdf" files in
    'by_lang' and "team (lang).pdf" files in 'by_team'.

    data_dir (string): the directory containing one directory per task.

    return (tuple): the sets of (task, lang, team) tuples found in
                    'by_lang' and in 'by_team', or None if a file with
                    an unexpected name was found (an error is printed).

    """
    data_by_lang = set()
    data_by_team = set()

    for task in os.listdir(data_dir):
        task_path = os.path.join(data_dir, task)
        if not os.path.isdir(task_path):
            continue

        for f in os.listdir(os.path.join(task_path, "by_lang")):
            # f == "lang (team).pdf"
            match = _STATEMENT_PDF_RE.match(f)
            if match is None:
                print("ERROR: unexpected file %r in 'data'" %
                      os.path.join(task_path, "by_lang", f))
                return None
            lang, team = match.groups()
            data_by_lang.add((task, lang, team))

        for f in os.listdir(os.path.join(task_path, "by_team")):
            # f == "team (lang).pdf"
            match = _STATEMENT_PDF_RE.match(f)
            if match is None:
                print("ERROR: unexpected file %r in 'data'" %
                      os.path.join(task_path, "by_team", f))
                return None
            team, lang = match.groups()
            data_by_team.add((task, lang, team))

    return data_by_lang, data_by_team


def run(contest_id):
    """Check the translated statements and store them in the DB.

    The 'tasks', 'teams' and 'data' directories next to this file are
    cross-checked: each one has to describe the same set of
    (task, lang, team) translations. If they agree, every PDF is put
    in the file storage, a Statement is added for it and the primary
    statements of tasks and users are set.

    On any inconsistency an error is printed and nothing is stored.

    contest_id (int): the id of the contest the tasks belong to.

    """
    session = Session()
    contest = Contest.get_from_id(contest_id, session)

    base_dir = os.path.dirname(__file__)

    # Translations as declared by the files in 'tasks'.
    task_by_team = set()
    task_by_lang = set()

    task_dir = os.path.join(base_dir, "tasks")
    task_files = _load_json_dir(task_dir)

    for task, data in task_files:
        for team, v in data.get("teams", {}).items():
            for lang in v:
                task_by_team.add((task, lang, team))
        for lang, v in data.get("langs", {}).items():
            for team in v:
                task_by_lang.add((task, lang, team))

    if task_by_team != task_by_lang:
        _report_mismatch("ERROR: data in 'tasks' is not self-consistent",
                         task_by_team, task_by_lang)
        return

    # Translations as declared by the files in 'teams'.
    team_by_task = set()
    team_by_lang = set()

    team_dir = os.path.join(base_dir, "teams")
    team_files = _load_json_dir(team_dir)

    for team, data in team_files:
        for task, v in data.get("tasks", {}).items():
            for lang in v:
                team_by_task.add((task, lang, team))
        for lang, v in data.get("langs", {}).items():
            for task in v:
                team_by_lang.add((task, lang, team))

    if team_by_task != team_by_lang:
        _report_mismatch("ERROR: data in 'teams' is not self-consistent",
                         team_by_task, team_by_lang)
        return

    if task_by_team != team_by_task:
        _report_mismatch("ERROR: data in 'tasks' and 'teams' is different",
                         task_by_team, team_by_task)
        return

    # Translations as found among the PDF files in 'data'.
    data_dir = os.path.join(base_dir, "data")
    collected = _collect_statement_pdfs(data_dir)
    if collected is None:
        return
    data_by_lang, data_by_team = collected

    if data_by_lang != data_by_team:
        _report_mismatch("ERROR: PDF files in 'data' are not complete",
                         data_by_lang, data_by_team)
        return

    if task_by_team != data_by_lang:
        _report_mismatch("ERROR: PDF files in 'data' do not match JSON data",
                         task_by_team, data_by_lang)
        return

    print("Hooray! Data is consistent!")

    # Check this before storing anything, so that a wrong contest id
    # does not leave statement files behind.
    if contest is None:
        print("ERROR: contest %s not found" % contest_id)
        return

    # Pick one at random: they're all equal.
    translations = task_by_team

    # Determine language codes used in CMS.
    codes = dict()

    # Use the content of the JSON files in 'tasks' again as it
    # provides data already grouped as we need it, and not simply as
    # a list of tuples.
    for task, data in task_files:
        for lang, v in data.get("langs", {}).items():
            if len(v) == 0:
                continue
            if len(v) == 1 and v[0] != official_team:
                # A single, unofficial, translation: the language is
                # enough to identify it.
                for team in v:
                    codes[(task, lang, team)] = "%s" % lang
            else:
                for team in v:
                    codes[(task, lang, team)] = \
                        "%s_%s" % (lang, ioi_to_iso2[team])

    # Store the files as Statement objects.
    file_cacher = FileCacher()

    for task, lang, team in translations:
        if team == official_team:
            assert lang == "en"
            description = "Statement for task %s" % task
        else:
            description = \
                "Statement for task %s, translated into %s (%s) by %s (%s)" % \
                (task, langs[lang], lang, teams[team], team)

        digest = file_cacher.put_file(
            path=os.path.join(data_dir, task, "by_lang",
                              "%s (%s).pdf" % (lang, team)),
            description=description)

        s = Statement(codes[(task, lang, team)],
                      digest,
                      task=contest.get_task(task))

        session.add(s)

    session.commit()

    primary = dict()

    # Retrieve the statements selected by each team.
    for team, data in team_files:
        for team2, lang, task in data.get("selected", []):
            # A team could have selected a statement that later got removed.
            if (task, lang, team2) in codes:
                primary.setdefault(team, {}).setdefault(
                    task, []).append(codes[(task, lang, team2)])

    # Add the ones they uploaded themselves.
    for task, lang, team in translations:
        # Don't worry about duplicates, CWS filters them out.
        primary.setdefault(team, {}).setdefault(
            task, []).append(codes[(task, lang, team)])

    # Set the primary statements for tasks (i.e. the ones of the official team)
    for task, primary2 in primary.get(official_team, {}).items():
        contest.get_task(task).primary_statements = json.dumps(primary2)

    # Set the primary statements for teams. Values are passed as bound
    # parameters: building the statement with string formatting would
    # break (or be exploitable) as soon as a value contains a quote.
    for team, primary2 in primary.items():
        session.execute(
            "UPDATE users SET primary_statements = :statements "
            "WHERE username LIKE :pattern;",
            {"statements": json.dumps(primary2), "pattern": team + "%"})

    session.commit()

    print("Statements stored in the DB!")
class TestFileCacher(TestService):
    """Service that performs automatically some tests for the
    FileCacher service.

    The tests share state through the instance: test_000, test_006 and
    test_008 store a file and record its digest and cache path, the
    following tests work on the file stored last.

    """

    def __init__(self, shard):
        logger.initialize(ServiceCoord("TestFileCacher", shard))
        TestService.__init__(self, shard, custom_logger=logger)

        # Assume we store the cache in "./cache/fs-cache-TestFileCacher-0/"
        self.cache_base_path = os.path.join(config.cache_dir,
                                            "fs-cache-TestFileCacher-0")
        self.cache_path = None
        self.content = None
        self.fake_content = None
        self.digest = None
        self.file_obj = None
        self.size = None
        self.file_cacher = FileCacher(self)

    def prepare(self):
        """Initialization for the test code - make sure that the cache
        is empty before testing.

        """
        logger.info("Please delete directory %s before." %
                    self.cache_base_path)

    @staticmethod
    def _read_binary(path):
        """Return the whole content of a file, closing it afterwards.

        path (string): the file to read.

        return (string): the binary content of the file.

        """
        with open(path, "rb") as binary_file:
            return binary_file.read()

    def _check_stored(self, digest):
        """End the current test checking that a file just sent to
        FileCacher is in the local cache with the right content.

        On success the digest and the cache path are recorded for the
        following tests.

        digest (string): the digest returned by put_file.

        """
        cache_path = os.path.join(self.cache_base_path, "objects", digest)
        if not os.path.exists(cache_path):
            self.test_end(False, "File not stored in local cache.")
        elif self._read_binary(cache_path) != self.content:
            self.test_end(False, "Local cache's content differ "
                          "from original file.")
        else:
            self.cache_path = cache_path
            self.digest = digest
            self.test_end(True, "Data sent and cached without error.")

    ### TEST 000 ###

    def test_000(self):
        """Send a ~100B random binary file to the storage through
        FileCacher as a file-like object. FC should cache the content
        locally.

        """
        self.size = 100
        self.content = "".join(chr(random.randint(0, 255))
                               for unused_i in xrange(self.size))

        logger.info("  I am sending the ~100B binary file to FileCacher")
        try:
            data = self.file_cacher.put_file(file_obj=StringIO(self.content),
                                             description="Test #000")
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            # Nothing to check: data was never assigned.
            return

        self._check_stored(data)

    ### TEST 001 ###

    def test_001(self):
        """Retrieve the file.

        The cached copy is overwritten with a fake content first, to
        tell whether FileCacher answered from the cache.

        """
        logger.info("  I am retrieving the ~100B binary file from FileCacher")
        self.fake_content = "Fake content.\n"
        with open(self.cache_path, "wb") as cached_file:
            cached_file.write(self.fake_content)
        try:
            data = self.file_cacher.get_file(digest=self.digest,
                                             temp_file_obj=True)
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        received = data.read()
        data.close()
        if received == self.fake_content:
            self.test_end(True, "Data object received correctly.")
        elif received == self.content:
            self.test_end(False,
                          "Did not use the cache even if it could.")
        else:
            self.test_end(False, "Content differ.")

    ### TEST 002 ###

    def test_002(self):
        """Check the size of the file.

        """
        logger.info("  I am checking the size of the ~100B binary file")
        try:
            size = self.file_cacher.get_size(self.digest)
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        if size == self.size:
            self.test_end(True, "The size is correct.")
        else:
            self.test_end(False, "The size is wrong: %d instead of %d" %
                          (size, self.size))

    ### TEST 003 ###

    def test_003(self):
        """Get file from FileCacher.

        """
        logger.info("  I am retrieving the file from FileCacher " +
                    "after deleting the cache.")
        os.unlink(self.cache_path)
        try:
            data = self.file_cacher.get_file(digest=self.digest,
                                             temp_file_obj=True)
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        received = data.read()
        data.close()
        if received != self.content:
            self.test_end(False, "Content differ.")
        elif not os.path.exists(self.cache_path):
            self.test_end(False, "File not stored in local cache.")
        elif self._read_binary(self.cache_path) != self.content:
            # The content is binary: the cache is read in binary mode,
            # as test_000 does.
            self.test_end(False, "Local cache's content differ " +
                          "from original file.")
        else:
            self.test_end(True, "Content object received " +
                          "and cached correctly.")

    ### TEST 004 ###

    def test_004(self):
        """Delete the file through FS and tries to get it again through FC.

        """
        logger.info("  I am deleting the file from FileCacher.")
        try:
            self.file_cacher.delete(digest=self.digest)
        except Exception as error:
            self.test_end(False, "Error received: %s." % error)

        else:
            logger.info("  File deleted correctly.")
            logger.info("  I am getting the file from FileCacher.")
            try:
                self.file_cacher.get_file(digest=self.digest)
            except Exception as error:
                self.test_end(True, "Correctly received an error: %r." % error)
            else:
                self.test_end(False, "Did not receive error.")

    ### TEST 005 ###

    def test_005(self):
        """Get unexisting file from FileCacher.

        """
        logger.info("  I am retrieving an unexisting file from FileCacher.")
        try:
            self.file_cacher.get_file(digest=self.digest, temp_file_obj=True)
        except Exception as error:
            self.test_end(True, "Correctly received an error: %r." % error)
        else:
            self.test_end(False, "Did not receive error.")

    ### TEST 006 ###

    def test_006(self):
        """Send a ~100B random binary file to the storage through
        FileCacher as a string. FC should cache the content locally.

        """
        self.content = "".join(chr(random.randint(0, 255))
                               for unused_i in xrange(100))

        logger.info("  I am sending the ~100B binary file to FileCacher")
        try:
            data = self.file_cacher.put_file(binary_data=self.content,
                                             description="Test #005")
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        self._check_stored(data)

    ### TEST 007 ###

    def test_007(self):
        """Retrieve the file as a string.

        As in test_001, the cached copy is replaced with a fake
        content to check that the cache is used.

        """
        logger.info("  I am retrieving the ~100B binary file from FileCacher "
                    "using get_file_to_string()")
        self.fake_content = "Fake content.\n"
        with open(self.cache_path, "wb") as cached_file:
            cached_file.write(self.fake_content)
        try:
            data = self.file_cacher.get_file(digest=self.digest, string=True)
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        if data == self.fake_content:
            self.test_end(True, "Data received correctly.")
        elif data == self.content:
            self.test_end(False,
                          "Did not use the cache even if it could.")
        else:
            self.test_end(False, "Content differ.")

    ### TEST 008 ###

    def test_008(self):
        """Put a ~100MB file into the storage (using a specially
        crafted file-like object).

        """
        logger.info("  I am sending the ~100MB binary file to FileCacher")
        rand_file = RandomFile(100000000)
        try:
            data = self.file_cacher.put_file(file_obj=rand_file,
                                             description="Test #007")
        except Exception as error:
            rand_file.close()
            self.test_end(False, "Error received: %r." % error)
            return

        # Read what we need from the file before closing it.
        read_completely = rand_file.dim == 0
        my_digest = rand_file.digest
        rand_file.close()

        if not read_completely:
            self.test_end(False, "The input file wasn't read completely.")
        elif not os.path.exists(
                os.path.join(self.cache_base_path, "objects", data)):
            self.test_end(False, "File not stored in local cache.")
        elif my_digest != data:
            self.test_end(False, "File received with wrong hash.")
        else:
            self.cache_path = os.path.join(self.cache_base_path, "objects",
                                           data)
            self.digest = data
            self.test_end(True, "Data sent and cached without error.")

    ### TEST 009 ###

    def test_009(self):
        """Get the ~100MB file from FileCacher.

        The file is deleted from the storage at the end, whatever the
        result of the test.

        """
        logger.info("  I am retrieving the ~100MB file from FileCacher " +
                    "after deleting the cache.")
        os.unlink(self.cache_path)
        hash_file = HashingFile()
        failure = None
        try:
            self.file_cacher.get_file(digest=self.digest, file_obj=hash_file)
        except Exception as error:
            # Remember the failure: the result is reported only once,
            # after the cleanup of the hashing file.
            failure = "Error received: %r." % error
        my_digest = hash_file.digest
        hash_file.close()

        try:
            if failure is not None:
                self.test_end(False, failure)
            elif self.digest != my_digest:
                self.test_end(False, "Content differs.")
            elif not os.path.exists(self.cache_path):
                self.test_end(False, "File not stored in local cache.")
            else:
                self.test_end(True, "Content object received " +
                              "and cached correctly.")
        finally:
            self.file_cacher.delete(self.digest)
예제 #23
0
class ContestImporter:
    """This service imports a contest from a directory that has been
    the target of a ContestExport. The process of exporting and
    importing again should be idempotent.

    The source can also be a zip or tar archive of such a directory:
    in that case it is extracted in a temporary directory that is
    removed when the import ends.

    """
    def __init__(self, drop, import_source,
                 only_files, no_files, no_submissions):
        """Store the parameters of the import.

        drop (bool): whether to drop the database before importing.
        import_source (string): the directory or archive to import.
        only_files (bool): skip the import of the contest's data.
        no_files (bool): skip the import of the files.
        no_submissions (bool): stored as given; not read by the
                               methods of this class.

        """
        self.drop = drop
        self.only_files = only_files
        self.no_files = no_files
        self.no_submissions = no_submissions
        self.import_source = import_source
        self.import_dir = import_source

        # Filled by do_import: the decoded JSON data and the objects
        # built from it, both keyed by the ids used in the JSON file.
        self.datas = None
        self.objs = None

        self.file_cacher = FileCacher()

        configure_mappers()

    def run(self):
        """Interface to make the class do its job."""
        return self.do_import()

    def do_import(self):
        """Run the actual import code.

        return (bool): True if the import was completed, False
                       otherwise.

        """
        logger.operation = "importing contest from %s" % self.import_source
        logger.info("Starting import.")

        temp_dir = None
        try:
            if not os.path.isdir(self.import_source):
                temp_dir = self._extract_archive()
                if temp_dir is None:
                    return False
            return self._import_from_dir()
        finally:
            # If we extracted an archive, we remove the whole
            # temporary directory, also when the import fails.
            if temp_dir is not None:
                shutil.rmtree(temp_dir)

    def _extract_archive(self):
        """Extract the archive to import in a new temporary directory.

        On success self.import_dir is set to the root directory of the
        extracted archive.

        return (string): the temporary directory (to be removed by the
                         caller), or None if the source is not a
                         supported archive or has no root directory.

        """
        if self.import_source.endswith(".zip"):
            archive = zipfile.ZipFile(self.import_source, "r")
            # Names, not ZipInfo objects: the same kind of data the
            # tar branch passes to find_root_of_archive.
            file_names = archive.namelist()
        elif self.import_source.endswith(
                (".tar.gz", ".tgz", ".tar.bz2", ".tbz2", ".tar")):
            archive = tarfile.open(name=self.import_source)
            file_names = archive.getnames()
        else:
            logger.critical("Unable to import from %s." %
                            self.import_source)
            return None

        try:
            root = find_root_of_archive(file_names)
            if root is None:
                logger.critical("Cannot find a root directory in %s." %
                                self.import_source)
                return None

            temp_dir = tempfile.mkdtemp()
            try:
                # NOTE(review): extractall trusts the member names of
                # the archive; use only archives from trusted sources.
                archive.extractall(temp_dir)
            except Exception:
                shutil.rmtree(temp_dir)
                raise
        finally:
            archive.close()

        self.import_dir = os.path.join(temp_dir, root)
        return temp_dir

    def _import_from_dir(self):
        """Import the contest and its files from self.import_dir.

        return (bool): True if all ok, False if the database could not
                       be accessed or a file could not be stored.

        """
        if self.drop:
            logger.info("Dropping and recreating the database.")
            try:
                metadata.drop_all()
            except sqlalchemy.exc.OperationalError as error:
                logger.critical("Unable to access DB.\n%r" % error)
                return False
        try:
            metadata.create_all()
        except sqlalchemy.exc.OperationalError as error:
            logger.critical("Unable to access DB.\n%r" % error)
            return False

        # Both stay None when only the files are imported.
        contest_id = None
        contest_files = None

        if not self.only_files:
            with SessionGen(commit=False) as session:

                # Import the contest in JSON format.
                logger.info("Importing the contest from JSON file.")

                with open(os.path.join(self.import_dir,
                                       "contest.json")) as fin:
                    # Throughout all the code we'll assume the input is
                    # correct without actually doing any validations.
                    # Thus, for example, we're not checking that the
                    # decoded object is a dict...
                    self.datas = json.load(fin)

                # First create all the objects, then link them: a
                # relationship may point to any other object.
                self.objs = dict()
                for _id, data in self.datas.items():
                    obj = self.import_object(data)
                    self.objs[_id] = obj
                    session.add(obj)

                for _id in self.datas:
                    self.add_relationships(self.datas[_id], self.objs[_id])

                # Mmh... kind of fragile interface
                contest = self.objs["0"]

                session.flush()
                contest_id = contest.id
                contest_files = contest.enumerate_files()
                session.commit()

        if not self.no_files:
            logger.info("Importing files.")
            files_dir = os.path.join(self.import_dir, "files")
            descr_dir = os.path.join(self.import_dir, "descriptions")
            if contest_files is None:
                # No contest was imported, so we cannot ask it which
                # files it needs: import all the available ones.
                # NOTE(review): assumes every entry of 'files' is
                # named after its digest - confirm with the exporter.
                contest_files = os.listdir(files_dir)
            for digest in contest_files:
                file_ = os.path.join(files_dir, digest)
                desc = os.path.join(descr_dir, digest)
                file_missing = not os.path.exists(file_)
                if file_missing or not os.path.exists(desc):
                    logger.error("Some files needed to the contest "
                                 "are missing in the import directory. "
                                 "The import will continue. Be aware.")
                if file_missing:
                    # Nothing to store; a missing description alone is
                    # handled by safe_put_file.
                    continue
                if not self.safe_put_file(file_, desc):
                    logger.critical("Unable to put file `%s' in the database. "
                                    "Aborting. Please remove the contest "
                                    "from the database." % file_)
                    # TODO: remove contest from the database.
                    return False

        logger.info("Import finished (contest id: %s)." % contest_id)
        logger.operation = ""

        return True

    def import_object(self, data):
        """Import objects from the given data (without relationships)

        The given data is assumed to be a dict in the format produced by
        ContestExporter. This method reads the "_class" item and tries
        to find the corresponding class. Then it loads all column
        properties of that class (those that are present in the data)
        and uses them as keyword arguments in a call to the class
        constructor (if a required property is missing this call will
        raise an error).

        Relationships are not handled by this method, since we may not
        have all referenced objects available yet. Thus we prefer to add
        relationships in a later moment, using the add_relationships
        method.

        data (dict): the exported representation of the object.

        return (object): the newly created object.

        raise (RuntimeError): if a column has a type we don't know how
                              to import.

        """
        cls = getattr(class_hook, data["_class"])

        args = dict()

        for prp in cls._col_props:
            if prp.key not in data:
                # We will let the __init__ of the class check if any
                # argument is missing, so it's safe to just skip here.
                continue

            col = prp.columns[0]
            col_type = type(col.type)

            val = data[prp.key]
            if col_type in [Boolean, Integer, Float, String]:
                args[prp.key] = val
            elif col_type is DateTime:
                args[prp.key] = \
                    make_datetime(val) if val is not None else None
            elif col_type is Interval:
                args[prp.key] = \
                    timedelta(seconds=val) if val is not None else None
            else:
                raise RuntimeError(
                    "Unknown SQLAlchemy column type: %s" % col_type)

        return cls(**args)

    def add_relationships(self, data, obj):
        """Add the relationships to the given object, using the given data.

        Do what we didn't in import_objects: importing relationships.
        We already know the class of the object so we simply iterate over
        its relationship properties trying to load them from the data (if
        present), checking whether they are IDs or collection of IDs,
        dereferencing them (i.e. getting the corresponding object) and
        reflecting all on the given object.

        Note that both methods don't check if the given data has more
        items than the ones we understand and use.

        data (dict): the exported representation of the object.
        obj (object): the object built from data by import_object.

        raise (RuntimeError): if a relationship value is neither None,
                              an ID, a list of IDs nor a dict of IDs.

        """
        cls = type(obj)
        # IDs are strings: json gives unicode objects on Python 2 and
        # str objects on Python 3, so both have to be accepted.
        id_types = (str, type(u""))

        for prp in cls._rel_props:
            if prp.key not in data:
                # Relationships are always optional
                continue

            val = data[prp.key]
            if val is None:
                setattr(obj, prp.key, None)
            elif isinstance(val, id_types):
                setattr(obj, prp.key, self.objs[val])
            elif isinstance(val, list):
                setattr(obj, prp.key, [self.objs[i] for i in val])
            elif isinstance(val, dict):
                setattr(obj, prp.key,
                        dict((k, self.objs[v]) for k, v in val.items()))
            else:
                raise RuntimeError(
                    "Unknown RelationshipProperty value: %s" % type(val))

    def safe_put_file(self, path, descr_path):
        """Put a file to FileCacher signaling every error (including
        digest mismatch).

        path (string): the path from which to load the file.
        descr_path (string): same for description.

        return (bool): True if all ok, False if something wrong.

        """
        # First read the description; a file without a readable
        # description is stored with an empty one.
        try:
            with open(descr_path) as fin:
                description = fin.read()
        except IOError:
            description = ''

        # Put the file.
        try:
            digest = self.file_cacher.put_file(path=path,
                                               description=description)
        except Exception as error:
            logger.critical("File %s could not be put to file server (%r), "
                            "aborting." % (path, error))
            return False

        # Then check the digest.
        calc_digest = sha1sum(path)
        if digest != calc_digest:
            logger.critical("File %s has hash %s, but the server returned %s, "
                            "aborting." % (path, calc_digest, digest))
            return False

        return True
예제 #24
0
class TestFileCacher(TestService):
    """Service that performs automatically some tests for the
    FileCacher service.

    Later tests rely on the state (self.digest, self.cache_path,
    self.content, ...) stored on the instance by earlier ones.

    Every test reports exactly one outcome through self.test_end() and
    returns right after reporting a failure, so that it never goes on
    to use a result that was not obtained.

    """
    def __init__(self, shard):
        """Initialize the service and the state shared by the tests.

        shard: the shard of this service, forwarded to ServiceCoord
            and TestService.

        """
        logger.initialize(ServiceCoord("TestFileCacher", shard))
        TestService.__init__(self, shard, custom_logger=logger)

        # Assume we store the cache in "./cache/fs-cache-TestFileCacher-0/"
        self.cache_base_path = os.path.join(config.cache_dir,
                                            "fs-cache-TestFileCacher-0")
        # Path of the cached copy of the file currently under test.
        self.cache_path = None
        # Size and content of the file sent by test_000.
        self.size = None
        self.content = None
        # Content written over the cached copy to detect cache hits.
        self.fake_content = None
        # Digest of the file currently under test.
        self.digest = None
        self.file_obj = None
        self.file_cacher = FileCacher(self)
        #self.file_cacher = FileCacher(self, path="fs-storage")

    def prepare(self):
        """Initialization for the test code - make sure that the cache
        is empty before testing.

        Nothing is deleted here: the operator is only asked (through
        the log) to remove the cache directory.

        """
        logger.info("Please delete directory %s before." %
                    self.cache_base_path)

### TEST 000 ###

    def test_000(self):
        """Send a ~100B random binary file to the storage through
        FileCacher as a file-like object. FC should cache the content
        locally.

        On success self.cache_path and self.digest are set for the
        following tests.

        """
        self.size = 100
        self.content = "".join(
            chr(random.randint(0, 255)) for unused_i in xrange(self.size))

        logger.info("  I am sending the ~100B binary file to FileCacher")
        try:
            data = self.file_cacher.put_file(file_obj=StringIO(self.content),
                                             description="Test #000")
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            # No digest was obtained: nothing else can be checked.
            return

        cache_path = os.path.join(self.cache_base_path, "objects", data)
        if not os.path.exists(cache_path):
            self.test_end(False, "File not stored in local cache.")
            return

        with open(cache_path, "rb") as cached_file:
            cached_content = cached_file.read()
        if cached_content != self.content:
            self.test_end(
                False, "Local cache's content differ "
                "from original file.")
            return

        self.cache_path = cache_path
        self.digest = data
        self.test_end(True, "Data sent and cached without error.")

### TEST 001 ###

    def test_001(self):
        """Retrieve the file.

        The cached copy is first overwritten with fake content: getting
        the fake content back shows that the local cache was used.

        """
        logger.info("  I am retrieving the ~100B binary file from FileCacher")
        self.fake_content = "Fake content.\n"
        with open(self.cache_path, "wb") as cached_file:
            cached_file.write(self.fake_content)
        try:
            data = self.file_cacher.get_file(digest=self.digest,
                                             temp_file_obj=True)
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        received = data.read()
        data.close()
        if received != self.fake_content:
            if received == self.content:
                self.test_end(False, "Did not use the cache even if it could.")
            else:
                self.test_end(False, "Content differ.")
        else:
            self.test_end(True, "Data object received correctly.")

### TEST 002 ###

    def test_002(self):
        """Check the size of the file.

        """
        logger.info("  I am checking the size of the ~100B binary file")
        try:
            size = self.file_cacher.get_size(self.digest)
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        if size == self.size:
            self.test_end(True, "The size is correct.")
        else:
            self.test_end(
                False,
                "The size is wrong: %d instead of %d" % (size, self.size))

### TEST 003 ###

    def test_003(self):
        """Get file from FileCacher.

        The cached copy is deleted first, so the content has to be
        fetched again and the cache re-created.

        """
        logger.info("  I am retrieving the file from FileCacher " +
                    "after deleting the cache.")
        os.unlink(self.cache_path)
        try:
            data = self.file_cacher.get_file(digest=self.digest,
                                             temp_file_obj=True)
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        received = data.read()
        data.close()
        if received != self.content:
            self.test_end(False, "Content differ.")
            return
        if not os.path.exists(self.cache_path):
            self.test_end(False, "File not stored in local cache.")
            return

        # The content is binary: read it in binary mode, as the other
        # tests do, and close the file when done.
        with open(self.cache_path, "rb") as cached_file:
            cached_content = cached_file.read()
        if cached_content != self.content:
            self.test_end(
                False, "Local cache's content differ " + "from original file.")
        else:
            self.test_end(True,
                          "Content object received " + "and cached correctly.")

### TEST 004 ###

    def test_004(self):
        """Delete the file through FS and tries to get it again through FC.

        The test succeeds if getting the deleted file raises an error.

        """
        logger.info("  I am deleting the file from FileCacher.")
        try:
            self.file_cacher.delete(digest=self.digest)
        except Exception as error:
            self.test_end(False, "Error received: %s." % error)

        else:
            logger.info("  File deleted correctly.")
            logger.info("  I am getting the file from FileCacher.")
            try:
                self.file_cacher.get_file(digest=self.digest)
            except Exception as error:
                self.test_end(True, "Correctly received an error: %r." % error)
            else:
                self.test_end(False, "Did not receive error.")

### TEST 005 ###

    def test_005(self):
        """Get unexisting file from FileCacher.

        The test succeeds if an error is raised.

        """
        logger.info("  I am retrieving an unexisting file from FileCacher.")
        try:
            self.file_cacher.get_file(digest=self.digest, temp_file_obj=True)
        except Exception as error:
            self.test_end(True, "Correctly received an error: %r." % error)
        else:
            self.test_end(False, "Did not receive error.")

### TEST 006 ###

    def test_006(self):
        """Send a ~100B random binary file to the storage through
        FileCacher as a string. FC should cache the content locally.

        On success self.cache_path and self.digest are set for the
        following tests.

        """
        self.content = "".join(
            chr(random.randint(0, 255)) for unused_i in xrange(100))

        logger.info("  I am sending the ~100B binary file to FileCacher")
        try:
            data = self.file_cacher.put_file(binary_data=self.content,
                                             description="Test #005")
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        cache_path = os.path.join(self.cache_base_path, "objects", data)
        if not os.path.exists(cache_path):
            self.test_end(False, "File not stored in local cache.")
            return

        with open(cache_path, "rb") as cached_file:
            cached_content = cached_file.read()
        if cached_content != self.content:
            self.test_end(
                False, "Local cache's content differ "
                "from original file.")
            return

        self.cache_path = cache_path
        self.digest = data
        self.test_end(True, "Data sent and cached without error.")

### TEST 007 ###

    def test_007(self):
        """Retrieve the file as a string.

        As in test_001, the cached copy is overwritten with fake
        content to check that the local cache is used.

        """
        logger.info("  I am retrieving the ~100B binary file from FileCacher "
                    "using get_file_to_string()")
        self.fake_content = "Fake content.\n"
        with open(self.cache_path, "wb") as cached_file:
            cached_file.write(self.fake_content)
        try:
            data = self.file_cacher.get_file(digest=self.digest, string=True)
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        if data != self.fake_content:
            if data == self.content:
                self.test_end(False, "Did not use the cache even if it could.")
            else:
                self.test_end(False, "Content differ.")
        else:
            self.test_end(True, "Data received correctly.")

### TEST 008 ###

    def test_008(self):
        """Put a ~100MB file into the storage (using a specially
        crafted file-like object).

        On success self.cache_path and self.digest are set for the
        following test.

        """
        logger.info("  I am sending the ~100MB binary file to FileCacher")
        rand_file = RandomFile(100000000)
        try:
            data = self.file_cacher.put_file(file_obj=rand_file,
                                             description="Test #007")
        except Exception as error:
            rand_file.close()
            self.test_end(False, "Error received: %r." % error)
            return

        # Collect what we need from the source before closing it.
        fully_read = rand_file.dim == 0
        my_digest = rand_file.digest
        rand_file.close()

        if not fully_read:
            # Report this failure only, instead of going on and
            # reporting a second outcome for the same test.
            self.test_end(False, "The input file wasn't read completely.")
            return

        cache_path = os.path.join(self.cache_base_path, "objects", data)
        if not os.path.exists(cache_path):
            self.test_end(False, "File not stored in local cache.")
        elif my_digest != data:
            self.test_end(False, "File received with wrong hash.")
        else:
            self.cache_path = cache_path
            self.digest = data
            self.test_end(True, "Data sent and cached without error.")


### TEST 009 ###

    def test_009(self):
        """Get the ~100MB file from FileCacher.

        The cached copy is deleted first. Whatever the outcome, the
        file is deleted from the FileCacher at the end.

        """
        logger.info("  I am retrieving the ~100MB file from FileCacher " +
                    "after deleting the cache.")
        os.unlink(self.cache_path)
        hash_file = HashingFile()
        try:
            try:
                self.file_cacher.get_file(digest=self.digest,
                                          file_obj=hash_file)
            except Exception as error:
                self.test_end(False, "Error received: %r." % error)
                return
            finally:
                # Runs on both the success and the failure path.
                my_digest = hash_file.digest
                hash_file.close()

            if self.digest != my_digest:
                self.test_end(False, "Content differs.")
            elif not os.path.exists(self.cache_path):
                self.test_end(False, "File not stored in local cache.")
            else:
                self.test_end(
                    True, "Content object received " + "and cached correctly.")
        finally:
            self.file_cacher.delete(self.digest)
예제 #25
0
파일: ContestExporter.py 프로젝트: Mloc/cms
class ContestExporter:

    """This service exports every data about the contest that CMS
    knows. The process of exporting and importing again should be
    idempotent.

    """

    def __init__(self, contest_id, export_target,
                 dump_files, dump_model, light,
                 skip_submissions, skip_user_tests):
        """Store the export options and choose the export target.

        contest_id (int): the id of the contest to export.
        export_target (string): the file or directory to export to; if
            empty, "dump_<contest name>.tar.gz" is used.
        dump_files (bool): whether to export the files.
        dump_model (bool): whether to export the contest as JSON.
        light: forwarded to Contest.enumerate_files.
        skip_submissions (bool): whether to leave out submissions.
        skip_user_tests (bool): whether to leave out user tests.

        """
        self.contest_id = contest_id
        self.dump_files = dump_files
        self.dump_model = dump_model
        self.light = light
        self.skip_submissions = skip_submissions
        self.skip_user_tests = skip_user_tests

        # If target is not provided, we use the contest's name.
        if export_target == "":
            with SessionGen(commit=False) as session:
                contest = Contest.get_from_id(self.contest_id, session)
                self.export_target = "dump_%s.tar.gz" % contest.name
                logger.warning("export_target not given, using \"%s\""
                               % self.export_target)
        else:
            self.export_target = export_target

        self.file_cacher = FileCacher()

    def do_export(self):
        """Run the actual export code.

        When the target is an archive we are able to write, the data
        is first exported to a temporary directory, which is then
        packed into the archive. The temporary directory is always
        removed, whether the export succeeds or not.

        return (bool): True if all ok, False if something wrong.

        """
        logger.operation = "exporting contest %d" % self.contest_id
        logger.info("Starting export.")

        export_dir = self.export_target
        archive_info = get_archive_info(self.export_target)

        # Root of the temporary tree; None when exporting to a plain
        # directory.
        temp_root = None

        if archive_info["write_mode"] != "":
            # We are able to write to this archive.
            if os.path.exists(self.export_target):
                logger.critical("The specified file already exists, "
                                "I won't overwrite it.")
                return False
            temp_root = tempfile.mkdtemp()
            export_dir = os.path.join(temp_root, archive_info["basename"])

        try:
            if not self._export_to_dir(export_dir):
                return False

            # If the admin requested export to file, we do that.
            if temp_root is not None:
                archive = tarfile.open(self.export_target,
                                       archive_info["write_mode"])
                try:
                    archive.add(export_dir, arcname=archive_info["basename"])
                finally:
                    archive.close()
        finally:
            # Remove the whole temporary tree (not just export_dir,
            # which would leave the directory made by mkdtemp behind).
            if temp_root is not None:
                shutil.rmtree(temp_root)

        logger.info("Export finished.")
        logger.operation = ""

        return True

    def _export_to_dir(self, export_dir):
        """Create export_dir and fill it with the requested data.

        export_dir (string): the directory to create and populate; it
            must not exist yet.

        return (bool): True if all ok, False if something wrong.

        """
        logger.info("Creating dir structure.")
        try:
            os.mkdir(export_dir)
        except OSError:
            logger.critical("The specified directory already exists, "
                            "I won't overwrite it.")
            return False

        files_dir = os.path.join(export_dir, "files")
        descr_dir = os.path.join(export_dir, "descriptions")
        os.mkdir(files_dir)
        os.mkdir(descr_dir)

        with SessionGen(commit=False) as session:

            contest = Contest.get_from_id(self.contest_id, session)

            # Export files.
            if self.dump_files:
                logger.info("Exporting files.")
                files = contest.enumerate_files(self.skip_submissions,
                                                self.skip_user_tests,
                                                self.light)
                for file_ in files:
                    if not self.safe_get_file(file_,
                                              os.path.join(files_dir, file_),
                                              os.path.join(descr_dir, file_)):
                        return False

            # Export the contest in JSON format.
            if self.dump_model:
                logger.info("Exporting the contest to a JSON file.")

                # We use strings because they'll be the keys of a JSON
                # object; the contest will have ID 0.
                self.ids = {contest.sa_identity_key: "0"}
                self.queue = [contest]

                # Exporting an object may append new objects to the
                # queue (see get_id), so we loop until it is empty.
                data = dict()
                while len(self.queue) > 0:
                    obj = self.queue.pop(0)
                    data[self.ids[obj.sa_identity_key]] = \
                        self.export_object(obj)

                # Specify the "root" of the data graph
                data["_objects"] = ["0"]

                with io.open(os.path.join(export_dir,
                                          "contest.json"), "wb") as fout:
                    json.dump(data, fout, encoding="utf-8",
                              indent=4, sort_keys=True)

        return True

    def get_id(self, obj):
        """Return the export ID of the given object.

        If the object has no ID yet, a new one is generated and the
        object is appended to the queue of objects to export.

        obj (object): an object with a sa_identity_key attribute.

        return (string): the ID of the object in the exported file.

        """
        obj_key = obj.sa_identity_key
        if obj_key not in self.ids:
            # We use strings because they'll be the keys of a JSON object
            self.ids[obj_key] = str(len(self.ids))
            self.queue.append(obj)

        return self.ids[obj_key]

    def export_object(self, obj):

        """Export the given object, returning a JSON-encodable dict.

        The returned dict will contain a "_class" item (the name of the
        class of the given object), an item for each column property
        (with a value properly translated to a JSON-compatible type)
        and an item for each relationship property (which will be an ID
        or a collection of IDs).

        The IDs used in the exported dict aren't related to the ones
        used in the DB: they are newly generated and their scope is
        limited to the exported file only. They are shared among all
        classes (that is, two objects can never share the same ID, even
        if they are of different classes).

        If, when exporting the relationship, we find an object without
        an ID we generate a new ID, assign it to the object and append
        the object to the queue of objects to export.

        The self.skip_submissions flag controls whether we export
        submissions (and all other objects that can be reached only by
        passing through a submission) or not. The self.skip_user_tests
        flag does the same for user tests.

        obj (object): the object to export; its class must provide the
            _col_props and _rel_props collections.

        return (dict): the JSON-encodable representation of obj.

        raise (RuntimeError): if a column type or a relationship value
            is of an unknown kind.

        """

        cls = type(obj)

        data = {"_class": cls.__name__}

        for prp in cls._col_props:
            col, = prp.columns
            col_type = type(col.type)

            val = getattr(obj, prp.key)
            if col_type in [Boolean, Integer, Float, String]:
                data[prp.key] = val
            elif col_type is DateTime:
                data[prp.key] = \
                    make_timestamp(val) if val is not None else None
            elif col_type is Interval:
                data[prp.key] = \
                    val.total_seconds() if val is not None else None
            else:
                raise RuntimeError("Unknown SQLAlchemy column type: %s"
                                   % col_type)

        for prp in cls._rel_props:
            other_cls = prp.mapper.class_

            # Skip submissions if requested
            if self.skip_submissions and other_cls is Submission:
                continue

            # Skip user_tests if requested
            if self.skip_user_tests and other_cls is UserTest:
                continue

            val = getattr(obj, prp.key)
            if val is None:
                data[prp.key] = None
            elif isinstance(val, other_cls):
                data[prp.key] = self.get_id(val)
            elif isinstance(val, list):
                data[prp.key] = [self.get_id(i) for i in val]
            elif isinstance(val, dict):
                data[prp.key] = \
                    dict((k, self.get_id(v)) for k, v in val.iteritems())
            else:
                raise RuntimeError("Unknown SQLAlchemy relationship type: %s"
                                   % type(val))

        return data

    def safe_get_file(self, digest, path, descr_path=None):

        """Get file from FileCacher ensuring that the digest is
        correct.

        digest (string): the digest of the file to retrieve.
        path (string): the path where to save the file.
        descr_path (string): the path where to save the description;
            if None the description is not retrieved.

        return (bool): True if all ok, False if something wrong.

        """

        # TODO - Probably this method could be merged in FileCacher

        # First get the file
        try:
            self.file_cacher.get_file(digest, path=path)
        except Exception as error:
            logger.error("File %s could not retrieved from file server (%r)."
                         % (digest, error))
            return False

        # Then check the digest
        calc_digest = sha1sum(path)
        if digest != calc_digest:
            logger.critical("File %s has wrong hash %s."
                            % (digest, calc_digest))
            return False

        # If applicable, retrieve also the description
        if descr_path is not None:
            with io.open(descr_path, 'wt', encoding='utf-8') as fout:
                fout.write(self.file_cacher.describe(digest))

        return True
예제 #26
0
파일: Worker.py 프로젝트: Mloc/cms
class Worker(Service):
    """This service implement the possibility to compile and evaluate
    submissions in a sandbox. The instructions to follow for the
    operations are in the TaskType classes, while the sandbox is in
    the Sandbox module.

    """

    JOB_TYPE_COMPILATION = "compile"
    JOB_TYPE_EVALUATION = "evaluate"

    def __init__(self, shard):
        """Initialize the service.

        shard: the shard of this Worker, forwarded to ServiceCoord and
            Service.

        """
        logger.initialize(ServiceCoord("Worker", shard))
        Service.__init__(self, shard, custom_logger=logger)
        self.file_cacher = FileCacher(self)

        # The task type of the job being executed, None when idle.
        self.task_type = None
        # Held while a job is being executed; at most one job at a time.
        self.work_lock = threading.Lock()
        self.session = None

    @rpc_method
    def ignore_job(self):
        """RPC that inform the worker that its result for the current
        action will be discarded. The worker will try to return as
        soon as possible even if this means that the result are
        inconsistent.

        """
        # We inform the task_type to quit as soon as possible.
        logger.info("Trying to interrupt job as requested.")
        try:
            self.task_type.ignore_job = True
        except AttributeError:
            pass  # Job concluded right under our nose, that's ok too.

    # NOTE(review): a FIXME here used to say that rpc_threaded was
    # disabled because it made the call fail, but the decorator is
    # applied below; confirm whether that issue is still open.
    @rpc_method
    @rpc_threaded
    def precache_files(self, contest_id):
        """RPC to ask the worker to precache of files in the contest.

        Submissions and user tests are skipped.

        contest_id (int): the id of the contest

        """
        # Lock is not needed if the admins correctly placed cache and
        # temp directories in the same filesystem. This is what
        # usually happens since they are children of the same,
        # cms-created, directory.
        logger.info("Precaching files for contest %d." % contest_id)
        with SessionGen(commit=False) as session:
            contest = Contest.get_from_id(contest_id, session)
            for digest in contest.enumerate_files(skip_submissions=True,
                                                  skip_user_tests=True):
                self.file_cacher.get_file(digest)
        logger.info("Precaching finished.")

    @rpc_method
    @rpc_threaded
    def execute_job(self, job_dict):
        """RPC to execute a job, unless another one is already running.

        job_dict (dict): the job to execute, in the format understood
            by Job.import_from_dict_with_type.

        return (dict): the job, exported with export_to_dict after its
            execution.

        raise (JobException): if the worker is already busy with
            another job, or if the execution of the job fails.

        """
        job = Job.import_from_dict_with_type(job_dict)

        # Non-blocking acquire: if a job is already running we decline
        # this one instead of waiting.
        if not self.work_lock.acquire(False):
            err_msg = "Request '%s' received, " \
                "but declined because of acquired lock" % \
                (job.info)
            logger.warning(err_msg)
            raise JobException(err_msg)

        try:
            logger.operation = "job '%s'" % (job.info)
            logger.info("Request received")
            job.shard = self.shard

            self.task_type = get_task_type(job, self.file_cacher)
            self.task_type.execute_job()
            logger.info("Request finished.")

            return job.export_to_dict()

        except Exception:
            # Only real errors are turned into a JobException;
            # KeyboardInterrupt and SystemExit are left to propagate.
            err_msg = "Worker failed on operation `%s'" % logger.operation
            logger.error("%s\n%s" % (err_msg, traceback.format_exc()))
            raise JobException(err_msg)

        finally:
            self.task_type = None
            self.session = None
            logger.operation = ""
            self.work_lock.release()
예제 #27
0
class Worker(Service):
    """This service implement the possibility to compile and evaluate
    submissions in a sandbox. The instructions to follow for the
    operations are in the TaskType classes, while the sandbox is in
    the Sandbox module.

    """

    JOB_TYPE_COMPILATION = "compile"
    JOB_TYPE_EVALUATION = "evaluate"

    def __init__(self, shard):
        """Initialize the service.

        shard: the shard of this Worker, forwarded to ServiceCoord and
            Service.

        """
        logger.initialize(ServiceCoord("Worker", shard))
        Service.__init__(self, shard, custom_logger=logger)
        self.file_cacher = FileCacher(self)

        # The task type of the job being executed, None when idle.
        self.task_type = None
        # Held while a job is being executed; at most one job at a time.
        self.work_lock = threading.Lock()
        self.session = None

    @rpc_method
    def ignore_job(self):
        """RPC that inform the worker that its result for the current
        action will be discarded. The worker will try to return as
        soon as possible even if this means that the result are
        inconsistent.

        """
        # We inform the task_type to quit as soon as possible.
        logger.info("Trying to interrupt job as requested.")
        try:
            self.task_type.ignore_job = True
        except AttributeError:
            pass  # Job concluded right under our nose, that's ok too.

    # NOTE(review): a FIXME here used to say that rpc_threaded was
    # disabled because it made the call fail, but the decorator is
    # applied below; confirm whether that issue is still open.
    @rpc_method
    @rpc_threaded
    def precache_files(self, contest_id):
        """RPC to ask the worker to precache of files in the contest.

        Submissions and user tests are skipped.

        contest_id (int): the id of the contest

        """
        # Lock is not needed if the admins correctly placed cache and
        # temp directories in the same filesystem. This is what
        # usually happens since they are children of the same,
        # cms-created, directory.
        logger.info("Precaching files for contest %d." % contest_id)
        with SessionGen(commit=False) as session:
            contest = Contest.get_from_id(contest_id, session)
            for digest in contest.enumerate_files(skip_submissions=True,
                                                  skip_user_tests=True):
                self.file_cacher.get_file(digest)
        logger.info("Precaching finished.")

    @rpc_method
    @rpc_threaded
    def execute_job(self, job_dict):
        """RPC to execute a job, unless another one is already running.

        job_dict (dict): the job to execute, in the format understood
            by Job.import_from_dict_with_type.

        return (dict): the job, exported with export_to_dict after its
            execution.

        raise (JobException): if the worker is already busy with
            another job, or if the execution of the job fails.

        """
        job = Job.import_from_dict_with_type(job_dict)

        # Non-blocking acquire: if a job is already running we decline
        # this one instead of waiting.
        if not self.work_lock.acquire(False):
            err_msg = "Request '%s' received, " \
                "but declined because of acquired lock" % \
                (job.info)
            logger.warning(err_msg)
            raise JobException(err_msg)

        try:
            logger.operation = "job '%s'" % (job.info)
            logger.info("Request received")
            job.shard = self.shard

            self.task_type = get_task_type(job, self.file_cacher)
            self.task_type.execute_job()
            logger.info("Request finished.")

            return job.export_to_dict()

        except Exception:
            # Only real errors are turned into a JobException;
            # KeyboardInterrupt and SystemExit are left to propagate.
            err_msg = "Worker failed on operation `%s'" % logger.operation
            logger.error("%s\n%s" % (err_msg, traceback.format_exc()))
            raise JobException(err_msg)

        finally:
            self.task_type = None
            self.session = None
            logger.operation = ""
            self.work_lock.release()
예제 #28
0
class ContestExporter:
    """This service exports every data about the contest that CMS
    knows. The process of exporting and importing again should be
    idempotent.

    """
    def __init__(self, contest_id, export_target, skip_submissions,
                 skip_user_tests, light):
        """Store the export options and choose the export target.

        contest_id (int): the id of the contest to export.
        export_target (string): the file or directory to export to; if
            empty, "dump_<contest name>.tar.gz" is used.
        skip_submissions (bool): whether to leave out submissions.
        skip_user_tests (bool): whether to leave out user tests.
        light: forwarded to Contest.enumerate_files.

        """
        self.contest_id = contest_id
        self.skip_submissions = skip_submissions
        self.skip_user_tests = skip_user_tests
        self.light = light

        # If target is not provided, we use the contest's name.
        if export_target == "":
            with SessionGen(commit=False) as session:
                contest = Contest.get_from_id(self.contest_id, session)
                self.export_target = "dump_%s.tar.gz" % contest.name
        else:
            self.export_target = export_target

        self.file_cacher = FileCacher()

    def run(self):
        """Interface to make the class do its job.

        return (bool): the result of do_export.

        """
        return self.do_export()

    def do_export(self):
        """Run the actual export code.

        When the target is an archive we are able to write, the data
        is first exported to a temporary directory, which is then
        packed into the archive. The temporary directory is always
        removed, whether the export succeeds or not.

        return (bool): True if all ok, False if something wrong.

        """
        logger.operation = "exporting contest %d" % self.contest_id
        logger.info("Starting export.")

        export_dir = self.export_target
        archive_info = get_archive_info(self.export_target)

        # Root of the temporary tree; None when exporting to a plain
        # directory.
        temp_root = None

        if archive_info["write_mode"] != "":
            # We are able to write to this archive.
            if os.path.exists(self.export_target):
                logger.critical("The specified file already exists, "
                                "I won't overwrite it.")
                return False
            temp_root = tempfile.mkdtemp()
            export_dir = os.path.join(temp_root, archive_info["basename"])

        try:
            if not self._export_to_dir(export_dir):
                return False

            # If the admin requested export to file, we do that.
            if temp_root is not None:
                archive = tarfile.open(self.export_target,
                                       archive_info["write_mode"])
                try:
                    archive.add(export_dir, arcname=archive_info["basename"])
                finally:
                    archive.close()
        finally:
            # Remove the whole temporary tree (not just export_dir,
            # which would leave the directory made by mkdtemp behind).
            if temp_root is not None:
                shutil.rmtree(temp_root)

        logger.info("Export finished.")
        logger.operation = ""

        return True

    def _export_to_dir(self, export_dir):
        """Create export_dir and fill it with the files and the JSON
        dump of the contest.

        export_dir (string): the directory to create and populate; it
            must not exist yet.

        return (bool): True if all ok, False if something wrong.

        """
        logger.info("Creating dir structure.")
        try:
            os.mkdir(export_dir)
        except OSError:
            logger.critical("The specified directory already exists, "
                            "I won't overwrite it.")
            return False

        files_dir = os.path.join(export_dir, "files")
        descr_dir = os.path.join(export_dir, "descriptions")
        os.mkdir(files_dir)
        os.mkdir(descr_dir)

        with SessionGen(commit=False) as session:

            contest = Contest.get_from_id(self.contest_id, session)

            # Export files.
            logger.info("Exporting files.")
            files = contest.enumerate_files(self.skip_submissions,
                                            self.skip_user_tests,
                                            light=self.light)
            for _file in files:
                if not self.safe_get_file(_file, os.path.join(
                        files_dir, _file), os.path.join(descr_dir, _file)):
                    return False

            # Export the contest in JSON format.
            logger.info("Exporting the contest in JSON format.")
            with open(os.path.join(export_dir, "contest.json"), 'w') as fout:
                json.dump(contest.export_to_dict(self.skip_submissions,
                                                 self.skip_user_tests),
                          fout,
                          indent=4)

        return True

    def safe_get_file(self, digest, path, descr_path=None):
        """Get file from FileCacher ensuring that the digest is
        correct.

        digest (string): the digest of the file to retrieve.
        path (string): the path where to save the file.
        descr_path (string): the path where to save the description;
            if None the description is not retrieved.

        return (bool): True if all ok, False if something wrong.

        """
        # First get the file
        try:
            self.file_cacher.get_file(digest, path=path)
        except Exception as error:
            logger.error("File %s could not retrieved from file server (%r)." %
                         (digest, error))
            return False

        # Then check the digest
        calc_digest = sha1sum(path)
        if digest != calc_digest:
            logger.critical("File %s has wrong hash %s." %
                            (digest, calc_digest))
            return False

        # If applicable, retrieve also the description
        if descr_path is not None:
            with codecs.open(descr_path, 'w', encoding='utf-8') as fout:
                fout.write(self.file_cacher.describe(digest))

        return True
예제 #29
0
파일: Test.py 프로젝트: Mloc/cms
def test_testcases(base_dir, soluzione, assume=None):
    global task, file_cacher

    # Use a FileCacher with a NullBackend in order to avoid to fill
    # the database with junk
    if file_cacher is None:
        file_cacher = FileCacher(null=True)

    # Load the task
    # TODO - This implies copying a lot of data to the FileCacher,
    # which is annoying if you have to do it continuously; it would be
    # better to use a persistent cache (although local, possibly
    # filesystem-based instead of database-based) and somehow detect
    # when the task has already been loaded
    if task is None:
        loader = YamlLoader(
            os.path.realpath(os.path.join(base_dir, "..")),
            file_cacher)
        # Normally we should import the contest before, but YamlLoader
        # accepts get_task() even without previous get_contest() calls
        task = loader.get_task(os.path.split(os.path.realpath(base_dir))[1])

    # Prepare the EvaluationJob
    dataset = task.active_dataset
    digest = file_cacher.put_file(
        path=os.path.join(base_dir, soluzione),
        description="Solution %s for task %s" % (soluzione, task.name))
    executables = {task.name: Executable(filename=task.name, digest=digest)}
    job = EvaluationJob(
        task_type=dataset.task_type,
        task_type_parameters=json.loads(dataset.task_type_parameters),
        managers=dict(dataset.managers),
        executables=executables,
        testcases=dict((t.num, Testcase(t.input, t.output))
                       for t in dataset.testcases),
        time_limit=dataset.time_limit,
        memory_limit=dataset.memory_limit)
    tasktype = get_task_type(job, file_cacher)

    ask_again = True
    last_status = "ok"
    status = "ok"
    stop = False
    info = []
    points = []
    comments = []
    for i in job.testcases.keys():
        print i,
        sys.stdout.flush()

        # Skip the testcase if we decide to consider everything to
        # timeout
        if stop:
            info.append("Time limit exceeded")
            points.append(0.0)
            comments.append("Timeout.")
            continue

        # Evaluate testcase
        last_status = status
        tasktype.evaluate_testcase(i)
        # print job.evaluations[i]
        status = job.evaluations[i]["plus"]["exit_status"]
        info.append("Time: %5.3f   Wall: %5.3f   Memory: %s" %
                    (job.evaluations[i]["plus"]["execution_time"],
                    job.evaluations[i]["plus"]["execution_wall_clock_time"],
                    mem_human(job.evaluations[i]["plus"]["memory_used"])))
        points.append(float(job.evaluations[i]["outcome"]))
        comments.append(job.evaluations[i]["text"])

        # If we saw two consecutive timeouts, ask wether we want to
        # consider everything to timeout
        if ask_again and status == "timeout" and last_status == "timeout":
            print
            print "Want to stop and consider everything to timeout? [y/N]",
            if assume is not None:
                print assume
                tmp = assume
            else:
                tmp = raw_input().lower()
            if tmp in ['y', 'yes']:
                stop = True
            else:
                ask_again = False

    # Result pretty printing
    print
    clen = max(len(c) for c in comments)
    ilen = max(len(i) for i in info)
    for i, (p, c, b) in enumerate(zip(points, comments, info)):
        print "%3d) %5.2lf --- %s [%s]" % (i, p, c.ljust(clen), b.center(ilen))

    return zip(points, comments, info)
예제 #30
0
class SpoolExporter:
    """This service creates a tree structure "similar" to the one used
    in Italian IOI repository for storing the results of a contest.

    """
    def __init__(self, contest_id, spool_dir):
        self.contest_id = contest_id
        self.spool_dir = spool_dir
        self.upload_dir = os.path.join(self.spool_dir, "upload")
        self.contest = None
        self.submissions = None

        self.file_cacher = FileCacher()

    def run(self):
        """Interface to make the class do its job."""
        return self.do_export()

    def do_export(self):
        """Run the actual export code.

        """
        logger.operation = "exporting contest %s" % self.contest_id
        logger.info("Starting export.")

        logger.info("Creating dir structure.")
        try:
            os.mkdir(self.spool_dir)
        except OSError:
            logger.error("The specified directory already exists, "
                         "I won't overwrite it.")
            return False
        os.mkdir(self.upload_dir)

        with SessionGen(commit=False) as session:
            self.contest = Contest.get_from_id(self.contest_id, session)
            self.submissions = sorted(
                (submission for submission in self.contest.get_submissions()
                 if not submission.user.hidden),
                key=lambda submission: submission.timestamp)

            # Creating users' directory.
            for user in self.contest.users:
                if not user.hidden:
                    os.mkdir(os.path.join(self.upload_dir, user.username))

            try:
                self.export_submissions()
                self.export_ranking()
            except Exception as error:
                logger.error("Generic error. %r" % error)
                return False

        logger.info("Export finished.")
        logger.operation = ""

        return True

    def export_submissions(self):
        """Export submissions' source files.

        """
        logger.info("Exporting submissions.")

        queue_file = codecs.open(os.path.join(self.spool_dir, "queue"),
                                 "w",
                                 encoding="utf-8")
        for submission in self.submissions:
            logger.info("Exporting submission %s." % submission.id)
            username = submission.user.username
            task = submission.task.name
            timestamp = submission.timestamp

            # Get source files to the spool directory.
            file_digest = submission.files["%s.%s" % (task, "%l")].digest
            upload_filename = os.path.join(
                self.upload_dir, username,
                "%s.%d.%s" % (task, timestamp, submission.language))
            self.file_cacher.get_file(file_digest, path=upload_filename)
            upload_filename = os.path.join(
                self.upload_dir, username,
                "%s.%s" % (task, submission.language))
            self.file_cacher.get_file(file_digest, path=upload_filename)
            print >> queue_file, "./upload/%s/%s.%d.%s" % \
                (username, task, timestamp, submission.language)

            # Write results file for the submission.
            if submission.evaluated():
                res_file = codecs.open(os.path.join(
                    self.spool_dir, "%d.%s.%s.%s.res" %
                    (timestamp, username, task, submission.language)),
                                       "w",
                                       encoding="utf-8")
                res2_file = codecs.open(os.path.join(
                    self.spool_dir,
                    "%s.%s.%s.res" % (username, task, submission.language)),
                                        "w",
                                        encoding="utf-8")
                total = 0.0
                for num, evaluation in enumerate(submission.evaluations):
                    outcome = float(evaluation.outcome)
                    total += outcome
                    line = "Executing on file n. %2d %s (%.4f)" % \
                        (num, evaluation.text, outcome)
                    print >> res_file, line
                    print >> res2_file, line
                line = "Score: %.6f" % total
                print >> res_file, line
                print >> res2_file, line
                res_file.close()
                res2_file.close()

        print >> queue_file
        queue_file.close()

    def export_ranking(self):
        """Exports the ranking in csv and txt (human-readable) form.

        """
        logger.info("Exporting ranking.")

        # Create the structure to store the scores.
        scores = dict((user.username, 0.0) for user in self.contest.users
                      if not user.hidden)
        task_scores = dict((task.id,
                            dict((user.username, 0.0)
                                 for user in self.contest.users
                                 if not user.hidden))
                           for task in self.contest.tasks)
        last_scores = dict((task.id,
                            dict((user.username, 0.0)
                                 for user in self.contest.users
                                 if not user.hidden))
                           for task in self.contest.tasks)

        # Make the score type compute the scores.
        scorers = {}
        for task in self.contest.tasks:
            scorers[task.id] = get_score_type(task=task)

        for submission in self.submissions:
            scorers[submission.task_id].add_submission(
                submission.id, submission.timestamp, submission.user.username,
                dict((ev.num, float(ev.outcome))
                     for ev in submission.evaluations), submission.tokened())

        # Put together all the scores.
        for submission in self.submissions:
            task_id = submission.task_id
            username = submission.user.username
            details = scorers[task_id].pool[submission.id]
            last_scores[task_id][username] = details["score"]
            if details["tokened"]:
                task_scores[task_id][username] = max(
                    task_scores[task_id][username], details["score"])

        # Merge tokened and last submissions.
        for username in scores:
            for task_id in task_scores:
                task_scores[task_id][username] = max(
                    task_scores[task_id][username],
                    last_scores[task_id][username])
            print username, [
                task_scores[task_id][username] for task_id in task_scores
            ]
            scores[username] = sum(task_scores[task_id][username]
                                   for task_id in task_scores)

        sorted_usernames = sorted(scores.keys(),
                                  key=lambda username:
                                  (scores[username], username),
                                  reverse=True)
        sorted_tasks = sorted(self.contest.tasks, key=lambda task: task.num)

        ranking_file = codecs.open(os.path.join(self.spool_dir,
                                                "classifica.txt"),
                                   "w",
                                   encoding="utf-8")
        ranking_csv = codecs.open(os.path.join(self.spool_dir,
                                               "classifica.csv"),
                                  "w",
                                  encoding="utf-8")

        # Write rankings' header.
        n_tasks = len(sorted_tasks)
        print >> ranking_file, "Classifica finale del contest `%s'" % \
            self.contest.description
        points_line = " %10s" * n_tasks
        csv_points_line = ",%s" * n_tasks
        print >> ranking_file, ("%20s %10s" % ("Utente", "Totale")) + \
              (points_line % tuple([t.name for t in sorted_tasks]))
        print >> ranking_csv, ("%s,%s" % ("utente", "totale")) + \
              (csv_points_line % tuple([t.name for t in sorted_tasks]))

        # Write rankings' content.
        points_line = " %10.3f" * n_tasks
        csv_points_line = ",%.6f" * n_tasks
        for username in sorted_usernames:
            user_scores = [
                task_scores[task.id][username] for task in sorted_tasks
            ]
            print >> ranking_file, ("%20s %10.3f" % (username,
                                                     scores[username])) + \
                  (points_line % tuple(user_scores))
            print >> ranking_csv, ("%s,%.6f" % (username,
                                                scores[username])) + \
                  (csv_points_line % tuple(user_scores))

        ranking_file.close()
        ranking_csv.close()
예제 #31
0
파일: Worker.py 프로젝트: s546360316/cms
class Worker(Service):
    """This service implements the possibility to compile and evaluate
    submissions in a sandbox. The instructions to follow for the
    operations are in the TaskType classes, while the sandbox is in
    the Sandbox module.

    """

    JOB_TYPE_COMPILATION = "compile"
    JOB_TYPE_EVALUATION = "evaluate"

    def __init__(self, shard):
        """Initialize the service.

        shard (int): the shard of this Worker.

        """
        logger.initialize(ServiceCoord("Worker", shard))
        Service.__init__(self, shard, custom_logger=logger)
        self.file_cacher = FileCacher(self)

        # Held for the whole duration of execute_job_group().
        self.work_lock = threading.Lock()
        # Flag raised by the ignore_job() RPC. It must not be called
        # "ignore_job": an instance attribute with that name would
        # shadow the method, making the RPC unreachable.
        self._ignore_job = False

    @rpc_method
    def ignore_job(self):
        """RPC that informs the worker that its result for the current
        action will be discarded. The worker will try to return as
        soon as possible even if this means that the results are
        inconsistent.

        """
        # We remember to quit as soon as possible.
        logger.info("Trying to interrupt job as requested.")
        self._ignore_job = True

    # FIXME - rpc_threaded is disable because it makes the call fail:
    # we should investigate on this
    # NOTE(review): the decorator is nonetheless applied below, so the
    # FIXME above looks stale - confirm and remove it.
    @rpc_method
    @rpc_threaded
    def precache_files(self, contest_id):
        """RPC to ask the worker to precache the files of the contest.

        Files of submissions and user tests are not precached.

        contest_id (int): the id of the contest

        """
        # Lock is not needed if the admins correctly placed cache and
        # temp directories in the same filesystem. This is what
        # usually happens since they are children of the same,
        # cms-created, directory.
        logger.info("Precaching files for contest %d." % contest_id)
        with SessionGen(commit=False) as session:
            contest = Contest.get_from_id(contest_id, session)
            for digest in contest.enumerate_files(skip_submissions=True,
                                                  skip_user_tests=True):
                self.file_cacher.get_file(digest)
        logger.info("Precaching finished.")

    @rpc_method
    @rpc_threaded
    def execute_job_group(self, job_group_dict):
        """RPC that executes, one after the other, the jobs of a group.

        The execution stops at the first job that fails or as soon as
        ignore_job() has been called; in both cases the group is
        marked as not successful.

        job_group_dict (dict): the JobGroup to execute, in the format
                               accepted by JobGroup.import_from_dict.

        return (dict): the JobGroup after the execution, as produced
                       by its export_to_dict method.

        raise (JobException): if the worker is already executing
                              another group, or if anything fails
                              during the execution.

        """
        job_group = JobGroup.import_from_dict(job_group_dict)

        # Non-blocking acquire: only one group at a time is executed.
        if not self.work_lock.acquire(False):
            # No job has been selected yet at this point, so describe
            # the request with the info of all the jobs of the group.
            infos = ", ".join("%s" % (job.info,)
                              for job in job_group.jobs.itervalues())
            err_msg = "Request '%s' received, " \
                "but declined because of acquired lock" % \
                (infos)
            logger.warning(err_msg)
            raise JobException(err_msg)

        try:
            self._ignore_job = False

            for k, job in job_group.jobs.iteritems():
                logger.operation = "job '%s'" % (job.info)
                logger.info("Request received")

                job.shard = self.shard

                # FIXME This is actually kind of a workaround...
                # The only TaskType that needs it is OutputOnly.
                job._key = k

                # FIXME We're creating a new TaskType for each Job
                # even if, at the moment, a JobGroup always uses
                # the same TaskType and the same parameters. Yet,
                # this could change in the future, so the best
                # solution is to keep a cache of TaskTypes objects
                # (like ScoringService does with ScoreTypes, except
                # that we cannot index by Dataset ID here...).
                task_type = get_task_type(job.task_type,
                                          job.task_type_parameters)
                task_type.execute_job(job, self.file_cacher)

                logger.info("Request finished.")

                if not job.success or self._ignore_job:
                    job_group.success = False
                    break
            else:
                # The loop was not interrupted: every job succeeded.
                job_group.success = True

            return job_group.export_to_dict()

        except:
            # Whatever went wrong is logged with its traceback and
            # reported to the caller as a JobException.
            err_msg = "Worker failed on operation `%s'" % logger.operation
            logger.error("%s\n%s" % (err_msg, traceback.format_exc()))
            raise JobException(err_msg)

        finally:
            logger.operation = ""
            self.work_lock.release()