Example #1
    def precache_files(self, contest_id):
        """RPC to ask the worker to precache of files in the contest.

        contest_id (int): the id of the contest

        """
        # In order to avoid a long-lived connection, first fetch the
        # complete list of files and then download the files; since
        # this is just pre-caching, possible race conditions are not
        # dangerous
        logger.info("Precaching files for contest %d.", contest_id)
        with SessionGen() as session:
            contest = Contest.get_from_id(contest_id, session)
            files = enumerate_files(session,
                                    contest,
                                    skip_submissions=True,
                                    skip_user_tests=True,
                                    skip_print_jobs=True)
        for digest in files:
            try:
                self.file_cacher.load(digest, if_needed=True)
            except KeyError:
                # No problem (at this stage) if we cannot find the
                # file
                pass

        logger.info("Precaching finished.")
Example #2
def clean_files(session, dry_run):
    filecacher = FileCacher()
    files = set(fileinfo[0] for fileinfo in filecacher.list())
    logger.info("A total number of %d files are present in the file store",
                len(files))
    found_digests = enumerate_files(session)
    logger.info("Found %d digests while scanning", len(found_digests))
    files -= found_digests
    logger.info("%d digests are orphan.", len(files))
    total_size = 0
    for orphan in files:
        total_size += filecacher.get_size(orphan)
    logger.info("Orphan files take %s bytes of disk space",
                "{:,}".format(total_size))
    if not dry_run:
        for count, orphan in enumerate(files, 1):
            filecacher.delete(orphan)
            if count % 100 == 0:
                logger.info("%d files deleted from the file store", count)
        logger.info("All orphan files have been deleted")
Example #3
    def precache_files(self, contest_id):
        """RPC to ask the worker to precache of files in the contest.

        contest_id (int): the id of the contest

        """
        lock = self.file_cacher.precache_lock()
        if lock is None:
            # Another worker is already precaching. Hence, this worker doesn't
            # need to do anything.
            logger.info(
                "Another worker is already precaching files for "
                "contest %d.", contest_id)
            return
        with lock:
            # In order to avoid a long-lived connection, first fetch the
            # complete list of files and then download the files; since
            # this is just pre-caching, possible race conditions are not
            # dangerous
            logger.info("Precaching files for contest %d.", contest_id)
            with SessionGen() as session:
                contest = Contest.get_from_id(contest_id, session)
                files = enumerate_files(session,
                                        contest,
                                        skip_submissions=True,
                                        skip_user_tests=True,
                                        skip_print_jobs=True)
            for digest in files:
                try:
                    self.file_cacher.cache_file(digest)
                except KeyError:
                    # No problem (at this stage) if we cannot find the
                    # file
                    pass

            logger.info("Precaching finished.")
Example #4
    def do_export(self):
        """Run the actual export code."""
        logger.info("Starting export.")

        export_dir = self.export_target
        archive_info = get_archive_info(self.export_target)

        if archive_info["write_mode"] != "":
            # We are able to write to this archive.
            if os.path.exists(self.export_target):
                logger.critical("The specified file already exists, "
                                "I won't overwrite it.")
                return False
            export_dir = os.path.join(tempfile.mkdtemp(),
                                      archive_info["basename"])

        logger.info("Creating dir structure.")
        try:
            os.mkdir(export_dir)
        except OSError:
            logger.critical("The specified directory already exists, "
                            "I won't overwrite it.")
            return False

        files_dir = os.path.join(export_dir, "files")
        descr_dir = os.path.join(export_dir, "descriptions")
        os.mkdir(files_dir)
        os.mkdir(descr_dir)

        with SessionGen() as session:
            # Export files.
            logger.info("Exporting files.")
            if self.dump_files:
                for contest_id in self.contests_ids:
                    contest = Contest.get_from_id(contest_id, session)
                    files = enumerate_files(
                        session, contest,
                        skip_submissions=self.skip_submissions,
                        skip_user_tests=self.skip_user_tests,
                        skip_users=self.skip_users,
                        skip_print_jobs=self.skip_print_jobs,
                        skip_generated=self.skip_generated)
                    for file_ in files:
                        if not self.safe_get_file(file_,
                                                  os.path.join(files_dir,
                                                               file_),
                                                  os.path.join(descr_dir,
                                                               file_)):
                            return False

            # Export data in JSON format.
            if self.dump_model:
                logger.info("Exporting data to a JSON file.")

                # We use strings because they'll be the keys of a JSON
                # object
                self.ids = {}
                self.queue = []

                data = dict()

                for cls, lst in [(Contest, self.contests_ids),
                                 (User, self.users_ids),
                                 (Task, self.tasks_ids)]:
                    for i in lst:
                        obj = cls.get_from_id(i, session)
                        self.get_id(obj)

                # Specify the "root" of the data graph
                data["_objects"] = list(self.ids.values())

                while len(self.queue) > 0:
                    obj = self.queue.pop(0)
                    data[self.ids[obj.sa_identity_key]] = \
                        self.export_object(obj)

                data["_version"] = model_version

                destination = os.path.join(export_dir, "contest.json")
                with open(destination, "wt", encoding="utf-8") as fout:
                    json.dump(data, fout, indent=4, sort_keys=True)

        # If the admin requested export to file, we do that.
        if archive_info["write_mode"] != "":
            with tarfile.open(self.export_target,
                              archive_info["write_mode"]) as archive:
                archive.add(export_dir, arcname=archive_info["basename"])
            rmtree(export_dir)

        logger.info("Export finished.")

        return True
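do_export branches on archive_info["write_mode"]: an empty string means the target is a plain directory, anything else is a tarfile mode string. The helper itself is not shown; here is a rough sketch of the contract it appears to implement (the suffix-to-mode table is an assumption):

import os


def get_archive_info(path):
    """Map an export target to a tarfile write mode and archive basename."""
    # Longest suffixes first, so ".tar.gz" wins over ".tar".
    modes = [(".tar.gz", "w:gz"), (".tar.bz2", "w:bz2"), (".tar", "w")]
    name = os.path.basename(path)
    for suffix, mode in modes:
        if name.endswith(suffix):
            return {"write_mode": mode, "basename": name[:-len(suffix)]}
    # No recognized archive suffix: the target is treated as a directory.
    return {"write_mode": "", "basename": name}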
Example #5
    def do_import(self):
        """Run the actual import code."""
        logger.info("Starting import.")

        archive = None
        if Archive.is_supported(self.import_source):
            archive = Archive(self.import_source)
            self.import_dir = archive.unpack()

            file_names = os.listdir(self.import_dir)
            if len(file_names) != 1:
                logger.critical("Cannot find a root directory in %s.",
                                self.import_source)
                archive.cleanup()
                return False

            self.import_dir = os.path.join(self.import_dir, file_names[0])

        if self.drop:
            logger.info("Dropping and recreating the database.")
            try:
                if not (drop_db() and init_db()):
                    logger.critical("Unexpected error while dropping "
                                    "and recreating the database.")
                    return False
            except Exception:
                logger.critical("Unable to access DB.", exc_info=True)
                return False

        with SessionGen() as session:

            # Import the contest in JSON format.
            if self.load_model:
                logger.info("Importing the contest from a JSON file.")

                with io.open(os.path.join(self.import_dir, "contest.json"),
                             "rb") as fin:
                    # TODO - Throughout all the code we'll assume the
                    # input is correct without actually doing any
                    # validations.  Thus, for example, we're not
                    # checking that the decoded object is a dict...
                    self.datas = json.load(fin)

                # If the dump has been exported using a data model
                # different than the current one (that is, a previous
                # one) we try to update it.
                # If no "_version" field is found we assume it's a v1.0
                # export (before the new dump format was introduced).
                dump_version = self.datas.get("_version", 0)

                if dump_version < model_version:
                    logger.warning(
                        "The dump you're trying to import has been created "
                        "by an old version of CMS (it declares data model "
                        "version %d). It may take a while to adapt it to "
                        "the current data model (which is version %d). You "
                        "can use cmsDumpUpdater to update the on-disk dump "
                        "and speed up future imports.", dump_version,
                        model_version)

                elif dump_version > model_version:
                    logger.critical(
                        "The dump you're trying to import has been created "
                        "by a version of CMS newer than this one (it "
                        "declares data model version %d) and there is no "
                        "way to adapt it to the current data model (which "
                        "is version %d). You probably need to update CMS to "
                        "handle it. It is impossible to proceed with the "
                        "importation.", dump_version, model_version)
                    return False

                else:
                    logger.info("Importing dump with data model version %d.",
                                dump_version)

                for version in range(dump_version, model_version):
                    # Update from version to version+1
                    updater = __import__(
                        "cmscontrib.updaters.update_%d" % (version + 1),
                        globals(), locals(), ["Updater"]).Updater(self.datas)
                    self.datas = updater.run()
                    self.datas["_version"] = version + 1

                assert self.datas["_version"] == model_version

                self.objs = dict()
                for id_, data in iteritems(self.datas):
                    if not id_.startswith("_"):
                        self.objs[id_] = self.import_object(data)
                for id_, data in iteritems(self.datas):
                    if not id_.startswith("_"):
                        self.add_relationships(data, self.objs[id_])

                for k, v in list(iteritems(self.objs)):

                    # Skip submissions if requested
                    if self.skip_submissions and isinstance(v, Submission):
                        del self.objs[k]

                    # Skip user_tests if requested
                    if self.skip_user_tests and isinstance(v, UserTest):
                        del self.objs[k]

                    # Skip print jobs if requested
                    if self.skip_print_jobs and isinstance(v, PrintJob):
                        del self.objs[k]

                    # Skip generated data if requested
                    if self.skip_generated and \
                            isinstance(v, (SubmissionResult, UserTestResult)):
                        del self.objs[k]

                contest_ids = []
                contest_files = set()

                # We add explicitly only the top-level objects:
                # contests, and tasks and users not contained in any
                # contest. This will add on cascade all dependent
                # objects, and not add orphaned objects (like those
                # that depended on submissions or user tests that we
                # might have removed above).
                for id_ in self.datas["_objects"]:
                    obj = self.objs[id_]
                    session.add(obj)
                    session.flush()

                    if isinstance(obj, Contest):
                        contest_ids.append(obj.id)
                        contest_files |= enumerate_files(
                            session,
                            obj,
                            skip_submissions=self.skip_submissions,
                            skip_user_tests=self.skip_user_tests,
                            skip_print_jobs=self.skip_print_jobs,
                            skip_generated=self.skip_generated)

                session.commit()
            else:
                contest_ids = None
                contest_files = None

            # Import files.
            if self.load_files:
                logger.info("Importing files.")

                files_dir = os.path.join(self.import_dir, "files")
                descr_dir = os.path.join(self.import_dir, "descriptions")

                files = set(os.listdir(files_dir))
                descr = set(os.listdir(descr_dir))

                if not descr <= files:
                    logger.warning("Some files do not have an associated "
                                   "description.")
                if not files <= descr:
                    logger.warning("Some descriptions do not have an "
                                   "associated file.")

                if not (contest_files is None or files <= contest_files):
                    # FIXME Check if it's because this is a light import
                    # or because we're skipping submissions or user_tests
                    logger.warning("The dump contains some files that are "
                                   "not needed by the contest.")
                if not (contest_files is None or contest_files <= files):
                    # The reason for this could be that it was a light
                    # export that's not being reimported as such.
                    logger.warning("The contest needs some files that are "
                                   "not contained in the dump.")

                # Limit import to files we actually need.
                if contest_files is not None:
                    files &= contest_files

                for digest in files:
                    file_ = os.path.join(files_dir, digest)
                    desc = os.path.join(descr_dir, digest)
                    if not self.safe_put_file(file_, desc):
                        logger.critical(
                            "Unable to put file `%s' in the DB. "
                            "Aborting. Please remove the contest "
                            "from the database.", file_)
                        # TODO: remove contest from the database.
                        return False

        # Clean up, if an archive was used
        if archive is not None:
            archive.cleanup()

        if contest_ids is not None:
            logger.info("Import finished (contest ids: %s).",
                        ", ".join("%d" % id_ for id_ in contest_ids))
        else:
            logger.info("Import finished.")

        return True
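The version-upgrade loop in do_import imports cmscontrib.updaters.update_<N> and expects it to expose an Updater class whose constructor takes the decoded dump and whose run() returns the updated dict. A minimal updater honoring that protocol could look like the sketch below; the version numbers and the renamed field are purely illustrative.

# Hypothetical contents of cmscontrib/updaters/update_42.py.


class Updater:
    def __init__(self, data):
        assert data["_version"] == 41  # illustrative: upgrades 41 -> 42
        self.data = data

    def run(self):
        for key, value in self.data.items():
            if key.startswith("_"):
                continue  # "_version" and "_objects" are metadata
            # Illustrative transformation: rename a field on every object.
            if "old_field" in value:
                value["new_field"] = value.pop("old_field")
        return self.data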
Example #6
    def do_export(self):
        """Run the actual export code."""
        logger.info("Starting export.")

        export_dir = self.export_target
        archive_info = get_archive_info(self.export_target)

        if archive_info["write_mode"] != "":
            # We are able to write to this archive.
            if os.path.exists(self.export_target):
                logger.critical("The specified file already exists, "
                                "I won't overwrite it.")
                return False
            export_dir = os.path.join(tempfile.mkdtemp(),
                                      archive_info["basename"])

        logger.info("Creating dir structure.")
        try:
            os.mkdir(export_dir)
        except OSError:
            logger.critical("The specified directory already exists, "
                            "I won't overwrite it.")
            return False

        files_dir = os.path.join(export_dir, "files")
        descr_dir = os.path.join(export_dir, "descriptions")
        os.mkdir(files_dir)
        os.mkdir(descr_dir)

        with SessionGen() as session:
            # Export files.
            logger.info("Exporting files.")
            if self.dump_files:
                for contest_id in self.contests_ids:
                    contest = Contest.get_from_id(contest_id, session)
                    files = enumerate_files(
                        session, contest,
                        skip_submissions=self.skip_submissions,
                        skip_user_tests=self.skip_user_tests,
                        skip_print_jobs=self.skip_print_jobs,
                        skip_generated=self.skip_generated)
                    for file_ in files:
                        if not self.safe_get_file(file_,
                                                  os.path.join(files_dir,
                                                               file_),
                                                  os.path.join(descr_dir,
                                                               file_)):
                            return False

            # Export data in JSON format.
            if self.dump_model:
                logger.info("Exporting data to a JSON file.")

                # We use strings because they'll be the keys of a JSON
                # object
                self.ids = {}
                self.queue = []

                data = dict()

                for cls, lst in [(Contest, self.contests_ids),
                                 (User, self.users_ids),
                                 (Task, self.tasks_ids)]:
                    for i in lst:
                        obj = cls.get_from_id(i, session)
                        self.get_id(obj)

                # Specify the "root" of the data graph
                data["_objects"] = list(itervalues(self.ids))

                while len(self.queue) > 0:
                    obj = self.queue.pop(0)
                    data[self.ids[obj.sa_identity_key]] = \
                        self.export_object(obj)

                data["_version"] = model_version

                destination = os.path.join(export_dir, "contest.json")
                if PY3:
                    with io.open(destination, "wt", encoding="utf-8") as fout:
                        json.dump(data, fout, indent=4, sort_keys=True)
                else:
                    with io.open(destination, "wb") as fout:
                        json.dump(data, fout, indent=4, sort_keys=True)

        # If the admin requested export to file, we do that.
        if archive_info["write_mode"] != "":
            with tarfile.open(self.export_target,
                              archive_info["write_mode"]) as archive:
                archive.add(export_dir, arcname=archive_info["basename"])
            rmtree(export_dir)

        logger.info("Export finished.")

        return True
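The substantive difference from the earlier do_export is the PY3 branch around json.dump: under Python 2, json.dump writes byte strings and needs a binary file object, while under Python 3 it writes text. On Python 3 alone the branch collapses to the single text-mode call already used above; a self-contained sketch (the stand-in payload is illustrative):

import json

data = {"_version": 42}  # stand-in payload for illustration
with open("contest.json", "wt", encoding="utf-8") as fout:
    json.dump(data, fout, indent=4, sort_keys=True)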