예제 #1
0
class ContestExporter:
    """This service exports every data about the contest that CMS
    knows. The process of exporting and importing again should be
    idempotent.

    """
    def __init__(self, contest_id, export_target, skip_submissions,
                 skip_user_tests, light):
        self.contest_id = contest_id
        self.skip_submissions = skip_submissions
        self.skip_user_tests = skip_user_tests
        self.light = light

        # If target is not provided, we use the contest's name.
        if export_target == "":
            with SessionGen(commit=False) as session:
                contest = Contest.get_from_id(self.contest_id, session)
                self.export_target = "dump_%s.tar.gz" % contest.name
        else:
            self.export_target = export_target

        self.file_cacher = FileCacher()

    def run(self):
        """Interface to make the class do its job."""
        return self.do_export()

    def do_export(self):
        """Run the actual export code.

        """
        logger.operation = "exporting contest %d" % self.contest_id
        logger.info("Starting export.")

        export_dir = self.export_target
        archive_info = get_archive_info(self.export_target)

        if archive_info["write_mode"] != "":
            # We are able to write to this archive.
            if os.path.exists(self.export_target):
                logger.critical("The specified file already exists, "
                                "I won't overwrite it.")
                return False
            export_dir = os.path.join(tempfile.mkdtemp(),
                                      archive_info["basename"])

        logger.info("Creating dir structure.")
        try:
            os.mkdir(export_dir)
        except OSError:
            logger.critical("The specified directory already exists, "
                            "I won't overwrite it.")
            return False

        files_dir = os.path.join(export_dir, "files")
        descr_dir = os.path.join(export_dir, "descriptions")
        os.mkdir(files_dir)
        os.mkdir(descr_dir)

        with SessionGen(commit=False) as session:

            contest = Contest.get_from_id(self.contest_id, session)

            # Export files.
            logger.info("Exporting files.")
            files = contest.enumerate_files(self.skip_submissions,
                                            self.skip_user_tests,
                                            light=self.light)
            for _file in files:
                if not self.safe_get_file(_file, os.path.join(
                        files_dir, _file), os.path.join(descr_dir, _file)):
                    return False

            # Export the contest in JSON format.
            logger.info("Exporting the contest in JSON format.")

            self.ids = {contest: "0"}
            self.queue = [contest]

            data = dict()
            i = 0
            while i < len(self.queue):
                obj = self.queue[i]
                data[self.ids[obj]] = self.export_object(obj)
                i += 1

            def maybe_sort_numerically(x):
                try:
                    if isinstance(x, tuple) or isinstance(x, list):
                        x = x[0]
                    x = int(x)
                except:
                    pass
                return x

            with open(os.path.join(export_dir, "contest.json"), 'w') as fout:
                json.dump(data,
                          fout,
                          indent=4,
                          sort_keys=True,
                          item_sort_key=maybe_sort_numerically)

        # If the admin requested export to file, we do that.
        if archive_info["write_mode"] != "":
            archive = tarfile.open(self.export_target,
                                   archive_info["write_mode"])
            archive.add(export_dir, arcname=archive_info["basename"])
            archive.close()
            shutil.rmtree(export_dir)

        logger.info("Export finished.")
        logger.operation = ""

        return True

    def get_id(self, obj):
        if obj not in self.ids:
            # We use strings because they'll be the keys of a JSON object
            self.ids[obj] = str(len(self.ids))
            self.queue.append(obj)

        return self.ids[obj]

    def export_object(self, obj):
        """Export the given object, returning a JSON-encodable dict

        The returned dict will contain a "_class" item (the name of the
        class of the given object), an item for each column property
        (with a value properly translated to a JSON-compatible type)
        and an item for each relationship property (which will be an ID
        or a collection of IDs).

        The IDs used in the exported dict aren't related to the ones
        used in the DB: they are newly generated and their scope is
        limited to the exported file only. They are shared among all
        classes (that is, two objects can never share the same ID, even
        if they are of different classes). The contest will have ID 0.

        If, when exporting the relationship, we find an object without
        an ID we generate a new ID, assign it to the object and append
        the object to the queue of objects to export.

        The self.skip_submissions flag controls wheter we export
        submissions (and all other objects that can be reached only by
        passing through a submission) or not.

        """
        cls = type(obj)

        data = {"_class": cls.__name__}

        for prp in cls._col_props:
            col = prp.columns[0]
            col_type = type(col.type)

            val = getattr(obj, prp.key)
            if col_type in [Boolean, Integer, Float, String]:
                data[prp.key] = val
            elif col_type is DateTime:
                data[prp.key] = make_timestamp(
                    val) if val is not None else None
            elif col_type is Interval:
                data[
                    prp.key] = val.total_seconds() if val is not None else None
            else:
                raise RuntimeError("Unknown SQLAlchemy column type: %s" %
                                   col_type)

        for prp in cls._rel_props:
            other_cls = prp.mapper.class_

            # Skip submissions if requested
            if self.skip_submissions and other_cls is Submission:
                continue

            val = getattr(obj, prp.key)
            if val is None:
                data[prp.key] = None
            elif isinstance(val, other_cls):
                data[prp.key] = self.get_id(val)
            elif isinstance(val, list):
                data[prp.key] = list(self.get_id(i) for i in val)
            elif isinstance(val, dict):
                data[prp.key] = dict(
                    (k, self.get_id(v)) for k, v in val.iteritems())
            else:
                raise RuntimeError(
                    "Unknown SQLAlchemy relationship type on %s: %s" %
                    (prp.key, type(val)))

        return data

    def safe_get_file(self, digest, path, descr_path=None):
        """Get file from FileCacher ensuring that the digest is
        correct.

        digest (string): the digest of the file to retrieve.
        path (string): the path where to save the file.
        descr_path (string): the path where to save the description.

        return (bool): True if all ok, False if something wrong.

        """
        # First get the file
        try:
            self.file_cacher.get_file(digest, path=path)
        except Exception as error:
            logger.error("File %s could not retrieved from file server (%r)." %
                         (digest, error))
            return False

        # Then check the digest
        calc_digest = sha1sum(path)
        if digest != calc_digest:
            logger.critical("File %s has wrong hash %s." %
                            (digest, calc_digest))
            return False

        # If applicable, retrieve also the description
        if descr_path is not None:
            with codecs.open(descr_path, 'w', encoding='utf-8') as fout:
                fout.write(self.file_cacher.describe(digest))

        return True
예제 #2
0
class ContestExporter:
    """This service exports every data about the contest that CMS
    knows. The process of exporting and importing again should be
    idempotent.

    """
    def __init__(self, contest_id, dump, export_target, skip_submissions,
                 light):
        self.contest_id = contest_id
        self.dump = dump
        self.skip_submissions = skip_submissions
        self.light = light

        # If target is not provided, we use the contest's name.
        if export_target == "":
            with SessionGen(commit=False) as session:
                contest = Contest.get_from_id(self.contest_id, session)
                self.export_target = "dump_%s.tar.gz" % contest.name
        else:
            self.export_target = export_target

        self.file_cacher = FileCacher()

    def run(self):
        """Interface to make the class do its job."""
        return self.do_export()

    def do_export(self):
        """Run the actual export code.

        """
        logger.operation = "exporting contest %d" % self.contest_id
        logger.info("Starting export.")

        export_dir = self.export_target
        archive_info = get_archive_info(self.export_target)

        if archive_info["write_mode"] != "":
            # We are able to write to this archive.
            if os.path.exists(self.export_target):
                logger.error("The specified file already exists, "
                             "I won't overwrite it.")
                return False
            export_dir = os.path.join(tempfile.mkdtemp(),
                                      archive_info["basename"])

        logger.info("Creating dir structure.")
        try:
            os.mkdir(export_dir)
        except OSError:
            logger.error("The specified directory already exists, "
                         "I won't overwrite it.")
            return False

        files_dir = os.path.join(export_dir, "files")
        descr_dir = os.path.join(export_dir, "descriptions")
        os.mkdir(files_dir)
        os.mkdir(descr_dir)

        with SessionGen(commit=False) as session:

            contest = Contest.get_from_id(self.contest_id, session)

            # Export files.
            logger.info("Exporting files.")
            files = contest.enumerate_files(self.skip_submissions,
                                            light=self.light)
            for _file in files:
                if not self.safe_get_file(_file,
                                          os.path.join(files_dir, _file),
                                          os.path.join(descr_dir, _file)):
                    return False

            # Export the contest in JSON format.
            logger.info("Exporting the contest in JSON format.")
            with open(os.path.join(export_dir, "contest.json"), 'w') as fout:
                json.dump(contest.export_to_dict(self.skip_submissions),
                          fout, indent=4)

        if self.dump:
            if not self.dump_database(export_dir):
                return False

        # If the admin requested export to file, we do that.
        if archive_info["write_mode"] != "":
            archive = tarfile.open(self.export_target,
                                   archive_info["write_mode"])
            archive.add(export_dir, arcname=archive_info["basename"])
            archive.close()
            shutil.rmtree(export_dir)

        logger.info("Export finished.")
        logger.operation = ""

        return True

    def dump_database(self, export_dir):
        """Dump the whole database. This is never used; however, this
        part is retained for historical reasons.

        """
        # Warning: this part depends on the specific database used.
        logger.info("Dumping SQL database.")
        (engine, connection) = config.database.split(':', 1)
        db_exportfile = os.path.join(export_dir, "database_dump.sql")

        # Export procedure for PostgreSQL.
        if engine == 'postgresql':
            db_regex = re.compile('//(\w*):(\w*)@(\w*)/(\w*)')
            db_match = db_regex.match(connection)
            if db_match is not None:
                username, password, host, database = db_match.groups()
                os.environ['PGPASSWORD'] = password
                export_res = os.system('pg_dump -h %s -U %s -w %s -x " \
                    "--attribute-inserts > %s' % (host, username, database,
                                                  db_exportfile))
                del os.environ['PGPASSWORD']
                if export_res != 0:
                    logger.critical("Database export failed.")
                    return False
            else:
                logger.critical("Cannot obtain parameters for "
                                "database connection.")
                return False

        # Export procedure for SQLite.
        elif engine == 'sqlite':
            db_regex = re.compile('///(.*)')
            db_match = db_regex.match(connection)
            if db_match is not None:
                dbfile, = db_match.groups()
                export_res = os.system('sqlite3 %s .dump > %s' %
                                       (dbfile, db_exportfile))
                if export_res != 0:
                    logger.critical("Database export failed.")
                    return False
            else:
                logger.critical("Cannot obtain parameters for "
                                "database connection.")
                return False

        else:
            logger.critical("Database engine not supported. :-(")
            return False

        return True

    def safe_get_file(self, digest, path, descr_path=None):
        """Get file from FileCacher ensuring that the digest is
        correct.

        digest (string): the digest of the file to retrieve.
        path (string): the path where to save the file.
        descr_path (string): the path where to save the description.

        return (bool): True if all ok, False if something wrong.

        """
        # First get the file
        try:
            self.file_cacher.get_file(digest, path=path)
        except Exception as error:
            logger.error("File %s could not retrieved from file server (%r)." %
                         (digest, error))
            return False

        # Then check the digest
        calc_digest = sha1sum(path)
        if digest != calc_digest:
            logger.error("File %s has wrong hash %s." % (digest, calc_digest))
            return False

        # If applicable, retrieve also the description
        if descr_path is not None:
            with open(descr_path, 'w') as fout:
                fout.write(self.file_cacher.describe(digest))

        return True
class ContestExporter:
    """This service exports every data about the contest that CMS
    knows. The process of exporting and importing again should be
    idempotent.

    """
    def __init__(self, contest_id, export_target,
                 skip_submissions, skip_user_tests, light):
        self.contest_id = contest_id
        self.skip_submissions = skip_submissions
        self.skip_user_tests = skip_user_tests
        self.light = light

        # If target is not provided, we use the contest's name.
        if export_target == "":
            with SessionGen(commit=False) as session:
                contest = Contest.get_from_id(self.contest_id, session)
                self.export_target = "dump_%s.tar.gz" % contest.name
        else:
            self.export_target = export_target

        self.file_cacher = FileCacher()

    def run(self):
        """Interface to make the class do its job."""
        return self.do_export()

    def do_export(self):
        """Run the actual export code.

        """
        logger.operation = "exporting contest %d" % self.contest_id
        logger.info("Starting export.")

        export_dir = self.export_target
        archive_info = get_archive_info(self.export_target)

        if archive_info["write_mode"] != "":
            # We are able to write to this archive.
            if os.path.exists(self.export_target):
                logger.error("The specified file already exists, "
                             "I won't overwrite it.")
                return False
            export_dir = os.path.join(tempfile.mkdtemp(),
                                      archive_info["basename"])

        logger.info("Creating dir structure.")
        try:
            os.mkdir(export_dir)
        except OSError:
            logger.error("The specified directory already exists, "
                         "I won't overwrite it.")
            return False

        files_dir = os.path.join(export_dir, "files")
        descr_dir = os.path.join(export_dir, "descriptions")
        os.mkdir(files_dir)
        os.mkdir(descr_dir)

        with SessionGen(commit=False) as session:

            contest = Contest.get_from_id(self.contest_id, session)

            # Export files.
            logger.info("Exporting files.")
            files = contest.enumerate_files(self.skip_submissions,
                                            self.skip_user_tests,
                                            light=self.light)
            for _file in files:
                if not self.safe_get_file(_file,
                                          os.path.join(files_dir, _file),
                                          os.path.join(descr_dir, _file)):
                    return False

            # Export the contest in JSON format.
            logger.info("Exporting the contest in JSON format.")
            with open(os.path.join(export_dir, "contest.json"), 'w') as fout:
                json.dump(contest.export_to_dict(
                        self.skip_submissions,
                        self.skip_user_tests),
                          fout, indent=4)

        # If the admin requested export to file, we do that.
        if archive_info["write_mode"] != "":
            archive = tarfile.open(self.export_target,
                                   archive_info["write_mode"])
            archive.add(export_dir, arcname=archive_info["basename"])
            archive.close()
            shutil.rmtree(export_dir)

        logger.info("Export finished.")
        logger.operation = ""

        return True

    def safe_get_file(self, digest, path, descr_path=None):
        """Get file from FileCacher ensuring that the digest is
        correct.

        digest (string): the digest of the file to retrieve.
        path (string): the path where to save the file.
        descr_path (string): the path where to save the description.

        return (bool): True if all ok, False if something wrong.

        """
        # First get the file
        try:
            self.file_cacher.get_file(digest, path=path)
        except Exception as error:
            logger.error("File %s could not retrieved from file server (%r)." %
                         (digest, error))
            return False

        # Then check the digest
        calc_digest = sha1sum(path)
        if digest != calc_digest:
            logger.error("File %s has wrong hash %s." % (digest, calc_digest))
            return False

        # If applicable, retrieve also the description
        if descr_path is not None:
            with codecs.open(descr_path, 'w', encoding='utf-8') as fout:
                fout.write(self.file_cacher.describe(digest))

        return True
예제 #4
0
파일: ContestExporter.py 프로젝트: Mloc/cms
class ContestExporter:

    """This service exports every data about the contest that CMS
    knows. The process of exporting and importing again should be
    idempotent.

    """

    def __init__(self, contest_id, export_target,
                 dump_files, dump_model, light,
                 skip_submissions, skip_user_tests):
        self.contest_id = contest_id
        self.dump_files = dump_files
        self.dump_model = dump_model
        self.light = light
        self.skip_submissions = skip_submissions
        self.skip_user_tests = skip_user_tests

        # If target is not provided, we use the contest's name.
        if export_target == "":
            with SessionGen(commit=False) as session:
                contest = Contest.get_from_id(self.contest_id, session)
                self.export_target = "dump_%s.tar.gz" % contest.name
                logger.warning("export_target not given, using \"%s\""
                               % self.export_target)
        else:
            self.export_target = export_target

        self.file_cacher = FileCacher()

    def do_export(self):
        """Run the actual export code."""
        logger.operation = "exporting contest %d" % self.contest_id
        logger.info("Starting export.")

        export_dir = self.export_target
        archive_info = get_archive_info(self.export_target)

        if archive_info["write_mode"] != "":
            # We are able to write to this archive.
            if os.path.exists(self.export_target):
                logger.critical("The specified file already exists, "
                                "I won't overwrite it.")
                return False
            export_dir = os.path.join(tempfile.mkdtemp(),
                                      archive_info["basename"])

        logger.info("Creating dir structure.")
        try:
            os.mkdir(export_dir)
        except OSError:
            logger.critical("The specified directory already exists, "
                            "I won't overwrite it.")
            return False

        files_dir = os.path.join(export_dir, "files")
        descr_dir = os.path.join(export_dir, "descriptions")
        os.mkdir(files_dir)
        os.mkdir(descr_dir)

        with SessionGen(commit=False) as session:

            contest = Contest.get_from_id(self.contest_id, session)

            # Export files.
            if self.dump_files:
                logger.info("Exporting files.")
                files = contest.enumerate_files(self.skip_submissions,
                                                self.skip_user_tests,
                                                self.light)
                for file_ in files:
                    if not self.safe_get_file(file_,
                                              os.path.join(files_dir, file_),
                                              os.path.join(descr_dir, file_)):
                        return False

            # Export the contest in JSON format.
            if self.dump_model:
                logger.info("Exporting the contest to a JSON file.")

                # We use strings because they'll be the keys of a JSON
                # object; the contest will have ID 0.
                self.ids = {contest.sa_identity_key: "0"}
                self.queue = [contest]

                data = dict()
                while len(self.queue) > 0:
                    obj = self.queue.pop(0)
                    data[self.ids[obj.sa_identity_key]] = self.export_object(obj)

                # Specify the "root" of the data graph
                data["_objects"] = ["0"]

                with io.open(os.path.join(export_dir,
                                          "contest.json"), "wb") as fout:
                    json.dump(data, fout, encoding="utf-8",
                              indent=4, sort_keys=True)

        # If the admin requested export to file, we do that.
        if archive_info["write_mode"] != "":
            archive = tarfile.open(self.export_target,
                                   archive_info["write_mode"])
            archive.add(export_dir, arcname=archive_info["basename"])
            archive.close()
            shutil.rmtree(export_dir)

        logger.info("Export finished.")
        logger.operation = ""

        return True

    def get_id(self, obj):
        obj_key = obj.sa_identity_key
        if obj_key not in self.ids:
            # We use strings because they'll be the keys of a JSON object
            self.ids[obj_key] = str(len(self.ids))
            self.queue.append(obj)

        return self.ids[obj_key]

    def export_object(self, obj):

        """Export the given object, returning a JSON-encodable dict.

        The returned dict will contain a "_class" item (the name of the
        class of the given object), an item for each column property
        (with a value properly translated to a JSON-compatible type)
        and an item for each relationship property (which will be an ID
        or a collection of IDs).

        The IDs used in the exported dict aren't related to the ones
        used in the DB: they are newly generated and their scope is
        limited to the exported file only. They are shared among all
        classes (that is, two objects can never share the same ID, even
        if they are of different classes).

        If, when exporting the relationship, we find an object without
        an ID we generate a new ID, assign it to the object and append
        the object to the queue of objects to export.

        The self.skip_submissions flag controls wheter we export
        submissions (and all other objects that can be reached only by
        passing through a submission) or not.

        """

        cls = type(obj)

        data = {"_class": cls.__name__}

        for prp in cls._col_props:
            col, = prp.columns
            col_type = type(col.type)

            val = getattr(obj, prp.key)
            if col_type in [Boolean, Integer, Float, String]:
                data[prp.key] = val
            elif col_type is DateTime:
                data[prp.key] = \
                    make_timestamp(val) if val is not None else None
            elif col_type is Interval:
                data[prp.key] = \
                    val.total_seconds() if val is not None else None
            else:
                raise RuntimeError("Unknown SQLAlchemy column type: %s"
                                   % col_type)

        for prp in cls._rel_props:
            other_cls = prp.mapper.class_

            # Skip submissions if requested
            if self.skip_submissions and other_cls is Submission:
                continue

            # Skip user_tests if requested
            if self.skip_user_tests and other_cls is UserTest:
                continue

            val = getattr(obj, prp.key)
            if val is None:
                data[prp.key] = None
            elif isinstance(val, other_cls):
                data[prp.key] = self.get_id(val)
            elif isinstance(val, list):
                data[prp.key] = list(self.get_id(i) for i in val)
            elif isinstance(val, dict):
                data[prp.key] = \
                    dict((k, self.get_id(v)) for k, v in val.iteritems())
            else:
                raise RuntimeError("Unknown SQLAlchemy relationship type: %s"
                                   % type(val))

        return data

    def safe_get_file(self, digest, path, descr_path=None):

        """Get file from FileCacher ensuring that the digest is
        correct.

        digest (string): the digest of the file to retrieve.
        path (string): the path where to save the file.
        descr_path (string): the path where to save the description.

        return (bool): True if all ok, False if something wrong.

        """

        # TODO - Probably this method could be merged in FileCacher

        # First get the file
        try:
            self.file_cacher.get_file(digest, path=path)
        except Exception as error:
            logger.error("File %s could not retrieved from file server (%r)."
                         % (digest, error))
            return False

        # Then check the digest
        calc_digest = sha1sum(path)
        if digest != calc_digest:
            logger.critical("File %s has wrong hash %s."
                            % (digest, calc_digest))
            return False

        # If applicable, retrieve also the description
        if descr_path is not None:
            with io.open(descr_path, 'wt', encoding='utf-8') as fout:
                fout.write(self.file_cacher.describe(digest))

        return True