Ejemplo n.º 1
0
def main():
    """Parse arguments and export the selected submissions to a folder.

    The command-line options select which submissions to export (by
    contest, task, user, submission id and minimum score) and how the
    exported files are named and encoded. Every matching submission
    file is read from the file store and written inside ``output_dir``.
    Files that already exist on disk are left untouched.

    Returns 0 on success and 1 when the arguments are inconsistent or
    ``output_dir`` is not a directory. The process exits through
    ``sys.exit`` if the user declines the confirmation prompt (status 0)
    or if ``--utf8`` is set and a file cannot be decoded (status 1).

    """
    parser = argparse.ArgumentParser(
        description="Export CMS submissions to a folder.")
    parser.add_argument("-c", "--contest-id", action="store", type=int,
                        help="id of contest (default: all contests)")
    parser.add_argument("-t", "--task-id", action="store", type=int,
                        help="id of task (default: all tasks)")
    parser.add_argument("-u", "--user-id", action="store", type=int,
                        help="id of user (default: all users)")
    parser.add_argument("-s", "--submission-id", action="store", type=int,
                        help="id of submission (default: all submissions)")
    parser.add_argument("--utf8", action="store_true",
                        help="if set, the files will be encoded in utf8"
                             " when possible")
    parser.add_argument("--add-info", action="store_true",
                        help="if set, information on the submission will"
                             " be added in the first lines of each file")
    parser.add_argument("--min-score", action="store", type=float,
                        help="ignore submissions which scored strictly"
                             " less than this (default: 0.0)",
                        default=0.0)
    parser.add_argument("--filename", action="store", type=utf8_decoder,
                        help="the filename format to use"
                             " (default: {id}.{name}.{ext})",
                        default="{id}.{name}.{ext}")
    parser.add_argument("output_dir", action="store", type=utf8_decoder,
                        help="directory where to save the submissions")

    # --unique and --best both reduce the result set to top-scoring
    # submissions, so they cannot be combined.
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument("--unique", action="store_true",
                       help="if set, only the earliest best submission"
                            " will be exported for each (user, task)")
    group.add_argument("--best", action="store_true",
                       help="if set, only the best submissions will be"
                            " exported for each (user, task)")

    args = parser.parse_args()

    # The info header is prepended as text, so the data must have been
    # decoded first.
    if args.add_info and not args.utf8:
        logger.critical("If --add-info is specified, then --utf8 must be"
                        " specified as well.")
        return 1

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)
    if not os.path.isdir(args.output_dir):
        logger.critical("The output-dir parameter must point to a directory")
        return 1

    with SessionGen() as session:
        # One row per (submission, file), restricted to the result
        # computed on the task's active dataset.
        q = session.query(Submission)\
            .join(Submission.task)\
            .join(Submission.files)\
            .join(Submission.results)\
            .join(SubmissionResult.dataset)\
            .join(Submission.participation)\
            .join(Participation.user)\
            .filter(Dataset.id == Task.active_dataset_id)\
            .filter(SubmissionResult.score >= args.min_score)\
            .with_entities(Submission.id, Submission.language,
                           Submission.timestamp,
                           SubmissionResult.score,
                           File.filename, File.digest,
                           User.id, User.username, User.first_name,
                           User.last_name,
                           Task.id, Task.name)

        if args.contest_id:
            q = q.filter(Participation.contest_id == args.contest_id)

        if args.task_id:
            q = q.filter(Submission.task_id == args.task_id)

        if args.user_id:
            q = q.filter(Participation.user_id == args.user_id)

        if args.submission_id:
            q = q.filter(Submission.id == args.submission_id)

        results = q.all()

        if args.unique or args.best:
            results = filter_top_scoring(results, args.unique)

        print("%s file(s) will be created." % len(results))
        if raw_input("Continue? [Y/n] ").lower() not in ["y", ""]:
            sys.exit(0)

        done = 0
        for row in results:
            s_id, s_language, s_timestamp, sr_score, f_filename, f_digest, \
                u_id, u_name, u_fname, u_lname, t_id, t_name = row

            name = f_filename
            if name.endswith(".%l"):
                name = name[:-3]  # remove last 3 chars

            filename = args.filename.format(id=s_id, name=name, ext=s_language,
                                            time=s_timestamp, user=u_name)
            filename = os.path.join(args.output_dir, filename)
            if os.path.exists(filename):
                logger.warning("Skipping file '%s' because it already exists",
                               filename)
                # Actually skip it: without this the existing file would
                # be truncated and overwritten by the writes below.
                continue

            fso = FSObject.get_from_digest(f_digest, session)
            assert fso is not None
            with fso.get_lobject(mode="rb") as file_obj:
                data = file_obj.read()

                if args.utf8:
                    try:
                        data = utf8_decoder(data)
                    except TypeError:
                        logger.critical("Could not guess encoding of file "
                                        "'%s'. Aborting.",
                                        filename)
                        sys.exit(1)

                    if args.add_info:
                        data = TEMPLATE[s_language] % (
                            u_name,
                            u_fname,
                            u_lname,
                            t_name,
                            sr_score,
                            s_timestamp
                        ) + data

                    # Print utf8-encoded, possibly altered data
                    with codecs.open(filename, "w", encoding="utf-8") as f_out:
                        f_out.write(data)
                else:
                    # Print raw, untouched binary data
                    with open(filename, "wb") as f_out:
                        f_out.write(data)

            # Only files actually written are counted.
            done += 1
            print(done, "/", len(results))

    return 0
Ejemplo n.º 2
0
def main():
    """Export CMS submission files to a directory, driven by CLI options.

    Command-line filters (contest, task, user, submission id, minimum
    score, --unique/--best) select the submission files to export. After
    an interactive confirmation each file is written below ``output_dir``
    using the ``--filename`` format; intermediate directories are created
    as needed. Files that already exist, or that cannot be decoded when
    ``--utf8`` is set, are skipped with a warning.

    Returns 0 on success or when the user declines the prompt, and 1
    when the arguments are inconsistent or ``output_dir`` is not a
    directory.

    """
    parser = argparse.ArgumentParser(
        description="Export CMS submissions to a folder.\n",
        formatter_class=argparse.RawTextHelpFormatter)

    # The four integer id selectors only differ in flags and help text.
    id_selectors = (
        ("-c", "--contest-id", "id of contest (default: all contests)"),
        ("-t", "--task-id", "id of task (default: all tasks)"),
        ("-u", "--user-id", "id of user (default: all users)"),
        ("-s", "--submission-id",
         "id of submission (default: all submissions)"),
    )
    for short_flag, long_flag, help_text in id_selectors:
        parser.add_argument(short_flag, long_flag, action="store", type=int,
                            help=help_text)

    parser.add_argument(
        "--utf8", action="store_true",
        help="if set, the files will be encoded in utf8"
             " when possible")
    parser.add_argument(
        "--add-info", action="store_true",
        help="if set, information on the submission will"
             " be added in the first lines of each file")
    parser.add_argument(
        "--min-score", action="store", type=float, default=0.0,
        help="ignore submissions which scored strictly"
             " less than this (default: 0.0)")
    parser.add_argument(
        "--filename", action="store", type=utf8_decoder,
        default="{id}.{file}{ext}",
        help="the filename format to use\n"
             "Variables:\n"
             "  id: submission id\n"
             "  file: filename without extension\n"
             "  ext: filename extension\n"
             "  time: submission timestamp\n"
             "  user: username\n"
             "  task: taskname\n"
             "  score: raw score\n"
             " (default: {id}.{file}{ext})")
    parser.add_argument(
        "output_dir", action="store", type=utf8_decoder,
        help="directory where to save the submissions")

    # --unique and --best are alternatives and cannot be combined.
    selection = parser.add_mutually_exclusive_group(required=False)
    selection.add_argument(
        "--unique", action="store_true",
        help="if set, only the earliest best submission"
             " will be exported for each (user, task)")
    selection.add_argument(
        "--best", action="store_true",
        help="if set, only the best submissions will be"
             " exported for each (user, task)")

    args = parser.parse_args()

    if args.add_info and not args.utf8:
        logger.critical("If --add-info is specified, then --utf8 must be"
                        " specified as well.")
        return 1

    out_root = args.output_dir
    if not os.path.exists(out_root):
        os.mkdir(out_root)
    if not os.path.isdir(out_root):
        logger.critical("The output-dir parameter must point to a directory")
        return 1

    with SessionGen() as session:
        # One row per (submission, file), scored on the active dataset.
        query = (
            session.query(Submission)
            .join(Submission.task)
            .join(Submission.files)
            .join(Submission.results)
            .join(SubmissionResult.dataset)
            .join(Submission.participation)
            .join(Participation.user)
            .filter(Dataset.id == Task.active_dataset_id)
            .filter(SubmissionResult.score >= args.min_score)
            .with_entities(Submission.id, Submission.language,
                           Submission.timestamp,
                           SubmissionResult.score,
                           File.filename, File.digest,
                           User.id, User.username, User.first_name,
                           User.last_name,
                           Task.id, Task.name)
        )

        # Each optional id narrows the query only when it was supplied.
        optional_filters = (
            (args.contest_id, Participation.contest_id),
            (args.task_id, Submission.task_id),
            (args.user_id, Participation.user_id),
            (args.submission_id, Submission.id),
        )
        for wanted, column in optional_filters:
            if wanted:
                query = query.filter(column == wanted)

        results = query.all()

        if args.unique or args.best:
            results = filter_top_scoring(results, args.unique)

        print("%s file(s) will be created." % len(results))
        answer = input("Continue? [Y/n] ").strip().lower()
        if answer not in ("y", ""):
            return 0

        exported = 0
        for row in results:
            (sub_id, lang_name, submitted_at, score, stored_name, digest,
             _user_id, username, first_name, last_name,
             _task_id, task_name) = row

            timef = submitted_at.strftime('%Y%m%dT%H%M%S')

            # Submissions without a language fall back to a .txt extension.
            if lang_name:
                ext = languagemanager.get_language(lang_name).source_extension
            else:
                ext = '.txt'
            resolved_name = stored_name.replace('.%l', ext)
            filename_base, filename_ext = os.path.splitext(resolved_name)

            # "name" is a deprecated specifier with the same meaning as "file"
            fields = dict(id=sub_id,
                          file=filename_base,
                          name=filename_base,
                          ext=filename_ext,
                          time=timef,
                          user=username,
                          task=task_name,
                          score=score)
            target = os.path.join(out_root, args.filename.format(**fields))

            if os.path.exists(target):
                logger.warning("Skipping file '%s' because it already exists",
                               target)
                continue

            # The filename format may contain subdirectories.
            target_dir = os.path.dirname(target)
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)
            if not os.path.isdir(target_dir):
                logger.warning("%s is not a directory, skipped.", target_dir)
                continue

            fso = FSObject.get_from_digest(digest, session)
            assert fso is not None
            with fso.get_lobject(mode="rb") as blob:
                payload = blob.read()

                if not args.utf8:
                    # Print raw, untouched binary data
                    with open(target, "wb") as f_out:
                        f_out.write(payload)
                else:
                    try:
                        payload = utf8_decoder(payload)
                    except TypeError:
                        logger.warning(
                            "Could not guess encoding of file "
                            "'%s'. Skipping.", target)
                        continue

                    if args.add_info:
                        header = TEMPLATE[ext] % (username, first_name,
                                                  last_name, task_name,
                                                  score, submitted_at)
                        payload = header + payload

                    # Print utf8-encoded, possibly altered data
                    with open(target, "wt", encoding="utf-8") as f_out:
                        f_out.write(payload)

            # Only files actually written are counted.
            exported += 1
            print(exported, "/", len(results))

    return 0
Ejemplo n.º 3
0
def main():
    """Parse arguments and export the selected submissions to a folder.

    The command-line options select which submissions to export (by
    contest, task, user, submission id and minimum score) and how the
    exported files are named and encoded. Every matching submission
    file is read from the file store and written inside ``output_dir``.
    Files that already exist on disk are left untouched.

    Returns 0 on success and 1 when the arguments are inconsistent or
    ``output_dir`` is not a directory. The process exits through
    ``sys.exit`` if the user declines the confirmation prompt (status 0)
    or if ``--utf8`` is set and a file cannot be decoded (status 1).

    """
    parser = argparse.ArgumentParser(
        description="Export CMS submissions to a folder.")
    parser.add_argument("-c",
                        "--contest-id",
                        action="store",
                        type=int,
                        help="id of contest (default: all contests)")
    parser.add_argument("-t",
                        "--task-id",
                        action="store",
                        type=int,
                        help="id of task (default: all tasks)")
    parser.add_argument("-u",
                        "--user-id",
                        action="store",
                        type=int,
                        help="id of user (default: all users)")
    parser.add_argument("-s",
                        "--submission-id",
                        action="store",
                        type=int,
                        help="id of submission (default: all submissions)")
    parser.add_argument("--utf8",
                        action="store_true",
                        help="if set, the files will be encoded in utf8"
                        " when possible")
    parser.add_argument("--add-info",
                        action="store_true",
                        help="if set, information on the submission will"
                        " be added in the first lines of each file")
    parser.add_argument("--min-score",
                        action="store",
                        type=float,
                        help="ignore submissions which scored strictly"
                        " less than this (default: 0.0)",
                        default=0.0)
    parser.add_argument("--filename",
                        action="store",
                        type=utf8_decoder,
                        help="the filename format to use"
                        " (default: {id}.{name}.{ext})",
                        default="{id}.{name}.{ext}")
    parser.add_argument("output_dir",
                        action="store",
                        type=utf8_decoder,
                        help="directory where to save the submissions")

    # --unique and --best both reduce the result set to top-scoring
    # submissions, so they cannot be combined.
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument("--unique",
                       action="store_true",
                       help="if set, only the earliest best submission"
                       " will be exported for each (user, task)")
    group.add_argument("--best",
                       action="store_true",
                       help="if set, only the best submissions will be"
                       " exported for each (user, task)")

    args = parser.parse_args()

    # The info header is prepended as text, so the data must have been
    # decoded first.
    if args.add_info and not args.utf8:
        logger.critical("If --add-info is specified, then --utf8 must be"
                        " specified as well.")
        return 1

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)
    if not os.path.isdir(args.output_dir):
        logger.critical("The output-dir parameter must point to a directory")
        return 1

    with SessionGen() as session:
        # One row per (submission, file), restricted to the result
        # computed on the task's active dataset.
        q = session.query(Submission)\
            .join(Submission.task)\
            .join(Submission.files)\
            .join(Submission.results)\
            .join(SubmissionResult.dataset)\
            .join(Submission.participation)\
            .join(Participation.user)\
            .filter(Dataset.id == Task.active_dataset_id)\
            .filter(SubmissionResult.score >= args.min_score)\
            .with_entities(Submission.id, Submission.language,
                           Submission.timestamp,
                           SubmissionResult.score,
                           File.filename, File.digest,
                           User.id, User.username, User.first_name,
                           User.last_name,
                           Task.id, Task.name)

        if args.contest_id:
            q = q.filter(Participation.contest_id == args.contest_id)

        if args.task_id:
            q = q.filter(Submission.task_id == args.task_id)

        if args.user_id:
            q = q.filter(Participation.user_id == args.user_id)

        if args.submission_id:
            q = q.filter(Submission.id == args.submission_id)

        results = q.all()

        if args.unique or args.best:
            results = filter_top_scoring(results, args.unique)

        print("%s file(s) will be created." % len(results))
        if raw_input("Continue? [Y/n] ").lower() not in ["y", ""]:
            sys.exit(0)

        done = 0
        for row in results:
            s_id, s_language, s_timestamp, sr_score, f_filename, f_digest, \
                u_id, u_name, u_fname, u_lname, t_id, t_name = row

            name = f_filename
            if name.endswith(".%l"):
                name = name[:-3]  # remove last 3 chars

            filename = args.filename.format(id=s_id,
                                            name=name,
                                            ext=s_language,
                                            time=s_timestamp,
                                            user=u_name)
            filename = os.path.join(args.output_dir, filename)
            if os.path.exists(filename):
                logger.warning("Skipping file '%s' because it already exists",
                               filename)
                # Actually skip it: without this the existing file would
                # be truncated and overwritten by the writes below.
                continue

            fso = FSObject.get_from_digest(f_digest, session)
            assert fso is not None
            with fso.get_lobject(mode="rb") as file_obj:
                data = file_obj.read()

                if args.utf8:
                    try:
                        data = utf8_decoder(data)
                    except TypeError:
                        logger.critical(
                            "Could not guess encoding of file "
                            "'%s'. Aborting.", filename)
                        sys.exit(1)

                    if args.add_info:
                        data = TEMPLATE[s_language] % (
                            u_name, u_fname, u_lname, t_name, sr_score,
                            s_timestamp) + data

                    # Print utf8-encoded, possibly altered data
                    with codecs.open(filename, "w", encoding="utf-8") as f_out:
                        f_out.write(data)
                else:
                    # Print raw, untouched binary data
                    with open(filename, "wb") as f_out:
                        f_out.write(data)

            # Only files actually written are counted.
            done += 1
            print(done, "/", len(results))

    return 0