def main():
    """Parse arguments and export the selected submission files.

    The command line selects which submissions to export (by contest,
    task, user, submission id and minimum score), how the exported files
    are named, and where they are written. After the user confirms, each
    selected file is read from the file store and written into the output
    directory. A file whose target path already exists is skipped and
    never overwritten.

    Returns 0 on success, or 1 when --add-info is given without --utf8 or
    when the output path exists but is not a directory. The process exits
    with status 0 if the user declines the confirmation prompt, and with
    status 1 if a file cannot be decoded while --utf8 is set.

    """
    parser = argparse.ArgumentParser(
        description="Export CMS submissions to a folder.")
    parser.add_argument("-c", "--contest-id", action="store", type=int,
                        help="id of contest (default: all contests)")
    parser.add_argument("-t", "--task-id", action="store", type=int,
                        help="id of task (default: all tasks)")
    parser.add_argument("-u", "--user-id", action="store", type=int,
                        help="id of user (default: all users)")
    parser.add_argument("-s", "--submission-id", action="store", type=int,
                        help="id of submission (default: all submissions)")
    parser.add_argument("--utf8", action="store_true",
                        help="if set, the files will be encoded in utf8"
                             " when possible")
    parser.add_argument("--add-info", action="store_true",
                        help="if set, information on the submission will"
                             " be added in the first lines of each file")
    parser.add_argument("--min-score", action="store", type=float,
                        help="ignore submissions which scored strictly"
                             " less than this (default: 0.0)",
                        default=0.0)
    parser.add_argument("--filename", action="store", type=utf8_decoder,
                        help="the filename format to use"
                             " (default: {id}.{name}.{ext})",
                        default="{id}.{name}.{ext}")
    parser.add_argument("output_dir", action="store", type=utf8_decoder,
                        help="directory where to save the submissions")

    # --unique and --best both keep only top-scoring submissions, so they
    # cannot be combined.
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument("--unique", action="store_true",
                       help="if set, only the earliest best submission"
                            " will be exported for each (user, task)")
    group.add_argument("--best", action="store_true",
                       help="if set, only the best submissions will be"
                            " exported for each (user, task)")

    args = parser.parse_args()

    # The info header is prepended as text, so the data must be decoded.
    if args.add_info and not args.utf8:
        logger.critical("If --add-info is specified, then --utf8 must be"
                        " specified as well.")
        return 1

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)
    if not os.path.isdir(args.output_dir):
        logger.critical("The output-dir parameter must point to a directory")
        return 1

    with SessionGen() as session:
        # One row per submitted file, restricted to the result computed on
        # the task's active dataset and to scores of at least --min-score.
        q = session.query(Submission)\
            .join(Submission.task)\
            .join(Submission.files)\
            .join(Submission.results)\
            .join(SubmissionResult.dataset)\
            .join(Submission.participation)\
            .join(Participation.user)\
            .filter(Dataset.id == Task.active_dataset_id)\
            .filter(SubmissionResult.score >= args.min_score)\
            .with_entities(Submission.id, Submission.language,
                           Submission.timestamp,
                           SubmissionResult.score,
                           File.filename, File.digest,
                           User.id, User.username, User.first_name,
                           User.last_name,
                           Task.id, Task.name)

        if args.contest_id:
            q = q.filter(Participation.contest_id == args.contest_id)
        if args.task_id:
            q = q.filter(Submission.task_id == args.task_id)
        if args.user_id:
            q = q.filter(Participation.user_id == args.user_id)
        if args.submission_id:
            q = q.filter(Submission.id == args.submission_id)

        results = q.all()
        if args.unique or args.best:
            results = filter_top_scoring(results, args.unique)

        print("%s file(s) will be created." % len(results))
        if raw_input("Continue? [Y/n] ").lower() not in ["y", ""]:
            sys.exit(0)

        done = 0
        for row in results:
            s_id, s_language, s_timestamp, sr_score, f_filename, f_digest, \
                u_id, u_name, u_fname, u_lname, t_id, t_name = row

            # Drop the ".%l" language placeholder suffix from the stored
            # filename, if present.
            name = f_filename
            if name.endswith(".%l"):
                name = name[:-3]  # remove last 3 chars

            filename = args.filename.format(id=s_id, name=name,
                                            ext=s_language,
                                            time=s_timestamp, user=u_name)
            filename = os.path.join(args.output_dir, filename)
            if os.path.exists(filename):
                logger.warning("Skipping file '%s' because it already exists",
                               filename)
                # Actually skip it: without this the existing file would be
                # overwritten despite the warning above.
                continue

            fso = FSObject.get_from_digest(f_digest, session)
            assert fso is not None
            with fso.get_lobject(mode="rb") as file_obj:
                data = file_obj.read()

                if args.utf8:
                    try:
                        data = utf8_decoder(data)
                    except TypeError:
                        logger.critical("Could not guess encoding of file "
                                        "'%s'. Aborting.",
                                        filename)
                        sys.exit(1)

                    if args.add_info:
                        # Prepend the per-language header filled with the
                        # submission details.
                        data = TEMPLATE[s_language] % (
                            u_name,
                            u_fname,
                            u_lname,
                            t_name,
                            sr_score,
                            s_timestamp
                        ) + data

                    # Print utf8-encoded, possibly altered data
                    with codecs.open(filename, "w", encoding="utf-8") as f_out:
                        f_out.write(data)
                else:
                    # Print raw, untouched binary data
                    with open(filename, "wb") as f_out:
                        f_out.write(data)

            done += 1
            print(done, "/", len(results))

    return 0
def main():
    """Command line entry point: export submission files to a folder.

    Parses the command line, selects the submission files matching the
    given filters, asks the user for confirmation and writes each file
    under the output directory using the --filename format. Files that
    already exist, whose parent path is not a directory, or (with --utf8)
    whose content cannot be decoded are skipped with a warning.

    Returns 0 when the export ran or the user declined the prompt, and 1
    when --add-info is given without --utf8 or when the output path is
    not a directory.

    """
    parser = argparse.ArgumentParser(
        description="Export CMS submissions to a folder.\n",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument(
        "-c", "--contest-id", action="store", type=int,
        help="id of contest (default: all contests)")
    parser.add_argument(
        "-t", "--task-id", action="store", type=int,
        help="id of task (default: all tasks)")
    parser.add_argument(
        "-u", "--user-id", action="store", type=int,
        help="id of user (default: all users)")
    parser.add_argument(
        "-s", "--submission-id", action="store", type=int,
        help="id of submission (default: all submissions)")
    parser.add_argument(
        "--utf8", action="store_true",
        help="if set, the files will be encoded in utf8"
             " when possible")
    parser.add_argument(
        "--add-info", action="store_true",
        help="if set, information on the submission will"
             " be added in the first lines of each file")
    parser.add_argument(
        "--min-score", action="store", type=float,
        help="ignore submissions which scored strictly"
             " less than this (default: 0.0)",
        default=0.0)
    parser.add_argument(
        "--filename", action="store", type=utf8_decoder,
        help="the filename format to use\n"
             "Variables:\n"
             " id: submission id\n"
             " file: filename without extension\n"
             " ext: filename extension\n"
             " time: submission timestamp\n"
             " user: username\n"
             " task: taskname\n"
             " score: raw score\n"
             " (default: {id}.{file}{ext})",
        default="{id}.{file}{ext}")
    parser.add_argument(
        "output_dir", action="store", type=utf8_decoder,
        help="directory where to save the submissions")

    # The two "top scoring" selection modes exclude each other.
    selection = parser.add_mutually_exclusive_group(required=False)
    selection.add_argument(
        "--unique", action="store_true",
        help="if set, only the earliest best submission"
             " will be exported for each (user, task)")
    selection.add_argument(
        "--best", action="store_true",
        help="if set, only the best submissions will be"
             " exported for each (user, task)")

    args = parser.parse_args()

    if args.add_info and not args.utf8:
        logger.critical("If --add-info is specified, then --utf8 must be"
                        " specified as well.")
        return 1

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)
    if not os.path.isdir(args.output_dir):
        logger.critical("The output-dir parameter must point to a directory")
        return 1

    with SessionGen() as session:
        # One row per submitted file, limited to results on the task's
        # active dataset that reach the requested minimum score.
        query = (
            session.query(Submission)
            .join(Submission.task)
            .join(Submission.files)
            .join(Submission.results)
            .join(SubmissionResult.dataset)
            .join(Submission.participation)
            .join(Participation.user)
            .filter(Dataset.id == Task.active_dataset_id)
            .filter(SubmissionResult.score >= args.min_score)
            .with_entities(
                Submission.id, Submission.language, Submission.timestamp,
                SubmissionResult.score,
                File.filename, File.digest,
                User.id, User.username, User.first_name, User.last_name,
                Task.id, Task.name)
        )

        # Optional narrowing filters, applied only when the matching
        # command line option was given.
        optional_filters = (
            (args.contest_id, Participation.contest_id),
            (args.task_id, Submission.task_id),
            (args.user_id, Participation.user_id),
            (args.submission_id, Submission.id),
        )
        for wanted, column in optional_filters:
            if wanted:
                query = query.filter(column == wanted)

        rows = query.all()
        if args.unique or args.best:
            rows = filter_top_scoring(rows, args.unique)

        print("%s file(s) will be created." % len(rows))
        answer = input("Continue? [Y/n] ").strip().lower()
        if answer not in ["y", ""]:
            return 0

        written = 0
        for row in rows:
            (sub_id, lang_name, submitted_at, score, stored_name, digest,
             _user_id, username, first_name, last_name,
             _task_id, task_name) = row

            stamp = submitted_at.strftime('%Y%m%dT%H%M%S')

            # Submissions without a language are exported as plain text.
            if lang_name:
                ext = languagemanager.get_language(lang_name).source_extension
            else:
                ext = '.txt'
            resolved_name = stored_name.replace('.%l', ext)
            base_part, ext_part = os.path.splitext(resolved_name)

            # "name" is a deprecated specifier with the same meaning as "file"
            relative_path = args.filename.format(
                id=sub_id,
                file=base_part,
                name=base_part,
                ext=ext_part,
                time=stamp,
                user=username,
                task=task_name,
                score=score)
            target = os.path.join(args.output_dir, relative_path)
            if os.path.exists(target):
                logger.warning("Skipping file '%s' because it already exists",
                               target)
                continue

            # The filename format may contain subdirectories.
            parent = os.path.dirname(target)
            if not os.path.exists(parent):
                os.makedirs(parent)
            if not os.path.isdir(parent):
                logger.warning("%s is not a directory, skipped.", parent)
                continue

            fso = FSObject.get_from_digest(digest, session)
            assert fso is not None
            with fso.get_lobject(mode="rb") as source:
                payload = source.read()

                if not args.utf8:
                    # Print raw, untouched binary data
                    with open(target, "wb") as sink:
                        sink.write(payload)
                else:
                    try:
                        payload = utf8_decoder(payload)
                    except TypeError:
                        logger.warning(
                            "Could not guess encoding of file "
                            "'%s'. Skipping.", target)
                        continue

                    if args.add_info:
                        header = TEMPLATE[ext] % (
                            username, first_name, last_name,
                            task_name, score, submitted_at)
                        payload = header + payload

                    # Print utf8-encoded, possibly altered data
                    with open(target, "wt", encoding="utf-8") as sink:
                        sink.write(payload)

            written += 1
            print(written, "/", len(rows))

    return 0
def main():
    """Parse arguments and export the selected submission files.

    The command line selects which submissions to export (by contest,
    task, user, submission id and minimum score), how the exported files
    are named, and where they are written. After the user confirms, each
    selected file is read from the file store and written into the output
    directory. A file whose target path already exists is skipped and
    never overwritten.

    Returns 0 on success, or 1 when --add-info is given without --utf8 or
    when the output path exists but is not a directory. The process exits
    with status 0 if the user declines the confirmation prompt, and with
    status 1 if a file cannot be decoded while --utf8 is set.

    """
    parser = argparse.ArgumentParser(
        description="Export CMS submissions to a folder.")
    parser.add_argument("-c", "--contest-id", action="store", type=int,
                        help="id of contest (default: all contests)")
    parser.add_argument("-t", "--task-id", action="store", type=int,
                        help="id of task (default: all tasks)")
    parser.add_argument("-u", "--user-id", action="store", type=int,
                        help="id of user (default: all users)")
    parser.add_argument("-s", "--submission-id", action="store", type=int,
                        help="id of submission (default: all submissions)")
    parser.add_argument("--utf8", action="store_true",
                        help="if set, the files will be encoded in utf8"
                             " when possible")
    parser.add_argument("--add-info", action="store_true",
                        help="if set, information on the submission will"
                             " be added in the first lines of each file")
    parser.add_argument("--min-score", action="store", type=float,
                        help="ignore submissions which scored strictly"
                             " less than this (default: 0.0)",
                        default=0.0)
    parser.add_argument("--filename", action="store", type=utf8_decoder,
                        help="the filename format to use"
                             " (default: {id}.{name}.{ext})",
                        default="{id}.{name}.{ext}")
    parser.add_argument("output_dir", action="store", type=utf8_decoder,
                        help="directory where to save the submissions")

    # --unique and --best both keep only top-scoring submissions, so they
    # cannot be combined.
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument("--unique", action="store_true",
                       help="if set, only the earliest best submission"
                            " will be exported for each (user, task)")
    group.add_argument("--best", action="store_true",
                       help="if set, only the best submissions will be"
                            " exported for each (user, task)")

    args = parser.parse_args()

    # The info header is prepended as text, so the data must be decoded.
    if args.add_info and not args.utf8:
        logger.critical("If --add-info is specified, then --utf8 must be"
                        " specified as well.")
        return 1

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)
    if not os.path.isdir(args.output_dir):
        logger.critical("The output-dir parameter must point to a directory")
        return 1

    with SessionGen() as session:
        # One row per submitted file, restricted to the result computed on
        # the task's active dataset and to scores of at least --min-score.
        q = session.query(Submission)\
            .join(Submission.task)\
            .join(Submission.files)\
            .join(Submission.results)\
            .join(SubmissionResult.dataset)\
            .join(Submission.participation)\
            .join(Participation.user)\
            .filter(Dataset.id == Task.active_dataset_id)\
            .filter(SubmissionResult.score >= args.min_score)\
            .with_entities(Submission.id, Submission.language,
                           Submission.timestamp,
                           SubmissionResult.score,
                           File.filename, File.digest,
                           User.id, User.username, User.first_name,
                           User.last_name,
                           Task.id, Task.name)

        if args.contest_id:
            q = q.filter(Participation.contest_id == args.contest_id)
        if args.task_id:
            q = q.filter(Submission.task_id == args.task_id)
        if args.user_id:
            q = q.filter(Participation.user_id == args.user_id)
        if args.submission_id:
            q = q.filter(Submission.id == args.submission_id)

        results = q.all()
        if args.unique or args.best:
            results = filter_top_scoring(results, args.unique)

        print("%s file(s) will be created." % len(results))
        if raw_input("Continue? [Y/n] ").lower() not in ["y", ""]:
            sys.exit(0)

        done = 0
        for row in results:
            s_id, s_language, s_timestamp, sr_score, f_filename, f_digest, \
                u_id, u_name, u_fname, u_lname, t_id, t_name = row

            # Drop the ".%l" language placeholder suffix from the stored
            # filename, if present.
            name = f_filename
            if name.endswith(".%l"):
                name = name[:-3]  # remove last 3 chars

            filename = args.filename.format(id=s_id, name=name,
                                            ext=s_language,
                                            time=s_timestamp, user=u_name)
            filename = os.path.join(args.output_dir, filename)
            if os.path.exists(filename):
                logger.warning("Skipping file '%s' because it already exists",
                               filename)
                # Actually skip it: without this the existing file would be
                # overwritten despite the warning above.
                continue

            fso = FSObject.get_from_digest(f_digest, session)
            assert fso is not None
            with fso.get_lobject(mode="rb") as file_obj:
                data = file_obj.read()

                if args.utf8:
                    try:
                        data = utf8_decoder(data)
                    except TypeError:
                        logger.critical(
                            "Could not guess encoding of file "
                            "'%s'. Aborting.", filename)
                        sys.exit(1)

                    if args.add_info:
                        # Prepend the per-language header filled with the
                        # submission details.
                        data = TEMPLATE[s_language] % (
                            u_name, u_fname, u_lname,
                            t_name, sr_score, s_timestamp) + data

                    # Print utf8-encoded, possibly altered data
                    with codecs.open(filename, "w", encoding="utf-8") as f_out:
                        f_out.write(data)
                else:
                    # Print raw, untouched binary data
                    with open(filename, "wb") as f_out:
                        f_out.write(data)

            done += 1
            print(done, "/", len(results))

    return 0