def commit_file(self, fobj, digest, desc=""):
    """See FileCacherBackend.commit_file().

    Bind the large object already written through `fobj` to a new
    FSObject row; return True on success, False if another writer
    committed the same digest first.

    """
    fobj.close()
    try:
        with SessionGen() as session:
            # Attach the pre-written large object to a fresh FSObject.
            fs_obj = FSObject(description=desc)
            fs_obj.digest = digest
            fs_obj.loid = fobj.loid
            session.add(fs_obj)
            session.commit()
    except IntegrityError:
        # If someone beat us to adding the same object to the database, we
        # should at least drop the large object so it does not leak.
        LargeObject.unlink(fobj.loid)
        logger.warning("File %s (%s) caused an IntegrityError, ignoring.",
                       digest, desc)
        return False
    logger.info("File %s (%s) stored on the database.", digest, desc)
    return True
def delete(self, digest):
    """See FileCacherBackend.delete().

    digest (string): the digest of the file to delete; unknown
        digests are silently ignored.

    """
    with SessionGen() as session:
        fso = FSObject.get_from_digest(digest, session)
        # BUG FIX: get_from_digest() returns None for an unknown
        # digest, and the original called fso.delete() unconditionally,
        # raising AttributeError.  Guard and bail out, matching the
        # behavior of the sibling delete() implementation.
        if fso is None:
            session.rollback()
            return
        fso.delete()
        session.commit()
def describe(self, digest):
    """See FileCacherBackend.describe().

    Return the description of the stored file, or None when the
    digest is unknown.

    """
    with SessionGen() as session:
        fs_obj = FSObject.get_from_digest(digest, session)
        # Unknown digests yield None rather than an error.
        return None if fs_obj is None else fs_obj.description
def get_file(self, digest):
    """See FileCacherBackend.get_file().

    Return a read-only large-object handle for the stored file.

    raise (KeyError): if the digest is not in the database.

    """
    with SessionGen() as session:
        fs_obj = FSObject.get_from_digest(digest, session)
        if fs_obj is None:
            raise KeyError("File not found.")
        # Hand the caller a readable handle on the stored blob.
        return fs_obj.get_lobject(mode='rb')
def describe(self, digest):
    """See FileCacherBackend.describe().

    Return the description of the stored file.

    raise (KeyError): if the digest is not in the database.

    """
    with SessionGen() as session:
        found = FSObject.get_from_digest(digest, session)
        if found is not None:
            return found.description
        raise KeyError("File not found.")
def put_file(self, digest, origin, description=""):
    """See FileCacherBackend.put_file().

    Stream the local file `origin` into a new database large object,
    unless a file with the same digest is already stored.

    """
    try:
        with SessionGen() as session:
            # Check digest uniqueness
            if FSObject.get_from_digest(digest, session) is not None:
                logger.debug("File %s already on database, "
                             "dropping this one.", digest)
                session.rollback()
            # If it is not already present, copy the file into the
            # lobject
            else:
                fso = FSObject(description=description)
                logger.debug("Sending file %s to the database.", digest)
                with open(origin, 'rb') as temp_file:
                    with fso.get_lobject(session, mode='wb') \
                            as lobject:
                        logger.debug("Large object created.")
                        buf = temp_file.read(self.CHUNK_SIZE)
                        # BUG FIX: the file is opened in binary mode,
                        # so read() returns bytes; comparing against
                        # the str '' (`buf != ''`) is always true on
                        # Python 3 and the loop never terminated.
                        # Testing truthiness is correct for both bytes
                        # and str.
                        while buf:
                            while len(buf) > 0:
                                written = lobject.write(buf)
                                buf = buf[written:]
                                # Cooperative yield
                                gevent.sleep(0)
                            buf = temp_file.read(self.CHUNK_SIZE)
                fso.digest = digest
                session.add(fso)
                session.commit()
                logger.debug("File %s sent to the database.", digest)
    except IntegrityError:
        # A concurrent writer inserted the same digest first; that is
        # benign, the content is identical.
        logger.warning("File %s caused an IntegrityError, ignoring...",
                       digest)
def put_file(self, digest, desc=""):
    """See FileCacherBackend.put_file().

    Create the FSObject row for `digest` and return a writable large
    object for its content, or None when the digest is already stored.

    """
    try:
        with SessionGen() as session:
            fso = FSObject.get_from_digest(digest, session)

            # Check digest uniqueness
            if fso is not None:
                logger.debug(
                    "File %s already stored on database, not "
                    "sending it again.", digest)
                session.rollback()
                return None

            # If it is not already present, copy the file into the
            # lobject
            else:
                fso = FSObject(description=desc)
                fso.digest = digest
                session.add(fso)
                logger.debug("File %s stored on the database.", digest)

                # FIXME There is a remote possibility that someone
                # will try to access this file, believing it has
                # already been stored (since its FSObject exists),
                # while we're still sending its content.

                # The lobject must be created before commit so its OID
                # is recorded with the row; the caller fills it in.
                lobject = fso.get_lobject(mode='wb')
                session.commit()
                return lobject
    except IntegrityError:
        # A concurrent writer won the race on the same digest;
        # implicitly returns None in that case.
        logger.warning("File %s caused an IntegrityError, ignoring...",
                       digest)
def get_size(self, digest):
    """See FileCacherBackend.get_size().

    Return the size in bytes of the stored file, or None when the
    digest is unknown.

    """
    # TODO - The business logic may be moved in FSObject, for
    # better generality
    with SessionGen() as session:
        fs_obj = FSObject.get_from_digest(digest, session)
        if fs_obj is None:
            return None
        # Seeking to the end of the large object reports its length.
        with fs_obj.get_lobject(session, mode='rb') as blob:
            return blob.seek(0, os.SEEK_END)
def put_file(self, digest, desc=""):
    """See FileCacherBackend.put_file().

    Create the FSObject row for `digest` and return a writable large
    object for its content, or None when the digest is already stored.

    """
    try:
        with SessionGen() as session:
            fso = FSObject.get_from_digest(digest, session)

            # Check digest uniqueness
            if fso is not None:
                # Use lazy %-style logger arguments instead of eager
                # string interpolation, consistent with the sibling
                # implementation; formatting is then skipped entirely
                # when the DEBUG level is disabled.
                logger.debug("File %s already stored on database, not "
                             "sending it again.", digest)
                session.rollback()
                return None

            # If it is not already present, copy the file into the
            # lobject
            else:
                fso = FSObject(description=desc)
                fso.digest = digest
                session.add(fso)
                logger.debug("File %s stored on the database.", digest)

                # FIXME There is a remote possibility that someone
                # will try to access this file, believing it has
                # already been stored (since its FSObject exists),
                # while we're still sending its content.

                lobject = fso.get_lobject(mode='wb')
                session.commit()
                return lobject
    except IntegrityError:
        # A concurrent writer won the race on the same digest.
        logger.warning("File %s caused an IntegrityError, ignoring...",
                       digest)
def delete(self, digest):
    """See FileCacherBackend.delete().

    Remove the file with the given digest; unknown digests are
    silently ignored.

    """
    with SessionGen() as session:
        fs_obj = FSObject.get_from_digest(digest, session)
        # Nothing to do for an unknown digest.
        if fs_obj is None:
            session.rollback()
            return
        fs_obj.delete()
        session.commit()
def get_size(self, digest):
    """See FileCacherBackend.get_size().

    Return the size in bytes of the stored file.

    raise (KeyError): if the digest is not in the database.

    """
    # TODO - The business logic may be moved in FSObject, for
    # better generality
    with SessionGen() as session:
        fs_obj = FSObject.get_from_digest(digest, session)
        if fs_obj is None:
            raise KeyError("File not found.")
        with fs_obj.get_lobject(mode='rb') as blob:
            # seek() returns the new absolute position, i.e. the size.
            return blob.seek(0, io.SEEK_END)
def get_file(self, digest, dest):
    """See FileCacherBackend.get_file().

    Stream the large object identified by `digest` into the local
    file `dest`, chunk by chunk.

    raise (KeyError): if the digest is not in the database.

    """
    with open(dest, 'wb') as temp_file:
        with SessionGen() as session:
            fso = FSObject.get_from_digest(digest, session)
            # BUG FIX: without this guard an unknown digest raised
            # AttributeError on None; raise KeyError like the other
            # backend methods do.
            if fso is None:
                raise KeyError("File not found.")

            # Copy the file out of the lobject
            with fso.get_lobject(mode='rb') as lobject:
                buf = lobject.read(self.CHUNK_SIZE)
                # BUG FIX: read() on a binary handle returns bytes,
                # which never compare equal to the str '' on Python 3,
                # so `while buf != ''` looped forever; test truthiness
                # instead (works for bytes and str alike).
                while buf:
                    temp_file.write(buf)
                    # Cooperative yield
                    gevent.sleep(0)
                    buf = lobject.read(self.CHUNK_SIZE)
def submit(timestamp, username, password, t_id, t_short, files, language,
           session, cws_address):
    """Execute the request for a submission.

    timestamp (int): seconds from the start.
    username (string): username issuing the submission.
    password (string): password of username.
    t_id (string): id of the task.
    t_short (string): short name of the task.
    files ([dict]): list of files.
    language (string): the extension the files should have.
    session: database session used to fetch the file contents
        from their FSObjects.
    cws_address (string): http address of CWS.

    """
    logger.info("%s - Submitting for %s on task %s.",
                to_time(timestamp), username, t_short)

    # Copying submission files into a temporary directory with the
    # correct name. Otherwise, SubmissionRequest does not know how
    # to interpret the file (and which language are they in).
    temp_dir = tempfile.mkdtemp(dir=config.temp_dir)
    file_name = []
    submission_format = []
    for file_ in files:
        name = file_.filename
        filename = os.path.join(temp_dir, name)
        # Materialize the file content from the database blob.
        fso = FSObject.get_from_digest(file_.digest, session)
        assert fso is not None
        with fso.get_lobject(mode="rb") as file_obj:
            data = file_obj.read()
            with open(filename, "wb") as f_out:
                f_out.write(data)
        file_name.append(filename)
        submission_format.append(name)

    # Log in to CWS and issue the submission over HTTP.
    browser = Browser()
    lr = LoginRequest(browser, username, password, base_url=cws_address)
    browser.login(lr)
    SubmitRequest(browser=browser,
                  task=(int(t_id), t_short),
                  submission_format=submission_format,
                  filenames=file_name,
                  language=language,
                  base_url=cws_address).execute()
    shutil.rmtree(temp_dir)
def create_file(self, digest):
    """See FileCacherBackend.create_file().

    Return a fresh writable LargeObject for `digest`, or None when
    the digest is already stored.

    """
    with SessionGen() as session:
        # Check digest uniqueness.
        if FSObject.get_from_digest(digest, session) is not None:
            logger.debug("File %s already stored on database, not "
                         "sending it again.", digest)
            session.rollback()
            return None
        # Create the large object first. This should be populated
        # and committed before putting it into the FSObjects table.
        return LargeObject(0, mode='wb')
def main():
    """Parse arguments and launch process.

    Export the selected submission files from the database to a
    directory on disk.

    """
    parser = argparse.ArgumentParser(
        description="Export CMS submissions to a folder.")
    parser.add_argument("-c", "--contest-id", action="store", type=int,
                        help="id of contest (default: all contests)")
    parser.add_argument("-t", "--task-id", action="store", type=int,
                        help="id of task (default: all tasks)")
    parser.add_argument("-u", "--user-id", action="store", type=int,
                        help="id of user (default: all users)")
    parser.add_argument("-s", "--submission-id", action="store", type=int,
                        help="id of submission (default: all submissions)")
    parser.add_argument("--add-info", action="store_true",
                        help="if set, information on the submission will"
                        " be added in the first lines of each file")
    parser.add_argument("--min-score", action="store", type=float,
                        help="ignore submissions which scored strictly"
                        " less than this (default: 0.0)",
                        default=0.0)
    parser.add_argument("--filename", action="store", type=utf8_decoder,
                        help="the filename format to use"
                        " (default: {id}.{name}.{ext})",
                        default="{id}.{name}.{ext}")
    parser.add_argument("output_dir", action="store", type=utf8_decoder,
                        help="directory where to save the submissions")
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument("--unique", action="store_true",
                       help="if set, only the earliest best submission"
                       " will be exported for each (user, task)")
    group.add_argument("--best", action="store_true",
                       help="if set, only the best submissions will be"
                       " exported for each (user, task)")

    args = parser.parse_args()

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)
    if not os.path.isdir(args.output_dir):
        logger.critical("The output-dir parameter must point to a directory")
        sys.exit(1)

    with SessionGen() as session:
        # Join submissions with their active-dataset results and the
        # owning user/task, keeping only rows above the score cutoff.
        q = session.query(Submission)\
            .join(Submission.task)\
            .join(Submission.files)\
            .join(Submission.results)\
            .join(SubmissionResult.dataset)\
            .join(Submission.participation)\
            .join(Participation.user)\
            .filter(Dataset.id == Task.active_dataset_id)\
            .filter(SubmissionResult.score >= args.min_score)\
            .with_entities(Submission.id, Submission.language,
                           Submission.timestamp,
                           SubmissionResult.score,
                           File.filename, File.digest,
                           User.id, User.username, User.first_name,
                           User.last_name,
                           Task.id, Task.name)

        if args.contest_id:
            q = q.filter(Participation.contest_id == args.contest_id)
        if args.task_id:
            q = q.filter(Submission.task_id == args.task_id)
        if args.user_id:
            q = q.filter(Participation.user_id == args.user_id)
        if args.submission_id:
            q = q.filter(Submission.id == args.submission_id)

        results = q.all()

        if args.unique or args.best:
            results = filter_top_scoring(results, args.unique)

        print("%s file(s) will be created." % len(results))
        if raw_input("Continue? [Y/n] ").lower() not in ["y", ""]:
            sys.exit(0)

        done = 0
        for row in results:
            s_id, s_language, s_timestamp, sr_score, f_filename, f_digest, \
                u_id, u_name, u_fname, u_lname, t_id, t_name = row

            name = f_filename
            if name.endswith(".%l"):
                name = name[:-3]  # remove last 3 chars

            filename = args.filename.format(id=s_id, name=name,
                                            ext=s_language,
                                            time=s_timestamp, user=u_name)
            filename = os.path.join(args.output_dir, filename)
            if os.path.exists(filename):
                logger.warning("Skipping file '%s' because it already exists",
                               filename)
                # BUG FIX: the original fell through after the warning
                # and overwrote the existing file anyway; actually skip
                # it, as the later revision of this script does.
                continue

            fso = FSObject.get_from_digest(f_digest, session)
            assert fso is not None
            with fso.get_lobject(mode="rb") as file_obj:
                data = file_obj.read().decode('utf-8')

                if args.add_info:
                    data = TEMPLATE[s_language] % (
                        u_name,
                        u_fname,
                        u_lname,
                        t_name,
                        sr_score,
                        s_timestamp
                    ) + data

                with codecs.open(filename, "w",
                                 encoding="utf-8") as file_out:
                    file_out.write(data)

            done += 1
            print(done, "/", len(results))
def export_submissions(target_dir, contest_names, overwrite=False,
                       make_dir=True):
    """
    Export all submissions from the given contests to the given directory.
    If overwrite is true, existing files are overwritten. Otherwise,
    raise an exception if a file exists.
    If make_dir is true, create all subdirectories needed for the
    following format. Otherwise, assume they exist.
    The files of each submission are put in a directory:
    target_dir/contest_name/task_name/user_name/submission_string/
    Where submission_string includes the date, time, task, user, score.
    For example:
    2018-01-01.10-00.1.task_name.username.score-100
    2018-01-01.10-00.2.task_name.username.compilation-fail
    """
    with SessionGen() as session:
        for contest_name in contest_names:
            contest = session.query(Contest)\
                .filter(Contest.name == unicode(contest_name))\
                .first()
            if contest is None:
                raise Exception("Contest not found: %s" % contest_name)
            logger.info("Querying database for submissions in contest %s...",
                        contest_name)
            submissions = session.query(Submission)\
                .filter(Participation.contest_id == contest.id)\
                .join(Submission.task)\
                .join(Submission.files)\
                .join(Submission.results)\
                .join(SubmissionResult.dataset)\
                .join(Submission.participation)\
                .join(Participation.user)\
                .filter(Dataset.id == Task.active_dataset_id)\
                .with_entities(Submission.id, Submission.language,
                               Submission.timestamp,
                               SubmissionResult.score,
                               SubmissionResult.compilation_outcome,
                               File.filename, File.digest,
                               User.username, Task.name)\
                .all()
            logger.info("Found %d submissions. Saving...", len(submissions))
            for (index, row) in enumerate(submissions, 1):
                logger.info("Contest %s: saving submission (%d / %d)",
                            contest_name, index, len(submissions))

                # Get submission info and target file path.
                sid, language, timestamp, score, comp_outcome, filename,\
                    digest, username, task_name = row
                file_path = _get_submission_file_path(
                    target_dir, sid, language, timestamp, score,
                    comp_outcome, filename, username, task_name,
                    contest_name)

                # Don't overwrite if not allowed.
                if not overwrite and os.path.exists(file_path):
                    raise Exception("File exists: %s" % file_path)

                # Make directories if necessary.
                if make_dir:
                    dir_path = os.path.dirname(file_path)
                    if not os.path.exists(dir_path):
                        os.makedirs(dir_path)

                # Save the file.
                fso = FSObject.get_from_digest(digest, session)
                # Guard against a dangling digest instead of failing
                # later with AttributeError on None.
                if fso is None:
                    raise Exception("File not found: %s" % digest)
                with fso.get_lobject(mode="rb") as file_obj:
                    data = file_obj.read()
                    # BUG FIX: the large object yields binary data, but
                    # the original wrote it through a text-mode handle
                    # ("w") -- a TypeError on Python 3 and newline
                    # corruption on Windows.  Write in binary mode.
                    with open(file_path, "wb") as stream:
                        stream.write(data)
def main():
    """Parse arguments and launch process.

    Export the selected submission files from the database to a
    directory on disk, optionally re-encoded as UTF-8 and prefixed
    with submission metadata.

    """
    parser = argparse.ArgumentParser(
        description="Export CMS submissions to a folder.\n",
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("-c", "--contest-id", action="store", type=int,
                        help="id of contest (default: all contests)")
    parser.add_argument("-t", "--task-id", action="store", type=int,
                        help="id of task (default: all tasks)")
    parser.add_argument("-u", "--user-id", action="store", type=int,
                        help="id of user (default: all users)")
    parser.add_argument("-s", "--submission-id", action="store", type=int,
                        help="id of submission (default: all submissions)")
    parser.add_argument("--utf8", action="store_true",
                        help="if set, the files will be encoded in utf8"
                        " when possible")
    parser.add_argument("--add-info", action="store_true",
                        help="if set, information on the submission will"
                        " be added in the first lines of each file")
    parser.add_argument("--min-score", action="store", type=float,
                        help="ignore submissions which scored strictly"
                        " less than this (default: 0.0)",
                        default=0.0)
    parser.add_argument("--filename", action="store", type=utf8_decoder,
                        help="the filename format to use\n"
                        "Variables:\n"
                        " id: submission id\n"
                        " file: filename without extension\n"
                        " ext: filename extension\n"
                        " time: submission timestamp\n"
                        " user: username\n"
                        " task: taskname\n"
                        " score: raw score\n"
                        " (default: {id}.{file}{ext})",
                        default="{id}.{file}{ext}")
    parser.add_argument("output_dir", action="store", type=utf8_decoder,
                        help="directory where to save the submissions")
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument("--unique", action="store_true",
                       help="if set, only the earliest best submission"
                       " will be exported for each (user, task)")
    group.add_argument("--best", action="store_true",
                       help="if set, only the best submissions will be"
                       " exported for each (user, task)")

    args = parser.parse_args()

    # --add-info prepends a text template, which only makes sense when
    # the output is decoded to text.
    if args.add_info and not args.utf8:
        logger.critical("If --add-info is specified, then --utf8 must be"
                        " specified as well.")
        return 1

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)
    if not os.path.isdir(args.output_dir):
        logger.critical("The output-dir parameter must point to a directory")
        return 1

    with SessionGen() as session:
        # Join submissions with their active-dataset results and the
        # owning user/task, keeping only rows above the score cutoff.
        q = session.query(Submission)\
            .join(Submission.task)\
            .join(Submission.files)\
            .join(Submission.results)\
            .join(SubmissionResult.dataset)\
            .join(Submission.participation)\
            .join(Participation.user)\
            .filter(Dataset.id == Task.active_dataset_id)\
            .filter(SubmissionResult.score >= args.min_score)\
            .with_entities(Submission.id, Submission.language,
                           Submission.timestamp,
                           SubmissionResult.score,
                           File.filename, File.digest,
                           User.id, User.username, User.first_name,
                           User.last_name,
                           Task.id, Task.name)

        if args.contest_id:
            q = q.filter(Participation.contest_id == args.contest_id)
        if args.task_id:
            q = q.filter(Submission.task_id == args.task_id)
        if args.user_id:
            q = q.filter(Participation.user_id == args.user_id)
        if args.submission_id:
            q = q.filter(Submission.id == args.submission_id)

        results = q.all()

        if args.unique or args.best:
            results = filter_top_scoring(results, args.unique)

        print("%s file(s) will be created." % len(results))
        if input("Continue? [Y/n] ").strip().lower() not in ["y", ""]:
            return 0

        done = 0
        for row in results:
            s_id, s_language, s_timestamp, sr_score, f_filename, f_digest, \
                u_id, u_name, u_fname, u_lname, t_id, t_name = row

            timef = s_timestamp.strftime('%Y%m%dT%H%M%S')

            # Resolve the language's source extension; language-less
            # submissions are exported as plain text.
            ext = languagemanager.get_language(s_language).source_extension \
                if s_language else '.txt'
            filename_base, filename_ext = os.path.splitext(
                f_filename.replace('.%l', ext))

            # "name" is a deprecated specifier with the same meaning as "file"
            filename = args.filename.format(id=s_id, file=filename_base,
                                            name=filename_base,
                                            ext=filename_ext,
                                            time=timef, user=u_name,
                                            task=t_name, score=sr_score)
            filename = os.path.join(args.output_dir, filename)

            if os.path.exists(filename):
                logger.warning("Skipping file '%s' because it already exists",
                               filename)
                continue
            filedir = os.path.dirname(filename)
            if not os.path.exists(filedir):
                os.makedirs(filedir)
            if not os.path.isdir(filedir):
                logger.warning("%s is not a directory, skipped.", filedir)
                continue

            fso = FSObject.get_from_digest(f_digest, session)
            assert fso is not None
            with fso.get_lobject(mode="rb") as file_obj:
                data = file_obj.read()

                if args.utf8:
                    try:
                        data = utf8_decoder(data)
                    except TypeError:
                        logger.warning("Could not guess encoding of file "
                                       "'%s'. Skipping.", filename)
                        continue

                    if args.add_info:
                        data = TEMPLATE[ext] % (u_name, u_fname, u_lname,
                                                t_name, sr_score,
                                                s_timestamp) + data

                    # Print utf8-encoded, possibly altered data
                    with open(filename, "wt", encoding="utf-8") as f_out:
                        f_out.write(data)
                else:
                    # Print raw, untouched binary data
                    with open(filename, "wb") as f_out:
                        f_out.write(data)

            done += 1
            print(done, "/", len(results))

    return 0
def main():
    """Parse arguments and launch process.

    Export the selected submission files from the database to a
    directory on disk, optionally re-encoded as UTF-8 and prefixed
    with submission metadata.

    """
    parser = argparse.ArgumentParser(
        description="Export CMS submissions to a folder.")
    parser.add_argument("-c", "--contest-id", action="store", type=int,
                        help="id of contest (default: all contests)")
    parser.add_argument("-t", "--task-id", action="store", type=int,
                        help="id of task (default: all tasks)")
    parser.add_argument("-u", "--user-id", action="store", type=int,
                        help="id of user (default: all users)")
    parser.add_argument("-s", "--submission-id", action="store", type=int,
                        help="id of submission (default: all submissions)")
    parser.add_argument("--utf8", action="store_true",
                        help="if set, the files will be encoded in utf8"
                        " when possible")
    parser.add_argument("--add-info", action="store_true",
                        help="if set, information on the submission will"
                        " be added in the first lines of each file")
    parser.add_argument("--min-score", action="store", type=float,
                        help="ignore submissions which scored strictly"
                        " less than this (default: 0.0)",
                        default=0.0)
    parser.add_argument("--filename", action="store", type=utf8_decoder,
                        help="the filename format to use"
                        " (default: {id}.{name}.{ext})",
                        default="{id}.{name}.{ext}")
    parser.add_argument("output_dir", action="store", type=utf8_decoder,
                        help="directory where to save the submissions")
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument("--unique", action="store_true",
                       help="if set, only the earliest best submission"
                       " will be exported for each (user, task)")
    group.add_argument("--best", action="store_true",
                       help="if set, only the best submissions will be"
                       " exported for each (user, task)")

    args = parser.parse_args()

    # --add-info prepends a text template, which only makes sense when
    # the output is decoded to text.
    if args.add_info and not args.utf8:
        logger.critical("If --add-info is specified, then --utf8 must be"
                        " specified as well.")
        return 1

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)
    if not os.path.isdir(args.output_dir):
        logger.critical("The output-dir parameter must point to a directory")
        return 1

    with SessionGen() as session:
        # Join submissions with their active-dataset results and the
        # owning user/task, keeping only rows above the score cutoff.
        q = session.query(Submission)\
            .join(Submission.task)\
            .join(Submission.files)\
            .join(Submission.results)\
            .join(SubmissionResult.dataset)\
            .join(Submission.participation)\
            .join(Participation.user)\
            .filter(Dataset.id == Task.active_dataset_id)\
            .filter(SubmissionResult.score >= args.min_score)\
            .with_entities(Submission.id, Submission.language,
                           Submission.timestamp,
                           SubmissionResult.score,
                           File.filename, File.digest,
                           User.id, User.username, User.first_name,
                           User.last_name,
                           Task.id, Task.name)

        if args.contest_id:
            q = q.filter(Participation.contest_id == args.contest_id)
        if args.task_id:
            q = q.filter(Submission.task_id == args.task_id)
        if args.user_id:
            q = q.filter(Participation.user_id == args.user_id)
        if args.submission_id:
            q = q.filter(Submission.id == args.submission_id)

        results = q.all()

        if args.unique or args.best:
            results = filter_top_scoring(results, args.unique)

        print("%s file(s) will be created." % len(results))
        if raw_input("Continue? [Y/n] ").lower() not in ["y", ""]:
            sys.exit(0)

        done = 0
        for row in results:
            s_id, s_language, s_timestamp, sr_score, f_filename, f_digest, \
                u_id, u_name, u_fname, u_lname, t_id, t_name = row

            name = f_filename
            if name.endswith(".%l"):
                name = name[:-3]  # remove last 3 chars

            filename = args.filename.format(id=s_id, name=name,
                                            ext=s_language,
                                            time=s_timestamp, user=u_name)
            filename = os.path.join(args.output_dir, filename)
            if os.path.exists(filename):
                logger.warning("Skipping file '%s' because it already exists",
                               filename)
                # BUG FIX: the original fell through after the warning
                # and overwrote the existing file anyway; actually skip
                # it, matching the later revision of this script.
                continue

            fso = FSObject.get_from_digest(f_digest, session)
            assert fso is not None
            with fso.get_lobject(mode="rb") as file_obj:
                data = file_obj.read()

                if args.utf8:
                    try:
                        data = utf8_decoder(data)
                    except TypeError:
                        logger.critical("Could not guess encoding of file "
                                        "'%s'. Aborting.", filename)
                        sys.exit(1)

                    if args.add_info:
                        data = TEMPLATE[s_language] % (
                            u_name,
                            u_fname,
                            u_lname,
                            t_name,
                            sr_score,
                            s_timestamp
                        ) + data

                    # Print utf8-encoded, possibly altered data
                    with codecs.open(filename, "w",
                                     encoding="utf-8") as f_out:
                        f_out.write(data)
                else:
                    # Print raw, untouched binary data
                    with open(filename, "wb") as f_out:
                        f_out.write(data)

            done += 1
            print(done, "/", len(results))

    return 0