def daily_cleanup():
    """
    Purge stale rows from the database.

    Runs the session-cleaning management commands, deletes news items and
    log entries whose timestamp is more than 365 days old, clears old
    official grades, and cleans up old similarity reports.
    """
    logger.info('Cleaning up database')

    # session tables: run both session-cleaning management commands
    for command_name in ('clearsessions', 'django_cas_ng_clean_sessions'):
        call_command(command_name)

    max_age = datetime.timedelta(days=365)

    # news items last updated more than a year ago
    stale_news = NewsItem.objects.filter(updated__lt=datetime.datetime.now() - max_age)
    stale_news.delete()

    # log entries more than a year old
    stale_log_entries = LogEntry.objects.filter(datetime__lt=datetime.datetime.now() - max_age)
    stale_log_entries.delete()

    # official grades that are old enough to discard
    Member.clear_old_official_grades()

    # similarity reports: imported here, at the point of use, not at module level
    from submission.models.base import SimilarityResult
    SimilarityResult.cleanup_old()
def run_moss_as_task(activities: List[Activity], language: str) -> SimilarityResult:
    """
    Queue run_moss() as a Celery task and return the not-yet-complete result.

    The activities arg: list of all activities to compare, where activities[0]
    is the "main" one for this course. Any existing MOSS result for that main
    activity is deleted and replaced by a fresh SimilarityResult whose config
    has 'complete' set to False; that new object is what gets returned.
    """
    main_activity = activities[0]

    # discard any earlier MOSS result for this activity before creating the new one
    SimilarityResult.objects.filter(activity=main_activity, generator='MOSS').delete()

    placeholder = SimilarityResult(
        activity=main_activity,
        generator='MOSS',
        config={'language': language, 'complete': False},
    )
    placeholder.save()

    # imported at the point of use, not at module level
    from submission.tasks import run_moss_task
    activity_ids = [a.id for a in activities]
    run_moss_task.delay(main_activity.id, activity_ids, language, placeholder.id)

    return placeholder
def run_moss(main_activity: Activity, activities: List[Activity], language: str, result: SimilarityResult) -> SimilarityResult:
    """
    Run MOSS for the main_activity's submissions.

    ... comparing past submission from everything in the activities list.
    ... looking only at the given programming language.
    ... storing the results in result.

    The most recent individual code-file submissions for every activity are
    symlinked into a temporary directory, moss.pl is run over them, and each
    HTML file it produces is converted into a SimilarityData row attached to
    result. On success result.config['complete'] is set to True and result is
    saved and returned.

    Raises:
        MOSSError: no files in the requested language, the moss.pl executable
            is missing, or the MOSS command exits with a non-zero status.
        NotImplementedError: a submitted file is not on FileSystemStorage.
        ValueError: MOSS produced a file that is not recognized, or a match
            page that does not name its source file.
    """
    assert language in MOSS_LANGUAGES
    assert main_activity in activities

    icon_url_path = reverse('dashboard:moss_icon', kwargs={'filename': ''})
    offering_slug = main_activity.offering.slug
    extension = '.' + MOSS_LANGUAGES[language]

    # The context manager guarantees the working directory is removed, even
    # when one of the steps below raises.
    with tempfile.TemporaryDirectory() as tmp:
        code_dir = os.path.join(tmp, 'code')
        moss_out_dir = os.path.join(tmp, 'moss')

        # assemble tmp directory of submissions for MOSS
        moss_files = []  # files that we will give to MOSS
        file_submissions = {}  # MOSS input file to submission_id, so we can recover the source later
        for activity in activities:
            si = SubmissionInfo.for_activity(activity)
            si.get_all_components()
            _, individual_subcomps, _ = si.most_recent_submissions()
            for userid, components in individual_subcomps.items():
                prefix = os.path.join(code_dir, activity.offering.slug, userid)
                for comp, sub in components:
                    if not isinstance(sub, SubmittedCodefile):
                        # we can only deal with Codefile components
                        continue
                    if not isinstance(sub.code.storage, FileSystemStorage):
                        raise NotImplementedError('more work necessary to support non-filesystem file storage')

                    source_file = os.path.join(sub.code.storage.location, sub.code.name)
                    moss_file = sub.file_filename(sub.code, prefix)
                    if not moss_file.endswith(extension):
                        # we only handle one language at a time
                        continue

                    dst_dir, _ = os.path.split(moss_file)
                    os.makedirs(dst_dir, exist_ok=True)
                    os.symlink(source_file, moss_file)
                    moss_files.append(moss_file)
                    file_submissions[moss_file] = sub.submission_id

        if not moss_files:
            raise MOSSError('No files found for that language to analyze with MOSS.')

        # run MOSS
        moss_pl = os.path.join(settings.MOSS_DISTRIBUTION_PATH, 'moss.pl')
        cmd = [moss_pl, '-l', language, '-o', moss_out_dir] + moss_files
        try:
            res = subprocess.run(cmd, cwd=settings.MOSS_DISTRIBUTION_PATH)
        except FileNotFoundError as err:
            raise MOSSError('System not correctly configured with the MOSS executable.') from err
        if res.returncode != 0:
            raise MOSSError('MOSS command failed: ' + str(cmd))

        # try to deal with MOSS' HTML, and produce SimilarityData objects to represent everything
        for f in os.listdir(moss_out_dir):
            path = os.path.join(moss_out_dir, f)

            if f == 'index.html':
                with open(path, 'rt', encoding='utf8') as fh:
                    html = fh.read()
                soup = bs4.BeautifulSoup(html, 'lxml')

                index_data = []
                for tr in soup.find_all('tr'):
                    if tr.find('th'):
                        # header row: no match data
                        continue
                    m = []
                    for link in tr.find_all('a'):
                        label = link.get('href')
                        # link text is split into the file name and a second, space-separated field
                        fn, perc = link.string.split(' ')
                        fn = _canonical_filename(fn, code_dir)
                        m.append((label, fn, perc))

                    # Only display if one side is from the main_activity: leave the past behind.
                    if any(fn.startswith(offering_slug + '/') for _, fn, _ in m):
                        index_data.append(m)

                data = SimilarityData(result=result, label='index.html', file=None, config={})
                data.config['index_data'] = index_data
                data.save()

            elif match_base_re.match(f):
                # files matching match_base_re are deliberately not stored
                pass

            elif match_top_re.match(f):
                with open(path, 'rt', encoding='utf8') as fh:
                    html = fh.read()
                soup = bs4.BeautifulSoup(html, 'lxml')
                table = soup.find('table')

                # strip presentational attributes from the table
                del table['bgcolor']
                del table['border']
                del table['cellspacing']

                for th in table.find_all('th'):
                    if th.string is not None:
                        th.string = _canonical_filename(th.string, code_dir)
                for img in table.find_all('img'):
                    src = img.get('src')
                    img['src'] = src.replace('../bitmaps/', icon_url_path)

                content = File(file=io.BytesIO(str(table).encode('utf8')), name=f)
                data = SimilarityData(result=result, label=f, file=content, config={})
                data.save()

            elif match_file_re.match(f):
                # fall back to windows-1252 when the page is not valid UTF-8
                try:
                    with open(path, 'rt', encoding='utf8') as fh:
                        html = fh.read()
                except UnicodeDecodeError:
                    with open(path, 'rt', encoding='windows-1252') as fh:
                        html = fh.read()
                soup = bs4.BeautifulSoup(html, 'lxml')

                # find the input filename, which leads to the submission
                # (reset per file so a page without one cannot silently reuse
                # the filename found in a previous page)
                filename = None
                for child in soup.find('body').children:
                    if isinstance(child, bs4.element.NavigableString):
                        text = str(child).strip()
                        if text.startswith(code_dir):
                            filename = text
                            break
                if filename is None:
                    raise ValueError('could not find the source filename in MOSS output file ' + f)
                submission_id = file_submissions[filename]

                # the only <pre> is the real content we care about
                pre = soup.find('pre')
                for img in pre.find_all('img'):
                    src = img.get('src')
                    img['src'] = src.replace('../bitmaps/', icon_url_path)

                content = File(file=io.BytesIO(str(pre).encode('utf8')), name=f)
                data = SimilarityData(result=result, label=f, file=content, submission_id=submission_id, config={})
                data.save()

            else:
                raise ValueError('unexpected file produced by MOSS')

    result.config['complete'] = True
    result.save()
    return result