def test_extension_without_leading_period(self):
    # Exercises matching against "foo.%l" when only PASCAL_LANG is
    # registered, using filenames ending in "lib.pas".
    self.languages.update({PASCAL_LANG})
    submission_format = {"foo.%l"}

    # The entire trailing `.%l` (period included) has to be swapped for
    # the extension, rather than only the `%l` part; the extension must
    # also not be split off somewhere inside the filename.
    matched, lang = match_files_and_language(
        [ReceivedFile(None, "foolib.pas", FOO_CONTENT)],
        None, submission_format, None)
    self.assertEqual(matched, {"foo.%l": FOO_CONTENT})
    self.assertIs(lang, PASCAL_LANG)

    # Negative counterpart of the check above.
    with self.assertRaises(InvalidFilesOrLanguage):
        match_files_and_language(
            [ReceivedFile(None, "foo.lib.pas", FOO_CONTENT)],
            None, submission_format, None)

    # Same expectation when the codename (not the filename) selects the
    # element of the submission format, so that only the filename's
    # extension gets checked.
    matched, lang = match_files_and_language(
        [ReceivedFile("foo.%l", "foolib.pas", FOO_CONTENT)],
        None, submission_format, None)
    self.assertEqual(matched, {"foo.%l": FOO_CONTENT})
    self.assertIs(lang, PASCAL_LANG)
def test_ambiguous_file(self):
    # A submission format containing both "foo.%l" and "foo.c" lets one
    # file match more than one element.
    self.languages.update({C_LANG, CPP_LANG})
    submission_format = {"foo.%l", "foo.c"}
    lone_file = [ReceivedFile(None, "foo.c", FOO_CONTENT)]

    # With C given explicitly, foo.c fits both elements: rejected.
    with self.assertRaises(InvalidFilesOrLanguage):
        match_files_and_language(lone_file, "C", submission_format, None)

    # A weird side-effect follows when the language has to be guessed:
    # matching as C fails (foo.c is ambiguous), while matching as C++
    # succeeds (foo.c no longer fits foo.%l), so C++ is the guess. With
    # further allowed languages this would turn ambiguous and fail,
    # since every language but C would be compatible. Such issues only
    # show up when codenames aren't given.
    matched, lang = match_files_and_language(
        lone_file, None, submission_format, None)
    self.assertEqual(matched, {"foo.c": FOO_CONTENT})
    self.assertIs(lang, CPP_LANG)

    # In theory some cases could be disambiguated by being smarter;
    # the matcher isn't.
    two_files = [
        ReceivedFile("foo.%l", "bar.c", FOO_CONTENT),
        ReceivedFile(None, "foo.c", FOO_CONTENT),
    ]
    with self.assertRaises(InvalidFilesOrLanguage):
        match_files_and_language(two_files, "C", submission_format, None)
def test_bad_file(self):
    # Files whose codename or filename doesn't fit the submission
    # format must be rejected.
    self.languages.update({C_LANG})

    # (received file, given language, submission format)
    rejected_cases = [
        # Different codename.
        (ReceivedFile("foo.%l", None, FOO_CONTENT), "C", {"bar.%l"}),
        # Incompatible filename.
        (ReceivedFile(None, "foo.c", FOO_CONTENT), "C", {"bar.%l"}),
        # The same two situations in a language-agnostic setting.
        (ReceivedFile("foo.txt", None, FOO_CONTENT), None, {"bar.txt"}),
        (ReceivedFile(None, "foo.txt", FOO_CONTENT), None, {"bar.txt"}),
    ]
    for received, language_name, submission_format in rejected_cases:
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(
                [received], language_name, submission_format, None)
def test_not_archive_if_other_codenames(self):
    # A "submission" entry accompanied by another codename: both
    # uploads are expected back as plain received files.
    archive_upload = MockHTTPFile("sub.zip", b"this is an archive")
    source_upload = MockHTTPFile("foo.c", b"this is something else")
    tornado_files = {
        "submission": [archive_upload],
        "foo.%l": [source_upload],
    }

    received = extract_files_from_tornado(tornado_files)

    expected = [
        ReceivedFile("submission", "sub.zip", b"this is an archive"),
        ReceivedFile("foo.%l", "foo.c", b"this is something else"),
    ]
    six.assertCountEqual(self, received, expected)
def test_duplicate_files(self):
    self.languages.update({C_LANG})

    # Two files ending up on the same codename make the match invalid,
    # even when they get there through different means (one via its
    # codename, the other via its filename).
    clashing_files = [
        ReceivedFile("foo.%l", "bar.c", FOO_CONTENT),
        ReceivedFile(None, "foo.c", BAR_CONTENT),
    ]
    with self.assertRaises(InvalidFilesOrLanguage):
        match_files_and_language(clashing_files, None, {"foo.%l"}, None)
def test_not_archive_if_other_files(self):
    # Two uploads under the single "submission" codename: both are
    # expected back as plain received files carrying that codename.
    uploads = [
        MockHTTPFile("sub.zip", b"this is an archive"),
        MockHTTPFile("sub2.zip", b"this is another one"),
    ]
    tornado_files = {"submission": uploads}

    received = extract_files_from_tornado(tornado_files)

    expected = [
        ReceivedFile("submission", "sub.zip", b"this is an archive"),
        ReceivedFile("submission", "sub2.zip", b"this is another one"),
    ]
    six.assertCountEqual(self, received, expected)
def test_neither_codename_nor_filename(self):
    self.languages.update({C_LANG})
    anonymous_file = [ReceivedFile(None, None, FOO_CONTENT)]

    # A file with no codename and no filename offers nothing to base a
    # match on. First with a language, then language-agnostic.
    for language_name, submission_format in (
            ("C", {"foo.%l"}),
            (None, {"foo.txt"})):
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(
                anonymous_file, language_name, submission_format, None)
def test_forbidden_language(self):
    self.languages.update({C_LANG, CPP_LANG})
    c_source = [ReceivedFile("foo.%l", "foo.c", FOO_CONTENT)]

    # The language that would match is not among the allowed ones:
    # first when it has to be autoguessed (None), then when it is
    # given explicitly ("C").
    for given_language in (None, "C"):
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(
                c_source, given_language, {"foo.%l"}, ["C++", "Py2"])
def test_zip(self):
    # Round trip: pack three members into a deflated zip archive and
    # expect them back (with no codename) from the extractor.
    files = [
        ReceivedFile(None, "foo.c", b"some content"),
        ReceivedFile(None, "foo", b"some other content"),
        ReceivedFile(None, "foo.%l", b"more content"),
    ]

    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, "w",
                         compression=zipfile.ZIP_DEFLATED) as archive:
        for _codename, member_name, payload in files:
            archive.writestr(member_name, payload)

    extracted = extract_files_from_archive(buffer.getvalue())
    six.assertCountEqual(self, extracted, files)
def test_success(self):
    # One codename with a single upload, one with two uploads, and one
    # with none at all.
    tornado_files = {
        "foo.%l": [MockHTTPFile("foo.py", b"some python stuff")],
        "bar.%l": [
            MockHTTPFile("bar.c", b"one file in C"),
            MockHTTPFile("bar.cxx", b"the same file in C++"),
        ],
        # Empty lists must have no effect.
        "baz": [],
    }

    received = extract_files_from_tornado(tornado_files)

    expected = [
        ReceivedFile("foo.%l", "foo.py", b"some python stuff"),
        ReceivedFile("bar.%l", "bar.c", b"one file in C"),
        ReceivedFile("bar.%l", "bar.cxx", b"the same file in C++"),
    ]
    six.assertCountEqual(self, received, expected)
def test_tar_gz(self):
    # Round trip: pack three members into a gzipped tarball and expect
    # them back (with no codename) from the extractor.
    files = [
        ReceivedFile(None, "foo.c", b"some content"),
        ReceivedFile(None, "foo", b"some other content"),
        ReceivedFile(None, "foo.%l", b"more content"),
    ]

    buffer = io.BytesIO()
    with tarfile.open(fileobj=buffer, mode="w:gz") as archive:
        for _codename, member_name, payload in files:
            payload_stream = io.BytesIO(payload)
            member = tarfile.TarInfo(member_name)
            # The size has to be set by hand on a hand-built TarInfo.
            member.size = len(payload)
            archive.addfile(member, payload_stream)

    extracted = extract_files_from_archive(buffer.getvalue())
    six.assertCountEqual(self, extracted, files)
def test_multiple_slashes_are_compressed(self):
    # A member named "foo//bar" is expected back as just "bar"; this is
    # a (probably expected and) desirable behavior.
    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, "w") as archive:
        archive.writestr("foo//bar", b"some content")

    extracted = extract_files_from_archive(buffer.getvalue())
    six.assertCountEqual(
        self, extracted, [ReceivedFile(None, "bar", b"some content")])
def test_language_agnostic_always_possible(self):
    self.languages.update({C_LANG, CPP_LANG})
    given_files = [ReceivedFile("foo.txt", None, FOO_CONTENT)]
    submission_format = {"foo.txt", "bar.zip"}

    # Giving a (non-None) language is an error when the submission
    # format is language-agnostic.
    with self.assertRaises(InvalidFilesOrLanguage):
        match_files_and_language(
            given_files, "C", submission_format, None)

    # None (when applicable) stays allowed even if a set of allowed
    # languages is provided.
    matched, lang = match_files_and_language(
        given_files, None, submission_format, ["C++"])
    self.assertEqual(matched, {"foo.txt": FOO_CONTENT})
    self.assertIsNone(lang)
def test_filename_with_null(self):
    # A member written as "foo\0bar" is expected back as "foo"; this is
    # an expected and most likely unproblematic behavior.
    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, "w") as archive:
        archive.writestr("foo\0bar", b"some content")

    extracted = extract_files_from_archive(buffer.getvalue())
    six.assertCountEqual(
        self, extracted, [ReceivedFile(None, "foo", b"some content")])
def test_success_language_agnostic(self):
    self.languages.update({C_LANG, CPP_LANG})

    # Languageless files, with and without codename and filename, must
    # be matched correctly against a language-agnostic submission
    # format.
    given_files = [
        ReceivedFile("foo.txt", "my_name", FOO_CONTENT),
        ReceivedFile("bar.zip", None, BAR_CONTENT),
        ReceivedFile(None, "baz", BAZ_CONTENT),
    ]
    submission_format = {"foo.txt", "bar.zip", "baz", "superfluous"}

    matched, lang = match_files_and_language(
        given_files, None, submission_format, None)

    expected = {
        "foo.txt": FOO_CONTENT,
        "bar.zip": BAR_CONTENT,
        "baz": BAZ_CONTENT,
    }
    self.assertEqual(matched, expected)
    self.assertIsNone(lang)
def test_nonexisting_given_languages(self):
    self.languages.update({C_LANG, CPP_LANG})
    c_source = [ReceivedFile("foo.%l", "foo.c", FOO_CONTENT)]

    # A given language that doesn't exist means the contestant doesn't
    # know what they are doing: the request is not followed through.
    with self.assertRaises(InvalidFilesOrLanguage):
        match_files_and_language(c_source, "BadLang", {"foo.%l"}, None)
def test_bad_extension(self):
    self.languages.update({C_LANG})
    cpp_named_file = [ReceivedFile("foo.%l", "foo.cpp", FOO_CONTENT)]

    # A matching codename (and here, though not necessarily, a matching
    # extensionless filename) isn't enough: the filename's extension
    # must be compatible with the language as well.
    with self.assertRaises(InvalidFilesOrLanguage):
        match_files_and_language(cpp_named_file, "C", {"foo.%l"}, None)
def test_directories(self):
    # The directory structure must be ignored: only the trailing path
    # component (i.e., the basename) shows up in the return value, even
    # when that produces duplicated filenames.
    members = [
        ("toplevel", b"some content"),
        ("nested/once", b"some other content"),
        ("two/levels/deep", b"more content"),
        ("many/levels/deep", b"moar content"),
    ]

    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, "w",
                         compression=zipfile.ZIP_DEFLATED) as archive:
        for member_path, payload in members:
            archive.writestr(member_path, payload)

    extracted = extract_files_from_archive(buffer.getvalue())
    expected = [
        ReceivedFile(None, "toplevel", b"some content"),
        ReceivedFile(None, "once", b"some other content"),
        ReceivedFile(None, "deep", b"more content"),
        ReceivedFile(None, "deep", b"moar content"),
    ]
    six.assertCountEqual(self, extracted, expected)
def test_paths_that_might_escape(self):
    # Meant to check that extracted files cannot "escape" from the
    # temporary directory they are being extracted to: for each risky
    # member path, only the basename "bar" is expected back.
    for risky_path in ("../foo/bar", "/foo/bar"):
        buffer = io.BytesIO()
        with zipfile.ZipFile(buffer, "w") as archive:
            archive.writestr(risky_path, b"some content")

        extracted = extract_files_from_archive(buffer.getvalue())
        six.assertCountEqual(
            self, extracted,
            [ReceivedFile(None, "bar", b"some content")])
def test_nonexisting_allowed_languages(self):
    self.languages.update({C_LANG, CPP_LANG})
    c_source = [ReceivedFile("foo.%l", "foo.c", FOO_CONTENT)]

    # Non-existing entries among the allowed languages count as a
    # configuration error: admins should intervene but contestants
    # shouldn't suffer, so such entries are simply ignored. This holds
    # both when the allowed languages constitute the candidates (no
    # language given, i.e. None) and when they merely filter the given
    # candidate ("C").
    for given_language in (None, "C"):
        matched, lang = match_files_and_language(
            c_source, given_language, {"foo.%l"}, ["C", "BadLang"])
        self.assertEqual(matched, {"foo.%l": FOO_CONTENT})
        self.assertIs(lang, C_LANG)
def test_submission_format_empty(self):
    self.languages.update({C_LANG, CPP_LANG})

    # When no files are wanted, any file makes the match invalid...
    source_file = [ReceivedFile("foo.%l", "foo.c", FOO_CONTENT)]
    with self.assertRaises(InvalidFilesOrLanguage):
        match_files_and_language(source_file, "C", set(), None)

    # ...including in language-agnostic settings.
    text_file = [ReceivedFile("foo.txt", "foo.txt", FOO_CONTENT)]
    with self.assertRaises(InvalidFilesOrLanguage):
        match_files_and_language(text_file, None, set(), None)

    # With no files at all this could be made to work, but it was
    # decided that it means the whole thing is very messed up, hence
    # it is aborted instead.
    with self.assertRaises(InvalidFilesOrLanguage):
        match_files_and_language([], None, set(), None)
def test_success_language_required(self):
    self.languages.update({C_LANG, CPP_LANG})

    # Languageful and languageless files alike, with and without
    # codename and filename, must be matched correctly against a
    # language-specific submission format. When the codename matches,
    # the "extensionless" part of the filename is irrelevant (the
    # extension, however, still matters).
    given_files = [
        ReceivedFile("foo.%l", "my_name.cpp", FOO_CONTENT),
        ReceivedFile("bar.%l", None, BAR_CONTENT),
        ReceivedFile(None, "baz.cc", BAZ_CONTENT),
        ReceivedFile("spam.txt", "my_other_name", SPAM_CONTENT),
        ReceivedFile("eggs.zip", None, HAM_CONTENT),
        ReceivedFile(None, "ham", EGGS_CONTENT),
    ]
    submission_format = {
        "foo.%l", "bar.%l", "baz.%l",
        "spam.txt", "eggs.zip", "ham",
        "superfluous",
    }

    matched, lang = match_files_and_language(
        given_files, None, submission_format, None)

    expected = {
        "foo.%l": FOO_CONTENT,
        "bar.%l": BAR_CONTENT,
        "baz.%l": BAZ_CONTENT,
        "spam.txt": SPAM_CONTENT,
        "eggs.zip": HAM_CONTENT,
        "ham": EGGS_CONTENT,
    }
    self.assertEqual(matched, expected)
    self.assertIs(lang, CPP_LANG)
def test_ambiguous_file_2(self):
    self.languages.update(
        {SELF_OVERLAP_LANG, LONG_OVERLAP_LANG, SHORT_OVERLAP_LANG})
    given_files = [ReceivedFile(None, "foo.suf.fix", FOO_CONTENT)]

    # (given language, submission format)
    rejected_cases = [
        # With an even weirder language and submission format, one file
        # can match two language-specific elements of the format.
        ("SelfOverlap", {"foo.%l", "foo.suf.%l"}),
        # Wow, much overlap, very ambiguous.
        (None, {"foo.%l", "foo.suf.%l"}),
        # Just for the fun of it.
        (None, {"foo.%l"}),
    ]
    for language_name, submission_format in rejected_cases:
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(
                given_files, language_name, submission_format, None)
def test_missing_extensions(self):
    self.languages.update({C_LANG, CPP_LANG})
    given_files = [ReceivedFile("foo.%l", None, FOO_CONTENT)]
    submission_format = {"foo.%l"}
    expected_files = {"foo.%l": FOO_CONTENT}

    # Ambiguous: with no extension to clarify and no language given,
    # every language matches.
    with self.assertRaises(InvalidFilesOrLanguage):
        match_files_and_language(
            given_files, None, submission_format, None)

    # Restricting the candidates resolves it.
    matched, lang = match_files_and_language(
        given_files, "C", submission_format, None)
    self.assertEqual(matched, expected_files)
    self.assertIs(lang, C_LANG)

    # Limiting the allowed languages resolves it too.
    matched, lang = match_files_and_language(
        given_files, None, submission_format, ["C++"])
    self.assertEqual(matched, expected_files)
    self.assertIs(lang, CPP_LANG)
def test_ambiguous_extensions(self):
    self.languages.update({PY2_LANG, PY3_LANG})
    given_files = [ReceivedFile("foo.%l", "foo.py", FOO_CONTENT)]
    submission_format = {"foo.%l"}
    expected_files = {"foo.%l": FOO_CONTENT}

    # Ambiguous: the extension is matched by both languages.
    with self.assertRaises(InvalidFilesOrLanguage):
        match_files_and_language(
            given_files, None, submission_format, None)

    # Restricting the candidates resolves it.
    matched, lang = match_files_and_language(
        given_files, "Py2", submission_format, None)
    self.assertEqual(matched, expected_files)
    self.assertIs(lang, PY2_LANG)

    # Limiting the allowed languages resolves it too.
    matched, lang = match_files_and_language(
        given_files, None, submission_format, ["Py3"])
    self.assertEqual(matched, expected_files)
    self.assertIs(lang, PY3_LANG)
def test_overlapping_extensions(self):
    self.languages.update({LONG_OVERLAP_LANG, SHORT_OVERLAP_LANG})
    given_files = [ReceivedFile(None, "foo.suf.fix", FOO_CONTENT)]
    submission_format = {"foo.%l", "foo.suf.%l"}

    # Ambiguous: both languages match, though each one does so against
    # a different element of the submission format.
    with self.assertRaises(InvalidFilesOrLanguage):
        match_files_and_language(
            given_files, None, submission_format, None)

    # Restricting the candidates resolves it.
    matched, lang = match_files_and_language(
        given_files, "LongOverlap", submission_format, None)
    self.assertEqual(matched, {"foo.%l": FOO_CONTENT})
    self.assertIs(lang, LONG_OVERLAP_LANG)

    # Limiting the allowed languages resolves it too.
    matched, lang = match_files_and_language(
        given_files, None, submission_format, ["ShortOverlap"])
    self.assertEqual(matched, {"foo.suf.%l": FOO_CONTENT})
    self.assertIs(lang, SHORT_OVERLAP_LANG)
def test_allowed_languages_empty(self):
    self.languages.update({C_LANG})

    # An empty list of allowed languages means no language is allowed,
    # so any attempt at matching must fail; a list made only of invalid
    # languages behaves as if it were empty. This is checked with the
    # candidate given ("C", where the allowed languages only act as a
    # filter) and then without it (None, where the allowed languages
    # are themselves used as candidates).
    source_file = [ReceivedFile("foo.%l", "foo.c", FOO_CONTENT)]
    for given_language in ("C", None):
        for allowed in ([], ["BadLang"]):
            with self.assertRaises(InvalidFilesOrLanguage):
                match_files_and_language(
                    source_file, given_language, {"foo.%l"}, allowed)

    # The "None" language, however, is always allowed when applicable
    # (i.e., when the submission format is language-agnostic).
    text_file = [ReceivedFile("foo.txt", "foo.txt", FOO_CONTENT)]
    for allowed in ([], ["BadLang"]):
        matched, lang = match_files_and_language(
            text_file, None, {"foo.txt"}, allowed)
        self.assertEqual(matched, {"foo.txt": FOO_CONTENT})
        self.assertIsNone(lang)