예제 #1
0
    def test_extension_without_leading_period(self):
        """Check matching when the extension replaces the whole `.%l`.

        PASCAL_LANG is added to the available languages; every call
        uses the submission format ``{"foo.%l"}``, no given language and
        no allowed-languages filter.
        """
        self.languages.update({PASCAL_LANG})
        submission_format = {"foo.%l"}

        # The complete trailing `.%l` has to be substituted by the
        # extension (not merely the `%l` portion), and the extension
        # must not be split off the filename first.
        positive = [ReceivedFile(None, "foolib.pas", FOO_CONTENT)]
        matched, lang = match_files_and_language(
            positive, None, submission_format, None)
        self.assertEqual(matched, {"foo.%l": FOO_CONTENT})
        self.assertIs(lang, PASCAL_LANG)

        # Negative counterpart of the check above.
        negative = [ReceivedFile(None, "foo.lib.pas", FOO_CONTENT)]
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(negative, None, submission_format, None)

        # The same must be true when the codename (rather than the
        # filename) is what gets matched against the submission format
        # and only the filename's extension is checked.
        with_codename = [ReceivedFile("foo.%l", "foolib.pas", FOO_CONTENT)]
        matched, lang = match_files_and_language(
            with_codename, None, submission_format, None)
        self.assertEqual(matched, {"foo.%l": FOO_CONTENT})
        self.assertIs(lang, PASCAL_LANG)
예제 #2
0
    def test_ambiguous_file(self):
        """A file that fits several format elements is ambiguous.

        Uses C_LANG and CPP_LANG with the unusual submission format
        ``{"foo.%l", "foo.c"}``.
        """
        self.languages.update({C_LANG, CPP_LANG})
        submission_format = {"foo.%l", "foo.c"}
        lone_file = [ReceivedFile(None, "foo.c", FOO_CONTENT)]

        # With this admittedly odd submission format, one file can
        # successfully match more than one element.
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(lone_file, "C", submission_format, None)

        # A strange side effect follows. Below, matching the files as C
        # fails (foo.c is ambiguous), while matching them as C++ works
        # (foo.c is no longer compatible with foo.%l), so C++ is guessed
        # as the language. With further allowed languages the outcome
        # would become ambiguous and fail, because every language except
        # C would then be compatible. These sorts of problems only show
        # up when codenames aren't given.
        matched, lang = match_files_and_language(
            lone_file, None, submission_format, None)
        self.assertEqual(matched, {"foo.c": FOO_CONTENT})
        self.assertIs(lang, CPP_LANG)

        # In theory some cases could be disambiguated by being smart
        # enough; we are not.
        two_files = [
            ReceivedFile("foo.%l", "bar.c", FOO_CONTENT),
            ReceivedFile(None, "foo.c", FOO_CONTENT),
        ]
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(two_files, "C", submission_format, None)
예제 #3
0
    def test_bad_file(self):
        """Files that fit no element of the format are rejected."""
        self.languages.update({C_LANG})
        language_format = {"bar.%l"}
        agnostic_format = {"bar.txt"}

        # The codename differs from the one in the format.
        wrong_codename = [ReceivedFile("foo.%l", None, FOO_CONTENT)]
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(
                wrong_codename, "C", language_format, None)

        # The filename is incompatible with the format.
        wrong_filename = [ReceivedFile(None, "foo.c", FOO_CONTENT)]
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(
                wrong_filename, "C", language_format, None)

        # Both situations again, in a language-agnostic setting.
        wrong_codename = [ReceivedFile("foo.txt", None, FOO_CONTENT)]
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(
                wrong_codename, None, agnostic_format, None)

        wrong_filename = [ReceivedFile(None, "foo.txt", FOO_CONTENT)]
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(
                wrong_filename, None, agnostic_format, None)
예제 #4
0
 def test_not_archive_if_other_codenames(self):
     """A "submission" upload next to another codename stays a file.

     The expected result lists both uploads as plain received files,
     each keeping its own codename, filename and content.
     """
     uploads = {
         "submission": [MockHTTPFile("sub.zip", b"this is an archive")],
         "foo.%l": [MockHTTPFile("foo.c", b"this is something else")],
     }
     received = extract_files_from_tornado(uploads)
     expected = [
         ReceivedFile("submission", "sub.zip", b"this is an archive"),
         ReceivedFile("foo.%l", "foo.c", b"this is something else"),
     ]
     six.assertCountEqual(self, received, expected)
예제 #5
0
    def test_duplicate_files(self):
        """Two files resolving to one codename invalidate the match."""
        self.languages.update({C_LANG})

        # One file carries the codename "foo.%l", the other only the
        # filename "foo.c": when two files match the same codename (even
        # through different means) the match is invalid.
        via_codename = ReceivedFile("foo.%l", "bar.c", FOO_CONTENT)
        via_filename = ReceivedFile(None, "foo.c", BAR_CONTENT)
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(
                [via_codename, via_filename], None, {"foo.%l"}, None)
예제 #6
0
 def test_not_archive_if_other_files(self):
     """Two uploads under "submission" are returned as two files.

     The expected result lists each upload separately, both with the
     codename "submission".
     """
     uploads = {
         "submission": [
             MockHTTPFile("sub.zip", b"this is an archive"),
             MockHTTPFile("sub2.zip", b"this is another one"),
         ],
     }
     received = extract_files_from_tornado(uploads)
     expected = [
         ReceivedFile("submission", "sub.zip", b"this is an archive"),
         ReceivedFile("submission", "sub2.zip", b"this is another one"),
     ]
     six.assertCountEqual(self, received, expected)
예제 #7
0
    def test_neither_codename_nor_filename(self):
        """A file with no codename and no filename cannot be matched."""
        self.languages.update({C_LANG})
        anonymous = [ReceivedFile(None, None, FOO_CONTENT)]

        # With neither a codename nor a filename there is nothing to
        # base a match on.
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(anonymous, "C", {"foo.%l"}, None)

        # Equally so in a language-agnostic setting.
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(anonymous, None, {"foo.txt"}, None)
예제 #8
0
    def test_forbidden_language(self):
        """A match in a language outside the allowed ones is rejected."""
        self.languages.update({C_LANG, CPP_LANG})
        received = [ReceivedFile("foo.%l", "foo.c", FOO_CONTENT)]
        submission_format = {"foo.%l"}

        # No language given: the one that would be autoguessed is
        # forbidden.
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(
                received, None, submission_format, ["C++", "Py2"])

        # Giving the language explicitly changes nothing.
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(
                received, "C", submission_format, ["C++", "Py2"])
예제 #9
0
 def test_zip(self):
     """Files written to a deflated zip come back from extraction.

     The archive is built in memory from the expected entries' filenames
     and contents; the extracted result must contain the same entries,
     in any order.
     """
     expected = [
         ReceivedFile(None, "foo.c", b"some content"),
         ReceivedFile(None, "foo", b"some other content"),
         ReceivedFile(None, "foo.%l", b"more content"),
     ]
     buffer = io.BytesIO()
     with zipfile.ZipFile(
             buffer, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
         # The codename (first field) plays no part in building the zip.
         for _codename, name, payload in expected:
             archive.writestr(name, payload)
     extracted = extract_files_from_archive(buffer.getvalue())
     six.assertCountEqual(self, extracted, expected)
예제 #10
0
 def test_success(self):
     """Every uploaded file is returned with its codename and content.

     A codename with several uploads yields one received file per
     upload; a codename with an empty list yields nothing.
     """
     uploads = {
         "foo.%l": [MockHTTPFile("foo.py", b"some python stuff")],
         "bar.%l": [
             MockHTTPFile("bar.c", b"one file in C"),
             MockHTTPFile("bar.cxx", b"the same file in C++"),
         ],
         # An empty list must not contribute anything to the result.
         "baz": [],
     }
     received = extract_files_from_tornado(uploads)
     expected = [
         ReceivedFile("foo.%l", "foo.py", b"some python stuff"),
         ReceivedFile("bar.%l", "bar.c", b"one file in C"),
         ReceivedFile("bar.%l", "bar.cxx", b"the same file in C++"),
     ]
     six.assertCountEqual(self, received, expected)
예제 #11
0
 def test_tar_gz(self):
     """Files written to a gzipped tar come back from extraction.

     The archive is built in memory from the expected entries' filenames
     and contents; the extracted result must contain the same entries,
     in any order.
     """
     expected = [
         ReceivedFile(None, "foo.c", b"some content"),
         ReceivedFile(None, "foo", b"some other content"),
         ReceivedFile(None, "foo.%l", b"more content"),
     ]
     buffer = io.BytesIO()
     with tarfile.open(fileobj=buffer, mode="w:gz") as archive:
         # The codename (first field) plays no part in building the tar.
         for _codename, name, payload in expected:
             member = tarfile.TarInfo(name)
             # The member size has to be set explicitly before adding.
             member.size = len(payload)
             archive.addfile(member, io.BytesIO(payload))
     extracted = extract_files_from_archive(buffer.getvalue())
     six.assertCountEqual(self, extracted, expected)
예제 #12
0
 def test_multiple_slashes_are_compressed(self):
     """A member named "foo//bar" is extracted with filename "bar"."""
     # Collapsing repeated slashes is a (probably expected and)
     # desirable behavior.
     buffer = io.BytesIO()
     with zipfile.ZipFile(buffer, mode="w") as archive:
         archive.writestr("foo//bar", b"some content")
     extracted = extract_files_from_archive(buffer.getvalue())
     six.assertCountEqual(
         self, extracted, [ReceivedFile(None, "bar", b"some content")])
예제 #13
0
    def test_language_agnostic_always_possible(self):
        """The None language is always allowed for agnostic formats."""
        self.languages.update({C_LANG, CPP_LANG})
        received = [ReceivedFile("foo.txt", None, FOO_CONTENT)]
        submission_format = {"foo.txt", "bar.zip"}

        # Passing a (non-None) language in a language-agnostic setting
        # is an error.
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(received, "C", submission_format, None)

        # None (when applicable) stays allowed even though a set of
        # allowed languages is supplied.
        matched, lang = match_files_and_language(
            received, None, submission_format, ["C++"])
        self.assertEqual(matched, {"foo.txt": FOO_CONTENT})
        self.assertIsNone(lang)
예제 #14
0
 def test_filename_with_null(self):
     """A member named "foo\\0bar" is extracted with filename "foo"."""
     # This truncation is expected and most likely unproblematic.
     buffer = io.BytesIO()
     with zipfile.ZipFile(buffer, mode="w") as archive:
         archive.writestr("foo\0bar", b"some content")
     extracted = extract_files_from_archive(buffer.getvalue())
     six.assertCountEqual(
         self, extracted, [ReceivedFile(None, "foo", b"some content")])
예제 #15
0
    def test_success_language_agnostic(self):
        """Languageless files match a language-agnostic format.

        Covers a file with both codename and filename, one with only a
        codename and one with only a filename; the format also holds an
        element ("superfluous") that no file fills.
        """
        self.languages.update({C_LANG, CPP_LANG})

        received = [
            ReceivedFile("foo.txt", "my_name", FOO_CONTENT),
            ReceivedFile("bar.zip", None, BAR_CONTENT),
            ReceivedFile(None, "baz", BAZ_CONTENT),
        ]
        submission_format = {"foo.txt", "bar.zip", "baz", "superfluous"}

        matched, lang = match_files_and_language(
            received, None, submission_format, None)

        expected = {
            "foo.txt": FOO_CONTENT,
            "bar.zip": BAR_CONTENT,
            "baz": BAZ_CONTENT,
        }
        self.assertEqual(matched, expected)
        self.assertIsNone(lang)
예제 #16
0
    def test_nonexisting_given_languages(self):
        """An unknown given language makes the match fail."""
        self.languages.update({C_LANG, CPP_LANG})
        received = [ReceivedFile("foo.%l", "foo.c", FOO_CONTENT)]

        # A contestant passing a language that doesn't exist doesn't
        # know what they are doing, so we don't follow through.
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(received, "BadLang", {"foo.%l"}, None)
예제 #17
0
    def test_bad_extension(self):
        """The filename's extension must suit the given language."""
        self.languages.update({C_LANG})
        received = [ReceivedFile("foo.%l", "foo.cpp", FOO_CONTENT)]

        # The codename matches (and here, though not necessarily, so
        # does the extensionless filename), yet the extension still has
        # to be compatible with the language.
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(received, "C", {"foo.%l"}, None)
예제 #18
0
 def test_directories(self):
     """Only the basename of each archive member path is returned."""
     # The directory structure is ignored: just the trailing component
     # of each path ends up in the result, even when that produces
     # duplicated filenames.
     members = [
         ("toplevel", b"some content"),
         ("nested/once", b"some other content"),
         ("two/levels/deep", b"more content"),
         ("many/levels/deep", b"moar content"),
     ]
     buffer = io.BytesIO()
     with zipfile.ZipFile(
             buffer, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
         for path, payload in members:
             archive.writestr(path, payload)
     extracted = extract_files_from_archive(buffer.getvalue())
     expected = [
         ReceivedFile(None, "toplevel", b"some content"),
         ReceivedFile(None, "once", b"some other content"),
         ReceivedFile(None, "deep", b"more content"),
         ReceivedFile(None, "deep", b"moar content"),
     ]
     six.assertCountEqual(self, extracted, expected)
예제 #19
0
 def test_paths_that_might_escape(self):
     """Check that escaping member names are reduced to a basename.

     For each archive member name that tries to leave the extraction
     location (a parent-directory reference and an absolute path), a
     single-member zip is built in memory and the extracted result is
     expected to carry only the trailing path component, "bar".

     The member name is passed as the assertion message so that a
     failure identifies which of the looped cases broke.
     """
     # This should check that the extracted files cannot "escape"
     # from the temporary directory where they're being extracted to.
     filenames = ["../foo/bar", "/foo/bar"]
     for filename in filenames:
         archive_data = io.BytesIO()
         with zipfile.ZipFile(archive_data, "w") as f:
             f.writestr(filename, b"some content")
         six.assertCountEqual(
             self, extract_files_from_archive(archive_data.getvalue()),
             [ReceivedFile(None, "bar", b"some content")],
             msg="archive member name: %r" % (filename,))
예제 #20
0
    def test_nonexisting_allowed_languages(self):
        """Unknown entries among the allowed languages are ignored."""
        self.languages.update({C_LANG, CPP_LANG})
        received = [ReceivedFile("foo.%l", "foo.c", FOO_CONTENT)]
        submission_format = {"foo.%l"}

        # A non-existing allowed language is treated as a configuration
        # error: admins should intervene, but contestants shouldn't
        # suffer, so such items are simply ignored. That holds when the
        # allowed languages constitute the candidates (because no
        # candidate was given)...
        matched, lang = match_files_and_language(
            received, None, submission_format, ["C", "BadLang"])
        self.assertEqual(matched, {"foo.%l": FOO_CONTENT})
        self.assertIs(lang, C_LANG)

        # ...and when they only filter the given candidates.
        matched, lang = match_files_and_language(
            received, "C", submission_format, ["C", "BadLang"])
        self.assertEqual(matched, {"foo.%l": FOO_CONTENT})
        self.assertIs(lang, C_LANG)
예제 #21
0
    def test_submission_format_empty(self):
        """An empty submission format never produces a valid match."""
        self.languages.update({C_LANG, CPP_LANG})

        # When no files are wanted, any file causes an invalid match.
        language_file = [ReceivedFile("foo.%l", "foo.c", FOO_CONTENT)]
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(language_file, "C", set(), None)

        # That includes language-agnostic settings.
        agnostic_file = [ReceivedFile("foo.txt", "foo.txt", FOO_CONTENT)]
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(agnostic_file, None, set(), None)

        # With no files at all this could be made to work, but it was
        # decided that it means the whole thing is very messed up, so
        # the call aborts instead.
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language([], None, set(), None)
예제 #22
0
    def test_success_language_required(self):
        """Mixed files match a language-specific format as C++.

        Languageful and languageless files, with and without codename
        and filename, are all matched; the format also holds an element
        ("superfluous") that no file fills.
        """
        self.languages.update({C_LANG, CPP_LANG})

        # For "foo.%l" the codename matches, so the "extensionless" part
        # of the filename is irrelevant (the extension still matters).
        # NOTE(review): "eggs.zip" carries HAM_CONTENT and "ham" carries
        # EGGS_CONTENT; the pairing is crossed but consistent between
        # the input and the expectation below.
        received = [
            ReceivedFile("foo.%l", "my_name.cpp", FOO_CONTENT),
            ReceivedFile("bar.%l", None, BAR_CONTENT),
            ReceivedFile(None, "baz.cc", BAZ_CONTENT),
            ReceivedFile("spam.txt", "my_other_name", SPAM_CONTENT),
            ReceivedFile("eggs.zip", None, HAM_CONTENT),
            ReceivedFile(None, "ham", EGGS_CONTENT),
        ]
        submission_format = {
            "foo.%l",
            "bar.%l",
            "baz.%l",
            "spam.txt",
            "eggs.zip",
            "ham",
            "superfluous",
        }

        matched, lang = match_files_and_language(
            received, None, submission_format, None)

        expected = {
            "foo.%l": FOO_CONTENT,
            "bar.%l": BAR_CONTENT,
            "baz.%l": BAZ_CONTENT,
            "spam.txt": SPAM_CONTENT,
            "eggs.zip": HAM_CONTENT,
            "ham": EGGS_CONTENT,
        }
        self.assertEqual(matched, expected)
        self.assertIs(lang, CPP_LANG)
예제 #23
0
    def test_ambiguous_file_2(self):
        """Overlapping language extensions make "foo.suf.fix" ambiguous."""
        self.languages.update(
            {SELF_OVERLAP_LANG, LONG_OVERLAP_LANG, SHORT_OVERLAP_LANG})
        received = [ReceivedFile(None, "foo.suf.fix", FOO_CONTENT)]
        both_elements = {"foo.%l", "foo.suf.%l"}

        # With an even weirder language and submission format, a single
        # file can successfully match two language-specific elements of
        # the submission format.
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(
                received, "SelfOverlap", both_elements, None)

        # Without a given language the overlap is just as ambiguous.
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(received, None, both_elements, None)

        # One more variation, with a single-element format.
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(received, None, {"foo.%l"}, None)
예제 #24
0
    def test_missing_extensions(self):
        """A file with no filename needs the language narrowed down."""
        self.languages.update({C_LANG, CPP_LANG})
        received = [ReceivedFile("foo.%l", None, FOO_CONTENT)]
        wanted = {"foo.%l"}
        expected = {"foo.%l": FOO_CONTENT}

        # Ambiguous: there is no extension to clarify and no language is
        # given, so every language matches.
        self.assertRaises(
            InvalidFilesOrLanguage,
            match_files_and_language, received, None, wanted, None)

        # Restricting the candidates resolves it.
        matched, lang = match_files_and_language(received, "C", wanted, None)
        self.assertEqual(matched, expected)
        self.assertIs(lang, C_LANG)

        # Limiting the allowed languages resolves it as well.
        matched, lang = match_files_and_language(
            received, None, wanted, ["C++"])
        self.assertEqual(matched, expected)
        self.assertIs(lang, CPP_LANG)
예제 #25
0
    def test_ambiguous_extensions(self):
        """An extension shared by two languages needs disambiguation."""
        self.languages.update({PY2_LANG, PY3_LANG})
        received = [ReceivedFile("foo.%l", "foo.py", FOO_CONTENT)]
        wanted = {"foo.%l"}
        expected = {"foo.%l": FOO_CONTENT}

        # Ambiguous: the extension matches both languages.
        self.assertRaises(
            InvalidFilesOrLanguage,
            match_files_and_language, received, None, wanted, None)

        # Restricting the candidates resolves it.
        matched, lang = match_files_and_language(
            received, "Py2", wanted, None)
        self.assertEqual(matched, expected)
        self.assertIs(lang, PY2_LANG)

        # Limiting the allowed languages resolves it as well.
        matched, lang = match_files_and_language(
            received, None, wanted, ["Py3"])
        self.assertEqual(matched, expected)
        self.assertIs(lang, PY3_LANG)
예제 #26
0
    def test_overlapping_extensions(self):
        """Two languages may match different elements with one file."""
        self.languages.update({LONG_OVERLAP_LANG, SHORT_OVERLAP_LANG})
        received = [ReceivedFile(None, "foo.suf.fix", FOO_CONTENT)]
        wanted = {"foo.%l", "foo.suf.%l"}

        # Ambiguous: both languages match, although each one matches a
        # different element of the submission format.
        self.assertRaises(
            InvalidFilesOrLanguage,
            match_files_and_language, received, None, wanted, None)

        # Restricting the candidates resolves it.
        matched, lang = match_files_and_language(
            received, "LongOverlap", wanted, None)
        self.assertEqual(matched, {"foo.%l": FOO_CONTENT})
        self.assertIs(lang, LONG_OVERLAP_LANG)

        # Limiting the allowed languages resolves it as well.
        matched, lang = match_files_and_language(
            received, None, wanted, ["ShortOverlap"])
        self.assertEqual(matched, {"foo.suf.%l": FOO_CONTENT})
        self.assertIs(lang, SHORT_OVERLAP_LANG)
예제 #27
0
    def test_allowed_languages_empty(self):
        """No usable allowed language means only None can match."""
        self.languages.update({C_LANG})
        c_file = [ReceivedFile("foo.%l", "foo.c", FOO_CONTENT)]
        c_format = {"foo.%l"}
        text_file = [ReceivedFile("foo.txt", "foo.txt", FOO_CONTENT)]
        text_format = {"foo.txt"}

        # An empty list of allowed languages means that no language is
        # allowed, so any attempt at matching necessarily fails.
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(c_file, "C", c_format, [])

        # Allowed languages that are all invalid count as none at all.
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(c_file, "C", c_format, ["BadLang"])

        # Likewise when no candidates are given. The distinction matters
        # because the allowed languages are then used as the candidates
        # rather than acting only as a filter.
        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(c_file, None, c_format, [])

        with self.assertRaises(InvalidFilesOrLanguage):
            match_files_and_language(c_file, None, c_format, ["BadLang"])

        # The "None" language, when applicable (i.e., when the
        # submission format is language-agnostic), is always allowed.
        matched, lang = match_files_and_language(
            text_file, None, text_format, [])
        self.assertEqual(matched, {"foo.txt": FOO_CONTENT})
        self.assertIsNone(lang)

        matched, lang = match_files_and_language(
            text_file, None, text_format, ["BadLang"])
        self.assertEqual(matched, {"foo.txt": FOO_CONTENT})
        self.assertIsNone(lang)