예제 #1
0
    def test_remove(self):
        """Verify that ``remove`` flips the ``removed`` flag of a :class:`.File`."""
        base_dir = os.path.join(os.getcwd(), 'tests')
        tex_path = os.path.join(base_dir, 'type_test_files/polch.tex')
        target = File(tex_path, base_dir)

        # A freshly wrapped file must not be flagged as removed.
        self.assertFalse(target.removed, 'File not removed yet')

        # Removing with a reason must set the flag.
        target.remove("Test File Removal")
        self.assertTrue(target.removed, 'File has been marked as removed')
예제 #2
0
    def test_file_subdirectory(self):
        """Pretend the file is in a subdirectory of submission workspace.

        Wraps ``tests/type_test_files/polch.tex`` using ``tests`` as the base
        directory and checks the stored arguments, the path-derived
        attributes, the detected type, the extension and the size.
        """
        cwd = os.getcwd()
        testfiles_dir = os.path.join(cwd, 'tests')
        testfile_path = os.path.join(testfiles_dir,
                                     'type_test_files/polch.tex')
        file = File(testfile_path, testfiles_dir)

        self.assertIsInstance(file, File, "Instantiated 'File' class object")

        # Check arguments are stored properly
        self.assertEqual(file.base_dir, testfiles_dir,
                         "Check base_dir() method")
        self.assertEqual(file.filepath, testfile_path,
                         "Check filepath() method")

        # Name and directories; the public_* values are expressed relative
        # to the base directory.
        self.assertEqual(file.name, 'polch.tex', "Check name() method")
        file_dir = os.path.join(testfiles_dir, 'type_test_files')
        self.assertEqual(file.dir, file_dir, "Check dir() method")
        self.assertEqual(file.public_dir, 'type_test_files',
                         "Check public_dir() method")
        self.assertEqual(file.public_filepath, 'type_test_files/polch.tex',
                         "Check public_filepath() method")

        # Type detection, extension and size of the fixture file.
        self.assertEqual(file.type, 'latex', "Check type() method")
        self.assertEqual(file.type_string, 'LaTeX',
                         "Check type_string() method")
        self.assertEqual(file.ext, '.tex', "Check ext() method is '.tex'")
        self.assertEqual(file.size, 358441,
                         "Check size of 'polch.tex' is 358441,")
예제 #3
0
    def test_file(self):
        """Test :class:`.File` methods.

        Wraps ``tests/type_test_files/image.gif`` with its own directory as
        the base directory and checks the stored arguments, path-derived
        attributes, type detection, checksums, the description setter, the
        extension, the size and the modification time.
        """
        cwd = os.getcwd()
        testfiles_dir = os.path.join(cwd, 'tests/type_test_files')
        testfile_path = os.path.join(testfiles_dir, 'image.gif')
        file = File(testfile_path, testfiles_dir)

        self.assertIsInstance(file, File, "Instantiated 'File' class object")

        # Check arguments are stored properly
        self.assertEqual(file.base_dir, testfiles_dir,
                         "Check base_dir() method")
        self.assertEqual(file.filepath, testfile_path,
                         "Check filepath() method")

        self.assertEqual(file.name, 'image.gif', "Check name() method")
        self.assertEqual(file.dir, testfiles_dir, "Check dir() method")

        # The file sits directly in the base directory, so the public
        # directory is expected to be empty.
        self.assertEqual(file.public_dir, '', "Check public_dir() method")
        self.assertEqual(file.public_filepath, "image.gif",
                         "Check public_filepath() method")

        self.assertEqual(file.type, 'image', "Check type() method")
        self.assertEqual(file.type_string, 'Image (gif/jpg etc)',
                         "Check type_string() method")

        # TODO implement sha256sum function
        self.assertEqual(file.sha256sum, "NOT IMPLEMENTED YET",
                         "Check sha256sum method()")
        self.assertEqual(file.checksum, "8KwlZuQvByH23-4HIcANGQ==",
                         "Generate checksum (MD5)")

        # The description must round-trip through its setter.
        file.description = 'This is my favorite photo.'
        self.assertEqual(file.description, 'This is my favorite photo.',
                         "Check description() method")
        self.assertEqual(file.is_tex_type, False,
                         "Check is_tex_type() method")
        self.assertEqual(file.ext, '.gif', "Check ext() method is '.gif'")
        self.assertEqual(file.size, 495, "Check size of '.gif' is 495")

        # Compare against the ISO-formatted, UTC-aware mtime read from disk.
        mtime = os.path.getmtime(file.filepath)
        modified_datetime = datetime.datetime.fromtimestamp(
            mtime, tz=UTC).isoformat()
        self.assertEqual(file.modified_datetime, modified_datetime,
                         "Check modification time of file.")
예제 #4
0
    def test_file_setters(self):
        """Test that we are able to set various settings.

        Checks that ``base_dir`` and ``description`` can be reassigned on a
        :class:`.File` and that the new values are read back.
        """
        cwd = os.getcwd()
        testfiles_dir = os.path.join(cwd, 'tests')
        testfile_path = os.path.join(testfiles_dir,
                                     'type_test_files/polch.tex')
        file = File(testfile_path, testfiles_dir)

        # Check arguments are stored properly
        self.assertEqual(file.base_dir, testfiles_dir,
                         "Check base_dir() method")
        self.assertEqual(file.filepath, testfile_path,
                         "Check filepath() method")

        # new base dir
        new_dir = os.path.join(testfiles_dir, 'type_test_files')
        file.base_dir = new_dir
        self.assertEqual(file.base_dir, new_dir, "Check base_dir() method")

        file.description = "test setter"
        self.assertEqual(file.description, 'test setter',
                         "Check description() method")
예제 #5
0
    def test_anc_detection(self):
        """Check that we are detecting 'special' ancillary directory properly.

        A directory named ``anc`` directly under the base directory is
        expected to be typed as a directory with the ancillary type string.
        """
        cwd = os.getcwd()
        testfiles_dir = os.path.join(cwd, 'tests')
        testfile_path = os.path.join(testfiles_dir, 'anc')

        # Make sure the directory exists; exist_ok avoids the race between
        # an existence check and the creation.
        os.makedirs(testfile_path, exist_ok=True)

        file = File(testfile_path, testfiles_dir)

        self.assertEqual(file.type, 'directory',
                         "Check type of ancillary directory.")
        self.assertEqual(file.type_string, 'Ancillary files directory',
                         "Check type_string for ancillary directory")
예제 #6
0
    def test_dirs(self):
        """Check operations on directories.

        Wraps ``tests/type_test_files/subdirectory`` with ``tests`` as the
        base directory and checks the base directory, the public directory
        and the directory type information.
        """
        cwd = os.getcwd()
        testfiles_dir = os.path.join(cwd, 'tests')
        testfile_path = os.path.join(testfiles_dir, 'type_test_files',
                                     'subdirectory')

        # Make sure the directory exists; exist_ok avoids the race between
        # an existence check and the creation, and missing parents are
        # created as well.
        os.makedirs(testfile_path, exist_ok=True)

        file = File(testfile_path, testfiles_dir)

        self.assertEqual(file.base_dir, testfiles_dir,
                         "Check base_dir() method")
        self.assertEqual(file.public_dir, 'type_test_files',
                         "Check public_dir() method")
        self.assertEqual(file.type, 'directory',
                         "Check type of directory.")
        self.assertEqual(file.type_string, 'Directory',
                         "Check type_string for directory")
예제 #7
0
def _is_within_directory(directory: str, path: str) -> bool:
    """Return True when ``path`` resolves to ``directory`` or a location inside it."""
    base = os.path.realpath(directory)
    resolved = os.path.realpath(path)
    try:
        # Compare whole path components; a plain substring test would accept
        # sibling paths such as '<directory>_other/file'.
        return os.path.commonpath([base, resolved]) == base
    except ValueError:
        # commonpath() refuses to compare some path combinations (e.g.
        # different drives); such a path cannot be inside ``directory``.
        return False


def _warn_disallowed_member(upload: 'Upload', obj: 'File',
                            tarinfo: tarfile.TarInfo) -> None:
    """Warn about a tar member that is a link, device or FIFO (never extracted)."""
    if tarinfo.issym():
        kind = 'Symbolic links'
    elif tarinfo.islnk():
        kind = 'Hard links'
    elif tarinfo.ischr():
        kind = 'Character devices'
    elif tarinfo.isblk():
        kind = 'Block devices'
    elif tarinfo.isfifo():
        kind = 'FIFO'
    else:
        # Any other member type is skipped without a warning.
        return
    upload.add_warning(obj.public_filepath,
                       f"{kind} are not allowed. Removing '{tarinfo.name}'.")


def _extract_tar_members(upload: 'Upload', obj: 'File', tar: tarfile.TarFile,
                         source_directory: str, target_directory: str) -> None:
    """Extract regular files and directories of ``tar`` into ``target_directory``."""
    for tarinfo in tar:
        dest = os.path.join(target_directory, tarinfo.name)

        # Tarfiles may contain relative or absolute paths! We must ensure
        # that each member is not going to escape the upload source
        # directory _before_ we extract it.
        if not _is_within_directory(source_directory, dest):
            upload.log(f"Skipped '{tarinfo.name}' in {obj.public_filepath}: "
                       "path is outside of the source directory")
            continue

        if tarinfo.isreg() or tarinfo.isdir():
            tar.extract(tarinfo, target_directory)
            # Update access and modified times to now.
            os.utime(dest)
        else:
            # Links, devices and FIFOs are never extracted; hard links
            # caused bad things to happen in the legacy system.
            _warn_disallowed_member(upload, obj, tarinfo)


def _retire_tar_archive(upload: 'Upload', path: str, file_name: str,
                        removed_directory: str) -> None:
    """Get an unpacked tar archive out of the source tree."""
    rem_path = os.path.join(removed_directory, os.path.basename(path))
    upload.log(f"Removed packed file {file_name}")

    # Maybe can't do this in production if submitter reloads tar.gz
    if (os.path.exists(rem_path)
            and os.path.getsize(rem_path) == os.path.getsize(path)):
        # File (same size) saved already! Remove tar file
        os.remove(path)
    else:
        # Now move tar file out of way to removed directory
        shutil.move(path, rem_path)


def unpack_archive(upload: 'Upload') -> None:
    """
    Unpack uploaded files.

    Unpack uploaded archives and recursively traverse the source directory
    and unpack any additional gzipped/tar/bzip2/zip archives contained within
    the original archive. The source directory is walked repeatedly until a
    complete pass unpacks nothing.

    During each pass '__MACOSX' directories are deleted (with a warning) and
    'processed' directories are reported with a warning. Unpacked archives
    are moved to the removed directory. Archives that cannot be opened are
    reported as warnings and left in place. Once no archives remain,
    ``upload.set_file_permissions()`` is called.

    Parameters
    ----------
    upload : Upload
        Upload object with files to be unpacked.

    Returns
    -------
    None

    Notes
    -----
    Originates from Upload.pm (Perl).
    """
    source_directory = upload.get_source_directory()
    removed_directory = upload.get_removed_directory()

    # Recursively scan source directory and unpack all archives until there
    # are no more gzipped/tar/zip archives.
    another_pass = True
    while another_pass:
        another_pass = False

        for root_directory, subdirs, files in os.walk(source_directory):

            # Iterate over a copy: entries are removed from 'subdirs' below
            # (to prune os.walk) and mutating a list while iterating it
            # would skip the following sibling.
            for dir_name in list(subdirs):
                path = os.path.join(root_directory, dir_name)

                # wrap in our File encapsulation class
                obj = File(path, source_directory)

                if obj.name == '__MACOSX':
                    upload.add_warning(obj.public_filepath, "Removed '__MACOSX' directory.")
                    # Remove __MACOSX directory
                    if os.path.exists(path):
                        shutil.rmtree(path)
                    # Remove deleted directory from os.walk
                    subdirs.remove(dir_name)
                elif obj.name == 'processed':  # and from_paper_id
                    # TODO: Need to investigate what's going on here so we
                    # TODO: understand what needs to be done.
                    #
                    # Deletion of 'processed' directory depends on
                    # from_paper_id also being set.
                    #
                    # This appears to be related to replacing a submission
                    # where files are imported/copied from previous version
                    # of paper.
                    #
                    # Legacy action is to delete 'processed' directory when
                    # from_paper_id is set.
                    #
                    # We have not reached the point of implementing this yet
                    # so only a warning is issued for now.
                    upload.add_warning(obj.public_filepath, "Detected 'processed' directory. Please check.")

            for file_name in files:
                # os.walk provides file names without the root directory so
                # we need to build the path at each step
                path = os.path.join(root_directory, file_name)

                # wrap in our File encapsulation class
                obj = File(path, source_directory)

                # Tar module is supposed to handle bz2 compressed files
                # (gzip too)
                if ((obj.type == 'tar' or obj.type == 'gzipped')
                        and tarfile.is_tarfile(path)) or obj.type == 'bzip2':

                    # os.walk already yields paths that start with the
                    # source directory, so the archive's own directory is
                    # the extraction target.
                    target_directory = root_directory

                    msg = f"***** unpack {obj.type} {file_name} to dir: {target_directory}"
                    upload.log(msg)

                    try:
                        tar = tarfile.open(path)
                    except tarfile.TarError as error:
                        upload.add_warning(obj.public_filepath, "There were problems opening file '"
                                           + obj.public_filepath + "'")
                        upload.add_warning(obj.public_filepath, 'Tar error message: ' + str(error))
                        # Nothing was unpacked: leave the file where it is
                        # and do not request another pass because of it.
                        continue

                    try:
                        # The context manager closes the archive even when
                        # extraction fails part way through.
                        with tar:
                            _extract_tar_members(upload, obj, tar,
                                                 source_directory,
                                                 target_directory)
                    except tarfile.TarError as error:
                        # TODO: Do something with as error, post to error log
                        upload.add_warning(obj.public_filepath, ERROR_MSG_PRE + obj.public_filepath + ERROR_MSG_SUF)
                        upload.add_warning(obj.public_filepath, 'Tar error message: ' + str(error))

                    # Move packed file out of way
                    _retire_tar_archive(upload, path, file_name,
                                        removed_directory)

                    # Since we are unpacking something we want to make one
                    # more pass over files.
                    another_pass = True

                elif obj.type == 'tar' and not tarfile.is_tarfile(path):
                    print("Package 'tarfile' unable to read this tar file.")
                    # TODO Throw an error

                # Handle .zip files
                elif obj.type == 'zip' and zipfile.is_zipfile(path):
                    target_directory = root_directory
                    print("*******Process zip archive: " + path)
                    msg = f"***** unpack {obj.type} {file_name} to dir: {target_directory}"
                    upload.log(msg)
                    try:
                        with zipfile.ZipFile(path, "r") as zip_ref:
                            zip_ref.extractall(target_directory)
                    except zipfile.BadZipFile as error:
                        # TODO: Think about warnings a bit. Tar/zip problems
                        # currently reported as warnings. Upload warnings
                        # allow submitter to continue on to process/compile
                        # step.
                        upload.add_warning(obj.public_filepath, ERROR_MSG_PRE + obj.public_filepath + ERROR_MSG_SUF)
                        upload.add_warning(obj.public_filepath, 'Zip error message: ' + str(error))
                    else:
                        # The archive is closed at this point; now move zip
                        # file out of way to removed directory
                        rem_path = os.path.join(removed_directory, os.path.basename(path))
                        msg = f"Removed packed file {file_name}"
                        upload.log(msg)
                        shutil.move(path, rem_path)
                        # Since we are unpacking something we want to make
                        # one more pass over files.
                        another_pass = True

                # TODO: Add support for compressed files
                elif obj.type == 'compressed':
                    print("We can't uncompress .Z files yet.")
                    msg = f"***** unpack {obj.type} {file_name} to dir: {source_directory}"
                    upload.log(msg)
                    msg = "Unable to uncompress .Z file. Not implemented yet"
                    upload.log(msg)

                # TODO: Handle encrypted files - need to investigate Crypt
                # and how we are using it.

    # Set permissions on all directories and files
    upload.set_file_permissions()