def update(input_file, burst_match_dict=None):
    """
    In order to avoid segmentation faults when updating a batch of files just
    start every conversion on a different Python process.

    :param input_file: the file that needs to be converted to a newer file storage version.
        This should be a file that still uses TVB 2.0 storage
    """
    if not os.path.isfile(input_file):
        raise FileVersioningException("The input path %s received for upgrading from 2 -> 3 is not a "
                                      "valid file on the disk." % input_file)

    folder, file_name = os.path.split(input_file)
    storage_manager = StorageInterface.get_storage_manager(input_file)

    root_metadata = storage_manager.get_metadata()
    class_name = root_metadata[DataTypeMetaData.KEY_CLASS_NAME]

    if class_name == "LocalConnectivity":
        root_metadata[DataTypeMetaData.KEY_MODULE] = "tvb.datatypes.local_connectivity"
        storage_manager.set_metadata(root_metadata)
        update_localconnectivity_metadata(folder, file_name)

    elif class_name == "RegionMapping":
        root_metadata[DataTypeMetaData.KEY_MODULE] = "tvb.datatypes.region_mapping"

    root_metadata[TvbProfile.current.version.DATA_VERSION_ATTRIBUTE] = TvbProfile.current.version.DATA_VERSION
    storage_manager.set_metadata(root_metadata)
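
# The docstring above recommends one Python process per conversion. Below is a
# minimal sketch of such a batch driver, assuming the `update` function defined
# above; `run_batch_update` and the use of multiprocessing are illustrative,
# not part of the TVB API. A segmentation fault in one conversion then surfaces
# as a non-zero exit code instead of killing the whole batch.
import multiprocessing


def run_batch_update(input_files):
    for input_file in input_files:
        proc = multiprocessing.Process(target=update, args=(input_file,))
        proc.start()
        proc.join()
        if proc.exitcode != 0:
            print("Conversion failed for %s (exit code %s)" % (input_file, proc.exitcode))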
# Example #2
    def __init__(self, path):
        # type: (str) -> None
        self.path = path
        self.storage_manager = StorageInterface.get_storage_manager(self.path)
        # would be nice to have an opened state for the chunked api instead of the close_file=False

        # common scalar headers
        self.gid = Uuid(HasTraits.gid, self)
        self.written_by = Scalar(Attr(str), self, name=self.KEY_WRITTEN_BY)
        self.create_date = Scalar(Attr(str), self, name='create_date')
        self.type = Scalar(Attr(str), self, name='type')

        # Generic attributes descriptors
        self.generic_attributes = GenericAttributes()
        self.invalid = Scalar(Attr(bool), self, name='invalid')
        self.is_nan = Scalar(Attr(bool), self, name='is_nan')
        self.subject = Scalar(Attr(str), self, name='subject')
        self.state = Scalar(Attr(str), self, name='state')
        self.user_tag_1 = Scalar(Attr(str), self, name='user_tag_1')
        self.user_tag_2 = Scalar(Attr(str), self, name='user_tag_2')
        self.user_tag_3 = Scalar(Attr(str), self, name='user_tag_3')
        self.user_tag_4 = Scalar(Attr(str), self, name='user_tag_4')
        self.user_tag_5 = Scalar(Attr(str), self, name='user_tag_5')
        self.operation_tag = Scalar(Attr(str, required=False),
                                    self,
                                    name='operation_tag')
        self.parent_burst = Uuid(Attr(uuid.UUID, required=False),
                                 self,
                                 name='parent_burst')
        self.visible = Scalar(Attr(bool), self, name='visible')
        self.metadata_cache = None
        # Keep a list of datasets for which we should write metadata before closing the file
        self.expandable_datasets = []

        # default to False, so the attribute exists even when the file already existed on disk
        self.is_new_file = False
        if not self.storage_manager.is_valid_tvb_file():
            self.written_by.store(self.get_class_path())
            self.is_new_file = True
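
# A minimal, self-contained sketch of the accessor pattern used in the
# constructor above: each Scalar/Uuid descriptor keeps a reference to its
# owner and a dataset name, and store() writes through the owner. The names
# MiniScalar and DummyOwner are illustrative, not TVB classes, and the real
# Scalar also validates values against its Attr declaration.
class MiniScalar:
    def __init__(self, owner, name):
        self.owner = owner
        self.name = name

    def store(self, value):
        # the real accessor delegates to the owner's storage manager
        self.owner.metadata[self.name] = value


class DummyOwner:
    def __init__(self):
        self.metadata = {}


owner = DummyOwner()
subject = MiniScalar(owner, "subject")
subject.store("John Doe")
assert owner.metadata == {"subject": "John Doe"}
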
class FilesUpdateManager(UpdateManager):
    """
    Manager for updating the H5 file version when the code changes.
    """

    UPDATE_SCRIPTS_SUFFIX = "_update_files"
    PROJECTS_PAGE_SIZE = 20
    DATA_TYPES_PAGE_SIZE = 500
    STATUS = True
    MESSAGE = "Done"

    def __init__(self):
        super(FilesUpdateManager, self).__init__(file_update_scripts,
                                                 TvbProfile.current.version.DATA_CHECKED_TO_VERSION,
                                                 TvbProfile.current.version.DATA_VERSION)
        self.storage_interface = StorageInterface()

    def get_file_data_version(self, file_path):
        """
        Return the data version for the given file.

        :param file_path: the path on disk to the file for which you need the TVB data version
        :returns: a number representing the data version for which the input file was written
        """
        data_version = TvbProfile.current.version.DATA_VERSION_ATTRIBUTE
        return self.storage_interface.get_storage_manager(file_path).get_file_data_version(data_version)

    def is_file_up_to_date(self, file_path):
        """
        Returns True only if the data version of the file is equal with the
        data version specified into the TVB configuration file.
        """
        try:
            file_version = self.get_file_data_version(file_path)
        except MissingDataFileException as ex:
            self.log.exception(ex)
            return False
        except FileStructureException as ex:
            self.log.exception(ex)
            return False

        if file_version == TvbProfile.current.version.DATA_VERSION:
            return True
        return False

    def upgrade_file(self, input_file_name, datatype=None, burst_match_dict=None):
        """
        Upgrades the given file to the latest data version. The file will be upgraded
        sequentially, up until the current version from tvb.basic.config.settings.VersionSettings.DB_STRUCTURE_VERSION

        :param input_file_name the path to the file which needs to be upgraded
        :return True when update was successful and False when it resulted in an error.
        """
        if self.is_file_up_to_date(input_file_name):
            # Avoid running the DB update of size, when H5 is not being changed, to speed-up
            return True

        file_version = self.get_file_data_version(input_file_name)
        self.log.info("Updating from version %s , file: %s " % (file_version, input_file_name))
        for script_name in self.get_update_scripts(file_version):
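            # keep a temporary backup, so the original file can be restored if a script fails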
            temp_file_path = os.path.join(TvbProfile.current.TVB_TEMP_FOLDER,
                                          os.path.basename(input_file_name) + '.tmp')
            self.storage_interface.copy_file(input_file_name, temp_file_path)
            try:
                self.run_update_script(script_name, input_file=input_file_name, burst_match_dict=burst_match_dict)
            except FileMigrationException as excep:
                self.storage_interface.copy_file(temp_file_path, input_file_name)
                os.remove(temp_file_path)
                self.log.error(excep)
                return False

        if datatype:
            # Compute and update the disk_size attribute of the DataType in DB:
            datatype.disk_size = self.storage_interface.compute_size_on_disk(input_file_name)
            dao.store_entity(datatype)

        return True

    def __upgrade_h5_list(self, h5_files):
        """
        Upgrade a list of DataTypes to the current version.

        :returns: (nr_of_dts_upgraded_fine, nr_of_dts_ignored) a two-tuple of integers representing
            the number of DataTypes for which the upgrade worked fine, and the number of DataTypes for which
            the upgrade has failed.
        """
        nr_of_dts_upgraded_fine = 0
        nr_of_dts_failed = 0

        burst_match_dict = {}
        for path in h5_files:
            update_result = self.upgrade_file(path, burst_match_dict=burst_match_dict)

            if update_result:
                nr_of_dts_upgraded_fine += 1
            else:
                nr_of_dts_failed += 1

        return nr_of_dts_upgraded_fine, nr_of_dts_failed

    # TODO: We should migrate the older scripts to Python 3 if we want to support migration for versions < 4
    def run_all_updates(self):
        """
        Upgrades all the data types from TVB storage to the latest data version.

        :returns: a two entry tuple (status, message) where status is a boolean that is True in case
            the upgrade was successfully for all DataTypes and False otherwise, and message is a status
            update message.
        """
        if TvbProfile.current.version.DATA_CHECKED_TO_VERSION < TvbProfile.current.version.DATA_VERSION:
            start_time = datetime.now()

            file_paths = self.get_all_h5_paths()
            total_count = len(file_paths)
            no_ok, no_error = self.__upgrade_h5_list(file_paths)

            self.log.info("Updated H5 files in total: %d [fine:%d, failed:%d in: %s min]" % (
                total_count, no_ok, no_error, int((datetime.now() - start_time).seconds / 60)))
            delete_old_burst_table_after_migration()

            # Now update the configuration file since update was done
            config_file_update_dict = {stored.KEY_LAST_CHECKED_FILE_VERSION: TvbProfile.current.version.DATA_VERSION}

            if no_error == 0:
                # Everything went fine
                config_file_update_dict[stored.KEY_FILE_STORAGE_UPDATE_STATUS] = FILE_STORAGE_VALID
                FilesUpdateManager.STATUS = True
                FilesUpdateManager.MESSAGE = ("File upgrade finished successfully for all %s entries. "
                                              "Thank you for your patience!" % total_count)
                self.log.info(FilesUpdateManager.MESSAGE)
            else:
                # Keep track of how many DataTypes were properly updated and how many
                # were marked as invalid due to missing files or invalid manager.
                config_file_update_dict[stored.KEY_FILE_STORAGE_UPDATE_STATUS] = FILE_STORAGE_INVALID
                FilesUpdateManager.STATUS = False
                FilesUpdateManager.MESSAGE = ("Out of %s stored DataTypes, %s were upgraded successfully, but %s had "
                                              "faults and were marked invalid" % (total_count, no_ok, no_error))
                self.log.warning(FilesUpdateManager.MESSAGE)

            TvbProfile.current.version.DATA_CHECKED_TO_VERSION = TvbProfile.current.version.DATA_VERSION
            TvbProfile.current.manager.add_entries_to_config_file(config_file_update_dict)

    @staticmethod
    def get_all_h5_paths():
        """
        This method returns a list of all h5 files and it is used in the migration from version 4 to 5.
        The h5 files inside a certain project are retrieved in numerical order (1, 2, 3 etc.).
        """
        h5_files = []
        storage_interface = StorageInterface()
        projects_folder = storage_interface.get_projects_folder()

        for project_path in os.listdir(projects_folder):
            # Getting the operation folders inside the current project
            project_full_path = os.path.join(projects_folder, project_path)
            try:
                project_operations = os.listdir(project_full_path)
            except NotADirectoryError:
                continue

            for op_folder in project_operations:
                # operation folders have numeric names; skip anything else
                if not op_folder.isdigit():
                    continue
                op_folder_path = os.path.join(project_full_path, op_folder)
                for file in os.listdir(op_folder_path):
                    if storage_interface.ends_with_tvb_storage_file_extension(file):
                        h5_file = os.path.join(op_folder_path, file)
                        try:
                            if FilesUpdateManager._is_empty_file(h5_file):
                                continue
                            h5_files.append(h5_file)
                        except FileStructureException:
                            continue

        # Sort all H5 files based on their creation date, stored in the files themselves;
        # entries without a readable creation date fall back to "now" and therefore sort last
        sorted_h5_files = sorted(h5_files, key=lambda h5_path: FilesUpdateManager._get_create_date_for_sorting(
            h5_path) or datetime.now())
        return sorted_h5_files

    @staticmethod
    def _is_empty_file(h5_file):
        return H5File.get_metadata_param(h5_file, 'Create_date') is None

    @staticmethod
    def _get_create_date_for_sorting(h5_file):
        # metadata values are stored as bytes, hence the explicit utf-8 decoding
        create_date_str = str(H5File.get_metadata_param(h5_file, 'Create_date'), 'utf-8')
        create_date = string2date(create_date_str, date_format='datetime:%Y-%m-%d %H:%M:%S.%f')
        return create_date
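
# A minimal, hedged usage sketch for the manager above, assuming a configured
# TVB profile and database; the H5 path is hypothetical.
manager = FilesUpdateManager()
if not manager.is_file_up_to_date("/path/to/datatype.h5"):
    # upgrades the file in place, returning True on success
    manager.upgrade_file("/path/to/datatype.h5")

# alternatively, sweep every project; the outcome is recorded in STATUS / MESSAGE
manager.run_all_updates()
print(FilesUpdateManager.STATUS, FilesUpdateManager.MESSAGE)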
# Example #4
    # staticmethod excerpted from H5File (the enclosing class is not part of this snippet)
    @staticmethod
    def get_metadata_param(path, param):
        meta = StorageInterface.get_storage_manager(path).get_metadata()
        return meta.get(param)
def update(input_file, burst_match_dict=None):
    """
    :param input_file: the file that needs to be converted to a newer file storage version.
    """

    if not os.path.isfile(input_file):
        raise IncompatibleFileManagerException(
            "The input path %s received for upgrading from 3 -> 4 is not a "
            "valid file on the disk." % input_file)

    folder, file_name = os.path.split(input_file)
    storage_manager = StorageInterface.get_storage_manager(input_file)

    root_metadata = storage_manager.get_metadata()
    if DataTypeMetaData.KEY_CLASS_NAME not in root_metadata:
        raise IncompatibleFileManagerException(
            "File %s received for upgrading 3 -> 4 is not valid, due to missing "
            "metadata: %s" % (input_file, DataTypeMetaData.KEY_CLASS_NAME))
    class_name = root_metadata[DataTypeMetaData.KEY_CLASS_NAME]
    # metadata values are stored as bytes, hence the explicit utf-8 decoding
    class_name = str(class_name, 'utf-8')
    if "ProjectionSurface" in class_name and FIELD_PROJECTION_TYPE not in root_metadata:
        LOGGER.info("Updating ProjectionSurface %s from %s" %
                    (file_name, folder))

        projection_type = ProjectionsType.EEG.value
        if "SEEG" in class_name:
            projection_type = ProjectionsType.SEEG.value
        elif "MEG" in class_name:
            projection_type = ProjectionsType.MEG.value

        root_metadata[FIELD_PROJECTION_TYPE] = json.dumps(projection_type)
        LOGGER.debug("Setting %s = %s" %
                     (FIELD_PROJECTION_TYPE, projection_type))

    elif "TimeSeries" in class_name:
        LOGGER.info("Updating TS %s from %s" % (file_name, folder))

        service = ImportService()
        try:
            operation_id = int(os.path.split(folder)[1])
            dt = service.load_datatype_from_file(
                os.path.join(folder, file_name), operation_id)
            dt_db = dao.get_datatype_by_gid(dt.gid)
        except ValueError:
            dt_db = None

        if dt_db is not None:
            # DT already in DB: refresh its own storage, making sure all fields are correctly populated
            dt_db.configure()
            dt_db.persist_full_metadata()
            try:
                # restore in DB, in case TVB 1.4 had wrongly imported flags
                dao.store_entity(dt_db)
            except Exception:
                LOGGER.exception(
                    "Could not update flags in DB, but we continue with the update!"
                )

        elif FIELD_SURFACE_MAPPING not in root_metadata:
            # Provide default values, so that the missing mappings do not break the full project import
            root_metadata[FIELD_SURFACE_MAPPING] = json.dumps(False)
            root_metadata[FIELD_VOLUME_MAPPING] = json.dumps(False)

    root_metadata[TvbProfile.current.version.DATA_VERSION_ATTRIBUTE] = TvbProfile.current.version.DATA_VERSION
    storage_manager.set_metadata(root_metadata)
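
# A small, self-contained sketch of the dispatch used above: the projection
# type is derived from the stored class name, checking the more specific
# "SEEG" substring before the others (note that "SEEG" itself contains "EEG",
# which is why EEG is only the fallback). The returned strings are
# placeholders; the real script stores ProjectionsType enum values instead.
def infer_projection_type(class_name):
    if "SEEG" in class_name:
        return "SEEG"  # ProjectionsType.SEEG.value in the script above
    if "MEG" in class_name:
        return "MEG"   # ProjectionsType.MEG.value
    return "EEG"       # default: ProjectionsType.EEG.value


assert infer_projection_type("ProjectionSurfaceSEEG") == "SEEG"
assert infer_projection_type("ProjectionSurfaceEEG") == "EEG"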
# Example #6
    # staticmethod in the same style as get_metadata_param above (the enclosing class is not part of this snippet)
    @staticmethod
    def remove_metadata_param(file_path, param):
        storage_manager = StorageInterface.get_storage_manager(file_path)
        if param in storage_manager.get_metadata():
            storage_manager.remove_metadata(param)
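
# Hedged usage sketch for the two metadata helpers above, assuming both live on
# H5File (get_metadata_param demonstrably does, see _is_empty_file earlier; for
# remove_metadata_param this is an assumption). Path and attribute names are
# hypothetical.
print(H5File.get_metadata_param("/path/to/datatype.h5", "Create_date"))
H5File.remove_metadata_param("/path/to/datatype.h5", "Obsolete_attribute")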