class ImportService():
    """
    Service for importing TVB entities into system.
    It supports TVB exported H5 files as input, but it should also handle H5 files 
    generated outside of TVB, as long as they respect the same structure.
    """
    def __init__(self):
        self.logger = get_logger(__name__)
        self.user_id = None
        self.files_helper = FilesHelper()
        self.created_projects = []

    def _download_and_unpack_project_zip(self, uploaded, uq_file_name,
                                         temp_folder):

        if isinstance(uploaded, FieldStorage) or isinstance(uploaded, Part):
            if not uploaded.file:
                raise ProjectImportException(
                    "Please select the archive which contains the project structure."
                )
            with open(uq_file_name, 'wb') as file_obj:
                self.files_helper.copy_file(uploaded.file, file_obj)
        else:
            shutil.copy2(uploaded, uq_file_name)

        try:
            self.files_helper.unpack_zip(uq_file_name, temp_folder)
        except FileStructureException as excep:
            self.logger.exception(excep)
            raise ProjectImportException(
                "Bad ZIP archive provided. A TVB exported project is expected!"
            )
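
The method above accepts two input kinds (an in-memory upload object or a plain filesystem path) before delegating the actual extraction. Below is a minimal, stdlib-only sketch of the same pattern; the names are hypothetical stand-ins for the TVB helpers (ProjectImportError replaces ProjectImportException, and no FilesHelper is involved):

# Hypothetical, stdlib-only sketch of the upload-or-path branch followed by ZIP extraction.
import shutil
import zipfile


class ProjectImportError(Exception):
    """Stand-in for the ProjectImportException used above."""


def download_and_unpack(uploaded, target_zip, temp_folder):
    if hasattr(uploaded, "read"):
        # File-like object, e.g. an upload stream from a web framework.
        with open(target_zip, "wb") as out:
            shutil.copyfileobj(uploaded, out)
    else:
        # Otherwise assume a path on the local filesystem.
        shutil.copy2(uploaded, target_zip)

    try:
        with zipfile.ZipFile(target_zip) as archive:
            archive.extractall(temp_folder)
    except zipfile.BadZipFile as exc:
        raise ProjectImportError("Bad ZIP archive provided.") from exc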
Example #3
def search_and_export_ts(project_id, export_folder=os.path.join("~", "TVB")):

    #### This is the simplest filter you could write: filter an entity by Subject
    filter_connectivity = FilterChain(
        fields=[FilterChain.datatype + '.subject'],
        operations=["=="],
        values=[DataTypeMetaData.DEFAULT_SUBJECT])

    connectivities = _retrieve_entities_by_filters(Connectivity, project_id,
                                                   filter_connectivity)

    #### A more complex filter: by linked entity (connectivity), BOLD monitor, sampling, operation param:
    filter_timeseries = FilterChain(
        fields=[
            FilterChain.datatype + '._connectivity',
            FilterChain.datatype + '._title',
            FilterChain.datatype + '._sample_period',
            FilterChain.datatype + '._sample_rate',
            FilterChain.operation + '.parameters'
        ],
        operations=["==", "like", ">=", "<=", "like"],
        values=[
            connectivities[0].gid, "Bold", "500", "0.002",
            '"conduction_speed": "3.0"'
        ])

    #### If you want to filter another type of TS, change the class below:
    #### instead of TimeSeriesRegion use TimeSeriesEEG, TimeSeriesSurface, etc.
    timeseries = _retrieve_entities_by_filters(TimeSeriesRegion, project_id,
                                               filter_timeseries)

    for ts in timeseries:
        print("=============================")
        print(ts.summary_info)
        print(" Original file: " + str(ts.get_storage_file_path()))
        destination_file = os.path.expanduser(
            os.path.join(export_folder, ts.get_storage_file_name()))
        FilesHelper.copy_file(ts.get_storage_file_path(), destination_file)
        if os.path.exists(destination_file):
            print(" TS file copied at: " + destination_file)
        else:
            print(" An error occurred while copying to the destination folder!")
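
A hedged usage sketch for the export helper above; the project id and target folder are illustrative assumptions, and a configured TVB console environment is presumed to be active:

# Assumed values: a project with id 1 exists and contains matching connectivities and time series.
if __name__ == "__main__":
    search_and_export_ts(project_id=1, export_folder=os.path.join("~", "TVB_exports"))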
Example #4
def import_conn_h5(project_id, h5_path):
    project = dao.get_project_by_id(project_id)

    TvbProfile.set_profile(TvbProfile.COMMAND_PROFILE)
    now = datetime.now()
    date_str = "%d-%d-%d_%d-%d-%d_%d" % (now.year, now.month, now.day,
                                         now.hour, now.minute, now.second,
                                         now.microsecond)
    uq_name = "%s-Connectivity" % date_str
    new_path = os.path.join(TvbProfile.current.TVB_TEMP_FOLDER, uq_name)

    FilesHelper.copy_file(h5_path, new_path)
    importer = ABCAdapter.build_adapter_from_class(TVBImporter)
    view_model = importer.get_view_model_class()()
    view_model.data_file = new_path
    return OperationService().fire_operation(importer,
                                             project.administrator,
                                             project_id,
                                             view_model=view_model)[0]
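
A hedged usage sketch: launching the connectivity import against an existing project. Both arguments are placeholders, and the H5 file is assumed to follow the TVB Connectivity layout:

# Illustrative call; project id 1 and the file path are assumptions, not values from the example.
launched_operation = import_conn_h5(1, "/tmp/connectivity.h5")
print("Launched import operation:", launched_operation)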
    def _process_input_zip(self, zip_arch, result_folder, remote_prefix, 
                           file_name_base, expected_pairs, fix_number=True):
        """
        Read entries in uploaded ZIP.
        Raise an exception if HDR/IMG pairs are not matched or the expected number of pairs ("expected_pairs") is not met.
        :returns: string with HDR list (to be passed to DTI pipeline).
        """
        
        hdr_files = []
        for file_name in zip_arch.namelist():
            if not file_name.startswith(file_name_base) or file_name.endswith("/"):
                continue
            if file_name.endswith(".hdr"):
                pair_img = file_name.replace(".hdr", ".img")
                if pair_img not in zip_arch.namelist():
                    raise ConnectException("Could not find pair for HDR file :" + str(file_name))
                
                new_file_name = os.path.join(result_folder, file_name_base + str(len(hdr_files)) + ".hdr")
                src = zip_arch.open(file_name, 'r')
                FilesHelper.copy_file(src, new_file_name)
                hdr_files.append(os.path.join(remote_prefix, os.path.split(new_file_name)[1]))
                new_file_name = new_file_name.replace(".hdr", ".img")
                src = zip_arch.open(pair_img, 'r')
                FilesHelper.copy_file(src, new_file_name)
                
            elif not file_name.endswith(".img"):
                self.logger.warning("Ignored file :" + str(file_name))
            
        if len(hdr_files) < expected_pairs or (fix_number and len(hdr_files) > expected_pairs):
            raise ConnectException("Invalid number of files:" + str(len(hdr_files)) +
                                   " expected:" + str(expected_pairs))

        result = ""
        for hdr_name in hdr_files:
            result = result + hdr_name + " "
        return result
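
The HDR/IMG pairing check in the method above can be illustrated with a self-contained, stdlib-only sketch; the function name is hypothetical and a plain ValueError stands in for ConnectException:

import zipfile


def list_hdr_img_pairs(zip_path, prefix):
    """Return (hdr, img) name pairs under prefix, failing if an .hdr lacks its .img twin."""
    with zipfile.ZipFile(zip_path) as archive:
        names = set(archive.namelist())
    pairs = []
    for name in sorted(names):
        if name.startswith(prefix) and name.endswith(".hdr"):
            img = name[:-4] + ".img"
            if img not in names:
                raise ValueError("Could not find pair for HDR file: " + name)
            pairs.append((name, img))
    return pairs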
Example #7
class ImportService(object):
    """
    Service for importing TVB entities into system.
    It supports TVB exported H5 files as input, but it should also handle H5 files 
    generated outside of TVB, as long as they respect the same structure.
    """
    def __init__(self):
        self.logger = get_logger(__name__)
        self.user_id = None
        self.files_helper = FilesHelper()
        self.created_projects = []

    def _download_and_unpack_project_zip(self, uploaded, uq_file_name,
                                         temp_folder):

        if isinstance(uploaded, FieldStorage) or isinstance(uploaded, Part):
            if not uploaded.file:
                raise ProjectImportException(
                    "Please select the archive which contains the project structure."
                )
            with open(uq_file_name, 'wb') as file_obj:
                self.files_helper.copy_file(uploaded.file, file_obj)
        else:
            shutil.copy2(uploaded, uq_file_name)

        try:
            self.files_helper.unpack_zip(uq_file_name, temp_folder)
        except FileStructureException as excep:
            self.logger.exception(excep)
            raise ProjectImportException(
                "Bad ZIP archive provided. A TVB exported project is expected!"
            )

    @staticmethod
    def _compute_unpack_path():
        """
        :return: the name of the folder where to expand uploaded zip
        """
        now = datetime.now()
        date_str = "%d-%d-%d_%d-%d-%d_%d" % (now.year, now.month, now.day,
                                             now.hour, now.minute, now.second,
                                             now.microsecond)
        uq_name = "%s-ImportProject" % date_str
        return os.path.join(TvbProfile.current.TVB_TEMP_FOLDER, uq_name)

    @transactional
    def import_project_structure(self, uploaded, user_id):
        """
        Execute import operations:
         
        1. check if ZIP or folder 
        2. find all project nodes
        3. for each project node:
            - create project
            - create all operations
            - import all images
            - create all dataTypes
        """

        self.user_id = user_id
        self.created_projects = []

        # Now compute the name of the folder where to explode uploaded ZIP file
        temp_folder = self._compute_unpack_path()
        uq_file_name = temp_folder + ".zip"

        try:
            self._download_and_unpack_project_zip(uploaded, uq_file_name,
                                                  temp_folder)
            self._import_projects_from_folder(temp_folder)

        except Exception as excep:
            self.logger.exception(
                "Error encountered during import. Deleting projects created during this operation."
            )
            # Remove project folders created so far.
            # Note that using the project service to remove the projects will not work,
            # because we do not have support for nested transaction.
            # Removing from DB is not necessary because in transactional env a simple exception throw
            # will erase everything to be inserted.
            for project in self.created_projects:
                project_path = os.path.join(TvbProfile.current.TVB_STORAGE,
                                            FilesHelper.PROJECTS_FOLDER,
                                            project.name)
                shutil.rmtree(project_path)
            raise ProjectImportException(str(excep))

        finally:
            # Now delete uploaded file
            if os.path.exists(uq_file_name):
                os.remove(uq_file_name)
            # Now delete temporary folder where uploaded ZIP was exploded.
            if os.path.exists(temp_folder):
                shutil.rmtree(temp_folder)

    @staticmethod
    def _load_burst_info_from_json(project_path):
        bursts_dict = {}
        dt_mappings_dict = {}
        bursts_file = os.path.join(project_path, BURST_INFO_FILE)
        if os.path.isfile(bursts_file):
            with open(bursts_file) as f:
                bursts_info_dict = json.load(f)
            bursts_dict = bursts_info_dict[BURSTS_DICT_KEY]
            dt_mappings_dict = bursts_info_dict[DT_BURST_MAP]
        os.remove(bursts_file)
        return bursts_dict, dt_mappings_dict

    @staticmethod
    def _import_bursts(project_entity, bursts_dict):
        """
        Re-create old bursts, but keep a mapping between the id it has here and the old-id it had
        in the project where they were exported, so we can re-add the datatypes to them.
        """
        burst_ids_mapping = {}

        for old_burst_id in bursts_dict:
            burst_information = BurstInformation.load_from_dict(
                bursts_dict[old_burst_id])
            burst_entity = model.BurstConfiguration(project_entity.id)
            burst_entity.from_dict(burst_information.data)
            burst_entity = dao.store_entity(burst_entity)
            burst_ids_mapping[int(old_burst_id)] = burst_entity.id
            # We don't need the data in dictionary form anymore, so update it with new BurstInformation object
            bursts_dict[old_burst_id] = burst_information
        return burst_ids_mapping

    def _import_projects_from_folder(self, temp_folder):
        """
        Process each project from the uploaded pack, to extract names.
        """
        project_roots = []
        for root, _, files in os.walk(temp_folder):
            if FilesHelper.TVB_PROJECT_FILE in files:
                project_roots.append(root)

        for project_path in project_roots:
            update_manager = ProjectUpdateManager(project_path)
            update_manager.run_all_updates()
            project_entity = self.__populate_project(project_path)

            # Compute the path where to store files of the imported project
            new_project_path = os.path.join(TvbProfile.current.TVB_STORAGE,
                                            FilesHelper.PROJECTS_FOLDER,
                                            project_entity.name)
            if project_path != new_project_path:
                shutil.move(project_path, new_project_path)

            self.created_projects.append(project_entity)

            # Keep a list with all bursts that were imported, since we will want to also add the workflow
            # steps after we are finished importing the operations and datatypes. We need to store the
            # bursts first, since we need to know which new ids they have for the operations' parent_burst.
            bursts_dict, dt_mappings_dict = self._load_burst_info_from_json(
                new_project_path)
            burst_ids_mapping = self._import_bursts(project_entity,
                                                    bursts_dict)

            # Now import project operations
            self.import_project_operations(project_entity, new_project_path,
                                           dt_mappings_dict, burst_ids_mapping)
            # Import images
            self._store_imported_images(project_entity)
            # Now we can finally import workflow related entities
            self.import_workflows(project_entity, bursts_dict,
                                  burst_ids_mapping)

    def import_workflows(self, project, bursts_dict, burst_ids_mapping):
        """
        Import the workflow entities for all bursts imported in the project.

        :param project: the current project into which the workflows are imported

        :param bursts_dict: a dictionary that holds all the required information in order to
                            import the bursts from the new project

        :param burst_ids_mapping: a dictionary of the form {old_burst_id : new_burst_id} so we
                                  know what burst to link each workflow to
        """
        for burst_id in bursts_dict:
            workflows_info = bursts_dict[burst_id].get_workflows()
            for one_wf_info in workflows_info:
                # Use the new burst id when creating the workflow
                workflow_entity = model.Workflow(
                    project.id, burst_ids_mapping[int(burst_id)])
                workflow_entity.from_dict(one_wf_info.data)
                workflow_entity = dao.store_entity(workflow_entity)
                wf_steps_info = one_wf_info.get_workflow_steps()
                view_steps_info = one_wf_info.get_view_steps()
                self.import_workflow_steps(workflow_entity, wf_steps_info,
                                           view_steps_info)

    def import_workflow_steps(self, workflow, wf_steps, view_steps):
        """
        Import all workflow steps for the given workflow. We create both wf_steps and view_steps
        in the same method, since if a wf_step has to be omitted for some reason, we also need to
        omit that view step.
        :param workflow: a model.Workflow entity from which we need to add workflow steps

        :param wf_steps: a list of WorkflowStepInformation entities, from which we will rebuild 
                         the workflow steps

        :param view_steps: a list of WorkflowViewStepInformation entities, from which we will 
                           rebuild the workflow view steps

        """
        for wf_step in wf_steps:
            try:
                algorithm = wf_step.get_algorithm()
                if algorithm is None:
                    # The algorithm is invalid for some reason. Just remove also the view step.
                    position = wf_step.index()
                    for entry in view_steps:
                        if entry.index() == position:
                            view_steps.remove(entry)
                    continue
                wf_step_entity = model.WorkflowStep(algorithm.id)
                wf_step_entity.from_dict(wf_step.data)
                wf_step_entity.fk_workflow = workflow.id
                wf_step_entity.fk_operation = wf_step.get_operation_id()
                dao.store_entity(wf_step_entity)
            except Exception:
                # only log exception and ignore this as it is not very important:
                self.logger.exception("Could not restore WorkflowStep: %s" %
                                      wf_step.get_algorithm().name)

        for view_step in view_steps:
            try:
                algorithm = view_step.get_algorithm()
                if algorithm is None:
                    continue
                view_step_entity = model.WorkflowStepView(algorithm.id)
                view_step_entity.from_dict(view_step.data)
                view_step_entity.fk_workflow = workflow.id
                view_step_entity.fk_portlet = view_step.get_portlet().id
                dao.store_entity(view_step_entity)
            except Exception:
                # only log exception and ignore this as it is not very important:
                self.logger.exception("Could not restore WorkflowViewStep " +
                                      view_step.get_algorithm().name)

    @staticmethod
    def _append_tmp_to_folders_containing_operations(import_path):
        """
        Find folders containing operations and rename them, return the renamed paths
        """
        pths = []
        for root, _, files in os.walk(import_path):
            if FilesHelper.TVB_OPERARATION_FILE in files:
                # Found an operation folder - append TMP to its name
                tmp_op_folder = root + 'tmp'
                os.rename(root, tmp_op_folder)
                operation_file_path = os.path.join(
                    tmp_op_folder, FilesHelper.TVB_OPERARATION_FILE)
                pths.append(operation_file_path)
        return pths

    def _load_operations_from_paths(self, project, op_paths):
        """
        Load operations from paths containing them.
        :returns: Operations ordered by start/creation date, so that data dependencies are resolved correctly
        """
        def by_time(op):
            return op.start_date or op.create_date or datetime.now()

        operations = []

        for operation_file_path in op_paths:
            operation = self.__build_operation_from_file(
                project, operation_file_path)
            operation.import_file = operation_file_path
            operations.append(operation)

        operations.sort(key=by_time)
        return operations

    def _load_datatypes_from_operation_folder(self, op_path, operation_entity,
                                              datatype_group):
        """
        Loads datatypes from operation folder
        :returns: Datatypes ordered by creation date (to solve any dependencies)
        """
        all_datatypes = []
        for file_name in os.listdir(op_path):
            if file_name.endswith(FilesHelper.TVB_STORAGE_FILE_EXTENSION):
                h5_file = os.path.join(op_path, file_name)
                try:
                    file_update_manager = FilesUpdateManager()
                    file_update_manager.upgrade_file(h5_file)
                    datatype = self.load_datatype_from_file(
                        op_path, file_name, operation_entity.id,
                        datatype_group)
                    all_datatypes.append(datatype)

                except IncompatibleFileManagerException:
                    os.remove(h5_file)
                    self.logger.warning(
                        "Incompatible H5 file will be ignored: %s" % h5_file)
                    self.logger.exception("Incompatibility details ...")

        all_datatypes.sort(key=lambda dt_date: dt_date.create_date)
        for dt in all_datatypes:
            self.logger.debug("Import order %s: %s" % (dt.type, dt.gid))
        return all_datatypes

    def _store_imported_datatypes_in_db(self, project, all_datatypes,
                                        dt_burst_mappings, burst_ids_mapping):
        def by_time(dt):
            return dt.create_date or datetime.now()

        if burst_ids_mapping is None:
            burst_ids_mapping = {}
        if dt_burst_mappings is None:
            dt_burst_mappings = {}

        all_datatypes.sort(key=by_time)

        for datatype in all_datatypes:
            old_burst_id = dt_burst_mappings.get(datatype.gid)

            if old_burst_id is not None:
                datatype.fk_parent_burst = burst_ids_mapping[old_burst_id]

            datatype_already_in_tvb = dao.get_datatype_by_gid(datatype.gid)

            if not datatype_already_in_tvb:
                # Compute disk size. Similar to ABCAdapter._capture_operation_results.
                # No need to close the h5 as we have not written to it.
                associated_file = os.path.join(
                    datatype.storage_path, datatype.get_storage_file_name())
                datatype.disk_size = FilesHelper.compute_size_on_disk(
                    associated_file)

                self.store_datatype(datatype)
            else:
                FlowService.create_link([datatype_already_in_tvb.id],
                                        project.id)

    def _store_imported_images(self, project):
        """
        Import all images from project
        """
        images_root = self.files_helper.get_images_folder(project.name)
        # for file_name in os.listdir(images_root):
        for root, _, files in os.walk(images_root):
            for file_name in files:
                if file_name.endswith(FilesHelper.TVB_FILE_EXTENSION):
                    self._populate_image(os.path.join(root, file_name),
                                         project.id)

    def import_project_operations(self,
                                  project,
                                  import_path,
                                  dt_burst_mappings=None,
                                  burst_ids_mapping=None):
        """
        This method scans the provided folder and identifies all operations that need to be imported
        """
        op_paths = self._append_tmp_to_folders_containing_operations(
            import_path)
        operations = self._load_operations_from_paths(project, op_paths)

        imported_operations = []
        datatypes = []

        # Here we process each operation found
        for operation in operations:
            self.logger.debug("Importing operation " + str(operation))
            old_operation_folder, _ = os.path.split(operation.import_file)
            operation_entity, datatype_group = self.__import_operation(
                operation)

            # Rename operation folder with the ID of the stored operation
            new_operation_path = FilesHelper().get_operation_folder(
                project.name, operation_entity.id)
            if old_operation_folder != new_operation_path:
                # Delete folder of the new operation, otherwise move will fail
                shutil.rmtree(new_operation_path)
                shutil.move(old_operation_folder, new_operation_path)

            operation_datatypes = self._load_datatypes_from_operation_folder(
                new_operation_path, operation_entity, datatype_group)
            imported_operations.append(operation_entity)
            datatypes.extend(operation_datatypes)

        self._store_imported_datatypes_in_db(project, datatypes,
                                             dt_burst_mappings,
                                             burst_ids_mapping)
        return imported_operations

    def _populate_image(self, file_name, project_id):
        """
        Create and store an image entity.
        """
        figure_dict = XMLReader(file_name).read_metadata()
        new_path = os.path.join(
            os.path.split(file_name)[0],
            os.path.split(figure_dict['file_path'])[1])
        if not os.path.exists(new_path):
            self.logger.warning("Expected to find image path %s. Skipping." % new_path)

        op = dao.get_operation_by_gid(figure_dict['fk_from_operation'])
        figure_dict['fk_op_id'] = op.id if op is not None else None
        figure_dict['fk_user_id'] = self.user_id
        figure_dict['fk_project_id'] = project_id
        figure_entity = manager_of_class(model.ResultFigure).new_instance()
        figure_entity = figure_entity.from_dict(figure_dict)
        stored_entity = dao.store_entity(figure_entity)

        # Update image meta-data with the new details after import
        figure = dao.load_figure(stored_entity.id)
        self.logger.debug("Store imported figure")
        self.files_helper.write_image_metadata(figure)

    def load_datatype_from_file(self,
                                storage_folder,
                                file_name,
                                op_id,
                                datatype_group=None,
                                move=True):
        """
        Creates an instance of datatype from storage / H5 file 
        :returns: datatype
        """
        self.logger.debug("Loading datatType from file: %s" % file_name)
        storage_manager = HDF5StorageManager(storage_folder, file_name)
        meta_dictionary = storage_manager.get_metadata()
        meta_structure = DataTypeMetaData(meta_dictionary)

        # Now try to determine class and instantiate it
        class_name = meta_structure[DataTypeMetaData.KEY_CLASS_NAME]
        class_module = meta_structure[DataTypeMetaData.KEY_MODULE]
        datatype = __import__(class_module, globals(), locals(), [class_name])
        datatype = getattr(datatype, class_name)
        type_instance = manager_of_class(datatype).new_instance()

        # Now we fill data into instance
        type_instance.type = str(type_instance.__class__.__name__)
        type_instance.module = str(type_instance.__module__)

        # Fill instance with meta data
        type_instance.load_from_metadata(meta_dictionary)

        #Add all the required attributes
        if datatype_group is not None:
            type_instance.fk_datatype_group = datatype_group.id
        type_instance.set_operation_id(op_id)

        # Now move storage file into correct folder if necessary
        current_file = os.path.join(storage_folder, file_name)
        new_file = type_instance.get_storage_file_path()
        if new_file != current_file and move:
            shutil.move(current_file, new_file)

        return type_instance

    def store_datatype(self, datatype):
        """This method stores data type into DB"""
        try:
            self.logger.debug("Store datatype: %s with Gid: %s" %
                              (datatype.__class__.__name__, datatype.gid))
            return dao.store_entity(datatype)
        except MissingDataSetException:
            self.logger.error(
                "Datatype %s has missing data and could not be imported properly."
                % (datatype, ))
            os.remove(datatype.get_storage_file_path())
        except IntegrityError as excep:
            self.logger.exception(excep)
            error_msg = "Could not import data with gid: %s. There is already a one with " \
                        "the same name or gid." % datatype.gid
            # Delete file if can't be imported
            os.remove(datatype.get_storage_file_path())
            raise ProjectImportException(error_msg)

    def __populate_project(self, project_path):
        """
        Create and store a Project entity.
        """
        self.logger.debug("Creating project from path: %s" % project_path)
        project_dict = self.files_helper.read_project_metadata(project_path)

        project_entity = manager_of_class(model.Project).new_instance()
        project_entity = project_entity.from_dict(project_dict, self.user_id)

        try:
            self.logger.debug("Storing imported project")
            return dao.store_entity(project_entity)
        except IntegrityError as excep:
            self.logger.exception(excep)
            error_msg = (
                "Could not import project: %s with gid: %s. There is already a "
                "project with the same name or gid.") % (project_entity.name,
                                                         project_entity.gid)
            raise ProjectImportException(error_msg)

    def __build_operation_from_file(self, project, operation_file):
        """
        Create Operation entity from metadata file.
        """
        operation_dict = XMLReader(operation_file).read_metadata()
        operation_entity = manager_of_class(model.Operation).new_instance()
        return operation_entity.from_dict(operation_dict, dao, self.user_id,
                                          project.gid)

    @staticmethod
    def __import_operation(operation_entity):
        """
        Store a Operation entity.
        """
        operation_entity = dao.store_entity(operation_entity)
        operation_group_id = operation_entity.fk_operation_group
        datatype_group = None

        if operation_group_id is not None:
            try:
                datatype_group = dao.get_datatypegroup_by_op_group_id(
                    operation_group_id)
            except SQLAlchemyError:
                # If no dataType group present for current op. group, create it.
                operation_group = dao.get_operationgroup_by_id(
                    operation_group_id)
                datatype_group = model.DataTypeGroup(
                    operation_group, operation_id=operation_entity.id)
                datatype_group.state = ADAPTERS['Upload']['defaultdatastate']
                datatype_group = dao.store_entity(datatype_group)

        return operation_entity, datatype_group

    def load_burst_entity(self, json_burst, project_id):
        """
        Load BurstConfiguration from JSON (possibly exported from a different machine).
        Nothing gets persisted in DB or on disk.

        :param json_burst: Burst JSON export
        :param project_id: Current project ID (it will be used if the user later starts this simulation)
        :return: BurstConfiguration  filled from JSON
        """

        burst_information = BurstInformation.load_from_dict(json_burst)
        burst_entity = model.BurstConfiguration(project_id)
        burst_entity.from_dict(burst_information.data)
        burst_entity.prepare_after_load()
        burst_entity.reset_tabs()

        workflow_info = burst_information.get_workflows()[0]
        workflow_entity = model.Workflow(project_id, None)
        workflow_entity.from_dict(workflow_info.data)

        view_steps = workflow_info.get_view_steps()
        analyze_steps = workflow_info.get_workflow_steps()

        for view_step in view_steps:
            try:
                algorithm = view_step.get_algorithm()
                portlet = view_step.get_portlet()
                view_step_entity = model.WorkflowStepView(
                    algorithm.id, portlet_id=portlet.id)
                view_step_entity.from_dict(view_step.data)
                view_step_entity.workflow = workflow_entity

                ## For each visualize step, also load all of the analyze steps.
                analyzers = []
                for an_step in analyze_steps:
                    if (an_step.data["tab_index"] != view_step_entity.tab_index
                            or an_step.data["index_in_tab"] !=
                            view_step_entity.index_in_tab):
                        continue
                    algorithm = an_step.get_algorithm()
                    wf_step_entity = model.WorkflowStep(algorithm.id)
                    wf_step_entity.from_dict(an_step.data)
                    wf_step_entity.workflow = workflow_entity
                    analyzers.append(wf_step_entity)

                portlet = PortletConfiguration(portlet.id)
                portlet.set_visualizer(view_step_entity)
                portlet.set_analyzers(analyzers)
                burst_entity.set_portlet(view_step_entity.tab_index,
                                         view_step_entity.index_in_tab,
                                         portlet)

            except Exception:
                # only log exception and ignore this step from loading
                self.logger.exception("Could not restore Workflow Step " +
                                      view_step.get_algorithm().name)

        return burst_entity
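
A hedged usage sketch for the service above, assuming a configured TVB deployment, a valid user id and an exported project archive; the path and id are placeholders:

# Illustrative call only; the archive path and user id are assumptions.
service = ImportService()
service.import_project_structure("/tmp/ExportedProject.zip", user_id=1)
print("Imported projects:", [project.name for project in service.created_projects])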
Example #8
class TestCSVConnectivityImporter(TransactionalTestCase):
    """
    Unit-tests for csv connectivity importer.
    """
    def transactional_setup_method(self):
        self.test_user = TestFactory.create_user()
        self.test_project = TestFactory.create_project(self.test_user)
        self.helper = FilesHelper()

    def transactional_teardown_method(self):
        """
        Clean-up tests data
        """
        FilesHelper().remove_project_structure(self.test_project.name)

    def _import_csv_test_connectivity(self, reference_connectivity_gid,
                                      subject):
        ### First prepare input data:
        data_dir = path.abspath(path.dirname(tvb_data.__file__))

        toronto_dir = path.join(data_dir, 'dti_pipeline_toronto')
        weights = path.join(toronto_dir, 'output_ConnectionCapacityMatrix.csv')
        tracts = path.join(toronto_dir, 'output_ConnectionDistanceMatrix.csv')
        weights_tmp = weights + '.tmp'
        tracts_tmp = tracts + '.tmp'
        self.helper.copy_file(weights, weights_tmp)
        self.helper.copy_file(tracts, tracts_tmp)

        ### Find importer and Launch Operation
        importer = TestFactory.create_adapter(
            'tvb.adapters.uploaders.csv_connectivity_importer',
            'CSVConnectivityImporter')

        form = CSVConnectivityImporterForm()
        form.fill_from_post({
            '_weights': Part(weights_tmp, HeaderMap({}), ''),
            '_tracts': Part(tracts_tmp, HeaderMap({}), ''),
            '_weights_delimiter': 'comma',
            '_tracts_delimiter': 'comma',
            '_Data_Subject': subject,
            '_input_data': reference_connectivity_gid
        })

        form.weights.data = weights_tmp
        form.tracts.data = tracts_tmp
        importer.submit_form(form)

        FlowService().fire_operation(importer, self.test_user,
                                     self.test_project.id, **form.get_dict())

    def test_happy_flow_import(self):
        """
        Test that importing a CSV connectivity generates at least one DataType in DB.
        """

        zip_path = path.join(path.dirname(tvb_data.__file__), 'connectivity',
                             'connectivity_96.zip')
        TestFactory.import_zip_connectivity(self.test_user,
                                            self.test_project,
                                            zip_path,
                                            subject=TEST_SUBJECT_A)

        field = FilterChain.datatype + '.subject'
        filters = FilterChain('', [field], [TEST_SUBJECT_A], ['=='])
        reference_connectivity_index = TestFactory.get_entity(
            self.test_project, ConnectivityIndex, filters)

        dt_count_before = TestFactory.get_entity_count(self.test_project,
                                                       ConnectivityIndex())

        self._import_csv_test_connectivity(reference_connectivity_index.gid,
                                           TEST_SUBJECT_B)

        dt_count_after = TestFactory.get_entity_count(self.test_project,
                                                      ConnectivityIndex())
        assert dt_count_before + 1 == dt_count_after

        filters = FilterChain('', [field], [TEST_SUBJECT_B], ['like'])
        imported_connectivity_index = TestFactory.get_entity(
            self.test_project, ConnectivityIndex, filters)

        # check relationship between the imported connectivity and the reference
        assert reference_connectivity_index.number_of_regions == imported_connectivity_index.number_of_regions
        assert not reference_connectivity_index.number_of_connections == imported_connectivity_index.number_of_connections

        reference_connectivity = h5.load_from_index(
            reference_connectivity_index)
        imported_connectivity = h5.load_from_index(imported_connectivity_index)

        assert not (reference_connectivity.weights
                    == imported_connectivity.weights).all()
        assert not (reference_connectivity.tract_lengths
                    == imported_connectivity.tract_lengths).all()

        assert (reference_connectivity.centres == imported_connectivity.centres
                ).all()
        assert (reference_connectivity.orientations ==
                imported_connectivity.orientations).all()
        assert (reference_connectivity.region_labels ==
                imported_connectivity.region_labels).all()

    def test_bad_reference(self):
        zip_path = path.join(path.dirname(tvb_data.__file__), 'connectivity',
                             'connectivity_66.zip')
        TestFactory.import_zip_connectivity(self.test_user, self.test_project,
                                            zip_path)
        field = FilterChain.datatype + '.subject'
        filters = FilterChain('', [field], [TEST_SUBJECT_A], ['!='])
        bad_reference_connectivity = TestFactory.get_entity(
            self.test_project, ConnectivityIndex, filters)

        with pytest.raises(OperationException):
            self._import_csv_test_connectivity(bad_reference_connectivity.gid,
                                               TEST_SUBJECT_A)
Example #9
class ImportService(object):
    """
    Service for importing TVB entities into system.
    It supports TVB exported H5 files as input, but it should also handle H5 files
    generated outside of TVB, as long as they respect the same structure.
    """
    def __init__(self):
        self.logger = get_logger(__name__)
        self.user_id = None
        self.files_helper = FilesHelper()
        self.created_projects = []

    def _download_and_unpack_project_zip(self, uploaded, uq_file_name,
                                         temp_folder):

        if isinstance(uploaded, FieldStorage) or isinstance(uploaded, Part):
            if not uploaded.file:
                raise ImportException(
                    "Please select the archive which contains the project structure."
                )
            with open(uq_file_name, 'wb') as file_obj:
                self.files_helper.copy_file(uploaded.file, file_obj)
        else:
            shutil.copy2(uploaded, uq_file_name)

        try:
            self.files_helper.unpack_zip(uq_file_name, temp_folder)
        except FileStructureException as excep:
            self.logger.exception(excep)
            raise ImportException(
                "Bad ZIP archive provided. A TVB exported project is expected!"
            )

    @staticmethod
    def _compute_unpack_path():
        """
        :return: the name of the folder where to expand uploaded zip
        """
        now = datetime.now()
        date_str = "%d-%d-%d_%d-%d-%d_%d" % (now.year, now.month, now.day,
                                             now.hour, now.minute, now.second,
                                             now.microsecond)
        uq_name = "%s-ImportProject" % date_str
        return os.path.join(TvbProfile.current.TVB_TEMP_FOLDER, uq_name)

    @transactional
    def import_project_structure(self, uploaded, user_id):
        """
        Execute import operations:

        1. check if ZIP or folder
        2. find all project nodes
        3. for each project node:
            - create project
            - create all operations and groups
            - import all images
            - create all dataTypes
        """

        self.user_id = user_id
        self.created_projects = []

        # Now compute the name of the folder where to explode uploaded ZIP file
        temp_folder = self._compute_unpack_path()
        uq_file_name = temp_folder + ".zip"

        try:
            self._download_and_unpack_project_zip(uploaded, uq_file_name,
                                                  temp_folder)
            self._import_projects_from_folder(temp_folder)

        except Exception as excep:
            self.logger.exception(
                "Error encountered during import. Deleting projects created during this operation."
            )
            # Remove project folders created so far.
            # Note that using the project service to remove the projects will not work,
            # because we do not have support for nested transaction.
            # Removing from DB is not necessary because in transactional env a simple exception throw
            # will erase everything to be inserted.
            for project in self.created_projects:
                project_path = os.path.join(TvbProfile.current.TVB_STORAGE,
                                            FilesHelper.PROJECTS_FOLDER,
                                            project.name)
                shutil.rmtree(project_path)
            raise ImportException(str(excep))

        finally:
            # Now delete uploaded file
            if os.path.exists(uq_file_name):
                os.remove(uq_file_name)
            # Now delete temporary folder where uploaded ZIP was exploded.
            if os.path.exists(temp_folder):
                shutil.rmtree(temp_folder)

    def _import_projects_from_folder(self, temp_folder):
        """
        Process each project from the uploaded pack, to extract names.
        """
        project_roots = []
        for root, _, files in os.walk(temp_folder):
            if FilesHelper.TVB_PROJECT_FILE in files:
                project_roots.append(root)

        for temp_project_path in project_roots:
            update_manager = ProjectUpdateManager(temp_project_path)
            update_manager.run_all_updates()
            project = self.__populate_project(temp_project_path)
            # Populate the internal list of created projects so far, for cleaning up folders in case of failure
            self.created_projects.append(project)
            # Ensure project final folder exists on disk
            project_path = self.files_helper.get_project_folder(project)
            shutil.move(
                os.path.join(temp_project_path, FilesHelper.TVB_PROJECT_FILE),
                project_path)
            # Now import project operations with their results
            self.import_project_operations(project, temp_project_path)
            # Import images and move them from temp into target
            self._store_imported_images(project, temp_project_path,
                                        project.name)

    def _load_datatypes_from_operation_folder(self, src_op_path,
                                              operation_entity,
                                              datatype_group):
        """
        Loads datatypes from operation folder
        :returns: Datatype entities as dict {original_path: Dt instance}
        """
        all_datatypes = {}
        for file_name in os.listdir(src_op_path):
            if file_name.endswith(FilesHelper.TVB_STORAGE_FILE_EXTENSION):
                h5_file = os.path.join(src_op_path, file_name)
                try:
                    file_update_manager = FilesUpdateManager()
                    file_update_manager.upgrade_file(h5_file)
                    datatype = self.load_datatype_from_file(
                        h5_file, operation_entity.id, datatype_group,
                        operation_entity.fk_launched_in)
                    all_datatypes[h5_file] = datatype

                except IncompatibleFileManagerException:
                    os.remove(h5_file)
                    self.logger.warning(
                        "Incompatible H5 file will be ignored: %s" % h5_file)
                    self.logger.exception("Incompatibility details ...")
        return all_datatypes

    def _store_imported_datatypes_in_db(self, project, all_datatypes):
        # type: (Project, dict) -> int
        sorted_dts = sorted(
            all_datatypes.items(),
            key=lambda dt_item: dt_item[1].create_date or datetime.now())

        count = 0
        for dt_path, datatype in sorted_dts:
            datatype_already_in_tvb = dao.get_datatype_by_gid(datatype.gid)
            if not datatype_already_in_tvb:
                self.store_datatype(datatype, dt_path)
                count += 1
            else:
                AlgorithmService.create_link([datatype_already_in_tvb.id],
                                             project.id)

            file_path = h5.h5_file_for_index(datatype).path
            h5_class = H5File.h5_class_from_file(file_path)
            reference_list = h5_class(file_path).gather_references()

            for _, reference_gid in reference_list:
                if not reference_gid:
                    continue

                ref_index = dao.get_datatype_by_gid(reference_gid.hex)
                if ref_index is None:
                    os.remove(file_path)
                    dao.remove_entity(datatype.__class__, datatype.id)
                    raise MissingReferenceException(
                        'Imported file depends on datatypes that do not exist. Please upload '
                        'those first!')

        return count

    def _store_imported_images(self, project, temp_project_path, project_name):
        """
        Import all images from project
        """
        images_root = os.path.join(temp_project_path,
                                   FilesHelper.IMAGES_FOLDER)
        target_images_path = self.files_helper.get_images_folder(project_name)
        for root, _, files in os.walk(images_root):
            for metadata_file in files:
                if metadata_file.endswith(FilesHelper.TVB_FILE_EXTENSION):
                    self._import_image(root, metadata_file, project.id,
                                       target_images_path)

    def _retrieve_operations_in_order(self, project, import_path):
        # type: (Project, str) -> list[Operation2ImportData]
        retrieved_operations = []

        for root, _, files in os.walk(import_path):
            if OPERATION_XML in files:
                # Previous Operation format for uploading previous versions of projects
                operation_file_path = os.path.join(root, OPERATION_XML)
                operation, operation_xml_parameters = self.__build_operation_from_file(
                    project, operation_file_path)
                operation.import_file = operation_file_path
                self.logger.debug("Found operation in old XML format: " +
                                  str(operation))
                retrieved_operations.append(
                    Operation2ImportData(
                        operation,
                        root,
                        info_from_xml=operation_xml_parameters))

            else:
                # We strive for the new format with ViewModelH5
                main_view_model = None
                dt_paths = []
                all_view_model_files = []
                for file in files:
                    if file.endswith(FilesHelper.TVB_STORAGE_FILE_EXTENSION):
                        h5_file = os.path.join(root, file)
                        try:
                            h5_class = H5File.h5_class_from_file(h5_file)
                            if h5_class is ViewModelH5:
                                all_view_model_files.append(h5_file)
                                if not main_view_model:
                                    view_model = h5.load_view_model_from_file(
                                        h5_file)
                                    if type(view_model) in VIEW_MODEL2ADAPTER:
                                        main_view_model = view_model
                            else:
                                file_update_manager = FilesUpdateManager()
                                file_update_manager.upgrade_file(h5_file)
                                dt_paths.append(h5_file)
                        except Exception:
                            self.logger.warning(
                                "Unreadable H5 file will be ignored: %s" %
                                h5_file)

                if main_view_model is not None:
                    alg = VIEW_MODEL2ADAPTER[type(main_view_model)]
                    operation = Operation(main_view_model.gid.hex,
                                          project.fk_admin,
                                          project.id,
                                          alg.id,
                                          status=STATUS_FINISHED,
                                          user_group=main_view_model.generic_attributes.operation_tag,
                                          start_date=datetime.now(),
                                          completion_date=datetime.now())
                    operation.create_date = main_view_model.create_date
                    self.logger.debug(
                        "Found main ViewModel to create operation for it: " +
                        str(operation))

                    retrieved_operations.append(
                        Operation2ImportData(operation, root, main_view_model,
                                             dt_paths, all_view_model_files))

                elif len(dt_paths) > 0:
                    alg = dao.get_algorithm_by_module(TVB_IMPORTER_MODULE,
                                                      TVB_IMPORTER_CLASS)
                    default_adapter = ABCAdapter.build_adapter(alg)
                    view_model = default_adapter.get_view_model_class()()
                    view_model.data_file = dt_paths[0]
                    vm_path = h5.store_view_model(view_model, root)
                    all_view_model_files.append(vm_path)
                    operation = Operation(view_model.gid.hex,
                                          project.fk_admin,
                                          project.id,
                                          alg.id,
                                          status=STATUS_FINISHED,
                                          start_date=datetime.now(),
                                          completion_date=datetime.now())
                    self.logger.debug(
                        "Found no ViewModel in folder, so we default to " +
                        str(operation))

                    retrieved_operations.append(
                        Operation2ImportData(operation, root, view_model,
                                             dt_paths, all_view_model_files,
                                             True))

        return sorted(retrieved_operations,
                      key=lambda op_data: op_data.order_field)

    def import_project_operations(self, project, import_path):
        """
        This method scans the provided folder and identifies all operations that need to be imported
        """
        imported_operations = []
        ordered_operations = self._retrieve_operations_in_order(
            project, import_path)
        success_no = 0

        for operation_data in ordered_operations:

            if operation_data.is_old_form:
                operation_entity, datatype_group = self.__import_operation(
                    operation_data.operation)
                new_op_folder = self.files_helper.get_project_folder(
                    project, str(operation_entity.id))

                try:
                    operation_datatypes = self._load_datatypes_from_operation_folder(
                        operation_data.operation_folder, operation_entity,
                        datatype_group)
                    # Create and store view_model from operation
                    view_model = self._get_new_form_view_model(
                        operation_entity, operation_data.info_from_xml)
                    h5.store_view_model(view_model, new_op_folder)
                    operation_entity.view_model_gid = view_model.gid.hex
                    dao.store_entity(operation_entity)

                    self._store_imported_datatypes_in_db(
                        project, operation_datatypes)
                    imported_operations.append(operation_entity)
                    success_no = success_no + 1
                except MissingReferenceException:
                    operation_entity.status = STATUS_ERROR
                    dao.store_entity(operation_entity)

            elif operation_data.main_view_model is not None:
                operation_entity = dao.store_entity(operation_data.operation)
                dt_group = None  # TODO
                # Store the DataTypes in db
                dts = {}
                for dt_path in operation_data.dt_paths:
                    dt = self.load_datatype_from_file(dt_path,
                                                      operation_entity.id,
                                                      dt_group, project.id)
                    if isinstance(dt, BurstConfiguration):
                        dao.store_entity(dt)
                    else:
                        dts[dt_path] = dt
                try:
                    stored_dts_count = self._store_imported_datatypes_in_db(
                        project, dts)

                    if stored_dts_count > 0 or not operation_data.is_self_generated:
                        imported_operations.append(operation_entity)
                        success_no = success_no + 1
                        new_op_folder = self.files_helper.get_project_folder(
                            project, str(operation_entity.id))
                        for h5_file in operation_data.all_view_model_files:
                            shutil.move(h5_file, new_op_folder)
                    else:
                        # In case all Dts under the current operation were Links and the ViewModel is dummy,
                        # don't keep the Operation empty in DB
                        dao.remove_entity(Operation, operation_entity.id)
                except MissingReferenceException:
                    operation_entity.status = STATUS_ERROR
                    dao.store_entity(operation_entity)

            else:
                self.logger.warning(
                    "Folder %s will be ignored, as we could not find a serialized "
                    "operation or DTs inside!" %
                    operation_data.operation_folder)

        if success_no < len(ordered_operations):
            self.logger.warning(
                "Project has been only partially imported because of some missing dependent datatypes. "
                "%d operations were successfully imported from a total of %d!" %
                (success_no, len(ordered_operations)))
        return imported_operations

    @staticmethod
    def _get_new_form_view_model(operation, xml_parameters):
        # type: (Operation, str) -> ViewModel
        ad = ABCAdapter.build_adapter(operation.algorithm)
        view_model = ad.get_view_model_class()()

        if xml_parameters:
            params = json.loads(xml_parameters)
            declarative_attrs = type(view_model).declarative_attrs

            for param in params:
                new_param_name = param
                if param[0] == "_":
                    new_param_name = param[1:]
                new_param_name = new_param_name.lower()
                if new_param_name in declarative_attrs:
                    setattr(view_model, new_param_name, params[param])
        return view_model
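
    # Example of the normalization above: a legacy XML parameter serialized as
    # "_Simulation_length" is matched against the declarative attribute
    # "simulation_length" (leading underscore stripped, name lower-cased) and, when
    # present, set on the view model via setattr. The parameter name is illustrative
    # only; actual names depend on the exporting TVB version.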

    def _import_image(self, src_folder, metadata_file, project_id,
                      target_images_path):
        """
        Create and store an image entity.
        """
        figure_dict = XMLReader(os.path.join(src_folder,
                                             metadata_file)).read_metadata()
        actual_figure = os.path.join(
            src_folder,
            os.path.split(figure_dict['file_path'])[1])
        if not os.path.exists(actual_figure):
            self.logger.warning("Expected to find image path %s .Skipping" %
                                actual_figure)
            return
        figure_dict['fk_user_id'] = self.user_id
        figure_dict['fk_project_id'] = project_id
        figure_entity = manager_of_class(ResultFigure).new_instance()
        figure_entity = figure_entity.from_dict(figure_dict)
        stored_entity = dao.store_entity(figure_entity)

        # Update image meta-data with the new details after import
        figure = dao.load_figure(stored_entity.id)
        shutil.move(actual_figure, target_images_path)
        self.logger.debug("Store imported figure")
        self.files_helper.write_image_metadata(figure)

    def load_datatype_from_file(self,
                                current_file,
                                op_id,
                                datatype_group=None,
                                current_project_id=None):
        # type: (str, int, DataTypeGroup, int) -> HasTraitsIndex
        """
        Creates an instance of a datatype from a storage / H5 file.
        :returns: a DatatypeIndex, or a BurstConfiguration when the file stores one
        """
        self.logger.debug("Loading DataType from file: %s" % current_file)
        h5_class = H5File.h5_class_from_file(current_file)

        if h5_class is BurstConfigurationH5:
            if current_project_id is None:
                op_entity = dao.get_operationgroup_by_id(op_id)
                current_project_id = op_entity.fk_launched_in
            h5_file = BurstConfigurationH5(current_file)
            burst = BurstConfiguration(current_project_id)
            burst.fk_simulation = op_id
            # burst.fk_operation_group = TODO
            # burst.fk_metric_operation_group = TODO
            h5_file.load_into(burst)
            result = burst
        else:
            datatype, generic_attributes = h5.load_with_links(current_file)
            index_class = h5.REGISTRY.get_index_for_datatype(
                datatype.__class__)
            datatype_index = index_class()
            datatype_index.fill_from_has_traits(datatype)
            datatype_index.fill_from_generic_attributes(generic_attributes)

            # Add all the required attributes
            if datatype_group is not None:
                datatype_index.fk_datatype_group = datatype_group.id
            datatype_index.fk_from_operation = op_id

            associated_file = h5.path_for_stored_index(datatype_index)
            if os.path.exists(associated_file):
                datatype_index.disk_size = FilesHelper.compute_size_on_disk(
                    associated_file)
            result = datatype_index

        return result

    def store_datatype(self, datatype, current_file=None):
        """This method stores data type into DB"""
        try:
            self.logger.debug("Store datatype: %s with Gid: %s" %
                              (datatype.__class__.__name__, datatype.gid))
            # Now move storage file into correct folder if necessary
            if current_file is not None:
                final_path = h5.path_for_stored_index(datatype)
                if final_path != current_file:
                    shutil.move(current_file, final_path)

            return dao.store_entity(datatype)
        except MissingDataSetException as e:
            self.logger.exception(e)
            error_msg = "Datatype %s has missing data and could not be imported properly." % (
                datatype, )
            raise ImportException(error_msg)
        except IntegrityError as excep:
            self.logger.exception(excep)
            error_msg = "Could not import data with gid: %s. There is already a one with " \
                        "the same name or gid." % datatype.gid
            raise ImportException(error_msg)

    def __populate_project(self, project_path):
        """
        Create and store a Project entity.
        """
        self.logger.debug("Creating project from path: %s" % project_path)
        project_dict = self.files_helper.read_project_metadata(project_path)

        project_entity = manager_of_class(Project).new_instance()
        project_entity = project_entity.from_dict(project_dict, self.user_id)

        try:
            self.logger.debug("Storing imported project")
            return dao.store_entity(project_entity)
        except IntegrityError as excep:
            self.logger.exception(excep)
            error_msg = (
                "Could not import project: %s with gid: %s. There is already a "
                "project with the same name or gid.") % (project_entity.name,
                                                         project_entity.gid)
            raise ImportException(error_msg)

    def __build_operation_from_file(self, project, operation_file):
        """
        Create Operation entity from metadata file.
        """
        operation_dict = XMLReader(operation_file).read_metadata()
        operation_entity = manager_of_class(Operation).new_instance()
        return operation_entity.from_dict(operation_dict, dao, self.user_id,
                                          project.gid)

    @staticmethod
    def __import_operation(operation_entity):
        """
        Store an Operation entity.
        """
        operation_entity = dao.store_entity(operation_entity)
        operation_group_id = operation_entity.fk_operation_group
        datatype_group = None

        if operation_group_id is not None:
            try:
                datatype_group = dao.get_datatypegroup_by_op_group_id(
                    operation_group_id)
            except SQLAlchemyError:
                # If no dataType group present for current op. group, create it.
                operation_group = dao.get_operationgroup_by_id(
                    operation_group_id)
                datatype_group = DataTypeGroup(
                    operation_group, operation_id=operation_entity.id)
                datatype_group.state = UploadAlgorithmCategoryConfig.defaultdatastate
                datatype_group = dao.store_entity(datatype_group)

        return operation_entity, datatype_group

    def import_simulator_configuration_zip(self, zip_file):
        # Now compute the name of the folder where the uploaded ZIP file will be unpacked
        temp_folder = self._compute_unpack_path()
        uq_file_name = temp_folder + ".zip"

        if isinstance(zip_file, FieldStorage) or isinstance(zip_file, Part):
            if not zip_file.file:
                raise ServicesBaseException(
                    "Could not process the given ZIP file...")

            with open(uq_file_name, 'wb') as file_obj:
                self.files_helper.copy_file(zip_file.file, file_obj)
        else:
            shutil.copy2(zip_file, uq_file_name)

        try:
            self.files_helper.unpack_zip(uq_file_name, temp_folder)
            return temp_folder
        except FileStructureException as excep:
            raise ServicesBaseException(
                "Could not process the given ZIP file..." + str(excep))


#     # Sort all h5 files based on their creation date stored in the files themselves
#     sorted_h5_files = sorted(h5_files, key=lambda h5_path: _get_create_date_for_sorting(h5_path) or datetime.now())
#     return sorted_h5_files
#
#
# def _get_create_date_for_sorting(h5_file):
#     storage_manager = HDF5StorageManager(os.path.dirname(h5_file), os.path.basename(h5_file))
#     root_metadata = storage_manager.get_metadata()
#     create_date_str = str(root_metadata['Create_date'], 'utf-8')
#     create_date = datetime.strptime(create_date_str.replace('datetime:', ''), '%Y-%m-%d %H:%M:%S.%f')
#     return create_date
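
A minimal driver sketch for the ImportService shown above, assuming the usual TVB module layout; the function name, archive path handling and error message are illustrative and not part of the original example:

# Sketch: import a previously exported project archive on behalf of a given user.
import os

from tvb.core.services.import_service import ImportService


def import_exported_project(zip_path, user_id):
    """Unpack and import a TVB project ZIP; ProjectImportException is raised on failure."""
    if not os.path.exists(zip_path):
        raise IOError("Archive not found: %s" % zip_path)
    service = ImportService()
    # import_project_structure unpacks the archive, creates projects, operations and
    # datatypes, and removes its temporary files in the finally block shown above.
    service.import_project_structure(zip_path, user_id)
    return service.created_projects
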
Example #10
class ImportService(object):
    """
    Service for importing TVB entities into system.
    It supports TVB exported H5 files as input, but it should also handle H5 files 
    generated outside of TVB, as long as they respect the same structure.
    """
    def __init__(self):
        self.logger = get_logger(__name__)
        self.user_id = None
        self.files_helper = FilesHelper()
        self.created_projects = []

    def _download_and_unpack_project_zip(self, uploaded, uq_file_name,
                                         temp_folder):

        if isinstance(uploaded, FieldStorage) or isinstance(uploaded, Part):
            if not uploaded.file:
                raise ProjectImportException(
                    "Please select the archive which contains the project structure."
                )
            with open(uq_file_name, 'wb') as file_obj:
                self.files_helper.copy_file(uploaded.file, file_obj)
        else:
            shutil.copy2(uploaded, uq_file_name)

        try:
            self.files_helper.unpack_zip(uq_file_name, temp_folder)
        except FileStructureException as excep:
            self.logger.exception(excep)
            raise ProjectImportException(
                "Bad ZIP archive provided. A TVB exported project is expected!"
            )

    @staticmethod
    def _compute_unpack_path():
        """
        :returns: the path of the folder where the uploaded ZIP will be unpacked
        """
        now = datetime.now()
        date_str = "%d-%d-%d_%d-%d-%d_%d" % (now.year, now.month, now.day,
                                             now.hour, now.minute, now.second,
                                             now.microsecond)
        uq_name = "%s-ImportProject" % date_str
        return os.path.join(TvbProfile.current.TVB_TEMP_FOLDER, uq_name)
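
    # The folder name produced above looks like, e.g.,
    # "<TVB_TEMP_FOLDER>/2023-1-15_10-42-7_123456-ImportProject" (components are not
    # zero-padded, since plain %d formatting is used); callers append ".zip" to this
    # path when staging the uploaded archive next to it.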

    @transactional
    def import_project_structure(self, uploaded, user_id):
        """
        Execute import operations:
         
        1. check if ZIP or folder 
        2. find all project nodes
        3. for each project node:
            - create project
            - create all operations
            - import all images
            - create all dataTypes
        """

        self.user_id = user_id
        self.created_projects = []

        # Now compute the name of the folder where the uploaded ZIP file will be unpacked
        temp_folder = self._compute_unpack_path()
        uq_file_name = temp_folder + ".zip"

        try:
            self._download_and_unpack_project_zip(uploaded, uq_file_name,
                                                  temp_folder)
            self._import_projects_from_folder(temp_folder)

        except Exception as excep:
            self.logger.exception(
                "Error encountered during import. Deleting projects created during this operation."
            )
            # Remove the project folders created so far.
            # Note that using the project service to remove the projects will not work,
            # because we do not have support for nested transactions.
            # Removing from the DB is not necessary: in a transactional environment, simply raising
            # an exception rolls back everything that was about to be inserted.
            for project in self.created_projects:
                project_path = os.path.join(TvbProfile.current.TVB_STORAGE,
                                            FilesHelper.PROJECTS_FOLDER,
                                            project.name)
                shutil.rmtree(project_path)
            raise ProjectImportException(str(excep))

        finally:
            # Now delete uploaded file
            if os.path.exists(uq_file_name):
                os.remove(uq_file_name)
            # Now delete the temporary folder where the uploaded ZIP was unpacked.
            if os.path.exists(temp_folder):
                shutil.rmtree(temp_folder)

    @staticmethod
    def _load_burst_info_from_json(project_path):
        bursts_dict = {}
        dt_mappings_dict = {}
        bursts_file = os.path.join(project_path, BURST_INFO_FILE)
        if os.path.isfile(bursts_file):
            with open(bursts_file) as f:
                bursts_info_dict = json.load(f)
            bursts_dict = bursts_info_dict[BURSTS_DICT_KEY]
            dt_mappings_dict = bursts_info_dict[DT_BURST_MAP]
        os.remove(bursts_file)
        return bursts_dict, dt_mappings_dict

    @staticmethod
    def _import_bursts(project_entity, bursts_dict):
        """
        Re-create the old bursts, but keep a mapping between the new id assigned here and the old id
        each burst had in the project from which it was exported, so we can re-attach the datatypes to them.
        """
        burst_ids_mapping = {}

        # for old_burst_id in bursts_dict:
        # burst_information = BurstInformation.load_from_dict(bursts_dict[old_burst_id])
        # burst_entity = BurstConfiguration(project_entity.id)
        # burst_entity.from_dict(burst_information.data)
        # burst_entity = dao.store_entity(burst_entity)
        # burst_ids_mapping[int(old_burst_id)] = burst_entity.id
        # We don't need the data in dictionary form anymore, so update it with new BurstInformation object
        # bursts_dict[old_burst_id] = burst_information
        return burst_ids_mapping
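
    # The mapping returned here (old burst id from the exported project -> id of the
    # newly stored burst) is consumed by _store_imported_datatypes_in_db to rewrite each
    # datatype's fk_parent_burst; with the block above commented out it stays empty.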

    def _import_projects_from_folder(self, temp_folder):
        """
        Walk the uploaded pack, identify the project roots and import each project found.
        """
        project_roots = []
        for root, _, files in os.walk(temp_folder):
            if FilesHelper.TVB_PROJECT_FILE in files:
                project_roots.append(root)

        for project_path in project_roots:
            update_manager = ProjectUpdateManager(project_path)
            update_manager.run_all_updates()
            project_entity = self.__populate_project(project_path)

            # Compute the path where to store files of the imported project
            new_project_path = os.path.join(TvbProfile.current.TVB_STORAGE,
                                            FilesHelper.PROJECTS_FOLDER,
                                            project_entity.name)
            if project_path != new_project_path:
                shutil.move(project_path, new_project_path)

            self.created_projects.append(project_entity)

            # Keep a list of all bursts that were imported, since we will also want to add the workflow
            # steps after we finish importing the operations and datatypes. We need to store the bursts
            # first, since we need to know which new ids they have for the operations' parent_burst.
            # bursts_dict, dt_mappings_dict = self._load_burst_info_from_json(new_project_path)
            # burst_ids_mapping = self._import_bursts(project_entity, bursts_dict)

            # Now import project operations
            self.import_project_operations(project_entity, new_project_path)
            # Import images
            self._store_imported_images(project_entity)

    @staticmethod
    def _append_tmp_to_folders_containing_operations(import_path):
        """
        Find folders containing operations and rename them, return the renamed paths
        """
        pths = []
        for root, _, files in os.walk(import_path):
            if FilesHelper.TVB_OPERARATION_FILE in files:
                # Found an operation folder - append TMP to its name
                tmp_op_folder = root + 'tmp'
                os.rename(root, tmp_op_folder)
                operation_file_path = os.path.join(
                    tmp_op_folder, FilesHelper.TVB_OPERARATION_FILE)
                pths.append(operation_file_path)
        return pths

    def _load_operations_from_paths(self, project, op_paths):
        """
        Load operations from paths containing them.
        :returns: Operations ordered by start/creation date, so that data dependencies are resolved correctly
        """
        def by_time(op):
            return op.start_date or op.create_date or datetime.now()

        operations = []

        for operation_file_path in op_paths:
            operation = self.__build_operation_from_file(
                project, operation_file_path)
            operation.import_file = operation_file_path
            operations.append(operation)

        operations.sort(key=by_time)
        return operations

    def _load_datatypes_from_operation_folder(self, op_path, operation_entity,
                                              datatype_group):
        """
        Loads datatypes from operation folder
        :returns: Datatypes ordered by creation date (to solve any dependencies)
        """
        all_datatypes = []
        for file_name in os.listdir(op_path):
            if file_name.endswith(FilesHelper.TVB_STORAGE_FILE_EXTENSION):
                h5_file = os.path.join(op_path, file_name)
                try:
                    file_update_manager = FilesUpdateManager()
                    file_update_manager.upgrade_file(h5_file)
                    datatype = self.load_datatype_from_file(
                        op_path, file_name, operation_entity.id,
                        datatype_group)
                    all_datatypes.append(datatype)

                except IncompatibleFileManagerException:
                    os.remove(h5_file)
                    self.logger.warning(
                        "Incompatible H5 file will be ignored: %s" % h5_file)
                    self.logger.exception("Incompatibility details ...")

        all_datatypes.sort(key=lambda dt_date: dt_date.create_date)
        for dt in all_datatypes:
            self.logger.debug("Import order %s: %s" % (dt.type, dt.gid))
        return all_datatypes

    def _store_imported_datatypes_in_db(self, project, all_datatypes,
                                        dt_burst_mappings, burst_ids_mapping):
        def by_time(dt):
            return dt.create_date or datetime.now()

        if burst_ids_mapping is None:
            burst_ids_mapping = {}
        if dt_burst_mappings is None:
            dt_burst_mappings = {}

        all_datatypes.sort(key=by_time)

        for datatype in all_datatypes:
            old_burst_id = dt_burst_mappings.get(datatype.gid)

            if old_burst_id is not None:
                datatype.fk_parent_burst = burst_ids_mapping[old_burst_id]

            datatype_already_in_tvb = dao.get_datatype_by_gid(datatype.gid)

            if not datatype_already_in_tvb:
                self.store_datatype(datatype)
            else:
                AlgorithmService.create_link([datatype_already_in_tvb.id],
                                             project.id)

    def _store_imported_images(self, project):
        """
        Import all images from project
        """
        images_root = self.files_helper.get_images_folder(project.name)
        # for file_name in os.listdir(images_root):
        for root, _, files in os.walk(images_root):
            for file_name in files:
                if file_name.endswith(FilesHelper.TVB_FILE_EXTENSION):
                    self._populate_image(os.path.join(root, file_name),
                                         project.id)

    def import_project_operations(self,
                                  project,
                                  import_path,
                                  dt_burst_mappings=None,
                                  burst_ids_mapping=None):
        """
        This method scans the provided folder and identifies all operations that need to be imported.
        """
        op_paths = self._append_tmp_to_folders_containing_operations(
            import_path)
        operations = self._load_operations_from_paths(project, op_paths)

        imported_operations = []

        # Here we process each operation found
        for operation in operations:
            self.logger.debug("Importing operation " + str(operation))
            old_operation_folder, _ = os.path.split(operation.import_file)
            operation_entity, datatype_group = self.__import_operation(
                operation)

            # Rename operation folder with the ID of the stored operation
            new_operation_path = FilesHelper().get_operation_folder(
                project.name, operation_entity.id)
            if old_operation_folder != new_operation_path:
                # Delete folder of the new operation, otherwise move will fail
                shutil.rmtree(new_operation_path)
                shutil.move(old_operation_folder, new_operation_path)

            operation_datatypes = self._load_datatypes_from_operation_folder(
                new_operation_path, operation_entity, datatype_group)
            self._store_imported_datatypes_in_db(project, operation_datatypes,
                                                 dt_burst_mappings,
                                                 burst_ids_mapping)
            imported_operations.append(operation_entity)

        return imported_operations

    def _populate_image(self, file_name, project_id):
        """
        Create and store an image entity.
        """
        figure_dict = XMLReader(file_name).read_metadata()
        new_path = os.path.join(
            os.path.split(file_name)[0],
            os.path.split(figure_dict['file_path'])[1])
        if not os.path.exists(new_path):
            self.logger.warning("Expected to find image path %s .Skipping" %
                                new_path)

        op = dao.get_operation_by_gid(figure_dict['fk_from_operation'])
        figure_dict['fk_op_id'] = op.id if op is not None else None
        figure_dict['fk_user_id'] = self.user_id
        figure_dict['fk_project_id'] = project_id
        figure_entity = manager_of_class(ResultFigure).new_instance()
        figure_entity = figure_entity.from_dict(figure_dict)
        stored_entity = dao.store_entity(figure_entity)

        # Update image meta-data with the new details after import
        figure = dao.load_figure(stored_entity.id)
        self.logger.debug("Store imported figure")
        self.files_helper.write_image_metadata(figure)

    def load_datatype_from_file(self,
                                storage_folder,
                                file_name,
                                op_id,
                                datatype_group=None,
                                move=True,
                                final_storage=None):
        """
        Creates an instance of a datatype from a storage / H5 file.
        :returns: DatatypeIndex
        """
        self.logger.debug("Loading DataType from file: %s" % file_name)
        datatype, generic_attributes = h5.load_with_references(
            os.path.join(storage_folder, file_name))
        index_class = h5.REGISTRY.get_index_for_datatype(datatype.__class__)
        datatype_index = index_class()
        datatype_index.fill_from_has_traits(datatype)
        datatype_index.fill_from_generic_attributes(generic_attributes)

        # Add all the required attributes
        if datatype_group is not None:
            datatype_index.fk_datatype_group = datatype_group.id
        datatype_index.fk_from_operation = op_id

        associated_file = h5.path_for_stored_index(datatype_index)
        if os.path.exists(associated_file):
            datatype_index.disk_size = FilesHelper.compute_size_on_disk(
                associated_file)

        # Now move storage file into correct folder if necessary
        if move and final_storage is not None:
            current_file = os.path.join(storage_folder, file_name)
            h5_type = h5.REGISTRY.get_h5file_for_datatype(datatype.__class__)
            final_path = h5.path_for(final_storage, h5_type, datatype.gid)
            if final_path != current_file:
                shutil.move(current_file, final_path)

        return datatype_index

    def store_datatype(self, datatype):
        """This method stores data type into DB"""
        try:
            self.logger.debug("Store datatype: %s with Gid: %s" %
                              (datatype.__class__.__name__, datatype.gid))
            return dao.store_entity(datatype)
        except MissingDataSetException:
            self.logger.error(
                "Datatype %s has missing data and could not be imported properly."
                % (datatype, ))
            os.remove(datatype.get_storage_file_path())
        except IntegrityError as excep:
            self.logger.exception(excep)
            error_msg = "Could not import data with gid: %s. There is already a one with " \
                        "the same name or gid." % datatype.gid
            # Delete file if can't be imported
            os.remove(datatype.get_storage_file_path())
            raise ProjectImportException(error_msg)

    def __populate_project(self, project_path):
        """
        Create and store a Project entity.
        """
        self.logger.debug("Creating project from path: %s" % project_path)
        project_dict = self.files_helper.read_project_metadata(project_path)

        project_entity = manager_of_class(Project).new_instance()
        project_entity = project_entity.from_dict(project_dict, self.user_id)

        try:
            self.logger.debug("Storing imported project")
            return dao.store_entity(project_entity)
        except IntegrityError as excep:
            self.logger.exception(excep)
            error_msg = (
                "Could not import project: %s with gid: %s. There is already a "
                "project with the same name or gid.") % (project_entity.name,
                                                         project_entity.gid)
            raise ProjectImportException(error_msg)

    def __build_operation_from_file(self, project, operation_file):
        """
        Create Operation entity from metadata file.
        """
        operation_dict = XMLReader(operation_file).read_metadata()
        operation_entity = manager_of_class(Operation).new_instance()
        return operation_entity.from_dict(operation_dict, dao, self.user_id,
                                          project.gid)

    @staticmethod
    def __import_operation(operation_entity):
        """
        Store an Operation entity.
        """
        operation_entity = dao.store_entity(operation_entity)
        operation_group_id = operation_entity.fk_operation_group
        datatype_group = None

        if operation_group_id is not None:
            try:
                datatype_group = dao.get_datatypegroup_by_op_group_id(
                    operation_group_id)
            except SQLAlchemyError:
                # If no dataType group present for current op. group, create it.
                operation_group = dao.get_operationgroup_by_id(
                    operation_group_id)
                datatype_group = DataTypeGroup(
                    operation_group, operation_id=operation_entity.id)
                datatype_group.state = UploadAlgorithmCategoryConfig.defaultdatastate
                datatype_group = dao.store_entity(datatype_group)

        return operation_entity, datatype_group

    def import_simulator_configuration_zip(self, zip_file):
        # Now compute the name of the folder where the uploaded ZIP file will be unpacked
        temp_folder = self._compute_unpack_path()
        uq_file_name = temp_folder + ".zip"

        if isinstance(zip_file, FieldStorage) or isinstance(zip_file, Part):
            if not zip_file.file:
                raise ServicesBaseException(
                    "Could not process the given ZIP file...")

            with open(uq_file_name, 'wb') as file_obj:
                self.files_helper.copy_file(zip_file.file, file_obj)
        else:
            shutil.copy2(zip_file, uq_file_name)

        try:
            self.files_helper.unpack_zip(uq_file_name, temp_folder)
            return temp_folder
        except FileStructureException as excep:
            raise ServicesBaseException(
                "Could not process the given ZIP file..." + str(excep))
Example #11
class ExportManager(object):
    """
    This class provides basic methods for exporting data types of projects in different formats.
    """
    all_exporters = {}  # Dictionary containing all available exporters
    export_folder = None
    EXPORT_FOLDER_NAME = "EXPORT_TMP"
    EXPORTED_SIMULATION_NAME = "exported_simulation"
    EXPORTED_SIMULATION_DTS_DIR = "datatypes"
    ZIP_FILE_EXTENSION = "zip"
    logger = get_logger(__name__)

    def __init__(self):
        # Here we register all available data type exporters
        # If new exporters are supported, they should be added here
        self._register_exporter(TVBExporter())
        self._register_exporter(TVBLinkedExporter())
        self.export_folder = os.path.join(TvbProfile.current.TVB_STORAGE,
                                          self.EXPORT_FOLDER_NAME)
        self.files_helper = FilesHelper()

    def _register_exporter(self, exporter):
        """
        This method registers an exporter into the internal dictionary of available exporters.
        :param exporter: Instance of a data type exporter (extends ABCExporter)
        """
        if exporter is not None:
            self.all_exporters[exporter.__class__.__name__] = exporter

    def get_exporters_for_data(self, data):
        """
        Get available exporters for current data type.
        :returns: a dictionary with the {exporter_id : label}
        """
        if data is None:
            raise InvalidExportDataException(
                "Could not detect exporters for null data")

        self.logger.debug("Trying to determine exporters valid for %s" %
                          data.type)
        results = {}

        for exporterId in self.all_exporters.keys():
            exporter = self.all_exporters[exporterId]
            if exporter.accepts(data):
                results[exporterId] = exporter.get_label()

        return results

    def export_data(self, data, exporter_id, project):
        """
        Export provided data using given exporter
        :param data: data type to be exported
        :param exporter_id: identifier of the exporter to be used
        :param project: project that contains data to be exported

        :returns: a tuple with the following elements
            1. name of the file to be shown to user
            2. full path of the export file (available for download)
            3. boolean which specify if file can be deleted after download
        """
        if data is None:
            raise InvalidExportDataException(
                "Could not export null data. Please select data to be exported"
            )

        if exporter_id is None:
            raise ExportException(
                "Please select the exporter to be used for this operation")

        if exporter_id not in self.all_exporters:
            raise ExportException(
                "Provided exporter identifier is not a valid one")

        exporter = self.all_exporters[exporter_id]

        if project is None:
            raise ExportException(
                "Please provide the project where data files are stored")

        # Now we start the real export
        if not exporter.accepts(data):
            raise InvalidExportDataException(
                "Current data can not be exported by specified exporter")

        # Now compute and create the folder where the exported data will be stored.
        # This implies generating a folder which is unique for each export.
        data_export_folder = None
        try:
            data_export_folder = self._build_data_export_folder(data)
            self.logger.debug("Start export of data: %s" % data.type)
            export_data = exporter.export(data, data_export_folder, project)
        finally:
            # In case the export did not generate any file, delete the folder
            if data_export_folder is not None and len(
                    os.listdir(data_export_folder)) == 0:
                os.rmdir(data_export_folder)

        return export_data

    def _export_linked_datatypes(self, project, zip_file):
        linked_paths = ProjectService().get_linked_datatypes_storage_path(
            project)

        if not linked_paths:
            # do not export an empty operation
            return

        # Make an import operation which will contain links to other projects
        algo = dao.get_algorithm_by_module(TVB_IMPORTER_MODULE,
                                           TVB_IMPORTER_CLASS)
        op = model_operation.Operation(None, None, project.id, algo.id)
        op.project = project
        op.algorithm = algo
        op.id = 'links-to-external-projects'
        op.start_now()
        op.mark_complete(model_operation.STATUS_FINISHED)

        op_folder = self.files_helper.get_operation_folder(
            op.project.name, op.id)
        op_folder_name = os.path.basename(op_folder)

        # add linked datatypes to archive in the import operation
        for pth in linked_paths:
            zip_pth = op_folder_name + '/' + os.path.basename(pth)
            zip_file.write(pth, zip_pth)

        # remove these files, since we only want them in the export archive
        self.files_helper.remove_folder(op_folder)

    def export_project(self, project, optimize_size=False):
        """
        Given a project root and the TVB storage_path, create a ZIP
        ready for export.
        :param project: project object which identifies project to be exported
        """
        if project is None:
            raise ExportException("Please provide project to be exported")

        project_folder = self.files_helper.get_project_folder(project)
        project_datatypes = dao.get_datatypes_in_project(
            project.id, only_visible=optimize_size)
        to_be_exported_folders = []
        considered_op_ids = []
        folders_to_exclude = self._get_op_with_errors(project.id)

        if optimize_size:
            # take only the DataTypes with the visibility flag set ON
            for dt in project_datatypes:
                op_id = dt.fk_from_operation
                if op_id not in considered_op_ids:
                    to_be_exported_folders.append({
                        'folder':
                        self.files_helper.get_project_folder(
                            project, str(op_id)),
                        'archive_path_prefix':
                        str(op_id) + os.sep,
                        'exclude':
                        folders_to_exclude
                    })
                    considered_op_ids.append(op_id)

        else:
            folders_to_exclude.append("TEMP")
            to_be_exported_folders.append({
                'folder': project_folder,
                'archive_path_prefix': '',
                'exclude': folders_to_exclude
            })

        # Compute path and name of the zip file
        now = datetime.now()
        date_str = now.strftime("%Y-%m-%d_%H-%M")
        zip_file_name = "%s_%s.%s" % (date_str, project.name,
                                      self.ZIP_FILE_EXTENSION)

        export_folder = self._build_data_export_folder(project)
        result_path = os.path.join(export_folder, zip_file_name)

        with TvbZip(result_path, "w") as zip_file:
            # Pack project [filtered] content into a ZIP file:
            self.logger.debug("Done preparing, now we will write folders " +
                              str(len(to_be_exported_folders)))
            self.logger.debug(str(to_be_exported_folders))
            for pack in to_be_exported_folders:
                zip_file.write_folder(**pack)
            self.logger.debug(
                "Done exporting files, now we will export linked DTs")
            self._export_linked_datatypes(project, zip_file)
            # Make sure the Project.xml file gets copied:
            if optimize_size:
                self.logger.debug("Done linked, now we write the project xml")
                zip_file.write(
                    self.files_helper.get_project_meta_file_path(project.name),
                    self.files_helper.TVB_PROJECT_FILE)
            self.logger.debug("Done, closing")

        return result_path

    @staticmethod
    def _get_op_with_errors(project_id):
        """
        Return the ids of the operations with errors in the given project, as a list (their folders are excluded from export).
        """
        operations = dao.get_operations_with_error_in_project(project_id)
        op_with_errors = []
        for op in operations:
            op_with_errors.append(op.id)
        return op_with_errors

    def _build_data_export_folder(self, data):
        """
        This method computes the folder where results of an export operation will be
        stored for a while (e.g. until the download is done, or for one day)
        """
        now = datetime.now()
        date_str = "%d-%d-%d_%d-%d-%d_%d" % (now.year, now.month, now.day,
                                             now.hour, now.minute, now.second,
                                             now.microsecond)
        tmp_str = date_str + "@" + data.gid
        data_export_folder = os.path.join(self.export_folder, tmp_str)
        self.files_helper.check_created(data_export_folder)

        return data_export_folder

    def export_simulator_configuration(self, burst_id):
        burst = dao.get_burst_by_id(burst_id)
        if burst is None:
            raise InvalidExportDataException("Could not find burst with ID " +
                                             str(burst_id))

        op_folder = self.files_helper.get_project_folder(
            burst.project, str(burst.fk_simulation))
        tmp_export_folder = self._build_data_export_folder(burst)
        tmp_sim_folder = os.path.join(tmp_export_folder,
                                      self.EXPORTED_SIMULATION_NAME)

        if not os.path.exists(tmp_sim_folder):
            os.makedirs(tmp_sim_folder)

        all_view_model_paths, all_datatype_paths = h5.gather_references_of_view_model(
            burst.simulator_gid, op_folder)

        burst_path = h5.determine_filepath(burst.gid, op_folder)
        all_view_model_paths.append(burst_path)

        for vm_path in all_view_model_paths:
            dest = os.path.join(tmp_sim_folder, os.path.basename(vm_path))
            self.files_helper.copy_file(vm_path, dest)

        for dt_path in all_datatype_paths:
            dest = os.path.join(tmp_sim_folder,
                                self.EXPORTED_SIMULATION_DTS_DIR,
                                os.path.basename(dt_path))
            self.files_helper.copy_file(dt_path, dest)

        main_vm_path = h5.determine_filepath(burst.simulator_gid,
                                             tmp_sim_folder)
        H5File.remove_metadata_param(main_vm_path, 'history_gid')

        now = datetime.now()
        date_str = now.strftime("%Y-%m-%d_%H-%M")
        zip_file_name = "%s_%s.%s" % (date_str, str(burst_id),
                                      self.ZIP_FILE_EXTENSION)

        result_path = os.path.join(tmp_export_folder, zip_file_name)
        with TvbZip(result_path, "w") as zip_file:
            zip_file.write_folder(tmp_sim_folder)

        self.files_helper.remove_folder(tmp_sim_folder)
        return result_path
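
A usage sketch for the manager above: pick the first exporter that accepts a given datatype and run the export. The datatype and project arguments are assumed to be entities already loaded elsewhere (e.g. via the dao); nothing below is prescribed by the original example:

def export_first_available(datatype, project):
    """Export `datatype` with the first exporter that accepts it, or return None."""
    manager = ExportManager()
    exporters = manager.get_exporters_for_data(datatype)  # {exporter_id: label}
    if not exporters:
        return None
    exporter_id = next(iter(exporters))
    # As documented above, export_data returns (file name shown to the user,
    # full path of the export file, flag telling whether it can be deleted after download).
    return manager.export_data(datatype, exporter_id, project)
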
class CSVConnectivityImporterTest(TransactionalTestCase):
    """
    Unit-tests for csv connectivity importer.
    """

    def setUp(self):
        self.test_user = TestFactory.create_user()
        self.test_project = TestFactory.create_project(self.test_user)
        self.helper = FilesHelper()


    def tearDown(self):
        """
        Clean up test data.
        """
        FilesHelper().remove_project_structure(self.test_project.name)


    def _import_csv_test_connectivity(self, reference_connectivity_gid, subject):

        ### First prepare input data:
        data_dir = path.abspath(path.dirname(tvb_data.__file__))

        torronto_dir = path.join(data_dir, 'dti_pipeline', 'Output_Toronto')
        weights = path.join(torronto_dir, 'output_ConnectionCapacityMatrix.csv')
        tracts = path.join(torronto_dir, 'output_ConnectionDistanceMatrix.csv')
        weights_tmp = weights + '.tmp'
        tracts_tmp = tracts + '.tmp'
        self.helper.copy_file(weights, weights_tmp)
        self.helper.copy_file(tracts, tracts_tmp)

        ### Find importer and Launch Operation
        importer = TestFactory.create_adapter('tvb.adapters.uploaders.csv_connectivity_importer',
                                              'CSVConnectivityImporter')
        FlowService().fire_operation(importer, self.test_user, self.test_project.id,
                                     weights=weights_tmp, tracts=tracts_tmp, Data_Subject=subject,
                                     input_data=reference_connectivity_gid)


    def test_happy_flow_import(self):
        """
        Test that importing a CSV connectivity generates at least one new DataType in DB.
        """
        ConnectivityZipTest.import_test_connectivity96(self.test_user,
                                                       self.test_project,
                                                       subject=TEST_SUBJECT_A)

        field = FilterChain.datatype + '.subject'
        filters = FilterChain('', [field], [TEST_SUBJECT_A], ['=='])
        reference_connectivity = TestFactory.get_entity(self.test_project, Connectivity(), filters)

        dt_count_before = TestFactory.get_entity_count(self.test_project, Connectivity())

        self._import_csv_test_connectivity(reference_connectivity.gid, TEST_SUBJECT_B)

        dt_count_after = TestFactory.get_entity_count(self.test_project, Connectivity())
        self.assertEqual(dt_count_before + 1, dt_count_after)

        filters = FilterChain('', [field], [TEST_SUBJECT_B], ['like'])
        imported_connectivity = TestFactory.get_entity(self.test_project, Connectivity(), filters)

        # check relationship between the imported connectivity and the reference
        self.assertTrue((reference_connectivity.centres == imported_connectivity.centres).all())
        self.assertTrue((reference_connectivity.orientations == imported_connectivity.orientations).all())

        self.assertEqual(reference_connectivity.number_of_regions, imported_connectivity.number_of_regions)
        self.assertTrue((reference_connectivity.region_labels == imported_connectivity.region_labels).all())

        self.assertFalse((reference_connectivity.weights == imported_connectivity.weights).all())
        self.assertFalse((reference_connectivity.tract_lengths == imported_connectivity.tract_lengths).all())



    def test_bad_reference(self):
        TestFactory.import_cff(test_user=self.test_user, test_project=self.test_project)
        field = FilterChain.datatype + '.subject'
        filters = FilterChain('', [field], [TEST_SUBJECT_A], ['!='])
        bad_reference_connectivity = TestFactory.get_entity(self.test_project, Connectivity(), filters)

        self.assertRaises(OperationException, self._import_csv_test_connectivity,
                          bad_reference_connectivity.gid, TEST_SUBJECT_A)
Example #13
class CSVConnectivityImporterTest(TransactionalTestCase):
    """
    Unit-tests for csv connectivity importer.
    """
    def setUp(self):
        self.test_user = TestFactory.create_user()
        self.test_project = TestFactory.create_project(self.test_user)
        self.helper = FilesHelper()

    def tearDown(self):
        """
        Clean up test data.
        """
        FilesHelper().remove_project_structure(self.test_project.name)

    def _import_csv_test_connectivity(self, reference_connectivity_gid,
                                      subject):

        ### First prepare input data:
        data_dir = path.abspath(path.dirname(tvb_data.__file__))

        torronto_dir = path.join(data_dir, 'dti_pipeline', 'Output_Toronto')
        weights = path.join(torronto_dir,
                            'output_ConnectionCapacityMatrix.csv')
        tracts = path.join(torronto_dir, 'output_ConnectionDistanceMatrix.csv')
        weights_tmp = weights + '.tmp'
        tracts_tmp = tracts + '.tmp'
        self.helper.copy_file(weights, weights_tmp)
        self.helper.copy_file(tracts, tracts_tmp)

        ### Find importer and Launch Operation
        importer = TestFactory.create_adapter(
            'tvb.adapters.uploaders.csv_connectivity_importer',
            'CSVConnectivityImporter')
        FlowService().fire_operation(importer,
                                     self.test_user,
                                     self.test_project.id,
                                     weights=weights_tmp,
                                     tracts=tracts_tmp,
                                     Data_Subject=subject,
                                     input_data=reference_connectivity_gid)

    def test_happy_flow_import(self):
        """
        Test that importing a CSV connectivity generates at least one new DataType in DB.
        """
        ConnectivityZipTest.import_test_connectivity96(self.test_user,
                                                       self.test_project,
                                                       subject=TEST_SUBJECT_A)

        field = FilterChain.datatype + '.subject'
        filters = FilterChain('', [field], [TEST_SUBJECT_A], ['=='])
        reference_connectivity = TestFactory.get_entity(
            self.test_project, Connectivity(), filters)

        dt_count_before = TestFactory.get_entity_count(self.test_project,
                                                       Connectivity())

        self._import_csv_test_connectivity(reference_connectivity.gid,
                                           TEST_SUBJECT_B)

        dt_count_after = TestFactory.get_entity_count(self.test_project,
                                                      Connectivity())
        self.assertEqual(dt_count_before + 1, dt_count_after)

        filters = FilterChain('', [field], [TEST_SUBJECT_B], ['like'])
        imported_connectivity = TestFactory.get_entity(self.test_project,
                                                       Connectivity(), filters)

        # check relationship between the imported connectivity and the reference
        self.assertTrue(
            (reference_connectivity.centres == imported_connectivity.centres
             ).all())
        self.assertTrue((reference_connectivity.orientations ==
                         imported_connectivity.orientations).all())

        self.assertEqual(reference_connectivity.number_of_regions,
                         imported_connectivity.number_of_regions)
        self.assertTrue((reference_connectivity.region_labels ==
                         imported_connectivity.region_labels).all())

        self.assertFalse(
            (reference_connectivity.weights == imported_connectivity.weights
             ).all())
        self.assertFalse((reference_connectivity.tract_lengths ==
                          imported_connectivity.tract_lengths).all())

    def test_bad_reference(self):
        TestFactory.import_cff(test_user=self.test_user,
                               test_project=self.test_project)
        field = FilterChain.datatype + '.subject'
        filters = FilterChain('', [field], [TEST_SUBJECT_A], ['!='])
        bad_reference_connectivity = TestFactory.get_entity(
            self.test_project, Connectivity(), filters)

        self.assertRaises(OperationException,
                          self._import_csv_test_connectivity,
                          bad_reference_connectivity.gid, TEST_SUBJECT_A)
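
The assertions above rely on NumPy element-wise comparison; a small standalone illustration of the idiom, with made-up arrays:

import numpy

a = numpy.array([[1.0, 2.0], [3.0, 4.0]])
b = a.copy()
c = a * 2.0

# (x == y) is an element-wise boolean array; .all() is True only if every entry matches,
# which is why the tests expect True for centres/labels and False for the re-imported weights.
assert (a == b).all()
assert not (a == c).all()
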
Example #14
class FilesUpdateManager(UpdateManager):
    """
    Manager for updating H5 files version, when code gets changed.
    """

    UPDATE_SCRIPTS_SUFFIX = "_update_files"
    PROJECTS_PAGE_SIZE = 20
    DATA_TYPES_PAGE_SIZE = 500
    STATUS = True
    MESSAGE = "Done"

    def __init__(self):
        super(FilesUpdateManager, self).__init__(file_update_scripts,
                                                 TvbProfile.current.version.DATA_CHECKED_TO_VERSION,
                                                 TvbProfile.current.version.DATA_VERSION)
        self.files_helper = FilesHelper()

    def get_file_data_version(self, file_path):
        """
        Return the data version for the given file.

        :param file_path: the path on disk to the file for which you need the TVB data version
        :returns: a number representing the data version for which the input file was written
        """
        manager = self._get_manager(file_path)
        return manager.get_file_data_version()

    def is_file_up_to_date(self, file_path):
        """
        Returns True only if the data version of the file is equal with the
        data version specified into the TVB configuration file.
        """
        try:
            file_version = self.get_file_data_version(file_path)
        except MissingDataFileException as ex:
            self.log.exception(ex)
            return False
        except FileStructureException as ex:
            self.log.exception(ex)
            return False

        if file_version == TvbProfile.current.version.DATA_VERSION:
            return True
        return False

    def upgrade_file(self, input_file_name, datatype=None, burst_match_dict=None):
        """
        Upgrades the given file to the latest data version. The file will be upgraded
        sequentially, up until the current version from tvb.basic.config.settings.VersionSettings.DB_STRUCTURE_VERSION

        :param input_file_name: the path to the file which needs to be upgraded
        :returns: True when the update was successful and False when it resulted in an error
        """
        if self.is_file_up_to_date(input_file_name):
            # Avoid running the DB update of size, when H5 is not being changed, to speed-up
            return True

        file_version = self.get_file_data_version(input_file_name)
        self.log.info("Updating from version %s , file: %s " % (file_version, input_file_name))
        for script_name in self.get_update_scripts(file_version):
            temp_file_path = os.path.join(TvbProfile.current.TVB_TEMP_FOLDER,
                                          os.path.basename(input_file_name) + '.tmp')
            self.files_helper.copy_file(input_file_name, temp_file_path)
            try:
                self.run_update_script(script_name, input_file=input_file_name, burst_match_dict=burst_match_dict)
            except FileMigrationException as excep:
                self.files_helper.copy_file(temp_file_path, input_file_name)
                os.remove(temp_file_path)
                self.log.error(excep)
                return False

        if datatype:
            # Compute and update the disk_size attribute of the DataType in DB:
            datatype.disk_size = self.files_helper.compute_size_on_disk(input_file_name)
            dao.store_entity(datatype)

        return True

    def __upgrade_h5_list(self, h5_files):
        """
        Upgrade a list of DataTypes to the current version.

        :returns: (nr_of_dts_upgraded_fine, nr_of_dts_ignored) a two-tuple of integers representing
            the number of DataTypes for which the upgrade worked fine, and the number of DataTypes for which
            the upgrade has failed.
        """
        nr_of_dts_upgraded_fine = 0
        nr_of_dts_failed = 0

        burst_match_dict = {}
        for path in h5_files:
            update_result = self.upgrade_file(path, burst_match_dict=burst_match_dict)

            if update_result:
                nr_of_dts_upgraded_fine += 1
            else:
                nr_of_dts_failed += 1

        return nr_of_dts_upgraded_fine, nr_of_dts_failed

    # TODO: We should migrate the older scripts to Python 3 if we want to support migration for versions < 4
    def run_all_updates(self):
        """
        Upgrades all the data types from TVB storage to the latest data version.

        :returns: a two entry tuple (status, message) where status is a boolean that is True in case
            the upgrade was successfully for all DataTypes and False otherwise, and message is a status
            update message.
        """
        if TvbProfile.current.version.DATA_CHECKED_TO_VERSION < TvbProfile.current.version.DATA_VERSION:
            total_count = dao.count_all_datatypes()

            self.log.info("Starting to run H5 file updates from version %d to %d, for %d datatypes" % (
                TvbProfile.current.version.DATA_CHECKED_TO_VERSION,
                TvbProfile.current.version.DATA_VERSION, total_count))

            # Keep track of how many DataTypes were properly updated and how many
            # were marked as invalid due to missing files or invalid manager.
            start_time = datetime.now()

            file_paths = self.get_all_h5_paths()
            total_count = len(file_paths)
            no_ok, no_error = self.__upgrade_h5_list(file_paths)

            self.log.info("Updated H5 files in total: %d [fine:%d, failed:%d in: %s min]" % (
                total_count, no_ok, no_error, int((datetime.now() - start_time).seconds / 60)))
            delete_old_burst_table_after_migration()

            # Now update the configuration file since update was done
            config_file_update_dict = {stored.KEY_LAST_CHECKED_FILE_VERSION: TvbProfile.current.version.DATA_VERSION}

            if no_error == 0:
                # Everything went fine
                config_file_update_dict[stored.KEY_FILE_STORAGE_UPDATE_STATUS] = FILE_STORAGE_VALID
                FilesUpdateManager.STATUS = True
                FilesUpdateManager.MESSAGE = ("File upgrade finished successfully for all %s entries. "
                                              "Thank you for your patience!" % total_count)
                self.log.info(FilesUpdateManager.MESSAGE)
            else:
                # Something went wrong
                config_file_update_dict[stored.KEY_FILE_STORAGE_UPDATE_STATUS] = FILE_STORAGE_INVALID
                FilesUpdateManager.STATUS = False
                FilesUpdateManager.MESSAGE = ("Out of %s stored DataTypes, %s were upgraded successfully, but %s had "
                                              "faults and were marked invalid" % (total_count, no_ok, no_error))
                self.log.warning(FilesUpdateManager.MESSAGE)

            TvbProfile.current.version.DATA_CHECKED_TO_VERSION = TvbProfile.current.version.DATA_VERSION
            TvbProfile.current.manager.add_entries_to_config_file(config_file_update_dict)
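    # Illustrative usage sketch (hypothetical caller, not part of the original sources): run_all_updates()
    # returns nothing, so the outcome is read back from the class-level attributes it sets.
    #
    #   manager = FilesUpdateManager()
    #   manager.run_all_updates()
    #   if FilesUpdateManager.STATUS:
    #       print(FilesUpdateManager.MESSAGE)
    #   else:
    #       print("Upgrade finished with problems: %s" % FilesUpdateManager.MESSAGE)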

    @staticmethod
    def _get_manager(file_path):
        """
        Returns a storage manager.
        """
        folder, file_name = os.path.split(file_path)
        return HDF5StorageManager(folder, file_name)

    @staticmethod
    def get_all_h5_paths():
        """
        This method returns a list of all h5 files and it is used in the migration from version 4 to 5.
        The h5 files inside a certain project are retrieved in numerical order (1, 2, 3 etc.).
        """
        h5_files = []
        projects_folder = FilesHelper().get_projects_folder()

        for project_path in os.listdir(projects_folder):
            # Getting operation folders inside the current project
            project_full_path = os.path.join(projects_folder, project_path)
            try:
                project_operations = os.listdir(project_full_path)
            except NotADirectoryError:
                continue
            project_operations_base_names = [os.path.basename(op) for op in project_operations]

            for op_folder in project_operations_base_names:
                try:
                    # Only folders with a numeric name are valid operation folders
                    int(op_folder)
                    op_folder_path = os.path.join(project_full_path, op_folder)
                    for file in os.listdir(op_folder_path):
                        if file.endswith(FilesHelper.TVB_STORAGE_FILE_EXTENSION):
                            h5_file = os.path.join(op_folder_path, file)
                            try:
                                if FilesUpdateManager._is_empty_file(h5_file):
                                    continue
                                h5_files.append(h5_file)
                            except FileStructureException:
                                continue
                except ValueError:
                    pass

        # Sort all h5 files based on their creation date stored in the files themselves
        sorted_h5_files = sorted(h5_files, key=lambda h5_path: FilesUpdateManager._get_create_date_for_sorting(
            h5_path) or datetime.now())
        return sorted_h5_files
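    # Illustrative sketch (not part of the original sources): the returned list is ordered by the
    # 'Create_date' metadata read from each file, oldest first.
    #
    #   for h5_path in FilesUpdateManager.get_all_h5_paths():
    #       print(h5_path)   # absolute path of a *.h5 file inside a numeric operation folder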

    @staticmethod
    def _is_empty_file(h5_file):
        return H5File.get_metadata_param(h5_file, 'Create_date') is None

    @staticmethod
    def _get_create_date_for_sorting(h5_file):
        create_date_str = str(H5File.get_metadata_param(h5_file, 'Create_date'), 'utf-8')
        create_date = string2date(create_date_str, date_format='datetime:%Y-%m-%d %H:%M:%S.%f')
        return create_date
class ImportService(object):
    """
    Service for importing TVB entities into system.
    It supports TVB exported H5 files as input, but it should also handle H5 files 
    generated outside of TVB, as long as they respect the same structure.
    """


    def __init__(self):
        self.logger = get_logger(__name__)
        self.user_id = None
        self.files_helper = FilesHelper()
        self.created_projects = []


    def _download_and_unpack_project_zip(self, uploaded, uq_file_name, temp_folder):

        if isinstance(uploaded, FieldStorage) or isinstance(uploaded, Part):
            if not uploaded.file:
                raise ProjectImportException("Please select the archive which contains the project structure.")
            with open(uq_file_name, 'wb') as file_obj:
                self.files_helper.copy_file(uploaded.file, file_obj)
        else:
            shutil.copy2(uploaded, uq_file_name)

        try:
            self.files_helper.unpack_zip(uq_file_name, temp_folder)
        except FileStructureException as excep:
            self.logger.exception(excep)
            raise ProjectImportException("Bad ZIP archive provided. A TVB exported project is expected!")


    @staticmethod
    def _compute_unpack_path():
        """
        :return: the path of the folder where the uploaded ZIP will be expanded
        """
        now = datetime.now()
        date_str = "%d-%d-%d_%d-%d-%d_%d" % (now.year, now.month, now.day, now.hour,
                                             now.minute, now.second, now.microsecond)
        uq_name = "%s-ImportProject" % date_str
        return os.path.join(TvbProfile.current.TVB_TEMP_FOLDER, uq_name)
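    # Illustrative example (values are made up): an import started on 2020-07-01 at 10:20:30.000123
    # would unpack into something like
    #   <TVB_TEMP_FOLDER>/2020-7-1_10-20-30_123-ImportProject
    # while import_project_structure() copies the uploaded archive next to it as "<same path>.zip".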


    @transactional
    def import_project_structure(self, uploaded, user_id):
        """
        Execute import operations:
         
        1. check if ZIP or folder 
        2. find all project nodes
        3. for each project node:
            - create project
            - create all operations
            - import all images
            - create all dataTypes
        """

        self.user_id = user_id
        self.created_projects = []

        # Now compute the name of the folder where the uploaded ZIP file will be unpacked
        temp_folder = self._compute_unpack_path()
        uq_file_name = temp_folder + ".zip"

        try:
            self._download_and_unpack_project_zip(uploaded, uq_file_name, temp_folder)
            self._import_projects_from_folder(temp_folder)

        except Exception as excep:
            self.logger.exception("Error encountered during import. Deleting projects created during this operation.")
            # Remove project folders created so far.
            # Note that using the project service to remove the projects will not work,
            # because we do not have support for nested transaction.
            # Removing from DB is not necessary because in transactional env a simple exception throw
            # will erase everything to be inserted.
            for project in self.created_projects:
                project_path = os.path.join(TvbProfile.current.TVB_STORAGE, FilesHelper.PROJECTS_FOLDER, project.name)
                shutil.rmtree(project_path)
            raise ProjectImportException(str(excep))

        finally:
            # Now delete uploaded file
            if os.path.exists(uq_file_name):
                os.remove(uq_file_name)
            # Now delete the temporary folder where the uploaded ZIP was unpacked.
            if os.path.exists(temp_folder):
                shutil.rmtree(temp_folder)
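    # Illustrative usage sketch (hypothetical path and user id, not part of the original sources):
    #
    #   service = ImportService()
    #   # `uploaded` may be a FieldStorage/Part coming from the web layer, or a plain file path:
    #   service.import_project_structure("/tmp/exported_project.zip", user_id=1)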


    @staticmethod
    def _load_burst_info_from_json(project_path):
        bursts_dict = {}
        dt_mappings_dict = {}
        bursts_file = os.path.join(project_path, BURST_INFO_FILE)
        if os.path.isfile(bursts_file):
            with open(bursts_file) as f:
                bursts_info_dict = json.load(f)
            bursts_dict = bursts_info_dict[BURSTS_DICT_KEY]
            dt_mappings_dict = bursts_info_dict[DT_BURST_MAP]
            # Remove the file only when it exists, otherwise os.remove would raise for exports without burst info
            os.remove(bursts_file)
        return bursts_dict, dt_mappings_dict


    @staticmethod
    def _import_bursts(project_entity, bursts_dict):
        """
        Re-create old bursts, but keep a mapping between the id it has here and the old-id it had
        in the project where they were exported, so we can re-add the datatypes to them.
        """
        burst_ids_mapping = {}

        for old_burst_id in bursts_dict:
            burst_information = BurstInformation.load_from_dict(bursts_dict[old_burst_id])
            burst_entity = model.BurstConfiguration(project_entity.id)
            burst_entity.from_dict(burst_information.data)
            burst_entity = dao.store_entity(burst_entity)
            burst_ids_mapping[int(old_burst_id)] = burst_entity.id
            # We don't need the data in dictionary form anymore, so update it with new BurstInformation object
            bursts_dict[old_burst_id] = burst_information
        return burst_ids_mapping
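    # Illustrative shape of the returned mapping (ids are made up): keys are the burst ids from the
    # exported project, values are the ids assigned when the bursts are stored in this installation:
    #
    #   burst_ids_mapping == {3: 17, 4: 18}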


    def _import_projects_from_folder(self, temp_folder):
        """
        Process each project from the uploaded pack, to extract names.
        """
        project_roots = []
        for root, _, files in os.walk(temp_folder):
            if FilesHelper.TVB_PROJECT_FILE in files:
                project_roots.append(root)

        for project_path in project_roots:
            update_manager = ProjectUpdateManager(project_path)
            update_manager.run_all_updates()
            project_entity = self.__populate_project(project_path)

            # Compute the path where to store files of the imported project
            new_project_path = os.path.join(TvbProfile.current.TVB_STORAGE,
                                            FilesHelper.PROJECTS_FOLDER, project_entity.name)
            if project_path != new_project_path:
                shutil.move(project_path, new_project_path)

            self.created_projects.append(project_entity)

            # Keep a list with all bursts that were imported, since we will want to also add the workflow
            # steps after we are finished with importing the operations and datatypes. We need to store
            # the bursts first, because we need to know which new ids they get for the operations' parent_burst.
            bursts_dict, dt_mappings_dict = self._load_burst_info_from_json(new_project_path)
            burst_ids_mapping = self._import_bursts(project_entity, bursts_dict)

            # Now import project operations
            self.import_project_operations(project_entity, new_project_path, dt_mappings_dict, burst_ids_mapping)
            # Import images
            self._store_imported_images(project_entity)
            # Now we can finally import workflow related entities
            self.import_workflows(project_entity, bursts_dict, burst_ids_mapping)


    def import_workflows(self, project, bursts_dict, burst_ids_mapping):
        """
        Import the workflow entities for all bursts imported in the project.

        :param project: the current project entity, into which the workflows are imported

        :param bursts_dict: a dictionary that holds all the required information in order to
                            import the bursts from the new project

        :param burst_ids_mapping: a dictionary of the form {old_burst_id : new_burst_id} so we
                                  know what burst to link each workflow to
        """
        for burst_id in bursts_dict:
            workflows_info = bursts_dict[burst_id].get_workflows()
            for one_wf_info in workflows_info:
                # Use the new burst id when creating the workflow
                workflow_entity = model.Workflow(project.id, burst_ids_mapping[int(burst_id)])
                workflow_entity.from_dict(one_wf_info.data)
                workflow_entity = dao.store_entity(workflow_entity)
                wf_steps_info = one_wf_info.get_workflow_steps()
                view_steps_info = one_wf_info.get_view_steps()
                self.import_workflow_steps(workflow_entity, wf_steps_info, view_steps_info)
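    # Illustrative sketch (hypothetical ids): with burst_ids_mapping = {3: 17}, a workflow exported
    # under burst 3 is recreated here as model.Workflow(project.id, 17) before its steps are rebuilt
    # by import_workflow_steps() below.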


    def import_workflow_steps(self, workflow, wf_steps, view_steps):
        """
        Import all workflow steps for the given workflow. We create both wf_steps and view_steps
        in the same method, since if a wf_step has to be omitted for some reason, we also need to
        omit that view step.

        :param workflow: a model.Workflow entity to which we need to add workflow steps

        :param wf_steps: a list of WorkflowStepInformation entities, from which we will rebuild 
                         the workflow steps

        :param view_steps: a list of WorkflowViewStepInformation entities, from which we will 
                           rebuild the workflow view steps

        """
        for wf_step in wf_steps:
            try:
                algorithm = wf_step.get_algorithm()
                if algorithm is None:
                    # The algorithm is invalid for some reason. Remove the matching view step as well.
                    position = wf_step.index()
                    # Iterate over a copy, since entries are removed from view_steps inside the loop
                    for entry in list(view_steps):
                        if entry.index() == position:
                            view_steps.remove(entry)
                    continue
                wf_step_entity = model.WorkflowStep(algorithm.id)
                wf_step_entity.from_dict(wf_step.data)
                wf_step_entity.fk_workflow = workflow.id
                wf_step_entity.fk_operation = wf_step.get_operation_id()
                dao.store_entity(wf_step_entity)
            except Exception:
                # only log exception and ignore this as it is not very important:
                self.logger.exception("Could not restore WorkflowStep: %s" % wf_step.get_algorithm().name)

        for view_step in view_steps:
            try:
                algorithm = view_step.get_algorithm()
                if algorithm is None:
                    continue
                view_step_entity = model.WorkflowStepView(algorithm.id)
                view_step_entity.from_dict(view_step.data)
                view_step_entity.fk_workflow = workflow.id
                view_step_entity.fk_portlet = view_step.get_portlet().id
                dao.store_entity(view_step_entity)
            except Exception:
                # only log exception and ignore this as it is not very important:
                self.logger.exception("Could not restore WorkflowViewStep " + view_step.get_algorithm().name)


    @staticmethod
    def _append_tmp_to_folders_containing_operations(import_path):
        """
        Find folders containing operations and rename them, return the renamed paths
        """
        pths = []
        for root, _, files in os.walk(import_path):
            if FilesHelper.TVB_OPERARATION_FILE in files:
                # Found an operation folder - append TMP to its name
                tmp_op_folder = root + 'tmp'
                os.rename(root, tmp_op_folder)
                operation_file_path = os.path.join(tmp_op_folder, FilesHelper.TVB_OPERARATION_FILE)
                pths.append(operation_file_path)
        return pths
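    # Illustrative example (paths are made up): an exported operation folder
    #   <unpack_dir>/ProjectX/42
    # is renamed to <unpack_dir>/ProjectX/42tmp and the returned list then contains
    #   <unpack_dir>/ProjectX/42tmp/<TVB_OPERARATION_FILE>,
    # presumably so that the later move into the folder named after the newly stored operation id
    # cannot collide with the original numeric folder name.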


    def _load_operations_from_paths(self, project, op_paths):
        """
        Load operations from paths containing them.
        :returns: Operations ordered by start/creation date, to be sure data dependencies are resolved correctly
        """
        def by_time(op):
            return op.start_date or op.create_date or datetime.now()

        operations = []

        for operation_file_path in op_paths:
            operation = self.__build_operation_from_file(project, operation_file_path)
            operation.import_file = operation_file_path
            operations.append(operation)

        operations.sort(key=by_time)
        return operations


    def _load_datatypes_from_operation_folder(self, op_path, operation_entity, datatype_group):
        """
        Loads datatypes from operation folder
        :returns: Datatypes ordered by creation date (to solve any dependencies)
        """
        all_datatypes = []
        for file_name in os.listdir(op_path):
            if file_name.endswith(FilesHelper.TVB_STORAGE_FILE_EXTENSION):
                h5_file = os.path.join(op_path, file_name)
                try:
                    file_update_manager = FilesUpdateManager()
                    file_update_manager.upgrade_file(h5_file)
                    datatype = self.load_datatype_from_file(op_path, file_name, operation_entity.id, datatype_group)
                    all_datatypes.append(datatype)

                except IncompatibleFileManagerException:
                    os.remove(h5_file)
                    self.logger.warning("Incompatible H5 file will be ignored: %s" % h5_file)
                    self.logger.exception("Incompatibility details ...")

        all_datatypes.sort(key=lambda dt: dt.create_date)
        for dt in all_datatypes:
            self.logger.debug("Import order %s: %s" % (dt.type, dt.gid))
        return all_datatypes


    def _store_imported_datatypes_in_db(self, project, all_datatypes, dt_burst_mappings, burst_ids_mapping):
        def by_time(dt):
            return dt.create_date or datetime.now()

        if burst_ids_mapping is None:
            burst_ids_mapping = {}
        if dt_burst_mappings is None:
            dt_burst_mappings = {}

        all_datatypes.sort(key=by_time)

        for datatype in all_datatypes:
            old_burst_id = dt_burst_mappings.get(datatype.gid)

            if old_burst_id is not None:
                datatype.fk_parent_burst = burst_ids_mapping[old_burst_id]

            datatype_already_in_tvb = dao.get_datatype_by_gid(datatype.gid)

            if not datatype_already_in_tvb:
                # Compute disk size. Similar to ABCAdapter._capture_operation_results.
                # No need to close the h5 as we have not written to it.
                associated_file = os.path.join(datatype.storage_path, datatype.get_storage_file_name())
                datatype.disk_size = FilesHelper.compute_size_on_disk(associated_file)

                self.store_datatype(datatype)
            else:
                FlowService.create_link([datatype_already_in_tvb.id], project.id)

    def _store_imported_images(self, project):
        """
        Import all images from project
        """
        images_root = self.files_helper.get_images_folder(project.name)
        for root, _, files in os.walk(images_root):
            for file_name in files:
                if file_name.endswith(FilesHelper.TVB_FILE_EXTENSION):
                    self._populate_image(os.path.join(root, file_name), project.id)


    def import_project_operations(self, project, import_path, dt_burst_mappings=None, burst_ids_mapping=None):
        """
        This method scans the provided folder and identifies all operations that need to be imported
        """
        op_paths = self._append_tmp_to_folders_containing_operations(import_path)
        operations = self._load_operations_from_paths(project, op_paths)

        imported_operations = []
        datatypes = []

        # Here we process each operation found
        for operation in operations:
            self.logger.debug("Importing operation " + str(operation))
            old_operation_folder, _ = os.path.split(operation.import_file)
            operation_entity, datatype_group = self.__import_operation(operation)

            # Rename operation folder with the ID of the stored operation
            new_operation_path = FilesHelper().get_operation_folder(project.name, operation_entity.id)
            if old_operation_folder != new_operation_path:
                # Delete folder of the new operation, otherwise move will fail
                shutil.rmtree(new_operation_path)
                shutil.move(old_operation_folder, new_operation_path)

            operation_datatypes = self._load_datatypes_from_operation_folder(new_operation_path, operation_entity,
                                                                             datatype_group)
            imported_operations.append(operation_entity)
            datatypes.extend(operation_datatypes)

        self._store_imported_datatypes_in_db(project, datatypes, dt_burst_mappings, burst_ids_mapping)
        return imported_operations
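    # Illustrative usage sketch (hypothetical arguments, not part of the original sources): when no
    # burst information is available, the optional mappings default to empty dicts further down in
    # _store_imported_datatypes_in_db.
    #
    #   imported_ops = service.import_project_operations(project_entity, "/tmp/unpacked/ProjectX")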


    def _populate_image(self, file_name, project_id):
        """
        Create and store an image entity.
        """
        figure_dict = XMLReader(file_name).read_metadata()
        new_path = os.path.join(os.path.split(file_name)[0], os.path.split(figure_dict['file_path'])[1])
        if not os.path.exists(new_path):
            self.logger.warning("Expected to find image path %s. Skipping" % new_path)
            return

        op = dao.get_operation_by_gid(figure_dict['fk_from_operation'])
        figure_dict['fk_op_id'] = op.id if op is not None else None
        figure_dict['fk_user_id'] = self.user_id
        figure_dict['fk_project_id'] = project_id
        figure_entity = manager_of_class(model.ResultFigure).new_instance()
        figure_entity = figure_entity.from_dict(figure_dict)
        stored_entity = dao.store_entity(figure_entity)

        # Update image meta-data with the new details after import
        figure = dao.load_figure(stored_entity.id)
        self.logger.debug("Store imported figure")
        self.files_helper.write_image_metadata(figure)


    def load_datatype_from_file(self, storage_folder, file_name, op_id, datatype_group=None, move=True):
        """
        Creates an instance of datatype from storage / H5 file 
        :returns: datatype
        """
        self.logger.debug("Loading datatType from file: %s" % file_name)
        storage_manager = HDF5StorageManager(storage_folder, file_name)
        meta_dictionary = storage_manager.get_metadata()
        meta_structure = DataTypeMetaData(meta_dictionary)

        # Now try to determine class and instantiate it
        class_name = meta_structure[DataTypeMetaData.KEY_CLASS_NAME]
        class_module = meta_structure[DataTypeMetaData.KEY_MODULE]
        datatype = __import__(class_module, globals(), locals(), [class_name])
        datatype = getattr(datatype, class_name)
        type_instance = manager_of_class(datatype).new_instance()

        # Now we fill data into instance
        type_instance.type = str(type_instance.__class__.__name__)
        type_instance.module = str(type_instance.__module__)

        # Fill instance with meta data
        type_instance.load_from_metadata(meta_dictionary)

        # Add all the required attributes
        if datatype_group is not None:
            type_instance.fk_datatype_group = datatype_group.id
        type_instance.set_operation_id(op_id)

        # Now move storage file into correct folder if necessary
        current_file = os.path.join(storage_folder, file_name)
        new_file = type_instance.get_storage_file_path()
        if new_file != current_file and move:
            shutil.move(current_file, new_file)

        return type_instance
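    # Illustrative sketch of the dynamic class resolution used above (example names only):
    #
    #   module = __import__("tvb.datatypes.connectivity", globals(), locals(), ["Connectivity"])
    #   cls = getattr(module, "Connectivity")            # concrete DataType class
    #   instance = manager_of_class(cls).new_instance()  # empty instance, to be filled from metadata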


    def store_datatype(self, datatype):
        """This method stores data type into DB"""
        try:
            self.logger.debug("Store datatype: %s with Gid: %s" % (datatype.__class__.__name__, datatype.gid))
            return dao.store_entity(datatype)
        except MissingDataSetException:
            self.logger.error("Datatype %s has missing data and could not be imported properly." % (datatype,))
            os.remove(datatype.get_storage_file_path())
        except IntegrityError as excep:
            self.logger.exception(excep)
            error_msg = "Could not import data with gid: %s. There is already a one with " \
                        "the same name or gid." % datatype.gid
            # Delete file if can't be imported
            os.remove(datatype.get_storage_file_path())
            raise ProjectImportException(error_msg)


    def __populate_project(self, project_path):
        """
        Create and store a Project entity.
        """
        self.logger.debug("Creating project from path: %s" % project_path)
        project_dict = self.files_helper.read_project_metadata(project_path)

        project_entity = manager_of_class(model.Project).new_instance()
        project_entity = project_entity.from_dict(project_dict, self.user_id)

        try:
            self.logger.debug("Storing imported project")
            return dao.store_entity(project_entity)
        except IntegrityError as excep:
            self.logger.exception(excep)
            error_msg = ("Could not import project: %s with gid: %s. There is already a "
                         "project with the same name or gid.") % (project_entity.name, project_entity.gid)
            raise ProjectImportException(error_msg)


    def __build_operation_from_file(self, project, operation_file):
        """
        Create Operation entity from metadata file.
        """
        operation_dict = XMLReader(operation_file).read_metadata()
        operation_entity = manager_of_class(model.Operation).new_instance()
        return operation_entity.from_dict(operation_dict, dao, self.user_id, project.gid)


    @staticmethod
    def __import_operation(operation_entity):
        """
        Store an Operation entity.
        """
        operation_entity = dao.store_entity(operation_entity)
        operation_group_id = operation_entity.fk_operation_group
        datatype_group = None

        if operation_group_id is not None:
            try:
                datatype_group = dao.get_datatypegroup_by_op_group_id(operation_group_id)
            except SQLAlchemyError:
                # If no dataType group present for current op. group, create it.
                operation_group = dao.get_operationgroup_by_id(operation_group_id)
                datatype_group = model.DataTypeGroup(operation_group, operation_id=operation_entity.id)
                datatype_group.state = ADAPTERS['Upload']['defaultdatastate']
                datatype_group = dao.store_entity(datatype_group)

        return operation_entity, datatype_group


    def load_burst_entity(self, json_burst, project_id):
        """
        Load BurstConfiguration from JSON (possibly exported from a different machine).
        Nothing gets persisted in DB or on disk.

        :param json_burst: Burst JSON export
        :param project_id: Current project ID (it will be used if the user later starts this simulation)
        :return: BurstConfiguration filled from JSON
        """

        burst_information = BurstInformation.load_from_dict(json_burst)
        burst_entity = model.BurstConfiguration(project_id)
        burst_entity.from_dict(burst_information.data)
        burst_entity.prepare_after_load()
        burst_entity.reset_tabs()

        workflow_info = burst_information.get_workflows()[0]
        workflow_entity = model.Workflow(project_id, None)
        workflow_entity.from_dict(workflow_info.data)

        view_steps = workflow_info.get_view_steps()
        analyze_steps = workflow_info.get_workflow_steps()

        for view_step in view_steps:
            try:
                algorithm = view_step.get_algorithm()
                portlet = view_step.get_portlet()
                view_step_entity = model.WorkflowStepView(algorithm.id, portlet_id=portlet.id)
                view_step_entity.from_dict(view_step.data)
                view_step_entity.workflow = workflow_entity

                ## For each view step, also load the analyze steps that share the same tab_index / index_in_tab.
                analyzers = []
                for an_step in analyze_steps:
                    if (an_step.data["tab_index"] != view_step_entity.tab_index
                            or an_step.data["index_in_tab"] != view_step_entity.index_in_tab):
                        continue
                    algorithm = an_step.get_algorithm()
                    wf_step_entity = model.WorkflowStep(algorithm.id)
                    wf_step_entity.from_dict(an_step.data)
                    wf_step_entity.workflow = workflow_entity
                    analyzers.append(wf_step_entity)

                portlet = PortletConfiguration(portlet.id)
                portlet.set_visualizer(view_step_entity)
                portlet.set_analyzers(analyzers)
                burst_entity.set_portlet(view_step_entity.tab_index, view_step_entity.index_in_tab, portlet)

            except Exception:
                # only log exception and ignore this step from loading
                self.logger.exception("Could not restore Workflow Step " + view_step.get_algorithm().name)

        return burst_entity
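# Illustrative usage sketch (hypothetical file name and project id, not part of the original sources):
# load_burst_entity() only builds in-memory entities, nothing is persisted to DB or disk.
#
#   with open("exported_burst.json") as f:
#       json_burst = json.load(f)
#   burst = ImportService().load_burst_entity(json_burst, project_id=1)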
class TestCSVConnectivityImporter(BaseTestCase):
    """
    Unit-tests for csv connectivity importer.
    """
    def setup_method(self):
        self.test_user = TestFactory.create_user()
        self.test_project = TestFactory.create_project(self.test_user)
        self.helper = FilesHelper()

    def teardown_method(self):
        """
        Clean-up tests data
        """
        self.clean_database()
        FilesHelper().remove_project_structure(self.test_project.name)

    def _import_csv_test_connectivity(self, reference_connectivity_gid,
                                      subject):
        ### First prepare input data:
        data_dir = path.abspath(path.dirname(tvb_data.__file__))

        toronto_dir = path.join(data_dir, 'dti_pipeline_toronto')
        weights = path.join(toronto_dir, 'output_ConnectionCapacityMatrix.csv')
        tracts = path.join(toronto_dir, 'output_ConnectionDistanceMatrix.csv')
        weights_tmp = weights + '.tmp'
        tracts_tmp = tracts + '.tmp'
        self.helper.copy_file(weights, weights_tmp)
        self.helper.copy_file(tracts, tracts_tmp)

        view_model = CSVConnectivityImporterModel()
        view_model.weights = weights_tmp
        view_model.tracts = tracts_tmp
        view_model.data_subject = subject
        view_model.input_data = reference_connectivity_gid
        TestFactory.launch_importer(CSVConnectivityImporter, view_model,
                                    self.test_user, self.test_project, False)

    def test_happy_flow_import(self):
        """
        Test that importing a CFF generates at least one DataType in DB.
        """

        zip_path = path.join(path.dirname(tvb_data.__file__), 'connectivity',
                             'connectivity_96.zip')
        TestFactory.import_zip_connectivity(self.test_user,
                                            self.test_project,
                                            zip_path,
                                            subject=TEST_SUBJECT_A)

        field = FilterChain.datatype + '.subject'
        filters = FilterChain('', [field], [TEST_SUBJECT_A], ['=='])
        reference_connectivity_index = TestFactory.get_entity(
            self.test_project, ConnectivityIndex, filters)

        dt_count_before = TestFactory.get_entity_count(self.test_project,
                                                       ConnectivityIndex)

        self._import_csv_test_connectivity(reference_connectivity_index.gid,
                                           TEST_SUBJECT_B)

        dt_count_after = TestFactory.get_entity_count(self.test_project,
                                                      ConnectivityIndex)
        assert dt_count_before + 1 == dt_count_after

        filters = FilterChain('', [field], [TEST_SUBJECT_B], ['like'])
        imported_connectivity_index = TestFactory.get_entity(
            self.test_project, ConnectivityIndex, filters)

        # check relationship between the imported connectivity and the reference
        assert reference_connectivity_index.number_of_regions == imported_connectivity_index.number_of_regions
        assert reference_connectivity_index.number_of_connections != imported_connectivity_index.number_of_connections

        reference_connectivity = h5.load_from_index(
            reference_connectivity_index)
        imported_connectivity = h5.load_from_index(imported_connectivity_index)

        assert not (reference_connectivity.weights
                    == imported_connectivity.weights).all()
        assert not (reference_connectivity.tract_lengths
                    == imported_connectivity.tract_lengths).all()

        assert (reference_connectivity.centres == imported_connectivity.centres
                ).all()
        assert (reference_connectivity.orientations ==
                imported_connectivity.orientations).all()
        assert (reference_connectivity.region_labels ==
                imported_connectivity.region_labels).all()

    def test_bad_reference(self):
        zip_path = path.join(path.dirname(tvb_data.__file__), 'connectivity',
                             'connectivity_66.zip')
        TestFactory.import_zip_connectivity(self.test_user, self.test_project,
                                            zip_path)
        field = FilterChain.datatype + '.subject'
        filters = FilterChain('', [field], [TEST_SUBJECT_A], ['!='])
        bad_reference_connectivity = TestFactory.get_entity(
            self.test_project, ConnectivityIndex, filters)

        with pytest.raises(OperationException):
            self._import_csv_test_connectivity(bad_reference_connectivity.gid,
                                               TEST_SUBJECT_A)