def _export_linked_datatypes(self, project, zip_file):
    """
    Add the datatypes linked into `project` to an already opened export archive.

    The storage paths returned by `self._get_linked_datatypes_storage_path` are
    written into the archive under the folder of a synthetic import operation
    (id 'links-to-external-projects'), next to that operation's metadata file.

    :param project: project being exported; its `id` and `name` are read here
    :param zip_file: opened archive object exposing `write(path, archive_name)`
    :returns: None. Nothing is written when the project has no linked datatypes.
    """
    files_helper = FilesHelper()
    linked_paths = self._get_linked_datatypes_storage_path(project)

    if not linked_paths:
        # do not export an empty operation
        return

    # Make a import operation which will contain links to other projects
    algo = dao.get_algorithm_by_module(TVB_IMPORTER_MODULE, TVB_IMPORTER_CLASS)
    op = model.Operation(None, project.id, algo.id, '')
    op.project = project
    op.algorithm = algo
    # The id is a fixed label used to name the operation folder.
    op.id = 'links-to-external-projects'
    op.start_now()
    op.mark_complete(model.STATUS_FINISHED)

    # write operation.xml to disk
    files_helper.write_operation_metadata(op)
    op_folder = files_helper.get_operation_folder(op.project.name, op.id)

    try:
        operation_xml = files_helper.get_operation_meta_file_path(op.project.name, op.id)
        op_folder_name = os.path.basename(op_folder)

        # add operation.xml
        zip_file.write(operation_xml, op_folder_name + '/' + os.path.basename(operation_xml))

        # add linked datatypes to archive in the import operation
        for pth in linked_paths:
            zip_pth = op_folder_name + '/' + os.path.basename(pth)
            zip_file.write(pth, zip_pth)
    finally:
        # remove these files, since we only want them in export archive.
        # Done in `finally` so a failed archive write does not leave the
        # temporary operation folder behind in the project storage.
        files_helper.remove_folder(op_folder)
def export(self, data, export_folder, project):
    """
    Export a data type, either as a single storage file or as a ZIP archive.

    * A plain data type is handed to `self.copy_dt_to_export_folder`.
    * A data type group gets a ZIP (built with `FilesHelper.zip_folders`)
      containing the operation folder of every data type in the group.

    :param data: data type (or data type group) to export
    :param export_folder: folder where the exported file is placed
    :param project: project owning the data; only `project.name` is read here
    :returns: tuple `(download_name, file_path, flag)`; `download_name` is None
              for a plain data type, `flag` is always True in this method
              (presumably "file may be deleted after download" - confirm with caller)
    :raises ExportException: when a group contains no data types
    """
    download_file_name = self.get_export_file_name(data)
    files_helper = FilesHelper()

    # Simple case first: a single data type is just copied.
    if not self.is_data_a_group(data):
        copied_file = self.copy_dt_to_export_folder(data, export_folder)
        return None, copied_file, True

    group_members = self._get_all_data_types_arr(data)
    if group_members is None or len(group_members) == 0:
        raise ExportException("Could not export a data type group with no data")

    archive_path = os.path.join(export_folder, download_file_name)

    # One operation folder per data type of the group goes into the archive.
    folders_to_zip = [
        files_helper.get_operation_folder(project.name, member.fk_from_operation)
        for member in group_members
    ]
    files_helper.zip_folders(archive_path, folders_to_zip, self.OPERATION_FOLDER_PREFIX)

    return download_file_name, archive_path, True
def _export_linked_datatypes(self, project, zip_file):
    """
    Add the datatypes linked into `project` to an already opened export archive.

    The storage paths returned by `self._get_linked_datatypes_storage_path` are
    written into the archive under the folder of a synthetic import operation
    (id 'links-to-external-projects'), next to that operation's metadata file.

    :param project: project being exported; its `id` and `name` are read here
    :param zip_file: opened archive object exposing `write(path, archive_name)`
    :returns: None. Nothing is written when the project has no linked datatypes.
    """
    files_helper = FilesHelper()
    linked_paths = self._get_linked_datatypes_storage_path(project)

    if not linked_paths:
        # do not export an empty operation
        return

    # Make a import operation which will contain links to other projects
    alg_group = dao.find_group(TVB_IMPORTER_MODULE, TVB_IMPORTER_CLASS)
    algo = dao.get_algorithm_by_group(alg_group.id)
    op = model.Operation(None, project.id, algo.id, '')
    op.project = project
    op.algorithm = algo
    # The id is a fixed label used to name the operation folder.
    op.id = 'links-to-external-projects'
    op.start_now()
    op.mark_complete(model.STATUS_FINISHED)

    # write operation.xml to disk
    files_helper.write_operation_metadata(op)
    op_folder = files_helper.get_operation_folder(op.project.name, op.id)

    try:
        operation_xml = files_helper.get_operation_meta_file_path(op.project.name, op.id)
        op_folder_name = os.path.basename(op_folder)

        # add operation.xml
        zip_file.write(operation_xml, op_folder_name + '/' + os.path.basename(operation_xml))

        # add linked datatypes to archive in the import operation
        for pth in linked_paths:
            zip_pth = op_folder_name + '/' + os.path.basename(pth)
            zip_file.write(pth, zip_pth)
    finally:
        # remove these files, since we only want them in export archive.
        # Done in `finally` so a failed archive write does not leave the
        # temporary operation folder behind in the project storage.
        files_helper.remove_folder(op_folder)
def export(self, data, export_folder, project):
    """
    Export a data type, either as its storage file or as a ZIP archive.

    * A plain data type is not copied: the path of its storage file inside
      the project folder is returned.
    * A data type group gets a ZIP (built with `FilesHelper.zip_folders`)
      containing the operation folder of every data type in the group.

    :param data: data type (or data type group) to export
    :param export_folder: folder where a group archive is created
    :param project: project owning the data
    :returns: tuple `(download_name, file_path, flag)`; `flag` is True for a
              group archive and False for a plain data type (presumably
              "file may be deleted after download" - confirm with caller)
    :raises ExportException: when a group contains no data types
    """
    download_file_name = self.get_export_file_name(data)
    files_helper = FilesHelper()

    # Simple case first: point at the storage file that already exists.
    if not self.is_data_a_group(data):
        storage_root = files_helper.get_project_folder(project)
        storage_file = os.path.join(storage_root, data.get_storage_file_path())
        return download_file_name, storage_file, False

    group_members = self._get_all_data_types_arr(data)
    if group_members is None or len(group_members) == 0:
        raise ExportException("Could not export a data type group with no data")

    archive_path = os.path.join(export_folder, download_file_name)

    # One operation folder per data type of the group goes into the archive.
    folders_to_zip = [
        files_helper.get_operation_folder(project.name, member.fk_from_operation)
        for member in group_members
    ]
    files_helper.zip_folders(archive_path, folders_to_zip, self.OPERATION_FOLDER_PREFIX)

    return download_file_name, archive_path, True
class ExportManager(object):
    """
    This class provides basic methods for exporting data types of projects in different formats.
    """
    # Dictionary containing all available exporters, keyed by exporter class name.
    # NOTE: this is a class attribute, so the registry is shared by all instances.
    all_exporters = {}
    export_folder = None
    EXPORT_FOLDER_NAME = "EXPORT_TMP"
    EXPORTED_SIMULATION_NAME = "exported_simulation"
    EXPORTED_SIMULATION_DTS_DIR = "datatypes"
    ZIP_FILE_EXTENSION = "zip"
    logger = get_logger(__name__)

    def __init__(self):
        # Here we register all available data type exporters
        # If new exporters supported, they should be added here
        self._register_exporter(TVBExporter())
        self._register_exporter(TVBLinkedExporter())
        self.export_folder = os.path.join(TvbProfile.current.TVB_STORAGE, self.EXPORT_FOLDER_NAME)
        self.files_helper = FilesHelper()

    def _register_exporter(self, exporter):
        """
        This method register into an internal format available exporters.

        :param exporter: Instance of a data type exporter (extends ABCExporter)
        """
        if exporter is not None:
            self.all_exporters[exporter.__class__.__name__] = exporter

    def get_exporters_for_data(self, data):
        """
        Get available exporters for current data type.

        :param data: data type for which exporters are looked up
        :returns: a dictionary with the {exporter_id : label}
        :raises InvalidExportDataException: when `data` is None
        """
        if data is None:
            raise InvalidExportDataException("Could not detect exporters for null data")

        self.logger.debug("Trying to determine exporters valid for %s" % data.type)

        return {
            exporter_id: exporter.get_label()
            for exporter_id, exporter in self.all_exporters.items()
            if exporter.accepts(data)
        }

    def export_data(self, data, exporter_id, project):
        """
        Export provided data using given exporter

        :param data: data type to be exported
        :param exporter_id: identifier of the exporter to be used
        :param project: project that contains data to be exported

        :returns: a tuple with the following elements
            1. name of the file to be shown to user
            2. full path of the export file (available for download)
            3. boolean which specify if file can be deleted after download
        :raises InvalidExportDataException: when `data` is None or is not accepted by the exporter
        :raises ExportException: when the exporter id or the project is missing or invalid
        """
        if data is None:
            raise InvalidExportDataException("Could not export null data. Please select data to be exported")

        if exporter_id is None:
            raise ExportException("Please select the exporter to be used for this operation")

        if exporter_id not in self.all_exporters:
            raise ExportException("Provided exporter identifier is not a valid one")

        exporter = self.all_exporters[exporter_id]

        if project is None:
            raise ExportException("Please provide the project where data files are stored")

        # Now we start the real export
        if not exporter.accepts(data):
            raise InvalidExportDataException("Current data can not be exported by specified exporter")

        # Now compute and create folder where to store exported data
        # This will imply to generate a folder which is unique for each export
        data_export_folder = None
        try:
            data_export_folder = self._build_data_export_folder(data)
            self.logger.debug("Start export of data: %s" % data.type)
            export_result = exporter.export(data, data_export_folder, project)
        finally:
            # In case export did not generate any file, delete the folder
            if data_export_folder is not None and not os.listdir(data_export_folder):
                os.rmdir(data_export_folder)

        return export_result

    def _export_linked_datatypes(self, project, zip_file):
        """
        Add the datatypes linked into `project` to an already opened archive.

        The files are placed in the archive under the folder of a synthetic
        import operation with id 'links-to-external-projects'.

        :param project: project being exported
        :param zip_file: opened archive object exposing `write(path, archive_name)`
        """
        linked_paths = ProjectService().get_linked_datatypes_storage_path(project)

        if not linked_paths:
            # do not export an empty operation
            return

        # Make a import operation which will contain links to other projects
        algo = dao.get_algorithm_by_module(TVB_IMPORTER_MODULE, TVB_IMPORTER_CLASS)
        op = model_operation.Operation(None, None, project.id, algo.id)
        op.project = project
        op.algorithm = algo
        op.id = 'links-to-external-projects'
        op.start_now()
        op.mark_complete(model_operation.STATUS_FINISHED)

        op_folder = self.files_helper.get_operation_folder(op.project.name, op.id)
        try:
            op_folder_name = os.path.basename(op_folder)

            # add linked datatypes to archive in the import operation
            for pth in linked_paths:
                zip_pth = op_folder_name + '/' + os.path.basename(pth)
                zip_file.write(pth, zip_pth)
        finally:
            # remove these files, since we only want them in export archive.
            # Done in `finally` so a failed archive write does not leave the folder behind.
            self.files_helper.remove_folder(op_folder)

    def export_project(self, project, optimize_size=False):
        """
        Given a project root and the TVB storage_path, create a ZIP
        ready for export.

        :param project: project object which identifies project to be exported
        :param optimize_size: when True, only the operation folders of the datatypes
            returned by `dao.get_datatypes_in_project(..., only_visible=True)` are packed
            and the project meta file is added explicitly; when False the whole project
            folder is packed (excluding "TEMP")
        :returns: full path of the created ZIP file
        :raises ExportException: when `project` is None
        """
        if project is None:
            raise ExportException("Please provide project to be exported")

        project_folder = self.files_helper.get_project_folder(project)
        folders_to_exclude = self._get_op_with_errors(project.id)
        to_be_exported_folders = []

        if optimize_size:
            # take only the DataType with visibility flag set ON.
            # The query is only needed (and therefore only executed) in this branch.
            project_datatypes = dao.get_datatypes_in_project(project.id, only_visible=True)
            considered_op_ids = set()
            for dt in project_datatypes:
                op_id = dt.fk_from_operation
                if op_id in considered_op_ids:
                    continue
                considered_op_ids.add(op_id)
                to_be_exported_folders.append({'folder': self.files_helper.get_project_folder(project, str(op_id)),
                                               'archive_path_prefix': str(op_id) + os.sep,
                                               'exclude': folders_to_exclude})
        else:
            folders_to_exclude.append("TEMP")
            to_be_exported_folders.append({'folder': project_folder,
                                           'archive_path_prefix': '',
                                           'exclude': folders_to_exclude})

        # Compute path and name of the zip file
        now = datetime.now()
        date_str = now.strftime("%Y-%m-%d_%H-%M")
        zip_file_name = "%s_%s.%s" % (date_str, project.name, self.ZIP_FILE_EXTENSION)

        export_folder = self._build_data_export_folder(project)
        result_path = os.path.join(export_folder, zip_file_name)

        with TvbZip(result_path, "w") as zip_file:
            # Pack project [filtered] content into a ZIP file:
            self.logger.debug("Done preparing, now we will write folders " + str(len(to_be_exported_folders)))
            self.logger.debug(str(to_be_exported_folders))
            for pack in to_be_exported_folders:
                zip_file.write_folder(**pack)
            self.logger.debug("Done exporting files, now we will export linked DTs")
            self._export_linked_datatypes(project, zip_file)
            # Make sure the Project.xml file gets copied:
            if optimize_size:
                self.logger.debug("Done linked, now we write the project xml")
                zip_file.write(self.files_helper.get_project_meta_file_path(project.name),
                               self.files_helper.TVB_PROJECT_FILE)
            self.logger.debug("Done, closing")

        return result_path

    @staticmethod
    def _get_op_with_errors(project_id):
        """
        Get the operation folders with error base name as list.

        :param project_id: id of the project whose failed operations are looked up
        :returns: a new list holding the `id` of every operation returned by
            `dao.get_operations_with_error_in_project`
        """
        return [op.id for op in dao.get_operations_with_error_in_project(project_id)]

    def _build_data_export_folder(self, data):
        """
        This method computes the folder where results of an export
        operation will be stored for a while (e.g until download is done; or for 1 day)

        :param data: any object with a `gid` attribute; the gid becomes part of the folder name
        :returns: path of the folder, after `files_helper.check_created` was called on it
        """
        now = datetime.now()
        # Timestamp down to the microsecond, so every export gets its own folder
        date_str = "%d-%d-%d_%d-%d-%d_%d" % (now.year, now.month, now.day, now.hour,
                                             now.minute, now.second, now.microsecond)
        tmp_str = date_str + "@" + data.gid
        data_export_folder = os.path.join(self.export_folder, tmp_str)
        self.files_helper.check_created(data_export_folder)

        return data_export_folder

    def export_simulator_configuration(self, burst_id):
        """
        Pack the configuration files of a simulation into a ZIP.

        The view model files and the datatype files referenced by the burst's
        simulator are copied into a temporary folder, which is zipped and then removed.

        :param burst_id: id of the burst whose simulator configuration is exported
        :returns: full path of the created ZIP file
        :raises InvalidExportDataException: when no burst exists for `burst_id`
        """
        burst = dao.get_burst_by_id(burst_id)
        if burst is None:
            raise InvalidExportDataException("Could not find burst with ID " + str(burst_id))

        op_folder = self.files_helper.get_project_folder(burst.project, str(burst.fk_simulation))
        tmp_export_folder = self._build_data_export_folder(burst)
        tmp_sim_folder = os.path.join(tmp_export_folder, self.EXPORTED_SIMULATION_NAME)

        if not os.path.exists(tmp_sim_folder):
            os.makedirs(tmp_sim_folder)

        all_view_model_paths, all_datatype_paths = h5.gather_references_of_view_model(burst.simulator_gid, op_folder)

        # The burst's own file is exported together with the view model files
        burst_path = h5.determine_filepath(burst.gid, op_folder)
        all_view_model_paths.append(burst_path)

        for vm_path in all_view_model_paths:
            dest = os.path.join(tmp_sim_folder, os.path.basename(vm_path))
            self.files_helper.copy_file(vm_path, dest)

        for dt_path in all_datatype_paths:
            dest = os.path.join(tmp_sim_folder, self.EXPORTED_SIMULATION_DTS_DIR, os.path.basename(dt_path))
            self.files_helper.copy_file(dt_path, dest)

        # Drop the 'history_gid' metadata from the exported copy of the simulator file
        main_vm_path = h5.determine_filepath(burst.simulator_gid, tmp_sim_folder)
        H5File.remove_metadata_param(main_vm_path, 'history_gid')

        now = datetime.now()
        date_str = now.strftime("%Y-%m-%d_%H-%M")
        zip_file_name = "%s_%s.%s" % (date_str, str(burst_id), self.ZIP_FILE_EXTENSION)

        result_path = os.path.join(tmp_export_folder, zip_file_name)
        with TvbZip(result_path, "w") as zip_file:
            zip_file.write_folder(tmp_sim_folder)

        self.files_helper.remove_folder(tmp_sim_folder)
        return result_path
class DiagnoseDiskUsage(object):
    """
    Print a report comparing, for one project, the size of the datatype files
    found on disk with the `disk_size` recorded on the datatype entities.

    The whole report is produced (printed to stdout) by the constructor.
    """
    # Row layout of the report: two label columns followed by four right-aligned value columns
    FORMAT_DT = ' {:14} {:20} {:>12} {:>12} {:>12} {:>12}'
    HEADER_DT = FORMAT_DT.format('', '', 'disk_size(kib)', 'db_size(kib)', 'delta(kib)', 'ratio(%)')

    def __init__(self, prj_id):
        """
        Build and print the disk usage report.

        :param prj_id: id of the project to analyse
        """
        self.file_helper = FilesHelper()
        # Paths that are accounted for; anything else found in the project folder is reported
        self.expected_files = set()
        self.prj_disk_size, self.prj_db_size = 0, 0

        try:
            dao.session.open_session()
            # We do not fetch the project using dao because dao will detach it from the session.
            # We want to query on the fly on attribute access and this requires attached objects.
            # This code is doing a tree traversal of the db.
            # The query on attribute access style fits better than aggregating queries.
            self.project = dao.session.query(Project).filter(Project.id == prj_id).one()
            self.expected_files.add(self.file_helper.get_project_meta_file_path(self.project.name))
            root_path = self.file_helper.get_project_folder(self.project)

            print()
            print('Reporting disk for project {} in {}'.format(self.project.name, root_path))
            print()
            print(self.HEADER_DT)

            for op in self.project.OPERATIONS:
                self.analyse_operation(op)

            print(self.HEADER_DT)
            self.print_usage_line('Project', 'total', self.prj_disk_size, self.prj_db_size)

            print()
            self.list_unexpected_project_files(root_path)
            print()
        finally:
            dao.session.close_session()

    @staticmethod
    def get_h5_by_gid(root, gid):
        """
        Find the file of a datatype inside an operation folder.

        :param root: folder to search (not recursive)
        :param gid: text that must be contained in the file name
        :returns: path of the first regular file in `root` whose name contains `gid`,
                  or None when there is no such file
        """
        for f in os.listdir(root):
            fp = os.path.join(root, f)
            if gid in f and os.path.isfile(fp):
                return fp
        return None

    @staticmethod
    def get_file_kib_size(fp):
        """
        :param fp: path of an existing file
        :returns: size of the file in KiB, rounded to the nearest integer
        """
        return int(round((os.path.getsize(fp) / 1024.)))

    @staticmethod
    def print_usage_line(col1, col2, actual, expected):
        """
        Print one row of the report.

        :param col1: text for the first label column
        :param col2: text for the second label column
        :param actual: size found on disk (KiB)
        :param expected: size recorded in the database (KiB)
        """
        if expected != 0:
            ratio = int(100.0 * actual / expected)
            # Flag files more than twice as large as recorded
            if ratio > 200:
                ratio = "! %s" % ratio
            else:
                ratio = str(ratio)
        else:
            ratio = 'inf'

        delta = actual - expected
        # Flag files more than 100 KiB larger than recorded
        if delta > 100:
            delta = "! %s" % delta
        else:
            delta = str(delta)

        print(DiagnoseDiskUsage.FORMAT_DT.format(col1, col2, '{:,}'.format(actual),
                                                 '{:,}'.format(expected), delta, ratio))

    def analyse_operation(self, op):
        """
        Print one row per datatype of an operation plus a total row, and add
        the operation totals to the project totals.

        A datatype whose file cannot be found on disk is reported with a disk
        size of 0 and an extra warning line, instead of aborting the report.

        :param op: operation entity; `id`, `algorithm.name` and `DATA_TYPES` are read
        """
        op_disk_size, op_db_size = 0, 0

        print('Operation {} : {}'.format(op.id, op.algorithm.name))

        for dt in op.DATA_TYPES:
            if dt.type == 'DataTypeGroup':
                # these have no h5
                continue
            op_pth = self.file_helper.get_operation_folder(self.project.name, op.id)
            dt_pth = self.get_h5_by_gid(op_pth, dt.gid)

            if dt_pth is None:
                # No file for this datatype: nothing on disk to measure or to expect
                dt_actual_disk_size = 0
            else:
                dt_actual_disk_size = self.get_file_kib_size(dt_pth)
                self.expected_files.add(dt_pth)

            db_disk_size = dt.disk_size or 0

            op_disk_size += dt_actual_disk_size
            op_db_size += db_disk_size

            self.print_usage_line(dt.gid[:12], dt.type, dt_actual_disk_size, db_disk_size)
            if dt_pth is None:
                print('   ! no file found on disk for datatype {}'.format(dt.gid))

        self.prj_disk_size += op_disk_size
        self.prj_db_size += op_db_size
        self.print_usage_line('', 'total :', op_disk_size, op_db_size)
        print()

    def list_unexpected_project_files(self, root_path):
        """
        Print every file below `root_path` that is not in `self.expected_files`.

        :param root_path: folder that is walked recursively
        """
        unexpected = [
            os.path.join(folder, f)
            for folder, _, files in os.walk(root_path)
            for f in files
            if os.path.join(folder, f) not in self.expected_files
        ]

        print('Unexpected project files :')
        for f in unexpected:
            print(f)

        if not unexpected:
            print('yey! none found')