def test_utils_get_object_from_ledger(self):
    # Happy path: whatever the underlying ledger query returns is passed
    # through unchanged by get_object_from_ledger.
    # Patch the ledger query so no real channel connection is made.
    with mock.patch('substrapp.ledger.api.query_ledger') as mquery_ledger:
        mquery_ledger.return_value = objective
        data = get_object_from_ledger('mychannel', '', 'queryObjective')
        self.assertEqual(data, objective)
def download_file(self, request, django_field, ledger_field=None):
    """Serve the file referenced by *django_field* for the asset in the URL.

    Resolves the asset in the ledger, enforces access control, then either
    streams the file from local storage (when this node owns the asset) or
    proxies the download from the owner node.

    Args:
        request: incoming DRF request.
        django_field: name of the model field holding the local file.
        ledger_field: key under which the remote storage address is stored
            in the ledger asset; defaults to *django_field* when omitted.

    Returns:
        A DRF ``Response`` (error) or file response (success).
    """
    lookup_url_kwarg = self.lookup_url_kwarg or self.lookup_field
    key = self.kwargs[lookup_url_kwarg]
    channel_name = get_channel_name(request)
    validate_key(key)
    try:
        asset = get_object_from_ledger(channel_name, key, self.ledger_query_call)
    except LedgerError as e:
        return Response({'message': str(e.msg)}, status=e.status)
    try:
        # is_proxied_request distinguishes node-to-node calls from end users
        self.check_access(channel_name, request.user, asset, is_proxied_request(request))
    except PermissionError as e:
        return Response({'message': str(e)}, status=status.HTTP_403_FORBIDDEN)
    if get_owner() == asset['owner']:
        # This node owns the asset: serve the file from local storage.
        response = self._download_local_file(django_field)
    else:
        # Remote asset: fetch it from the owner node's storage address.
        if not ledger_field:
            ledger_field = django_field
        storage_address = self.get_storage_address(asset, ledger_field)
        response = self._download_remote_file(channel_name, storage_address, asset)
    return response
def _retrieve(self, request, key):
    """Return algo metadata from the ledger, merged with local owner info.

    Fetches the asset from the ledger; when this node has process
    permission, ensures a local replica (with description) exists, then
    overlays the locally-serialized ``owner`` field onto the ledger data.

    Args:
        request: incoming DRF request (used for channel resolution).
        key: asset key from the URL.

    Returns:
        dict of asset metadata with storage addresses rewritten.

    Raises:
        LedgerError: when the ledger query fails (handled by callers).
    """
    validate_key(key)
    data = get_object_from_ledger(get_channel_name(request), key, self.ledger_query_call)

    # do not cache if node has not process permission
    if node_has_process_permission(data):
        # try to get it from local db to check if description exists
        try:
            instance = self.get_object()
        except Http404:
            instance = None
        finally:
            # check if instance has description
            if not instance or not instance.description:
                instance = self.create_or_update_algo(
                    get_channel_name(request), data, key)

            # For security reason, do not give access to local file address
            # Restrain data to some fields
            # TODO: do we need to send creation date and/or last modified date ?
            # BUGFIX: ('owner') is just the string 'owner'; a one-element
            # tuple is required so the serializer restricts output to that
            # single field instead of iterating the string's characters.
            serializer = self.get_serializer(instance, fields=('owner',))
            data.update(serializer.data)

    replace_storage_addresses(request, data)
    return data
def _retrieve(self, request, key):
    """Return datamanager metadata from the ledger, merged with local owner info.

    Fetches the asset from the remote node via the ledger; when this node
    has process permission, ensures a local replica (with description and
    data opener) exists, then overlays the locally-serialized ``owner``
    field onto the ledger data.

    Args:
        request: incoming DRF request (used for channel resolution).
        key: asset key from the URL.

    Returns:
        dict of asset metadata with storage addresses rewritten.

    Raises:
        LedgerError: when the ledger query fails (handled by callers).
    """
    validate_key(key)
    # get instance from remote node
    data = get_object_from_ledger(get_channel_name(request), key, 'queryDataset')

    # do not cache if node has not process permission
    if node_has_process_permission(data):
        # try to get it from local db to check if description exists
        try:
            instance = self.get_object()
        except Http404:
            instance = None
        finally:
            # check if instance has description or data_opener
            if not instance or not instance.description or not instance.data_opener:
                instance = self.create_or_update_datamanager(
                    get_channel_name(request), instance, data, key)

            # do not give access to local files address
            # BUGFIX: ('owner') is just the string 'owner'; a one-element
            # tuple is required so the serializer restricts output to that
            # single field instead of iterating the string's characters.
            serializer = self.get_serializer(instance, fields=('owner',))
            data.update(serializer.data)

    replace_storage_addresses(request, data)
    return data
def retrieve(self, request, *args, **kwargs):
    """Return the compute plan identified by the key in the URL."""
    key = self.kwargs[self.lookup_url_kwarg or self.lookup_field]
    validate_key(key)
    try:
        data = get_object_from_ledger(
            get_channel_name(request), key, 'queryComputePlan')
    except LedgerError as e:
        # Surface the ledger failure with its own status code.
        return Response({'message': str(e.msg)}, status=e.status)
    return Response(data, status=status.HTTP_200_OK)
def prepare_testtuple_input_models(channel_name, directory, tuple_):
    """Get testtuple input models content."""
    traintuple_type = tuple_['traintuple_type']
    traintuple_key = tuple_['traintuple_key']

    # TODO we should use the find method to be consistent with the traintuple
    if traintuple_type == TRAINTUPLE_TYPE:
        # Plain traintuple: a single output model to fetch.
        metadata = get_object_from_ledger(channel_name, traintuple_key, 'queryTraintuple')
        model_dst_path = path.join(directory, f'model/{traintuple_key}')
        # Guard against keys crafted to escape the model directory.
        raise_if_path_traversal([model_dst_path], path.join(directory, 'model/'))
        get_and_put_model_content(
            channel_name, traintuple_type, traintuple_key, metadata,
            metadata['out_model'], model_dst_path)
    elif traintuple_type == COMPOSITE_TRAINTUPLE_TYPE:
        # Composite traintuple: two models — a head (always local) and a
        # trunk (possibly remote) — fetched under distinct filename prefixes.
        metadata = get_object_from_ledger(channel_name, traintuple_key, 'queryCompositeTraintuple')
        head_model_dst_path = path.join(
            directory, f'model/{PREFIX_HEAD_FILENAME}{traintuple_key}')
        raise_if_path_traversal([head_model_dst_path], path.join(directory, 'model/'))
        get_and_put_local_model_content(
            traintuple_key, metadata['out_head_model']['out_model'],
            head_model_dst_path)
        model_dst_path = path.join(
            directory, f'model/{PREFIX_TRUNK_FILENAME}{traintuple_key}')
        raise_if_path_traversal([model_dst_path], path.join(directory, 'model/'))
        get_and_put_model_content(
            channel_name, traintuple_type, traintuple_key, metadata,
            metadata['out_trunk_model']['out_model'], model_dst_path)
    else:
        raise TasksError(
            f"Testtuple from type '{traintuple_type}' not supported")
def test_get_object_from_ledger(self):
    # Ledger errors must propagate to the caller untouched.
    with patch('substrapp.ledger.api.query_ledger') as mquery_ledger:
        mquery_ledger.side_effect = LedgerAssetNotFound('Not Found')
        self.assertRaises(LedgerAssetNotFound, get_object_from_ledger,
                          CHANNEL, 'key', 'fake_query')

    with patch('substrapp.ledger.api.query_ledger') as mquery_ledger:
        mquery_ledger.side_effect = LedgerInvalidResponse('Bad Response')
        self.assertRaises(LedgerInvalidResponse, get_object_from_ledger,
                          CHANNEL, 'key', 'fake_query')

    # On success, the queried payload is returned as-is.
    with patch('substrapp.ledger.api.query_ledger') as mquery_ledger:
        mquery_ledger.return_value = {'key': 'key'}
        data = get_object_from_ledger(CHANNEL, 'key', 'good_query')
        self.assertEqual(data['key'], 'key')
def get_objective(channel_name, tuple_):
    """Download the metrics archive of the objective referenced by *tuple_*.

    Looks up the objective in the ledger, then fetches the metrics content
    from its storage address, verifying the checksum against the owner.
    """
    key = tuple_['objective']['key']
    metadata = get_object_from_ledger(channel_name, key, 'queryObjective')
    metrics = metadata['metrics']
    return get_asset_content(
        channel_name,
        metrics['storage_address'],
        metadata['owner'],
        metrics['checksum'],
    )
def _retrieve(self, channel_name, key):
    """Fetch model details from the ledger and validate their shape.

    Args:
        channel_name: ledger channel to query.
        key: model key.

    Returns:
        dict of model details containing at least one of the compatible
        tuple fields.

    Raises:
        Exception: when none of the expected tuple fields is present.
    """
    validate_key(key)
    data = get_object_from_ledger(channel_name, key, self.ledger_query_call)

    compatible_tuple_types = ['traintuple', 'composite_traintuple', 'aggregatetuple']
    # any() over a generator short-circuits; no need to build a list first
    # (the original used any(list(map(lambda ...)))).
    if not any(tuple_type in data for tuple_type in compatible_tuple_types):
        raise Exception(
            'Invalid model: missing traintuple, composite_traintuple or aggregatetuple field'
        )
    return data
def find_training_step_tuple_from_key(channel_name, tuple_key):
    """Get tuple type and tuple metadata from tuple key.

    Applies to traintuple, composite traintuple and aggregatetuple.
    """
    metadata = get_object_from_ledger(channel_name, tuple_key, 'queryModelDetails')
    # Probe the candidate fields in the same precedence order as before.
    candidates = (
        ('aggregatetuple', AGGREGATETUPLE_TYPE),
        ('composite_traintuple', COMPOSITE_TRAINTUPLE_TYPE),
        ('traintuple', TRAINTUPLE_TYPE),
    )
    for field, tuple_type in candidates:
        if metadata.get(field):
            return tuple_type, metadata[field]
    raise TasksError(
        f'Key {tuple_key}: no tuple found for training step: model: {metadata}'
    )
def download_local_file(self, request, django_field, ledger_field=None):
    """Serve a locally stored file after a ledger lookup and access check.

    Args:
        request: incoming request (used for channel resolution and user).
        django_field: name of the model field holding the local file.
        ledger_field: unused; kept for signature compatibility with the
            sibling ``download_file`` helpers.

    Returns:
        An ``HttpResponse`` error, or the local file response on success.
    """
    lookup_url_kwarg = self.lookup_url_kwarg or self.lookup_field
    key = self.kwargs[lookup_url_kwarg]
    try:
        asset = get_object_from_ledger(get_channel_name(request), key, self.ledger_query_call)
    except LedgerError as e:
        return HttpResponse({'message': str(e.msg)}, status=e.status)
    if not self.has_access(request.user, asset):
        return HttpResponse({'message': 'Unauthorized'}, status=status.HTTP_403_FORBIDDEN)
    # BUGFIX: removed dead code — the original reassigned ledger_field to
    # django_field here, but ledger_field was never read afterwards.
    return self._download_local_file(django_field)
def download_local_file(self, request, django_field):
    """Serve a locally stored file after a ledger lookup and access check.

    Args:
        request: incoming DRF request.
        django_field: name of the model field holding the local file.

    Returns:
        A DRF ``Response`` (error) or the local file response (success).
    """
    lookup_url_kwarg = self.lookup_url_kwarg or self.lookup_field
    key = self.kwargs[lookup_url_kwarg]
    channel_name = get_channel_name(request)
    try:
        asset = get_object_from_ledger(channel_name, key, self.ledger_query_call)
    except LedgerError as e:
        return Response({'message': str(e.msg)}, status=e.status)
    try:
        # is_proxied_request distinguishes node-to-node calls from end users
        self.check_access(channel_name, request.user, asset, is_proxied_request(request))
    except PermissionError as e:
        return Response({'message': str(e)}, status=status.HTTP_403_FORBIDDEN)
    return self._download_local_file(django_field)
def get_algo(channel_name, tuple_type, tuple_):
    """Get algo from ledger."""
    # Map each supported tuple type to its ledger query method.
    query_method_names_mapper = {
        TRAINTUPLE_TYPE: 'queryAlgo',
        COMPOSITE_TRAINTUPLE_TYPE: 'queryCompositeAlgo',
        AGGREGATETUPLE_TYPE: 'queryAggregateAlgo',
    }
    if tuple_type not in query_method_names_mapper:
        raise TasksError(
            f'Cannot find algo from tuple type {tuple_type}: {tuple_}')

    metadata = get_object_from_ledger(
        channel_name, tuple_['algo']['key'],
        query_method_names_mapper[tuple_type])
    archive_info = metadata['content']
    return get_asset_content(
        channel_name,
        archive_info['storage_address'],
        metadata['owner'],
        archive_info['checksum'],
    )
def _retrieve(self, channel_name, key):
    """Validate *key* and return the matching asset from the ledger."""
    validate_key(key)
    asset = get_object_from_ledger(channel_name, key, self.ledger_query_call)
    return asset
def do_task(channel_name, subtuple, tuple_type):
    """Run the compute job for *subtuple* and return its result.

    Launches the train/predict container for the tuple, saves the produced
    models, and — for testtuples — runs the metrics container and attaches
    the global performance to the result.
    """
    subtuple_directory = get_subtuple_directory(subtuple['key'])

    # compute plan / federated learning variables
    compute_plan_key = None
    rank = None
    compute_plan_tag = None

    if 'compute_plan_key' in subtuple and subtuple['compute_plan_key']:
        compute_plan_key = subtuple['compute_plan_key']
        rank = int(subtuple['rank'])
        compute_plan = get_object_from_ledger(channel_name, compute_plan_key, 'queryComputePlan')
        compute_plan_tag = compute_plan['tag']

    common_volumes, compute_volumes = prepare_volumes(
        subtuple_directory, tuple_type, compute_plan_key, compute_plan_tag)

    # Add node index to environment variable for the compute
    node_index = os.getenv('NODE_INDEX')
    if node_index:
        environment = {'NODE_INDEX': node_index}
    else:
        environment = {}

    # Use tag to transfer or not performances and models
    tag = subtuple.get("tag")
    if tuple_type == TESTTUPLE_TYPE:
        if tag and TAG_VALUE_FOR_TRANSFER_BUCKET in tag:
            environment['TESTTUPLE_TAG'] = TAG_VALUE_FOR_TRANSFER_BUCKET

    job_name = f'{tuple_type.replace("_", "-")}-{subtuple["key"][0:8]}-{TUPLE_COMMANDS[tuple_type]}'.lower(
    )

    command = generate_command(tuple_type, subtuple, rank)

    # train or predict
    compute_job(subtuple_key=subtuple["key"],
                compute_plan_key=compute_plan_key,
                dockerfile_path=subtuple_directory,
                image_name=get_algo_image_name(subtuple['algo']['key']),
                job_name=job_name,
                volumes={
                    **common_volumes,
                    **compute_volumes
                },
                command=command,
                # Keep images around for compute plans (reused across tuples)
                # unless caching is globally disabled.
                remove_image=compute_plan_key is None and not settings.TASK['CACHE_DOCKER_IMAGES'],
                remove_container=settings.TASK['CLEAN_EXECUTION_ENVIRONMENT'],
                capture_logs=settings.TASK['CAPTURE_LOGS'],
                environment=environment)

    # Handle model and result from tuple
    models = save_models(subtuple_directory, tuple_type, subtuple['key'])  # Can be empty if testtuple
    result = extract_result_from_models(tuple_type, models)  # Can be empty if testtuple

    # Evaluation
    if tuple_type == TESTTUPLE_TYPE:
        # We set pred folder to ro during evaluation
        pred_path = path.join(subtuple_directory, 'pred')
        common_volumes[pred_path]['mode'] = 'ro'

        # eval
        compute_job(
            subtuple_key=subtuple["key"],
            compute_plan_key=compute_plan_key,
            dockerfile_path=f'{subtuple_directory}/metrics',
            image_name=f'substra/metrics_{subtuple["objective"]["key"][0:8]}'.lower(),
            job_name=f'{tuple_type.replace("_", "-")}-{subtuple["key"][0:8]}-eval'.lower(),
            volumes=common_volumes,
            command=f'--output-perf-path {OUTPUT_PERF_PATH}',
            remove_image=compute_plan_key is None and not settings.TASK['CACHE_DOCKER_IMAGES'],
            remove_container=settings.TASK['CLEAN_EXECUTION_ENVIRONMENT'],
            capture_logs=settings.TASK['CAPTURE_LOGS'],
            environment=environment)

        pred_path = path.join(subtuple_directory, 'pred')
        export_path = path.join(subtuple_directory, 'export')
        perf_path = path.join(subtuple_directory, 'perf')

        # load performance
        with open(path.join(perf_path, 'perf.json'), 'r') as perf_file:
            perf = json.load(perf_file)
        result['global_perf'] = perf['all']

        if tag and TAG_VALUE_FOR_TRANSFER_BUCKET in tag:
            transfer_to_bucket(subtuple['key'], [pred_path, perf_path, export_path])

    return result
def get_testtuple(channel_name, key):
    """Return the testtuple identified by *key* from the ledger."""
    metadata = get_object_from_ledger(channel_name, key, 'queryTesttuple')
    return metadata