Example #1
    def test_utils_get_object_from_ledger(self):

        with mock.patch('substrapp.ledger.api.query_ledger') as mquery_ledger:
            mquery_ledger.return_value = objective
            data = get_object_from_ledger('mychannel', '', 'queryObjective')

            self.assertEqual(data, objective)
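This test mocks substrapp.ledger.api.query_ledger and checks that get_object_from_ledger returns the mocked payload unchanged (objective is a fixture defined elsewhere in the test module). A minimal sketch consistent with that behaviour, and with the error cases exercised in Example #7 below, might look as follows; the exact query_ledger signature and the args mapping are assumptions, not taken from the source:

def get_object_from_ledger(channel_name, key, query):
    # Hypothetical sketch: run the named chaincode query and return its
    # payload as-is. Ledger errors raised by query_ledger (for example
    # LedgerAssetNotFound or LedgerInvalidResponse) propagate to the caller.
    # The query_ledger parameter names below are assumed, not confirmed.
    return query_ledger(channel_name, fcn=query, args={'key': key})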
Example #2
    def download_file(self, request, django_field, ledger_field=None):
        lookup_url_kwarg = self.lookup_url_kwarg or self.lookup_field
        key = self.kwargs[lookup_url_kwarg]
        channel_name = get_channel_name(request)

        validate_key(key)

        try:
            asset = get_object_from_ledger(channel_name, key,
                                           self.ledger_query_call)
        except LedgerError as e:
            return Response({'message': str(e.msg)}, status=e.status)

        try:
            self.check_access(channel_name, request.user, asset,
                              is_proxied_request(request))
        except PermissionError as e:
            return Response({'message': str(e)},
                            status=status.HTTP_403_FORBIDDEN)

        if get_owner() == asset['owner']:
            response = self._download_local_file(django_field)
        else:
            if not ledger_field:
                ledger_field = django_field
            storage_address = self.get_storage_address(asset, ledger_field)
            response = self._download_remote_file(channel_name,
                                                  storage_address, asset)

        return response
Example #3
    def _retrieve(self, request, key):
        validate_key(key)
        data = get_object_from_ledger(get_channel_name(request), key,
                                      self.ledger_query_call)

        # do not cache if the node does not have process permission
        if node_has_process_permission(data):
            # try to get it from the local db to check if the description exists
            try:
                instance = self.get_object()
            except Http404:
                instance = None
            finally:
                # check if instance has description
                if not instance or not instance.description:
                    instance = self.create_or_update_algo(
                        get_channel_name(request), data, key)

                # For security reasons, do not give access to the local file address.
                # Restrict data to some fields.
                # TODO: do we need to send creation date and/or last modified date?
                serializer = self.get_serializer(instance, fields=('owner',))
                data.update(serializer.data)

        replace_storage_addresses(request, data)

        return data
Example #4
    def _retrieve(self, request, key):
        validate_key(key)
        # get instance from remote node
        data = get_object_from_ledger(get_channel_name(request), key,
                                      'queryDataset')

        # do not cache if the node does not have process permission
        if node_has_process_permission(data):
            # try to get it from the local db to check if the description exists
            try:
                instance = self.get_object()
            except Http404:
                instance = None
            finally:
                # check if instance has description or data_opener
                if not instance or not instance.description or not instance.data_opener:
                    instance = self.create_or_update_datamanager(
                        get_channel_name(request), instance, data, key)

                # do not give access to local file addresses
                serializer = self.get_serializer(instance, fields=('owner',))
                data.update(serializer.data)

        replace_storage_addresses(request, data)

        return data
Example #5
    def retrieve(self, request, *args, **kwargs):
        lookup_url_kwarg = self.lookup_url_kwarg or self.lookup_field
        key = self.kwargs[lookup_url_kwarg]
        validate_key(key)

        try:
            data = get_object_from_ledger(get_channel_name(request), key,
                                          'queryComputePlan')
        except LedgerError as e:
            return Response({'message': str(e.msg)}, status=e.status)
        else:
            return Response(data, status=status.HTTP_200_OK)
Example #6
def prepare_testtuple_input_models(channel_name, directory, tuple_):
    """Get testtuple input models content."""
    traintuple_type = tuple_['traintuple_type']
    traintuple_key = tuple_['traintuple_key']

    # TODO we should use the find method to be consistent with the traintuple

    if traintuple_type == TRAINTUPLE_TYPE:
        metadata = get_object_from_ledger(channel_name, traintuple_key,
                                          'queryTraintuple')
        model_dst_path = path.join(directory, f'model/{traintuple_key}')
        raise_if_path_traversal([model_dst_path],
                                path.join(directory, 'model/'))
        get_and_put_model_content(channel_name, traintuple_type,
                                  traintuple_key, metadata,
                                  metadata['out_model'], model_dst_path)

    elif traintuple_type == COMPOSITE_TRAINTUPLE_TYPE:
        metadata = get_object_from_ledger(channel_name, traintuple_key,
                                          'queryCompositeTraintuple')
        head_model_dst_path = path.join(
            directory, f'model/{PREFIX_HEAD_FILENAME}{traintuple_key}')
        raise_if_path_traversal([head_model_dst_path],
                                path.join(directory, 'model/'))
        get_and_put_local_model_content(
            traintuple_key, metadata['out_head_model']['out_model'],
            head_model_dst_path)

        model_dst_path = path.join(
            directory, f'model/{PREFIX_TRUNK_FILENAME}{traintuple_key}')
        raise_if_path_traversal([model_dst_path],
                                path.join(directory, 'model/'))
        get_and_put_model_content(channel_name, traintuple_type,
                                  traintuple_key, metadata,
                                  metadata['out_trunk_model']['out_model'],
                                  model_dst_path)

    else:
        raise TasksError(
            f"Testtuple from type '{traintuple_type}' not supported")
Example #7
    def test_get_object_from_ledger(self):
        with patch('substrapp.ledger.api.query_ledger') as mquery_ledger:
            mquery_ledger.side_effect = LedgerAssetNotFound('Not Found')
            self.assertRaises(LedgerAssetNotFound, get_object_from_ledger, CHANNEL, 'key', 'fake_query')

        with patch('substrapp.ledger.api.query_ledger') as mquery_ledger:
            mquery_ledger.side_effect = LedgerInvalidResponse('Bad Response')
            self.assertRaises(LedgerInvalidResponse, get_object_from_ledger, CHANNEL, 'key', 'fake_query')

        with patch('substrapp.ledger.api.query_ledger') as mquery_ledger:
            mquery_ledger.return_value = {'key': 'key'}
            data = get_object_from_ledger(CHANNEL, 'key', 'good_query')
            self.assertEqual(data['key'], 'key')
Example #8
def get_objective(channel_name, tuple_):

    objective_key = tuple_['objective']['key']
    objective_metadata = get_object_from_ledger(channel_name, objective_key,
                                                'queryObjective')

    objective_content = get_asset_content(
        channel_name,
        objective_metadata['metrics']['storage_address'],
        objective_metadata['owner'],
        objective_metadata['metrics']['checksum'],
    )

    return objective_content
Example #9
    def _retrieve(self, channel_name, key):
        validate_key(key)

        data = get_object_from_ledger(channel_name, key, self.ledger_query_call)

        compatible_tuple_types = ['traintuple', 'composite_traintuple', 'aggregatetuple']
        any_data = any(field in data for field in compatible_tuple_types)

        if not any_data:
            raise Exception(
                'Invalid model: missing traintuple, composite_traintuple or aggregatetuple field'
            )

        return data
Example #10
def find_training_step_tuple_from_key(channel_name, tuple_key):
    """Get tuple type and tuple metadata from tuple key.

    Applies to traintuple, composite traintuple and aggregatetuple.
    """
    metadata = get_object_from_ledger(channel_name, tuple_key,
                                      'queryModelDetails')
    if metadata.get('aggregatetuple'):
        return AGGREGATETUPLE_TYPE, metadata['aggregatetuple']
    if metadata.get('composite_traintuple'):
        return COMPOSITE_TRAINTUPLE_TYPE, metadata['composite_traintuple']
    if metadata.get('traintuple'):
        return TRAINTUPLE_TYPE, metadata['traintuple']
    raise TasksError(
        f'Key {tuple_key}: no tuple found for training step: model: {metadata}'
    )
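Example #6 carries a TODO about using the find helper so the testtuple path stays consistent with the traintuple path; this function is that helper. A hedged illustration of how it could replace the per-type branching there (the call below is not taken from the source, and the metadata returned via queryModelDetails may be shaped differently from the per-type queries):

traintuple_type, metadata = find_training_step_tuple_from_key(
    channel_name, tuple_['traintuple_key'])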
Example #11
    def download_local_file(self, request, django_field, ledger_field=None):
        lookup_url_kwarg = self.lookup_url_kwarg or self.lookup_field
        key = self.kwargs[lookup_url_kwarg]

        try:
            asset = get_object_from_ledger(get_channel_name(request), key,
                                           self.ledger_query_call)
        except LedgerError as e:
            return HttpResponse({'message': str(e.msg)}, status=e.status)

        if not self.has_access(request.user, asset):
            return HttpResponse({'message': 'Unauthorized'},
                                status=status.HTTP_403_FORBIDDEN)

        if not ledger_field:
            ledger_field = django_field

        return self._download_local_file(django_field)
Example #12
    def download_local_file(self, request, django_field):
        lookup_url_kwarg = self.lookup_url_kwarg or self.lookup_field
        key = self.kwargs[lookup_url_kwarg]
        channel_name = get_channel_name(request)

        try:
            asset = get_object_from_ledger(channel_name, key,
                                           self.ledger_query_call)
        except LedgerError as e:
            return Response({'message': str(e.msg)}, status=e.status)

        try:
            self.check_access(channel_name, request.user, asset,
                              is_proxied_request(request))
        except PermissionError as e:
            return Response({'message': str(e)},
                            status=status.HTTP_403_FORBIDDEN)

        return self._download_local_file(django_field)
Example #13
def get_algo(channel_name, tuple_type, tuple_):
    """Get algo from ledger."""
    query_method_names_mapper = {
        TRAINTUPLE_TYPE: 'queryAlgo',
        COMPOSITE_TRAINTUPLE_TYPE: 'queryCompositeAlgo',
        AGGREGATETUPLE_TYPE: 'queryAggregateAlgo',
    }

    if tuple_type not in query_method_names_mapper:
        raise TasksError(
            f'Cannot find algo from tuple type {tuple_type}: {tuple_}')
    method_name = query_method_names_mapper[tuple_type]

    key = tuple_['algo']['key']
    metadata = get_object_from_ledger(channel_name, key, method_name)

    content = get_asset_content(
        channel_name,
        metadata['content']['storage_address'],
        metadata['owner'],
        metadata['content']['checksum'],
    )
    return content
Example #14
    def _retrieve(self, channel_name, key):
        validate_key(key)
        return get_object_from_ledger(channel_name, key,
                                      self.ledger_query_call)
Example #15
def do_task(channel_name, subtuple, tuple_type):
    subtuple_directory = get_subtuple_directory(subtuple['key'])

    # compute plan / federated learning variables
    compute_plan_key = None
    rank = None
    compute_plan_tag = None

    if 'compute_plan_key' in subtuple and subtuple['compute_plan_key']:
        compute_plan_key = subtuple['compute_plan_key']
        rank = int(subtuple['rank'])
        compute_plan = get_object_from_ledger(channel_name, compute_plan_key,
                                              'queryComputePlan')
        compute_plan_tag = compute_plan['tag']

    common_volumes, compute_volumes = prepare_volumes(subtuple_directory,
                                                      tuple_type,
                                                      compute_plan_key,
                                                      compute_plan_tag)

    # Add the node index to the environment variables for the compute job
    node_index = os.getenv('NODE_INDEX')
    if node_index:
        environment = {'NODE_INDEX': node_index}
    else:
        environment = {}

    # Use the tag to decide whether to transfer performances and models
    tag = subtuple.get("tag")
    if tuple_type == TESTTUPLE_TYPE:
        if tag and TAG_VALUE_FOR_TRANSFER_BUCKET in tag:
            environment['TESTTUPLE_TAG'] = TAG_VALUE_FOR_TRANSFER_BUCKET

    job_name = (f'{tuple_type.replace("_", "-")}-{subtuple["key"][0:8]}'
                f'-{TUPLE_COMMANDS[tuple_type]}').lower()
    command = generate_command(tuple_type, subtuple, rank)

    # train or predict
    compute_job(subtuple_key=subtuple["key"],
                compute_plan_key=compute_plan_key,
                dockerfile_path=subtuple_directory,
                image_name=get_algo_image_name(subtuple['algo']['key']),
                job_name=job_name,
                volumes={
                    **common_volumes,
                    **compute_volumes
                },
                command=command,
                remove_image=compute_plan_key is None
                and not settings.TASK['CACHE_DOCKER_IMAGES'],
                remove_container=settings.TASK['CLEAN_EXECUTION_ENVIRONMENT'],
                capture_logs=settings.TASK['CAPTURE_LOGS'],
                environment=environment)

    # Handle model and result from tuple
    models = save_models(subtuple_directory, tuple_type,
                         subtuple['key'])  # Can be empty if testtuple
    result = extract_result_from_models(tuple_type,
                                        models)  # Can be empty if testtuple

    # Evaluation
    if tuple_type == TESTTUPLE_TYPE:

        # Set the pred folder to read-only during evaluation
        pred_path = path.join(subtuple_directory, 'pred')
        common_volumes[pred_path]['mode'] = 'ro'

        # eval
        compute_job(
            subtuple_key=subtuple["key"],
            compute_plan_key=compute_plan_key,
            dockerfile_path=f'{subtuple_directory}/metrics',
            image_name=f'substra/metrics_{subtuple["objective"]["key"][0:8]}'.lower(),
            job_name=f'{tuple_type.replace("_", "-")}-{subtuple["key"][0:8]}-eval'.lower(),
            volumes=common_volumes,
            command=f'--output-perf-path {OUTPUT_PERF_PATH}',
            remove_image=compute_plan_key is None
            and not settings.TASK['CACHE_DOCKER_IMAGES'],
            remove_container=settings.TASK['CLEAN_EXECUTION_ENVIRONMENT'],
            capture_logs=settings.TASK['CAPTURE_LOGS'],
            environment=environment)

        pred_path = path.join(subtuple_directory, 'pred')
        export_path = path.join(subtuple_directory, 'export')
        perf_path = path.join(subtuple_directory, 'perf')

        # load performance
        with open(path.join(perf_path, 'perf.json'), 'r') as perf_file:
            perf = json.load(perf_file)

        result['global_perf'] = perf['all']

        if tag and TAG_VALUE_FOR_TRANSFER_BUCKET in tag:
            transfer_to_bucket(subtuple['key'],
                               [pred_path, perf_path, export_path])

    return result
Example #16
def get_testtuple(channel_name, key):
    return get_object_from_ledger(channel_name, key, 'queryTesttuple')
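Across these examples the call sites share one pattern: validate the key, query the ledger, and translate LedgerError into an HTTP response at the view boundary. A hedged composite sketch of that pattern, reusing the helpers shown above (the method name retrieve_testtuple below is hypothetical):

def retrieve_testtuple(self, request, key):
    # Hypothetical view method illustrating the recurring pattern above;
    # validate_key, get_channel_name, LedgerError, Response and status are
    # the same names used throughout the examples.
    validate_key(key)
    try:
        data = get_object_from_ledger(get_channel_name(request), key,
                                      'queryTesttuple')
    except LedgerError as e:
        return Response({'message': str(e.msg)}, status=e.status)
    return Response(data, status=status.HTTP_200_OK)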