# Example 1
    def test_bulk_add_data_sample_sync_ok(self):
        """Bulk-posting two archives creates both samples and returns 201.

        The ledger call is mocked so the view's synchronous success path is
        exercised without touching the ledger.
        """
        self.add_default_data_manager()
        url = reverse('substrapp:data_sample-list')

        mock_a = MagicMock(spec=InMemoryUploadedFile)
        mock_b = MagicMock(spec=InMemoryUploadedFile)
        mock_a.name = 'foo.zip'
        mock_b.name = 'bar.zip'
        mock_a.read = MagicMock(return_value=self.data_file.read())
        mock_b.read = MagicMock(return_value=self.data_file_2.read())

        data = {
            mock_a.name: mock_a,
            mock_b.name: mock_b,
            'data_manager_keys': [get_hash(self.data_data_opener),
                                  get_hash(self.data_data_opener2)],
            'test_only': True,
        }
        extra = {'HTTP_ACCEPT': 'application/json;version=0.0'}

        ledger_target = (
            'substrapp.serializers.ledger.datasample.util.create_ledger_assets')
        with mock.patch(ledger_target) as mcreate_ledger_assets:
            # Rewind the fixtures consumed when setting up the mocks above.
            self.data_file.seek(0)
            self.data_file_2.seek(0)
            mcreate_ledger_assets.return_value = {
                'pkhash': [get_dir_hash(mock_a), get_dir_hash(mock_b)],
                'validated': True,
            }

            response = self.client.post(url, data, format='multipart', **extra)
            body = response.json()

            self.assertEqual(len(body), 2)
            self.assertEqual(body[0]['pkhash'], get_dir_hash(mock_a))
            self.assertTrue(body[0]['path'].endswith(
                f'/datasamples/{get_dir_hash(mock_a)}'))
            self.assertEqual(response.status_code, status.HTTP_201_CREATED)
# Example 2
def map_data_sample(paths):
    """Turn a list of file/directory paths into data sample descriptors.

    Every entry of *paths* must exist on disk. A regular file is read into
    memory and yields ``{'pkhash': ..., 'file': ContentFile}``; a directory
    yields ``{'pkhash': ..., 'path': <normalized path>}``. ``check`` is
    called for each entry before it is appended, against the samples
    collected so far.

    Raises:
        Exception: when an entry does not exist, or is neither a regular
            file nor a directory.
    """
    samples = []

    for entry in paths:
        if not os.path.exists(entry):
            raise Exception(f'File or Path: {entry} does not exist')

        if os.path.isfile(entry):
            # File case: hash the content read into memory.
            with open(entry, 'rb') as fh:
                content = ContentFile(fh.read(), path_leaf(entry))
                pkhash = get_dir_hash(content)
                check(entry, pkhash, samples)
                samples.append({'pkhash': pkhash, 'file': content})
        elif os.path.isdir(entry):
            # Directory case: hash the tree in place.
            pkhash = dirhash(entry, 'sha256')
            check(entry, pkhash, samples)
            samples.append({'pkhash': pkhash, 'path': normpath(entry)})
        else:
            raise Exception(f'{entry} is not a file or a directory')

    return samples
# Example 3
def prepare_data_sample(directory, tuple_):
    """Prepare data samples for tuple execution.

    For every data sample key referenced by the tuple, verify that the
    sample exists locally, is a non-empty directory and matches its
    recorded checksum, then expose it to the tuple through a symlink at
    ``<directory>/data/<data_sample_key>``.

    Args:
        directory: working directory of the tuple being executed.
        tuple_: tuple description; ``tuple_['dataset']['data_sample_keys']``
            lists the keys of the data samples to prepare.

    Raises:
        Exception: if a data sample is missing, empty, fails the checksum
            check, or if the symlink cannot be created or points to the
            wrong target.
    """
    from substrapp.models import DataSample
    for data_sample_key in tuple_['dataset']['data_sample_keys']:
        data_sample = DataSample.objects.get(key=data_sample_key)

        if not os.path.exists(data_sample.path) or not os.path.isdir(
                data_sample.path):
            raise Exception(
                f'Data Sample ({data_sample.path}) is missing in local storage'
            )

        if not os.listdir(data_sample.path):
            raise Exception(
                f'Data Sample ({data_sample.path}) is empty in local storage')

        data_sample_checksum = get_dir_hash(data_sample.path)
        if data_sample_checksum != data_sample.checksum:
            raise Exception(
                'Data Sample checksum in tuple is not the same as in local db')

        # Create a symlink on the folder containing the data.
        data_directory = path.join(directory, 'data', data_sample_key)
        try:
            if not os.path.exists(data_directory):
                os.symlink(data_sample.path, data_directory)

            # BUG FIX: the original built this Exception without raising it,
            # silently accepting a pre-existing symlink that points at the
            # wrong target. Also fixed the missing spaces in the message.
            if os.path.realpath(data_directory) != data_sample.path:
                raise Exception(
                    f'Sym link ({data_directory}) for tuple for data sample '
                    f'{data_sample.path} does not match '
                    f'(currently to {os.path.realpath(data_directory)})'
                )
        except OSError as e:
            logger.exception(e)
            raise Exception('Failed to create sym link for tuple data sample')
# Example 4
    def test_add_data_sample_ko_408(self):
        """A ledger timeout on creation yields HTTP 408 with validated=False."""
        url = reverse('substrapp:data_sample-list')
        self.add_default_data_manager()

        upload = MagicMock(spec=InMemoryUploadedFile)
        upload.name = 'foo.zip'
        upload.read = MagicMock(return_value=self.data_file.file.read())
        upload.open = MagicMock(return_value=upload)

        data = {
            'file': upload,
            'data_manager_keys': [get_hash(self.data_data_opener)],
            'test_only': True,
        }
        extra = {'HTTP_ACCEPT': 'application/json;version=0.0'}

        with mock.patch.object(zipfile, 'is_zipfile') as mis_zipfile, \
                mock.patch.object(LedgerDataSampleSerializer, 'create') as mcreate:
            mis_zipfile.return_value = True
            mcreate.side_effect = LedgerTimeout('Timeout')

            response = self.client.post(url, data, format='multipart', **extra)
            body = response.json()

            expected_message = {
                'pkhash': [get_dir_hash(upload)],
                'validated': False,
            }
            self.assertEqual(body['message'], expected_message)
            self.assertEqual(response.status_code,
                             status.HTTP_408_REQUEST_TIMEOUT)
# Example 5
    def test_bulk_add_data_sample_ko_same_pkhash(self):
        """Two archives with identical content must be rejected with 400.

        The zip and tar fixtures hold the same files, so both uploads hash
        to the same pkhash; the view must refuse them and create nothing.
        """
        self.add_default_data_manager()
        url = reverse('substrapp:data_sample-list')

        zip_upload = MagicMock(spec=InMemoryUploadedFile)
        tar_upload = MagicMock(spec=InMemoryUploadedFile)
        zip_upload.name = 'foo.zip'
        tar_upload.name = 'bar.tar.gz'
        zip_upload.read = MagicMock(return_value=self.data_file.read())
        tar_upload.read = MagicMock(return_value=self.data_tar_file.read())

        data = {
            zip_upload.name: zip_upload,
            tar_upload.name: tar_upload,
            'data_manager_keys': [get_hash(self.data_data_opener)],
            'test_only': True,
        }
        extra = {'HTTP_ACCEPT': 'application/json;version=0.0'}

        validators_target = (
            'substrapp.serializers.datasample.DataSampleSerializer.get_validators')
        with mock.patch(validators_target) as mget_validators, \
                mock.patch.object(LedgerDataSampleSerializer, 'create') as mcreate:
            mget_validators.return_value = []
            # Rewind the fixtures consumed when setting up the mocks above.
            self.data_file.seek(0)
            self.data_tar_file.seek(0)
            mcreate.return_value = (
                {'pkhash': [get_dir_hash(zip_upload), get_dir_hash(tar_upload)],
                 'validated': False},
                status.HTTP_408_REQUEST_TIMEOUT,
            )

            response = self.client.post(url, data, format='multipart', **extra)
            body = response.json()

            self.assertEqual(DataSample.objects.count(), 0)
            self.assertEqual(
                body['message'],
                f'Your data sample archives contain same files leading to same pkhash, '
                f'please review the content of your achives. '
                f'Archives {tar_upload.name} and {zip_upload.name} are the same'
            )
            self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
# Example 6
 def test_create_data(self):
     """Creating a DataSample sets its checksum and a readable string form."""
     fixture_path = os.path.join(
         os.path.dirname(os.path.realpath(__file__)),
         '../../../fixtures/chunantes/datasamples/train/0024308')
     sample = DataSample.objects.create(path=fixture_path)
     self.assertEqual(sample.checksum, get_dir_hash(fixture_path))
     self.assertFalse(sample.validated)
     self.assertIn(f'key {sample.key}', str(sample))
     self.assertIn(f'validated {sample.validated}', str(sample))
# Example 7
    def get_default_datasample_data(self):
        """Return ``(expected pkhash, POST payload)`` for the default sample."""
        expected_hash = get_dir_hash(self.data_file.file)
        # Rewind after hashing so the upload can be read again by the view.
        self.data_file.file.seek(0)
        payload = {
            'file': self.data_file,
            'data_manager_keys': [get_hash(self.data_data_opener)],
            'test_only': True,
        }
        return expected_hash, payload
# Example 8
    def compute_data(self, request):
        """Collect data samples from an incoming HTTP request.

        Samples come from two sources: uploaded files in ``request.FILES``
        (expected to be archives) and the ``path``/``paths`` POST
        parameters (expected to be directories). Every sample is keyed by
        its pkhash so content duplicates are detected across both sources.

        Returns:
            list: one dict per sample — ``{'pkhash': ..., 'file': ...}``
            for uploads, ``{'pkhash': ..., 'path': ...}`` for directories.

        Raises:
            Exception: on duplicate pkhashes, mixed use of ``path`` and
                ``paths``, a path that is not a directory, or when no
                sample at all was provided.
        """
        data = {}
        # Uploaded files, should be archives. The original iterated
        # .items() with an unused key; only the values are needed.
        for file in request.FILES.values():
            pkhash = get_dir_hash(file)  # can raise
            # Two archives with identical content collide on pkhash.
            if pkhash in data:
                existing = data[pkhash]
                raise Exception(f'Your data sample archives contain same files leading to same pkhash, please review the content of your achives. Archives {file} and {existing["file"]} are the same')
            data[pkhash] = {
                'pkhash': pkhash,
                'file': file
            }

        # path/paths case: 'path' is a single-directory alias for 'paths'.
        path = request.POST.get('path', None)
        paths = request.POST.getlist('paths', [])

        if path and paths:
            raise Exception('Cannot use path and paths together.')

        if path is not None:
            paths = [path]

        # paths, should be directories
        for path in paths:
            if not os.path.isdir(path):
                raise Exception(f'One of your paths does not exist, is not a directory or is not an absolute path: {path}')
            pkhash = dirhash(path, 'sha256')
            # The existing entry can be a dict with a field path or file.
            if pkhash in data:
                raise Exception(f'Your data sample directory contain same files leading to same pkhash. Invalid path: {path}.')

            data[pkhash] = {
                'pkhash': pkhash,
                'path': normpath(path)
            }

        if not data:  # neither files nor paths were supplied
            # Was an f-string with no placeholders; plain literal now.
            raise Exception('No data sample provided.')

        return list(data.values())