def test_should_download_original_images_along_resampled_images_if_previously_downloaded(
        tmp_path):
    """Fetching with resample=False after a resample=True fetch downloads originals.

    After a resampled-only download, asking for the non-resampled data must
    download the original files while keeping the resampled ones around.
    """
    collections, images = _get_neurovault_data()
    sample_collection = collections.iloc[0]
    sample_collection_id = sample_collection["id"]
    # First fetch the resampled images only.
    data = neurovault.fetch_neurovault_ids(
        collection_ids=[sample_collection_id],
        data_dir=tmp_path,
        resample=True,
    )
    # Only the resampled version should be on disk at this point.
    assert all(
        os.path.isfile(meta['resampled_absolute_path'])
        for meta in data['images_meta'])
    assert not any(
        os.path.isfile(meta['absolute_path'])
        for meta in data['images_meta'])
    # Record when the resampled data was last accessed.
    access_time_resampled = os.path.getatime(
        data['images_meta'][0]['resampled_absolute_path'])
    # Now ask for the original (non-resampled) data.
    data_orig = neurovault.fetch_neurovault_ids(
        collection_ids=[sample_collection_id],
        data_dir=tmp_path,
        resample=False,
    )
    # Last access to an original file, i.e. its download time.
    access_time = os.path.getatime(
        data_orig['images_meta'][0]['absolute_path'])
    # The originals must have been touched after the resampled files.
    assert access_time - access_time_resampled > 0
    # The original files must now exist (the check above would have failed
    # already if they did not, but be explicit).
    assert all(
        os.path.isfile(meta['absolute_path'])
        for meta in data_orig['images_meta'])
    # Original images keep their own affines, distinct from the standard
    # resampling target affine.
    affines_orig = [load_img(img).affine for img in data_orig['images']]
    assert not any(
        np.all(affine == neurovault.STD_AFFINE) for affine in affines_orig)
def test_should_download_resampled_images_only_if_no_previous_download(
        tmp_path):
    """A fresh fetch with resample=True downloads only the resampled images."""
    collections, images = _get_neurovault_data()
    sample_collection = collections.iloc[0]
    sample_collection_id = sample_collection["id"]
    expected_number_of_images = sample_collection["true_number_of_images"]
    data = neurovault.fetch_neurovault_ids(
        collection_ids=[sample_collection_id],
        data_dir=str(tmp_path),
        resample=True,
    )
    # Every image of the collection should have been fetched.
    assert len(data['images_meta']) == expected_number_of_images
    # The resampled files must all exist on disk.
    assert all(
        os.path.isfile(meta['resampled_absolute_path'])
        for meta in data['images_meta'])
    # Loaded images must carry the standard resampling affine.
    affines = [load_img(img).affine for img in data['images']]
    assert all(
        np.all(affine == neurovault.STD_AFFINE) for affine in affines)
    # The original (non-resampled) files must NOT have been downloaded.
    assert not any(
        os.path.isfile(meta['absolute_path'])
        for meta in data['images_meta'])
def test_fetch_neurovault_ids(tmp_path):
    """fetch_neurovault_ids with explicit image and collection id lists.

    Exercises: invalid mode rejection, combined image+collection fetching,
    offline re-loading from disk, and the semantics of the 'download_new'
    (keep existing files) vs 'overwrite' (force re-download) modes.
    """
    data_dir = str(tmp_path)
    collections, images = _get_neurovault_data()
    collections = collections.sort_values(by="true_number_of_images",
                                          ascending=False)
    other_col_id, *col_ids = collections["id"].values[:3]
    img_ids = images[images["collection_id"] == other_col_id]["id"].values[:3]
    img_from_cols_ids = images[images["collection_id"].isin(
        col_ids)]["id"].values
    # An unknown mode must be rejected.
    pytest.raises(ValueError, neurovault.fetch_neurovault_ids, mode='bad')
    data = neurovault.fetch_neurovault_ids(image_ids=img_ids,
                                           collection_ids=col_ids,
                                           data_dir=data_dir)
    expected_images = list(img_ids) + list(img_from_cols_ids)
    assert len(data.images) == len(expected_images)
    assert {img['id'] for img in data['images_meta']} == set(expected_images)
    assert os.path.dirname(
        data['images'][0]) == data['collections_meta'][0]['absolute_path']
    # Check the image can be loaded again from disk.
    data = neurovault.fetch_neurovault_ids(image_ids=[img_ids[0]],
                                           data_dir=data_dir,
                                           mode='offline')
    assert len(data.images) == 1
    # 'download_new' must NOT overwrite files already on disk
    # (the assertions below verify tampered metadata survives it).
    modified_meta = data['images_meta'][0]
    assert modified_meta['some_key'] == 'some_value'
    modified_meta['some_key'] = 'some_other_value'
    # Tamper with the metadata on disk.
    meta_path = os.path.join(os.path.dirname(modified_meta['absolute_path']),
                             'image_{}_metadata.json'.format(img_ids[0]))
    with open(meta_path, 'wb') as meta_f:
        meta_f.write(json.dumps(modified_meta).encode('UTF-8'))
    # Fetch again with 'download_new', then re-read offline.
    data = neurovault.fetch_neurovault_ids(image_ids=[img_ids[0]],
                                           data_dir=data_dir,
                                           mode='download_new')
    data = neurovault.fetch_neurovault_ids(image_ids=[img_ids[0]],
                                           data_dir=data_dir,
                                           mode='offline')
    # The tampered value must still be there: nothing was overwritten.
    assert data['images_meta'][0]['some_key'] == 'some_other_value'
    # 'overwrite' forces a fresh download, restoring the original metadata.
    data = neurovault.fetch_neurovault_ids(image_ids=[img_ids[0]],
                                           data_dir=data_dir,
                                           mode='overwrite')
    data = neurovault.fetch_neurovault_ids(image_ids=[img_ids[0]],
                                           data_dir=data_dir,
                                           mode='offline')
    # Back to the original server-side value.
    assert data['images_meta'][0]['some_key'] == 'some_value'
def test_fetch_neurovault_ids():
    # Test using an explicit id list instead of filters, and downloading
    # an image which has no collection dir or metadata yet.
    # NOTE(review): this file defines test_fetch_neurovault_ids several
    # times; later definitions shadow this one — confirm which copy is
    # intended to run.
    with _TestTemporaryDirectory() as data_dir:
        assert_raises(ValueError, neurovault.fetch_neurovault_ids, mode='bad')
        data = neurovault.fetch_neurovault_ids(image_ids=[111],
                                               collection_ids=[307],
                                               data_dir=data_dir)
        if len(data.images) == 2:
            assert_equal([img['id'] for img in data['images_meta']],
                         [1750, 111])
            assert_equal(os.path.dirname(data['images'][0]),
                         data['collections_meta'][0]['absolute_path'])
            # Check the image can be loaded again from disk.
            data = neurovault.fetch_neurovault_ids(image_ids=[111],
                                                   data_dir=data_dir,
                                                   mode='offline')
            assert_equal(len(data.images), 1)
            # 'download_new' should leave existing files untouched
            # (verified below: tampered metadata survives it).
            modified_meta = data['images_meta'][0]
            assert_equal(modified_meta['figure'], '3A')
            modified_meta['figure'] = '3B'
            # Tamper with the metadata on disk.
            meta_path = os.path.join(
                os.path.dirname(modified_meta['absolute_path']),
                'image_111_metadata.json')
            with open(meta_path, 'wb') as meta_f:
                meta_f.write(json.dumps(modified_meta).encode('UTF-8'))
            # Fetch with 'download_new', then re-read offline.
            data = neurovault.fetch_neurovault_ids(image_ids=[111],
                                                   data_dir=data_dir,
                                                   mode='download_new')
            data = neurovault.fetch_neurovault_ids(image_ids=[111],
                                                   data_dir=data_dir,
                                                   mode='offline')
            # The tampered value should not have changed.
            assert_equal(data['images_meta'][0]['figure'], '3B')
            # 'overwrite' re-downloads, restoring the original metadata.
            data = neurovault.fetch_neurovault_ids(image_ids=[111],
                                                   data_dir=data_dir,
                                                   mode='overwrite')
            data = neurovault.fetch_neurovault_ids(image_ids=[111],
                                                   data_dir=data_dir,
                                                   mode='offline')
            assert_equal(data['images_meta'][0]['figure'], '3A')
def test_fetch_neurovault_ids():
    # Test using an explicit id list instead of filters, and downloading
    # an image which has no collection dir or metadata yet.
    # NOTE(review): duplicate definition of test_fetch_neurovault_ids —
    # only the last definition in the module is collected; verify which
    # version should be kept.
    with _TestTemporaryDirectory() as data_dir:
        assert_raises(ValueError, neurovault.fetch_neurovault_ids, mode='bad')
        data = neurovault.fetch_neurovault_ids(
            image_ids=[111], collection_ids=[307], data_dir=data_dir)
        if len(data.images) == 2:
            assert_equal([img['id'] for img in data['images_meta']],
                         [1750, 111])
            assert_equal(os.path.dirname(data['images'][0]),
                         data['collections_meta'][0]['absolute_path'])
            # The image can be re-loaded from disk without network access.
            data = neurovault.fetch_neurovault_ids(
                image_ids=[111], data_dir=data_dir, mode='offline')
            assert_equal(len(data.images), 1)
            # 'download_new' leaves files already on disk untouched
            # (verified by the '3B' assertion below).
            modified_meta = data['images_meta'][0]
            assert_equal(modified_meta['figure'], '3A')
            modified_meta['figure'] = '3B'
            # Corrupt the on-disk metadata.
            meta_path = os.path.join(
                os.path.dirname(modified_meta['absolute_path']),
                'image_111_metadata.json')
            with open(meta_path, 'wb') as meta_f:
                meta_f.write(json.dumps(modified_meta).encode('UTF-8'))
            # Re-fetch with 'download_new' and read back offline.
            data = neurovault.fetch_neurovault_ids(
                image_ids=[111], data_dir=data_dir, mode='download_new')
            data = neurovault.fetch_neurovault_ids(
                image_ids=[111], data_dir=data_dir, mode='offline')
            # The corrupted value persists: nothing was overwritten.
            assert_equal(data['images_meta'][0]['figure'], '3B')
            # 'overwrite' forces a fresh download, restoring the metadata.
            data = neurovault.fetch_neurovault_ids(
                image_ids=[111], data_dir=data_dir, mode='overwrite')
            data = neurovault.fetch_neurovault_ids(
                image_ids=[111], data_dir=data_dir, mode='offline')
            assert_equal(data['images_meta'][0]['figure'], '3A')
def test_should_download_resampled_images_along_original_images_if_previously_downloaded(
        tmp_path):
    """Fetching with resample=True after resample=False resamples locally.

    When originals are already on disk, asking for the resampled version
    must resample the local files rather than re-download anything.
    """
    collections, images = _get_neurovault_data()
    sample_collection = collections.iloc[0]
    sample_collection_id = sample_collection["id"]
    # First fetch the original (non-resampled) images.
    data_orig = neurovault.fetch_neurovault_ids(
        collection_ids=[sample_collection_id],
        data_dir=str(tmp_path),
        resample=False)
    # The original files should be on disk ...
    assert all(
        os.path.isfile(meta['absolute_path'])
        for meta in data_orig['images_meta'])
    # ... and the resampled ones should not exist yet.
    assert not any(
        os.path.isfile(meta['resampled_absolute_path'])
        for meta in data_orig['images_meta'])
    # Record the last-modification time of an original file before asking
    # for the resampled version.
    modif_time_original = os.path.getmtime(
        data_orig['images_meta'][0]['absolute_path'])
    # This fetch should only resample the local files, not download.
    data = neurovault.fetch_neurovault_ids(
        collection_ids=[sample_collection_id],
        data_dir=str(tmp_path),
        resample=True)
    # Last-modification time of the same original file after the fetch.
    modif_time_original_after = os.path.getmtime(
        data['images_meta'][0]['absolute_path'])
    # The original file must not have been touched.
    assert np.isclose(modif_time_original, modif_time_original_after)
    # The resampled files now exist ...
    assert all(
        os.path.isfile(meta['resampled_absolute_path'])
        for meta in data['images_meta'])
    # ... and the originals are still present as well.
    assert all(
        os.path.isfile(meta['absolute_path'])
        for meta in data['images_meta'])
    # The fetched (resampled) images carry the standard target affine.
    affines = [load_img(img).affine for img in data['images']]
    assert all(
        np.all(affine == neurovault.STD_AFFINE) for affine in affines)
    # The originals keep their own affines, distinct from the target.
    affines_orig = [load_img(img).affine for img in data_orig['images']]
    assert not any(
        np.all(affine == neurovault.STD_AFFINE) for affine in affines_orig)