import logging
import os
import pickle
import re
import zipfile
from glob import glob
from pathlib import Path

import h5py
import numpy as np
import pandas as pd
from tqdm import tqdm

# Assumed import locations -- these functions are collected from brainio packaging
# scripts; adjust the module paths to the local repository layout:
# from brainio_base.stimuli import StimulusSet
# from brainio_collection.knownfile import KnownFile as kf
# from brainio_collection.assemblies import AssemblyModel, AssemblyStoreMap, AssemblyStoreModel

_logger = logging.getLogger(__name__)


def collect_stimuli(data_dir):
    with open(os.path.join(data_dir, 'data_IT_base616.pkl'), 'rb') as f:
        IT_base616 = pickle.load(f)
    stimuli = IT_base616['meta']

    # Add columns
    stimuli['image_id'] = ''
    stimuli['image_file_name'] = ''
    stimuli['image_current_local_file_path'] = ''
    stimuli['image_path_within_store'] = ''
    stimuli['image_file_sha1'] = ''

    for idx, row in stimuli.iterrows():
        image_file_name = f'{row.id}.png'
        image_file_path = os.path.join(data_dir, 'stimuli', image_file_name)
        im_kf = kf(image_file_path)
        stimuli.at[idx, 'image_id'] = im_kf.sha1
        stimuli.at[idx, 'image_file_name'] = image_file_name
        stimuli.at[idx, 'image_current_local_file_path'] = image_file_path
        stimuli.at[idx, 'image_path_within_store'] = image_file_name
        stimuli.at[idx, 'image_file_sha1'] = im_kf.sha1

    stimuli = stimuli.drop(columns='id')  # drop ID column since the information is retained in other columns
    stimuli['grp5_bigram_freq'] = stimuli['grp5_bigram_freq'].astype(str)  # IntervalIndex not supported by netCDF4
    stimuli = stimuli.astype({column_name: 'int32'  # bool not supported by netCDF4
                              for column_name in stimuli.select_dtypes(include=['bool']).keys()})
    assert len(np.unique(stimuli['image_id'])) == len(stimuli)

    stimuli = StimulusSet(stimuli)
    stimuli.image_paths = {stimuli.at[idx, 'image_id']: stimuli.at[idx, 'image_current_local_file_path']
                           for idx in range(len(stimuli))}
    return stimuli

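# Example usage (a sketch, not part of the original module -- the data directory,
# the stimulus set name, and the downstream package_stimulus_set call are assumptions):
#
#   stimuli = collect_stimuli('/path/to/base616')
#   stimuli.name = 'dicarlo.base616'
#   package_stimulus_set(stimuli, stimulus_set_name=stimuli.name, bucket_name='brainio-temp')
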
def write_netcdf(assembly, target_netcdf_file):
    _logger.debug(f"Writing assembly to {target_netcdf_file}")
    # netCDF cannot store xarray MultiIndex coordinates, so flatten all indexes
    # into plain coordinates before writing
    for index in assembly.indexes.keys():
        assembly.reset_index(index, inplace=True)
    assembly.to_netcdf(target_netcdf_file)
    netcdf_kf = kf(target_netcdf_file)
    return netcdf_kf.sha1

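# Example usage (a sketch; the assembly variable and file path are assumptions --
# any xarray-based assembly with MultiIndex coordinates works, since the indexes
# are reset before writing):
#
#   sha1 = write_netcdf(assembly, '/tmp/assy_dicarlo_test.nc')
#   # the returned sha1 is what gets registered in the lookup, see add_assembly_lookup below
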
def convert_stimuli(stimulus_set_existing, stimulus_set_name_new, image_dir_new):
    Path(image_dir_new).mkdir(parents=True, exist_ok=True)

    image_converter = ApplyCosineAperture(target_dir=image_dir_new)
    converted_image_paths = {}
    converted_image_ids = {}
    for image_id in tqdm(stimulus_set_existing['image_id'], total=len(stimulus_set_existing),
                         desc='apply cosine aperture'):
        converted_image_path = image_converter.convert_image(image_path=stimulus_set_existing.get_image(image_id))
        converted_image_id = kf(converted_image_path).sha1
        converted_image_ids[image_id] = converted_image_id
        converted_image_paths[converted_image_id] = converted_image_path
        _logger.debug(f"{image_id} -> {converted_image_id}: {converted_image_path}")

    converted_stimuli = StimulusSet(stimulus_set_existing.copy(deep=True))
    converted_stimuli["image_id_without_aperture"] = converted_stimuli["image_id"]
    converted_stimuli["image_id"] = converted_stimuli["image_id"].map(converted_image_ids)
    converted_stimuli["image_file_sha1"] = converted_stimuli["image_id"]  # image_id is the file's sha1
    converted_stimuli.image_paths = converted_image_paths
    converted_stimuli.name = stimulus_set_name_new
    converted_stimuli.id_mapping = converted_image_ids
    return converted_stimuli

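# Example usage (a sketch; the source stimulus set and the new name/directory
# are assumptions):
#
#   existing = brainio_collection.get_stimulus_set('dicarlo.hvm')
#   converted = convert_stimuli(existing, stimulus_set_name_new='dicarlo.hvm-with-aperture',
#                               image_dir_new='/tmp/hvm-with-aperture')
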
def create_image_zip(stimuli, target_zip_path):
    os.makedirs(os.path.dirname(target_zip_path), exist_ok=True)
    with zipfile.ZipFile(target_zip_path, 'w') as target_zip:
        for image in stimuli.itertuples():
            target_zip.write(image.image_current_local_file_path, arcname=image.image_path_within_store)
    zip_kf = kf(target_zip_path)
    return zip_kf.sha1

def test_package_stimulus_set(transaction):
    proto = prep_proto_stim()
    stim_set_name = "dicarlo.test." + now()
    test_bucket = "brainio-temp"
    stim_model = package_stimulus_set(proto, stimulus_set_name=stim_set_name, bucket_name=test_bucket)
    assert stim_model
    assert stim_model.name == stim_set_name

    stim_set_fetched = brainio_collection.get_stimulus_set(stim_set_name)
    assert len(proto) == len(stim_set_fetched)
    for image in proto.itertuples():
        orig = proto.get_image(image.image_id)
        fetched = stim_set_fetched.get_image(image.image_id)
        assert os.path.basename(orig) == os.path.basename(fetched)
        kf_orig = kf(orig)
        kf_fetched = kf(fetched)
        assert kf_orig.sha1 == kf_fetched.sha1

def create_image_zip(proto_stimulus_set, target_zip_path):
    _logger.debug(f"Zipping stimulus set to {target_zip_path}")
    assert isinstance(proto_stimulus_set, StimulusSet), f"Expected StimulusSet object, got {proto_stimulus_set}"
    os.makedirs(os.path.dirname(target_zip_path), exist_ok=True)
    with zipfile.ZipFile(target_zip_path, 'w') as target_zip:
        for image in proto_stimulus_set.itertuples():
            # prefer the explicit path within the store; fall back to the file name
            arcname = image.image_path_within_store if hasattr(image, 'image_path_within_store') \
                else image.image_file_name
            target_zip.write(proto_stimulus_set.get_image(image.image_id), arcname=arcname)
    zip_kf = kf(target_zip_path)
    return zip_kf.sha1

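# Example usage (a sketch; the proto stimulus set and target path are assumptions --
# the returned sha1 is what gets registered for the zip in the lookup database):
#
#   sha1 = create_image_zip(proto, '/tmp/dicarlo.test.zip')
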
def add_assembly_lookup(assembly_name, stim_set_model, bucket_name, target_netcdf_file, assembly_store_unique_name):
    kf_netcdf = kf(target_netcdf_file)
    assy, created = AssemblyModel.get_or_create(name=assembly_name, assembly_class="BehavioralAssembly",
                                                stimulus_set=stim_set_model)
    store, created = AssemblyStoreModel.get_or_create(
        assembly_type="netCDF",
        location_type="S3",
        location=f"https://{bucket_name}.s3.amazonaws.com/{assembly_store_unique_name}.nc",
        unique_name=assembly_store_unique_name,
        sha1=kf_netcdf.sha1)
    assy_store_map, created = AssemblyStoreMap.get_or_create(assembly_model=assy, assembly_store_model=store,
                                                             role=assembly_name)

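# Example usage (a sketch tying write_netcdf to the lookup registration; the
# assembly name, bucket, and unique store name are assumptions):
#
#   target_netcdf_file = '/tmp/assy_dicarlo_test.nc'
#   write_netcdf(assembly, target_netcdf_file)
#   add_assembly_lookup('dicarlo.test-behavior', stim_set_model, 'brainio-temp',
#                       target_netcdf_file, 'assy_dicarlo_test_behavior')
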
def collect_stimuli(stimuli_directory):
    meta_path = os.path.join(stimuli_directory, 'cocogray_labels.mat')
    with h5py.File(meta_path, 'r') as meta:  # MATLAB v7.3 .mat files are HDF5
        # 'lb' holds object references to per-image label strings; dereference each
        # one and decode its uint16 char array into a Python string
        # (Dataset.value was removed in h5py 3.x; index the dataset directly instead)
        labels = [''.join(chr(c) for c in meta[meta['lb'][0, i]])
                  for i in range(meta['lb'][0].size)]

    stimuli = []
    for image_file_path in tqdm(glob(os.path.join(stimuli_directory, '*.png'))):
        image_file_name = os.path.basename(image_file_path)
        image_number = re.match(r'im([0-9]+)\.png', image_file_name)  # escape the literal dot
        image_number = int(image_number.group(1))
        im_kf = kf(image_file_path)
        stimuli.append({
            'image_id': im_kf.sha1,
            'image_file_name': image_file_name,
            'image_current_local_file_path': image_file_path,
            'image_file_sha1': im_kf.sha1,
            'image_number': image_number,
            'image_path_within_store': image_file_name,
            'label': labels[image_number],
        })
    stimuli = pd.DataFrame(stimuli)
    assert len(stimuli) == 1600
    assert len(np.unique(stimuli['image_id'])) == len(stimuli)
    return stimuli

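# Example usage (a sketch; the directory path is an assumption):
#
#   stimuli = collect_stimuli('/path/to/cocogray')
#   print(stimuli[['image_number', 'label']].head())
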