def package_stimulus_set(proto_stimulus_set, stimulus_set_identifier, bucket_name="brainio-contrib"):
    """
    Package a set of images along with their metadata for the BrainIO system.

    :param proto_stimulus_set: A StimulusSet containing one row for each image, the column 'image_id',
        optionally the column 'image_path_within_store' (to structure the zip directory layout),
        and columns for all stimulus-set-specific metadata, but not the column 'filename'.
    :param stimulus_set_identifier: A unique name identifying the stimulus set:
        <lab identifier>.<first author, e.g. 'Rajalingham', or 'MajajHong' for shared first-authorship><YYYY year of publication>.
    :param bucket_name: 'brainio.dicarlo' for DiCarlo Lab stimulus sets, 'brainio.contrib' for external stimulus sets,
        'brainio.requested' for to-be-run-on-monkey-machine stimulus sets.
    """
    _logger.debug(f"Packaging {stimulus_set_identifier}")
    assert 'image_id' in proto_stimulus_set.columns, "StimulusSet needs to have an `image_id` column"
    if bucket_name == 'brainio.requested':
        check_experiment_stimulus_set(proto_stimulus_set)

    # naming
    image_store_identifier = "image_" + stimulus_set_identifier.replace(".", "_")
    # - csv
    csv_file_name = image_store_identifier + ".csv"
    target_csv_path = Path(__file__).parent / csv_file_name
    # - zip
    zip_file_name = image_store_identifier + ".zip"
    target_zip_path = Path(__file__).parent / zip_file_name

    # create csv and zip files
    image_zip_sha1, zip_filenames = create_image_zip(proto_stimulus_set, str(target_zip_path))
    assert 'filename' not in proto_stimulus_set.columns, "StimulusSet already has column 'filename'"
    proto_stimulus_set['filename'] = zip_filenames  # keep record of zip (or later local) filenames
    csv_sha1 = create_image_csv(proto_stimulus_set, str(target_csv_path))

    # upload both to S3
    upload_to_s3(str(target_csv_path), bucket_name, target_s3_key=csv_file_name)
    upload_to_s3(str(target_zip_path), bucket_name, target_s3_key=zip_file_name)

    # link both the csv and the zip to the same identifier; of the two lookup rows, only the csv row carries a class
    lookup.append(object_identifier=stimulus_set_identifier, cls='StimulusSet', lookup_type=TYPE_STIMULUS_SET,
                  bucket_name=bucket_name, sha1=csv_sha1, s3_key=csv_file_name,
                  stimulus_set_identifier=None)
    lookup.append(object_identifier=stimulus_set_identifier, cls=None, lookup_type=TYPE_STIMULUS_SET,
                  bucket_name=bucket_name, sha1=image_zip_sha1, s3_key=zip_file_name,
                  stimulus_set_identifier=None)
    _logger.debug(f"stimulus set {stimulus_set_identifier} packaged")
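
# Usage sketch (a minimal example, not part of this module's API): how a caller might assemble a small
# StimulusSet and hand it to package_stimulus_set. The identifier 'dicarlo.Example2020', the image
# directory, and the 'object_name' metadata column are hypothetical placeholders; the StimulusSet import
# location and its `image_paths` mapping follow common BrainIO usage but should be checked against the
# installed brainio packages.
def _example_package_stimulus_set():
    from brainio_base.stimuli import StimulusSet  # assumed import location

    image_dir = Path("/path/to/images")  # hypothetical directory of .png stimuli
    rows = [{'image_id': path.stem, 'object_name': path.stem.split('_')[0]}  # per-image metadata
            for path in sorted(image_dir.glob("*.png"))]
    proto = StimulusSet(rows)
    # map each image_id to the local file that should be zipped and uploaded
    proto.image_paths = {row['image_id']: str(image_dir / (row['image_id'] + ".png")) for row in rows}
    package_stimulus_set(proto, stimulus_set_identifier='dicarlo.Example2020', bucket_name='brainio-contrib')
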
def package_data_assembly(proto_data_assembly, assembly_identifier, stimulus_set_identifier,
                          assembly_class="NeuronRecordingAssembly", bucket_name="brainio-contrib"):
    """
    Package a set of data along with its metadata for the BrainIO system.

    :param proto_data_assembly: An xarray DataArray containing experimental measurements and all related metadata.
        * The dimensions of a neural DataArray must be
            * presentation
            * neuroid
            * time_bin
          A behavioral DataArray should also have a presentation dimension, but can be flexible about its other dimensions.
        * The presentation dimension must have an image_id coordinate and should have coordinates for
          presentation-level metadata such as repetition. It should not have coordinates for image-specific
          metadata; these are drawn from the StimulusSet based on image_id.
        * The neuroid dimension must have a neuroid_id coordinate and should have coordinates for as much neural
          metadata as possible (e.g. region, subregion, animal, row in array, column in array, etc.).
        * The time_bin dimension should have coordinates time_bin_start and time_bin_end.
    :param assembly_identifier: A dot-separated string starting with a lab identifier.
        * For published data: <lab identifier>.<first author, e.g. 'Rajalingham', or 'MajajHong' for shared first-authorship><YYYY year of publication>
        * For requests: <lab identifier>.<b for behavioral|n for neuroidal>.<m for monkey|h for human>.<proposer, e.g. 'Margalit'>.<pull request number>
    :param stimulus_set_identifier: The unique name of an existing StimulusSet in the BrainIO system.
    :param assembly_class: The name of a DataAssembly subclass.
    :param bucket_name: 'brainio-dicarlo' for DiCarlo Lab assemblies, 'brainio-contrib' for external assemblies.
    """
    _logger.debug(f"Packaging {assembly_identifier}")

    # verify
    verify_assembly(proto_data_assembly, assembly_class=assembly_class)
    assert hasattr(brainio_base.assemblies, assembly_class)
    assert stimulus_set_identifier in list_stimulus_sets(), \
        f"StimulusSet {stimulus_set_identifier} not found in packaged stimulus sets"

    # identifiers
    assembly_store_identifier = "assy_" + assembly_identifier.replace(".", "_")
    netcdf_file_name = assembly_store_identifier + ".nc"
    target_netcdf_path = Path(__file__).parent / netcdf_file_name
    s3_key = netcdf_file_name

    # execute
    netcdf_kf_sha1 = write_netcdf(proto_data_assembly, target_netcdf_path)
    upload_to_s3(target_netcdf_path, bucket_name, s3_key)
    lookup.append(object_identifier=assembly_identifier, stimulus_set_identifier=stimulus_set_identifier,
                  lookup_type=TYPE_ASSEMBLY, bucket_name=bucket_name, sha1=netcdf_kf_sha1,
                  s3_key=s3_key, cls=assembly_class)
    _logger.debug(f"assembly {assembly_identifier} packaged")
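
# Usage sketch (a minimal example with made-up data): constructing a small NeuronRecordingAssembly with the
# required presentation / neuroid / time_bin dimensions and packaging it against an already-packaged
# StimulusSet. The identifiers, coordinate values, and random responses are hypothetical placeholders; real
# packaging would use measured data and a stimulus set that has already gone through package_stimulus_set.
def _example_package_data_assembly():
    import numpy as np
    from brainio_base.assemblies import NeuronRecordingAssembly

    proto = NeuronRecordingAssembly(
        np.random.rand(4, 3, 1),  # presentations x neuroids x time_bins
        coords={
            'image_id': ('presentation', ['im0', 'im1', 'im0', 'im1']),  # ties rows to the StimulusSet
            'repetition': ('presentation', [0, 0, 1, 1]),
            'neuroid_id': ('neuroid', ['n0', 'n1', 'n2']),
            'region': ('neuroid', ['IT', 'IT', 'IT']),
            'time_bin_start': ('time_bin', [70]),
            'time_bin_end': ('time_bin', [170]),
        },
        dims=['presentation', 'neuroid', 'time_bin'])
    package_data_assembly(proto, assembly_identifier='dicarlo.Example2020',
                          stimulus_set_identifier='dicarlo.Example2020',
                          assembly_class='NeuronRecordingAssembly',
                          bucket_name='brainio-contrib')
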