def package_stimulus_set(proto_stimulus_set,
                         stimulus_set_identifier,
                         bucket_name="brainio-contrib"):
    """
    Package a set of images along with their metadata for the BrainIO system.
    :param proto_stimulus_set: A StimulusSet containing one row for each image,
        with the column 'image_id', optionally 'image_path_within_store' (to structure the zip directory layout),
        and columns for all stimulus-set-specific metadata, but not the column 'filename'.
    :param stimulus_set_identifier: A unique name identifying the stimulus set
        <lab identifier>.<first author e.g. 'Rajalingham' or 'MajajHong' for shared first-author><YYYY year of publication>.
    :param bucket_name: 'brainio.dicarlo' for DiCarlo Lab stimulus sets, 'brainio.contrib' for external stimulus sets,
        'brainio.requested' for to-be-run-on-monkey-machine stimulus sets.
    """
    _logger.debug(f"Packaging {stimulus_set_identifier}")

    assert 'image_id' in proto_stimulus_set.columns, "StimulusSet needs to have an `image_id` column"

    if bucket_name == 'brainio.requested':
        check_experiment_stimulus_set(proto_stimulus_set)

    # naming
    image_store_identifier = "image_" + stimulus_set_identifier.replace(".", "_")
    # - csv
    csv_file_name = image_store_identifier + ".csv"
    target_csv_path = Path(__file__).parent / csv_file_name
    # - zip
    zip_file_name = image_store_identifier + ".zip"
    target_zip_path = Path(__file__).parent / zip_file_name
    # create csv and zip files
    image_zip_sha1, zip_filenames = create_image_zip(proto_stimulus_set,
                                                     str(target_zip_path))
    assert 'filename' not in proto_stimulus_set.columns, "StimulusSet already has column 'filename'"
    proto_stimulus_set['filename'] = zip_filenames  # keep record of zip (or later local) filenames
    csv_sha1 = create_image_csv(proto_stimulus_set, str(target_csv_path))
    # upload both to S3
    upload_to_s3(str(target_csv_path),
                 bucket_name,
                 target_s3_key=csv_file_name)
    upload_to_s3(str(target_zip_path),
                 bucket_name,
                 target_s3_key=zip_file_name)
    # link both the csv and the zip to the same identifier; of the two lookup rows, only the csv row carries a class
    lookup.append(object_identifier=stimulus_set_identifier,
                  cls='StimulusSet',
                  lookup_type=TYPE_STIMULUS_SET,
                  bucket_name=bucket_name,
                  sha1=csv_sha1,
                  s3_key=csv_file_name,
                  stimulus_set_identifier=None)
    lookup.append(object_identifier=stimulus_set_identifier,
                  cls=None,
                  lookup_type=TYPE_STIMULUS_SET,
                  bucket_name=bucket_name,
                  sha1=image_zip_sha1,
                  s3_key=zip_file_name,
                  stimulus_set_identifier=None)
    _logger.debug(f"stimulus set {stimulus_set_identifier} packaged")
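

# --- Illustrative usage sketch (added for clarity; not part of the original module). ---
# Assumes a StimulusSet class (a pandas.DataFrame subclass) importable from brainio_base.stimuli
# and an `image_paths` mapping from image_id to a local file path, as create_image_zip typically
# expects in BrainIO; the import path, identifiers, file paths and helper name are hypothetical.
def _example_package_stimulus_set():
    from brainio_base.stimuli import StimulusSet  # assumed import path

    proto = StimulusSet([
        {'image_id': 'img0', 'category': 'face'},    # required column: image_id
        {'image_id': 'img1', 'category': 'object'},  # plus stimulus-set-specific metadata
    ])
    # local files to be zipped and uploaded (hypothetical paths)
    proto.image_paths = {'img0': '/tmp/img0.png', 'img1': '/tmp/img1.png'}
    package_stimulus_set(proto,
                         stimulus_set_identifier='dicarlo.Example2020',
                         bucket_name='brainio-contrib')

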
def package_data_assembly(proto_data_assembly,
                          assembly_identifier,
                          stimulus_set_identifier,
                          assembly_class="NeuronRecordingAssembly",
                          bucket_name="brainio-contrib"):
    """
    Package a set of data along with its metadata for the BrainIO system.
    :param proto_data_assembly: An xarray DataArray containing experimental measurements and all related metadata.
        * The dimensions of a neural DataArray must be
            * presentation
            * neuroid
            * time_bin
            A behavioral DataArray should also have a presentation dimension, but can be flexible about its other dimensions.
        * The presentation dimension must have an image_id coordinate and should have coordinates for other presentation-level metadata such as repetition.
          The presentation dimension should not have coordinates for image-specific metadata, these will be drawn from the StimulusSet based on image_id.
        * The neuroid dimension must have a neuroid_id coordinate and should have coordinates for as much neural metadata as possible (e.g. region, subregion, animal, row in array, column in array, etc.)
        * The time_bin dimension should have coordinates time_bin_start and time_bin_end.
    :param assembly_identifier: A dot-separated string starting with a lab identifier.
        * For published: <lab identifier>.<first author e.g. 'Rajalingham' or 'MajajHong' for shared first-author><YYYY year of publication>
        * For requests: <lab identifier>.<b for behavioral|n for neuroidal>.<m for monkey|h for human>.<proposer e.g. 'Margalit'>.<pull request number>
    :param stimulus_set_identifier: The unique name of an existing StimulusSet in the BrainIO system.
    :param assembly_class: The name of a DataAssembly subclass.
    :param bucket_name: 'brainio-dicarlo' for DiCarlo Lab assemblies, 'brainio-contrib' for external assemblies.
    """
    _logger.debug(f"Packaging {assembly_identifier}")

    # verify
    verify_assembly(proto_data_assembly, assembly_class=assembly_class)
    assert hasattr(brainio_base.assemblies, assembly_class)
    assert stimulus_set_identifier in list_stimulus_sets(), \
        f"StimulusSet {stimulus_set_identifier} not found in packaged stimulus sets"

    # identifiers
    assembly_store_identifier = "assy_" + assembly_identifier.replace(".", "_")
    netcdf_file_name = assembly_store_identifier + ".nc"
    target_netcdf_path = Path(__file__).parent / netcdf_file_name
    s3_key = netcdf_file_name

    # write the netCDF file, upload it to S3, and register it in the lookup
    netcdf_kf_sha1 = write_netcdf(proto_data_assembly, target_netcdf_path)
    upload_to_s3(str(target_netcdf_path), bucket_name, target_s3_key=s3_key)
    lookup.append(object_identifier=assembly_identifier,
                  stimulus_set_identifier=stimulus_set_identifier,
                  lookup_type=TYPE_ASSEMBLY,
                  bucket_name=bucket_name,
                  sha1=netcdf_kf_sha1,
                  s3_key=s3_key,
                  cls=assembly_class)
    _logger.debug(f"assembly {assembly_identifier} packaged")