def extra_additions_dataset(example_frame, example_readme):
    """Build a ``Dataset`` named "test_dataset" with all optional extras set.

    The dataset is created from ``example_frame`` with ``example_readme`` as
    its README and "me" as the package owner, then configured with a path
    column, extra files, a column rename map and a metadata column.

    Args:
        example_frame: Manifest passed to ``Dataset`` as its first argument.
        example_readme: README path; also registered as the only extra file.

    Returns:
        The configured ``Dataset`` instance.
    """
    path_columns = ["2dReadPath"]
    extra_files = [example_readme]
    column_renames = {"2dReadPath": "MappedPath"}
    metadata_columns = ["Structure"]

    dataset = Dataset(example_frame, "test_dataset", "me", example_readme)
    dataset.set_path_columns(path_columns)
    dataset.set_extra_files(extra_files)
    dataset.set_column_names_map(column_renames)
    dataset.set_metadata_columns(metadata_columns)
    return dataset
def distribute_cellprofiler_features(
    test=False,
    csv_loc="/allen/aics/gene-editing/FISH/2019/chaos/data/cp_20201022/merged_features/features2quilt/features2quilt.csv",
    dataset_name="2d_autocontrasted_single_cell_features_actn2_2",
    package_owner="tanyasg",
    s3_bucket="s3://allencell-internal-quilt",
    readme_path="/allen/aics/gene-editing/FISH/2019/chaos/data/cp_20201022/merged_features/features2quilt/README.md",
):
    """Package the CellProfiler feature manifest and distribute it to ``s3_bucket``.

    Reads the manifest CSV at ``csv_loc``, wraps it in a ``Dataset``, attaches
    a usage doc and license, and distributes the package with a message that
    records the current git commit hash.

    Args:
        test: When true, distribute a one-row test manifest instead of the
            full one and append ``"_test"`` to ``dataset_name``.
        csv_loc: Path of the manifest CSV. It must contain ``cell_line`` and
            ``cellprofiler_id`` columns.
        dataset_name: Name given to the dataset.
        package_owner: Owner passed to ``Dataset``.
        s3_bucket: URI passed to ``Dataset.distribute`` as ``push_uri``.
        readme_path: README to attach to the dataset. Defaults to the README
            that sits next to the default ``csv_loc``.

    Raises:
        subprocess.CalledProcessError: If ``git rev-parse HEAD`` fails, e.g.
            when the working directory is not inside a git repository.
    """
    df = pd.read_csv(csv_loc)

    # Subsample the features to make a small test package.
    if test:
        # make_test_csv is expected to write the test feature CSV and the test
        # image-counts CSV referenced by name in the manifest below.
        make_test_csv(csv_loc=csv_loc)

        # Reuse the identifiers of the first row of the full manifest.
        cell_line = df["cell_line"][0]
        cellprofiler_id = df["cellprofiler_id"][0]

        # One-row manifest that points at the test files.
        df = pd.DataFrame({
            "feature_file": ["cp_features_test.csv"],
            "image_object_count_file": ["image_object_counts_test.csv"],
            "cell_line": [cell_line],
            "cellprofiler_id": [cellprofiler_id],
        })

        dataset_name = f"{dataset_name}_test"

    # Create the dataset.
    ds = Dataset(
        dataset=df,
        name=dataset_name,
        package_owner=package_owner,
        readme_path=readme_path,
    )

    # Optionally add common additional requirements.
    ds.add_usage_doc(
        "https://docs.quiltdata.com/walkthrough/reading-from-a-package")
    ds.add_license("https://www.allencell.org/terms-of-use.html")

    # Optionally indicate column values to use for file metadata.
    ds.set_metadata_columns(["cell_line", "cellprofiler_id"])

    # Optionally rename the columns on the package level.
    ds.set_column_names_map({
        "feature_file": "features",
        "image_object_count_file": "object_counts",
    })

    # Record the commit hash of the code in the distribution message. git runs
    # in the current working directory.
    label = (
        subprocess.check_output(["git", "rev-parse", "HEAD"])
        .strip()
        .decode("utf-8")
    )

    # Distribute.
    ds.distribute(
        push_uri=s3_bucket,
        message=f"git commit hash of fish_morphology_code = {label}",
    )
# Columns whose values are used as file metadata
metadata_columns = [
    "CellId",
    "CellIndex",
    "CellLine",
    "NucMembSegmentationAlgorithm",
    "NucMembSegmentationAlgorithmVersion",
    "FOVId",
    "Gene",
    "PlateId",
    "WellId",
    "ProteinDisplayName",
    "StructureDisplayName",
    "Workflow",
    "FeatureExplorerURL",
]
ds.set_metadata_columns(metadata_columns)

# Directory name used in the produced package for each path column
package_directory_names = {
    "MembraneContourReadPath": "membrane_contours",
    "MembraneSegmentationReadPath": "membrane_segmentations",
    "NucleusContourReadPath": "dna_contours",
    "NucleusSegmentationReadPath": "dna_segmentations",
    "SourceReadPath": "fovs",
    "StructureContourReadPath": "structure_contours",
    "StructureSegmentationReadPath": "structure_segmentations",
}
ds.set_column_names_map(package_directory_names)

# Step 6:
# Distribute the package
ds.distribute(
    push_uri="s3://quilt-aics",
    message="Add feature explorer links to metadata",
)

# Visual separator followed by a completion notice
print("-" * 80)
print("COMPLETE")
# Step 5:
# Attach the license
ds.add_license("https://www.allencell.org/terms-of-use.html")

# Columns whose values are used as file metadata
metadata_columns = [
    "CellId",
    "CellIndex",
    "CellLine",
    "NucMembSegmentationAlgorithm",
    "NucMembSegmentationAlgorithmVersion",
    "FOVId",
    "Gene",
    "PlateId",
    "WellId",
    "ProteinDisplayName",
    "StructureDisplayName",
    "Workflow",
    "FeatureExplorerURL",
]
ds.set_metadata_columns(metadata_columns)

# Directory name used in the produced package for each path column
package_directory_names = {
    "save_feats_path": "cell_features",
    "save_reg_path": "cell_images_3d",
    "save_reg_path_flat": "cell_images_2d",
    "save_reg_path_flat_proj": "cell_images_2d_projections",
}
ds.set_column_names_map(package_directory_names)

# Extra files: every diagnostics_*.png found in scp_output_dir
contact_sheets = list(scp_output_dir.glob("diagnostics_*.png"))
ds.set_extra_files({"contact_sheets": contact_sheets})

# Step 6:
# Distribute the package
ds.distribute(
    push_uri="s3://quilt-aics",
    message="Add feature explorer links to metadata",
)

# Visual separator followed by a completion notice
print("-" * 80)
print("COMPLETE")