def create_image_csv(proto_stimulus_set, target_path):
    """Write the stimulus-set-specific columns of `proto_stimulus_set` to a csv file.

    :param proto_stimulus_set: a `StimulusSet`-like DataFrame.
    :param target_path: path of the csv file to write.
    :return: SHA1 hash of the written csv file.
    """
    _logger.debug(f"Writing csv to {target_path}")
    columns = extract_specific(proto_stimulus_set)
    proto_stimulus_set[columns].to_csv(target_path, index=False)
    return sha1_hash(target_path)
def create_image_zip(proto_stimulus_set, target_zip_path):
    """
    Create zip file for images in StimulusSet.

    Files in the zip will follow a flat directory structure with each row's filename equal to the
    `image_id` by default, or `image_path_within_store` if passed.

    :param proto_stimulus_set: a `StimulusSet` with a `get_image: image_id -> local path` method,
        an `image_id` column, and optionally an `image_path_within_store` column.
    :param target_zip_path: path to write the zip file to
    :return: tuple of (SHA1 hash of the zip file, list of archive names written into the zip)
    """
    _logger.debug(f"Zipping stimulus set to {target_zip_path}")
    # os.makedirs('') raises FileNotFoundError, so only create the parent directory if the
    # target path actually contains one.
    target_dir = os.path.dirname(target_zip_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    arcnames = []
    with zipfile.ZipFile(target_zip_path, 'w') as target_zip:
        # using iterrows instead of itertuples for very large StimulusSets
        for _, row in proto_stimulus_set.iterrows():
            image_path = proto_stimulus_set.get_image(row['image_id'])
            extension = os.path.splitext(image_path)[1]
            # membership test on the Series instead of hasattr: attribute probing on a pandas
            # Series can collide with Series methods/attributes, and the value is read with
            # bracket access anyway.
            if 'image_path_within_store' in row:
                arcname = row['image_path_within_store']
            else:
                arcname = row['image_id']
            arcname = arcname + extension
            target_zip.write(image_path, arcname=arcname)
            arcnames.append(arcname)
    sha1 = sha1_hash(target_zip_path)
    return sha1, arcnames
def verify_sha1(filepath, sha1):
    """Check that the file at `filepath` has the expected SHA-1 hash.

    :param filepath: path of the file to verify.
    :param sha1: expected SHA-1 hex digest.
    :raises IOError: if the computed hash does not match `sha1`.
    """
    computed = sha1_hash(filepath)
    if computed != sha1:
        raise IOError(f"File '{filepath}': invalid SHA-1 hash {computed} (expected {sha1})")
    _logger.debug(f"sha1 OK: {filepath}")
def write_netcdf(assembly, target_netcdf_file):
    """Serialize `assembly` to a netCDF file and return the file's SHA1 hash.

    NOTE(review): this file defines `write_netcdf` a second time further down; at import
    time that later definition shadows this one.

    :param assembly: an xarray-compatible assembly (e.g. a BrainIO DataAssembly).
    :param target_netcdf_file: path to write the netCDF file to.
    :return: SHA1 hash of the written file.
    """
    _logger.debug(f"Writing assembly to {target_netcdf_file}")
    assembly = DataArray(assembly)  # if we're passed a BrainIO DataAssembly, it will automatically re-index otherwise
    # Reset all indexes in one non-mutating call: `inplace=True` was deprecated and then
    # removed from xarray's reset_index (it raises TypeError on current xarray), and the
    # old loop mutated `assembly.indexes` while iterating it. This also matches the other
    # write_netcdf implementation in this file.
    assembly = assembly.reset_index(list(assembly.indexes))
    assembly.to_netcdf(target_netcdf_file)
    return sha1_hash(target_netcdf_file)
def np_to_png(img_array, img_temp_path):
    """Save each numpy image in `img_array` as a png named after its SHA1 hash.

    Each image is first written as ``img<index>.png``, hashed, then renamed to
    ``<sha1>.png``.

    :param img_array: iterable of numpy image arrays (cast to uint8 before saving).
    :param img_temp_path: directory (a `pathlib.Path`) to write the png files into.
    :return: pandas DataFrame with columns `image_id` (the sha1), `image_index`, and
        `image_current_local_file_path`.
    """
    records = []
    for index, image in enumerate(img_array):
        initial_path = img_temp_path / f"img{index}.png"
        Image.fromarray(image.astype('uint8')).save(initial_path)
        sha1 = sha1_hash(initial_path)
        hashed_path = img_temp_path / f"{sha1}.png"
        initial_path.rename(hashed_path)
        records.append({
            "image_id": sha1,
            "image_index": index,
            "image_current_local_file_path": hashed_path
        })
        _logger.debug(f"{initial_path} -> {hashed_path}")
    return pd.DataFrame(records)
def write_netcdf(assembly, target_netcdf_file):
    """Write `assembly` to `target_netcdf_file` as netCDF and return the file's SHA1 hash.

    NOTE(review): this file also defines an earlier `write_netcdf`; at import time this
    later definition shadows it.

    :param assembly: an xarray object with `.reset_index` and `.to_netcdf`.
    :param target_netcdf_file: path to write the netCDF file to.
    :return: SHA1 hash of the written file.
    """
    _logger.debug(f"Writing assembly to {target_netcdf_file}")
    # flatten all indexes into plain coordinates before writing — presumably because
    # netCDF cannot serialize index coordinates directly; TODO confirm
    flattened = assembly.reset_index(list(assembly.indexes))
    flattened.to_netcdf(target_netcdf_file)
    return sha1_hash(target_netcdf_file)