def process_zip(blob: storage.Blob, patient_id: str) -> np.ndarray:
    """Download a zipped DICOM blob and return a 3D standardized numpy array.

    :param blob: GCS blob containing a zip archive of DICOM slices.
    :param patient_id: identifier used to name the downloaded archive.
    :return: the preprocessed 3D scan as a numpy array.
    """
    old_wd = os.getcwd()
    dirname = f'/tmp/dicom_to_npy-{int(time.time())}'
    os.makedirs(dirname, exist_ok=True)
    os.chdir(dirname)
    try:
        blob.download_to_filename(patient_id + '.zip')
        logging.info('extracting zip file')
        shutil.unpack_archive(patient_id + '.zip', format='zip')
        # os.walk yields directories top-down; index 3 is the nested subdir
        # holding the DICOM files. NOTE(review): fragile if the archive
        # layout ever changes — confirm against real uploads.
        dirpath = list(os.walk('.'))[3][0]
        logging.info(f'loading scans from {dirpath}')
        scan = parsers.load_scan(dirpath)
        # Log *before* the work it describes (was after, and needlessly an f-string).
        logging.info('processing dicom data')
        processed_scan = preprocess_scan(scan)
    finally:
        # Restore the working directory and drop the scratch dir even when
        # download/extraction/processing raises (original leaked both).
        os.chdir(old_wd)
        shutil.rmtree(dirname, ignore_errors=True)
    return processed_scan
def _download_from_blobstore(self, blob_to_download_name, blob_download_target_path, chunk_size=None):
    """Download a file from the blobstore to a local path.

    :type chunk_size: int
    :param chunk_size: If file size is greater than 5MB, it is recommended
        that chunked downloads should be used. To do so, pass chunk_size
        param to this function. This must be a multiple of 256 KB per the
        API specification.
    :return: True on success.
    :raises Exception: when the container is unavailable or the download fails.
    """
    log_prefix = '[Google Cloud Storage] [DOWNLOAD]'
    if not self.container:
        # Guard clause: fail fast when there is no container to read from.
        message = '{} ERROR: blob_to_download={}, blob_target_name={}, container={}\n{}'.format(
            log_prefix, blob_to_download_name, blob_download_target_path, self.CONTAINER,
            "Container not found or accessible")
        self.logger.error(message)
        raise Exception(message)
    # Bug fix: original passed two args to a one-placeholder format string,
    # silently dropping the target path from the log line.
    self.logger.info('{} Started to download the tarball to {}.'.format(
        log_prefix, blob_download_target_path))
    try:
        blob = Blob(blob_to_download_name, self.container, chunk_size=chunk_size)
        blob.download_to_filename(blob_download_target_path)
        # Bug fix: success log previously logged the container value under
        # blob_target_name and the target path under container.
        self.logger.info('{} SUCCESS: blob_to_download={}, blob_target_name={}, container={}'
                         .format(log_prefix, blob_to_download_name,
                                 blob_download_target_path, self.CONTAINER))
        return True
    except Exception as error:
        message = '{} ERROR: blob_to_download={}, blob_target_name={}, container={}\n{}'.format(
            log_prefix, blob_to_download_name, blob_download_target_path, self.CONTAINER, error)
        self.logger.error(message)
        raise Exception(message)
def _load_image(self, blob: gcs.Blob) -> Tuple[np.ndarray, GoesMetadata]:
    """Download a GOES netCDF blob, resample it, and return (image, metadata).

    Previously computed results are served from self.cache, keyed by the
    blob name, when a cache is configured.
    """
    key = blob.name
    if self.cache and key in self.cache:
        return self.cache[key]
    with file_util.mktemp(dir=self.tmp_dir, suffix='.nc') as tmp_path:
        logging.info('downloading %s', key)
        blob.download_to_filename(tmp_path)
        logging.info('downloaded %s', key)
        with xarray.open_dataset(tmp_path, engine='h5netcdf') as dataset:
            image = self._resample_image(dataset)
            logging.info('resampled %s', key)
            metadata = goes_metadata(dataset)
    result = image, metadata
    if self.cache:
        self.cache[key] = result
    return result
def download_encrypted_blob(bucket_name, source_blob_name, destination_file_name, base64_encryption_key):
    """Download a customer-encrypted blob from Google Cloud Storage.

    The key must be the same AES256 key that was used to upload the blob.
    It arrives base64-encoded and is decoded to the raw 32-byte value
    before being handed to the storage client.
    """
    client = storage.Client()
    bucket = client.get_bucket(bucket_name)
    # Decode the base64 string back into the raw 32-byte AES256 key.
    raw_key = base64.b64decode(base64_encryption_key)
    encrypted_blob = Blob(source_blob_name, bucket, encryption_key=raw_key)
    encrypted_blob.download_to_filename(destination_file_name)
    print('Blob {} downloaded to {}.'.format(source_blob_name, destination_file_name))
def download_documents(user_id, docs):
    """Download the given user's files to the server and build a link path.

    If more than one file is specified, the files are zipped and the zip
    path is returned; otherwise the single file's path is returned.

    Args:
        user_id: firestore user's document id
        docs: a list of filenames

    Returns:
        dict: {"download_path": ...} on success, {"error": ...} when no
        files were selected.
    """
    # Idiomatic empty check (was `len(docs) < 1`).
    if not docs:
        return {"error": "No files selected"}
    higher_path = os.path.dirname(os.path.abspath(__file__))
    static_path = "/static/" + config["DOCUMENTS"]["USER_DOWNLOADS_PATH"]
    # Unique folder name: user id + Berlin-local timestamp + random token,
    # so concurrent downloads by the same user cannot collide.
    path_str = "{}_{}_{}".format(
        user_id,
        datetime.now(tz=pytz.timezone("Europe/Berlin")).strftime("%H.%M.%S_%d-%m-%Y"),
        token_urlsafe(8),  # 8 Random Bytes
    )
    full_path = higher_path + static_path + path_str
    os.makedirs(full_path)  # Ensures the path to the file has been created
    for doc in docs:  # Downloads each file
        blob = Blob(get_cloud_folder(user_id) + doc, bucket, encryption_key=encryption_key)
        # Use os.path.join for the local filesystem write (was manual "/" concat);
        # the returned web paths below intentionally keep "/" separators.
        blob.download_to_filename(os.path.join(full_path, doc))
    if len(docs) > 1:  # If more than one file, zips them
        shutil.make_archive(full_path, "zip", full_path)
        return {"download_path": static_path + path_str + ".zip"}  # Returns zip file
    return {
        "download_path": static_path + path_str + "/" + docs[0]
    }  # Returns single file
def download_encrypted_blob(bucket_name, source_blob_name, destination_file_name, base64_encryption_key):
    """Fetch a previously-encrypted GCS blob using its AES256 key.

    The supplied key must match the one used at upload time. Because the
    key is passed in base64-encoded, it is decoded to the 32-byte raw
    bytestring the API expects.
    """
    storage_client = storage.Client()
    target_bucket = storage_client.get_bucket(bucket_name)
    decoded_key = base64.b64decode(base64_encryption_key)
    source = Blob(source_blob_name, target_bucket, encryption_key=decoded_key)
    source.download_to_filename(destination_file_name)
    print('Blob {} downloaded to {}.'.format(
        source_blob_name, destination_file_name))
def to_public_png(npy_blob: storage.Blob, public_bucket: storage.Bucket):
    """Convert a .npy blob into a PNG and upload it to the public bucket.

    The PNG keeps the source blob's path with '.npy' swapped for '.png'.

    :param npy_blob: blob whose name ends in '.npy' containing a numpy array.
    :param public_bucket: destination bucket for the rendered PNG.
    :return: None
    """
    npy_filepath = f'/tmp/{npy_blob.name.split("/")[-1]}'
    png_filepath = npy_filepath.replace('.npy', '.png')
    try:
        npy_blob.download_to_filename(npy_filepath)
        arr = np.load(npy_filepath)
        plt.imsave(png_filepath, arr)
        png_blob_name = npy_blob.name.replace('.npy', '.png')
        png_blob = public_bucket.blob(png_blob_name)
        png_blob.upload_from_filename(png_filepath)
    finally:
        # Remove the scratch files even when download/convert/upload raises
        # (original leaked them on any exception).
        for path in (npy_filepath, png_filepath):
            if os.path.exists(path):
                os.remove(path)
def process_cab(blob: storage.Blob, patient_id: str) -> np.ndarray:
    """Download a .cab blob of DICOM data and return a 3D standardized array.

    :param blob: GCS blob containing a cab archive of DICOM files.
    :param patient_id: identifier used to name the downloaded archive.
    :return: the processed 3D scan as a numpy array.
    """
    old_wd = os.getcwd()
    dirname = f'/tmp/dicom_to_npy-{int(time.time())}'
    os.makedirs(dirname, exist_ok=True)
    os.chdir(dirname)
    try:
        blob.download_to_filename(patient_id + '.cab')
        # check_call surfaces extraction failures immediately instead of
        # silently processing an empty directory (original ignored the
        # cabextract exit status).
        subprocess.check_call(['cabextract', patient_id + '.cab'])
        logging.info('extracted cab file')
        # os.walk is top-down; index 2 is the extracted subdir holding the
        # scans. NOTE(review): fragile if cab layout changes — confirm.
        dirpath = list(os.walk('.'))[2][0]
        logging.info(f'loading scans from {dirpath}')
        processed_scan = _process_cab(dirpath)
    finally:
        # Restore cwd and remove scratch dir even on failure; addresses the
        # TODO about process_cab leaving a broken working directory.
        os.chdir(old_wd)
        shutil.rmtree(dirname, ignore_errors=True)
    return processed_scan
def filter(fr, scenes, removal):
    """Download a video, then either cut out the given scenes or blur
    detected regions within them, writing the result to MEDIA_ROOT.

    :param fr: object exposing ``src_video`` (with a ``.url``) — presumably a
        filter-request model; verify against caller.
    :param scenes: list of dicts with ``start_millis``/``end_millis`` keys.
    :param removal: True → trim the scenes out and concat the remainder;
        False → detect nudity in scene frames and overlay box-blurs.
    :return: local path of the produced output file.
    """
    video = fr.src_video
    # Keep the source file's extension for both the download and the output.
    fileext = os.path.splitext(os.path.basename(video.url))[1]
    src_name = str(uuid.uuid4()) + '_' + str(current_millis())
    src_path = os.path.join(settings.MEDIA_ROOT, src_name + fileext)
    print('##### start download %s' % src_path)
    # Fetch the source video from GCS into MEDIA_ROOT.
    client = storage.Client.from_service_account_json(settings.GCP_KEY_PATH)
    bucket = client.get_bucket(settings.BUCKET_NAME)
    blob = Blob(gcp_path(video.url), bucket)
    blob.download_to_filename(src_path)
    print('##### complete download %s' % src_path)
    # Reuse the name `video` for the OpenCV capture of the downloaded file.
    video = cv2.VideoCapture(src_path)
    fps = video.get(cv2.CAP_PROP_FPS)
    num_frames = video.get(cv2.CAP_PROP_FRAME_COUNT)
    # Total duration in milliseconds, derived from frame count / fps.
    duration = int((num_frames / fps) * 1000)
    out_name = str(uuid.uuid4()) + '_' + str(current_millis())
    out_path = os.path.join(settings.MEDIA_ROOT, out_name + fileext)
    infile = ffmpeg.input(src_path)
    if removal:
        print('##### start removing %d scenes' % len(scenes))
        # Scene boundaries in seconds, sorted so we can walk them in order.
        start_times = sorted(
            [scene['start_millis'] / 1000 for scene in scenes])
        end_times = sorted([scene['end_millis'] / 1000 for scene in scenes])
        times_length = len(start_times)
        times = []
        print(start_times, end_times)
        # Build the complementary (keep) intervals between the scenes to cut:
        # before the first scene, between consecutive scenes, and after the
        # last scene (open-ended, end=None).
        for idx in range(times_length):
            print(start_times[idx], end_times[idx])
            if idx == 0:
                if start_times[idx] != 0:
                    times.append((0, start_times[idx]))
            if idx == times_length - 1:
                times.append((end_times[idx], None))
            if 0 < idx and times_length > 1:
                times.append((end_times[idx - 1], start_times[idx]))
        trims = []
        print(times)
        # One trim per keep-interval; setpts resets timestamps so the
        # concatenated segments play back contiguously.
        for time in times:
            start = time[0]
            end = time[1]
            if end is None:
                trims.append(infile.trim(start=start).setpts('N/FR/TB'))
            else:
                trims.append(
                    infile.trim(start=start, end=end).setpts('N/FR/TB'))
        print(trims)
        ffmpeg.concat(*trims).output(out_path).run(overwrite_output=True)
    else:
        # extract frames for detecting
        print('##### start extracting frames for detecting blurbox')
        detector = NudeDetector(settings.NUDE_NET_DETECTOR_MODEL_PATH)
        # Scratch directory for sampled frames, named after the source file.
        frames_dir = os.path.join(settings.MEDIA_ROOT, src_name)
        if not os.path.exists(frames_dir):
            os.makedirs(frames_dir)
        try:
            # Sample one frame every 250 ms within each scene; the frame's
            # filename is its millisecond timestamp.
            interval = 250
            for scene in scenes:
                cur_millis = scene['start_millis']
                while (True):
                    video.set(cv2.CAP_PROP_POS_MSEC, cur_millis)
                    ret, frame = video.read()
                    if ret:
                        frame_path = os.path.join(frames_dir,
                                                  str(cur_millis) + '.jpg')
                        cv2.imwrite(frame_path, frame)
                    else:
                        # Read failed (e.g. past end of stream): stop sampling.
                        break
                    cur_millis += interval
                    if cur_millis >= scene[
                            'end_millis'] or cur_millis > duration:
                        break
            print('##### complete extracting frames for detecting blurbox')
            print('##### start detecting blurbox %s' % frames_dir)
            bps = BlurPointSet()
            # Process frames in timestamp order (filenames are millis).
            for frame in sorted(os.listdir(frames_dir),
                                key=lambda f: int(os.path.splitext(f)[0])):
                censors = detector.detect(os.path.join(frames_dir, frame))
                print('detected blur box point %d from %s' %
                      (len(censors), frame))
                start_millis = int(os.path.splitext(frame)[0])
                # Each detection covers the sampling interval that follows it.
                end_millis = start_millis + interval
                bps.add_censors(censors, start_millis, end_millis)
            print('##### complete detecting blurbox')
            print('##### start blur')
            # Chain one overlay per blur region: crop the region, box-blur it,
            # and composite it back at the same position, enabled only for the
            # region's time window ('enable' expression).
            blur_op = infile
            for overlay in bps.list_for_overlay():
                blur_op = blur_op.overlay(infile.crop(
                    x=overlay['x'],
                    y=overlay['y'],
                    width=overlay['width'],
                    height=overlay['height']).filter_('boxblur',
                                                      luma_radius=10,
                                                      luma_power=10),
                                          x=overlay['x'],
                                          y=overlay['y'],
                                          enable=overlay['enable'])
            blur_op.output(out_path).run(overwrite_output=True)
            print('##### complete blur')
            shutil.rmtree(frames_dir)
            os.remove(src_path)
        except Exception as e:
            # NOTE(review): print is given %s plus a second argument, so the
            # message is emitted as a tuple rather than interpolated.
            print('##### detect and blur failed %s', str(e))
            os.remove(src_path)
            shutil.rmtree(frames_dir)
            raise e
    video.release()
    cv2.destroyAllWindows()
    return out_path