Example #1
0
def process_zip(blob: storage.Blob, patient_id: str) -> np.ndarray:
    """
    Download a zipped DICOM archive from GCS and return a 3D standardized
    numpy array.

    :param blob: GCS blob holding a ``.zip`` archive of DICOM slices.
    :param patient_id: identifier used to name the downloaded archive.
    :return: preprocessed 3D scan as a numpy array.
    """
    old_wd = os.getcwd()
    dirname = f'/tmp/dicom_to_npy-{int(time.time())}'
    os.makedirs(dirname, exist_ok=True)
    os.chdir(dirname)
    try:
        blob.download_to_filename(patient_id + '.zip')
        logging.info('extracting zip file')
        shutil.unpack_archive(patient_id + '.zip', format='zip')

        # NOTE(review): assumes the archive always nests exactly this deep;
        # a differently structured zip would pick the wrong directory.
        dirpath = list(os.walk('.'))[3][0]
        logging.info('loading scans from %s', dirpath)
        scan = parsers.load_scan(dirpath)
        logging.info('processing dicom data')
        processed_scan = preprocess_scan(scan)
    finally:
        # Always restore the working directory and remove the scratch dir,
        # even on failure (the original leaked both whenever an exception
        # escaped between chdir and rmtree).
        os.chdir(old_wd)
        shutil.rmtree(dirname, ignore_errors=True)
    return processed_scan
Example #2
0
 def _download_from_blobstore(self, blob_to_download_name, blob_download_target_path, chunk_size=None):
     """Download a file from blobstore to a local path.

     :param blob_to_download_name: name of the blob in the container.
     :param blob_download_target_path: local filesystem path to write to.
     :type chunk_size: int
     :param chunk_size: If file size is greater than 5MB, it is recommended that
                        chunked downloads should be used.
                        To do so, pass chunk_size param to this function.
                        This must be a multiple of 256 KB per the API specification.
     :returns: True on success; raises Exception on any failure.
     """
     log_prefix = '[Google Cloud Storage] [DOWNLOAD]'
     if self.container:
         # BUG FIX: the original format string had a single '{}' (consumed
         # by log_prefix) while also passing the target path, which was
         # silently dropped from the log line.
         self.logger.info('{} Started to download the tarball to target {}.'.format(
             log_prefix, blob_download_target_path))
         try:
             blob = Blob(blob_to_download_name,
                         self.container, chunk_size=chunk_size)
             blob.download_to_filename(blob_download_target_path)
             # BUG FIX: the original logged self.CONTAINER under
             # 'blob_target_name' and the target path under 'container'
             # (format arguments were swapped).
             self.logger.info('{} SUCCESS: blob_to_download={}, blob_target_name={}, container={}'
                              .format(log_prefix, blob_to_download_name,
                                      blob_download_target_path, self.CONTAINER))
             return True
         except Exception as error:
             message = '{} ERROR: blob_to_download={}, blob_target_name={}, container={}\n{}'.format(
                 log_prefix, blob_to_download_name, blob_download_target_path, self.CONTAINER, error)
             self.logger.error(message)
             # Chain the original error so the root cause is not lost.
             raise Exception(message) from error
     else:
         # NOTE(review): self.container (handle) vs self.CONTAINER (name)
         # look like two distinct attributes — confirm against the class.
         message = '{} ERROR: blob_to_download={}, blob_target_name={}, container={}\n{}'.format(
             log_prefix, blob_to_download_name, blob_download_target_path, self.CONTAINER, "Container not found or accessible")
         self.logger.error(message)
         raise Exception(message)
Example #3
0
 def _load_image(self, blob: gcs.Blob) -> Tuple[np.ndarray, GoesMetadata]:
   """Fetch one GOES image blob and return (resampled array, metadata).

   Results are memoized in self.cache (keyed by blob name) when a cache
   is configured.
   """
   key = blob.name
   if self.cache and key in self.cache:
     return self.cache[key]
   # Download to a scratch .nc file that is cleaned up when the context
   # manager exits.
   with file_util.mktemp(dir=self.tmp_dir, suffix='.nc') as local_path:
     logging.info('downloading %s', key)
     blob.download_to_filename(local_path)
     logging.info('downloaded %s', key)
     with xarray.open_dataset(local_path, engine='h5netcdf') as dataset:
       resampled = self._resample_image(dataset)
       logging.info('resampled %s', key)
       result = (resampled, goes_metadata(dataset))
       if self.cache:
         self.cache[key] = result
       return result
Example #4
0
def download_encrypted_blob(bucket_name, source_blob_name,
                            destination_file_name, base64_encryption_key):
    """Downloads a previously-encrypted blob from Google Cloud Storage.

    The encryption key provided must be the same key provided when uploading
    the blob.
    """
    client = storage.Client()
    target_bucket = client.get_bucket(bucket_name)
    # The API needs the raw 32-byte AES256 key, so undo the base64
    # encoding the caller used to pass it around as text.
    raw_key = base64.b64decode(base64_encryption_key)
    encrypted_blob = Blob(source_blob_name, target_bucket,
                          encryption_key=raw_key)
    encrypted_blob.download_to_filename(destination_file_name)

    print('Blob {} downloaded to {}.'.format(source_blob_name,
                                             destination_file_name))
Example #5
0
def download_documents(user_id, docs):
    """
        Downloads given user's files to server; Creates download link and returns it to user.

        If more than one file is specified, zip the files and return the zip folder.
        Otherwise return the single file.

        Args:
            user_id: firestore user's document id
            docs: a list of filenames

        Returns:
            dict: ``{"download_path": <path>}`` on success, or
            ``{"error": <message>}`` when no files were selected.
            (Corrected: the code returns a dict, not a ``str``.)
    """
    if len(docs) < 1:
        return {"error": "No files selected"}

    higher_path = os.path.dirname(os.path.abspath(__file__))
    static_path = "/static/" + config["DOCUMENTS"]["USER_DOWNLOADS_PATH"]
    # Unique per-request directory name: user id + Berlin-time timestamp +
    # random URL-safe token, so concurrent downloads cannot collide.
    path_str = "{}_{}_{}".format(
        user_id,
        datetime.now(
            tz=pytz.timezone("Europe/Berlin")).strftime("%H.%M.%S_%d-%m-%Y"),
        token_urlsafe(8),  # 8 Random Bytes
    )
    full_path = higher_path + static_path + path_str
    os.makedirs(full_path)  # Ensures the path to the file has been created

    for doc in docs:  # Downloads each file
        # Blobs are stored encrypted; `bucket` and `encryption_key` are
        # module-level globals (defined outside this view).
        blob = Blob(get_cloud_folder(user_id) + doc,
                    bucket,
                    encryption_key=encryption_key)
        blob.download_to_filename(full_path + "/" + doc)

    if len(docs) > 1:  # If more than one file, zips them
        shutil.make_archive(full_path, "zip", full_path)
        zipped_docs = static_path + path_str + ".zip"
        return {"download_path": zipped_docs}  # Returns zip file

    return {
        "download_path": static_path + path_str + "/" + docs[0]
    }  # Returns single file
def download_encrypted_blob(bucket_name, source_blob_name,
                            destination_file_name, base64_encryption_key):
    """Downloads a previously-encrypted blob from Google Cloud Storage.

    The encryption key provided must be the same key provided when uploading
    the blob.

    NOTE(review): this is a second, behaviorally identical definition of
    ``download_encrypted_blob``; at module level it shadows the earlier one.

    :param bucket_name: name of the GCS bucket.
    :param source_blob_name: name of the encrypted blob to download.
    :param destination_file_name: local path to write the downloaded file.
    :param base64_encryption_key: base64-encoded 32-byte AES256 key.
    """
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    # Encryption key must be an AES256 key represented as a bytestring with
    # 32 bytes. Since it's passed in as a base64 encoded string, it needs
    # to be decoded.
    encryption_key = base64.b64decode(base64_encryption_key)
    blob = Blob(source_blob_name, bucket, encryption_key=encryption_key)

    blob.download_to_filename(destination_file_name)

    print('Blob {} downloaded to {}.'.format(
        source_blob_name,
        destination_file_name))
Example #7
0
def to_public_png(npy_blob: storage.Blob, public_bucket: storage.Bucket):
    """
    Converts a .npy blob into a png file and uploads it to the public
    bucket.

    :param npy_blob: blob whose name ends in ``.npy``.
    :param public_bucket: destination bucket for the rendered PNG.
    :return: None
    """
    npy_filepath = f'/tmp/{npy_blob.name.split("/")[-1]}'
    png_filepath = npy_filepath.replace('.npy', '.png')
    try:
        npy_blob.download_to_filename(npy_filepath)
        arr = np.load(npy_filepath)
        plt.imsave(png_filepath, arr)

        png_blob_name = npy_blob.name.replace('.npy', '.png')
        png_blob = public_bucket.blob(png_blob_name)
        png_blob.upload_from_filename(png_filepath)
    finally:
        # Remove both temp files even when download/convert/upload fails;
        # the original leaked them in /tmp on any exception.
        for path in (npy_filepath, png_filepath):
            if os.path.exists(path):
                os.remove(path)
Example #8
0
def process_cab(blob: storage.Blob, patient_id: str) -> np.ndarray:
    """
    Download a ``.cab`` archive of DICOM data from GCS and return a 3D
    standardized numpy array.

    :param blob: GCS blob holding a ``.cab`` archive.
    :param patient_id: identifier used to name the downloaded archive.
    :return: preprocessed 3D scan as a numpy array.
    """
    # TODO: Fix issues with process_cab workingdir failing
    old_wd = os.getcwd()
    dirname = f'/tmp/dicom_to_npy-{int(time.time())}'
    os.makedirs(dirname, exist_ok=True)
    os.chdir(dirname)
    try:
        blob.download_to_filename(patient_id + '.cab')
        # Fail fast if cabextract reported an error instead of silently
        # parsing a half-extracted tree (the original ignored the exit code).
        if subprocess.call(['cabextract', patient_id + '.cab']) != 0:
            raise RuntimeError(f'cabextract failed for {patient_id}.cab')
        logging.info('extracted cab file')

        # NOTE(review): assumes the archive always nests exactly this deep.
        dirpath = list(os.walk('.'))[2][0]
        logging.info('loading scans from %s', dirpath)
        processed_scan = _process_cab(dirpath)
    finally:
        # Restore cwd and remove the scratch dir even on failure (the
        # original leaked both whenever an exception escaped).
        os.chdir(old_wd)
        shutil.rmtree(dirname, ignore_errors=True)
    return processed_scan
def filter(fr, scenes, removal):
    """Download a video from GCS, then either cut out or blur the given scenes.

    NOTE(review): this function shadows the builtin ``filter``; consider
    renaming it.

    Args:
        fr: object whose ``src_video.url`` points at the source video in GCS.
        scenes: list of dicts with ``start_millis`` / ``end_millis`` keys.
        removal: when truthy, the scenes are cut out of the video; otherwise
            frames inside the scenes are scanned with NudeDetector and the
            detected regions are box-blurred.

    Returns:
        Local filesystem path of the processed output video.
    """
    video = fr.src_video
    fileext = os.path.splitext(os.path.basename(video.url))[1]
    # Unique scratch names so concurrent jobs don't collide in MEDIA_ROOT.
    src_name = str(uuid.uuid4()) + '_' + str(current_millis())
    src_path = os.path.join(settings.MEDIA_ROOT, src_name + fileext)
    print('##### start download  %s' % src_path)
    client = storage.Client.from_service_account_json(settings.GCP_KEY_PATH)
    bucket = client.get_bucket(settings.BUCKET_NAME)
    blob = Blob(gcp_path(video.url), bucket)
    blob.download_to_filename(src_path)
    print('##### complete download %s' % src_path)

    # `video` is rebound from the model object to a cv2 capture here.
    video = cv2.VideoCapture(src_path)
    fps = video.get(cv2.CAP_PROP_FPS)
    num_frames = video.get(cv2.CAP_PROP_FRAME_COUNT)
    duration = int((num_frames / fps) * 1000)

    out_name = str(uuid.uuid4()) + '_' + str(current_millis())
    out_path = os.path.join(settings.MEDIA_ROOT, out_name + fileext)
    infile = ffmpeg.input(src_path)
    if removal:
        print('##### start removing %d scenes' % len(scenes))
        start_times = sorted(
            [scene['start_millis'] / 1000 for scene in scenes])
        end_times = sorted([scene['end_millis'] / 1000 for scene in scenes])
        times_length = len(start_times)
        # Build the (start, end) spans to KEEP — the gaps between removed
        # scenes; an end of ``None`` means "through to end of file".
        times = []
        print(start_times, end_times)
        for idx in range(times_length):
            print(start_times[idx], end_times[idx])
            if idx == 0:
                if start_times[idx] != 0:
                    times.append((0, start_times[idx]))
            if idx == times_length - 1:
                times.append((end_times[idx], None))
            if 0 < idx and times_length > 1:
                times.append((end_times[idx - 1], start_times[idx]))
        trims = []
        print(times)
        # NOTE(review): the loop variable shadows the ``time`` module inside
        # this function.
        for time in times:
            start = time[0]
            end = time[1]
            # setpts('N/FR/TB') regenerates timestamps so the concatenated
            # trims play back continuously.
            if end is None:
                trims.append(infile.trim(start=start).setpts('N/FR/TB'))
            else:
                trims.append(
                    infile.trim(start=start, end=end).setpts('N/FR/TB'))
        print(trims)
        ffmpeg.concat(*trims).output(out_path).run(overwrite_output=True)
    else:
        # extract frames for detecting
        print('##### start extracting frames for detecting blurbox')
        detector = NudeDetector(settings.NUDE_NET_DETECTOR_MODEL_PATH)
        frames_dir = os.path.join(settings.MEDIA_ROOT, src_name)
        if not os.path.exists(frames_dir):
            os.makedirs(frames_dir)
        try:
            # Sample one frame every 250 ms inside each flagged scene;
            # frames are written as <millis>.jpg.
            interval = 250
            for scene in scenes:
                cur_millis = scene['start_millis']
                while (True):
                    video.set(cv2.CAP_PROP_POS_MSEC, cur_millis)
                    ret, frame = video.read()
                    if ret:
                        frame_path = os.path.join(frames_dir,
                                                  str(cur_millis) + '.jpg')
                        cv2.imwrite(frame_path, frame)
                    else:
                        break
                    cur_millis += interval
                    if cur_millis >= scene[
                            'end_millis'] or cur_millis > duration:
                        break
            print('##### complete extracting frames for detecting blurbox')
            print('##### start detecting blurbox %s' % frames_dir)
            bps = BlurPointSet()
            # Sort frame files numerically by their millisecond timestamp.
            for frame in sorted(os.listdir(frames_dir),
                                key=lambda f: int(os.path.splitext(f)[0])):
                censors = detector.detect(os.path.join(frames_dir, frame))
                print('detected blur box point %d from %s' %
                      (len(censors), frame))
                start_millis = int(os.path.splitext(frame)[0])
                end_millis = start_millis + interval
                bps.add_censors(censors, start_millis, end_millis)
            print('##### complete detecting blurbox')

            print('##### start blur')
            # For each detected region, overlay a box-blurred crop of the
            # source onto itself, enabled only during that region's window.
            blur_op = infile
            for overlay in bps.list_for_overlay():
                blur_op = blur_op.overlay(infile.crop(
                    x=overlay['x'],
                    y=overlay['y'],
                    width=overlay['width'],
                    height=overlay['height']).filter_('boxblur',
                                                      luma_radius=10,
                                                      luma_power=10),
                                          x=overlay['x'],
                                          y=overlay['y'],
                                          enable=overlay['enable'])
            blur_op.output(out_path).run(overwrite_output=True)
            print('##### complete blur')
            shutil.rmtree(frames_dir)
            os.remove(src_path)
        except Exception as e:
            print('##### detect and blur failed %s', str(e))
            os.remove(src_path)
            shutil.rmtree(frames_dir)
            raise e
    video.release()
    cv2.destroyAllWindows()
    return out_path