Beispiel #1
0
    def download_file(self, mongo_id: str) -> None:
        """
        Fetch the data file belonging to one record and flag it as downloaded.

        The record is looked up via `mongo_id`. When the download call reports
        success, the size of the resulting `.h5` file is stored in
        `record.meta["filesize"]`, `record.is_downloaded` is set and the
        record is persisted with `update_record`. A record that is already
        flagged as downloaded is skipped without contacting the API.

        NOTE/TODO: is run as a task.
        """
        record = read_record(mongo_id)

        if record.is_downloaded:
            # Reached when the pipeline is rerun after an earlier error.
            log.debug("Data file already downloaded. Skipping.")
            return

        succeeded = dreem_api.download_file(
            self.session,
            record.download_folder(),
            record.manufacturer_ref,
        )

        if not succeeded:
            log.debug(f"Download FAILED for:\n   {record}")
            return

        # The file size is kept as metadata for the pre-processing step.
        folder = record.download_folder()
        h5_path = Path(folder / f"{record.manufacturer_ref}.h5")
        record.meta["filesize"] = h5_path.stat().st_size

        record.is_downloaded = succeeded
        update_record(record)
        log.debug(f"Download SUCCESS for:\n   {record}")
Beispiel #2
0
def task_preprocess_data(mongoid: str) -> str:
    """
    Flag a downloaded dreem record as processed.

    The record is only modified and persisted when it is downloaded and not
    yet processed. The given id is returned unchanged in every case.
    """
    record = read_record(mongoid)

    needs_processing = record.is_downloaded and not record.is_processed
    if not needs_processing:
        return mongoid

    record.is_processed = True
    update_record(record)
    return mongoid
Beispiel #3
0
def task_preprocess_data(mongoid: str) -> str:
    """
    Flag a thinkFAST / CANTAB record as processed.

    A record that is already processed is left untouched; otherwise the flag
    is set and the record is persisted. The given id is always returned.
    """
    record = read_record(mongoid)

    if record.is_processed:
        # Nothing to do, the record was handled before.
        return mongoid

    record.is_processed = True
    update_record(record)
    return mongoid
Beispiel #4
0
def upload_data(data_folder: Path) -> None:
    """
    Zip the folder at `data_folder` and upload the resulting archive.

    After an upload that is reported as successful, every record returned by
    `records_by_dmp_folder` for the folder's stem is flagged as uploaded and
    persisted, and the local archive is removed. When the upload is not
    reported as successful, nothing else is done.
    """
    log.debug(f"Uploading: {data_folder}")

    archive = dmpy.zip_folder(data_folder)
    if not dmpy.upload(archive):
        return

    for uploaded in records_by_dmp_folder(data_folder.stem):
        uploaded.is_uploaded = True
        update_record(uploaded)

    # The archive is only cleaned up once the records have been updated.
    dmpy.rm_local_data(archive)
Beispiel #5
0
    def download_file(self, mongo_id: str) -> None:
        """
        Fetch the files belonging to one record and flag it as downloaded.

        The record is looked up via `mongo_id` and its filename and export
        date are handed to `vttsma_api.download_files` together with
        `self.bucket`. Only when that call reports success is
        `record.is_downloaded` set and the record persisted.

        NOTE/TODO: is run as a task.
        """
        record = read_record(mongo_id)

        succeeded = vttsma_api.download_files(
            self.bucket,
            record.filename,
            record.vttsma_export_date,
        )
        if not succeeded:
            return

        record.is_downloaded = succeeded
        update_record(record)
Beispiel #6
0
def prepare_data_folders(device_type: DeviceType) -> None:
    """
    Move fully processed data folders into the upload folder.

    Records of `device_type` that are not yet uploaded are fetched in groups.
    A group is only moved when every one of its records is processed. Its
    folder below `config.storage_vol / <device type>` is then renamed to
    `config.upload_folder / <device type> / <dmp_folder>`, where `dmp_folder`
    is the group key with '-' removed and '/' replaced by '-', followed by
    the two wear dates formatted as YYYYMMDD, e.g.:

        PATIENTID-DEVICEID-STARTWEAR-ENDWEAR

    (assuming the group key looks like 'patient-id/device-id' — TODO confirm
    against `records_not_uploaded`).

    Every moved record gets `is_prepared = True` and its `dmp_folder` set,
    and is persisted. Afterwards the patient's folder in the storage volume
    is removed when it is empty; otherwise an error is logged.
    """
    grouped = records_not_uploaded(device_type)

    # Drop groups that contain any record with 'is_processed' == False.
    # 'is_processed' == False catches the 'False' for any preceding task as
    # well. Empty groups are skipped too: they have nothing to move and would
    # otherwise fail on the `[0]` lookup further down.
    to_upload = {
        key: records
        for key, records in grouped.items()
        if records and all(r.is_processed for r in records)
    }

    storage_root = config.storage_vol / device_type.name
    upload_root = config.upload_folder / device_type.name

    for patient_device, records in to_upload.items():
        # NOTE(review): the helper's name suggests it returns (min, max);
        # the first value is used as start and the second as end — confirm.
        start_wear, end_wear = min_max_data_wear_times(records)

        start_data = start_wear.strftime("%Y%m%d")
        end_data = end_wear.strftime("%Y%m%d")

        source = storage_root / patient_device

        # patient_device looks like = 'patient-id/device-id'
        folder_key = patient_device.replace('-', '').replace('/', '-')
        dmp_folder = f"{folder_key}-{start_data}-{end_data}"

        destination = upload_root / dmp_folder

        # Create only the parent: renaming a directory onto an existing
        # target raises FileExistsError on Windows, and on POSIX it only
        # works while that target is an empty directory.
        destination.parent.mkdir(parents=True, exist_ok=True)

        source.rename(destination)

        for record in records:
            record.is_prepared = True
            record.dmp_folder = dmp_folder
            update_record(record)

        # check if patient folder is empty, then remove it
        patient_path = storage_root / records[0].patient_id

        if not any(patient_path.iterdir()):
            patient_path.rmdir()
        else:
            log.error("Files left behind when uploading dataset to DMP.")