def download_file(self, mongo_id: str) -> None:
    """
    Downloads files and stores them to {config.storage_vol}

    Tracking: {db.record.is_downloaded} indicates success

    NOTE/TODO: is run as a task.
    """
    record = read_record(mongo_id)

    # If pipeline is rerun after an error
    if record.is_downloaded:
        log.debug("Data file already downloaded. Skipping.")
        return

    is_downloaded_success = dreem_api.download_file(
        self.session, record.download_folder(), record.manufacturer_ref)

    if is_downloaded_success:
        # Useful metadata for performing pre-processing.
        downloaded_file = Path(
            record.download_folder() / f"{record.manufacturer_ref}.h5")
        record.meta["filesize"] = downloaded_file.stat().st_size

        record.is_downloaded = is_downloaded_success
        update_record(record)
        log.debug(f"Download SUCCESS for:\n {record}")
    else:
        log.debug(f"Download FAILED for:\n {record}")
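# --- Hedged sketch (not part of the original source) ---
# download_file above leans on read_record/update_record to persist the
# is_downloaded flag between pipeline runs. A minimal pymongo-backed pair
# might look like this; the connection string, db/collection names, and the
# Record fields shown are all assumptions, not the project's actual schema.

from dataclasses import asdict, dataclass, field

from bson import ObjectId
from pymongo import MongoClient

client = MongoClient("mongodb://localhost:27017")  # assumed connection string
records = client["data_transfer"]["records"]       # assumed db/collection


@dataclass
class Record:
    _id: ObjectId
    manufacturer_ref: str
    is_downloaded: bool = False
    is_processed: bool = False
    meta: dict = field(default_factory=dict)


def read_record(mongo_id: str) -> Record:
    # Look the document up by ObjectId and hydrate the dataclass.
    doc = records.find_one({"_id": ObjectId(mongo_id)})
    return Record(**doc)


def update_record(record: Record) -> None:
    # Overwrite the stored document with the mutated in-memory copy.
    records.replace_one({"_id": record._id}, asdict(record))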
def task_preprocess_data(mongoid: str) -> str:
    """
    Preprocessing tasks on dreem data.
    """
    record = read_record(mongoid)
    if record.is_downloaded and not record.is_processed:
        record.is_processed = True
        update_record(record)
    return mongoid
def task_preprocess_data(mongoid: str) -> str:
    """
    Preprocessing tasks on thinkFAST and CANTAB data.
    """
    record = read_record(mongoid)
    if not record.is_processed:
        record.is_processed = True
        update_record(record)
    return mongoid
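# --- Hedged sketch (not part of the original source) ---
# Both task_preprocess_data variants take and return the record's mongoid,
# which lets stages be chained by threading the id through each step. A
# hypothetical sequential driver (the downloader argument stands in for
# whichever class owns download_file above):

def run_pipeline(downloader, mongoid: str) -> str:
    # Download first so is_downloaded is set, then preprocess; each stage
    # re-reads the record and persists any tracking flags it changes.
    downloader.download_file(mongoid)
    return task_preprocess_data(mongoid)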
def upload_data(data_folder: Path) -> None:
    """Zips and uploads a folder at data_folder."""
    log.debug(f"Uploading: {data_folder}")
    zip_path = dmpy.zip_folder(data_folder)
    is_uploaded = dmpy.upload(zip_path)
    if is_uploaded:
        for record in records_by_dmp_folder(data_folder.stem):
            record.is_uploaded = True
            update_record(record)
        dmpy.rm_local_data(zip_path)
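# --- Hedged sketch (not part of the original source) ---
# upload_data handles one prepared folder at a time. A caller that sweeps
# every folder staged under config.upload_folder could look like this; the
# entry point and its name are assumptions.

def upload_all(device_type: DeviceType) -> None:
    upload_root = config.upload_folder / device_type.name
    for data_folder in sorted(upload_root.iterdir()):
        if data_folder.is_dir():
            upload_data(data_folder)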
def download_file(self, mongo_id: str) -> None:
    """
    Downloads files and stores them to {config.storage_vol}

    Tracking: {db.record.is_downloaded} indicates success

    NOTE/TODO: is run as a task.
    """
    record = read_record(mongo_id)
    is_downloaded_success = vttsma_api.download_files(
        self.bucket, record.filename, record.vttsma_export_date)
    if is_downloaded_success:
        record.is_downloaded = is_downloaded_success
        update_record(record)
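# --- Hedged sketch (not part of the original source) ---
# vttsma_api.download_files is not shown here. Assuming `bucket` is a boto3
# S3 Bucket resource and exports are keyed by date, a minimal version could
# look like this; the key layout and return contract are assumptions.

def download_files(bucket, filename: str, export_date: str) -> bool:
    prefix = f"{export_date}/{filename}"  # assumed S3 key layout
    found = False
    for obj in bucket.objects.filter(Prefix=prefix):
        # Mirror each matching S3 key into the local storage volume.
        target = config.storage_vol / obj.key
        target.parent.mkdir(parents=True, exist_ok=True)
        bucket.download_file(obj.key, str(target))
        found = True
    return found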
def prepare_data_folders(device_type: DeviceType) -> None:
    """
    Checks which folders in {config.storage_vol} are fully processed and
    moves them into {config.upload_folder}, named in the format:
        PATIENTID-DEVICEID-STARTWEAR-ENDWEAR
    """
    grouped = records_not_uploaded(device_type)

    # Keep only groups in which every record is processed:
    # 'is_processed' == False also catches a failure in any preceding task.
    to_upload = {
        k: v for k, v in grouped.items() if all([r.is_processed for r in v])
    }

    for patient_device in to_upload.keys():
        start_wear, end_wear = min_max_data_wear_times(to_upload[patient_device])
        start_data = start_wear.strftime("%Y%m%d")
        end_data = end_wear.strftime("%Y%m%d")

        # patient_device looks like 'patient-id/device-id'
        source = config.storage_vol / device_type.name / patient_device
        dmp_folder = (
            f"{patient_device.replace('-', '').replace('/', '-')}-{start_data}-{end_data}"
        )
        destination = config.upload_folder / device_type.name / dmp_folder
        destination.mkdir(parents=True, exist_ok=True)
        source.rename(destination)

        for record in to_upload[patient_device]:
            record.is_prepared = True
            record.dmp_folder = dmp_folder
            update_record(record)

        # Remove the patient folder if moving the device folder emptied it.
        patient_path = (config.storage_vol / device_type.name /
                        to_upload[patient_device][0].patient_id)
        if not any(patient_path.iterdir()):
            patient_path.rmdir()
        else:
            log.error("Files left behind when uploading dataset to DMP.")
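# --- Hedged sketch (not part of the original source) ---
# min_max_data_wear_times is assumed to return the earliest start-of-wear and
# latest end-of-wear datetimes, in that order, matching how the unpacking in
# prepare_data_folders uses them; the attribute names are assumptions.

from datetime import datetime
from typing import Tuple


def min_max_data_wear_times(records: list) -> Tuple[datetime, datetime]:
    start_wear = min(r.start_wear for r in records)  # earliest wear time
    end_wear = max(r.end_wear for r in records)      # latest wear time
    return start_wear, end_wear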