def reap(self, _id, item, tempdir):
    """Reap all DICOM files for one exam into a compressed archive.

    Copies every parsable DICOM from item['path'] into a staging dir,
    folds an optional sidecar METADATA.json into the archive metadata,
    and writes <tempdir>/reap.tgz via reaper.create_archive.

    :param _id: exam identifier (used for logging and state display)
    :param item: dict with at least 'path' (source dir) and 'state'
    :param tempdir: scratch directory for staging and the final archive
    :return: True (always; unparsable files are silently skipped)
    """
    reap_start = datetime.datetime.utcnow()
    log.info('reaping %s' % self.state_str(_id, item['state']))
    reap_cnt = 0
    reap_path = os.path.join(tempdir, 'reap')
    os.mkdir(reap_path)
    metadata_path = os.path.join(tempdir, 'METADATA.json')
    for fn in os.listdir(item['path']):
        fp = os.path.join(item['path'], fn)
        if fn == 'metadata.json' or fn == 'METADATA.json':
            # sidecar metadata travels separately; normalize its name
            shutil.move(fp, metadata_path)
        else:
            try:
                dicom.read_file(fp, stop_before_pixels=True)  # ensure file is dicom
            except Exception:
                # was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit; non-DICOM files are
                # deliberately skipped without logging
                pass
            else:
                reap_cnt += 1
                shutil.copyfile(fp, os.path.join(reap_path, fn))
    reap_time = (datetime.datetime.utcnow() - reap_start).total_seconds()
    log.info('reaped %s (%d images) in %.1fs' % (_id, reap_cnt, reap_time))
    metadata = {}
    if os.path.exists(metadata_path):
        with open(metadata_path, 'r') as metadata_file:
            metadata = json.load(metadata_file, object_hook=reaper.datetime_decoder)
        os.remove(metadata_path)
    metadata['filetype'] = 'dicom'
    log.info('compressing %s' % _id)
    reaper.create_archive(reap_path + '.tgz', reap_path, os.path.basename(reap_path), metadata, compresslevel=6)
    shutil.rmtree(reap_path)
    return True
def reap(self, _id, item, tempdir):
    """Reap all MEG/EEG (FIF) files for one item into a zip archive.

    Copies every file in item['path'] that mne can parse as a raw FIF
    recording into a staging dir, then writes <tempdir>/reap.zip via
    reaper.create_archive with minimal 'meeg' metadata.

    :param _id: item identifier (used for logging)
    :param item: dict with at least 'path' (source dir) and 'state'
    :param tempdir: scratch directory for staging and the final archive
    :return: True (always; unparsable files are silently skipped)
    """
    reap_start = datetime.datetime.utcnow()
    log.info('reaping %s' % self.state_str(_id, item['state']))
    reap_cnt = 0
    reap_path = os.path.join(tempdir, 'reap')
    os.mkdir(reap_path)
    for fn in os.listdir(item['path']):
        fp = os.path.join(item['path'], fn)
        try:
            # import kept inside the try on purpose: a missing mne package
            # degrades to "no files reaped" instead of crashing the reaper
            import mne
            # catch warnings here because some recordings have a
            # shielding mode that will throw a warning on read
            with warnings.catch_warnings(record=True):
                mne.io.read_raw_fif(fp, allow_maxshield=True)  # ensure file is FIF
        except Exception:
            pass  # non-FIF files are deliberately skipped without logging
        else:
            reap_cnt += 1
            shutil.copyfile(fp, os.path.join(reap_path, fn))
    t = (datetime.datetime.utcnow() - reap_start).total_seconds()
    log.info('reaped %s (%d images) in %.1fs' % (_id, reap_cnt, t))
    metadata = dict(filetype=u'meeg', timezone=self.timezone, header={})
    log.info('compressing %s' % _id)
    reaper.create_archive(reap_path + '.zip', reap_path, os.path.basename(reap_path), metadata)
    shutil.rmtree(reap_path)
    return True
def reap(self, _id, item, tempdir):
    """Reap one GE P-file (plus its aux files) into a zip archive.

    Parses the P-file header, applies the patient-ID blacklist/whitelist,
    stages the P-file and any sibling aux files via symlinks, archives
    them, and finally reaps any peripheral data for the acquisition.

    :param _id: P-file identifier (used for logging and staged filename)
    :param item: dict with 'path' (P-file path) and 'state' (incl. 'size')
    :param tempdir: scratch directory for staging and the final archive
    :return: tri-state status --
             None  = skipped (unreadable, blacklisted, or non-matching ID),
             False = archive creation failed,
             True  = archived successfully
    """
    try:
        pfile = scipfile.PFile(item['path'], timezone=self.timezone)
    except (IOError, scipfile.PFileError):
        success = None
        log.warning('skipping %s (disappeared or unparsable)' % _id)
    else:
        if pfile.patient_id.strip('/').lower() in self.blacklist:
            success = None
            log.info('discarding %s' % _id)
        elif not re.match(self.whitelist, pfile.patient_id):
            success = None
            log.info('ignoring %s (non-matching patient ID)' % _id)
        else:
            name_prefix = pfile.series_uid + '_' + str(pfile.acq_no)
            reap_path = os.path.join(tempdir, name_prefix + '_' + scipfile.PFile.filetype)
            os.mkdir(reap_path)
            # aux files live next to the P-file and share its series UID;
            # each is renamed to "<_id>_<last underscore-delimited token>"
            auxfiles = [(ap, _id + '_' + ap.rsplit('_', 1)[-1]) for ap in glob.glob(item['path'] + '_' + pfile.series_uid + '_*')]
            log.debug('staging %s%s' % (_id, ', ' + ', '.join([af[1] for af in auxfiles]) if auxfiles else ''))
            # symlinks avoid copying multi-GB P-files into the staging dir
            os.symlink(item['path'], os.path.join(reap_path, _id))
            for af in auxfiles:
                os.symlink(af[0], os.path.join(reap_path, af[1]))
            pfile_size = reaper.hrsize(item['state']['size'])
            metadata = {
                'filetype': scipfile.PFile.filetype,
                'timezone': self.timezone,
                'header': {
                    'group': pfile.nims_group_id,
                    'project': pfile.nims_project,
                    'session': pfile.nims_session_id,
                    'session_no': pfile.series_no,
                    'session_desc': pfile.series_desc,
                    'acquisition': pfile.nims_acquisition_id,
                    'acquisition_no': pfile.acq_no,
                    'timestamp': pfile.nims_timestamp,
                },
            }
            reap_start = datetime.datetime.utcnow()
            auxfile_str = ' + %d aux files' % len(auxfiles) if auxfiles else ''
            log.info('reaping.zip %s [%s%s]' % (_id, pfile_size, auxfile_str))
            try:
                reaper.create_archive(reap_path + '.zip', reap_path, os.path.basename(reap_path), metadata)
                shutil.rmtree(reap_path)
            except IOError:
                success = False
                log.warning('reap error %s%s' % (_id, ' or aux files' if auxfiles else ''))
            else:
                success = True
                reap_time = (datetime.datetime.utcnow() - reap_start).total_seconds()
                log.info('reaped.zip %s [%s%s] in %.1fs' % (_id, pfile_size, auxfile_str, reap_time))
                # peripheral data is only reaped after a successful archive
                self.reap_peripheral_data(tempdir, pfile, name_prefix, _id)
    return success
def reap(name, data_path, reap_path, reap_data, reap_name, log, log_info, tempdir):
    """Reap peripheral physio recordings matching one acquisition.

    Waits for physio files to land in data_path, selects those whose
    embedded timestamp falls in a window derived from the acquisition
    (scan end - 15s .. +180s), and archives them as <reap_name>.tgz
    under reap_path. Logs and returns without archiving when the PSD
    name is invalid, the data dir never becomes available, or no file
    matches.

    :param name: human-readable name of the peripheral data (logging)
    :param data_path: directory where physio files appear
    :param reap_path: directory that receives the final .tgz
    :param reap_data: parsed acquisition (psd_name, timestamp,
        prescribed_duration, nims_* attributes)
    :param reap_name: basename of the archive / staging dir
    :param log, log_info: logger and log prefix string
    :param tempdir: parent dir for the temporary staging directory
    :return: None
    """
    if not reap_data.psd_name or not re.match(r'^[a-zA-Z0-9_+-]+$', reap_data.psd_name):
        # psd_name is interpolated into a regex below; reject anything odd
        log.warning('periph data %s %s invalid PSD name' % (log_info, name))
        return
    # physio files appear shortly after the scan ends: allow 15s of clock
    # skew before, and up to 180s after
    lower_time_bound = reap_data.timestamp + datetime.timedelta(seconds=reap_data.prescribed_duration or 0) - datetime.timedelta(seconds=15)
    upper_time_bound = lower_time_bound + datetime.timedelta(seconds=180)
    sleep_time = (upper_time_bound - datetime.datetime.now()).total_seconds()
    if sleep_time > 0:
        log.info('periph data %s waiting for %s for %ds' % (log_info, name, sleep_time))
        time.sleep(sleep_time)
    # retry for up to 15 minutes while the data dir is empty or missing
    for i in range(15):
        try:
            physio_files = os.listdir(data_path)
        except OSError:
            physio_files = []
        if physio_files:
            break
        else:
            log.warning('periph data %s %s temporarily unavailable' % (log_info, name))
            time.sleep(60)
    else:
        log.error('periph data %s %s permanently unavailable - giving up' % (log_info, name))
        return
    # BUGFIX: this chain previously used filter(), whose lazy result is
    # always truthy on Python 3, so the `if physio_tuples:` test below
    # took the "found" branch even when nothing matched. Use lists.
    physio_tuples = [(re.match('.+_%s_([0-9_]{18,20})$' % reap_data.psd_name, pfn), pfn) for pfn in physio_files]
    physio_tuples = [(datetime.datetime.strptime(match.group(1), '%m%d%Y%H_%M_%S_%f'), pfn) for match, pfn in physio_tuples if match]
    physio_tuples = [pt for pt in physio_tuples if lower_time_bound <= pt[0] <= upper_time_bound]
    if physio_tuples:
        log.info('periph data %s %s found' % (log_info, name))
        with tempfile.TemporaryDirectory(dir=tempdir) as tempdir_path:
            metadata = {
                'filetype': scitran.data.medimg.gephysio.GEPhysio.filetype,
                'timezone': reap_data.nims_timezone,
                'header': {
                    'group': reap_data.nims_group_id,
                    'project': reap_data.nims_project,
                    'session': reap_data.nims_session_id,
                    'acquisition': reap_data.nims_acquisition_id,
                    'timestamp': reap_data.nims_timestamp,
                },
            }
            physio_reap_path = os.path.join(tempdir_path, reap_name)
            os.mkdir(physio_reap_path)
            for pts, pfn in physio_tuples:
                shutil.copy2(os.path.join(data_path, pfn), physio_reap_path)
            reaper.create_archive(os.path.join(reap_path, reap_name + '.tgz'), physio_reap_path, reap_name, metadata, compresslevel=6)
    else:
        log.info('periph data %s %s not found' % (log_info, name))
def split_into_acquisitions(self, _id, item, path, filepaths):
    """Group loose DICOM files by acquisition number and zip each group.

    Each file is parsed (optionally anonymized by DicomFile), its mtime
    corrected from the DICOM timestamp, and bucketed by acq_no; every
    bucket is renamed into its own directory, archived as a .zip via
    reaper.create_archive, and described in the returned list.

    :param _id: session/series identifier used in names and log strings
    :param item: unused here beyond the caller's contract
    :param path: directory in which per-acquisition archives are created
    :param filepaths: list of DICOM file paths to split
    :return: list of dicts with 'path' (zip path), 'prefix', 'log_info'
    """
    if self.anonymize:
        log.info('anonymizing %s' % _id)
    dcm_dict = {}
    for filepath in filepaths:
        # DicomFile parses the header and, when self.anonymize is set,
        # presumably rewrites PHI in place -- confirm against DicomFile
        dcm = self.DicomFile(filepath, self.anonymize)
        if os.path.basename(filepath).startswith('(none)'):
            # '(none)' prefixes come from missing DICOM fields; normalize
            new_filepath = filepath.replace('(none)', 'NA')
            os.rename(filepath, new_filepath)
            filepath = new_filepath
        os.utime(filepath, (int(dcm.timestamp.strftime('%s')), int(dcm.timestamp.strftime('%s'))))  # correct timestamps
        dcm_dict.setdefault(dcm.acq_no, []).append(filepath)
    log.info('compressing %s' % _id)
    acq_info = []
    for acq_no, acq_paths in dcm_dict.iteritems():
        # NOTE(review): acq_no is concatenated directly -- assumes it is
        # a str (or None), unlike the PFile reaper which str()s it; verify
        name_prefix = _id + ('_' + acq_no if acq_no is not None else '')
        dir_name = name_prefix + '_' + scidcm.Dicom.filetype
        arcdir_path = os.path.join(path, dir_name)
        os.mkdir(arcdir_path)
        for filepath in acq_paths:
            os.rename(filepath, '%s.dcm' % os.path.join(arcdir_path, os.path.basename(filepath)))
        # NOTE(review): `dcm` here is the last file parsed in the loop
        # above, not one from this acquisition -- assumes all files in the
        # session share the same patient name hashes; confirm
        metadata = {
            'filetype': scidcm.Dicom.filetype,
            'timezone': self.timezone,
            'overwrite': {
                'firstname_hash': dcm.firstname_hash,
                'lastname_hash': dcm.lastname_hash,
            }
        }
        reaper.create_archive(arcdir_path+'.zip', arcdir_path, dir_name, metadata)
        shutil.rmtree(arcdir_path)
        acq_info.append({
            'path': arcdir_path+'.zip',
            'prefix': name_prefix,
            'log_info': '%s%s' % (_id, '.' + acq_no if acq_no is not None else ''),
        })
    return acq_info