# Module-level imports implied by the code below; `util`, `connect_db`, and
# `log` are project-local names assumed to be provided elsewhere in this package.
import hashlib
import logging
import os
import tarfile
import tempfile


def put(self):
    """Receive a sortable reaper or user upload."""
    #if not self.uid and not self.drone_request:
    #    self.abort(402, 'uploads must be from an authorized user or drone')
    if 'Content-MD5' not in self.request.headers:
        self.abort(400, 'Request must contain a valid "Content-MD5" header.')
    filename = self.request.headers.get('Content-Disposition', '').partition('filename=')[2].strip('"')
    if not filename:
        self.abort(400, 'Request must contain a valid "Content-Disposition" header.')
    with tempfile.TemporaryDirectory(prefix='.tmp', dir=self.app.config['upload_path']) as tempdir_path:
        filepath = os.path.join(tempdir_path, filename)
        success, digest, filesize, duration = util.receive_stream_and_validate(
                self.request.body_file, filepath, self.request.headers['Content-MD5'])
        if not success:
            self.abort(400, 'Content-MD5 mismatch.')
        if not tarfile.is_tarfile(filepath):
            self.abort(415, 'Only tar files are accepted.')
        log.info('Received %s [%s] from %s' % (filename, util.hrsize(self.request.content_length), self.request.user_agent))
        datainfo = util.parse_file(filepath, digest)
        if datainfo is None:
            util.quarantine_file(filepath, self.app.config['quarantine_path'])
            self.abort(202, 'Quarantining %s (unparsable)' % filename)
        util.commit_file(self.app.db.acquisitions, None, datainfo, filepath, self.app.config['data_path'])
        util.create_job(self.app.db.acquisitions, datainfo)  # FIXME we should only mark files as new and let engine take it from there
        throughput = filesize / duration.total_seconds()
        log.info('Received %s [%s, %s/s] from %s' % (filename, util.hrsize(filesize), util.hrsize(throughput), self.request.client_addr))
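
# For reference, a minimal sketch of what a streaming receive-and-validate
# helper with util.receive_stream_and_validate's signature could look like.
# This is an illustrative assumption, not the project's actual implementation;
# in particular it compares hex digests, whereas a strict Content-MD5 per
# RFC 1864 would carry the base64-encoded binary digest.
import datetime


def receive_stream_and_validate_sketch(stream, filepath, expected_md5):
    """Write `stream` to `filepath` in chunks while hashing; return
    (success, digest, filesize, duration) like the helper used above."""
    start = datetime.datetime.utcnow()
    md5 = hashlib.md5()
    filesize = 0
    with open(filepath, 'wb') as fd:
        for chunk in iter(lambda: stream.read(2**20), b''):  # 1 MiB chunks
            md5.update(chunk)
            fd.write(chunk)
            filesize += len(chunk)
    digest = md5.hexdigest()
    duration = datetime.datetime.utcnow() - start
    return digest == expected_md5, digest, filesize, duration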
def sort(args):
    """Sort a tree of files into args.sort_path, committing each parsable file to the DB."""
    logging.basicConfig(level=logging.WARNING)
    quarantine_path = os.path.join(args.sort_path, 'quarantine')
    if not os.path.exists(args.sort_path):
        os.makedirs(args.sort_path)
    if not os.path.exists(quarantine_path):
        os.makedirs(quarantine_path)
    print('initializing DB')
    kwargs = dict(tz_aware=True)
    db_client = connect_db(args.db_uri, **kwargs)
    db = db_client.get_default_database()
    print('inspecting %s' % args.path)
    files = []
    for dirpath, dirnames, filenames in os.walk(args.path):
        for filepath in [os.path.join(dirpath, fn) for fn in filenames if not fn.startswith('.')]:
            if not os.path.islink(filepath):
                files.append(filepath)
        dirnames[:] = [dn for dn in dirnames if not dn.startswith('.')]  # need to use slice assignment to influence walk behavior
    file_cnt = len(files)
    print('found %d files to sort (ignoring symlinks and dotfiles)' % file_cnt)
    for i, filepath in enumerate(files):
        print('sorting %s [%s] (%d/%d)' % (os.path.basename(filepath), util.hrsize(os.path.getsize(filepath)), i + 1, file_cnt))
        hash_ = hashlib.sha1()
        if not args.quick:  # in quick mode the digest passed along is that of the empty string
            with open(filepath, 'rb') as fd:
                for chunk in iter(lambda: fd.read(2**20), b''):  # b'' sentinel: fd is opened in binary mode
                    hash_.update(chunk)
        datainfo = util.parse_file(filepath, hash_.hexdigest())
        if datainfo is None:
            util.quarantine_file(filepath, quarantine_path)
            print('Quarantining %s (unparsable)' % os.path.basename(filepath))
        else:
            util.commit_file(db.acquisitions, None, datainfo, filepath, args.sort_path)
            util.create_job(db.acquisitions, datainfo)  # FIXME we should only mark files as new and let engine take it from there
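
# A sketch of how sort() might be invoked from the command line; the attribute
# names (db_uri, path, sort_path, quick) are taken from the code above, but the
# flags, help text, and overall wiring are assumptions for illustration.
import argparse


def build_sort_parser_sketch():
    parser = argparse.ArgumentParser(description='sort a directory tree of files')
    parser.add_argument('db_uri', help='MongoDB URI; must name a database, e.g. mongodb://host/dbname')
    parser.add_argument('path', help='directory tree to inspect for files')
    parser.add_argument('sort_path', help='destination root for sorted files')
    parser.add_argument('-q', '--quick', action='store_true', help='skip per-file SHA-1 hashing')
    return parser

# usage: sort(build_sort_parser_sketch().parse_args())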
def jobsinit(args):
    """Create a job entry for every acquisition's orig dataset."""
    db_client = connect_db(args.db_uri)
    db = db_client.get_default_database()
    if args.force:
        db.drop_collection('jobs')
    # find all "orig" files, and create jobs for them
    for a in db.acquisitions.find({'files.filetype': 'dicom'}, ['uid', 'files.$']):
        aid = str(a['_id'])
        fileinfo = a['files'][0]
        print(aid)
        fp = os.path.join(args.data_path, aid[-3:], aid, fileinfo['filename'])
        if not os.path.exists(fp):
            print('%s does not exist. no job created.' % fp)
            continue
        datainfo = {'acquisition_id': a['uid'], 'fileinfo': fileinfo}
        util.create_job(db.acquisitions, datainfo)
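
# Note on the projection in jobsinit(): pymongo treats a list projection like
# ['uid', 'files.$'] as {'uid': 1, 'files.$': 1} (plus _id, which is included
# by default), and MongoDB's positional '$' operator keeps only the first
# array element that matched the filter. Each returned acquisition therefore
# looks roughly like this (field values illustrative):
#
#   {'_id': ObjectId('...'), 'uid': '1.2.840...',
#    'files': [{'filetype': 'dicom', 'filename': '...'}]}
#
# which is why a['files'][0] above is the matching dicom file.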