Example #1
File: core.py Project: larsoner/api
 def put(self):
     """Receive a sortable reaper or user upload.

     The request must carry a ``Content-MD5`` header and a
     ``Content-Disposition`` header containing ``filename=...``; the request
     body is the file itself.  The body is streamed into a temporary
     directory under ``upload_path``, checked against the supplied digest,
     required to be a tar file, and then parsed.  Unparsable files are moved
     to ``quarantine_path``; parsable ones are committed to ``data_path`` and
     a job is created for them.

     Status codes passed to ``self.abort`` (presumably this raises and ends
     the request, as in webapp2 -- TODO confirm):
         400 -- missing/invalid header, or Content-MD5 mismatch
         415 -- the payload is not a tar file
         202 -- the payload could not be parsed and was quarantined
     """
     # NOTE(review): the authorization check below is commented out, so uploads
     # are accepted without it -- confirm this is intentional.
     #if not self.uid and not self.drone_request:
     #    self.abort(402, 'uploads must be from an authorized user or drone')
     headers = self.request.headers
     if 'Content-MD5' not in headers:
         self.abort(400, 'Request must contain a valid "Content-MD5" header.')
     disposition = headers.get('Content-Disposition', '')
     # The header is client-controlled.  Keep only the last path component so
     # that names like "../../x" or "/abs/path" cannot make os.path.join()
     # point outside the temporary directory.
     filename = os.path.basename(disposition.partition('filename=')[2].strip('"'))
     if not filename or filename in ('.', '..'):
         self.abort(400, 'Request must contain a valid "Content-Disposition" header.')
     with tempfile.TemporaryDirectory(prefix='.tmp', dir=self.app.config['upload_path']) as tempdir_path:
         filepath = os.path.join(tempdir_path, filename)
         success, digest, filesize, duration = util.receive_stream_and_validate(self.request.body_file, filepath, headers['Content-MD5'])
         if not success:
             self.abort(400, 'Content-MD5 mismatch.')
         if not tarfile.is_tarfile(filepath):
             self.abort(415, 'Only tar files are accepted.')
         log.info('Received    %s [%s] from %s' % (filename, util.hrsize(self.request.content_length), self.request.user_agent))
         datainfo = util.parse_file(filepath, digest)
         if datainfo is None:
             util.quarantine_file(filepath, self.app.config['quarantine_path'])
             self.abort(202, 'Quarantining %s (unparsable)' % filename)
         util.commit_file(self.app.db.acquisitions, None, datainfo, filepath, self.app.config['data_path'])
         util.create_job(self.app.db.acquisitions, datainfo) # FIXME we should only mark files as new and let engine take it from there
         # The file is already committed at this point; do not let a zero
         # elapsed time (tiny upload, coarse clock) turn the final log line
         # into a ZeroDivisionError.
         elapsed = duration.total_seconds()
         throughput = filesize / elapsed if elapsed > 0 else 0.0
         log.info('Received    %s [%s, %s/s] from %s' % (filename, util.hrsize(filesize), util.hrsize(throughput), self.request.client_addr))
Example #2
def sort(args):
    # Sort the files found under args.path: each file is handed to
    # util.parse_file, and the result decides between util.quarantine_file and
    # util.commit_file / util.create_job.
    # Reads args.sort_path, args.db_uri, args.path and args.quick.
    # Python 2 code (print statements).
    # NOTE(review): this excerpt is incomplete -- several compound statements
    # below have no body, so it does not compile as shown.
    quarantine_path = os.path.join(args.sort_path, 'quarantine')
    # NOTE(review): the bodies of the next two checks are missing; presumably
    # they create the missing directories -- confirm against the full file.
    if not os.path.exists(args.sort_path):
    if not os.path.exists(quarantine_path):
    print 'initializing DB'
    kwargs = dict(tz_aware=True)
    db_client = connect_db(args.db_uri, **kwargs)
    db = db_client.get_default_database()
    print 'inspecting %s' % args.path
    files = []
    for dirpath, dirnames, filenames in os.walk(args.path):
        # Candidate paths: every file in this directory whose name does not
        # start with a dot.
        for filepath in [os.path.join(dirpath, fn) for fn in filenames if not fn.startswith('.')]:
            # NOTE(review): body missing; presumably non-symlinks are appended
            # to `files` -- confirm.
            if not os.path.islink(filepath):
        dirnames[:] = [dn for dn in dirnames if not dn.startswith('.')] # need to use slice assignment to influence walk behavior
    file_cnt = len(files)
    print 'found %d files to sort (ignoring symlinks and dotfiles)' % file_cnt
    for i, filepath in enumerate(files):
        print 'sorting     %s [%s] (%d/%d)' % (os.path.basename(filepath), util.hrsize(os.path.getsize(filepath)), i+1, file_cnt)
        hash_ = hashlib.sha1()
        # With args.quick set the file is not read, so the digest passed to
        # util.parse_file below is that of the untouched SHA-1 object.
        if not args.quick:
            with open(filepath, 'rb') as fd:
                # Reads the file in 2**20-byte chunks until read() returns ''.
                # NOTE(review): loop body missing; presumably it feeds each
                # chunk to hash_ -- confirm.
                for chunk in iter(lambda: fd.read(2**20), ''):
        datainfo = util.parse_file(filepath, hash_.hexdigest())
        if datainfo is None:
            util.quarantine_file(filepath, quarantine_path)
            print 'Quarantining %s (unparsable)' % os.path.basename(filepath)
            # NOTE(review): as shown, the two calls below sit inside the
            # `datainfo is None` branch; an `else:` line appears to be missing
            # from this excerpt -- confirm.
            util.commit_file(db.acquisitions, None, datainfo, filepath, args.sort_path)
            util.create_job(db.acquisitions, datainfo) # FIXME we should only mark files as new and let engine take it from there
Example #3
def sort(args):
    # Sort the files found under args.path: each file is handed to
    # util.parse_file, and unparsable ones go to util.quarantine_file.
    # Reads args.sort_path, args.db_uri, args.path and args.quick.
    # Python 2 code (print statements).
    # NOTE(review): this excerpt is incomplete -- several compound statements
    # have no body and one bracket is never closed, so it does not compile as
    # shown.
    quarantine_path = os.path.join(args.sort_path, 'quarantine')
    # NOTE(review): the bodies of the next two checks are missing; presumably
    # they create the missing directories -- confirm against the full file.
    if not os.path.exists(args.sort_path):
    if not os.path.exists(quarantine_path):
    print 'initializing DB'
    kwargs = dict(tz_aware=True)
    db_client = connect_db(args.db_uri, **kwargs)
    db = db_client.get_default_database()
    print 'inspecting %s' % args.path
    files = []
    for dirpath, dirnames, filenames in os.walk(args.path):
        # Candidate paths: files in this directory whose name does not start
        # with a dot.
        # NOTE(review): the line closing this list (`]:`) and the body of the
        # islink check are missing from the excerpt.
        for filepath in [
                os.path.join(dirpath, fn) for fn in filenames
                if not fn.startswith('.')
            if not os.path.islink(filepath):
        dirnames[:] = [
            dn for dn in dirnames if not dn.startswith('.')
        ]  # need to use slice assignment to influence walk behavior
    file_cnt = len(files)
    print 'found %d files to sort (ignoring symlinks and dotfiles)' % file_cnt
    for i, filepath in enumerate(files):
        print 'sorting     %s [%s] (%d/%d)' % (os.path.basename(
            filepath), util.hrsize(os.path.getsize(filepath)), i + 1, file_cnt)
        hash_ = hashlib.sha1()
        # With args.quick set the file is not read, so the digest passed to
        # util.parse_file below is that of the untouched SHA-1 object.
        if not args.quick:
            with open(filepath, 'rb') as fd:
                # Reads the file in 2**20-byte chunks until read() returns ''.
                # NOTE(review): loop body missing; presumably it feeds each
                # chunk to hash_ -- confirm.
                for chunk in iter(lambda: fd.read(2**20), ''):
        datainfo = util.parse_file(filepath, hash_.hexdigest())
        if datainfo is None:
            util.quarantine_file(filepath, quarantine_path)
            print 'Quarantining %s (unparsable)' % os.path.basename(filepath)
            # NOTE(review): the call below looks like two statements fused by
            # lost lines (the tail of util.commit_file and the arguments of a
            # following call); an `else:` also appears to be missing --
            # confirm against the full file.
            util.commit_file(db.acquisitions, None, datainfo, filepath,
                db.acquisitions, datainfo
            )  # FIXME we should only mark files as new and let engine take it from there
Example #4
def jobsinit(args):
    """Create a job entry for every acquisition's orig dataset."""
    # Reads args.db_uri, args.force and args.data_path.
    # Python 2 code (print statement).
    db_client = connect_db(args.db_uri)
    db = db_client.get_default_database()

    # NOTE(review): the body of this `if` is missing from the excerpt, so what
    # args.force triggers cannot be determined here.
    if args.force:

    # find all "orig" files, and create jobs for them
    # The query selects acquisitions having a file with filetype 'dicom' and
    # requests only 'uid' and 'files.$' (presumably the positional projection
    # of the matching file entry -- confirm against the DB driver docs).
    for a in db.acquisitions.find({'files.filetype': 'dicom'}, ['uid', 'files.$']):
        aid = str(a['_id'])
        fileinfo = a['files'][0]
        print aid
        # On-disk location: <data_path>/<last 3 chars of id>/<id>/<filename>
        fp = os.path.join(args.data_path, aid[-3:], aid, fileinfo['filename'])
        # NOTE(review): as shown, a job is still created after this message;
        # a `continue`/`else:` appears to be missing from the excerpt.
        if not os.path.exists(fp):
            print ('%s does not exist. no job created.' % fp)
        datainfo = {'acquisition_id': a['uid'], 'fileinfo': fileinfo}
        util.create_job(db.acquisitions, datainfo)
Example #5
def jobsinit(args):
    """Create a job entry for every acquisition's orig dataset."""
    # Reads args.db_uri, args.force and args.data_path.
    # Python 2 code (print statement).
    db_client = connect_db(args.db_uri)
    db = db_client.get_default_database()

    # NOTE(review): the body of this `if` is missing from the excerpt, so what
    # args.force triggers cannot be determined here.
    if args.force:

    # find all "orig" files, and create jobs for them
    # The query selects acquisitions having a file with filetype 'dicom' and
    # requests only 'uid' and 'files.$' (presumably the positional projection
    # of the matching file entry -- confirm against the DB driver docs).
    for a in db.acquisitions.find({'files.filetype': 'dicom'},
                                  ['uid', 'files.$']):
        aid = str(a['_id'])
        fileinfo = a['files'][0]
        print aid
        # On-disk location: <data_path>/<last 3 chars of id>/<id>/<filename>
        fp = os.path.join(args.data_path, aid[-3:], aid, fileinfo['filename'])
        # NOTE(review): as shown, a job is still created after this message;
        # a `continue`/`else:` appears to be missing from the excerpt.
        if not os.path.exists(fp):
            print('%s does not exist. no job created.' % fp)
        datainfo = {'acquisition_id': a['uid'], 'fileinfo': fileinfo}
        util.create_job(db.acquisitions, datainfo)