예제 #1
    def classify(self, image, model, args):
        ''' Segment an image producing a semantic mask.

        The mask is written as a PNG into args['_workdir']; the file name
        encodes the image id, the color mode and the thresholds so that a
        previously computed result is reused instead of being recomputed.

        @param image: image resource, only image.uniq is read here
        @param model: model providing the default minimum_goodness and
            minimum_accuracy thresholds
        @param args: request arguments ('points', 'border', 'goodness',
            'accuracy', 'confidence', 'colors') plus the mandatory '_workdir'
        @return: DataToken pointing at the PNG mask
        @raise ConnoisseurException: BAD_REQUEST for an unsupported color mode
        '''
        num_points = int(args.get('points', 10))
        border = int(args.get('border', 0))
        my_goodness = float(args.get('goodness', model.minimum_goodness*100))/100.0
        my_accuracy = float(args.get('accuracy', model.minimum_accuracy*100))
        my_confidence = float(args.get('confidence', 0))

        # color output mode
        color_mode = args.get('colors', 'ids')
        if color_mode not in self.color_modes:
            raise ConnoisseurException(responses.BAD_REQUEST, 'Requested color mode "%s" is not supported'%color_mode)

        # compute output file name and test cached result
        workdir = args['_workdir']
        _mkdir (workdir)
        filename = '%s_%s_conf%.2f_a%s_c%s_n%s_b%s.png'%(image.uniq, color_mode, my_goodness, my_accuracy, my_confidence, num_points, border)
        output_file = os.path.join(workdir, filename)

        with Locks(None, output_file, failonexist=True) as l:
            if l.locked: # the file is not being currently written by another process
                self.do_classify(image, model, args, output_file, color_mode)

        # return results: taking a read lock waits for a concurrent writer to finish
        if os.path.exists(output_file):
            with Locks(output_file):
                pass

        return DataToken(data=output_file, mime='image/png', name='Segments', filename=filename)
예제 #2
    def __init__(self, filename, mode='w', failonexist=False, *args, **kwargs):
        """
            Opens hdf5 files providing read/write locks for
            thread safety.

            If libHDF5 is not configured for thread safety please
            set MULTITHREAD_HDF5 to False to keep your feature
            service working in a multithreaded environment

            @param: filename - Name of the hdf5 file
            @param: mode - sets the file access mode (default: 'w')
            @param: failonexist - will not lock if file exists (default: False)
            @param: args - passes arguments to table.open_file
            @param: kwargs - passes arguments to table.open_file
        """
        self.filename = filename
        self.mode = mode
        self.args = args
        self.kwargs = kwargs
        self.h5file = None

        #create locks
        if mode in set(['w', 'a']):  #write lock
            # The output file goes in the second position, as in every other
            # write lock in this code base; without it nothing is locked and
            # failonexist is silently ignored.
            self.bq_lock = Locks(None,
                                 self.filename,
                                 failonexist=failonexist,
                                 mode=mode + 'b')
        else:  #read lock
            self.bq_lock = Locks(self.filename,
                                 mode=mode + 'b')
예제 #3
    def run(cls, ifnm, ofnm, args, **kw ):
        '''Converts input filename into output using exact arguments as provided in args.

        @param ifnm: input file name, read-locked while the command runs
        @param ofnm: output file name (may be None for commands without output)
        @param args: list of command line arguments appended to CONVERTERCOMMAND
        @param kw: 'failonread' - do not wait for a busy output file,
            'nooverwrite' - skip the command if the output already exists
        @return: ofnm on success, str(retcode) when ofnm is None, None when the
            converter is not installed, the command failed or the output
            is too small to be valid
        @raise ImageServiceException: 412 when the command timed out (retcode 99)
        @raise ImageServiceFuture: when another process holds the lock
        '''
        if not cls.installed:
            return None
        failonread = kw.get('failonread') or (not block_reads)
        tmp = None
        with Locks(ifnm, ofnm, failonexist=True) as l:
            if l.locked: # the file is not being currently written by another process
                command = [cls.CONVERTERCOMMAND]
                if args:
                    command.extend(args)
                log.debug('Run command: [%s]', misc.toascii(command))
                proceed = True
                if ofnm is not None and os.path.exists(ofnm) and os.path.getsize(ofnm)>16:
                    if kw.get('nooverwrite', False) is True:
                        proceed = False
                        log.warning ('Run: output exists before command [%s], skipping', misc.toascii(ofnm))
                    else:
                        log.warning ('Run: output exists before command [%s], overwriting', misc.toascii(ofnm))
                if proceed is True:
                    command, tmp = misc.start_nounicode_win(ifnm, command)
                    try:
                        retcode = call (command)
                    except Exception:
                        retcode = 100
                        log.exception('Error running command: %s', command)
                    if retcode == 99:
                        # in case of a timeout drop the partially written output
                        log.info ('Run: timed-out for [%s]', misc.toascii(command))
                        if ofnm is not None and os.path.exists(ofnm):
                            os.remove(ofnm)
                        raise ImageServiceException(412, 'Requested timeout reached')
                    if retcode!=0:
                        log.info ('Run: returned [%s] for [%s]', retcode, misc.toascii(command))
                        return None
                    if ofnm is None:
                        return str(retcode)
                    # output file does not exist for some operations, like tiles
                    # tile command does not produce a file with this filename
            elif l.locked is False: # dima: never wait, respond immediately
                raise ImageServiceFuture((1,15))

        # make sure the write of the output file have finished
        if ofnm is not None and os.path.exists(ofnm):
            with Locks(ofnm, failonread=failonread) as l:
                if l.locked is False: # dima: never wait, respond immediately
                    raise ImageServiceFuture((1,15))

        # safeguard for incorrectly converted files, sometimes only the tiff header can be written
        # empty lock files are automatically removed before by lock code
        if ofnm is not None and os.path.exists(ofnm) and os.path.getsize(ofnm) < cls.MINIMUM_FILE_SIZE:
            log.error ('Run: output file is smaller than %s bytes, probably an error, removing [%s]', cls.MINIMUM_FILE_SIZE, ofnm)
            os.remove(ofnm)
            return None
        return ofnm
예제 #4
def irods_cache_save(f, path, cache, *dest):
    """Copy the open source object f into the local cache and return the cached file name.

    The cache entry is <cache>/<path without its first character>. The copy
    only happens when the write lock is obtained; afterwards a read lock is
    taken on the entry before its name is returned.
    """
    target = os.path.join(cache, path[1:])

    with Locks(None, target, failonexist=True) as write_lock:
        if write_lock.locked:
            with open(target, 'wb') as sink:
                copyfile(f, sink, *dest)

    with Locks(target):
        return target
예제 #5
    def getImageInfo(self, filename, series=0, infofile=None, meta=None):
        '''Return a dict describing the image in filename, caching it in an XML info file.

        When the info file does not exist yet, the converters are asked in turn
        and the first non-empty answer is used, completed with defaults,
        merged with meta and written to the info file. Otherwise the cached
        attributes are parsed back from the info file.

        @param filename: image file to describe
        @param series: series index passed to the converters
        @param infofile: cache file name, defaults to '<filename>.info'
        @param meta: optional dict of values overriding the detected ones
        @return: info dict, or None if the image is missing, no converter could
            read it or the cached info file is empty/invalid
        @raise ImageServiceFuture: when another process holds the lock
        '''
        if infofile is None:
            infofile = '%s.info'%filename
        info = {}

        # read image info using converters
        if not os.path.exists(infofile):
            # sanity check
            if not os.path.exists(filename):
                return None
            with Locks(filename, infofile, failonexist=True) as l:
                if l.locked: # the file is not being currently written by another process
                    # parse image info from original file
                    file_speed = infofile.replace('.info', '.speed')
                    for n,c in self.converters.iteritems():
                        info = c.info(ProcessToken(ifnm=filename, series=series), speed=file_speed)
                        if info is not None and len(info)>0:
                            info['converter'] = n
                            # first converter able to read the file wins
                            break
                    if info is None or 'image_num_x' not in info:
                        return None

                    info.setdefault('image_num_t', 1)
                    info.setdefault('image_num_z', 1)
                    info.setdefault('image_num_p', info['image_num_t'] * info['image_num_z'])
                    info.setdefault('format', default_format)
                    if 'filesize' not in info:
                        info['filesize'] = os.path.getsize(filename)
                    if meta is not None:
                        info.update(meta)

                    # cache file info into a file
                    image = etree.Element ('image')
                    for k,v in info.iteritems():
                        image.set(k, '%s'%v)
                    with open(infofile, 'w') as f:
                        f.write(etree.tostring(image))
                    return info
                elif l.locked is False: # dima: never wait, respond immediately
                    raise ImageServiceFuture((1,10))

        # info file exists
        with Locks(infofile, failonread=(not block_reads)) as l:
            if l.locked is False: # dima: never wait, respond immediately
                raise ImageServiceFuture((1,10))
            try:
                image = etree.parse(infofile).getroot()
                for k,v in image.attrib.iteritems():
                    info[k] = safetypeparse(v)
                return info
            except etree.XMLSyntaxError:
                log.debug ("attempt to read empty info file")
        return None
예제 #6
    def format(self, token, args):
        """ Converts table to HDF5.

        Builds a pandas DataFrame from the records in token.data and stores it
        under the key 'table' in '<token.name>_<args['_filename']>.h5' inside
        args['_workdir'].

        @param token: token whose data is an iterable of records; each record
            provides a 'vertex' sequence and is accepted by compute_measures
        @param args: request arguments, '_workdir' and '_filename' are required
        @return: the token updated to point at the HDF5 file
        """

        # GobsTable = np.dtype([
        #     ('gobject',    tables.StringCol(10)),
        #     ('type',       tables.StringCol(50)),
        #     ('vertices',   tables.VLArray()),

        #     Col.from_atom(atom, pos=None)

        #     ('accuracy',   tables.Float32Col()),
        #     ('goodness',   tables.Float32Col()),
        #     ('confidence', tables.Float32Col()),
        # ])

        data = {
            'gobject': [],
            'type': [],
            'vertices': [],
            'accuracy': [],
            'goodness': [],
            'confidence': [],
            #'color': [],
        }

        for r in token.data:
            m_g, m_a, m_c = compute_measures (r)
            # vertices are stored with their two coordinates swapped
            data['vertices'].append([ (v[1],v[0]) for v in r['vertex'] ])
            data['accuracy'].append(m_a)
            data['goodness'].append(m_g)
            data['confidence'].append(m_c)
            # NOTE(review): the 'gobject' and 'type' columns are never filled in
            # this snippet; the record keys they come from are not visible here.
            # Until they are appended too, pd.DataFrame rejects non-empty input
            # because the columns differ in length - TODO confirm and restore.

        df = pd.DataFrame(data)

        workdir = args['_workdir']
        _mkdir (workdir)

        filename = '%s_%s.h5'%(token.name, args['_filename'])
        output_file = os.path.join(workdir, filename)

        with Locks(None, output_file, failonexist=True) as l:
            if l.locked: # the file is not being currently written by another process
                df.to_hdf(output_file, 'table', append=False)

        # return results: taking a read lock waits for a concurrent writer to finish
        if os.path.exists(output_file):
            with Locks(output_file):
                pass

        return token.setFile(path=output_file, mime=self.mime_type, filename=filename)
예제 #7
def s3_download(bucket, key, cache_filename, creds, blocking):
    """Fetch an S3 object into cache_filename and return that file name.

    The download only runs when the write lock on the cache file is obtained.
    Afterwards the cache file is read-locked; with blocking False the read
    lock is requested with failonread set, and None is returned when it is
    not obtained. None is also returned when the cache file does not exist.
    """
    client = boto3.client('s3', **creds)

    with Locks(None, cache_filename, failonexist=True) as write_lock:
        if write_lock.locked is True:
            with Timer() as elapsed:
                client.download_file(bucket, key, cache_filename)
            log.info("S3 Downloaded %s", rate_str(cache_filename, elapsed))

    if cache_filename is None or not os.path.exists(cache_filename):
        return None

    with Locks(cache_filename, failonread=(not blocking)) as read_lock:
        if read_lock.locked is False:
            return None
        return cache_filename
    return None
예제 #8
    def train(self, method='finetune'):
        """Train the model with the given method and re-activate it afterwards.

        @param method: training method passed to the framework (default 'finetune')
        @return: the value returned by framework.train
        @raise ConnoisseurException: BAD_REQUEST if no framework is attached,
            LOCKED if another process holds the model lock,
            INTERNAL_SERVER_ERROR if the network can not be activated afterwards
        """
        if self.framework is None:
            raise ConnoisseurException(responses.BAD_REQUEST, 'Model is incomplete for train operation')

        with Locks(None, self.lockable, failonexist=True) as l:
            if l.locked is False: # the file is being written by another process
                raise ConnoisseurException(responses.LOCKED, 'The model is locked for processing by another process')

            # dispatch this as a celery task

            try:
                r = self.framework.update_model_from_template()
                r = self.framework.train(method=method)
            except Exception:
                # record the failure on the resource, then let the caller see it
                self.update_with_error('status.train', 'Exception during train')
                raise

            set_tag(self.resource, 'status.train', 'finished')
            self.framework.activate (training=False)
            if self.framework.is_activate() is False:
                raise ConnoisseurException(responses.INTERNAL_SERVER_ERROR, 'Could not activate network after training, something is wrong...')
            return r
예제 #9
    def split_samples_training_testing(self, args=None):
        """Split the samples into training and testing sets.

        @param args: optional dict; when it contains 'template_path', sample
            preview paths are built from 'template_path', 'template_filename'
            and 'num_per_class' and handed to the framework
        @return: the value returned by framework.split_samples_training_testing
        @raise ConnoisseurException: BAD_REQUEST if no framework is attached,
            LOCKED if another process holds the model lock
        """
        if self.framework is None:
            raise ConnoisseurException(responses.BAD_REQUEST, 'Model is incomplete for split operation')

        with Locks(None, self.lockable, failonexist=True) as l:
            if l.locked is False: # the file is being written by another process
                raise ConnoisseurException(responses.LOCKED, 'The model is locked for processing by another process')

            # dispatch this as a celery task
            self.update_status(status='Splitting data for training and testing')

            paths = None
            if args is not None and 'template_path' in args:
                paths = self.get_sample_preview_paths(args.get('template_path'), args.get('template_filename'), args.get('num_per_class'))

            try:
                r = self.framework.split_samples_training_testing(sample_preview_paths=paths)
            except Exception:
                # record the failure on the resource, then let the caller see it
                self.update_with_error('status.samples.split', 'Exception during split_samples_training_testing')
                raise

            set_classes(self.resource, 'classes_model', self.classes_model)
            set_tag(self.resource, 'status.samples.split', 'finished')
            return r
예제 #10
def s3_upload (bucket, key, cache_filename, creds):
    """Upload the local file cache_filename to the S3 object bucket/key.

    The file is read-locked for the duration of the transfer and the
    transfer rate is logged afterwards.

    @param creds: keyword arguments passed to boto3.client
    """
    s3_client = boto3.client('s3', **creds)
    with Locks (cache_filename):
        with Timer () as t:
            s3_client.upload_file(cache_filename, bucket, key)
    log.info("S3 Uploaded %s", rate_str(cache_filename, t))
예제 #11
    def return_from_workdir(self, table, resource_list):
        # NOTE(review): this snippet is truncated. The docstring quotes, the
        # `try:` matching the `except UnicodeEncodeError:` below, the arguments
        # of both `disposition` format expressions, the body of the
        # `with Locks(...)` statement and most of the `forward(...)` call are
        # missing, so the block does not parse as shown.
            Returns a hdf5 file from the workdir

            @param: table - workdir table object that allows access
            to the workdir table created by the feature service
            @param: resource_list - the resource lists object containing
            all the resources proccessed on during the request

            @yield: fileapp object with path set to the hdf5 file in
            the feature workdir
        # since the uncached table is already saved in the workdir the file is just
        # returned
            # Content-Disposition header value offering the file as an attachment
            disposition = 'attachment; filename="%s"' % (
        except UnicodeEncodeError:
            # fallback header that carries an additional filename* parameter
            disposition = 'attachment; filename="%s"; filename*="%s"' % (

        # wait for a table that is still being constructed
        with Locks(table.path):

        return forward(
                    allowed_methods=('GET', 'POST'),
예제 #12
    def tile(cls, token, ofnm, level=None, x=None, y=None, sz=None, **kw):
        '''Extract a tile from the image.

        default interface:
            Level,X,Y tile from input filename into output in TIFF format
        alternative interface, not required to support and may return None in this case
        scale=scale, x1=x1, y1=y1, x2=x2, y2=y2, arbitrary_size=False

        @param token: token providing first_input_file() and series
        @param ofnm: output file name, written as LZW compressed TIFF
        @param level: resolution level, 0 is mapped to the last deep zoom level
        @param x: tile column
        @param y: tile row
        @param sz: tile size in pixels
        @return: ofnm, or None if the request or the file is not supported
        @raise ImageServiceFuture: when the output is locked by another process
        '''

        # open slide driver does not support arbitrary size interface
        if kw.get('arbitrary_size',
                  False) == True or level is None or sz is None:
            return None

        ifnm = token.first_input_file()
        series = token.series
        if not cls.supported(token):
            return None
        log.debug('Tile: %s %s %s %s %s for [%s]', level, x, y, sz, series,
                  ifnm)

        level = misc.safeint(level, 0)
        x = misc.safeint(x, 0)
        y = misc.safeint(y, 0)
        sz = misc.safeint(sz, 0)
        with Locks(ifnm, ofnm, failonexist=True) as l:
            if l.locked:  # the file is not being currently written by another process
                try:
                    _, tmp = misc.start_nounicode_win(ifnm, [])
                    slide = openslide.OpenSlide(tmp or ifnm)
                    try:
                        # NOTE(review): tile_size=sz, overlap=0 reconstructed from
                        # the truncated call - confirm against the original
                        dz = deepzoom.DeepZoomGenerator(slide,
                                                        tile_size=sz,
                                                        overlap=0)
                        # deep zoom levels grow with resolution, ours shrink
                        img = dz.get_tile(dz.level_count - level - 1, (x, y))
                        img.save(ofnm, 'TIFF', compression='LZW')
                    finally:
                        slide.close()
                except (openslide.OpenSlideUnsupportedFormatError,
                        openslide.OpenSlideError):
                    return None

        # make sure the file was written
        with Locks(ofnm, failonread=(not block_reads)) as l:
            if l.locked is False:  # dima: never wait, respond immediately
                raise ImageServiceFuture((1, 15))
        return ofnm
예제 #13
 def run_read(cls, ifnm, command):
     '''Run a read-only command on ifnm under a read lock and return its output.

     Raises ImageServiceFuture((1,10)) when the read lock is not obtained.
     '''
     with Locks(ifnm, failonread=(not block_reads)) as read_lock:
         if read_lock.locked is False:
             # never wait for a busy file, respond immediately
             raise ImageServiceFuture((1,10))
         prepared, _tmp = misc.start_nounicode_win(ifnm, command)
         log.debug('run_read command: [%s]', misc.toascii(prepared))
         out = cls.run_command(prepared)
     return out
예제 #14
    def thumbnail(cls, token, ofnm, width, height, **kw):
        '''Converts input filename into output thumbnail.

        @param token: token providing first_input_file() and series
        @param ofnm: output file name
        @param width: maximum thumbnail width
        @param height: maximum thumbnail height
        @param kw: 'fmt' - output image format name (default 'jpeg')
        @return: ofnm, or None if the file is not supported
        @raise ImageServiceFuture: when the output is locked by another process
        '''
        ifnm = token.first_input_file()
        series = token.series
        if not cls.supported(token):
            return None
        log.debug('Thumbnail: %s %s %s for [%s]', width, height, series, ifnm)

        fmt = kw.get('fmt', 'jpeg').upper()
        with Locks(ifnm, ofnm, failonexist=True) as l:
            if l.locked:  # the file is not being currently written by another process
                try:
                    _, tmp = misc.start_nounicode_win(ifnm, [])
                    slide = openslide.OpenSlide(tmp or ifnm)
                except (openslide.OpenSlideUnsupportedFormatError,
                        openslide.OpenSlideError):
                    return None
                try:
                    img = slide.get_thumbnail((width, height))
                finally:
                    slide.close()
                try:
                    img.save(ofnm, fmt)
                except IOError:
                    # the requested format could not be written, keep a TIFF copy
                    tmp = '%s.tif' % ofnm
                    img.save(tmp, 'TIFF')
                    # NOTE(review): nothing visible here converts the TIFF copy
                    # into ofnm - confirm whether a conversion step was lost
            elif l.locked is False:  # dima: never wait, respond immediately
                raise ImageServiceFuture((1, 15))

        # make sure the file was written
        with Locks(ofnm, failonread=(not block_reads)) as l:
            if l.locked is False:  # dima: never wait, respond immediately
                raise ImageServiceFuture((1, 15))
        return ofnm
예제 #15
    def init_classes_dataset(self):
        """Collect the classes present in the training dataset and reset the model classes.

        Queries the data service for the gobject types of the training set,
        accumulates the number of samples per adapted class name into
        self.classes_data, clears the model class tables and stores the
        result on the model resource.

        @raise ConnoisseurException: BAD_REQUEST without a training set,
            LOCKED if another process holds the model lock
        """
        if self.training_set is None:
            raise ConnoisseurException(responses.BAD_REQUEST, 'Cannot initialize classes due to missing training dataset')

        with Locks(None, self.lockable, failonexist=True) as l:
            if l.locked is False: # the file is being written by another process
                raise ConnoisseurException(responses.LOCKED, 'The model is locked for processing by another process')

            try:
                dataset_url = ensure_url(self.training_set)
                adapter_gobs = self.create_adapter_gobs(model=self, image=None)

                classes = {}
                gobs = data_service.query(resource_type='value', parent=dataset_url, extract='gobject[type]')
                idx = 0
                self.total_samples = 0
                for g in gobs:
                    k = g.get('type')
                    n = misc.safeint(g.text, 0)
                    if k is None: continue
                    k = adapter_gobs.get_class_name(g) # adapt the class name, might need some change since the node is not a true gobject
                    if k is None: continue
                    if k not in classes:
                        classes[k] = {
                            'label': k,
                            'id': idx,
                            'samples': n,
                        }
                        idx += 1
                    else:
                        classes[k]['samples'] += n
                    self.total_samples += n
                self.classes_data = classes
                self.classes_data_by_original_id = dict((v['id'],v) for k,v in self.classes_data.iteritems())
                #log.debug('Classes data: %s', str(self.classes_data))

                self.classes_model = {}
                self.classes_model_by_id = {}
                self.classes_model_by_original_id = {}
                self.number_classes_in_model = 0
            except Exception:
                # record the failure on the resource, then let the caller see it
                self.update_with_error('status.classes.init', 'Exception during init_classes_dataset')
                raise

            # update model resource
            set_tag(self.resource, 'total_samples', self.total_samples)
            set_classes(self.resource, 'classes_data', self.classes_data)
            set_classes(self.resource, 'classes_model', self.classes_model)
            set_tag(self.resource, 'status.classes.init', 'finished')
예제 #16
    def run_read(cls, ifnm, command):
        '''Run a read-only library command on ifnm under a read lock.

        The command is retried once when the library reports retcode 100 or 101.

        @param ifnm: input file name to read-lock
        @param command: command passed to call_imgcnvlib
        @return: the output produced by the (last) library call
        @raise ImageServiceFuture: when the read lock is not obtained
        '''
        with Locks(ifnm, failonread=(not block_reads)) as l:
            if l.locked is False:  # dima: never wait, respond immediately
                raise ImageServiceFuture((1, 15))
            #command, tmp = misc.start_nounicode_win(ifnm, command)
            log.debug('run_read dylib command: %s', misc.tounicode(command))
            #out = cls.run_command( command )
            retcode, out = call_imgcnvlib(command)
            if retcode == 100 or retcode == 101:  # some error in libbioimage, retry once
                log.error('Libioimage retcode %s: retry once: %s', retcode,
                          misc.tounicode(command))
                retcode, out = call_imgcnvlib(command)

            #log.debug('Retcode: %s', retcode)
            #log.debug('out: %s', out)
        return out
예제 #17
def scandir(dirname, options, logger):
    """Scan a directory freeing oldest files until free target is achieved.

    Files are visited in the order produced by iter_files_by_atime and are
    only deleted while an exclusive lock on them is held. The free space is
    estimated from the sizes of the deleted files and refreshed from the
    filesystem once the estimate has moved by more than 0.1 percent.

    @param dirname: directory to clean
    @param options: object with capacity (target percent free), dryrun,
        include_pattern and exclude_pattern attributes
    @param logger: logger receiving progress messages
    """
    stats = os.statvfs(dirname)
    f_bavail = stats.f_bavail
    f_blocks = stats.f_blocks
    f_bfree = stats.f_bfree
    percent_free = percent_free_last = 100.0 - ((f_blocks-f_bfree) * 100.0 / (f_blocks-f_bfree+f_bavail))
    files_removed = 0
    capacity = float(options.capacity)
    logger.info("Filesystem %s before cleaning %s%% free" ,  dirname, int(percent_free))
    if percent_free < capacity:
        for filename, _, size in iter_files_by_atime(dirname, include_pattern=options.include_pattern, exclude_pattern=options.exclude_pattern):
            try:
                with Locks(None, filename, failonexist=False, mode='ab') as bq_lock:
                    if bq_lock.locked:
                        # we have exclusive lock => OK to delete
                        freed_blocks = math.ceil(float(size) / float(stats.f_frsize))
                        if options.dryrun:
                            logger.info("(simulated) delete %s (%s bytes)" ,  filename, size)
                            f_bavail += freed_blocks
                            f_bfree += freed_blocks
                        else:
                            logger.debug("delete %s (%s bytes)" ,  filename, size)
                            os.remove(filename)
                            files_removed += 1
                            if percent_free_last < percent_free-0.1:
                                # time to refresh stats
                                stats = os.statvfs(dirname)
                                f_bavail = stats.f_bavail
                                f_bfree = stats.f_bfree
                                percent_free_last = percent_free
                            else:
                                f_bavail += freed_blocks
                                f_bfree += freed_blocks
                        # percent_free_last is left alone here: it marks the
                        # last refresh and must lag behind for the test above
                        percent_free = 100.0 - ((f_blocks-f_bfree) * 100.0 / (f_blocks-f_bfree+f_bavail))
                        logger.debug("now %s%% free" ,  percent_free)
                    else:
                        logger.info("lock on %s failed, skipping" , filename)
            except IOError:
                logger.info("IO error accessing %s, skipping", filename)
            if percent_free >= capacity:
                break
    logger.info("Filesystem %s after cleaning %s%% free, removed %s files" , dirname, int(percent_free), files_removed)
예제 #18
    def create_sample_db(self):
        # Builds the sample database for the model from its training dataset:
        # validates the dataset resource, records its timestamp and the number
        # of images on the model resource, then processes the images.
        # Raises ConnoisseurException BAD_REQUEST (no training set / resource is
        # not a dataset) or LOCKED (model lock held by another process).
        # NOTE(review): this snippet is truncated - the body of `for r in refs:`,
        # the `try:`/`except` around the image loop, the per-image work and the
        # assignment of `r` returned at the end are missing.
        if self.training_set is None:
            raise ConnoisseurException(responses.BAD_REQUEST, 'Cannot create sample DB due to missing training dataset')

        with Locks(None, self.lockable, failonexist=True) as l:
            if l.locked is False: # the file is being written by another process
                raise ConnoisseurException(responses.LOCKED, 'The model is locked for processing by another process')

            dataset_url = ensure_url(self.training_set)
            dataset = data_service.get_resource(dataset_url, view='full')
            # NOTE(review): the parenthesis is misplaced; this evaluates
            # `(dataset is None or dataset.tag) != 'dataset'`, which happens to
            # give the intended result for both a None and a non-None dataset
            if (dataset is None or dataset.tag) != 'dataset':
                raise ConnoisseurException(responses.BAD_REQUEST, 'Provided resource is not a dataset')

            self.training_set_timestamp = dataset.get('ts')
            set_tag(self.resource, 'training_set_timestamp', self.training_set_timestamp)

            images = []
            # dataset members are the value elements of type "object"
            refs = dataset.xpath('value[@type="object"]')
            for r in refs:

            self.total_images = len(images)
            set_tag(self.resource, 'total_images', self.total_images)

            self.update_status(status='Creating sample db')

            # dima: this should be parallelized
            #r = self.framework.create_sample_db(images)
            log.info('STARTING samples:init for %s images', self.total_images)
                for i,image in enumerate(images):
                    log.info('PROCESSING samples:init %s/%s for %s', i, self.total_images, image)
                self.update_with_error('status.samples.init', 'Exception during create_sample_db')
            log.info('FINSHED samples:init for %s images', self.total_images)

            set_classes(self.resource, 'classes_model', self.classes_model)
            set_tag(self.resource, 'status.samples.init', 'finished')
            return r
예제 #19
    def action(self, token, arg):
        # ROI operation: arg is a ';' separated list of 'x1,y1,x2,y2' regions.
        # Non-numeric or missing coordinates are read as 0; the output file
        # name is built from the first region with every coordinate minus one.
        # Raises ImageServiceException(400) for a non-file token or when the
        # first region has no positive coordinate.
        # NOTE(review): this snippet is truncated - several dict/list literals
        # and calls (`info = {`, `command = [`, `self.server.enqueue(`, the
        # list comprehensions, `token.setImage(`) are left unclosed and the
        # conversion call itself is missing, so the block does not parse as shown.
        if not token.isFile():
            raise ImageServiceException(400, 'Roi: input is not an image...')
        rois = []
        for a in arg.split(';'):
            vs = a.split(',', 4)
            x1 = int(vs[0]) if len(vs) > 0 and vs[0].isdigit() else 0
            y1 = int(vs[1]) if len(vs) > 1 and vs[1].isdigit() else 0
            x2 = int(vs[2]) if len(vs) > 2 and vs[2].isdigit() else 0
            y2 = int(vs[3]) if len(vs) > 3 and vs[3].isdigit() else 0
            rois.append((x1, y1, x2, y2))
        # the first region defines the output of this operation
        x1, y1, x2, y2 = rois[0]

        if x1 <= 0 and x2 <= 0 and y1 <= 0 and y2 <= 0:
            raise ImageServiceException(400, 'ROI: region is not provided')

        ifile = token.first_input_file()
        otemp = token.data
        ofile = '%s.roi_%d,%d,%d,%d' % (token.data, x1 - 1, y1 - 1, x2 - 1,
                                        y2 - 1)
        log.debug('ROI %s: %s to %s', token.resource_id, ifile, ofile)

        # a single region is handed to the server queue
        if len(rois) == 1:
            info = {
                'image_num_x': x2 - x1,
                'image_num_y': y2 - y1,
            command = [
                '%s,%s,%s,%s' % (x1 - 1, y1 - 1, x2 - 1, y2 - 1)
            return self.server.enqueue(token,

        # remove pre-computed ROIs
        rois = [
            (_x1, _y1, _x2, _y2) for _x1, _y1, _x2, _y2 in rois
            if not os.path.exists('%s.roi_%d,%d,%d,%d' %
                                  (otemp, _x1 - 1, _y1 - 1, _x2 - 1, _y2 - 1))

        lfile = '%s.rois' % (otemp)
        command = token.drainQueue()
        if not os.path.exists(ofile) or len(rois) > 0:
            # global ROI lock on this input since we can't lock on all individual outputs
            with Locks(ifile, lfile, failonexist=True) as l:
                if l.locked:  # the file is not being currently written by another process
                    s = ';'.join([
                        '%s,%s,%s,%s' % (x1 - 1, y1 - 1, x2 - 1, y2 - 1)
                        for x1, y1, x2, y2 in rois
                    command.extend(['-roi', s])
                         '%s.roi_{x1},{y1},{x2},{y2}' % otemp])
                    # ensure the virtual locking file is not removed
                    with open(lfile, 'wb') as f:
                        f.write('#Temporary locking file')
                elif l.locked is False:  # dima: never wait, respond immediately
                    raise ImageServiceFuture((1, 15))

        # ensure the operation is finished
        if os.path.exists(lfile):
            with Locks(lfile, failonread=(not block_reads)) as l:
                if l.locked is False:  # dima: never wait, respond immediately
                    raise ImageServiceFuture((1, 15))

        info = {
            'image_num_x': x2 - x1,
            'image_num_y': y2 - y1,
        return token.setImage(ofile,
예제 #20
def irods_cache_fetch(path, cache):
    """Return the cache file name for path, or None when it is not cached.

    The cache entry is <cache>/<path without its first character>. An
    existing entry is read-locked before its name is returned, which checks
    whether it is currently being written.
    """
    cached = os.path.join(cache, path[1:])
    if not os.path.exists(cached):
        return None
    with Locks(cached):
        return cached
예제 #21
    def action(self, token, arg):
        '''Extract one tile from the image referenced by token.

        arg = l,tnx,tny,tsz (resolution level, tile column, tile row, tile
        size); non-numeric or missing values keep the defaults 0,0,0,512.

        Images that fit into a single tile are passed through unchanged. An
        image without a tiled pyramid first gets a pyramidal TIFF generated,
        which then replaces the input. The tile is read by the imgcnv
        converter when the image was decoded by it, otherwise by the first
        other converter offering a tile() method.

        @return: the updated token
        @raise ImageServiceException: 400 for a non-file token or a tile
            position outside of the image, 500 if the pyramid or the tile
            could not be produced
        @raise ImageServiceFuture: when the pyramid is locked by another process
        '''
        if not token.isFile():
            raise ImageServiceException(400, 'Tile: input is not an image...' )
        level=0; tnx=0; tny=0; tsz=512;
        vs = arg.split(',', 4)
        if len(vs)>0 and vs[0].isdigit(): level = int(vs[0])
        if len(vs)>1 and vs[1].isdigit(): tnx = int(vs[1])
        if len(vs)>2 and vs[2].isdigit(): tny = int(vs[2])
        if len(vs)>3 and vs[3].isdigit(): tsz = int(vs[3])
        log.debug( 'Tile: l:%d, tnx:%d, tny:%d, tsz:%d' % (level, tnx, tny, tsz) )

        # if input image is smaller than the requested tile size
        dims = token.dims or {}
        width = dims.get('image_num_x', 0)
        height = dims.get('image_num_y', 0)
        if width<=tsz and height<=tsz:
            log.debug('Image is smaller than requested tile size, passing the whole image...')
            return token

        # construct a sliced filename
        ifname    = token.first_input_file()
        base_name = '%s.tiles'%(token.data)
        _mkdir( base_name )
        ofname    = os.path.join(base_name, '%s_%.3d_%.3d_%.3d' % (tsz, level, tnx, tny))
        hist_name = os.path.join(base_name, '%s_histogram'%(tsz))

        # if input image does not contain tile pyramid, create one and pass it along
        if dims.get('image_num_resolution_levels', 0)<2 or dims.get('tile_num_x', 0)<1:
            pyramid = '%s.pyramid.tif'%(token.data)
            command = token.drainQueue()
            if not os.path.exists(pyramid):
                #command.extend(['-ohst', hist_name])
                command.extend(['-options', 'compression lzw tiles %s pyramid subdirs'%default_tile_size])
                log.debug('Generate tiled pyramid %s: from %s to %s with %s', token.resource_id, ifname, pyramid, command )
                r = self.server.imageconvert(token, ifname, pyramid, fmt=default_format, extra=command)
                if r is None:
                    raise ImageServiceException(500, 'Tile: could not generate pyramidal file' )
            # ensure the file was created
            with Locks(pyramid, failonread=(not block_tile_reads)) as l:
                if l.locked is False: # dima: never wait, respond immediately
                    # retry interval scales with the image area
                    fff = (width*height) / (10000*10000)
                    raise ImageServiceFuture((15*fff,30*fff))

            # compute the number of pyramidal levels
            # sz = max(width, height)
            # num_levels = math.ceil(math.log(sz, 2)) - math.ceil(math.log(min_level_size, 2)) + 1
            # scales = [1/float(pow(2,i)) for i in range(0, num_levels)]
            # info = {
            #     'image_num_resolution_levels': num_levels,
            #     'image_resolution_level_scales': ',',join([str(i) for i in scales]),
            #     'tile_num_x': default_tile_size,
            #     'tile_num_y': default_tile_size,
            #     'converter': ConverterImgcnv.name,
            # }

            # load the number of pyramidal levels from the file
            info2 = self.server.getImageInfo(filename=pyramid)
            info = {
                'image_num_resolution_levels': info2.get('image_num_resolution_levels'),
                'image_resolution_level_scales': info2.get('image_resolution_level_scales'),
                'tile_num_x': info2.get('tile_num_x'),
                'tile_num_y': info2.get('tile_num_y'),
                'converter': info2.get('converter'),
            }
            log.debug('Updating original input to pyramidal version %s: %s -> %s', token.resource_id, ifname, pyramid )
            token.setImage(ofname, fmt=default_format, dims=info, input=pyramid)
            ifname = pyramid

        # compute output tile size
        dims = token.dims or {}
        x = tnx * tsz
        y = tny * tsz
        if x>=width or y>=height:
            raise ImageServiceException(400, 'Tile: tile position outside of the image: %s,%s'%(tnx, tny))

        # the new tile service does not change the number of z points in the image and if contains all z will perform the operation
        info = {
            'image_num_x': tsz if width-x >= tsz else width-x,
            'image_num_y': tsz if height-y >= tsz else height-y,
            #'image_num_z': 1,
            #'image_num_t': 1,
        }

        #log.debug('Inside pyramid dims: %s', dims)
        #log.debug('Inside pyramid input: %s', token.first_input_file() )
        #log.debug('Inside pyramid data: %s', token.data )

        # extract individual tile from pyramidal tiled image
        if dims.get('image_num_resolution_levels', 0)>1 and dims.get('tile_num_x', 0)>0:
            # dima: maybe better to test converter, if imgcnv then enqueue, otherwise proceed with the converter path
            if dims.get('converter', '') == ConverterImgcnv.name:
                c = self.server.converters[ConverterImgcnv.name]
                r = c.tile(token, ofname, level, tnx, tny, tsz)
                if r is not None:
                    if not os.path.exists(hist_name):
                        # write the histogram file if missing
                        c.writeHistogram(token, ofnm=hist_name)
                # if decoder returned a list of operations for imgcnv to enqueue
                if isinstance(r, list):
                    #r.extend([ '-ihst', hist_name])
                    token.histogram = hist_name
                    return self.server.enqueue(token, 'tile', ofname, fmt=default_format, command=r, dims=info)

            # try other decoders to read tiles
            ofname = '%s.tif'%ofname
            if os.path.exists(ofname):
                return token.setImage(ofname, fmt=default_format, dims=info, hist=hist_name, input=ofname)
            else:
                r = None
                for n,c in self.server.converters.iteritems():
                    if n == ConverterImgcnv.name: continue
                    if callable( getattr(c, "tile", None) ):
                        r = c.tile(token, ofname, level, tnx, tny, tsz)
                        if r is not None:
                            if not os.path.exists(hist_name):
                                # write the histogram file if missing
                                c.writeHistogram(token, ofnm=hist_name)
                            return token.setImage(ofname, fmt=default_format, dims=info, hist=hist_name, input=ofname)

        raise ImageServiceException(500, 'Tile could not be extracted')
예제 #22
def main():
    """Command-line entry point for the file cleaner.

    Parses the options, configures logging and then, per cleaning cycle, runs
    the optional --prerun command, hands every directory given in ``paths`` to
    ``scandir`` while working with an ``xCLEANERx`` lock file, and runs the
    optional --postrun command.

    NOTE(review): this copy does not parse as written -- the bodies of
    ``if options.debug:`` and ``if options.loop:`` are absent and several
    ``else:`` lines look to have been dropped (marked inline below). Restore
    them from the upstream file before relying on the control flow.
    """
    parser = argparse.ArgumentParser(description='Clean specific files from directory trees.')
    parser.add_argument('paths', nargs='+', help='directory to clean')
    parser.add_argument('-c', '--free', dest="capacity", default='80', help="target free capacity (in percent of drive), default: 80" )
    parser.add_argument('-l','--loop', dest="loop", help="wait time between cleaning cycles (in s), default: no cycle" )
    parser.add_argument('-r','--dryrun', action="store_true", default=False, help='simulate what would happen')
    parser.add_argument('-d','--debug',  action="store_true", default=False, help='print debug log')
    parser.add_argument('-i','--include',  dest="include_pattern",  action='append', help='filename pattern to include')
    parser.add_argument('-e','--exclude',  dest="exclude_pattern", action='append', help='filename pattern to exclude')
    parser.add_argument('--log-ini', dest='logini', default=None, help='logging config ini')
    parser.add_argument('--prerun', default = None, help="Run script before processing")
    parser.add_argument('--postrun', default = None, help="Run script after processing")
    parser.add_argument('--lockdir', default = None , help="Directory for locks (deafult is dir path). Ensures 1 cleanrer ")

    options = parser.parse_args()
    args = options.paths
    # drop trailing slashes so the directory names are in a uniform form
    dirnames = [arg.rstrip('/') for arg in args]

    if options.dryrun:
        # Python 2 print statement: echo the parsed options
        print options

    if options.logini:
        logging.config.fileConfig (options.logini)
        # NOTE(review): an 'else:' looks to be missing above -- as written
        # basicConfig runs right after fileConfig and never without --log-ini
        logging.basicConfig(stream=sys.stdout, level = logging.INFO)

    logger = logging.getLogger ('bq.file_cleaner')

    if options.debug:
        # NOTE(review): the body of this branch is missing in this copy
        # (presumably it raises the logger level to DEBUG) -- confirm upstream

    while True:
        if options.prerun:
            # the --prerun string is executed through the shell as given
            status = subprocess.call (options.prerun, shell=True)
            if status != 0:
                logger.error ("Prerun %s failed with status %s", options.prerun, status)
                # NOTE(review): as indented, "OK" is logged inside the failure
                # branch; an 'else:' is likely missing before the next line
                logger.info ("PRERUN %s: OK", options.prerun)

        for dirname in dirnames:
            skipped = False
            # the lock file lives in --lockdir when given, otherwise in the
            # directory being cleaned
            lockname = os.path.join (options.lockdir or dirname, 'xCLEANERx')
            with Locks (None, lockname, failonexist=True) as fl:
                if not fl.locked:
                    # Somebody else holds the lock file: remember the skip and log it
                    skipped = True
                    logger.info ("%s was locked .. skipping ", lockname)
                # NOTE(review): as written the scan below also runs when the
                # lock was not obtained (no 'continue'/'else' after the skip
                # branch), and 'fl' is rebound to the opened file object
                with open(lockname, 'wb') as fl:
                    scandir (dirname, options, logger)
                    # the lock file is removed while it is still open here
                    os.remove (lockname)

        # 'skipped' holds the value set for the last directory of the loop
        if not skipped and options.postrun:
            # the --postrun string is executed through the shell as given
            status = subprocess.call (options.postrun, shell=True)
            if status != 0:
                logger.error ("Postrun %s failed with status %s", options.postrun, status)
                # NOTE(review): same pattern as PRERUN -- an 'else:' is likely
                # missing before the next line
                logger.info ("POSTRUN %s: OK", options.postrun)

        # NOTE(review): the body of this branch is missing in this copy, so
        # nothing visible here ever leaves the 'while True' loop
        if options.loop:
예제 #23
    def init_classes_model(self):
        """Build ``self.classes_model`` from ``self.classes_data`` and store it on the model resource.

        Classes that have at least ``self.minimum_samples`` samples and are not
        flagged 'ignored' receive consecutive ids; ``self.total_samples`` is
        accumulated along the way. The id lookup tables, the class count and
        the resource tags are refreshed at the end.

        @raise: ConnoisseurException - BAD_REQUEST when ``self.classes_data``
                is None, LOCKED when the lock on ``self.lockable`` is not
                obtained, NO_CONTENT when ``self.total_samples`` is below 10

        NOTE(review): this copy does not parse as written -- both dict
        literals lack their closing brace and a ``try:``/``except`` pair and
        an ``else:`` look to have been dropped (marked inline below).
        """
        if self.classes_data is None:
            raise ConnoisseurException(responses.BAD_REQUEST, 'Cannot initialize model classes due to missing data classes')

        with Locks(None, self.lockable, failonexist=True) as l:
            if l.locked is False: # the file is being written by another process
                raise ConnoisseurException(responses.LOCKED, 'The model is locked for processing by another process')
                # NOTE(review): the lines below sit at the indentation of the
                # 'raise' above; a 'try:' line at the outer level appears to
                # be missing before them
                classes = {}
                idx = 0  # next id to hand out

                # append background class if requested
                if self.use_background_class is True:
                    classes[self.background_class_name] = {
                        'label': self.background_class_name,
                        'id': idx,
                        'id_original': -1,
                        'samples': 0,
                        'samples_training': 0,
                        'samples_validation': 0,
                        'samples_testing': 0,
                        'ignored': True,
                    # NOTE(review): the closing '}' of the literal above is missing
                    idx += 1

                # add all the classes found in the dataset
                self.total_samples = 0
                for k,c in self.classes_data.iteritems():
                    if c['samples'] >= self.minimum_samples and c.get('ignored', False) is not True:
                        classes[k] = {
                            'label': k,
                            'id': idx,
                            'id_original': c['id'],
                            #'samples': c['samples'],
                            'samples': 0,
                            'samples_training': 0,
                            'samples_validation': 0,
                            'samples_testing': 0,
                        # NOTE(review): the closing '}' of the literal above is missing

                        if c['samples'] < self.minimum_samples_augmentation:
                            # remember the real sample count for this class
                            classes[k]['samples_actual'] = c['samples']
                            self.total_samples += c['samples'] * AUGMENTATION_SCALE
                            # NOTE(review): as written the samples are added a
                            # second time here; an 'else:' is likely missing
                            # before the next line
                            self.total_samples += c['samples']

                        idx += 1
                    elif self.use_background_class is True:
                        # the class itself is not added, but its samples still count towards the total
                        #classes[self.background_class_name]['samples'] += c['samples']
                        self.total_samples += c['samples']
                # NOTE(review): looks like the body of a dropped 'except'
                # clause; as written it would run on every call
                self.update_with_error('status.classes.filter', 'Exception during init_classes_model')

            if self.total_samples < 10:
                self.update_with_error('status.classes.filter', 'Too few samples were found, model cannot be trained')
                raise ConnoisseurException(responses.NO_CONTENT, 'Too few samples were found, model cannot be trained')

            self.classes_model = classes
            # lookup tables keyed by the new id and by the original dataset id
            self.classes_model_by_id = dict((v['id'],v) for k,v in self.classes_model.iteritems())
            self.classes_model_by_original_id = dict((v['id_original'],v) for k,v in self.classes_model.iteritems())
            self.number_classes_in_model = len(self.classes_model)

            # update model resource
            set_tag(self.resource, 'total_samples', self.total_samples)
            set_classes(self.resource, 'classes_model', self.classes_model)
            set_tag(self.resource, 'status.classes.filter', 'finished')
예제 #24
    def validate(self):
        """Validate the model at several goodness levels and store the per-class metrics.

        ``self.validate_by_goodness`` is called for each value in
        ``[0.0, 0.5, 0.9]``; the metric lists named in ``attr`` are then copied
        into ``self.classes_model`` and the model resource is updated.

        @raise: ConnoisseurException - BAD_REQUEST when ``self.framework`` is
                None, LOCKED when the lock on ``self.lockable`` is not obtained

        NOTE(review): this copy does not parse as written -- the innermost
        ``for a in attr:`` loop has no body and a ``try:``/``except`` pair
        looks to have been dropped (marked inline below).
        """
        if self.framework is None:
            raise ConnoisseurException(responses.BAD_REQUEST, 'Model is incomplete for validation operation')

        with Locks(None, self.lockable, failonexist=True) as l:
            if l.locked is False: # the file is being written by another process
                raise ConnoisseurException(responses.LOCKED, 'The model is locked for processing by another process')

            #log.debug('Initial model:\n\n %s\n\n\n', self.classes_model)

            # dispatch this as a celery task

                # NOTE(review): the deeper indentation from here on suggests a
                # 'try:' line is missing above
                goodnesses = [0.0, 0.5, 0.9]
                results = []

                for my_goodness in goodnesses:
                    v = self.validate_by_goodness(my_goodness)
                    # NOTE(review): nothing visible stores 'v' into 'results',
                    # which is iterated further down -- a line is likely missing

                # initialize classes with resulting vectors
                attr = ['true_positive', 'false_negative', 'false_positive', 'discarded', 'weight', 'accuracy', 'error', 'F1', 'MCC', 'error_contributions']
                keys = range(len(self.classes_model))
                # one fresh dict per class id
                classes = dict(zip(keys, [dict() for i in [None]*len(keys)]))
                for i,v in classes.iteritems():
                    for a in attr:
                        v[a] = []
                    v['goodness'] = goodnesses
                    for r in results:
                        for a in attr:
                            # NOTE(review): the loop body is missing in this copy

                # update classes and resource
                for k,v in classes.iteritems():
                    class_name = self.classes_model_by_id[k]['label']
                    for a in attr:
                        self.classes_model[class_name][a] = v[a]
                    #print 'Class %s\n%s\n'%(class_name, self.classes_model[class_name])

                # NOTE(review): looks like the body of a dropped 'except'
                # clause; as written it would run on every call
                self.update_with_error('status.validate', 'Exception during validate')

            #log.debug('Final model:\n\n %s\n\n\n', self.classes_model)

            #     #print 'id samples good trash fp fn Accu Error F1 MCC class'
            #     print "%s\t%s\t%s\t%s\t%s\t%s\t%.0f%%\t%.1f%%\t%.2f\t%.2f\t%s"%(k, n, tp, d, fp, fn, A, E, F, MCC, cls)
            #     N += n; NW += nw; TP += tpw; FP += fpw; D += d; DW += dw
            #     if has_background_class is True and cls != background_class_name:
            #         NN += n; NNW += nw; TPP += tpw; FPP += fpw; DD += d; DDW += dw

            # print("\nClassified %sx%s samples in %.2fs, %.0fsamples/s"%( total, batch_sz, runtime, float(total*batch_sz)/runtime ))
            # print 'Final accuracy: %.2f%% and error %.2f%% on %s samples with %s discarded (%.1f%%)\n'%( (100.0*TP)/(NW-DW), (100.0*FP)/(NW+FP), N-D, D, (100.0*D)/total )
            # if has_background_class is True:
            #     print 'Accuracy excluding +1: %.2f%% error %.2f%% on %s samples (%.1f%%)\n'%( (100.0*TPP)/(NNW-DDW), (100.0*FPP)/(NNW+FPP), NN-DD, (100.0*DD)/(total-(N-NN)) )

            # publish the updated classes and mark the validation as finished
            set_classes(self.resource, 'classes_model', self.classes_model)
            set_tag(self.resource, 'status.validate', 'finished')
예제 #25
    def process(self, url, ident, resource=None, **kw):
        """Run the operations encoded in ``url`` for the resource ``ident``.

        ``getOperations`` splits the URL into a resource id, a sub-path and a
        list of ``(action, args)`` pairs. When ``ident`` is given and there are
        operations, a dry run is attempted first and an existing result file
        is returned directly; then the original file is ensured, the token is
        initialised and every action is passed to ``self.request``.

        @param: url - request URL, parsed relative to ``self.base_url``
        @param: ident - resource identifier, may be None
        @param: resource - optional dict; only its 'name' entry is read here
        @param: kw - 'user_name', 'imagemeta' and 'timeout' are read
        @return: the resulting ProcessToken
        @raise: ImageServiceFuture - when the lock on an existing result file
                is not obtained
        @raise: ImageServiceException - NOT_FOUND when the original file does
                not exist, 415 when no usable image info is available

        NOTE(review): this copy does not parse as written -- a ``try:`` line,
        several ``else:`` lines and the bodies of a few ``if`` statements are
        missing (marked inline below).
        """
        resource_id, subpath, query = getOperations(url, self.base_url)
        log.debug ('STARTING %s: %s', ident, query)
        log.debug('Current path %s: %s', ident, self.workdir)

        if resource is None:
            resource = {}

        # init the output to a simple file
        token = ProcessToken()

        if ident is not None:
            # pre-compute final filename and check if it exists before starting any other processing
            if len(query)>0:
                series = subpath or 0
                workpath = self.initialWorkPath(ident, user_name=kw.get('user_name', None), series=series)
                token.setFile(workpath, series=series)
                token.dims = self.getImageInfo(filename=token.data, series=token.series, infofile='%s.info'%token.data, meta=kw.get('imagemeta', None) )
                if token.dims is None:
                    log.debug('SKIPPING dryrun processing due to empty image info')
                    # NOTE(review): as indented, the dry run below only happens
                    # when dims is None, contradicting the log line above; an
                    # 'else:' is likely missing here
                    token.init(resource_id=ident, ifnm=token.data, imagemeta=kw.get('imagemeta', None), timeout=kw.get('timeout', None), resource_name=resource.get('name'), dryrun=True)
                    for action, args in query:
                            # NOTE(review): the extra indentation and the
                            # 'except' below indicate a missing 'try:' line
                            service = self.operations.plugins.get(action)
                            #if service is None:
                            #    continue
                            # if the service has a dryrun function, some actions are same as dryrun
                            if callable( getattr(service, "dryrun", None) ):
                                #log.debug ('DRY run: %s calling dryrun', action)
                                token = service.dryrun(token, args)
                                # NOTE(review): as written both dryrun and
                                # action are called; an 'else:' is likely
                                # missing before the next call
                                #log.debug ('DRY run: %s calling action', action)
                                token = service.action(token, args)
                            log.debug ('DRY run: %s producing: %s', action, token.data)
                        except Exception:
                            log.exception('Exception during dryrun')
                        if token.isHttpError():
                            # NOTE(review): the body of this branch is missing in this copy
                    # path of the dry-run result under the resolved work directory
                    localpath = os.path.join(os.path.realpath(self.workdir), token.data)
                    log.debug('Dryrun test %s: [%s] [%s]', ident, localpath, str(token))
                    if token.isFile() and os.path.exists(localpath):
                        log.debug('FINISHED %s: returning pre-cached result %s', ident, token.data)
                        # block_reads is defined outside this method
                        with Locks(token.data, failonread=(not block_reads)) as l:
                            if l.locked is False: # dima: never wait, respond immediately
                                raise ImageServiceFuture((1,10))
                        return token

            log.debug('STARTING full processing %s: with %s', ident, token)

            # dima - randomly raise exceptions for requested resources for testing of the UI
            # this will imitate overloading the server with processing requests
            # breaker = random.choice([False, True])
            # if breaker:
            #     raise ImageServiceFuture((1,15))

            # ----------------------------------------------
            # start the processing
            b = self.ensureOriginalFile(ident, resource=resource)
            #log.debug('Original %s, %s, %s', b.path, b.sub, b.files)
            series = (b.sub or subpath or 0)
            workpath = self.ensureWorkPath(b.path, ident, user_name=kw.get('user_name', None), series=series)
            token.setFile(workpath, series=series)
            token.init(resource_id=ident, ifnm=b.path, imagemeta=kw.get('imagemeta', None), files=b.files, timeout=kw.get('timeout', None), resource_name=resource.get('name'), initial_workpath=workpath, dryrun=None)

            if not os.path.exists(b.path):
                raise ImageServiceException(responses.NOT_FOUND, 'File not found...')

            if len(query)>0:
                token.dims = self.getImageInfo(filename=token.first_input_file(), series=token.series, infofile='%s.info'%token.data, meta=token.meta)
                if token.dims is None or 'image_num_x' not in token.dims:
                    raise ImageServiceException(415, 'File format is not supported...')
                # overwrite fields from resource image meta
                if token.meta is not None:
                    # NOTE(review): the body of this branch is missing in this copy

        #process all the requested operations
        for action,args in query:
            log.debug ('ACTION %s: %s', ident, action)
            token = self.request(action, token, args)
            if token.isHttpError():
                # NOTE(review): the body of this branch is missing in this copy
        token = self.process_queue(token)

        # test output, if it is a file but it does not exist, set 404 error
        # NOTE(review): no code follows the comment above in this copy

        # if the output is a file but not an image or no processing was done to it
        # set to the original file name
        if token.isFile() and not token.isImage() and not token.isText() and not token.hasFileName():
            token.contentType = 'application/octet-stream'
            token.outFileName = token.resource_name

        # if supplied file name overrides filename
        for action,args in query:
            if (action.lower() == 'filename'):
                token.outFileName = args

        log.debug ('FINISHED %s: %s', ident, query)
        return token
예제 #26
    def meta(cls, token, **kw):
        """Return a metadata dict for the token's first input file.

        An empty dict is returned when ``cls.supported(token)`` is false.

        @param: token - must provide ``first_input_file()`` and ``series``
        @param: kw - forwarded unchanged to ``ConverterImgcnv.meta``
        @raise: ImageServiceFuture - when the lock on the input file is not
                obtained

        NOTE(review): this copy does not parse as written -- the ``try:``
        line, the tail of the ``except`` clause and the keys of the ``rd``
        literal are missing (marked inline below). The ``cls`` parameter
        suggests a ``@classmethod`` decorator that is not visible here.
        """
        ifnm = token.first_input_file()
        if not cls.supported(token):
            return {}
        log.debug('Meta for: %s', ifnm)
        # block_reads is defined outside this method
        with Locks(ifnm, failonread=(not block_reads)) as l:
            if l.locked is False:  # dima: never wait, respond immediately
                raise ImageServiceFuture((1, 10))
                # NOTE(review): the two lines below sit at the indentation of
                # the 'raise' above; a 'try:' line is likely missing before them
                _, tmp = misc.start_nounicode_win(ifnm, [])
                # open the slide from 'tmp' when it is set, else from the original name
                slide = openslide.OpenSlide(tmp or ifnm)
            # NOTE(review): the rest of the exception tuple is missing in this copy
            except (openslide.OpenSlideUnsupportedFormatError,
                return {}
            # NOTE(review): only fragments of this literal survive (keys and
            # most values are missing)
            rd = {
                ','.join([str(1.0 / i) for i in slide.level_downsamples]),
                'unsigned integer',
                # new format

            # the fragments below read the openslide MPP_X / MPP_Y properties
            if slide.properties.get(openslide.PROPERTY_NAME_MPP_X,
                                    None) is not None:
                    slide.properties.get(openslide.PROPERTY_NAME_MPP_X, 0),
                    slide.properties.get(openslide.PROPERTY_NAME_MPP_Y, 0),

            # custom - any other tags in proprietary files should go further prefixed by the custom parent
            for k, v in slide.properties.iteritems():
                rd['custom/%s' % k.replace('.', '/')] = v

            # read metadata using imgcnv since openslide does not decode all of the info
            meta = ConverterImgcnv.meta(
                ProcessToken(ifnm=tmp or ifnm, series=token.series), **kw)
            # NOTE(review): as written the openslide-derived entries in 'rd'
            # are discarded here; a merge step may be missing -- confirm upstream
            rd = meta

        return rd
예제 #27
    def info(cls, token, **kw):
        '''Return a dict with file info for the token's first input file.

        An empty dict is returned when ``cls.supported(token)`` is false or
        when the input file does not exist.

        @param: token - must provide ``first_input_file()`` and ``series``
        @param: kw - forwarded unchanged to ``ConverterImgcnv.info``
        @raise: ImageServiceFuture - when the lock on the input file is not
                obtained

        NOTE(review): this copy does not parse as written -- the ``try:``
        line, the tail of the ``except`` clause and the keys of the ``info2``
        literal are missing (marked inline below). The ``cls`` parameter
        suggests a ``@classmethod`` decorator that is not visible here.
        '''
        ifnm = token.first_input_file()
        series = token.series
        if not cls.supported(token):
            return {}
        log.debug('Info for: %s', ifnm)
        # block_reads is defined outside this method
        with Locks(ifnm, failonread=(not block_reads)) as l:
            if l.locked is False:  # dima: never wait, respond immediately
                raise ImageServiceFuture((1, 10))
            if not os.path.exists(ifnm):
                return {}
                # NOTE(review): the two lines below sit at the indentation of
                # the 'return' above; a 'try:' line is likely missing before them
                _, tmp = misc.start_nounicode_win(ifnm, [])
                # open the slide from 'tmp' when it is set, else from the original name
                slide = openslide.OpenSlide(tmp or ifnm)
            # NOTE(review): the rest of the exception tuple is missing in this copy
            except (openslide.OpenSlideUnsupportedFormatError,
                return {}

            # NOTE(review): only fragments of this literal survive (keys and
            # most values are missing)
            info2 = {
                ','.join([str(1.0 / i) for i in slide.level_downsamples]),
                'unsigned integer',

            # the fragments below read the openslide MPP_X / MPP_Y properties
            if slide.properties.get(openslide.PROPERTY_NAME_MPP_X,
                                    None) is not None:
                    slide.properties.get(openslide.PROPERTY_NAME_MPP_X, 0),
                    slide.properties.get(openslide.PROPERTY_NAME_MPP_Y, 0),

            # read metadata using imgcnv since openslide does not decode all of the info
            info = ConverterImgcnv.info(
                ProcessToken(ifnm=tmp or ifnm, series=series), **kw)
            # NOTE(review): 'info2' is not used after it is built in the
            # visible code; a merge step may be missing -- confirm upstream
            return info
        return {}
예제 #28
class TablesLock(object):
    """Provides locks for hdf5 files."""
    def __init__(self, filename, mode='w', failonexist=False, *args, **kwargs):
        """
            Opens hdf5 files providing read/write locks for
            thread safety.

            If libHDF5 is not configured for thread safety please
            set MULTITHREAD_HDF5 to False to keep your feature
            service working in a multithreaded environment.

            Nothing is locked or opened here: only the lock object is
            prepared and the pytables handle starts out as None.

            @param: filename - Name of the hdf5 file
            @param: mode - sets the file access mode (default: 'w')
            @param: failonexist - will not lock if file exists (default: False)
            @param: args - passes arguments to tables.open_file
            @param: kwargs - passes arguments to tables.open_file
        """
        self.filename = filename
        self.mode = mode
        self.args = args
        self.kwargs = kwargs
        self.h5file = None  # pytables handle, filled in once the table is opened

        #create locks
        lock_mode = mode + 'b'
        if mode in ('w', 'a'):  #write lock
            # The hdf5 file is passed as the second (output) argument, matching
            # the other Locks(None, <file>, failonexist=...) write-lock calls
            # in this code base; failonexist is now actually forwarded.
            # NOTE(review): confirm the argument order against the Locks signature
            self.bq_lock = Locks(None,
                                 self.filename,
                                 failonexist=failonexist,
                                 mode=lock_mode)
        else:  #read lock
            self.bq_lock = Locks(self.filename,
                                 failonexist=failonexist,
                                 mode=lock_mode)

    def debug(self, msg):
        """Log detailed info about the locking of threads and files.

        @param: msg - text to log; it is prefixed with the current thread name
        """
        # current_thread().name replaces the deprecated camelCase aliases
        # currentThread()/getName(); the values are passed as logger arguments
        # so the message is only formatted when DEBUG output is enabled
        log.debug("(LOCKING: %s) %s", threading.current_thread().name, msg)

    def acquire(self):
        """
            Acquires the locks for the hdf5 file and opens it.

            When MULTITHREAD_HDF5 is False a global HDF5 lock is announced
            in the debug log (see the review note in the body).

            @return: a pytables file handle. If the file lock is not obtained
            None is returned. If the file cannot be opened a
            FeatureServiceError is raised by _open_table.
        """
        if self.h5file is not None:
            # already acquired through this object: hand back the open handle
            self.debug('%s is already locked' % self.h5file.filename)
            return self.h5file

        if MULTITHREAD_HDF5 is False:
            # NOTE(review): only the log line is present in this copy; the
            # statement that takes the process-wide HDF5 lock is absent --
            # restore it from upstream
            self.debug('Setting HDF5 global lock!')

        self.bq_lock.acquire(self.bq_lock.ifnm, self.bq_lock.ofnm)
        if not self.bq_lock.locked:  #no lock was given on the hdf5 file
            self.debug('Failed to lock hdf5 file!')
            return None

        # open the table only after the file lock is held; this is what sets
        # self.h5file, so without it the method could only ever return None
        self._open_table()
        self.debug('Succesfully acquired tables locks!')
        return self.h5file

    def _open_table(self):
        """
            Opens an hdf5 file under locks.

            The pytables handle is stored in self.h5file.

            @raise: FeatureServiceError - when pytables raises HDF5ExtError
            while opening the file
        """
        if os.name == 'nt' and self.mode == 'w':
            # windows does not make a file initially when locked
            open_mode = 'w'
        elif self.mode == 'w':
            # NOTE(review): presumably the lock has already created the file
            # on other platforms, hence 'r+' rather than 'w' -- confirm
            open_mode = 'r+'
        else:
            # if append is passed make it r+ to protect the locks
            open_mode = self.mode.replace('a', 'r+')

        try:
            self.h5file = tables.open_file(self.filename, open_mode,
                                           *self.args, **self.kwargs)
        except tables.exceptions.HDF5ExtError:
            message = 'Fatal Error: hdf5 file was corrupted! -> %s' % self.filename
            log.exception(message)
            raise FeatureServiceError(error_message=message)

    def release(self):
        """
            Releases all locks and closes and deletes hdf5
            file handle
        """
        try:
            #release file
            if self.h5file is not None:
                # close the pytables handle instead of merely dropping the
                # reference, so buffered data is flushed before unlocking
                self.h5file.close()
        finally:
            # the handle is forgotten and the file lock given back even when
            # close() fails
            self.h5file = None

            if self.bq_lock.locked:
                # assumes Locks.release() is the counterpart of the
                # acquire() call used in acquire() -- TODO confirm
                self.bq_lock.release()

        #release pytables
        if MULTITHREAD_HDF5 is False:
            # NOTE(review): only the log line is present in this copy; the
            # statement that gives back the process-wide HDF5 lock is absent
            # -- restore it from upstream
            self.debug('Releasing HDF5 global lock!')
        self.debug('Successfully release tables locks!')

    def __enter__(self):
        """Enter the ``with`` block and return whatever ``acquire()`` returns."""
        handle = self.acquire()
        return handle

    def __exit__(self, type, value, traceback):