def classify(self, image, model, args):
    ''' segment image producing a semantic mask '''
    num_points = int(args.get('points', 10))
    border = int(args.get('border', 0))
    my_goodness = float(args.get('goodness', model.minimum_goodness*100))/100.0
    my_accuracy = float(args.get('accuracy', model.minimum_accuracy*100))
    my_confidence = float(args.get('confidence', 0))

    # color output mode
    color_mode = args.get('colors', 'ids')
    if color_mode not in self.color_modes:
        raise ConnoisseurException(responses.BAD_REQUEST, 'Requested color mode "%s" is not supported'%color_mode)

    # compute output file name and test cached result
    workdir = args['_workdir']
    _mkdir (workdir)
    filename = '%s_%s_conf%.2f_a%s_c%s_n%s_b%s.png'%(image.uniq, color_mode, my_goodness, my_accuracy, my_confidence, num_points, border)
    output_file = os.path.join(workdir, filename)

    with Locks(None, output_file, failonexist=True) as l:
        if l.locked: # the file is not being currently written by another process
            self.do_classify(image, model, args, output_file, color_mode)

    # return results
    if os.path.exists(output_file):
        with Locks(output_file):
            pass
        return DataToken(data=output_file, mime='image/png', name='Segments', filename=filename)

def run(cls, ifnm, ofnm, args, **kw):
    '''converts input filename into output using exact arguments as provided in args'''
    if not cls.installed:
        return None
    failonread = kw.get('failonread') or (not block_reads)
    tmp = None
    with Locks(ifnm, ofnm, failonexist=True) as l:
        if l.locked: # the file is not being currently written by another process
            command = [cls.CONVERTERCOMMAND]
            command.extend(args)
            log.debug('Run command: [%s]', misc.toascii(command))

            proceed = True
            if ofnm is not None and os.path.exists(ofnm) and os.path.getsize(ofnm)>16:
                if kw.get('nooverwrite', False) is True:
                    proceed = False
                    log.warning ('Run: output exists before command [%s], skipping', misc.toascii(ofnm))
                else:
                    log.warning ('Run: output exists before command [%s], overwriting', misc.toascii(ofnm))

            if proceed is True:
                command, tmp = misc.start_nounicode_win(ifnm, command)
                try:
                    retcode = call (command)
                except Exception:
                    retcode = 100
                    log.exception('Error running command: %s', command)
                misc.end_nounicode_win(tmp)
                if retcode == 99:
                    # in case of a timeout
                    log.info ('Run: timed-out for [%s]', misc.toascii(command))
                    if ofnm is not None and os.path.exists(ofnm):
                        os.remove(ofnm)
                    raise ImageServiceException(412, 'Requested timeout reached')
                if retcode!=0:
                    log.info ('Run: returned [%s] for [%s]', retcode, misc.toascii(command))
                    return None
                if ofnm is None:
                    return str(retcode)
                # output file does not exist for some operations, like tiles
                # tile command does not produce a file with this filename
                # if not os.path.exists(ofnm):
                #     log.error ('Run: output does not exist after command [%s]', ofnm)
                #     return None
        elif l.locked is False: # dima: never wait, respond immediately
            raise ImageServiceFuture((1,15))

    # make sure the write of the output file has finished
    if ofnm is not None and os.path.exists(ofnm):
        with Locks(ofnm, failonread=failonread) as l:
            if l.locked is False: # dima: never wait, respond immediately
                raise ImageServiceFuture((1,15))

    # safeguard for incorrectly converted files, sometimes only the tiff header can be written
    # empty lock files are automatically removed before by lock code
    if os.path.exists(ofnm) and os.path.getsize(ofnm) < cls.MINIMUM_FILE_SIZE:
        log.error ('Run: output file is smaller than %s bytes, probably an error, removing [%s]', cls.MINIMUM_FILE_SIZE, ofnm)
        os.remove(ofnm)
        return None
    return ofnm

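# A minimal sketch of the caching pattern used throughout these functions, assuming the
# Locks and ImageServiceFuture classes from this codebase; produce_output() and the
# file names are hypothetical placeholders, not part of the original code.
def cached_produce(ifnm, ofnm):
    # try to take the exclusive write lock on the output; failonexist makes this a
    # no-op when another process already produced (or is currently producing) the file
    with Locks(ifnm, ofnm, failonexist=True) as l:
        if l.locked:
            produce_output(ifnm, ofnm)  # hypothetical worker producing ofnm
        elif l.locked is False:
            # somebody else is writing: respond immediately and ask the client to retry
            raise ImageServiceFuture((1, 15))
    # take a shared read lock to ensure any writer has finished before returning the file
    with Locks(ofnm, failonread=True) as l:
        if l.locked is False:
            raise ImageServiceFuture((1, 15))
    return ofnm
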
def irods_cache_save(f, path, cache, *dest):
    cache_filename = os.path.join(cache, path[1:])
    _mkdir(os.path.dirname(cache_filename))
    with Locks(None, cache_filename, failonexist=True) as l:
        if l.locked:
            with open(cache_filename, 'wb') as fw:
                copyfile(f, fw, *dest)
    with Locks(cache_filename):
        return cache_filename

def getImageInfo(self, filename, series=0, infofile=None, meta=None):
    if infofile is None:
        infofile = '%s.info'%filename
    info = {}

    # read image info using converters
    if not os.path.exists(infofile):
        # sanity check
        if not os.path.exists(filename):
            return None

        with Locks(filename, infofile, failonexist=True) as l:
            if l.locked: # the file is not being currently written by another process
                # parse image info from original file
                file_speed = infofile.replace('.info', '.speed')
                for n,c in self.converters.iteritems():
                    info = c.info(ProcessToken(ifnm=filename, series=series), speed=file_speed)
                    if info is not None and len(info)>0:
                        info['converter'] = n
                        break
                if info is None or 'image_num_x' not in info:
                    return None
                info.setdefault('image_num_t', 1)
                info.setdefault('image_num_z', 1)
                info.setdefault('image_num_p', info['image_num_t'] * info['image_num_z'])
                info.setdefault('format', default_format)
                if 'filesize' not in info:
                    info.setdefault('filesize', os.path.getsize(filename))
                if meta is not None:
                    info.update(meta)

                # cache file info into a file
                image = etree.Element ('image')
                for k,v in info.iteritems():
                    image.set(k, '%s'%v)
                with open(infofile, 'w') as f:
                    f.write(etree.tostring(image))
                return info
            elif l.locked is False: # dima: never wait, respond immediately
                raise ImageServiceFuture((1,10))

    # info file exists
    with Locks(infofile, failonread=(not block_reads)) as l:
        if l.locked is False: # dima: never wait, respond immediately
            raise ImageServiceFuture((1,10))
        try:
            image = etree.parse(infofile).getroot()
            for k,v in image.attrib.iteritems():
                info[k] = safetypeparse(v)
            return info
        except etree.XMLSyntaxError:
            log.debug ("attempt to read empty info file")
            return None

def format(self, token, args):
    """ converts table to HDF5 """
    # GobsTable = np.dtype([
    #     ('gobject', tables.StringCol(10)),
    #     ('type', tables.StringCol(50)),
    #     ('vertices', tables.VLArray()), # Col.from_atom(atom, pos=None)
    #     ('accuracy', tables.Float32Col()),
    #     ('goodness', tables.Float32Col()),
    #     ('confidence', tables.Float32Col()),
    # ])
    data = {
        'gobject': [],
        'type': [],
        'vertices': [],
        'accuracy': [],
        'goodness': [],
        'confidence': [],
        #'color': [],
    }
    for i, r in enumerate(token.data):
        m_g, m_a, m_c = compute_measures (r)
        data['gobject'].append(r['gob'])
        data['type'].append(r['label'])
        data['vertices'].append([ (v[1],v[0]) for v in r['vertex'] ])
        data['accuracy'].append(m_a)
        data['goodness'].append(m_g)
        data['confidence'].append(m_c)
        #data['color'].append(get_color_html(r['id']))
    df = pd.DataFrame(data)

    workdir = args['_workdir']
    _mkdir (workdir)
    filename = '%s_%s.h5'%(token.name, args['_filename'])
    output_file = os.path.join(workdir, filename)

    with Locks(None, output_file, failonexist=True) as l:
        if l.locked: # the file is not being currently written by another process
            df.to_hdf(output_file, 'table', append=False)

    # return results
    if os.path.exists(output_file):
        with Locks(output_file):
            pass
        return token.setFile(path=output_file, mime=self.mime_type, filename=filename)

def s3_download(bucket, key, cache_filename, creds, blocking):
    s3_client = boto3.client('s3', **creds)
    with Locks(None, cache_filename, failonexist=True) as l:
        if l.locked is True:
            with Timer() as t:
                s3_client.download_file(bucket, key, cache_filename)
            log.info("S3 Downloaded %s", rate_str(cache_filename, t))
    if cache_filename is not None and os.path.exists(cache_filename):
        with Locks(cache_filename, failonread=(not blocking)) as l:
            if l.locked is False:
                return None
        return cache_filename
    return None

def train(self, method='finetune'):
    if self.framework is None:
        raise ConnoisseurException(responses.BAD_REQUEST, 'Model is incomplete for train operation')

    with Locks(None, self.lockable, failonexist=True) as l:
        if l.locked is False: # the file is being written by another process
            raise ConnoisseurException(responses.LOCKED, 'The model is locked for processing by another process')

        # dispatch this as a celery task
        self.update_status(status='Training')
        try:
            self.framework.deactivate()
            r = self.framework.update_model_from_template()
            r = self.framework.train(method=method)
        except:
            self.update_with_error('status.train', 'Exception during train')
            raise

        set_tag(self.resource, 'status.train', 'finished')
        self.sync_resource(status='finished')

        self.framework.activate(training=False)
        if self.framework.is_activate() is False:
            raise ConnoisseurException(responses.INTERNAL_SERVER_ERROR, 'Could not activate network after training, something is wrong...')
        return r

def split_samples_training_testing(self, args=None):
    if self.framework is None:
        raise ConnoisseurException(responses.BAD_REQUEST, 'Model is incomplete for split operation')

    with Locks(None, self.lockable, failonexist=True) as l:
        if l.locked is False: # the file is being written by another process
            raise ConnoisseurException(responses.LOCKED, 'The model is locked for processing by another process')

        # dispatch this as a celery task
        self.update_status(status='Splitting data for training and testing')

        paths = None
        if args is not None and 'template_path' in args:
            paths = self.get_sample_preview_paths(args.get('template_path'), args.get('template_filename'), args.get('num_per_class'))

        try:
            r = self.framework.split_samples_training_testing(sample_preview_paths=paths)
        except:
            self.update_with_error('status.samples.split', 'Exception during split_samples_training_testing')
            raise

        set_classes(self.resource, 'classes_model', self.classes_model)
        set_tag(self.resource, 'status.samples.split', 'finished')
        self.sync_resource(status='finished')
        return r

def s3_upload(bucket, key, cache_filename, creds):
    s3_client = boto3.client('s3', **creds)
    with Locks(cache_filename):
        with Timer() as t:
            s3_client.upload_file(cache_filename, bucket, key)
        size_bytes = os.path.getsize(cache_filename)
        log.info("S3 Uploaded %s", rate_str(cache_filename, t))

def return_from_workdir(self, table, resource_list):
    """
    Returns an hdf5 file from the workdir

    @param: table - workdir table object that allows access to the workdir table
        created by the feature service
    @param: resource_list - the resource list object containing all the resources
        processed during the request

    @yield: fileapp object with path set to the hdf5 file in the feature workdir
    """
    # since the uncached table is already saved in the workdir the file is just returned
    try:
        disposition = 'attachment; filename="%s"' % (table.filename).encode('ascii')
    except UnicodeEncodeError:
        disposition = 'attachment; filename="%s"; filename*="%s"' % ((table.filename).encode('utf8'), (table.filename).encode('utf8'))

    # wait for the table that is being constructed
    with Locks(table.path):
        pass

    return forward(FileApp(table.path, allowed_methods=('GET', 'POST'), content_type=self.content_type, content_disposition=disposition))

def tile(cls, token, ofnm, level=None, x=None, y=None, sz=None, **kw):
    '''extract tile from image
    default interface:
        Level,X,Y tile from input filename into output in TIFF format
    alternative interface, not required to support and may return None in this case:
        scale=scale, x1=x1, y1=y1, x2=x2, y2=y2, arbitrary_size=False
    '''
    # the openslide driver does not support the arbitrary size interface
    if kw.get('arbitrary_size', False) == True or level is None or sz is None:
        return None

    ifnm = token.first_input_file()
    series = token.series
    if not cls.supported(token):
        return None
    log.debug('Tile: %s %s %s %s %s for [%s]', level, x, y, sz, series, ifnm)

    level = misc.safeint(level, 0)
    x = misc.safeint(x, 0)
    y = misc.safeint(y, 0)
    sz = misc.safeint(sz, 0)
    with Locks(ifnm, ofnm, failonexist=True) as l:
        if l.locked: # the file is not being currently written by another process
            try:
                _, tmp = misc.start_nounicode_win(ifnm, [])
                slide = openslide.OpenSlide(tmp or ifnm)
                dz = deepzoom.DeepZoomGenerator(slide, tile_size=sz, overlap=0)
                img = dz.get_tile(dz.level_count - level - 1, (x, y))
                img.save(ofnm, 'TIFF', compression='LZW')
                slide.close()
                misc.end_nounicode_win(tmp)
            except (openslide.OpenSlideUnsupportedFormatError, openslide.OpenSlideError):
                misc.end_nounicode_win(tmp)
                return None

    # make sure the file was written
    with Locks(ofnm, failonread=(not block_reads)) as l:
        if l.locked is False: # dima: never wait, respond immediately
            raise ImageServiceFuture((1, 15))
    return ofnm

def run_read(cls, ifnm, command):
    with Locks(ifnm, failonread=(not block_reads)) as l:
        if l.locked is False: # dima: never wait, respond immediately
            raise ImageServiceFuture((1,10))
        command, tmp = misc.start_nounicode_win(ifnm, command)
        log.debug('run_read command: [%s]', misc.toascii(command))
        out = cls.run_command(command)
        misc.end_nounicode_win(tmp)
    return out

def thumbnail(cls, token, ofnm, width, height, **kw):
    '''converts input filename into output thumbnail'''
    ifnm = token.first_input_file()
    series = token.series
    if not cls.supported(token):
        return None
    log.debug('Thumbnail: %s %s %s for [%s]', width, height, series, ifnm)
    fmt = kw.get('fmt', 'jpeg').upper()
    with Locks(ifnm, ofnm, failonexist=True) as l:
        if l.locked: # the file is not being currently written by another process
            try:
                _, tmp = misc.start_nounicode_win(ifnm, [])
                slide = openslide.OpenSlide(tmp or ifnm)
            except (openslide.OpenSlideUnsupportedFormatError, openslide.OpenSlideError):
                misc.end_nounicode_win(tmp)
                return None
            img = slide.get_thumbnail((width, height))
            try:
                img.save(ofnm, fmt)
            except IOError:
                tmp = '%s.tif' % ofnm
                img.save(tmp, 'TIFF')
                ConverterImgcnv.thumbnail(ProcessToken(ifnm=tmp), ofnm=ofnm, width=width, height=height, **kw)
            slide.close()
            misc.end_nounicode_win(tmp)
        elif l.locked is False: # dima: never wait, respond immediately
            raise ImageServiceFuture((1, 15))

    # make sure the file was written
    with Locks(ofnm, failonread=(not block_reads)) as l:
        if l.locked is False: # dima: never wait, respond immediately
            raise ImageServiceFuture((1, 15))
    return ofnm

def init_classes_dataset(self):
    if self.training_set is None:
        raise ConnoisseurException(responses.BAD_REQUEST, 'Cannot initialize classes due to missing training dataset')

    with Locks(None, self.lockable, failonexist=True) as l:
        if l.locked is False: # the file is being written by another process
            raise ConnoisseurException(responses.LOCKED, 'The model is locked for processing by another process')

        try:
            dataset_url = ensure_url(self.training_set)
            adapter_gobs = self.create_adapter_gobs(model=self, image=None)

            classes = {}
            gobs = data_service.query(resource_type='value', parent=dataset_url, extract='gobject[type]')
            idx = 0
            self.total_samples = 0
            for g in gobs:
                k = g.get('type')
                n = misc.safeint(g.text, 0)
                if k is None:
                    continue
                # adapt the class name, might need some change since the node is not a true gobject
                k = adapter_gobs.get_class_name(g)
                if k is None:
                    continue
                if k not in classes:
                    classes[k] = {
                        'label': k,
                        'id': idx,
                        'samples': n,
                    }
                    idx += 1
                else:
                    classes[k]['samples'] += n
                self.total_samples += n

            self.classes_data = classes
            self.classes_data_by_original_id = dict((v['id'],v) for k,v in self.classes_data.iteritems())
            #log.debug('Classes data: %s', str(self.classes_data))

            self.classes_model = {}
            self.classes_model_by_id = {}
            self.classes_model_by_original_id = {}
            self.number_classes_in_model = 0
        except:
            self.update_with_error('status.classes.init', 'Exception during init_classes_dataset')
            raise

        # update model resource
        set_tag(self.resource, 'total_samples', self.total_samples)
        set_classes(self.resource, 'classes_data', self.classes_data)
        set_classes(self.resource, 'classes_model', self.classes_model)
        set_tag(self.resource, 'status.classes.init', 'finished')
        self.sync_resource()

def run_read(cls, ifnm, command):
    with Locks(ifnm, failonread=(not block_reads)) as l:
        if l.locked is False: # dima: never wait, respond immediately
            raise ImageServiceFuture((1, 15))
        #command, tmp = misc.start_nounicode_win(ifnm, command)
        log.debug('run_read dylib command: %s', misc.tounicode(command))
        #out = cls.run_command( command )
        #misc.end_nounicode_win(tmp)
        retcode, out = call_imgcnvlib(command)
        if retcode == 100 or retcode == 101:
            # some error in libbioimage, retry once
            log.error('Libbioimage retcode %s: retry once: %s', retcode, command)
            retcode, out = call_imgcnvlib(command)
        #log.debug('Retcode: %s', retcode)
        #log.debug('out: %s', out)
    return out

def scandir(dirname, options, logger):
    """Scan a directory, freeing oldest files until the free-space target is achieved"""
    stats = os.statvfs(dirname)
    f_bavail = stats.f_bavail
    f_blocks = stats.f_blocks
    f_bfree = stats.f_bfree
    percent_free = percent_free_last = 100.0 - ((f_blocks-f_bfree) * 100.0 / (f_blocks-f_bfree+f_bavail))
    files_removed = 0
    logger.info("Filesystem %s before cleaning %s%% free", dirname, int(percent_free))
    if percent_free < float(options.capacity):
        for filename, _, size in iter_files_by_atime(dirname, include_pattern=options.include_pattern, exclude_pattern=options.exclude_pattern):
            try:
                with Locks(None, filename, failonexist=False, mode='ab') as bq_lock:
                    if bq_lock.locked:
                        # we have an exclusive lock => OK to delete
                        if options.dryrun:
                            logger.info("(simulated) delete %s (%s bytes)", filename, size)
                            f_bavail += math.ceil(float(size) / float(stats.f_frsize))
                            f_bfree += math.ceil(float(size) / float(stats.f_frsize))
                        else:
                            logger.debug("delete %s (%s bytes)", filename, size)
                            os.remove(filename)
                            files_removed += 1
                            if percent_free_last < percent_free-0.1:
                                # time to refresh stats
                                stats = os.statvfs(dirname)
                                f_bavail = stats.f_bavail
                                f_bfree = stats.f_bfree
                                percent_free_last = percent_free
                            else:
                                f_bavail += math.ceil(float(size) / float(stats.f_frsize))
                                f_bfree += math.ceil(float(size) / float(stats.f_frsize))
                        percent_free = percent_free_last = 100.0 - ((f_blocks-f_bfree) * 100.0 / (f_blocks-f_bfree+f_bavail))
                        logger.debug("now %s%% free", percent_free)
                    else:
                        logger.info("lock on %s failed, skipping", filename)
            except IOError:
                logger.info("IO error accessing %s, skipping", filename)
            if percent_free >= float(options.capacity):
                break
    logger.info("Filesystem %s after cleaning %s%% free, removed %s files", dirname, int(percent_free), files_removed)

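# A small worked example of the free-space formula used in scandir, with made-up
# statvfs numbers (all values below are hypothetical): "percent free" is computed as
# blocks available to unprivileged users over (used + available), which deliberately
# ignores the root-reserved blocks.
def percent_free_example():
    f_blocks = 1000   # total blocks on the filesystem
    f_bfree = 300     # free blocks, including root-reserved ones
    f_bavail = 250    # blocks available to unprivileged users
    used = f_blocks - f_bfree                            # 700
    return 100.0 - (used * 100.0 / (used + f_bavail))    # 100 - 70000/950 ~= 26.3% free
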
def create_sample_db(self):
    if self.training_set is None:
        raise ConnoisseurException(responses.BAD_REQUEST, 'Cannot create sample DB due to missing training dataset')

    with Locks(None, self.lockable, failonexist=True) as l:
        if l.locked is False: # the file is being written by another process
            raise ConnoisseurException(responses.LOCKED, 'The model is locked for processing by another process')

        dataset_url = ensure_url(self.training_set)
        dataset = data_service.get_resource(dataset_url, view='full')
        if dataset is None or dataset.tag != 'dataset':
            raise ConnoisseurException(responses.BAD_REQUEST, 'Provided resource is not a dataset')

        self.training_set_timestamp = dataset.get('ts')
        set_tag(self.resource, 'training_set_timestamp', self.training_set_timestamp)

        images = []
        refs = dataset.xpath('value[@type="object"]')
        for r in refs:
            images.append(XImage(base_url=r.text))
        self.total_images = len(images)
        set_tag(self.resource, 'total_images', self.total_images)

        self.update_status(status='Creating sample db')

        # dima: this should be parallelized
        #r = self.framework.create_sample_db(images)
        log.info('STARTING samples:init for %s images', self.total_images)
        try:
            r = None
            for i,image in enumerate(images):
                log.info('PROCESSING samples:init %s/%s for %s', i, self.total_images, image)
                r = self.framework.update_sample_db(image)
        except:
            self.update_with_error('status.samples.init', 'Exception during create_sample_db')
            raise
        log.info('FINISHED samples:init for %s images', self.total_images)

        set_classes(self.resource, 'classes_model', self.classes_model)
        set_tag(self.resource, 'status.samples.init', 'finished')
        self.sync_resource(status='finished')
        return r

def action(self, token, arg):
    if not token.isFile():
        raise ImageServiceException(400, 'Roi: input is not an image...')
    rois = []
    for a in arg.split(';'):
        vs = a.split(',', 4)
        x1 = int(vs[0]) if len(vs) > 0 and vs[0].isdigit() else 0
        y1 = int(vs[1]) if len(vs) > 1 and vs[1].isdigit() else 0
        x2 = int(vs[2]) if len(vs) > 2 and vs[2].isdigit() else 0
        y2 = int(vs[3]) if len(vs) > 3 and vs[3].isdigit() else 0
        rois.append((x1, y1, x2, y2))
    x1, y1, x2, y2 = rois[0]

    if x1 <= 0 and x2 <= 0 and y1 <= 0 and y2 <= 0:
        raise ImageServiceException(400, 'ROI: region is not provided')

    ifile = token.first_input_file()
    otemp = token.data
    ofile = '%s.roi_%d,%d,%d,%d' % (token.data, x1 - 1, y1 - 1, x2 - 1, y2 - 1)
    log.debug('ROI %s: %s to %s', token.resource_id, ifile, ofile)

    if len(rois) == 1:
        info = {
            'image_num_x': x2 - x1,
            'image_num_y': y2 - y1,
        }
        command = ['-roi', '%s,%s,%s,%s' % (x1 - 1, y1 - 1, x2 - 1, y2 - 1)]
        return self.server.enqueue(token, 'roi', ofile, fmt=default_format, command=command, dims=info)

    # remove pre-computed ROIs
    rois = [(_x1, _y1, _x2, _y2) for _x1, _y1, _x2, _y2 in rois
            if not os.path.exists('%s.roi_%d,%d,%d,%d' % (otemp, _x1 - 1, _y1 - 1, _x2 - 1, _y2 - 1))]

    lfile = '%s.rois' % (otemp)
    command = token.drainQueue()
    if not os.path.exists(ofile) or len(rois) > 0:
        # global ROI lock on this input since we can't lock on all individual outputs
        with Locks(ifile, lfile, failonexist=True) as l:
            if l.locked: # the file is not being currently written by another process
                s = ';'.join(['%s,%s,%s,%s' % (x1 - 1, y1 - 1, x2 - 1, y2 - 1) for x1, y1, x2, y2 in rois])
                command.extend(['-roi', s])
                command.extend(['-template', '%s.roi_{x1},{y1},{x2},{y2}' % otemp])
                self.server.imageconvert(token, ifile, ofile, fmt=default_format, extra=command)
                # ensure the virtual locking file is not removed
                with open(lfile, 'wb') as f:
                    f.write('#Temporary locking file')
            elif l.locked is False: # dima: never wait, respond immediately
                raise ImageServiceFuture((1, 15))

    # ensure the operation is finished
    if os.path.exists(lfile):
        with Locks(lfile, failonread=(not block_reads)) as l:
            if l.locked is False: # dima: never wait, respond immediately
                raise ImageServiceFuture((1, 15))

    info = {
        'image_num_x': x2 - x1,
        'image_num_y': y2 - y1,
    }
    return token.setImage(ofile, fmt=default_format, dims=info, input=ofile)

def irods_cache_fetch(path, cache):
    cache_filename = os.path.join(cache, path[1:])
    if os.path.exists(cache_filename):
        with Locks(cache_filename): # checks if currently being written
            return cache_filename
    return None

def action(self, token, arg):
    '''arg = l,tnx,tny,tsz'''
    if not token.isFile():
        raise ImageServiceException(400, 'Tile: input is not an image...')
    level = 0
    tnx = 0
    tny = 0
    tsz = 512
    vs = arg.split(',', 4)
    if len(vs) > 0 and vs[0].isdigit():
        level = int(vs[0])
    if len(vs) > 1 and vs[1].isdigit():
        tnx = int(vs[1])
    if len(vs) > 2 and vs[2].isdigit():
        tny = int(vs[2])
    if len(vs) > 3 and vs[3].isdigit():
        tsz = int(vs[3])
    log.debug('Tile: l:%d, tnx:%d, tny:%d, tsz:%d' % (level, tnx, tny, tsz))

    # if input image is smaller than the requested tile size
    dims = token.dims or {}
    width = dims.get('image_num_x', 0)
    height = dims.get('image_num_y', 0)
    if width <= tsz and height <= tsz:
        log.debug('Image is smaller than requested tile size, passing the whole image...')
        return token

    # construct a sliced filename
    ifname = token.first_input_file()
    base_name = '%s.tiles' % (token.data)
    _mkdir(base_name)
    ofname = os.path.join(base_name, '%s_%.3d_%.3d_%.3d' % (tsz, level, tnx, tny))
    hist_name = os.path.join(base_name, '%s_histogram' % (tsz))

    # if input image does not contain a tile pyramid, create one and pass it along
    if dims.get('image_num_resolution_levels', 0) < 2 or dims.get('tile_num_x', 0) < 1:
        pyramid = '%s.pyramid.tif' % (token.data)
        command = token.drainQueue()
        if not os.path.exists(pyramid):
            #command.extend(['-ohst', hist_name])
            command.extend(['-options', 'compression lzw tiles %s pyramid subdirs' % default_tile_size])
            log.debug('Generate tiled pyramid %s: from %s to %s with %s', token.resource_id, ifname, pyramid, command)
            r = self.server.imageconvert(token, ifname, pyramid, fmt=default_format, extra=command)
            if r is None:
                raise ImageServiceException(500, 'Tile: could not generate pyramidal file')
        # ensure the file was created
        with Locks(pyramid, failonread=(not block_tile_reads)) as l:
            if l.locked is False: # dima: never wait, respond immediately
                fff = (width * height) / (10000 * 10000)
                raise ImageServiceFuture((15 * fff, 30 * fff))

        # compute the number of pyramidal levels
        # sz = max(width, height)
        # num_levels = math.ceil(math.log(sz, 2)) - math.ceil(math.log(min_level_size, 2)) + 1
        # scales = [1/float(pow(2,i)) for i in range(0, num_levels)]
        # info = {
        #     'image_num_resolution_levels': num_levels,
        #     'image_resolution_level_scales': ','.join([str(i) for i in scales]),
        #     'tile_num_x': default_tile_size,
        #     'tile_num_y': default_tile_size,
        #     'converter': ConverterImgcnv.name,
        # }

        # load the number of pyramidal levels from the file
        info2 = self.server.getImageInfo(filename=pyramid)
        info = {
            'image_num_resolution_levels': info2.get('image_num_resolution_levels'),
            'image_resolution_level_scales': info2.get('image_resolution_level_scales'),
            'tile_num_x': info2.get('tile_num_x'),
            'tile_num_y': info2.get('tile_num_y'),
            'converter': info2.get('converter'),
        }
        log.debug('Updating original input to pyramidal version %s: %s -> %s', token.resource_id, ifname, pyramid)
        token.setImage(ofname, fmt=default_format, dims=info, input=pyramid)
        ifname = pyramid

    # compute output tile size
    dims = token.dims or {}
    x = tnx * tsz
    y = tny * tsz
    if x >= width or y >= height:
        raise ImageServiceException(400, 'Tile: tile position outside of the image: %s,%s' % (tnx, tny))

    # the new tile service does not change the number of z points in the image and,
    # if it contains all z, will perform the operation
    info = {
        'image_num_x': tsz if width - x >= tsz else width - x,
        'image_num_y': tsz if height - y >= tsz else height - y,
        #'image_num_z': 1,
        #'image_num_t': 1,
    }

    #log.debug('Inside pyramid dims: %s', dims)
    #log.debug('Inside pyramid input: %s', token.first_input_file())
    #log.debug('Inside pyramid data: %s', token.data)

    # extract individual tile from pyramidal tiled image
    if dims.get('image_num_resolution_levels', 0) > 1 and dims.get('tile_num_x', 0) > 0:
        # dima: maybe better to test converter, if imgcnv then enqueue, otherwise proceed with the converter path
        if dims.get('converter', '') == ConverterImgcnv.name:
            c = self.server.converters[ConverterImgcnv.name]
            r = c.tile(token, ofname, level, tnx, tny, tsz)
            if r is not None:
                if not os.path.exists(hist_name):
                    # write the histogram file if missing
                    c.writeHistogram(token, ofnm=hist_name)
                # if decoder returned a list of operations for imgcnv to enqueue
                if isinstance(r, list):
                    #r.extend(['-ihst', hist_name])
                    token.histogram = hist_name
                    return self.server.enqueue(token, 'tile', ofname, fmt=default_format, command=r, dims=info)

        # try other decoders to read tiles
        ofname = '%s.tif' % ofname
        if os.path.exists(ofname):
            return token.setImage(ofname, fmt=default_format, dims=info, hist=hist_name, input=ofname)
        else:
            r = None
            for n, c in self.server.converters.iteritems():
                if n == ConverterImgcnv.name:
                    continue
                if callable(getattr(c, "tile", None)):
                    r = c.tile(token, ofname, level, tnx, tny, tsz)
                    if r is not None:
                        if not os.path.exists(hist_name):
                            # write the histogram file if missing
                            c.writeHistogram(token, ofnm=hist_name)
                        return token.setImage(ofname, fmt=default_format, dims=info, hist=hist_name, input=ofname)

    raise ImageServiceException(500, 'Tile could not be extracted')

def main():
    parser = argparse.ArgumentParser(description='Clean specific files from directory trees.')
    parser.add_argument('paths', nargs='+', help='directory to clean')
    parser.add_argument('-c', '--free', dest="capacity", default='80', help="target free capacity (in percent of drive), default: 80")
    parser.add_argument('-l', '--loop', dest="loop", help="wait time between cleaning cycles (in s), default: no cycle")
    parser.add_argument('-r', '--dryrun', action="store_true", default=False, help='simulate what would happen')
    parser.add_argument('-d', '--debug', action="store_true", default=False, help='print debug log')
    parser.add_argument('-i', '--include', dest="include_pattern", action='append', help='filename pattern to include')
    parser.add_argument('-e', '--exclude', dest="exclude_pattern", action='append', help='filename pattern to exclude')
    parser.add_argument('--log-ini', dest='logini', default=None, help='logging config ini')
    parser.add_argument('--prerun', default=None, help="Run script before processing")
    parser.add_argument('--postrun', default=None, help="Run script after processing")
    parser.add_argument('--lockdir', default=None, help="Directory for locks (default is the dir path). Ensures a single cleaner")

    options = parser.parse_args()
    args = options.paths
    dirnames = [arg.rstrip('/') for arg in args]
    if options.dryrun:
        print options

    if options.logini:
        logging.config.fileConfig(options.logini)
    else:
        logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    logger = logging.getLogger('bq.file_cleaner')
    if options.debug:
        logger.setLevel(logging.DEBUG)

    while True:
        if options.prerun:
            status = subprocess.call(options.prerun, shell=True)
            if status != 0:
                logger.error("Prerun %s failed with status %s", options.prerun, status)
            else:
                logger.info("PRERUN %s: OK", options.prerun)

        for dirname in dirnames:
            skipped = False
            lockname = os.path.join(options.lockdir or dirname, 'xCLEANERx')
            with Locks(None, lockname, failonexist=True) as fl:
                if not fl.locked: # somebody else is cleaning this directory
                    skipped = True
                    logger.info("%s was locked .. skipping", lockname)
                    break
                with open(lockname, 'wb') as fl:
                    scandir(dirname, options, logger)
            os.remove(lockname)

        if not skipped and options.postrun:
            status = subprocess.call(options.postrun, shell=True)
            if status != 0:
                logger.error("Postrun %s failed with status %s", options.postrun, status)
            else:
                logger.info("POSTRUN %s: OK", options.postrun)

        if options.loop:
            time.sleep(float(options.loop))
        else:
            break

def init_classes_model(self):
    if self.classes_data is None:
        raise ConnoisseurException(responses.BAD_REQUEST, 'Cannot initialize model classes due to missing data classes')

    with Locks(None, self.lockable, failonexist=True) as l:
        if l.locked is False: # the file is being written by another process
            raise ConnoisseurException(responses.LOCKED, 'The model is locked for processing by another process')

        try:
            classes = {}
            idx = 0

            # append background class if requested
            if self.use_background_class is True:
                classes[self.background_class_name] = {
                    'label': self.background_class_name,
                    'id': idx,
                    'id_original': -1,
                    'samples': 0,
                    'samples_training': 0,
                    'samples_validation': 0,
                    'samples_testing': 0,
                    'ignored': True,
                }
                idx += 1

            # add all the classes found in the dataset
            self.total_samples = 0
            for k,c in self.classes_data.iteritems():
                if c['samples'] >= self.minimum_samples and c.get('ignored', False) is not True:
                    classes[k] = {
                        'label': k,
                        'id': idx,
                        'id_original': c['id'],
                        #'samples': c['samples'],
                        'samples': 0,
                        'samples_training': 0,
                        'samples_validation': 0,
                        'samples_testing': 0,
                    }
                    if c['samples'] < self.minimum_samples_augmentation:
                        classes[k]['samples_actual'] = c['samples']
                        self.total_samples += c['samples'] * AUGMENTATION_SCALE
                    else:
                        self.total_samples += c['samples']
                    idx += 1
                elif self.use_background_class is True:
                    #classes[self.background_class_name]['samples'] += c['samples']
                    self.total_samples += c['samples']
        except:
            self.update_with_error('status.classes.filter', 'Exception during init_classes_model')
            raise

        if self.total_samples < 10:
            self.update_with_error('status.classes.filter', 'Too few samples were found, model cannot be trained')
            raise ConnoisseurException(responses.NO_CONTENT, 'Too few samples were found, model cannot be trained')

        self.classes_model = classes
        self.classes_model_by_id = dict((v['id'],v) for k,v in self.classes_model.iteritems())
        self.classes_model_by_original_id = dict((v['id_original'],v) for k,v in self.classes_model.iteritems())
        self.number_classes_in_model = len(self.classes_model)

        # update model resource
        set_tag(self.resource, 'total_samples', self.total_samples)
        set_classes(self.resource, 'classes_model', self.classes_model)
        set_tag(self.resource, 'status.classes.filter', 'finished')
        self.sync_resource()

def validate(self):
    if self.framework is None:
        raise ConnoisseurException(responses.BAD_REQUEST, 'Model is incomplete for validation operation')

    with Locks(None, self.lockable, failonexist=True) as l:
        if l.locked is False: # the file is being written by another process
            raise ConnoisseurException(responses.LOCKED, 'The model is locked for processing by another process')

        #log.debug('Initial model:\n\n %s\n\n\n', self.classes_model)

        # dispatch this as a celery task
        self.update_status(status='Validating')
        try:
            goodnesses = [0.0, 0.5, 0.9]
            results = []
            for my_goodness in goodnesses:
                v = self.validate_by_goodness(my_goodness)
                results.append(v)

            # initialize classes with resulting vectors
            attr = ['true_positive', 'false_negative', 'false_positive', 'discarded', 'weight', 'accuracy', 'error', 'F1', 'MCC', 'error_contributions']
            keys = range(len(self.classes_model))
            classes = dict(zip(keys, [dict() for i in [None]*len(keys)]))
            for i,v in classes.iteritems():
                for a in attr:
                    v[a] = []
                v['goodness'] = goodnesses
                for r in results:
                    for a in attr:
                        v[a].append(r[i][a])

            # update classes and resource
            attr.append('goodness')
            for k,v in classes.iteritems():
                class_name = self.classes_model_by_id[k]['label']
                for a in attr:
                    self.classes_model[class_name][a] = v[a]
                #print 'Class %s\n%s\n'%(class_name, self.classes_model[class_name])
        except:
            self.update_with_error('status.validate', 'Exception during validate')
            raise

        #log.debug('Final model:\n\n %s\n\n\n', self.classes_model)

        # #print 'id samples good trash fp fn Accu Error F1 MCC class'
        # print "%s\t%s\t%s\t%s\t%s\t%s\t%.0f%%\t%.1f%%\t%.2f\t%.2f\t%s"%(k, n, tp, d, fp, fn, A, E, F, MCC, cls)
        # N += n; NW += nw; TP += tpw; FP += fpw; D += d; DW += dw
        # if has_background_class is True and cls != background_class_name:
        #     NN += n; NNW += nw; TPP += tpw; FPP += fpw; DD += d; DDW += dw
        # print("\nClassified %sx%s samples in %.2fs, %.0fsamples/s"%( total, batch_sz, runtime, float(total*batch_sz)/runtime ))
        # print 'Final accuracy: %.2f%% and error %.2f%% on %s samples with %s discarded (%.1f%%)\n'%( (100.0*TP)/(NW-DW), (100.0*FP)/(NW+FP), N-D, D, (100.0*D)/total )
        # if has_background_class is True:
        #     print 'Accuracy excluding +1: %.2f%% error %.2f%% on %s samples (%.1f%%)\n'%( (100.0*TPP)/(NNW-DDW), (100.0*FPP)/(NNW+FPP), NN-DD, (100.0*DD)/(total-(N-NN)) )

        set_classes(self.resource, 'classes_model', self.classes_model)
        set_tag(self.resource, 'status.validate', 'finished')
        self.sync_resource(status='finished')

def process(self, url, ident, resource=None, **kw):
    resource_id, subpath, query = getOperations(url, self.base_url)
    log.debug('STARTING %s: %s', ident, query)
    #os.chdir(self.workdir)
    log.debug('Current path %s: %s', ident, self.workdir)
    if resource is None:
        resource = {}

    # init the output to a simple file
    token = ProcessToken()

    if ident is not None:
        # pre-compute final filename and check if it exists before starting any other processing
        if len(query)>0:
            series = subpath or 0
            workpath = self.initialWorkPath(ident, user_name=kw.get('user_name', None), series=series)
            token.setFile(workpath, series=series)
            token.dims = self.getImageInfo(filename=token.data, series=token.series, infofile='%s.info'%token.data, meta=kw.get('imagemeta', None))
            if token.dims is None:
                log.debug('SKIPPING dryrun processing due to empty image info')
            else:
                token.init(resource_id=ident, ifnm=token.data, imagemeta=kw.get('imagemeta', None), timeout=kw.get('timeout', None), resource_name=resource.get('name'), dryrun=True)
                for action, args in query:
                    try:
                        service = self.operations.plugins.get(action)
                        #if service is None:
                        #    continue
                        # if the service has a dryrun function, some actions are the same as dryrun
                        if callable(getattr(service, "dryrun", None)):
                            #log.debug ('DRY run: %s calling dryrun', action)
                            token = service.dryrun(token, args)
                        else:
                            #log.debug ('DRY run: %s calling action', action)
                            token = service.action(token, args)
                        log.debug('DRY run: %s producing: %s', action, token.data)
                    except Exception:
                        log.exception('Exception during dryrun')
                    if token.isHttpError():
                        break

                localpath = os.path.join(os.path.realpath(self.workdir), token.data)
                log.debug('Dryrun test %s: [%s] [%s]', ident, localpath, str(token))
                if token.isFile() and os.path.exists(localpath):
                    log.debug('FINISHED %s: returning pre-cached result %s', ident, token.data)
                    with Locks(token.data, failonread=(not block_reads)) as l:
                        if l.locked is False: # dima: never wait, respond immediately
                            raise ImageServiceFuture((1,10))
                    return token

    log.debug('STARTING full processing %s: with %s', ident, token)

    # dima - randomly raise exceptions for requested resources for testing of the UI
    # this will imitate overloading the server with processing requests
    # breaker = random.choice([False, True])
    # if breaker:
    #     raise ImageServiceFuture((1,15))

    # ----------------------------------------------
    # start the processing
    b = self.ensureOriginalFile(ident, resource=resource)
    #log.debug('Original %s, %s, %s', b.path, b.sub, b.files)

    series = (b.sub or subpath or 0)
    workpath = self.ensureWorkPath(b.path, ident, user_name=kw.get('user_name', None), series=series)
    token.setFile(workpath, series=series)
    token.init(resource_id=ident, ifnm=b.path, imagemeta=kw.get('imagemeta', None), files=b.files, timeout=kw.get('timeout', None), resource_name=resource.get('name'), initial_workpath=workpath, dryrun=None)

    if not os.path.exists(b.path):
        raise ImageServiceException(responses.NOT_FOUND, 'File not found...')

    if len(query)>0:
        token.dims = self.getImageInfo(filename=token.first_input_file(), series=token.series, infofile='%s.info'%token.data, meta=token.meta)
        if token.dims is None or 'image_num_x' not in token.dims:
            raise ImageServiceException(415, 'File format is not supported...')
        # overwrite fields from resource image meta
        if token.meta is not None:
            token.dims.update(token.meta)

        # process all the requested operations
        for action,args in query:
            log.debug('ACTION %s: %s', ident, action)
            token = self.request(action, token, args)
            if token.isHttpError():
                break

        token = self.process_queue(token)
        # test output, if it is a file but it does not exist, set 404 error
        token.testFile()

    # if the output is a file but not an image or no processing was done to it,
    # set to the original file name
    if token.isFile() and not token.isImage() and not token.isText() and not token.hasFileName():
        token.contentType = 'application/octet-stream'
        token.outFileName = token.resource_name

    # if supplied, the filename action overrides the output filename
    for action,args in query:
        if action.lower() == 'filename':
            token.outFileName = args
            break

    log.debug('FINISHED %s: %s', ident, query)
    return token

def meta(cls, token, **kw):
    ifnm = token.first_input_file()
    if not cls.supported(token):
        return {}
    log.debug('Meta for: %s', ifnm)
    with Locks(ifnm, failonread=(not block_reads)) as l:
        if l.locked is False: # dima: never wait, respond immediately
            raise ImageServiceFuture((1, 10))
        try:
            _, tmp = misc.start_nounicode_win(ifnm, [])
            slide = openslide.OpenSlide(tmp or ifnm)
        except (openslide.OpenSlideUnsupportedFormatError, openslide.OpenSlideError):
            misc.end_nounicode_win(tmp)
            return {}
        rd = {
            'format': slide.properties.get(openslide.PROPERTY_NAME_VENDOR),
            'image_num_series': 0,
            'image_num_x': slide.dimensions[0],
            'image_num_y': slide.dimensions[1],
            'image_num_z': 1,
            'image_num_t': 1,
            'image_num_c': 3,
            'image_num_resolution_levels': slide.level_count,
            'image_resolution_level_scales': ','.join([str(1.0 / i) for i in slide.level_downsamples]),
            'image_pixel_format': 'unsigned integer',
            'image_pixel_depth': 8,
            'magnification': slide.properties.get(openslide.PROPERTY_NAME_OBJECTIVE_POWER),
            'channel_0_name': 'red',
            'channel_1_name': 'green',
            'channel_2_name': 'blue',
            'channel_color_0': '255,0,0',
            'channel_color_1': '0,255,0',
            'channel_color_2': '0,0,255',
            # new format
            'channels/channel_00000/name': 'red',
            'channels/channel_00000/color': '255,0,0',
            'channels/channel_00001/name': 'green',
            'channels/channel_00001/color': '0,255,0',
            'channels/channel_00002/name': 'blue',
            'channels/channel_00002/color': '0,0,255',
        }

        if slide.properties.get(openslide.PROPERTY_NAME_MPP_X, None) is not None:
            rd.update({
                'pixel_resolution_x': slide.properties.get(openslide.PROPERTY_NAME_MPP_X, 0),
                'pixel_resolution_y': slide.properties.get(openslide.PROPERTY_NAME_MPP_Y, 0),
                'pixel_resolution_unit_x': 'microns',
                'pixel_resolution_unit_y': 'microns'
            })

        # custom - any other tags in proprietary files should go further prefixed by the custom parent
        for k, v in slide.properties.iteritems():
            rd['custom/%s' % k.replace('.', '/')] = v
        slide.close()

        # read metadata using imgcnv since openslide does not decode all of the info
        meta = ConverterImgcnv.meta(ProcessToken(ifnm=tmp or ifnm, series=token.series), **kw)
        meta.update(rd)
        rd = meta
        misc.end_nounicode_win(tmp)

    return rd

def info(cls, token, **kw):
    '''returns a dict with file info'''
    ifnm = token.first_input_file()
    series = token.series
    if not cls.supported(token):
        return {}
    log.debug('Info for: %s', ifnm)
    with Locks(ifnm, failonread=(not block_reads)) as l:
        if l.locked is False: # dima: never wait, respond immediately
            raise ImageServiceFuture((1, 10))
        if not os.path.exists(ifnm):
            return {}
        try:
            _, tmp = misc.start_nounicode_win(ifnm, [])
            slide = openslide.OpenSlide(tmp or ifnm)
        except (openslide.OpenSlideUnsupportedFormatError, openslide.OpenSlideError):
            misc.end_nounicode_win(tmp)
            return {}

        info2 = {
            'format': slide.properties[openslide.PROPERTY_NAME_VENDOR],
            'image_num_series': 0,
            'image_series_index': 0,
            'image_num_x': slide.dimensions[0],
            'image_num_y': slide.dimensions[1],
            'image_num_z': 1,
            'image_num_t': 1,
            'image_num_c': 3,
            'image_num_resolution_levels': slide.level_count,
            'image_resolution_level_scales': ','.join([str(1.0 / i) for i in slide.level_downsamples]),
            'image_pixel_format': 'unsigned integer',
            'image_pixel_depth': 8
        }
        if slide.properties.get(openslide.PROPERTY_NAME_MPP_X, None) is not None:
            info2.update({
                'pixel_resolution_x': slide.properties.get(openslide.PROPERTY_NAME_MPP_X, 0),
                'pixel_resolution_y': slide.properties.get(openslide.PROPERTY_NAME_MPP_Y, 0),
                'pixel_resolution_unit_x': 'microns',
                'pixel_resolution_unit_y': 'microns'
            })
        slide.close()

        # read metadata using imgcnv since openslide does not decode all of the info
        info = ConverterImgcnv.info(ProcessToken(ifnm=tmp or ifnm, series=series), **kw)
        misc.end_nounicode_win(tmp)
        info.update(info2)
        return info
    return {}

class TablesLock(object):
    """
    Provides locks for hdf5 files
    """
    def __init__(self, filename, mode='w', failonexist=False, *args, **kwargs):
        """
        Opens hdf5 files providing read/write locks for thread safety.
        If libHDF5 is not configured for thread safety, please set
        MULTITHREAD_HDF5 to False to keep your feature service working
        in a multithreaded environment.

        @param: filename - name of the hdf5 file
        @param: mode - sets the file access mode (default: 'w')
        @param: failonexist - will not lock if the file exists (default: False)
        @param: args - passed to tables.open_file
        @param: kwargs - passed to tables.open_file
        """
        self.filename = filename
        self.mode = mode
        self.args = args
        self.kwargs = kwargs
        self.h5file = None

        # create locks
        if mode in set(['w', 'a']):
            # write lock
            self.bq_lock = Locks(None, self.filename, failonexist=failonexist, mode=mode + 'b')
        else:
            # read lock
            self.bq_lock = Locks(self.filename, None, failonexist=failonexist, mode=mode + 'b')

    def debug(self, msg):
        """Log detailed info about the locking of threads and files"""
        log.debug("(LOCKING: %s) %s" % (threading.currentThread().getName(), msg))

    def acquire(self):
        """
        Acquires the locks for the hdf5 file.
        If MULTITHREAD_HDF5 is set, the hdf5 file will be locked in write mode
        and pytables will be locked on file hdf5_lock.

        @return: a pytables file handle. If locks fail nothing will be returned.
            If the file cannot be opened a FeatureServiceError exception will be raised.
        """
        if self.h5file is None:
            if MULTITHREAD_HDF5 is False:
                self.debug('Setting HDF5 global lock!')
                HDF5_Global_Lock.acquire(True)
            self.bq_lock.acquire(self.bq_lock.ifnm, self.bq_lock.ofnm)
            if not self.bq_lock.locked:
                # no lock was given on the hdf5 file
                self.debug('Failed to lock hdf5 file!')
                return None
            self.debug('Successfully acquired tables locks!')
            self._open_table()
            return self.h5file
        else:
            self.debug('%s is already locked' % self.h5file.filename)
            return self.h5file

    def _open_table(self):
        """
        Opens an hdf5 file under locks.
        """
        try:
            if os.name == 'nt' and self.mode == 'w':
                # windows does not make a file initially when locked
                self.h5file = tables.open_file(self.filename, 'w', *self.args, **self.kwargs)
            elif self.mode == 'w':
                self.h5file = tables.open_file(self.filename, 'r+', *self.args, **self.kwargs)
            else:
                # if append is passed make it r+ to protect the locks
                self.h5file = tables.open_file(self.filename, self.mode.replace('a', 'r+'), *self.args, **self.kwargs)
        except tables.exceptions.HDF5ExtError:
            log.exception('Fatal Error: hdf5 file was corrupted! -> %s' % self.filename)
            raise FeatureServiceError(error_message='Fatal Error: hdf5 file was corrupted! -> %s' % self.filename)

    def release(self):
        """
        Releases all locks, closes and deletes the hdf5 file handle
        """
        # release file
        if self.h5file:
            self.h5file.close()
            del self.h5file
            self.h5file = None
        if self.bq_lock.locked:
            self.bq_lock.release()
        # release pytables
        if MULTITHREAD_HDF5 is False:
            HDF5_Global_Lock.release()
            self.debug('Releasing HDF5 global lock!')
        self.debug('Successfully released tables locks!')

    def __enter__(self):
        return self.acquire()

    def __exit__(self, type, value, traceback):
        self.release()

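# A minimal usage sketch for TablesLock as a context manager, assuming the Locks and
# MULTITHREAD_HDF5 setup above; the file path '/tmp/features.h5' and the table name
# 'values' are hypothetical examples, not part of the feature service API.
def append_rows(rows):
    with TablesLock('/tmp/features.h5', 'a') as h5file:
        if h5file is None:
            # locking failed; the caller may retry later
            return False
        table = h5file.root.values  # hypothetical existing pytables table
        for r in rows:
            table.append([r])
        table.flush()
        return True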