def get_all_samples(dataset_id, dataset_info=None):
    # Get all samples, annotated with dataset name
    if dataset_info is None:
        dataset_info = db.get_dataset_by_id(dataset_id)
    enqueued = db.get_unprocessed_samples(dataset_id=dataset_id)
    finished = db.get_processed_samples(dataset_id=dataset_id)
    errored = db.get_error_samples(dataset_id=dataset_id)
    all_samples = enqueued + finished + errored
    for s in all_samples:
        s['dataset_name'] = dataset_info.get('name')
    return all_samples
def delete_dataset(dataset_id_str):
    dataset_id = ObjectId(dataset_id_str)
    dataset_info = db.get_dataset_by_id(dataset_id)
    if dataset_info is None:
        return render_template("404.html")
    if db.is_readonly_dataset(dataset_info):
        set_error('Dataset is protected.')
        return redirect('/dataset/' + str(dataset_info['_id']))
    db.delete_dataset(dataset_id)
    set_notice('Dataset "%s" deleted.' % dataset_info['name'])
    return redirect('/')
def dataset_set_threshold(dataset_id_str, new_threshold_str):
    dataset_id = ObjectId(dataset_id_str)
    dataset_info = db.get_dataset_by_id(dataset_id)
    if dataset_info is None:
        return render_template("404.html")
    new_threshold = float(new_threshold_str)
    if dataset_info.get('threshold_prob') == new_threshold:
        set_notice('Threshold not updated: Values are identical.')
    else:
        db.set_dataset_threshold_prob(dataset_id, new_threshold)
        # Existing machine annotations were computed at the old threshold;
        # drop them so they get recomputed with the new one.
        count = db.remove_machine_annotations_for_dataset(dataset_id)
        set_notice('Threshold updated. %d annotations removed.' % count)
    return redirect('/dataset/' + dataset_id_str)
def dataset_export(dataset_id_str):
    dataset_id = ObjectId(dataset_id_str)
    dataset_info = db.get_dataset_by_id(dataset_id)
    results = export_generator(get_all_samples(dataset_id, dataset_info))
    dataset_export_name, _ext = os.path.splitext(
        secure_filename(dataset_info['name']))
    dataset_export_name += '.csv'
    return Response(results, mimetype="text/plain", headers={
        "Content-Disposition": "attachment;filename=%s" % dataset_export_name})
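
# `export_generator` is defined elsewhere in this module; Response() streams
# whatever it yields. A minimal sketch of the kind of generator it presumably
# is, assuming each sample dict carries 'dataset_name', 'filename', and
# 'machine_position_count' (the column names here are illustrative, not the
# actual export schema):
def export_generator_sketch(samples):
    yield 'dataset,filename,stomata_count\n'
    for s in samples:
        yield '%s,%s,%s\n' % (s.get('dataset_name'), s.get('filename'),
                              s.get('machine_position_count'))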
def dataset_export_correlation(dataset_id_str):
    dataset_id = ObjectId(dataset_id_str)
    dataset_info = db.get_dataset_by_id(dataset_id)
    finished = db.get_processed_samples(dataset_id=dataset_id)
    # Only correlate samples that have both a human and a machine count
    valid = [s for s in finished
             if s.get('human_position_count') is not None
             and s.get('machine_position_count') is not None]
    hu = np.array([s['human_position_count'] for s in valid])
    ma = np.array([s['machine_position_count'] for s in valid])
    sns.jointplot(y=hu, x=ma, kind="reg")
    ax = plt.gca()
    ax.set_ylabel('Human stomata count')
    ax.set_xlabel('Automatic stomata count')
    return Response(get_plot_as_png(), mimetype="image/png")
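
# `get_plot_as_png` is a helper defined elsewhere; a minimal sketch of the
# standard matplotlib pattern it presumably wraps: render the current figure
# into an in-memory buffer and return the raw PNG bytes.
def get_plot_as_png_sketch():
    import io
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    plt.close()
    buf.seek(0)
    return buf.getvalue()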
def process_image_sample(net, model_id, sample_id, is_primary_model):
    sample = db.get_sample_by_id(sample_id)
    dataset_info = db.get_dataset_by_id(sample['dataset_id'])
    image_zoom_values = default_image_zoom_values.get(
        dataset_info.get('image_zoom'))
    image_filename = sample['filename']
    set_status('Processing %s...' % image_filename,
               secondary=not is_primary_model)
    image_filename_full = os.path.join(config.server_image_path, image_filename)
    if not os.path.isfile(image_filename_full):
        db.set_sample_error(sample['_id'],
                            'File does not exist: "%s".' % image_filename_full)
        return
    basename, ext = os.path.splitext(image_filename)
    if not ext.lower() in config.image_extensions:
        db.set_sample_error(sample['_id'],
                            'Unknown file extension "%s".' % ext)
        return
    try:
        # Lots of saving and loading here. TODO: Should be optimized to be done all in memory.
        # Determine output file paths
        heatmap_filename = os.path.join(net.name, basename + '_heatmap.npz')
        heatmap_filename_full = os.path.join(config.server_heatmap_path,
                                             heatmap_filename)
        if not os.path.isdir(os.path.dirname(heatmap_filename_full)):
            os.makedirs(os.path.dirname(heatmap_filename_full))
        heatmap_image_filename = os.path.join(net.name,
                                              basename + '_heatmap.jpg')
        heatmap_image_filename_full = os.path.join(config.server_heatmap_path,
                                                   heatmap_image_filename)
        # Process image
        data = process_image_file(net, image_filename_full,
                                  heatmap_filename_full,
                                  scales=image_zoom_values)
        plot_heatmap(image_filename_full, heatmap_filename_full,
                     heatmap_image_filename_full)
        if 'imq_entropy' not in sample:
            imq = get_image_measures(image_filename_full)
            db.set_image_measures(sample['_id'], imq)
        positions = []  # Computed later
        machine_annotation = db.add_machine_annotation(
            sample['_id'], model_id, heatmap_filename, heatmap_image_filename,
            positions, margin=int(net.margin / data['scale']),
            is_primary_model=is_primary_model, scale=data['scale'])
        # Count stomata
        heatmap_image = plt.imread(heatmap_image_filename_full)
        positions = compute_stomata_positions(machine_annotation,
                                              heatmap_image, plot=False)
        db.update_machine_annotation_positions(
            sample['_id'], machine_annotation['_id'], positions,
            is_primary_model=is_primary_model)
        plt.imsave(heatmap_image_filename_full, heatmap_image)
        print 'Finished record.'
    except Exception:
        # Record the full traceback on the sample so it shows up as errored
        error_string = traceback.format_exc()
        db.set_sample_error(sample['_id'],
                            'Processing error:\n' + error_string)
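
# Illustrative only: process_image_sample is presumably driven by a worker
# that drains the queue of unprocessed samples. A hedged sketch of such a
# pass, assuming db.get_unprocessed_samples returns sample dicts with '_id'
# (the function name is hypothetical, not part of this module):
def run_worker_pass_sketch(net, model_id, dataset_id, is_primary_model=True):
    for sample in db.get_unprocessed_samples(dataset_id=dataset_id):
        process_image_sample(net, model_id, sample['_id'], is_primary_model)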
def do_claim_dataset(dataset_id_str, ignore_errors):
    dataset_id = ObjectId(dataset_id_str)
    if current_user is None:
        if ignore_errors:
            return
        set_error('Not logged in.')
        return redirect('/dataset/' + dataset_id_str)
    dataset = db.get_dataset_by_id(dataset_id)
    if dataset is None:
        if ignore_errors:
            return
        set_error('Dataset not found.')
        return redirect('/user_datasets')
    if dataset.get('user') is not None:
        if ignore_errors:
            return
        set_error('Dataset already owned by %s.' % dataset['user'].get('email'))
        return redirect('/dataset/' + dataset_id_str)
    db.set_dataset_user(dataset_id, current_user.id)
    return redirect('/user_datasets')
def dataset_info(dataset_id_str):
    print 'request.method', request.method
    new_dataset_threshold_prob = None
    new_allow_reuse = False
    if dataset_id_str == 'new':
        print 'Creating new dataset'
        if request.method != 'POST':
            return redirect('/')
        dataset_id = None
        dataset_info = None
        new_dataset_zoom = request.form['size']
        print 'Threshold prob:'
        print request.form['threshold']
        try:
            v = float(request.form['threshold'])
            new_dataset_threshold_prob = min(max(v, 0.5), 1.0)
            print 'Specified thresh prob:', new_dataset_threshold_prob
        except ValueError:
            print 'Invalid threshold. Ignored.'
        try:
            new_allow_reuse = bool(request.form.get('reuseCheck'))
            print 'Specified allow reuse:', request.form.get(
                'reuseCheck'), new_allow_reuse
        except ValueError:
            print 'Invalid reuse setting. Ignored.'
    else:
        dataset_id = ObjectId(dataset_id_str)
        db.access_dataset(dataset_id)
        dataset_info = db.get_dataset_by_id(dataset_id)
        new_dataset_zoom = None
        if dataset_info is None:
            return render_template("404.html")
    if request.method == 'POST':
        # File upload
        if dataset_info is not None:
            if db.is_readonly_dataset(dataset_info):
                set_error('Dataset is protected.')
                return redirect('/dataset/' + dataset_id_str)
        return upload_file(dataset_id, image_zoom=new_dataset_zoom,
                           threshold_prob=new_dataset_threshold_prob,
                           allow_reuse=new_allow_reuse)
    enqueued = db.get_unprocessed_samples(dataset_id=dataset_id)
    finished = db.get_processed_samples(dataset_id=dataset_id)
    for i, sample in enumerate(finished):
        sample['machine_distance'] = 1.0 / max(
            [0.001, sqrt(float(sample['machine_position_count']))])
        sample['index'] = i
    errored = db.get_error_samples(dataset_id=dataset_id)
    threshold_prob = round(dataset_info.get('threshold_prob')
                           or fc8_to_prob(default_prob_threshold), 3)
    # Get request data
    return render_template("dataset.html",
                           dataset_name=dataset_info['name'],
                           dataset_id=dataset_id_str,
                           enqueued=enqueued,
                           finished=finished,
                           errored=errored,
                           status=db.get_status('worker'),
                           readonly=db.is_readonly_dataset(dataset_info),
                           error=pop_last_error(),
                           dataset_user=dataset_info.get('user'),
                           image_zoom=dataset_info.get('image_zoom', 'default'),
                           threshold_prob=threshold_prob)