image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) cv2.imwrite(filename, image) print "Downloaded ............. ", i,"/",nb_annotations download_time = time.time() - start #Classification des crops print "Building attributes from ", os.path.dirname(os.path.dirname(folder_name)) start = time.time() X, y = build_from_dir(os.path.dirname(os.path.dirname(folder_name))) extraction_time = time.time() - start #print annotation_mapping #downloaded = conn.get_job_data_file(model_job_data, model_filepath) #if not downloaded : # print "Could not fetch model" # sys.exit() fp = open(model_filepath, "r") start = time.time() classes = pickle.load(fp) pyxit = pickle.load(fp)
def main(argv): # Define command line options p = optparse.OptionParser(description='Pyxit/Cytomine Segmentation Model Builder', prog='PyXit Segmentation Model Builder (PYthon piXiT)') p.add_option("--cytomine_host", type="string", default = '', dest="cytomine_host", help="The Cytomine host (eg: beta.cytomine.be, localhost:8080)") p.add_option('--cytomine_public_key', type="string", default = '', dest="cytomine_public_key", help="Cytomine public key") p.add_option('--cytomine_private_key',type="string", default = '', dest="cytomine_private_key", help="Cytomine private key") p.add_option('--cytomine_base_path', type="string", default = '/api/', dest="cytomine_base_path", help="Cytomine base path") p.add_option('--cytomine_id_software', type="int", dest="cytomine_id_software", help="The Cytomine software identifier") p.add_option('--cytomine_working_path', default="/tmp/", type="string", dest="cytomine_working_path", help="The working directory (eg: /tmp)") p.add_option('--cytomine_id_project', type="int", dest="cytomine_id_project", help="The Cytomine project identifier") p.add_option('-z', '--cytomine_zoom_level', type='int', dest='cytomine_zoom_level', help="working zoom level") p.add_option('--cytomine_annotation_projects', type="string", dest="cytomine_annotation_projects", help="Projects from which annotations are extracted") p.add_option('--cytomine_predict_terms', type='string', default='0', dest='cytomine_predict_terms', help="term ids of predicted terms (=positive class in binary mode)") p.add_option('--cytomine_excluded_terms', type='string', default='0', dest='cytomine_excluded_terms', help="term ids of excluded terms") p.add_option('--cytomine_reviewed', type='string', default="False", dest="cytomine_reviewed", help="Get reviewed annotations only") p.add_option('--pyxit_target_width', type='int', dest='pyxit_target_width', help="pyxit subwindows width") p.add_option('--pyxit_target_height', type='int', dest='pyxit_target_height', help="pyxit subwindows 
height") p.add_option('--pyxit_save_to', type='string', dest='pyxit_save_to', help="pyxit model directory") #future: get it from server db p.add_option('--pyxit_colorspace', type='int', dest='pyxit_colorspace', help="pyxit colorspace encoding") #future: get it from server db p.add_option('--pyxit_n_jobs', type='int', dest='pyxit_n_jobs', help="pyxit number of jobs for trees") #future: get it from server db p.add_option('--pyxit_n_subwindows', default=10, type="int", dest="pyxit_n_subwindows", help="number of subwindows") p.add_option('--pyxit_interpolation', default=2, type="int", dest="pyxit_interpolation", help="interpolation method 1,2,3,4") p.add_option('--pyxit_transpose', type="string", default="False", dest="pyxit_transpose", help="transpose subwindows") p.add_option('--pyxit_fixed_size', type="string", default="False", dest="pyxit_fixed_size", help="extract fixed size subwindows") p.add_option('--forest_n_estimators', default=10, type="int", dest="forest_n_estimators", help="number of base estimators (T)") p.add_option('--forest_max_features' , default=1, type="int", dest="forest_max_features", help="max features at test node (k)") p.add_option('--forest_min_samples_split', default=1, type="int", dest="forest_min_samples_split", help="minimum node sample size (nmin)") p.add_option('--verbose', type="string", default="0", dest="verbose", help="Turn on (1) or off (0) verbose mode") options, arguments = p.parse_args( args = argv) parameters['cytomine_host'] = options.cytomine_host parameters['cytomine_public_key'] = options.cytomine_public_key parameters['cytomine_private_key'] = options.cytomine_private_key parameters['cytomine_base_path'] = options.cytomine_base_path parameters['cytomine_working_path'] = options.cytomine_working_path parameters['cytomine_base_path'] = options.cytomine_base_path parameters['cytomine_id_project'] = options.cytomine_id_project parameters['cytomine_id_software'] = options.cytomine_id_software 
parameters['cytomine_annotation_projects'] = map(int,options.cytomine_annotation_projects.split(',')) parameters['cytomine_predict_terms'] = map(int,options.cytomine_predict_terms.split(',')) parameters['cytomine_excluded_terms'] = map(int,options.cytomine_excluded_terms.split(',')) parameters['cytomine_zoom_level'] = options.cytomine_zoom_level parameters['cytomine_reviewed'] = str2bool(options.cytomine_reviewed) pyxit_parameters['pyxit_target_width'] = options.pyxit_target_width pyxit_parameters['pyxit_target_height'] = options.pyxit_target_height pyxit_parameters['pyxit_n_subwindows'] = options.pyxit_n_subwindows pyxit_parameters['pyxit_colorspace'] = options.pyxit_colorspace pyxit_parameters['pyxit_interpolation'] = options.pyxit_interpolation pyxit_parameters['pyxit_transpose'] = str2bool(options.pyxit_transpose) pyxit_parameters['pyxit_fixed_size'] = str2bool(options.pyxit_fixed_size) pyxit_parameters['forest_n_estimators'] = options.forest_n_estimators pyxit_parameters['forest_max_features'] = options.forest_max_features pyxit_parameters['forest_min_samples_split'] = options.forest_min_samples_split pyxit_parameters['pyxit_save_to'] = options.pyxit_save_to pyxit_parameters['pyxit_n_jobs'] = options.pyxit_n_jobs # Check for errors in the options if options.verbose: print "[pyxit.main] Options = ", options # Create JOB/USER/JOB conn = cytomine.Cytomine(parameters["cytomine_host"], parameters["cytomine_public_key"], parameters["cytomine_private_key"] , base_path = parameters['cytomine_base_path'], working_path = parameters['cytomine_working_path'], verbose= str2bool(options.verbose)) #Create a new userjob if connected as human user current_user = conn.get_current_user() if current_user.algo==False: print "adduserJob..." user_job = conn.add_user_job(parameters['cytomine_id_software'], parameters['cytomine_id_project']) print "set_credentials..." 
conn.set_credentials(str(user_job.publicKey), str(user_job.privateKey)) print "done" else: user_job = current_user print "Already running as userjob" job = conn.get_job(user_job.job) pyxit_parameters['dir_ls'] = os.path.join(parameters["cytomine_working_path"], str(parameters['cytomine_annotation_projects']).replace(',','-').replace('[','').replace(']','').replace(' ',''), "zoom_level", str(parameters['cytomine_zoom_level'])) if not os.path.exists(pyxit_parameters['dir_ls']): print "Creating annotation directory: %s" %pyxit_parameters['dir_ls'] os.makedirs(pyxit_parameters['dir_ls']) time.sleep(2) job = conn.update_job_status(job, status_comment = "Publish software parameters values") all_params=pyxit_parameters all_params.update(parameters) job_parameters_values = conn.add_job_parameters(user_job.job, conn.get_software(parameters['cytomine_id_software']), all_params) #Get annotation data job = conn.update_job_status(job, status = job.RUNNING, status_comment = "Fetching data", progress = 0) #Retrieve annotations from each annotation projects, either reviewed or unreviewed annotations annotations = None for prj in parameters['cytomine_annotation_projects']: if parameters["cytomine_reviewed"]: print "Retrieving reviewed annotations..." annotations_prj = conn.get_annotations(id_project = prj, reviewed_only=True) print "Reviewed annotations: %d" %len(annotations_prj.data()) else : print "Retrieving (unreviewed) annotations..." annotations_prj = conn.get_annotations(id_project = prj) print "(Unreviewed) annotations: %d" %len(annotations_prj.data()) if not annotations : annotations = annotations_prj else : annotations.data().extend(annotations_prj.data()) print "Nb annotations so far... 
= %d" %len(annotations.data()) time.sleep(3) print "Total annotations projects %s = %d" %(parameters['cytomine_annotation_projects'],len(annotations.data())) time.sleep(3) print "Predict terms / excluded terms" print parameters['cytomine_predict_terms'] print parameters['cytomine_excluded_terms'] time.sleep(3) annotations = conn.dump_annotations(annotations = annotations, get_image_url_func = Annotation.get_annotation_alpha_crop_url, dest_path = pyxit_parameters['dir_ls'], excluded_terms = parameters['cytomine_excluded_terms'], desired_zoom = parameters['cytomine_zoom_level']) #Build matrix (subwindows described by pixel values and output) for training project = conn.get_project(parameters['cytomine_id_project']) terms = conn.get_terms(project.ontology) map_classes = {} # build X, Y. Change initial problem into binary problem : "predict_terms" vs others for term in terms.data(): if term.id in parameters['cytomine_predict_terms']: map_classes[term.id] = 1 else : map_classes[term.id] = 0 print pyxit_parameters #Prepare image matrix X, y = build_from_dir(pyxit_parameters['dir_ls'], map_classes) print "X length: %d " %len(X) print "Y length: %d " %len(y) time.sleep(5) #classes = np.unique(y) classes = [0,1] n_classes = len(classes) y_original = y y = np.searchsorted(classes, y) # Instantiate classifiers job = conn.update_job_status(job, status = job.RUNNING, status_comment = "[pyxit.main] Initializing PyxitClassifier...", progress = 25) forest = ExtraTreesClassifier(n_estimators=pyxit_parameters['forest_n_estimators'], max_features=pyxit_parameters['forest_max_features'], min_samples_split=pyxit_parameters['forest_min_samples_split'], n_jobs=pyxit_parameters['pyxit_n_jobs'], verbose=True) pyxit = PyxitClassifier(base_estimator=forest, n_subwindows=pyxit_parameters['pyxit_n_subwindows'], min_size=0.0,#segmentation use fixed-size subwindows max_size=1.0,#segmentation use fixed-size subwindows target_width=pyxit_parameters['pyxit_target_width'], 
target_height=pyxit_parameters['pyxit_target_height'], interpolation=pyxit_parameters['pyxit_interpolation'], transpose=pyxit_parameters['pyxit_transpose'], colorspace=pyxit_parameters['pyxit_colorspace'], fixed_size=pyxit_parameters['pyxit_fixed_size'], n_jobs=pyxit_parameters['pyxit_n_jobs'], verbose=True, get_output = _get_output_from_mask) if pyxit_parameters['pyxit_save_to']: d = os.path.dirname(pyxit_parameters['pyxit_save_to']) if not os.path.exists(d): os.makedirs(d) fd = open(pyxit_parameters['pyxit_save_to'], "wb") pickle.dump(classes, fd, protocol=pickle.HIGHEST_PROTOCOL) job = conn.update_job_status(job, status_comment = "[pyxit.main] Extracting %d subwindows from each image in %s" %(pyxit_parameters['pyxit_n_subwindows'],pyxit_parameters['dir_ls']), progress = 50) time.sleep(3) #Extract random subwindows in dumped annotations _X, _y = pyxit.extract_subwindows(X, y) #Build pixel classifier job = conn.update_job_status(job, status_comment = "[pyxit.main] Fitting Pyxit Segmentation Model on %s", progress = 75) print "TIME : %s" %strftime("%Y-%m-%d %H:%M:%S", localtime()) start = time.time() pyxit.fit(X, y, _X=_X, _y=_y) end = time.time() print "Elapsed time FIT: %d s" %(end-start) print "TIME : %s" %strftime("%Y-%m-%d %H:%M:%S", localtime()) print "pyxit.base_estimator.n_classes_" print pyxit.base_estimator.n_classes_ print "pyxit.base_estimator.classes_" print pyxit.base_estimator.classes_ if options.verbose: print "----------------------------------------------------------------" print "[pyxit.main] Saving Pyxit Segmentation Model locally into %s" % pyxit_parameters['pyxit_save_to'] print "----------------------------------------------------------------" #Save model on local disk if pyxit_parameters['pyxit_save_to']: pickle.dump(pyxit, fd, protocol=pickle.HIGHEST_PROTOCOL) if pyxit_parameters['pyxit_save_to']: fd.close() print "Not Publishing model in db.." 
#job_data = conn.add_job_data(job, "model", pyxit_parameters['pyxit_save_to']) job = conn.update_job_status(job, status = job.TERMINATED, status_comment = "Finish", progress = 100) print "END."
def _split_rgba_annotations(src_dir, image_path, mask_path):
    """Split each RGBA crop in *src_dir* into an RGB image and its alpha
    mask, saved as PNG under *image_path* and *mask_path* respectively.

    Files PIL cannot open are skipped with a printed warning.
    """
    for image_file in os.listdir(src_dir):
        print(image_file)
        try:
            im = Image.open(os.path.join(src_dir, image_file))
        except IOError:
            # BUG FIX: this warning was a bare string expression (a no-op);
            # it is now actually printed.
            print("warning filename %s is not an image"
                  % os.path.join(src_dir, image_file))
            continue
        rgb = im.tobytes("raw", "RGB")
        alpha = im.tobytes("raw", "A")
        size = im.size  # capture before close()
        im.close()
        Image.frombytes("RGB", size, rgb).save(
            os.path.join(image_path, image_file), "PNG")
        Image.frombytes("L", size, alpha).save(
            os.path.join(mask_path, image_file), "PNG")


def main(argv):
    """Dump Cytomine annotations and train a Keras U-Net segmentation model.

    Depending on the options this (1) dumps annotation alpha-crops from the
    configured projects and regroups them into binary image/mask folders,
    (2) computes statistics on the dumped crops, and/or (3) extracts
    subwindows and fits a U-Net with augmented data generators.

    NOTE(review): relies on module-level names (cytomine, Annotation, Image,
    str2bool, build_from_dir, stats_dumped_annotations, PyxitClassifier,
    _get_output_from_mask, ImageDataGenerator, combine_generator, get_unet,
    ModelCheckpoint, n_channels, np) — confirm they are importable here.
    """
    # ---- Command line options -------------------------------------------
    p = optparse.OptionParser(
        description='Cytomine Segmentation prediction',
        prog='Cytomine segmentation prediction',
        version='0.1')
    p.add_option('--cytomine_host', type="string", default='beta.cytomine.be', dest="cytomine_host",
                 help="The Cytomine host (eg: beta.cytomine.be, localhost:8080)")
    p.add_option('--cytomine_public_key', type="string", default='', dest="cytomine_public_key",
                 help="Cytomine public key")
    p.add_option('--cytomine_private_key', type="string", default='', dest="cytomine_private_key",
                 help="Cytomine private key")
    p.add_option('--cytomine_base_path', type="string", default='/api/', dest="cytomine_base_path",
                 help="Cytomine base path")
    p.add_option('--cytomine_id_software', type="int", dest="cytomine_id_software",
                 help="The Cytomine software identifier")
    p.add_option('--cytomine_working_path', default="/tmp/", type="string", dest="cytomine_working_path",
                 help="The working directory (eg: /tmp)")
    p.add_option('--cytomine_id_project', type="int", dest="cytomine_id_project",
                 help="The Cytomine project identifier")
    p.add_option('-i', '--cytomine_id_image', type='int', dest='cytomine_id_image',
                 help="image id from cytomine", metavar='IMAGE')
    p.add_option('-z', '--cytomine_zoom_level', type='int', dest='cytomine_zoom_level',
                 help="working zoom level")
    p.add_option('-j', '--nb_jobs', type='int', dest='nb_jobs',
                 help="number of parallel jobs")
    p.add_option('--cytomine_predict_terms', type='str', dest='cytomine_predict_terms',
                 help="term id of all positive terms. The first term is the output predicted annotation term")
    p.add_option('--cytomine_excluded_terms', type='string', dest='cytomine_excluded_terms',
                 help="term id of excluded terms)")
    p.add_option('--pyxit_target_width', type='int', dest='pyxit_target_width',
                 help="pyxit subwindows width")
    p.add_option('--pyxit_target_height', type='int', dest='pyxit_target_height',
                 help="pyxit subwindows height")
    p.add_option('--pyxit_colorspace', type='int', dest='pyxit_colorspace',
                 help="pyxit colorspace encoding")
    p.add_option('--pyxit_nb_jobs', type='int', dest='pyxit_nb_jobs',
                 help="pyxit number of jobs for trees")
    p.add_option('--pyxit_fixed_size', type='string', default="0", dest="pyxit_fixed_size",
                 help="extract fixed size subwindows")
    p.add_option('--pyxit_transpose', type='string', default="0", dest="pyxit_transpose",
                 help="transpose subwindows")
    # BUG FIX: default was the string "10"; optparse does not type-convert
    # defaults, so the fallback value was a str, not an int.
    p.add_option('--pyxit_n_subwindows', type='int', default=10, dest="pyxit_n_subwindows",
                 help="number of subwindows")
    p.add_option('--pyxit_interpolation', default=2, type="int", dest="pyxit_interpolation",
                 help="interpolation method 1,2,3,4")
    p.add_option('--pyxit_min_size', default=0.5, type="float", dest="pyxit_min_size",
                 help="min size")
    p.add_option('--pyxit_max_size', default=1.0, type="float", dest="pyxit_max_size",
                 help="max size")
    p.add_option('--cytomine_reviewed', type='string', default="False", dest="cytomine_reviewed",
                 help="Get reviewed annotations only")
    p.add_option('--cytomine_dump_annotations', type='string', default="0", dest="cytomine_dump_annotations",
                 help="Dump training annotations or not")
    p.add_option('--cytomine_dump_annotation_stats', type='string', default="0", dest="cytomine_dump_annotation_stats",
                 help="Calculate stats on dumped annotations or not")
    p.add_option('--build_model', type="string", default="0", dest="build_model",
                 help="Turn on (1) or off (0) model building")
    p.add_option('--cytomine_annotation_projects', type="string", dest="cytomine_annotation_projects",
                 help="Projects from which annotations are extracted")
    p.add_option('--verbose', type="string", default="0", dest="verbose",
                 help="Turn on (1) or off (0) verbose mode")
    p.add_option('--keras_save_to', type='string', default="", dest='keras_save_to',
                 help="keras model weight file")
    p.add_option('--keras_batch_size', type="int", dest="keras_batch_size",
                 help="Training batch size")
    p.add_option('--keras_n_epochs', type="int", dest="keras_n_epochs",
                 help="Number of epochs")
    p.add_option('--keras_shuffle', type="string", dest="keras_shuffle",
                 help="Turn on (1) or off (0) batch shuffle")
    p.add_option('--keras_validation_split', type="float", dest="keras_validation_split",
                 help="Batch validation split")

    options, arguments = p.parse_args(args=argv)

    # BUG FIX: 'cytomine_base_path' was assigned twice; kept one assignment.
    # list(map(...)) keeps Py2 behavior while staying indexable under Py3.
    parameters = {}
    parameters['keras_save_to'] = options.keras_save_to
    parameters['keras_batch_size'] = options.keras_batch_size
    parameters['keras_n_epochs'] = options.keras_n_epochs
    parameters['keras_shuffle'] = options.keras_shuffle
    parameters['keras_validation_split'] = options.keras_validation_split
    parameters['cytomine_host'] = options.cytomine_host
    parameters['cytomine_public_key'] = options.cytomine_public_key
    parameters['cytomine_private_key'] = options.cytomine_private_key
    parameters['cytomine_base_path'] = options.cytomine_base_path
    parameters['cytomine_working_path'] = options.cytomine_working_path
    parameters['cytomine_id_project'] = options.cytomine_id_project
    parameters['cytomine_id_software'] = options.cytomine_id_software
    parameters['cytomine_predict_terms'] = list(map(int, options.cytomine_predict_terms.split(',')))
    parameters['cytomine_predicted_annotation_term'] = parameters['cytomine_predict_terms'][0]
    parameters['cytomine_excluded_terms'] = list(map(int, options.cytomine_excluded_terms.split(',')))
    parameters['pyxit_colorspace'] = options.pyxit_colorspace
    parameters['pyxit_nb_jobs'] = options.pyxit_nb_jobs
    parameters['pyxit_n_jobs'] = options.pyxit_nb_jobs
    parameters['cytomine_nb_jobs'] = options.pyxit_nb_jobs
    parameters['cytomine_id_image'] = options.cytomine_id_image
    parameters['cytomine_zoom_level'] = options.cytomine_zoom_level
    parameters['nb_jobs'] = options.nb_jobs
    parameters['pyxit_target_width'] = options.pyxit_target_width
    parameters['pyxit_target_height'] = options.pyxit_target_height
    parameters['pyxit_n_subwindows'] = options.pyxit_n_subwindows
    parameters['pyxit_interpolation'] = options.pyxit_interpolation
    parameters['pyxit_transpose'] = str2bool(options.pyxit_transpose)
    parameters['pyxit_min_size'] = options.pyxit_min_size
    parameters['pyxit_max_size'] = options.pyxit_max_size
    parameters['pyxit_fixed_size'] = str2bool(options.pyxit_fixed_size)
    parameters['cytomine_annotation_projects'] = list(map(int, options.cytomine_annotation_projects.split(',')))
    parameters['cytomine_reviewed'] = str2bool(options.cytomine_reviewed)
    parameters['cytomine_dump_annotation_stats'] = str2bool(options.cytomine_dump_annotation_stats)
    parameters['cytomine_dump_annotations'] = str2bool(options.cytomine_dump_annotations)
    parameters['build_model'] = str2bool(options.build_model)
    # Local dump directory: <working_path>/<project-ids-joined-by-'-'>/zoom_level/<zoom>
    parameters['dir_ls'] = os.path.join(
        parameters["cytomine_working_path"],
        str(parameters['cytomine_annotation_projects'])
            .replace(',', '-').replace('[', '').replace(']', '').replace(' ', ''),
        "zoom_level",
        str(parameters['cytomine_zoom_level']))

    pyxit_parameters = {}
    pyxit_parameters['pyxit_target_width'] = options.pyxit_target_width
    pyxit_parameters['pyxit_target_height'] = options.pyxit_target_height
    pyxit_parameters['pyxit_n_subwindows'] = options.pyxit_n_subwindows
    pyxit_parameters['pyxit_min_size'] = options.pyxit_min_size
    pyxit_parameters['pyxit_max_size'] = options.pyxit_max_size
    pyxit_parameters['pyxit_colorspace'] = options.pyxit_colorspace
    pyxit_parameters['pyxit_interpolation'] = options.pyxit_interpolation
    pyxit_parameters['pyxit_transpose'] = str2bool(options.pyxit_transpose)
    pyxit_parameters['pyxit_fixed_size'] = str2bool(options.pyxit_fixed_size)
    pyxit_parameters['pyxit_n_jobs'] = options.pyxit_nb_jobs

    if options.verbose:
        print(parameters)

    # Create Cytomine connection.
    conn = cytomine.Cytomine(parameters["cytomine_host"],
                             parameters["cytomine_public_key"],
                             parameters["cytomine_private_key"],
                             base_path=parameters['cytomine_base_path'],
                             working_path=parameters['cytomine_working_path'],
                             verbose=str2bool(options.verbose))

    # ---- Dump annotations -----------------------------------------------
    if parameters['cytomine_dump_annotations']:
        # Get annotation descriptions (JSON) from project(s).
        annotations = None
        for prj in parameters['cytomine_annotation_projects']:
            if parameters["cytomine_reviewed"]:
                annotations_prj = conn.get_annotations(
                    id_project=prj,
                    reviewed_only=parameters["cytomine_reviewed"])
            else:
                annotations_prj = conn.get_annotations(id_project=prj)
            if not annotations:
                annotations = annotations_prj
            else:
                annotations.data().extend(annotations_prj.data())
            # HACK: hard-coded project/term ids pull one extra term — should
            # be made configurable instead of baked in.
            if prj == 21907448 or prj == 155194683:
                annotations_prj = conn.get_annotations(id_project=prj,
                                                       id_term=91376951)
                annotations.data().extend(annotations_prj.data())
            print("Nb annotations so far... = %d" % len(annotations.data()))
        print("Total annotations projects %s = %d"
              % (parameters['cytomine_annotation_projects'],
                 len(annotations.data())))

        # Set output dir parameters.
        if not os.path.exists(parameters['dir_ls']):
            print("Creating annotation directory: %s" % parameters['dir_ls'])
            os.makedirs(parameters['dir_ls'])

        # Dump annotation images locally.
        # BUG FIX: the path was passed as a second print() argument instead
        # of being %-formatted into the message.
        print("Dump training annotation images in %s..." % parameters['dir_ls'])
        conn.dump_annotations(
            annotations=annotations,
            get_image_url_func=Annotation.get_annotation_alpha_crop_url,
            dest_path=parameters['dir_ls'],
            desired_zoom=parameters['cytomine_zoom_level'],
            excluded_terms=parameters['cytomine_excluded_terms'])

        # Regroup all positive terms under class "1" and negatives under "0".
        term_directories = os.listdir(parameters['dir_ls'])
        pos_image_path = os.path.join(parameters['dir_ls'], "image", "1")
        pos_mask_path = os.path.join(parameters['dir_ls'], "mask", "1")
        neg_image_path = os.path.join(parameters['dir_ls'], "image", "0")
        neg_mask_path = os.path.join(parameters['dir_ls'], "mask", "0")
        for label, path in (("positive", pos_image_path),
                            ("negative", neg_image_path),
                            ("positive", pos_mask_path),
                            ("negative", neg_mask_path)):
            if not os.path.exists(path):
                print("Creating %s annotation directory: %s" % (label, path))
                os.makedirs(path)
        for term_dir in term_directories:
            # Skip non-term entries (e.g. "image"/"mask" left from a previous
            # run) which previously crashed int().
            if not term_dir.isdigit():
                continue
            dir_abs = os.path.join(parameters['dir_ls'], term_dir)
            print("Term directory: %s" % dir_abs)
            if int(term_dir) in parameters['cytomine_predict_terms']:
                print("Positive term")
                _split_rgba_annotations(dir_abs, pos_image_path, pos_mask_path)
            else:
                print("Negative term")
                _split_rgba_annotations(dir_abs, neg_image_path, neg_mask_path)

    # ---- Annotation statistics ------------------------------------------
    if parameters['cytomine_dump_annotation_stats']:
        pos_path = os.path.join(parameters['dir_ls'], "image", "1")
        neg_path = os.path.join(parameters['dir_ls'], "image", "0")
        stats_dumped_annotations(pos_path, neg_path)

    # ---- Model building --------------------------------------------------
    if parameters['build_model']:
        # Model name encodes the training configuration.
        model_name = ("nsubw{}_winsize{}x{}_minsize{}_maxsize{}_batchsize{}"
                      "_epochs{}_shuffle{}_valsplit{}_colorspace{}_zoom{}"
                      "_until4x4_IDG").format(
            parameters['pyxit_n_subwindows'],
            parameters['pyxit_target_width'],
            parameters['pyxit_target_height'],
            parameters['pyxit_min_size'],
            parameters['pyxit_max_size'],
            parameters['keras_batch_size'],
            parameters['keras_n_epochs'],
            parameters['keras_shuffle'],
            parameters['keras_validation_split'],
            pyxit_parameters['pyxit_colorspace'],
            parameters['cytomine_zoom_level']).replace(".", "")
        print("Model_name :", model_name)

        # Pyxit is only used here as a subwindow extractor: one full-size
        # window per image (n_subwindows=1, min_size=max_size=1).
        pyxit = PyxitClassifier(
            None,
            n_subwindows=1,
            min_size=1,
            max_size=1,
            target_width=pyxit_parameters['pyxit_target_width'],
            target_height=pyxit_parameters['pyxit_target_height'],
            n_jobs=pyxit_parameters['pyxit_n_jobs'],
            interpolation=pyxit_parameters['pyxit_interpolation'],
            transpose=pyxit_parameters['pyxit_transpose'],
            colorspace=pyxit_parameters['pyxit_colorspace'],
            fixed_size=pyxit_parameters['pyxit_fixed_size'],
            random_state=None,
            verbose=1,
            get_output=_get_output_from_mask,
            parallel_leaf_transform=False)

        # Build filenames and classes.
        X, y = build_from_dir(parameters['dir_ls'])
        classes = np.unique(y)
        n_classes = len(classes)
        y_original = y
        y = np.searchsorted(classes, y)
        n_images = len(y)
        print("Number of images : ", n_images)

        # Extract subwindows.
        print("Start extraction of subwindows...")
        _X, _y = pyxit.extract_subwindows(X, y)
        print("Over")
        n_subw = len(_y)
        print("Number of subwindows : ", n_subw)

        # Reshape flat subwindows into (n, h, w, channels) tensors.
        _X = np.reshape(_X, (n_subw,
                             pyxit_parameters['pyxit_target_width'],
                             pyxit_parameters['pyxit_target_height'],
                             n_channels))
        _y = np.reshape(_y, (n_subw,
                             pyxit_parameters['pyxit_target_width'],
                             pyxit_parameters['pyxit_target_height'],
                             1))
        print(type(_X))
        print(type(_y))

        # ImageDataGenerator: two instances with the same arguments.
        # BUG FIX: rescale was `1 / 255`, which is integer division (== 0)
        # under Python 2 and silently disabled rescaling.
        print("Init data gen")
        data_gen_args = dict(rotation_range=180.,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             zoom_range=0.2,
                             rescale=1. / 255,
                             horizontal_flip=True,
                             vertical_flip=True)
        image_datagen = ImageDataGenerator(**data_gen_args)
        mask_datagen = ImageDataGenerator(**data_gen_args)

        # Same seed for fit and flow so image/mask augmentations stay aligned.
        seed = 1
        print("Fit image data generator (image)...")
        image_datagen.fit(_X[0:100], augment=True, seed=seed)
        print("Fit image data generator (mask)...")
        mask_datagen.fit(_y[0:100], augment=True, seed=seed)
        print('Flow on images...')
        image_generator = image_datagen.flow_from_directory(
            os.path.join(parameters['dir_ls'], "image"),
            class_mode=None,
            target_size=(128, 128),
            seed=seed)
        print('Flow on masks...')
        mask_generator = mask_datagen.flow_from_directory(
            os.path.join(parameters['dir_ls'], "mask"),
            class_mode=None,
            target_size=(128, 128),
            seed=seed)
        # Combine generators into one which yields (image, mask) batches.
        train_generator = combine_generator(image_generator, mask_generator)

        # Creating and compiling model.
        if not os.path.exists(parameters['keras_save_to']):
            os.makedirs(parameters['keras_save_to'])
        model_weights_filename = os.path.join(parameters['keras_save_to'],
                                              "weights_" + model_name + ".h5")
        print('Fitting model...')
        model = get_unet(128, 128)
        model_checkpoint = ModelCheckpoint(model_weights_filename,
                                           monitor='val_loss',
                                           save_best_only=True)
        # Train FCN.
        model.fit_generator(train_generator,
                            steps_per_epoch=100,
                            epochs=50,
                            callbacks=[model_checkpoint],
                            verbose=1)
def main(argv):
    """Build a Pyxit segmentation model from Cytomine annotations.

    Workflow: parse CLI options -> connect to Cytomine as a user job ->
    download annotation crops from the configured projects -> turn the
    multi-term problem into a binary one (predict terms vs. the rest) ->
    extract random subwindows -> fit an ExtraTrees-based PyxitClassifier
    -> pickle the class list and the fitted model to `pyxit_save_to`
    (the model is NOT published to the Cytomine DB).

    :param argv: command-line argument list forwarded to optparse.

    NOTE(review): fills the module-level dicts `parameters` and
    `pyxit_parameters`, and relies on module-level names defined
    elsewhere in this file (`cytomine`, `str2bool`, `build_from_dir`,
    `_get_output_from_mask`, `Annotation`, `ExtraTreesClassifier`,
    `PyxitClassifier`, `strftime`, `localtime`).
    """
    # Define command line options
    p = optparse.OptionParser(
        description='Pyxit/Cytomine Segmentation Model Builder',
        prog='PyXit Segmentation Model Builder (PYthon piXiT)')
    # -- Cytomine connection options --
    p.add_option("--cytomine_host", type="string", default='',
                 dest="cytomine_host",
                 help="The Cytomine host (eg: beta.cytomine.be, localhost:8080)")
    p.add_option('--cytomine_public_key', type="string", default='',
                 dest="cytomine_public_key", help="Cytomine public key")
    p.add_option('--cytomine_private_key', type="string", default='',
                 dest="cytomine_private_key", help="Cytomine private key")
    p.add_option('--cytomine_base_path', type="string", default='/api/',
                 dest="cytomine_base_path", help="Cytomine base path")
    p.add_option('--cytomine_id_software', type="int",
                 dest="cytomine_id_software",
                 help="The Cytomine software identifier")
    p.add_option('--cytomine_working_path', default="/tmp/", type="string",
                 dest="cytomine_working_path",
                 help="The working directory (eg: /tmp)")
    p.add_option('--cytomine_id_project', type="int",
                 dest="cytomine_id_project",
                 help="The Cytomine project identifier")
    # -- Annotation selection options --
    p.add_option('-z', '--cytomine_zoom_level', type='int',
                 dest='cytomine_zoom_level', help="working zoom level")
    p.add_option('--cytomine_annotation_projects', type="string",
                 dest="cytomine_annotation_projects",
                 help="Projects from which annotations are extracted")
    p.add_option('--cytomine_predict_terms', type='string', default='0',
                 dest='cytomine_predict_terms',
                 help="term ids of predicted terms (=positive class in binary mode)")
    p.add_option('--cytomine_excluded_terms', type='string', default='0',
                 dest='cytomine_excluded_terms',
                 help="term ids of excluded terms")
    p.add_option('--cytomine_reviewed', type='string', default="False",
                 dest="cytomine_reviewed",
                 help="Get reviewed annotations only")
    # -- Pyxit subwindow-extraction options --
    p.add_option('--pyxit_target_width', type='int',
                 dest='pyxit_target_width', help="pyxit subwindows width")
    p.add_option('--pyxit_target_height', type='int',
                 dest='pyxit_target_height', help="pyxit subwindows height")
    p.add_option('--pyxit_save_to', type='string', dest='pyxit_save_to',
                 help="pyxit model directory")  # future: get it from server db
    p.add_option('--pyxit_colorspace', type='int', dest='pyxit_colorspace',
                 help="pyxit colorspace encoding")  # future: get it from server db
    p.add_option('--pyxit_n_jobs', type='int', dest='pyxit_n_jobs',
                 help="pyxit number of jobs for trees")  # future: get it from server db
    p.add_option('--pyxit_n_subwindows', default=10, type="int",
                 dest="pyxit_n_subwindows", help="number of subwindows")
    p.add_option('--pyxit_interpolation', default=2, type="int",
                 dest="pyxit_interpolation", help="interpolation method 1,2,3,4")
    p.add_option('--pyxit_transpose', type="string", default="False",
                 dest="pyxit_transpose", help="transpose subwindows")
    p.add_option('--pyxit_fixed_size', type="string", default="False",
                 dest="pyxit_fixed_size", help="extract fixed size subwindows")
    # -- Extra-trees forest options --
    p.add_option('--forest_n_estimators', default=10, type="int",
                 dest="forest_n_estimators", help="number of base estimators (T)")
    p.add_option('--forest_max_features', default=1, type="int",
                 dest="forest_max_features", help="max features at test node (k)")
    p.add_option('--forest_min_samples_split', default=1, type="int",
                 dest="forest_min_samples_split",
                 help="minimum node sample size (nmin)")
    p.add_option('--verbose', type="string", default="0", dest="verbose",
                 help="Turn on (1) or off (0) verbose mode")

    options, arguments = p.parse_args(args=argv)

    # Copy parsed options into the module-level parameter dicts
    parameters['cytomine_host'] = options.cytomine_host
    parameters['cytomine_public_key'] = options.cytomine_public_key
    parameters['cytomine_private_key'] = options.cytomine_private_key
    parameters['cytomine_base_path'] = options.cytomine_base_path
    parameters['cytomine_working_path'] = options.cytomine_working_path
    # NOTE(review): duplicate assignment of cytomine_base_path (harmless)
    parameters['cytomine_base_path'] = options.cytomine_base_path
    parameters['cytomine_id_project'] = options.cytomine_id_project
    parameters['cytomine_id_software'] = options.cytomine_id_software
    # Comma-separated id strings -> lists of ints (Python 2 map returns a list)
    parameters['cytomine_annotation_projects'] = map(
        int, options.cytomine_annotation_projects.split(','))
    parameters['cytomine_predict_terms'] = map(
        int, options.cytomine_predict_terms.split(','))
    parameters['cytomine_excluded_terms'] = map(
        int, options.cytomine_excluded_terms.split(','))
    parameters['cytomine_zoom_level'] = options.cytomine_zoom_level
    parameters['cytomine_reviewed'] = str2bool(options.cytomine_reviewed)
    pyxit_parameters['pyxit_target_width'] = options.pyxit_target_width
    pyxit_parameters['pyxit_target_height'] = options.pyxit_target_height
    pyxit_parameters['pyxit_n_subwindows'] = options.pyxit_n_subwindows
    pyxit_parameters['pyxit_colorspace'] = options.pyxit_colorspace
    pyxit_parameters['pyxit_interpolation'] = options.pyxit_interpolation
    pyxit_parameters['pyxit_transpose'] = str2bool(options.pyxit_transpose)
    pyxit_parameters['pyxit_fixed_size'] = str2bool(options.pyxit_fixed_size)
    pyxit_parameters['forest_n_estimators'] = options.forest_n_estimators
    pyxit_parameters['forest_max_features'] = options.forest_max_features
    pyxit_parameters['forest_min_samples_split'] = options.forest_min_samples_split
    pyxit_parameters['pyxit_save_to'] = options.pyxit_save_to
    pyxit_parameters['pyxit_n_jobs'] = options.pyxit_n_jobs

    # Check for errors in the options
    if options.verbose:
        print "[pyxit.main] Options = ", options

    # Create JOB/USER/JOB: open the connection to the Cytomine server
    conn = cytomine.Cytomine(parameters["cytomine_host"],
                             parameters["cytomine_public_key"],
                             parameters["cytomine_private_key"],
                             base_path=parameters['cytomine_base_path'],
                             working_path=parameters['cytomine_working_path'],
                             verbose=str2bool(options.verbose))

    # Create a new userjob if connected as human user, then switch the
    # connection credentials to that job user
    current_user = conn.get_current_user()
    if current_user.algo == False:  # NOTE(review): idiomatic form is `if not current_user.algo`
        print "adduserJob..."
        user_job = conn.add_user_job(parameters['cytomine_id_software'],
                                     parameters['cytomine_id_project'])
        print "set_credentials..."
        conn.set_credentials(str(user_job.publicKey), str(user_job.privateKey))
        print "done"
    else:
        user_job = current_user
        print "Already running as userjob"
    job = conn.get_job(user_job.job)

    # Local directory where annotation crops will be dumped:
    # <working_path>/<prj1-prj2-...>/zoom_level/<zoom>
    pyxit_parameters['dir_ls'] = os.path.join(
        parameters["cytomine_working_path"],
        str(parameters['cytomine_annotation_projects']).replace(
            ',', '-').replace('[', '').replace(']', '').replace(' ', ''),
        "zoom_level", str(parameters['cytomine_zoom_level']))
    if not os.path.exists(pyxit_parameters['dir_ls']):
        print "Creating annotation directory: %s" % pyxit_parameters['dir_ls']
        os.makedirs(pyxit_parameters['dir_ls'])
    time.sleep(2)

    # Publish software parameter values on the Cytomine server
    job = conn.update_job_status(
        job, status_comment="Publish software parameters values")
    # NOTE(review): all_params aliases pyxit_parameters, so update() also
    # mutates pyxit_parameters itself
    all_params = pyxit_parameters
    all_params.update(parameters)
    job_parameters_values = conn.add_job_parameters(
        user_job.job, conn.get_software(parameters['cytomine_id_software']),
        all_params)

    # Get annotation data
    job = conn.update_job_status(job, status=job.RUNNING,
                                 status_comment="Fetching data", progress=0)
    # Retrieve annotations from each annotation project, either reviewed or
    # unreviewed annotations, merging everything into a single collection
    annotations = None
    for prj in parameters['cytomine_annotation_projects']:
        if parameters["cytomine_reviewed"]:
            print "Retrieving reviewed annotations..."
            annotations_prj = conn.get_annotations(id_project=prj,
                                                   reviewed_only=True)
            print "Reviewed annotations: %d" % len(annotations_prj.data())
        else:
            print "Retrieving (unreviewed) annotations..."
            annotations_prj = conn.get_annotations(id_project=prj)
            print "(Unreviewed) annotations: %d" % len(annotations_prj.data())
        if not annotations:
            annotations = annotations_prj
        else:
            annotations.data().extend(annotations_prj.data())
        print "Nb annotations so far... = %d" % len(annotations.data())
        time.sleep(3)
    print "Total annotations projects %s = %d" % (
        parameters['cytomine_annotation_projects'], len(annotations.data()))
    time.sleep(3)
    print "Predict terms / excluded terms"
    print parameters['cytomine_predict_terms']
    print parameters['cytomine_excluded_terms']
    time.sleep(3)

    # Download alpha-masked crops of the annotations at the requested zoom
    annotations = conn.dump_annotations(
        annotations=annotations,
        get_image_url_func=Annotation.get_annotation_alpha_crop_url,
        dest_path=pyxit_parameters['dir_ls'],
        excluded_terms=parameters['cytomine_excluded_terms'],
        desired_zoom=parameters['cytomine_zoom_level'])

    # Build matrix (subwindows described by pixel values and output) for training
    project = conn.get_project(parameters['cytomine_id_project'])
    terms = conn.get_terms(project.ontology)
    map_classes = {}
    # build X, Y. Change initial problem into binary problem :
    # "predict_terms" (class 1) vs others (class 0)
    for term in terms.data():
        if term.id in parameters['cytomine_predict_terms']:
            map_classes[term.id] = 1
        else:
            map_classes[term.id] = 0
    print pyxit_parameters

    # Prepare image matrix
    X, y = build_from_dir(pyxit_parameters['dir_ls'], map_classes)
    print "X length: %d " % len(X)
    print "Y length: %d " % len(y)
    time.sleep(5)
    # classes = np.unique(y)
    classes = [0, 1]  # binary problem by construction of map_classes
    n_classes = len(classes)  # NOTE(review): unused below
    y_original = y
    # Map labels onto their index in `classes`
    y = np.searchsorted(classes, y)

    # Instantiate classifiers
    job = conn.update_job_status(
        job, status=job.RUNNING,
        status_comment="[pyxit.main] Initializing PyxitClassifier...",
        progress=25)
    # Extra-trees forest used as Pyxit's base estimator
    forest = ExtraTreesClassifier(
        n_estimators=pyxit_parameters['forest_n_estimators'],
        max_features=pyxit_parameters['forest_max_features'],
        min_samples_split=pyxit_parameters['forest_min_samples_split'],
        n_jobs=pyxit_parameters['pyxit_n_jobs'],
        verbose=True)
    pyxit = PyxitClassifier(
        base_estimator=forest,
        n_subwindows=pyxit_parameters['pyxit_n_subwindows'],
        min_size=0.0,  # segmentation use fixed-size subwindows
        max_size=1.0,  # segmentation use fixed-size subwindows
        target_width=pyxit_parameters['pyxit_target_width'],
        target_height=pyxit_parameters['pyxit_target_height'],
        interpolation=pyxit_parameters['pyxit_interpolation'],
        transpose=pyxit_parameters['pyxit_transpose'],
        colorspace=pyxit_parameters['pyxit_colorspace'],
        fixed_size=pyxit_parameters['pyxit_fixed_size'],
        n_jobs=pyxit_parameters['pyxit_n_jobs'],
        verbose=True,
        get_output=_get_output_from_mask)

    # Open the model file and pickle the class list first; the fitted model
    # is appended to the same file handle after training (see below)
    if pyxit_parameters['pyxit_save_to']:
        d = os.path.dirname(pyxit_parameters['pyxit_save_to'])
        if not os.path.exists(d):
            os.makedirs(d)
        fd = open(pyxit_parameters['pyxit_save_to'], "wb")
        pickle.dump(classes, fd, protocol=pickle.HIGHEST_PROTOCOL)

    job = conn.update_job_status(
        job,
        status_comment="[pyxit.main] Extracting %d subwindows from each image in %s"
        % (pyxit_parameters['pyxit_n_subwindows'], pyxit_parameters['dir_ls']),
        progress=50)
    time.sleep(3)
    # Extract random subwindows in dumped annotations
    _X, _y = pyxit.extract_subwindows(X, y)

    # Build pixel classifier
    job = conn.update_job_status(
        job,
        status_comment="[pyxit.main] Fitting Pyxit Segmentation Model on %s",
        progress=75)
    print "TIME : %s" % strftime("%Y-%m-%d %H:%M:%S", localtime())
    start = time.time()
    # Reuse the subwindows extracted above instead of re-extracting them
    pyxit.fit(X, y, _X=_X, _y=_y)
    end = time.time()
    print "Elapsed time FIT: %d s" % (end - start)
    print "TIME : %s" % strftime("%Y-%m-%d %H:%M:%S", localtime())
    print "pyxit.base_estimator.n_classes_"
    print pyxit.base_estimator.n_classes_
    print "pyxit.base_estimator.classes_"
    print pyxit.base_estimator.classes_

    if options.verbose:
        print "----------------------------------------------------------------"
        print "[pyxit.main] Saving Pyxit Segmentation Model locally into %s" % pyxit_parameters['pyxit_save_to']
        print "----------------------------------------------------------------"

    # Save model on local disk (appended after the class list pickled above)
    if pyxit_parameters['pyxit_save_to']:
        pickle.dump(pyxit, fd, protocol=pickle.HIGHEST_PROTOCOL)
    if pyxit_parameters['pyxit_save_to']:
        fd.close()
    print "Not Publishing model in db.."
    # job_data = conn.add_job_data(job, "model", pyxit_parameters['pyxit_save_to'])
    job = conn.update_job_status(job, status=job.TERMINATED,
                                 status_comment="Finish", progress=100)
    print "END."
def run(cyto_job, parameters):
    """Apply a pickled Pyxit segmentation model to every image of a project.

    Each whole-slide image is browsed tile by tile (CytomineReader);
    non-empty tiles (stddev/mean filter) are segmented pixel-wise by
    propagating subwindows through the model, the resulting confidence
    map is thresholded into a mask, and the detected components are
    uploaded as annotations. Optional per-image steps: area-based
    post-processing, polygon union (external groovy/JTS script),
    post-classification with a second pickled model, and ROI statistics.

    :param cyto_job: Cytomine job wrapper exposing .job and .project.
    :param parameters: job parameters namespace (attribute access).

    NOTE(review): the matching except/finally of the `try:` below is not
    shown in this excerpt; it lies later in the file. Relies on helpers
    defined elsewhere (`rasterize_tile_roi_union`, `_partition_images`,
    `_parallel_crop_boxes`, `_parallel_confidence_map`, `process_mask`,
    `build_from_dir`, `WholeSlide`, `CytomineReader`, `Bounds`,
    `ObjectFinder`, `Polygon`, ...).
    """
    logging.info("----- segmentation_prediction v%s -----", __version__)
    logging.info("Entering run(cyto_job=%s, parameters=%s)", cyto_job, parameters)

    job = cyto_job.job
    project = cyto_job.project

    # Kept at function scope so cleanup code can delete the last progress
    # annotation if needed
    current_tile_annotation = None

    # Per-job scratch directory for tile and output images
    working_path = os.path.join("tmp", str(job.id))
    if not os.path.exists(working_path):
        logging.info("Creating annotation directory: %s", working_path)
        os.makedirs(working_path)

    try:
        # Initialization
        pyxit_target_width = parameters.pyxit_target_width
        pyxit_target_height = parameters.pyxit_target_height
        tile_size = parameters.cytomine_tile_size
        zoom = parameters.cytomine_zoom_level
        predictionstep = int(parameters.cytomine_predict_step)
        mindev = parameters.cytomine_tile_min_stddev
        maxmean = parameters.cytomine_tile_max_mean

        # The model file contains two pickles: the class list (unused here)
        # then the fitted classifier
        logging.info("Loading prediction model (local)")
        fp = open(parameters.pyxit_load_from, "r")
        logging.debug(fp)
        pickle.load(fp)  # classes => not needed
        pyxit = pickle.load(fp)
        pyxit.n_jobs = parameters.pyxit_nb_jobs  # multithread subwindows extraction in pyxit
        pyxit.base_estimator.n_jobs = parameters.pyxit_nb_jobs  # multithread tree propagation

        # loop for images in the project id TODO let user specify the images to process
        images = ImageInstanceCollection().fetch_with_filter("project", project.id)
        nb_images = len(images)
        logging.info("# images in project: %d", nb_images)
        progress = 0
        # NOTE(review): Py2 integer division when nb_images > 100 yields 0
        progress_delta = 100 / nb_images

        # Go through all images
        for (i, image) in enumerate(images):
            image_str = "{} ({}/{})".format(image.instanceFilename, i + 1, nb_images)

            job.update(progress=progress,
                       statusComment="Analyzing image {}...".format(image_str))
            logging.debug(
                "Image id: %d width: %d height: %d resolution: %f magnification: %d filename: %s",
                image.id, image.width, image.height, image.resolution,
                image.magnification, image.filename)

            image.colorspace = "RGB"  # required for correct handling in CytomineReader

            # Create local object to access the remote whole slide
            logging.debug("Creating connector to Slide Image from Cytomine server")
            whole_slide = WholeSlide(image)
            logging.debug("Wholeslide: %d x %d pixels",
                          whole_slide.width, whole_slide.height)

            # endx and endy allow to stop image analysis at a given x, y position (for debugging)
            endx = parameters.cytomine_endx if parameters.cytomine_endx else whole_slide.width
            endy = parameters.cytomine_endy if parameters.cytomine_endy else whole_slide.height

            # initialize variables and tools for ROI
            nx = tile_size
            ny = tile_size

            # Closed tile outline in local (tile) coordinates
            local_tile_component = ([(0, 0), (0, ny), (nx, ny), (nx, 0), (0, 0)], [])

            # We can apply the segmentation model either in the whole slide
            # (including background area), or only within multiple ROIs (of a
            # given term). For example ROI could be generated first using a
            # thresholding step to detect the tissue. Here we build a polygon
            # union containing all roi_annotations locations (user or
            # reviewed annotations) to later match tile with roi masks
            if parameters.cytomine_roi_term:
                logging.debug("Retrieving ROI annotations")
                roi_annotations = AnnotationCollection(
                    image=image.id,
                    term=parameters.cytomine_roi_term,
                    showWKT=True,
                    showTerm=True,
                    reviewed=parameters.cytomine_reviewed_roi).fetch()

                roi_annotations_locations = []
                for roi_annotation in roi_annotations:
                    roi_annotations_locations.append(
                        shapely.wkt.loads(roi_annotation.location))
                roi_annotations_union = shapely.ops.unary_union(
                    roi_annotations_locations)
            else:  # no ROI used
                # We build a rectangular roi_mask corresponding to the whole
                # image filled with ones
                logging.debug("Processing all tiles")
                # NOTE(review): np.bool is removed in NumPy >= 1.24; use
                # `bool` or `np.bool_` when upgrading
                roi_mask = np.ones((ny, nx), dtype=np.bool)

            # Initiate the reader object which browse the whole slide image
            # with tiles of size tile_size
            logging.info("Initiating the Slide reader")
            reader = CytomineReader(
                whole_slide,
                window_position=Bounds(parameters.cytomine_startx,
                                       parameters.cytomine_starty,
                                       tile_size, tile_size),
                zoom=zoom,
                # overlap needed because the predictions at the borders of
                # the tile are removed
                overlap=pyxit_target_width + 1)

            wsi = 0  # tile number

            logging.info("Starting browsing the image using tiles")
            while True:
                # Tile outline in whole-slide coordinates
                tile_component = reader.convert_to_real_coordinates(
                    [local_tile_component])[0]
                tile_polygon = shapely.geometry.Polygon(
                    tile_component[0], tile_component[1])

                # Get rasterized roi mask to match with this tile (if no ROI
                # used, the roi_mask was built before and corresponds to the
                # whole image).
                if parameters.cytomine_roi_term:
                    roi_mask = rasterize_tile_roi_union(
                        nx, ny, tile_polygon, roi_annotations_union, reader)

                if np.count_nonzero(roi_mask) == 0:
                    logging.info(
                        "Tile %d is not included in any ROI, skipping processing", wsi)
                else:
                    # Browse the whole slide image with catch exception
                    # (retry forever on network errors)
                    while True:
                        try:
                            reader.read()
                            break
                        except socket.timeout:
                            logging.error("Socket timeout for tile %d: %s",
                                          wsi, socket.timeout)
                            time.sleep(1)
                        except socket.error:
                            logging.error("Socket error for tile %d: %s",
                                          wsi, socket.error)
                            time.sleep(1)
                    tile = reader.data

                    # Get statistics about the current tile
                    logging.info("Computing tile %d statistics", wsi)
                    pos = reader.window_position
                    logging.debug(
                        "Tile zoom: %d, posx: %d, posy: %d, poswidth: %d, posheight: %d",
                        zoom, pos.x, pos.y, pos.width, pos.height)
                    tilemean = ImageStat.Stat(tile).mean
                    logging.debug("Tile mean pixel values: %d %d %d",
                                  tilemean[0], tilemean[1], tilemean[2])
                    tilestddev = ImageStat.Stat(tile).stddev
                    logging.debug("Tile stddev pixel values: %d %d %d",
                                  tilestddev[0], tilestddev[1], tilestddev[2])

                    # Criteria to determine if tile is empty, specific to this
                    # application: low stddev (flat) or high mean (bright
                    # background) on all three channels
                    if ((tilestddev[0] < mindev and tilestddev[1] < mindev
                         and tilestddev[2] < mindev)
                            or (tilemean[0] > maxmean and tilemean[1] > maxmean
                                and tilemean[2] > maxmean)):
                        logging.info(
                            "Tile %d empty (filtered by min stddev or max mean)", wsi)
                    else:
                        # This tile is not empty, we process it
                        # Add current tile annotation on server just for
                        # progress visualization purpose
                        current_tile_annotation = Annotation(tile_polygon.wkt,
                                                             image.id).save()

                        # Save the tile image locally
                        image_filename = "%s/%d-zoom_%d-tile_%d_x%d_y%d_w%d_h%d.png" \
                            % (working_path, image.id, zoom, wsi,
                               pos.x, pos.y, pos.width, pos.height)
                        tile.save(image_filename, "PNG")
                        logging.debug("Tile file: %s", image_filename)

                        logging.info("Extraction of subwindows in tile %d", wsi)
                        width, height = tile.size
                        half_subwindow_width = int(pyxit_target_width / 2)
                        half_subwindow_height = int(pyxit_target_height / 2)

                        # Coordinates of centers of extracted subwindows,
                        # sampled every `predictionstep` pixels away from borders
                        y_roi = range(half_subwindow_height,
                                      height - half_subwindow_height,
                                      predictionstep)
                        x_roi = range(half_subwindow_width,
                                      width - half_subwindow_width,
                                      predictionstep)
                        logging.info("%d subwindows to extract",
                                     len(x_roi) * len(y_roi))

                        n_jobs = parameters.cytomine_nb_jobs
                        n_jobs, _, starts = _partition_images(n_jobs, len(y_roi))

                        # Parallel extraction of subwindows in the current tile
                        all_data = Parallel(n_jobs=n_jobs)(
                            delayed(_parallel_crop_boxes)
                            (y_roi[starts[k]:starts[k + 1]],
                             x_roi, image_filename, half_subwindow_width,
                             half_subwindow_height, parameters.pyxit_colorspace)
                            for k in xrange(n_jobs))

                        # Reduce: concatenate the per-worker boxes and features
                        boxes = np.vstack(box for box, _ in all_data)
                        _X = np.vstack([X for _, X in all_data])

                        logging.info("Prediction of subwindows for tile %d", wsi)
                        # Propagate subwindow feature vectors (X) into trees
                        # and get probabilities
                        _Y = pyxit.base_estimator.predict_proba(_X)

                        # Warning: we get output vectors for all classes for
                        # pixel (0,0) for all subwindows, then pixel
                        # predictions for pixel (0,1) for all subwindows, ...
                        # We do not get predictions window after window, but
                        # output after output
                        # => Y is a list of length m, where m = nb of pixels
                        # by subwindow ; each element of the list is itself a
                        # list of size n, where n = nb of subwindows; for each
                        # subwindow, the probabilities for each class are given

                        # <optimized code
                        logging.info(
                            "Parallel construction of confidence map in current tile")
                        pixels = range(pyxit_target_width * pyxit_target_height)
                        n_jobs, _, starts = _partition_images(n_jobs, len(pixels))

                        all_votes_class = Parallel(n_jobs=n_jobs)(
                            delayed(_parallel_confidence_map)
                            (pixels[starts[k]:starts[k + 1]],
                             _Y[starts[k]:starts[k + 1]],
                             boxes, width, height,
                             pyxit.base_estimator.n_classes_[0],
                             pyxit_target_width, pyxit_target_height)
                            for k in xrange(n_jobs))

                        # Sum the per-worker vote maps into one confidence map
                        votes_class = all_votes_class[0]
                        for v in all_votes_class[1:]:
                            votes_class += v
                        # optimized code>

                        logging.info("Delete borders")
                        # Delete predictions at borders (forced to background
                        # class [1, 0] where subwindows could not be centered)
                        for k in xrange(0, width):
                            for j in xrange(0, half_subwindow_height):
                                votes_class[j, k, :] = [1, 0]
                            for j in xrange(height - half_subwindow_height, height):
                                votes_class[j, k, :] = [1, 0]
                        for j in xrange(0, height):
                            for k in xrange(0, half_subwindow_width):
                                votes_class[j, k, :] = [1, 0]
                            for k in xrange(width - half_subwindow_width, width):
                                votes_class[j, k, :] = [1, 0]

                        # Argmax over classes -> binary mask scaled to 0/255
                        votes = np.argmax(votes_class, axis=2) * 255

                        # only predict in roi region based on roi mask
                        votes[np.logical_not(roi_mask)] = 0

                        # process mask
                        votes = process_mask(votes)
                        votes = votes.astype(np.uint8)

                        # Save of confidence map locally
                        logging.info("Creating output tile file locally")
                        output = Image.fromarray(votes)
                        outputfilename = "%s/%d-zoom_%d-tile_%d_xxOUTPUT-%dx%d.png" \
                            % (working_path, image.id, zoom, wsi,
                               pyxit_target_width, pyxit_target_height)
                        output.save(outputfilename, "PNG")
                        logging.debug("Tile OUTPUT file: %s", outputfilename)

                        # Convert and transfer annotations of current tile
                        logging.info("Find components")
                        components = ObjectFinder(votes).find_components()
                        components = reader.convert_to_real_coordinates(components)
                        polygons = [Polygon(component[0], component[1])
                                    for component in components]

                        logging.info("Uploading annotations...")
                        logging.debug("Number of polygons: %d" % len(polygons))
                        start = time.time()

                        for poly in polygons:
                            geometry = poly.wkt
                            if not poly.is_valid:
                                # Try to repair self-intersections before upload
                                logging.warning(
                                    "Invalid geometry, try to correct it with buffer")
                                logging.debug("Geometry prior to modification: %s",
                                              geometry)
                                new_poly = poly.buffer(0)
                                if not new_poly.is_valid:
                                    logging.error(
                                        "Failed to make valid geometry, skipping this polygon")
                                    continue
                                geometry = new_poly.wkt

                            logging.debug("Uploading geometry %s", geometry)

                            startsingle = time.time()
                            # Retry forever on network errors
                            while True:
                                try:
                                    # TODO: save collection of annotations
                                    annot = Annotation(
                                        geometry, image.id,
                                        [parameters.cytomine_predict_term]).save()
                                    if not annot:
                                        logging.error(
                                            "Annotation could not be saved ; location = %s",
                                            geometry)
                                    break
                                # NOTE(review): Py2 form — this catches ONLY
                                # socket.timeout and rebinds it to the name
                                # socket.error; to catch both, use
                                # `except (socket.timeout, socket.error):`
                                except socket.timeout, socket.error:
                                    logging.error("socket timeout/error add_annotation")
                                    time.sleep(1)
                            endsingle = time.time()
                            logging.debug(
                                "Elapsed time for adding single annotation: %d",
                                endsingle - startsingle)

                        # current time
                        end = time.time()
                        logging.debug("Elapsed time for adding all annotations: %d",
                                      end - start)

                        # Delete current tile annotation (progress visualization)
                        current_tile_annotation.delete()

                wsi += 1
                # Stop when the reader is exhausted or past the debug end position
                if not reader.next() or (reader.window_position.x > endx
                                         and reader.window_position.y > endy):
                    break
                # end of browsing the whole slide

            # Postprocessing to remove small/large annotations according to min/max area
            if parameters.cytomine_postproc:
                logging.info("Post-processing before union...")
                job.update(progress=progress + progress_delta / 4,
                           statusComment="Post-processing image {}...".format(image_str))
                while True:
                    try:
                        annotations = AnnotationCollection(id_user=job.userJob,
                                                           id_image=image.id,
                                                           showGIS=True)
                        break
                    # NOTE(review): same Py2 except pitfall as above; also,
                    # constructing the collection here does not fetch, so no
                    # socket error can occur in this try
                    except socket.timeout, socket.error:
                        logging.error("Socket timeout/error when fetching annotations")
                        time.sleep(1)

                # remove/edit useless annotations
                start = time.time()
                for annotation in annotations:
                    if (annotation.area == 0
                            or annotation.area < parameters.cytomine_min_size
                            or annotation.area > parameters.cytomine_max_size):
                        annotation.delete()
                    else:
                        logging.debug("Keeping annotation %d", annotation.id)
                end = time.time()
                logging.debug(
                    "Elapsed time for post-processing all annotations: %d"
                    % (end - start))

            # Segmentation model was applied on individual tiles. We need to
            # merge geometries generated from each tile. We use a groovy/JTS
            # script that downloads annotation geometries and perform union
            # locally to relieve the Cytomine server
            if parameters.cytomine_union:
                logging.info("Union of polygons for image %s", image.instanceFilename)
                job.update(progress=progress + progress_delta / 3,
                           statusComment="Union of polygons in image {}...".format(image_str))
                start = time.time()
                # NOTE(review): shell command embeds API keys; consider
                # subprocess.call with an argument list instead of os.system
                union_command = (
                    "groovy -cp \"lib/jars/*\" lib/union4.groovy " +
                    "%s %s %s %d %d %d %d %d %d %d %d %d %d" %
                    (cyto_job._base_url(False),
                     parameters.publicKey,
                     parameters.privateKey,
                     image.id,
                     job.userJob,
                     parameters.cytomine_predict_term,
                     parameters.cytomine_union_min_length,
                     parameters.cytomine_union_bufferoverlap,
                     parameters.cytomine_union_min_point_for_simplify,
                     parameters.cytomine_union_min_point,
                     parameters.cytomine_union_max_point,
                     parameters.cytomine_union_nb_zones_width,
                     parameters.cytomine_union_nb_zones_height))
                logging.info("Union command: %s", union_command)
                os.system(union_command)
                end = time.time()
                logging.info("Elapsed time union: %d s", end - start)

            # Perform classification of detected geometries using a
            # classification model (pkl)
            if parameters.pyxit_post_classification:
                logging.info("Post classification of all candidates")
                job.update(progress=progress + progress_delta * 2 / 3,
                           statusComment="Post-classification in image {}...".format(image_str))

                # Retrieve locally annotations from Cytomine core produced by
                # the segmentation job as candidates
                candidate_annotations = AnnotationCollection(
                    user=job.userJob,
                    image=image.id,
                    showWKT=True,
                    showMeta=True).fetch()

                folder_name = "%s/crops-candidates-%d/zoom-%d/" % (
                    working_path, image.id, zoom)
                if not os.path.exists(folder_name):
                    os.makedirs(folder_name)

                # Dump each candidate as an alpha-masked crop image
                dest_pattern = os.path.join(folder_name, "{id}.png")
                for annotation in candidate_annotations:
                    annotation.dump(dest_pattern, mask=True, alpha=True)
                    # np_image = cv2.imread(annotation.filename, -1)
                    # if np_image is not None:
                    #     alpha = np.array(np_image[:, :, 3])
                    #     image = np.array(np_image[:, :, 0:3])
                    #     image[alpha == 0] = (255,255,255)  # to replace surrounding by white
                    #     cv2.imwrite(annotation.filename, image)

                logging.debug("Building attributes from %s", folder_name)
                # Extract subwindows from all candidates
                x, y = build_from_dir(folder_name)
                # Post-classification model file: classes pickle then model pickle
                post_fp = open(parameters.pyxit_post_classification_save_to, "r")
                classes = pickle.load(post_fp)
                pyxit = pickle.load(post_fp)
                logging.debug(pyxit)  # pyxit parameters are in the model file

                y_proba = pyxit.predict_proba(x)
                y_predict = classes.take(np.argmax(y_proba, axis=1), axis=0)
                y_rate = np.max(y_proba, axis=1)

                # We classify each candidate annotation and keep only those
                # predicted as cytomine_predict_term
                for annotation in candidate_annotations:
                    j = np.where(x == annotation.filename)[0][0]
                    new_term = int(y_predict[j])
                    accepted = (new_term == parameters.cytomine_predict_term)
                    logging.debug(
                        "Annotation %d %s during post-classification (class: %d proba: %d)",
                        annotation.id, "accepted" if accepted else "rejected",
                        int(y_predict[j]), y_rate[j])

                    # Rejected candidates: swap the predicted term for the
                    # post-classifier's term
                    if not accepted:
                        AlgoAnnotationTerm(annotation.id,
                                           parameters.cytomine_predict_term).delete()
                        AlgoAnnotationTerm(annotation.id, new_term).save()

                logging.info("End of post-classification")
                # ...

            # Perform stats (counting) in roi area
            if parameters.cytomine_count and parameters.cytomine_roi_term:
                logging.info("Compute statistics")
                # Count number of annotations in roi area
                # Get Rois
                roi_annotations = AnnotationCollection(
                    image=image.id,
                    term=parameters.cytomine_roi_term,
                    showGIS=True).fetch()
                # Count included annotations (term = predict_term) in each ROI
                for roi_annotation in roi_annotations:
                    included_annotations = AnnotationCollection(
                        image=image.id,
                        user=job.userJob,
                        bboxAnnotation=roi_annotation.id).fetch()
                    logging.info(
                        "Stats of image %s: %d annotations included in ROI %d (%d %s)",
                        image.instanceFilename, len(included_annotations),
                        roi_annotation.id, roi_annotation.area,
                        roi_annotation.areaUnit)

            logging.info("Finished processing image %s", image.instanceFilename)
            progress += progress_delta
def main(argv):
    """Entry point of the Cytomine segmentation prediction / model-building script.

    Parses command-line options, connects to a Cytomine server, then,
    depending on the flags:
      * dumps training annotation crops locally and regroups them into a
        positive ("1") and a negative ("0") class directory,
      * computes statistics on the dumped crops,
      * builds and trains a Keras U-Net segmentation model from the dumped
        images and masks (augmented with two synchronized ImageDataGenerators).

    Parameters
    ----------
    argv : list of str
        Command-line arguments (without the program name), as accepted by
        ``optparse.OptionParser.parse_args``.
    """
    print("Main")

    # ------------------------------------------------------------------
    # Define command line options
    # ------------------------------------------------------------------
    p = optparse.OptionParser(description='Cytomine Segmentation prediction',
                              prog='Cytomine segmentation prediction',
                              version='0.1')
    p.add_option('--cytomine_host', type="string", default='beta.cytomine.be',
                 dest="cytomine_host",
                 help="The Cytomine host (eg: beta.cytomine.be, localhost:8080)")
    p.add_option('--cytomine_public_key', type="string", default='',
                 dest="cytomine_public_key", help="Cytomine public key")
    p.add_option('--cytomine_private_key', type="string", default='',
                 dest="cytomine_private_key", help="Cytomine private key")
    p.add_option('--cytomine_base_path', type="string", default='/api/',
                 dest="cytomine_base_path", help="Cytomine base path")
    p.add_option('--cytomine_id_software', type="int",
                 dest="cytomine_id_software",
                 help="The Cytomine software identifier")
    p.add_option('--cytomine_working_path', default="/tmp/", type="string",
                 dest="cytomine_working_path",
                 help="The working directory (eg: /tmp)")
    p.add_option('--cytomine_id_project', type="int",
                 dest="cytomine_id_project",
                 help="The Cytomine project identifier")
    p.add_option('-i', '--cytomine_id_image', type='int',
                 dest='cytomine_id_image',
                 help="image id from cytomine", metavar='IMAGE')
    p.add_option('-z', '--cytomine_zoom_level', type='int',
                 dest='cytomine_zoom_level', help="working zoom level")
    p.add_option('-j', '--nb_jobs', type='int', dest='nb_jobs',
                 help="number of parallel jobs")
    p.add_option('--cytomine_predict_terms', type='str',
                 dest='cytomine_predict_terms',
                 help="term id of all positive terms. The first term is the "
                      "output predicted annotation term")
    p.add_option('--cytomine_excluded_terms', type='string',
                 dest='cytomine_excluded_terms',
                 help="term id of excluded terms")
    p.add_option('--pyxit_target_width', type='int',
                 dest='pyxit_target_width', help="pyxit subwindows width")
    p.add_option('--pyxit_target_height', type='int',
                 dest='pyxit_target_height', help="pyxit subwindows height")
    p.add_option('--pyxit_colorspace', type='int',
                 dest='pyxit_colorspace', help="pyxit colorspace encoding")
    p.add_option('--pyxit_nb_jobs', type='int',
                 dest='pyxit_nb_jobs', help="pyxit number of jobs for trees")
    p.add_option('--pyxit_fixed_size', type='string', default="0",
                 dest="pyxit_fixed_size", help="extract fixed size subwindows")
    p.add_option('--pyxit_transpose', type='string', default="0",
                 dest="pyxit_transpose", help="transpose subwindows")
    # NOTE: default was the string "10" although the option type is int;
    # optparse does not convert defaults, so a numeric default is required.
    p.add_option('--pyxit_n_subwindows', type='int', default=10,
                 dest="pyxit_n_subwindows", help="number of subwindows")
    p.add_option('--pyxit_interpolation', default=2, type="int",
                 dest="pyxit_interpolation", help="interpolation method 1,2,3,4")
    p.add_option('--pyxit_min_size', default=0.5, type="float",
                 dest="pyxit_min_size", help="min size")
    p.add_option('--pyxit_max_size', default=1.0, type="float",
                 dest="pyxit_max_size", help="max size")
    p.add_option('--cytomine_reviewed', type='string', default="False",
                 dest="cytomine_reviewed", help="Get reviewed annotations only")
    p.add_option('--cytomine_dump_annotations', type='string', default="0",
                 dest="cytomine_dump_annotations",
                 help="Dump training annotations or not")
    p.add_option('--cytomine_dump_annotation_stats', type='string', default="0",
                 dest="cytomine_dump_annotation_stats",
                 help="Calculate stats on dumped annotations or not")
    p.add_option('--build_model', type="string", default="0",
                 dest="build_model", help="Turn on (1) or off (0) model building")
    p.add_option('--cytomine_annotation_projects', type="string",
                 dest="cytomine_annotation_projects",
                 help="Projects from which annotations are extracted")
    p.add_option('--verbose', type="string", default="0", dest="verbose",
                 help="Turn on (1) or off (0) verbose mode")
    p.add_option('--keras_save_to', type='string', default="",
                 dest='keras_save_to', help="keras model weight file")
    p.add_option('--keras_batch_size', type="int", dest="keras_batch_size",
                 help="Training batch size")
    p.add_option('--keras_n_epochs', type="int", dest="keras_n_epochs",
                 help="Number of epochs")
    p.add_option('--keras_shuffle', type="string", dest="keras_shuffle",
                 help="Turn on (1) or off (0) batch shuffle")
    p.add_option('--keras_validation_split', type="float",
                 dest="keras_validation_split", help="Batch validation split")

    options, arguments = p.parse_args(args=argv)

    # ------------------------------------------------------------------
    # Collect options into the parameter dictionaries used below
    # ------------------------------------------------------------------
    parameters = {}
    parameters['keras_save_to'] = options.keras_save_to
    parameters['keras_batch_size'] = options.keras_batch_size
    parameters['keras_n_epochs'] = options.keras_n_epochs
    parameters['keras_shuffle'] = options.keras_shuffle
    parameters['keras_validation_split'] = options.keras_validation_split
    parameters['cytomine_host'] = options.cytomine_host
    parameters['cytomine_public_key'] = options.cytomine_public_key
    parameters['cytomine_private_key'] = options.cytomine_private_key
    parameters['cytomine_base_path'] = options.cytomine_base_path
    parameters['cytomine_working_path'] = options.cytomine_working_path
    parameters['cytomine_id_project'] = options.cytomine_id_project
    parameters['cytomine_id_software'] = options.cytomine_id_software
    # list(...) so the term ids survive indexing ([0] below), repeated
    # membership tests, and str() formatting under Python 3, where map()
    # returns a one-shot iterator (identical behavior under Python 2).
    parameters['cytomine_predict_terms'] = list(map(
        int, options.cytomine_predict_terms.split(',')))
    # By convention the first positive term is the output annotation term.
    parameters['cytomine_predicted_annotation_term'] = parameters[
        'cytomine_predict_terms'][0]
    parameters['cytomine_excluded_terms'] = list(map(
        int, options.cytomine_excluded_terms.split(',')))
    parameters['pyxit_colorspace'] = options.pyxit_colorspace
    parameters['pyxit_nb_jobs'] = options.pyxit_nb_jobs
    parameters['pyxit_n_jobs'] = options.pyxit_nb_jobs
    parameters['cytomine_nb_jobs'] = options.pyxit_nb_jobs
    parameters['cytomine_id_image'] = options.cytomine_id_image
    parameters['cytomine_zoom_level'] = options.cytomine_zoom_level
    parameters['nb_jobs'] = options.nb_jobs
    parameters['pyxit_target_width'] = options.pyxit_target_width
    parameters['pyxit_target_height'] = options.pyxit_target_height
    parameters['pyxit_n_subwindows'] = options.pyxit_n_subwindows
    parameters['pyxit_interpolation'] = options.pyxit_interpolation
    parameters['pyxit_transpose'] = str2bool(options.pyxit_transpose)
    parameters['pyxit_min_size'] = options.pyxit_min_size
    parameters['pyxit_max_size'] = options.pyxit_max_size
    parameters['pyxit_fixed_size'] = str2bool(options.pyxit_fixed_size)
    parameters['cytomine_annotation_projects'] = list(map(
        int, options.cytomine_annotation_projects.split(',')))
    parameters['cytomine_reviewed'] = str2bool(options.cytomine_reviewed)
    parameters['cytomine_dump_annotation_stats'] = str2bool(
        options.cytomine_dump_annotation_stats)
    parameters['cytomine_dump_annotations'] = str2bool(
        options.cytomine_dump_annotations)
    parameters['build_model'] = str2bool(options.build_model)
    # Local dump directory, e.g. <working_path>/<prj1>-<prj2>/zoom_level/<z>
    # (str([1, 2]) -> "[1, 2]" -> "1-2" after the replace chain).
    parameters['dir_ls'] = os.path.join(
        parameters["cytomine_working_path"],
        str(parameters['cytomine_annotation_projects']).replace(
            ',', '-').replace('[', '').replace(']', '').replace(' ', ''),
        "zoom_level", str(parameters['cytomine_zoom_level']))

    pyxit_parameters = {}
    pyxit_parameters['pyxit_target_width'] = options.pyxit_target_width
    pyxit_parameters['pyxit_target_height'] = options.pyxit_target_height
    pyxit_parameters['pyxit_n_subwindows'] = options.pyxit_n_subwindows
    pyxit_parameters['pyxit_min_size'] = options.pyxit_min_size
    pyxit_parameters['pyxit_max_size'] = options.pyxit_max_size
    pyxit_parameters['pyxit_colorspace'] = options.pyxit_colorspace
    pyxit_parameters['pyxit_interpolation'] = options.pyxit_interpolation
    pyxit_parameters['pyxit_transpose'] = str2bool(options.pyxit_transpose)
    pyxit_parameters['pyxit_fixed_size'] = str2bool(options.pyxit_fixed_size)
    pyxit_parameters['pyxit_n_jobs'] = options.pyxit_nb_jobs

    # verbose is a string option defaulting to "0" (truthy); convert it,
    # consistent with the verbose= argument of the connection below.
    if str2bool(options.verbose):
        print(parameters)

    # Create Cytomine connection
    conn = cytomine.Cytomine(parameters["cytomine_host"],
                             parameters["cytomine_public_key"],
                             parameters["cytomine_private_key"],
                             base_path=parameters['cytomine_base_path'],
                             working_path=parameters['cytomine_working_path'],
                             verbose=str2bool(options.verbose))

    # ------------------------------------------------------------------
    # Dump annotations
    # ------------------------------------------------------------------
    if parameters['cytomine_dump_annotations']:
        # Get annotation descriptions (JSON) from project(s)
        annotations = None
        for prj in parameters['cytomine_annotation_projects']:
            if parameters["cytomine_reviewed"]:
                annotations_prj = conn.get_annotations(
                    id_project=prj,
                    reviewed_only=parameters["cytomine_reviewed"])
            else:
                annotations_prj = conn.get_annotations(id_project=prj)
            if not annotations:
                annotations = annotations_prj
            else:
                annotations.data().extend(annotations_prj.data())
            # NOTE(review): hard-coded project and term ids — presumably a
            # dataset-specific extra term to include; confirm before reuse.
            if prj == 21907448 or prj == 155194683:
                annotations_prj = conn.get_annotations(id_project=prj,
                                                       id_term=91376951)
                annotations.data().extend(annotations_prj.data())
            print("Nb annotations so far... = %d" % len(annotations.data()))
        print("Total annotations projects %s = %d" %
              (parameters['cytomine_annotation_projects'],
               len(annotations.data())))

        # Set output dir parameters
        if not os.path.exists(parameters['dir_ls']):
            print("Creating annotation directory: %s" % parameters['dir_ls'])
            os.makedirs(parameters['dir_ls'])

        # Dump annotation images locally
        print("Dump training annotation images in %s..." % parameters['dir_ls'])
        conn.dump_annotations(
            annotations=annotations,
            get_image_url_func=Annotation.get_annotation_alpha_crop_url,
            dest_path=parameters['dir_ls'],
            desired_zoom=parameters['cytomine_zoom_level'],
            excluded_terms=parameters['cytomine_excluded_terms'])

        # Put positive terms under the same term and same for negative terms
        term_directories = os.listdir(parameters['dir_ls'])
        pos_path = os.path.join(parameters['dir_ls'], "1")
        if not os.path.exists(pos_path):
            print("Creating positive annotation directory: %s" % pos_path)
            os.makedirs(pos_path)
        neg_path = os.path.join(parameters['dir_ls'], "0")
        if not os.path.exists(neg_path):
            print("Creating negative annotation directory: %s" % neg_path)
            os.makedirs(neg_path)
        for term_dir in term_directories:
            dir_abs = os.path.join(parameters['dir_ls'], term_dir)
            # Move files: positive terms into "1", everything else into "0"
            if int(term_dir) in parameters['cytomine_predict_terms']:
                for image_file in os.listdir(dir_abs):
                    os.rename(os.path.join(dir_abs, image_file),
                              os.path.join(pos_path, image_file))
            else:
                for image_file in os.listdir(dir_abs):
                    os.rename(os.path.join(dir_abs, image_file),
                              os.path.join(neg_path, image_file))
            # Remove empty directory (keep the "0" and "1" class folders)
            if int(term_dir) != 0 and int(term_dir) != 1:
                os.rmdir(dir_abs)

    # ------------------------------------------------------------------
    # Statistics on dumped annotations
    # ------------------------------------------------------------------
    if parameters['cytomine_dump_annotation_stats']:
        pos_path = os.path.join(parameters['dir_ls'], "1")
        neg_path = os.path.join(parameters['dir_ls'], "0")
        stats_dumped_annotations(pos_path, neg_path)

    # ------------------------------------------------------------------
    # Build and train the segmentation model
    # ------------------------------------------------------------------
    if parameters['build_model']:
        print("Build_model...")
        # Model name, e.g. "all_in_batchsize32_epochs10" ("." stripped so
        # float-formatted values stay filename-safe).
        model_name = "all_in_batchsize{}_epochs{}"\
            .format(parameters['keras_batch_size'],
                    parameters['keras_n_epochs']).replace(".", "")
        print("Model_name :", model_name)

        pyxit = PyxitClassifier(
            None,
            n_subwindows=pyxit_parameters['pyxit_n_subwindows'],
            min_size=pyxit_parameters['pyxit_min_size'],
            max_size=pyxit_parameters['pyxit_max_size'],
            target_width=pyxit_parameters['pyxit_target_width'],
            target_height=pyxit_parameters['pyxit_target_height'],
            n_jobs=pyxit_parameters['pyxit_n_jobs'],
            interpolation=pyxit_parameters['pyxit_interpolation'],
            transpose=pyxit_parameters['pyxit_transpose'],
            colorspace=pyxit_parameters['pyxit_colorspace'],
            fixed_size=pyxit_parameters['pyxit_fixed_size'],
            random_state=None,
            verbose=1,
            get_output=_get_output_from_mask,
            parallel_leaf_transform=False)

        # Build filenames and classes
        X, y = build_from_dir(parameters['dir_ls'])

        classes = np.unique(y)
        n_classes = len(classes)
        y_original = y
        # Map class labels to contiguous indices 0..n_classes-1
        y = np.searchsorted(classes, y)
        n_images = len(y)
        print("Number of images : ", n_images)

        images, masks, labels = image_mask_builder(
            X, y, parameters['pyxit_colorspace'])

        # ImageDataGenerator: two instances with the same arguments so that
        # images and masks receive identical random transformations.
        # 1 / 255. forces float division (under Python 2, 1/255 is integer
        # division and would yield 0, i.e. no rescaling at all).
        data_gen_args = dict(rotation_range=180.,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             zoom_range=0.2,
                             rescale=1 / 255.,
                             horizontal_flip=True,
                             vertical_flip=True)
        # featurewise_center = True,
        # featurewise_std_normalization = True)
        image_datagen = ImageDataGenerator(**data_gen_args)
        mask_datagen = ImageDataGenerator(**data_gen_args)

        # Provide the same seed and keyword arguments to the fit and flow
        # methods so both generators stay in lockstep.
        seed = 1
        # image_datagen.fit(images, augment = True, seed = seed)
        # mask_datagen.fit(masks, augment = True, seed = seed)
        # Debug output
        print(type(images))
        print(type(masks))
        print(type(labels))
        print(images[0:10])
        print(masks[0:10])
        print(labels[0:10])
        image_generator = image_datagen.flow(images, labels,
                                             seed=seed, shuffle=False)
        mask_generator = mask_datagen.flow(masks, labels,
                                           seed=seed, shuffle=False)

        # Combine generators into one which yields images and masks
        train_generator = zip(image_generator, mask_generator)

        # Creating and compiling model
        if not os.path.exists(parameters['keras_save_to']):
            os.makedirs(parameters['keras_save_to'])
        model_weights_filename = os.path.join(parameters['keras_save_to'],
                                              "weights_" + model_name + ".h5")
        print('Fitting model...')
        model = get_unet()
        # Keep only the best weights (lowest validation loss) on disk.
        model_checkpoint = ModelCheckpoint(model_weights_filename,
                                           monitor='val_loss',
                                           save_best_only=True)

        # Train FCN
        # NOTE(review): steps_per_epoch=100 and epochs=30 are hard-coded and
        # ignore keras_batch_size / keras_n_epochs options — confirm intent.
        model.fit_generator(train_generator,
                            steps_per_epoch=100,
                            epochs=30,
                            callbacks=[model_checkpoint],
                            verbose=1)