def main(argv): # Define command line options p = optparse.OptionParser(description='Pyxit/Cytomine Segmentation Model Builder', prog='PyXit Segmentation Model Builder (PYthon piXiT)') p.add_option("--cytomine_host", type="string", default = '', dest="cytomine_host", help="The Cytomine host (eg: beta.cytomine.be, localhost:8080)") p.add_option('--cytomine_public_key', type="string", default = '', dest="cytomine_public_key", help="Cytomine public key") p.add_option('--cytomine_private_key',type="string", default = '', dest="cytomine_private_key", help="Cytomine private key") p.add_option('--cytomine_base_path', type="string", default = '/api/', dest="cytomine_base_path", help="Cytomine base path") p.add_option('--cytomine_id_software', type="int", dest="cytomine_id_software", help="The Cytomine software identifier") p.add_option('--cytomine_working_path', default="/tmp/", type="string", dest="cytomine_working_path", help="The working directory (eg: /tmp)") p.add_option('--cytomine_id_project', type="int", dest="cytomine_id_project", help="The Cytomine project identifier") p.add_option('-z', '--cytomine_zoom_level', type='int', dest='cytomine_zoom_level', help="working zoom level") p.add_option('--cytomine_annotation_projects', type="string", dest="cytomine_annotation_projects", help="Projects from which annotations are extracted") p.add_option('--cytomine_predict_terms', type='string', default='0', dest='cytomine_predict_terms', help="term ids of predicted terms (=positive class in binary mode)") p.add_option('--cytomine_excluded_terms', type='string', default='0', dest='cytomine_excluded_terms', help="term ids of excluded terms") p.add_option('--cytomine_reviewed', type='string', default="False", dest="cytomine_reviewed", help="Get reviewed annotations only") p.add_option('--pyxit_target_width', type='int', dest='pyxit_target_width', help="pyxit subwindows width") p.add_option('--pyxit_target_height', type='int', dest='pyxit_target_height', help="pyxit subwindows height") p.add_option('--pyxit_save_to', type='string', dest='pyxit_save_to', help="pyxit model directory") #future: get it from server db p.add_option('--pyxit_colorspace', type='int', dest='pyxit_colorspace', help="pyxit colorspace encoding") #future: get it from server db p.add_option('--pyxit_n_jobs', type='int', dest='pyxit_n_jobs', help="pyxit number of jobs for trees") #future: get it from server db p.add_option('--pyxit_n_subwindows', default=10, type="int", dest="pyxit_n_subwindows", help="number of subwindows") p.add_option('--pyxit_interpolation', default=2, type="int", dest="pyxit_interpolation", help="interpolation method 1,2,3,4") p.add_option('--pyxit_transpose', type="string", default="False", dest="pyxit_transpose", help="transpose subwindows") p.add_option('--pyxit_fixed_size', type="string", default="False", dest="pyxit_fixed_size", help="extract fixed size subwindows") p.add_option('--forest_n_estimators', default=10, type="int", dest="forest_n_estimators", help="number of base estimators (T)") p.add_option('--forest_max_features' , default=1, type="int", dest="forest_max_features", help="max features at test node (k)") p.add_option('--forest_min_samples_split', default=1, type="int", dest="forest_min_samples_split", help="minimum node sample size (nmin)") p.add_option('--verbose', type="string", default="0", dest="verbose", help="Turn on (1) or off (0) verbose mode") options, arguments = p.parse_args( args = argv) parameters['cytomine_host'] = options.cytomine_host parameters['cytomine_public_key'] = 
options.cytomine_public_key parameters['cytomine_private_key'] = options.cytomine_private_key parameters['cytomine_base_path'] = options.cytomine_base_path parameters['cytomine_working_path'] = options.cytomine_working_path parameters['cytomine_base_path'] = options.cytomine_base_path parameters['cytomine_id_project'] = options.cytomine_id_project parameters['cytomine_id_software'] = options.cytomine_id_software parameters['cytomine_annotation_projects'] = map(int,options.cytomine_annotation_projects.split(',')) parameters['cytomine_predict_terms'] = map(int,options.cytomine_predict_terms.split(',')) parameters['cytomine_excluded_terms'] = map(int,options.cytomine_excluded_terms.split(',')) parameters['cytomine_zoom_level'] = options.cytomine_zoom_level parameters['cytomine_reviewed'] = str2bool(options.cytomine_reviewed) pyxit_parameters['pyxit_target_width'] = options.pyxit_target_width pyxit_parameters['pyxit_target_height'] = options.pyxit_target_height pyxit_parameters['pyxit_n_subwindows'] = options.pyxit_n_subwindows pyxit_parameters['pyxit_colorspace'] = options.pyxit_colorspace pyxit_parameters['pyxit_interpolation'] = options.pyxit_interpolation pyxit_parameters['pyxit_transpose'] = str2bool(options.pyxit_transpose) pyxit_parameters['pyxit_fixed_size'] = str2bool(options.pyxit_fixed_size) pyxit_parameters['forest_n_estimators'] = options.forest_n_estimators pyxit_parameters['forest_max_features'] = options.forest_max_features pyxit_parameters['forest_min_samples_split'] = options.forest_min_samples_split pyxit_parameters['pyxit_save_to'] = options.pyxit_save_to pyxit_parameters['pyxit_n_jobs'] = options.pyxit_n_jobs # Check for errors in the options if options.verbose: print "[pyxit.main] Options = ", options # Create JOB/USER/JOB conn = cytomine.Cytomine(parameters["cytomine_host"], parameters["cytomine_public_key"], parameters["cytomine_private_key"] , base_path = parameters['cytomine_base_path'], working_path = parameters['cytomine_working_path'], verbose= str2bool(options.verbose)) #Create a new userjob if connected as human user current_user = conn.get_current_user() if current_user.algo==False: print "adduserJob..." user_job = conn.add_user_job(parameters['cytomine_id_software'], parameters['cytomine_id_project']) print "set_credentials..." conn.set_credentials(str(user_job.publicKey), str(user_job.privateKey)) print "done" else: user_job = current_user print "Already running as userjob" job = conn.get_job(user_job.job) pyxit_parameters['dir_ls'] = os.path.join(parameters["cytomine_working_path"], str(parameters['cytomine_annotation_projects']).replace(',','-').replace('[','').replace(']','').replace(' ',''), "zoom_level", str(parameters['cytomine_zoom_level'])) if not os.path.exists(pyxit_parameters['dir_ls']): print "Creating annotation directory: %s" %pyxit_parameters['dir_ls'] os.makedirs(pyxit_parameters['dir_ls']) time.sleep(2) job = conn.update_job_status(job, status_comment = "Publish software parameters values") all_params=pyxit_parameters all_params.update(parameters) job_parameters_values = conn.add_job_parameters(user_job.job, conn.get_software(parameters['cytomine_id_software']), all_params) #Get annotation data job = conn.update_job_status(job, status = job.RUNNING, status_comment = "Fetching data", progress = 0) #Retrieve annotations from each annotation projects, either reviewed or unreviewed annotations annotations = None for prj in parameters['cytomine_annotation_projects']: if parameters["cytomine_reviewed"]: print "Retrieving reviewed annotations..." 
annotations_prj = conn.get_annotations(id_project = prj, reviewed_only=True) print "Reviewed annotations: %d" %len(annotations_prj.data()) else : print "Retrieving (unreviewed) annotations..." annotations_prj = conn.get_annotations(id_project = prj) print "(Unreviewed) annotations: %d" %len(annotations_prj.data()) if not annotations : annotations = annotations_prj else : annotations.data().extend(annotations_prj.data()) print "Nb annotations so far... = %d" %len(annotations.data()) time.sleep(3) print "Total annotations projects %s = %d" %(parameters['cytomine_annotation_projects'],len(annotations.data())) time.sleep(3) print "Predict terms / excluded terms" print parameters['cytomine_predict_terms'] print parameters['cytomine_excluded_terms'] time.sleep(3) annotations = conn.dump_annotations(annotations = annotations, get_image_url_func = Annotation.get_annotation_alpha_crop_url, dest_path = pyxit_parameters['dir_ls'], excluded_terms = parameters['cytomine_excluded_terms'], desired_zoom = parameters['cytomine_zoom_level']) #Build matrix (subwindows described by pixel values and output) for training project = conn.get_project(parameters['cytomine_id_project']) terms = conn.get_terms(project.ontology) map_classes = {} # build X, Y. Change initial problem into binary problem : "predict_terms" vs others for term in terms.data(): if term.id in parameters['cytomine_predict_terms']: map_classes[term.id] = 1 else : map_classes[term.id] = 0 print pyxit_parameters #Prepare image matrix X, y = build_from_dir(pyxit_parameters['dir_ls'], map_classes) print "X length: %d " %len(X) print "Y length: %d " %len(y) time.sleep(5) #classes = np.unique(y) classes = [0,1] n_classes = len(classes) y_original = y y = np.searchsorted(classes, y) # Instantiate classifiers job = conn.update_job_status(job, status = job.RUNNING, status_comment = "[pyxit.main] Initializing PyxitClassifier...", progress = 25) forest = ExtraTreesClassifier(n_estimators=pyxit_parameters['forest_n_estimators'], max_features=pyxit_parameters['forest_max_features'], min_samples_split=pyxit_parameters['forest_min_samples_split'], n_jobs=pyxit_parameters['pyxit_n_jobs'], verbose=True) pyxit = PyxitClassifier(base_estimator=forest, n_subwindows=pyxit_parameters['pyxit_n_subwindows'], min_size=0.0,#segmentation use fixed-size subwindows max_size=1.0,#segmentation use fixed-size subwindows target_width=pyxit_parameters['pyxit_target_width'], target_height=pyxit_parameters['pyxit_target_height'], interpolation=pyxit_parameters['pyxit_interpolation'], transpose=pyxit_parameters['pyxit_transpose'], colorspace=pyxit_parameters['pyxit_colorspace'], fixed_size=pyxit_parameters['pyxit_fixed_size'], n_jobs=pyxit_parameters['pyxit_n_jobs'], verbose=True, get_output = _get_output_from_mask) if pyxit_parameters['pyxit_save_to']: d = os.path.dirname(pyxit_parameters['pyxit_save_to']) if not os.path.exists(d): os.makedirs(d) fd = open(pyxit_parameters['pyxit_save_to'], "wb") pickle.dump(classes, fd, protocol=pickle.HIGHEST_PROTOCOL) job = conn.update_job_status(job, status_comment = "[pyxit.main] Extracting %d subwindows from each image in %s" %(pyxit_parameters['pyxit_n_subwindows'],pyxit_parameters['dir_ls']), progress = 50) time.sleep(3) #Extract random subwindows in dumped annotations _X, _y = pyxit.extract_subwindows(X, y) #Build pixel classifier job = conn.update_job_status(job, status_comment = "[pyxit.main] Fitting Pyxit Segmentation Model on %s", progress = 75) print "TIME : %s" %strftime("%Y-%m-%d %H:%M:%S", localtime()) start = time.time() 
pyxit.fit(X, y, _X=_X, _y=_y) end = time.time() print "Elapsed time FIT: %d s" %(end-start) print "TIME : %s" %strftime("%Y-%m-%d %H:%M:%S", localtime()) print "pyxit.base_estimator.n_classes_" print pyxit.base_estimator.n_classes_ print "pyxit.base_estimator.classes_" print pyxit.base_estimator.classes_ if options.verbose: print "----------------------------------------------------------------" print "[pyxit.main] Saving Pyxit Segmentation Model locally into %s" % pyxit_parameters['pyxit_save_to'] print "----------------------------------------------------------------" #Save model on local disk if pyxit_parameters['pyxit_save_to']: pickle.dump(pyxit, fd, protocol=pickle.HIGHEST_PROTOCOL) if pyxit_parameters['pyxit_save_to']: fd.close() print "Not Publishing model in db.." #job_data = conn.add_job_data(job, "model", pyxit_parameters['pyxit_save_to']) job = conn.update_job_status(job, status = job.TERMINATED, status_comment = "Finish", progress = 100) print "END."
def run(cyto_job, parameters):
    logging.info("----- segmentation_model_builder v%s -----", __version__)
    logging.info("Entering run(cyto_job=%s, parameters=%s)", cyto_job, parameters)

    job = cyto_job.job

    # Materialize the id lists: map() is lazy in Python 3 and would otherwise be
    # exhausted by the first membership test below.
    projects = list(map(int, parameters.cytomine_annotation_projects.split(',')))
    predict_terms = list(map(int, parameters.cytomine_predict_terms.split(',')))
    excluded_terms = parameters.cytomine_excluded_terms
    excluded_terms = list(map(int, excluded_terms.split(','))) \
        if excluded_terms and excluded_terms != "null" else []

    working_path = os.path.join("tmp", str(job.id))
    if not os.path.exists(working_path):
        logging.info("Creating annotation directory: %s", working_path)
        os.makedirs(working_path)

    try:
        # Get annotation data
        job.update(statusComment="Fetching data")

        # Retrieve annotations from each annotation project
        annotations = []
        for prj in projects:
            logging.info("Retrieving annotations in project %d", prj)
            annotations_prj = AnnotationCollection(
                project=prj, showTerm=True,
                reviewed=parameters.cytomine_reviewed).fetch()
            logging.info("# annotations in project %d: %d", prj, len(annotations_prj))
            annotations += annotations_prj

        nb_annots = len(annotations)
        logging.info("# total annotations: %d", nb_annots)

        terms = TermCollection().fetch_with_filter("project", cyto_job.project.id)

        # Change initial problem into binary problem: "predict_terms" vs others
        map_classes = {term.id: int(term.id in predict_terms) for term in terms}
        for term in excluded_terms:
            map_classes[term] = -1  # excluded class

        classes = [0, 1]
        dest_patterns = {
            cls: os.path.join(working_path, str(cls), "{image}_{id}.png")
            for cls in classes
        }
        x = []
        y = []
        for (i, annot) in enumerate(annotations):
            job.update(progress=int(40 * i / nb_annots),
                       statusComment="Treating annotation {}/{}".format(i, nb_annots))

            # Do not shadow the TermCollection fetched above.
            annot_terms = annot.term if annot.term is not None else []
            class_annot = 0
            for term in annot_terms:
                class_annot = map_classes[term]
                if class_annot != 0:
                    break

            if class_annot == -1:
                # excluded => do not dump and do not add to dataset
                continue

            annot.dump(dest_patterns[class_annot], mask=True, alpha=True,
                       zoom=parameters.cytomine_zoom_level)
            x.append(annot.filename)
            y.append(class_annot)

        x = np.array(x)
        y = np.array(y)
        logging.debug("X length: %d", len(x))
        logging.debug("y length: %d", len(y))

        # Instantiate classifiers
        job.update(statusComment="[pyxit.main] Initializing PyxitClassifier...",
                   progress=40)
        forest = ExtraTreesClassifier(
            n_estimators=parameters.forest_n_estimators,
            max_features=parameters.forest_max_features,
            min_samples_split=parameters.forest_min_samples_split,
            n_jobs=parameters.pyxit_n_jobs,
            verbose=True)

        pyxit = PyxitClassifier(
            base_estimator=forest,
            n_subwindows=parameters.pyxit_n_subwindows,
            min_size=0.0,  # segmentation uses fixed-size subwindows
            max_size=1.0,  # segmentation uses fixed-size subwindows
            target_width=parameters.pyxit_target_width,
            target_height=parameters.pyxit_target_height,
            interpolation=parameters.pyxit_interpolation,
            transpose=parameters.pyxit_transpose,
            colorspace=parameters.pyxit_colorspace,
            fixed_size=parameters.pyxit_fixed_size,
            n_jobs=parameters.pyxit_n_jobs,
            verbose=True,
            get_output=_get_output_from_mask)

        if parameters.pyxit_save_to:
            d = os.path.dirname(parameters.pyxit_save_to)
            if not os.path.exists(d):
                os.makedirs(d)
            fd = open(parameters.pyxit_save_to, "wb")
            pickle.dump(classes, fd, protocol=pickle.HIGHEST_PROTOCOL)

        job.update(
            statusComment="[pyxit.main] Extracting {} subwindows from each image in {}".format(
                parameters.pyxit_n_subwindows, working_path),
            progress=50)

        # Extract random subwindows in dumped annotations
        _X, _y = pyxit.extract_subwindows(x, y)

        # Build pixel classifier
        job.update(statusComment="[pyxit.main] Fitting Pyxit Segmentation Model",
                   progress=75)
        logging.info("Start fitting Pyxit segmentation model")
        start = time.time()
        pyxit.fit(x, y, _X=_X, _y=_y)
        end = time.time()
        logging.debug("Elapsed time FIT: %d s", end - start)
        logging.debug("pyxit.base_estimator.n_classes_: %s", pyxit.base_estimator.n_classes_)
        logging.debug("pyxit.base_estimator.classes_: %s", pyxit.base_estimator.classes_)

        # Save model on local disk
        if parameters.pyxit_save_to:
            logging.debug("----------------------------------------------------------------")
            logging.debug("[pyxit.main] Saving Pyxit Segmentation Model locally into %s",
                          parameters.pyxit_save_to)
            logging.debug("----------------------------------------------------------------")
            pickle.dump(pyxit, fd, protocol=pickle.HIGHEST_PROTOCOL)
            fd.close()

        # job_data = JobData(job.id, "Model", "model.pkl").save()
        # job_data.upload(parameters.pyxit_save_to)
    finally:
        logging.info("Deleting folder %s", working_path)
        shutil.rmtree(working_path, ignore_errors=True)
        logging.debug("Leaving run()")
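# For context: run() above is written against the v2 Cytomine Python client.
# A typical entry point would hand it a CytomineJob built from the command
# line. This is a minimal sketch under that assumption; the from_cli pattern
# and the cyto_job.parameters namespace come from the client, not from this
# section.
if __name__ == "__main__":
    import sys

    from cytomine import CytomineJob

    with CytomineJob.from_cli(sys.argv[1:]) as cyto_job:
        run(cyto_job, cyto_job.parameters)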
def main(argv): current_path = os.getcwd() + '/' + os.path.dirname(__file__) # Define command line options p = optparse.OptionParser(description='Cytomine Segmentation prediction', prog='Cytomine segmentation prediction', version='0.1') p.add_option( '--cytomine_host', type="string", default='beta.cytomine.be', dest="cytomine_host", help="The Cytomine host (eg: beta.cytomine.be, localhost:8080)") p.add_option('--cytomine_public_key', type="string", default='', dest="cytomine_public_key", help="Cytomine public key") p.add_option('--cytomine_private_key', type="string", default='', dest="cytomine_private_key", help="Cytomine private key") p.add_option('--cytomine_base_path', type="string", default='/api/', dest="cytomine_base_path", help="Cytomine base path") p.add_option('--cytomine_id_software', type="int", dest="cytomine_id_software", help="The Cytomine software identifier") p.add_option('--cytomine_working_path', default="/tmp/", type="string", dest="cytomine_working_path", help="The working directory (eg: /tmp)") p.add_option('--cytomine_id_project', type="int", dest="cytomine_id_project", help="The Cytomine project identifier") p.add_option('-i', '--cytomine_id_image', type='int', dest='cytomine_id_image', help="image id from cytomine", metavar='IMAGE') p.add_option('-z', '--cytomine_zoom_level', type='int', dest='cytomine_zoom_level', help="working zoom level") p.add_option('-j', '--nb_jobs', type='int', dest='nb_jobs', help="number of parallel jobs") p.add_option( '--cytomine_predict_terms', type='str', dest='cytomine_predict_terms', help= "term id of all positive terms. The first term is the output predicted annotation term" ) p.add_option('--cytomine_excluded_terms', type='string', dest='cytomine_excluded_terms', help="term id of excluded terms)") p.add_option('--pyxit_target_width', type='int', dest='pyxit_target_width', help="pyxit subwindows width") p.add_option('--pyxit_target_height', type='int', dest='pyxit_target_height', help="pyxit subwindows height") p.add_option('--pyxit_colorspace', type='int', dest='pyxit_colorspace', help="pyxit colorspace encoding") p.add_option('--pyxit_nb_jobs', type='int', dest='pyxit_nb_jobs', help="pyxit number of jobs for trees") p.add_option('--pyxit_fixed_size', type='string', default="0", dest="pyxit_fixed_size", help="extract fixed size subwindows") p.add_option('--pyxit_transpose', type='string', default="0", dest="pyxit_transpose", help="transpose subwindows") p.add_option('--pyxit_n_subwindows', type='int', default="10", dest="pyxit_n_subwindows", help="number of subwindows") p.add_option('--pyxit_interpolation', default=2, type="int", dest="pyxit_interpolation", help="interpolation method 1,2,3,4") p.add_option('--pyxit_min_size', default=0.5, type="float", dest="pyxit_min_size", help="min size") p.add_option('--pyxit_max_size', default=1.0, type="float", dest="pyxit_max_size", help="max size") p.add_option('--cytomine_reviewed', type='string', default="False", dest="cytomine_reviewed", help="Get reviewed annotations only") p.add_option('--cytomine_dump_annotations', type='string', default="0", dest="cytomine_dump_annotations", help="Dump training annotations or not") p.add_option('--cytomine_dump_annotation_stats', type='string', default="0", dest="cytomine_dump_annotation_stats", help="Calculate stats on dumped annotations or not") p.add_option('--build_model', type="string", default="0", dest="build_model", help="Turn on (1) or off (0) model building") p.add_option('--cytomine_annotation_projects', type="string", 
dest="cytomine_annotation_projects", help="Projects from which annotations are extracted") p.add_option('--verbose', type="string", default="0", dest="verbose", help="Turn on (1) or off (0) verbose mode") p.add_option('--keras_save_to', type='string', default="", dest='keras_save_to', help="keras model weight file") p.add_option('--keras_batch_size', type="int", dest="keras_batch_size", help="Training batch size") p.add_option('--keras_n_epochs', type="int", dest="keras_n_epochs", help="Number of epochs") p.add_option('--keras_shuffle', type="string", dest="keras_shuffle", help="Turn on (1) or off (0) batch shuffle") p.add_option('--keras_validation_split', type="float", dest="keras_validation_split", help="Batch validation split") options, arguments = p.parse_args(args=argv) parameters = {} parameters['keras_save_to'] = options.keras_save_to parameters['keras_batch_size'] = options.keras_batch_size parameters['keras_n_epochs'] = options.keras_n_epochs parameters['keras_shuffle'] = options.keras_shuffle parameters['keras_validation_split'] = options.keras_validation_split parameters['cytomine_host'] = options.cytomine_host parameters['cytomine_public_key'] = options.cytomine_public_key parameters['cytomine_private_key'] = options.cytomine_private_key parameters['cytomine_base_path'] = options.cytomine_base_path parameters['cytomine_working_path'] = options.cytomine_working_path parameters['cytomine_base_path'] = options.cytomine_base_path parameters['cytomine_id_project'] = options.cytomine_id_project parameters['cytomine_id_software'] = options.cytomine_id_software parameters['cytomine_predict_terms'] = map( int, options.cytomine_predict_terms.split(',')) parameters['cytomine_predicted_annotation_term'] = parameters[ 'cytomine_predict_terms'][0] parameters['cytomine_excluded_terms'] = map( int, options.cytomine_excluded_terms.split(',')) parameters['pyxit_colorspace'] = options.pyxit_colorspace parameters['pyxit_nb_jobs'] = options.pyxit_nb_jobs parameters['pyxit_n_jobs'] = options.pyxit_nb_jobs parameters['cytomine_nb_jobs'] = options.pyxit_nb_jobs parameters['cytomine_id_image'] = options.cytomine_id_image parameters['cytomine_zoom_level'] = options.cytomine_zoom_level parameters['nb_jobs'] = options.nb_jobs parameters['pyxit_target_width'] = options.pyxit_target_width parameters['pyxit_target_height'] = options.pyxit_target_height parameters['pyxit_n_subwindows'] = options.pyxit_n_subwindows parameters['pyxit_interpolation'] = options.pyxit_interpolation parameters['pyxit_transpose'] = str2bool(options.pyxit_transpose) parameters['pyxit_min_size'] = options.pyxit_min_size parameters['pyxit_max_size'] = options.pyxit_max_size parameters['pyxit_fixed_size'] = str2bool(options.pyxit_fixed_size) parameters['cytomine_annotation_projects'] = map( int, options.cytomine_annotation_projects.split(',')) parameters['cytomine_reviewed'] = str2bool(options.cytomine_reviewed) parameters['cytomine_dump_annotation_stats'] = str2bool( options.cytomine_dump_annotation_stats) parameters['cytomine_dump_annotations'] = str2bool( options.cytomine_dump_annotations) parameters['build_model'] = str2bool(options.build_model) parameters['dir_ls'] = os.path.join( parameters["cytomine_working_path"], str(parameters['cytomine_annotation_projects']).replace( ',', '-').replace('[', '').replace(']', '').replace(' ', ''), "zoom_level", str(parameters['cytomine_zoom_level'])) pyxit_parameters = {} pyxit_parameters['pyxit_target_width'] = options.pyxit_target_width pyxit_parameters['pyxit_target_height'] = 
options.pyxit_target_height pyxit_parameters['pyxit_n_subwindows'] = options.pyxit_n_subwindows pyxit_parameters['pyxit_min_size'] = options.pyxit_min_size pyxit_parameters['pyxit_max_size'] = options.pyxit_max_size pyxit_parameters['pyxit_colorspace'] = options.pyxit_colorspace pyxit_parameters['pyxit_interpolation'] = options.pyxit_interpolation pyxit_parameters['pyxit_transpose'] = str2bool(options.pyxit_transpose) pyxit_parameters['pyxit_fixed_size'] = str2bool(options.pyxit_fixed_size) pyxit_parameters['pyxit_n_jobs'] = options.pyxit_nb_jobs if options.verbose: print(parameters) # Create Cytomine connection conn = cytomine.Cytomine(parameters["cytomine_host"], parameters["cytomine_public_key"], parameters["cytomine_private_key"], base_path=parameters['cytomine_base_path'], working_path=parameters['cytomine_working_path'], verbose=str2bool(options.verbose)) # Dump annotations if parameters['cytomine_dump_annotations']: # Get annotation descriptions (JSON) from project(s) annotations = None for prj in parameters['cytomine_annotation_projects']: if parameters["cytomine_reviewed"]: annotations_prj = conn.get_annotations( id_project=prj, reviewed_only=parameters["cytomine_reviewed"]) else: annotations_prj = conn.get_annotations(id_project=prj) if not annotations: annotations = annotations_prj else: annotations.data().extend(annotations_prj.data()) if prj == 21907448 or prj == 155194683: annotations_prj = conn.get_annotations(id_project=prj, id_term=91376951) annotations.data().extend(annotations_prj.data()) print("Nb annotations so far... = %d" % len(annotations.data())) print("Total annotations projects %s = %d" % (parameters['cytomine_annotation_projects'], len(annotations.data()))) # Set output dir parameters if not os.path.exists(parameters['dir_ls']): print("Creating annotation directory: %s" % parameters['dir_ls']) os.makedirs(parameters['dir_ls']) # Dump annotation images locally print("Dump training annotation images in %s...", parameters['dir_ls']) conn.dump_annotations( annotations=annotations, get_image_url_func=Annotation.get_annotation_alpha_crop_url, dest_path=parameters['dir_ls'], desired_zoom=parameters['cytomine_zoom_level'], excluded_terms=parameters['cytomine_excluded_terms']) # Put positive terms under the same term and same for negative terms term_directories = os.listdir(parameters['dir_ls']) pos_image_path = os.path.join(parameters['dir_ls'], "image", "1") pos_mask_path = os.path.join(parameters['dir_ls'], "mask", "1") neg_image_path = os.path.join(parameters['dir_ls'], "image", "0") neg_mask_path = os.path.join(parameters['dir_ls'], "mask", "0") if not os.path.exists(pos_image_path): print("Creating positive annotation directory: %s" % pos_image_path) os.makedirs(pos_image_path) if not os.path.exists(neg_image_path): print("Creating negative annotation directory: %s" % neg_image_path) os.makedirs(neg_image_path) if not os.path.exists(pos_mask_path): print("Creating positive annotation directory: %s" % pos_mask_path) os.makedirs(pos_mask_path) if not os.path.exists(neg_mask_path): print("Creating negative annotation directory: %s" % neg_mask_path) os.makedirs(neg_mask_path) for dir in term_directories: dir_abs = os.path.join(parameters['dir_ls'], dir) # Move files print("Term directory: %s" % dir_abs) if int(dir) in parameters['cytomine_predict_terms']: print("Positive term") for image_file in os.listdir(dir_abs): print(image_file) try: im = Image.open(os.path.join(dir_abs, image_file)) except IOError: "warning filename %s is not an image" % os.path.join( dir_abs, 
image_file) continue rgb = im.tobytes("raw", "RGB") a = im.tobytes("raw", "A") im.close() image = Image.frombytes("RGB", im.size, rgb) mask = Image.frombytes("L", im.size, a) image.save(os.path.join(pos_image_path, image_file), "PNG") mask.save(os.path.join(pos_mask_path, image_file), "PNG") else: print("Negative term") for image_file in os.listdir(dir_abs): print(image_file) try: im = Image.open(os.path.join(dir_abs, image_file)) except IOError: "warning filename %s is not an image" % os.path.join( dir_abs, image_file) continue rgb = im.tobytes("raw", "RGB") a = im.tobytes("raw", "A") im.close() image = Image.frombytes("RGB", im.size, rgb) mask = Image.frombytes("L", im.size, a) image.save(os.path.join(neg_image_path, image_file), "PNG") mask.save(os.path.join(neg_mask_path, image_file), "PNG") if parameters['cytomine_dump_annotation_stats']: pos_path = os.path.join(parameters['dir_ls'], "image", "1") neg_path = os.path.join(parameters['dir_ls'], "image", "0") stats_dumped_annotations(pos_path, neg_path) # if parameters['build_model'] : # # Model name # model_name = "nsubw{}_winsize{}x{}_minsize{}_maxsize{}_batchsize{}_epochs{}_shuffle{}_valsplit{}_colorspace{}"\ # .format(parameters['pyxit_n_subwindows'], # parameters['pyxit_target_width'], # parameters['pyxit_target_height'], # parameters['pyxit_min_size'], # parameters['pyxit_max_size'], # parameters['keras_batch_size'], # parameters['keras_n_epochs'], # parameters['keras_shuffle'], # parameters['keras_validation_split'], # pyxit_parameters['pyxit_colorspace']).replace(".", "") # print("Model_name :", model_name) # # pyxit = PyxitClassifier(None, # n_subwindows = pyxit_parameters['pyxit_n_subwindows'], # min_size = pyxit_parameters['pyxit_min_size'], # max_size = pyxit_parameters['pyxit_max_size'], # target_width = pyxit_parameters['pyxit_target_width'], # target_height = pyxit_parameters['pyxit_target_height'], # n_jobs = pyxit_parameters['pyxit_n_jobs'], # interpolation = pyxit_parameters['pyxit_interpolation'], # transpose = pyxit_parameters['pyxit_transpose'], # colorspace = pyxit_parameters['pyxit_colorspace'], # fixed_size = pyxit_parameters['pyxit_fixed_size'], # random_state = None, # verbose = 1, # get_output = _get_output_from_mask, # parallel_leaf_transform = False) # # # Build filenames and classes # X, y = build_from_dir(parameters['dir_ls']) # # classes = np.unique(y) # n_classes = len(classes) # y_original = y # y = np.searchsorted(classes, y) # n_images = len(y) # print("Number of images : ", n_images) # # # Extract subwindows # _X, _y = pyxit.extract_subwindows(X, y) # n_subw = len(_y) # print("Number of subwindows : ", n_subw) # # # Reshape data structure # _X = np.reshape(_X, ( # n_subw, pyxit_parameters['pyxit_target_width'], pyxit_parameters['pyxit_target_height'], n_channels)) # _y = np.reshape(_y, (n_subw, pyxit_parameters['pyxit_target_width'], pyxit_parameters['pyxit_target_height'])) # # # Train FCN # if not os.path.exists(parameters['keras_save_to']) : # os.makedirs(parameters['keras_save_to']) # # model_weights_file_path = os.path.join(parameters['keras_save_to'], "weights_" + model_name + ".h5") # # mean, std = train(_X, _y, # model_weights_file_path, # imgs_width = pyxit_parameters['pyxit_target_width'], # imgs_height = pyxit_parameters['pyxit_target_height'], # batch_size = parameters['keras_batch_size'], # epochs = parameters['keras_n_epochs'], # shuffle = parameters['keras_shuffle'], # validation_split = parameters['keras_validation_split']) # # # Save mean and std used to normalize training data # 
mean_std_save_file_path = os.path.join(parameters['keras_save_to'], "meanstd_" + model_name + ".txt") # mean_std_save_file = open(mean_std_save_file_path, 'w') # mean_std_save_file.write(str(mean) + '\n') # mean_std_save_file.write(str(std) + '\n') if parameters['build_model']: # Model name model_name = "nsubw{}_winsize{}x{}_minsize{}_maxsize{}_batchsize{}_epochs{}_shuffle{}_valsplit{}_colorspace{}_zoom{}_until4x4_IDG"\ .format(parameters['pyxit_n_subwindows'], parameters['pyxit_target_width'], parameters['pyxit_target_height'], parameters['pyxit_min_size'], parameters['pyxit_max_size'], parameters['keras_batch_size'], parameters['keras_n_epochs'], parameters['keras_shuffle'], parameters['keras_validation_split'], pyxit_parameters['pyxit_colorspace'], parameters['cytomine_zoom_level']).replace(".", "") print("Model_name :", model_name) pyxit = PyxitClassifier( None, n_subwindows=1, min_size=1, max_size=1, target_width=pyxit_parameters['pyxit_target_width'], target_height=pyxit_parameters['pyxit_target_height'], n_jobs=pyxit_parameters['pyxit_n_jobs'], interpolation=pyxit_parameters['pyxit_interpolation'], transpose=pyxit_parameters['pyxit_transpose'], colorspace=pyxit_parameters['pyxit_colorspace'], fixed_size=pyxit_parameters['pyxit_fixed_size'], random_state=None, verbose=1, get_output=_get_output_from_mask, parallel_leaf_transform=False) # pyxit = PyxitClassifier(None, # n_subwindows=pyxit_parameters['pyxit_n_subwindows'], # min_size=pyxit_parameters['pyxit_min_size'], # max_size=pyxit_parameters['pyxit_max_size'], # target_width=pyxit_parameters['pyxit_target_width'], # target_height=pyxit_parameters['pyxit_target_height'], # n_jobs=pyxit_parameters['pyxit_n_jobs'], # interpolation=pyxit_parameters['pyxit_interpolation'], # transpose=pyxit_parameters['pyxit_transpose'], # colorspace=pyxit_parameters['pyxit_colorspace'], # fixed_size=pyxit_parameters['pyxit_fixed_size'], # random_state=None, # verbose=1, # get_output = _get_output_from_mask, # parallel_leaf_transform=False) # Build filenames and classes X, y = build_from_dir(parameters['dir_ls']) classes = np.unique(y) n_classes = len(classes) y_original = y y = np.searchsorted(classes, y) n_images = len(y) print("Number of images : ", n_images) print("Start extraction of subwindows...") # Extract subwindows _X, _y = pyxit.extract_subwindows(X, y) print("Over") n_subw = len(_y) print("Number of subwindows : ", n_subw) # Reshape data structure _X = np.reshape(_X, (n_subw, pyxit_parameters['pyxit_target_width'], pyxit_parameters['pyxit_target_height'], n_channels)) _y = np.reshape(_y, (n_subw, pyxit_parameters['pyxit_target_width'], pyxit_parameters['pyxit_target_height'], 1)) print(type(_X)) print(type(_y)) # ImageDataGenerator : two instances with the same arguments print("Init data gen") data_gen_args = dict(rotation_range=180., width_shift_range=0.1, height_shift_range=0.1, zoom_range=0.2, rescale=1 / 255, horizontal_flip=True, vertical_flip=True) # featurewise_center = True, # featurewise_std_normalization = True) image_datagen = ImageDataGenerator(**data_gen_args) mask_datagen = ImageDataGenerator(**data_gen_args) # Provide the same seed and keyword arguments to the fit and flow methods seed = 1 print("Fit image data generator (image)...") image_datagen.fit(_X[0:100], augment=True, seed=seed) print("Fit image data generator (mask)...") mask_datagen.fit(_y[0:100], augment=True, seed=seed) print('Flow on images...') # image_generator = image_datagen.flow(_X, labels, seed = seed, shuffle = False) image_generator = 
image_datagen.flow_from_directory( os.path.join(parameters['dir_ls'], "image"), class_mode=None, target_size=(128, 128), seed=seed) print('Flow on masks...') # mask_generator = mask_datagen.flow(_y, labels, seed = seed, shuffle = False) mask_generator = mask_datagen.flow_from_directory( os.path.join(parameters['dir_ls'], "mask"), class_mode=None, target_size=(128, 128), seed=seed) # combine generators into one which yields image and masks train_generator = combine_generator(image_generator, mask_generator) # Creating and compiling model if not os.path.exists(parameters['keras_save_to']): os.makedirs(parameters['keras_save_to']) model_weights_filename = os.path.join(parameters['keras_save_to'], "weights_" + model_name + ".h5") print('Fitting model...') model = get_unet(128, 128) model_checkpoint = ModelCheckpoint(model_weights_filename, monitor='val_loss', save_best_only=True) # Train FCN model.fit_generator(train_generator, steps_per_epoch=100, epochs=50, callbacks=[model_checkpoint], verbose=1)
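# combine_generator() is called above but not defined in this section. Below is
# a minimal sketch of what such a helper usually does; this is an assumption,
# not necessarily the repository's own implementation.
def combine_generator(image_generator, mask_generator):
    """Yield (image_batch, mask_batch) pairs so that Keras' fit_generator
    receives inputs and their segmentation targets together."""
    while True:
        yield next(image_generator), next(mask_generator)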
def main(argv): # Define command line options p = optparse.OptionParser(description='Pyxit', prog='PyXit (PYthon piXiT)', version='PyXit 0.1') p.add_option('--dir_ls', type="string", dest="dir_ls", help="The learning set directory") p.add_option('--dir_ts', type="string", dest="dir_ts", help="The training set directory") p.add_option('--cv_k_folds', type="int", dest="cv_k_folds", help="The number of folds") p.add_option( '--cv_shuffle', default=False, action="store_true", dest="cv_shuffle", help="Whether cross-validation is performed using ShuffleSplit.") p.add_option('--cv_shuffle_test_fraction', default=0.1, type="float", dest="cv_shuffle_test_fraction", help="The proportion of data in shuffled test splits.") p.add_option('--pyxit_n_subwindows', default=10, type="int", dest="pyxit_n_subwindows", help="number of subwindows") p.add_option('--pyxit_min_size', default=0.5, type="float", dest="pyxit_min_size", help="min size") p.add_option('--pyxit_max_size', default=1.0, type="float", dest="pyxit_max_size", help="max size") p.add_option('--pyxit_target_width', default=16, type="int", dest="pyxit_target_width", help="target width") p.add_option('--pyxit_target_height', default=16, type="int", dest="pyxit_target_height", help="target height") p.add_option('--pyxit_interpolation', default=2, type="int", dest="pyxit_interpolation", help="interpolation method 1,2,3,4") p.add_option('--pyxit_transpose', default=False, action="store_true", dest="pyxit_transpose", help="transpose subwindows") p.add_option('--pyxit_colorspace', default=2, type="int", dest="pyxit_colorspace", help="colorspace 0=RGB, 1=TRGB, 2=HSV") p.add_option('--pyxit_fixed_size', default=False, action="store_true", dest="pyxit_fixed_size", help="extract fixed size subwindows") p.add_option('--pyxit_n_jobs', default=1, type="int", dest="pyxit_n_jobs", help="number of jobs") p.add_option('--pyxit_save_to', type="string", dest="pyxit_save_to", help="file to save the model into") p.add_option('--forest_n_estimators', default=10, type="int", dest="forest_n_estimators", help="number of base estimators (T)") p.add_option('--forest_max_features', default=1, type="int", dest="forest_max_features", help="max features at test node (k)") p.add_option('--forest_min_samples_split', default=1, type="int", dest="forest_min_samples_split", help="minimum node sample size (nmin)") p.add_option('--forest_shared_mem', default=False, action="store_true", dest="forest_shared_mem", help="shared mem") p.add_option( '--svm', default=0, dest="svm", help= "final svm classifier: 0=nosvm, 1=libsvm, 2=liblinear, 3=lr-l1, 4=lr-l2", type="int") p.add_option('--svm_c', default=1.0, type="float", dest="svm_c", help="svm C") p.add_option('--quiet', action="store_false", default=True, dest="verbose", help="Turn off verbose mode") p.add_option('--verbose', action="store_true", default=True, dest="verbose", help="Turn on verbose mode") options, arguments = p.parse_args(args=argv) # Check for errors in the options e = None if not options.dir_ls: e = "--dir_ls needs to be set." elif options.dir_ts and options.cv_k_folds: e = "--dir_ts and --cv_k_folds cannot be set at the same time." elif options.pyxit_save_to and options.cv_k_folds: e = "--pyxit_save_to and --cv_k_folds cannot be set at the time." 
if e: print("Error: %s" % e) print("Run with -h option for help.") sys.exit(1) if options.verbose: print("[pyxit.main] Options = ", options) # Load data if options.verbose: print("[pyxit.main] Loading data...") X, y = build_from_dir(options.dir_ls) classes = np.unique(y) n_classes = len(classes) y_original = y y = np.searchsorted(classes, y) # Instantiate classifiers if options.verbose: print("[pyxit.main] Initializing PyxitClassifier...") forest = ExtraTreesClassifier( n_estimators=options.forest_n_estimators, max_features=options.forest_max_features, min_samples_split=options.forest_min_samples_split, n_jobs=options.pyxit_n_jobs, verbose=options.verbose) pyxit = PyxitClassifier(base_estimator=forest, n_subwindows=options.pyxit_n_subwindows, min_size=options.pyxit_min_size, max_size=options.pyxit_max_size, target_width=options.pyxit_target_width, target_height=options.pyxit_target_height, interpolation=options.pyxit_interpolation, transpose=options.pyxit_transpose, colorspace=options.pyxit_colorspace, fixed_size=options.pyxit_fixed_size, n_jobs=options.pyxit_n_jobs, verbose=options.verbose) if options.svm: if options.svm == SVM_LIBSVM: svm = SVC(probability=True, C=options.svm_c, kernel="linear") if options.svm == SVM_LIBLINEAR: svm = LinearSVC(C=options.svm_c) if options.svm == SVM_LRL1: svm = LogisticRegression(penalty="l1", C=options.svm_c) if options.svm == SVM_LRL2: svm = LogisticRegression(penalty="l2", C=options.svm_c) if options.svm == ET: svm = ExtraTreesClassifier( n_estimators=1000, max_features="sqrt", #max_features=1000, min_samples_split=2, n_jobs=options.pyxit_n_jobs, verbose=options.verbose) if options.svm == RF: svm = RandomForestClassifier( n_estimators=1000, #max_features=1000, max_features="sqrt", min_samples_split=2, n_jobs=options.pyxit_n_jobs, verbose=options.verbose) if options.svm == NN: svm = neighbors.KNeighborsClassifier(10) if options.verbose: print("[pyxit.main] PyxitClassifier =") print(pyxit) if options.svm: print("[pyxit.main] SVM =") print(svm) # Build and evaluate if options.dir_ls and not options.dir_ts and not options.cv_k_folds: if options.pyxit_save_to: fd = open(options.pyxit_save_to, "wb") pickle.dump(classes, fd, protocol=pickle.HIGHEST_PROTOCOL) if options.verbose: print("[pyxit.main] Fitting PyxitClassifier on %s" % options.dir_ls) _X, _y = pyxit.extract_subwindows(X, y) pyxit.fit(X, y, _X=_X, _y=_y) if options.verbose: print("[pyxit.main] Saving PyxitClassifier into %s" % options.pyxit_save_to) if options.pyxit_save_to: pickle.dump(pyxit, fd, protocol=pickle.HIGHEST_PROTOCOL) if options.svm: Xt = pyxit.transform(X, _X=_X, reset=True) if options.verbose: print("[pyxit.main] Fitting SVC on %s" % options.dir_ls) svm.fit(Xt, y) if options.verbose: print("[pyxit.main] Saving SVM into %s" % options.pyxit_save_to) if options.pyxit_save_to: pickle.dump(svm, fd, protocol=pickle.HIGHEST_PROTOCOL) if options.pyxit_save_to: fd.close() elif options.dir_ts: if options.pyxit_save_to: fd = open(options.pyxit_save_to, "wb") pickle.dump(classes, fd, protocol=pickle.HIGHEST_PROTOCOL) if options.verbose: print("[pyxit.main] Fitting PyxitClassifier on %s" % options.dir_ls) _X, _y = pyxit.extract_subwindows(X, y) pyxit.fit(X, y, _X=_X, _y=_y) if options.pyxit_save_to: pickle.dump(pyxit, fd, protocol=pickle.HIGHEST_PROTOCOL) if options.svm: Xt = pyxit.transform(X, _X=_X, reset=True) if options.verbose: print("[pyxit.main] Fitting SVC on %s" % options.dir_ls) svm.fit(Xt, y) if options.pyxit_save_to: pickle.dump(svm, fd, protocol=pickle.HIGHEST_PROTOCOL) if 
options.pyxit_save_to: fd.close() if options.verbose: print("[pyxit.main] Testing on %s" % options.dir_ts) X_test, y_test = build_from_dir(options.dir_ts) y_test = np.searchsorted(classes, y_test) _X_test, _y_test = pyxit.extract_subwindows(X_test, y_test) y_true = y_test all_tested = np.ones(len(y_true), dtype=np.bool) if not options.svm: y_predict = pyxit.predict(X_test, _X=_X_test) y_proba = pyxit.predict_proba(X_test, _X=_X_test) else: Xt = pyxit.transform(X_test, _X=_X_test) y_predict = svm.predict(Xt) if options.svm != SVM_LIBLINEAR: y_proba = svm.predict_proba(Xt) elif options.cv_k_folds: if options.verbose: print("[pyxit.main] K-Fold cross-validation (K=%d)" % options.cv_k_folds) _X, _y = pyxit.extract_subwindows(X, y) i = 1 step = 100. / options.cv_k_folds y_true = y y_predict = np.empty(y_true.shape, dtype=y.dtype) y_proba = np.empty((y_true.shape[0], n_classes)) all_tested = np.zeros(len(y_true), dtype=np.bool) cm = np.zeros((n_classes, n_classes), dtype=np.int32) if not options.cv_shuffle: cv = StratifiedKFold(y_true, options.cv_k_folds) else: cv = ShuffleSplit(len(X), n_iter=options.cv_k_folds, test_size=options.cv_shuffle_test_fraction) for train, test in cv: all_tested[test] = True _train = pyxit.extend_mask(train) _test = pyxit.extend_mask(test) if options.verbose: print("[pyxit.main] Fitting PyxitClassifier on fold %d" % i) pyxit.fit(X[train], y[train], _X=_X[_train], _y=_y[_train]) if options.svm: Xt = pyxit.transform(X[train], _X=_X[_train], reset=True) if options.verbose: print("[pyxit.main] Fitting SVC on fold %d" % i) svm.fit(Xt, y[train]) if options.verbose: print("[pyxit.main] Testing on fold %d" % i) if not options.svm: y_predict[test] = pyxit.predict(X[test], _X=_X[_test]) y_proba[test] = pyxit.predict_proba(X[test], _X=_X[_test]) else: Xt = pyxit.transform(X[test], _X=_X[_test]) y_predict[test] = np.asarray(svm.predict(Xt), dtype=y.dtype) if hasattr(svm, "predict_proba"): y_proba[test] = svm.predict_proba(Xt) print(svm) if options.verbose: print("[pyxit.main] Classification error on fold %d = %f" % (i, 1.0 * np.sum(y_true[test] != y_predict[test]) / len(y_true[test]))) print("[pyxit.main] Cumulated confusion matrix =") cm += confusion_matrix(y_true[test], y_predict[test]) print_cm(cm, classes) i += 1 # Output some results if "all_tested" in locals(): if options.verbose: print("---") print("[pyxit.main] Test coverage =", sum(all_tested) / (1.0 * len(all_tested))) print("[pyxit.main] Overall classification error = %f" % (1.0 * np.sum(y_true[all_tested] != y_predict[all_tested]) / len(y_true[all_tested]))) print("[pyxit.main] Overall confusion matrix =") print_cm( confusion_matrix(y_true[all_tested], y_predict[all_tested]), classes) #y_true = classes.take(y_true[all_tested], axis=0) y_predict = classes.take(y_predict[all_tested], axis=0) y_proba = np.max(y_proba, axis=1) d = {} for i in range(len(X)): d[X[i]] = (int(y_predict[i]), y_proba[i]) return d
def build_models(n_subwindows=10, min_size=0.5, max_size=1.0, target_width=16,
                 target_height=16, interpolation=2, transpose=False,
                 colorspace=2, fixed_size=False, verbose=0,
                 get_output=_get_output_from_directory, create_svm=False,
                 C=1.0, random_state=None, **base_estimator_params):
    """Build models

    Parameters
    ----------
    n_subwindows: int
    min_size: float
    max_size: float
    target_width: int
    target_height: int
    interpolation: int
    transpose: bool
    colorspace: int
    fixed_size: bool
    verbose: int
    get_output: callable
    create_svm: bool
    C: float
    base_estimator_params: dict
        Parameters for the ExtraTreesClassifier object

    Returns
    -------
    et: ExtraTreesClassifier
        Base estimator a.k.a. extra-trees
    pyxit: PyxitClassifier|SvmPyxitClassifier
        (Svm) Pyxit classifier
    """
    n_jobs = base_estimator_params.get("n_jobs", 1)
    random_state = check_random_state(random_state)
    et = ExtraTreesClassifier(random_state=random_state, **base_estimator_params)
    pyxit = PyxitClassifier(
        base_estimator=et,
        n_subwindows=n_subwindows,
        min_size=min_size,
        max_size=max_size,
        target_height=target_height,
        target_width=target_width,
        n_jobs=n_jobs,
        colorspace=colorspace,
        fixed_size=fixed_size,
        interpolation=interpolation,
        transpose=transpose,
        verbose=verbose,
        get_output=get_output,
        # ET and Pyxit must have != random nbs
        random_state=check_random_state(random_state.tomaxint() % 0x100000000)
    )
    if not create_svm:
        return et, pyxit
    else:
        return et, SvmPyxitClassifier(pyxit, LinearSVC(C=C))
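# A minimal usage sketch for build_models(). The dataset path and forest sizes
# are hypothetical; build_from_dir is the loader used elsewhere in this
# repository and is assumed to return image paths X and integer classes y.
if __name__ == "__main__":
    X, y = build_from_dir("/path/to/learning_set")

    # Keyword arguments not consumed by build_models() are forwarded to the
    # underlying ExtraTreesClassifier (n_estimators, max_features, ...).
    et, pyxit = build_models(n_subwindows=100,
                             target_width=16,
                             target_height=16,
                             colorspace=2,
                             n_estimators=10,
                             max_features=1,
                             min_samples_split=2,
                             n_jobs=4,
                             random_state=42)

    _X, _y = pyxit.extract_subwindows(X, y)
    pyxit.fit(X, y, _X=_X, _y=_y)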
def main(argv):
    print("Main")
    # Define command line options
    p = optparse.OptionParser(description='Cytomine Segmentation prediction',
                              prog='Cytomine segmentation prediction',
                              version='0.1')
    p.add_option('--cytomine_host', type="string", default='beta.cytomine.be', dest="cytomine_host",
                 help="The Cytomine host (eg: beta.cytomine.be, localhost:8080)")
    p.add_option('--cytomine_public_key', type="string", default='', dest="cytomine_public_key",
                 help="Cytomine public key")
    p.add_option('--cytomine_private_key', type="string", default='', dest="cytomine_private_key",
                 help="Cytomine private key")
    p.add_option('--cytomine_base_path', type="string", default='/api/', dest="cytomine_base_path",
                 help="Cytomine base path")
    p.add_option('--cytomine_id_software', type="int", dest="cytomine_id_software",
                 help="The Cytomine software identifier")
    p.add_option('--cytomine_working_path', default="/tmp/", type="string", dest="cytomine_working_path",
                 help="The working directory (eg: /tmp)")
    p.add_option('--cytomine_id_project', type="int", dest="cytomine_id_project",
                 help="The Cytomine project identifier")
    p.add_option('-i', '--cytomine_id_image', type='int', dest='cytomine_id_image',
                 help="image id from cytomine", metavar='IMAGE')
    p.add_option('-z', '--cytomine_zoom_level', type='int', dest='cytomine_zoom_level',
                 help="working zoom level")
    p.add_option('-j', '--nb_jobs', type='int', dest='nb_jobs',
                 help="number of parallel jobs")
    p.add_option('--cytomine_predict_terms', type='str', dest='cytomine_predict_terms',
                 help="term id of all positive terms. The first term is the output predicted annotation term")
    p.add_option('--cytomine_excluded_terms', type='string', dest='cytomine_excluded_terms',
                 help="term id of excluded terms")
    p.add_option('--pyxit_target_width', type='int', dest='pyxit_target_width',
                 help="pyxit subwindows width")
    p.add_option('--pyxit_target_height', type='int', dest='pyxit_target_height',
                 help="pyxit subwindows height")
    p.add_option('--pyxit_colorspace', type='int', dest='pyxit_colorspace',
                 help="pyxit colorspace encoding")
    p.add_option('--pyxit_nb_jobs', type='int', dest='pyxit_nb_jobs',
                 help="pyxit number of jobs for trees")
    p.add_option('--pyxit_fixed_size', type='string', default="0", dest="pyxit_fixed_size",
                 help="extract fixed size subwindows")
    p.add_option('--pyxit_transpose', type='string', default="0", dest="pyxit_transpose",
                 help="transpose subwindows")
    p.add_option('--pyxit_n_subwindows', type='int', default=10, dest="pyxit_n_subwindows",
                 help="number of subwindows")
    p.add_option('--pyxit_interpolation', default=2, type="int", dest="pyxit_interpolation",
                 help="interpolation method 1,2,3,4")
    p.add_option('--pyxit_min_size', default=0.5, type="float", dest="pyxit_min_size",
                 help="min size")
    p.add_option('--pyxit_max_size', default=1.0, type="float", dest="pyxit_max_size",
                 help="max size")
    p.add_option('--cytomine_reviewed', type='string', default="False", dest="cytomine_reviewed",
                 help="Get reviewed annotations only")
    p.add_option('--cytomine_dump_annotations', type='string', default="0", dest="cytomine_dump_annotations",
                 help="Dump training annotations or not")
    p.add_option('--cytomine_dump_annotation_stats', type='string', default="0", dest="cytomine_dump_annotation_stats",
                 help="Calculate stats on dumped annotations or not")
    p.add_option('--build_model', type="string", default="0", dest="build_model",
                 help="Turn on (1) or off (0) model building")
    p.add_option('--cytomine_annotation_projects', type="string", dest="cytomine_annotation_projects",
                 help="Projects from which annotations are extracted")
    p.add_option('--verbose', type="string", default="0", dest="verbose",
                 help="Turn on (1) or off (0) verbose mode")
    p.add_option('--keras_save_to', type='string', default="", dest='keras_save_to',
                 help="keras model weight file")
    p.add_option('--keras_batch_size', type="int", dest="keras_batch_size",
                 help="Training batch size")
    p.add_option('--keras_n_epochs', type="int", dest="keras_n_epochs",
                 help="Number of epochs")
    p.add_option('--keras_shuffle', type="string", dest="keras_shuffle",
                 help="Turn on (1) or off (0) batch shuffle")
    p.add_option('--keras_validation_split', type="float", dest="keras_validation_split",
                 help="Batch validation split")

    options, arguments = p.parse_args(args=argv)

    parameters = {}
    parameters['keras_save_to'] = options.keras_save_to
    parameters['keras_batch_size'] = options.keras_batch_size
    parameters['keras_n_epochs'] = options.keras_n_epochs
    parameters['keras_shuffle'] = options.keras_shuffle
    parameters['keras_validation_split'] = options.keras_validation_split
    parameters['cytomine_host'] = options.cytomine_host
    parameters['cytomine_public_key'] = options.cytomine_public_key
    parameters['cytomine_private_key'] = options.cytomine_private_key
    parameters['cytomine_base_path'] = options.cytomine_base_path
    parameters['cytomine_working_path'] = options.cytomine_working_path
    parameters['cytomine_id_project'] = options.cytomine_id_project
    parameters['cytomine_id_software'] = options.cytomine_id_software
    parameters['cytomine_predict_terms'] = map(int, options.cytomine_predict_terms.split(','))
    parameters['cytomine_predicted_annotation_term'] = parameters['cytomine_predict_terms'][0]
    parameters['cytomine_excluded_terms'] = map(int, options.cytomine_excluded_terms.split(','))
    parameters['pyxit_colorspace'] = options.pyxit_colorspace
    parameters['pyxit_nb_jobs'] = options.pyxit_nb_jobs
    parameters['pyxit_n_jobs'] = options.pyxit_nb_jobs
    parameters['cytomine_nb_jobs'] = options.pyxit_nb_jobs
    parameters['cytomine_id_image'] = options.cytomine_id_image
    parameters['cytomine_zoom_level'] = options.cytomine_zoom_level
    parameters['nb_jobs'] = options.nb_jobs
    parameters['pyxit_target_width'] = options.pyxit_target_width
    parameters['pyxit_target_height'] = options.pyxit_target_height
    parameters['pyxit_n_subwindows'] = options.pyxit_n_subwindows
    parameters['pyxit_interpolation'] = options.pyxit_interpolation
    parameters['pyxit_transpose'] = str2bool(options.pyxit_transpose)
    parameters['pyxit_min_size'] = options.pyxit_min_size
    parameters['pyxit_max_size'] = options.pyxit_max_size
    parameters['pyxit_fixed_size'] = str2bool(options.pyxit_fixed_size)
    parameters['cytomine_annotation_projects'] = map(int, options.cytomine_annotation_projects.split(','))
    parameters['cytomine_reviewed'] = str2bool(options.cytomine_reviewed)
    parameters['cytomine_dump_annotation_stats'] = str2bool(options.cytomine_dump_annotation_stats)
    parameters['cytomine_dump_annotations'] = str2bool(options.cytomine_dump_annotations)
    parameters['build_model'] = str2bool(options.build_model)
    parameters['dir_ls'] = os.path.join(
        parameters["cytomine_working_path"],
        str(parameters['cytomine_annotation_projects']).replace(',', '-').replace('[', '').replace(']', '').replace(' ', ''),
        "zoom_level",
        str(parameters['cytomine_zoom_level']))

    pyxit_parameters = {}
    pyxit_parameters['pyxit_target_width'] = options.pyxit_target_width
    pyxit_parameters['pyxit_target_height'] = options.pyxit_target_height
    pyxit_parameters['pyxit_n_subwindows'] = options.pyxit_n_subwindows
    pyxit_parameters['pyxit_min_size'] = options.pyxit_min_size
    pyxit_parameters['pyxit_max_size'] = options.pyxit_max_size
    pyxit_parameters['pyxit_colorspace'] = options.pyxit_colorspace
    pyxit_parameters['pyxit_interpolation'] = options.pyxit_interpolation
    pyxit_parameters['pyxit_transpose'] = str2bool(options.pyxit_transpose)
    pyxit_parameters['pyxit_fixed_size'] = str2bool(options.pyxit_fixed_size)
    pyxit_parameters['pyxit_n_jobs'] = options.pyxit_nb_jobs

    if options.verbose:
        print(parameters)

    # Create Cytomine connection
    conn = cytomine.Cytomine(parameters["cytomine_host"],
                             parameters["cytomine_public_key"],
                             parameters["cytomine_private_key"],
                             base_path=parameters['cytomine_base_path'],
                             working_path=parameters['cytomine_working_path'],
                             verbose=str2bool(options.verbose))

    # Dump annotations
    if parameters['cytomine_dump_annotations']:
        # Get annotation descriptions (JSON) from project(s)
        annotations = None
        for prj in parameters['cytomine_annotation_projects']:
            if parameters["cytomine_reviewed"]:
                annotations_prj = conn.get_annotations(id_project=prj, reviewed_only=parameters["cytomine_reviewed"])
            else:
                annotations_prj = conn.get_annotations(id_project=prj)
            if not annotations:
                annotations = annotations_prj
            else:
                annotations.data().extend(annotations_prj.data())
            if prj == 21907448 or prj == 155194683:
                annotations_prj = conn.get_annotations(id_project=prj, id_term=91376951)
                annotations.data().extend(annotations_prj.data())
            print("Nb annotations so far... = %d" % len(annotations.data()))
        print("Total annotations projects %s = %d" % (parameters['cytomine_annotation_projects'], len(annotations.data())))

        # Set output dir parameters
        if not os.path.exists(parameters['dir_ls']):
            print("Creating annotation directory: %s" % parameters['dir_ls'])
            os.makedirs(parameters['dir_ls'])

        # Dump annotation images locally
        print("Dump training annotation images in %s..." % parameters['dir_ls'])
        conn.dump_annotations(
            annotations=annotations,
            get_image_url_func=Annotation.get_annotation_alpha_crop_url,
            dest_path=parameters['dir_ls'],
            desired_zoom=parameters['cytomine_zoom_level'],
            excluded_terms=parameters['cytomine_excluded_terms'])

        # Put positive terms under the same term and same for negative terms
        term_directories = os.listdir(parameters['dir_ls'])
        pos_path = os.path.join(parameters['dir_ls'], "1")
        if not os.path.exists(pos_path):
            print("Creating positive annotation directory: %s" % pos_path)
            os.makedirs(pos_path)
        neg_path = os.path.join(parameters['dir_ls'], "0")
        if not os.path.exists(neg_path):
            print("Creating negative annotation directory: %s" % neg_path)
            os.makedirs(neg_path)
        for dir in term_directories:
            dir_abs = os.path.join(parameters['dir_ls'], dir)
            # Move files
            if int(dir) in parameters['cytomine_predict_terms']:
                for image_file in os.listdir(dir_abs):
                    os.rename(os.path.join(dir_abs, image_file), os.path.join(pos_path, image_file))
            else:
                for image_file in os.listdir(dir_abs):
                    os.rename(os.path.join(dir_abs, image_file), os.path.join(neg_path, image_file))
            # Remove empty directory
            if int(dir) != 0 and int(dir) != 1:
                os.rmdir(dir_abs)

    if parameters['cytomine_dump_annotation_stats']:
        pos_path = os.path.join(parameters['dir_ls'], "1")
        neg_path = os.path.join(parameters['dir_ls'], "0")
        stats_dumped_annotations(pos_path, neg_path)

    if parameters['build_model']:
        print("Build_model...")
        # Model name
        model_name = "all_in_batchsize{}_epochs{}"\
            .format(parameters['keras_batch_size'], parameters['keras_n_epochs']).replace(".", "")
        print("Model_name :", model_name)
        pyxit = PyxitClassifier(
            None,
            n_subwindows=pyxit_parameters['pyxit_n_subwindows'],
            min_size=pyxit_parameters['pyxit_min_size'],
            max_size=pyxit_parameters['pyxit_max_size'],
            target_width=pyxit_parameters['pyxit_target_width'],
            target_height=pyxit_parameters['pyxit_target_height'],
            n_jobs=pyxit_parameters['pyxit_n_jobs'],
            interpolation=pyxit_parameters['pyxit_interpolation'],
            transpose=pyxit_parameters['pyxit_transpose'],
            colorspace=pyxit_parameters['pyxit_colorspace'],
            fixed_size=pyxit_parameters['pyxit_fixed_size'],
            random_state=None,
            verbose=1,
            get_output=_get_output_from_mask,
            parallel_leaf_transform=False)

        # Build filenames and classes
        X, y = build_from_dir(parameters['dir_ls'])
        classes = np.unique(y)
        n_classes = len(classes)
        y_original = y
        y = np.searchsorted(classes, y)
        n_images = len(y)
        print("Number of images : ", n_images)
        images, masks, labels = image_mask_builder(X, y, parameters['pyxit_colorspace'])

        # ImageDataGenerator : two instances with the same arguments
        data_gen_args = dict(rotation_range=180.,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             zoom_range=0.2,
                             rescale=1. / 255,  # float division so rescaling is not zero
                             horizontal_flip=True,
                             vertical_flip=True)
        # featurewise_center = True,
        # featurewise_std_normalization = True
        image_datagen = ImageDataGenerator(**data_gen_args)
        mask_datagen = ImageDataGenerator(**data_gen_args)

        # Provide the same seed and keyword arguments to the fit and flow methods
        seed = 1
        # image_datagen.fit(images, augment=True, seed=seed)
        # mask_datagen.fit(masks, augment=True, seed=seed)
        print(type(images))
        print(type(masks))
        print(type(labels))
        print(images[0:10])
        print(masks[0:10])
        print(labels[0:10])
        image_generator = image_datagen.flow(images, labels, seed=seed, shuffle=False)
        mask_generator = mask_datagen.flow(masks, labels, seed=seed, shuffle=False)
        # Combine generators into one which yields images and masks
        train_generator = zip(image_generator, mask_generator)

        # Creating and compiling model
        if not os.path.exists(parameters['keras_save_to']):
            os.makedirs(parameters['keras_save_to'])
        model_weights_filename = os.path.join(parameters['keras_save_to'], "weights_" + model_name + ".h5")
        print('Fitting model...')
        model = get_unet()
        model_checkpoint = ModelCheckpoint(model_weights_filename, monitor='val_loss', save_best_only=True)

        # Train FCN
        model.fit_generator(train_generator,
                            steps_per_epoch=100,
                            epochs=30,
                            callbacks=[model_checkpoint],
                            verbose=1)
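
# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original script: reloading the best
# weights written by the ModelCheckpoint callback above in order to run the
# trained U-Net on new data. The function name, weights path and image batch
# are placeholders; get_unet() is the same model-building helper used above.
# ---------------------------------------------------------------------------
def _example_predict_with_unet(weights_path, images):
    """Hypothetical helper: rebuild the U-Net, load the trained weights and
    return per-pixel probability masks for a batch of preprocessed images."""
    model = get_unet()                 # same architecture as used for training
    model.load_weights(weights_path)   # e.g. the "weights_<model_name>.h5" file saved above
    masks = model.predict(images, verbose=1)
    return masks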