def main(argv):
    """Train a Pyxit segmentation model from Cytomine annotations.

    Parses ``argv`` with optparse and copies the option values into the
    ``parameters`` and ``pyxit_parameters`` dictionaries (neither is created
    here, so both must already exist in an enclosing scope). It then opens a
    Cytomine connection, dumps the annotations of every requested project
    to disk, fits a ``PyxitClassifier`` wrapping an ``ExtraTreesClassifier``
    on them and, when ``--pyxit_save_to`` is given, pickles the class list
    followed by the fitted model into that file.

    Args:
        argv: List of command line arguments handed to
            ``OptionParser.parse_args``.

    Raises:
        SystemExit: Raised through ``OptionParser.error`` when
            ``--cytomine_annotation_projects`` is missing or empty.
    """
    # Define command line options
    p = optparse.OptionParser(description='Pyxit/Cytomine Segmentation Model Builder',
                              prog='PyXit Segmentation Model Builder (PYthon piXiT)')

    p.add_option("--cytomine_host", type="string", default='', dest="cytomine_host", help="The Cytomine host (eg: beta.cytomine.be, localhost:8080)")
    p.add_option('--cytomine_public_key', type="string", default='', dest="cytomine_public_key", help="Cytomine public key")
    p.add_option('--cytomine_private_key', type="string", default='', dest="cytomine_private_key", help="Cytomine private key")
    p.add_option('--cytomine_base_path', type="string", default='/api/', dest="cytomine_base_path", help="Cytomine base path")
    p.add_option('--cytomine_id_software', type="int", dest="cytomine_id_software", help="The Cytomine software identifier")
    p.add_option('--cytomine_working_path', default="/tmp/", type="string", dest="cytomine_working_path", help="The working directory (eg: /tmp)")
    p.add_option('--cytomine_id_project', type="int", dest="cytomine_id_project", help="The Cytomine project identifier")
    p.add_option('-z', '--cytomine_zoom_level', type='int', dest='cytomine_zoom_level', help="working zoom level")
    p.add_option('--cytomine_annotation_projects', type="string", dest="cytomine_annotation_projects", help="Projects from which annotations are extracted")
    p.add_option('--cytomine_predict_terms', type='string', default='0', dest='cytomine_predict_terms', help="term ids of predicted terms (=positive class in binary mode)")
    p.add_option('--cytomine_excluded_terms', type='string', default='0', dest='cytomine_excluded_terms', help="term ids of excluded terms")
    p.add_option('--cytomine_reviewed', type='string', default="False", dest="cytomine_reviewed", help="Get reviewed annotations only")
    p.add_option('--pyxit_target_width', type='int', dest='pyxit_target_width', help="pyxit subwindows width")
    p.add_option('--pyxit_target_height', type='int', dest='pyxit_target_height', help="pyxit subwindows height")
    p.add_option('--pyxit_save_to', type='string', dest='pyxit_save_to', help="pyxit model directory")  # future: get it from server db
    p.add_option('--pyxit_colorspace', type='int', dest='pyxit_colorspace', help="pyxit colorspace encoding")  # future: get it from server db
    p.add_option('--pyxit_n_jobs', type='int', dest='pyxit_n_jobs', help="pyxit number of jobs for trees")  # future: get it from server db
    p.add_option('--pyxit_n_subwindows', default=10, type="int", dest="pyxit_n_subwindows", help="number of subwindows")
    p.add_option('--pyxit_interpolation', default=2, type="int", dest="pyxit_interpolation", help="interpolation method 1,2,3,4")
    p.add_option('--pyxit_transpose', type="string", default="False", dest="pyxit_transpose", help="transpose subwindows")
    p.add_option('--pyxit_fixed_size', type="string", default="False", dest="pyxit_fixed_size", help="extract fixed size subwindows")
    p.add_option('--forest_n_estimators', default=10, type="int", dest="forest_n_estimators", help="number of base estimators (T)")
    p.add_option('--forest_max_features', default=1, type="int", dest="forest_max_features", help="max features at test node (k)")
    p.add_option('--forest_min_samples_split', default=1, type="int", dest="forest_min_samples_split", help="minimum node sample size (nmin)")
    p.add_option('--verbose', type="string", default="0", dest="verbose", help="Turn on (1) or off (0) verbose mode")

    options, _ = p.parse_args(args=argv)

    # This option has no default; without it the split below would fail on
    # None, so report a proper usage error instead.
    if not options.cytomine_annotation_projects:
        p.error("--cytomine_annotation_projects is required "
                "(comma separated project identifiers)")

    # --verbose is a string option ("0"/"1"); the raw string is always truthy,
    # so it has to be converted before being used as a flag.
    verbose = str2bool(options.verbose)

    parameters['cytomine_host'] = options.cytomine_host
    parameters['cytomine_public_key'] = options.cytomine_public_key
    parameters['cytomine_private_key'] = options.cytomine_private_key
    parameters['cytomine_base_path'] = options.cytomine_base_path
    parameters['cytomine_working_path'] = options.cytomine_working_path
    parameters['cytomine_id_project'] = options.cytomine_id_project
    parameters['cytomine_id_software'] = options.cytomine_id_software
    # Real lists (not lazy map objects): they are printed, stringified and
    # used for repeated membership tests further down.
    parameters['cytomine_annotation_projects'] = [int(v) for v in options.cytomine_annotation_projects.split(',')]
    parameters['cytomine_predict_terms'] = [int(v) for v in options.cytomine_predict_terms.split(',')]
    parameters['cytomine_excluded_terms'] = [int(v) for v in options.cytomine_excluded_terms.split(',')]
    parameters['cytomine_zoom_level'] = options.cytomine_zoom_level
    parameters['cytomine_reviewed'] = str2bool(options.cytomine_reviewed)

    pyxit_parameters['pyxit_target_width'] = options.pyxit_target_width
    pyxit_parameters['pyxit_target_height'] = options.pyxit_target_height
    pyxit_parameters['pyxit_n_subwindows'] = options.pyxit_n_subwindows
    pyxit_parameters['pyxit_colorspace'] = options.pyxit_colorspace
    pyxit_parameters['pyxit_interpolation'] = options.pyxit_interpolation
    pyxit_parameters['pyxit_transpose'] = str2bool(options.pyxit_transpose)
    pyxit_parameters['pyxit_fixed_size'] = str2bool(options.pyxit_fixed_size)
    pyxit_parameters['forest_n_estimators'] = options.forest_n_estimators
    pyxit_parameters['forest_max_features'] = options.forest_max_features
    pyxit_parameters['forest_min_samples_split'] = options.forest_min_samples_split
    pyxit_parameters['pyxit_save_to'] = options.pyxit_save_to
    pyxit_parameters['pyxit_n_jobs'] = options.pyxit_n_jobs

    if verbose:
        print("[pyxit.main] Options =  %s" % (options,))

    # Create JOB/USER/JOB
    conn = cytomine.Cytomine(parameters["cytomine_host"],
                             parameters["cytomine_public_key"],
                             parameters["cytomine_private_key"],
                             base_path=parameters['cytomine_base_path'],
                             working_path=parameters['cytomine_working_path'],
                             verbose=verbose)

    # Create a new userjob if connected as human user.
    # The explicit comparison is kept on purpose: `not algo` would also match
    # None, which the original condition does not.
    current_user = conn.get_current_user()
    if current_user.algo == False:  # noqa: E712
        print("adduserJob...")
        user_job = conn.add_user_job(parameters['cytomine_id_software'], parameters['cytomine_id_project'])
        print("set_credentials...")
        conn.set_credentials(str(user_job.publicKey), str(user_job.privateKey))
        print("done")
    else:
        user_job = current_user
        print("Already running as userjob")
    job = conn.get_job(user_job.job)

    # Annotation directory: <working_path>/<id1-id2-...>/zoom_level/<zoom>
    projects_label = (str(parameters['cytomine_annotation_projects'])
                      .replace(',', '-').replace('[', '').replace(']', '').replace(' ', ''))
    pyxit_parameters['dir_ls'] = os.path.join(parameters["cytomine_working_path"],
                                              projects_label,
                                              "zoom_level",
                                              str(parameters['cytomine_zoom_level']))
    if not os.path.exists(pyxit_parameters['dir_ls']):
        print("Creating annotation directory: %s" % pyxit_parameters['dir_ls'])
        os.makedirs(pyxit_parameters['dir_ls'])
    # NOTE(review): the sleeps in this function are kept from the original
    # flow; their purpose is not evident from this code.
    time.sleep(2)
    job = conn.update_job_status(job, status_comment="Publish software parameters values")
    # NOTE(review): all_params is an alias, not a copy, so pyxit_parameters
    # itself receives every key of parameters (and is printed merged below).
    # Kept as is because other code may rely on the merged dictionary.
    all_params = pyxit_parameters
    all_params.update(parameters)
    conn.add_job_parameters(user_job.job, conn.get_software(parameters['cytomine_id_software']), all_params)

    # Get annotation data
    job = conn.update_job_status(job, status=job.RUNNING, status_comment="Fetching data", progress=0)
    # Retrieve annotations from each annotation project, either reviewed or unreviewed annotations
    annotations = None
    for prj in parameters['cytomine_annotation_projects']:
        if parameters["cytomine_reviewed"]:
            print("Retrieving reviewed annotations...")
            annotations_prj = conn.get_annotations(id_project=prj, reviewed_only=True)
            print("Reviewed annotations: %d" % len(annotations_prj.data()))
        else:
            print("Retrieving (unreviewed) annotations...")
            annotations_prj = conn.get_annotations(id_project=prj)
            print("(Unreviewed) annotations: %d" % len(annotations_prj.data()))
        if not annotations:
            annotations = annotations_prj
        else:
            annotations.data().extend(annotations_prj.data())
        print("Nb annotations so far... = %d" % len(annotations.data()))
        time.sleep(3)
    print("Total annotations projects %s = %d" % (parameters['cytomine_annotation_projects'], len(annotations.data())))
    time.sleep(3)
    print("Predict terms / excluded terms")
    print(parameters['cytomine_predict_terms'])
    print(parameters['cytomine_excluded_terms'])
    time.sleep(3)
    annotations = conn.dump_annotations(annotations=annotations,
                                        get_image_url_func=Annotation.get_annotation_alpha_crop_url,
                                        dest_path=pyxit_parameters['dir_ls'],
                                        excluded_terms=parameters['cytomine_excluded_terms'],
                                        desired_zoom=parameters['cytomine_zoom_level'])

    # Build matrix (subwindows described by pixel values and output) for training.
    # The initial problem is turned into a binary one: "predict_terms" vs others.
    project = conn.get_project(parameters['cytomine_id_project'])
    terms = conn.get_terms(project.ontology)
    map_classes = {}
    for term in terms.data():
        if term.id in parameters['cytomine_predict_terms']:
            map_classes[term.id] = 1
        else:
            map_classes[term.id] = 0
    print(pyxit_parameters)

    # Prepare image matrix
    X, y = build_from_dir(pyxit_parameters['dir_ls'], map_classes)
    print("X length: %d " % len(X))
    print("Y length: %d " % len(y))
    time.sleep(5)
    # Binary problem: the class list is fixed instead of being derived from y.
    classes = [0, 1]
    y = np.searchsorted(classes, y)

    # Instantiate classifiers
    job = conn.update_job_status(job, status=job.RUNNING, status_comment="[pyxit.main] Initializing PyxitClassifier...", progress=25)
    forest = ExtraTreesClassifier(n_estimators=pyxit_parameters['forest_n_estimators'],
                                  max_features=pyxit_parameters['forest_max_features'],
                                  min_samples_split=pyxit_parameters['forest_min_samples_split'],
                                  n_jobs=pyxit_parameters['pyxit_n_jobs'],
                                  verbose=True)

    pyxit = PyxitClassifier(base_estimator=forest,
                            n_subwindows=pyxit_parameters['pyxit_n_subwindows'],
                            min_size=0.0,  # segmentation use fixed-size subwindows
                            max_size=1.0,  # segmentation use fixed-size subwindows
                            target_width=pyxit_parameters['pyxit_target_width'],
                            target_height=pyxit_parameters['pyxit_target_height'],
                            interpolation=pyxit_parameters['pyxit_interpolation'],
                            transpose=pyxit_parameters['pyxit_transpose'],
                            colorspace=pyxit_parameters['pyxit_colorspace'],
                            fixed_size=pyxit_parameters['pyxit_fixed_size'],
                            n_jobs=pyxit_parameters['pyxit_n_jobs'],
                            verbose=True,
                            get_output=_get_output_from_mask)

    # The model file is opened before training (as in the original flow) and
    # is always closed again, even when extraction or fitting fails.
    save_to = pyxit_parameters['pyxit_save_to']
    fd = None
    if save_to:
        d = os.path.dirname(save_to)
        # dirname is '' for a bare file name; os.makedirs('') would raise.
        if d and not os.path.exists(d):
            os.makedirs(d)
        fd = open(save_to, "wb")

    try:
        if fd is not None:
            pickle.dump(classes, fd, protocol=pickle.HIGHEST_PROTOCOL)

        job = conn.update_job_status(job, status_comment="[pyxit.main] Extracting %d subwindows from each image in %s" % (pyxit_parameters['pyxit_n_subwindows'], pyxit_parameters['dir_ls']), progress=50)
        time.sleep(3)
        # Extract random subwindows in dumped annotations
        _X, _y = pyxit.extract_subwindows(X, y)

        # Build pixel classifier
        job = conn.update_job_status(job, status_comment="[pyxit.main] Fitting Pyxit Segmentation Model on %s" % pyxit_parameters['dir_ls'], progress=75)
        print("TIME : %s" % strftime("%Y-%m-%d %H:%M:%S", localtime()))
        start = time.time()
        pyxit.fit(X, y, _X=_X, _y=_y)
        end = time.time()
        print("Elapsed time FIT: %d s" % (end - start))
        print("TIME : %s" % strftime("%Y-%m-%d %H:%M:%S", localtime()))

        print("pyxit.base_estimator.n_classes_")
        print(pyxit.base_estimator.n_classes_)
        print("pyxit.base_estimator.classes_")
        print(pyxit.base_estimator.classes_)

        # Save model on local disk, appended after the class list
        if fd is not None:
            if verbose:
                print("----------------------------------------------------------------")
                print("[pyxit.main] Saving Pyxit Segmentation Model locally into %s" % save_to)
                print("----------------------------------------------------------------")
            pickle.dump(pyxit, fd, protocol=pickle.HIGHEST_PROTOCOL)
    finally:
        if fd is not None:
            fd.close()

    # The model is only stored locally; it is not attached to the job.
    print("Not Publishing model in db..")

    job = conn.update_job_status(job, status=job.TERMINATED, status_comment="Finish", progress=100)
    print("END.")
# Example #2
# 0
def run(cyto_job, parameters):
    """Fit a Pyxit segmentation model on annotations fetched from Cytomine.

    Annotations of every project listed in
    ``parameters.cytomine_annotation_projects`` are fetched, labelled 1 when
    they carry one of ``cytomine_predict_terms`` and 0 otherwise (annotations
    carrying an excluded term are skipped), dumped as alpha-masked crops into
    a per-job working folder and used to fit a ``PyxitClassifier``. When
    ``parameters.pyxit_save_to`` is set, the class list and then the fitted
    model are pickled into that file. The working folder is removed on exit,
    whether or not training succeeded.

    Args:
        cyto_job: Object exposing ``job`` (with ``id`` and ``update``) and
            ``project.id``.
        parameters: Object exposing the ``cytomine_*``, ``pyxit_*`` and
            ``forest_*`` attributes read below; the three term/project
            attributes are comma separated strings of integers.
    """
    logging.info("----- segmentation_model_builder v%s -----", __version__)
    logging.info("Entering run(cyto_job=%s, parameters=%s)", cyto_job,
                 parameters)

    job = cyto_job.job

    projects = [
        int(v) for v in parameters.cytomine_annotation_projects.split(',')
    ]
    # A real set, not map(): on Python 3 a map object is a one-shot iterator
    # and would be exhausted by the first membership test below.
    predict_terms = {
        int(v) for v in parameters.cytomine_predict_terms.split(',')
    }
    raw_excluded = parameters.cytomine_excluded_terms
    if raw_excluded and raw_excluded != "null":
        excluded_terms = [int(v) for v in raw_excluded.split(',')]
    else:
        excluded_terms = []

    working_path = os.path.join("tmp", str(job.id))
    if not os.path.exists(working_path):
        logging.info("Creating annotation directory: %s", working_path)
        os.makedirs(working_path)

    model_file = None
    try:
        # Get annotation data
        job.update(statusComment="Fetching data")

        # Retrieve annotations from each annotation project
        annotations = []
        for prj in projects:
            logging.info("Retrieving annotations in project %d", prj)
            annotations_prj = AnnotationCollection(
                project=prj,
                showTerm=True,
                reviewed=parameters.cytomine_reviewed).fetch()
            logging.info("# annotations in project %d: %d", prj,
                         len(annotations_prj))

            annotations += annotations_prj

        nb_annots = len(annotations)
        logging.info("# total annotations: %d", nb_annots)

        project_terms = TermCollection().fetch_with_filter(
            "project", cyto_job.project.id)
        # Change initial problem into binary problem : "predict_terms" vs others
        map_classes = {
            term.id: int(term.id in predict_terms)
            for term in project_terms
        }
        for term in excluded_terms:
            map_classes[term] = -1  # excluded class

        classes = [0, 1]
        dest_patterns = {
            cls: os.path.join(working_path, str(cls), "{image}_{id}.png")
            for cls in classes
        }

        x = []
        y = []
        for (i, annot) in enumerate(annotations):
            job.update(progress=int(40 * i / nb_annots),
                       statusComment="Treating annotation {}/{}".format(
                           i, nb_annots))

            # The first term that is positive or excluded decides the class.
            class_annot = 0
            annot_terms = annot.term if annot.term is not None else []
            for term in annot_terms:
                # Annotations may come from projects other than the job's
                # one, so a term can be missing from map_classes; classify
                # it directly instead of failing with KeyError.
                class_annot = map_classes.get(term,
                                              int(term in predict_terms))
                if class_annot != 0:
                    break

            if class_annot == -1:  # excluded => do not dump and do not add to dataset
                continue

            annot.dump(dest_patterns[class_annot],
                       mask=True,
                       alpha=True,
                       zoom=parameters.cytomine_zoom_level)
            x.append(annot.filename)
            y.append(class_annot)

        x = np.array(x)
        y = np.array(y)
        logging.debug("X length: %d", len(x))
        logging.debug("y length: %d", len(y))

        # Instantiate classifiers
        job.update(
            statusComment="[pyxit.main] Initializing PyxitClassifier...",
            progress=40)
        forest = ExtraTreesClassifier(
            n_estimators=parameters.forest_n_estimators,
            max_features=parameters.forest_max_features,
            min_samples_split=parameters.forest_min_samples_split,
            n_jobs=parameters.pyxit_n_jobs,
            verbose=True)

        pyxit = PyxitClassifier(
            base_estimator=forest,
            n_subwindows=parameters.pyxit_n_subwindows,
            min_size=0.0,  # segmentation use fixed-size subwindows
            max_size=1.0,  # segmentation use fixed-size subwindows
            target_width=parameters.pyxit_target_width,
            target_height=parameters.pyxit_target_height,
            interpolation=parameters.pyxit_interpolation,
            transpose=parameters.pyxit_transpose,
            colorspace=parameters.pyxit_colorspace,
            fixed_size=parameters.pyxit_fixed_size,
            n_jobs=parameters.pyxit_n_jobs,
            verbose=True,
            get_output=_get_output_from_mask)

        if parameters.pyxit_save_to:
            d = os.path.dirname(parameters.pyxit_save_to)
            # dirname is '' for a bare file name; os.makedirs('') would raise.
            if d and not os.path.exists(d):
                os.makedirs(d)
            model_file = open(parameters.pyxit_save_to, "wb")
            pickle.dump(classes, model_file, protocol=pickle.HIGHEST_PROTOCOL)

        job.update(
            statusComment=
            "[pyxit.main] Extracting {} subwindows from each image in {}".
            format(parameters.pyxit_n_subwindows, working_path),
            progress=50)
        # Extract random subwindows in dumped annotations
        _X, _y = pyxit.extract_subwindows(x, y)

        # Build pixel classifier
        job.update(
            statusComment="[pyxit.main] Fitting Pyxit Segmentation Model",
            progress=75)
        logging.info("Start fitting Pyxit segmentation model")
        start = time.time()
        pyxit.fit(x, y, _X=_X, _y=_y)
        end = time.time()
        logging.debug("Elapsed time FIT: %d s", end - start)

        logging.debug("pyxit.base_estimator.n_classes_: %s",
                      pyxit.base_estimator.n_classes_)
        logging.debug("pyxit.base_estimator.classes_: %s",
                      pyxit.base_estimator.classes_)

        if model_file is not None:
            logging.debug(
                "----------------------------------------------------------------"
            )
            logging.debug(
                "[pyxit.main] Saving Pyxit Segmentation Model locally into %s",
                parameters.pyxit_save_to)
            logging.debug(
                "----------------------------------------------------------------"
            )

            pickle.dump(pyxit, model_file, protocol=pickle.HIGHEST_PROTOCOL)
            # Close right away so the file is complete on disk at this point.
            model_file.close()

            # job_data = JobData(job.id, "Model", "model.pkl").save()
            # job_data.upload(parameters.pyxit_save_to)

    finally:
        # Safety net for the failure path; closing a closed file is a no-op.
        if model_file is not None:
            model_file.close()

        logging.info("Deleting folder %s", working_path)
        shutil.rmtree(working_path, ignore_errors=True)

        logging.debug("Leaving run()")
# Example #3
# 0
def main(argv):
    current_path = os.getcwd() + '/' + os.path.dirname(__file__)

    # Define command line options
    p = optparse.OptionParser(description='Cytomine Segmentation prediction',
                              prog='Cytomine segmentation prediction',
                              version='0.1')

    p.add_option(
        '--cytomine_host',
        type="string",
        default='beta.cytomine.be',
        dest="cytomine_host",
        help="The Cytomine host (eg: beta.cytomine.be, localhost:8080)")
    p.add_option('--cytomine_public_key',
                 type="string",
                 default='',
                 dest="cytomine_public_key",
                 help="Cytomine public key")
    p.add_option('--cytomine_private_key',
                 type="string",
                 default='',
                 dest="cytomine_private_key",
                 help="Cytomine private key")
    p.add_option('--cytomine_base_path',
                 type="string",
                 default='/api/',
                 dest="cytomine_base_path",
                 help="Cytomine base path")
    p.add_option('--cytomine_id_software',
                 type="int",
                 dest="cytomine_id_software",
                 help="The Cytomine software identifier")
    p.add_option('--cytomine_working_path',
                 default="/tmp/",
                 type="string",
                 dest="cytomine_working_path",
                 help="The working directory (eg: /tmp)")
    p.add_option('--cytomine_id_project',
                 type="int",
                 dest="cytomine_id_project",
                 help="The Cytomine project identifier")

    p.add_option('-i',
                 '--cytomine_id_image',
                 type='int',
                 dest='cytomine_id_image',
                 help="image id from cytomine",
                 metavar='IMAGE')
    p.add_option('-z',
                 '--cytomine_zoom_level',
                 type='int',
                 dest='cytomine_zoom_level',
                 help="working zoom level")
    p.add_option('-j',
                 '--nb_jobs',
                 type='int',
                 dest='nb_jobs',
                 help="number of parallel jobs")
    p.add_option(
        '--cytomine_predict_terms',
        type='str',
        dest='cytomine_predict_terms',
        help=
        "term id of all positive terms. The first term is the output predicted annotation term"
    )
    p.add_option('--cytomine_excluded_terms',
                 type='string',
                 dest='cytomine_excluded_terms',
                 help="term id of excluded terms)")

    p.add_option('--pyxit_target_width',
                 type='int',
                 dest='pyxit_target_width',
                 help="pyxit subwindows width")
    p.add_option('--pyxit_target_height',
                 type='int',
                 dest='pyxit_target_height',
                 help="pyxit subwindows height")
    p.add_option('--pyxit_colorspace',
                 type='int',
                 dest='pyxit_colorspace',
                 help="pyxit colorspace encoding")
    p.add_option('--pyxit_nb_jobs',
                 type='int',
                 dest='pyxit_nb_jobs',
                 help="pyxit number of jobs for trees")
    p.add_option('--pyxit_fixed_size',
                 type='string',
                 default="0",
                 dest="pyxit_fixed_size",
                 help="extract fixed size subwindows")
    p.add_option('--pyxit_transpose',
                 type='string',
                 default="0",
                 dest="pyxit_transpose",
                 help="transpose subwindows")
    p.add_option('--pyxit_n_subwindows',
                 type='int',
                 default="10",
                 dest="pyxit_n_subwindows",
                 help="number of subwindows")
    p.add_option('--pyxit_interpolation',
                 default=2,
                 type="int",
                 dest="pyxit_interpolation",
                 help="interpolation method 1,2,3,4")
    p.add_option('--pyxit_min_size',
                 default=0.5,
                 type="float",
                 dest="pyxit_min_size",
                 help="min size")
    p.add_option('--pyxit_max_size',
                 default=1.0,
                 type="float",
                 dest="pyxit_max_size",
                 help="max size")
    p.add_option('--cytomine_reviewed',
                 type='string',
                 default="False",
                 dest="cytomine_reviewed",
                 help="Get reviewed annotations only")

    p.add_option('--cytomine_dump_annotations',
                 type='string',
                 default="0",
                 dest="cytomine_dump_annotations",
                 help="Dump training annotations or not")
    p.add_option('--cytomine_dump_annotation_stats',
                 type='string',
                 default="0",
                 dest="cytomine_dump_annotation_stats",
                 help="Calculate stats on dumped annotations or not")
    p.add_option('--build_model',
                 type="string",
                 default="0",
                 dest="build_model",
                 help="Turn on (1) or off (0) model building")
    p.add_option('--cytomine_annotation_projects',
                 type="string",
                 dest="cytomine_annotation_projects",
                 help="Projects from which annotations are extracted")
    p.add_option('--verbose',
                 type="string",
                 default="0",
                 dest="verbose",
                 help="Turn on (1) or off (0) verbose mode")

    p.add_option('--keras_save_to',
                 type='string',
                 default="",
                 dest='keras_save_to',
                 help="keras model weight file")
    p.add_option('--keras_batch_size',
                 type="int",
                 dest="keras_batch_size",
                 help="Training batch size")
    p.add_option('--keras_n_epochs',
                 type="int",
                 dest="keras_n_epochs",
                 help="Number of epochs")
    p.add_option('--keras_shuffle',
                 type="string",
                 dest="keras_shuffle",
                 help="Turn on (1) or off (0) batch shuffle")
    p.add_option('--keras_validation_split',
                 type="float",
                 dest="keras_validation_split",
                 help="Batch validation split")
    options, arguments = p.parse_args(args=argv)

    parameters = {}
    parameters['keras_save_to'] = options.keras_save_to
    parameters['keras_batch_size'] = options.keras_batch_size
    parameters['keras_n_epochs'] = options.keras_n_epochs
    parameters['keras_shuffle'] = options.keras_shuffle
    parameters['keras_validation_split'] = options.keras_validation_split
    parameters['cytomine_host'] = options.cytomine_host
    parameters['cytomine_public_key'] = options.cytomine_public_key
    parameters['cytomine_private_key'] = options.cytomine_private_key
    parameters['cytomine_base_path'] = options.cytomine_base_path
    parameters['cytomine_working_path'] = options.cytomine_working_path
    parameters['cytomine_base_path'] = options.cytomine_base_path
    parameters['cytomine_id_project'] = options.cytomine_id_project
    parameters['cytomine_id_software'] = options.cytomine_id_software
    parameters['cytomine_predict_terms'] = map(
        int, options.cytomine_predict_terms.split(','))
    parameters['cytomine_predicted_annotation_term'] = parameters[
        'cytomine_predict_terms'][0]
    parameters['cytomine_excluded_terms'] = map(
        int, options.cytomine_excluded_terms.split(','))

    parameters['pyxit_colorspace'] = options.pyxit_colorspace
    parameters['pyxit_nb_jobs'] = options.pyxit_nb_jobs
    parameters['pyxit_n_jobs'] = options.pyxit_nb_jobs
    parameters['cytomine_nb_jobs'] = options.pyxit_nb_jobs
    parameters['cytomine_id_image'] = options.cytomine_id_image
    parameters['cytomine_zoom_level'] = options.cytomine_zoom_level
    parameters['nb_jobs'] = options.nb_jobs
    parameters['pyxit_target_width'] = options.pyxit_target_width
    parameters['pyxit_target_height'] = options.pyxit_target_height
    parameters['pyxit_n_subwindows'] = options.pyxit_n_subwindows
    parameters['pyxit_interpolation'] = options.pyxit_interpolation
    parameters['pyxit_transpose'] = str2bool(options.pyxit_transpose)
    parameters['pyxit_min_size'] = options.pyxit_min_size
    parameters['pyxit_max_size'] = options.pyxit_max_size
    parameters['pyxit_fixed_size'] = str2bool(options.pyxit_fixed_size)
    parameters['cytomine_annotation_projects'] = map(
        int, options.cytomine_annotation_projects.split(','))
    parameters['cytomine_reviewed'] = str2bool(options.cytomine_reviewed)
    parameters['cytomine_dump_annotation_stats'] = str2bool(
        options.cytomine_dump_annotation_stats)
    parameters['cytomine_dump_annotations'] = str2bool(
        options.cytomine_dump_annotations)
    parameters['build_model'] = str2bool(options.build_model)
    parameters['dir_ls'] = os.path.join(
        parameters["cytomine_working_path"],
        str(parameters['cytomine_annotation_projects']).replace(
            ',', '-').replace('[', '').replace(']', '').replace(' ', ''),
        "zoom_level", str(parameters['cytomine_zoom_level']))

    pyxit_parameters = {}
    pyxit_parameters['pyxit_target_width'] = options.pyxit_target_width
    pyxit_parameters['pyxit_target_height'] = options.pyxit_target_height
    pyxit_parameters['pyxit_n_subwindows'] = options.pyxit_n_subwindows
    pyxit_parameters['pyxit_min_size'] = options.pyxit_min_size
    pyxit_parameters['pyxit_max_size'] = options.pyxit_max_size
    pyxit_parameters['pyxit_colorspace'] = options.pyxit_colorspace
    pyxit_parameters['pyxit_interpolation'] = options.pyxit_interpolation
    pyxit_parameters['pyxit_transpose'] = str2bool(options.pyxit_transpose)
    pyxit_parameters['pyxit_fixed_size'] = str2bool(options.pyxit_fixed_size)
    pyxit_parameters['pyxit_n_jobs'] = options.pyxit_nb_jobs

    if options.verbose:
        print(parameters)

    # Create Cytomine connection
    conn = cytomine.Cytomine(parameters["cytomine_host"],
                             parameters["cytomine_public_key"],
                             parameters["cytomine_private_key"],
                             base_path=parameters['cytomine_base_path'],
                             working_path=parameters['cytomine_working_path'],
                             verbose=str2bool(options.verbose))

    # Dump annotations
    if parameters['cytomine_dump_annotations']:
        # Get annotation descriptions (JSON) from project(s)
        annotations = None
        for prj in parameters['cytomine_annotation_projects']:
            if parameters["cytomine_reviewed"]:
                annotations_prj = conn.get_annotations(
                    id_project=prj,
                    reviewed_only=parameters["cytomine_reviewed"])
            else:
                annotations_prj = conn.get_annotations(id_project=prj)
            if not annotations:
                annotations = annotations_prj
            else:
                annotations.data().extend(annotations_prj.data())

            if prj == 21907448 or prj == 155194683:
                annotations_prj = conn.get_annotations(id_project=prj,
                                                       id_term=91376951)
                annotations.data().extend(annotations_prj.data())
            print("Nb annotations so far... = %d" % len(annotations.data()))
        print("Total annotations projects %s = %d" %
              (parameters['cytomine_annotation_projects'],
               len(annotations.data())))

        # Set output dir parameters
        if not os.path.exists(parameters['dir_ls']):
            print("Creating annotation directory: %s" % parameters['dir_ls'])
            os.makedirs(parameters['dir_ls'])

        # Dump annotation images locally
        print("Dump training annotation images in %s...", parameters['dir_ls'])
        conn.dump_annotations(
            annotations=annotations,
            get_image_url_func=Annotation.get_annotation_alpha_crop_url,
            dest_path=parameters['dir_ls'],
            desired_zoom=parameters['cytomine_zoom_level'],
            excluded_terms=parameters['cytomine_excluded_terms'])

        # Put positive terms under the same term and same for negative terms
        term_directories = os.listdir(parameters['dir_ls'])
        pos_image_path = os.path.join(parameters['dir_ls'], "image", "1")
        pos_mask_path = os.path.join(parameters['dir_ls'], "mask", "1")
        neg_image_path = os.path.join(parameters['dir_ls'], "image", "0")
        neg_mask_path = os.path.join(parameters['dir_ls'], "mask", "0")
        if not os.path.exists(pos_image_path):
            print("Creating positive annotation directory: %s" %
                  pos_image_path)
            os.makedirs(pos_image_path)
        if not os.path.exists(neg_image_path):
            print("Creating negative annotation directory: %s" %
                  neg_image_path)
            os.makedirs(neg_image_path)
        if not os.path.exists(pos_mask_path):
            print("Creating positive annotation directory: %s" % pos_mask_path)
            os.makedirs(pos_mask_path)
        if not os.path.exists(neg_mask_path):
            print("Creating negative annotation directory: %s" % neg_mask_path)
            os.makedirs(neg_mask_path)

        for dir in term_directories:
            dir_abs = os.path.join(parameters['dir_ls'], dir)

            # Move files
            print("Term directory: %s" % dir_abs)
            if int(dir) in parameters['cytomine_predict_terms']:
                print("Positive term")
                for image_file in os.listdir(dir_abs):
                    print(image_file)
                    try:
                        im = Image.open(os.path.join(dir_abs, image_file))
                    except IOError:
                        "warning filename %s is not an image" % os.path.join(
                            dir_abs, image_file)
                        continue
                    rgb = im.tobytes("raw", "RGB")
                    a = im.tobytes("raw", "A")
                    im.close()
                    image = Image.frombytes("RGB", im.size, rgb)
                    mask = Image.frombytes("L", im.size, a)
                    image.save(os.path.join(pos_image_path, image_file), "PNG")
                    mask.save(os.path.join(pos_mask_path, image_file), "PNG")

            else:
                print("Negative term")
                for image_file in os.listdir(dir_abs):
                    print(image_file)
                    try:
                        im = Image.open(os.path.join(dir_abs, image_file))
                    except IOError:
                        "warning filename %s is not an image" % os.path.join(
                            dir_abs, image_file)
                        continue
                    rgb = im.tobytes("raw", "RGB")
                    a = im.tobytes("raw", "A")
                    im.close()
                    image = Image.frombytes("RGB", im.size, rgb)
                    mask = Image.frombytes("L", im.size, a)
                    image.save(os.path.join(neg_image_path, image_file), "PNG")
                    mask.save(os.path.join(neg_mask_path, image_file), "PNG")

    if parameters['cytomine_dump_annotation_stats']:
        pos_path = os.path.join(parameters['dir_ls'], "image", "1")
        neg_path = os.path.join(parameters['dir_ls'], "image", "0")
        stats_dumped_annotations(pos_path, neg_path)

    # if parameters['build_model'] :
    # 	# Model name
    # 	model_name = "nsubw{}_winsize{}x{}_minsize{}_maxsize{}_batchsize{}_epochs{}_shuffle{}_valsplit{}_colorspace{}"\
    # 		.format(parameters['pyxit_n_subwindows'],
    # 				parameters['pyxit_target_width'],
    # 				parameters['pyxit_target_height'],
    # 				parameters['pyxit_min_size'],
    # 				parameters['pyxit_max_size'],
    # 				parameters['keras_batch_size'],
    # 				parameters['keras_n_epochs'],
    # 				parameters['keras_shuffle'],
    # 				parameters['keras_validation_split'],
    # 				pyxit_parameters['pyxit_colorspace']).replace(".", "")
    # 	print("Model_name :", model_name)
    #
    # 	pyxit = PyxitClassifier(None,
    # 							n_subwindows = pyxit_parameters['pyxit_n_subwindows'],
    # 							min_size = pyxit_parameters['pyxit_min_size'],
    # 							max_size = pyxit_parameters['pyxit_max_size'],
    # 							target_width = pyxit_parameters['pyxit_target_width'],
    # 							target_height = pyxit_parameters['pyxit_target_height'],
    # 							n_jobs = pyxit_parameters['pyxit_n_jobs'],
    # 							interpolation = pyxit_parameters['pyxit_interpolation'],
    # 							transpose = pyxit_parameters['pyxit_transpose'],
    # 							colorspace = pyxit_parameters['pyxit_colorspace'],
    # 							fixed_size = pyxit_parameters['pyxit_fixed_size'],
    # 							random_state = None,
    # 							verbose = 1,
    # 							get_output = _get_output_from_mask,
    # 							parallel_leaf_transform = False)
    #
    # 	# Build filenames and classes
    # 	X, y = build_from_dir(parameters['dir_ls'])
    #
    # 	classes = np.unique(y)
    # 	n_classes = len(classes)
    # 	y_original = y
    # 	y = np.searchsorted(classes, y)
    # 	n_images = len(y)
    # 	print("Number of images : ", n_images)
    #
    # 	# Extract subwindows
    # 	_X, _y = pyxit.extract_subwindows(X, y)
    # 	n_subw = len(_y)
    # 	print("Number of subwindows : ", n_subw)
    #
    # 	# Reshape data structure
    # 	_X = np.reshape(_X, (
    # 	n_subw, pyxit_parameters['pyxit_target_width'], pyxit_parameters['pyxit_target_height'], n_channels))
    # 	_y = np.reshape(_y, (n_subw, pyxit_parameters['pyxit_target_width'], pyxit_parameters['pyxit_target_height']))
    #
    # 	# Train FCN
    # 	if not os.path.exists(parameters['keras_save_to']) :
    # 		os.makedirs(parameters['keras_save_to'])
    #
    # 	model_weights_file_path = os.path.join(parameters['keras_save_to'], "weights_" + model_name + ".h5")
    #
    # 	mean, std = train(_X, _y,
    # 					  model_weights_file_path,
    # 					  imgs_width = pyxit_parameters['pyxit_target_width'],
    # 					  imgs_height = pyxit_parameters['pyxit_target_height'],
    # 					  batch_size = parameters['keras_batch_size'],
    # 					  epochs = parameters['keras_n_epochs'],
    # 					  shuffle = parameters['keras_shuffle'],
    # 					  validation_split = parameters['keras_validation_split'])
    #
    # 	# Save mean and std used to normalize training data
    # 	mean_std_save_file_path = os.path.join(parameters['keras_save_to'], "meanstd_" + model_name + ".txt")
    # 	mean_std_save_file = open(mean_std_save_file_path, 'w')
    # 	mean_std_save_file.write(str(mean) + '\n')
    # 	mean_std_save_file.write(str(std) + '\n')

    if parameters['build_model']:
        # Model name
        model_name = "nsubw{}_winsize{}x{}_minsize{}_maxsize{}_batchsize{}_epochs{}_shuffle{}_valsplit{}_colorspace{}_zoom{}_until4x4_IDG"\
         .format(parameters['pyxit_n_subwindows'],
           parameters['pyxit_target_width'],
           parameters['pyxit_target_height'],
           parameters['pyxit_min_size'],
           parameters['pyxit_max_size'],
           parameters['keras_batch_size'],
           parameters['keras_n_epochs'],
           parameters['keras_shuffle'],
           parameters['keras_validation_split'],
           pyxit_parameters['pyxit_colorspace'],
           parameters['cytomine_zoom_level']).replace(".", "")
        print("Model_name :", model_name)

        pyxit = PyxitClassifier(
            None,
            n_subwindows=1,
            min_size=1,
            max_size=1,
            target_width=pyxit_parameters['pyxit_target_width'],
            target_height=pyxit_parameters['pyxit_target_height'],
            n_jobs=pyxit_parameters['pyxit_n_jobs'],
            interpolation=pyxit_parameters['pyxit_interpolation'],
            transpose=pyxit_parameters['pyxit_transpose'],
            colorspace=pyxit_parameters['pyxit_colorspace'],
            fixed_size=pyxit_parameters['pyxit_fixed_size'],
            random_state=None,
            verbose=1,
            get_output=_get_output_from_mask,
            parallel_leaf_transform=False)
        # pyxit = PyxitClassifier(None,
        # 						n_subwindows=pyxit_parameters['pyxit_n_subwindows'],
        # 						min_size=pyxit_parameters['pyxit_min_size'],
        # 						max_size=pyxit_parameters['pyxit_max_size'],
        # 						target_width=pyxit_parameters['pyxit_target_width'],
        # 						target_height=pyxit_parameters['pyxit_target_height'],
        # 						n_jobs=pyxit_parameters['pyxit_n_jobs'],
        # 						interpolation=pyxit_parameters['pyxit_interpolation'],
        # 						transpose=pyxit_parameters['pyxit_transpose'],
        # 						colorspace=pyxit_parameters['pyxit_colorspace'],
        # 						fixed_size=pyxit_parameters['pyxit_fixed_size'],
        # 						random_state=None,
        # 						verbose=1,
        # 						get_output = _get_output_from_mask,
        # 						parallel_leaf_transform=False)

        # Build filenames and classes
        X, y = build_from_dir(parameters['dir_ls'])

        classes = np.unique(y)
        n_classes = len(classes)
        y_original = y
        y = np.searchsorted(classes, y)
        n_images = len(y)
        print("Number of images : ", n_images)
        print("Start extraction of subwindows...")

        # Extract subwindows
        _X, _y = pyxit.extract_subwindows(X, y)
        print("Over")
        n_subw = len(_y)
        print("Number of subwindows : ", n_subw)

        # Reshape data structure
        _X = np.reshape(_X,
                        (n_subw, pyxit_parameters['pyxit_target_width'],
                         pyxit_parameters['pyxit_target_height'], n_channels))
        _y = np.reshape(_y, (n_subw, pyxit_parameters['pyxit_target_width'],
                             pyxit_parameters['pyxit_target_height'], 1))
        print(type(_X))
        print(type(_y))

        # ImageDataGenerator :  two instances with the same arguments
        print("Init data gen")
        data_gen_args = dict(rotation_range=180.,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             zoom_range=0.2,
                             rescale=1 / 255,
                             horizontal_flip=True,
                             vertical_flip=True)
        # featurewise_center = True,
        #  featurewise_std_normalization = True)

        image_datagen = ImageDataGenerator(**data_gen_args)
        mask_datagen = ImageDataGenerator(**data_gen_args)

        # Provide the same seed and keyword arguments to the fit and flow methods
        seed = 1
        print("Fit image data generator (image)...")
        image_datagen.fit(_X[0:100], augment=True, seed=seed)
        print("Fit image data generator (mask)...")
        mask_datagen.fit(_y[0:100], augment=True, seed=seed)

        print('Flow on images...')
        # image_generator = image_datagen.flow(_X, labels, seed = seed, shuffle = False)
        image_generator = image_datagen.flow_from_directory(
            os.path.join(parameters['dir_ls'], "image"),
            class_mode=None,
            target_size=(128, 128),
            seed=seed)
        print('Flow on masks...')
        # mask_generator = mask_datagen.flow(_y, labels, seed = seed, shuffle = False)
        mask_generator = mask_datagen.flow_from_directory(
            os.path.join(parameters['dir_ls'], "mask"),
            class_mode=None,
            target_size=(128, 128),
            seed=seed)

        # combine generators into one which yields image and masks
        train_generator = combine_generator(image_generator, mask_generator)

        # Creating and compiling model
        if not os.path.exists(parameters['keras_save_to']):
            os.makedirs(parameters['keras_save_to'])

        model_weights_filename = os.path.join(parameters['keras_save_to'],
                                              "weights_" + model_name + ".h5")
        print('Fitting model...')
        model = get_unet(128, 128)
        model_checkpoint = ModelCheckpoint(model_weights_filename,
                                           monitor='val_loss',
                                           save_best_only=True)

        # Train FCN
        model.fit_generator(train_generator,
                            steps_per_epoch=100,
                            epochs=50,
                            callbacks=[model_checkpoint],
                            verbose=1)
# Example #4
# 0
def main(argv):
    """Command-line entry point: fit a PyxitClassifier and optionally test it.

    The options select one of three modes:

    * ``--dir_ls`` alone: fit on the learning set and, when
      ``--pyxit_save_to`` is given, pickle the class labels, the fitted
      PyxitClassifier and the optional final classifier (in that order) into
      that file.
    * ``--dir_ls`` with ``--dir_ts``: fit (and optionally save) as above, then
      predict the samples found in the test directory.
    * ``--dir_ls`` with ``--cv_k_folds``: cross-validate on the learning set.

    Args:
        argv: List of command-line arguments handed to ``optparse``.

    Returns:
        ``None`` when no evaluation was requested. Otherwise a dict mapping
        every evaluated sample (an entry of the ``X`` array returned by
        ``build_from_dir``) to a ``(predicted_class, confidence)`` tuple, where
        ``predicted_class`` is the original class label converted to ``int``
        and ``confidence`` is the highest class probability, or NaN when the
        final classifier cannot provide probabilities.

    Raises:
        SystemExit: With status 1 when the options are missing, conflicting
            or select an unsupported final classifier.
    """
    # Define command line options
    p = optparse.OptionParser(description='Pyxit',
                              prog='PyXit (PYthon piXiT)',
                              version='PyXit 0.1')

    p.add_option('--dir_ls',
                 type="string",
                 dest="dir_ls",
                 help="The learning set directory")
    p.add_option('--dir_ts',
                 type="string",
                 dest="dir_ts",
                 help="The test set directory")

    p.add_option('--cv_k_folds',
                 type="int",
                 dest="cv_k_folds",
                 help="The number of folds")
    p.add_option(
        '--cv_shuffle',
        default=False,
        action="store_true",
        dest="cv_shuffle",
        help="Whether cross-validation is performed using ShuffleSplit.")
    p.add_option('--cv_shuffle_test_fraction',
                 default=0.1,
                 type="float",
                 dest="cv_shuffle_test_fraction",
                 help="The proportion of data in shuffled test splits.")

    p.add_option('--pyxit_n_subwindows',
                 default=10,
                 type="int",
                 dest="pyxit_n_subwindows",
                 help="number of subwindows")
    p.add_option('--pyxit_min_size',
                 default=0.5,
                 type="float",
                 dest="pyxit_min_size",
                 help="min size")
    p.add_option('--pyxit_max_size',
                 default=1.0,
                 type="float",
                 dest="pyxit_max_size",
                 help="max size")
    p.add_option('--pyxit_target_width',
                 default=16,
                 type="int",
                 dest="pyxit_target_width",
                 help="target width")
    p.add_option('--pyxit_target_height',
                 default=16,
                 type="int",
                 dest="pyxit_target_height",
                 help="target height")
    p.add_option('--pyxit_interpolation',
                 default=2,
                 type="int",
                 dest="pyxit_interpolation",
                 help="interpolation method 1,2,3,4")
    p.add_option('--pyxit_transpose',
                 default=False,
                 action="store_true",
                 dest="pyxit_transpose",
                 help="transpose subwindows")
    p.add_option('--pyxit_colorspace',
                 default=2,
                 type="int",
                 dest="pyxit_colorspace",
                 help="colorspace 0=RGB, 1=TRGB, 2=HSV")
    p.add_option('--pyxit_fixed_size',
                 default=False,
                 action="store_true",
                 dest="pyxit_fixed_size",
                 help="extract fixed size subwindows")
    p.add_option('--pyxit_n_jobs',
                 default=1,
                 type="int",
                 dest="pyxit_n_jobs",
                 help="number of jobs")
    p.add_option('--pyxit_save_to',
                 type="string",
                 dest="pyxit_save_to",
                 help="file to save the model into")

    p.add_option('--forest_n_estimators',
                 default=10,
                 type="int",
                 dest="forest_n_estimators",
                 help="number of base estimators (T)")
    p.add_option('--forest_max_features',
                 default=1,
                 type="int",
                 dest="forest_max_features",
                 help="max features at test node (k)")
    p.add_option('--forest_min_samples_split',
                 default=1,
                 type="int",
                 dest="forest_min_samples_split",
                 help="minimum node sample size (nmin)")
    p.add_option('--forest_shared_mem',
                 default=False,
                 action="store_true",
                 dest="forest_shared_mem",
                 help="shared mem")

    p.add_option(
        '--svm',
        default=0,
        dest="svm",
        help=
        "final svm classifier: 0=nosvm, 1=libsvm, 2=liblinear, 3=lr-l1, 4=lr-l2",
        type="int")
    p.add_option('--svm_c',
                 default=1.0,
                 type="float",
                 dest="svm_c",
                 help="svm C")

    p.add_option('--quiet',
                 action="store_false",
                 default=True,
                 dest="verbose",
                 help="Turn off verbose mode")
    p.add_option('--verbose',
                 action="store_true",
                 default=True,
                 dest="verbose",
                 help="Turn on verbose mode")

    options, arguments = p.parse_args(args=argv)

    # Check for errors in the options
    e = None

    if not options.dir_ls:
        e = "--dir_ls needs to be set."

    elif options.dir_ts and options.cv_k_folds:
        e = "--dir_ts and --cv_k_folds cannot be set at the same time."

    elif options.pyxit_save_to and options.cv_k_folds:
        e = "--pyxit_save_to and --cv_k_folds cannot be set at the same time."

    if e:
        print("Error: %s" % e)
        print("Run with -h option for help.")
        sys.exit(1)

    if options.verbose:
        print("[pyxit.main] Options = ", options)

    # Load data
    if options.verbose:
        print("[pyxit.main] Loading data...")

    X, y = build_from_dir(options.dir_ls)

    # Replace the original labels by their rank in the sorted unique labels;
    # `classes` maps those ranks back to the labels when results are reported.
    classes = np.unique(y)
    n_classes = len(classes)
    y = np.searchsorted(classes, y)

    # Instantiate classifiers
    if options.verbose:
        print("[pyxit.main] Initializing PyxitClassifier...")

    forest = ExtraTreesClassifier(
        n_estimators=options.forest_n_estimators,
        max_features=options.forest_max_features,
        min_samples_split=options.forest_min_samples_split,
        n_jobs=options.pyxit_n_jobs,
        verbose=options.verbose)

    pyxit = PyxitClassifier(base_estimator=forest,
                            n_subwindows=options.pyxit_n_subwindows,
                            min_size=options.pyxit_min_size,
                            max_size=options.pyxit_max_size,
                            target_width=options.pyxit_target_width,
                            target_height=options.pyxit_target_height,
                            interpolation=options.pyxit_interpolation,
                            transpose=options.pyxit_transpose,
                            colorspace=options.pyxit_colorspace,
                            fixed_size=options.pyxit_fixed_size,
                            n_jobs=options.pyxit_n_jobs,
                            verbose=options.verbose)

    # Optional final classifier trained on the features produced by
    # pyxit.transform(); the variable is named `svm` whatever its actual type.
    svm = None
    if options.svm:
        if options.svm == SVM_LIBSVM:
            svm = SVC(probability=True, C=options.svm_c, kernel="linear")
        if options.svm == SVM_LIBLINEAR:
            svm = LinearSVC(C=options.svm_c)
        if options.svm == SVM_LRL1:
            svm = LogisticRegression(penalty="l1", C=options.svm_c)
        if options.svm == SVM_LRL2:
            svm = LogisticRegression(penalty="l2", C=options.svm_c)
        if options.svm == ET:
            svm = ExtraTreesClassifier(
                n_estimators=1000,
                max_features="sqrt",
                #max_features=1000,
                min_samples_split=2,
                n_jobs=options.pyxit_n_jobs,
                verbose=options.verbose)
        if options.svm == RF:
            svm = RandomForestClassifier(
                n_estimators=1000,
                #max_features=1000,
                max_features="sqrt",
                min_samples_split=2,
                n_jobs=options.pyxit_n_jobs,
                verbose=options.verbose)

        if options.svm == NN:
            svm = neighbors.KNeighborsClassifier(10)

        if svm is None:
            # Fail with a clear message instead of a NameError further down.
            print("Error: unsupported --svm value %d." % options.svm)
            print("Run with -h option for help.")
            sys.exit(1)

    if options.verbose:
        print("[pyxit.main] PyxitClassifier =")
        print(pyxit)

        if options.svm:
            print("[pyxit.main] SVM =")
            print(svm)

    # Evaluation state. It stays None when the run only fits (and saves) a
    # model, in which case there is nothing to report.
    X_eval = None
    y_true = None
    y_predict = None
    y_proba = None
    all_tested = None

    # Build and evaluate
    if not options.cv_k_folds:
        # Fit on the whole learning set. The model file is opened first and
        # kept open because the dumps are interleaved with the fitting steps:
        # the PyxitClassifier is pickled before transform(reset=True) is
        # called on it. NOTE(review): whether reset=True alters the pickled
        # state is not visible here; the original order is simply preserved.
        fd = None
        if options.pyxit_save_to:
            fd = open(options.pyxit_save_to, "wb")

        try:
            if fd is not None:
                pickle.dump(classes, fd, protocol=pickle.HIGHEST_PROTOCOL)

            if options.verbose:
                print("[pyxit.main] Fitting PyxitClassifier on %s" %
                      options.dir_ls)

            _X, _y = pyxit.extract_subwindows(X, y)
            pyxit.fit(X, y, _X=_X, _y=_y)

            if fd is not None:
                if options.verbose:
                    print("[pyxit.main] Saving PyxitClassifier into %s" %
                          options.pyxit_save_to)

                pickle.dump(pyxit, fd, protocol=pickle.HIGHEST_PROTOCOL)

            if options.svm:
                Xt = pyxit.transform(X, _X=_X, reset=True)

                if options.verbose:
                    print("[pyxit.main] Fitting SVC on %s" % options.dir_ls)

                svm.fit(Xt, y)

                if fd is not None:
                    if options.verbose:
                        print("[pyxit.main] Saving SVM into %s" %
                              options.pyxit_save_to)

                    pickle.dump(svm, fd, protocol=pickle.HIGHEST_PROTOCOL)

        finally:
            # Close the file even when fitting or pickling fails.
            if fd is not None:
                fd.close()

        if options.dir_ts:
            if options.verbose:
                print("[pyxit.main] Testing on %s" % options.dir_ts)

            X_test, y_test = build_from_dir(options.dir_ts)
            y_test = np.searchsorted(classes, y_test)
            _X_test, _y_test = pyxit.extract_subwindows(X_test, y_test)

            # Results are reported for the test samples, not the learning set.
            X_eval = X_test
            y_true = y_test
            all_tested = np.ones(len(y_true), dtype=bool)

            if not options.svm:
                y_predict = pyxit.predict(X_test, _X=_X_test)
                y_proba = pyxit.predict_proba(X_test, _X=_X_test)

            else:
                Xt = pyxit.transform(X_test, _X=_X_test)
                y_predict = svm.predict(Xt)

                if hasattr(svm, "predict_proba"):
                    y_proba = svm.predict_proba(Xt)
                else:
                    # No probabilities available: report NaN confidences.
                    y_proba = np.full((len(y_true), n_classes), np.nan)

    else:
        if options.verbose:
            print("[pyxit.main] K-Fold cross-validation (K=%d)" %
                  options.cv_k_folds)

        _X, _y = pyxit.extract_subwindows(X, y)

        X_eval = X
        y_true = y
        y_predict = np.empty(y_true.shape, dtype=y.dtype)
        # NaN marks samples for which no probability was ever computed.
        y_proba = np.full((y_true.shape[0], n_classes), np.nan)
        all_tested = np.zeros(len(y_true), dtype=bool)

        cm = np.zeros((n_classes, n_classes), dtype=np.int32)

        if not options.cv_shuffle:
            cv = StratifiedKFold(y_true, options.cv_k_folds)
        else:
            cv = ShuffleSplit(len(X),
                              n_iter=options.cv_k_folds,
                              test_size=options.cv_shuffle_test_fraction)

        for i, (train, test) in enumerate(cv, 1):
            all_tested[test] = True
            # Subwindow-level index sets matching the image-level splits.
            _train = pyxit.extend_mask(train)
            _test = pyxit.extend_mask(test)

            if options.verbose:
                print("[pyxit.main] Fitting PyxitClassifier on fold %d" % i)

            pyxit.fit(X[train], y[train], _X=_X[_train], _y=_y[_train])

            if options.svm:
                Xt = pyxit.transform(X[train], _X=_X[_train], reset=True)

                if options.verbose:
                    print("[pyxit.main] Fitting SVC on fold %d" % i)

                svm.fit(Xt, y[train])

            if options.verbose:
                print("[pyxit.main] Testing on fold %d" % i)

            if not options.svm:
                y_predict[test] = pyxit.predict(X[test], _X=_X[_test])
                y_proba[test] = pyxit.predict_proba(X[test], _X=_X[_test])

            else:
                Xt = pyxit.transform(X[test], _X=_X[_test])
                y_predict[test] = np.asarray(svm.predict(Xt), dtype=y.dtype)

                if hasattr(svm, "predict_proba"):
                    y_proba[test] = svm.predict_proba(Xt)
                print(svm)

            if options.verbose:
                print("[pyxit.main] Classification error on fold %d = %f" %
                      (i, 1.0 * np.sum(y_true[test] != y_predict[test]) /
                       len(y_true[test])))
                print("[pyxit.main] Cumulated confusion matrix =")
                cm += confusion_matrix(y_true[test], y_predict[test])
                print_cm(cm, classes)

    # Output some results
    if all_tested is None:
        return None

    if options.verbose:
        print("---")
        print("[pyxit.main] Test coverage =",
              sum(all_tested) / (1.0 * len(all_tested)))
        print("[pyxit.main] Overall classification error = %f" %
              (1.0 * np.sum(y_true[all_tested] != y_predict[all_tested]) /
               len(y_true[all_tested])))
        print("[pyxit.main] Overall confusion matrix =")
        print_cm(
            confusion_matrix(y_true[all_tested], y_predict[all_tested]),
            classes)

    # Only samples that were actually tested get an entry, each one paired
    # with its own prediction (shuffled CV may leave some samples untested).
    tested = np.flatnonzero(all_tested)
    labels = classes.take(np.asarray(y_predict)[tested], axis=0)
    confidences = np.max(np.asarray(y_proba)[tested], axis=1)

    d = {}
    for idx, label, confidence in zip(tested, labels, confidences):
        d[X_eval[idx]] = (int(label), confidence)
    return d
def main(argv):
    """Train a Pyxit segmentation model from Cytomine annotations.

    Steps, in order:

    1. Parse ``argv`` with ``optparse`` and copy the values into the
       ``parameters`` and ``pyxit_parameters`` dictionaries. Neither dict is
       created here -- presumably both are module-level; confirm.
    2. Open a Cytomine connection and, when connected as a human user,
       create a user job and switch to its credentials.
    3. Fetch the annotations of every project listed in
       ``--cytomine_annotation_projects`` and dump their alpha crops below
       ``<working_path>/<project ids>/zoom_level/<zoom>``.
    4. Map every ontology term to 1 (listed in ``--cytomine_predict_terms``)
       or 0 (any other term) and build the training set with
       ``build_from_dir``.
    5. Extract subwindows, fit a ``PyxitClassifier`` built on an
       ``ExtraTreesClassifier`` and, when ``--pyxit_save_to`` is given,
       pickle the class list followed by the fitted model into that file.

    Parameters
    ----------
    argv: list of str
        Command-line arguments handed to ``OptionParser.parse_args``.

    Notes
    -----
    ``OptionParser.error`` (usage message + exit) is triggered when
    ``--cytomine_annotation_projects`` is missing.
    """
    # Define command line options
    p = optparse.OptionParser(
        description='Pyxit/Cytomine Segmentation Model Builder',
        prog='PyXit Segmentation Model Builder (PYthon piXiT)')

    p.add_option("--cytomine_host", type="string", default='',
                 dest="cytomine_host",
                 help="The Cytomine host (eg: beta.cytomine.be, localhost:8080)")
    p.add_option('--cytomine_public_key', type="string", default='',
                 dest="cytomine_public_key", help="Cytomine public key")
    p.add_option('--cytomine_private_key', type="string", default='',
                 dest="cytomine_private_key", help="Cytomine private key")
    p.add_option('--cytomine_base_path', type="string", default='/api/',
                 dest="cytomine_base_path", help="Cytomine base path")
    p.add_option('--cytomine_id_software', type="int",
                 dest="cytomine_id_software",
                 help="The Cytomine software identifier")
    p.add_option('--cytomine_working_path', default="/tmp/", type="string",
                 dest="cytomine_working_path",
                 help="The working directory (eg: /tmp)")
    p.add_option('--cytomine_id_project', type="int",
                 dest="cytomine_id_project",
                 help="The Cytomine project identifier")
    p.add_option('-z', '--cytomine_zoom_level', type='int',
                 dest='cytomine_zoom_level', help="working zoom level")
    p.add_option('--cytomine_annotation_projects', type="string",
                 dest="cytomine_annotation_projects",
                 help="Projects from which annotations are extracted")
    p.add_option('--cytomine_predict_terms', type='string', default='0',
                 dest='cytomine_predict_terms',
                 help="term ids of predicted terms (=positive class in binary mode)")
    p.add_option('--cytomine_excluded_terms', type='string', default='0',
                 dest='cytomine_excluded_terms',
                 help="term ids of excluded terms")
    p.add_option('--cytomine_reviewed', type='string', default="False",
                 dest="cytomine_reviewed",
                 help="Get reviewed annotations only")
    p.add_option('--pyxit_target_width', type='int',
                 dest='pyxit_target_width', help="pyxit subwindows width")
    p.add_option('--pyxit_target_height', type='int',
                 dest='pyxit_target_height', help="pyxit subwindows height")
    # future: get the three options below from the server db
    p.add_option('--pyxit_save_to', type='string', dest='pyxit_save_to',
                 help="pyxit model directory")
    p.add_option('--pyxit_colorspace', type='int', dest='pyxit_colorspace',
                 help="pyxit colorspace encoding")
    p.add_option('--pyxit_n_jobs', type='int', dest='pyxit_n_jobs',
                 help="pyxit number of jobs for trees")
    p.add_option('--pyxit_n_subwindows', default=10, type="int",
                 dest="pyxit_n_subwindows", help="number of subwindows")
    p.add_option('--pyxit_interpolation', default=2, type="int",
                 dest="pyxit_interpolation",
                 help="interpolation method 1,2,3,4")
    p.add_option('--pyxit_transpose', type="string", default="False",
                 dest="pyxit_transpose", help="transpose subwindows")
    p.add_option('--pyxit_fixed_size', type="string", default="False",
                 dest="pyxit_fixed_size",
                 help="extract fixed size subwindows")
    p.add_option('--forest_n_estimators', default=10, type="int",
                 dest="forest_n_estimators",
                 help="number of base estimators (T)")
    p.add_option('--forest_max_features', default=1, type="int",
                 dest="forest_max_features",
                 help="max features at test node (k)")
    p.add_option('--forest_min_samples_split', default=1, type="int",
                 dest="forest_min_samples_split",
                 help="minimum node sample size (nmin)")
    p.add_option('--verbose', type="string", default="0", dest="verbose",
                 help="Turn on (1) or off (0) verbose mode")

    options, _ = p.parse_args(args=argv)

    # This option has no default; fail with a usage message instead of an
    # AttributeError on None.split() below.
    if options.cytomine_annotation_projects is None:
        p.error("--cytomine_annotation_projects is required "
                "(comma-separated project ids)")

    # --verbose is a string option whose default "0" is truthy, so it must be
    # converted before being used as a condition.
    verbose = str2bool(options.verbose)

    parameters['cytomine_host'] = options.cytomine_host
    parameters['cytomine_public_key'] = options.cytomine_public_key
    parameters['cytomine_private_key'] = options.cytomine_private_key
    parameters['cytomine_base_path'] = options.cytomine_base_path
    parameters['cytomine_working_path'] = options.cytomine_working_path
    parameters['cytomine_id_project'] = options.cytomine_id_project
    parameters['cytomine_id_software'] = options.cytomine_id_software
    # Real lists (not map objects): they are iterated, printed and used for
    # repeated membership tests further down.
    parameters['cytomine_annotation_projects'] = [
        int(v) for v in options.cytomine_annotation_projects.split(',')]
    parameters['cytomine_predict_terms'] = [
        int(v) for v in options.cytomine_predict_terms.split(',')]
    parameters['cytomine_excluded_terms'] = [
        int(v) for v in options.cytomine_excluded_terms.split(',')]
    parameters['cytomine_zoom_level'] = options.cytomine_zoom_level
    parameters['cytomine_reviewed'] = str2bool(options.cytomine_reviewed)

    pyxit_parameters['pyxit_target_width'] = options.pyxit_target_width
    pyxit_parameters['pyxit_target_height'] = options.pyxit_target_height
    pyxit_parameters['pyxit_n_subwindows'] = options.pyxit_n_subwindows
    pyxit_parameters['pyxit_colorspace'] = options.pyxit_colorspace
    pyxit_parameters['pyxit_interpolation'] = options.pyxit_interpolation
    pyxit_parameters['pyxit_transpose'] = str2bool(options.pyxit_transpose)
    pyxit_parameters['pyxit_fixed_size'] = str2bool(options.pyxit_fixed_size)
    pyxit_parameters['forest_n_estimators'] = options.forest_n_estimators
    pyxit_parameters['forest_max_features'] = options.forest_max_features
    pyxit_parameters['forest_min_samples_split'] = \
        options.forest_min_samples_split
    pyxit_parameters['pyxit_save_to'] = options.pyxit_save_to
    pyxit_parameters['pyxit_n_jobs'] = options.pyxit_n_jobs

    if verbose:
        print("[pyxit.main] Options =  %s" % (options,))

    # Create JOB/USER/JOB
    conn = cytomine.Cytomine(parameters["cytomine_host"],
                             parameters["cytomine_public_key"],
                             parameters["cytomine_private_key"],
                             base_path=parameters['cytomine_base_path'],
                             working_path=parameters['cytomine_working_path'],
                             verbose=verbose)

    # Create a new userjob if connected as human user
    current_user = conn.get_current_user()
    if current_user.algo == False:  # noqa: E712 -- keep exact comparison
        print("adduserJob...")
        user_job = conn.add_user_job(parameters['cytomine_id_software'],
                                     parameters['cytomine_id_project'])
        print("set_credentials...")
        conn.set_credentials(str(user_job.publicKey), str(user_job.privateKey))
        print("done")
    else:
        user_job = current_user
        print("Already running as userjob")
    job = conn.get_job(user_job.job)

    # Annotation dump directory:
    # <working_path>/<id1-id2-...>/zoom_level/<zoom>
    pyxit_parameters['dir_ls'] = os.path.join(
        parameters["cytomine_working_path"],
        str(parameters['cytomine_annotation_projects']).replace(
            ',', '-').replace('[', '').replace(']', '').replace(' ', ''),
        "zoom_level", str(parameters['cytomine_zoom_level']))
    if not os.path.exists(pyxit_parameters['dir_ls']):
        print("Creating annotation directory: %s" % pyxit_parameters['dir_ls'])
        os.makedirs(pyxit_parameters['dir_ls'])
    time.sleep(2)
    job = conn.update_job_status(
        job, status_comment="Publish software parameters values")
    # NOTE(review): all_params is an alias, so pyxit_parameters itself
    # receives every key of parameters (including 'cytomine_private_key') and
    # is printed further down. Kept as in the original.
    all_params = pyxit_parameters
    all_params.update(parameters)
    conn.add_job_parameters(
        user_job.job, conn.get_software(parameters['cytomine_id_software']),
        all_params)

    # Get annotation data
    job = conn.update_job_status(job,
                                 status=job.RUNNING,
                                 status_comment="Fetching data",
                                 progress=0)
    # Retrieve annotations from each annotation project, either reviewed or
    # unreviewed, and accumulate them in the first collection returned.
    annotations = None
    for prj in parameters['cytomine_annotation_projects']:
        if parameters["cytomine_reviewed"]:
            print("Retrieving reviewed annotations...")
            annotations_prj = conn.get_annotations(id_project=prj,
                                                   reviewed_only=True)
            print("Reviewed annotations: %d" % len(annotations_prj.data()))
        else:
            print("Retrieving (unreviewed) annotations...")
            annotations_prj = conn.get_annotations(id_project=prj)
            print("(Unreviewed) annotations: %d" % len(annotations_prj.data()))
        if not annotations:
            annotations = annotations_prj
        else:
            annotations.data().extend(annotations_prj.data())
        print("Nb annotations so far... = %d" % len(annotations.data()))
        time.sleep(3)
    print("Total annotations projects %s = %d" % (
        parameters['cytomine_annotation_projects'], len(annotations.data())))
    time.sleep(3)
    print("Predict terms / excluded terms")
    print(parameters['cytomine_predict_terms'])
    print(parameters['cytomine_excluded_terms'])
    time.sleep(3)
    annotations = conn.dump_annotations(
        annotations=annotations,
        get_image_url_func=Annotation.get_annotation_alpha_crop_url,
        dest_path=pyxit_parameters['dir_ls'],
        excluded_terms=parameters['cytomine_excluded_terms'],
        desired_zoom=parameters['cytomine_zoom_level'])

    # Build matrix (subwindows described by pixel values and output) for
    # training. The initial problem is turned into a binary one:
    # "predict_terms" (1) vs all other terms (0).
    project = conn.get_project(parameters['cytomine_id_project'])
    terms = conn.get_terms(project.ontology)
    predict_terms = parameters['cytomine_predict_terms']
    map_classes = {}
    for term in terms.data():
        map_classes[term.id] = 1 if term.id in predict_terms else 0
    print(pyxit_parameters)

    # Prepare image matrix
    X, y = build_from_dir(pyxit_parameters['dir_ls'], map_classes)
    print("X length: %d " % len(X))
    print("Y length: %d " % len(y))
    time.sleep(5)
    classes = [0, 1]
    y = np.searchsorted(classes, y)

    # Instantiate classifiers
    job = conn.update_job_status(
        job,
        status=job.RUNNING,
        status_comment="[pyxit.main] Initializing PyxitClassifier...",
        progress=25)
    forest = ExtraTreesClassifier(
        n_estimators=pyxit_parameters['forest_n_estimators'],
        max_features=pyxit_parameters['forest_max_features'],
        min_samples_split=pyxit_parameters['forest_min_samples_split'],
        n_jobs=pyxit_parameters['pyxit_n_jobs'],
        verbose=True)

    pyxit = PyxitClassifier(
        base_estimator=forest,
        n_subwindows=pyxit_parameters['pyxit_n_subwindows'],
        min_size=0.0,  # segmentation use fixed-size subwindows
        max_size=1.0,  # segmentation use fixed-size subwindows
        target_width=pyxit_parameters['pyxit_target_width'],
        target_height=pyxit_parameters['pyxit_target_height'],
        interpolation=pyxit_parameters['pyxit_interpolation'],
        transpose=pyxit_parameters['pyxit_transpose'],
        colorspace=pyxit_parameters['pyxit_colorspace'],
        fixed_size=pyxit_parameters['pyxit_fixed_size'],
        n_jobs=pyxit_parameters['pyxit_n_jobs'],
        verbose=True,
        get_output=_get_output_from_mask)

    # The model file is opened before training (so an unwritable path fails
    # early) and is closed in the ``finally`` below even if training raises.
    save_to = pyxit_parameters['pyxit_save_to']
    model_file = None
    if save_to:
        model_dir = os.path.dirname(save_to)
        # dirname is '' for a bare filename; os.makedirs('') would raise.
        if model_dir and not os.path.exists(model_dir):
            os.makedirs(model_dir)
        model_file = open(save_to, "wb")

    try:
        if model_file is not None:
            pickle.dump(classes, model_file, protocol=pickle.HIGHEST_PROTOCOL)

        job = conn.update_job_status(
            job,
            status_comment=(
                "[pyxit.main] Extracting %d subwindows from each image in %s"
                % (pyxit_parameters['pyxit_n_subwindows'],
                   pyxit_parameters['dir_ls'])),
            progress=50)
        time.sleep(3)
        # Extract random subwindows in dumped annotations
        _X, _y = pyxit.extract_subwindows(X, y)

        # Build pixel classifier. The original status comment was sent with a
        # raw, unformatted "%s"; it is now filled with the training directory.
        job = conn.update_job_status(
            job,
            status_comment=(
                "[pyxit.main] Fitting Pyxit Segmentation Model on %s"
                % pyxit_parameters['dir_ls']),
            progress=75)
        print("TIME : %s" % strftime("%Y-%m-%d %H:%M:%S", localtime()))
        start = time.time()
        pyxit.fit(X, y, _X=_X, _y=_y)
        end = time.time()
        print("Elapsed time FIT: %d s" % (end - start))
        print("TIME : %s" % strftime("%Y-%m-%d %H:%M:%S", localtime()))

        print("pyxit.base_estimator.n_classes_")
        print(pyxit.base_estimator.n_classes_)
        print("pyxit.base_estimator.classes_")
        print(pyxit.base_estimator.classes_)

        if verbose:
            print("----------------------------------------------------------------")
            print("[pyxit.main] Saving Pyxit Segmentation Model locally into %s"
                  % save_to)
            print("----------------------------------------------------------------")

        # Save model on local disk, right after the class list
        if model_file is not None:
            pickle.dump(pyxit, model_file, protocol=pickle.HIGHEST_PROTOCOL)
    finally:
        if model_file is not None:
            model_file.close()

    print("Not Publishing model in db..")

    job = conn.update_job_status(job,
                                 status=job.TERMINATED,
                                 status_comment="Finish",
                                 progress=100)
    print("END.")
예제 #6
0
def build_models(n_subwindows=10,
                 min_size=0.5,
                 max_size=1.0,
                 target_width=16,
                 target_height=16,
                 interpolation=2,
                 transpose=False,
                 colorspace=2,
                 fixed_size=False,
                 verbose=0,
                 get_output=_get_output_from_directory,
                 create_svm=False,
                 C=1.0,
                 random_state=None,
                 **base_estimator_params):
    """Create the extra-trees base estimator and the Pyxit classifier using it.

    Parameters
    ----------
    n_subwindows: int
    min_size: float
    max_size: float
    target_width: int
    target_height: int
    interpolation: int
    transpose: bool
    colorspace: int
    fixed_size: bool
    verbose: int
    get_output: callable
        All of the above are forwarded unchanged to ``PyxitClassifier``.
    create_svm: bool
        When true, the Pyxit classifier is wrapped in a
        ``SvmPyxitClassifier`` together with a ``LinearSVC``.
    C: float
        ``C`` argument of the ``LinearSVC`` (only used with ``create_svm``).
    random_state:
        Passed through ``check_random_state``; the resulting generator seeds
        the extra-trees and is also used to draw a separate seed for Pyxit.
    base_estimator_params: dict
        Keyword arguments for the ``ExtraTreesClassifier``. Its ``n_jobs``
        entry (1 when absent) is reused for the Pyxit classifier.

    Returns
    -------
    et: ExtraTreesClassifier
        The base estimator (extra-trees).
    pyxit: PyxitClassifier|SvmPyxitClassifier
        The Pyxit classifier, SVM-wrapped when ``create_svm`` is true.
    """
    rng = check_random_state(random_state)
    forest = ExtraTreesClassifier(random_state=rng, **base_estimator_params)

    # ET and Pyxit must have != random nbs: draw a dedicated 32-bit seed
    # from the shared generator for the Pyxit classifier.
    pyxit_seed = rng.tomaxint() % 0x100000000

    pyxit_kwargs = dict(
        base_estimator=forest,
        n_subwindows=n_subwindows,
        min_size=min_size,
        max_size=max_size,
        target_height=target_height,
        target_width=target_width,
        n_jobs=base_estimator_params.get("n_jobs", 1),
        colorspace=colorspace,
        fixed_size=fixed_size,
        interpolation=interpolation,
        transpose=transpose,
        verbose=verbose,
        get_output=get_output,
        random_state=check_random_state(pyxit_seed),
    )
    classifier = PyxitClassifier(**pyxit_kwargs)

    if create_svm:
        return forest, SvmPyxitClassifier(classifier, LinearSVC(C=C))
    return forest, classifier
예제 #7
0
def main(argv):
    print("Main")
    # Define command line options
    p = optparse.OptionParser(description='Cytomine Segmentation prediction',
                              prog='Cytomine segmentation prediction',
                              version='0.1')

    p.add_option(
        '--cytomine_host',
        type="string",
        default='beta.cytomine.be',
        dest="cytomine_host",
        help="The Cytomine host (eg: beta.cytomine.be, localhost:8080)")
    p.add_option('--cytomine_public_key',
                 type="string",
                 default='',
                 dest="cytomine_public_key",
                 help="Cytomine public key")
    p.add_option('--cytomine_private_key',
                 type="string",
                 default='',
                 dest="cytomine_private_key",
                 help="Cytomine private key")
    p.add_option('--cytomine_base_path',
                 type="string",
                 default='/api/',
                 dest="cytomine_base_path",
                 help="Cytomine base path")
    p.add_option('--cytomine_id_software',
                 type="int",
                 dest="cytomine_id_software",
                 help="The Cytomine software identifier")
    p.add_option('--cytomine_working_path',
                 default="/tmp/",
                 type="string",
                 dest="cytomine_working_path",
                 help="The working directory (eg: /tmp)")
    p.add_option('--cytomine_id_project',
                 type="int",
                 dest="cytomine_id_project",
                 help="The Cytomine project identifier")

    p.add_option('-i',
                 '--cytomine_id_image',
                 type='int',
                 dest='cytomine_id_image',
                 help="image id from cytomine",
                 metavar='IMAGE')
    p.add_option('-z',
                 '--cytomine_zoom_level',
                 type='int',
                 dest='cytomine_zoom_level',
                 help="working zoom level")
    p.add_option('-j',
                 '--nb_jobs',
                 type='int',
                 dest='nb_jobs',
                 help="number of parallel jobs")
    p.add_option(
        '--cytomine_predict_terms',
        type='str',
        dest='cytomine_predict_terms',
        help=
        "term id of all positive terms. The first term is the output predicted annotation term"
    )
    p.add_option('--cytomine_excluded_terms',
                 type='string',
                 dest='cytomine_excluded_terms',
                 help="term id of excluded terms)")

    p.add_option('--pyxit_target_width',
                 type='int',
                 dest='pyxit_target_width',
                 help="pyxit subwindows width")
    p.add_option('--pyxit_target_height',
                 type='int',
                 dest='pyxit_target_height',
                 help="pyxit subwindows height")
    p.add_option('--pyxit_colorspace',
                 type='int',
                 dest='pyxit_colorspace',
                 help="pyxit colorspace encoding")
    p.add_option('--pyxit_nb_jobs',
                 type='int',
                 dest='pyxit_nb_jobs',
                 help="pyxit number of jobs for trees")
    p.add_option('--pyxit_fixed_size',
                 type='string',
                 default="0",
                 dest="pyxit_fixed_size",
                 help="extract fixed size subwindows")
    p.add_option('--pyxit_transpose',
                 type='string',
                 default="0",
                 dest="pyxit_transpose",
                 help="transpose subwindows")
    p.add_option('--pyxit_n_subwindows',
                 type='int',
                 default="10",
                 dest="pyxit_n_subwindows",
                 help="number of subwindows")
    p.add_option('--pyxit_interpolation',
                 default=2,
                 type="int",
                 dest="pyxit_interpolation",
                 help="interpolation method 1,2,3,4")
    p.add_option('--pyxit_min_size',
                 default=0.5,
                 type="float",
                 dest="pyxit_min_size",
                 help="min size")
    p.add_option('--pyxit_max_size',
                 default=1.0,
                 type="float",
                 dest="pyxit_max_size",
                 help="max size")
    p.add_option('--cytomine_reviewed',
                 type='string',
                 default="False",
                 dest="cytomine_reviewed",
                 help="Get reviewed annotations only")

    p.add_option('--cytomine_dump_annotations',
                 type='string',
                 default="0",
                 dest="cytomine_dump_annotations",
                 help="Dump training annotations or not")
    p.add_option('--cytomine_dump_annotation_stats',
                 type='string',
                 default="0",
                 dest="cytomine_dump_annotation_stats",
                 help="Calculate stats on dumped annotations or not")
    p.add_option('--build_model',
                 type="string",
                 default="0",
                 dest="build_model",
                 help="Turn on (1) or off (0) model building")
    p.add_option('--cytomine_annotation_projects',
                 type="string",
                 dest="cytomine_annotation_projects",
                 help="Projects from which annotations are extracted")
    p.add_option('--verbose',
                 type="string",
                 default="0",
                 dest="verbose",
                 help="Turn on (1) or off (0) verbose mode")

    p.add_option('--keras_save_to',
                 type='string',
                 default="",
                 dest='keras_save_to',
                 help="keras model weight file")
    p.add_option('--keras_batch_size',
                 type="int",
                 dest="keras_batch_size",
                 help="Training batch size")
    p.add_option('--keras_n_epochs',
                 type="int",
                 dest="keras_n_epochs",
                 help="Number of epochs")
    p.add_option('--keras_shuffle',
                 type="string",
                 dest="keras_shuffle",
                 help="Turn on (1) or off (0) batch shuffle")
    p.add_option('--keras_validation_split',
                 type="float",
                 dest="keras_validation_split",
                 help="Batch validation split")
    options, arguments = p.parse_args(args=argv)

    parameters = {}
    parameters['keras_save_to'] = options.keras_save_to
    parameters['keras_batch_size'] = options.keras_batch_size
    parameters['keras_n_epochs'] = options.keras_n_epochs
    parameters['keras_shuffle'] = options.keras_shuffle
    parameters['keras_validation_split'] = options.keras_validation_split
    parameters['cytomine_host'] = options.cytomine_host
    parameters['cytomine_public_key'] = options.cytomine_public_key
    parameters['cytomine_private_key'] = options.cytomine_private_key
    parameters['cytomine_base_path'] = options.cytomine_base_path
    parameters['cytomine_working_path'] = options.cytomine_working_path
    parameters['cytomine_base_path'] = options.cytomine_base_path
    parameters['cytomine_id_project'] = options.cytomine_id_project
    parameters['cytomine_id_software'] = options.cytomine_id_software
    parameters['cytomine_predict_terms'] = map(
        int, options.cytomine_predict_terms.split(','))
    parameters['cytomine_predicted_annotation_term'] = parameters[
        'cytomine_predict_terms'][0]
    parameters['cytomine_excluded_terms'] = map(
        int, options.cytomine_excluded_terms.split(','))

    parameters['pyxit_colorspace'] = options.pyxit_colorspace
    parameters['pyxit_nb_jobs'] = options.pyxit_nb_jobs
    parameters['pyxit_n_jobs'] = options.pyxit_nb_jobs
    parameters['cytomine_nb_jobs'] = options.pyxit_nb_jobs
    parameters['cytomine_id_image'] = options.cytomine_id_image
    parameters['cytomine_zoom_level'] = options.cytomine_zoom_level
    parameters['nb_jobs'] = options.nb_jobs
    parameters['pyxit_target_width'] = options.pyxit_target_width
    parameters['pyxit_target_height'] = options.pyxit_target_height
    parameters['pyxit_n_subwindows'] = options.pyxit_n_subwindows
    parameters['pyxit_interpolation'] = options.pyxit_interpolation
    parameters['pyxit_transpose'] = str2bool(options.pyxit_transpose)
    parameters['pyxit_min_size'] = options.pyxit_min_size
    parameters['pyxit_max_size'] = options.pyxit_max_size
    parameters['pyxit_fixed_size'] = str2bool(options.pyxit_fixed_size)
    parameters['cytomine_annotation_projects'] = map(
        int, options.cytomine_annotation_projects.split(','))
    parameters['cytomine_reviewed'] = str2bool(options.cytomine_reviewed)
    parameters['cytomine_dump_annotation_stats'] = str2bool(
        options.cytomine_dump_annotation_stats)
    parameters['cytomine_dump_annotations'] = str2bool(
        options.cytomine_dump_annotations)
    parameters['build_model'] = str2bool(options.build_model)
    parameters['dir_ls'] = os.path.join(
        parameters["cytomine_working_path"],
        str(parameters['cytomine_annotation_projects']).replace(
            ',', '-').replace('[', '').replace(']', '').replace(' ', ''),
        "zoom_level", str(parameters['cytomine_zoom_level']))

    pyxit_parameters = {}
    pyxit_parameters['pyxit_target_width'] = options.pyxit_target_width
    pyxit_parameters['pyxit_target_height'] = options.pyxit_target_height
    pyxit_parameters['pyxit_n_subwindows'] = options.pyxit_n_subwindows
    pyxit_parameters['pyxit_min_size'] = options.pyxit_min_size
    pyxit_parameters['pyxit_max_size'] = options.pyxit_max_size
    pyxit_parameters['pyxit_colorspace'] = options.pyxit_colorspace
    pyxit_parameters['pyxit_interpolation'] = options.pyxit_interpolation
    pyxit_parameters['pyxit_transpose'] = str2bool(options.pyxit_transpose)
    pyxit_parameters['pyxit_fixed_size'] = str2bool(options.pyxit_fixed_size)
    pyxit_parameters['pyxit_n_jobs'] = options.pyxit_nb_jobs

    if options.verbose:
        print(parameters)

    # Create Cytomine connection
    conn = cytomine.Cytomine(parameters["cytomine_host"],
                             parameters["cytomine_public_key"],
                             parameters["cytomine_private_key"],
                             base_path=parameters['cytomine_base_path'],
                             working_path=parameters['cytomine_working_path'],
                             verbose=str2bool(options.verbose))

    # Dump annotations
    if parameters['cytomine_dump_annotations']:
        # Get annotation descriptions (JSON) from project(s)
        annotations = None
        for prj in parameters['cytomine_annotation_projects']:
            if parameters["cytomine_reviewed"]:
                annotations_prj = conn.get_annotations(
                    id_project=prj,
                    reviewed_only=parameters["cytomine_reviewed"])
            else:
                annotations_prj = conn.get_annotations(id_project=prj)
            if not annotations:
                annotations = annotations_prj
            else:
                annotations.data().extend(annotations_prj.data())

            if prj == 21907448 or prj == 155194683:
                annotations_prj = conn.get_annotations(id_project=prj,
                                                       id_term=91376951)
                annotations.data().extend(annotations_prj.data())
            print("Nb annotations so far... = %d" % len(annotations.data()))
        print("Total annotations projects %s = %d" %
              (parameters['cytomine_annotation_projects'],
               len(annotations.data())))

        # Set output dir parameters
        if not os.path.exists(parameters['dir_ls']):
            print("Creating annotation directory: %s" % parameters['dir_ls'])
            os.makedirs(parameters['dir_ls'])

        # Dump annotation images locally
        print("Dump training annotation images in %s...", parameters['dir_ls'])
        conn.dump_annotations(
            annotations=annotations,
            get_image_url_func=Annotation.get_annotation_alpha_crop_url,
            dest_path=parameters['dir_ls'],
            desired_zoom=parameters['cytomine_zoom_level'],
            excluded_terms=parameters['cytomine_excluded_terms'])

        # Put positive terms under the same term and same for negative terms
        term_directories = os.listdir(parameters['dir_ls'])
        pos_path = os.path.join(parameters['dir_ls'], "1")
        if not os.path.exists(pos_path):
            print("Creating positive annotation directory: %s" % pos_path)
            os.makedirs(pos_path)

        neg_path = os.path.join(parameters['dir_ls'], "0")
        if not os.path.exists(neg_path):
            print("Creating negative annotation directory: %s" % neg_path)
            os.makedirs(neg_path)

        for dir in term_directories:
            dir_abs = os.path.join(parameters['dir_ls'], dir)

            # Move files
            if int(dir) in parameters['cytomine_predict_terms']:
                for image_file in os.listdir(dir_abs):
                    os.rename(os.path.join(dir_abs, image_file),
                              os.path.join(pos_path, image_file))

            else:
                for image_file in os.listdir(dir_abs):
                    os.rename(os.path.join(dir_abs, image_file),
                              os.path.join(neg_path, image_file))

            # Remove empty directory
            if int(dir) != 0 and int(dir) != 1:
                os.rmdir(dir_abs)

    if parameters['cytomine_dump_annotation_stats']:
        pos_path = os.path.join(parameters['dir_ls'], "1")
        neg_path = os.path.join(parameters['dir_ls'], "0")
        stats_dumped_annotations(pos_path, neg_path)

    if parameters['build_model']:
        print("Build_model...")
        # Model name
        model_name = "all_in_batchsize{}_epochs{}"\
         .format(parameters['keras_batch_size'],
           parameters['keras_n_epochs']).replace(".", "")
        print("Model_name :", model_name)

        pyxit = PyxitClassifier(
            None,
            n_subwindows=pyxit_parameters['pyxit_n_subwindows'],
            min_size=pyxit_parameters['pyxit_min_size'],
            max_size=pyxit_parameters['pyxit_max_size'],
            target_width=pyxit_parameters['pyxit_target_width'],
            target_height=pyxit_parameters['pyxit_target_height'],
            n_jobs=pyxit_parameters['pyxit_n_jobs'],
            interpolation=pyxit_parameters['pyxit_interpolation'],
            transpose=pyxit_parameters['pyxit_transpose'],
            colorspace=pyxit_parameters['pyxit_colorspace'],
            fixed_size=pyxit_parameters['pyxit_fixed_size'],
            random_state=None,
            verbose=1,
            get_output=_get_output_from_mask,
            parallel_leaf_transform=False)

        # Build filenames and classes
        X, y = build_from_dir(parameters['dir_ls'])

        classes = np.unique(y)
        n_classes = len(classes)
        y_original = y
        y = np.searchsorted(classes, y)
        n_images = len(y)
        print("Number of images : ", n_images)

        images, masks, labels = image_mask_builder(
            X, y, parameters['pyxit_colorspace'])
        # ImageDataGenerator: two instances (one for images, one for masks)
        # created from the same arguments
        data_gen_args = dict(rotation_range=180.,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             zoom_range=0.2,
                             rescale=1 / 255,
                             horizontal_flip=True,
                             vertical_flip=True)
        # featurewise_center = True,
        #  featurewise_std_normalization = True)

        image_datagen = ImageDataGenerator(**data_gen_args)
        mask_datagen = ImageDataGenerator(**data_gen_args)

        # Provide the same seed and keyword arguments to both flow() calls
        # (the corresponding fit() calls below are currently disabled)
        seed = 1
        # image_datagen.fit(images, augment = True, seed = seed)
        # mask_datagen.fit(masks, augment = True, seed = seed)

        print(type(images))
        print(type(masks))
        print(type(labels))
        print(images[0:10])
        print(masks[0:10])
        print(labels[0:10])
        image_generator = image_datagen.flow(images,
                                             labels,
                                             seed=seed,
                                             shuffle=False)

        mask_generator = mask_datagen.flow(masks,
                                           labels,
                                           seed=seed,
                                           shuffle=False)

        # Combine the image and mask generators into a single generator that
        # yields their outputs pairwise
        train_generator = zip(image_generator, mask_generator)

        # Creating and compiling model
        if not os.path.exists(parameters['keras_save_to']):
            os.makedirs(parameters['keras_save_to'])

        model_weights_filename = os.path.join(parameters['keras_save_to'],
                                              "weights_" + model_name + ".h5")
        print('Fitting model...')
        model = get_unet()
        model_checkpoint = ModelCheckpoint(model_weights_filename,
                                           monitor='val_loss',
                                           save_best_only=True)

        # Train FCN
        model.fit_generator(train_generator,
                            steps_per_epoch=100,
                            epochs=30,
                            callbacks=[model_checkpoint],
                            verbose=1)