def new(extension_id=None): """ Return a form for a new GenericImageModelJob """ form = GenericImageModelForm() form.dataset.choices = get_datasets(extension_id) form.standard_networks.choices = [] form.previous_networks.choices = get_previous_networks() form.pretrained_networks.choices = get_pretrained_networks() prev_network_snapshots = get_previous_network_snapshots() ## Is there a request to clone a job with ?clone=<job_id> fill_form_if_cloned(form) return flask.render_template( 'models/images/generic/new.html', extension_id=extension_id, extension_title=extensions.data.get_extension(extension_id).get_title() if extension_id else None, form=form, frameworks=frameworks.get_frameworks(), previous_network_snapshots=prev_network_snapshots, previous_networks_fullinfo=get_previous_networks_fulldetails(), pretrained_networks_fullinfo=get_pretrained_networks_fulldetails(), multi_gpu=config_value('caffe')['multi_gpu'], )
def show(job): """ Called from digits.model.views.models_show() """ return flask.render_template( "models/images/classification/show.html", job=job, framework_ids=[fw.get_id() for fw in frameworks.get_frameworks()], )
def generic_image_model_new(): """ Return a form for a new GenericImageModelJob """ form = GenericImageModelForm() form.dataset.choices = get_datasets() form.standard_networks.choices = [] form.previous_networks.choices = get_previous_networks() prev_network_snapshots = get_previous_network_snapshots() return flask.render_template('models/images/generic/new.html', form = form, frameworks = frameworks.get_frameworks(), previous_network_snapshots = prev_network_snapshots, previous_networks_fullinfo = get_previous_networks_fulldetails(), multi_gpu = config_value('caffe_root')['multi_gpu'], )
def image_classification_model_new(): """ Return a form for a new ImageClassificationModelJob """ form = ImageClassificationModelForm() form.dataset.choices = get_datasets() form.standard_networks.choices = get_standard_networks() form.standard_networks.default = get_default_standard_network() form.previous_networks.choices = get_previous_networks() prev_network_snapshots = get_previous_network_snapshots() return flask.render_template('models/images/classification/new.html', form = form, frameworks = frameworks.get_frameworks(), previous_network_snapshots = prev_network_snapshots, previous_networks_fullinfo = get_previous_networks_fulldetails(), multi_gpu = config_value('caffe_root')['multi_gpu'], )
def new(): """ Return a form for a new ImageClassificationModelJob """ form = ImageClassificationModelForm() form.dataset.choices = get_datasets() form.standard_networks.choices = get_standard_networks() form.standard_networks.default = get_default_standard_network() form.previous_networks.choices = get_previous_networks() prev_network_snapshots = get_previous_network_snapshots() ## Is there a request to clone a job with ?clone=<job_id> fill_form_if_cloned(form) return flask.render_template('models/images/classification/new.html', form = form, frameworks = frameworks.get_frameworks(), previous_network_snapshots = prev_network_snapshots, previous_networks_fullinfo = get_previous_networks_fulldetails(), multi_gpu = config_value('caffe_root')['multi_gpu'], )
def generic_image_model_new(): """ Return a form for a new GenericImageModelJob """ form = GenericImageModelForm() form.dataset.choices = get_datasets() form.standard_networks.choices = [] form.previous_networks.choices = get_previous_networks() prev_network_snapshots = get_previous_network_snapshots() ## Is there a request to clone a job with ?clone=<job_id> fill_form_if_cloned(form) return flask.render_template( "models/images/generic/new.html", form=form, frameworks=frameworks.get_frameworks(), previous_network_snapshots=prev_network_snapshots, previous_networks_fullinfo=get_previous_networks_fulldetails(), multi_gpu=config_value("caffe_root")["multi_gpu"], )
def create(): """ Create a new ImageClassificationModelJob Returns JSON when requested: {job_id,name,status} or {errors:[]} """ form = ImageClassificationModelForm() form.dataset.choices = get_datasets() form.standard_networks.choices = get_standard_networks() form.standard_networks.default = get_default_standard_network() form.previous_networks.choices = get_previous_networks() form.pretrained_networks.choices = get_pretrained_networks() prev_network_snapshots = get_previous_network_snapshots() # Is there a request to clone a job with ?clone=<job_id> fill_form_if_cloned(form) if not form.validate_on_submit(): if request_wants_json(): return flask.jsonify({'errors': form.errors}), 400 else: return flask.render_template('models/images/classification/new.html', form=form, frameworks=frameworks.get_frameworks(), previous_network_snapshots=prev_network_snapshots, previous_networks_fullinfo=get_previous_networks_fulldetails(), pretrained_networks_fullinfo=get_pretrained_networks_fulldetails(), multi_gpu=config_value('caffe')['multi_gpu'], ), 400 datasetJob = scheduler.get_job(form.dataset.data) if not datasetJob: raise werkzeug.exceptions.BadRequest( 'Unknown dataset job_id "%s"' % form.dataset.data) # sweeps will be a list of the the permutations of swept fields # Get swept learning_rate sweeps = [{'learning_rate': v} for v in form.learning_rate.data] add_learning_rate = len(form.learning_rate.data) > 1 # Add swept batch_size sweeps = [dict(s.items() + [('batch_size', bs)]) for bs in form.batch_size.data for s in sweeps[:]] add_batch_size = len(form.batch_size.data) > 1 n_jobs = len(sweeps) jobs = [] for sweep in sweeps: # Populate the form with swept data to be used in saving and # launching jobs. form.learning_rate.data = sweep['learning_rate'] form.batch_size.data = sweep['batch_size'] # Augment Job Name extra = '' if add_learning_rate: extra += ' learning_rate:%s' % str(form.learning_rate.data[0]) if add_batch_size: extra += ' batch_size:%d' % form.batch_size.data[0] job = None try: job = ImageClassificationModelJob( username=utils.auth.get_username(), name=form.model_name.data + extra, group=form.group_name.data, dataset_id=datasetJob.id(), ) # get handle to framework object fw = frameworks.get_framework_by_id(form.framework.data) pretrained_model = None if form.method.data == 'standard': found = False # can we find it in standard networks? network_desc = fw.get_standard_network_desc(form.standard_networks.data) if network_desc: found = True network = fw.get_network_from_desc(network_desc) if not found: raise werkzeug.exceptions.BadRequest( 'Unknown standard model "%s"' % form.standard_networks.data) elif form.method.data == 'previous': old_job = scheduler.get_job(form.previous_networks.data) if not old_job: raise werkzeug.exceptions.BadRequest( 'Job not found: %s' % form.previous_networks.data) use_same_dataset = (old_job.dataset_id == job.dataset_id) network = fw.get_network_from_previous(old_job.train_task().network, use_same_dataset) for choice in form.previous_networks.choices: if choice[0] == form.previous_networks.data: epoch = float(flask.request.form['%s-snapshot' % form.previous_networks.data]) if epoch == 0: pass elif epoch == -1: pretrained_model = old_job.train_task().pretrained_model else: # verify snapshot exists pretrained_model = old_job.train_task().get_snapshot(epoch, download=True) if pretrained_model is None: raise werkzeug.exceptions.BadRequest( "For the job %s, selected pretrained_model for epoch %d is invalid!" % (form.previous_networks.data, epoch)) # the first is the actual file if a list is returned, other should be meta data if isinstance(pretrained_model, list): pretrained_model = pretrained_model[0] if not (os.path.exists(pretrained_model)): raise werkzeug.exceptions.BadRequest( "Pretrained_model for the selected epoch doesn't exist. " "May be deleted by another user/process. " "Please restart the server to load the correct pretrained_model details.") # get logical path pretrained_model = old_job.train_task().get_snapshot(epoch) break elif form.method.data == 'pretrained': pretrained_job = scheduler.get_job(form.pretrained_networks.data) model_def_path = pretrained_job.get_model_def_path() weights_path = pretrained_job.get_weights_path() network = fw.get_network_from_path(model_def_path) pretrained_model = weights_path elif form.method.data == 'custom': network = fw.get_network_from_desc(form.custom_network.data) pretrained_model = form.custom_network_snapshot.data.strip() else: raise werkzeug.exceptions.BadRequest( 'Unrecognized method: "%s"' % form.method.data) policy = {'policy': form.lr_policy.data} if form.lr_policy.data == 'fixed': pass elif form.lr_policy.data == 'step': policy['stepsize'] = form.lr_step_size.data policy['gamma'] = form.lr_step_gamma.data elif form.lr_policy.data == 'multistep': policy['stepvalue'] = form.lr_multistep_values.data policy['gamma'] = form.lr_multistep_gamma.data elif form.lr_policy.data == 'exp': policy['gamma'] = form.lr_exp_gamma.data elif form.lr_policy.data == 'inv': policy['gamma'] = form.lr_inv_gamma.data policy['power'] = form.lr_inv_power.data elif form.lr_policy.data == 'poly': policy['power'] = form.lr_poly_power.data elif form.lr_policy.data == 'sigmoid': policy['stepsize'] = form.lr_sigmoid_step.data policy['gamma'] = form.lr_sigmoid_gamma.data else: raise werkzeug.exceptions.BadRequest( 'Invalid learning rate policy') if config_value('caffe')['multi_gpu']: if form.select_gpus.data: selected_gpus = [str(gpu) for gpu in form.select_gpus.data] gpu_count = None elif form.select_gpu_count.data: gpu_count = form.select_gpu_count.data selected_gpus = None else: gpu_count = 1 selected_gpus = None else: if form.select_gpu.data == 'next': gpu_count = 1 selected_gpus = None else: selected_gpus = [str(form.select_gpu.data)] gpu_count = None # Set up data augmentation structure data_aug = {} data_aug['flip'] = form.aug_flip.data data_aug['quad_rot'] = form.aug_quad_rot.data data_aug['rot'] = form.aug_rot.data data_aug['scale'] = form.aug_scale.data data_aug['noise'] = form.aug_noise.data data_aug['contrast'] = form.aug_contrast.data data_aug['whitening'] = form.aug_whitening.data data_aug['hsv_use'] = form.aug_hsv_use.data data_aug['hsv_h'] = form.aug_hsv_h.data data_aug['hsv_s'] = form.aug_hsv_s.data data_aug['hsv_v'] = form.aug_hsv_v.data # Python Layer File may be on the server or copied from the client. fs.copy_python_layer_file( bool(form.python_layer_from_client.data), job.dir(), (flask.request.files[form.python_layer_client_file.name] if form.python_layer_client_file.name in flask.request.files else ''), form.python_layer_server_file.data) job.tasks.append(fw.create_train_task( job=job, dataset=datasetJob, train_epochs=form.train_epochs.data, snapshot_interval=form.snapshot_interval.data, learning_rate=form.learning_rate.data[0], lr_policy=policy, gpu_count=gpu_count, selected_gpus=selected_gpus, batch_size=form.batch_size.data[0], batch_accumulation=form.batch_accumulation.data, val_interval=form.val_interval.data, traces_interval=form.traces_interval.data, pretrained_model=pretrained_model, crop_size=form.crop_size.data, use_mean=form.use_mean.data, network=network, random_seed=form.random_seed.data, solver_type=form.solver_type.data, rms_decay=form.rms_decay.data, shuffle=form.shuffle.data, data_aug=data_aug, ) ) # Save form data with the job so we can easily clone it later. save_form_to_job(job, form) jobs.append(job) scheduler.add_job(job) if n_jobs == 1: if request_wants_json(): return flask.jsonify(job.json_dict()) else: return flask.redirect(flask.url_for('digits.model.views.show', job_id=job.id())) except: if job: scheduler.delete_job(job) raise if request_wants_json(): return flask.jsonify(jobs=[j.json_dict() for j in jobs]) # If there are multiple jobs launched, go to the home page. return flask.redirect('/')
def generic_image_model_create(): """ Create a new GenericImageModelJob Returns JSON when requested: {job_id,name,status} or {errors:[]} """ form = GenericImageModelForm() form.dataset.choices = get_datasets() form.standard_networks.choices = [] form.previous_networks.choices = get_previous_networks() prev_network_snapshots = get_previous_network_snapshots() ## Is there a request to clone a job with ?clone=<job_id> fill_form_if_cloned(form) if not form.validate_on_submit(): if request_wants_json(): return flask.jsonify({'errors': form.errors}), 400 else: return flask.render_template('models/images/generic/new.html', form = form, frameworks = frameworks.get_frameworks(), previous_network_snapshots = prev_network_snapshots, previous_networks_fullinfo = get_previous_networks_fulldetails(), multi_gpu = config_value('caffe_root')['multi_gpu'], ), 400 datasetJob = scheduler.get_job(form.dataset.data) if not datasetJob: raise werkzeug.exceptions.BadRequest( 'Unknown dataset job_id "%s"' % form.dataset.data) job = None try: job = GenericImageModelJob( name = form.model_name.data, dataset_id = datasetJob.id(), ) # get framework (hard-coded to caffe for now) fw = frameworks.get_framework_by_id(form.framework.data) pretrained_model = None #if form.method.data == 'standard': if form.method.data == 'previous': old_job = scheduler.get_job(form.previous_networks.data) if not old_job: raise werkzeug.exceptions.BadRequest( 'Job not found: %s' % form.previous_networks.data) network = fw.get_network_from_previous(old_job.train_task().network) for choice in form.previous_networks.choices: if choice[0] == form.previous_networks.data: epoch = float(flask.request.form['%s-snapshot' % form.previous_networks.data]) if epoch == 0: pass elif epoch == -1: pretrained_model = old_job.train_task().pretrained_model else: for filename, e in old_job.train_task().snapshots: if e == epoch: pretrained_model = filename break if pretrained_model is None: raise werkzeug.exceptions.BadRequest( "For the job %s, selected pretrained_model for epoch %d is invalid!" % (form.previous_networks.data, epoch)) if not (os.path.exists(pretrained_model)): raise werkzeug.exceptions.BadRequest( "Pretrained_model for the selected epoch doesn't exists. May be deleted by another user/process. Please restart the server to load the correct pretrained_model details") break elif form.method.data == 'custom': network = fw.get_network_from_desc(form.custom_network.data) pretrained_model = form.custom_network_snapshot.data.strip() else: raise werkzeug.exceptions.BadRequest( 'Unrecognized method: "%s"' % form.method.data) policy = {'policy': form.lr_policy.data} if form.lr_policy.data == 'fixed': pass elif form.lr_policy.data == 'step': policy['stepsize'] = form.lr_step_size.data policy['gamma'] = form.lr_step_gamma.data elif form.lr_policy.data == 'multistep': policy['stepvalue'] = form.lr_multistep_values.data policy['gamma'] = form.lr_multistep_gamma.data elif form.lr_policy.data == 'exp': policy['gamma'] = form.lr_exp_gamma.data elif form.lr_policy.data == 'inv': policy['gamma'] = form.lr_inv_gamma.data policy['power'] = form.lr_inv_power.data elif form.lr_policy.data == 'poly': policy['power'] = form.lr_poly_power.data elif form.lr_policy.data == 'sigmoid': policy['stepsize'] = form.lr_sigmoid_step.data policy['gamma'] = form.lr_sigmoid_gamma.data else: raise werkzeug.exceptions.BadRequest( 'Invalid learning rate policy') if config_value('caffe_root')['multi_gpu']: if form.select_gpu_count.data: gpu_count = form.select_gpu_count.data selected_gpus = None else: selected_gpus = [str(gpu) for gpu in form.select_gpus.data] gpu_count = None else: if form.select_gpu.data == 'next': gpu_count = 1 selected_gpus = None else: selected_gpus = [str(form.select_gpu.data)] gpu_count = None # Python Layer File may be on the server or copied from the client. fs.copy_python_layer_file( bool(form.python_layer_from_client.data), job.dir(), (flask.request.files[form.python_layer_client_file.name] if form.python_layer_client_file.name in flask.request.files else ''), form.python_layer_server_file.data) job.tasks.append(fw.create_train_task( job_dir = job.dir(), dataset = datasetJob, train_epochs = form.train_epochs.data, snapshot_interval = form.snapshot_interval.data, learning_rate = form.learning_rate.data, lr_policy = policy, gpu_count = gpu_count, selected_gpus = selected_gpus, batch_size = form.batch_size.data, val_interval = form.val_interval.data, pretrained_model= pretrained_model, crop_size = form.crop_size.data, use_mean = form.use_mean.data, network = network, random_seed = form.random_seed.data, solver_type = form.solver_type.data, shuffle = form.shuffle.data, ) ) ## Save form data with the job so we can easily clone it later. save_form_to_job(job, form) scheduler.add_job(job) if request_wants_json(): return flask.jsonify(job.json_dict()) else: return flask.redirect(flask.url_for('models_show', job_id=job.id())) except: if job: scheduler.delete_job(job) raise
def create(extension_id=None): """ Create a new GenericImageModelJob Returns JSON when requested: {job_id,name,status} or {errors:[]} """ form = GenericImageModelForm() form.dataset.choices = get_datasets(extension_id) form.standard_networks.choices = [] form.previous_networks.choices = get_previous_networks() prev_network_snapshots = get_previous_network_snapshots() ## Is there a request to clone a job with ?clone=<job_id> fill_form_if_cloned(form) if not form.validate_on_submit(): if request_wants_json(): return flask.jsonify({'errors': form.errors}), 400 else: return flask.render_template( 'models/images/generic/new.html', extension_id=extension_id, extension_title=extensions.data.get_extension(extension_id).get_title() if extension_id else None, form= form, frameworks=frameworks.get_frameworks(), previous_network_snapshots=prev_network_snapshots, previous_networks_fullinfo=get_previous_networks_fulldetails(), multi_gpu=config_value('caffe_root')['multi_gpu'], ), 400 datasetJob = scheduler.get_job(form.dataset.data) if not datasetJob: raise werkzeug.exceptions.BadRequest( 'Unknown dataset job_id "%s"' % form.dataset.data) # sweeps will be a list of the the permutations of swept fields # Get swept learning_rate sweeps = [{'learning_rate': v} for v in form.learning_rate.data] add_learning_rate = len(form.learning_rate.data) > 1 # Add swept batch_size sweeps = [dict(s.items() + [('batch_size', bs)]) for bs in form.batch_size.data for s in sweeps[:]] add_batch_size = len(form.batch_size.data) > 1 n_jobs = len(sweeps) jobs = [] for sweep in sweeps: # Populate the form with swept data to be used in saving and # launching jobs. form.learning_rate.data = sweep['learning_rate'] form.batch_size.data = sweep['batch_size'] # Augment Job Name extra = '' if add_learning_rate: extra += ' learning_rate:%s' % str(form.learning_rate.data[0]) if add_batch_size: extra += ' batch_size:%d' % form.batch_size.data[0] job = None try: job = GenericImageModelJob( username = utils.auth.get_username(), name = form.model_name.data + extra, dataset_id = datasetJob.id(), ) # get framework (hard-coded to caffe for now) fw = frameworks.get_framework_by_id(form.framework.data) pretrained_model = None #if form.method.data == 'standard': if form.method.data == 'previous': old_job = scheduler.get_job(form.previous_networks.data) if not old_job: raise werkzeug.exceptions.BadRequest( 'Job not found: %s' % form.previous_networks.data) use_same_dataset = (old_job.dataset_id == job.dataset_id) network = fw.get_network_from_previous(old_job.train_task().network, use_same_dataset) for choice in form.previous_networks.choices: if choice[0] == form.previous_networks.data: epoch = float(flask.request.form['%s-snapshot' % form.previous_networks.data]) if epoch == 0: pass elif epoch == -1: pretrained_model = old_job.train_task().pretrained_model else: for filename, e in old_job.train_task().snapshots: if e == epoch: pretrained_model = filename break if pretrained_model is None: raise werkzeug.exceptions.BadRequest( "For the job %s, selected pretrained_model for epoch %d is invalid!" % (form.previous_networks.data, epoch)) if not (os.path.exists(pretrained_model)): raise werkzeug.exceptions.BadRequest( "Pretrained_model for the selected epoch doesn't exists. May be deleted by another user/process. Please restart the server to load the correct pretrained_model details") break elif form.method.data == 'custom': network = fw.get_network_from_desc(form.custom_network.data) pretrained_model = form.custom_network_snapshot.data.strip() else: raise werkzeug.exceptions.BadRequest( 'Unrecognized method: "%s"' % form.method.data) policy = {'policy': form.lr_policy.data} if form.lr_policy.data == 'fixed': pass elif form.lr_policy.data == 'step': policy['stepsize'] = form.lr_step_size.data policy['gamma'] = form.lr_step_gamma.data elif form.lr_policy.data == 'multistep': policy['stepvalue'] = form.lr_multistep_values.data policy['gamma'] = form.lr_multistep_gamma.data elif form.lr_policy.data == 'exp': policy['gamma'] = form.lr_exp_gamma.data elif form.lr_policy.data == 'inv': policy['gamma'] = form.lr_inv_gamma.data policy['power'] = form.lr_inv_power.data elif form.lr_policy.data == 'poly': policy['power'] = form.lr_poly_power.data elif form.lr_policy.data == 'sigmoid': policy['stepsize'] = form.lr_sigmoid_step.data policy['gamma'] = form.lr_sigmoid_gamma.data else: raise werkzeug.exceptions.BadRequest( 'Invalid learning rate policy') if config_value('caffe_root')['multi_gpu']: if form.select_gpu_count.data: gpu_count = form.select_gpu_count.data selected_gpus = None else: selected_gpus = [str(gpu) for gpu in form.select_gpus.data] gpu_count = None else: if form.select_gpu.data == 'next': gpu_count = 1 selected_gpus = None else: selected_gpus = [str(form.select_gpu.data)] gpu_count = None # Python Layer File may be on the server or copied from the client. fs.copy_python_layer_file( bool(form.python_layer_from_client.data), job.dir(), (flask.request.files[form.python_layer_client_file.name] if form.python_layer_client_file.name in flask.request.files else ''), form.python_layer_server_file.data) job.tasks.append(fw.create_train_task( job = job, dataset = datasetJob, train_epochs = form.train_epochs.data, snapshot_interval = form.snapshot_interval.data, learning_rate = form.learning_rate.data[0], lr_policy = policy, gpu_count = gpu_count, selected_gpus = selected_gpus, batch_size = form.batch_size.data[0], batch_accumulation = form.batch_accumulation.data, val_interval = form.val_interval.data, pretrained_model = pretrained_model, crop_size = form.crop_size.data, use_mean = form.use_mean.data, network = network, random_seed = form.random_seed.data, solver_type = form.solver_type.data, shuffle = form.shuffle.data, ) ) ## Save form data with the job so we can easily clone it later. save_form_to_job(job, form) jobs.append(job) scheduler.add_job(job) if n_jobs == 1: if request_wants_json(): return flask.jsonify(job.json_dict()) else: return flask.redirect(flask.url_for('digits.model.views.show', job_id=job.id())) except: if job: scheduler.delete_job(job) raise if request_wants_json(): return flask.jsonify(jobs=[job.json_dict() for job in jobs]) # If there are multiple jobs launched, go to the home page. return flask.redirect('/')
class ModelForm(Form): ### Methods def selection_exists_in_choices(form, field): found = False for choice in field.choices: if choice[0] == field.data: found = True if not found: raise validators.ValidationError( "Selected job doesn't exist. Maybe it was deleted by another user." ) def validate_NetParameter(form, field): fw = frameworks.get_framework_by_id(form['framework'].data) try: # below function raises a BadNetworkException in case of validation error fw.validate_network(field.data) except frameworks.errors.BadNetworkError as e: raise validators.ValidationError('Bad network: %s' % e.message) def validate_file_exists(form, field): from_client = bool(form.python_layer_from_client.data) filename = '' if not from_client and field.type == 'StringField': filename = field.data if filename == '': return if not os.path.isfile(filename): raise validators.ValidationError( 'Server side file, %s, does not exist.' % filename) def validate_py_ext(form, field): from_client = bool(form.python_layer_from_client.data) filename = '' if from_client and field.type == 'FileField': filename = flask.request.files[field.name].filename elif not from_client and field.type == 'StringField': filename = field.data if filename == '': return (root, ext) = os.path.splitext(filename) if ext != '.py' and ext != '.pyc': raise validators.ValidationError( 'Python file, %s, needs .py or .pyc extension.' % filename) ### Fields # The options for this get set in the view (since they are dynamic) dataset = utils.forms.SelectField( 'Select Dataset', choices=[], tooltip="Choose the dataset to use for this model.") python_layer_from_client = utils.forms.BooleanField( u'Use client-side file', default=False) python_layer_client_file = utils.forms.FileField( u'Client-side file', validators=[validate_py_ext], tooltip= "Choose a Python file on the client containing layer definitions.") python_layer_server_file = utils.forms.StringField( u'Server-side file', validators=[validate_file_exists, validate_py_ext], tooltip= "Choose a Python file on the server containing layer definitions.") train_epochs = utils.forms.IntegerField( 'Training epochs', validators=[validators.NumberRange(min=1)], default=30, tooltip="How many passes through the training data?") snapshot_interval = utils.forms.FloatField( 'Snapshot interval (in epochs)', default=1, validators=[ validators.NumberRange(min=0), ], tooltip="How many epochs of training between taking a snapshot?") val_interval = utils.forms.FloatField( 'Validation interval (in epochs)', default=1, validators=[validators.NumberRange(min=0)], tooltip= "How many epochs of training between running through one pass of the validation data?" ) random_seed = utils.forms.IntegerField( 'Random seed', validators=[ validators.NumberRange(min=0), validators.Optional(), ], tooltip= "If you provide a random seed, then back-to-back runs with the same model and dataset should give identical results." ) batch_size = utils.forms.IntegerField( 'Batch size', validators=[ validators.NumberRange(min=1), validators.Optional(), ], tooltip= "How many images to process at once. If blank, values are used from the network definition." ) ### Solver types solver_type = utils.forms.SelectField( 'Solver type', choices=[ ('SGD', 'Stochastic gradient descent (SGD)'), ('NESTEROV', "Nesterov's accelerated gradient (NAG)"), ('ADAGRAD', 'Adaptive gradient (AdaGrad)'), ('RMSPROP', 'RMSprop'), ('ADADELTA', 'AdaDelta'), ('ADAM', 'Adam'), ], default='SGD', tooltip="What type of solver will be used?", ) def validate_solver_type(form, field): fw = frameworks.get_framework_by_id(form.framework) if fw is not None: if not fw.supports_solver_type(field.data): raise validators.ValidationError( 'Solver type not supported by this framework') ### Learning rate learning_rate = utils.forms.FloatField( 'Base Learning Rate', default=0.01, validators=[ validators.NumberRange(min=0), ], tooltip= "Affects how quickly the network learns. If you are getting NaN for your loss, you probably need to lower this value." ) lr_policy = wtforms.SelectField('Policy', choices=[ ('fixed', 'Fixed'), ('step', 'Step Down'), ('multistep', 'Step Down (arbitrary steps)'), ('exp', 'Exponential Decay'), ('inv', 'Inverse Decay'), ('poly', 'Polynomial Decay'), ('sigmoid', 'Sigmoid Decay'), ], default='step') lr_step_size = wtforms.FloatField('Step Size', default=33) lr_step_gamma = wtforms.FloatField('Gamma', default=0.1) lr_multistep_values = wtforms.StringField('Step Values', default="50,85") def validate_lr_multistep_values(form, field): if form.lr_policy.data == 'multistep': for value in field.data.split(','): try: float(value) except ValueError: raise validators.ValidationError('invalid value') lr_multistep_gamma = wtforms.FloatField('Gamma', default=0.5) lr_exp_gamma = wtforms.FloatField('Gamma', default=0.95) lr_inv_gamma = wtforms.FloatField('Gamma', default=0.1) lr_inv_power = wtforms.FloatField('Power', default=0.5) lr_poly_power = wtforms.FloatField('Power', default=3) lr_sigmoid_step = wtforms.FloatField('Step', default=50) lr_sigmoid_gamma = wtforms.FloatField('Gamma', default=0.1) ### Network # Use a SelectField instead of a HiddenField so that the default value # is used when nothing is provided (through the REST API) method = wtforms.SelectField( u'Network type', choices=[ ('standard', 'Standard network'), ('previous', 'Previous network'), ('custom', 'Custom network'), ], default='standard', ) ## framework - hidden field, set by Javascript to the selected framework ID framework = wtforms.HiddenField( 'framework', validators=[ validators.AnyOf( [fw.get_id() for fw in frameworks.get_frameworks()], message='The framework you choose is not currently supported.') ], default=frameworks.get_frameworks()[0].get_id()) # The options for this get set in the view (since they are dependent on the data type) standard_networks = wtforms.RadioField( 'Standard Networks', validators=[ validate_required_iff(method='standard'), ], ) previous_networks = wtforms.RadioField( 'Previous Networks', choices=[], validators=[ validate_required_iff(method='previous'), selection_exists_in_choices, ], ) custom_network = utils.forms.TextAreaField( 'Custom Network', validators=[ validate_required_iff(method='custom'), validate_NetParameter, ], ) custom_network_snapshot = utils.forms.TextField( 'Pretrained model(s)', tooltip= "Colon delimited paths to pretrained model files. Only edit this field if you understand how fine-tuning works in caffe or torch." ) def validate_custom_network_snapshot(form, field): if form.method.data == 'custom': snapshot = field.data.strip() if snapshot: if not os.path.exists(snapshot): raise validators.ValidationError('File does not exist') # Select one of several GPUs select_gpu = wtforms.RadioField( 'Select which GPU you would like to use', choices=[('next', 'Next available')] + [( index, '#%s - %s (%s memory)' % (index, get_device(index).name, sizeof_fmt( get_nvml_info(index)['memory']['total'] if get_nvml_info(index) and 'memory' in get_nvml_info(index) else get_device(index).totalGlobalMem)), ) for index in config_value('gpu_list').split(',') if index], default='next', ) # Select N of several GPUs select_gpus = utils.forms.SelectMultipleField( 'Select which GPU[s] you would like to use', choices=[( index, '#%s - %s (%s memory)' % (index, get_device(index).name, sizeof_fmt( get_nvml_info(index)['memory']['total'] if get_nvml_info(index) and 'memory' in get_nvml_info(index) else get_device(index).totalGlobalMem)), ) for index in config_value('gpu_list').split(',') if index], tooltip= "The job won't start until all of the chosen GPUs are available.") # XXX For testing # The Flask test framework can't handle SelectMultipleFields correctly select_gpus_list = wtforms.StringField( 'Select which GPU[s] you would like to use (comma separated)') def validate_select_gpus(form, field): if form.select_gpus_list.data: field.data = form.select_gpus_list.data.split(',') # Use next available N GPUs select_gpu_count = wtforms.IntegerField( 'Use this many GPUs (next available)', validators=[ validators.NumberRange(min=1, max=len( config_value('gpu_list').split(','))) ], default=1, ) def validate_select_gpu_count(form, field): if field.data is None: if form.select_gpus.data: # Make this field optional field.errors[:] = [] raise validators.StopValidation() model_name = utils.forms.StringField( 'Model Name', validators=[validators.DataRequired()], tooltip= "An identifier, later used to refer to this model in the Application.") # allows shuffling data during training (for frameworks that support this, as indicated by # their Framework.can_shuffle_data() method) shuffle = utils.forms.BooleanField( 'Shuffle Train Data', default=True, tooltip='For every epoch, shuffle the data before training.')
def create(): """ Create a new ImageClassificationModelJob Returns JSON when requested: {job_id,name,status} or {errors:[]} """ form = ImageClassificationModelForm() form.dataset.choices = get_datasets() form.standard_networks.choices = get_standard_networks() form.standard_networks.default = get_default_standard_network() form.previous_networks.choices = get_previous_networks() prev_network_snapshots = get_previous_network_snapshots() ## Is there a request to clone a job with ?clone=<job_id> fill_form_if_cloned(form) if not form.validate_on_submit(): if request_wants_json(): return flask.jsonify({'errors': form.errors}), 400 else: return flask.render_template('models/images/classification/new.html', form = form, frameworks = frameworks.get_frameworks(), previous_network_snapshots = prev_network_snapshots, previous_networks_fullinfo = get_previous_networks_fulldetails(), multi_gpu = config_value('caffe_root')['multi_gpu'], ), 400 datasetJob = scheduler.get_job(form.dataset.data) if not datasetJob: raise werkzeug.exceptions.BadRequest( 'Unknown dataset job_id "%s"' % form.dataset.data) job = None try: job = ImageClassificationModelJob( username = utils.auth.get_username(), name = form.model_name.data, dataset_id = datasetJob.id(), ) # get handle to framework object fw = frameworks.get_framework_by_id(form.framework.data) pretrained_model = None if form.method.data == 'standard': found = False # can we find it in standard networks? network_desc = fw.get_standard_network_desc(form.standard_networks.data) if network_desc: found = True network = fw.get_network_from_desc(network_desc) if not found: raise werkzeug.exceptions.BadRequest( 'Unknown standard model "%s"' % form.standard_networks.data) elif form.method.data == 'previous': old_job = scheduler.get_job(form.previous_networks.data) if not old_job: raise werkzeug.exceptions.BadRequest( 'Job not found: %s' % form.previous_networks.data) use_same_dataset = (old_job.dataset_id == job.dataset_id) network = fw.get_network_from_previous(old_job.train_task().network, use_same_dataset) for choice in form.previous_networks.choices: if choice[0] == form.previous_networks.data: epoch = float(flask.request.form['%s-snapshot' % form.previous_networks.data]) if epoch == 0: pass elif epoch == -1: pretrained_model = old_job.train_task().pretrained_model else: for filename, e in old_job.train_task().snapshots: if e == epoch: pretrained_model = filename break if pretrained_model is None: raise werkzeug.exceptions.BadRequest( "For the job %s, selected pretrained_model for epoch %d is invalid!" % (form.previous_networks.data, epoch)) if not (os.path.exists(pretrained_model)): raise werkzeug.exceptions.BadRequest( "Pretrained_model for the selected epoch doesn't exists. May be deleted by another user/process. Please restart the server to load the correct pretrained_model details") break elif form.method.data == 'custom': network = fw.get_network_from_desc(form.custom_network.data) pretrained_model = form.custom_network_snapshot.data.strip() else: raise werkzeug.exceptions.BadRequest( 'Unrecognized method: "%s"' % form.method.data) policy = {'policy': form.lr_policy.data} if form.lr_policy.data == 'fixed': pass elif form.lr_policy.data == 'step': policy['stepsize'] = form.lr_step_size.data policy['gamma'] = form.lr_step_gamma.data elif form.lr_policy.data == 'multistep': policy['stepvalue'] = form.lr_multistep_values.data policy['gamma'] = form.lr_multistep_gamma.data elif form.lr_policy.data == 'exp': policy['gamma'] = form.lr_exp_gamma.data elif form.lr_policy.data == 'inv': policy['gamma'] = form.lr_inv_gamma.data policy['power'] = form.lr_inv_power.data elif form.lr_policy.data == 'poly': policy['power'] = form.lr_poly_power.data elif form.lr_policy.data == 'sigmoid': policy['stepsize'] = form.lr_sigmoid_step.data policy['gamma'] = form.lr_sigmoid_gamma.data else: raise werkzeug.exceptions.BadRequest( 'Invalid learning rate policy') if config_value('caffe_root')['multi_gpu']: if form.select_gpus.data: selected_gpus = [str(gpu) for gpu in form.select_gpus.data] gpu_count = None elif form.select_gpu_count.data: gpu_count = form.select_gpu_count.data selected_gpus = None else: gpu_count = 1 selected_gpus = None else: if form.select_gpu.data == 'next': gpu_count = 1 selected_gpus = None else: selected_gpus = [str(form.select_gpu.data)] gpu_count = None # Python Layer File may be on the server or copied from the client. fs.copy_python_layer_file( bool(form.python_layer_from_client.data), job.dir(), (flask.request.files[form.python_layer_client_file.name] if form.python_layer_client_file.name in flask.request.files else ''), form.python_layer_server_file.data) job.tasks.append(fw.create_train_task( job_dir = job.dir(), dataset = datasetJob, train_epochs = form.train_epochs.data, snapshot_interval = form.snapshot_interval.data, learning_rate = form.learning_rate.data, lr_policy = policy, gpu_count = gpu_count, selected_gpus = selected_gpus, batch_size = form.batch_size.data, val_interval = form.val_interval.data, pretrained_model= pretrained_model, crop_size = form.crop_size.data, use_mean = form.use_mean.data, network = network, random_seed = form.random_seed.data, solver_type = form.solver_type.data, shuffle = form.shuffle.data, ) ) ## Save form data with the job so we can easily clone it later. save_form_to_job(job, form) scheduler.add_job(job) if request_wants_json(): return flask.jsonify(job.json_dict()) else: return flask.redirect(flask.url_for('digits.model.views.show', job_id=job.id())) except: if job: scheduler.delete_job(job) raise
def show(job): """ Called from digits.model.views.models_show() """ return flask.render_template('models/images/classification/show.html', job=job, framework_ids = [fw.get_id() for fw in frameworks.get_frameworks()])
def create(): """ Create a new ImageClassificationModelJob Returns JSON when requested: {job_id,name,status} or {errors:[]} """ form = ImageClassificationModelForm() form.dataset.choices = get_datasets() form.standard_networks.choices = get_standard_networks() form.standard_networks.default = get_default_standard_network() form.previous_networks.choices = get_previous_networks() form.pretrained_networks.choices = get_pretrained_networks() prev_network_snapshots = get_previous_network_snapshots() # Is there a request to clone a job with ?clone=<job_id> fill_form_if_cloned(form) if not form.validate_on_submit(): if request_wants_json(): return flask.jsonify({'errors': form.errors}), 400 else: return flask.render_template('models/images/classification/new.html', form=form, frameworks=frameworks.get_frameworks(), previous_network_snapshots=prev_network_snapshots, previous_networks_fullinfo=get_previous_networks_fulldetails(), pretrained_networks_fullinfo=get_pretrained_networks_fulldetails(), multi_gpu=config_value('caffe')['multi_gpu'], ), 400 datasetJob = scheduler.get_job(form.dataset.data) if not datasetJob: raise werkzeug.exceptions.BadRequest( 'Unknown dataset job_id "%s"' % form.dataset.data) # sweeps will be a list of the the permutations of swept fields # Get swept learning_rate sweeps = [{'learning_rate': v} for v in form.learning_rate.data] add_learning_rate = len(form.learning_rate.data) > 1 # Add swept batch_size sweeps = [{**s, **{'batch_size': bs}} for bs in form.batch_size.data for s in sweeps[:]] add_batch_size = len(form.batch_size.data) > 1 n_jobs = len(sweeps) jobs = [] for sweep in sweeps: # Populate the form with swept data to be used in saving and # launching jobs. form.learning_rate.data = sweep['learning_rate'] form.batch_size.data = sweep['batch_size'] # Augment Job Name extra = '' if add_learning_rate: extra += ' learning_rate:%s' % str(form.learning_rate.data[0]) if add_batch_size: extra += ' batch_size:%d' % form.batch_size.data[0] job = None try: job = ImageClassificationModelJob( username=utils.auth.get_username(), name=form.model_name.data + extra, group=form.group_name.data, dataset_id=datasetJob.id(), ) # get handle to framework object fw = frameworks.get_framework_by_id(form.framework.data) pretrained_model = None if form.method.data == 'standard': found = False # can we find it in standard networks? network_desc = fw.get_standard_network_desc(form.standard_networks.data) if network_desc: found = True network = fw.get_network_from_desc(network_desc) if not found: raise werkzeug.exceptions.BadRequest( 'Unknown standard model "%s"' % form.standard_networks.data) elif form.method.data == 'previous': old_job = scheduler.get_job(form.previous_networks.data) if not old_job: raise werkzeug.exceptions.BadRequest( 'Job not found: %s' % form.previous_networks.data) use_same_dataset = (old_job.dataset_id == job.dataset_id) network = fw.get_network_from_previous(old_job.train_task().network, use_same_dataset) for choice in form.previous_networks.choices: if choice[0] == form.previous_networks.data: epoch = float(flask.request.form['%s-snapshot' % form.previous_networks.data]) if epoch == 0: pass elif epoch == -1: pretrained_model = old_job.train_task().pretrained_model else: # verify snapshot exists pretrained_model = old_job.train_task().get_snapshot(epoch, download=True) if pretrained_model is None: raise werkzeug.exceptions.BadRequest( "For the job %s, selected pretrained_model for epoch %d is invalid!" % (form.previous_networks.data, epoch)) # the first is the actual file if a list is returned, other should be meta data if isinstance(pretrained_model, list): pretrained_model = pretrained_model[0] if not (os.path.exists(pretrained_model)): raise werkzeug.exceptions.BadRequest( "Pretrained_model for the selected epoch doesn't exist. " "May be deleted by another user/process. " "Please restart the server to load the correct pretrained_model details.") # get logical path pretrained_model = old_job.train_task().get_snapshot(epoch) break elif form.method.data == 'pretrained': pretrained_job = scheduler.get_job(form.pretrained_networks.data) model_def_path = pretrained_job.get_model_def_path() weights_path = pretrained_job.get_weights_path() network = fw.get_network_from_path(model_def_path) pretrained_model = weights_path elif form.method.data == 'custom': network = fw.get_network_from_desc(form.custom_network.data) pretrained_model = form.custom_network_snapshot.data.strip() else: raise werkzeug.exceptions.BadRequest( 'Unrecognized method: "%s"' % form.method.data) policy = {'policy': form.lr_policy.data} if form.lr_policy.data == 'fixed': pass elif form.lr_policy.data == 'step': policy['stepsize'] = form.lr_step_size.data policy['gamma'] = form.lr_step_gamma.data elif form.lr_policy.data == 'multistep': policy['stepvalue'] = form.lr_multistep_values.data policy['gamma'] = form.lr_multistep_gamma.data elif form.lr_policy.data == 'exp': policy['gamma'] = form.lr_exp_gamma.data elif form.lr_policy.data == 'inv': policy['gamma'] = form.lr_inv_gamma.data policy['power'] = form.lr_inv_power.data elif form.lr_policy.data == 'poly': policy['power'] = form.lr_poly_power.data elif form.lr_policy.data == 'sigmoid': policy['stepsize'] = form.lr_sigmoid_step.data policy['gamma'] = form.lr_sigmoid_gamma.data else: raise werkzeug.exceptions.BadRequest( 'Invalid learning rate policy') if config_value('caffe')['multi_gpu']: if form.select_gpus.data: selected_gpus = [str(gpu) for gpu in form.select_gpus.data] gpu_count = None elif form.select_gpu_count.data: gpu_count = form.select_gpu_count.data selected_gpus = None else: gpu_count = 1 selected_gpus = None else: if form.select_gpu.data == 'next': gpu_count = 1 selected_gpus = None else: selected_gpus = [str(form.select_gpu.data)] gpu_count = None # Set up data augmentation structure data_aug = {} data_aug['flip'] = form.aug_flip.data data_aug['quad_rot'] = form.aug_quad_rot.data data_aug['rot'] = form.aug_rot.data data_aug['scale'] = form.aug_scale.data data_aug['noise'] = form.aug_noise.data data_aug['contrast'] = form.aug_contrast.data data_aug['whitening'] = form.aug_whitening.data data_aug['hsv_use'] = form.aug_hsv_use.data data_aug['hsv_h'] = form.aug_hsv_h.data data_aug['hsv_s'] = form.aug_hsv_s.data data_aug['hsv_v'] = form.aug_hsv_v.data # Python Layer File may be on the server or copied from the client. fs.copy_python_layer_file( bool(form.python_layer_from_client.data), job.dir(), (flask.request.files[form.python_layer_client_file.name] if form.python_layer_client_file.name in flask.request.files else ''), form.python_layer_server_file.data) job.tasks.append(fw.create_train_task( job=job, dataset=datasetJob, train_epochs=form.train_epochs.data, snapshot_interval=form.snapshot_interval.data, learning_rate=form.learning_rate.data[0], lr_policy=policy, gpu_count=gpu_count, selected_gpus=selected_gpus, batch_size=form.batch_size.data[0], batch_accumulation=form.batch_accumulation.data, val_interval=form.val_interval.data, traces_interval=form.traces_interval.data, pretrained_model=pretrained_model, crop_size=form.crop_size.data, use_mean=form.use_mean.data, network=network, random_seed=form.random_seed.data, solver_type=form.solver_type.data, rms_decay=form.rms_decay.data, shuffle=form.shuffle.data, data_aug=data_aug, blob_format=form.nvcaffe_blob_format.data, ) ) # Save form data with the job so we can easily clone it later. save_form_to_job(job, form) jobs.append(job) scheduler.add_job(job) if n_jobs == 1: if request_wants_json(): return flask.jsonify(job.json_dict()) else: return flask.redirect(flask.url_for('digits.model.views.show', job_id=job.id())) except: if job: scheduler.delete_job(job) raise if request_wants_json(): return flask.jsonify(jobs=[j.json_dict() for j in jobs]) # If there are multiple jobs launched, go to the home page. return flask.redirect('/')
def create(): """ Create a new GenericImageModelJob Returns JSON when requested: {job_id,name,status} or {errors:[]} """ form = GenericImageModelForm() form.dataset.choices = get_datasets() form.standard_networks.choices = [] form.previous_networks.choices = get_previous_networks() prev_network_snapshots = get_previous_network_snapshots() ## Is there a request to clone a job with ?clone=<job_id> fill_form_if_cloned(form) if not form.validate_on_submit(): if request_wants_json(): return flask.jsonify({'errors': form.errors}), 400 else: return flask.render_template('models/images/generic/new.html', form = form, frameworks = frameworks.get_frameworks(), previous_network_snapshots = prev_network_snapshots, previous_networks_fullinfo = get_previous_networks_fulldetails(), multi_gpu = config_value('caffe_root')['multi_gpu'], ), 400 datasetJob = scheduler.get_job(form.dataset.data) if not datasetJob: raise werkzeug.exceptions.BadRequest( 'Unknown dataset job_id "%s"' % form.dataset.data) # sweeps will be a list of the the permutations of swept fields # Get swept learning_rate sweeps = [{'learning_rate': v} for v in form.learning_rate.data] add_learning_rate = len(form.learning_rate.data) > 1 # Add swept batch_size sweeps = [dict(s.items() + [('batch_size', bs)]) for bs in form.batch_size.data for s in sweeps[:]] add_batch_size = len(form.batch_size.data) > 1 n_jobs = len(sweeps) jobs = [] for sweep in sweeps: # Populate the form with swept data to be used in saving and # launching jobs. form.learning_rate.data = sweep['learning_rate'] form.batch_size.data = sweep['batch_size'] # Augment Job Name extra = '' if add_learning_rate: extra += ' learning_rate:%s' % str(form.learning_rate.data[0]) if add_batch_size: extra += ' batch_size:%d' % form.batch_size.data[0] job = None try: job = GenericImageModelJob( username = utils.auth.get_username(), name = form.model_name.data + extra, dataset_id = datasetJob.id(), ) # get framework (hard-coded to caffe for now) fw = frameworks.get_framework_by_id(form.framework.data) pretrained_model = None #if form.method.data == 'standard': if form.method.data == 'previous': old_job = scheduler.get_job(form.previous_networks.data) if not old_job: raise werkzeug.exceptions.BadRequest( 'Job not found: %s' % form.previous_networks.data) use_same_dataset = (old_job.dataset_id == job.dataset_id) network = fw.get_network_from_previous(old_job.train_task().network, use_same_dataset) for choice in form.previous_networks.choices: if choice[0] == form.previous_networks.data: epoch = float(flask.request.form['%s-snapshot' % form.previous_networks.data]) if epoch == 0: pass elif epoch == -1: pretrained_model = old_job.train_task().pretrained_model else: for filename, e in old_job.train_task().snapshots: if e == epoch: pretrained_model = filename break if pretrained_model is None: raise werkzeug.exceptions.BadRequest( "For the job %s, selected pretrained_model for epoch %d is invalid!" % (form.previous_networks.data, epoch)) if not (os.path.exists(pretrained_model)): raise werkzeug.exceptions.BadRequest( "Pretrained_model for the selected epoch doesn't exists. May be deleted by another user/process. Please restart the server to load the correct pretrained_model details") break elif form.method.data == 'custom': network = fw.get_network_from_desc(form.custom_network.data) pretrained_model = form.custom_network_snapshot.data.strip() else: raise werkzeug.exceptions.BadRequest( 'Unrecognized method: "%s"' % form.method.data) policy = {'policy': form.lr_policy.data} if form.lr_policy.data == 'fixed': pass elif form.lr_policy.data == 'step': policy['stepsize'] = form.lr_step_size.data policy['gamma'] = form.lr_step_gamma.data elif form.lr_policy.data == 'multistep': policy['stepvalue'] = form.lr_multistep_values.data policy['gamma'] = form.lr_multistep_gamma.data elif form.lr_policy.data == 'exp': policy['gamma'] = form.lr_exp_gamma.data elif form.lr_policy.data == 'inv': policy['gamma'] = form.lr_inv_gamma.data policy['power'] = form.lr_inv_power.data elif form.lr_policy.data == 'poly': policy['power'] = form.lr_poly_power.data elif form.lr_policy.data == 'sigmoid': policy['stepsize'] = form.lr_sigmoid_step.data policy['gamma'] = form.lr_sigmoid_gamma.data else: raise werkzeug.exceptions.BadRequest( 'Invalid learning rate policy') if config_value('caffe_root')['multi_gpu']: if form.select_gpu_count.data: gpu_count = form.select_gpu_count.data selected_gpus = None else: selected_gpus = [str(gpu) for gpu in form.select_gpus.data] gpu_count = None else: if form.select_gpu.data == 'next': gpu_count = 1 selected_gpus = None else: selected_gpus = [str(form.select_gpu.data)] gpu_count = None # Python Layer File may be on the server or copied from the client. fs.copy_python_layer_file( bool(form.python_layer_from_client.data), job.dir(), (flask.request.files[form.python_layer_client_file.name] if form.python_layer_client_file.name in flask.request.files else ''), form.python_layer_server_file.data) job.tasks.append(fw.create_train_task( job = job, dataset = datasetJob, train_epochs = form.train_epochs.data, snapshot_interval = form.snapshot_interval.data, learning_rate = form.learning_rate.data[0], lr_policy = policy, gpu_count = gpu_count, selected_gpus = selected_gpus, batch_size = form.batch_size.data[0], batch_accumulation = form.batch_accumulation.data, val_interval = form.val_interval.data, pretrained_model = pretrained_model, crop_size = form.crop_size.data, use_mean = form.use_mean.data, network = network, random_seed = form.random_seed.data, solver_type = form.solver_type.data, shuffle = form.shuffle.data, ) ) ## Save form data with the job so we can easily clone it later. save_form_to_job(job, form) jobs.append(job) scheduler.add_job(job) if n_jobs == 1: if request_wants_json(): return flask.jsonify(job.json_dict()) else: return flask.redirect(flask.url_for('digits.model.views.show', job_id=job.id())) except: if job: scheduler.delete_job(job) raise if request_wants_json(): return flask.jsonify(jobs=[job.json_dict() for job in jobs]) # If there are multiple jobs launched, go to the home page. return flask.redirect('/')
def image_classification_model_create(): """ Create a new ImageClassificationModelJob Returns JSON when requested: {job_id,name,status} or {errors:[]} """ form = ImageClassificationModelForm() form.dataset.choices = get_datasets() form.standard_networks.choices = get_standard_networks() form.standard_networks.default = get_default_standard_network() form.previous_networks.choices = get_previous_networks() prev_network_snapshots = get_previous_network_snapshots() if not form.validate_on_submit(): if request_wants_json(): return flask.jsonify({"errors": form.errors}), 400 else: return ( flask.render_template( "models/images/classification/new.html", form=form, frameworks=frameworks.get_frameworks(), previous_network_snapshots=prev_network_snapshots, previous_networks_fullinfo=get_previous_networks_fulldetails(), multi_gpu=config_value("caffe_root")["multi_gpu"], ), 400, ) datasetJob = scheduler.get_job(form.dataset.data) if not datasetJob: raise werkzeug.exceptions.BadRequest('Unknown dataset job_id "%s"' % form.dataset.data) job = None try: job = ImageClassificationModelJob(name=form.model_name.data, dataset_id=datasetJob.id()) # get handle to framework object fw = frameworks.get_framework_by_id(form.framework.data) pretrained_model = None if form.method.data == "standard": found = False # can we find it in standard networks? network_desc = fw.get_standard_network_desc(form.standard_networks.data) if network_desc: found = True network = fw.get_network_from_desc(network_desc) if not found: raise werkzeug.exceptions.BadRequest('Unknown standard model "%s"' % form.standard_networks.data) elif form.method.data == "previous": old_job = scheduler.get_job(form.previous_networks.data) if not old_job: raise werkzeug.exceptions.BadRequest("Job not found: %s" % form.previous_networks.data) network = fw.get_network_from_previous(old_job.train_task().network) for choice in form.previous_networks.choices: if choice[0] == form.previous_networks.data: epoch = float(flask.request.form["%s-snapshot" % form.previous_networks.data]) if epoch == 0: pass elif epoch == -1: pretrained_model = old_job.train_task().pretrained_model else: for filename, e in old_job.train_task().snapshots: if e == epoch: pretrained_model = filename break if pretrained_model is None: raise werkzeug.exceptions.BadRequest( "For the job %s, selected pretrained_model for epoch %d is invalid!" % (form.previous_networks.data, epoch) ) if not (os.path.exists(pretrained_model)): raise werkzeug.exceptions.BadRequest( "Pretrained_model for the selected epoch doesn't exists. May be deleted by another user/process. Please restart the server to load the correct pretrained_model details" ) break elif form.method.data == "custom": network = fw.get_network_from_desc(form.custom_network.data) pretrained_model = form.custom_network_snapshot.data.strip() else: raise werkzeug.exceptions.BadRequest('Unrecognized method: "%s"' % form.method.data) policy = {"policy": form.lr_policy.data} if form.lr_policy.data == "fixed": pass elif form.lr_policy.data == "step": policy["stepsize"] = form.lr_step_size.data policy["gamma"] = form.lr_step_gamma.data elif form.lr_policy.data == "multistep": policy["stepvalue"] = form.lr_multistep_values.data policy["gamma"] = form.lr_multistep_gamma.data elif form.lr_policy.data == "exp": policy["gamma"] = form.lr_exp_gamma.data elif form.lr_policy.data == "inv": policy["gamma"] = form.lr_inv_gamma.data policy["power"] = form.lr_inv_power.data elif form.lr_policy.data == "poly": policy["power"] = form.lr_poly_power.data elif form.lr_policy.data == "sigmoid": policy["stepsize"] = form.lr_sigmoid_step.data policy["gamma"] = form.lr_sigmoid_gamma.data else: raise werkzeug.exceptions.BadRequest("Invalid learning rate policy") if config_value("caffe_root")["multi_gpu"]: if form.select_gpus.data: selected_gpus = [str(gpu) for gpu in form.select_gpus.data] gpu_count = None elif form.select_gpu_count.data: gpu_count = form.select_gpu_count.data selected_gpus = None else: gpu_count = 1 selected_gpus = None else: if form.select_gpu.data == "next": gpu_count = 1 selected_gpus = None else: selected_gpus = [str(form.select_gpu.data)] gpu_count = None job.tasks.append( fw.create_train_task( job_dir=job.dir(), dataset=datasetJob, train_epochs=form.train_epochs.data, snapshot_interval=form.snapshot_interval.data, learning_rate=form.learning_rate.data, lr_policy=policy, gpu_count=gpu_count, selected_gpus=selected_gpus, batch_size=form.batch_size.data, val_interval=form.val_interval.data, pretrained_model=pretrained_model, crop_size=form.crop_size.data, use_mean=bool(form.use_mean.data), network=network, random_seed=form.random_seed.data, solver_type=form.solver_type.data, shuffle=form.shuffle.data, ) ) scheduler.add_job(job) if request_wants_json(): return flask.jsonify(job.json_dict()) else: return flask.redirect(flask.url_for("models_show", job_id=job.id())) except: if job: scheduler.delete_job(job) raise