def _validate(agent_skill, shared_data, validate=True, model_dirname=None):
    """Fit a fresh model on ``shared_data`` and optionally validate it.

    A model is built with ``models.catalog`` from the skill's model
    configuration and fitted on ``shared_data``.  When ``validate`` is true,
    the fitted model is checked with ``models.validate`` against the skill's
    own data (after ``_process``) using ``agent_skill.sub_arg_accuracy``.

    If the model is accepted it is stored on ``agent_skill.skill_model.model``
    and, when ``model_dirname`` is given, pickled to
    ``<model_dirname>/<skill_name>.pkl``.

    Args:
        agent_skill: Skill object providing ``skill_model``, ``data``,
            ``sub_arg_accuracy`` and ``skill_name``.
        shared_data: Data passed directly to ``model.fit``.
        validate: When false, validation is skipped and the model is accepted.
        model_dirname: Directory to pickle the accepted model into, or
            ``None`` to skip saving.

    Returns:
        The result of ``models.validate`` when ``validate`` is true,
        otherwise ``True``.

    Raises:
        OSError: If ``model_dirname`` cannot be created and does not already
            exist as a directory.
    """
    model = models.catalog(DictTree(
        name=agent_skill.skill_model.name,
        arg_in_len=agent_skill.skill_model.arg_in_len,
        max_cnt=agent_skill.skill_model.max_cnt,
        num_sub=agent_skill.skill_model.num_sub,
        sub_arg_accuracy=agent_skill.sub_arg_accuracy,
    ))
    # NOTE(review): shared_data is fitted as-is here, while the validation
    # data below goes through _process -- confirm callers pass processed data.
    model.fit(shared_data)

    if validate:
        valid_data = _process(agent_skill, agent_skill.data)
        validated = models.validate(model, valid_data, agent_skill.sub_arg_accuracy)
    else:
        validated = True

    if validated:
        agent_skill.skill_model.model = model
        if model_dirname is not None:
            try:
                os.makedirs(model_dirname)
            except OSError:
                # An already-existing directory is fine; anything else
                # (permissions, a file in the way, ...) must not be hidden.
                if not os.path.isdir(model_dirname):
                    raise
            model_fn = "{}/{}.pkl".format(model_dirname, agent_skill.skill_name)
            # Context manager guarantees the pickle is flushed and closed.
            with open(model_fn, 'wb') as model_file:
                pickle.dump(model, model_file, protocol=2)
    return validated
def _train(agent_skill, shared_data, validate=True, model_dirname=None):
    """Cross-validate and then train the skill's model on all available data.

    A model is built with ``models.catalog`` from the skill's model
    configuration.  When ``validate`` is true, the skill's own data is split
    into at most ``NUM_FOLDS`` shuffled folds; for each fold the model is
    fitted on the training part plus ``shared_data`` and scored with
    ``models.validate`` on the held-out part.  The per-fold results are
    combined by ``models.total_validation``.

    If the model is accepted it is re-fitted on the skill's data plus
    ``shared_data``, stored on ``agent_skill.skill_model.model`` and, when
    ``model_dirname`` is given, pickled to
    ``<model_dirname>/<skill_name>.pkl``.

    ``agent_skill.data`` is never modified by this function.

    Args:
        agent_skill: Skill object providing ``skill_model``, ``data``,
            ``sub_arg_accuracy`` and ``skill_name``.
        shared_data: Extra samples added to every training set, or ``None``.
        validate: When false, cross-validation is skipped and the model is
            accepted.
        model_dirname: Directory to pickle the accepted model into, or
            ``None`` to skip saving.

    Returns:
        The result of ``models.total_validation`` when ``validate`` is true,
        otherwise ``True``.

    Raises:
        OSError: If ``model_dirname`` cannot be created and does not already
            exist as a directory.
    """
    model = models.catalog(DictTree(
        name=agent_skill.skill_model.name,
        arg_in_len=agent_skill.skill_model.arg_in_len,
        max_cnt=agent_skill.skill_model.max_cnt,
        num_sub=agent_skill.skill_model.num_sub,
        sub_arg_accuracy=agent_skill.sub_arg_accuracy,
    ))

    # Normalise once so the cross-validation path and the final fit both
    # accept shared_data=None.
    shared = [] if shared_data is None else list(shared_data)
    own_data = agent_skill.data

    if validate:
        # NOTE(review): with fewer than two samples the fold count drops
        # below what KFold presumably accepts -- confirm callers guard this.
        num_folds = min(len(own_data), NUM_FOLDS)
        # shuffle is passed by keyword; assumes ``ms`` is
        # sklearn.model_selection, where recent releases reject it positionally.
        kf = ms.KFold(num_folds, shuffle=True)
        validation = []
        for new_train_idxs, valid_idxs in kf.split(own_data):
            train_data = _process(
                agent_skill,
                [own_data[idx] for idx in new_train_idxs] + shared)
            valid_data = _process(
                agent_skill,
                [own_data[idx] for idx in valid_idxs])
            model.fit(train_data)
            validation.append(models.validate(model, valid_data))
        validated = models.total_validation(validation, agent_skill.sub_arg_accuracy)
    else:
        validated = True

    if validated:
        # Build a new list: an in-place ``+=`` would append shared_data to
        # agent_skill.data itself and grow it on every call.
        all_data = _process(agent_skill, list(own_data) + shared)
        model.fit(all_data)
        agent_skill.skill_model.model = model
        if model_dirname is not None:
            try:
                os.makedirs(model_dirname)
            except OSError:
                # An already-existing directory is fine; anything else
                # (permissions, a file in the way, ...) must not be hidden.
                if not os.path.isdir(model_dirname):
                    raise
            model_fn = "{}/{}.pkl".format(model_dirname, agent_skill.skill_name)
            # Context manager guarantees the pickle is flushed and closed.
            with open(model_fn, 'wb') as model_file:
                pickle.dump(model, model_file, protocol=2)
    return validated
def create_db_entries(self): print "Creating entries..." if self.db: self._db_start = datetime.datetime.today() for id, entry in info['archive'].iteritems(): c = catalog(**entry) #c.blind_create() #c.save() self._db_end = datetime.datetime.today() print "Elapsed:", self._db_end - self._db_start