예제 #1
0
def _validate(agent_skill, shared_data, validate=True, model_dirname=None):
    """Fit a fresh model on ``shared_data`` and optionally validate it.

    A model is built with ``models.catalog`` from the skill's model
    settings and fit on ``shared_data``.  When ``validate`` is true the
    model is checked with ``models.validate`` against the skill's own
    processed data.  If the check passes (or is skipped), the model is
    stored on ``agent_skill.skill_model.model`` and, when
    ``model_dirname`` is given, pickled to
    ``<model_dirname>/<skill_name>.pkl``.

    Args:
        agent_skill: Skill object providing ``skill_model``,
            ``sub_arg_accuracy``, ``data`` and ``skill_name``.
        shared_data: Data passed unchanged to ``model.fit``.
        validate: When false, validation is skipped and the model is
            accepted unconditionally.
        model_dirname: Directory in which to pickle the accepted model,
            or ``None`` to skip saving.

    Returns:
        The result of ``models.validate``, or ``True`` when validation
        is skipped.

    Raises:
        OSError: If ``model_dirname`` cannot be created and does not
            already exist as a directory.
    """
    model = models.catalog(
        DictTree(
            name=agent_skill.skill_model.name,
            arg_in_len=agent_skill.skill_model.arg_in_len,
            max_cnt=agent_skill.skill_model.max_cnt,
            num_sub=agent_skill.skill_model.num_sub,
            sub_arg_accuracy=agent_skill.sub_arg_accuracy,
        ))
    model.fit(shared_data)
    if validate:
        valid_data = _process(agent_skill, agent_skill.data)
        validated = models.validate(model, valid_data,
                                    agent_skill.sub_arg_accuracy)
    else:
        validated = True
    if validated:
        agent_skill.skill_model.model = model
        if model_dirname is not None:
            try:
                os.makedirs(model_dirname)
            except OSError:
                # An already-existing directory is the expected case;
                # any other failure would only resurface as a less
                # helpful error when opening the file below.
                if not os.path.isdir(model_dirname):
                    raise
            model_fn = "{}/{}.pkl".format(model_dirname,
                                          agent_skill.skill_name)
            # Context manager guarantees the pickle is flushed and the
            # handle closed even if dumping fails part-way.
            with open(model_fn, 'wb') as model_file:
                pickle.dump(model, model_file, protocol=2)
    return validated
예제 #2
0
def _train(agent_skill, shared_data, validate=True, model_dirname=None):
    """Cross-validate and then fit a model for ``agent_skill``.

    A model is built with ``models.catalog`` from the skill's model
    settings.  When ``validate`` is true, the skill's data is split into
    ``min(len(agent_skill.data), NUM_FOLDS)`` shuffled folds; for each
    fold the model is fit on the training part plus ``shared_data`` and
    scored with ``models.validate`` on the held-out part.  The per-fold
    results are combined by ``models.total_validation``.  If that result
    is truthy (or validation is skipped), the model is re-fit on all of
    the skill's data plus ``shared_data``, stored on
    ``agent_skill.skill_model.model`` and, when ``model_dirname`` is
    given, pickled to ``<model_dirname>/<skill_name>.pkl``.

    Neither ``agent_skill.data`` nor ``shared_data`` is modified.

    Args:
        agent_skill: Skill object providing ``skill_model``,
            ``sub_arg_accuracy``, ``data`` and ``skill_name``.
        shared_data: Extra samples added to every training set, or
            ``None`` for no extra samples.
        validate: When false, cross-validation is skipped and the model
            is accepted unconditionally.
        model_dirname: Directory in which to pickle the accepted model,
            or ``None`` to skip saving.

    Returns:
        The result of ``models.total_validation``, or ``True`` when
        validation is skipped.

    Raises:
        OSError: If ``model_dirname`` cannot be created and does not
            already exist as a directory.
    """
    model = models.catalog(DictTree(
        name=agent_skill.skill_model.name,
        arg_in_len=agent_skill.skill_model.arg_in_len,
        max_cnt=agent_skill.skill_model.max_cnt,
        num_sub=agent_skill.skill_model.num_sub,
        sub_arg_accuracy=agent_skill.sub_arg_accuracy,
    ))
    # Normalise once: ``None`` means "no shared samples", and working on
    # a copy keeps the caller's sequence untouched.
    shared = [] if shared_data is None else list(shared_data)
    if validate:
        num_folds = min(len(agent_skill.data), NUM_FOLDS)
        # ``shuffle`` is passed by keyword: it is keyword-only in current
        # scikit-learn releases (assumes ``ms`` is sklearn.model_selection
        # -- TODO confirm against the file's imports).
        kf = ms.KFold(num_folds, shuffle=True)
        validation = []
        for new_train_idxs, valid_idxs in kf.split(agent_skill.data):
            train_data = _process(
                agent_skill,
                [agent_skill.data[idx] for idx in new_train_idxs] + shared)
            valid_data = _process(
                agent_skill,
                [agent_skill.data[idx] for idx in valid_idxs])
            model.fit(train_data)
            validation.append(models.validate(model, valid_data))
        validated = models.total_validation(validation, agent_skill.sub_arg_accuracy)
    else:
        validated = True
    if validated:
        # Build a new list instead of ``+=`` on ``agent_skill.data``,
        # which would permanently append the shared samples to the
        # skill's own data on every call.
        all_data = _process(agent_skill, list(agent_skill.data) + shared)
        model.fit(all_data)
        agent_skill.skill_model.model = model
        if model_dirname is not None:
            try:
                os.makedirs(model_dirname)
            except OSError:
                # An already-existing directory is the expected case;
                # anything else is a real failure worth reporting.
                if not os.path.isdir(model_dirname):
                    raise
            model_fn = "{}/{}.pkl".format(model_dirname, agent_skill.skill_name)
            # Context manager guarantees the handle is closed.
            with open(model_fn, 'wb') as model_file:
                pickle.dump(model, model_file, protocol=2)
    return validated
예제 #3
0
 def create_db_entries(self):
     """Construct a ``catalog`` object per archive entry and time the run.

     Always prints a start message.  When ``self.db`` is truthy, every
     value in ``info['archive']`` is expanded as keyword arguments to
     ``catalog``; the start and end timestamps are stored on
     ``self._db_start`` / ``self._db_end`` and the elapsed time is
     printed.  When ``self.db`` is falsy nothing else happens.

     The print calls are written so that they produce the same output
     under both Python 2 and Python 3.
     """
     print("Creating entries...")
     if self.db:
         self._db_start = datetime.datetime.today()
         # Only the entry payloads are needed; the archive keys are unused.
         for entry in info['archive'].values():
             # NOTE(review): the original had the follow-up calls
             # ``blind_create()`` and ``save()`` commented out, so the
             # constructed object is discarded here -- confirm whether
             # construction alone is the intended effect.
             catalog(**entry)
         self._db_end = datetime.datetime.today()
         print("Elapsed: %s" % (self._db_end - self._db_start,))