Example #1
def main():
    model_dir = "/home/tensor/tensor/scene/DataSet/checkpoints/"
    train_image_dir = "/home/tensor/tensor/scene/DataSet/train/"
    validate_image_dir = "/home/tensor/tensor/scene/DataSet/validation/"
    pretrained_model_path = "/home/tensor/tensor/scene/DataSet/pre_trained/inception_resnet_v2.ckpt"
    datagen = DataGenerator(train_image_dir, validate_image_dir)
    model = ModelFactory(datagen, net='INCEPTION_RESNET_V2', model_dir=model_dir, fine_tune=True, 
        pretrained_path=pretrained_model_path)
    with tf.Session() as session:
        model.train(session)
Example #2
def main():
    datagen = DataGenerator(FLAGS.train_json, FLAGS.train_image_dir,
                            FLAGS.validate_json, FLAGS.validate_image_dir)
    model = ModelFactory(datagen,
                         net='INCEPTION_RESNET_V2',
                         model_dir=FLAGS.model_dir,
                         fine_tune=True,
                         pretrained_path=FLAGS.pretrained_model_path)
    with tf.Session() as session:
        model.train(session)
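Example #2 reads its paths from FLAGS rather than hard-coding them as in Example #1. As a hedged sketch (not from the source repository), the flags could be declared with the TensorFlow 1.x flags module; the flag names mirror the ones used above, while the defaults and help strings are placeholders.

import tensorflow as tf

# Hypothetical flag definitions; names mirror Example #2, defaults are placeholders.
tf.app.flags.DEFINE_string('train_json', '', 'Path to the training annotations JSON.')
tf.app.flags.DEFINE_string('train_image_dir', '', 'Directory of training images.')
tf.app.flags.DEFINE_string('validate_json', '', 'Path to the validation annotations JSON.')
tf.app.flags.DEFINE_string('validate_image_dir', '', 'Directory of validation images.')
tf.app.flags.DEFINE_string('model_dir', '', 'Directory for checkpoints.')
tf.app.flags.DEFINE_string('pretrained_model_path', '', 'Path to the pre-trained checkpoint.')
FLAGS = tf.app.flags.FLAGS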
Example #3
    def __init__(self, model_type, data_props):
        logging.debug('init')
        self.id = str(uuid.uuid1())
        self.model_type = model_type
        self.data_props = data_props
        self.model = ModelFactory.get_model(model_type)()
        self.status = ModelStatus.INITIATED
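Examples #3 and #5 call ModelFactory.get_model(model_type) and then instantiate the result, so the factory apparently returns a class rather than an instance. Below is a minimal registry sketch consistent with that calling convention; it is purely illustrative, since the real factory's registration mechanism is not shown in these excerpts.

class ModelFactory:
    """Illustrative registry: maps a model-type name to a model class."""
    _registry = {}

    @classmethod
    def register(cls, name, model_cls):
        cls._registry[name] = model_cls

    @classmethod
    def get_model(cls, name):
        # Returns the class itself, so callers can instantiate it with their own args.
        return cls._registry[name]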
Example #4
def run_xval(modelname, x_arys, y_arys, wvfp,
             profile, cv=3, params=None):
    """Run a cross-validation on the specified model on the given data."""

    # TODO: implement micro Fmacro
    # y_pred_arys: Dict[str, np.ndarray] = {}
    fmacros4tgt: List[float] = []
    df_report = pd.DataFrame()

    for t in x_arys.keys():
        model = ModelFactory.get_model(
            modelname, wvfp,
            profile=profile, params=params)

        skf = StratifiedKFold(n_splits=cv, shuffle=True)
        fmacros_: List[float] = []
        # y_pred_: List[np.ndarray] = []
        # y_test: List[np.ndarray] = []
        for i, (train_idx, test_idx) in enumerate(skf.split(x_arys[t], y_arys[t])):
            logger.debug(f'CV {i+1}')
            x_train = x_arys[t][train_idx]
            y_train = y_arys[t][train_idx]
            x_test = x_arys[t][test_idx]
            y_test_ea = y_arys[t][test_idx]
            macrof_ea, accuracy_ea, y_pred_ea, df_cv = train_pred(
                model, modelname,
                x_train, y_train,
                x_test, y_test_ea
            )
            df_report = df_report.append(df_cv)

            fmacros_.append(macrof_ea)
            # y_pred_.append(y_pred_ea)
            # y_test.append(y_test_ea)
            report_result_ea(modelname, model, t, macrof_ea)
        macrof = np.mean(fmacros_)
        macrof_std = np.std(fmacros_)
        # y_pred = np.concatenate(tuple(y_pred_))
        # y_test = np.concatenate(tuple(y_test))

        if modelname == 'svm':
            logger.info(
                f'Target "{t}": {macrof:.4} macroF (param C: {model.best_params_["clf__C"]})')
        else:
            logger.info(
                f'Target "{t}": {macrof:.4} +/- {macrof_std:.4} macroF (mean of {cv}CV)')
        fmacros4tgt.append(macrof)
        # y_pred_arys[t] = y_pred

    # fmacro = np.mean(fmacros4tgt)
    # logger.info(f'Over all targets: {fmacro:.4} macroF')

    # y_test_all = np.concatenate(list(y_test_arys.values()))
    # y_pred_all = np.concatenate(list(y_pred_arys.values()))
    # fmicro = f1_score(y_test_all, y_pred_all, labels=[0, 1], average='macro')
    # logger.info(f'Overall: {fmicro:.4} macroF (in the __micro__ mean of results over all targets)')

    return fmacros4tgt, df_report
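The loop above amounts to a stratified k-fold macro-F evaluation per target. A stripped-down sketch of the same idea with scikit-learn, assuming clf is any estimator with fit/predict (the ModelFactory call is replaced here by a plain classifier argument):

import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score

def cv_macro_f1(clf, x, y, cv=3):
    """Mean and std of macro-F1 over stratified folds."""
    skf = StratifiedKFold(n_splits=cv, shuffle=True)
    scores = []
    for train_idx, test_idx in skf.split(x, y):
        clf.fit(x[train_idx], y[train_idx])
        scores.append(f1_score(y[test_idx], clf.predict(x[test_idx]), average='macro'))
    return np.mean(scores), np.std(scores)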
Example #5
    def train(self, model_type, public_key=None):
        logging.debug(self.train.__name__)
        if public_key is not None:
            self.encryption_service.set_public_key(public_key)

        if self.model is None:
            x, y = self.data_loader.get_data()
            logging.debug('x shape: {}'.format(x.shape))
            self.model = ModelFactory.get_model(model_type)(x, y)

        params = self.model.fit(self.n_iter, self.eta)
        logging.debug('params shape: {}'.format(params.shape))
        return params.tolist()
Example #6
def run_semeval(modelname: str, datafp: str, wvfp: str,
                target: str = 'all', profile: bool = False
                ) -> None:
    """[WIP] conduct SemEval2016 TaskA.6 experiments.

    # Arguments
    datafp:
        Input data in which test and train are combined.
        If you use the SemEval dataset, it should contain a 'Train' column
        with binary flags (True if the tweet belongs to the official train split).
        When you use the SLO dataset, the experiment is conducted by k-fold
        cross-validation.
    profile:
        Use users' profile (description) texts as features. The input file must
        contain a 'profile_t' column.
    """
    # x = Dict[target, ary_tweets], y = Dict[target, ary_labels]
    x_train_arys, y_train_arys, x_test_arys, y_test_arys = load_semeval_data(
        datafp, target=target, profile=profile)

    targets = x_train_arys.keys()
    logger.info('All targets: {}'.format(', '.join(targets)))
    y_pred_arys: Dict[str, np.ndarray] = {}
    fmacros: List[float] = []
    for t in targets:
        logger.info(f'Target: {t}')
        model = ModelFactory.get_model(
            modelname, wvfp, target=t, profile=profile)
        macrof, accuracy, y_pred, _ = train_pred(
            model, modelname,
            x_train_arys[t], y_train_arys[t],
            x_test_arys[t], y_test_arys[t]
        )
        report_result_ea(modelname, model, t, macrof)
        fmacros.append(macrof)
        y_pred_arys[t] = y_pred

    fmacro = np.mean(fmacros)
    logger.info(
        f'Overall: {fmacro:.4} macroF (in the **macro** mean over macroFs of all targets)')

    y_test_all = np.concatenate(list(y_test_arys.values()))
    y_pred_all = np.concatenate(list(y_pred_arys.values()))
    fmicro = f1_score(y_test_all, y_pred_all, labels=[0, 1], average='macro')
    logger.info(
        f'Overall: {fmicro:.4} macroF (in the __micro__ mean of results over all targets)')
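The two "Overall" lines above report different aggregations: the macro mean of the per-target macro-F scores, and a macro-F computed once over the pooled predictions (what the log calls the __micro__ mean). A small sketch separating the two, with illustrative names:

import numpy as np
from sklearn.metrics import f1_score

def overall_scores(y_test_arys, y_pred_arys, labels=(0, 1)):
    # Mean of per-target macro-F scores.
    per_target = [f1_score(y_test_arys[t], y_pred_arys[t], labels=list(labels), average='macro')
                  for t in y_test_arys]
    macro_mean = np.mean(per_target)
    # Macro-F over the pooled predictions of all targets.
    y_test_all = np.concatenate(list(y_test_arys.values()))
    y_pred_all = np.concatenate(list(y_pred_arys.values()))
    pooled = f1_score(y_test_all, y_pred_all, labels=list(labels), average='macro')
    return macro_mean, pooled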
Example #7
    def train_save_model(self):
        """This function constructs, trains and saves an SVM model."""
        # Load the trainset.
        logger.info(f'training model using trainset: {self.trainset_filepath}')
        x_train, y_train = load_combined_data(
            self.trainset_filepath,
            self.labels,
            profile=True,
        )

        # Build the model and train it on the given data.
        model = ModelFactory.get_model(self.model_name,
                                       target=self.target,
                                       wvfp=self.embeddings_filepath,
                                       profile=True)
        model.fit(x_train, y_train)

        # Save the trained model, saving it in a file if desired.
        logger.info(f'saving model in {self.model_filepath} ')
        with open(self.model_filepath, 'wb') as model_fout:
            pickle.dump(model, model_fout)
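A companion sketch (assumed, not part of the source) for reading the model back: anything saved with pickle.dump as above can be restored with pickle.load and used for prediction on features prepared the same way as x_train.

import pickle

def load_saved_model(model_filepath):
    """Load a model previously written by train_save_model."""
    with open(model_filepath, 'rb') as model_fin:
        return pickle.load(model_fin)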
Example #8
    def __init__(self, config):
        # config
        self.config = config

        # network
        self.net = ModelFactory.get_model(config.model_name, config)
        self.net = self.net.cuda()

        # data
        self.train_loader, self.test_loader = get_dataloaders(self.config)

        # loss, optimizer
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=self.config.lr,
                            betas=(0.9, 0.999), weight_decay=0.0005)
        self.criterion = CrossEntropyLoss()
        self.Lgce = GeneralizedCE(q=self.config.q_val)

        # parameters
        self.best_mAP = -1 # init
        self.step = 0
        self.total_loss_per_epoch = 0
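The constructor above only wires up the network, data loaders, losses, and optimizer; the training loop itself is not part of this excerpt. As a hedged sketch of how the configured optimizer and cross-entropy criterion are typically combined in one supervised step (the project's actual loop, including self.Lgce, is not shown here):

    def train_step(self, x, labels):
        # Illustrative only: one plain cross-entropy update with the optimizer above.
        self.net.train()
        x, labels = x.cuda(), labels.cuda()
        loss = self.criterion(self.net(x), labels)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        self.total_loss_per_epoch += loss.item()
        return loss.item()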
Example #9

# Parameters used during training
batch_size = config.BATCH_SIZE
learning_rate = 0.001
training_iters = 101
save_step = 10
display_steps = 20
validation_log_loss_incr_threshold = 0.1
last_errors = 2
tolerance = 20
dropout = 0.5 # Dropout, probability to keep units
beta = 0.01

# Construct model
factory = ModelFactory()
model = factory.get_network_model()

if not config.RESTORE:
    # Add tensors to the 'vars' collection stored in the model graph definition
    tf.add_to_collection('vars', x)
    tf.add_to_collection('vars', y)
    tf.add_to_collection('vars', keep_prob)

    for weight_var in model.weights():
        tf.add_to_collection('vars', weight_var)

    for bias_var in model.biases():
        tf.add_to_collection('vars', bias_var)
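On the restore side (a sketch assuming TensorFlow 1.x, not code from the source), the 'vars' collection written above can be recovered after re-importing the saved meta graph:

import tensorflow as tf

def restore_vars(session, meta_path, checkpoint_dir):
    saver = tf.train.import_meta_graph(meta_path)
    saver.restore(session, tf.train.latest_checkpoint(checkpoint_dir))
    # Returns the tensors that were added with tf.add_to_collection('vars', ...)
    return tf.get_collection('vars')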
Example #10
def process_tweets(tweets, lang):
    model = ModelFactory.create(lang)
    classified_tweets = classify(model, tweets)
    return classified_tweets
Example #11
def main(args):
    global_mark = args.task + "_" + args.model
    print(str(args.pre_train) + " model")
    if args.task == "wikiqa":  # ModelParam is just a tuple-style container; random_size is the negative-answer pool size
        model_param = ModelParam(hidden_dim=args.hidden_dim,
                                 enc_timesteps=12,
                                 dec_timesteps=50,
                                 batch_size=args.batch_size,
                                 random_size=15,
                                 lr=args.lr,
                                 k_value_ques=args.k_value_ques,
                                 k_value_ans=args.k_value_ans)
    elif args.task == "trecqa":
        model_param = ModelParam(hidden_dim=args.hidden_dim,
                                 enc_timesteps=12,
                                 dec_timesteps=50,
                                 batch_size=args.batch_size,
                                 random_size=15,
                                 lr=args.lr,
                                 k_value_ques=args.k_value_ques,
                                 k_value_ans=args.k_value_ans)
    elif args.task == "insqa":
        model_param = ModelParam(hidden_dim=args.hidden_dim,
                                 enc_timesteps=12,
                                 dec_timesteps=50,
                                 batch_size=args.batch_size,
                                 random_size=50,
                                 lr=args.lr,
                                 k_value_ques=args.k_value_ques,
                                 k_value_ans=args.k_value_ans)
    elif args.task == "selqa":
        model_param = ModelParam(hidden_dim=args.hidden_dim,
                                 enc_timesteps=12,
                                 dec_timesteps=50,
                                 batch_size=args.batch_size,
                                 random_size=15,
                                 lr=args.lr,
                                 k_value_ques=args.k_value_ques,
                                 k_value_ans=args.k_value_ans)

    logging.info(model_param.__str__())
    vocab_all, data_generator, embedding_file, dev_data, test_data = task_data_ready(
        args.task, model_param)

    train_model, predict_model = ModelFactory.get_model(model_param,
                                                        embedding_file,
                                                        vocab_all.NumIds(),
                                                        model_type=args.model)

    # Evaluation helper, defined after the model has been prepared
    def data_evaluate(epoch, small_evaluate_data, flag):
        c_1_j = 0
        c_2_j = 0
        Top1 = 0.0
        no_answer = 0.0
        for i, d in enumerate(small_evaluate_data.values()):
            prog_bar(i, len(small_evaluate_data))
            question = d["question"]
            answers = d["answer"]
            question_len = d["ques_len"]
            ans_len = d["ans_len"]
            sims = predict_model.predict(
                [question, answers, question_len, ans_len],
                batch_size=len(question))
            sims = sims[:, 0]
            rank_index = np.argsort(sims).tolist()[::-1]
            score = 0.0
            count = 0.0
            if np.sum(d["label"]) > 0:
                for i in range(1, len(sims) + 1):
                    if d["label"][rank_index[i - 1]] == 1:
                        count += 1
                        score += count / i
                for i in range(1, len(sims) + 1):
                    if d["label"][rank_index[i - 1]] == 1:
                        c_2_j += 1 / float(i)
                        break
                if d["label"][rank_index[0]] == 1:
                    Top1 += 1
                c_1_j += score / count
            else:
                no_answer += 1

        Top1 = Top1 / float(len(small_evaluate_data) - no_answer)
        MAP = c_1_j / float(len(small_evaluate_data) - no_answer)
        MRR = c_2_j / float(len(small_evaluate_data) - no_answer)
        print("no_answer:", no_answer)
        logging.info(global_mark + " evaluate on " + flag + " data at epoch " +
                     str(epoch) + ' MAP: %f' % MAP)
        logging.info(" evaluate on " + flag + " data at epoch " + str(epoch) +
                     ' MRR: %f' % MRR)
        logging.info(" evaluate on " + flag + " data at epoch " + str(epoch) +
                     ' Top@1: %f' % Top1)
        result_log(global_mark + " evaluate on " + flag + " data at epoch " +
                   str(epoch) + ' Top@1: %f' % Top1)
        result_log(global_mark + " evaluate on " + flag + " data at epoch " +
                   str(epoch) + ' MAP: %f' % MAP)
        result_log(global_mark + " evaluate on " + flag + " data at epoch " +
                   str(epoch) + ' MRR: %f' % MRR)
        return Top1, MAP, MRR
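    # Annotation (not in the source): for each question above, score / count is the
    # average precision (the per-question MAP term), c_2_j accumulates the reciprocal
    # rank of the first correct answer (MRR), and Top1 counts rank-1 hits; questions
    # with no positive label are counted in no_answer and excluded from all averages.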

    if args.model != "listwise":
        reload_epoch = args.pre_train
        assert os.path.exists(
            'model/train_weights_epoch_%s.h5' %
            (str(reload_epoch) + "_" + args.task +
             "_listwise")), "please pre-train listwise approach"
        specific_load_epoch(train_model,
                            str(reload_epoch) + "_" + args.task + "_listwise",
                            "train")
    best_epoch = 0
    best_map = 0
    score_list = []
    for i in range(1, args.epoch + 1):
        if args.task == "wikiqa":
            train_filename = "./data/wikiqa/wiki_train.pkl"
            questions, answers, label, question_len, answer_len = data_generator.wikiQaGenerate(
                train_filename, "basic")
        elif args.task == "trecqa":
            train_filename = "./data/trecqa/trec_train.pkl"
            questions, answers, label, question_len, answer_len = data_generator.trecQaGenerate(
                train_filename, "basic")
        elif args.task == "insqa":
            train_filename = "./data/insqa/insqa_train.pkl"
            questions, answers, label, question_len, answer_len = data_generator.trecQaGenerate(
                train_filename, "basic")
        elif args.task == "selqa":
            train_filename = "./data/selqa/selqa_train.pkl"
            questions, answers, label, question_len, answer_len = data_generator.trecQaGenerate(
                train_filename, "basic")

        logging.info('Fitting epoch %d' % i)

        train_model.fit([questions, answers, question_len, answer_len],
                        label,
                        nb_epoch=1,
                        batch_size=model_param.batch_size,
                        validation_split=0,
                        verbose=1,
                        shuffle=True)

        #### Evaluate on dev and test sets
        dev_top1, dev_map, dev_mrr = data_evaluate(i, dev_data, "dev")
        test_top1, test_map, test_mrr = data_evaluate(i, test_data, "test")
        if dev_map > best_map:
            best_map = dev_map
            best_epoch = i
            score_list = [
                dev_map, dev_mrr, test_map, test_mrr, dev_top1, test_top1
            ]
    ####### Best result
    logging.info("best model at epoch " + str(best_epoch))
    logging.info("the dev score of best model: MAP_" + str(score_list[0]) +
                 " MRR_" + str(score_list[1]) + " Top1_" + str(score_list[4]))
    logging.info("the test score of best model: MAP_" + str(score_list[2]) +
                 " MRR_" + str(score_list[3]) + " Top1_" + str(score_list[5]))
Example #12
def run_train(modelname,
              x_train_arys, y_train_arys,
              x_test_arys, y_test_arys,
              wvfp, profile, params=None, combined=False):
    """This function runs a training epoch on the specified model on the given
    data. It will run all combinations of training and testing targets.
    """

    train_targets = x_train_arys.keys()
    test_targets = x_test_arys.keys()
    logger.info(f'train targets: {list(train_targets)}; '
                f'test targets: {list(test_targets)}')

    fmacros: List[float] = []
    accuracies: List[float] = []
    df_report = pd.DataFrame()

    y_pred_arys: Dict[str, np.ndarray] = {}

    model = ModelFactory.get_model(
        modelname, wvfp=wvfp,
        profile=profile, params=params)

    # If the --combined flag is set, create another entry in the training array.
    # Delete the other entries so only the combined trainset is used in tests.
    if combined:
        x_train_combined = []
        y_train_combined = []

        for train_target in train_targets:
            for x_row in x_train_arys[train_target]:
                x_train_combined.append(x_row)
            for y_row in y_train_arys[train_target]:
                y_train_combined.append(y_row)

        x_train_arys.clear()
        y_train_arys.clear()
        x_train_arys['combined'] = np.array(x_train_combined)
        y_train_arys['combined'] = np.array(y_train_combined)

    # Train a model on each train target and test it on all test targets.
    for train_target in train_targets:

        target_fmacros: List[float] = []

        for test_target in test_targets:

            logger.info(f'train target: {train_target}; '
                        f'test target: {test_target}')
            macrof, accuracy, y_pred, df_new_columns = train_pred(
                model, modelname,
                x_train_arys[train_target], y_train_arys[train_target],
                x_test_arys[test_target], y_test_arys[test_target],
                train=train_target,
                target=test_target
            )

            for new_column in df_new_columns:
                df_report[new_column] = df_new_columns[new_column]

            report_result_ea(modelname, model, test_target, macrof)
            fmacros.append(macrof)
            target_fmacros.append(macrof)
            accuracies.append(accuracy)
            y_pred_arys[test_target] = y_pred

        logger.info(f'Average macroF for {train_target}: {np.average(target_fmacros)}')

    # Compute/print macro-f1 and micro-f1 summary statistics.
    fmacro = np.mean(fmacros)
    accuracy = np.mean(accuracies)
    logger.info(
        f'Overall: {fmacro:.4} macroF (in the **macro** mean over macroFs of all targets)')
    logger.info(
        f'Overall: {accuracy:.4} accuracy (in the accuracy mean over accuracies of all targets)')

    y_test_all = np.concatenate(list(y_test_arys.values()))
    y_pred_all = np.concatenate(list(y_pred_arys.values()))
    fmicro = f1_score(y_test_all, y_pred_all, labels=[0, 1, 2], average='macro')
    logger.info(
        f'Overall: {fmicro:.4} microF (in the __micro__ mean of results over all targets)')

    return fmacro, df_report
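The --combined branch above builds the merged trainset by appending rows one at a time. An equivalent, more compact sketch using NumPy concatenation (same behavior under the assumption that the per-target arrays are concatenable along axis 0):

import numpy as np

def combine_train_targets(x_train_arys, y_train_arys):
    x_combined = np.concatenate(list(x_train_arys.values()))
    y_combined = np.concatenate(list(y_train_arys.values()))
    x_train_arys.clear()
    y_train_arys.clear()
    x_train_arys['combined'] = x_combined
    y_train_arys['combined'] = y_combined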
Example #13
def run_fixed_fast(modelname,
                   x_train_arys, y_train_arys,
                   x_test_arys, y_test_arys,
                   wvfp, profile, params=None):
    """This function runs a training epoch on the specified model on the given
    data. It is meant for faster training in parameter search.
    This method assumes that the training samples contain only one target, in terms of reporting test results.
    """

    train_targets = x_train_arys.keys()
    test_targets = y_test_arys.keys()
    logger.info(f'train targets: {list(train_targets)}; '
                f'test targets: {list(test_targets)}')
    x_train = np.concatenate(list(x_train_arys.values()))
    y_train = np.concatenate(list(y_train_arys.values()))
    x_test = np.concatenate(list(x_test_arys.values()))
    y_test = np.concatenate(list(y_test_arys.values()))

    fmacros: List[float] = []
    accuracies: List[float] = []

    # Train a model on all train targets and test it on all test targets once.
    model = ModelFactory.get_model(
        modelname, wvfp=wvfp,
        profile=profile, params=params)

    # Assume neural models only
    x_train = [x_train, x_test]
    y_train = to_categorical(y_train)
    # TODO: label smoothing here too

    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    microf = f1_score(y_test, y_pred, labels=[0, 1, 2], average='macro')

    # Assume python3.6+: dict order is fixed
    test_lengths = [len(ary) for ary in y_test_arys.values()]
    test_lengths = np.cumsum(test_lengths)
    indices = zip([0] + test_lengths.tolist(), test_lengths)
    for target, (start, end) in zip(test_targets, indices):
        macrof = f1_score(y_test_arys[target], y_pred[start:end],
                          labels=[0, 1, 2], average='macro')
        logger.info(f'Target "{target}": {macrof:.4} macroF')
        logger.debug(
            f'f1 for each label = {f1_score(y_test_arys[target], y_pred[start:end], labels=[0, 1, 2], average=None)}')
        logger.debug(
            f'confusion matrix\n{confusion_matrix(y_test_arys[target], y_pred[start:end])}')
        accuracy = accuracy_score(y_test_arys[target], y_pred[start:end])
        fmacros.append(macrof)
        accuracies.append(accuracy)

    # Compute/print macro-f1 and micro-f1 summary statistics.
    macrof = np.mean(fmacros)
    accuracy = np.mean(accuracies)
    logger.info(
        f'Overall: {macrof:.4} macroF (the **macro** mean over macroFs of all targets)')
    logger.info(
        f'Overall: {microf:.4} microF (the **micro** f1-score over all targets)')
    logger.info(
        f'Overall: {accuracy:.4} accuracy (in the accuracy mean over accuracies of all targets)')

    return macrof
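The cumulative-sum bookkeeping above maps the pooled predictions back to per-target slices; it relies on dictionaries preserving insertion order (guaranteed from Python 3.7). The same idea isolated into a helper, with illustrative names:

import numpy as np

def split_by_target(y_pred, y_test_arys):
    """Slice pooled predictions back into one array per target, in dict order."""
    lengths = np.cumsum([len(ary) for ary in y_test_arys.values()])
    starts = [0] + lengths.tolist()[:-1]
    return {target: y_pred[start:end]
            for target, start, end in zip(y_test_arys, starts, lengths)}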