def test_tfidf(self):
    df_train, df_dev, df_test, metadata = get_fake_dataset(
        with_text_col=True)
    text_config = Mapping()
    text_config.mode = 'tfidf'
    text_config.max_words = 20
    encoder = Encoder(metadata, text_config)
    y_train, X_train_struc, X_train_text = encoder.fit_transform(df_train)
    y_dev, X_dev_struc, X_dev_text = encoder.transform(df_dev)
    y_test, X_test_struc, X_test_text = encoder.transform(df_test)
    model_config = get_fake_modelconfig('./outputs_test')
    model_config.output_dir = os.path.join(model_config.output_dir,
                                           'tfidf_text_only')
    if not os.path.exists(model_config.output_dir):
        os.makedirs(model_config.output_dir)
    model = Model(text_config, model_config)
    # Validate on the training set itself: the tiny fake dataset should be
    # perfectly fit, so validation accuracy is expected to reach 1.0.
    hist = model.train(y_train, X_train_struc, X_train_text,
                       y_train, X_train_struc, X_train_text)
    val_acc_true = 1.0
    self.assertTrue(np.isclose(val_acc_true, hist.history['val_acc'][-1]))
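# For reference, a minimal sketch of the `Mapping` helper these tests assume:
# a dict that exposes its keys as attributes (text_config.mode, etc.). This is
# a hypothetical reconstruction; the project's real Mapping class may differ.
class _MappingSketch(dict):
    """Dict with attribute-style access, e.g. m.mode == m['mode']."""

    def __getattr__(self, key):
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

    def __setattr__(self, key, value):
        self[key] = value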
def test_lstm(self):
    df_train, df_dev, df_test, metadata = get_fake_dataset(
        with_text_col=True)
    # Change this to wherever the pre-trained GloVe file is stored.
    glove_file_path = 'glove/glove.6B.50d.txt'
    text_config = Mapping()
    text_config.mode = 'glove'
    text_config.max_words = 20
    text_config.maxlen = 5
    text_config.embedding_dim = 50
    text_config.embeddings_index = open_glove(glove_file_path)
    encoder = Encoder(metadata, text_config=text_config)
    y_train, X_train_struc, X_train_text = encoder.fit_transform(df_train)
    y_dev, X_dev_struc, X_dev_text = encoder.transform(df_dev)
    y_test, X_test_struc, X_test_text = encoder.transform(df_test)
    text_config.embedding_matrix = encoder.embedding_matrix
    model_config = get_fake_modelconfig('./outputs_test')
    model_config.output_dir = os.path.join(model_config.output_dir, 'lstm')
    if not os.path.exists(model_config.output_dir):
        os.makedirs(model_config.output_dir)
    model = Model(text_config, model_config)
    # Validate on the training set itself; see test_tfidf.
    hist = model.train(y_train, X_train_struc, X_train_text,
                       y_train, X_train_struc, X_train_text)
    val_acc_true = 1.0
    self.assertTrue(np.isclose(val_acc_true, hist.history['val_acc'][-1]))
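# A minimal sketch of what `open_glove` is assumed to do: parse a GloVe text
# file into a {word: vector} embeddings index. Hypothetical reconstruction;
# the project's real helper may differ.
def _open_glove_sketch(glove_file_path):
    embeddings_index = {}
    with open(glove_file_path, encoding='utf-8') as f:
        for line in f:
            values = line.split()
            # First token is the word, the rest are the embedding weights.
            embeddings_index[values[0]] = np.asarray(values[1:],
                                                     dtype='float32')
    return embeddings_index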
def test_strucdata_only(self):
    df_train, df_dev, df_test, metadata = get_fake_dataset(
        with_text_col=False)
    encoder = Encoder(metadata, text_config=None)
    y_train, X_train_struc, X_train_text = encoder.fit_transform(df_train)
    y_dev, X_dev_struc, X_dev_text = encoder.transform(df_dev)
    y_test, X_test_struc, X_test_text = encoder.transform(df_test)
    # Without a text column there should be no encoded text features.
    self.assertIsNone(X_train_text)
    self.assertIsNone(X_dev_text)
    self.assertIsNone(X_test_text)
    model_config = get_fake_modelconfig('./outputs_test')
    model_config.output_dir = os.path.join(model_config.output_dir,
                                           'dense_mlp')
    if not os.path.exists(model_config.output_dir):
        os.makedirs(model_config.output_dir)
    model = Model(text_config=None, model_config=model_config)
    hist = model.train(y_train, X_train_struc, X_train_text,
                       y_train, X_train_struc, X_train_text)
    val_acc_true = 1.0
    self.assertTrue(np.isclose(val_acc_true, hist.history['val_acc'][-1]))
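# These methods take `self` and use unittest-style asserts, so they are
# assumed to live in a unittest.TestCase subclass (the class header is not
# shown above); a standard entry point would then be:
#
#     if __name__ == '__main__':
#         unittest.main()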
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--encoded_data_dir', type=str,
                        help='directory to load the encoded data.')
    # data_name is optional.
    parser.add_argument('--data_name', type=str,
                        help='which data will be used? (kickstarter or indiegogo?)')
    parser.add_argument('--search_space_filepath', type=str,
                        help='where to load the search space file?')
    parser.add_argument('--output_dir', type=str,
                        help='directory to save the trained model and related model_config.')
    parser.add_argument('--task_type', type=str, default='classification',
                        help='what is the type of this task? (classification or regression?)')
    parser.add_argument('--num_classes', type=int,
                        help='number of classes (classification) or outputs (regression)?')
    parser.add_argument('--model_type', type=str, default='mlp',
                        help='what type of NN model to try? (mlp or skip_connections?)')
    parser.add_argument('--num_trials', type=int, default=1,
                        help='how many trials to run the model?')
    args = parser.parse_args()

    if args.data_name is not None and args.encoded_data_dir is not None:
        path_to_data = os.path.join(args.encoded_data_dir, args.data_name)
        path_to_save = os.path.join(args.output_dir, args.data_name)
        if not os.path.exists(path_to_save):
            os.makedirs(path_to_save)
    elif args.data_name is None and args.encoded_data_dir is not None:
        path_to_data = args.encoded_data_dir
        path_to_save = args.output_dir
    else:
        raise argparse.ArgumentTypeError(
            '{} or {} cannot be recognized.'.format(args.data_name,
                                                    args.encoded_data_dir))

    ###########################################
    ## load encoded training set and dev set ##
    ###########################################
    y_train_path = os.path.join(path_to_data, 'y_train.npy')
    if os.path.exists(y_train_path):
        y_train = np.load(y_train_path, mmap_mode='r')
    else:
        raise ValueError('y_train is not found!')

    X_train_struc_path = os.path.join(path_to_data, 'X_train_struc.npy')
    if os.path.exists(X_train_struc_path):
        X_train_struc = np.load(X_train_struc_path, mmap_mode='r')
    else:
        X_train_struc = None

    X_train_text_path = os.path.join(path_to_data, 'X_train_text.npy')
    if os.path.exists(X_train_text_path):
        X_train_text = np.load(X_train_text_path, mmap_mode='r')
    else:
        X_train_text = None

    y_dev_path = os.path.join(path_to_data, 'y_dev.npy')
    if os.path.exists(y_dev_path):
        y_dev = np.load(y_dev_path, mmap_mode='r')
    else:
        raise ValueError('y_dev is not found!')

    X_dev_struc_path = os.path.join(path_to_data, 'X_dev_struc.npy')
    if os.path.exists(X_dev_struc_path):
        X_dev_struc = np.load(X_dev_struc_path, mmap_mode='r')
    else:
        X_dev_struc = None

    X_dev_text_path = os.path.join(path_to_data, 'X_dev_text.npy')
    if os.path.exists(X_dev_text_path):
        X_dev_text = np.load(X_dev_text_path, mmap_mode='r')
    else:
        X_dev_text = None

    text_config_path = os.path.join(path_to_data, 'text_config.json')
    if os.path.exists(text_config_path):
        with open(text_config_path, 'r') as f:
            text_config = Mapping(json.load(f))
    else:
        text_config = None

    if text_config is not None and text_config.mode == 'glove':
        embedding_matrix_path = text_config.embedding_matrix_path
        if os.path.exists(embedding_matrix_path):
            text_config.embedding_matrix = np.load(embedding_matrix_path,
                                                   mmap_mode='r')
        else:
            raise ValueError('embedding_matrix is not found!')

    ############################################
    ## sample model config from search space  ##
    ############################################
    if args.task_type is not None and args.num_classes is not None:
        print('you are choosing ' + args.model_type + ' as the model type!')
        default_model_config = create_default_modelconfig(
            args.task_type, args.num_classes, args.model_type, path_to_save)
    else:
        raise ValueError('You are missing task_type or num_classes or both!')

    ## load the search space file provided by the user ##
    with open(args.search_space_filepath, 'r') as f:
        search_space = Mapping(json.load(f))

    #######################################################################
    ## update default model_config based on search_space and train model ##
    #######################################################################
    for i in range(args.num_trials):
        model_config = Mapping(sample_modelconfig(search_space,
                                                  default_model_config))
        model_name = 'model_{}'.format(i)
        print('*' * 20)
        print('model_config: ' + model_config.output_dir)
        model_config.output_dir = os.path.join(default_model_config.output_dir,
                                               model_name)
        if not os.path.exists(model_config.output_dir):
            os.makedirs(model_config.output_dir)
        model = Model(text_config, model_config)
        # Train on the training set and validate on the dev set.
        hist = model.train(y_train, X_train_struc, X_train_text,
                           y_dev, X_dev_struc, X_dev_text)

        ## save hist.history and model_config ##
        history_path = os.path.join(model_config.output_dir, 'history.json')
        with open(history_path, 'w') as hf:
            json.dump(hist.history, hf)
        model_config_savepath = os.path.join(model_config.output_dir,
                                             'model_config.json')
        with open(model_config_savepath, 'w') as mf:
            json.dump(model_config, mf)
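# Standard entry point; example invocation (the script name and all paths
# below are illustrative, not taken from the project):
#   python train.py --encoded_data_dir ./encoded --data_name KICK \
#       --search_space_filepath ./search_space.json --output_dir ./outputs \
#       --task_type classification --num_classes 2 --num_trials 5
if __name__ == '__main__':
    main()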
class Trainer(object):
    def __init__(self, cfg):
        self.storage = {}
        self.device = cfg.SOLVER.DEVICE
        self.max_iter = cfg.SOLVER.MAX_ITERS
        self.log_dir = cfg.SOLVER.TENSORBOARD_WRITER.LOG_DIR
        self.base_lr = cfg.SOLVER.LR.BASE_LR
        optimizer_name = cfg.SOLVER.OPTIMIZER
        self.weight_decay = cfg.SOLVER.WEIGHT_DECAY
        self.weights = cfg.SOLVER.WEIGHTS
        self.image_period = cfg.SOLVER.TENSORBOARD_WRITER.IMAGE_PERIOD
        self.scalar_period = cfg.SOLVER.TENSORBOARD_WRITER.SCALAR_PERIOD
        self.save_period = cfg.SOLVER.CHECKPOINT_PERIOD
        self.save_model_dir = cfg.SOLVER.SAVE_DIR
        self.model_name = cfg.SOLVER.CHECKPOINT_NAME
        data_loader = build_train_data_loader(cfg)
        self._data_loader_iter = iter(data_loader)
        self.model = Model(cfg, True).train().to(self.device)
        self.optimizer = self.build_optimizer(optimizer_name, self.model)
        self.lr_scheduler = build_LRscheduler(self.optimizer, cfg)
        self.iter = 0
        self.writer = None
        self.tic = 0
        self.toc = 0

    def build_optimizer(self, name: str,
                        model: torch.nn.Module) -> torch.optim.Optimizer:
        """No bias decay: Bag of Tricks for Image Classification with
        Convolutional Neural Networks (https://arxiv.org/pdf/1812.01187.pdf)"""
        weight_p, bias_p = [], []
        for p_name, p in model.named_parameters():
            if 'bias' in p_name:
                bias_p += [p]
            else:
                weight_p += [p]
        # Apply weight decay to weights only; biases are exempt.
        parameters = [{
            'params': weight_p,
            'weight_decay': self.weight_decay
        }, {
            'params': bias_p,
            'weight_decay': 0
        }]
        if name == 'Adam':
            return torch.optim.Adam(parameters, lr=self.base_lr)
        if name == 'SGD':
            return torch.optim.SGD(parameters, lr=self.base_lr)
        if name == 'SWA':
            # Stochastic Weight Averaging: Averaging Weights Leads to Wider
            # Optima and Better Generalization
            # (https://arxiv.org/pdf/1803.05407.pdf)
            base_opt = torch.optim.SGD(parameters, lr=self.base_lr)
            return SWA(base_opt, swa_start=10, swa_freq=5, swa_lr=self.base_lr)
        raise ValueError('unknown optimizer: ' + name)

    def before_train(self):
        if self.weights != '':
            checkpoint = torch.load(self.weights)
            self.model.load_state_dict(checkpoint)
        if not os.path.exists(self.save_model_dir):
            os.makedirs(self.save_model_dir)
        self.writer = SummaryWriter(self.log_dir)
        self.model.train()

    def after_train(self):
        model_name = self.model_name + '_' + str(self.iter) + '.pth'
        torch.save(self.model.state_dict(),
                   os.path.join(self.save_model_dir, model_name))

    def before_step(self):
        self.tic = time.time()

    def after_step(self):
        # Record iteration time.
        self.toc = time.time()
        iter_time = self.toc - self.tic
        self.storage['iter_time'] = iter_time

        # Write everything in storage to tensorboard; nested dicts are
        # flattened into 'key/sub_key' tags.
        for key in self.storage:
            if isinstance(self.storage[key], dict):
                sub_dict = self.storage[key]
                for sub_key in sub_dict:
                    value = sub_dict[sub_key]
                    self._write_tensorboard(key + '/' + sub_key, value)
            else:
                value = self.storage[key]
                self._write_tensorboard(key, value)

        # Save a checkpoint periodically.
        if self.iter % self.save_period == 0:
            model_name = self.model_name + '_' + str(self.iter) + '.pth'
            torch.save(self.model.state_dict(),
                       os.path.join(self.save_model_dir, model_name))

    def _write_tensorboard(self, key: str,
                           value: Union[torch.Tensor, int, float]):
        # 4-D tensors (NCHW batches) are logged as images, everything else
        # as scalars, each at its own period.
        if isinstance(value, torch.Tensor) and len(value.shape) == 4:
            if self.iter % self.image_period == 0:
                self.writer.add_images(key, value, self.iter)
        elif self.iter % self.scalar_period == 0:
            self.writer.add_scalar(key, value, self.iter)

    def train(self, start_iter=0):
        try:
            self.before_train()
            for self.iter in range(start_iter, self.max_iter):
                self.before_step()
                self.run_step()
                self.after_step()
        finally:
            # Always save a final checkpoint, even if training is interrupted.
            self.after_train()

    def run_step(self):
        data = next(self._data_loader_iter)
        total_loss, losses, metrics = self.model(data)
        self.storage['total_loss'] = total_loss
        self.storage['losses'] = losses
        self.storage['image'] = data['image']
        self.storage['training_mask'] = data['training_mask']
        self.storage['metrics'] = metrics
        grads = {}
        self.storage['grads'] = grads

        self.optimizer.zero_grad()
        total_loss.backward()
        self.optimizer.step()
        self.storage['lr'] = self.lr_scheduler.get_lr()[0]
        self.lr_scheduler.step()

        # Record the mean absolute gradient per parameter for monitoring.
        for name, parm in self.model.named_parameters():
            if parm.grad is not None:
                grads[name] = torch.mean(torch.abs(parm.grad))
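# Minimal usage sketch. `load_config` is a hypothetical loader; any config
# object exposing the cfg.SOLVER.* keys read in Trainer.__init__ would work:
#
#     cfg = load_config('configs/train.yaml')
#     trainer = Trainer(cfg)
#     trainer.train(start_iter=0)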