コード例 #1
0
def train(config: Config, cyclegan: CycleGAN):
    """Train the given CycleGAN using the hyperparameters held in *config*.

    Wires checkpoint/snapshot callbacks onto a Trainer, then runs the
    training loop for ``config.n_epochs`` epochs.
    """
    trainer = Trainer(config, cyclegan, dataset)

    def _checkpoint_cb(current_epoch):
        # Persist model weights every `checkpoint_interval` epochs.
        if current_epoch % config.checkpoint_interval == 0:
            save_checkpoint(current_epoch, cyclegan, config.n_epochs,
                            dir_checkpoints)

    def _snapshot_cb(current_epoch, current_batch):
        # Dump sample translations every `snapshot_interval` iterations.
        if current_batch % config.snapshot_interval == 0:
            save_snapshot(current_epoch, current_batch, cyclegan,
                          dataset.batch_test(1), config.image_shape,
                          dir_snapshots)

    trainer.on_epoch_start.append(_checkpoint_cb)
    trainer.on_iteration_start.append(_snapshot_cb)

    # NOTE(review): `dataset`, `dir_checkpoints`, `dir_snapshots` and `epoch`
    # are free names expected at module level — confirm they exist there
    # (`epoch` in particular looks like it should be a resume epoch).
    trainer.train(n_epochs=config.n_epochs,
                  batch_size=config.batch_size,
                  start_epoch=epoch)
コード例 #2
0
def core_flow(config: MutableMapping) -> None:
    """Dispatch to test / resume-training / SWA-build / fresh-training.

    Precedence: inference checkpoint > restart checkpoint > SWA build from
    a checkpoint list > training from scratch.
    """
    dataset = DatasetCollection(config)
    trainer = Trainer(dataset, config)

    inference_ckpt = config.experiment.inference_ckpt
    restart_ckpt = config.trainer.restart_training_ckpt
    swa_source_ckpts = config.trainer.build_swa_from_ckpts

    if inference_ckpt:
        # Testing mode takes precedence over training if both ckpts specified.
        logger.info(
            f'Testing model weights loaded from {inference_ckpt}...'
        )
        trainer.init_test(inference_ckpt)
    elif restart_ckpt:
        # Restarting training takes precedence over just building custom
        # SWA checkpoints.
        logger.info(
            f'Restarting model training from {restart_ckpt}...'
        )
        trainer.train(restart_ckpt)
    elif swa_source_ckpts:
        logger.info(
            f'Building swa checkpoint from specified ckpts: {swa_source_ckpts}...'
        )
        swa_ckpt = trainer.swa_ckpt_build(mode="custom",
                                          ckpt_list=swa_source_ckpts)
        logger.info(
            f'Successfully built SWA checkpoint ({swa_ckpt}) from provided list of checkpoints, '
            f'proceeding with test')
        trainer.init_test(swa_ckpt)
    else:
        logger.info('Starting model training from scratch...')
        trainer.train()
コード例 #3
0
def main():
    """Entry point: load config, build model/optimizer, and run training."""
    # Fix: json.load(open(...)) leaked the file handle; use a context
    # manager so it is closed deterministically.
    with open('./config.json') as f:
        config = json.load(f)
    data_manager = DataManager(config)

    model = FConvNet()
    # Truthiness test instead of `x is True` (PEP 8 anti-pattern); JSON
    # booleans decode to bool, so behavior is unchanged for valid configs.
    if config['use_cuda']:
        model = model.cuda()
    model.apply(FConvNet.init_weights)

    criterion = torch.nn.MSELoss()
    optimizer = optim.Adam(model.parameters(),
                           lr=config['lr'],
                           weight_decay=config['weight_decay'])
    train_loader, validation_loader = data_manager.get_train_eval_dataloaders(
        config['train_data_path'])

    trainer = Trainer(model, train_loader, validation_loader, criterion,
                      optimizer, config)
    trainer.train()
コード例 #4
0
ファイル: main.py プロジェクト: MatanDanos/AttnGAN-v1.1
def main():
    """CLI entry point: configure logging, parse args, optionally train."""
    # Route loguru output: INFO+ to stdout, everything (DEBUG+) to a
    # timestamped log file.
    logger.remove(0)
    logger.add(sys.stdout, level="INFO")
    logger.add("../logs/{time}.log", level="DEBUG")

    start_time = datetime.now()
    logger.info("Started at: {}".format(start_time))

    # Parsing main arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c',
        '--cfg_path',
        required=True,
        help='Path to the configuration.ini file to work with.')
    parser.add_argument('--train',
                        action='store_true',
                        default=False,
                        help='Training the model')
    args = parser.parse_args()

    # Read Configuration file
    logger.info("Loading config file from path: {}".format(args.cfg_path))
    config = AttnGANConfig(args.cfg_path)
    config.log_config_info()

    # NOTE(review): `device` is not used anywhere in this function — confirm
    # whether Trainer is expected to receive it.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if args.train:
        dataset_name = config.dataset['name']
        logger.info("Starting training on {} dataset".format(dataset_name))
        # Training
        trainer = Trainer(config)
        trainer.train()
    else:
        # Fix: previously the script exited silently when no mode flag was
        # supplied; tell the user why nothing happened.
        logger.warning("No action requested; pass --train to train the model.")
コード例 #5
0
        num_input_channels = next(iter(trainer.train_dataloader))[0].shape[2]
        encoder_params = CNN.generate_params()
        rnn_params = RNN.generate_params()
        model = CRNN(input_len=input_len,
                     output_len=output_len,
                     num_input_channels=num_input_channels,
                     encoder_params=encoder_params,
                     rnn_params=rnn_params)
    elif args.model_type == 'mlp':
        input_len = reduce(lambda x, y: x * y,
                           next(iter(trainer.train_dataloader))[0].shape[1:])
        params = MLP.generate_params()
        model = MLP(input_len=input_len,
                    output_len=output_len,
                    params=params,
                    **params)
    else:
        raise ValueError('"{}" is not a valid model.'.format(args.model_type))

    model = model.to(device)

    # Train Network
    trainer.train(model=model,
                  min_epochs=args.min_epochs,
                  max_epochs=args.max_epochs,
                  patience=args.patience,
                  verbose=args.verbose,
                  test_subject_id=test_subject_id,
                  validation_subject_id=validation_subject_id)

# Visual separator between runs on stdout.
print(f"\n{'#' * 50}\n")
コード例 #6
0
ファイル: train.py プロジェクト: zabulskyy/PropaGANda
from functools import partial
import yaml

from joblib import cpu_count
from torch.utils.data import DataLoader
import torch.backends.cudnn as cudnn

from training.data import get_datasets
from training.trainer import Trainer

# Let cuDNN pick the fastest conv algorithms for fixed input sizes.
cudnn.benchmark = True

if __name__ == '__main__':
    # Fix: yaml.load(f) without an explicit Loader is deprecated and can
    # construct arbitrary Python objects; safe_load only builds plain data.
    with open("config/train.yaml", "r") as f:
        config = yaml.safe_load(f)

    # batch_size is popped so it is not forwarded to Trainer via `config`.
    batch_size = config.pop('batch_size')
    get_dataloader = partial(DataLoader,
                             batch_size=batch_size,
                             num_workers=cpu_count(),
                             shuffle=True,
                             drop_last=True,
                             pin_memory=True)

    datasets = get_datasets(config['dataset'])
    train, val = map(get_dataloader, datasets)

    trainer = Trainer(config, train=train, val=val)
    trainer.train()
コード例 #7
0
ファイル: label_app.py プロジェクト: cockroachzl/light-bulb
class LabelApp:
    """Coordinates an active-learning labelling session.

    Wires together the dataset, label helper, model, trainer and labeller;
    samples high-entropy batches for a human to label and records labels.
    """

    @staticmethod
    def load_from(config_path):
        """Build a LabelApp from the YAML config file at ``config_path``."""
        with open(config_path) as f:
            # Fix: yaml.load without an explicit Loader is deprecated and can
            # execute arbitrary constructors; safe_load builds plain data only.
            config = yaml.safe_load(f)
            parser = ConfigParser(config)
            parser._create_directories()

        task = Task.load_from(parser.task)
        dataset = Dataset.load_from(parser.dataset)
        model_config = config['model']
        label_helper = Label.load_from(parser.label)
        user = config['user']

        return LabelApp(task, dataset, label_helper, user, model_config,
                        parser)

    def __init__(self,
                 task,
                 dataset,
                 label_helper,
                 user,
                 model_config,
                 config,
                 model_labelling=True):
        # Raw config dict unwrapped from the ConfigParser wrapper.
        self.config = config.config
        self.task = task
        self.dataset = dataset
        self.data_type = self.dataset.data_type

        self.label_helper = label_helper

        model_directory = model_config['directory']
        self.model = model_builder.ModelBuilder(dataset, self.label_helper,
                                                model_config).build()

        self.trainer = Trainer(model_directory,
                               self.model,
                               self.dataset,
                               self.label_helper,
                               logger=logger)
        # Resume from previously saved weights if any exist on disk.
        self.trainer.load_existing()

        self.labeller = ModelLabeller(self.model,
                                      self.dataset,
                                      self.label_helper,
                                      logger=logger)

        self.user = user
        self.model_labelling = model_labelling

    def score(self, x):
        """Return raw model scores for input ``x``."""
        return self.model.score(x)

    def predict(self, x):
        """Return model predictions for input ``x``."""
        return self.model.predict(x)

    @property
    def is_done(self):
        """True when every item in the dataset has been labelled."""
        return len(self.dataset.unlabelled) == 0

    def next_model_labelled_batch(self, size=100):
        """Return up to ``size`` model-labelled rows and their target class."""
        model_labelled, target_class = self.dataset.model_labelled(size)
        return model_labelled, target_class

    def next_batch(self,
                   size=10,
                   force_stage=None,
                   reverse_entropy=False,
                   prediction=False):
        """Sample the next batch to label, prioritised by model entropy.

        Returns a tuple ``(sampled_df, stage, predictions, entropies)``.
        Raises ValueError when nothing is left to sample or the dataset's
        data type is unsupported.
        """
        if self.is_done:
            raise ValueError(
                "Tried to sample a batch when there is nothing else to sample")

        logger.debug("Sampling a batch for {} set.".format(
            self.dataset.current_stage))
        self.dataset.set_current_stage()

        current_stage = force_stage if force_stage else self.dataset.current_stage

        if current_stage == Dataset.TEST:
            sampled_df = self.dataset.sample(size)
            return sampled_df, current_stage, [], [0.5] * len(
                sampled_df)  # TODO: This needs to be fixed

        # Over-sample by 5x so there is a pool to pick the highest-entropy
        # items from.
        sampled_df = self.dataset.sample(size * 5)
        # Fix: the original had three identical branches and silently left
        # `x_data` undefined (NameError below) for unrecognized data types.
        if self.data_type in (Dataset.IMAGE_TYPE, Dataset.TEXT_TYPE,
                              Dataset.JSON_TYPE):
            x_data, _ids = self.dataset.unlabelled_set(size * 5)
        else:
            raise ValueError(
                "Unsupported data type: {}".format(self.data_type))

        scores = self.model.score(x_data)

        def entropy_func(score_arr):
            # Shannon entropy along the class axis.
            return np.sum(score_arr * np.log(1 / score_arr), axis=-1)

        if len(scores.shape) == 3:
            # Sequence output: average the per-token entropy of each item.
            entropy = np.array(
                [entropy_func(score).mean() for score in scores])
        else:
            entropy = entropy_func(scores)

        assert len(entropy.shape) == 1

        # argpartition requires num < len(entropy), hence the -1.
        num = min(size, len(entropy) - 1)
        if reverse_entropy:
            entropy_indexes = np.argpartition(entropy, num)[:num]
        else:
            entropy_indexes = np.argpartition(-entropy, num)[:num]

        # Make predictions
        # TODO: This doesn't work for text or json types
        if self.data_type == Dataset.IMAGE_TYPE:
            x_to_score = x_data[entropy_indexes]
        else:
            x_to_score = []

        y_prediction = None
        if prediction and len(x_to_score) > 0:
            y_prediction = self.predict(x_to_score)

        response = (
            sampled_df.iloc[entropy_indexes],
            current_stage,
            y_prediction,
            entropy[entropy_indexes].tolist(),
        )
        return response

    def add_labels(self, labels, avg_time_taken):
        """Record a batch of user labels.

        Classification labels arrive as yes/no judgements on a target class;
        for binary tasks a "no" is inverted to the other class, otherwise
        the disagreement is flagged for later review.
        """
        is_classification = self.label_helper.label_type == 'classification'
        if is_classification:
            is_binary_classification = len(self.label_helper.classes) == 2

            for idx, label in enumerate(labels):
                is_target_class = label['is_target_class']
                # Persist to disk only on the last label of the batch.
                save = idx == len(labels) - 1

                if is_target_class:
                    self.dataset.add_label(
                        label['path'],
                        label['target_class'],
                        Dataset.TRAIN,
                        user=self.user,
                        save=save,
                        time_taken=avg_time_taken,
                    )
                else:
                    if is_binary_classification:
                        # Binary task: "not target" implies the other class.
                        self.dataset.add_label(
                            label['path'],
                            0 if label['target_class'] == 1 else 1,
                            Dataset.TRAIN,
                            user=self.user,
                            save=save,
                            time_taken=avg_time_taken,
                        )
                    else:
                        # If the task is not binary classification, then its impossible to know what the "other" label is.
                        # Flag this as USER_MODEL_DISAGREEMENT
                        self.dataset.add_label(
                            label['path'],
                            label['target_class'],
                            Dataset.USER_MODEL_DISAGREEMENT,
                            user=self.user,
                            save=save,
                            time_taken=avg_time_taken,
                        )
        else:
            # TODO: The is_classification case should fit nicely into code like the ones below: please refactor
            for label in labels:
                self.dataset.add_label(
                    label['path'],
                    label['label'],
                    Dataset.TRAIN,
                    user=self.user,
                    time_taken=avg_time_taken,
                )

    def add_label(self, _id, label, time_taken):
        """Validate, decode and store a single label for item ``_id``."""
        # TODO: Reevaluate this get_data thing, I'm not a fan of this.
        data = self.dataset.get_data(_id)
        self.label_helper.validate(data, label)
        label = self.label_helper.decode(label)
        # _id is just the path to the file
        self.dataset.add_label(
            _id,
            label,
            self.dataset.current_stage,
            user=self.user,
            save=True,
            time_taken=time_taken,
        )

    @property
    def title(self):
        """Task title shown in the UI."""
        return self.task.title

    @property
    def description(self):
        """Task description shown in the UI."""
        return self.task.description

    @property
    def template(self):
        """UI template associated with the task."""
        return self.task.template

    def threaded_train(self):
        """Entry point for the background training thread."""
        self.trainer.train()

    def threaded_label(self):
        """Entry point for the background model-labelling thread."""
        self.labeller.start()

    def get_history(self):
        """Return the trainer's recorded training history."""
        return self.trainer.get_history()

    def get_stats(self):
        """Return dataset labelling statistics."""
        return self.dataset.stats
コード例 #8
0
ファイル: run.py プロジェクト: pierrel/document-labeling
import numpy
import itertools

from training.dataprep.airbnb_example import AirbnbExample
from training.trainer import Trainer
from training.model import DocLabelingModel

example = AirbnbExample(file_path="/path/to/file.csv")
# list(...) instead of a pass-through comprehension (ruff C416/PERF402).
texts = numpy.array(list(example.texts()))
labels = numpy.array(list(example.labels()))

# Hold out the last 50 rows for evaluation.
eval_train_index = len(texts) - 50

train_texts = texts[:eval_train_index]
eval_texts = texts[eval_train_index:]
train_labels = labels[:eval_train_index]
eval_labels = labels[eval_train_index:]

model = DocLabelingModel(len(labels[0]))
trainer = Trainer(model.model)

# Fix: train() returns the fitted model — keep a distinct name instead of
# clobbering the DocLabelingModel wrapper bound above.
trained_model = trainer.train(train_texts, train_labels)
print(trainer.evaluate(eval_texts, eval_labels))

# simple experiment
predictions = trained_model.predict(eval_texts)
results = [(example.read_prediction(pred), example.read_prediction(true))
           for (pred, true) in zip(predictions, eval_labels)]
for result in results:
    print(result, "\n")
コード例 #9
0
            'Predictions from an old simulation. The configuration is uploaded...'
        )
        print('Number of epochs:' + str(epochs))
        print('Latent state dim:' + str(model_config['dim_x']))
        time.sleep(5)
        train = False
        retrain = False

model_config.update({'ds': ds_sel})  # record the chosen dataset class in the config

# evaluation
output_sel = Outputs  # can create new class deriving from it if need richer outputs

#
# Run
#
# load
# NOTE(review): `ds_sel`, `model_sel`, `seq_len`, `seq_stride`, `in_dir`,
# `out_dir`, `predict_len`, `train`, `retrain`, `epochs`, `model_dir` and
# `test_data` are defined earlier in this script (outside this view).
outputs = output_sel(out_dir)
ds = ds_sel(seq_len, seq_stride, in_dir)
outputs.set_ds(ds)
model = model_sel(ds.dim_u, ds.dim_y, model_config)
outputs.set_model(model, predict_len, model_config['dim_x'])
# train
if train:
    trainer = Trainer(model, model_dir)
    trainer.train(ds, epochs, retrain=retrain, test_data=test_data)
    outputs.set_trainer(trainer)

# evaluate
outputs.create_all()
コード例 #10
0
    FaceLandmarkDataset([{'root_dir': config.TRAIN_DATA_DIR,
                         'label_file': config.LANDMARKS_ANNO_FILE}],
                         point_num=config.NUM_LANDMARKS,
                         transform=transform),
    batch_size = config.batch_size,
    num_workers = config.num_threads,
    shuffle=True)

model = LNet()
# Warm-start from a previously trained landmark checkpoint.
model.load_state_dict(torch.load('result/iris_lnet/check_point/32_landmarks_model_200.pth'))
if torch.cuda.device_count() > 1:
    print("Train on ", torch.cuda.device_count(), " GPUs")
    # Fix: nn.DataParallel(model) was created and immediately discarded;
    # the wrapper must be bound back to `model` to actually use multiple GPUs.
    model = nn.DataParallel(model)
model.to(device)

lossfn = nn.MSELoss()
checkpoint = CheckPoint(config.save_path)
optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
# Decay LR by 10x at each milestone epoch in config.step.
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=config.step, gamma=0.1)

logger = Logger(config.save_path)
trainer = Trainer(config.learning_rate, train_loader, model, optimizer,
                  lossfn, scheduler, logger, device, config.save_path)

# Train one epoch at a time, checkpointing the model after every epoch.
for epoch in range(1, config.nEpochs + 1):
    trainer.train(epoch)
    checkpoint.save_model(model, index=epoch, tag=str(config.NUM_LANDMARKS)+'_landmarks')


コード例 #11
0
ファイル: label_app.py プロジェクト: czhu12/light_bulb
class LabelApp:
    """Coordinates an active-learning labelling session.

    Wires together the dataset, label helper, model, trainer and labeller;
    samples high-entropy batches for a human to label and records labels.
    """

    @staticmethod
    def load_from(config_meta):
        """Build a LabelApp from ``config_meta`` (YAML path + log level)."""
        with open(config_meta['path']) as f:
            # Fix: yaml.load without an explicit Loader is deprecated and can
            # execute arbitrary constructors; safe_load builds plain data only.
            config = yaml.safe_load(f)
            parser = ConfigParser(config)
            parser._create_directories()

        task = Task.load_from(parser.task)
        dataset = Dataset.load_from(parser.dataset)
        model_config = config['model']
        label_helper = Label.load_from(parser.label)
        user = config['user']

        # Set up logger
        # NOTE(review): calling load_from twice adds a second handler to the
        # shared 'label_app' logger (duplicate log lines) — confirm intended.
        log_level = config_meta['log_level']
        logger = logging.getLogger('label_app')
        logger.setLevel(getattr(logging, log_level))

        ch = logging.StreamHandler(sys.stdout)
        ch.setFormatter(
            logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
        logger.addHandler(ch)

        return LabelApp(task, dataset, label_helper, user, model_config,
                        parser, logger)

    def __init__(self,
                 task,
                 dataset,
                 label_helper,
                 user,
                 model_config,
                 config,
                 logger,
                 model_labelling=True):
        # Raw config dict unwrapped from the ConfigParser wrapper.
        self.config = config.config
        self.task = task
        self.dataset = dataset
        self.data_type = self.dataset.data_type

        self.label_helper = label_helper

        model_directory = model_config['directory']
        self.model = model_builder.ModelBuilder(dataset, self.label_helper,
                                                model_config).build()

        self.trainer = Trainer(model_directory,
                               self.model,
                               self.dataset,
                               self.label_helper,
                               logger=logger)
        # Resume from previously saved weights if any exist on disk.
        self.trainer.load_existing()

        self.labeller = ModelLabeller(self.model,
                                      self.dataset,
                                      self.label_helper,
                                      logger=logger)

        self.user = user
        self.model_labelling = model_labelling
        self.logger = logger

    def score(self, x):
        """Return raw model scores for input ``x``."""
        return self.model.score(x)

    def predict(self, x):
        """Return model predictions for input ``x``."""
        return self.model.predict(x)

    @property
    def is_done(self):
        """True when every item in the dataset has been labelled."""
        return len(self.dataset.unlabelled) == 0

    def next_model_labelled_batch(self, size=100):
        """Return up to ``size`` model-labelled rows and their target class."""
        model_labelled, target_class = self.dataset.model_labelled(size)
        return model_labelled, target_class

    def next_batch(self,
                   size=10,
                   force_stage=None,
                   reverse_entropy=False,
                   prediction=False):
        """Sample the next batch to label, prioritised by model entropy.

        Returns a tuple ``(sampled_df, stage, predictions, entropies)``.
        Raises ValueError when nothing is left to sample or the dataset's
        data type is unsupported.
        """
        if self.is_done:
            raise ValueError(
                "Tried to sample a batch when there is nothing else to sample")

        self.logger.debug("Sampling a batch for {} set.".format(
            self.dataset.current_stage))
        self.dataset.set_current_stage()

        current_stage = force_stage if force_stage else self.dataset.current_stage

        if current_stage == Dataset.TEST:
            sampled_df = self.dataset.sample(size)
            return sampled_df, current_stage, [], [0.5] * len(
                sampled_df)  # TODO: This needs to be fixed

        # Over-sample by 5x so there is a pool to pick the highest-entropy
        # items from.
        sampled_df = self.dataset.sample(size * 5)
        # Fix: the original had three identical branches and silently left
        # `x_data` undefined (NameError below) for unrecognized data types.
        if self.data_type in (Dataset.IMAGE_TYPE, Dataset.TEXT_TYPE,
                              Dataset.JSON_TYPE):
            x_data, _ids = self.dataset.unlabelled_set(size * 5)
        else:
            raise ValueError(
                "Unsupported data type: {}".format(self.data_type))

        scores = self.model.score(x_data)

        def entropy_func(score_arr):
            # Shannon entropy along the class axis.
            return np.sum(score_arr * np.log(1 / score_arr), axis=-1)

        if len(scores.shape) == 3:
            # Sequence output: average the per-token entropy of each item.
            entropy = np.array(
                [entropy_func(score).mean() for score in scores])
        else:
            entropy = entropy_func(scores)

        assert len(entropy.shape) == 1

        # argpartition requires num < len(entropy), hence the -1.
        num = min(size, len(entropy) - 1)
        if reverse_entropy:
            entropy_indexes = np.argpartition(entropy, num)[:num]
        else:
            entropy_indexes = np.argpartition(-entropy, num)[:num]

        # Make predictions
        # TODO: This doesn't work for text or json types
        if self.data_type == Dataset.IMAGE_TYPE:
            x_to_score = x_data[entropy_indexes]
        else:
            x_to_score = []

        y_prediction = None
        if prediction and len(x_to_score) > 0:
            y_prediction = self.predict(x_to_score)

        response = (
            sampled_df.iloc[entropy_indexes],
            current_stage,
            y_prediction,
            entropy[entropy_indexes].tolist(),
        )
        return response

    def search(self, search_query: str, num_results: int = 20):
        """Full-text search over the dataset; returns matching rows."""
        results = self.dataset.search(search_query, num_results)
        return results

    def labelled_data(self, start_idx, end_idx, labelled=None):
        """Return rows [start_idx, end_idx) and a done flag.

        When ``labelled`` is given, filter to rows with that labelled state.
        The flag is True when ``start_idx`` is past the end of the data.
        """
        df = self.dataset.dataset
        if labelled is not None:
            df = df[df['labelled'] == labelled]

        if start_idx >= len(df):
            return [], True

        rows = df.iloc[start_idx:end_idx]
        return rows, False

    def add_labels(self, labels, avg_time_taken):
        """Record a batch of user labels into the TRAIN split."""
        for label in labels:
            self.dataset.add_label(
                label['path'],
                label['label'],
                Dataset.TRAIN,
                user=self.user,
                time_taken=avg_time_taken,
            )

    def add_label(self, _id, label, time_taken):
        """Validate, decode and store a single label for item ``_id``."""
        # TODO: Reevaluate this get_data thing, I'm not a fan of this.
        data = self.dataset.get_data(_id)
        self.label_helper.validate(data, label)
        label = self.label_helper.decode(label)
        # _id is just the path to the file
        self.dataset.add_label(
            _id,
            label,
            self.dataset.current_stage,
            user=self.user,
            save=True,
            time_taken=time_taken,
        )

    @property
    def title(self):
        """Task title shown in the UI."""
        return self.task.title

    @property
    def description(self):
        """Task description shown in the UI."""
        return self.task.description

    @property
    def template(self):
        """UI template associated with the task."""
        return self.task.template

    def threaded_train(self):
        """Entry point for the background training thread."""
        self.trainer.train()

    def threaded_label(self):
        """Entry point for the background model-labelling thread."""
        self.labeller.start()

    def get_history(self):
        """Return the trainer's recorded training history."""
        return self.trainer.get_history()

    def get_stats(self):
        """Return dataset labelling statistics."""
        return self.dataset.stats
コード例 #12
0
ファイル: label_app.py プロジェクト: czhu12/labelling-tool
class LabelApp:
    """Coordinates an active-learning labelling session.

    Wires together the dataset, label helper, model, trainer and labeller;
    samples high-entropy batches for a human to label and records labels.
    """

    @staticmethod
    def load_from(config_path):
        """Build a LabelApp from the YAML config file at ``config_path``."""
        with open(config_path) as f:
            # Fix: yaml.load without an explicit Loader is deprecated and can
            # execute arbitrary constructors; safe_load builds plain data only.
            config = yaml.safe_load(f)
            parser = ConfigParser(config)
            parser._create_directories()

        task = Task.load_from(parser.task)
        dataset = Dataset.load_from(parser.dataset)
        model_config = config['model']
        label_helper = Label.load_from(parser.label)
        user = config['user']

        return LabelApp(task, dataset, label_helper, user, model_config,
                        parser)

    def __init__(self,
                 task,
                 dataset,
                 label_helper,
                 user,
                 model_config,
                 config,
                 model_labelling=True):
        # Raw config dict unwrapped from the ConfigParser wrapper.
        self.config = config.config
        self.task = task
        self.dataset = dataset
        self.data_type = self.dataset.data_type

        self.label_helper = label_helper

        model_directory = model_config['directory']
        self.model = model_builder.ModelBuilder(dataset, self.label_helper,
                                                model_config).build()

        self.trainer = Trainer(model_directory,
                               self.model,
                               self.dataset,
                               self.label_helper,
                               logger=logger)
        # Resume from previously saved weights if any exist on disk.
        self.trainer.load_existing()

        self.labeller = ModelLabeller(self.model, self.dataset, logger=logger)

        self.user = user
        self.model_labelling = model_labelling

    def score(self, x):
        """Return raw model scores for input ``x``."""
        return self.model.score(x)

    def predict(self, x):
        """Return model predictions for input ``x``."""
        return self.model.predict(x)

    @property
    def is_done(self):
        """True when every item in the dataset has been labelled."""
        return len(self.dataset.unlabelled) == 0

    def next_batch(self, size=10, force_stage=None, reverse_entropy=False):
        """Sample the next batch to label, prioritised by model entropy.

        Returns a tuple ``(sampled_df, stage, x_data, entropies)``.
        Raises ValueError when nothing is left to sample or the dataset's
        data type is unsupported.
        """
        if self.is_done:
            raise ValueError(
                "Tried to sample a batch when there is nothing else to sample")

        logger.debug("Sampling a batch for {} set.".format(
            self.dataset.current_stage))
        self.dataset.set_current_stage()

        current_stage = force_stage if force_stage else self.dataset.current_stage

        if current_stage == Dataset.TEST:
            sampled_df = self.dataset.sample(size)
            return sampled_df, current_stage, [], [0.5] * len(
                sampled_df)  # TODO: This needs to be fixed

        # Over-sample by 5x so there is a pool to pick the highest-entropy
        # items from.
        sampled_df = self.dataset.sample(size * 5)
        if self.data_type == Dataset.IMAGE_TYPE:
            x_data = utils.load_images(sampled_df['path'].values,
                                       self.dataset.input_shape)
        elif self.data_type == Dataset.TEXT_TYPE:
            x_data = sampled_df['text'].values
        else:
            # Fix: previously fell through with `x_data` undefined, raising a
            # confusing NameError below for unsupported data types.
            raise ValueError(
                "Unsupported data type: {}".format(self.data_type))

        scores = self.model.score(x_data)

        def entropy_func(score_arr):
            # Shannon entropy along the class axis.
            return np.sum(score_arr * np.log(1 / score_arr), axis=-1)

        # isinstance instead of type(...) == list (PEP 8).
        if isinstance(scores, list):
            # Multi-output model: average the entropy per item.
            entropy = np.array(
                [entropy_func(score).mean() for score in scores])
        else:
            entropy = entropy_func(scores)

        assert len(entropy.shape) == 1

        # argpartition requires num < len(entropy), hence the -1.
        num = min(size, len(entropy) - 1)
        if reverse_entropy:
            entropy_indexes = np.argpartition(entropy, num)[:num]
        else:
            entropy_indexes = np.argpartition(-entropy, num)[:num]
        response = (
            sampled_df.iloc[entropy_indexes],
            current_stage,
            x_data[entropy_indexes],
            entropy[entropy_indexes].tolist(),
        )
        return response

    def add_label(self, _id, label):
        """Validate, decode and store a single label for item ``_id``."""
        # TODO: Reevaluate this get_data thing, I'm not a fan of this.
        data = self.dataset.get_data(_id)
        self.label_helper.validate(data, label)
        label = self.label_helper.decode(label)
        # _id is just the path to the file
        self.dataset.add_label(_id,
                               label,
                               self.dataset.current_stage,
                               user=self.user)

    @property
    def title(self):
        """Task title shown in the UI."""
        return self.task.title

    @property
    def description(self):
        """Task description shown in the UI."""
        return self.task.description

    def threaded_train(self):
        """Entry point for the background training thread."""
        self.trainer.train()

    def threaded_label(self):
        """Entry point for the background model-labelling thread."""
        self.labeller.start()

    def get_history(self):
        """Return the trainer's recorded training history."""
        return self.trainer.get_history()

    def get_stats(self):
        """Return dataset labelling statistics."""
        return self.dataset.stats