Example #1
def main():
    # capture the config path from the run arguments
    # then process the json configuration file
    try:
        args = get_args()
        json_file = '../configs/example.json'
        # config = process_config(args.config)
        config = process_config(json_file)

    except Exception:
        print("missing or invalid arguments")
        exit(0)

    # create the experiments dirs
    create_dirs([config.summary_dir, config.checkpoint_dir])
    # create tensorflow session
    sess = tf.compat.v1.Session()
    # create your data generator
    data = DataGenerator(config)
    data.generate_data()

    # create an instance of the model you want
    model = ExampleModel(config)
    # create tensorboard logger
    logger = Logger(sess, config)
    # create trainer and pass all the previous components to it
    trainer = ExampleTrainer(sess, model, data, config, logger)
    # load the model if a checkpoint exists
    model.load(sess)
    # here you train your model
    trainer.train()
    # here you evaluate your model
    evaluator = Evaluator(trainer.sess, trainer.model, data, config, logger)
    evaluator.evaluate()
    evaluator.analysis_results()
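
The example above wires an Evaluator into a TensorFlow 1.x-style session harness. Below is a minimal skeleton of the interface those call sites assume; the constructor arguments and method names come from the code above, but the bodies are placeholders, not the project's implementation:

class Evaluator:
    """Skeleton inferred from the call sites in Example #1; internals are illustrative."""

    def __init__(self, sess, model, data, config, logger):
        self.sess = sess      # tf.compat.v1.Session shared with the trainer
        self.model = model
        self.data = data
        self.config = config
        self.logger = logger

    def evaluate(self):
        # Run the model over the evaluation data and collect metrics.
        raise NotImplementedError

    def analysis_results(self):
        # Summarize and log the metrics gathered by evaluate().
        raise NotImplementedError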
Example #2
def main():
    # capture the config path from the run arguments
    # then process the json configuration file
    config = fetch_args()

    # create the experiments dirs
    create_dirs([config.tensorboard_log_dir, config.checkpoint_dir])

    print('Create the data generator.')
    train_data_loader = DataLoader(config, 'train')
    valid_data_loader = DataLoader(config, 'valid')

    print('Create the model.')
    model = Model(config)
    if config.pretrained_model_checkpoint is not None:
        model.load(config.pretrained_model_checkpoint)
    
    if config.evaluate:
        print('Predicting on test set.')
        test_data_loader = DataLoader(config, 'test')
        evaluator = Evaluator(model.model, test_data_loader, config)
        evaluator.evaluate()
        exit(0)

    print('Create the trainer.')
    trainer = Trainer(model.model, train_data_loader, valid_data_loader, config)

    print('Start training the model.')
    trainer.train()
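
For reference, a placeholder config showing the fields Example #2 reads; the field names come from the attribute accesses above, and every value here is invented:

example_config = {
    "tensorboard_log_dir": "experiments/logs",    # used by create_dirs
    "checkpoint_dir": "experiments/checkpoints",  # used by create_dirs
    "pretrained_model_checkpoint": None,          # optional warm start
    "evaluate": False,                            # True -> predict on the test set and exit
}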
Example #3
def evaluate(models,
             data_set_path,
             log_save_path,
             measurement,
             test_times=1):

    with open(f"{data_set_path}\\data_set_info.json", 'r') as data_set_info_file:
        data_set_info = json.load(data_set_info_file)

    if not os.path.exists(f"{log_save_path}\\{data_set_info['name']}"):
        os.mkdir(f"{log_save_path}\\{data_set_info['name']}")

    shutil.copy(f"{data_set_path}\\data_set_info.json", f"{log_save_path}\\{data_set_info['name']}")

    stream_list = [path for path in os.listdir(data_set_path) if os.path.isdir(f"{data_set_path}\\{path}")]
    stream_list.sort(key=lambda s: int(s.split('_')[1]))  # stream_number

    records_list = []
    for stream in stream_list:
        log_stream_path = f"{log_save_path}\\{data_set_info['name']}\\{stream}"
        if not os.path.exists(log_stream_path):
            os.mkdir(log_stream_path)
        if not os.path.exists(f"{log_stream_path}\\record"):
            os.mkdir(f"{log_stream_path}\\record")
        if not os.path.exists(f"{log_stream_path}\\figure"):
            os.mkdir(f"{log_stream_path}\\figure")
        data = pd.read_csv(f"{data_set_path}\\{stream}\\data.csv", header=None)
        with open(f"{data_set_path}\\{stream}\\data_info.json", 'r') as data_info_file:
            data_info = json.load(data_info_file)
        X = np.array(data.iloc[:, 1:])
        y = np.array(data.iloc[:, 0])

        perf_records = {}
        for model in models:
            for _ in range(test_times):
                test_model = copy.deepcopy(models[model])
                test_model.budget = data_info['budget']
                evaluator = Evaluator(measurement=measurement,
                                      pretrain_size=1,
                                      batch_size=1,
                                      budget=data_info['budget'])
                if model not in perf_records:
                    perf_records[model] = evaluator.evaluate(X, y, model=test_model)
                else:
                    perf_records[model] += evaluator.evaluate(X, y, model=test_model)
            perf_records[model] = perf_records[model] / test_times
        perf_records = pd.DataFrame(perf_records)
        records_list.append(perf_records)
        perf_records.to_csv(f"{log_stream_path}\\record\\{measurement}.csv", index=False)
        plot_lines(perf_records, f"{log_stream_path}\\figure\\{measurement}", "pdf", 15, 'time', measurement)

    with open(f"{log_save_path}\\report.md", 'a') as report_file:
        report_file.write(f"# {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}\n")
        report_file.write(f"{data_set_info}\n\n")
        report_file.write(f"{measurement}\n\n")
        table = get_report(data_set_info, records_list, file_type="md")
        report_file.write(f"{table}\n")
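
A hedged invocation sketch for evaluate; the model class, registry names, and paths below are placeholders, not part of the original project:

class PlaceholderModel:   # stand-in exposing the .budget attribute evaluate() sets
    budget = None

models = {"baseline": PlaceholderModel(), "proposed": PlaceholderModel()}
evaluate(models,
         data_set_path="data\\my_data_set",  # invented path
         log_save_path="logs",
         measurement="accuracy",
         test_times=5)                       # each model's records are averaged over 5 runs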
Example #4
def main(config: DictConfig) -> None:
    print(OmegaConf.to_yaml(config))

    torch.manual_seed(config.eval.seed)
    torch.cuda.manual_seed_all(config.eval.seed)
    np.random.seed(config.eval.seed)
    random.seed(config.eval.seed)

    use_cuda = config.eval.cuda and torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    char2id, id2char = load_label(config.eval.label_path, config.eval.blank_id)
    audio_paths, transcripts, _, _ = load_dataset(config.eval.dataset_path,
                                                  config.eval.mode)

    test_dataset = SpectrogramDataset(
        config.eval.audio_path,
        audio_paths,
        transcripts,
        config.audio.sampling_rate,
        config.audio.n_mel,
        config.audio.frame_length,
        config.audio.frame_stride,
        config.audio.extension,
        config.train.sos_id,
        config.train.eos_id,
    )
    test_loader = AudioDataLoader(
        test_dataset,
        batch_size=config.eval.batch_size,
        num_workers=config.eval.num_workers,
    )

    model = load_test_model(config, device)

    print('Start Test !!!')

    evaluator = Evaluator(config, device, test_loader, id2char)
    evaluator.evaluate(model)
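
Example #4 seeds every RNG source (torch, CUDA, numpy, random) before evaluating, which is what makes the run reproducible. The same block, gathered into one reusable helper:

import random
import numpy as np
import torch

def seed_everything(seed: int) -> None:
    # Mirror of the seeding block in Example #4.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)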
Example #5
class Trainer(object):
    """Trainer Class"""
    def __init__(self, optimizer, criterion, batch_size, device):
        self.optimizer = optimizer
        self.criterion = criterion
        self.batch_size = batch_size
        self.device = device
        self.evaluator = Evaluator(criterion=self.criterion)

    def _train_batch(self, model, iterator, iteratorQuery, teacher_ratio,
                     clip):
        model.train()
        epoch_loss = 0
        for _, batch in enumerate(zip(iterator, iteratorQuery)):
            batch_ques, batch_query = batch
            src_ques, src_len_ques = batch_ques.src
            src_query, src_len_query = batch_query.src
            trg = batch_query.trg
            self.optimizer.zero_grad()
            input_trg = trg if model.name == RNN_NAME else trg[:, :-1]
            output = model(src_ques, src_len_ques, src_query, src_len_query,
                           input_trg, teacher_ratio)
            trg = trg.t() if model.name == RNN_NAME else trg[:, 1:]
            output = output.contiguous().view(-1, output.shape[-1])
            trg = trg.contiguous().view(-1)
            # output: (batch_size * trg_len) x output_dim
            # trg: (batch_size * trg_len)
            loss = self.criterion(output, trg)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
            self.optimizer.step()
            epoch_loss += loss.item()
        return epoch_loss / len(iterator)

    def _get_iterators(self, train_data, valid_data, model_name):
        return BucketIterator.splits((train_data, valid_data),
                                     repeat=False,
                                     batch_size=self.batch_size,
                                     sort_within_batch=False,
                                     sort_key=lambda x: len(x.src),
                                     device=self.device)

    def _epoch_time(self, start_time, end_time):
        elapsed_time = end_time - start_time
        elapsed_mins = int(elapsed_time / 60)
        elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
        return elapsed_mins, elapsed_secs

    def _log_epoch(self, train_loss, valid_loss, epoch, start_time, end_time):
        minutes, seconds = self._epoch_time(start_time, end_time)
        print(f'Epoch: {epoch+1:02} | Time: {minutes}m {seconds}s')
        print(
            f'\tTrain Loss: {train_loss:.3f} | Train PPL: {np.exp(train_loss):7.3f}'
        )
        print(
            f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {np.exp(valid_loss):7.3f}'
        )

    def _train_epoches(self, model, train_data, train_data_query, valid_data,
                       valid_data_query, num_of_epochs, teacher_ratio, clip):
        best_valid_loss = float('inf')
        # pylint: disable=unbalanced-tuple-unpacking
        train_iterator, valid_iterator = self._get_iterators(
            train_data, valid_data, model.name)
        train_iterator_query, valid_iterator_query = self._get_iterators(
            train_data_query, valid_data_query, model.name)
        for epoch in range(num_of_epochs):
            start_time = time.time()
            train_loss = self._train_batch(model, train_iterator,
                                           train_iterator_query, teacher_ratio,
                                           clip)
            valid_loss = self.evaluator.evaluate(model, valid_iterator,
                                                 valid_iterator_query,
                                                 teacher_ratio)
            end_time = time.time()
            self._log_epoch(train_loss, valid_loss, epoch, start_time,
                            end_time)
            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                Checkpoint.save(model)

    def train(self,
              model,
              train_data,
              train_data_query,
              valid_data,
              valid_data_query,
              num_of_epochs=20,
              teacher_ratio=1.0,
              clip=1):
        """Train model"""
        self._train_epoches(model, train_data, train_data_query, valid_data,
                            valid_data_query, num_of_epochs, teacher_ratio,
                            clip)
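
The PPL figures printed by _log_epoch above are perplexities, i.e. the exponential of the average per-token cross-entropy loss; a quick check:

import numpy as np

train_loss = 2.302585      # average per-token cross-entropy, in nats
print(np.exp(train_loss))  # ~10.0: as uncertain as a uniform choice over 10 tokens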
Example #6
class Experiment:

    dataset         = None
    dataset_switch  = None
    
    model_manager   = None
    switch_manager  = None
    
    evaluator       = None
    force           = None
    
    def __init__(self, dataset_id, dataset_switch_id, force):
        self._init_dir()
        
        self.dataset        = DataSet(dataset_id = dataset_id, sframe = True)
        self.dataset_switch = DataSet(dataset_id = dataset_switch_id, sframe = False)
        
        self.model_manager  = ModelManager()
        self.switch_manager = SwitchManager()
        self.evaluator      = Evaluator()
        
        self.force          = force
        
    def _init_dir(self):
        import os
        from settings import DIR

        for d in DIR:
            if not os.path.exists(d):
                os.makedirs(d)
                
    def _train_rec_models(self):
        self.model_manager.train_models(dataset = self.dataset)
        
    def _test_rec_models(self):
        self.model_manager.test_models(dataset = self.dataset)
    
    def _evaluate_rec_models(self):
        self.model_manager.evaluate_models(dataset = self.dataset)
        
    def _create_datasets_switch(self):   
        self.dataset_switch.prepare_switch_dataset(dataset = self.dataset, model_manager = self.model_manager, 
                                                   force = self.force)
        
    def _train_switch(self):
        self.switch_manager.train_models(dataset_switch = self.dataset_switch, force = self.force)
        
    def _test_switch(self):
        self.switch_manager.rating_prediction_switches(dataset = self.dataset, dataset_switch = self.dataset_switch, 
                         model_manager = self.model_manager, force = self.force)
    
    def _evaluate(self):
        self.evaluator.evaluate(dataset = self.dataset, dataset_switch = self.dataset_switch, 
                                model_manager = self.model_manager, switch_manager = self.switch_manager, 
                                force = True)

    def run(self):
        self._train_rec_models()
        self._test_rec_models()
        self._evaluate_rec_models()
        self._create_datasets_switch()
        self._train_switch()
        self._test_switch()
        self._evaluate()
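
An illustrative run of the pipeline above; the dataset ids are invented placeholders:

experiment = Experiment(dataset_id='ml-100k',                # hypothetical dataset id
                        dataset_switch_id='ml-100k-switch',  # hypothetical switch dataset id
                        force=False)
experiment.run()  # trains, tests, and evaluates the recommenders, then the switch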
Example #7
	lang = Lang()
	trans = U.tokenizeTranscripts('train')
	lang.init_lang(trans)
	output_size = lang.num_items

	dev_dataset = SpeechDataset(lang, 'dev')
	dev_dataloader = SpeechDataLoader(dev_dataset, batch_size=batch_size)

	num_layers = 3
	hidden_size = 256

	input_size = 40
	key_size = 128
	value_size = 128
	bidirectional = True
	p = 3

	embedding_size = 128

	encoder = EncoderRNN(input_size, hidden_size, key_size, value_size, num_layers, bidirectional, p)
	decoder = DecoderRNN(output_size, embedding_size, hidden_size, key_size, value_size, num_layers)

	teacher_forcing_ratio = 1.0
	las = LAS(encoder, decoder, teacher_forcing_ratio)

	criterion = nn.CrossEntropyLoss(reduction='sum', ignore_index=C.PAD_TOKEN_IDX)  # 'size_average=False' in the original is the deprecated spelling of reduction='sum'

	evaluator = Evaluator(criterion)
	evaluator.evaluate(las, dev_dataloader)
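
Because the criterion sums rather than averages, an evaluator built on it has to normalize by token count itself. A minimal sketch of that normalization, assuming the dataloader yields (inputs, targets, target_lengths) batches; the forward call is a placeholder:

total_loss, total_tokens = 0.0, 0
for inputs, targets, target_lengths in dev_dataloader:  # assumed batch structure
    logits = model(inputs)                              # placeholder forward pass
    total_loss += criterion(logits.view(-1, logits.size(-1)),
                            targets.view(-1)).item()
    total_tokens += int(target_lengths.sum())
print(f"per-token loss: {total_loss / total_tokens:.3f}")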
Example #8
                                   max_len,
                                   hidden_size,
                                   bidirectional=bidirectional,
                                   variable_lengths=True)

        dual_encoder = DualEncoder(context_encoder, response_encoder)
        if torch.cuda.is_available():
            dual_encoder.cuda()

        for param in dual_encoder.parameters():
            param.data.uniform_(-0.08, 0.08)

    # train
    t = SupervisedTrainer(loss_func=loss_func,
                          batch_size=1,
                          checkpoint_every=30,
                          print_every=100,
                          expt_dir=opt.expt_dir)

    t.train(dual_encoder,
            train,
            batch_size=1,
            num_epochs=20,
            dev_data=dev,
            optimizer=optimizer,
            resume=opt.resume)

    evaluator = Evaluator(batch_size=1)
    l, precision, recall = evaluator.evaluate(dual_encoder, dev)
    print("Precision: {}, Recall: {}".format(precision, recall))
Example #9
class SupervisedTrainer(object):
    def __init__(self,
                 export_dir='experiment',
                 loss=NLLLoss(),
                 batch_size=64,
                 random_seed=None,
                 checkpoint_every=100,
                 print_every=100):
        self._trainer = "Simple Trainer"
        self.random_seed = random_seed

        if random_seed is not None:
            random.seed(random_seed)
            torch.manual_seed(random_seed)

        self.loss = loss
        self.evaluator = Evaluator(loss=self.loss, batch_size=batch_size)
        self.optimizer = None
        self.checkpoint_every = checkpoint_every
        self.print_every = print_every

        if not os.path.isabs(export_dir):
            export_dir = os.path.join(os.getcwd(), export_dir)
        self.export_dir = export_dir
        if not os.path.exists(self.export_dir):
            os.makedirs(self.export_dir)
        self.batch_size = batch_size
        self.logger = logging.getLogger(__name__)

    def _train_batch(self,
                     input_variable: torch.Tensor,
                     input_lengths,
                     target_variable,
                     model,
                     teacher_model=None,
                     teacher_forcing_ratio=.5):
        loss = self.loss
        loss.reset()

        input_batch_size = input_variable.size(0)
        input_seq_length = input_variable.size(1)
        target_batch_size = target_variable.size(0)
        target_seq_length = target_variable.size(1)

        if teacher_model is not None:
            # 'batch_size' was undefined in the original; input_batch_size is
            # the batch dimension computed above.
            tgt_arr = np.array([[model.sos_id]] * input_batch_size, 'i')
            # torch.autograd.Variable is a deprecated no-op wrapper; a LongTensor suffices.
            tgt_var = torch.LongTensor(tgt_arr)

            transformer_output = model(input_variable, tgt_var)
            # transformer_output: [batch_size, 1, tgt_vocab_size]
            teacher_output, teacher_hidden, other = teacher_model(
                target_variable, target_variable.size(1))
            # The original never bound decoder_output in this branch; aliasing it
            # to the model output keeps the step loop below from raising NameError.
            decoder_output = transformer_output

        else:
            input_var = input_variable.view(input_seq_length, input_batch_size,
                                            1)
            target_var = target_variable.view(target_seq_length,
                                              target_batch_size, 1)
            decoder_output, other = model(input_var, target_var, input_lengths)

        for step, step_output in enumerate(decoder_output):

            if teacher_model is not None:
                tgt = target_variable[:, step + 1].contiguous().view(-1)
                loss.eval_batch(step_output, tgt, teacher_output[-step])

            else:
                # NOTE: a fresh, untrained Linear layer is created on every
                # step, as in the original; that is almost certainly unintended
                # in real training code.
                linear_func = nn.Linear(128, 8)
                dec_out = linear_func(step_output)

                dec_out: torch.Tensor = F.log_softmax(dec_out, dim=1)
                loss.eval_batch(dec_out.view(input_batch_size, -1),
                                target_variable[:, step + 1])

        model.zero_grad()
        loss.backward()
        self.optimizer.step()

        return loss.get_loss()

    def _train_epoches(self,
                       data,
                       model,
                       teacher_model,
                       n_epochs,
                       start_epoch,
                       start_step,
                       dev_data,
                       teacher_forcing_ratio=0):
        log = self.logger
        print_loss_total = 0
        epoch_loss_total = 0

        device = None if torch.cuda.is_available() else -1
        batch_iterator = torchtext.data.BucketIterator(
            dataset=data,
            batch_size=self.batch_size,
            sort=False,
            sort_within_batch=True,
            sort_key=lambda x: len(x.src),
            device=device,
            repeat=False)

        step_per_epoch = len(batch_iterator)

        total_steps = step_per_epoch * n_epochs

        step = start_step
        step_elapsed = 0
        for epoch in range(start_epoch, n_epochs + 1):
            log.debug("Epoch: %d, Step: %d" % (epoch, step))

            batch_generator = batch_iterator.__iter__()
            for _ in range((epoch - 1) * step_per_epoch, step):
                next(batch_generator)

            model.train(True)
            for batch in batch_generator:
                step += 1
                step_elapsed += 1
                input_var, input_length = getattr(batch, 'src')
                target_var = getattr(batch, 'tgt')

                loss = self._train_batch(input_variable=input_var,
                                         input_lengths=input_length,
                                         target_variable=target_var,
                                         model=model,
                                         teacher_model=teacher_model)

                print_loss_total += loss
                epoch_loss_total += loss

                if step % self.print_every == 0 and step_elapsed > self.print_every:
                    print_loss_avg = print_loss_total / self.print_every
                    print_loss_total = 0
                    log_msg = 'Progress: %d%%, Train %s: %.4f' % (
                        step / total_steps * 100, self.loss.name,
                        print_loss_avg)
                    log.info(log_msg)
                # Checkpoint
                if step % self.checkpoint_every == 0 or step == total_steps:
                    Checkpoint(model=model,
                               optimizer=self.optimizer,
                               epoch=epoch,
                               step=step,
                               input_vocab=data.fields['src'].vocab,
                               output_vocab=data.fields['tgt'].vocab).save(
                                   self.export_dir)

            if step_elapsed == 0:
                continue

            epoch_loss_avg = epoch_loss_total / \
                min(step_per_epoch, step - start_step)
            epoch_loss_total = 0
            log_msg = "Finished epoch %d: Train %s: %.4f" % (
                epoch, self.loss.name, epoch_loss_avg)
            if dev_data is not None:
                dev_loss, accuracy = self.evaluator.evaluate(model, dev_data)
                self.optimizer.update(dev_loss, epoch)
                log_msg += ", Dev %s: %.4f, Accuracy: %.4f" % (
                    self.loss.name, dev_loss, accuracy)
                model.train(mode=True)
            else:
                self.optimizer.update(epoch_loss_avg, epoch)

            log.info(log_msg)

    def train(self,
              model,
              data,
              teacher_model=None,
              num_epochs=5,
              resume=False,
              dev_data=None,
              optimizer=None,
              teacher_forcing_ratio=0):

        if resume:
            latest_checkpoint_path = Checkpoint.get_latest_checkpoint(
                self.export_dir)
            resume_checkpoint = Checkpoint.load(latest_checkpoint_path)

            model = resume_checkpoint.model
            self.optimizer = resume_checkpoint.optimizer

            resume_optim = self.optimizer.optimizer
            defaults = resume_optim.param_groups[0]
            defaults.pop('params', None)
            defaults.pop('initial_lr', None)
            self.optimizer.optimizer = resume_optim.__class__(
                model.parameters(), **defaults)

            start_epoch = resume_checkpoint.epoch
            step = resume_checkpoint.step

        else:
            start_epoch = 1
            step = 0
            if optimizer is None:
                optimizer = Optimizer(optim.Adam(model.parameters()),
                                      max_grad_norm=5)
            self.optimizer = optimizer

        self.logger.info("Optimizer: %s, Scheduler: %s" %
                         (self.optimizer.optimizer, self.optimizer.scheduler))

        self._train_epoches(data,
                            model,
                            teacher_model,
                            num_epochs,
                            start_epoch,
                            step,
                            dev_data=dev_data,
                            teacher_forcing_ratio=teacher_forcing_ratio)
        return model
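
When resuming mid-epoch, _train_epoches fast-forwards the batch iterator to the saved step by discarding batches that were already trained on. The pattern in isolation:

batch_generator = iter(batch_iterator)
already_done = step - (epoch - 1) * step_per_epoch  # batches consumed before the checkpoint
for _ in range(already_done):
    next(batch_generator)                           # skip them so training resumes where it stopped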
Example #10
import os
from options.test_options import TestOptions
from evaluator import train_classifier
from evaluator.evaluator import EvaluatorDataset
from evaluator.evaluator import Evaluator
from evaluator.dataset import ClassifierDataset
from torch.utils.data import DataLoader

if __name__ == "__main__":
    opt = TestOptions().parse() 
    classifier_path = 'evaluator/checkpoints/latest_{}_resnet.pth'.format(opt.evaluate_mode)    
    if not os.path.exists(classifier_path):   
        train_classifier(mode=opt.evaluate_mode)
    training_data = ClassifierDataset(opt.evaluate_mode)
    dataset = EvaluatorDataset(opt)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=4)
    evaluator = Evaluator(opt, num_classes=training_data.num_classes, text2label=training_data.text2label)
    
    for data in dataloader:  
        try:
            evaluator.evaluate(data)
            evaluator.record_current_results()
        except Exception as e:
            print(f'!!!error!!! skipping current data: {e}')
    evaluator.compute_final_results()
Example #11
class Trainer:
    def __init__(self, criterion):
        super(Trainer, self).__init__()
        self.criterion = criterion
        self.evaluator = Evaluator(criterion)

    def _train_batch(self, model, input_variables, input_lengths,
                     target_variables):
        decoder_outputs, ret_dict = model(input_variables, input_lengths,
                                          target_variables)
        acc_loss = self.criterion(decoder_outputs.contiguous(),
                                  target_variables[1:, :].contiguous())
        acc_loss = acc_loss.view(
            target_variables.size(0) - 1, target_variables.size(1))
        acc_loss = acc_loss.sum(0).mean()

        self.optimizer.zero_grad()
        acc_loss.backward()

        params = itertools.chain.from_iterable(
            [group['params'] for group in self.optimizer.param_groups])
        torch.nn.utils.clip_grad_norm_(params, max_norm=self.max_grad_norm)
        self.optimizer.step()

        return acc_loss.data.item()

    def train(self, train_dataloader, dev_dataloader, model, lr, num_epochs):
        self.max_grad_norm = 5
        self.optimizer = torch.optim.Adam(model.parameters(), lr=lr)

        # NOTE: the original starts at epoch 9, presumably to resume from a
        # checkpoint; use range(num_epochs) to train from scratch.
        for epoch in range(9, num_epochs):
            model.train(True)

            epoch_loss = 0
            num_batches = len(train_dataloader.dataloader)

            for (batch_idx, data) in enumerate(train_dataloader.dataloader):
                input_variables, input_lengths, target_variables, target_lengths = data

                input_variables = U.var(
                    torch.from_numpy(input_variables).float())
                target_variables = U.var(
                    torch.from_numpy(target_variables).long())

                input_variables = input_variables.transpose(0, 1)
                target_variables = target_variables.transpose(0, 1)

                batch_loss = self._train_batch(model, input_variables,
                                               input_lengths, target_variables)
                epoch_loss += batch_loss

                if batch_idx % 50 == 0:
                    print("batch %d avg_loss %f" % (batch_idx, epoch_loss /
                                                    (batch_idx + 1)))

            print("epoch %d train_epoch_loss %f" %
                  (epoch, epoch_loss / num_batches))

            # checkpoint every epoch (the original's 'epoch % 1 == 0' guard is always true)
            U.checkpoint(epoch, model)
            val_epoch_loss = self.evaluator.evaluate(model, dev_dataloader)
            print("epoch %d val_epoch_loss %f" % (epoch, val_epoch_loss))