Beispiel #1
0
    def __init__(self, args):
        """Build the solver from parsed options.

        Stores the hyper-parameters, creates the network together with its
        EMA companion, the Adam optimizer and the exponential LR scheduler,
        prepares the checkpoint directory, optionally loads a checkpoint,
        resets the history, optionally opens a TensorBoard writer and finally
        creates the data loaders.

        Args:
            args: options object. Attributes read here: ``cuda``, ``epoch``,
                ``batch_size``, ``lr``, ``K``, ``beta``, ``num_avg``,
                ``ckpt_dir``, ``env_name``, ``load_ckpt``, ``tensorboard``
                and, only when TensorBoard is enabled, ``summary_dir``.
        """
        self.args = args

        # Hyper-parameters and run-time counters.
        self.cuda = args.cuda and torch.cuda.is_available()
        self.epoch = args.epoch
        self.batch_size = args.batch_size
        self.lr = args.lr
        self.eps = 1e-9
        self.K = args.K
        self.beta = args.beta
        self.num_avg = args.num_avg
        self.global_iter = 0
        self.global_epoch = 0

        # Network, its EMA copy (seeded with the freshly initialised weights),
        # optimizer and learning-rate schedule.
        self.toynet = cuda(ToyNet(self.K), self.cuda)
        self.toynet.weight_init()
        ema_net = cuda(ToyNet(self.K), self.cuda)
        self.toynet_ema = Weight_EMA_Update(ema_net,
                                            self.toynet.state_dict(),
                                            decay=0.999)

        self.optim = optim.Adam(self.toynet.parameters(),
                                lr=self.lr,
                                betas=(0.5, 0.999))
        self.scheduler = lr_scheduler.ExponentialLR(self.optim, gamma=0.97)

        # Checkpoints live in <ckpt_dir>/<env_name>.
        self.ckpt_dir = Path(args.ckpt_dir) / args.env_name
        if not self.ckpt_dir.exists():
            self.ckpt_dir.mkdir(parents=True, exist_ok=True)
        self.load_ckpt = args.load_ckpt
        # NOTE(review): the checkpoint is loaded before self.history is reset
        # below; if load_checkpoint restores a history it would be
        # overwritten -- confirm against load_checkpoint.
        if self.load_ckpt != '':
            self.load_checkpoint(self.load_ckpt)

        # Fresh history record.
        self.history = {
            'avg_acc': 0.,
            'info_loss': 0.,
            'class_loss': 0.,
            'total_loss': 0.,
            'epoch': 0,
            'iter': 0,
        }

        # Optional TensorBoard logging in <summary_dir>/<env_name>.
        self.tensorboard = args.tensorboard
        if self.tensorboard:
            self.env_name = args.env_name
            self.summary_dir = Path(args.summary_dir) / args.env_name
            if not self.summary_dir.exists():
                self.summary_dir.mkdir(parents=True, exist_ok=True)
            self.tf = SummaryWriter(log_dir=self.summary_dir)
            # Record the full option set as text at the current epoch.
            self.tf.add_text(tag='argument',
                             text_string=str(args),
                             global_step=self.global_epoch)

        # Data loaders are created last.
        self.data_loader = return_data(args)
Beispiel #2
0
    def __init__(self, args):
        """Build the solver from parsed options.

        Stores the basic settings, creates the data loaders and the
        checkpoint/output directories, initialises visualization, history and
        models, optionally loads a checkpoint and sets up the adversarial
        attack helper for the supported attack modes.

        Args:
            args: options object. Attributes read here: ``cuda``, ``epoch``,
                ``batch_size``, ``lr``, ``y_dim``, ``target``, ``dataset``,
                ``silent``, ``env_name``, ``visdom``, ``ckpt_dir``,
                ``output_dir``, ``load_ckpt``, ``load_ckpt_flag`` and
                ``attack_mode``.
        """
        self.args = args

        # Basic settings
        self.cuda = args.cuda and torch.cuda.is_available()
        self.epoch = args.epoch
        self.batch_size = args.batch_size
        self.lr = args.lr
        # Number of classes (original author's note: 10 for MNIST/CIFAR10).
        self.y_dim = args.y_dim
        # Class to aim the perturbation at (original author's note).
        self.target = args.target
        self.dataset = args.dataset
        self.data_loader = return_data(args)
        self.global_epoch = 0
        self.global_iter = 0
        self.print_ = not args.silent
        self.env_name = args.env_name  # experiment name
        self.visdom = args.visdom  # stored; per the author's note, not used

        # Directories: checkpoint source, checkpoint destination, outputs.
        self.ckpt_dir = Path(args.ckpt_dir)
        self.save_ckpt_dir = Path('./checkpoints/' + args.env_name)
        print(self.save_ckpt_dir)
        for directory in (self.ckpt_dir, self.save_ckpt_dir):
            if not directory.exists():
                directory.mkdir(parents=True, exist_ok=True)
        self.output_dir = Path(args.output_dir) / args.env_name
        if not self.output_dir.exists():
            self.output_dir.mkdir(parents=True, exist_ok=True)

        # Visualization tools
        self.visualization_init(args)

        # History record
        self.history = {'acc': 0., 'epoch': 0, 'iter': 0}

        # Models and optimizers, then an optional checkpoint restore.
        self.model_init(args)
        self.load_ckpt = args.load_ckpt
        if args.load_ckpt_flag == True:  # equality test kept as in the original
            if self.load_ckpt != '':
                self.load_checkpoint(self.load_ckpt)

        # Adversarial perturbation generator, driven by functional
        # cross-entropy.
        criterion = F.cross_entropy
        self.attack_mode = args.attack_mode
        # NOTE(review): self.attack is only created for these two modes; any
        # other attack_mode leaves the attribute unset.
        if self.attack_mode in ('FGSM', 'ILLC'):
            self.attack = Attack(self.net, criterion=criterion)
  def get_dataset_config(self, config):
    """Create the data loaders and cache dataset-derived settings on ``self``.

    Reads the training dataset and its preprocessor from the loaders returned
    by ``return_data(config)`` and copies class counts, vocabularies and
    maximum lengths onto the instance, then prints a short summary.

    Args:
      config: configuration object; must support ``get(key, default)`` and
        is passed unchanged to ``return_data``.
    """
    self.data_loader = return_data(config)
    self.ignore_index = config.get('ignore_index', -100)

    # Resolve the training dataset and its preprocessor once.
    train_set = self.data_loader['train'].dataset
    preprocessor = train_set.preprocessor

    self.n_visual_class = preprocessor.num_visual_words
    self.n_phone_class = preprocessor.num_tokens
    self.visual_words = preprocessor.visual_words
    self.phone_set = preprocessor.tokens
    self.max_feat_len = train_set.max_feat_len
    self.max_word_len = train_set.max_word_len
    self.max_normalize = config.get('max_normalize', False)

    # Summary for the console.
    print(f'Number of visual label classes = {self.n_visual_class}')
    print(f'Number of phone classes = {self.n_phone_class}')
    print(f'Max normalized: {self.max_normalize}')
Beispiel #4
0
  def get_dataset_config(self, config):
    """Create the data loaders and cache dataset-derived settings on ``self``.

    Reads the training dataset and its preprocessor from the loaders returned
    by ``return_data(config)`` and copies class counts, vocabularies, maximum
    lengths and the cluster count onto the instance, then prints a summary.

    Args:
      config: configuration object; must support ``get(key, default)`` and
        is passed unchanged to ``return_data``.
    """
    self.data_loader = return_data(config)
    self.ignore_index = config.get('ignore_index', -100)

    # Resolve the training dataset and its preprocessor once.
    train_set = self.data_loader['train'].dataset
    preprocessor = train_set.preprocessor

    self.n_visual_class = preprocessor.num_visual_words
    self.n_phone_class = preprocessor.num_tokens
    self.visual_words = preprocessor.visual_words
    self.phone_set = preprocessor.tokens
    self.max_feat_len = train_set.max_feat_len
    self.max_word_len = train_set.max_word_len
    self.max_segment_num = train_set.max_segment_num
    # The cluster count falls back to the number of phone classes.
    self.n_clusters = config.get("n_clusters", self.n_phone_class)

    # Summary for the console.
    print(f'Number of visual label classes = {self.n_visual_class}')
    print(f'Number of phone classes = {self.n_phone_class}')
    print(f'Number of clusters = {self.n_clusters}')
Beispiel #5
0
    def __init__(self, args):
        """Build the solver from parsed options.

        Stores the basic settings, creates the data loaders and the
        checkpoint/output directories, initialises visualization, history and
        models, optionally loads a checkpoint and creates the adversarial
        attack helper.

        Args:
            args: options object. Attributes read here: ``cuda``, ``epoch``,
                ``batch_size``, ``eps``, ``lr``, ``y_dim``, ``target``,
                ``dataset``, ``silent``, ``env_name``, ``tensorboard``,
                ``visdom``, ``ckpt_dir``, ``output_dir`` and ``load_ckpt``.
        """
        self.args = args

        # Basic settings
        self.cuda = args.cuda and torch.cuda.is_available()
        self.epoch = args.epoch
        self.batch_size = args.batch_size
        self.eps = args.eps
        self.lr = args.lr
        self.y_dim = args.y_dim
        self.target = args.target
        self.dataset = args.dataset
        self.data_loader = return_data(args)
        self.global_epoch = 0
        self.global_iter = 0
        self.print_ = not args.silent

        # Experiment name and logging switches
        self.env_name = args.env_name
        self.tensorboard = args.tensorboard
        self.visdom = args.visdom

        # Checkpoints and outputs each live in a per-experiment directory.
        self.ckpt_dir = Path(args.ckpt_dir) / args.env_name
        if not self.ckpt_dir.exists():
            self.ckpt_dir.mkdir(parents=True, exist_ok=True)
        self.output_dir = Path(args.output_dir) / args.env_name
        if not self.output_dir.exists():
            self.output_dir.mkdir(parents=True, exist_ok=True)

        # Visualization tools
        self.visualization_init(args)

        # History record
        self.history = {'acc': 0., 'epoch': 0, 'iter': 0}

        # Models and optimizers, then an optional checkpoint restore.
        self.model_init(args)
        self.load_ckpt = args.load_ckpt
        if self.load_ckpt != '':
            self.load_checkpoint(self.load_ckpt)

        # Adversarial perturbation generator, driven by functional
        # cross-entropy.
        self.attack = Attack(self.net, criterion=F.cross_entropy)
Beispiel #6
0
    def __init__(self, args):
        """Build the solver from parsed options.

        Picks the device, stores the hyper-parameters, creates the network
        together with its EMA companion, the Adam optimizer and the
        exponential LR scheduler, prepares the checkpoint directory,
        optionally loads a checkpoint, resets the history and creates the
        data loaders.

        Args:
            args: options object. Attributes read here: ``device``,
                ``epoch``, ``batch_size``, ``lr``, ``K``, ``beta``,
                ``num_avg``, ``ckpt_dir`` and ``load_ckpt``.
        """
        self.args = args

        # Use the requested device only when CUDA is available, else the CPU.
        device_name = args.device if torch.cuda.is_available() else 'cpu'
        self.device = torch.device(device_name)

        # Hyper-parameters and run-time counters.
        self.epoch = args.epoch
        self.batch_size = args.batch_size
        self.lr = args.lr
        self.eps = 1e-9
        self.K = args.K
        self.beta = args.beta
        self.num_avg = args.num_avg
        self.global_iter = 0
        self.global_epoch = 0

        # Network, its EMA copy (seeded with the freshly initialised weights),
        # optimizer and learning-rate schedule.
        self.toynet = ToyNet(self.K).to(self.device)
        self.toynet.weight_init()
        ema_net = ToyNet(self.K).to(self.device)
        self.toynet_ema = Weight_EMA_Update(ema_net,
                                            self.toynet.state_dict(),
                                            decay=0.999)

        self.optim = optim.Adam(self.toynet.parameters(),
                                lr=self.lr,
                                betas=(0.5, 0.999))
        self.scheduler = lr_scheduler.ExponentialLR(self.optim, gamma=0.97)

        # Checkpoint directory and optional restore.
        self.ckpt_dir = Path(args.ckpt_dir)
        if not self.ckpt_dir.exists():
            self.ckpt_dir.mkdir(parents=True, exist_ok=True)
        self.load_ckpt = args.load_ckpt
        # NOTE(review): the checkpoint is loaded before self.history is reset
        # below; if load_checkpoint restores a history it would be
        # overwritten -- confirm against load_checkpoint.
        if self.load_ckpt != '':
            self.load_checkpoint(self.load_ckpt)

        # Fresh history record.
        self.history = {
            'avg_acc': 0.,
            'info_loss': 0.,
            'class_loss': 0.,
            'total_loss': 0.,
            'epoch': 0,
            'iter': 0,
        }

        # Data loaders are created last.
        self.data_loader = return_data(args)
Beispiel #7
0
    def __init__(self, config):
        """Build the trainer from a configuration object.

        Stores the hyper-parameters, derives the audio input size from the
        feature type, creates the data loaders, the image network, the audio
        network and the position predictor, sets up the optimizer/scheduler
        for the audio network, prepares the checkpoint directory, optionally
        loads a checkpoint and resets the history.

        Args:
            config: configuration object offering both attribute access
                (``epoch``, ``batch_size``, ``beta``, ``lr``,
                ``audio_feature``, ``K``, ``image_feature``, ``debug``,
                ``dataset``, ``model_type``, ``ckpt_dir``, ``load_ckpt``) and
                ``get(key, default)`` for the optional keys ``anneal_rate``,
                ``num_sample``, ``use_segment`` and ``downsample_method``.

        Raises:
            ValueError: if ``config.audio_feature`` is neither ``'mfcc'`` nor
                ``'cpc'``, or if ``config.model_type`` is neither ``'blstm'``
                nor ``'mlp'``.
        """
        self.config = config

        self.cuda = torch.cuda.is_available()
        self.epoch = config.epoch
        self.batch_size = config.batch_size
        self.beta = config.beta
        self.lr = config.lr
        self.anneal_rate = config.get('anneal_rate', 3e-6)
        self.num_sample = config.get('num_sample', 1)
        self.use_segment = config.get('use_segment', False)
        self.ds_method = config.get('downsample_method', 'average')
        self.eps = 1e-9
        if config.audio_feature == 'mfcc':
            self.input_size = 80
        elif config.audio_feature == 'cpc':
            self.input_size = 256
        else:
            # Previously the exception object was created but never raised
            # (and its message read a different attribute than the one being
            # tested), so an unsupported feature type slipped through.
            raise ValueError(
                f'Audio feature type {config.audio_feature} not supported')

        if self.use_segment and self.ds_method == 'resample':
            # input size is the concatenation of 4 frames for resample
            self.input_size = 4 * self.input_size

        self.K = config.K
        self.global_iter = 0
        self.global_epoch = 0
        self.audio_feature = config.audio_feature
        self.image_feature = config.image_feature
        self.debug = config.debug
        self.dataset = config.dataset

        # Dataset
        self.data_loader = return_data(config)
        preprocessor = self.data_loader['train'].dataset.preprocessor
        self.n_class = preprocessor.num_tokens
        self.class_names = preprocessor.tokens
        print(f'visual label class: {self.n_class}')

        self.image_net = Resnet34(pretrained=True, n_class=self.n_class)
        # NOTE: loading weights from
        # os.path.join(config.image_model_dir, 'best_image_model.pth') is
        # disabled; the network keeps whatever Resnet34(pretrained=True)
        # provides.
        self.image_net = cuda(self.image_net, self.cuda)

        if config.model_type == 'blstm':
            self.audio_net = cuda(
                GumbelBLSTM(self.K,
                            input_size=self.input_size,
                            n_layers=1,
                            n_class=self.n_class,
                            ds_ratio=1,
                            bidirectional=True), self.cuda)
            # K is doubled after building the bidirectional network; the
            # position predictor below receives the doubled value.
            self.K = 2 * self.K
        elif config.model_type == 'mlp':
            self.audio_net = cuda(
                GumbelMLP(self.K,
                          input_size=self.input_size,
                          n_class=self.n_class), self.cuda)
        else:
            # Previously constructed without ``raise``; fail here instead of
            # later with a missing ``self.audio_net``.
            raise ValueError(f'Model type {config.model_type} not defined')

        self.position_model = cuda(
            PositionPredictor(input_size=self.K,
                              vocab_size=self.n_class,
                              embedding_size=50), self.cuda)

        # Only the audio network's parameters are handed to the optimizer.
        trainables = list(self.audio_net.parameters())
        self.optim = optim.Adam(trainables, lr=self.lr, betas=(0.5, 0.999))
        self.scheduler = lr_scheduler.ExponentialLR(self.optim, gamma=0.97)

        self.ckpt_dir = Path(config.ckpt_dir)
        if not self.ckpt_dir.exists():
            self.ckpt_dir.mkdir(parents=True, exist_ok=True)
        self.load_ckpt = config.load_ckpt
        # NOTE(review): the checkpoint is loaded before self.history is reset
        # below; if load_checkpoint restores a history it would be
        # overwritten -- confirm against load_checkpoint.
        if self.load_ckpt:
            self.load_checkpoint()

        # History
        self.history = dict()
        self.history['f1'] = 0.
        self.history['token_f1'] = 0.
        self.history['abx'] = 0.5
        self.history['total_loss'] = 0.
        self.history['avg_loss'] = 0.
        self.history['epoch'] = 0
        self.history['iter'] = 0
    def __init__(self, config):
        """Build the trainer from a configuration object.

        Stores the hyper-parameters, selects the audio front end, prepares
        the checkpoint directory and the loss criterion, creates the data
        loaders and dataset-derived settings, builds the audio, phone and
        alignment networks, sets up optimizer/scheduler for the audio
        network, optionally loads a checkpoint and resets the history.

        Args:
            config: configuration object offering both attribute access
                (``epoch``, ``batch_size``, ``lr``, ``audio_feature``,
                ``wav2vec_path`` for wav2vec2, ``K``, ``image_feature``,
                ``debug``, ``dataset``, ``ckpt_dir``, ``load_ckpt``,
                ``mode``) and ``get(key, default)`` for optional keys.

        Raises:
            ValueError: for an unsupported ``config.audio_feature`` or an
                unknown ``loss_type``.
        """
        self.config = config

        self.cuda = torch.cuda.is_available()
        self.beta = 1.  # hard-coded; flagged "XXX" by the original author
        self.epoch = config.epoch
        self.batch_size = config.batch_size
        self.lr = config.lr
        self.n_layers = config.get('num_layers', 3)
        self.eps = 1e-9

        # Audio front end: feature extractor (if any), feature dimension and
        # hop length in milliseconds.
        feature = config.audio_feature
        if feature == 'wav2vec2':
            ensemble = fairseq.checkpoint_utils.load_model_ensemble_and_task(
                [config.wav2vec_path])[0]
            self.audio_feature_net = cuda(ensemble[0], self.cuda)
            # The feature extractor's parameters are excluded from gradients.
            for param in self.audio_feature_net.parameters():
                param.requires_grad = False
            self.input_size = 512
            self.hop_len_ms = 20
        elif feature in ('mfcc', 'cpc'):
            self.audio_feature_net = None
            self.input_size = 80 if feature == 'mfcc' else 256
            self.hop_len_ms = 10
        else:
            raise ValueError(f"Feature type {feature} not supported")

        self.K = config.K
        self.global_iter = 0
        self.global_epoch = 0
        self.audio_feature = config.audio_feature
        self.image_feature = config.image_feature
        self.debug = config.debug
        self.dataset = config.dataset
        self.max_normalize = config.get('max_normalize', False)
        self.loss_type = config.get('loss_type', 'macro_token_floss')
        self.beta_f_measure = config.get('beta_f_measure', 0.3)
        self.weight_word_loss = config.get('weight_word_loss', 1.0)
        self.weight_phone_loss = config.get('weight_phone_loss', 0.0)

        # Checkpoint directory
        self.ckpt_dir = Path(config.ckpt_dir)
        if not self.ckpt_dir.exists():
            self.ckpt_dir.mkdir(parents=True, exist_ok=True)

        # Loss criterion
        if self.loss_type == 'binary_cross_entropy':
            self.criterion = nn.BCELoss()
        elif self.loss_type == 'macro_token_floss':
            self.criterion = MacroTokenFLoss(beta=self.beta_f_measure)
        else:
            raise ValueError(f'Invalid loss type {self.loss_type}')

        # Dataset and the settings derived from its training split.
        self.data_loader = return_data(config)
        self.ignore_index = config.get('ignore_index', -100)
        train_set = self.data_loader['train'].dataset
        preprocessor = train_set.preprocessor
        self.n_visual_class = preprocessor.num_visual_words
        self.n_phone_class = preprocessor.num_tokens
        self.visual_words = preprocessor.visual_words
        self.phone_set = preprocessor.tokens
        self.max_feat_len = train_set.max_feat_len
        self.max_word_len = train_set.max_word_len
        print(f'Number of visual label classes = {self.n_visual_class}')
        print(f'Number of phone classes = {self.n_phone_class}')
        print(f'Max normalized: {self.max_normalize}')

        # Networks
        audio_net = BLSTM(self.K,
                          n_layers=self.n_layers,
                          n_class=self.n_phone_class,
                          input_size=self.input_size,
                          ds_ratio=1,
                          bidirectional=True)
        self.audio_net = cuda(audio_net, self.cuda)

        phone_net = HMMPronunciator(self.visual_words,
                                    self.phone_set,
                                    config=config,
                                    ignore_index=self.ignore_index)
        self.phone_net = cuda(phone_net, self.cuda)
        self.phone_net.train_model()

        # scale is hard-coded to 0.; flagged "XXX" by the original author.
        self.align_net = cuda(LinearPositionAligner(scale=0.), self.cuda)

        # Only the audio network's parameters are handed to the optimizer.
        trainables = list(self.audio_net.parameters())
        if config.get('optim', 'adam') == 'sgd':
            self.optim = optim.SGD(trainables, lr=self.lr)
        else:
            self.optim = optim.Adam(trainables, lr=self.lr, betas=(0.5, 0.999))
        self.scheduler = lr_scheduler.ExponentialLR(self.optim, gamma=0.97)

        # A checkpoint is loaded on request, and always in test/cluster mode.
        # NOTE(review): this happens before self.history is reset below; if
        # load_checkpoint restores a history it would be overwritten --
        # confirm against load_checkpoint.
        self.load_ckpt = config.load_ckpt
        if self.load_ckpt or config.mode in ['test', 'cluster']:
            self.load_checkpoint()

        # Fresh history record.
        self.history = {
            'token_f1': 0.,
            'visual_token_f1': 0.,
            'loss': 0.,
            'epoch': 0,
            'iter': 0,
        }
    def __init__(self, config):
        """Build the trainer from a configuration object.

        Stores the hyper-parameters, selects the audio front end, creates the
        data loaders and dataset-derived settings, builds the audio network
        with its optimizer/scheduler, resets the history, prepares the
        checkpoint directory and optionally loads a checkpoint.

        Args:
            config: configuration object offering both attribute access
                (``epoch``, ``batch_size``, ``beta``, ``lr``,
                ``audio_feature``, ``wav2vec_path`` for wav2vec2, ``K``,
                ``image_feature``, ``debug``, ``dataset``, ``ckpt_dir``,
                ``load_ckpt``, ``mode``) and ``get(key, default)`` for
                optional keys.

        Raises:
            ValueError: for an unsupported ``config.audio_feature``.
        """
        self.config = config

        self.cuda = torch.cuda.is_available()
        self.epoch = config.epoch
        self.batch_size = config.batch_size
        self.beta = config.beta
        self.lr = config.lr
        self.n_layers = config.get('num_layers', 1)
        self.weight_word_loss = config.get('weight_word_loss', 1.)
        self.weight_phone_loss = config.get('weight_phone_loss', 0.)
        self.weight_evidence = config.get('weight_evidence', 1.)
        self.anneal_rate = config.get('anneal_rate', 3e-6)
        self.num_sample = config.get('num_sample', 1)
        self.eps = 1e-9

        # Audio front end: feature extractor (if any), feature dimension and
        # hop length in milliseconds.
        feature = config.audio_feature
        if feature == 'wav2vec2':
            ensemble = fairseq.checkpoint_utils.load_model_ensemble_and_task(
                [config.wav2vec_path])[0]
            self.audio_feature_net = cuda(ensemble[0], self.cuda)
            # The feature extractor's parameters are excluded from gradients.
            for param in self.audio_feature_net.parameters():
                param.requires_grad = False
            self.input_size = 512
            self.hop_len_ms = 20
        elif feature == 'mfcc':
            self.audio_feature_net = None
            self.input_size = 80
            self.hop_len_ms = 10
        else:
            raise ValueError(f"Feature type {feature} not supported")

        self.K = config.K
        self.global_iter = 0
        self.global_epoch = 0
        self.audio_feature = config.audio_feature
        self.image_feature = config.image_feature
        self.debug = config.debug
        self.dataset = config.dataset

        # Dataset and the settings derived from its training split.
        self.data_loader = return_data(config)
        self.ignore_index = config.get('ignore_index', -100)
        preprocessor = self.data_loader['train'].dataset.preprocessor
        self.n_visual_class = preprocessor.num_visual_words
        self.n_phone_class = preprocessor.num_tokens
        self.visual_words = preprocessor.visual_words
        print(f'Number of visual label classes = {self.n_visual_class}')

        # The output layer covers the visual classes plus twice the input
        # size, extended by the phone classes when the phone loss is active.
        output_size = self.n_visual_class + 2 * self.input_size
        if self.weight_phone_loss > 0:
            output_size = output_size + self.n_phone_class
        audio_net = GaussianBLSTM(self.K,
                                  input_size=self.input_size,
                                  n_layers=self.n_layers,
                                  n_class=output_size,
                                  ds_ratio=1,
                                  bidirectional=True)
        self.audio_net = cuda(audio_net, self.cuda)

        # Optimizer (SGD on request, Adam otherwise) and LR schedule.
        trainables = list(self.audio_net.parameters())
        if config.get('optim', 'adam') == 'sgd':
            self.optim = optim.SGD(trainables, lr=self.lr)
        else:
            self.optim = optim.Adam(trainables, lr=self.lr, betas=(0.5, 0.999))
        self.scheduler = lr_scheduler.ExponentialLR(self.optim, gamma=0.97)

        # History is set up before any checkpoint is loaded.
        self.history = {
            'acc': 0.,
            'token_f1': 0.,
            'loss': 0.,
            'epoch': 0,
            'iter': 0,
            'temp': 1.,
        }

        # Checkpoint directory; a checkpoint is loaded on request, and always
        # in test/cluster mode.
        self.ckpt_dir = Path(config.ckpt_dir)
        if not self.ckpt_dir.exists():
            self.ckpt_dir.mkdir(parents=True, exist_ok=True)
        self.load_ckpt = config.load_ckpt
        if self.load_ckpt or config.mode in ['test', 'cluster']:
            self.load_checkpoint()