Code Example #1
File: module_net.py Project: rizar/CLOSURE
            def create_module():
                if num_inputs > 2:
                    raise Exception('Not implemented!')

                if use_film == 1:
                    return FiLMModule(shared_block, fn_idx)

                if use_film == 2:
                    separate_core_block = SharedFiLMedModule(
                        module_dim,
                        module_W,
                        kernel_size=module_kernel_size,
                        with_residual=module_residual)
                    return FiLMModule(separate_core_block, fn_idx)

                if use_simple_block:
                    # brutally simple concatenation block
                    # with 2 layers, no residual connection
                    return SimpleConcatBlock(module_dim,
                                             kernel_size=module_kernel_size)

                if num_inputs in [0, 1]:
                    return ResidualBlock(module_dim,
                                         kernel_size=module_kernel_size,
                                         with_residual=module_residual,
                                         with_batchnorm=module_batchnorm,
                                         shared_block=shared_block,
                                         post_linear=kl_loss)
                else:
                    return ConcatBlock(module_dim,
                                       kernel_size=module_kernel_size,
                                       with_residual=module_residual,
                                       with_batchnorm=module_batchnorm,
                                       shared_block=shared_block,
                                       post_linear=kl_loss)
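
The factory above dispatches purely on the captured flags, with the FiLM variants taking precedence over the simple block, which in turn takes precedence over arity-based selection. A minimal standalone sketch of that precedence order (class names are returned as strings here; the real FiLMModule, SimpleConcatBlock, ResidualBlock, and ConcatBlock live in the surrounding module_net.py):

def pick_block(use_film, use_simple_block, num_inputs):
    # mirrors create_module() above: FiLM > simple block > arity
    if num_inputs > 2:
        raise Exception('Not implemented!')
    if use_film in (1, 2):
        return 'FiLMModule'
    if use_simple_block:
        return 'SimpleConcatBlock'
    return 'ResidualBlock' if num_inputs in (0, 1) else 'ConcatBlock'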
Code Example #2
def build_cnn(feat_dim=(1024, 14, 14),
              res_block_dim=128,
              num_res_blocks=0,
              proj_dim=512,
              pooling='maxpool2'):
    C, H, W = feat_dim
    layers = []
    if num_res_blocks > 0:
        layers.append(nn.Conv2d(C, res_block_dim, kernel_size=3, padding=1))
        layers.append(nn.ReLU(inplace=True))
        C = res_block_dim
        for _ in range(num_res_blocks):
            layers.append(ResidualBlock(C))
    if proj_dim > 0:
        layers.append(nn.Conv2d(C, proj_dim, kernel_size=1, padding=0))
        layers.append(nn.ReLU(inplace=True))
        C = proj_dim
    if pooling == 'maxpool2':
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        H, W = H // 2, W // 2
    return nn.Sequential(*layers), (C, H, W)
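
A minimal usage sketch of build_cnn with its defaults (assumes torch.nn is imported as nn, as above): no residual blocks are added, the 1024 input channels are projected to 512 by the 1x1 conv, and the single 2x2 max-pool halves the spatial size.

cnn, (C, H, W) = build_cnn(feat_dim=(1024, 14, 14))
assert (C, H, W) == (512, 7, 7)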
Code Example #3
File: module_net.py Project: gabolsgabs/film
    def __init__(self,
                 vocab,
                 feature_dim=(1024, 14, 14),
                 stem_num_layers=2,
                 stem_batchnorm=False,
                 module_dim=128,
                 module_residual=True,
                 module_batchnorm=False,
                 classifier_proj_dim=512,
                 classifier_downsample='maxpool2',
                 classifier_fc_layers=(1024, ),
                 classifier_batchnorm=False,
                 classifier_dropout=0,
                 verbose=True):
        super(ModuleNet, self).__init__()

        self.stem = build_stem(feature_dim[0],
                               module_dim,
                               num_layers=stem_num_layers,
                               with_batchnorm=stem_batchnorm)
        if verbose:
            print('Here is my stem:')
            print(self.stem)

        num_answers = len(vocab['answer_idx_to_token'])
        module_H, module_W = feature_dim[1], feature_dim[2]
        self.classifier = build_classifier(module_dim,
                                           module_H,
                                           module_W,
                                           num_answers,
                                           classifier_fc_layers,
                                           classifier_proj_dim,
                                           classifier_downsample,
                                           with_batchnorm=classifier_batchnorm,
                                           dropout=classifier_dropout)
        if verbose:
            print('Here is my classifier:')
            print(self.classifier)
        self.stem_times = []
        self.module_times = []
        self.classifier_times = []
        self.timing = False

        self.function_modules = {}
        self.function_modules_num_inputs = {}
        self.vocab = vocab
        for fn_str in vocab['program_token_to_idx']:
            num_inputs = vr.programs.get_num_inputs(fn_str)
            self.function_modules_num_inputs[fn_str] = num_inputs
            if fn_str == 'scene' or num_inputs == 1:
                mod = ResidualBlock(module_dim,
                                    with_residual=module_residual,
                                    with_batchnorm=module_batchnorm)
            elif num_inputs == 2:
                mod = ConcatBlock(module_dim,
                                  with_residual=module_residual,
                                  with_batchnorm=module_batchnorm)
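            # Assumes every program token is 'scene', unary, or binary;
            # any other arity would leave `mod` unbound at this point.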
            self.add_module(fn_str, mod)
            self.function_modules[fn_str] = mod

        self.save_module_outputs = False
Code Example #4
    def __init__(self,
                 vocab,
                 feature_dim,
                 stem_num_layers,
                 stem_batchnorm,
                 stem_subsample_layers,
                 stem_kernel_size,
                 stem_stride,
                 stem_padding,
                 stem_dim,
                 module_dim,
                 module_kernel_size,
                 module_input_proj,
                 forward_func,
                 use_color,
                 module_residual=True,
                 module_batchnorm=False,
                 classifier_proj_dim=512,
                 classifier_downsample='maxpool2',
                 classifier_fc_layers=(1024, ),
                 classifier_batchnorm=False,
                 classifier_dropout=0,
                 use_film=False,
                 verbose=True):
        super().__init__()

        self.module_dim = module_dim
        self.func = FUNC_DICT[forward_func]
        self.use_color = use_color

        self.stem = build_stem(feature_dim[0],
                               stem_dim,
                               module_dim,
                               num_layers=stem_num_layers,
                               subsample_layers=stem_subsample_layers,
                               kernel_size=stem_kernel_size,
                               padding=stem_padding,
                               with_batchnorm=stem_batchnorm)
        tmp = self.stem(
            Variable(
                torch.zeros(
                    [1, feature_dim[0], feature_dim[1], feature_dim[2]])))
        module_H = tmp.size(2)
        module_W = tmp.size(3)

        self.coords = coord_map((module_H, module_W)).unsqueeze(0)

        if verbose:
            print('Here is my stem:')
            print(self.stem)

        num_answers = len(vocab['answer_idx_to_token'])
        self.classifier = build_classifier(module_dim,
                                           module_H,
                                           module_W,
                                           num_answers,
                                           classifier_fc_layers,
                                           classifier_proj_dim,
                                           classifier_downsample,
                                           with_batchnorm=classifier_batchnorm,
                                           dropout=classifier_dropout)
        if verbose:
            print('Here is my classifier:')
            print(self.classifier)

        self.unary_function_modules = {}
        self.binary_function_modules = {}
        self.vocab = vocab
        self.use_film = use_film

        if self.use_film:
            unary_mod = FiLMedResBlock(
                module_dim,
                with_residual=module_residual,
                with_intermediate_batchnorm=False,
                with_batchnorm=False,
                with_cond=[True, True],
                num_extra_channels=2,  # was 2 for the original FiLM
                extra_channel_freq=1,
                with_input_proj=module_input_proj,
                num_cond_maps=0,
                kernel_size=module_kernel_size,
                batchnorm_affine=False,
                num_layers=1,
                condition_method='bn-film',
                debug_every=float('inf'))
            binary_mod = ConcatFiLMedResBlock(
                2,
                module_dim,
                with_residual=module_residual,
                with_intermediate_batchnorm=False,
                with_batchnorm=False,
                with_cond=[True, True],
                num_extra_channels=2,  # was 2 for the original FiLM
                extra_channel_freq=1,
                with_input_proj=module_input_proj,
                num_cond_maps=0,
                kernel_size=module_kernel_size,
                batchnorm_affine=False,
                num_layers=1,
                condition_method='bn-film',
                debug_every=float('inf'))

            self.unary_function_modules['film'] = unary_mod
            self.binary_function_modules['film'] = binary_mod
            self.add_module('film_unary', unary_mod)
            self.add_module('film_binary', binary_mod)

        else:
            for fn_str in vocab['program_token_to_idx']:
                arity = self.vocab['program_token_arity'][fn_str]
                if arity == 2 and forward_func == 'tree':
                    binary_mod = ConcatBlock(module_dim,
                                             kernel_size=module_kernel_size,
                                             with_residual=module_residual,
                                             with_batchnorm=module_batchnorm,
                                             use_simple=False)

                    self.add_module(fn_str, binary_mod)
                    self.binary_function_modules[fn_str] = binary_mod

                else:
                    mod = ResidualBlock(module_dim,
                                        kernel_size=module_kernel_size,
                                        with_residual=module_residual,
                                        with_batchnorm=module_batchnorm)

                    self.add_module(fn_str, mod)
                    self.unary_function_modules[fn_str] = mod

        self.declare_film_coefficients()
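
The dummy forward pass above (tmp = self.stem(...)) is a shape probe: instead of deriving the post-stem spatial size analytically, it pushes a zero tensor through the stem and reads the size off the output. A minimal standalone sketch of the idiom with a hypothetical stand-in stem; the Variable wrapper is legacy and a plain tensor works in current PyTorch:

import torch
import torch.nn as nn

# Stand-in stem: 1024 -> 128 channels with one 2x downsampling step.
stem = nn.Sequential(nn.Conv2d(1024, 128, kernel_size=3, padding=1),
                     nn.ReLU(inplace=True),
                     nn.MaxPool2d(kernel_size=2, stride=2))
with torch.no_grad():
    tmp = stem(torch.zeros(1, 1024, 14, 14))
module_H, module_W = tmp.size(2), tmp.size(3)  # -> 7, 7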
Code Example #5
File: module_net.py Project: dschaehi/film
  def __init__(self, vocab, feature_dim=(1024, 14, 14),
               stem_use_resnet=False,
               stem_resnet_fixed=False,
               resnet_model_stage=3,
               stem_num_layers=2,
               stem_batchnorm=False,
               stem_kernel_size=3,
               stem_stride=1,
               stem_stride2_freq=0,
               stem_padding=None,
               module_dim=128,
               module_residual=True,
               module_batchnorm=False,
               classifier_proj_dim=512,
               classifier_downsample='maxpool2',
               classifier_fc_layers=(1024,),
               classifier_batchnorm=False,
               classifier_dropout=0,
               verbose=True):
    super(ModuleNet, self).__init__()

    self.stem = build_stem(stem_use_resnet, stem_resnet_fixed, feature_dim[0], module_dim,
                           resnet_model_stage=resnet_model_stage, num_layers=stem_num_layers, with_batchnorm=stem_batchnorm,
                           kernel_size=stem_kernel_size, stride=stem_stride, stride2_freq=stem_stride2_freq, padding=stem_padding)
    if verbose:
      print('Here is my stem:')
      print(self.stem)

    if stem_stride2_freq > 0:
      module_H = feature_dim[1] // (2 ** (stem_num_layers // stem_stride2_freq))
      module_W = feature_dim[2] // (2 ** (stem_num_layers // stem_stride2_freq))
    else:
      module_H = feature_dim[1]
      module_W = feature_dim[2]

    num_answers = len(vocab['answer_idx_to_token'])
    self.classifier = build_classifier(module_dim, module_H, module_W, num_answers,
                                       classifier_fc_layers,
                                       classifier_proj_dim,
                                       classifier_downsample,
                                       with_batchnorm=classifier_batchnorm,
                                       dropout=classifier_dropout)
    if verbose:
      print('Here is my classifier:')
      print(self.classifier)
    self.stem_times = []
    self.module_times = []
    self.classifier_times = []
    self.timing = False

    self.function_modules = {}
    self.function_modules_num_inputs = dict(vocab['program_token_num_inputs'])
    self.vocab = vocab
    self.scene = None
    for fn_str in vocab['program_token_to_idx']:
      # num_inputs = vr.programs.get_num_inputs(fn_str)
      # self.function_modules_num_inputs[fn_str] = num_inputs
      num_inputs = self.function_modules_num_inputs[fn_str]
      if num_inputs == 0 and self.scene is None:
        self.scene = fn_str
      elif fn_str == 'scene':
        self.scene = fn_str
      if num_inputs == 0 or num_inputs == 1:
      # if fn_str == 'scene' or num_inputs == 1:
        mod = ResidualBlock(module_dim,
                with_residual=module_residual,
                with_batchnorm=module_batchnorm)
      elif num_inputs >= 2:
        mod = ConcatBlock(num_inputs, module_dim,
                with_residual=module_residual,
                with_batchnorm=module_batchnorm)
      self.add_module(fn_str, mod)
      self.function_modules[fn_str] = mod

    self.save_module_outputs = False
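
Unlike Examples #4, #6, and #7, this variant computes the post-stem size analytically: every stem_stride2_freq-th layer strides by 2, so the input is halved stem_num_layers // stem_stride2_freq times. A worked example with hypothetical values:

# feature_dim = (1024, 14, 14), stem_num_layers = 4, stem_stride2_freq = 2:
# two of the four stem layers stride by 2, halving the 14x14 map twice.
module_H = 14 // (2 ** (4 // 2))  # 14 // 4 == 3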
Code Example #6
File: module_net.py Project: rizar/CLOSURE
    def __init__(self,
                 vocab,
                 feature_dim,
                 use_film,
                 use_simple_block,
                 stem_num_layers,
                 stem_batchnorm,
                 stem_subsample_layers,
                 stem_kernel_size,
                 stem_stride,
                 stem_padding,
                 stem_dim,
                 module_dim,
                 module_pool,
                 module_use_gammas,
                 module_kernel_size,
                 module_input_proj,
                 module_residual=True,
                 module_batchnorm=False,
                 module_num_layers=1,
                 mod_id_loss=False,
                 kl_loss=False,
                 learn_control=False,
                 rnn_dim=None,
                 classifier_proj_dim=512,
                 classifier_downsample='maxpool2',
                 classifier_fc_layers=(1024, ),
                 classifier_batchnorm=False,
                 classifier_dropout=0,
                 discriminator_proj_dim=None,
                 discriminator_downsample=None,
                 discriminator_fc_layers=None,
                 discriminator_dropout=None,
                 verbose=True,
                 type_anonymizer=False):
        super(ModuleNet, self).__init__()

        if discriminator_proj_dim is None:
            discriminator_proj_dim = classifier_proj_dim
        if discriminator_downsample is None:
            discriminator_downsample = classifier_downsample
        if discriminator_fc_layers is None:
            discriminator_fc_layers = classifier_fc_layers
        if discriminator_dropout is None:
            discriminator_dropout = classifier_dropout

        self.module_dim = module_dim
        self.use_film = use_film
        self.use_simple_block = use_simple_block
        self.mod_id_loss = mod_id_loss
        self.kl_loss = kl_loss
        self.learn_control = learn_control

        self.stem = build_stem(feature_dim[0],
                               stem_dim,
                               module_dim,
                               num_layers=stem_num_layers,
                               subsample_layers=stem_subsample_layers,
                               kernel_size=stem_kernel_size,
                               padding=stem_padding,
                               with_batchnorm=stem_batchnorm)
        tmp = self.stem(
            Variable(
                torch.zeros(
                    [1, feature_dim[0], feature_dim[1], feature_dim[2]])))
        module_H = tmp.size(2)
        module_W = tmp.size(3)

        self.coords = coord_map((module_H, module_W))

        if verbose:
            print('Here is my stem:')
            print(self.stem)

        classifier_kwargs = dict(module_C=module_dim,
                                 module_H=module_H,
                                 module_W=module_W,
                                 num_answers=len(vocab['answer_idx_to_token']),
                                 fc_dims=classifier_fc_layers,
                                 proj_dim=classifier_proj_dim,
                                 downsample=classifier_downsample,
                                 with_batchnorm=classifier_batchnorm,
                                 dropout=classifier_dropout)
        discriminator_kwargs = dict(module_C=module_dim,
                                    module_H=module_H,
                                    module_W=module_W,
                                    num_answers=len(
                                        vocab['program_idx_to_token']),
                                    fc_dims=discriminator_fc_layers,
                                    proj_dim=discriminator_proj_dim,
                                    downsample=discriminator_downsample,
                                    with_batchnorm=False,
                                    dropout=discriminator_dropout)
        if self.use_film:
            classifier_kwargs['module_H'] = 1
            classifier_kwargs['module_W'] = 1
            discriminator_kwargs['module_H'] = 1
            discriminator_kwargs['module_W'] = 1

        self.classifier = build_classifier(**classifier_kwargs)
        if self.mod_id_loss:
            self.module_identifier = build_classifier(**discriminator_kwargs)

        if verbose:
            print('Here is my classifier:')
            print(self.classifier)

        self.function_modules = {}
        self.function_modules_num_inputs = {}
        self.vocab = vocab

        shared_block = None
        if type_anonymizer:
            shared_block = ResidualBlock(module_dim,
                                         kernel_size=module_kernel_size,
                                         with_residual=module_residual,
                                         with_batchnorm=module_batchnorm)
        elif use_film == 1:
            assert module_W == module_H
            shared_block = SharedFiLMedModule(
                module_dim,
                kernel_size=module_kernel_size,
                num_layers=module_num_layers,
                with_residual=module_residual,
                pool=module_pool,
                use_gammas=module_use_gammas,
                post_linear=kl_loss,
                learn_embeddings=not learn_control)
        if shared_block:
            self.shared_block = shared_block
            self.add_module('shared', shared_block)

        for fn_str, fn_idx in vocab['program_token_to_idx'].items():
            num_inputs = vocab['program_token_arity'][fn_str]
            self.function_modules_num_inputs[fn_str] = num_inputs

            def create_module():
                if num_inputs > 2:
                    raise Exception('Not implemented!')

                if use_film == 1:
                    return FiLMModule(shared_block, fn_idx)

                if use_film == 2:
                    separate_core_block = SharedFiLMedModule(
                        module_dim,
                        module_W,
                        kernel_size=module_kernel_size,
                        with_residual=module_residual)
                    return FiLMModule(separate_core_block, fn_idx)

                if use_simple_block:
                    # brutally simple concatenation block
                    # with 2 layers, no residual connection
                    return SimpleConcatBlock(module_dim,
                                             kernel_size=module_kernel_size)

                if num_inputs in [0, 1]:
                    return ResidualBlock(module_dim,
                                         kernel_size=module_kernel_size,
                                         with_residual=module_residual,
                                         with_batchnorm=module_batchnorm,
                                         shared_block=shared_block,
                                         post_linear=kl_loss)
                else:
                    return ConcatBlock(module_dim,
                                       kernel_size=module_kernel_size,
                                       with_residual=module_residual,
                                       with_batchnorm=module_batchnorm,
                                       shared_block=shared_block,
                                       post_linear=kl_loss)

            mod = create_module()
            if mod is not None:
                self.add_module(fn_str, mod)
                self.function_modules[fn_str] = mod

        self.save_module_outputs = False
        self.noise_enabled = True

        if learn_control:
            self.controller = MACControl(30, rnn_dim, module_dim)
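
Throughout these examples, each program token's module is registered twice: self.add_module(fn_str, mod) makes the submodule (and its parameters) visible to autograd and optimizers, while the plain dict gives direct lookup by token at forward time. A minimal sketch of the pattern, with nn.Identity as a hypothetical stand-in for the real blocks:

import torch.nn as nn

class TinyModuleNet(nn.Module):
    def __init__(self, tokens):
        super().__init__()
        self.function_modules = {}
        for fn_str in tokens:
            mod = nn.Identity()  # stand-in for ResidualBlock / ConcatBlock
            self.add_module(fn_str, mod)         # registers parameters
            self.function_modules[fn_str] = mod  # token -> module lookup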
Code Example #7
    def __init__(self,
                 vocab,
                 feature_dim,
                 use_film,
                 use_simple_block,
                 sharing_patterns,
                 stem_num_layers,
                 stem_batchnorm,
                 stem_subsample_layers,
                 stem_kernel_size,
                 stem_stride,
                 stem_padding,
                 stem_dim,
                 module_dim,
                 module_kernel_size,
                 module_input_proj,
                 module_residual=True,
                 module_batchnorm=False,
                 classifier_proj_dim=512,
                 classifier_downsample='maxpool2',
                 classifier_fc_layers=(1024, ),
                 classifier_batchnorm=False,
                 classifier_dropout=0,
                 verbose=True):
        super(ModuleNet, self).__init__()

        self.module_dim = module_dim

        # 0 or 1: whether to use FiLM blocks (0 falls back to the original EE model)
        self.use_film = use_film
        # 0 or 1: whether to use a simple 3x3 conv + ReLU instead of ResNet blocks
        self.use_simple_block = use_simple_block

        # A list of two 0/1 flags, only active when self.use_film == 1.
        # The first flag of 1 shares the CNN weights across the FiLM blocks, 0 otherwise.
        # The second flag of 1 shares the FiLM coefficients across the FiLM blocks, 0 otherwise.
        # So [1, 0] shares the CNN weights while giving each module in the program
        # its own FiLM coefficients (see the illustration after this example).
        self.sharing_patterns = sharing_patterns

        self.stem = build_stem(feature_dim[0],
                               stem_dim,
                               module_dim,
                               num_layers=stem_num_layers,
                               subsample_layers=stem_subsample_layers,
                               kernel_size=stem_kernel_size,
                               padding=stem_padding,
                               with_batchnorm=stem_batchnorm)
        tmp = self.stem(
            Variable(
                torch.zeros(
                    [1, feature_dim[0], feature_dim[1], feature_dim[2]])))
        module_H = tmp.size(2)
        module_W = tmp.size(3)

        self.coords = coord_map((module_H, module_W))

        if verbose:
            print('Here is my stem:')
            print(self.stem)

        num_answers = len(vocab['answer_idx_to_token'])
        self.classifier = build_classifier(module_dim,
                                           module_H,
                                           module_W,
                                           num_answers,
                                           classifier_fc_layers,
                                           classifier_proj_dim,
                                           classifier_downsample,
                                           with_batchnorm=classifier_batchnorm,
                                           dropout=classifier_dropout)
        if verbose:
            print('Here is my classifier:')
            print(self.classifier)
        self.stem_times = []
        self.module_times = []
        self.classifier_times = []
        self.timing = False

        self.function_modules = {}
        self.function_modules_num_inputs = {}
        self.fn_str_2_filmId = {}
        self.vocab = vocab
        for fn_str in vocab['program_token_to_idx']:
            num_inputs = vocab['program_token_arity'][fn_str]
            self.function_modules_num_inputs[fn_str] = num_inputs

            if self.use_film:
                if self.sharing_patterns[1] == 1:
                    self.fn_str_2_filmId[fn_str] = 0
                else:
                    self.fn_str_2_filmId[fn_str] = len(self.fn_str_2_filmId)

            if fn_str == 'scene' or num_inputs == 1:
                if self.use_film:
                    if self.sharing_patterns[0] == 1:
                        mod = None
                    else:
                        mod = FiLMedResBlock(
                            module_dim,
                            with_residual=module_residual,
                            with_intermediate_batchnorm=False,
                            with_batchnorm=False,
                            with_cond=[True, True],
                            num_extra_channels=2,  # was 2 for the original FiLM
                            extra_channel_freq=1,
                            with_input_proj=module_input_proj,
                            num_cond_maps=0,
                            kernel_size=module_kernel_size,
                            batchnorm_affine=False,
                            num_layers=1,
                            condition_method='bn-film',
                            debug_every=float('inf'))
                else:
                    if self.use_simple_block:
                        mod = SimpleVisualBlock(module_dim,
                                                kernel_size=module_kernel_size)
                    else:
                        mod = ResidualBlock(module_dim,
                                            kernel_size=module_kernel_size,
                                            with_residual=module_residual,
                                            with_batchnorm=module_batchnorm)
            elif num_inputs == 2:
                if self.use_film:
                    if self.sharing_patterns[0] == 1:
                        mod = None
                    else:
                        mod = ConcatFiLMedResBlock(
                            2,
                            module_dim,
                            with_residual=module_residual,
                            with_intermediate_batchnorm=False,
                            with_batchnorm=False,
                            with_cond=[True, True],
                            num_extra_channels=2,  # was 2 for the original FiLM
                            extra_channel_freq=1,
                            with_input_proj=module_input_proj,
                            num_cond_maps=0,
                            kernel_size=module_kernel_size,
                            batchnorm_affine=False,
                            num_layers=1,
                            condition_method='bn-film',
                            debug_every=float('inf'))
                else:
                    mod = ConcatBlock(module_dim,
                                      kernel_size=module_kernel_size,
                                      with_residual=module_residual,
                                      with_batchnorm=module_batchnorm)
            else:
                raise Exception('Not implemented!')

            if mod is not None:
                self.add_module(fn_str, mod)
                self.function_modules[fn_str] = mod

        if self.use_film and self.sharing_patterns[0] == 1:
            mod = ConcatFiLMedResBlock(
                2,
                module_dim,
                with_residual=module_residual,
                with_intermediate_batchnorm=False,
                with_batchnorm=False,
                with_cond=[True, True],
                num_extra_channels=2,  # was 2 for the original FiLM
                extra_channel_freq=1,
                with_input_proj=module_input_proj,
                num_cond_maps=0,
                kernel_size=module_kernel_size,
                batchnorm_affine=False,
                num_layers=1,
                condition_method='bn-film',
                debug_every=float('inf'))
            self.add_module('shared_film', mod)
            self.function_modules['shared_film'] = mod

        self.declare_film_coefficients()

        self.save_module_outputs = False
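
A plain-Python illustration of how the fn_str_2_filmId table above is filled: sharing the FiLM coefficients (sharing_patterns[1] == 1) maps every token to id 0, while the unshared case hands out a fresh id per token. The token names below are hypothetical:

def film_ids(tokens, share_coefficients):
    ids = {}
    for fn_str in tokens:
        ids[fn_str] = 0 if share_coefficients else len(ids)
    return ids

film_ids(['scene', 'filter_red', 'count'], share_coefficients=True)
# -> {'scene': 0, 'filter_red': 0, 'count': 0}
film_ids(['scene', 'filter_red', 'count'], share_coefficients=False)
# -> {'scene': 0, 'filter_red': 1, 'count': 2}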