Example #1
def build_cnn(feat_dim=(1024, 14, 14),
              res_block_dim=128,
              num_res_blocks=0,
              proj_dim=512,
              pooling='maxpool2'):
    C, H, W = feat_dim
    layers = []
    if num_res_blocks > 0:
        # Keras Conv2D takes (filters, kernel_size); the input channel count
        # is inferred, and integer padding is not accepted, so padding='same'
        # stands in for the PyTorch padding=1.
        layers.append(
            tf.keras.layers.Conv2D(res_block_dim,
                                   kernel_size=(3, 3),
                                   padding='same'))
        layers.append(tf.keras.layers.ReLU())
        C = res_block_dim
        for _ in range(num_res_blocks):
            layers.append(ResidualBlock(C))
    if proj_dim > 0:
        layers.append(
            tf.keras.layers.Conv2D(proj_dim, kernel_size=(1, 1),
                                   padding='valid'))
        layers.append(tf.keras.layers.ReLU())
        C = proj_dim
    if pooling == 'maxpool2':
        layers.append(tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2))
        H, W = H // 2, W // 2

    model = tf.keras.Sequential()
    for layer in layers:
        model.add(layer)
    return model, (C, H, W)
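A quick smoke test for the Keras variant above (a sketch; Keras expects
channels-last input, so the (1024, 14, 14) feature map is fed as
(batch, 14, 14, 1024)):

import tensorflow as tf

model, (C, H, W) = build_cnn()      # defaults: 1x1 proj to 512, 2x2 maxpool
x = tf.random.normal((2, 14, 14, 1024))
y = model(x)
print(y.shape)                      # (2, 7, 7, 512)
print((C, H, W))                    # (512, 7, 7)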
Example #2
 def __init__(self, dim, with_residual=True, with_batchnorm=True):
     super(ConcatBlock, self).__init__()
     # Keras infers the input channel count (2 * dim after concatenation),
     # so only the output filter count is given here.
     self.proj = tf.keras.layers.Conv2D(dim,
                                        kernel_size=(1, 1),
                                        padding='same')
     self.res_block = ResidualBlock(dim,
                                    with_residual=with_residual,
                                    with_batchnorm=with_batchnorm)
Example #3
  def __init__(self, vocab, feature_dim=(1024, 14, 14),
               stem_num_layers=2,
               stem_batchnorm=False,
               module_dim=128,
               module_residual=True,
               module_batchnorm=False,
               classifier_proj_dim=512,
               classifier_downsample='maxpool2',
               classifier_fc_layers=(1024,),
               classifier_batchnorm=False,
               classifier_dropout=0,
               verbose=True):
    super(ModuleNet, self).__init__()

    self.stem = build_stem(feature_dim[0], module_dim,
                           num_layers=stem_num_layers,
                           with_batchnorm=stem_batchnorm)
    if verbose:
      print('Here is my stem:')
      print(self.stem)

    num_answers = len(vocab['answer_idx_to_token'])
    module_H, module_W = feature_dim[1], feature_dim[2]
    self.classifier = build_classifier(module_dim, module_H, module_W, num_answers,
                                       classifier_fc_layers,
                                       classifier_proj_dim,
                                       classifier_downsample,
                                       with_batchnorm=classifier_batchnorm,
                                       dropout=classifier_dropout)
    if verbose:
      print('Here is my classifier:')
      print(self.classifier)
    self.stem_times = []
    self.module_times = []
    self.classifier_times = []
    self.timing = False

    self.function_modules = {}
    self.function_modules_num_inputs = {}
    self.vocab = vocab
    for fn_str in vocab['program_token_to_idx']:
      num_inputs = iep.programs.get_num_inputs(fn_str)
      self.function_modules_num_inputs[fn_str] = num_inputs
      if fn_str == 'scene' or num_inputs == 1:
        mod = ResidualBlock(module_dim,
                            with_residual=module_residual,
                            with_batchnorm=module_batchnorm)
      elif num_inputs == 2:
        mod = ConcatBlock(module_dim,
                          with_residual=module_residual,
                          with_batchnorm=module_batchnorm)
      self.add_module(fn_str, mod)
      self.function_modules[fn_str] = mod

    self.save_module_outputs = False
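In this variant, function_modules maps each program token directly to its
module, so an executor can dispatch on the token string. A minimal sketch
(net and images are hypothetical):

feats = net.stem(images)                    # (N, module_dim, 14, 14)
mod = net.function_modules['scene']         # a ResidualBlock
out = mod(feats)                            # same shape as feats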
Example #4
def build_classifier(module_C,
                     module_H,
                     module_W,
                     fc_dims=(),
                     proj_dim=None,
                     downsample='maxpool2',
                     with_batchnorm=True,
                     dropout=0):

    res_block = ResidualBlock(module_C,
                              with_residual=True,
                              with_batchnorm=False)
    layers = [res_block]

    layers.append(nn.Conv2d(module_C, module_C, kernel_size=1))
    if with_batchnorm:
        layers.append(nn.BatchNorm2d(module_C))
    layers.append(nn.ReLU(inplace=True))

    upsample = nn.Upsample(size=[320, 320], mode='bilinear',
                           align_corners=False)

    layers.append(upsample)
    if with_batchnorm:
        layers.append(nn.BatchNorm2d(module_C))
    layers.append(nn.ReLU(inplace=True))

    layers.append(nn.Conv2d(module_C, module_C, kernel_size=1))
    if with_batchnorm:
        layers.append(nn.BatchNorm2d(module_C))
    layers.append(nn.ReLU(inplace=True))

    layers.append(nn.Conv2d(module_C, module_C // 4, kernel_size=1))
    if with_batchnorm:
        layers.append(nn.BatchNorm2d(module_C // 4))
    layers.append(nn.ReLU(inplace=True))

    layers.append(nn.Conv2d(module_C // 4, 4, kernel_size=1))
    if with_batchnorm:
        layers.append(nn.BatchNorm2d(4))
    layers.append(nn.ReLU(inplace=True))

    layers.append(nn.Conv2d(4, 2, kernel_size=1))
    if with_batchnorm:
        layers.append(nn.BatchNorm2d(2))

    return nn.Sequential(*layers)
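End to end, this head maps (N, module_C, H, W) features to a dense 2-channel
320x320 map: the bilinear upsample pins the spatial size, and the chain of
1x1 convolutions narrows module_C -> module_C // 4 -> 4 -> 2. A shape check
(a sketch; ResidualBlock is assumed to preserve its input shape):

import torch

head = build_classifier(module_C=128, module_H=14, module_W=14)
x = torch.randn(2, 128, 14, 14)
print(head(x).shape)  # torch.Size([2, 2, 320, 320])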
Example #5
def build_cnn(feat_dim=(1024, 14, 14),
              res_block_dim=128,
              num_res_blocks=0,
              proj_dim=512,
              pooling='maxpool2'):
    C, H, W = feat_dim
    layers = []
    if num_res_blocks > 0:
        layers.append(nn.Conv2d(C, res_block_dim, kernel_size=3, padding=1))
        layers.append(nn.ReLU(inplace=True))
        C = res_block_dim
        for _ in range(num_res_blocks):
            layers.append(ResidualBlock(C))
    if proj_dim > 0:
        layers.append(nn.Conv2d(C, proj_dim, kernel_size=1, padding=0))
        layers.append(nn.ReLU(inplace=True))
        C = proj_dim
    if pooling == 'maxpool2':
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        H, W = H // 2, W // 2
    return nn.Sequential(*layers), (C, H, W)
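The PyTorch version returns the trunk together with its output shape, which
makes it easy to size downstream heads. A shape check (sketch):

import torch

cnn, (C, H, W) = build_cnn()        # defaults: 1x1 proj to 512, 2x2 maxpool
x = torch.randn(2, 1024, 14, 14)    # channels-first, matching feat_dim
y = cnn(x)
print(y.shape)                      # torch.Size([2, 512, 7, 7])
print((C, H, W))                    # (512, 7, 7)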
Example #6
    def __init__(self,
                 vocab,
                 feature_dim=(1024, 14, 14),
                 stem_num_layers=2,
                 stem_batchnorm=False,
                 module_dim=128,
                 text_dim=1,
                 module_residual=True,
                 module_batchnorm=False,
                 classifier_proj_dim=512,
                 classifier_downsample='maxpool2',
                 classifier_fc_layers=(1024, ),
                 classifier_batchnorm=False,
                 classifier_dropout=0,
                 verbose=True):
        super(ModuleNet, self).__init__()

        self.stem = build_stem(feature_dim[0],
                               module_dim,
                               num_layers=stem_num_layers,
                               with_batchnorm=stem_batchnorm)
        if verbose:
            print('Here is my stem:')
            print(self.stem)
        self.char_lstm = nn.LSTM(input_size=28,
                                 hidden_size=98,
                                 bidirectional=True,
                                 batch_first=True)
        encoder_layer = nn.TransformerEncoderLayer(d_model=28, nhead=7)
        self.char_transformer = nn.TransformerEncoder(
            encoder_layer=encoder_layer, num_layers=3)
        self.char_linear = nn.Linear(28, 196)
        num_answers = len(vocab['answer_idx_to_token'])
        module_H, module_W = feature_dim[1], feature_dim[2]
        self.classifier = build_classifier(module_dim + text_dim,
                                           module_H,
                                           module_W,
                                           num_answers,
                                           classifier_fc_layers,
                                           classifier_proj_dim,
                                           classifier_downsample,
                                           with_batchnorm=classifier_batchnorm,
                                           dropout=classifier_dropout)
        if verbose:
            print('Here is my classifier:')
            print(self.classifier)
        self.stem_times = []
        self.module_times = []
        self.classifier_times = []
        self.timing = False

        self.function_modules = {}
        self.function_modules_num_inputs = {}
        self.vocab = vocab
        self.module_list = []
        for idx, fn_str in enumerate(vocab['program_token_to_idx']):
            num_inputs = iep.programs.get_num_inputs(fn_str)
            self.function_modules_num_inputs[fn_str] = num_inputs
            if fn_str == 'scene' or num_inputs == 1:
                mod = ResidualBlock(module_dim + text_dim,
                                    with_residual=module_residual,
                                    with_batchnorm=module_batchnorm)
            elif num_inputs == 2:
                mod = ConcatBlock(module_dim + text_dim,
                                  with_residual=module_residual,
                                  with_batchnorm=module_batchnorm)
            self.add_module(fn_str, mod)
            self.module_list.append(mod)
            self.function_modules[fn_str] = idx
        self.module_list = nn.ModuleList(self.module_list)
        self.save_module_outputs = False
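Unlike Example #3, this variant stores integer indices in function_modules
and keeps the modules themselves in an nn.ModuleList, so lookup is a
two-step affair. A sketch (net is hypothetical):

idx = net.function_modules['scene']   # an int, not a module
mod = net.module_list[idx]            # the ResidualBlock registered for 'scene'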
Example #7
 def __init__(self, dim, with_residual=True, with_batchnorm=True):
     super(ConcatBlock, self).__init__()
     self.proj = nn.Conv2d(2 * dim, dim, kernel_size=1, padding=0)
     self.res_block = ResidualBlock(dim,
                                    with_residual=with_residual,
                                    with_batchnorm=with_batchnorm)
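The 2 * dim -> dim projection implies the block's two inputs are concatenated
along the channel dimension before the residual block runs. A minimal sketch
of the forward pass this __init__ suggests (hypothetical, since the method is
not shown here; assumes torch and torch.nn.functional as F are imported):

 def forward(self, x, y):
     # (N, dim, H, W) x 2 -> (N, 2*dim, H, W) -> (N, dim, H, W)
     out = torch.cat([x, y], dim=1)
     out = F.relu(self.proj(out))
     return self.res_block(out)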
Example #8
    def __init__(self,
                 vocab,
                 feature_dim=(1024, 14, 14),
                 stem_num_layers=2,
                 stem_batchnorm=False,
                 module_dim=128,
                 module_residual=True,
                 module_batchnorm=False,
                 classifier_proj_dim=512,
                 classifier_downsample='maxpool2',
                 classifier_fc_layers=(1024, ),
                 classifier_batchnorm=False,
                 classifier_dropout=0,
                 verbose=True):
        super(ModuleNet, self).__init__()

        self.stem = build_stem(feature_dim[0],
                               module_dim,
                               num_layers=stem_num_layers,
                               with_batchnorm=stem_batchnorm)
        if verbose:
            print('Here is my stem:')
            print(self.stem)
        self.glove = torchtext.vocab.GloVe(name="6B",
                                           dim=50)  # embedding size = 50
        module_H, module_W = feature_dim[1], feature_dim[2]
        self.classifier = build_classifier(module_dim,
                                           module_H,
                                           module_W,
                                           classifier_fc_layers,
                                           classifier_proj_dim,
                                           classifier_downsample,
                                           with_batchnorm=classifier_batchnorm,
                                           dropout=classifier_dropout)
        if verbose:
            print('Here is my classifier:')
            print(self.classifier)
        self.stem_times = []
        self.module_times = []
        self.classifier_times = []
        self.timing = False

        self.function_modules = {}
        self.function_modules_num_inputs = {}
        self.vocab = vocab

        print("vocab['program_token_to_idx']={}".format(
            vocab['program_token_to_idx']))
        for fn_str in vocab['program_token_to_idx']:
            fn_str = str(fn_str)

            # Parameterized tokens such as 'filter_color[red]' share one
            # module per family, keyed by the prefix before '['.
            fn_family = fn_str.split("[")[0]
            if fn_family in self.function_modules:
                continue

            num_inputs = iep.programs.get_num_inputs(fn_str)
            self.function_modules_num_inputs[fn_family] = num_inputs
            if fn_str == 'scene':
                mod = ResidualBlock(module_dim,
                                    with_residual=module_residual,
                                    with_batchnorm=module_batchnorm)
            elif num_inputs == 1 and "[" not in fn_str:
                mod = ResidualBlock(module_dim,
                                    with_residual=module_residual,
                                    with_batchnorm=module_batchnorm)
            elif num_inputs == 1 and "[" in fn_str:
                mod = ResidualBlock_LangAttention(
                    module_dim,
                    with_residual=module_residual,
                    with_batchnorm=module_batchnorm)
            elif num_inputs == 2:
                mod = ConcatBlock(module_dim,
                                  with_residual=module_residual,
                                  with_batchnorm=module_batchnorm)

            self.add_module(fn_family, mod)
            self.function_modules[fn_family] = mod

        self.save_module_outputs = False
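For reference, the family normalization above collapses parameterized program
tokens onto one shared module per family. A quick illustration with
hypothetical tokens:

tokens = ['scene', 'filter_color[red]', 'filter_color[blue]', 'intersect']
print(sorted({t.split("[")[0] for t in tokens}))
# ['filter_color', 'intersect', 'scene'] -- both filter_color tokens share a module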