Example no. 1
0
 def __init__(self):
     """Wrap a pretrained ResNet-50 backbone with a 1024-d projection head."""
     super(resnet_modified_medium, self).__init__()
     # ImageNet-pretrained backbone; its final feature map is 7x7x2048.
     self.resnet = tv.models.resnet50(pretrained=True)
     # Head: linear projection to 1024 dims, leaky ReLU, plus dropout in
     # both 2-d (feature-map) and plain flavors.
     drop_p = 0.5
     self.linear = nn.Linear(7 * 7 * 2048, 1024)
     self.dropout2d = nn.Dropout2d(drop_p)
     self.dropout = nn.Dropout(drop_p)
     self.relu = nn.LeakyReLU()
     initLinear(self.linear)
Example no. 2
0
 def __init__(self):
     """Wrap a pretrained ResNet-34 backbone with a 1024-d projection head."""
     super(ResNetModifiedSmall, self).__init__()
     # ImageNet-pretrained backbone; its final feature map is 7x7x512.
     self.resnet = tv.models.resnet34(pretrained=True)
     # Head: linear projection to 1024 dims, leaky ReLU, plus dropout in
     # both 2-d (feature-map) and plain flavors.
     drop_p = 0.5
     self.linear = nn.Linear(7 * 7 * 512, 1024)
     self.dropout2d = nn.Dropout2d(drop_p)
     self.dropout = nn.Dropout(drop_p)
     self.relu = nn.LeakyReLU()
     initLinear(self.linear)
Example no. 3
0
    def __init__(self):
        """VGG-16 feature extractor followed by a two-layer MLP head."""
        super(vgg_modified, self).__init__()
        # Pretrained VGG-16; only its convolutional feature stack is kept
        # as a separate handle for the forward pass.
        self.vgg = tv.models.vgg16(pretrained=True)
        self.vgg_features = self.vgg.features
        # Classifier head: 512*7*7 -> 1024 -> 1024, each linear followed by
        # an in-place ReLU and default-probability dropout.
        self.lin1 = nn.Linear(512 * 7 * 7, 1024)
        self.relu1 = nn.ReLU(True)
        self.dropout1 = nn.Dropout()
        self.lin2 = nn.Linear(1024, 1024)
        self.relu2 = nn.ReLU(True)
        self.dropout2 = nn.Dropout()

        for layer in (self.lin1, self.lin2):
            initLinear(layer)
Example no. 4
0
    def __init__(self,
                 encoding,
                 splits=[50, 100, 283],
                 prediction_type="max_max",
                 ngpus=1,
                 cnn_type="resnet_101"):
        """Build the CRF baseline model on top of a modified CNN backbone.

        Args:
            encoding: project encoding object; this ctor reads n_verbs(),
                max_roles(), n_verbrolenoun(), vr_id_n and id_vr from it
                (project type — see its definition for the full contract).
            splits: ascending noun-vocabulary-size thresholds used to bucket
                verb-role pairs into groups that share an output layer.
                NOTE(review): mutable default argument — safe only because it
                is never mutated here; confirm no caller mutates it either.
            prediction_type: stored on self; not read inside __init__.
            ngpus: number of GPUs to pre-copy the verb-role index table to.
            cnn_type: one of "resnet_101", "resnet_50", "resnet_34"; any other
                value prints an error and exits the whole process.
        """
        self.normalize = tv.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                 std=[0.229, 0.224, 0.225])
        # NOTE(review): tv.transforms.Scale is deprecated in newer torchvision
        # (renamed Resize) — keep in mind if the dependency is upgraded.
        self.train_transform = tv.transforms.Compose([
            tv.transforms.Scale(224),
            tv.transforms.RandomCrop(224),
            tv.transforms.RandomHorizontalFlip(),
            tv.transforms.ToTensor(),
            self.normalize,
        ])

        # Deterministic variant for dev/eval: center crop, no flip.
        self.dev_transform = tv.transforms.Compose([
            tv.transforms.Scale(224),
            tv.transforms.CenterCrop(224),
            tv.transforms.ToTensor(),
            self.normalize,
        ])

        self.broadcast = []          # per-GPU copies of v_vr, filled below
        self.nsplits = len(splits)
        self.splits = splits
        self.encoding = encoding
        self.prediction_type = prediction_type
        self.n_verbs = encoding.n_verbs()
        self.split_vr = {}           # split index -> list of verb-role entries
        self.v_roles = {}            # verb id -> list of its role entries
        #cnn
        print(cnn_type)
        if cnn_type == "resnet_101": self.cnn = resnet_modified_large()
        elif cnn_type == "resnet_50": self.cnn = resnet_modified_medium()
        elif cnn_type == "resnet_34": self.cnn = resnet_modified_small()
        else:
            # NOTE(review): hard process exit on a bad argument — raising
            # ValueError would be friendlier to library callers.
            print("unknown base network")
            exit()
        self.rep_size = self.cnn.rep_size()
        for s in range(0, len(splits)):
            self.split_vr[s] = []

        #sort by length
        # vr_id_n presumably maps verb-role id -> collection of noun ids;
        # only its size is used here to pick a split bucket.
        remapping = []
        for (vr, ns) in encoding.vr_id_n.items():
            remapping.append((vr, len(ns)))

        #find the right split
        # Each verb-role pair goes into the first split whose threshold is
        # >= its noun count; pairs larger than every threshold land in the
        # last bucket (i == len(splits) would IndexError — assumes the last
        # threshold covers the max noun count; TODO confirm).
        for (vr, l) in remapping:
            i = 0
            for s in splits:
                if l <= s: break
                i += 1
            _id = (i, vr)
            self.split_vr[i].append(_id)
        total = 0
        for (k, v) in self.split_vr.items():
            #print "{} {} {}".format(k, len(v), splits[k]*len(v))
            total += splits[k] * len(v)
            #print "total compute : {}".format(total)

        #keep the splits sorted by vr id, to keep the model const w.r.t the encoding
        # Re-number each bucket: entry becomes (split idx, position in split,
        # vr id), and is also indexed per-verb in self.v_roles.
        for i in range(0, len(splits)):
            s = sorted(self.split_vr[i], key=lambda x: x[1])
            self.split_vr[i] = []
            #enumerate?
            for (x, vr) in s:
                _id = (x, len(self.split_vr[i]), vr)
                self.split_vr[i].append(_id)
                (v, r) = encoding.id_vr[vr]
                if v not in self.v_roles: self.v_roles[v] = []
                self.v_roles[v].append(_id)

        #create the mapping for grouping the roles back to the verbs later
        max_roles = encoding.max_roles()

        #need a list that is nverbs by 6
        # Flat (n_verbs * max_roles) table; entry = 1-based position of the
        # verb-role pair inside the concatenated split outputs, 0 = padding.
        self.v_vr = [0 for i in range(0, self.encoding.n_verbs() * max_roles)]
        splits_offset = []
        for i in range(0, len(splits)):
            if i == 0: splits_offset.append(0)
            else:
                splits_offset.append(splits_offset[-1] +
                                     len(self.split_vr[i - 1]))

        #and we need to compute the position of the corresponding roles, and pad with the 0 symbol
        for i in range(0, self.encoding.n_verbs()):
            offset = max_roles * i
            roles = sorted(self.v_roles[i],
                           key=lambda x: x[2])  #stored in role order
            self.v_roles[i] = roles
            k = 0
            for (s, pos, r) in roles:
                #add one to account of the 0th element being the padding
                self.v_vr[offset + k] = splits_offset[s] + pos + 1
                k += 1
            #pad
            while k < max_roles:
                self.v_vr[offset + k] = 0
                k += 1

        # Materialize the index table on the default GPU, plus one explicit
        # copy per GPU for later broadcast use; requires CUDA at construction.
        gv_vr = Variable(torch.LongTensor(
            self.v_vr).cuda())  #.view(self.encoding.n_verbs(), -1)
        for g in range(0, ngpus):
            self.broadcast.append(Variable(
                torch.LongTensor(self.v_vr).cuda(g)))
        self.v_vr = gv_vr
        #print self.v_vr

        #verb potential
        self.linear_v = nn.Linear(self.rep_size, self.encoding.n_verbs())
        #verb-role-noun potentials
        # One output layer per split: splits[i] noun scores for each of the
        # split's verb-role pairs.
        self.linear_vrn = nn.ModuleList([
            nn.Linear(self.rep_size, splits[i] * len(self.split_vr[i]))
            for i in range(0, len(splits))
        ])
        self.total_vrn = 0
        for i in range(0, len(splits)):
            self.total_vrn += splits[i] * len(self.split_vr[i])
        print("total encoding vrn : {0}, with padding in {1} groups : {2}".
              format(encoding.n_verbrolenoun(), self.total_vrn, len(splits)))

        #initilize everything
        initLinear(self.linear_v)
        for _l in self.linear_vrn:
            initLinear(_l)
        self.mask_args()