def __init__(self):
    super(resnet_modified_medium, self).__init__()
    self.resnet = tv.models.resnet50(pretrained=True)
    # probably want linear, relu, dropout
    self.linear = nn.Linear(7 * 7 * 2048, 1024)
    self.dropout2d = nn.Dropout2d(.5)
    self.dropout = nn.Dropout(.5)
    self.relu = nn.LeakyReLU()
    initLinear(self.linear)
def __init__(self):
    super(resnet_modified_small, self).__init__()
    self.resnet = tv.models.resnet34(pretrained=True)
    # probably want linear, relu, dropout
    self.linear = nn.Linear(7 * 7 * 512, 1024)
    self.dropout2d = nn.Dropout2d(.5)
    self.dropout = nn.Dropout(.5)
    self.relu = nn.LeakyReLU()
    initLinear(self.linear)
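# A minimal sketch (an assumption for illustration, not the module's actual forward())
# of how the two modified ResNet backbones above are shaped: the torchvision trunk
# without avgpool/fc yields a 7x7 feature map (2048 channels for ResNet-50, 512 for
# ResNet-34), which the Linear layer flattens and projects to the 1024-d representation.
def _resnet_backbone_shape_sketch():
    import torch
    import torch.nn as nn
    import torchvision as tv

    resnet = tv.models.resnet50(pretrained=True)
    trunk = nn.Sequential(*list(resnet.children())[:-2])   # drop avgpool and fc
    linear = nn.Linear(7 * 7 * 2048, 1024)                 # matches resnet_modified_medium

    x = torch.randn(2, 3, 224, 224)                        # two 224x224 RGB crops
    fmap = trunk(x)                                        # -> (2, 2048, 7, 7)
    rep = linear(fmap.view(fmap.size(0), -1))              # -> (2, 1024)
    return rep.size()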
def __init__(self):
    super(vgg_modified, self).__init__()
    self.vgg = tv.models.vgg16(pretrained=True)
    self.vgg_features = self.vgg.features
    # self.classifier = nn.Sequential(
    #     nn.Dropout(),
    self.lin1 = nn.Linear(512 * 7 * 7, 1024)
    self.relu1 = nn.ReLU(True)
    self.dropout1 = nn.Dropout()
    self.lin2 = nn.Linear(1024, 1024)
    self.relu2 = nn.ReLU(True)
    self.dropout2 = nn.Dropout()
    initLinear(self.lin1)
    initLinear(self.lin2)
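# A minimal sketch (an assumption, not the original forward()) of the shape flow through
# the modified VGG-16 backbone above: vgg.features maps a 224x224 image to a (512, 7, 7)
# feature map, which lin1/lin2 then project to a 1024-d image representation.
def _vgg_backbone_shape_sketch():
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    import torchvision as tv

    vgg = tv.models.vgg16(pretrained=True)
    lin1 = nn.Linear(512 * 7 * 7, 1024)
    lin2 = nn.Linear(1024, 1024)

    x = torch.randn(1, 3, 224, 224)
    fmap = vgg.features(x)                      # -> (1, 512, 7, 7)
    flat = fmap.view(fmap.size(0), -1)          # -> (1, 25088)
    rep = lin2(F.relu(lin1(flat)))              # -> (1, 1024)
    return rep.size()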
def __init__(self, encoding, splits=[50, 100, 283], prediction_type="max_max", ngpus=1, cnn_type="resnet_101"):
    super(baseline_crf, self).__init__()

    self.normalize = tv.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
    self.train_transform = tv.transforms.Compose([
        tv.transforms.Scale(224),
        tv.transforms.RandomCrop(224),
        tv.transforms.RandomHorizontalFlip(),
        tv.transforms.ToTensor(),
        self.normalize,
    ])
    self.dev_transform = tv.transforms.Compose([
        tv.transforms.Scale(224),
        tv.transforms.CenterCrop(224),
        tv.transforms.ToTensor(),
        self.normalize,
    ])

    self.broadcast = []
    self.nsplits = len(splits)
    self.splits = splits
    self.encoding = encoding
    self.prediction_type = prediction_type
    self.n_verbs = encoding.n_verbs()
    self.split_vr = {}
    self.v_roles = {}

    # cnn
    print(cnn_type)
    if cnn_type == "resnet_101":
        self.cnn = resnet_modified_large()
    elif cnn_type == "resnet_50":
        self.cnn = resnet_modified_medium()
    elif cnn_type == "resnet_34":
        self.cnn = resnet_modified_small()
    else:
        print("unknown base network")
        exit()
    self.rep_size = self.cnn.rep_size()

    for s in range(0, len(splits)):
        self.split_vr[s] = []

    # sort by length
    remapping = []
    for (vr, ns) in encoding.vr_id_n.items():
        remapping.append((vr, len(ns)))

    # find the right split
    for (vr, l) in remapping:
        i = 0
        for s in splits:
            if l <= s:
                break
            i += 1
        _id = (i, vr)
        self.split_vr[i].append(_id)

    total = 0
    for (k, v) in self.split_vr.items():
        # print("{} {} {}".format(k, len(v), splits[k] * len(v)))
        total += splits[k] * len(v)
    # print("total compute : {}".format(total))

    # keep the splits sorted by vr id, to keep the model constant w.r.t. the encoding
    for i in range(0, len(splits)):
        s = sorted(self.split_vr[i], key=lambda x: x[1])
        self.split_vr[i] = []
        # enumerate?
        for (x, vr) in s:
            _id = (x, len(self.split_vr[i]), vr)
            self.split_vr[i].append(_id)
            (v, r) = encoding.id_vr[vr]
            if v not in self.v_roles:
                self.v_roles[v] = []
            self.v_roles[v].append(_id)

    # create the mapping for grouping the roles back to the verbs later
    max_roles = encoding.max_roles()

    # need a flat list that is n_verbs by max_roles
    self.v_vr = [0 for i in range(0, self.encoding.n_verbs() * max_roles)]
    splits_offset = []
    for i in range(0, len(splits)):
        if i == 0:
            splits_offset.append(0)
        else:
            splits_offset.append(splits_offset[-1] + len(self.split_vr[i - 1]))

    # and we need to compute the position of the corresponding roles, and pad with the 0 symbol
    for i in range(0, self.encoding.n_verbs()):
        offset = max_roles * i
        roles = sorted(self.v_roles[i], key=lambda x: x[2])  # stored in role order
        self.v_roles[i] = roles
        k = 0
        for (s, pos, r) in roles:
            # add one to account for the 0th element being the padding
            self.v_vr[offset + k] = splits_offset[s] + pos + 1
            k += 1
        # pad
        while k < max_roles:
            self.v_vr[offset + k] = 0
            k += 1

    gv_vr = Variable(torch.LongTensor(self.v_vr).cuda())  # .view(self.encoding.n_verbs(), -1)
    for g in range(0, ngpus):
        self.broadcast.append(Variable(torch.LongTensor(self.v_vr).cuda(g)))
    self.v_vr = gv_vr
    # print(self.v_vr)

    # verb potential
    self.linear_v = nn.Linear(self.rep_size, self.encoding.n_verbs())
    # verb-role-noun potentials
    self.linear_vrn = nn.ModuleList([
        nn.Linear(self.rep_size, splits[i] * len(self.split_vr[i]))
        for i in range(0, len(splits))
    ])

    self.total_vrn = 0
    for i in range(0, len(splits)):
        self.total_vrn += splits[i] * len(self.split_vr[i])
    print("total encoding vrn : {0}, with padding in {1} groups : {2}".format(
        encoding.n_verbrolenoun(), self.total_vrn, len(splits)))

    # initialize everything
    initLinear(self.linear_v)
    for _l in self.linear_vrn:
        initLinear(_l)

    self.mask_args()
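# A minimal sketch (illustrative assumption with toy sizes, not the real encoding) of the
# indexing scheme the constructor above builds: per-(verb, role) potentials are laid out
# split by split, a zero column is prepended so index 0 can act as padding, and the flat
# v_vr table (n_verbs * max_roles entries) gathers each verb's role potentials back together.
def _v_vr_gather_sketch():
    import torch

    n_verbs, max_roles, total_vrn = 3, 2, 4     # toy sizes for illustration only
    vrn_pots = torch.randn(1, total_vrn)        # one potential per (verb, role) slot
    padded = torch.cat([torch.zeros(1, 1), vrn_pots], 1)   # column 0 = padding symbol

    # v_vr[v * max_roles + k] indexes into `padded`; 0 means verb v has no k-th role
    v_vr = torch.LongTensor([1, 2, 3, 0, 4, 0])
    per_verb = padded.index_select(1, v_vr).view(1, n_verbs, max_roles)
    return per_verb.size()                      # -> (1, 3, 2)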