Example #1
    def __init__(self,
                 n_classes,
                 deno,
                 in_out = None,
                 # feat_dim = None,
                 sparsify = 0.5,
                 aft_nonlin = 'RL',
                 sigmoid = False,
                 feat_ret= False):
        super(Graph_Multi_Video, self).__init__()
        
        self.num_classes = n_classes
        self.feat_ret = feat_ret
        
        self.deno = deno
        self.graph_size = 1
        self.sparsify = sparsify
        
        if in_out is None:
            in_out = [2048,512]
        
        # if feat_dim is None:
        #     feat_dim = [2048,256]

        # self.linear_layer = nn.Linear(feat_dim[0], feat_dim[1], bias = True)
        
        self.graph_layer = Graph_Layer_Wrapper(in_out[0],n_out = in_out[1], aft_nonlin = aft_nonlin, type_layer = 'cooc')
        
        last_graph = []
        last_graph.append(nn.Dropout(0.5))
        last_graph.append(nn.Linear(in_out[-1],n_classes, bias = True))
        
        self.last_graph = nn.Sequential(*last_graph)
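
For orientation, here is a minimal shape sketch of the pipeline Example #1 builds: 2048-d segment features pass through the co-occurrence graph layer (in_out = [2048, 512]) and then through the Dropout + Linear head. This is not the repo's code; nn.Linear stands in for Graph_Layer_Wrapper (defined elsewhere in the repo), and n_classes = 20 is an illustrative value.

import torch
import torch.nn as nn

graph_layer = nn.Linear(2048, 512)   # placeholder for Graph_Layer_Wrapper(2048, n_out=512, ...)
last_graph = nn.Sequential(nn.Dropout(0.5), nn.Linear(512, 20, bias=True))

x = torch.randn(32, 2048)            # 32 video segments
scores = last_graph(graph_layer(x))  # (32, 20) per-segment class scores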
Example #2
    def __init__(self,
                 n_classes,
                 deno,
                 in_out = None,
                 # feat_dim = None,
                 sparsify = 0.5,
                 aft_nonlin = 'RL',
                 sigmoid = False,
                 feat_ret= False):
        super(Graph_Multi_Video, self).__init__()
        
        self.num_classes = n_classes
        self.feat_ret = feat_ret
        
        self.deno = deno
        self.graph_size = 1
        self.sparsify = sparsify
        
        if in_out is None:
            in_out = [2048,64]
        
        # if feat_dim is None:
        #     feat_dim = [2048,256]

        # self.linear_layer = nn.Linear(feat_dim[0], feat_dim[1], bias = True)
        self.graph_layers = nn.ModuleList()
        for class_num in range(self.num_classes):
            self.graph_layers.append(Graph_Layer_Wrapper(in_out[0],n_out = in_out[1], aft_nonlin = aft_nonlin, type_layer = 'cooc', non_lin = None))
        # self.graph_layers = nn.ModuleList(*self.graph_layers)
        
        # self.last_graph = nn.ModuleList()
        # for class_num in range(self.num_classes):
        last_graph = []
        # if aft_nonlin is not None:
        #     to_pend = aft_nonlin.split('_')
        #     for tp in to_pend:
        #         if tp.lower()=='ht':
        #             last_graph.append(nn.Hardtanh())
        #         elif tp.lower()=='rl':
        #             last_graph.append(nn.ReLU())
        #         elif tp.lower()=='l2':
        #             last_graph.append(Normalize())
        #         elif tp.lower()=='ln':
        #             last_graph.append(nn.LayerNorm(n_out))
        #         elif tp.lower()=='bn':
        #             last_graph.append(nn.BatchNorm1d(n_out, affine = False, track_running_stats = False))
        #         elif tp.lower()=='sig':
        #             last_graph.append(nn.Sigmoid())
        #         else:
        #             error_message = str('non_lin %s not recognized', non_lin)
        #             raise ValueError(error_message)
            
        last_graph.append(nn.Dropout(0.5))
        last_graph.append(nn.Linear(in_out[-1]*self.num_classes,self.num_classes, bias = True))
        if sigmoid:
            last_graph.append(nn.Sigmoid())
        self.last_graph = nn.Sequential(*last_graph)
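
Example #2 differs from Example #1 in that it builds one graph layer per class and sizes the final Linear for the concatenation of their outputs (in_out[-1] * self.num_classes); the concatenation itself presumably happens in forward, which is not shown here. A toy sketch of that wiring, again with nn.Linear standing in for Graph_Layer_Wrapper and illustrative sizes:

import torch
import torch.nn as nn

n_classes, n_in, n_out = 4, 2048, 64   # the default in_out here is [2048, 64]
branches = nn.ModuleList([nn.Linear(n_in, n_out) for _ in range(n_classes)])
head = nn.Linear(n_out * n_classes, n_classes, bias=True)

x = torch.randn(10, n_in)                            # 10 segments
feats = torch.cat([b(x) for b in branches], dim=1)   # (10, n_out * n_classes)
scores = head(feats)                                 # (10, n_classes)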
Example #3
    def __init__(self,
                 n_classes,
                 deno,
                 pretrained,
                 in_out=None,
                 graph_size=None,
                 method='cos'):
        super(Graph_Multi_Video, self).__init__()

        self.num_classes = n_classes
        self.deno = deno
        self.graph_size = graph_size

        if in_out is None:
            in_out = [2048, 64, 2048, 64]

        num_layers = len(in_out) - 3
        non_lin = 'HT'

        print 'NUM LAYERS', num_layers, in_out

        self.linear_layer = nn.Linear(in_out[0], in_out[1], bias=False)
        # for param in self.linear_layer.parameters():
        #     param.requires_grad = False
        non_lin = 'HT'

        if pretrained == 'ucf':
            model_file = '../experiments/just_mill_flexible_deno_8_n_classes_20_layer_sizes_2048_64_ucf/all_classes_False_just_primary_False_limit_500_cw_True_MultiCrossEntropy_100_step_100_0.1_0.001_0.001/model_99.pt'
        elif pretrained == 'activitynet':
            model_file = '../experiments/just_mill_flexible_deno_8_n_classes_100_layer_sizes_2048_64_activitynet/all_classes_False_just_primary_False_limit_500_cw_True_MultiCrossEntropy_50_step_50_0.1_0.001_0.001/model_49.pt'
        elif pretrained == 'random':
            model_file = '../experiments/just_mill_flexible_deno_8_n_classes_20_layer_sizes_2048_64_ucf/all_classes_False_just_primary_False_limit_500_cw_True_MultiCrossEntropy_100_step_100_0.1_0_0.001/model_99.pt'
        else:
            error_message = 'Pretrained option %s not valid' % pretrained
            raise ValueError(error_message)

        model_temp = torch.load(model_file)
        self.linear_layer.weight.data = model_temp.linear.weight.data

        self.graph_layers = nn.ModuleList()
        for num_layer in range(num_layers):
            self.graph_layers.append(
                Graph_Layer_Wrapper(in_out[num_layer + 2],
                                    n_out=in_out[num_layer + 3],
                                    non_lin=non_lin,
                                    method=method))

        last_layer = []

        last_layer.append(nn.Hardtanh())
        last_layer.append(Normalize())
        last_layer.append(nn.Dropout(0.5))
        last_layer.append(nn.Linear(in_out[-1], n_classes))
        last_layer = nn.Sequential(*last_layer)
        self.last_layer = last_layer
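
Example #3 initializes self.linear_layer from a checkpoint loaded with torch.load by copying the pretrained module's weight tensor directly. A minimal, self-contained sketch of that weight-copy pattern; the source layer below is only a stand-in for torch.load(model_file).linear:

import torch
import torch.nn as nn

src = nn.Linear(2048, 64, bias=False)   # stands in for the loaded model's .linear
dst = nn.Linear(2048, 64, bias=False)

dst.weight.data = src.weight.data.clone()
assert torch.equal(dst.weight, src.weight)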
Example #4
    def __init__(self,
                 n_classes,
                 deno,
                 in_out=None,
                 feat_dim=None,
                 graph_size=None,
                 method='cos',
                 sparsify=0.5,
                 non_lin='RL',
                 aft_nonlin='RL',
                 sigmoid=False,
                 layer_bef=None,
                 graph_sum=False,
                 background=False,
                 just_graph=False,
                 feat_ret=False,
                 dropout=0.5):
        super(Graph_Multi_Video, self).__init__()

        self.num_classes = n_classes
        self.background = background
        self.feat_ret = feat_ret
        if self.background:
            assert sigmoid
            n_classes += 1

        self.deno = deno
        self.graph_size = graph_size
        self.sparsify = sparsify
        self.graph_sum = graph_sum
        self.just_graph = just_graph

        if in_out is None:
            in_out = [2048, 512]

        if feat_dim is None:
            feat_dim = [2048, 256]

        num_layers = 1

        print 'NUM LAYERS', num_layers, in_out

        self.bn = None
        # nn.BatchNorm1d(2048, affine = False)
        self.linear_layer = []
        self.linear_layer.append(nn.Linear(feat_dim[0], feat_dim[1],
                                           bias=True))

        if non_lin is not None:
            if non_lin.lower() == 'ht':
                self.linear_layer.append(nn.Hardtanh())
            elif non_lin.lower() == 'rl':
                self.linear_layer.append(nn.ReLU())
            else:
                error_message = 'non_lin %s not recognized' % non_lin
                raise ValueError(error_message)

        # self.linear_layer = nn.Sequential(*self.linear_layer)
        # self.graph_layer = nn.ModuleList()

        # self.last_graph = nn.ModuleList()

        self.graph_layer = Graph_Layer_Wrapper(in_out[0],
                                               n_out=in_out[1],
                                               non_lin=None,
                                               method=method,
                                               aft_nonlin=aft_nonlin)

        last_graph = []
        last_graph.append(nn.Dropout(dropout))
        last_graph.append(nn.Linear(in_out[-1], n_classes, bias=True))
        if sigmoid:
            last_graph.append(nn.Sigmoid())

        self.last_graph = nn.Sequential(*last_graph)
Example #5
class Graph_Multi_Video(nn.Module):
    def __init__(self,
                 n_classes,
                 deno,
                 in_out=None,
                 feat_dim=None,
                 graph_size=None,
                 method='cos',
                 sparsify=0.5,
                 non_lin='RL',
                 aft_nonlin='RL',
                 sigmoid=False,
                 layer_bef=None,
                 graph_sum=False,
                 background=False,
                 just_graph=False,
                 feat_ret=False,
                 dropout=0.5):
        super(Graph_Multi_Video, self).__init__()

        self.num_classes = n_classes
        self.background = background
        self.feat_ret = feat_ret
        if self.background:
            assert sigmoid
            n_classes += 1

        self.deno = deno
        self.graph_size = graph_size
        self.sparsify = sparsify
        self.graph_sum = graph_sum
        self.just_graph = just_graph

        if in_out is None:
            in_out = [2048, 512]

        if feat_dim is None:
            feat_dim = [2048, 256]

        num_layers = 1

        print 'NUM LAYERS', num_layers, in_out

        self.bn = None
        # nn.BatchNorm1d(2048, affine = False)
        self.linear_layer = []
        self.linear_layer.append(nn.Linear(feat_dim[0], feat_dim[1],
                                           bias=True))

        if non_lin is not None:
            if non_lin.lower() == 'ht':
                self.linear_layer.append(nn.Hardtanh())
            elif non_lin.lower() == 'rl':
                self.linear_layer.append(nn.ReLU())
            else:
                error_message = 'non_lin %s not recognized' % non_lin
                raise ValueError(error_message)

        # wrap the list so its layers are registered as submodules and callable (used by out_f/out_f_f)
        self.linear_layer = nn.Sequential(*self.linear_layer)
        # self.graph_layer = nn.ModuleList()

        # self.last_graph = nn.ModuleList()

        self.graph_layer = Graph_Layer_Wrapper(in_out[0],
                                               n_out=in_out[1],
                                               non_lin=None,
                                               method=method,
                                               aft_nonlin=aft_nonlin)

        last_graph = []
        last_graph.append(nn.Dropout(dropout))
        last_graph.append(nn.Linear(in_out[-1], n_classes, bias=True))
        if sigmoid:
            last_graph.append(nn.Sigmoid())

        self.last_graph = nn.Sequential(*last_graph)

    def forward(self, input, epoch_num=None, ret_bg=False, branch_to_test=-1):

        strip = False
        if type(input) != type([]):
            input = [input]
            strip = True

        identity = False
        method = None

        # print 'self.graph_size' , self.graph_size
        if not self.training:
            graph_size = 1
        else:
            if self.graph_size is None:
                graph_size = len(input)
            elif self.graph_size == 'rand':
                graph_size = random.randint(1, len(input))
            elif type(self.graph_size) == str and self.graph_size.startswith(
                    'randupto'):
                graph_size = int(self.graph_size.split('_')[1])
                graph_size = random.randint(1, min(graph_size, len(input)))
            else:
                graph_size = min(self.graph_size, len(input))

        # print 'graph_size', graph_size, self.training
        input_chunks = [[input[0]]] + [
            input[i:i + graph_size] for i in xrange(1, len(input), graph_size)
        ]
        # input_chunks = [input[i:i + graph_size] for i in xrange(0, len(input), graph_size)]

        is_cuda = next(self.parameters()).is_cuda
        # print 'Graph branch'

        # pmf_all = [[] for i in range(self.num_branches)]
        # x_all_all = [[] for i in range(self.num_branches)]
        pmf_all = []
        x_all = []
        graph_sums = []
        out_graph_all = []
        input_sizes_all = np.array(
            [input_curr.size(0) for input_curr in input])

        for input in input_chunks:
            input_sizes = [input_curr.size(0) for input_curr in input]
            # print input_sizes
            input = torch.cat(input, 0)
            # print input.size()

            if is_cuda:
                input = input.cuda()

            # assert len(self.graph_layers)==(self.num_branches)

            # if hasattr(self, 'layer_bef') and self.layer_bef is not None:
            #     input = self.layer_bef(input)

            # per-segment features from the linear layer; sliced per video further down
            feature_out = self.linear_layer(input)
            # # for col_num in range(len(self.graph_layers)):
            # print 'feature_out.size()',feature_out.size()

            to_keep = self.sparsify
            # if to_keep=='lin':
            # out_graph = self.graph_layer(input)
            #     else:
            out_graph = self.graph_layer(input,
                                         input,
                                         to_keep=to_keep,
                                         graph_sum=self.graph_sum,
                                         identity=identity,
                                         method=method)

            if self.graph_sum:
                [out_graph, graph_sum] = out_graph
                graph_sums.append(graph_sum.unsqueeze(0))

            # out_graph_all.append(out_graph)
            # print 'out_graph.size()',out_graph.size()

            out_col = self.last_graph(out_graph)
            # print 'out_col.size()',out_col.size()
            # x_all.append(out_col)

            # x = x_all[-1]

            for idx_sample in range(len(input_sizes)):
                if idx_sample == 0:
                    start = 0
                else:
                    start = sum(input_sizes[:idx_sample])

                end = start + input_sizes[idx_sample]
                x_curr = out_col[start:end, :]

                # THIS LINE IS DIFFERENT BETWEEN RETF AND NO RETF
                out_graph_curr = feature_out[start:end, :]

                x_all.append(x_curr)
                out_graph_all.append(out_graph_curr)
                pmf_all += [self.make_pmf(x_curr).unsqueeze(0)]

        if strip:
            # for idx_pmf, pmf in enumerate(pmf_all):
            #     assert len(pmf)==1
            pmf_all = pmf_all[0].squeeze()
            x_all = x_all[0]
            # print torch.min(pmf_all), torch.max(pmf_all)

        # for idx_x, x in enumerate(x_all):
        # x_all = torch.cat(x_all,dim=0)
        # out_graph_all = torch.cat(out_graph_all, dim = 0)
        # print x_all.size(), out_graph_all.size()
        # print graph_sums
        if hasattr(self, 'feat_ret') and self.feat_ret:
            pmf_all = [
                pmf_all,
                [torch.cat(graph_sums, dim=0), out_graph_all, input_sizes_all]
            ]
        elif self.graph_sum:
            pmf_all = [pmf_all, torch.cat(graph_sums, dim=0)]

        # raw_input()
        if ret_bg:
            return x_all, pmf_all, None
        else:
            return x_all, pmf_all

    def make_pmf(self, x):
        k = max(1, x.size(0) // self.deno)

        pmf, _ = torch.sort(x, dim=0, descending=True)
        pmf = pmf[:k, :]

        pmf = torch.sum(pmf[:k, :], dim=0) / k
        return pmf

    def get_similarity(self,
                       input,
                       idx_graph_layer=0,
                       sparsify=False,
                       nosum=True):

        # if sparsify is None:
        #     sparsify = self.sparsify

        is_cuda = next(self.parameters()).is_cuda

        # input_sizes = [input_curr.size(0) for input_curr in input]
        # input = torch.cat(input,0)

        if is_cuda:
            input = input.cuda()

        # assert idx_graph_layer<len(self.graph_layers)

        if hasattr(self, 'layer_bef') and self.layer_bef is not None:
            input = self.layer_bef(input)

        # feature_out = self.linear_layer(input)

        if sparsify:
            to_keep = self.sparsify
            # [idx_graph_layer]
        else:
            to_keep = None

        sim_mat = self.graph_layer.get_affinity(input,
                                                to_keep=to_keep,
                                                nosum=nosum)

        return sim_mat

    def printGraphGrad(self):
        # this class builds a single self.graph_layer, not a ModuleList
        grad_rel = self.graph_layer.graph_layer.weight.grad
        print torch.min(grad_rel).data.cpu().numpy(), torch.max(
            grad_rel).data.cpu().numpy()

    def out_f(self, input):
        # is_cuda = next(self.parameters()).is_cuda
        # if is_cuda:
        #     input = input.cuda()

        identity = False
        method = None
        feature_out = self.linear_layer(input)
        to_keep = self.sparsify
        out_graph = self.graph_layer(input,
                                     feature_out,
                                     to_keep=to_keep,
                                     graph_sum=self.graph_sum,
                                     identity=identity,
                                     method=method)

        if self.graph_sum:
            [out_graph, graph_sum] = out_graph

        return out_graph

    def out_f_f(self, input):
        # is_cuda = next(self.parameters()).is_cuda
        # if is_cuda:
        #     input = input.cuda()

        identity = False
        method = None
        feature_out = self.linear_layer(input)
        # to_keep = self.sparsify
        # out_graph = self.graph_layer(input, feature_out, to_keep = to_keep, graph_sum = self.graph_sum, identity = identity, method = method)

        # if self.graph_sum:
        #     [out_graph, graph_sum] = out_graph

        return feature_out
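
Two pieces of Example #5's forward pass are easy to isolate: the chunking of the input videos (the first video always forms its own chunk, the rest are grouped into chunks of graph_size) and the top-k mean pooling performed by make_pmf. A self-contained toy re-implementation of both, not the repo's code:

import torch

def chunk_inputs(inputs, graph_size):
    # first video alone, the rest in groups of graph_size
    return [[inputs[0]]] + [inputs[i:i + graph_size]
                            for i in range(1, len(inputs), graph_size)]

def topk_mean(x, deno=8):
    # equivalent to make_pmf: average the k = T // deno highest scores per class
    k = max(1, x.size(0) // deno)
    topk, _ = torch.topk(x, k, dim=0)
    return topk.mean(dim=0)

print(chunk_inputs(list('abcdefg'), 3))      # [['a'], ['b', 'c', 'd'], ['e', 'f', 'g']]
print(topk_mean(torch.randn(40, 20)).shape)  # torch.Size([20])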
Example #6
    def __init__(self,
                 n_classes,
                 deno,
                 in_out=None,
                 feat_dim=None,
                 graph_size=None,
                 method='cos',
                 sparsify=0.5,
                 non_lin='RL',
                 aft_nonlin='RL',
                 sigmoid=False,
                 layer_bef=None,
                 graph_sum=False,
                 background=False,
                 just_graph=False,
                 feat_ret=False):
        super(Graph_Multi_Video, self).__init__()

        self.num_classes = n_classes
        self.background = background
        self.feat_ret = feat_ret
        if self.background:
            assert sigmoid
            n_classes += 1

        self.deno = deno
        self.graph_size = graph_size
        self.sparsify = sparsify
        self.graph_sum = graph_sum
        self.just_graph = just_graph

        if in_out is None:
            in_out = [2048, 512]

        if feat_dim is None:
            feat_dim = [2048, 256]

        num_layers = 1

        print 'NUM LAYERS', num_layers, in_out

        self.bn = None
        # nn.BatchNorm1d(2048, affine = False)
        self.linear_layer = nn.Linear(feat_dim[0], feat_dim[1], bias=True)

        # self.graph_layer = nn.ModuleList()

        # self.last_graph = nn.ModuleList()

        self.graph_layer = Graph_Layer_Wrapper(in_out[0],
                                               n_out=in_out[1],
                                               non_lin=non_lin,
                                               method=method,
                                               aft_nonlin=aft_nonlin)

        last_graph = []
        last_graph.append(nn.Dropout(0.5))
        last_graph.append(nn.Linear(in_out[-1], n_classes, bias=True))
        if sigmoid:
            last_graph.append(nn.Sigmoid())
        self.last_graph = nn.Sequential(*last_graph)
Example #7
    def __init__(self,
                 n_classes,
                 deno,
                 in_out = None,
                 feat_dim = None,
                 in_out_feat = None,
                 graph_size = None,
                 method = 'cos',
                 sparsify = 0.5,
                 non_lin = 'RL',
                 aft_nonlin = 'RL',
                 aft_nonlin_feat = 'RL',
                 sigmoid = False,
                 layer_bef = None,
                 graph_sum = False,
                 background = False,
                 just_graph = False
                 ):
        super(Graph_Multi_Video, self).__init__()
        
        self.num_classes = n_classes
        self.background = background
        
        if self.background:
            assert sigmoid
            n_classes+=1

        self.deno = deno
        self.graph_size = graph_size
        self.sparsify = sparsify
        self.graph_sum = graph_sum
        self.just_graph = just_graph

        if in_out_feat is None:
            in_out_feat = [2048,1024]
        
        if in_out is None:
            in_out = [1024,512]
        
        if feat_dim is None:
            feat_dim = [1024,256]


        assert feat_dim[0]==in_out_feat[1]==in_out[0]

        # num_layers = 1
        
        # print 'NUM LAYERS', num_layers, in_out
        self.num_branches = 2
        print 'self.num_branches', self.num_branches

        self.bn =None
        # nn.BatchNorm1d(2048, affine = False)
        self.feature = []
        self.feature.append(nn.Linear(in_out_feat[0], in_out_feat[1], bias = True))
        to_pend = aft_nonlin_feat.split('_')
        for tp in to_pend:
            if tp.lower()=='ht':
                self.feature.append(nn.Hardtanh())
            elif tp.lower()=='rl':
                self.feature.append(nn.ReLU())
            elif tp.lower()=='l2':
                self.feature.append(Normalize())
            elif tp.lower()=='ln':
                self.feature.append(nn.LayerNorm(in_out_feat[1]))  # width of the Linear output above
            elif tp.lower()=='bn':
                self.feature.append(nn.BatchNorm1d(in_out_feat[1], affine = False, track_running_stats = False))
            elif tp.lower()=='sig':
                self.feature.append(nn.Sigmoid())
            else:
                error_message = 'non_lin %s not recognized' % tp
                raise ValueError(error_message)

        self.feature = nn.Sequential(*self.feature)
        # self.feature_classifier = nn.Linear(in_out[-1],n_classes)


        self.linear_layer = nn.Linear(feat_dim[0], feat_dim[1], bias = True)
        
        self.graph_layer = Graph_Layer_Wrapper(in_out[0],n_out = in_out[1], non_lin = non_lin, method = method, aft_nonlin = aft_nonlin)
        
        last_graph = []
        last_graph.append(nn.Dropout(0.5))
        last_graph.append(nn.Linear(in_out[-1],n_classes))
        if sigmoid:
            last_graph.append(nn.Sigmoid())
        self.last_graph = nn.Sequential(*last_graph)

        last_feat = []
        last_feat.append(nn.Dropout(0.5))
        last_feat.append(nn.Linear(in_out_feat[-1],n_classes))
        if sigmoid:
            last_feat.append(nn.Sigmoid())
        # last_feat.append(nn.Softmax(dim=0))
        self.last_feat = nn.Sequential(*last_feat)
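
Example #7 parses aft_nonlin_feat as an underscore-separated list of layer codes ('HT', 'RL', 'L2', 'LN', 'BN', 'SIG'). A standalone sketch of that parsing follows; it omits the 'l2' branch because the repo's Normalize module is not shown in these snippets:

import torch.nn as nn

def parse_nonlin(code, dim):
    # map codes such as 'HT_sig' to a list of layers acting on dim-sized features
    layers = []
    for tp in code.split('_'):
        tp = tp.lower()
        if tp == 'ht':
            layers.append(nn.Hardtanh())
        elif tp == 'rl':
            layers.append(nn.ReLU())
        elif tp == 'ln':
            layers.append(nn.LayerNorm(dim))
        elif tp == 'bn':
            layers.append(nn.BatchNorm1d(dim, affine=False, track_running_stats=False))
        elif tp == 'sig':
            layers.append(nn.Sigmoid())
        else:
            raise ValueError('non_lin %s not recognized' % tp)
    return layers

head = nn.Sequential(*parse_nonlin('HT_sig', 1024))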
Example #8
    def __init__(self,
                 n_classes,
                 deno,
                 in_out=None,
                 feat_dim=None,
                 graph_size=None,
                 method='cos',
                 sparsify=0.5,
                 non_lin='RL',
                 aft_nonlin='RL',
                 sigmoid=False,
                 layer_bef=None,
                 graph_sum=False):
        super(Graph_Multi_Video, self).__init__()

        self.num_classes = n_classes
        # self.background = background

        # if self.background:
        #     assert sigmoid
        #     n_classes+=1

        self.deno = deno
        self.graph_size = graph_size
        self.sparsify = sparsify
        self.graph_sum = graph_sum
        # self.just_graph = just_graph

        if in_out is None:
            in_out = [2048, 512]

        if feat_dim is None:
            feat_dim = [2048, 256]

        num_layers = 1

        print 'NUM LAYERS', num_layers, in_out

        self.bn = None
        # nn.BatchNorm1d(2048, affine = False)
        self.linear_layer = nn.Linear(feat_dim[0], feat_dim[1], bias=True)

        # self.graph_layer = nn.ModuleList()

        # self.last_graph = nn.ModuleList()

        self.graph_layer = Graph_Layer_Wrapper(in_out[0],
                                               n_out=in_out[1],
                                               non_lin=non_lin,
                                               method=method,
                                               aft_nonlin=aft_nonlin)

        self.det_branch = []
        self.class_branch = []
        branches = [self.det_branch, self.class_branch]
        for branch in branches:
            # branch.append(nn.ReLU())
            branch.append(nn.Dropout(0.5))
            branch.append(nn.Linear(in_out[1], self.num_classes))

        [self.det_branch, self.class_branch] = branches
        self.det_branch.append(nn.Hardtanh())
        self.det_branch_smax = nn.Softmax(dim=0)

        self.class_branch.append(nn.Softmax(dim=1))

        self.det_branch = nn.Sequential(*self.det_branch)
        self.class_branch = nn.Sequential(*self.class_branch)

        last_graph = []
        last_graph.append(nn.Dropout(0.5))
        last_graph.append(nn.Linear(in_out[-1], n_classes))

        self.last_graph = nn.Sequential(*last_graph)
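
Example #8 builds two heads on top of the graph features: a detection branch whose scores are softmaxed across segments (dim 0, via det_branch_smax) and a classification branch softmaxed across classes (dim 1). A toy sketch of those two heads with illustrative sizes, not the repo's code:

import torch
import torch.nn as nn

n_feat, n_classes = 512, 20   # illustrative stand-ins for in_out[1] and self.num_classes
det_branch = nn.Sequential(nn.Dropout(0.5), nn.Linear(n_feat, n_classes), nn.Hardtanh())
det_smax = nn.Softmax(dim=0)
class_branch = nn.Sequential(nn.Dropout(0.5), nn.Linear(n_feat, n_classes), nn.Softmax(dim=1))

feats = torch.randn(32, n_feat)
det = det_smax(det_branch(feats))   # (32, n_classes), each column sums to 1
cls = class_branch(feats)           # (32, n_classes), each row sums to 1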
Example #9
    def __init__(self,
                 n_classes,
                 deno,
                 pretrained,
                 in_out = None,
                 graph_size = None,
                 method = 'cos',
                 num_switch = 1,
                 focus = 0,
                 sparsify = False,
                 non_lin = 'HT',
                 normalize = [True,True]
                 ):
        super(Graph_Multi_Video, self).__init__()
        
        self.num_classes = n_classes
        self.deno = deno
        self.graph_size = graph_size
        self.sparsify = sparsify

        if in_out is None:
            in_out = [2048,64,2048,64]

        num_layers = len(in_out)-3
        # non_lin = 'HT'

        print 'NUM LAYERS', num_layers, in_out

        
        self.linear_layer = nn.Linear(in_out[0], in_out[1], bias = False)
        # for param in self.linear_layer.parameters():
        #     param.requires_grad = False
        # non_lin = 'HT'

        if pretrained=='ucf':
            model_file = '../experiments/just_mill_flexible_deno_8_n_classes_20_layer_sizes_2048_64_ucf/all_classes_False_just_primary_False_limit_500_cw_True_MultiCrossEntropy_100_step_100_0.1_0.001_0.001/model_99.pt'
        elif pretrained=='activitynet':
            model_file = '../experiments/just_mill_flexible_deno_8_n_classes_100_layer_sizes_2048_64_activitynet/all_classes_False_just_primary_False_limit_500_cw_True_MultiCrossEntropy_50_step_50_0.1_0.001_0.001/model_49.pt'
        elif pretrained=='random':
            model_file = '../experiments/just_mill_flexible_deno_8_n_classes_20_layer_sizes_2048_64_ucf/all_classes_False_just_primary_False_limit_500_cw_True_MultiCrossEntropy_100_step_100_0.1_0_0.001/model_99.pt'
        elif pretrained=='default':
            model_file = None
        else:
            error_message = 'Pretrained option %s not valid' % pretrained
            raise ValueError(error_message)

        if model_file is not None:
            model_temp = torch.load(model_file)
            self.linear_layer.weight.data = model_temp.linear.weight.data
        else:
            print 'NO MODEL FILE AAAAAAAA'
        
        
        self.graph_layers = nn.ModuleList()
        for num_layer in range(num_layers): 
            self.graph_layers.append(Graph_Layer_Wrapper(in_out[num_layer+2],n_out = in_out[num_layer+3], non_lin = non_lin, method = method))
        
        last_linear = []
        if non_lin =='HT':
            last_linear.append(nn.Hardtanh())
        elif non_lin =='RL':
            last_linear.append(nn.ReLU())
        else:
            error_message = 'Non lin %s not valid' % non_lin
            raise ValueError(error_message)

        if normalize[0]:
            last_linear.append(Normalize())

        last_linear.append(nn.Dropout(0.5))
        last_linear.append(nn.Linear(in_out[1],n_classes))
        last_linear = nn.Sequential(*last_linear)
        self.last_linear = last_linear
        
        last_graph = []
        if non_lin =='HT':
            last_graph.append(nn.Hardtanh())
        elif non_lin =='RL':
            last_graph.append(nn.ReLU())
        else:
            error_message = 'Non lin %s not valid' % non_lin
            raise ValueError(error_message)

        if normalize[1]:
            last_graph.append(Normalize())
        
        # last_graph.append(nn.Hardtanh())
        # last_graph.append(Normalize())
        last_graph.append(nn.Dropout(0.5))
        last_graph.append(nn.Linear(in_out[-1],n_classes))
        last_graph = nn.Sequential(*last_graph)
        self.last_graph = last_graph

        if type(num_switch)==type(1):
            num_switch = [num_switch,num_switch]

        self.num_switch = num_switch
        self.epoch_counters = [0,0]
        self.focus = focus
        self.epoch_last = 0