Example #1
 def __init__(self, batch_size, num_features, num_layers, J, dim_input, clip_grad_norm, logger):
     self.logger = logger
     self.clip_grad_norm = clip_grad_norm
     self.batch_size = batch_size
     self.J = J
     self.Split = Split_GNN(batch_size, num_features, num_layers, J+2, dim_input=dim_input)
     self.Tsp = GNN(num_features, num_layers, J+2, dim_input=dim_input)
     self.Merge = GNN(num_features, num_layers, J+2, dim_input=dim_input)
     self.optimizer_split = optim.RMSprop(self.Split.parameters())
     self.optimizer_tsp = optim.Adamax(self.Tsp.parameters(), lr=1e-3)
     self.optimizer_merge = optim.Adamax(self.Merge.parameters(), lr=1e-3)
     self.test_gens = []
     self.test_gens_labels = []
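A minimal usage sketch for this constructor, assuming the DCN wrapper class shown in Example #4 and borrowing the hyperparameter values that appear in Example #7 (batch_size=20, num_features=10, num_layers=5, J=4, dim_input=3, clip_grad=40.0); the Logger instance here is a placeholder:

    logger = Logger()
    dcn = DCN(batch_size=20, num_features=10, num_layers=5, J=4,
              dim_input=3, clip_grad_norm=40.0, logger=logger)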
Example #2
  k_step = args.k_step
  n_samples = args.n_samples
  normalize = args.normalize
  last = args.last
  baseline = args.baseline
  
  if args.dataset == 'GM':
      gen = Generator('./data/kmeans/', num_examples_train, num_examples_test, N, clusters, dim)
  elif args.dataset == "CIFAR":
      gen = GeneratorCIFAR('./data/kmeans/', num_examples_train, num_examples_test, N, clusters, dim)
      dim = 27
  gen.load_dataset()
  num_iterations = 100000
  
  if not baseline:
      gnn = Split_GNN(num_features, num_layers, 5, dim_input=dim)
  else:
      gnn = Split_BaselineGNN(num_features, num_layers, 5, K, dim_input=dim)
  if args.load_file != '':
      gnn = load_model(args.load_file, gnn)
  optimizer = optim.RMSprop(gnn.parameters(), lr=1e-3)
  # optimizer = optim.Adam(gnn.parameters())
  
  test = args.test
  if test:
      num_iterations = num_examples_test // batch_size
  
 
  log = Logger()
  start = time.time()
  
Example #3
        batch_size = 100

    test = args.test

    gen = Generator(args.dataset_path,
                    args.solver_path,
                    num_examples_train,
                    num_examples_test,
                    N,
                    C_min,
                    C_max,
                    test=test)
    gen.load_dataset()
    num_iterations = 100000

    Knap = Split_GNN(num_features, num_layers, 3, dim_input=3)
    if args.load_file_path != '':
        Knap = load_model(args.load_file_path, Knap)
    optimizer = optim.Adamax(Knap.parameters(), lr=1e-3)

    log = Logger()
    log2 = Logger()
    path_train_plot = os.path.join(args.logs_path, 'training.png')

    if test:
        num_iterations = num_examples_test // batch_size

    start = time.time()
    for it in range(num_iterations):
        batch = gen.sample_batch(batch_size, is_training=not test, it=it)
        weights, volumes, C, OptW, OptV, is_chosen_opt = batch
Example #4
class DCN():
    def __init__(self, batch_size, num_features, num_layers, J, dim_input, clip_grad_norm, logger):
        self.logger = logger
        self.clip_grad_norm = clip_grad_norm
        self.batch_size = batch_size
        self.J = J
        self.Split = Split_GNN(batch_size, num_features, num_layers, J+2, dim_input=dim_input)
        self.Tsp = GNN(num_features, num_layers, J+2, dim_input=dim_input)
        self.Merge = GNN(num_features, num_layers, J+2, dim_input=dim_input)
        self.optimizer_split = optim.RMSprop(self.Split.parameters())
        self.optimizer_tsp = optim.Adamax(self.Tsp.parameters(), lr=1e-3)
        self.optimizer_merge = optim.Adamax(self.Merge.parameters(), lr=1e-3)
        self.test_gens = []
        self.test_gens_labels = []
   
    def load_split(self, path_load):
        self.Split = self.logger.load_model(path_load, 'split')
        self.optimizer_split = optim.RMSprop(self.Split.parameters())
    
    def load_tsp(self, path_load):
        self.Tsp = self.logger.load_model(path_load, 'tsp')
        self.optimizer_tsp = optim.Adamax(self.Tsp.parameters(), lr=1e-3)

    def load_merge(self, path_load):
        self.Merge = self.logger.load_model(path_load, 'merge')
        self.optimizer_merge = optim.Adamax(self.Merge.parameters(), lr=1e-3)

    def save_model(self, path_load, it=-1):
        self.logger.save_model(path_load, self.Split, self.Tsp, self.Merge, it=it)
    
    def set_dataset(self, path_dataset, num_examples_train, num_examples_test, N_train, N_test):
        self.gen = Generator(path_dataset, args.path_tsp)
        self.gen.num_examples_train = num_examples_train
        self.gen.num_examples_test = num_examples_test
        self.gen.N_train = N_train
        self.gen.N_test = N_test
        self.gen.load_dataset()
    
    def add_test_dataset(self, gen, label):
        self.test_gens.append(gen)
        self.test_gens_labels.append(label)

    def sample_one(self, probs, mode='train'):
        probs = 1e-4 + probs*(1 - 2e-4) # to avoid log(0)
        if mode == 'train':
            rand = torch.zeros(*probs.size()).type(dtype)
            nn.init.uniform(rand)
        else:
            rand = torch.ones(*probs.size()).type(dtype) / 2
        bin_sample = probs > Variable(rand)
        sample = bin_sample.clone().type(dtype)
        log_probs_samples = (sample*torch.log(probs) + (1-sample)*torch.log(1-probs)).sum(1)
        return bin_sample.data, sample.data, log_probs_samples
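    # Note: sample_one draws a Bernoulli mask over the cities from the Split
    # probabilities: in 'train' mode the threshold is uniform noise (a stochastic
    # sample), otherwise a fixed 0.5 (deterministic rounding). The returned
    # log_probs_samples are the per-example log-likelihoods of the drawn mask,
    # which compute_loss later multiplies by the (detached) merge loss to form a
    # REINFORCE-style gradient signal for the Split network.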
    
    def split_operator(self, W, sample, cities):
        bs = sample.size(0)
        Ns1 = sample.long().sum(1)
        N1 = Ns1.max(0)[0][0]
        W1 = torch.zeros(bs, N1, N1).type(dtype)
        cts = torch.zeros(bs, N1, 2).type(dtype)
        for b in range(bs):
            inds = torch.nonzero(sample[b]).squeeze()
            n = Ns1[b]
            W1[b,:n,:n] = W[b].index_select(1, inds).index_select(0, inds)
            cts[b,:n,:] = cities[b].index_select(0, inds)
        return W1, cts

    def compute_other_operators(self, W, Ns, cts, J):
        bs = W.size(0)
        N = W.size(-1)
        QQ = W.clone()
        WW = torch.zeros(bs, N, N, J + 2).type(dtype)
        eye = torch.eye(N).type(dtype).unsqueeze(0).expand(bs,N,N)
        WW[:, :, :, 0] = eye
        for j in range(J):
            WW[:, :, :, j+1] = QQ.clone()
            QQ = torch.bmm(QQ, QQ)
            mx = QQ.max(2)[0].max(1)[0].unsqueeze(1).unsqueeze(2).expand_as(QQ)
            QQ /= torch.clamp(mx, min=1e-6)
            QQ *= np.sqrt(2)
        d = W.sum(1)
        D = d.unsqueeze(1).expand_as(eye) * eye
        WW[:, :, :, J] = D
        U = Ns.float().unsqueeze(1).expand(bs,N)
        U = torch.ge(U, torch.arange(1,N+1).type(dtype).unsqueeze(0).expand(bs,N))
        U = U.float() / Ns.float().unsqueeze(1).expand_as(U)
        U = torch.bmm(U.unsqueeze(2),U.unsqueeze(1))
        WW[:, :, :, J+1] = U
        x = torch.cat((d.unsqueeze(2),cts),2)
        return Variable(WW), Variable(x), Variable(WW[:,:,:,1])


    def compute_operators(self, W, sample, cities, J):
        bs = sample.size(0)
        Ns1 = sample.long().sum(1)
        Ns2 = (1-sample.long()).sum(1)
        W1, cts1 = self.split_operator(W, sample, cities)
        W2, cts2 = self.split_operator(W, 1-sample, cities)
        op1 = self.compute_other_operators(W1, Ns1, cts1, J)
        op2 = self.compute_other_operators(W2, Ns2, cts2, J)
        return op1, op2
        # unreachable leftover from an earlier version of this method (Phi, WW and d
        # are not defined in this scope):
        # WW[:, :, :, J + 1] = Phi / Phi.sum(1).unsqueeze(1).expand_as(Phi)
        # return WW, d, Phi
    
    def join_preds(self, pred1, pred2, sample):
        bs = pred1.size(0)
        N = sample.size(1)
        N1 = pred1.size(1)
        N2 = pred2.size(1)
        pred = Variable(torch.ones(bs,N,N).type(dtype)*(-999))

        for b in range(bs):
            n1 = sample[b].long().sum(0)[0]
            n2 = (1-sample[b]).long().sum(0)[0]
            inds = torch.cat((torch.nonzero(sample[b]).type(dtype),torch.nonzero(1-sample[b]).type(dtype)),0).squeeze()
            inds = torch.topk(-inds,N)[1]
            M = Variable(torch.zeros(N,N).type(dtype))
            M[:n1,:n1] = pred1[b,:n1,:n1]
            M[n1:,n1:] = pred2[b,:n2,:n2]
            inds = Variable(inds, requires_grad=False)
            M = M.index_select(0,inds).index_select(1,inds)
            pred[b, :, :] = M
        return pred
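    # The forward pass below chains the three networks: Split scores each city,
    # sample_one turns those probabilities into a binary partition, Tsp predicts a
    # tour on each sub-problem, join_preds stitches the two predictions back into
    # the original node ordering, and Merge refines the sigmoid of that partial
    # prediction into the final output.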

    def forward(self, input, W, cities):
        scores, probs = self.Split(input)
        #variance = compute_variance(probs)
        bin_sample, sample, log_probs_samples = self.sample_one(probs, mode='train')
        op1, op2 = self.compute_operators(W.data, bin_sample, cities, self.J)
        pred1 = self.Tsp(op1)
        pred2 = self.Tsp(op2)
        partial_pred = self.join_preds(pred1, pred2, bin_sample)
        partial_pred = F.sigmoid(partial_pred)
        
        pred = self.Merge((input[0], input[1], partial_pred))
        return probs, log_probs_samples, pred
    
    def compute_loss(self, pred, target, logprobs):
        loss_split = 0.0
        loss_merge = 0.0
        labels = target[1]
        for i in range(labels.size()[-1]):
            for j in range(labels.size()[0]):
                lab = labels[j, :, i].contiguous().view(-1)
                cel = CEL(pred[j], lab)
                loss_merge += cel
                loss_split += Variable(cel.data) * logprobs[j]
        return loss_merge/pred.size(0), loss_split/pred.size(0)
    
    def train(self, iterations, print_freq, test_freq, save_freq, path_model):
        for it in range(iterations):
            start = time.time()
            batch = self.gen.sample_batch(self.batch_size, cuda=torch.cuda.is_available())
            input, W, WTSP, labels, target, cities, perms, costs = extract(batch)
            probs, log_probs_samples, pred = self.forward(input, W, cities)
            loss_merge, loss_split = self.compute_loss(pred, target, log_probs_samples)
            #loss_split -= variance*rf
            self.Split.zero_grad()
            loss_split.backward()
            nn.utils.clip_grad_norm(self.Split.parameters(), self.clip_grad_norm)
            self.optimizer_split.step()
            self.Tsp.zero_grad()
            self.Merge.zero_grad()
            loss_merge.backward()
            nn.utils.clip_grad_norm(self.Tsp.parameters(), self.clip_grad_norm)
            nn.utils.clip_grad_norm(self.Merge.parameters(), self.clip_grad_norm)
            self.optimizer_tsp.step()
            self.optimizer_merge.step()
            
            self.logger.add_train_loss(loss_split, loss_merge)
            self.logger.add_train_accuracy(pred, labels, W)
            elapsed = time.time() - start
            
            if it%print_freq == 0 and it > 0:
                loss_split = loss_split.data.cpu().numpy()[0]
                loss_merge = loss_merge.data.cpu().numpy()[0]
                out = ['---', it, loss_split, loss_merge, self.logger.cost_train[-1],
                       self.logger.accuracy_train[-1], elapsed]
                print(template_train1.format(*info_train))
                print(template_train2.format(*out))
                #print(variance)
                #print(probs[0])
                #plot_clusters(it, probs[0], cities[0])
                #os.system('eog ./plots/clustering/clustering_it_{}.png'.format(it))
            if it%test_freq == 0 and it >= 0:
                self.test()
                #self.logger.plot_test_logs()
            if it%save_freq == 0 and it > 0:
                self.save_model(path_model, it)
    
    def test(self):
        for i, gen in enumerate(self.test_gens):
            print('Test: {}'.format(self.test_gens_labels[i]))
            self.test_gen(gen)

    def test_gen(self, gen):
        iterations_test = int(gen.num_examples_test / self.batch_size)
        for it in range(iterations_test):
            start = time.time()
            batch = gen.sample_batch(self.batch_size, is_training=False, it=it,
                                    cuda=torch.cuda.is_available())
            input, W, WTSP, labels, target, cities, perms, costs = extract(batch)
            probs, log_probs_samples, pred = self.forward(input, W, cities)
            loss_merge, loss_split = self.compute_loss(pred, target, log_probs_samples)
            #loss_split -= variance*rf
            last = (it == iterations_test-1)
            self.logger.add_test_accuracy(pred, labels, perms, W, cities, costs,
                                    last=last, beam_size=beam_size)
            self.logger.add_test_loss(loss_split, loss_merge, last=last)
            elapsed = time.time() - start
            '''if not last and it % 100 == 0:
                loss = loss.data.cpu().numpy()[0]
                out = ['---', it, loss, logger.accuracy_test_aux[-1], 
                    logger.cost_test_aux[-1], beam_size, elapsed]
                print(template_test1.format(*info_test))
                print(template_test2.format(*out))'''
        print('TEST COST: {} | TEST ACCURACY {}\n'
            .format(self.logger.cost_test[-1], self.logger.accuracy_test[-1]))
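A hedged sketch of how the class above might be driven end to end, using only the methods it defines; the dataset path, label, and logging frequencies are placeholders, and the instance is assumed to be constructed as in the sketch after Example #1:

    dcn.set_dataset('./data/tsp/', num_examples_train=200,
                    num_examples_test=10, N_train=20, N_test=20)
    dcn.add_test_dataset(dcn.gen, label='N=20')
    dcn.train(iterations=5000, print_freq=100, test_freq=500,
              save_freq=1000, path_model='./models/dcn')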
Example #5
 batch_size = 500
 num_features = 32
 num_layers = 5
 n_samples = 20
 scales = args.splits+1
 if N >= 200:
     num_examples_test = 100
     batch_size = 100
 
 test = args.test
 
 gen = Generator(args.dataset_path, args.solver_path, num_examples_train, num_examples_test, N, C_min, C_max, test=test)
 gen.load_dataset()
 num_iterations = 100000
 
 Knap = Split_GNN(num_features, num_layers, 3, dim_input=3)
 if args.load_file_path != '':
     Knap = load_model(args.load_file_path, Knap)
 optimizer = optim.Adamax(Knap.parameters(), lr=1e-3)
 
 log = Logger()
 log2 = Logger()
 path_train_plot = os.path.join(args.logs_path, 'training.png')
 
 if test:
     num_iterations = num_examples_test // batch_size
 
 start = time.time()
 for it in range(num_iterations):
     batch = gen.sample_batch(batch_size, is_training=not test, it=it)
     weights, volumes, C, OptW, OptV, is_chosen_opt = batch
Example #6
 k_step = args.k_step
 n_samples = args.n_samples
 normalize = args.normalize
 last = args.last
 baseline = args.baseline
 
 if args.dataset == 'GM':
     gen = Generator('/data/anowak/dataset/', num_examples_train, num_examples_test, N, clusters, dim)
 elif args.dataset == "CIFAR":
     gen = GeneratorCIFAR('/data/anowak/dataset/', num_examples_train, num_examples_test, N, clusters, dim)
     dim = 27
 gen.load_dataset()
 num_iterations = 100000
 
 if not baseline:
     gnn = Split_GNN(num_features, num_layers, 5, dim_input=dim)
 else:
     gnn = Split_BaselineGNN(num_features, num_layers, 5, K, dim_input=dim)
 if args.load_file != '':
     gnn = load_model(args.load_file, gnn)
 optimizer = optim.RMSprop(gnn.parameters(), lr=1e-3)
 # optimizer = optim.Adam(gnn.parameters())
 
 test = args.test
 if test:
     num_iterations = num_examples_test // batch_size
 
 
 log = Logger()
 start = time.time()
 for it in range(num_iterations):
Example #7
    gen = Generator(path_dataset, './LKH/')
    N = 20
    gen.num_examples_train = 200
    gen.num_examples_test = 10
    gen.N = N
    gen.load_dataset()

    clip_grad = 40.0
    iterations = 5000
    batch_size = 20
    num_features = 10
    num_layers = 5
    J = 4
    rf = 10.0  # regularization factor

    Split = Split_GNN(batch_size, num_features, num_layers, J + 2, dim_input=3)
    Tsp = GNN(num_features, num_layers, J + 2, dim_input=3)
    Merge = GNN(num_features, num_layers, J + 2, dim_input=3)

    optimizer_split = optim.RMSprop(Split.parameters())
    optimizer_tsp = optim.Adamax(Tsp.parameters(), lr=1e-3)
    optimizer_merge = optim.Adamax(Merge.parameters(), lr=1e-3)

    for it in range(iterations):
        sample = gen.sample_batch(batch_size, cuda=torch.cuda.is_available())
        input, W, WTSP, labels, target, cities, perms, costs = extract(sample)
        scores, probs = Split(input)
        variance = compute_variance(probs)
        sample, log_probs_samples = sample_one(probs, mode='train')
        WW, x, Phi = compute_operators(W.data, sample, J)
        x = torch.cat((x.unsqueeze(2), cities), 2)
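Example #7 is truncated mid-loop; the remaining steps presumably mirror DCN.forward() and DCN.train() from Example #4: run Tsp and Merge on the computed operators, compute the merge and split losses, then back-propagate each loss and step its optimizer with gradient clipping. A comment-only outline, with names as defined earlier in this example:

        # hypothetical continuation, following the pattern of Example #4:
        # pred = ...                      # run Tsp / Merge on (WW, x, Phi)
        # loss_merge, loss_split = ...    # cross-entropy vs. labels, REINFORCE term
        # Split.zero_grad(); loss_split.backward()
        # nn.utils.clip_grad_norm(Split.parameters(), clip_grad)
        # optimizer_split.step()
        # Tsp.zero_grad(); Merge.zero_grad(); loss_merge.backward()
        # optimizer_tsp.step(); optimizer_merge.step()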