best_acc = 0 # LOSS # ----------------------------- if loss_type == 'bce': criterion = nn.BCELoss() else: criterion = nn.MSELoss() optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) print("model and criterion loaded ...") # LOOP # ----------------------------- for epoch in range(num_epochs): model.train() shuffle(rows_train) # TRAIN one EPOCH # ------------------------- for i in range(num_iters - 1): # read data # x, y = read_csv_batch('../../data/final_plans/final_train.csv', i, batch_size) ts = time.time() x, y = read_batch(rows_train, i, batch_size) x = torch.tensor(x) x = nn.functional.interpolate(x, (75, 75)) img = x[:, :3, :, :] qst = x[:, 3:, :, :]
class Task():
    """Relation-Network training task.

    Wires together the tokenizer, datasets, data loaders, RN model,
    optimizer, LR scheduler, loss, and TensorboardX writer from the
    command-line ``args``, then drives the epoch loop in :meth:`train`.

    Note: ``epochTrain``, ``test``, ``save`` and ``load`` are defined
    elsewhere in this class (outside this chunk).
    """

    def __init__(self, args):
        print('#' * 60)
        print(' ' * 20 + ' Task Created ' + ' ' * 20)
        print('#' * 60)
        ######################################################################
        # Parameters
        self.batchSize = args.batchSize
        self.lr = args.lr
        self.weightDecay = 1e-4        # Adam L2 penalty
        self.objNumMax = 60            # max objects fed to the RN per sample
        self.wordEmbeddingDim = 64
        self.lstmHiddenDim = 128
        self.instructionLength = 10    # tokenizer encoding length (tokens)
        self.pinMemory = True
        self.dropout = False
        self.epoch = args.epoch
        self.epoch_i = 0               # current epoch index, updated by train()
        self.batchPrint = 100
        self.batchModelSave = args.batchModelSave
        self.checkPoint = args.checkPoint

        # Path
        self.scanListTrain = '../data/scan_list_train.txt'
        self.scanListTest = '../data/scan_list_test.txt'
        self.datasetPath = '../generated_data'
        self.logPath = args.logPath

        # Dataset
        self.tokenizer = Tokenizer(encoding_length=self.instructionLength)
        self.trainDataset = DatasetGenerator(scanListPath=self.scanListTrain,
                                             datasetPath=self.datasetPath)
        self.testDataset = DatasetGenerator(scanListPath=self.scanListTest,
                                            datasetPath=self.datasetPath)
        # build vocabulary from all instructions in the training dataset
        self.tokenizer.build_vocab_from_dataset(self.trainDataset)

        # DataLoader
        self.trainDataLoader = DataLoader(dataset=self.trainDataset,
                                          shuffle=True,
                                          batch_size=self.batchSize,
                                          num_workers=12,
                                          pin_memory=self.pinMemory)
        self.testDataLoader = DataLoader(dataset=self.testDataset,
                                         shuffle=False,
                                         batch_size=self.batchSize,
                                         num_workers=12,
                                         pin_memory=self.pinMemory)

        # calculate batch numbers (ceil so the final partial batch is counted)
        self.trainBatchNum = int(
            np.ceil(len(self.trainDataset) / float(self.batchSize)))
        self.testBatchNum = int(
            np.ceil(len(self.testDataset) / float(self.batchSize)))

        # Create model
        self.RN = RN(batch_size=self.batchSize,
                     num_objects=self.objNumMax,
                     vocab_size=self.tokenizer.get_vocal_length(),
                     embedding_size=self.wordEmbeddingDim,
                     hidden_size=self.lstmHiddenDim,
                     padding_idx=1,
                     dropout=self.dropout)

        # Run task on all available GPUs
        if torch.cuda.is_available():
            if torch.cuda.device_count() > 1:
                # FIX: was print("Use ", count, " GPUs!") — under the Python 2
                # print statement that emitted a tuple repr; format explicitly.
                print("Use %d GPUs!" % torch.cuda.device_count())
                self.RN = nn.DataParallel(self.RN)
            self.RN = self.RN.cuda()
            print('Model Created on GPUs.')

        # Optimizer
        self.optimizer = optim.Adam(self.RN.parameters(),
                                    lr=self.lr,
                                    betas=(0.9, 0.999),
                                    eps=1e-08,
                                    weight_decay=self.weightDecay)
        # Scheduler: cut LR by 10x when the monitored quantity plateaus
        self.scheduler = ReduceLROnPlateau(self.optimizer,
                                           factor=0.1,
                                           patience=10,
                                           mode='min')
        # Loss Function
        self.loss = torch.nn.MSELoss()

        # Load model given a checkPoint
        if self.checkPoint != "":
            self.load(self.checkPoint)

        # create TensorboardX record
        self.writer = SummaryWriter(
            comment='word_embedding_64_lstm_hidden_state_128')
        self.stepCnt_train = 1
        self.stepCnt_test = 1

    def train(self):
        """Run the full training loop: test (from epoch 1 on), train one epoch, save."""
        print('Training task begin.')
        print('----Batch Size: %d' % self.batchSize)
        print('----Learning Rate: %f' % (self.lr))
        print('----Epoch: %d' % self.epoch)
        print('----Log Path: %s' % self.logPath)
        for self.epoch_i in range(self.epoch):
            # if self.epoch_i == 0:
            #     self.save(batchIdx=0)  # Test the save function
            if self.epoch_i != 0:
                try:
                    # BUG FIX: was `self.map = self.map.eval()` — `self.map` is
                    # never assigned anywhere in this class, so every epoch this
                    # raised AttributeError and the broad except below silently
                    # skipped self.test(). Switch the *model* to eval mode.
                    self.RN = self.RN.eval()
                    self.test()
                except Exception as e:  # broad on purpose: a failed eval pass must not kill training
                    print(e)
            self.RN = self.RN.train()
            self.epochTrain()
            self.save()