def evaluate(model, files, epoch=0, number_of_process=1):
    cnn = model.get_cnn()
    bs = model.get_batch_size()
    logger = logging.getLogger("trainer")

    queue = torch.multiprocessing.Queue(maxsize=QUEUE_SIZE)
    event_done = torch.multiprocessing.Event()

    # Worker process: loads every number_of_process-th batch from disk and
    # puts it, tagged with its batch index, into the bounded queue.
    class Batcher(torch.multiprocessing.Process):
        def __init__(self, n=1, i=0):
            super().__init__(daemon=True)
            self.n = n
            self.i = i

        def run(self):
            s = 0
            for i in range(0, len(files), bs):
                if s % self.n == self.i:
                    j = min(i + bs, len(files))
                    gc.collect()
                    x = model.load_eval_files(files[i:j])
                    queue.put((s, x))
                s += 1
            # Stay alive until the consumer has drained the queue.
            event_done.wait()

    for i in range(number_of_process):
        batcher = Batcher(number_of_process, i)
        batcher.start()

    cnn.eval()
    if torch.cuda.is_available():
        cnn.cuda()

    # One slot per batch; batches arrive out of order and are placed by index.
    all_outputs = [None] * len(range(0, len(files), bs))

    for i in range(0, len(files), bs):
        gc.collect()
        s, x = queue.get()
        x = torch.FloatTensor(x)
        if torch.cuda.is_available():
            x = x.cuda()

        outputs = model.evaluate(x)
        all_outputs[s] = outputs

        logger.info("Evaluation [%d.%.2d|%d/%d] Memory=%s Queue=%d",
                    epoch, 100 * i // len(files), i, len(files),
                    gpu_memory.format_memory(gpu_memory.used_memory()),
                    queue.qsize())

        del s
        del x
        del outputs

    event_done.set()

    return np.concatenate(all_outputs, axis=0)
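
# The sketch below isolates the producer/consumer pattern used by evaluate()
# above so it can be read and run on its own: each of the n daemon Batcher
# workers handles the batches whose index s satisfies s % n == i, tags every
# item with s, and then blocks on event_done so the queued data stays valid
# until the consumer has read everything; the consumer writes results back
# into slot s, restoring the original order. fake_load() is a hypothetical
# stand-in for model.load_eval_files(), and, like the functions in this file,
# the sketch assumes fork-based multiprocessing (the Linux default), since the
# Process subclass is defined locally.
def _demo_striped_loading(num_batches=10, number_of_process=2):
    import torch.multiprocessing

    queue = torch.multiprocessing.Queue(maxsize=4)
    event_done = torch.multiprocessing.Event()

    def fake_load(s):
        return [s] * 4  # pretend this is an expensive disk load

    class Batcher(torch.multiprocessing.Process):
        def __init__(self, n, i):
            super().__init__(daemon=True)
            self.n = n
            self.i = i

        def run(self):
            for s in range(num_batches):
                if s % self.n == self.i:
                    queue.put((s, fake_load(s)))
            event_done.wait()  # keep the worker alive until the consumer is done

    for i in range(number_of_process):
        Batcher(number_of_process, i).start()

    results = [None] * num_batches
    for _ in range(num_batches):
        s, x = queue.get()
        results[s] = x

    event_done.set()
    return results
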
def train_one_epoch(epoch, model, train_files, optimizer, criterion, number_of_process):
    cnn = model.get_cnn()
    bs = model.get_batch_size(epoch)
    logger = logging.getLogger("trainer")

    # Shuffle the file order once per epoch.
    indices = list(range(len(train_files)))
    random.shuffle(indices)

    queue = torch.multiprocessing.Queue(maxsize=QUEUE_SIZE)
    event_done = torch.multiprocessing.Event()

    # Worker process: loads every number_of_process-th batch and feeds the queue.
    class Batcher(torch.multiprocessing.Process):
        def __init__(self, n=1, i=0):
            super().__init__(daemon=True)
            self.n = n
            self.i = i

        def run(self):
            s = 0
            for i in range(0, len(train_files), bs):
                if s % self.n == self.i:
                    j = min(i + bs, len(train_files))
                    gc.collect()
                    x, y = model.load_train_files(
                        [train_files[g] for g in indices[i:j]])
                    queue.put((x, y))
                s += 1
            # Stay alive until the consumer has drained the queue.
            event_done.wait()

    for i in range(number_of_process):
        batcher = Batcher(number_of_process, i)
        batcher.start()

    losses = []

    cnn.train()
    if torch.cuda.is_available():
        cnn.cuda()

    for i in range(0, len(train_files), bs):
        t0 = perf_counter()
        gc.collect()
        t = time_logging.start()

        x, y = queue.get()
        x = torch.FloatTensor(x)
        y = torch.FloatTensor(y)
        x = torch.autograd.Variable(x)
        y = torch.autograd.Variable(y)
        if torch.cuda.is_available():
            x = x.cuda()
            y = y.cuda()
        t = time_logging.end("batch", t)

        optimizer.zero_grad()
        outputs = cnn(x)
        loss = criterion(outputs, y)
        t = time_logging.end("forward", t)

        loss.backward()
        optimizer.step()
        t = time_logging.end("backward", t)

        loss_ = float(loss.data.cpu().numpy())
        losses.append(loss_)

        # loss**0.5 is logged as an RMSE (the criterion is expected to be a mean squared error).
        logger.info(
            "[%d.%.2d|%d/%d] RMSE=%.1e <RMSE>=%.1e Queue=%d Memory=%s Time=%.2fs",
            epoch, 100 * i // len(train_files), i, len(train_files),
            loss_**0.5, np.mean(losses)**0.5,
            queue.qsize(),
            gpu_memory.format_memory(gpu_memory.used_memory()),
            perf_counter() - t0)

        del x
        del y
        del outputs
        del loss

    event_done.set()

    return np.mean(losses)
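
# Hedged sketch of the optimizer and criterion this regression variant of
# train_one_epoch() expects: the logging line reports loss**0.5 as an RMSE,
# which only makes sense for a mean-squared-error criterion. The choice of
# Adam and the learning rate below are illustrative assumptions, not values
# taken from the original code.
def _make_training_objects(cnn):
    import torch.nn as nn
    import torch.optim

    criterion = nn.MSELoss()                                   # so loss**0.5 is an RMSE
    optimizer = torch.optim.Adam(cnn.parameters(), lr=1e-3)    # assumed optimizer and learning rate
    return optimizer, criterion
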
def train_one_epoch(epoch, model, train_files, train_labels, optimizer, criterion, number_of_process, queue_size):
    cnn = model.get_cnn()
    logger = logging.getLogger("trainer")

    # list of lists [first batch, second batch, ...]
    batches = model.create_train_batches(epoch, train_files, train_labels)

    queue = torch.multiprocessing.Queue(maxsize=queue_size)
    event_done = torch.multiprocessing.Event()

    # Worker process: loads every number_of_process-th batch and feeds the queue.
    class Batcher(torch.multiprocessing.Process):
        def __init__(self, n=1, i=0):
            super().__init__(daemon=True)
            self.n = n
            self.i = i

        def run(self):
            for s, batch in enumerate(batches):
                if s % self.n == self.i:
                    gc.collect()
                    x, y = model.load_train_batch(batch)
                    queue.put((x, y))
            # Stay alive until the consumer has drained the queue.
            event_done.wait()

    for i in range(number_of_process):
        batcher = Batcher(number_of_process, i)
        batcher.start()

    losses = []
    total_correct = 0
    total_trained = 0

    cnn.train()
    if torch.cuda.is_available():
        cnn.cuda()

    for s, batch in enumerate(batches):
        t0 = perf_counter()
        gc.collect()
        t = time_logging.start()

        x, y = queue.get()
        x = torch.autograd.Variable(x)
        y = torch.autograd.Variable(y)
        t = time_logging.end("load batch", t)

        if torch.cuda.is_available():
            x = x.cuda()
            y = y.cuda()
        t = time_logging.end("upload batch", t)

        optimizer.zero_grad()
        outputs = cnn(x)
        loss = criterion(outputs, y)
        t = time_logging.end("forward", t)

        loss.backward()
        optimizer.step()
        t = time_logging.end("backward", t)

        loss_ = float(loss.data.cpu().numpy())
        losses.append(loss_)

        if outputs.size(-1) > 1:
            # Multi-class output: compare the argmax with integer or one-hot labels.
            if y.dim() == 1:
                correct = sum(outputs.data.cpu().numpy().argmax(-1) == y.data.cpu().numpy())
            else:
                correct = sum(outputs.data.cpu().numpy().argmax(-1) == y.data.cpu().numpy().argmax(-1))
        else:
            # Single output: the sign of the logit is compared with the {0, 1} label mapped to {-1, +1}.
            correct = np.sum(
                np.sign(outputs.data.cpu().numpy().reshape((-1,))) == 2 * y.data.cpu().numpy() - 1)

        total_correct += correct
        total_trained += len(batch)

        logger.info(
            "[%d.%.2d|%d/%d] Loss=%.1e <Loss>=%.1e Accuracy=%d/%d <Accuracy>=%.2f%% Queue=%d Memory=%s Time=%.2fs",
            epoch, 100 * s // len(batches), s, len(batches),
            loss_, np.mean(losses),
            correct, len(batch), 100 * total_correct / total_trained,
            queue.qsize(),
            gpu_memory.format_memory(gpu_memory.used_memory()),
            perf_counter() - t0)

        del x
        del y
        del outputs
        del loss

    event_done.set()

    return (np.mean(losses), total_correct / total_trained)
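
# A small self-contained check of the accuracy bookkeeping above, using made-up
# numpy arrays instead of real network outputs. For a single-output network the
# loop counts a prediction as correct when the sign of the logit matches the
# {0, 1} label remapped to {-1, +1}; for a multi-class network it compares the
# argmax with either integer or one-hot labels.
def _demo_accuracy_counting():
    import numpy as np

    # Single logit, labels in {0, 1}: sign(logit) must equal 2 * label - 1.
    logits = np.array([1.3, -0.2, 0.7, -2.1])
    labels = np.array([1, 0, 0, 1])
    correct_binary = np.sum(np.sign(logits) == 2 * labels - 1)       # 2 of 4 correct

    # Multi-class scores, integer labels: argmax must equal the label.
    scores = np.array([[0.1, 0.9], [0.8, 0.2], [0.3, 0.7]])
    int_labels = np.array([1, 0, 0])
    correct_multiclass = sum(scores.argmax(-1) == int_labels)        # 2 of 3 correct

    return correct_binary, correct_multiclass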