def run(self, plot=True, resume=False):
    """Train with the Caffe command-line tool from inside ``self.workdir``.

    Caffe's stderr is piped through ``tee -a`` so that it is appended to
    ``self.LOG_FILE``. When ``plot`` is true the tee output is also parsed
    by ``pyffe.LogParser`` and pushed to a ``pyffe.LivePlot``.

    Note: this method changes the process working directory to
    ``self.workdir`` and does not itself restore it.

    Args:
        plot: if true, parse the log stream and update a live plot while
            training runs; otherwise the tee output goes to ``os.devnull``.
        resume: if true, restart from the snapshot reported by
            ``self.get_last_snapshot()``; when that returns ``None`` a
            fresh run is started instead (and the old log is removed).
    """
    os.chdir(self.workdir)

    last_sn = None
    if resume:
        last_sn = self.get_last_snapshot()
        if last_sn is None:
            # Nothing to resume from: fall back to a fresh run.
            resume = False
        else:
            logging.info(
                'Resuming training from iteration {}'.format(last_sn))

    logging.info('Training on ' + self.train.get_name() +
                 ' while validating on ' +
                 ', '.join([str(v) for v in self.val]) + ' ...')

    # A fresh run starts with a clean log; a resumed run keeps appending.
    if os.path.exists(self.LOG_FILE) and not resume:
        os.remove(self.LOG_FILE)

    cmd = [
        '/opt/caffe/build/tools/caffe', 'train',
        '-gpu', '0',
        '-solver', 'solver.prototxt',
    ]
    if resume:
        cmd += [
            '-snapshot',
            '{}/snapshot_iter_{}.solverstate'.format(
                self.SNAPSHOTS_DIR, last_sn),
        ]
    elif self.model.infmt.pretrain is not None:
        cmd += ['-weights', os.path.basename(self.model.infmt.pretrain)]

    caffe = subprocess.Popen(cmd, stderr=subprocess.PIPE)

    # Only opened when the tee output is discarded; closed in the finally.
    devnull = None if plot else open(os.devnull, 'wb')
    try:
        # universal_newlines=True makes the pipe yield text lines, so the
        # '' sentinel used below terminates the iterator on EOF whether the
        # interpreter would otherwise hand back bytes or str.
        tee = subprocess.Popen(
            ['tee', '-a', self.LOG_FILE],
            stdin=caffe.stderr,
            stdout=subprocess.PIPE if plot else devnull,
            universal_newlines=True)

        def handler(sig, frame):
            # propagate SIGINT down, and wait
            # NOTE(review): SIGHUP is sent first, presumably so that Caffe
            # snapshots before it stops -- confirm against the Caffe build.
            if caffe.poll() is None:
                os.kill(caffe.pid, signal.SIGHUP)
                os.kill(caffe.pid, sig)
            caffe.wait()

        previous_handler = signal.signal(signal.SIGINT, handler)
        try:
            if plot:
                line_iter = iter(tee.stdout.readline, '')
                live_plot = pyffe.LivePlot(title=self.long_name())
                pyffe.LogParser(line_iter).parse(live_plot)
            tee.wait()
        finally:
            # Do not leave a handler bound to this (finished) run installed.
            # signal.signal returns None when the previous handler was not
            # installed from Python; that value cannot be re-installed.
            if previous_handler is not None:
                signal.signal(signal.SIGINT, previous_handler)
    finally:
        if devnull is not None:
            devnull.close()

    # returncode stays None until the child is waited for; without this
    # wait the check below would be true after every run.
    caffe.wait()

    # print something in case of error
    if caffe.returncode != 0:
        subprocess.call(['tail', '-n', '20', self.LOG_FILE])
def show_logs(self):
    """Plot the data returned by ``self.get_log_data()`` in a ``pyffe.LivePlot``."""
    log_plot = pyffe.LivePlot(
        title=self.long_name(), train=self.train, val=self.val)
    log_plot(self.get_log_data())

# def summarize(self, show_train_points=True):
#
#     log_data = self.get_log_data()
#     last_iter = log_data['train']['iteration'][-1]
#     bs = log_data['meta']['batch_size'][0]
#
#     # list of indices where max accuracies for each test are
#     it_idx, it_max = self.get_argmax_iters()
#
#     pdata = [[round(outs['accuracy'][i], 2) for i in it_idx] for k, outs in log_data['test']['out'].iteritems()]
#     vnames = [v.get_name() for v in self.val]
#
#     v_idx_num = len(self.val)
#     v_idx_names = vnames
#
#     if show_train_points:
#         # XXX maybe bug in iteration/indexes? However, this method is merged with summarize_exact
#         train_pcent = ['{0:.0f}% (~{1} imgs)'.format(100 * log_data['test']['iteration'][i] / last_iter,
#                                                      log_data['test']['iteration'][i] * bs) for i in it_max]
#         pdata = pdata + [train_pcent]
#         v_idx_num = len(self.val) + 1
#         v_idx_names = vnames + [' --> at']
#
#     index = [
#         [self.model.name] * v_idx_num,
#         [self.train.get_name()] * v_idx_num,
#         v_idx_names
#     ]
#
#     return pd.DataFrame(pdata, index=index, columns=vnames)

@preserve_cwd
def trained_models_to_zip(self):
    """Create one zip archive per entry of ``it_max`` inside ``self.workdir``.

    For the i-th iteration returned as the second value of
    ``self.get_argmax_iters()``, an archive named
    ``<model>-on-<train>-val-<val[i]>.zip`` is filled with
    ``deploy.prototxt`` and the ``snapshot_iter_<it>.caffemodel`` file from
    ``self.SNAPSHOTS_DIR``. ``zip -j`` is used, so directory components are
    not stored in the archive.

    NOTE(review): ``it_max`` presumably holds, for each validation set, the
    iteration with the best accuracy -- confirm against
    ``get_argmax_iters``.
    """
    # Local import so this method does not rely on the module-level imports.
    import subprocess

    os.chdir(self.workdir)
    _, it_max = self.get_argmax_iters()
    mname = self.model.name
    tname = self.train.get_name()

    for i, it in enumerate(it_max):
        vname = self.val[i].get_name()
        aname = "{}-on-{}-val-{}.zip".format(mname, tname, vname)
        weights = os.path.join(
            self.SNAPSHOTS_DIR, "snapshot_iter_{}.caffemodel".format(it))
        # Argument lists (no shell) keep names containing spaces or shell
        # metacharacters intact.
        subprocess.call(["zip", "-j", aname, "deploy.prototxt"])
        subprocess.call(["zip", "-j", aname, weights])
out_num = int(matches.group(1)) # maybe useless? out_name = str(matches.group(2)) out_value = float(matches.group(3)) if out_name not in self.data['train']['out']: self.data['train']['out'][out_name] = [] self.data['train']['out'][out_name].append(out_value) # LEARNING RATE matches = self.train_lr.match(line) if matches is not None: lr = float(matches.group(1)) self.data['train']['lr'].append(lr) if callback is not None: callback(self.data) return self.data if __name__ == "__main__": import pyffe import matplotlib.pyplot as plt with open("train.caffelog", "r") as f: parser = LogParser(iter(f.readline, '')) lp = pyffe.LivePlot() parser.parse(lp) lp.waitclose()