def __init__(self):
    """Set up the playback form: open both video files and start the
    receive / draw / recognize / database-refresh worker threads.

    Timer is a project class emitting the Qt signal named in its first
    argument every ``sleep_time`` seconds (default appears to be 0.04 s
    per the original comment — TODO confirm in Timer's definition).
    """
    super(XioPlayVideo, self).__init__()
    self.ui = ui.Ui_Form()
    self.ui.setupUi(self)
    self.left_cam = cv2.VideoCapture('./videos/left_cam.mp4')  # left camera feed
    self.right_cam = cv2.VideoCapture('./videos/right_cam.mp4')  # right camera feed
    # Most recent frame captured from each camera; filled by the receive thread.
    self.frame_left = None
    self.frame_right = None
    self.thread_video_receive = threading.Thread(
        target=self.video_receive_local)  # thread that reads the video streams
    self.thread_video_receive.start()
    self.thread_time = Timer('updatePlay()')  # redraws the label every 0.04 s
    self.connect(self.thread_time, QtCore.SIGNAL('updatePlay()'),
                 self.video_play)
    self.thread_time.start()
    self.thread_recog = Timer('updatePlay()', sleep_time=1)  # analyses one frame per second
    self.connect(self.thread_recog, QtCore.SIGNAL('updatePlay()'),
                 self.video_recog)
    self.thread_recog.start()
    self.thread_data = Timer('updatePlay()', sleep_time=1800)  # polls the database every 30 min
    self.connect(self.thread_data, QtCore.SIGNAL('updatePlay()'),
                 self.data_read)
    self.thread_data.start()
    self.thread_tcp = None  # TCP worker thread; not implemented yet
def __init__(self):
    """Set up the playback form with a local TCP server: open both video
    files, listen on localhost:8888, and start the worker threads."""
    super(XioPlayVideo, self).__init__()
    self.ui = ui.Ui_Form()
    self.ui.setupUi(self)
    self.left_cam = cv2.VideoCapture('./videos/left_cam.mp4')  # left camera feed
    self.right_cam = cv2.VideoCapture('./videos/right_cam.mp4')  # right camera feed
    # Most recent frame captured from each camera; filled by the receive thread.
    self.frame_left = None
    self.frame_right = None
    self.tcpServer = QTcpServer()  # TCP server endpoint
    if not self.tcpServer.listen(QHostAddress.LocalHost, 8888):
        # Could not bind the port: report and close the window.
        print(self.tcpServer.errorString())
        self.close()
    self.connect(self.tcpServer, QtCore.SIGNAL('newConnection()'),
                 self.read_message)
    self.thread_video_receive = threading.Thread(
        target=self.video_receive_local)  # thread that reads the video streams
    self.thread_video_receive.start()
    self.thread_time = Timer('updatePlay()')  # redraws the label every 0.04 s
    self.connect(self.thread_time, QtCore.SIGNAL('updatePlay()'),
                 self.video_play)
    self.thread_time.start()
    self.thread_recog = Timer('updatePlay()', sleep_time=1)  # analyses one frame per second
    self.connect(self.thread_recog, QtCore.SIGNAL('updatePlay()'),
                 self.video_recog)
    self.thread_recog.start()
    self.thread_data = Timer('updatePlay()', sleep_time=1800)  # polls the database every 30 min
    self.connect(self.thread_data, QtCore.SIGNAL('updatePlay()'),
                 self.data_read)
    self.thread_data.start()
    self.thread_tcp = None  # TCP worker thread; not implemented yet
def __init__(self):
    """Initialise the main monitoring window: seed today's database rows
    if the date has rolled over, then start the TCP server and the
    receive / draw / recognize / database-refresh worker threads."""
    super(XioAll, self).__init__()
    self.ui = ui.Ui_Form()
    self.ui.setupUi(self)
    # Most recent frame from each camera; filled by the receive thread.
    self.frame_left = None
    self.frame_right = None
    self.is_work = True
    self.one_static_time = 0  # duration of one fault standstill
    self.all_time = 0  # accumulated working time for the day
    self.q = MyQueue()  # queue of frames (per original note: storing states would be better)
    self.vision = Vision()
    # If the date has changed since the last row, insert an all-zero row
    # for today so later updates have a record to increment.
    da = data_access.EquipmentTimeData()  # accessor for the loss-item table
    result_loss = da.select_("select * from loss ORDER BY SJ DESC limit 1")
    current_time = datetime.datetime.now().strftime('%Y-%m-%d')
    if str(result_loss[0][0]) != current_time:
        da.update('insert into loss(SJ,action1,action2,action3,action4,action5,action6)values'
                  '("%s",%d,%d,%d,%d,%d,%d)' % (current_time, 0, 0, 0, 0, 0, 0))
    else:
        pass
    da_oee = data_access.OEEData()  # accessor for the hourly OEE-utilisation table
    result_oee = da_oee.select_('select * from oee_date ORDER BY SJC DESC limit 1')
    if str(result_oee[0][0]) != current_time:
        da_oee.update_('insert into oee_date(SJC,O8,O9,O10,O11,O12,O13,O14,O15,O16,O17,O18)values'
                       '("' + current_time + '",0,0,0,0,0,0,0,0,0,0,0)')
    else:
        pass
    self.thread_figure = Timer('updatePlay()', sleep_time=120)  # refreshes the plot area every 2 min
    self.connect(self.thread_figure, QtCore.SIGNAL('updatePlay()'), self.draw)
    self.thread_figure.start()
    self.server = ThreadedTCPServer(
        (self.HOST, self.PORT),
        ThreadedTCPRequestHandler)  # listens for client requests continuously
    self.server_thread = threading.Thread(target=self.server.serve_forever)
    self.server_thread.start()
    self.thread_video_receive = threading.Thread(
        target=self.video_receive_local)  # thread that reads the video streams
    self.thread_video_receive.start()
    self.thread_time = Timer('updatePlay()')  # redraws the label every 0.04 s
    self.connect(self.thread_time, QtCore.SIGNAL('updatePlay()'),
                 self.video_play)
    self.thread_time.start()
    self.thread_recog = Timer('updatePlay()', sleep_time=1)  # analyses one frame per second
    self.connect(self.thread_recog, QtCore.SIGNAL('updatePlay()'),
                 self.video_recog)
    self.thread_recog.start()
    self.thread_data = Timer('updatePlay()', sleep_time=1800)  # polls the database every 30 min
    self.connect(self.thread_data, QtCore.SIGNAL('updatePlay()'),
                 self.data_read)
    self.thread_data.start()
def batch_size_linear_search():
    """Sweep batch sizes from 8 up to (but excluding) 600 in steps of 8,
    timing one training run per size, checkpointing the timings to a
    pickle after every run, and finally plotting batch size vs. time.

    Fixes over the original: the sweep bounds no longer shadow the
    builtins ``min``/``max``, and the pickle file is closed via a
    context manager instead of a bare ``open`` passed to ``pickle.dump``.
    """
    min_batch_size = 8
    max_batch_size = 600
    step_size = 8
    optimizer = lambda x: torch.optim.SGD(x, lr=0.1)
    experiment_name = "batch_size_linear_search"
    t = Timer()
    batch_size_times = {}
    for i, batch_size in enumerate(
            range(min_batch_size, max_batch_size, step_size)):
        t.start()
        # NOTE(review): epochs grows with the sweep index (i + 2), so later
        # batch sizes train for more epochs — confirm this is intentional
        # before comparing timings across batch sizes.
        main(experiment_name, optimizer, epochs=i + 2, batch_size=batch_size)
        elapsed_time = t.stop()
        batch_size_times[batch_size] = elapsed_time
        # Persist after every run so a crash loses at most one data point.
        with open("batch_size_times.pickle", "wb") as fh:
            pickle.dump(batch_size_times, fh)

    # Plot elapsed time against batch size, in ascending batch-size order.
    batch_sizes = []
    times = []
    for k in sorted(batch_size_times):
        batch_sizes.append(k)
        times.append(batch_size_times[k])
    plt.plot(np.array(batch_sizes), np.array(times))
    plt.xlabel("Batch Size")
    plt.ylabel("Epoch Time")
    plt.title("Batch Size vs Epoch Time")
    plt.show()
def evaluate(config, model, dataset_loader, eval_metric, split='dev', dump=True):
    """Evaluate *model* over *dataset_loader*.

    Updates two metric trackers in lockstep: a local MultiLabelMetric used
    only for the progress bar, and the caller-supplied *eval_metric*, whose
    aggregated metrics are returned (and optionally dumped to the log).

    Args:
        config: run configuration; uses num_class, metrics_thresholds and
            display_iter.
        model: object exposing ``predict(batch) -> {'scores': Tensor}``.
        dataset_loader: iterable of batches with a 'label' tensor.
        eval_metric: accumulator with clear()/add_batch()/get_metrics().
        split: name of the data split, for logging only.
        dump: when True, write the final metrics via dump_log().

    Returns:
        The metrics dict produced by ``eval_metric.get_metrics()``.
    """
    timer = Timer()
    metrics = MultiLabelMetric(config.num_class,
                               thresholds=config.metrics_thresholds)
    eval_metric.clear()  # drop state from any previous evaluation
    progress_bar = tqdm(dataset_loader)
    for idx, batch in enumerate(progress_bar):
        batch_labels = batch['label']
        predict_results = model.predict(batch)
        batch_label_scores = predict_results['scores']
        batch_labels = batch_labels.cpu().detach().numpy()
        batch_label_scores = batch_label_scores.cpu().detach().numpy()
        metrics.add_batch(batch_labels, batch_label_scores)
        eval_metric.add_batch(batch_labels, batch_label_scores)
        # Refresh the progress-bar postfix every display_iter batches
        # (every batch when display_iter is falsy).
        if not config.display_iter or idx % config.display_iter == 0:
            last_metrics = metrics.get_metrics()
            progress_bar.set_postfix(**last_metrics)
    log.info(f'Time for evaluating {split} set = {timer.time():.2f} (s)')
    print(eval_metric)
    metrics = eval_metric.get_metrics()
    if dump:
        dump_log(config, metrics, split)
    return metrics
def train_model(device, model, train_set_loader, optimizer):
    """Run one training epoch.

    Args:
        device: torch device string ("cuda"/"cpu") to move batches onto.
        model: the network to train (switched to train mode here).
        train_set_loader: iterable of (images, targets) batches.
        optimizer: optimizer stepped once per batch.

    Returns:
        (average_train_loss, accuracy): per-image mean cross-entropy loss
        (float) and top-1 accuracy in percent.
    """
    timer = Timer().start()
    model.train()  # enable train-mode behavior of dropout/batch-norm layers
    total = 0
    correct = 0
    total_loss = 0.0
    for images, targets in train_set_loader:
        batch_size = images.shape[0]
        total += batch_size
        optimizer.zero_grad()
        images = images.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)
        output = model(images)
        loss = F.cross_entropy(output, targets, reduction='mean')
        # Accumulate as a detached float: the original summed the
        # graph-attached loss tensor, retaining every batch's autograd
        # graph for the whole epoch. Weight by batch size so dividing by
        # `total` below yields a true per-image average.
        total_loss += loss.item() * batch_size
        loss.backward()
        optimizer.step()
        _, predicted = torch.max(output.data, 1)
        correct += predicted.eq(targets.data).cpu().sum()
    average_train_loss = total_loss / total
    accuracy = 100. * correct.item() / total
    logger.info(
        f"Training Took {timer.stop():0.2f}s. Images in epoch: {total} ")
    return average_train_loss, accuracy
def __init__(self):
    """Set up the figure window and start a Timer thread that emits
    updatePlay() every 2 seconds to trigger a redraw via self.draw."""
    super(XioFigurePlot, self).__init__()
    self.ui = ui.Ui_Form()
    self.ui.setupUi(self)
    self.thread_figure = Timer('updatePlay()', sleep_time=2)
    self.connect(self.thread_figure, QtCore.SIGNAL('updatePlay()'),
                 self.draw)
    self.thread_figure.start()
def validate(self, loader, model, criterion, epoch, args):
    """Validate the triplet model over the first ``args.val_size`` fraction
    of *loader*.

    Tracks plain and weight-weighted top-1 triplet accuracy plus the loss,
    prints progress every ``args.print_freq`` batches, and returns a dict
    with 'top1val', 'wtop1val' and the top-k metrics from triplet_allk.
    """
    timer = Timer()
    losses = AverageMeter()
    top1 = AverageMeter()
    wtop1 = AverageMeter()  # accuracy weighted by the per-example weights
    alloutputs = []  # (output pair, target, weight) for every example seen
    metrics = {}

    # switch to evaluate mode
    model.eval()

    def part(x):
        # Limit validation to the first args.val_size fraction of batches.
        return itertools.islice(x, int(len(x) * args.val_size))

    for i, x in enumerate(part(loader)):
        inputs, target, meta = parse(x)
        output, loss, weights = forward(inputs, target, model, criterion,
                                        meta['id'], train=False)
        prec1 = triplet_accuracy(output, target)
        wprec1 = triplet_accuracy(output, target, weights)
        losses.update(loss.item(), inputs[0].size(0))
        top1.update(prec1, inputs[0].size(0))
        wtop1.update(wprec1, inputs[0].size(0))
        # Collect per-example scores for the top-k summary after the loop.
        alloutputs.extend(
            zip([(x.item(), y.item()) for x, y in zip(*output)], target,
                weights))
        timer.tic()
        if i % args.print_freq == 0:
            print('[{name}] Test [{epoch}]: [{0}/{1} ({2})]\t'
                  'Time {timer.val:.3f} ({timer.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'WAcc@1 {wtop1.val:.3f} ({wtop1.avg:.3f})\t'.format(
                      i, int(len(loader) * args.val_size), len(loader),
                      name=args.name, timer=timer, loss=losses, top1=top1,
                      epoch=epoch, wtop1=wtop1))

    metrics.update(triplet_allk(*zip(*alloutputs)))
    metrics.update({'top1val': top1.avg, 'wtop1val': wtop1.avg})
    print(' * Acc@1 {top1val:.3f} \t WAcc@1 {wtop1val:.3f}'
          '\n topk1: {topk1:.3f} \t topk2: {topk2:.3f} \t '
          'topk5: {topk5:.3f} \t topk10: {topk10:.3f} \t topk50: {topk50:.3f}'
          .format(**metrics))
    return metrics
def minhash_lsh_dedupe_cassandra(batch_minhashes_pickle_path, lsh_pickle_path,
                                 tqdm_func, global_tqdm):
    """Detect near-duplicate documents with MinHash LSH.

    Loads a batch of per-file document minhashes and an LSH index, queries
    each document against the index, appends "<file_id> <document_id>" to a
    _duplicates.txt sidecar for every duplicate, inserts non-duplicates into
    the index, and checkpoints (file_id, document_id) after each document so
    the job can resume.

    Returns True on completion.
    """
    # [(file_id, [doc0_minhash, doc1_minhash, ...]), ....]
    batch_minhashes = timed_pickle_load(batch_minhashes_pickle_path,
                                        "batch minhashes")
    # For some reason this will freeze when loading on the first run.
    lsh = timed_pickle_load(lsh_pickle_path, "lsh")

    # Resume point: last fully-processed (file_id, document_id), or (-1, -1).
    checkpoint_file = batch_minhashes_pickle_path.replace(".pkl", "_ckpt.pkl")
    if os.path.exists(checkpoint_file):
        ckpt_file_id, ckpt_document_id = pickle.load(
            open(checkpoint_file, "rb"))
    else:
        ckpt_file_id = -1
        ckpt_document_id = -1

    logger.info("Detecting duplicates")
    timer = Timer().start()
    duplicate_file_path = batch_minhashes_pickle_path.replace(
        ".pkl", "_duplicates.txt")
    with open(duplicate_file_path, "a") as fh:
        for file_id, documents in batch_minhashes:
            # Skip whole files completed in a previous run.
            if file_id <= ckpt_file_id:
                global_tqdm.update(len(documents))
                continue
            for document_id, minhash in enumerate(documents):
                # NOTE(review): this resume branch fires at most once
                # (ckpt_document_id is reset to -1 inside it), so only
                # document 0 is skipped and documents 1..ckpt are
                # re-processed. Re-processing appears harmless because the
                # self-match check below suppresses re-insertion, but
                # confirm the tqdm accounting is as intended.
                if document_id <= ckpt_document_id:
                    global_tqdm.update(ckpt_document_id + 1)
                    ckpt_document_id = -1
                    continue

                results = lsh.query(minhash)
                duplicate_found = True if results else False
                is_self = False
                for json_results in results:
                    found_file_id, found_document_id = json.loads(json_results)
                    # This check is needed in case you re-run things
                    if file_id == found_file_id and document_id == found_document_id:
                        duplicate_found = False
                        is_self = True
                        break

                if duplicate_found:
                    fh.write(f"{file_id} {document_id}\n")
                else:
                    # Only insert documents not already present in the index.
                    if not is_self:
                        lsh.insert(json.dumps((file_id, document_id)), minhash)

                global_tqdm.update()
                # Checkpoint after every document so a crash resumes here.
                pickle.dump((file_id, document_id),
                            open(checkpoint_file, "wb"))

    logger.info(timer.stop_string())
    return True
def train(self, loader, model, criterion, optimizer, epoch, args):
    """Train the triplet model for one epoch over the first
    ``args.train_size`` fraction of *loader*.

    Gradients are accumulated over ``args.accum_grad`` batches before each
    optimizer step. Prints progress every ``args.print_freq`` batches and
    returns {'top1': ..., 'wtop1': ...} epoch averages.
    """
    adjust_learning_rate(args.lr, args.lr_decay_rate, optimizer, epoch)
    timer = Timer()
    data_time = AverageMeter()  # time spent waiting on the data loader
    losses = AverageMeter()
    top1 = AverageMeter()
    wtop1 = AverageMeter()  # accuracy weighted by the per-example weights
    metrics = {}

    # switch to train mode
    model.train()
    optimizer.zero_grad()

    def part(x):
        # Limit training to the first args.train_size fraction of batches.
        return itertools.islice(x, int(len(x) * args.train_size))

    for i, x in enumerate(part(loader)):
        inputs, target, meta = parse(x)
        data_time.update(timer.thetime() - timer.end)
        output, loss, weights = forward(inputs, target, model, criterion,
                                        meta['id'])
        prec1 = triplet_accuracy(output, target)
        wprec1 = triplet_accuracy(output, target, weights)
        losses.update(loss.item(), inputs[0].size(0))
        top1.update(prec1, inputs[0].size(0))
        wtop1.update(wprec1, inputs[0].size(0))
        loss.backward()
        # Step only every accum_grad batches (gradient accumulation).
        if i % args.accum_grad == args.accum_grad - 1:
            print('updating parameters')
            optimizer.step()
            optimizer.zero_grad()
        timer.tic()
        if i % args.print_freq == 0:
            print('[{name}] Epoch: [{0}][{1}/{2}({3})]\t'
                  'Time {timer.val:.3f} ({timer.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'WAcc@1 {wtop1.val:.3f} ({wtop1.avg:.3f})\t'.format(
                      epoch, i, int(len(loader) * args.train_size),
                      len(loader), name=args.name, timer=timer,
                      data_time=data_time, loss=losses, top1=top1,
                      wtop1=wtop1))

    metrics.update({'top1': top1.avg, 'wtop1': wtop1.avg})
    return metrics
def fine_tune_train_and_val(args, recorder):
    """Step 2 of the pipeline: fine-tune a pretrained model, periodically
    validating and checkpointing the best top-1 precision.

    Args:
        args: parsed command-line namespace with the ft_* hyperparameters.
        recorder: experiment recorder used for logging and checkpoints.

    Returns:
        recorder.filename, the recorder's output path.
    """
    # Restore the global declaration that was commented out in the original
    # ("# = global lowest_val_loss, best_prec1"); without it, reading
    # best_prec1 below raises NameError.
    global lowest_val_loss, best_prec1
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # close the warning
    torch.manual_seed(1)
    cudnn.benchmark = True
    timer = Timer()
    # == dataset config ==
    num_class, data_length, image_tmpl = ft_data_config(args)
    train_transforms, test_transforms, eval_transforms = ft_augmentation_config(
        args)
    train_data_loader, val_data_loader, _, _, _, _ = ft_data_loader_init(
        args, data_length, image_tmpl, train_transforms, test_transforms,
        eval_transforms)
    # == model config ==
    model = ft_model_config(args, num_class)
    recorder.record_message('a', '=' * 100)
    recorder.record_message('a', '-' * 40 + 'finetune' + '-' * 40)
    recorder.record_message('a', '=' * 100)
    # == optim config ==
    train_criterion, val_criterion, optimizer = ft_optim_init(args, model)
    # == data augmentation (self-supervised) config ==
    tc = TC(args)
    # == train and eval ==
    print('*' * 70 + 'Step2: fine tune' + '*' * 50)
    for epoch in range(args.ft_start_epoch, args.ft_epochs):
        timer.tic()
        ft_adjust_learning_rate(optimizer, args.ft_lr, epoch, args.ft_lr_steps)
        train_prec1, train_loss = train(args, tc, train_data_loader, model,
                                        train_criterion, optimizer, epoch,
                                        recorder)
        # Scaled to roughly [0, 1] for the recorder's plots.
        recorder.record_ft_train(train_loss / 5.0, train_prec1 / 100.0)
        if (epoch + 1) % args.ft_eval_freq == 0:
            val_prec1, val_loss = validate(args, tc, val_data_loader, model,
                                           val_criterion, recorder)
            recorder.record_ft_val(val_loss / 5.0, val_prec1 / 100.0)
            is_best = val_prec1 > best_prec1
            best_prec1 = max(val_prec1, best_prec1)
            checkpoint = {
                'epoch': epoch + 1,
                'arch': "i3d",
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1
            }
            recorder.save_ft_model(checkpoint, is_best)
        timer.toc()
        left_time = timer.average_time * (args.ft_epochs - epoch)
        message = "Step2: fine tune best_prec1 is: {} left time is : {} now is : {}".format(
            best_prec1, timer.format(left_time), datetime.now())
        print(message)
        recorder.record_message('a', message)
    return recorder.filename
def validate_egovideo(self, loader, model, epoch, args):
    """ Run video-level validation on the Charades ego test set

    Averages per-clip softmax outputs into one prediction per video (plain
    and attention-weighted via w_x), computes Charades mAP for both, writes
    a submission file, and returns {'mAPego', 'mAPegow'}.

    NOTE: `cuda(async=True)` and `volatile=True` are legacy
    Python-2 / pre-0.4 PyTorch constructs; `async` is a reserved word in
    Python 3.7+.
    """
    timer = Timer()
    outputs, gts, ids = [], [], []
    outputsw = []  # attention-weighted predictions
    metrics = {}

    # switch to evaluate mode
    model.eval()

    for i, x in enumerate(loader):
        inp, target, meta = parse(x)
        target = target.long().cuda(async=True)
        # All clips of a video share one label; verify the batch is synced.
        assert target[0, :].eq(target[1, :]).all(), "val_video not synced"
        input_var = torch.autograd.Variable(inp.cuda(), volatile=True)
        output, w_x, w_z = model(input_var)
        output = torch.nn.Softmax(dim=1)(output)

        # Normalise the attention weights, then squash them into a narrow
        # scale factor around 1 (5% influence), clamped to stay positive.
        sw_x = torch.nn.Softmax(dim=0)(w_x) * w_x.shape[0]
        sw_x = (sw_x - sw_x.mean()) / sw_x.std()
        scale = torch.clamp(1 + (sw_x - 1) * 0.05, 0, 100)
        print('scale min: {}\t max: {}\t std: {}'.format(
            scale.min().data[0], scale.max().data[0], scale.std().data[0]))
        scale = torch.clamp(scale, 0, 100)
        scale *= scale.shape[0] / scale.sum()  # renormalise to mean 1
        outputw = output * scale.unsqueeze(1)

        # store predictions
        output_video = output.mean(dim=0)
        outputs.append(output_video.data.cpu().numpy())
        outputsw.append(outputw.mean(dim=0).data.cpu().numpy())
        gts.append(target[0, :])
        ids.append(meta['id'][0])
        timer.tic()
        if i % args.print_freq == 0:
            print('Test2: [{0}/{1}]\t'
                  'Time {timer.val:.3f} ({timer.avg:.3f})'.format(
                      i, len(loader), timer=timer))

    # mAP, _, ap = meanap.map(np.vstack(outputs), np.vstack(gts))
    mAP, _, ap = meanap.charades_nanmap(np.vstack(outputs), np.vstack(gts))
    mAPw, _, _ = meanap.charades_nanmap(np.vstack(outputsw), np.vstack(gts))
    metrics['mAPego'] = mAP
    metrics['mAPegow'] = mAPw
    print(ap)
    print(' * mAPego {mAPego:.3f} \t mAPegow {mAPegow:.3f}'.format(**metrics))
    submission_file(ids, outputs,
                    '{}/egoepoch_{:03d}.txt'.format(args.cache, epoch + 1))
    return metrics
def alignment(loader, model, epoch, args, task=best_one_sec_moment):
    """Measure first/third-person temporal alignment quality.

    Extracts fc7 features per video, finds the best-matching moment with
    *task* at window sizes 3 and 0 plus a random-matrix baseline, and
    tracks the absolute error (in seconds) against ground truth. Returns
    the median absolute error for window size 3.

    NOTE: `cuda(async=True)` / `volatile=True` are legacy Python-2-era
    PyTorch constructs; `async` is a reserved word in Python 3.7+.
    """
    timer = Timer()
    abssec = MedianMeter()   # |predicted - gt|, window 3
    abssec0 = MedianMeter()  # |predicted - gt|, window 0
    randsec = MedianMeter()  # random baseline
    model = ActorObserverFC7(model)  # wrap to expose fc7 features

    # switch to evaluate mode
    model.eval()

    def fc7_generator():
        # Yields (video_id, (first_person_fc7, third_person_fc7)) per clip.
        for i, x in enumerate(loader):
            inputs, target, meta = parse(x)
            target = target.long().cuda(async=True)
            input_vars = [
                torch.autograd.Variable(inp.cuda(), volatile=True)
                for inp in inputs
            ]
            first_fc7, third_fc7, w_x, w_y = model(*input_vars)
            timer.tic()
            if i % args.print_freq == 0:
                print('Alignment: [{0}/{1}]\t'
                      'Time {timer.val:.3f} ({timer.avg:.3f})'.format(
                          i, len(loader), timer=timer))
            for vid, o1, o2 in zip(meta['id'], first_fc7, third_fc7):
                yield vid, (o1.data.cpu().numpy(), o2.data.cpu().numpy())

    # Group consecutive clips by video id (loader is assumed to yield
    # clips of the same video contiguously — TODO confirm).
    for key, grp in groupby(fc7_generator(), key=lambda x: x[0]):
        print('processing id: {}'.format(key))
        _, mat = fc7list2mat(grp)
        _, _, _, j, gt = task(mat, winsize=3)
        _, _, _, j0, gt0 = task(mat, winsize=0)
        _, _, _, jr, gtr = task(np.random.randn(*mat.shape), winsize=3)
        abssec.update(abs(j - gt))
        abssec0.update(abs(j0 - gt0))
        randsec.update(abs(jr - gtr))
        print(' abs3: {abs3.val:.3f} ({abs3.avg:.3f}) [{abs3.med:.3f}]'
              ' abs0: {abs0.val:.3f} ({abs0.avg:.3f}) [{abs0.med:.3f}]'
              '\n'
              ' absr: {absr.val:.3f} ({absr.avg:.3f}) [{absr.med:.3f}]'.format(
                  abs3=abssec, abs0=abssec0, absr=randsec))
    return abssec.med
def train(loader, D, G, optim_D, optim_G, criterion):
    """Adversarial training loop.

    Alternates a discriminator update (D_train) and a generator update
    (G_train) for every batch across ``config.num_epoch`` epochs, logging
    batch/epoch history, checkpointing after each epoch, and periodically
    emitting generator image samples.
    """
    generator_losses = [0]
    discriminator_losses = [0]
    timer = Timer()

    for epoch in range(1, config.num_epoch + 1):
        for batch_idx, data in enumerate(loader, start=1):
            n = data.size(0)

            # Class labels (0 = fake, 1 = real) and fresh latent noise
            # shared by both the D and G updates for this batch.
            labels0 = torch.tensor([0] * n).to(config.device, torch.long)
            labels1 = torch.tensor([1] * n).to(config.device, torch.long)
            noise = torch.randn(
                (n, config.latent_size, 1, 1)).to(config.device)

            d_loss = D_train(data, D, G, optim_D, criterion, n,
                             labels0, labels1, noise)
            g_loss = G_train(D, G, optim_G, criterion, n,
                             labels0, labels1, noise)

            discriminator_losses.append(d_loss)
            generator_losses.append(g_loss)

            if batch_idx % config.log_iter == 0:
                timer.save_batch_time()
                log_batch_history(epoch, batch_idx, len(loader),
                                  discriminator_losses, generator_losses,
                                  timer)

        save_model(epoch, G, optim_G, D, optim_D)
        timer.save_epoch_time()
        log_epoch_history(epoch, len(loader), discriminator_losses,
                          generator_losses, timer)

        if epoch % config.make_img_samples == 0:
            for _ in range(5):
                make_img_samples(G)
def test_model(device, model, test_set_loader, optimizer):
    """Evaluate *model* on the test set and return top-1 accuracy (percent).

    The optimizer argument is accepted for signature symmetry with
    train_model but is not used here.
    """
    timer = Timer().start()
    model.eval()  # inference-mode behavior for dropout/batch-norm layers

    seen = 0
    hits = 0
    with torch.no_grad():
        for images, targets in test_set_loader:
            seen += images.shape[0]
            images = images.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)
            predicted = model(images).data.argmax(dim=1)
            hits += predicted.eq(targets.data).cpu().sum()

    accuracy = 100. * hits.item() / seen
    logger.info(f"Testing Took {timer.stop():0.2f}s. Images in epoch: {seen}")
    return accuracy
def train_epoch(self, data_loader):
    """Run through one epoch of model training with the provided data loader.

    For each batch: run self.train_step, accumulate the loss average and
    multi-label metrics, and show the running loss on the progress bar.
    Logs the epoch's metrics, duration and mean loss when done.

    (Fix: the original enumerated the progress bar but never used the
    index; the needless ``enumerate`` is removed.)
    """
    train_loss = AverageMeter()
    metrics = MultiLabelMetric(self.config.num_class)
    epoch_time = Timer()
    progress_bar = tqdm(data_loader)

    for batch in progress_bar:
        loss, batch_label_scores = self.train_step(batch)
        train_loss.update(loss)

        # training metrics
        batch_labels = batch['label'].cpu().detach().numpy()
        batch_label_scores = batch_label_scores.cpu().detach().numpy()
        metrics.add_batch(batch_labels, batch_label_scores)
        progress_bar.set_postfix(loss=train_loss.avg)

    log.info(metrics.get_metrics())
    log.info(f'Epoch done. Time for epoch = {epoch_time.time():.2f} (s)')
    log.info(f'Epoch loss: {train_loss.avg}')
def load_hcpcs_corpus(debug=False):
    """Return the HCPCS-code corpus, building and caching it on first use.

    On a cache hit the corpus is loaded straight from disk. Otherwise the
    Medicare Part B data is loaded, rows missing 'hcpcs'/'count' dropped,
    codes grouped into per-(year, npi) sequences ordered by 'count', the
    longest 1% of sequences discarded, and the result saved as .npy.

    Args:
        debug: use the small single-year dataset and a separate cache file.

    Returns:
        ndarray of HCPCS code lists, one entry per (year, npi) provider.
    """
    corpus_file = 'debug-corpus.npy' if debug else 'corpus.npy'
    corpus_output = os.path.join(proj_dir, 'data', corpus_file)
    partb_file = 'partb-2012.csv.gz' if debug else 'partb-2012-2018.csv.gz'
    partb_output = os.path.join(proj_dir, 'data', partb_file)

    # Cache hit: reuse the previously built corpus.
    if os.path.isfile(corpus_output):
        print(f'Loading corpus from disk {corpus_output}')
        return np.load(corpus_output, allow_pickle=True)

    # Cache miss: load the raw Medicare data.
    timer = Timer()
    data = load_data(data_dir, partb_output, debug)
    print(f'Loaded data in {timer.lap()}')

    # Drop rows with missing values in the key columns.
    data.dropna(subset=['hcpcs', 'count'], inplace=True)

    # Build one ordered HCPCS sequence per (year, provider) context.
    ordered = data.sort_values(by='count')
    sequences = ordered.groupby(by=['year', 'npi'])['hcpcs'].agg(list)
    grouped = pd.DataFrame(sequences)
    print(f'Generated hcpcs sequences in {timer.lap()}')

    # Discard the top 1% longest sequences.
    quantile = 0.99
    grouped['seq_length'] = grouped['hcpcs'].agg(len)
    max_seq_length = grouped['seq_length'].quantile(quantile)
    grouped = grouped.loc[grouped['seq_length'] <= max_seq_length]
    print(f'Removed sequences longer than {max_seq_length}')

    # Persist for next time and return.
    np.save(corpus_output, grouped['hcpcs'].values)
    return grouped['hcpcs'].values
def validate_video(self, loader, model, epoch, args):
    """ Run video-level validation on the Charades test set

    Averages per-clip softmax outputs into one prediction per video,
    computes Charades mAP, writes a submission file, and returns {'mAP'}.

    NOTE: `cuda(async=True)` / `volatile=True` are legacy Python-2-era
    PyTorch constructs; `async` is a reserved word in Python 3.7+.
    """
    timer = Timer()
    outputs, gts, ids = [], [], []
    metrics = {}

    # switch to evaluate mode
    model.eval()

    for i, x in enumerate(loader):
        inputs, target, meta = parse(x)
        target = target.long().cuda(async=True)
        # All clips of a video share one label; verify the batch is synced.
        assert target[0, :].eq(target[1, :]).all(), "val_video not synced"
        input_vars = [
            torch.autograd.Variable(inp.cuda(), volatile=True)
            for inp in inputs
        ]
        output = model(
            *input_vars)[-1]  # classification should be last output
        output = torch.nn.Softmax(dim=1)(output)

        # store predictions
        output_video = output.mean(dim=0)
        outputs.append(output_video.data.cpu().numpy())
        gts.append(target[0, :])
        ids.append(meta['id'][0])
        timer.tic()
        if i % args.print_freq == 0:
            print('Test2: [{0}/{1}]\t'
                  'Time {timer.val:.3f} ({timer.avg:.3f})'.format(
                      i, len(loader), timer=timer))

    # mAP, _, ap = meanap.map(np.vstack(outputs), np.vstack(gts))
    mAP, _, ap = meanap.charades_map(np.vstack(outputs), np.vstack(gts))
    metrics['mAP'] = mAP
    print(ap)
    print(' * mAP {:.3f}'.format(mAP))
    submission_file(ids, outputs,
                    '{}/epoch_{:03d}.txt'.format(args.cache, epoch + 1))
    return metrics
def train_and_eval(args):
    """Train ``args.mutual_num`` models under mutual learning and
    periodically evaluate, checkpointing the best accuracy (eval_indict
    == 'acc') or the lowest validation loss otherwise.

    Returns recorder.filename, the recorder's output path.
    """
    # Restore the global declaration that was commented out in the original
    # ("# = global lowest_val_loss, best_prec1"); without it, reading
    # best_prec1 / lowest_val_loss below raises NameError.
    global lowest_val_loss, best_prec1
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # close the warning
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
    torch.manual_seed(1)
    cudnn.benchmark = True
    timer = Timer()
    recorder = Record(args)
    # == dataset config ==
    num_class, data_length, image_tmpl = data_config(args)
    train_transforms, test_transforms = augmentation_config(args)
    train_data_loader, val_data_loader = data_loader_init(
        args, data_length, image_tmpl, train_transforms, test_transforms)
    # == model config ==
    models = []
    optimizers = []
    for i in range(args.mutual_num):
        model = model_config(args, num_class)
        models.append(model)
        recorder.record_message('a', '=' * 100)
        recorder.record_message('a', str(model.module))
        recorder.record_message('a', '=' * 100)
    # == optim config ==
    # NOTE(review): every iteration passes the same `model` (the last one
    # built above) to optim_init; models[i] looks intended — confirm before
    # changing, as it alters which parameters each optimizer updates.
    for i in range(args.mutual_num):
        train_criterion, val_criterion, optimizer = optim_init(args, model)
        optimizers.append(optimizer)
    # == data augmentation (self-supervised) config ==
    tc = TC(args)
    # == train and eval ==
    for epoch in range(args.start_epoch, args.epochs):
        timer.tic()
        for i in range(args.mutual_num):
            adjust_learning_rate(optimizers[i], args.lr, epoch, args.lr_steps)
        if args.eval_indict == 'acc':
            train_prec1, train_loss = train(args, tc, train_data_loader,
                                            models, train_criterion,
                                            optimizers, epoch, recorder)
            # Scaled to roughly [0, 1] for the recorder's plots.
            recorder.record_train(train_loss / 5.0, train_prec1 / 100.0)
        else:
            train_loss = train(args, tc, train_data_loader, models,
                               train_criterion, optimizers, epoch, recorder)
            recorder.record_train(train_loss)
        if (epoch + 1) % args.eval_freq == 0:
            if args.eval_indict == 'acc':
                val_prec1, val_loss = validate(args, tc, val_data_loader,
                                               models, val_criterion,
                                               recorder)
                recorder.record_val(val_loss / 5.0, val_prec1 / 100.0)
                is_best = val_prec1 > best_prec1
                best_prec1 = max(val_prec1, best_prec1)
                checkpoint = {
                    'epoch': epoch + 1,
                    'arch': "i3d",
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1
                }
            else:
                val_loss = validate(args, tc, val_data_loader, models,
                                    val_criterion, recorder)
                recorder.record_val(val_loss)
                is_best = val_loss < lowest_val_loss
                lowest_val_loss = min(val_loss, lowest_val_loss)
                checkpoint = {
                    'epoch': epoch + 1,
                    'arch': "i3d",
                    'state_dict': model.state_dict(),
                    'lowest_val': lowest_val_loss
                }
            recorder.save_model(checkpoint, is_best)
        timer.toc()
        left_time = timer.average_time * (args.epochs - epoch)
        if args.eval_indict == 'acc':
            message = "best_prec1 is: {} left time is : {}".format(
                best_prec1, timer.format(left_time))
        else:
            message = "lowest_val_loss is: {} left time is : {}".format(
                lowest_val_loss, timer.format(left_time))
        print(message)
        recorder.record_message('a', message)
    return recorder.filename
import re
import numpy as np
from sklearn.neighbors import NearestNeighbors
from gensim.models import Word2Vec
from utils.utils import replace_umlauts, Timer
from utils.utils import raw_freq

# Load the pretrained word2vec model once, timed. (The original loaded the
# model twice — once untimed, once inside the Timer block.)
model_path = "/media/echobot/Volume/home/simon/uni/masterarbeit/de/model/01/my.model"
with Timer('Loading model from %s' % model_path):
    model = Word2Vec.load_word2vec_format(model_path, binary=True)

# word -> embedding lookup table.
w2v = {w: vec for w, vec in zip(model.index2word, model.syn0)}

dataset_path = "/media/echobot/Volume/home/simon/uni/masterarbeit/data/business_signals_samples/fuehrungswechsel.txt"
dataset_path += ".corpus"
with open(dataset_path, 'r') as f:
    W = [w.decode('utf-8') for line in f for w in line.split()]

# Embed each token once. (The original comprehension iterated
# "for sentence in W for w in W", embedding every token len(W) times —
# O(n^2) — with `sentence` unused.)
X = np.array([w2v[w] for w in W])

# Vocabulary of distinct tokens. (The original "{w.decode for w in W}"
# collected bound `decode` methods, not strings — presumably set(W) was
# intended; W is already decoded above.)
V = set(W)

# with Timer("Calculating nearest neighbors... "):
#     nbrs = NearestNeighbors(n_neighbors=5, algorithm='ball_tree').fit(V)
#     distances, indices = nbrs.kneighbors(X)
def main(dataloader_func, model, optimizer_callback, output_directory,
         tensorboard_log_directory, lr_scheduler=None, epochs=150):
    """Generic training driver: train *model* for *epochs* epochs with
    per-epoch testing, TensorBoard logging and resumable checkpointing.

    Args:
        dataloader_func: () -> (train_set_loader, test_set_loader).
        model: network to train (moved to the chosen device here).
        optimizer_callback: model -> optimizer factory.
        output_directory: where logfile.txt and checkpoint.pth are written.
        tensorboard_log_directory: SummaryWriter log_dir.
        lr_scheduler: optional optimizer -> scheduler factory, stepped
            once per epoch.
        epochs: total number of epochs to reach.
    """
    if not os.path.isdir(output_directory):
        os.makedirs(output_directory, exist_ok=True)

    # Setup regular log file
    logfile_path = os.path.join(output_directory, "logfile.txt")
    setup_logger_tqdm(logfile_path)

    # Setup TensorBoard logging
    tensorboard_summary_writer = SummaryWriter(
        log_dir=tensorboard_log_directory)

    # Choose Training Device
    use_cuda = torch.cuda.is_available()
    logger.info(f"CUDA Available? {use_cuda}")
    device = "cuda" if use_cuda else "cpu"

    # Dataloaders
    train_set_loader, test_set_loader = dataloader_func()

    # Model & Optimizer
    model.to(device)
    optimizer = optimizer_callback(model)
    if lr_scheduler:
        # Replace the factory with a concrete scheduler bound to optimizer.
        lr_scheduler = lr_scheduler(optimizer)
    logger.info(f"Epoch Count: {epochs}")

    # Load Checkpoint (resume from the epoch after the last completed one)
    checkpoint_file_path = os.path.join(output_directory, "checkpoint.pth")
    start_epoch = 0
    if os.path.exists(checkpoint_file_path):
        logger.info("Checkpoint Found - Loading!")
        checkpoint = torch.load(checkpoint_file_path)
        logger.info(f"Last completed epoch: {checkpoint['epoch']}")
        logger.info(f"Average Train Loss: {checkpoint['train_loss']}")
        logger.info(f"Top-1 Train Accuracy: {checkpoint['train_accuracy']}")
        logger.info(f"Top-1 Test Accuracy: {checkpoint['test_accuracy']}")
        start_epoch = checkpoint["epoch"] + 1
        logger.info(f"Resuming at epoch {start_epoch}")
        model.load_state_dict(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        if lr_scheduler:
            lr_scheduler.load_state_dict(checkpoint["lr_scheduler_state_dict"])
    else:
        logger.info("No checkpoint found, starting from scratch.")

    # Training Loop
    t = Timer()
    for epoch in range(start_epoch, epochs):
        t.start()
        logger.info(f"Commence EPOCH {epoch}")

        # Train
        train_loss, train_accuracy = train_model(device, model,
                                                 train_set_loader, optimizer)
        tensorboard_summary_writer.add_scalar("train_loss", train_loss, epoch)
        tensorboard_summary_writer.add_scalar("train_accuracy",
                                              train_accuracy, epoch)

        # Test
        test_accuracy = test_model(device, model, test_set_loader, optimizer)
        tensorboard_summary_writer.add_scalar("test_accuracy", test_accuracy,
                                              epoch)

        scheduler_dict = None
        if lr_scheduler:
            lr_scheduler.step()
            scheduler_dict = lr_scheduler.state_dict()

        # Save Checkpoint
        logger.info("Saving checkpoint.")
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'lr_scheduler_state_dict': scheduler_dict,
                'train_loss': train_loss,
                'train_accuracy': train_accuracy,
                'test_accuracy': test_accuracy
            }, checkpoint_file_path)

        elapsed_time = t.stop()
        logger.info(f"End of epoch {epoch}, took {elapsed_time:0.4f} seconds.")
        logger.info(f"Average Train Loss: {train_loss}")
        logger.info(f"Top-1 Train Accuracy: {train_accuracy}")
        logger.info(f"Top-1 Test Accuracy: {test_accuracy}")
# NOTE(review): this span is a fragment of a larger job-runner function —
# cli_args, debug, max_depth, runs, sample_size, embedding_type,
# embedding_path and drop_columns are defined outside the visible region,
# and the per-run loop continues past it.
l2_reg = float(l2_reg)
l1_reg = cli_args.get('l1_reg')  # optional L1 regularisation strength
if l1_reg != None:
    l1_reg = float(l1_reg)
print(f'Running job with arguments\n{cli_args}')

# define configs
train_perf_filename = 'train-results.csv'
test_perf_filename = 'test-results.csv'
n_estimators = 5 if debug else 100  # small forest in debug mode
print(f'n_estimators: {n_estimators}')
print(f'max_depth: {max_depth}')

# init timer
timer = Timer()

# iterate over runs
for run in range(runs):
    print(f'Starting run {run}')

    # load data
    data = load_sampled_data(sample_size)
    print(f'Loaded data with shape {data.shape}')

    # drop columns, onehot encode, or lookup embeddings
    x, y = get_embedded_data(data, embedding_type, embedding_path,
                             drop_columns)
    del data  # free the raw frame before training
    print(f'Encoded data shape: {x.shape}')
def __init__(self):
    """Initialise the full monitoring window: seed today's database rows,
    load the YOLO model, wire up the UI buttons, start the TCP server and
    all worker threads, and initialise the recognition state variables."""
    super(XioAll, self).__init__()
    self.ui = ui.Ui_Form()
    self.ui.setupUi(self)
    # Most recent frame from each camera; filled by the receive thread.
    self.frame_left = None
    self.frame_right = None
    self.is_work = True
    self.stype = 0
    self.one_static_time = 0  # duration of one fault standstill
    self.all_time = 0  # accumulated working time for the day
    self.q = MyQueue()  # queue of frames (per original note: storing states would be better)
    self.vision = Vision()
    # Video input source selection
    self.CamPath = ""
    self.isWebCam = False
    self.isCamChanged = False
    # Database access
    self.da = data_access.DataAccess()
    # If the date has changed since the last row, insert a fresh row for
    # today so later updates have a record to increment.
    result_loss = self.da.select_(
        "select * from loss ORDER BY SJ DESC limit 1")
    current_time = datetime.datetime.now().strftime('%Y-%m-%d')
    if str(result_loss[0][0]) != current_time:
        self.da.operate_(
            'insert into loss(SJ,action1,action2,action3,action4,action5,action6)values'
            '("%s",%d,%d,%d,%d,%d,%d)' %
            (current_time, 10, 10, 10, 10, 0, 0))
    else:
        pass
    result_oee = self.da.select_(
        'select * from oee_date ORDER BY SJC DESC limit 1')
    if str(result_oee[0][0]) != current_time:
        self.da.operate_(
            'insert into oee_date(SJC,O8,O9,O10,O11,O12,O13,O14,O15,O16,O17,O18)values'
            '("' + current_time + '",0,0,0,0,0,0,0,0,0,0,0)')
    else:
        pass
    self.yolo_Model = Yolo_Model.Yolo_Model()
    # self.displayMessage("...加载YOLO模型成功...")
    self.thread_figure = Timer('updatePlay()', sleep_time=120)  # refreshes the plot area every 2 min
    self.connect(self.thread_figure, QtCore.SIGNAL('updatePlay()'), self.draw)
    self.thread_figure.start()
    # Button wiring
    self.connect(self.ui.fileSelectButton, QtCore.SIGNAL('clicked()'),
                 self.fileSelect)
    self.connect(self.ui.mailSenderButton, QtCore.SIGNAL('clicked()'),
                 self.mailSend)
    self.connect(self.ui.confirmDateButton, QtCore.SIGNAL('clicked()'),
                 self.displayMonthData)
    self.connect(self.ui.WebCamButton, QtCore.SIGNAL('clicked()'),
                 self.webCamInput)
    self.server = ThreadedTCPServer(
        (self.HOST, self.PORT),
        ThreadedTCPRequestHandler)  # listens for client requests continuously
    self.server_thread = threading.Thread(target=self.server.serve_forever)
    self.server_thread.start()
    self.thread_video_receive = threading.Thread(
        target=self.video_receive_local)  # thread that reads the video streams
    self.thread_video_receive.start()
    self.thread_time = Timer('updatePlay()')  # redraws the label every 0.04 s
    self.connect(self.thread_time, QtCore.SIGNAL('updatePlay()'),
                 self.video_play)
    self.thread_time.start()
    self.thread_recog = Timer('updatePlay()', sleep_time=1)  # analyses one frame per second
    self.connect(self.thread_recog, QtCore.SIGNAL('updatePlay()'),
                 self.video_recog)
    self.thread_recog.start()
    self.thread_data = Timer('updatePlay()', sleep_time=1800)  # polls the database every 30 min
    self.connect(self.thread_data, QtCore.SIGNAL('updatePlay()'),
                 self.data_read)
    self.thread_data.start()
    self.thread_shumei = threading.Thread(target=self.shumeiDeal)
    self.thread_shumei.start()
    self.thread_control = Timer('updatePlay()', sleep_time=10)  # runs the control check every 10 s
    self.connect(self.thread_control, QtCore.SIGNAL('updatePlay()'),
                 self.control_judge)
    self.thread_control.start()
    # 12-25
    self.thread_recogtiaoshi = Timer('updatePlay()', sleep_time=0.3)  # debugging recognition every 0.3 s
    self.connect(self.thread_recogtiaoshi, QtCore.SIGNAL('updatePlay()'),
                 self.video_recogtiaoshi)
    self.thread_recogtiaoshi.start()
    self.thread_recogzhuangji = Timer('updatePlay()', sleep_time=0.3)  # assembly recognition every 0.3 s
    self.connect(self.thread_recogzhuangji, QtCore.SIGNAL('updatePlay()'),
                 self.video_recogzhuangji)
    self.thread_recogzhuangji.start()
    # Left-detection state (position, type, flags)
    self.X_l = 0
    self.Y_l = 0
    self.type_l = ""
    self.flag = 0
    self.a = 0
    self.tiaoshi_back = False
    self.tiaoshi_forward = False
    # Right-detection state
    self.X_r = 0
    self.Y_r = 0
    self.type_r = ""
    self.firstFrame = None
    self.chaiji_left = False
    self.chaiji_right = False
    self.cltime = 0
    self.crtime = 0
    self.totaltime = 0
    # Values shown on the panel
    self.work_time = 0
    self.tf_time = 0
    self.tb_time = 0
    # Queues (fixed-size histories) for hand up/down detection
    self.Ldown = [0] * 10
    self.Lup = [0] * 10
    self.Lhandsdown = [0] * 10
    self.Lhandsup = [0] * 10
    self.isJudgeMachineT = True
    # Assembly ("zhuangji") detection: reference masks and colour range
    self.mask_right = cv2.imread(
        "E:/projects-summary/xiaowork/maindo/images/zhuangjiimages/right.jpg"
    )
    self.mask_left = cv2.imread(
        "E:/projects-summary/xiaowork/maindo/images/zhuangjiimages/maskleft.jpg"
    )
    self.left_base = cv2.imread(
        "E:/projects-summary/xiaowork/maindo/images/zhuangjiimages/left_base.jpg",
        0)
    # HSV colour bounds (named "red" but values are in the yellow hue range
    # — TODO confirm intent)
    self.redLower = np.array([26, 43, 46])
    self.redUpper = np.array([34, 255, 255])
    self.Lright = [0] * 10
    self.Lleft = [0] * 10
    self.is_JudgeRL = True
    self.isRightStart = False
    self.isLeftStart = False
    self.zhuangjitime = 0
    # Debugging ("tiaoshi") detection state: rolling status windows and
    # bounding boxes for the up/down actions
    self.status_LUP = [0] * 8
    self.status_LDOWN = [0] * 8
    self.isActionStartUP = False
    self.isActionStartDOWN = False
    self.x1UP, self.y1UP, self.x2UP, self.y2UP = [0, 0, 0, 0]
    self.X1DOWN, self.Y1DOWN, self.X2DOWN, self.Y2DOWN = [0, 0, 0, 0]
    # Timed on-screen text overlays
    self.putTextStart_time = None
    self.putTextEnd_time_left = None
    self.putTextEnd_time_right = None
    self.putTextEnd_time_up = None
    self.putTextEnd_time_down = None
def main(experiment_name,
         optimizer,
         output_directory_root="experiments/resnet18_logistic_cifar10",
         epochs=60,
         batch_size=512,
         num_workers=1):
    """Train a logistic-regression head on a frozen ResNet-18 for CIFAR-10.

    The pretrained backbone is frozen; only the replacement ``fc`` layer is
    trained. Supports resuming from a per-experiment checkpoint.

    Args:
        experiment_name: Sub-directory name for this run's outputs.
        optimizer: Callable taking model parameters and returning a torch
            optimizer (the name is rebound to the created instance).
        output_directory_root: Root directory for experiment outputs.
        epochs: Total number of training epochs.
        batch_size: Mini-batch size for both loaders.
        num_workers: Dataloader worker process count.
    """
    output_directory = os.path.join(output_directory_root, experiment_name)
    if not os.path.isdir(output_directory):
        os.makedirs(output_directory, exist_ok=True)

    # Setup regular log file + tensorboard
    logfile_path = os.path.join(output_directory, "logfile.txt")
    setup_logger_tqdm(logfile_path)
    tensorboard_log_directory = os.path.join("runs",
                                             "resnet18_logistic_cifar10",
                                             experiment_name)
    tensorboard_summary_writer = SummaryWriter(
        log_dir=tensorboard_log_directory)

    # Choose Training Device
    use_cuda = torch.cuda.is_available()
    logger.info(f"CUDA Available? {use_cuda}")
    device = "cuda" if use_cuda else "cpu"

    # Datasets and Loaders
    train_set_loader, test_set_loader = get_data_loaders(
        batch_size, num_workers)

    # Create Model & Optimizer. Freeze the backbone so only the new
    # classification head (logistic regression on features) is trainable.
    model = torchvision.models.resnet18(pretrained=True)
    for param in model.parameters():
        param.requires_grad = False
    num_classes = 10
    # Fix: use num_classes instead of duplicating the magic constant 10.
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    model.to(device)
    optimizer = optimizer(model.parameters())

    logger.info("=========== Commencing Training ===========")
    logger.info(f"Epoch Count: {epochs}")
    logger.info(f"Batch Size: {batch_size}")

    # Load Checkpoint (resume support)
    checkpoint_file_path = os.path.join(output_directory, "checkpoint.pth")
    start_epoch = 0
    if os.path.exists(checkpoint_file_path):
        logger.info("Checkpoint Found - Loading!")
        checkpoint = torch.load(checkpoint_file_path)
        logger.info(f"Last completed epoch: {checkpoint['epoch']}")
        logger.info(f"Average Train Loss: {checkpoint['train_loss']}")
        logger.info(f"Top-1 Train Accuracy: {checkpoint['train_accuracy']}")
        logger.info(f"Top-1 Test Accuracy: {checkpoint['test_accuracy']}")
        start_epoch = checkpoint["epoch"] + 1
        logger.info(f"Resuming at epoch {start_epoch}")
        model.load_state_dict(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    else:
        logger.info("No checkpoint found, starting from scratch.")

    # Training Loop
    t = Timer()
    for epoch in range(start_epoch, epochs):
        t.start()
        logger.info("-" * 10)
        logger.info(f"Epoch {epoch}")
        logger.info("-" * 10)

        train_loss, train_accuracy = train_model(device, model,
                                                 train_set_loader, optimizer)
        tensorboard_summary_writer.add_scalar("train_loss", train_loss, epoch)
        tensorboard_summary_writer.add_scalar("train_accuracy", train_accuracy,
                                              epoch)

        # NOTE(review): the DDP variant of this function calls test_model
        # without the optimizer argument — confirm the intended signature.
        test_accuracy = test_model(device, model, test_set_loader, optimizer)
        tensorboard_summary_writer.add_scalar("test_accuracy", test_accuracy,
                                              epoch)

        # Save Checkpoint
        logger.info("Saving checkpoint.")
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'train_loss': train_loss,
                'train_accuracy': train_accuracy,
                'test_accuracy': test_accuracy
            }, checkpoint_file_path)

        elapsed_time = t.stop()
        logger.info(f"End of epoch {epoch}, took {elapsed_time:0.4f} seconds.")
        logger.info(f"Average Train Loss: {train_loss}")
        logger.info(f"Top-1 Train Accuracy: {train_accuracy}")
        logger.info(f"Top-1 Test Accuracy: {test_accuracy}")
        logger.info("")
def main(device, mp_args, dataloader_func, model, optimizer_callback,
         output_directory, tensorboard_log_directory, epochs):
    """Per-process entry point for DistributedDataParallel (DDP) training.

    Intended to be spawned once per GPU; each process computes its global
    rank, joins the NCCL process group, and trains with per-rank logging,
    TensorBoard output, and checkpoint resume.

    Args:
        device: Local GPU index for this process.
        mp_args: Multiprocessing args; reads .nr (node rank), .gpus
            (GPUs per node) and .world_size.
        dataloader_func: Callable (world_size, global_rank) ->
            (train_loader, test_loader).
        model: Model instance; moved to `device` and wrapped in DDP.
        optimizer_callback: Callable model -> optimizer.
        output_directory: Root directory for per-rank logs and checkpoints.
        tensorboard_log_directory: Root directory for per-rank TB runs.
        epochs: Total number of epochs to train.
    """
    # Unique global rank = node rank * GPUs-per-node + local GPU index.
    global_rank = mp_args.nr * mp_args.gpus + device
    dist.init_process_group(backend='nccl',
                            init_method='env://',
                            world_size=mp_args.world_size,
                            rank=global_rank)

    # Per-rank output directory so processes do not clobber each other.
    output_directory = os.path.join(output_directory, f"rank_{global_rank}")
    if not os.path.isdir(output_directory):
        os.makedirs(output_directory, exist_ok=True)

    # Setup regular log file
    logfile_path = os.path.join(output_directory, "logfile.txt")
    setup_logger_tqdm(logfile_path)

    # Setup TensorBoard logging (also per-rank)
    tensorboard_log_directory = os.path.join(tensorboard_log_directory,
                                             f"rank_{global_rank}")
    tensorboard_summary_writer = SummaryWriter(
        log_dir=tensorboard_log_directory)

    # Dataloaders (sharded by world size / global rank)
    train_set_loader, test_set_loader = dataloader_func(
        mp_args.world_size, global_rank)

    # Model & Optimizer. The optimizer is built on the raw model before
    # the DDP wrap; checkpoints are saved and loaded after the wrap, so
    # state-dict keys stay consistent within this function.
    model.to(device)
    optimizer = optimizer_callback(model)
    model = nn.parallel.DistributedDataParallel(model, device_ids=[device])

    logger.info(f"Epoch Count: {epochs}")

    # Load Checkpoint (per-rank resume support)
    checkpoint_file_path = os.path.join(output_directory, "checkpoint.pth")
    start_epoch = 0
    if os.path.exists(checkpoint_file_path):
        logger.info("Checkpoint Found - Loading!")
        checkpoint = torch.load(checkpoint_file_path)
        logger.info(f"Last completed epoch: {checkpoint['epoch']}")
        logger.info(f"Average Train Loss: {checkpoint['train_loss']}")
        logger.info(f"Top-1 Train Accuracy: {checkpoint['train_accuracy']}")
        logger.info(f"Top-1 Test Accuracy: {checkpoint['test_accuracy']}")
        start_epoch = checkpoint["epoch"] + 1
        logger.info(f"Resuming at epoch {start_epoch}")
        model.load_state_dict(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    else:
        logger.info("No checkpoint found, starting from scratch.")

    # Training Loop
    t = Timer()
    #progress = tqdm(total=epochs, initial=start_epoch, desc="Epochs")
    for epoch in range(start_epoch, epochs):
        t.start()
        logger.info(f"Commence EPOCH {epoch}")

        # Train
        train_loss, train_accuracy = train_model(device, model,
                                                 train_set_loader, optimizer)
        tensorboard_summary_writer.add_scalar("train_loss", train_loss, epoch)
        tensorboard_summary_writer.add_scalar("train_accuracy", train_accuracy,
                                              epoch)

        # Test
        test_accuracy = test_model(device, model, test_set_loader)
        tensorboard_summary_writer.add_scalar("test_accuracy", test_accuracy,
                                              epoch)

        # Save Checkpoint
        logger.info("Saving checkpoint.")
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'train_loss': train_loss,
                'train_accuracy': train_accuracy,
                'test_accuracy': test_accuracy
            }, checkpoint_file_path)

        elapsed_time = t.stop()
        logger.info(f"End of epoch {epoch}, took {elapsed_time:0.4f} seconds.")
        logger.info(f"Average Train Loss: {train_loss}")
        logger.info(f"Top-1 Train Accuracy: {train_accuracy}")
        logger.info(f"Top-1 Test Accuracy: {test_accuracy}")
def inference(model,
              current_epoch,
              current_iter,
              local_rank,
              data_loader,
              dataset_name,
              device="cuda",
              max_instance=3200,
              mute=False,
              verbose_return=False):
    """Evaluate object / attribute classification over a data loader.

    Runs the model (via inference_step) in eval mode and accumulates
    micro-averaged F1 for object and attribute predictions.

    Args:
        model: Model under evaluation; reads model.cfg.EXTERNAL.BATCH_SIZE.
        current_epoch / current_iter: Used only in the summary log line.
        local_rank: Unused in the visible body (kept for call compatibility).
        data_loader: Yields batch dicts consumed by inference_step.
        dataset_name: Unused in the visible body.
        device: Target device string or torch.device.
        max_instance: Evaluation cap — int: example budget; float:
            fraction of the dataset.
        mute: Suppress logging and the progress bar when True.
        verbose_return: When True, also return raw per-example predictions,
            labels, image ids, boxes, probabilities and raws.

    Returns:
        (obj_f1, attr_f1, num_attr_examples) by default; the extended
        tuple when verbose_return is True.
    """
    model.train(False)
    # convert to a torch.device for efficiency
    device = torch.device(device)
    if not mute:
        logger = logging.getLogger("maskrcnn_benchmark.inference")
        logger.info("Start evaluation")
    total_timer = Timer()
    total_timer.tic()
    torch.cuda.empty_cache()
    if not mute:
        pbar = tqdm(total=len(data_loader), desc="Validation in progress")

    # Helper: tensor -> plain Python list (moves to CPU first).
    def to_list(tensor):
        return tensor.cpu().numpy().tolist()

    with torch.no_grad():
        all_pred_obj, all_truth_obj, all_pred_attr, all_truth_attr = [], [], [], []
        all_image_ids, all_boxes = [], []
        all_pred_attr_prob = []
        all_raws = []
        obj_loss_all, attr_loss_all = 0, 0
        cnt = 0
        for iteration, out_dict in enumerate(data_loader):
            # Early-exit once the max_instance budget is exhausted
            # (int => absolute count, float => dataset fraction).
            if type(max_instance) is int:
                if iteration == max_instance // model.cfg.EXTERNAL.BATCH_SIZE:
                    break
            if type(max_instance) is float:
                if iteration > max_instance * len(
                        data_loader) // model.cfg.EXTERNAL.BATCH_SIZE:
                    break
            # print(iteration)
            if verbose_return:
                all_image_ids.extend(out_dict['image_ids'])
                all_boxes.extend(out_dict['gt_bboxes'])
                all_raws.extend(out_dict['raw'])
            ret_dict = inference_step(model, out_dict, device)
            # Missing keys come back as None; each branch below is skipped
            # when its loss is absent from the step output.
            loss_attr, loss_obj, attr_score, obj_score = ret_dict.get('attr_loss', None), \
                ret_dict.get('obj_loss', None), \
                ret_dict.get('attr_score', None), \
                ret_dict.get('obj_score', None)
            if loss_attr is not None:
                attr_loss_all += loss_attr.item()
                pred_attr_prob, pred_attr = ret_dict[
                    'pred_attr_prob'], ret_dict['pred_attr']
                all_pred_attr.extend(to_list(pred_attr))
                all_truth_attr.extend(to_list(ret_dict['attr_labels']))
                all_pred_attr_prob.extend(to_list(pred_attr_prob))
            if loss_obj is not None:
                obj_loss_all += loss_obj.item()
                # Predicted object class = argmax over the score's last dim.
                _, pred_obj = obj_score.max(-1)
                all_pred_obj.extend(to_list(pred_obj))
                all_truth_obj.extend(to_list(ret_dict['obj_labels']))
            cnt += 1
            if not mute:
                pbar.update(1)
        obj_f1 = f1_score(all_truth_obj, all_pred_obj, average='micro')
        attr_f1 = f1_score(all_truth_attr, all_pred_attr, average='micro')
        # epsilon guards the cnt == 0 (empty loader / zero budget) case
        obj_loss_all /= (cnt + 1e-10)
        attr_loss_all /= (cnt + 1e-10)
        if not mute:
            logger.info(
                'Epoch: {}\tIteration: {}\tObject f1: {}\tAttr f1:{}\tObject loss:{}\tAttr loss:{}'
                .format(current_epoch, current_iter, obj_f1, attr_f1,
                        obj_loss_all, attr_loss_all))
    #compute_on_dataset(model, data_loader, local_rank, device, inference_timer, output_file)
    # wait for all processes to complete before measuring the time
    total_time = total_timer.toc()
    model.train(True)
    if not verbose_return:
        return obj_f1, attr_f1, len(all_truth_attr)
    else:
        return obj_f1, attr_f1, all_pred_attr, all_truth_attr, all_pred_obj, all_truth_obj, all_image_ids, all_boxes, \
            all_pred_attr_prob, all_raws
def inference_mean_exemplar(
    model,
    current_epoch,
    current_iter,
    local_rank,
    data_loader,
    dataset_name,
    device="cuda",
    max_instance=3200,
    mute=False,
):
    """Evaluate object classification using mean-of-exemplar classifiers.

    Mirrors `inference` but classifies cropped regions via
    model.mean_of_exemplar_classify instead of a forward loss pass.
    Attributes are not evaluated here.

    Args:
        model: Model under evaluation; reads model.cfg.EXTERNAL.BATCH_SIZE
            and must provide mean_of_exemplar_classify().
        current_epoch / current_iter / local_rank / dataset_name: Unused in
            the visible body (kept for call compatibility with `inference`).
        data_loader: Yields batch dicts with 'images', 'object_labels',
            'attribute_labels' and 'cropped_image'.
        device: Target device string or torch.device.
        max_instance: Evaluation cap — int: example budget; float:
            fraction of the dataset.
        mute: Suppress logging and the progress bar when True.

    Returns:
        (obj_f1, 0, num_examples) — attribute F1 is always 0 here.
    """
    model.train(False)
    # convert to a torch.device for efficiency
    device = torch.device(device)
    if not mute:
        logger = logging.getLogger("maskrcnn_benchmark.inference")
        logger.info("Start evaluation")
    total_timer = Timer()
    total_timer.tic()
    torch.cuda.empty_cache()
    if not mute:
        pbar = tqdm(total=len(data_loader), desc="Validation in progress")

    # Fix: `to_list` was referenced but never defined in this function —
    # it exists only as a nested helper inside `inference` — so this would
    # raise NameError at runtime unless a module-level fallback exists.
    def to_list(tensor):
        return tensor.cpu().numpy().tolist()

    with torch.no_grad():
        all_pred_obj, all_truth_obj = [], []
        cnt = 0
        for iteration, out_dict in enumerate(data_loader):
            # Early-exit once the max_instance budget is exhausted
            # (int => absolute count, float => dataset fraction).
            if type(max_instance) is int:
                if iteration == max_instance // model.cfg.EXTERNAL.BATCH_SIZE:
                    break
            if type(max_instance) is float:
                if iteration > max_instance * len(
                        data_loader) // model.cfg.EXTERNAL.BATCH_SIZE:
                    break
            images = torch.stack(out_dict['images'])
            obj_labels = torch.cat(out_dict['object_labels'], -1)
            attr_labels = torch.cat(out_dict['attribute_labels'], -1)
            cropped_image = torch.stack(out_dict['cropped_image'])
            # NOTE(review): `images` and `attr_labels` are built and moved
            # to device but never consumed — kept for batch-contract parity
            # with `inference`; confirm whether they can be dropped.
            images = images.to(device)
            obj_labels = obj_labels.to(device)
            attr_labels = attr_labels.to(device)
            cropped_image = cropped_image.to(device)
            # Classify each cropped region by its nearest exemplar mean.
            pred_obj = model.mean_of_exemplar_classify(cropped_image)
            all_pred_obj.extend(to_list(pred_obj))
            all_truth_obj.extend(to_list(obj_labels))
            cnt += 1
            if not mute:
                pbar.update(1)
        obj_f1 = f1_score(all_truth_obj, all_pred_obj, average='micro')
    # wait for all processes to complete before measuring the time
    total_time = total_timer.toc()
    model.train(True)
    return obj_f1, 0, len(all_truth_obj)
def pretext_train(args, recorder):
    """Run self-supervised pretext training (step 1 of the pipeline).

    Builds data loaders, the model (and its EMA copy for moco/dsm),
    contrastive objective and optimizer, then trains for args.pt_epochs
    epochs, saving a checkpoint through `recorder` every epoch.

    Args:
        args: Experiment namespace; reads/mutates pt_* fields
            (pt_method, pt_epochs, pt_start_epoch, pt_lr_decay_epochs, ...).
        recorder: Logging/checkpoint helper.

    Returns:
        recorder.pt_checkpoint — where the pretrained model was recorded.

    Raises:
        Exception: If args.pt_method is not one of moco / dsm / dsm_triplet.
    """
    if args.gpus is not None:
        print("Use GPU: {} for pretext training".format(args.gpus))
    num_class, data_length, image_tmpl = pt_data_config(args)
    # print("tp_length is: ", data_length)
    train_transforms, test_transforms, eval_transforms = pt_augmentation_config(
        args)
    train_loader, val_loader, eval_loader, train_samples, val_samples, eval_samples = pt_data_loader_init(
        args, data_length, image_tmpl, train_transforms, test_transforms,
        eval_transforms)
    n_data = len(train_loader)
    model, model_ema = pt_model_config(args, num_class)
    # == optim config ==
    contrast, criterion, optimizer = pt_optim_init(args, model, n_data)
    model = model.cuda()
    # == load weights ==
    model, model_ema = pt_load_weight(args, model, model_ema, optimizer,
                                      contrast)
    if args.pt_method in ['dsm', 'moco']:
        model_ema = model_ema.cuda()
        # copy weights from `model' to `model_ema'
        moment_update(model, model_ema, 0)
    cudnn.benchmark = True

    # optionally resume from a checkpoint
    args.start_epoch = 1  # NOTE(review): loop below uses pt_start_epoch — confirm

    # our data augmentation method (positive/negative clip generators)
    if args.pt_method in ['dsm', 'dsm_triplet']:
        pos_aug = GenPositive()
        neg_aug = GenNegative()

    # add a banner to the run log
    recorder.record_message('a', '=' * 100)
    recorder.record_message('a', '-' * 40 + 'pretrain' + '-' * 40)
    recorder.record_message('a', '=' * 100)

    # parse lr_decay epochs from a comma-separated string into ints
    iterations = args.pt_lr_decay_epochs.split(',')
    args.pt_lr_decay_epochs = [int(it) for it in iterations]

    timer = Timer()
    # routine
    print('*' * 70 + 'Step1: pretrain' + '*' * 20 + '*' * 50)
    for epoch in range(args.pt_start_epoch, args.pt_epochs + 1):
        timer.tic()
        pt_adjust_learning_rate(epoch, args, optimizer)
        print("==> training...")
        time1 = time.time()
        if args.pt_method == "moco":
            loss, prob = train_moco(epoch, train_loader, model, model_ema,
                                    contrast, criterion, optimizer, args,
                                    recorder)
        elif args.pt_method == "dsm":
            loss, prob = train_dsm(epoch, train_loader, model, model_ema,
                                   contrast, criterion, optimizer, args,
                                   pos_aug, neg_aug, recorder)
            # loss, prob = epoch * 0.01, 0.02*epoch
        elif args.pt_method == "dsm_triplet":
            loss = train_dsm_triplet(epoch, train_loader, model, optimizer,
                                     args, pos_aug, neg_aug, recorder)
        else:
            # Fix: the exception was instantiated but never raised, so an
            # unsupported method silently fell through with `loss` undefined.
            raise Exception("Not support method now!")
        recorder.record_pt_train(loss)
        time2 = time.time()
        print('epoch {}, total time {:.2f}'.format(epoch, time2 - time1))
        timer.toc()
        left_time = timer.average_time * (args.pt_epochs - epoch)
        message = "Step1: pretrain now loss is: {} left time is : {} now is: {}".format(
            loss, timer.format(left_time), datetime.now())
        print(message)
        recorder.record_message('a', message)

        # save model state every epoch
        state = {
            'opt': args,
            'model': model.state_dict(),
            'contrast': contrast.state_dict(),
            'optimizer': optimizer.state_dict(),
            'epoch': epoch,
        }
        recorder.save_pt_model(args, state, epoch)
    print("finished pretrain, the trained model is record in: {}".format(
        recorder.pt_checkpoint))
    return recorder.pt_checkpoint
def train_one_epoch(self, model, dataloader, optimizer, scheduler, num_epochs,
                    max_grad_norm=None, debugging=False):
    """Train the model for one epoch.

    Args:
        model: Model to train; its forward must return a dict containing
            a "losses" entry (with a "total_loss" key).
        dataloader: Iterable of training batches.
        optimizer: Optimizer stepped once per batch.
        scheduler: Optional LR scheduler, stepped once per batch.
        num_epochs: Total epoch count; used only in the progress caption.
        max_grad_norm: If set, clip gradients to this norm before stepping.
        debugging: If True, stop after 10 iterations.
    """
    model.train()
    timer = Timer()
    # Column header for the per-batch loss/metric table shown via tqdm.
    print(
        ("{:25}" + "|" + "{:^15}" * (3 + len(self.early_stopping_metrics)) + "|").format(
            "", "l1_loss", "l2_loss", "l3_loss", *self.early_stopping_metrics)
    )
    total = 10 if debugging else len(dataloader)
    with tqdm(dataloader, total=total) as t:
        if num_epochs is not None:
            description = f"Training ({self.epoch}/{num_epochs})"
        else:
            description = "Training"
        t.set_description(description)
        for i, data in enumerate(t):
            timer.start()
            data = to_device(data, self.device)
            optimizer.zero_grad()

            # Forward
            output = model(**data)
            losses = output["losses"]

            # Calculate batch metrics and merge them into the loss dict
            metric = compute_metrics_from_inputs_and_outputs(
                inputs=data, outputs=output, tokenizer=self.tokenizer,
                save_csv_path=None)
            losses.update(metric)

            # Update tqdm with training information
            to_tqdm = []  # update tqdm
            for loss_type in ["l1_cls_loss", "l2_cls_loss", "l3_cls_loss",
                              *self.early_stopping_metrics]:
                loss_n = losses[loss_type]
                # NOTE(review): a non-tensor metric value would fail on
                # .item() below — confirm all entries here are tensors.
                if isinstance(loss_n, torch.Tensor) and torch.isnan(loss_n):
                    to_tqdm.append("nan")
                else:
                    to_tqdm.append(f"{loss_n.item():.3f}")
            des = (
                "{:25}" + "|" + "{:^15}" * (3 + len(self.early_stopping_metrics)) + "|"
            ).format(description, *to_tqdm)
            t.set_description(des)

            # Backward
            losses["total_loss"].backward()
            if max_grad_norm is not None:
                # Fix: clip the gradients of the model actually being
                # trained (the `model` argument) — the original clipped
                # self.model while every other statement here uses `model`.
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               max_grad_norm)
            optimizer.step()
            if scheduler is not None:
                scheduler.step()
            timer.end()

            # Break when reaching 10 iterations when debugging
            if debugging and i == 9:
                break
    logger.info(f"{description} took {timer.get_total_time():.2f}s.")
    return
# Start training iterations = trainer.resume(checkpoint_directory, hyperparameters=config) if opts.resume else 0 while True: for it, (images_target_a, images_target_b) in enumerate(zip(train_loader_a, train_loader_b)): trainer.update_learning_rate() images_a = images_target_a[0].cuda().detach() target_a = images_target_a[1].cuda().detach() images_b = images_target_b[0].cuda().detach() target_b = images_target_b[1].cuda().detach() ids_b = images_target_b[2] # Main training code with Timer("Elapsed time in update: %f"): trainer.dis_update(images_a, images_b, config) trainer.gen_update(images_a, images_b, config, target_a, iterations) if iterations >= config['train_seg_iters']: trainer.seg_update(images_a, images_b, target_a, target_b) # Dump training stats in log file if (iterations) % config['log_iter'] == 0: print('Iteration: %08d/%08d' % (iterations, max_iter)) losses = write_loss(iterations, trainer, train_writer) # Write images if (iterations) % config['image_save_iter'] == 0: with torch.no_grad():