def __init__(self, model_path, gpu_id=0):
    """Restore a trained model from a checkpoint and prepare validation.

    :param model_path: path of the checkpoint file; it must contain the
        keys ``'config'`` and ``'state_dict'``.
    :param gpu_id: CUDA device index. The CPU is used when this is not an
        ``int`` (including ``None``) or when CUDA is not available.

    Besides the checkpoint, this reads the module-level ``args.img_path``:
    when it is not ``None`` it replaces the validation ``data_path``.
    """
    from model import get_model, get_loss, get_converter
    from data_loader import get_dataloader
    from metric import get_metric

    self.gpu_id = gpu_id
    # isinstance() already rejects None, so one check covers both cases.
    use_cuda = isinstance(self.gpu_id, int) and torch.cuda.is_available()
    device_name = "cuda:%s" % self.gpu_id if use_cuda else "cpu"
    self.device = torch.device(device_name)
    print('device:', self.device)

    checkpoint = torch.load(model_path, map_location=self.device)
    self.config = config = checkpoint['config']
    dataset_cfg = config['dataset']

    self.model = get_model(config['arch'])
    self.converter = get_converter(config['converter'])
    self.img_mode = dataset_cfg['train']['dataset']['args']['img_mode']

    # Load the trained weights and switch to inference mode.
    self.model.load_state_dict(checkpoint['state_dict'])
    self.model.to(self.device)
    self.model.eval()

    self.metric = get_metric(config['metric'])

    # An image path given on the command line overrides the stored one.
    validate_cfg = dataset_cfg['validate']
    if args.img_path is not None:
        validate_cfg['dataset']['args']['data_path'] = [args.img_path]
    self.validate_loader = get_dataloader(validate_cfg,
                                          config['distributed'])
def eval_fn(hparams):
    """Run inference and score the predictions at the latest checkpoint step.

    Args:
        hparams: hyper-parameter object. ``tgt_sos_id`` and ``tgt_eos_id``
            are set on it as a side effect, and ``out_dir`` is searched for
            the latest checkpoint.

    Returns:
        Whatever ``metric.get_metric(hparams, predictions, step)`` returns.

    Raises:
        ValueError: if ``hparams.out_dir`` contains no checkpoint, or the
            checkpoint name does not end in ``-<step>``.
    """
    hparams.tgt_sos_id, hparams.tgt_eos_id = _get_tgt_sos_eos_id(hparams)
    model_fn = make_model_fn(hparams)
    eval_runner = create_eval_runner_and_build_graph(hparams, model_fn)
    predictions = list(eval_runner.predict())

    checkpoint_path = tf.train.latest_checkpoint(hparams.out_dir)
    if checkpoint_path is None:
        # latest_checkpoint() returns None when nothing has been saved yet;
        # fail with a clear message instead of a TypeError in basename().
        raise ValueError(
            "No checkpoint found in out_dir: %r" % (hparams.out_dir,))

    # The step is the text after the LAST hyphen ("model.ckpt-1234").
    # Splitting from the right keeps this correct when the checkpoint
    # prefix itself contains hyphens.
    checkpoint_name = os.path.basename(checkpoint_path)
    current_step = int(checkpoint_name.rsplit("-", 1)[-1])
    return metric.get_metric(hparams, predictions, current_step)
def add_task(): r = metric.get_redis() pos = 1 end = 28010000 #end = 100000 limit = 10000 while pos <= end: cnt = int(r.scard(task_key)) if cnt < limit: print 'add tasks', pos pipeline = r.pipeline() for i in xrange(pos, pos + limit): exits = metric.get_metric( i, 'answer') or metric.get_metric(i) or metric.get_metric( i, '404') if not exits: pipeline.sadd(task_key, i) pipeline.execute() pos += limit time.sleep(0.3)
def main(config):
    """Build loaders, loss, metric, model and trainer from config, then train.

    :param config: nested configuration dict. Keys read here:
        ``dataset.train``, optional ``dataset.validate``, ``loss``,
        optional ``post_processing``, ``metric``, ``arch`` (including
        ``arch.algorithm``), and ``converter`` when the algorithm is
        ``'rec'``. ``distributed``, ``local_rank`` and (for ``'rec'``)
        ``arch.num_class`` are written into it.
    :raises ValueError: if ``config['arch']['algorithm']`` is neither
        ``'rec'`` nor ``'det'``.

    Also reads the module-level ``args.local_rank``.
    """
    import torch
    from model import get_model, get_loss, get_converter, get_post_processing
    from metric import get_metric
    from data_loader import get_dataloader
    from tools.rec_trainer import RecTrainer
    from tools.det_trainer import DetTrainer

    # Explicit dispatch table instead of eval() on a config string: a config
    # file must not be able to execute arbitrary expressions.
    trainers = {'rec': RecTrainer, 'det': DetTrainer}

    # More than one visible GPU means distributed training over NCCL.
    if torch.cuda.device_count() > 1:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(
            backend="nccl",
            init_method="env://",
            world_size=torch.cuda.device_count(),
            rank=args.local_rank)
        config['distributed'] = True
    else:
        config['distributed'] = False
    config['local_rank'] = args.local_rank

    train_loader = get_dataloader(config['dataset']['train'],
                                  config['distributed'])
    assert train_loader is not None
    if 'validate' in config['dataset']:
        validate_loader = get_dataloader(config['dataset']['validate'], False)
    else:
        validate_loader = None

    criterion = get_loss(config['loss']).cuda()

    if config.get('post_processing'):
        post_p = get_post_processing(config['post_processing'])
    else:
        post_p = None

    metric = get_metric(config['metric'])

    algorithm = config['arch']['algorithm']
    if algorithm not in trainers:
        raise ValueError(
            "Unsupported config['arch']['algorithm']: %r (expected one of %s)"
            % (algorithm, sorted(trainers)))

    if algorithm == 'rec':
        # The class count must be set before the model is built.
        converter = get_converter(config['converter'])
        config['arch']['num_class'] = len(converter.character)
    else:
        converter = None
    model = get_model(config['arch'])

    trainer = trainers[algorithm](
        config=config,
        model=model,
        criterion=criterion,
        train_loader=train_loader,
        post_process=post_p,
        metric=metric,
        validate_loader=validate_loader,
        converter=converter)
    trainer.train()
def __init__(self, model_path, gpu_id=0):
    """Restore a trained model from a checkpoint and build the validation loader.

    :param model_path: path of the checkpoint file; it must contain the
        keys ``'config'`` and ``'state_dict'``.
    :param gpu_id: CUDA device index. The CPU is used when this is ``None``
        or when CUDA is not available.

    Besides the checkpoint, this reads the module-level ``args.img_path``:
    when it is not ``None`` it replaces the validation ``data_path``.
    """
    from model import get_model, get_post_processing
    from data_loader import get_dataloader
    from metric import get_metric

    # Previously the device string was built unconditionally, so
    # gpu_id=None produced the invalid device "cuda:None" and a machine
    # without CUDA failed later in model.to(). Fall back to the CPU.
    if gpu_id is not None and torch.cuda.is_available():
        self.device = torch.device("cuda:%s" % gpu_id)
        torch.backends.cudnn.benchmark = True
    else:
        self.device = torch.device("cpu")

    # Always deserialize on the CPU; the model is moved to the device below.
    checkpoint = torch.load(model_path, map_location=torch.device('cpu'))
    config = checkpoint['config']
    # The weights come from the checkpoint's state dict, loaded below.
    config['arch']['args']['pretrained'] = False

    if args.img_path is not None:
        config['dataset']['validate']['dataset']['args']['data_path'] = [
            args.img_path
        ]
    self.validate_loader = get_dataloader(config['dataset']['validate'])

    self.model = get_model(config['arch'])
    self.model.load_state_dict(checkpoint['state_dict'])
    self.model.to(self.device)

    self.post_process = get_post_processing(config['post_processing'])
    self.metric_cls = get_metric(config['metric'])
def fit(self, X, y, validation_data=(None, None), early_stopping_rounds=np.inf, maximize=True, eval_metric=None, loss="logisticloss", eta=0.3, num_boost_round=1000, max_depth=6, scale_pos_weight=1, subsample=0.8, colsample_bytree=0.8, colsample_bylevel=0.8, min_child_weight=1, min_sample_split=10, reg_lambda=1.0, gamma=0, num_thread=-1): """ :param X: pandas.core.frame.DataFrame :param y: pandas.core.series.Series :param eta: learning rate :param num_boost_round: number of boosting round :param max_depth: max depth of each tree :param subsample: row sample rate when building a tree :param colsample_bytree: column sample rate when building a tree :param colsample_bylevel: column sample rate when spliting each tree node, the number of features = total_features*colsample_bytree*colsample_bylevel :param min_sample_split: min number of samples in a leaf node :param loss: loss object logisticloss,squareloss, or customize loss :param reg_lambda: lambda :param gamma: gamma :param seed: random seed :param num_thread: number of threself.tree_predict_Xad to parallel :param eval_metric: evaluation metric, provided: "accuracy" """ self.eta = eta self.num_boost_round = num_boost_round self.max_depth = max_depth self.subsample = subsample self.colsample_bytree = colsample_bytree self.colsample_bylevel = colsample_bylevel self.reg_lambda = reg_lambda self.gamma = gamma self.min_sample_split = min_sample_split self.num_thread = num_thread self.eval_metric = eval_metric self.min_child_weight = min_child_weight self.scale_pos_weight = scale_pos_weight self.first_round_pred = 0.0 X.reset_index(drop=True, inplace=True) y.reset_index(drop=True, inplace=True) # initial loss function if loss == "logisticloss": self.loss = LogisticLoss(reg_lambda) elif loss == "squareloss": self.loss = SquareLoss(reg_lambda) self.first_round_pred = y.mean() else: try: self.loss = CustomizeLoss(loss, reg_lambda) except: raise NotImplementedError( "loss should be 'logisticloss','squareloss', or customize 
loss function" ) # to evaluate on validation set and conduct early stopping # we should get (val_X,val_y) # and set some variable to check when to stop do_validation = True if not isinstance(validation_data, tuple): raise TypeError("validation_data should be (val_X, val_y)") val_X, val_y = validation_data if val_X is None or val_y is None: do_validation = False else: # type check if not isinstance(val_X, pd.core.frame.DataFrame): raise TypeError("val_X should be 'pd.core.frame.DataFrame'") if not isinstance(val_y, pd.core.series.Series): raise TypeError("val_X should be 'pd.core.series.Series'") val_X.reset_index(drop=True, inplace=True) val_y.reset_index(drop=True, inplace=True) val_Y = pd.DataFrame(val_y.values, columns=['label']) val_Y['y_pred'] = self.first_round_pred if maximize: best_val_metric = -np.inf best_round = 0 become_worse_round = 0 else: best_val_metric = np.inf best_round = 0 become_worse_round = 0 # Y stores: label, y_pred, grad, hess, sample_weight Y = pd.DataFrame(y.values, columns=['label']) Y['y_pred'] = self.first_round_pred Y['grad'] = self.loss.grad(Y.y_pred.values, Y.label.values) Y['hess'] = self.loss.hess(Y.y_pred.values, Y.label.values) Y['sample_weight'] = 1.0 Y.loc[Y.label == 1, 'sample_weight'] = self.scale_pos_weight for i in range(self.num_boost_round): # weighted grad and hess Y.grad = Y.grad * Y.sample_weight Y.hess = Y.hess * Y.sample_weight # row and column sample before training the current tree data = X.sample(frac=self.colsample_bytree, axis=1) data = pd.concat([data, Y], axis=1) data = data.sample(frac=self.subsample, axis=0) Y_selected = data[['label', 'y_pred', 'grad', 'hess']] X_selected = data.drop( ['label', 'y_pred', 'grad', 'hess', 'sample_weight'], axis=1) # train current tree tree = Tree() tree.fit(X_selected, Y_selected, max_depth=self.max_depth, min_child_weight=self.min_child_weight, colsample_bylevel=self.colsample_bylevel, min_sample_split=self.min_sample_split, reg_lambda=self.reg_lambda, gamma=self.gamma, 
num_thread=self.num_thread) # predict the whole trainset and update y_pred,grad,hess preds = tree.predict(X) Y['y_pred'] += self.eta * preds Y['grad'] = self.loss.grad(Y.y_pred.values, Y.label.values) Y['hess'] = self.loss.hess(Y.y_pred.values, Y.label.values) # update feature importance for k in tree.feature_importance.iterkeys(): self.feature_importance[k] += tree.feature_importance[k] self.trees.append(tree) # print training information if self.eval_metric is None: print "TGBoost round {iteration}".format(iteration=i) else: try: mertric_func = get_metric(self.eval_metric) except: raise NotImplementedError( "The given eval_metric is not provided") train_metric = mertric_func( self.loss.transform(Y.y_pred.values), Y.label.values) if not do_validation: print "TGBoost round {iteration}, train-{eval_metric} is {train_metric}".format( iteration=i, eval_metric=self.eval_metric, train_metric=train_metric) else: val_Y['y_pred'] += self.eta * tree.predict(val_X) val_metric = mertric_func( self.loss.transform(val_Y.y_pred.values), val_Y.label.values) print "TGBoost round {iteration}, train-{eval_metric} is {train_metric}, val-{eval_metric} is {val_metric}".format( iteration=i, eval_metric=self.eval_metric, train_metric=train_metric, val_metric=val_metric) # check if to early stop if maximize: if val_metric > best_val_metric: best_val_metric = val_metric best_round = i become_worse_round = 0 else: become_worse_round += 1 if become_worse_round > early_stopping_rounds: print "TGBoost training Stop, best round is {best_round}, best {eval_metric} is {best_val_metric}".format( best_round=best_round, eval_metric=eval_metric, best_val_metric=best_val_metric) break else: if val_metric < best_val_metric: best_val_metric = val_metric best_round = i become_worse_round = 0 else: become_worse_round += 1 if become_worse_round > early_stopping_rounds: print "TGBoost training Stop, best round is {best_round}, best val-{eval_metric} is {best_val_metric}".format( best_round=best_round, 
eval_metric=eval_metric, best_val_metric=best_val_metric) break
def fit(self, train_data, validation_data, early_stopping_rounds=np.inf, eval_metric=None, loss="logisticloss", eta=0.3, num_round=1000, max_depth=6, pool_size=1, min_instances_byleaf=1, scale_pos_weight=1, subsample=0.8, colsample_bytree=0.8, min_child_weight=1, reg_lambda=1.0, gamma=0): """ :param train_data: Data object, train data :param validation_data: Data object, validation data :param eta: learning rate :param num_round: number of boosting round :param max_depth: max depth of each tree :param pool_size: the num of processes :param subsample: row sample rate when building a tree :param colsample_bytree: column sample rate when building a tree :param min_instances_byleaf: min number of samples in a leaf node :param loss: loss object logisticloss,squareloss :param reg_lambda: lambda :param gamma: gamma :param eval_metric: evaluation metric, provided: "accuracy" """ self.eta = eta self.num_round = num_round self.max_depth = max_depth self.pool_size = pool_size self.subsample = subsample self.colsample_bytree = colsample_bytree self.reg_lambda = reg_lambda self.gamma = gamma self.min_instances_byleaf = min_instances_byleaf self.eval_metric = eval_metric self.min_child_weight = min_child_weight self.scale_pos_weight = scale_pos_weight self.first_round_pred = 0.0 # initial loss function if loss == "logisticloss": self.loss = LogisticLoss(self.reg_lambda) elif loss == "squareloss": self.loss = SquareLoss(self.reg_lambda) self.first_round_pred = train_data.getLabelMean() else: raise NotImplementedError( "loss should be 'logisticloss' or 'squareloss'") # to evaluate on validation set and conduct early stopping do_validation = True valData = validation_data.getData() if not valData: raise ValueError("validation_data is empty !") valIdxList = [] #save an fixed order valLabels = [] for idx in valData: valData[idx][ 'yPred'] = self.first_round_pred #init it with traindata valIdxList.append(idx) valLabels.append(valData[idx]['label']) best_val_metric = np.inf best_round 
= 0 become_worse_round = 0 data = train_data.getData() if not train_data: raise ValueError("train_data is empty !") idxList = [] #save an fixed order labels = [] for idx in data: data[idx]['yPred'] = self.first_round_pred data[idx]['grad'] = self.loss.grad(data[idx]['grad'], data[idx]['label']) data[idx]['hess'] = self.loss.hess(data[idx]['hess'], data[idx]['label']) if data[idx]['label'] == 1.0: data[idx]['weight'] = self.scale_pos_weight idxList.append(idx) labels.append(data[idx]['label']) labels = np.array(labels) for i in range(self.num_round): # weighted grad and hess for idx in data: data[idx]['grad'] = data[idx]['grad'] * data[idx]['weight'] data[idx]['hess'] = data[idx]['hess'] * data[idx]['weight'] # row and column sample before training the current tree factors = train_data.getFactors() factorTypes = train_data.getFeatureTypes() sampledFactors = random.sample( factors, int(len(factors) * self.colsample_bytree)) sampledData = {} for idx in random.sample(idxList, int(len(idxList) * self.subsample)): sampledData.update({idx: data[idx]}) # train current tree tree = Tree() tree.fit(sampledData, sampledFactors, factorTypes, max_depth=self.max_depth, pool_size=self.pool_size, min_child_weight=self.min_child_weight, min_instances_byleaf=self.min_instances_byleaf, reg_lambda=self.reg_lambda, gamma=self.gamma) # predict the whole trainset and update y_pred,grad,hess preds = tree.predict(sampledData) for idx in sampledData: data[idx]['yPred'] += self.eta * preds[idx] data[idx]['grad'] = self.loss.grad(data[idx]["yPred"], data[idx]["label"]) data[idx]['hess'] = self.loss.hess(data[idx]["yPred"], data[idx]["label"]) # update feature importance for k in tree.feature_importance.iterkeys(): self.feature_importance[k] += tree.feature_importance[k] self.trees.append(tree) # print training information if self.eval_metric is None: print "Apollo round {iteration}".format(iteration=i) else: try: mertric_func = get_metric(self.eval_metric) except: raise NotImplementedError( 
"The given eval_metric is not provided") curPreds = np.array([data[idx]["yPred"] for idx in idxList]) train_metric = mertric_func(self.loss.transform(curPreds), labels) if not do_validation: print "Apollo round {iteration}, train-{eval_metric} is {train_metric}".format( iteration=i, eval_metric=self.eval_metric, train_metric=train_metric) else: valPreds = tree.predict(valData) for idx in valData: valData[idx]['yPred'] += self.eta * valPreds[idx] curValPreds = [valData[idx]['yPred'] for idx in valIdxList] assert len(curValPreds) == len(valLabels) val_metric = mertric_func( self.loss.transform(np.array(curValPreds)), np.array(valLabels)) print "Apollo round {iteration}, train-{eval_metric} is {train_metric}, val-{eval_metric} is {val_metric}".format( iteration=i, eval_metric=self.eval_metric, train_metric=train_metric, val_metric=val_metric) # check if to early stop if val_metric < best_val_metric: best_val_metric = val_metric best_round = i become_worse_round = 0 else: become_worse_round += 1 if become_worse_round > early_stopping_rounds: print "Apollo training Stop, best round is {best_round}, best val-{eval_metric} is {best_val_metric}".format( best_round=best_round, eval_metric=eval_metric, best_val_metric=best_val_metric) break
def evaluate(config):
    """Evaluate a trained model on the validation split and dump the results.

    Weights are read from ``experiment/<dir>/<weights>`` and the results
    are written as JSON to ``experiment/<dir>/<dataset>_valuation.json``.

    Args:
        config: dict providing ``dir``, ``weights``, ``dataset``, ``cuda``
            and ``is_real`` (plus whatever ``get_model`` and
            ``get_dataloader`` read from it). When ``is_real`` is truthy,
            only per-keypoint 2D errors are accumulated, each normalised
            by how often that keypoint was present. Otherwise 2D and 3D
            ROC/AUC and keypoint errors are averaged over the batches.

    Raises:
        ValueError: if the validation dataloader yields no batch.
    """
    model = get_model(config)
    weight_file = 'experiment/' + config['dir'] + '/' + config['weights']
    model.load_state_dict(torch.load(weight_file)['model'])
    model.eval()

    n_kpoints = 21
    is_real = bool(config['is_real'])
    cuda = config['cuda']
    path_dir = 'experiment/' + config['dir'] + '/'
    path_val_file = path_dir + config['dataset'] + '_valuation.json'

    dataloader = get_dataloader(config, scope='val')
    roc_auc_2d = get_metric('roc_auc_2d')
    roc_auc_3d = get_metric('roc_auc_3d')
    keypoint_error = get_metric('keypoint_error')

    area_2d = 0
    error_keypoint_2d = np.zeros(n_kpoints)
    error_keypoint_total_2d = 0
    area_3d = 0
    error_keypoint_3d = np.zeros(n_kpoints)
    error_keypoint_total_3d = 0
    present_counter = np.zeros(n_kpoints)
    thres_range = np.arange(0, 1, 0.01)
    acc_range_2d = np.zeros(len(thres_range))
    acc_range_3d = np.zeros(len(thres_range))

    num = 0  # stays 0 when the loader is empty
    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        for num, sample in enumerate(dataloader, 1):
            sample['image'] = sample['image'].cuda(cuda)
            output = model(sample)

            if is_real:
                batch_error_2d = keypoint_error(sample, output, 'vector_2d',
                                                True)
                error_keypoint_2d += batch_error_2d['keypoint_error']
                present_counter += batch_error_2d['present_counter']
            else:
                batch_metric_2d = roc_auc_2d(sample, output)
                area_2d += batch_metric_2d['area']
                acc_range_2d += batch_metric_2d['acc_range']
                batch_error_2d = keypoint_error(sample, output, 'vector_2d',
                                                False)
                error_keypoint_2d += batch_error_2d['keypoint_error']
                error_keypoint_total_2d += batch_error_2d['total_error']

                batch_metric_3d = roc_auc_3d(sample, output)
                area_3d += batch_metric_3d['area']
                acc_range_3d += batch_metric_3d['acc_range']
                batch_error_3d = keypoint_error(sample, output, 'vector_3d',
                                                False)
                error_keypoint_3d += batch_error_3d['keypoint_error']
                error_keypoint_total_3d += batch_error_3d['total_error']

            if num % 50 == 0:
                print('Evaluation done for {} batches'.format(num))

    if num == 0:
        # Every average below divides by the batch count.
        raise ValueError('validation dataloader produced no batches')

    if is_real:
        # Keypoints that never appeared keep the sentinel value -1.
        error_keypoint_2d_avg = np.array([-1.] * n_kpoints)
        seen = present_counter != 0
        error_keypoint_2d_avg[seen] = (error_keypoint_2d[seen] /
                                       present_counter[seen])
        error_total_2d_avg = np.sum(error_keypoint_2d) / np.sum(
            present_counter)
        val_dict = {
            'error_keypoint_2d': list(error_keypoint_2d_avg),
            'error_total_2d': error_total_2d_avg
        }
    else:
        val_dict = {
            'thres_range': list(thres_range),
            'area_2d': area_2d / num,
            'acc_range_2d': list(acc_range_2d / num),
            'area_3d': area_3d / num,
            'acc_range_3d': list(acc_range_3d / num),
            'error_keypoint_2d': list(error_keypoint_2d / num),
            'error_total_2d': error_keypoint_total_2d / num,
            'error_keypoint_3d': list(error_keypoint_3d / num),
            'error_total_3d': error_keypoint_total_3d / num
        }

    # saving data
    with open(path_val_file, 'w') as fp:
        json.dump(val_dict, fp)
def test(args):
    """Run the network over the 'test' split and log metrics and timing.

    Reads from ``args``: ``num_threads``, ``pretrain``, ``save_dir``,
    ``save_image`` and ``epochs`` (plus whatever the factory functions
    read). Raises ``KeyError`` when the checkpoint is missing keys the
    network expects; unexpected keys are only printed.
    """
    # Dataset
    dataset_cls = get_data(args)
    test_set = dataset_cls(args, 'test')
    loader_test = DataLoader(dataset=test_set,
                             batch_size=1,
                             shuffle=False,
                             num_workers=args.num_threads)

    # Network
    net = get_model(args)(args)
    net.cuda()

    if args.pretrain is not None:
        assert os.path.exists(args.pretrain), \
            "file not found: {}".format(args.pretrain)

        checkpoint = torch.load(args.pretrain)
        # Non-strict load, then report mismatches explicitly.
        missing, unexpected = net.load_state_dict(checkpoint['net'],
                                                  strict=False)
        if unexpected:
            print('Unexpected keys :')
            print(unexpected)
        if missing:
            print('Missing keys :')
            print(missing)
            raise KeyError

    net = nn.DataParallel(net)

    metric = get_metric(args)(args)
    summary = get_summary(args)

    # Output directories are created on a best-effort basis.
    try:
        for out_dir in (args.save_dir, args.save_dir + '/test'):
            os.makedirs(out_dir, exist_ok=True)
    except OSError:
        pass

    writer_test = summary(args.save_dir, 'test', args, None,
                          metric.metric_name)

    net.eval()

    step = loader_test.batch_size
    num_sample = len(loader_test) * step
    pbar = tqdm(total=num_sample)

    t_total = 0
    for batch, sample in enumerate(loader_test):
        # Move every non-None entry of the sample to the GPU.
        sample = {
            name: tensor.cuda()
            for name, tensor in sample.items() if tensor is not None
        }

        # Only the forward pass is timed.
        started = time.time()
        output = net(sample)
        t_total += time.time() - started

        writer_test.add(None, metric.evaluate(sample, output, 'train'))

        # Save data for analysis
        if args.save_image:
            writer_test.save(args.epochs, batch, sample, output)

        pbar.set_description('{} | Test'.format(
            time.strftime('%y%m%d@%H:%M:%S')))
        pbar.update(step)

    pbar.close()

    writer_test.update(args.epochs, sample, output)

    t_avg = t_total / num_sample
    print('Elapsed time : {} sec, '
          'Average processing time : {} sec'.format(t_total, t_avg))
def fit(self, X, y, eta=0.01, num_boost_round=1000, max_depth=5, rowsample=0.8, colsample_bytree=0.8, colsample_bylevel=0.8, min_sample_split=10, loss="logisticloss", l2_regularization=1.0, gamma=0.1, num_thread=-1, eval_metric=None): """ :param X: pandas.core.frame.DataFrame :param y: pandas.core.series.Series :param eta: learning rate :param num_boost_round: number of boosting round :param max_depth: max depth of each tree :param rowsample: row sample rate when building a tree :param colsample_bytree: column sample rate when building a tree :param colsample_bylevel: column sample rate when spliting each tree node, the number of features = total_features*colsample_bytree*colsample_bylevel :param min_sample_split: min number of samples in a leaf node :param loss: loss object logisticloss,squareloss, or customize loss :param l2_regularization: lambda :param gamma: gamma :param seed: random seed :param num_thread: number of thread to parallel :param eval_metric: evaluation metric, provided: "accuracy" """ self.eta = eta self.num_boost_round = num_boost_round self.max_depth = max_depth self.rowsample = rowsample self.colsample_bytree = colsample_bytree self.colsample_bylevel = colsample_bylevel self.l2_regularization = l2_regularization self.gamma = gamma self.min_sample_split = min_sample_split self.num_thread = num_thread self.eval_metric = eval_metric if loss == "logisticloss": self.loss = LogisticLoss(l2_regularization) elif loss == "squareloss": self.loss = SquareLoss(l2_regularization) else: try: self.loss = CustomizeLoss(loss, l2_regularization) except: raise NotImplementedError( "loss should be 'logisticloss','squareloss', or customize loss function" ) self.first_round_pred = y.mean() # Y stores label, y_pred, grad, hess Y = pd.DataFrame(y.values, columns=['label']) # only one column "label" Y['y_pred'] = self.first_round_pred Y['grad'] = self.loss.grad(Y.y_pred.values, Y.label.values) Y['hess'] = self.loss.hess(Y.y_pred.values, Y.label.values) for i in 
range(self.num_boost_round): # sample samples and features to train current tree data = X.sample(frac=self.colsample_bytree, axis=1) data = pd.concat([data, Y], axis=1) data = data.sample(frac=self.rowsample, axis=0) Y_selected = data[['label', 'y_pred', 'grad', 'hess']] X_selected = data.drop(['label', 'y_pred', 'grad', 'hess'], axis=1) # train current tree tree = Tree() tree.fit(X_selected, Y_selected, max_depth=self.max_depth, colsample_bylevel=self.colsample_bylevel, min_sample_split=self.min_sample_split, l2_regularization=self.l2_regularization, gamma=self.gamma, num_thread=self.num_thread) # predict the whole dataset and update y_pred,grad,hess preds = tree.predict(X) Y['y_pred'] += self.eta * preds Y['grad'] = self.loss.grad(Y.y_pred.values, Y.label.values) Y['hess'] = self.loss.hess(Y.y_pred.values, Y.label.values) if self.eval_metric is not None: try: mertric_func = get_metric(self.eval_metric) except: raise NotImplementedError( "The given eval_metric is not provided") metric_value = mertric_func( self.loss.transform(Y.y_pred.values), Y.label.values) print "TGBoost round {iteration}, {eval_metric} is {metric_value}".format( iteration=i, eval_metric=self.eval_metric, metric_value=metric_value) else: print "TGBoost round {iteration}" # update feature importance for k in tree.feature_importance.iterkeys(): self.feature_importance[k] += tree.feature_importance[k] self.trees.append(tree)
def train(gpu, args):
    """Per-process entry point for multi-GPU training and validation.

    Args:
        gpu: index of the GPU used by this process; it is also passed as
            the process rank. The process with ``gpu == 0`` does all
            logging, summary writing and checkpoint saving.
        args: run configuration. Fields read here: ``num_gpus``,
            ``batch_size``, ``num_threads``, ``pretrain``, ``resume``,
            ``opt_level``, ``save_dir``, ``save_full``, ``warm_up`` and
            ``epochs`` (plus whatever the factory functions read).

    Each epoch runs one pass over the training loader, saves
    ``<save_dir>/model_<epoch>.pt`` on rank 0, then runs one pass over the
    validation loader with gradients disabled and steps the scheduler.
    """
    # Initialize workers
    # NOTE : the worker with gpu=0 will do logging
    dist.init_process_group(backend='nccl',
                            init_method='env://',
                            world_size=args.num_gpus,
                            rank=gpu)
    torch.cuda.set_device(gpu)

    # Prepare dataset
    data = get_data(args)

    data_train = data(args, 'train')
    data_val = data(args, 'val')

    sampler_train = DistributedSampler(data_train,
                                       num_replicas=args.num_gpus,
                                       rank=gpu)
    sampler_val = DistributedSampler(data_val,
                                     num_replicas=args.num_gpus,
                                     rank=gpu)

    # args.batch_size is the total across all GPUs; each process gets an
    # equal share.
    batch_size = args.batch_size // args.num_gpus

    loader_train = DataLoader(dataset=data_train,
                              batch_size=batch_size,
                              shuffle=False,
                              num_workers=args.num_threads,
                              pin_memory=True,
                              sampler=sampler_train,
                              drop_last=True)
    loader_val = DataLoader(dataset=data_val,
                            batch_size=1,
                            shuffle=False,
                            num_workers=args.num_threads,
                            pin_memory=True,
                            sampler=sampler_val,
                            drop_last=False)

    # Network
    model = get_model(args)
    net = model(args)
    net.cuda(gpu)

    # Only rank 0 reads the checkpoint file.
    # NOTE(review): presumably the other ranks receive these weights when
    # the network is wrapped by DDP below - confirm.
    if gpu == 0:
        if args.pretrain is not None:
            assert os.path.exists(args.pretrain), \
                "file not found: {}".format(args.pretrain)

            checkpoint = torch.load(args.pretrain)
            net.load_state_dict(checkpoint['net'])

            print('Load network parameters from : {}'.format(args.pretrain))

    # Loss
    loss = get_loss(args)
    loss = loss(args)
    loss.cuda(gpu)

    # Optimizer
    optimizer, scheduler = utility.make_optimizer_scheduler(args, net)
    net = apex.parallel.convert_syncbn_model(net)
    net, optimizer = amp.initialize(net,
                                    optimizer,
                                    opt_level=args.opt_level,
                                    verbosity=0)

    # Optionally restore optimizer/scheduler/amp state from the same
    # checkpoint (rank 0 only); the keys are missing when the checkpoint
    # was saved without the full state.
    if gpu == 0:
        if args.pretrain is not None:
            if args.resume:
                try:
                    optimizer.load_state_dict(checkpoint['optimizer'])
                    scheduler.load_state_dict(checkpoint['scheduler'])
                    amp.load_state_dict(checkpoint['amp'])

                    print('Resume optimizer, scheduler and amp '
                          'from : {}'.format(args.pretrain))
                except KeyError:
                    print('State dicts for resume are not saved. '
                          'Use --save_full argument')

            del checkpoint

    net = DDP(net)

    metric = get_metric(args)
    metric = metric(args)
    summary = get_summary(args)

    if gpu == 0:
        utility.backup_source_code(args.save_dir + '/code')
        # Directory creation is best-effort; an OSError is ignored.
        try:
            os.makedirs(args.save_dir, exist_ok=True)
            os.makedirs(args.save_dir + '/train', exist_ok=True)
            os.makedirs(args.save_dir + '/val', exist_ok=True)
        except OSError:
            pass

    if gpu == 0:
        writer_train = summary(args.save_dir, 'train', args, loss.loss_name,
                               metric.metric_name)
        writer_val = summary(args.save_dir, 'val', args, loss.loss_name,
                             metric.metric_name)

        # Keep a record of the run configuration next to the outputs.
        with open(args.save_dir + '/args.json', 'w') as args_json:
            json.dump(args.__dict__, args_json, indent=4)

    # Warm-up counters; used only during the first epoch.
    if args.warm_up:
        warm_up_cnt = 0.0
        warm_up_max_cnt = len(loader_train) + 1.0

    for epoch in range(1, args.epochs + 1):
        # Train
        net.train()

        sampler_train.set_epoch(epoch)

        if gpu == 0:
            current_time = time.strftime('%y%m%d@%H:%M:%S')

            list_lr = []
            for g in optimizer.param_groups:
                list_lr.append(g['lr'])

            print('=== Epoch {:5d} / {:5d} | Lr : {} | {} | {} ==='.format(
                epoch, args.epochs, list_lr, current_time, args.save_dir))

        num_sample = len(
            loader_train) * loader_train.batch_size * args.num_gpus

        if gpu == 0:
            pbar = tqdm(total=num_sample)
            log_cnt = 0.0
            log_loss = 0.0

        for batch, sample in enumerate(loader_train):
            # Move every non-None entry of the sample to this GPU.
            sample = {
                key: val.cuda(gpu)
                for key, val in sample.items() if val is not None
            }

            # Learning-rate warm-up during the first epoch:
            # lr = initial_lr * (batches seen so far) / (batches + 1).
            if epoch == 1 and args.warm_up:
                warm_up_cnt += 1

                for param_group in optimizer.param_groups:
                    lr_warm_up = param_group['initial_lr'] \
                        * warm_up_cnt / warm_up_max_cnt
                    param_group['lr'] = lr_warm_up

            optimizer.zero_grad()

            output = net(sample)

            loss_sum, loss_val = loss(sample, output)

            # Divide by batch size
            loss_sum = loss_sum / loader_train.batch_size
            loss_val = loss_val / loader_train.batch_size

            # Backward pass goes through amp's loss scaling.
            with amp.scale_loss(loss_sum, optimizer) as scaled_loss:
                scaled_loss.backward()

            optimizer.step()

            if gpu == 0:
                metric_val = metric.evaluate(sample, output, 'train')
                writer_train.add(loss_val, metric_val)

                log_cnt += 1
                log_loss += loss_sum.item()

                current_time = time.strftime('%y%m%d@%H:%M:%S')
                error_str = '{:<10s}| {} | Loss = {:.4f}'.format(
                    'Train', current_time, log_loss / log_cnt)

                if epoch == 1 and args.warm_up:
                    list_lr = []
                    for g in optimizer.param_groups:
                        list_lr.append(round(g['lr'], 6))
                    error_str = '{} | Lr Warm Up : {}'.format(
                        error_str, list_lr)

                pbar.set_description(error_str)
                pbar.update(loader_train.batch_size * args.num_gpus)

        if gpu == 0:
            pbar.close()

            # Uses the last batch of the epoch (sample/output are the loop
            # variables left over from the loop above).
            writer_train.update(epoch, sample, output)

            # The full state (optimizer, scheduler, amp) is saved only on
            # request or for the final epoch; otherwise only the network.
            if args.save_full or epoch == args.epochs:
                state = {
                    'net': net.module.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict(),
                    'amp': amp.state_dict(),
                    'args': args
                }
            else:
                state = {'net': net.module.state_dict(), 'args': args}

            torch.save(state, '{}/model_{:05d}.pt'.format(args.save_dir,
                                                          epoch))

        # Val
        torch.set_grad_enabled(False)
        net.eval()

        num_sample = len(loader_val) * loader_val.batch_size * args.num_gpus

        if gpu == 0:
            pbar = tqdm(total=num_sample)
            log_cnt = 0.0
            log_loss = 0.0

        for batch, sample in enumerate(loader_val):
            sample = {
                key: val.cuda(gpu)
                for key, val in sample.items() if val is not None
            }

            output = net(sample)

            loss_sum, loss_val = loss(sample, output)

            # Divide by batch size
            loss_sum = loss_sum / loader_val.batch_size
            loss_val = loss_val / loader_val.batch_size

            if gpu == 0:
                # NOTE(review): the mode string passed here is 'train' even
                # though this is the validation loop - confirm it is
                # intended.
                metric_val = metric.evaluate(sample, output, 'train')
                writer_val.add(loss_val, metric_val)

                log_cnt += 1
                log_loss += loss_sum.item()

                current_time = time.strftime('%y%m%d@%H:%M:%S')
                error_str = '{:<10s}| {} | Loss = {:.4f}'.format(
                    'Val', current_time, log_loss / log_cnt)
                pbar.set_description(error_str)
                pbar.update(loader_val.batch_size * args.num_gpus)

        if gpu == 0:
            pbar.close()

            writer_val.update(epoch, sample, output)
            print('')

            # Called with the last validation batch of the epoch.
            writer_val.save(epoch, batch, sample, output)

        # Re-enable autograd for the next training epoch.
        torch.set_grad_enabled(True)

        scheduler.step()
def fit(self,
        features,
        label,
        validation_data=(None, None),
        early_stopping_rounds=np.inf,
        maximize=True,
        eval_metric=None,
        loss="logisticloss",
        eta=0.3,
        num_boost_round=1000,
        max_depth=6,
        scale_pos_weight=1,
        subsample=0.8,
        colsample=0.8,
        min_child_weight=1,
        min_sample_split=10,
        reg_lambda=1.0,
        gamma=0,
        num_thread=-1):
    """
    Train the boosted trees, optionally with validation and early stopping.

    :param features: np.array
    :param label: np.array
    :param validation_data: tuple (val_features, val_label); (None, None)
           disables validation and early stopping
    :param early_stopping_rounds: stop once the validation metric has not
           improved for more than this many consecutive rounds
    :param maximize: True if a larger eval_metric is better
    :param eval_metric: evaluation metric, provided: "accuracy"
    :param loss: loss object logisticloss,squareloss, or customize loss
    :param eta: learning rate
    :param num_boost_round: number of boosting round
    :param max_depth: max depth of each tree
    :param scale_pos_weight: stored on the instance; not used further in
           this method
    :param subsample: row sample rate when building a tree
    :param colsample: column sample rate when building a tree
    :param min_child_weight: passed through to Tree
    :param min_sample_split: min number of samples in a leaf node
    :param reg_lambda: lambda
    :param gamma: gamma
    :param num_thread: number of threads to use in parallel
    :raises NotImplementedError: unknown loss or eval_metric
    :raises TypeError: validation_data is not a tuple
    """
    self.eta = eta
    self.num_boost_round = num_boost_round
    self.max_depth = max_depth
    self.subsample = subsample
    self.colsample = colsample
    self.reg_lambda = reg_lambda
    self.gamma = gamma
    self.min_sample_split = min_sample_split
    self.num_thread = num_thread
    self.eval_metric = eval_metric
    self.min_child_weight = min_child_weight
    self.scale_pos_weight = scale_pos_weight
    self.first_round_pred = 0

    # initial loss function
    if loss == "logisticloss":
        self.loss = LogisticLoss()
    elif loss == "squareloss":
        self.loss = SquareLoss()
        self.first_round_pred = label.mean()
    else:
        try:
            self.loss = CustomizeLoss(loss)
        except Exception:
            raise NotImplementedError(
                "loss should be 'logisticloss','squareloss', or customize loss function"
            )

    # initialize row_sampler, col_sampler, bin_structure, attribute_list, class_list
    row_sampler = RowSampler(features.shape[0], self.subsample)
    col_sampler = ColumnSampler(features.shape[1], self.colsample)
    bin_structure = BinStructure(features)
    attribute_list = AttributeList(features, bin_structure)
    class_list = ClassList(label)
    class_list.initialize_pred(self.first_round_pred)
    class_list.update_grad_hess(self.loss)

    # to evaluate on validation set and conduct early stopping
    # we should get (val_features,val_label)
    # and set some variable to check when to stop
    do_validation = True
    if not isinstance(validation_data, tuple):
        raise TypeError(
            "validation_data should be (val_features, val_label)")
    val_features, val_label = validation_data
    val_pred = None
    if val_features is None or val_label is None:
        do_validation = False
    else:
        val_pred = np.ones(val_label.shape) * self.first_round_pred

    best_val_metric = -np.inf if maximize else np.inf
    best_round = 0
    become_worse_round = 0

    # Resolve the metric once, before any tree is trained, so that an
    # unknown metric name fails fast instead of after the first round.
    metric_func = None
    if self.eval_metric is not None:
        try:
            metric_func = get_metric(self.eval_metric)
        except Exception:
            raise NotImplementedError(
                "The given eval_metric is not provided")

    # start learning
    logging.info("TGBoost start training")
    for i in range(self.num_boost_round):
        t0 = time()

        # train current tree
        tree = Tree(self.min_sample_split, self.min_child_weight,
                    self.max_depth, self.colsample, self.subsample,
                    self.reg_lambda, self.gamma, self.num_thread)
        tree.fit(attribute_list, class_list, row_sampler, col_sampler,
                 bin_structure)

        # when finish building this tree, update the class_list.pred, grad, hess
        class_list.update_pred(self.eta)
        class_list.update_grad_hess(self.loss)

        # save this tree
        self.trees.append(tree)

        t1 = time()

        # print training information
        if metric_func is None:
            logging.info("TGBoost round {iteration}".format(iteration=i))
            continue

        train_metric = metric_func(self.loss.transform(class_list.pred),
                                   label)

        if not do_validation:
            logging.info(
                "TGBoost round {iteration}, train-{eval_metric}: {train_metric:.4f}, exec time {tc:.3f}s"
                .format(iteration=i,
                        eval_metric=self.eval_metric,
                        train_metric=train_metric,
                        tc=t1 - t0))
            continue

        val_pred += self.eta * tree.predict(val_features)
        val_metric = metric_func(self.loss.transform(val_pred), val_label)
        logging.info(
            "TGBoost round {iteration}, train-{eval_metric}: {train_metric:.4f}, val-{eval_metric}: {val_metric:.4f}, exec time {tc:.3f}s"
            .format(iteration=i,
                    eval_metric=self.eval_metric,
                    train_metric=train_metric,
                    val_metric=val_metric,
                    tc=t1 - t0))

        # check whether to early stop
        if maximize:
            if val_metric > best_val_metric:
                best_val_metric = val_metric
                best_round = i
                become_worse_round = 0
            else:
                become_worse_round += 1
            if become_worse_round > early_stopping_rounds:
                logging.info(
                    "TGBoost training Stop, best round is {best_round}, best {eval_metric} is {best_val_metric:.4f}"
                    .format(best_round=best_round,
                            eval_metric=eval_metric,
                            best_val_metric=best_val_metric))
                break
        else:
            if val_metric < best_val_metric:
                best_val_metric = val_metric
                best_round = i
                become_worse_round = 0
            else:
                become_worse_round += 1
            if become_worse_round > early_stopping_rounds:
                logging.info(
                    "TGBoost training Stop, best round is {best_round}, best val-{eval_metric} is {best_val_metric:.4f}"
                    .format(best_round=best_round,
                            eval_metric=eval_metric,
                            best_val_metric=best_val_metric))
                break
# Offline scoring script: reload the result lists stored in
# ``test_ppl.json`` and print the metrics computed by ``metric.get_metric``.
import json
from nltk.util import ngrams
import metric

# Read the dump inside a context manager so the file handle is always
# closed (the previous bare ``open`` never released it).
with open('test_ppl.json') as result_file:
    lines = json.load(result_file)

# The JSON payload is a list of parallel lists addressed by position.
# NOTE(review): the index layout is presumably the one produced by the
# evaluation routine that writes test_ppl.json - confirm against the writer.
c = lines[4]
multi_ref = lines[2]
keyword = lines[1]
query = lines[0]
expose = lines[7]

# For every example, build the de-duplicated union of its query and keyword
# tokens. Indexing ``keyword`` by position (instead of zip) keeps the original
# IndexError when ``keyword`` is shorter than ``query``.
keyword_ng = [list(set(q + keyword[i])) for i, q in enumerate(query)]

log_path = './'
bleu, recall_q, recall_k, recall_qk, dist1, dist2 = metric.get_metric(
    c, multi_ref, keyword, query, keyword_ng, expose, log_path)
print(' bleu{}, recall_q{}, recall_k{}, recall_qk{}, dist1{}, dist2{} \n'.format(
    bleu, recall_q, recall_k, recall_qk, dist1, dist2))
def train_and_predict(self):
    """Run training and per-epoch evaluation, returning the last score.

    Two background threads are started: one runs ``self.train_eval_op`` and
    one feeds the train/eval enqueue ops once per epoch (after an initial
    300 second sleep). When ``self.eval_steps > 0`` the calling thread
    collects predictions every epoch, scores them with
    ``metric.get_metric`` and stops early once the score reaches
    ``self.hparams.target_bleu``. MLPerf log events are emitted along the
    way.

    Returns:
        ``(score, current_step)`` of the last evaluated epoch when
        ``self.eval_steps > 0``; otherwise ``(None, None)``.
    """
    self.sess.run([self.compile_op])

    # Train and eval thread.
    def train_eval_thread_fn(session, op):
        tf.logging.info("train_eval_op start")
        session.run([op])

    train_eval_thread = threading.Thread(
        target=train_eval_thread_fn,
        args=(self.sess, self.train_eval_op))
    train_eval_thread.start()

    # Infeed thread.
    def infeed_thread_fn(session, train_ops, eval_ops, eval_initializer):
        """Feed one round of train and eval inputs per epoch."""
        time.sleep(300)
        mlp_log.mlperf_print("init_stop", None)
        mlp_log.mlperf_print("run_start", None)
        for epoch_idx in range(self.hparams.max_train_epochs):
            epoch_num = epoch_idx + 1
            tf.logging.info("Infeed for epoch: %d", epoch_num)
            mlp_log.mlperf_print(
                "block_start",
                None,
                metadata={"first_epoch_num": epoch_num, "epoch_count": 1})
            mlp_log.mlperf_print(
                "epoch_start", None, metadata={"epoch_num": epoch_num})
            session.run(eval_initializer)
            session.run([train_ops])
            session.run([eval_ops])

    infeed_thread = threading.Thread(
        target=infeed_thread_fn,
        args=(self.sess, self.enqueue_ops, self.eval_enqueue_ops,
              self.eval_dataset_initializer))
    infeed_thread.start()

    if self.eval_steps > 0:
        score = 0.0
        reached_target = False

        for epoch in range(self.hparams.max_train_epochs):
            predictions = list(self.predict())
            mlp_log.mlperf_print(
                "eval_start", None, metadata={"epoch_num": epoch + 1})
            current_step = epoch * self.iterations
            score = metric.get_metric(self.hparams, predictions,
                                      current_step)
            tf.logging.info("Score after epoch %d: %f", epoch, score)
            mlp_log.mlperf_print(
                "eval_accuracy", score, metadata={"epoch_num": epoch + 1})
            mlp_log.mlperf_print(
                "eval_stop", None, metadata={"epoch_num": epoch + 1})
            mlp_log.mlperf_print(
                "block_stop",
                None,
                metadata={"first_epoch_num": epoch, "epoch_count": 1})
            if score >= self.hparams.target_bleu:
                reached_target = True
                mlp_log.mlperf_print(
                    "run_stop", None, metadata={"status": "success"})
                break

        # Every epoch was evaluated without reaching the target score.
        if not reached_target:
            mlp_log.mlperf_print(
                "run_stop", None, metadata={"status": "abort"})

    infeed_thread.join()
    train_eval_thread.join()

    if not (self.eval_steps > 0):
        return None, None
    return score, current_step
def eval(model_sample, model_generate, vocab, dataloader_k, dataloader_qk,
         epoch, updates):
    """Generate candidates for both loaders, log their metrics, return a BLEU.

    For every batch of ``dataloader_k`` and ``dataloader_qk`` the sampler
    model is run twice (``sample=False`` then ``sample=True``) and the
    generator output is decoded with ``vocab.id2sent``. The four candidate
    sets are scored with ``utils.eval_bleu`` and ``metric.get_metric``,
    written to the CSV/text logs and dumped to ``test_ppl.json`` under
    ``log_path``.

    Returns:
        The BLEU reported by ``metric.get_metric`` for the ``'sample'``
        candidate set.
    """
    model_sample.eval()
    model_generate.eval()

    def to_sentences(id_sequences):
        # Decode a batch of id sequences into sentences.
        return [vocab.id2sent(ids) for ids in id_sequences]

    def run_sampler(batch, do_sample):
        # One sampler forward pass in evaluation mode.
        return model_sample(batch,
                            train_type=args.train_type,
                            sample=do_sample,
                            sample_type='eval')

    def generate(node_idx, adj, adj_weight, word_type):
        # Run the generator on a sampled graph and decode its output.
        return to_sentences(
            model_generate.sample(node_idx, adj, adj_weight, word_type))

    multi_ref, query, keyword, keyword_ng = [], [], [], []
    candidate, candidate_s, candidate_qk, candidate_qk_s = [], [], [], []
    expose_sum = []

    for batch in tqdm(dataloader_k):
        (node_idx, adj, adj_weight, _, tgt, word_type, query_ids,
         keyword_ids) = run_sampler(batch, False)

        expose_sum.extend([int(e) for e in batch.expose])

        keyword_sents = to_sentences(keyword_ids)
        query_sents = to_sentences(query_ids)
        # The first token of every target is dropped before decoding.
        references = [vocab.id2sent(t[1:]) for t in tgt]
        multi_ref.extend(references)
        keyword.extend(keyword_sents)
        query.extend(query_sents)

        candidate.extend(generate(node_idx, adj, adj_weight, word_type))

        (node_idx, adj, adj_weight, _, _, word_type, _,
         _) = run_sampler(batch, True)
        candidate_s.extend(generate(node_idx, adj, adj_weight, word_type))

    for batch in tqdm(dataloader_qk):
        (node_idx, adj, adj_weight, _, _, word_type, _,
         keyword_ids_qk) = run_sampler(batch, False)
        keyword_ng.extend(to_sentences(keyword_ids_qk))

        candidate_qk.extend(generate(node_idx, adj, adj_weight, word_type))

        (node_idx, adj, adj_weight, _, _, word_type, _,
         _) = run_sampler(batch, True)
        candidate_qk_s.extend(generate(node_idx, adj, adj_weight, word_type))

    text_result, _ = utils.eval_bleu(multi_ref, candidate, log_path)
    text_result_s, _ = utils.eval_bleu(multi_ref, candidate_s, log_path)
    text_result_qk, _ = utils.eval_bleu(multi_ref, candidate_qk, log_path)
    text_result_qk_s, _ = utils.eval_bleu(multi_ref, candidate_qk_s,
                                          log_path)
    logging_csv([
        epoch, updates, text_result, text_result_s, text_result_qk,
        text_result_qk_s
    ])

    text_columns = [
        query, keyword, multi_ref, candidate, candidate_s, candidate_qk,
        candidate_qk_s
    ]
    # The JSON dump additionally carries the expose values as last entry.
    with open(log_path + "test_ppl.json", "w") as f:
        json.dump(text_columns + [expose_sum], f)
    utils.write_result_to_file(text_columns, log_path)

    systems = [
        ('ori', multi_ref),
        ('nosample', candidate),
        ('sample', candidate_s),
        ('nosample+q', candidate_qk),
        ('sample+q', candidate_qk_s),
    ]
    bleu_target = 0
    for label, outputs in systems:
        bleu, recall_q, recall_k, recall_qk, dist1, dist2 = metric.get_metric(
            outputs, multi_ref, keyword, query, keyword_ng, expose_sum,
            log_path)
        logging(
            '{}: bleu {}, recall_q {}, recall_k {}, recall_qk {}, dist1 {}, dist2 {} \n'
            .format(label, bleu, recall_q, recall_k, recall_qk, dist1, dist2))
        logging_csv([
            epoch, updates, label, bleu, recall_q, recall_k, recall_qk, dist1,
            dist2
        ])
        if label == 'sample':
            bleu_target = bleu
    return bleu_target
def fit(self,
        X,
        y,
        validation_data=(None, None),
        early_stopping_rounds=10,
        maximize=True,
        eval_metric=None,
        loss='logisticloss',
        eta=0.3,
        num_boost_round=1000,
        max_depth=5,
        scale_pos_weight=1,
        subsample=0.8,
        colsample_bytree=0.8,
        colsample_bylevel=0.8,
        min_child_weight=1,
        min_sample_split=10,
        reg_lambda=1.0,
        gamma=0,
        num_thread=-1,
        pred_cutoff=0.5):
    """Fit the boosted ensemble, one tree per boosting round.

    Each round samples columns and rows of ``X``, fits a ``Tree`` on the
    current gradients/hessians, adds ``eta * tree.predict(X)`` to the
    running prediction and recomputes gradients/hessians. When both an
    ``eval_metric`` and validation data are supplied, the train and
    validation metrics are printed every round and training stops early
    once the validation metric has failed to improve more than
    ``early_stopping_rounds`` consecutive times.

    Args:
        X: pandas.DataFrame of training features. Its index is reset
            in place.
        y: pandas.Series of training labels. Its index is reset in place.
        validation_data: tuple ``(val_X, val_y)``; validation is skipped
            when either element is None. ``val_X`` must be a DataFrame and
            ``val_y`` a Series; their indexes are reset in place.
        early_stopping_rounds: number of consecutive non-improving rounds
            tolerated before training stops.
        maximize: if True a larger validation metric is better, otherwise
            a smaller one is.
        eval_metric: metric name resolved through ``get_metric``; None
            disables metric reporting and early stopping.
        loss: ``'logisticloss'`` or ``'squareloss'``.
        eta: shrinkage applied to every tree's predictions.
        num_boost_round: maximum number of trees to build.
        max_depth: maximum depth passed to each tree.
        scale_pos_weight: sample weight given to rows whose label is 1.
        subsample: fraction of rows sampled for each tree.
        colsample_bytree: fraction of columns sampled for each tree.
        colsample_bylevel: column sampling rate forwarded to ``Tree.fit``.
        min_child_weight: forwarded to ``Tree.fit``.
        min_sample_split: forwarded to ``Tree.fit``.
        reg_lambda: regularisation value given to the loss and the trees.
        gamma: forwarded to ``Tree.fit``.
        num_thread: forwarded to ``Tree.fit``.
        pred_cutoff: stored on the instance; not used during fitting.

    Raises:
        Exception: if ``loss`` is unknown, ``validation_data`` is not a
            tuple, or the validation objects have the wrong type.
        NotImplementedError: if ``eval_metric`` cannot be resolved.
    """
    self.eval_metric = eval_metric
    self.eta = eta
    self.num_boost_round = num_boost_round
    self.first_round_pred = 0.0
    self.subsample = subsample
    self.max_depth = max_depth
    self.colsample_bytree = colsample_bytree
    self.colsample_bylevel = colsample_bylevel
    self.reg_lambda = reg_lambda
    self.min_sample_split = min_sample_split
    self.gamma = gamma
    self.num_thread = num_thread
    self.min_child_weight = min_child_weight
    self.scale_pos_weight = scale_pos_weight
    self.pred_cutoff = pred_cutoff

    # Make X and y addressable by a 0-based integer index.
    X.reset_index(drop=True, inplace=True)
    y.reset_index(drop=True, inplace=True)

    # Compare against the requested name. The previous ``if 'logisticloss':``
    # tested a non-empty string literal, which is always true, so the square
    # loss and the error branch could never be reached.
    if loss == 'logisticloss':
        self.loss = LogisticLoss(self.reg_lambda)
    elif loss == 'squareloss':
        self.loss = SquareLoss(self.reg_lambda)
    else:
        raise Exception('No find match loss')

    if not isinstance(validation_data, tuple):
        raise Exception('validation_data must be tuple')
    val_X, val_y = validation_data

    do_val = val_X is not None and val_y is not None
    if do_val:
        if not isinstance(val_X, pd.core.frame.DataFrame):
            raise Exception('val_X must be pd.core.frame.DataFrame')
        if not isinstance(val_y, pd.core.series.Series):
            raise Exception('val_y must be pd.core.series.Series')
        val_X.reset_index(drop=True, inplace=True)
        val_y.reset_index(drop=True, inplace=True)
        val_Y = pd.DataFrame(val_y.values, columns=['label'])
        # Start the validation prediction from the default value.
        val_Y['y_pred'] = self.first_round_pred
        best_val_metric = -np.inf if maximize else np.inf
        best_round = 0
        become_worse_round = 0

    # The metric function does not change between rounds, so resolve it once
    # up front; an unknown metric now fails before any tree is trained.
    metric_func = None
    if self.eval_metric is not None and do_val:
        try:
            metric_func = get_metric(self.eval_metric)
        except Exception:
            raise NotImplementedError(
                "The given eval_metric is not provided")

    Y = pd.DataFrame(y.values, columns=['label'])
    Y['y_pred'] = self.first_round_pred
    # NOTE(review): these initial grad/hess values are not multiplied by
    # sample_weight, unlike the per-round updates below - confirm intended.
    Y['grad'] = self.loss.grad(Y.y_pred.values, Y.label.values)
    Y['hess'] = self.loss.hess(Y.y_pred.values, Y.label.values)
    Y['sample_weight'] = 1.0
    # Re-weight the positive samples.
    Y.loc[Y.label == 1, 'sample_weight'] = self.scale_pos_weight

    for i in range(self.num_boost_round):
        # Column sample, then row sample, before training the current tree.
        data = X.sample(frac=self.colsample_bytree, axis=1)
        data = pd.concat([data, Y], axis=1)
        data = data.sample(frac=self.subsample, axis=0)

        Y_selected = data[['label', 'y_pred', 'grad', 'hess']]
        X_selected = data.drop(
            ['label', 'y_pred', 'grad', 'hess', 'sample_weight'], axis=1)

        tree = Tree()
        tree.fit(X_selected,
                 Y_selected,
                 max_depth=self.max_depth,
                 min_child_weight=self.min_child_weight,
                 colsample_bylevel=self.colsample_bylevel,
                 min_sample_split=self.min_sample_split,
                 reg_lambda=self.reg_lambda,
                 gamma=self.gamma,
                 num_thread=self.num_thread)

        # Predict the whole training set and refresh y_pred, grad and hess.
        preds = tree.predict(X)
        Y['y_pred'] += self.eta * preds
        Y['grad'] = self.loss.grad(Y.y_pred.values,
                                   Y.label.values) * Y.sample_weight
        Y['hess'] = self.loss.hess(Y.y_pred.values,
                                   Y.label.values) * Y.sample_weight

        # Accumulate this tree's feature importance into the model's.
        for k, importance in tree.feature_importance.items():
            self.feature_importance[k] += importance

        self.trees.append(tree)

        # Without a metric or validation data only the round is reported.
        if metric_func is None:
            print("GBoost round {iteration}".format(iteration=i))
            continue

        train_metric = metric_func(self.loss.transform(Y.y_pred.values),
                                   Y.label.values)
        # val_Y holds two columns: label and the running prediction.
        val_Y['y_pred'] += self.eta * tree.predict(val_X)
        val_metric = metric_func(self.loss.transform(val_Y.y_pred.values),
                                 val_Y.label.values)
        print(
            "GBoost round {iteration}, train-{eval_metric} is {train_metric}, val-{eval_metric} is {val_metric}"
            .format(iteration=i,
                    eval_metric=self.eval_metric,
                    train_metric=train_metric,
                    val_metric=val_metric))

        # Early stopping: count consecutive rounds without improvement.
        if maximize:
            improved = val_metric > best_val_metric
        else:
            improved = val_metric < best_val_metric
        if improved:
            best_val_metric = val_metric
            best_round = i
            become_worse_round = 0
        else:
            become_worse_round += 1

        if become_worse_round > early_stopping_rounds:
            if maximize:
                stop_message = "training early Stop, best round is {best_round}, best {eval_metric} is {best_val_metric}"
            else:
                stop_message = "training early Stop, best round is {best_round}, best val-{eval_metric} is {best_val_metric}"
            print(
                stop_message.format(best_round=best_round,
                                    eval_metric=eval_metric,
                                    best_val_metric=best_val_metric))
            break
# for quick test use python .\main.py --layers 3 --features 5 --end_features 10 --iterations 101 --batch_size 6 --model pixelcnn if conf.model == 'graph': data = Dataset(conf) test_data = data.get_plain_test_values() with tf.Session() as sess: samples = [] for _ in range(data.total_test_batches): X, _ = sess.run(test_data) samples.append(X) X = np.concatenate(samples) print(X.shape) X_noncausal_graph = NonCausal(conf, data).get_test_samples_graph() tf.reset_default_graph() get_metric(X, X_noncausal_graph) tf.reset_default_graph() elif conf.model == 'evaluate': data = Dataset(conf) test_data = data.get_plain_test_values() with tf.Session() as sess: samples = [] for _ in range(data.total_test_batches): X, _ = sess.run(test_data) samples.append(X) X = np.concatenate(samples) print(X.shape) X_denoising = PixelCNN(conf, data, True).get_test_samples() tf.reset_default_graph() X_noncausal = NonCausal(conf, data).get_test_samples() tf.reset_default_graph()