def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--arch', required=True)
    parser.add_argument('--do_train', action='store_true')
    parser.add_argument('--do_predict', action='store_true')
    parser.add_argument('--data_dir', default='../../data/')
    # parsed as int: the value is used as an integer device index below
    parser.add_argument('--cuda', default=1, type=int)
    parser.add_argument('--hidden_size', default=256, type=int)
    parser.add_argument('--batch_size', default=256, type=int)
    parser.add_argument('--max_epoch', default=1500, type=int)
    parser.add_argument('--lr', default=1e-3, type=float)
    parser.add_argument('--wd', default=1e-2, type=float)
    parser.add_argument('--do_plot', action='store_true')
    args = parser.parse_args()

    missing_list = ['F1']

    if args.do_train:
        data = pd.read_csv(args.data_dir + 'train.csv')
        # axis=0 drops rows; axis=1 drops columns
        # inplace=True modifies the original DataFrame
        data.drop("Id", axis=1, inplace=True)
        # for drop in missing_list:
        #     data.drop(drop, axis=1, inplace=True)

        # fixed random_state so the split is reproducible
        train_set, valid_set = train_test_split(data, test_size=0.1, random_state=73)
        train = preprocess_samples(train_set, missing_list)
        valid = preprocess_samples(valid_set, missing_list)
        trainData = FeatureDataset(train)
        validData = FeatureDataset(valid)

        device = torch.device('cuda:%d' % args.cuda if torch.cuda.is_available() else 'cpu')
        model = simpleNet(args.hidden_size, 9 - len(missing_list))
        model.to(device)
        batch_size = args.batch_size
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd)
        # MSE for the regression head, cross-entropy for the classification head
        loss_function1 = torch.nn.MSELoss()
        loss_function2 = torch.nn.CrossEntropyLoss()
        max_epoch = args.max_epoch

        trainer = Trainer(device, trainData, validData, model, loss_function1,
                          loss_function2, optimizer, batch_size, args.arch)
        for epoch in range(max_epoch):
            print('Epoch: {}'.format(epoch))
            # True for training; False for validation
            trainer.run_epoch(epoch, True)
            trainer.run_epoch(epoch, False)

    if args.do_predict:
        pass

    if args.do_plot:
        pass
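# Every example in this file feeds a FeatureDataset into a DataLoader, usually
# with collate_fn=<dataset>.collate_fn. The real class is project-specific;
# the minimal sketch below only assumes each preprocessed sample is a
# (features, label) pair, matching the (x, y) batches unpacked in these
# scripts. Treat it as illustration, not the actual implementation.
import torch
from torch.utils.data import Dataset

class FeatureDataset(Dataset):
    def __init__(self, samples):
        self.samples = samples  # assumed: list of (feature_vector, label) pairs

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index):
        return self.samples[index]

    def collate_fn(self, batch):
        # stack per-sample features and labels into batch tensors
        x = torch.tensor([f for f, _ in batch], dtype=torch.float32)
        y = torch.tensor([l for _, l in batch], dtype=torch.long)
        return x, y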
def buildnet(actor_ckpt=None, clf_ckpt="result/0804_1708_e2e_ucf_model.pth.tar"):
    fuser = Fuser(fuse_type='average')
    feature_length = 800
    num_class = 101
    i3d_model_checkpoint = clf_ckpt
    clf = Classifier(feature_length, num_class, isbn=False)
    clf = load_clf_from_i3d(clf, i3d_model_checkpoint)
    clf = torch.nn.DataParallel(clf, device_ids=list(range(torch.cuda.device_count()))).cuda()

    obs_shape = [800, 800, 800]
    eval_dataset = FeatureDataset('features/thumos14/test/data.csv', is_thumos14_test_folder=True)
    env = make_env(dataset=eval_dataset, classifier=clf, fuser=fuser,
                   observation_space=obs_shape, index=0, threshold=0.4, verbose=False)
    envs = DummyVecEnv([env])
    env = envs.envs[0]

    actor_critic = Policy(obs_shape, envs.action_space, output_size=256)
    if actor_ckpt:
        act_ckpt = torch.load(actor_ckpt)
        actor_critic.load_state_dict(act_ckpt['state_dict'])
    actor_critic = actor_critic.cuda()
    return env, actor_critic
def validate_RL(num_process, ckpt, force_num=None):
    from torch import multiprocessing

    # multiprocess: workers consume indices from index_queue,
    # results come back through result_queue
    ctx = multiprocessing.get_context('spawn')
    index_queue = ctx.Queue()
    result_queue = ctx.Queue()
    props = []
    workers = [ctx.Process(target=runner_func, args=(index_queue, result_queue, ckpt))
               for _ in range(num_process)]
    for w in workers:
        w.daemon = True
        w.start()

    video_num = len(FeatureDataset('features/thumos14/test/data.csv',
                                   is_thumos14_test_folder=True))
    if force_num:
        video_num = force_num
    for i in range(video_num):
        index_queue.put(i)
    for _ in range(video_num):
        props.extend(result_queue.get())
    return props
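# The queue protocol above implies a worker of roughly this shape: each process
# pulls video indices from index_queue and pushes a *list* of proposals per
# video onto result_queue (validate_RL extends, not appends). runner_func is
# defined elsewhere in the project; this assumed sketch reuses buildnet() from
# the previous example for per-worker setup.
def runner_func(index_queue, result_queue, ckpt):
    env, actor_critic = buildnet(actor_ckpt=ckpt)  # per-worker setup (assumed)
    while True:
        index = index_queue.get()  # blocks until an index is available
        proposals = []
        # ... roll out actor_critic on video `index`, collecting proposals ...
        result_queue.put(proposals)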
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--arch', required=True, help='architecture (model_dir)')
    parser.add_argument('--data_dir', default='../../data/', type=str)
    parser.add_argument('--do_train', action='store_true')
    parser.add_argument('--do_predict', action='store_true')
    parser.add_argument('--hidden_size', default=512, type=int)
    parser.add_argument('--batch_size', default=64, type=int)
    parser.add_argument('--max_epoch', default=300, type=int)
    parser.add_argument('--lr', default=1e-3, type=float)
    parser.add_argument('--cuda', default=1, type=int)
    parser.add_argument('--ckpt', default=-1, type=int, help='load pre-trained model epoch')
    args = parser.parse_args()

    if args.do_train:
        dataset = pd.read_csv(args.data_dir + "train.csv")
        dataset.drop("Id", axis=1, inplace=True)
        # drop outlier
        # outlier_idx = []
        # features = ['F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'F7', 'F8', 'F9',
        #             'F10', 'F11', 'F12', 'F13', 'F14']
        # for f in features:
        #     outlier_idx += get_outlier(dataset[f])
        # outlier_idx = list(set(outlier_idx))
        # print(len(outlier_idx))
        # dataset.drop(outlier_idx)

        train_set, valid_set = train_test_split(dataset, test_size=0.1, random_state=58)
        train = preprocess_samples(train_set, missing=["F2", "F7", "F12"])
        valid = preprocess_samples(valid_set, missing=["F2", "F7", "F12"])
        trainData = FeatureDataset(train)
        validData = FeatureDataset(valid)

        device = torch.device('cuda:%d' % args.cuda if torch.cuda.is_available() else 'cpu')
        model = simpleNet(args.hidden_size)
        model.to(device)
        trainer = Trainer(device, trainData, validData, model,
                          args.lr, args.batch_size, args.arch)
        for epoch in range(1, args.max_epoch + 1):
            print('Epoch: {}'.format(epoch))
            trainer.run_epoch(epoch, True)
            trainer.run_epoch(epoch, False)
            trainer.save(epoch)

    if args.do_predict:
        dataset = pd.read_csv(args.data_dir + "test.csv")
        dataset.drop("Id", axis=1, inplace=True)
        test = preprocess_samples(dataset, missing=["F2", "F7", "F12"])
        testData = FeatureDataset(test)

        device = torch.device('cuda:%d' % args.cuda if torch.cuda.is_available() else 'cpu')
        model = simpleNet(args.hidden_size)
        model.load_state_dict(torch.load('%s/model.pkl.%d' % (args.arch, args.ckpt)))
        model.train(False)
        model.to(device)

        dataloader = DataLoader(dataset=testData,
                                batch_size=args.batch_size,
                                shuffle=False,
                                collate_fn=testData.collate_fn,
                                num_workers=4)
        trange = tqdm(enumerate(dataloader), total=len(dataloader), desc='Predict')
        prediction = []
        for i, (x, missing, y) in trange:
            # call model.predict instead of model.forward
            o_labels = model.predict(x.to(device))
            o_labels = torch.argmax(o_labels, dim=1)
            prediction.append(o_labels.to('cpu'))
        prediction = torch.cat(prediction).detach().numpy().astype(int)
        SubmitGenerator(prediction, args.data_dir + 'sampleSubmission.csv')
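# SubmitGenerator is project-local. A plausible minimal version, assuming
# sampleSubmission.csv carries an Id column followed by a single label column
# to overwrite (the out_path name is hypothetical):
import pandas as pd

def SubmitGenerator(prediction, sample_path, out_path='submission.csv'):
    sub = pd.read_csv(sample_path)
    sub.iloc[:, 1] = prediction  # keep Id, replace the label column
    sub.to_csv(out_path, index=False)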
BR_SERIE_A = DatasetAggregator(ObservationDataset('leagues/br_serie_a.json'))
BAHRAIN_PL = DatasetAggregator(ObservationDataset('leagues/bahrain_pl.json'))
BEL_PL = DatasetAggregator(ObservationDataset('leagues/bel_pl.json'))
K_LEAGUE = DatasetAggregator(ObservationDataset('leagues/k_league.json'))
K_LEAGUE2 = DatasetAggregator(ObservationDataset('leagues/k_league2.json'))
COSTA_RICA = DatasetAggregator(ObservationDataset('leagues/costa_rica_primera.json'))
NB_1_LIGA = DatasetAggregator(ObservationDataset('leagues/nb_1_liga.json'))
Eliteserien = DatasetAggregator(ObservationDataset('leagues/eliteserien.json'))
Allsvenskan = DatasetAggregator(ObservationDataset('leagues/allsvenskan.json'))
CHINA_SUPER_LEAGUE = DatasetAggregator(ObservationDataset('leagues/china_super_league.json'))

FA_CUP = DatasetAggregator(
    ObservationDataset('cups/fa_cup.json'),
    FeatureDataset([
        BaseDataset.from_file('leagues/epl.json', {'strength': 0}),
        BaseDataset.from_file('leagues/efl_championship.json', {'strength': 1}),
        BaseDataset.from_file('leagues/efl_league1.json', {'strength': 2}),
        BaseDataset.from_file('leagues/efl_league2.json', {'strength': 3})
    ])
)
LEAGUE_CUP = DatasetAggregator(
    ObservationDataset('cups/league_cup.json'),
    FeatureDataset([
        BaseDataset.from_file('leagues/epl.json', {'strength': 0}),
        BaseDataset.from_file('leagues/efl_championship.json', {'strength': 1}),
        BaseDataset.from_file('leagues/efl_league1.json', {'strength': 2}),
        BaseDataset.from_file('leagues/efl_league2.json', {'strength': 3})
    ])
)
DFB_POKAL = DatasetAggregator(ObservationDataset('cups/dfb_pokal.json'))
    z = 1 / z
    n = (1 / len(dc)) * torch.matmul(s, z)
    q = torch.dot(n, q)


def save(db, q, path):
    # write the database and the Q matrix to separate files; saving both to
    # the same path would let the second torch.save overwrite the first
    # (the 'q_matrix' file name is a stand-in)
    db_path = os.path.join(path, 'database')
    q_path = os.path.join(path, 'q_matrix')
    torch.save(db, db_path)
    torch.save(q, q_path)
    print("Database and Q matrix have been saved at:", path)


if __name__ == '__main__':
    # build the models
    model_rgb = build_model(ckpt_rgb_path, 'rgb')
    model_flow = build_model(ckpt_flow_path, 'flow')
    ds = FeatureDataset('features/kinetics/thumos_validation/data.csv', Fuser(fuse_type='none'))
    db = build_database(model_rgb, model_flow, ds)
    db = refine_database(db, 1000)
    q = get_q(db)
    save(db, q, save_path)
    print("Database and Q matrix are generated, saved at {}".format(save_path))
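# Round-trip check for the two artifacts written by save(); the file names
# mirror the (assumed) ones used above.
db = torch.load(os.path.join(save_path, 'database'))
q = torch.load(os.path.join(save_path, 'q_matrix'))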
import config
import os

dirs_list = ['./info', './save_models']

if __name__ == '__main__':
    opt = config.parse_opt()
    torch.cuda.set_device(1)
    torch.manual_seed(opt.SEED)
    torch.cuda.manual_seed(opt.SEED)
    torch.backends.cudnn.benchmark = True

    dictionary = Dictionary({'Yes': 0}, ['Yes'])
    dictionary.init_dict()
    train_set = FeatureDataset('Action', dictionary, 'Train')
    test_set = FeatureDataset('Action', dictionary, 'Test')
    constructor = 'build_baseline'
    model = getattr(baseline, constructor)(train_set, opt).cuda()
    model.w_emb.init_embedding()
    train_loader = DataLoader(train_set, opt.BATCH_SIZE, shuffle=True, num_workers=1)
    test_loader = DataLoader(test_set, opt.BATCH_SIZE, shuffle=True, num_workers=1)
    print('Length of train:', len(train_loader))
    print('Length of test:', len(test_loader))
parser.add_argument('--mode', type=str, choices=['train', 'test'], default='test')
parser.add_argument('--model_path', type=str, default='./lstm/model.t7')
parser.add_argument('--model_dir', type=str, default='./lstm')
parser.add_argument('--stride', type=int, default=0)
args = parser.parse_args()

use_cuda = torch.cuda.is_available()
best_acc = 0     # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch

# Data
print('==> Preparing data..')
# first pass computes normalization statistics, second pass applies them
normset = FeatureDataset(args.feature_dir, args.modality, 'train', None, None, args.stride)
feat_mean, feat_std = normset.norm()
trainset = FeatureDataset(args.feature_dir, args.modality, 'train',
                          feat_mean, feat_std, args.stride)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=1, shuffle=True, num_workers=0)
valset = FeatureDataset(args.feature_dir, args.modality, 'val',
                        feat_mean, feat_std, args.stride)
valloader = torch.utils.data.DataLoader(valset, batch_size=1, shuffle=True, num_workers=0)
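# The two-pass construction above implies that norm() returns per-dimension
# statistics computed over the raw training features. A standalone sketch of
# that computation (the [T, D] per-clip feature layout is an assumption):
import numpy as np

def feature_norm(features):
    # features: iterable of [T, D] arrays for one split
    feats = np.concatenate([np.asarray(f) for f in features], axis=0)
    return feats.mean(axis=0), feats.std(axis=0)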
def main(args):
    fuser = Fuser(fuse_type=args.fuse_type, s=16)
    if args.front_end:
        train_dataset = build_video_dataset("ucf", train=True, unet=True,
                                            unet_clip_num=args.clip_num)['dataset']
    else:
        if args.dataset == 'ucf':
            train_dataset = FeatureDataset('features/ucf101/data.csv', fuser)
        elif args.dataset == 'thumos':
            train_dataset = FeatureDataset('features/kinetics/thumos_validation/data.csv', fuser)
        else:
            raise ValueError
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.num_worker,
                                               pin_memory=True)
    eval_dataset = FeatureDataset('features/kinetics/thumos_test/data.csv', Fuser(fuse_type='none'))
    num_class = 101

    front = None
    if args.front_end:
        front = I3DFeatureExtractor()
        front = torch.nn.DataParallel(
            front, device_ids=list(range(torch.cuda.device_count()))).cuda()
        front.eval()

    model = UntrimmedNetBack(num_class)
    if args.restore:
        if os.path.isfile(args.restore):
            print("=> loading checkpoint '{}'".format(args.restore))
            checkpoint = torch.load(args.restore)
            # strip the 'module.' prefix that DataParallel adds to saved keys
            new = OrderedDict()
            for key in checkpoint['state_dict'].keys():
                if key[7:] in model.state_dict():
                    new[key[7:]] = checkpoint['state_dict'][key]
            model.load_state_dict(new)
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.restore, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.restore))
    model = torch.nn.DataParallel(
        model, device_ids=list(range(torch.cuda.device_count()))).cuda()

    criterion = UnetLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                momentum=0.9, weight_decay=0.0005)

    best_prec1 = 0
    best_prec5 = 0
    lastavg = 0
    if args.evaluate:
        best_prec1, best_prec5 = validate(eval_dataset, model, criterion, args.modality)
        print('Experiment {} finished! Best Accu@1 is {:.6f}, Best Accu@5 is {:.6f}.'
              .format(args.exp_name, best_prec1, best_prec5))
        return

    for epoch in range(args.start_epoch, args.epochs):
        train(train_loader, model, criterion, optimizer, epoch, args.modality, front)
        if args.dataset == 'thumos':
            if (epoch + 1) == 100:
                top1.reset()
                top5.reset()
                lastavg = losses.avg
                losses.reset()
                batch_time.reset()
                data_time.reset()
            # staged learning-rate decay
            if (epoch + 1) == 1000:
                optimizer.param_groups[-1]['lr'] = optimizer.param_groups[-1]['lr'] / 10
            if (epoch + 1) == 2000:
                optimizer.param_groups[-1]['lr'] = optimizer.param_groups[-1]['lr'] / 2
            if (epoch + 1) == 3000:
                optimizer.param_groups[-1]['lr'] = optimizer.param_groups[-1]['lr'] / 2
            if (epoch + 1) == 4000:
                optimizer.param_groups[-1]['lr'] = optimizer.param_groups[-1]['lr'] / 2
        elif args.dataset == 'ucf':
            if (epoch + 1) == 10:
                top1.reset()
                top5.reset()
                lastavg = losses.avg
                losses.reset()
                batch_time.reset()
                data_time.reset()
            if (epoch + 1) == 400:
                optimizer.param_groups[-1]['lr'] = optimizer.param_groups[-1]['lr'] / 10
            if (epoch + 1) == 800:
                optimizer.param_groups[-1]['lr'] = optimizer.param_groups[-1]['lr'] / 2
        else:
            raise ValueError

        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            top1.reset()
            top5.reset()
            lastavg = losses.avg
            losses.reset()
            batch_time.reset()
            data_time.reset()
            prefix = 'result/{}'.format(args.exp_name)
            if not os.path.exists(prefix):
                os.mkdir(prefix)
            prec1, prec5 = validate(eval_dataset, model, criterion, args.modality)
            is_best = (prec1 > best_prec1) or (prec5 > best_prec5)
            best_prec1 = max(prec1, best_prec1)
            best_prec5 = max(prec5, best_prec5)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'best_prec5': best_prec5
                }, is_best, args.exp_name + "_epoch{}".format(epoch), prefix)
    print(args)
    print('Experiment {} finished! Best Accu@1 is {:.6f}, Best Accu@5 is {:.6f}. Saved@ {}'
          .format(args.exp_name, best_prec1, best_prec5,
                  'result/{}/{}_epoch{}'.format(args.exp_name, args.exp_name, epoch)))
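# The staged decay above can also be expressed data-first; an equivalent
# sketch using the same epochs and factors:
LR_SCHEDULE = {
    'thumos': {1000: 10, 2000: 2, 3000: 2, 4000: 2},
    'ucf': {400: 10, 800: 2},
}

def decay_lr(optimizer, dataset, epoch):
    factor = LR_SCHEDULE[dataset].get(epoch + 1)
    if factor:
        optimizer.param_groups[-1]['lr'] /= factor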
import baseline
from train import train
import utils
import config

if __name__ == '__main__':
    opt = config.parse_opt()
    torch.cuda.set_device(1)
    torch.manual_seed(opt.SEED)
    torch.cuda.manual_seed(opt.SEED)
    torch.backends.cudnn.benchmark = True

    dictionary = Dictionary({'Yes': 0}, ['Yes'])
    dictionary.init_dict()
    train_set = FeatureDataset(opt.QUESTION_TYPE, dictionary, 'Train')
    test_set = FeatureDataset(opt.QUESTION_TYPE, dictionary, 'Test')
    constructor = 'build_baseline'
    model = getattr(baseline, constructor)(train_set, opt).cuda()
    model.w_emb.init_embedding()
    train_loader = DataLoader(train_set, opt.BATCH_SIZE, shuffle=True, num_workers=1)
    test_loader = DataLoader(test_set, opt.BATCH_SIZE, shuffle=True, num_workers=1)
    print('Length of train:', len(train_loader))
    print('Length of test:', len(test_loader))
n_feature = 43
data_FN = 'data.csv'
# n_feature = 157
# data_FN = 'D:/data/data.csv'

if task_type == 'classification':
    label_column = 'label_3'  # label column of the dataframe file
    n_label = 3
    model = LCM(n_feature=n_feature, n_label=n_label)
elif task_type == 'regression':
    label_column = 'return'  # label column of the dataframe file
    model = LRM(n_feature=n_feature)

train_loader = torch.utils.data.DataLoader(
    FeatureDataset(data_FN=data_FN, type='train', label_column=label_column),
    batch_size=batch_size,
    shuffle=False,
)
test_loader = torch.utils.data.DataLoader(
    FeatureDataset(data_FN=data_FN, type='test', label_column=label_column),
    batch_size=batch_size,
    shuffle=False,
)

optimizer = optim.Adagrad(model.parameters(), lr=lr)

# accuracy tracking, for reporting
best_loss = 1000
plot_batch = 100
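# LCM and LRM are project-local models. Minimal stand-ins consistent with the
# constructor calls above (a linear classifier producing n_label logits and a
# linear regressor; the single-layer architecture is an assumption):
import torch.nn as nn

class LCM(nn.Module):
    def __init__(self, n_feature, n_label):
        super().__init__()
        self.fc = nn.Linear(n_feature, n_label)

    def forward(self, x):
        return self.fc(x)  # logits, to be paired with a classification loss

class LRM(nn.Module):
    def __init__(self, n_feature):
        super().__init__()
        self.fc = nn.Linear(n_feature, 1)

    def forward(self, x):
        return self.fc(x).squeeze(-1)  # one scalar prediction per sample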
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--arch', required=True, help='architecture (model_dir)')
    parser.add_argument('--data_dir', default='../../data/', type=str)
    parser.add_argument('--do_train', action='store_true')
    parser.add_argument('--do_predict', action='store_true')
    parser.add_argument('--hidden_size', default=512, type=int)
    parser.add_argument('--batch_size', default=128, type=int)
    parser.add_argument('--max_epoch', default=300, type=int)
    parser.add_argument('--lr', default=1e-3, type=float)
    parser.add_argument('--cuda', default=0, type=int)
    parser.add_argument('--ckpt', default=-1, type=int, help='load pre-trained model epoch')
    args = parser.parse_args()

    if args.do_train:
        dataset = pd.read_csv(args.data_dir + "train.csv")
        dataset.drop("Id", axis=1, inplace=True)
        dataset.drop("F2", axis=1, inplace=True)
        dataset.drop("F7", axis=1, inplace=True)
        dataset.drop("F12", axis=1, inplace=True)
        train_set, valid_set = train_test_split(dataset, test_size=0.1, random_state=58)
        train = preprocess_samples(train_set, missing=["F2", "F7", "F12"])
        valid = preprocess_samples(valid_set, missing=["F2", "F7", "F12"])
        trainData = FeatureDataset(train)
        validData = FeatureDataset(valid)

        device = torch.device('cuda:%d' % args.cuda if torch.cuda.is_available() else 'cpu')
        model = simpleNet(args.hidden_size)
        model.to(device)
        opt = torch.optim.Adam(model.parameters(), lr=args.lr)
        # criteria = torch.nn.CrossEntropyLoss()
        criteria = torch.nn.BCEWithLogitsLoss()
        max_epoch = args.max_epoch
        batch_size = args.batch_size

        trainer = Trainer(device, trainData, validData, model, criteria, opt,
                          batch_size, args.arch)
        for epoch in range(1, max_epoch + 1):
            print('Epoch: {}'.format(epoch))
            trainer.run_epoch(epoch, True)
            trainer.run_epoch(epoch, False)
            trainer.save(epoch)

    if args.do_predict:
        dataset = pd.read_csv(args.data_dir + "test.csv")
        dataset.drop("Id", axis=1, inplace=True)
        dataset.drop("F2", axis=1, inplace=True)
        dataset.drop("F7", axis=1, inplace=True)
        dataset.drop("F12", axis=1, inplace=True)
        test = preprocess_samples(dataset, missing=["F2", "F7", "F12"])
        testData = FeatureDataset(test)

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model = simpleNet(args.hidden_size)
        model.load_state_dict(torch.load('%s/model.pkl.%d' % (args.arch, args.ckpt)))
        model.train(False)
        model.to(device)

        dataloader = DataLoader(dataset=testData,
                                batch_size=args.batch_size,
                                shuffle=False,
                                collate_fn=testData.collate_fn,
                                num_workers=4)
        trange = tqdm(enumerate(dataloader), total=len(dataloader), desc='Predict')
        prediction = []
        for i, (x, y) in trange:
            o_labels = model(x.to(device))
            # threshold sigmoid outputs at 0.5
            o_labels = torch.sigmoid(o_labels) > 0.5
            prediction.append(o_labels.to('cpu'))
        prediction = torch.cat(prediction).detach().numpy().astype(int)
        SubmitGenerator(prediction, args.data_dir + 'sampleSubmission.csv')
def main():
    parser = ArgumentParser()
    parser.add_argument('--arch', required=True)
    parser.add_argument('--do_train', action='store_true')
    parser.add_argument('--do_predict', action='store_true')
    parser.add_argument('--data_dir', default='../../data/', type=str)
    parser.add_argument('--cuda', default=0, type=int)
    parser.add_argument('--hidden_size', default=256, type=int)
    parser.add_argument('--batch_size', default=256, type=int)
    parser.add_argument('--max_epoch', default=1500, type=int)
    parser.add_argument('--lr', default=1e-3, type=float)
    parser.add_argument('--wd', default=1e-2, type=float)
    parser.add_argument('--do_plot', action='store_true')
    args = parser.parse_args()

    missing_list = ["F1"]

    if args.do_train:
        data = pd.read_csv(args.data_dir + 'train.csv')
        # axis=0 drops rows; axis=1 drops columns
        # inplace=True modifies the original DataFrame
        data.drop("Id", axis=1, inplace=True)
        for drop in missing_list:
            data.drop(drop, axis=1, inplace=True)

        # fixed random_state so the split is reproducible
        train_set, valid_set = train_test_split(data, test_size=0.1, random_state=73)
        train = preprocess_samples(train_set, missing_list)
        valid = preprocess_samples(valid_set, missing_list)
        trainData = FeatureDataset(train)
        validData = FeatureDataset(valid)

        device = torch.device('cuda:%d' % args.cuda if torch.cuda.is_available() else 'cpu')
        model = simpleNet(args.hidden_size)
        model.to(device)
        batch_size = args.batch_size
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd)
        # CrossEntropyLoss is the loss function for multi-class classification
        loss_function = torch.nn.CrossEntropyLoss()
        max_epoch = args.max_epoch

        trainer = Trainer(device, trainData, validData, model, loss_function,
                          optimizer, batch_size, args.arch)
        for epoch in range(max_epoch):
            print('Epoch: {}'.format(epoch))
            # True for training; False for validation
            trainer.run_epoch(epoch, True)
            trainer.run_epoch(epoch, False)

    if args.do_predict:
        data = pd.read_csv(args.data_dir + 'test.csv')
        data.drop("Id", axis=1, inplace=True)
        for drop in missing_list:
            data.drop(drop, axis=1, inplace=True)
        test = preprocess_samples(data, missing_list)
        testData = FeatureDataset(test)

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # construct the same architecture as in training before loading weights
        model = simpleNet(args.hidden_size)
        model.load_state_dict(torch.load(f'{args.arch}/model.pkl'))
        # model.eval() is equivalent to model.train(False)
        model.train(False)
        model.to(device)

        # DataLoader for the test data
        dataloader = DataLoader(
            testData,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=8,
            collate_fn=testData.collate_fn,
        )
        trange = tqdm(enumerate(dataloader), total=len(dataloader), desc='Predict')
        prediction = []
        for i, (x, y) in trange:
            o_labels = model(x.to(device))
            o_labels = torch.argmax(o_labels, dim=1)
            prediction.append(o_labels.to('cpu'))
        prediction = torch.cat(prediction).detach().numpy().astype(int)
        SubmitGenerator(prediction, args.data_dir + 'sampleSubmission.csv')

    if args.do_plot:
        plot_history(args.arch, args.max_epoch, plot_acc=True)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--arch', required=True, help='architecture (model_dir)')
    parser.add_argument('--data_dir', default='../../data/', type=str)
    parser.add_argument('--do_train', action='store_true')
    parser.add_argument('--do_predict', action='store_true')
    parser.add_argument('--do_plot', action='store_true')
    parser.add_argument('--hidden_size', default=256, type=int)
    parser.add_argument('--batch_size', default=256, type=int)
    parser.add_argument('--max_epoch', default=1500, type=int)
    parser.add_argument('--lr', default=1e-3, type=float)
    parser.add_argument('--wd', default=1e-2, type=float)
    parser.add_argument('--cuda', default=1, type=int)
    args = parser.parse_args()

    missing_list = ["F1"]

    if args.do_train:
        dataset = pd.read_csv(args.data_dir + "train.csv")
        dataset.drop("Id", axis=1, inplace=True)
        for drop in missing_list:
            dataset.drop(drop, axis=1, inplace=True)
        train_set, valid_set = train_test_split(dataset, test_size=0.1, random_state=73)
        train = preprocess_samples(train_set, missing=missing_list)
        valid = preprocess_samples(valid_set, missing=missing_list)
        trainData = FeatureDataset(train)
        validData = FeatureDataset(valid)

        device = torch.device('cuda:%d' % args.cuda if torch.cuda.is_available() else 'cpu')
        model = simpleNet(args.hidden_size, missing_list)
        model.to(device)
        batch_size = args.batch_size
        opt = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd)
        # cross-entropy is the proper loss for multi-class classification
        criteria = torch.nn.CrossEntropyLoss()
        trainer = Trainer(device, trainData, validData, model, criteria, opt,
                          batch_size, args.arch)

        max_epoch = args.max_epoch
        for epoch in range(max_epoch):
            # if epoch >= 10:
            #     plot_history(args.arch, plot_acc=True)
            print('Epoch: {}'.format(epoch))
            trainer.run_epoch(epoch, True)   # True for training
            trainer.run_epoch(epoch, False)  # False for validation

    if args.do_predict:
        dataset = pd.read_csv(args.data_dir + "test.csv")
        dataset.drop("Id", axis=1, inplace=True)
        for drop in missing_list:
            dataset.drop(drop, axis=1, inplace=True)
        test = preprocess_samples(dataset, missing=missing_list)
        testData = FeatureDataset(test)

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model = simpleNet(args.hidden_size, missing_list)
        # load the saved model
        model.load_state_dict(torch.load(f'{args.arch}/model.pkl'))
        model.eval()  # equivalent to model.train(False)
        model.to(device)

        # DataLoader for testData: use `collate_fn=testData.collate_fn`
        # and do NOT shuffle for testing
        dataloader = DataLoader(
            testData,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=8,
            collate_fn=testData.collate_fn,
        )
        trange = tqdm(enumerate(dataloader), total=len(dataloader), desc='Predict')
        prediction = []
        for i, (x, y) in trange:
            o_labels = model(x.to(device))
            o_labels = torch.argmax(o_labels, dim=1)
            prediction.append(o_labels.to('cpu'))
        prediction = torch.cat(prediction).detach().numpy().astype(int)
        SubmitGenerator(prediction, args.data_dir + 'sampleSubmission.csv')

    if args.do_plot:
        plot_history(args.arch, args.max_epoch, plot_acc=True)
            if key[7:] in model.state_dict():
                new[key[7:]] = checkpoint['state_dict'][key]
        model.load_state_dict(new)
    else:
        print("=> no checkpoint found at '{}'".format(ckpt))
    model = torch.nn.DataParallel(
        model, device_ids=list(range(torch.cuda.device_count()))).cuda()
    return model


if __name__ == '__main__':
    if args.exp_name is None:
        args.exp_name = "{}_{}_{}".format(now, 'unet', args.modality)
    print('Experiment {} start!'.format(args.exp_name))

    eval_dataset = FeatureDataset('features/kinetics/thumos_test/data.csv',
                                  Fuser(fuse_type='none'))
    num_class = 101
    model_rgb = UntrimmedNetBack(num_class)
    model_rgb = build_model(model_rgb, args.restore_rgb)
    model_flow = UntrimmedNetBack(num_class)
    model_flow = build_model(model_flow, args.restore_flow)
    criterion = UnetLoss().cuda()
    best_prec1, best_prec5 = validate(eval_dataset, model_rgb, criterion,
                                      args.modality, model2=model_flow,
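# The key[7:] slicing here (and in the earlier restore block) strips the
# 'module.' prefix that torch.nn.DataParallel prepends when a wrapped model is
# saved; a reusable sketch of the same idea:
from collections import OrderedDict

def strip_module_prefix(state_dict):
    return OrderedDict(
        (k[len('module.'):] if k.startswith('module.') else k, v)
        for k, v in state_dict.items())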
    torch.load(os.path.join(folder, 'bert_model.pth'), map_location=device))
image_mha.load_state_dict(
    torch.load(os.path.join(folder, 'image_mha.pth'), map_location=device))
image_encoder.eval()
text_encoder.eval()
bert_model.eval()
image_mha.eval()
image_mha = None

# Define dataloader
def my_collate_fn(batch):
    return tuple(zip(*batch))

image_folder = opt['feature_folder']
test_image_dataset = FeatureDataset(image_folder, opt['test_file'])
test_image_dataloader = DataLoader(test_image_dataset, batch_size=64,
                                   shuffle=False, collate_fn=my_collate_fn)
test_text_dataset = TextDataset(opt['test_file'], tokenizer, opt['max_seq_len'])
test_text_dataloader = DataLoader(test_text_dataset, batch_size=32, shuffle=False)

# Print result
ks = [1, 5, 10]
t2i_recall = evaluate_t2i(image_mha, image_encoder, bert_model, text_encoder,
                          test_image_dataloader, test_text_dataloader, ks)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--arch', required=True)
    parser.add_argument('--do_train', action='store_true')
    parser.add_argument('--do_predict', action='store_true')
    parser.add_argument('--data_dir', default='../../data/')
    parser.add_argument('--cuda', default=1, type=int)
    parser.add_argument('--hidden_size', default=256, type=int)
    parser.add_argument('--batch_size', default=256, type=int)
    parser.add_argument('--epoch1', default=10, type=int)
    parser.add_argument('--epoch2', default=50, type=int)
    parser.add_argument('--lr', default=1e-3, type=float)
    parser.add_argument('--wd', default=1e-2, type=float)
    parser.add_argument('--do_plot', action='store_true')
    args = parser.parse_args()

    missing_list = ['F1']

    if args.do_train:
        data = pd.read_csv(args.data_dir + 'train.csv')
        # axis=0 drops rows; axis=1 drops columns
        # inplace=True modifies the original DataFrame
        data.drop("Id", axis=1, inplace=True)
        # for drop in missing_list:
        #     data.drop(drop, axis=1, inplace=True)

        train_set, valid_set = train_test_split(data, test_size=0.1, random_state=73)
        train = preprocess_samples(train_set, missing_list)
        valid = preprocess_samples(valid_set, missing_list)
        trainData = FeatureDataset(train)
        validData = FeatureDataset(valid)

        device = torch.device('cuda:%d' % args.cuda if torch.cuda.is_available() else 'cpu')

        # stage 1: regression model (MSE loss) that imputes the missing features
        model1 = simpleNet(args.hidden_size, 9 - len(missing_list), len(missing_list))
        model1.to(device)
        batch_size = args.batch_size
        optimizer = torch.optim.Adam(model1.parameters(), lr=args.lr, weight_decay=args.wd)
        trainer = Trainer(device, trainData, validData, model1, None,
                          optimizer, batch_size, args.arch)
        epoch1 = args.epoch1
        for epoch in range(epoch1):
            print('Epoch: {}'.format(epoch))
            # True for training; False for validation
            trainer.run_epoch(epoch, True, stage1=True)
            trainer.run_epoch(epoch, False, stage1=True)

        # stage 2: multi-class classifier (cross-entropy loss) on top of stage 1
        model2 = simpleNet(args.hidden_size, 9, 12)
        model2.to(device)
        optimizer = torch.optim.Adam(model2.parameters(), lr=args.lr, weight_decay=args.wd)
        trainer = Trainer(device, trainData, validData, model1, model2,
                          optimizer, batch_size, args.arch)
        epoch2 = args.epoch2
        for epoch in range(epoch2):
            print('Epoch: {}'.format(epoch))
            # True for training; False for validation
            trainer.run_epoch(epoch, True, stage1=False)
            trainer.run_epoch(epoch, False, stage1=False)

    if args.do_predict:
        pass

    if args.do_plot:
        pass
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--arch', default="model", help='architecture (model_dir)')
    parser.add_argument('--do_train', action='store_true')
    parser.add_argument('--do_predict', action='store_true')
    parser.add_argument('--do_plot', action='store_true')
    parser.add_argument('--hidden_size', default=256, type=int)
    parser.add_argument('--batch_size', default=256, type=int)
    parser.add_argument('--max_epoch', default=10000, type=int)
    parser.add_argument('--lr', default=1e-3, type=float)
    parser.add_argument('--step_lr', default=0.5, type=float)
    parser.add_argument('--cuda', default=0, type=int)
    parser.add_argument('--ckpt', type=int, help='load pre-trained model epoch')
    args = parser.parse_args()

    if args.do_train:
        dataset = pd.read_csv("../../data/train.csv")
        dataset.drop("Id", axis=1, inplace=True)
        train_set, valid_set = train_test_split(dataset, test_size=0.1, random_state=73)
        feature_for_training = ["F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9"]
        feature_for_prediction = ["F1"]
        train = preprocess_samples(train_set, feature_for_training, feature_for_prediction)
        valid = preprocess_samples(valid_set, feature_for_training, feature_for_prediction)
        trainData = FeatureDataset(train)
        validData = FeatureDataset(valid)

        device = torch.device('cuda:%d' % args.cuda if torch.cuda.is_available() else 'cpu')
        max_epoch = args.max_epoch
        trainer = Trainer(device, trainData, validData, args)
        for epoch in range(1, max_epoch + 1):
            print('Epoch: {}'.format(epoch))
            trainer.run_epoch(epoch, True)
            trainer.run_epoch(epoch, False)

    if args.do_predict:
        dataset = pd.read_csv("../../data/test.csv")
        dataset.drop("Id", axis=1, inplace=True)
        feature_for_testing = ["F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9"]
        test = preprocess_samples(dataset, feature_for_testing)
        testData = FeatureDataset(test)

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model = SimpleNet(input_size=9, output_size=12, hidden_size=args.hidden_size)
        model.load_state_dict(torch.load('%s/model.pkl.%d' % (args.arch, args.ckpt)))
        model.train(False)
        model.to(device)

        dataloader = DataLoader(dataset=testData,
                                batch_size=args.batch_size,
                                shuffle=False,
                                collate_fn=testData.collate_fn,
                                num_workers=4)
        trange = tqdm(enumerate(dataloader), total=len(dataloader), desc='Predict')
        prediction = []
        for i, (ft, _, y) in trange:
            # the missing feature F1 is replaced with a zero column so the
            # input matches the 9-feature layout the model was trained on
            b = ft.shape[0]
            missing_ft = torch.zeros(b, 1)
            all_ft = torch.cat([missing_ft, ft], dim=1)
            o_labels, _ = model(all_ft.to(device))
            o_labels = torch.argmax(o_labels, dim=1)
            prediction.append(o_labels.to('cpu').numpy().tolist())
        prediction = sum(prediction, [])
        SubmitGenerator(prediction, "../../data/sampleSubmission.csv")

    if args.do_plot:
        plot_history("{file}/history.json".format(file=args.arch))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--arch', required=True, help='architecture (model_dir)')
    parser.add_argument('--data_dir', default='../../data/', type=str)
    parser.add_argument('--do_train', action='store_true')
    parser.add_argument('--do_predict', action='store_true')
    parser.add_argument('--hidden_size', default=512, type=int)
    parser.add_argument('--batch_size', default=32, type=int)
    parser.add_argument('--max_epoch', default=800, type=int)
    parser.add_argument('--lr', default=1e-3, type=float)
    parser.add_argument('--cuda', default=1, type=int)
    parser.add_argument('--ckpt', default=-1, type=int, help='load pre-trained model epoch')
    args = parser.parse_args()

    if args.do_train:
        dataset = pd.read_csv(args.data_dir + "train.csv")
        dataset.drop("Id", axis=1, inplace=True)
        train_set, valid_set = train_test_split(dataset, test_size=0.2, random_state=42)
        train = preprocess_samples(train_set, missing=["F2", "F7", "F12"])
        valid = preprocess_samples(valid_set, missing=["F2", "F7", "F12"])
        trainData = FeatureDataset(train)
        validData = FeatureDataset(valid)

        device = torch.device('cuda:%d' % args.cuda if torch.cuda.is_available() else 'cpu')
        trainer = Trainer(device, trainData, validData, args.hidden_size,
                          args.lr, args.batch_size, args.arch)
        for epoch in range(1, args.max_epoch + 1):
            print('Epoch: {}'.format(epoch))
            trainer.run_epoch(epoch, True)
            trainer.run_epoch(epoch, False)
            if epoch % 50 == 0:
                trainer.save(epoch)

    if args.do_predict:
        dataset = pd.read_csv(args.data_dir + "test.csv")
        dataset.drop("Id", axis=1, inplace=True)
        test = preprocess_samples(dataset, missing=["F2", "F7", "F12"])
        testData = FeatureDataset(test)

        path = '%s/model.pkl.%d' % (args.arch, args.ckpt)
        checkpoint = torch.load(path)
        device = torch.device('cuda:%d' % args.cuda if torch.cuda.is_available() else 'cpu')
        model = SimpleNet(args.hidden_size)
        model.load_state_dict(checkpoint['model'])
        model.to(device)
        model.train(False)
        generator = Generator(args.hidden_size)
        generator.load_state_dict(checkpoint['generator'])
        generator.to(device)
        generator.train(False)

        dataloader = DataLoader(dataset=testData,
                                batch_size=args.batch_size,
                                shuffle=False,
                                collate_fn=testData.collate_fn,
                                num_workers=4)
        trange = tqdm(enumerate(dataloader), total=len(dataloader), desc='Predict')
        prediction = []
        for i, (features, missing, y) in trange:
            # the generator imputes the missing features, which are then
            # concatenated with the observed ones before classification
            gen_missing = generator(features.to(device))
            all_features = torch.cat((features.to(device), gen_missing.to(device)), dim=1)
            o_labels = model(all_features)
            o_labels = torch.sigmoid(o_labels) > 0.5
            prediction.append(o_labels.to('cpu'))
        prediction = torch.cat(prediction).detach().numpy().astype(int)
        SubmitGenerator(prediction, args.data_dir + 'sampleSubmission.csv')
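# Generator is defined elsewhere in this project; a minimal stand-in
# consistent with the call above (observed features in, imputed missing
# features out). With F2, F7, F12 missing out of features F1-F14, the
# 11-in/3-out sizes below are an inference, and the hidden layer is
# purely illustrative.
import torch.nn as nn

class Generator(nn.Module):
    def __init__(self, hidden_size, n_observed=11, n_missing=3):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_observed, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, n_missing),
        )

    def forward(self, features):
        return self.net(features)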
batch_size = args.batch_size
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)
torch.backends.cudnn.benchmark = True

# Construct dataset
datasets = {}
loaders = {}
for key in ['train', 'eval', 'test']:
    is_shuffle = (key == 'train')  # only shuffle the training split
    datasets[key] = FeatureDataset(key)
    loaders[key] = DataLoader(datasets[key], batch_size, shuffle=is_shuffle, num_workers=4)

# Construct model (by argument)
constructor = 'build_%s' % args.model
print("[*] Model Construction Start")
if args.model == 'baseline':
    import models.baseline as baseline
    print("[*]\t None model construction")
    # model = getattr(baseline.model, constructor)(train_dset, args.num_hid).cuda()
    model = baseline.model()
else:
    raise NotImplementedError
# Seed everything
seed_everything(2019)

# Define tokenizer for text
if opt['text_model_type'] == 'roberta':
    tokenizer = RobertaTokenizer.from_pretrained(opt['text_model_pretrained'])
else:
    tokenizer = BertTokenizer.from_pretrained(opt['text_model_pretrained'])

# Define dataset
image_dataset = ImageFeatureDataset(opt['feature_folder'],
                                    max_num_regions=opt["max_num_regions"],
                                    device=device)
text_dataset = TextDataset(opt['train_file'], tokenizer, opt['max_seq_len'])
dataset = PairFeatureDataset(image_dataset, text_dataset)
dataloader = DataLoader(dataset, batch_size=opt['batch_size'], shuffle=True,
                        collate_fn=my_collate_fn)

# Dataset for evaluation
val_image_dataset = FeatureDataset(opt['feature_folder'], opt['val_file'],
                                   max_num_regions=opt["max_num_regions"], device=device)
val_image_dataloader = DataLoader(val_image_dataset, batch_size=opt['batch_size'],
                                  collate_fn=my_collate_fn)
val_text_dataset = TextDataset(opt['val_file'], tokenizer, opt['max_seq_len'])
val_text_dataloader = DataLoader(val_text_dataset, batch_size=opt['batch_size'],
                                 shuffle=False)

# Test dataset
test_image_dataset = FeatureDataset(opt['feature_folder'], opt['test_file'],
                                    max_num_regions=opt["max_num_regions"], device=device)
test_image_dataloader = DataLoader(test_image_dataset, batch_size=opt['batch_size'],
                                   collate_fn=my_collate_fn)
test_text_dataset = TextDataset(opt['test_file'], tokenizer, opt['max_seq_len'])
test_text_dataloader = DataLoader(test_text_dataset, batch_size=opt['batch_size'],
                                  shuffle=False)

# Define model
text_encoder = NeuralNetwork(input_dim=opt['text_dim'],
                             output_dim=opt['common_dim'],
                             hidden_units=opt['text_encoder_hidden'],
                             hidden_activation=opt['text_encoder_hidden_activation'],
                             output_activation=opt['text_encoder_output_activation'],
        return None

    def _frame_to_feature(self, frame):
        assert isinstance(frame, (int, np.int64)), type(frame)
        return self.current_features[self._frame_to_segment(frame)]

    def _frame_to_segment(self, index_of_frame):
        assert index_of_frame < self.current_video_length
        assert index_of_frame >= 0
        rst = int(index_of_frame // self.segment_length)
        if rst > self.current_maximum_segment:
            return self.current_maximum_segment
        return rst


if __name__ == "__main__":
    from dataset import FeatureDataset
    from models import buildClassifier

    clf = buildClassifier('result/clf_avg_ucfandt4_model.pth.tar')
    fuser = Fuser(fuse_type='average')
    eval_dataset = FeatureDataset('features/thumos14/test/data.csv',
                                  is_thumos14_test_folder=True)
    env = TADEnv(eval_dataset, clf, fuser)
    ob = env.reset()

    import numpy as np
    # a zero action vector drives one environment step
    a = np.zeros([5])
    nob, rew, done, info = env.step(a)
def main(args):
    print('[MAIN] Experiment {} start!'.format(args.exp_name))
    # define necessary variables
    torch.set_num_threads(1)
    feature_length = 800
    filepath = 'None'
    obs_shape = [800, 800, 800]
    num_class = 101
    log_file = "result/rl/" + args.exp_name + "_log.csv"
    num_updates = int(args.num_frames) // args.num_steps // args.num_processes
    with open(log_file, 'w') as f:
        f.write('updates,num_timesteps,FPS,mean_reward,median_reward,min_reward,'
                'max_reward,entropy,value_loss,policy_loss,clf_loss,score,'
                'all_top1,all_top5\n')

    # define classifier
    i3d_model_checkpoint = "result/0804_1708_e2e_ucf_model.pth.tar"
    clf = Classifier(feature_length, num_class, isbn=False)
    clf = load_clf_from_i3d(clf, i3d_model_checkpoint)
    clf = torch.nn.DataParallel(clf, device_ids=list(range(torch.cuda.device_count()))).cuda()
    # clf_criterion = torch.nn.CrossEntropyLoss().cuda()
    # clf_optimizer = torch.optim.Adam(clf.parameters(), lr=args.lr)

    # define datasets
    train_dataset = FeatureDataset('features/thumos14/val/data.csv')
    eval_dataset = FeatureDataset('features/thumos14/test/data.csv',
                                  is_thumos14_test_folder=True)  # eval detection

    # define environments
    fuser = Fuser(fuse_type='average')
    envs = []
    for i in range(args.num_processes):
        print("[MAIN]\tBegin prepare the {}th env!".format(i))
        envs.append(make_env(dataset=train_dataset, classifier=clf, fuser=fuser,
                             observation_space=obs_shape, index=int(i), threshold=0.4))
    if args.num_processes > 1:
        envs = SubprocVecEnv(envs)
    else:
        envs = DummyVecEnv(envs)
    envs = VecNormalize(envs, ob=False, ret=False, gamma=args.gamma)

    # define actor
    actor_critic = Policy(obs_shape, envs.action_space, output_size=256)
    if args.cuda:
        actor_critic.cuda()

    # define the actor's update algorithm
    if args.algo == 'a2c':
        agent = A2C_ACKTR(actor_critic, args.value_loss_coef, args.entropy_coef,
                          lr=args.lr, eps=args.eps, alpha=args.alpha,
                          max_grad_norm=args.max_grad_norm)
    elif args.algo == 'ppo':
        agent = PPO(actor_critic, args.clip_param, args.ppo_epoch,
                    args.num_mini_batch, args.value_loss_coef, args.entropy_coef,
                    lr=args.lr, eps=args.eps, max_grad_norm=args.max_grad_norm)
    elif args.algo == 'acktr':
        agent = A2C_ACKTR(actor_critic, args.value_loss_coef, args.entropy_coef,
                          acktr=True)

    # prepare rollouts/observation
    rollouts = RolloutStorage(args.num_steps, args.num_processes,
                              (sum(obs_shape), ), envs.action_space, 1)
    current_obs = torch.zeros(args.num_processes, sum(obs_shape))

    def update_current_obs(obs, current_obs):
        print(envs.observation_space.shape)
        shape_dim0 = envs.observation_space.shape[0]
        obs = torch.from_numpy(obs).float()
        current_obs[:, -shape_dim0:] = obs
        return current_obs

    obs = envs.reset()
    current_obs = update_current_obs(obs, current_obs)
    rollouts.observations[0].copy_(current_obs)
    if args.cuda:
        current_obs = current_obs.cuda()
        rollouts.cuda()

    # These variables are used to log training.
    episode_rewards = torch.zeros([args.num_processes, 1])
    final_rewards = torch.zeros([args.num_processes, 1])
    score = AverageMeter()
    avg_prop_length = AverageMeter()
    start = time.time()
    top1 = top5 = -1

    # start training
    for j in range(num_updates):
        score.reset()
        if j == 10:
            break
        for step in range(args.num_steps):
            # sample actions
            with torch.no_grad():
                value, action, action_log_prob, states = actor_critic.act(
                    rollouts.observations[step], rollouts.states[step],
                    rollouts.masks[step])
            cpu_actions = action.squeeze(1).cpu().numpy()

            # Here is the step!
            obs, reward, done, info = envs.step(cpu_actions)
            print("[MAIN]\tIn updates {}, step {}, startframe {}, endframe {}, "
                  "totalframe {}, action {}, reward {}, prop_s {}, start_s {}, end_s {}"
                  .format(j, step,
                          [i['start_frame'] for i in info],
                          [i['end_frame'] for i in info],
                          [i['max_frame'] * 16 + 15 for i in info],
                          cpu_actions, reward,
                          [i['proposal_score'] for i in info],
                          [i['start_score'] for i in info],
                          [i['end_score'] for i in info]))
            reward = torch.from_numpy(np.expand_dims(np.stack(reward), 1)).float()
            episode_rewards += reward
            label = torch.from_numpy(
                np.expand_dims(np.stack([i['label'] for i in info]), 1)).float()

            # If done, then clean the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0] for done_ in done])
            final_rewards *= masks
            final_rewards += (1 - masks) * episode_rewards
            episode_rewards *= masks
            score.update(
                ((1 - masks.numpy()) *
                 np.array([i['proposal_score'] for i in info])).mean(),
                n=np.sum(1 - masks.numpy(), dtype=np.int32))
            avg_prop_length.update(
                np.mean((1 - masks.numpy()) *
                        np.array([i['start_frame'] - i['end_frame'] for i in info])),
                n=np.sum(1 - masks.numpy(), dtype=np.int32))

            if args.cuda:
                masks = masks.cuda()
            if current_obs.dim() == 4:
                current_obs *= masks.unsqueeze(2).unsqueeze(2)
            elif current_obs.dim() == 2:
                current_obs *= masks
            else:
                current_obs *= masks.unsqueeze(2)
            update_current_obs(obs, current_obs)
            rollouts.insert(current_obs, states, action, action_log_prob,
                            value, reward, masks, label)

        with torch.no_grad():
            next_value = actor_critic.get_value(rollouts.observations[-1],
                                                rollouts.states[-1],
                                                rollouts.masks[-1]).detach()
        rollouts.compute_returns(next_value, args.use_gae, args.gamma, args.tau)
        value_loss, action_loss, dist_entropy = agent.update(rollouts)
        rollouts.after_update()

        clf_loss = 0
        # if j > 200:
        #     clf_loss = train_classifier(data=rollouts, model=clf,
        #                                 criterion=clf_criterion,
        #                                 optimizer=clf_optimizer)

        if j % args.save_interval == 0 and args.save_dir != "":
            save_path = os.path.join(args.save_dir, args.algo)
            try:
                os.makedirs(save_path)
            except OSError:
                pass
            state = {'updates': j + 1, 'state_dict': actor_critic.state_dict()}
            filepath = os.path.join(
                save_path, args.exp_name + "_up{:06d}_model.pth.tar".format(j + 1))
            torch.save(state, filepath)

        # if j % args.clf_test_interval == 0:
        #     top1, top5 = validate(val_loader=eval_loader, model=clf,
        #                           criterion=clf_criterion)

        if j % args.log_interval == 0:
            end = time.time()
            total_num_steps = (j + 1) * args.num_processes * args.num_steps
            print("[MAIN]\tUpdates {}, num timesteps {}, FPS {}, "
                  "mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}, "
                  "entropy {:.5f}, value loss {:.5f}, policy loss {:.5f}, score {:.5f}"
                  .format(j, total_num_steps, int(total_num_steps / (end - start)),
                          final_rewards.mean(), final_rewards.median(),
                          final_rewards.min(), final_rewards.max(),
                          dist_entropy, value_loss, action_loss, score.avg))
            if top1:
                print('[MAIN]\tCLF TEST RAN! Top1 {}, TOP5 {}'.format(top1, top5))
            with open(log_file, 'a') as f:
                f.write("{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n".format(
                    j, total_num_steps, int(total_num_steps / (end - start)),
                    final_rewards.mean(), final_rewards.median(),
                    final_rewards.min(), final_rewards.max(),
                    dist_entropy, value_loss, action_loss, clf_loss,
                    score.avg, top1, top5))
            top1 = top5 = None

    return filepath