def get_args():
    """Parse command-line arguments for TCN training with an auxiliary
    language (caption) loss.

    Path defaults are built from the module-level EXP_DIR / EXP_NAME /
    MODEL_FOLDER constants and time_stamped(), so those must be configured
    before this is called.

    Returns:
        argparse.Namespace with the parsed arguments.
    """
    parser = argparse.ArgumentParser()
    # Training schedule.
    parser.add_argument('--start-epoch', type=int, default=0)
    parser.add_argument('--epochs', type=int, default=1000)
    parser.add_argument('--save-every', type=int, default=5)
    # Checkpointing: a fresh time-stamped folder per run; optionally resume.
    parser.add_argument('--model-folder', type=str,
                        default=join(EXP_DIR, EXP_NAME, 'trained_models',
                                     MODEL_FOLDER, time_stamped()))
    parser.add_argument('--load-model', type=str, required=False)
    # Data directories.
    parser.add_argument('--train-directory', type=str,
                        default=join(EXP_DIR, EXP_NAME, 'videos/train/'))
    parser.add_argument('--validation-directory', type=str,
                        default=join(EXP_DIR, EXP_NAME, 'videos/valid/'))
    # Optimization / sampling hyper-parameters.
    parser.add_argument('--minibatch-size', type=int, default=16)
    parser.add_argument('--margin', type=float, default=2.0)
    parser.add_argument('--model-name', type=str, default='tcn')
    parser.add_argument('--log-file', type=str, default='./out.log')
    parser.add_argument('--lr-start', type=float, default=0.01)
    parser.add_argument('--triplets-from-videos', type=int, default=5)
    parser.add_argument('--n-views', type=int, default=NUM_VIEWS)
    parser.add_argument('--alpha', type=float, default=0.001,
                        help='weighing factor of language loss to triplet loss')
    # Caption-model (LSTM) parameters.
    parser.add_argument('--embed_size', type=int, default=32,
                        help='dimension of word embedding vectors')
    parser.add_argument('--hidden_size', type=int, default=256,
                        help='dimension of lstm hidden states')
    parser.add_argument('--num_layers', type=int, default=1,
                        help='number of layers in lstm')
    return parser.parse_args()
def get_args():
    """Parse command-line arguments for single-view TCN training.

    Path defaults are built from the module-level EXP_DIR / EXP_NAME /
    MODEL_FOLDER constants and time_stamped(); --n-views defaults to the
    module-level SELECTED_VIEW.

    Returns:
        argparse.Namespace with the parsed arguments.
    """
    parser = argparse.ArgumentParser()
    # Training schedule.
    parser.add_argument('--start-epoch', type=int, default=0)
    parser.add_argument('--epochs', type=int, default=1000)
    parser.add_argument('--save-every', type=int, default=50)
    # Checkpointing: a fresh time-stamped folder per run; optionally resume.
    parser.add_argument('--model-folder', type=str,
                        default=join(EXP_DIR, EXP_NAME, 'trained_models',
                                     MODEL_FOLDER, time_stamped()))
    parser.add_argument('--load-model', type=str, required=False)
    # Data directories.
    parser.add_argument('--train-directory', type=str,
                        default=join(EXP_DIR, EXP_NAME, 'videos/train/'))
    parser.add_argument('--validation-directory', type=str,
                        default=join(EXP_DIR, EXP_NAME, 'videos/valid/'))
    # Optimization / sampling hyper-parameters.
    parser.add_argument('--minibatch-size', type=int, default=32)
    parser.add_argument('--margin', type=float, default=2.0)
    parser.add_argument('--model-name', type=str, default='tcn')
    parser.add_argument('--log-file', type=str, default='./out.log')
    parser.add_argument('--lr-start', type=float, default=0.01)
    parser.add_argument('--triplets-from-videos', type=int, default=5)
    parser.add_argument('--n-views', type=int, default=SELECTED_VIEW)
    parser.add_argument('--alpha', type=float, default=0.001,
                        help='weighing factor of language loss to triplet loss')
    # Model parameters (LSTM caption head).
    parser.add_argument('--embed_size', type=int, default=32,
                        help='dimension of word embedding vectors')
    parser.add_argument('--hidden_size', type=int, default=256,
                        help='dimension of lstm hidden states')
    parser.add_argument('--num_layers', type=int, default=1,
                        help='number of layers in lstm')
    return parser.parse_args()
def main(args):
    """Set up run folders, resolve the data builder and loss, then train.

    Expects args to carry exp_name, run_name, builder and load_model.
    Relies on module-level EXP_ROOT_DIR, LOSS_FN, Trainer, time_stamped
    and join being defined/imported elsewhere in this file.
    """
    # GPU configuration.
    use_cuda = torch.cuda.is_available()

    # Fresh time-stamped folder for this run's checkpoints.
    model_folder = join(EXP_ROOT_DIR, args.exp_name, 'trained_models',
                        args.run_name, time_stamped())
    os.makedirs(model_folder, exist_ok=True)

    # Data-loader builder is resolved by name from utils.builders.
    builder = getattr(importlib.import_module('utils.builders'), args.builder)
    loss_fn = LOSS_FN

    # Define train and validation directories.
    train_directory = join(EXP_ROOT_DIR, args.exp_name, 'videos/train/')
    validation_directory = join(EXP_ROOT_DIR, args.exp_name, 'videos/valid/')

    # Archive copies of the executed script and config next to the
    # checkpoints so the run is reproducible.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    os.makedirs(join(script_dir, 'experiments'), exist_ok=True)
    copy2(join(script_dir, 'train_tcn_no_captions.py'), model_folder)
    copy2(join(os.path.dirname(script_dir), 'gps-lfd', 'config.py'),
          model_folder)

    # Build training class and run.
    trainer = Trainer(use_cuda, args.load_model, model_folder,
                      train_directory, validation_directory, builder,
                      loss_fn, args)
    trainer.train()
def main(args):
    """Embed every 5th (RGB, depth) frame of each video with the TCN and
    log the normalized embeddings to the TensorBoard projector.

    Relies on module-level config: OUTPUT_PATH, MODEL_PATH, USE_CUDA,
    RGB_PATH, DEPTH_PATH, SELECTED_SEQS, IMAGE_SIZE, MODEL_FOLDER.
    Processes at most 31 videos (breaks once the counter exceeds 30).
    """
    output_folder = OUTPUT_PATH
    os.makedirs(output_folder, exist_ok=True)

    tcn = load_tcn_model(MODEL_PATH, use_cuda=USE_CUDA)

    logdir = os.path.join('runs', MODEL_FOLDER, 'embeddings_viz',
                          time_stamped())
    print("logging to {}".format(logdir))
    writer = SummaryWriter(logdir)

    image_buffer = []
    label_buffer = []
    feature_buffer = []
    processed = 0
    for file in [p for p in os.listdir(RGB_PATH) if p.endswith('.mp4')]:
        # Optional whitelist of sequence ids (filename prefix before '_').
        if SELECTED_SEQS is not None and file.split('_')[0] not in SELECTED_SEQS:
            continue
        print("Processing ", file)
        reader = imageio.get_reader(join(RGB_PATH, file))
        reader_depth = imageio.get_reader(join(DEPTH_PATH, file))
        episode_embeddings = []
        frame_idx = 0
        for im, im_depth in zip(reader, reader_depth):
            frame_idx += 1
            if frame_idx % 5 != 0:  # subsample: keep every 5th frame
                continue
            image_buffer.append(im)
            resized_image = resize_frame(im, IMAGE_SIZE)[None, :]
            resized_depth = resize_frame(im_depth, IMAGE_SIZE)[None, :]
            # Stack RGB channels with the first depth channel into one
            # 4-channel network input.
            frame = np.concatenate(
                [resized_image[0], resized_depth[0, None, 0]], axis=0)
            inp = torch.Tensor(frame[None, :])
            if USE_CUDA:
                inp = inp.cuda()
            output_normalized, output_unnormalized, pose_output = tcn(inp)
            # Only the normalized embedding is visualized downstream.
            episode_embeddings.append(
                output_normalized.detach().cpu().numpy())
            label_buffer.append(int(file.split('_')[0]))  # sequence id label
        feature_buffer.append(np.array(episode_embeddings))
        processed += 1
        if processed > 30:
            break

    print('generate embedding')
    feature_buffer = np.squeeze(np.array(feature_buffer))
    # Flatten (videos, frames, dim) -> (videos * frames, dim).
    features = torch.Tensor(
        np.reshape(np.array(feature_buffer), [
            feature_buffer.shape[0] * feature_buffer.shape[1],
            feature_buffer.shape[2]
        ]))
    label = torch.Tensor(np.asarray(label_buffer))
    images = torch.Tensor(
        np.transpose(np.asarray(image_buffer) / 255.0, [0, 3, 1, 2]))
    writer.add_embedding(features, metadata=label, label_img=images)
    print("=" * 10)
    print('Exit function')
# Expose four GPUs to CUDA.
# NOTE(review): the value "0, 1,2,3" contains a space after the first comma —
# confirm CUDA parses this device list as intended.
os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1,2,3"
# presumably the number of triplet-sampling repetitions — confirm against the
# training loop that consumes it.
ITERATE_OVER_TRIPLETS = 3
# Experiment configuration pulled from the imported conf object.
EXP_NAME = conf.EXP_NAME
#EXP_DIR = os.path.join('/home/msieb/data/tcn_data/experiments', EXP_NAME)
#EXP_DIR = os.path.join('/home/msieb/projects/data/tcn_data/experiments', EXP_NAME)
EXP_DIR = conf.EXP_DIR
MODEL_FOLDER = conf.MODEL_FOLDER
USE_CUDA = conf.USE_CUDA
NUM_VIEWS = conf.NUM_VIEWS
SAMPLE_SIZE = 100
builder = SingleViewPoseBuilder
# TensorBoard run directory, time-stamped per run; created at import time.
logdir = os.path.join('runs', MODEL_FOLDER, time_stamped())
print("logging to {}".format(logdir))
writer = SummaryWriter(logdir)


# NOTE(review): this definition appears truncated at a file/chunk boundary —
# it ends mid-argument-list (on a commented-out line) with no return
# statement; the complete version likely matches the other get_args
# definitions in this file.
def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--start-epoch', type=int, default=0)
    parser.add_argument('--epochs', type=int, default=1000)
    parser.add_argument('--save-every', type=int, default=5)
    parser.add_argument('--model-folder', type=str,
                        default=join(EXP_DIR, EXP_NAME, 'trained_models', MODEL_FOLDER, time_stamped()))
    parser.add_argument('--load-model', type=str, required=False)
    # parser.add_argument('--train-directory', type=str, default='./data/multiview-pouring/train/')
def main(args):
    """Embed consecutive (previous, current) RGB frame pairs of each video
    with the TCN and log L2-normalized embeddings to TensorBoard.

    Expects args.model_path to point at the trained model checkpoint.
    Relies on module-level config: OUTPUT_PATH, USE_CUDA, RGB_PATH,
    DEPTH_PATH, SELECTED_SEQS, EMBEDDING_VIZ_VIEWS, IMAGE_SIZE.
    Processes at most 30 videos.
    """
    print("Model path: ", args.model_path)
    output_folder = OUTPUT_PATH
    os.makedirs(output_folder, exist_ok=True)

    tcn = load_tcn_model(args.model_path, use_cuda=USE_CUDA)

    # Log next to the model checkpoint's folder, time-stamped per run.
    logdir = os.path.join('runs',
                          '/'.join(str.split(args.model_path, '/')[:-1]),
                          'embeddings_viz', time_stamped())
    print("logging to {}".format(logdir))
    writer = SummaryWriter(logdir)

    image_buffer = []
    label_buffer = []
    feature_buffer = []
    processed = 0
    files = [p for p in os.listdir(RGB_PATH) if p.endswith('.mp4')]
    for file in files:
        # Optional whitelists: by sequence id (prefix before '_') and by view.
        if SELECTED_SEQS is not None and file.split('_')[0] not in SELECTED_SEQS:
            continue
        if EMBEDDING_VIZ_VIEWS is not None and file.split('view')[1].split(
                '.mp4')[0] not in EMBEDDING_VIZ_VIEWS:
            continue
        print("Processing ", file)
        reader = imageio.get_reader(join(RGB_PATH, file))
        reader_depth = imageio.get_reader(join(DEPTH_PATH, file))
        # Read RGB and depth in lockstep: zip truncates to the shorter video,
        # which defines how many frames get embedded. Depth frames themselves
        # are not used below.
        rgb_buffer = []
        for im, _im_depth in zip(reader, reader_depth):
            rgb_buffer.append(im)
        episode_embeddings = []
        # Iterate over the buffered frames (not len(reader), which can exceed
        # the zip-truncated buffer length when the depth video is shorter).
        for i in range(len(rgb_buffer)):
            im = rgb_buffer[i]
            # Previous frame, clamped so frame 0 pairs with itself instead of
            # wrapping around to the last frame via rgb_buffer[-1].
            im_before = rgb_buffer[max(i - 1, 0)]
            image_buffer.append(im)
            resized_image = resize_frame(im, IMAGE_SIZE)[None, :]
            resized_image_before = resize_frame(im_before, IMAGE_SIZE)[None, :]
            emb_unnormalized, a_pred = get_view_embedding(
                tcn, resized_image_before, resized_image, use_cuda=USE_CUDA)
            # L2-normalize for visualization.
            embedding = emb_unnormalized / np.linalg.norm(emb_unnormalized)
            episode_embeddings.append(embedding)
            label_buffer.append(int(file.split('_')[0]))  # sequence id label
        feature_buffer.append(np.array(episode_embeddings))
        processed += 1
        if processed >= 30:
            break

    print('generate embedding')
    feature_buffer = np.squeeze(np.array(feature_buffer))
    features = feature_buffer
    label = torch.Tensor(np.asarray(label_buffer))
    images = torch.Tensor(
        np.transpose(np.asarray(image_buffer) / 255.0, [0, 3, 1, 2]))
    writer.add_embedding(features, metadata=label, label_img=images)
    print("=" * 10)
    print('Exit function')
    print("logged to {}".format(logdir))