Example no. 1
def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--start-epoch', type=int, default=0)
    parser.add_argument('--epochs', type=int, default=1000)
    parser.add_argument('--save-every', type=int, default=5)
    parser.add_argument('--model-folder',
                        type=str,
                        default=join(EXP_DIR, EXP_NAME, 'trained_models',
                                     MODEL_FOLDER, time_stamped()))
    parser.add_argument('--load-model', type=str, required=False)
    # parser.add_argument('--train-directory', type=str, default='./data/multiview-pouring/train/')
    # parser.add_argument('--validation-directory', type=str, default='./data/multiview-pouring/val/')
    parser.add_argument('--train-directory',
                        type=str,
                        default=join(EXP_DIR, EXP_NAME, 'videos/train/'))

    parser.add_argument('--validation-directory',
                        type=str,
                        default=join(EXP_DIR, EXP_NAME, 'videos/valid/'))

    parser.add_argument('--minibatch-size', type=int, default=16)
    parser.add_argument('--margin', type=float, default=2.0)
    parser.add_argument('--model-name', type=str, default='tcn')
    parser.add_argument('--log-file', type=str, default='./out.log')
    parser.add_argument('--lr-start', type=float, default=0.01)
    parser.add_argument('--triplets-from-videos', type=int, default=5)
    parser.add_argument('--n-views', type=int, default=NUM_VIEWS)
    parser.add_argument(
        '--alpha',
        type=float,
        default=0.001,
        help='weighting factor of the language loss relative to the triplet loss')

    # parser.add_argument('--model_path', type=str, default='models/' , help='path for saving trained models')
    # parser.add_argument('--crop_size', type=int, default=224 , help='size for randomly cropping images')
    # parser.add_argument('--vocab_path', type=str, default='data/vocab.pkl', help='path for vocabulary wrapper')
    # parser.add_argument('--image_dir', type=str, default='data/resized2014', help='directory for resized images')
    # parser.add_argument('--caption_path', type=str, default='data/annotations/captions_train2014.json', help='path for train annotation json file')
    # parser.add_argument('--log_step', type=int, default=10, help='step size for printing log info')
    # parser.add_argument('--save_step', type=int, default=1000, help='step size for saving trained models')

    # Model parameters
    parser.add_argument('--embed_size',
                        type=int,
                        default=32,
                        help='dimension of word embedding vectors')
    parser.add_argument('--hidden_size',
                        type=int,
                        default=256,
                        help='dimension of lstm hidden states')
    parser.add_argument('--num_layers',
                        type=int,
                        default=1,
                        help='number of layers in lstm')

    # parser.add_argument('--num_epochs', type=int, default=5)
    # parser.add_argument('--batch_size', type=int, default=128)
    # parser.add_argument('--num_workers', type=int, default=2)
    # parser.add_argument('--learning_rate', type=float, default=0.001)
    return parser.parse_args()
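
This excerpt assumes module-level scaffolding that is not shown: argparse, os.path.join, and a handful of config constants. A minimal sketch of that scaffolding, under the assumption that the constants live in a project config module (the import locations below are hypothetical; only the names come from the excerpt):

import argparse
from os.path import join

# hypothetical import locations for the constants and helper the parser reads
from config import EXP_DIR, EXP_NAME, MODEL_FOLDER, NUM_VIEWS
from utils.util import time_stamped

if __name__ == '__main__':
    args = get_args()
    # e.g. <EXP_DIR>/<EXP_NAME>/trained_models/<MODEL_FOLDER>/<timestamp>
    print(args.model_folder)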
Example no. 2
def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--start-epoch', type=int, default=0)
    parser.add_argument('--epochs', type=int, default=1000)
    parser.add_argument('--save-every', type=int, default=50)
    parser.add_argument('--model-folder',
                        type=str,
                        default=join(EXP_DIR, EXP_NAME, 'trained_models',
                                     MODEL_FOLDER, time_stamped()))
    parser.add_argument('--load-model', type=str, required=False)
    # parser.add_argument('--train-directory', type=str, default='./data/multiview-pouring/train/')
    # parser.add_argument('--validation-directory', type=str, default='./data/multiview-pouring/val/')
    parser.add_argument('--train-directory',
                        type=str,
                        default=join(EXP_DIR, EXP_NAME, 'videos/train/'))

    parser.add_argument('--validation-directory',
                        type=str,
                        default=join(EXP_DIR, EXP_NAME, 'videos/valid/'))

    parser.add_argument('--minibatch-size', type=int, default=32)
    parser.add_argument('--margin', type=float, default=2.0)
    parser.add_argument('--model-name', type=str, default='tcn')
    parser.add_argument('--log-file', type=str, default='./out.log')
    parser.add_argument('--lr-start', type=float, default=0.01)
    parser.add_argument('--triplets-from-videos', type=int, default=5)
    parser.add_argument('--n-views', type=int, default=SELECTED_VIEW)
    parser.add_argument(
        '--alpha',
        type=float,
        default=0.001,
        help='weighting factor of the language loss relative to the triplet loss')
    # Model parameters
    parser.add_argument('--embed_size',
                        type=int,
                        default=32,
                        help='dimension of word embedding vectors')
    parser.add_argument('--hidden_size',
                        type=int,
                        default=256,
                        help='dimension of lstm hidden states')
    parser.add_argument('--num_layers',
                        type=int,
                        default=1,
                        help='number of layers in lstm')
    return parser.parse_args()
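
This variant differs from Example no. 1 only in its defaults (--save-every 50, --minibatch-size 32, and --n-views taken from SELECTED_VIEW instead of NUM_VIEWS). Both embed time_stamped() in the default model folder so every run writes to a fresh directory; the helper is not shown in these excerpts, so the following is a hypothetical sketch of its assumed behavior:

from datetime import datetime

def time_stamped(fmt='%Y-%m-%d-%H-%M-%S'):
    # hypothetical sketch: a sortable timestamp string for per-run directories
    return datetime.now().strftime(fmt)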
Example no. 3
def main(args):
    # module = importlib.import_module(args.exp_name + '.config')
    # conf = getattr(module, 'Config_Isaac_Server')()
    # EXP_DIR = conf.EXP_DIR
    # MODEL_FOLDER = conf.MODEL_FOLDER

    # GPU configuration
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    # Load model
    model_folder = join(EXP_ROOT_DIR, args.exp_name, 'trained_models',
                        args.run_name, time_stamped())
    if not os.path.exists(model_folder):
        os.makedirs(model_folder)

    # Get data loader builder and loss function
    builder = getattr(importlib.import_module('utils.builders'), args.builder)
    loss_fn = LOSS_FN

    # Define train and validation directories
    train_directory = join(EXP_ROOT_DIR, args.exp_name, 'videos/train/')
    validation_directory = join(EXP_ROOT_DIR, args.exp_name, 'videos/valid/')

    # Keep copies of the executed script and config for reproducibility
    script_dir = os.path.dirname(os.path.realpath(__file__))
    if not os.path.exists(join(script_dir, 'experiments')):
        os.makedirs(join(script_dir, 'experiments'))
    copy2(join(script_dir, 'train_tcn_no_captions.py'), model_folder)
    copy2(join(os.path.dirname(script_dir), 'gps-lfd', 'config.py'),
          model_folder)

    # Build training class
    trainer = Trainer(use_cuda, args.load_model, model_folder, train_directory,
                      validation_directory, builder, loss_fn, args)
    trainer.train()
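
This main reads args.exp_name, args.run_name, args.builder, and args.load_model, none of which the get_args variants above define, so this script's parser must declare them. A minimal entry-point sketch under that assumption (the parser below is hypothetical):

import argparse

if __name__ == '__main__':
    # hypothetical parser covering only the attributes main() reads
    parser = argparse.ArgumentParser()
    parser.add_argument('--exp-name', type=str, required=True)
    parser.add_argument('--run-name', type=str, required=True)
    parser.add_argument('--builder', type=str, required=True)
    parser.add_argument('--load-model', type=str, required=False)
    main(parser.parse_args())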
Example no. 4
def main(args):
    # output_folder = join(OUTPUT_PATH, args.experiment_relative_path)
    output_folder = OUTPUT_PATH
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    tcn = load_tcn_model(MODEL_PATH, use_cuda=USE_CUDA)
    # input_folder = join(INPUT_PATH, args.experiment_relative_path)

    logdir = os.path.join('runs', MODEL_FOLDER, 'embeddings_viz',
                          time_stamped())
    print("logging to {}".format(logdir))
    writer = SummaryWriter(logdir)
    image_buffer = []
    label_buffer = []
    feature_buffer = []
    j = 0
    for file in [p for p in os.listdir(RGB_PATH) if p.endswith('.mp4')]:
        if SELECTED_SEQS is not None and file.split(
                '_')[0] not in SELECTED_SEQS:
            continue
        #if file.split('view')[1].split('.mp4')[0] not in ['0']:
        #    continue
        print("Processing ", file)
        reader = imageio.get_reader(join(RGB_PATH, file))
        reader_depth = imageio.get_reader(join(DEPTH_PATH, file))

        # preallocated but unused below; the per-episode buffers are used instead
        embeddings = np.zeros((len(reader), EMBEDDING_DIM))
        embeddings_normalized = np.zeros((len(reader), EMBEDDING_DIM))
        embeddings_episode_buffer = []
        embeddings_normalized_episode_buffer = []

        i = 0
        for im, im_depth in zip(reader, reader_depth):
            i += 1
            if i % 5 != 0:
                continue
            image_buffer.append(im)
            resized_image = resize_frame(im, IMAGE_SIZE)[None, :]
            resized_depth = resize_frame(im_depth, IMAGE_SIZE)[None, :]
            # resized_depth = resize_frame(depth_rescaled[:, :, None], IMAGE_SIZE)[None, :]
            frame = np.concatenate(
                [resized_image[0], resized_depth[0, None, 0]], axis=0)
            inputs = torch.Tensor(frame[None, :])
            if USE_CUDA:
                inputs = inputs.cuda()
            output_normalized, output_unnormalized, pose_output = tcn(inputs)
            embeddings_episode_buffer.append(
                output_unnormalized.detach().cpu().numpy())
            embeddings_normalized_episode_buffer.append(
                output_normalized.detach().cpu().numpy())
            label_buffer.append(int(
                file.split('_')[0]))  # video sequence label
            #label_buffer.append(int(file.split('view')[1].split('.mp4')[0])) # view label
            #label_buffer.append(i) # view label
        feature_buffer.append(np.array(embeddings_normalized_episode_buffer))
        j += 1
        if j > 30:
            break
    print('generate embedding')
    feature_buffer = np.squeeze(np.array(feature_buffer))
    features = torch.Tensor(
        np.reshape(np.array(feature_buffer), [
            feature_buffer.shape[0] * feature_buffer.shape[1],
            feature_buffer.shape[2]
        ]))
    label = torch.Tensor(np.asarray(label_buffer))
    images = torch.Tensor(
        np.transpose(np.asarray(image_buffer) / 255.0, [0, 3, 1, 2]))
    writer.add_embedding(features, metadata=label, label_img=images)

    print("=" * 10)

    print('Exit function')
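
The final add_embedding call feeds the TensorBoard projector, which is why the buffers are reshaped the way they are: features must be an (N, D) matrix, metadata a length-N sequence of labels, and label_img an (N, C, H, W) image tensor scaled to [0, 1]. A small sanity check along those lines (the assertion is illustrative, not part of the original script):

# all three inputs to add_embedding must agree on N
assert features.shape[0] == label.shape[0] == images.shape[0]

After the run, the projector can be opened with "tensorboard --logdir runs".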
Example no. 5
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"

ITERATE_OVER_TRIPLETS = 3

EXP_NAME = conf.EXP_NAME

#EXP_DIR = os.path.join('/home/msieb/data/tcn_data/experiments', EXP_NAME)
#EXP_DIR = os.path.join('/home/msieb/projects/data/tcn_data/experiments', EXP_NAME)
EXP_DIR = conf.EXP_DIR
MODEL_FOLDER = conf.MODEL_FOLDER

USE_CUDA = conf.USE_CUDA
NUM_VIEWS = conf.NUM_VIEWS
SAMPLE_SIZE = 100
builder = SingleViewPoseBuilder
logdir = os.path.join('runs', MODEL_FOLDER, time_stamped())
print("logging to {}".format(logdir))
writer = SummaryWriter(logdir)


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--start-epoch', type=int, default=0)
    parser.add_argument('--epochs', type=int, default=1000)
    parser.add_argument('--save-every', type=int, default=5)
    parser.add_argument('--model-folder',
                        type=str,
                        default=join(EXP_DIR, EXP_NAME, 'trained_models',
                                     MODEL_FOLDER, time_stamped()))
    parser.add_argument('--load-model', type=str, required=False)
    # parser.add_argument('--train-directory', type=str, default='./data/multiview-pouring/train/')
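
Example no. 5 reads its settings from a conf object whose construction the excerpt does not show. A hypothetical sketch of the attributes it must expose, judging only from what is accessed above (the values here are placeholders):

class Config:
    # hypothetical values; only the attribute names come from the excerpt
    EXP_NAME = 'my_experiment'
    EXP_DIR = '/path/to/tcn_data/experiments'
    MODEL_FOLDER = 'tcn'
    USE_CUDA = True
    NUM_VIEWS = 4

conf = Config()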
Example no. 6
def main(args):
    # output_folder = join(OUTPUT_PATH, args.experiment_relative_path)
    print("Model path: ", args.model_path)
    output_folder = OUTPUT_PATH
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    tcn = load_tcn_model(args.model_path, use_cuda=USE_CUDA)
    # input_folder = join(INPUT_PATH, args.experiment_relative_path)

    logdir = os.path.join('runs', os.path.dirname(args.model_path),
                          'embeddings_viz', time_stamped())
    print("logging to {}".format(logdir))
    writer = SummaryWriter(logdir)
    image_buffer = []
    label_buffer = []
    feature_buffer = []
    j = 0
    files = [p for p in os.listdir(RGB_PATH) if p.endswith('.mp4')]
    #files = sorted(files)
    for file in files:
        if SELECTED_SEQS is not None and file.split(
                '_')[0] not in SELECTED_SEQS:
            continue
        if EMBEDDING_VIZ_VIEWS is not None and file.split('view')[1].split(
                '.mp4')[0] not in EMBEDDING_VIZ_VIEWS:
            continue
        print("Processing ", file)
        reader = imageio.get_reader(join(RGB_PATH, file))
        reader_depth = imageio.get_reader(join(DEPTH_PATH, file))

        embeddings = np.zeros((len(reader), 4))  # preallocated but unused below
        embeddings_episode_buffer = []
        #poses = np.load(join(RGB_PATH, file.split('.mp4')[0]+'.npy'))[:, -4:]
        i = 0

        rgb_buffer = []
        depth_buffer = []
        for im, im_depth in zip(reader, reader_depth):
            rgb_buffer.append(im)
            depth_buffer.append(im_depth)

        for i in range(0, len(reader)):
            #i += 1
            #if i % 5 != 0:
            #    continue
            im = rgb_buffer[i]
            im_depth = depth_buffer[i]
            # note: at i == 0 the index -1 wraps around to the last frame
            im_before = rgb_buffer[i - 1]
            im_depth_before = depth_buffer[i - 1]
            image_buffer.append(im)

            resized_image = resize_frame(im, IMAGE_SIZE)[None, :]
            resized_depth = resize_frame(im_depth, IMAGE_SIZE)[None, :]
            # resized_depth = resize_frame(depth_rescaled[:, :, None], IMAGE_SIZE)[None, :]
            frame = np.concatenate(
                [resized_image[0], resized_depth[0, None, 0]], axis=0)
            resized_image_before = resize_frame(im_before, IMAGE_SIZE)[None, :]
            resized_depth_before = resize_frame(im_depth_before,
                                                IMAGE_SIZE)[None, :]
            # resized_depth = resize_frame(depth_rescaled[:, :, None], IMAGE_SIZE)[None, :]
            #frames = np.concatenate(resized_image, axis=0)
            #emb_unnormalized, a_pred = get_view_embedding(tcn, resized_image, use_cuda=USE_CUDA)
            emb_unnormalized, a_pred = get_view_embedding(tcn,
                                                          resized_image_before,
                                                          resized_image,
                                                          use_cuda=USE_CUDA)
            #emb_unnormalized, a_pred = get_view_embedding(tcn, resized_image, use_cuda=USE_CUDA)
            embedding = emb_unnormalized / np.linalg.norm(emb_unnormalized)
            embeddings_episode_buffer.append(embedding)
            label_buffer.append(int(
                file.split('_')[0]))  # video sequence label
            #label_buffer.append(poses[i-1]) # video sequence label
            #label_buffer.append(np.concatenate([delta_euler,np.array(int(file.split('view')[1].split('.mp4')[0]))[None]]))
            #label_buffer.append(i) # view label
        feature_buffer.append(np.array(embeddings_episode_buffer))
        j += 1
        if j >= 30:
            break
    print('generate embedding')
    feature_buffer = np.squeeze(np.array(feature_buffer))
    #feature_buffer = np.array(feature_buffer)
    #features = torch.Tensor(np.reshape(np.array(feature_buffer), [feature_buffer.shape[0]*feature_buffer.shape[1], feature_buffer.shape[2]]))
    # note: unlike Example no. 4, features stays a squeezed numpy array here
    features = feature_buffer
    label = torch.Tensor(np.asarray(label_buffer))
    images = torch.Tensor(
        np.transpose(np.asarray(image_buffer) / 255.0, [0, 3, 1, 2]))
    writer.add_embedding(features, metadata=label, label_img=images)

    print("=" * 10)

    print('Exit function')
    print("logged to {}".format(logdir))