Example #1
0
    args = parser.parse_args()

    # import tensorflow
    if args.verbosity >= 2:
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
    else:
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    import tensorflow as tf

    # import tf_util for TFRecords
    if args.tf_records:
        from shapeworld import tf_util

    # dataset
    dataset = dataset(dtype=args.type,
                      name=args.name,
                      language=args.language,
                      config=args.config)

    # information about dataset and model
    if args.verbosity >= 1:
        sys.stdout.write('{time} train {model} on {dataset}\n'.format(
            time=datetime.now().strftime('%H:%M:%S'),
            model=args.model,
            dataset=dataset))
        sys.stdout.write('         config: {}\n'.format(args.config))
        sys.stdout.write('         hyperparameters: {}\n'.format(
            args.hyperparams_file))
        sys.stdout.flush()

    if args.type == 'agreement':
        parameters = dict(
def _find_shape_and_color(caption):
    """Return the (shape, color) words mentioned in a caption string.

    The caption is split on whitespace. The last token that occurs in SHAPES
    is returned as the shape and the last token that occurs in COLORS as the
    color; either element is None when no such token is present.
    """
    shape = None
    color = None
    for word in caption.split():
        if word in SHAPES:
            shape = word
        if word in COLORS:
            color = word
    return shape, color


def load_shapeworld_dataset(data_path, embed_path, mode, size, ds_type, name, batch_size, random_seed, shuffle, img_feats, cuda, truncate_final_batch=False):
    """
    Reads a ShapeWorld dataset and yields it batch by batch (generator).

    Args:
        - data_path: path to folder containing the shapeworld data
        - embed_path: path to folder containing pretrained word vectors
        - mode: 'train', 'eval', or 'test'
        - size: size of dataset
        - ds_type: problem type e.g. 'agreement'
        - name: name of dataset, e.g. 'oneshape_simple_textselect'
        - batch_size: size of each batch
        - random_seed: accepted for interface compatibility; it is NOT used
          inside this function. Seed `random` (shuffling) and `np.random`
          (sampling of p) before calling if reproducibility is needed.
        - shuffle: whether to shuffle the dataset
        - img_feats: what type of image features to use e.g. 'avgpool_512', 'layer4_2'
        - cuda: whether to use cuda
        - truncate_final_batch: if True and size is not a multiple of
          batch_size, a smaller final batch with the remaining examples is
          yielded; otherwise the remainder is dropped

    Yields:
        One dict per batch with the keys:
        batch = { "images": images,
                  "target": targets,
                  "texts_str": natural_lang_desc_texts,
                  "caption_str": caption_str,
                  "texts_int": texts_int,
                  "shapes": shapes,
                  "colors": colors,
                  "texts_shapes": texts_shapes,
                  "texts_colors": texts_colors,
                  "p": p,
                  "masked_im_1": masked_im_1,
                  "masked_im_2": masked_im_2,
                  "texts_vec": texts_vec,
                  "texts_extra": texts_extra,
                  "im_feats_1": im_feats_1,
                  "im_feats_2": im_feats_2,
                  "non_blank_partition": non_blank_partition,
        }

    images: float tensor of the (downsized or upscaled) input images, channels moved to axis 1
    target: index of correct textual description
    texts_str: set of natural language descriptions of the image (only one is correct)
    caption_str: correct natural language description of the image
    texts_int: set of integer descriptions of the image (only one is correct)
    shapes: shape of the object in the correct caption, None if there is no explicit shape in the caption
    colors: color of the object in the correct caption, None if there is no explicit color in the caption
    texts_shapes: for each example, the shape (or None) found in each of its descriptions
    texts_colors: for each example, the color (or None) found in each of its descriptions
    p: one random value in [0, 1) per example; with FLAGS.vertical_mask it sets the fraction
       of the image kept for agent 1 (agent 2 receives the rest)
    masked_im_1: masked input image received by agent 1 (hidden pixels are set to 1)
    masked_im_2: masked input image received by agent 2 (complementary mask)
    texts_vec: vector representation of the set of natural language image descriptions for each example
    texts_extra: dict for individual word vectors for each description for each example and their corresponding lengths
    im_feats_1: image features for agent 1 (the masked image itself if FLAGS.improc_from_scratch)
    im_feats_2: image features for agent 2 (the masked image itself if FLAGS.improc_from_scratch)
    non_blank_partition: per-example result of get_non_blank_partition on the two masked images

    Side effect: for the first batch, the original and both masked image
    grids are written as PNG files into data_path.
    """
    # Read data
    debuglogger.debug('Reading in dataset...')
    load_cmd = 'load(' + data_path + ')'
    data = dataset(dtype=ds_type, name=name, config=load_cmd)
    generated = data.generate(n=size, mode=mode)
    debuglogger.debug('Dataset read...')
    order = list(range(size))
    assert len(generated['texts_str']) == size

    # Convert texts to vector
    texts_str = generated['texts_str']
    texts_int, word2id, id2word = convert_texts(texts_str)
    word2id = embed(word2id, embed_path)

    # Create feature extraction model
    model = FeatureModel()
    model.fn.eval()
    model.eval()

    if cuda:
        model.fn.cuda()
        model.cuda()

    # Shuffle
    if shuffle:
        random.shuffle(order)

    # Generate batches; optionally keep the incomplete remainder as a final batch
    num_batches = size // batch_size
    if truncate_final_batch and size % batch_size > 0:
        num_batches += 1

    for i in range(num_batches):
        batch_indices = sorted(order[i * batch_size:(i + 1) * batch_size])
        # The final batch may be shorter than batch_size when
        # truncate_final_batch is set, so always use the real length below.
        n_examples = len(batch_indices)
        batch = dict()
        debuglogger.debug(f'batch idxs: {batch_indices}')

        # Upscale images and convert to tensors
        ims = generated['world'][batch_indices]
        if FLAGS.improc_from_scratch:
            ims = downsize(ims, FLAGS.image_size)
        else:
            ims = upscale(ims)
        batch['images'] = torch.from_numpy(ims).float().permute(0, 3, 1, 2)

        # Extract target and texts
        batch['target'] = torch.from_numpy(generated['target'][batch_indices]).long()
        batch["texts_str"] = [generated['texts_str'][j] for j in batch_indices]
        batch["caption_str"] = [generated['caption_str'][j] for j in batch_indices]
        batch["texts_int"] = [texts_int[j] for j in batch_indices]

        # Get shape and color for batch
        caption_attrs = [_find_shape_and_color(cap) for cap in batch["caption_str"]]
        batch["shapes"] = [shape for shape, _ in caption_attrs]
        batch["colors"] = [color for _, color in caption_attrs]

        # Get shape and color for texts
        batch["texts_shapes"] = []
        batch["texts_colors"] = []
        for texts in batch["texts_str"]:
            text_attrs = [_find_shape_and_color(text) for text in texts]
            batch["texts_shapes"].append([shape for shape, _ in text_attrs])
            batch["texts_colors"].append([color for _, color in text_attrs])

        # Generate p (one value per example actually present in this batch)
        batch['p'] = torch.from_numpy(np.random.rand(n_examples))

        # Mask images
        debuglogger.debug(f'Image dims: {batch["images"].shape}')
        (bs, ch, dim_2, dim_3) = batch['images'].shape
        mask = torch.ones(bs, ch, dim_2, dim_3)
        # Vertical mask
        if FLAGS.vertical_mask:
            # The cutoff is applied along the last axis, so it must be derived
            # from that axis' size (identical to the other axis for square images).
            cutoffs = (dim_3 * batch["p"]).int().clamp(0, dim_3 - 1).numpy().tolist()
            debuglogger.debug(f'cutoffs: {cutoffs}')
            for i_c, c in enumerate(cutoffs):
                mask[i_c, :, :, c:] = 0
        else:
            # Random mask
            for i_m in range(bs):
                mask[i_m] = generate_mask(batch['images'][i_m])
        # Hidden pixels (mask == 0 for agent 1, mask == 1 for agent 2) become 1
        batch['masked_im_1'] = torch.mul(mask, batch['images']) + (1 - mask)
        batch['masked_im_2'] = torch.mul(1 - mask, batch['images']) + mask

        if i == 0:
            # Save example batch
            save_image(batch['images'], data_path + '/example_ims_orig.png', pad_value=0.5)
            save_image(batch['masked_im_1'], data_path + '/example_ims_1.png', pad_value=0.5)
            save_image(batch['masked_im_2'], data_path + '/example_ims_2.png', pad_value=0.5)

        # Build descriptions
        desc_cbow, desc_set, desc_set_lens = cbow_general(batch["texts_int"], word2id, id2word)
        batch["texts_vec"] = desc_cbow
        batch["texts_extra"] = {"desc_set": desc_set,
                                "desc_set_lens": desc_set_lens}

        # Extract image feats
        m_im_1 = Variable(batch['masked_im_1'])
        m_im_2 = Variable(batch['masked_im_2'])
        if cuda:
            m_im_1 = m_im_1.cuda()
            m_im_2 = m_im_2.cuda()
        if FLAGS.improc_from_scratch:
            # No pretrained feature extractor: the agents see the raw masked images
            batch["im_feats_1"] = m_im_1
            batch["im_feats_2"] = m_im_2
        else:
            batch["im_feats_1"] = (model(m_im_1, request=img_feats)[0]).detach()
            batch["im_feats_2"] = (model(m_im_2, request=img_feats)[0]).detach()

        # Identify non blank partition
        batch['non_blank_partition'] = [
            get_non_blank_partition(batch['masked_im_1'][j], batch['masked_im_2'][j])
            for j in range(n_examples)
        ]

        yield batch
from shapeworld import dataset

# Load the pre-generated 'oneshape_simple_textselect' agreement dataset from
# disk and draw 250 training examples from it.
dataset = dataset(
    dtype='agreement',
    name='oneshape_simple_textselect',
    config='load(../data/oneshape_simple_textselect)',
)
generated = dataset.generate(n=250, mode='train')

# Fields printed for the sample examples below.
k = ['caption_str', 'texts_str', 'pred_items']

# Show every generated field with its container type (and the shape of 'target').
for field in generated:
    value = generated[field]
    print(field, type(value))
    if field == 'target':
        print(value.shape)

# Print the first ten examples.
for idx in range(10):
    pred_items = generated[k[2]][idx]
    caption = generated[k[0]][idx]
    texts = generated[k[1]][idx]
    print(
        f'Prediction items: {pred_items}, caption: {caption}, texts: {texts}'
    )
Example #4
0
from shapeworld import dataset
import pprint

# Build the 'oneshape_simple_textselect' agreement dataset (no config given)
# and generate 30 training examples with noise and the underlying models.
dataset = dataset(dtype='agreement', name='oneshape_simple_textselect')
generated = dataset.generate(
    n=30,
    mode='train',
    noise_range=0.1,
    include_model=True,
)
Example #5
0
# Split sizes. N_TRAIN and N_CAPTIONS are defined above this excerpt;
# presumably N_VAL / N_TEST are the validation and test counts — confirm.
N_VAL = 500
N_TEST = 500

# Alternative, much smaller configuration kept for reference (disabled).
#N_CAPTIONS = 100
#N_TRAIN = 50
#N_VAL = 25
#N_TEST = 25

# The three split sizes must add up exactly to the total number of captions.
assert N_TRAIN + N_VAL + N_TEST == N_CAPTIONS

# NOTE(review): these look like image dimensions and a per-caption example
# count, but their use is outside this excerpt — confirm against the consumer.
WIDTH = 64
HEIGHT = 64
CHANNELS = 3
EXAMPLES = 4

# ShapeWorld 'agreement' dataset named 'spatial_jda'.
DATASET = dataset(dtype="agreement", name="spatial_jda")
# RandomState seeded with 0. NOTE: this name shadows the stdlib `random`
# module if that module is imported in this file.
random = np.random.RandomState(0)

all_captions = {}
while len(all_captions) < N_CAPTIONS:
    if len(all_captions) % 500 == 0:
        print("%d / %d captions" % (len(all_captions), N_CAPTIONS))

    DATASET.world_generator.sample_values(mode="train")
    DATASET.world_captioner.sample_values(mode="train", correct=True)
    while True:
        world = DATASET.world_generator()
        if world is None:
            continue
        caption = DATASET.world_captioner(entities=world.entities)
        if caption is None:
Example #6
0
                        '--restore',
                        action='store_true',
                        help='Restore model (requires --model-file)')
    parser.add_argument(
        '-E',
        '--evaluate',
        action='store_true',
        help='Evaluate model without training (requires --model-file)')
    parser.add_argument('-V',
                        '--verbose-tensorflow',
                        action='store_true',
                        help='TensorFlow verbosity')
    args = parser.parse_args()

    # dataset
    dataset = dataset(dtype=args.type, name=args.name, config=args.config)
    sys.stdout.write('{} {} dataset: {}\n'.format(
        datetime.now().strftime('%H:%M:%S'), dataset.type, dataset.name))
    sys.stdout.write('         config: {}\n'.format(args.config))
    sys.stdout.flush()

    # import tensorflow
    if args.verbose_tensorflow:
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
    else:
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    import tensorflow as tf

    # model
    module = import_module('models.{}.{}'.format(args.type, args.model))
    sys.stdout.write('{} {} model: {}\n'.format(