def write_data(vid_path, out_dir, train_frac=0.75):
    im_dir = ut.mkdir(pj(out_dir, 'ims'))
    in_data = []
    meta_files = sorted(ut.glob(vid_path, 'train', '*.txt'))
    print 'meta files:'
    for x in meta_files:
        print x
    print
    for meta_idx, meta_file in enumerate(meta_files):
        last_prev_time = 0.
        vid_file = meta_file.replace('.txt', '.mp4')
        for clip_idx, ex in enumerate(ut.read_lines(meta_file)):
            prev_time = last_prev_time
            vid_idx = '%05d_%05d' % (meta_idx, clip_idx)
            print ex
            s, time = ex.split()
            time = float(time)
            if s == 'p':
                label = 1
            elif s == 'n':
                label = 0
                last_prev_time = time
            else:
                raise RuntimeError()
            in_data.append((vid_file, time, label, vid_idx, im_dir, prev_time))
    print 'Writing:', len(in_data), 'sequences'
    meta_examples = ut.flatten(ut.parmap(extract_frames, in_data))
    meta_examples = ut.shuffled_with_seed(meta_examples)

    # add manu examples
    db_files = sorted(
        ut.sys_with_stdout('find ../data/manu-press -name "*.hdf5"').split())
    db_files = ut.shuffled_with_seed(db_files)
    print 'Train fraction:', train_frac
    num_train = int(train_frac * len(db_files))
    db_train = db_files[:num_train]
    db_test = db_files[num_train:]
    train_db_examples = ut.flatten(
        ut.parmap(examples_from_db, [(x, im_dir) for x in db_train]))
    test_db_examples = ut.flatten(
        ut.parmap(examples_from_db, [(x, im_dir) for x in db_test]))
    print 'Number of db train examples:', len(train_db_examples)
    print 'Number of meta examples:', len(meta_examples)
    train_examples = ut.shuffled_with_seed(meta_examples + train_db_examples)
    ut.write_lines(pj(out_dir, 'train.csv'),
                   ['%s,%s,%d,%s' % x for x in train_examples])

    test_examples = ut.shuffled_with_seed(test_db_examples)
    ut.write_lines(pj(out_dir, 'test.csv'),
                   ['%s,%s,%d,%s' % x for x in test_examples])
def show_db(pr, num_sample=None, num_per_object=5, n=None):
    """Visualize a sample of db files, capped per object name.

    Reads the first ``n`` entries of ``<pr.dsdir>/db_files.txt``, shuffles
    them with the default seed, and walks the first ``num_sample`` of the
    shuffled files. At most ``num_per_object`` files are rendered (via
    ``vis_example``) for any single name returned by ``name_from_file``; the
    collected rows are handed to ``ig.show``.
    """
    listed = ut.read_lines(pj(pr.dsdir, 'db_files.txt'))[:n]
    db_files = ut.shuffled_with_seed(listed)
    names = ut.parmap(name_from_file, db_files)

    shown_per_name = {}
    rows = []
    for name, db_file in zip(names, db_files[:num_sample]):
        already_shown = shown_per_name.get(name, 0)
        if already_shown >= num_per_object:
            continue
        shown_per_name[name] = already_shown + 1
        rows.append(vis_example(db_file))
    ig.show(rows)
        def load_im(k, v):
            """Decode one feature value and return its crops in a seeded order.

            ``k`` is the feature key: values whose key starts with ``gel`` or
            ``im`` go through ``ig.uncompress``; values whose key starts with
            ``depth`` are cast to float32. ``crop_type``, ``crop_dim`` and
            ``i`` are read from the enclosing scope.

            With ``crop_type == 'center'`` a single center crop is returned;
            with ``'multi'`` a 3x3 grid of ``crop_dim``-sized crops spanning
            the image is returned. The crops are shuffled with a seed built
            from the key prefix (text before the first ``_``) and ``i``.

            Raises:
                RuntimeError: for an unrecognised key prefix or crop type.
            """
            if k.startswith(('gel', 'im')):
                im = ig.uncompress(v)
            elif k.startswith('depth'):
                im = v.astype('float32')
            else:
                raise RuntimeError('Unknown feature key: %r' % (k,))

            if crop_type == 'center':
                # NOTE(review): fixed 224 here while 'multi' uses crop_dim --
                # confirm whether this should follow crop_dim as well.
                crops = [ut.crop_center(im, 224)]
            elif crop_type == 'multi':
                num_dim_samples = 3
                dh = im.shape[0] - crop_dim
                dw = im.shape[1] - crop_dim
                # Evenly spaced top-left corners, truncated to integers.
                ys = np.linspace(0, dh, num_dim_samples).astype('l')
                xs = np.linspace(0, dw, num_dim_samples).astype('l')
                crops = [im[y:y + crop_dim, x:x + crop_dim]
                         for y in ys for x in xs]
            else:
                # Previously fell through and failed with UnboundLocalError
                # on `crops` at the return below.
                raise RuntimeError('Unknown crop_type: %r' % (crop_type,))
            return ut.shuffled_with_seed(crops, k.split('_')[0] + str(i))
def make_tf(path):
    tf_file = pj(path, 'train.tf')
    if os.path.exists(tf_file):
        os.remove(tf_file)
    writer = tf.python_io.TFRecordWriter(tf_file)
    lines = ut.shuffled_with_seed(ut.read_lines(pj(path, 'train.csv')))
    print 'Number of examples:', len(lines)
    for line in lines:
        fname, prev_fname, label, _ = line.split(',')
        label = int(label)
        s = ut.read_file(fname, binary=True)
        s_prev = ut.read_file(prev_fname, binary=True)
        feat = {
            'im':
            tf.train.Feature(bytes_list=tf.train.BytesList(value=[s])),
            'im_prev':
            tf.train.Feature(bytes_list=tf.train.BytesList(value=[s_prev])),
            'label':
            tf.train.Feature(int64_list=tf.train.Int64List(value=[label]))
        }
        ex = tf.train.Example(features=tf.train.Features(feature=feat))
        writer.write(ex.SerializeToString())
    writer.close()
def write_data(out_dir,
               rebalance_data=True,
               train_frac=0.75,
               val_frac=0.0,
               n=None,
               seed=0):
    #def write_data(out_dir, rebalance_data = True, train_frac = 0.75, val_frac = 0.0, n = 10):
    assert not os.path.exists(out_dir)
    ut.mkdir(out_dir)
    base_data = '../data/grasp/'
    ut.sys_check('find -L %s -name "*.hdf5" > %s/all_db_files.txt' %
                 (base_data, out_dir))

    all_db_files = map(os.path.abspath,
                       ut.read_lines(pj(out_dir, 'all_db_files.txt'))[:n])
    all_db_files = ut.shuffled_with_seed(all_db_files, seed)
    all_db_files = filter(db_ok, all_db_files)
    ut.write_lines(pj(out_dir, 'db_files.txt'), all_db_files)

    by_name = ut.accum_dict((name_from_file(x), x) for x in all_db_files)

    names = ut.shuffled_with_seed(sorted(by_name.keys()), seed)
    num_names = len(names)
    num_train = int(train_frac * num_names)
    num_val = int(val_frac * num_names)
    i = 0
    train_names = names[i:num_train]
    i += num_train
    val_names = names[i:i + num_val]
    i += num_val
    test_names = names[i:]
    print num_train, num_val, len(test_names)

    splits = [('train', train_names), ('val', val_names), ('test', test_names)]

    print 'Number of objects in each split:'
    for s, o in splits:
        print s, '->', len(o)

    #press_clf = press.NetClf(press_model_file, gpu = write_data_gpu)
    press_clf = None  #press.NetClf(press_model_file, gpu = write_data_gpu)

    for dset_name, names in splits:
        ut.write_lines(pj(out_dir, '%s_objects.txt' % dset_name), names)
        tf_file = pj(out_dir, '%s.tf' % dset_name)
        pk_file = pj(out_dir, '%s.pk' % dset_name)
        full_pk_file = pj(out_dir, 'full_%s.pk' % dset_name)

        if os.path.exists(tf_file):
            os.remove(tf_file)
        writer = tf.python_io.TFRecordWriter(tf_file)

        split_db_files = ut.flatten(by_name[name] for name in names)
        split_db_files = ut.shuffled_with_seed(split_db_files, dset_name)

        data = []
        for db_file in ut.time_est(split_db_files):
            with h5py.File(db_file, 'r') as db:
                #print 'keys =', db.keys()
                def im(x, crop=False, compress=True):
                    x = ig.uncompress(x)
                    x = np.array(x)
                    if crop:
                        x = crop_kinect(x)
                        #ig.show(x)
                    x = ig.scale(x, (256, 256), 1)
                    if compress:
                        x = ig.compress(x)
                    return x

                def depth(x):
                    x = np.array(x).astype('float32')
                    x = ig.scale(x, (256, 256), 1)
                    return x

                def parse_ee(x):
                    names = [
                        'angle_of_EE_at_grasping', 'location_of_EE_at_grasping'
                    ]
                    vs = [x[name].value for name in names]
                    ee = np.concatenate([np.array(v).flatten()
                                         for v in vs]).astype('float32')
                    return ee

                label_file = pj(
                    label_path,
                    db_file.split('/')[-1].replace('.hdf5', '.txt'))
                if os.path.exists(label_file):
                    print 'Reading label from file'
                    is_gripping = bool(ut.read_file(label_file))
                else:
                    is_gripping = int(np.array(db['is_gripping']))

                pre, mid, _ = milestone_frames(db)

                # Estimate the probability that the robot is initially gripping the object
                if 0:
                    press_a = press_clf.predict(
                        im(db['/GelSightA_image'].value[mid], compress=False),
                        im(db['/GelSightA_image'].value[pre], compress=False))
                    press_b = press_clf.predict(
                        im(db['/GelSightB_image'].value[mid], compress=False),
                        im(db['/GelSightB_image'].value[pre], compress=False))
                    initial_press_prob = 0.5 * (press_a + press_b)
                else:
                    initial_press_prob = np.float32(-1.)
                #print initial_press_prob, ig.show(im(db['/GelSightA_image'].value[mid], compress = False))

                d = dict(
                    gel0_pre=im(db['/GelSightA_image'].value[pre]),
                    gel1_pre=im(db['/GelSightB_image'].value[pre]),
                    gel0_post=im(db['/GelSightA_image'].value[mid]),
                    gel1_post=im(db['/GelSightB_image'].value[mid]),
                    im0_pre=im(db['/color_image_KinectA'].value[pre],
                               crop=True),
                    im0_post=im(db['/color_image_KinectA'].value[mid],
                                crop=True),
                    im1_pre=im(db['/color_image_KinectB'].value[pre],
                               crop=True),
                    im1_post=im(db['/color_image_KinectB'].value[mid],
                                crop=True),
                    depth0_pre=depth(
                        crop_kinect(db['/depth_image_KinectA'].value[pre])),
                    depth0_post=depth(
                        crop_kinect(db['/depth_image_KinectA'].value[mid])),
                    initial_press_prob=initial_press_prob,
                    is_gripping=int(is_gripping),
                    end_effector=parse_ee(db),
                    object_name=str(np.array(db['object_name'].value)[0]),
                    db_file=db_file)

                data.append(d)
        # for db files
        ut.save(full_pk_file, data)

        # rebalance data?
        if rebalance_data:
            by_label = [[], []]
            for x in ut.shuffled_with_seed(data, 'rebalance1'):
                by_label[x['is_gripping']].append(x)
            n = min(map(len, by_label))
            print len(data), 'before rebalance'
            data = ut.shuffled_with_seed(by_label[0][:n] + by_label[1][:n],
                                         'rebalance2')
            print len(data), 'after rebalance'

        writer = tf.python_io.TFRecordWriter(tf_file)
        for d in data:
            fbl = lambda x: tf.train.Feature(bytes_list=tf.train.BytesList(
                value=[x]))
            fl = lambda x: tf.train.Feature(float_list=tf.train.FloatList(
                value=map(float, x.flatten())))
            il = lambda x: tf.train.Feature(int64_list=tf.train.Int64List(value
                                                                          =x))

            feat = {
                'gel0_pre': fbl(d['gel0_pre']),
                'gel1_pre': fbl(d['gel1_pre']),
                'gel0_post': fbl(d['gel0_post']),
                'gel1_post': fbl(d['gel1_post']),
                'im0_pre': fbl(d['im0_pre']),
                'im0_post': fbl(d['im0_post']),
                'im1_pre': fbl(d['im1_pre']),
                'im1_post': fbl(d['im1_post']),
                'depth0_pre': fl(d['depth0_pre']),
                'depth0_post': fl(d['depth0_post']),
                'end_effector': fl(d['end_effector']),
                'initial_press_prob': fl(d['initial_press_prob']),
                'is_gripping': il([d['is_gripping']])
            }
            ex = tf.train.Example(features=tf.train.Features(feature=feat))
            writer.write(ex.SerializeToString())
        writer.close()

        ut.save(pk_file, data)
        print dset_name, '->', len(data), 'examples'
# Example no. 6
# 0
def write_data(out_dir, train_frac=0.75, val_frac=0.05):
    ut.mkdir(out_dir)
    base_data = '../data/grasp/'
    ut.sys_check('find %s -name "*.hdf5" > %s/db_files.txt' %
                 (base_data, out_dir))

    all_db_files = ut.read_lines(pj(out_dir, 'db_files.txt'))
    all_db_files = ut.shuffled_with_seed(all_db_files)
    name_from_file = lambda x: '_'.join(x.split('/')[-1].split('_')[2:])

    by_name = ut.accum_dict((name_from_file(x), x) for x in all_db_files)

    names = ut.shuffled_with_seed(sorted(by_name.keys()))
    num_names = len(all_db_files)
    num_train = int(train_frac * num_names)
    num_val = int(val_frac * num_names)
    i = 0
    train_names = names[i:num_train]
    i += num_train
    val_names = names[i:i + num_val]
    i += num_val
    test_names = names[i:]

    for dset_name, names in [('train', train_names), ('val', val_names),
                             ('test', test_names)]:
        ut.write_lines(pj(out_dir, '%s_objects.txt' % dset_name), names)
        tf_file = pj(out_dir, '%s.tf' % dset_name)
        pk_file = pj(out_dir, '%s.pk' % dset_name)

        if os.path.exists(tf_file):
            os.remove(tf_file)
        writer = tf.python_io.TFRecordWriter(tf_file)

        data = []
        for name in names:
            for db_file in by_name[name]:
                with h5py.File(db_file, 'r') as db:

                    def im(x):
                        x = np.array(x)
                        x = ig.scale(x, (256, 256), 1)
                        return ig.compress(x)

                    if 'is_gripping' in db:
                        label = int(np.array(db['is_gripping']))
                    elif 'Is gripping?' in db:
                        label = int(np.array(db['Is gripping?']))
                    else:
                        print 'Skipping: %s. Missing is_gripping' % db_file
                        print 'Keys:', ' '.join(db.keys())
                        continue

                    data.append({
                        'gel0_pre':
                        im(db['GelSightA_image_pre_gripping']),
                        'gel1_pre':
                        im(db['GelSightB_image_pre_gripping']),
                        'gel0_post':
                        im(db['GelSightA_image_post_gripping']),
                        'gel1_post':
                        im(db['GelSightB_image_post_gripping']),
                        'is_gripping':
                        label
                    })

                    fbl = lambda x: tf.train.Feature(bytes_list=tf.train.
                                                     BytesList(value=[x]))
                    feat = {
                        'gel0_pre':
                        fbl(im(db['GelSightA_image_pre_gripping'])),
                        'gel1_pre':
                        fbl(im(db['GelSightB_image_pre_gripping'])),
                        'gel0_post':
                        fbl(im(db['GelSightA_image_post_gripping'])),
                        'gel1_post':
                        fbl(im(db['GelSightB_image_post_gripping'])),
                        'is_gripping':
                        tf.train.Feature(int64_list=tf.train.Int64List(
                            value=[label]))
                    }
                    ex = tf.train.Example(features=tf.train.Features(
                        feature=feat))
                    writer.write(ex.SerializeToString())
        writer.close()
        ut.save(pk_file, data)
        print dset_name, '->', len(data), 'examples'