def readSubjectData(self, subjectID):
        """
        Creates gesture object, reads subject data, creates gesture clips and
        writes into TFRecord.
        """
        if self.labelFilePathFormat is not None:
            gestureObject = GestureSample(self.dataFolder + self.sampleNameFormat.format(subjectID), 
                self.dataFolder+'/labels/'+self.labelFilePathFormat)
        else:
            gestureObject = GestureSample(self.dataFolder + self.sampleNameFormat.format(subjectID))
        labels = gestureObject.getGestures()
        # Filter gesture labels.
        numSubjectSamples = 0
        for labelEntry in labels: # labelEntry = [gestureLabel, startFrame, endFrame]
            if labelEntry[0] in self.config['gestureList']:
                try:
                    # If the clip has blank segmentation mask, discard it.
                    if self.fetchFrames(gestureObject, range(labelEntry[1], labelEntry[2])):
                        self.gestureLabel = labelEntry[0] #s.append(np.ones((1), dtype='uint8')*labelEntry[0])
                        self. ()  # NOTE: the method name is missing from this snippet; per the docstring, this is the call that writes the clip into the TFRecord.
                        self.numSamples += 1
                        numSubjectSamples += 1
                except:
                    print(str(labelEntry[1]) + " - " + str(labelEntry[2]))

                self.data['rgb'] = []
                self.data['segmentation'] = []
                self.data['depth'] = []
                self.data['skeleton'] = []
                self.gestureLabel = 0
        return numSubjectSamples
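A minimal usage sketch for the method above, assuming a hypothetical reader object that owns it and a hypothetical config key 'subjectList' (neither name appears in the snippet):

# Hypothetical driver loop; 'reader' and config['subjectList'] are assumptions.
total_clips = 0
for subjectID in reader.config['subjectList']:
    total_clips += reader.readSubjectData(subjectID)
print('Wrote %d gesture clips in total.' % total_clips)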
Example 2
def process():

    samples = glob(data + "/*.zip")
    # random.shuffle(samples)
    samples.sort()
    sample = samples[40]
    print sample
    sample = GestureSample(sample)
    gestures = sample.getGestures()
    gesture = gestures[3]

    skelet, depth, gray, user, c = pp.get_data(sample, gesture)
    user_o = user.copy()
    skelet, c = pp.proc_skelet(skelet)
    user = pp.proc_user(user)
    user_new, depth, c = pp.proc_depth(depth, user, user_o, skelet)
    gray, c = pp.proc_gray(gray, user, skelet)
    user = user_new

    video = empty((
        1,
        3,
    ) + gray.shape, dtype="uint8")
    video[0, 0], video[0, 1], video[0, 2] = gray, depth, user

    v = array(video, dtype="uint8")
    v = v.swapaxes(1, 2)
    # for i in xrange(gray.shape[1]):

    res_shape = (1, 2, 2, 32, 64, 64)
    v_new = empty(res_shape, dtype="uint8")
    h = res_shape[-1]
    v = v[:, :, :res_shape[2]]

    p = skelet[3]
    if p < 10: p = 100
    ofs = p * 0.25
    mid = v.shape[-1] / 2.
    sli = None
    if ofs < mid:
        start = int(round(mid - ofs))
        end = int(round(mid + ofs))
        sli = slice(start, end)

    for j in xrange(v.shape[2]):  #maps
        for k in xrange(v.shape[3]):  #frames
            #body
            img = v[0, 0, j, k]
            img = cut_img(img, 5)
            img = misc.imresize(img, (h, h))
            # if j==0: img = 255-misc.imfilter(img,"contour")
            v_new[0, 0, j, k] = img

            #hand
            img = v[0, 1, j, k]
            img = img[sli, sli]
            img = misc.imresize(img, (h, h))
            v_new[0, 1, j, k] = img
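    # Returned layout: (sample, body/hand, gray/depth map, frame, 64, 64);
    # index 1 selects the resized body image (0) or the cropped hand image (1).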
    return v_new
Example 3
def preprocess(samples):
    n_f = n_frames

    for file in samples:
        print "Processing", file
        sample = GestureSample(data + "/" + file)
        # proc_sample(sample)
        gestures = sample.getGestures()
        # for i in range(len(gestures)-1):
        #     end_prev = gestures[i][2]
        #     st_next = gestures[i+1][1]
        #     l = st_next-end_prev
        #     if l > n_frames:
        #         start = end_prev + int((l-n_frames)/2.)
        #         end = start + n_frames
        #         gestures.append([21,start,end])
        #         break
        # gestures.sort(reverse=True)
        # print gestures
        gv = sample.rgb
        n = sample.data['numFrames']

        for gesture in gestures:
            id, start, end = gesture

            vid = empty((n_f, ) + vid_res, "uint8")

            l = end - start
            start = start + l / 2 - n_f / 2
            end = start + n_f
            if start < 1: start, end = (1, 1 + n_f)
            elif end >= n: start, end = (n - 1 - n_f, n - 1)
            go_to_frame(gv, start)
            for i, framenum in enumerate(range(start, end)):
                vid[i] = to_grayscale(gv.read()[1])

            vid = vid[:, :, 80:560]
            h = 240
            new_vid = empty((n_f, h, h), "uint8")
            for i, img in enumerate(vid):
                new_vid[i] = misc.imresize(img, (h, h))
            vid = new_vid

            if show_gray: play_vid(vid, norm=False, wait=5)

            assert vid.dtype == "uint8"
            assert vid.shape == (32, h, h)
            store_preproc(vid, id)
        gv = None
        n = None
        # sample.__del__()

        # dump_data(file)
    dump_last_data()
    print 'Process', p_i, 'finished'
Example 4
def showComposed(path_to_data):
    
    import cv2
    from ChalearnLAPSample import GestureSample
    
    gestureSample = GestureSample(path_to_data)
    cv2.namedWindow(path_to_data,cv2.WINDOW_NORMAL) 
    for x in range(1, gestureSample.getNumFrames()):
        img=gestureSample.getComposedFrame(x)
#        img=gestureSample.getSkeletonImage(x)
        cv2.imshow(path_to_data,img)
        cv2.waitKey(1)
    del gestureSample
    cv2.destroyAllWindows()    
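A short usage sketch; the archive name below is taken from Example 11 and is only an assumption about what is available locally:

# Hypothetical call; point it at any ChaLearn SampleXXXX.zip archive.
showComposed("Sample0002.zip")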
Example 5
def preprocess(samples):

    for file in samples:
        print "Processing", file 
        sample = GestureSample(data+"/"+file)
        # proc_sample(sample)
        gestures = sample.getGestures()
        for i in range(len(gestures)-1):
            end_prev = gestures[i][2]
            st_next = gestures[i+1][1]
            l = st_next-end_prev
            if l > n_frames:
                start = end_prev + int((l-n_frames)/2.)
                end = start + n_frames
                gestures.append([21,start,end])
                break
        # gestures.sort(reverse=True)
        # print gestures
        for gesture in gestures:
            skelet, depth, gray, user, c = get_data(sample, gesture)
            if c: print 'corrupt'; continue

            user_o = user.copy()

            # preprocess
            skelet,c = proc_skelet(skelet)
            if c: print 'corrupt'; continue
            user = proc_user(user)
            depth,c = proc_depth(depth, user, user_o, skelet)
            if c: print 'corrupt'; continue
            gray,c = proc_gray(gray, user,  skelet)
            if c: print 'corrupt'; continue

            if show_depth: play_vid(depth,norm=False)
            if show_gray: play_vid(gray, norm=False)
            if show_user: play_vid(user,norm=True)

            traj2D,traj3D,ori,pheight,hand,center = skelet
            skelet = traj3D,ori,pheight

            assert gray.dtype==depth.dtype==traj3D.dtype==ori.dtype=="uint8"
            assert gray.shape==depth.shape==(2,)+vid_shape_hand
            assert traj3D.shape[1]==ori.shape[1]==n_frames

            video = array([gray,depth],dtype="uint8")
            store_preproc(video,skelet,gesture[0])

    dump_last_data()
    print 'Process',p_i,'finished'
Example 6
def show_sk_image(gesture_id=100):
    data = os.path.join("E:\\program\\Chalearn\\rawdata\\train\\")
    # Get the list of training samples

    smp = GestureSample(os.path.join(data, 'Sample%04d.zip' % gesture_id))
    frame_num = smp.getNumFrames()
    fps = 30
    cv2.namedWindow("sk_image")
    for i in range(1, frame_num + 1):
        sk = smp.getSkeletonImage(i)

        cv2.imshow("sk_image", sk)
        cv2.waitKey(int(1000 / fps))
    cv2.destroyAllWindows()
    del smp
Example 7
    predictions = tf.get_collection('predictions')[0]
    predictions_lstm = tf.get_collection('predictions_lstm')[0]
    input_samples_op = tf.get_collection('input_samples_op')[0]
    mode = tf.get_collection('mode')[0]
    mode_lstm = tf.get_collection('mode_lstm')[0]
    net_type = tf.get_collection('net_type')[0]

    logits = tf.get_default_graph().get_tensor_by_name("accuracy/Reshape:0")
    logits_soft = tf.nn.softmax(logits)

    correct_predictions = 0
    total_predictions = 0

    for sample_id in tqdm(TEST_ID):
        print('========== sample %d ===========' % sample_id)
        sample = GestureSample('%s/%s/Sample%04d.zip' %
                               (RAW_DATA_PATH, 'Test', sample_id))

        num_of_frames = sample.getNumFrames()
        num_of_clip_batch = math.ceil(num_of_frames / FRAMES_PER_VIDEO /
                                      BATCH_SIZE)
        # get entire video
        user = sample.get_entire_user_video()
        vid = sample.get_entire_rgb_video()
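        # The user video is the user-segmentation stream: thresholding its per-pixel mean at 150
        # keeps only foreground (user) pixels, and the extra trailing axis lets the boolean mask
        # broadcast over the RGB channels in the multiplication below.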
        mask = np.mean(user, axis=3) > 150
        mask = mask.reshape((mask.shape + (1, )))
        vid = vid * mask
        ''' get clip ground truth labels'''
        clip_labels = []
        dense_labels = np.asarray([NO_GESTURE] *
                                  math.ceil(num_of_frames / FRAMES_PER_CLIP) *
                                  FRAMES_PER_CLIP)
def get_data_training(path, data_type, write_path, sample_ids):
    for sample_id in tqdm(sample_ids):
        '''Get ChaLearn Data reader'''
        sample = GestureSample('%s/%s/Sample%04d.zip' %
                               (path, data_type, sample_id))
        '''Get label per frame'''
        gesture_list = sample.getGestures()
        num_of_frames = sample.getNumFrames()

        labels = []
        mid_frame = []
        for gesture_id, start_frame, end_frame in gesture_list:
            labels += [gesture_id]
            mid_frame += [round((start_frame + end_frame) / 2)]

        # get entire video
        vid = sample.get_entire_rgb_video()
        user = sample.get_entire_user_video()
        mask = np.mean(user, axis=3) > 150
        mask = mask.reshape((mask.shape + (1, )))
        vid = vid * mask
        '''Split it into videos of MAX_FRAMES (80 as in the paper) frames'''
        # padding = np.zeros(IMAGE_SIZE, dtype=np.uint8)
        padding = get_padding(vid, gesture_list)
        start_padding = 0
        end_padding = 0
        videos = []
        dense_label = []
        clip_label = []
        clip_label_video = []

        for f, lab, id in zip(mid_frame, labels, range(len(labels))):
            start = f - int(FRAMES_PER_VIDEO / 2)
            end = f + int(FRAMES_PER_VIDEO / 2)

            label_padding_start = abs(start - gesture_list[id][1])
            label_padding_end = abs(gesture_list[id][2] - end)
            label_gesture = gesture_list[id][2] - gesture_list[id][1]

            if start < 0:
                start_padding = -start
                start = 0

            if end > num_of_frames:
                end_padding = end - num_of_frames
                end = num_of_frames

            if (start < gesture_list[id - 1][2]) and (id > 0):
                start_padding = gesture_list[id - 1][2] - start
                start = gesture_list[id - 1][2]

            if id < (len(labels) - 1):
                if (end > gesture_list[id + 1][1]):
                    end_padding = end - gesture_list[id + 1][1]
                    end = gesture_list[id + 1][1]

            single_video = [
                padding[:start_padding] + list(vid[start:end]) +
                padding[:end_padding]
            ]
            single_video = np.asarray(single_video, dtype=np.uint8).reshape(
                (int(FRAMES_PER_VIDEO / FRAMES_PER_CLIP), FRAMES_PER_CLIP) +
                (IMAGE_SIZE))

            # get frame by frame labels to calculate accuracy during training and Jaccard score for val/test
            dense_lab = label_padding_start * [NO_GESTURE] + label_gesture * [
                lab
            ] + label_padding_end * [NO_GESTURE]
            dense_lab = dense_lab[:FRAMES_PER_VIDEO]
            for i in range(0, FRAMES_PER_VIDEO, FRAMES_PER_CLIP):
                extracted_labels = np.asarray(
                    dense_lab[i:i + FRAMES_PER_CLIP]) == lab
                if np.sum(extracted_labels) < 4:
                    clip_label_video += [NO_GESTURE]
                else:
                    clip_label_video += [lab]

            videos += [single_video]
            dense_label += [dense_lab]
            clip_label += [clip_label_video]
            start_padding = 0
            end_padding = 0
            clip_label_video = []

        # also add a padding-only video
        videos += [
            np.asarray(padding, dtype=np.uint8).reshape(
                (int(FRAMES_PER_VIDEO / FRAMES_PER_CLIP), FRAMES_PER_CLIP) +
                (IMAGE_SIZE))
        ]
        dense_label += [[NO_GESTURE] * FRAMES_PER_VIDEO]
        clip_label += [[NO_GESTURE] * int(FRAMES_PER_VIDEO / FRAMES_PER_CLIP)]

        for gesture_video, label, ind in zip(videos, labels,
                                             range(len(labels))):
            '''Create TFRecord structure'''
            # context = tf.train.Features(feature={'sample_id': util._int64_feature(sample_id),
            #                                     })
            featureLists = tf.train.FeatureLists(
                feature_list={
                    'rgbs':
                    util._bytes_feature_list(gesture_video),
                    'label':
                    util._bytes_feature_list(
                        np.asarray((label - 1, ), dtype=np.int32)),
                    'dense_label':
                    util._bytes_feature_list(
                        np.asarray(dense_label[ind], dtype=np.int32) - 1),
                    'clip_label':
                    util._bytes_feature_list(
                        np.asarray(clip_label[ind], dtype=np.int32) - 1),
                    'sample_id':
                    util._bytes_feature_list(
                        np.asarray((sample_id, ), dtype=np.int32)),
                    'num_frames':
                    util._bytes_feature_list(
                        np.asarray((num_of_frames, ), dtype=np.int32))
                })

            sequence_example = tf.train.SequenceExample(
                feature_lists=featureLists)
            '''Write to .tfrecord file'''

            tf_write_option = tf.python_io.TFRecordOptions(
                compression_type=tf.python_io.TFRecordCompressionType.GZIP)
            filename = '%s/%s/Sample%04d_%02d.tfrecords' % (
                write_path, data_type, sample_id, ind)
            tf_writer = tf.python_io.TFRecordWriter(filename,
                                                    options=tf_write_option)
            tf_writer.write(sequence_example.SerializeToString())
            tf_writer.close()
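The writer above stores every field through util._bytes_feature_list with GZIP compression. A minimal reading sketch under the assumption that each feature-list entry holds the raw bytes of an int32 array (TF 1.x API, parsing the protobuf directly):

import numpy as np
import tensorflow as tf

def read_gesture_records(tfrecord_path):
    # Compression must match the GZIP option used by the writer above.
    options = tf.python_io.TFRecordOptions(
        compression_type=tf.python_io.TFRecordCompressionType.GZIP)
    for record in tf.python_io.tf_record_iterator(tfrecord_path, options=options):
        seq = tf.train.SequenceExample()
        seq.ParseFromString(record)
        # Assumption: 'label' was serialized as the raw bytes of a length-1 int32 array.
        label_bytes = seq.feature_lists.feature_list['label'].feature[0].bytes_list.value[0]
        label = int(np.frombuffer(label_bytes, dtype=np.int32)[0])
        yield seq, label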
Example 9
def gather_stats(samples):

    for file in samples:
        print "Processing", file
        smp = GestureSample(data + "/" + file)
        # proc_sample(sample)
        gestures = smp.getGestures()
        for gesture in gestures:
            skelet = []
            id, start, end = gesture
            n_f = n_frames
            n = smp.data['numFrames']
            l = end - start
            statn.append(end - start)
            # start = start + l/2 -n_f/2
            # end = start + n_f
            # if start < 1: start,end = (1,1+n_f)
            # elif end >= n: start,end = (n-1-n_f,n-1)
            # l = n_frames

            # for i,framenum in enumerate(range(start,end)): skelet.append(smp.getSkeleton(framenum))

            # phl, phr, ph, pc = [empty((2,l)) for _ in range(4)]
            # whl, whr, wh, wc = [empty((3,l)) for _ in range(4)]
            # ohl, ohr = [empty((4,l)) for _ in range(2)]

            # for i,skel in enumerate(skelet):
            #     pix = skel.getPixelCoordinates()
            #     world = skel.getWorldCoordinates()
            #     ori = skel.getJoinOrientations()
            #     phl[:,i] = array(pix['HandLeft'])
            #     phr[:,i] = array(pix['HandRight'])
            #     whl[:,i] = array(world['HandLeft'])
            #     whr[:,i] = array(world['HandRight'])
            #     ohl[:,i] = array(ori['HandLeft'])
            #     ohr[:,i] = array(ori['HandRight'])
            #     ph[:,i] = array(pix['Head'])
            #     pc[:,i] = array(pix['HipCenter'])
            #     wh[:,i] = array(world['Head'])
            #     wc[:,i] = array(world['HipCenter'])

            # if count_nonzero(phl) < 10*2: continue

            # phl,phr,ph,pc,whl,whr,wh,wc = [smooth(s) for s in \
            #                                         phl,phr,ph,pc,whl,whr,wh,wc]
            # ohl,ohr = [smooth(s,3) for s in ohl,ohr]

            # phl_y = phl[1][phl[1].nonzero()]
            # phr_y = phr[1][phr[1].nonzero()]

            # hand = "left" if phl_y.mean() < phr_y.mean() else "right"

            # if hand=="left":
            #     # whl[0] = whl[0]*(-1)
            #     traj2D,traj3D,ori = phl, whl, ohl
            # else:
            #     traj2D,traj3D,ori = phr, whr, ohr

            # wheight = array([linalg.norm(wc[:,i]-wh[:,i]) for i in range(l)]).mean()

            # traj3D = (wh-traj3D)/wheight

            # if hand=="left":
            #     traj3D[0] *=-1
            #     # print traj3D[0].min(), traj3D[0].mean(), traj3D[0].max()

            # statw.append([ [traj3D[0].min(), traj3D[0].max()],
            #             [traj3D[1].min(), traj3D[1].max()],
            #             [traj3D[2].min(), traj3D[2].max()]])
            # stato.append([[ori[0].min(), ori[0].max()],
            #             [ori[1].min(), ori[1].max()],
            #             [ori[2].min(), ori[2].max()],
            #             [ori[3].min(), ori[3].max()]])

            # traj3D,ori = [d.astype("uint8") for d in traj3D,ori]

        report_stats()
Example 10
data = "/home/lio/mp/chalearn2014/train_raw"
output = "/home/lio/Dropbox/MP/chalearn2014/preproc"
# data = "/home/lio/mp/chalearn2014/train_raw"

def write(_s): 
    with open(output+"/sample.txt","a") as f: f.write(_s+"\n")
    print _s

# get samples
os.chdir(data)
samples=glob("*.zip")
samples.sort()

for file in samples:
    print file,
    smp = GestureSample(data+"/"+file)
    # gestures = smp.getGestures()

    n = smp.data['numFrames']
    vid = smp.rgb

    for i in range(n):
        img = vid.read()[1]
        ratio = 1. * img.shape[1] / img.shape[0]
        size = 200 if img.shape[0] < 200 else 400
        img = cv2.resize(img, (int(size * ratio), size))
        cv2.imshow("Video", img)
        key = cv2.waitKey(0)
        if key == 65505:
            break
        elif key == 13:
Example 11
# -*- coding: utf-8 -*-
"""
Created on Wed May 22 13:27:08 2019

@author: cmp3tahera
"""

#from classes import GestureSample
from ChalearnLAPSample import GestureSample
gestureSample = GestureSample("Sample0002.zip")
#fps=gestureSample.getFPS()
# Finally, we can access an object that encodes the skeleton information in the same way:

skeleton=gestureSample.getSkeleton(10)
'''
SampleXXXX_skeleton.csv: CSV with the skeleton information for each frame of the videos. Each line corresponds to one frame.
Skeletons are encoded as a sequence of joints, providing 9 values per joint [Wx, Wy, Wz, Rx, Ry, Rz, Rw, Px, Py]
(W are world coordinates, R rotation values and P the pixel coordinates). The order of the joints in the sequence is:
1.HipCenter, 2.Spine, 3.ShoulderCenter, 4.Head, 5.ShoulderLeft, 6.ElbowLeft, 7.WristLeft, 8.HandLeft, 9.ShoulderRight,
10.ElbowRight, 11.WristRight, 12.HandRight, 13.HipLeft, 14.KneeLeft, 15.AnkleLeft, 16.FootLeft, 17.HipRight,
18.KneeRight, 19.AnkleRight, and 20.FootRight.
'''
'''
To get the skeleton information, we have some provided functionalities. For each joint
the [Wx, Wy, Wz, Rx, Ry, Rz, Rw, Px, Py] description array is stored in a dictionary as three independent vectors.
You can access each value for each joint (e.g. the head) as follows:
'''
[Wx, Wy, Wz]=skeleton.getAllData()['Head'][0]

[Rx, Ry, Rz, Rw]=skeleton.getAllData()['Head'][1]
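The same skeleton object also exposes per-joint accessors (they appear, commented out, in Example 9 below); a short sketch:

pix = skeleton.getPixelCoordinates()    # e.g. pix['Head'] -> [Px, Py]
world = skeleton.getWorldCoordinates()  # e.g. world['Head'] -> [Wx, Wy, Wz]
ori = skeleton.getJoinOrientations()    # e.g. ori['Head'] -> [Rx, Ry, Rz, Rw]
print(world['Head'])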
Example 12
def generate_pva(feature_name='sk_pva_99', labels_name='labels_raw'):
    print("Extracting the training set of position, velocity and acceleration")
    data = os.path.join("E:\\program\\Chalearn\\rawdata\\train\\")
    # Get the list of training samples
    samples = os.listdir(data)
    target_dir = 'E:\\program\\Chalearn\\Chalearn_LSTM\\target\\'
    output_dir = 'E:\\program\\Chalearn\\Chalearn_LSTM\\feature\\' + feature_name
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    used_joints = [
        'ElbowLeft', 'WristLeft', 'ShoulderLeft', 'HandLeft', 'ElbowRight',
        'WristRight', 'ShoulderRight', 'HandRight', 'Head', 'Spine',
        'HipCenter'
    ]
    njoints = len(used_joints)

    #    f = open('SK_normalization.pkl','r')
    #    normal_params = pickle.load(f)
    #    f.close()
    #    Mean = normal_params['Mean1']
    #    Std = normal_params['Std1']

    count = 0
    #    target_category = 21
    Target_all = []
    #Feature_all =  numpy.zeros(shape=(400000, (njoints*(njoints-1)/2 + njoints**2)*3),dtype=numpy.float32)
    for file_count, file in enumerate(samples):
        if int(file[-8:-4]) != 417 and int(file[-8:-4]) != 675:
            print("\t Processing file " + file)
            # Create the object to access the sample
            smp = GestureSample(os.path.join(data, file))
            # ###############################################
            # USE Ground Truth information to learn the model
            # ###############################################
            # Get the list of actions for this frame
            gesturesList = smp.getGestures()
            frame_num = smp.getNumFrames()
            Feature_Array = np.zeros(shape=(frame_num, 3 * 3 * njoints),
                                     dtype=np.float32)
            #        Target = np.zeros( shape=(frame_num, target_category), dtype=np.uint8)

            # feature generation
            Skeleton_matrix, valid_skel = Extract_feature_normalized_ALL(
                smp, used_joints, 1, frame_num)
            #            Feature_Array = Extract_feature_Realtime(Skeleton_matrix, njoints)
            #            Skeleton_matrix = Smooth_Skeleton(Skeleton_matrix, window_len = 5, smooth_mode = 'gaussian')
            Feature_Array = Extract_feature_pva(Skeleton_matrix, njoints)

            Mean = np.mean(Feature_Array, axis=0)
            Std = np.std(Feature_Array, axis=0)

            Feature_Array = normalize(Feature_Array, Mean, Std)

            #save sample sk features
            output_name = '%04d.npy' % count
            #            output_name = file[-8:-4]+'.npy'
            np.save(os.path.join(output_dir, output_name), Feature_Array)

            count += 1
            # target generation

            labels = np.zeros(frame_num, np.uint8)
            for row in gesturesList:
                labels[int(row[1]) - 1:int(row[2]) - 1] = int(row[0])
            Target_all.append(labels)
            del smp
    np.save(target_dir + '%s.npy' % labels_name, Target_all)
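A minimal sketch for loading the arrays saved above back into memory; the directory names reuse the ones in the function, and allow_pickle is needed on newer NumPy because Target_all is a list of per-sample arrays of different lengths:

import os
import numpy as np

feature_dir = 'E:\\program\\Chalearn\\Chalearn_LSTM\\feature\\sk_pva_99'
target_dir = 'E:\\program\\Chalearn\\Chalearn_LSTM\\target\\'

first_sample_features = np.load(os.path.join(feature_dir, '0000.npy'))   # (frame_num, feature_dim)
all_labels = np.load(target_dir + 'labels_raw.npy', allow_pickle=True)   # one label vector per sample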
Example 13
def generate_eigenjoint(feature_name='sk_eigenjoint_nor_528',
                        labels_name='labels_raw'):
    # Data folder (Training data)
    print("Extracting the training files")
    data = os.path.join("E:\\program\\Chalearn\\rawdata\\train\\")
    target_dir = 'E:\\program\\Chalearn\\Chalearn_LSTM\\target\\'
    # Get the list of training samples
    samples = os.listdir(data)
    output_dir = 'E:\\program\\Chalearn\\Chalearn_LSTM\\feature\\' + feature_name

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    used_joints = [
        'ElbowLeft', 'WristLeft', 'ShoulderLeft', 'HandLeft', 'ElbowRight',
        'WristRight', 'ShoulderRight', 'HandRight', 'Head', 'Spine',
        'HipCenter'
    ]
    njoints = len(used_joints)

    f = open('SK_normalization.pkl', 'rb')  # binary mode so pickle.load works under both Python 2 and 3
    normal_params = pickle.load(f)
    f.close()
    Mean = normal_params['Mean1']
    Std = normal_params['Std1']

    count = 0
    #    target_category = 21
    Target_all = []
    #Feature_all =  numpy.zeros(shape=(400000, (njoints*(njoints-1)/2 + njoints**2)*3),dtype=numpy.float32)
    for file_count, file in enumerate(samples):
        if int(file[-8:-4]) != 417 and int(file[-8:-4]) != 675:
            print("\t Processing file " + file)
            # Create the object to access the sample
            smp = GestureSample(os.path.join(data, file))
            # ###############################################
            # USE Ground Truth information to learn the model
            # ###############################################
            # Get the list of actions for this frame
            gesturesList = smp.getGestures()
            frame_num = smp.getNumFrames()
            Feature_Array = np.zeros(
                shape=(frame_num,
                       (njoints * (njoints - 1) // 2 + njoints**2) * 3),
                dtype=np.float32)
            #        Target = np.zeros( shape=(frame_num, target_category), dtype=np.uint8)

            # feature generation
            Skeleton_matrix, valid_skel = Extract_feature_UNnormalized(
                smp, used_joints, 1, frame_num)
            Feature_Array = Extract_feature_Realtime(Skeleton_matrix, njoints)

            Feature_Array = normalize(Feature_Array, Mean, Std)
            add_ = Feature_Array[-1].reshape((1, Feature_Array.shape[1]))
            Feature_Array = np.concatenate((Feature_Array, add_), axis=0)

            #save sample sk features
            output_name = '%04d.npy' % count

            count += 1
            np.save(os.path.join(output_dir, output_name), Feature_Array)

            # target generation

            labels = np.zeros(frame_num, np.uint8)
            for row in gesturesList:
                labels[int(row[1]) - 1:int(row[2]) - 1] = int(row[0])
            Target_all.append(labels)
            del smp

    np.save(target_dir + '%s.npy' % labels_name, Target_all)
Example 14
def preprocess(samples):
    first = True
    for file in samples:
        print "Processing", file
        sample = GestureSample(data + "/" + file)
        # proc_sample(sample)
        gestures = sample.getGestures()
        # for i in range(len(gestures)-1):
        #     end_prev = gestures[i][2]
        #     st_next = gestures[i+1][1]
        #     l = st_next-end_prev
        #     if l > n_frames:
        #         start = end_prev + int((l-n_frames)/2.)
        #         end = start + n_frames
        #         gestures.append([21,start,end])
        #         break
        # gestures.sort(reverse=True)
        # print gestures
        for gesture in gestures[:5]:
            skelet, depth, gray, user, c = get_data(sample, gesture)
            if c:
                print 'corrupt'
                continue

            user_o = user.copy()
            gray_o = gray.copy()

            # preprocess
            skelet, c = proc_skelet(skelet)
            if c:
                print 'corrupt'
                continue
            user = proc_user(user)
            user_new, depth, c = proc_depth(depth, user, user_o, skelet)
            if c:
                print 'corrupt'
                continue
            gray, c = proc_gray(gray, user, skelet)
            if c:
                print 'corrupt'
                continue

            gray2 = proc_gray2(gray_o, skelet)
            gray2 = gray2[:, :, 80:560]

            gray1 = gray[1, :, 14:-14, 14:-14]
            depth1 = depth[1, :, 14:-14, 14:-14]

            def resize(
                    v,
                    size,
                    interp=cv2.INTER_NEAREST):  # _NEAREST _LINEAR _AREA _CUBIC
                vn = empty((v.shape[0], ) + size, dtype=uint8)
                for i, img in enumerate(v):
                    img = cv2.resize(img, size, interpolation=interp)
                    vn[i] = img
                return vn

            # vid = hstack([gray2,gray])

            vid = gray2

            t = 8
            h1 = 512
            h2 = 256 - t / 2

            vbar = zeros((32, h1, t), dtype=uint8)
            hbar = zeros((32, t, h2), dtype=uint8)

            vid = resize(vid, (h1, h1))
            vid2 = resize(gray[0], (h2, h2))
            vid3 = resize(gray1, (h2, h2))
            vid2 = concatenate([vid2, hbar, vid3], axis=1)

            vid = concatenate([vid, vbar, vid2], axis=2)

            vid2 = resize(depth[0], (h2, h2))
            vid3 = resize(depth1, (h2, h2))
            vid2 = concatenate([vid2, hbar, vid3], axis=1)

            vid = concatenate([vid, vbar, vid2], axis=2)

            if first:
                print "init"
                # fourcc = cv2.cv.CV_FOURCC(*'FMP4')
                # video = cv2.VideoWriter('/home/lio/Desktop/video.avi',
                #     fourcc,30,
                #     (vid.shape[2],vid.shape[1]))

                vsk = VideoSink('/home/lio/Desktop/video.avi',
                                rate=20,
                                size=vid.shape[1:3],
                                colorspace='rgb24',
                                codec="x264")
                first = False

            # play_vid(vid, norm=False, wait=30, resize=False, fs=True)

            # print vid.shape

            for img in vid:
                img = cv2.cvtColor(img, cv2.cv.CV_GRAY2RGB)
                vsk(img)

            # user = user_new

            # if show_depth: play_vid(depth,norm=False)
            # if show_gray: play_vid(gray[0], norm=False)
            # if show_user: play_vid(user,norm=True)

            # # user_new = user_new.astype("bool")

            # traj2D,traj3D,ori,pheight,hand,center = skelet
            # skelet = traj3D,ori,pheight

            # assert user.dtype==gray.dtype==depth.dtype==traj3D.dtype==ori.dtype=="uint8"
            # assert user.shape==gray.shape==depth.shape==(2,)+vid_shape_hand
            # assert traj3D.shape[1]==ori.shape[1]==n_frames

            # video = empty((3,)+gray.shape,dtype="uint8")
            # video[0],video[1],video[2] = gray,depth,user
            # store_preproc(video,skelet,gesture[0])

        # dump_data(file)
    # dump_last_data()
    cv2.destroyAllWindows()
    # video.release()
    vsk.close()
    print 'Process', p_i, 'finished'
Example 15
def get_data_training(path, data_type, write_path, sample_ids):

    is_gesture = 0
    no_gesture = 0
    count = [0] * 21
    quota = {}
    for i in range(0, 21):
        quota[i] = []

    for sample_id in tqdm(sample_ids):
        '''Get ChaLearn Data reader'''
        sample = GestureSample('%s/%s/Sample%04d.zip' %
                               (path, data_type, sample_id))
        '''Get label per frame'''
        gesture_list = sample.getGestures()
        num_of_frames = sample.getNumFrames()

        dense_label = np.zeros(num_of_frames)
        dense_label[:] = constants_3dcnn.NO_GESTURE

        for gesture_id, start_frame, end_frame in gesture_list:
            dense_label[start_frame:end_frame] = gesture_id

        range_num = np.arange(0, num_of_frames,
                              constants_3dcnn.FRAMES_PER_CLIP_PP)[:-1]

        # no_gesture_ranges = get_no_gesture(gesture_list)
        # ranges_lengths.append(NUM_OF_NO_GESTURE_CLIPS)
        # ranges.append(no_gesture_ranges)
        # labels.append(NO_GESTURE)

        # get entire video
        user = sample.get_entire_user_video()
        vid = sample.get_entire_rgb_video()
        mask = np.mean(user, axis=3) > 150
        mask = mask.reshape((mask.shape + (1, )))
        vid = vid * mask

        id = 0
        for rang in range_num:
            counter = np.zeros(shape=22)
            clip = vid[rang:(rang + constants_3dcnn.FRAMES_PER_CLIP_PP)]
            clip_dense_label = dense_label[rang:(
                rang + constants_3dcnn.FRAMES_PER_CLIP_PP)]
            check = np.sum(clip_dense_label != constants_3dcnn.NO_GESTURE)

            # if most of the frames belong to a gesture label then store the clip under that label
            if check > int(constants_3dcnn.FRAMES_PER_CLIP_PP / 2):
                is_gesture += 1

                lab_unique = np.unique(clip_dense_label[
                    clip_dense_label != constants_3dcnn.NO_GESTURE])
                for l in list(lab_unique):
                    counter[int(l)] += 1

                lab = np.argmax(counter)

                featureLists = tf.train.FeatureLists(
                    feature_list={
                        'rgbs':
                        util._bytes_feature_list(clip),
                        'label':
                        util._bytes_feature_list(
                            np.asarray((lab - 1, ), dtype=np.int32)),
                        'dense_label':
                        util._bytes_feature_list(
                            np.asarray(clip_dense_label, dtype=np.int32) - 1),
                        'clip_label':
                        util._bytes_feature_list(
                            np.asarray([lab], dtype=np.int32) - 1),
                        'sample_id':
                        util._bytes_feature_list(
                            np.asarray((sample_id, ), dtype=np.int32)),
                        'num_frames':
                        util._bytes_feature_list(
                            np.asarray((num_of_frames, ), dtype=np.int32))
                    })

                count[lab - 1] += 1
                if (len(quota[lab - 1]) < 100):
                    quota[lab - 1].append(featureLists)

                sequence_example = tf.train.SequenceExample(
                    feature_lists=featureLists)
                '''Write to .tfrecord file'''

                tf_write_option = tf.python_io.TFRecordOptions(
                    compression_type=tf.python_io.TFRecordCompressionType.GZIP)
                filename = '%s/%s/Sample%04d_%02d.tfrecords' % (
                    write_path, data_type, sample_id, id)
                tf_writer = tf.python_io.TFRecordWriter(
                    filename, options=tf_write_option)
                tf_writer.write(sequence_example.SerializeToString())
                tf_writer.close()
                id += 1

            # also keep some noisy no-gesture clips, but only while they number at most
            # 1/20th of the gesture clips, to limit class imbalance
            elif (check != 0) and no_gesture * 20 < is_gesture:
                lab = constants_3dcnn.NO_GESTURE
                no_gesture += 1

                featureLists = tf.train.FeatureLists(
                    feature_list={
                        'rgbs':
                        util._bytes_feature_list(clip),
                        'label':
                        util._bytes_feature_list(
                            np.asarray((lab - 1, ), dtype=np.int32)),
                        'dense_label':
                        util._bytes_feature_list(
                            np.asarray(clip_dense_label, dtype=np.int32) - 1),
                        'clip_label':
                        util._bytes_feature_list(
                            np.asarray([lab], dtype=np.int32) - 1),
                        'sample_id':
                        util._bytes_feature_list(
                            np.asarray((sample_id, ), dtype=np.int32)),
                        'num_frames':
                        util._bytes_feature_list(
                            np.asarray((num_of_frames, ), dtype=np.int32))
                    })

                count[lab - 1] += 1
                if (len(quota[lab - 1]) < 100):
                    quota[lab - 1].append(featureLists)

                sequence_example = tf.train.SequenceExample(
                    feature_lists=featureLists)
                '''Write to .tfrecord file'''

                tf_write_option = tf.python_io.TFRecordOptions(
                    compression_type=tf.python_io.TFRecordCompressionType.GZIP)
                filename = '%s/%s/Sample%04d_%02d.tfrecords' % (
                    write_path, data_type, sample_id, id)
                tf_writer = tf.python_io.TFRecordWriter(
                    filename, options=tf_write_option)
                tf_writer.write(sequence_example.SerializeToString())
                tf_writer.close()
                id += 1

        if (sample_id == sample_ids[-1]):
            max_count = max(count)
            for i in range(21):
                while (count[i] < max_count):
                    count[i] += 1
                    idx = random.randrange(len(quota[i]))
                    sequence_example = tf.train.SequenceExample(
                        feature_lists=quota[i][idx])

                    tf_write_option = tf.python_io.TFRecordOptions(
                        compression_type=tf.python_io.TFRecordCompressionType.
                        GZIP)
                    filename = '%s/%s/Sample%04d_%02d.tfrecords' % (
                        write_path, data_type, sample_id, id)
                    tf_writer = tf.python_io.TFRecordWriter(
                        filename, options=tf_write_option)
                    tf_writer.write(sequence_example.SerializeToString())
                    tf_writer.close()
                    id += 1

    print(is_gesture, no_gesture)
    print(max_count)
    print(count)
def get_data_training(path, data_type, write_path, sample_ids):

    is_gesture = 0
    no_gesture = 0
    count = [0] * 21
    quota = {}
    for i in range(0, 21):
        quota[i] = np.zeros([8, 150, 120, 3], np.uint8)

    isTrain = data_type.find('Train') >= 0

    for sample_id in tqdm(sample_ids):
        '''Get ChaLearn Data reader'''
        sample = GestureSample('%s/%s/Sample%04d.zip' %
                               (path, data_type, sample_id))
        '''Get label per frame'''
        gesture_list = sample.getGestures()

        num_of_frames = sample.getNumFrames()

        # get entire video
        user = sample.get_entire_user_video()
        vid = sample.get_entire_rgb_video()
        mask = np.mean(user, axis=3) > 150
        mask = mask.reshape((mask.shape + (1, )))
        vid = vid * mask

        labels = []

        # also derive clip-level labels
        clip_label_range = np.arange(0, num_of_frames, FRAMES_PER_CLIP)
        dense_label = np.zeros(num_of_frames)
        dense_label[:] = NO_GESTURE
        clip_labels = []

        cut_dense_labels = []
        cut_clip_labels = []
        cut_vid = np.zeros([8, 150, 120, 3], np.uint8)

        for gesture_id, start_frame, end_frame in gesture_list:
            labels += [gesture_id]
            dense_label[(start_frame - 1):end_frame] = gesture_id

        for clip_label in clip_label_range:
            clip_dense_labels_slice = dense_label[clip_label:clip_label +
                                                  FRAMES_PER_CLIP]
            lab_truth = clip_dense_labels_slice != NO_GESTURE
            n = np.sum(lab_truth)
            lab = -1
            if n > 5:
                lab = int(clip_dense_labels_slice[lab_truth][0])
                if (count[lab] <= 5 * max(count) + 10):
                    cut_clip_labels.append(lab)
                    #cut_vid+= [vid[clip_label : clip_label + FRAMES_PER_CLIP]]
                    cut_vid = np.concatenate(
                        (cut_vid,
                         vid[clip_label:clip_label + FRAMES_PER_CLIP]),
                        axis=0)
                    cut_dense_labels += [lab] * FRAMES_PER_CLIP
                    is_gesture += 1
                    count[lab - 1] += 1
                else:
                    lab = -1

            elif is_gesture >= 20 * no_gesture:
                lab = NO_GESTURE
                cut_clip_labels.append(lab)
                cut_vid = np.concatenate(
                    (cut_vid, vid[clip_label:clip_label + FRAMES_PER_CLIP]),
                    axis=0)
                #cut_vid += [vid[clip_label : clip_label + FRAMES_PER_CLIP]]
                cut_dense_labels += [lab] * FRAMES_PER_CLIP
                no_gesture += 1
                count[lab - 1] += 1

            if (lab >= 0 and count[lab - 1] < 100):
                quota[lab - 1] = np.concatenate(
                    (quota[lab - 1],
                     vid[clip_label:clip_label + FRAMES_PER_CLIP]),
                    axis=0)

        if sample_id == sample_ids[-1]:

            print(len(cut_dense_labels))
            print(len(cut_clip_labels))
            print(cut_vid.shape)
            print()

            max_count = max(count)
            q = []
            for i in range(NO_GESTURE):
                quota[i] = quota[i][8:]
                if (count[i] < max_count):
                    q += [i]

            while (len(q) > 0):
                idx = random.randrange(len(q))
                lab = q[idx] + 1
                l = random.randrange(min([max_count - count[lab - 1], 6])) + 1
                st = random.randrange(50)
                cut_vid = np.concatenate(
                    (cut_vid, quota[lab - 1][st * FRAMES_PER_CLIP:(st + l) *
                                             FRAMES_PER_CLIP]),
                    axis=0)
                cut_clip_labels += [lab] * l
                cut_dense_labels += [lab] * l * FRAMES_PER_CLIP
                count[lab - 1] += l
                if (count[lab - 1] >= max_count):
                    q = q[0:idx] + q[idx + 1:]
            print(len(cut_dense_labels))
            print(len(cut_clip_labels))
            print(cut_vid.shape)
            print()

        cut_clip_labels = np.asarray(cut_clip_labels, dtype=np.int32)
        cut_vid = cut_vid[8:]
        #cut_vid = np.asarray(cut_vid, dtype = np.uint8)
        #cut_vid = np.reshape(cut_vid, (cut_vid.shape[0] * cut_vid.shape[1], cut_vid.shape[2], cut_vid.shape[3], cut_vid.shape[4]))
        cut_dense_labels = np.asarray(cut_dense_labels, dtype=np.int32)

        if (sample_ids[-1] == sample_id or sample_ids[-2] == sample_id):
            print(count)

        num_of_frames = cut_dense_labels.shape[0]

        frames_range = list(
            np.arange(0, num_of_frames, FRAMES_PER_VIDEO_PP)[:-1])
        #frames_range.append(num_of_frames)

        for id in range(len(frames_range[:-1])):
            start = frames_range[id]
            end = frames_range[id + 1]

            clip_label_slice = np.asarray(
                cut_clip_labels[math.floor(start / 8):math.floor(end / 8)],
                dtype=np.int32)
            featureLists = tf.train.FeatureLists(
                feature_list={
                    'rgbs':
                    util._bytes_feature_list(cut_vid[start:end]),
                    'label':
                    util._bytes_feature_list(
                        np.asarray((cut_clip_labels[id] -
                                    1, ), dtype=np.int32)),
                    'dense_label':
                    util._bytes_feature_list(
                        np.asarray(cut_dense_labels[start:end], dtype=np.int32)
                        - 1),
                    'clip_label':
                    util._bytes_feature_list(clip_label_slice - 1),
                    'sample_id':
                    util._bytes_feature_list(
                        np.asarray((sample_id, ), dtype=np.int32)),
                    'num_frames':
                    util._bytes_feature_list(
                        np.asarray((num_of_frames, ), dtype=np.int32))
                })

            sequence_example = tf.train.SequenceExample(
                feature_lists=featureLists)
            '''Write to .tfrecord file'''

            tf_write_option = tf.python_io.TFRecordOptions(
                compression_type=tf.python_io.TFRecordCompressionType.GZIP)
            filename = '%s/%s/Sample%04d_%02d.tfrecords' % (
                write_path, data_type, sample_id, id)
            tf_writer = tf.python_io.TFRecordWriter(filename,
                                                    options=tf_write_option)
            tf_writer.write(sequence_example.SerializeToString())
            tf_writer.close()
Example 17
def preprocess(samples):

    for file in samples:
        print "Processing", file
        sample = GestureSample(data + "/" + file)
        # proc_sample(sample)
        gestures = sample.getGestures()

        for gesture in gestures:
            skelet, depth, gray, user, c = get_data(sample, gesture)
            if c:
                print 'corrupt'
                continue

            user_o = user.copy()
            gray_o = gray.copy()
            depth_o = depth.copy()
            fr = depth_o.shape[0] / 2
            # print gray_o.shape

            # preprocess
            skelet, c = proc_skelet(skelet)
            if c:
                print 'corrupt'
                continue
            user = proc_user(user)
            user_new, depth, c = proc_depth(depth, user, user_o, skelet)
            # if c: print 'corrupt'; continue
            gray, c = proc_gray(gray, user, skelet)
            # if c: print 'corrupt'; continue

            gray2 = proc_gray2(gray_o, skelet)
            # show_img(depth_o[fr],resize=False)
            # show_img(gray2[fr],resize=False)
            # show_img(user_o[fr],norm=True,resize=False)

            # show_img(gray[0][fr],resize=True)
            # show_img(gray[1][fr],resize=True)
            size = (512, 512)
            save_img(gray[0][fr], "1", size)
            save_img(gray[1][fr], "2", size)
            save_img(depth[0][fr], "3", size)
            save_img(depth[1][fr], "4", size)
            show_img(gray_o[fr], resize=False)

            # user = user_new

            # if show_depth: play_vid(depth,norm=False)
            # if show_gray: play_vid(gray[0], norm=False)
            # if show_user: play_vid(user,norm=True)

            # # user_new = user_new.astype("bool")

            # traj2D,traj3D,ori,pheight,hand,center = skelet
            # skelet = traj3D,ori,pheight

            # assert user.dtype==gray.dtype==depth.dtype==traj3D.dtype==ori.dtype=="uint8"
            # assert user.shape==gray.shape==depth.shape==(2,)+vid_shape_hand
            # assert traj3D.shape[1]==ori.shape[1]==n_frames

            # video = empty((3,)+gray.shape,dtype="uint8")
            # video[0],video[1],video[2] = gray,depth,user
            # store_preproc(video,skelet,gesture[0])

        # dump_data(file)
    dump_last_data()
    print 'Process', p_i, 'finished'
###############################
# load the pre-stored template
ref_depth = numpy.load('distance_median.npy')
template = cv2.imread('template.png')
template = numpy.mean(template, axis=2)
template /= template.max()

# pre-allocating the memory
IM_SZ = 90

for file_count, file in enumerate(samples):
    if not file_count < 650:
        time_tic = time.time()
        print("\t Processing file " + file)
        # Create the object to access the sample
        smp = GestureSample(os.path.join(data_path, file))
        # ###############################################
        # USE Ground Truth information to learn the model
        # ###############################################
        # Get the list of actions for this frame
        total_frame = smp.getNumFrames()
        ##################################################
        # obtain the shift and scale by matching the template against the reference depth
        shift, scale = smp.get_shift_scale(template,
                                           ref_depth,
                                           start_frame=total_frame - 100,
                                           end_frame=total_frame - 10)
        if numpy.isnan(scale):
            scale = 1

        cuboid = numpy.zeros((IM_SZ, IM_SZ, total_frame), numpy.uint8)
####################################
### Start predicting here ##########
####################################
samples = os.listdir(data_path)  # listdir returns every entry under the data path
for file_count, file in enumerate(samples):
    #if not file.endswith(".zip"):
    #    continue;  
    time_tic = time.time()      
    if file_count > -1:  # do nothing, just for debug convenience
        ''' This block processes a single sample file:
        first fetch the gesture list, then extract features for all gestures in the file at once
        and compute probability scores with the network (the first 5 frames of a video and the last frame are invalid).'''
        print("\t Processing file " + file)
        # Create the object to access the sample
        smp=GestureSample(os.path.join(data_path,file))  # read the sample data in its expected format; os.path.join builds the file path
        # ###############################################
        # USE Ground Truth information to learn the model
        # ###############################################
        # Get the list of actions for this frame
        gesturesList=smp.getGestures()  # get the gesture labels and their frame ranges

        # Extract the raw skeleton joints over all frames into Skeleton_matrix; also returns whether a valid (non-zero) skeleton was found
        Skeleton_matrix, valid_skel = Extract_feature_UNnormalized(smp, used_joints, 1, smp.getNumFrames())

        import cPickle
        cPickle.dump(Skeleton_matrix,open("testSkeleton_matrix","wb"))
        #print Skeleton_matrix
        #print Skeleton_matrix.get_value()