Example #1
0
    def supervised_lstm(input_shape,
                        action_size,
                        learning_rate=0.01,
                        backbone='resnet',
                        time_distributed=True,
                        multi_gpu=True):
        """Build and compile a recurrent classifier over a sequence of frames.

        A per-backbone feature extractor is applied to the input, the result
        is flattened per time step, summarised by an LSTM(512), regularised
        with dropout and mapped to ``action_size`` sigmoid outputs. The model
        is compiled with Adam and binary cross-entropy.

        Args:
            input_shape: shape passed to ``Input`` (without the batch axis).
                Presumably (time, height, width, channels) -- confirm with
                the caller.
            action_size: number of units of the sigmoid output layer.
            learning_rate: learning rate handed to ``Adam``.
            backbone: ``'resnet'``, ``'mobilenet'`` or ``'convLSTM'``; any
                other value selects the plain three-layer CNN.
            time_distributed: forwarded to ``dcn_resnet`` (``'resnet'``
                backbone only).
            multi_gpu: accepted for interface compatibility; not used by
                this function.

        Returns:
            The compiled Keras ``Model``.
        """
        img_input = Input(shape=input_shape, dtype='float32')
        if backbone == 'resnet':
            x = dcn_resnet(img_input, time_distributed)
        elif backbone == 'mobilenet':
            mobilenet = keras.applications.mobilenet_v2.MobileNetV2(
                include_top=False, weights=None, pooling='max')
            x = TimeDistributed(mobilenet)(img_input)
        elif backbone == 'convLSTM':
            x = img_input
            # Four ConvLSTM2D + BatchNormalization stages. Every stage must
            # return the full sequence: the next ConvLSTM2D and the shared
            # TimeDistributed(Flatten()) + LSTM tail below all need the time
            # axis, which return_sequences=False would drop.
            for _ in range(4):
                x = ConvLSTM2D(filters=40,
                               kernel_size=(3, 3),
                               padding='same',
                               return_sequences=True)(x)
                x = BatchNormalization()(x)
        else:
            # Fallback: plain per-frame CNN.
            x = TimeDistributed(
                Conv2D(32, kernel_size=8, strides=4,
                       activation='relu'))(img_input)
            x = TimeDistributed(
                Conv2D(64, kernel_size=4, strides=2, activation='relu'))(x)
            x = TimeDistributed(
                Conv2D(64, kernel_size=3, strides=1, activation='relu'))(x)

        # Shared head: per-step flatten -> LSTM -> dropout -> sigmoid outputs.
        x = TimeDistributed(Flatten())(x)
        x = LSTM(512)(x)
        x = Dropout(rate=0.5)(x)
        x = Dense(action_size, activation='sigmoid', name='x_train_out')(x)
        optimizer = Adam(lr=learning_rate)
        model = Model(inputs=img_input, outputs=x)

        model.compile(optimizer=optimizer,
                      loss=keras.losses.binary_crossentropy,
                      metrics=['accuracy'])
        model.summary()
        return model
Example #2
0
# Training-script excerpt: initialises the TensorFlow variables, then walks
# over every video together with its scan-path data and reads the video
# frame by frame. The excerpt ends inside the frame loop.
sess.run(tf.global_variables_initializer())

# TODO: write the state

# Nine 2-element vectors with components in {-1, 0, 1}.
# Presumably the direction signs of the nine possible actions -- confirm.
sign_ary = [[0., 0.], [0., 1.], [1., 0.], [1., 1.], [0., -1.], [-1., 0.],
            [-1., -1.], [-1., 1.], [1., -1.]]  #

# Videos (sorted by file name) are paired positionally with dataset['train'].
for video, data in zip(sorted(os.listdir(video_dir)), dataset['train']):
    # data --> [45, 100, 7]
    # This first capture is only used to query the frame size.
    # NOTE(review): it is not released in the visible code before `cap` is
    # rebound inside the scan loop below -- confirm this is intended.
    cap = cv2.VideoCapture(os.path.join(video_dir, video))
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)

    view = Viewport(width, height)

    # NOTE(review): `m` is not used in the visible part of the script.
    m = dcn_resnet((width, height, 3))

    print("video name : ", video)
    loss = []
    for scan in data:
        c_idx = 0
        idx = 0
        # Re-open the video so that every scan starts from the first frame.
        cap = cv2.VideoCapture(os.path.join(video_dir, video))
        state = (np.zeros([1, 256]), np.zeros([1, 256]))  # initial state

        while True:
            ret, frame = cap.read()

            if ret:
                # Crop to the current viewport, then shrink to 84x84.
                frame = view.get_view(frame)
                frame = cv2.resize(frame, (84, 84))
Example #3
0
# Preview script: for every test video, build a viewport and a model sized
# from the video's frame dimensions, then play the video in a window until
# it ends or the user presses 'q'.
video_dir = 'sample_videos'
train_dir = os.path.join(video_dir, '320x160')
test_dir = os.path.join(video_dir, '3840x1920')
scanpath_h = os.path.join('datasets/Scanpaths_H', 'Scanpaths')
dataset = Sal360().read_scanpath_H()

# `data` is unused here, but zipping with dataset['train'] limits the loop to
# as many videos as there are training entries.
for video, data in zip(sorted(os.listdir(test_dir)), dataset['train']):
    cap = cv2.VideoCapture(os.path.join(test_dir, video))
    try:
        # The first frame is read before playback starts and is not
        # displayed.
        ret, frame = cap.read()

        width, height = cap.get(cv2.CAP_PROP_FRAME_WIDTH), cap.get(
            cv2.CAP_PROP_FRAME_HEIGHT)

        # Bind the instance to its own name: rebinding `Viewport` itself
        # would replace the class, so the next iteration would call an
        # instance instead of constructing a new viewport.
        view = Viewport(width, height)
        input_shape = (view.width, height, 3)

        model = dcn_resnet(input_shape)
        while True:
            ret, frame = cap.read()

            if not ret:
                break
            cv2.imshow('video', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close the window even if a step above
        # raised.
        cap.release()
        cv2.destroyAllWindows()
Example #4
0
    def drqn(input_shape,
             action_size,
             learning_rate=0.001,
             backbone='mobilenet'):
        """Build and compile a recurrent Q-network for the chosen backbone.

        Args:
            input_shape: shape passed to ``Input`` (without the batch axis).
                Presumably (time, height, width, channels) -- confirm with
                the caller.
            action_size: number of output units (not used by the ``'cnn'``
                backbone, see the note in that branch).
            learning_rate: learning rate of the Adam optimizer used to
                compile every variant.
            backbone: one of ``'resnet'``, ``'mobilenet'``, ``'cnn'``,
                ``'convLSTM'`` or ``'2.5D'``.

        Returns:
            The compiled Keras ``Model``.

        Raises:
            ValueError: if ``backbone`` is not one of the supported names.
        """
        img_input = Input(shape=input_shape)
        adam = Adam(lr=learning_rate)

        if backbone == 'resnet':
            x = dcn_resnet(img_input, time_distributed=True)
            x = TimeDistributed(Flatten())(x)
            x = LSTM(512, activation='tanh')(x)
            x = Dropout(0.5)(x)
            x = Dense(action_size, activation='linear')(x)
            model = Model(inputs=img_input, outputs=x)
            model.compile(loss='mse', optimizer=adam)
        elif backbone == 'mobilenet':
            mobilenet = keras.applications.mobilenet_v2.MobileNetV2(
                include_top=False, weights=None, pooling='max')
            x = TimeDistributed(mobilenet)(img_input)
            x = TimeDistributed(Flatten())(x)
            x = LSTM(512, activation='tanh')(x)
            x = Dropout(0.5)(x)
            x = Dense(action_size, activation='linear')(x)
            model = Model(inputs=img_input, outputs=x)
            model.compile(loss='mse', optimizer=adam)
        elif backbone == 'cnn':
            x = TimeDistributed(
                Conv2D(32, kernel_size=8, strides=4,
                       activation='relu'))(img_input)
            x = TimeDistributed(
                Conv2D(64, kernel_size=4, strides=2, activation='relu'))(x)
            x = TimeDistributed(
                Conv2D(64, kernel_size=3, strides=1, activation='relu'))(x)
            x = TimeDistributed(Flatten())(x)
            x = LSTM(256, activation='tanh')(x)
            x = Dropout(0.5)(x)
            # NOTE(review): unlike the other branches this head has a single
            # sigmoid unit and ignores action_size -- confirm it is intended.
            x = Dense(1, activation='sigmoid')(x)
            model = Model(inputs=img_input, outputs=x)
            model.compile(loss='mse', optimizer=adam)
        elif backbone == 'convLSTM':
            print(np.shape(img_input))
            # ?, ?, img_w, img_h, channels
            x = convLSTM(64, 3)(img_input)
            x = BatchNormalization()(x)

            x = convLSTM(64, 3)(x)
            x = BatchNormalization()(x)

            x = convLSTM(64, 3)(x)
            x = BatchNormalization()(x)

            x = convLSTM(64, 3, return_sequences=False)(x)
            x = BatchNormalization()(x)

            # x = Conv3D(filters=1, kernel_size=(3, 3, 3),
            #            activation=LeakyReLU(alpha=0.2),
            #            padding='same', data_format='channels_last')(x)
            x = Flatten()(x)
            x = Dropout(rate=0.5)(x)
            x = Dense(action_size, activation='linear')(x)
            model = Model(inputs=img_input, outputs=x)
            # Use the configured optimizer so learning_rate is honoured, as
            # in the other branches (the string 'adam' ignores it).
            model.compile(loss='mse', optimizer=adam)
        elif backbone == '2.5D':
            # NOTE(review): with pooling='max' the backbone presumably emits
            # one feature vector per frame, which would not be the 5D input
            # Conv3D expects -- confirm before relying on this branch.
            base_cnn_model = keras.applications.mobilenet_v2.MobileNetV2(
                include_top=False, weights=None, pooling='max')
            temporal = TimeDistributed(base_cnn_model)(img_input)
            conv3d_analysis1 = Conv3D(filters=40,
                                      kernel_size=3,
                                      strides=3,
                                      padding='same')(temporal)
            conv3d_analysis2 = Conv3D(filters=40,
                                      kernel_size=3,
                                      strides=3,
                                      padding='same')(conv3d_analysis1)
            output = Flatten()(conv3d_analysis2)
            # NOTE(review): tanh outputs combined with binary cross-entropy
            # look inconsistent -- confirm the intended loss/activation.
            output = Dense(action_size, activation='tanh')(output)
            # `outputs=` (plural) is the Model keyword used by every other
            # branch.
            model = Model(inputs=img_input, outputs=output)
            model.compile(loss=keras.losses.binary_crossentropy,
                          optimizer=adam)

        else:
            raise ValueError("invalid value")
        model.summary()
        return model
Example #5
0
    def __init__(self):
        """Build the TF1 graph: conv stack -> LSTM -> policy and value heads.

        The visible code reads ``self.input_image`` without assigning it, so
        it must be provided elsewhere (e.g. a class attribute or a subclass).

        Attributes set here include the four conv layers (``conv1``..
        ``conv4``), the LSTM state placeholders (``state_in``) with their
        zero initial values (``state_init``), the new LSTM state
        (``lstm_state`` / ``state_out``), the 9-way softmax ``policy``, the
        scalar ``value`` head, the ``true_val`` label placeholder, the
        cross-entropy ``error``, the Adam ``train_op`` and a ``saver``.
        """
        # NOTE(review): this Keras preamble looks unfinished -- dcn_resnet()
        # is called without arguments here and `action_input` is unused.
        # Left as-is; confirm whether it is still needed.
        action_input = Input(shape=[None, 1])
        state_input = Input(shape=[None, 224, 224, 3])
        value_dcn = dcn_resnet()
        policy_dcn = dcn_resnet()  # share variables? or separate variables?
        value_lstm = CuDNNLSTM(256)(value_dcn, state_input)  # state-value : expected return
        policy_lstm = CuDNNLSTM(256)(policy_dcn)  # policy : agent's action selection
        self.value_model = Dense(1, activation='relu')(value_lstm)
        self.policy_model = Dense(1, activation='relu')(policy_lstm)

        self.action_max = 2

        # Convolutional feature extractor over self.input_image.
        self.conv1 = slim.conv2d(self.input_image,
                                 activation_fn=tf.nn.relu,
                                 num_outputs=32,
                                 kernel_size=[8, 8],
                                 stride=[4, 4],
                                 padding='VALID')
        self.conv2 = slim.conv2d(self.conv1,
                                 activation_fn=tf.nn.relu,
                                 num_outputs=64,
                                 kernel_size=[4, 4],
                                 stride=[2, 2],
                                 padding='VALID')
        self.conv3 = slim.convolution2d(
            inputs=self.conv2, num_outputs=64,
            kernel_size=[3, 3], stride=[1, 1], padding='VALID',
            activation_fn=tf.nn.relu)

        self.conv4 = slim.convolution2d(
            inputs=self.conv3, num_outputs=256,
            kernel_size=[7, 7], stride=[1, 1], padding='VALID',
            activation_fn=tf.nn.relu)
        hidden = slim.fully_connected(slim.flatten(self.conv4), 256,
                                      activation_fn=tf.nn.relu)

        # temporal dependency
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(256, reuse=tf.AUTO_REUSE)
        c_init = np.zeros((1, lstm_cell.state_size.c), np.float32)
        h_init = np.zeros((1, lstm_cell.state_size.h), np.float32)

        # Zero state to feed into state_in at the start of a sequence.
        self.state_init = [c_init, h_init]
        c_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.c])
        h_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.h])

        # Add a leading axis of size 1 so the rows of `hidden` become the
        # time steps of a single sequence (time_major=False below).
        self.rnn_in = tf.expand_dims(hidden, [0])
        self.state_in = tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in)  # c --> hidden, h --> output

        lstm_outputs, self.lstm_state = tf.nn.dynamic_rnn(lstm_cell, self.rnn_in, initial_state=self.state_in,
                                                          time_major=False, scope="A3C")
        lstm_c, lstm_h = self.lstm_state
        self.state_out = (lstm_c[:1, :], lstm_h[:1, :])
        rnn_out = tf.reshape(lstm_outputs, [-1, 256])

        # Policy head. Keep the pre-softmax activations: the cross-entropy
        # op below applies softmax itself and must not be fed probabilities.
        # NOTE(review): a ReLU on the logits is unusual -- confirm intent.
        policy_logits = tf.layers.dense(rnn_out, 9, activation=tf.nn.relu)
        self.policy = tf.nn.softmax(policy_logits)
        self.value = slim.fully_connected(rnn_out, 1,
                                          activation_fn=None,
                                          weights_initializer=normalized_columns_initializer(1.0),
                                          biases_initializer=None)

        # Supervised target over the 9 policy outputs.
        self.true_val = tf.placeholder(tf.int32, shape=[9])
        self.error = tf.nn.softmax_cross_entropy_with_logits(labels=self.true_val, logits=policy_logits)
        self.train_op = tf.train.AdamOptimizer(0.01).minimize(self.error)

        self.saver = tf.train.Saver()