Example 1
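All three snippets target the TensorFlow 1.x graph API and rely on project-local helpers that this page does not show; a minimal import header, with the local module names as assumptions, might look like:

import numpy as np
import tensorflow as tf  # 1.x API: tf.variable_scope, tf.get_variable, ...

# Hypothetical local imports (names assumed, modules not shown on this page):
# import tf_common as tfc    # the conv2d()/fc() layer helpers used below
# import constants           # provides constants.tasks, constants.history_frames
# FLAGS = tf.app.flags.FLAGS # provides FLAGS.action_size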
def create_column(col_names, self_i, state, col_hiddens):
    print("creating column %i" % self_i)

    arch = [
        [8, constants.history_frames, 16, 4],  # size, in, out, stride
        [4, 16, 32, 2],
        [256],
        -1
    ]
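    # arch rows: [filter size, in maps, out maps, stride] for the two conv
    # layers, [units] for the fc layer, and -1 for the policy/value heads.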

    train_vars = []
    lats = []    # lats[k][i]: laterals from source column k into layer i
    c_lats = []  # per-layer laterals, summed over all source columns

    if self_i > 0:
        with tf.variable_scope("laterals"):
            print("creating lateral connections to column %i" % self_i)

            for col_i in range(self_i):
                hiddens = col_hiddens[col_i]

                print("##" + str(len(col_hiddens[col_i])))

                col_lats = []

                print("creating laterals %i -> %i" % (col_i, self_i))

                with tf.variable_scope("%s_to_%s" % (col_names[col_i], col_names[self_i])):
                    for layer_i in range(len(hiddens)):
                        layer_lats = []
                        print("###" + str(layer_i))
                        dest_h_shape = arch[layer_i + 1]

                        with tf.variable_scope("layer%ito%i" % (layer_i, layer_i+1)):
                            orig_h = hiddens[layer_i]  # origin; was tf.stop_gradient(hiddens[layer_i])

                            print("layer %i -> %i" % (layer_i, layer_i + 1))

                            if dest_h_shape == -1: # to policy and value layer
                                with tf.variable_scope("policy"):
                                    lat_h_p, lat_vars_p = lateral_connection(orig_h, [FLAGS.action_size], self_i)
                                with tf.variable_scope("value"):
                                    lat_h_v, lat_vars_v = lateral_connection(orig_h, [1], self_i)

                                layer_lats.append(lat_h_p)
                                layer_lats.append(lat_h_v)
                                train_vars.extend(lat_vars_p)
                                train_vars.extend(lat_vars_v)
                            else:
                                lat_h, lat_vars = lateral_connection(orig_h, dest_h_shape, self_i, arch[layer_i + 1])

                                layer_lats.append(lat_h)
                                train_vars.extend(lat_vars)

                            col_hiddens[col_i][layer_i] = orig_h  # write-back; a no-op now that orig_h is the raw hidden

                        col_lats.append(layer_lats)
                lats.append(col_lats)

        #print("columns: %i" % (len(lats) + 1))
        #print("hidden layers: %i" % (len(lats[0])))
        #print("hidden shapes: %s" % col_hiddens[0])

        # sum same-layer lateral connections across the source columns
        for i in range(len(lats[0])):
            if arch[i+1] == -1:
                to_policy_list = [lats[k][i][0] for k in range(len(lats))]
                to_value_list = [lats[k][i][1] for k in range(len(lats))]
                to_policy = tf.reduce_sum(to_policy_list, 0)
                to_value = tf.reduce_sum(to_value_list, 0)

                c_lats.append([to_policy, to_value])

                print("summing ->policy and ->value layers")
                print(to_policy_list)
                print("=>")
                print(to_policy)
                print("&")
                print(to_value_list)
                print("=>")
                print(to_value)
            else:
                h_list = [lats[k][i][0] for k in range(len(lats))]

                # arch[i+1] with len > 1 is a conv layer, otherwise fully connected
                c = tf.reduce_sum(h_list, 0)
                c_lats.append(c)
                layer_kind = "convolutional" if len(arch[i+1]) > 1 else "fully connected"
                print("summing %s layers" % layer_kind)
                print(h_list)
                print("=>")
                print(c)

            print("~~~")

    print("done summing layers")
    #print("c lats:")
    #print(c_lats)

    def add_lat(layer, i, act=tf.nn.relu):
        """Add the summed lateral input to a layer's pre-activation.

        `layer` is the (tensor, weights, bias) triple returned by
        tfc.conv2d / tfc.fc; `i` indexes c_lats: [k] for a hidden layer,
        [k, 0] / [k, 1] for the policy / value head. Column 0 has no
        laterals, so only the activation is applied there.
        """
        if self_i <= 0:
            if act is None:
                return layer[0], layer[1], layer[2]
            else:
                return act(layer[0]), layer[1], layer[2]
        elif len(i) == 1:
            print("adding %s and %s" % (layer[0], c_lats[i[0]]))
            return act(layer[0]+c_lats[i[0]]), layer[1], layer[2]
        else:
            if act is None:
                print("(value) adding %s and %s" % (layer[0], c_lats[i[0]][i[1]]))
                return layer[0] + c_lats[i[0]][i[1]], layer[1], layer[2]
            else:
                print("(policy) adding %s and %s" % (layer[0], c_lats[i[0]][i[1]]))
                return act(layer[0]+c_lats[i[0]][i[1]]), layer[1], layer[2]

    train = self_i == len(constants.tasks)-1  # only the newest column is trainable
    print("column trainable: %s" % train)

    with tf.variable_scope(col_names[self_i]):
        #resized = tf.image.resize_images(state, 84, 84)

        c1, w1, b1 = tfc.conv2d("c1", state, arch[0][1], arch[0][2], size=arch[0][0], stride=arch[0][3], trainable=train)
        c2, w2, b2 = add_lat(tfc.conv2d("c2", c1, arch[1][1], arch[1][2], size=arch[1][0], stride=arch[1][3], act=None, trainable=train), [0])

        c2_size = np.prod(c2.get_shape().as_list()[1:])
        c2_flat = tf.reshape(c2, [-1, c2_size])

        if self_i <= 0:
            h_fc1, w3, b3 = tfc.fc("fc1", c2_flat, c2_size, arch[2][0], trainable=train)
        else:
            h_fc1, w3, b3 = tfc.fc("fc1", c2_flat, c2_size, arch[2][0], act=None, trainable=train)

            lat = c_lats[1]  # summed lateral into fc1 (layer index 1)
            print("adding %s and %s" % (h_fc1, lat))
            lat_size = np.prod(lat.get_shape().as_list()[1:])
            lat_flat = tf.reshape(lat, [-1, lat_size])
            h_fc1 = tf.nn.relu(h_fc1 + lat_flat)

        pi, wp, bp = add_lat(tfc.fc("p_fc", h_fc1, arch[2][0], FLAGS.action_size, act=None, trainable=train), [2, 0], tf.nn.softmax)
        v_, wv, bv = add_lat(tfc.fc("v_fc", h_fc1, arch[2][0], 1, act=None, trainable=train), [2, 1], None)

        v = tf.reshape(v_, [-1])

        train_vars.extend([w1, b1, w2, b2, w3, b3, wp, bp, wv, bv])

        col_vars = pi, v, train_vars, [c1, c2, h_fc1]

        print("policy: %s" % pi)
        print("last fc: %s" % h_fc1)
        print("wp: %s" % wp.name)

        print("created column %i." % self_i)

        return col_vars
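The call pattern implied by the signature is columns built in task order, each receiving the hidden activations of all earlier columns. A hedged driver sketch; the 84x84 input shape (suggested by the commented-out resize above) and the loop itself are assumptions, only the signature and return tuple come from the source:

state = tf.placeholder(tf.float32, [None, 84, 84, constants.history_frames])
col_names = ["col_%s" % t for t in constants.tasks]

col_hiddens = []  # hidden activations of every previously built column
columns = []
for i in range(len(constants.tasks)):
    pi, v, train_vars, hiddens = create_column(col_names, i, state, col_hiddens)
    col_hiddens.append(hiddens)
    columns.append((pi, v, train_vars))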
Example 2
def lateral_connection(orig_hidden, dest_shape, self_i, current_op_shape=None):
    """Build an adapter from a frozen column's hidden layer into column self_i.

    The non-linear variant gates the origin with a learned scalar, reduces
    its dimensionality ("V"), then projects it to the destination layer's
    pre-activation shape ("U"); the result is added to that layer before
    its activation.
    """
    print("adapter origin: %s" % orig_hidden.name)
    train = self_i == len(constants.tasks)-1
    #print(self_i)
    #print(len(constants.tasks)-1)
    print("lateral trainable: %s" % train)
    nonlinear = True

    omit_b = True

    a = tf.get_variable(name="adapter", shape=[1], initializer=tf.constant_initializer(1), trainable=train)
    ah = tf.multiply(a, orig_hidden)  # tf.mul in pre-1.0 TensorFlow

    if nonlinear:
        if len(orig_hidden.get_shape().as_list()) == 4:
            maps_in = ah.get_shape().as_list()[3]
            nic = int(maps_in / (2.0 * self_i))  # shrink the map count by 2 * column index (self_i >= 1 here)
            lateral, w1, b1 = tfc.conv2d("V", ah, maps_in, nic, size=1, stride=1, trainable=train)  # reduction (keep bias)

            print("1) conv 1x1: %s" % w1.get_shape())

            if len(dest_shape) > 1:   # conv layer to conv layer
                lateral, w2, _ = tfc.conv2d("U", lateral, nic, current_op_shape[2], size=current_op_shape[0],
                                            stride=current_op_shape[3], act=None, omit_bias=omit_b, padding="SAME", trainable=train)
                print("2) conv %ix%i: %s" % (current_op_shape[0], current_op_shape[0], w2.get_shape()))
                print("end result: %s" % lateral.name)

                return lateral, [w1, b1, w2]

            else:  # conv layer to fc layer
                c_size = np.prod(lateral.get_shape().as_list()[1:])
                c_flat = tf.reshape(lateral, [-1, c_size])
                lateral, w2, _ = tfc.fc("U", c_flat, c_size, dest_shape[0], act=None, omit_bias=omit_b, trainable=train)
                print("2) flattened conv fc: %s" % w2.get_shape())
                print("end result: %s" % lateral.name)

                return lateral, [w1, b1, w2]

        else:  # fc layer to fc layer
            n_in = ah.get_shape().as_list()[1]
            ni = int(n_in / (2.0 * self_i))  # shrink the unit count by 2 * column index
            lateral, w1, b1 = tfc.fc("V", ah, n_in, ni, trainable=train)  # reduction (keep bias)
            print("1) fc: %s" % w1.get_shape())
            lateral, w2, _ = tfc.fc("U", lateral, ni, dest_shape[0], act=None, omit_bias=omit_b, trainable=train) # to be added to next hidden
            print("2) fc: %s" % w2.get_shape())
            print("end result: %s" % lateral.name)

            return lateral, [w1, b1, w2]
    else:
        if len(orig_hidden.get_shape().as_list()) == 4:
            maps_in = ah.get_shape().as_list()[3]

            if len(dest_shape) > 1:   # conv layer to conv layer
                lateral, w2, _ = tfc.conv2d("U", ah, maps_in, current_op_shape[2], size=current_op_shape[0],
                                            stride=current_op_shape[3], act=None, omit_bias=omit_b, padding="SAME", trainable=train)
                return lateral, [w2]

            else:  # conv layer to fc layer
                c_size = np.prod(ah.get_shape().as_list()[1:])
                c_flat = tf.reshape(ah, [-1, c_size])
                lateral, w2, _ = tfc.fc("U", c_flat, c_size, dest_shape[0], act=None, omit_bias=True, trainable=train)
                return lateral, [w2]

        else:  # fc layer to fc layer
            n_in = ah.get_shape().as_list()[1]
            lateral, w2, _ = tfc.fc("U", ah, n_in, dest_shape[0], act=None, omit_bias=True, trainable=train) # to be added to next hidden
            return lateral, [w2]
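The "V" stage sizes its output in proportion to the column index, so laterals into later columns are compressed more aggressively. A minimal sketch of just that arithmetic (plain Python; the function name is hypothetical):

def adapter_reduced_maps(maps_in, self_i):
    # Mirrors the nic/ni computation above; valid for self_i >= 1.
    return int(maps_in / (2.0 * self_i))

assert adapter_reduced_maps(16, 1) == 8  # column 1 halves the map count
assert adapter_reduced_maps(16, 2) == 4  # column 2 quarters it
assert adapter_reduced_maps(32, 2) == 8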
Example 3
def create_column(col_names, i_task, state, col_hiddens):
    print("creating column {}".format(i_task))
    arch = [
        [8, HIST_FRM, 16, 4],  # size, in, out, stride
        [4, 16, 32, 2],
        [256],
        -1
    ]
    train_vars = []
    lats = []
    c_lats = []
    # If current task is not task 0, build laterals
    if i_task > 0:
        with tf.variable_scope("laterals"):
            print("creating lateral connections to column {}".format(i_task))
            # From task 0 to current task
            for i_col in range(i_task):
                hiddens = col_hiddens[i_col]
                print("##{}".format(len(hiddens)))
                col_lats = []
                print("creating laterals {} -> {} ".format(i_col, i_task))
                with tf.variable_scope("{}_to_{}".format(
                        col_names[i_col], col_names[i_task])):

                    # From the first layer to last layer
                    for i_layer in range(len(hiddens)):
                        layer_lats = []
                        print("###Layer {}".format(i_layer))
                        dest_h_shape = arch[
                            i_layer + 1]  # The shape of h in destination layer

                        with tf.variable_scope("layer{}to{}".format(
                                i_layer, i_layer + 1)):
                            orig_h = hiddens[i_layer]
                            #OLD: tf.stop_gradient(hiddens[i_layer]) #origin
                            print("layer {} -> {}".format(
                                i_layer, i_layer + 1))
                            if dest_h_shape == -1:  # to policy and value layer
                                with tf.variable_scope("policy"):
                                    lat_h_p, lat_vars_p = lateral_connection(
                                        orig_h, [FLAGS.action_size], i_task)
                                with tf.variable_scope("value"):
                                    lat_h_v, lat_vars_v = lateral_connection(
                                        orig_h, [1], i_task)
                                layer_lats.append(lat_h_p)
                                layer_lats.append(lat_h_v)
                                train_vars.extend(lat_vars_p)
                                train_vars.extend(lat_vars_v)
                            else:
                                lat_h, lat_vars = lateral_connection(
                                    orig_h, dest_h_shape, i_task,
                                    arch[i_layer + 1])

                                layer_lats.append(lat_h)
                                train_vars.extend(lat_vars)

                            col_hiddens[i_col][i_layer] = orig_h

                        col_lats.append(layer_lats)
                lats.append(col_lats)

        #print("columns: %i" % (len(lats) + 1))
        #print("hidden layers: %i" % (len(lats[0])))
        #print("hidden shapes: %s" % col_hiddens[0])

        # sum same-layer lateral connections across the source columns
        for i in range(len(lats[0])):
            if arch[i + 1] == -1:
                to_policy_list = [lats[k][i][0] for k in range(len(lats))]
                to_value_list = [lats[k][i][1] for k in range(len(lats))]
                to_policy = tf.reduce_sum(to_policy_list, 0)
                to_value = tf.reduce_sum(to_value_list, 0)

                c_lats.append([to_policy, to_value])

                print("summing ->policy and ->value layers")
                print(to_policy_list)
                print("=>")
                print(to_policy)
                print("&")
                print(to_value_list)
                print("=>")
                print(to_value)
            else:
                h_list = [lats[k][i][0] for k in range(len(lats))]

                # arch[i + 1] with len > 1 is a conv layer, otherwise fully connected
                c = tf.reduce_sum(h_list, 0)
                c_lats.append(c)
                layer_kind = ("convolutional"
                              if len(arch[i + 1]) > 1 else "fully connected")
                print("summing {} layers".format(layer_kind))
                print(h_list)
                print("=>")
                print(c)

            print("~~~")

    print("done summing layers")

    #print("c lats:")
    #print(c_lats)

    def add_lat(layer, i, act=tf.nn.relu):
        """Add the summed lateral input to a layer's pre-activation.

        `layer` is the (tensor, weights, bias) triple returned by
        tfc.conv2d / tfc.fc; `i` indexes c_lats: [k] for a hidden layer,
        [k, 0] / [k, 1] for the policy / value head. Column 0 has no
        laterals, so only the activation is applied there.
        """
        if i_task <= 0:
            if act is None:
                return layer[0], layer[1], layer[2]
            else:
                return act(layer[0]), layer[1], layer[2]
        elif len(i) == 1:
            print("adding {} and {}".format(layer[0], c_lats[i[0]]))
            return act(layer[0] + c_lats[i[0]]), layer[1], layer[2]
        else:
            if act is None:
                print("(value) adding {} and {}".format(
                    layer[0], c_lats[i[0]][i[1]]))
                return layer[0] + c_lats[i[0]][i[1]], layer[1], layer[2]
            else:
                print("(policy) adding {} and {}".format(
                    layer[0], c_lats[i[0]][i[1]]))
                return act(layer[0] + c_lats[i[0]][i[1]]), layer[1], layer[2]

    train = i_task == len(constants.tasks) - 1
    print("column trainable: {}".format(train))

    with tf.variable_scope(col_names[i_task]):
        #resized = tf.image.resize_images(state, IMG_SIZE, IMG_SIZE)

        c1, w1, b1 = tfc.conv2d("c1",
                                state,
                                arch[0][1],
                                arch[0][2],
                                size=arch[0][0],
                                stride=arch[0][3],
                                trainable=train)
        c2, w2, b2 = add_lat(
            tfc.conv2d("c2",
                       c1,
                       arch[1][1],
                       arch[1][2],
                       size=arch[1][0],
                       stride=arch[1][3],
                       act=None,
                       trainable=train), [0])

        c2_size = np.prod(c2.get_shape().as_list()[1:])
        c2_flat = tf.reshape(c2, [-1, c2_size])

        if i_task <= 0:
            h_fc1, w3, b3 = tfc.fc("fc1",
                                   c2_flat,
                                   c2_size,
                                   arch[2][0],
                                   trainable=train)
        else:
            h_fc1, w3, b3 = tfc.fc("fc1",
                                   c2_flat,
                                   c2_size,
                                   arch[2][0],
                                   act=None,
                                   trainable=train)

            lat = c_lats[1]  # summed lateral into fc1 (layer index 1)
            print("adding {} and {}".format(h_fc1, lat))
            lat_size = np.prod(lat.get_shape().as_list()[1:])
            lat_flat = tf.reshape(lat, [-1, lat_size])
            h_fc1 = tf.nn.relu(h_fc1 + lat_flat)

        pi, wp, bp = add_lat(
            tfc.fc("p_fc",
                   h_fc1,
                   arch[2][0],
                   FLAGS.action_size,
                   act=None,
                   trainable=train), [2, 0], tf.nn.softmax)
        v_, wv, bv = add_lat(
            tfc.fc("v_fc", h_fc1, arch[2][0], 1, act=None, trainable=train),
            [2, 1], None)

        v = tf.reshape(v_, [-1])

        train_vars.extend([w1, b1, w2, b2, w3, b3, wp, bp, wv, bv])

        col_vars = pi, v, train_vars, [c1, c2, h_fc1]

        print("policy: {}".format(pi))
        print("last fc: {}".format(h_fc1))
        print("wp: {}".format(wp.name))
        print("created column {}.".format(i_task))

        return col_vars
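In all three snippets only the newest column is created trainable (train = i_task == len(constants.tasks) - 1), so earlier columns stay frozen while their laterals feed the new one. A hedged sketch of hooking the returned train_vars into an optimizer, continuing the driver sketch after Example 1; the loss and optimizer choice are placeholders, not from this project:

pi, v, train_vars, _ = create_column(col_names, len(constants.tasks) - 1,
                                     state, col_hiddens)
loss = -tf.reduce_mean(v)  # placeholder; the real actor-critic loss lives elsewhere
optimizer = tf.train.RMSPropOptimizer(learning_rate=1e-4)  # assumed hyperparameters
train_op = optimizer.minimize(loss, var_list=train_vars)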