def unpack_all_params(all_params):
    # Split the flat parameter vector into per-layer parameters and the
    # pseudo-point inputs (x0) and outputs (y0) for each layer.
    layer_params = np.array_split(all_params[:sum(num_params_each_layer)], n_layers)
    pseudo_params = all_params[sum(num_params_each_layer):]
    x0, y0 = np.array_split(pseudo_params, 2)
    x0 = np.array_split(x0, n_layers)
    y0 = np.array_split(y0, n_layers)
    return layer_params, x0, y0
def unpack_all_params(all_params):
    layer_params = np.array_split(all_params[:sum(num_params_each_layer)], n_layers)
    pseudo_params = all_params[sum(num_params_each_layer):]
    x0, y0 = np.array_split(pseudo_params, 2)
    x0 = x0.reshape((n_layers, num_pseudo_params, input_dimension))
    y0 = np.array_split(y0, n_layers)
    return layer_params, x0, y0
def __init__(self, x, dist=para.Weibull, **kwargs):
    self.m = kwargs.pop('m', 2)
    raw_data = {}
    c = kwargs.pop('c', None)
    n = kwargs.pop('n', None)
    raw_data['x'] = x
    raw_data['c'] = c
    raw_data['n'] = n
    self.raw_data = raw_data
    self.dist = dist
    x, c, n = surpyval.xcn_handler(x, c, n)
    assert len(x) > self.m * (self.dist.k + 1)
    self.x = x
    self.c = c
    self.n = n
    self.N = n.sum()

    # Initialise each mixture component by fitting the distribution to one
    # equal-sized chunk of the data.
    splits_x = np.array_split(x, self.m)
    splits_c = np.array_split(c, self.m)
    splits_n = np.array_split(n, self.m)
    params = np.zeros(shape=(self.m, self.dist.k))
    for i in range(self.m):
        params[i, :] = self.dist.fit(x=splits_x[i], c=splits_c[i], n=splits_n[i]).params
    self.params = params

    # Start with equal mixing weights and uniform membership probabilities.
    self.w = np.ones(shape=(self.m)) / self.m
    self.p = np.ones(shape=(self.m, len(self.x))) / self.m
def stratify_propensity(propensity_score, x1, B):
    '''
    Stratifies the combined sample into B mutually exclusive (roughly equal
    sized) subgroups based on the propensity score.

    B: number of subsets
    propensity_score: propensity values of the combined sample (x1, x2),
        with the x1 observations first
    Returns, for each stratum, the row indices of x1 and of x2 that fall in it.
    '''
    ind_1 = np.argsort(propensity_score[:len(x1[:, 0])])
    ind_2 = np.argsort(propensity_score[len(x1[:, 0]):])
    split_list_x1 = np.array_split(ind_1, B)
    split_list_x2 = np.array_split(ind_2, B)
    return split_list_x1, split_list_x2
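# A minimal usage sketch with hypothetical data; assumes NumPy is imported as np
# and stratify_propensity is defined as above.
x1 = np.random.randn(6, 3)
x2 = np.random.randn(9, 3)
ps = np.random.uniform(size=len(x1) + len(x2))  # stand-in propensity scores
strata_x1, strata_x2 = stratify_propensity(ps, x1, B=3)
# strata_x1[k] / strata_x2[k] hold the row indices of x1 / x2 assigned to stratum k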
def BinarySplit(z, j):
    # Split z in half along the last axis; for odd dimensions, j decides
    # which half receives the extra element.
    D = z.shape[-1]
    d = D // 2
    if D % 2 == 1:
        d += int(j) % 2
    return np.array_split(z, [d], -1)
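# A minimal usage sketch with a hypothetical odd-dimensional input; assumes NumPy
# is imported as np and BinarySplit is defined as above.
z = np.random.randn(8, 5)
left, right = BinarySplit(z, 0)  # shapes (8, 2) and (8, 3)
left, right = BinarySplit(z, 1)  # shapes (8, 3) and (8, 2)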
def kFoldsHelper(x, index, folds):
    if (index > folds - 1) or (index < 0):
        raise IndexError('Index out of range of permitted folds')
    if folds < 2:
        raise ValueError('Insufficient number of folds')
    observations = x.shape[0]
    if observations < folds:
        raise IndexError('Cannot have more folds than observations')
    # folds - 1 integer split points give `folds` roughly equal pieces.
    indices = [(observations // folds) * i for i in range(1, folds)]
    splits = np.array_split(x, indices)
    test = splits.pop(index)
    return np.concatenate(splits), test
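# A minimal usage sketch with hypothetical data; assumes NumPy is imported as np
# and kFoldsHelper is defined as above.
X = np.arange(20).reshape(10, 2)
train, test = kFoldsHelper(X, index=0, folds=5)
print(train.shape, test.shape)  # (8, 2) (2, 2)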
def build_tree(self, depth=2):
    """
    Builds the adjacency list of a complete binary tree up to the given depth.
    """
    total_nodes = np.sum([2 ** x for x in range(depth)])
    nodes = list(range(total_nodes))
    nodes_per_level = np.cumsum([2 ** x for x in range(depth - 1)])
    nodes_level = [x.tolist() for x in np.array_split(nodes, nodes_per_level)]
    adj_list = dict((idx, {}) for idx in nodes)
    for fr in nodes_level[:-1]:
        for i in fr:
            i_list = adj_list.get(i, {})
            # the connected nodes always follow this pattern
            i_list["left"] = i * 2 + 1
            i_list["right"] = i * 2 + 2
            adj_list[i] = i_list.copy()
    return adj_list
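# A minimal sketch of the structure produced for depth=2; build_tree never uses
# self, so it can be exercised directly here purely for illustration.
adj = build_tree(None, depth=2)
print(adj)  # {0: {'left': 1, 'right': 2}, 1: {}, 2: {}}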
def make_batch_iter(X, batch_size, max_iter):
    N, D = X.shape
    n_batches = int(np.ceil(N / batch_size))
    n_epochs = int(np.ceil(max_iter / n_batches))

    # Pre-compute a shuffled batch schedule for every epoch.
    idx = np.arange(N)
    batch_sched = []
    for i in range(n_epochs):
        idx_shuffled = np.random.permutation(idx)
        batches = np.array_split(idx_shuffled, n_batches)
        batch_sched.append(batches)

    def get_batch(t):
        # Map the global step t to an (epoch, batch) position in the schedule.
        epoch = int(np.floor(t / n_batches))
        batch = t % n_batches
        epoch_idx = batch_sched[epoch]
        idx_batch = epoch_idx[batch]
        return X[idx_batch].reshape(len(idx_batch), D)

    return get_batch
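# A minimal usage sketch with hypothetical data; assumes NumPy is imported as np
# and make_batch_iter is defined as above.
X = np.random.randn(10, 3)
get_batch = make_batch_iter(X, batch_size=4, max_iter=6)
for t in range(6):
    xb = get_batch(t)  # roughly batch_size rows, reshuffled each epoch
    print(t, xb.shape)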
def apply_net_st(self, params, inputs):
    # Forward pass through an MLP with leaky-ReLU hidden activations.
    inpW, inpb = params[0]
    inputs = utils.leakyrelu(np.dot(inputs, inpW) + inpb)
    for W, b in params[1:-1]:
        outputs = np.dot(inputs, W) + b
        inputs = utils.leakyrelu(outputs)
    outW, outb = params[-1]
    outputs = np.dot(inputs, outW) + outb
    assert outputs.shape[:-1] == inputs.shape[:-1]
    assert outputs.shape[-1] % 2 == 0
    # Split the (even-sized) output in half along the last axis into s and t.
    s, t = np.array_split(outputs, 2, -1)
    assert s.shape == t.shape
    return utils.tanh(s), t
def fit(self, X):
    # Encoder weight & bias
    self.W = np.random.randn(X.shape[1], self.code_size)
    self.b = np.full(self.code_size, 0.1)

    # Decoder weight & bias
    self.W_prime = np.random.randn(self.code_size, X.shape[1])
    self.b_prime = np.full(X.shape[1], 0.1)

    # Group model parameters for later optimizations
    params = [self.W, self.b, self.W_prime, self.b_prime]

    # Make batches out of datasets
    batches = np.array_split(X, X.shape[0] // self.batch_size)

    # set the objective function
    def objective(params, step):
        self.W, self.b, self.W_prime, self.b_prime = params
        chunk = batches[int(step % len(batches))]
        C = self.encode(chunk)
        X_prime = self.decode(C)
        return rmse(chunk, X_prime)

    # Compute gradient of model parameters. Yes, we are not doing manual
    # partial differentiation. No one sane does.
    objective_grad = grad(objective)  # See? Science.

    max_epoch = 500

    def callback(params, step, g):
        if step % max_epoch == 0:
            print("Iteration {0:3d} objective {1:1.2e}".format(
                step // max_epoch + 1, objective(params, step)))

    # The real optimization goes here
    params = adam(objective_grad, params, step_size=0.01,
                  num_iters=50 * max_epoch, callback=callback)
def unpack_layer_params(params):
    # assuming all parameters have equal dims, change to what we had below
    gp_params = np.array_split(params, output_dimension)
    return gp_params
def unpack_all_params(all_params):
    all_layer_params = np.array_split(all_params, np.cumsum(num_params_each_layer))
    return all_layer_params
def batches(data, batch_size=100):
    # Integer division gives the number of chunks; the max() guard keeps
    # np.array_split from being asked for zero sections on small inputs.
    n = data.shape[0]
    return np.array_split(data, max(1, n // batch_size))
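# A minimal usage sketch with hypothetical data; assumes NumPy is imported as np
# and batches is defined as above. Note that np.array_split balances the chunks,
# so each chunk is only approximately batch_size rows.
data = np.arange(250).reshape(250, 1)
for b in batches(data, batch_size=100):
    print(b.shape)  # (125, 1) then (125, 1)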
def unpack_layer_params(params):
    gp_params = np.array_split(params, np.cumsum(num_params_each_output))
    return gp_params
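# A small illustration of the split-at-cumulative-offsets pattern used above,
# with hypothetical sizes; assumes NumPy is imported as np.
sizes = [3, 2, 4]
flat = np.arange(9)
chunks = np.array_split(flat, np.cumsum(sizes))
# chunks -> [0 1 2], [3 4], [5 6 7 8], plus a trailing empty array because the
# last offset equals len(flat)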
def fit(self, X, X_tar, y, y_tar, max_iter=500, warm_start=False,
        use_dropout=False, desc='', regularize=True):
    m = X.shape[0]
    n_x = X.shape[1]
    print(n_x)
    n_class_src = len(set(y))
    if len(set(y_tar)) > 0:
        n_class_tar = len(set(y_tar))
    m_tar = X_tar.shape[0]

    if not warm_start:
        ''' weight and bias initialization'''
        # shared weights
        self.W1 = np.random.randn(self.nn_hidden, n_x)
        self.b1 = np.zeros((self.nn_hidden, 1))
        # task 1 (source) specific weights
        self.task_1 = Task(self.nn_hidden, n_class_src, self.learning_rate, m, self.T)
        # task 2 (target) specific weights
        self.task_2 = Task(self.nn_hidden, n_class_src, self.learning_rate, m, self.T)

    X_shuf, y_shuf = shuffle(X, y)
    if len(y_tar) > 0:
        X_tar_shuf, y_tar_shuf = shuffle(X_tar, y_tar)

    # transform labels into one-hot vectors
    le = LabelBinarizer()
    le.fit(list(y) + list(y_tar))
    if len(y_tar) > 0:
        le_tar = LabelBinarizer()
        le_tar.fit(y_tar)

    bs = np.min([self.batch_size, X_shuf.shape[0]])
    batches_X = np.array_split(X_shuf, m // bs)
    batches_y = np.array_split(y_shuf, m // bs)
    tasks_1 = [1 for i in range(len(batches_y))]
    batches_X_tar = np.array([])
    batches_y_tar = np.array([])
    if len(y_tar) > 0:
        batches_X_tar = np.array_split(X_tar_shuf, max(1, m_tar // self.batch_size))
        batches_y_tar = np.array_split(y_tar_shuf, max(1, m_tar // self.batch_size))
    tasks_2 = [2 for i in range(len(batches_y_tar))]

    # TO DO: hstack source and target batches in alternating way
    all_batches_X = list(itertoolz.interleave([batches_X, batches_X_tar]))[::-1]
    all_batches_y = list(itertoolz.interleave([batches_y, batches_y_tar]))[::-1]
    all_tasks = list(itertoolz.interleave([tasks_1, tasks_2]))[::-1]

    def get_batch(step):
        idx = step % len(all_tasks)
        task = all_tasks[idx]
        X_new = all_batches_X[idx].T
        y_new = all_batches_y[idx]
        y_new = le.transform(y_new)
        y_new = y_new.T
        return X_new, y_new, task

    def batch_normalize(W):
        mu = np.mean(W, axis=0)
        var = np.var(W, axis=0)
        W = (W - mu) / np.sqrt(var + 1)
        return W

    def bhattacharyya(a, b):
        """ Bhattacharyya distance between distributions (lists of floats). """
        if not len(a) == len(b):
            raise ValueError("a and b must be of the same size")
        return -np.log(sum((np.sqrt(u * w) for u, w in zip(a, b))))

    def model_loss(params, step):
        W, b1, W2_1, b2_1, W2_2, b2_2 = params
        W_norm = W  # batch_normalize(W)
        # W2_1 = batch_normalize(W2_1)
        # W2_2 = batch_normalize(W2_2)
        X, y, task = get_batch(step)
        prod = W_norm @ X + b1
        nonlin = relu(prod)
        if use_dropout:
            nonlin *= np.random.binomial(
                [np.ones((len(prod), nonlin.shape[1]))],
                1 - self.dropout_percent)[0] * (1.0 / (1 - self.dropout_percent))
        if task == 1:
            out = (W2_1 @ nonlin) + b2_1
        else:
            out = (W2_2 @ nonlin) + b2_2
        prob = np.exp(out / self.T) / np.sum(np.exp(out / self.T))
        L = loss(y, prob)
        # task relatedness
        if regularize:
            a_bar = (flatten(self.task_1.W)[0] + flatten(self.task_2.W)[0]) / 2
            a_bar_norm = np.linalg.norm(a_bar, 2)
            source_norm = np.linalg.norm(flatten(self.task_1.W)[0] - a_bar, 2)
            tar_norm = np.linalg.norm(flatten(self.task_2.W)[0] - a_bar, 2)
            reg = a_bar_norm + 0.1 * (source_norm + tar_norm) / 2
        else:
            reg = 0
        # Bhattacharyya penalty
        P_s_prime = relu(((W_norm @ X_shuf.T) + b1)).T.mean(axis=0)
        P_t_prime = relu(((W_norm @ X_tar_shuf.T) + b1)).T.mean(axis=0)
        P_s = P_s_prime / (np.sum(P_s_prime))
        P_t = P_t_prime / (np.sum(P_t_prime))
        m = np.multiply(P_s, P_t)
        bt_distance = -(np.log(np.sum(P_s * P_t)))
        return L + 0.3 * bt_distance  # + 0.3 * reg

    params = [
        self.W1, self.b1,
        self.task_1.W, self.task_1.b,
        self.task_2.W, self.task_2.b
    ]
    model_loss_grad = grad(model_loss)
    max_epoch = 500

    def callback(params, step, g):
        if step % max_epoch == 0:
            print("Iteration {0:3d} objective {1:1.2e}; task {2}".format(
                step // max_epoch + 1, model_loss(params, step), '-'))

    self.W1, self.b1, self.task_1.W, self.task_1.b, self.task_2.W, self.task_2.b = adam(
        model_loss_grad, params,
        step_size=self.learning_rate,
        num_iters=30 * max_epoch,
        callback=callback)
    return self
net = nn.add_forward(net, 1, nn.sigmoid)

learning_rate = 0.001
weights = nn.init_weights(net)
batch_size = 64
chunks = len(train_xs) // batch_size

def cost(yhat, y):
    eps = 1e-18
    loss = -(y * np.log(yhat + eps) + (1 - y) * np.log(1 - yhat + eps))
    cost = np.squeeze(np.mean(loss, axis=1))
    return cost

for epoch in range(0, 1000):
    losses = []
    for item in np.array_split(list(zip(train_xs, train_ys)), chunks):
        batch_xs = [i[0] for i in item]
        batch_ys = [i[1] for i in item]
        batch_xs = np.transpose(batch_xs)
        batch_ys = np.transpose(batch_ys).reshape((1, -1))
        c, weights = nn.grad_descent(batch_xs, batch_ys, cost, net, weights,
                                     learning_rate=learning_rate)
        losses.append(c)
    print('epoch %d is loss %f' % (epoch, np.mean(losses)))

print('os loss', cost(nn.forward_pass(test_xs, net, weights), test_ys))
print('loss', l(weights))