"Clus_Coeff", ] labels = dict((x, y + 1) for x, y in zip(all_measurement_labels, range(len(all_measurement_labels)))) measurement_labels = ["Betweenness", "Closeness", "#BCC", "Degree", "Wt_Degree", "Clus_Coeff"] pruned_labels = dict((x, y) for x, y in zip(all_measurement_labels, range(len(measurement_labels)))) cikm_node_mapping = {} for i, node_id in enumerate(id_cikm): cikm_node_mapping[node_id] = i p, q = m_cikm.shape normalized_measurements_cikm = np.zeros((p, q)) normalized_measurements_cikm[:, 0] = m_cikm[:, 0] normalized_measurements_cikm[:, 1:] = norm(m_cikm[:, 1:], norm="l2", axis=0) author_nr_vector = nr_cikm[cikm_node_mapping[names_cikm[author_name]], :] author_nr_idx = cikm_node_mapping[names_cikm[author_name]] cosine_values = [] for idx, node_id in enumerate(id_cikm): if idx == author_nr_idx: continue sim = cosine_similarity(author_nr_vector, nr_cikm[idx, :]) cosine_values.append((idx, sim)) sorted_cosine = sorted(cosine_values, key=lambda x: x[1], reverse=True) author_ranks = get_measurements(normalized_measurements_cikm, names_cikm[author_name], labels)
''' print("Data Summary\n", dataset.describe()) # Gathering Feature Matrix and vector of prediction X = dataset.iloc[:, [0, 1, 2, 4, 5]].values y = dataset.iloc[:, 3].values # Deaths # Not splitting into train test set # Label encoding Categorical valus lab = LB() X[:, 0] = lab.fit_transform(X[:, 0].astype(str)) X[:, 1] = lab.fit_transform(X[:, 1].astype(str)) #normalizing X = norm(X) #Building the model # Linear Regression lin_reg = LinearRegression() lin_reg.fit(X, y) print("Feature : Coefficient ") for coeff, feature in zip(lin_reg.coef_, dataset.columns[[0, 1, 2, 4, 5]]): print(feature, ' : ', coeff) print("Intercept : ", lin_reg.intercept_) # Polynomial Regression poly = PolynomialFeatures(2)
def ball_move(self):
    tmp_reward = 0.01

    # advance the ball one step along its current direction
    pre_pos = np.copy(self.ballPos)
    self.ballPos += self.ballSpeed * self.ballVector

    # wall collisions: reflect off the side and top walls
    if self.ballPos[0] <= self.ballSize:
        self.ballPos[0] = float(self.ballSize)
        self.ballVector[0] *= -1
    elif self.ballPos[0] >= (self.windowWidth - self.ballSize):
        self.ballPos[0] = self.windowWidth - self.ballSize
        self.ballVector[0] *= -1
    if self.ballPos[1] <= self.ballSize:
        self.ballPos[1] = float(self.ballSize)
        self.ballVector[1] *= -1
    elif self.ballPos[1] >= (self.windowHeight - self.ballSize):
        return True, -1  # ball fell past the bottom: episode over

    # brick collisions
    for i in range(self.bricky):
        for j in range(self.brickx):
            if self.brick[i][j] == 1:
                # touch from the right
                if (self.brickRect[i][j].right - self.ballSize) < self.ballPos[0] <= (
                        self.brickRect[i][j].right + self.ballSize) and pre_pos[0] > self.ballPos[0] and \
                        self.brickRect[i][j].top <= self.ballPos[1] < self.brickRect[i][j].bottom:
                    self.ballVector[0] *= -1
                    self.brick[i][j] = 0
                # touch from the left
                if (self.brickRect[i][j].right + self.ballSize) > self.ballPos[0] >= (
                        self.brickRect[i][j].left - self.ballSize) \
                        and pre_pos[0] < self.ballPos[0] and self.brickRect[i][j].bottom > self.ballPos[1] >= \
                        self.brickRect[i][j].top:
                    self.ballVector[0] *= -1
                    self.brick[i][j] = 0
                # touch from the bottom
                if (self.brickRect[i][j].bottom - self.ballSize) < self.ballPos[1] <= (
                        self.brickRect[i][j].bottom + self.ballSize) \
                        and pre_pos[1] > self.ballPos[1] and self.brickRect[i][j].right > self.ballPos[0] >= \
                        self.brickRect[i][j].left:
                    self.ballVector[1] *= -1
                    self.brick[i][j] = 0
                # touch from the top
                if (self.brickRect[i][j].top + self.ballSize) > self.ballPos[1] >= (
                        self.brickRect[i][j].top - self.ballSize) \
                        and pre_pos[1] < self.ballPos[1] and self.brickRect[i][j].right > self.ballPos[0] >= \
                        self.brickRect[i][j].left:
                    self.ballVector[1] *= -1
                    self.brick[i][j] = 0

    tmp_vector = np.copy(self.ballVector)

    # paddle collision: the rebound direction is the unit vector from the
    # paddle centre to the ball, snapped away from shallow angles
    ball_rect = pygame.Rect(self.ballPos[0] - self.ballSize,
                            self.ballPos[1] - self.ballSize,
                            self.ballSize * 2, self.ballSize * 2)
    if pygame.Rect.colliderect(ball_rect, self.barRect):
        tmp_vector = norm([self.ballPos - np.array(self.barRect.center)]).ravel()
        if tmp_vector[1] >= 0:
            tmp_vector = norm([[tmp_vector[0], 1]]).ravel()
        elif tmp_vector[1] > -0.5:
            if tmp_vector[0] > 0:
                tmp_vector += norm(np.array([[0.86, -0.5]])).ravel() - tmp_vector
            else:
                tmp_vector += norm(np.array([[-0.86, -0.5]])).ravel() - tmp_vector
        tmp_reward = 0.1
    self.ballVector = tmp_vector

    if np.sum(self.brick) > 0:
        return False, tmp_reward
    else:
        return True, 1  # all bricks cleared: episode won
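# Standalone illustration (not part of the original class) of the paddle
# rebound above: with norm as sklearn's normalize, the new direction is the
# unit vector from the paddle centre to the ball.
import numpy as np
from sklearn.preprocessing import normalize as norm  # assumed alias

ball_pos = np.array([105.0, 390.0])
bar_center = np.array([100.0, 400.0])
direction = norm([ball_pos - bar_center]).ravel()  # unit vector, shape (2,)
print(direction, np.linalg.norm(direction))        # norm is 1.0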
    return j12


if __name__ == "__main__":
    from extensions.twpg_wavefront import Wavefront

    # accumulate the horizontal field over the wavefront modes, normalizing
    # the real part of each mode before summation
    for itr in range(1):
        if itr == 0:
            wfr = Wavefront()
            wfr.load_hdf5(
                r"/nfs/data/users/twg/gsmProp/atSource/wfr_mode_{}.hdf5".format(itr))
            eField = copy(wfr.data.arrEhor)
            eField[:, :, 0, 0] = norm(np.nan_to_num(eField[:, :, 0, 0]))
        else:
            wfr = Wavefront()
            wfr.load_hdf5(
                r"/nfs/data/users/twg/gsmProp/atSource/wfr_mode_{}.hdf5".format(itr))
            eField[:, :, 0, 0] += norm(np.nan_to_num(wfr.data.arrEhor[:, :, 0, 0]))
            eField[:, :, 0, 1] += copy(wfr.data.arrEhor[:, :, 0, 1])

    x, y, z, c = eField.shape
    fixPhase(eField)

    # pack the two field components into a single complex array
    E = np.zeros((x, y, 1)).astype(complex)
    E[:, :, 0] += eField[:, :, 0, 0]
    E[:, :, 0] += eField[:, :, 0, 1] * 1j
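# Minimal sketch (with plain NumPy arrays standing in for the Wavefront
# data, which is an assumption) of the real/imaginary packing used above to
# build the complex field E.
import numpy as np

eField_demo = np.random.rand(8, 8, 1, 2)  # stand-in for wfr.data.arrEhor
E_demo = eField_demo[:, :, 0, 0] + 1j * eField_demo[:, :, 0, 1]
print(E_demo.shape, E_demo.dtype)  # (8, 8) complex128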
# Assumed imports: ss and norm are the aliases used below.
import numpy as np
from sklearn.preprocessing import StandardScaler as ss  # assumed alias
from sklearn.preprocessing import normalize as norm     # assumed alias


def delt(beta, vect):
    # use optimization to find the minimum positive delta with constraints
    # (1-d)||b||_2^2 <= ||Xb||_2^2 <= (1+d)||b||_2^2
    return np.abs(np.sum(np.square(np.dot(vect, beta))) - 1)


# get the data
data = np.genfromtxt(csv_path, delimiter=",")
# remove rows that have NaN values (not ideal, but fine for now)
data = data[~np.isnan(data).any(axis=1)]
# from stack overflow: https://bit.ly/1QhfcmZ
Y = np.array([x[1] - 1 for x in data])  # y values in the second column
X = np.array([x[2:] for x in data])
del data

stan = ss()
x_norm = norm(stan.fit_transform(X.astype("float")), axis=0)
n_feat = np.size(X, axis=1)
n_row = np.size(X, axis=0)
del X
del Y

# test instances
if test_runs:
    x_norm = norm(stan.fit_transform(np.random.normal(size=(n_row, n_feat))),
                  axis=0)

results = np.zeros((num_runs, max_s))
d_quant = np.zeros((max_s, 4))

while run and b <= max_s:
    print(b)
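# Illustrative check of delt() above (my own sketch, not from the source):
# for a design matrix with roughly isometric columns and a unit-norm beta,
# ||X b||_2^2 stays close to 1, so delt() returns a small deviation.
import numpy as np

rng = np.random.default_rng(0)
X_demo = rng.standard_normal((100, 20)) / np.sqrt(100)  # near-isometric
beta = rng.standard_normal(20)
beta /= np.linalg.norm(beta)  # unit-norm coefficient vector
print(delt(beta, X_demo))     # a small value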
# Assumed imports: StdScaler and norm are the aliases used below; the
# autoencoder() helper comes from the surrounding project.
import numpy as np
import tensorflow as tf
from numpy import linalg as LA
from sys import stderr
from sklearn.preprocessing import StandardScaler as StdScaler  # assumed alias
from sklearn.preprocessing import normalize as norm            # assumed alias


def DAEGO(X_s, H, P, batch_range):
    """
    Parameters
    ----------
    X_s : minority-class features
    H : layer sizes (the first layer should have as many neurons as there
        are features)
    P : oversampling percentage
    batch_range : minibatch size

    Returns
    -------
    syn_Z : synthetic samples with the same number of features as the
        minority class
    """
    # normalization
    scaler = StdScaler()
    x_tr = scaler.fit_transform(X_s.astype(float))
    x_norm = norm(x_tr, axis=0)
    n_samples = int(X_s.shape[0] * P / 100)
    print("generating %d samples" % n_samples)
    norm_param = [LA.norm(x) for x in x_tr.T]
    X_init = np.random.standard_normal(size=(n_samples, X_s.shape[1]))
    x_init_tr = scaler.transform(X_init)
    x_ini_norm = norm(x_init_tr)

    # train a denoising autoencoder on the normalized minority class
    ae = autoencoder(dimensions=H)
    learning_rate = 0.001
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(ae['cost'])
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    n_epoch = 100
    for epoch_i in range(n_epoch):
        for start, end in zip(range(0, len(x_norm), batch_range),
                              range(batch_range, len(x_norm), batch_range)):
            input_ = x_norm[start:end]
            sess.run(optimizer,
                     feed_dict={ae['x']: input_, ae['corrupt_prob']: [1.0]})
        s = "\r Epoch: %d Cost: %f" % (
            epoch_i,
            sess.run(ae['cost'],
                     feed_dict={ae['x']: X_s, ae['corrupt_prob']: [1.0]}))
        stderr.write(s)
        stderr.flush()

    # pass the random seed samples through the trained network, then undo
    # the normalization to obtain synthetic minority samples
    x_init_encoded = sess.run(ae['y'],
                              feed_dict={ae['x']: x_ini_norm,
                                         ae['corrupt_prob']: [0.0]})
    sess.close()
    x_init_norminv = np.multiply(x_init_encoded, norm_param)
    syn_Z = scaler.inverse_transform(x_init_norminv)
    return syn_Z
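# Hypothetical usage of DAEGO above (names and sizes are illustrative):
# oversample a 10-feature minority class by 200%, with the first autoencoder
# layer matching the feature count as the docstring requires.
import numpy as np

X_minority = np.random.randn(50, 10)  # 50 minority samples, 10 features
synthetic = DAEGO(X_minority, H=[10, 6, 10], P=200, batch_range=16)
print(synthetic.shape)  # expected: (100, 10)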
# The head of this list was truncated in the original fragment; it is
# reconstructed here to mirror measurement_labels below.
all_measurement_labels = ['Betweenness', 'Closeness', '#BCC',
                          'Degree', 'Wt_Degree', 'Clus_Coeff']
labels = dict((x, y + 1) for x, y in zip(all_measurement_labels,
                                         range(len(all_measurement_labels))))
measurement_labels = ['Betweenness', 'Closeness', '#BCC',
                      'Degree', 'Wt_Degree', 'Clus_Coeff']
pruned_labels = dict((x, y) for x, y in zip(all_measurement_labels,
                                            range(len(measurement_labels))))

# map node ids to row indices
cikm_node_mapping = {}
for i, node_id in enumerate(id_cikm):
    cikm_node_mapping[node_id] = i

# m_cikm = norm(m_cikm, norm='l2', axis=0)
p, q = m_cikm.shape
normalized_measurements_cikm = np.zeros((p, q))
normalized_measurements_cikm[:, 0] = m_cikm[:, 0]  # keep the id column as-is
normalized_measurements_cikm[:, 1:] = norm(m_cikm[:, 1:], norm='l2', axis=0)

author_nr_vector = nr_cikm[cikm_node_mapping[names_cikm[author_name]], :]
author_nr_idx = cikm_node_mapping[names_cikm[author_name]]

# rank all other authors by cosine similarity to the query author
cosine_values = []
for idx, id_file_id in enumerate(id_cikm):
    if idx == author_nr_idx:
        continue
    sim = cosine_similarity(author_nr_vector, nr_cikm[idx, :])
    cosine_values.append((id_file_id, sim))
sorted_cosine = sorted(cosine_values, key=lambda x: x[1], reverse=True)

author_ranks = get_measurements(normalized_measurements_cikm,
                                names_cikm[author_name], labels)
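# The cosine_similarity called above is assumed to accept two 1-D vectors;
# a minimal NumPy version consistent with that call signature:
import numpy as np

def cosine_similarity(u, v):
    """Cosine of the angle between two 1-D vectors."""
    return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))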
def colorbar_plot(dataset, mesh=None, label=None, title=None, xlabel=None,
                  ylabel=None, clabel="", context='paper', cmap='bone',
                  normalise=True, scale=1, aspect='auto', sdir=None,
                  lognorm=False):
    """
    Plot a 2D data array with a colorbar (x, y).

    :param dataset: 2D data array (e.g. a correlation map via get_correlation)
    :param mesh: coordinate mesh [np array]
    :param label: figure label
    :param title: figure title
    :param xlabel: x-axis label
    :param ylabel: y-axis label
    :param clabel: colorbar label
    :param context: seaborn plotting context
    :param cmap: figure color map
    :param normalise: normalise the data to [0, 1] before plotting
    :param scale: scale factor applied to the mesh extent
    :param aspect: imshow aspect ratio
    :param sdir: save directory for output .png
    :param lognorm: use a logarithmic color scale
    """
    sns.set_context(context)

    if normalise:
        dataset = norm(dataset)
        vmin, vmax = 0, 1
    else:
        vmin, vmax = np.min(dataset), np.max(dataset)

    if mesh is not None:
        extent = [np.min(mesh[1]) * scale, np.max(mesh[1]) * scale,
                  np.min(mesh[0]) * scale, np.max(mesh[0]) * scale]
    else:
        extent = None

    if lognorm:
        lognorm = matplotlib.colors.LogNorm()
        vmin += 1e-100  # avoid zero on a log scale
    else:
        lognorm = None

    fig, ax1 = plt.subplots(figsize=fig_size)
    img = ax1.imshow(dataset, cmap=cmap, extent=extent, vmin=vmin, vmax=vmax,
                     aspect=aspect, norm=lognorm)

    divider = make_axes_locatable(ax1)
    cax = divider.append_axes('right', size='7.5%', pad=0.05)
    cbar = fig.colorbar(img, cax)

    ax1.set_title(title)
    ax1.set_xlabel(xlabel)
    ax1.set_ylabel(ylabel)
    cbar.set_label(clabel)
    ax1.annotate(label, horizontalalignment='left',
                 verticalalignment='bottom', xy=(0, 1), c='white')

    if sdir is None:
        fig.show()
    else:
        fig.savefig(sdir + ".png")
    plt.show()
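# Hypothetical usage of colorbar_plot above: a random 2D field on a unit
# mesh (fig_size is assumed to be a module-level (width, height) tuple, as
# the function body requires).
import numpy as np

field = np.random.rand(64, 64)
mesh = np.meshgrid(np.linspace(0, 1, 64), np.linspace(0, 1, 64))
colorbar_plot(field, mesh=mesh, title="demo field", xlabel="x", ylabel="y",
              clabel="intensity")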