def verify(align_imgs):
    """Verify images after align.

    Feeds ``align_imgs`` to the ``input:0`` tensor of the graph returned by
    ``get_graph()``, evaluates ``embeddings:0`` with ``phase_train:0`` set to
    False, and measures the Euclidean distance between rows 0 and 1 of the
    resulting embedding matrix.

    @input: images after align (must produce at least two embedding rows;
        only the first two are compared)
    @output: the distance, formatted as a string with four decimals
    """
    g = get_graph()
    with g.as_default():
        # Get input and output tensors
        images_placeholder = g.get_tensor_by_name("input:0")
        embeddings = g.get_tensor_by_name("embeddings:0")
        phase_train_placeholder = g.get_tensor_by_name("phase_train:0")

        # Run forward pass to calculate embeddings
        feed_dict = {
            images_placeholder: align_imgs,
            phase_train_placeholder: False
        }
        sess = get_session()
        emb = sess.run(embeddings, feed_dict=feed_dict)

        # L2 distance between the two embeddings. The NumPy ufunc is
        # `subtract`; `np.substract` does not exist and raised AttributeError.
        dist = np.sqrt(np.sum(np.square(np.subtract(emb[0, :], emb[1, :]))))
        print('distance: %1.4f' % dist)
        return '%1.4f' % dist
def lmds(k, sqr_dist_mat):
    """
    Landmark multidimensional scaling.

    First argument is desired dimension and second argument is squared
    distance matrix; each row corresponds to a landmark point and each entry
    in the row is the distance to a different data point. Here we discard the
    landmark points because, for topological L-isomap, the landmarks come
    from outside the data.

    Returns the embedding computed for the non-landmark columns.

    NOTE: ``rest_of_dist`` is a slice of ``sqr_dist_mat``; if the input is a
    NumPy array the centring loop below therefore modifies the caller's
    matrix in place.
    """
    num_landmarks = len(sqr_dist_mat)
    num_points = len(sqr_dist_mat[0])

    # NOTE(review): both slices stop at ``... - 1`` and so drop the last
    # landmark column / last data column -- confirm this is intended.
    sub_mat = sqr_dist_mat[:, 0:num_landmarks - 1]
    evalues, landmark_embedding = mds(k, sub_mat)
    k = len(landmark_embedding[:, 0])

    # NOTE(review): np.multiply is element-wise, not a matrix product --
    # confirm against the intended L-MDS formula.
    pseudo_embedding = np.multiply(landmark_embedding,
                                   np.diag(np.reciprocal(evalues)))
    sum_sq_dist = np.sum(sqr_dist_mat[:, 0:num_landmarks - 1])
    mean_sq_dist = np.transpose(np.multiply(1.0 / num_landmarks, sum_sq_dist))

    rest_of_dist = sqr_dist_mat[:, num_landmarks:num_points - 1]
    # The loop count is the number of rows (same value the original obtained
    # via ``len(rest_of_dist - 1)``) although ``i`` indexes columns.
    # NOTE(review): confirm the intended range.
    for i in range(0, len(rest_of_dist)):
        helper = rest_of_dist[:, i]
        # np.subtract -- the original `np.substract` does not exist.
        rest_of_dist[:, i] = np.subtract(helper, mean_sq_dist)

    rest_of_embedding = np.multiply(
        -0.5, np.multiply(pseudo_embedding, rest_of_dist))
    return rest_of_embedding
def prehiten(x):
    """Standardise an array to zero mean and (floored) unit deviation.

    The global mean of ``x`` is removed and the result is divided by the
    global standard deviation. The deviation is floored at
    ``1 / sqrt(x.size)`` so a constant input yields zeros instead of a
    division by zero.

    :param x: NumPy array (``x.size`` is read, so a plain list is not enough)
    :return: array of the same shape as ``x``
    """
    mean = np.mean(x)
    std = np.std(x)
    std_adj = np.maximum(std, 1.0 / np.sqrt(x.size))
    # np.subtract -- the original `np.substract` does not exist.
    y = np.multiply(np.subtract(x, mean), 1 / std_adj)
    return y
def calc_score(img1, img2):
    """Compare the single face found in each of two images.

    One face is extracted from each image with ``mtcnn_detector``, both are
    embedded with ``detectors[2]``, and the Euclidean distance between the
    embeddings is compared against a fixed threshold of 0.35.

    :return: a message string stating whether the faces match, including the
        score. If either image does not contain exactly one face, the tuple
        ``(message, 0)`` is returned instead (note the different shape).
    """
    threshold = 0.35
    face1 = mtcnn_detector.get_face_from_single_image(img1)
    face2 = mtcnn_detector.get_face_from_single_image(img2)
    print(face1)
    print(face2)

    if len(face1) != 1 or len(face2) != 1:
        # check if there is only one face in the image
        return 'Please upload image with exact one person.', 0

    # Old way (pretrained resnet1) was face_net.predict(face[0]).
    emb1 = detectors[2].predict(face1[0])
    emb2 = detectors[2].predict(face2[0])

    # np.subtract -- the original `np.substract` does not exist.
    score = np.sqrt(np.sum(np.square(np.subtract(emb1, emb2))))
    if score < threshold:
        return 'Same person with score %s' % str(score)
    else:
        return 'Not the same person with score %s' % str(score)
def evaluate(self, inputs, labels):
    """Return the mean squared error of the network on one batch.

    inputs: (input_dim, seq_len, N_BATCH)
    labels: (output_dim, N_BATCH)

    The predictions come from ``self.feed_forwaed(inputs)`` and are expected
    to be broadcast-compatible with ``labels``.
    """
    preds = self.feed_forwaed(inputs)  # (OUTPUT_DIM, N_BATCH)
    labels = np.asarray(labels)
    # np.subtract -- the original `np.substract` does not exist.
    loss = np.mean(np.square(np.subtract(preds, labels)))
    return loss
def error_calulation(self, labels):
    """Accumulate the loss of the output layer into ``self.error``.

    The loss is selected by ``self.cost_func``:

    * ``"mean_sqaured"`` (spelling kept, it is the key callers use): mean of
      half the squared difference between ``labels`` and the last layer's
      activations.
    * ``"cross_entropy"``: ``-sum(labels * log(activations + 1e-9))``; the
      epsilon keeps ``log`` away from zero.

    Any other value leaves ``self.error`` unchanged.
    """
    if self.cost_func == "mean_sqaured":
        # np.subtract -- the original `np.substract` does not exist.
        self.error += np.mean(
            np.divide(
                np.square(np.subtract(labels, self.Layers[-1].activations)),
                2))
    elif self.cost_func == "cross_entropy":
        self.error += -np.sum(labels * np.nan_to_num(
            np.log(self.Layers[-1].activations + 1e-9)))
def grad_descent(self, X, y, theta, alpha=0.3, precise=0.000001):
    """Fit linear-regression weights by batch gradient descent.

    A column of ones (the intercept) is prepended to ``X`` and the weights
    are updated until the summed absolute change between two iterations,
    divided by the number of samples, is no larger than ``precise``.

    :param X: (m, n) feature matrix
    :param y: m target values; reshaped to a column vector
    :param theta: column vector, parameter we want to learn. As in the
        original code the passed value is ignored and the search starts from
        all ones.
    :param alpha: learning rate
    :param precise: convergence threshold
    :return: (n + 1, 1) weight vector, intercept first
    """
    # Original read `X,shape[1]` (comma instead of dot) -> NameError.
    m, n = X.shape[0], X.shape[1]
    X = np.concatenate((np.ones((m, 1)), X), axis=1)
    # A column vector avoids accidental (m, m) broadcasting in `h - y`.
    y = np.reshape(y, (m, 1))

    # One weight per feature plus the intercept column added above; the
    # original allocated only n weights, which cannot be multiplied with the
    # augmented X.
    theta = np.ones((n + 1, 1))
    pretheta = theta - 1.0
    while np.sum(np.abs(theta - pretheta)) / m > precise:
        h = np.dot(X, theta)
        # Original: np.substract(h - y) -- misspelled and given one argument.
        feedback = np.dot(X.transpose(), np.subtract(h, y))
        pretheta = theta
        theta = np.subtract(theta, alpha * feedback / m)
    return theta
def _kernel(self, x1, x2, type='basic'):
    """Evaluate a kernel function on two vectors.

    :param x1: first vector
    :param x2: second vector
    :param type: ``'basic'`` (dot product), ``'poly'`` (``(x1.x2 + 1) ** 3``)
        or ``'gaussian'`` (``exp(-||x1 - x2||**2 / 20)``); any other value
        falls back to the dot product.
    :return: the kernel value
    """
    if type == 'basic':
        return np.dot(x1, x2)
    elif type == 'poly':
        # The degree (3) is an arbitrarily chosen hyper-parameter;
        # it should be tuned experimentally in practice.
        return math.pow(np.dot(x1, x2) + 1, 3)
    elif type == 'gaussian':
        # Original called the module `np.linalg` itself and the non-existent
        # `np.substract`; the squared Euclidean norm is what is needed here.
        norm2square = math.pow(np.linalg.norm(np.subtract(x1, x2)), 2)
        # The bandwidth (divisor 2 * 10) is likewise an arbitrary choice.
        return math.exp(float(norm2square) / 2 / 10 * -1)
    else:
        return np.dot(x1, x2)
def below_noise_model(station, data, inv, save_plot=False):
    """Report the first PSD segment that drops below the low-noise model.

    A PPSD is built for the trace obtained from ``df_to_trace``. For periods
    between the bins closest to 2.5 and 10, every processed segment is
    compared with the curve returned by ``get_nlnm()`` interpolated onto the
    PPSD period bins.

    :param station: station code; used in the plot path, the id and messages
    :param data: passed to ``df_to_trace``
    :param inv: metadata handed to ``PPSD``
    :param save_plot: when True, the PPSD figure is saved under
        ``plot_data/psd/<station>/``
    :return: a 6-tuple. On the first negative difference:
        ``(timestamp, '<diff>dB', id, 1, 'Below Low Noise Model', station)``;
        otherwise ``(None, 'OK. BelowLowNoiseModel of <station>', None, 0,
        None, None)``.
    """
    tr = df_to_trace(station, data)
    ppsd = PPSD(tr.stats, metadata=inv)
    ppsd.add(tr)
    fig = ppsd.plot(show=False)
    if save_plot:
        julday = format_date_to_str(tr.stats.starttime.julday, 3)
        fig.savefig(
            f"plot_data/psd/{station}/{tr.stats.starttime.year}.{julday}.png",
            dpi=300)

    nlnm_t, nlnm_db = get_nlnm()
    trace_t = ppsd.period_bin_centers.tolist()
    interp_func = interpolate.interp1d(nlnm_t, nlnm_db, bounds_error=False)
    interp_db = interp_func(trace_t)
    traces_db = ppsd.psd_values

    min_t = closest_index_of_list(trace_t, 2.5)
    max_t = closest_index_of_list(trace_t, 10)
    for t, trace_db in enumerate(traces_db):
        # Both operands must cover the same period window. The original
        # indexed interp_db with the tuple [min_t, max_t + 1] instead of the
        # slice, and called the non-existent np.substract.
        diff = np.subtract(trace_db[min_t:max_t + 1],
                           interp_db[min_t:max_t + 1])
        for element in diff:
            if element < 0:
                time_processed = ppsd.times_processed[t]
                year = format_date_to_str(time_processed.year, 4)
                month = format_date_to_str(time_processed.month, 2)
                day = format_date_to_str(time_processed.day, 2)
                hour = format_date_to_str(time_processed.hour, 2)
                minute = format_date_to_str(time_processed.minute, 2)
                second = format_date_to_str(time_processed.second, 2)
                stamp = f'D{year}{month}{day}T{hour}{minute}{second}'
                _id = station + '.' + stamp + '.1'
                return stamp, f'{str(element)}dB', _id, 1, 'Below Low Noise Model', station
    return None, f'OK. BelowLowNoiseModel of {station}', None, 0, None, None
def compare(v, sim, data, errdata, time):
    """Find the simulated sub-curve that best matches the data at speed v.

    Each curve in ``sim`` is scanned with a sliding window (step 20). Inside
    a window the curve is sampled at the offsets travelled at speed ``v``
    since ``time[0]``, shifted so the window starts at zero, and scored with
    ``chi2`` against ``data`` / ``errdata``.

    :param v: speed in pixels per day
    :param sim: iterable of simulated curves
    :param data: observed values passed to ``chi2``
    :param errdata: uncertainties passed to ``chi2``
    :param time: observation times (indexable; first and last entries are read)
    :return: ``[best chi2, matching sub-curve, v]``
    """
    length_subsim = int((time[-1] - time[0]) * v) + 2
    step = 20
    # Offsets (in pixels) travelled since the first observation.
    # np.subtract -- the original `np.substract` does not exist.
    pxl_parc = np.multiply(np.subtract(time, time[0]), v)
    pxl_parc = pxl_parc.astype(int)

    new_subset = []
    for j, lc in enumerate(sim):
        # NOTE(review): temp_res restarts for every curve while new_subset
        # keeps growing, so with more than one curve the index used in the
        # return below points into the first curve's windows -- confirm.
        temp_res = []
        for i in range(int((len(lc) - 1 - length_subsim) / step)):
            temp = np.array(lc[i * step:length_subsim + i * step - 1])
            temp2 = temp[pxl_parc] - temp[0] * np.ones(len(temp[pxl_parc]))
            new_subset.append(temp2)
            temp_res.append(chi2(temp2, data, errdata))
    return [min(temp_res), new_subset[temp_res.index(min(temp_res))], v]
def generate_donor(self, x, generation):
    """
    Build a donor vector ``x1 + F * (x2 - x3)`` from three random individuals.

    :param x: an individual from the current generation
    :param generation: the current generation (a list); it is left untouched
    :return: a single donor vector for the individual
    """
    # Pick randomly 3 individuals from the current generation, but not x
    # itself. Work on a copy: the original aliased `generation`, so
    # `.remove(x)` deleted x from the caller's population.
    candidates = list(generation)
    candidates.remove(x)

    # Drawing positions instead of elements also works when individuals are
    # sequences (np.random.choice only accepts 1-D data).
    picks = np.random.choice(len(candidates), 3, replace=False)
    x1 = candidates[picks[0]]
    x2 = candidates[picks[1]]
    x3 = candidates[picks[2]]

    # np.subtract -- the original `np.substract` does not exist.
    diff = np.subtract(x2, x3)
    donor = x1 + self.F * diff
    return donor
def unload(self, warehouse_position, products_to_unload):
    """Account for an unload operation at a warehouse.

    Adds the travel time to the warehouse (rounded up) plus one to
    ``self.remaining_duration`` and removes the unloaded quantities from
    ``self.products``.

    :param warehouse_position: destination, passed to ``distance``
    :param products_to_unload: quantities, element-wise compatible with
        ``self.products``
    """
    self.remaining_duration += ceil(distance(self.position, warehouse_position) + 1)
    # The original called the non-existent np.substract and discarded the
    # result, so the stock never changed. Store the difference back.
    # NOTE(review): self.products becomes a NumPy array here -- confirm
    # callers do not rely on it being a plain list.
    self.products = np.subtract(self.products, products_to_unload)
def compute_disentanglement_metric_score(self): """ Returns the classification accuracy of the disentanglement metric. As in Higgings 2017, good reconstructions are associated with entangled representations (lower disentanglement scores). Disentangled representations (high disentanglement scores) often result in blurry reconstructions. Uses the abs. linear difference between the inferred latent representations: a linear difference equation[ or linear recurrence relation equates 0 to a polynomial that is linear in the various iterates of a variable—that is, in the values of the elements of a sequence. The polynomial's linearity means that each of its terms has degree 0 or 1 """ DIMENSION_OUT = 3 # read from dataset #TODO generalize for DIMENSION_OUT total_z_diff = 0 # 1. Choose a generative factor y ~ Unif[1...K], e.g. y = scale, shape, orientation. TODO: is there other factors than position of arm we could sample from? button/table position or color? # In our case, position of Baxter robot arm, (x, y, z), or one coordinate only? # 2. For a batch of L samples: avg_metric = 0 for batch in range(self.n_batches): latent_v_set1 = [] latent_v_set2 = [] datapoints = 0 # to be splitted into latent sets v1 and v2 samples_per_batch = len(self.representations) # L n_repr_per_set = len(self.representations)/2 # a) Sample 2 sets of latent representations, latent_v_set1 and latent_v_set2 enforcing [v1,l]_k = [v2,l]_k # if k=y (so that the value of factor k=y is kept fixed) y = randint(0, DIMENSION_OUT-1) # 1 coordinate ground truth or all? 
We randomly choose one dimension index in [0,2] z_diff_batch = 0 while datapoints <n_repr_per_set: if randint(0, 1): latent_v_set1.append((self.images[datapoints], self.learned_representations[datapoints][y])) else: latent_v_set2.append((self.images[datapoints], self.learned_representations[datapoints][y])) if len(latent_v_set1) == n_repr_per_set: for (img, repres) in zip(self.images[datapoints:], self.learned_representations[datapoints:][y]): latent_v_set2.append((img, repres)) return if len(latent_v_set2) == n_repr_per_set: for (img, repres) in zip(self.images[datapoints:], self.learned_representations[datapoints:][y]): latent_v_set1.append((img, repres)) return datapoints +=1 print 'v1 and v2:\n',#latent_v_set1, latent_v_set2,'\n', print len(latent_v_set1), len(latent_v_set2) # b) Simuilate image x_1l, ~ Sim(v_1l) and then infer z_1l = mu(x_1l) # using the encoder q(z|x) ~ N (mu(x), sigma(x))-> i.e. our priors siamese model for (sample1, sample2) in zip(latent_v_set2, latent_v_set2): print sample1 print sample2 simulated_img1, encoded_img_z1 = sample1[0], sample1[1][y] simulated_img2, encoded_img_z2 = sample2[0], sample2[1][y] #print simulated_img1, encoded_img_z1 # encoded_img_z1 = representations[simulated_img1] # z_1l # encoded_img_z2 = representations[simulated_img2] # z_2l # c) compute the difference z_diff = |z_1l - z_2l|, the absolute linear difference between the inferred latent representations. z_diff_batch += np.absolute(np.subtract(encoded_img_z1, encoded_img_z2)) avg_z_diff = z_diff_batch/float(samples_per_batch) total_z_diff += avg_z_diff print 'Disentanglement metric score for batch ',batch,': ', avg_z_diff # 3. 
Predict the factor y p(y|z_diff) and report the accuracy of this predictor as disentanglement metric score total_z_diff += total_z_diff/float(self.n_batches) factor_predictor_accuracy = 0 for img in images: y_hat = self.predict_posterior_with_linear_model(img, y, total_z_diff) # computes p(y|z_diff_batch) factor_predictor_accuracy += np.abs(np.substract(y_hat, learned_representations)) # predicting the real value of the factor (arm real position in our case) factor_predictor_accuracy /= len(images) print 'Disentanglement metric score for batch in dataset ',data_folder,': ',factor_predictor_accuracy return factor_predictor_accuracy
# Build the distributional model, then answer interactive queries forever.
model = SemanticModel(corpus, window=7)
print(' - transforming raw frequencies (Positive Pointwise Mutual Information)...')
model.ppmi_transform(laplace_smoothing=2)
print(' - smoothing model (SVD)...')
model.svd_smoothing(dimension=300)

while True:
    choice = input('type l to lookup a word\'s id, s to find synonyms of a word')
    # A single if/elif chain: the original started a second `if` for 's', so
    # answering 'l' also fell through to the final `else` branch.
    if choice == 'l':
        a_word = input('Word: ')
        print('This word\'s id is: ', corpus.id_for_word(a_word))
    elif choice == 's':
        # input() returns text; ids are used as row indices below.
        a_word_id = int(input('Word id: '))
        print('This id corresponds to: ', corpus.word_for_id(a_word_id))
        for similar_word_id in model.most_similar_words(model.word_context_matrix[a_word_id, :], 5):
            print(corpus.word_for_id(similar_word_id))
    elif choice == '-':
        # Words closest to the difference of two word vectors.
        word_a = int(input('word (a) id: '))
        word_b = int(input('word (b) id: '))
        # np.subtract -- the original `np.substract` does not exist.
        word_c = np.subtract(model.word_context_matrix[word_a, :], model.word_context_matrix[word_b, :])
        for similar_word_id in model.most_similar_words(word_c, 5):
            print(corpus.word_for_id(similar_word_id))
    else:
        # Any other answer: words closest to the sum of two word vectors.
        word_a = int(input('word (a) id: '))
        word_b = int(input('word (b) id: '))
        word_c = np.add(model.word_context_matrix[word_a, :], model.word_context_matrix[word_b, :])
        for similar_word_id in model.most_similar_words(word_c, 5):
            print(corpus.word_for_id(similar_word_id))
def dotSub_f(a, b):
    """Return the element-wise difference ``a - b`` as a plain Python list."""
    # numpy.subtract -- the original `numpy.substract` does not exist.
    return numpy.subtract(a, b).tolist()


def dotMul_f(a, b):
    """Return ``numpy.dot(a, b)`` converted with ``tolist()``.

    For two 1-D inputs this is a plain scalar, otherwise a (nested) list.
    """
    return numpy.dot(a, b).tolist()
print("<범위지정 슬라이싱2>")
b = a[1, 1]  # element at the 2nd row, 2nd column of array a
print(b)
print('\n')

print("<정수 인덱싱>")
a = np.array([[20, 40], [50, 60], [100, 300]])
b = a[[2, 1], [1, 0]]  # a[[row2, row1],[col1, col0]]
print(b)
print('\n')

print("<Numpy 연산 - 요소끼리의 사칙연산>")
# NumPy makes arithmetic between arrays straightforward.
x = np.array([1, 2, 3])
y = np.array([4, 5, 6])

# The results are converted with str() before concatenation: adding a str to
# an ndarray is treated as element-wise addition and raises.
b = np.add(x, y)  # element-wise addition
print('덧셈: ' + str(b))
b = np.subtract(x, y)  # element-wise subtraction (was the non-existent np.substract)
print('뺄셈: ' + str(b))
b = np.multiply(b, x)  # element-wise multiplication
print('곱셈: ' + str(b))
b = np.divide(b, x)  # element-wise division
print('나눗셈: ' + str(b))
print('\n')

print("<Numpy 연산 - 행렬간 행렬곱>")
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])
c = np.dot(a, b)
print(c)
def kernel_rad(x1, x2, sig=2):
    """Radial (Gaussian) kernel ``exp(-||x1 - x2||**2 / (2 * sig**2))``.

    :param x1: first vector
    :param x2: second vector
    :param sig: kernel width (sigma)
    :return: kernel value as a float
    """
    # numpy.subtract -- the original `numpy.substract` does not exist.
    delta = numpy.subtract(x1, x2)
    # `**` is exponentiation. The original `2 * sig ^ 2` is a bitwise XOR
    # (6 for sig=2 instead of 8) and fails outright for float sigmas.
    return math.exp(-numpy.dot(delta, delta) / (2 * sig ** 2))
def rbf(gamma):
    """Return a kernel function ``k(x, y) = exp(-gamma * ||x - y||)``.

    :param gamma: scale applied to the distance
    :return: callable taking two vectors

    NOTE(review): the norm is not squared here, unlike the usual Gaussian
    RBF definition -- confirm this is intended.
    """
    # np.subtract -- the original `np.substract` does not exist.
    return lambda x, y: np.exp(-gamma * la.norm(np.subtract(x, y)))
import numpy as np

# Change False to True for each block of code to see what it does

# Arithmetic operations between 2 NumPy arrays
if False:
    a = np.array([1, 2, 3, 4])
    b = np.array([1, 2, 1, 2])

    print(np.add(a, b))
    print(a + b)
    # The ufunc is np.subtract; `np.substract` does not exist and made this
    # block fail as soon as it was enabled.
    print(np.subtract(a, b))
    print(a - b)
    print(np.multiply(a, b))
    print(a * b)
    print(np.divide(a, b))
    print(a / b)
    print(a**b)
    print(np.sqrt(a))

# Arithmetic operations between a NumPy array and a single number
if False:
    a = np.array([1, 2, 3, 4])
    b = 2

    print(a + b)
a.astype(int)


# ## Array computation

# ### Arithmetic

# In[ ]:


# Addition
a+b
np.add(a,b)
# Subtraction (np.subtract; `np.substract` does not exist)
a-b
np.subtract(a,b)
# Multiplication
a*b
np.multiply(a,b)
# Division
a/b
np.divide(a,b)
# Exponential, e**a
np.exp(a)
# Square root
np.sqrt(a)
# Sine
np.sin(a)
# Cosine
np.cos(a)
# Natural logarithm
eiganValH2, eiganVect=LA.eig(H) psiArray=[] for i in range(40,numVectors): #constructs array of eigan vectors psi=np.insert(eiganVect[:,i],0 ,0) psi=np.append(psi,0) psi=normalize(psi) psiArray.append(psi) plt.plot(x, psi) plt.ylabel("Psi") plt.xlabel("Position (x)") plt.title("V_0 = Ground State Energy") plt.grid() plt.savefig('Assignment_4_quantum_simulation/plots/2BeiganVect'+str(i)+'.png',bbox_inches='tight') #plt.show() plt.clf() print "V0 = 10 % of Groundstate\nPertubation Theory:" print np.array(newEnergy[40:]) print "Computed:" print eiganValH[40:50] print "Difference:" print np.substract(np.array(newEnergy[40:]),eiganValH[40:50]) print "V0 = Groundstate\nPertubation Theory:" print np.array(newEnergy2[40:]) print "Computed:" print eiganValH2[40:50] print "Difference:" print np.subtract(np.array(newEnergy2[40:]),eiganValH2[40:50])
def minDistance(self, vec1, vec2):
    """Return the smallest absolute element-wise difference of two vectors.

    :param vec1: first 1-D vector
    :param vec2: second 1-D vector, same length
    """
    # The original passed `self` to min() as an extra candidate and called
    # the non-existent np.substract.
    return min(np.abs(np.subtract(vec1, vec2)))
def maxDistance(self, vector1, vector2):
    """Return the largest absolute element-wise difference of two vectors.

    :param vector1: first 1-D vector
    :param vector2: second 1-D vector, same length
    """
    # np.subtract -- the original `np.substract` does not exist.
    return max(np.abs(np.subtract(vector1, vector2)))
def metrics(self, output=None, split='test', verbose=False, ctx=None):
    '''
    Compute the metrics.

    The metric is chosen by the model's configured metric name: 'mse' (mean
    squared error) or 'acc' (fraction of equal entries). The value is stored
    in the per-split metrics record and returned.

    Arguments:
        output:  in multi-output models, specify the output,
        split:   one of ['training', 'validation', 'test'],
        verbose: verbose output,
        ctx:     Context; when given together with verbose, the report line
                 is also sent to ctx.logger().

    Returns:
        the metric value (None if the configured metric is neither 'mse'
        nor 'acc').
    '''
    true = self.__y[split]
    pred = self.predictions(split=split)

    if output is not None:
        true = true[output]
        pred = pred[output]

    metric = None
    name = None

    if self.__metric == 'mse':
        name = 'mean squared error'
        # np.subtract -- the original `np.substract` does not exist.
        metric = np.mean(np.square(np.subtract(true, pred)))
    if self.__metric == 'acc':
        name = 'accuracy'
        metric = np.mean(np.equal(true, pred))

    if verbose:
        if output is not None:
            string = f'{output} | {split.upper()} | {name} = {metric:.3f}.'
        else:
            string = f'{split.upper()} | {name} = {metric:.3f}.'

        print(string)
        if ctx is not None:
            ctx.logger().info(string)

    # save the metric
    if output is not None:
        if self.__metrics[split] is None:
            self.__metrics[split] = {}
        self.__metrics[split][output] = metric
    else:
        self.__metrics[split] = metric

    return metric
def nmfsc(V, rdim, sW, sH, iter_num, showflag, W0, H0):
    """Non-negative matrix factorisation with sparseness constraints.

    Factorises ``V / max(V)`` as ``W.dot(H)`` starting from random
    non-negative factors. A factor is updated either by a projected gradient
    step (using ``projfunc``) or by the multiplicative update rule, depending
    on the ``sW`` / ``sH`` tests below.

    :param V: (vdim, samples) non-negative data matrix
    :param rdim: number of components
    :param sW: sparseness setting for W
    :param sH: sparseness setting for H
    :param iter_num: number of iterations
    :param showflag: unused
    :param W0: unused (W always starts from random values)
    :param H0: unused (H always starts from random values)
    :return: ``(W, H)``

    NOTE(review): the branch tests are ``not np.all(s)``, i.e. the projected
    update only runs when the setting contains a zero / falsy value. This
    looks inverted relative to "constraint given" -- confirm the convention.
    """
    def _objective(W_, H_):
        # Squared reconstruction error, the quantity minimised throughout.
        return 0.5 * np.sum(np.subtract(V, np.dot(W_, H_))**2)

    V = V / np.max(V)
    vdim = V.shape[0]
    samples = V.shape[1]

    W = np.absolute(np.random.randn(vdim, rdim))
    H = np.absolute(np.random.randn(rdim, samples))
    # Scale every row of H to unit L2 norm.
    H = H / np.dot(np.reshape(np.sqrt(np.sum(H**2, 1)), (H.shape[0], 1)),
                   np.ones((1, samples)))

    if (not (np.all(sW))):
        L1a = math.sqrt(vdim) - (math.sqrt(vdim) - 1) * sW
        for i in range(0, rdim):
            W[:, i] = projfunc(W[:, i], L1a, 1, 1)
    if (not (np.all(sH))):
        L1s = math.sqrt(samples) - (math.sqrt(samples) - 1) * sH
        for i in range(0, rdim):
            H[i, :] = projfunc(H[i, :], L1s, 1, 1)

    objhistory = np.array([_objective(W, H)])

    # initial step sizes
    stepsizeW = 1
    stepsizeH = 1

    for iteration in range(0, iter_num):
        # update H
        if (not (np.all(sH))):
            dH = np.dot(np.transpose(W), np.subtract(np.dot(W, H), V))
            begobj = objhistory[-1]
            count = 1
            # Halve the step until the objective stops increasing
            # (at most 10 attempts).
            while (1):
                Hnew = H - np.dot(stepsizeH, dH)
                for i in range(0, rdim):
                    Hnew[i, :] = np.transpose(
                        projfunc(np.transpose(Hnew[i, :]), L1s, 1, 1))
                # np.subtract (inside _objective) -- the original
                # `np.substract` does not exist.
                newobj = _objective(W, Hnew)
                if (newobj <= begobj):
                    break
                else:
                    count = count + 1
                    if (count >= 10):
                        break
                    stepsizeH = stepsizeH / 2
            stepsizeH = stepsizeH * 1.2
            H = Hnew
        else:
            H = H * (np.dot(np.transpose(W), V)) / (
                np.dot(np.dot(np.transpose(W), W), H) + 1e-9)
            # NOTE(review): this is one global norm (a scalar), not one norm
            # per row of H -- confirm against the reference algorithm.
            norms = np.sqrt(np.sum(np.transpose(H)**2))
            H = H / (np.dot(np.transpose(norms), np.ones((1, samples))))
            W = W * (np.dot(np.ones((vdim, 1)), norms))

        # update W
        if (not (np.all(sW))):
            dW = np.dot(np.subtract(np.dot(W, H), V), np.transpose(H))
            # Same objective as everywhere else in this function; the
            # original evaluated (V - W).dot(H), which is not the
            # reconstruction error and has mismatched shapes.
            begobj = _objective(W, H)
            count = 1
            while (1):
                Wnew = np.subtract(W, np.dot(stepsizeW, dW))
                # One L2 norm per column. The original took math.sqrt of the
                # total sum (a scalar) and then indexed it with norms[i].
                norms = np.sqrt(np.sum(Wnew**2, axis=0))
                for i in range(0, rdim):
                    Wnew[:, i] = projfunc(Wnew[:, i], L1a * norms[i],
                                          norms[i]**2, 1)
                # The original squared only Wnew.dot(H) (misplaced
                # parenthesis) instead of the residual.
                newobj = _objective(Wnew, H)
                if (newobj <= begobj):
                    break
                else:
                    count = count + 1
                    if (count >= 10):
                        break
                    stepsizeW = stepsizeW / 2
            stepsizeW = stepsizeW * 1.2
            W = Wnew
        else:
            W = W * (np.dot(V, np.transpose(H))) / (
                (np.dot(np.dot(W, H), np.transpose(H))) + 1e-9)

        newobj = _objective(W, H)
        objhistory = np.concatenate((objhistory, np.array([newobj])))
    return W, H
def __init__(self, point1, point2):
    """Store the displacement from ``point1`` to ``point2`` in ``self.coords``.

    :param point1: start point (sequence of coordinates)
    :param point2: end point, same length as ``point1``
    """
    # np.subtract -- the original `np.substract` does not exist.
    self.coords = np.subtract(point2, point1)
def substract(t1, t2):
    """Return the element-wise difference ``t1 - t2`` as a tuple.

    The function keeps its historical (misspelled) name for callers; only
    the NumPy call is corrected.
    """
    # numpy.subtract -- `numpy.substract` does not exist.
    return tuple(numpy.subtract(t1, t2))
def kaczmarz(A, b, I):
    """Run the cyclic Kaczmarz iteration for the linear system ``A v = b``.

    Starting from zero, the iterate is projected onto one row's hyperplane
    at a time, cycling through the rows ``I`` times.

    Arguments:
        A {numpy.ndarray} -- (m, n) matrix of the system
        b {numpy.ndarray} -- right-hand side of length m
        I {int} -- number of full passes through the rows

    Returns:
        X {numpy.ndarray} -- (n, I) matrix; column k is the iterate after
                             pass k
        err {numpy.ndarray} -- max absolute residual |A v - b| after each pass
    """
    m, n = A.shape
    v, X, err = np.zeros(n), np.zeros((n, I)), np.zeros(I)
    for i in tqdm(range(I * m), total=I * m, leave=False):
        # Note: array and matrix indices start at 0 in Python.
        ai = A[i % m]
        bi = b[i % m]
        # np.subtract -- the original `np.substract` does not exist.
        v = np.subtract(v, np.multiply(ai, (v.dot(ai) - bi) / (ai.dot(ai))))
        if (i + 1) % m == 0:
            err[i // m] = np.amax(abs(A.dot(v) - b))
            X[:, i // m] = v.T
    return X, err


A = np.array([[2, 5, 11, 17, 23], [3, 7, 13, 19, 29]])
b = np.array([228, 227])
I = 500
X, err = kaczmarz(A, b, I)

plt.figure(figsize=(8, 6))
plt.semilogy(np.arange(1, I + 1), err)

# In Python, @ also performs matrix multiplication.
A = randn(500, 1000)
b = A @ randn(1000)
I = 100
X, err = kaczmarz(A, b, I)

plt.figure(figsize=(8, 6))
plt.semilogy(np.arange(1, I + 1), err)
# Minimum-norm solution A^T (A A^T)^-1 b (parentheses were unbalanced).
x_hat = (A.T).dot(np.linalg.inv(A.dot(A.T))).dot(b)
plt.title(f'norm of difference between xhat and Kaczmarz is {np.linalg.norm(x_hat - X[:, -1]):.2e}')


def lp_altproj(A, b, I, s=1):
    """
    Find a feasible solution for A v >= b using alternating projection,
    starting from v0 = 0.

    Arguments:
        A {numpy.ndarray} -- matrix defines the LHS of linear equation
        b {numpy.ndarray} -- vector defines the RHS of linear equation
        I {int} -- number of full passes through the alternating projection
        s {numpy.float} -- step size of projection (defaults to 1)
    Returns:
        v {numpy.ndarray} -- the output after I full passes
        err {numpy.ndarray} -- the error after each full pass, measured as
                               the largest constraint violation
                               max(b - A v, 0)
    """
    # NOTE(review): the original measured err against a linprog solution
    # built from a module-level `c` (undefined at the first call and of the
    # wrong length for the MNIST solvers). The violation-based error keeps
    # the function self-contained -- confirm it is the metric you want.
    m, n = A.shape
    v = np.zeros(n)
    err = np.zeros(I)
    for t in tqdm(range(I * m), total=I * m, leave=False):
        ai = A[t % m]
        bi = b[t % m]
        if v.dot(ai) < bi:
            # Move towards the violated half-space. The original discarded
            # the result of the (misspelled) subtraction and ignored s.
            v = np.subtract(
                v, np.multiply(ai, s * (v.dot(ai) - bi) / (ai.dot(ai))))
        if (t + 1) % m == 0:
            err[t // m] = np.amax(np.maximum(b - A.dot(v), 0))
    return v, err


I = 500  # Use more iterations to meet the requirement.
# Do not forget constraint xi >= 0
A1 = np.array([[2, -1, 1], [1, 0, 2], [-7, 4, -6], [-3, 1, -2],
               [1, 0, 0], [0, 1, 0], [0, 0, 1]])
b1 = np.array([-1, 2, 1, 0, 0, 0, 0])
# NOTE(review): A and b still held the 500x1000 random system from the
# Kaczmarz experiment. By analogy with exercise 5 below (A1 = [A; -c; I]),
# the first three rows are taken as the original constraints -- confirm.
A, b = A1[:3], b1[:3]
x, err = lp_altproj(A1, b1, I, s=1)

plt.figure(figsize=(8, 6))
plt.semilogy(np.arange(1, I + 1), err)
print(np.all(A @ x - b > 0), np.all(x > 0))

# np.concatenate(): join a sequence of arrays along an existing axis
# np.hstack(): stack arrays in sequence horizontally (column wise)
# np.vstack(): stack arrays in sequence vertically (row wise)
# this is exercise 5
np.random.seed(0)
c = randn(1000)
A = np.vstack([-np.ones((1, 1000)), randn(500, 1000)])
b = np.concatenate([[-1000], A[1:] @ rand(1000)])
I, ep = 1000, 1e-6
# Do not forget constraint xi >= 0 and c^T x <= -1000
A1 = np.vstack([A, -c, np.identity(1000)])
b1 = np.concatenate([b, [1000], np.zeros(1000)])
x, err = lp_altproj(A1, b1 + ep, I, s=1)

plt.figure(figsize=(8, 6))
plt.semilogy(np.arange(1, I + 1), err)
print(np.all(A @ x - b > 0), np.all(x > 0))

import pandas as pd
# read mnist csv file to a dataframe
df = pd.read_csv('mnist_train.csv')
# append feature column by merging all pixel columns
df['feature'] = df.apply(lambda row: row.values[1:], axis=1)
# only keep feature and label column
df = df[['feature', 'label']]
df.head()

from sklearn.model_selection import train_test_split


def _to_design_matrix(features):
    """Stack per-sample pixel arrays into an (N, 785) float matrix whose
    last column is the constant -1."""
    mat = np.zeros((features.shape[0], 784))
    for i, row in enumerate(features):
        mat[i, :] = row[:784]
    return np.insert(mat, 784, -1, axis=1)


def _confusion_matrix(true_idx, pred_idx, n_classes, dtype=int):
    """Count (true class, predicted class) index pairs into a square matrix."""
    cm = np.zeros((n_classes, n_classes), dtype=dtype)
    np.add.at(cm, (true_idx, pred_idx), 1)
    return cm


def extract_and_split(df, d, test_size=0.5):
    """
    Extract the samples with given labels and randomly separate the samples
    into training and testing groups, extend each vector to length 785 by
    appending a -1

    Arguments:
        df {dataframe} -- the dataframe of MNIST dataset
        d {int} -- digit needs to be extracted, can be 0, 1, ..., 9
        test_size {float} -- the fraction of testing set, default value is 0.5
    Returns:
        X_tr {numpy.ndarray} -- training set features, a matrix with 785
                                columns, each row corresponds the feature of
                                a sample
        X_te {numpy.ndarray} -- testing set features, same layout
        y_tr {numpy.ndarray} -- training set labels, 1d-array
        y_te {numpy.ndarray} -- testing set labels, 1d-array
    """
    df_d = df.loc[df['label'] == d]  # extract digit d
    X = df_d['feature']
    y = df_d['label']
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=0)

    # Convert the pandas objects to plain float arrays.
    X_tr = _to_design_matrix(X_train.values)
    X_te = _to_design_matrix(X_test.values)
    y_tr = y_train.values.astype(float)
    y_te = y_test.values.astype(float)
    return X_tr, X_te, y_tr, y_te


def remove_outlier(x, thresh=3.5):
    """
    Returns points that are not outliers to make histogram prettier
    reference: https://stackoverflow.com/questions/11882393/matplotlib-disregard-outliers-when-plotting/11886564

    Arguments:
        x {numpy.ndarray} -- 1d-array, points to be filtered
        thresh {float} -- the modified z-score to use as a threshold.
                          Observations with a modified z-score (based on the
                          median absolute deviation) greater than this value
                          will be classified as outliers.
    Returns:
        x_filtered {numpy.ndarray} -- filtered points after dropping
                                      outliers (a 1-D input comes back as a
                                      column, shape (k, 1))
    """
    if len(x.shape) == 1:
        x = x[:, None]
    median = np.median(x, axis=0)
    diff = np.sqrt(((x - median)**2).sum(axis=-1))
    modified_z_score = 0.6745 * diff / np.median(diff)
    x_filtered = x[modified_z_score <= thresh]
    return x_filtered


# numpy.multiply is the element-wise product.
# str.format reminders:
# >>> "{} {}".format("hello", "world")          # default order
# 'hello world'
# >>> "{0} {1}".format("hello", "world")        # explicit positions
# 'hello world'
# >>> "{1} {0} {1}".format("hello", "world")    # explicit positions
# 'world hello world'
def mnist_pairwise_altproj(df, a, b, solver, test_size=0.5, verbose=False):
    """
    Pairwise experiment for applying alternating projection to classify
    digit a and digit b

    Arguments:
        df {dataframe} -- the dataframe of MNIST dataset
        a, b {int} -- digits to be classified (a is mapped to -1, b to +1)
        solver {function} -- function to compute linear classifier
        test_size {float} -- the fraction of testing set, default value is 0.5
        verbose {bool} -- whether to print and plot results
    Returns:
        z_hat {numpy.ndarray} -- coefficients for linear classifier
        res {numpy.ndarray} -- numpy.array([training error, testing error])
    """
    Xa_tr, Xa_te, ya_tr, ya_te = extract_and_split(df, a, test_size)
    Xb_tr, Xb_te, yb_tr, yb_te = extract_and_split(df, b, test_size)
    ya_tr[:] = -1
    ya_te[:] = -1
    yb_tr[:] = 1
    yb_te[:] = 1

    # Construct full set
    X_tr = np.concatenate((Xa_tr, Xb_tr), axis=0)
    y_tr = np.concatenate((ya_tr, yb_tr), axis=0)
    X_te = np.concatenate((Xa_te, Xb_te), axis=0)
    y_te = np.concatenate((ya_te, yb_te), axis=0)

    # Run solver on training set to get the linear classifier:
    # y_i * x_i^T z >= 1 for every training sample.
    A_tilde = np.multiply(X_tr, y_tr[:, np.newaxis])
    z_hat, err = solver(A_tilde, np.ones(y_tr.shape[0]))

    # Compute estimation and misclassification on training set
    y_hat_tr = np.where(X_tr.dot(z_hat) >= 0, 1.0, -1.0)
    cm_tr = _confusion_matrix((y_tr == 1).astype(int),
                              (y_hat_tr == 1).astype(int), 2)
    err_tr = (cm_tr[0, 1] + cm_tr[1, 0]) / y_hat_tr.shape[0]

    # Compute estimation and misclassification on testing set
    y_hat_te = np.where(X_te.dot(z_hat) >= 0, 1.0, -1.0)
    cm_te = _confusion_matrix((y_te == 1).astype(int),
                              (y_hat_te == 1).astype(int), 2)
    err_te = (cm_te[0, 1] + cm_te[1, 0]) / y_hat_te.shape[0]

    if verbose:
        print('Pairwise experiment, mapping {0} to -1, mapping {1} to 1'.format(a, b))
        print('training error = {0: .2f}%, testing error = {1: .2f}%'.format(100 * err_tr, 100 * err_te))
        print('Training set confusion matrix: \n {0}'.format(cm_tr))
        print('Testing set confusion matrix: \n {0}'.format(cm_te))

        # Plot the two histograms together; `bins` is the number of
        # sub-intervals the value range is divided into.
        ya_te_hat = Xa_te.dot(z_hat)
        yb_te_hat = Xb_te.dot(z_hat)
        output = [remove_outlier(ya_te_hat), remove_outlier(yb_te_hat)]
        plt.figure(figsize=(8, 4))
        plt.hist(output, bins=50)

    res = np.array([err_tr, err_te])
    return z_hat, res


solver = lambda A, b: lp_altproj(A, b + 1e-6, 100)
z_hat, res = mnist_pairwise_altproj(df, 0, 1, solver, verbose=True)


# This is exercise 7
def mnist_multiclass_altproj(df, solver, test_size=0.5):
    """
    Experiment for applying alternating projection to classify all digits
    using one-hot encoding

    Arguments:
        df {dataframe} -- the dataframe of MNIST dataset
        solver {function} -- function to compute linear classifier
        test_size {float} -- the fraction of testing set, default value is 0.5
    Returns:
        Z {numpy.ndarray} -- coefficients for linear classifier, (785, 10)
        res {numpy.ndarray} -- numpy.array([training error, testing error])
    """
    # Split training and testing sets
    X = df['feature']
    y = df['label']
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=0)
    X_tr = _to_design_matrix(X_train.values)
    X_te = _to_design_matrix(X_test.values)
    y_tr = y_train.values.astype(float)
    y_te = y_test.values.astype(float)

    # +1 / -1 one-vs-rest targets, one column per digit.
    # NOTE(review): `Y` was undefined in the original; this encoding mirrors
    # the +/-1 labels of the pairwise experiment -- confirm.
    Y = np.where(y_tr[:, np.newaxis] == np.arange(10), 1.0, -1.0)

    # Row 10*q + k holds sample q's features, scaled by its target for
    # class k, in the column block of class k (zeros elsewhere).
    # NOTE: this dense matrix has 10 * N rows and 7850 columns.
    A_new = np.zeros((10 * y_tr.shape[0], 7850))
    for k in range(10):
        A_new[k::10, k * 785:(k + 1) * 785] = np.multiply(
            X_tr, Y[:, k][:, np.newaxis])
    b_tilde = np.ones(10 * y_tr.shape[0])
    Z, err = solver(A_new, b_tilde)
    # Reshape z: one column of 785 coefficients per class.
    Z = (Z.reshape((10, 785))).T

    # Compute estimation and misclassification on training set
    Y_hat_tr = X_tr.dot(Z)
    y_hat_tr = Y_hat_tr.argmax(axis=1)
    cm_tr = _confusion_matrix(y_tr.astype(int), y_hat_tr, 10, dtype=float)
    err_tr = (cm_tr.sum() - np.trace(cm_tr)) / y_tr.shape[0]

    # Compute estimation and misclassification on testing set
    Y_hat_te = X_te.dot(Z)
    y_hat_te = Y_hat_te.argmax(axis=1)
    cm_te = _confusion_matrix(y_te.astype(int), y_hat_te, 10, dtype=float)
    err_te = (cm_te.sum() - np.trace(cm_te)) / y_te.shape[0]

    # The original format specs ({0:,2f}) were invalid.
    print('training error = {0:.2f}%,testing error = {1:.2f}%'.format(100 * err_tr, 100 * err_te))
    print('Training set condusion matrix: \n {0}'.format(cm_tr))
    print('Testing set confusion matrix: \n {0}'.format(cm_te))

    res = np.array([err_tr, err_te])
    return Z, res


solver = lambda A, b: lp_altproj(A, b + 1e-6, 100)
Z, res = mnist_multiclass_altproj(df, solver)
def substract(x, y):
    """Return the element-wise difference ``x - y``.

    The function keeps its historical (misspelled) name for callers; only
    the NumPy call is corrected.
    """
    # np.subtract -- `np.substract` does not exist.
    return np.subtract(x, y)
## Masked Array from numpy import ma x = np.arange(10) y = ma.masked_array(x , x<5) # copy=False print(y) print(y.shape) x[6]=99 print(x) print(y) ## The above shows that masked_array does not force an implicit copy operation ## Linear Algebra - `np.add(arr, 2)` - `np.subtract(arr, 2)` - `np.multiply(arr, 2)` - `np.divide(arr, 2)` - `np.power(arr, 2)` - `np.array_equal(arr1, arr2)` - `np.sqrt()` - `np.sin()` - `np.log()` - `np.abs()` - `np.ceil()`: Round up to the nearest int - `np.floor()` - `np.round()` ## Linear Algebra ## Matrix Multiplication
# ARRAY MATH
x = np.array([[1, 2], [3, 4]], dtype=np.float64)
y = np.array([[5, 6], [7, 8]], dtype=np.float64)

# Elementwise sum; both produce the array
# [[ 6.0  8.0]
#  [10.0 12.0]]
print(x + y)
print(np.add(x, y))

# Elementwise difference; both produce the array
# [[-4.0 -4.0]
#  [-4.0 -4.0]]
# (the original printed x + y here and called the non-existent np.substract)
print(x - y)
print(np.subtract(x, y))

# Elementwise product; both produce the array
# [[ 5.0 12.0]
#  [21.0 32.0]]
print(x * y)
print(np.multiply(x, y))

# Elementwise division; both produce the array
# [[ 0.2         0.33333333]
#  [ 0.42857143  0.5       ]]
print(x / y)
print(np.divide(x, y))

# Elementwise square root; produces the array
# [[ 1. 1.41421356]