def makeMock(grnd):
    """Render ground-truth emitters into a blurred, peak-normalised 64x64 image.

    ``grnd`` is indexed with the keys ``'xnano'``, ``'ynano'`` and
    ``'intensity'``.  Positions are divided by 100 and rounded to obtain
    integer pixel indices (presumably nanometres on a 100 nm pixel grid --
    confirm against the caller).

    The image is scaled so that its maximum is 1 and then circularly
    convolved with the module-level kernel ``psf_k`` by multiplying in the
    Fourier domain.  Only the real part of the inverse transform is returned.
    """
    cols = np.around(grnd['xnano'] / 100).astype(int)
    rows = np.around(grnd['ynano'] / 100).astype(int)

    # Fancy-index assignment: when several emitters land on the same pixel
    # the last one written wins (values are not accumulated).
    canvas = np.zeros((64, 64))
    canvas[rows, cols] = grnd['intensity']
    canvas = canvas / np.max(canvas)

    # Convolution theorem: multiply spectra, then transform back.
    spectrum = fft.fft2(canvas) * psf_k
    return np.real(fft.ifft2(spectrum))
def detect_circles(img):
    """Find circles in ``img`` with the Hough gradient method and draw them.

    The detected circles are drawn directly onto ``img`` (in place) as
    1-pixel anti-aliased outlines in colour ``(0, 255, 0)``.  Nothing is
    returned.

    If the transform finds no circle, ``cv.HoughCircles`` returns ``None``;
    in that case the image is left untouched instead of raising.
    """
    circles = cv.HoughCircles(img, cv.HOUGH_GRADIENT, 2, 20,
                              param1=10, param2=150,
                              minRadius=0, maxRadius=20)
    if circles is None:
        # No detections: nothing to round or draw.
        return

    circles = np.uint16(np.around(circles))
    for circle in circles[0, :]:
        # Hand plain Python ints to OpenCV rather than NumPy scalars.
        center = (int(circle[0]), int(circle[1]))
        radius = int(circle[2])
        # draw the outer circle
        cv.circle(img, center, radius, (0, 255, 0), 1, lineType=cv.LINE_AA)
def project(self, X, quant_error=False):
    """Project homogeneous points and normalise them by their third row.

    Calls ``self.set_P()`` first, then multiplies ``self.P`` with ``X``.

    Args:
        X: 4*n array of homogeneous points, one point per column.
        quant_error: when true, round the normalised coordinates to the
            nearest integer.

    Returns:
        Array of projected points, one per column, divided column-wise by
        the third coordinate so that row 2 becomes 1.
    """
    self.set_P()
    x = np.dot(self.P, X)

    # One broadcast division replaces the per-column loop.  Building a new
    # array (instead of dividing in place) also keeps integer-valued
    # products from failing on the int -> float cast.
    x = x / x[2]

    if quant_error:
        x = np.around(x, decimals=0)
    return x
def get_orthogonality_score(C_matrix, verbose=True):
    """Summarise the pairwise angles (in degrees) between columns of a matrix.

    For every pair of columns the acute angle ``arccos(|cos|)`` is computed,
    so all angles lie in [0, 90].  The diagonal of the angle matrix is left
    at zero (a column's angle with itself is 0 anyway) so that it does not
    need special treatment when collecting the off-diagonal entries.

    Args:
        C_matrix: 2-D array whose columns are the directions to compare.
        verbose: when true, print the angle matrix and the mean angles.

    Returns:
        Tuple ``(glob_mean, small_avr, high_avr)``:
          * ``glob_mean`` -- mean over columns of each column's average angle
            to the other columns;
          * ``small_avr`` -- mean of the non-zero angles at or below their
            25th percentile;
          * ``high_avr`` -- mean of the non-zero angles at or above their
            75th percentile.
        When there are no usable non-zero angles, ``small_avr`` and
        ``high_avr`` fall back to ``glob_mean``.  With at most one column all
        three values are 0.
    """
    _, n_cols = C_matrix.shape

    # Cosine between every pair of columns in one shot.
    norms = np.linalg.norm(C_matrix, axis=0)
    cosines = np.abs(np.dot(C_matrix.T, C_matrix)) / np.outer(norms, norms)
    # Rounding can push |cos| marginally above 1, which would make arccos
    # return NaN for (nearly) parallel columns.
    cosines = np.clip(cosines, 0.0, 1.0)

    orthogonality_matrix = np.degrees(np.arccos(cosines))
    np.fill_diagonal(orthogonality_matrix, 0.0)

    # Each column is compared with the (n_cols - 1) others; guard the
    # degenerate single-column case against a 0/0 division.
    n_others = max(n_cols - 1, 1)
    mean_per_sub_space = np.sum(np.abs(orthogonality_matrix), 1) / n_others
    glob_mean = np.mean(mean_per_sub_space) if mean_per_sub_space.size else 0

    # Default to the global mean; refine only when quartiles are computable.
    small_avr = glob_mean
    high_avr = glob_mean
    all_non_diag = orthogonality_matrix.flatten()
    all_non_diag = all_non_diag[np.nonzero(all_non_diag)]
    if all_non_diag.size:
        lower_quartile = np.percentile(all_non_diag, 25)
        upper_quartile = np.percentile(all_non_diag, 75)
        low_mask = all_non_diag <= lower_quartile
        high_mask = all_non_diag >= upper_quartile
        # Both masks are empty when NaNs are present (comparisons with NaN
        # are false); keep the fallback in that case.
        if low_mask.any() and high_mask.any():
            small_avr = all_non_diag[low_mask].mean()
            high_avr = all_non_diag[high_mask].mean()

    if verbose:
        print(np.around(orthogonality_matrix, 2))
        print("Mean abs angle per subspace: ", mean_per_sub_space)
        print("Mean abs angle overall: ", glob_mean)

    if n_cols <= 1:
        glob_mean = small_avr = high_avr = 0
    return glob_mean, small_avr, high_avr
def evaluate(self, iteration, expectation, kl):
    """Print one progress row with the current train/test errors.

    On iteration 0 a header line is printed first.  The row shows the
    iteration, the test and train errors (rounded to 4 decimals) and the
    ``expectation`` and ``kl`` values exactly as passed in, each centred in
    a 10-character column.
    """
    if iteration == 0:
        print(" iteration | test mae | train mae | E-term | KL |")

    # Train error is evaluated before test error, as in the row below the
    # test column nevertheless comes first.
    train_mae = self.evaluate_train_error()
    test_mae = self.evaluate_test_error()

    cells = (
        iteration,
        np.around(test_mae, 4),
        np.around(train_mae, 4),
        expectation,
        kl,
    )
    row_template = " ".join(["{:^10}"] * len(cells))
    print(row_template.format(*cells))
def is_jastrow_np_adjusted(X, n, beta):
    """Tell whether any pair of rows of ``X`` is closer than ``0.3 * 2.556``.

    Pair separations are wrapped with the minimum-image rule using the
    module-level length ``L`` (presumably the periodic box size -- confirm).
    The threshold is the same value that ``jastrow_np`` uses as the lower
    bound on pair distances.

    ``n`` and ``beta`` are accepted for signature parity with
    ``jastrow_np`` but are not used here.

    Returns:
        ``True`` as soon as one pair is found below the threshold, otherwise
        ``False``.
    """
    threshold = 0.3 * 2.556
    n_rows = X.shape[0]

    for first in range(n_rows - 1):
        for second in range(first + 1, n_rows):
            separation = X[first] - X[second]
            # Fold the displacement back into the nearest periodic image.
            separation -= np.around(separation / L) * L
            distance = np.dot(separation, separation)**0.5
            if distance < threshold:
                return True
    return False
def plot_train_valid_errors(self, ax, k, train_errors, valid_errors, num_units):
    """Draw training and validation error curves up to step ``k`` on ``ax``.

    Args:
        ax: matplotlib axes to draw on.
        k: index of the last step to show (steps ``0..k`` are plotted at
            x positions ``1..k+1``).
        train_errors: sequence of training errors, one per step.
        valid_errors: sequence of validation errors, one per step.
        num_units: value used to relabel the x axis; tick labels run from
            1 to about ``num_units`` in five equal increments, each rounded
            to the nearest ten.

    The x range always covers all ``len(train_errors)`` steps.  The lower
    y limit uses the minimum over all errors while the upper limit uses only
    the first ten entries of each series; both are padded by 25% of their
    spread.
    """
    num_steps = len(train_errors)
    shown_steps = np.arange(1, num_steps + 1)[:k + 1]

    ax.plot(shown_steps, train_errors[:k + 1],
            color=[0, 0.7, 1], linewidth=2.5, zorder=1, label='training')
    ax.plot(shown_steps, valid_errors[:k + 1],
            color=[1, 0.8, 0.5], linewidth=2.5, zorder=1, label='validation')
    ax.set_title('misclassifications', fontsize=15)

    # clean up the panel
    ax.set_xlabel('step', fontsize=12)

    minxc = 0.5
    maxxc = num_steps + 0.5
    # min()/max() only read the sequences, so no defensive copies are needed.
    minc = min(min(train_errors), min(valid_errors))
    maxc = max(max(train_errors[:10]), max(valid_errors[:10]))
    gapc = (maxc - minc) * 0.25
    minc -= gapc
    maxc += gapc
    ax.set_xlim([minxc, maxxc])
    ax.set_ylim([minc, maxc])

    # Tick positions are spaced over the step count; their labels are spaced
    # over num_units instead.
    tick_step = num_steps / float(5)
    tics = np.arange(1, num_steps + 1 + tick_step, tick_step)
    label_step = num_units / float(5)
    labels = np.arange(1, num_units + 1 + label_step, label_step)
    labels = [int(np.around(v, decimals=-1)) for v in labels]

    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.set_xticks(tics)
    ax.set_xticklabels(labels)
def jastrow_np(X, n, beta):
    """Evaluate ``exp(-0.5 * sum_{i<j} (beta / r_ij)**n)`` over rows of ``X``.

    Pair separations are wrapped with the minimum-image rule using the
    module-level length ``L`` (presumably the periodic box size -- confirm).
    Pairs farther apart than ``0.5 * L`` contribute nothing, and distances
    are bounded below by ``0.3 * 2.556`` before entering the sum.
    """
    min_distance = 0.3 * 2.556
    half_box = 0.5 * L
    n_rows = X.shape[0]

    pair_sum = 0
    for first in range(n_rows - 1):
        for second in range(first + 1, n_rows):
            separation = X[first] - X[second]
            # Fold the displacement back into the nearest periodic image.
            separation -= np.around(separation / L) * L
            distance = np.dot(separation, separation)**0.5
            if distance > half_box:
                continue
            # Clamp very short distances so the term stays bounded.
            distance = max(min_distance, distance)
            pair_sum += (beta / distance)**n
    return np.exp(-0.5 * pair_sum)
def main():
    """Fit the rating model with Spark-parallel SGD and report the results.

    Command-line options:
        --epochs  number of epochs (default 5)
        --alpha   gradient-descent step size (default 0.05)
        --func    noise distribution: ``log``, ``gumbel``, ``r_gumbel``;
                  anything else (including omitting it) selects the normal
                  CDF.

    The first 2000 rows of ``../data/musicdata.small.csv`` are used.  The
    rows are cut into chunks of about 200, SGD is run on every chunk in
    parallel from the same random start, and the resulting parameter vectors
    are averaged.  Predictions for all rows are then computed in parallel.
    Timing, log likelihood, accuracy and RMSE are written to ``out.txt``.
    """
    # Parse optional arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--epochs",
                        help="Number of epochs to iterate over",
                        type=int)
    parser.add_argument("--alpha",
                        help="Step size for gradient descent",
                        type=float)
    parser.add_argument("--func",
                        help="Distribution choice for epsilon. "
                        "Can be norm for normal, log for logistic, "
                        "gumbel for gumbel, or r_gumbel for reverse_gumbel")
    args = parser.parse_args()

    # A missing (or zero) value falls back to the default: 5 epochs and a
    # step size of 0.05.
    epoch = args.epochs or 5
    alpha = args.alpha or 0.05

    # Unknown or missing names fall back to the normal distribution.
    cdf_by_name = {
        'log': log_cdf,
        'gumbel': gumbel_cdf,
        'r_gumbel': r_gumbel_cdf,
    }
    func = cdf_by_name.get(args.func, sci.stats.norm.cdf)

    # Initialize the Spark Context
    sc = pyspark.SparkContext()

    df = pd.read_csv('../data/musicdata.small.csv', header=None)
    df.columns = ['uid', 'aid', 'rating']

    # Take the first 2000 samples; copy so the edits below act on an
    # independent frame rather than on a slice of df.
    dftouse = df[['rating', 'uid', 'aid']].head(2000).copy()

    # Adjust the indices
    dftouse['uid'] = dftouse['uid'] - 1
    dftouse['aid'] = dftouse['aid'] - 1

    # Take the ratings from 0-100 and transform them from 0-5
    dftouse['rating'] = np.around((dftouse['rating'] - 1) / 20)
    rating_vals = np.arange(1, dftouse.rating.max() + 1)
    minR = dftouse.rating.min()
    dftouse['rating'] = dftouse['rating'] - minR

    # R is the number of rating values
    R = len(rating_vals)

    # create buckets as midpoints
    buckets = 0.5 * (rating_vals[1:] + rating_vals[:-1])

    # I and J are the number of users and artists, respectively
    I = dftouse.uid.max() + 1
    J = dftouse.aid.max() + 1

    # define some K
    K = 2

    # convert to numpy data matrix
    Xrat = np.array(dftouse)

    # initialize a theta vector with some random values
    theta = np.random.normal(size=(I + J) * K + I + J + 1, loc=0., scale=0.1)

    # define gradrll as the gradient of the row log likelihood
    gradrll = grad(rowloglikelihood)

    # Now we begin the parallelization!
    # S is the target chunk size
    S = 200

    # turn Xrat into an RDD
    xrat_rdd = sc.parallelize(Xrat)

    # Split the ratings into chunks of about S rows.  Integer division keeps
    # the section count an int, and array_split tolerates row counts that
    # are not an exact multiple of S.
    n_chunks = max(1, Xrat.shape[0] // S)
    split_xrat = sc.parallelize(np.array_split(Xrat, n_chunks))

    # then run the sgd!
    t = time.time()
    ptheta = split_xrat.map(lambda subX: n_row_sgd(
        theta, subX, buckets, I, J, K, R, alpha, epoch, gradrll, func)).mean()

    # then we predict (in parallel)
    y_preds = xrat_rdd.map(
        lambda row: parallel_predict(ptheta, row, buckets, I, J, K)).collect()

    print(Xrat[:20, 0])
    print(y_preds[:20])
    print(ptheta[:20])

    # Write things to file; the context manager closes it even if one of
    # the metrics raises.
    with open("out.txt", "w") as f1:
        f1.write("Time (training): " + str(time.time() - t) + "\n")
        f1.write("Log likelihood: " +
                 str(loglikelihood(ptheta, Xrat, buckets, I, J, K, R, func)) +
                 "\n")
        f1.write("Accuracy: " + str(accuracy(y_preds, Xrat)) + "\n")
        f1.write("RMSE: " + str(rmse(y_preds, Xrat)))
def fit(self,
        feature_vectors,
        train_adjacency_matrix,
        n_epochs=10,
        test_adjacency_matrix=None,
        num_negatives=16):
    """Train the model by visiting every node once per epoch.

    For each node ``idx`` a mini-problem is built from its feature vector,
    the feature vectors of the nodes listed in
    ``train_adjacency_matrix[idx]`` with their observed values, and
    ``num_negatives`` randomly drawn nodes labelled 0.  One Adagrad step
    (lr=1) is taken on ``neg_expected_ll + KL / len(vals) + reg`` as
    returned by ``self.calc_vi_loss``.

    Args:
        feature_vectors: 2-D array, one row of features per node.
        train_adjacency_matrix: indexable by node id; each entry is read as
            ``entry[0][1]`` (index of the other node) and ``entry[1]`` (the
            observed value) -- presumably ``((i, j), value)`` pairs, confirm
            against the caller.
        n_epochs: number of passes over all nodes.
        test_adjacency_matrix: passed to ``classification_accuracy`` for the
            test score.  NOTE(review): the default ``None`` is forwarded
            unchanged; confirm the helper accepts it.
        num_negatives: number of negative samples per node; also stored on
            ``self.num_negatives``.

    Side effects:
        Every 10th epoch appends to ``self.epochs``, ``self.all_epochs``,
        ``self.ELBOs``, ``self.train_losses`` and ``self.test_losses`` and
        prints a progress line.
    """
    # Initialize optimizer
    # optimizer = torch.optim.SGD(self.parameters(), lr=0.01)
    optimizer = torch.optim.Adagrad(self.parameters(), lr=1)
    # optimizer = torch.optim.Adam(self.parameters(), lr=0.1)
    # optimizer = torch.optim.RMSprop(self.parameters(), lr=1)
    num_nodes = feature_vectors.shape[0]
    self.num_negatives = num_negatives
    for epoch in range(n_epochs):
        # Do round-robin optimization
        for idx in range(num_nodes):
            # Wrap the row in a list so the tensor has a leading axis of 1.
            x_ND = Variable(torch.FloatTensor([feature_vectors[idx, :]]))
            ys_ND = list()
            observed_entries = train_adjacency_matrix[idx]
            other_vec_idx = [entry[0][1] for entry in observed_entries]
            vals = [entry[1] for entry in observed_entries]
            for idy in other_vec_idx:
                ys_ND.append(feature_vectors[idy, :])
            # Negatives are drawn from all nodes without replacement, so
            # this raises if num_nodes < num_negatives; the draw does not
            # exclude idx itself or its observed neighbours.
            negative_idx = np.random.choice(num_nodes,
                                            self.num_negatives,
                                            replace=False)
            for idy in negative_idx:
                ys_ND.append(copy(feature_vectors[idy, :]))
                vals.append(0)
            ys_ND = Variable(torch.FloatTensor(ys_ND))
            optimizer.zero_grad()
            # NOTE:
            # expected_ll refers to the expected log likelihood term of ELBO
            # kl refers to the KL divergence term of the ELBO
            # matrix_loss refers to the matrix reconstruction loss
            #   (returned here but not used in the objective below)
            neg_expected_ll, KL, reg, matrix_loss = self.calc_vi_loss(
                x_ND, ys_ND, vals, n_mc_samples=10)
            KL = 1 / len(vals) * KL  # TODO: scale the KL term
            # ELBO loss = negative expected log likelihood + KL
            elbo_loss = neg_expected_ll + KL + reg
            elbo_loss.backward()
            optimizer.step()
        if epoch % 10 == 0:
            all_vectors = Variable(torch.FloatTensor(feature_vectors))
            train_accuracy = classification_accuracy(
                self, all_vectors, train_adjacency_matrix)
            test_accuracy = classification_accuracy(
                self, all_vectors, test_adjacency_matrix)
            self.epochs.append(epoch)
            self.all_epochs.append(epoch)
            # elbo_loss is the loss of the last node visited in this epoch,
            # not an epoch-wide aggregate.
            self.ELBOs.append(-elbo_loss)
            self.train_losses.append(1.0 - train_accuracy)
            self.test_losses.append(1.0 - test_accuracy)
            # if epoch in [0, 1, 25] or epoch % 50 == 0:
            #     all_vectors = Variable(torch.FloatTensor(feature_vectors))
            #     train_accuracy = classification_accuracy(self, all_vectors, train_adjacency_matrix)
            #     test_accuracy = classification_accuracy(self, all_vectors, test_adjacency_matrix)
            #     self.epochs.append(epoch)
            #     self.train_losses.append(1.0 - train_accuracy)
            #     self.test_losses.append(1.0 - test_accuracy)
            # NOTE(review): this report is assumed to belong to the
            # every-10th-epoch block because it uses the accuracies computed
            # there -- confirm the intended nesting.
            print("epoch: ", epoch, " - objective loss: ",
                  np.around(elbo_loss.data.item(), 4), " - train accuracy: ",
                  np.around(train_accuracy, 4), " - test accuracy: ",
                  np.around(test_accuracy, 4))
def pred_fun(weights, smiles):
    """Return boolean class predictions for ``smiles``.

    ``weights`` is split by ``unpack_weights`` into fingerprint weights and
    network weights.  The fingerprints computed from ``smiles`` are fed to
    the network, the output is passed through ``sigmoid`` and rounded to the
    nearest integer before being cast to ``bool``.
    """
    fp_weights, head_weights = unpack_weights(weights)
    features = fingerprint_func(fp_weights, smiles)
    probabilities = sigmoid(net_pred_fun(head_weights, features))
    hard_labels = np.around(probabilities)
    return hard_labels.astype(bool)
#NtoD = lambda N: 2 NtoD = lambda N, D=args.D: D elif args.NtoD == 'scaling': NtoD = lambda N: int(np.ceil(N / 10)) elif args.NtoD == '2scaling': NtoD = lambda N: int(np.ceil(N * 2)) elif args.NtoD == '1scaling': NtoD = lambda N: int(np.ceil(N)) elif args.NtoD == 'scalingOver5': NtoD = lambda N: int(np.ceil(N / 5)) elif args.NtoD == 'squared': NtoD = lambda N: N**2 if args.datasetName.count('Synthetic') > 0: Ntrains = np.around( np.logspace(np.log10(args.minNtrain), np.log10(args.maxNtrain), args.numNtrains), 0) Ntrains = Ntrains.astype(np.int32) else: Ntrains = [ 0, ] if args.upTo == -1: upTo = None else: upTo = args.upTo if args.Xrank == -1: Xrank = None else: Xrank = args.Xrank
def main():
    """Fit the rating model with Spark-parallel SGD and report the results.

    Command-line options:
        --epochs  number of epochs (default 5)
        --alpha   gradient-descent step size (default 0.05)
        --func    noise distribution: ``log``, ``gumbel``, ``r_gumbel``;
                  anything else (including omitting it) selects the normal
                  CDF.

    The first 2000 rows of ``../data/musicdata.small.csv`` are used.  The
    rows are cut into chunks of about 200, SGD is run on every chunk in
    parallel from the same random start, and the resulting parameter vectors
    are averaged.  Predictions for all rows are then computed in parallel.
    Timing, log likelihood, accuracy and RMSE are written to ``out.txt``.
    """
    # Parse optional arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--epochs",
                        help="Number of epochs to iterate over",
                        type=int)
    parser.add_argument("--alpha",
                        help="Step size for gradient descent",
                        type=float)
    parser.add_argument("--func",
                        help="Distribution choice for epsilon. "
                        "Can be norm for normal, log for logistic, "
                        "gumbel for gumbel, or r_gumbel for reverse_gumbel")
    args = parser.parse_args()

    # A missing (or zero) value falls back to the default: 5 epochs and a
    # step size of 0.05.
    epoch = args.epochs or 5
    alpha = args.alpha or 0.05

    # Unknown or missing names fall back to the normal distribution.
    cdf_by_name = {
        'log': log_cdf,
        'gumbel': gumbel_cdf,
        'r_gumbel': r_gumbel_cdf,
    }
    func = cdf_by_name.get(args.func, sci.stats.norm.cdf)

    # Initialize the Spark Context
    sc = pyspark.SparkContext()

    df = pd.read_csv('../data/musicdata.small.csv', header=None)
    df.columns = ['uid', 'aid', 'rating']

    # Take the first 2000 samples; copy so the edits below act on an
    # independent frame rather than on a slice of df.
    dftouse = df[['rating', 'uid', 'aid']].head(2000).copy()

    # Adjust the indices
    dftouse['uid'] = dftouse['uid'] - 1
    dftouse['aid'] = dftouse['aid'] - 1

    # Take the ratings from 0-100 and transform them from 0-5
    dftouse['rating'] = np.around((dftouse['rating'] - 1) / 20)
    rating_vals = np.arange(1, dftouse.rating.max() + 1)
    minR = dftouse.rating.min()
    dftouse['rating'] = dftouse['rating'] - minR

    # R is the number of rating values
    R = len(rating_vals)

    # create buckets as midpoints
    buckets = 0.5 * (rating_vals[1:] + rating_vals[:-1])

    # I and J are the number of users and artists, respectively
    I = dftouse.uid.max() + 1
    J = dftouse.aid.max() + 1

    # define some K
    K = 2

    # convert to numpy data matrix
    Xrat = np.array(dftouse)

    # initialize a theta vector with some random values
    theta = np.random.normal(size=(I + J) * K + I + J + 1, loc=0., scale=0.1)

    # define gradrll as the gradient of the row log likelihood
    gradrll = grad(rowloglikelihood)

    # Now we begin the parallelization!
    # S is the target chunk size
    S = 200

    # turn Xrat into an RDD
    xrat_rdd = sc.parallelize(Xrat)

    # Split the ratings into chunks of about S rows.  Integer division keeps
    # the section count an int, and array_split tolerates row counts that
    # are not an exact multiple of S.
    n_chunks = max(1, Xrat.shape[0] // S)
    split_xrat = sc.parallelize(np.array_split(Xrat, n_chunks))

    # then run the sgd!
    t = time.time()
    ptheta = split_xrat.map(lambda subX: n_row_sgd(
        theta, subX, buckets, I, J, K, R, alpha, epoch, gradrll, func)).mean()

    # then we predict (in parallel)
    y_preds = xrat_rdd.map(
        lambda row: parallel_predict(ptheta, row, buckets, I, J, K)).collect()

    print(Xrat[:20, 0])
    print(y_preds[:20])
    print(ptheta[:20])

    # Write things to file; the context manager closes it even if one of
    # the metrics raises.
    with open("out.txt", "w") as f1:
        f1.write("Time (training): " + str(time.time() - t) + "\n")
        f1.write("Log likelihood: " +
                 str(loglikelihood(ptheta, Xrat, buckets, I, J, K, R, func)) +
                 "\n")
        f1.write("Accuracy: " + str(accuracy(y_preds, Xrat)) + "\n")
        f1.write("RMSE: " + str(rmse(y_preds, Xrat)))
plt.close("all")

# total confirmed and unconfirmed
solution_opt = data["solution_opt"]
solution_opt = model.grouping(solution_opt)

# After grouping, the last two columns are plotted as the confirmed and
# unconfirmed totals; their ratio goes on the same log-scaled axis.
plt.figure()
plt.semilogy(solution_opt[:, -2], 'r.-', label="confirmed")
plt.semilogy(solution_opt[:, -1], 'y.-', label="unconfirmed")
ratio = np.divide(solution_opt[:, -1], solution_opt[:, -2])
plt.semilogy(ratio, 'k.-', label="ratio")
plt.legend()
plt.grid()
plt.xlabel('time t (days)')
plt.ylabel("the number of infections")
plt.title("unconfirmed/confirmed = " + str(np.around(ratio[-1], decimals=1)))
plt.savefig(filename_prex + "confirmed-unconfirmed.pdf")

# Lag used to align the two series below: mean of 1/eta_I + 1/mu, truncated
# to whole days.  The builtin int is used because the np.int alias no longer
# exists in current NumPy releases.
lag_death = int(np.mean(np.divide(1., eta_I) + np.divide(1., mu)))

# Column 7 at the lagged dates divided by the sum of columns 8 and 9 over a
# window of equal length that starts at simulation_first_deceased.
IFR = np.divide(
    solution_opt[date_deceased[lag_death:], 7],
    (solution_opt[simulation_first_deceased:simulation_first_deceased +
                  len(date_deceased[lag_death:]), 8] +
     solution_opt[simulation_first_deceased:simulation_first_deceased +
                  len(date_deceased[lag_death:]), 9]))
plt.figure()
plt.plot(date_deceased[lag_death:], IFR, 'r.-')
plt.grid()
plt.xlabel('time t (days)')
plt.ylabel("IFR")
def accuracy(params, inputs, targets, factor):
    """Return the fraction of rounded predictions that equal ``targets``.

    The outputs of ``neural_net_predict_binary(params, inputs, factor)`` are
    rounded to the nearest integer and compared element-wise with
    ``targets``; the mean of that comparison is returned.
    """
    raw_outputs = neural_net_predict_binary(params, inputs, factor)
    hard_labels = np.around(raw_outputs, decimals=0)
    is_correct = hard_labels == targets
    return np.mean(is_correct)
def plot_lines(img, lines, color=(0,255,0)):
    """Draw each segment in ``lines`` onto ``img`` in place.

    ``lines`` is iterated as pairs of end points; each end point supplies an
    x and a y value.  Coordinates are rounded to the nearest integer and the
    segment is drawn 1 pixel wide and anti-aliased in ``color``.
    """
    for start, end in np.around(lines):
        first_point = (int(start[0]), int(start[1]))
        second_point = (int(end[0]), int(end[1]))
        cv.line(img, first_point, second_point, color, 1,
                lineType=cv.LINE_AA)
def predict(self, x):
    """Return the predicted class index for every row of ``x``.

    ``x`` is standardised with ``self.x_avg`` and ``self.x_std``, a leading
    column of ones is inserted, and ``self.fit(self.w, ...)`` is evaluated
    on the result.  The scores are rounded to integers *before* taking the
    arg-max along axis 1, so scores that round to the same value resolve to
    the lowest class index.
    """
    standardized = (x - self.x_avg) / self.x_std
    # Column of ones at position 0 acts as the bias input.
    design = np.insert(standardized, 0, values=1, axis=1)
    scores = self.fit(self.w, design)
    rounded_scores = np.around(scores, 0).astype(int)
    return np.argmax(rounded_scores, axis=1)
def basin_traj_1d(sts, m0, m1, x0, noises, nt, dt, tau, lag, npts = 6, rdotf = rdot_1d2w, basinf = basins_1d2w_h):
    """Classify a 1-D trajectory at ``npts`` sampled time steps.

    The trajectory comes from ``pos_traj_1d``; all arguments up to ``lag``
    and ``rdotf`` are forwarded to it unchanged.  ``npts + 1`` indices are
    spread evenly over ``0 .. nt - 1`` and rounded to integers; the first one
    (index 0) is dropped.  ``basinf`` is applied to the trajectory columns
    at the remaining indices and its result is returned.
    """
    trajectory = pos_traj_1d(sts, m0, m1, x0, noises, nt, dt, tau, lag, rdotf)
    grid = np.linspace(0, nt - 1, npts + 1)
    # Skip the first grid point, i.e. the initial time step.
    sample_idx = np.around(grid).astype('int')[1:]
    return basinf(trajectory[:, sample_idx])
def basin_traj(sts1, sts2, m0, m1, m2, r0, noises, nt, dt, tau, lag, npts = 6, rdotf = rdot_2d3w_S, basinf = basins_2d3w_h):
    """Classify a trajectory at ``npts`` sampled time steps.

    The trajectory comes from ``pos_traj``; all arguments up to ``lag`` and
    ``rdotf`` are forwarded to it unchanged.  ``npts + 1`` indices are spread
    evenly over ``0 .. nt - 1`` and rounded to integers; the first one
    (index 0) is dropped.  ``basinf`` is applied to the trajectory slice
    ``[:, indices]`` and its result is returned.
    """
    trajectory = pos_traj(sts1, sts2, m0, m1, m2, r0, noises, nt, dt, tau, lag, rdotf)
    grid = np.linspace(0, nt - 1, npts + 1)
    # Skip the first grid point, i.e. the initial time step.
    sample_idx = np.around(grid).astype('int')[1:]
    return basinf(trajectory[:, sample_idx])