def flag(weights, coord, axesToExt, selection, percent=90, size=[0], cycles=3, outQueue=None):
    """
    Flag data if surrounded by other flagged data
    weights = the weights to convert into flags
    percent = percent of surrounding flagged points needed to extend the flag

    return: flags array and final rms
    """
    def extendFlag(flags, percent):
        #flags = flags.astype(np.int)
        if float(np.sum(flags))/len(flags) > percent/100.:
            return 1
        else:
            return 0

    import scipy.ndimage
    initialPercent = 100.*(np.size(weights)-np.count_nonzero(weights))/np.size(weights)

    # if size=0 then extend to all 2*axis, otherwise this creates issues with mirroring
    for i, s in enumerate(size):
        if s == 0:
            size[i] = 2*weights.shape[i]

    for cycle in xrange(cycles):
        flag = scipy.ndimage.filters.generic_filter((weights == 0), extendFlag, size=size,
                mode='mirror', cval=0.0, origin=0, extra_keywords={'percent': percent})
        weights[(flag == 1)] = 0
        # no new flags
        if cycle != 0 and np.count_nonzero(flag) == oldFlagCount:
            break
        oldFlagCount = np.count_nonzero(flag)

    logging.debug('Percentage of data flagged (%s): %.3f -> %.3f %%' \
            % (removeKeys(coord, axesToExt), initialPercent,
               100.*(np.size(weights)-np.count_nonzero(weights))/np.size(weights)))

    outQueue.put([weights, selection])
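# A minimal, standalone sketch (my own illustration, not part of the original function) of the
# flag-extension step used above: scipy.ndimage.generic_filter slides a window over a boolean
# "is flagged" array and marks a sample as flagged when more than `percent` percent of its
# neighbourhood is already flagged. Only numpy and scipy are assumed.
import numpy as np
import scipy.ndimage


def _extend(window, percent=50):
    # window arrives as a flat array of 0/1 neighbourhood values
    return 1 if float(np.sum(window)) / len(window) > percent / 100. else 0


weights = np.array([1., 1., 0., 0., 0., 1., 1., 1.])   # 0 == already flagged
flagged = scipy.ndimage.generic_filter((weights == 0), _extend, size=3,
                                       mode='mirror', extra_keywords={'percent': 30})
weights[flagged == 1] = 0
print(weights)  # the samples adjacent to the flagged run are now flagged too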
def question2(train, valid, test):
    """
    Find Best l1 value for Lasso regression
    :param train: pandas dataframe
    :param valid: pandas dataframe
    :param test: pandas dataframe
    :return:
    """
    best_rss = float('inf')
    # figure out best penalty for Lasso
    for penalty in np.logspace(1, 7, num=13):
        model = linear_model.Lasso(alpha=penalty, normalize=True)
        model.fit(train[ALL_FEATURES], train['price'])
        rss = sum((model.predict(valid[ALL_FEATURES]) - valid['price'])**2)
        if rss < best_rss:
            best_rss, best_penalty = rss, penalty
            best_model = model

    print('best L1 on validation set: ' + str(best_penalty) + '\n')

    # Calculate non-zero coefficient in model
    print('\nNonzero Weights: ' + str(np.count_nonzero(best_model.coef_)
                                      + np.count_nonzero(best_model.intercept_)))

    # calculate RSS on test data
    print('RSS on test data:')
    print('{:f}'.format(
        sum((best_model.predict(test[ALL_FEATURES]) - test['price'])**2)))
    print()
def _update_labels(self):
    labelvar = self.cluster_var_model[self.cluster_var_idx]
    labels, _ = self.data.get_column_view(labelvar)
    labels = np.asarray(labels, dtype=float)
    cluster_mask = np.isnan(labels)
    dist_mask = np.isnan(self._matrix).all(axis=0)
    mask = cluster_mask | dist_mask
    labels = labels.astype(int)
    labels = labels[~mask]

    labels_unq, _ = np.unique(labels, return_counts=True)

    if len(labels_unq) < 2:
        self.Error.need_two_clusters()
        labels = silhouette = mask = None
    elif len(labels_unq) == len(labels):
        self.Error.singleton_clusters_all()
        labels = silhouette = mask = None
    else:
        silhouette = sklearn.metrics.silhouette_samples(
            self._matrix[~mask, :][:, ~mask], labels, metric="precomputed")
    self._mask = mask
    self._labels = labels
    self._silhouette = silhouette

    if mask is not None:
        count_missing = np.count_nonzero(cluster_mask)
        if count_missing:
            self.Warning.missing_cluster_assignment(
                count_missing, s="s" if count_missing > 1 else "")
        count_nandist = np.count_nonzero(dist_mask)
        if count_nandist:
            self.Warning.nan_distances(
                count_nandist, s="s" if count_nandist > 1 else "")
def verify(self, mask, exp):
    maxDiffRatio = 0.02
    expArea = np.count_nonzero(exp)
    nonIntersectArea = np.count_nonzero(mask != exp)
    curRatio = float(nonIntersectArea) / expArea
    return curRatio < maxDiffRatio
def despike(self, n=3, recursive=False, verbose=False):
    """
    Replace spikes with np.NaN.
    Removing spikes that are >= n * std.
    default n = 3.
    """
    result = self.values.copy()
    outliers = (np.abs(self.values - nanmean(self.values)) >= n * nanstd(self.values))

    removed = np.count_nonzero(outliers)
    result[outliers] = np.NaN

    if verbose and not recursive:
        print("Removing from %s\n # removed: %s" % (self.name, removed))

    counter = 0
    if recursive:
        while outliers.any():
            result[outliers] = np.NaN
            outliers = np.abs(result - nanmean(result)) >= n * nanstd(result)
            counter += 1
            removed += np.count_nonzero(outliers)

    if verbose:
        print("Removing from %s\nNumber of iterations: %s # removed: %s" %
              (self.name, counter, removed))
    return Series(result, index=self.index, name=self.name)
def measure(self, image, workspace):
    data = image.pixel_data
    data = data.astype(numpy.bool)

    measurements = workspace.measurements
    measurement_name = self.skeleton_name.value

    statistics = []

    name = "Skeleton_Branches_{}".format(measurement_name)
    value = numpy.count_nonzero(branches(data))
    statistics.append(value)
    measurements.add_image_measurement(name, value)

    name = "Skeleton_Endpoints_{}".format(measurement_name)
    value = numpy.count_nonzero(endpoints(data))
    statistics.append(value)
    measurements.add_image_measurement(name, value)

    return [statistics]
def __init__(self, image, skin_mask, labeled_image, label_number, rectangle_slices):
    """Creates a new skin region.

    image: The entire image in YCrCb mode.
    skin_mask: The entire image skin mask.
    labeled_image: A matrix of the size of the image with the region label in
        each position. See scipy.ndimage.measurements.label.
    label_number: The label number of this skin region.
    rectangle_slices: The slices to get the rectangle of the image in which the
        region fits as returned by scipy.ndimage.measurements.find_objects.
    """
    self.region_skin_pixels = np.count_nonzero(
        labeled_image[rectangle_slices] == label_number
    )
    self.bounding_rectangle_size = \
        (
            rectangle_slices[1].start - rectangle_slices[0].start
        ) * (
            rectangle_slices[1].stop - rectangle_slices[0].stop
        )
    self.bounding_rectangle_skin_pixels = np.count_nonzero(
        skin_mask[rectangle_slices]
    )
    self.bounding_rectangle_avarage_pixel_intensity = np.average(
        image[rectangle_slices].take([0], axis=2)
    )
def _dump_mo_energy(mol, mo_energy, mo_occ, ehomo, elumo, orbsym, title='',
                    verbose=logger.DEBUG):
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mol.stdout, verbose)
    nirrep = mol.symm_orb.__len__()
    for i, ir in enumerate(mol.irrep_id):
        irname = mol.irrep_name[i]
        ir_idx = (orbsym == ir)
        nso = numpy.count_nonzero(ir_idx)
        nocc = numpy.count_nonzero(mo_occ[ir_idx])
        e_ir = mo_energy[ir_idx]
        if nocc == 0:
            log.debug('%s%s nocc = 0', title, irname)
        elif nocc == nso:
            log.debug('%s%s nocc = %d  HOMO = %.15g',
                      title, irname, nocc, e_ir[nocc-1])
        else:
            log.debug('%s%s nocc = %d  HOMO = %.15g  LUMO = %.15g',
                      title, irname, nocc, e_ir[nocc-1], e_ir[nocc])
            if e_ir[nocc-1]+1e-3 > elumo:
                log.warn('!! %s%s HOMO %.15g > system LUMO %.15g',
                         title, irname, e_ir[nocc-1], elumo)
            if e_ir[nocc] < ehomo+1e-3:
                log.warn('!! %s%s LUMO %.15g < system HOMO %.15g',
                         title, irname, e_ir[nocc], ehomo)
        log.debug('   mo_energy = %s', e_ir)
def print_results(labels, predictions):
    total = len(labels)
    num_correct = total - np.count_nonzero(np.subtract(predictions, labels))
    print "\n***** ACCURACY *****"
    print "Overall Accuracy: %.3f percent\n" % ((float(num_correct)/float(total)) * 100.0)

    results = pd.DataFrame()
    results['real'] = labels
    results['predicted'] = predictions

    for label in np.unique(labels):
        data = results[results['real'] == label]
        num_correct = len(data) - np.count_nonzero(data['real'].sub(data['predicted']))
        acc = ((float(num_correct)/float(len(data))) * 100.0)
        print "Total class label '%s' accuracy: %f percent" % (label, acc)
    print ""

    # Distribution graphs
    utils.print_distribution_graph(labels, 'Actual Distribution of Classes')
    utils.print_distribution_graph(predictions, 'Distribution of Predictions')

    # Distribution graphs for each class label
    for label in np.unique(labels):
        data = results[results['predicted'] == label]['real'].tolist()
        title = "When class label '%s' was predicted, the actual class was:" % label
        utils.print_distribution_graph(data, title)
def get_frequece(self, thresh, col=None):
    if col is not None:
        radio = round(np.count_nonzero(self.array[:, int(col)] >= float(thresh)) /
                      float(len(self.array)), 4) * 100
        return "%4.2f%%" % radio
    else:
        radio = round(np.count_nonzero(self.array >= float(thresh)) /
                      float(len(self.array)), 4) * 100
        return "%4.2f%%" % radio
def cost_logit(X, A, R, lam, n, k):
    '''
    The cost function
    n is the number of examples
    k is the feature dimension
    R is the matrix indicating which entries of A are known.
    '''
    # get the matrices U, V, beta, alpha
    U = X[:n*k]
    U = np.reshape(U, (n, k))
    V = X[n*k:2*n*k]
    V = np.reshape(V, (n, k))
    beta = X[2*n*k:2*n*k+n]
    beta = np.reshape(beta, (n, 1))
    alpha = X[-1]

    num_knowns = np.count_nonzero(R)
    num_edges = np.count_nonzero(np.multiply(A, R))
    num_nonedges = num_knowns - num_edges

    h = alpha + np.dot(U, np.transpose(V))
    # add beta to every row, column
    for i in range(h.shape[0]):
        for j in range(h.shape[1]):
            h[i, j] += beta[i]+beta[j]
    sigH = sigmoid(h)

    J = ((-A/(2*num_edges))*np.log(sigH)) - (((1-A)/(2*num_nonedges))*np.log(1-sigH))
    J = J*R

    # regularizer
    for i in range(J.shape[0]):
        for j in range(J.shape[1]):
            J[i, j] += lam*(np.abs(beta[i])**2 + np.abs(beta[j])**2 +
                            np.linalg.norm(U[i, :])**2 + np.linalg.norm(V[j, :])**2)

    # sum over known values
    cost = sum(sum(J))
    return cost
def kappa_score(y_true, y_pred):
    """Calculate Cohen's kappa for classification tasks.

    See https://en.wikipedia.org/wiki/Cohen%27s_kappa

    Note that this implementation of Cohen's kappa expects binary labels.

    Args:
        y_true: Numpy array containing true values.
        y_pred: Numpy array containing predicted values.

    Returns:
        kappa: Numpy array containing kappa for each classification task.

    Raises:
        AssertionError: If y_true and y_pred are not the same size, or if
            class labels are not in [0, 1].
    """
    assert len(y_true) == len(y_pred), 'Number of examples does not match.'
    yt = np.asarray(y_true, dtype=int)
    yp = np.asarray(y_pred, dtype=int)
    assert np.array_equal(np.unique(yt), [0, 1]), (
        'Class labels must be binary: %s' % np.unique(yt))
    observed_agreement = np.true_divide(np.count_nonzero(np.equal(yt, yp)),
                                        len(yt))
    expected_agreement = np.true_divide(
        np.count_nonzero(yt == 1) * np.count_nonzero(yp == 1) +
        np.count_nonzero(yt == 0) * np.count_nonzero(yp == 0),
        len(yt) ** 2)
    kappa = np.true_divide(observed_agreement - expected_agreement,
                           1.0 - expected_agreement)
    return kappa
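# A small usage sketch for kappa_score (my own example, assuming numpy is imported as np and
# the function above is in scope). Perfect agreement gives kappa = 1, while agreement no better
# than chance gives kappa near 0.
y_true = np.array([0, 0, 1, 1, 1, 0, 1, 0])
y_pred = np.array([0, 0, 1, 1, 0, 0, 1, 1])
print(kappa_score(y_true, y_true))  # 1.0
print(kappa_score(y_true, y_pred))  # 0.5: observed agreement 0.75, expected agreement 0.5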
def analyze_param(net, layers):
    # plt.figure()
    print '\n=============analyze_param start==============='
    total_nonzero = 0
    total_allparam = 0
    percentage_list = []
    for i, layer in enumerate(layers):
        i += 1
        W = net.params[layer][0].data
        b = net.params[layer][1].data
        # plt.subplot(3, 1, i);
        # numBins = 2 ^ 8
        # plt.hist(W.flatten(), numBins, color='blue', alpha=0.8)
        # plt.show()
        print 'W(%d) range = [%f, %f]' % (i, min(W.flatten()), max(W.flatten()))
        print 'W(%d) mean = %f, std = %f' % (i, np.mean(W.flatten()), np.std(W.flatten()))
        non_zero = (np.count_nonzero(W.flatten()) + np.count_nonzero(b.flatten()))
        all_param = (np.prod(W.shape) + np.prod(b.shape))
        this_layer_percentage = non_zero / float(all_param)
        total_nonzero += non_zero
        total_allparam += all_param
        print 'non-zero W and b cnt = %d' % non_zero
        print 'total W and b cnt = %d' % all_param
        print 'percentage = %f\n' % (this_layer_percentage)
        percentage_list.append(this_layer_percentage)

    print '=====> summary:'
    print 'non-zero W and b cnt = %d' % total_nonzero
    print 'total W and b cnt = %d' % total_allparam
    print 'percentage = %f' % (total_nonzero / float(total_allparam))
    print '=============analyze_param ends ==============='
    return (total_nonzero / float(total_allparam), percentage_list)
def test_cross_div(dtypea, dtypeb, dtypec):
    if dtypea == np.int8 and dtypeb == np.int8:
        pytest.skip("Different behaviour in c++ and python for int8 / int8".format(dtypea, dtypeb))

    def fkt(a, b, c):
        c[:] = a / b

    hfkt = hope.jit(fkt)
    (ao, ah), (bo, bh), (co, ch) = random(dtypea, [10]), random(dtypeb, [10]), random(dtypec, [10])
    ao, ah, bo, bh = ao.astype(np.float64), ah.astype(np.float64), bo.astype(np.float64), bh.astype(np.float64)
    ao, ah = (
        np.copysign(np.power(np.abs(ao), 1.0 / 4.0), ao).astype(dtypea),
        np.copysign(np.power(np.abs(ah), 1.0 / 4.0), ah).astype(dtypea),
    )
    bo, bh = (
        np.copysign(np.power(np.abs(bo), 1.0 / 4.0), bo).astype(dtypeb),
        np.copysign(np.power(np.abs(bh), 1.0 / 4.0), bh).astype(dtypeb),
    )
    if np.count_nonzero(bo == 0) > 0:
        bo[bo == 0] += 1
    if np.count_nonzero(bh == 0) > 0:
        bh[bh == 0] += 1
    fkt(ao, bo, co), hfkt(ah, bh, ch)
    assert check(co, ch)
    fkt(ao, bo, co), hfkt(ah, bh, ch)
    assert check(co, ch)
def norm_mean_cent(movies_np):
    mean_movie = []
    count_movie = []
    for row in movies_np:
        row_sum = np.sum(row)
        count = np.count_nonzero(row)
        count_movie.append(count)
        mean_movie.append(row_sum/count)

    count_user = []
    mean_user = []
    for row in movies_np.T:
        row_sum = np.sum(row)
        count = np.count_nonzero(row)
        count_user.append(count)
        mean_user.append(row_sum/count)

    movies_np[movies_np == 0] = np.nan

    mean_cent = []
    i = 0
    for row in movies_np:
        mean_cent.append(row - mean_movie[i])
        i += 1

    mean_cent = np.array(mean_cent)
    mean_cent = np.nan_to_num(mean_cent)
    return mean_cent
def simula(N, n, PM, beta, pmig, grupos, listafitness, listafitness_m, mpvencer, x):
    s = int(time.time() + random.randint(0, 2**32-1) + x) % (2**32-1)
    random.seed(s)
    s = int(time.time() + random.randint(0, 2**32-1) + x) % (2**32-1)
    np.random.seed(s)

    IT = 50002
    #IT = 5002
    precisao = 0.01
    AL = []
    AL.append(np.count_nonzero(grupos)/(N*n))
    crit = 0. if AL[0] > (1.-precisao) else 1.

    # In each period the groups enter into conflict and reproduce, and the
    # individuals mutate and migrate between groups
    for it in xrange(1, IT):
        if abs(AL[it-1]-crit) < precisao:
            print "Acabou na geracao ", it - 1
            break
        #
        knums = [np.count_nonzero(line) for line in grupos]
        glabels = conflito(N, knums, beta, listafitness_m, mpvencer) if N > 1 \
            else knums
        grupos = reproducao_ind(N, n, listafitness, listafitness_m, glabels)
        grupos = mutacao(N, n, PM, grupos)
        grupos = migracao(N, n, grupos, pmig)
        freqA = float(np.count_nonzero(grupos))/(N*n)
        AL.append(freqA)
        logger.debug("%d \t----------->\t %f" % (it, freqA))
    return it-1
def relearn(self, test_size=0):
    samples, weights, targets = self.learning_component.get_training_set(const_weight=True)
    train_samples, test_samples, train_targets, test_targets = train_test_split(
        samples, targets, test_size=test_size, random_state=np.random.RandomState(0))

    count_positives = 1.0*np.count_nonzero(train_targets)
    count_negatives = 1.0*(len(train_targets) - count_positives)
    positive_weight = count_negatives/len(train_targets)
    negative_weight = count_positives/len(train_targets)
    weights = np.array([positive_weight if target == 1 else negative_weight
                        for target in train_targets])

    self.classifier.fit(train_samples, train_targets, sample_weight=weights)
    self.learning_component.new_samples_count = 0

    if len(test_samples) > 0:
        test_result = [self.classifier.predict(sample) for sample in test_samples]
        true_positives = 0.0
        count_test_positives = 1.0*np.count_nonzero(test_targets)
        count_result_positives = 1.0*np.count_nonzero(test_result)
        for i in xrange(len(test_targets)):
            if test_targets[i] == test_result[i] and test_result[i] == 1:
                true_positives += 1
        precision = true_positives / count_test_positives
        recall = true_positives / count_result_positives
        print "Precision:", precision
        print "Recall", recall
        if precision + recall != 0:
            print "F-score:", 2 * precision * recall / (precision + recall)
        else:
            print "F-score:", 0

    self.positive_class_index = 0
    for elem in self.classifier.classes_:
        if elem != 1.0:
            self.positive_class_index += 1
        else:
            break
def count_element_values(self):
    """Shows the total count of detected elements after the segmentation"""
    from numpy import count_nonzero

    from app.imgprocessing.slice_mask import apply_mask

    collection_mask = self.collection.copy()
    collection_mask = apply_mask(collection_mask)

    empty = count_nonzero(collection_mask == 0)
    mastic = count_nonzero(collection_mask == 1)
    aggregate = count_nonzero(collection_mask == 2)

    total = (empty + mastic + aggregate)

    QtWidgets.QMessageBox.about(self, "Element counting",
        """
        <br>
        <table>
            <tr><th>The sample has = %s pixels:</th></tr>
            <tr>
                <td>Empty pixels = %s</td>
                <td>%3.2f%%</td>
            </tr>
            <tr>
                <td>Mastic pixels = %s</td>
                <td>%3.2f%%</td>
            </tr>
            <tr>
                <td>Aggregate pixels = %s</td>
                <td>%3.2f%%</td>
            </tr>
        </table>
        """ % (total, empty, ((empty * 100.) / total),
               mastic, ((mastic * 100.) / total),
               aggregate, ((aggregate * 100.) / total)))
def update_selected_info_label(self):
    pl = lambda c: "" if c == 1 else "s"

    if self.data is not None and self.scores is not None:
        scores = self.scores
        low, high = self.min_value, self.max_value
        _, side, _, _ = self.Scores[self.score_index]
        test = self.test_f[side]

        count_undef = np.count_nonzero(np.isnan(scores))
        count_scores = len(scores)
        scores = scores[np.isfinite(scores)]

        nselected = np.count_nonzero(test(scores, low, high))
        defined_txt = ("{} of {} score{} undefined."
                       .format(count_undef, count_scores, pl(count_scores)))
    elif self.data is not None:
        nselected = 0
        defined_txt = "No defined scores"
    else:
        nselected = 0
        defined_txt = ""

    self.selectedInfoLabel.setText(
        defined_txt + "\n" +
        "{} selected gene{}".format(nselected, pl(nselected))
    )
def constrain_UHF(molecule, this):
    occupancy = numpy.add(this.Alpha.Occupancy, this.Beta.Occupancy)
    N = molecule.NElectrons
    Nab = this.NAlpha * this.NBeta
    Na = numpy.count_nonzero(occupancy == 1)    # Dimension of active space
    Nc = numpy.count_nonzero(occupancy == 2)    # Dimension of core space
    S = molecule.S

    half_density_matrix = S.dot(this.Total.Density/2).dot(S)
    NO_vals, NO_vects = numpy.linalg.eigh(half_density_matrix)   # See J. Chem. Phys. 1988, 88(8), 4926
    NO_coeffs = numpy.linalg.inv(S).dot(NO_vects)                # for details on finding the NO coefficients
    back_trans = numpy.linalg.inv(NO_coeffs)

    # Calculate the expectation value of the spin operator
    this.S2 = N*(N+4)/4. - Nab - 2 * sum([x ** 2 for x in NO_vals])   # Using formula from J. Chem. Phys. 88, 4926

    # Sort in order of descending occupancy
    idx = NO_vals.argsort()[::-1]    # Note the [::-1] reverses the index array
    core_space = idx[:Nc]            # Indices of the core NOs
    valence_space = idx[(Nc + Na):]  # Indices of the valence NOs

    delta = (this.Alpha.Fock - this.Beta.Fock) / 2
    delta = NO_coeffs.T.dot(delta).dot(NO_coeffs)    # Transforming delta into the NO basis
    lambda_matrix = numpy.zeros(numpy.shape(delta))
    for i in core_space:
        for j in valence_space:
            lambda_matrix[i, j] = -delta[i, j]
            lambda_matrix[j, i] = -delta[j, i]
    lambda_matrix = back_trans.T.dot(lambda_matrix).dot(back_trans)   # Transforming lambda back to the AO basis

    this.Alpha.Fock = this.Alpha.Fock + lambda_matrix
    this.Beta.Fock = this.Beta.Fock - lambda_matrix
def question_1():
    # Adjacency matrix.
    A = numpy.matrix([
        [0, 0, 1, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 1, 0, 0, 1],
        [1, 0, 0, 1, 0, 1, 0, 0],
        [0, 0, 1, 0, 1, 0, 1, 0],
        [0, 1, 0, 1, 0, 0, 0, 1],
        [1, 0, 1, 0, 0, 0, 1, 0],
        [0, 0, 0, 1, 0, 1, 0, 1],
        [0, 1, 0, 0, 1, 0, 1, 0]
    ])
    rn, cn = A.shape

    # Degree matrix.
    D = numpy.asmatrix(numpy.zeros((rn, cn), int))
    numpy.fill_diagonal(D, sum(A))

    # Laplacian matrix.
    L = D - A

    sum_a = A.sum()
    sum_d = D.sum()
    sum_l = L.sum()

    nonzero_a = numpy.count_nonzero(A)
    nonzero_d = numpy.count_nonzero(D)
    nonzero_l = numpy.count_nonzero(L)

    print('A: sum={} #nonzero={}'.format(sum_a, nonzero_a))
    print('D: sum={} #nonzero={}'.format(sum_d, nonzero_d))
    print('L: sum={} #nonzero={}'.format(sum_l, nonzero_l))
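# Optional sanity check (my own addition, not part of the original question_1): every row of the
# Laplacian L = D - A sums to zero, because the degree on the diagonal cancels the ones in the
# corresponding adjacency row, so the sum_l printed above should always be 0.
import numpy


def check_laplacian(A):
    D = numpy.asmatrix(numpy.zeros(A.shape, int))
    numpy.fill_diagonal(D, A.sum(axis=0))
    L = D - A
    assert (L.sum(axis=1) == 0).all()
    return L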
def __recall(self, y_test, Y_vote):
    """ recall extended to multi-class classification """
    # predicted classes
    y_hat = np.argmax(Y_vote, axis=1)

    if True or self.mode == "one-vs-one":
        # need confusion matrix
        conf = self.__confusion(y_test, Y_vote)

        # consider each class separately
        recall = np.zeros(self.numClasses)
        for c in xrange(self.numClasses):
            # true positives: label is c, classifier predicted c
            tp = conf[c, c]

            # false negatives: label is c, classifier predicted not c
            fn = np.sum(conf[c, :]) - conf[c, c]
            if tp > 0 and fn > 0:
                recall[c] = tp*1./(tp+fn)
    elif self.mode == "one-vs-rest":
        # consider each class separately
        recall = np.zeros(self.numClasses)
        for c in xrange(self.numClasses):
            # true positives: label is c, classifier predicted c
            tp = np.count_nonzero((y_test == c) * (y_hat == c))

            # false negatives: label is c, classifier predicted not c
            fn = np.count_nonzero((y_test == c) * (y_hat != c))
            recall[c] = tp*1./(tp+fn)
    return recall
def test_that_build_pyramid_relaxes_mask():
    from _stbt.match import _build_pyramid

    mask = numpy.ones((20, 20, 3), dtype=numpy.uint8) * 255
    mask[3:9, 3:9] = 0  # first 0 is an even row/col, last 0 is an odd row/col
    n = mask.size - numpy.count_nonzero(mask)
    assert n == 6 * 6 * 3
    cv2.imwrite("/tmp/dave1.png", mask)

    mask_pyramid = _build_pyramid(mask, 2, is_mask=True)
    assert numpy.all(mask_pyramid[0] == mask)

    downsampled = mask_pyramid[1]
    cv2.imwrite("/tmp/dave2.png", downsampled)
    assert downsampled.shape == (10, 10, 3)
    print downsampled[:, :, 0]  # pylint:disable=unsubscriptable-object
    n = downsampled.size - numpy.count_nonzero(downsampled)
    assert 3 * 3 * 3 <= n <= 6 * 6 * 3
    expected = [
        # pylint:disable=bad-whitespace
        [255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
        [255,   0,   0,   0,   0,   0, 255, 255, 255, 255],
        [255,   0,   0,   0,   0,   0, 255, 255, 255, 255],
        [255,   0,   0,   0,   0,   0, 255, 255, 255, 255],
        [255,   0,   0,   0,   0,   0, 255, 255, 255, 255],
        [255,   0,   0,   0,   0,   0, 255, 255, 255, 255],
        [255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
        [255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
        [255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
        [255, 255, 255, 255, 255, 255, 255, 255, 255, 255]]
    assert numpy.all(downsampled[:, :, 0] == expected)  # pylint:disable=unsubscriptable-object
def testFeatureGenWithOnePoint(self):
    # ensure that the start and end datetimes are the same, since the average calculation uses
    # the total distance and the total duration
    ts = esta.TimeSeries.get_time_series(self.testUUID)
    trackpoint1 = ecwlo.Location({u'coordinates': [0, 0], 'type': 'Point'})
    ts.insert_data(self.testUUID, "analysis/recreated_location", trackpoint1)

    testSeg = ecws.Section({"start_loc": trackpoint1,
                            "end_loc": trackpoint1,
                            "distance": 500,
                            "sensed_mode": 1,
                            "duration": 150,
                            "start_ts": arrow.now().timestamp,
                            "end_ts": arrow.now().timestamp,
                            "_id": 2,
                            "speeds": [],
                            "distances": [],
                            })
    testSegEntry = ecwe.Entry.create_entry(self.testUUID, "analysis/cleaned_section", testSeg)
    d = testSegEntry.data
    m = testSegEntry.metadata
    enufc.expand_start_end_data_times(d, m)
    testSegEntry["data"] = d
    testSegEntry["metadata"] = m
    inserted_id = ts.insert(testSegEntry)
    featureMatrix = np.zeros([1, len(self.pipeline.featureLabels)])
    resultVector = np.zeros(1)
    self.pipeline.updateFeatureMatrixRowWithSection(featureMatrix, 0, testSegEntry)
    logging.debug("featureMatrix = %s" % featureMatrix)
    self.assertEqual(np.count_nonzero(featureMatrix[0][5:16]), 0)
    self.assertEqual(np.count_nonzero(featureMatrix[0][19:21]), 0)
def corners(self, bandNames=None):
    "Return the corners of the tilted rectangle of valid image data as (x, y) pixel coordinates."
    alpha = self.mask(bandNames)
    alphaT = numpy.transpose(alpha)
    ysize, xsize = alpha.shape
    output = []

    for i in xrange(ysize):
        if numpy.count_nonzero(alpha[i]) > 0:
            break
    output.append((numpy.argwhere(alpha[i]).mean(), i))

    for i in xrange(xsize):
        if numpy.count_nonzero(alphaT[i]) > 0:
            break
    output.append((i, numpy.argwhere(alphaT[i]).mean()))

    for i in xrange(ysize - 1, 0, -1):
        if numpy.count_nonzero(alpha[i]) > 0:
            break
    output.append((numpy.argwhere(alpha[i]).mean(), i))

    for i in xrange(xsize - 1, 0, -1):
        if numpy.count_nonzero(alphaT[i]) > 0:
            break
    output.append((i, numpy.argwhere(alphaT[i]).mean()))

    return output
def go(sltree, score, X_train, Y_train, X_test, Y_test):
    t_train_begin = time()
    sltree.train(X_train, Y_train)
    t_train_end = time()

    t_test_begin = time()
    Y_predict_train, AP_train, complexity_train, depths_train = sltree.test(
        X_train, Y_train, return_complexity=True, return_depth=True)
    Y_predict_test, AP_test, complexity_test, depths_test = sltree.test(
        X_test, Y_test, return_complexity=True, return_depth=True)
    t_test_end = time()

    n_acc_train = np.count_nonzero(Y_predict_train == Y_train)
    n_acc_test = np.count_nonzero(Y_predict_test == Y_test)

    score.update({'acc_train': float(n_acc_train)/Y_predict_train.shape[0],
                  'n_acc_train': n_acc_train,
                  'AP_train': AP_train,
                  'mAP_train': np.mean(AP_train),
                  'complexity_train': complexity_train,
                  'avg_complexity_train': np.mean(complexity_train),
                  'depths_train': depths_train,
                  'avg_depth_train': np.mean(depths_train),
                  'acc_test': float(n_acc_test)/Y_predict_test.shape[0],
                  'n_acc_test': n_acc_test,
                  'AP_test': AP_test,
                  'mAP_test': np.mean(AP_test),
                  'complexity_test': complexity_test,
                  'avg_complexity_test': np.mean(complexity_test),
                  'depths_test': depths_test,
                  'avg_depth_test': np.mean(depths_test),
                  'time_test': t_test_end-t_test_begin})
def __init__(self, op_type, op_name, output_index, num_outputs, value):
    """Constructor of InfOrNanError.

    Args:
      op_type: Type name of the op that generated the tensor that generated the
        `inf`(s) or `nan`(s) (e.g., `Div`).
      op_name: Name of the op that generated the tensor with `inf`(s) or
        `nan`(s). This name is set by client and can be `None` if it is unset.
      output_index: The 0-based output index of the tensor that contains
        `inf`(s) or `nan`(s).
      num_outputs: Total number of outputs of the operation.
      value: The tensor value that contains `inf`(s) or `nan`(s).
    """
    self._op_type = op_type
    self._op_name = op_name
    self._output_index = output_index
    self._num_outputs = num_outputs
    self._value = value

    self._total_count = np.size(value)
    self._inf_count = np.count_nonzero(np.isinf(value))
    self._nan_count = np.count_nonzero(np.isnan(value))

    super(InfOrNanError, self).__init__(self._get_error_message())
def __precision(self, y_test, Y_vote):
    """ precision extended to multi-class classification """
    # predicted classes
    y_hat = np.argmax(Y_vote, axis=1)

    if True or self.mode == "one-vs-one":
        # need confusion matrix
        conf = self.__confusion(y_test, Y_vote)

        # consider each class separately
        prec = np.zeros(self.numClasses)
        for c in xrange(self.numClasses):
            # true positives: label is c, classifier predicted c
            tp = conf[c, c]

            # false positives: label is not c, classifier predicted c
            fp = np.sum(conf[:, c]) - conf[c, c]

            # precision
            prec[c] = tp*1./(tp+fp)
    elif self.mode == "one-vs-rest":
        # consider each class separately
        prec = np.zeros(self.numClasses)
        for c in xrange(self.numClasses):
            # true positives: label is c, classifier predicted c
            tp = np.count_nonzero((y_test == c) * (y_hat == c))

            # false positives: label is not c, classifier predicted c
            fp = np.count_nonzero((y_test != c) * (y_hat == c))

            prec[c] = tp*1./(tp+fp)
    return prec
def get_stats(self):
    # number of trades
    num_of_trades = self.record.shape[0] / 2
    # number of profit_lock_out
    num_of_profitlock = np.count_nonzero(np.where(self.record[:, 2] == "profit_lock_out"))
    # number of stop_out
    num_of_stopout = np.count_nonzero(np.where(self.record[:, 2] == "trailing_stop_out"))
    num_of_stopout += np.count_nonzero(np.where(self.record[:, 2] == "hard_stop_out"))
    # number of reversed_out
    num_of_reversed_out = np.count_nonzero(np.where(self.record[:, 2] == "reversed_out"))
    # number of time_out
    num_of_time_out = np.count_nonzero(np.where(self.record[:, 2] == "time_out"))

    # PNL
    i = 1
    for i in range(1, num_of_trades * 2, 2):
        if self.record[i, 3] == "long":
            self.pnl = np.append(self.pnl, float(self.record[i, 4]) - float(self.record[i-1, 4]))
        elif self.record[i, 3] == "short":
            self.pnl = np.append(self.pnl, float(self.record[i-1, 4]) - float(self.record[i, 4]))
    lst.pnl = lst.pnl[1:]

    # output statistical results
    print "# trades", num_of_trades, "# profit_lock", num_of_profitlock,\
        "# stopout", num_of_stopout, "# reversed_out",\
        num_of_reversed_out, "# time_out", num_of_time_out
    print "P&L Summary Stats:", lst.pnl.__len__(), lst.pnl.mean()/tickBase, \
        lst.pnl.std()/tickBase, lst.pnl.min()/tickBase, lst.pnl.max()/tickBase
def test_estimator():
    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
    omp.fit(X, y[:, 0])
    assert_equal(omp.coef_.shape, (n_features,))
    assert_equal(omp.intercept_.shape, ())
    assert np.count_nonzero(omp.coef_) <= n_nonzero_coefs

    omp.fit(X, y)
    assert_equal(omp.coef_.shape, (n_targets, n_features))
    assert_equal(omp.intercept_.shape, (n_targets,))
    assert np.count_nonzero(omp.coef_) <= n_targets * n_nonzero_coefs

    coef_normalized = omp.coef_[0].copy()
    omp.set_params(fit_intercept=True, normalize=False)
    omp.fit(X, y[:, 0])
    assert_array_almost_equal(coef_normalized, omp.coef_)

    omp.set_params(fit_intercept=False, normalize=False)
    omp.fit(X, y[:, 0])
    assert np.count_nonzero(omp.coef_) <= n_nonzero_coefs
    assert_equal(omp.coef_.shape, (n_features,))
    assert_equal(omp.intercept_, 0)

    omp.fit(X, y)
    assert_equal(omp.coef_.shape, (n_targets, n_features))
    assert_equal(omp.intercept_, 0)
    assert np.count_nonzero(omp.coef_) <= n_targets * n_nonzero_coefs
#-- read a timestep of 'ta'
variable = f.variables['ta']                        #-- first time step, lev, ncells
data = variable[0, 0, :]                            #-- ta [time,lev,ncells]; miss _FillValue
var = data - 273.15                                 #-- convert to degrees Celsius; miss _FillValue

#-- define _FillValue and missing_value if not existing
missing = -1e20

if not hasattr(var, '_FillValue'):
    var._FillValue = missing                        #-- set _FillValue

if not hasattr(var, 'missing_value'):
    var.missing_value = missing                     #-- set missing_value

varM = np.ma.array(var, mask=np.equal(var, missing))    #-- mask array with missing values
nummissing = np.count_nonzero(varM.mask)                 #-- number of missing values

#-- set data intervals, levels, labels, color indices
varMin, varMax, varInt = -32, 28, 4                 #-- set data minimum, maximum, interval
levels = list(range(varMin, varMax, varInt))        #-- set levels array
nlevs = len(levels)                                 #-- number of levels
labels = ['{:.2f}'.format(x) for x in levels]       #-- convert list of floats to list of strings

#-- print info to stdout
print('')
print('min/max: {:0.2f} / {:0.2f}'.format(np.min(varM), np.max(varM)))
print('')
print('varMin: {:3d}'.format(varMin))
print('varMax: {:3d}'.format(varMax))
print('varInt: {:3d}'.format(varInt))
def train_model(X_train, y_train, seed, ccru_version, base_classifier, X_val, y_val,
                feature_subsets_per_cc=[]):
    pid = os.getpid()
    print('The id of ' + str(seed) + ' is :' + str(pid))
    # print('Train ecc: '+str(seed)+' started')
    if ccru_version == 'standard':
        model = ClassifierChain(base_classifier, order='random', random_state=seed)
    elif ccru_version == 'eccru' or ccru_version == 'eccru2' or ccru_version == 'eccru3':
        model = CCRU(base_classifier, order='random', random_state=seed)
    elif ccru_version == 'binary_relevance':
        model = SVC(gamma='auto', kernel='linear')
    else:
        print('Cannot recognize ccru version!!!!')

    class_1 = 1
    class_2 = 0
    if -1 in y_train:
        class_2 = -1

    if ccru_version == 'binary_relevance':
        class_1_counter = np.count_nonzero(y_train[:, 0] == class_1)
        class_2_counter = np.count_nonzero(y_train[:, 0] == class_2)
        # class_1_counter = y_train.flatten().tolist()[0].count(class_1)
        # class_2_counter = y_train.flatten().tolist()[0].count(class_2)

        if class_1_counter <= class_2_counter:
            minority_class = class_1
            majority_class = class_2
            minority_counter = class_1_counter
        else:
            minority_class = class_2
            majority_class = class_1
            minority_counter = class_2_counter

        sampled_index = [
            index for index, label in enumerate(y_train)
            if label == minority_class
        ]
        sampled_y = [minority_class] * minority_counter

        temp_sampled_index = [
            index for index, label in enumerate(y_train)
            if label == majority_class
        ]
        sampled_index.extend(
            random.sample(temp_sampled_index, minority_counter))
        sampled_y.extend([majority_class] * minority_counter)

        print('Train binary_relevance: ' + str(seed) + ' started')
        print('training on ' + str(len(sampled_y)))

        if len(feature_subsets_per_cc) != 0:
            trained_model = model.fit(
                X_train[np.array(sampled_index), feature_subsets_per_cc[seed]],
                y_train, X_val, y_val)
        else:
            trained_model = model.fit(X_train[np.array(sampled_index), :],
                                      sampled_y)
    else:
        print('Train ecc: ' + str(seed) + ' started ')
        if len(feature_subsets_per_cc) != 0:
            trained_model = model.fit(X_train[:, feature_subsets_per_cc[seed]],
                                      y_train, X_val, y_val)
        else:
            trained_model = model.fit(X_train, y_train, X_val, y_val)
    print('Train model: ' + str(seed) + ' ended')
    return trained_model
        plt.imshow(test_dataset[prefix][i, :, :, 0])
    else:
        plt.imshow(test_dataset[prefix][i, :, :, :])
    plt.title('out[' + prefix + ']')

'''Convolutional neural network training

Note: you need to use my branch of keras with the new functionality, that allows element-wise
weights of the loss function
'''

# list all CPUs and GPUs
device_list = K.get_session().list_devices()

# number of GPUs
gpu_number = np.count_nonzero(['GPU' in str(x) for x in device_list])

# load dmap model that we are going to use as the basis for the contour model
dmap_model_filename = os.path.join(saved_models_dir,
                                   dmap_model_basename + '_model_fold_' + str(i_fold) + '.h5')
dmap_model = keras.models.load_model(dmap_model_filename)

# instantiate contour model
with tf.device('/cpu:0'):
    contour_model = fcn_sherrah2016_classifier(input_shape=train_dataset['im'].shape[1:])

for lay in [1, 4, 7]:
    # transfer weights from dmap to contour model in the first 3 convolutional layers
    dmap_layer = dmap_model.get_layer(index=lay)
    contour_layer = contour_model.get_layer(index=lay)
    contour_layer.set_weights(dmap_layer.get_weights())
def opponent_policy(curr_state, prev_state, prev_action):
    opponent_policy.second_move = False
    # check if a new games is started.
    if np.count_nonzero(curr_state[2, :, :]) == board_size**2 - 1:
        opponent_policy.second_move = True

    # coords is the coordinate of the previous action.
    coords = GomokuEnv.action_to_coordinate(
        board_size, prev_action) if prev_action is not None else None

    if prev_state is None:
        '''
        First move should be the center of the board.
        '''
        move = (board_size // 2, board_size // 2)
    elif opponent_policy.second_move:
        '''
        If the AI must go second, it shouldn't think, it should just go diagonal
        adjacent to the first placed tile; diagonal into the larger area of the
        board if one exists
        '''
        if coords[1] <= board_size // 2:
            dy = 1
        else:
            dy = -1
        if coords[0] <= board_size // 2:
            dx = 1
        else:
            dx = -1
        move = (coords[0] + dx, coords[1] + dy)
        opponent_policy.second_move = False
    else:
        free_x, free_y = np.where(curr_state[2, :, :] == 1)
        possible_moves = [(x, y) for x, y in zip(free_x, free_y)]
        if len(possible_moves) == 0:
            # no more moves
            return None
        '''
        Strategy for the naive agent:
        1. Search if there is a win opportunity.
        2. Search if opponent is winning, if yes, then block
        3. Search if opponent has a open stream that equals 2 less than win_len,
           if yes, then block
        3. Try to extend the longest existing trend.
        '''
        if curr_state[0, coords[0], coords[1]] != 0:
            color = 1
        else:
            color = 0

        # 1: opponent position, 2: empty, 3: my position
        my_board = np.add(
            np.subtract(curr_state[color, :, :], curr_state[1 - color, :, :]), 2)
        # print(my_board)

        # check if we have a winning move
        move = search_winning_move(my_board, '3')
        if move is None:
            # check if opponent has a winning move
            move = search_winning_move(my_board, '1')
        if move is None:
            # check if we have open win_len - 2
            move = search_move(my_board, '2' + ('3' * (win_len - 2)) + '2', win_len)
        if move is None:
            # check if opponent has open win_len - 2
            move = search_move(my_board, '2' + ('1' * (win_len - 2)) + '2', win_len)
        if move is None:
            for i in range(2, level + 2):
                if win_len - i < 1:
                    break
                # search for connected win_len - i stones
                move = search_move(my_board, '23{' + str(win_len - i) + '}', win_len - i + 1)
                if move is None:
                    move = search_move(my_board, '3{' + str(win_len - i) + '}2',
                                       win_len - i + 1, False)
                if move is not None:
                    break
        if move is None:
            print(np.random.choice(possible_moves))
            move = np.random.choice(possible_moves)
    return GomokuEnv.coordinate_to_action(board_size, move)
def modify(frame, data):
    # TODO: make parameters tunable elsewhere
    dim = 3                                  # system dimensionality
    slice_thickness = 50.                    # thickness of volume slab in sweep direction
    positional_step = 50.                    # shift of volume slab per evaluation
    slice_normal = np.array([0., 0., 1.])    # spatial direction of sweep

    # ignore off-diagonal stresses for now
    peratom_stress = data.particles["c_peratom_stress"][:, 0:3]
    position = data.particles["Position"]

    # process selection only, otherwise whole system
    if "Selection" in data.particles:
        global_selection = data.particles["Selection"]
    else:
        global_selection = np.ones(data.particles.count)

    global_peratom_stress = peratom_stress[np.nonzero(global_selection)]
    global_position = position[np.nonzero(global_selection)]

    global_natoms = np.count_nonzero(global_selection)

    global_max_pos = np.max(position[np.nonzero(global_selection)], axis=0)
    global_min_pos = np.min(position[np.nonzero(global_selection)], axis=0)
    global_measure = global_max_pos - global_min_pos

    # vector spanning the slice surface:
    slice_surface_diagonal = global_measure * (np.ones(dim) - slice_normal)

    # at given slab thickness and step, that many slices fit into selected vol:
    slice_count = int(
        np.floor(
            np.dot(global_measure + slice_normal * slice_thickness, slice_normal)
            / positional_step)) + 1

    # per-atom properties for intermediate results
    slice_overlap_count = data.particles_.create_property(
        'Slice Overlap Count', dtype=int, components=1)
    slice_volume_sum = data.particles_.create_property('Slice Volume Sum',
                                                       dtype=float, components=1)
    local_cumulative_stress_tensor_diagonal = data.particles_.create_property(
        'Local Cumulative Stress', dtype=float, components=3)

    with slice_overlap_count:
        slice_overlap_count[np.nonzero(global_selection)] = np.zeros(global_natoms)
    with slice_volume_sum:
        slice_volume_sum[np.nonzero(global_selection)] = np.zeros(global_natoms)

    msg = """Sweeping selection of {} particles with extreme coordinates {} and {}
        by {} slices of {} [length units] thickness
        at steps of {} [length units] in direction ({})""".format(
        global_natoms, global_min_pos, global_max_pos,
        slice_count, slice_thickness, positional_step, slice_normal)

    print(msg)
    yield msg

    start_pos = global_min_pos - slice_thickness * slice_normal
    step_vec = positional_step * slice_normal

    # sweep "representative volume" and an according selection across system
    for i in range(slice_count):
        yield (i / slice_count)  # ovito progress bar
        cur_min_pos = start_pos + i * step_vec
        cur_max_pos = start_pos + i * step_vec \
            + slice_thickness * slice_normal + slice_surface_diagonal

        print("------------------------------------------")
        print("""slice #{} of {}, spanned between corners {} and {}""".format(
            i + 1, slice_count, cur_min_pos, cur_max_pos))

        selection = \
            np.greater_equal(position, cur_min_pos).all(axis=1) \
            & np.less(position, cur_max_pos).all(axis=1) \
            & global_selection

        natoms = np.count_nonzero(selection)
        print("  #selected particles          : {}".format(natoms))
        if natoms < 1:
            continue

        stress = np.sum(peratom_stress[np.nonzero(selection)], axis=0)

        max_pos = np.max(position[np.nonzero(selection)], axis=0)
        min_pos = np.min(position[np.nonzero(selection)], axis=0)

        measure = max_pos - min_pos
        volume = np.product(measure)

        pressure_tensor_diagonal = -stress / volume
        pressure_tensor_trace = np.sum(pressure_tensor_diagonal) / dim

        print("  cumulative stress    (X,Y,Z) : {}".format(stress))
        print("  maximum coordinates  (X,Y,Z) : {}".format(max_pos))
        print("  minimum coordinates  (X,Y,Z) : {}".format(min_pos))
        print("  slab measures        (X,Y,Z) : {}".format(measure))
        print("  slab volume                  : {}".format(volume))
        print("  pressure tensor diagonal     : {}".format(pressure_tensor_diagonal))
        print("  pressure tensor trace        : {}".format(pressure_tensor_trace))

        # sum up slice stresses:
        stress_tensor_diagonal_outer = np.outer(stress, np.ones(natoms))

        with local_cumulative_stress_tensor_diagonal:
            local_cumulative_stress_tensor_diagonal[np.nonzero(selection)] \
                += stress_tensor_diagonal_outer.T
        with slice_overlap_count:
            slice_overlap_count[np.nonzero(selection)] += 1
        with slice_volume_sum:
            slice_volume_sum[np.nonzero(selection)] += volume

    local_mean_pressure_tensor_diagonal = \
        data.particles_.create_property('Local Mean Pressure Tensor Diagonal',
                                        dtype=float, components=3)
    local_mean_pressure_tensor_trace = \
        data.particles_.create_property('Local Mean Pressure Tensor Trace',
                                        dtype=float, components=1)

    # in case of overlap between volume elements:
    # weighted mean for any atom part of one or several overlaps
    #   <p_ii> = sum(p_{ii,j} * V_j, j) / sum(3 V_j, j)
    with local_mean_pressure_tensor_diagonal:
        local_mean_pressure_tensor_diagonal[np.nonzero(global_selection)] = \
            - local_cumulative_stress_tensor_diagonal[np.nonzero(global_selection)] / \
            np.atleast_2d(dim * slice_volume_sum[np.nonzero(global_selection)]).T

    with local_mean_pressure_tensor_trace:
        local_mean_pressure_tensor_trace[np.nonzero(global_selection)] = \
            np.sum(local_mean_pressure_tensor_diagonal[np.nonzero(global_selection)], axis=1)
def print_summary(self, v):
    v = np.array(v)
    # note: ndarray.size is an attribute, not a method, so no parentheses
    print("Max:", v.max(), "Min:", v.min(), "Mean:", v.mean(),
          "Size:", v.size, "# Non-zero:", np.count_nonzero(v))
def CountParticles(group, idArray):
    nParticle = np.count_nonzero(group['pid'].isin(idArray))
    return nParticle
            tot = np.count_nonzero(img[i * 2 * diam:i * 2 * diam + 2 * diam,
                                       j * 2 * diam:j * 2 * diam + 2 * diam])
            vec[k] = 1 if tot > 0.2 * diam * diam else 0
    print(vec)
    if repr(vec) in alphabet:
        return alphabet[repr(vec)]
    return '#'


letters = []
for i in range(N):
    for j in range(M - 1, -1, -1):
        if i - hei + 1 < 0 or j + wid - 1 >= M:
            continue
        tot = np.count_nonzero(dots_matrix[i - hei + 1:i + 1, j:j + wid])
        if tot == 0:
            continue
        ## big rect
        tot2 = np.count_nonzero(
            dots_matrix[max(0, i - 12 * diam + 1):min(i + 6 * diam + 1, N),
                        max(0, j - 4 * diam):min(M, j + 8 * diam)])
        offset = diam // 2
        if tot2 == tot and i - hei + 1 + offset >= 0 and i + 1 + offset <= N \
                and j - offset >= 0 and j + wid - offset <= M:
            # We found a pattern, cut it
            pattern = dots_matrix[i - hei + 1 + offset:i + 1 + offset,
                                  j - offset:j + wid - offset]
            # print(pattern)
            print(pattern.shape)
            c = find_letter(pattern)
            if c == '#' and i + offset + 2 * diam < N:
        words = line.split(' ')

        assert (len(words) == 4)

        # Look up the index of the words and store them.
        for i in range(0, 4):
            analogies[analogy_num, i] = model.vocab[words[i]].index

        # Increment the row number.
        analogy_num += 1

print('Validating indeces...')
sys.stdout.flush()

# Verify no entries are zero.
assert (np.count_nonzero(analogies) == (analogies.shape[0] * analogies.shape[1]))

##############################################################################
#  Precompute Query Vectors
##############################################################################

print('Computing analogy query vectors...')
sys.stdout.flush()

# Create a matrix to hold all of the query vectors.
query_vecs = np.zeros((num_analogies, model.syn0.shape[1]))

# For each of the analogies...
for i in range(0, num_analogies):
import numpy as np
import os
import pylab as pl
import matplotlib.pyplot as plt

os.system("clear")

g = np.array([
    [12, 23],    # hello world
    [34, 34],
    [6666, 9999]
])

g[0][1] = 1 + g[0][1]
g = np.count_nonzero(g)

print(g)

"""
raiz=np.sqrt
ln=np.log
sigma = [0.05]
accuracy = np.zeros(len(sigma))

for i in range(0, len(Y_train), 1):
    if Y_train[i] == 1:
        Y_train[i] = 1
    else:
        Y_train[i] = -1

for i in range(0, len(Y_test), 1):
    if Y_test[i] == 1:
        Y_test[i] = 1
    else:
        Y_test[i] = -1

Y_train[1500:3000] = 0
l = np.count_nonzero(Y_train)
u = len(Y_train) - l
n = l + u

for s in range(0, len(sigma), 1):
    alpha = np.matlib.zeros((l+u, 1))
    beta = np.matlib.zeros((l, 1))
    K = np.matlib.zeros((l+u, l+u))
    Kx = np.matlib.zeros((len(X_test), l+u))
    J = np.matlib.zeros((l, l+u))
    L = np.matlib.zeros((l+u, l+u))
    W = np.matlib.zeros((l+u, l+u))
    D = np.matlib.zeros((l+u, l+u))
    Y_predcted = np.matlib.zeros((len(X_test), 1))
    Q = np.matlib.zeros((l, l))
    Yd = np.matlib.zeros((l, l))
    f = np.matlib.zeros((len(X_test), 1))
def __init__(self, dataset, bandwidth=None, weights=None, kernel=None, extrema=None,
             points=None, reflect=None, neff=None, diagonal=False, helper=True,
             bw_rescale=None, **kwargs):
    """Initialize the `KDE` class with the given dataset and optional specifications.

    Arguments
    ---------
    dataset : array_like (N,) or (D,N,)
        Dataset from which to construct the kernel-density-estimate.
        For multivariate data with `D` variables and `N` values, the data must be shaped (D,N).
        For univariate (D=1) data, this can be a single array with shape (N,).

    bandwidth : str, float, array of float, None  [optional]
        Specification for the bandwidth, or the method by which the bandwidth should be
        determined.  If a `str` is given, it must match one of the standard bandwidth
        determination methods.  If a `float` is given, it is used as the bandwidth in each
        dimension.  If an array of `float`s are given, then each value will be used as the
        bandwidth for the corresponding data dimension.

    weights : array_like (N,), None  [optional]
        Weights corresponding to each `dataset` point.  Must match the number of points `N` in
        the `dataset`.  If `None`, weights are uniformly set to 1.0 for each value.

    kernel : str, Distribution, None  [optional]
        The distribution function that should be used for the kernel.  This can be a `str`
        specification that must match one of the existing distribution functions, or this can
        be a `Distribution` subclass itself that overrides the `_evaluate` method.

    neff : int, None  [optional]
        An effective number of datapoints.  This is used in the plugin bandwidth determination
        methods.  If `None`, `neff` is calculated from the `weights` array.  If `weights` are
        all uniform, then `neff` equals the number of datapoints `N`.

    diagonal : bool,
        Whether the bandwidth/covariance matrix should be set as a diagonal matrix
        (i.e. without covariances between parameters).
        NOTE: see `KDE` docstrings, "Dynamic Range".

    """
    self._squeeze = (np.ndim(dataset) == 1)
    self._dataset = np.atleast_2d(dataset)
    ndim, ndata = self.dataset.shape
    reflect = kernels._check_reflect(reflect, self.dataset)

    self._helper = helper
    self._ndim = ndim
    self._ndata = ndata
    self._diagonal = diagonal
    self._reflect = reflect
    # The first time `points` are used, they need to be 'checked' for consistency
    self._check_points_flag = True
    self._points = points
    if ndata < 3:
        err = "ERROR: too few data points!  Dataset shape: ({}, {})".format(ndim, ndata)
        raise ValueError(err)

    # Set `weights`
    # --------------------------------
    weights_uniform = True
    if weights is not None:
        if np.shape(weights) != (ndata,):
            raise ValueError("`weights` input should be shaped as (N,)!")

        if np.count_nonzero(weights) == 0 or np.any(~np.isfinite(weights) | (weights < 0)):
            raise ValueError(
                "Invalid `weights` entries, all must be finite and > 0!")

        weights = np.asarray(weights).astype(float)
        weights_uniform = False

    if neff is None:
        if weights_uniform:
            neff = ndata
        else:
            neff = np.sum(weights)**2 / np.sum(weights**2)

    self._weights = weights
    self._weights_uniform = weights_uniform    # currently unused
    self._neff = neff

    # Set covariance, bandwidth, distribution and kernel
    # -----------------------------------------------------------
    covariance = np.cov(dataset, rowvar=True, bias=False, aweights=weights)
    self._covariance = np.atleast_2d(covariance)

    if bandwidth is None:
        bandwidth = _BANDWIDTH_DEFAULT

    self._set_bandwidth(bandwidth, bw_rescale)

    # Convert from string, class, etc to a kernel
    dist = kernels.get_distribution_class(kernel)
    self._kernel = kernels.Kernel(distribution=dist, bandwidth=self._bandwidth,
                                  covariance=self._covariance, helper=helper, **kwargs)

    # Get Distribution Extrema
    # ------------------------------------
    # Determine the effective minima / maxima that should be used; KDE generally has support
    # outside of the data values themselves.

    # If the Kernel is finite, then there is only support out to `bandwidth` beyond datapoints
    if self.kernel.FINITE:
        out = (1.0 + _NUM_PAD)
    # If infinite kernel, how many standard-deviations can we expect values to lie at
    else:
        out = sp.stats.norm.ppf(1.0 - 1.0 / neff)
        # Extra to be double sure...
        out *= 1.2

    # Find the effective-extrema in each dimension, to be used if `extrema` is not specified
    _bandwidth = np.sqrt(self.kernel.matrix.diagonal())
    eff_extrema = [
        [np.min(dd) - bw * out, np.max(dd) + bw * out]
        for bw, dd in zip(_bandwidth, self.dataset)
    ]

    if (extrema is None) and (reflect is not None):
        extrema = copy.deepcopy(reflect)

    # `eff_extrema` is, by design, outside of data limits, so don't `warn` about limits
    extrema = utils._parse_extrema(eff_extrema, extrema, warn=False)
    self._extrema = extrema

    # Finish Initialization
    # -------------------------------
    self._cdf_grid = None
    self._cdf_func = None

    self._finalize()
    return
def prepare_dataset_ABIDE_matrices_masked(mask):
    """
    Code to prepare the ABIDE (ASD) dataset
    Reads in .npy files from subfolders (for each class), combine into a list/numpy array and returns them

    Inputs:
    - mask: Numpy array containing the existing mask, for repeated removal of features
        (not used here, so it is always a simple mask of all 1s)

    Returns:
    - subject_names_list: list of subject names, used for creating folds that ensure that
        a subject isn't found in both train and test set
    - all_matrices: Numpy array of matrices containing the dataset
    - Y: Numpy array containing the dataset labels
    """
    src_dir = '../data/ABIDE/'
    num_remaining_features = np.count_nonzero(np.sum(mask, axis=0), axis=None)
    num_features = (num_remaining_features, num_remaining_features)
    non_zero_rows = np.where(np.sum(mask, axis=0) > 0)[0]

    all_matrices_normal = []
    subject_names_list = []
    for i, file_or_dir in enumerate(os.listdir(src_dir + "normal/")):
        if ".DS_Store" not in file_or_dir:
            all_matrices_normal.append(
                np.load(src_dir + "normal/" + file_or_dir))
            subject_names_list.append(file_or_dir[0:-10])
    for i, matrix in enumerate(all_matrices_normal):
        matrix = np.nan_to_num(matrix)
        masked_matrix = np.multiply(matrix, mask)
        reduced_matrix = masked_matrix[np.ix_(non_zero_rows, non_zero_rows)]
        all_matrices_normal[i] = reduced_matrix

    all_matrices_diseased = []
    for i, file_or_dir in enumerate(os.listdir(src_dir + "diseased/")):
        if ".DS_Store" not in file_or_dir:
            all_matrices_diseased.append(
                np.load(src_dir + "diseased/" + file_or_dir))
            subject_names_list.append(file_or_dir[0:-10])
    for i, matrix in enumerate(all_matrices_diseased):
        matrix = np.nan_to_num(matrix)
        masked_matrix = np.multiply(matrix, mask)
        reduced_matrix = masked_matrix[np.ix_(non_zero_rows, non_zero_rows)]
        all_matrices_diseased[i] = reduced_matrix

    all_matrices = np.empty(
        (len(all_matrices_normal) + len(all_matrices_diseased),
         num_features[0], num_features[1]))
    for i, matrix in enumerate(all_matrices):
        if i < len(os.listdir(src_dir + 'normal')):
            all_matrices[i] = all_matrices_normal[i]
        elif i < len(os.listdir(src_dir + 'normal')) + len(
                os.listdir(src_dir + 'diseased')):
            all_matrices[i] = all_matrices_diseased[
                i - (len(os.listdir(src_dir + 'normal')))]
        else:
            print("There are more matrices than expected!")

    label_normal = [0 for i in range(len(all_matrices_normal))]
    label_diseased = [1 for i in range(len(all_matrices_diseased))]
    all_labels = np.array(label_normal + label_diseased)
    Y = np.zeros((all_matrices.shape[0], 2))
    for i in range(all_labels.shape[0]):
        Y[i, all_labels[i]] = 1  # 1-hot vectors

    return (subject_names_list, all_matrices, Y)
def main(): #-- Read the system arguments listed after the program long_options=['DIR=','FILTER=','CLOBBER'] optlist,arglist = getopt.getopt(sys.argv[1:],'D:F:C',long_options) #-- Set default settings subdir = 'atrous_32init_drop0.2_customLossR727.dir' FILTER = 0. flt_str = '' clobber = False for opt, arg in optlist: if opt in ("-D","--DIR"): subdir = arg elif opt in ("-F","--FILTER"): if arg not in ['NONE','none','None','N','n',0]: FILTER = float(arg) flt_str = '_%.1fkm'%(FILTER/1000) elif opt in ("-C","--CLOBBER"): clobber = True #-- Get list of files pred_dir = os.path.join(ddir,'stitched.dir',subdir) fileList = os.listdir(pred_dir) pred_list = [f for f in fileList if (f.endswith('.tif') and ('mask' not in f))] #-- output directory output_dir = os.path.join(pred_dir,'shapefiles.dir') #-- make directories if they don't exist if not os.path.exists(output_dir): os.mkdir(output_dir) #-- if CLOBBBER is False, we are not overwriting old files, so remove exisiting files from list if not clobber: print('Removing exisitng files.') existingList = os.listdir(output_dir) existing = [f for f in existingList if (f.endswith('.shp') and ('ERR' not in f) and f.startswith('gl_'))] rem_list = [] for p in pred_list: if p.replace('.tif','%s.shp'%flt_str) in existing: #-- save index for removing at the end rem_list.append(p) for p in rem_list: print('Ignoring %s.'%p) pred_list.remove(p) # pred_list = ['gl_069_181218-181224-181224-181230_014095-025166-025166-014270_T110614_T110655.tif'] # pred_list = ['gl_007_180518-180524-180530-180605_021954-011058-022129-011233_T050854_T050855.tif'] print('# of files: ', len(pred_list)) #-- threshold for getting contours and centerlines eps = 0.3 #-- loop through prediction files #-- get contours and save each as a line in shapefile #-- also save training label as line for f in pred_list: #-- read file raster = rasterio.open(os.path.join(pred_dir,f),'r') im = raster.read(1) #-- get transformation matrix trans = raster.transform #-- also read the corresponding mask file mask_file = os.path.join(pred_dir,f.replace('.tif','_mask.tif')) print(mask_file) mask_raster = rasterio.open(mask_file,'r') mask = mask_raster.read(1) mask_raster.close() #-- get contours of prediction #-- close contour ends to make polygons im[np.nonzero(im[:,0] > eps),0] = eps im[np.nonzero(im[:,-1] > eps),-1] = eps im[0,np.nonzero(im[0,:] > eps)] = eps im[-1,np.nonzero(im[-1,:] > eps)] = eps contours = skimage.measure.find_contours(im, eps) #-- make contours into closed polyons to find pinning points #-- also apply noise filter and append to noise list x = {} y = {} noise = [] pols = [None]*len(contours) pol_type = [None]*len(contours) for n,contour in enumerate(contours): #-- convert to coordinates x[n],y[n] = rasterio.transform.xy(trans, contour[:,0], contour[:,1]) pols[n] = Polygon(zip(x[n],y[n])) #-- get elements of mask the contour is on submask = mask[np.round(contour[:, 0]).astype('int'), np.round(contour[:, 1]).astype('int')] #-- if more than half of the elements are from test tile, count contour as test type if np.count_nonzero(submask) > submask.size/2.: pol_type[n] = 'Test' else: pol_type[n] = 'Train' #-- Loop through all the polygons and taking any overlapping areas out #-- of the enclosing polygon and ignore the inside polygon ignore_list = [] for i in range(len(pols)): for j in range(len(pols)): if (i != j) and pols[i].contains(pols[j]): pols[i] = pols[i].difference(pols[j]) ignore_list.append(j) #-- loop through and apply noise filter for n in range(len(contours)): #-- apply filter if (n 
not in ignore_list) and (len(x[n]) < 2 or LineString(zip(x[n],y[n])).length <= FILTER): noise.append(n) #-- loop through remaining polygons and determine which ones are #-- pinning points based on the width and length of the bounding box pin_list = [] box_ll = [None]*len(contours) box_ww = [None]*len(contours) for n in range(len(contours)): box_ll[n] = pols[n].length box_ww[n] = pols[n].area/box_ll[n] if (n not in noise) and (n not in ignore_list): #-- make bounding box # box = pols[n].minimum_rotated_rectangle # bx,by = box.exterior.coords.xy # #-- get the dimensions of the sides of the box # edge_length = (Point(bx[0],by[0]).distance(Point(bx[1],by[1])), Point(bx[1],by[1]).distance(Point(bx[2],by[2]))) #-- length is the larger dimension # box_ll = max(edge_length) # #-- width is the smaller dimension # box_ww = min(edge_length) #-- if the with is larger than 1/4 of the length, it's a pinning point if box_ww[n] > box_ll[n]/25: pin_list.append(n) #-- find overlap between ignore list nad noise list if len(list(set(noise) & set(ignore_list))) != 0: sys.exit('Overlap not empty: ', list(set(noise) & set(ignore_list))) #-- initialize list of contour linestrings er = [None]*len(contours) cn = [] #[None]*(len(contours)-len(ignore_list)-len(noise)) n = 0 # total center line counter pc = 1 # pinning point counter lc = 1 # line counter er_type = [None]*len(er) cn_type = [] #[None]*len(cn) er_class = [None]*len(er) cn_class = [] #[None]*len(cn) er_lbl = [None]*len(er) cn_lbl = [] #[None]*len(cn) #-- loop through polygons, get centerlines, and save for idx,p in enumerate(pols): er[idx] = [list(a) for a in zip(x[idx],y[idx])] er_type[idx] = pol_type[idx] if idx in noise: er_class[idx] = 'Noise' elif idx in ignore_list: er_class[idx] = 'Inner Contour' else: if idx in pin_list: #-- pinning point. Just get perimeter of polygon xc,yc = pols[idx].exterior.coords.xy cn.append([[list(a) for a in zip(xc,yc)]]) cn_class.append(['Pinning Point']) cn_type.append([pol_type[idx]]) #-- set label cn_lbl.append(['pin%i'%pc]) pc += 1 #- incremenet pinning point counter else: #-- get centerlines attributes = {"id": idx, "name": "polygon", "valid": True} #-- loop over interpolation distances until we can get a single line dis = pols[idx].length/400 #100 try: cl = Centerline(p,interpolation_distance=dis, **attributes) except: print('not enough ridges. Skip') continue else: #-- merge all the lines merged_lines = linemerge(cl) if merged_lines.geom_type == 'LineString': #-- save coordinates of linestring xc,yc = merged_lines.coords.xy cn.append([[list(a) for a in zip(xc,yc)]]) cn_class.append(['Grounding Line']) cn_lbl.append(['line%i'%lc]) cn_type.append([pol_type[idx]]) er_class[idx] = 'GL Uncertainty' #-- set label er_lbl[idx] = 'err%i'%lc lc += 1 #- incremenet line counter else: nml = len(merged_lines) #-- for lines with many bifurcations, the average segment is #-- about 300m, so if # of segments is length/300 or more, ignore. 
if nml < pols[idx].length/300: coord_list = [] for nn in range(nml): xc,yc = merged_lines[nn].coords.xy coord_list.append([list(a) for a in zip(xc,yc)]) cn.append(coord_list) cn_class.append(['Grounding Line']*nml) cn_lbl.append(['line%i'%lc]*nml) cn_type.append([pol_type[idx]]*nml) er_class[idx] = 'GL Uncertainty' #-- set label er_lbl[idx] = 'err%i'%lc lc += 1 #- incremenet line counter #-- save all linestrings to file #-- make separate files for centerlines and errors # 1) GL file gl_file = os.path.join(output_dir,f.replace('.tif','%s.shp'%flt_str)) w = shapefile.Writer(gl_file) w.field('ID', 'C') w.field('Type','C') w.field('Class','C') #-- loop over contour centerlines for n in range(len(cn)): for nn in range(len(cn[n])): w.line([cn[n][nn]]) w.record(cn_lbl[n][nn], cn_type[n][nn], cn_class[n][nn]) w.close() # create the .prj file prj = open(gl_file.replace('.shp','.prj'), "w") prj.write(raster.crs.to_wkt()) prj.close() # 2) Err File er_file = os.path.join(output_dir,f.replace('.tif','%s_ERR.shp'%flt_str)) w = shapefile.Writer(er_file) w.field('ID', 'C') w.field('Type','C') w.field('Class','C') w.field('Length','C') w.field('Width','C') #-- loop over contours and write them for n in range(len(er)): w.line([er[n]]) w.record(er_lbl[n] , er_type[n], er_class[n], box_ll[n], box_ww[n]) w.close() # create the .prj file prj = open(er_file.replace('.shp','.prj'), "w") prj.write(raster.crs.to_wkt()) prj.close() #-- close input file raster.close()
def Write_MST_Gurobi(casename):
    global debug
    debug = False

    data = loadmat(casename)
    S1 = data['Seq_Retirada']
    C = data['C'][0][0]
    R = data['R'][0][0]
    Seq_Navio_Inv = data['Seq_Navio_Inv'].tolist()[0]
    Seq_Navio_Id_Inv = data['Seq_Navio_Id_Inv'].tolist()[0]
    Patios = data['patio'].tolist()
    q_o = data['q_o'].tolist()
    q_d = data['q_d'].tolist()
    q_r = data['q_r'].tolist()
    q_c = data['q_c'].tolist()
    w_o = data['w_o'].tolist()
    w_d = data['w_d'].tolist()
    w_a = data['w_a'].tolist()
    w_r = data['w_r'].tolist()
    w_c = data['w_c'].tolist()
    phi = data['phi'].tolist()

    Npatios = len(Patios)
    P = Npatios + 1  # number of ports

    for o in range(Npatios):
        for d in range(P):
            if phi[o][d].shape[1] != 0:
                phi[o][d] = phi[o][d].tolist()[0]
            else:
                phi[o][d] = []

    omega = [[] for i in range(Npatios)]  # omega = set of the container indices in each yard (patio)
    S = []
    for i in range(Npatios):
        Patios[i] = Patios[i][0]
        omega[i] = np.extract(Patios[i] != 0, Patios[i]).tolist()
        S.append(S1[0][i].tolist()[0])

    N = [0 for i in range(Npatios)]  # N = number of containers in each yard
    for i in range(Npatios):
        N[i] = np.count_nonzero(Patios[i])
    T = N

    H = []  # H = number of rows in each yard
    for i in range(Npatios):
        H.append(Patios[i].shape[0])

    W = []  # W = number of columns in each yard
    for i in range(Npatios):
        W.append(Patios[i].shape[1])

    print('parameters created')

    model = cplex.Cplex()
    start_time = model.get_time()
    model.objective.set_sense(model.objective.sense.minimize)

    startVar = []
    startVal = []

    #------------------------------------------------------------#
    #-------------------- Variables -----------------------------#
    #------------------------------------------------------------#
    nvar = 0
    model, nvar, startVar, startVal = variavel_v(model, S, N, T, nvar, omega, startVar, startVal)
    model, nvar, startVar, startVal = variavel_q(model, N, R, C, nvar, q_o, q_d, q_r, q_c, startVar, startVal)
    model, nvar, startVar, startVal = variavel_u(model, N, R, C, nvar, Seq_Navio_Inv, startVar, startVal)
    model, nvar, startVar, startVal = variavel_w(model, N, R, C, nvar, w_o, w_d, w_a, w_r, w_c, startVar, startVal)
    model, nvar, startVar, startVal = variavel_z(model, omega, N, T, R, C, S, Seq_Navio_Id_Inv, nvar, startVar, startVal)
    model, nvar, startVar, startVal = variavel_y(model, omega, Patios, S, N, H, W, T, nvar, startVar, startVal)
    model, nvar, startVar, startVal = variavel_b(model, omega, Patios, S, N, H, W, T, nvar, startVar, startVal)
    model, nvar, startVar, startVal = variavel_x(model, omega, N, H, W, T, nvar, startVar, startVal)
    print('variables created')

    solucao_inicial_gurobi_mst = casename + '.mst'
    out_file = open(solucao_inicial_gurobi_mst, 'w+')
    out_file.write("# MIP start \n")
    for Var, Val in zip(startVar, startVal):
        out_file.write(str(Var) + " " + str(int(Val)) + "\n")
    out_file.close()
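# Hedged sketch of the final step above: dumping (variable name, value) pairs in the
# plain-text MIP-start (.mst) layout the function writes, one "name value" pair per
# line after a comment header. The file name and variable names here are made up.
def write_mip_start(path, start_vars, start_vals):
    with open(path, 'w') as out_file:
        out_file.write("# MIP start\n")
        for var, val in zip(start_vars, start_vals):
            out_file.write("%s %d\n" % (var, int(val)))

write_mip_start('example.mst', ['x_0_1', 'x_1_2', 'y_3'], [1, 0, 1])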
TS = []
beta = (0.5)  # For background TS
TS_beta = []  # Calculated from the total TS median after we get all the TS.
beta_err = []
gamma = []

for file in files:
    for item in range(len(file['n_inj'])):
        n_inj.append(file['n_inj'][item])
        nsources.append(file['nsources'][item])
        TS.append(file['TS'][item])
        gamma.append(file['gamma'][item])
    TSs = file['TS']

TS_beta = np.percentile(TSs, 100. * (1. - beta))
m = np.count_nonzero(np.asarray(TSs) > (TS_beta))
i = len(TSs)
fraction = float(m) / float(i)
beta_err = (np.sqrt(fraction * (1. - fraction) / float(i))
            if 0 < beta < 1 else 1.)

## Now we have all the pieces of the original dictionary. Time to glue bckg_trials back in place, in their proper file type. ##
bckg_trials = {'n_inj': n_inj,
               'nsources': np.asarray(nsources),
               'TS': np.asarray(TS),
               'beta': beta,
               'beta_err': beta_err,
               'TS_beta': TS_beta,
               'gamma': np.asarray(gamma)}
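# Standalone sketch of the background-trial calculation above: TS_beta is the
# (1 - beta) quantile of the TS distribution, and beta_err is the binomial error on
# the fraction of trials above that threshold. Random numbers stand in for real trials.
import numpy as np

rng = np.random.default_rng(0)
TS_demo = rng.chisquare(df=1, size=10000)        # stand-in for background test statistics
beta_demo = 0.5
TS_beta_demo = np.percentile(TS_demo, 100. * (1. - beta_demo))
m_demo = np.count_nonzero(np.asarray(TS_demo) > TS_beta_demo)
fraction_demo = float(m_demo) / len(TS_demo)
beta_err_demo = (np.sqrt(fraction_demo * (1. - fraction_demo) / len(TS_demo))
                 if 0 < beta_demo < 1 else 1.)
print(TS_beta_demo, fraction_demo, beta_err_demo)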
def has_powers(self):
    """ Identify if the files include the power metrics """
    if np.count_nonzero(np.isnan(self.powers)) == len(self.powers):
        return False
    return True
def prepare_dataset_ADNI_matrices_masked(choice, mask): """ Code to prepare the ADNI dataset Reads in .npy files from subfolders (for each class), combine into a list/numpy array and returns them Inputs: - choice: one of 'CN-AD' 'CN-MCI' (str) - mask: Numpy array containing the existing mask, for repeated removal of features (not used here, so it is always a simple mask of all 1s) Returns: - subject_names_list: list of subject names, used for creating folds that ensure that a subject isn't found in both train and test set - all_matrices: Numpy array of matrices containing the dataset - Y: Numpy array containing the dataset labels """ src_dir = '../data/ADNI/' if not (choice == 'CN-MCI' or choice == 'MCI-AD' or choice == 'CN-AD'): print( 'Invalid input detected. Allowable options: CN-MCI, MCI-AD, CN-AD') exit() subject_names_list = [] num_remaining_features = np.count_nonzero(np.sum(mask, axis=0), axis=None) num_features = (num_remaining_features, num_remaining_features) non_zero_rows = np.where(np.sum(mask, axis=0) > 0)[0] if 'CN' in choice: print('Preparing CN...') all_matrices_cn = [] for i, file_or_dir in enumerate(os.listdir(src_dir + "CN/")): if ".DS_Store" not in file_or_dir: all_matrices_cn.append(np.load(src_dir + "CN/" + file_or_dir)) subject_names_list.append(file_or_dir[10:18]) for i, matrix in enumerate(all_matrices_cn): matrix = np.nan_to_num(matrix) masked_matrix = np.multiply(matrix, mask) reduced_matrix = masked_matrix[np.ix_(non_zero_rows, non_zero_rows)] all_matrices_cn[i] = reduced_matrix if 'MCI' in choice: print('Preparing MCI...') all_matrices_mci = [] for i, file_or_dir in enumerate(os.listdir(src_dir + "MCI/")): if ".DS_Store" not in file_or_dir: all_matrices_mci.append(np.load(src_dir + "MCI/" + file_or_dir)) subject_names_list.append(file_or_dir[10:18]) for i, matrix in enumerate(all_matrices_mci): matrix = np.nan_to_num(matrix) masked_matrix = np.multiply(matrix, mask) reduced_matrix = masked_matrix[np.ix_(non_zero_rows, non_zero_rows)] all_matrices_mci[i] = reduced_matrix if 'AD' in choice: print('Preparing AD...') all_matrices_ad = [] for i, file_or_dir in enumerate(os.listdir(src_dir + "AD/")): if ".DS_Store" not in file_or_dir: all_matrices_ad.append(np.load(src_dir + "AD/" + file_or_dir)) subject_names_list.append(file_or_dir[10:18]) for i, matrix in enumerate(all_matrices_ad): matrix = np.nan_to_num(matrix) masked_matrix = np.multiply(matrix, mask) reduced_matrix = masked_matrix[np.ix_(non_zero_rows, non_zero_rows)] all_matrices_ad[i] = reduced_matrix ## Combine if choice == 'CN-MCI': all_matrices = np.empty((len(all_matrices_cn) + len(all_matrices_mci), num_features[0], num_features[1])) for i, matrix in enumerate(all_matrices): if i < len(os.listdir(src_dir + 'CN')): all_matrices[i] = all_matrices_cn[i] elif i < len(os.listdir(src_dir + 'CN')) + len( os.listdir(src_dir + 'MCI')): all_matrices[i] = all_matrices_mci[ i - (len(os.listdir(src_dir + 'CN')))] else: print("There are more matrices than expected!") label_cn = [0 for i in range(len(all_matrices_cn))] label_mci = [1 for i in range(len(all_matrices_mci))] all_labels = np.array(label_cn + label_mci) Y = np.zeros((all_matrices.shape[0], 2)) for i in range(all_labels.shape[0]): Y[i, all_labels[i]] = 1 # 1-hot vectors elif choice == 'MCI-AD': all_matrices = np.empty((len(all_matrices_mci) + len(all_matrices_ad), num_features[0], num_features[1])) for i, matrix in enumerate(all_matrices): if i < len(os.listdir(src_dir + 'MCI')): all_matrices[i] = all_matrices_mci[i] elif i < len(os.listdir(src_dir + 
'MCI')) + len( os.listdir(src_dir + 'AD')): all_matrices[i] = all_matrices_ad[ i - (len(os.listdir(src_dir + 'MCI')))] else: print("There are more matrices than expected!") label_mci = [0 for i in range(len(all_matrices_mci))] label_ad = [1 for i in range(len(all_matrices_ad))] all_labels = np.array(label_mci + label_ad) Y = np.zeros((all_matrices.shape[0], 2)) for i in range(all_labels.shape[0]): Y[i, all_labels[i]] = 1 # 1-hot vectors elif choice == 'CN-AD': all_matrices = np.empty((len(all_matrices_cn) + len(all_matrices_ad), num_features[0], num_features[1])) for i, matrix in enumerate(all_matrices): if i < len(os.listdir(src_dir + 'CN')): all_matrices[i] = all_matrices_cn[i] elif i < len(os.listdir(src_dir + 'CN')) + len( os.listdir(src_dir + 'AD')): all_matrices[i] = all_matrices_ad[ i - (len(os.listdir(src_dir + 'CN')))] else: print("There are more matrices than expected!") label_cn = [0 for i in range(len(all_matrices_cn))] label_ad = [1 for i in range(len(all_matrices_ad))] all_labels = np.array(label_cn + label_ad) Y = np.zeros((all_matrices.shape[0], 2)) for i in range(all_labels.shape[0]): Y[i, all_labels[i]] = 1 # 1-hot vectors else: print('Not possible to reach here!') exit() return (subject_names_list, all_matrices, Y)
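# Small sketch of the label handling above: class labels (0/1) are turned into one-hot
# rows of Y. np.eye gives the same result as the explicit loop used in the function.
import numpy as np

labels = np.array([0, 0, 1, 1, 1])           # e.g. CN vs AD
Y_loop = np.zeros((labels.shape[0], 2))
for i in range(labels.shape[0]):
    Y_loop[i, labels[i]] = 1                  # 1-hot vectors
Y_eye = np.eye(2)[labels]
assert np.array_equal(Y_loop, Y_eye)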
import os
import numpy as np
import matplotlib.pyplot as pl
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
from mpl_toolkits.mplot3d import Axes3D

os.system("clear")

fig = pl.figure()
axx = Axes3D(fig)
raiz = np.sqrt   # square-root shorthand
ln = np.log      # natural-log shorthand

Xa = np.arange(-2, 12, 0.1)
Ya = np.arange(-2, 12, 0.1)
# X, Y = np.meshgrid(X, Y)
print(np.count_nonzero(Xa))

l = 2
rho = 100
ik = 25
Electrodos = 8          # number of electrodes
E = Electrodos - 1
P = np.array([
    [0.55, 0.55],   # Position of electrode A
    [4.55, 0.55],   # Position of electrode B
    [8.55, 0.55],   # Position of electrode C
    [0.55, 4.55],   # Position of electrode D
    [8.55, 4.55],   # Position of electrode E
    [0.55, 8.55],   # Position of electrode F
    [4.55, 8.55],   # Position of electrode G
    [8.55, 8.55],   # Position of electrode H
])
def sparsify_dynamics(mtx, _b, init_tol, max_iter=25, thresh_iter=10, l0_penalty=None, split=0.8, normalize=0): """ :param mtx: the theta matrix of shape (M, N) :param _b: a vector or an array of shape (M,) or (M, K) :param init_tol: maximum tolerance (cut_off value) :param max_iter: maximum iteration of the outer loop :param thresh_iter: maximum iteration for threshold least squares :param l0_penalty: penalty factor for nonzero coefficients :param split: proportion of the training set :param normalize: normalization methods, default as 0 (no normalization) :return: the best coefficients of fit """ if mtx.ndim != 2: raise ValueError('mtx is not a 2D numpy array!') if _b.ndim == 1: _b = _b[:, np.newaxis] elif _b.ndim > 2: raise ValueError('b is not a 1D/2D numpy array!') # split the data np.random.seed(12345) _n = mtx.shape[0] train = np.random.choice(_n, int(_n*split), replace=False) test = [x for x in np.arange(_n) if x not in train] train_mtx = mtx[train, :] test_mtx = mtx[test, :] train_b = _b[train, :] test_b = _b[test, :] # set up initial tolerance, l0 penalty, best error, etc. if l0_penalty is None: # l0_penalty = 0.001*np.linalg.cond(mtx) l0_penalty = np.linalg.norm(test_b) / len(test) tol = d_tol = float(init_tol) # no sparsity constraints w_best = np.linalg.lstsq(train_mtx, train_b, rcond=None)[0] err_best = np.linalg.norm(test_b - test_mtx.dot(w_best), 2) + \ l0_penalty*np.count_nonzero(w_best) tol_best = 0. imp_flag = True for i in np.arange(max_iter): _w = SINDyBase.threshold_ls(train_mtx, train_b, tol, thresh_iter, normalize) err = np.linalg.norm(test_b - test_mtx.dot(_w), 2) + l0_penalty*np.count_nonzero(_w) if err < err_best: err_best = err w_best = _w tol_best = tol tol += d_tol imp_flag = False else: # tol = max([0, tol - d_tol]) tol = max([0, tol - 2*d_tol]) # d_tol /= 2 d_tol = 2 * d_tol/(max_iter - i) tol = tol + d_tol if imp_flag: print('cutoff value maybe too small/large to threshold ....') return w_best, tol_best
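# Hedged sketch of the inner "threshold least squares" idea that sparsify_dynamics
# relies on (this is NOT SINDyBase.threshold_ls itself): solve least squares, zero out
# coefficients below the tolerance, and re-fit on the surviving columns.
import numpy as np

def threshold_ls_sketch(mtx, b, tol, n_iter=10):
    w = np.linalg.lstsq(mtx, b, rcond=None)[0]
    for _ in range(n_iter):
        small = np.abs(w) < tol
        w[small] = 0.0
        for k in range(b.shape[1]):
            big = ~small[:, k]
            if np.count_nonzero(big):
                w[big, k] = np.linalg.lstsq(mtx[:, big], b[:, k], rcond=None)[0]
    return w

A = np.random.rand(50, 5)
coef = np.array([[1.5], [0.0], [0.0], [-2.0], [0.0]])
w_hat = threshold_ls_sketch(A, A @ coef, tol=0.1)
print(np.count_nonzero(w_hat))   # the two large coefficients should survive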
del loss
del t
del x

# Print the current value of the loss function
print(' train loss = {0:.4f}'.format(sum_loss / n_samples), file=sys.stderr)

# Compute and print the classification accuracy on the evaluation data
model.eval()
n_failed = 0
for i in range(0, n_samples_ev, batchsize):
    x = torch.tensor(features_ev[i : i + batchsize], device=dev)
    t = torch.tensor(labels_ev[i : i + batchsize], device=dev, dtype=torch.long)
    y = model(x)
    y = y.to('cpu').detach().numpy().copy()
    t = t.to('cpu').detach().numpy().copy()
    n_failed += np.count_nonzero(np.argmax(y, axis=1) - t)
    del y
    del x
    del t
acc = (n_samples_ev - n_failed) / n_samples_ev
print(' accuracy = {0:.2f}%'.format(100 * acc), file=sys.stderr)

# Visualize the current state
if visualization_interval > 0 and (e + 1) % visualization_interval == 0:
    visualizer.show(model, device=dev, samples=data, title='Epoch {0}'.format(e + 1))

# Display the graph
print('', file=sys.stderr)
print('', file=sys.stderr)
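# Tiny sketch of the accuracy computation above: predictions disagree with labels
# exactly where argmax(y) - t is nonzero, so np.count_nonzero gives the error count.
import numpy as np

y_demo = np.array([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4]])   # toy class scores
t_demo = np.array([0, 1, 1])                               # toy labels
n_failed_demo = np.count_nonzero(np.argmax(y_demo, axis=1) - t_demo)
accuracy_demo = (len(t_demo) - n_failed_demo) / len(t_demo)
print(n_failed_demo, accuracy_demo)    # 1 misclassified sample -> accuracy 0.667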
def iterate(mode, args, loader, model, optimizer, logger, best_acc, epoch): start_val = time.clock() nonsense = 0 acc_sum = 0 # switch to appropriate mode assert mode in ["train", "val", "eval", "test_prediction", "test_completion"], \ "unsupported mode: {}".format(mode) if mode == 'train': model.train() lr = completion_segmentation_helper.adjust_learning_rate( args.lr, optimizer, epoch) else: model.eval() lr = 0 lane_acc_lst = [] lane_loss_lst = [] total_acc_lst = [] for i, batch_data in enumerate(loader): start = time.time() batch_data = { key: val.to(device) for key, val in batch_data.items() if val is not None } # 道路分割的label road_label = batch_data[ 'road_label'] if mode != 'test_road_lane_segmentation' else None # 车道线分割的label lane_label = batch_data[ 'lane_label'] if mode != 'test_road_lane_segmentation' else None data_time = time.time() - start start = time.time() if mode == 'val': with torch.no_grad(): # 设置torch.no_grad(),在val时不计算梯度,可以节省显存 pred = model(batch_data) else: pred = model(batch_data) lane_pred = pred start_ = time.clock() # 不计入时间 if mode == 'train': # 语义分割loss #road_loss = road_criterion(road_pred, road_label.long()) if epoch == 0: class_weight = torch.tensor([0.5, 0.5]) else: lane_pred_w = lane_pred.data.cpu() bs, c, h, w = lane_pred_w.size() value_w, index_w = lane_pred_w.max(1) LPW = 0 for i in range(bs): lpw = index_w[i].view(h, w).numpy() LPW += (np.count_nonzero(lpw) / lpw.size) LPW /= bs class_weight = torch.tensor([LPW, 1 - LPW]) #print('class_weight: ',class_weight) lane_criterion = nn.NLLLoss2d(weight=class_weight.cuda()) lane_loss = lane_criterion(lane_pred, lane_label.long()) lane_loss_lst.append(lane_loss.item()) # 损失 #loss = road_loss + lane_loss loss = lane_loss #print('lane loss {}'.format(lane_loss.data.cpu())) # 准确率 #road_acc = acc(road_pred.data.cpu(), road_label.cpu()) total_acc, lane_acc = acc(lane_pred.data.cpu(), lane_label.cpu()) lane_acc_lst.append(lane_acc.item()) total_acc_lst.append(total_acc.item()) #print('total acc {}'.format(total_acc), 'lane acc {}'.format(lane_acc)) #print('\n-------------------------epoch '+str(epoch)+'-----------------------------\n') optimizer.zero_grad() loss.backward() optimizer.step() elif mode == 'val': # 准确率 #road_acc = acc(road_pred.data.cpu(), road_label.cpu()) total_acc, lane_acc = acc(lane_pred.data.cpu(), lane_label.cpu()) lane_acc_lst.append(lane_acc.item()) total_acc_lst.append(total_acc.item()) #print('total acc {}'.format(total_acc), 'lane acc {}'.format(lane_acc)) #print('\n------------------------epoch '+str(epoch)+'------------------------------\n') #accuracy = (road_acc+lane_acc)/2 accuracy = lane_acc acc_sum += accuracy gpu_time = time.time() - start # measure accuracy and record loss with torch.no_grad(): # 保存预测结果为图片 logger.conditional_save_pred(mode, i, pred, epoch) nonsense += (time.clock() - start_) print('total cost time: ', time.clock() - start_val - nonsense) if mode == 'train': lane_loss_mean = np.array(lane_loss_lst).mean() lane_acc_mean = np.array(lane_acc_lst).mean() total_acc_mean = np.array(total_acc_lst).mean() print('lane loss {}'.format(lane_loss_mean), 'lane acc {}'.format(lane_acc_mean), 'total acc {}'.format(total_acc_mean)) elif mode == 'val': lane_acc_mean = np.array(lane_acc_lst).mean() total_acc_mean = np.array(total_acc_lst).mean() print('lane acc {}'.format(lane_acc_mean), 'total acc {}'.format(total_acc_mean)) print('\n-------------------------epoch ' + str(epoch) + '-----------------------------\n') acc_avg = acc_sum / len(loader) is_best = (acc_avg > best_acc) # 
Save information once per epoch # avg = logger.conditional_save_info(mode, average_meter, epoch) # is_best = logger.rank_conditional_save_best(mode, avg, epoch) # if is_best and not (mode == "train"): # # during validation, save the best predictions as images # logger.save_img_comparison_as_best(mode, epoch) # logger.conditional_summarize(mode, avg, is_best) if mode == 'train': return acc_avg, is_best, lane_loss_mean, lane_acc_mean, total_acc_mean elif mode == 'val': return acc_avg, is_best, lane_acc_mean, total_acc_mean
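# Sketch of the dynamic class-weight logic in the training branch above: the
# foreground fraction of the current predictions (LPW) is used to down-weight the
# dominant class. Toy tensors stand in for the network output.
import numpy as np
import torch

lane_pred_w = torch.rand(2, 2, 4, 4)          # (batch, classes, H, W), made-up values
bs, c, h, w = lane_pred_w.size()
_, index_w = lane_pred_w.max(1)               # per-pixel predicted class
LPW = 0.0
for i in range(bs):
    lpw = index_w[i].view(h, w).numpy()
    LPW += np.count_nonzero(lpw) / lpw.size   # fraction of foreground pixels
LPW /= bs
class_weight = torch.tensor([LPW, 1 - LPW])
print(class_weight)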
# Training loss
loss = tf.reduce_mean(cross_entropy)

# Create an operation that initializes all variables
init = tf.global_variables_initializer()

# Test Cases
with tf.Session() as session:
    session.run(init)
    session.run(loss, feed_dict=train_feed_dict)
    session.run(loss, feed_dict=valid_feed_dict)
    session.run(loss, feed_dict=test_feed_dict)
    biases_data = session.run(biases)

assert not np.count_nonzero(biases_data), 'biases must be zeros'
print('Tests Passed!')

#%%
# Determine if the predictions are correct
is_correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(labels, 1))
# Calculate the accuracy of the predictions
accuracy = tf.reduce_mean(tf.cast(is_correct_prediction, tf.float32))

print('Accuracy function created.')

#%%
# TODO: Find the best parameters for each configuration
eps = [1, 2, 3, 4, 5]
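# The zero-bias test above relies on np.count_nonzero returning 0 (falsy) only when
# every element is zero. Minimal numpy-only illustration, independent of TensorFlow:
import numpy as np

biases_demo = np.zeros(10)
assert not np.count_nonzero(biases_demo), 'biases must be zeros'
assert np.count_nonzero(np.array([0.0, 0.1, 0.0])) == 1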
def test_subtract(self):
    subtracted_field = self.subtract(self)
    assert numpy.count_nonzero(subtracted_field.values) == 0
def main(filename, read_raw_matrix=False): """ :param filename: path to be configuration file to read out data sets (type string) :param read_raw_matrix: only true if you really want to load the giant unfiltered count matrix :return: raw and filtered read out matrices and annotation dataset """ # config_paths = load_config_file(json_filename=json_filename) # check whether to do a single sample load or if you have more than one sample to load # input_path = os.path.join(os.environ['PYTHONPATH'].split(os.pathsep)[0], 'Input', 'config_files', filename) config_paths = ht.load_sample_config_file(filename=filename, file_type="csv") absolute_path = os.path.join(os.sep, config_paths[0][0], config_paths[2][0]) # matrix_file_end, features_file_end, barcode_file_end feature_bc_matrix_string = np.array( [config_paths[9][0], config_paths[8][0], config_paths[7][0]]) # # Parse Filenames project = config_paths[2][ 0] # if more examples then use sample_strings.pop(0) sample_id = config_paths[3][0] # loom_id = config_paths[22][0] # contains the information about spliced and unspliced genes -- todo # save sample ids in list list_sample_ids = [sample_id] if read_raw_matrix: print( "\nRaw/Unfiltered feature-barcode matrix contains every barcode from fixed list of known-good barcode " "sequences. This includes background and cell associated barcodes") # path to raw files ending with .mtx and .tsv (type string) raw_feature_bc_matrix_path = os.path.join(absolute_path, config_paths[1][0], project + "_" + sample_id, config_paths[4][0], config_paths[5][0]) print( "Filtered feature-barcode matrix contains only cells associated barcodes" ) # path h5 filtered_bc_matrix_h5_path = os.path.join(absolute_path, config_paths[1][0], project + "_" + sample_id, config_paths[4][0], config_paths[11][0]) raw_bc_matrix_h5_path = os.path.join(absolute_path, config_paths[1][0], project + "_" + sample_id, config_paths[4][0], config_paths[10][0]) # path to filtered files ending with .mtx and .tsv (type string) filtered_feature_bc_matrix_path = os.path.join( absolute_path, config_paths[1][0], project + "_" + sample_id, config_paths[4][0], config_paths[6][0]) # # Annotate data print("\n-------- Start: Read out values --------") # Two options to read in feature_ids, gene_names, feature_types, barcodes, count_matrix_data # 1. Malte Luecken using Scanpy from TheisLab; read out mtx and tsv files raw_annot_data, filtered_annot_data = _scanpy_load_annotate_tsv_mtx_files( raw_feature_bc_matrix_path, filtered_feature_bc_matrix_path, feature_bc_matrix_string, file_matrix_h5=raw_bc_matrix_h5_path, read_raw_matrix=read_raw_matrix) # # Loop to load all data sets for c_sample in tqdm(range(len(config_paths[0][1:])), desc='Loading samples'): print(" ... 
reading out ...") c_sample += 1 # matrix_file_end, features_file_end, barcode_file_end feature_bc_matrix_string = \ np.array([config_paths[9][c_sample], config_paths[8][c_sample], config_paths[7][c_sample]]) # path to h5 and matrices path_matrix = os.path.join(os.sep, config_paths[0][c_sample], config_paths[1][c_sample], config_paths[2][c_sample], config_paths[3][c_sample], config_paths[4][c_sample]) # path h5 filtered_bc_matrix_h5_path = os.path.join( path_matrix, config_paths[11][c_sample]) raw_bc_matrix_h5_path = os.path.join(path_matrix, config_paths[10][c_sample]) # matrix raw_feature_bc_matrix_path = os.path.join( path_matrix, config_paths[5][c_sample]) filtered_feature_bc_matrix_path = os.path.join( path_matrix, config_paths[6][c_sample]) # # Load count matrix, features and observables raw_adata_tmp, filtered_adata_tmp = _scanpy_load_annotate_tsv_mtx_files( raw_feature_bc_matrix_path, filtered_feature_bc_matrix_path, feature_bc_matrix_string, file_matrix_h5=raw_bc_matrix_h5_path, read_raw_matrix=read_raw_matrix) # # Concatenate data sets (also do this if you have more than one donor!) # RAW raw_annot_data = raw_annot_data.concatenate(raw_adata_tmp, batch_key='sample_id') # raw_annot_data.var['gene_id'] = raw_annot_data.var['gene_id-1'] # raw_annot_data.var.drop(columns=['gene_id-1', 'gene_id-0'], inplace=True) raw_annot_data.obs.drop(columns=['sample_id'], inplace=True) raw_annot_data.obs_names = [ c.split("-")[0] for c in raw_annot_data.obs_names ] raw_annot_data.obs_names_make_unique(join='_') # raw_annot_data.obs_names_make_unique(join='_') # FILTERED filtered_annot_data = filtered_annot_data.concatenate( filtered_adata_tmp, batch_key='sample_id') # filtered_annot_data.var['gene_id'] = filtered_annot_data.var['gene_id-1'] # filtered_annot_data.var.drop(columns=['gene_id-1', 'gene_id-0'], inplace=True) filtered_annot_data.obs.drop(columns=['sample_id'], inplace=True) filtered_annot_data.obs_names = [ c.split("-")[0] for c in filtered_annot_data.obs_names ] filtered_annot_data.obs_names_make_unique(join='_') # filtered_annot_data.obs_names_make_unique(join='_') # save sample ids in list list_sample_ids.append(config_paths[3][c_sample]) else: print( "Filtered feature-barcode matrix contains only cells associated barcodes" ) # path to h5 and matrices path_matrix = os.path.join(os.sep, config_paths[0][0], config_paths[1][0], config_paths[2][0], config_paths[3][0], config_paths[4][0]) # path h5 filtered_bc_matrix_h5_path = os.path.join(path_matrix, config_paths[11][0]) raw_bc_matrix_h5_path = os.path.join(path_matrix, config_paths[10][0]) # path to filtered files ending with .mtx and .tsv (type string) filtered_feature_bc_matrix_path = os.path.join(path_matrix, config_paths[6][0]) # # Annotate data print("\n-------- Start: Read out values --------") # Two options to read in feature_ids, gene_names, feature_types, barcodes, count_matrix_data # 1. 
Malte Luecken using Scanpy from TheisLab; read out mtx and tsv files _, filtered_annot_data = _scanpy_load_annotate_tsv_mtx_files( filtered_feature_bc_matrix_path, feature_bc_matrix_string, file_matrix_h5=filtered_bc_matrix_h5_path) # # Loop to load all data sets for c_sample in tqdm(range(len(config_paths[0][1:])), desc='Loading samples'): c_sample += 1 # +1 becaue we already loaded the first sample # matrix_file_end, features_file_end, barcode_file_end feature_bc_matrix_string = \ np.array([config_paths[9][c_sample], config_paths[8][c_sample], config_paths[7][c_sample]]) # path to h5 and matrices path_matrix = os.path.join(os.sep, config_paths[0][c_sample], config_paths[1][c_sample], config_paths[2][c_sample], config_paths[3][c_sample], config_paths[4][c_sample]) # path h5 filtered_bc_matrix_h5_path = os.path.join( path_matrix, config_paths[11][c_sample]) raw_bc_matrix_h5_path = os.path.join(path_matrix, config_paths[10][c_sample]) filtered_feature_bc_matrix_path = os.path.join( path_matrix, config_paths[6][c_sample]) # # Load count matrix, features and observables _, filtered_adata_tmp = _scanpy_load_annotate_tsv_mtx_files( filtered_feature_bc_matrix_path, feature_bc_matrix_string, file_matrix_h5=filtered_bc_matrix_h5_path) # FILTERED filtered_annot_data = filtered_annot_data.concatenate( filtered_adata_tmp, batch_key='sample_id') filtered_annot_data.obs.drop(columns=['sample_id'], inplace=True) filtered_annot_data.obs_names = [ c.split("-")[0] for c in filtered_annot_data.obs_names ] filtered_annot_data.obs_names_make_unique() # Workaround to ensure that something can be returned raw_annot_data = [] # ---- Side notes to know but can also be looked up in the summary created by 10x Genomics Spaceranger --- # unique_sample = np.unique(filtered_annot_data.obs["sample"]) num_cells_previous_sample = 0 for sample_name in unique_sample: print("\nSample {}: ".format(sample_name)) print("\nSide notes of {} ".format( filtered_annot_data.obs['sample'].values[1])) number_cells = len( np.where( filtered_annot_data.obs['sample'].values == sample_name)[0]) print("No. cells: ", number_cells) # Count number of expressed genes (count one gene over all spots) counts_gene = filtered_annot_data[ num_cells_previous_sample:number_cells].X.sum(0) counts_gene_sorted = np.sort(counts_gene) print("Total No. genes detected: ", np.count_nonzero(counts_gene_sorted)) # Calculate median genes per spot copy_s1 = filtered_annot_data[:number_cells].X.copy() mask = copy_s1 > 0 zero_array = np.zeros_like(copy_s1) # count numbers of True == numbers of gene overall spots zero_array[mask] = 1 median_genes_per_spot = np.median(zero_array.sum(1)) median_umi_counts_per_spot = np.median(copy_s1.sum(1)) print("Median genes: ", median_genes_per_spot) print("Total No. of UMI Counts: ", sum(copy_s1.sum(1))) print("Median UMI Counts: ", median_umi_counts_per_spot) num_cells_previous_sample = number_cells # All samples # Get barcodes of each sample and therefore the number of cells for each sample model = filtered_annot_data.obs[['sample'] + []] batch_info = model.groupby('sample').groups.values() n_batches = np.array([len(v) for v in batch_info]) print("\n Sorted No. genes for each sample: ", n_batches) print("\n") # --- End side notes --- # # Second option: load from hdf5 files return raw_annot_data, filtered_annot_data, config_paths
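# Condensed sketch of the per-sample summary printed above: total genes detected,
# median genes per spot, and median UMI counts per spot, computed on a toy counts
# matrix (spots x genes) instead of an AnnData object.
import numpy as np

counts = np.array([[0, 3, 1, 0],
                   [2, 0, 0, 0],
                   [1, 1, 4, 0]])                        # 3 spots x 4 genes
genes_detected = np.count_nonzero(counts.sum(axis=0))     # genes seen in any spot
median_genes_per_spot = np.median((counts > 0).sum(axis=1))
median_umi_per_spot = np.median(counts.sum(axis=1))
print(genes_detected, median_genes_per_spot, median_umi_per_spot)   # 3, 2.0, 4.0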
def run_task(self): # {{{ ''' Compute the regional-mean time series ''' # Authors # ------- # Xylar Asay-Davis config = self.config self.logger.info("\nCompute time series of regional means...") startDate = '{:04d}-01-01_00:00:00'.format(self.startYear) endDate = '{:04d}-12-31_23:59:59'.format(self.endYear) regionGroup = self.regionGroup sectionSuffix = regionGroup[0].upper() + \ regionGroup[1:].replace(' ', '') timeSeriesName = sectionSuffix[0].lower() + sectionSuffix[1:] sectionName = 'timeSeries{}'.format(sectionSuffix) outputDirectory = '{}/{}/'.format( build_config_full_path(config, 'output', 'timeseriesSubdirectory'), timeSeriesName) try: os.makedirs(outputDirectory) except OSError: pass outFileName = '{}/{}_{:04d}-{:04d}.nc'.format(outputDirectory, timeSeriesName, self.startYear, self.endYear) inputFiles = sorted( self.historyStreams.readpath('timeSeriesStatsMonthlyOutput', startDate=startDate, endDate=endDate, calendar=self.calendar)) years, months = get_files_year_month(inputFiles, self.historyStreams, 'timeSeriesStatsMonthlyOutput') variables = config.getExpression(sectionName, 'variables') variableList = [var['mpas'] for var in variables] + \ ['timeMonthly_avg_layerThickness'] outputExists = os.path.exists(outFileName) outputValid = outputExists if outputExists: with open_mpas_dataset(fileName=outFileName, calendar=self.calendar, timeVariableNames=None, variableList=None, startDate=startDate, endDate=endDate) as dsOut: for inIndex in range(dsOut.dims['Time']): mask = numpy.logical_and( dsOut.year[inIndex].values == years, dsOut.month[inIndex].values == months) if numpy.count_nonzero(mask) == 0: outputValid = False break if outputValid: self.logger.info(' Time series exists -- Done.') return # Load mesh related variables try: restartFileName = self.runStreams.readpath('restart')[0] except ValueError: raise IOError('No MPAS-O restart file found: need at least one ' 'restart file for ocean region time series') cellsChunk = 32768 timeChunk = 1 datasets = [] for timeIndex, fileName in enumerate(inputFiles): dsTimeSlice = open_mpas_dataset(fileName=fileName, calendar=self.calendar, variableList=variableList, startDate=startDate, endDate=endDate) datasets.append(dsTimeSlice) chunk = {'Time': timeChunk, 'nCells': cellsChunk} if config.has_option(sectionName, 'zmin'): config_zmin = config.getfloat(sectionName, 'zmin') else: config_zmin = None if config.has_option(sectionName, 'zmax'): config_zmax = config.getfloat(sectionName, 'zmax') else: config_zmax = None with dask.config.set(schedular='threads', pool=ThreadPool(self.daskThreads)): # combine data sets into a single data set dsIn = xarray.concat(datasets, 'Time').chunk(chunk) chunk = {'nCells': cellsChunk} dsRestart = xarray.open_dataset(restartFileName) dsRestart = dsRestart.isel(Time=0).chunk(chunk) dsIn['areaCell'] = dsRestart.areaCell if 'landIceMask' in dsRestart: # only the region outside of ice-shelf cavities dsIn['openOceanMask'] = dsRestart.landIceMask == 0 dsIn['zMid'] = compute_zmid(dsRestart.bottomDepth, dsRestart.maxLevelCell, dsRestart.layerThickness) regionMaskFileName = self.masksSubtask.maskFileName dsRegionMask = xarray.open_dataset(regionMaskFileName) maskRegionNames = decode_strings(dsRegionMask.regionNames) datasets = [] regionIndices = [] for regionName in self.regionNames: self.logger.info(' region: {}'.format(regionName)) regionIndex = maskRegionNames.index(regionName) regionIndices.append(regionIndex) chunk = {'nCells': cellsChunk} dsMask = dsRegionMask.isel(nRegions=regionIndex).chunk(chunk) cellMask = 
dsMask.regionCellMasks == 1 if 'openOceanMask' in dsIn: cellMask = numpy.logical_and(cellMask, dsIn.openOceanMask) dsRegion = dsIn.where(cellMask, drop=True) totalArea = dsRegion['areaCell'].sum() self.logger.info(' totalArea: {} mil. km^2'.format( 1e-12 * totalArea.values)) self.logger.info("Don't worry about the following dask " "warnings.") if config_zmin is None: zmin = dsMask.zmin else: zmin = config_zmin if config_zmax is None: zmax = dsMask.zmax else: zmax = config_zmax depthMask = numpy.logical_and(dsRegion.zMid >= zmin, dsRegion.zMid <= zmax) depthMask.compute() self.logger.info("Dask warnings should be done.") dsRegion['depthMask'] = depthMask layerThickness = dsRegion.timeMonthly_avg_layerThickness dsRegion['volCell'] = (dsRegion.areaCell * layerThickness).where(depthMask) totalVol = dsRegion.volCell.sum(dim='nVertLevels').sum( dim='nCells') totalVol.compute() self.logger.info(' totalVol (mil. km^3): {}'.format( 1e-15 * totalVol.values)) dsRegion = dsRegion.transpose('Time', 'nCells', 'nVertLevels') dsOut = xarray.Dataset() dsOut['totalVol'] = totalVol dsOut.totalVol.attrs['units'] = 'm^3' dsOut['totalArea'] = totalArea dsOut.totalArea.attrs['units'] = 'm^2' dsOut['zbounds'] = ('nbounds', [zmin, zmax]) dsOut.zbounds.attrs['units'] = 'm' for var in variables: outName = var['name'] self.logger.info(' {}'.format(outName)) mpasVarName = var['mpas'] timeSeries = dsRegion[mpasVarName] units = timeSeries.units description = timeSeries.long_name is3d = 'nVertLevels' in timeSeries.dims if is3d: timeSeries = \ (dsRegion.volCell*timeSeries.where(depthMask)).sum( dim='nVertLevels').sum(dim='nCells') / totalVol else: timeSeries = \ (dsRegion.areaCell*timeSeries).sum( dim='nCells') / totalArea timeSeries.compute() dsOut[outName] = timeSeries dsOut[outName].attrs['units'] = units dsOut[outName].attrs['description'] = description dsOut[outName].attrs['is3d'] = str(is3d) datasets.append(dsOut) # combine data sets into a single data set dsOut = xarray.concat(datasets, 'nRegions') dsOut.coords['regionNames'] = dsRegionMask.regionNames.isel( nRegions=regionIndices) dsOut.coords['year'] = (('Time'), years) dsOut['year'].attrs['units'] = 'years' dsOut.coords['month'] = (('Time'), months) dsOut['month'].attrs['units'] = 'months' write_netcdf(dsOut, outFileName)
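# Schematic version of the 3-D regional averaging above (plain numpy instead of
# xarray/dask): a depth mask selects layers inside [zmin, zmax], cell volumes weight
# the field, and the region mean is the weighted sum divided by the masked volume.
# All arrays below are made-up toy values.
import numpy as np

zMid = np.array([[-10., -50., -120.], [-15., -60., -130.]])   # (nCells, nVertLevels)
thickness = np.array([[20., 60., 80.], [25., 65., 75.]])
area = np.array([1.0e6, 2.0e6])                                # m^2 per cell
field = np.array([[5.0, 4.0, 3.0], [6.0, 5.0, 2.0]])
zmin, zmax = -100., 0.

depth_mask = (zMid >= zmin) & (zMid <= zmax)
vol = area[:, None] * thickness * depth_mask
total_vol = vol.sum()
region_mean = (vol * field).sum() / total_vol
print(total_vol, region_mean)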
def evaluate(truth_val, n_answered):
    return np.count_nonzero(truth_val - ground_truth) / (n_answered * numA)
def long_to_wide(table: pd.DataFrame, keycolnames: List[str], varcolname: str) -> pd.DataFrame: warnings = [] quick_fixes = [] varcol = table[varcolname] if varcol.dtype != object and not hasattr(varcol, "cat"): # Convert to str, in-place warnings.append( ('Column "%s" was auto-converted to Text because column names ' "must be text.") % varcolname) quick_fixes.append({ "text": 'Convert "%s" to text' % varcolname, "action": "prependModule", "args": ["converttotext", { "colnames": [varcolname] }], }) na = varcol.isnull() varcol = varcol.astype(str) varcol[na] = np.nan table[varcolname] = varcol # Remove empty values, in-place. Empty column headers aren't allowed. # https://www.pivotaltracker.com/story/show/162648330 empty = varcol.isin([np.nan, pd.NaT, None, ""]) n_empty = np.count_nonzero(empty) if n_empty: if n_empty == 1: text_empty = "1 input row" else: text_empty = "{:,d} input rows".format(n_empty) warnings.append('%s with empty "%s" were removed.' % (text_empty, varcolname)) table = table[~empty] table.reset_index(drop=True, inplace=True) table.set_index(keycolnames + [varcolname], inplace=True, drop=True) if np.any(table.index.duplicated()): return "Cannot reshape: some variables are repeated" if len(table.columns) == 0: return ("There is no Value column. " "All but one table column must be a Row or Column variable.") if len(table.columns) > 1: return ("There are too many Value columns. " "All but one table column must be a Row or Column variable. " "Please drop extra columns before reshaping.") table = table.unstack() table.columns = [col[-1] for col in table.columns.values] table.reset_index(inplace=True) if warnings: return { "dataframe": table, "error": "\n".join(warnings), "quick_fixes": quick_fixes, } else: return table
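# Minimal long-to-wide illustration of the reshaping above: count and drop rows whose
# variable-name column is empty, then pivot with set_index + unstack. Toy frame only.
import numpy as np
import pandas as pd

table = pd.DataFrame({
    "id":    [1, 1, 2, 2, 3],
    "var":   ["a", "b", "a", "b", ""],
    "value": [10, 11, 20, 21, 99],
})
empty = table["var"].isin([np.nan, None, ""])
print(np.count_nonzero(empty), "input row(s) with empty variable removed")
table = table[~empty].set_index(["id", "var"])
wide = table.unstack()
wide.columns = [col[-1] for col in wide.columns.values]
print(wide.reset_index())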
def get_sum_metrics(batch_output, batch_target, metrics_type, test=False, printDice=False): if torch.is_tensor(batch_output): batch_output = batch_output.data.cpu().numpy() if torch.is_tensor(batch_target): batch_target = batch_target.data.cpu().numpy() assert batch_output.shape == batch_target.shape assert len(batch_output.shape) == 4 spacing = (1, 1) size = batch_output.shape[0] metrics = dict.fromkeys(metrics_type, 0) dices = [] for i in range(size): output = batch_output[i, 0] target = batch_target[i, 0] labelPred = sitk.GetImageFromArray(output, isVector=False) labelPred.SetSpacing(spacing) labelTrue = sitk.GetImageFromArray(target, isVector=False) labelTrue.SetSpacing(spacing) # spacing order (x, y, z) # voxel_metrics pred = output.astype(int) gdth = target.astype(int) fp_array = copy.deepcopy(pred) # keep pred unchanged fn_array = copy.deepcopy(gdth) gdth_sum = np.sum(gdth) pred_sum = np.sum(pred) intersection = gdth & pred union = gdth | pred intersection_sum = np.count_nonzero(intersection) union_sum = np.count_nonzero(union) tp_array = intersection tmp = pred - gdth fp_array[tmp < 1] = 0 tmp2 = gdth - pred fn_array[tmp2 < 1] = 0 tn_array = np.ones(gdth.shape) - union tp, fp, fn, tn = np.sum(tp_array), np.sum(fp_array), np.sum( fn_array), np.sum(tn_array) smooth = EPSILON precision = (tp) / (pred_sum + smooth) recall = (tp) / (gdth_sum + smooth) false_positive_rate = (fp) / (fp + tn + smooth) false_negtive_rate = (fn) / (fn + tp + smooth) jaccard = (intersection_sum) / (union_sum + smooth) dice = (2 * intersection_sum) / (gdth_sum + pred_sum + smooth) ppv = (intersection_sum) / (pred_sum + smooth) dicecomputer = sitk.LabelOverlapMeasuresImageFilter() dicecomputer.Execute(labelTrue > 0.5, labelPred > 0.5) # distance_metrics signed_distance_map = sitk.SignedMaurerDistanceMap( labelTrue > 0.5, squaredDistance=False, useImageSpacing=True) # It need to be adapted. 
ref_distance_map = sitk.Abs(signed_distance_map) ref_surface = sitk.LabelContour(labelTrue > 0.5, fullyConnected=True) statistics_image_filter = sitk.StatisticsImageFilter() statistics_image_filter.Execute(ref_surface > 0.5) num_ref_surface_pixels = int(statistics_image_filter.GetSum()) signed_distance_map_pred = sitk.SignedMaurerDistanceMap( labelPred > 0.5, squaredDistance=False, useImageSpacing=True) seg_distance_map = sitk.Abs(signed_distance_map_pred) seg_surface = sitk.LabelContour(labelPred > 0.5, fullyConnected=True) seg2ref_distance_map = ref_distance_map * sitk.Cast( seg_surface, sitk.sitkFloat32) ref2seg_distance_map = seg_distance_map * sitk.Cast( ref_surface, sitk.sitkFloat32) statistics_image_filter.Execute(seg_surface > 0.5) num_seg_surface_pixels = int(statistics_image_filter.GetSum()) seg2ref_distance_map_arr = sitk.GetArrayViewFromImage( seg2ref_distance_map) seg2ref_distances = list( seg2ref_distance_map_arr[seg2ref_distance_map_arr != 0]) seg2ref_distances = seg2ref_distances + list( np.zeros(num_seg_surface_pixels - len(seg2ref_distances))) ref2seg_distance_map_arr = sitk.GetArrayViewFromImage( ref2seg_distance_map) ref2seg_distances = list( ref2seg_distance_map_arr[ref2seg_distance_map_arr != 0]) ref2seg_distances = ref2seg_distances + list( np.zeros(num_ref_surface_pixels - len(ref2seg_distances))) # all_surface_distances = seg2ref_distances + ref2seg_distances metrics['dice'] += dice metrics['jaccard'] += jaccard metrics['precision'] += precision metrics['recall'] += recall metrics['fpr'] += false_positive_rate metrics['fnr'] += false_negtive_rate metrics['vs'] += dicecomputer.GetVolumeSimilarity() metrics['ppv'] += ppv metrics["msd"] += np.mean(all_surface_distances) metrics["mdsd"] += np.median(all_surface_distances) metrics["stdsd"] += np.std(all_surface_distances) metrics["hd95"] += np.percentile(all_surface_distances, 95) metrics["hd"] += np.max(all_surface_distances) if printDice: dices.append(dice) if printDice: return metrics, dices return metrics
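# Compact restatement of the voxel overlap metrics above: with binary ground-truth and
# prediction masks, Dice and Jaccard follow directly from np.count_nonzero of the
# intersection and union. EPSILON here is a stand-in for the module-level constant.
import numpy as np

EPSILON = 1e-7
gdth = np.array([[1, 1, 0], [0, 1, 0]])
pred = np.array([[1, 0, 0], [0, 1, 1]])
intersection_sum = np.count_nonzero(gdth & pred)
union_sum = np.count_nonzero(gdth | pred)
dice = 2 * intersection_sum / (gdth.sum() + pred.sum() + EPSILON)
jaccard = intersection_sum / (union_sum + EPSILON)
print(dice, jaccard)   # 0.666..., 0.5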
def no_more_moves(board):
    return np.count_nonzero(board) == COLUMN_COUNT * ROW_COUNT
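# Quick usage check for the helper above; the board dimensions here are assumptions,
# standing in for the module-level ROW_COUNT and COLUMN_COUNT constants.
import numpy as np

ROW_COUNT, COLUMN_COUNT = 6, 7
empty_board = np.zeros((ROW_COUNT, COLUMN_COUNT))
full_board = np.ones((ROW_COUNT, COLUMN_COUNT))
print(no_more_moves(empty_board), no_more_moves(full_board))   # False True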