def PredictionMatrix():
    print('Loading valid data...')
    (X_valid, y_valid, the_images) = LARGEload.load_valid_data()
    X_valid = X_valid.astype("float32")
    X_valid /= 255
    print('Generating prediction...')
    print("The actual values are:")
    #print (y_valid)
    y = np.bincount(y_valid)
    ii = np.nonzero(y)[0]
    print(list(zip(ii, y[ii])))
    y_pred = model.predict_classes(X_valid, batch_size=20)
    #print (y_pred)
    y = np.bincount(y_pred)
    ii = np.nonzero(y)[0]
    print("The predicted values are:")
    print(list(zip(ii, y[ii])))
    print("Our confusion matrix is:")
    cm = confusion_matrix(y_valid, y_pred)
    print(cm)
    plot_confusion_matrix(cm, weights=save_name[:-5])
    # how many samples match the ground-truth validation labels?
    correct = np.sum(y_pred == y_valid)
    print("Number of correct classifications is %d/5000" % (correct))
    # accuracy = number correct / total number of validation samples
    accuracy = float(correct) / 5000
    print("Accuracy of %f" % accuracy)
    return accuracy, correct
def splat(t, value, bins):
    """Put `value` into `bins` according to `t`.

    The points are assumed to describe a continuum field. If two points have
    the same position, they are merged into one point. For a point whose cell
    crosses a bin edge, part of its value is added to the left bin and part to
    the right bin, so the total sum is conserved.
    """
    if len(t) == 0:
        return numpy.zeros(len(bins) + 1)
    t = numpy.float64(t)
    t, label = numpy.unique(t, return_inverse=True)
    if numpy.isscalar(value):
        value = numpy.bincount(label) * value
    else:
        value = numpy.bincount(label, weights=value)
    edge = numpy.concatenate(([t[0]], (t[1:] + t[:-1]) * 0.5, [t[-1]]))
    dig = numpy.digitize(edge, bins)
    # use the right edge as the reference
    ref = bins[dig[1:] - 1]
    norm = (edge[1:] - edge[:-1])
    assert ((edge[1:] - edge[:-1]) > 0).all()
    norm = 1 / norm
    weightleft = -(edge[:-1] - ref) * norm
    weightright = (edge[1:] - ref) * norm
    # when dig < 1 or dig >= len(bins), t is out of bounds and does not
    # contribute to the in-range bins.
    l = numpy.bincount(dig[:-1], value * weightleft, minlength=len(bins) + 1)
    r = numpy.bincount(dig[1:], value * weightright, minlength=len(bins) + 1)
    return l + r
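# A minimal illustrative check for splat() above, assuming numpy is imported
# as "numpy" exactly as in the function. The sample positions and values below
# are hypothetical; with all points inside the bin range, the splatted totals
# sum to the input total, i.e. the "sum is conserved" property of the docstring.
import numpy

_t = [0.5, 1.2, 2.7]             # hypothetical sample positions
_value = [1.0, 2.0, 3.0]         # hypothetical point values
_bins = numpy.arange(4.0)        # bin edges 0, 1, 2, 3
_out = splat(_t, _value, _bins)  # array of length len(_bins) + 1
assert abs(_out.sum() - sum(_value)) < 1e-12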
def hist_from_snapshots(rpt = 10): # hist_all = np.zeros(256,dtype=int) hist1 = np.zeros(256,dtype=int) hist2 = np.zeros(256,dtype=int) hist3 = np.zeros(256,dtype=int) hist4 = np.zeros(256,dtype=int) for i in range(rpt): snap=adc5g.get_snapshot(roach2, snap_name, man_trig=True, wait_period=2) snap = 128 + np.array(snap) # hist = np.bincount(snap, minlength=256) # hist_all += hist hist = np.bincount(snap[0:: 4], minlength=256) hist1 += hist hist = np.bincount(snap[1:: 4], minlength=256) hist2 += hist hist = np.bincount(snap[2:: 4], minlength=256) hist3 += hist hist = np.bincount(snap[3:: 4], minlength=256) hist4 += hist data=np.column_stack((np.arange(-128., 128, dtype=int), hist1, hist2, hist3, hist4)) np.savetxt("hist_cores", data, fmt=("%d")) # print "all ",np.sum(hist_all[0:128]), np.sum(hist_all[128:256]) print "core a ",np.sum(hist1[0:128]), np.sum(hist1[129:256]) print "core b ",np.sum(hist3[0:128]), np.sum(hist3[129:256]) print "core c ",np.sum(hist2[0:128]), np.sum(hist2[129:256]) print "core d ",np.sum(hist4[0:128]), np.sum(hist4[129:256])
def test_stratified_shuffle_split_iter(): ys = [np.array([1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3]), np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]), np.array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2] * 2), np.array([1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]), np.array([-1] * 800 + [1] * 50) ] for y in ys: sss = cval.StratifiedShuffleSplit(y, 6, test_size=0.33, random_state=0) test_size = np.ceil(0.33 * len(y)) train_size = len(y) - test_size for train, test in sss: assert_array_equal(np.unique(y[train]), np.unique(y[test])) # Checks if folds keep classes proportions p_train = (np.bincount(np.unique(y[train], return_inverse=True)[1]) / float(len(y[train]))) p_test = (np.bincount(np.unique(y[test], return_inverse=True)[1]) / float(len(y[test]))) assert_array_almost_equal(p_train, p_test, 1) assert_equal(len(train) + len(test), y.size) assert_equal(len(train), train_size) assert_equal(len(test), test_size) assert_array_equal(np.lib.arraysetops.intersect1d(train, test), [])
def check_min_samples_leaf(name): X, y = hastie_X, hastie_y # Test if leaves contain more than leaf_count training examples ForestEstimator = FOREST_ESTIMATORS[name] # test boundary value assert_raises(ValueError, ForestEstimator(min_samples_leaf=-1).fit, X, y) assert_raises(ValueError, ForestEstimator(min_samples_leaf=0).fit, X, y) est = ForestEstimator(min_samples_leaf=5, n_estimators=1, random_state=0) est.fit(X, y) out = est.estimators_[0].tree_.apply(X) node_counts = np.bincount(out) # drop inner nodes leaf_count = node_counts[node_counts != 0] assert_greater(np.min(leaf_count), 4, "Failed with {0}".format(name)) est = ForestEstimator(min_samples_leaf=0.25, n_estimators=1, random_state=0) est.fit(X, y) out = est.estimators_[0].tree_.apply(X) node_counts = np.bincount(out) # drop inner nodes leaf_count = node_counts[node_counts != 0] assert_greater(np.min(leaf_count), len(X) * 0.25 - 1, "Failed with {0}".format(name))
def test_bincountOp(self): w = T.vector('w') for dtype in ('int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64'): # uint64 always fails # int64 and uint32 also fail if python int are 32-bit int_bitwidth = theano.gof.python_int_bitwidth() if int_bitwidth == 64: numpy_unsupported_dtypes = ('uint64',) if int_bitwidth == 32: numpy_unsupported_dtypes = ('uint32', 'int64', 'uint64') x = T.vector('x', dtype=dtype) if dtype in numpy_unsupported_dtypes: self.assertRaises(TypeError, bincount, x) else: a = np.random.random_integers(50, size=(25)).astype(dtype) weights = np.random.random((25,)).astype(config.floatX) f1 = theano.function([x], bincount(x)) f2 = theano.function([x, w], bincount(x, weights=w)) assert (np.bincount(a) == f1(a)).all() assert np.allclose(np.bincount(a, weights=weights), f2(a, weights)) if not numpy_16: continue f3 = theano.function([x], bincount(x, minlength=23)) f4 = theano.function([x], bincount(x, minlength=5)) assert (np.bincount(a, minlength=23) == f3(a)).all() assert (np.bincount(a, minlength=5) == f4(a)).all()
def stats(self, out_counts, out_adj, adj_index = string.ascii_letters + ' '):
    """Given two input arrays this adds to them the statistics of the contained
    text. The first array is of length 256, and counts the instances of
    character codes. The second array is 2D, with ['a', 'b'] being the number
    of times a 'b' follows an 'a'. It is indexed by adj_index however, and
    character pairs that contain a character not included are not counted."""
    # Counts are relatively easy - convert and histogram...
    text_codes = numpy.fromstring(self.text.encode('utf8'), dtype=numpy.uint8)
    out_counts += numpy.bincount(text_codes, minlength=256)

    # Adjacencies require a little more sneakiness...
    # First convert the codes array into an index into the adj_index, with
    # entries that are not in it set to the out-of-range value cap...
    adj_codes = numpy.fromstring(adj_index, dtype=numpy.uint8)

    cap = len(adj_index) * len(adj_index)
    conversion = numpy.empty(256, dtype=numpy.int64)
    conversion[:] = cap
    conversion[adj_codes] = numpy.arange(adj_codes.shape[0])

    text_codes = conversion[text_codes]

    # Now take adjacent pairs, and calculate the 1D index in the out_adj matrix...
    pos = (text_codes[:-1] * len(adj_index)) + text_codes[1:]

    # Lose values that are too large - they are pairs we do not record...
    pos = pos[pos < cap]

    # Histogram and sum into the adjacency matrix...
    if pos.shape[0] > 0:
        out_adj += numpy.bincount(pos, minlength=cap).reshape((len(adj_index), len(adj_index)))
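# A small illustrative sketch of the pairwise-index trick used in stats()
# above; the tiny text and three-character alphabet are hypothetical. Adjacent
# character pairs are encoded as a single 1D index, histogrammed with bincount,
# then reshaped into the adjacency matrix.
import numpy

_adj_index = "ab "                                  # hypothetical "alphabet"
_codes = numpy.frombuffer(b"ab ba", dtype=numpy.uint8)
_cap = len(_adj_index) * len(_adj_index)
_conv = numpy.full(256, _cap, dtype=numpy.int64)
_conv[numpy.frombuffer(_adj_index.encode(), dtype=numpy.uint8)] = numpy.arange(len(_adj_index))
_idx = _conv[_codes]
_pos = _idx[:-1] * len(_adj_index) + _idx[1:]
_pos = _pos[_pos < _cap]                            # drop pairs with unknown characters
_adj = numpy.bincount(_pos, minlength=_cap).reshape((len(_adj_index), len(_adj_index)))
# _adj[0, 1] == 1: 'b' follows 'a' exactly once in "ab ba"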
def reproject(self, nj_obj, field): """Reproject a field of another njord inst. to the current grid""" if not hasattr(self,'nj_ivec'): self.add_njijvec(nj_obj) field = getattr(nj_obj, field) if type(field) is str else field if hasattr(nj_obj, 'tvec') and (len(nj_obj.tvec) == field.shape[0]): newfield = np.zeros(nj_obj.tvec.shape + self.llat.shape) for tpos in range(len(nj_obj.tvec)): newfield[tpos,:,:] = self.reproject(nj_obj, field[tpos,...]) return newfield di = self.i2 - self.i1 dj = self.j2 - self.j1 xy = np.vstack((self.nj_jvec, self.nj_ivec)) if type(field) == str: weights = np.ravel(nj_obj.__dict__[field])[self.nj_mask] else: weights = np.ravel(field)[self.nj_mask] mask = ~np.isnan(weights) flat_coord = np.ravel_multi_index(xy[:,mask],(dj, di)) sums = np.bincount(flat_coord, weights[mask]) cnts = np.bincount(flat_coord) fld = np.zeros((dj, di)) * np.nan fld.flat[:len(sums)] = sums.astype(np.float)/cnts try: self.add_landmask() fld[self.landmask] = np.nan except: print "Couldn't load landmask for %s" % self.projname return fld
def update_nogrid(self, params): endog = self.model.endog_li cached_means = self.model.cached_means varfunc = self.model.family.variance dep_params = np.zeros(self.max_lag + 1) dn = np.zeros(self.max_lag + 1) for i in range(self.model.num_group): expval, _ = cached_means[i] stdev = np.sqrt(varfunc(expval)) resid = (endog[i] - expval) / stdev j1, j2 = np.tril_indices(len(expval)) dx = np.abs(self.time[i][j1] - self.time[i][j2]) ii = np.flatnonzero(dx <= self.max_lag) j1 = j1[ii] j2 = j2[ii] dx = dx[ii] vs = np.bincount(dx, weights=resid[ j1] * resid[j2], minlength=self.max_lag + 1) vd = np.bincount(dx, minlength=self.max_lag + 1) ii = np.flatnonzero(vd > 0) dn[ii] += 1 if len(ii) > 0: dep_params[ii] += vs[ii] / vd[ii] dep_params /= dn self.dep_params = dep_params[1:] / dep_params[0]
def test_shc_semi_supervised_scoring_data_affinity(): """Test semi-supervised learning for SHC when scoring_data='affinity'.""" # Passing feature matrix X1, y1 = generate_data(supervised=True, affinity=False) def _scoring1(X_affinity, labels_true, labels_pred): assert X_affinity.shape[0] == X_affinity.shape[1] assert X_affinity.shape != X1.shape score = b3_f_score(labels_true, labels_pred) return score clusterer = ScipyHierarchicalClustering(scoring=_scoring1, scoring_data="affinity", affinity=euclidean_distances) clusterer.fit(X1, y1) labels = clusterer.labels_ assert_array_equal([25, 25, 25, 25], np.bincount(labels)) # Passing affinity matrix X2, y2 = generate_data(supervised=True, affinity=True) def _scoring2(X_affinity, labels_true, labels_pred): assert X_affinity.shape[0] == X_affinity.shape[1] assert X_affinity.shape == X2.shape score = b3_f_score(labels_true, labels_pred) return score clusterer = ScipyHierarchicalClustering(scoring=_scoring2, scoring_data="affinity", affinity="precomputed") clusterer.fit(X2, y2) labels = clusterer.labels_ assert_array_equal([25, 25, 25, 25], np.bincount(labels))
def test_stratified_shuffle_split_iter(): ys = [np.array([1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3]), np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]), np.array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2] * 2), np.array([1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]), np.array([-1] * 800 + [1] * 50), np.concatenate([[i] * (100 + i) for i in range(11)]), [1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3], ['1', '1', '1', '1', '2', '2', '2', '3', '3', '3', '3', '3'], ] for y in ys: sss = StratifiedShuffleSplit(6, test_size=0.33, random_state=0).split(np.ones(len(y)), y) y = np.asanyarray(y) # To make it indexable for y[train] # this is how test-size is computed internally # in _validate_shuffle_split test_size = np.ceil(0.33 * len(y)) train_size = len(y) - test_size for train, test in sss: assert_array_equal(np.unique(y[train]), np.unique(y[test])) # Checks if folds keep classes proportions p_train = (np.bincount(np.unique(y[train], return_inverse=True)[1]) / float(len(y[train]))) p_test = (np.bincount(np.unique(y[test], return_inverse=True)[1]) / float(len(y[test]))) assert_array_almost_equal(p_train, p_test, 1) assert_equal(len(train) + len(test), y.size) assert_equal(len(train), train_size) assert_equal(len(test), test_size) assert_array_equal(np.lib.arraysetops.intersect1d(train, test), [])
def display_roc():
    thresholds = np.linspace(0, 1, 21)
    for hash_name in hash_names:
        tpr = []
        fpr = []
        with open(hash_name + ".same", 'r+b') as f:
            same_family_dm = np.array(cPickle.load(f))
            same_family_uniqw, same_family_inverse = np.unique(same_family_dm, return_inverse=True)
            same_family_dmlist = dict(zip(same_family_uniqw, np.bincount(same_family_inverse)))
        with open(hash_name + ".diff", 'r+b') as f:
            diff_family_dm = np.array(cPickle.load(f))
            diff_family_uniqw, diff_family_inverse = np.unique(diff_family_dm, return_inverse=True)
            diff_family_dmlist = dict(zip(diff_family_uniqw, np.bincount(diff_family_inverse)))
        for threshold in thresholds:
            tp = fp = 0
            for dm in same_family_dmlist:
                if dm <= threshold:
                    tp += same_family_dmlist[dm]
            for dm in diff_family_dmlist:
                if dm <= threshold:
                    fp += diff_family_dmlist[dm]
            tpr.append(tp*1.0/same_family_dm.size)
            fpr.append(fp*1.0/diff_family_dm.size)
        print sm.auc(fpr, tpr)
        print "Fuzzy hashing algorithm: %s, AUC: %f" % (hash_name, sm.auc(fpr, tpr))
        plt.figure(0)
        plt.plot(fpr, tpr, label=hash_name)
    plt.ylim(0.75, 1)
    plt.legend(loc='best')
    plt.title("ROC curve for different algorithms")
    plt.xlabel("False positive rate")
    plt.ylabel("True positive rate")
    plt.show()
def relaxation(nodes, links):
    """ Gauss-Seidel relaxation for links """

    sources_idx = links["source"]
    targets_idx = links["target"]
    sources = nodes[sources_idx]
    targets = nodes[targets_idx]
    distances = links["distance"]
    strengths = links["strength"]

    D = targets["position"] - sources["position"]
    L = np.sqrt((D * D).sum(axis=1))

    # This avoids having to test for L != 0 (I = np.where(L > 0))
    L = np.where(L, L, np.NaN)
    L = strengths * (L - distances) / L

    # Replace nan by 0, i.e. where L was 0
    L = np.nan_to_num(L)

    D *= L.reshape(len(L), 1)
    K = sources["weight"] / (sources["weight"] + targets["weight"])
    K = K.reshape(len(K), 1)

    # Note that a direct nodes['position'][links['source']] += K*D*(1-F)
    # wouldn't work as expected because of repeated indices
    F = nodes["fixed"][sources_idx].reshape(len(links), 1)
    W = K * D * (1 - F) * 0.1
    nodes["position"][:, 0] += np.bincount(sources_idx, W[:, 0], minlength=len(nodes))
    nodes["position"][:, 1] += np.bincount(sources_idx, W[:, 1], minlength=len(nodes))

    F = nodes["fixed"][targets_idx].reshape(len(links), 1)
    W = (1 - K) * D * (1 - F) * 0.1
    nodes["position"][:, 0] -= np.bincount(targets_idx, W[:, 0], minlength=len(nodes))
    nodes["position"][:, 1] -= np.bincount(targets_idx, W[:, 1], minlength=len(nodes))
def _make_cm(X,M,R): N = len(X) # we pregenerate all indices i_idx,j_idx = np.triu_indices(N - M) # We start by making Cm Em = _embed_seq(X, 1, M) dif = np.abs(Em[i_idx] - Em[j_idx]) max_dist = np.max(dif, 1) inrange_cm = max_dist <= R in_range_i = i_idx[inrange_cm] in_range_j = j_idx[inrange_cm] Cm = np.bincount(in_range_i, minlength=N-M+1) Cm += np.bincount(in_range_j, minlength=N-M+1) inrange_last = np.max(np.abs(Em[:-1] - Em[-1]),1) <= R Cm[inrange_last] += 1 # all matches + self match Cm[-1] += np.sum(inrange_last) + 1 return Cm.astype(np.float), in_range_i, in_range_j
def uniform_paa(directory, min_time, max_time, bin_count, timeseries_index, attribute_index): """ Create waveforms using a piecewise aggregate approximation. :param directory: working directory for the timeseries :param min_time: :param max_time: :param bin_count: :param timeseries_index: :param attribute_index: :return: computed time series """ import h5py import numpy import os import slycat.hdf5 bin_edges = numpy.linspace(min_time, max_time, bin_count + 1) bin_times = (bin_edges[:-1] + bin_edges[1:]) / 2 with h5py.File(os.path.join(directory, "timeseries-%s.hdf5" % timeseries_index), "r") as file: original_times = slycat.hdf5.ArraySet(file)[0].get_data(0)[:] original_values = slycat.hdf5.ArraySet(file)[0].get_data(attribute_index + 1)[:] bin_indices = numpy.digitize(original_times, bin_edges[1:]) bin_counts = numpy.bincount(bin_indices, minlength=bin_count + 1)[1:] bin_sums = numpy.bincount(bin_indices, original_values, minlength=bin_count + 1)[1:] lonely_bins = (bin_counts < 2) bin_counts[lonely_bins] = 1 bin_sums[lonely_bins] = numpy.interp(bin_times, original_times, original_values)[lonely_bins] bin_values = bin_sums / bin_counts return { "input-index": timeseries_index, "times": bin_times, "values": bin_values, }
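# An illustrative, numpy-only sketch of the binning core used by uniform_paa()
# above; the sample times and values are hypothetical. Irregularly sampled
# values are averaged into uniform bins by pairing numpy.digitize with two
# bincount calls (one for the counts, one for the weighted sums).
import numpy

_times = numpy.array([0.1, 0.4, 1.2, 1.9, 2.5])
_values = numpy.array([1.0, 3.0, 2.0, 4.0, 5.0])
_edges = numpy.linspace(0.0, 3.0, 4)                # 3 uniform bins
_idx = numpy.digitize(_times, _edges[1:])           # bin index for each sample
_counts = numpy.bincount(_idx, minlength=3)
_sums = numpy.bincount(_idx, weights=_values, minlength=3)
_means = _sums / numpy.maximum(_counts, 1)          # guard against empty bins
# _means -> array([2., 3., 5.])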
def getnnz(self, axis=None): """Get the count of explicitly-stored values (nonzeros) Parameters ---------- axis : None, 0, or 1 Select between the number of values across the whole matrix, in each column, or in each row. """ if axis is None: nnz = len(self.data) if nnz != len(self.row) or nnz != len(self.col): raise ValueError('row, column, and data array must all be the ' 'same length') if self.data.ndim != 1 or self.row.ndim != 1 or \ self.col.ndim != 1: raise ValueError('row, column, and data arrays must be 1-D') return int(nnz) if axis < 0: axis += 2 if axis == 0: return np.bincount(downcast_intp_index(self.col), minlength=self.shape[1]) elif axis == 1: return np.bincount(downcast_intp_index(self.row), minlength=self.shape[0]) else: raise ValueError('axis out of bounds')
def plot_val_train(list_train, fig_name, epoch):
    import matplotlib.pyplot as plt
    if (epoch == -1):
        nd = numpy.array([[int(b), int(c), d] for (b, c, d) in list_train])  # all error
        idx = map(int, nd[:, 0])
        err = nd[:, 2]
        y = numpy.bincount(idx, err)[1:len(idx)+1] / np.bincount(idx)[1:len(idx)+1]
        x = [x+1 for x in range(len(y))]
        plt.title('Train Error change with epoch')
        plt.xlabel('epoch (x)')
    else:
        y = numpy.array([[b, c, d] for (b, c, d) in list_train if b == epoch])[:, 2]  # all error
        x = numpy.array([[b, c, d] for (b, c, d) in list_train if b == epoch])[:, 1]  # all error
        plt.title('Train Error change with minibatch')
        plt.xlabel('minibatch (x)')
    plt.plot(x, y)
    plt.ylabel('error (y)')
    plt.grid(True)
    #f.subplots_adjust(hspace=0)
    plt.savefig(fig_name)
    #plt.setp([a.get_xticklabels() for a in f.axes[:-1]], visible=False)
    plt.show()
def mean_avg_precision(x, y_val): rank = 1 - x # concatenate rank and y_val by column mat = np.matrix([rank,y_val]).T mat3 = mat[np.argsort(mat.A[:, 0])] mat3 = mat3[::-1] print mat3 somma = 0 for i in range(len(rank)): temp = 0 if mat3[i, 1] == 0: a = mat3[:(i+1), 1] a = np.asarray(a) a = a[:,0] a = a.astype(int) count = np.bincount(a) # counts the number of 0's and 1's up to entry i numb = count[0] # number of 0's up to entry i temp = float(numb) / (i+1) somma = somma + temp count = np.bincount(y_val) numb = count[0] avg = float(somma) / numb return avg
def histogrammap(ra, dec, weights=None, nside=32, perarea=False, range=None): if range is not None: (ra1, ra2), (dec1, dec2) = range m = (ra >= ra1)& (ra <= ra2) m &= (dec >= dec1)& (dec <= dec2) ra = ra[m] dec = dec[m] if weights is not None: weights = weights[m] ipix = ang2pix(nside, numpy.radians(90-dec), numpy.radians(ra)) npix = nside2npix(nside) if perarea: npix = nside2npix(nside) sky = 360. ** 2 / numpy.pi area = 1. * (sky / npix) else: area = 1 if weights is not None: w = numpy.bincount(ipix, weights=weights, minlength=npix) N = numpy.bincount(ipix, minlength=npix) w = w / area N = N / area return w, N else: w = 1.0 * numpy.bincount(ipix, minlength=npix) return w / area
def compute_B_prob_using_part_prob(data, probs, weight_column='N_sig_sw', event_id_column='event_id', signB_column='signB', sign_part_column='signTrack', normed_signs=False, prior_probs=None, functor=None): """ Compute p(B+) using probs for parts of event (tracks/vertices). :param data: pandas.DataFrame, data :param probs: probabilities for parts of events, numpy.array of shape [n_samples] :param weight_column: column for weights in data :param event_id_column: column for event id in data :param signB_column: column for event B sign in data :param sign_part_column: column for part sign in data :return: B sign array, B weight array, B+ prob array, B event id """ result_event_id, data_ids = numpy.unique(data[event_id_column].values, return_inverse=True) if prior_probs is None: log_probs = numpy.log(probs) - numpy.log(1 - probs) else: new_probs = prior_probs * (1 - probs) + (1 - prior_probs) * probs log_probs = numpy.log(new_probs) - numpy.log(1 - new_probs) sign_weights = numpy.ones(len(log_probs)) if normed_signs: for sign in [-1, 1]: maskB = (data[signB_column].values == sign) maskPart = (data[sign_part_column].values == 1) sign_weights[maskB * maskPart] = sum(maskB * (~maskPart)) * 1. / sum(maskB * maskPart) log_probs *= sign_weights * data[sign_part_column].values result_logprob = numpy.bincount(data_ids, weights=log_probs) # simply reconstructing original result_label = numpy.bincount(data_ids, weights=data[signB_column].values) / numpy.bincount(data_ids) result_weight = numpy.bincount(data_ids, weights=data[weight_column]) / numpy.bincount(data_ids) return result_label, result_weight, expit(result_logprob), result_event_id
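# An illustrative sketch of the per-event aggregation used above, with
# hypothetical ids and probabilities: numpy.unique(..., return_inverse=True)
# maps every track to a compact event slot, and bincount(..., weights=...)
# sums the track log-odds within each event before converting back with expit.
import numpy
from scipy.special import expit

_event_id = numpy.array([7, 7, 3, 3, 3])                 # track -> event id
_track_probs = numpy.array([0.6, 0.7, 0.4, 0.55, 0.5])   # per-track p(B+)
_ids, _inv = numpy.unique(_event_id, return_inverse=True)
_log_odds = numpy.log(_track_probs) - numpy.log(1 - _track_probs)
_event_log_odds = numpy.bincount(_inv, weights=_log_odds)
_event_prob = expit(_event_log_odds)                     # one p(B+) per event in _ids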
def plot_calibration(p, labels, bins=[10, 20, 30, 40, 50, 60, 70, 80, 90], weight=None): """ Plot calibration plot: probability vs true probability by percentile bins. :param array p: probability :param array labels: labels :param array bins: percentile values for numpy.percentile to compute bins ranges """ if weight is None: weight = numpy.ones(len(p)) bins = numpy.percentile(p, bins) bins_index = numpy.searchsorted(bins, p) pos_tagged = numpy.bincount(bins_index, weights=labels * weight) neg_tagged = numpy.bincount(bins_index, weights=(1 - labels) * weight) p_ = pos_tagged / (pos_tagged + neg_tagged) bins = [0.] + list(bins) + [1.] bins = numpy.array(bins) bins_centers = (bins[1:] + bins[:-1]) / 2 bins_error = (bins[1:] - bins[:-1]) / 2 err = numpy.sqrt(neg_tagged * pos_tagged) / (pos_tagged + neg_tagged)**1.5 plt.errorbar(bins_centers, p_, xerr=bins_error, yerr=err, fmt='.', linewidth=2) plt.plot([0, 1], [0, 1], 'k--') plt.ylim(-0.1, 1.1) plt.xlim(-0.1, 1.1) plt.xlabel('probability') plt.ylabel('true probability')
def remove_wrongly_sized_connected_components(self, a, min_size, max_size, in_place): """ Adapted from http://github.com/jni/ray/blob/develop/ray/morpho.py (MIT License) """ bin_out = self.BinaryOut.value original_dtype = a.dtype if not in_place: a = a.copy() if min_size == 0 and (max_size is None or max_size > numpy.prod(a.shape)): # shortcut for efficiency return a try: component_sizes = numpy.bincount( a.ravel() ) except TypeError: # On 32-bit systems, must explicitly convert from uint32 to int # (This fix is just for VM testing.) component_sizes = numpy.bincount( numpy.asarray(a.ravel(), dtype=int) ) bad_sizes = component_sizes < min_size if max_size is not None: numpy.logical_or( bad_sizes, component_sizes > max_size, out=bad_sizes ) bad_locations = bad_sizes[a] a[bad_locations] = 0 if (bin_out): # Replace non-zero values with 1 numpy.place(a,a,1) return numpy.array(a, dtype=original_dtype)
def sanity_checks(R): #extract out condition names and assess that we have nice uniform time point distributions condnames = np.array([x[0] for x in list(R.columns)]) tps = np.array([x[1] for x in list(R.columns)]) conds = np.unique(condnames) #commence sanity checks if len(conds)!=2: sys.stderr.write('ERROR: More than two treatment specifications detected. Exiting\n') sys.exit(1) if np.sum(condnames==conds[0])!=np.sum(condnames==conds[1]): sys.stderr.write('ERROR: Unbalanced number of data points between the two treatments. Exiting\n') sys.exit(1) #okay, so if we made it this far we have the same number of data points and only two condition names tp_conds = [] for cond in conds: cond_tps = tps[condnames==cond] #we should have the same exact number of reps per time point #so if we count up how many reps we have per time point, there should only be one unique value tpholder, inverse = np.unique(cond_tps, return_inverse=True) if len(np.unique(np.bincount(inverse)))!=1: sys.stderr.write('ERROR: Non-uniformity of time points for replicates detected in condition '+cond+'. Exiting\n') sys.exit(1) #well, if not, then we're fine and can store information tp_conds.append(tpholder) Nrepl = np.bincount(inverse)[0] #one last sanity check - are the time points the same? if not np.array_equal(tp_conds[0],tp_conds[1]): sys.stderr.write('ERROR: Different time points specified across the two treatments. Exiting\n') sys.exit(1)
def _elbo_grad_common(self, fep_mean, fep_sd, vcp_mean, vcp_sd, vc_mean, vc_sd): # p(vc | vcp) contributions m = vcp_mean[self.ident] s = vcp_sd[self.ident] u = vc_mean**2 + vc_sd**2 ve = np.exp(2*(s**2 - m)) dm = u * ve - 1 ds = -2 * u * ve * s vcp_mean_grad = np.bincount(self.ident, weights=dm) vcp_sd_grad = np.bincount(self.ident, weights=ds) vc_mean_grad = -vc_mean.copy() * ve vc_sd_grad = -vc_sd.copy() * ve # p(vcp) contributions vcp_mean_grad -= vcp_mean / self.vcp_p**2 vcp_sd_grad -= vcp_sd / self.vcp_p**2 # p(b) contributions fep_mean_grad = -fep_mean.copy() / self.fe_p**2 fep_sd_grad = -fep_sd.copy() / self.fe_p**2 return (fep_mean_grad, fep_sd_grad, vcp_mean_grad, vcp_sd_grad, vc_mean_grad, vc_sd_grad)
def _bincount_mapper(ex, tiles, minlength=None): if len(tiles) > 1: result = np.bincount(tiles[0], weights=tiles[1], minlength=minlength) else: result = np.bincount(tiles[0], minlength=minlength) result_ex = extent.from_shape(result.shape) yield result_ex, result
def get_indicator(tm): if isinstance(tm,np.ndarray): edges_plain = np.zeros_like(image) counts = np.bincount(tm.ravel()) for c in xrange(image.shape[2]): vals = np.bincount(tm.ravel(),image[:,:,c].ravel()) edges_plain[:,:,c] = (vals/counts)[tm] return edges_plain else: indicator = np.zeros(image_lab.shape[:2]+(3,),np.float32) indicator_map = tm.copy_map_for_image(indicator) tm_color = tm.copy_map_for_image(image) #data_management.add_array('diff_mat',diff_mat) steps,precondition_runs,accept_ratio for loc in xrange(len(tm)): key = tm.key_from_index(loc) im_data = np.reshape(tm_color[key],(-1,3)) color = np.mean(im_data,axis=0) for c in xrange(color.shape[0]): indicator_map[key][:,:,c] = color[c] return indicator
def joint_and_marginals(labels1,labels2,smoothing=0.0): """ marginal and joint distributions for a sequence of observations from a pair of disrete random variables, with additive smoothing on the joint distribution and the marginals in such a way that marginal(smooth(conditional)) = smooth(marginal) """ smoothing = float(smoothing) if len(labels1) != len(labels2): raise ValueError("label lists must have the same length") set1 = set(labels1) set2 = set(labels2) n1 = len(set1) n2 = len(set2) l2i1 = dict(zip(set1,range(len(set1)))) l2i2 = dict(zip(set2,range(len(set2)))) l1 = array([l2i1[l] for l in labels1]) l2 = array([l2i2[l] for l in labels2]) d1 = bincount(l1) + smoothing*n2 d1 = d1/d1.sum() d2 = bincount(l2) + smoothing*n1 d2 = d2/d2.sum() a_true = full((n1,n2),smoothing) for i,j in zip(l1,l2): a_true[i,j] += 1.0 a_true = a_true/a_true.sum() return d1,d2,a_true
def infer_labels(x, wu, wp, z=None, y=None): t_max = [] E_max = -1000000000 for t0 in [1,2,3]: for t1 in [1,2,3]: for t2 in [1,2,3]: for t3 in [1,2,3]: for t4 in [1,2,3]: t = [t0,t1,t2,t3,t4] if z is not None: if not np.all(np.bincount(t) == z): continue E = compute_energy(x,wu,wp,t) if y is not None: if y.full_labeled: E += np.sum(t!=y.full) else: w1 = np.zeros(4) w2 = np.zeros(4) tw = np.bincount(t) w1[:tw.shape[0]] = tw w2[:y.weak.shape[0]] = y.weak E += np.sum(np.abs(w1 - w2)) if E > E_max: t_max = t E_max = E return t_max
def make_batch_prediction_ensemble(self,phi_x): m, nsub, nfeat = np.shape(phi_x); hat = np.zeros(m); sub_hat = self.predictor.predict(np.reshape(phi_x,(m*nsub,nfeat))); #TODO:return_rec for i in range(m): votes = sub_hat[i*nsub:(i+1)*nsub] vote_bins = np.bincount(votes); vote_bins = np.append(vote_bins,np.zeros(7-np.size(vote_bins))); vote_bins_sort = np.sort(vote_bins); vote_bins_sort = vote_bins_sort[::-1]; #Descending #if vote_bins_sort[0] - vote_bins_sort[1] <= 1: #Small margit vote. use back-up predictor if vote_bins_sort[0] - vote_bins_sort[1] == 1: #Retest ties tie_votes = self.tie_predictor.predict(phi_x[i,:,:]); tie_vote_bins = np.bincount(tie_votes) #Ensemble: aggregate votes tie_vote_bins = np.append(tie_vote_bins,np.zeros(7-np.size(tie_vote_bins))); total_vote_bins = tie_vote_bins + 1.1*vote_bins; #tie breaker is rbf tie_maxvote = np.max(total_vote_bins); #Get highest vote total tie_argmaxx = np.where(np.array(total_vote_bins)==tie_maxvote)[0]; #Find all regions with that vote total if np.size(tie_argmaxx)>1: hat[i] = np.random.choice(tie_argmaxx); else: #No Tie hat[i] = tie_argmaxx[0]; else: hat[i]=np.argmax(vote_bins); return hat
def get_events_number(data, id_column='event_id'): """ :return: number of B events """ _, data_ids = numpy.unique(data[id_column], return_inverse=True) weights = numpy.bincount(data_ids, weights=data.N_sig_sw) / numpy.bincount(data_ids) return numpy.sum(weights)
import numpy as np

# unique: returns the list with duplicates removed; (naturally) only values that actually appear are shown
# with the unique argument return_counts=True, the count of each value is also returned
a = np.unique([1, 1, 2, 2, 3, 3, 3, 2, 2, 4])
print(a)  # [1 2 3 4]

b = np.array(['a', 'b', 'b', 'c', 'a', 'd'])
b1 = np.unique(b)
print(b1)  # ['a' 'b' 'c' 'd']

b2 = np.unique(b, return_counts=True)
print(b2)  # (array(['a', 'b', 'c', 'd'], dtype='<U1'), array([2, 2, 1, 1], dtype=int64))
print(b2[0])  # ['a' 'b' 'c' 'd']
print(b2[1])  # [2 2 1 1]

data, count = np.unique(b, return_counts=True)
print(data)   # ['a' 'b' 'c' 'd']
print(count)  # [2 2 1 1]

# bincount: values that never appear are still reported, with a count of 0
# the minlength argument of bincount is convenient for this
# what if you roll a die several times and some value never comes up?
# example) the die was rolled 6 times and came up 1, 1, 2, 2, 3, 4
print(np.bincount([1, 1, 2, 2, 3, 4], minlength=6))  # [0 2 2 1 1 0]
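# bincount also accepts a weights argument: instead of counting occurrences it
# sums a weight for each occurrence (illustrative values, following the examples above)
vals = np.array([0, 1, 1, 2, 2, 2])
wts = np.array([0.5, 1.0, 1.0, 2.0, 2.0, 2.0])
print(np.bincount(vals, weights=wts))  # [0.5 2.  6. ]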
def create_patches(data, labels, distrib, crop_size, num_classes, is_train=True): patches = [] classes = num_classes * [0] classes_patches = [] masks = [] overall_count = 0 flip_count = 0 for i in range(len(distrib)): cur_x = distrib[i][0] cur_y = distrib[i][1] cur_patch = data[cur_x:cur_x + crop_size, cur_y:cur_y + crop_size, :] if len(cur_patch) != crop_size and len(cur_patch[0]) != crop_size: cur_x = cur_x - (crop_size - len(cur_patch)) cur_y = cur_y - (crop_size - len(cur_patch[0])) cur_patch = data[cur_x:cur_x + crop_size, cur_y:cur_y + crop_size, :] elif len(cur_patch) != crop_size: cur_x = cur_x - (crop_size - len(cur_patch)) cur_patch = data[cur_x:cur_x + crop_size, cur_y:cur_y + crop_size, :] elif len(cur_patch[0]) != crop_size: cur_y = cur_y - (crop_size - len(cur_patch[0])) cur_patch = data[cur_x:cur_x + crop_size, cur_y:cur_y + crop_size, :] cur_mask_patch = labels[cur_x:cur_x + crop_size, cur_y:cur_y + crop_size] assert len(cur_patch) == crop_size and len(cur_patch[0]) == crop_size, \ "Error: Current PATCH size is " + str(len(cur_patch)) + "x" + str(len(cur_patch[0])) assert len(cur_mask_patch) == crop_size and len(cur_mask_patch[0]) == crop_size, \ "Error: Current MASK size is " + str(len(cur_mask_patch)) + "x" + str(len(cur_mask_patch[0])) cur_class = np.argmax(np.bincount( cur_mask_patch.astype(int).flatten())) classes[int(cur_class)] += 1 cur_mask = np.ones((crop_size, crop_size), dtype=np.bool) # DATA AUGMENTATION if is_train is True: # ROTATION AUGMENTATION cur_rot = np.random.randint(0, 360) possible_rotation = np.random.randint(0, 2) if possible_rotation == 1: # default = 1 # print 'rotation' cur_patch = scipy.ndimage.rotate(cur_patch, cur_rot, order=0, reshape=False) cur_mask_patch = scipy.ndimage.rotate(cur_mask_patch, cur_rot, order=0, reshape=False) cur_mask = scipy.ndimage.rotate(cur_mask, cur_rot, order=0, reshape=False) # NORMAL NOISE possible_noise = np.random.randint(0, 2) if possible_noise == 1: cur_patch = cur_patch + np.random.normal( 0, 0.01, cur_patch.shape) # FLIP AUGMENTATION possible_noise = np.random.randint(0, 3) if possible_noise == 0: patches.append(cur_patch) classes_patches.append(cur_mask_patch) masks.append(cur_mask) if possible_noise == 1: patches.append(np.flipud(cur_patch)) classes_patches.append(np.flipud(cur_mask_patch)) masks.append(np.flipud(cur_mask)) flip_count += 1 elif possible_noise == 2: patches.append(np.fliplr(cur_patch)) classes_patches.append(np.fliplr(cur_mask_patch)) masks.append(np.fliplr(cur_mask)) flip_count += 1 else: patches.append(cur_patch) classes_patches.append(cur_mask_patch) masks.append(cur_mask) overall_count += 1 pt_arr = np.asarray(patches) cl_arr = np.asarray(classes_patches, dtype=np.int) mask_arr = np.asarray(masks, dtype=np.bool) return pt_arr, cl_arr, mask_arr
refined_anchors = utils.apply_box_deltas( anchors[indices], rpn_bbox[b, :len(indices)] * config.RPN_BBOX_STD_DEV) log("anchors", anchors) log("refined_anchors", refined_anchors) # Get list of positive anchors positive_anchor_ids = np.where(rpn_match[b] == 1)[0] print("Positive anchors: {}".format(len(positive_anchor_ids))) negative_anchor_ids = np.where(rpn_match[b] == -1)[0] print("Negative anchors: {}".format(len(negative_anchor_ids))) neutral_anchor_ids = np.where(rpn_match[b] == 0)[0] print("Neutral anchors: {}".format(len(neutral_anchor_ids))) # ROI breakdown by class for c, n in zip(dataset.class_names, np.bincount(mrcnn_class_ids[b].flatten())): if n: print("{:23}: {}".format(c[:20], n)) # Show positive anchors visualize.draw_boxes(sample_image, boxes=anchors[positive_anchor_ids], refined_boxes=refined_anchors) # In[17]: # Show negative anchors visualize.draw_boxes(sample_image, boxes=anchors[negative_anchor_ids]) # In[18]:
def __call__(self, data_source, parameters): ds = data_source.ds exp_time = parameters["FiducialExposureTime"] area = parameters["FiducialArea"] redshift = parameters["FiducialRedshift"] D_A = parameters["FiducialAngularDiameterDistance"].in_cgs() dist_fac = 1.0/(4.*np.pi*D_A.value*D_A.value*(1.+redshift)**2) src_ctr = parameters["center"] my_kT_min, my_kT_max = data_source.quantities.extrema("kT") self.spectral_model.prepare_spectrum(redshift) emid = self.spectral_model.emid ebins = self.spectral_model.ebins nchan = len(emid) citer = data_source.chunks([], "io") photons = {} photons["x"] = [] photons["y"] = [] photons["z"] = [] photons["vx"] = [] photons["vy"] = [] photons["vz"] = [] photons["dx"] = [] photons["Energy"] = [] photons["NumberOfPhotons"] = [] spectral_norm = area.v*exp_time.v*dist_fac tot_num_cells = data_source.ires.shape[0] pbar = get_pbar("Generating photons ", tot_num_cells) cell_counter = 0 for chunk in parallel_objects(citer): kT = chunk["kT"].v num_cells = len(kT) if num_cells == 0: continue vol = chunk["cell_volume"].in_cgs().v EM = (chunk["density"]/mp).in_cgs().v**2 EM *= 0.5*(1.+self.X_H)*self.X_H*vol if isinstance(self.Zmet, string_types): metalZ = chunk[self.Zmet].v else: metalZ = self.Zmet*np.ones(num_cells) idxs = np.argsort(kT) kT_bins = np.linspace(kT_min, max(my_kT_max.v, kT_max), num=n_kT+1) dkT = kT_bins[1]-kT_bins[0] kT_idxs = np.digitize(kT[idxs], kT_bins) kT_idxs = np.minimum(np.maximum(1, kT_idxs), n_kT) - 1 bcounts = np.bincount(kT_idxs).astype("int") bcounts = bcounts[bcounts > 0] n = int(0) bcell = [] ecell = [] for bcount in bcounts: bcell.append(n) ecell.append(n+bcount) n += bcount kT_idxs = np.unique(kT_idxs) cell_em = EM[idxs]*spectral_norm number_of_photons = np.zeros(num_cells, dtype="uint64") energies = np.zeros(self.photons_per_chunk) start_e = 0 end_e = 0 for ibegin, iend, ikT in zip(bcell, ecell, kT_idxs): kT = kT_bins[ikT] + 0.5*dkT n_current = iend-ibegin cem = cell_em[ibegin:iend] cspec, mspec = self.spectral_model.get_spectrum(kT) tot_ph_c = cspec.d.sum() tot_ph_m = mspec.d.sum() u = self.prng.uniform(size=n_current) cell_norm_c = tot_ph_c*cem cell_norm_m = tot_ph_m*metalZ[ibegin:iend]*cem cell_norm = np.modf(cell_norm_c + cell_norm_m) cell_n = np.uint64(cell_norm[1]) + np.uint64(cell_norm[0] >= u) number_of_photons[ibegin:iend] = cell_n end_e += int(cell_n.sum()) if end_e > self.photons_per_chunk: raise RuntimeError("Number of photons generated for this chunk "+ "exceeds photons_per_chunk (%d)! " % self.photons_per_chunk + "Increase photons_per_chunk!") if self.method == "invert_cdf": cumspec_c = np.cumsum(cspec.d) cumspec_m = np.cumsum(mspec.d) cumspec_c = np.insert(cumspec_c, 0, 0.0) cumspec_m = np.insert(cumspec_m, 0, 0.0) ei = start_e for cn, Z in zip(number_of_photons[ibegin:iend], metalZ[ibegin:iend]): if cn == 0: continue # The rather verbose form of the few next statements is a # result of code optimization and shouldn't be changed # without checking for performance degradation. See # https://bitbucket.org/yt_analysis/yt/pull-requests/1766 # for details. 
if self.method == "invert_cdf": cumspec = cumspec_c cumspec += Z * cumspec_m norm_factor = 1.0 / cumspec[-1] cumspec *= norm_factor randvec = self.prng.uniform(size=cn) randvec.sort() cell_e = np.interp(randvec, cumspec, ebins) elif self.method == "accept_reject": tot_spec = cspec.d tot_spec += Z * mspec.d norm_factor = 1.0 / tot_spec.sum() tot_spec *= norm_factor eidxs = self.prng.choice(nchan, size=cn, p=tot_spec) cell_e = emid[eidxs] energies[int(ei):int(ei + cn)] = cell_e cell_counter += 1 pbar.update(cell_counter) ei += cn start_e = end_e active_cells = number_of_photons > 0 idxs = idxs[active_cells] photons["NumberOfPhotons"].append(number_of_photons[active_cells]) photons["Energy"].append(ds.arr(energies[:end_e].copy(), "keV")) photons["x"].append((chunk["x"][idxs]-src_ctr[0]).in_units("kpc")) photons["y"].append((chunk["y"][idxs]-src_ctr[1]).in_units("kpc")) photons["z"].append((chunk["z"][idxs]-src_ctr[2]).in_units("kpc")) photons["vx"].append(chunk["velocity_x"][idxs].in_units("km/s")) photons["vy"].append(chunk["velocity_y"][idxs].in_units("km/s")) photons["vz"].append(chunk["velocity_z"][idxs].in_units("km/s")) photons["dx"].append(chunk["dx"][idxs].in_units("kpc")) pbar.finish() for key in photons: if len(photons[key]) > 0: photons[key] = uconcatenate(photons[key]) elif key == "NumberOfPhotons": photons[key] = np.array([]) else: photons[key] = YTArray([], photon_units[key]) mylog.info("Number of photons generated: %d" % int(np.sum(photons["NumberOfPhotons"]))) mylog.info("Number of cells with photons: %d" % len(photons["x"])) self.spectral_model.cleanup_spectrum() return photons
def guess_numbers(img): strings = np.array([]) strings_rotated = np.array([]) strings_rotated_mirrored = np.array([]) # create and train kNN model # samples = np.loadtxt('generalsamples.data', np.float32) # responses = np.loadtxt('generalresponses_slanted.data', np.float32) samples = np.loadtxt('redesign_samples1.data', np.float32) responses = np.loadtxt('redesign_responses1.data', np.float32) responses = responses.reshape((responses.size, 1)) model = cv2.ml.KNearest_create() model.train(samples, cv2.ml.ROW_SAMPLE, responses) # Within each individual image find the contours gray = cv2.cvtColor(img.copy(), cv2.COLOR_BGR2GRAY) thresh = cv2.adaptiveThreshold(gray, 255, 1, 1, 11, 2) _, contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) results = np.array([]) results_rotated = np.array([]) results_rotated_mirrored = np.array([]) for cnt in contours: if cv2.contourArea(cnt) > 40: # cv2.drawContours(img, cnt, -1, (0,0,255), 2) # cv2.imshow('detection', img) # cv2.waitKey(0) [x, y, w, h] = cv2.boundingRect(cnt) if h > 28: roi = thresh[y:y + h, x:x + w] roismall = cv2.resize(roi, (10, 10)) roismall = roismall.reshape((1, 100)) roismall = np.float32(roismall) # Use kNN model to try identify digit value, result, neigh_resp, dists = model.findNearest(roismall, k=11) cv2.imshow('roismall', roi) print 'Original {}, {}, {}'.format(value, result, neigh_resp) roi = rotate(roi, -15) roismall = cv2.resize(roi, (10, 10)) roismall = roismall.reshape((1, 100)) roismall = np.float32(roismall) # Use kNN model to try identify digit value_rotated, result, neigh_resp, dists = model.findNearest(roismall, k=11) cv2.imshow('roismall rotated', roi) print 'Rotated {}, {}, {}'.format(value_rotated, result, neigh_resp) roi = cv2.flip(roi, 1) roismall = cv2.resize(roi, (10, 10)) roismall = roismall.reshape((1, 100)) roismall = np.float32(roismall) # Use kNN model to try identify digit value_rotated_mirrored, result, neigh_resp, dists = model.findNearest(roismall, k=3) print 'Rotated and mirrored {}, {}, {}'.format(value_rotated_mirrored, result, neigh_resp) cv2.imshow('roismall rotated, mirrored', roi) cv2.waitKey(1) cv2.destroyAllWindows() results = np.append(results, value) results_rotated = np.append(results_rotated, value_rotated) results_rotated_mirrored = np.append(results_rotated_mirrored, value_rotated_mirrored) # If multiple numbers are found in image take number with greatest number of occurrences if len(results) > 0: results = results.astype(int) results_rotated = results_rotated.astype(int) results_rotated_mirrored = results_rotated_mirrored.astype(int) strings = np.append(strings, str(np.bincount(results).argmax())) strings_rotated = np.append(strings_rotated, str(np.bincount(results_rotated).argmax())) strings_rotated_mirrored = np.append(strings_rotated_mirrored, str(np.bincount(results_rotated_mirrored).argmax())) else: strings = np.append(strings, '') x= ["".join(strings), "".join(strings_rotated), "".join(strings_rotated_mirrored)] y = [results, results_rotated, results_rotated_mirrored] print x, y return x
figsize=(col_nbr * col_size, row_nbr * row_size)) fig.suptitle('plot RGB images from the training set', fontsize=2, fontweight='bold') ax = ax.flatten() for i in range(42): img = X_train[y_train == i][0] ax[i].imshow(img, interpolation='nearest') plt.tight_layout() # In[4]: # Number of examples per label fig, ax = plt.subplots() bins = np.bincount(y_train) ax.bar(range(n_classes), bins, .40, color='c') plt.xlabel('Label Id') plt.ylabel('Label Count') plt.xticks(np.arange(0, n_classes, 2)) plt.title('Number of examples per label') plt.tight_layout() plt.grid(True) plt.show() # In[5]: # Read Signname from csv file import pandas as pd signnames = pd.read_csv('signnames.csv') signnames.head()
def make_lightcurve(toa, dt, tseg=None, tstart=None, gti=None, mjdref=0, use_hist=False): """ Make a light curve out of photon arrival times. Parameters ---------- toa: iterable list of photon arrival times dt: float time resolution of the light curve (the bin width) tseg: float, optional, default None The total duration of the light curve. If this is `None`, then the total duration of the light curve will be the interval between the arrival between the first and the last photon in `toa`. **Note**: If tseg is not divisible by dt (i.e. if tseg/dt is not an integer number), then the last fractional bin will be dropped! tstart: float, optional, default None The start time of the light curve. If this is None, the arrival time of the first photon will be used as the start time of the light curve. gti: 2-d float array [[gti0_0, gti0_1], [gti1_0, gti1_1], ...] Good Time Intervals use_hist : bool Use `np.histogram` instead of `np.bincounts`. Might be advantageous for very short datasets. Returns ------- lc: :class:`Lightcurve` object A light curve object with the binned light curve """ toa = np.asarray(toa) # tstart is an optional parameter to set a starting time for # the light curve in case this does not coincide with the first photon if tstart is None: # if tstart is not set, assume light curve starts with first photon tstart = toa[0] # compute the number of bins in the light curve # for cases where tseg/dt is not integer. # TODO: check that this is always consistent and that we # are not throwing away good events. if tseg is None: tseg = toa[-1] - tstart logging.info("make_lightcurve: tseg: " + str(tseg)) timebin = np.int64(tseg / dt) logging.info("make_lightcurve: timebin: " + str(timebin)) tend = tstart + timebin * dt good = (tstart <= toa) & (toa < tend) if not use_hist: binned_toas = ((toa[good] - tstart) // dt).astype(np.int64) counts = \ np.bincount(binned_toas, minlength=timebin) time = tstart + np.arange(0.5, 0.5 + len(counts)) * dt else: histbins = np.arange(tstart, tend + dt, dt) counts, histbins = np.histogram(toa[good], bins=histbins) time = histbins[:-1] + 0.5 * dt return Lightcurve(time, counts, gti=gti, mjdref=mjdref, dt=dt)
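# A hypothetical usage sketch for make_lightcurve() above, assuming the
# surrounding module provides the Lightcurve class used in the return
# statement; the arrival times below are fake, uniformly drawn values.
import numpy as np

_toa = np.sort(np.random.uniform(0.0, 100.0, size=5000))  # fake photon arrival times
_lc = make_lightcurve(_toa, dt=1.0)
# _lc.time holds the bin centres and _lc.counts the binned photon counts.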
file = tf.keras.utils #raw_df = pd.read_csv('https://storage.googleapis.com/download.tensorflow.org/data/creditcard.csv') #raw_df.to_csv(r'C:\Users\ELECTROBOT\Desktop\kaggle\tf_credit_card.csv', index=False) raw_df = pd.read_csv(r'C:\Users\ELECTROBOT\Desktop\kaggle\tf_credit_card.csv') raw_df.head() raw_df[[ 'Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V26', 'V27', 'V28', 'Amount', 'Class' ]].describe() #class imbalance neg, pos = np.bincount(raw_df['Class']) total = neg + pos print('Examples:\n Total: {}\n Positive: {} ({:.2f}% of total)\n'.format( total, pos, 100 * pos / total)) #Clean, split and normalize the data cleaned_df = raw_df.copy() # You don't want the `Time` column. cleaned_df.pop('Time') # The `Amount` column covers a huge range. Convert to log-space. eps = 0.001 # 0 => 0.1¢ cleaned_df['Log Ammount'] = np.log(cleaned_df.pop('Amount') + eps)
import numpy as np
import h5py
from time import time

vocab_size = 50257

with h5py.File('./tokenized', "r") as f:
    x = f['tokens'][:10000000]

y = np.bincount(x)
if len(y) != vocab_size:
    raise NotImplementedError
map = (-y).argsort().argsort()
print(x, np.take(map, x))

with h5py.File('./map', "w") as g:
    _ = g.create_dataset('map', shape=(vocab_size,), dtype='i4')
with h5py.File('./map', "a") as g:
    tmp = g['map']
    tmp[:] = map

t = time()
count = 0
n = 100000000

with h5py.File('./tokenized', "r") as f:
    tokens_in = f['tokens']
    docs_in = f['docs']
    tokens_len = tokens_in.len()
    docs_len = docs_in.len()
    with h5py.File('./tokenized2', "w") as g:
        _ = g.create_dataset('tokens', shape=(tokens_len,), chunks=(1000000,), dtype='u2')
        _ = g.create_dataset('docs', shape=(docs_len,), chunks=(10000,), dtype='i4')
    with h5py.File('./tokenized2', "a") as g:
        tokens_out = g['tokens']
        docs_out = g['docs']
while counter < 72: with picamera.array.PiRGBArray(camera, size=(IMG_SIZE, IMG_SIZE)) as stream: camera.capture(stream, 'rgb', resize=(IMG_SIZE, IMG_SIZE)) # At this point the image IN 3 CHANNELS is available as stream.array image = stream.array stream.truncate() #clear the stream for next capture stream.seek(0) # go back to start of stream and overwrite it image = np.dot(image, [.3, .6, .1]) #convert the 3 channel array to a greyscale 50x50x1 batch = image.reshape(1, IMG_SIZE, IMG_SIZE, 1) #let tensorflow know there's only one image by padding a 1 model_out = model.predict(batch)[0] if model_out[0] > .5: label = "Dry " + str(model_out[0]) history[counter % 10] = 1 else: label = "Wet " + str(model_out[0]) history[counter % 10] = 0 print(label) drynum = "Not enough data" if counter > 8: ratio = np.bincount(history, minlength=2) print("The ratio of dry to wet classifications in the past 10 captures is " + str(ratio[1]) + " to " + str(ratio[0])) drynum = str(ratio[1]) with open(outfilename, 'a') as f: t = datetime.datetime.now().strftime("%H:%M:%S") f.write("{},{},{}\n".format(t,label,drynum)) time.sleep(2.5) counter += 1 camera.stop_recording()
def phase_fold_animation(self, period_fold=None, cad_min=3): """ Animate phase-folded light curve versus time. Parameters ---------- period_fold : float, optional Specify a different period at which to fold. cad_min: int, optional Exclude light curve sections with fewer cadences than `cad_min`. """ # Calculate the phase. if period_fold is None: period_fold = self.params.p_orb phase = self.phase_fold(period_fold=period_fold) # Calculate the cycle number. cycle = ((self.l_curve.times - self.params.bjd_0) // self.params.p_orb).astype(int) # Start at zero cycle -= cycle.min() # Only use cycles with more cadences than `cad_min`. cycle_num = np.arange(cycle.max() + 1)[np.bincount(cycle) > cad_min] def data_gen(): for ii, nn in enumerate(cycle_num): mask = np.abs(cycle - nn) <= 0 phase_section = phase[mask] flux_section = self.l_curve.fluxes[mask] phase_sort = np.argsort(phase_section) yield phase_section[phase_sort], flux_section[phase_sort] def init(): lt_zero = -self.l_curve.fluxes[self.l_curve.fluxes < 0] flux_min = -1.1 * np.percentile(lt_zero, 99) gt_zero = self.l_curve.fluxes[self.l_curve.fluxes > 0] flux_max = 1.1 * np.percentile(gt_zero, 99) ax.set_ylim(flux_min, flux_max) ax.set_xlim(-0.1, 1.1) ax.set_xlabel('Phase') ax.set_ylabel('Relative Flux') del xdata[:] del ydata[:] line.set_data(xdata, ydata) return line, fig, ax = plt.subplots() line, = ax.plot([], [], color='k', lw=1) xdata, ydata = [], [] def run(data): # update the data t, y = data xdata = t ydata = y line.set_data(xdata, ydata) return line, ani = animation.FuncAnimation(fig, run, data_gen, blit=False, interval=100, repeat=True, init_func=init) plt.show()
def add_phase_interconnections(net, snow_partitioning_n, voxel_size=1,
                               marching_cubes_area=False, alias=None):
    r"""
    This function connects the networks of two or more phases together by
    interconnecting neighbouring nodes inside different phases.

    The resulting network can be used for the study of transport and kinetics
    at the interface between two phases.

    Parameters
    ----------
    network : 2D or 3D network
        A dictionary containing structural information of two or more phase
        networks. The dictionary format must be the same as that of the
        porespy region_to_network function.

    snow_partitioning_n : tuple
        The output generated by the snow_partitioning_n function. The tuple
        should have phases_max_labels and the original image of the material.

    voxel_size : scalar
        The resolution of the image, expressed as the length of one side of a
        voxel, so the volume of a voxel would be **voxel_size**-cubed. The
        default is 1, which is useful when overlaying the PNM on the original
        image since the scale of the image is always 1 unit length per voxel.

    marching_cubes_area : bool
        If ``True`` then the surface area and interfacial area between regions
        will be calculated using the marching cubes algorithm. This is a more
        accurate representation of the area in the extracted network, but is
        quite slow, so it is ``False`` by default. The default method simply
        counts voxels, so it does not correctly account for the voxelated
        nature of the images.

    alias : dict (Optional)
        A dictionary that assigns a unique image label to a specific phase.
        For example {1: 'Solid'} will show all structural properties
        associated with label 1 as Solid phase properties. If ``None`` then
        default labelling will be used, i.e. {1: 'Phase1', ...}.

    Returns
    -------
    A dictionary containing network information of the individual and
    connected networks. The dictionary names use the OpenPNM convention so it
    may be converted directly to an OpenPNM network object using the
    ``update`` command.
""" # ------------------------------------------------------------------------- # Get alias if provided by user im = snow_partitioning_n.im al = _create_alias_map(im, alias=alias) # ------------------------------------------------------------------------- # Find interconnection and interfacial area between ith and jth phases conns1 = net['throat.conns'][:, 0] conns2 = net['throat.conns'][:, 1] label = net['pore.label'] - 1 num = snow_partitioning_n.phase_max_label num = [0, *num] phases_num = np.unique(im * 1) phases_num = np.trim_zeros(phases_num) for i0, i1 in enumerate(phases_num): loc1 = np.logical_and(conns1 >= num[i0], conns1 < num[i0 + 1]) loc2 = np.logical_and(conns2 >= num[i0], conns2 < num[i0 + 1]) loc3 = np.logical_and(label >= num[i0], label < num[i0 + 1]) net['throat.{}'.format(al[i1])] = loc1 * loc2 net['pore.{}'.format(al[i1])] = loc3 if i1 == phases_num[-1]: loc4 = np.logical_and(conns1 < num[-1], conns2 >= num[-1]) loc5 = label >= num[-1] net['throat.boundary'] = loc4 net['pore.boundary'] = loc5 for j0, j1 in enumerate(phases_num): if j0 > i0: pi_pj_sa = np.zeros_like(label, dtype=float) loc6 = np.logical_and(conns2 >= num[j0], conns2 < num[j0 + 1]) pi_pj_conns = loc1 * loc6 net['throat.{}_{}'.format(al[i1], al[j1])] = pi_pj_conns if any(pi_pj_conns): # --------------------------------------------------------- # Calculates phase[i] interfacial area that connects with # phase[j] and vice versa p_conns = net['throat.conns'][:, 0][pi_pj_conns] s_conns = net['throat.conns'][:, 1][pi_pj_conns] ps = net['throat.area'][pi_pj_conns] p_sa = np.bincount(p_conns, ps) # trim zeros at head/tail position to avoid extra bins p_sa = np.trim_zeros(p_sa) i_index = np.arange(min(p_conns), max(p_conns) + 1) j_index = np.arange(min(s_conns), max(s_conns) + 1) s_pa = np.bincount(s_conns, ps) s_pa = np.trim_zeros(s_pa) pi_pj_sa[i_index] = p_sa pi_pj_sa[j_index] = s_pa # --------------------------------------------------------- # Calculates interfacial area using marching cube method if marching_cubes_area: ps_c = net['throat.area'][pi_pj_conns] p_sa_c = np.bincount(p_conns, ps_c) p_sa_c = np.trim_zeros(p_sa_c) s_pa_c = np.bincount(s_conns, ps_c) s_pa_c = np.trim_zeros(s_pa_c) pi_pj_sa[i_index] = p_sa_c pi_pj_sa[j_index] = s_pa_c net[f'pore.{al[i1]}_{al[j1]}_area'] = pi_pj_sa * voxel_size**2 return net
# Instanciate a PCA object for the sake of easy visualisation pca = PCA(n_components=2) # Fit and transform x to visualise inside a 2D feature space X_vis= pca.fit_transform(X) # Apply One-Sided Selection ncl = NeighbourhoodCleaningRule(random_state = 42, return_indices=True) X_resampled, y_resampled, idx_resampled = ncl.fit_sample(X, y) X_res_vis = pca.transform(X_resampled) fig = plt.figure() ax = fig.add_subplot(1, 1, 1) idx_samples_removed = np.setdiff1d(np.arange(X_vis.shape[0]), idx_resampled) frq = np.bincount(y_resampled) aar_neg = np.transpose((y_resampled==0).nonzero()) aar_pos = np.transpose((y_resampled==1).nonzero()) idx_class_0 = y_resampled == 0 h5filename = "histonemodTF_resample_ncl.h5" if os.path.exists(h5filename): #os.remove(h5filename) h5file = h5.File(h5filename,'a') #in_group = h5file.get('input') h5file.create_dataset('/input/H3K27me3_RPKM',data = X_resampled, dtype = np.float64, compression ='gzip') #out_group = h5file.get('output') h5file.create_dataset('/output/H3K27me3',data = y_resampled, dtype = np.int8, compression ='gzip')
def get_light_state(self, light): """Determines the current color of the traffic light Args: light (TrafficLight): light to classify Returns: int: ID of traffic light color (specified in styx_msgs/TrafficLight) """ if (not self.has_image): self.prev_light_loc = None return False self.image_count = self.image_count + 1 cv_image = self.bridge.imgmsg_to_cv2(self.camera_image, "rgb8") height, width, channels = cv_image.shape #x, y = self.project_to_image_plane(light.pose.pose.position) #file_name = "/home/shyam/Work/SDCNDP/Project13/Vidyut-CarND-Capstone/ros/images/img_light_tl" + str(self.image_count) + ".jpg" #log_file = open("/home/shyam/Work/SDCNDP/Project13/Vidyut-CarND-Capstone/ros/images/log.txt", 'a') #TODO use light location to zoom in on traffic light in image if height != 600 or width != 800: cv_image = cv2.resize(cv_image, (800, 600), interpolation=cv2.INTER_AREA) #cv2.imwrite(file_name, cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)) pred_state = TrafficLight.UNKNOWN #print("Tensorflow version " + tflow.__version__) with self.detection_graph.as_default(): with tflow.Session(graph=self.detection_graph) as sess: # Definite input and output Tensors for detection_graph image_tensor = self.detection_graph.get_tensor_by_name( 'image_tensor:0') # Each box represents a part of the image where a particular object was detected. detection_boxes = self.detection_graph.get_tensor_by_name( 'detection_boxes:0') # Each score represent how level of confidence for each of the objects. # Score is shown on the result image, together with the class label. detection_scores = self.detection_graph.get_tensor_by_name( 'detection_scores:0') detection_classes = self.detection_graph.get_tensor_by_name( 'detection_classes:0') num_detections = self.detection_graph.get_tensor_by_name( 'num_detections:0') #image_np = load_image_into_numpy_array(cv_image) image_np_expanded = np.expand_dims(cv_image, axis=0) (boxes, scores, classes, num) = sess.run([ detection_boxes, detection_scores, detection_classes, num_detections ], feed_dict={image_tensor: image_np_expanded}) class_id = [] for i in range(len(scores[0])): if scores[0][i] > 0.3: class_id.append(classes[0][i]) if (class_id): class_val = np.argmax(np.bincount(np.array(class_id, dtype=int))) if class_val == 1: pred_state = TrafficLight.RED elif class_val == 2: pred_state = TrafficLight.YELLOW elif class_val == 3: pred_state = TrafficLight.GREEN #print("pred_state {}".format(pred_state)) #print("ground_truth {}".format(light.state)) #log_file.write(file_name + " pred = " + str(pred_state) + " truth = " + str(light.state) + "\n") #log_file.close() #Get classification #return self.light_classifier.get_classification(cv_image) return pred_state
def __init__(self, count, datagen, batch_size, num_label, init_states, seq_length, width, height, sort_by_duration=True, is_bi_graphemes=False, language="zh", zh_type="zi", partition="train", buckets=[], save_feature_as_csvfile=False, num_parts=1, part_index=0, noise_percent=0.4, fbank=False ): super(BucketSTTIter, self).__init__() self.maxLabelLength = num_label # global param self.batch_size = batch_size self.count = count self.num_label = num_label self.init_states = init_states self.init_state_arrays = [mx.nd.zeros(x[1]) for x in init_states] self.width = width self.height = height self.datagen = datagen self.label = None self.is_bi_graphemes = is_bi_graphemes self.language = language self.zh_type = zh_type self.num_parts = num_parts self.part_index = part_index self.noise_percent = noise_percent self.fbank = fbank # self.partition = datagen.partition if partition == 'train': durations = datagen.train_durations audio_paths = datagen.train_audio_paths texts = datagen.train_texts elif partition == 'validation': durations = datagen.val_durations audio_paths = datagen.val_audio_paths texts = datagen.val_texts elif partition == 'test': durations = datagen.test_durations audio_paths = datagen.test_audio_paths texts = datagen.test_texts else: raise Exception("Invalid partition to load metadata. " "Must be train/validation/test") log = LogUtil().getlogger() # if sortagrad if sort_by_duration: durations, audio_paths, texts = datagen.sort_by_duration(durations, audio_paths, texts) else: durations = durations audio_paths = audio_paths texts = texts self.trainDataList = list(zip(durations, audio_paths, texts)) # self.trainDataList = [d for index, d in enumerate(zip(durations, audio_paths, texts)) if index % self.num_parts == self.part_index] # log.info("partition: %s, num_works: %d, part_index: %d 's data size is %d of all size is %d" % # (partition, self.num_parts, self.part_index, len(self.trainDataList), len(durations))) self.trainDataIter = iter(self.trainDataList) self.is_first_epoch = True data_lengths = [int(d * 100) for d in durations] if len(buckets) == 0: buckets = [i for i, j in enumerate(np.bincount(data_lengths)) if j >= batch_size] if len(buckets) == 0: raise Exception( 'There is no valid buckets. It may occured by large batch_size for each buckets. max bincount:%d batch_size:%d' % ( max(np.bincount(data_lengths)), batch_size)) buckets.sort() ndiscard = 0 self.data = [[] for _ in buckets] for i, sent in enumerate(data_lengths): buck = bisect.bisect_left(buckets, sent) if buck == len(buckets): ndiscard += 1 continue self.data[buck].append(self.trainDataList[i]) if ndiscard != 0: print("WARNING: discarded %d sentences longer than the largest bucket." 
% ndiscard) # self.num_parts = 3 debug # self.part_index = 2 for index_buck, buck in enumerate(self.data): self.data[index_buck] = [d for index_d, d in enumerate( self.data[index_buck][:len(self.data[index_buck]) // self.num_parts * self.num_parts]) if index_d % self.num_parts == self.part_index] log.info("partition: %s, num_works: %d, part_index: %d %d's data size is %d " % (partition, self.num_parts, self.part_index, index_buck, len(self.data[index_buck]))) self.buckets = buckets self.nddata = [] self.ndlabel = [] self.default_bucket_key = max(buckets) self.idx = [] for i, buck in enumerate(self.data): self.idx.extend([(i, j) for j in range(0, len(buck) - batch_size + 1, batch_size)]) self.curr_idx = 0 if not self.fbank: self.provide_data = [('data', (self.batch_size, self.default_bucket_key, width * height))] + init_states else: self.provide_data = [('data', (self.batch_size, 3, self.default_bucket_key, 41))] + init_states self.provide_label = [('label', (self.batch_size, self.maxLabelLength))] self.save_feature_as_csvfile = save_feature_as_csvfile
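The bucket construction above derives candidate buckets from `np.bincount` over integer utterance lengths (duration * 100) and then assigns each utterance with `bisect_left`; a small sketch with made-up durations and batch size, not the real data pipeline:

import bisect
import numpy as np

# Hypothetical durations (seconds) and batch size.
durations = [1.0, 1.0, 1.1, 1.5, 1.5, 1.5, 2.0]
batch_size = 2

data_lengths = [int(d * 100) for d in durations]   # -> [100, 100, 110, 150, 150, 150, 200]
# A length becomes a bucket only if at least batch_size utterances have it.
buckets = [i for i, j in enumerate(np.bincount(data_lengths)) if j >= batch_size]
print(buckets)  # -> [100, 150]; lengths 110 and 200 are too rare to form their own bucket

# Assign each utterance to the smallest bucket that can hold it; too-long ones are discarded.
for length in data_lengths:
    buck = bisect.bisect_left(buckets, length)
    print(length, 'discarded' if buck == len(buckets) else buckets[buck])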
y_8 = y_train_All[idx_8] idx_9 = np.array(np.where(y_train_All == 9)).T idx_9 = idx_9[0:2, 0] X_9 = X_train_All[idx_9, :, :, :] y_9 = y_train_All[idx_9] X_train = np.concatenate( (X_0, X_1, X_2, X_3, X_4, X_5, X_6, X_7, X_8, X_9), axis=0) y_train = np.concatenate( (y_0, y_1, y_2, y_3, y_4, y_5, y_6, y_7, y_8, y_9), axis=0) print('X_train shape:', X_train.shape) print(X_train.shape[0], 'train samples') print('Distribution of Training Classes:', np.bincount(y_train)) X_train = X_train.astype('float32') X_test = X_test.astype('float32') X_valid = X_valid.astype('float32') X_Pool = X_Pool.astype('float32') X_train /= 255 X_valid /= 255 X_Pool /= 255 X_test /= 255 Y_test = np_utils.to_categorical(y_test, nb_classes) Y_valid = np_utils.to_categorical(y_valid, nb_classes) Y_Pool = np_utils.to_categorical(y_Pool, nb_classes) #loss values in each experiment
def GetInterestPoints(groundTruth, geoTransform, imgNum, maxRows, maxCols, limits, railSegments, stepSize, patchSize): print "Getting interest Points" targetPoints = [] relevantPoints = [] iniRow = max(0, limits[0] - patchSize) iniRow -= iniRow % stepSize endRow = min(maxRows, limits[2] + patchSize) endRow += stepSize - (endRow % stepSize) iniCol = max(0, limits[1] - patchSize) iniCol -= iniCol % stepSize endCol = min(maxCols, limits[3] + patchSize) endCol += stepSize - (endCol % stepSize) print(iniRow, endRow, (endRow - iniRow) / stepSize) print(iniCol, endCol, (endCol - iniCol) / stepSize) for i in range(iniRow, endRow, stepSize): for j in range(iniCol, endCol, stepSize): #for i in range(0, maxRows, stepSize): #for j in range(0,maxCols, stepSize): #print (i,j) ulc, urc = j, min(j + patchSize, maxCols - 1) ulr = urr = i dlc, drc = ulc, urc dlr = drr = min(i + patchSize, maxRows - 1) # Se tem algum pixel de erosao if (np.any(groundTruth[ulr:dlr, ulc:urc])): count = np.bincount( (groundTruth[ulr:dlr, ulc:urc] > 0).astype(int).flatten()) if count[1] >= count[0]: targetPoints.append((imgNum, i, j)) else: relevantPoints.append((imgNum, i, j)) elif i > endRow or i < iniRow or j > endCol or j < iniCol: continue else: # Checa se o patch tem intersecao com a ferrovia ul = utils.PixelToCoordinate(geoTransform, (ulc, ulr)) ur = utils.PixelToCoordinate(geoTransform, (urc, urr)) dl = utils.PixelToCoordinate(geoTransform, (dlc, dlr)) dr = utils.PixelToCoordinate(geoTransform, (drc, drr)) ''' box = createPolygon([ul, ur, dr, dl]) rails.ResetReading() for feature in rails: railway = feature.GetGeometryRef() railway.Transform(transform) if box.Intersect(railway): #print (i, j) relevantPoints.append((i,j)) break ''' seg1 = (ul, ur) seg2 = (ur, dr) seg3 = (dr, dl) seg4 = (dl, ul) for segment in railSegments: if segmentIntersect(seg1, segment) or segmentIntersect( seg2, segment) or segmentIntersect( seg3, segment) or segmentIntersect( seg4, segment): relevantPoints.append((imgNum, i, j)) break return targetPoints, relevantPoints
def _paga_graph(adata, ax, solid_edges=None, dashed_edges=None, adjacency_solid=None, adjacency_dashed=None, transitions=None, threshold=None, root=0, colors=None, labels=None, fontsize=None, fontweight=None, text_kwds=None, node_size_scale=1, node_size_power=0.5, edge_width_scale=1, normalize_to_color='reference', title=None, pos=None, cmap=None, frameon=True, min_edge_width=None, max_edge_width=None, export_to_gexf=False, cax=None, colorbar=None, use_raw=True, cb_kwds={}, single_component=False, arrowsize=30): import networkx as nx node_labels = labels # rename for clarity if (node_labels is not None and isinstance(node_labels, str) and node_labels != adata.uns['paga']['groups']): raise ValueError( 'Provide a list of group labels for the PAGA groups {}, not {}.'. format(adata.uns['paga']['groups'], node_labels)) groups_key = adata.uns['paga']['groups'] if node_labels is None: node_labels = adata.obs[groups_key].cat.categories if (colors is None or colors == groups_key) and groups_key is not None: if (groups_key + '_colors' not in adata.uns or len(adata.obs[groups_key].cat.categories) != len( adata.uns[groups_key + '_colors'])): utils.add_colors_for_categorical_sample_annotation( adata, groups_key) colors = adata.uns[groups_key + '_colors'] for iname, name in enumerate(adata.obs[groups_key].cat.categories): if name in settings.categories_to_ignore: colors[iname] = 'grey' nx_g_solid = nx.Graph(adjacency_solid) if dashed_edges is not None: nx_g_dashed = nx.Graph(adjacency_dashed) # convert pos to dict if isinstance(pos, str): if not pos.endswith('.gdf'): raise ValueError( 'Currently only supporting reading positions from .gdf files.' 'Consider generating them using, for instance, Gephi.') s = '' # read the node definition from the file with open(pos) as f: f.readline() for line in f: if line.startswith('edgedef>'): break s += line from io import StringIO df = pd.read_csv(StringIO(s), header=-1) pos = df[[4, 5]].values pos_array = pos # convert to dictionary pos = {n: [p[0], p[1]] for n, p in enumerate(pos)} # uniform color if isinstance(colors, str) and is_color_like(colors): colors = [colors for c in range(len(node_labels))] # color degree of the graph if isinstance(colors, str) and colors.startswith('degree'): # see also tools.paga.paga_degrees if colors == 'degree_dashed': colors = [d for _, d in nx_g_dashed.degree(weight='weight')] elif colors == 'degree_solid': colors = [d for _, d in nx_g_solid.degree(weight='weight')] else: raise ValueError( '`degree` either "degree_dashed" or "degree_solid".') colors = (np.array(colors) - np.min(colors)) / (np.max(colors) - np.min(colors)) # plot gene expression var_names = adata.var_names if adata.raw is None else adata.raw.var_names if isinstance(colors, str) and colors in var_names: x_color = [] cats = adata.obs[groups_key].cat.categories for icat, cat in enumerate(cats): subset = (cat == adata.obs[groups_key]).values if adata.raw is not None and use_raw: adata_gene = adata.raw[:, colors] else: adata_gene = adata[:, colors] x_color.append(np.mean(adata_gene.X[subset])) colors = x_color # plot continuous annotation if (isinstance(colors, str) and colors in adata.obs and not is_categorical_dtype(adata.obs[colors])): x_color = [] cats = adata.obs[groups_key].cat.categories for icat, cat in enumerate(cats): subset = (cat == adata.obs[groups_key]).values x_color.append(adata.obs.loc[subset, colors].mean()) colors = x_color # plot categorical annotation if (isinstance(colors, str) and colors in adata.obs and is_categorical_dtype(adata.obs[colors])): 
from ... import utils as sc_utils asso_names, asso_matrix = sc_utils.compute_association_matrix_of_groups( adata, prediction=groups_key, reference=colors, normalization='reference' if normalize_to_color else 'prediction') utils.add_colors_for_categorical_sample_annotation(adata, colors) asso_colors = sc_utils.get_associated_colors_of_groups( adata.uns[colors + '_colors'], asso_matrix) colors = asso_colors if len(colors) < len(node_labels): print(node_labels, colors) raise ValueError( '`color` list need to be at least as long as `groups`/`node_labels` list.' ) # count number of connected components n_components, labels = scipy.sparse.csgraph.connected_components( adjacency_solid) if n_components > 1 and not single_component: logg.debug( 'Graph has more than a single connected component. ' 'To restrict to this component, pass `single_component=True`.') if n_components > 1 and single_component: component_sizes = np.bincount(labels) largest_component = np.where( component_sizes == component_sizes.max())[0][0] adjacency_solid = adjacency_solid.tocsr()[labels == largest_component, :] adjacency_solid = adjacency_solid.tocsc()[:, labels == largest_component] colors = np.array(colors)[labels == largest_component] node_labels = np.array(node_labels)[labels == largest_component] cats_dropped = adata.obs[groups_key].cat.categories[ labels != largest_component].tolist() logg.info( 'Restricting graph to largest connected component by dropping categories\n' f'{cats_dropped}') nx_g_solid = nx.Graph(adjacency_solid) if dashed_edges is not None: raise ValueError( '`single_component` only if `dashed_edges` is `None`.') # edge widths base_edge_width = edge_width_scale * 5 * rcParams['lines.linewidth'] # draw dashed edges if dashed_edges is not None: widths = [x[-1]['weight'] for x in nx_g_dashed.edges(data=True)] widths = base_edge_width * np.array(widths) if max_edge_width is not None: widths = np.clip(widths, None, max_edge_width) nx.draw_networkx_edges(nx_g_dashed, pos, ax=ax, width=widths, edge_color='grey', style='dashed', alpha=0.5) # draw solid edges if transitions is None: widths = [x[-1]['weight'] for x in nx_g_solid.edges(data=True)] widths = base_edge_width * np.array(widths) if min_edge_width is not None or max_edge_width is not None: widths = np.clip(widths, min_edge_width, max_edge_width) with warnings.catch_warnings(): warnings.simplefilter("ignore") nx.draw_networkx_edges(nx_g_solid, pos, ax=ax, width=widths, edge_color='black') # draw directed edges else: adjacency_transitions = adata.uns['paga'][transitions].copy() if threshold is None: threshold = 0.01 adjacency_transitions.data[adjacency_transitions.data < threshold] = 0 adjacency_transitions.eliminate_zeros() g_dir = nx.DiGraph(adjacency_transitions.T) widths = [x[-1]['weight'] for x in g_dir.edges(data=True)] widths = base_edge_width * np.array(widths) if min_edge_width is not None or max_edge_width is not None: widths = np.clip(widths, min_edge_width, max_edge_width) nx.draw_networkx_edges(g_dir, pos, ax=ax, width=widths, edge_color='black', arrowsize=arrowsize) if export_to_gexf: if isinstance(colors[0], tuple): from matplotlib.colors import rgb2hex colors = [rgb2hex(c) for c in colors] for count, n in enumerate(nx_g_solid.nodes()): nx_g_solid.node[count]['label'] = str(node_labels[count]) nx_g_solid.node[count]['color'] = str(colors[count]) nx_g_solid.node[count]['viz'] = { 'position': { 'x': 1000 * pos[count][0], 'y': 1000 * pos[count][1], 'z': 0 } } filename = settings.writedir / 'paga_graph.gexf' logg.warning(f'exporting to 
{filename}') settings.writedir.mkdir(parents=True, exist_ok=True) nx.write_gexf(nx_g_solid, settings.writedir / 'paga_graph.gexf') ax.set_frame_on(frameon) ax.set_xticks([]) ax.set_yticks([]) # groups sizes if groups_key is not None and groups_key + '_sizes' in adata.uns: groups_sizes = adata.uns[groups_key + '_sizes'] else: groups_sizes = np.ones(len(node_labels)) base_scale_scatter = 2000 base_pie_size = (base_scale_scatter / (np.sqrt(adjacency_solid.shape[0]) + 10) * node_size_scale) median_group_size = np.median(groups_sizes) groups_sizes = base_pie_size * np.power(groups_sizes / median_group_size, node_size_power) if fontsize is None: fontsize = rcParams['legend.fontsize'] # usual scatter plot if not isinstance(colors[0], dict): n_groups = len(pos_array) sct = ax.scatter(pos_array[:, 0], pos_array[:, 1], c=colors[:n_groups], edgecolors='face', s=groups_sizes, cmap=cmap) for count, group in enumerate(node_labels): ax.text(pos_array[count, 0], pos_array[count, 1], group, verticalalignment='center', horizontalalignment='center', size=fontsize, fontweight=fontweight, **text_kwds) # else pie chart plot else: # start with this dummy plot... otherwise strange behavior sct = ax.scatter(pos_array[:, 0], pos_array[:, 1], c='white', edgecolors='face', s=groups_sizes, cmap=cmap) trans = ax.transData.transform bbox = ax.get_position().get_points() ax_x_min = bbox[0, 0] ax_x_max = bbox[1, 0] ax_y_min = bbox[0, 1] ax_y_max = bbox[1, 1] ax_len_x = ax_x_max - ax_x_min ax_len_y = ax_y_max - ax_y_min trans2 = ax.transAxes.inverted().transform pie_axs = [] for count, n in enumerate(nx_g_solid.nodes()): pie_size = groups_sizes[count] / base_scale_scatter x1, y1 = trans(pos[n]) # data coordinates xa, ya = trans2((x1, y1)) # axis coordinates xa = ax_x_min + (xa - pie_size / 2) * ax_len_x ya = ax_y_min + (ya - pie_size / 2) * ax_len_y # clip, the fruchterman layout sometimes places below figure if ya < 0: ya = 0 if xa < 0: xa = 0 pie_axs.append( pl.axes([xa, ya, pie_size * ax_len_x, pie_size * ax_len_y], frameon=False)) pie_axs[count].set_xticks([]) pie_axs[count].set_yticks([]) if not isinstance(colors[count], dict): raise ValueError( '{} is neither a dict of valid matplotlib colors ' 'nor a valid matplotlib color.'.format(colors[count])) color_single = colors[count].keys() fracs = [colors[count][c] for c in color_single] if sum(fracs) < 1: color_single = list(color_single) color_single.append('grey') fracs.append(1 - sum(fracs)) pie_axs[count].pie(fracs, colors=color_single) if node_labels is not None: for ia, a in enumerate(pie_axs): a.text(0.5, 0.5, node_labels[ia], verticalalignment='center', horizontalalignment='center', transform=a.transAxes, size=fontsize, fontweight=fontweight, **text_kwds) return sct
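For reference, a toy sketch of the largest-connected-component restriction used above (`scipy.sparse.csgraph.connected_components` plus `np.bincount`), on a hypothetical 5-node adjacency matrix rather than a real PAGA graph:

import numpy as np
import scipy.sparse
import scipy.sparse.csgraph

# Hypothetical graph with two components: {0, 1, 2} and {3, 4}.
rows = [0, 1, 1, 2, 3, 4]
cols = [1, 0, 2, 1, 4, 3]
adjacency = scipy.sparse.csr_matrix((np.ones(len(rows)), (rows, cols)), shape=(5, 5))

n_components, labels = scipy.sparse.csgraph.connected_components(adjacency)
component_sizes = np.bincount(labels)
largest_component = np.where(component_sizes == component_sizes.max())[0][0]
keep = labels == largest_component
print(n_components, labels, keep)  # -> 2 [0 0 0 1 1] [ True  True  True False False]

# Restrict the adjacency to the largest component, mirroring the PAGA code above.
adjacency = adjacency.tocsr()[keep, :].tocsc()[:, keep]
print(adjacency.shape)  # -> (3, 3)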
def predict(self, X): predictions = np.asarray([clf.predict(X) for clf in self.classifier_array]).T maj = np.apply_along_axis(lambda x: np.argmax(np.bincount(x)), axis=1, arr=predictions) maj = self.label_encoder.inverse_transform(maj) return maj
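A minimal standalone sketch of the same majority-vote step, with made-up integer predictions standing in for `clf.predict(X)` and the label-encoder step omitted; per row, `np.bincount` tallies the votes and `np.argmax` picks the winner (ties resolve to the smaller encoded label):

import numpy as np

# Hypothetical votes from three classifiers over four samples.
preds_a = np.array([0, 1, 1, 2])
preds_b = np.array([0, 1, 2, 2])
preds_c = np.array([1, 1, 2, 0])

# Rows = samples, columns = classifiers (same layout as `predictions` above).
predictions = np.vstack([preds_a, preds_b, preds_c]).T

# Majority vote per sample: count the votes in each row, take the most frequent label.
majority = np.apply_along_axis(lambda row: np.argmax(np.bincount(row)), axis=1, arr=predictions)
print(majority)  # -> [0 1 2 2]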
import script_chdir import numpy as np import results.plots as lplot import matplotlib.pyplot as plt from hybrid_model.dataset import get_dataset from hybrid_model.index_sampler import IndexSamplerUserItembased as IndexSampler dataset = get_dataset('ml100k') (inds_u, inds_i, y, users_features, items_features) = dataset.data user_dist = np.bincount(inds_u, minlength=dataset.n_users) item_dist = np.bincount(inds_i, minlength=dataset.n_items) order_users = np.argsort(-user_dist) order_items = np.argsort(-item_dist) dist_users = user_dist[order_users] dist_items = item_dist[order_items] inds_u = np.argsort(order_users)[inds_u] inds_i = np.argsort(order_items)[inds_i] # Index sampling sampler_config = {'f_cf': 0.15, 'min_ratings_user': 30, 'f_user': 3.0, 'min_ratings_item': 10, 'f_item': 3.0} sampler = IndexSampler(dist_users, dist_items, sampler_config, [inds_u, inds_i]) from_cf = sampler.get_indices_from_cf() from_md = sampler.get_indices_from_md() from_cf = (from_cf[0].flatten(), from_cf[1].flatten()) from_md = (from_md[0].flatten(), from_md[1].flatten())
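The reindexing above (`np.argsort(order_users)[inds_u]`) relabels each user/item id by its popularity rank; a self-contained sketch with made-up interaction indices shows the double-argsort trick:

import numpy as np

# Hypothetical interaction list: user ids of 6 ratings, 4 users in total.
inds_u = np.array([2, 0, 2, 1, 2, 0])
n_users = 4

user_dist = np.bincount(inds_u, minlength=n_users)   # ratings per user        -> [2 1 3 0]
order_users = np.argsort(-user_dist)                 # users by popularity      -> [2 0 1 3]
rank_of_user = np.argsort(order_users)               # user id -> popularity rank -> [1 2 0 3]

# Relabel every interaction with its user's popularity rank.
print(rank_of_user[inds_u])  # -> [0 1 0 2 0 1]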
def fast_hist(a, b, n): k = (a >= 0) & (a < n) return np.bincount(n * a[k].astype(int) + b[k], minlength=n**2).reshape(n, n)
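A small usage sketch of `fast_hist` (definition repeated so it runs standalone) on a hypothetical 2-class segmentation example; the result is an n x n confusion matrix from which per-class IoU follows directly:

import numpy as np

def fast_hist(a, b, n):
    k = (a >= 0) & (a < n)
    return np.bincount(n * a[k].astype(int) + b[k], minlength=n**2).reshape(n, n)

# Hypothetical ground truth vs. prediction for 6 pixels, 2 classes.
gt   = np.array([0, 0, 1, 1, 1, 0])
pred = np.array([0, 1, 1, 1, 0, 0])
hist = fast_hist(gt, pred, n=2)
print(hist)
# [[2 1]
#  [1 2]]
# Diagonal = correctly labelled pixels; per-class IoU = diag / (row sum + col sum - diag).
iou = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
print(iou)  # -> [0.5 0.5]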
def createPatches(imgs, gts, batch, crop_size, band_count, test=False, debug=False): if not test: print " ------------------ Creating Patches -------------------" patches = [] patchesclass = [] wd = int(floor(crop_size / 2)) i = 0 while i < len(batch): p = np.asarray(batch[i], dtype=int) m = p[0] maxRows = imgs[m].shape[1] maxCols = imgs[m].shape[2] patch = np.zeros((band_count, crop_size, crop_size), dtype=imgs[m].dtype) gtpatch = np.zeros((crop_size, crop_size), dtype=gts[m].dtype) ulc, urc = p[2], min(p[2] + crop_size, maxCols - 1) ulr = urr = p[1] dlc, drc = ulc, urc dlr = drr = min(p[1] + crop_size, maxRows - 1) patch = imgs[m][:, ulr:dlr, ulc:urc] gtpatch = gts[m][ulr:dlr, ulc:urc] #lMin = max(0, p[1] - wd) #lMax = min(p[1] + wd, imgs[m].shape[1] - 1) #cMin = max(0, p[2] - wd) #cMax = min(p[2] + wd, imgs[m].shape[2] - 1) # print(lMin, lMax) # print(cMin, cMax) # for b in range(band_count): # for l in range(lMin, lMax + 1): # for c in range(cMin, cMax + 1): # patch[b][l - lMin][c - cMin] = imgs[m][b][l][c] # gtpatch[l - lMin][c - cMin] = gts[m][l][c] patches.append(np.moveaxis(patch, 0, -1)) #gtMax = max(1, np.amax(gtpatch)) #gtpatch /= gtMax gtpatch = (gtpatch > 0).astype(int) if debug: print(m, maxRows, maxCols) print(ulr, ulc, dlr, dlc, drr, drc, urr, urc) print gtpatch.shape print gtpatch count = np.bincount(gtpatch.flatten()) print count if count[0] != crop_size * crop_size: print "Non-erosion: {0} | Erosion: {1}".format( count[0], count[1]) else: print "Non-erosion: {0}".format(count[0]) patchesclass.append(gtpatch) if not test: if i % 10 == 0: print "{0} patches out of {1} done.".format(i, len(batch)) i += 1 if not test: print " -------------- Finished Creating Patches --------------" return np.asarray(patches), np.asarray(patchesclass)
if col != 0: inferred_network.append([int(i), int(j), col]) if dataset != 0 and plot: DG.add_weighted_edges_from([(int(j),int(i),col)]) np.savetxt(inferredNetworkFileName, inferred_network, delimiter=",") # Check which nodes are connected for k in range(N): if np.array(np.where(np.array(inferred_network) == k)).size == 0: not_connected.append(k) if plot: senders = np.array(DG.edges)[:,0] senders_count = np.bincount(senders,minlength=N) receivers = np.array(DG.edges)[:,1] receivers_count = np.bincount(receivers,minlength=N) count = senders_count - receivers_count # Plot Indegree plt.figure() bins = np.linspace(0, 15, 12) plt.hist(senders_count,bins, label='outdegree', color='blue', alpha=0.5) plt.hist(receivers_count,bins, label='indegree', color='red', alpha=0.5) plt.xlabel('number of connections') plt.ylabel('number of occurrences') plt.title('Indegree and outdegree of the biological neural network') plt.grid() plt.legend() plt.savefig('plot/degree_histogram_real_network.pdf', dpi=300)
def pick_largest_cc(traversible): out = scipy.ndimage.label(traversible)[0] cnt = np.bincount(out.reshape(-1))[1:] return out == np.argmax(cnt) + 1
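A hedged usage sketch of `pick_largest_cc` on a made-up boolean occupancy grid; the `[1:]` drops the background label 0 from the counts, and the `+ 1` maps the argmax back to the original component label:

import numpy as np
import scipy.ndimage

def pick_largest_cc(traversible):
    out = scipy.ndimage.label(traversible)[0]
    cnt = np.bincount(out.reshape(-1))[1:]
    return out == np.argmax(cnt) + 1

# Hypothetical map: a 1-cell blob (top-left) and a 4-cell blob (bottom-right).
grid = np.array([[1, 0, 0, 0],
                 [0, 0, 0, 0],
                 [0, 0, 1, 1],
                 [0, 0, 1, 1]], dtype=bool)
print(pick_largest_cc(grid).astype(int))
# Only the 4-cell component survives:
# [[0 0 0 0]
#  [0 0 0 0]
#  [0 0 1 1]
#  [0 0 1 1]]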
def part1(jolts): # number of 1-step and 3-step differences multiplied bins = np.bincount(np.diff(jolts)) return bins[1] * bins[3]
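A runnable sketch of `part1`, assuming `jolts` is the full sorted adapter chain including the 0-jolt outlet and the device adapter (max + 3), so `np.diff` yields only 1- and 3-jolt gaps; the sample adapters below are illustrative:

import numpy as np

def part1(jolts):
    # number of 1-step and 3-step differences multiplied
    bins = np.bincount(np.diff(jolts))
    return bins[1] * bins[3]

# Hypothetical puzzle input plus outlet (0) and built-in device (max + 3).
adapters = np.array([16, 10, 15, 5, 1, 11, 7, 19, 6, 12, 4])
chain = np.sort(np.concatenate(([0], adapters, [adapters.max() + 3])))
print(part1(chain))  # -> 35 (7 one-jolt gaps * 5 three-jolt gaps)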
- repo: https://github.com/marofmar/60daysofudacity/blob/master/Day20_review_PATE.py - OMG...? please,,, at least github should work in here. ''' import numpy as np num_teachers = 10 num_examples = 10000 num_labels = 10 preds = (np.random.rand(num_teachers, num_examples) * num_labels).astype(int).transpose(1,0) new_labels = list() for an_image in preds: label_counts = np.bincount(an_image, minlength = num_labels).astype(float) epsilon = 0.1 beta = 1 / epsilon for i in range(len(label_counts)): label_counts[i] += np.random.laplace(0, beta) new_label = np.argmax(label_counts) new_labels.append(new_label) labels = np.array([9,9,3,6,9,9,9,9,8,2]) counts = np.bincount(labels, minlength = 10) query_result = np.argmax(counts) print(query_result)
def aggregate_vote(preds): counts = np.bincount(preds) max_vote = np.argmax(counts) return max_vote
def correct_error(data_flip, data_e_flip, c_out, c_out_flip): s_out = np.zeros(k_ols, dtype=int) for i in range(16): s_out[i] = c_out[i] ^ c_out_flip[i] # ols d_ols_c = np.zeros(k_ols, dtype=int) # 1st 4 bits d1_s = np.array([s_out[0], s_out[4], s_out[8], s_out[12]]) majority = np.argmax(np.bincount(d1_s)) d_ols_c[0] = majority ^ data_flip[0] d2_s = np.array([s_out[0], s_out[5], s_out[9], s_out[13]]) majority = np.argmax(np.bincount(d2_s)) d_ols_c[1] = majority ^ data_flip[1] d3_s = np.array([s_out[0], s_out[6], s_out[10], s_out[14]]) majority = np.argmax(np.bincount(d3_s)) d_ols_c[2] = majority ^ data_flip[2] d4_s = np.array([s_out[0], s_out[7], s_out[11], s_out[15]]) majority = np.argmax(np.bincount(d4_s)) d_ols_c[3] = majority ^ data_flip[3] # 2nd 4 bits d5_s = np.array([s_out[1], s_out[4], s_out[9], s_out[14]]) majority = np.argmax(np.bincount(d5_s)) d_ols_c[4] = majority ^ data_flip[4] d6_s = np.array([s_out[1], s_out[5], s_out[8], s_out[15]]) majority = np.argmax(np.bincount(d6_s)) d_ols_c[5] = majority ^ data_flip[5] d7_s = np.array([s_out[1], s_out[6], s_out[11], s_out[12]]) majority = np.argmax(np.bincount(d7_s)) d_ols_c[6] = majority ^ data_flip[6] d8_s = np.array([s_out[1], s_out[5], s_out[10], s_out[13]]) majority = np.argmax(np.bincount(d8_s)) d_ols_c[7] = majority ^ data_flip[7] # 3rd 4 bits d9_s = np.array([s_out[2], s_out[4], s_out[10], s_out[15]]) majority = np.argmax(np.bincount(d9_s)) d_ols_c[8] = majority ^ data_flip[8] d10_s = np.array([s_out[2], s_out[5], s_out[11], s_out[14]]) majority = np.argmax(np.bincount(d10_s)) d_ols_c[9] = majority ^ data_flip[9] d11_s = np.array([s_out[2], s_out[6], s_out[8], s_out[13]]) majority = np.argmax(np.bincount(d11_s)) d_ols_c[10] = majority ^ data_flip[10] d12_s = np.array([s_out[2], s_out[7], s_out[9], s_out[12]]) majority = np.argmax(np.bincount(d12_s)) d_ols_c[11] = majority ^ data_flip[11] # last 4 bits d13_s = np.array([s_out[3], s_out[4], s_out[11], s_out[13]]) majority = np.argmax(np.bincount(d13_s)) d_ols_c[12] = majority ^ data_flip[12] d14_s = np.array([s_out[3], s_out[5], s_out[10], s_out[12]]) majority = np.argmax(np.bincount(d14_s)) d_ols_c[13] = majority ^ data_flip[13] d15_s = np.array([s_out[3], s_out[6], s_out[9], s_out[15]]) majority = np.argmax(np.bincount(d15_s)) d_ols_c[14] = majority ^ data_flip[14] d16_s = np.array([s_out[3], s_out[7], s_out[14], s_out[15]]) majority = np.argmax(np.bincount(d16_s)) d_ols_c[15] = majority ^ data_flip[15] ################################################ # sec-ded de_sec_ded_c = np.zeros(k_ols, dtype=int) # 1st 4 bits de1_s = s_out[0] and s_out[1] and s_out[2] and not s_out[3] de_sec_ded_c[0] = de1_s ^ data_e_flip[0] de2_s = s_out[0] and s_out[1] and s_out[3] and not s_out[2] de_sec_ded_c[1] = de2_s ^ data_e_flip[1] de3_s = s_out[0] and s_out[2] and s_out[3] and not s_out[1] de_sec_ded_c[2] = de3_s ^ data_e_flip[2] de4_s = s_out[1] and s_out[2] and s_out[3] and not s_out[0] de_sec_ded_c[3] = de4_s ^ data_e_flip[3] # 2nd 4 bits de5_s = s_out[4] and s_out[5] and s_out[6] and not s_out[7] de_sec_ded_c[4] = de5_s ^ data_e_flip[4] de6_s = s_out[4] and s_out[5] and s_out[7] and not s_out[6] de_sec_ded_c[5] = de6_s ^ data_e_flip[5] de7_s = s_out[4] and s_out[6] and s_out[7] and not s_out[5] de_sec_ded_c[6] = de7_s ^ data_e_flip[6] de8_s = s_out[5] and s_out[6] and s_out[7] and not s_out[4] de_sec_ded_c[7] = de8_s ^ data_e_flip[7] # 3rd 4 bits de9_s = s_out[8] and s_out[9] and s_out[10] and not s_out[11] de_sec_ded_c[8] = de9_s ^ data_e_flip[8] de10_s = s_out[8] and s_out[9] and 
s_out[11] and not s_out[10] de_sec_ded_c[9] = de10_s ^ data_e_flip[9] de11_s = s_out[8] and s_out[10] and s_out[11] and not s_out[9] de_sec_ded_c[10] = de11_s ^ data_e_flip[10] de12_s = s_out[9] and s_out[10] and s_out[11] and not s_out[8] de_sec_ded_c[11] = de12_s ^ data_e_flip[11] # Last 4 bits de13_s = s_out[12] and s_out[13] and s_out[14] and not s_out[15] de_sec_ded_c[12] = de13_s ^ data_e_flip[12] de14_s = s_out[12] and s_out[13] and s_out[15] and not s_out[14] de_sec_ded_c[13] = de14_s ^ data_e_flip[13] de15_s = s_out[12] and s_out[14] and s_out[15] and not s_out[13] de_sec_ded_c[14] = de15_s ^ data_e_flip[14] de16_s = s_out[13] and s_out[14] and s_out[15] and not s_out[12] de_sec_ded_c[15] = de16_s ^ data_e_flip[15] return d_ols_c, de_sec_ded_c
def mod(self, val): bincount = np.bincount(val) return np.argmax(bincount)
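`mod` above computes the mode of non-negative integers via `np.bincount`; a tiny standalone version (the `self` argument dropped and the function renamed for clarity) illustrates the behaviour, including ties resolving to the smaller value:

import numpy as np

def mode(val):
    # Count occurrences of each non-negative integer and return the most frequent one.
    return np.argmax(np.bincount(val))

print(mode(np.array([3, 1, 3, 2, 3, 1])))  # -> 3
print(mode(np.array([1, 2, 1, 2])))        # -> 1 (ties resolve to the smaller label)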
def onehot(data, min_length): return np.bincount(data, minlength=min_length)
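Despite its name, `onehot` above returns a count (bag-of-labels) vector, which coincides with a one-hot encoding only when `data` holds a single index; a short sketch makes the distinction visible:

import numpy as np

def onehot(data, min_length):
    return np.bincount(data, minlength=min_length)

print(onehot(np.array([2]), 5))        # -> [0 0 1 0 0]  (a true one-hot: single index)
print(onehot(np.array([2, 2, 0]), 5))  # -> [1 0 2 0 0]  (counts, not 0/1 indicators)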