Example #1
def PredictionMatrix():
    print('Loading valid data...')
    (X_valid, y_valid, the_images) = LARGEload.load_valid_data()
    X_valid = X_valid.astype("float32")
    X_valid /= 255
    print('Generating prediction...')
    print ("The actual values are:")
    #print (y_valid)
    y = np.bincount(y_valid)
    ii = np.nonzero(y)[0]
    print(list(zip(ii, y[ii])))
    y_pred = model.predict_classes(X_valid, batch_size=20)
    #print (y_pred)
    y = np.bincount(y_pred)
    ii = np.nonzero(y)[0]
    print ("The predicted values are:")
    print(list(zip(ii, y[ii])))
    print ("Our confusion matrix is:")
    cm = confusion_matrix(y_valid, y_pred)
    print(cm)
    plot_confusion_matrix(cm, weights=save_name[:-5])
    # how many samples match the ground truth validation labels ?
    correct = np.sum(y_pred == y_valid)
    print ("Number of correct classifications is %d/5000" % (correct))
    # accuracy = number correct / total number
    accuracy = float(correct) / 5000
    print ("Accuracy of %f" % accuracy)
    return accuracy, correct
Example #2
def splat(t, value, bins):
    """put value into bins according to t
       the points are assumed to be describing a continuum field,
       if two points have the same position, they are merged into one point

       for points crossing the edge part is added to the left bin
       and part is added to the right bin.
       the sum is conserved.
    """
    if len(t) == 0:
        return numpy.zeros(len(bins) + 1)
    t = numpy.float64(t)
    t, label = numpy.unique(t, return_inverse=True)
    if numpy.isscalar(value):
        value = numpy.bincount(label) * value
    else:
        value = numpy.bincount(label, weights=value)
    edge = numpy.concatenate(([t[0]], (t[1:] + t[:-1]) * 0.5, [t[-1]]))
    dig = numpy.digitize(edge, bins)
    #use the right edge as the reference
    ref = bins[dig[1:] - 1]
    norm = (edge[1:] - edge[:-1])
    assert ((edge[1:] - edge[:-1]) > 0).all()
    norm = 1 / norm
    weightleft = -(edge[:-1] - ref) * norm
    weightright = (edge[1:] - ref) * norm
    # when dig < 1 or dig >= len(bins), t are out of bounds and does not
    # contribute.
    l = numpy.bincount(dig[:-1], value * weightleft, minlength=len(bins)+1)
    r = numpy.bincount(dig[1:], value * weightright, minlength=len(bins)+1)
    return l + r
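A minimal usage sketch (not part of the original example) showing the conservation property described in the docstring; it assumes the splat function above is in scope:

import numpy
t = numpy.array([0.2, 1.5, 1.5, 2.7])
bins = numpy.array([0.0, 1.0, 2.0, 3.0])
out = splat(t, 1.0, bins)   # scalar value: each point contributes a weight of 1
print(out.shape)            # (5,) -- len(bins) + 1 slots
print(out.sum())            # ~4.0: the total is conserved, one unit per point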
Example #3
def hist_from_snapshots(rpt = 10):
#  hist_all = np.zeros(256,dtype=int)
  hist1 = np.zeros(256,dtype=int)
  hist2 = np.zeros(256,dtype=int)
  hist3 = np.zeros(256,dtype=int)
  hist4 = np.zeros(256,dtype=int)
  for i in range(rpt):
    snap=adc5g.get_snapshot(roach2, snap_name, man_trig=True, wait_period=2)
    snap = 128 + np.array(snap)
#    hist = np.bincount(snap, minlength=256)
#    hist_all += hist
    hist = np.bincount(snap[0:: 4], minlength=256)
    hist1 += hist
    hist = np.bincount(snap[1:: 4], minlength=256)
    hist2 += hist
    hist = np.bincount(snap[2:: 4], minlength=256)
    hist3 += hist
    hist = np.bincount(snap[3:: 4], minlength=256)
    hist4 += hist
  data=np.column_stack((np.arange(-128., 128, dtype=int), hist1, hist2,
      hist3, hist4))
  np.savetxt("hist_cores", data, fmt=("%d"))
#  print "all ",np.sum(hist_all[0:128]), np.sum(hist_all[128:256])
  print "core a  ",np.sum(hist1[0:128]), np.sum(hist1[128:256])
  print "core b  ",np.sum(hist3[0:128]), np.sum(hist3[128:256])
  print "core c  ",np.sum(hist2[0:128]), np.sum(hist2[128:256])
  print "core d  ",np.sum(hist4[0:128]), np.sum(hist4[128:256])
Example #4
def test_stratified_shuffle_split_iter():
    ys = [np.array([1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3]),
          np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]),
          np.array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2] * 2),
          np.array([1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]),
          np.array([-1] * 800 + [1] * 50)
          ]

    for y in ys:
        sss = cval.StratifiedShuffleSplit(y, 6, test_size=0.33,
                                          random_state=0)
        test_size = np.ceil(0.33 * len(y))
        train_size = len(y) - test_size
        for train, test in sss:
            assert_array_equal(np.unique(y[train]), np.unique(y[test]))
            # Checks if folds keep classes proportions
            p_train = (np.bincount(np.unique(y[train],
                                   return_inverse=True)[1]) /
                       float(len(y[train])))
            p_test = (np.bincount(np.unique(y[test],
                                  return_inverse=True)[1]) /
                      float(len(y[test])))
            assert_array_almost_equal(p_train, p_test, 1)
            assert_equal(len(train) + len(test), y.size)
            assert_equal(len(train), train_size)
            assert_equal(len(test), test_size)
            assert_array_equal(np.lib.arraysetops.intersect1d(train, test), [])
Example #5
def check_min_samples_leaf(name):
    X, y = hastie_X, hastie_y

    # Test if leaves contain more than leaf_count training examples
    ForestEstimator = FOREST_ESTIMATORS[name]

    # test boundary value
    assert_raises(ValueError,
                  ForestEstimator(min_samples_leaf=-1).fit, X, y)
    assert_raises(ValueError,
                  ForestEstimator(min_samples_leaf=0).fit, X, y)

    est = ForestEstimator(min_samples_leaf=5, n_estimators=1, random_state=0)
    est.fit(X, y)
    out = est.estimators_[0].tree_.apply(X)
    node_counts = np.bincount(out)
    # drop inner nodes
    leaf_count = node_counts[node_counts != 0]
    assert_greater(np.min(leaf_count), 4,
                   "Failed with {0}".format(name))

    est = ForestEstimator(min_samples_leaf=0.25, n_estimators=1,
                          random_state=0)
    est.fit(X, y)
    out = est.estimators_[0].tree_.apply(X)
    node_counts = np.bincount(out)
    # drop inner nodes
    leaf_count = node_counts[node_counts != 0]
    assert_greater(np.min(leaf_count), len(X) * 0.25 - 1,
                   "Failed with {0}".format(name))
Example #6
    def test_bincountOp(self):
        w = T.vector('w')
        for dtype in ('int8', 'int16', 'int32', 'int64',
                      'uint8', 'uint16', 'uint32', 'uint64'):
            # uint64 always fails
            # int64 and uint32 also fail if python int are 32-bit
            int_bitwidth = theano.gof.python_int_bitwidth()
            if int_bitwidth == 64:
                numpy_unsupported_dtypes = ('uint64',)
            if int_bitwidth == 32:
                numpy_unsupported_dtypes = ('uint32', 'int64', 'uint64')

            x = T.vector('x', dtype=dtype)

            if dtype in numpy_unsupported_dtypes:
                self.assertRaises(TypeError, bincount, x)

            else:
                a = np.random.random_integers(50, size=(25)).astype(dtype)
                weights = np.random.random((25,)).astype(config.floatX)

                f1 = theano.function([x], bincount(x))
                f2 = theano.function([x, w], bincount(x, weights=w))

                assert (np.bincount(a) == f1(a)).all()
                assert np.allclose(np.bincount(a, weights=weights),
                                   f2(a, weights))
                if not numpy_16:
                    continue
                f3 = theano.function([x], bincount(x, minlength=23))
                f4 = theano.function([x], bincount(x, minlength=5))
                assert (np.bincount(a, minlength=23) == f3(a)).all()
                assert (np.bincount(a, minlength=5) == f4(a)).all()
Example #7
 def stats(self, out_counts, out_adj, adj_index = string.ascii_letters + ' '):
   """Given two input arrays this adds to them the statistics of the contained text. The first array is of length 256, and counts the instances of character codes. The second array is 2D, with ['a', 'b'] being the number of times a 'b' follows an 'a'. It is indexed by adj_index however, and character pairs that contain a character not included are not counted."""
   
   # Counts are relatively easy - convert and histogram...
   text_codes = numpy.fromstring(self.text.encode('utf8'), dtype=numpy.uint8)
   out_counts += numpy.bincount(text_codes, minlength=256)
   
   # Adjacencies require a little more sneakiness...
   # First convert the codes array into an index into adj_index; entries whose character is not in adj_index are mapped to the out-of-range sentinel cap...
   adj_codes = numpy.fromstring(adj_index, dtype=numpy.uint8)
   
   cap = len(adj_index) * len(adj_index)
   conversion = numpy.empty(256, dtype=numpy.int64)
   conversion[:] = cap
   conversion[adj_codes] = numpy.arange(adj_codes.shape[0])
   
   text_codes = conversion[text_codes]
   
   # Now take adjacent pairs, and calculate the 1D index in out_adj matrix...
   pos = (text_codes[:-1] * len(adj_index)) + text_codes[1:]
   
   # Lose values that are too large - they are pairs we do not record...
   pos = pos[pos < cap]
   
   # Histogram and sum into the adjacency matrix...
   if pos.shape[0]>0:
     out_adj += numpy.bincount(pos, minlength=cap).reshape((len(adj_index),len(adj_index)))
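A standalone sketch (not from the original source) of the flattened-index trick used above: bigram counts for a small alphabet via a single numpy.bincount plus a reshape.

import numpy

alphabet = "abc"
cap = len(alphabet) ** 2
text = numpy.frombuffer(b"abcabca", dtype=numpy.uint8)
lookup = numpy.full(256, cap, dtype=numpy.int64)   # characters outside the alphabet map to an out-of-range sentinel
lookup[numpy.frombuffer(alphabet.encode(), dtype=numpy.uint8)] = numpy.arange(len(alphabet))
codes = lookup[text]
pairs = codes[:-1] * len(alphabet) + codes[1:]     # 1-D index of every adjacent pair
pairs = pairs[pairs < cap]                         # drop pairs that contain the sentinel
counts = numpy.bincount(pairs, minlength=cap).reshape(len(alphabet), len(alphabet))
print(counts)   # counts[i, j] = how often alphabet[j] follows alphabet[i]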
Example #8
    def reproject(self, nj_obj, field):
        """Reproject a field of another njord inst. to the current grid"""
        if not hasattr(self,'nj_ivec'):
            self.add_njijvec(nj_obj)
        field = getattr(nj_obj, field) if type(field) is str else field
        
        if hasattr(nj_obj, 'tvec') and (len(nj_obj.tvec) == field.shape[0]):
            newfield = np.zeros(nj_obj.tvec.shape + self.llat.shape)
            for tpos in range(len(nj_obj.tvec)):
                newfield[tpos,:,:] = self.reproject(nj_obj, field[tpos,...])
            return newfield

        di = self.i2 - self.i1
        dj = self.j2 - self.j1
        xy = np.vstack((self.nj_jvec, self.nj_ivec))
        if type(field) == str:
            weights = np.ravel(nj_obj.__dict__[field])[self.nj_mask]
        else:
            weights = np.ravel(field)[self.nj_mask]
        mask = ~np.isnan(weights) 
        flat_coord = np.ravel_multi_index(xy[:,mask],(dj, di))
        sums = np.bincount(flat_coord, weights[mask])
        cnts = np.bincount(flat_coord)
        fld = np.zeros((dj, di)) * np.nan
        fld.flat[:len(sums)] = sums.astype(np.float)/cnts
        try:
            self.add_landmask()
            fld[self.landmask] = np.nan
        except:
            print "Couldn't load landmask for %s" % self.projname
        return fld
Example #9
    def update_nogrid(self, params):

        endog = self.model.endog_li
        cached_means = self.model.cached_means
        varfunc = self.model.family.variance

        dep_params = np.zeros(self.max_lag + 1)
        dn = np.zeros(self.max_lag + 1)
        for i in range(self.model.num_group):

            expval, _ = cached_means[i]
            stdev = np.sqrt(varfunc(expval))
            resid = (endog[i] - expval) / stdev

            j1, j2 = np.tril_indices(len(expval))
            dx = np.abs(self.time[i][j1] - self.time[i][j2])
            ii = np.flatnonzero(dx <= self.max_lag)
            j1 = j1[ii]
            j2 = j2[ii]
            dx = dx[ii]

            vs = np.bincount(dx, weights=resid[
                             j1] * resid[j2], minlength=self.max_lag + 1)
            vd = np.bincount(dx, minlength=self.max_lag + 1)

            ii = np.flatnonzero(vd > 0)
            dn[ii] += 1
            if len(ii) > 0:
                dep_params[ii] += vs[ii] / vd[ii]

        dep_params /= dn
        self.dep_params = dep_params[1:] / dep_params[0]
Example #10
def test_shc_semi_supervised_scoring_data_affinity():
    """Test semi-supervised learning for SHC when scoring_data='affinity'."""
    # Passing feature matrix
    X1, y1 = generate_data(supervised=True, affinity=False)

    def _scoring1(X_affinity, labels_true, labels_pred):
        assert X_affinity.shape[0] == X_affinity.shape[1]
        assert X_affinity.shape != X1.shape
        score = b3_f_score(labels_true, labels_pred)
        return score

    clusterer = ScipyHierarchicalClustering(scoring=_scoring1,
                                            scoring_data="affinity",
                                            affinity=euclidean_distances)
    clusterer.fit(X1, y1)
    labels = clusterer.labels_
    assert_array_equal([25, 25, 25, 25], np.bincount(labels))

    # Passing affinity matrix
    X2, y2 = generate_data(supervised=True, affinity=True)

    def _scoring2(X_affinity, labels_true, labels_pred):
        assert X_affinity.shape[0] == X_affinity.shape[1]
        assert X_affinity.shape == X2.shape
        score = b3_f_score(labels_true, labels_pred)
        return score

    clusterer = ScipyHierarchicalClustering(scoring=_scoring2,
                                            scoring_data="affinity",
                                            affinity="precomputed")
    clusterer.fit(X2, y2)
    labels = clusterer.labels_
    assert_array_equal([25, 25, 25, 25], np.bincount(labels))
Example #11
def test_stratified_shuffle_split_iter():
    ys = [np.array([1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3]),
          np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]),
          np.array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2] * 2),
          np.array([1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]),
          np.array([-1] * 800 + [1] * 50),
          np.concatenate([[i] * (100 + i) for i in range(11)]),
          [1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3],
          ['1', '1', '1', '1', '2', '2', '2', '3', '3', '3', '3', '3'],
          ]

    for y in ys:
        sss = StratifiedShuffleSplit(6, test_size=0.33,
                                     random_state=0).split(np.ones(len(y)), y)
        y = np.asanyarray(y)  # To make it indexable for y[train]
        # this is how test-size is computed internally
        # in _validate_shuffle_split
        test_size = np.ceil(0.33 * len(y))
        train_size = len(y) - test_size
        for train, test in sss:
            assert_array_equal(np.unique(y[train]), np.unique(y[test]))
            # Checks if folds keep classes proportions
            p_train = (np.bincount(np.unique(y[train],
                                   return_inverse=True)[1]) /
                       float(len(y[train])))
            p_test = (np.bincount(np.unique(y[test],
                                  return_inverse=True)[1]) /
                      float(len(y[test])))
            assert_array_almost_equal(p_train, p_test, 1)
            assert_equal(len(train) + len(test), y.size)
            assert_equal(len(train), train_size)
            assert_equal(len(test), test_size)
            assert_array_equal(np.lib.arraysetops.intersect1d(train, test), [])
Example #12
def display_roc():
    thresholds = np.linspace(0, 1, 21)
    for hash_name in hash_names:
        tpr = []
        fpr = []
        with open(hash_name + ".same", 'r+b') as f:
            same_family_dm = np.array(cPickle.load(f))
        same_family_uniqw, same_family_inverse = np.unique(same_family_dm, return_inverse=True)
        same_family_dmlist = dict(zip(same_family_uniqw, np.bincount(same_family_inverse)))
        with open(hash_name + ".diff", 'r+b') as f:
            diff_family_dm = np.array(cPickle.load(f))
        diff_family_uniqw, diff_family_inverse = np.unique(diff_family_dm, return_inverse=True)
        diff_family_dmlist = dict(zip(diff_family_uniqw, np.bincount(diff_family_inverse)))
        for threshold in thresholds:
            tp = fp = 0
            for dm in same_family_dmlist:
                if dm <= threshold:
                    tp += same_family_dmlist[dm]
            for dm in diff_family_dmlist:
                if dm <= threshold:
                    fp += diff_family_dmlist[dm]
            tpr.append(tp*1.0/same_family_dm.size)
            fpr.append(fp*1.0/diff_family_dm.size)
        print sm.auc(fpr, tpr)
        print "Fuzzy hashing algorithm: %s, AUC: %f" %(hash_name, sm.auc(fpr, tpr))
        plt.figure(0)
        plt.plot(fpr, tpr, label=hash_name)
        plt.ylim(0.75, 1)
        plt.legend(loc='best')
        plt.title("ROC curve for different algorithms")
        plt.xlabel("False positive rate")
        plt.ylabel("True positive rate")
    plt.show()
Example #13
def relaxation(nodes, links):
    """ Gauss-Seidel relaxation for links """

    sources_idx = links["source"]
    targets_idx = links["target"]
    sources = nodes[sources_idx]
    targets = nodes[targets_idx]
    distances = links["distance"]
    strengths = links["strength"]

    D = targets["position"] - sources["position"]
    L = np.sqrt((D * D).sum(axis=1))

    # This avoids having to test L != 0 (i.e. I = np.where(L > 0))
    L = np.where(L, L, np.NaN)
    L = strengths * (L - distances) / L

    # Replace nan by 0, i.e. where L was 0
    L = np.nan_to_num(L)

    D *= L.reshape(len(L), 1)
    K = sources["weight"] / (sources["weight"] + targets["weight"])
    K = K.reshape(len(K), 1)

    # Note that a direct nodes['position'][links['source']] += K*D*(1-F)
    # wouldn't work as expected because of repeated indices
    F = nodes["fixed"][sources_idx].reshape(len(links), 1)
    W = K * D * (1 - F) * 0.1
    nodes["position"][:, 0] += np.bincount(sources_idx, W[:, 0], minlength=len(nodes))
    nodes["position"][:, 1] += np.bincount(sources_idx, W[:, 1], minlength=len(nodes))

    F = nodes["fixed"][targets_idx].reshape(len(links), 1)
    W = (1 - K) * D * (1 - F) * 0.1
    nodes["position"][:, 0] -= np.bincount(targets_idx, W[:, 0], minlength=len(nodes))
    nodes["position"][:, 1] -= np.bincount(targets_idx, W[:, 1], minlength=len(nodes))
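A small standalone illustration (not from the original source) of the note above: fancy-indexed in-place addition collapses repeated indices, whereas np.bincount with weights accumulates every contribution.

import numpy as np

idx = np.array([0, 0, 1])                     # index 0 appears twice
w = np.array([1.0, 2.0, 3.0])

a = np.zeros(3)
a[idx] += w                                   # only the last write to index 0 survives
print(a)                                      # [2. 3. 0.]

b = np.bincount(idx, weights=w, minlength=3)  # both contributions to index 0 are summed
print(b)                                      # [3. 3. 0.]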
Example #14
def _make_cm(X,M,R):
    N = len(X)

    # we pregenerate all indices
    
    i_idx,j_idx  = np.triu_indices(N - M)
    
    # We start by making Cm
    Em = _embed_seq(X, 1, M)
    dif =  np.abs(Em[i_idx] - Em[j_idx])
    max_dist = np.max(dif, 1)
    inrange_cm = max_dist <= R


    in_range_i = i_idx[inrange_cm]
    in_range_j = j_idx[inrange_cm]


    Cm = np.bincount(in_range_i, minlength=N-M+1)
    Cm += np.bincount(in_range_j, minlength=N-M+1)

    inrange_last = np.max(np.abs(Em[:-1] - Em[-1]),1) <= R
    Cm[inrange_last] += 1
    # all matches + self match
    Cm[-1] += np.sum(inrange_last) + 1

    return Cm.astype(np.float), in_range_i, in_range_j
Example #15
            def uniform_paa(directory, min_time, max_time, bin_count, timeseries_index, attribute_index):
                """
                Create waveforms using a piecewise aggregate approximation.

                :param directory: working directory for the timeseries
                :param min_time:
                :param max_time:
                :param bin_count:
                :param timeseries_index:
                :param attribute_index:
                :return: computed time series
                """
                import h5py
                import numpy
                import os
                import slycat.hdf5

                bin_edges = numpy.linspace(min_time, max_time, bin_count + 1)
                bin_times = (bin_edges[:-1] + bin_edges[1:]) / 2
                with h5py.File(os.path.join(directory, "timeseries-%s.hdf5" % timeseries_index), "r") as file:
                    original_times = slycat.hdf5.ArraySet(file)[0].get_data(0)[:]
                    original_values = slycat.hdf5.ArraySet(file)[0].get_data(attribute_index + 1)[:]
                bin_indices = numpy.digitize(original_times, bin_edges[1:])
                bin_counts = numpy.bincount(bin_indices, minlength=bin_count + 1)[1:]
                bin_sums = numpy.bincount(bin_indices, original_values, minlength=bin_count + 1)[1:]
                lonely_bins = (bin_counts < 2)
                bin_counts[lonely_bins] = 1
                bin_sums[lonely_bins] = numpy.interp(bin_times, original_times, original_values)[lonely_bins]
                bin_values = bin_sums / bin_counts
                return {
                    "input-index": timeseries_index,
                    "times": bin_times,
                    "values": bin_values,
                }
Example #16
    def getnnz(self, axis=None):
        """Get the count of explicitly-stored values (nonzeros)

        Parameters
        ----------
        axis : None, 0, or 1
            Select between the number of values across the whole matrix, in
            each column, or in each row.
        """
        if axis is None:
            nnz = len(self.data)
            if nnz != len(self.row) or nnz != len(self.col):
                raise ValueError('row, column, and data array must all be the '
                                 'same length')

            if self.data.ndim != 1 or self.row.ndim != 1 or \
                    self.col.ndim != 1:
                raise ValueError('row, column, and data arrays must be 1-D')

            return int(nnz)

        if axis < 0:
            axis += 2
        if axis == 0:
            return np.bincount(downcast_intp_index(self.col),
                               minlength=self.shape[1])
        elif axis == 1:
            return np.bincount(downcast_intp_index(self.row),
                               minlength=self.shape[0])
        else:
            raise ValueError('axis out of bounds')
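A quick standalone check (not from the original source) of the per-axis counting used above, assuming scipy is available: bincounting a COO matrix's row/column index arrays gives the number of stored values per row/column.

import numpy as np
from scipy.sparse import coo_matrix

m = coo_matrix((np.array([1.0, 2.0, 3.0]),           # data
                (np.array([0, 0, 2]),                # row indices
                 np.array([1, 2, 0]))),              # column indices
               shape=(3, 4))
print(np.bincount(m.col, minlength=m.shape[1]))      # per-column counts: [1 1 1 0]
print(np.bincount(m.row, minlength=m.shape[0]))      # per-row counts:    [2 0 1]
print(m.getnnz(axis=0), m.getnnz(axis=1))            # scipy reports the same counts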
Example #17
def plot_val_train(list_train,fig_name,epoch):
   import matplotlib.pyplot as plt

   if(epoch==-1):
       nd=numpy.array([[int(b), int(c), d] for (b, c, d) in list_train]) #all error
       idx=map(int,nd[:,0])
       err=nd[:,2]
       y = numpy.bincount(idx, err)[1:len(idx)+1] / numpy.bincount(idx)[1:len(idx)+1]
       x =[x+1 for x in range(len(y))]
       plt.title('Train Error change with epoch')
       plt.xlabel('epoch (x)')
   else:
       y = numpy.array([[b, c, d] for (b, c, d) in list_train if b==epoch ])[:,2] #all error
       x =numpy.array([[b, c, d] for (b, c, d) in list_train if b==epoch ])[:,1]  #all error
       plt.title('Train Error change with minibatch')
       plt.xlabel('minibatch (x)')


   plt.plot(x, y)
   plt.ylabel('error (y)')
   plt.grid(True)
   #f.subplots_adjust(hspace=0)
   plt.savefig(fig_name)
   #plt.setp([a.get_xticklabels() for a in f.axes[:-1]], visible=False)
   plt.show()
Example #18
def mean_avg_precision(x, y_val):
    rank = 1 - x
    # concatenate rank and y_val by column
    mat = np.matrix([rank, y_val]).T
    mat3 = mat[np.argsort(mat.A[:, 0])]
    mat3 = mat3[::-1]
    print mat3

    somma = 0
    for i in range(len(rank)):
        temp = 0
        if mat3[i, 1] == 0:
            a = mat3[:(i + 1), 1]
            a = np.asarray(a)
            a = a[:, 0]
            a = a.astype(int)
            count = np.bincount(a)  # counts the number of 0's and 1's up to entry i
            numb = count[0]         # number of 0's up to entry i
            temp = float(numb) / (i + 1)

        somma = somma + temp

    count = np.bincount(y_val)
    numb = count[0]
    avg = float(somma) / numb

    return avg
Example #19
def histogrammap(ra, dec, weights=None, nside=32, perarea=False, range=None):
    if range is not None:
        (ra1, ra2), (dec1, dec2) = range
        m  = (ra >= ra1)& (ra <= ra2)
        m &= (dec >= dec1)& (dec <= dec2)
        ra = ra[m]
        dec = dec[m]
        if weights is not None:
            weights = weights[m]

    ipix = ang2pix(nside, numpy.radians(90-dec), numpy.radians(ra))
    npix = nside2npix(nside)
    if perarea:
        npix = nside2npix(nside)
        sky = 360. ** 2 / numpy.pi
        area = 1. * (sky / npix)
    else:
        area = 1

    if weights is not None:
        w = numpy.bincount(ipix, weights=weights, minlength=npix)
        N = numpy.bincount(ipix, minlength=npix)
        w = w / area
        N = N / area
        return w, N
    else:
        w = 1.0 * numpy.bincount(ipix, minlength=npix)
        return w / area
Example #20
def compute_B_prob_using_part_prob(data, probs, weight_column='N_sig_sw', event_id_column='event_id', signB_column='signB',
                                   sign_part_column='signTrack', normed_signs=False, prior_probs=None, functor=None):
    """
    Compute p(B+) using probs for parts of event (tracks/vertices).
    
    :param data: pandas.DataFrame, data
    :param probs: probabilities for parts of events, numpy.array of shape [n_samples]
    :param weight_column: column for weights in data
    :param event_id_column: column for event id in data
    :param signB_column: column for event B sign in data
    :param sign_part_column: column for part sign in data
    
    :return: B sign array, B weight array, B+ prob array, B event id
    """
    result_event_id, data_ids = numpy.unique(data[event_id_column].values, return_inverse=True)
    if prior_probs is None:
        log_probs = numpy.log(probs) - numpy.log(1 - probs)
    else:
        new_probs = prior_probs * (1 - probs) + (1 - prior_probs) * probs
        log_probs = numpy.log(new_probs) - numpy.log(1 - new_probs)
    sign_weights = numpy.ones(len(log_probs))
    if normed_signs:
        for sign in [-1, 1]:
            maskB = (data[signB_column].values == sign)
            maskPart = (data[sign_part_column].values == 1)
            sign_weights[maskB * maskPart] = sum(maskB * (~maskPart)) * 1. /  sum(maskB * maskPart)
    log_probs *= sign_weights * data[sign_part_column].values
    result_logprob = numpy.bincount(data_ids, weights=log_probs)
    # simply reconstructing original
    result_label = numpy.bincount(data_ids, weights=data[signB_column].values) / numpy.bincount(data_ids)
    result_weight = numpy.bincount(data_ids, weights=data[weight_column]) / numpy.bincount(data_ids)
    return result_label, result_weight, expit(result_logprob), result_event_id
Example #21
def plot_calibration(p, labels, bins=[10, 20, 30, 40, 50, 60, 70, 80, 90], weight=None):
    """
    Plot calibration plot: probability vs true probability by percentile bins.
    
    :param array p: probability
    :param array labels: labels
    :param array bins: percentile values for numpy.percentile to compute bins ranges
    :param array weight: optional per-sample weights (defaults to an array of ones)
    
    """
    if weight is None:
        weight = numpy.ones(len(p))
    bins = numpy.percentile(p, bins)
    bins_index = numpy.searchsorted(bins, p)
    pos_tagged = numpy.bincount(bins_index, weights=labels * weight)
    neg_tagged = numpy.bincount(bins_index, weights=(1 - labels) * weight)
    p_ = pos_tagged / (pos_tagged + neg_tagged)

    bins = [0.] + list(bins) + [1.]
    bins = numpy.array(bins)
    bins_centers = (bins[1:] + bins[:-1]) / 2
    bins_error = (bins[1:] - bins[:-1]) / 2
    err = numpy.sqrt(neg_tagged * pos_tagged) / (pos_tagged + neg_tagged)**1.5
    plt.errorbar(bins_centers, p_, xerr=bins_error, yerr=err, fmt='.', linewidth=2)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.ylim(-0.1, 1.1)
    plt.xlim(-0.1, 1.1)
    plt.xlabel('probability')
    plt.ylabel('true probability')
Example #22
 def remove_wrongly_sized_connected_components(self, a, min_size, max_size, in_place):
     """
     Adapted from http://github.com/jni/ray/blob/develop/ray/morpho.py
     (MIT License)
     """
     bin_out = self.BinaryOut.value
     
     original_dtype = a.dtype
         
     if not in_place:
         a = a.copy()
     if min_size == 0 and (max_size is None or max_size > numpy.prod(a.shape)): # shortcut for efficiency
         return a
     
     try:
         component_sizes = numpy.bincount( a.ravel() )
     except TypeError:
         # On 32-bit systems, must explicitly convert from uint32 to int
         # (This fix is just for VM testing.)
         component_sizes = numpy.bincount( numpy.asarray(a.ravel(), dtype=int) )
     bad_sizes = component_sizes < min_size
     if max_size is not None:
         numpy.logical_or( bad_sizes, component_sizes > max_size, out=bad_sizes )
     
     bad_locations = bad_sizes[a]
     a[bad_locations] = 0
     if (bin_out):
         # Replace non-zero values with 1
         numpy.place(a,a,1)
     return numpy.array(a, dtype=original_dtype)
Example #23
def sanity_checks(R):
    #extract out condition names and assess that we have nice uniform time point distributions
    condnames = np.array([x[0] for x in list(R.columns)])
    tps = np.array([x[1] for x in list(R.columns)])
    conds = np.unique(condnames)
    #commence sanity checks
    if len(conds)!=2:
        sys.stderr.write('ERROR: More than two treatment specifications detected. Exiting\n')
        sys.exit(1)
    if np.sum(condnames==conds[0])!=np.sum(condnames==conds[1]):
        sys.stderr.write('ERROR: Unbalanced number of data points between the two treatments. Exiting\n')
        sys.exit(1)
    #okay, so if we made it this far we have the same number of data points and only two condition names
    tp_conds = []
    for cond in conds:
        cond_tps = tps[condnames==cond]
        #we should have the same exact number of reps per time point
        #so if we count up how many reps we have per time point, there should only be one unique value
        tpholder, inverse = np.unique(cond_tps, return_inverse=True)
        if len(np.unique(np.bincount(inverse)))!=1:
            sys.stderr.write('ERROR: Non-uniformity of time points for replicates detected in condition '+cond+'. Exiting\n')
            sys.exit(1)
        #well, if not, then we're fine and can store information
        tp_conds.append(tpholder)
        Nrepl = np.bincount(inverse)[0]
    #one last sanity check - are the time points the same?
    if not np.array_equal(tp_conds[0],tp_conds[1]):
        sys.stderr.write('ERROR: Different time points specified across the two treatments. Exiting\n')
        sys.exit(1)
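The replicate-uniformity check above combines np.unique(..., return_inverse=True) with np.bincount; a standalone sketch (not from the original source):

import numpy as np

cond_tps = np.array([0, 0, 0, 4, 4, 4, 8, 8, 8])   # three replicates per time point
tpholder, inverse = np.unique(cond_tps, return_inverse=True)
reps = np.bincount(inverse)
print(reps)                                        # [3 3 3]
print(len(np.unique(reps)) == 1)                   # True -> every time point has the same number of replicates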
Example #24
    def _elbo_grad_common(self, fep_mean, fep_sd, vcp_mean, vcp_sd,
                          vc_mean, vc_sd):

        # p(vc | vcp) contributions
        m = vcp_mean[self.ident]
        s = vcp_sd[self.ident]
        u = vc_mean**2 + vc_sd**2
        ve = np.exp(2*(s**2 - m))
        dm = u * ve - 1
        ds = -2 * u * ve * s
        vcp_mean_grad = np.bincount(self.ident, weights=dm)
        vcp_sd_grad = np.bincount(self.ident, weights=ds)

        vc_mean_grad = -vc_mean.copy() * ve
        vc_sd_grad = -vc_sd.copy() * ve

        # p(vcp) contributions
        vcp_mean_grad -= vcp_mean / self.vcp_p**2
        vcp_sd_grad -= vcp_sd / self.vcp_p**2

        # p(b) contributions
        fep_mean_grad = -fep_mean.copy() / self.fe_p**2
        fep_sd_grad = -fep_sd.copy() / self.fe_p**2

        return (fep_mean_grad, fep_sd_grad, vcp_mean_grad, vcp_sd_grad,
                vc_mean_grad, vc_sd_grad)
Example #25
def _bincount_mapper(ex, tiles, minlength=None):
  if len(tiles) > 1:
    result = np.bincount(tiles[0], weights=tiles[1], minlength=minlength)
  else:
    result = np.bincount(tiles[0], minlength=minlength)
  result_ex = extent.from_shape(result.shape)
  yield result_ex, result
Example #26
    def get_indicator(tm):
        if isinstance(tm,np.ndarray):
            edges_plain = np.zeros_like(image)

            counts = np.bincount(tm.ravel())
            for c in xrange(image.shape[2]):
                vals = np.bincount(tm.ravel(),image[:,:,c].ravel())
                edges_plain[:,:,c] = (vals/counts)[tm]
                
            return edges_plain
        else:
            indicator = np.zeros(image_lab.shape[:2]+(3,),np.float32)
            indicator_map = tm.copy_map_for_image(indicator)
            
            tm_color = tm.copy_map_for_image(image)        
            
            #data_management.add_array('diff_mat',diff_mat) steps,precondition_runs,accept_ratio
            for loc in xrange(len(tm)):
                key = tm.key_from_index(loc)
                im_data = np.reshape(tm_color[key],(-1,3))
                color = np.mean(im_data,axis=0)
                
                for c in xrange(color.shape[0]):
                    indicator_map[key][:,:,c] = color[c]
                
            return indicator
Example #27
def joint_and_marginals(labels1,labels2,smoothing=0.0):
    """
    marginal and joint distributions for a sequence of observations
    from a pair of discrete random variables, with additive smoothing on the
    joint distribution and the marginals in such a way that 
    marginal(smooth(conditional)) = smooth(marginal)
    """
    smoothing = float(smoothing)
    if len(labels1) != len(labels2):
        raise ValueError("label lists must have the same length")
    
    set1 = set(labels1)
    set2 = set(labels2)
    n1 = len(set1)
    n2 = len(set2)
    l2i1 = dict(zip(set1,range(len(set1))))
    l2i2 = dict(zip(set2,range(len(set2))))
    l1 = array([l2i1[l] for l in labels1])
    l2 = array([l2i2[l] for l in labels2])
    
    d1 = bincount(l1) + smoothing*n2
    d1 = d1/d1.sum()
    d2 = bincount(l2) + smoothing*n1
    d2 = d2/d2.sum()
    
    a_true = full((n1,n2),smoothing)
    for i,j in zip(l1,l2):
        a_true[i,j] += 1.0
    a_true = a_true/a_true.sum()
    
    return d1,d2,a_true
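A hypothetical usage sketch (not from the original source), assuming the function above is in scope together with the imports it relies on (e.g. from numpy import array, bincount, full):

d1, d2, joint = joint_and_marginals(['a', 'a', 'b'], ['x', 'y', 'y'], smoothing=0.0)
print(d1)             # marginal distribution of the first label sequence
print(d2)             # marginal distribution of the second label sequence
print(joint.sum())    # 1.0 -- the joint distribution is normalised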
Example #28
def infer_labels(x, wu, wp, z=None, y=None):
    t_max = []
    E_max = -1000000000
    for t0 in [1,2,3]:
        for t1 in [1,2,3]:
            for t2 in [1,2,3]:
                for t3 in [1,2,3]:
                    for t4 in [1,2,3]:
                        t = [t0,t1,t2,t3,t4]
                        if z is not None:
                            if not np.all(np.bincount(t) == z):
                                continue
                        E = compute_energy(x,wu,wp,t)
                        if y is not None:
                            if y.full_labeled:
                                E += np.sum(t!=y.full)
                            else:
                                w1 = np.zeros(4)
                                w2 = np.zeros(4)
                                tw = np.bincount(t)
                                w1[:tw.shape[0]] = tw
                                w2[:y.weak.shape[0]] = y.weak
                                E += np.sum(np.abs(w1 - w2))
                        if E > E_max:
                            t_max = t
                            E_max = E
    
    return t_max
Example #29
 def make_batch_prediction_ensemble(self,phi_x):
     m, nsub, nfeat = np.shape(phi_x);
     hat = np.zeros(m);
     sub_hat = self.predictor.predict(np.reshape(phi_x,(m*nsub,nfeat)));
     #TODO:return_rec
     for i in range(m):
         votes = sub_hat[i*nsub:(i+1)*nsub]
         vote_bins = np.bincount(votes);
         vote_bins = np.append(vote_bins,np.zeros(7-np.size(vote_bins)));
         vote_bins_sort = np.sort(vote_bins);
         vote_bins_sort = vote_bins_sort[::-1]; #Descending
         #if vote_bins_sort[0] - vote_bins_sort[1] <= 1:
              #Small margin vote. use back-up predictor
         if vote_bins_sort[0] - vote_bins_sort[1] == 1:
             #Retest ties
             tie_votes = self.tie_predictor.predict(phi_x[i,:,:]);
             tie_vote_bins = np.bincount(tie_votes) #Ensemble: aggregate votes
             tie_vote_bins = np.append(tie_vote_bins,np.zeros(7-np.size(tie_vote_bins)));
             total_vote_bins = tie_vote_bins + 1.1*vote_bins; #tie breaker is rbf
             tie_maxvote = np.max(total_vote_bins); #Get highest vote total
             tie_argmaxx = np.where(np.array(total_vote_bins)==tie_maxvote)[0]; #Find all regions with that vote total
             if np.size(tie_argmaxx)>1:
                 hat[i] = np.random.choice(tie_argmaxx);
             else:
                 #No Tie
                 hat[i] = tie_argmaxx[0];
         else:
             hat[i]=np.argmax(vote_bins);
     return hat
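The voting scheme above reduces to np.bincount followed by argmax; a tiny standalone sketch (not from the original source):

import numpy as np

votes = np.array([2, 2, 5, 5, 5, 1])
counts = np.bincount(votes, minlength=7)   # one slot per possible class 0..6
print(counts)                              # [0 1 2 0 0 3 0]
print(np.argmax(counts))                   # 5 -- the majority vote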
Example #30
def get_events_number(data, id_column='event_id'):
    """
    :return: number of B events
    """
    _, data_ids = numpy.unique(data[id_column], return_inverse=True)
    weights = numpy.bincount(data_ids, weights=data.N_sig_sw) / numpy.bincount(data_ids)
    return numpy.sum(weights)
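A standalone sketch (not from the original source) of the bincount ratio used above: dividing a weighted bincount by an unweighted one yields the mean weight per event.

import numpy as np

event_id = np.array([7, 7, 9, 9, 9])
sweights = np.array([0.5, 0.5, 2.0, 2.0, 2.0])
_, ids = np.unique(event_id, return_inverse=True)
mean_per_event = np.bincount(ids, weights=sweights) / np.bincount(ids)
print(mean_per_event)         # [0.5 2. ]
print(mean_per_event.sum())   # 2.5 -- one weight per event, which is what get_events_number sums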
Example #31
import numpy as np

# unique    : removes duplicates and returns the sorted values; (naturally) only values that actually occur are returned
# the unique argument return_counts=True also returns the number of occurrences of each value
a = np.unique([1, 1, 2, 2, 3, 3, 3, 2, 2, 4])
print(a)  # [1 2 3 4]

b = np.array(['a', 'b', 'b', 'c', 'a', 'd'])
b1 = np.unique(b)
print(b1)  # ['a' 'b' 'c' 'd']
b2 = np.unique(b, return_counts=True)
print(b2)
# (array(['a', 'b', 'c', 'd'], dtype='<U1'), array([2, 2, 1, 1], dtype=int64))
print(b2[0])  # ['a' 'b' 'c' 'd']
print(b2[1])  # [2 2 1 1]

data, count = np.unique(b, return_counts=True)
print(data)  # ['a' 'b' 'c' 'd']
print(count)  # [2 2 1 1]

# bincount : also reports a count of 0 for values that never occur
# the bincount argument minlength is convenient for this
# e.g. what if a die is rolled several times and some faces never come up?
# example) the die is rolled 6 times and the outcomes are 1, 1, 2, 2, 3, 4
print(np.bincount([1, 1, 2, 2, 3, 4], minlength=6))
# [0 2 2 1 1 0]
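# note: for a six-sided die (faces 1-6) you would pass minlength=7 so that face 6 also gets a 0 count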
Example #32
def create_patches(data,
                   labels,
                   distrib,
                   crop_size,
                   num_classes,
                   is_train=True):
    patches = []
    classes = num_classes * [0]
    classes_patches = []
    masks = []

    overall_count = 0
    flip_count = 0

    for i in range(len(distrib)):
        cur_x = distrib[i][0]
        cur_y = distrib[i][1]

        cur_patch = data[cur_x:cur_x + crop_size, cur_y:cur_y + crop_size, :]
        if len(cur_patch) != crop_size and len(cur_patch[0]) != crop_size:
            cur_x = cur_x - (crop_size - len(cur_patch))
            cur_y = cur_y - (crop_size - len(cur_patch[0]))
            cur_patch = data[cur_x:cur_x + crop_size,
                             cur_y:cur_y + crop_size, :]
        elif len(cur_patch) != crop_size:
            cur_x = cur_x - (crop_size - len(cur_patch))
            cur_patch = data[cur_x:cur_x + crop_size,
                             cur_y:cur_y + crop_size, :]
        elif len(cur_patch[0]) != crop_size:
            cur_y = cur_y - (crop_size - len(cur_patch[0]))
            cur_patch = data[cur_x:cur_x + crop_size,
                             cur_y:cur_y + crop_size, :]

        cur_mask_patch = labels[cur_x:cur_x + crop_size,
                                cur_y:cur_y + crop_size]

        assert len(cur_patch) == crop_size and len(cur_patch[0]) == crop_size, \
            "Error: Current PATCH size is " + str(len(cur_patch)) + "x" + str(len(cur_patch[0]))

        assert len(cur_mask_patch) == crop_size and len(cur_mask_patch[0]) == crop_size, \
            "Error: Current MASK size is " + str(len(cur_mask_patch)) + "x" + str(len(cur_mask_patch[0]))

        cur_class = np.argmax(np.bincount(
            cur_mask_patch.astype(int).flatten()))
        classes[int(cur_class)] += 1

        cur_mask = np.ones((crop_size, crop_size), dtype=np.bool)

        # DATA AUGMENTATION
        if is_train is True:
            # ROTATION AUGMENTATION
            cur_rot = np.random.randint(0, 360)
            possible_rotation = np.random.randint(0, 2)
            if possible_rotation == 1:  # default = 1
                # print 'rotation'
                cur_patch = scipy.ndimage.rotate(cur_patch,
                                                 cur_rot,
                                                 order=0,
                                                 reshape=False)
                cur_mask_patch = scipy.ndimage.rotate(cur_mask_patch,
                                                      cur_rot,
                                                      order=0,
                                                      reshape=False)
                cur_mask = scipy.ndimage.rotate(cur_mask,
                                                cur_rot,
                                                order=0,
                                                reshape=False)

            # NORMAL NOISE
            possible_noise = np.random.randint(0, 2)
            if possible_noise == 1:
                cur_patch = cur_patch + np.random.normal(
                    0, 0.01, cur_patch.shape)

            # FLIP AUGMENTATION
            possible_noise = np.random.randint(0, 3)
            if possible_noise == 0:
                patches.append(cur_patch)
                classes_patches.append(cur_mask_patch)
                masks.append(cur_mask)
            if possible_noise == 1:
                patches.append(np.flipud(cur_patch))
                classes_patches.append(np.flipud(cur_mask_patch))
                masks.append(np.flipud(cur_mask))
                flip_count += 1
            elif possible_noise == 2:
                patches.append(np.fliplr(cur_patch))
                classes_patches.append(np.fliplr(cur_mask_patch))
                masks.append(np.fliplr(cur_mask))
                flip_count += 1
        else:
            patches.append(cur_patch)
            classes_patches.append(cur_mask_patch)
            masks.append(cur_mask)

        overall_count += 1

    pt_arr = np.asarray(patches)
    cl_arr = np.asarray(classes_patches, dtype=np.int)
    mask_arr = np.asarray(masks, dtype=np.bool)

    return pt_arr, cl_arr, mask_arr
Example #33
refined_anchors = utils.apply_box_deltas(
    anchors[indices], rpn_bbox[b, :len(indices)] * config.RPN_BBOX_STD_DEV)
log("anchors", anchors)
log("refined_anchors", refined_anchors)

# Get list of positive anchors
positive_anchor_ids = np.where(rpn_match[b] == 1)[0]
print("Positive anchors: {}".format(len(positive_anchor_ids)))
negative_anchor_ids = np.where(rpn_match[b] == -1)[0]
print("Negative anchors: {}".format(len(negative_anchor_ids)))
neutral_anchor_ids = np.where(rpn_match[b] == 0)[0]
print("Neutral anchors: {}".format(len(neutral_anchor_ids)))

# ROI breakdown by class
for c, n in zip(dataset.class_names,
                np.bincount(mrcnn_class_ids[b].flatten())):
    if n:
        print("{:23}: {}".format(c[:20], n))

# Show positive anchors
visualize.draw_boxes(sample_image,
                     boxes=anchors[positive_anchor_ids],
                     refined_boxes=refined_anchors)

# In[17]:

# Show negative anchors
visualize.draw_boxes(sample_image, boxes=anchors[negative_anchor_ids])

# In[18]:
Example #34
    def __call__(self, data_source, parameters):

        ds = data_source.ds

        exp_time = parameters["FiducialExposureTime"]
        area = parameters["FiducialArea"]
        redshift = parameters["FiducialRedshift"]
        D_A = parameters["FiducialAngularDiameterDistance"].in_cgs()
        dist_fac = 1.0/(4.*np.pi*D_A.value*D_A.value*(1.+redshift)**2)
        src_ctr = parameters["center"]

        my_kT_min, my_kT_max = data_source.quantities.extrema("kT")

        self.spectral_model.prepare_spectrum(redshift)
        emid = self.spectral_model.emid
        ebins = self.spectral_model.ebins
        nchan = len(emid)

        citer = data_source.chunks([], "io")

        photons = {}
        photons["x"] = []
        photons["y"] = []
        photons["z"] = []
        photons["vx"] = []
        photons["vy"] = []
        photons["vz"] = []
        photons["dx"] = []
        photons["Energy"] = []
        photons["NumberOfPhotons"] = []

        spectral_norm = area.v*exp_time.v*dist_fac

        tot_num_cells = data_source.ires.shape[0]

        pbar = get_pbar("Generating photons ", tot_num_cells)

        cell_counter = 0

        for chunk in parallel_objects(citer):

            kT = chunk["kT"].v
            num_cells = len(kT)
            if num_cells == 0:
                continue
            vol = chunk["cell_volume"].in_cgs().v
            EM = (chunk["density"]/mp).in_cgs().v**2
            EM *= 0.5*(1.+self.X_H)*self.X_H*vol

            if isinstance(self.Zmet, string_types):
                metalZ = chunk[self.Zmet].v
            else:
                metalZ = self.Zmet*np.ones(num_cells)

            idxs = np.argsort(kT)

            kT_bins = np.linspace(kT_min, max(my_kT_max.v, kT_max), num=n_kT+1)
            dkT = kT_bins[1]-kT_bins[0]
            kT_idxs = np.digitize(kT[idxs], kT_bins)
            kT_idxs = np.minimum(np.maximum(1, kT_idxs), n_kT) - 1
            bcounts = np.bincount(kT_idxs).astype("int")
            bcounts = bcounts[bcounts > 0]
            n = int(0)
            bcell = []
            ecell = []
            for bcount in bcounts:
                bcell.append(n)
                ecell.append(n+bcount)
                n += bcount
            kT_idxs = np.unique(kT_idxs)

            cell_em = EM[idxs]*spectral_norm

            number_of_photons = np.zeros(num_cells, dtype="uint64")
            energies = np.zeros(self.photons_per_chunk)

            start_e = 0
            end_e = 0

            for ibegin, iend, ikT in zip(bcell, ecell, kT_idxs):

                kT = kT_bins[ikT] + 0.5*dkT

                n_current = iend-ibegin

                cem = cell_em[ibegin:iend]

                cspec, mspec = self.spectral_model.get_spectrum(kT)

                tot_ph_c = cspec.d.sum()
                tot_ph_m = mspec.d.sum()

                u = self.prng.uniform(size=n_current)

                cell_norm_c = tot_ph_c*cem
                cell_norm_m = tot_ph_m*metalZ[ibegin:iend]*cem
                cell_norm = np.modf(cell_norm_c + cell_norm_m)
                cell_n = np.uint64(cell_norm[1]) + np.uint64(cell_norm[0] >= u)

                number_of_photons[ibegin:iend] = cell_n

                end_e += int(cell_n.sum())

                if end_e > self.photons_per_chunk:
                    raise RuntimeError("Number of photons generated for this chunk "+
                                       "exceeds photons_per_chunk (%d)! " % self.photons_per_chunk +
                                       "Increase photons_per_chunk!")

                if self.method == "invert_cdf":
                    cumspec_c = np.cumsum(cspec.d)
                    cumspec_m = np.cumsum(mspec.d)
                    cumspec_c = np.insert(cumspec_c, 0, 0.0)
                    cumspec_m = np.insert(cumspec_m, 0, 0.0)

                ei = start_e
                for cn, Z in zip(number_of_photons[ibegin:iend], metalZ[ibegin:iend]):
                    if cn == 0: continue
                    # The rather verbose form of the few next statements is a
                    # result of code optimization and shouldn't be changed
                    # without checking for performance degradation. See
                    # https://bitbucket.org/yt_analysis/yt/pull-requests/1766
                    # for details.
                    if self.method == "invert_cdf":
                        cumspec = cumspec_c
                        cumspec += Z * cumspec_m
                        norm_factor = 1.0 / cumspec[-1]
                        cumspec *= norm_factor
                        randvec = self.prng.uniform(size=cn)
                        randvec.sort()
                        cell_e = np.interp(randvec, cumspec, ebins)
                    elif self.method == "accept_reject":
                        tot_spec = cspec.d
                        tot_spec += Z * mspec.d
                        norm_factor = 1.0 / tot_spec.sum()
                        tot_spec *= norm_factor
                        eidxs = self.prng.choice(nchan, size=cn, p=tot_spec)
                        cell_e = emid[eidxs]
                    energies[int(ei):int(ei + cn)] = cell_e
                    cell_counter += 1
                    pbar.update(cell_counter)
                    ei += cn

                start_e = end_e

            active_cells = number_of_photons > 0
            idxs = idxs[active_cells]

            photons["NumberOfPhotons"].append(number_of_photons[active_cells])
            photons["Energy"].append(ds.arr(energies[:end_e].copy(), "keV"))
            photons["x"].append((chunk["x"][idxs]-src_ctr[0]).in_units("kpc"))
            photons["y"].append((chunk["y"][idxs]-src_ctr[1]).in_units("kpc"))
            photons["z"].append((chunk["z"][idxs]-src_ctr[2]).in_units("kpc"))
            photons["vx"].append(chunk["velocity_x"][idxs].in_units("km/s"))
            photons["vy"].append(chunk["velocity_y"][idxs].in_units("km/s"))
            photons["vz"].append(chunk["velocity_z"][idxs].in_units("km/s"))
            photons["dx"].append(chunk["dx"][idxs].in_units("kpc"))

        pbar.finish()

        for key in photons:
            if len(photons[key]) > 0:
                photons[key] = uconcatenate(photons[key])
            elif key == "NumberOfPhotons":
                photons[key] = np.array([])
            else:
                photons[key] = YTArray([], photon_units[key])

        mylog.info("Number of photons generated: %d" % int(np.sum(photons["NumberOfPhotons"])))
        mylog.info("Number of cells with photons: %d" % len(photons["x"]))

        self.spectral_model.cleanup_spectrum()

        return photons
Example #35
def guess_numbers(img):

    strings = np.array([])
    strings_rotated = np.array([])
    strings_rotated_mirrored = np.array([])

    # create and train kNN model
    # samples = np.loadtxt('generalsamples.data', np.float32)
    # responses = np.loadtxt('generalresponses_slanted.data', np.float32)
    samples = np.loadtxt('redesign_samples1.data', np.float32)
    responses = np.loadtxt('redesign_responses1.data', np.float32)
    responses = responses.reshape((responses.size, 1))
    model = cv2.ml.KNearest_create()
    model.train(samples, cv2.ml.ROW_SAMPLE, responses)

    # Within each individual image find the contours
    gray = cv2.cvtColor(img.copy(), cv2.COLOR_BGR2GRAY)
    thresh = cv2.adaptiveThreshold(gray, 255, 1, 1, 11, 2)
    _, contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    results = np.array([])
    results_rotated = np.array([])
    results_rotated_mirrored = np.array([])

    for cnt in contours:
        if cv2.contourArea(cnt) > 40:
            # cv2.drawContours(img, cnt, -1, (0,0,255), 2)
            # cv2.imshow('detection', img)
            # cv2.waitKey(0)
            [x, y, w, h] = cv2.boundingRect(cnt)

            if h > 28:
                roi = thresh[y:y + h, x:x + w]
                roismall = cv2.resize(roi, (10, 10))
                roismall = roismall.reshape((1, 100))
                roismall = np.float32(roismall)

                # Use kNN model to try identify digit
                value, result, neigh_resp, dists = model.findNearest(roismall, k=11)
                cv2.imshow('roismall', roi)
                print 'Original {}, {}, {}'.format(value, result, neigh_resp)

                roi = rotate(roi, -15)
                roismall = cv2.resize(roi, (10, 10))
                roismall = roismall.reshape((1, 100))
                roismall = np.float32(roismall)

                # Use kNN model to try identify digit
                value_rotated, result, neigh_resp, dists = model.findNearest(roismall, k=11)
                cv2.imshow('roismall rotated', roi)
                print 'Rotated {}, {}, {}'.format(value_rotated, result, neigh_resp)

                roi = cv2.flip(roi, 1)
                roismall = cv2.resize(roi, (10, 10))
                roismall = roismall.reshape((1, 100))
                roismall = np.float32(roismall)

                # Use kNN model to try identify digit
                value_rotated_mirrored, result, neigh_resp, dists = model.findNearest(roismall, k=3)
                print 'Rotated and mirrored {}, {}, {}'.format(value_rotated_mirrored, result, neigh_resp)

                cv2.imshow('roismall rotated, mirrored', roi)
                cv2.waitKey(1)
                cv2.destroyAllWindows()

                results = np.append(results, value)
                results_rotated = np.append(results_rotated, value_rotated)
                results_rotated_mirrored = np.append(results_rotated_mirrored, value_rotated_mirrored)
                # If multiple numbers are found in image take number with greatest number of occurrences
            if len(results) > 0:

                results = results.astype(int)
                results_rotated = results_rotated.astype(int)
                results_rotated_mirrored = results_rotated_mirrored.astype(int)
                strings = np.append(strings, str(np.bincount(results).argmax()))
                strings_rotated = np.append(strings_rotated, str(np.bincount(results_rotated).argmax()))
                strings_rotated_mirrored = np.append(strings_rotated_mirrored, str(np.bincount(results_rotated_mirrored).argmax()))

            else:

                strings = np.append(strings, '')
    x= ["".join(strings), "".join(strings_rotated), "".join(strings_rotated_mirrored)]
    y = [results, results_rotated, results_rotated_mirrored]
    print x, y
    return  x
Example #36
                       figsize=(col_nbr * col_size, row_nbr * row_size))
fig.suptitle('plot RGB images from the training set',
             fontsize=2,
             fontweight='bold')
ax = ax.flatten()
for i in range(42):
    img = X_train[y_train == i][0]
    ax[i].imshow(img, interpolation='nearest')
plt.tight_layout()

# In[4]:

# Number of examples per label

fig, ax = plt.subplots()
bins = np.bincount(y_train)
ax.bar(range(n_classes), bins, .40, color='c')
plt.xlabel('Label Id')
plt.ylabel('Label Count')
plt.xticks(np.arange(0, n_classes, 2))
plt.title('Number of examples per label')
plt.tight_layout()
plt.grid(True)
plt.show()

# In[5]:

# Read Signname from csv file
import pandas as pd
signnames = pd.read_csv('signnames.csv')
signnames.head()
Example #37
    def make_lightcurve(toa,
                        dt,
                        tseg=None,
                        tstart=None,
                        gti=None,
                        mjdref=0,
                        use_hist=False):
        """
        Make a light curve out of photon arrival times.

        Parameters
        ----------
        toa: iterable
            list of photon arrival times

        dt: float
            time resolution of the light curve (the bin width)

        tseg: float, optional, default None
            The total duration of the light curve.
            If this is `None`, then the total duration of the light curve will
            be the interval between the arrival between the first and the last
            photon in `toa`.

                **Note**: If tseg is not divisible by dt (i.e. if tseg/dt is
                not an integer number), then the last fractional bin will be
                dropped!

        tstart: float, optional, default None
            The start time of the light curve.
            If this is None, the arrival time of the first photon will be used
            as the start time of the light curve.

        gti: 2-d float array
            [[gti0_0, gti0_1], [gti1_0, gti1_1], ...]
            Good Time Intervals

        use_hist : bool
            Use `np.histogram` instead of `np.bincount`. Might be advantageous
            for very short datasets.

        Returns
        -------
        lc: :class:`Lightcurve` object
            A light curve object with the binned light curve
        """

        toa = np.asarray(toa)
        # tstart is an optional parameter to set a starting time for
        # the light curve in case this does not coincide with the first photon
        if tstart is None:
            # if tstart is not set, assume light curve starts with first photon
            tstart = toa[0]

        # compute the number of bins in the light curve
        # for cases where tseg/dt is not integer.
        # TODO: check that this is always consistent and that we
        # are not throwing away good events.

        if tseg is None:
            tseg = toa[-1] - tstart

        logging.info("make_lightcurve: tseg: " + str(tseg))

        timebin = np.int64(tseg / dt)
        logging.info("make_lightcurve: timebin:  " + str(timebin))

        tend = tstart + timebin * dt
        good = (tstart <= toa) & (toa < tend)
        if not use_hist:
            binned_toas = ((toa[good] - tstart) // dt).astype(np.int64)
            counts = \
                np.bincount(binned_toas, minlength=timebin)
            time = tstart + np.arange(0.5, 0.5 + len(counts)) * dt
        else:
            histbins = np.arange(tstart, tend + dt, dt)
            counts, histbins = np.histogram(toa[good], bins=histbins)
            time = histbins[:-1] + 0.5 * dt

        return Lightcurve(time, counts, gti=gti, mjdref=mjdref, dt=dt)
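A quick standalone check (not from the original source) that the two binning paths in make_lightcurve agree: np.bincount on integer bin indices and np.histogram on the raw arrival times give the same counts.

import numpy as np

toa = np.array([0.1, 0.2, 0.9, 1.4, 2.2, 2.3, 2.4])
tstart, dt, nbin = 0.0, 1.0, 3
counts_bc = np.bincount(((toa - tstart) // dt).astype(np.int64), minlength=nbin)
counts_h, _ = np.histogram(toa, bins=tstart + dt * np.arange(nbin + 1))
print(counts_bc, counts_h)   # [3 1 3] [3 1 3]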
Example #38
file = tf.keras.utils
#raw_df = pd.read_csv('https://storage.googleapis.com/download.tensorflow.org/data/creditcard.csv')

#raw_df.to_csv(r'C:\Users\ELECTROBOT\Desktop\kaggle\tf_credit_card.csv', index=False)
raw_df = pd.read_csv(r'C:\Users\ELECTROBOT\Desktop\kaggle\tf_credit_card.csv')

raw_df.head()

raw_df[[
    'Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V26', 'V27', 'V28', 'Amount',
    'Class'
]].describe()

#class imbalance

neg, pos = np.bincount(raw_df['Class'])
total = neg + pos
print('Examples:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'.format(
    total, pos, 100 * pos / total))

#Clean, split and normalize the data

cleaned_df = raw_df.copy()

# You don't want the `Time` column.
cleaned_df.pop('Time')

# The `Amount` column covers a huge range. Convert to log-space.
eps = 0.001  # 0 => 0.1¢
cleaned_df['Log Ammount'] = np.log(cleaned_df.pop('Amount') + eps)
Example #39
import numpy as np
import h5py
from time import time
vocab_size = 50257
with h5py.File('./tokenized', "r") as f:
	x = f['tokens'][:10000000]
	y = np.bincount(x)
	if len(y) != vocab_size:
		raise NotImplementedError
	map = (-y).argsort().argsort()
	print(x, np.take(map,x))
	with h5py.File('./map', "w") as g:
		_ = g.create_dataset('map', shape=(vocab_size,), dtype='i4')
	with h5py.File('./map', "a") as g:
		# write the computed rank map into the dataset; rebinding a local
		# name (tmp = map) would not store anything in the file
		g['map'][:] = map

t = time()
count = 0
n = 100000000
with h5py.File('./tokenized', "r") as f:
	tokens_in = f['tokens']
	docs_in = f['docs']	
	tokens_len = tokens_in.len()
	docs_len = docs_in.len()	
	with h5py.File('./tokenized2', "w") as g:
		_ = g.create_dataset('tokens', shape=(tokens_len,), chunks=(1000000,), dtype='u2')
		_ = g.create_dataset('docs', shape=(docs_len,), chunks=(10000,), dtype='i4')
	with h5py.File('./tokenized2', "a") as g:
		tokens_out = g['tokens']
		docs_out = g['docs']
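The remapping above relies on the double-argsort idiom: `(-y).argsort().argsort()` gives, for every token id, its rank when ids are sorted by descending frequency (0 = most frequent). A tiny self-contained illustration with made-up counts:

import numpy as np

counts = np.array([5, 40, 10, 0, 25])   # occurrences of token ids 0..4
rank = (-counts).argsort().argsort()    # frequency rank of each id
print(rank)                             # -> [3 0 2 4 1]: id 1 is the most frequent, id 3 the rarest
print(np.take(rank, np.array([1, 1, 4, 0])))  # -> [0 0 1 3], tokens replaced by their ranks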
Exemple #40
0
    while counter < 72:
        with picamera.array.PiRGBArray(camera, size=(IMG_SIZE, IMG_SIZE)) as stream:
            camera.capture(stream, 'rgb', resize=(IMG_SIZE, IMG_SIZE))
            # At this point the image IN 3 CHANNELS is available as stream.array
            image = stream.array
            stream.truncate() #clear the stream for next capture
            stream.seek(0) # go back to start of stream and overwrite it
        image = np.dot(image, [.3, .6, .1]) #convert the 3-channel array to a single-channel IMG_SIZE x IMG_SIZE greyscale image
        batch = image.reshape(1, IMG_SIZE, IMG_SIZE, 1) #let tensorflow know there's only one image by padding a 1
        model_out = model.predict(batch)[0]
        if model_out[0] > .5:
            label = "Dry " + str(model_out[0])
            history[counter % 10] = 1
            
        else:
            label = "Wet " + str(model_out[0])
            history[counter % 10] = 0
        print(label)
        drynum = "Not enough data"
        if counter > 8:
            ratio = np.bincount(history, minlength=2)
            print("The ratio of dry to wet classifications in the past 10 captures is " + str(ratio[1]) + " to " + str(ratio[0]))
            drynum = str(ratio[1])
        with open(outfilename, 'a') as f:
            t = datetime.datetime.now().strftime("%H:%M:%S")
            f.write("{},{},{}\n".format(t,label,drynum))
        time.sleep(2.5)
        counter += 1
    camera.stop_recording()    
    
Exemple #41
0
    def phase_fold_animation(self, period_fold=None, cad_min=3):
        """
        Animate phase-folded light curve versus time.

        Parameters
        ----------
        period_fold : float, optional
            Specify a different period at which to fold.
        cad_min: int, optional
            Exclude light curve sections with fewer cadences than `cad_min`.
        """
        # Calculate the phase.
        if period_fold is None:
            period_fold = self.params.p_orb
        phase = self.phase_fold(period_fold=period_fold)
        # Calculate the cycle number.
        cycle = ((self.l_curve.times - self.params.bjd_0) //
                 self.params.p_orb).astype(int)
        # Start at zero
        cycle -= cycle.min()

        # Only use cycles with more cadences than `cad_min`.
        cycle_num = np.arange(cycle.max() + 1)[np.bincount(cycle) > cad_min]

        def data_gen():
            for ii, nn in enumerate(cycle_num):

                mask = np.abs(cycle - nn) <= 0

                phase_section = phase[mask]
                flux_section = self.l_curve.fluxes[mask]

                phase_sort = np.argsort(phase_section)

                yield phase_section[phase_sort], flux_section[phase_sort]

        def init():
            lt_zero = -self.l_curve.fluxes[self.l_curve.fluxes < 0]
            flux_min = -1.1 * np.percentile(lt_zero, 99)
            gt_zero = self.l_curve.fluxes[self.l_curve.fluxes > 0]
            flux_max = 1.1 * np.percentile(gt_zero, 99)
            ax.set_ylim(flux_min, flux_max)
            ax.set_xlim(-0.1, 1.1)
            ax.set_xlabel('Phase')
            ax.set_ylabel('Relative Flux')

            del xdata[:]
            del ydata[:]

            line.set_data(xdata, ydata)

            return line,

        fig, ax = plt.subplots()
        line, = ax.plot([], [], color='k', lw=1)
        xdata, ydata = [], []

        def run(data):
            # update the data
            t, y = data
            xdata = t
            ydata = y

            line.set_data(xdata, ydata)

            return line,

        ani = animation.FuncAnimation(fig,
                                      run,
                                      data_gen,
                                      blit=False,
                                      interval=100,
                                      repeat=True,
                                      init_func=init)
        plt.show()
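The cycle selection above keeps only orbital cycles that contain more than `cad_min` cadences, using `np.bincount` on the integer cycle index. A small standalone illustration of that mask, with invented cycle indices:

import numpy as np

cycle = np.array([0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2])   # cycle index of each cadence
cad_min = 3
counts = np.bincount(cycle)                           # cadences per cycle -> [4 2 5]
cycle_num = np.arange(cycle.max() + 1)[counts > cad_min]
print(cycle_num)                                      # -> [0 2]; cycle 1 has too few cadences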
Exemple #42
0
def add_phase_interconnections(net,
                               snow_partitioning_n,
                               voxel_size=1,
                               marching_cubes_area=False,
                               alias=None):
    r"""
    This function connects networks of two or more phases together by
    interconnecting neighbouring nodes inside different phases.

    The resulting network can be used for the study of transport and kinetics
    at interphase of two phases.

    Parameters
    ----------
    net : dict
        A dictionary containing the structural information of two or more
        phase networks (2D or 3D). The dictionary format must be the same as
        that produced by the porespy region_to_network function.

    snow_partitioning_n : tuple
        The output generated by the snow_partitioning_n function. The tuple
        should contain the phase_max_label values and the original image of
        the material.

    voxel_size : scalar
        The resolution of the image, expressed as the length of one side of a
        voxel, so the volume of a voxel would be **voxel_size**-cubed.  The
        default is 1, which is useful when overlaying the PNM on the original
        image since the scale of the image is always 1 unit length per voxel.

    marching_cubes_area : bool
        If ``True`` then the surface area and interfacial area between regions
        will be calculated using the marching cubes algorithm. This is a more
        accurate representation of the area in the extracted network, but is
        quite slow, so
        it is ``False`` by default.  The default method simply counts voxels
        so does not correctly account for the voxelated nature of the images.

    alias : dict (Optional)
        A dictionary that assigns a unique image label to a specific phase.
        For example {1: 'Solid'} will show all structural properties associated
        with label 1 as Solid phase properties.
        If ``None``, the default labelling will be used, i.e. {1: 'Phase1', ...}.

    Returns
    -------
    A dictionary containing network information of individual and connected
    networks. The dictionary names use the OpenPNM convention so it may be
    converted directly to an OpenPNM network object using the ``update``
    command.

    """
    # -------------------------------------------------------------------------
    # Get alias if provided by user
    im = snow_partitioning_n.im
    al = _create_alias_map(im, alias=alias)
    # -------------------------------------------------------------------------
    # Find interconnection and interfacial area between ith and jth phases
    conns1 = net['throat.conns'][:, 0]
    conns2 = net['throat.conns'][:, 1]
    label = net['pore.label'] - 1

    num = snow_partitioning_n.phase_max_label
    num = [0, *num]
    phases_num = np.unique(im * 1)
    phases_num = np.trim_zeros(phases_num)
    for i0, i1 in enumerate(phases_num):
        loc1 = np.logical_and(conns1 >= num[i0], conns1 < num[i0 + 1])
        loc2 = np.logical_and(conns2 >= num[i0], conns2 < num[i0 + 1])
        loc3 = np.logical_and(label >= num[i0], label < num[i0 + 1])
        net['throat.{}'.format(al[i1])] = loc1 * loc2
        net['pore.{}'.format(al[i1])] = loc3
        if i1 == phases_num[-1]:
            loc4 = np.logical_and(conns1 < num[-1], conns2 >= num[-1])
            loc5 = label >= num[-1]
            net['throat.boundary'] = loc4
            net['pore.boundary'] = loc5
        for j0, j1 in enumerate(phases_num):
            if j0 > i0:
                pi_pj_sa = np.zeros_like(label, dtype=float)
                loc6 = np.logical_and(conns2 >= num[j0], conns2 < num[j0 + 1])
                pi_pj_conns = loc1 * loc6
                net['throat.{}_{}'.format(al[i1], al[j1])] = pi_pj_conns
                if any(pi_pj_conns):
                    # ---------------------------------------------------------
                    # Calculates phase[i] interfacial area that connects with
                    # phase[j] and vice versa
                    p_conns = net['throat.conns'][:, 0][pi_pj_conns]
                    s_conns = net['throat.conns'][:, 1][pi_pj_conns]
                    ps = net['throat.area'][pi_pj_conns]
                    p_sa = np.bincount(p_conns, ps)
                    # trim zeros at head/tail position to avoid extra bins
                    p_sa = np.trim_zeros(p_sa)
                    i_index = np.arange(min(p_conns), max(p_conns) + 1)
                    j_index = np.arange(min(s_conns), max(s_conns) + 1)
                    s_pa = np.bincount(s_conns, ps)
                    s_pa = np.trim_zeros(s_pa)
                    pi_pj_sa[i_index] = p_sa
                    pi_pj_sa[j_index] = s_pa
                    # ---------------------------------------------------------
                    # Calculates interfacial area using marching cube method
                    if marching_cubes_area:
                        ps_c = net['throat.area'][pi_pj_conns]
                        p_sa_c = np.bincount(p_conns, ps_c)
                        p_sa_c = np.trim_zeros(p_sa_c)
                        s_pa_c = np.bincount(s_conns, ps_c)
                        s_pa_c = np.trim_zeros(s_pa_c)
                        pi_pj_sa[i_index] = p_sa_c
                        pi_pj_sa[j_index] = s_pa_c
                    net[f'pore.{al[i1]}_{al[j1]}_area'] = pi_pj_sa * voxel_size**2
    return net
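The interfacial-area bookkeeping above uses `np.bincount` with a `weights` argument, which sums the throat areas that share the same pore index instead of merely counting them. A minimal sketch of that aggregation pattern, with invented indices and areas unrelated to any real network:

import numpy as np

pore_index = np.array([2, 2, 3, 5, 5, 5])                 # pore each throat attaches to
throat_area = np.array([1.0, 0.5, 2.0, 0.2, 0.3, 0.5])    # area of each throat
area_per_pore = np.bincount(pore_index, weights=throat_area)
print(area_per_pore)                                      # -> [0.  0.  1.5 2.  0.  1. ]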
Exemple #43
0
 # Instanciate a PCA object for the sake of easy visualisation
 pca = PCA(n_components=2)
 
 # Fit and transform x to visualise inside a 2D feature space
 X_vis= pca.fit_transform(X)
 
 # Apply the Neighbourhood Cleaning Rule (NeighbourhoodCleaningRule from imbalanced-learn)
 ncl = NeighbourhoodCleaningRule(random_state = 42, return_indices=True)
 X_resampled, y_resampled, idx_resampled = ncl.fit_sample(X, y)
 X_res_vis = pca.transform(X_resampled)
 
 fig = plt.figure()
 ax = fig.add_subplot(1, 1, 1)
 idx_samples_removed = np.setdiff1d(np.arange(X_vis.shape[0]), idx_resampled)
 
 frq = np.bincount(y_resampled)
 aar_neg = np.transpose((y_resampled==0).nonzero()) 
 aar_pos = np.transpose((y_resampled==1).nonzero())
 idx_class_0 = y_resampled == 0
 

 
 h5filename = "histonemodTF_resample_ncl.h5"
 if os.path.exists(h5filename):
	#os.remove(h5filename)
	h5file = h5.File(h5filename,'a')
	#in_group = h5file.get('input')
	h5file.create_dataset('/input/H3K27me3_RPKM',data = X_resampled, dtype = np.float64, compression ='gzip')
	#out_group = h5file.get('output')
	h5file.create_dataset('/output/H3K27me3',data = y_resampled, dtype = np.int8, compression ='gzip')	
	
Exemple #44
0
    def get_light_state(self, light):
        """Determines the current color of the traffic light

        Args:
            light (TrafficLight): light to classify

        Returns:
            int: ID of traffic light color (specified in styx_msgs/TrafficLight)

        """
        if (not self.has_image):
            self.prev_light_loc = None
            return False

        self.image_count = self.image_count + 1
        cv_image = self.bridge.imgmsg_to_cv2(self.camera_image, "rgb8")
        height, width, channels = cv_image.shape

        #x, y = self.project_to_image_plane(light.pose.pose.position)

        #file_name = "/home/shyam/Work/SDCNDP/Project13/Vidyut-CarND-Capstone/ros/images/img_light_tl" + str(self.image_count) + ".jpg"
        #log_file = open("/home/shyam/Work/SDCNDP/Project13/Vidyut-CarND-Capstone/ros/images/log.txt", 'a')

        #TODO use light location to zoom in on traffic light in image
        if height != 600 or width != 800:
            cv_image = cv2.resize(cv_image, (800, 600),
                                  interpolation=cv2.INTER_AREA)

        #cv2.imwrite(file_name, cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR))
        pred_state = TrafficLight.UNKNOWN

        #print("Tensorflow version " + tflow.__version__)

        with self.detection_graph.as_default():
            with tflow.Session(graph=self.detection_graph) as sess:
                # Definite input and output Tensors for detection_graph
                image_tensor = self.detection_graph.get_tensor_by_name(
                    'image_tensor:0')
                # Each box represents a part of the image where a particular object was detected.
                detection_boxes = self.detection_graph.get_tensor_by_name(
                    'detection_boxes:0')
                # Each score represent how level of confidence for each of the objects.
                # Score is shown on the result image, together with the class label.
                detection_scores = self.detection_graph.get_tensor_by_name(
                    'detection_scores:0')
                detection_classes = self.detection_graph.get_tensor_by_name(
                    'detection_classes:0')
                num_detections = self.detection_graph.get_tensor_by_name(
                    'num_detections:0')

                #image_np = load_image_into_numpy_array(cv_image)
                image_np_expanded = np.expand_dims(cv_image, axis=0)
                (boxes, scores, classes,
                 num) = sess.run([
                     detection_boxes, detection_scores, detection_classes,
                     num_detections
                 ],
                                 feed_dict={image_tensor: image_np_expanded})

        class_id = []
        for i in range(len(scores[0])):
            if scores[0][i] > 0.3:
                class_id.append(classes[0][i])

        if (class_id):
            class_val = np.argmax(np.bincount(np.array(class_id, dtype=int)))
            if class_val == 1:
                pred_state = TrafficLight.RED
            elif class_val == 2:
                pred_state = TrafficLight.YELLOW
            elif class_val == 3:
                pred_state = TrafficLight.GREEN

        #print("pred_state {}".format(pred_state))
        #print("ground_truth {}".format(light.state))

        #log_file.write(file_name + " pred = " + str(pred_state) + " truth = " + str(light.state) + "\n")
        #log_file.close()

        #Get classification
        #return self.light_classifier.get_classification(cv_image)

        return pred_state
    def __init__(self, count, datagen, batch_size, num_label, init_states, seq_length, width, height,
                 sort_by_duration=True,
                 is_bi_graphemes=False,
                 language="zh",
                 zh_type="zi",
                 partition="train",
                 buckets=[],
                 save_feature_as_csvfile=False,
                 num_parts=1,
                 part_index=0,
                 noise_percent=0.4,
                 fbank=False
                 ):
        super(BucketSTTIter, self).__init__()

        self.maxLabelLength = num_label
        # global param
        self.batch_size = batch_size
        self.count = count
        self.num_label = num_label
        self.init_states = init_states
        self.init_state_arrays = [mx.nd.zeros(x[1]) for x in init_states]
        self.width = width
        self.height = height
        self.datagen = datagen
        self.label = None
        self.is_bi_graphemes = is_bi_graphemes
        self.language = language
        self.zh_type = zh_type
        self.num_parts = num_parts
        self.part_index = part_index
        self.noise_percent = noise_percent
        self.fbank = fbank
        # self.partition = datagen.partition
        if partition == 'train':
            durations = datagen.train_durations
            audio_paths = datagen.train_audio_paths
            texts = datagen.train_texts
        elif partition == 'validation':
            durations = datagen.val_durations
            audio_paths = datagen.val_audio_paths
            texts = datagen.val_texts
        elif partition == 'test':
            durations = datagen.test_durations
            audio_paths = datagen.test_audio_paths
            texts = datagen.test_texts
        else:
            raise Exception("Invalid partition to load metadata. "
                            "Must be train/validation/test")
        log = LogUtil().getlogger()
        # if sortagrad
        if sort_by_duration:
            durations, audio_paths, texts = datagen.sort_by_duration(durations,
                                                                     audio_paths,
                                                                     texts)
        else:
            durations = durations
            audio_paths = audio_paths
            texts = texts
        self.trainDataList = list(zip(durations, audio_paths, texts))

        # self.trainDataList = [d for index, d in enumerate(zip(durations, audio_paths, texts)) if index % self.num_parts == self.part_index]
        # log.info("partition: %s, num_works: %d, part_index: %d 's data size is %d of all size is %d" %
        #          (partition, self.num_parts, self.part_index, len(self.trainDataList), len(durations)))
        self.trainDataIter = iter(self.trainDataList)
        self.is_first_epoch = True

        data_lengths = [int(d * 100) for d in durations]
        if len(buckets) == 0:
            buckets = [i for i, j in enumerate(np.bincount(data_lengths))
                       if j >= batch_size]
        if len(buckets) == 0:
            raise Exception(
                'There are no valid buckets. This may be caused by a batch_size that is too large for every bucket. max bincount:%d batch_size:%d' % (
                    max(np.bincount(data_lengths)), batch_size))
        buckets.sort()
        ndiscard = 0
        self.data = [[] for _ in buckets]
        for i, sent in enumerate(data_lengths):
            buck = bisect.bisect_left(buckets, sent)
            if buck == len(buckets):
                ndiscard += 1
                continue
            self.data[buck].append(self.trainDataList[i])
        if ndiscard != 0:
            print("WARNING: discarded %d sentences longer than the largest bucket." % ndiscard)
        # self.num_parts = 3 debug
        # self.part_index = 2
        for index_buck, buck in enumerate(self.data):
            self.data[index_buck] = [d for index_d, d in enumerate(
                self.data[index_buck][:len(self.data[index_buck]) // self.num_parts * self.num_parts]) if
                                     index_d % self.num_parts == self.part_index]
            log.info("partition: %s, num_works: %d, part_index: %d %d's data size is %d " %
                     (partition, self.num_parts, self.part_index, index_buck, len(self.data[index_buck])))
        self.buckets = buckets
        self.nddata = []
        self.ndlabel = []
        self.default_bucket_key = max(buckets)

        self.idx = []
        for i, buck in enumerate(self.data):
            self.idx.extend([(i, j) for j in range(0, len(buck) - batch_size + 1, batch_size)])
        self.curr_idx = 0

        if not self.fbank:
            self.provide_data = [('data', (self.batch_size, self.default_bucket_key, width * height))] + init_states
        else:
            self.provide_data = [('data', (self.batch_size, 3, self.default_bucket_key, 41))] + init_states
        self.provide_label = [('label', (self.batch_size, self.maxLabelLength))]
        self.save_feature_as_csvfile = save_feature_as_csvfile
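The bucketing logic above turns every utterance length (scaled by 100) that occurs at least `batch_size` times into its own bucket, then assigns each sample to the smallest bucket that fits it with `bisect.bisect_left`, discarding anything longer than the largest bucket. A small illustration of that rule with made-up durations:

import bisect
import numpy as np

data_lengths = [120, 120, 120, 250, 250, 250, 250, 400]   # scaled utterance lengths
batch_size = 3
buckets = [i for i, j in enumerate(np.bincount(data_lengths)) if j >= batch_size]
print(buckets)   # -> [120, 250]; length 400 occurs too rarely to form its own bucket

for length in data_lengths:
    buck = bisect.bisect_left(buckets, length)
    print(length, 'discarded' if buck == len(buckets) else 'bucket %d' % buckets[buck])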
    y_8 = y_train_All[idx_8]

    idx_9 = np.array(np.where(y_train_All == 9)).T
    idx_9 = idx_9[0:2, 0]
    X_9 = X_train_All[idx_9, :, :, :]
    y_9 = y_train_All[idx_9]

    X_train = np.concatenate(
        (X_0, X_1, X_2, X_3, X_4, X_5, X_6, X_7, X_8, X_9), axis=0)
    y_train = np.concatenate(
        (y_0, y_1, y_2, y_3, y_4, y_5, y_6, y_7, y_8, y_9), axis=0)

    print('X_train shape:', X_train.shape)
    print(X_train.shape[0], 'train samples')

    print('Distribution of Training Classes:', np.bincount(y_train))

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_valid = X_valid.astype('float32')
    X_Pool = X_Pool.astype('float32')
    X_train /= 255
    X_valid /= 255
    X_Pool /= 255
    X_test /= 255

    Y_test = np_utils.to_categorical(y_test, nb_classes)
    Y_valid = np_utils.to_categorical(y_valid, nb_classes)
    Y_Pool = np_utils.to_categorical(y_Pool, nb_classes)

    #loss values in each experiment
def GetInterestPoints(groundTruth, geoTransform, imgNum, maxRows, maxCols,
                      limits, railSegments, stepSize, patchSize):
    print "Getting interest Points"
    targetPoints = []
    relevantPoints = []
    iniRow = max(0, limits[0] - patchSize)
    iniRow -= iniRow % stepSize
    endRow = min(maxRows, limits[2] + patchSize)
    endRow += stepSize - (endRow % stepSize)
    iniCol = max(0, limits[1] - patchSize)
    iniCol -= iniCol % stepSize
    endCol = min(maxCols, limits[3] + patchSize)
    endCol += stepSize - (endCol % stepSize)

    print(iniRow, endRow, (endRow - iniRow) / stepSize)
    print(iniCol, endCol, (endCol - iniCol) / stepSize)

    for i in range(iniRow, endRow, stepSize):
        for j in range(iniCol, endCol, stepSize):
            #for i in range(0, maxRows, stepSize):
            #for j in range(0,maxCols, stepSize):

            #print (i,j)

            ulc, urc = j, min(j + patchSize, maxCols - 1)
            ulr = urr = i

            dlc, drc = ulc, urc
            dlr = drr = min(i + patchSize, maxRows - 1)

            # Se tem algum pixel de erosao
            if (np.any(groundTruth[ulr:dlr, ulc:urc])):
                count = np.bincount(
                    (groundTruth[ulr:dlr, ulc:urc] > 0).astype(int).flatten())
                if count[1] >= count[0]:
                    targetPoints.append((imgNum, i, j))
                else:
                    relevantPoints.append((imgNum, i, j))
            elif i > endRow or i < iniRow or j > endCol or j < iniCol:
                continue
            else:
                # Checa se o patch tem intersecao com a ferrovia

                ul = utils.PixelToCoordinate(geoTransform, (ulc, ulr))
                ur = utils.PixelToCoordinate(geoTransform, (urc, urr))
                dl = utils.PixelToCoordinate(geoTransform, (dlc, dlr))
                dr = utils.PixelToCoordinate(geoTransform, (drc, drr))
                '''
                box = createPolygon([ul, ur, dr, dl])

                rails.ResetReading()
                for feature in rails:
                    railway = feature.GetGeometryRef()
                    railway.Transform(transform)
                    if box.Intersect(railway):
                        #print (i, j)
                        relevantPoints.append((i,j))
                        break
                '''

                seg1 = (ul, ur)
                seg2 = (ur, dr)
                seg3 = (dr, dl)
                seg4 = (dl, ul)

                for segment in railSegments:
                    if segmentIntersect(seg1, segment) or segmentIntersect(
                            seg2, segment) or segmentIntersect(
                                seg3, segment) or segmentIntersect(
                                    seg4, segment):
                        relevantPoints.append((imgNum, i, j))
                        break

    return targetPoints, relevantPoints
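In the loop above a patch is labelled a target when at least half of its ground-truth pixels are positive, which is decided with a two-bin `np.bincount` over the binarised patch. A standalone sketch of that majority test with a toy patch:

import numpy as np

patch_gt = np.array([[0, 3, 7],
                     [0, 0, 2],
                     [0, 5, 0]])     # any value > 0 marks an erosion pixel
count = np.bincount((patch_gt > 0).astype(int).flatten(), minlength=2)
print(count)                         # -> [5 4]: 5 background pixels, 4 erosion pixels
print(count[1] >= count[0])          # -> False, so this would be a relevant (non-target) patch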
Exemple #48
0
def _paga_graph(adata,
                ax,
                solid_edges=None,
                dashed_edges=None,
                adjacency_solid=None,
                adjacency_dashed=None,
                transitions=None,
                threshold=None,
                root=0,
                colors=None,
                labels=None,
                fontsize=None,
                fontweight=None,
                text_kwds=None,
                node_size_scale=1,
                node_size_power=0.5,
                edge_width_scale=1,
                normalize_to_color='reference',
                title=None,
                pos=None,
                cmap=None,
                frameon=True,
                min_edge_width=None,
                max_edge_width=None,
                export_to_gexf=False,
                cax=None,
                colorbar=None,
                use_raw=True,
                cb_kwds={},
                single_component=False,
                arrowsize=30):
    import networkx as nx

    node_labels = labels  # rename for clarity
    if (node_labels is not None and isinstance(node_labels, str)
            and node_labels != adata.uns['paga']['groups']):
        raise ValueError(
            'Provide a list of group labels for the PAGA groups {}, not {}.'.
            format(adata.uns['paga']['groups'], node_labels))
    groups_key = adata.uns['paga']['groups']
    if node_labels is None:
        node_labels = adata.obs[groups_key].cat.categories

    if (colors is None or colors == groups_key) and groups_key is not None:
        if (groups_key + '_colors' not in adata.uns
                or len(adata.obs[groups_key].cat.categories) != len(
                    adata.uns[groups_key + '_colors'])):
            utils.add_colors_for_categorical_sample_annotation(
                adata, groups_key)
        colors = adata.uns[groups_key + '_colors']
        for iname, name in enumerate(adata.obs[groups_key].cat.categories):
            if name in settings.categories_to_ignore: colors[iname] = 'grey'

    nx_g_solid = nx.Graph(adjacency_solid)
    if dashed_edges is not None:
        nx_g_dashed = nx.Graph(adjacency_dashed)

    # convert pos to dict
    if isinstance(pos, str):
        if not pos.endswith('.gdf'):
            raise ValueError(
                'Currently only supporting reading positions from .gdf files. '
                'Consider generating them using, for instance, Gephi.')
        s = ''  # read the node definition from the file
        with open(pos) as f:
            f.readline()
            for line in f:
                if line.startswith('edgedef>'):
                    break
                s += line
        from io import StringIO
        df = pd.read_csv(StringIO(s), header=None)
        pos = df[[4, 5]].values
    pos_array = pos
    # convert to dictionary
    pos = {n: [p[0], p[1]] for n, p in enumerate(pos)}

    # uniform color
    if isinstance(colors, str) and is_color_like(colors):
        colors = [colors for c in range(len(node_labels))]

    # color degree of the graph
    if isinstance(colors, str) and colors.startswith('degree'):
        # see also tools.paga.paga_degrees
        if colors == 'degree_dashed':
            colors = [d for _, d in nx_g_dashed.degree(weight='weight')]
        elif colors == 'degree_solid':
            colors = [d for _, d in nx_g_solid.degree(weight='weight')]
        else:
            raise ValueError(
                '`colors` must be either "degree_dashed" or "degree_solid".')
        colors = (np.array(colors) - np.min(colors)) / (np.max(colors) -
                                                        np.min(colors))

    # plot gene expression
    var_names = adata.var_names if adata.raw is None else adata.raw.var_names
    if isinstance(colors, str) and colors in var_names:
        x_color = []
        cats = adata.obs[groups_key].cat.categories
        for icat, cat in enumerate(cats):
            subset = (cat == adata.obs[groups_key]).values
            if adata.raw is not None and use_raw:
                adata_gene = adata.raw[:, colors]
            else:
                adata_gene = adata[:, colors]
            x_color.append(np.mean(adata_gene.X[subset]))
        colors = x_color

    # plot continuous annotation
    if (isinstance(colors, str) and colors in adata.obs
            and not is_categorical_dtype(adata.obs[colors])):
        x_color = []
        cats = adata.obs[groups_key].cat.categories
        for icat, cat in enumerate(cats):
            subset = (cat == adata.obs[groups_key]).values
            x_color.append(adata.obs.loc[subset, colors].mean())
        colors = x_color

    # plot categorical annotation
    if (isinstance(colors, str) and colors in adata.obs
            and is_categorical_dtype(adata.obs[colors])):
        from ... import utils as sc_utils
        asso_names, asso_matrix = sc_utils.compute_association_matrix_of_groups(
            adata,
            prediction=groups_key,
            reference=colors,
            normalization='reference' if normalize_to_color else 'prediction')
        utils.add_colors_for_categorical_sample_annotation(adata, colors)
        asso_colors = sc_utils.get_associated_colors_of_groups(
            adata.uns[colors + '_colors'], asso_matrix)
        colors = asso_colors

    if len(colors) < len(node_labels):
        print(node_labels, colors)
        raise ValueError(
            '`colors` list needs to be at least as long as `groups`/`node_labels` list.'
        )

    # count number of connected components
    n_components, labels = scipy.sparse.csgraph.connected_components(
        adjacency_solid)
    if n_components > 1 and not single_component:
        logg.debug(
            'Graph has more than a single connected component. '
            'To restrict to this component, pass `single_component=True`.')
    if n_components > 1 and single_component:
        component_sizes = np.bincount(labels)
        largest_component = np.where(
            component_sizes == component_sizes.max())[0][0]
        adjacency_solid = adjacency_solid.tocsr()[labels ==
                                                  largest_component, :]
        adjacency_solid = adjacency_solid.tocsc()[:,
                                                  labels == largest_component]
        colors = np.array(colors)[labels == largest_component]
        node_labels = np.array(node_labels)[labels == largest_component]
        cats_dropped = adata.obs[groups_key].cat.categories[
            labels != largest_component].tolist()
        logg.info(
            'Restricting graph to largest connected component by dropping categories\n'
            f'{cats_dropped}')
        nx_g_solid = nx.Graph(adjacency_solid)
        if dashed_edges is not None:
            raise ValueError(
                '`single_component` only if `dashed_edges` is `None`.')

    # edge widths
    base_edge_width = edge_width_scale * 5 * rcParams['lines.linewidth']

    # draw dashed edges
    if dashed_edges is not None:
        widths = [x[-1]['weight'] for x in nx_g_dashed.edges(data=True)]
        widths = base_edge_width * np.array(widths)
        if max_edge_width is not None:
            widths = np.clip(widths, None, max_edge_width)
        nx.draw_networkx_edges(nx_g_dashed,
                               pos,
                               ax=ax,
                               width=widths,
                               edge_color='grey',
                               style='dashed',
                               alpha=0.5)

    # draw solid edges
    if transitions is None:
        widths = [x[-1]['weight'] for x in nx_g_solid.edges(data=True)]
        widths = base_edge_width * np.array(widths)
        if min_edge_width is not None or max_edge_width is not None:
            widths = np.clip(widths, min_edge_width, max_edge_width)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            nx.draw_networkx_edges(nx_g_solid,
                                   pos,
                                   ax=ax,
                                   width=widths,
                                   edge_color='black')
    # draw directed edges
    else:
        adjacency_transitions = adata.uns['paga'][transitions].copy()
        if threshold is None: threshold = 0.01
        adjacency_transitions.data[adjacency_transitions.data < threshold] = 0
        adjacency_transitions.eliminate_zeros()
        g_dir = nx.DiGraph(adjacency_transitions.T)
        widths = [x[-1]['weight'] for x in g_dir.edges(data=True)]
        widths = base_edge_width * np.array(widths)
        if min_edge_width is not None or max_edge_width is not None:
            widths = np.clip(widths, min_edge_width, max_edge_width)
        nx.draw_networkx_edges(g_dir,
                               pos,
                               ax=ax,
                               width=widths,
                               edge_color='black',
                               arrowsize=arrowsize)

    if export_to_gexf:
        if isinstance(colors[0], tuple):
            from matplotlib.colors import rgb2hex
            colors = [rgb2hex(c) for c in colors]
        for count, n in enumerate(nx_g_solid.nodes()):
            nx_g_solid.node[count]['label'] = str(node_labels[count])
            nx_g_solid.node[count]['color'] = str(colors[count])
            nx_g_solid.node[count]['viz'] = {
                'position': {
                    'x': 1000 * pos[count][0],
                    'y': 1000 * pos[count][1],
                    'z': 0
                }
            }
        filename = settings.writedir / 'paga_graph.gexf'
        logg.warning(f'exporting to {filename}')
        settings.writedir.mkdir(parents=True, exist_ok=True)
        nx.write_gexf(nx_g_solid, settings.writedir / 'paga_graph.gexf')

    ax.set_frame_on(frameon)
    ax.set_xticks([])
    ax.set_yticks([])

    # groups sizes
    if groups_key is not None and groups_key + '_sizes' in adata.uns:
        groups_sizes = adata.uns[groups_key + '_sizes']
    else:
        groups_sizes = np.ones(len(node_labels))
    base_scale_scatter = 2000
    base_pie_size = (base_scale_scatter /
                     (np.sqrt(adjacency_solid.shape[0]) + 10) *
                     node_size_scale)
    median_group_size = np.median(groups_sizes)
    groups_sizes = base_pie_size * np.power(groups_sizes / median_group_size,
                                            node_size_power)

    if fontsize is None:
        fontsize = rcParams['legend.fontsize']

    # usual scatter plot
    if not isinstance(colors[0], dict):
        n_groups = len(pos_array)
        sct = ax.scatter(pos_array[:, 0],
                         pos_array[:, 1],
                         c=colors[:n_groups],
                         edgecolors='face',
                         s=groups_sizes,
                         cmap=cmap)
        for count, group in enumerate(node_labels):
            ax.text(pos_array[count, 0],
                    pos_array[count, 1],
                    group,
                    verticalalignment='center',
                    horizontalalignment='center',
                    size=fontsize,
                    fontweight=fontweight,
                    **text_kwds)
    # else pie chart plot
    else:
        # start with this dummy plot... otherwise strange behavior
        sct = ax.scatter(pos_array[:, 0],
                         pos_array[:, 1],
                         c='white',
                         edgecolors='face',
                         s=groups_sizes,
                         cmap=cmap)
        trans = ax.transData.transform
        bbox = ax.get_position().get_points()
        ax_x_min = bbox[0, 0]
        ax_x_max = bbox[1, 0]
        ax_y_min = bbox[0, 1]
        ax_y_max = bbox[1, 1]
        ax_len_x = ax_x_max - ax_x_min
        ax_len_y = ax_y_max - ax_y_min
        trans2 = ax.transAxes.inverted().transform
        pie_axs = []
        for count, n in enumerate(nx_g_solid.nodes()):
            pie_size = groups_sizes[count] / base_scale_scatter
            x1, y1 = trans(pos[n])  # data coordinates
            xa, ya = trans2((x1, y1))  # axis coordinates
            xa = ax_x_min + (xa - pie_size / 2) * ax_len_x
            ya = ax_y_min + (ya - pie_size / 2) * ax_len_y
            # clip, the fruchterman layout sometimes places below figure
            if ya < 0: ya = 0
            if xa < 0: xa = 0
            pie_axs.append(
                pl.axes([xa, ya, pie_size * ax_len_x, pie_size * ax_len_y],
                        frameon=False))
            pie_axs[count].set_xticks([])
            pie_axs[count].set_yticks([])
            if not isinstance(colors[count], dict):
                raise ValueError(
                    '{} is neither a dict of valid matplotlib colors '
                    'nor a valid matplotlib color.'.format(colors[count]))
            color_single = colors[count].keys()
            fracs = [colors[count][c] for c in color_single]
            if sum(fracs) < 1:
                color_single = list(color_single)
                color_single.append('grey')
                fracs.append(1 - sum(fracs))
            pie_axs[count].pie(fracs, colors=color_single)
        if node_labels is not None:
            for ia, a in enumerate(pie_axs):
                a.text(0.5,
                       0.5,
                       node_labels[ia],
                       verticalalignment='center',
                       horizontalalignment='center',
                       transform=a.transAxes,
                       size=fontsize,
                       fontweight=fontweight,
                       **text_kwds)
    return sct
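The `single_component` branch above restricts the graph to its largest connected component: component labels come from `scipy.sparse.csgraph.connected_components`, component sizes from `np.bincount`, and the adjacency matrix is sliced with the resulting boolean mask. A minimal sketch of that selection on a toy sparse adjacency matrix:

import numpy as np
import scipy.sparse
import scipy.sparse.csgraph

# nodes {0, 1, 2} form one component, node 3 is isolated
adjacency = scipy.sparse.csr_matrix(np.array([[0, 1, 1, 0],
                                              [1, 0, 0, 0],
                                              [1, 0, 0, 0],
                                              [0, 0, 0, 0]]))
n_components, labels = scipy.sparse.csgraph.connected_components(adjacency)
component_sizes = np.bincount(labels)
largest = np.argmax(component_sizes)          # equivalent to np.where(sizes == sizes.max())[0][0]
keep = labels == largest
adjacency_largest = adjacency.tocsr()[keep, :].tocsc()[:, keep]
print(n_components, component_sizes, keep)    # -> 2 [3 1] [ True  True  True False]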
Exemple #49
0
 def predict(self, X):
     predictions = np.asarray([clf.predict(X) for clf in self.classifier_array]).T
     maj = np.apply_along_axis(lambda x: np.argmax(np.bincount(x)), axis=1, arr=predictions)
     maj = self.label_encoder.inverse_transform(maj)
     return maj
Exemple #50
0
import script_chdir
import numpy as np
import results.plots as lplot
import matplotlib.pyplot as plt

from hybrid_model.dataset import get_dataset
from hybrid_model.index_sampler import IndexSamplerUserItembased as IndexSampler

dataset = get_dataset('ml100k')
(inds_u, inds_i, y, users_features, items_features) = dataset.data

user_dist = np.bincount(inds_u, minlength=dataset.n_users)
item_dist = np.bincount(inds_i, minlength=dataset.n_items)

order_users = np.argsort(-user_dist)
order_items = np.argsort(-item_dist)

dist_users = user_dist[order_users]
dist_items = item_dist[order_items]

inds_u = np.argsort(order_users)[inds_u]
inds_i = np.argsort(order_items)[inds_i]

# Index sampling
sampler_config = {'f_cf': 0.15, 'min_ratings_user': 30, 'f_user': 3.0, 'min_ratings_item': 10, 'f_item': 3.0}
sampler = IndexSampler(dist_users, dist_items, sampler_config, [inds_u, inds_i])
from_cf = sampler.get_indices_from_cf()
from_md = sampler.get_indices_from_md()

from_cf = (from_cf[0].flatten(), from_cf[1].flatten())
from_md = (from_md[0].flatten(), from_md[1].flatten())
def fast_hist(a, b, n):
    k = (a >= 0) & (a < n)
    return np.bincount(n * a[k].astype(int) + b[k],
                       minlength=n**2).reshape(n, n)
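`fast_hist` above builds an n x n confusion matrix in a single `np.bincount` call by encoding each (ground truth, prediction) pair as the integer `n * a + b`. A short usage sketch (the function is repeated so the example runs on its own) that also derives per-class IoU from the matrix:

import numpy as np

def fast_hist(a, b, n):
    k = (a >= 0) & (a < n)
    return np.bincount(n * a[k].astype(int) + b[k],
                       minlength=n**2).reshape(n, n)

gt = np.array([0, 0, 1, 1, 2, 2])
pred = np.array([0, 1, 1, 1, 2, 0])
hist = fast_hist(gt, pred, 3)       # rows: ground truth, columns: prediction
iou = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist))
print(hist)
print(iou)                          # -> [0.33333333 0.66666667 0.5       ]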
def createPatches(imgs,
                  gts,
                  batch,
                  crop_size,
                  band_count,
                  test=False,
                  debug=False):
    if not test:
        print " ------------------ Creating Patches -------------------"
    patches = []
    patchesclass = []

    wd = int(floor(crop_size / 2))
    i = 0
    while i < len(batch):

        p = np.asarray(batch[i], dtype=int)
        m = p[0]
        maxRows = imgs[m].shape[1]
        maxCols = imgs[m].shape[2]

        patch = np.zeros((band_count, crop_size, crop_size),
                         dtype=imgs[m].dtype)
        gtpatch = np.zeros((crop_size, crop_size), dtype=gts[m].dtype)

        ulc, urc = p[2], min(p[2] + crop_size, maxCols - 1)
        ulr = urr = p[1]

        dlc, drc = ulc, urc
        dlr = drr = min(p[1] + crop_size, maxRows - 1)

        patch = imgs[m][:, ulr:dlr, ulc:urc]
        gtpatch = gts[m][ulr:dlr, ulc:urc]

        #lMin = max(0, p[1] - wd)
        #lMax = min(p[1] + wd, imgs[m].shape[1] - 1)
        #cMin = max(0, p[2] - wd)
        #cMax = min(p[2] + wd, imgs[m].shape[2] - 1)

        # print(lMin, lMax)
        # print(cMin, cMax)

        # for b in range(band_count):
        #     for l in range(lMin, lMax + 1):
        #         for c in range(cMin, cMax + 1):
        #             patch[b][l - lMin][c - cMin] = imgs[m][b][l][c]
        #             gtpatch[l - lMin][c - cMin] = gts[m][l][c]
        patches.append(np.moveaxis(patch, 0, -1))
        #gtMax = max(1, np.amax(gtpatch))
        #gtpatch /= gtMax
        gtpatch = (gtpatch > 0).astype(int)
        if debug:
            print(m, maxRows, maxCols)
            print(ulr, ulc, dlr, dlc, drr, drc, urr, urc)
            print gtpatch.shape
            print gtpatch
            count = np.bincount(gtpatch.flatten(), minlength=2)  # minlength avoids an IndexError when a patch has no erosion pixels
            print count
            if count[0] != crop_size * crop_size:
                print "Non-erosion: {0} | Erosion: {1}".format(
                    count[0], count[1])
            else:
                print "Non-erosion: {0}".format(count[0])
        patchesclass.append(gtpatch)

        if not test:
            if i % 10 == 0:
                print "{0} patches out of {1} done.".format(i, len(batch))

        i += 1

    if not test:
        print " -------------- Finished Creating Patches --------------"
    return np.asarray(patches), np.asarray(patchesclass)
        if col != 0:
            inferred_network.append([int(i), int(j), col])
            if dataset != 0 and plot:
                DG.add_weighted_edges_from([(int(j),int(i),col)])

np.savetxt(inferredNetworkFileName, inferred_network, delimiter=",")

# Check which nodes are connected
for k in range(N):
    if np.array(np.where(np.array(inferred_network) == k)).size == 0:
        not_connected.append(k)


if plot:
    senders = np.array(DG.edges)[:,0]
    senders_count = np.bincount(senders,minlength=N)
    receivers = np.array(DG.edges)[:,1]
    receivers_count = np.bincount(receivers,minlength=N)
    count = senders_count - receivers_count

    # Plot in- and out-degree histograms
    plt.figure()
    bins = np.linspace(0, 15, 12)
    plt.hist(senders_count,bins, label='outdegree', color='blue', alpha=0.5)
    plt.hist(receivers_count,bins, label='indegree', color='red', alpha=0.5)
    plt.xlabel('number of connections')
    plt.ylabel('number of occurrences')
    plt.title('Indegree and outdegree of the biological neural network')
    plt.grid()
    plt.legend()
    plt.savefig('plot/degree_histogram_real_network.pdf', dpi=300)
Exemple #54
0
def pick_largest_cc(traversible):
    out = scipy.ndimage.label(traversible)[0]
    cnt = np.bincount(out.reshape(-1))[1:]
    return out == np.argmax(cnt) + 1
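A brief usage sketch for `pick_largest_cc` above (definition repeated so the snippet runs standalone), showing how `scipy.ndimage.label` plus `np.bincount` isolates the largest connected region of a toy occupancy grid:

import numpy as np
import scipy.ndimage

def pick_largest_cc(traversible):
    out = scipy.ndimage.label(traversible)[0]
    cnt = np.bincount(out.reshape(-1))[1:]
    return out == np.argmax(cnt) + 1

traversible = np.array([[1, 1, 0, 0, 1],
                        [1, 0, 0, 1, 1],
                        [0, 0, 0, 0, 1]], dtype=bool)
print(pick_largest_cc(traversible).astype(int))   # only the 4-pixel component on the right survives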
Exemple #55
0
def part1(jolts):
    # number of 1-step and 3-step differences multiplied
    bins = np.bincount(np.diff(jolts))
    return bins[1] * bins[3]
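`part1` above assumes `jolts` is already sorted and padded with the charging outlet (0) and the built-in adapter (max + 3); a hedged usage sketch that performs that preparation on a small example input:

import numpy as np

def part1(jolts):
    # number of 1-step and 3-step differences multiplied
    bins = np.bincount(np.diff(jolts))
    return bins[1] * bins[3]

adapters = np.array([16, 10, 15, 5, 1, 11, 7, 19, 6, 12, 4])
jolts = np.sort(adapters)
jolts = np.concatenate(([0], jolts, [jolts[-1] + 3]))   # add outlet and device adapter
print(part1(jolts))                                     # -> 35 (7 one-jolt gaps * 5 three-jolt gaps)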
- repo: https://github.com/marofmar/60daysofudacity/blob/master/Day20_review_PATE.py
- OMG...? please, at least GitHub should work in here.
'''



import numpy as np 

num_teachers = 10 
num_examples = 10000
num_labels = 10 

preds = (np.random.rand(num_teachers, num_examples) * num_labels).astype(int).transpose(1,0) 
new_labels = list() 
for an_image in preds: 
	label_counts = np.bincount(an_image, minlength = num_labels).astype(float)  # float so the Laplace noise below can be added in place
	epsilon = 0.1 
	beta = 1/ epsilon 

	for i in range(len(label_counts)):
		label_counts[i] += np.random.laplace(0, beta, 1) 

	new_label = np.argmax(label_counts)

	new_labels.append(new_label)

labels = np.array([9,9,3,6,9,9,9,9,8,2]) 
counts = np.bincount(labels, minlength = 10)
query_result = np.argmax(counts) 
print(query_result) 
Exemple #57
0
def aggregate_vote(preds):
    counts = np.bincount(preds)
    max_vote = np.argmax(counts)
    return max_vote
Exemple #58
0
def correct_error(data_flip, data_e_flip, c_out, c_out_flip):
    s_out = np.zeros(k_ols, dtype=int)
    for i in range(16):
        s_out[i] = c_out[i] ^ c_out_flip[i]

    # ols
    d_ols_c = np.zeros(k_ols, dtype=int)

    # 1st 4 bits
    d1_s = np.array([s_out[0], s_out[4], s_out[8], s_out[12]])
    majority = np.argmax(np.bincount(d1_s))
    d_ols_c[0] = majority ^ data_flip[0]

    d2_s = np.array([s_out[0], s_out[5], s_out[9], s_out[13]])
    majority = np.argmax(np.bincount(d2_s))
    d_ols_c[1] = majority ^ data_flip[1]

    d3_s = np.array([s_out[0], s_out[6], s_out[10], s_out[14]])
    majority = np.argmax(np.bincount(d3_s))
    d_ols_c[2] = majority ^ data_flip[2]

    d4_s = np.array([s_out[0], s_out[7], s_out[11], s_out[15]])
    majority = np.argmax(np.bincount(d4_s))
    d_ols_c[3] = majority ^ data_flip[3]

    # 2nd 4 bits
    d5_s = np.array([s_out[1], s_out[4], s_out[9], s_out[14]])
    majority = np.argmax(np.bincount(d5_s))
    d_ols_c[4] = majority ^ data_flip[4]

    d6_s = np.array([s_out[1], s_out[5], s_out[8], s_out[15]])
    majority = np.argmax(np.bincount(d6_s))
    d_ols_c[5] = majority ^ data_flip[5]

    d7_s = np.array([s_out[1], s_out[6], s_out[11], s_out[12]])
    majority = np.argmax(np.bincount(d7_s))
    d_ols_c[6] = majority ^ data_flip[6]

    d8_s = np.array([s_out[1], s_out[5], s_out[10], s_out[13]])
    majority = np.argmax(np.bincount(d8_s))
    d_ols_c[7] = majority ^ data_flip[7]

    # 3rd 4 bits
    d9_s = np.array([s_out[2], s_out[4], s_out[10], s_out[15]])
    majority = np.argmax(np.bincount(d9_s))
    d_ols_c[8] = majority ^ data_flip[8]

    d10_s = np.array([s_out[2], s_out[5], s_out[11], s_out[14]])
    majority = np.argmax(np.bincount(d10_s))
    d_ols_c[9] = majority ^ data_flip[9]

    d11_s = np.array([s_out[2], s_out[6], s_out[8], s_out[13]])
    majority = np.argmax(np.bincount(d11_s))
    d_ols_c[10] = majority ^ data_flip[10]

    d12_s = np.array([s_out[2], s_out[7], s_out[9], s_out[12]])
    majority = np.argmax(np.bincount(d12_s))
    d_ols_c[11] = majority ^ data_flip[11]

    # last 4 bits
    d13_s = np.array([s_out[3], s_out[4], s_out[11], s_out[13]])
    majority = np.argmax(np.bincount(d13_s))
    d_ols_c[12] = majority ^ data_flip[12]

    d14_s = np.array([s_out[3], s_out[5], s_out[10], s_out[12]])
    majority = np.argmax(np.bincount(d14_s))
    d_ols_c[13] = majority ^ data_flip[13]

    d15_s = np.array([s_out[3], s_out[6], s_out[9], s_out[15]])
    majority = np.argmax(np.bincount(d15_s))
    d_ols_c[14] = majority ^ data_flip[14]

    d16_s = np.array([s_out[3], s_out[7], s_out[14], s_out[15]])
    majority = np.argmax(np.bincount(d16_s))
    d_ols_c[15] = majority ^ data_flip[15]

    ################################################

    # sec-ded
    de_sec_ded_c = np.zeros(k_ols, dtype=int)

    # 1st 4 bits
    de1_s = s_out[0] and s_out[1] and s_out[2] and not s_out[3]
    de_sec_ded_c[0] = de1_s ^ data_e_flip[0]

    de2_s = s_out[0] and s_out[1] and s_out[3] and not s_out[2]
    de_sec_ded_c[1] = de2_s ^ data_e_flip[1]

    de3_s = s_out[0] and s_out[2] and s_out[3] and not s_out[1]
    de_sec_ded_c[2] = de3_s ^ data_e_flip[2]

    de4_s = s_out[1] and s_out[2] and s_out[3] and not s_out[0]
    de_sec_ded_c[3] = de4_s ^ data_e_flip[3]

    # 2nd 4 bits
    de5_s = s_out[4] and s_out[5] and s_out[6] and not s_out[7]
    de_sec_ded_c[4] = de5_s ^ data_e_flip[4]

    de6_s = s_out[4] and s_out[5] and s_out[7] and not s_out[6]
    de_sec_ded_c[5] = de6_s ^ data_e_flip[5]

    de7_s = s_out[4] and s_out[6] and s_out[7] and not s_out[5]
    de_sec_ded_c[6] = de7_s ^ data_e_flip[6]

    de8_s = s_out[5] and s_out[6] and s_out[7] and not s_out[4]
    de_sec_ded_c[7] = de8_s ^ data_e_flip[7]

    # 3rd 4 bits
    de9_s = s_out[8] and s_out[9] and s_out[10] and not s_out[11]
    de_sec_ded_c[8] = de9_s ^ data_e_flip[8]

    de10_s = s_out[8] and s_out[9] and s_out[11] and not s_out[10]
    de_sec_ded_c[9] = de10_s ^ data_e_flip[9]

    de11_s = s_out[8] and s_out[10] and s_out[11] and not s_out[9]
    de_sec_ded_c[10] = de11_s ^ data_e_flip[10]

    de12_s = s_out[9] and s_out[10] and s_out[11] and not s_out[8]
    de_sec_ded_c[11] = de12_s ^ data_e_flip[11]

    # Last 4 bits
    de13_s = s_out[12] and s_out[13] and s_out[14] and not s_out[15]
    de_sec_ded_c[12] = de13_s ^ data_e_flip[12]

    de14_s = s_out[12] and s_out[13] and s_out[15] and not s_out[14]
    de_sec_ded_c[13] = de14_s ^ data_e_flip[13]

    de15_s = s_out[12] and s_out[14] and s_out[15] and not s_out[13]
    de_sec_ded_c[14] = de15_s ^ data_e_flip[14]

    de16_s = s_out[13] and s_out[14] and s_out[15] and not s_out[12]
    de_sec_ded_c[15] = de16_s ^ data_e_flip[15]

    return d_ols_c, de_sec_ded_c
 def mod(self, val):
     bincount = np.bincount(val)
     return np.argmax(bincount)
def onehot(data, min_length):
    return np.bincount(data, minlength=min_length)
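Despite its name, `onehot` above returns a fixed-length count vector (a bag of counts) rather than a strict one-hot encoding; a quick usage example (definition repeated so it runs standalone):

import numpy as np

def onehot(data, min_length):
    return np.bincount(data, minlength=min_length)

print(onehot(np.array([1, 1, 3]), 5))   # -> [0 2 0 1 0]: counts per index, padded to length 5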