Example #1
def flag(weights, coord, axesToExt, selection, percent=90, size=[0], cycles=3, outQueue=None):
        """
        Flag data if surrounded by other flagged data.
        weights = the weights to convert into flags
        percent = percentage of surrounding flagged points required to extend the flag

        output: [weights, selection] is put on outQueue
        """
        def extendFlag(flags, percent):
            #flags = flags.astype(np.int)
            if float(np.sum( flags ))/len(flags) > percent/100.:
                return 1
            else:
                return 0

        import scipy.ndimage
        initialPercent = 100.*(np.size(weights)-np.count_nonzero(weights))/np.size(weights)

        # if size == 0 then extend to twice the axis length (a size of 0 would otherwise create issues with mirroring)
        for i, s in enumerate(size):
            if s == 0: size[i] = 2*weights.shape[i]

        for cycle in xrange(cycles):
            flag = scipy.ndimage.filters.generic_filter((weights==0), extendFlag, size=size, mode='mirror', cval=0.0, origin=0, extra_keywords={'percent':percent})
            weights[ ( flag == 1 ) ] = 0
            # no new flags
            if cycle != 0 and np.count_nonzero(flag) == oldFlagCount: break
            oldFlagCount = np.count_nonzero(flag)

        logging.debug('Percentage of data flagged (%s): %.3f -> %.3f %%' \
            % (removeKeys(coord, axesToExt), initialPercent, 100.*(np.size(weights)-np.count_nonzero(weights))/np.size(weights)))

        outQueue.put([weights, selection])
def question2(train, valid, test):
    """
    Find Best l1 value for Lasso regression
    :param train: pandas dataframe
    :param valid: pandas dataframe
    :param test: pandas dataframe
    :return:
    """
    best_rss = float('inf')

    # figure out best penalty for Lasso
    for penalty in np.logspace(1, 7, num=13):
        model = linear_model.Lasso(alpha=penalty, normalize=True)
        model.fit(train[ALL_FEATURES], train['price'])
        rss = sum((model.predict(valid[ALL_FEATURES]) - valid['price'])**2)
        if rss < best_rss:
            best_rss, best_penalty = rss, penalty
            best_model = model
    print('best L1 on validation set: ' + str(best_penalty) + '\n')

    # Count the non-zero coefficients in the model (including the intercept)
    print('\nNonzero Weights: ' + str(np.count_nonzero(best_model.coef_) +
                                      np.count_nonzero(best_model.intercept_)))

    # calculate RSS on test data
    print('RSS on test data:')
    print('{:f}'.format(
            sum((best_model.predict(test[ALL_FEATURES]) - test['price'])**2)))
    print()
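
# Note on the Lasso call above: recent scikit-learn releases removed the `normalize`
# argument, so an explicit scaler inside a Pipeline is the usual substitute. A minimal
# sketch of the same penalty search under that assumption (ALL_FEATURES and the
# train/valid DataFrames are taken from the original function; StandardScaler is not
# numerically identical to normalize=True, so the chosen penalty may differ slightly):
import numpy as np
from sklearn.linear_model import Lasso
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

def pick_lasso_penalty(train, valid, feature_cols, target_col='price'):
    """Hypothetical helper: return the model and penalty with the lowest validation RSS."""
    best_rss, best_penalty, best_model = float('inf'), None, None
    for penalty in np.logspace(1, 7, num=13):
        model = make_pipeline(StandardScaler(), Lasso(alpha=penalty))
        model.fit(train[feature_cols], train[target_col])
        rss = np.sum((model.predict(valid[feature_cols]) - valid[target_col]) ** 2)
        if rss < best_rss:
            best_rss, best_penalty, best_model = rss, penalty, model
    return best_model, best_penalty
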
Example #3
    def _update_labels(self):
        labelvar = self.cluster_var_model[self.cluster_var_idx]
        labels, _ = self.data.get_column_view(labelvar)
        labels = np.asarray(labels, dtype=float)
        cluster_mask = np.isnan(labels)
        dist_mask = np.isnan(self._matrix).all(axis=0)
        mask = cluster_mask | dist_mask
        labels = labels.astype(int)
        labels = labels[~mask]

        labels_unq, _ = np.unique(labels, return_counts=True)

        if len(labels_unq) < 2:
            self.Error.need_two_clusters()
            labels = silhouette = mask = None
        elif len(labels_unq) == len(labels):
            self.Error.singleton_clusters_all()
            labels = silhouette = mask = None
        else:
            silhouette = sklearn.metrics.silhouette_samples(
                self._matrix[~mask, :][:, ~mask], labels, metric="precomputed")
        self._mask = mask
        self._labels = labels
        self._silhouette = silhouette

        if mask is not None:
            count_missing = np.count_nonzero(cluster_mask)
            if count_missing:
                self.Warning.missing_cluster_assignment(
                    count_missing, s="s" if count_missing > 1 else "")
            count_nandist = np.count_nonzero(dist_mask)
            if count_nandist:
                self.Warning.nan_distances(
                    count_nandist, s="s" if count_nandist > 1 else "")
Example #4
    def verify(self, mask, exp):

        maxDiffRatio = 0.02
        expArea = np.count_nonzero(exp)
        nonIntersectArea = np.count_nonzero(mask != exp)
        curRatio = float(nonIntersectArea) / expArea
        return curRatio < maxDiffRatio
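
# verify() above accepts a mask when the number of pixels disagreeing with the expected
# mask stays below 2% of the expected foreground area. A standalone sketch of the same
# check with toy arrays (illustrative values, not the original test data):
import numpy as np

exp = np.zeros((10, 10), dtype=np.uint8)
exp[2:8, 2:8] = 1                                    # 36 expected foreground pixels
mask = exp.copy()
mask[2, 2] = 0                                       # introduce a single disagreement
non_intersect = np.count_nonzero(mask != exp)        # 1 mismatching pixel
print(non_intersect / np.count_nonzero(exp) < 0.02)  # False: 1/36 (about 2.8%) exceeds the 2% limit
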
Example #5
def despike(self, n=3, recursive=False, verbose=False):
    """
    Replace spikes with np.NaN.
    A spike is a value whose deviation from the mean is >= n * std
    (default n = 3).

    """

    result = self.values.copy()
    outliers = (np.abs(self.values - nanmean(self.values)) >= n *
                nanstd(self.values))

    removed = np.count_nonzero(outliers)
    result[outliers] = np.NaN

    if verbose and not recursive:
        print("Removing from %s\n # removed: %s" % (self.name, removed))

    counter = 0
    if recursive:
        while outliers.any():
            result[outliers] = np.NaN
            outliers = np.abs(result - nanmean(result)) >= n * nanstd(result)
            counter += 1
            removed += np.count_nonzero(outliers)
        if verbose:
            print("Removing from %s\nNumber of iterations: %s # removed: %s" %
                  (self.name, counter, removed))
    return Series(result, index=self.index, name=self.name)
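
# despike() above relies on nanmean/nanstd helpers and on being bound to a pandas-Series-like
# object (self.values, self.index, self.name). A minimal standalone sketch of the same idea
# on a plain numpy array, under those assumptions:
import numpy as np

def despike_array(values, n=3):
    """Return a copy with values deviating from the mean by >= n * std replaced by NaN."""
    result = np.asarray(values, dtype=float).copy()
    outliers = np.abs(result - np.nanmean(result)) >= n * np.nanstd(result)
    result[outliers] = np.nan
    return result
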
    def measure(self, image, workspace):
        data = image.pixel_data

        data = data.astype(bool)

        measurements = workspace.measurements

        measurement_name = self.skeleton_name.value

        statistics = []

        name = "Skeleton_Branches_{}".format(measurement_name)

        value = numpy.count_nonzero(branches(data))

        statistics.append(value)

        measurements.add_image_measurement(name, value)

        name = "Skeleton_Endpoints_{}".format(measurement_name)

        value = numpy.count_nonzero(endpoints(data))

        statistics.append(value)

        measurements.add_image_measurement(name, value)

        return [statistics]
Example #7
    def __init__(self, image, skin_mask, labeled_image, label_number,
                 rectangle_slices):
        """Creates a new skin region.

            image: The entire image in YCrCb mode.
            skin_mask: The entire image skin mask.
            labeled_image: A matrix of the size of the image with the region
                label in each position. See scipy.ndimage.measurements.label.
            label_number: The label number of this skin region.
            rectangle_slices: The slices to get the rectangle of the image in
                which the region fits as returned by
                scipy.ndimage.measurements.find_objects.
        """
        self.region_skin_pixels = np.count_nonzero(
            labeled_image[rectangle_slices] == label_number
        )

        self.bounding_rectangle_size = \
            (
                rectangle_slices[0].stop - rectangle_slices[0].start
            ) * (
                rectangle_slices[1].stop - rectangle_slices[1].start
            )

        self.bounding_rectangle_skin_pixels = np.count_nonzero(
            skin_mask[rectangle_slices]
        )

        self.bounding_rectangle_avarage_pixel_intensity = np.average(
            image[rectangle_slices].take([0], axis=2)
        )
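
# The constructor above expects `labeled_image` and `rectangle_slices` in the form produced
# by scipy.ndimage: label() assigns an integer label to each connected region and
# find_objects() returns one tuple of slices per label. A minimal sketch of producing those
# inputs from a toy mask (not the original detection pipeline):
import numpy as np
import scipy.ndimage

skin_mask = np.zeros((6, 6), dtype=np.uint8)
skin_mask[1:3, 1:4] = 1                                          # one connected region
labeled_image, num_labels = scipy.ndimage.label(skin_mask)       # labels start at 1
rectangle_slices = scipy.ndimage.find_objects(labeled_image)[0]  # slices for label 1
print(num_labels, rectangle_slices)
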
Example #8
def _dump_mo_energy(mol, mo_energy, mo_occ, ehomo, elumo, orbsym, title='',
                    verbose=logger.DEBUG):
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mol.stdout, verbose)
    nirrep = mol.symm_orb.__len__()
    for i, ir in enumerate(mol.irrep_id):
        irname = mol.irrep_name[i]
        ir_idx = (orbsym == ir)
        nso = numpy.count_nonzero(ir_idx)
        nocc = numpy.count_nonzero(mo_occ[ir_idx])
        e_ir = mo_energy[ir_idx]
        if nocc == 0:
            log.debug('%s%s nocc = 0', title, irname)
        elif nocc == nso:
            log.debug('%s%s nocc = %d  HOMO = %.15g',
                      title, irname, nocc, e_ir[nocc-1])
        else:
            log.debug('%s%s nocc = %d  HOMO = %.15g  LUMO = %.15g',
                      title, irname, nocc, e_ir[nocc-1], e_ir[nocc])
            if e_ir[nocc-1]+1e-3 > elumo:
                log.warn('!! %s%s HOMO %.15g > system LUMO %.15g',
                         title, irname, e_ir[nocc-1], elumo)
            if e_ir[nocc] < ehomo+1e-3:
                log.warn('!! %s%s LUMO %.15g < system HOMO %.15g',
                         title, irname, e_ir[nocc], ehomo)
        log.debug('   mo_energy = %s', e_ir)
Example #9
def print_results(labels, predictions):
    total = len(labels)
    num_correct = total - np.count_nonzero(np.subtract(predictions,labels))
    print "\n***** ACCURACY *****"
    print "Overall Accuracy: %.3f percent\n" % ((float(num_correct)/float(total)) * 100.0)

    results = pd.DataFrame()
    results['real'] = labels
    results['predicted'] = predictions

    for label in np.unique(labels):
        data = results[results['real'] == label]
        num_correct = len(data) - np.count_nonzero(data['real'].sub(data['predicted']))
        acc = ((float(num_correct)/float(len(data))) * 100.0)
        print "Total class label '%s' accuracy: %f percent" % (label, acc)
    print ""

    # Distribution graphs
    utils.print_distribution_graph(labels, 'Actual Distribution of Classes')
    utils.print_distribution_graph(predictions, 'Distribution of Predictions')

    # Distribution graphs for each class label
    for label in np.unique(labels):
        data = results[results['predicted'] == label]['real'].tolist()
        title = "When class label '%s' was predicted, the actual class was:" % label
        utils.print_distribution_graph(data, title)
Example #10
	def get_frequece(self, thresh, col=None):
		if col is not None:
			radio = round(np.count_nonzero(self.array[:, int(col)] >= float(thresh)) / float(len(self.array)), 4) * 100
			return "%4.2f%%" % radio
		else:
			radio = round(np.count_nonzero(self.array >= float(thresh)) / float(len(self.array)), 4) * 100
			return "%4.2f%%" % radio
Example #11
def cost_logit(X, A, R, lam, n, k):
    '''
    The cost function

    n is the number of examples
    k is the feature dimension
    R is the matrix indicating which entries of A are known.
    '''
    # get the matrices
    # U, V, beta, alpha
    U = X[:n*k]
    U = np.reshape(U, (n,k))
    V = X[n*k:2*n*k]
    V = np.reshape(V, (n,k))
    beta = X[2*n*k:2*n*k+n]
    beta = np.reshape(beta, (n,1))
    alpha = X[-1]
    num_knowns = np.count_nonzero(R)
    num_edges = np.count_nonzero(np.multiply(A, R))
    num_nonedges = num_knowns - num_edges
    h = alpha + np.dot(U, np.transpose(V))
    # add beta to every row, column
    for i in range(h.shape[0]):
        for j in range(h.shape[1]):
            h[i,j] += beta[i]+beta[j]
    sigH = sigmoid(h)
    J = ((-A/(2*num_edges))*np.log(sigH)) - (((1-A)/(2*num_nonedges))*np.log(1-sigH))
    J = J*R
    # regularizer
    for i in range(J.shape[0]):
        for j in range(J.shape[1]):
            J[i,j] += lam*( np.abs(beta[i])**2 + np.abs(beta[j])**2 + np.linalg.norm(U[i,:])**2 + np.linalg.norm(V[j,:])**2 )
    # sum over known values
    cost = sum(sum(J))
    return cost
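
# cost_logit() above calls a sigmoid() helper that is not shown in this snippet. A minimal
# sketch of what that helper presumably looks like (an assumption, not the original code):
import numpy as np

def sigmoid(z):
    """Elementwise logistic function 1 / (1 + exp(-z))."""
    return 1.0 / (1.0 + np.exp(-z))
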
Example #12
def kappa_score(y_true, y_pred):
  """Calculate Cohen's kappa for classification tasks.

  See https://en.wikipedia.org/wiki/Cohen%27s_kappa

  Note that this implementation of Cohen's kappa expects binary labels.

  Args:
    y_true: Numpy array containing true values.
    y_pred: Numpy array containing predicted values.

  Returns:
    kappa: Numpy array containing kappa for each classification task.

  Raises:
    AssertionError: If y_true and y_pred are not the same size, or if class
      labels are not in [0, 1].
  """
  assert len(y_true) == len(y_pred), 'Number of examples does not match.'
  yt = np.asarray(y_true, dtype=int)
  yp = np.asarray(y_pred, dtype=int)
  assert np.array_equal(np.unique(yt), [0, 1]), (
      'Class labels must be binary: %s' % np.unique(yt))
  observed_agreement = np.true_divide(np.count_nonzero(np.equal(yt, yp)),
                                      len(yt))
  expected_agreement = np.true_divide(
      np.count_nonzero(yt == 1) * np.count_nonzero(yp == 1) +
      np.count_nonzero(yt == 0) * np.count_nonzero(yp == 0),
      len(yt) ** 2)
  kappa = np.true_divide(observed_agreement - expected_agreement,
                         1.0 - expected_agreement)
  return kappa
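
# Small usage sketch for kappa_score() on toy binary labels (illustrative values only):
# observed agreement = 4/6, expected agreement = (3*3 + 3*3) / 36 = 0.5,
# so kappa = (4/6 - 0.5) / (1 - 0.5), about 0.333.
import numpy as np

y_true = np.array([0, 0, 1, 1, 1, 0])
y_pred = np.array([0, 1, 1, 1, 0, 0])
print(kappa_score(y_true, y_pred))  # about 0.333
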
def analyze_param(net, layers):
#   plt.figure()
    print '\n=============analyze_param start==============='
    total_nonzero = 0
    total_allparam = 0
    percentage_list = []
    for i, layer in enumerate(layers):
        i += 1
        W = net.params[layer][0].data
        b = net.params[layer][1].data
#       plt.subplot(3, 1, i);
#       numBins = 2 ^ 8
#       plt.hist(W.flatten(), numBins, color='blue', alpha=0.8)
#       plt.show()
        print 'W(%d) range = [%f, %f]' % (i, min(W.flatten()), max(W.flatten()))
        print 'W(%d) mean = %f, std = %f' % (i, np.mean(W.flatten()), np.std(W.flatten()))
        non_zero = (np.count_nonzero(W.flatten()) + np.count_nonzero(b.flatten()))
        all_param = (np.prod(W.shape) + np.prod(b.shape))
        this_layer_percentage = non_zero / float(all_param)
        total_nonzero += non_zero
        total_allparam += all_param
        print 'non-zero W and b cnt = %d' % non_zero
        print 'total W and b cnt = %d' % all_param
        print 'percentage = %f\n' % (this_layer_percentage)
        percentage_list.append(this_layer_percentage)
    print '=====> summary:'
    print 'non-zero W and b cnt = %d' % total_nonzero
    print 'total W and b cnt = %d' % total_allparam
    print 'percentage = %f' % (total_nonzero / float(total_allparam))
    print '=============analyze_param ends ==============='
    return (total_nonzero / float(total_allparam), percentage_list)
Example #14
def test_cross_div(dtypea, dtypeb, dtypec):
    if dtypea == np.int8 and dtypeb == np.int8:
        pytest.skip("Different behaviour in c++ and python for {} / {}".format(dtypea, dtypeb))

    def fkt(a, b, c):
        c[:] = a / b

    hfkt = hope.jit(fkt)
    (ao, ah), (bo, bh), (co, ch) = random(dtypea, [10]), random(dtypeb, [10]), random(dtypec, [10])
    ao, ah, bo, bh = ao.astype(np.float64), ah.astype(np.float64), bo.astype(np.float64), bh.astype(np.float64)
    ao, ah = (
        np.copysign(np.power(np.abs(ao), 1.0 / 4.0), ao).astype(dtypea),
        np.copysign(np.power(np.abs(ah), 1.0 / 4.0), ah).astype(dtypea),
    )
    bo, bh = (
        np.copysign(np.power(np.abs(bo), 1.0 / 4.0), bo).astype(dtypeb),
        np.copysign(np.power(np.abs(bh), 1.0 / 4.0), bh).astype(dtypeb),
    )
    if np.count_nonzero(bo == 0) > 0:
        bo[bo == 0] += 1
    if np.count_nonzero(bh == 0) > 0:
        bh[bh == 0] += 1
    fkt(ao, bo, co), hfkt(ah, bh, ch)
    assert check(co, ch)
    fkt(ao, bo, co), hfkt(ah, bh, ch)
    assert check(co, ch)
Example #15
def norm_mean_cent(movies_np):
    mean_movie= []
    
    count_movie = []

    for row in movies_np:
        row_sum = np.sum(row)
        count = np.count_nonzero(row)
        count_movie.append(count)
        mean_movie.append(row_sum/count)
    
    count_user = []
    mean_user = []

    for row in movies_np.T:
        row_sum = np.sum(row)
        count = np.count_nonzero(row)
        count_user.append(count)
        mean_user.append(row_sum/count)

    movies_np[movies_np==0] = np.nan

    mean_cent = []
    i = 0
    for row in  movies_np:
        mean_cent.append(row - mean_movie[i]) 
        i += 1
    
    mean_cent = np.array(mean_cent)
    mean_cent = np.nan_to_num(mean_cent)
    
    return mean_cent
Example #16
def simula(N, n, PM, beta, pmig, grupos, listafitness, listafitness_m, mpvencer, x):

    s = int(time.time() + random.randint(0, 2**32-1) + x) % (2**32-1)
    random.seed(s)

    s = int(time.time() + random.randint(0, 2**32-1) + x) % (2**32-1)
    np.random.seed(s)
    
    IT = 50002
    #IT = 5002
    precisao = 0.01

    AL = [] 
    AL.append(np.count_nonzero(grupos)/(N*n))
    crit = 0. if AL[0] > (1.-precisao) else 1.

    # In each period the groups enter into conflict and reproduce, and the
    # individuals undergo mutation and migrate between the groups
    for it in xrange(1,IT):
        if abs(AL[it-1]-crit)<precisao:
            print "Acabou na geracao ", it -1
            break
        # 
        knums = [np.count_nonzero(line) for line in grupos]
        glabels = conflito(N,knums,beta,listafitness_m, mpvencer) if N>1 \
                    else knums
        grupos = reproducao_ind(N,n,listafitness,listafitness_m,glabels)
        grupos = mutacao(N,n,PM,grupos)
        grupos = migracao(N,n,grupos,pmig)
        freqA = float(np.count_nonzero(grupos))/(N*n)
        AL.append(freqA)

        logger.debug("%d \t----------->\t %f" %(it,freqA))

    return it-1
 def relearn(self, test_size=0):
     samples, weights, targets = self.learning_component.get_training_set(const_weight=True)
     train_samples, test_samples, train_targets, test_targets = train_test_split(samples, targets, test_size=test_size, random_state=np.random.RandomState(0))
     count_positives = 1.0*np.count_nonzero(train_targets)
     count_negatives = 1.0*(len(train_targets) - count_positives)
     positive_weight = count_negatives/len(train_targets)
     negative_weight = count_positives/len(train_targets)
     weights = np.array([positive_weight if target == 1 else negative_weight for target in train_targets])
     self.classifier.fit(train_samples, train_targets, sample_weight=weights)
     self.learning_component.new_samples_count = 0
     if len(test_samples) > 0:
         test_result = [self.classifier.predict(sample) for sample in test_samples]
         true_positives = 0.0
         count_test_positives = 1.0*np.count_nonzero(test_targets)
         count_result_positives = 1.0*np.count_nonzero(test_result)
         for i in xrange(len(test_targets)):
             if test_targets[i] == test_result[i] and test_result[i] == 1:
                 true_positives += 1
         precision = true_positives / count_result_positives
         recall = true_positives / count_test_positives
         print "Precision:", precision
         print "Recall", recall
         if precision + recall != 0:
             print "F-score:", 2 * precision * recall / (precision + recall)
         else:
             print "F-score:", 0
     self.positive_class_index = 0
     for elem in self.classifier.classes_:
         if elem != 1.0:
             self.positive_class_index += 1
         else:
             break
    def count_element_values(self):
        """Shows the total count of detected elements after the segmentation"""
        from numpy import count_nonzero
        from app.imgprocessing.slice_mask import apply_mask

        collection_mask = self.collection.copy()
        collection_mask = apply_mask(collection_mask)

        empty = count_nonzero(collection_mask == 0)
        mastic = count_nonzero(collection_mask == 1)
        aggregate = count_nonzero(collection_mask == 2)

        total = (empty + mastic + aggregate)

        QtWidgets.QMessageBox.about(self,
                                "Element counting",
                                """
                    <br>
                    <table>
                    <tr><th>The sample has %s pixels:</th></tr>
                    <tr>
                    <td>Empty pixels = %s</td> <td>%3.2f%%</td>
                    </tr>
                    <tr>
                    <td>Mastic pixels = %s</td> <td>%3.2f%%</td>
                    </tr>
                    <tr>
                    <td>Aggregate pixels = %s</td> <td>%3.2f%%</td>
                    </tr>
                    </table>
                    """
                                % (total, empty, ((empty * 100.) / total), mastic,
                                   ((mastic * 100.) / total), aggregate, \
                                   ((aggregate * 100.) / total)))
Example #19
    def update_selected_info_label(self):
        pl = lambda c: "" if c == 1 else "s"
        if self.data is not None and self.scores is not None:
            scores = self.scores
            low, high = self.min_value, self.max_value
            _, side, _, _ = self.Scores[self.score_index]
            test = self.test_f[side]
            count_undef = np.count_nonzero(np.isnan(scores))
            count_scores = len(scores)
            scores = scores[np.isfinite(scores)]

            nselected = np.count_nonzero(test(scores, low, high))
            defined_txt = ("{} of {} score{} undefined."
                           .format(count_undef, count_scores, pl(count_scores)))

        elif self.data is not None:
            nselected = 0
            defined_txt = "No defined scores"
        else:
            nselected = 0
            defined_txt = ""

        self.selectedInfoLabel.setText(
            defined_txt + "\n" +
            "{} selected gene{}".format(nselected, pl(nselected))
        )
Example #20
def constrain_UHF(molecule, this):

    occupancy = numpy.add(this.Alpha.Occupancy, this.Beta.Occupancy)
    N = molecule.NElectrons
    Nab = this.NAlpha * this.NBeta
    Na = numpy.count_nonzero(occupancy == 1)                    # Dimension of active space
    Nc = numpy.count_nonzero(occupancy == 2)                    # Dimension of core space
    S = molecule.S

    half_density_matrix = S.dot(this.Total.Density/2).dot(S)
    NO_vals, NO_vects = numpy.linalg.eigh(half_density_matrix)  # See J. Chem. Phys. 1988, 88(8), 4926
    NO_coeffs = numpy.linalg.inv(S).dot(NO_vects)               # for details on finding the NO coefficients
    back_trans = numpy.linalg.inv(NO_coeffs)

    # Calculate the expectation value of the spin operator
    this.S2 = N*(N+4)/4. - Nab - 2 * sum([x ** 2 for x in NO_vals])   # Using formula from J. Chem. Phys. 88, 4926

    # Sort in order of descending occupancy
    idx = NO_vals.argsort()[::-1]                    # Note the [::-1] reverses the index array
    core_space = idx[:Nc]                            # Indices of the core NOs
    valence_space = idx[(Nc + Na):]                  # Indices of the valence NOs

    delta = (this.Alpha.Fock - this.Beta.Fock) / 2
    delta = NO_coeffs.T.dot(delta).dot(NO_coeffs)    # Transforming delta into the NO basis
    lambda_matrix = numpy.zeros(numpy.shape(delta))
    for i in core_space:
        for j in valence_space:
            lambda_matrix[i,j] = -delta[i,j]
            lambda_matrix[j,i] = -delta[j,i]
    lambda_matrix = back_trans.T.dot(lambda_matrix).dot(back_trans)  # Transforming lambda back to the AO basis

    this.Alpha.Fock = this.Alpha.Fock + lambda_matrix
    this.Beta.Fock = this.Beta.Fock - lambda_matrix
Example #21
def question_1():
    # Adjacency matrix.
    A = numpy.matrix([
        [0, 0, 1, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 1, 0, 0, 1],
        [1, 0, 0, 1, 0, 1, 0, 0],
        [0, 0, 1, 0, 1, 0, 1, 0],
        [0, 1, 0, 1, 0, 0, 0, 1],
        [1, 0, 1, 0, 0, 0, 1, 0],
        [0, 0, 0, 1, 0, 1, 0, 1],
        [0, 1, 0, 0, 1, 0, 1, 0]
    ])
    rn, cn = A.shape

    # Degree matrix.
    D = numpy.asmatrix(numpy.zeros((rn, cn), int))
    numpy.fill_diagonal(D, sum(A))

    # Laplacian matrix.
    L = D - A

    sum_a = A.sum()
    sum_d = D.sum()
    sum_l = L.sum()
    nonzero_a = numpy.count_nonzero(A)
    nonzero_d = numpy.count_nonzero(D)
    nonzero_l = numpy.count_nonzero(L)

    print('A: sum={} #nonzero={}'.format(sum_a, nonzero_a))
    print('D: sum={} #nonzero={}'.format(sum_d, nonzero_d))
    print('L: sum={} #nonzero={}'.format(sum_l, nonzero_l))
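
# Sanity check for the construction above: for an undirected graph the Laplacian L = D - A
# has rows that sum to zero, so L.sum() is 0, and count_nonzero(L) equals the number of
# non-isolated vertices plus twice the number of edges. A tiny independent example using
# plain ndarrays instead of numpy.matrix:
import numpy

A = numpy.array([[0, 1, 1],
                 [1, 0, 0],
                 [1, 0, 0]])
D = numpy.diag(A.sum(axis=1))
L = D - A
print(L.sum())                 # 0
print(numpy.count_nonzero(L))  # 3 diagonal entries + 2 * 2 edges = 7
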
Example #22
    def __recall(self, y_test, Y_vote):
        """ recall extended to multi-class classification """
        # predicted classes
        y_hat = np.argmax(Y_vote, axis=1)

        if True or self.mode == "one-vs-one":
            # need confusion matrix
            conf = self.__confusion(y_test, Y_vote)

            # consider each class separately
            recall = np.zeros(self.numClasses)
            for c in xrange(self.numClasses):
                # true positives: label is c, classifier predicted c
                tp = conf[c,c]

                # false negatives: label is c, classifier predicted not c
                fn = np.sum(conf[c,:]) - conf[c,c]
                if tp + fn > 0:
                    recall[c] = tp*1./(tp+fn)
        elif self.mode == "one-vs-rest":
            # consider each class separately
            recall = np.zeros(self.numClasses)
            for c in xrange(self.numClasses):
                # true positives: label is c, classifier predicted c
                tp = np.count_nonzero((y_test==c) * (y_hat==c))

                # false negatives: label is c, classifier predicted not c
                fn = np.count_nonzero((y_test==c) * (y_hat!=c))

                recall[c] = tp*1./(tp+fn)
        return recall
Example #23
def test_that_build_pyramid_relaxes_mask():
    from _stbt.match import _build_pyramid

    mask = numpy.ones((20, 20, 3), dtype=numpy.uint8) * 255
    mask[3:9, 3:9] = 0  # first 0 is an even row/col, last 0 is an odd row/col
    n = mask.size - numpy.count_nonzero(mask)
    assert n == 6 * 6 * 3
    cv2.imwrite("/tmp/dave1.png", mask)

    mask_pyramid = _build_pyramid(mask, 2, is_mask=True)
    assert numpy.all(mask_pyramid[0] == mask)

    downsampled = mask_pyramid[1]
    cv2.imwrite("/tmp/dave2.png", downsampled)
    assert downsampled.shape == (10, 10, 3)
    print downsampled[:, :, 0]  # pylint:disable=unsubscriptable-object
    n = downsampled.size - numpy.count_nonzero(downsampled)
    assert 3 * 3 * 3 <= n <= 6 * 6 * 3
    expected = [
        # pylint:disable=bad-whitespace
        [255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
        [255,   0,   0,   0,   0,   0, 255, 255, 255, 255],
        [255,   0,   0,   0,   0,   0, 255, 255, 255, 255],
        [255,   0,   0,   0,   0,   0, 255, 255, 255, 255],
        [255,   0,   0,   0,   0,   0, 255, 255, 255, 255],
        [255,   0,   0,   0,   0,   0, 255, 255, 255, 255],
        [255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
        [255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
        [255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
        [255, 255, 255, 255, 255, 255, 255, 255, 255, 255]]
    assert numpy.all(downsampled[:, :, 0] == expected)  # pylint:disable=unsubscriptable-object
 def testFeatureGenWithOnePoint(self):
   # ensure that the start and end datetimes are the same, since the average calculation uses
   # the total distance and the total duration
   ts = esta.TimeSeries.get_time_series(self.testUUID)
   trackpoint1 = ecwlo.Location({u'coordinates': [0,0], 'type': 'Point'})
   ts.insert_data(self.testUUID, "analysis/recreated_location", trackpoint1)
   testSeg = ecws.Section({"start_loc": trackpoint1,
               "end_loc": trackpoint1,
               "distance": 500,
               "sensed_mode": 1,
               "duration": 150,
               "start_ts": arrow.now().timestamp,
               "end_ts": arrow.now().timestamp,
               "_id": 2,
               "speeds":[],
               "distances":[],
               })
   testSegEntry = ecwe.Entry.create_entry(self.testUUID, "analysis/cleaned_section", testSeg)
   d = testSegEntry.data
   m = testSegEntry.metadata
   enufc.expand_start_end_data_times(d, m)
   testSegEntry["data"] = d
   testSegEntry["metadata"] = m
   inserted_id = ts.insert(testSegEntry)
   featureMatrix = np.zeros([1, len(self.pipeline.featureLabels)])
   resultVector = np.zeros(1)
   self.pipeline.updateFeatureMatrixRowWithSection(featureMatrix, 0, testSegEntry) 
   logging.debug("featureMatrix = %s" % featureMatrix)
   self.assertEqual(np.count_nonzero(featureMatrix[0][5:16]), 0)
   self.assertEqual(np.count_nonzero(featureMatrix[0][19:21]), 0)
Example #25
    def corners(self, bandNames=None):
        "Return the corners of the tilted rectangle of valid image data as (x, y) pixel coordinates."
        alpha = self.mask(bandNames)
        alphaT = numpy.transpose(alpha)
        ysize, xsize = alpha.shape

        output = []
        for i in xrange(ysize):
            if numpy.count_nonzero(alpha[i]) > 0:
                break
        output.append((numpy.argwhere(alpha[i]).mean(), i))

        for i in xrange(xsize):
            if numpy.count_nonzero(alphaT[i]) > 0:
                break
        output.append((i, numpy.argwhere(alphaT[i]).mean()))

        for i in xrange(ysize - 1, 0, -1):
            if numpy.count_nonzero(alpha[i]) > 0:
                break
        output.append((numpy.argwhere(alpha[i]).mean(), i))

        for i in xrange(xsize - 1, 0, -1):
            if numpy.count_nonzero(alphaT[i]) > 0:
                break
        output.append((i, numpy.argwhere(alphaT[i]).mean()))

        return output
Example #26
def go(sltree, score, X_train, Y_train, X_test, Y_test):
    t_train_begin = time()
    sltree.train(X_train, Y_train)
    t_train_end = time()
    t_test_begin = time()
    Y_predict_train, AP_train, complexity_train, depths_train = sltree.test(X_train, Y_train, return_complexity=True, return_depth=True)
    Y_predict_test, AP_test, complexity_test, depths_test = sltree.test(X_test, Y_test, return_complexity=True, return_depth=True)
    t_test_end = time()
    n_acc_train = np.count_nonzero(Y_predict_train == Y_train)
    n_acc_test = np.count_nonzero(Y_predict_test == Y_test)

    score.update({'acc_train':float(n_acc_train)/Y_predict_train.shape[0],
                  'n_acc_train':n_acc_train,
                  'AP_train':AP_train,
                  'mAP_train':np.mean(AP_train),
                  'complexity_train':complexity_train,
                  'avg_complexity_train':np.mean(complexity_train),
                  'depths_train':depths_train,
                  'avg_depth_train':np.mean(depths_train),
                  'acc_test':float(n_acc_test)/Y_predict_test.shape[0],
                  'n_acc_test':n_acc_test,
                  'AP_test':AP_test,
                  'mAP_test':np.mean(AP_test),
                  'complexity_test':complexity_test,
                  'avg_complexity_test':np.mean(complexity_test),
                  'depths_test':depths_test,
                  'avg_depth_test':np.mean(depths_test),
                  'time_test':t_test_end-t_test_begin})
  def __init__(self,
               op_type,
               op_name,
               output_index,
               num_outputs,
               value):
    """Constructor of InfOrNanError.

    Args:
      op_type: Type name of the op that generated the tensor that generated the
        `inf`(s) or `nan`(s) (e.g., `Div`).
      op_name: Name of the op that generated the tensor with `inf`(s) or
        `nan`(s). This name is set by client and can be `None` if it is unset.
      output_index: The 0-based output index of the tensor that contains
        `inf`(s) or `nan`(s).
      num_outputs: Total number of outputs of the operation.
      value: The tensor value that contains `inf`(s) or `nan`(s).
    """
    self._op_type = op_type
    self._op_name = op_name
    self._output_index = output_index
    self._num_outputs = num_outputs
    self._value = value

    self._total_count = np.size(value)
    self._inf_count = np.count_nonzero(np.isinf(value))
    self._nan_count = np.count_nonzero(np.isnan(value))

    super(InfOrNanError, self).__init__(self._get_error_message())
Example #28
    def __precision(self, y_test, Y_vote):
        """ precision extended to multi-class classification """
        # predicted classes
        y_hat = np.argmax(Y_vote, axis=1)

        if True or self.mode == "one-vs-one":
            # need confusion matrix
            conf = self.__confusion(y_test, Y_vote)

            # consider each class separately
            prec = np.zeros(self.numClasses)
            for c in xrange(self.numClasses):
                # true positives: label is c, classifier predicted c
                tp = conf[c,c]

                # false positives: label is not c, classifier predicted c
                fp = np.sum(conf[:,c]) - conf[c,c]

                # precision
                prec[c] = tp*1./(tp+fp)
        elif self.mode == "one-vs-rest":
            # consider each class separately
            prec = np.zeros(self.numClasses)
            for c in xrange(self.numClasses):
                # true positives: label is c, classifier predicted c
                tp = np.count_nonzero((y_test==c) * (y_hat==c))

                # false positives: label is not c, classifier predicted c
                fp = np.count_nonzero((y_test!=c) * (y_hat==c))

                prec[c] = tp*1./(tp+fp)
        return prec
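
# With the confusion-matrix convention used by __recall/__precision above
# (conf[i, j] = samples of true class i predicted as class j), row sums give actual-class
# totals and column sums give predicted-class totals. A compact standalone sketch of both
# metrics from such a matrix (names here are illustrative, not part of the original class):
import numpy as np

def precision_recall_from_confusion(conf):
    conf = np.asarray(conf, dtype=float)
    tp = np.diag(conf)
    precision = tp / conf.sum(axis=0)  # tp / (tp + fp), column-wise
    recall = tp / conf.sum(axis=1)     # tp / (tp + fn), row-wise
    return precision, recall
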
Example #29
 def get_stats(self):
     # number of trades
     num_of_trades = self.record.shape[0] / 2
     # number of profit_lock_out
     num_of_profitlock = np.count_nonzero(self.record[:,2] == "profit_lock_out")
     # number of stop_out
     num_of_stopout = np.count_nonzero(self.record[:,2] == "trailing_stop_out")
     num_of_stopout += np.count_nonzero(self.record[:,2] == "hard_stop_out")
     # number of reversed_out
     num_of_reversed_out = np.count_nonzero(self.record[:,2] == "reversed_out")
     # number of time_out
     num_of_time_out = np.count_nonzero(self.record[:,2] == "time_out")
     # PNL
     i = 1
     for i in range(1, num_of_trades * 2, 2):
         if self.record[i, 3] == "long":
             self.pnl = np.append(self.pnl,float(self.record[i,4])-float(self.record[i-1,4]))
         elif self.record[i, 3] == "short":
             self.pnl = np.append(self.pnl,float(self.record[i-1,4])-float(self.record[i,4]))
     self.pnl = self.pnl[1:]
     # output statistical results
     print "# trades", num_of_trades, "# profit_lock", num_of_profitlock,\
           "# stopout", num_of_stopout, "# reversed_out",\
           num_of_reversed_out, "# time_out", num_of_time_out
     print "P&L Summary Stats:", lst.pnl.__len__(), lst.pnl.mean()/tickBase, lst.pnl.std()/tickBase, lst.pnl.min()/tickBase, lst.pnl.max()/tickBase
Example #30
def test_estimator():
    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
    omp.fit(X, y[:, 0])
    assert_equal(omp.coef_.shape, (n_features,))
    assert_equal(omp.intercept_.shape, ())
    assert np.count_nonzero(omp.coef_) <= n_nonzero_coefs

    omp.fit(X, y)
    assert_equal(omp.coef_.shape, (n_targets, n_features))
    assert_equal(omp.intercept_.shape, (n_targets,))
    assert np.count_nonzero(omp.coef_) <= n_targets * n_nonzero_coefs

    coef_normalized = omp.coef_[0].copy()
    omp.set_params(fit_intercept=True, normalize=False)
    omp.fit(X, y[:, 0])
    assert_array_almost_equal(coef_normalized, omp.coef_)

    omp.set_params(fit_intercept=False, normalize=False)
    omp.fit(X, y[:, 0])
    assert np.count_nonzero(omp.coef_) <= n_nonzero_coefs
    assert_equal(omp.coef_.shape, (n_features,))
    assert_equal(omp.intercept_, 0)

    omp.fit(X, y)
    assert_equal(omp.coef_.shape, (n_targets, n_features))
    assert_equal(omp.intercept_, 0)
    assert np.count_nonzero(omp.coef_) <= n_targets * n_nonzero_coefs
 
#-- read a timestep of 'ta'  
variable =  f.variables['ta']                       #-- first time step, lev, ncells
data     =  variable[0,0,:]                         #-- ta [time,lev,ncells]; miss _FillValue
var      =  data - 273.15                           #-- convert to degrees Celsius; miss _FillValue

#-- define _FillValue and missing_value if not existing
missing = -1e20

if not hasattr(var,'_FillValue'):
   var._FillValue  =  missing                       #-- set _FillValue
if not hasattr(var,'missing_value'): 
   var.missing_value =  missing                     #-- set missing_value

varM = np.ma.array(var, mask=np.equal(var,missing)) #-- mask array with missing values 
nummissing = np.count_nonzero(varM.mask)            #-- number of missing values

#-- set data intervals, levels, labels, color indices
varMin, varMax, varInt = -32, 28, 4                 #-- set data minimum, maximum, interval
 
levels   =  list(range(varMin,varMax,varInt))             #-- set levels array
nlevs    =  len(levels)                             #-- number of levels
labels   =  ['{:.2f}'.format(x) for x in levels]    #-- convert list of floats to list of strings

#-- print info to stdout
print('')
print('min/max:          {:0.2f} / {:0.2f}'.format(np.min(varM), np.max(varM)))
print('')
print('varMin:           {:3d}'.format(varMin))
print('varMax:           {:3d}'.format(varMax))
print('varInt:           {:3d}'.format(varInt))
Example #32
def train_model(X_train,
                y_train,
                seed,
                ccru_version,
                base_classifier,
                X_val,
                y_val,
                feature_subsets_per_cc=[]):
    pid = os.getpid()
    print('The id of ' + str(seed) + ' is :' + str(pid))
    # print('Train ecc: '+str(seed)+' started')

    if ccru_version == 'standard':
        model = ClassifierChain(base_classifier,
                                order='random',
                                random_state=seed)
    elif ccru_version == 'eccru' or ccru_version == 'eccru2' or ccru_version == 'eccru3':
        model = CCRU(base_classifier, order='random', random_state=seed)
    elif ccru_version == 'binary_relevance':
        model = SVC(gamma='auto', kernel='linear')
    else:
        print('Cannot recognize ccru version!!!!')

    class_1 = 1
    class_2 = 0
    if -1 in y_train:
        class_2 = -1

    if ccru_version == 'binary_relevance':

        class_1_counter = np.count_nonzero(y_train[:, 0] == class_1)
        class_2_counter = np.count_nonzero(y_train[:, 0] == class_2)
        # class_1_counter = y_train.flatten().tolist()[0].count(class_1)
        # class_2_counter = y_train.flatten().tolist()[0].count(class_2)

        if class_1_counter <= class_2_counter:
            minority_class = class_1
            majority_class = class_2
            minority_counter = class_1_counter
        else:
            minority_class = class_2
            majority_class = class_1
            minority_counter = class_2_counter

        sampled_index = [
            index for index, label in enumerate(y_train)
            if label == minority_class
        ]
        sampled_y = [minority_class] * minority_counter

        temp_sampled_index = [
            index for index, label in enumerate(y_train)
            if label == majority_class
        ]

        sampled_index.extend(
            random.sample(temp_sampled_index, minority_counter))
        sampled_y.extend([majority_class] * minority_counter)
        print('Train binary_relevance: ' + str(seed) + ' started')

        print('training on ' + str(len(sampled_y)))
        if len(feature_subsets_per_cc) != 0:
            trained_model = model.fit(
                X_train[np.array(sampled_index), feature_subsets_per_cc[seed]],
                y_train, X_val, y_val)
        else:
            trained_model = model.fit(X_train[np.array(sampled_index), :],
                                      sampled_y)
    else:
        print('Train ecc: ' + str(seed) + ' started ')
        if len(feature_subsets_per_cc) != 0:
            trained_model = model.fit(X_train[:, feature_subsets_per_cc[seed]],
                                      y_train, X_val, y_val)
        else:
            trained_model = model.fit(X_train, y_train, X_val, y_val)
    print('Train model: ' + str(seed) + ' ended')
    return trained_model
                plt.imshow(test_dataset[prefix][i, :, :, 0])
            else:
                plt.imshow(test_dataset[prefix][i, :, :, :])
            plt.title('out[' + prefix + ']')

    '''Convolutional neural network training
    
    Note: you need to use my branch of keras with the new functionality, that allows element-wise weights of the loss
    function
    '''

    # list all CPUs and GPUs
    device_list = K.get_session().list_devices()

    # number of GPUs
    gpu_number = np.count_nonzero(['GPU' in str(x) for x in device_list])

    # load dmap model that we are going to use as the basis for the contour model
    dmap_model_filename = os.path.join(saved_models_dir, dmap_model_basename + '_model_fold_' + str(i_fold) + '.h5')
    dmap_model = keras.models.load_model(dmap_model_filename)

    # instantiate contour model
    with tf.device('/cpu:0'):
        contour_model = fcn_sherrah2016_classifier(input_shape=train_dataset['im'].shape[1:])

    for lay in [1, 4, 7]:
        # transfer weights from dmap to contour model in the first 3 convolutional layers
        dmap_layer = dmap_model.get_layer(index=lay)
        contour_layer = contour_model.get_layer(index=lay)
        contour_layer.set_weights(dmap_layer.get_weights())
Example #34
    def opponent_policy(curr_state, prev_state, prev_action):
        opponent_policy.second_move = False
        # check if a new games is started.
        if np.count_nonzero(curr_state[2, :, :]) == board_size**2 - 1:
            opponent_policy.second_move = True

        # coords is the coordinate of the previous action.
        coords = GomokuEnv.action_to_coordinate(
            board_size, prev_action) if prev_action is not None else None

        if prev_state is None:
            '''
                First move should be the center of the board.
            '''
            move = (board_size // 2, board_size // 2)
        elif opponent_policy.second_move:
            '''
                If the AI must go second, it shouldn't think,
                it should just go diagonal adjacent to the first
                placed tile; diagonal into the larger area of the
                board if one exists
            '''
            if coords[1] <= board_size // 2:
                dy = 1
            else:
                dy = -1

            if coords[0] <= board_size // 2:
                dx = 1
            else:
                dx = -1
            move = (coords[0] + dx, coords[1] + dy)
            opponent_policy.second_move = False
        else:
            free_x, free_y = np.where(curr_state[2, :, :] == 1)
            possible_moves = [(x, y) for x, y in zip(free_x, free_y)]
            if len(possible_moves) == 0:
                # no more moves
                return None
            '''
                Strategy for the naive agent:
                1. Search if there is a win opportunity.
                2. Search if the opponent is winning; if yes, then block.
                3. Search if the opponent has an open stream that is 2 less than win_len; if yes, then block.
                4. Try to extend the longest existing trend.
            '''
            if curr_state[0, coords[0], coords[1]] != 0:
                color = 1
            else:
                color = 0

            # 1: opponent position, 2: empty, 3: my position
            my_board = np.add(
                np.subtract(curr_state[color, :, :],
                            curr_state[1 - color, :, :]), 2)
            # print(my_board)
            # check if we have a winning move
            move = search_winning_move(my_board, '3')
            if move is None:
                # check if opponent has a winning move
                move = search_winning_move(my_board, '1')
            if move is None:
                # check if we have open win_len - 2
                move = search_move(my_board, '2' + ('3' * (win_len - 2)) + '2',
                                   win_len)
            if move is None:
                # check if opponent has open win_len - 2
                move = search_move(my_board, '2' + ('1' * (win_len - 2)) + '2',
                                   win_len)

            if move is None:
                for i in range(2, level + 2):
                    if win_len - i < 1:
                        break
                    # search for connected win_len - i stones
                    move = search_move(my_board,
                                       '23{' + str(win_len - i) + '}',
                                       win_len - i + 1)
                    if move is None:
                        move = search_move(my_board,
                                           '3{' + str(win_len - i) + '}2',
                                           win_len - i + 1, False)
                    if move is not None:
                        break

            if move is None:
                # np.random.choice cannot sample from a list of tuples, so pick a random index instead
                move = possible_moves[np.random.randint(len(possible_moves))]
                print(move)

        return GomokuEnv.coordinate_to_action(board_size, move)
def modify(frame, data):
    # TODO: make parameters tunable elsewhere
    dim = 3  # system dimensionality
    slice_thickness = 50.  # thickness of volume slab in sweep direction
    positional_step = 50.  # shift of volume slab per evaluation
    slice_normal = np.array([0., 0., 1.])  # spatial direction of sweep

    # ignore off-diagonal stresses for now
    peratom_stress = data.particles["c_peratom_stress"][:, 0:3]
    position = data.particles["Position"]

    # process selection only, otherwise whole system
    if "Selection" in data.particles:
        global_selection = data.particles["Selection"]
    else:
        global_selection = np.ones(data.particles.count)

    global_peratom_stress = peratom_stress[np.nonzero(global_selection)]
    global_position = position[np.nonzero(global_selection)]

    global_natoms = np.count_nonzero(global_selection)

    global_max_pos = np.max(position[np.nonzero(global_selection)], axis=0)
    global_min_pos = np.min(position[np.nonzero(global_selection)], axis=0)

    global_measure = global_max_pos - global_min_pos

    # vector spanning the slice surface:
    slice_surface_diagonal = global_measure * (np.ones(dim) - slice_normal)

    # at the given slab thickness and step, this many slices fit into the selected volume:
    slice_count = int ( np.floor(
        np.dot(global_measure + slice_normal * slice_thickness, slice_normal) \
            / positional_step ) ) + 1

    # per-atom properties for intermediate results
    slice_overlap_count = data.particles_.create_property(
        'Slice Overlap Count', dtype=int, components=1)
    slice_volume_sum = data.particles_.create_property('Slice Volume Sum',
                                                       dtype=float,
                                                       components=1)
    local_cumulative_stress_tensor_diagonal = data.particles_.create_property(
        'Local Cumulative Stress', dtype=float, components=3)

    with slice_overlap_count:
        slice_overlap_count[np.nonzero(global_selection)] = np.zeros(
            global_natoms)
    with slice_volume_sum:
        slice_volume_sum[np.nonzero(global_selection)] = np.zeros(
            global_natoms)

    msg = """Sweeping selection of {} particles with
        extreme coordinates
            {}
        and
            {}
        by {} slices of {} [length units] thickness
        at steps of {} [length units] in direction ({})""".format(
        global_natoms, global_min_pos, global_max_pos, slice_count,
        slice_thickness, positional_step, slice_normal)

    print(msg)
    yield msg

    start_pos = global_min_pos - slice_thickness * slice_normal

    step_vec = positional_step * slice_normal

    # sweep "representative volume" and an according selection across system
    for i in range(slice_count):
        yield (i / slice_count)  # ovito progress bar

        cur_min_pos = start_pos + i * step_vec
        cur_max_pos = start_pos + i * step_vec \
            + slice_thickness * slice_normal + slice_surface_diagonal

        print("------------------------------------------")
        print("""slice #{} of {}, spanned between corners
                {}
            and
                {}""".format(i + 1, slice_count, cur_min_pos, cur_max_pos))

        selection = \
                np.greater_equal(
                    position,
                    cur_min_pos ).all(axis=1) \
            &   np.less(
                    position,
                    cur_max_pos ).all(axis=1) \
            &   global_selection

        natoms = np.count_nonzero(selection)
        print("  #selected particles        : {}".format(natoms))

        if natoms < 1:
            continue

        stress = np.sum(peratom_stress[np.nonzero(selection)], axis=0)
        max_pos = np.max(position[np.nonzero(selection)], axis=0)
        min_pos = np.min(position[np.nonzero(selection)], axis=0)
        measure = max_pos - min_pos
        volume = np.prod(measure)

        pressure_tensor_diagonal = -stress / volume
        pressure_tensor_trace = np.sum(pressure_tensor_diagonal) / dim

        print("  cumulative stress   (X,Y,Z): {}".format(stress))
        print("  maximum coordinates (X,Y,Z): {}".format(max_pos))
        print("  minimum coordinates (X,Y,Z): {}".format(min_pos))
        print("  slab measures       (X,Y,Z): {}".format(measure))
        print("  slab volume                : {}".format(volume))
        print("  pressure tensor diagonal   : {}".format(
            pressure_tensor_diagonal))
        print(
            "  pressure tensor trace      : {}".format(pressure_tensor_trace))

        # sum up slice stresses:
        stress_tensor_diagonal_outer = np.outer(stress, np.ones(natoms))
        with local_cumulative_stress_tensor_diagonal:
            local_cumulative_stress_tensor_diagonal[ np.nonzero(selection) ] \
                += stress_tensor_diagonal_outer.T
        with slice_overlap_count:
            slice_overlap_count[np.nonzero(selection)] += 1
        with slice_volume_sum:
            slice_volume_sum[np.nonzero(selection)] += volume

    local_mean_pressure_tensor_diagonal = \
        data.particles_.create_property('Local Mean Pressure Tensor Diagonal',
            dtype=float, components=3)
    local_mean_pressure_tensor_trace = \
        data.particles_.create_property('Local Mean Pressure Tensor Trace',
            dtype=float, components=1)

    # in case of overlap between volume elements:
    # weighted mean for any atom part of one or several overlaps
    # <p_ii> = sum(p_{ii,j} * V_j, j) / sum(3 V_j, j)
    with local_mean_pressure_tensor_diagonal:
        local_mean_pressure_tensor_diagonal[ np.nonzero(global_selection) ] = \
            - local_cumulative_stress_tensor_diagonal[
                    np.nonzero(global_selection) ] / \
                np.atleast_2d( dim * slice_volume_sum[
                    np.nonzero(global_selection) ] ).T
    with local_mean_pressure_tensor_trace:
        local_mean_pressure_tensor_trace[ np.nonzero(global_selection) ] = \
            np.sum( local_mean_pressure_tensor_diagonal[ \
                np.nonzero(global_selection) ], axis=1 )
 def print_summary(self, v):
     v = np.array(v)
     print("Max:", v.max(), "Min:", v.min(), "Mean:", v.mean(), "Size:",
           v.size, "# Non-zero:", np.count_nonzero(v))
def CountParticles(group, idArray):
    nParticle = np.count_nonzero(group['pid'].isin(idArray))
    return nParticle
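
# CountParticles() above assumes `group` behaves like a pandas DataFrame with a 'pid'
# column. A tiny usage sketch with made-up data:
import numpy as np
import pandas as pd

group = pd.DataFrame({'pid': [1, 2, 3, 4, 5]})
idArray = np.array([2, 4, 9])
print(CountParticles(group, idArray))  # 2
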
Example #38
        tot = np.count_nonzero(img[i * 2 * diam:i * 2 * diam + 2 * diam,
                                   j * 2 * diam:j * 2 * diam + 2 * diam])
        vec[k] = 1 if tot > 0.2 * diam * diam else 0

    print(vec)
    if repr(vec) in alphabet:
        return alphabet[repr(vec)]
    return '#'


letters = []

for i in range(N):
    for j in range(M - 1, -1, -1):
        if i - hei + 1 < 0 or j + wid - 1 >= M: continue
        tot = np.count_nonzero(dots_matrix[i - hei + 1:i + 1, j:j + wid])
        if tot == 0: continue

        ## big rect
        tot2 = np.count_nonzero(
            dots_matrix[max(0, i - 12 * diam + 1):min(i + 6 * diam + 1, N),
                        max(0, j - 4 * diam):min(M, j + 8 * diam)])
        offset = diam // 2
        if tot2 == tot and i - hei + 1 + offset >= 0 and i + 1 + offset <= N and j - offset >= 0 and j + wid - offset <= M:
            # We found a pattern, cut it
            pattern = dots_matrix[i - hei + 1 + offset:i + 1 + offset,
                                  j - offset:j + wid - offset]
            # print(pattern)
            print(pattern.shape)
            c = find_letter(pattern)
            if c == '#' and i + offset + 2 * diam < N:
Example #39
        words = line.split(' ')

        assert (len(words) == 4)

        # Look up the index of the words and store them.
        for i in range(0, 4):
            analogies[analogy_num, i] = model.vocab[words[i]].index

        # Increment the row number.
        analogy_num += 1

print('Validating indices...')
sys.stdout.flush()

# Verify no entries are zero.
assert (np.count_nonzero(analogies) == (analogies.shape[0] *
                                        analogies.shape[1]))

##############################################################################
#  Precompute Query Vectors
##############################################################################

print('Computing analogy query vectors...')
sys.stdout.flush()

# Create a matrix to hold all of the query vectors.
query_vecs = np.zeros((num_analogies, model.syn0.shape[1]))

# For each of the analogies...
for i in range(0, num_analogies):
import numpy as np
import os
import pylab as pl
import matplotlib.pyplot as plt
os.system("clear")


g=np.array([
    [12, 23],  # hello world
    [34, 34],
    [6666,9999]
])
g[0][1]=1+g[0][1]
g= np.count_nonzero(g)

print (g)

"""

raiz=np.sqrt
ln=np.log
sigma=[0.05]
accuracy=np.zeros(len(sigma))
for i in range (0,len(Y_train),1):
    if Y_train[i]==1:
        Y_train[i]=1;            
    else:
        Y_train[i]=-1;
for i in range (0,len(Y_test),1):
    if Y_test[i]==1:
        Y_test[i]=1;
    else:
        Y_test[i]=-1;

Y_train[1500:3000]=0
l=np.count_nonzero(Y_train)
u=len(Y_train)-l
n=l+u
for s in range(0,len(sigma),1):
    alpha=np.matlib.zeros((l+u,1))
    beta=np.matlib.zeros((l,1))
    K=np.matlib.zeros((l+u,l+u))
    Kx=np.matlib.zeros((len(X_test),l+u))
    J=np.matlib.zeros((l,l+u))
    L=np.matlib.zeros((l+u,l+u))
    W=np.matlib.zeros((l+u,l+u))
    D=np.matlib.zeros((l+u,l+u))
    Y_predcted=np.matlib.zeros((len(X_test),1))
    Q=np.matlib.zeros((l,l))
    Yd=np.matlib.zeros((l,l))
    f=np.matlib.zeros((len(X_test),1))
Example #42
    def __init__(self,
                 dataset,
                 bandwidth=None,
                 weights=None,
                 kernel=None,
                 extrema=None,
                 points=None,
                 reflect=None,
                 neff=None,
                 diagonal=False,
                 helper=True,
                 bw_rescale=None,
                 **kwargs):
        """Initialize the `KDE` class with the given dataset and optional specifications.

        Arguments
        ---------
        dataset : array_like (N,) or (D,N,)
            Dataset from which to construct the kernel-density-estimate.
            For multivariate data with `D` variables and `N` values, the data must be shaped (D,N).
            For univariate (D=1) data, this can be a single array with shape (N,).

        bandwidth : str, float, array of float, None  [optional]
            Specification for the bandwidth, or the method by which the bandwidth should be
            determined.  If a `str` is given, it must match one of the standard bandwidth
            determination methods.  If a `float` is given, it is used as the bandwidth in each
            dimension.  If an array of `float`s are given, then each value will be used as the
            bandwidth for the corresponding data dimension.

        weights : array_like (N,), None  [optional]
            Weights corresponding to each `dataset` point.  Must match the number of points `N` in
            the `dataset`.
            If `None`, weights are uniformly set to 1.0 for each value.

        kernel : str, Distribution, None  [optional]
            The distribution function that should be used for the kernel.  This can be a `str`
            specification that must match one of the existing distribution functions, or this can
            be a `Distribution` subclass itself that overrides the `_evaluate` method.

        neff : int, None  [optional]
            An effective number of datapoints.  This is used in the plugin bandwidth determination
            methods.
            If `None`, `neff` is calculated from the `weights` array.  If `weights` are all
            uniform, then `neff` equals the number of datapoints `N`.

        diagonal : bool,
            Whether the bandwidth/covariance matrix should be set as a diagonal matrix
            (i.e. without covariances between parameters).
            NOTE: see `KDE` docstrings, "Dynamic Range".

        """

        self._squeeze = (np.ndim(dataset) == 1)
        self._dataset = np.atleast_2d(dataset)
        ndim, ndata = self.dataset.shape

        reflect = kernels._check_reflect(reflect, self.dataset)

        self._helper = helper
        self._ndim = ndim
        self._ndata = ndata
        self._diagonal = diagonal
        self._reflect = reflect
        # The first time `points` are used, they need to be 'checked' for consistency
        self._check_points_flag = True
        self._points = points
        if ndata < 3:
            err = "ERROR: too few data points!  Dataset shape: ({}, {})".format(
                ndim, ndata)
            raise ValueError(err)

        # Set `weights`
        # --------------------------------
        weights_uniform = True
        if weights is not None:
            if np.shape(weights) != (ndata, ):
                raise ValueError("`weights` input should be shaped as (N,)!")

            if np.count_nonzero(weights) == 0 or np.any(~np.isfinite(weights)
                                                        | (weights < 0)):
                raise ValueError(
                    "Invalid `weights` entries, all must be finite and > 0!")

            weights = np.asarray(weights).astype(float)
            weights_uniform = False

        if neff is None:
            if weights_uniform:
                neff = ndata
            else:
                neff = np.sum(weights)**2 / np.sum(weights**2)

        self._weights = weights
        self._weights_uniform = weights_uniform  # currently unused
        self._neff = neff

        # Set covariance, bandwidth, distribution and kernel
        # -----------------------------------------------------------
        covariance = np.cov(dataset, rowvar=True, bias=False, aweights=weights)
        self._covariance = np.atleast_2d(covariance)

        if bandwidth is None:
            bandwidth = _BANDWIDTH_DEFAULT

        self._set_bandwidth(bandwidth, bw_rescale)

        # Convert from string, class, etc to a kernel
        dist = kernels.get_distribution_class(kernel)
        self._kernel = kernels.Kernel(distribution=dist,
                                      bandwidth=self._bandwidth,
                                      covariance=self._covariance,
                                      helper=helper,
                                      **kwargs)

        # Get Distribution Extrema
        # ------------------------------------
        # Determine the effective minima / maxima that should be used; KDE generally has support
        #   outside of the data values themselves.

        # If the Kernel is finite, then there is only support out to `bandwidth` beyond datapoints
        if self.kernel.FINITE:
            out = (1.0 + _NUM_PAD)
        # If infinite kernel, how many standard-deviations can we expect values to lie at
        else:
            out = sp.stats.norm.ppf(1.0 - 1.0 / neff)
            # Extra to be double sure...
            out *= 1.2

        # Find the effective-extrema in each dimension, to be used if `extrema` is not specified
        _bandwidth = np.sqrt(self.kernel.matrix.diagonal())
        eff_extrema = [[np.min(dd) - bw * out,
                        np.max(dd) + bw * out]
                       for bw, dd in zip(_bandwidth, self.dataset)]

        if (extrema is None) and (reflect is not None):
            extrema = copy.deepcopy(reflect)

        # `eff_extrema` is, by design, outside of data limits, so don't `warn` about limits
        extrema = utils._parse_extrema(eff_extrema, extrema, warn=False)
        self._extrema = extrema

        # Finish Initialization
        # -------------------------------
        self._cdf_grid = None
        self._cdf_func = None

        self._finalize()
        return
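# A hedged, self-contained sketch (plain NumPy, independent of the KDE class
# above) of the effective-sample-size rule applied when `weights` are given:
# neff = (sum(w))**2 / sum(w**2), which reduces to N for uniform weights.
import numpy as np

def effective_sample_size(weights):
    weights = np.asarray(weights, dtype=float)
    return np.sum(weights) ** 2 / np.sum(weights ** 2)

print(effective_sample_size(np.ones(100)))                # 100.0 -- uniform weights
print(effective_sample_size([10.0, 1.0, 1.0, 1.0, 1.0]))  # ~1.88 -- one weight dominates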
Example #43
0
def prepare_dataset_ABIDE_matrices_masked(mask):
    """
    Code to prepare the ABIDE (ASD) dataset
    Reads in .npy files from subfolders (for each class), combine into a list/numpy array and returns them

    Inputs:
    - mask: Numpy array containing the existing mask, for repeated removal of features (not used here, so it is always a simple mask of all 1s)

    Returns:
    - subject_names_list: list of subject names, used for creating folds that ensure that a subject isn't found in both train and test set
    - all_matrices: Numpy array of matrices containing the dataset
    - Y: Numpy array containing the dataset labels
    """

    src_dir = '../data/ABIDE/'

    num_remaining_features = np.count_nonzero(np.sum(mask, axis=0), axis=None)
    num_features = (num_remaining_features, num_remaining_features)
    non_zero_rows = np.where(np.sum(mask, axis=0) > 0)[0]

    all_matrices_normal = []
    subject_names_list = []

    for i, file_or_dir in enumerate(os.listdir(src_dir + "normal/")):
        if ".DS_Store" not in file_or_dir:
            all_matrices_normal.append(
                np.load(src_dir + "normal/" + file_or_dir))
            subject_names_list.append(file_or_dir[0:-10])

    for i, matrix in enumerate(all_matrices_normal):
        matrix = np.nan_to_num(matrix)
        masked_matrix = np.multiply(matrix, mask)
        reduced_matrix = masked_matrix[np.ix_(non_zero_rows, non_zero_rows)]
        all_matrices_normal[i] = reduced_matrix

    all_matrices_diseased = []

    for i, file_or_dir in enumerate(os.listdir(src_dir + "diseased/")):
        if ".DS_Store" not in file_or_dir:
            all_matrices_diseased.append(
                np.load(src_dir + "diseased/" + file_or_dir))
            subject_names_list.append(file_or_dir[0:-10])

    for i, matrix in enumerate(all_matrices_diseased):
        matrix = np.nan_to_num(matrix)
        masked_matrix = np.multiply(matrix, mask)
        reduced_matrix = masked_matrix[np.ix_(non_zero_rows, non_zero_rows)]
        all_matrices_diseased[i] = reduced_matrix

    all_matrices = np.empty(
        (len(all_matrices_normal) + len(all_matrices_diseased),
         num_features[0], num_features[1]))

    for i, matrix in enumerate(all_matrices):
        if i < len(os.listdir(src_dir + 'normal')):
            all_matrices[i] = all_matrices_normal[i]
        elif i < len(os.listdir(src_dir + 'normal')) + len(
                os.listdir(src_dir + 'diseased')):
            all_matrices[i] = all_matrices_diseased[
                i - (len(os.listdir(src_dir + 'normal')))]
        else:
            print("There are more matrices than expected!")

    label_normal = [0 for i in range(len(all_matrices_normal))]
    label_diseased = [1 for i in range(len(all_matrices_diseased))]

    all_labels = np.array(label_normal + label_diseased)

    Y = np.zeros((all_matrices.shape[0], 2))
    for i in range(all_labels.shape[0]):
        Y[i, all_labels[i]] = 1  # 1-hot vectors

    return (subject_names_list, all_matrices, Y)
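# A hedged toy sketch of the masking step above: mask columns whose column-sum
# is zero are removed from both axes of each (square) connectivity matrix via
# np.ix_, after multiplying the matrix by the mask.
import numpy as np

mask = np.array([[1, 0, 1],
                 [1, 0, 1],
                 [1, 0, 1]])
matrix = np.arange(9, dtype=float).reshape(3, 3)

non_zero_rows = np.where(np.sum(mask, axis=0) > 0)[0]          # array([0, 2])
reduced = np.multiply(matrix, mask)[np.ix_(non_zero_rows, non_zero_rows)]
print(reduced)                                                 # [[0. 2.] [6. 8.]]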
def main():
	#-- Read the system arguments listed after the program
	long_options=['DIR=','FILTER=','CLOBBER']
	optlist,arglist = getopt.getopt(sys.argv[1:],'D:F:C',long_options)

	#-- Set default settings
	subdir = 'atrous_32init_drop0.2_customLossR727.dir'
	FILTER = 0.
	flt_str = ''
	clobber = False
	for opt, arg in optlist:
		if opt in ("-D","--DIR"):
			subdir = arg
		elif opt in ("-F","--FILTER"):
			if arg not in ['NONE','none','None','N','n','0']:
				FILTER = float(arg)
				flt_str = '_%.1fkm'%(FILTER/1000)
		elif opt in ("-C","--CLOBBER"):
			clobber = True
			
	
	#-- Get list of files
	pred_dir = os.path.join(ddir,'stitched.dir',subdir)
	fileList = os.listdir(pred_dir)
	pred_list = [f for f in fileList if (f.endswith('.tif') and ('mask' not in f))]
	#-- output directory
	output_dir = os.path.join(pred_dir,'shapefiles.dir')
	#-- make directories if they don't exist
	if not os.path.exists(output_dir):
		os.mkdir(output_dir)
	#-- if CLOBBER is False, we are not overwriting old files, so remove existing files from the list
	if not clobber:
		print('Removing existing files.')
		existingList = os.listdir(output_dir)
		existing = [f for f in existingList if (f.endswith('.shp') and ('ERR' not in f) and f.startswith('gl_'))]
		rem_list = []
		for p in pred_list:
			if p.replace('.tif','%s.shp'%flt_str) in existing:
				#-- save index for removing at the end
				rem_list.append(p)
		for p in rem_list:
			print('Ignoring %s.'%p)
			pred_list.remove(p)
	
	# pred_list = ['gl_069_181218-181224-181224-181230_014095-025166-025166-014270_T110614_T110655.tif']
	# pred_list = ['gl_007_180518-180524-180530-180605_021954-011058-022129-011233_T050854_T050855.tif']
	print('# of files: ', len(pred_list))

	#-- threshold for getting contours and centerlines
	eps = 0.3

	#-- loop through prediction files
	#-- get contours and save each as a line in shapefile
	#-- also save training label as line
	for f in pred_list:
		#-- read file
		raster = rasterio.open(os.path.join(pred_dir,f),'r')
		im = raster.read(1)
		#-- get transformation matrix
		trans = raster.transform

		#-- also read the corresponding mask file
		mask_file = os.path.join(pred_dir,f.replace('.tif','_mask.tif'))
		print(mask_file)
		mask_raster = rasterio.open(mask_file,'r')
		mask = mask_raster.read(1)
		mask_raster.close()

		#-- get contours of prediction
		#-- close contour ends to make polygons
		im[np.nonzero(im[:,0] > eps),0] = eps
		im[np.nonzero(im[:,-1] > eps),-1] = eps
		im[0,np.nonzero(im[0,:] > eps)] = eps
		im[-1,np.nonzero(im[-1,:] > eps)] = eps
		contours = skimage.measure.find_contours(im, eps)
		#-- make contours into closed polyons to find pinning points
		#-- also apply noise filter and append to noise list
		x = {}
		y = {}
		noise = []
		pols = [None]*len(contours)
		pol_type = [None]*len(contours)
		for n,contour in enumerate(contours):
			#-- convert to coordinates
			x[n],y[n] = rasterio.transform.xy(trans, contour[:,0], contour[:,1])

			pols[n] = Polygon(zip(x[n],y[n]))
			#-- get elements of mask the contour is on
			submask = mask[np.round(contour[:, 0]).astype('int'), np.round(contour[:, 1]).astype('int')]
			#-- if more than half of the elements are from test tile, count contour as test type
			if np.count_nonzero(submask) > submask.size/2.:
				pol_type[n] = 'Test'
			else:
				pol_type[n] = 'Train'
		
		#-- Loop through all the polygons and taking any overlapping areas out
		#-- of the enclosing polygon and ignore the inside polygon
		ignore_list = []
		for i in range(len(pols)):
			for j in range(len(pols)):
				if (i != j) and pols[i].contains(pols[j]):
					pols[i] = pols[i].difference(pols[j])
					ignore_list.append(j)

		#-- loop through and apply noise filter
		for n in range(len(contours)):
			#-- apply filter
			if (n not in ignore_list) and (len(x[n]) < 2 or LineString(zip(x[n],y[n])).length <= FILTER):
				noise.append(n)

		#-- loop through remaining polygons and determine which ones are 
		#-- pinning points based on the width and length of the bounding box
		pin_list = []
		box_ll = [None]*len(contours)
		box_ww = [None]*len(contours)
		for n in range(len(contours)):
			box_ll[n] = pols[n].length
			box_ww[n] = pols[n].area/box_ll[n]
			if (n not in noise) and (n not in ignore_list):
				#-- make bounding box
				# box = pols[n].minimum_rotated_rectangle
				# bx,by = box.exterior.coords.xy
				# #-- get the dimensions of the sides of the box
				# edge_length = (Point(bx[0],by[0]).distance(Point(bx[1],by[1])), Point(bx[1],by[1]).distance(Point(bx[2],by[2])))
				#-- length is the larger dimension
				# box_ll = max(edge_length)
				# #-- width is the smaller dimension
				# box_ww = min(edge_length)
				#-- if the width is larger than 1/25 of the length, it's a pinning point
				if box_ww[n] > box_ll[n]/25:
					pin_list.append(n)

		#-- find overlap between ignore list and noise list
		if len(list(set(noise) & set(ignore_list))) != 0:
			sys.exit('Overlap not empty: {}'.format(list(set(noise) & set(ignore_list))))

		#-- initialize list of contour linestrings
		er = [None]*len(contours)
		cn = [] #[None]*(len(contours)-len(ignore_list)-len(noise))
		n = 0  # total center line counter
		pc = 1 # pinning point counter
		lc = 1 # line counter
		er_type = [None]*len(er)
		cn_type = [] #[None]*len(cn)
		er_class = [None]*len(er)
		cn_class = [] #[None]*len(cn)
		er_lbl = [None]*len(er)
		cn_lbl = [] #[None]*len(cn)
		#-- loop through polygons, get centerlines, and save
		for idx,p in enumerate(pols):
			er[idx] = [list(a) for a in zip(x[idx],y[idx])]
			er_type[idx] = pol_type[idx]
			if idx in noise:
				er_class[idx] = 'Noise'				
			elif idx in ignore_list:
				er_class[idx] = 'Inner Contour'
			else:
				if idx in pin_list:
					#-- pinning point. Just get perimeter of polygon
					xc,yc = pols[idx].exterior.coords.xy
					cn.append([[list(a) for a in zip(xc,yc)]])
					cn_class.append(['Pinning Point'])
					cn_type.append([pol_type[idx]])
					#-- set label
					cn_lbl.append(['pin%i'%pc])
					pc += 1 #- increment pinning point counter
				else:
					#-- get centerlines
					attributes = {"id": idx, "name": "polygon", "valid": True}
					#-- loop over interpolation distances until we can get a single line
					dis = pols[idx].length/400	#100
					try:
						cl = Centerline(p,interpolation_distance=dis, **attributes)
					except:
						print('not enough ridges. Skip')
						continue
					else:
						#-- merge all the lines
						merged_lines = linemerge(cl)
						if merged_lines.geom_type == 'LineString':
							#-- save coordinates of linestring
							xc,yc = merged_lines.coords.xy
							cn.append([[list(a) for a in zip(xc,yc)]])
							cn_class.append(['Grounding Line'])
							cn_lbl.append(['line%i'%lc])
							cn_type.append([pol_type[idx]])
							er_class[idx] = 'GL Uncertainty'
							#-- set label
							er_lbl[idx] = 'err%i'%lc
							lc += 1 #- increment line counter
						else:
							nml = len(merged_lines)
							#-- for lines with many bifurcations, the average segment is 
							#-- about 300m, so if # of segments is length/300 or more, ignore.
							if nml < pols[idx].length/300:
								coord_list = []
								for nn in range(nml):
									xc,yc = merged_lines[nn].coords.xy
									coord_list.append([list(a) for a in zip(xc,yc)])
								cn.append(coord_list)
								cn_class.append(['Grounding Line']*nml)
								cn_lbl.append(['line%i'%lc]*nml)
								cn_type.append([pol_type[idx]]*nml)
								er_class[idx] = 'GL Uncertainty'
								#-- set label
								er_lbl[idx] = 'err%i'%lc
								lc += 1 #- increment line counter
		
		#-- save all linestrings to file
		#-- make separate files for centerlines and errors
		# 1) GL file
		gl_file = os.path.join(output_dir,f.replace('.tif','%s.shp'%flt_str))
		w = shapefile.Writer(gl_file)
		w.field('ID', 'C')
		w.field('Type','C')
		w.field('Class','C')
		#-- loop over contour centerlines
		for n in range(len(cn)):
			for nn in range(len(cn[n])):
				w.line([cn[n][nn]])
				w.record(cn_lbl[n][nn], cn_type[n][nn], cn_class[n][nn])
		w.close()
		# create the .prj file
		prj = open(gl_file.replace('.shp','.prj'), "w")
		prj.write(raster.crs.to_wkt())
		prj.close()

		# 2) Err File
		er_file = os.path.join(output_dir,f.replace('.tif','%s_ERR.shp'%flt_str))
		w = shapefile.Writer(er_file)
		w.field('ID', 'C')
		w.field('Type','C')
		w.field('Class','C')
		w.field('Length','C')
		w.field('Width','C')
		#-- loop over contours and write them
		for n in range(len(er)):
			w.line([er[n]])
			w.record(er_lbl[n] , er_type[n], er_class[n], box_ll[n], box_ww[n])
		w.close()
		# create the .prj file
		prj = open(er_file.replace('.shp','.prj'), "w")
		prj.write(raster.crs.to_wkt())
		prj.close()

		#-- close input file
		raster.close()
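# A hedged, NumPy-only sketch of the tile-type vote used above: a contour is
# labelled 'Test' when more than half of the mask pixels it crosses are
# non-zero (the toy mask and contour below are made up for illustration).
import numpy as np

mask = np.zeros((4, 4), dtype=int)
mask[:, 2:] = 1                                  # right half of the tile is "test"
contour = np.array([[0.2, 3.1], [1.4, 2.8], [2.6, 2.2], [3.0, 0.4]])

rows = np.round(contour[:, 0]).astype('int')
cols = np.round(contour[:, 1]).astype('int')
submask = mask[rows, cols]
pol_type = 'Test' if np.count_nonzero(submask) > submask.size / 2. else 'Train'
print(pol_type)                                  # 'Test' -- 3 of 4 samples fall on mask==1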
def Write_MST_Gurobi(casename):
    
    global debug
    debug = False
    data = loadmat(casename)
    S1 = data['Seq_Retirada']
    C = data['C'][0][0]
    R = data['R'][0][0]
    Seq_Navio_Inv = data['Seq_Navio_Inv'].tolist()[0]
    Seq_Navio_Id_Inv = data['Seq_Navio_Id_Inv'].tolist()[0]
    Patios = data['patio'].tolist()
    q_o = data['q_o'].tolist()
    q_d = data['q_d'].tolist()
    q_r = data['q_r'].tolist()
    q_c = data['q_c'].tolist()
    w_o = data['w_o'].tolist()
    w_d = data['w_d'].tolist()
    w_a = data['w_a'].tolist()
    w_r = data['w_r'].tolist()
    w_c = data['w_c'].tolist()
    phi = data['phi'].tolist()
    Npatios = len(Patios)
    P=Npatios+1 # number of ports
    
    for o in range(Npatios):
        for d in range(P):
            if phi[o][d].shape[1] != 0 :
                phi[o][d] = phi[o][d].tolist()[0]         
            else:
                phi[o][d] = []
    
    omega=[ [] for i in range(Npatios) ] # omega = set of container indices in each yard
    S = []
    for i in range(Npatios):
        Patios[i]=Patios[i][0]
        omega[i]=np.extract(Patios[i]!= 0 , Patios[i]).tolist()
        S.append(S1[0][i].tolist()[0])            
        
    N=[ 0 for i in range(Npatios) ] # N = number of containers in each yard
    for i in range(Npatios):
        N[i]=np.count_nonzero(Patios[i])
    
    T=N
    
    H=[] # H = number of rows in each yard
    for i in range(Npatios):
        H.append(Patios[i].shape[0])
    
    W = [] # W = number of columns in each yard
    for i in range(Npatios):
        W.append(Patios[i].shape[1])
    
    print('parameters created')
    
    model = cplex.Cplex()
    start_time = model.get_time()
    model.objective.set_sense(model.objective.sense.minimize)    
    startVar=[]
    startVal=[]    
        
    #------------------------------------------------------------#
    #--------------------  Variaveis  ---------------------------#
    #------------------------------------------------------------#
    nvar = 0 
    model,nvar,startVar,startVal = variavel_v(model,S,N,T,nvar,omega,startVar,startVal)
    model,nvar,startVar,startVal = variavel_q(model,N,R,C,nvar,q_o,q_d,q_r,q_c,startVar,startVal)
    model,nvar,startVar,startVal = variavel_u(model,N,R,C,nvar,Seq_Navio_Inv,startVar,startVal)
    model,nvar,startVar,startVal = variavel_w(model,N,R,C,nvar,w_o,w_d,w_a,w_r,w_c,startVar,startVal)
    model,nvar,startVar,startVal = variavel_z(model,omega,N,T,R,C,S,Seq_Navio_Id_Inv,nvar,startVar,startVal)
    model,nvar,startVar,startVal = variavel_y(model,omega,Patios,S,N,H,W,T,nvar,startVar,startVal)
    model,nvar,startVar,startVal = variavel_b(model,omega,Patios,S,N,H,W,T,nvar,startVar,startVal)
    model,nvar,startVar,startVal = variavel_x(model,omega,N,H,W,T,nvar,startVar,startVal)
    print('variables created')
    
    solucao_inicial_gurobi_mst = casename + '.mst'    
    out_file = open(solucao_inicial_gurobi_mst,'w+') 
    out_file.write("# MIP start \n")   

    for Var,Val in zip(startVar,startVal):

        out_file.write(str(Var)+" "+str(int(Val)) + "\n")
    
    out_file.close()
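# A hedged sketch of the MIP-start (.mst) format written above: a header line
# followed by "<variable name> <integer value>" pairs. The variable names and
# values here are placeholders, not output of the model-building helpers.
start_var = ['x_0_0_1', 'x_0_1_2', 'y_1_3']
start_val = [1.0, 0.0, 1.0]

with open('example_start.mst', 'w') as out_file:
    out_file.write("# MIP start \n")
    for var, val in zip(start_var, start_val):
        out_file.write(str(var) + " " + str(int(val)) + "\n")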
TS = []
beta = (0.5)  #For background ts
TS_beta = []  #Calculated from the total TS median after we get all the TS.
beta_err = []
gamma = []

for file in files:
    for item in range(len(file['n_inj'])):
        n_inj.append(file['n_inj'][item])
        nsources.append(file['nsources'][item])
        TS.append(file['TS'][item])
        gamma.append(file['gamma'][item])

TSs = file['TS']
TS_beta = np.percentile(TSs, 100. * (1. - beta))
m = np.count_nonzero(np.asarray(TSs) > (TS_beta))
i = len(TSs)
fraction = float(m) / float(i)
beta_err = (np.sqrt(fraction * (1. - fraction) /
                    float(i)) if 0 < beta < 1 else 1.)

##Now we have all the pieces of the original dictionary. Time to glue bckg_trials back in place, in their proper file type.##
bckg_trials = {
    'n_inj': n_inj,
    'nsources': np.asarray(nsources),
    'TS': np.asarray(TS),
    'beta': beta,
    'beta_err': beta_err,
    'TS_beta': TS_beta,
    'gamma': np.asarray(gamma)
}
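# A hedged, self-contained sketch of the background-TS threshold and its
# binomial error bar computed above, using synthetic test statistics in place
# of the values read from `files`.
import numpy as np

rng = np.random.default_rng(0)
TS = rng.chisquare(df=1, size=10000)              # stand-in background TS values
beta = 0.5

TS_beta = np.percentile(TS, 100. * (1. - beta))   # the median when beta = 0.5
fraction = np.count_nonzero(TS > TS_beta) / float(len(TS))
beta_err = (np.sqrt(fraction * (1. - fraction) / len(TS)) if 0 < beta < 1 else 1.)
print(TS_beta, fraction, beta_err)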
Example #47
0
def has_powers(self):
    """Identify if the files include the power metrics."""
    if np.count_nonzero(np.isnan(self.powers)) == len(self.powers):
        return False
    return True
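# A hedged sketch of the all-NaN check above: the power metrics are treated as
# missing only when every entry is NaN.
import numpy as np

powers = np.array([np.nan, np.nan, np.nan])
has_powers = not (np.count_nonzero(np.isnan(powers)) == len(powers))
print(has_powers)   # False -- every value is NaN, so no power metrics are present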
Example #48
0
def prepare_dataset_ADNI_matrices_masked(choice, mask):
    """
    Code to prepare the ADNI dataset
    Reads in .npy files from subfolders (for each class), combine into a list/numpy array and returns them

    Inputs:
    - choice: one of 'CN-AD', 'CN-MCI', 'MCI-AD' (str)
    - mask: Numpy array containing the existing mask, for repeated removal of features (not used here, so it is always a simple mask of all 1s)

    Returns:
    - subject_names_list: list of subject names, used for creating folds that ensure that a subject isn't found in both train and test set
    - all_matrices: Numpy array of matrices containing the dataset
    - Y: Numpy array containing the dataset labels
    """

    src_dir = '../data/ADNI/'

    if not (choice == 'CN-MCI' or choice == 'MCI-AD' or choice == 'CN-AD'):
        print(
            'Invalid input detected. Allowable options: CN-MCI, MCI-AD, CN-AD')
        exit()

    subject_names_list = []
    num_remaining_features = np.count_nonzero(np.sum(mask, axis=0), axis=None)
    num_features = (num_remaining_features, num_remaining_features)

    non_zero_rows = np.where(np.sum(mask, axis=0) > 0)[0]

    if 'CN' in choice:

        print('Preparing CN...')
        all_matrices_cn = []

        for i, file_or_dir in enumerate(os.listdir(src_dir + "CN/")):
            if ".DS_Store" not in file_or_dir:
                all_matrices_cn.append(np.load(src_dir + "CN/" + file_or_dir))
                subject_names_list.append(file_or_dir[10:18])

        for i, matrix in enumerate(all_matrices_cn):
            matrix = np.nan_to_num(matrix)
            masked_matrix = np.multiply(matrix, mask)
            reduced_matrix = masked_matrix[np.ix_(non_zero_rows,
                                                  non_zero_rows)]
            all_matrices_cn[i] = reduced_matrix

    if 'MCI' in choice:

        print('Preparing MCI...')
        all_matrices_mci = []

        for i, file_or_dir in enumerate(os.listdir(src_dir + "MCI/")):
            if ".DS_Store" not in file_or_dir:
                all_matrices_mci.append(np.load(src_dir + "MCI/" +
                                                file_or_dir))
                subject_names_list.append(file_or_dir[10:18])

        for i, matrix in enumerate(all_matrices_mci):
            matrix = np.nan_to_num(matrix)
            masked_matrix = np.multiply(matrix, mask)
            reduced_matrix = masked_matrix[np.ix_(non_zero_rows,
                                                  non_zero_rows)]
            all_matrices_mci[i] = reduced_matrix

    if 'AD' in choice:

        print('Preparing AD...')
        all_matrices_ad = []

        for i, file_or_dir in enumerate(os.listdir(src_dir + "AD/")):
            if ".DS_Store" not in file_or_dir:
                all_matrices_ad.append(np.load(src_dir + "AD/" + file_or_dir))
                subject_names_list.append(file_or_dir[10:18])

        for i, matrix in enumerate(all_matrices_ad):
            matrix = np.nan_to_num(matrix)
            masked_matrix = np.multiply(matrix, mask)
            reduced_matrix = masked_matrix[np.ix_(non_zero_rows,
                                                  non_zero_rows)]
            all_matrices_ad[i] = reduced_matrix

    ## Combine

    if choice == 'CN-MCI':

        all_matrices = np.empty((len(all_matrices_cn) + len(all_matrices_mci),
                                 num_features[0], num_features[1]))

        for i, matrix in enumerate(all_matrices):
            if i < len(os.listdir(src_dir + 'CN')):
                all_matrices[i] = all_matrices_cn[i]
            elif i < len(os.listdir(src_dir + 'CN')) + len(
                    os.listdir(src_dir + 'MCI')):
                all_matrices[i] = all_matrices_mci[
                    i - (len(os.listdir(src_dir + 'CN')))]
            else:
                print("There are more matrices than expected!")

        label_cn = [0 for i in range(len(all_matrices_cn))]
        label_mci = [1 for i in range(len(all_matrices_mci))]

        all_labels = np.array(label_cn + label_mci)

        Y = np.zeros((all_matrices.shape[0], 2))
        for i in range(all_labels.shape[0]):
            Y[i, all_labels[i]] = 1  # 1-hot vectors

    elif choice == 'MCI-AD':
        all_matrices = np.empty((len(all_matrices_mci) + len(all_matrices_ad),
                                 num_features[0], num_features[1]))

        for i, matrix in enumerate(all_matrices):
            if i < len(os.listdir(src_dir + 'MCI')):
                all_matrices[i] = all_matrices_mci[i]
            elif i < len(os.listdir(src_dir + 'MCI')) + len(
                    os.listdir(src_dir + 'AD')):
                all_matrices[i] = all_matrices_ad[
                    i - (len(os.listdir(src_dir + 'MCI')))]
            else:
                print("There are more matrices than expected!")

        label_mci = [0 for i in range(len(all_matrices_mci))]
        label_ad = [1 for i in range(len(all_matrices_ad))]

        all_labels = np.array(label_mci + label_ad)

        Y = np.zeros((all_matrices.shape[0], 2))
        for i in range(all_labels.shape[0]):
            Y[i, all_labels[i]] = 1  # 1-hot vectors

    elif choice == 'CN-AD':
        all_matrices = np.empty((len(all_matrices_cn) + len(all_matrices_ad),
                                 num_features[0], num_features[1]))

        for i, matrix in enumerate(all_matrices):
            if i < len(os.listdir(src_dir + 'CN')):
                all_matrices[i] = all_matrices_cn[i]
            elif i < len(os.listdir(src_dir + 'CN')) + len(
                    os.listdir(src_dir + 'AD')):
                all_matrices[i] = all_matrices_ad[
                    i - (len(os.listdir(src_dir + 'CN')))]
            else:
                print("There are more matrices than expected!")

        label_cn = [0 for i in range(len(all_matrices_cn))]
        label_ad = [1 for i in range(len(all_matrices_ad))]

        all_labels = np.array(label_cn + label_ad)

        Y = np.zeros((all_matrices.shape[0], 2))
        for i in range(all_labels.shape[0]):
            Y[i, all_labels[i]] = 1  # 1-hot vectors

    else:
        print('Not possible to reach here!')
        exit()

    return (subject_names_list, all_matrices, Y)
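# A hedged sketch of the one-hot label construction used above; indexing with
# np.arange is an equivalent vectorised alternative to the explicit loop.
import numpy as np

all_labels = np.array([0, 0, 1, 1, 1])
Y = np.zeros((all_labels.shape[0], 2))
Y[np.arange(all_labels.shape[0]), all_labels] = 1   # 1-hot vectors
print(Y)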
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import numpy as np

os.system("clear")


fig = pl.figure()
axx = Axes3D(fig)
raiz=np.sqrt
ln=np.log
 
Xa = np.arange(-2, 12, 0.1)
Ya = np.arange(-2, 12, 0.1)
#X, Y = np.meshgrid(X, Y)
print (np.count_nonzero(Xa))
l = 2
rho= 100
ik=25
Electrodos=8
E=Electrodos-1

P=np.array([
    [0.55, 0.55],   # Position of electrode A
    [4.55, 0.55],   # Position of electrode B
    [8.55, 0.55],   # Position of electrode C
    [0.55, 4.55],   # Position of electrode D
    [8.55, 4.55],   # Position of electrode E
    [0.55, 8.55],   # Position of electrode F
    [4.55, 8.55],   # Position of electrode G
    [8.55, 8.55]    # Position of electrode H
Example #50
0
    def sparsify_dynamics(mtx, _b, init_tol, max_iter=25, thresh_iter=10,
                          l0_penalty=None, split=0.8, normalize=0):
        """
        :param mtx: the theta matrix of shape (M, N)
        :param _b: a vector or an array of shape (M,) or (M, K)
        :param init_tol: maximum tolerance (cut_off value)
        :param max_iter: maximum iteration of the outer loop
        :param thresh_iter: maximum iteration for threshold least squares
        :param l0_penalty: penalty factor for nonzero coefficients
        :param split: proportion of the training set
        :param normalize: normalization methods, default as 0 (no normalization)
        :return: the best coefficients of fit
        """
        if mtx.ndim != 2:
            raise ValueError('mtx is not a 2D numpy array!')
        if _b.ndim == 1:
            _b = _b[:, np.newaxis]
        elif _b.ndim > 2:
            raise ValueError('b is not a 1D/2D numpy array!')

        # split the data
        np.random.seed(12345)
        _n = mtx.shape[0]
        train = np.random.choice(_n, int(_n*split), replace=False)
        test = [x for x in np.arange(_n) if x not in train]
        train_mtx = mtx[train, :]
        test_mtx = mtx[test, :]
        train_b = _b[train, :]
        test_b = _b[test, :]
        # set up initial tolerance, l0 penalty, best error, etc.
        if l0_penalty is None:
            # l0_penalty = 0.001*np.linalg.cond(mtx)
            l0_penalty = np.linalg.norm(test_b) / len(test)

        tol = d_tol = float(init_tol)

        # no sparsity constraints
        w_best = np.linalg.lstsq(train_mtx, train_b, rcond=None)[0]
        err_best = np.linalg.norm(test_b - test_mtx.dot(w_best), 2) + \
                   l0_penalty*np.count_nonzero(w_best)
        tol_best = 0.
        imp_flag = True
        for i in np.arange(max_iter):
            _w = SINDyBase.threshold_ls(train_mtx, train_b, tol, thresh_iter, normalize)
            err = np.linalg.norm(test_b - test_mtx.dot(_w), 2) + l0_penalty*np.count_nonzero(_w)
            if err < err_best:
                err_best = err
                w_best = _w
                tol_best = tol
                tol += d_tol
                imp_flag = False
            else:
                # tol = max([0, tol - d_tol])
                tol = max([0, tol - 2*d_tol])
                # d_tol /= 2
                d_tol = 2 * d_tol/(max_iter - i)
                tol = tol + d_tol

        if imp_flag:
            print('cutoff value may be too small/large to threshold ...')

        return w_best, tol_best
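# A hedged sketch of a single pass of thresholded least squares, the kind of
# routine `SINDyBase.threshold_ls` is assumed to implement: coefficients with
# magnitude below `tol` are zeroed and the remaining ones are refit.
import numpy as np

rng = np.random.default_rng(1)
mtx = rng.normal(size=(100, 5))                       # toy theta matrix
w_true = np.array([[1.5], [0.0], [0.0], [-2.0], [0.0]])
_b = mtx @ w_true + 0.01 * rng.normal(size=(100, 1))  # noisy measurements

tol = 0.1
_w = np.linalg.lstsq(mtx, _b, rcond=None)[0]
small = np.abs(_w) < tol                              # coefficients to prune
_w[small] = 0.0
for k in range(_b.shape[1]):                          # refit surviving coefficients
    big = ~small[:, k]
    if np.any(big):
        _w[big, k] = np.linalg.lstsq(mtx[:, big], _b[:, k], rcond=None)[0]
print(np.count_nonzero(_w))                           # 2 non-zero terms remain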
Example #51
0
            del loss
            del t
            del x

        # Display the current value of the loss function
        print('  train loss = {0:.4f}'.format(sum_loss / n_samples), file=sys.stderr)

        # Compute and display classification accuracy on the evaluation data
        model.eval()
        n_failed = 0
        for i in range(0, n_samples_ev, batchsize):
            x = torch.tensor(features_ev[i : i + batchsize], device=dev)
            t = torch.tensor(labels_ev[i : i + batchsize], device=dev, dtype=torch.long)
            y = model(x)
            y = y.to('cpu').detach().numpy().copy()
            t = t.to('cpu').detach().numpy().copy()
            n_failed += np.count_nonzero(np.argmax(y, axis=1) - t)
            del y
            del x
            del t
        acc = (n_samples_ev - n_failed) / n_samples_ev
        print('  accuracy = {0:.2f}%'.format(100 * acc), file=sys.stderr)

        # Visualize the current state
        if visualization_interval > 0 and (e + 1) % visualization_interval == 0:
            visualizer.show(model, device=dev, samples=data, title='Epoch {0}'.format(e + 1)) # display the plot

        print('', file=sys.stderr)

    print('', file=sys.stderr)
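# A hedged, NumPy-only sketch of the error count above:
# np.count_nonzero(np.argmax(y, axis=1) - t) counts samples whose predicted
# class differs from the integer label.
import numpy as np

y = np.array([[0.9, 0.1],      # predicted class 0
              [0.2, 0.8],      # predicted class 1
              [0.6, 0.4]])     # predicted class 0
t = np.array([0, 1, 1])

n_failed = np.count_nonzero(np.argmax(y, axis=1) - t)
acc = (len(t) - n_failed) / len(t)
print(n_failed, acc)           # 1 failure -> accuracy 0.666...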
Example #52
0
def iterate(mode, args, loader, model, optimizer, logger, best_acc, epoch):
    start_val = time.clock()
    nonsense = 0
    acc_sum = 0
    # switch to appropriate mode
    assert mode in ["train", "val", "eval", "test_prediction", "test_completion"], \
        "unsupported mode: {}".format(mode)
    if mode == 'train':
        model.train()
        lr = completion_segmentation_helper.adjust_learning_rate(
            args.lr, optimizer, epoch)
    else:
        model.eval()
        lr = 0
    lane_acc_lst = []
    lane_loss_lst = []
    total_acc_lst = []
    for i, batch_data in enumerate(loader):
        start = time.time()
        batch_data = {
            key: val.to(device)
            for key, val in batch_data.items() if val is not None
        }

        # label for road segmentation
        road_label = batch_data[
            'road_label'] if mode != 'test_road_lane_segmentation' else None

        # label for lane-line segmentation
        lane_label = batch_data[
            'lane_label'] if mode != 'test_road_lane_segmentation' else None

        data_time = time.time() - start

        start = time.time()

        if mode == 'val':
            with torch.no_grad():  # no gradient computation during validation, which saves GPU memory
                pred = model(batch_data)
        else:
            pred = model(batch_data)

        lane_pred = pred
        start_ = time.clock()  # not counted toward the timing
        if mode == 'train':
            # semantic segmentation loss
            #road_loss = road_criterion(road_pred, road_label.long())
            if epoch == 0:
                class_weight = torch.tensor([0.5, 0.5])
            else:
                lane_pred_w = lane_pred.data.cpu()
                bs, c, h, w = lane_pred_w.size()
                value_w, index_w = lane_pred_w.max(1)
                LPW = 0
                for i in range(bs):
                    lpw = index_w[i].view(h, w).numpy()
                    LPW += (np.count_nonzero(lpw) / lpw.size)
                LPW /= bs
                class_weight = torch.tensor([LPW, 1 - LPW])
                #print('class_weight: ',class_weight)
            lane_criterion = nn.NLLLoss2d(weight=class_weight.cuda())
            lane_loss = lane_criterion(lane_pred, lane_label.long())
            lane_loss_lst.append(lane_loss.item())

            # loss
            #loss = road_loss + lane_loss
            loss = lane_loss

            #print('lane loss {}'.format(lane_loss.data.cpu()))

            # accuracy
            #road_acc = acc(road_pred.data.cpu(), road_label.cpu())
            total_acc, lane_acc = acc(lane_pred.data.cpu(), lane_label.cpu())
            lane_acc_lst.append(lane_acc.item())
            total_acc_lst.append(total_acc.item())

            #print('total acc {}'.format(total_acc), 'lane acc {}'.format(lane_acc))
            #print('\n-------------------------epoch '+str(epoch)+'-----------------------------\n')

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        elif mode == 'val':
            # accuracy
            #road_acc = acc(road_pred.data.cpu(), road_label.cpu())
            total_acc, lane_acc = acc(lane_pred.data.cpu(), lane_label.cpu())
            lane_acc_lst.append(lane_acc.item())
            total_acc_lst.append(total_acc.item())
            #print('total acc {}'.format(total_acc), 'lane acc {}'.format(lane_acc))
            #print('\n------------------------epoch '+str(epoch)+'------------------------------\n')

            #accuracy = (road_acc+lane_acc)/2
            accuracy = lane_acc

            acc_sum += accuracy

        gpu_time = time.time() - start

        # measure accuracy and record loss
        with torch.no_grad():
            # save the prediction results as images
            logger.conditional_save_pred(mode, i, pred, epoch)
        nonsense += (time.clock() - start_)

    print('total cost time: ', time.clock() - start_val - nonsense)
    if mode == 'train':
        lane_loss_mean = np.array(lane_loss_lst).mean()
        lane_acc_mean = np.array(lane_acc_lst).mean()
        total_acc_mean = np.array(total_acc_lst).mean()
        print('lane loss {}'.format(lane_loss_mean),
              'lane acc {}'.format(lane_acc_mean),
              'total acc {}'.format(total_acc_mean))
    elif mode == 'val':
        lane_acc_mean = np.array(lane_acc_lst).mean()
        total_acc_mean = np.array(total_acc_lst).mean()
        print('lane acc {}'.format(lane_acc_mean),
              'total acc {}'.format(total_acc_mean))
    print('\n-------------------------epoch ' + str(epoch) +
          '-----------------------------\n')
    acc_avg = acc_sum / len(loader)

    is_best = (acc_avg > best_acc)

    # save summary information once per epoch
    #    avg = logger.conditional_save_info(mode, average_meter, epoch)
    #    is_best = logger.rank_conditional_save_best(mode, avg, epoch)
    #    if is_best and not (mode == "train"):
    #        # during validation, save the best prediction results as images
    #        logger.save_img_comparison_as_best(mode, epoch)
    #    logger.conditional_summarize(mode, avg, is_best)

    if mode == 'train':
        return acc_avg, is_best, lane_loss_mean, lane_acc_mean, total_acc_mean

    elif mode == 'val':
        return acc_avg, is_best, lane_acc_mean, total_acc_mean
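# A hedged sketch of the class-weight heuristic above: the lane class receives
# a weight equal to the fraction of non-lane pixels, so the rarer class is
# weighted more heavily in the loss.
import numpy as np

pred_classes = np.zeros((64, 64), dtype=int)
pred_classes[30:34, :] = 1                    # a thin band of predicted "lane" pixels

lpw = np.count_nonzero(pred_classes) / pred_classes.size
class_weight = [lpw, 1 - lpw]                 # [background weight, lane weight]
print(class_weight)                           # [0.0625, 0.9375]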
Example #53
0
# Training loss
loss = tf.reduce_mean(cross_entropy)

# Create an operation that initializes all variables
init = tf.global_variables_initializer()

# Test Cases
with tf.Session() as session:
    session.run(init)
    session.run(loss, feed_dict=train_feed_dict)
    session.run(loss, feed_dict=valid_feed_dict)
    session.run(loss, feed_dict=test_feed_dict)
    biases_data = session.run(biases)

assert not np.count_nonzero(biases_data), 'biases must be zeros'

print('Tests Passed!')

#%%
# Determine if the predictions are correct
is_correct_prediction = tf.equal(tf.argmax(prediction, 1),
                                 tf.argmax(labels, 1))
# Calculate the accuracy of the predictions
accuracy = tf.reduce_mean(tf.cast(is_correct_prediction, tf.float32))

print('Accuracy function created.')

#%%
# TODO: Find the best parameters for each configuration
eps = [1, 2, 3, 4, 5]
Example #54
0
def test_subtract(self):
    subtracted_field = self.subtract(self)
    assert numpy.count_nonzero(subtracted_field.values) == 0
def main(filename, read_raw_matrix=False):
    """

    :param filename: path to be configuration file to read out data sets (type string)
    :param read_raw_matrix: only true if you really want to load the giant unfiltered count matrix
    :return: raw and filtered read out matrices and annotation dataset
    """

    # config_paths = load_config_file(json_filename=json_filename)
    # check whether to do a single sample load or if you have more than one sample to load
    # input_path = os.path.join(os.environ['PYTHONPATH'].split(os.pathsep)[0], 'Input', 'config_files', filename)
    config_paths = ht.load_sample_config_file(filename=filename,
                                              file_type="csv")

    absolute_path = os.path.join(os.sep, config_paths[0][0],
                                 config_paths[2][0])
    # matrix_file_end, features_file_end, barcode_file_end
    feature_bc_matrix_string = np.array(
        [config_paths[9][0], config_paths[8][0], config_paths[7][0]])

    # # Parse Filenames
    project = config_paths[2][
        0]  # if more examples then use sample_strings.pop(0)
    sample_id = config_paths[3][0]
    # loom_id = config_paths[22][0]  # contains the information about spliced and unspliced genes -- todo

    # save sample ids in list
    list_sample_ids = [sample_id]

    if read_raw_matrix:
        print(
            "\nRaw/Unfiltered feature-barcode matrix contains every barcode from fixed list of known-good barcode "
            "sequences. This includes background and cell associated barcodes")

        # path to raw files ending with .mtx and .tsv (type string)
        raw_feature_bc_matrix_path = os.path.join(absolute_path,
                                                  config_paths[1][0],
                                                  project + "_" + sample_id,
                                                  config_paths[4][0],
                                                  config_paths[5][0])
        print(
            "Filtered feature-barcode matrix contains only cells associated barcodes"
        )
        # path h5
        filtered_bc_matrix_h5_path = os.path.join(absolute_path,
                                                  config_paths[1][0],
                                                  project + "_" + sample_id,
                                                  config_paths[4][0],
                                                  config_paths[11][0])
        raw_bc_matrix_h5_path = os.path.join(absolute_path, config_paths[1][0],
                                             project + "_" + sample_id,
                                             config_paths[4][0],
                                             config_paths[10][0])

        # path to filtered files ending with .mtx and .tsv (type string)
        filtered_feature_bc_matrix_path = os.path.join(
            absolute_path, config_paths[1][0], project + "_" + sample_id,
            config_paths[4][0], config_paths[6][0])

        # # Annotate data
        print("\n-------- Start: Read out values --------")
        # Two options to read in feature_ids, gene_names, feature_types, barcodes, count_matrix_data
        # 1. Malte Luecken using Scanpy from TheisLab; read out mtx and tsv files
        raw_annot_data, filtered_annot_data = _scanpy_load_annotate_tsv_mtx_files(
            raw_feature_bc_matrix_path,
            filtered_feature_bc_matrix_path,
            feature_bc_matrix_string,
            file_matrix_h5=raw_bc_matrix_h5_path,
            read_raw_matrix=read_raw_matrix)

        # # Loop to load all data sets
        for c_sample in tqdm(range(len(config_paths[0][1:])),
                             desc='Loading samples'):
            print("         ... reading out ...")
            c_sample += 1
            # matrix_file_end, features_file_end, barcode_file_end
            feature_bc_matrix_string = \
                np.array([config_paths[9][c_sample], config_paths[8][c_sample], config_paths[7][c_sample]])

            # path to h5 and matrices
            path_matrix = os.path.join(os.sep, config_paths[0][c_sample],
                                       config_paths[1][c_sample],
                                       config_paths[2][c_sample],
                                       config_paths[3][c_sample],
                                       config_paths[4][c_sample])

            # path h5
            filtered_bc_matrix_h5_path = os.path.join(
                path_matrix, config_paths[11][c_sample])
            raw_bc_matrix_h5_path = os.path.join(path_matrix,
                                                 config_paths[10][c_sample])

            # matrix
            raw_feature_bc_matrix_path = os.path.join(
                path_matrix, config_paths[5][c_sample])
            filtered_feature_bc_matrix_path = os.path.join(
                path_matrix, config_paths[6][c_sample])

            # # Load count matrix, features and observables
            raw_adata_tmp, filtered_adata_tmp = _scanpy_load_annotate_tsv_mtx_files(
                raw_feature_bc_matrix_path,
                filtered_feature_bc_matrix_path,
                feature_bc_matrix_string,
                file_matrix_h5=raw_bc_matrix_h5_path,
                read_raw_matrix=read_raw_matrix)

            # # Concatenate data sets (also do this if you have more than one donor!)
            #   RAW
            raw_annot_data = raw_annot_data.concatenate(raw_adata_tmp,
                                                        batch_key='sample_id')
            # raw_annot_data.var['gene_id'] = raw_annot_data.var['gene_id-1']
            # raw_annot_data.var.drop(columns=['gene_id-1', 'gene_id-0'], inplace=True)
            raw_annot_data.obs.drop(columns=['sample_id'], inplace=True)
            raw_annot_data.obs_names = [
                c.split("-")[0] for c in raw_annot_data.obs_names
            ]
            raw_annot_data.obs_names_make_unique(join='_')
            # raw_annot_data.obs_names_make_unique(join='_')

            #  FILTERED
            filtered_annot_data = filtered_annot_data.concatenate(
                filtered_adata_tmp, batch_key='sample_id')
            # filtered_annot_data.var['gene_id'] = filtered_annot_data.var['gene_id-1']
            # filtered_annot_data.var.drop(columns=['gene_id-1', 'gene_id-0'], inplace=True)
            filtered_annot_data.obs.drop(columns=['sample_id'], inplace=True)
            filtered_annot_data.obs_names = [
                c.split("-")[0] for c in filtered_annot_data.obs_names
            ]
            filtered_annot_data.obs_names_make_unique(join='_')
            # filtered_annot_data.obs_names_make_unique(join='_')

            # save sample ids in list
            list_sample_ids.append(config_paths[3][c_sample])

    else:
        print(
            "Filtered feature-barcode matrix contains only cells associated barcodes"
        )
        # path to h5 and matrices
        path_matrix = os.path.join(os.sep, config_paths[0][0],
                                   config_paths[1][0], config_paths[2][0],
                                   config_paths[3][0], config_paths[4][0])

        # path h5
        filtered_bc_matrix_h5_path = os.path.join(path_matrix,
                                                  config_paths[11][0])
        raw_bc_matrix_h5_path = os.path.join(path_matrix, config_paths[10][0])

        # path to filtered files ending with .mtx and .tsv (type string)
        filtered_feature_bc_matrix_path = os.path.join(path_matrix,
                                                       config_paths[6][0])

        # # Annotate data
        print("\n-------- Start: Read out values --------")
        # Two options to read in feature_ids, gene_names, feature_types, barcodes, count_matrix_data
        # 1. Malte Luecken using Scanpy from TheisLab; read out mtx and tsv files
        _, filtered_annot_data = _scanpy_load_annotate_tsv_mtx_files(
            filtered_feature_bc_matrix_path,
            feature_bc_matrix_string,
            file_matrix_h5=filtered_bc_matrix_h5_path)

        # # Loop to load all data sets
        for c_sample in tqdm(range(len(config_paths[0][1:])),
                             desc='Loading samples'):
            c_sample += 1  # +1 because we already loaded the first sample

            # matrix_file_end, features_file_end, barcode_file_end
            feature_bc_matrix_string = \
                np.array([config_paths[9][c_sample], config_paths[8][c_sample], config_paths[7][c_sample]])

            # path to h5 and matrices
            path_matrix = os.path.join(os.sep, config_paths[0][c_sample],
                                       config_paths[1][c_sample],
                                       config_paths[2][c_sample],
                                       config_paths[3][c_sample],
                                       config_paths[4][c_sample])

            # path h5
            filtered_bc_matrix_h5_path = os.path.join(
                path_matrix, config_paths[11][c_sample])
            raw_bc_matrix_h5_path = os.path.join(path_matrix,
                                                 config_paths[10][c_sample])

            filtered_feature_bc_matrix_path = os.path.join(
                path_matrix, config_paths[6][c_sample])

            # # Load count matrix, features and observables
            _, filtered_adata_tmp = _scanpy_load_annotate_tsv_mtx_files(
                filtered_feature_bc_matrix_path,
                feature_bc_matrix_string,
                file_matrix_h5=filtered_bc_matrix_h5_path)

            #  FILTERED
            filtered_annot_data = filtered_annot_data.concatenate(
                filtered_adata_tmp, batch_key='sample_id')
            filtered_annot_data.obs.drop(columns=['sample_id'], inplace=True)
            filtered_annot_data.obs_names = [
                c.split("-")[0] for c in filtered_annot_data.obs_names
            ]
            filtered_annot_data.obs_names_make_unique()

        # Workaround to ensure that something can be returned
        raw_annot_data = []

    # ---- Side notes to know but can also be looked up in the summary created by 10x Genomics Spaceranger --- #
    unique_sample = np.unique(filtered_annot_data.obs["sample"])
    num_cells_previous_sample = 0
    for sample_name in unique_sample:
        print("\nSample {}: ".format(sample_name))
        print("\nSide notes of {} ".format(
            filtered_annot_data.obs['sample'].values[1]))
        number_cells = len(
            np.where(
                filtered_annot_data.obs['sample'].values == sample_name)[0])

        print("No. cells: ", number_cells)
        # Count number of expressed genes (count one gene over all spots)
        counts_gene = filtered_annot_data[
            num_cells_previous_sample:number_cells].X.sum(0)
        counts_gene_sorted = np.sort(counts_gene)
        print("Total No. genes detected: ",
              np.count_nonzero(counts_gene_sorted))

        # Calculate median genes per spot
        copy_s1 = filtered_annot_data[:number_cells].X.copy()
        mask = copy_s1 > 0
        zero_array = np.zeros_like(copy_s1)
        # count numbers of True == numbers of gene overall spots
        zero_array[mask] = 1
        median_genes_per_spot = np.median(zero_array.sum(1))
        median_umi_counts_per_spot = np.median(copy_s1.sum(1))
        print("Median genes: ", median_genes_per_spot)
        print("Total No. of UMI Counts: ", sum(copy_s1.sum(1)))
        print("Median UMI Counts: ", median_umi_counts_per_spot)
        num_cells_previous_sample = number_cells

    # All samples
    # Get barcodes of each sample and therefore the number of cells for each sample
    model = filtered_annot_data.obs[['sample'] + []]
    batch_info = model.groupby('sample').groups.values()
    n_batches = np.array([len(v) for v in batch_info])
    print("\n Sorted No. genes for each sample: ", n_batches)

    print("\n")
    # ---                                            End side notes                                         --- #

    # Second option: load from hdf5 files

    return raw_annot_data, filtered_annot_data, config_paths
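# A hedged toy sketch of the per-spot summary statistics computed above from a
# (spots x genes) count matrix: detected genes, median genes per spot and
# median UMI counts per spot.
import numpy as np

counts = np.array([[0, 3, 1, 0],
                   [2, 0, 0, 0],
                   [1, 1, 4, 0]])                    # 3 spots x 4 genes

print(np.count_nonzero(counts.sum(0)))               # 3 genes detected overall
print(np.median((counts > 0).sum(1)))                # median genes per spot: 2.0
print(np.median(counts.sum(1)))                      # median UMI counts per spot: 4.0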
    def run_task(self):  # {{{
        '''
        Compute the regional-mean time series
        '''
        # Authors
        # -------
        # Xylar Asay-Davis

        config = self.config

        self.logger.info("\nCompute time series of regional means...")

        startDate = '{:04d}-01-01_00:00:00'.format(self.startYear)
        endDate = '{:04d}-12-31_23:59:59'.format(self.endYear)

        regionGroup = self.regionGroup
        sectionSuffix = regionGroup[0].upper() + \
            regionGroup[1:].replace(' ', '')
        timeSeriesName = sectionSuffix[0].lower() + sectionSuffix[1:]
        sectionName = 'timeSeries{}'.format(sectionSuffix)

        outputDirectory = '{}/{}/'.format(
            build_config_full_path(config, 'output', 'timeseriesSubdirectory'),
            timeSeriesName)
        try:
            os.makedirs(outputDirectory)
        except OSError:
            pass

        outFileName = '{}/{}_{:04d}-{:04d}.nc'.format(outputDirectory,
                                                      timeSeriesName,
                                                      self.startYear,
                                                      self.endYear)

        inputFiles = sorted(
            self.historyStreams.readpath('timeSeriesStatsMonthlyOutput',
                                         startDate=startDate,
                                         endDate=endDate,
                                         calendar=self.calendar))

        years, months = get_files_year_month(inputFiles, self.historyStreams,
                                             'timeSeriesStatsMonthlyOutput')

        variables = config.getExpression(sectionName, 'variables')

        variableList = [var['mpas'] for var in variables] + \
            ['timeMonthly_avg_layerThickness']

        outputExists = os.path.exists(outFileName)
        outputValid = outputExists
        if outputExists:
            with open_mpas_dataset(fileName=outFileName,
                                   calendar=self.calendar,
                                   timeVariableNames=None,
                                   variableList=None,
                                   startDate=startDate,
                                   endDate=endDate) as dsOut:

                for inIndex in range(dsOut.dims['Time']):

                    mask = numpy.logical_and(
                        dsOut.year[inIndex].values == years,
                        dsOut.month[inIndex].values == months)
                    if numpy.count_nonzero(mask) == 0:
                        outputValid = False
                        break

        if outputValid:
            self.logger.info('  Time series exists -- Done.')
            return

        # Load mesh related variables
        try:
            restartFileName = self.runStreams.readpath('restart')[0]
        except ValueError:
            raise IOError('No MPAS-O restart file found: need at least one '
                          'restart file for ocean region time series')

        cellsChunk = 32768
        timeChunk = 1

        datasets = []
        for timeIndex, fileName in enumerate(inputFiles):

            dsTimeSlice = open_mpas_dataset(fileName=fileName,
                                            calendar=self.calendar,
                                            variableList=variableList,
                                            startDate=startDate,
                                            endDate=endDate)
            datasets.append(dsTimeSlice)

        chunk = {'Time': timeChunk, 'nCells': cellsChunk}

        if config.has_option(sectionName, 'zmin'):
            config_zmin = config.getfloat(sectionName, 'zmin')
        else:
            config_zmin = None

        if config.has_option(sectionName, 'zmax'):
            config_zmax = config.getfloat(sectionName, 'zmax')
        else:
            config_zmax = None

        with dask.config.set(scheduler='threads',
                             pool=ThreadPool(self.daskThreads)):
            # combine data sets into a single data set
            dsIn = xarray.concat(datasets, 'Time').chunk(chunk)

            chunk = {'nCells': cellsChunk}
            dsRestart = xarray.open_dataset(restartFileName)
            dsRestart = dsRestart.isel(Time=0).chunk(chunk)
            dsIn['areaCell'] = dsRestart.areaCell
            if 'landIceMask' in dsRestart:
                # only the region outside of ice-shelf cavities
                dsIn['openOceanMask'] = dsRestart.landIceMask == 0

            dsIn['zMid'] = compute_zmid(dsRestart.bottomDepth,
                                        dsRestart.maxLevelCell,
                                        dsRestart.layerThickness)

            regionMaskFileName = self.masksSubtask.maskFileName

            dsRegionMask = xarray.open_dataset(regionMaskFileName)

            maskRegionNames = decode_strings(dsRegionMask.regionNames)

            datasets = []
            regionIndices = []
            for regionName in self.regionNames:

                self.logger.info('    region: {}'.format(regionName))
                regionIndex = maskRegionNames.index(regionName)
                regionIndices.append(regionIndex)

                chunk = {'nCells': cellsChunk}
                dsMask = dsRegionMask.isel(nRegions=regionIndex).chunk(chunk)

                cellMask = dsMask.regionCellMasks == 1
                if 'openOceanMask' in dsIn:
                    cellMask = numpy.logical_and(cellMask, dsIn.openOceanMask)
                dsRegion = dsIn.where(cellMask, drop=True)

                totalArea = dsRegion['areaCell'].sum()
                self.logger.info('      totalArea: {} mil. km^2'.format(
                    1e-12 * totalArea.values))

                self.logger.info("Don't worry about the following dask "
                                 "warnings.")
                if config_zmin is None:
                    zmin = dsMask.zmin
                else:
                    zmin = config_zmin

                if config_zmax is None:
                    zmax = dsMask.zmax
                else:
                    zmax = config_zmax

                depthMask = numpy.logical_and(dsRegion.zMid >= zmin,
                                              dsRegion.zMid <= zmax)
                depthMask.compute()
                self.logger.info("Dask warnings should be done.")
                dsRegion['depthMask'] = depthMask

                layerThickness = dsRegion.timeMonthly_avg_layerThickness
                dsRegion['volCell'] = (dsRegion.areaCell *
                                       layerThickness).where(depthMask)
                totalVol = dsRegion.volCell.sum(dim='nVertLevels').sum(
                    dim='nCells')
                totalVol.compute()
                self.logger.info('      totalVol (mil. km^3): {}'.format(
                    1e-15 * totalVol.values))

                dsRegion = dsRegion.transpose('Time', 'nCells', 'nVertLevels')

                dsOut = xarray.Dataset()
                dsOut['totalVol'] = totalVol
                dsOut.totalVol.attrs['units'] = 'm^3'
                dsOut['totalArea'] = totalArea
                dsOut.totalArea.attrs['units'] = 'm^2'
                dsOut['zbounds'] = ('nbounds', [zmin, zmax])
                dsOut.zbounds.attrs['units'] = 'm'

                for var in variables:
                    outName = var['name']
                    self.logger.info('      {}'.format(outName))
                    mpasVarName = var['mpas']
                    timeSeries = dsRegion[mpasVarName]
                    units = timeSeries.units
                    description = timeSeries.long_name

                    is3d = 'nVertLevels' in timeSeries.dims
                    if is3d:
                        timeSeries = \
                            (dsRegion.volCell*timeSeries.where(depthMask)).sum(
                                dim='nVertLevels').sum(dim='nCells') / totalVol
                    else:
                        timeSeries = \
                            (dsRegion.areaCell*timeSeries).sum(
                                dim='nCells') / totalArea

                    timeSeries.compute()

                    dsOut[outName] = timeSeries
                    dsOut[outName].attrs['units'] = units
                    dsOut[outName].attrs['description'] = description
                    dsOut[outName].attrs['is3d'] = str(is3d)

                datasets.append(dsOut)

            # combine data sets into a single data set
            dsOut = xarray.concat(datasets, 'nRegions')

            dsOut.coords['regionNames'] = dsRegionMask.regionNames.isel(
                nRegions=regionIndices)
            dsOut.coords['year'] = (('Time'), years)
            dsOut['year'].attrs['units'] = 'years'
            dsOut.coords['month'] = (('Time'), months)
            dsOut['month'].attrs['units'] = 'months'

            write_netcdf(dsOut, outFileName)
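The depth mask and volume-weighted average above follow a common xarray pattern; the following is a minimal, self-contained sketch on synthetic data (the variable names only mirror the example, none of it comes from the original module):

import numpy as np
import xarray

# Synthetic stand-ins for areaCell, layerThickness, zMid and a 3-D field.
nCells, nVertLevels = 5, 4
ds = xarray.Dataset({
    'areaCell': ('nCells', np.full(nCells, 1.0e6)),                    # m^2
    'layerThickness': (('nCells', 'nVertLevels'),
                       np.full((nCells, nVertLevels), 10.0)),          # m
    'zMid': (('nCells', 'nVertLevels'),
             -np.tile(np.arange(5.0, 45.0, 10.0), (nCells, 1))),       # m
    'temperature': (('nCells', 'nVertLevels'),
                    np.random.default_rng(0).normal(4.0, 1.0,
                                                    (nCells, nVertLevels))),
})

zmin, zmax = -25.0, 0.0                          # hypothetical depth bounds
depthMask = (ds.zMid >= zmin) & (ds.zMid <= zmax)
volCell = (ds.areaCell * ds.layerThickness).where(depthMask)
totalVol = volCell.sum(dim=('nCells', 'nVertLevels'))
meanTemp = (volCell * ds.temperature).sum(dim=('nCells', 'nVertLevels')) / totalVol
print(float(meanTemp))   # volume-weighted mean over the selected depth range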
Example #57
0
def evaluate(truth_val, n_answered):
    # Error rate: fraction of entries in truth_val that differ from the
    # module-level ground_truth, normalized by n_answered * numA (globals).
    return np.count_nonzero(truth_val - ground_truth) / (n_answered * numA)
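A hypothetical usage sketch: ground_truth and numA are module-level globals in the original code, so they are stubbed here purely for illustration.

import numpy as np

ground_truth = np.array([1, 0, 1, 1])      # stub: true answers
numA = 1                                   # stub: answers per question
truth_val = np.array([1, 1, 1, 0])         # estimated answers
print(evaluate(truth_val, n_answered=4))   # 0.5 -> half the entries disagree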
Example #58
0
def long_to_wide(table: pd.DataFrame, keycolnames: List[str],
                 varcolname: str) -> pd.DataFrame:
    """Reshape `table` from long to wide format.

    The key columns identify rows; unique values of `varcolname` become the
    new column headers. Returns the reshaped DataFrame, an error string, or
    a dict with "dataframe", "error" and "quick_fixes" entries.
    """
    warnings = []
    quick_fixes = []

    varcol = table[varcolname]
    if varcol.dtype != object and not hasattr(varcol, "cat"):
        # Convert to str, in-place
        warnings.append(
            ('Column "%s" was auto-converted to Text because column names '
             "must be text.") % varcolname)
        quick_fixes.append({
            "text":
            'Convert "%s" to text' % varcolname,
            "action":
            "prependModule",
            "args": ["converttotext", {
                "colnames": [varcolname]
            }],
        })
        na = varcol.isnull()
        varcol = varcol.astype(str)
        varcol[na] = np.nan
        table[varcolname] = varcol

    # Remove empty values, in-place. Empty column headers aren't allowed.
    # https://www.pivotaltracker.com/story/show/162648330
    empty = varcol.isin([np.nan, pd.NaT, None, ""])
    n_empty = np.count_nonzero(empty)
    if n_empty:
        if n_empty == 1:
            text_empty = "1 input row"
        else:
            text_empty = "{:,d} input rows".format(n_empty)
        warnings.append('%s with empty "%s" were removed.' %
                        (text_empty, varcolname))
        table = table[~empty]
        table.reset_index(drop=True, inplace=True)

    table.set_index(keycolnames + [varcolname], inplace=True, drop=True)
    if np.any(table.index.duplicated()):
        return "Cannot reshape: some variables are repeated"
    if len(table.columns) == 0:
        return ("There is no Value column. "
                "All but one table column must be a Row or Column variable.")
    if len(table.columns) > 1:
        return ("There are too many Value columns. "
                "All but one table column must be a Row or Column variable. "
                "Please drop extra columns before reshaping.")

    table = table.unstack()
    table.columns = [col[-1] for col in table.columns.values]
    table.reset_index(inplace=True)

    if warnings:
        return {
            "dataframe": table,
            "error": "\n".join(warnings),
            "quick_fixes": quick_fixes,
        }
    else:
        return table
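A minimal usage sketch for long_to_wide, assuming pandas, numpy and typing.List are imported at module level as pd, np and List (as the function body requires); the key column identifies rows and the unique values of the variable column become the new headers:

import numpy as np
import pandas as pd

long_table = pd.DataFrame({
    'id': [1, 1, 2, 2],
    'variable': ['height', 'weight', 'height', 'weight'],
    'value': [180, 75, 165, 60],
})
wide = long_to_wide(long_table, keycolnames=['id'], varcolname='variable')
print(wide)   # one row per id, with 'height' and 'weight' columns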
Example #59
0
def get_sum_metrics(batch_output,
                    batch_target,
                    metrics_type,
                    test=False,
                    printDice=False):
    """Accumulate voxel-overlap and surface-distance metrics over a batch.

    Expects 4-D (batch, channel, H, W) arrays or torch tensors and evaluates
    channel 0 of each sample. Returns a dict of summed metrics, plus the
    per-sample Dice scores when printDice is True.
    """

    if torch.is_tensor(batch_output):
        batch_output = batch_output.data.cpu().numpy()
    if torch.is_tensor(batch_target):
        batch_target = batch_target.data.cpu().numpy()
    assert batch_output.shape == batch_target.shape
    assert len(batch_output.shape) == 4
    spacing = (1, 1)
    size = batch_output.shape[0]
    metrics = dict.fromkeys(metrics_type, 0)
    dices = []
    for i in range(size):
        output = batch_output[i, 0]
        target = batch_target[i, 0]
        labelPred = sitk.GetImageFromArray(output, isVector=False)
        labelPred.SetSpacing(spacing)
        labelTrue = sitk.GetImageFromArray(target, isVector=False)
        labelTrue.SetSpacing(spacing)  # spacing order (x, y, z)
        # voxel_metrics
        pred = output.astype(int)
        gdth = target.astype(int)
        fp_array = copy.deepcopy(pred)  # keep pred unchanged
        fn_array = copy.deepcopy(gdth)
        gdth_sum = np.sum(gdth)
        pred_sum = np.sum(pred)
        intersection = gdth & pred
        union = gdth | pred
        intersection_sum = np.count_nonzero(intersection)
        union_sum = np.count_nonzero(union)

        tp_array = intersection

        tmp = pred - gdth
        fp_array[tmp < 1] = 0

        tmp2 = gdth - pred
        fn_array[tmp2 < 1] = 0

        tn_array = np.ones(gdth.shape) - union

        tp, fp, fn, tn = np.sum(tp_array), np.sum(fp_array), np.sum(
            fn_array), np.sum(tn_array)

        smooth = EPSILON
        precision = (tp) / (pred_sum + smooth)
        recall = (tp) / (gdth_sum + smooth)

        false_positive_rate = (fp) / (fp + tn + smooth)
        false_negative_rate = (fn) / (fn + tp + smooth)

        jaccard = (intersection_sum) / (union_sum + smooth)
        dice = (2 * intersection_sum) / (gdth_sum + pred_sum + smooth)
        ppv = (intersection_sum) / (pred_sum + smooth)
        dicecomputer = sitk.LabelOverlapMeasuresImageFilter()
        dicecomputer.Execute(labelTrue > 0.5, labelPred > 0.5)

        # distance_metrics
        signed_distance_map = sitk.SignedMaurerDistanceMap(
            labelTrue > 0.5, squaredDistance=False,
            useImageSpacing=True)  # It needs to be adapted.

        ref_distance_map = sitk.Abs(signed_distance_map)

        ref_surface = sitk.LabelContour(labelTrue > 0.5, fullyConnected=True)

        statistics_image_filter = sitk.StatisticsImageFilter()
        statistics_image_filter.Execute(ref_surface > 0.5)

        num_ref_surface_pixels = int(statistics_image_filter.GetSum())

        signed_distance_map_pred = sitk.SignedMaurerDistanceMap(
            labelPred > 0.5, squaredDistance=False, useImageSpacing=True)

        seg_distance_map = sitk.Abs(signed_distance_map_pred)

        seg_surface = sitk.LabelContour(labelPred > 0.5, fullyConnected=True)

        seg2ref_distance_map = ref_distance_map * sitk.Cast(
            seg_surface, sitk.sitkFloat32)

        ref2seg_distance_map = seg_distance_map * sitk.Cast(
            ref_surface, sitk.sitkFloat32)

        statistics_image_filter.Execute(seg_surface > 0.5)

        num_seg_surface_pixels = int(statistics_image_filter.GetSum())

        seg2ref_distance_map_arr = sitk.GetArrayViewFromImage(
            seg2ref_distance_map)
        seg2ref_distances = list(
            seg2ref_distance_map_arr[seg2ref_distance_map_arr != 0])
        seg2ref_distances = seg2ref_distances + list(
            np.zeros(num_seg_surface_pixels - len(seg2ref_distances)))
        ref2seg_distance_map_arr = sitk.GetArrayViewFromImage(
            ref2seg_distance_map)
        ref2seg_distances = list(
            ref2seg_distance_map_arr[ref2seg_distance_map_arr != 0])
        ref2seg_distances = ref2seg_distances + list(
            np.zeros(num_ref_surface_pixels - len(ref2seg_distances)))
        all_surface_distances = seg2ref_distances + ref2seg_distances

        metrics['dice'] += dice
        metrics['jaccard'] += jaccard
        metrics['precision'] += precision
        metrics['recall'] += recall
        metrics['fpr'] += false_positive_rate
        metrics['fnr'] += false_negative_rate
        metrics['vs'] += dicecomputer.GetVolumeSimilarity()
        metrics['ppv'] += ppv
        metrics["msd"] += np.mean(all_surface_distances)
        metrics["mdsd"] += np.median(all_surface_distances)
        metrics["stdsd"] += np.std(all_surface_distances)
        metrics["hd95"] += np.percentile(all_surface_distances, 95)
        metrics["hd"] += np.max(all_surface_distances)
        if printDice:
            dices.append(dice)
    if printDice:
        return metrics, dices
    return metrics
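The voxel-overlap part of get_sum_metrics reduces to standard confusion-matrix counts; here is a numpy-only sketch on two tiny binary masks, independent of SimpleITK, with EPSILON assumed to be a small smoothing constant:

import numpy as np

EPSILON = 1e-6                      # assumed smoothing constant
pred = np.array([[1, 1, 0],
                 [0, 1, 0]])
gdth = np.array([[1, 0, 0],
                 [0, 1, 1]])

intersection = gdth & pred          # true positives
union = gdth | pred
tp = np.count_nonzero(intersection)
fp = np.count_nonzero(pred) - tp    # predicted but not in ground truth
fn = np.count_nonzero(gdth) - tp    # in ground truth but missed

dice = 2 * tp / (np.count_nonzero(gdth) + np.count_nonzero(pred) + EPSILON)
jaccard = tp / (np.count_nonzero(union) + EPSILON)
print(round(dice, 3), round(jaccard, 3))   # 0.667 and 0.5 for these masks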
def no_more_moves(board):
    # The board is full when every cell is non-zero; COLUMN_COUNT and
    # ROW_COUNT are module-level constants giving the board dimensions.
    return np.count_nonzero(board) == COLUMN_COUNT * ROW_COUNT
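A usage sketch for no_more_moves; COLUMN_COUNT and ROW_COUNT are module-level constants in the original, stubbed here with assumed Connect-Four-style dimensions.

import numpy as np

ROW_COUNT, COLUMN_COUNT = 6, 7                 # assumed board dimensions
board = np.zeros((ROW_COUNT, COLUMN_COUNT))
print(no_more_moves(board))                    # False: the board is empty
board[:] = 1
print(no_more_moves(board))                    # True: every cell is occupied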