Example #1
def project(x, mask=None):
    """ Take a vector x (with possible nonnegative entries and non-normalized)
        and project it onto the unit simplex.

        mask:   do not project these entries
                project remaining entries onto lower dimensional simplex
    """
    if mask is not None:
        mask = np.asarray(mask)
        xsorted = np.sort(x[~mask])[::-1]
        # remaining entries need to sum up to 1 - sum x[mask]
        sum_ = 1.0 - np.sum(x[mask])
    else:
        xsorted = np.sort(x)[::-1]
        # entries need to sum up to 1 (unit simplex)
        sum_ = 1.0
    lambda_a = (np.cumsum(xsorted) - sum_) / np.arange(1.0, len(xsorted)+1.0)
    for i in xrange(len(lambda_a)-1):
        if lambda_a[i] >= xsorted[i+1]:
            astar = i
            break
    else:
        astar = -1
    p = np.maximum(x-lambda_a[astar],  0)
    if mask is not None:
        p[mask] = x[mask]
    return p
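# --- Hypothetical usage sketch, not part of the original example ---
# Assumes NumPy is imported as np and project() is defined as above
# (substitute range for xrange when running under Python 3).
x = np.array([0.4, 1.5, -0.2, 0.8])
p = project(x)
assert abs(p.sum() - 1.0) < 1e-12       # projected entries sum to one
assert np.all(p >= 0)                   # and are nonnegative

# keep the first entry fixed; the others are projected onto the remaining mass
mask = np.array([True, False, False, False])
p_masked = project(x, mask=mask)
assert p_masked[0] == x[0]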
Example #2
  def testMutableHashTableOfTensors(self):
    with self.test_session():
      default_val = tf.constant([-1, -1], tf.int64)
      keys = tf.constant(["brain", "salad", "surgery"])
      values = tf.constant([[0, 1], [2, 3], [4, 5]], tf.int64)
      table = tf.contrib.lookup.MutableHashTable(tf.string, tf.int64,
                                                 default_val)
      self.assertAllEqual(0, table.size().eval())

      table.insert(keys, values).run()
      self.assertAllEqual(3, table.size().eval())

      input_string = tf.constant(["brain", "salad", "tank"])
      output = table.lookup(input_string)
      self.assertAllEqual([3, 2], output.get_shape())

      result = output.eval()
      self.assertAllEqual([[0, 1], [2, 3], [-1, -1]], result)

      exported_keys, exported_values = table.export()
      self.assertAllEqual([None], exported_keys.get_shape().as_list())
      self.assertAllEqual([None, 2], exported_values.get_shape().as_list())
      # exported data is in the order of the internal map, i.e. undefined
      sorted_keys = np.sort(exported_keys.eval())
      sorted_values = np.sort(exported_values.eval())
      self.assertAllEqual([b"brain", b"salad", b"surgery"], sorted_keys)
      self.assertAllEqual([[4, 5], [2, 3], [0, 1]], sorted_values)
Example #3
def test_vectorizer_unicode():
    # tests that the count vectorizer works with cyrillic.
    document = (
        "\xd0\x9c\xd0\xb0\xd1\x88\xd0\xb8\xd0\xbd\xd0\xbd\xd0\xbe\xd0"
        "\xb5 \xd0\xbe\xd0\xb1\xd1\x83\xd1\x87\xd0\xb5\xd0\xbd\xd0\xb8\xd0"
        "\xb5 \xe2\x80\x94 \xd0\xbe\xd0\xb1\xd1\x88\xd0\xb8\xd1\x80\xd0\xbd"
        "\xd1\x8b\xd0\xb9 \xd0\xbf\xd0\xbe\xd0\xb4\xd1\x80\xd0\xb0\xd0\xb7"
        "\xd0\xb4\xd0\xb5\xd0\xbb \xd0\xb8\xd1\x81\xd0\xba\xd1\x83\xd1\x81"
        "\xd1\x81\xd1\x82\xd0\xb2\xd0\xb5\xd0\xbd\xd0\xbd\xd0\xbe\xd0\xb3"
        "\xd0\xbe \xd0\xb8\xd0\xbd\xd1\x82\xd0\xb5\xd0\xbb\xd0\xbb\xd0"
        "\xb5\xd0\xba\xd1\x82\xd0\xb0, \xd0\xb8\xd0\xb7\xd1\x83\xd1\x87"
        "\xd0\xb0\xd1\x8e\xd1\x89\xd0\xb8\xd0\xb9 \xd0\xbc\xd0\xb5\xd1\x82"
        "\xd0\xbe\xd0\xb4\xd1\x8b \xd0\xbf\xd0\xbe\xd1\x81\xd1\x82\xd1\x80"
        "\xd0\xbe\xd0\xb5\xd0\xbd\xd0\xb8\xd1\x8f \xd0\xb0\xd0\xbb\xd0\xb3"
        "\xd0\xbe\xd1\x80\xd0\xb8\xd1\x82\xd0\xbc\xd0\xbe\xd0\xb2, \xd1\x81"
        "\xd0\xbf\xd0\xbe\xd1\x81\xd0\xbe\xd0\xb1\xd0\xbd\xd1\x8b\xd1\x85 "
        "\xd0\xbe\xd0\xb1\xd1\x83\xd1\x87\xd0\xb0\xd1\x82\xd1\x8c\xd1\x81\xd1"
        "\x8f.")

    vect = CountVectorizer()
    X_counted = vect.fit_transform([document])
    assert_equal(X_counted.shape, (1, 15))

    vect = HashingVectorizer(norm=None, non_negative=True)
    X_hashed = vect.transform([document])
    assert_equal(X_hashed.shape, (1, 2 ** 20))

    # No collisions on such a small dataset
    assert_equal(X_counted.nnz, X_hashed.nnz)

    # When norm is None and non_negative, the tokens are counted up to
    # collisions
    assert_array_equal(np.sort(X_counted.data), np.sort(X_hashed.data))
Example #4
def sortrows(A, col=None):
    A = np.asarray(A)
    if not ismatrix(A):
        if isrow(A):
            return np.sort(A), np.argsort(A)
        else:
            return np.sort(A, axis=0), np.argsort(A, axis=0)
            
    # Sort the whole row
    if not col:
        col = list(range(A.shape[1]))
    
    nrows = A.shape[0]
    I = np.arange(nrows)[:, np.newaxis]
    A = np.concatenate((A, I), axis=1)
    A = np.asarray(sorted(A, key=operator.itemgetter(*col)))
    I = list(A[:, -1]) # get the index
    # convert to numeric if index in string
    for n, i in enumerate(I):
        if not isnumeric(i):
            I[n] = str2num(i)
    # I = I[:, np.newaxis]
    I = np.asarray(I)
    A = A[:, :-1]
    
    return A, I
Example #5
 def compute_steepness_vector(self):        
     gradient_vec0 = np.zeros((self.size_ws))
     tempvec = np.zeros((self.Y.shape[1]))
     _interactive_rls_classifier.compute_gradient(self.Y_ws,
                  gradient_vec0,
                  self.classcounts_ws,
                  self.classvec_ws,
                  self.size_ws,
                  self.size,
                  self.DVTY,
                  self.sqrtRx2_ws,
                  self.sqrtR.shape[1],
                  0,
                  tempvec,
                  self.Y.shape[1])
     gradient_vec1 = np.zeros((self.size_ws))
     tempvec = np.zeros((self.Y.shape[1]))
     _interactive_rls_classifier.compute_gradient(self.Y_ws,
                  gradient_vec1,
                  self.classcounts_ws,
                  self.classvec_ws,
                  self.size_ws,
                  self.size,
                  self.DVTY,
                  self.sqrtRx2_ws,
                  self.sqrtR.shape[1],
                  1,
                  tempvec,
                  self.Y.shape[1])
     
     steepness_vector = np.zeros((self.size_ws))
     steepness_vector[0:self.classcounts_ws[1]] = np.sort(gradient_vec0)[0:self.classcounts_ws[1]][::-1]
     steepness_vector[self.classcounts_ws[1]:] = np.sort(gradient_vec1)[0:self.classcounts_ws[0]]
     #print steepness_vector
     return steepness_vector
Example #6
def brightestPxl(img, threshold, **kwargs):
    """
    Centroids using brightest Pixel Algorithm
    (A. G. Basden et al,  MNRAS, 2011)

    Finds the nPxls-th brightest pixel, subtracts that value from the frame,
    sets anything below 0 to 0, and finally takes the centroid.

    Parameters:
        img (ndarray): 2d or greater rank array of imgs to centroid
        threshold (float): Percentage of pixels to use for centroid

    Returns:
        ndarray: Array of centroid values
    """
    
    nPxls = int(round(threshold*img.shape[-1]*img.shape[-2]))

    if len(img.shape)==2:
        pxlValue = numpy.sort(img.flatten())[-nPxls]
        img-=pxlValue
        img.clip(0, img.max(), out=img)

    elif len(img.shape)==3:
        pxlValues = numpy.sort(
                        img.reshape(img.shape[0], img.shape[-1]*img.shape[-2])
                        )[:,-nPxls]
        img[:]  = (img.T - pxlValues).T
        img.clip(0, img.max(), out=img)

    return centreOfGravity(img)
Example #7
    def _cells_to_rects(self, cells):
        """
        Converts the extents of a list of cell grid coordinates (i,j) into
        a list of rect tuples (x,y,w,h).  The set should be disjoint, but may
        or may not be minimal.
        """
        # Since this function is generally used to generate clipping regions
        # or other screen-related graphics, we should try to return large
        # rectangular blocks if possible.
        # For now, we just look for horizontal runs and return those.
        cells = array(cells)
        y_sorted = sort_points(cells, index=1)  # sort according to row
        rownums = sort(array(tuple(set(cells[:,1]))))

        row_start_indices = searchsorted(y_sorted[:,1], rownums)
        row_end_indices = left_shift(row_start_indices, len(cells))

        rects = []
        for rownum, start, end in zip(rownums, row_start_indices, row_end_indices):
            # y_sorted is sorted by the J (row) coordinate, so after we
            # extract the column indices, we need to sort them before
            # passing them to find_runs().
            grid_column_indices = sort(y_sorted[start:end][:,0])
            #pdb.set_trace()
            #print grid_column_indices.shape
            for span in find_runs(grid_column_indices):
                x = self._cell_lefts[span[0]]
                y = self._cell_bottoms[rownum]
                w = (span[-1] - span[0] + 1) * self._cell_extents[0]
                h = self._cell_extents[1]
                rects.append((x,y,w,h))
        return rects
Example #8
def intervalo_confianza(muestra_x, muestra_y, err_x, err_y, porcentaje):
    '''
    Finds the confidence interval. Generates random samples based on the
    experimental data; from each sample it obtains the value of the
    appropriate constant for the linear model.
    This corresponds to the Monte Carlo method. porcentaje refers to the
    percentage of the confidence interval being sought.
    '''
    N = len(muestra_x)
    Nmc = 10000
    promedios_a = np.zeros(Nmc)
    promedios_b = np.zeros(Nmc)
    for i in range(Nmc):
        r = np.random.normal(0, 1, size=len(muestra_x))
        x_i = muestra_x + err_x * r
        y_i = muestra_y + err_y * r
        a_i, b_i = biseccion(x_i, y_i)
        promedios_a[i] = a_i
        promedios_b[i] = b_i
    promedios_a = np.sort(promedios_a)
    promedios_b = np.sort(promedios_b)
    minim = ((100 - porcentaje) / 2) * 0.01
    maxim = 1 - (minim)
    lim_min_a = promedios_a[int(Nmc * minim)]
    lim_max_a = promedios_a[int(Nmc * maxim)]
    lim_min_b = promedios_b[int(Nmc * minim)]
    lim_max_b = promedios_b[int(Nmc * maxim)]
    histograma_confianza(promedios_a, promedios_b, lim_min_a, lim_max_a,
                         lim_min_b, lim_max_b)
    return lim_min_a, lim_max_a, lim_min_b, lim_max_b
Example #9
def quantiles(x, qlist=(2.5, 25, 50, 75, 97.5), transform=lambda x: x):
    R"""Returns a dictionary of requested quantiles from array

    Parameters
    ----------
    x : Numpy array
        An array containing MCMC samples
    qlist : tuple or list
        A list of desired quantiles (defaults to (2.5, 25, 50, 75, 97.5))
    transform : callable
        Function to transform data (defaults to identity)

    Returns
    -------
    `dictionary` with the quantiles {quantile: value}
    """
    # Make a copy of trace
    x = transform(x.copy())

    # For multivariate node
    if x.ndim > 1:
        # Transpose first, then sort, then transpose back
        sx = np.sort(x.T).T
    else:
        # Sort univariate node
        sx = np.sort(x)

    try:
        # Generate specified quantiles
        quants = [sx[int(len(sx) * q / 100.0)] for q in qlist]

        return dict(zip(qlist, quants))

    except IndexError:
        pm._log.warning("Too few elements for quantile calculation")
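# --- Hypothetical usage sketch, not part of the original example ---
# Assumes NumPy is imported as np and quantiles() is defined as above; with
# enough samples the IndexError branch (which needs the pm logger) is not hit.
samples = np.random.normal(size=5000)                  # stand-in for an MCMC trace
q = quantiles(samples)                                 # keys 2.5, 25, 50, 75, 97.5
q_log = quantiles(np.abs(samples), transform=np.log)   # transform before sorting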
Example #10
    def computeError(self, Res, method="None"):
        """ Compute median absolute and relative errors """
        absErr = np.abs(Res - self.trueRes)
        idx_nonzero = np.where(self.trueRes != 0)
        absErr_nonzero = absErr[idx_nonzero]
        true_nonzero = self.trueRes[idx_nonzero]
        relErr = absErr_nonzero / true_nonzero

        # log_str_rel = "\n".join(map(str, relErr))
        # log_str_abs = "\n".join(map(str, absErr))

        if Params.IS_LOGGING:
            log_str = ""
            for i in range(len(self.query_list)):
                area = rect_area(self.query_list[i])
                query_str = str(self.query_list[i][0][0]) + "\t" + str(self.query_list[i][0][1]) + "\t" + str(
                    self.query_list[i][1][0]) + "\t" + str(self.query_list[i][1][1]) + "\t" + str(area)
                err_str = str(self.trueRes[i]) + "\t" + str(Res[i]) + "\t" + str(absErr[i]) + "\t" + str(relErr[i])
                log_str = log_str + query_str + "\t" + err_str + "\n"
            log(method, log_str)

        absErr = np.sort(absErr)
        relErr = np.sort(relErr)
        n_abs = len(absErr)
        n_rel = len(relErr)
        return absErr[int(n_abs / 2)], relErr[int(n_rel / 2)]
Example #11
def plot_raw_data(ratings):
    """plot the statistics result on raw rating data."""
    # do statistics.
    num_items_per_user = np.array((ratings != 0).sum(axis=0)).flatten()
    num_users_per_item = np.array((ratings != 0).sum(axis=1).T).flatten()
    sorted_num_movies_per_user = np.sort(num_items_per_user)[::-1]
    sorted_num_users_per_movie = np.sort(num_users_per_item)[::-1]

    # plot
    fig = plt.figure()
    ax1 = fig.add_subplot(1, 2, 1)
    ax1.plot(sorted_num_movies_per_user, color='blue')
    ax1.set_xlabel("users")
    ax1.set_ylabel("number of ratings (sorted)")
    ax1.grid()

    ax2 = fig.add_subplot(1, 2, 2)
    ax2.plot(sorted_num_users_per_movie)
    ax2.set_xlabel("items")
    ax2.set_ylabel("number of ratings (sorted)")
    ax2.set_xticks(np.arange(0, 2000, 300))
    ax2.grid()

    plt.tight_layout()
    plt.savefig("stat_ratings")
    plt.show()
    # plt.close()
    return num_items_per_user, num_users_per_item
Example #12
def test_fetch_rcv1():
    try:
        data1 = fetch_rcv1(shuffle=False, download_if_missing=False)
    except IOError as e:
        if e.errno == errno.ENOENT:
            raise SkipTest("Download RCV1 dataset to run this test.")

    X1, Y1 = data1.data, data1.target
    cat_list, s1 = data1.target_names.tolist(), data1.sample_id

    # test sparsity
    assert_true(sp.issparse(X1))
    assert_true(sp.issparse(Y1))
    assert_equal(60915113, X1.data.size)
    assert_equal(2606875, Y1.data.size)

    # test shapes
    assert_equal((804414, 47236), X1.shape)
    assert_equal((804414, 103), Y1.shape)
    assert_equal((804414,), s1.shape)
    assert_equal(103, len(cat_list))

    # test ordering of categories
    first_categories = [u'C11', u'C12', u'C13', u'C14', u'C15', u'C151']
    assert_array_equal(first_categories, cat_list[:6])

    # test number of samples for some categories
    some_categories = ('GMIL', 'E143', 'CCAT')
    number_non_zero_in_cat = (5, 1206, 381327)
    for num, cat in zip(number_non_zero_in_cat, some_categories):
        j = cat_list.index(cat)
        assert_equal(num, Y1[:, j].data.size)

    # test shuffling and subset
    data2 = fetch_rcv1(shuffle=True, subset='train', random_state=77,
                       download_if_missing=False)
    X2, Y2 = data2.data, data2.target
    s2 = data2.sample_id

    # test return_X_y option
    fetch_func = partial(fetch_rcv1, shuffle=False, subset='train',
                         download_if_missing=False)
    check_return_X_y(data2, fetch_func)

    # The first 23149 samples are the training samples
    assert_array_equal(np.sort(s1[:23149]), np.sort(s2))

    # test some precise values
    some_sample_ids = (2286, 3274, 14042)
    for sample_id in some_sample_ids:
        idx1 = s1.tolist().index(sample_id)
        idx2 = s2.tolist().index(sample_id)

        feature_values_1 = X1[idx1, :].toarray()
        feature_values_2 = X2[idx2, :].toarray()
        assert_almost_equal(feature_values_1, feature_values_2)

        target_values_1 = Y1[idx1, :].toarray()
        target_values_2 = Y2[idx2, :].toarray()
        assert_almost_equal(target_values_1, target_values_2)
Example #13
    def test_weaklimit(self):
        a = distributions.CRP(10,1)
        b = distributions.GammaCompoundDirichlet(1000,10,1)

        a.concentration = b.concentration = 10.

        from matplotlib import pyplot as plt

        plt.figure()
        crp_counts = np.zeros(10)
        gcd_counts = np.zeros(10)
        for itr in range(500):
            crp_rvs = np.sort(a.rvs(25))[::-1][:10]
            crp_counts[:len(crp_rvs)] += crp_rvs
            gcd_counts += np.sort(b.rvs(25))[::-1][:10]

        plt.plot(crp_counts/200,gcd_counts/200,'bx-')
        plt.xlim(0,10)
        plt.ylim(0,10)

        import os
        from mixins import mkdir
        figpath = os.path.join(os.path.dirname(__file__),'figures',
                self.__class__.__name__,'weaklimittest.pdf')
        mkdir(os.path.dirname(figpath))
        plt.savefig(figpath)
Example #14
	def check_obs_scheme(self):
		" Checks the internal validity of provided observation schemes "

		# check sub_pops
		idx_union = np.sort(self._sub_pops[0])
		i = 1
		while idx_union.size < self._p and i < len(self._sub_pops):
			idx_union = np.union1d(idx_union, self._sub_pops[i]) 
			i += 1
		if idx_union.size != self._p or np.any(idx_union!=np.arange(self._p)):
			raise Exception(('all subpopulations together have to cover '
			'exactly all included observed variables y_i in y. '
			'This is not the case. Change the definition of '
			'subpopulations in variable sub_pops or reduce '
			'the number of observed variables p. '
			'The union of indices of all subpopulations is'),
			idx_union )

		# check obs_time
		if not self._obs_time[-1]==self._T:
			raise Exception(('Entries of obs_time give the respective ends of '
							'the periods of observation for any '
							'subpopulation. Hence the last entry of obs_time '
							'has to be the full recording length. The last '
							'entry of obs_time before is '), self._obs_time[-1])

		if np.any(np.diff(self._obs_time)<1):
			raise Exception(('lengths of observation have to be at least 1. '
							'Minimal observation time for a subpopulation: '),
							np.min(np.diff(self._obs_time)))

		# check obs_pops
		if not self._obs_time.size == self._obs_pops.size:
			raise Exception(('each entry of obs_pops gives the index of the '
							'subpopulation observed up to the respective '
							'time given in obs_time. Thus the sizes of the '
							'two arrays have to match. They do not. '
							'no. of subpop. switch points and no. of '
							'subpopulations observed up to switch points '
							'are '), (self._obs_time.size, self._obs_pops.size))

		idx_pops = np.sort(np.unique(self._obs_pops))
		if not np.min(idx_pops)==0:
			raise Exception(('first subpopulation has to have index 0, but '
							'is given the index '), np.min(idx_pops))
		elif not idx_pops.size == len(self._sub_pops):
			raise Exception(('number of specified subpopulations in variable '
							'sub_pops does not meet the number of '
							'subpopulations indexed in variable obs_pops. '
							'Delete subpopulations that are never observed, '
							'or change the observed subpopulations in '
							'variable obs_pops accordingly. The number of '
							'indexed subpopulations is '),
							len(self._sub_pops))
		elif not np.all(np.diff(idx_pops)==1):
			raise Exception(('subpopulation indices have to be consecutive '
							'integers from 0 to the total number of '
							'subpopulations. This is not the case. '
							'Given subpopulation indices are '),
							idx_pops)
Example #15
    def test_multiindex_objects(self):
        mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]],
                        labels=[[0, 1, 0, 2], [2, 0, 0, 1]],
                        names=['col1', 'col2'])
        recons = mi._sort_levels_monotonic()

        # these are equal
        assert mi.equals(recons)
        assert Index(mi.values).equals(Index(recons.values))

        # _hashed_values and hash_pandas_object(..., index=False)
        # equivalency
        expected = hash_pandas_object(
            mi, index=False).values
        result = mi._hashed_values
        tm.assert_numpy_array_equal(result, expected)

        expected = hash_pandas_object(
            recons, index=False).values
        result = recons._hashed_values
        tm.assert_numpy_array_equal(result, expected)

        expected = mi._hashed_values
        result = recons._hashed_values

        # values should match, but in different order
        tm.assert_numpy_array_equal(np.sort(result),
                                    np.sort(expected))
Example #16
def remove_outliers(inpd, min_fiber_distance, n_jobs=0, distance_method ='Mean'):
    """ Remove fibers that have no other nearby fibers, i.e. outliers.

    The pairwise fiber distance matrix is computed, then fibers
    are rejected if their average neighbor distance (using closest 3
    neighbors) is higher than min_fiber_distance.

    """

    fiber_array = fibers.FiberArray()
    #fiber_array.points_per_fiber = 5
    fiber_array.points_per_fiber = 10
    fiber_array.convert_from_polydata(inpd)

    fiber_indices = range(0, fiber_array.number_of_fibers)

    # squared distances are computed
    min_fiber_distance = min_fiber_distance * min_fiber_distance
    
    # pairwise distance matrix
    if USE_PARALLEL and n_jobs > 0:
        distances = Parallel(n_jobs=n_jobs, verbose=1)(
            delayed(similarity.fiber_distance)(
                fiber_array.get_fiber(lidx),
                fiber_array,
                threshold = 0,
                distance_method = distance_method)
            for lidx in fiber_indices)

        distances = numpy.array(distances)

        # now we check where there are no nearby fibers in d
        mindist = numpy.zeros(fiber_array.number_of_fibers)
        for lidx in fiber_indices:
            dist = numpy.sort(distances[lidx, :])
            # robust minimum distance
            mindist[lidx] = (dist[1] + dist[2] + dist[3]) / 3.0
            #mindist[lidx] = (dist[1] + dist[2]) / 2.0

    else:
        # do this in a loop to use less memory. then parallelization can 
        # happen over the number of subjects.
        mindist = numpy.zeros(fiber_array.number_of_fibers)
        for lidx in fiber_indices:
            distances = similarity.fiber_distance(fiber_array.get_fiber(lidx), fiber_array, 0,  distance_method = distance_method)
            dist = numpy.sort(distances)
            # robust minimum distance
            mindist[lidx] = (dist[1] + dist[2] + dist[3]) / 3.0
            
    # keep only fibers who have nearby similar fibers
    fiber_mask = mindist < min_fiber_distance

    if True:
        num_fibers = len(numpy.nonzero(fiber_mask)[0]), "/", len(fiber_mask)
        print "<filter.py> Number retained after outlier removal: ", num_fibers

    outpd = mask(inpd, fiber_mask, mindist)
    outpd_reject = mask(inpd, ~fiber_mask, mindist)

    return outpd, fiber_mask, outpd_reject
Example #17
def index_trim_outlier(resid, k):
    '''returns indices to residual array with k outliers removed

    Parameters
    ----------
    resid : array_like, 1d
        data vector, usually residuals of a regression
    k : int
        number of outliers to remove

    Returns
    -------
    trimmed_index : array, 1d
        index array with k outliers removed
    outlier_index : array, 1d
        index array of k outliers

    Notes
    -----

    Outliers are defined as the k observations with the largest
    absolute values.

    '''
    sort_index = np.argsort(np.abs(resid))
    # index of non-outlier
    trimmed_index = np.sort(sort_index[:-k])
    outlier_index = np.sort(sort_index[-k:])
    return trimmed_index, outlier_index
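# --- Hypothetical usage sketch, not part of the original example ---
# Assumes NumPy is imported as np and index_trim_outlier() is defined as above.
resid = np.array([0.1, -2.5, 0.3, 4.0, -0.2])
keep, dropped = index_trim_outlier(resid, 2)
# keep    -> array([0, 2, 4])  indices of the three smallest |resid|, in order
# dropped -> array([1, 3])     indices of the two largest |resid|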
Example #18
def indices_from_grid(c, ref):
    """ Convert coordinates to indices defined by grid of reference
    values.

    Parameters
    ----------
    c : array of floats, shape (M,)
      Coordinates.
    ref : array of floats, shape (N,)
      Reference grid coordinates. They must be equally spaced.

    Returns
    -------
    ind : arrays of floats
      Coordinates mapped onto the indices of the reference grid.
    """
    ref = np.sort(ref)

    dref = ref[1:] - ref[:-1]
    dref0 = float(dref[0])
    assert np.allclose(dref0, dref[1:])

    c = np.sort(c)

    assert c[0] >= ref[0] and c[-1] <= ref[-1]

    ind = (c - ref[0]) / dref0

    return ind
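# --- Hypothetical usage sketch, not part of the original example ---
# Assumes NumPy is imported as np and indices_from_grid() is defined as above.
ref = np.linspace(0., 4., 5)          # equally spaced grid: 0, 1, 2, 3, 4
c = np.array([0.5, 2.0, 3.25])
ind = indices_from_grid(c, ref)       # -> array([0.5 , 2.  , 3.25])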
Example #19
    def by_lblimg(self, lbldata):
        """
        Get specific template regions by rois given by user
        All regions overlapped with a specific label region will be covered

        Parameters:
        -----------
        lbldata: rois given by user

        Return:
        -------
        out_template: new template contains part of regions
                      if lbldata has multiple different ROIs, the new template will contain regions extracted for each ROI given by the user

        Example:
        --------
        >>> glr_cls = GetLblRegion(template)
        >>> out_template = glr_cls.by_lblimg(lbldata)
        """
        assert lbldata.shape == self._template.shape, "the shape of template should be equal to the shape of lbldata"
        labels = np.sort(np.unique(lbldata)[1:]).astype('int')
        out_template = np.zeros_like(lbldata)
        out_template = out_template[...,np.newaxis]
        out_template = np.tile(out_template, (1, len(labels)))
        for i,lbl in enumerate(labels):
            lbldata_tmp = tools.get_specificroi(lbldata, lbl)
            lbldata_tmp[lbldata_tmp!=0] = 1
            part_template = self._template*lbldata_tmp
            template_lbl = np.sort(np.unique(part_template)[1:])
            out_template[...,i] = tools.get_specificroi(self._template, template_lbl)
        return out_template
Example #20
def get_fn(data, fp):
    """ Given some scores data and a false negatives rate
    find the corresponding false positive rate in the ROC curve.
    If the point does not exist, we will interpolate it.

    """
    if fp in data.fpr:
        pos = np.where(data.fpr == fp)
        fnr, thr = np.mean(data.fnr[pos]), np.mean(data.thrs[pos])
    else:
        # Set data for interpolation
        x = np.sort(data.fpr)
        # Set a new arange which includes the wanted value
        xnew = np.arange(fp, x[-1])
        # Interpolate the FN
        y = np.sort(data.tpr)
        f = interpolate.interp1d(x, y)
        tpr = f(xnew)[0]
        fnr = 1 - tpr
        # Interpolate the threshold
        y = np.sort(data.thrs)
        f = interpolate.interp1d(x, y)
        thr = f(xnew)[0]
    print("Dado el valor de fp: {0}, el valor de fnr es: {1} y el umbral: {2} "
          .format(fp, fnr, thr))
Example #21
def stats(arr):
    """ Show the minimum, maximum median, mean, shape and size of an
    array.

    Also show the number of NaN entries (if any).
    """
    arr = np.asarray(arr)
    shape = arr.shape
    arr = arr.ravel()
    size = len(arr)
    bad = np.isnan(arr)
    nbad = bad.sum()
    if nbad == size:
        return "#NaN %i of %i" % (nbad, size)
    elif nbad == 0:
        arr = np.sort(arr)
    else:
        arr = np.sort(arr[~bad])
    if len(arr) % 2 == 0:
        i = len(arr) // 2
        median = 0.5 * (arr[i - 1] + arr[i])
    else:
        median = arr[len(arr) // 2]

    return "min %.5g max %.5g median %.5g mean %.5g shape %s #NaN %i of %i" % (
        arr[0],
        arr[-1],
        median,
        arr.mean(),
        shape,
        nbad,
        size,
    )
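# --- Hypothetical usage sketch, not part of the original example ---
# Assumes NumPy is imported as np and stats() is defined as above.
a = np.array([[1.0, np.nan, 3.0],
              [4.0, 5.0, 6.0]])
print(stats(a))
# -> "min 1 max 6 median 4 mean 3.8 shape (2, 3) #NaN 1 of 6"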
Example #22
def pick_channels(ch_names, include, exclude=[]):
    """Pick channels by names

    Returns the indices of the good channels in ch_names.

    Parameters
    ----------
    ch_names : list of string
        List of channels.
    include : list of string
        List of channels to include (if empty include all available).
    exclude : list of string
        List of channels to exclude (if empty do not exclude any channel).
        Defaults to [].

    Returns
    -------
    sel : array of int
        Indices of good channels.
    """
    if len(np.unique(ch_names)) != len(ch_names):
        raise RuntimeError('ch_names is not a unique list, picking is unsafe')
    sel = []
    for k, name in enumerate(ch_names):
        if (len(include) == 0 or name in include) and name not in exclude:
            sel.append(k)
    sel = np.unique(sel)
    sel = np.sort(sel)
    return sel
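# --- Hypothetical usage sketch, not part of the original example ---
# Assumes NumPy is imported as np and pick_channels() is defined as above.
ch_names = ['EEG 001', 'EEG 002', 'EEG 003', 'EOG 061']
sel = pick_channels(ch_names, include=['EEG 002', 'EOG 061'], exclude=['EOG 061'])
# sel -> array([1]); 'EOG 061' is dropped because exclude takes precedence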
Example #23
    def test_mass_grid(self):
        """
        Check that the mass-based grid is constructed correctly.
        """
        ## Test typical input - should be sorted
        levels = utl.define_density_mass_grid(self.unique_density)
        answer = np.sort(self.unique_density)
        assert_array_equal(answer, levels)

        ## Test more levels than density values (answer is the same as typical
        #  input).
        levels = utl.define_density_mass_grid(self.unique_density,
                                              num_levels=self.n * 2)
        assert_array_equal(answer, levels)

        ## Test fewer levels than density values.
        levels = utl.define_density_mass_grid(self.unique_density,
                                              num_levels=2)
        answer = np.array([1, 10])
        assert_array_equal(answer, levels)

        ## Test negative values.
        levels = utl.define_density_mass_grid(self.generic_array)
        answer = np.sort(self.generic_array)
        assert_array_equal(answer, levels)

        ## Test uniform input.
        levels = utl.define_density_mass_grid(self.uniform_density)
        self.assertItemsEqual(levels, [1.])
Example #24
def read_multivector_griddata_ascii(name_or_obj):
    """Read 2-d grid data from a text file.

    Each line has values `x0 x1 y0 y1 ...`. Space separated.
    Assumed to be grid of values.

    Parameters
    ----------
    name_or_obj : str or file-like object
        The name of the file or a file-like object containing the
        data.

    Returns
    -------
    x0 : numpy.ndarray
        1-d array.
    x1 : numpy.ndarray
        1-d array.
    y : numpy.ndarray
        3-d array of shape ``(n, len(x0), len(x1))`` where ``n`` is
        the number of y values on each line.
    """
    data = np.loadtxt(name_or_obj)

    x0 = np.sort(np.unique(data[:, 0]))
    x1 = np.sort(np.unique(data[:, 1]))
    y = np.zeros((len(data[0]) - 2, len(x0), len(x1)))

    for i0, p in enumerate(x0):
        for i1, q in enumerate(x1):
            ind = (data[:, 0] == p) & (data[:, 1] == q)
            y[:, i0, i1] = data[ind, 2:]

    return x0, x1, y
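# --- Hypothetical usage sketch, not part of the original example ---
# Assumes NumPy is imported as np and read_multivector_griddata_ascii() is
# defined as above; a small in-memory "file" stands in for a real data file.
from io import StringIO

text = StringIO("0 0 1.0 2.0\n"
                "0 1 3.0 4.0\n"
                "1 0 5.0 6.0\n"
                "1 1 7.0 8.0\n")
x0, x1, y = read_multivector_griddata_ascii(text)
# x0 -> [0. 1.], x1 -> [0. 1.], y.shape -> (2, 2, 2): two y values per grid point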
Example #25
def regenerate_dim(x):
    """ assume x in ns since epoch from the current time """
    msg = None  # msg allows us to see which shot/diag was at fault
    diffs = np.diff(x)
    # bincount needs a positive input and needs an array with N elts where N is the largest number input
    small = (diffs > 0) & (diffs < 1000000)
    sorted_diffs = np.sort(diffs[np.where(small)[0]])
    counts = np.bincount(sorted_diffs)
    bigcounts, bigvals = myhist(diffs[np.where(~small)[0]])

    if pyfusion.VERBOSE>0:
        print('[[diff, count],....]')
        print('small:', [[argc, counts[argc]] for argc in np.argsort(counts)[::-1][0:5]])
        print('big or negative:', [[bigvals[argc], bigcounts[argc]] for argc in np.argsort(bigcounts)[::-1][0:10]])

    dtns = 1 + np.argmax(counts[1:])  # skip the first position - it is 0
    # wgt0 = np.where(sorted_diffs > 0)[0]  # we are in ns, so no worry about rounding
    histo = plt.hist if pyfusion.DBG() > 1 else np.histogram
    cnts, vals = histo(x, bins=200)[0:2]
    # ignore the two end bins - hopefully there will be very few there
    wmin = np.where(cnts[1:-1] < np.max(cnts[1:-1]))[0]
    if len(wmin)>0:
        print('**********\n*********** Gap in data > {p:.2f}%'.format(p=100*len(wmin)/float(len(cnts))))
    x01111 = np.ones(len(x))  # x01111 will be all 1s except for the first elt.
    x01111[0] = 0
    errcnt = np.sum(bigcounts) + np.sum(np.sort(counts)[::-1][1:])
    if errcnt>0 or (pyfusion.VERBOSE > 0): 
        msg = str('** repaired length of {l:,}, dtns={dtns:,}, {e} erroneous utcs'
              .format(l=len(x01111), dtns=dtns, e=errcnt))

    fixedx = np.cumsum(x01111)*dtns
    wbad = np.where((x - fixedx)>1e8)[0]
    fixedx[wbad] = np.nan
    debug_(pyfusion.DEBUG, 3, key="repair", msg="repair of W7-X scrambled Langmuir timebase") 
    return(fixedx, msg)
Example #26
def test_non_euclidean_kneighbors():
    rng = np.random.RandomState(0)
    X = rng.rand(5, 5)

    # Find a reasonable radius.
    dist_array = pairwise_distances(X).flatten()
    dist_array = np.sort(dist_array)
    radius = dist_array[15]

    # Test kneighbors_graph
    for metric in ['manhattan', 'chebyshev']:
        nbrs_graph = neighbors.kneighbors_graph(
            X, 3, metric=metric).toarray()
        nbrs1 = neighbors.NearestNeighbors(3, metric=metric).fit(X)
        assert_array_equal(nbrs_graph, nbrs1.kneighbors_graph(X).toarray())

    # Test radiusneighbors_graph
    for metric in ['manhattan', 'chebyshev']:
        nbrs_graph = neighbors.radius_neighbors_graph(
            X, radius, metric=metric).toarray()
        nbrs1 = neighbors.NearestNeighbors(metric=metric, radius=radius).fit(X)
        assert_array_equal(nbrs_graph,
                           nbrs1.radius_neighbors_graph(X).toarray())

    # Raise error when wrong parameters are supplied,
    X_nbrs = neighbors.NearestNeighbors(3, metric='manhattan')
    X_nbrs.fit(X)
    assert_raises(ValueError, neighbors.kneighbors_graph, X_nbrs, 3,
                  metric='euclidean')
    X_nbrs = neighbors.NearestNeighbors(radius=radius, metric='manhattan')
    X_nbrs.fit(X)
    assert_raises(ValueError, neighbors.radius_neighbors_graph, X_nbrs,
                  radius, metric='euclidean')
Example #27
def test_calculate_landslide_probability_lognormal_method():
    """Testing the main method 'calculate_landslide_probability()' with
    'lognormal' method. 
    """
    grid_2 = RasterModelGrid((5, 4), spacing=(0.2, 0.2))
    gridnum = grid_2.number_of_nodes
    np.random.seed(seed=6)
    grid_2.at_node['topographic__slope'] = np.random.rand(gridnum)
    scatter_dat = np.random.randint(1, 10, gridnum)
    grid_2.at_node['topographic__specific_contributing_area']= (
             np.sort(np.random.randint(30, 900, gridnum)))
    grid_2.at_node['soil__transmissivity']= (
             np.sort(np.random.randint(5, 20, gridnum), -1))
    grid_2.at_node['soil__mode_total_cohesion']= (
             np.sort(np.random.randint(30, 900, gridnum)))
    grid_2.at_node['soil__minimum_total_cohesion']= (
             grid_2.at_node['soil__mode_total_cohesion'] - scatter_dat)
    grid_2.at_node['soil__maximum_total_cohesion']= (
             grid_2.at_node['soil__mode_total_cohesion'] + scatter_dat)
    grid_2.at_node['soil__internal_friction_angle']= (
             np.sort(np.random.randint(26, 37, gridnum)))
    grid_2.at_node['soil__thickness']= (
             np.sort(np.random.randint(1, 10, gridnum)))
    grid_2.at_node['soil__density']= (2000. * np.ones(gridnum))

    ls_prob_lognormal = LandslideProbability(grid_2, number_of_iterations=10,
        groundwater__recharge_distribution='lognormal',
        groundwater__recharge_mean=5.,
        groundwater__recharge_standard_deviation=0.25,
        seed=6)
    ls_prob_lognormal.calculate_landslide_probability()
    np.testing.assert_almost_equal(
        grid_2.at_node['landslide__probability_of_failure'][5], 0.8)
    np.testing.assert_almost_equal(
        grid_2.at_node['landslide__probability_of_failure'][9], 0.4)
Example #28
def quantiles(x, qlist=(2.5, 25, 50, 75, 97.5)):
    """Returns a dictionary of requested quantiles from array

    :Arguments:
      x : Numpy array
          An array containing MCMC samples
      qlist : tuple or list
          A list of desired quantiles (defaults to (2.5, 25, 50, 75, 97.5))

    """

    # Make a copy of trace
    x = x.copy()

    # For multivariate node
    if x.ndim > 1:
        # Transpose first, then sort, then transpose back
        sx = np.sort(x.T).T
    else:
        # Sort univariate node
        sx = np.sort(x)

    try:
        # Generate specified quantiles
        quants = [sx[int(len(sx)*q/100.0)] for q in qlist]

        return dict(zip(qlist, quants))

    except IndexError:
        print("Too few elements for quantile calculation")
Example #29
def unit_maker(func, func0):
    "Test bn.(arg)partsort gives same output as bn.slow.(arg)partsort."
    msg = '\nfunc %s | input %s (%s) | shape %s | n %d | axis %s\n'
    msg += '\nInput array:\n%s\n'
    for i, arr in enumerate(arrays()):
        for axis in list(range(-arr.ndim, arr.ndim)) + [None]:
            if axis is None:
                n = arr.size
            else:
                n = arr.shape[axis]
            n = max(n // 2, 1)
            with np.errstate(invalid='ignore'):
                actual = func(arr.copy(), n, axis=axis)
                actual[:n] = np.sort(actual[:n], axis=axis)
                actual[n:] = np.sort(actual[n:], axis=axis)
                desired = func0(arr.copy(), n, axis=axis)
                if 'arg' in func.__name__:
                    desired[:n] = np.sort(desired[:n], axis=axis)
                    desired[n:] = np.sort(desired[n:], axis=axis)
            tup = (func.__name__, 'a'+str(i), str(arr.dtype),
                   str(arr.shape), n, str(axis), arr)
            err_msg = msg % tup
            assert_array_equal(actual, desired, err_msg)
            err_msg += '\n dtype mismatch %s %s'
            if hasattr(actual, 'dtype') or hasattr(desired, 'dtype'):
                da = actual.dtype
                dd = desired.dtype
                assert_equal(da, dd, err_msg % (da, dd))
Example #30
 def test_fit(self):
   self.kmeans.fit(input_fn=self.input_fn(), steps=10)
   centers = normalize(self.kmeans.clusters())
   self.assertAllClose(
       np.sort(
           centers, axis=0), np.sort(
               self.true_centers, axis=0))
Example #31
 def bbox(self):
     """numpy.ndarray(dtype=int): The bounding box of the object. Its default format is
     [[x_ll, y_ll], [x_ur, y_ur]]"""
     return np.sort(np.array([self.xy[0, :], self.xy[1, :]]), axis=0)
Example #32
etr_y = etr.predict(X_test)

# Use a gradient boosting model
gbr = GradientBoostingRegressor()
gbr.fit(X_train,y_train)
gbr_y = gbr.predict(X_test)

# Evaluate the single regression tree's predictions
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error
print('R_squared value of DecisionTreeRegressor is ',dtr.score(X_test,y_test))
print('The mean squared error of DecisionTreeRegressor is ',mean_squared_error(ss_y.inverse_transform(y_test),ss_y.inverse_transform(dtr_y)))
print('The mean absolute error of DecisionTreeRegressor is ',mean_absolute_error(ss_y.inverse_transform(y_test),ss_y.inverse_transform(dtr_y)))

# Evaluate the random forest's predictions
print('R_squared value of RandomForestRegressor is ',rfr.score(X_test,y_test))
print('The mean squared error of RandomForestRegressor is ',mean_squared_error(ss_y.inverse_transform(y_test),ss_y.inverse_transform(rfr_y)))
print('The mean absolute error of RandomForestRegressor is ',mean_absolute_error(ss_y.inverse_transform(y_test),ss_y.inverse_transform(rfr_y)))

# Evaluate the extra trees regressor's predictions
print('R_squared value of ExtraTreesRegressor is ',etr.score(X_test,y_test))
print('The mean squared error of ExtraTreesRegressor is ',mean_squared_error(ss_y.inverse_transform(y_test),ss_y.inverse_transform(etr_y)))
print('The mean absolute error of ExtraTreesRegressor is ',mean_absolute_error(ss_y.inverse_transform(y_test),ss_y.inverse_transform(etr_y)))
# Print the importance (contribution) of each feature
print(np.sort(list(zip(etr.feature_importances_,boston.feature_names)),axis = 0))

# Evaluate the gradient boosting regressor's predictions
print('R_squared value of GradientBoostingRegressor is ',gbr.score(X_test,y_test))
print('The mean squared error of GradientBoostingRegressor is ',mean_squared_error(ss_y.inverse_transform(y_test),ss_y.inverse_transform(gbr_y)))
print('The mean absolute error of GradientBoostingRegressor is ',mean_absolute_error(ss_y.inverse_transform(y_test),ss_y.inverse_transform(gbr_y)))

Example #33
with open('jpm_quotes.csv', 'r') as quotes_csv:
    quotes = np.array(list(csv.reader(quotes_csv))[1:]).astype('double')
    prices = quotes[:,1]
    counts = quotes[:,2]

    Neff = 32
    results = compute_ab_ema(Neff, prices, counts)
    
    ema_prices = results[0]
    vwap_prices = results[2]

    ema_counts = results[1]

    Neff_star = results[3]
    Neff_star_sorted = np.sort(Neff_star)
    prob_axis = np.arange(len(Neff_star)) / (len(Neff_star) - 1)

    axarr[0, 0].set_title('Prices, Ema(dashed) and VWAP')
    axarr[0, 0].plot(prices)
    axarr[0, 0].plot(ema_prices, '--')
    axarr[0, 0].plot(vwap_prices)

    axarr[0, 1].set_title('Intensity series (quote counts)')
    axarr[0, 1].plot(counts)

    axarr[1, 0].set_title('Neff*')
    axarr[1, 0].plot(Neff_star)
    
    axarr[1, 1].set_title('CDF of Neff*')
    axarr[1, 1].plot(Neff_star_sorted, prob_axis)
Example #34
                    cls_dets = cls_dets[keep.view(-1).long()]
                    if vis:
                        im2show = vis_detections(im2show, imdb.classes[j],
                                                 cls_dets.cpu().numpy(), 0.3)
                    all_boxes[j][i] = cls_dets.cpu().numpy()
                else:
                    all_boxes[j][i] = empty_array
        else:
            for j in xrange(1, imdb.num_classes):
                all_boxes[j][i] = empty_array
        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
            .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

        if vis:
            cv2.imwrite('./output/vis/result_%d.png' % (i), im2show)
            #pdb.set_trace()
            #cv2.imshow('test', im2show)
Example #35
                    action='store',
                    type=float,
                    dest='window_size',
                    help='hamming window size in seconds (default: 0.01)',
                    default=0.01)
args = parser.parse_args()

Fs, signal = wavfile.read(args.input_file)
signal = signal / max(abs(signal))

sampsPerMilli = Fs / 1000
millisPerFrame = int(args.window_size * 1000)
sampsPerFrame = int(sampsPerMilli * millisPerFrame)
nFrames = int(len(signal) / sampsPerFrame)

STEs = []
for k in range(nFrames):
    startIdx = k * sampsPerFrame
    stopIdx = startIdx + sampsPerFrame
    window = signal[startIdx:stopIdx]
    STE = np.sum(window**2) / np.float64(len(window))
    STEs.append(STE)

F = np.sort(STEs)[args.step_size:args.num_results * args.step_size +
                  args.step_size:args.step_size]
seconds = [[np.where(STEs == e)[0][0] * millisPerFrame / 1000, e] for e in F]

seconds = np.array(seconds)
for s in seconds[seconds[:, 1].argsort()]:
    print "%.2f %.7f" % (s[0], s[1])
Example #36
 def bbox(self):
     """numpy.ndarray(dtype=int): The bounding box for this object, represented as a Numpy array
     [[x_ll, y_ll], [x_ur, y_ur]]."""
     return np.sort(np.array([self.xy, self.xy]), axis=0)
Example #37
 def bbox(self):
     """numpy.ndarray(dtype=int): The bounding box of the object. Its default format is [[x0, y0], [x1, y1]], where
     [x0, y0] is the lower-left corner of this object, and [x1, y1] is the upper-right one."""
     return np.sort(np.array([self.xy[0, :], self.xy[1, :]]), axis=0)
Example #38
def load_umc_sheets(data_dir="/home/matthias/Data/umc_mozart", require_performance=False):
    """ load unwarpped sheets """
    import glob
    import cv2

    # initialize omr system
    from omr.omr_app import OpticalMusicRecognizer
    from omr.utils.data import prepare_image
    from lasagne_wrapper.network import SegmentationNetwork

    from omr.models import system_detector, bar_detector

    net = system_detector.build_model()
    system_net = SegmentationNetwork(net, print_architecture=False)
    system_net.load('sheet_utils/omr_models/system_params.pkl')

    net = bar_detector.build_model()
    bar_net = SegmentationNetwork(net, print_architecture=False)
    bar_net.load('sheet_utils/omr_models/bar_params.pkl')

    piece_names = []
    unwrapped_sheets = []
    piece_paths = []

    # get list of all pieces
    piece_dirs = np.sort(glob.glob(os.path.join(data_dir, '*')))
    n_pieces = len(piece_dirs)

    # iterate pieces
    kept_pages = 0
    for i_piece, piece_dir in enumerate(piece_dirs):
        piece_name = piece_dir.split('/')[-1]

        # if "214_" not in piece_name:
        #     continue

        print(col.print_colored("Processing piece %d of %d (%s)" % (i_piece + 1, n_pieces, piece_name), col.OKBLUE))

        # check if there is a performance
        if require_performance and len(glob.glob(os.path.join(piece_dir, "*performance*"))) == 0:
            print("No performance found!")
            continue

        # load pages
        page_paths = np.sort(glob.glob(os.path.join(piece_dir, "sheet/*.png")))
        if len(page_paths) == 0:
            print("No sheet available!!!")
            continue

        unwrapped_sheet = np.zeros((SYSTEM_HEIGHT, 0), dtype=np.uint8)
        system_problem = False
        for i_page, page_path in enumerate(page_paths):
            kept_pages += 1

            # load sheet image
            I = cv2.imread(page_path, 0)

            # load system coordinates
            # page_id = i_page + 1
            # page_systems = np.load(os.path.join(piece_dir, "coords", "systems_%02d.npy" % (i_page + 1)))

            # detect systems
            I_prep = prepare_image(I)
            omr = OpticalMusicRecognizer(note_detector=None, system_detector=system_net, bar_detector=bar_net)

            try:
                page_systems = omr.detect_systems(I_prep, verbose=False)
            except:
                print("Problem in system detection!!!")
                system_problem = True
                continue

            # plt.figure("System Localization")
            # plt.clf()
            # plt.imshow(I, cmap=plt.cm.gray)
            # plt.xlim([0, I.shape[1] - 1])
            # plt.ylim([I.shape[0] - 1, 0])

            # for system in page_systems:
            #     plt.plot(system[:, 1], system[:, 0], 'mo', alpha=0.5)
            # plt.show(block=True)

            # unwrap sheet
            for system in page_systems:

                r0 = int(np.mean([system[0, 0], system[2, 0]])) - SYSTEM_HEIGHT // 2
                r1 = r0 + SYSTEM_HEIGHT
                c0 = int(system[0, 1])
                c1 = int(system[1, 1])

                # fix row slice coordinates
                r0 = max(0, r0)
                r1 = min(r1, I.shape[0])
                r0 = max(r0, r1 - SYSTEM_HEIGHT)

                staff_img = I[r0:r1, c0:c1].astype(np.uint8)

                if staff_img.shape[0] < SYSTEM_HEIGHT:
                    to_pad = SYSTEM_HEIGHT - staff_img.shape[0]
                    if to_pad > (0.1 * SYSTEM_HEIGHT):
                        print("Problem in system padding!!!")
                        continue
                    staff_img = np.pad(staff_img, ((0, to_pad), (0, 0)), mode="edge")

                unwrapped_sheet = np.hstack((unwrapped_sheet, staff_img))

            # plt.figure("Unwrapped")
            # plt.imshow(unwrapped_sheet)
            # plt.show(block=True)

        if not system_problem:
            piece_names.append(piece_name)
            piece_paths.append(piece_dir)
            unwrapped_sheets.append(unwrapped_sheet)

    print("%d pieces covering %d pages of sheet music." % (len(piece_names), kept_pages))

    return piece_names, piece_paths, unwrapped_sheets
Example #39
def cluster(coord_array, tri_array, v, simil):
    """ Function to find clusters of points with similar heat persistence values
    :param coord_array: xyz coordinates per vertex in array of shape=(#points, 3)
    :param tri_array: vertex connection indices of the part in array of shape=(#triangles, 3)
    :param simil: array with cluster similarity fractions
    :param v: (#points x 2) array with cluster points for part specified
    :return clusters: array of shape=(2*len(simil), #points) with cluster index/persistence values on even/odd rows
    """

    print("Finding clusters..")

    simil = np.sort(list(
        set(simil)))  # Remove duplicate entries in simil, order unordered set
    clusters = np.zeros(shape=(2 * len(simil), coord_array.shape[0]))
    for l in range(len(simil)):
        starttime = datetime.datetime.now()

        newcluster = cluster5(coord_array, tri_array, simil[l], v)
        newcluster_i = newcluster[0, :]
        newcluster_v = newcluster[1, :]

        clmat = get_cluster_adj_matrix(newcluster_i, tri_array)

        # find very small clusters
        for tcli in range(int(np.amax(newcluster_i))):
            if np.count_nonzero(newcluster_i == tcli + 1) < 3:
                # combine small clusters to smallest cluster of their neighbors
                neis_tcl = np.nonzero(clmat[:, tcli])[0]
                count = 0
                if np.size(neis_tcl) > 0:
                    for nei in neis_tcl:  # find biggest neighbor cluster
                        nnei = np.count_nonzero(newcluster_i == nei + 1)

                        if nnei > count:
                            com_nei = nei
                            count = nnei
                    pts_nei = np.nonzero(
                        newcluster_i == com_nei +
                        1)[0]  # index of points in chosen cluster
                    v_nei = np.amax(newcluster_v[pts_nei])  # value of points
                    newcluster_v[newcluster_i == tcli + 1] = v_nei
                    newcluster_i[newcluster_i == tcli + 1] = com_nei + 1

        # resort the index of clusters, remove empty cluster
        while len(np.unique(newcluster_i)) != np.amax(newcluster_i):
            for i in range(1, int(np.amax(newcluster_i)) + 1):
                if i not in newcluster_i:
                    for n in range(len(newcluster_i)):
                        if newcluster_i[n] > i:
                            newcluster_i[n] -= 1

        newcluster[0, :] = newcluster_i
        newcluster[1, :] = newcluster_v
        clusters[2 * l:2 * l + 2, :] = newcluster

        endtime = datetime.datetime.now()
        elapsedtime = (endtime - starttime).seconds
        print(
            "%d%% similarity complete. %d clusters found. Elapsed time is %s seconds."
            % (simil[l] * 100, np.amax(newcluster_i), elapsedtime))

    np.savez_compressed('temp/clusters', clusters=clusters)
    print("Clusters complete.\n")

    return clusters
Example #40
def test_pi_positive(pts):
    pi = PersistenceImage(sigma=1)
    diagrams = np.expand_dims(np.concatenate([
        np.sort(pts, axis=1), np.zeros((pts.shape[0], 1))],
        axis=1), axis=0)
    assert np.all(pi.fit_transform(diagrams) >= 0.)
Example #41
# turn into timestamps
times = np.cumsum(gaps, axis=0)

# draw from each column according to distribution
pass_on = np.random.uniform(size=times.shape) < probabilities

# sanity check - are the correct probabilities demonstrated and the correct
# lambdas?
print('simulated probabilities:', p_e := np.mean(pass_on, axis=0), 'expected:',
        probabilities, '\ndiff:', p_e - probabilities, end='\n\n')
print('simulated lambdas:', l_e := np.mean(gaps, axis=0), 'expected:',
        lambdas, '\ndiff:', l_e - lambdas, end='\n\n')

# concatenate arrays and remove unwanted
supervisor = np.sort(np.concatenate(times)[np.concatenate(pass_on)])

# remove any past the last entry of the shortest simulation (to ensure all
# streams run for the same amount of time)
supervisor = supervisor[supervisor < np.min(times[-1,:])]

# print the final estimate
print('the mean time between customers for the supervisor was',
        res := np.diff(supervisor).mean(), '\nThis is accurate to'
        f' {np.abs(res - 1155/2648)/1155*2648 * 100:.2f}%.')

# Bonus: save data for visualization
import json

data_size = 500  # number of samples to save
maxtime = supervisor[data_size]
Example #42
def evaluate_recall(json_dataset,
                    roidb,
                    thresholds=None,
                    area='all',
                    limit=None):
    """Evaluate detection proposal recall metrics. This function is a much
    faster alternative to the official COCO API recall evaluation code. However,
    it produces slightly different results.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        'all': 0,
        'small': 1,
        'medium': 2,
        'large': 3,
        '96-128': 4,
        '128-256': 5,
        '256-512': 6,
        '512-inf': 7
    }
    area_ranges = [
        [0**2, 1e5**2],  # all
        [0**2, 32**2],  # small
        [32**2, 96**2],  # medium
        [96**2, 1e5**2],  # large
        [96**2, 128**2],  # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2]
    ]  # 512-inf
    assert area in areas, 'Unknown area range: {}'.format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = np.zeros(0)
    num_pos = 0
    for entry in roidb:
        gt_inds = np.where((entry['gt_classes'] > 0)
                           & (entry['is_crowd'] == 0))[0]
        gt_boxes = entry['boxes'][gt_inds, :]
        gt_areas = entry['seg_areas'][gt_inds]
        valid_gt_inds = np.where((gt_areas >= area_range[0])
                                 & (gt_areas <= area_range[1]))[0]
        gt_boxes = gt_boxes[valid_gt_inds, :]
        num_pos += len(valid_gt_inds)
        non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
        boxes = entry['boxes'][non_gt_inds, :]
        if boxes.shape[0] == 0:
            continue
        if limit is not None and boxes.shape[0] > limit:
            boxes = boxes[:limit, :]
        overlaps = box_utils.bbox_overlaps(
            boxes.astype(dtype=np.float32, copy=False),
            gt_boxes.astype(dtype=np.float32, copy=False))
        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        for j in range(min(boxes.shape[0], gt_boxes.shape[0])):
            # find which proposal box maximally covers each gt box
            argmax_overlaps = overlaps.argmax(axis=0)
            # and get the iou amount of coverage for each gt box
            max_overlaps = overlaps.max(axis=0)
            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ind = max_overlaps.argmax()
            gt_ovr = max_overlaps.max()
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
        # append recorded iou coverage level
        gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

    gt_overlaps = np.sort(gt_overlaps)
    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        'ar': ar,
        'recalls': recalls,
        'thresholds': thresholds,
        'gt_overlaps': gt_overlaps,
        'num_pos': num_pos
    }
Example #43
def generate_maps():
    import random
    print 'Generating optic aberration maps using Proper'
    wfo = proper.prop_begin(tp.diam, 1., tp.grid_size, tp.beam_ratio)
    # rms_error = 5e-6#500.e-9       # RMS wavefront error in meters
    # c_freq = 0.005             # correlation frequency (cycles/meter)
    # high_power = 1.          # high frequency falloff (r^-high_power)
    rms_error = 2.5e-3  #500.e-9       # RMS wavefront error in meters
    c_freq = 0.000005  # correlation frequency (cycles/meter)
    high_power = 1.  # high frequency falloff (r^-high_power)

    # tp.abertime = [0.5,2,10] # characteristic time for each aberration in secs
    tp.abertime = [
        100
    ]  # if beyond numframes then abertime will be auto set to duration of simulation
    abercubes = []
    for abertime in tp.abertime:
        # ap.numframes = 100
        aberfreq = 1. / abertime
        # tp.abertime=2 # aberfreq: number of frames goals per sec?
        num_longframes = aberfreq * ap.numframes * cp.frame_time
        print num_longframes, ap.numframes, cp.frame_time
        aber_cube = np.zeros((ap.numframes + 1, tp.grid_size, tp.grid_size))
        lin_size = tp.grid_size**2
        # spacing = int(ap.numframes/num_longframes)
        # frame_idx = np.int_(np.linspace(0,ap.numframes,num_longframes+1))
        c = range(0, ap.numframes)
        print num_longframes
        frame_idx = np.sort(random.sample(c, int(num_longframes + 1 - 2)))
        # frame_idx = np.int_(np.sort(np.round(np.random.uniform(0,ap.numframes,num_longframes+1-2))))
        frame_idx = np.hstack(([0], frame_idx, [ap.numframes]))
        # frame_idx = [0,  15,   69,  278,  418,  703, 1287, 1900, 3030, 3228, 5000]
        print(frame_idx)
        for f in frame_idx:
            aber_cube[f] = proper.prop_psd_errormap(
                wfo, rms_error, c_freq, high_power,
                MAP="prim_map")  #FILE=td.aberdir+'/telzPrimary_Map.fits')
            # quicklook_im(aber_cube[f], logAmp=False)
        for i, f in enumerate(frame_idx[:-1]):
            spacing = int(frame_idx[i + 1] - frame_idx[i])
            # quicklook_im(aber_cube[f], logAmp=False, show=False)

            frame1 = aber_cube[f]
            frame2 = aber_cube[frame_idx[i + 1]]
            lin_map = [
                np.linspace(f1, f2, spacing) for f1, f2 in zip(
                    frame1.reshape(lin_size), frame2.reshape(lin_size))
            ]
            interval_cube = np.array(lin_map).reshape(tp.grid_size,
                                                      tp.grid_size, spacing)
            interval_cube = np.transpose(interval_cube)
            print(i, f, frame_idx[i], frame_idx[i + 1], np.shape(interval_cube))
            # loop_frames(interval_cube, logAmp=False)
            aber_cube[f:frame_idx[i + 1]] = interval_cube
        abercubes.append(aber_cube)
        plt.plot(aber_cube[:, 20, 20])
        plt.show()
    abercubes = np.array(abercubes)
    # print abercubes.shape
    # plt.plot(aber_cube[:,20,20])
    aber_cube = np.sum(abercubes, axis=0)
    plt.plot(aber_cube[:, 20, 20])
    plt.show()
    if not os.path.isdir(iop.aberdir):
        os.mkdir(iop.aberdir)
    for f in range(0, ap.numframes, 1):
        # print 'saving frame #', f
        if f % 100 == 0: misc.progressBar(value=f, endvalue=ap.numframes)
        rawImageIO.saveFITS(aber_cube[f],
                            '%stelz%f.fits' % (iop.aberdir, f * cp.frame_time))
        # quicklook_im(aber_cube[f], logAmp=False, show=True)

    plt.show()
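# A minimal sketch of the per-pixel linear interpolation between two keyframe
# maps used above. Assumes NumPy >= 1.16, where np.linspace accepts array start
# and stop values and stacks the result along a new leading (time) axis, which
# replaces the flatten / reshape / transpose sequence in the loop.
import numpy as np

grid_size, spacing = 4, 5
frame1 = np.zeros((grid_size, grid_size))
frame2 = np.ones((grid_size, grid_size))
interval_cube = np.linspace(frame1, frame2, spacing, endpoint=False)
print(interval_cube.shape)      # (5, 4, 4)
print(interval_cube[:, 0, 0])   # [0.  0.2 0.4 0.6 0.8]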
Example #44
0
def fit_variance(human_data_original, save_path, trial_type, model_params_df,
                 constraints):
    # Written to fit 4 parameters, but we hold the likelihood width at 0 so it's not stochastic at all.
    test_x = np.arange(0, 400, 20)

    collumn_values = [""]
    if constraints != "":
        collumn_values = np.unique(human_data_original[constraints])

        for collumn_value in collumn_values:

            # only filter results by the constraint if a constraint is passed
            if collumn_value != "":
                human_data = human_data_original[(
                    human_data_original[constraints] == collumn_value)]
            else:
                human_data = human_data_original

            group_parameter_fits = model_params_df[
                (model_params_df["ppid"] == "group")
                & (model_params_df["trial_type"] == trial_type) &
                (model_params_df[constraints] == collumn_value)]
            model_params.likelihood_width = 0
            model_params.gamma = group_parameter_fits["gamma"].iloc[0]
            model_params.lambda_coef = group_parameter_fits["lambda"].iloc[0]
            model_params.k = group_parameter_fits["k"].iloc[0]

            human_data = human_data.dropna()
            human_mean_data = human_data.groupby([
                'Trial type', 'integration_length'
            ])['first_stop_location'].mean().reset_index()
            human_mean_data_with_id = human_data.groupby(
                ['Trial type', 'integration_length',
                 "ppid"])['first_stop_location'].mean().reset_index()

            human_std_data_with_id = human_data.groupby(
                ['Trial type', 'integration_length',
                 "ppid"])['first_stop_location'].std().reset_index()
            human_std_data_group = human_data.groupby([
                'Trial type', 'integration_length'
            ])['first_stop_location'].std().reset_index()

            human_mean_with_id_x = np.asarray(
                human_mean_data_with_id[(human_mean_data_with_id["Trial type"]
                                         == trial_type)]['integration_length'])
            human_mean_with_id_y = np.asarray(human_mean_data_with_id[(
                human_mean_data_with_id["Trial type"] == trial_type
            )]['first_stop_location'])
            human_mean_data_x = np.asarray(
                human_mean_data[(human_mean_data["Trial type"] == trial_type
                                 )]['integration_length'])
            human_mean_data_y = np.asarray(
                human_mean_data[(human_mean_data["Trial type"] == trial_type
                                 )]['first_stop_location'])
            human_data_x = np.asarray(human_data[(
                human_data["Trial type"] == trial_type)]['integration_length'])
            human_data_y = np.asarray(
                human_data[(human_data["Trial type"] == trial_type
                            )]['first_stop_location'])

            human_data_y_std = compute_sigmas(human_data_x, human_data_y)

            #human_data_with_std_individual = get_std_with_id(human_data, human_std_data_with_id, y_column='first_stop_location')
            #human_data_with_std_group = get_std_with_group(human_data, human_std_data_group,  y_column='first_stop_location')
            #human_data_std_id = np.asarray(human_data_with_std_individual[(human_data_with_std_individual["Trial type"] == trial_type)]['y_std'])
            #human_data_std_group = np.asarray(human_data_with_std_group[(human_data_with_std_group["Trial type"] == trial_type)]['y_std'])
            var_param = fit_variance_to_model(human_data_x, human_data_y_std)

            # plotting model fit

            test_x = np.sort(human_data_x)
            best_fit_responses, best_fit_sigmas = simple_variance_model_full(
                test_x, likelihood_width=var_param[0])

            # plot optimised response target
            fig = plt.figure(figsize=(6, 6))
            ax = fig.add_subplot(1, 1, 1)  #stops per trial
            plt.title("All subjects", fontsize="20")
            plt.scatter(human_mean_with_id_x,
                        human_mean_with_id_y,
                        color="r",
                        marker="o")
            plt.plot(human_mean_data_x, human_mean_data_y, "r", label="data")

            _, unique_idx = np.unique(test_x, return_index=True)
            unique_mask = create_mask(indices=unique_idx, size=len(test_x))
            model_data_x_means, model_data_y_std_means = sort_by_other_array(
                first_array_orderby=test_x[unique_mask],
                second_array=compute_means(test_x,
                                           best_fit_responses)[unique_mask])
            plt.plot(model_data_x_means,
                     model_data_y_std_means,
                     "g",
                     label="model")

            plt.plot(np.arange(0, 400),
                     np.arange(0, 400),
                     "k--",
                     label="Unity")
            plt.xlabel("Target (VU)", fontsize=20)
            plt.xlim((0, 400))
            plt.ylim((0, 400))
            plt.ylabel("Response (VU)", fontsize=20)
            plt.subplots_adjust(left=0.2)
            ax.tick_params(axis='both', which='major', labelsize=15)
            plt.gca().spines['top'].set_visible(False)
            plt.gca().spines['right'].set_visible(False)
            textstr = '\n'.join(
                (r'$\Gamma=%.2f$' % (model_params.gamma, ),
                 r'$\lambda=%.2f$' % (model_params.lambda_coef, ),
                 r'$\mathrm{k}=%.2f$' % (model_params.k, ),
                 r'$\mathrm{L}=%.2f$' % (var_param[0], )))
            props = dict(boxstyle='round', facecolor='white', alpha=0.5)
            ax.text(0.80,
                    0.05,
                    textstr,
                    transform=ax.transAxes,
                    fontsize=14,
                    bbox=props)
            plt.legend(loc="upper left")

            if constraints != "":
                plt.savefig(save_path + "\\" + trial_type + "_" + constraints +
                            remove_dots(str(collumn_value)) +
                            "_group_model_fit.png")
            else:
                plt.savefig(save_path + "\\" + trial_type + "_" + constraints +
                            "_group_model_fit.png")
            plt.show()
            plt.close()

            # plot optimised variance target
            fig = plt.figure(figsize=(6, 6))
            ax = fig.add_subplot(1, 1, 1)  #stops per trial
            plt.title("All subjects", fontsize="20")
            plt.scatter(human_data_x, human_data_y_std, color="r", marker="o")

            _, unique_idx = np.unique(human_data_x, return_index=True)
            unique_mask = create_mask(indices=unique_idx,
                                      size=len(human_data_x))
            human_data_x_means, human_data_y_std_means = sort_by_other_array(
                first_array_orderby=human_data_x[unique_mask],
                second_array=compute_means(human_data_x,
                                           human_data_y_std)[unique_mask])
            plt.plot(human_data_x_means,
                     human_data_y_std_means,
                     "r",
                     label="data")

            _, unique_idx = np.unique(test_x, return_index=True)
            unique_mask = create_mask(indices=unique_idx, size=len(test_x))
            model_data_x_means, model_data_y_std_means = sort_by_other_array(
                first_array_orderby=test_x[unique_mask],
                second_array=compute_means(test_x,
                                           best_fit_sigmas)[unique_mask])

            plt.plot(model_data_x_means,
                     model_data_y_std_means,
                     "g",
                     label="model")
            plt.xlabel("Target (VU)", fontsize=20)
            plt.xlim((0, 400))
            plt.ylim((0, 100))
            plt.ylabel("Response STD (VU)", fontsize=20)
            plt.subplots_adjust(left=0.2)
            ax.tick_params(axis='both', which='major', labelsize=15)
            plt.gca().spines['top'].set_visible(False)
            plt.gca().spines['right'].set_visible(False)
            textstr = '\n'.join(
                (r'$\Gamma=%.2f$' % (model_params.gamma, ),
                 r'$\lambda=%.2f$' % (model_params.lambda_coef, ),
                 r'$\mathrm{k}=%.2f$' % (model_params.k, ),
                 r'$\mathrm{L}=%.2f$' % (var_param[0], )))
            props = dict(boxstyle='round', facecolor='white', alpha=0.5)
            ax.text(0.80,
                    0.05,
                    textstr,
                    transform=ax.transAxes,
                    fontsize=14,
                    bbox=props)
            plt.legend(loc="upper left")

            if constraints != "":
                plt.savefig(save_path + "\\" + trial_type + "_" + constraints +
                            remove_dots(str(collumn_value)) +
                            "_group_model_variance_fit.png")
            else:
                plt.savefig(save_path + "\\" + trial_type + "_" + constraints +
                            "_group_model_variance_fit.png")
            plt.show()
            plt.close()
            '''

            # now we do it per subject
            ppids = np.unique(human_data["ppid"])
            subjects_model_params = np.zeros((len(ppids), 4)) # fitting 3 parameters
            for j in range(len(ppids)):

                subject_parameter_fits = model_params_df[(model_params_df["ppid"] == ppids[j]) & (model_params_df["trial_type"] == trial_type) & (model_params_df[constraints] == collumn_value)]
                model_params.likelihood_width = 0
                model_params.gamma = subject_parameter_fits["gamma"].iloc[0]
                model_params.lambda_coef = subject_parameter_fits["lambda"].iloc[0]
                model_params.k = subject_parameter_fits["k"].iloc[0]

                subject_data_mean_x = np.asarray(human_mean_data_with_id[(human_mean_data_with_id["Trial type"] == trial_type) & (human_mean_data_with_id["ppid"] == ppids[j])]['integration_length'])
                subject_data_mean_y = np.asarray(human_mean_data_with_id[(human_mean_data_with_id["Trial type"] == trial_type) & (human_mean_data_with_id["ppid"] == ppids[j])]['first_stop_location'])

                subject_data_x = np.asarray(human_data[(human_data["Trial type"] == trial_type) & (human_data["ppid"] == ppids[j])]['integration_length'])
                subject_data_y = np.asarray(human_data[(human_data["Trial type"] == trial_type) & (human_data["ppid"] == ppids[j])]["first_stop_location"])

                subject_data_y_std = compute_sigmas(subject_data_x, subject_data_y)

                subject_human_data_with_std_individual = np.asarray(human_data_with_std_individual[(human_data_with_std_individual["Trial type"] == trial_type) &
                                                                                                   (human_data_with_std_individual["ppid"] == ppids[j])]["y_std"])

                sub_var_param = fit_variance_to_model(subject_data_x, subject_data_y_std)
                subjects_model_params[j] = sub_var_param

                # plotting model fit
                best_fit_responses, best_fit_sigmas = simple_variance_model_full(test_x, likelihood_width=sub_var_param[0])
                # plot optimised response target
                fig = plt.figure(figsize = (6,6))
                ax = fig.add_subplot(1,1,1) #stops per trial
                plt.title(ppids[j], fontsize="20")
                plt.scatter(subject_data_x, subject_data_y, color="r", marker="o")
                plt.plot(subject_data_mean_x, subject_data_mean_y, "r", label="data")
                plt.plot(test_x, best_fit_responses, "g", label="model")
                plt.plot(np.arange(0,400), np.arange(0,400), "k--", label="Unity")
                plt.xlabel("Target", fontsize=20)
                plt.xlim((0,400))
                plt.ylim((0,400))
                plt.ylabel("Optimal Response", fontsize=20)
                plt.subplots_adjust(left=0.2)
                ax.tick_params(axis='both', which='major', labelsize=15)
                plt.gca().spines['top'].set_visible(False)
                plt.gca().spines['right'].set_visible(False)
                textstr = '\n'.join((
                    r'$\Gamma=%.2f$' % (model_params.gamma, ),
                    r'$\lambda=%.2f$' % (model_params.lambda_coef, ),
                    r'$\mathrm{k}=%.2f$' % (model_params.k,),
                    r'$\L=%.2f$'         % (sub_var_param[0],)))
                props = dict(boxstyle='round', facecolor='white', alpha=0.5)
                ax.text(0.80, 0.05, textstr, transform=ax.transAxes, fontsize=14, bbox=props)
                plt.legend(loc="upper left")
                plt.savefig(save_path+"\\"+trial_type+"_"+ppids[j]+"_model_stochastic_fit.png")
                plt.show()
                plt.close()


                # plot optimised variance target
                fig = plt.figure(figsize = (6,6))
                ax = fig.add_subplot(1,1,1) #stops per trial
                plt.title(ppids[j], fontsize="20")
                plt.scatter(subject_data_x, subject_data_y_std, color="r", marker="o")

                _, unique_idx = np.unique(subject_data_x, return_index=True)
                unique_mask = create_mask(indices=unique_idx, size=len(subject_data_x))
                subject_data_x_means, subject_data_y_std_means = sort_by_other_array(first_array_orderby= subject_data_x[unique_mask],
                                                                                 second_array=compute_means(subject_data_x, subject_data_y_std)[unique_mask])

                plt.plot(subject_data_x_means, subject_data_y_std_means, "r", label="data")
                plt.plot(test_x, best_fit_sigmas, "g", label="model")
                #plt.plot(np.arange(0,400), np.arange(0,400), "k--", label="Unity")
                plt.xlabel("Target (VU)", fontsize=20)
                plt.xlim((0,400))
                plt.ylim((0,100))
                plt.ylabel("Response SD (VU)", fontsize=20)
                plt.subplots_adjust(left=0.2)
                ax.tick_params(axis='both', which='major', labelsize=15)
                plt.gca().spines['top'].set_visible(False)
                plt.gca().spines['right'].set_visible(False)
                textstr = '\n'.join((
                    r'$\Gamma=%.2f$' % (model_params.gamma, ),
                    r'$\lambda=%.2f$' % (model_params.lambda_coef, ),
                    r'$\mathrm{k}=%.2f$' % (model_params.k,),
                    r'$\L=%.2f$'         % (sub_var_param[0],)))
                props = dict(boxstyle='round', facecolor='white', alpha=0.5)
                ax.text(0.80, 0.05, textstr, transform=ax.transAxes, fontsize=14, bbox=props)
                plt.legend(loc="upper left")

                if constraints is not "":
                    plt.savefig(save_path+"\\"+trial_type+"_"+constraints+remove_dots(str(collumn_value))+"_group_model_variance_fit.png")
                else:
                    plt.savefig(save_path+"\\"+trial_type+"_"+constraints+"_group_model_variance_fit.png")
                plt.show()
                plt.close()
                
            '''

    return model_params_df
Example #45
0
def data_to_json(degree, points, weights):
    d = {"s1": [], "s2": [], "s3": []}

    idx = numpy.argsort(weights)
    weights = weights[idx]
    points = points[idx]

    # get groups of equal weights
    for s, length in zip(*_grp_start_len(weights, 1.0e-12)):
        weight = weights[s]
        pts = points[s:s + length]
        if length == 1:
            d["s3"].append([weight])
        elif length == 3:
            # Symmetry group [[a, a, b], [a, b, a], [b, a, a]].
            # Find the equal value `a`.
            tol = 1.0e-12
            beta = pts[0] - pts[0][0]
            ct = numpy.count_nonzero(abs(beta) < tol)
            assert ct in [1, 2], beta
            val = pts[0][0] if ct == 2 else pts[0][1]
            d["s2"].append([weight, val])
        else:
            # Symmetry group perm([[a, b, c]]). Deliberately take the two smallest of a,
            # b, c as representatives.
            assert length == 6
            srt = numpy.sort(pts[0])
            d["s1"].append([weight, srt[0], srt[1]])

    d["degree"] = degree

    if len(d["s1"]) == 0:
        d.pop("s1")
    if len(d["s2"]) == 0:
        d.pop("s2")
    if len(d["s3"]) == 0:
        d.pop("s3")

    # Getting floats in scientific notation in python.json is almost impossible, so do
    # some work here. Compare with <https://stackoverflow.com/a/1733105/353337>.
    class PrettyFloat(float):
        def __repr__(self):
            return '{:.16e}'.format(self)

    def pretty_floats(obj):
        if isinstance(obj, float):
            return PrettyFloat(obj)
        elif isinstance(obj, dict):
            return dict((k, pretty_floats(v)) for k, v in obj.items())
        elif isinstance(obj, (list, tuple)):
            return list(map(pretty_floats, obj))
        return obj

    with open('wv{:02d}.json'.format(degree), "w") as f:
        string = pretty_floats(d).__repr__() \
            .replace("'", "\"") \
            .replace("[[", "[\n  [") \
            .replace("],", "],\n   ") \
            .replace("]],", "]\n  ],")
        f.write(string)

    return
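# Usage sketch of the PrettyFloat trick above: subclassing float so that repr()
# always yields fixed-width scientific notation, which the plain json module
# will not do for floats.
class PrettyFloat(float):
    def __repr__(self):
        return '{:.16e}'.format(self)

print(repr(PrettyFloat(0.125)))        # 1.2500000000000000e-01
print(repr([PrettyFloat(1.0 / 3.0)]))  # [3.3333333333333331e-01]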
Example #46
0
def histogramdd(sample, bins=10, range=None, normed=False, weights=None):
    """
    Compute the multidimensional histogram of some data.

    Parameters
    ----------
    sample : array_like
        The data to be histogrammed. It must be an (N,D) array or data
        that can be converted to such. The rows of the resulting array
        are the coordinates of points in a D dimensional polytope.
    bins : sequence or int, optional
        The bin specification:

        * A sequence of arrays describing the bin edges along each dimension.
        * The number of bins for each dimension (nx, ny, ... =bins)
        * The number of bins for all dimensions (nx=ny=...=bins).

    range : sequence, optional
        A sequence of lower and upper bin edges to be used if the edges are
        not given explicitly in `bins`. Defaults to the minimum and maximum
        values along each dimension.
    normed : bool, optional
        If False, returns the number of samples in each bin. If True,
        returns the bin density ``bin_count / sample_count / bin_volume``.
    weights : array_like (N,), optional
        An array of values `w_i` weighing each sample `(x_i, y_i, z_i, ...)`.
        Weights are normalized to 1 if normed is True. If normed is False,
        the values of the returned histogram are equal to the sum of the
        weights belonging to the samples falling into each bin.
        Weights can also be a list of (weight arrays or None), in which case
        a list of histograms is returned as H.

    Returns
    -------
    H : ndarray
        The multidimensional histogram of sample x. See normed and weights
        for the different possible semantics.
    edges : list
        A list of D arrays describing the bin edges for each dimension.

    See Also
    --------
    histogram: 1-D histogram
    histogram2d: 2-D histogram

    Examples
    --------
    >>> r = np.random.randn(100,3)
    >>> H, edges = np.histogramdd(r, bins = (5, 8, 4))
    >>> H.shape, edges[0].size, edges[1].size, edges[2].size
    ((5, 8, 4), 6, 9, 5)

    """

    try:
        # Sample is an ND-array.
        N, D = sample.shape
    except (AttributeError, ValueError):
        # Sample is a sequence of 1D arrays.
        sample = atleast_2d(sample).T
        N, D = sample.shape
    
    if weights is None:
        W = None
    else:    
        try:
            # Weights is a 1D-array
            weights.shape
            W = -1
        except (AttributeError, ValueError):
            # Weights is a list of 1D-arrays or None's
            W = len(weights)

    if W == -1 and weights.ndim != 1:
        raise AttributeError('Weights must be a 1D-array, None, or a list of both')

    nbin = empty(D, int)
    edges = D*[None]
    dedges = D*[None]
    if weights is not None:
        if W == -1:
            weights = asarray(weights)
            assert weights.shape == (N,)
        else:
            for i in arange(W):
                if weights[i] is not None:
                    weights[i] = asarray(weights[i])
                    assert weights[i].shape == (N,)

    try:
        M = len(bins)
        if M != D:
            raise AttributeError(
                'The dimension of bins must be equal to the dimension of the '
                'sample x.')
    except TypeError:
        # bins is an integer
        bins = D*[bins]

    # Select range for each dimension
    # Used only if number of bins is given.
    if range is None:
        # Handle empty input. Range can't be determined in that case, use 0-1.
        if N == 0:
            smin = zeros(D)
            smax = ones(D)
        else:
            smin = atleast_1d(array(sample.min(0), float))
            smax = atleast_1d(array(sample.max(0), float))
    else:
        smin = zeros(D)
        smax = zeros(D)
        for i in arange(D):
            smin[i], smax[i] = range[i]

    # Make sure the bins have a finite width.
    for i in arange(len(smin)):
        if smin[i] == smax[i]:
            smin[i] = smin[i] - .5
            smax[i] = smax[i] + .5

    # Create edge arrays
    for i in arange(D):
        if isscalar(bins[i]):
            if bins[i] < 1:
                raise ValueError(
                    "Element at index %s in `bins` should be a positive "
                    "integer." % i)
            nbin[i] = bins[i] + 2  # +2 for outlier bins
            edges[i] = linspace(smin[i], smax[i], nbin[i]-1)
        else:
            edges[i] = asarray(bins[i], float)
            nbin[i] = len(edges[i]) + 1  # +1 for outlier bins
        dedges[i] = diff(edges[i])
        if np.any(np.asarray(dedges[i]) <= 0):
            raise ValueError(
                "Found bin edge of size <= 0. Did you specify `bins` with "
                "non-monotonic sequence?")

    nbin = asarray(nbin)

    # Handle empty input.
    if N == 0:
        if W is not None and W > 0:
            return [np.zeros(nbin-2) for _ in arange(W)], edges
        else:
            return np.zeros(nbin-2), edges

    # Compute the bin number each sample falls into.
    Ncount = {}
    for i in arange(D):
        # searchsorted is faster for many bins
        Ncount[i] = searchsorted(edges[i], sample[:, i], "right")
        #Ncount[i] = digitize(sample[:, i], edges[i])

    # Using digitize, values that fall on an edge are put in the right bin.
    # For the rightmost bin, we want values equal to the right
    # edge to be counted in the last bin, and not as an outlier.
    for i in arange(D):
        # Rounding precision
        mindiff = dedges[i].min()
        if not np.isinf(mindiff):
            decimal = int(-log10(mindiff)) + 6
            # Find which points are on the rightmost edge.
            not_smaller_than_edge = (sample[:, i] >= edges[i][-1])
            on_edge = (around(sample[:, i], decimal) == around(edges[i][-1], decimal))
            # Shift these points one bin to the left.
            Ncount[i][where(on_edge & not_smaller_than_edge)[0]] -= 1

    # Compute the sample indices in the flattened histogram matrix.
    ni = nbin.argsort()
    xy = zeros(N, int)
    for i in arange(0, D-1):
        xy += Ncount[ni[i]] * nbin[ni[i+1:]].prod()
    xy += Ncount[ni[-1]]

    # Compute the number of repetitions in xy and assign it to the
    # flattened histmat.
    if len(xy) == 0:
        if W is not None and W > 0:
            return [np.zeros(nbin-2) for _ in arange(W)], edges
        else:
            return zeros(nbin-2, int), edges

    # Flattened histogram matrix (1D)
    # Reshape is used so that overlarge arrays
    # will raise an error.
    Wd = W if W is not None and W > 0 else 1
    hists = [zeros(nbin, float).reshape(-1) for _ in arange(Wd)]
    for histidx, hist in enumerate(hists):
        weights_ = weights[histidx] if W is not None and W > 0 else weights
        flatcount = bincount(xy, weights_)
        a = arange(len(flatcount))
        hist[a] = flatcount
    
        # Shape into a proper matrix
        hist = hist.reshape(sort(nbin))
        ni = nbin.argsort()
        for i in arange(nbin.size):
            j = ni.argsort()[i]
            hist = hist.swapaxes(i, j)
            ni[i], ni[j] = ni[j], ni[i]
    
        # Remove outliers (indices 0 and -1 for each dimension).
        core = D*[slice(1, -1)]
        hist = hist[tuple(core)]
    
        # Normalize if normed is True
        if normed:
            s = hist.sum()
            for i in arange(D):
                shape = ones(D, int)
                shape[i] = nbin[i] - 2
                hist = hist / dedges[i].reshape(shape)
            hist /= s
    
        if (hist.shape != nbin - 2).any():
            raise RuntimeError(
                "Internal Shape Error: hist.shape != nbin-2 -> " + str(hist.shape) + " != " + str(nbin-2))
        
        hists[histidx] = hist
    
    if W in [None, -1]:
        return hists[0], edges
    else:
        return hists, edges
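# Hedged usage sketch of the list-of-weights extension above (the main change
# relative to the stock numpy.histogramdd). It assumes the function and its
# `from numpy import *`-style helpers (zeros, linspace, bincount, ...) are in
# scope; passing a list of weight arrays (or None) returns a list of histograms.
import numpy as np

rng = np.random.RandomState(0)
sample_pts = rng.rand(100, 2)
w_uniform = np.ones(100)
w_random = rng.rand(100)
hists, edge_list = histogramdd(sample_pts, bins=(4, 4),
                               weights=[w_uniform, w_random, None])
print(len(hists), hists[0].shape)  # 3 histograms, each of shape (4, 4)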
Example #47
0
 def handle_crop(self, event_click, event_release):
     corners = (
         np.sort([event.xdata for event in (event_click, event_release)]),
         np.sort([event.ydata for event in (event_click, event_release)]),
     )
     self.roi_limits = np.rint(np.hstack(corners)).astype(int)
from sklearn import tree
X = [[0, 0], [3,3]]
y = [0.75, 3]

tree_reg = tree.DecisionTreeRegressor(random_state=42)
tree_reg = tree_reg.fit(X, y)
tree_reg.predict([[1.5, 1.5]])

# Import the necessary modules and libraries
import numpy as np
from sklearn.tree import DecisionTreeRegressor
import matplotlib.pyplot as plt

# Create a random dataset
rng = np.random.RandomState(1)
X = np.sort(5 * rng.rand(80, 1), axis=0)
y = np.sin(X).ravel()
y[::5] += 3 * (0.5 - rng.rand(16))

# Fit regression model
regr_1 = DecisionTreeRegressor(max_depth=2)
regr_2 = DecisionTreeRegressor(max_depth=5)
regr_1.fit(X, y)
regr_2.fit(X, y)

# Predict
X_test = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
y_1 = regr_1.predict(X_test)
y_2 = regr_2.predict(X_test)

# Plot the results
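# (The plotting step announced above appears to have been truncated; the lines
# below are a minimal sketch in the spirit of the scikit-learn example.)
plt.figure()
plt.scatter(X, y, s=20, edgecolor="black", c="darkorange", label="data")
plt.plot(X_test, y_1, color="cornflowerblue", label="max_depth=2", linewidth=2)
plt.plot(X_test, y_2, color="yellowgreen", label="max_depth=5", linewidth=2)
plt.xlabel("data")
plt.ylabel("target")
plt.title("Decision Tree Regression")
plt.legend()
plt.show()
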
from random import sample  # needed for the random restarts below

def trimmed_Kmeans(data, k, trim=0.1, runs=100, points=None, printcrit=False, maxit=None):
    '''
    data  : np.array with the dataset (one row per observation)
    k     : number of clusters
    trim  : proportion of points to trim as outliers
    runs  : number of random restarts
    points: initial cluster centres (None by default, i.e. random)
    '''
    if maxit is None:
        maxit = 2*len(data)
    
    countmode = runs+1
    data = np.array(data)
    n,p  = data.shape
    nin  = round((1-trim)*n)
    crit = np.inf

    oldclass = np.zeros((n,))
    iclass   = np.zeros((n,))
    optclass = np.zeros((n,))
    disttom  = np.zeros((n,))
    
    for i in range(runs):
        #if i/countmode == round(i/countmode):
            #print("iteration",i)
        if points is None:
            means = data[sample(np.arange(n).tolist(),k),:]
        else:
            means = points.copy()
        wend = False
        itcounter = 0

        while not wend:
            itcounter += 1
            for j in range(n):
                dj = np.zeros((k,))
                for l in range(k):
                    #print(data[j,:],means[j,:])
                    dj_   = (data[j,:]-means[l,:])**2
                    dj[l] = dj_.sum()
                iclass[j] = dj.argmin()
                disttom[j]= dj.min()

            order_idx = np.argsort(disttom)[nin:]
            iclass[order_idx] = -1  # trim: mark the points furthest from their centre as outliers
            
            if itcounter >= maxit or np.array_equal(oldclass, iclass):
                wend = True
            else:
                for l in range(k):
                    members = data[iclass == l, :]
                    if members.shape[0] == 0:
                        # empty cluster: re-seed its centre from a random data point
                        means[l, :] = data[np.random.randint(n), :]
                    else:
                        # centre = per-dimension mean of the cluster's points
                        means[l, :] = members.mean(axis=0)
                oldclass = iclass.copy()
        
        newcrit = disttom[iclass >= 0].sum()  # total distortion over the non-trimmed points
        if printcrit:
            print("Iteration ", i, " criterion value ", newcrit/nin, "\n")  # average distortion over the non-trimmed points
        if newcrit <= crit :
            optclass = iclass.copy()
            crit = newcrit.copy()
            optmeans = means.copy()

#     optclass[optclass==0] = k+1 # this suggests the outliers are the 0s (here they are labelled -1)
    out = {'classification':optclass,'means':optmeans,'criterion':crit/nin,'disttom':disttom,
           'ropt':np.sort(disttom)[nin],'k':k,'trim':trim,"runs":runs}
    return(out)
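# Hedged usage sketch of trimmed_Kmeans above on synthetic 2-D data with a few
# gross outliers; labels come back in result['classification'] with -1 marking
# trimmed points (toy data, illustrative only).
import numpy as np

rng = np.random.RandomState(0)
cluster_a = rng.randn(50, 2)
cluster_b = rng.randn(50, 2) + 6.0
outliers = rng.uniform(-10, 20, size=(10, 2))
toy_data = np.vstack([cluster_a, cluster_b, outliers])

result = trimmed_Kmeans(toy_data, k=2, trim=0.1, runs=10)
print(result['means'])
print(result['criterion'])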
Example #50
0
 def handle_crop(self, event_click, event_release):
     self.time_limits = np.sort(
         [event.xdata for event in (event_click, event_release)])
     self.position_limits = np.sort(
         [event.ydata for event in (event_click, event_release)])
Example #51
0
def make_plots(datadf, settings):
    '''
    Call plotting functions from nanoplotter
    settings["lengths_pointer"] is a column in the DataFrame specifying which lengths to use
    '''
    plot_settings = dict(font_scale=settings["font_scale"])
    nanoplotter.plot_settings(plot_settings, dpi=settings["dpi"])
    color = nanoplotter.check_valid_color(settings["color"])
    colormap = nanoplotter.check_valid_colormap(settings["colormap"])
    plotdict = {p: settings["plots"].count(p) for p in ["kde", "hex", "dot", "pauvre"]}
    plots = []
    if settings["N50"]:
        n50 = nanomath.get_N50(np.sort(datadf["lengths"]))
    else:
        n50 = None
    plots.extend(
        nanoplotter.length_plots(
            array=datadf[datadf["length_filter"]]["lengths"].astype('uint64'),
            name="Read length",
            path=settings["path"],
            n50=n50,
            color=color,
            figformat=settings["format"],
            title=settings["title"])
    )
    logging.info("Created length plots")
    if "quals" in datadf:
        plots.extend(
            nanoplotter.scatter(
                x=datadf[datadf["length_filter"]][settings["lengths_pointer"].replace('log_', '')],
                y=datadf[datadf["length_filter"]]["quals"],
                names=['Read lengths', 'Average read quality'],
                path=settings["path"] + "LengthvsQualityScatterPlot",
                color=color,
                figformat=settings["format"],
                plots=plotdict,
                title=settings["title"],
                plot_settings=plot_settings)
        )
        if settings["logBool"]:
            plots.extend(
                nanoplotter.scatter(
                    x=datadf[datadf["length_filter"]][settings["lengths_pointer"]],
                    y=datadf[datadf["length_filter"]]["quals"],
                    names=['Read lengths', 'Average read quality'],
                    path=settings["path"] + "LengthvsQualityScatterPlot",
                    color=color,
                    figformat=settings["format"],
                    plots=plotdict,
                    log=True,
                    title=settings["title"],
                    plot_settings=plot_settings)
            )
        logging.info("Created LengthvsQual plot")
    if "channelIDs" in datadf:
        plots.extend(
            nanoplotter.spatial_heatmap(
                array=datadf["channelIDs"],
                title=settings["title"],
                path=settings["path"] + "ActivityMap_ReadsPerChannel",
                color=colormap,
                figformat=settings["format"])
        )
        logging.info("Created spatialheatmap for succesfull basecalls.")
    if "start_time" in datadf:
        plots.extend(
            nanoplotter.time_plots(
                df=datadf,
                path=settings["path"],
                color=color,
                figformat=settings["format"],
                title=settings["title"],
                plot_settings=plot_settings)
        )
        if settings["logBool"]:
            plots.extend(
                nanoplotter.time_plots(
                    df=datadf,
                    path=settings["path"],
                    color=color,
                    figformat=settings["format"],
                    title=settings["title"],
                    log_length=True,
                    plot_settings=plot_settings)
            )
        logging.info("Created timeplots.")
    if "aligned_lengths" in datadf and "lengths" in datadf:
        plots.extend(
            nanoplotter.scatter(
                x=datadf[datadf["length_filter"]]["aligned_lengths"],
                y=datadf[datadf["length_filter"]]["lengths"],
                names=["Aligned read lengths", "Sequenced read length"],
                path=settings["path"] + "AlignedReadlengthvsSequencedReadLength",
                figformat=settings["format"],
                plots=plotdict,
                color=color,
                title=settings["title"],
                plot_settings=plot_settings)
        )
        logging.info("Created AlignedLength vs Length plot.")
    if "mapQ" in datadf and "quals" in datadf:
        plots.extend(
            nanoplotter.scatter(
                x=datadf["mapQ"],
                y=datadf["quals"],
                names=["Read mapping quality", "Average basecall quality"],
                path=settings["path"] + "MappingQualityvsAverageBaseQuality",
                color=color,
                figformat=settings["format"],
                plots=plotdict,
                title=settings["title"],
                plot_settings=plot_settings)
        )
        logging.info("Created MapQvsBaseQ plot.")
        plots.extend(
            nanoplotter.scatter(
                x=datadf[datadf["length_filter"]][settings["lengths_pointer"].replace('log_', '')],
                y=datadf[datadf["length_filter"]]["mapQ"],
                names=["Read length", "Read mapping quality"],
                path=settings["path"] + "MappingQualityvsReadLength",
                color=color,
                figformat=settings["format"],
                plots=plotdict,
                title=settings["title"],
                plot_settings=plot_settings)
        )
        if settings["logBool"]:
            plots.extend(
                nanoplotter.scatter(
                    x=datadf[datadf["length_filter"]][settings["lengths_pointer"]],
                    y=datadf[datadf["length_filter"]]["mapQ"],
                    names=["Read length", "Read mapping quality"],
                    path=settings["path"] + "MappingQualityvsReadLength",
                    color=color,
                    figformat=settings["format"],
                    plots=plotdict,
                    log=True,
                    title=settings["title"],
                    plot_settings=plot_settings)
            )
        logging.info("Created Mapping quality vs read length plot.")
    if "percentIdentity" in datadf:
        minPID = np.percentile(datadf["percentIdentity"], 1)
        if "aligned_quals" in datadf:
            plots.extend(
                nanoplotter.scatter(
                    x=datadf["percentIdentity"],
                    y=datadf["aligned_quals"],
                    names=["Percent identity", "Average Base Quality"],
                    path=settings["path"] + "PercentIdentityvsAverageBaseQuality",
                    color=color,
                    figformat=settings["format"],
                    plots=plotdict,
                    stat=stats.pearsonr if not settings["hide_stats"] else None,
                    minvalx=minPID,
                    title=settings["title"],
                    plot_settings=plot_settings)
            )
            logging.info("Created Percent ID vs Base quality plot.")
        plots.extend(
            nanoplotter.scatter(
                x=datadf[datadf["length_filter"]][settings["lengths_pointer"].replace('log_', '')],
                y=datadf[datadf["length_filter"]]["percentIdentity"],
                names=["Aligned read length", "Percent identity"],
                path=settings["path"] + "PercentIdentityvsAlignedReadLength",
                color=color,
                figformat=settings["format"],
                plots=plotdict,
                stat=stats.pearsonr if not settings["hide_stats"] else None,
                minvaly=minPID,
                title=settings["title"],
                plot_settings=plot_settings)
        )
        if settings["logBool"]:
            plots.extend(
                nanoplotter.scatter(
                    x=datadf[datadf["length_filter"]][settings["lengths_pointer"]],
                    y=datadf[datadf["length_filter"]]["percentIdentity"],
                    names=["Aligned read length", "Percent identity"],
                    path=settings["path"] + "PercentIdentityvsAlignedReadLength",
                    color=color,
                    figformat=settings["format"],
                    plots=plotdict,
                    stat=stats.pearsonr if not settings["hide_stats"] else None,
                    log=True,
                    minvaly=minPID,
                    title=settings["title"],
                    plot_settings=plot_settings)
            )

        plots.append(nanoplotter.dynamic_histogram(array=datadf["percentIdentity"],
                                                   name="percent identity",
                                                   path=settings["path"]
                                                   + "PercentIdentityHistogram",
                                                   title=settings["title"],
                                                   color=color))
        logging.info("Created Percent ID vs Length plot")
    return plots
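# Hedged sketch of a minimal `settings` dict for make_plots above, listing only
# the keys the function itself reads; the values are illustrative placeholders,
# not the tool's defaults.
example_settings = {
    "font_scale": 1.0,
    "dpi": 100,
    "color": "blue",
    "colormap": "Greens",
    "plots": ["kde"],
    "N50": True,
    "path": "plots/",
    "format": "png",
    "title": None,
    "logBool": False,
    "lengths_pointer": "log_lengths",
    "hide_stats": False,
}
# make_plots(datadf, example_settings)  # datadf: DataFrame with "lengths" and a
#                                       # boolean "length_filter" column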
Example #52
0
def best_times_day(coord, lat=-30.7133, lon=21.443, utcoff=2., date='2019-01-01', plot=True, show=False,\
                 distsun=5, distmoon=3, elev=20, night=False, leg=None, setrise=True, filename='auspiciousness_day.png', satellites=False):
    location = co.EarthLocation(lat=lat * u.deg,
                                lon=lon * u.deg,
                                height=1e3 * u.m)
    utcoffset = utcoff * u.hour
    timesteps = 120
    delta_time = np.linspace(-12, 12, timesteps) * u.hour
    ind = range(0, len(delta_time))

    midnight = Time(date) - utcoffset
    times = midnight + delta_time
    frame = co.AltAz(obstime=times, location=location)

    sun = co.get_sun(times).transform_to(frame)
    moon = co.get_moon(times).transform_to(frame)
    src = coord.transform_to(frame)
    ind_riseset = np.where(
        np.logical_and(sun.alt > -12 * u.deg, sun.alt < 12 * u.deg))[0]
    ind_sun = np.where(src.separation(sun).deg < distsun)[0]
    ind_moon = np.where(src.separation(moon).deg < distmoon)[0]
    alt_max = src.alt.max()
    ind_low = np.where(src.alt <= (alt_max - alt_max * (elev / 100.)))[0]

    if night: ind_day = np.where(sun.alt > 0 * u.deg)[0]
    else: ind_day = [None]

    g = np.sort(
        list(
            set(ind) - set(ind_riseset) - set(ind_sun) - set(ind_moon) -
            set(ind_low) - set(ind_day)))
    try:
        tg = delta_time[g]
    except:
        tg = None

    if satellites:
        print "Calculating satellite separations"
        sat_times = [
            (times[g][0] + i * u.minute).datetime
            for i in range(int((tg[-1] - tg[0]).to(u.minute) / u.minute))
        ]
        params = [
            list(i) for i in zip([[coord.ra.rad, coord.dec.rad]
                                  for i in range(len(sat_times))], sat_times)
        ]
        min_seps = np.nanmin(np.array(parmap(sat_separations, params)), axis=1)
        sat_time_steps = np.linspace(delta_time[g][0], delta_time[g][-1],
                                     len(min_seps))
        sat_frame = co.AltAz(obstime=sat_times, location=location)
        sat_alts = coord.transform_to(sat_frame).alt

    if plot:
        plt.plot(delta_time, sun.alt, 'r--', label='Sun')
        plt.plot(delta_time, moon.alt, 'b--', label='Moon')
        plt.fill_between(delta_time.to('hr').value, 0, 90, np.logical_and(sun.alt<12*u.deg,sun.alt>-12*u.deg), \
                 color='0.5', alpha=0.5)
        plt.fill_between(delta_time.to('hr').value,
                         0,
                         90,
                         sun.alt < 0 * u.deg,
                         color='0.9',
                         alpha=0.7)
        plt.plot(delta_time, src.alt, 'g-', label='Target')
        if satellites:
            try:
                cb = plt.scatter(sat_time_steps,
                                 sat_alts,
                                 c=min_seps,
                                 s=30,
                                 alpha=0.5,
                                 vmin=0,
                                 vmax=10)
            except:
                None
        else:
            try:
                plt.scatter(delta_time[g],
                            src.alt[g],
                            s=30,
                            c='g',
                            alpha=0.5,
                            label='Best time')
            except:
                None
        plt.ylim(0, 90)
        plt.xlim(-12, 12)
        plt.xticks(range(-12, 13, 2))
        plt.legend(loc='best', ncol=2)
        plt.grid()
        plt.title("{0} to {1}".format(
            str(Time(date) - 1 * u.day).split()[0], date))
        plt.xlabel('Time from midnight [hour]')
        plt.ylabel('Elevation [deg]')
        if satellites:
            cbar = plt.colorbar(cb)
            cbar.set_ticks(np.arange(0, 11, 2))
            cbar.set_label('Nearest satellite distance [deg]',
                           rotation=270,
                           labelpad=+20)
        if show: plt.show()
        else:
            plt.tight_layout()
            plt.savefig(filename, dpi=80)

    else:
        return tg
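# Tiny sketch of the index-filtering idiom used above: the "good" time steps are
# whatever is left after subtracting every flagged index set (toy index arrays).
import numpy as np

ind = range(0, 10)
ind_riseset, ind_sun, ind_moon = np.array([0, 1]), np.array([5]), np.array([])
ind_low, ind_day = np.array([8, 9]), [None]
g = np.sort(list(set(ind) - set(ind_riseset) - set(ind_sun)
                 - set(ind_moon) - set(ind_low) - set(ind_day)))
print(g)  # [2 3 4 6 7]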
Example #53
0
    def add_measures_to_metrics(self, metrics_and_measures):
        """Update a metric with a new measures, computing new aggregations.

        :param metrics_and_measures: A dict there keys are `storage.Metric`
                                     objects and values are timeseries array of
                                     the new measures.
        """
        with self.statistics.time("raw measures fetch"):
            raw_measures = self._get_or_create_unaggregated_timeseries(
                metrics_and_measures.keys())
        self.statistics["raw measures fetch"] += len(metrics_and_measures)
        self.statistics["processed measures"] += sum(
            map(len, metrics_and_measures.values()))

        new_boundts = []
        splits_to_delete = {}
        splits_to_update = {}

        for metric, measures in six.iteritems(metrics_and_measures):
            measures = numpy.sort(measures, order='timestamps')

            agg_methods = list(metric.archive_policy.aggregation_methods)
            block_size = metric.archive_policy.max_block_size
            back_window = metric.archive_policy.back_window
            # NOTE(sileht): We keep one more blocks to calculate rate of change
            # correctly
            if any(filter(lambda x: x.startswith("rate:"), agg_methods)):
                back_window += 1

            if raw_measures[metric] is None:
                ts = None
            else:
                try:
                    ts = carbonara.BoundTimeSerie.unserialize(
                        raw_measures[metric], block_size, back_window)
                except carbonara.InvalidData:
                    LOG.error("Data corruption detected for %s "
                              "unaggregated timeserie, creating a new one",
                              metric.id)
                    ts = None

            if ts is None:
                # This is the first time we treat measures for this
                # metric, or data are corrupted, create a new one
                ts = carbonara.BoundTimeSerie(block_size=block_size,
                                              back_window=back_window)
                current_first_block_timestamp = None
            else:
                current_first_block_timestamp = ts.first_block_timestamp()

            # NOTE(jd) This is Python where you need such
            # hack to pass a variable around a closure,
            # sorry.
            computed_points = {"number": 0}

            def _map_compute_splits_operations(bound_timeserie):
                # NOTE (gordc): bound_timeserie is entire set of
                # unaggregated measures matching largest
                # granularity. the following takes only the points
                # affected by new measures for specific granularity
                tstamp = max(bound_timeserie.first, measures['timestamps'][0])
                new_first_block_timestamp = (
                    bound_timeserie.first_block_timestamp()
                )
                computed_points['number'] = len(bound_timeserie)

                aggregations = metric.archive_policy.aggregations

                grouped_timeseries = {
                    granularity: bound_timeserie.group_serie(
                        granularity,
                        carbonara.round_timestamp(tstamp, granularity))
                    for granularity, aggregations
                    # No need to sort the aggregations, they are already sorted
                    in itertools.groupby(aggregations, ATTRGETTER_GRANULARITY)
                }

                aggregations_and_timeseries = {
                    aggregation:
                    carbonara.AggregatedTimeSerie.from_grouped_serie(
                        grouped_timeseries[aggregation.granularity],
                        aggregation)
                    for aggregation in aggregations
                }

                deleted_keys, keys_and_split_to_store = (
                    self._compute_split_operations(
                        metric, aggregations_and_timeseries,
                        current_first_block_timestamp,
                        new_first_block_timestamp)
                )

                return (new_first_block_timestamp,
                        deleted_keys,
                        keys_and_split_to_store)

            with self.statistics.time("aggregated measures compute"):
                (new_first_block_timestamp,
                 deleted_keys,
                 keys_and_splits_to_store) = ts.set_values(
                     measures,
                     before_truncate_callback=_map_compute_splits_operations,
                )

            splits_to_delete[metric] = deleted_keys
            splits_to_update[metric] = (keys_and_splits_to_store,
                                        new_first_block_timestamp)

            new_boundts.append((metric, ts.serialize()))

        with self.statistics.time("splits delete"):
            self._delete_metric_splits(splits_to_delete)
        self.statistics["splits delete"] += len(splits_to_delete)
        with self.statistics.time("splits update"):
            self._update_metric_splits(splits_to_update)
        self.statistics["splits delete"] += len(splits_to_update)
        with self.statistics.time("raw measures store"):
            self._store_unaggregated_timeseries(new_boundts)
        self.statistics["raw measures store"] += len(new_boundts)
Example #54
0
def hpd(x, credible_interval=0.94, transform=lambda x: x, circular=False):
    """
    Calculate highest posterior density (HPD) of array for given credible_interval.

    The HPD is the minimum width Bayesian credible interval (BCI). This implementation works only
    for unimodal distributions.

    Parameters
    ----------
    x : Numpy array
        An array containing posterior samples
    credible_interval : float, optional
        Credible interval to plot. Defaults to 0.94.
    transform : callable
        Function to transform data (defaults to identity)
    circular : bool, optional
        Whether to compute the error taking into account `x` is a circular variable
        (in the range [-np.pi, np.pi]) or not. Defaults to False (i.e. non-circular variables).

    Returns
    -------
    np.ndarray
        lower and upper value of the interval.
    """
    if x.ndim > 1:
        return np.array([
            hpd(row,
                credible_interval=credible_interval,
                transform=transform,
                circular=circular) for row in x.T
        ])
    # Make a copy of trace
    x = transform(x.copy())
    len_x = len(x)

    if circular:
        mean = st.circmean(x, high=np.pi, low=-np.pi)
        x = x - mean
        x = np.arctan2(np.sin(x), np.cos(x))

    x = np.sort(x)
    interval_idx_inc = int(np.floor(credible_interval * len_x))
    n_intervals = len_x - interval_idx_inc
    interval_width = x[interval_idx_inc:] - x[:n_intervals]

    if len(interval_width) == 0:
        raise ValueError(
            "Too few elements for interval calculation. "
            "Check that credible_interval meets condition 0 =< credible_interval < 1"
        )

    min_idx = np.argmin(interval_width)
    hdi_min = x[min_idx]
    hdi_max = x[min_idx + interval_idx_inc]

    if circular:
        hdi_min = hdi_min + mean
        hdi_max = hdi_max + mean
        hdi_min = np.arctan2(np.sin(hdi_min), np.cos(hdi_min))
        hdi_max = np.arctan2(np.sin(hdi_max), np.cos(hdi_max))

    return np.array([hdi_min, hdi_max])
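# Hedged usage sketch of hpd() above on synthetic posterior draws; for a large
# standard-normal sample the 94% HPD interval should sit near +/-1.88.
import numpy as np

rng = np.random.RandomState(42)
draws = rng.normal(loc=0.0, scale=1.0, size=5000)
print(hpd(draws, credible_interval=0.94))  # roughly [-1.9, 1.9]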
Example #55
0
]].values[0]

# Compute distribution from Lagrange multipliers values
Pp = ccutils.maxent.maxEnt_from_lagrange(mRNA_space,
                                         protein_space,
                                         lagrange_sample,
                                         exponents=moments).T

# Compute mean protein copy number
mean_delta_p = np.sum(protein_space * Pp)

# Transform protein_space into fold-change
fc_space = protein_space / mean_delta_p  # Define operators to be included

# Define concentrations to include in the plot
inducer = np.sort(df_maxEnt.inducer_uM.unique())[::2]

# Define repressor copy number and operator
rep = [22, 260, 1740]
op = "O3"

# Define binstep for plot
binstep = 10
binstep_theory = 100

# Define colors
colors = sns.color_palette("Greens_r", n_colors=len(inducer) + 2)

# Initialize plot
fig, ax = plt.subplots(len(rep),
                       len(inducer),
Example #56
0
def nanoporeplots(inFastq, outputPrefix="output", genomeSizeMb=0, desiredCoverage=1):
    sequenceLengths = []
    with open(inFastq, 'r') as infile:
        for line in infile:
            line = line.strip()
            if line.startswith(("A", "C", "G", "T", "N")):
                sequenceLengths.append(len(line))
    if int(genomeSizeMb) != 0:
        genomeSize = int(genomeSizeMb)*1000000
    else:
        genomeSize = 1000000
    desiredAmountOfData = genomeSize*int(desiredCoverage)
    sequenceLengths = np.array(sequenceLengths)
    sequenceLengths = np.sort(sequenceLengths)[::-1]
    cumulativeSum = np.cumsum(sequenceLengths)
    x = np.arange(len(cumulativeSum))
    y = cumulativeSum
    step = max(1, int(len(x) * 0.01))  # subsample ~1% of the points for plotting
    fewerX = x[::step]
    fewerY = y[::step]
    myBins = np.arange(1000, 70000, 1000, dtype=float) 
    sumsInBins = np.array(calculateSumsInBins(myBins, sequenceLengths), dtype=float)
    proportionsInBins = sumsInBins/sum(sequenceLengths)
    countsInBins = np.array(countSeqsInBins(myBins, sequenceLengths), dtype=float)
    minimumSum = np.array(cumulativeSum[cumulativeSum < desiredAmountOfData])
    idx = len(minimumSum)+1
    if idx > len(sequenceLengths):
        print "You do not have enough sequence data to attain " + str(desiredCoverage) + "X coverage. "
    elif idx <= len(sequenceLengths):
        cutoff = sequenceLengths[idx]
        print "Retain sequences larger than "+str(cutoff)+" to achieve "+str(desiredCoverage)+"X coverage. "

    # plot accumulation curve
    
    fig = plt.figure()
    plt.scatter(fewerX,fewerY, c="#d62728")
    plt.xlim(max(x)*-0.05, max(x)+max(x)*0.05)
    plt.ylim(max(y)*-0.1, max(y)+max(y)*0.1)
    yTickLabels = []
    if max(y) >= genomeSize*10:
        yArrayOfTicks = np.arange(0, max(y), genomeSize*10)
    if max(y) < genomeSize*10:
        yArrayOfTicks = np.arange(0, max(y), genomeSize*1)
    for tick in np.nditer(yArrayOfTicks):
        if int(genomeSizeMb) == 0:
            oneLabel = str(tick/(genomeSize*0.1))
            plt.ylabel("Data (Mb)")
        else:
            oneLabel = str(tick/genomeSize) + "x"
            plt.ylabel("Genome coverage")
        yTickLabels.append(oneLabel)
    plt.yticks(yArrayOfTicks, yTickLabels, fontsize=10)
    plt.title("Accumulation curve")
    # plt.show()
    if int(desiredCoverage) == 1 or idx > len(sequenceLengths):
        plt.xlabel("Nth read")
        xArrayOfTicks = np.arange(0, max(x), len(x)*0.1)
        plt.xticks(xArrayOfTicks, fontsize=10, rotation='vertical')
        plt.savefig(outputPrefix+"_accumulationCurve.pdf", format='pdf')
    elif int(desiredCoverage) > 1:
        plt.plot([max(x)*-0.05, idx], [desiredAmountOfData, desiredAmountOfData], color='r', linestyle='-')
        plt.plot([idx, idx], [max(y)*-0.1, desiredAmountOfData], color='r', linestyle='-')
        plt.xticks([])
        cutoffText = str(cutoff) + "bp"
        plt.text(idx, max(y)*-0.1275, cutoffText, rotation='vertical', fontsize=8, ha='center', ma='right')
        plt.savefig(outputPrefix+"_accumulationCurve_"+str(desiredCoverage)+"X.pdf", format='pdf')

    plt.close(fig)

    # plot read length histogram

    fig = plt.figure()
    plt.bar(range(len(myBins)), countsInBins, color="#d62728", align='edge')
    plt.title("Histogram of sequence lengths")
    plt.ylabel("Number of reads")
    plt.xlabel("Length (Kb)")
    # plt.show()
    plt.savefig(outputPrefix+"_rawHist.pdf", format='pdf')
    plt.close(fig)
    
    # plot read length histogram as proportion of dataset for real
    fig = plt.figure()
    plt.bar(range(len(myBins)), proportionsInBins, color="#328AFF", align='edge')
    plt.title("Histogram of sequence lengths as a proportion of data")
    plt.ylabel("Proportion of reads")
    plt.xlabel("Length (Kb)")
    # plt.show()
    plt.savefig(outputPrefix+"_proportionHist.pdf", format='pdf')
    plt.close(fig)
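# Simplified sketch of the coverage-cutoff idea above: sort read lengths in
# descending order, accumulate, and find the read length at which the running
# total first reaches the desired amount of data (toy numbers; uses searchsorted
# rather than the len(minimumSum)+1 indexing of the function).
import numpy as np

read_lengths = np.sort(np.array([5000, 12000, 800, 3000, 20000]))[::-1]
running_total = np.cumsum(read_lengths)          # [20000 32000 37000 40000 40800]
desired_amount = 35000                           # e.g. genome size * coverage
cutoff_idx = np.searchsorted(running_total, desired_amount)
print(read_lengths[cutoff_idx])                  # 5000: keep reads >= 5000 bp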
Example #57
0
              activation='relu',
              kernel_regularizer=keras.regularizers.l1_l2(l1=0.001, l2=0.001)))
    dp.compile(loss=dnn_loss, optimizer=keras.optimizers.Adam())
    dp.fit(Xnew_train,
           y_train,
           epochs=dnn_epoch,
           batch_size=bs,
           verbose=dnn_verb)

    # Collapse the trained weights into one statistic per input feature:
    # weights[1] and weights[2] are folded into w3, which then weights the
    # first-layer coefficients of the first d inputs (w1) against those of
    # the last d inputs (w2).
    weights = dp.get_weights()
    w3 = np.matmul(weights[1], weights[2]).reshape(d, )
    w1 = np.multiply(weights[0][:d], w3)
    w2 = np.multiply(weights[0][d:], w3)
    W = w1**2 - w2**2

    # Smallest threshold t at which the ratio #{W <= -t} / #{W >= t}
    # drops to the target level q; if no t qualifies, nothing is selected.
    t = np.sort(np.concatenate(([0], abs(W))))

    ratio = [
        float(sum(W <= -tt)) / float(max(1, sum(W >= tt))) for tt in t[:d]
    ]
    ind = np.where(np.array(ratio) <= q)[0]
    if len(ind) == 0:
        T = float('inf')
    else:
        T = t[ind[0]]

    # Indices of the features whose statistic clears the threshold.
    selected = np.where(W >= T)[0]

    print(selected)
    mat_selected[i, :] = W >= T
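
The selection rule in Example #57 can be exercised on its own. Below is a small self-contained sketch of the same thresholding applied to synthetic scores; the function name, the demo data, and the level q=0.2 are illustrative and not taken from the original code.

import numpy as np

def select_features(W, q=0.2):
    # Smallest t with  #{W <= -t} / max(1, #{W >= t})  <= q, then keep
    # the indices whose score reaches that threshold.
    t = np.sort(np.concatenate(([0], np.abs(W))))
    ratio = [np.sum(W <= -tt) / max(1, np.sum(W >= tt)) for tt in t]
    ind = np.where(np.array(ratio) <= q)[0]
    T = t[ind[0]] if len(ind) else np.inf
    return np.where(W >= T)[0]

# Illustrative scores: the first ten entries mimic signal, the rest noise.
rng = np.random.default_rng(0)
W_demo = np.concatenate([rng.normal(3, 1, 10), rng.normal(0, 1, 90)])
print(select_features(W_demo, q=0.2))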
Example #58
0
def graph():
    #
    # Pb Filter
    #
    mask_filenames = glob(PathManager.path_valid_masks + 'mask*.npy')
    valid_bits = np.sort(
        np.unique([
            int(name.replace('\\', '/').split('/')[-1].split('_')[1])
            for name in mask_filenames
        ]))

    #
    # Questions
    #

    reldist_filter = np.load(
        PathManager.path_questions_hamming_reldistance_keep_bit_idxs)

    questions = np.concatenate([
        np.load(PathManager.path_questions_hamming_angles),
        np.load(PathManager.path_questions_hamming_distances),
        np.load(
            PathManager.path_questions_hamming_reldistances)[reldist_filter]
    ])

    #
    # Posebyte
    #

    posebyte_conditional = np.load('../posebytes/posebyte_conditioned.npy')

    angles_val = np.load(PathManager.path_annotations_hamming_valtest_angle)
    distances_val = np.load(
        PathManager.path_annotations_hamming_valtest_distance)
    reldistances_val = np.load(
        PathManager.path_annotations_hamming_valtest_reldistance)
    posebyte_valtest = np.concatenate((
        angles_val,
        distances_val,
        reldistances_val,
    ),
                                      axis=1)[1919:]

    #
    # Embeddings
    #

    embedding_conditional = np.load('../embeddings/embeddings_conditional.npy')
    embedding_test = np.load(
        '../../image/hamming/embeddings/embeddings_valtest_0.npy')[1919:]

    #
    # Distances
    #

    distances = cdist(embedding_conditional, embedding_test)
    nearest_indices = np.argsort(distances, axis=1)

    #
    # Display
    #

    output_path = 'predictions/'

    root_img_dir = PathManager.path_image_root
    sequence_file = PathManager.path_dataset_valtest_txt
    with open(sequence_file, 'r') as in_file:
        label_lines = in_file.readlines()
        image_list = [x.strip() for x in label_lines]
        image_list = [[' '.join(x.strip().split(' ')[:-16]) + '/'] +
                      x.strip().split(' ')[-16:] for x in image_list]
    image_list = image_list[1919:]

    for anno_idx, anno in enumerate(embedding_conditional):
        question_idx = int(anno_idx / 2)
        answer = posebyte_conditional[anno_idx, question_idx]

        if question_idx not in valid_bits:
            continue

        answer = bool(answer)
        question = str(question_idx) + ': ' + str(questions[question_idx])
        question = question.replace('angle:', 'is bent:')
        question = question.replace('distance:', 'is near:')
        question = question.replace('beyond:', 'is beyond:')
        question = question + '? ' + str(answer)

        output_file_name = output_path + question + '.png'

        nearest = nearest_indices[anno_idx]

        fig = plt.figure()
        fig.set_size_inches(8.0, 8.0)

        for frame_idx in range(25):
            near_idx = nearest[frame_idx]
            image_name = root_img_dir + image_list[near_idx][0] + image_list[
                near_idx][1].split('_')[1] + '.png'

            axes = fig.add_subplot(5, 5, frame_idx + 1)

            if posebyte_valtest[near_idx, question_idx] == answer:
                for spine in axes.spines.values():
                    spine.set_edgecolor('green')
                    spine.set_linewidth(8)
            else:
                for spine in axes.spines.values():
                    spine.set_edgecolor('red')
                    spine.set_linewidth(8)

            image_to_show = imread(image_name)
            plt.suptitle(question, fontsize=16)
            plt.imshow(imresize(image_to_show, (288, 288)))
            plt.setp(axes.get_xticklabels(), visible=False)
            plt.setp(axes.get_yticklabels(), visible=False)

        plt.show()
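
The retrieval step in Example #58 reduces to ranking gallery embeddings by Euclidean distance from each query embedding via cdist and argsort. A minimal standalone sketch with illustrative shapes:

import numpy as np
from scipy.spatial.distance import cdist

queries = np.random.rand(4, 16)    # e.g. 4 query embeddings of dimension 16
gallery = np.random.rand(100, 16)  # e.g. 100 candidate embeddings

distances = cdist(queries, gallery)              # (4, 100) pairwise Euclidean distances
nearest_indices = np.argsort(distances, axis=1)  # per query, closest gallery items first
top5 = nearest_indices[:, :5]                    # indices of the 5 nearest items per query
print(top5)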
Example #59
0
def cmp(a, b):
    tm.assert_almost_equal(np.sort(np.unique(a)), np.sort(np.unique(b)))


# The scoring lines below appear in the original snippet without their
# enclosing function header; a plausible wrapper (name assumed) is restored.
def print_score(y_test, y_pred):
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mae = mean_absolute_error(y_test, y_pred)
    rmae = np.sqrt(mean_squared_error(y_test, y_pred)) / mae
    r2 = r2_score(y_test, y_pred)
    print('RMSE, MAE, RMSE/MAE, R^2 = {:.3f}, {:.3f}, {:.3f}, {:.3f}'
          .format(rmse, mae, rmae, r2))


def print_gscv_score(gscv):
    print("Best parameters set found on development set:")
    print()
    print(gscv.best_params_)
    print()


X_train = np.sort(1 * np.pi * np.random.rand(40, 1), axis=0)
y_train = np.sin(X_train).ravel()
y_train[::5] += 3 * (0.5 - np.random.rand(8))
#
# test data: y = sin(x)
#
# X_test = X_train[:]
X_test = np.sort(4 * np.pi * np.random.rand(80, 1), axis=0)
y_test = np.sin(X_test).ravel()

start = time()
print('')
print('')
print('# 1. SVR with default hyper parameters')

# step 1. model