Code Example #1
import numpy as np
from scipy import stats


def sc_guess_offset(y, measurement_type):
    """Guesses the normalization offset for a reflection/transmission fit."""
    filt_perc = 0.33
    # Cut off the bottom (or top) filt_perc of y values; this mostly filters
    # out the notch before taking the median.
    if measurement_type == "reflection":
        y_filtered = stats.trim1(y, filt_perc, tail='left')
    elif measurement_type == "transmission":
        y_filtered = stats.trim1(y, filt_perc, tail='right')
    else:
        raise ValueError("not a valid measurement type: %r" % measurement_type)
    return np.median(y_filtered)
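
A minimal usage sketch for the function above (the synthetic reflection trace is illustrative, not from the original project):

# Baseline near 1.0 with a notch dipping toward 0.2, as in a reflection dip:
y = np.concatenate([np.full(90, 1.0), np.linspace(1.0, 0.2, 10)])
print(sc_guess_offset(y, "reflection"))  # ~1.0: the low tail (the notch) is trimmed away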
Code Example #2
    def rule2(self, original, mean=None, sigma=None, K=9):
        """Nine (or more) points in a row are on the same side of the mean."""
        if mean is None:
            mean = np.nanmean(trim1(original, .2, tail='left'))

        if sigma is None:
            sigma = np.nanstd(original)

        copy_original = original  # note: a reference, not an actual copy
        segment_len = K

        # +1 if the point lies above the mean, -1 otherwise:
        side_of_mean = []
        for value in copy_original:
            side_of_mean.append(1 if value > mean else -1)

        chunks = self._sliding_chunker(side_of_mean, segment_len, 1)

        # A window violates the rule when all K points fall on the same side:
        results = []
        for chunk in chunks:
            if abs(chunk.sum()) == segment_len:
                results.append(True)
            else:
                results.append(False)

        # clean up results
        results = self._clean_chunks(copy_original, results, segment_len)

        return results
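
Both rule2 and rule7 rely on two helpers that are not shown in the snippet. Below is a plausible reconstruction (hypothetical, written as free functions for brevity; the project's actual methods may differ): _sliding_chunker yields overlapping windows, and _clean_chunks pads the per-window results with False so the output matches the input length.

import numpy as np

def _sliding_chunker(data, segment_len, slide_len):
    """Return overlapping windows of length segment_len, advancing by slide_len."""
    return [np.asarray(data[pos:pos + segment_len])
            for pos in range(0, len(data) - segment_len + 1, slide_len)]

def _clean_chunks(original, results, segment_len):
    """Pad the per-window results with False so len(results) == len(original)."""
    return results + [False] * (len(original) - len(results))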
Code Example #3
    def rule7(self, original, mean=None, sigma=None, K=15):
        """Fifteen points in a row are all within 1 standard deviation of the mean on either side of the mean."""

        if mean is None:
            mean = np.nanmean(trim1(original, .15, 'left'))

        if sigma is None:
            sigma = np.nanstd(trim1(original, .15, 'left'))

        segment_len = K
        copy_original = original  # note: a reference, not an actual copy
        chunks = self._sliding_chunker(copy_original, segment_len, 1)

        results = []
        for chunk in chunks:
            # The rule triggers when every point in the window is within one sigma:
            if all((mean - sigma) < x < (mean + sigma) for x in chunk):
                results.append(True)
            else:
                results.append(False)

        # fill incomplete chunks with False
        results = self._clean_chunks(copy_original, results, segment_len)

        return results
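
The left-tail trim in both rules keeps a dip in the series from dragging the mean estimate down. A quick standalone illustration of the effect (synthetic data; recent SciPy versions of trim1 partition the array first, so unsorted input is fine):

import numpy as np
from scipy.stats import trim1

rng = np.random.default_rng(0)
series = np.concatenate([rng.normal(10.0, 0.1, 80), np.full(20, 2.0)])  # dip at the end
print(np.nanmean(series))                           # ~8.4, dragged down by the dip
print(np.nanmean(trim1(series, .2, tail='left')))   # ~10.0 once the low tail is trimmed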
Code Example #4
File: univariate.py Project: r24mille/ldc_analysis
import math
import numpy as np
import pylab
from scipy import stats


def create_histogram(a, trim_p, bin_size, p_title, p_ylabel, p_xlabel,
                     file_prefix, dec_prec=0, trim_type="both"):
    """Interacts with pylab to draw and save histogram plot.
    
    Arguments:
    a -- array_like list of values to plot
    trim_p -- Proportion (range 0 to 1) of values to trim (float)
    bin_size -- Size of histogram's value bins (float)
    p_title -- Title of plot
    p_ylabel -- ylabel of plot
    p_xlabel -- xlabel of plot
    file_prefix -- Filename prefix
    dec_prec -- (Optional) Decimal precision of bins. Defaults to 0. (int)
    trim_type -- (Optional) Controls which tail of the distribution the 
                 trim_p proportion is trimmed from. Valid values are "both", 
                 "left", and "right". Defaults to "both".
    """
    a.sort()
    sample_size = len(a)
    print("a length pre-trim_p", len(a))
    if trim_type == "left" or trim_type == "right":
        a = stats.trim1(a, trim_p, trim_type)
    else:
        a = stats.trimboth(a, trim_p)
    print("a length post-trim_p", len(a))
    bin_min = math.floor(min(a)) # TODO Round down to dec_prec instead
    bin_max = round(max(a), dec_prec)
    print("bin size=" + str(bin_size) + 
          ", bin min=" + str(bin_min) + 
          ", bin max=" + str(bin_max))
    # Create histogram of values (pylab.frange was removed from matplotlib;
    # np.arange with an extended endpoint reproduces its closed interval):
    n, bins, patches = pylab.hist(a,
                                  bins=np.arange(bin_min,
                                                 bin_max + bin_size,
                                                 bin_size),
                                  density=False,
                                  histtype="stepfilled")
    pylab.setp(patches, "facecolor", "g", "alpha", 0.75)
    pylab.title(p_title)
    pylab.xlabel(p_xlabel)
    pylab.ylabel(p_ylabel)
    
    if trim_p > 0:
        pylab.savefig(file_prefix + "_trimmed.png")
    else:
        pylab.savefig(file_prefix + ".png")
    pylab.show()
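
A hedged usage sketch (synthetic data; the filename prefix and labels are arbitrary placeholders):

import numpy as np

values = np.random.default_rng(1).normal(50.0, 10.0, 1000).tolist()
create_histogram(values, trim_p=0.05, bin_size=2.0,
                 p_title="Synthetic demand", p_ylabel="Count", p_xlabel="kWh",
                 file_prefix="demand", trim_type="right")
# Saves demand_trimmed.png, since trim_p > 0.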
Code Example #5
def k2p2FixFromSum(SumImage, thresh=1, output_folder=None, plot_folder=None, show_plot=True,
				   min_no_pixels_in_mask=8, min_for_cluster=4, cluster_radius=np.sqrt(2),
				   segmentation=True, ws_alg='flux', ws_blur=0.5, ws_thres=0.05, ws_footprint=3,
				   extend_overflow=True, catalog=None):
	"""
	Create pixel masks from Sum-image.

	Parameters:
		SumImage (ndarray): Sum-image.
		thresh (float, optional): Threshold for significant flux. The threshold is calculated as MODE+thresh*MAD. Default=1.
		output_folder (string, optional): Path to directory where output should be saved. Default=None.
		plot_folder (string, optional): Path to directory where plots should be saved. Default=None.
		show_plot (boolean, optional): Should plots be shown to the user? Default=True.
		min_no_pixels_in_mask (integer, optional): Minimum number of pixels to constitute a mask.
		min_for_cluster (integer, optional): Minimum number of pixels to be considered a cluster in DBSCAN clustering.
		cluster_radius (float, optional): Radius around points to consider cluster in DBSCAN clustering.
		segmentation (boolean, optional): Perform segmentation of clusters using Watershed segmentation.
		ws_alg (string, optional): Watershed method to use. Default='flux'.
		ws_blur (float, optional): Blur to use in the watershed segmentation. Default=0.5.
		ws_thres (float, optional): Threshold for watershed segmentation.
		ws_footprint (integer, optional): Footprint to use in watershed segmentation.
		extend_overflow (boolean, optional): Enable extension of overflow columns for bright stars.
		catalog (ndarray, optional): Catalog of stars as an array with three columns (column, row and magnitude). If this is provided
			the results will only allow masks to be returned for stars in the catalog and the information is
			also used in the extension of overflow columns.

	Returns:
		tuple: Tuple with two elements: a 3D boolean ndarray of masks and a float giving the bandwidth used for estimating the background level.

	.. codeauthor:: Rasmus Handberg <*****@*****.**>
	.. codeauthor:: Mikkel Lund <*****@*****.**>
	"""

	# Get logger for printing messages:
	logger = logging.getLogger(__name__)
	logger.info("Creating masks from sum-image...")

	NY, NX = np.shape(SumImage)
	ori_mask = ~np.isnan(SumImage)
	X, Y = np.meshgrid(np.arange(NX), np.arange(NY))

	# Cut out the pixels of the sum image that were collected and contain
	# flux, and flatten the 2D image to a 1D array:
	Flux = SumImage[ori_mask].flatten()
	Flux = Flux[Flux > 0]

	# Check if there was actually any flux measured:
	if len(Flux) == 0:
		raise K2P2NoFlux("No measured flux in sum-image")

	# Cut away the top 15% of the fluxes:
	flux_cut = stats.trim1(np.sort(Flux), 0.15)
	# Also cut on the absolute pixel values - this helps in cases where
	# the image is dominated by saturated pixels. The exact value is of course
	# in principle dependent on the CCD, but we have found this value to be
	# reasonable in TESS simulated data:
	flux_cut = flux_cut[flux_cut < 70000]

	# Estimate the bandwidth we are going to use for the background:
	background_bandwidth = select_bandwidth(flux_cut, bw='scott', kernel='gau')
	logger.debug("  Sum-image KDE bandwidth: %f", background_bandwidth)

	# Make the Kernel Density Estimation of the fluxes:
	kernel = KDE(flux_cut)
	kernel.fit(kernel='gau', bw=background_bandwidth, fft=True, gridsize=100)

	# MODE
	def kernel_opt(x): return -1*kernel.evaluate(x)
	max_guess = kernel.support[np.argmax(kernel.density)]
	MODE = minimize(kernel_opt, max_guess, method='Powell').x

	# MAD (around mode)
	MAD1 = mad_to_sigma * nanmedian( np.abs( Flux[(Flux < MODE)] - MODE ) )

	# Define the cutoff above which pixels are regarded significant:
	CUT = MODE + thresh * MAD1

	logger.debug("  Threshold used: %f", thresh)
	logger.debug("  Flux cut is: %f", CUT)
	if logger.isEnabledFor(logging.DEBUG) and plot_folder is not None:
		fig = plt.figure()
		ax = fig.add_subplot(111)
		ax.fill_between(kernel.support, kernel.density, alpha=0.3)
		ax.axvline(MODE, color='k')
		ax.axvline(CUT, color='r')
		ax.set_xlabel('Flux')
		ax.set_ylabel('Distribution')
		save_figure(os.path.join(plot_folder, 'flux_distribution'))
		plt.close(fig)

	#==========================================================================
	# Find and separate clusters of pixels
	#==========================================================================

	# Cut out pixels of sum image with flux above the cut-off:
	idx = (SumImage > CUT)
	X2 = X[idx]
	Y2 = Y[idx]

	if np.all(~idx):
		raise K2P2NoStars("No flux above threshold")

	logger.debug("  Min for cluster is: %f", min_for_cluster)
	logger.debug("  Cluster radius is: %f", cluster_radius)

	# Run clustering algorithm
	XX, labels_ini, core_samples_mask = run_DBSCAN(X2, Y2, cluster_radius, min_for_cluster)

	# Run watershed segmentation algorithm:
	# Require that at least one non-noise cluster was found.
	if segmentation and any(labels_ini != -1):
		# Create a set of dummy-masks that are made up of the clusters
		# that were found by DBSCAN, meaning that there could be masks
		# with several stars in them:
		DUMMY_MASKS = np.zeros((0, NY, NX), dtype='bool')
		DUMMY_MASKS_LABELS = []
		m = np.zeros_like(SumImage, dtype='bool')
		for lab in set(labels_ini):
			if lab == -1: continue
			# Create "image" of this mask:
			m[:,:] = False
			for x,y in XX[labels_ini == lab]:
				m[y, x] = True
			# Append them to lists:
			DUMMY_MASKS = np.append(DUMMY_MASKS, [m], axis=0)
			DUMMY_MASKS_LABELS.append(lab)

		# Run the dummy masks through the detection of saturated columns:
		logger.debug("Detecting saturated columns in non-segmentated masks...")
		smask, _ = k2p2_saturated(SumImage, DUMMY_MASKS, idx)

		# Create dictionary that will map a label to the mask of saturated pixels:
		if np.any(smask):
			saturated_masks = {}
			for u,sm in enumerate(smask):
				saturated_masks[DUMMY_MASKS_LABELS[u]] = sm
		else:
			saturated_masks = None

		# Run the mask segmentation algorithm on the found clusters:
		labels, unique_labels, NoCluster = k2p2WS(X, Y, X2, Y2, SumImage, XX, labels_ini, core_samples_mask, saturated_masks=saturated_masks, ws_thres=ws_thres,
												  ws_footprint=ws_footprint, ws_blur=ws_blur, ws_alg=ws_alg, output_folder=plot_folder, catalog=catalog)
	else:
		labels = labels_ini
		unique_labels = set(labels)
		#NoCluster = len(unique_labels) - (1 if -1 in labels else 0)

	# Make sure it is a tuple and not a set - much easier to work with:
	unique_labels = tuple(unique_labels)

	# Create list of clusters and their number of pixels:
	No_pix_sort = np.zeros([len(unique_labels), 2])
	for u,lab in enumerate(unique_labels):
		No_pix_sort[u, 0] = np.sum(labels == lab)
		No_pix_sort[u, 1] = lab

	# Only select the clusters that have enough pixels and are not noise:
	cluster_select = (No_pix_sort[:, 0] >= min_no_pixels_in_mask) & (No_pix_sort[:, 1] != -1)
	no_masks = sum(cluster_select)
	No_pix_sort = No_pix_sort[cluster_select, :]

	# No masks were found, so return None:
	if no_masks == 0:
		MASKS = None

	else:
		# Sort the clusters by the number of pixels:
		cluster_sort = np.argsort(No_pix_sort[:, 0])
		No_pix_sort = No_pix_sort[cluster_sort[::-1], :]

		# Create 3D array that will hold masks for each target:
		MASKS = np.zeros((no_masks, NY, NX))
		for u in range(no_masks):
			lab = No_pix_sort[u, 1]
			class_member_mask = (labels == lab)
			xy = XX[class_member_mask ,:]
			MASKS[u, xy[:,1], xy[:,0]] = 1

		#==========================================================================
		# Fill holes in masks
		#==========================================================================
		pattern = np.array([[[0, 0.25, 0],[0.25, 0, 0.25],[0, 0.25, 0]]]) # 3D array - shape=(1, 3, 3)
		mask_holes_indx = ndimage.convolve(MASKS, pattern, mode='constant', cval=0.0)
		mask_holes_indx = (mask_holes_indx > 0.95) & (MASKS == 0) # Should be exactly 1.0, but let's assume some round-off errors
		if np.any(mask_holes_indx):
			logger.info("Filling %d holes in the masks", np.sum(mask_holes_indx))
			MASKS[mask_holes_indx] = 1

			if plot_folder is not None:
				# Create image showing all masks at different levels:
				img = np.zeros((NY,NX))
				for r in np.transpose(np.where(MASKS > 0)):
					img[r[1], r[2]] = r[0]+1

				# Plot everything together:
				fig = plt.figure()
				ax = fig.add_subplot(111)
				plot_image(img, ax=ax, scale='linear', percentile=100, cmap='nipy_spectral', title='Holes in mask filled')

				# Create outline of filled holes:
				for hole in np.transpose(np.where(mask_holes_indx)):
					cen = (hole[2]-0.5, hole[1]-0.5)
					ax.add_patch(mpl.patches.Rectangle(cen, 1, 1, color='k', lw=2, fill=False, hatch='//'))

				#fig.savefig(os.path.join(plot_folder, 'mask_filled_holes.png'), format='png', bbox_inches='tight')
				save_figure(os.path.join(plot_folder, 'mask_filled_holes'))
				plt.close(fig)

		#==========================================================================
		# Extend overflow lanes
		#==========================================================================
		if extend_overflow:
			logger.debug("Detecting saturated columns in masks...")

			# Find pixels that are saturated in each mask and find out if they should
			# be added to the mask:
			saturated_mask, pixels_added = k2p2_saturated(SumImage, MASKS, idx)
			logger.info("Overflow will add %d pixels in total to the masks.", pixels_added)

			# If we have a catalog of stars, we will only allow stars above the saturation
			# limit to get their masks extended:
			if catalog is not None:
				# Filter the catalog, keeping only stars actually inside the current image:
				c = np.asarray(np.round(catalog[:, 0]), dtype='int32')
				r = np.asarray(np.round(catalog[:, 1]), dtype='int32')
				tmag = catalog[:, 2]
				indx = (c >= 0) & (c < SumImage.shape[1]) & (r >= 0) & (r < SumImage.shape[0])
				c = c[indx]
				r = r[indx]
				tmag = tmag[indx]
				# Loop through the masks:
				for u in range(no_masks):
					if np.any(saturated_mask[u, :, :]):
						# Find out which stars fall inside this mask:
						which_stars = np.asarray(MASKS[u, :, :][r, c], dtype='bool')
						if np.any(which_stars):
							# Only allow extension of columns if the combined light of
							# the targets in the mask exceeds the saturation limit:
							mags_in_mask = tmag[which_stars]
							mags_total = -2.5*np.log10(np.nansum(10**(-0.4*mags_in_mask)))
							if mags_total > saturation_limit:
								# The combined magnitude is fainter than the
								# saturation limit, so do not extend the mask:
								saturated_mask[u, :, :] = False
						else:
							# Do not add saturation columns if no stars were found:
							saturated_mask[u, :, :] = False

			# If we are going to plot later on, make a note
			# of how the outline of the masks looked before
			# changing anything:
			if plot_folder is not None and logger.isEnabledFor(logging.DEBUG):
				outline_before = []
				for u in range(no_masks):
					outline_before.append( k2p2maks(MASKS[u,:,:], 1, 0.5) )

			# Add the saturated pixels to the masks:
			MASKS[saturated_mask] = 1

			# If we are running as DEBUG, output some plots as well:
			if plot_folder is not None and logger.isEnabledFor(logging.DEBUG):
				logger.debug("Plotting overflow figures...")
				Ypixel = np.arange(NY)
				for u in range(no_masks):
					mask = np.asarray(MASKS[u, :, :], dtype='bool')
					mask_rows, mask_columns = np.where(mask)
					mask_max = np.nanmax(SumImage[mask])

					# The outline of the mask after saturated columns have been
					# corrected for:
					outline = k2p2maks(mask, 1, 0.5)

					with PdfPages(os.path.join(plot_folder, 'overflow_mask' + str(u) + '.pdf')) as pdf:
						for c in sorted(set(mask_columns)):

							column_rows = mask_rows[mask_columns == c]

							title = "Mask %d - Column %d" % (u, c)
							if np.any(saturated_mask[u,:,c]):
								title += " - Saturated"

							fig = plt.figure(figsize=(14,6))
							ax1 = fig.add_subplot(121)
							ax1.axvspan(np.min(column_rows)-0.5, np.max(column_rows)+0.5, color='0.7')
							ax1.plot(Ypixel, SumImage[:, c], 'ro-', drawstyle='steps-mid')
							ax1.set_title(title)
							ax1.set_xlabel('Y pixels')
							ax1.set_ylabel('Sum-image counts')
							ax1.set_ylim(0, mask_max)
							ax1.set_xlim(-0.5, NY-0.5)

							ax2 = fig.add_subplot(122)
							plot_image(SumImage, ax=ax2, scale='log')
							ax2.plot(outline_before[u][:,0], outline_before[u][:,1], 'r:')
							ax2.plot(outline[:,0], outline[:,1], 'r-')
							ax2.axvline(c, color='r', ls='--')

							pdf.savefig(fig)
							plt.close(fig)

	#==============================================================================
	# Create plots
	#==============================================================================
	if plot_folder is not None:
		# Colors to use for each cluster label:
		colors = plt.cm.gist_rainbow(np.linspace(0, 1, len(unique_labels)))

		# Colormap to use for clusters:
		# https://stackoverflow.com/questions/9707676/defining-a-discrete-colormap-for-imshow-in-matplotlib/9708079#9708079
		#cmap = mpl.colors.ListedColormap(np.append([[1, 1, 1, 1]], colors, axis=0))
		#cmap_norm = mpl.colors.BoundaryNorm(np.arange(-1, len(unique_labels)-1)+0.5, cmap.N)

		# Set up figure to hold subplots:
		if NY/NX > 5:
			aspect = 0.5
		else:
			aspect = 0.2

		fig0 = plt.figure(figsize=(2*plt.figaspect(aspect)))
		fig0.subplots_adjust(wspace=0.12)

		# ---------------
		# PLOT 1
		ax0 = fig0.add_subplot(151)
		plot_image(SumImage, ax=ax0, scale='log', title='Sum-image', xlabel=None, ylabel=None)

		# ---------------
		# PLOT 2
		Flux_mat2 = np.zeros_like(SumImage)
		Flux_mat2[SumImage < CUT] = 1
		Flux_mat2[SumImage > CUT] = 2
		Flux_mat2[ori_mask == 0] = 0

		ax2 = fig0.add_subplot(152)
		plot_image(Flux_mat2, ax=ax2, scale='linear', percentile=100, cmap='nipy_spectral', title='Significant flux', xlabel=None, ylabel=None)

		# ---------------
		# PLOT 3
		ax2 = fig0.add_subplot(153)

		Flux_mat4 = np.zeros_like(SumImage)
		for u,lab in enumerate(unique_labels):
			class_member_mask = (labels == lab)
			xy = XX[class_member_mask,:]
			if lab == -1:
				# Black used for noise.
				ax2.plot(xy[:, 0], xy[:, 1], '+', markerfacecolor='k',
					 markeredgecolor='k', markersize=5)

			else:
				Flux_mat4[xy[:,1], xy[:,0]] = u+1
				ax2.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(colors[u]),
						 markeredgecolor='k', markersize=5)

		ax2.set_title("Clustering + Watershed")
		ax2.set_xlim([-0.5, SumImage.shape[1]-0.5])
		ax2.set_ylim([-0.5, SumImage.shape[0]-0.5])
		ax2.set_aspect('equal')

		# ---------------
		# PLOT 4
		ax4 = fig0.add_subplot(154)
		plot_image(Flux_mat4, ax=ax4, scale='linear', percentile=100, cmap='nipy_spectral', title='Extracted clusters', xlabel=None, ylabel=None)

		# ---------------
		# PLOT 5
		ax5 = fig0.add_subplot(155)
		plot_image(SumImage, ax=ax5, scale='log', title='Final masks', xlabel=None, ylabel=None)

		# Plot outlines of selected masks:
		for u in range(no_masks):
			# Get the color associated with this label:
			col = colors[ int(np.where(unique_labels == No_pix_sort[u, 1])[0]) ]
			# Make mask outline:
			outline = k2p2maks(MASKS[u, :, :], 1, threshold=0.5)
			# Plot outlines:
			ax5.plot(outline[:, 0], outline[:, 1], color=col, zorder=10, lw=2.5)
			ax4.plot(outline[:, 0], outline[:, 1], color='k', zorder=10, lw=1.5)

		# Save the figure and close it:
		save_figure(os.path.join(plot_folder, 'masks_'+ws_alg))
		if show_plot:
			plt.show()
		else:
			plt.close('all')

	return MASKS, background_bandwidth
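
The statistical heart of the function is the background cut: trim the bright tail, locate the mode of the flux distribution with a KDE, and set the threshold at MODE + thresh*MAD. Below is a condensed standalone sketch of that step, using scipy's gaussian_kde in place of the statsmodels KDE and an assumed mad_to_sigma = 1.4826 (the usual MAD-to-sigma factor); synthetic data, not the pipeline's actual implementation.

import numpy as np
from scipy import stats
from scipy.optimize import minimize

mad_to_sigma = 1.4826  # assumed constant scaling MAD to an equivalent Gaussian sigma

rng = np.random.default_rng(2)
flux = np.concatenate([rng.normal(100.0, 5.0, 5000),    # background pixels
                       rng.normal(5000.0, 50.0, 50)])   # a bright star

flux_cut = stats.trim1(np.sort(flux), 0.15)             # cut away the top 15% of fluxes
kernel = stats.gaussian_kde(flux_cut)
max_guess = flux_cut[np.argmax(kernel(flux_cut))]       # coarse mode estimate
MODE = minimize(lambda x: -kernel(x)[0], max_guess, method='Powell').x.item()
MAD1 = mad_to_sigma * np.nanmedian(np.abs(flux[flux < MODE] - MODE))
CUT = MODE + 1.0 * MAD1                                 # thresh = 1
print(MODE, CUT)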
Code Example #6
    def test_trim1(self):
        a = np.arange(11)
        assert_equal(trim1(a, 0.1), np.arange(10))
        assert_equal(trim1(a, 0.2), np.arange(9))
        assert_equal(trim1(a, 0.2, tail='left'), np.arange(2, 11))
        assert_equal(trim1(a, 3 / 11., tail='left'), np.arange(3, 11))
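
The same semantics in a quick standalone check: the proportion is floored via int(proportiontocut * nobs), and recent SciPy versions partition the array first, so unsorted input also works (illustrative snippet, not part of the original test):

import numpy as np
from scipy.stats import trim1

a = np.array([5, 0, 3, 1, 4, 2])   # deliberately unsorted
print(trim1(a, 1/3, tail='left'))  # int(1/3 * 6) = 2: the two smallest values are gone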
Code Example #7
    # Filter out poor matches by the ratio test and a maximum (descriptor) distance
    matchdist = []
    filteredmatches = []
    for m in matches:
        if (len(m) == 2 and
                m[0].distance >= 0.8 * m[1].distance) or m[0].distance >= 0.25:
            continue
        filteredmatches.append(m[0])
        qkp, tkp = getMatchKPs(m[0])
        tkp.class_id = qkp.class_id  # carry over the key point's ID
        matchdist.append(diffKP_L2(qkp, tkp))  # get the match pixel distance
    matches = filteredmatches

    if matchdist:  # Filter out matches with outlier spatial distances
        threshdist = np.mean(stats.trim1(matchdist,
                                         0.25)) + 2 * np.std(matchdist)
        matches = [
            m for m, mdist in zip(matches, matchdist) if mdist < threshdist
        ]

    if not opts.nodraw:  # Draw rectangle around RoI
        cv2.rectangle(
            dispim, (scrapX, scrapY),
            (currFrame.shape[1] - scrapX, currFrame.shape[0] - scrapY),
            (192, 192, 192),
            thickness=2)

    if not opts.nodraw and matches:  # Draw matched keypoints
        qkp, tkp = zip(*map(getMatchKPs, matches))
        cv2.drawKeypoints(dispim, qkp, dispim, color=(0, 255, 0))
        cv2.drawKeypoints(dispim, tkp, dispim, color=(255, 0, 0))
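
The spatial-outlier filter in the middle of the snippet is self-contained enough to demonstrate on its own; a minimal sketch with synthetic match distances (the 0.25 trim proportion and the 2-sigma margin mirror the values used above):

import numpy as np
from scipy import stats

matchdist = [1.0, 1.2, 0.9, 1.1, 1.0, 9.5]  # one wildly wrong match distance
threshdist = np.mean(stats.trim1(matchdist, 0.25)) + 2 * np.std(matchdist)
kept = [d for d in matchdist if d < threshdist]
print(threshdist, kept)                      # the 9.5 outlier is rejected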
Code Example #8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", required=True, help="Tabular file.")
    parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.")
    parser.add_argument("--sample_one_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_two_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_cols", help="Input format, like smi, sdf, inchi,separate arrays using ;")
    parser.add_argument("--test_id", help="statistical test method")
    parser.add_argument(
        "--mwu_use_continuity",
        action="store_true",
        default=False,
        help="Whether a continuity correction (1/2.) should be taken into account.",
    )
    parser.add_argument(
        "--equal_var",
        action="store_true",
        default=False,
        help="If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.",
    )
    parser.add_argument(
        "--reta", action="store_true", default=False, help="Whether or not to return the internally computed a values."
    )
    parser.add_argument("--fisher", action="store_true", default=False, help="if true then Fisher definition is used")
    parser.add_argument(
        "--bias",
        action="store_true",
        default=False,
        help="if false,then the calculations are corrected for statistical bias",
    )
    parser.add_argument("--inclusive1", action="store_true", default=False, help="if false,lower_limit will be ignored")
    parser.add_argument(
        "--inclusive2", action="store_true", default=False, help="if false,higher_limit will be ignored"
    )
    parser.add_argument("--inclusive", action="store_true", default=False, help="if false,limit will be ignored")
    parser.add_argument(
        "--printextras",
        action="store_true",
        default=False,
        help="If True, if there are extra points a warning is raised saying how many of those points there are",
    )
    parser.add_argument(
        "--initial_lexsort",
        action="store_true",
        default="False",
        help="Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.",
    )
    parser.add_argument("--correction", action="store_true", default=False, help="continuity correction ")
    parser.add_argument(
        "--axis",
        type=int,
        default=0,
        help="Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)",
    )
    parser.add_argument(
        "--n",
        type=int,
        default=0,
        help="the number of trials. This is ignored if x gives both the number of successes and failures",
    )
    parser.add_argument("--b", type=int, default=0, help="The number of bins to use for the histogram")
    parser.add_argument("--N", type=int, default=0, help="Score that is compared to the elements in a.")
    parser.add_argument("--ddof", type=int, default=0, help="Degrees of freedom correction")
    parser.add_argument("--score", type=int, default=0, help="Score that is compared to the elements in a.")
    parser.add_argument("--m", type=float, default=0.0, help="limits")
    parser.add_argument("--mf", type=float, default=2.0, help="lower limit")
    parser.add_argument("--nf", type=float, default=99.9, help="higher_limit")
    parser.add_argument(
        "--p",
        type=float,
        default=0.5,
        help="The hypothesized probability of success. 0 <= p <= 1. The default value is p = 0.5",
    )
    parser.add_argument("--alpha", type=float, default=0.9, help="probability")
    parser.add_argument("--new", type=float, default=0.0, help="Value to put in place of values in a outside of bounds")
    parser.add_argument(
        "--proportiontocut",
        type=float,
        default=0.0,
        help="Proportion (in range 0-1) of total data set to trim of each end.",
    )
    parser.add_argument(
        "--lambda_",
        type=float,
        default=1.0,
        help="lambda_ gives the power in the Cressie-Read power divergence statistic",
    )
    parser.add_argument(
        "--imbda",
        type=float,
        default=0,
        help="If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.",
    )
    parser.add_argument("--base", type=float, default=1.6, help="The logarithmic base to use, defaults to e")
    parser.add_argument("--dtype", help="dtype")
    parser.add_argument("--med", help="med")
    parser.add_argument("--cdf", help="cdf")
    parser.add_argument("--zero_method", help="zero_method options")
    parser.add_argument("--dist", help="dist options")
    parser.add_argument("--ties", help="ties options")
    parser.add_argument("--alternative", help="alternative options")
    parser.add_argument("--mode", help="mode options")
    parser.add_argument("--method", help="method options")
    parser.add_argument("--md", help="md options")
    parser.add_argument("--center", help="center options")
    parser.add_argument("--kind", help="kind options")
    parser.add_argument("--tail", help="tail options")
    parser.add_argument("--interpolation", help="interpolation options")
    parser.add_argument("--statistic", help="statistic options")

    args = parser.parse_args()
    infile = args.infile
    outfile = open(args.outfile, "w+")
    test_id = args.test_id
    nf = args.nf
    mf = args.mf
    imbda = args.imbda
    inclusive1 = args.inclusive1
    inclusive2 = args.inclusive2
    sample0 = 0
    sample1 = 0
    sample2 = 0
    if args.sample_cols is not None:
        sample0 = 1
        barlett_samples = []
        for sample in args.sample_cols.split(";"):
            barlett_samples.append(map(int, sample.split(",")))
    if args.sample_one_cols is not None:
        sample1 = 1
        sample_one_cols = args.sample_one_cols.split(",")
    if args.sample_two_cols is not None:
        sample_two_cols = args.sample_two_cols.split(",")
        sample2 = 1
    for line in open(infile):
        sample_one = []
        sample_two = []
        cols = line.strip().split("\t")
        if sample0 == 1:
            b_samples = columns_to_values(barlett_samples, line)
        if sample1 == 1:
            for index in sample_one_cols:
                sample_one.append(cols[int(index) - 1])
        if sample2 == 1:
            for index in sample_two_cols:
                sample_two.append(cols[int(index) - 1])
        if test_id.strip() == "describe":
            size, min_max, mean, uv, bs, bk = stats.describe(map(float, sample_one))
            cols.append(size)
            cols.append(min_max)
            cols.append(mean)
            cols.append(uv)
            cols.append(bs)
            cols.append(bk)
        elif test_id.strip() == "mode":
            vals, counts = stats.mode(map(float, sample_one))
            cols.append(vals)
            cols.append(counts)
        elif test_id.strip() == "nanmean":
            m = stats.nanmean(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "kurtosistest":
            z_value, p_value = stats.kurtosistest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "itemfreq":
            freq = stats.itemfreq(map(float, sample_one))
            for list in freq:
                elements = ",".join(map(str, list))
                cols.append(elements)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "boxcox_llf":
            IIf = stats.boxcox_llf(imbda, map(float, sample_one))
            cols.append(IIf)
        elif test_id.strip() == "tiecorrect":
            fa = stats.tiecorrect(map(float, sample_one))
            cols.append(fa)
        elif test_id.strip() == "rankdata":
            r = stats.rankdata(map(float, sample_one), method=args.md)
            cols.append(r)
        elif test_id.strip() == "nanstd":
            s = stats.nanstd(map(float, sample_one), bias=args.bias)
            cols.append(s)
        elif test_id.strip() == "anderson":
            A2, critical, sig = stats.anderson(map(float, sample_one), dist=args.dist)
            cols.append(A2)
            for list in critical:
                cols.append(list)
            cols.append(",")
            for list in sig:
                cols.append(list)
        elif test_id.strip() == "binom_test":
            p_value = stats.binom_test(map(float, sample_one), n=args.n, p=args.p)
            cols.append(p_value)
        elif test_id.strip() == "gmean":
            gm = stats.gmean(map(float, sample_one), dtype=args.dtype)
            cols.append(gm)
        elif test_id.strip() == "hmean":
            hm = stats.hmean(map(float, sample_one), dtype=args.dtype)
            cols.append(hm)
        elif test_id.strip() == "kurtosis":
            k = stats.kurtosis(map(float, sample_one), axis=args.axis, fisher=args.fisher, bias=args.bias)
            cols.append(k)
        elif test_id.strip() == "moment":
            n_moment = stats.moment(map(float, sample_one), n=args.n)
            cols.append(n_moment)
        elif test_id.strip() == "normaltest":
            k2, p_value = stats.normaltest(map(float, sample_one))
            cols.append(k2)
            cols.append(p_value)
        elif test_id.strip() == "skew":
            skewness = stats.skew(map(float, sample_one), bias=args.bias)
            cols.append(skewness)
        elif test_id.strip() == "skewtest":
            z_value, p_value = stats.skewtest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "sem":
            s = stats.sem(map(float, sample_one), ddof=args.ddof)
            cols.append(s)
        elif test_id.strip() == "zscore":
            z = stats.zscore(map(float, sample_one), ddof=args.ddof)
            for list in z:
                cols.append(list)
        elif test_id.strip() == "signaltonoise":
            s2n = stats.signaltonoise(map(float, sample_one), ddof=args.ddof)
            cols.append(s2n)
        elif test_id.strip() == "percentileofscore":
            p = stats.percentileofscore(map(float, sample_one), score=args.score, kind=args.kind)
            cols.append(p)
        elif test_id.strip() == "bayes_mvs":
            c_mean, c_var, c_std = stats.bayes_mvs(map(float, sample_one), alpha=args.alpha)
            cols.append(c_mean)
            cols.append(c_var)
            cols.append(c_std)
        elif test_id.strip() == "sigmaclip":
            c, c_low, c_up = stats.sigmaclip(map(float, sample_one), low=args.m, high=args.n)
            cols.append(c)
            cols.append(c_low)
            cols.append(c_up)
        elif test_id.strip() == "kstest":
            d, p_value = stats.kstest(
                map(float, sample_one), cdf=args.cdf, N=args.N, alternative=args.alternative, mode=args.mode
            )
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "chi2_contingency":
            chi2, p, dof, ex = stats.chi2_contingency(
                map(float, sample_one), correction=args.correction, lambda_=args.lambda_
            )
            cols.append(chi2)
            cols.append(p)
            cols.append(dof)
            cols.append(ex)
        elif test_id.strip() == "tmean":
            if nf == 0 and mf == 0:
                mean = stats.tmean(map(float, sample_one))
            else:
                mean = stats.tmean(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(mean)
        elif test_id.strip() == "tmin":
            if mf == 0:
                min = stats.tmin(map(float, sample_one))
            else:
                min = stats.tmin(map(float, sample_one), lowerlimit=mf, inclusive=args.inclusive)
            cols.append(min)
        elif test_id.strip() == "tmax":
            if nf == 0:
                max = stats.tmax(map(float, sample_one))
            else:
                max = stats.tmax(map(float, sample_one), upperlimit=nf, inclusive=args.inclusive)
            cols.append(max)
        elif test_id.strip() == "tvar":
            if nf == 0 and mf == 0:
                var = stats.tvar(map(float, sample_one))
            else:
                var = stats.tvar(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(var)
        elif test_id.strip() == "tstd":
            if nf == 0 and mf == 0:
                std = stats.tstd(map(float, sample_one))
            else:
                std = stats.tstd(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(std)
        elif test_id.strip() == "tsem":
            if nf == 0 and mf == 0:
                s = stats.tsem(map(float, sample_one))
            else:
                s = stats.tsem(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(s)
        elif test_id.strip() == "scoreatpercentile":
            if nf == 0 and mf == 0:
                s = stats.scoreatpercentile(
                    map(float, sample_one), map(float, sample_two), interpolation_method=args.interpolation
                )
            else:
                s = stats.scoreatpercentile(
                    map(float, sample_one), map(float, sample_two), (mf, nf), interpolation_method=args.interpolation
                )
            for list in s:
                cols.append(list)
        elif test_id.strip() == "relfreq":
            if nf == 0 and mf == 0:
                rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b)
            else:
                rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b, (mf, nf))
            for list in rel:
                cols.append(list)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "binned_statistic":
            if nf == 0 and mf == 0:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one), map(float, sample_two), statistic=args.statistic, bins=args.b
                )
            else:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one),
                    map(float, sample_two),
                    statistic=args.statistic,
                    bins=args.b,
                    range=(mf, nf),
                )
            cols.append(st)
            cols.append(b_edge)
            cols.append(b_n)
        elif test_id.strip() == "threshold":
            if nf == 0 and mf == 0:
                o = stats.threshold(map(float, sample_one), newval=args.new)
            else:
                o = stats.threshold(map(float, sample_one), mf, nf, newval=args.new)
            for list in o:
                cols.append(list)
        elif test_id.strip() == "trimboth":
            o = stats.trimboth(map(float, sample_one), proportiontocut=args.proportiontocut)
            for list in o:
                cols.append(list)
        elif test_id.strip() == "trim1":
            t1 = stats.trim1(map(float, sample_one), proportiontocut=args.proportiontocut, tail=args.tail)
            for list in t1:
                cols.append(list)
        elif test_id.strip() == "histogram":
            if nf == 0 and mf == 0:
                hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b)
            else:
                hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b, (mf, nf))
            cols.append(hi)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "cumfreq":
            if nf == 0 and mf == 0:
                cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b)
            else:
                cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b, (mf, nf))
            cols.append(cum)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "boxcox_normmax":
            if nf == 0 and mf == 0:
                ma = stats.boxcox_normmax(map(float, sample_one))
            else:
                ma = stats.boxcox_normmax(map(float, sample_one), (mf, nf), method=args.method)
            cols.append(ma)
        elif test_id.strip() == "boxcox":
            if imbda == 0:
                box, ma, ci = stats.boxcox(map(float, sample_one), alpha=args.alpha)
                cols.append(box)
                cols.append(ma)
                cols.append(ci)
            else:
                box = stats.boxcox(map(float, sample_one), imbda, alpha=args.alpha)
                cols.append(box)
        elif test_id.strip() == "histogram2":
            h2 = stats.histogram2(map(float, sample_one), map(float, sample_two))
            for list in h2:
                cols.append(list)
        elif test_id.strip() == "ranksums":
            z_statistic, p_value = stats.ranksums(map(float, sample_one), map(float, sample_two))
            cols.append(z_statistic)
            cols.append(p_value)
        elif test_id.strip() == "ttest_1samp":
            t, prob = stats.ttest_1samp(map(float, sample_one), map(float, sample_two))
            for list in t:
                cols.append(list)
            for list in prob:
                cols.append(list)
        elif test_id.strip() == "ansari":
            AB, p_value = stats.ansari(map(float, sample_one), map(float, sample_two))
            cols.append(AB)
            cols.append(p_value)
        elif test_id.strip() == "linregress":
            slope, intercept, r_value, p_value, stderr = stats.linregress(
                map(float, sample_one), map(float, sample_two)
            )
            cols.append(slope)
            cols.append(intercept)
            cols.append(r_value)
            cols.append(p_value)
            cols.append(stderr)
        elif test_id.strip() == "pearsonr":
            cor, p_value = stats.pearsonr(map(float, sample_one), map(float, sample_two))
            cols.append(cor)
            cols.append(p_value)
        elif test_id.strip() == "pointbiserialr":
            r, p_value = stats.pointbiserialr(map(float, sample_one), map(float, sample_two))
            cols.append(r)
            cols.append(p_value)
        elif test_id.strip() == "ks_2samp":
            d, p_value = stats.ks_2samp(map(float, sample_one), map(float, sample_two))
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "mannwhitneyu":
            mw_stats_u, p_value = stats.mannwhitneyu(
                map(float, sample_one), map(float, sample_two), use_continuity=args.mwu_use_continuity
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "zmap":
            z = stats.zmap(map(float, sample_one), map(float, sample_two), ddof=args.ddof)
            for list in z:
                cols.append(list)
        elif test_id.strip() == "ttest_ind":
            mw_stats_u, p_value = stats.ttest_ind(
                map(float, sample_one), map(float, sample_two), equal_var=args.equal_var
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "ttest_rel":
            t, prob = stats.ttest_rel(map(float, sample_one), map(float, sample_two), axis=args.axis)
            cols.append(t)
            cols.append(prob)
        elif test_id.strip() == "mood":
            z, p_value = stats.mood(map(float, sample_one), map(float, sample_two), axis=args.axis)
            cols.append(z)
            cols.append(p_value)
        elif test_id.strip() == "shapiro":
            W, p_value, a = stats.shapiro(map(float, sample_one), map(float, sample_two), args.reta)
            cols.append(W)
            cols.append(p_value)
            for list in a:
                cols.append(list)
        elif test_id.strip() == "kendalltau":
            k, p_value = stats.kendalltau(
                map(float, sample_one), map(float, sample_two), initial_lexsort=args.initial_lexsort
            )
            cols.append(k)
            cols.append(p_value)
        elif test_id.strip() == "entropy":
            s = stats.entropy(map(float, sample_one), map(float, sample_two), base=args.base)
            cols.append(s)
        elif test_id.strip() == "spearmanr":
            if sample2 == 1:
                rho, p_value = stats.spearmanr(map(float, sample_one), map(float, sample_two))
            else:
                rho, p_value = stats.spearmanr(map(float, sample_one))
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "wilcoxon":
            if sample2 == 1:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one),
                    map(float, sample_two),
                    zero_method=args.zero_method,
                    correction=args.correction,
                )
            else:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one), zero_method=args.zero_method, correction=args.correction
                )
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "chisquare":
            if sample2 == 1:
                rho, p_value = stats.chisquare(map(float, sample_one), map(float, sample_two), ddof=args.ddof)
            else:
                rho, p_value = stats.chisquare(map(float, sample_one), ddof=args.ddof)
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "power_divergence":
            if sample2 == 1:
                stat, p_value = stats.power_divergence(
                    map(float, sample_one), map(float, sample_two), ddof=args.ddof, lambda_=args.lambda_
                )
            else:
                stat, p_value = stats.power_divergence(map(float, sample_one), ddof=args.ddof, lambda_=args.lambda_)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "theilslopes":
            if sample2 == 1:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one), map(float, sample_two), alpha=args.alpha)
            else:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one), alpha=args.alpha)
            cols.append(mpe)
            cols.append(met)
            cols.append(lo)
            cols.append(up)
        elif test_id.strip() == "combine_pvalues":
            if sample2 == 1:
                stat, p_value = stats.combine_pvalues(
                    map(float, sample_one), method=args.med, weights=map(float, sample_two)
                )
            else:
                stat, p_value = stats.combine_pvalues(map(float, sample_one), method=args.med)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "obrientransform":
            ob = stats.obrientransform(*b_samples)
            for list in ob:
                elements = ",".join(map(str, list))
                cols.append(elements)
        elif test_id.strip() == "f_oneway":
            f_value, p_value = stats.f_oneway(*b_samples)
            cols.append(f_value)
            cols.append(p_value)
        elif test_id.strip() == "kruskal":
            h, p_value = stats.kruskal(*b_samples)
            cols.append(h)
            cols.append(p_value)
        elif test_id.strip() == "friedmanchisquare":
            fr, p_value = stats.friedmanchisquare(*b_samples)
            cols.append(fr)
            cols.append(p_value)
        elif test_id.strip() == "fligner":
            xsq, p_value = stats.fligner(center=args.center, proportiontocut=args.proportiontocut, *b_samples)
            cols.append(xsq)
            cols.append(p_value)
        elif test_id.strip() == "bartlett":
            T, p_value = stats.bartlett(*b_samples)
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "levene":
            w, p_value = stats.levene(center=args.center, proportiontocut=args.proportiontocut, *b_samples)
            cols.append(w)
            cols.append(p_value)
        elif test_id.strip() == "median_test":
            stat, p_value, m, table = stats.median_test(
                ties=args.ties, correction=args.correction, lambda_=args.lambda_, *b_samples
            )
            cols.append(stat)
            cols.append(p_value)
            cols.append(m)
            cols.append(table)
            for list in table:
                elements = ",".join(map(str, list))
                cols.append(elements)
        outfile.write("%s\n" % "\t".join(map(str, cols)))
    outfile.close()
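
The script calls a columns_to_values helper that is not shown here. A plausible reconstruction (hypothetical; it mirrors how sample_one_cols is handled above, pulling 1-based column indices out of the tab-separated line for each group):

def columns_to_values(groups, line):
    """For each group of 1-based column indices, collect the float values
    from the corresponding columns of a tab-separated line."""
    cols = line.strip().split("\t")
    return [[float(cols[int(index) - 1]) for index in group] for group in groups]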
Code Example #9
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", required=True, help="Tabular file.")
    parser.add_argument("-o",
                        "--outfile",
                        required=True,
                        help="Path to the output file.")
    parser.add_argument("--sample_one_cols",
                        help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_two_cols",
                        help="Input format, like smi, sdf, inchi")
    parser.add_argument(
        "--sample_cols",
        help="Input format, like smi, sdf, inchi,separate arrays using ;",
    )
    parser.add_argument("--test_id", help="statistical test method")
    parser.add_argument(
        "--mwu_use_continuity",
        action="store_true",
        default=False,
        help=
        "Whether a continuity correction (1/2.) should be taken into account.",
    )
    parser.add_argument(
        "--equal_var",
        action="store_true",
        default=False,
        help=
        "If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.",
    )
    parser.add_argument(
        "--reta",
        action="store_true",
        default=False,
        help="Whether or not to return the internally computed a values.",
    )
    parser.add_argument(
        "--fisher",
        action="store_true",
        default=False,
        help="if true then Fisher definition is used",
    )
    parser.add_argument(
        "--bias",
        action="store_true",
        default=False,
        help=
        "if false,then the calculations are corrected for statistical bias",
    )
    parser.add_argument(
        "--inclusive1",
        action="store_true",
        default=False,
        help="if false,lower_limit will be ignored",
    )
    parser.add_argument(
        "--inclusive2",
        action="store_true",
        default=False,
        help="if false,higher_limit will be ignored",
    )
    parser.add_argument(
        "--inclusive",
        action="store_true",
        default=False,
        help="if false,limit will be ignored",
    )
    parser.add_argument(
        "--printextras",
        action="store_true",
        default=False,
        help=
        "If True, if there are extra points a warning is raised saying how many of those points there are",
    )
    parser.add_argument(
        "--initial_lexsort",
        action="store_true",
        default="False",
        help=
        "Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.",
    )
    parser.add_argument(
        "--correction",
        action="store_true",
        default=False,
        help="continuity correction ",
    )
    parser.add_argument(
        "--axis",
        type=int,
        default=0,
        help=
        "Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)",
    )
    parser.add_argument(
        "--n",
        type=int,
        default=0,
        help=
        "the number of trials. This is ignored if x gives both the number of successes and failures",
    )
    parser.add_argument("--b",
                        type=int,
                        default=0,
                        help="The number of bins to use for the histogram")
    parser.add_argument("--N",
                        type=int,
                        default=0,
                        help="Score that is compared to the elements in a.")
    parser.add_argument("--ddof",
                        type=int,
                        default=0,
                        help="Degrees of freedom correction")
    parser.add_argument(
        "--score",
        type=int,
        default=0,
        help="Score that is compared to the elements in a.",
    )
    parser.add_argument("--m", type=float, default=0.0, help="limits")
    parser.add_argument("--mf", type=float, default=2.0, help="lower limit")
    parser.add_argument("--nf", type=float, default=99.9, help="higher_limit")
    parser.add_argument(
        "--p",
        type=float,
        default=0.5,
        help=
        "The hypothesized probability of success. 0 <= p <= 1. The default value is p = 0.5",
    )
    parser.add_argument("--alpha", type=float, default=0.9, help="probability")
    parser.add_argument(
        "--new",
        type=float,
        default=0.0,
        help="Value to put in place of values in a outside of bounds",
    )
    parser.add_argument(
        "--proportiontocut",
        type=float,
        default=0.0,
        help="Proportion (in range 0-1) of total data set to trim of each end.",
    )
    parser.add_argument(
        "--lambda_",
        type=float,
        default=1.0,
        help=
        "lambda_ gives the power in the Cressie-Read power divergence statistic",
    )
    parser.add_argument(
        "--imbda",
        type=float,
        default=0,
        help=
        "If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.",
    )
    parser.add_argument(
        "--base",
        type=float,
        default=1.6,
        help="The logarithmic base to use, defaults to e",
    )
    parser.add_argument("--dtype", help="dtype")
    parser.add_argument("--med", help="med")
    parser.add_argument("--cdf", help="cdf")
    parser.add_argument("--zero_method", help="zero_method options")
    parser.add_argument("--dist", help="dist options")
    parser.add_argument("--ties", help="ties options")
    parser.add_argument("--alternative", help="alternative options")
    parser.add_argument("--mode", help="mode options")
    parser.add_argument("--method", help="method options")
    parser.add_argument("--md", help="md options")
    parser.add_argument("--center", help="center options")
    parser.add_argument("--kind", help="kind options")
    parser.add_argument("--tail", help="tail options")
    parser.add_argument("--interpolation", help="interpolation options")
    parser.add_argument("--statistic", help="statistic options")

    args = parser.parse_args()
    infile = args.infile
    outfile = open(args.outfile, "w+")
    test_id = args.test_id
    nf = args.nf
    mf = args.mf
    imbda = args.imbda
    inclusive1 = args.inclusive1
    inclusive2 = args.inclusive2
    sample0 = 0
    sample1 = 0
    sample2 = 0
    if args.sample_cols is not None:
        sample0 = 1
        barlett_samples = []
        for sample in args.sample_cols.split(";"):
            barlett_samples.append(map(int, sample.split(",")))
    if args.sample_one_cols is not None:
        sample1 = 1
        sample_one_cols = args.sample_one_cols.split(",")
    if args.sample_two_cols is not None:
        sample_two_cols = args.sample_two_cols.split(",")
        sample2 = 1
    for line in open(infile):
        sample_one = []
        sample_two = []
        cols = line.strip().split("\t")
        if sample0 == 1:
            b_samples = columns_to_values(barlett_samples, line)
        if sample1 == 1:
            for index in sample_one_cols:
                sample_one.append(cols[int(index) - 1])
        if sample2 == 1:
            for index in sample_two_cols:
                sample_two.append(cols[int(index) - 1])
        if test_id.strip() == "describe":
            size, min_max, mean, uv, bs, bk = stats.describe(
                map(float, sample_one))
            cols.append(size)
            cols.append(min_max)
            cols.append(mean)
            cols.append(uv)
            cols.append(bs)
            cols.append(bk)
        elif test_id.strip() == "mode":
            vals, counts = stats.mode(map(float, sample_one))
            cols.append(vals)
            cols.append(counts)
        elif test_id.strip() == "nanmean":
            m = stats.nanmean(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "kurtosistest":
            z_value, p_value = stats.kurtosistest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "itemfreq":
            freq = stats.itemfreq(map(float, sample_one))
            for list in freq:
                elements = ",".join(map(str, list))
                cols.append(elements)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "boxcox_llf":
            IIf = stats.boxcox_llf(imbda, map(float, sample_one))
            cols.append(IIf)
        elif test_id.strip() == "tiecorrect":
            fa = stats.tiecorrect(map(float, sample_one))
            cols.append(fa)
        elif test_id.strip() == "rankdata":
            r = stats.rankdata(map(float, sample_one), method=args.md)
            cols.append(r)
        elif test_id.strip() == "nanstd":
            s = stats.nanstd(map(float, sample_one), bias=args.bias)
            cols.append(s)
        elif test_id.strip() == "anderson":
            A2, critical, sig = stats.anderson(map(float, sample_one),
                                               dist=args.dist)
            cols.append(A2)
            for list in critical:
                cols.append(list)
            cols.append(",")
            for list in sig:
                cols.append(list)
        elif test_id.strip() == "binom_test":
            p_value = stats.binom_test(map(float, sample_one),
                                       n=args.n,
                                       p=args.p)
            cols.append(p_value)
        elif test_id.strip() == "gmean":
            gm = stats.gmean(map(float, sample_one), dtype=args.dtype)
            cols.append(gm)
        elif test_id.strip() == "hmean":
            hm = stats.hmean(map(float, sample_one), dtype=args.dtype)
            cols.append(hm)
        elif test_id.strip() == "kurtosis":
            k = stats.kurtosis(
                map(float, sample_one),
                axis=args.axis,
                fisher=args.fisher,
                bias=args.bias,
            )
            cols.append(k)
        elif test_id.strip() == "moment":
            n_moment = stats.moment(map(float, sample_one), n=args.n)
            cols.append(n_moment)
        elif test_id.strip() == "normaltest":
            k2, p_value = stats.normaltest(map(float, sample_one))
            cols.append(k2)
            cols.append(p_value)
        elif test_id.strip() == "skew":
            skewness = stats.skew(map(float, sample_one), bias=args.bias)
            cols.append(skewness)
        elif test_id.strip() == "skewtest":
            z_value, p_value = stats.skewtest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "sem":
            s = stats.sem(map(float, sample_one), ddof=args.ddof)
            cols.append(s)
        elif test_id.strip() == "zscore":
            z = stats.zscore(map(float, sample_one), ddof=args.ddof)
            for zval in z:
                cols.append(zval)
        elif test_id.strip() == "signaltonoise":
            s2n = stats.signaltonoise(map(float, sample_one), ddof=args.ddof)
            cols.append(s2n)
        elif test_id.strip() == "percentileofscore":
            p = stats.percentileofscore(map(float, sample_one),
                                        score=args.score,
                                        kind=args.kind)
            cols.append(p)
        elif test_id.strip() == "bayes_mvs":
            c_mean, c_var, c_std = stats.bayes_mvs(map(float, sample_one),
                                                   alpha=args.alpha)
            cols.append(c_mean)
            cols.append(c_var)
            cols.append(c_std)
        elif test_id.strip() == "sigmaclip":
            c, c_low, c_up = stats.sigmaclip(map(float, sample_one),
                                             low=args.m,
                                             high=args.n)
            cols.append(c)
            cols.append(c_low)
            cols.append(c_up)
        elif test_id.strip() == "kstest":
            d, p_value = stats.kstest(
                map(float, sample_one),
                cdf=args.cdf,
                N=args.N,
                alternative=args.alternative,
                mode=args.mode,
            )
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "chi2_contingency":
            chi2, p, dof, ex = stats.chi2_contingency(
                map(float, sample_one),
                correction=args.correction,
                lambda_=args.lambda_)
            cols.append(chi2)
            cols.append(p)
            cols.append(dof)
            cols.append(ex)
        elif test_id.strip() == "tmean":
            if nf == 0 and mf == 0:
                mean = stats.tmean(map(float, sample_one))
            else:
                mean = stats.tmean(map(float, sample_one), (mf, nf),
                                   (inclusive1, inclusive2))
            cols.append(mean)
        elif test_id.strip() == "tmin":
            if mf == 0:
                min = stats.tmin(map(float, sample_one))
            else:
                min = stats.tmin(map(float, sample_one),
                                 lowerlimit=mf,
                                 inclusive=args.inclusive)
            cols.append(min)
        elif test_id.strip() == "tmax":
            if nf == 0:
                max = stats.tmax(map(float, sample_one))
            else:
                max = stats.tmax(map(float, sample_one),
                                 upperlimit=nf,
                                 inclusive=args.inclusive)
            cols.append(max)
        elif test_id.strip() == "tvar":
            if nf == 0 and mf == 0:
                var = stats.tvar(map(float, sample_one))
            else:
                var = stats.tvar(map(float, sample_one), (mf, nf),
                                 (inclusive1, inclusive2))
            cols.append(var)
        elif test_id.strip() == "tstd":
            if nf == 0 and mf == 0:
                std = stats.tstd(map(float, sample_one))
            else:
                std = stats.tstd(map(float, sample_one), (mf, nf),
                                 (inclusive1, inclusive2))
            cols.append(std)
        elif test_id.strip() == "tsem":
            if nf == 0 and mf == 0:
                s = stats.tsem(map(float, sample_one))
            else:
                s = stats.tsem(map(float, sample_one), (mf, nf),
                               (inclusive1, inclusive2))
            cols.append(s)
        elif test_id.strip() == "scoreatpercentile":
            if nf == 0 and mf == 0:
                s = stats.scoreatpercentile(
                    map(float, sample_one),
                    map(float, sample_two),
                    interpolation_method=args.interpolation,
                )
            else:
                s = stats.scoreatpercentile(
                    map(float, sample_one),
                    map(float, sample_two),
                    (mf, nf),
                    interpolation_method=args.interpolation,
                )
            for val in s:
                cols.append(val)
        elif test_id.strip() == "relfreq":
            if nf == 0 and mf == 0:
                rel, low_range, binsize, ex = stats.relfreq(
                    map(float, sample_one), args.b)
            else:
                rel, low_range, binsize, ex = stats.relfreq(
                    map(float, sample_one), args.b, (mf, nf))
            for val in rel:
                cols.append(val)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "binned_statistic":
            if nf == 0 and mf == 0:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one),
                    map(float, sample_two),
                    statistic=args.statistic,
                    bins=args.b,
                )
            else:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one),
                    map(float, sample_two),
                    statistic=args.statistic,
                    bins=args.b,
                    range=(mf, nf),
                )
            cols.append(st)
            cols.append(b_edge)
            cols.append(b_n)
        elif test_id.strip() == "threshold":
            if nf == 0 and mf == 0:
                o = stats.threshold(map(float, sample_one), newval=args.new)
            else:
                o = stats.threshold(map(float, sample_one),
                                    mf,
                                    nf,
                                    newval=args.new)
            for val in o:
                cols.append(val)
        elif test_id.strip() == "trimboth":
            o = stats.trimboth(map(float, sample_one),
                               proportiontocut=args.proportiontocut)
            for val in o:
                cols.append(val)
        elif test_id.strip() == "trim1":
            t1 = stats.trim1(
                map(float, sample_one),
                proportiontocut=args.proportiontocut,
                tail=args.tail,
            )
            for val in t1:
                cols.append(val)
        elif test_id.strip() == "histogram":
            if nf == 0 and mf == 0:
                hi, low_range, binsize, ex = stats.histogram(
                    map(float, sample_one), args.b)
            else:
                hi, low_range, binsize, ex = stats.histogram(
                    map(float, sample_one), args.b, (mf, nf))
            cols.append(hi)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "cumfreq":
            if nf == 0 and mf == 0:
                cum, low_range, binsize, ex = stats.cumfreq(
                    map(float, sample_one), args.b)
            else:
                cum, low_range, binsize, ex = stats.cumfreq(
                    map(float, sample_one), args.b, (mf, nf))
            cols.append(cum)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "boxcox_normmax":
            if nf == 0 and mf == 0:
                ma = stats.boxcox_normmax(map(float, sample_one))
            else:
                ma = stats.boxcox_normmax(map(float, sample_one), (mf, nf),
                                          method=args.method)
            cols.append(ma)
        elif test_id.strip() == "boxcox":
            if imbda == 0:
                box, ma, ci = stats.boxcox(map(float, sample_one),
                                           alpha=args.alpha)
                cols.append(box)
                cols.append(ma)
                cols.append(ci)
            else:
                box = stats.boxcox(map(float, sample_one),
                                   imbda,
                                   alpha=args.alpha)
                cols.append(box)
        elif test_id.strip() == "histogram2":
            h2 = stats.histogram2(map(float, sample_one),
                                  map(float, sample_two))
            for val in h2:
                cols.append(val)
        elif test_id.strip() == "ranksums":
            z_statistic, p_value = stats.ranksums(map(float, sample_one),
                                                  map(float, sample_two))
            cols.append(z_statistic)
            cols.append(p_value)
        elif test_id.strip() == "ttest_1samp":
            t, prob = stats.ttest_1samp(map(float, sample_one),
                                        map(float, sample_two))
            for t_val in t:
                cols.append(t_val)
            for p_val in prob:
                cols.append(p_val)
        elif test_id.strip() == "ansari":
            AB, p_value = stats.ansari(map(float, sample_one),
                                       map(float, sample_two))
            cols.append(AB)
            cols.append(p_value)
        elif test_id.strip() == "linregress":
            slope, intercept, r_value, p_value, stderr = stats.linregress(
                map(float, sample_one), map(float, sample_two))
            cols.append(slope)
            cols.append(intercept)
            cols.append(r_value)
            cols.append(p_value)
            cols.append(stderr)
        elif test_id.strip() == "pearsonr":
            cor, p_value = stats.pearsonr(map(float, sample_one),
                                          map(float, sample_two))
            cols.append(cor)
            cols.append(p_value)
        elif test_id.strip() == "pointbiserialr":
            r, p_value = stats.pointbiserialr(map(float, sample_one),
                                              map(float, sample_two))
            cols.append(r)
            cols.append(p_value)
        elif test_id.strip() == "ks_2samp":
            d, p_value = stats.ks_2samp(map(float, sample_one),
                                        map(float, sample_two))
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "mannwhitneyu":
            mw_stats_u, p_value = stats.mannwhitneyu(
                map(float, sample_one),
                map(float, sample_two),
                use_continuity=args.mwu_use_continuity,
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "zmap":
            z = stats.zmap(map(float, sample_one),
                           map(float, sample_two),
                           ddof=args.ddof)
            for zval in z:
                cols.append(zval)
        elif test_id.strip() == "ttest_ind":
            mw_stats_u, p_value = stats.ttest_ind(map(float, sample_one),
                                                  map(float, sample_two),
                                                  equal_var=args.equal_var)
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "ttest_rel":
            t, prob = stats.ttest_rel(map(float, sample_one),
                                      map(float, sample_two),
                                      axis=args.axis)
            cols.append(t)
            cols.append(prob)
        elif test_id.strip() == "mood":
            z, p_value = stats.mood(map(float, sample_one),
                                    map(float, sample_two),
                                    axis=args.axis)
            cols.append(z)
            cols.append(p_value)
        elif test_id.strip() == "shapiro":
            W, p_value, a = stats.shapiro(map(float, sample_one),
                                          map(float, sample_two), args.reta)
            cols.append(W)
            cols.append(p_value)
            for coef in a:
                cols.append(coef)
        elif test_id.strip() == "kendalltau":
            k, p_value = stats.kendalltau(
                map(float, sample_one),
                map(float, sample_two),
                initial_lexsort=args.initial_lexsort,
            )
            cols.append(k)
            cols.append(p_value)
        elif test_id.strip() == "entropy":
            s = stats.entropy(map(float, sample_one),
                              map(float, sample_two),
                              base=args.base)
            cols.append(s)
        elif test_id.strip() == "spearmanr":
            if sample2 == 1:
                rho, p_value = stats.spearmanr(map(float, sample_one),
                                               map(float, sample_two))
            else:
                rho, p_value = stats.spearmanr(map(float, sample_one))
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "wilcoxon":
            if sample2 == 1:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one),
                    map(float, sample_two),
                    zero_method=args.zero_method,
                    correction=args.correction,
                )
            else:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one),
                    zero_method=args.zero_method,
                    correction=args.correction,
                )
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "chisquare":
            if sample2 == 1:
                rho, p_value = stats.chisquare(map(float, sample_one),
                                               map(float, sample_two),
                                               ddof=args.ddof)
            else:
                rho, p_value = stats.chisquare(map(float, sample_one),
                                               ddof=args.ddof)
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "power_divergence":
            if sample2 == 1:
                stat, p_value = stats.power_divergence(
                    map(float, sample_one),
                    map(float, sample_two),
                    ddof=args.ddof,
                    lambda_=args.lambda_,
                )
            else:
                stat, p_value = stats.power_divergence(map(float, sample_one),
                                                       ddof=args.ddof,
                                                       lambda_=args.lambda_)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "theilslopes":
            if sample2 == 1:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one),
                                                     map(float, sample_two),
                                                     alpha=args.alpha)
            else:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one),
                                                     alpha=args.alpha)
            cols.append(mpe)
            cols.append(met)
            cols.append(lo)
            cols.append(up)
        elif test_id.strip() == "combine_pvalues":
            if sample2 == 1:
                stat, p_value = stats.combine_pvalues(
                    map(float, sample_one),
                    method=args.med,
                    weights=map(float, sample_two),
                )
            else:
                stat, p_value = stats.combine_pvalues(map(float, sample_one),
                                                      method=args.med)
            cols.append(stat)
            cols.append(p_value)
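        # The remaining branches consume the multi-column sample groups
        # parsed from args.sample_cols into b_samples (one group per test
        # population), rather than sample_one/sample_two.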
        elif test_id.strip() == "obrientransform":
            ob = stats.obrientransform(*b_samples)
            for row in ob:
                elements = ",".join(map(str, row))
                cols.append(elements)
        elif test_id.strip() == "f_oneway":
            f_value, p_value = stats.f_oneway(*b_samples)
            cols.append(f_value)
            cols.append(p_value)
        elif test_id.strip() == "kruskal":
            h, p_value = stats.kruskal(*b_samples)
            cols.append(h)
            cols.append(p_value)
        elif test_id.strip() == "friedmanchisquare":
            fr, p_value = stats.friedmanchisquare(*b_samples)
            cols.append(fr)
            cols.append(p_value)
        elif test_id.strip() == "fligner":
            xsq, p_value = stats.fligner(center=args.center,
                                         proportiontocut=args.proportiontocut,
                                         *b_samples)
            cols.append(xsq)
            cols.append(p_value)
        elif test_id.strip() == "bartlett":
            T, p_value = stats.bartlett(*b_samples)
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "levene":
            w, p_value = stats.levene(center=args.center,
                                      proportiontocut=args.proportiontocut,
                                      *b_samples)
            cols.append(w)
            cols.append(p_value)
        elif test_id.strip() == "median_test":
            stat, p_value, m, table = stats.median_test(
                ties=args.ties,
                correction=args.correction,
                lambda_=args.lambda_,
                *b_samples)
            cols.append(stat)
            cols.append(p_value)
            cols.append(m)
            cols.append(table)
            for row in table:
                elements = ",".join(map(str, row))
                cols.append(elements)
        outfile.write("%s\n" % "\t".join(map(str, cols)))
    outfile.close()
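A minimal standalone sketch of how the trimmed-statistic branches above
(tmean, tvar, tstd, tsem) behave; the data and limits here are
illustrative, not part of the tool:

from scipy import stats

values = [1.0, 2.0, 3.0, 4.0, 100.0]   # hypothetical row values
mf, nf = 0.0, 10.0                      # lower/upper trim limits

# Observations outside (mf, nf) are ignored, so the outlier 100.0
# does not influence the trimmed statistics.
print(stats.tmean(values, (mf, nf), (True, True)))   # 2.5
print(stats.tstd(values, (mf, nf), (True, True)))    # ~1.29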
Code Example #10
0
def remove_stars(tpf):
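    """Mask out all but the largest cluster of bright pixels in a target
    pixel stack, returning the stack with the secondary stars removed.

    The summed image's background mode is estimated with a KDE; pixels
    brighter than mode + 2 * (MAD-scaled sigma) are clustered with DBSCAN
    plus a watershed, and every cluster except the largest is zeroed out.
    """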

    sumimage = np.nansum(tpf, axis=0, dtype='float64')

    ny, nx = np.shape(sumimage)
    ori_mask = ~np.isnan(sumimage)

    X, Y = np.meshgrid(np.arange(nx), np.arange(ny))

    Flux = sumimage[ori_mask].flatten()
    Flux = Flux[Flux > 0]

    flux_cut = stats.trim1(np.sort(Flux), 0.15)

    background_bandwidth = select_bandwidth(flux_cut, bw='scott', kernel='gau')
    kernel = KDE(flux_cut)

    kernel.fit(kernel='gau', bw=background_bandwidth, fft=True, gridsize=100)

    def kernel_opt(x):
        return -1 * kernel.evaluate(x)

    max_guess = kernel.support[np.argmax(kernel.density)]
    MODE = optimize.fmin_powell(kernel_opt, max_guess, disp=0)

    # Robust background scatter: scale the MAD of the below-mode flux to
    # an equivalent Gaussian sigma, then place the cut at mode + 2*sigma.
    mad_to_sigma = 1.482602218505602
    MAD1 = mad_to_sigma * np.nanmedian(np.abs(Flux[(Flux < MODE)] - MODE))

    thresh = 2.
    CUT = MODE + thresh * MAD1

    idx = (sumimage > CUT)
    X2 = X[idx]
    Y2 = Y[idx]

    cluster_radius = np.sqrt(2)
    min_for_cluster = 4

    XX, labels_ini, core_samples_mask = run_DBSCAN(X2, Y2, cluster_radius,
                                                   min_for_cluster)

    DUMMY_MASKS = np.zeros((0, ny, nx), dtype='bool')
    DUMMY_MASKS_LABELS = []
    m = np.zeros_like(sumimage, dtype='bool')
    for lab in set(labels_ini):
        if lab == -1: continue
        # Create "image" of this mask:
        m[:, :] = False
        for x, y in XX[labels_ini == lab]:
            m[y, x] = True
        # Append them to lists:
        DUMMY_MASKS = np.append(DUMMY_MASKS, [m], axis=0)
        DUMMY_MASKS_LABELS.append(lab)

    # With all cluster masks collected, check for saturated columns:
    smask, _ = k2p2_saturated(sumimage, DUMMY_MASKS, idx)

    if np.any(smask):
        saturated_masks = {}
        for u, sm in enumerate(smask):
            saturated_masks[DUMMY_MASKS_LABELS[u]] = sm
    else:
        saturated_masks = None

    # Watershed segmentation settings:
    ws_thres = 0.02
    ws_footprint = 3
    ws_blur = 0.2
    ws_alg = 'flux'
    plot_folder = None
    catalog = None

    labels, unique_labels, NoCluster = k2p2WS(
        X,
        Y,
        X2,
        Y2,
        sumimage,
        XX,
        labels_ini,
        core_samples_mask,
        saturated_masks=saturated_masks,
        ws_thres=ws_thres,
        ws_footprint=ws_footprint,
        ws_blur=ws_blur,
        ws_alg=ws_alg,
        output_folder=plot_folder,
        catalog=catalog)

    # Make sure it is a tuple and not a set - much easier to work with:
    unique_labels = tuple(unique_labels)

    # Create list of clusters and their number of pixels:
    No_pix_sort = np.zeros([len(unique_labels), 2])
    for u, lab in enumerate(unique_labels):
        No_pix_sort[u, 0] = np.sum(labels == lab)
        No_pix_sort[u, 1] = lab

    # Only select the clusters that are not the largest or noise:

    cluster_select = (No_pix_sort[:, 0] < np.max(
        No_pix_sort.T[0])) & (No_pix_sort[:, 1] != -1)
    # cluster_select = (No_pix_sort[:, 0] < np.max(No_pix_sort.T[0]))
    no_masks = sum(cluster_select)
    No_pix_sort = No_pix_sort[cluster_select, :]

    MASKS = np.zeros((no_masks, ny, nx))
    for u in range(no_masks):
        lab = No_pix_sort[u, 1]
        class_member_mask = (labels == lab)
        xy = XX[class_member_mask, :]
        MASKS[u, xy[:, 1], xy[:, 0]] = 1

    maskimg = np.sum(MASKS, axis=0)
    invmaskimg = np.abs(maskimg - 1)

    return invmaskimg * tpf
Code Example #11
0
def guess_offset(y):
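    """Guess the baseline offset of a trace: trim away the lowest third
    of y values (the notch region) and take the median of the rest."""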
    low_filt_perc = 0.33
    # Cut out the bottom low_filt_perc of y values; this filters out the
    # notch so the median reflects the baseline.
    y_filtered = stats.trim1(y, low_filt_perc, tail='left')
    return np.median(y_filtered)
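A quick check with synthetic data (not from the original project; this
assumes a recent scipy, where trim1 drops the lowest values rather than
slicing positionally):

import numpy as np
from scipy import stats

# Synthetic trace: flat baseline at 1.0 with a notch dipping to 0.2.
y = np.concatenate([np.full(80, 1.0), np.linspace(0.2, 1.0, 20)])
print(guess_offset(y))   # ~1.0: the notch samples are trimmed away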
Code Example #12
0
File: graph.py Project: EQt/barnacleboy
                   help='Write an HDF5 instance')
    args = p.parse_args()

    fname = args.fname() if callable(args.fname) else args.fname
    df = pd.read_csv(fname)
    values = df[df.columns[-1]]
    coord = df[df.columns[:2]].values

    edges = delaunay_graph(coord)
    lens = euclidean_edge_length(edges, coord)
    thres = lens.mean() + 1.2*lens.std()
    edges = edges[lens <= thres]

    zeros = values <= 0
    zr = zeros.sum() / len(values)
    print(len(edges), 'edges')
    print(f"zero = {zr * 100:.2f}%")

    if args.plot:
        plt.figure("graph")
        plot_edges(edges, coord)
        plt.plot(*coord[zeros].T, 'r.')
        plt.figure(f"input: {df.columns[-1]}, zeros cutted")
        val = stats.trim1(values, 0.01)
        plt.hist(val[val > 0], bins=100, log=True, histtype='step')
        plt.show()

    if args.out:
        store_graph(args.out, edges, values, coord)
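delaunay_graph and euclidean_edge_length are helpers from the barnacleboy
project; a rough standalone equivalent of the edge-trimming step, built on
scipy.spatial.Delaunay (assumed behavior, for illustration only):

import numpy as np
from scipy.spatial import Delaunay

def delaunay_edges(coord):
    # Collect the unique vertex-index pairs of the triangulation.
    tri = Delaunay(coord)
    edges = set()
    for a, b, c in tri.simplices:
        edges.update({tuple(sorted(p)) for p in ((a, b), (b, c), (a, c))})
    return np.array(sorted(edges))

coord = np.random.rand(50, 2)
edges = delaunay_edges(coord)
lens = np.linalg.norm(coord[edges[:, 0]] - coord[edges[:, 1]], axis=1)
edges = edges[lens <= lens.mean() + 1.2 * lens.std()]  # drop unusually long edges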
Code Example #13
0
File: main.py Project: OpenGelo/flownav
    else:
        matches = bfmatcher.knnMatch(qdesc, tdesc, k=2)

    # Filter out poor matches by ratio test and maximum (descriptor) distance
    matchdist = []
    filteredmatches = []
    for m in matches:
        # Lowe's ratio test plus an absolute cap on descriptor distance.
        if (len(m) == 2 and m[0].distance >= 0.8*m[1].distance) or m[0].distance >= 0.25:
            continue
        filteredmatches.append(m[0])
        qkp, tkp = getMatchKPs(m[0])
        tkp.class_id = qkp.class_id             # carry over the key point's ID
        matchdist.append(diffKP_L2(qkp,tkp))    # get the match pixel distance
    matches = filteredmatches

    if matchdist:       # Filter out matches with outlier spatial distances
        threshdist = np.mean(stats.trim1(matchdist, 0.25)) + 2*np.std(matchdist)
        matches = [m for m, mdist in zip(matches, matchdist) if mdist < threshdist]

    if not opts.nodraw: # Draw rectangle around RoI
        cv2.rectangle(dispim,(scrapX,scrapY)
                      ,(currFrame.shape[1]-scrapX,currFrame.shape[0]-scrapY)
                      ,(192,192,192),thickness=2)

    if not opts.nodraw and matches: # Draw matched keypoints
        qkp, tkp = zip(*map(getMatchKPs,matches))
        cv2.drawKeypoints(dispim, qkp, dispim, color=(0,255,0))
        cv2.drawKeypoints(dispim, tkp, dispim, color=(255,0,0))
        for q, t in zip(qkp, tkp):
            cv2.line(dispim, inttuple(*q.pt), inttuple(*t.pt), (0, 255, 0), 1)

    '''
    Find an estimate of the scale change for keypoints that are expanding