def similarity(sketch, other_sketch):
    """
    Score how far apart two sketches are with several curve measures.

    sketch = sketchpad json output
    other_sketch = saved json output from Django

    When both arguments are dicts, returns a dict with the keys "dh"
    (directed Hausdorff), "df" (Frechet), "dtw", "pcw" (partial curve
    mapping), "cl" (curve length) and "area". For any other input the
    fallback ``{"dh": 0}`` is returned.
    """
    # Guard clause: anything that is not a pair of dicts gets the fallback.
    if not (isinstance(sketch, dict) and isinstance(other_sketch, dict)):
        return {"dh": 0}

    # Turn each sketch into an array with one row per coordinate pair.
    search_x, search_y = create_xy_coords('search', sketch)
    search_curve = np.array([search_x, search_y]).T
    other_x, other_y = create_xy_coords('other', other_sketch)
    other_curve = np.array([other_x, other_y]).T

    # directed_hausdorff also yields two indices; only the distance is kept.
    hausdorff, _, _ = directed_hausdorff(search_curve, other_curve)
    frechet = similaritymeasures.frechet_dist(search_curve, other_curve)
    # dtw returns a second value alongside the distance; it is not used here.
    dtw_distance, _ = similaritymeasures.dtw(search_curve, other_curve)
    partial_mapping = similaritymeasures.pcm(search_curve, other_curve)
    enclosed_area = similaritymeasures.area_between_two_curves(
        search_curve, other_curve)
    curve_length = similaritymeasures.curve_length_measure(
        search_curve, other_curve)

    return {
        "dh": hausdorff,
        "df": frechet,
        "dtw": dtw_distance,
        "pcw": partial_mapping,
        "cl": curve_length,
        "area": enclosed_area,
    }
def test_pcm_rev(self):
    """Smoke test: pcm accepts two straight lines given as (second, first)."""
    first_x = np.linspace(0.0, 1.0, 100)
    first_line = np.array((first_x, first_x*20.0)).T

    second_x = np.linspace(1.0, 2.5, 50)
    second_line = np.array((second_x, second_x*3.0)).T

    # Only checks that the call completes; the returned value is discarded.
    similaritymeasures.pcm(second_line, first_line)
    self.assertTrue(True)
def similarity_measures(LOOKBACK=MS_IN_A_DAY, **kwargs):
    """
    Calculate all similarity measures between two trajectories.

    The first trajectory is loaded with ``gps(**kwargs)``. The second is
    loaded for the same ``id`` with ``start`` and ``end`` both shifted back
    by ``LOOKBACK``.

    Returns a dict with 'timestamp' (the requested ``start``) plus
    'frechet_distance', 'area_between', 'partial_curve_mapping',
    'curve_length_similarity' and 'fastDTW_score'. Every measure is None
    when either trajectory has no GPS data.
    """
    def empty_result():
        # Same keys as a full result, with every measure set to None.
        return {'timestamp': kwargs['start'],
                'frechet_distance': None,
                'area_between': None,
                'partial_curve_mapping': None,
                'curve_length_similarity': None,
                'fastDTW_score': None}

    log.info(f'Loading GPS data for 1st trajectory...')
    current_fixes = gps(**kwargs)
    if not current_fixes:
        return empty_result()
    current_track = pd.DataFrame(current_fixes)[['latitude', 'longitude']].to_numpy()

    log.info(f'Loading GPS data for 2nd trajectory...')
    earlier_start = kwargs['start'] - LOOKBACK
    earlier_end = kwargs['end'] - LOOKBACK
    earlier_fixes = gps(id=kwargs['id'], start=earlier_start, end=earlier_end)

    log.info(f'Calculating all similarity measures...')
    if not earlier_fixes:
        return empty_result()
    earlier_track = pd.DataFrame(earlier_fixes)[['latitude', 'longitude']].to_numpy()

    log.info(f'Calculating Frechet...')
    frechet = similaritymeasures.frechet_dist(current_track, earlier_track)
    log.info(f'Calculating Area between...')
    area = similaritymeasures.area_between_two_curves(current_track, earlier_track)
    log.info(f'Calculating PCM...')
    partial_mapping = similaritymeasures.pcm(current_track, earlier_track)
    log.info(f'Calculating curve length...')
    length_similarity = similaritymeasures.curve_length_measure(current_track, earlier_track)
    log.info(f'Calculating FastDTW...')
    # fastdtw returns a second value alongside the score; it is discarded.
    dtw_score, _ = fastdtw(current_track, earlier_track, dist=euclidean)

    return {'timestamp': kwargs['start'],
            'frechet_distance': frechet,
            'area_between': area,
            'partial_curve_mapping': partial_mapping,
            'curve_length_similarity': length_similarity,
            'fastDTW_score': dtw_score}
def pcm(LOOKBACK=MS_IN_A_DAY, **kwargs):
    """
    Calculate Partial Curve Mapping between two trajectories.

    The first trajectory is loaded with ``gps(**kwargs)``. The second is
    loaded for the same ``id`` with ``start`` and ``end`` both shifted back
    by ``LOOKBACK``.

    Returns:
        None when the first trajectory has no GPS data. Otherwise a dict
        ``{'timestamp': kwargs['start'], 'partial_curve_mapping': value}``
        where ``value`` is None if the second trajectory has no GPS data.
    """
    log.info('Loading GPS data for 1st trajectory...')
    gps1 = gps(**kwargs)
    if not gps1:
        return None
    arr1 = pd.DataFrame(gps1)[['latitude', 'longitude']].to_numpy()

    log.info('Loading GPS data for 2nd trajectory...')
    start2 = kwargs['start'] - LOOKBACK
    end2 = kwargs['end'] - LOOKBACK
    gps2 = gps(id=kwargs['id'], start=start2, end=end2)

    log.info('Calculating PCM...')
    # Default to None so a missing second trajectory yields a well-formed
    # result instead of reading an unassigned local at the return below.
    pcm_value = None
    if gps2:
        arr2 = pd.DataFrame(gps2)[['latitude', 'longitude']].to_numpy()
        pcm_value = similaritymeasures.pcm(arr1, arr2)

    return {'timestamp': kwargs['start'], 'partial_curve_mapping': pcm_value}
def _interpolated_numeric(series: pandas.Series) -> pandas.Series:
    """Coerce to numeric, interpolate gaps, then back/forward-fill the ends."""
    numeric = pandas.to_numeric(series, errors="coerce", downcast="float")
    return numeric.interpolate().bfill().ffill()


def stats_between_series(
    xaxis_1: pandas.Series,
    values_1: pandas.Series,
    xaxis_2: pandas.Series,
    values_2: pandas.Series,
    print_: bool = False,
) -> dict:
    """Similarity measures between two temporal sequences.

    Both series are aligned on the union of their x-axis values, gaps are
    interpolated, and the aligned values are compared as two curves whose
    x coordinate is the row position in the aligned table.

    Args:
        xaxis_1 (pandas.Series): index axis of the dataframe 1
        values_1 (pandas.Series): value axis of the dataframe 1
        xaxis_2 (pandas.Series): index axis of the dataframe 2
        values_2 (pandas.Series): value axis of the dataframe 2
        print_ (bool): when True, print the result dict and the second
            curve before returning.

    Returns:
        dict: `{"dtw": float, "frechet_dist": float, "pcm": float,
        "area": float, "std": float}` where "std" is the absolute
        difference between the standard deviations of the two value
        columns.
    """
    dataframe_1 = pandas.merge(xaxis_1, values_1, right_index=True, left_index=True)
    dataframe_2 = pandas.merge(xaxis_2, values_2, right_index=True, left_index=True)

    dataframe_1.rename(columns={
        xaxis_1.name: "id",
        values_1.name: "values_1"
    }, inplace=True)
    dataframe_2.rename(columns={
        xaxis_2.name: "id",
        values_2.name: "values_2"
    }, inplace=True)

    dataframe_1.set_index("id", inplace=True)
    dataframe_2.set_index("id", inplace=True)

    # Outer-align both series on "id"; rows present in only one series are
    # NaN in the other until filled below.
    unified = pandas.concat([dataframe_1, dataframe_2], axis=1)
    unified["values_1"] = _interpolated_numeric(unified["values_1"])
    unified["values_2"] = _interpolated_numeric(unified["values_2"])

    # similaritymeasures expects one row per point: column 0 is x and
    # column 1 is y. Stack as (N, 2), not (2, N).
    xaxis_arranged = numpy.arange(len(unified))
    curve_1 = numpy.column_stack((xaxis_arranged, unified["values_1"].values))
    curve_2 = numpy.column_stack((xaxis_arranged, unified["values_2"].values))

    # dtw also returns a second value alongside the distance; unused here.
    dtw, _ = similaritymeasures.dtw(curve_1, curve_2)
    frechet_dist = similaritymeasures.frechet_dist(curve_1, curve_2)
    pcm = similaritymeasures.pcm(curve_1, curve_2)
    area = similaritymeasures.area_between_two_curves(curve_1, curve_2)

    std = numpy.abs(
        numpy.nanstd(curve_2[:, 1]) - numpy.nanstd(curve_1[:, 1]))

    stats = {
        "dtw": dtw,
        "frechet_dist": frechet_dist,
        "pcm": pcm,
        "area": area,
        "std": std,
    }
    if print_:
        print(stats, curve_2)
    return stats
def get_best_blur(
    evt_file,
    numiter,
    region_file,
    blur_vals,
    radius_100p,
    binsize=0.2,
    spectrum="core_flux_chart.dat",
    out_root_common="core_psf",
):
    """Print the blur whose simulated ECF profile best matches the observation.

    For every entry of ``blur_vals`` a PSF is simulated (skipped when
    ``<out_root_common>_blur_<blur>_projrays.fits`` already exists), its ECF
    profile is generated and plotted, and the profile is compared with the
    observed one using dynamic time warping, partial curve mapping, curve
    length and area-between-curves. The blur minimising each measure is
    printed; nothing is returned.

    Args:
        evt_file: event file passed to ``get_centroids``, ``ecf_calc`` and
            ``simulate_psf``.
        numiter: forwarded to ``simulate_psf``.
        region_file: region passed to ``get_centroids``.
        blur_vals: one-dimensional sequence of blur values to try.
        radius_100p: radius forwarded to the ECF calculations.
        binsize: bin size forwarded to the ECF and PSF steps.
        spectrum: spectrum file forwarded to ``simulate_psf``.
        out_root_common: prefix of every simulation output root.

    Raises:
        ValueError: if ``blur_vals`` is empty.
    """
    # Size everything from blur_vals itself rather than from the loop
    # variable, and fail before any expensive external step is run.
    blur_vals = np.atleast_1d(blur_vals)
    n_blurs = blur_vals.shape[0]
    if n_blurs == 0:
        raise ValueError("blur_vals must contain at least one blur value")

    xcen, ycen, ra, dec = get_centroids(evt_file, region_file)
    print(xcen, ycen)

    ecf_fractions = [
        0.01,
        0.025,
        0.05,
        0.075,
        0.1,
        0.2,
        0.3,
        0.4,
        0.5,
        0.6,
        0.7,
        0.8,
        0.85,
        0.9,
        0.95,
        0.99,
    ]
    n_fractions = len(ecf_fractions)

    # One row per blur value, one column per ECF fraction.
    ecf_profile_values = np.zeros((n_blurs, n_fractions), dtype=float)
    ecf_radii_values = np.zeros((n_blurs, n_fractions), dtype=float)

    # generate the profile of the core
    obs_ecf_file = "obs_ecf.fits"
    ecf_calc.punlearn()
    ecf_calc(
        infile=evt_file,
        outfile=obs_ecf_file,
        xpos=xcen,
        ypos=ycen,
        radius=radius_100p,
        binsize=binsize,
        clobber=True,
        fraction=ecf_fractions,
    )

    for idx, blur in enumerate(blur_vals):
        print(f"Blur={blur}")
        outroot = out_root_common + "_blur_" + str(blur)

        # Reuse an existing simulation for this blur if its output is on disk.
        if not Path(outroot + "_projrays.fits").is_file():
            print("Simulating PSF")
            simulate_psf(
                infile=evt_file,
                outroot=outroot,
                spectrum=spectrum,
                numiter=numiter,
                ra=ra,
                dec=dec,
                binsize=binsize,
                blur=blur,
            )

        print("Generating the ecf profile")
        out_ecf = generate_ecf_profile(
            outroot + "_projrays.fits",
            blur,
            xcen,
            ycen,
            binsize=binsize,
            ecf_fractions=ecf_fractions,
            radius=radius_100p,
        )

        # Each simulated profile is loaded under dataset id `idx`.
        load_table(idx, out_ecf, colkeys=["r_mid", "fraction"])
        ecf_profile = get_data(idx)
        ecf_profile_values[idx, :] = ecf_profile.y
        ecf_radii_values[idx, :] = ecf_profile.x
        plt.plot(ecf_profile.x, ecf_profile.y, label=f"blur={blur}")
        print("Done")

    # The observed profile takes the first dataset id after the simulations.
    load_table(n_blurs, obs_ecf_file, colkeys=["r_mid", "fraction"])
    obs_ecf = get_data(n_blurs)
    plt.plot(obs_ecf.x, obs_ecf.y, label="Observation", ls="--", lw=2, color="black")
    plt.legend()
    plt.xlabel("Radius (ACIS pixels)")
    plt.ylabel("ECF")
    plt.show()

    dtw_vals = np.zeros(n_blurs)
    pcm_vals = np.zeros(n_blurs)
    clm_vals = pcm_vals.copy()
    abtc_vals = pcm_vals.copy()

    # The observed curve is the same for every comparison, so build it once.
    observed_curve = np.zeros((n_fractions, 2))
    observed_curve[:, 0] = obs_ecf.x
    observed_curve[:, 1] = obs_ecf.y

    for i in range(n_blurs):
        simulated_curve = np.zeros((n_fractions, 2))
        simulated_curve[:, 0] = ecf_radii_values[i, :]
        simulated_curve[:, 1] = ecf_profile_values[i, :]

        dtw_vals[i], _ = similaritymeasures.dtw(simulated_curve, observed_curve)
        pcm_vals[i] = similaritymeasures.pcm(simulated_curve, observed_curve)
        clm_vals[i] = similaritymeasures.curve_length_measure(
            simulated_curve, observed_curve)
        abtc_vals[i] = similaritymeasures.area_between_two_curves(
            simulated_curve, observed_curve)

    print(f"Blur (Dynamic time warping) {blur_vals[np.argmin(dtw_vals)]}")
    print(f"Blur (Partial Curve Mapping): {blur_vals[np.argmin(pcm_vals)]}")
    print(f"Blur (Curve Length Measure): {blur_vals[np.argmin(clm_vals)]}")
    print(f"Blur (Area Curve Measure): {blur_vals[np.argmin(abtc_vals)]}")
def test_c3_c4_pcm(self):
    """Compute PCM between curve3 and curve4 and assert the result is truthy."""
    measured = similaritymeasures.pcm(curve3, curve4)
    # NOTE(review): the second argument of assertTrue is the failure
    # message, so 50.0 is never compared with the result. Confirm the
    # intended expected value before tightening this to an equality check.
    self.assertTrue(measured, msg=50.0)
def test_c1_c2_pcm_swapped(self):
    """Compute PCM with curve2 as first argument and assert it is truthy."""
    measured = similaritymeasures.pcm(curve2, curve1)
    # NOTE(review): np.nan is passed as the failure message, not as an
    # expected value, so this only checks truthiness. If NaN is the
    # intended result, np.isnan(measured) is presumably what was meant;
    # confirm before changing.
    self.assertTrue(measured, msg=np.nan)
def test_random_pcm(self):
    """Smoke test: pcm completes on curve_a_rand and curve_b_rand."""
    # The returned value is deliberately ignored; only completion matters.
    similaritymeasures.pcm(curve_a_rand, curve_b_rand)
    self.assertTrue(True)
def test_P_Q_pcm(self):
    """After lowering Q's second column by 1, PCM of P and Q is close to 0."""
    # Q is not a local name, so this in-place shift persists after the
    # test returns.
    Q[:, 1] -= 1
    score = similaritymeasures.pcm(P, Q)
    self.assertTrue(np.isclose(score, 0.0))
# Ten copies of zz stacked along a new leading axis.
subzz = np.array([zz] * 10)

# Partial Curve Mapping between the experimental and numerical curves
pcm = similaritymeasures.pcm(exp_data, num_data)

# Discrete Frechet distance between the two curves
df = similaritymeasures.frechet_dist(exp_data, num_data)

# Area enclosed between the two curves
area = similaritymeasures.area_between_two_curves(exp_data, num_data)

# Curve Length based similarity measure between the two curves
cl = similaritymeasures.curve_length_measure(exp_data, num_data)

# quantify the difference between the two curves using
# Dynamic Time Warping distance
# Compare curves P and Q with several distance measures and plot them.

# Reverse the point order of Q before comparing.
Q = Q[::-1]

# Both index-returning Frechet variants are printed; f_d, p_i and q_i are
# rebound by the second call, so the plot below uses its indices.
f_d, p_i, q_i = FrechetDist.frechetdist_index_2(P, Q)
print("frechet distance: ", f_d, p_i, q_i)
f_d, p_i, q_i = FrechetDist.frechetdist_index(P, Q)
print("frechet distance 2: ", f_d, p_i, q_i)

d = simm.frechet_dist(P, Q)
print("frechet distance: ", d)

# Directed Hausdorff is evaluated in both directions; whatever the call
# returns is printed unmodified.
distance = directed_hausdorff(P, Q)
print("hausdorff distance P-Q: ", distance)
distance = directed_hausdorff(Q, P)
print("hausdorff distance Q-P: ", distance)

# Split each curve into plain Python lists of x and y coordinates so that
# `+` below concatenates rather than adds element-wise.
px = [p[0] for p in P]
py = [p[1] for p in P]
qx = [p[0] for p in Q]
qy = [p[1] for p in Q]

plt.scatter(px+qx, py+qy)
plt.plot(px, py, label="P", color="red")
plt.plot(qx, qy, label="Q", color="blue")
# Dotted segment joining the point pair reported by frechetdist_index.
plt.plot([px[p_i], qx[q_i]], [py[p_i], qy[q_i]], label="frechet dist", linestyle=":", color="purple")
plt.show()

# `df` is reused as a scratch name for each remaining measure.
df = simm.area_between_two_curves(P, Q)
print("area bt two curves: {}".format(df))
# NOTE(review): presumably simm is similaritymeasures, whose dtw result is
# unpacked into two values elsewhere; here the whole return value is
# printed as-is. Confirm that is intended.
df = simm.dtw(P, Q)
print("dtw: {}".format(df))
df = simm.curve_length_measure(P, Q)
print("curve length: {}".format(df))
df = simm.pcm(P, Q)
print("pcm: {}".format(df))