Code example #1
def getSTSEmd(query_sts, historical_sts):
    """
	Returns the Earth Mover's distance for a single PTV-OAR pair's spatial transformation signature.

	Parameters
	----------

	query_sts : 2D NdArray
        Dimensions are [num_combinations, 4]. Contains
        percentage of points in each interval, normalized but not cumulative, for the query case.

    historical_sts : 2D NdArray
        Dimensions are [num_combinations, 4]. Contains
        percentage of points in each interval, normalized but not cumulative, for the historical case.

	Returns
	-------
	emd : float
		The scalar earth mover's distance (dissimilarity) between the two study pairs.
	"""
    weights_hist = np.ones((historical_sts.shape[0], 1))
    weights_query = np.ones((query_sts.shape[0], 1))

    query_hist = np.array(np.concatenate((weights_query, query_sts),
                                         axis=1)).astype(np.float32)
    historical_hist = np.array(
        np.concatenate((weights_hist, historical_sts),
                       axis=1)).astype(np.float32)

    emd = cv2.EMD(query_hist, historical_hist, distType=cv2.DIST_L2)[0]
    return emd
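A minimal toy invocation of the function above (values illustrative; numpy and cv2 are assumed to be imported at module level, as the snippet implies):

import cv2
import numpy as np

# Three PTV-OAR combinations with four intervals each; every row sums to 1.
query = np.array([[0.10, 0.20, 0.30, 0.40],
                  [0.25, 0.25, 0.25, 0.25],
                  [0.40, 0.30, 0.20, 0.10]])
historical = np.array([[0.20, 0.20, 0.30, 0.30],
                       [0.25, 0.25, 0.25, 0.25],
                       [0.30, 0.30, 0.20, 0.20]])
print(getSTSEmd(query, historical))  # small non-negative float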
Code example #2
    def forward(self, p1, p2):
        # p1, p2: (b, m, 2) batches of m two-dimensional points each.
        up1 = p1.unsqueeze(2)  # (b, m, 1, 2)
        up2 = p2.unsqueeze(1)  # (b, 1, m, 2)
        b, m, _ = p1.size()

        # Pairwise squared Euclidean cost between every point of p1 and p2.
        distance_map = torch.sum((up1 - up2) ** 2, dim=3)  # (b, m, m)

        index = p1.new_zeros((b, m, 1)).long()

        for i in range(b):
            # Solve the transportation problem with OpenCV (cv2.EMD treats the
            # first column of each signature as the weight), then harden the
            # flow into an index by taking the argmax of each row.
            _, _, flow = cv2.EMD(p1[i].detach().cpu().numpy(),
                                 p2[i].detach().cpu().numpy(),
                                 cv2.DIST_USER,
                                 distance_map[i].detach().cpu().numpy())
            flow = np.argmax(flow, axis=1)
            print('opencvemd:', np.sort(flow))  # debug output
            index[i, :, 0] = index.new_tensor(flow)
        index = index.repeat((1, 1, 2))

        # Reorder p2 by the hardened flow and compute the squared matching cost.
        new_p2 = torch.gather(p2, 1, index)
        cost = torch.sum((p1 - new_p2) ** 2, dim=2)
        cost = torch.sum(cost, dim=1)

        return cost
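A hypothetical driver for the method above, assuming it belongs to an nn.Module subclass; the class name EMDMatchLoss is illustrative, not from the original source:

import torch

loss_fn = EMDMatchLoss()   # hypothetical nn.Module wrapping the forward above
p1 = torch.rand(4, 64, 2)  # batch of 4 point sets, 64 two-dimensional points each
p2 = torch.rand(4, 64, 2)
cost = loss_fn(p1, p2)     # shape (4,): one matching cost per point set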
Code example #3
def get_em_distance(image_path_1, image_path_2):
    hist1 = get_image_hist(image_path_1)
    sig1 = get_signature_from_hist(hist1).astype(np.float32)
    hist2 = get_image_hist(image_path_2)
    sig2 = get_signature_from_hist(hist2).astype(np.float32)
    distance = cv2.EMD(sig1, sig2, cv2.DIST_L2)[0]
    return distance
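get_image_hist and get_signature_from_hist are not shown in this snippet; a plausible sketch consistent with their usage (an assumption, not the original implementation):

import cv2
import numpy as np

def get_image_hist(image_path):
    # 256-bin grayscale histogram of the image at image_path.
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    return cv2.calcHist([img], [0], None, [256], [0, 256])

def get_signature_from_hist(hist):
    # Rows of [weight, bin_index], the signature layout cv2.EMD expects.
    sig = np.zeros((len(hist), 2), dtype=np.float32)
    sig[:, 0] = hist[:, 0]
    sig[:, 1] = np.arange(len(hist))
    return sig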
Code example #4
def train_online(env, agent, writer, num_episodes, eval_cycle, num_eval_episodes, soft_update, skip_frames,
                 history_length, rendering, max_timesteps, normalize_images):
    print("... train agent")

    global visitation_map
    visitation_map = np.zeros((env.height, env.width))
    uniform_prob = np.array(visitation_map)
    uniform_prob[:, :] = 1 / ((visitation_map.shape[0] - 2) * (visitation_map.shape[1] - 2))
    uniform_prob[:, 0] = 0
    uniform_prob[:, -1] = 0
    uniform_prob[0, :] = 0
    uniform_prob[-1, :] = 0

    for i in range(num_episodes):
        print("episode %d" % i)
        max_timesteps_current = max_timesteps
        stats = run_episode(env, agent, max_timesteps=max_timesteps_current, deterministic=False, do_training=True,
                            rendering=rendering, soft_update=soft_update, skip_frames=skip_frames,
                            history_length=history_length, normalize_images=normalize_images)

        writer.add_scalar('train_episode_reward', stats.episode_reward, global_step=i)
        writer.add_scalar('train_straight', stats.get_action_usage(STRAIGHT), global_step=i)
        writer.add_scalar('train_left', stats.get_action_usage(LEFT), global_step=i)
        writer.add_scalar('train_right', stats.get_action_usage(RIGHT), global_step=i)
        writer.add_scalar('train_accel', stats.get_action_usage(ACCELERATE), global_step=i)
        writer.add_scalar('train_brake', stats.get_action_usage(BRAKE), global_step=i)

        # EVALUATION
        # check its performance with greedy actions only. You can also use tensorboard to plot the mean episode reward.
        if i % eval_cycle == 0:
            total_visits = visitation_map.sum()
            obs_prob = visitation_map / total_visits
            dist, _, _ = cv2.EMD(arr_to_sig(obs_prob), arr_to_sig(uniform_prob), cv2.DIST_L2)
            coverage = np.count_nonzero(visitation_map) / (
                        (visitation_map.shape[0] - 2) * (visitation_map.shape[1] - 2))
            writer.add_scalar('exploration_coverage', coverage, global_step=i)
            writer.add_scalar('exploration_EM_distance', dist, global_step=i)

            # plt.figure(figsize=(20, 20))
            # sns.heatmap(np.transpose(visitation_map), annot=True, linewidths=.5, square=True)
            # plt.title('EM Distance ' + str(dist))
            # plt.savefig(os.path.join(writer.logdir, 'visit_map_' + str(i) + '.png'), bbox_inches="tight")
            # plt.clf()
            print('exploration coverage: {}     dist: {}'.format(coverage, dist))
            stats = []
            for j in range(num_eval_episodes):
                stats.append(run_episode(env, agent, deterministic=True, do_training=False, max_timesteps=5,
                                         history_length=history_length, skip_frames=skip_frames,
                                         normalize_images=normalize_images))
            stats_agg = [stat.episode_reward for stat in stats]
            episode_reward_mean, episode_reward_std = np.mean(stats_agg), np.std(stats_agg)
            print('Validation {} +- {}'.format(episode_reward_mean, episode_reward_std))
            print('Replay buffer length', len(agent.replay_buffer._data))
            writer.add_scalar('val_episode_reward_mean', episode_reward_mean, global_step=i)
            writer.add_scalar('val_episode_reward_std', episode_reward_std, global_step=i)
        # store model.
        if i % eval_cycle == 0 or i >= (num_episodes - 1):
            model_dir = agent.save(os.path.join(writer.logdir, "agent.pt"))
            print("Model saved in file: %s" % model_dir)
Code example #5
def emd(img1, img2):
    # Convert each image in the batch to (?, h * w) signatures, taking a
    # random 60x60 crop per channel if the image is larger than 60x60.
    h, w = img1.shape[-2:]
    max_x_off, max_y_off = max(0, w - 60), max(0, h - 60)
    sigs = []
    s = [60, 60]
    _s = np.shape(img1)[:-2]
    for _s_ in _s:
        s.append(_s_)

    # Move the two spatial axes to the front: (..., h, w) -> (h, w, ...).
    img1 = np.swapaxes(np.swapaxes(img1, 0, -2), 1, -1)
    img2 = np.swapaxes(np.swapaxes(img2, 0, -2), 1, -1)

    new_img1, new_img2 = np.zeros(s), np.zeros(s)
    for i in range(len(new_img1[0, 0, :])):
        # Use the same random crop window for both images (+ 1 so that images
        # of exactly 60 pixels do not make randint fail on an empty range).
        x_start, y_start = (np.random.randint(0, max_x_off + 1),
                            np.random.randint(0, max_y_off + 1))
        x_end, y_end = min(x_start + 60, w), min(y_start + 60, h)
        new_img1[:, :, i] = img1[y_start:y_end, x_start:x_end, i]
        new_img2[:, :, i] = img2[y_start:y_end, x_start:x_end, i]

    new_img1 = np.swapaxes(new_img1, 0, -1)
    new_img2 = np.swapaxes(new_img2, 0, -1)
    print('Shapes:', new_img1.shape, new_img2.shape)

    for img in [new_img1, new_img2]:
        h, w = img.shape[-2:]

        # Flatten any leading batch/channel axes down to (n, h * w).
        ds = len(np.shape(img))
        if ds > 2:
            keep_shape = [
                img.shape[i] for i in np.flip(range(ds, ds - 2, -1)) - 1
            ]
            img = img.reshape(-1, *keep_shape).reshape(-1, h * w)

        # One (h * w, 3) signature of [pixel value, row, col] per image.
        sig = np.empty((len(img), h * w, 3))
        inds = np.array(
            list(itertools.product(np.arange(0, h, 1), np.arange(0, w, 1)))).T

        for n in range(len(img)):
            sig[n] = np.array([img[n], inds[0], inds[1]]).T
        sigs.append(
            cv2.normalize(sig, None, 1, 0, cv2.NORM_MINMAX, cv2.CV_32FC1))

    # Average the per-image EMD over the batch.
    dists = []
    for i in tqdm(range(len(sigs[0]))):
        dist, _, _ = cv2.EMD(sigs[0][i], sigs[1][i], cv2.DIST_L2)
        dists.append(dist)
    print(np.mean(dists))
    return np.mean(dists)
Code example #6
    def _solve_emd(cost_matrix, weights_a, weights_b):
        # Solve the transportation problem on the CPU with OpenCV and return
        # the optimal flow as a tensor on the original device and dtype.
        data_type = cost_matrix.dtype
        device = cost_matrix.device

        cost_matrix = cost_matrix.detach().cpu().numpy()
        weights_a = weights_a.detach().cpu().numpy()
        weights_b = weights_b.detach().cpu().numpy()

        _, _, flow = cv2.EMD(weights_a, weights_b, cv2.DIST_USER, cost_matrix)

        return torch.from_numpy(flow).to(device).type(data_type)
Code example #7
def emd_calc(arr1, arr2):
    arr1 = np.array(arr1)
    arr2 = np.array(arr2)
    sig1 = img_to_sig(arr1)
    sig2 = img_to_sig(arr2)
    dist, lower_bound, flow = cv2.EMD(sig1, sig2, cv2.DIST_L2)

    print(dist)
    print(lower_bound)
    print(flow)
Code example #8
def organ_emd(db, p1, p2, centroids):
    # Calculates a patient-wise EMD based on a list of organ centroids.
    # Assumes centroids is an array of shape (n_patients, n_organs, n_dims).
    def weighted_point(p):
        centroid = centroids[p, :, :]
        volumes = db.volumes[p, :].reshape(-1, 1)
        # Weight each organ centroid by the square root of its volume.
        return np.hstack([np.sqrt(volumes), centroid]).astype('float32')

    point1 = weighted_point(p1)
    point2 = weighted_point(p2)
    return cv2.EMD(point1, point2, cv2.DIST_C)[0]
Code example #9
    def get_wasserstein_distance(self, p, q):
        p_sig = np.empty((self.num_sectors, 3), dtype=np.float32)
        q_sig = np.empty((self.num_sectors, 3), dtype=np.float32)
        for i in range(self.num_sectors):
            cord_x = int(self.sector_bbs['section_{}'.format(i)]['cog'][0] -
                         self.map_x_min)
            cord_y = int(self.sector_bbs['section_{}'.format(i)]['cog'][1] -
                         self.map_y_min)
            p_sig[i] = np.array([p[i], cord_x, cord_y])
            q_sig[i] = np.array([q[i], cord_x, cord_y])
        dist, _, _ = cv2.EMD(p_sig, q_sig, cv2.DIST_L2)
        return dist
Code example #10
def get_hs_histogram_emd(hist1: np.ndarray, hist2: np.ndarray):
    assert hist1.shape == hist2.shape
    # Convert 2d histogram to needed form.
    signature1 = np.zeros((hist1.size, 3), dtype=np.float32)
    signature2 = np.zeros((hist1.size, 3), dtype=np.float32)
    for i in range(hist1.shape[0]):
        for j in range(hist1.shape[1]):
            signature1[i * hist1.shape[1] + j, :] = np.array(
                [hist1[i, j], i, j])
            signature2[i * hist1.shape[1] + j, :] = np.array(
                [hist2[i, j], i, j])

    distance = cv2.EMD(signature1, signature2, cv2.DIST_L2)[0]
    return distance
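For large histograms the double loop becomes slow; a vectorized construction (a sketch, not from the original source) that yields the same row ordering:

import numpy as np

def hist2d_to_signature(hist: np.ndarray) -> np.ndarray:
    rows, cols = np.indices(hist.shape)
    return np.stack([hist.ravel(), rows.ravel(), cols.ravel()],
                    axis=1).astype(np.float32)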
Code example #11
File: emd_utils.py Project: zhushaoquan/DeepEMD
def emd_inference_opencv(cost_matrix, weight1, weight2):
    # cost matrix is a tensor of shape [N,N]
    cost_matrix = cost_matrix.detach().cpu().numpy()

    # Clamp the weights to be strictly positive, then rescale each so it sums
    # to its own length, giving both signatures equal total mass.
    weight1 = F.relu(weight1) + 1e-5
    weight2 = F.relu(weight2) + 1e-5

    weight1 = (weight1 * (weight1.shape[0] / weight1.sum().item())).view(
        -1, 1).detach().cpu().numpy()
    weight2 = (weight2 * (weight2.shape[0] / weight2.sum().item())).view(
        -1, 1).detach().cpu().numpy()

    cost, _, flow = cv2.EMD(weight1, weight2, cv2.DIST_USER, cost_matrix)
    return cost, flow
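Hypothetical usage with random tensors; emd_utils.py is assumed to import torch, torch.nn.functional as F, numpy as np and cv2 at module level:

import torch

N = 25
cost_matrix = torch.rand(N, N)
weight1, weight2 = torch.rand(N), torch.rand(N)
emd_cost, flow = emd_inference_opencv(cost_matrix, weight1, weight2)
print(emd_cost, flow.shape)  # scalar cost and an (N, N) numpy flow matrix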
Code example #12
def classwise_emd_dist(vec, mv):
    g = int(np.sqrt(len(vec)))
    h1 = vec.reshape((g, g)).astype(np.float32)
    h2 = mv.reshape((g, g)).astype(np.float32)

    s1 = get_signature_from_heatmap(h1)
    s2 = get_signature_from_heatmap(h2)
    # s2[0] *= 1.0/27.0
    if testing_case == 2:
        s1[0] *= 0.5

    dis = cv2.EMD(s1, s2, cv2.DIST_L1)
    # dis = cv2.cv.CalcEMD2(cv2.cv.fromarray(s1), cv2.cv.fromarray(s2), cv2.cv.CV_DIST_L2)
    return dis[0]
Code example #13
def getOVHEmd(query_bin_vals, query_bin_amts, historical_bin_vals,
              historical_bin_amts):
    """
	Returns the Earth Mover's distance for a single PTV-OAR pair. 

	Parameters
	----------
	query_bin_vals : 1D NdArray
		A vector of length `n-bins + 1`. Contains the bin intervals starting at
		minimum distance, ending at maximum distance for the query patient.

	query_bin_amts : 1D NdArray
		Contains the percentage of pixels at a given distance range (`i to i + 1`)
		or less for the query patient.

	historical_bin_vals : 1D NdArray
		A vector of length `n-bins + 1`. Contains the bin intervals starting at
		minimum distance, ending at maximum distance for the historical patient.

	historical_bin_amts : 1D NdArray
		Contains the percentage of pixels at a given distance range (`i to i + 1`)
		or less for the historical patient.

	Returns
	-------
	emd : float
		The scalar earth mover's distance (dissimilarity) between the two study pairs.

	"""

    query_bin_vals = np.expand_dims(query_bin_vals[1:], axis=1)
    query_bin_amts = np.expand_dims(query_bin_amts, axis=1)
    weights = np.ones((query_bin_vals.shape[0], 1))

    query_hist = np.array(
        np.concatenate((weights, query_bin_vals, query_bin_amts), axis=1))

    historical_bin_vals = np.expand_dims(historical_bin_vals[1:], axis=1)
    historical_bin_amts = np.expand_dims(historical_bin_amts, axis=1)
    weights_historical = np.ones((historical_bin_vals.shape[0], 1))

    historical_hist = np.array(
        np.concatenate(
            (weights_historical, historical_bin_vals, historical_bin_amts),
            axis=1))

    query_hist = query_hist.astype(np.float32)
    historical_hist = historical_hist.astype(np.float32)

    emd = cv2.EMD(query_hist, historical_hist, distType=cv2.DIST_L2)[0]
    return emd
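A toy call to the function above (five bins, hence six bin edges; the numbers are purely illustrative):

import numpy as np

bin_edges = np.linspace(0.0, 50.0, 6)
query_amts = np.array([0.10, 0.20, 0.40, 0.20, 0.10])
historical_amts = np.array([0.30, 0.30, 0.20, 0.10, 0.10])
print(getOVHEmd(bin_edges, query_amts, bin_edges, historical_amts))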
Code example #14
def tumor_emd(db, p1, p2, centroids):
    # Calculates an EMD between tumors. Should be passed as a lambda function
    # to getsim, with centroids given as a normalized list of tumor centroid
    # values.
    def weighted_point(p):
        # Currently gives the largest tumor a weight of 3 and everything else
        # a weight of 1; this works better than using raw volume.
        centroid = centroids[p]
        volume = np.array([g.volume for g in db.gtvs[p]]).reshape(-1, 1)
        max_volume = volume.max()
        for v in range(len(volume)):
            if volume[v] == max_volume:
                volume[v] = 3
            else:
                volume[v] = np.sign(volume[v])
        return np.hstack([np.sqrt(volume), centroid]).astype('float32')

    point1 = weighted_point(p1)
    point2 = weighted_point(p2)
    return cv2.EMD(point1, point2, cv2.DIST_C)[0]
Code example #15
def get_hs_histogram_emd_n_m(hist1: np.ndarray, hist2: np.ndarray):
    assert hist1.shape == hist2.shape
    size = hist1.size
    shape = hist1.shape
    signature1 = np.zeros((size, 5), dtype=np.float32)
    signature2 = np.zeros((size, 5), dtype=np.float32)
    size1 = shape[1] * shape[2] * shape[3]
    size2 = shape[2] * shape[3]
    size3 = shape[3]
    for i in range(shape[0]):
        for j in range(shape[1]):
            for k in range(shape[2]):
                for t in range(shape[3]):
                    signature1[i * size1 + j * size2 + k * size3 +
                               t, :] = np.array(
                                   [hist1[i, j, k, t], i, j, k, t])
                    signature2[i * size1 + j * size2 + k * size3 +
                               t, :] = np.array(
                                   [hist2[i, j, k, t], i, j, k, t])

    distance = cv2.EMD(signature1, signature2, cv2.DIST_L2)[0]
    return distance
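The quadruple loop generalizes to any number of histogram axes; a vectorized sketch (not from the original source) that produces the same row ordering as the 4-D loop above:

import numpy as np

def histnd_to_signature(hist: np.ndarray) -> np.ndarray:
    # Rows of [weight, axis-0 index, axis-1 index, ...] in C (row-major) order.
    coords = np.indices(hist.shape).reshape(hist.ndim, -1)
    return np.concatenate([hist.reshape(1, -1), coords],
                          axis=0).T.astype(np.float32)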
Code example #16
#print chi, chi_alt, correl, hellinger, intersect, kl_div
#print hist1.__class__
#print hist1.shape
hist1_indexed = np.zeros((len(hist1), 2), np.float32)
hist1_indexed[:, 0] = hist1[:, 0]
hist1_indexed[:, 1] = range(0, len(hist1))

hist2_indexed = np.zeros((len(hist2), 2), np.float32)
hist2_indexed[:, 0] = hist2[:, 0]
hist2_indexed[:, 1] = range(0, len(hist2))

emd = cv2.EMD(hist1_indexed, hist2_indexed, cv2.DIST_L1)
print(emd)

plt.figure(0)
plt.title("no shadow")
plt.plot(hist1)
plt.figure(1)
plt.title("shadow")
plt.plot(hist2)
#plt.show()

#cv2.imshow("asd", img1)
cv2.waitKey(0)
Code example #17
import cv2
import numpy as np
import matplotlib.pyplot as plt


# image to signature for color image
def img_to_sig(img):
    sig = np.empty((img.size, 4), dtype=np.float32)
    idx = 0
    for i in range(img.shape[0]):
        for j in range(img.shape[1]):
            for k in range(img.shape[2]):
                sig[idx] = np.array([img[i, j, k], i, j, k])
                idx += 1
    return sig


# Load the images
img1 = cv2.imread('app1.png')
img2 = cv2.imread('app2.png')

sig1 = img_to_sig(img1)
sig2 = img_to_sig(img2)
distance, lowerbound, flow_matrix = cv2.EMD(sig1,
                                            sig2,
                                            cv2.DIST_L1,
                                            lowerBound=0)
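img_to_sig emits one signature row per array element (img.size rows, i.e. H x W x 3 for a color image), and the transportation solver behind cv2.EMD becomes very slow for signatures that large. A common workaround (a sketch, not part of the original script) is to shrink the images first:

small1 = cv2.resize(img1, (32, 32), interpolation=cv2.INTER_AREA)
small2 = cv2.resize(img2, (32, 32), interpolation=cv2.INTER_AREA)
distance, lowerbound, flow_matrix = cv2.EMD(img_to_sig(small1),
                                            img_to_sig(small2), cv2.DIST_L1)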
Code example #18
File: EMD.py Project: MoriZSJ/pbMoMa-1
def img_to_sig(arr):
    # cv2.EMD requires single-precision, floating-point input
    sig = np.empty((arr.size, 3), dtype=np.float32)
    count = 0
    for i in range(arr.shape[0]):
        for j in range(arr.shape[1]):
            sig[count] = np.array([arr[i, j] / arr.sum(), i, j])
            count += 1
    return sig


img1 = 'mag_Videos/tri/magtri2.jpg'
img2 = 'Truth_Patches/13.jpg'

im1 = cv2.imread(img1, 0)
im2 = cv2.imread(img2, 0)
# print(im1.shape)

h1 = cv2.calcHist([im1], [0], None, [256], [0, 256])
h2 = cv2.calcHist([im2], [0], None, [256], [0, 256])
# plt.plot(h2)
# plt.show()
# print(h1.shape)
# print(h1.sum())
sig1 = img_to_sig(h1)
sig2 = img_to_sig(h2)
# print(sig1)

dis, _, _ = cv2.EMD(sig1, sig2, cv2.DIST_L2)

print(dis)
Code example #19
File: EMD_FS.PY Project: Gelivabilities/Projects
for i in range(cols):
    # correlation coefficient
    corr[i]=abs(np.corrcoef(data[:,i],labels)[0,1])
    # EM distance
    parts=10  # number of bins
    l_kde=np.zeros([1,parts])
    m_kde=np.zeros([1,parts])
    for j in range(parts):
        l_kde[0,j]=sum((data_l[:,i]>=j/parts)==(data_l[:,i]<(j+1)/parts))/l_rows
        m_kde[0,j]=sum((data_m[:,i]>=j/parts)==(data_m[:,i]<(j+1)/parts))/m_rows
        if j==parts-1:
            l_kde[0,j]+=sum(data_l[:,i]==1)/l_rows
            m_kde[0,j]+=sum(data_m[:,i]==1)/m_rows
    E1=np.asarray([np.hstack([1,l_kde[0]]).tolist()],np.float32)
    E2=np.asarray([np.hstack([1,m_kde[0]]).tolist()],np.float32)
    EM=cv.EMD(E1,E2,cv.DIST_L1)[0]
    em[i]=EM
    # visualization
    s_l=pd.Series(data_l[:,i])
    s_m=pd.Series(data_m[:,i])
    plt.figure(figsize=(6,3))
    plt.title('feature: '+str(i+1)+', EMD='+str(EM))
    #if np.size(np.unique(data_l[:,i]))>1 and np.size(np.unique(data_m[:,i]))>1:
    #    pd.Series(data_l[:,i],name='legitimate').plot(kind='kde',color='b',legend=True)
    #    pd.Series(data_m[:,i],name='malignant').plot(kind='kde',color='r',legend=True)
    #else:
    df=pd.DataFrame(np.vstack([l_kde,m_kde]).T,columns=['legitimate','malignant'])
    df['legitimate'].plot(kind='bar',color='b',alpha=0.5,legend=True)
    df['malignant'].plot(kind='bar',color='r',alpha=0.5,grid=True,legend=True)
    
df_score=pd.DataFrame(np.vstack([corr,em]).T,columns=['Correlation','EMDistance'])
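Comparing two boolean arrays with == acts as a logical AND in the binning loop above only because the two interval conditions can never both be false at once. The manual loop is equivalent to np.histogram, which also treats the last bin as closed; an equivalent sketch using the snippet's own variables (assuming the feature values lie in [0, 1]):

l_counts, _ = np.histogram(data_l[:, i], bins=parts, range=(0.0, 1.0))
l_kde_alt = l_counts / l_rows  # matches l_kde[0], bin for bin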
Code example #20
    def _fit(self, coefficients, X, y, use_l1_penalty=True, calculate_optimal_distance=False):
        """Calculate the error between observed and estimated values for the given
        parameters and data.

        Parameters
        ----------
        coefficients : ndarray
            coefficients for each of the model's predictors

        X : pandas.DataFrame
            standardized tip attributes by timepoint

        y : pandas.DataFrame
            final weighted distances at delta time in the future from each
            timepoint in the given tip attributes table

        Returns
        -------
        float :
            error between estimated values using the given coefficients and
            input data and the observed values
        """
        # Estimate target values.
        y_hat = self.predict(X, coefficients)

        # Calculate EMD for each timepoint in the estimated values and sum that
        # distance across all timepoints.
        error = 0.0
        count = 0
        for timepoint, timepoint_df in y_hat.groupby("timepoint"):
            samples_a = timepoint_df["strain"]
            sample_a_initial_frequencies = timepoint_df["frequency"].values.astype(np.float32)
            sample_a_frequencies = timepoint_df["projected_frequency"].values.astype(np.float32)

            future_timepoint_df = y[y["timepoint"] == timepoint]
            assert future_timepoint_df.shape[0] > 0

            samples_b = future_timepoint_df["strain"]
            sample_b_frequencies = future_timepoint_df["frequency"].values.astype(np.float32)

            distance_matrix = get_distance_matrix_by_sample_names(
                samples_a,
                samples_b,
                self.distances
            ).astype(np.float32)

            # Calculate the optimal distance to the future timepoint by mapping
            # the frequency of each future strain to the closest strain in the
            # current timepoint.
            if calculate_optimal_distance:
                # For each strain in the future timepoint, identify the closest
                # strain in the current timepoint. This is an array of current
                # strain indices (one index per future strain).
                closest_strain_to_future = np.argmin(distance_matrix, axis=0)

                # Sum the frequencies of the future strains across each closest
                # strain in the current timepoint. This can and will often
                # result in a few current strains accruing most of the future
                # frequencies.
                estimated_frequencies = np.zeros_like(sample_a_frequencies)
                for i in range(sample_b_frequencies.shape[0]):
                    estimated_frequencies[closest_strain_to_future[i]] += sample_b_frequencies[i]

                # Calculate earth mover's distance to the future based on this
                # optimal (or, at least, greedy) mapping of strains between
                # timepoints. The resulting EMD value should be the best any
                # model can hope to perform and establishes a lower bound for
                # all models.
                self.optimal_model_emd, _, optimal_model_flow = cv2.EMD(
                    estimated_frequencies,
                    sample_b_frequencies,
                    cv2.DIST_USER,
                    cost=distance_matrix
                )

            # Estimate the distance between the model's estimated future and the
            # observed future populations.
            model_emd, _, self.model_flow = cv2.EMD(
                sample_a_frequencies,
                sample_b_frequencies,
                cv2.DIST_USER,
                cost=distance_matrix
            )

            error += model_emd
            count += 1

        error = error / float(count)

        if use_l1_penalty:
            l1_penalty = self.l1_lambda * np.abs(coefficients).sum()
        else:
            l1_penalty = 0.0

        return error + l1_penalty
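With cv2.DIST_USER and an explicit cost matrix, cv2.EMD accepts bare weight columns as signatures, since no coordinate columns are needed. A self-contained toy check of the pattern used above (values illustrative):

import cv2
import numpy as np

a = np.array([[0.5], [0.5]], dtype=np.float32)
b = np.array([[0.25], [0.75]], dtype=np.float32)
cost = np.array([[0.0, 1.0], [1.0, 0.0]], dtype=np.float32)
emd, _, flow = cv2.EMD(a, b, cv2.DIST_USER, cost=cost)
print(emd)  # 0.25: a quarter of the total mass moves at unit cost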
Code example #21
File: fit_model.py Project: tweetz0r/seasonal-flu
    def _fit(self, coefficients, X, y, use_l1_penalty=True):
        """Calculate the error between observed and estimated values for the given
        parameters and data.

        Parameters
        ----------
        coefficients : ndarray
            coefficients for each of the model's predictors

        X : pandas.DataFrame
            standardized tip attributes by timepoint

        y : pandas.DataFrame
            final weighted distances at delta time in the future from each
            timepoint in the given tip attributes table

        Returns
        -------
        float :
            error between estimated values using the given coefficients and
            input data and the observed values
        """
        import cv2

        # Estimate target values.
        y_hat = self.predict(X, coefficients)

        # Calculate EMD for each timepoint in the estimated values and sum that
        # distance across all timepoints.
        error = 0.0
        count = 0
        for timepoint, timepoint_df in y_hat.groupby("timepoint"):
            samples_a = timepoint_df["strain"]
            sample_a_initial_frequencies = timepoint_df[
                "frequency"].values.astype(np.float32)
            sample_a_frequencies = timepoint_df[
                "projected_frequency"].values.astype(np.float32)

            future_timepoint_df = y[y["timepoint"] == timepoint]
            assert future_timepoint_df.shape[0] > 0

            samples_b = future_timepoint_df["strain"]
            sample_b_frequencies = future_timepoint_df[
                "frequency"].values.astype(np.float32)

            distance_matrix = get_distance_matrix_by_sample_names(
                samples_a, samples_b, self.distances).astype(np.float32)

            # Estimate the distance between the model's estimated future and the
            # observed future populations.
            model_emd, _, self.model_flow = cv2.EMD(sample_a_frequencies,
                                                    sample_b_frequencies,
                                                    cv2.DIST_USER,
                                                    cost=distance_matrix)

            error += model_emd
            count += 1

        error = error / float(count)

        if use_l1_penalty:
            l1_penalty = self.l1_lambda * np.abs(coefficients).sum()
        else:
            l1_penalty = 0.0

        return error + l1_penalty
Code example #22
def train_online(env, agent, writer, num_episodes, eval_cycle,
                 num_eval_episodes, soft_update, skip_frames, history_length,
                 rendering, max_timesteps, normalize_images, state_dim,
                 init_prio, num_model_files, simple_coverage_threshold,
                 geometric_coverage_gamma, num_total_steps, store_cycle):
    print("... train agent")

    if type(env) == DoomEnv:
        sector_bbs = create_sector_bounding_box(env.state.sectors)

        map_x_min = int(
            min([sector['x_min'] for _, sector in sector_bbs.items()]))
        map_x_max = int(
            max([sector['x_max'] for _, sector in sector_bbs.items()]))

        map_y_min = int(
            min([sector['y_min'] for _, sector in sector_bbs.items()]))
        map_y_max = int(
            max([sector['y_max'] for _, sector in sector_bbs.items()]))

        map_total_area = sum(
            [sector['area'] for _, sector in sector_bbs.items()])

        uniform_sector_prob = {}
        uniform_dist_sig = np.empty((len(sector_bbs), 3), dtype=np.float32)
        for index, (k, sector) in enumerate(sorted(sector_bbs.items())):
            cord_x = int(sector['cog'][0] - map_x_min)
            cord_y = int(sector['cog'][1] - map_y_min)
            uniform_sector_prob[k] = ((cord_x, cord_y),
                                      sector['area'] / map_total_area)
            uniform_dist_sig[index] = np.array(
                [sector['area'] / map_total_area, cord_x, cord_y])
        # uniform_dist_sig = arr_to_sig(uniform_dist)
        cumulative_obs_sector_visits = {}
        cumulative_obs_sector_total_visits = 0
        # Initialize the coverage metric
        coverage_metrics = Coverage(num_sectors=len(uniform_sector_prob))

    # practically infinite episodes if num_episodes is not >= 1. also add 1 to num_episodes because agent is not trained
    # for last episode. this way, agent is evaluated num_episodes + 1 times but trained num_episodes times
    # (only matters if num_episodes is the determining criterion for number of runs)
    num_episodes = num_episodes + 1 if num_episodes >= 1 else sys.maxsize

    # quite similar to num_total_steps
    num_total_steps = num_total_steps if num_total_steps >= 1 else sys.maxsize

    total_steps = 0
    for episode_idx in range(num_episodes):
        is_last_episode = episode_idx >= (num_episodes -
                                          1) or total_steps >= num_total_steps

        # EVALUATION
        # check its performance with greedy actions only
        if eval_cycle >= 1 and num_eval_episodes >= 1 and (
                episode_idx % eval_cycle == 0 or is_last_episode):
            stats = []
            for j in range(num_eval_episodes):
                stats.append(
                    run_episode(env,
                                agent,
                                deterministic=True,
                                do_training=False,
                                max_timesteps=max_timesteps,
                                history_length=history_length,
                                skip_frames=skip_frames,
                                normalize_images=normalize_images,
                                state_dim=state_dim,
                                init_prio=init_prio,
                                rendering=rendering,
                                soft_update=False)[0])
            episode_rewards = [stat.episode_reward for stat in stats]
            episode_reward_mean, episode_reward_std = np.mean(
                episode_rewards), np.std(episode_rewards)
            print('Validation {} +- {}'.format(episode_reward_mean,
                                               episode_reward_std))
            print('Replay buffer length', agent.replay_buffer.size)
            writer.add_scalar('val_episode_reward_mean',
                              episode_reward_mean,
                              global_step=episode_idx)
            writer.add_scalar('val_episode_reward_std',
                              episode_reward_std,
                              global_step=episode_idx)

        # store model.
        if store_cycle >= 1 and (episode_idx % store_cycle == 0
                                 or is_last_episode):
            if num_model_files >= 1:
                model_files = glob.glob(os.path.join(writer.logdir, "agent*"))
                # Sort by date
                model_files.sort(key=os.path.getmtime)
                if len(model_files) > num_model_files - 1:
                    # Delete the oldest model file
                    os.remove(model_files[0])
            agent.save(
                os.path.join(writer.logdir, "agent_{}.pt".format(episode_idx)))
            print('model saved!')

        if is_last_episode:
            break

        # training episode
        print("episode %d" % episode_idx)
        max_timesteps_current = max_timesteps
        stats, losses, info, trajectory, action_values, \
        sectors, visited_sectors, sector_bbs = run_episode(env, agent, max_timesteps=max_timesteps_current,
                                                           deterministic=False,
                                                           do_training=True,
                                                           rendering=rendering,
                                                           soft_update=soft_update,
                                                           skip_frames=skip_frames,
                                                           history_length=history_length,
                                                           normalize_images=normalize_images,
                                                           state_dim=state_dim,
                                                           init_prio=init_prio)

        if len(trajectory) > 0:
            if type(env) == DoomEnv:
                objects = env.state.objects
                writer.add_figure('trajectory',
                                  figure=plot_trajectory(
                                      trajectory,
                                      sectors,
                                      sector_bbs,
                                      objects,
                                      intrinsic=agent.intrinsic),
                                  global_step=episode_idx)
                writer.add_scalar('num_visited_sectors',
                                  len(visited_sectors),
                                  global_step=episode_idx)

                simple_coverage, geometric_coverage, occupancy_density_entropy = \
                    coverage_metrics.compute_coverage(visited_sectors=visited_sectors,
                                                      K=simple_coverage_threshold * skip_frames,
                                                      gamma=geometric_coverage_gamma)
                writer.add_scalar('simple_coverage',
                                  simple_coverage,
                                  global_step=episode_idx)
                writer.add_scalar('geometric_coverage',
                                  geometric_coverage,
                                  global_step=episode_idx)
                writer.add_scalar('occupancy_density_entropy',
                                  occupancy_density_entropy,
                                  global_step=episode_idx)

                writer.add_histogram('visited_sector_ids', [
                    i for i in range(len(env.state.sectors))
                    if 'section_{}'.format(i) in visited_sectors.keys()
                ],
                                     global_step=episode_idx)

                total_visits = sum(
                    [count for _, count in visited_sectors.items()])
                obs_sector_prob = {}
                obs_dist_sig = np.empty((len(sector_bbs), 3), dtype=np.float32)
                cumulative_obs_dist_sig = np.empty((len(sector_bbs), 3),
                                                   dtype=np.float32)
                for index, (k,
                            sector) in enumerate(sorted(sector_bbs.items())):
                    cord_x = int(sector['cog'][0] - map_x_min)
                    cord_y = int(sector['cog'][1] - map_y_min)
                    obs_sector_prob[k] = ((cord_x, cord_y),
                                          visited_sectors.get(k, 0) /
                                          total_visits)
                    obs_dist_sig[index] = np.array([
                        visited_sectors.get(k, 0) / total_visits, cord_x,
                        cord_y
                    ])
                    if cumulative_obs_sector_total_visits > 0:
                        cumulative_obs_dist_sig[index] = np.array([
                            (cumulative_obs_sector_visits.get(k, (0, 0))[1] /
                             cumulative_obs_sector_total_visits), cord_x,
                            cord_y
                        ])

                if cumulative_obs_sector_total_visits > 0:
                    dist, _, _ = cv2.EMD(obs_dist_sig, cumulative_obs_dist_sig,
                                         cv2.DIST_L2)
                    writer.add_scalar(
                        'wasserstein_distance (current trajectory vs past trajectories)',
                        dist,
                        global_step=episode_idx)

                for index, (k,
                            sector) in enumerate(sorted(sector_bbs.items())):
                    cord_x = int(sector['cog'][0] - map_x_min)
                    cord_y = int(sector['cog'][1] - map_y_min)
                    cumulative_obs_sector_visits[k] = (
                        (cord_x, cord_y),
                        cumulative_obs_sector_visits.get(k, (0, 0))[1] +
                        visited_sectors.get(k, 0))
                cumulative_obs_sector_total_visits += total_visits

                current_cumulative_obs_dist_sig = np.empty(
                    (len(sector_bbs), 3), dtype=np.float32)
                for index, (k,
                            sector) in enumerate(sorted(sector_bbs.items())):
                    cord_x = int(sector['cog'][0] - map_x_min)
                    cord_y = int(sector['cog'][1] - map_y_min)
                    current_cumulative_obs_dist_sig[index] = np.array([
                        cumulative_obs_sector_visits.get(k, (0, 0))[1] /
                        cumulative_obs_sector_total_visits, cord_x, cord_y
                    ])

                dist, _, _ = cv2.EMD(obs_dist_sig, uniform_dist_sig,
                                     cv2.DIST_L2)
                writer.add_scalar(
                    'wasserstein_distance (current trajectory vs uniform distribution)',
                    dist,
                    global_step=episode_idx)

                dist, _, _ = cv2.EMD(current_cumulative_obs_dist_sig,
                                     uniform_dist_sig, cv2.DIST_L2)
                writer.add_scalar(
                    'wasserstein_distance (cumulative trajectory vs uniform distribution)',
                    dist,
                    global_step=episode_idx)
            else:
                tmp_figure = plot_trajectory(trajectory,
                                             sectors,
                                             sector_bbs,
                                             None,
                                             intrinsic=agent.intrinsic)
                writer.add_figure('trajectory',
                                  figure=tmp_figure,
                                  global_step=episode_idx)

            # Append current trajectory to save of all trajectories
            with open(os.path.join(writer.logdir, "trajectories.obj"),
                      'ab+') as fp:
                pickle.dump(trajectory, fp)

        for key, value in info.items():
            if type(value) is not str:
                writer.add_scalar('info_{}'.format(key),
                                  value,
                                  global_step=episode_idx)

        writer.add_scalar('train_loss',
                          np.mean([losses[i][0] for i in range(len(losses))]),
                          global_step=episode_idx)
        writer.add_scalar('train_td_loss',
                          np.mean([losses[i][1] for i in range(len(losses))]),
                          global_step=episode_idx)
        writer.add_scalar('train_l_i',
                          np.mean([losses[i][2] for i in range(len(losses))]),
                          global_step=episode_idx)
        writer.add_scalar('train_l_f',
                          np.mean([losses[i][3] for i in range(len(losses))]),
                          global_step=episode_idx)
        writer.add_scalar('train_episode_reward',
                          stats.episode_reward,
                          global_step=episode_idx)
        writer.add_scalar('train_episode_length',
                          stats.steps,
                          global_step=episode_idx)
        writer.add_scalar('intrinsic_episode_reward',
                          stats.intrinsic_reward,
                          global_step=episode_idx)
        for action in range(env.action_space.n):
            writer.add_scalar('action_freq_{}'.format(action),
                              stats.get_action_usage(action),
                              global_step=episode_idx)
            mean_action_value = np.mean(
                [action_values[i][action] for i in range(len(action_values))])
            writer.add_scalar('action_val_{}'.format(action),
                              mean_action_value,
                              global_step=episode_idx)

        total_steps += stats.steps
Code example #23
H1 = cv2.calcHist(images=[pts1], channels=[0], mask=None,
                  histSize=[256], ranges=[0, 256])
## cv2.normalize(H1, H1, norm_type=cv2.NORM_L1)

H2 = cv2.calcHist(images=[pts2], channels=[0], mask=None,
                  histSize=[256], ranges=[0, 256])
## cv2.normalize(H2, H2, norm_type=cv2.NORM_L1)

# 3
S1 = np.zeros((H1.shape[0], 2), dtype=np.float32)
S2 = np.zeros((H1.shape[0], 2), dtype=np.float32)
##S1[:,0] = H1[:,0]
##S2[:,0] = H2[:,0]
for i in range(S1.shape[0]):
    S1[i, 0] = H1[i, 0]
    S2[i, 0] = H2[i, 0]
    S1[i, 1] = i
    S2[i, 1] = i

emd1, lowerBound, flow = cv2.EMD(S1, S2, cv2.DIST_L1)
print('EMD(S1, S2, DIST_L1) = ', emd1)

emd2, lowerBound, flow = cv2.EMD(S1, S2, cv2.DIST_L2)
print('EMD(S1, S2, DIST_L2) = ', emd2)

emd3, lowerBound, flow = cv2.EMD(S1, S2, cv2.DIST_C)
print('EMD(S1, S2, DIST_C) = ', emd3)

plt.plot(H1, color='r', label='H1')
plt.plot(H2, color='b', label='H2')
plt.legend(loc='best')
plt.show()
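Because these signatures carry a single coordinate column, the L1, L2 and Chebyshev ground distances all reduce to |i - j| between bin indices, so the three printed EMD values should coincide.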
Code example #24
File: EMD.PY Project: Gelivabilities/Projects
p=np.random.randn(2,100000)
p1=p[0]-1.5
p2=p[1]+0.8
p_min=np.min([np.min(p1),np.min(p2)])
p_max=np.max([np.max(p1),np.max(p2)])
p1=(p1-p_min)/(p_max-p_min)
p2=(p2-p_min)/(p_max-p_min)
l=np.zeros(20)
m=np.zeros(20)
for i in range(20):
    l[i]=sum((p1>=i/20)==(p1<(i+1)/20))/100000
    m[i]=sum((p2>=i/20)==(p2<(i+1)/20))/100000
    

A=pd.DataFrame(np.vstack([a[0,1:],b[0,1:]]).T,index=[1,2,3],columns=['malignant','legitimate'])
A.plot(kind='bar',grid=True,figsize=(4,2),title='EMD: '+str(cv.EMD(a,b,cv.DIST_L1)[0]))

A1=pd.DataFrame(np.vstack([a1[0,1:],b1[0,1:]]).T,index=[1,2,3],columns=['malignant','legitimate'])
A1.plot(kind='bar',grid=True,figsize=(4,2),title='EMD: '+str(cv.EMD(a1,b1,cv.DIST_L1)[0]))

A2=pd.DataFrame(np.vstack([a2[0,1:],b2[0,1:]]).T,index=[1,2,3],columns=['malignant','legitimate'])
A2.plot(kind='bar',grid=True,figsize=(4,2),title='EMD: '+str(cv.EMD(a2,b2,cv.DIST_L1)[0]))

B=pd.DataFrame(np.vstack([c[0,1:],d[0,1:]]).T,index=[1,2,3],columns=['malignant','legitimate'])
B.plot(kind='bar',grid=True,figsize=(4,2),title='EMD: '+str(cv.EMD(c,d,cv.DIST_L1)[0]))

C=pd.DataFrame(np.vstack([e[0,1:],f[0,1:]]).T,index=[1,2,3],columns=['malignant','legitimate'])
C.plot(kind='bar',grid=True,figsize=(4,2),title='EMD: '+str(cv.EMD(e,f,cv.DIST_L1)[0]))

D=pd.DataFrame(np.vstack([g[0,1:],h[0,1:]]).T,index=[1,2,3],columns=['malignant','legitimate'])
D.plot(kind='bar',grid=True,figsize=(4,2),title='EMD: '+str(cv.EMD(g,h,cv.DIST_L1)[0]))
Code example #25
File: code.py Project: jaytimbadia/UnWrapper
def img_to_sig(arr):
    # Convert a 2-D array into [weight, row, col] signature rows.
    sig = np.empty((arr.size, 3), dtype=np.float32)
    count = 0
    for i in range(arr.shape[0]):
        for j in range(arr.shape[1]):
            sig[count] = np.array([arr[i, j], i, j])
            count += 1
    return sig


arr1 = np.array([[1, 2, 3]])

arr2 = np.array([[3, 2, 1]])

sig1 = img_to_sig(arr1)
sig2 = img_to_sig(arr2)

print(sig1)
print(sig2)

dist, _, flow = cv2.EMD(sig1, sig2, cv2.DIST_L2)

print(dist)
print(_)
print(flow)

Code example #26
File: wasserstein.py Project: zll302/PowerAI
    # Plot the marginals
    plt.figure(figsize=(6, 6))
    plotp(x, 'b')
    plotp(y, 'r')
    # plt.axis("off")
    plt.xlim(np.min(y[0, :]) - .1, np.max(y[0, :]) + .1)
    plt.ylim(np.min(y[1, :]) - .1, np.max(y[1, :]) + .1)
    plt.title("Input marginals")

    x, y = x.T, y.T
    l1 = sinkhorn_loss(x, y, epsilon, n, niter)
    l2 = sinkhorn_normalized(x, y, epsilon, n, niter)

    print("Sinkhorn loss : ", l1)
    print("Sinkhorn loss (normalized) : ", l2)

    plt.show()

    """
    n_dim = 3
    n_cut = 10
    n1 = 100
    p = np.random.rand(n1, n_dim)
    dp = np.ones((n1, 1)) / n1
    p = np.hstack((dp, p)).astype(np.float32)
    q = dist_uniform(n_dim, n_cut)
    dq = np.ones((n_cut**n_dim, 1)) / (n_cut**n_dim)
    q = np.hstack((dq, q)).astype(np.float32)
    cost, _, P = cv2.EMD(p, q, cv2.DIST_L2)