Beispiel #1
0
def generate_tracks_classification(n, dimensions, min_T=5, max_T=1001):
    """
    Generate tracks for training a classification model.

    Parameters:
    n: number of tracks to generate
    dimensions: number of dimensions (currently only supports 1 and 2)
    min_T: minimum track length
    max_T: maximum track length (e.g. for 1001 will generate tracks up to 1000 steps)

    Returns:
    tracks_array: numpy array of shape [n, max_T, dimensions] with the generated tracks
    classes: numpy array of length n with the model class per track (see andi_datasets package)
    """
    # Re-seed so repeated/parallel calls do not produce identical data
    np.random.seed()
    generator = andi.andi_datasets()
    _, _, X2, Y2, _, _ = generator.andi_dataset(N=n,
                                                min_T=min_T,
                                                max_T=max_T,
                                                tasks=[2],
                                                dimensions=[dimensions])
    # andi_dataset returns one entry per dimension; pick the requested one
    idx = dimensions - 1
    classes = np.array(Y2[idx]).astype(int)

    # Pack the ragged list of tracks into a fixed-size array
    tracks_array = package_tracks(tracks=X2[idx],
                                  max_T=max_T,
                                  dimensions=dimensions)
    return tracks_array, classes
Beispiel #2
0
    def plot_results(self, alpha):
        """Plot the exponent-prediction error against trajectory length and noise.

        Parameters:
        alpha: 2-D tensor whose columns are used as
               [prediction, target, trajectory length, model id, noise ratio]
               (inferred from the indexing below — TODO confirm against caller).

        Returns:
        fig: the matplotlib Figure with two stacked subplots
             (error vs. length per model, error vs. noise ratio overall).
        """
        models = torch.unique(alpha[:, 3])

        fig = plt.figure(figsize=(10, 10), dpi=100)
        ax1 = fig.add_subplot(211)
        ax1.set_xlabel("Trajectory length")
        ax1.set_ylabel("Mean absolute error")
        ax1.set_title("Prediction of exponent")
        for m in models:
            # Select rows of this model once instead of recomputing the mask
            mask = alpha[:, 3] == m
            x, y = smooth_point_cloud(
                alpha[mask, 2].detach().cpu().numpy(),
                torch.abs(alpha[mask, 0] -
                          alpha[mask, 1]).detach().cpu().numpy())
            try:
                curve_label = andi.andi_datasets().avail_models_name[int(m)]
            except Exception:
                # Was a bare except; kept broad on purpose — any lookup failure
                # (e.g. a sentinel model id) falls back to a generic label.
                curve_label = "all models"
            ax1.plot(x, y, label=curve_label)
        ax1.legend()

        ax2 = fig.add_subplot(212)
        ax2.set_xlabel("Noise ratio")
        ax2.set_ylabel("Mean absolute error")
        x, y = smooth_point_cloud(
            alpha[:, 4].detach().cpu().numpy(),
            torch.abs(alpha[:, 0] - alpha[:, 1]).detach().cpu().numpy())
        ax2.plot(x, y)
        plt.tight_layout()
        return fig
Beispiel #3
0
def generate_tracks_regression(n, dimensions, min_T=5, max_T=1001):
    """Generate tracks and anomalous exponents for training a regression model.

    Returns a [n, max_T, dimensions] array (tracks right-aligned, zero-padded
    on the left) and a length-n array of exponents.
    """
    # Create raw tracks with the andi_datasets generator (task 1 = regression)
    generator = andi.andi_datasets()
    X1, Y1, X2, Y2, X3, Y3 = generator.andi_dataset(N=n,
                                                    min_T=min_T,
                                                    max_T=max_T,
                                                    tasks=[1],
                                                    dimensions=[dimensions])
    idx = dimensions - 1
    exponents = np.array(Y1[idx])
    raw_tracks = X1[idx]

    # Pack the ragged tracks into a fixed-size, left-padded array
    tracks_array = np.zeros([n, max_T, dimensions])
    for i, track in enumerate(raw_tracks):
        if dimensions == 1:
            tracks_array[i, max_T - len(track):, 0] = track
        elif dimensions == 2:
            # 2-D tracks come flattened as [x..., y...]; split in half
            half = len(track) // 2
            tracks_array[i, max_T - half:, 0] = track[:half]
            tracks_array[i, max_T - half:, 1] = track[half:]

    # Preprocess before handing the array to the model
    tracks_array = preprocess_tracks(tracks_array)
    return tracks_array, exponents
Beispiel #4
0
def generate_tracks_segmentation(n, dimensions):
    """
    Generate fixed-length (200-step) tracks for training a segmentation model.

    Parameters:
    n: number of tracks to generate
    dimensions: number of dimensions (currently only supports 1 and 2)

    Returns:
    tracks_array: numpy array of shape [n, 200, dimensions] with the generated tracks
    positions: numpy array of length n with the model switch point of each track
    """
    # Re-seed so repeated/parallel calls do not produce identical data
    np.random.seed()
    generator = andi.andi_datasets()
    _, _, _, _, X3, Y3 = generator.andi_dataset(N=n,
                                                tasks=[3],
                                                dimensions=[dimensions],
                                                min_T=200,
                                                max_T=201)
    idx = dimensions - 1
    # Column 1 of Y3 holds the switch time; shift to a 0-based index
    positions = np.array(Y3[idx])[:, 1].astype(int) - 1

    # Pack the tracks into a fixed-size array
    tracks_array = package_tracks(tracks=X3[idx],
                                  max_T=200,
                                  dimensions=dimensions)
    return tracks_array, positions
Beispiel #5
0
def generate_tracks_regression(n, dimensions, min_T=5, max_T=1001):
    """
    Generate tracks for training a regression model.

    Parameters:
    n: number of tracks to generate
    dimensions: number of dimensions (currently only supports 1 and 2)
    min_T: minimum track length
    max_T: maximum track length (e.g. for 1001 will generate tracks up to 1000 steps)

    Returns:
    tracks_array: numpy array of shape [n, max_T, dimensions] with the generated tracks
    exponents: numpy array of length n with the anomalous exponent of each track
    """
    # Re-seed to avoid duplicated data across repeated/parallel calls
    np.random.seed()
    generator = andi.andi_datasets()
    _, Y1_all, _, _, _, _ = (lambda r: r)(None)  # placeholder removed below
def example_trajs():
    """Generate one example trajectory per diffusion model and save it as a PDF.

    Saves one line+scatter plot per model (5 models) to
    ``data/part0/example_traj/<model name>.pdf``. User-facing messages are in
    Polish ("Generating and saving example trajectories").
    """
    print('Generowanie i zapisywanie przykładowych trajektorii...')
    path = 'data/part0/example_traj'
    dirmake(path)
    logg('Generowanie przykładowych trajektorii - start')
    AD = andi.andi_datasets()
    for model in range(5):
        try:
            # exponent 0.7 is subdiffusive; not every model supports it
            dataset = AD.create_dataset(100, 1, [0.7], [model], 2)
        except Exception:
            # Narrowed from a bare except (which also caught KeyboardInterrupt);
            # fall back to a superdiffusive exponent for models that need one.
            dataset = AD.create_dataset(100, 1, [1.7], [model], 2)

        # Row layout: [model, exponent, x_0..x_99, y_0..y_99]
        x = dataset[0][2:102]
        y = dataset[0][102:]
        plt.figure(figsize=(2, 2))
        plt.cla()
        plt.gca().spines['top'].set_visible(False)
        plt.gca().spines['right'].set_visible(False)
        plt.xlabel('x')
        plt.ylabel('y')
        plt.title(AD.avail_models_name[model], loc='left')
        plt.plot(x, y, color=colors[model], linewidth=2, alpha=0.5)
        plt.scatter(x,
                    y,
                    c=range(len(x)),  # color-code by time step
                    cmap=color_maps[model],
                    marker='.',
                    s=100)
        plt.savefig(path + '/' + str(AD.avail_models_name[model]) + '.pdf',
                    transparent=True,
                    bbox_inches='tight',
                    dpi=300)
        # Close the figure: the original leaked one open figure per model
        plt.close()
    logg('Generowanie przykładowych trajektorii - stop')
    print(' --- ZAKOŃCZONO')
def generate_balanced_dataset(N, dimensions, save, save_path):
    """
    Simple wrapper for generation of a balanced dataset for task 1 in the ANDI challenge.

    :param N: int, number of trajectories
    :param dimensions: list of ints, dimensions for which to generate datasets
    :param save: bool, whether to save data
    :param save_path: string, the directory for saving data
    :return X1, Y1, X2, Y2, X3, Y3: numpy arrays, X and Y data in 3 dimensions
            (if a dimension was not requested, that array is empty)
    """
    # Resolve the output directory relative to the project root and create it
    parent_dir = os.path.dirname(os.getcwd())
    out_dir = os.path.join(parent_dir, save_path)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    generator = andi.andi_datasets()
    X1, Y1, X2, Y2, X3, Y3 = generator.andi_dataset(N=N,
                                                    tasks=1,
                                                    dimensions=dimensions,
                                                    save_dataset=save,
                                                    path_datasets=out_dir)
    return X1, Y1, X2, Y2, X3, Y3
Beispiel #8
0
def generate_tracks_segmentation(n, dimensions):
    """Generate 200-step tracks plus switch points for the segmentation task.

    Returns a [n, 200, dimensions] array and a length-n array of 0-based
    model switch indices.
    """
    # Create raw tracks (task 3 = segmentation)
    generator = andi.andi_datasets()
    _, _, _, _, X3, Y3 = generator.andi_dataset(N=n,
                                                tasks=[3],
                                                dimensions=[dimensions])
    idx = dimensions - 1
    # Column 1 of Y3 holds the switch time; shift to a 0-based index
    positions = np.array(Y3[idx])[:, 1].astype(int) - 1
    raw_tracks = X3[idx]

    # Pack the tracks into a fixed-size array
    tracks_array = np.zeros([n, 200, dimensions])
    for i, track in enumerate(raw_tracks):
        if dimensions == 1:
            tracks_array[i, :, 0] = track
        elif dimensions == 2:
            # 2-D tracks come flattened as [x..., y...]; split in half
            half = len(track) // 2
            tracks_array[i, :, 0] = track[:half]
            # shift the second coordinate so it also starts at zero
            tracks_array[i, :, 1] = track[half:] - track[half]

    # Preprocess before handing the array to the model
    tracks_array = preprocess_tracks(tracks_array)
    return tracks_array, positions
Beispiel #9
0

def make_dir(path):
    """Create *path* (including parents) unless it already exists."""
    if os.path.exists(path):
        return
    os.makedirs(path)


# Script body: generate a 1-D task-1 dataset of fixed length `l` (defined
# elsewhere in this file, together with `N`) and dump it as a ';'-separated
# CSV with one 'pos;label' row per trajectory.
data_path = './origin_data/'
pp_data_path = './pp_data/'
make_dir(data_path)
make_dir(pp_data_path)

filename = data_path + 'data-1d-{}.csv'.format(l)
output = pp_data_path + 'data-1d-{}-pp.csv'.format(l)

AD = andi.andi_datasets()
X1, Y1, X2, Y2, X3, Y3 = AD.andi_dataset(N=N,
                                         max_T=l + 1,
                                         min_T=l,
                                         tasks=1,
                                         dimensions=1)

with open(filename, 'w') as f:
    f.write('pos;label\n')
    for i in range(len(X1[0])):
        # positions comma-joined, then ';' and the label
        f.write(','.join([str(j) for j in X1[0][i]]))
        f.write(';' + str(Y1[0][i]) + '\n')
    # Removed redundant f.close(): the with-statement already closes the file.

# Free the (potentially large) raw arrays before further processing
del X1, Y1
gc.collect()
Beispiel #10
0
def create_custom_dataset(N,
                          max_T=1000,
                          min_T=10,
                          dimensions=[1, 2, 3],
                          save=True):
    """Create a normalized, noisy, class-balanced trajectory dataset per dimension.

    Parameters:
    N: target number of trajectories (approximate; per-class counts are ceiled)
    max_T: maximum trajectory length
    min_T: minimum trajectory length (trajectories are cut to a random length)
    dimensions: iterable of dimensions to generate (never mutated)
    save: if True, pickle each per-dimension DataFrame to DATA_PATH

    Returns:
    df: the DataFrame for the LAST dimension processed, with columns
        ['dim', 'model', 'exp', 'x', 'len'].
    """
    ad = andi_datasets()
    exponents = np.arange(0.05, 2.01, 0.05)
    n_exp, n_models = len(exponents), len(ad.avail_models_name)
    # Trajectories per model and exponent. Arbitrarily chosen to fulfill balanced classes
    N_per_model = np.ceil(1.6 * N / 5)
    subdif, superdif = n_exp // 2, n_exp // 2 + 1
    num_per_class = np.zeros((n_models, n_exp))
    num_per_class[:2, :subdif] = np.ceil(N_per_model / subdif)  # ctrw, attm
    num_per_class[2, :] = np.ceil(N_per_model / (n_exp - 1))  # fbm
    num_per_class[2, exponents == 2] = 0  # fbm can't be ballistic
    num_per_class[3, subdif:] = np.ceil((N_per_model / superdif) * 0.8)  # lw
    num_per_class[4, :] = np.ceil(N_per_model / n_exp)  # sbm

    for dim in dimensions:
        dataset = ad.create_dataset(T=max_T,
                                    N=num_per_class,
                                    exponents=exponents,
                                    dimension=dim,
                                    models=np.arange(n_models))

        # Normalize trajectories
        n_traj = dataset.shape[0]
        norm_trajs = normalize(dataset[:, 2:].reshape(n_traj * dim, max_T))
        dataset[:, 2:] = norm_trajs.reshape(dataset[:, 2:].shape)

        # Add localization error, Gaussian noise with sigma = [0.1, 0.5, 1]
        loc_error_amplitude = np.random.choice(np.array([0.1, 0.5, 1]),
                                               size=n_traj * dim)
        loc_error = (np.random.randn(n_traj * dim, int(max_T)).transpose() *
                     loc_error_amplitude).transpose()
        dataset = ad.create_noisy_localization_dataset(dataset,
                                                       dimension=dim,
                                                       T=max_T,
                                                       noise_func=loc_error)

        # Add random diffusion coefficients
        trajs = dataset[:, 2:].reshape(n_traj * dim, max_T)
        displacements = trajs[:, 1:] - trajs[:, :-1]
        # Get new diffusion coefficients and displacements
        diffusion_coefficients = np.random.randn(trajs.shape[0])
        new_displacements = (displacements.transpose() *
                             diffusion_coefficients).transpose()
        # Generate new trajectories and add to dataset
        new_trajs = np.cumsum(new_displacements, axis=1)
        new_trajs = np.concatenate((np.zeros(
            (new_trajs.shape[0], 1)), new_trajs),
                                   axis=1)
        dataset[:, 2:] = new_trajs.reshape(dataset[:, 2:].shape)

        # Collect rows first and build the frame once: DataFrame.append was
        # removed in pandas 2.0 and copied the whole frame per row (O(n^2)).
        rows = []
        for traj in dataset:
            mod, exp, x = int(traj[0]), traj[1], traj[2:]
            x = cut_trajectory(x, np.random.randint(min_T, max_T), dim=dim)
            x = tensor(x).view(dim, -1).T
            rows.append({
                'dim': dim,
                'model': mod,
                'exp': exp,
                'x': x,
                'len': len(x)
            })
        df = pd.DataFrame(rows,
                          columns=['dim', 'model', 'exp', 'x', 'len'],
                          dtype=object)

        if save:
            DATA_PATH.mkdir(exist_ok=True)
            ds_path = DATA_PATH / f"custom{dim}.pkl"
            df.to_pickle(ds_path, protocol=pickle.HIGHEST_PROTOCOL)

    return df
def example_TAMSD():
    """Generate example TAMSD (time-averaged MSD) plots and save them as PDFs.

    Produces four figures under ``data/part0/example_TAMSD``:
    TAMSD.pdf (linear scale), TAMSD_loglog.pdf (log-log), and two
    noise-free reference plots (perfect_TAMSD.pdf, perfect_TAMSD_loglog.pdf).
    User-facing strings and axis labels are in Polish (left as-is).
    """
    print('Generowanie i zapisywanie przykładowych TAMSD...')
    path = 'data/part0/example_TAMSD'
    dirmake(path)
    logg('Generowanie przykładowych TAMSD - start')

    # One 2-D trajectory of model 2 with exponent 0.7; row layout is
    # presumably [model, exponent, x_0..x_199, y_0..y_199] — TODO confirm
    # against the andi_datasets documentation.
    AD = andi.andi_datasets()
    dataset = AD.create_dataset(200, 1, [0.7], [2], 2)
    x = dataset[0][2:202]
    y = dataset[0][202:]
    trajectory = [x, y]
    D, expo, expo_est, tamsds = TAMSD_estimation(trajectory, 0.7, 0, 'A')
    tamsds = tamsds[:100]  # keep only the first 100 lags
    t = range(1, len(tamsds) + 1)
    # Re-estimate the exponent on the truncated curve
    expo_est = estimate_expo(t, tamsds, D, 100)

    # Linear-scale plot: TAMSD points vs. fitted and true power-law curves
    # (labels: "TAMSD points", "estimated reference curve", "true reference curve")
    plt.cla()
    plt.figure(figsize=(3, 3))
    plt.plot(t, tamsds, '.', label='punkty TAMSD')
    plt.plot(t, [4 * D * i**expo_est for i in t],
             'b',
             label=r'Wyestymowana krzywa wzorcowa')
    plt.plot(t, [4 * D * i**expo for i in t],
             'r',
             label=r'Prawdziwa krzywa wzorcowa')
    plt.xlabel('t')
    plt.ylabel(r'$\rho(t)$')
    plt.title('c', loc='left')
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.savefig(path + '/TAMSD.pdf',
                transparent=True,
                bbox_inches='tight',
                dpi=300)

    # Same data on a log-log scale
    plt.cla()
    plt.loglog(t, tamsds, '.', label='punkty TAMSD')
    plt.loglog(t, [4 * D * i**expo_est for i in t],
               'b',
               label=r'Wyestymowana krzywa TAMSD')
    plt.loglog(t, [4 * D * i**expo for i in t],
               'r',
               label=r'Prawdziwa krzywa wzorcowa')
    plt.xlabel('t')
    plt.ylabel(r'$\rho(t)$')
    plt.legend(loc='lower left', bbox_to_anchor=(1.05, 1))
    plt.title('d', loc='left')
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.savefig(path + '/TAMSD_loglog.pdf',
                transparent=True,
                bbox_inches='tight',
                dpi=300)

    # Ideal (noise-free) TAMSD curves rho(t) = 4*D*t^alpha on a linear scale
    plt.cla()
    D = 0.3
    t = [0.1 * i for i in range(101)]
    exps = [0.7, 1, 1.3]
    # Labels: superdiffusion / normal diffusion / subdiffusion.
    # The negative index below reverses the list, so 0.7 -> 'subdyfuzja'
    # and 1.3 -> 'superdyfuzja', pairing each exponent with its regime.
    label = ['superdyfuzja', 'dyfuzja normalna', 'subdyfuzja']
    for expo in exps:
        plt.plot(t, [4 * D * i**expo for i in t],
                 color=colors[exps.index(expo)],
                 label=r'$\alpha=\ $' + str(expo) + ' - ' +
                 label[-exps.index(expo) - 1])
    plt.xlabel('t')
    plt.ylabel(r'$\rho(t)$')
    plt.legend(loc='lower left', bbox_to_anchor=(1.05, 1), ncol=3)
    plt.title('a', loc='left')
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.savefig(path + '/perfect_TAMSD.pdf',
                transparent=True,
                bbox_inches='tight',
                dpi=300)

    # Ideal TAMSD curves on a log-log scale (D reset to 1)
    plt.cla()
    D = 1
    t = [0.1 * i for i in range(101)]
    exps = [0.7, 1, 1.3]
    for expo in exps:
        plt.loglog(t, [4 * D * i**expo for i in t],
                   color=colors[exps.index(expo)],
                   label=r'$\alpha=\ $' + str(expo))
    plt.xlabel('t')
    plt.ylabel(r'$\rho(t)$')
    plt.title('b', loc='left')
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.savefig(path + '/perfect_TAMSD_loglog.pdf',
                transparent=True,
                bbox_inches='tight',
                dpi=300)

    logg('Generowanie przykładowych TAMSD - stop')
    print(' --- ZAKOŃCZONO')