Example #1
0
    # DatasetInfo(name="simple", neigh=1000, relevant_neigh=4),
    # DatasetInfo(name="simple", neigh=3000, relevant_neigh=4)
]

if __name__ == "__main__":
    args = __pars_args__()
    # device = torch.device("cuda:{}".format(args.device) if args.use_cuda else "cpu")
    for dataset in DATASETS:
        print("\n\n---------------")
        print("{}".format(dataset))
        # prefix = "{}_neigh-{}_rel-{}".format(dataset.name, dataset.neigh, dataset.relevant_neigh)
        prefix = "{}".format(dataset.name)
        args.dataset_prefix = prefix

        args.max_neighbors = dataset.neigh
        input_embeddings, target_embeddings, neighbor_embeddings, edge_types, mask_neighbor = get_embeddings(
            path.join("data", args.data_dir), prefix=prefix)
        train_dataset = CDataset(path.join("data", args.data_dir),
                                 "{}_{}".format(prefix, args.train_file_name))
        eval_dataset = CDataset(path.join("data", args.data_dir),
                                "{}_{}".format(prefix, args.eval_file_name))
        test_dataset = CDataset(path.join("data", args.data_dir),
                                "{}_{}".format(prefix, args.test_file_name))

        train_dataloader = DataLoader(train_dataset,
                                      batch_size=args.batch_size,
                                      shuffle=True,
                                      num_workers=2,
                                      drop_last=True)
        eval_dataloader = DataLoader(eval_dataset,
                                     batch_size=args.eval_batch_size,
                                     shuffle=False,
                        help="Iteration number.")
    parser.add_argument("--use_cuda",
                        "-cuda",
                        type=bool,
                        default=False,
                        help="Use cuda computation")

    return parser.parse_args()


if __name__ == "__main__":
    args = __pars_args__()
    risk_tsfm = RiskToTensor(args.data_dir)
    attribute_tsfm = AttributeToTensor(args.data_dir)
    input_embeddings, target_embeddings, neighbor_embeddings, seq_len = get_embeddings(
        args.data_dir, args.customer_file_name, args.neighbors_file_name,
        args.embedding_dim, risk_tsfm, attribute_tsfm)

    # customer_id_2_customer_idx = pickle.load(open("../data/customers/customerid_to_idx.bin", "rb"))
    # customer_idx_2_neighbors_idx = pickle.load(open("../data/customers/customeridx_to_neighborsidx.bin", "rb"))

    train_dataset = CustomerDataset(args.data_dir, args.train_file_name)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=1,
                                  drop_last=True)

    eval_dataset = CustomerDataset(args.data_dir, args.eval_file_name)
    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=args.batch_size,
Example #3
0
def main(settings,
         dataset_number=5,
         image_size=200,
         padding=2,
         n_clusters=None,
         out_file='../img.png'):
    """Build and save a 2-D image-collage layout for one dataset.

    Pipeline: load images and their NN embeddings, project embeddings to
    2-D (MDS initialized by PCA, unless disabled), cluster with k-means,
    pick a representative image per cluster, compute image sizes and
    positions, expand until no two images overlap, then run a series of
    optional "shrink" passes to compact the layout, and finally render
    the collage to *out_file*.

    Parameters
    ----------
    settings : object
        Option holder with boolean flags consumed here: ``shuffle``,
        ``no_mds``, ``no_intra``, ``no_inter``, ``no_xy``, ``no_shake``,
        ``no_final``. Each ``no_*`` flag disables one layout stage.
    dataset_number : int
        Dataset index passed to ``helper.get_images`` / ``helper.get_embeddings``.
    image_size : int
        Base pixel size used when computing per-image sizes.
    padding : int
        Minimum spacing enforced between images by the overlap checks.
    n_clusters : int | None
        Fixed k for k-means; ``None`` lets ``compute.k_means`` choose.
    out_file : str
        Path the rendered collage image is saved to.
    """
    # Load images and get embeddings from NN
    imgs = helper.get_images(dataset_number)
    embeddings = helper.get_embeddings(dataset_number, imgs)
    print('loaded {} images'.format(len(imgs)))

    # NOTE(review): this shuffles `imgs` but NOT `embeddings`, which were
    # computed from the pre-shuffle order — the image<->embedding row
    # correspondence used below appears to break. Confirm this is intended.
    if settings.shuffle:
        random.shuffle(imgs)

    # Compute 2D embeddings with MDS
    if settings.no_mds:
        # Debug/ablation path: random 2-D points instead of an MDS projection.
        em_2d = np.random.random((len(imgs), 2))
    else:
        # PCA result seeds MDS so the stress minimization starts near a
        # reasonable linear projection.
        em_2d = compute.mds(embeddings, init=compute.pca(embeddings))

    # Perform clustering
    cluster_centers, labels = compute.k_means(em_2d, k_default=n_clusters)
    print('clusters:', len(cluster_centers))
    print('sizes of clusters: ', end='')
    for l in range(max(labels) + 1):
        # `labels == l` relies on labels being a NumPy array (elementwise
        # comparison summed to a count).
        print(sum(labels == l), end=', ')
    print()

    # Representative images: highest-silhouette member of each cluster is
    # treated as that cluster's anchor in the later shrink passes.
    silhouettes = compute.get_silhouettes(em_2d, labels)
    representative = compute.get_representative(em_2d, cluster_centers, labels,
                                                silhouettes)

    # Sizes and positions of the images
    ratios = helper.get_image_size_ratios(imgs)
    sizes = compute.get_sizes(image_size, em_2d, ratios, cluster_centers,
                              labels, representative)
    positions = compute.get_positions(em_2d, image_size)

    # Expand as long as overlaps occur - gradually increase space between images
    # NOTE(review): unbounded loop — assumes scaling positions by 1.05 always
    # eventually resolves overlaps (true if sizes stay fixed); no iteration cap.
    iters = 0
    while compute.overlap(positions, sizes, padding):
        positions *= 1.05
        iters += 1
    print('overlap resolved in {} iterations'.format(iters))

    # dists[0] is the baseline distance matrix; each later stage appends its
    # own so the final scores measure distortion relative to this baseline.
    dists = [compute.get_distances(positions)]

    # Overlapping resolved, now "shrink" towards representative images
    if not settings.no_intra:
        positions = compute.shrink_intra(positions, sizes, representative,
                                         labels, padding)
        dists.append(compute.get_distances(positions))

    if not settings.no_inter:
        # Move clusters closer together by same factor
        positions = compute.shrink_inter1(positions, sizes, representative,
                                          labels, padding)
        dists.append(compute.get_distances(positions))

        # Move clusters closer together separately by different factors
        positions = compute.shrink_inter2(positions, sizes, representative,
                                          labels, padding)
        dists.append(compute.get_distances(positions))

    if not settings.no_xy and not settings.no_intra:
        # Shrink by x and y separately
        positions = compute.shrink_xy(positions, sizes, representative, labels,
                                      padding)
        dists.append(compute.get_distances(positions))

    if not settings.no_shake:
        # "Shake" images with small offsets
        for _ in range(10):
            positions = compute.shrink_with_shaking(positions, sizes, padding)
        dists.append(compute.get_distances(positions))

    if not settings.no_final and not settings.no_intra:
        # Shrink to finalize positions
        positions = compute.shrink_xy(positions, sizes, representative, labels,
                                      padding)
        dists.append(compute.get_distances(positions))
        # Second pass with smaller steps for a tighter final packing.
        positions = compute.shrink_xy(positions,
                                      sizes,
                                      representative,
                                      labels,
                                      padding,
                                      smaller=True)
        dists.append(compute.get_distances(positions))

        if not settings.no_inter:
            positions = compute.shrink_inter2(positions, sizes, representative,
                                              labels, padding)
            dists.append(compute.get_distances(positions))

    # Render and save the collage.
    im = helper.plot(imgs, positions, sizes)
    im.save(out_file)
    # helper.plot_clusters(em_2d, cluster_centers, labels, representative)

    # Compare every stage's distance matrix against the baseline dists[0].
    scores = list(map(lambda d: compute.compare_distances(dists[0], d), dists))

    print('\nscores:')
    # scores[0] is the baseline compared with itself, so it is skipped.
    for i, s in enumerate(scores[1:]):
        print('{:.3f},'.format(s), end=' ')
Example #4
0
import torch
import pickle
import random
from helper import get_embeddings
from os import path
BASE_DIR = path.join("..", "..", "data", "pems")

if __name__ == "__main__":
    # Smoke test for the PEMS embedding files: load every tensor and print
    # a randomly chosen sample so the shapes/values can be eyeballed.
    # Use the module-level BASE_DIR constant instead of rebuilding the
    # same path.join("..", "..", "data", "pems") expression inline.
    input_embeddings, target_embeddings, neighbor_embeddings, edge_types, mask_neighbor = get_embeddings(
        BASE_DIR)
    # NOTE(review): assumes the dataset has at least 10001 rows — the random
    # index is not bounded by the actual tensor length; confirm.
    idx = random.randint(0, 10000)
    print(input_embeddings[idx, :, 0])
    print(target_embeddings[idx])
    # .t() transposes the 2-D slice so each neighbor is printed as a row.
    print(neighbor_embeddings[idx, :, :, 0].t())
    # pickle.dump(neighbor_embeddings/10, open(path.join(BASE_DIR, "simple_neighbor_embeddings.bin"), "wb"))
Example #5
0
                        default=102,
                        help="Iteration number.")

    parser.add_argument('--eval_step',
                        type=int,
                        default=10,
                        help='How often do an eval step')
    parser.add_argument('--save_rate',
                        type=float,
                        default=0.9,
                        help='How often do save an eval example')
    return parser.parse_args()


if __name__ == "__main__":
    args = __pars_args__()
    input_embeddings, target_embeddings, neighbor_embeddings, edge_types, mask_neighbor = get_embeddings(
        path_join("..", "data", args.data_dir), prefix=args.dataset_prefix)
    model = RNNJointAttention(args.input_dim,
                              args.hidden_dim,
                              args.output_dim,
                              args.n_head,
                              args.time_windows,
                              dropout_prob=args.drop_prob,
                              temperature=args.temp)

    # model = StructuralRNN(args.input_dim, args.hidden_size, args.output_size, args.num_layers, args.max_neighbors, input_embeddings.size(1),
    #                       dropout_prob=args.drop_prob)

    n_params = get_param_numbers(model)
    print(n_params)