Example #1
    def __init__(self, pos_wt=0.0, qtn_wt=0.0, learn_flag=False):
        super().__init__()
        # Pairwise (L2) distances for the position and quaternion terms
        self.pos_loss = PairwiseDistance()
        self.qtn_loss = PairwiseDistance()
        self.learn_flag = learn_flag
        # Loss weights, registered as (optionally learnable) parameters
        self.pos_wt = torch.nn.Parameter(torch.Tensor([pos_wt]),
                                         requires_grad=self.learn_flag)
        self.qtn_wt = torch.nn.Parameter(torch.Tensor([qtn_wt]),
                                         requires_grad=self.learn_flag)
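For context, torch.nn.PairwiseDistance computes the batched p-norm distance between corresponding rows of two tensors; the weights above simply scale two such distances. A minimal standalone sketch (tensor shapes are illustrative, not taken from the example):

import torch
from torch.nn import PairwiseDistance

pdist = PairwiseDistance(p=2)   # Euclidean distance, applied row by row
a = torch.randn(4, 3)           # batch of 4 three-dimensional vectors
b = torch.randn(4, 3)
d = pdist(a, b)                 # shape (4,): d[i] is the distance between a[i] and b[i]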
Example #2
    def __init__(self, lambda_reconstruction=1):
        super(BaurLoss, self).__init__()

        self.lambda_reconstruction = lambda_reconstruction
        self.lambda_gdl = 0

        # Per-sample L1/L2 reconstruction terms: flatten each sample, then
        # take the pairwise distance between reconstruction and target.
        self.l1_loss = lambda x, y: PairwiseDistance(p=1)(x.view(
            x.shape[0], -1), y.view(y.shape[0], -1)).sum()
        self.l2_loss = lambda x, y: PairwiseDistance(p=2)(x.view(
            x.shape[0], -1), y.view(y.shape[0], -1)).sum()
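The two lambdas flatten each sample to a vector and reduce the per-sample distances with .sum(). A hedged sketch of the same pattern on a toy batch (shapes are illustrative):

import torch
from torch.nn import PairwiseDistance

x = torch.randn(2, 1, 8, 8)   # reconstruction batch
y = torch.randn(2, 1, 8, 8)   # target batch
l1 = PairwiseDistance(p=1)(x.view(x.shape[0], -1), y.view(y.shape[0], -1)).sum()
l2 = PairwiseDistance(p=2)(x.view(x.shape[0], -1), y.view(y.shape[0], -1)).sum()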
Example #3
def create_classes_data_frame(dataset_name, distance="cosine", tsne_dimension=2):
    """Create a new classes dataframe for the specified dataset. The dataset must be registered in the project settings.
    The data frame is pickled before the function returns, to avoid re-computing it later.

    Args:
      dataset_name: the name of the dataset
      distance: which distance function to use for nearest neighbor computation. Either 'cosine' or 'pairwise' (Default value = "cosine")
      tsne_dimension: the number of dimensions for the lower-dimensional vector projections (Default value = 2)

    Returns:
      a pandas DataFrame with "classes", "vectors" (document embeddings), "distances" and "tsne" columns

    """
    dataset_dir = DATA_SOURCES[dataset_name]["images"]
    paths = classes_set(dataset_dir)
    classes = pd.DataFrame(columns=["classes", "vectors", "distances", "tsne"])
    classes["classes"] = sorted(list(paths))
    tqdm.pandas(desc="Removing special characters.")
    classes["classes"] = classes["classes"].progress_apply(lambda cls: " ".join(re.split(r"[_\-]", cls)))
    tqdm.pandas(desc="Applying full clean.")
    classes["classes"] = classes["classes"].progress_apply(full_clean)
    tqdm.pandas(desc="Creating document vectors.")
    vectors = torch.tensor(np.vstack(classes["classes"].progress_apply(document_vector)))
    classes["vectors"] = list(vectors)  # one embedding tensor per row
    p_dist = PairwiseDistance(p=2) if distance == "pairwise" else CosineSimilarity()
    distances = p_dist(  # distance (or cosine similarity) from every class vector to every class vector
        vectors.repeat_interleave(vectors.shape[0], 0),  # each row repeated num_classes times
        vectors.repeat(vectors.shape[0], 1),  # the whole set tiled num_classes times
    ).reshape(
        vectors.shape[0], -1
    )  # 2D matrix with shape [vectors.shape[0], vectors.shape[0]]
    classes["distances"] = list(distances)  # one row of the distance matrix per class
    classes["tsne"] = list(torch.tensor(TSNE(n_components=tsne_dimension).fit_transform(vectors)))
    pickle.dump(classes, open(os.path.join(dataset_dir, "classes.pickle"), "wb"))
    return classes
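The repeat_interleave/repeat pairing builds every (i, j) combination so that a single PairwiseDistance call yields the full distance matrix. A self-contained sketch of the same trick, independent of the surrounding project code:

import torch
from torch.nn import PairwiseDistance

x = torch.randn(5, 8)                    # 5 embeddings of dimension 8
n = x.shape[0]
pdist = PairwiseDistance(p=2)
flat = pdist(x.repeat_interleave(n, 0),  # row i repeated n consecutive times
             x.repeat(n, 1))             # the whole set tiled n times
dist_matrix = flat.reshape(n, n)         # dist_matrix[i, j]: L2 distance between x[i] and x[j]

torch.cdist(x, x) produces the same matrix directly, without the explicit repetition.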
Example #4
def evaluate_model(settings_model: ModelSettings, settings_data: DataSettings):
    data_loader_validate: DataLoader = get_validation_data_loader(
        settings_model, settings_data)

    distance_l2: Module = PairwiseDistance(2).cuda()
    model: Module = ModelBuilder.build(
        settings_model.model_architecture,
        settings_model.embedding_dim,
        imagenet_pretrained=False,
    )
    model = model.cuda()

    checkpoint = load_checkpoint(checkpoint_path=settings_data.checkpoint_path,
                                 model=model)
    model = checkpoint.model
    epoch_last = checkpoint.epoch

    model.eval()

    figure_name = f"roc_eval_{epoch_last}.png"
    figure_path: Path = settings_data.output_dir.joinpath(figure_name)

    metrics: EvaluationMetrics = evaluate(model, distance_l2,
                                          data_loader_validate, figure_path)
    pprint(dataclasses.asdict(metrics))
Example #5
    def ccdist(self, input1, input2):
        """Calculate the pairwise distance between two empirical samples."""
        output = torch.empty(len(input1), len(input2))
        pdist = PairwiseDistance(p=2)

        # For each row of input1, compute its L2 distance to every row of input2
        for i in range(len(input1)):
            dup_input1 = input1[i].repeat(len(input2), 1)
            output[i] = pdist(dup_input1, input2)

        return output
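The explicit loop in ccdist can also be vectorized: torch.cdist computes the full n1 x n2 distance matrix in one call. A small sketch with made-up inputs:

import torch

a = torch.randn(10, 4)                 # illustrative samples
b = torch.randn(7, 4)
dist_matrix = torch.cdist(a, b, p=2)   # shape (10, 7), same values the loop above would produce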
Example #6
    def nearest_center_face_point(self, point: torch.Tensor) -> torch.Tensor:
        """
        Returns the closest face center of the box to the given point
        Args:
            point: tensor (3,) x,y,z coordinate: an arbitrary point in 3d space

        Returns: tensor (3,) x,y,z coordinate: the face center of the box closest to the function argument

        """
        face_centers = self.get_face_centers()
        return face_centers[torch.argmin(PairwiseDistance()(
            face_centers, point.expand(6, 3)))]
Example #7
    def nearest_center_face_distance_from_point(
            self, point: torch.Tensor) -> torch.Tensor:
        """ Returns the distance from the closest face center to the given point

        Args:
            point: tensor (3,) of a coordinate to check distance from this box's face centers
        Returns: scalar tensor with the minimum distance

        """
        centers = self.get_face_centers()
        return torch.min(PairwiseDistance()(centers,
                                            point.expand(6, 3)))
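Both methods reduce to broadcasting the query point against the six face centers and taking the argmin/min of the pairwise distances. A standalone sketch with made-up tensors:

import torch
from torch.nn import PairwiseDistance

face_centers = torch.randn(6, 3)                      # illustrative box face centers
point = torch.tensor([0.5, -1.0, 2.0])
dists = PairwiseDistance()(face_centers, point.expand(6, 3))
nearest_center = face_centers[torch.argmin(dists)]    # closest face center, shape (3,)
nearest_distance = torch.min(dists)                   # its distance to the point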
Example #8
def get_top_k(query_embedding, queried_embeddings, k, distance):
    """Returns the distances and indices of the k nearest embeddings in the `queried_embeddings` tensor to the
    `query_embedding` tensor.

    Args:
      query_embedding: tensor with the embedding of the query image.
      queried_embeddings: tensor with the stacked embeddings of the queried dataset.
      k: the number of most similar images to be returned.
      distance: which distance function to use for nearest neighbor computation. Either 'cosine' or 'pairwise'

    Returns:
      the closest k embeddings in the `queried_embeddings` tensor to the `query_embedding`: a 2-tuple of
      shape-`[k]` tensors with their distances and indices, respectively.

    """
    p_dist = PairwiseDistance(
        p=2) if distance == "pairwise" else CosineSimilarity()
    distances = p_dist(queried_embeddings, query_embedding)
    # Cosine similarity ranks neighbors by the largest values; L2 distance by the smallest.
    return torch.topk(distances, k, largest=(distance != "pairwise"))
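A hypothetical call, with the query and gallery tensors made up for illustration:

import torch

query = torch.randn(128)          # embedding of the query image
gallery = torch.randn(1000, 128)  # stacked embeddings of the queried dataset
values, indices = get_top_k(query, gallery, k=5, distance="cosine")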
Example #9
    def __init__(self,
                 name,
                 model,
                 data_set,
                 optimizer,
                 scheduler,
                 criterion,
                 plot,
                 batch_size=64,
                 max_epoch=50,
                 log_interval=15):
        train, val, test = data_set
        self.batch_size = batch_size
        self.train_set = DataLoader(train,
                                    batch_size=batch_size,
                                    shuffle=True,
                                    pin_memory=True,
                                    drop_last=True)
        self.test_set = DataLoader(test,
                                   batch_size=batch_size,
                                   shuffle=False,
                                   pin_memory=True,
                                   drop_last=True)
        self.validate_set = DataLoader(val,
                                       batch_size=batch_size,
                                       shuffle=False,
                                       pin_memory=False,
                                       drop_last=True)
        self.distance = PairwiseDistance()
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.criterion = criterion
        self.name = name
        self.model = model
        self.plot = plot
        self.max_epoch = max_epoch
        self.log_interval = log_interval
        self.best_accuracy = 0
Example #10
    def __init__(self,
                 name,
                 model,
                 data_set,
                 optimizer,
                 scheduler,
                 criterion,
                 plot,
                 batch_size=64,
                 max_epoch=50,
                 log_interval=15):
        train, val, test = data_set
        self.batch_size = batch_size

        self.train_set = tuple([
            DataLoader(t, batch_size=batch_size, drop_last=True) for t in train
        ])
        self.train_batches = len(self.train_set[0])
        self.train_len = len(self.train_set[0].dataset)
        self.test_set = tuple([
            DataLoader(t, batch_size=batch_size, drop_last=True) for t in test
        ])
        self.test_batches = len(self.test_set[0])
        self.validate_set = tuple([
            DataLoader(v, batch_size=batch_size, drop_last=True) for v in val
        ])
        self.validate_len = len(self.validate_set[0].dataset)

        self.distance = PairwiseDistance()
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.criterion = criterion
        self.model = model
        self.plot = plot
        self.max_epoch = max_epoch
        self.log_interval = log_interval
        self.name = name
        self.best_accuracy = 0
Example #11
    noisy_train_data = [{
        'text': [x['text']],
        'label': x['label']
    } for x in original_train_data]
    for item in noisy_train_data_raw:
        idx = item['idx']
        noisy_train_data[idx]['text'].append(item['text'])

    learning_rate_lst = [5e-8, 5e-7]
    batch_size_lst = [5]
    original_loss_tradeoff_lst = [0.75, 0.50,
                                  1.0]  # 1.0 means no stability loss

    standard_loss_fn = F.cross_entropy
    stability_loss_fn = PairwiseDistance(p=2)  # L2 distance for stability loss

    for learning_rate in learning_rate_lst:
        for batch_size in batch_size_lst:
            for original_loss_tradeoff in original_loss_tradeoff_lst:
                is_this_model_trained = False
                model_prefix = 'NAT_{}_finetune_lr{}_bs{}_tradeoff{}'.format(
                    checkpoint.split('/')[-1].replace('.pt', ''),
                    learning_rate, batch_size, original_loss_tradeoff)

                # Skip configurations that already have a trained checkpoint
                done_model_lst = os.listdir(best_model_save_path)
                for done_model in done_model_lst:
                    if model_prefix in done_model:
                        is_this_model_trained = True
                if is_this_model_trained:
                    print('{} is already trained, continue...'.format(
                        model_prefix))
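The snippet does not show how original_loss_tradeoff combines the two losses; a minimal sketch of one plausible mixing scheme, assuming the stability term compares predictions on the original and noisy inputs (all names here are illustrative):

import torch
import torch.nn.functional as F
from torch.nn import PairwiseDistance

stability_loss_fn = PairwiseDistance(p=2)

def combined_loss(logits_orig, logits_noisy, labels, tradeoff):
    standard = F.cross_entropy(logits_orig, labels)                  # supervised term
    stability = stability_loss_fn(logits_orig, logits_noisy).mean()  # L2 between the two prediction vectors
    return tradeoff * standard + (1.0 - tradeoff) * stability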
Example #12
import numpy as np
import torch
from torch.nn import PairwiseDistance

cpu = torch.device("cpu")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

l2_dist = PairwiseDistance(2)


def rank1(embeddings_anc, embeddings_pos, clf):
    n = len(embeddings_anc)
    n_good = 0

    A = conjagate_matrix(embeddings_anc, embeddings_pos, clf)
    for i, anc_base_dists in enumerate(A):
        j = np.argmin(anc_base_dists)
        if i == j:
            n_good += 1
    return n_good / n


def roc_curve(embeddings_anc, embeddings_pos, clf):
    A = conjagate_matrix(embeddings_anc, embeddings_pos, clf)
    A = (A - A.min()) / (A.max() - A.min())
    trshs = []
    tprs = [0]
    fprs = [0]
    for th in np.sort(np.unique(A.ravel())):
        tpr, fpr = tpr_fpr(A, th)
Example #13
def train(
    settings_model: ModelSettings,
    settings_data: DataSettings,
    settings_federated: FederatedSettings,
):
    output_dir: Path = settings_data.output_dir
    output_dir_logs = output_dir.joinpath("logs")
    output_dir_plots = output_dir.joinpath("plots")
    output_dir_checkpoints = output_dir.joinpath("checkpoints")
    output_dir_tensorboard = output_dir.joinpath("tensorboard")

    output_dir_logs.mkdir(exist_ok=True, parents=True)
    output_dir_plots.mkdir(exist_ok=True, parents=True)
    output_dir_checkpoints.mkdir(exist_ok=True, parents=True)

    model_architecture = settings_model.model_architecture

    start_epoch: int = 0
    global_step: int = 0

    data_loader_validate: DataLoader = get_validation_data_loader(
        settings_model, settings_data)

    model: Module = ModelBuilder.build(
        settings_model.model_architecture,
        settings_model.embedding_dim,
        settings_model.pretrained_on_imagenet,
    )

    print("Using {} model architecture.".format(model_architecture))

    # Load model to GPU or multiple GPUs if available
    if torch.cuda.is_available():
        print("Using single-gpu training.")
        model.cuda()

    if settings_data.checkpoint_path:
        checkpoint = load_checkpoint(settings_data.checkpoint_path, model,
                                     None)
        model = checkpoint.model
        start_epoch = checkpoint.epoch
        global_step = checkpoint.global_step

    # Start Training loop

    face_local_meta_dataset = FaceMetaDataset(
        root_dir=settings_data.dataset_local_dir,
        csv_name=settings_data.dataset_local_csv_file,
        min_images_per_class=2,
    )

    face_remote_meta_dataset = FaceMetaDataset(
        root_dir=settings_data.dataset_remote_dir,
        csv_name=settings_data.dataset_remote_csv_file,
        min_images_per_class=1,
    )

    l2_distance = PairwiseDistance(2).cuda()

    tensorboard = Tensorboard(output_dir_tensorboard)

    federated_training(
        model=model,
        global_step=global_step,
        start_epoch=start_epoch,
        face_local_meta_dataset=face_local_meta_dataset,
        face_remote_meta_dataset=face_remote_meta_dataset,
        validate_dataloader=data_loader_validate,
        settings_federated=settings_federated,
        settings_model=settings_model,
        tensorboard=tensorboard,
        distance_fn=l2_distance,
        checkpoint_path=output_dir_checkpoints,
    )