Exemple #1
0
def fix_seed(seed: int,
             deterministic: bool = False,
             benchmark: bool = False) -> None:
    """
    Seed PyTorch, NumPy and pure-Python ``random`` with one value.

    ``PYTHONHASHSEED`` in the process environment is overwritten with the
    same value, written as a string.

    Examples:
        >>> import torch
        >>> import numpy as np
        >>> fix_seed(0)
        >>> x = torch.randn(...)
        >>> y = np.random.randn(...)

    Args:
        seed (int): random state (seed) passed to every generator.
        deterministic (bool): when true and CUDA is available,
            ``cudnn.deterministic`` is set to ``True``.
        benchmark (bool): when true and CUDA is available,
            ``cudnn.benchmark`` is set to ``False``; when false the flag is
            left as it is.
            NOTE(review): a true ``benchmark`` therefore *disables* the cuDNN
            benchmark mode -- confirm this is the intended meaning.

    Returns:
        None
    """
    std_seed(seed)
    os_environ["PYTHONHASHSEED"] = str(seed)
    np_seed(seed)
    torch.manual_seed(seed)

    # The cuDNN switches below are only touched when a CUDA device is present.
    if not cuda_is_available():
        return
    if deterministic:
        cudnn.deterministic = True
    if benchmark:
        cudnn.benchmark = False
Exemple #2
0
def generate_images(model,
                    batch,
                    mask_descriptors,
                    num_samples=64,
                    temp=1.,
                    verbose=False):
    """Sample completions for every image/mask pair.

    Each image of ``batch`` is combined with each entry of
    ``mask_descriptors`` and ``num_samples`` completions are drawn for the
    pair, i.e. batch.size(0) * len(mask_descriptors) * num_samples
    completions overall. The model is moved to the GPU when CUDA is
    available, otherwise to the CPU.

    Parameters
    ----------
    model : pixconcnn.models.pixel_constrained.PixelConstrained instance

    batch : torch.Tensor

    mask_descriptors : list of mask_descriptor
        See utils.masks.MaskGenerator for allowed mask_descriptors.

    num_samples : int
        Number of samples to generate for a given image-mask combination.

    temp : float
        Temperature for sampling.

    verbose : bool
        If True prints progress information while generating images

    Returns
    -------
    list of list of dict
        One inner list per image holding one dict per mask descriptor, with
        the keys "orig_img", "cond_pixels", "mask", "samples" and "log_probs".
    """
    target = "cuda" if cuda_is_available() else "cpu"
    device = torch_device(target)
    model.to(device)

    results = []
    for img_idx in range(batch.size(0)):
        per_image = []
        for descriptor in mask_descriptors:
            if verbose:
                print("Generating samples for image {} using mask {}".format(
                    img_idx, descriptor))
            # Slice (instead of index) so the batch dimension is kept.
            image = batch[img_idx:img_idx + 1]
            generator = MaskGenerator(model.prior_net.img_size, descriptor)
            mask = generator.get_masks(1)
            # Conditioning pixels from which the completions are sampled.
            conditioning = get_repeated_conditional_pixels(
                image, mask, model.prior_net.num_colors, num_samples)
            conditioning = conditioning.to(device)
            samples, log_probs = model.sample(conditioning,
                                              return_likelihood=True,
                                              temp=temp)
            per_image.append({
                "orig_img": image,
                "cond_pixels": conditioning,
                "mask": mask,
                "samples": samples,
                "log_probs": log_probs
            })
        results.append(per_image)
    return results
def select_device(gpu_if_possible: bool):
    """
    Return the name of the device that should handle computations.

    ``'cuda:0'`` is returned only when a GPU is both requested through
    ``gpu_if_possible`` and reported as available; in every other case the
    result is ``'cpu'``.
    """
    # Availability is only queried when a GPU was actually requested.
    use_gpu = gpu_if_possible and cuda_is_available()
    return 'cuda:0' if use_gpu else 'cpu'
Exemple #4
0
 def __init__(
     self,
     dataset: Dataset,
     instance: Instance,
     model: Model,
     optimizer: Optimizer,
     scheduler: Any,
     epochs: int,
     batch_size: int,
     run_dir_path: Path,
     eval_every: int,
     limit_epochs_at: Optional[int],
     train_eval_split: float,
     metric_names: List[str],
     selection_metric: str,
     kept_checkpoints: int,
     cuda_device: Optional[int],
 ) -> None:
     """Store the run configuration and place the model on its device.

     Most arguments are kept unchanged as attributes of the same name. The
     exceptions are:

     * ``limit_epochs_at``: ``None`` is stored as positive infinity.
     * ``cuda_device``: ``None`` selects the CPU; any other value selects
       ``cuda:<cuda_device>`` and sets ``use_cuda``.
     * ``model``: stored after ``model.to(self.device)``.
     * ``metric_names``: each name is looked up in the module-level
       ``_metrics`` mapping; both the metric objects and a reverse
       metric -> name mapping are kept.

     Raises:
         RuntimeError: if ``cuda_device`` is given but CUDA is not available.
     """
     self.dataset = dataset
     self.instance = instance
     self.optimizer = optimizer
     self.scheduler = scheduler
     self.epochs = epochs
     self.batch_size = batch_size
     self.run_dir_path = run_dir_path
     self.eval_every = eval_every
     self.train_eval_split = train_eval_split
     # No limit is represented by infinity.
     if limit_epochs_at is None:
         self.limit_epochs_at = float("inf")
     else:
         self.limit_epochs_at = limit_epochs_at
     self.selection_metric = selection_metric
     self.kept_checkpoints = kept_checkpoints
     self._checkpoints_stats: List[Tuple[float, str]] = []

     # Device selection: an explicit CUDA index must be backed by a working
     # CUDA installation, otherwise construction is aborted.
     if cuda_device is None:
         self.use_cuda = False
         self.device = torch_device("cpu")
     else:
         if not cuda_is_available():
             raise RuntimeError("CUDA is not available on this system.")
         self.use_cuda = True
         self.device = torch_device("cuda:%d" % cuda_device)
     self.model = model.to(self.device)

     self._checkpoints_dir = self.run_dir_path / "checkpoints"
     self._writers: Dict[DataType, SummaryWriter] = {}
     # Two-level mapping whose innermost values start out as empty lists.
     self._accumulated_metrics: Dict[DataType, Dict[
         Metric, List[float]]] = defaultdict(lambda: defaultdict(list))
     self._metrics = [_metrics[name] for name in metric_names]
     self._metric_names = dict(zip(self._metrics, metric_names))
     # Number of characters in the decimal representation of ``epochs``.
     self._epochs_size = len(str(epochs))
Exemple #5
0
    def __init__(
        self,
        dpr_fn: str,
        tokenizer_fn: str,
        tokenizer_max_len: int,
    ):
        """Load the DPR reader and its tokenizer and pick a device.

        Args:
            dpr_fn: argument handed to ``DPRReader.from_pretrained``.
            tokenizer_fn: argument handed to
                ``DPRReaderTokenizer.from_pretrained``.
            tokenizer_max_len: stored on the instance and forwarded to the
                tokenizer as ``max_len``.

        The reader is moved to ``'cuda'`` when CUDA is available and to
        ``'cpu'`` otherwise; the chosen device string is kept in
        ``self.device``.
        """
        self.tokenizer_max_len = tokenizer_max_len
        self.dpr = DPRReader.from_pretrained(dpr_fn)
        self.tokenizer = DPRReaderTokenizer.from_pretrained(
            tokenizer_fn,
            max_len=tokenizer_max_len,
        )

        if cuda_is_available():
            chosen = 'cuda'
        else:
            chosen = 'cpu'
        self.dpr.to(chosen)
        self.device = chosen
def make_results_reproducible(model_is_convolutional: bool = False) -> None:
    """
    Fix all the relevant seeds to 0 so that the instructions that follow
    produce deterministic outputs.

    When ``model_is_convolutional`` is true and CUDA is available, cuDNN
    benchmarking is additionally switched off and its deterministic mode is
    switched on.
    """
    seed = 0
    random_seed(seed)
    numpy_seed(seed)
    torch_manual_seed(seed)

    # GPU convolution optimizations may introduce additional stochasticity,
    # so the cuDNN switches only matter for convolutional models on CUDA.
    if not (model_is_convolutional and cuda_is_available()):
        return

    # Benchmarking picks among alternative convolution implementations, and
    # that choice is stochastic because of noise and hardware: disable it.
    cudnn.benchmark = False
    # Ensure deterministic algorithms are employed for convolutional
    # operations.
    cudnn.deterministic = True
Exemple #7
0
        if vc_num == 512:
            extractor = models.vgg16(pretrained=True).features
        elif vc_num == 256:
            extractor = models.vgg16(pretrained=True).features[:12]
        elif vc_num == 128:
            extractor = models.vgg16(pretrained=True).features[:10]

elif nn_type[:6] == 'resnet' or nn_type == 'resnext' or nn_type == 'alexnet':
    layer = 'last'  # 'last','second'
    extractor = resnet_feature_extractor(nn_type, layer)

elif nn_type == 'unet':
    layer = 'pool5'
    path_to_unet = os.path.join(Directories.CHECKPOINTS, unet_filename)
    unet = UNet(pretrained=True)
    device = device('cuda:0' if cuda_is_available() else 'cpu')
    unet.load_state_dict(
        load(path_to_unet, map_location=device)['model_state_dict'])

    unet_ones = UNet(pretrained=False)
    unet_ones = unet_ones.get_features()

    if vc_num == 1024:
        extractor = unet.get_features()[:24]
    elif vc_num == 512:
        extractor = unet.get_features()[:19]
    elif vc_num == 256:
        extractor = unet.get_features()[:15]
    elif vc_num == 128:
        extractor = unet.get_features()[:10]
    else:
Exemple #8
0
from .draft import RelEnc as RelationalEncoder, Encoder, RelationHead, RelationHead2, RelationHead3, RelationHeadLong, RelationHeadCone, aggregate
from .from_paper import MultiCIFAR10, train_transform
from .datasets import MRCData, MRCSampler
from torch.utils.data import DataLoader
from torch import save as save_model
from torch.cuda import is_available as cuda_is_available
# Module-level device string: "cuda" when CUDA is reported as available at
# import time, "cpu" otherwise.
device = "cuda" if cuda_is_available() else "cpu"


def relation_head(i):
    """Return the relation-head class stored at position ``i``.

    The positions are, in order: ``RelationHead``, ``RelationHead2``,
    ``RelationHead3``, ``RelationHeadLong`` and ``RelationHeadCone``.
    Indexing follows normal list rules, so an out-of-range ``i`` raises
    ``IndexError``.
    """
    heads = [
        RelationHead,
        RelationHead2,
        RelationHead3,
        RelationHeadLong,
        RelationHeadCone,
    ]
    return heads[i]
Exemple #9
0
def train_classifiers(cfg,
                      annot_dataloader,
                      count_celltypes,
                      count_classes,
                      track=False,
                      test_data=None):
    """Fit a cell-type classifier and a form classifier on the same batches.

    Both models are ``Classifier`` instances taking
    ``cfg.classifier_input_dim`` features. Each one has its own Adam
    optimizer and cross-entropy loss and is updated once per batch, the
    cell-type classifier first.

    Args:
        cfg: configuration object. Fields read here: ``use_cuda``,
            ``classifier_input_dim``, ``classifier_epochs``, ``verbose`` and
            the ``celltype_clf_*`` / ``form_clf_*`` optimizer settings.
        annot_dataloader: iterable of ``(expression, form, cell_type)``
            batches. ``form`` and ``cell_type`` are reduced with
            ``argmax(-1)`` (presumably one-hot encoded -- confirm with the
            caller).
        count_celltypes: output size of the cell-type classifier.
        count_classes: output size of the form classifier.
        track: when true, accuracy on ``test_data`` is measured once after
            the last epoch and the metrics dictionary is returned as well.
        test_data: iterable with the same batch layout as
            ``annot_dataloader``; only read when ``track`` is true.

    Returns:
        ``(celltype_clf, form_clf)``, or
        ``(celltype_clf, form_clf, metrics)`` when ``track`` is true.
        ``metrics`` holds one mean loss per epoch and, at most, a single
        accuracy entry per classifier.
    """
    gpu_enabled = cfg.use_cuda and cuda_is_available()
    device = torch.device('cuda' if gpu_enabled else 'cpu')

    input_dim = cfg.classifier_input_dim
    celltype_clf = Classifier(inp_size=input_dim, out_size=count_celltypes)
    form_clf = Classifier(inp_size=input_dim, out_size=count_classes)
    if gpu_enabled:
        celltype_clf = celltype_clf.cuda()
        form_clf = form_clf.cuda()

    celltype_clf_opt = torch.optim.Adam(celltype_clf.parameters(),
                                        weight_decay=cfg.celltype_clf_wdecay,
                                        lr=cfg.celltype_clf_lr)
    form_clf_opt = torch.optim.Adam(form_clf.parameters(),
                                    weight_decay=cfg.form_clf_wdecay,
                                    lr=cfg.form_clf_lr)

    celltype_criterion = nn.CrossEntropyLoss()
    form_criterion = nn.CrossEntropyLoss()

    def _update(clf, optimizer, criterion, inputs, targets):
        # One optimization step for one classifier; returns the batch loss.
        loss = criterion(clf(inputs), targets)
        loss_value = loss.item()
        optimizer.zero_grad()
        # retain_graph=True is kept as in the original -- presumably so that
        # inputs carrying their own autograd history can be back-propagated
        # through more than once (TODO confirm).
        loss.backward(retain_graph=True)
        optimizer.step()
        return loss_value

    print('\n')
    metrics = {
        'celltype_losses': [],
        'form_losses': [],
        'celltype_accuracy': [],
        'form_accuracy': [],
        'epoch': []
    }
    for epoch in range(cfg.classifier_epochs):
        epoch_no = epoch + 1
        if cfg.verbose == 'all':
            print(f'\rTraining classifier [{epoch_no}/{cfg.classifier_epochs}]',
                  end='')
        celltype_clf.train()
        form_clf.train()
        metrics['epoch'].append(epoch_no)

        celltype_loss_sum = 0.
        form_loss_sum = 0.
        n_batches = 0
        for exp_, form_, cell_type_ in annot_dataloader:
            inputs = exp_.to(device)
            form_target = form_.argmax(-1).to(device)
            celltype_target = cell_type_.argmax(-1).to(device)

            celltype_loss_sum += _update(celltype_clf, celltype_clf_opt,
                                         celltype_criterion, inputs,
                                         celltype_target)
            form_loss_sum += _update(form_clf, form_clf_opt, form_criterion,
                                     inputs, form_target)
            n_batches += 1

        metrics['celltype_losses'].append(celltype_loss_sum / n_batches)
        metrics['form_losses'].append(form_loss_sum / n_batches)

    if not track:
        return celltype_clf, form_clf

    # Evaluation runs a single time, after the last training epoch.
    celltype_clf.eval()
    form_clf.eval()
    n_eval_batches = 0
    celltype_accuracy = 0.
    form_accuracy = 0.
    for exp_, form_, cell_type_ in test_data:
        inputs = exp_.to(device)
        true_form = form_.argmax(-1).cpu().detach().numpy()
        true_celltype = cell_type_.argmax(-1).cpu().detach().numpy()

        guessed_celltype = celltype_clf(inputs).cpu().detach().numpy().argmax(
            -1)
        celltype_accuracy += (guessed_celltype == true_celltype).mean()

        guessed_form = form_clf(inputs).cpu().detach().numpy().argmax(-1)
        form_accuracy += (guessed_form == true_form).mean()

        n_eval_batches += 1
    metrics['celltype_accuracy'].append(celltype_accuracy / n_eval_batches)
    metrics['form_accuracy'].append(form_accuracy / n_eval_batches)

    return celltype_clf, form_clf, metrics
Exemple #10
0
def test(cfg,
         vae_model,
         discrim,
         annot_dataloader,
         test_expression,
         class_ohe_test,
         celltype_test,
         pretrained_classifiers=None,
         dataset_name: str = '') -> Dict[str, Union[float, str, None]]:
    """Evaluate an autoencoder on held-out data and return a metrics dict.

    Every test sample is repeated once per class and pushed through
    ``vae_model`` with each possible target class ("style transfer"). The
    original samples are also reconstructed with their own class. Classifiers
    on expression and on latents are then used to score cell-type and class
    prediction on reconstructed and transferred data.

    Args:
        cfg: config object. Fields read here: ``use_cuda``, ``count_classes``,
            ``count_labels``, ``verbose``, ``input_dim`` and ``batch_size``.
            ``cfg.classifier_input_dim`` is overwritten as a side effect.
        vae_model: callable as ``vae_model(expression, classes)``; element
            ``[0]`` of its result (unwrapped once more if it is a tuple) is
            used as the output expression. Must also provide
            ``latents(expression, classes)`` and ``latent_dim``.
        discrim: deprecated, unused.
        annot_dataloader: dataloader used to train the expression
            classifiers. When it is not ``None``, ``dataset.tensors[0]`` and
            ``dataset.tensors[2]`` are read as expression and cell-type
            labels for the train accuracy.
        test_expression: held-out expression array (used with NumPy
            arithmetic and ``np.repeat``).
        class_ohe_test: class labels of the held-out samples, one column per
            class (``shape[1]`` is the number of transfer targets).
        celltype_test: cell-type labels of the held-out samples, reduced with
            ``argmax(1)``.
        pretrained_classifiers: optional pair of classifiers used on
            expression in place of freshly trained ones. The latent
            classifiers are always trained here.
            NOTE(review): the original code crashed whenever this argument
            was supplied; the ``(celltype_clf, form_clf)`` layout is assumed
            from the original indexing -- confirm with callers.
        dataset_name: not used by this function.

    Returns:
        Dictionary mapping metric names to values. The two "transfered to
        another classes" entries hold the string
        ``"Not enoug classes for transfering"`` when no sample is transferred
        to a different class.
    """
    #STYLE TRANSFER
    ge_transfer_raw = np.repeat(test_expression,
                                class_ohe_test.shape[1],
                                axis=0)
    ge_transfer_raw = Tensor(ge_transfer_raw)
    init_classes_transfer = np.repeat(class_ohe_test,
                                      class_ohe_test.shape[1],
                                      axis=0)
    init_classes_transfer = Tensor(init_classes_transfer)
    init_celltypes_transfer = np.repeat(celltype_test,
                                        class_ohe_test.shape[1],
                                        axis=0)
    init_celltypes_transfer = Tensor(init_celltypes_transfer)

    device = torch.device('cpu')
    if cfg.use_cuda and cuda_is_available():
        device = torch.device('cuda')
        ge_transfer_raw = ge_transfer_raw.cuda()
        init_classes_transfer = init_classes_transfer.cuda()
        init_celltypes_transfer = init_celltypes_transfer.cuda()

    # Row r of the transfer targets is the one-hot vector of class
    # r % n_classes, so each repeated sample is paired with every class once.
    target_classes_transfer = np.zeros(
        (class_ohe_test.shape[0] * class_ohe_test.shape[1],
         class_ohe_test.shape[1]))
    target_classes_transfer[np.arange(target_classes_transfer.shape[0]),
                            np.arange(target_classes_transfer.shape[0]) %
                            target_classes_transfer.shape[1]] = 1

    target_classes_transfer = Tensor(target_classes_transfer)
    if cfg.use_cuda and cuda_is_available():
        target_classes_transfer = target_classes_transfer.cuda()

    transfer_expression_tensor = vae_model(ge_transfer_raw,
                                           target_classes_transfer)[0]
    if isinstance(transfer_expression_tensor, tuple):
        transfer_expression_tensor = transfer_expression_tensor[0]
    transfer_expression_np = transfer_expression_tensor.cpu().detach().numpy()

    test_expression_tensor = Tensor(test_expression)
    class_ohe_test_tensor = Tensor(class_ohe_test)
    if cfg.use_cuda and cuda_is_available():
        test_expression_tensor = test_expression_tensor.cuda()
        class_ohe_test_tensor = class_ohe_test_tensor.cuda()
    reconstruction = vae_model(test_expression_tensor,
                               class_ohe_test_tensor)[0]
    if isinstance(reconstruction, tuple):
        reconstruction = reconstruction[0]
    reconstruction = reconstruction.cpu().detach().numpy()

    mse = (reconstruction - test_expression)**2

    # Calibration: for each sample, the target class with the smallest mean
    # squared change is compared with the sample's own class.
    residual_transfer = ge_transfer_raw.cpu().numpy() - transfer_expression_np
    res_norm = (residual_transfer**2).mean(1).reshape(
        (-1, cfg.count_classes))  #l2 norm,
    res_equal = class_ohe_test.argmax(1)
    res_equal = res_equal if isinstance(
        res_equal, np.ndarray) else res_equal.cpu().detach().numpy()

    if cfg.verbose in ('all', 'test'):
        print('Mean square error reconstructed expression:', np.mean(mse))
        print('Mean values of test expression:', test_expression.mean())
        print('Mean values of reconstructed expression:',
              reconstruction.mean())

        print('part of test expression <0.5 mean:',
              (test_expression < 0.5).mean())
        print('part reconstructed exrression <0.5 mean:',
              (reconstruction < 0.5).mean())
        print('Calibration accuracy', (res_norm.argmin(1) == res_equal).mean())
        print('\n')

    test_latents = vae_model.latents(test_expression_tensor,
                                     class_ohe_test_tensor)
    transfered_latents = vae_model.latents(ge_transfer_raw,
                                           target_classes_transfer)

    #train
    if not isinstance(test_latents, torch.Tensor):
        test_latents = torch.Tensor(test_latents)
    if not isinstance(transfered_latents, torch.Tensor):
        transfered_latents = torch.Tensor(transfered_latents)
    if cfg.use_cuda and cuda_is_available():
        test_latents = test_latents.cuda()
        transfered_latents = transfered_latents.cuda()

    # Expression classifiers: trained here unless a pretrained pair is given.
    # (Originally the pretrained pair was bound to unused names and the
    # classifiers called below stayed None.)
    if pretrained_classifiers is None:
        cfg.classifier_input_dim = cfg.input_dim
        celltype_clf_expr, form_clf_expr = train_classifiers(
            cfg, annot_dataloader, celltype_test.shape[1], cfg.count_classes)
    else:
        celltype_clf_expr = pretrained_classifiers[0]
        form_clf_expr = pretrained_classifiers[1]

    # Latent classifiers are always trained on this model's latents.
    cfg.classifier_input_dim = vae_model.latent_dim
    # NOTE(review): the third tensor (consumed by train_classifiers as the
    # cell-type labels) is random noise, so the latent cell-type classifier
    # is fitted on random targets -- confirm whether celltype_test was meant.
    latents_dataset = torch.utils.data.TensorDataset(
        test_latents, class_ohe_test_tensor,
        torch.randn(test_latents.shape[0], cfg.count_labels))
    latents_dataloader = torch.utils.data.DataLoader(
        latents_dataset,
        batch_size=cfg.batch_size,
        shuffle=True,
        drop_last=True)
    celltype_clf_latents, form_clf_latents = train_classifiers(
        cfg, latents_dataloader, celltype_test.shape[1], cfg.count_classes)

    if isinstance(test_latents, Tensor):
        test_latents_np = test_latents.cpu().detach().numpy()
    if isinstance(transfered_latents, Tensor):
        transfered_latents_np = transfered_latents.cpu().detach().numpy()

    # entropy batch mixing
    # Each latent set contributes only to its own key. Originally every score
    # was added to both 'test' and 'transfered' regardless of the key.
    ebm_score = {'test': [0.], 'transfered': [0.]}
    for n in range(1):
        for lat, bind, key in zip(
            (test_latents_np, transfered_latents_np),
            (class_ohe_test_tensor, target_classes_transfer),
            ('test', 'transfered')):

            batch_ind = bind.argmax(1).cpu().numpy()
            ind = list(combinations(np.unique(batch_ind), 2))
            for i in ind:
                a = (batch_ind == i[0])
                b = (batch_ind == i[1])
                condition = np.logical_or(a, b)

                #Important breakpoint
                ebm_score[key][n] += entropy_batch_mixing(
                    lat[condition], batch_ind[condition])

            if ind:
                ebm_score[key][n] /= len(ind)

    # KNN Purity
    test_purity = []
    transfered_purity = []
    test_purity.append(get_knn_purity(test_latents_np,
                                      celltype_test.argmax(1)))
    transfered_purity.append(
        get_knn_purity(transfered_latents_np,
                       target_classes_transfer.argmax(dim=1).cpu().numpy()))

    celltype_train_labelenc = None
    if annot_dataloader is not None:
        celltype_train_labelenc = annot_dataloader.dataset.tensors[2].argmax(
            1).cpu().numpy()
    celltype_test_raw_labelenc = celltype_test.argmax(1)
    celltype_test_labelenc = init_celltypes_transfer.argmax(1).cpu().numpy()
    #form_test_labelenc = init_classes_transfer.argmax(1).cpu().numpy()
    form_test_labelenc = target_classes_transfer.argmax(1).cpu().numpy()

    predicted_celltype_test_raw = celltype_clf_expr(test_expression_tensor)
    predicted_celltype_test = celltype_clf_expr(transfer_expression_tensor)
    predicted_celltype_train = None
    if annot_dataloader is not None:
        annot_expression = annot_dataloader.dataset.tensors[0].to(device)
        predicted_celltype_train = celltype_clf_expr(annot_expression)
    predicted_form_test = form_clf_expr(transfer_expression_tensor)
    #predict celltypes and batch indices by model's latents
    predicted_celltype_test_latents = celltype_clf_latents(test_latents)
    predicted_celltype_transfer_latents = celltype_clf_latents(
        transfered_latents)
    predicted_form_latents = form_clf_latents(transfered_latents)

    # classifier on expression
    predicted_celltype_test_labelenc = predicted_celltype_test.argmax(
        1).cpu().detach().numpy()
    predicted_celltype_test_raw_labelenc = predicted_celltype_test_raw.argmax(
        1).cpu().detach().numpy()
    predicted_celltype_train_labelenc = None
    if predicted_celltype_train is not None:
        predicted_celltype_train_labelenc = predicted_celltype_train.argmax(
            1).cpu().detach().numpy()

    # True where a repeated sample is "transferred" to its own class, i.e.
    # plain reconstruction.
    sameclass_mask = (init_classes_transfer.argmax(
        1) == target_classes_transfer.argmax(1)).cpu().numpy().astype('bool')
    has_transfer = (~sameclass_mask).any()

    celltype_train_accuracy = None
    if predicted_celltype_train_labelenc is not None:
        celltype_train_accuracy = (predicted_celltype_train_labelenc ==
                                   celltype_train_labelenc).mean()
    celltype_test_raw_accuracy = (predicted_celltype_test_raw_labelenc ==
                                  np.array(celltype_test_raw_labelenc)).mean()
    celltype_test_accuracy = (
        predicted_celltype_test_labelenc == celltype_test_labelenc).mean()

    # NOTE(review): this is reported as the "[reconstructed data]" accuracy
    # but is computed over all rows, not only sameclass_mask -- confirm.
    celltype_notransfer_accuracy = (
        predicted_celltype_test_labelenc == celltype_test_labelenc).mean(0)
    recon_confusion = confusion_matrix(
        celltype_test_labelenc[sameclass_mask],
        predicted_celltype_test_labelenc[sameclass_mask])
    if cfg.verbose in ('all', 'test'):
        print('Confusion matrix | CELLTYPE RECONSTRUCTION')
        print(recon_confusion)

    from sklearn.metrics import classification_report
    rep = classification_report(
        celltype_test_labelenc[sameclass_mask],
        predicted_celltype_test_labelenc[sameclass_mask],
        output_dict=True)
    df = pd.DataFrame(rep).transpose()
    if cfg.verbose in ('all', 'test'):
        print('Celltype reconstruction report')
        print(df)

    # Computed before the branch because it is also needed after it; it used
    # to be defined only inside the transfer branch.
    pred_f_t = predicted_form_test if isinstance(
        predicted_form_test, np.ndarray) else predicted_form_test.argmax(
            1).cpu().detach().numpy()

    if has_transfer:
        celltype_transfer_accuracy = (
            predicted_celltype_test_labelenc[~sameclass_mask] ==
            celltype_test_labelenc[~sameclass_mask]).mean(0)
        transfer_confusion = confusion_matrix(
            celltype_test_labelenc[~sameclass_mask],
            predicted_celltype_test_labelenc[~sameclass_mask])
        if cfg.verbose in ('all', 'test'):
            print('Confusion matrix | CELLTYPE TRANSFER')
            print(transfer_confusion)

        rep = classification_report(
            celltype_test_labelenc[~sameclass_mask],
            predicted_celltype_test_labelenc[~sameclass_mask],
            output_dict=True)
        df = pd.DataFrame(rep).transpose()
        if cfg.verbose in ('all', 'test'):
            print('Celltype transfer report')
            print(df)

        rep = classification_report(form_test_labelenc[~sameclass_mask],
                                    pred_f_t[~sameclass_mask],
                                    output_dict=True)
        df = pd.DataFrame(rep).transpose()
        if cfg.verbose in ('all', 'test'):
            print('Form transfer report')
            print(df)
    else:
        celltype_transfer_accuracy = "Not enoug classes for transfering"
    predicted_form_test = predicted_form_test.argmax(1).cpu().detach().numpy()
    form_test_accuracy = (predicted_form_test == form_test_labelenc).mean(0)
    recon_confusion = confusion_matrix(form_test_labelenc[sameclass_mask],
                                       predicted_form_test[sameclass_mask],
                                       normalize='all')
    if cfg.verbose in ('all', 'test'):
        print('Confusion matrix | FORM RECONSTRUCTED')
        print(recon_confusion)

    # Only meaningful when at least one row goes to a different class.
    if has_transfer:
        transfer_confusion = confusion_matrix(
            form_test_labelenc[~sameclass_mask],
            pred_f_t[~sameclass_mask],
            normalize='all')
        if cfg.verbose in ('all', 'test'):
            print('Confusion matrix | FORM TRANSFER')
            print(transfer_confusion)

    # The reconstructed report uses the same-class rows; it used to repeat
    # the transfer subset.
    rep = classification_report(form_test_labelenc[sameclass_mask],
                                predicted_form_test[sameclass_mask],
                                output_dict=True)
    df = pd.DataFrame(rep).transpose()
    if cfg.verbose in ('all', 'test'):
        print('Form reconstructed report')
        print(df)

    # classifier on latents
    predicted_celltype_latents_labelenc = predicted_celltype_test_latents.argmax(
        1).cpu().detach().numpy()
    predicted_celltype_transfer_latents_labelenc = predicted_celltype_transfer_latents.argmax(
        1).cpu().detach().numpy()

    #celltype_test_latents_accuracy = (predicted_celltype_latents_labelenc == celltype_test_raw_labelenc).mean()
    celltype_notransfer_latents_accuracy = (
        np.array(predicted_celltype_latents_labelenc) == np.array(
            celltype_test_raw_labelenc)).mean(0)
    if has_transfer:
        celltype_transfer_latents_accuracy = (
            predicted_celltype_transfer_latents_labelenc[~sameclass_mask] ==
            celltype_test_labelenc[~sameclass_mask]).mean()
    else:
        celltype_transfer_latents_accuracy = "Not enoug classes for transfering"

    #reconstructed means transfered to the same class

    if cfg.verbose in ('all', 'test'):
        print('On expression:')
        print('\tCell type prediction accuracy [vae transfered data]',
              celltype_test_accuracy, '< with the same class transfer')
        print('\tCell type prediction accuracy [reconstructed data]',
              celltype_notransfer_accuracy)
        print(
            '\tCell type prediction accuracy [transfered to another classes]',
            celltype_transfer_accuracy)
        print('\tClass prediction accuracy:', form_test_accuracy)
        print('On latents:')
        #print('\tCell type prediction accuracy [vae transfered data]', celltype_test_latents_accuracy,
        #        '< with the same class transfer')
        print('\tCell type prediction accuracy [reconstructed data]',
              celltype_notransfer_latents_accuracy)
        print(
            '\tCell type prediction accuracy [transfered to another classes]',
            celltype_transfer_latents_accuracy)

        #print('K neighbors purity [vae test data]:', test_purity)
        #print('K neighbors purity [transfer]:', transfered_purity)
        #print('Entropy batch mixing [vae test data]:', ebm_score['test'])
        #print('Entropy batch mixing [transfered data]:', ebm_score['transfered'])

    return {
        'MSE':
        float(np.mean(mse)),
        'Calibration accuracy':
        float((res_norm.argmin(1) == res_equal).mean()),
        'Cell type prediction accuracy [train]:':
        celltype_train_accuracy,
        'Cell type prediction accuracy expression [test hold out data]':
        celltype_test_raw_accuracy,
        #'Cell type prediction accuracy latents [test hold out data]': celltype_test_latents_accuracy,
        #'Cell type prediction accuracy expression [vae transfered data]': celltype_test_accuracy,
        #'Cell type prediction accuracy latents [vae transfered data]': celltype_test_accuracy,
        'Cell type prediction accuracy expression [reconstructed data]':
        celltype_notransfer_accuracy,
        'Cell type prediction accuracy latents [reconstructed data]':
        celltype_notransfer_latents_accuracy,
        'Cell type prediction accuracy expression [transfered to another classes]':
        celltype_transfer_accuracy,
        'Cell type prediction accuracy latents [transfered to another classes]':
        celltype_transfer_latents_accuracy,
        'Class prediction accuracy':
        form_test_accuracy,
        #'K neighbors purity [vae test data]': test_purity,
        #'K neighbors purity [transfer]': transfered_purity,
        #'Entropy batch mixing [test data]': ebm_score['test'],
        #'Entropy batch mixing [transfer data]': ebm_score['transfered']
    }
Exemple #11
0
from torch.cuda import is_available as cuda_is_available
from pycm import ConfusionMatrix
import numpy as np
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
from torch.optim.lr_scheduler import ExponentialLR
from transformers import get_linear_schedule_with_warmup
import argparse
from transformers.models.bert import BertTokenizer, BertForSequenceClassification
from nlp_utils.tokenizer import WhiteSpaceTokenizer
from nlp_utils.dataset import prepare_qqsim_train_test_org_data_loader, prepare_qqsim_predict_org_data_loader, prepare_qqsim_train_test_tensor_data_loader
from torch.nn.utils import clip_grad_norm_
from pytorch_pretrained_bert.optimization import BertAdam
from torch.nn.functional import softmax

# Select the first CUDA device when CUDA is available, the CPU otherwise.
# NOTE(review): this rebinds the name ``device`` -- the callable used on the
# right-hand side (its import is not visible in this excerpt) -- to the
# resulting object, so the callable cannot be reached under this name later.
device = device("cuda:0" if cuda_is_available() else "cpu")
# Alternative kept for forcing CPU execution:
#device = device("cpu")


class Config:
    """Class-level settings for the run: paths, batch sizes and counts.

    The values are plain class attributes and are meant to be read as
    ``Config.<name>``.
    """
    # Model locations.
    last_model_path = ''
    pretrained_model_folder = (
        '../user_data/pretrained/self_nezha_bert_double_sentence')

    # The train, test and predict batch sizes share one value.
    train_batch_size = test_batch_size = predict_batch_size = 128

    epoch = 5
    test_model_num = 10

    # Logs and models are written to the same folder.
    log_dir = model_folder = './output'

    num_labels = 2
Exemple #12
0
def train(
    *,
    instance_file: str,
    tensors_dir: str,
    train_dir: str,
    configs_dir: str,
    model_encoder_iterations: int,
    model_encoder_output_dim: int,
    model_encoder_message_dim: int,
    model_decoder_type: str,
    model_learning_rate: float,
    model_batch_size: int,
    trainer_epochs: int,
    trainer_eval_every: int,
    trainer_limit_epochs_at: Optional[int],
    trainer_train_eval_split: float,
    trainer_selection_metric: str,
    trainer_kept_checkpoints: int,
    trainer_cuda: Optional[int],
    log_level: str,
) -> None:
    """Run the training.

    The call arguments are saved to ``<configs_dir>/train.json``, the
    pickled instance and the tensor dataset are loaded, the dataset is split
    90% / 5% / remainder into train / eval / test parts, and the model is
    built and fitted with a checkpoint callback monitoring ``eval_mrr``.

    Raises:
        RuntimeError: if ``trainer_cuda`` is given but CUDA is not available.
    """
    # locals() has to be evaluated before any other local name is bound so
    # that it holds exactly the call arguments.
    run_config = Config.from_arguments(
        locals(), ["instance_file", "tensors_dir", "train_dir"],
        "configs_dir")
    run_config.save(Path(configs_dir) / "train.json")
    logger = setup_logging(__name__, log_level)

    tensors_dir_path = Path(tensors_dir).expanduser().resolve()
    train_dir_path = Path(train_dir).expanduser().resolve()
    train_dir_path.mkdir(parents=True, exist_ok=True)

    # NOTE(review): unpickling executes arbitrary code; instance_file must
    # come from a trusted source.
    with bz2_open(instance_file, "rb") as fh:
        instance = pickle_load(fh)

    dataset = CodRepDataset(input_dir=tensors_dir_path)
    dataset_size = len(dataset)
    logger.info("Dataset of size %d", dataset_size)

    # The test split takes whatever the rounding of the other two leaves.
    train_length = round(0.9 * dataset_size)
    eval_length = round(0.05 * dataset_size)
    test_length = dataset_size - train_length - eval_length
    split_lengths = [train_length, eval_length, test_length]
    train_dataset, eval_dataset, test_dataset = random_split(
        dataset, split_lengths)

    # NOTE(review): ``device`` is not referenced again in this function; the
    # branch is kept because it rejects a CUDA request on a CUDA-less system.
    if trainer_cuda is None:
        device = torch_device("cpu")
    elif cuda_is_available():
        device = torch_device("cuda:%d" % trainer_cuda)
    else:
        raise RuntimeError("CUDA is not available on this system.")

    model = build_model(
        instance=instance,
        model_encoder_iterations=model_encoder_iterations,
        model_encoder_output_dim=model_encoder_output_dim,
        model_encoder_message_dim=model_encoder_message_dim,
        model_decoder_type=model_decoder_type,
        model_learning_rate=model_learning_rate,
        model_batch_size=model_batch_size,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        test_dataset=test_dataset,
    )
    # The model needs a forward to be completely initialized.
    first_batch = instance.collate([dataset[0]])
    model.training_step(first_batch, 0)
    logger.info("Configured model %s", model)

    checkpoint_options = dict(
        filepath=train_dir,
        save_best_only=True,
        verbose=True,
        monitor="eval_mrr",
        mode="max",
        prefix="",
    )
    checkpoint_callback = ModelCheckpoint(**checkpoint_options)

    trainer = Trainer(default_save_path=train_dir,
                      checkpoint_callback=checkpoint_callback)
    trainer.fit(model)