def post_init(self):
    """Load a model from clip specified in `model_name`."""
    import clip
    assert self.model_name in clip.available_models(), \
        f'model_name={self.model_name} not in clip.available_models()'
    model, _ = clip.load(self.model_name, self.device)
    self.model = model
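# A minimal hedged sketch (not from the source above): a hypothetical host
# class showing the attributes `post_init` expects. All names here are
# illustrative.
class ClipWrapper:
    def __init__(self, model_name="ViT-B/32", device="cpu"):
        self.model_name = model_name  # must be one of clip.available_models()
        self.device = device          # e.g. "cpu" or "cuda"

ClipWrapper.post_init = post_init  # attach the function defined above as a method
wrapper = ClipWrapper()
wrapper.post_init()  # downloads (if needed) and loads the CLIP model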
def init_weights(self):
    """Initialize the parameters either from an existing checkpoint or from scratch."""
    if isinstance(self.pretrained, str):
        assert (
            self.pretrained in clip.available_models()
        ), f"pretrained model '{self.pretrained}' is not an available CLIP model"
        logger = get_root_logger()
        logger.info(f"load model from: {self.pretrained}")
        self.model = clip.load(self.pretrained)[0].visual
    else:
        raise TypeError("pretrained must be a str")
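# Hedged note (not from the source): `get_root_logger` is assumed to come from
# the surrounding mmcv/mmaction-style codebase; it is not part of the `clip`
# package. `clip.load(...)[0]` drops the preprocess transform, and `.visual`
# keeps only CLIP's image encoder, so `self.model` is a vision-only backbone.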
# with open('G:\PythonWorkplace\HCP-MIC-at-ImageCLEF-VQA-Med-2020-master\BioBert\data/train2set.json') as c:
#     q3 = json.load(c)
#
# for i in range(len(p2)):
#     sample = p2[i]
#     print("debug")

# train_close_answer_dict = {}
# close_ques_list = []
# close_ans_list = []
# with open('G:\PythonWorkplace\VQA-master\data/trainset_rad.json') as t:
#     trainset = json.load(t)
# for train_entity in trainset:
#     question = train_entity['question']
#     answer = train_entity['answer']
#     answer_type = train_entity['answer_type']
#     if answer_type == 'CLOSED' and answer not in close_ans_list:
#         close_ans_list.append(answer)
#     if answer_type == 'CLOSED' and question not in close_ques_list:
#         close_ques_list.append(question)
#     if answer_type == 'CLOSED':
#         train_close_answer_dict[question] = answer
# with open("G:\PythonWorkplace\ImageCLEF2021\data2020/Rad.json", "w") as f:
#     f.write(json.dumps(train_close_answer_dict, ensure_ascii=False, indent=4, separators=(',', ':')))

import torch
import clip

print(clip.available_models())
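# Depending on the installed version of the `clip` package, the print above
# lists the registered model names, e.g. (the exact set varies by release):
# ['RN50', 'RN101', 'RN50x4', 'ViT-B/32']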
import time

import numpy as np
import sklearn
import torch
import torch.nn.functional as F
from torch import nn
from torch.nn.utils.rnn import *
from torch.utils.data import Dataset, DataLoader

import clip

cuda = torch.cuda.is_available()
print("cuda", cuda)
num_workers = 8 if cuda else 0
print(num_workers)
print("Torch version:", torch.__version__)

# # Load CLIP Model
print("Available Models: ", clip.available_models())
model, preprocess = clip.load("RN50", jit=False)  # clip.load("ViT-B/32")

# For the non-JIT model these are plain ints; .item() is only needed
# on the JIT model's buffers. The input resolution lives on the visual tower.
input_resolution = model.visual.input_resolution
context_length = model.context_length
vocab_size = model.vocab_size

# Equivalently: sum(p.numel() for p in model.parameters())
print("Model parameters:", f"{np.sum([int(np.prod(p.shape)) for p in model.parameters()]):,}")
print("Input resolution:", input_resolution)
print("Context length:", context_length)
print("Vocab size:", vocab_size)

# # Selected classes and mapping for kinetics dataset
labels = [
        numbers for width and height. It supports expression variables so you can
        actually change the resolution during training, e.g.:

        ```yaml
        resolution:
        - 224 if t < .2 else 448
        ```

        would change the resolution from 224x224 to 448x448 at 20% of training time.
        """,
        expression_groups=EXPR_GROUPS.resolution,
    ),
    "model": Parameter(
        str, default="ViT-B/32",
        doc=(
            "The pre-trained CLIP model to use. Options are "
            + ", ".join(f"`{m}`" for m in clip.available_models())
            + "\n\nThe models are downloaded from `openaipublic.azureedge.net` "
              "and stored in the user's `~/.cache/` directory"
        ),
    ),
    "device": Parameter(
        str, default="auto",
        doc="The device to run the training on. Can be `cpu`, `cuda`, `cuda:1`, etc.",
    ),
    "learnrate": Parameter(
        float, default=1.,
        expression_groups=EXPR_GROUPS.learnrate,
        doc="""
        The learning rate of the optimizer. Different optimizers have different
        learning rates that work well. However, this value is scaled *by hand*
        so that `1.0` translates to
import numpy as np
import pytest
import torch
from PIL import Image

import clip


@pytest.mark.parametrize('model_name', clip.available_models())
def test_consistency(model_name):
    device = "cpu"
    jit_model, transform = clip.load(model_name, device=device)
    py_model, _ = clip.load(model_name, device=device, jit=False)

    image = transform(Image.open("CLIP.png")).unsqueeze(0).to(device)
    text = clip.tokenize(["a diagram", "a dog", "a cat"]).to(device)

    with torch.no_grad():
        logits_per_image, _ = jit_model(image, text)
        jit_probs = logits_per_image.softmax(dim=-1).cpu().numpy()

        logits_per_image, _ = py_model(image, text)
        py_probs = logits_per_image.softmax(dim=-1).cpu().numpy()

    assert np.allclose(jit_probs, py_probs, atol=0.01, rtol=0.1)
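# To run only this JIT-vs-Python consistency check (assumes pytest is
# installed and CLIP.png is present in the working directory, as in the
# CLIP repository root):
#
#   pytest -k test_consistency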
print("total files : ", len(folds)) results = '../results_top1/finetuneVAE_finetuneTrans/img/' results_name = os.listdir(results) temp = [] for fold in folds: if fold in results_name: temp.append(fold) folds = temp names = clip.available_models() print(names) device = 'cuda' model, preprocess = clip.load("ViT-B/32", device=device) for fold in tqdm(folds): files = os.listdir(os.path.join(root, fold)) imgs = [] txt = [] origin = [] for file in files: