Example #1
    def post_init(self):
        """Load a model from clip specified in `model_name`."""
        import clip

        assert self.model_name in clip.available_models(), \
            f'model_name={self.model_name} not in clip.available_models'

        model, _ = clip.load(self.model_name, self.device)
        self.model = model
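
For context, a minimal self-contained sketch of how a hook like this is typically driven. The wrapper class below is hypothetical; only the `model_name`/`device` attributes and the loading logic mirror the snippet above.

import torch
import clip


class ClipEncoder:
    """Hypothetical wrapper illustrating the post_init pattern above."""

    def __init__(self, model_name="ViT-B/32", device="cpu"):
        self.model_name = model_name
        self.device = device
        self.post_init()

    def post_init(self):
        """Load the CLIP model specified in `model_name`."""
        assert self.model_name in clip.available_models(), \
            f'model_name={self.model_name} not in clip.available_models'
        model, _ = clip.load(self.model_name, self.device)
        self.model = model


encoder = ClipEncoder("ViT-B/32", "cuda" if torch.cuda.is_available() else "cpu")
print(type(encoder.model).__name__)  # CLIP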
Example #2
    def init_weights(self):
        """Initialize the parameters either from an existing checkpoint or
        from scratch."""
        if isinstance(self.pretrained, str):
            assert (
                self.pretrained in clip.available_models()
            ), "pretrained model not in clip.available_models()"
            logger = get_root_logger()
            logger.info(f"load model from: {self.pretrained}")
            self.model = clip.load(self.pretrained)[0].visual
        else:
            raise TypeError("pretrained must be a str")
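
For context, `clip.load(name)[0]` returns the full CLIP model and `.visual` is just its image encoder, which is what the method above stores as the backbone (the snippet assumes a module-level `import clip` and an mmcv-style `get_root_logger`). A minimal sketch of that pattern on its own:

import torch
import clip

# Image encoder only; ViT-B/32 expects 224x224 inputs and yields 512-d features.
visual = clip.load("ViT-B/32", device="cpu")[0].visual

with torch.no_grad():
    features = visual(torch.randn(1, 3, 224, 224))
print(features.shape)  # torch.Size([1, 512])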
Example #3
# with open('G:\PythonWorkplace\HCP-MIC-at-ImageCLEF-VQA-Med-2020-master\BioBert\data/train2set.json') as c:
#     q3 = json.load(c)
#
#
# for i in range(len(p2)):
#     sample = p2[i]
#     print("debug")

# train_close_answer_dict = {}
# close_ques_list = []
# close_ans_list = []
# with open('G:\PythonWorkplace\VQA-master\data/trainset_rad.json') as t:
#     trainset = json.load(t)
#     for train_entity in trainset:
#         question = train_entity['question']
#         answer = train_entity['answer']
#         answer_type = train_entity['answer_type']
#         if answer_type == 'CLOSED' and answer not in close_ans_list:
#             close_ans_list.append(answer)
#         if answer_type == 'CLOSED' and question not in close_ques_list:
#             close_ques_list.append(question)
#         if answer_type == 'CLOSED':
#             train_close_answer_dict[question] = answer
# with open("G:\PythonWorkplace\ImageCLEF2021\data2020/Rad.json", "w") as f:
#     f.write(json.dumps(train_close_answer_dict, ensure_ascii=False, indent=4, separators=(',', ':')))

import torch
import clip

print(clip.available_models())
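
The exact list printed depends on the installed `clip` release. A small follow-up sketch (not part of the original snippet) of the usual next step: validate a preferred name against the list before calling `clip.load`, falling back to the ResNet-50 weights otherwise.

preferred = "ViT-B/32"
name = preferred if preferred in clip.available_models() else "RN50"
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load(name, device=device)
print("loaded:", name)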
Example #4
import time

import numpy as np
import sklearn
import torch
import torch.nn.functional as F
from torch import nn
from torch.nn.utils.rnn import *
from torch.utils.data import Dataset, DataLoader

import clip

cuda = torch.cuda.is_available()
print("cuda", cuda)
num_workers = 8 if cuda else 0
print(num_workers)
print("Torch version:", torch.__version__)

# # Load CLIP Model

print("Avaliable Models: ", clip.available_models())
model, preprocess = clip.load("RN50", jit=False)  # clip.load("ViT-B/32") #

input_resolution = model.visual.input_resolution
context_length = model.context_length
vocab_size = model.vocab_size

print("Model parameters:",
      f"{np.sum([int(np.prod(p.shape)) for p in model.parameters()]):,}")
print("Input resolution:", input_resolution)
print("Context length:", context_length)
print("Vocab size:", vocab_size)

# # Selected classes and mapping for kinetics dataset

labels = [
Example #5
        numbers for width and height.

        It supports expression variables so you can actually change the resolution
        during training, e.g.:
        ```yaml
        resolution:
        - 224 if t < .2 else 448
        ```
        would change the resolution from 224x224 to 448x448 at 20% of training time.
        """,
        expression_groups=EXPR_GROUPS.resolution,
    ),
    "model": Parameter(
        str, default="ViT-B/32",
        doc=("The pre-trained CLIP model to use. Options are " +
             ", ".join(f"`{m}`" for m in clip.available_models()) +
             "\n\nThe models are downloaded from `openaipublic.azureedge.net` and stored "
             "in the user's `~/.cache/` directory."
             )
    ),
    "device": Parameter(
        str, default="auto",
        doc="The device to run the training on. Can be `cpu`, `cuda`, `cuda:1` etc.",
    ),
    "learnrate": Parameter(
        float, default=1., expression_groups=EXPR_GROUPS.learnrate,
        doc="""
        The learning rate of the optimizer.

        Different optimizers have different learning rates that work well.
        However, this value is scaled *by hand* so that `1.0` translates to
Example #6
import numpy as np
import pytest
import torch
from PIL import Image

import clip


@pytest.mark.parametrize('model_name', clip.available_models())
def test_consistency(model_name):
    device = "cpu"
    jit_model, transform = clip.load(model_name, device=device)
    py_model, _ = clip.load(model_name, device=device, jit=False)

    image = transform(Image.open("CLIP.png")).unsqueeze(0).to(device)
    text = clip.tokenize(["a diagram", "a dog", "a cat"]).to(device)

    with torch.no_grad():
        logits_per_image, _ = jit_model(image, text)
        jit_probs = logits_per_image.softmax(dim=-1).cpu().numpy()

        logits_per_image, _ = py_model(image, text)
        py_probs = logits_per_image.softmax(dim=-1).cpu().numpy()

    assert np.allclose(jit_probs, py_probs, atol=0.01, rtol=0.1)
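
Since the test is parametrized over every released checkpoint, a full run downloads all of them. A small sketch of limiting a local run to a single model with pytest's `-k` filter; the file name `clip_test.py` is an assumption, and `CLIP.png` must exist in the working directory:

import sys

import pytest

# Run only the ViT-B/32 case of the consistency test defined above.
sys.exit(pytest.main(["clip_test.py", "-k", "ViT-B/32"]))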
Example #7
print("total files : ", len(folds))

results = '../results_top1/finetuneVAE_finetuneTrans/img/'

results_name = os.listdir(results)

temp = []

for fold in folds:
    if fold in results_name:
        temp.append(fold)

folds = temp

names = clip.available_models()

print(names)

device = 'cuda'

model, preprocess = clip.load("ViT-B/32", device=device)

for fold in tqdm(folds):
    files = os.listdir(os.path.join(root, fold))

    imgs = []
    txt = []
    origin = []

    for file in files: