def fetch_mnist():
    """Download the four MNIST archives and return them as NumPy arrays.

    Returns:
        (X_train, Y_train, X_test, Y_test). The image arrays are float32 with
        one flattened 28*28 row per sample; the label arrays are uint8.
    """
    import gzip

    base_url = "http://yann.lecun.com/exdb/mnist/"

    def load(name, skip):
        # Decompress the archive and drop its leading `skip` bytes
        # (presumably the IDX file header -- confirm against the format spec).
        payload = gzip.decompress(fetch(base_url + name))
        return np.frombuffer(payload, dtype=np.uint8).copy()[skip:]

    def as_images(flat):
        # One row per image, converted to float32.
        return flat.reshape((-1, 28*28)).astype(np.float32)

    X_train = as_images(load("train-images-idx3-ubyte.gz", 0x10))
    Y_train = load("train-labels-idx1-ubyte.gz", 8)
    X_test = as_images(load("t10k-images-idx3-ubyte.gz", 0x10))
    Y_test = load("t10k-labels-idx1-ubyte.gz", 8)
    return X_train, Y_train, X_test, Y_test
def load_weights_from_torch(self):
    """Fetch the pretrained checkpoint for ``self.number`` and copy it into the model.

    Only variants 0, 2, 4 and 7 have a checkpoint listed here. Every entry of
    the loaded state dict is matched to an attribute of ``self`` by name; when
    the shapes agree the value is assigned, otherwise a mismatch line is
    printed and the entry is skipped.

    Raises:
        Exception: if ``self.number`` has no checkpoint in the table.
    """
    # URL table taken from
    # https://github.com/lukemelas/EfficientNet-PyTorch/blob/master/efficientnet_pytorch/utils.py#L551
    release = "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/"
    checkpoints = {
        0: "efficientnet-b0-355c32eb.pth",
        2: "efficientnet-b2-8bb594d6.pth",
        4: "efficientnet-b4-6ed6700e.pth",
        7: "efficientnet-b7-dcc49843.pth",
    }
    if self.number not in checkpoints:
        raise Exception("no pretrained weights")
    blob = fetch(release + checkpoints[self.number])

    if USE_TORCH:
        import io
        import torch
        state = torch.load(io.BytesIO(blob))
    else:
        state = fake_torch_load(blob)

    for name, value in state.items():
        if '_blocks.' in name:
            # "_blocks.<idx>.<rest>" -> "_blocks[<idx>].<rest>" so it indexes the block list.
            name = "%s[%s].%s" % tuple(name.split(".", 2))
        expr = "self." + name
        #print(name, value.shape)

        # NOTE(review): eval() runs on key names read from a downloaded
        # checkpoint; only safe while the download source is trusted.
        # Try the name as-is, then without ".weight"; the final fallback
        # (".bias" -> "_bias") is allowed to raise AttributeError.
        for candidate in (expr, expr.replace(".weight", "")):
            try:
                mv = eval(candidate)
                break
            except AttributeError:
                continue
        else:
            mv = eval(expr.replace(".bias", "_bias"))

        if USE_TORCH:
            vnp = value.numpy().astype(np.float32)
        else:
            vnp = value.astype(np.float32)
        if name == '_fc.weight':
            vnp = vnp.T
        if vnp.shape == ():
            # Promote scalars to a one-element array before comparing shapes.
            vnp = np.array([vnp])

        if mv.shape != vnp.shape:
            print("MISMATCH SHAPE IN %s, %r %r" % (name, mv.shape, vnp.shape))
        else:
            mv.assign(Tensor(vnp))
def load_from_pretrained(self):
    """Fetch the checkpoint for ``self.number`` (0-7) and copy it into the model.

    State-dict keys that mention one of the known layer categories are
    renamed (".bias" -> "_bias", ".weight" removed) before being resolved with
    ``get_child``. Values are assigned when shapes agree; otherwise a mismatch
    line is printed.

    Raises:
        KeyError: if ``self.number`` is not a key of the URL table.
    """
    release = "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/"
    checkpoint_names = [
        "efficientnet-b0-355c32eb.pth",
        "efficientnet-b1-f1951068.pth",
        "efficientnet-b2-8bb594d6.pth",
        "efficientnet-b3-5fb5a3c3.pth",
        "efficientnet-b4-6ed6700e.pth",
        "efficientnet-b5-b6417697.pth",
        "efficientnet-b6-c76e70fd.pth",
        "efficientnet-b7-dcc49843.pth",
    ]
    model_urls = {number: release + name for number, name in enumerate(checkpoint_names)}

    renamed_categories = ['_conv_head', '_conv_stem', '_depthwise_conv', '_expand_conv',
                          '_fc', '_project_conv', '_se_reduce', '_se_expand']

    state = fake_torch_load(fetch(model_urls[self.number]))
    for key, value in state.items():
        for cat in renamed_categories:
            if cat not in key:
                continue
            key = key.replace('.bias', '_bias')
            key = key.replace('.weight', '')

        #print(key, value.shape)
        mv = get_child(self, key)
        vnp = value.astype(np.float32)
        if key == '_fc':
            # After renaming, '_fc' is the fully-connected weight; it is stored transposed.
            vnp = vnp.T
        if vnp.shape == ():
            # Promote scalars to a one-element array before comparing shapes.
            vnp = np.array([vnp])

        if mv.shape != vnp.shape:
            print("MISMATCH SHAPE IN %s, %r %r" % (key, mv.shape, vnp.shape))
        else:
            mv.assign(vnp)
def add_boxes(img, prediction):
    """Draw one labelled rectangle per prediction row onto ``img``.

    Args:
        img: image array; its first two dimensions are read as height, width.
        prediction: an int when there is nothing to draw, otherwise a 2-D
            array whose columns 1-4 hold box coordinates and whose last
            column holds the class index. Columns 1-4 are modified in place.

    Returns:
        ``img`` unchanged when ``prediction`` is an int, otherwise the image
        returned by the last cv2 drawing call.
    """
    if isinstance(prediction, int):
        # no predictions
        return img

    names_blob = fetch('https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names')
    coco_labels = names_blob.decode('utf-8').split('\n')

    height, width = img.shape[0:2]
    scale_factor = 608 / width
    # Shift x columns (1, 3) and y columns (2, 4) by half of what is left of 608
    # after scaling; this mutates the caller's array.
    prediction[:,[1,3]] -= (608 - scale_factor * width) / 2
    prediction[:,[2,4]] -= (608 - scale_factor * height) / 2

    font = cv2.FONT_HERSHEY_PLAIN
    box_color = (255, 0, 0)
    for pred in prediction:
        top_left = tuple(pred[1:3].astype(int))
        far = tuple(pred[3:5].astype(int))
        # The second corner is pushed out by the box's own width and height.
        bottom_right = (far[0] + (far[0] - top_left[0]),
                        far[1] + (far[1] - top_left[1]))
        label = coco_labels[int(pred[-1])]

        img = cv2.rectangle(img, top_left, bottom_right, box_color, 2)

        # Filled background sized to the label text, then the text itself.
        text_w, text_h = cv2.getTextSize(label, font, 1 , 1)[0]
        tag_corner = top_left[0] + text_w + 3, top_left[1] + text_h + 4
        img = cv2.rectangle(img, top_left, tag_corner, box_color, -1)
        img = cv2.putText(img, label, (top_left[0], top_left[1] + text_h + 4),
                          font, 1, [225,255,255], 1)

    return img
def show_labels(prediction, confidence = 0.5, num_classes = 80):
    """Print the detected class names and their confidence for each image in a batch.

    Args:
        prediction: network output tensor; it is detached, moved to the CPU
            and its ``.data`` is indexed as (batch, box, attribute), with
            attribute 4 compared against ``confidence`` and attributes
            ``5:5 + num_classes`` read as per-class scores.
        confidence: rows whose attribute 4 is not above this are zeroed out.
        num_classes: number of per-class score columns to consider.

    For every image, one "Detected <label> <pct>%" line is printed per
    distinct class among the surviving rows, or "No detections found!" when no
    row survives the confidence mask.
    """
    coco_labels = fetch('https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names')
    coco_labels = coco_labels.decode('utf-8').split('\n')

    prediction = prediction.detach().cpu().data
    # Zero every row whose column 4 is at or below the threshold.
    conf_mask = np.expand_dims(prediction[:,:,4] > confidence, 2)
    prediction = prediction * conf_mask

    # Iterate over batches
    for img_pred in prediction:
        class_scores = img_pred[:,5:5 + num_classes]
        max_conf = np.expand_dims(np.amax(class_scores, axis=1), axis=1)
        max_conf_score = np.expand_dims(np.argmax(class_scores, axis=1), axis=1)
        # Columns: the first five attributes, best class score, best class index (7 total).
        image_pred = np.concatenate((img_pred[:,:5], max_conf, max_conf_score), axis=1)

        # Keep rows whose column 4 was not zeroed by the mask.
        # TODO: Check if this is right
        non_zero_ind = np.nonzero(image_pred[:,4])[0]
        # Indexing with the index array always yields an (k, 7) array, including
        # k == 0 and k == 1, so no squeeze/reshape (or try/except) is needed.
        image_pred_ = image_pred[non_zero_ind, :]
        if image_pred_.shape[0] == 0:
            print("No detections found!")
            continue

        # Report each class once, using the first row in which it appears.
        classes, indexes = np.unique(image_pred_[:, -1], return_index=True)
        for coco_class, first_row in zip(classes, indexes):
            probability = image_pred_[first_row][4] * 100
            print("Detected", coco_labels[int(coco_class)], "{:.2f}%".format(probability))
def load_cifar():
    """Download the CIFAR-10 archive and return the first training batch.

    Returns:
        (X, Y): ``X`` is the batch's ``data`` entry reshaped to
        (-1, 3, 32, 32); ``Y`` is a NumPy array built from its ``labels``.
    """
    archive = io.BytesIO(fetch('https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'))
    # Close both the tar handle and the extracted member once the batch is read.
    with tarfile.open(fileobj=archive, mode='r:gz') as tt:
        with tt.extractfile('cifar-10-batches-py/data_batch_1') as batch:
            # NOTE(review): this unpickles downloaded data; only safe while
            # the download source is trusted.
            db = pickle.load(batch, encoding="bytes")
    X = db[b'data'].reshape((-1, 3, 32, 32))
    Y = np.array(db[b'labels'])
    return X, Y
#!/usr/bin/env python3
import io
import pickle
import zipfile

from extra.utils import fetch, my_unpickle

if __name__ == "__main__":
    # Download the yolov5s checkpoint and print the structure of each layer
    # listed in the model's yaml (backbone followed by head).
    dat = fetch('https://github.com/ultralytics/yolov5/releases/download/v4.0/yolov5s.pt')

    # The checkpoint is a zip archive; the pickled object graph is in data.pkl.
    with zipfile.ZipFile(io.BytesIO(dat)) as fp:
        #fp.printdir()
        data = fp.read('archive/data.pkl')

    # yolo specific
    ret, _ = my_unpickle(io.BytesIO(data))
    d = ret['model'].yaml
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):
        tm = ret['model']._modules['model'][i]
        print(i, f, n, m, args, tm._modules.keys())
        # Focus, Conv, BottleneckCSP, SPP, Concat, Detect
        #for k,v in tm._modules.items():
        #    print("   ", k, v)
        # Compare with ==: `m in "Focus"` is a substring test and would also
        # match "", "F", "ocus", etc.
        if m == "Focus":
            conv = tm._modules['conv']
            print("   ", conv._modules)
        if m == "Conv":
            conv, bn = tm._modules['conv'], tm._modules['bn']
            print("   ", conv)
            #print(bn)
def load_from_pretrained(m):
    """Download a pretrained ViT ``.npz`` checkpoint and assign it to ``m``.

    The checkpoint is chosen by ``m.embed_dim`` (192 or 768). Embedding, class
    token, head, position embedding and encoder norm are assigned first, then
    the parameters of encoder blocks 0-11.

    Raises:
        Exception: if ``m.embed_dim`` has no checkpoint in the table.
    """
    import io
    from extra.utils import fetch

    # https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
    bucket = "https://storage.googleapis.com/vit_models/augreg/"
    checkpoints = {
        192: "Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz",
        768: "B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_224.npz",
    }
    if m.embed_dim not in checkpoints:
        raise Exception("no pretrained weights for configuration")
    dat = np.load(io.BytesIO(fetch(bucket + checkpoints[m.embed_dim])))

    #for x in dat.keys():
    #    print(x, dat[x].shape, dat[x].dtype)

    # The embedding kernel's axes are permuted with (3, 2, 0, 1) before assignment.
    m.embedding[0].assign(np.transpose(dat['embedding/kernel'], (3, 2, 0, 1)))
    m.embedding[1].assign(dat['embedding/bias'])

    m.cls.assign(dat['cls'])

    m.head[0].assign(dat['head/kernel'])
    m.head[1].assign(dat['head/bias'])

    m.pos_embedding.assign(dat['Transformer/posembed_input/pos_embedding'])
    m.encoder_norm[0].assign(dat['Transformer/encoder_norm/scale'])
    m.encoder_norm[1].assign(dat['Transformer/encoder_norm/bias'])

    # (attribute on the block, checkpoint sub-path, name of the first parameter)
    plain_pairs = (
        ('ff1', 'MlpBlock_3/Dense_0', 'kernel'),
        ('ff2', 'MlpBlock_3/Dense_1', 'kernel'),
        ('ln1', 'LayerNorm_0', 'scale'),
        ('ln2', 'LayerNorm_2', 'scale'),
    )

    dim = m.embed_dim
    for i in range(12):
        block = m.tbs[i]
        root = f'Transformer/encoderblock_{i}'

        # Attention projections are reshaped to (dim, dim) kernels and (dim,) biases.
        for proj in ('query', 'key', 'value', 'out'):
            target = getattr(block, proj)
            source = f'{root}/MultiHeadDotProductAttention_1/{proj}'
            target[0].assign(dat[f'{source}/kernel'].reshape(dim, dim))
            target[1].assign(dat[f'{source}/bias'].reshape(dim))

        # Feed-forward and layer-norm parameters are assigned as stored.
        for attr, sub_path, first in plain_pairs:
            target = getattr(block, attr)
            target[0].assign(dat[f'{root}/{sub_path}/{first}'])
            target[1].assign(dat[f'{root}/{sub_path}/bias'])
plt.show() """ return out, retimg if __name__ == "__main__": # instantiate my net model = EfficientNet(int(os.getenv("NUM", "0"))) model.load_weights_from_torch() if GPU: [x.gpu_() for x in get_parameters(model)] # category labels import ast lbls = fetch( "https://gist.githubusercontent.com/yrevar/942d3a0ac09ec9e5eb3a/raw/238f720ff059c1f82f368259d1ca4ffa5dd8f9f5/imagenet1000_clsidx_to_labels.txt" ) lbls = ast.literal_eval(lbls.decode('utf-8')) # load image and preprocess from PIL import Image url = sys.argv[1] if url == 'webcam': import cv2 cap = cv2.VideoCapture(0) cap.set(cv2.CAP_PROP_BUFFERSIZE, 1) while 1: _ = cap.grab() # discard one frame to circumvent capture buffering ret, frame = cap.read() img = Image.fromarray(frame[:, :, [2, 1, 0]]) out, retimg = infer(model, img)
def load_weights(self, url):
    """Download a weights file and load it into the convolutional modules.

    The file is read as five int32 header values followed by float32 weights.
    For every module whose block type is "convolutional", values are consumed
    sequentially: either four batchnorm vectors (bias, weight, running mean,
    running var) or the conv bias, followed by the conv weights. Each chunk is
    reshaped to the shape of the tensor it replaces.

    Args:
        url: location of the weights file, passed to ``fetch``.

    Side effects:
        Sets ``self.seen`` from the header and rebinds the ``bias`` /
        ``weight`` / ``running_mean`` / ``running_var`` attributes of the
        affected modules.
    """
    from functools import reduce
    from operator import mul

    raw = fetch(url)

    # First 5 int32 values are the header; index 3 is kept as the images-seen count.
    header = np.frombuffer(raw, dtype=np.int32, count = 5)
    self.seen = header[3]

    # Everything after the five header values is weight data.
    weights = np.frombuffer(raw, dtype=np.float32)[5:]
    ptr = 0

    def numel(tensor):
        # Number of elements, as the product of the tensor's dimensions.
        return reduce(mul, tensor.shape)

    def take(count, like):
        # Consume `count` values from the stream and shape them like `like`.
        nonlocal ptr
        chunk = Tensor(weights[ptr:ptr + count])
        ptr += count
        return chunk.reshape(shape=tuple(like.shape))

    for i, module in enumerate(self.module_list):
        # self.blocks is offset by one relative to self.module_list.
        block = self.blocks[i + 1]
        if block["type"] != "convolutional":
            continue

        try:
            # we have batchnorm, load conv weights without biases, and batchnorm values
            batch_normalize = int(block["batch_normalize"])
        except (KeyError, ValueError, TypeError):
            # no (usable) batchnorm entry, load conv weights + biases
            batch_normalize = 0

        conv = module[0]

        if batch_normalize:
            bn = module[1]
            # All four batchnorm vectors use the element count of the bias.
            num_bn_biases = numel(bn.bias)

            # Order in the file: biases, weights, running mean, running var.
            bn_biases = take(num_bn_biases, bn.bias)
            bn_weights = take(num_bn_biases, bn.weight)
            bn_running_mean = take(num_bn_biases, bn.running_mean)
            bn_running_var = take(num_bn_biases, bn.running_var)

            # Copy data
            bn.bias = bn_biases
            bn.weight = bn_weights
            bn.running_mean = bn_running_mean
            bn.running_var = bn_running_var
        else:
            # Load the biases of the conv layer
            conv.bias = take(numel(conv.bias), conv.bias)

        # Load the weights of the conv layer
        conv.weight = take(numel(conv.weight), conv.weight)
num_classes = int(module["classes"]) # Transform x = predict_transform(x, inp_dim, anchors, num_classes) if not write: detections = x write = 1 else: detections = Tensor(np.concatenate((detections.cpu().data, x.cpu().data), 1)) # print(module_type, 'layer took %.2f s' % (time.time() - st)) outputs[i] = x return detections # Return detections if __name__ == "__main__": cfg = fetch('https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg') # normal model # cfg = fetch('https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3-tiny.cfg') # tiny model # Make deterministic np.random.seed(1337) # Start model model = Darknet(cfg) print("Loading weights file (237MB). This might take a while…") model.load_weights('https://pjreddie.com/media/files/yolov3.weights') # normal model # model.load_weights('https://pjreddie.com/media/files/yolov3-tiny.weights') # tiny model if GPU: params = get_parameters(model) [x.gpu_() for x in params]