import numpy as np
from PIL import Image
from scipy.ndimage import gaussian_filter, map_coordinates


def elastic_transform(image,
                      label,
                      alpha=1000,
                      sigma=30,
                      spline_order=1,
                      mode='nearest',
                      random_state=np.random):
    """Elastic deformation of image as described in [Simard2003]_.
    .. [Simard2003] Simard, Steinkraus and Platt, "Best Practices for
       Convolutional Neural Networks applied to Visual Document Analysis", in
       Proc. of the International Conference on Document Analysis and
       Recognition, 2003.
    """
    image = np.array(image)
    label = np.array(label)
    shape = image.shape[:2]

    # Smooth random displacement fields, scaled by alpha.
    dx = gaussian_filter(
        (random_state.rand(*shape) * 2 - 1), sigma, mode="constant",
        cval=0) * alpha
    dy = gaussian_filter(
        (random_state.rand(*shape) * 2 - 1), sigma, mode="constant",
        cval=0) * alpha

    x, y = np.meshgrid(np.arange(shape[0]), np.arange(shape[1]), indexing='ij')
    indices = [np.reshape(x + dx, (-1, 1)), np.reshape(y + dy, (-1, 1))]

    result1 = map_coordinates(image, indices, order=spline_order,
                              mode=mode).reshape(shape)
    result2 = map_coordinates(label, indices, order=spline_order,
                              mode=mode).reshape(shape)
    return Image.fromarray(result1), Image.fromarray(result2)
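A minimal usage sketch (file names are hypothetical): the function operates on 2-D greyscale inputs, and both outputs share one displacement field, so image and label stay aligned; passing a seeded RandomState makes the deformation reproducible.

rng = np.random.RandomState(42)                  # seeded for reproducibility
img = Image.open('frame.png').convert('L')       # hypothetical input image
lbl = Image.open('frame_mask.png').convert('L')  # hypothetical label mask
img_t, lbl_t = elastic_transform(img, lbl, alpha=1000, sigma=30,
                                 random_state=rng)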
Example 2
    def pad_and_bg(self, img):
        # Shift the image by a random offset and composite a random background
        # into the area exposed by the shift.
        bg_img = np.asarray(Image.open(io.BytesIO(np.random.choice(self.bg_buffer))))
        cols, rows = img.size  # PIL size is (width, height)

        # Random translation offsets.
        s_x = int(np.random.normal(0, 6))
        s_y = int(np.random.normal(0, 10))

        M = np.float32([[1, 0, s_x], [0, 1, s_y]])
        ret_img = cv2.warpAffine(np.asarray(img), M, (cols, rows),
                                 borderMode=cv2.BORDER_CONSTANT,
                                 borderValue=(255, 255, 255))

        # Mask: zeros over the original content, 255 where the shift exposed
        # the border, so the background shows through only there.
        mask_img = np.zeros((rows, cols, 3), dtype=np.float32)
        mask_3 = cv2.warpAffine(mask_img, M, (cols, rows),
                                borderMode=cv2.BORDER_CONSTANT,
                                borderValue=(255, 255, 255))

        bg_img = cv2.resize(bg_img, dsize=(cols, rows))

        mask_3 = mask_3 / 255
        ret_img = ret_img * (1 - mask_3) + bg_img * mask_3
        return Image.fromarray(ret_img.astype('uint8'))
def cv2_imshow(img):
    img = img[:, :, [2, 1, 0]]  # reorder OpenCV's BGR channels to RGB
    img = Image.fromarray(img)
    plt.figure(figsize=(20, 20))
    plt.imshow(img)
    plt.axis('off')
    plt.show()
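A usage sketch with a hypothetical file; cv2.imread returns BGR, which cv2_imshow reorders before handing the image to matplotlib.

frame = cv2.imread('photo.jpg')  # hypothetical file, loaded in BGR order
cv2_imshow(frame)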
Example 4
    def pad_and_bg(self, img):
        # Pad the image with random white borders, then composite a random
        # background into the padded area via a mask.
        bg_img = np.asarray(Image.open(io.BytesIO(np.random.choice(self.bg_buffer))))

        pad_top = int(abs(np.random.normal(0, self.pad_param)))
        pad_bottom = int(abs(np.random.normal(0, self.pad_param)))
        pad_left = int(abs(np.random.normal(0, self.pad_param)))
        pad_right = int(abs(np.random.normal(0, self.pad_param)))

        ret_img = cv2.copyMakeBorder(np.asarray(img), pad_top, pad_bottom,
                                     pad_left, pad_right, cv2.BORDER_CONSTANT,
                                     value=(255, 255, 255))

        # Mask: zeros over the content, 255 in the padded border, so the
        # background shows through only in the border region.
        mask_img = np.zeros((img.size[1], img.size[0], 3), dtype=np.float32)
        mask_3 = cv2.copyMakeBorder(mask_img, pad_top, pad_bottom, pad_left,
                                    pad_right, cv2.BORDER_CONSTANT,
                                    value=(255, 255, 255))

        # Resize everything back to the original (width, height).
        ret_img = cv2.resize(ret_img, dsize=(img.size[0], img.size[1]))
        bg_img = cv2.resize(bg_img, dsize=(img.size[0], img.size[1]))
        mask_3 = cv2.resize(mask_3, dsize=(img.size[0], img.size[1]))

        mask_3 = mask_3 / 255
        ret_img = ret_img * (1 - mask_3) + bg_img * mask_3
        return Image.fromarray(ret_img.astype('uint8'))
Example 5
def checkValidateCode():
    # Binarize the captcha to clean it up before OCR.
    img = Image.open('code.png').convert('L')
    ret, img = cv2.threshold(np.array(img), 125, 255, cv2.THRESH_BINARY)
    img = Image.fromarray(img.astype(np.uint8))
    # Point pytesseract at the Tesseract binary (only needed if not on PATH).
    pytesseract.pytesseract.tesseract_cmd = 'C:/Program Files/Tesseract-OCR/tesseract.exe'
    code = pytesseract.image_to_string(img)

    return code.strip()
Example 6
    def __getitem__(self, idx):

        image_path = os.path.join(self.images_path, self.images_name[idx])
        image = Image.open(image_path)
        if len(image.getbands()) == 4:  # RGBA PNG: drop the alpha channel
            im2arr = np.array(image)
            image = Image.fromarray(im2arr[:, :, :-1])
        image = image.resize((215, 215), Image.BILINEAR)
        X = self.transform(image)

        return X
Example 7
def extract_face(filename, required_size=(224, 224)):
    pixels = plt.imread(filename)
    results = detector.detect_faces(pixels)
    x1, y1, width, height = results[0]['box']  # first detected face
    x2, y2 = x1 + width, y1 + height
    face = pixels[y1:y2, x1:x2]
    a = np.asarray(face)
    image = Image.fromarray(a)
    image = image.resize(required_size)
    face_array = np.array(image)
    return face_array
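The `detector` used above is defined outside this snippet; judging by the detect_faces()/'box' interface it is most likely an MTCNN instance from the mtcnn package (an assumption). A sketch of that setup and a call:

from mtcnn import MTCNN  # assumed source of `detector`

detector = MTCNN()
face_array = extract_face('person.jpg')  # hypothetical file
print(face_array.shape)                  # (224, 224, 3)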
Example 8
    def to_image(self, data, index_axial=0, index_azimuthal=0, scale=None,
                 absolute_scale=False):
        from PIL import Image
        # Extract one 2-D slice and rescale it for display.
        a = np.float32(data[index_axial, index_azimuthal, :, :].reshape(
            (data.shape[2], data.shape[3])))
        if scale is None:
            a = 255.0 * a / (a.max() + 1e-12)
        else:
            if absolute_scale:
                a = scale * a
            else:
                a = scale * 255.0 * a / (a.max() + 1e-12)
        im = Image.fromarray(a).convert("RGB")
        return im.rotate(90)
Example 9
def create_image(rectangles_in):
    """
    Appends all cut and transformed rectangles into one resulting image
    :param rectangles_in: list: list of cut and transformed rectangles
    :return: PIL.Image: resulting Image
    """

    lines = []
    for i in range(10):
        lines.append(Image.fromarray(
            np.hstack(tuple(rectangles_in[j * 10 + i] for j in range(10)))))
    new_image = append_images(lines)
    return new_image
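A hypothetical usage sketch: 100 equally sized uint8 tiles arranged into a 10x10 grid. `append_images` is defined elsewhere in the source and is assumed to stack the ten row images vertically into one PIL.Image.

tiles = [np.full((32, 32), (i * 2) % 256, dtype=np.uint8) for i in range(100)]
mosaic = create_image(tiles)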
Example 10
def make_greyscale_white_bg(im, r, b, g):
    im = im.convert('RGBA')  # Convert to RGBA

    data = np.array(im)  # "data" is a height x width x 4 numpy array
    red, green, blue, alpha = data.T  # Temporarily unpack the bands for readability

    # Replace grey with white... (leaves alpha values alone...)
    grey_areas = (red == r) & (blue == b) & (green == g)
    data[..., :-1][grey_areas.T] = (255, 255, 255)  # Transpose back needed

    im2 = Image.fromarray(data)
    im2 = im2.convert('L')  # convert to greyscale image

    return im2
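A usage sketch (file name hypothetical). Note the slightly surprising parameter order (im, r, b, g); with a neutral grey all three values are equal, so the order is harmless here, but it matters for other colours.

im = Image.open('sprite.png')  # image with a uniform grey background
white_bg = make_greyscale_white_bg(im, 128, 128, 128)
white_bg.save('sprite_white_bg.png')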
Example 11
def show_img(img_arr, label_arr, meta, index, label_fn=default_label_fn):
    """ Given a numpy array of image from CIFAR-10 labels this method transform the data so that PIL
    can read and show the image. Check here how CIFAR encodes the image http://www.cs.toronto.ed
    u/~kriz/cifar.html"""
    one_img = img_arr[index, :]
    # Assume image size is 32 x 32. The first 1024 px are r, the next
    # 1024 px are g, and the last 1024 px are b (the r, g, b channels).
    r = one_img[:1024].reshape(32, 32)
    g = one_img[1024:2048].reshape(32, 32)
    b = one_img[2048:].reshape(32, 32)
    rgb = np.dstack([r, g, b])
    img = Image.fromarray(np.array(rgb), 'RGB')
    display(img)
    print(label_fn(index, meta[label_arr[index][0]].decode('utf-8')))
Example 12
    def make_gif_slices(self, vol, name='test', timestr=None, length=None):
        images = []
        gifdir = 'figures/gif_dir/'
        if timestr is not None:

            if not os.path.isdir(gifdir + timestr):
                os.mkdir(gifdir + timestr)
            gifdir += timestr + '/'
        print('Saving to ', gifdir)
        if length is None:
            length = len(nbody.redshift_bins)
        for i in range(length):
            plooop = (cm.gist_earth(
                (vol[i, :, :] + 1) / 2) * 255).astype('uint8')
            images.append(Image.fromarray(plooop).resize((512, 512)))
        imageio.mimsave(gifdir + name + '.gif', images, fps=2)
Example 15
    def make_zslice_gif(self, model, timestr, epoch, zs=None, fps=2):
        fixed_z = torch.randn(1, 201, 1, 1, 1).float()
        zslices = np.zeros((len(nbody.redshift_bins), 64, 64))
        images = []
        iteration = nbody.redshift_bins
        if zs is not None:
            iteration = zs

        for i in range(len(iteration)):
            fixed_z[:, -1] = iteration[i]
            gen_samp = model(fixed_z).detach().numpy()
            ploop = (cm.gist_earth(
                (gen_samp[0][0][10, :, :] + 1) / 2) * 255).astype('uint8')
            images.append(Image.fromarray(ploop).resize((512, 512)))
        imageio.mimsave('figures/gif_dir/' + timestr + '/zslicegif_epoch_' +
                        str(epoch) + '.gif',
                        images,
                        fps=fps)
Example 16
def invert_colors(im):
    im = im.convert('RGBA')  # Convert to RGBA
    data = np.array(im)  # "data" is a height x width x 4 numpy array
    red, green, blue, alpha = data.T  # Temporarily unpack the bands for readability

    # Replace black with red temporarily... (leaves alpha values alone...)
    black_areas = (red == 0) & (blue == 0) & (green == 0)
    data[..., :-1][black_areas.T] = (255, 0, 0)  # Transpose back needed

    # Replace white areas with black
    white_areas = (red == 255) & (blue == 255) & (green == 255)
    data[..., :-1][white_areas.T] = (0, 0, 0)  # Transpose back needed

    # Replace red areas (originally black) with white
    red_areas = (red == 255) & (blue == 0) & (green == 0)
    data[..., :-1][red_areas.T] = (255, 255, 255)  # Transpose back needed

    im2 = Image.fromarray(data)
    im2 = im2.convert('L')  # convert to greyscale image

    return im2
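A usage sketch (file name hypothetical). Red serves only as a temporary marker so the two swaps don't collide; any colour not already present in the image would work.

im = Image.open('glyph.png')  # hypothetical black-on-white image
inverted = invert_colors(im)
inverted.save('glyph_inverted.png')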
Example 17
def upload_PIL_s3(image, bucket, file_name):
    """
    Takes numpy image array as input.Because numpy images cannot be uploaded directly. 
    Must be converted into memory file within the memory. 
    file_name here refers to the file name that ends with PNG or JPEG
    """

    img = Image.fromarray(image)

    s3_client = boto3.client('s3')

    buffer = BytesIO()
    img.save(buffer, get_safe_ext(file_name))
    buffer.seek(0)
    sent_data = s3_client.put_object(Bucket=bucket, Key=file_name, Body=buffer)

    if sent_data['ResponseMetadata']['HTTPStatusCode'] != 200:
        raise S3ImagesUploadFailed(
            'Failed to upload image {} to bucket {}'.format(file_name, bucket))
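get_safe_ext and S3ImagesUploadFailed are defined elsewhere in the source; below are minimal stand-ins consistent with how they are used here (the extension mapping is an assumption).

class S3ImagesUploadFailed(Exception):
    """Raised when the S3 put_object call does not return HTTP 200."""


def get_safe_ext(file_name):
    # Map a file extension to the format name Pillow expects in Image.save().
    ext = file_name.rsplit('.', 1)[-1].lower()
    return {'jpg': 'JPEG', 'jpeg': 'JPEG', 'png': 'PNG'}.get(ext, 'PNG')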
Example 18
    def train(self, start_epoch, max_epoch, batch_size, viz_interval):
        max_step = sum([len(i) for i in self.pathlist]) // batch_size

        step = 0
        for epoch in range(start_epoch, max_epoch):
            for step_index in range(max_step):
                batch_img = np.zeros((batch_size, self.input_shape[0],
                                      self.input_shape[1], self.input_shape[2]))
                batch_target = np.zeros((batch_size, 7))

                # Build a class-balanced batch: batch_index % numclass picks the class.
                for batch_index in range(batch_size):
                    tget = batch_index % self.numclass
                    random_index = np.random.randint(self.train_data_count[tget])
                    img, target = self.gen_data(
                        io.BytesIO(self.image_buffer[tget][random_index]),
                        self.path_buffer[tget][random_index])
                    batch_img[batch_index] = img
                    batch_target[batch_index] = target

                # Save one denormalized sample from the batch for inspection.
                save_img = (batch_img[np.random.randint(batch_size)] + 1) * 127.5
                save_img = Image.fromarray(save_img.astype('uint8'))
                save_img.save('temppp.png')

                train_loss = self.model.train_on_batch(batch_img, batch_target)
                with train_summary_writer.as_default():
                    tf.summary.scalar('loss', train_loss[0], step=step)
                    tf.summary.scalar('accuracy', train_loss[1], step=step)
                    step = step + 1
                print('\r epoch ' + str(epoch) + ' / ' + str(max_epoch) +
                      '   step ' + str(step_index) + ' / ' + str(max_step) +
                      '    loss = ' + str(train_loss))

            self.test()

            os.makedirs('models', exist_ok=True)
            self.model.save('countscoreMobile_64_class7_tv6_transAll_rotate.h5')
Example 19
def print_images(x_data, num_samples):
    count = 0
    os.chdir('/home/kayque/LENSLOAD/')
    PATH = os.getcwd()

    for xe in range(0, num_samples, 1):
        x_line = x_data[xe, :, :, :]
        print("x_line shape:", x_line.shape)
        rgb = np.array(toimage(x_line))
        im1 = Image.fromarray(rgb)
        im1.save("img_Y_%s.png" % xe)
        count = count + 1

    source = '/home/kayque/LENSLOAD/'
    print(' ** Does an old folder exist?')
    if os.path.exists(source + 'lens_yes_2'):
        print(' ** Yes, it does! Trying to delete... ')
        shutil.rmtree(source + 'lens_yes_2', ignore_errors=True)
        print(" ** Supposedly done. Checking if there's an RNCV folder...")
        os.mkdir('lens_yes_2')
        print(' ** Done!')
    else:
        print(" ** None found. Creating one.")
        os.mkdir('lens_yes_2')
        print(' ** Done!')

    dest1 = ('/home/kayque/LENSLOAD/lens_yes_2/')
    counter = 0

    for bu in range(0, num_samples, 1):
        if os.path.exists(source + './img_Y_%s.png' % bu):
            shutil.move(source + './img_Y_%s.png' % bu, dest1)
            counter = counter + 1

    print("\n ** Done. %s files moved." % counter)
Example 21
def extract_read_tags(model, prediction, image, height, width):

    tag_names = []

    # May need to move this logic to get_rekognition_annotation.

    for pred in prediction['CustomLabels']:

        if pred['Name'] == 'Tag':

            bounding_box = pred['Geometry']['BoundingBox']
            left = width * bounding_box['Left']
            top = height * bounding_box['Top']
            # Separate names so the image dimensions are not overwritten for
            # subsequent detections.
            box_width = width * bounding_box['Width']
            box_height = height * bounding_box['Height']

            img = Image.fromarray(image)
            canvas = copy.deepcopy(img)  # in case making changes ruins the image

            # Rekognition cannot work with images smaller than 80 pixels.
            box_width = max(box_width, 80)
            box_height = max(box_height, 80)

            cropped = canvas.crop(
                (left, top, left + box_width, top + box_height))  # crop the image
            display(cropped)

            tags = read_tags(model, cropped)
            tag_names.append(tags)

        else:
            pass

    # We only need the first detected tag.
    tag = tag_names[0]

    return tag
Example 23
    def frame(self, idx, frame):
        # Pretty expensive, so only run once per second of video.
        if idx % (self.in_fps * 1) != 0:
            return

        # Inference
        results = self.yolo(frame)
        predictions = results.pred[0]

        # The annotator works on RGB (PIL-style) arrays, so convert the BGR
        # frame before drawing.
        img_pil = np.ascontiguousarray(
            Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))

        annotator = Annotator(img_pil)
        for *box, confidence, cls in predictions:
            # ic(cls, confidence, results.names[int(cls)])
            label = f"{results.names[int(cls)]} {confidence:.2f}"
            annotator.box_label(box, label, color=colors(cls))

        # For reversing the operation:
        im_np = np.asarray(annotator.im)
        cv_helper.display_jupyter(im_np)
Example 24
def showarray(a, fmt='jpeg'):
    # fmt is unused here; kept so existing call sites don't break.
    a = np.uint8(np.clip(a, 0, 255))
    display(Image.fromarray(a))
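A quick sanity check with hypothetical data:

noise = np.random.rand(64, 64, 3) * 255  # random float image in 0-255
showarray(noise)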
Example 25
def horizontal_plus_vertical_edge_detection(self, image):
    horizontal = self.horizontal_edge_detection(image)
    vertical = self.vertical_edge_detection(image)
    rows = len(horizontal)
    columns = len(horizontal[0])

    return_array = []
    for i in range(rows):
        return_array.append([])
        for j in range(columns):
            # Note: np.uint8 wraps sums above 255.
            return_array[-1].append(np.uint8(int(horizontal[i, j]) + int(vertical[i, j])))
    return np.array(return_array)

image_processing.horizontal_plus_vertical_edge_detection = horizontal_plus_vertical_edge_detection
impr = image_processing()
im = Image.fromarray(impr.horizontal_plus_vertical_edge_detection(image))
display(im)


# Gaussian blur

def gaussian_filter(self, image):
    # 5x5 Gaussian kernel; 273 is the normalizing divisor (sum of the weights).
    kernel = [273, [[1, 4, 7, 4, 1],
                    [4, 16, 26, 16, 4],
                    [7, 26, 41, 26, 7],
                    [4, 16, 26, 16, 4],
                    [1, 4, 7, 4, 1]]]
    processed_image = self.convolution(image, kernel)
    return processed_image
Example 27
def predict_video(video_name, enc_model, clf_model):

    n_frames = get_video_frame_num(video_name)
    pred_trace = {
        'picked_f': [],
        'total_frame': 0,
        'picked_rate': 0.0,
        'proc_stage': []
    }

    encdata = getMetadata(video_name)
    cvdata = getCVInfoFromLog(video_name)
    mv_file = os.path.join(MV_INPUT_FOLDER, video_name + '.pickle')
    with open(mv_file, 'rb') as fh:
        mv_features = pickle.load(fh)
    w = encdata['metadata']['w']
    h = encdata['metadata']['h']

    selected_fids = [0]
    prev_fid = 0
    stages = [-1]
    for fid in range(1, n_frames):
        frame_name = str(fid) + '.jpg'
        prev_frame_name = str(prev_fid) + '.jpg'

        # generate features
        #### single frame features
        enc = encdata[frame_name]
        x = {}
        if enc['type'] == 'P':
            x[ENC_TYPE] = 0
        else:
            x[ENC_TYPE] = 1

        cv = cvdata[frame_name]
        mv = mv_features[fid]
        #########################

        x[IMG_WIDTH] = w
        x[IMG_HEIGHT] = h
        x[ENC_SIZE] = enc['size']
        x[MV_SIZE] = mv[0]
        x[MV_MEAN] = mv[1]
        x[MV_MAX] = mv[2]
        x[MV_MIN] = mv[3]
        x[DIST_FROM_PFID] = fid - prev_fid

        # predict if we should pick this frame
        fst_layer_f = [
            x[ENC_TYPE], x[IMG_WIDTH], x[IMG_HEIGHT], x[ENC_SIZE], x[MV_SIZE],
            x[MV_MEAN], x[MV_MAX], x[MV_MIN], x[DIST_FROM_PFID]
        ]
        pred_y = enc_model.predict(fst_layer_f)
        if int(pred_y) == 0:
            stages += [0]
            continue

        ### two-frame
        cur_img = cv2.imread(
            os.path.join('/home/t-yuche/frames', video_name, frame_name))
        prev_img = cv2.imread(
            os.path.join('/home/t-yuche/frames', video_name, prev_frame_name))
        if h * w > 320 * 240:
            cur_img = cv2.resize(cur_img, (320, 240))
            prev_img = cv2.resize(prev_img, (320, 240))
        ##
        framediff = getFrameDiff(prev_img, cur_img)
        framediff_prec = framediff / (h * w * 1.0)

        ##
        pilcur_img = cv2.cvtColor(cur_img, cv2.COLOR_BGR2RGB)
        pilprev_img = cv2.cvtColor(prev_img, cv2.COLOR_BGR2RGB)
        pil_cur = Image.fromarray(pilcur_img)
        pil_prev = Image.fromarray(pilprev_img)
        phash_v = phash(pil_prev, pil_cur)

        ##
        hist_score = colorHistSim(prev_img, cur_img)

        ##
        sprev_img = cv2.resize(prev_img, (160, 120))
        scur_img = cv2.resize(cur_img, (160, 120))
        sift_score = getSIFTMatchingSim(sprev_img, scur_img)
        ##
        surf_score = getSURFMatchingSim(sprev_img, scur_img)
        ###

        x[SOBEL] = cv['sobel'][0]
        x[ILLU] = cv['illu'][0]
        x[FRAME_DIFF] = framediff_prec
        x[PHASH] = phash_v
        x[COLORHIST] = hist_score
        x[SIFTMATCH] = sift_score
        x[SURFMATCH] = surf_score

        #x = scale_feature(x, range_value)
        #p_label, dummy, dummy = svm_predict([1], [x], svm_model, '-q')
        snd_layer_f = [
            x[ENC_TYPE], x[IMG_WIDTH], x[IMG_HEIGHT], x[ENC_SIZE], x[MV_SIZE],
            x[MV_MEAN], x[MV_MAX], x[MV_MIN], x[SOBEL], x[ILLU], x[FRAME_DIFF],
            x[PHASH], x[COLORHIST], x[SIFTMATCH], x[SURFMATCH],
            x[DIST_FROM_PFID]
        ]
        pred_y = clf_model.predict(snd_layer_f)

        #print p_label
        if int(pred_y) == 1:
            stages += [2]
            selected_fids += [fid]
            prev_fid = fid
        else:
            stages += [1]

    pred_trace['picked_f'] = selected_fids
    pred_trace['total_frame'] = n_frames
    pred_trace['picked_rate'] = len(selected_fids) / (n_frames * 1.0)
    pred_trace['proc_stage'] = stages
    #print video_name, selected_fids, '\n' , n_frames, pred_trace['picked_rate']

    return pred_trace
Example 28
    box = cv2.boxPoints(rect)
    box = np.int0(box)
    #check for 90 degrees rotation
    if ((width / float(height)) > 1):
        # crop a particular rectangle from the source image
        cropped_image = cv2.getRectSubPix(carsample_mask, (width, height),
                                          centre)
    else:
        # crop a particular rectangle from the source image
        cropped_image = cv2.getRectSubPix(carsample_mask, (height, width),
                                          centre)

    # convert into grayscale
    cropped_image = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)
    # equalize the histogram
    cropped_image = cv2.equalizeHist(cropped_image)
    # resize to 260 cols and 180 rows (just something set as standard here)
    cropped_image = cv2.resize(cropped_image, (260, 180))
    cropped_images.append(cropped_image)

_ = plt.subplots_adjust(hspace=0.000)
number_of_subplots = len(cropped_images)
for i in range(number_of_subplots):
    showfig(cropped_images[i], plt.get_cmap('gray'))
    plt.show()
for img in cropped_images:
    data = pytesseract.image_to_data(Image.fromarray(img), output_type='dict')
    print(data)
Example 29
def remove_noise_and_smooth(file_name):
    img = cv2.imread(file_name, 0)
    filtered = cv2.adaptiveThreshold(img.astype(np.uint8), 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 41,
                                     3)
    kernel = np.ones((1, 1), np.uint8)
    opening = cv2.morphologyEx(filtered, cv2.MORPH_OPEN, kernel)
    closing = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, kernel)
    img = image_smoothening(img)
    or_image = cv2.bitwise_or(img, closing)
    return or_image
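image_smoothening (and process_image_for_ocr below) are defined elsewhere in the source; here is a minimal stand-in for image_smoothening, assuming the usual threshold / blur / re-threshold denoising step used in this kind of OCR pipeline.

def image_smoothening(img):
    # Fixed threshold, Otsu re-threshold, light blur, then a final Otsu pass.
    ret1, th1 = cv2.threshold(img, 180, 255, cv2.THRESH_BINARY)
    ret2, th2 = cv2.threshold(th1, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    blur = cv2.GaussianBlur(th2, (1, 1), 0)
    ret3, th3 = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return th3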

img = process_image_for_ocr(path)  # this is an ndarray

# ndarray to image (cm.gist_earth expects values in [0, 1] and returns RGBA)
from matplotlib import cm
img2 = Image.fromarray(np.uint8(cm.gist_earth(img) * 255))
display(img2)
# OCR
import pytesseract
print('Result without any preprocessing..')
img = Image.open(path)
print(type(img))
result = pytesseract.image_to_string(img)
print(result)

print('Result with #1 preprocessing...')
print(type(img2))
result = pytesseract.image_to_string(img2,lang='eng')
print(result)

Example 30
img = cv2.imread('image.jpg')

# Map the detected quadrilateral onto a 600x300 rectangle.
pts1 = np.float32(areas[0])
pts2 = np.float32([[600, 300], [600, 0], [0, 0], [0, 300]])

M = cv2.getPerspectiveTransform(pts1, pts2)
dst = cv2.warpPerspective(img, M, (600, 300))

cv2.imshow("dst", dst)
cv2.waitKey(0)
cv2.destroyAllWindows()

import sys

import pyocr
from PIL import Image

tools = pyocr.get_available_tools()

if len(tools) == 0:
    print("No OCR tool found")
    sys.exit(1)
tool = tools[0]

txt = tool.image_to_string(Image.fromarray(dst), lang="eng")

print(txt)
file = open('test.txt', 'w')
file.write(txt)
file.close()
Example 32
from IPython.display import Image, display
display(Image(filename="filippo.jpg"))

import face_recognition

my_photo = face_recognition.load_image_file("filippo.jpg")
my_face_locations = face_recognition.face_locations(my_photo)

my_face_locations

top, right, bottom, left = my_face_locations[0]

from PIL import Image, ImageDraw
my_face = my_photo[top:bottom, left:right]
my_face_img = Image.fromarray(my_face)

display(my_face_img)

my_face_encoding = face_recognition.face_encodings(my_photo)[0]

from IPython.display import Image, display
display(Image(filename="teamphoto.jpg"))

group_photo = face_recognition.load_image_file("teamphoto.jpg")
group_face_locations = face_recognition.face_locations(group_photo)
print(len(group_face_locations))
counter = 0
for face_location in group_face_locations:
    top, right, bottom, left = face_location
    group_face_encoding = face_recognition.face_encodings(group_photo)[counter]
    result = face_recognition.compare_faces([my_face_encoding],
                                            group_face_encoding)
    counter += 1

WINDOW_SIZE = 200
STRIDE = 64
K = 36
counter = 0

import matplotlib
from PIL import Image

fig, ax = plt.subplots(3, 2, figsize=(20, 25))

for i, img in enumerate(images):
    url = data_dir + img + '.tiff'
    image, _, best_regions = generate_patches(url, window_size=WINDOW_SIZE, stride=STRIDE, k=K)

    if image is None:
        continue

    glued_image = glue_images_one(tiles=best_regions, image_size=WINDOW_SIZE, n_tiles=K)

    # Rescale to the 0-255 range (exact only when glued_image.min() == 0) and convert to uint8.
    rescaled = (255.0 / glued_image.max() * (glued_image - glued_image.min())).astype(np.uint8)
    im = Image.fromarray(rescaled)
    im.save(f'./test/{img}.png')
    print(counter)
    counter += 1
    
Example 34
def imresize(img, size):
    img = np.array(Image.fromarray(img).resize(size))
    return img
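Note that PIL's Image.resize takes size as (width, height), so callers passing numpy-style (rows, cols) get a transposed result. A quick check with hypothetical data:

arr = (np.random.rand(120, 80, 3) * 255).astype(np.uint8)  # height 120, width 80
out = imresize(arr, (160, 240))  # size is (width, height)
print(out.shape)                 # (240, 160, 3)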
Example 35
bgfg = train_a['mask']

upd = bgfg.squeeze(0)
bgfg_np = upd / 2

np_img = bgfg_np.cpu().numpy()
plt.imshow(np_img)

np_img.shape

from PIL import Image
# PIL's 'L' mode expects uint8; scale the float mask (assumed in [0, 1]) first.
pil_img = Image.fromarray((np_img * 255).astype(np.uint8), mode='L')
pil_img.save('n.png')

mask_t = _samples[1]['pred_mask']
mask = mask_t.cpu()

print("mask size is: ", mask.size())
from torchvision import transforms
im = transforms.ToPILImage()(mask)  # .convert("RGB")
display(im)
print(im)
print(im.size)

a = train[9]
bgfg = _samples[1]['mask']