def data_augmentation_create_X_y(data):
    """Decode the single-digit images labelled 1-9 and keep their metadata.

    Each row of ``data`` is indexed positionally: index 2 is the encoded
    image handed to ``decode_image`` and index 6 is the label, which is
    compared against the strings '1' through '9'. Rows with any other label
    are ignored.

    Returns a list of tuples
    ``(image, orig_name, row, pos, actual_digits, number_of_digits, source)``
    taken from row indexes 1, 4, 5, 6, 7 and 8. The list is grouped by digit
    in ascending order, and rows of the same digit keep their order in
    ``data``.
    """
    collected = []

    for label in map(str, range(1, 10)):
        # Previously: images = db.select_by_actual_digit('split_grey', label)
        matching = [entry for entry in data if entry[6] == label]

        for entry in matching:
            # Decode and add a trailing channel axis. The extra axis is meant
            # for greyscale / black and white images; colour images would not
            # need it.
            pixels = np.expand_dims(decode_image(entry[2]), axis=-1)

            # Original variables needed to upload the augmented images back
            # into the training set.
            metadata = (entry[1], entry[4], entry[5],
                        entry[6], entry[7], entry[8])

            # Keep only images that have fewer than 4 dimensions after the
            # expansion, i.e. valid greyscale images.
            if len(pixels.shape) < 4:
                collected.append((pixels,) + metadata)

    return collected
Ejemplo n.º 2
0
def decode_and_convert(names_and_images, batch_number,
                       output_dir='C:\\Production Images'):
    """Decode a batch of images to resized greyscale and save it to disk.

    Args:
        names_and_images: Indexable pair; element 0 holds the image names and
            element 1 the encoded images accepted by ``decode_image``.
        batch_number: Value inserted into the progress messages and into the
            output file names.
        output_dir: Directory that receives the two output files. Defaults
            to the location that used to be hard-coded in this function.

    Returns:
        None. The names are written to ``batch_<batch_number>_names`` and the
        stacked images to ``batch_<batch_number>_images`` with ``np.save``
        (which appends ``.npy``).
    """
    # Local import so the function stays self-contained in this file.
    import os

    print('Starting conversion and logging of batch number {}'.format(
        batch_number))

    # Need to convert the images into the correct format
    # TODO: This functionality could probably be added to the utklipp function
    width = 200
    height = 115
    dim = (width, height)

    names = names_and_images[0]
    images = names_and_images[1]
    total = len(images)

    decoded_images = []

    # Decode, resize and convert every image; the printed index is 0-based.
    for index, img in enumerate(images):
        decoded = decode_image(img)
        decoded = cv2.resize(decoded, dim, interpolation=cv2.INTER_AREA)
        decoded = cv2.cvtColor(decoded, cv2.COLOR_BGR2GRAY)
        decoded_images.append(decoded)

        print('Complated number {} out of {}.'.format(index, total))

    decoded_images = np.array(decoded_images)

    # The paths are built with os.path.join instead of a literal containing
    # the invalid escape sequence '\P'.
    names_path = os.path.join(output_dir,
                              'batch_{}_names'.format(batch_number))
    images_path = os.path.join(output_dir,
                               'batch_{}_images'.format(batch_number))
    np.save(names_path, names)
    np.save(images_path, decoded_images)
Ejemplo n.º 3
0
def split_3digit_into_1digit_training(output_db):
    """Split every 3-digit training cell image into single-digit images.

    All rows of the ``cells`` table in the 3-digit training database are
    read. For each row the image is decoded, split with
    ``splitter.split_and_convert`` and the three resulting digits are stored
    through ``output_db.store_single_splits_training``. Rows whose name is
    already present in the 'split_orig' table are skipped, and rows that
    cannot be split are appended to 'splitting errors.txt'.

    Args:
        output_db: Database wrapper used both to check for existing splits
            (``test_exists_any_source``) and to store new ones
            (``store_single_splits_training``). The existence check
            previously went through a name ``db_output`` that is not defined
            in this function; it now uses this argument.

    Returns:
        The pandas DataFrame read from the source database, restricted to the
        columns 'name', 'original', 'row', 'code' and 'source'.
    """
    splitting_error = 0
    images_completed = 0

    conn = sqlite3.connect('\\\\129.242.140.132\\remote\\UtklippsDatabaser\\full_3digit_trainingset.db')
    try:
        # The query result is loaded completely here, so the connection can
        # be closed right away, also when the read fails.
        df = pd.read_sql_query('SELECT * FROM cells', conn)
    finally:
        conn.close()

    df = df[['name', 'original', 'row', 'code', 'source']]
    total_images = len(df)

    # Iterate over each row in the dataframe, to get needed information from
    # the original 3-digit images that will be split
    for _, row in df.iterrows():
        name = row['name']
        source = row['source']

        # If a split image exists in the 'split_orig' table, then it will
        # also exist in the other cell tables. This is checked before the
        # decode/split work so that skipped images cost nothing.
        if output_db.test_exists_any_source(name, 'split_orig'):
            images_completed += 1
            perc_done = ((images_completed + splitting_error) / total_images) * 100

            print('Skipping image {} that already exists in the database, - {}% done'.format(name, perc_done))
            continue

        # Convert the image into a numpy array instead of a bytes-object
        image = decode.decode_image(row['original'])

        # Get the split versions of the cell image, and all the different
        # conversions
        split_result = splitter.split_and_convert(image)

        # Check if an error occured during the splitting of the image
        if split_result is None:
            splitting_error += 1
            with open('splitting errors.txt', 'a') as error_log:
                error_log.write('Error number: {} - Cell image: {} - Original image: {}\n\n'.format(str(splitting_error), name, source))

            perc_done = ((images_completed + splitting_error) / total_images) * 100
            print('An error occured with splitting the cell image: {} - From the original image: {}, - {}% done'.format(name, source, perc_done))
            continue

        # To get easy access to each individual digit
        code = list(row['code'])

        for i in range(3):
            split_name = code[i] + '-' + str(i) + '-' + name
            # One image per conversion variant for digit position i.
            split_imgs = [split_result[x][i] for x in range(3)]

            output_db.store_single_splits_training(split_name, split_imgs, row['row'], str(i), code[i], len(code), name)

        # Count the image before reporting, like the skip and error branches
        # do, so that the last image reports 100%.
        images_completed += 1
        perc_done = ((images_completed + splitting_error) / total_images) * 100
        print('Completed image {}, - {}% done'.format(name, perc_done))

    return df
Ejemplo n.º 4
0
def decode(names_and_images, batch_number, oneDigit = False, training = False):
    """Decode a batch of images and convert them with ``convert_img_bw``.

    Args:
        names_and_images: Indexable pair; element 0 holds the names and
            element 1 the encoded images accepted by ``decode_image``.
        batch_number: Value inserted into the start message.
        oneDigit: When equal to True (and ``training`` equal to False) every
            decoded image is also passed to ``splitter.split_and_convert``.
        training: When equal to False every image is resized and converted;
            otherwise only images with more than 2 dimensions are.

    Returns:
        ``(decoded_names, decoded_images)`` when ``oneDigit == False`` or
        ``training == True``. Otherwise the 5-tuple ``(decoded_names,
        decoded_images, splittable_images, non_splittable_names,
        non_splittable_images)``.
    """
    print('Starting conversion of batch number {}'.format(batch_number))

    # Need to convert the images into the correct format
    # TODO: This functionality could probably be added to the utklipp function
    # One size is used for every mode; a separate 100x100 size for the
    # non-training case was considered but is not applied.
    target_size = (200, 115)    # (width, height)

    names = names_and_images[0]
    images = names_and_images[1]
    total = len(images)

    decoded_images, decoded_names = [], []
    splittable_images = []
    non_splittable_images, non_splittable_names = [], []

    for index, raw in enumerate(images):
        current = decode_image(raw)

        # The flags are compared explicitly against True/False so that
        # non-bool values are treated exactly as before.
        if oneDigit == True and training == False:
            parts = splitter.split_and_convert(current, onlyGrey = True)

            if parts is None:
                # Could not be split: remember the image as decoded, before
                # any resizing, together with its name.
                non_splittable_images.append(current)
                non_splittable_names.append(names[index])
            else:
                splittable_images.append((parts[0], parts[1], parts[2]))

        if training == False or len(current.shape) > 2:
            resized = cv2.resize(current, target_size, interpolation = cv2.INTER_AREA)
            current = convert_img_bw(resized)

        decoded_images.append(current)
        decoded_names.append(names[index])

        print('Complated decoding and conversion of image number {} out of {}.'.format(index, total))

    if oneDigit == False or training == True:
        return (decoded_names, decoded_images)

    return (decoded_names, decoded_images, splittable_images, non_splittable_names, non_splittable_images)
def create_X_y(data, db, table, color):
    """Build the image array X and the label array y from database rows.

    Args:
        data: Iterable of rows; index 0 is the encoded image handed to
            ``decode_image`` and index 1 is used as the label (and as the
            name passed to ``db.remove_by_name`` when a row is discarded).
        db: Database wrapper; only ``remove_by_name(table, name)`` is called.
        table: Table name. For 'cells' the images are resized to 200x115 and
            the labels are remapped to consecutive integers.
        color: Colour mode. 'orig' / 'original' keep colour images, 'bw'
            additionally inverts every image with ``cv2.bitwise_not``.

    Returns:
        ``(X, y)`` as numpy arrays. Unless ``color`` is 'orig' or 'original',
        X gets an extra trailing axis.
    """
    keeps_color = color in ('orig', 'original')
    images = []
    labels = []

    for record in data:
        picture = decode_image(record[0])

        # If any original color images managed to sneak into the database,
        # remove them from the table and leave them out of the result.
        if len(picture.shape) > 2 and not keeps_color:
            db.remove_by_name(table, record[1])
            continue

        # Should have done the transformation into bitwise_not when uploading
        # the images to the Black & White database
        if color == 'bw':
            picture = cv2.bitwise_not(picture)

        # TODO: Move this functionality. 3-digit images did not get
        # standardized on upload, do that manually here for now.
        if table == 'cells':
            picture = cv2.resize(picture, (200, 115), interpolation = cv2.INTER_AREA)

        images.append(picture)
        labels.append(record[1])

    X = np.array(images)
    y = np.array(labels)

    # If we are working with 3-digit codes, we need to remap each code to a
    # number between 0-<max number of unique codes> to fit in our model's
    # softmax output layer. The unique labels are saved so that the mapping
    # can be looked up later.
    if table == 'cells':
        unique_labels = np.unique(y)
        label_to_index = dict(zip(unique_labels, range(len(unique_labels))))

        y = np.array([label_to_index.get(label) for label in y])

        np.save('C:\\Models\\Ground_truth_arrays\\3_digit_{}_ground_truth_mapping'.format(color), unique_labels)

    # Reshape X for later use in Keras, normal shape is (XXXX, 100, 100) we
    # want it to be (XXXX, 100, 100, 1) for B&W and Greyscale, for original
    # images no expansion is needed
    if not keeps_color:
        X = np.expand_dims(X, axis=-1)

    return X, y