def plot_inkml(data, plot=True, save=False, classes=False):
    """Draw every trace of an inkml sample with matplotlib.

    Args:
        data: Either the path of an inkml file (``str``), which is parsed with
            ``parse_inkml``, or already parsed inkml data exposing a
            ``'trace'`` entry (one list of ``[x, y]`` points per trace) and,
            when ``classes`` is set, a ``'class'`` entry.
        plot: If true, display the figure with ``plt.show()``.
        save: If true AND ``data`` was given as a path, write the figure to
            ``<first path component>_png/<rest of the path>`` with every
            ``'inkml'`` in the rest of the path replaced by ``'png'``.
            Ignored for already parsed data, because no file name is known.
        classes: If true, colour each trace by its class, using the mapping
            read from the ``colors`` file in the working directory;
            otherwise all traces are drawn in black.
    """
    colors = None
    if classes:
        # The colour table is only needed (and only read) when traces are
        # coloured by class.
        # NOTE(review): eval() executes whatever expression the 'colors' file
        # contains. If the file is a plain literal, ast.literal_eval would be
        # the safe replacement - confirm the file format before switching.
        with open('colors', 'r') as c:
            colors = eval(c.read())

    filename = None
    if isinstance(data, str):
        filename = data
        data = parse_inkml(data)

    for i in range(len(data['trace'])):
        trace = data['trace'][i]
        x = [point[0] for point in trace]
        y = [point[1] for point in trace]
        color = colors[data['class'][i]] if classes else 'black'
        plt.plot(x, y, color=color, linewidth=0.6)

    plt.axis('equal')  # Constrain proportions
    plt.axis('off')  # Remove axes from figure

    # Save BEFORE showing: once a blocking plt.show() returns, the figure has
    # been closed, and a later savefig() would write a new, empty figure.
    if save and filename is not None:
        folder, _, rest = filename.partition('/')
        plt.savefig(folder + '_png/' + rest.replace('inkml', 'png'))

    if plot:
        plt.show()

    # Clear the axes, otherwise the next call would draw over this plot.
    plt.cla()
def remove_random_elements_inkml(inkml_file, percentage=30):
    """Parse an inkml file and drop a random share of its trace groups.

    The number of groups removed is ``ceil(n_groups * percentage / 100)``,
    clamped to the range ``[0, n_groups]``. Groups are chosen uniformly,
    without replacement, among the distinct values of the ``'group_id'``
    column; all rows belonging to a chosen group are removed.

    Args:
        inkml_file: Path of the inkml file, handed to ``parse_inkml``.
        percentage: Share of distinct groups to remove, in percent.

    Returns:
        The parsed data with the rows of the selected groups dropped. The
        remaining rows keep their original index labels.
    """
    import random

    data = parse_inkml(inkml_file)

    # Sample from the ids that actually occur, so the choice neither depends
    # on the ids being exactly 0..N-1 nor on how many rows were already
    # dropped (the previous rejection loop could spin forever for large
    # percentages because its candidate range shrank after every removal).
    group_ids = list(data['group_id'].unique())
    wanted = math.ceil(len(group_ids) * percentage / 100)
    n_remove = max(0, min(len(group_ids), wanted))

    doomed = random.sample(group_ids, n_remove)
    doomed_rows = data[data['group_id'].isin(doomed)].index
    data.drop(doomed_rows, inplace=True)
    return data
def plot_inkml_remove_text(data, save=True):
    """Plot an inkml sample in black, leaving out traces of class 'Text'.

    The input is first echoed with ``print``. If ``'b' in data`` is true the
    sample is treated as a test file: ``'Test File'`` is printed and nothing
    is drawn. For a string this is a substring test on the path; for any
    other container it is that container's own membership test.
    NOTE(review): presumably test files are recognisable by a 'b' in their
    name - confirm against the dataset naming scheme.

    Args:
        data: Path of an inkml file (``str``), parsed with ``parse_inkml``,
            or already parsed data exposing ``'trace'`` and ``'class'``.
        save: If true AND ``data`` was given as a path, save the figure under
            ``path/FCinkML_aug/`` using the part of the path after the first
            ``'/'``, renamed so the original images are not overwritten.
    """
    print(data)

    if 'b' in data:
        print('Test File')
        return

    filename = None
    if isinstance(data, str):
        filename = data
        data = parse_inkml(data)

    for i in range(len(data['trace'])):
        if data['class'][i] == 'Text':
            # Text traces are deliberately not drawn.
            continue
        trace = data['trace'][i]
        x = [point[0] for point in trace]
        y = [point[1] for point in trace]
        plt.plot(x, y, color='black', linewidth=0.6)

    plt.axis('equal')  # Constrain proportions
    plt.axis('off')  # Remove axes from figure

    if save and filename is not None:
        fig1 = plt.gcf()
        plt.draw()
        # Rename the output so the augmented images do not overwrite the
        # original ones.
        fig1.savefig('path/FCinkML_aug/' + filename.partition('/')[2].replace('FCinkML', 'FCinkML_aug').replace('writer', 'writer_augm_').replace('inkml', 'png'))

    # Clear the axes, otherwise the next call would draw over this plot.
    plt.cla()
# Demo 1: convert the sample svg recordings to inkml and plot each result.
n = 1
for i in range(n + 1):
    demo_stem = './NeoSmartpenM1_demo/neo_smtpen_m1_demo_' + str(i)
    svg_file = demo_stem + '.svg'  # Input svg recording
    inkml_file = demo_stem + '.inkml'  # Name of the converted file
    svg2inkml(svg_file, inkml_file)
    plot_inkml(inkml_file)

# Demo 2: plot one dataset sample, first in plain black and then with the
# classes highlighted; each variant is drawn once from parsed data and once
# straight from the file name.
show = True
sample = './FCinkML/writer3_5.inkml'
for highlight in (False, True):
    plot_inkml(parse_inkml(sample), plot=show, classes=highlight)
    plot_inkml(sample, plot=show, classes=highlight)

# Demo 3: draw the bounding boxes of the same sample.
from draw_bb import draw_bb

draw_bb('./FCinkML/writer3_5.inkml', show=True)
def bounding_boxes(inkml_file, save=False, plot=False,
                   annotation_file='noText_annotation.csv'):
    """Compute one bounding box per trace group, ignoring 'Text' traces.

    For every group id ``k`` in ``range(number of distinct group ids)`` the
    points of all non-'Text' traces with that id are collected and their
    extreme x / y values form the box. Groups that contribute no point (for
    example groups made only of 'Text' traces) produce no box.
    NOTE(review): this assumes group ids are the integers 0..N-1 - confirm
    against ``parse_inkml``.

    Args:
        inkml_file: Path of the inkml file, handed to ``parse_inkml``.
        save: If true, append the pixel-coordinate boxes to
            ``annotation_file`` as csv rows (no header, no index).
        plot: If true, draw the non-'Text' traces in black, overlay each box
            in its class colour (read from the ``colors`` file) and show the
            figure. Intended for debugging.
        annotation_file: csv file the annotations are appended to when
            ``save`` is true.

    Returns:
        ``(pboxs, bboxs)``: ``bboxs`` holds the boxes in data coordinates
        (columns ``x_min, x_max, y_min, y_max, class``); ``pboxs`` is what
        ``transform_coord`` builds from them, with the class column copied
        over and, when ``save`` is true, a leading ``filename`` column.
    """
    data = parse_inkml(inkml_file)
    cols = ['x_min', 'x_max', 'y_min', 'y_max', 'class']
    traces = data['trace']
    n_traces = len(traces)

    colors = None
    if plot:
        # NOTE(review): eval() executes whatever expression the 'colors' file
        # contains. If the file is a plain literal, ast.literal_eval would be
        # the safe replacement - confirm the file format before switching.
        with open('colors', 'r') as c:
            colors = eval(c.read())
        for i in range(n_traces):
            if data['class'][i] == 'Text':
                continue  # Text traces are not drawn
            plt.plot([point[0] for point in traces[i]],
                     [point[1] for point in traces[i]],
                     color='black', linewidth=0.6)
        plt.axis('equal')  # Constrain proportions
        plt.axis('off')  # Remove axes from figure

    # Build the rows in a plain list and create the dataframe once:
    # DataFrame.append was removed in pandas 2.0.
    rows = []
    for k in range(len(data['group_id'].drop_duplicates())):
        group_x = []
        group_y = []
        label = None
        for i in range(n_traces):
            if data['group_id'][i] != k or data['class'][i] == 'Text':
                continue
            points = traces[i]
            if len(points) == 0:
                # An empty trace contributes neither points nor a label.
                continue
            label = data['class'][i]
            group_x.extend(point[0] for point in points)
            group_y.extend(point[1] for point in points)

        if not group_x:
            # Nothing but 'Text' (or empty) traces in this group: there is
            # no box to compute, and indexing the empty lists would fail.
            continue

        x_min, x_max = min(group_x), max(group_x)
        y_min, y_max = min(group_y), max(group_y)
        rows.append({'x_min': x_min, 'x_max': x_max,
                     'y_min': y_min, 'y_max': y_max, 'class': label})

        if plot:
            # Rectangle corners in data coordinates (not pixels); the first
            # corner is repeated to close the outline.
            plt.plot([x_min, x_max, x_max, x_min, x_min],
                     [y_max, y_max, y_min, y_min, y_max],
                     color=colors[label], linewidth=1)

    bboxs = pd.DataFrame(rows, columns=cols)

    # Convert every box from data coordinates to pixel coordinates.
    pboxs = pd.DataFrame(columns=cols)
    for i in range(len(bboxs)):
        pboxs = transform_coord(data, bboxs, i, pboxs)

    if plot:
        plt.show()

    # The labels do not change with the coordinate transform.
    pboxs['class'] = bboxs[['class']].copy()

    if save:
        base_name = inkml_file.split('/')[-1]
        # Image name that corresponds to this inkml file.
        filename = base_name.replace('writer', 'writer_augm_').replace('.inkml', '.png')
        pboxs.insert(0, 'filename', 'path/' + filename)
        csv_cols = ['filename', 'x_min', 'y_min', 'x_max', 'y_max', 'class']
        pboxs.to_csv(annotation_file, columns=csv_cols, header=False, index=False, mode='a')
        print('Annotation data for ' + base_name + ' saved to ' + annotation_file + '.')

    return pboxs, bboxs
from parse_inkml import parse_inkml from transform_coord import transform_coord parser = argparse.ArgumentParser(description='Returns bounding boxes pixel coordinates for the specified file.') # Parser definition parser.add_argument('-file', action='store', dest='file') # Declare an argument called '-inkml_file' that is going to be stored in the 'inkml_file' variable parser.add_argument('-annotation', action='store', dest='annotation') # Declare an argument called '-annotation' that is going to be stored in the 'annotation' variable inkml_file = parser.parse_args().file # Parse the command line arguments and store the value in the 'inkml_file' variable annotation_file = parser.parse_args().annotation # Parse the command line arguments and store the value in the 'annotation_file' variable filename = inkml_file.split('/')[len(inkml_file.split('/'))-1].replace('.inkml', '.png') # Get the image file name from the inkml file print('Working on ' + filename + '... ', end='') data = parse_inkml(inkml_file) # Parses inkml file cols = ['x_min', 'x_max', 'y_min', 'y_max', 'class'] # Columns of bboxs dataframe bboxs = pd.DataFrame(columns=cols) # Dataframe containing all coordinates of all bounding boxes, plus the class for each bounding box for k in range(0, len(data['group_id'].drop_duplicates())): # For each unique trace group of the inkml file... group_x = [] # Contains all x coordinates of all traces in a specific trace group group_y = [] # Contains all y coordinates of all traces in a specific trace group for i in range(0, len(data['trace'])): # ...for each trace in the dataset... for j in range(0, len(data['trace'][i])): # ...for each pair of [x, y] coordinates in trace i... 
if data['group_id'][i] == k: # ...get the [x, y] coordinates in two separate arrays label = data['class'][i] # Identify the class of the current bounding box group_x.append(data['trace'][i][j][0]) # Add all x coordinates of trace i to the group x coordinates group_y.append(data['trace'][i][j][1]) # Add all y coordinates of trace i to the group y coordinates