def main():
    """Generate 1000 scribe samples and pickle them to a file.

    Command line (positional; everything after the first is optional)::

        <out_file_name> [alphabet] [avg_sequence_len] [noise] [variable_length]

    * ``out_file_name`` -- output path; ``.pkl`` is appended when missing.
    * ``alphabet`` -- ``"ascii"`` (default) selects ``ascii_alphabet``; any
      other value selects ``hindu_alphabet``.
    * ``avg_sequence_len`` -- parsed with ``int`` (default 30).
    * ``noise`` -- parsed with ``float`` (default 0.05).
    * ``variable_length`` -- on when the argument starts with ``y``, ``t`` or
      ``1``, case-insensitively (default True).

    With no arguments the usage text is printed and the process exits.
    Every sample is echoed to stdout, and the pickle holds a dict with the
    keys ``'x'``, ``'y'`` and ``'chars'``.
    """
    alphabet_name = "ascii"
    avg_seq_len = 30
    noise = 0.05
    variable_len = True

    if len(sys.argv) < 2:
        print('Usage \n'
              '{} <out_file_name> [alphabet={}] [avg_sequence_len={}] '
              '[noise={}] [variable_length={}]'.format(
                  sys.argv[0], alphabet_name, avg_seq_len, noise,
                  variable_len))
        sys.exit()

    out_file_name = sys.argv[1]
    if not out_file_name.endswith('.pkl'):
        out_file_name += '.pkl'

    if len(sys.argv) > 2:
        alphabet_name = sys.argv[2]
    if len(sys.argv) > 3:
        avg_seq_len = int(sys.argv[3])
    if len(sys.argv) > 4:
        noise = float(sys.argv[4])
    if len(sys.argv) > 5:
        # startswith() rather than indexing [0]: an empty argument ("")
        # now reads as False instead of raising IndexError.
        variable_len = sys.argv[5].lower().startswith(("y", "t", "1"))

    # Anything other than "ascii" falls back to the hindu alphabet.
    alphabet = ascii_alphabet if alphabet_name == "ascii" else hindu_alphabet
    print(alphabet)

    scribe = Scribe(alphabet, avg_seq_len, noise)

    xs = []
    ys = []
    for _ in range(1000):
        x, y = scribe.get_sample(variable_len)
        xs.append(x)
        ys.append(y)
        # Show the label indices next to the characters they map to.
        print(y, "".join(alphabet.chars[label] for label in y))
        slab_print(x)

    print('Output: {}\n'
          'Char set : {}\n'
          '(Avg.) Len: {}\n'
          'Varying Length: {}\n'
          'Noise Level: {}'.format(
              out_file_name, alphabet.chars, avg_seq_len, variable_len, noise))

    with open(out_file_name, 'wb') as f:
        # Protocol -1 selects the highest pickle protocol available.
        pickle.dump({'x': xs, 'y': ys, 'chars': alphabet.chars}, f, -1)
def show_all(shown_seq, shown_img, seen_probabilities=None,
             aux_img=None, aux_name=None):
    """
    Debug helper that prints a sample's labels and image, plus optional
    network output and an optional auxiliary matrix.

    :param shown_seq: Labelling of the input, printed with labels_print
    :param shown_img: Input image, printed with slab_print
    :param seen_probabilities: Optional softmax excitations; when given, the
        argmax along axis 0 is printed as the "seen" labels and the matrix
        itself is printed after the image
    :param aux_img: Optional extra image/matrix to print for debugging
    :param aux_name: Heading printed just before aux_img
    :return: None
    """
    have_probabilities = seen_probabilities is not None

    print('Shown : ', end='')
    labels_print(shown_seq)

    if have_probabilities:
        print('Seen : ', end='')
        labels_print(np.argmax(seen_probabilities, 0))

    print('Image Shown:')
    slab_print(shown_img)

    if have_probabilities:
        print('SoftMax Firings:')
        slab_print(seen_probabilities)

    # Nothing more to print unless an auxiliary matrix was supplied.
    if aux_img is None:
        return
    print(aux_name)
    slab_print(aux_img)
# Optional boolean flags in argv[4] and argv[5]. A flag is on when the
# lower-cased argument is one of "yes"/"true"/"t"/"1", and defaults to True
# when the argument is absent.
complx = (len(sys.argv) <= 4
          or sys.argv[4].lower() in ("yes", "true", "t", "1"))
variable_len = (len(sys.argv) <= 5
                or sys.argv[5].lower() in ("yes", "true", "t", "1"))

scribe = RowScribe(nChars, avg_seq_len, buffer_len=avg_seq_len // 10)

# Draw 1000 samples, echoing each label and image as it is produced.
xs, ys = [], []
for i in range(1000):
    x, y = scribe.get_data(complx, variable_len)
    xs.append(x)
    ys.append(y)
    print(y)
    slab_print(x)

# Summary of the settings used for this run.
summary_template = ('Output: {}\n'
                    'Char set size: {}\n'
                    '(Avg.) Len: {}\n'
                    'Varying Length: {}\n'
                    'Complex Scribe: {}\n')
print(summary_template.format(
    out_file_name, nChars, avg_seq_len, variable_len, complx))

# Persist the samples; protocol -1 selects the highest pickle protocol.
with open(out_file_name, 'wb') as out_file:
    pickle.dump({'x': xs, 'y': ys, 'nChars': nChars}, out_file, -1)
data_x.append(np.asarray(x, dtype=theano.config.floatX)) # Actual training for epoch in range(100): print('Epoch : ', epoch) for samp in range(nSamples): x = data_x[samp] y = data_y[samp] if samp < nTrainSamples: pred, cst = train_fn(x.T, y) if epoch % 10 == 0 and samp < 3: # Print some training info maxes = np.argmax(pred, 1) print('## TRAIN cost: ', np.round(cst, 3)) pred_print(y) pred_print(maxes) slab_print(x) slab_print(pred.T) elif epoch % 10 == 0 and samp - nTrainSamples < 3: # Print some test images pred = np.asarray(test_fn(x.T))[0] maxes = np.argmax(pred, 1) print('## TEST') pred_print(y) pred_print(maxes) slab_print(x) slab_print(pred.T)
# Optional boolean flag in argv[5]: on when the lower-cased argument is one
# of "yes"/"true"/"t"/"1", and True by default when the argument is absent.
variable_len = (len(sys.argv) <= 5
                or sys.argv[5].lower() in ("yes", "true", "t", "1"))

scribe = RowScribe(nChars, avg_seq_len, buffer_len=avg_seq_len // 10)

# Draw 1000 samples, echoing each label and image as it is produced.
xs, ys = [], []
for i in range(1000):
    x, y = scribe.get_data(complx, variable_len)
    xs.append(x)
    ys.append(y)
    print(y)
    slab_print(x)

# Summary of the settings used for this run.
summary_template = ('Output: {}\n'
                    'Char set size: {}\n'
                    '(Avg.) Len: {}\n'
                    'Varying Length: {}\n'
                    'Complex Scribe: {}\n')
print(summary_template.format(
    out_file_name, nChars, avg_seq_len, variable_len, complx))

# Persist the samples; protocol -1 selects the highest pickle protocol.
with open(out_file_name, 'wb') as out_file:
    pickle.dump({'x': xs, 'y': ys, 'nChars': nChars}, out_file, -1)
# Build the label and input lists from the loaded dataset. data_x is appended
# to here but created above this chunk.
data_y = []
for x, y in zip(data['x'], data['y']):
    # Insert blanks at alternate locations in the labelling (blank is nClasses)
    y1 = [nClasses]
    for char in y:
        y1 += [char, nClasses]
    data_y.append(np.asarray(y1, dtype=np.int32))
    data_x.append(np.asarray(x, dtype=theano.config.floatX))

# The lists *data_x* and *data_y* are the training sequences and their
# corresponding labels. A sample sequence and label is printed below:
print("Printing sample input ...")
idx = 0
slab_print(data_x[idx])
# NOTE(review): presumably appended so the blank label (nClasses) renders as a
# space -- this assumes len(chars) == nClasses before the append; confirm.
chars.append(' ')
print(data_y[idx], "".join(chars[i] for i in data_y[idx]))

# ## Zero Padding Training Data to fixed Length
# Here, both input sequences and target labels are zero-padded to fixed
# maximum sequence lengths.

# Convert list of input sequences to zero-padded 3D array
num_feat = data_x[0].shape[0]
max_x_len = np.max([bb.shape[1] for bb in data_x])  # list comprehension to get all lengths
x = np.zeros([len(data_x), max_x_len, num_feat])
for i, examples in enumerate(data_x):
    # Each sample is indexed [feature, step] while x is [sample, step,
    # feature], so copy the transposed sample in one slice assignment instead
    # of element by element; positions past the sample's length stay zero.
    x[i, :examples.shape[1], :examples.shape[0]] = examples.T