Example #1
0
def main():
    """
    Generate 1000 scribe samples and pickle them to a file.

    Command line::

        <out_file_name> [alphabet] [avg_sequence_len] [noise] [variable_length]

    * ``out_file_name`` -- required; ``.pkl`` is appended when missing.
    * ``alphabet`` -- ``"ascii"`` selects ``ascii_alphabet``; any other
      value selects ``hindu_alphabet``.
    * ``avg_sequence_len`` -- parsed with ``int`` and passed to ``Scribe``.
    * ``noise`` -- parsed with ``float`` and passed to ``Scribe``.
    * ``variable_length`` -- true when the value starts with ``y``, ``t``
      or ``1`` (case-insensitive).

    When no output file is given, the usage text is printed and the
    process exits via ``sys.exit()``.

    :return: None. The pickle written contains a dict with the keys
        ``'x'``, ``'y'`` and ``'chars'``.
    """
    alphabet_name = "ascii"
    avg_seq_len = 30
    noise = 0.05
    variable_len = True

    n_args = len(sys.argv)

    if n_args < 2:
        print('Usage \n'
              '{} <out_file_name> [alphabet={}] [avg_sequence_len={}] '
              '[noise={}] [variable_length={}]'.format(
            sys.argv[0], alphabet_name, avg_seq_len, noise, variable_len))
        sys.exit()

    out_file_name = sys.argv[1]
    if not out_file_name.endswith('.pkl'):
        out_file_name += '.pkl'

    if n_args > 2:
        alphabet_name = sys.argv[2]

    if n_args > 3:
        avg_seq_len = int(sys.argv[3])

    if n_args > 4:
        noise = float(sys.argv[4])

    if n_args > 5:
        # startswith() instead of indexing [0]: an empty-string argument is
        # treated as false rather than raising IndexError.
        variable_len = sys.argv[5].lower().startswith(('y', 't', '1'))

    if alphabet_name == "ascii":
        alphabet = ascii_alphabet
    else:
        # Every name other than "ascii" falls back to the hindu alphabet.
        alphabet = hindu_alphabet

    print(alphabet)
    scribe = Scribe(alphabet, avg_seq_len, noise)

    xs = []
    ys = []
    for _ in range(1000):
        x, y = scribe.get_sample(variable_len)
        xs.append(x)
        ys.append(y)
        # Echo the label indices next to the characters they index.
        print(y, "".join(alphabet.chars[label] for label in y))
        slab_print(x)

    print('Output: {}\n'
          'Char set : {}\n'
          '(Avg.) Len: {}\n'
          'Varying Length: {}\n'
          'Noise Level: {}'.format(
        out_file_name, alphabet.chars, avg_seq_len, variable_len, noise))

    with open(out_file_name, 'wb') as f:
        # HIGHEST_PROTOCOL is what the previous literal -1 selected.
        pickle.dump({'x': xs, 'y': ys, 'chars': alphabet.chars}, f,
                    pickle.HIGHEST_PROTOCOL)
Example #2
0
def show_all(shown_seq, shown_img,
             seen_probabilities=None,
             aux_img=None, aux_name=None):
    """
    Debug helper that prints a labelling, its image and optional matrices.

    :param shown_seq: label sequence, printed via ``labels_print``
    :param shown_img: input image, printed via ``slab_print``
    :param seen_probabilities: optional matrix; its argmax along axis 0 is
        printed as labels and the matrix itself is printed via
        ``slab_print``
    :param aux_img: optional extra image/matrix printed via ``slab_print``
    :param aux_name: heading printed just before ``aux_img``
    :return: None
    """
    have_probs = seen_probabilities is not None

    print('Shown : ', end='')
    labels_print(shown_seq)

    if have_probs:
        print('Seen  : ', end='')
        # Winning row index for every column of the probability matrix.
        labels_print(np.argmax(seen_probabilities, axis=0))

    print('Image Shown:')
    slab_print(shown_img)

    if have_probs:
        print('SoftMax Firings:')
        slab_print(seen_probabilities)

    if aux_img is None:
        return

    print(aux_name)
    slab_print(aux_img)
Example #3
0
def show_all(shown_seq,
             shown_img,
             seen_probabilities=None,
             aux_img=None,
             aux_name=None):
    """
    Print the shown labels, the input image and any optional debug data.

    :param shown_seq: labelling of the input, passed to ``labels_print``
    :param shown_img: input image, passed to ``slab_print``
    :param seen_probabilities: optional matrix; when given, the argmax
        along axis 0 is printed as labels and the matrix is printed too
    :param aux_img: optional additional image/matrix to print
    :param aux_name: title printed ahead of ``aux_img``
    :return: None
    """
    probs_given = seen_probabilities is not None
    aux_given = aux_img is not None

    print('Shown : ', end='')
    labels_print(shown_seq)

    if probs_given:
        print('Seen  : ', end='')
        strongest = np.argmax(seen_probabilities, axis=0)
        labels_print(strongest)

    print('Image Shown:')
    slab_print(shown_img)

    if probs_given:
        print('SoftMax Firings:')
        slab_print(seen_probabilities)

    if aux_given:
        print(aux_name)
        slab_print(aux_img)
Example #4
0
    try:
        complx = sys.argv[4].lower() in ("yes", "true", "t", "1")
    except IndexError:
        complx = True

    try:
        variable_len = sys.argv[5].lower() in ("yes", "true", "t", "1")
    except IndexError:
        variable_len = True

    scribe = RowScribe(nChars, avg_seq_len, buffer_len=avg_seq_len // 10)

    xs = []
    ys = []
    for i in range(1000):
        x, y = scribe.get_data(complx, variable_len)
        xs.append(x)
        ys.append(y)
        print(y)
        slab_print(x)

    print('Output: {}\n'
          'Char set size: {}\n'
          '(Avg.) Len: {}\n'
          'Varying Length: {}\n'
          'Complex Scribe: {}\n'.format(
        out_file_name, nChars, avg_seq_len, variable_len, complx, ))

    with open(out_file_name, 'wb') as f:
        pickle.dump({'x': xs, 'y': ys, 'nChars': nChars}, f, -1)
Example #5
0
        data_x.append(np.asarray(x, dtype=theano.config.floatX))

    # Actual training
    for epoch in range(100):
        print('Epoch : ', epoch)
        for samp in range(nSamples):
            x = data_x[samp]
            y = data_y[samp]

            if samp < nTrainSamples:
                pred, cst = train_fn(x.T, y)
                if epoch % 10 == 0 and samp < 3:
                    # Print some training info
                    maxes = np.argmax(pred, 1)
                    print('## TRAIN cost: ', np.round(cst, 3))
                    pred_print(y)
                    pred_print(maxes)
                    slab_print(x)
                    slab_print(pred.T)

            elif epoch % 10 == 0 and samp - nTrainSamples < 3:
                # Print some test images
                pred = np.asarray(test_fn(x.T))[0]
                maxes = np.argmax(pred, 1)

                print('## TEST')
                pred_print(y)
                pred_print(maxes)
                slab_print(x)
                slab_print(pred.T)
Example #6
0
    try:
        variable_len = sys.argv[5].lower() in ("yes", "true", "t", "1")
    except IndexError:
        variable_len = True

    scribe = RowScribe(nChars, avg_seq_len, buffer_len=avg_seq_len // 10)

    xs = []
    ys = []
    for i in range(1000):
        x, y = scribe.get_data(complx, variable_len)
        xs.append(x)
        ys.append(y)
        print(y)
        slab_print(x)

    print('Output: {}\n'
          'Char set size: {}\n'
          '(Avg.) Len: {}\n'
          'Varying Length: {}\n'
          'Complex Scribe: {}\n'.format(
              out_file_name,
              nChars,
              avg_seq_len,
              variable_len,
              complx,
          ))

    with open(out_file_name, 'wb') as f:
        pickle.dump({'x': xs, 'y': ys, 'nChars': nChars}, f, -1)
Example #7
0
data_y = []

for x, y in zip(data['x'], data['y']):
    # Interleave the blank label (index nClasses) around every character:
    # [c1, c2] -> [blank, c1, blank, c2, blank].
    y1 = [nClasses]
    for char in y:
        y1 += [char, nClasses]

    data_y.append(np.asarray(y1, dtype=np.int32))
    data_x.append(np.asarray(x, dtype=theano.config.floatX))

# The lists *data_x* and *data_y* hold the training sequences and their
# corresponding labels. A sample sequence and its label are printed below:

print("Printing sample input ...")
idx = 0
slab_print(data_x[idx])
# NOTE(review): presumably `chars` has nClasses entries, so appending a space
# gives the blank label a printable character -- confirm against the loader.
chars.append(' ')
print(data_y[idx], "".join(chars[i] for i in data_y[idx]))

# ## Zero Padding Training Data to Fixed Length
# The input sequences are zero-padded to a fixed maximum sequence length.

# Convert the list of input sequences to a zero-padded 3D array.
num_feat = data_x[0].shape[0]
max_x_len = np.max([bb.shape[1] for bb in data_x])  # longest sequence
x = np.zeros([len(data_x), max_x_len, num_feat])
for i, examples in enumerate(data_x):
    # Each sample is stored as (feature, step) while x[i] is (step, feature),
    # so copy the transpose in one slice assignment; the steps beyond the
    # sample's own length keep their zero padding.
    n_rows, n_cols = examples.shape
    x[i, :n_cols, :n_rows] = examples.T