def _sample(self, lines, biases=None, styles=None):
    """Sample one stroke sequence per text line from ``self.nn``.

    Args:
        lines: sequence of strings; one sample is requested per entry.
        biases: optional per-line values fed to ``self.nn.bias``. Defaults
            to ``0.5`` for every line.
        styles: optional priming styles, paired with ``lines`` via ``zip``.
            Each entry is either a dict with ``'stroke'`` and ``'text'``
            keys, or a value formatted into the file names
            ``styles/style-{}-strokes.npy`` and ``styles/style-{}-chars.npy``.

    Returns:
        A list with one array per sampled sequence, with every all-zero
        row removed.

    Raises:
        ValueError: if ``lines`` is empty (``max`` of an empty sequence),
            or if priming data does not fit the fixed-size buffers below.
    """
    num_samples = len(lines)
    # Sampling budget: 40 time steps per character of the longest line.
    max_tsteps = 40 * max(len(line) for line in lines)
    if biases is None:
        biases = [0.5] * num_samples

    # Fixed-size, zero-padded buffers fed to the network.
    x_prime = np.zeros([num_samples, 1600, 3])
    x_prime_len = np.zeros([num_samples])
    chars = np.zeros([num_samples, 120])
    chars_len = np.zeros([num_samples])

    if styles is not None:
        for i, (cs, style) in enumerate(zip(lines, styles)):
            if isinstance(style, dict):
                x_p = style['stroke']
                c_p = style['text']
            else:
                x_p = np.load('styles/style-{}-strokes.npy'.format(style))
                # ndarray.tostring() is a deprecated alias of tobytes() and
                # is absent from recent NumPy releases; tobytes() returns
                # the same raw bytes.
                c_p = np.load('styles/style-{}-chars.npy'.format(
                    style)).tobytes().decode('utf-8')

            # Priming text, a space, then the text to be written.
            c_p = str(c_p) + " " + cs
            c_p = np.array(drawing.encode_ascii(c_p))

            x_prime[i, :len(x_p), :] = x_p
            x_prime_len[i] = len(x_p)
            chars[i, :len(c_p)] = c_p
            chars_len[i] = len(c_p)

    else:
        for i, line in enumerate(lines):
            encoded = drawing.encode_ascii(line)
            chars[i, :len(encoded)] = encoded
            chars_len[i] = len(encoded)

    [samples] = self.nn.session.run(
        [self.nn.sampled_sequence],
        feed_dict={
            self.nn.prime: styles is not None,
            self.nn.x_prime: x_prime,
            self.nn.x_prime_len: x_prime_len,
            self.nn.num_samples: num_samples,
            self.nn.sample_tsteps: max_tsteps,
            self.nn.c: chars,
            self.nn.c_len: chars_len,
            self.nn.bias: biases
        })
    # Drop rows that are entirely zero from each returned sequence.
    samples = [
        sample[~np.all(sample == 0.0, axis=1)] for sample in samples
    ]
    return samples
def process_chars(chars):
    """Validate and encode a text string.

    The input is stripped of surrounding whitespace. If any remaining
    character is missing from ``drawing.alphabet``, or the stripped text
    is longer than ``MAX_CHAR_LEN``, ``None`` is returned. Otherwise the
    result of ``drawing.encode_ascii`` is returned, cut to at most
    ``MAX_CHAR_LEN`` entries.
    """
    text = chars.strip()
    all_known = all(ch in drawing.alphabet for ch in text)
    if not all_known or len(text) > MAX_CHAR_LEN:
        return None
    encoded = drawing.encode_ascii(text)
    return encoded[:MAX_CHAR_LEN]
Esempio n. 3
0
def collect_data(fnames):
    """Load stroke samples and encoded transcriptions from ``.npz`` files.

    Args:
        fnames: iterable of paths (strings) to ``.npz`` archives, each
            holding a ``'samples'`` and a ``'texts'`` entry of equal length.

    Returns:
        A tuple ``(x_out, c_out)``. ``x_out`` holds every entry of
        ``'samples'`` in file order. ``c_out`` holds, index-aligned with
        it, each stripped text passed through ``drawing.encode_ascii`` and
        cut to at most ``drawing.MAX_CHAR_LEN`` entries.

    Raises:
        AssertionError: if an archive's ``'samples'`` and ``'texts'``
            differ in length.
    """
    x_out = []
    c_out = []

    for fname in fnames:
        print('loading \'' + fname + '\'...')
        # NOTE(security): allow_pickle=True lets NumPy unpickle object
        # arrays, which can execute arbitrary code. Only load archives
        # from trusted sources.
        # The context manager closes the archive's file handle, which a
        # bare np.load() call would leave open.
        with np.load(fname, allow_pickle=True) as dataset:
            # Read each array once; every dataset[...] access re-reads it
            # from the archive.
            samples = dataset['samples']
            texts = dataset['texts']

        assert len(samples) == len(texts)
        for strokes, text in zip(tqdm(samples, file=sys.stdout), texts):
            x_out.append(strokes)
            c_out.append(
                drawing.encode_ascii(text.strip())[:drawing.MAX_CHAR_LEN])

    return x_out, c_out
    def _sample(self, line, x_p, c_p, bias=None):
        """Sample a single stroke sequence for ``line``, primed on a style.

        Args:
            line: text to write.
            x_p: priming strokes, copied into a zero-padded
                ``[1, 2000, 3]`` buffer.
            c_p: priming text; a space and ``line`` are appended to it
                before encoding with ``drawing.encode_ascii``.
            bias: optional value fed to ``self.nn.bias``; ``0.5`` when
                omitted.

        Returns:
            The first (and only) entry of the sampled batch, as returned
            by the session.
        """
        # Priming text, a space, then the text to be written.
        encoded = np.array(drawing.encode_ascii(c_p + " " + line))

        prime_strokes = np.zeros([1, 2000, 3])
        prime_strokes[0, :len(x_p), :] = x_p
        prime_len = np.array([len(x_p)], dtype=np.float64)

        char_buf = np.zeros([1, 120])
        char_buf[0, :len(encoded)] = encoded
        char_len = np.array([len(encoded)], dtype=np.float64)

        feed = {
            self.nn.prime: True,
            self.nn.x_prime: prime_strokes,
            self.nn.x_prime_len: prime_len,
            self.nn.num_samples: 1,
            # Sampling budget: 40 time steps per character of the line.
            self.nn.sample_tsteps: 40 * len(line),
            self.nn.c: char_buf,
            self.nn.c_len: char_len,
            self.nn.bias: [0.5 if bias is None else bias],
        }
        [batch] = self.nn.session.run([self.nn.sampled_sequence],
                                      feed_dict=feed)
        return batch[0]
def get_ascii_sequences(filename):
    """Read a transcription file and return its encoded text lines.

    Every run of eleven ``%`` characters is turned into a newline, and
    each resulting line is stripped. Non-empty lines starting two entries
    after the ``'CSR:'`` marker are encoded with ``drawing.encode_ascii``
    and cut to at most ``drawing.MAX_CHAR_LEN`` entries.

    Args:
        filename: path of the text file to read.

    Returns:
        A list with one encoded sequence per non-empty text line.

    Raises:
        ValueError: if no line equals ``'CSR:'`` after stripping.
    """
    # Close the file deterministically instead of leaking the handle.
    with open(filename, 'r') as handle:
        raw = handle.read()

    raw = raw.replace(r'%%%%%%%%%%%', '\n')
    stripped = [entry.strip() for entry in raw.split('\n')]
    # Skip the marker line and the line right after it.
    body = stripped[stripped.index('CSR:') + 2:]
    # Entries are already stripped, so a truthiness test drops the blanks.
    return [
        drawing.encode_ascii(line)[:drawing.MAX_CHAR_LEN]
        for line in body if line
    ]
Esempio n. 6
0
    def _predict_strokes(self, lines_v):
        """Sample one stroke sequence per text line from ``self.nn``.

        Reads ``self.bias`` (falls back to ``0.5`` when ``None``). When
        ``self.style`` is not ``None``, every line is primed with the style
        files ``style-<style>-strokes.npy`` and ``style-<style>-chars.npy``
        found in ``self.styles_dir``.

        Args:
            lines_v: sequence of strings; one sample is requested per entry.

        Returns:
            A list with one array per sampled sequence, with every
            all-zero row removed.

        Raises:
            ValueError: if ``lines_v`` is empty (``max`` of an empty
                sequence), or if priming data does not fit the fixed-size
                buffers below.
        """
        num_samples = len(lines_v)
        # Sampling budget: 40 time steps per character of the longest line.
        max_tsteps = 40 * max(len(line) for line in lines_v)
        bias = self.bias if self.bias is not None else 0.5
        biases = [bias] * num_samples

        # Fixed-size, zero-padded buffers fed to the network.
        x_prime = np.zeros([num_samples, 1200, 3])
        x_prime_len = np.zeros([num_samples])
        chars = np.zeros([num_samples, 120])
        chars_len = np.zeros([num_samples])

        if self.style is not None:
            # The style is the same for every line, so load its files once
            # instead of once per line.
            strokes_path = os.path.join(
                self.styles_dir, 'style-{}-strokes.npy'.format(self.style))
            chars_path = os.path.join(
                self.styles_dir, 'style-{}-chars.npy'.format(self.style))
            x_p = np.load(strokes_path)
            # ndarray.tostring() is a deprecated alias of tobytes() and is
            # absent from recent NumPy releases; tobytes() returns the same
            # raw bytes.
            style_text = np.load(chars_path).tobytes().decode('utf-8')

            for i, cs in enumerate(lines_v):
                # Priming text, a space, then the text to be written.
                c_p = np.array(drawing.encode_ascii(style_text + " " + cs))

                x_prime[i, :len(x_p), :] = x_p
                x_prime_len[i] = len(x_p)
                chars[i, :len(c_p)] = c_p
                chars_len[i] = len(c_p)

        else:
            for i, line in enumerate(lines_v):
                encoded = drawing.encode_ascii(line)
                chars[i, :len(encoded)] = encoded
                chars_len[i] = len(encoded)

        [samples] = self.nn.session.run(
            [self.nn.sampled_sequence],
            feed_dict={
                self.nn.prime: self.style is not None,
                self.nn.x_prime: x_prime,
                self.nn.x_prime_len: x_prime_len,
                self.nn.num_samples: num_samples,
                self.nn.sample_tsteps: max_tsteps,
                self.nn.c: chars,
                self.nn.c_len: chars_len,
                self.nn.bias: biases
            }
        )

        # Drop rows that are entirely zero from each returned sequence.
        strokes_v = [sample[~np.all(sample == 0.0, axis=1)] for sample in samples]
        return strokes_v
Esempio n. 7
0
def get_ascii_sequences(filename):
    """Read a transcription file and return its encoded text lines.

    Every run of eleven ``%`` characters is turned into a newline, and
    each resulting line is stripped. Non-empty lines starting two entries
    after the ``'CSR:'`` marker are encoded with ``drawing.encode_ascii``
    and cut to at most ``drawing.MAX_CHAR_LEN`` entries.

    Args:
        filename: path of the text file to read.

    Returns:
        A list with one encoded sequence per non-empty text line.

    Raises:
        ValueError: if no line equals ``'CSR:'`` after stripping.
    """
    # Close the file deterministically instead of leaking the handle.
    with open(filename, 'r') as handle:
        raw = handle.read()

    raw = raw.replace(r'%%%%%%%%%%%', '\n')
    stripped = [entry.strip() for entry in raw.split('\n')]
    # Skip the marker line and the line right after it.
    body = stripped[stripped.index('CSR:') + 2:]
    # Entries are already stripped, so a truthiness test drops the blanks.
    return [
        drawing.encode_ascii(line)[:drawing.MAX_CHAR_LEN]
        for line in body if line
    ]
Esempio n. 8
0
    def _sample(self, lines, biases=None, styles=None):
        """Sample one stroke sequence per text line from ``self.nn``.

        Args:
            lines: sequence of strings; one sample is requested per entry.
            biases: optional per-line values fed to ``self.nn.bias``.
                Defaults to ``0.5`` for every line.
            styles: optional priming styles, paired with ``lines`` via
                ``zip``. Each entry is formatted into the file names
                ``styles/style-{}-strokes.npy`` and
                ``styles/style-{}-chars.npy``.

        Returns:
            A list with one array per sampled sequence, with every
            all-zero row removed.

        Raises:
            ValueError: if ``lines`` is empty (``max`` of an empty
                sequence), or if priming data does not fit the fixed-size
                buffers below.
        """
        num_samples = len(lines)
        # Sampling budget: 40 time steps per character of the longest line.
        max_tsteps = 40 * max(len(line) for line in lines)
        if biases is None:
            biases = [0.5] * num_samples

        # Fixed-size, zero-padded buffers fed to the network.
        x_prime = np.zeros([num_samples, 1200, 3])
        x_prime_len = np.zeros([num_samples])
        chars = np.zeros([num_samples, 120])
        chars_len = np.zeros([num_samples])

        if styles is not None:
            for i, (cs, style) in enumerate(zip(lines, styles)):
                x_p = np.load('styles/style-{}-strokes.npy'.format(style))
                c_p = np.load('styles/style-{}-chars.npy'.format(style))

                # str() on a bytes-typed array yields its repr ("b'...'"),
                # which would end up in the priming text. Decode the raw
                # bytes in that case; any other dtype keeps the str() path.
                # NOTE(review): the on-disk dtype of the chars files is not
                # visible here; confirm which branch is actually taken.
                if c_p.dtype.kind == 'S':
                    style_text = c_p.tobytes().decode('utf-8')
                else:
                    style_text = str(c_p)

                # Priming text, a space, then the text to be written.
                c_p = np.array(drawing.encode_ascii(style_text + " " + cs))

                x_prime[i, :len(x_p), :] = x_p
                x_prime_len[i] = len(x_p)
                chars[i, :len(c_p)] = c_p
                chars_len[i] = len(c_p)

        else:
            for i, line in enumerate(lines):
                encoded = drawing.encode_ascii(line)
                chars[i, :len(encoded)] = encoded
                chars_len[i] = len(encoded)

        [samples] = self.nn.session.run(
            [self.nn.sampled_sequence],
            feed_dict={
                self.nn.prime: styles is not None,
                self.nn.x_prime: x_prime,
                self.nn.x_prime_len: x_prime_len,
                self.nn.num_samples: num_samples,
                self.nn.sample_tsteps: max_tsteps,
                self.nn.c: chars,
                self.nn.c_len: chars_len,
                self.nn.bias: biases
            }
        )
        # Drop rows that are entirely zero from each returned sequence.
        samples = [sample[~np.all(sample == 0.0, axis=1)] for sample in samples]
        return samples
def collect_data():
    """Gather strokes and encoded texts from ``data/raw_deepwriting/``.

    Walks the directory tree and keeps the ``.npz`` files (names not
    starting with ``'.'``) found in directories that have no
    subdirectories. Each file is opened as a
    ``HandWritingDatasetConditional``; for every sample the strokes are
    passed through ``undo_preprocess`` and the text is rebuilt from the
    one-hot labels with ``int_labels_to_text``.

    Returns:
        A tuple ``(x_out, c_out)``: the stroke sequences and,
        index-aligned with them, each stripped text passed through
        ``drawing.encode_ascii`` and cut to at most
        ``drawing.MAX_CHAR_LEN`` entries.
    """
    npz_paths = []
    for root, subdirs, files in os.walk('data/raw_deepwriting/'):
        # Only directories without subdirectories are considered.
        if subdirs:
            continue
        npz_paths.extend(
            os.path.join(root, name)
            for name in files
            if not name.startswith('.') and name.endswith('.npz')
        )

    x_out, c_out = [], []

    for file_idx, path in enumerate(npz_paths):
        print(file_idx, path)
        dataset = HandWritingDatasetConditional(path)

        assert (len(dataset.samples) == len(dataset.texts))
        rows = zip(dataset.samples, dataset.texts,
                   dataset.one_hot_char_labels, dataset.eoc_labels,
                   dataset.bow_labels)
        for row_idx, row in enumerate(rows):
            strokes, _text, line_onehot, eoc_labels, bow_labels = row
            # Progress report every 200 samples.
            if row_idx % 200 == 0:
                print(row_idx, '\t', '/', len(dataset.samples))

            x_out.append(dataset.undo_preprocess(strokes))

            int_labels = dataset.one_hot_to_int_labels(line_onehot)
            line_chars = int_labels_to_text(int_labels, eoc_labels,
                                            bow_labels, dataset)
            encoded = drawing.encode_ascii(line_chars.strip())
            c_out.append(encoded[:drawing.MAX_CHAR_LEN])

    return x_out, c_out