def _sample(self, lines, biases=None, styles=None):
    """Sample stroke sequences for each text line, optionally primed on styles.

    Args:
        lines: list of str, the texts to synthesize.
        biases: optional list of per-line sampling biases; defaults to 0.5
            for every line.
        styles: optional list of per-line styles. Each entry is either a
            dict with 'stroke' and 'text' keys, or an id used to load
            'styles/style-{id}-strokes.npy' / '-chars.npy' priming data.

    Returns:
        List of stroke arrays with all-zero padding rows removed.
    """
    num_samples = len(lines)
    # Heuristic budget: ~40 timesteps per character of the longest line.
    max_tsteps = 40 * max(len(line) for line in lines)
    biases = biases if biases is not None else [0.5] * num_samples

    x_prime = np.zeros([num_samples, 1600, 3])
    x_prime_len = np.zeros([num_samples])
    chars = np.zeros([num_samples, 120])
    chars_len = np.zeros([num_samples])

    if styles is not None:
        for i, (cs, style) in enumerate(zip(lines, styles)):
            if isinstance(style, dict):
                x_p = style['stroke']
                c_p = style['text']
            else:
                x_p = np.load('styles/style-{}-strokes.npy'.format(style))
                # tobytes() replaces the deprecated tostring() (removed in
                # NumPy 2.0); byte content is identical.
                c_p = np.load('styles/style-{}-chars.npy'.format(
                    style)).tobytes().decode('utf-8')

            # Priming text is the style text followed by the target line.
            c_p = str(c_p) + " " + cs
            c_p = drawing.encode_ascii(c_p)
            c_p = np.array(c_p)

            x_prime[i, :len(x_p), :] = x_p
            x_prime_len[i] = len(x_p)
            chars[i, :len(c_p)] = c_p
            chars_len[i] = len(c_p)
    else:
        for i in range(num_samples):
            encoded = drawing.encode_ascii(lines[i])
            chars[i, :len(encoded)] = encoded
            chars_len[i] = len(encoded)

    [samples] = self.nn.session.run(
        [self.nn.sampled_sequence],
        feed_dict={
            self.nn.prime: styles is not None,
            self.nn.x_prime: x_prime,
            self.nn.x_prime_len: x_prime_len,
            self.nn.num_samples: num_samples,
            self.nn.sample_tsteps: max_tsteps,
            self.nn.c: chars,
            self.nn.c_len: chars_len,
            self.nn.bias: biases
        })
    # Strip the all-zero padding rows the graph emits past each sequence end.
    samples = [
        sample[~np.all(sample == 0.0, axis=1)] for sample in samples
    ]
    return samples
def process_chars(chars):
    """Validate and ASCII-encode one text line.

    Returns the encoded sequence truncated to MAX_CHAR_LEN, or None when
    the stripped text is too long or contains characters outside the
    model's alphabet.
    """
    stripped = chars.strip()
    too_long = len(stripped) > MAX_CHAR_LEN
    has_unknown = any(ch not in drawing.alphabet for ch in stripped)
    if too_long or has_unknown:
        return None
    return drawing.encode_ascii(stripped)[:MAX_CHAR_LEN]
def collect_data(fnames):
    """Load stroke/text training pairs from .npz dataset files.

    Args:
        fnames: iterable of paths to .npz archives, each holding parallel
            'samples' (stroke arrays) and 'texts' (strings) entries.

    Returns:
        (x_out, c_out): list of stroke arrays and list of ASCII-encoded
        texts truncated to drawing.MAX_CHAR_LEN.
    """
    x_out = []
    c_out = []
    for fname in fnames:  # enumerate index was unused; dropped
        print('loading \'' + fname + '\'...')
        # allow_pickle=True: the archives presumably contain object arrays
        # of variable-length strokes — TODO confirm against the writer side.
        dataset = np.load(fname, allow_pickle=True)
        assert len(dataset['samples']) == len(dataset['texts'])
        for strokes, text in zip(tqdm(dataset['samples'], file=sys.stdout),
                                 dataset['texts']):
            x_out.append(strokes)
            c_out.append(
                drawing.encode_ascii(text.strip())[:drawing.MAX_CHAR_LEN])
    return x_out, c_out
def _sample(self, line, x_p, c_p, bias=None):
    """Sample one primed stroke sequence for a single text line.

    Args:
        line: str, the text to synthesize.
        x_p: priming stroke array (fits within 2000 timesteps).
        c_p: str, the text corresponding to the priming strokes.
        bias: optional float sampling bias; defaults to 0.5.

    Returns:
        The sampled stroke sequence for the single batch entry.
    """
    max_tsteps = 40 * len(line)
    bias_list = [0.5] if bias is None else [bias]

    prime_strokes = np.zeros([1, 2000, 3])
    prime_strokes_len = np.zeros([1])
    char_seq = np.zeros([1, 120])
    char_seq_len = np.zeros([1])

    # Conditioning text = priming text followed by the target line.
    encoded = np.array(drawing.encode_ascii(c_p + " " + line))

    prime_strokes[0, :len(x_p), :] = x_p
    prime_strokes_len[0] = len(x_p)
    char_seq[0, :len(encoded)] = encoded
    char_seq_len[0] = len(encoded)

    [samples] = self.nn.session.run(
        [self.nn.sampled_sequence],
        feed_dict={
            self.nn.prime: True,
            self.nn.x_prime: prime_strokes,
            self.nn.x_prime_len: prime_strokes_len,
            self.nn.num_samples: 1,
            self.nn.sample_tsteps: max_tsteps,
            self.nn.c: char_seq,
            self.nn.c_len: char_seq_len,
            self.nn.bias: bias_list
        }
    )
    return samples[0]
def get_ascii_sequences(filename):
    """Parse the CSR section of an ascii transcription file.

    Lines after the 'CSR:' marker (skipping one following line) are
    stripped, blanks dropped, and ASCII-encoded, truncated to
    drawing.MAX_CHAR_LEN. NOTE(review): the '%%%%%%%%%%%' separator and
    'CSR:' marker look like the IAM dataset format — confirm.
    """
    # Context manager closes the file deterministically (was a handle leak).
    with open(filename, 'r') as f:
        sequences = f.read()
    sequences = sequences.replace(r'%%%%%%%%%%%', '\n')
    sequences = [i.strip() for i in sequences.split('\n')]
    lines = sequences[sequences.index('CSR:') + 2:]
    lines = [line.strip() for line in lines if line.strip()]
    lines = [drawing.encode_ascii(line)[:drawing.MAX_CHAR_LEN]
             for line in lines]
    return lines
def _predict_strokes(self, lines_v):
    """Sample stroke sequences for each line, primed on self.style if set.

    Args:
        lines_v: list of str, the texts to synthesize.

    Returns:
        List of stroke arrays with all-zero padding rows removed.
    """
    num_samples = len(lines_v)
    # Heuristic budget: ~40 timesteps per character of the longest line.
    max_tsteps = 40 * max(len(line) for line in lines_v)
    biases = ([self.bias] * num_samples if self.bias is not None
              else [0.5] * num_samples)

    x_prime = np.zeros([num_samples, 1200, 3])
    x_prime_len = np.zeros([num_samples])
    chars = np.zeros([num_samples, 120])
    chars_len = np.zeros([num_samples])

    if self.style is not None:
        # The priming data depends only on self.style, so load it once
        # instead of re-reading both files for every line.
        x_p = np.load(os.path.join(
            self.styles_dir, 'style-{}-strokes.npy'.format(self.style)))
        # tobytes() replaces the deprecated tostring() (removed in NumPy 2.0).
        style_text = np.load(os.path.join(
            self.styles_dir,
            'style-{}-chars.npy'.format(self.style))).tobytes().decode('utf-8')
        for i, cs in enumerate(lines_v):
            # Conditioning text = style text followed by the target line.
            c_p = np.array(drawing.encode_ascii(str(style_text) + " " + cs))
            x_prime[i, :len(x_p), :] = x_p
            x_prime_len[i] = len(x_p)
            chars[i, :len(c_p)] = c_p
            chars_len[i] = len(c_p)
    else:
        for i in range(num_samples):
            encoded = drawing.encode_ascii(lines_v[i])
            chars[i, :len(encoded)] = encoded
            chars_len[i] = len(encoded)

    [samples] = self.nn.session.run(
        [self.nn.sampled_sequence],
        feed_dict={
            self.nn.prime: self.style is not None,
            self.nn.x_prime: x_prime,
            self.nn.x_prime_len: x_prime_len,
            self.nn.num_samples: num_samples,
            self.nn.sample_tsteps: max_tsteps,
            self.nn.c: chars,
            self.nn.c_len: chars_len,
            self.nn.bias: biases
        }
    )
    # Strip the all-zero padding rows past each sequence end.
    strokes_v = [sample[~np.all(sample == 0.0, axis=1)] for sample in samples]
    return strokes_v
def get_ascii_sequences(filename):
    """Parse the CSR section of an ascii transcription file.

    Lines after the 'CSR:' marker (skipping one following line) are
    stripped, blanks dropped, and ASCII-encoded, truncated to
    drawing.MAX_CHAR_LEN. NOTE(review): the '%%%%%%%%%%%' separator and
    'CSR:' marker look like the IAM dataset format — confirm.
    """
    # Context manager closes the file deterministically (was a handle leak).
    with open(filename, 'r') as f:
        sequences = f.read()
    sequences = sequences.replace(r'%%%%%%%%%%%', '\n')
    sequences = [i.strip() for i in sequences.split('\n')]
    lines = sequences[sequences.index('CSR:') + 2:]
    lines = [line.strip() for line in lines if line.strip()]
    lines = [drawing.encode_ascii(line)[:drawing.MAX_CHAR_LEN]
             for line in lines]
    return lines
def _sample(self, lines, biases=None, styles=None):
    """Sample stroke sequences for each text line, optionally primed on styles.

    Args:
        lines: list of str, the texts to synthesize.
        biases: optional list of per-line sampling biases; defaults to 0.5
            for every line.
        styles: optional list of per-line style ids used to load
            'styles/style-{id}-strokes.npy' / '-chars.npy' priming data.

    Returns:
        List of stroke arrays with all-zero padding rows removed.
    """
    num_samples = len(lines)
    # Heuristic budget: ~40 timesteps per character of the longest line.
    max_tsteps = 40 * max(len(line) for line in lines)
    biases = biases if biases is not None else [0.5] * num_samples

    x_prime = np.zeros([num_samples, 1200, 3])
    x_prime_len = np.zeros([num_samples])
    chars = np.zeros([num_samples, 120])
    chars_len = np.zeros([num_samples])

    if styles is not None:
        for i, (cs, style) in enumerate(zip(lines, styles)):
            x_p = np.load('styles/style-{}-strokes.npy'.format(style))
            # Decode the chars array to text, matching the sibling sampler
            # implementations; previously str() was applied to the raw
            # ndarray, which yields its repr rather than the style text.
            c_p = np.load('styles/style-{}-chars.npy'.format(
                style)).tobytes().decode('utf-8')
            # Conditioning text = style text followed by the target line.
            c_p = str(c_p) + " " + cs
            c_p = drawing.encode_ascii(c_p)
            c_p = np.array(c_p)

            x_prime[i, :len(x_p), :] = x_p
            x_prime_len[i] = len(x_p)
            chars[i, :len(c_p)] = c_p
            chars_len[i] = len(c_p)
    else:
        for i in range(num_samples):
            encoded = drawing.encode_ascii(lines[i])
            chars[i, :len(encoded)] = encoded
            chars_len[i] = len(encoded)

    [samples] = self.nn.session.run(
        [self.nn.sampled_sequence],
        feed_dict={
            self.nn.prime: styles is not None,
            self.nn.x_prime: x_prime,
            self.nn.x_prime_len: x_prime_len,
            self.nn.num_samples: num_samples,
            self.nn.sample_tsteps: max_tsteps,
            self.nn.c: chars,
            self.nn.c_len: chars_len,
            self.nn.bias: biases
        }
    )
    # Strip the all-zero padding rows past each sequence end.
    samples = [sample[~np.all(sample == 0.0, axis=1)] for sample in samples]
    return samples
def collect_data():
    """Collect stroke/text pairs from the raw deepwriting .npz files.

    Walks 'data/raw_deepwriting/' for .npz files in leaf directories,
    undoes each dataset's preprocessing on the strokes, reconstructs the
    per-line text from the one-hot character labels, and returns the
    parallel (strokes, encoded-text) lists.
    """
    fnames = []
    for dirpath, dirnames, filenames in os.walk('data/raw_deepwriting/'):
        # Only leaf directories hold the data files.
        if dirnames:
            continue
        for filename in filenames:
            if filename.startswith('.') or not filename.endswith('.npz'):
                continue
            fnames.append(os.path.join(dirpath, filename))

    x_out = []
    c_out = []
    for idx, fname in enumerate(fnames):
        print(idx, fname)
        dataset = HandWritingDatasetConditional(fname)
        assert (len(dataset.samples) == len(dataset.texts))
        records = zip(dataset.samples, dataset.texts,
                      dataset.one_hot_char_labels, dataset.eoc_labels,
                      dataset.bow_labels)
        for j, (strokes, text, line_onehot, eoc_labels,
                bow_labels) in enumerate(records):
            if j % 200 == 0:
                print(j, '\t', '/', len(dataset.samples))
            x_out.append(dataset.undo_preprocess(strokes))
            line_int = dataset.one_hot_to_int_labels(line_onehot)
            line_chars = int_labels_to_text(line_int, eoc_labels,
                                            bow_labels, dataset)
            c_out.append(
                drawing.encode_ascii(
                    line_chars.strip())[:drawing.MAX_CHAR_LEN])
    return x_out, c_out