def __call__(self, canvas, doc):
    """Draw the current page number centred near the bottom of the page.

    The font size and the baseline height are sampled from
    ``self.config['text']['font_size']`` and
    ``self.config['text']['bottom_margin']`` with
    ``random_integer_from_list``.

    Args:
        canvas: Canvas to draw on. Must expose ``_pagesize``,
            ``getPageNumber``, ``setFontSize`` and ``drawCentredString``.
        doc: Not used; accepted so the object can be called with the
            ``(canvas, doc)`` signature.
    """
    canvas.saveState()
    # Only the page width is needed to centre the label.
    page_width, _ = canvas._pagesize
    font_size = random_integer_from_list(self.config['text']['font_size'])
    line_y = random_integer_from_list(self.config['text']['bottom_margin'])
    canvas.setFontSize(font_size)
    canvas.drawCentredString(page_width / 2, line_y, str(canvas.getPageNumber()))
    canvas.restoreState()
def _gen_line_coords(self):
    """Sample the margins and stroke width used for the horizontal rule.

    All values come from ``self.config['line']``. When its ``center`` entry
    is truthy the right margin simply mirrors the left one; otherwise it is
    sampled independently from ``right_margin``.

    Returns:
        Tuple ``(left, right, top, linewidth)``.
    """
    line_cfg = self.config['line']
    # Sampling order (left, top, linewidth, then optionally right) is kept
    # stable so that a seeded random generator produces the same values.
    left, top, linewidth = (
        random_integer_from_list(line_cfg[key])
        for key in ('left_margin', 'top_margin', 'linewidth')
    )
    right = left if line_cfg['center'] else random_integer_from_list(line_cfg['right_margin'])
    return left, right, top, linewidth
def _add_imgs(self, kind, x, y, w, h, exist_coords):
    """Draw one randomly chosen image of type ``kind`` on a table's border.

    A random file from ``self.config[kind]['img_path']`` is scaled to a
    sampled width and placed along one of the four table edges. Up to 50
    positions are tried until one does not overlap a box already listed in
    ``exist_coords`` (as judged by ``self._is_overlap``). If every attempt
    overlaps, the last candidate is used anyway.

    Args:
        kind: Key into ``self.config`` selecting the image settings
            (``corner_dx``, ``corner_dy``, ``img_path``, ``width``).
        x, y: Coords of the table lower-left corner, with the origin at the
            lower-left corner of the page.
        w, h: Width and height of the table.
        exist_coords: List of ``((x0, y0), (x1, y1))`` boxes already placed.
            The box of the image drawn here is appended in place, including
            when no overlap-free spot was found, so later placements can
            still see it.

    Returns:
        dict with keys ``kind``, ``is_flowable`` (always ``False``), ``x``,
        ``y``, ``w`` and ``h``. The page number is added by the caller.
    """
    cfg_object = self.config[kind]
    x_shift = random_integer_from_list(cfg_object['corner_dx'])
    y_shift = random_integer_from_list(cfg_object['corner_dy'])
    image_dir = cfg_object['img_path']
    image_width = random_integer_from_list(cfg_object['width'])
    image_file = os.path.join(image_dir, np.random.choice(os.listdir(image_dir)))

    # Only the pixel size is needed here, so release the file handle at once.
    with Image.open(image_file) as source_image:
        w_image, h_image = source_image.size
    # Rescale to the sampled width, keeping the aspect ratio (the draw call
    # below uses preserveAspectRatio=True as well).
    h_image = image_width / w_image * h_image
    w_image = image_width

    # NOTE: both axes are offset by half the image *width*, as before.
    half_width = image_width // 2
    x_candidates = [int(x), int(x + w)]
    y_candidates = [int(y), int(y + h + 1)]
    max_iter = 50
    for _ in range(max_iter):
        if np.random.random() < 0.5:
            # Image along the top/bottom edge: y pinned to an edge, x free.
            y_image = np.random.choice(y_candidates)
            x_image = np.random.randint(*x_candidates)
        else:
            # Image along the left/right edge: x pinned to an edge, y free.
            x_image = np.random.choice(x_candidates)
            y_image = np.random.randint(*y_candidates)
        # Plain Python ints in every branch (np.random.choice yields NumPy
        # scalars), so the returned record has uniform types.
        x_image = int(x_image - half_width + x_shift)
        y_image = int(y_image - half_width + y_shift)
        image_coord = ((x_image, y_image), (x_image + w_image, y_image + h_image))
        if not self._is_overlap(image_coord, exist_coords):
            break
    # Record whatever position ends up being drawn.
    exist_coords.append(image_coord)

    # drawImage is expected to return the image's (width, height); only the
    # ratio of the two is used below - confirm against the canvas API.
    img_width, img_height = self.canv.drawImage(
        image_file, x_image, y_image,
        mask='auto',
        anchor='sw',  # anchored at the south-west (lower-left) corner
        width=image_width,
        preserveAspectRatio=True)
    return {
        'kind': kind,
        'is_flowable': False,
        # page number added by the caller func
        'x': x_image,
        'y': y_image,
        'w': image_width,
        'h': int(img_height * image_width / img_width),
    }
def _gen_random_decimal(self, n_integers=None, n_digits=None):
    """Return a random decimal number rendered as a string.

    Args:
        n_integers: Specification passed to ``random_integer_from_list`` to
            pick the number of digits before the decimal point. Defaults to
            ``[4]``.
        n_digits: Same, for the number of digits after the decimal point.
            Defaults to ``[2]``.

    Returns:
        ``'<int>.<frac>'``; just ``'<int>'`` when no fractional digits were
        requested (which is the empty string if no integer digits were
        requested either); ``'0.<frac>'`` when only the integer part is
        empty.
    """
    # None sentinels replace the former mutable list defaults.
    n_int = random_integer_from_list([4] if n_integers is None else n_integers)
    n_dig = random_integer_from_list([2] if n_digits is None else n_digits)
    digits = list('0123456789')
    part_int = ''.join(np.random.choice(digits, size=n_int, replace=True))
    part_dig = ''.join(np.random.choice(digits, size=n_dig, replace=True))
    if not part_dig:
        return part_int
    # An empty integer part is rendered as a leading zero.
    return '.'.join([part_int or '0', part_dig])
def bullet_list(self):
    """Build a ``Paragraph`` holding a random bulleted list.

    The number of bullets and the number of sentences per bullet are sampled
    from ``self.config['n_bullet']`` and ``self.config['n_sentences']``;
    sentence lengths are bounded by ``self.config['sentence_length']``.
    Within one bullet every sentence is followed by the same randomly chosen
    separator (one of ``,``, ``.``, ``!``). Bullets are joined with
    ``<br />`` line breaks.

    Returns:
        ``Paragraph`` styled by ``ParagraphStyleGenerator(self.config)``.
    """
    lb_sentence, ub_sentence = self.config['sentence_length']
    n_bullet = random_integer_from_list(self.config['n_bullet'])
    separators = [',', '.', '!']
    items = []
    for _ in range(n_bullet):
        n_sentences = random_integer_from_list(self.config['n_sentences'])
        sentences = [self._gen_random_sentence([lb_sentence, ub_sentence])
                     for _ in range(n_sentences)]
        # The trailing '' makes join() emit the separator after the last
        # sentence as well.
        body = np.random.choice(separators).join(sentences + [''])
        # The entity must be terminated with ';'. Without it, a sentence
        # starting with an ASCII letter or digit would be absorbed into the
        # entity name and no bullet would be rendered.
        items.append('&bull;' + body)
    text = '<br />\n'.join(items)
    list_style = ParagraphStyleGenerator(self.config).style()
    return Paragraph(text, list_style)
def __call__(self, canvas, doc):
    """Draw a page header (rule plus optional texts) and a footer number.

    A horizontal rule is drawn below the top margin using the values from
    ``self._gen_line_coords()``. A random subset (possibly empty) of
    ``self.config['text']['locations']`` is selected, and for each selected
    location (``'left'``, ``'right'`` or ``'center'``) a text from
    ``self._gen_text()`` is drawn slightly above the rule. Finally a random
    integer below 100 is drawn centred near the bottom of the page.

    Args:
        canvas: Canvas to draw on.
        doc: Not used; accepted so the object can be called with the
            ``(canvas, doc)`` signature.

    Raises:
        ValueError: If a selected location is not one of the three known
            values.
    """
    canvas.saveState()
    page_w, page_h = canvas._pagesize

    # Geometry of the horizontal rule.
    left_margin, right_margin, top_margin, linewidth = self._gen_line_coords()
    rule_x0 = left_margin
    rule_x1 = page_w - right_margin
    rule_y = page_h - top_margin

    # Font used for the header texts and the footer.
    font_name = ParagraphStyleGenerator._gen_font()
    font_size = random_integer_from_list(self.config['text']['font_size'])
    canvas.setFont(font_name, font_size)

    # Header texts sit a third of the font size above the rule.
    text_y = rule_y + font_size // 3
    candidates = self.config['text']['locations']
    n_selected = np.random.randint(0, len(candidates) + 1)
    for loc in np.random.choice(candidates, size=n_selected, replace=False):
        if loc not in ('left', 'right', 'center'):
            raise ValueError("Text location %s not identified !"%loc)
        words = self._gen_text()
        if loc == 'left':
            canvas.drawString(rule_x0, text_y, words)
        elif loc == 'right':
            # Width is estimated as one font size per character.
            canvas.drawString(rule_x1 - font_size * len(words), text_y, words)
        else:
            canvas.drawCentredString(page_w / 2, text_y, words)

    # The rule itself.
    canvas.setLineWidth(linewidth)
    canvas.line(rule_x0, rule_y, rule_x1, rule_y)

    # Footer: a random number, not the real page number.
    footer_y = random_integer_from_list([20, 40])
    footer_label = str(np.random.randint(100))
    canvas.drawCentredString(page_w / 2, footer_y, footer_label)
    canvas.restoreState()
def paragraph(self):
    """Build a ``Paragraph`` of random sentences.

    Either the ``short`` or the ``long`` settings of ``self.config`` are
    used, chosen at random with the ``short`` probability normalised against
    the sum of both ``prob`` entries. Every generated sentence is followed
    by an independently chosen punctuation mark (or a space).

    Returns:
        ``Paragraph`` styled by ``ParagraphStyleGenerator(self.config)``.
    """
    marks = [',', ',', ':', ':', '.', '。', '!', '!', '?', '?', ' ']
    long_cfg = self.config['long']
    short_cfg = self.config['short']
    long_weight = long_cfg['prob']
    short_weight = short_cfg['prob']
    short_share = short_weight / (short_weight + long_weight)

    # Pick the short or the long profile according to the normalised share.
    if np.random.random() < short_share:
        chosen = short_cfg
    else:
        chosen = long_cfg

    lower, upper = chosen['sentence_length']
    n_sentences = random_integer_from_list(chosen['n_sentences'])
    sentences = [self._gen_random_sentence([lower, upper]) for _ in range(n_sentences)]

    # Interleave each sentence with its own random mark.
    pieces = []
    for sentence in sentences:
        pieces.append(sentence)
        pieces.append(np.random.choice(marks))
    text = ''.join(pieces)

    style = ParagraphStyleGenerator(self.config).style()
    return Paragraph(text, style)
def _gen_table_space(self):
    """Sample the spacing placed before and after a table.

    Returns:
        Tuple ``(space_before, space_after)`` sampled from the matching
        entries of ``self.config['layout']``.
    """
    before = random_integer_from_list(self.config['layout']['space_before'])
    after = random_integer_from_list(self.config['layout']['space_after'])
    return before, after
def _gen_random_en_word(self, length=None):
    """Return a random word built from the characters in ``self.enChar``.

    Args:
        length: Specification passed to ``random_integer_from_list`` to pick
            the word length. Defaults to ``[2, 6]``.

    Returns:
        The generated word; characters are drawn with replacement.
    """
    # A None sentinel replaces the former mutable list default.
    word_len = random_integer_from_list([2, 6] if length is None else length)
    letters = np.random.choice(self.enChar, size=word_len, replace=True)
    return ''.join(letters.tolist())
def _gen_random_cn_sentence(self, length=None):
    """Return a random string built from the characters in ``self.cnChar``.

    Args:
        length: Specification passed to ``random_integer_from_list`` to pick
            the number of characters. Defaults to ``[2, 6]``.

    Returns:
        The generated string; characters are drawn with replacement.
    """
    # A None sentinel replaces the former mutable list default.
    word_len = random_integer_from_list([2, 6] if length is None else length)
    chars = np.random.choice(self.cnChar, size=word_len, replace=True)
    return ''.join(chars.tolist())
def _gen_text(self):
    """Return a random string of characters drawn from ``self.cnChar``.

    The length is sampled from ``self.config['text']['word_length']``;
    characters are drawn with replacement.
    """
    length_spec = self.config['text']['word_length']
    n_chars = random_integer_from_list(length_spec)
    picked = np.random.choice(self.cnChar, size=n_chars, replace=True)
    return ''.join(picked.tolist())
def table(self):
    """Build a ``Table`` flowable filled with random content.

    Steps:
      1. Generate row headers, column headers and body cells with
         ``self._gen_cell_content``.
      2. Decorate body cells (parentheses, underline, empty, dash) with the
         probabilities in ``self.config['content']['special']``.
      3. Assemble an ``nrows x ncols`` list of rows; the top-left cell is
         blanked with probability ``prob_empty_first_cell``.
      4. Optionally blank one column, one random row, the second row and
         the second-to-last row, as configured in ``self.config['space']``.
      5. Wrap everything in a ``Table`` with random spacing before/after.

    Returns:
        The ``Table`` instance.
    """
    cfg_blocks = self.config['content']['blocks']
    cfg_special = self.config['content']['special']

    table_style = TableStyleGenerator(self.config).style()

    # Row/column headers, then the (ncols - 1) * (nrows - 1) body cells.
    row_header = self._gen_cell_content(cfg_blocks['row_header'], self.nrows)
    col_header = self._gen_cell_content(cfg_blocks['col_header'], self.ncols)
    content = self._gen_cell_content(cfg_blocks['content'],
                                     (self.ncols - 1) * (self.nrows - 1))

    # Special decorations on the body cells.
    prob_parentheses = cfg_special['prob_parentheses']
    prob_empty = cfg_special['prob_empty']
    prob_dash = cfg_special['prob_dash']
    prob_underline = cfg_special['prob_underline']

    content = self._decorate_parentheses(content, prob_parentheses)
    content = self._decorate_underline(content, prob_underline, table_style)
    content = self._decorate_empty(content, prob_empty)
    content = self._decorate_dash(content, prob_dash)

    # Merge headers and body into a list of rows.
    content_ptr = 0
    table_data = []
    for i in range(self.nrows):
        if i == 0:
            # Header row; its first cell is randomly left empty.
            prob_empty_first_cell = cfg_blocks['prob_empty_first_cell']
            if np.random.random() < prob_empty_first_cell:
                table_data.append([''] + col_header[1:self._ncols])
            else:
                table_data.append(col_header[:self._ncols])
        else:
            body_cells = content[content_ptr: content_ptr + self._ncols - 1]
            table_data.append([row_header.pop()] + body_cells)
            content_ptr += self._ncols - 1

    cfg_space = self.config['space']

    # Single random empty column (header cell included).
    if np.random.random() < cfg_space['prob_empty_col'] and 3 <= self._ncols <= 5:
        # Half of the time the blanked column is the 2nd one.
        empty_col = 1 if np.random.random() < 0.5 else np.random.randint(1, self._ncols - 1)
        empty_size = random_integer_from_list(cfg_space['size_empty_col'])
        filler = ' ' * empty_size
        for row in table_data:
            row[empty_col] = filler

    # The blank rows below are built as [''] * ncols. The previous
    # `[] * ncols` always evaluated to [], i.e. a row with no cells at all.

    # Single random empty row.
    if np.random.random() < cfg_space['prob_empty_row'] and 4 <= self._nrows:
        empty_row = np.random.randint(1, self._nrows - 1)
        table_data[empty_row] = [''] * self._ncols

    # Second row (counting from 1) empty: simulate a space / gap.
    if (np.random.random() < cfg_space['prob_empty_second_row']
            and self._nrows > 3 and self._ncols > 2):
        table_data[1] = [''] * self._ncols

    # Second-to-last row empty: simulate a space / gap.
    if (np.random.random() < cfg_space['prob_empty_last_second_row']
            and self._nrows > 3 and self._ncols > 2):
        table_data[-2] = [''] * self._ncols

    # Finally build the table instance.
    space_before, space_after = self._gen_table_space()
    return Table(table_data,
                 style=table_style,
                 spaceBefore=space_before,
                 spaceAfter=space_after)
def nrows(self, rows):
    """Sample the number of table rows and store it in ``self._nrows``.

    Args:
        rows: Specification passed to ``random_integer_from_list``.
    """
    # NOTE(review): presumably registered as a property setter; the
    # decorator is not visible here - confirm.
    sampled_rows = random_integer_from_list(rows)
    self._nrows = sampled_rows
def ncols(self, cols):
    """Sample the number of table columns and store it in ``self._ncols``.

    Args:
        cols: Specification passed to ``random_integer_from_list``.
    """
    # NOTE(review): presumably registered as a property setter; the
    # decorator is not visible here - confirm.
    sampled_cols = random_integer_from_list(cols)
    self._ncols = sampled_cols
def add_spacer(self):
    """Append a ``Spacer`` of random height to ``self.elements``.

    The spacer is as wide as the first component of ``A4`` and its height is
    sampled from ``self.config['spacer']['height']``.
    """
    page_width, _page_height = A4
    gap = random_integer_from_list(self.config['spacer']['height'])
    self.elements.append(Spacer(page_width, gap))
def afterFlowable(self, flowable):
    """Record the bounding box of a flowable and maybe decorate tables.

    For ``Paragraph`` and ``Table`` flowables a record with the kind, page
    number, lower-left corner and size is appended to ``self.coords``. The
    x position is computed as if the flowable were horizontally centred in
    the frame. For tables with a non-empty ``_linecmds`` attribute (bordered
    tables), stamps and signatures are then added at random via
    ``self._add_imgs``, according to ``self.config['stamp']`` and
    ``self.config['signature']``; each gets its own record in
    ``self.coords``.

    Args:
        flowable: The flowable to record.

    Returns:
        ``-1`` for flowables that are neither ``Paragraph`` nor ``Table``,
        otherwise ``None``.
    """
    frame = self.frame
    frame_x, frame_y = frame._x, frame._y
    frame_x2, _frame_y2 = frame._x2, frame._y2
    page_number = self.canv._pageNumber  # starts from 1

    # Paragraphs expose their size publicly, tables via private attributes.
    if isinstance(flowable, Paragraph):
        kind, width, height = 'paragraph', flowable.width, flowable.height
    elif isinstance(flowable, Table):
        kind, width, height = 'table', flowable._width, flowable._height
    else:
        return -1

    # Undo the left padding, then centre the flowable between the frame
    # edges; lift y by the frame's `_prevASpace` value.
    frame_left = frame_x - frame._leftPadding
    x_lowerLeft = (frame_x2 + frame_left) / 2 - width / 2
    y_lowerLeft = frame_y + frame._prevASpace

    self.coords.append({
        'kind': kind,
        'page': page_number,
        'is_flowable': True,
        'x': x_lowerLeft,
        'y': y_lowerLeft,
        'w': width,
        'h': height,
    })

    # Non-flowable extras: only objects with a non-empty `_linecmds` list
    # (bordered tables) receive stamps / signatures.
    if not getattr(flowable, '_linecmds', None):
        return

    # Shared between stamps and signatures so they avoid each other.
    placed_boxes = []
    for image_kind in ('stamp', 'signature'):
        if np.random.random() < self.config[image_kind]['prob']:
            n_images = random_integer_from_list(self.config[image_kind]['n'])
            for _ in range(n_images):
                info = self._add_imgs(image_kind, x_lowerLeft, y_lowerLeft,
                                      width, height, placed_boxes)
                info['page'] = page_number
                self.coords.append(info)