def handle_starttag(self, tag, attrs):
    """Record link targets and fetch images for one opening tag.

    The signature matches ``html.parser.HTMLParser.handle_starttag``;
    presumably this overrides it (the enclosing class is not visible here).

    Args:
        tag: Name of the tag being opened.
        attrs: Iterable of ``(name, value)`` pairs describing the tag's
            attributes.

    Side effects:
        * For ``<a>`` tags every ``href`` value is appended to ``self.links``.
        * For ``<img>`` tags every ``src`` value is resolved against
          ``self.urlString`` and handed to ``util.get_img``.
    """
    # Function-scope import keeps the block self-contained.
    from urllib.parse import urljoin

    if tag == "a":
        for name, value in attrs:
            if name == "href":
                self.links.append(value)
    elif tag == "img":
        for name, value in attrs:
            # A valueless attribute (``<img src>``) has nothing to fetch.
            if name == "src" and value is not None:
                # urljoin handles relative, root-relative and absolute
                # sources, and base URLs that have no path component;
                # plain string concatenation got all of those wrong.
                util.get_img(urljoin(self.urlString, value))
def handle_starttag(self, tag, attrs):
    """Record link targets and fetch images for one opening tag.

    The signature matches ``html.parser.HTMLParser.handle_starttag``;
    presumably this overrides it (the enclosing class is not visible here).

    Args:
        tag: Name of the tag being opened.
        attrs: Iterable of ``(name, value)`` pairs describing the tag's
            attributes.

    Side effects:
        * For ``<a>`` tags every ``href`` value is appended to ``self.links``.
        * For ``<img>`` tags every ``src`` value is resolved against
          ``self.urlString`` and handed to ``util.get_img``.
    """
    # Function-scope import keeps the block self-contained.
    from urllib.parse import urljoin

    if tag == "a":
        for name, value in attrs:
            if name == "href":
                self.links.append(value)
    elif tag == "img":
        for name, value in attrs:
            # A valueless attribute (``<img src>``) has nothing to fetch.
            if name == "src" and value is not None:
                # urljoin handles relative, root-relative and absolute
                # sources, and base URLs that have no path component;
                # plain string concatenation got all of those wrong.
                util.get_img(urljoin(self.urlString, value))
def test_generator(self):
    """Yield test batches forever, wrapping around after ``test_len()`` batches.

    Batches are sliced from ``self.data`` starting at offset
    ``self.train_size``. Each yielded item is ``(inputs, targets)`` where
    ``inputs`` is a dict with keys ``"img"``, ``"label_input"`` and
    ``"y_input"`` and ``targets`` is a zero array of shape
    ``[batch_size, 1]``.
    """
    batch_count = self.test_len()
    batch_no = 0
    while True:
        first = batch_no * self.batch_size + self.train_size
        last = (batch_no + 1) * self.batch_size + self.train_size
        data = self.data[first:last]
        # Advance now so the next pass starts on the following batch.
        batch_no = (batch_no + 1) % batch_count

        lengths = []
        pictures = []
        encoded = []
        for row in range(self.batch_size):
            img_path = data[row][0][0]
            text = data[row][1][0]
            # Length is taken from the word before __word_process runs.
            lengths.append(len(text))
            text = self.__word_process(text)
            picture = get_img("{}/{}".format(self.path, img_path),
                              self.width, self.height)
            pictures.append(picture)
            encoded.append(text)

        y_input = pad_sequences(encoded,
                                maxlen=self.max_string_length,
                                truncating="post",
                                padding="post")
        label_input = np.array(lengths).reshape([-1, 1])
        img_batch = np.array(pictures)

        inputs = {
            "img": img_batch[:, :, :, np.newaxis],
            "label_input": label_input,
            "y_input": y_input,
        }
        yield inputs, np.zeros([self.batch_size, 1])
def predict(base_model, file_):
    """Run ``base_model`` on a single image file and display the result.

    Prints the file name (centred in a 50-character ``*`` banner), the raw
    prediction and the decoded string, then shows the image with matplotlib.

    Args:
        base_model: Object exposing ``predict``; it receives the image with a
            leading and a trailing axis added.
        file_: Path of the image, passed to ``get_img`` together with the
            module-level ``width`` and ``height``.
    """
    img = get_img(file_, width, height)
    batch = img[np.newaxis, :, :, np.newaxis]
    y_pred = base_model.predict(batch)

    print(file_.center(50, "*"))
    print(y_pred)

    # -1 entries are skipped; every other id is mapped through id2char.
    decoded = []
    for char_id in y_pred[0]:
        if char_id != -1:
            decoded.append(id2char[char_id])
    str_out = ''.join(decoded)
    print(str_out)

    plt.imshow(img, cmap="gray")
    plt.show()
def get_ratio(path: str):
    """Describe the proportions of the image found at ``path``.

    Returns:
        A dict with the image ``"width"`` and ``"height"``, their quotient
        under ``"value"``, and ``"aspect"`` set to ``"landscape"`` when the
        image is strictly wider than tall, otherwise ``"portrait"``.
    """
    width, height = get_img(path).size

    ratio = width / height
    if width > height:
        orientation = "landscape"
    else:
        orientation = "portrait"

    return {
        "width": width,
        "height": height,
        "value": ratio,
        "aspect": orientation,
    }
def __getitem__(self, index):
    """Build batch number ``index`` with random brightness applied to images.

    Returns:
        ``(inputs, targets)`` where ``inputs`` is a dict with keys ``"img"``,
        ``"label_input"`` (length of each processed word) and ``"y_input"``
        (padded processed words), and ``targets`` is a zero array of shape
        ``[batch_size, 1]``.
    """
    first = index * self.batch_size
    last = (index + 1) * self.batch_size
    data = self.data[first:last]

    encoded = []
    pictures = []
    lengths = []
    for row in range(self.batch_size):
        img_path = data[row][0][0]
        text = self.__word_process(data[row][1][0])
        picture = get_img("{}/{}".format(self.path, img_path),
                          self.width, self.height)
        # random_brightness is given a trailing channel axis, which is
        # dropped again before the image is stored.
        picture = random_brightness(picture[:, :, np.newaxis], [0.1, 1.5])
        pictures.append(picture[:, :, 0])
        encoded.append(text)
        lengths.append(len(text))

    y_input = pad_sequences(encoded,
                            maxlen=self.max_string_length,
                            truncating="post",
                            padding="post")
    label_input = np.array(lengths).reshape([-1, 1])
    img_batch = np.array(pictures)

    inputs = {
        "img": img_batch[:, :, :, np.newaxis],
        "label_input": label_input,
        "y_input": y_input,
    }
    return inputs, np.zeros([self.batch_size, 1])
def get_colors(path: str, nb_colors=4):
    """Return the dominant colours of an image, most frequent first.

    The pixels returned by ``get_img(path).getdata()`` are clustered with
    ``KMeans``; each cluster centre becomes one colour.

    Args:
        path: Location of the image, passed unchanged to ``get_img``.
        nb_colors: Number of clusters, and therefore colours, to extract.

    Returns:
        A list of ``{"r": int, "g": int, "b": int}`` dicts, one per cluster,
        ordered by decreasing number of pixels assigned to the cluster.
        Each component is the matching centre coordinate rounded up.
    """
    imgfile = get_img(path)
    # NOTE(review): only the first three coordinates of every centre are
    # used below, so this presumably expects RGB(A) pixel tuples - confirm.
    pixels = numpy.array(imgfile.getdata(), numpy.uint8)

    clusters = KMeans(n_clusters=nb_colors)
    clusters.fit(pixels)

    # Pixels per cluster label, then labels sorted by descending count.
    # The stable sort keeps tied clusters in label order; the previous
    # value-based lookup mapped tied clusters to the same label and then
    # raised StopIteration.
    counts = numpy.bincount(clusters.labels_, minlength=nb_colors)
    by_frequency = numpy.argsort(-counts, kind="stable")

    return [
        {
            "r": math.ceil(center[0]),
            "g": math.ceil(center[1]),
            "b": math.ceil(center[2]),
        }
        for center in clusters.cluster_centers_[by_frequency]
    ]
from bs4 import BeautifulSoup
import util
import re
import shutil
import os
import requests

# Clean up a saved HTML page and repoint its lazily-loaded images at the
# value returned by util.get_img, then write the result to test.html.
infi = 'download.html'
with open(infi, 'r') as f:
    bsobj = BeautifulSoup(f.read(), 'html.parser')

# Strip class/itemprop from the first <span>. bsobj.span is None when the
# page contains no <span>, which previously crashed the script.
first_span = bsobj.span
if first_span is not None:
    del first_span['class']
    del first_span['itemprop']

# Drop every <noscript> element together with its contents.
ncts = bsobj.find_all('noscript')
for n in ncts:
    n.decompose()

imgs = bsobj.find_all('img')
for i in imgs:
    # Only images carrying a data-actualsrc attribute are rewritten.
    if 'data-actualsrc' in i.attrs:
        img_url = i['data-actualsrc']
        # Remove every existing attribute so that only src remains.
        for a in list(i.attrs):
            del i[a]
        # NOTE(review): presumably get_img stores the file under 'images'
        # and returns the path to use as src - confirm against util.
        i['src'] = util.get_img(img_url, 'images')

# Save the newly generated HTML.
util.save_html('test.html', str(bsobj))