Exemple #1
0
 def handle_starttag(self, tag, attrs):
     """Record link targets and fetch images referenced by a start tag.

     For an ``a`` tag, every ``href`` value is appended to ``self.links``.
     For an ``img`` tag, every ``src`` value is appended to the part of
     ``self.urlString`` before its last ``/`` and passed to ``util.get_img``.

     Args:
         tag: Name of the tag being opened.
         attrs: Iterable of ``(name, value)`` pairs. ``value`` can be
             ``None`` when the parser reports an attribute that was written
             without a value (e.g. ``<img src>``).
     """
     if tag == "a":
         # An empty attribute list simply contributes nothing.
         self.links.extend(value for name, value in attrs if name == "href")
     if tag == 'img':
         for name, value in attrs:
             # A valueless ``src`` carries no URL to fetch, and concatenating
             # ``None`` below would raise TypeError, so skip it.
             if name != 'src' or value is None:
                 continue
             base = self.urlString.rsplit('/', 1)[0]
             util.get_img(base + "/" + value)
Exemple #2
0
 def handle_starttag(self, tag, attrs):
     """Handle an opening tag: remember hyperlinks and download images.

     * ``a`` tags: each ``href`` value is appended to ``self.links``.
     * ``img`` tags: each ``src`` value is joined onto ``self.urlString``
       with its last path segment removed, and the result is handed to
       ``util.get_img``.

     Args:
         tag: Name of the tag being opened.
         attrs: Iterable of ``(name, value)`` attribute pairs; ``value`` can
             be ``None`` for an attribute given without a value.
     """
     if tag == "a":
         for variable, value in attrs:
             if variable == "href":
                 self.links.append(value)
     if tag == 'img':
         # Everything before the last '/' of the page URL.
         for name, value in attrs:
             if name != 'src':
                 continue
             if value is None:
                 # ``<img src>`` without a value: nothing to download, and
                 # ``"/" + None`` would raise TypeError.
                 continue
             util.get_img(
                 self.urlString.rsplit('/', 1)[0] + "/" + value)
    def test_generator(self):
        """Yield test batches forever, wrapping around after ``self.test_len()`` batches.

        Each batch is taken from ``self.data`` starting ``self.train_size``
        entries in. Every yielded item is a ``(inputs, targets)`` pair:

        * ``inputs["img"]``: the stacked images with a trailing axis added,
        * ``inputs["label_input"]``: the length of each raw (unprocessed)
          word, as a column,
        * ``inputs["y_input"]``: the processed words, padded/truncated at the
          end to ``self.max_string_length``,
        * ``targets``: an all-zero array of shape ``(batch_size, 1)``.
        """
        batch_count = self.test_len()
        cursor = 0
        while True:
            start = cursor * self.batch_size + self.train_size
            batch = self.data[start: start + self.batch_size]
            cursor = (cursor + 1) % batch_count

            encoded_words, pictures, lengths = [], [], []
            for row in range(self.batch_size):
                sample = batch[row]
                rel_path = sample[0][0]
                raw_word = sample[1][0]
                # The length is recorded before the word is processed.
                lengths.append(len(raw_word))
                processed = self.__word_process(raw_word)
                picture = get_img("{}/{}".format(self.path, rel_path), self.width, self.height)
                pictures.append(picture)
                encoded_words.append(processed)

            padded = pad_sequences(encoded_words, maxlen=self.max_string_length,
                                   truncating="post", padding="post")
            length_column = np.array(lengths).reshape([-1, 1])
            stacked = np.array(pictures)
            inputs = {
                "img": stacked[:, :, :, np.newaxis],
                "label_input": length_column,
                "y_input": padded,
            }
            yield inputs, np.zeros([self.batch_size, 1])
Exemple #4
0
def predict(base_model, file_):
    """Run ``base_model`` on one image file, print the result and show the image.

    The image is loaded with ``get_img`` using the module-level ``width`` and
    ``height``, given a leading and a trailing axis, and passed to
    ``base_model.predict``. The raw prediction is printed, followed by the
    string obtained by mapping the first prediction row through ``id2char``.

    Args:
        base_model: Object exposing a ``predict`` method.
        file_: Path of the image file; also printed as a centred banner.
    """
    img = get_img(file_, width, height)
    model_input = img[np.newaxis, :, :, np.newaxis]
    y_pred = base_model.predict(model_input)

    print(file_.center(50, "*"))
    print(y_pred)

    chars = []
    for x in y_pred[0]:
        # Entries equal to -1 are skipped (presumably padding - confirm).
        if x == -1:
            continue
        chars.append(id2char[x])
    str_out = ''.join(chars)
    print(str_out)

    plt.imshow(img, cmap="gray")
    plt.show()
Exemple #5
0
def get_ratio(path: str):
    """Describe the proportions of the image at ``path``.

    Returns:
        A dict with the image ``width`` and ``height`` (read from the
        ``size`` attribute of what ``get_img`` returns), their quotient as
        ``value``, and ``aspect``: ``"landscape"`` when the width is strictly
        greater than the height, ``"portrait"`` otherwise (square included).
    """
    width, height = get_img(path).size
    if width > height:
        aspect = "landscape"
    else:
        aspect = "portrait"
    ratio = width / height
    return {
        "width": width,
        "height": height,
        "value": ratio,
        "aspect": aspect
    }
 def __getitem__(self, index):
     """Build the batch number ``index`` as an ``(inputs, targets)`` pair.

     Each image of the batch is loaded with ``get_img`` and passed through
     ``random_brightness`` with the range ``[0.1, 1.5]``.

     Returns:
         ``inputs`` is a dict with ``"img"`` (stacked images plus a trailing
         axis), ``"label_input"`` (length of each processed word, as a
         column) and ``"y_input"`` (processed words padded/truncated at the
         end to ``self.max_string_length``); ``targets`` is an all-zero array
         of shape ``(batch_size, 1)``.
     """
     start = index * self.batch_size
     batch = self.data[start: start + self.batch_size]

     encoded_words, pictures, lengths = [], [], []
     for row in range(self.batch_size):
         sample = batch[row]
         rel_path = sample[0][0]
         processed = self.__word_process(sample[1][0])
         picture = get_img("{}/{}".format(self.path, rel_path), self.width, self.height)
         # random_brightness is given a trailing axis, which is removed
         # again before the image is stored.
         picture = random_brightness(picture[:, :, np.newaxis], [0.1, 1.5])
         pictures.append(picture[:, :, 0])
         encoded_words.append(processed)
         lengths.append(len(processed))

     padded = pad_sequences(encoded_words, maxlen=self.max_string_length,
                            truncating="post", padding="post")
     length_column = np.array(lengths).reshape([-1, 1])
     stacked = np.array(pictures)
     inputs = {
         "img": stacked[:, :, :, np.newaxis],
         "label_input": length_column,
         "y_input": padded,
     }
     return inputs, np.zeros([self.batch_size, 1])
Exemple #7
0
def get_colors(path: str, nb_colors=4):
    """Return the ``nb_colors`` dominant colours of the image at ``path``.

    The pixel data of the image returned by ``get_img`` is clustered with
    ``KMeans``; each cluster centre becomes one colour, with every channel
    rounded up to an integer.

    Args:
        path: Location of the image, passed to ``get_img``.
        nb_colors: Number of clusters, and therefore of returned colours.

    Returns:
        A list of ``{"r": int, "g": int, "b": int}`` dicts, ordered from the
        cluster with the most pixels to the one with the fewest.
    """
    imgfile = get_img(path)
    numarray = numpy.array(imgfile.getdata(), numpy.uint8)
    clusters = KMeans(n_clusters=nb_colors)
    clusters.fit(numarray)

    # Number of pixels assigned to each cluster label 0..nb_colors-1.
    npbins = numpy.arange(0, nb_colors + 1)
    counts = numpy.histogram(clusters.labels_, bins=npbins)[0]

    # Rank the cluster indices themselves rather than looking counts up by
    # value: a value lookup maps clusters with equal counts to the same
    # index and leaves another one unreachable. ``sorted`` is stable, so
    # ties keep their label order.
    by_frequency = sorted(
        range(nb_colors), key=lambda label: counts[label], reverse=True)

    colors = []
    for label in by_frequency:
        center = clusters.cluster_centers_[label]
        # Only the first three channels are used, each rounded up.
        colors.append({
            "r": math.ceil(center[0]),
            "g": math.ceil(center[1]),
            "b": math.ceil(center[2]),
        })
    return colors
from bs4 import BeautifulSoup
import util
import re
import shutil
import os
import requests

infi = 'download.html'
# Read the saved page first; the file is closed before the tree is edited.
with open(infi, 'r') as f:
    html_text = f.read()

bsobj = BeautifulSoup(html_text, 'html.parser')
# Strip the class / itemprop attributes from the first <span> in the document.
for attr_name in ('class', 'itemprop'):
    del bsobj.span[attr_name]

# Remove every <noscript> element from the tree.
ncts = bsobj.find_all('noscript')
for n in ncts:
    n.decompose()

imgs = bsobj.find_all('img')
for i in imgs:
    # Only images that carry a 'data-actualsrc' attribute are rewritten.
    if 'data-actualsrc' not in i.attrs:
        continue
    img_url = i['data-actualsrc']
    # Drop all existing attributes, then set src to what util.get_img returns.
    for a in list(i.attrs):
        del i[a]
    i['src'] = util.get_img(img_url, 'images')

# Save the newly generated HTML.
util.save_html('test.html', str(bsobj))