Example No. 1
    def simple_file_run(self, img, download_path):
        """Run a search for a single file."""
        if os.path.isfile(img):  # img here is a full path
            img_name = os.path.splitext(os.path.split(img)[1])[0]  # name of the image to upload

            print("--> Processing image:  {}  ".format(img_name))
            if is_img(img, self.extention):
                # Create a new directory under the download path to store the fetched content
                this_download_dir = os.path.join(
                    download_path, img_name + "_search_data_folder")

                if not os.path.exists(this_download_dir):
                    os.mkdir(this_download_dir)

                html_name = "{}.html".format(
                    os.path.join(this_download_dir, img_name))

                html_source = self.upload_img_get_html(
                    img)  # HTML source returned after uploading the image

                with open(html_name, 'a', encoding='utf-8',
                          errors='ignore') as file:
                    file.write("<!-- source downloaded at: " + time.asctime() + " -->")
                    file.write(html_source)

                self.analyse(html_source, this_download_dir,
                             os.path.join(this_download_dir, img_name))
                # parse the page, download images, write the page text

                print("{}Image {} processed\n{}".format(Fore.GREEN, img_name,
                                                        Fore.RESET))
            else:
                print(f"{Fore.RED}File {img_name} is not an image file{Fore.RESET}")
Example No. 2
import hashlib
import os

from colorama import Fore
from utils import is_img


def create_image_path(dir_path, url):
    """Build a collision-safe local file path for an image URL by hashing the URL."""
    if not is_img(url):
        print(Fore.RED + "Inappropriate file extension: " + url + Fore.RESET)
        return
    encoded_url = url.encode('utf-8')
    hashed_url = hashlib.sha3_256(encoded_url).hexdigest()
    file_extension = os.path.splitext(url)[-1]
    full_path = os.path.join(dir_path, hashed_url + file_extension.lower())
    return full_path
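A quick usage sketch (the directory and URL are invented for illustration):

path = create_image_path("./cache", "https://example.com/photos/cat.JPG")
print(path)  # e.g. ./cache/<64-char sha3-256 hex digest>.jpg, extension lowercased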
Example No. 3
    def simple_file_run(self, img, download_path):
        """Run a search for a single file."""
        if os.path.isfile(img):  # img here is a full path
            img_name = os.path.splitext(os.path.split(img)[1])[0]  # name of the image to upload

            print("--> Processing image:  {}  ".format(img_name))
            if is_img(img, self.extention):
                # Create a new directory under the download path to store the fetched content
                this_download_dir = os.path.join(
                    download_path, img_name + "_search_data_folder")

                if not os.path.exists(this_download_dir):
                    os.mkdir(this_download_dir)

                while_count = 3  # retry the upload up to three times
                while True:
                    try:
                        html_source = self.upload_img_get_html(
                            img)  # HTML source returned after uploading the image

                        self.analyse(html_source, this_download_dir,
                                     os.path.join(this_download_dir, img_name))
                        # parse the page, download images, write the page text

                        print("{}Image {} processed\n{}".format(Fore.GREEN, img_name,
                                                                Fore.RESET))
                        break
                    except Exception:
                        while_count -= 1
                        traceback.print_exc()
                        time.sleep(1)
                        if while_count <= 0:
                            break
            else:
                print(f"{Fore.RED}File {img_name} is not an image file{Fore.RESET}")
Example No. 4
import os

from utils import is_img

contentPath = "images/UHD_content/center_img"
stylePath = "images/UHD_style"
content_imgs = [x for x in os.listdir(contentPath) if is_img(x)]
style_imgs = [x for x in os.listdir(stylePath) if is_img(x)]

# Run WCT_my.py once for every (content, style) pair
pairs = [(i, j) for i in content_imgs for j in style_imgs]
for i, j in pairs:
    script = ("python WCT_my.py --cuda --UHD "
              "--UHD_contentPath images/UHD_content/center_img/ "
              "--fineSize=3000 --log_mark=20181114-1016 "
              "--picked_content_mark=%s --picked_style_mark=%s" % (i, j))
    os.system(script)
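Because the file names are interpolated directly into a shell string, names containing spaces or shell metacharacters would break the command. A sketch of the same call through subprocess.run (a substitution of mine, not the original script):

import subprocess

# Arguments passed as a list are not re-parsed by a shell, so odd file
# names cannot split or inject into the command
subprocess.run([
    "python", "WCT_my.py", "--cuda", "--UHD",
    "--UHD_contentPath", "images/UHD_content/center_img/",
    "--fineSize=3000", "--log_mark=20181114-1016",
    "--picked_content_mark=%s" % i,
    "--picked_style_mark=%s" % j,
], check=True)  # check=True raises if WCT_my.py exits non-zero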
Example No. 5
from PIL import Image as I
import sys
import os
from utils import is_img
pjoin = os.path.join

def crop_and_save(img_path):
  """Center-crop the image to a square and save it next to the original."""
  img = I.open(img_path)
  w, h = img.size
  center_img = img
  if w > h:
    margin = int((w - h)/2)
    center_img = img.crop((margin, 0, margin+h, h))
  elif w < h:
    margin = int((h - w)/2)
    center_img = img.crop((0, margin, w, margin+w))
  base, ext = os.path.splitext(img_path)
  center_img.save(base + "_center" + ext)  # avoids str.replace matching ext earlier in the path

inDir = sys.argv[1]
for i in os.listdir(inDir):
  if is_img(i):
    crop_and_save(pjoin(inDir, i))
Example No. 6
 def __init__(self, pathC, pathS, shorter_side):
   self.imgListC = [os.path.join(pathC, i) for i in os.listdir(pathC) if is_img(i)]
   self.imgListS = [os.path.join(pathS, i) for i in os.listdir(pathS) if is_img(i)]
   self.shorter_side = shorter_side
Example No. 7
 def __init__(self, img_dir, shorter_side):
   self.img_list = [os.path.join(img_dir, i) for i in os.listdir(img_dir) if is_img(i)]
   random_order = np.random.permutation(len(self.img_list))
   self.img_list = list(np.array(self.img_list)[random_order])
   self.shorter_side = shorter_side
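The NumPy round-trip above also turns each path into numpy.str_; an equivalent shuffle with only the standard library (a sketch, not the original):

import os
import random

img_list = [os.path.join(img_dir, i) for i in os.listdir(img_dir) if is_img(i)]
random.shuffle(img_list)  # in-place shuffle, entries stay plain str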
Example No. 8
 def __init__(self, img_dir, shorter_side):
   self.img_list = [os.path.join(img_dir, i) for i in os.listdir(img_dir) if is_img(i)]
   self.shorter_side = shorter_side
Example No. 9
    # Tail of crop_and_resize: crop_img has been square-cropped earlier in the function
    resize = (side, side)
    try:
        resized = cv2.resize(crop_img, resize, interpolation=cv2.INTER_AREA)
        return resized
    except Exception as err:
        print("Resizing error. image shape: " + str(crop_img.shape))
        raise err


input_dir = "./images/original/"
output_dir = "./images/preprocessed/"
size = 32

for root, _, files in os.walk(input_dir):
    for file in files:
        if not is_img(file):
            continue
        sem_root = root.replace(input_dir, "")
        sem_file = os.path.join(sem_root, file)
        input_img_path = os.path.join(input_dir, sem_file)
        output_img_path = os.path.join(output_dir, sem_file)

        print "Converting " + input_img_path + " => " + output_img_path

        img = cv2.imread(input_img_path, cv2.IMREAD_COLOR)
        try:
            cropped_img = crop_and_resize(img, size)
        except Exception as err:
            print(err)
            break
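The excerpt computes output_img_path but ends before anything is written; a plausible continuation of the loop body (an assumption on my part, since the original save step is not shown):

        # Hypothetical continuation: create the mirrored output directory,
        # then write the cropped-and-resized image
        os.makedirs(os.path.dirname(output_img_path), exist_ok=True)
        cv2.imwrite(output_img_path, cropped_img)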
Example No. 10
import os
import random

from utils import make_dir, is_img

source_dir = "./images/preprocessed/"
beer_indexs = {
        "Budweiser": 0,
        "Corona": 1,
        "Heineken": 2,
        "Hoegaarden": 3,
        }

train_csv_path = "./texts/train_data2.csv"
test_csv_path = "./texts/test_data2.csv"
f_train = open(train_csv_path, 'w')
f_test = open(test_csv_path, 'w')

for beer, index in beer_indexs.items():
    beer_root = os.path.join(source_dir, beer)
    for root, _, files in os.walk(beer_root):
        for file in files:
            path = os.path.join(root, file)
            if is_img(path):
                line = str(index) + "," + path + "\n"
                if random.random() < 0.8:  # roughly an 80/20 train/test split
                    f_train.write(line)
                else:
                    f_test.write(line)

f_train.close()
f_test.close()

Example No. 11
 def load_samples(self):
     for f in os.listdir(self.dir_AB)[:self.cfg['max_dataset_size']]:
         if is_img(f):
             self.AB_paths.append(os.path.join(self.dir_AB, f))
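Note that the [:max_dataset_size] slice is applied before the is_img filter, so a directory that mixes file types can yield fewer than max_dataset_size images. A variant that filters first (my sketch, which slightly changes the original's semantics):

 def load_samples(self):
     # requires: import itertools
     imgs = (f for f in os.listdir(self.dir_AB) if is_img(f))
     for f in itertools.islice(imgs, self.cfg['max_dataset_size']):
         self.AB_paths.append(os.path.join(self.dir_AB, f))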
Example No. 12
 def is_img(self):
     if self._is_img is None:
         # Lazily compute and cache the flag; assumes the response's
         # content type is stored on the instance next to the URL
         self._is_img = utils.is_img(self._url, self._content_type)
     return self._is_img
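On Python 3.8+ the same lazy caching can be written with functools.cached_property (a sketch of mine, not the original class; note the call site becomes obj.is_img rather than obj.is_img()):

from functools import cached_property

class Resource:
    def __init__(self, url, content_type):
        self._url = url
        self._content_type = content_type

    @cached_property
    def is_img(self):
        # Computed on first access, then stored on the instance
        return utils.is_img(self._url, self._content_type)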
Example No. 13
    def get_all_third_party_responses_by_site(self, top_url, lazy=False):
        """Return a dictionary containing third party data loaded on given top_url."""
        top_url = 'http://' + top_url
        tp_query = "SELECT r.url, h.value FROM http_responses_view AS r " \
                   "LEFT JOIN http_response_headers_view AS h ON h.response_id = r.id " \
                   "WHERE r.top_url LIKE %s AND " \
                   "url NOT LIKE %s AND h.name = 'Content-Type'"

        try:
            top_ps = utils.get_domain(top_url)
        except AttributeError:
            print("Error while finding public suffix of %s" % top_url)
            return None
        cur = self.connection.cursor()
        cur.itersize = 100000
        try:
            cur.execute(tp_query, (top_url, top_ps))
        except Exception:
            self._reconnect()  # reconnect and retry the query once
            cur = self.connection.cursor()
            cur.itersize = 100000
            cur.execute(tp_query, (top_url, top_ps))

        # If no responses, then clearly this was a crawl failure. Raise exception
        #if cur.rowcount <= 0:
        #    raise CensusException("No responses found: Census crawl failed to ")

        response_data = defaultdict(dict)
        for url, content_type in cur:
            if utils.should_ignore(url):
                continue

            url_data = dict()

            url_ps = utils.get_domain(url)
            if url_ps == top_ps:
                continue
            url_data['url_domain'] = url_ps

            is_js = utils.is_js(url, content_type)
            is_img = utils.is_img(url, content_type)
            if not lazy:
                is_el_tracker = utils.is_tracker(url,
                                                 is_js=is_js,
                                                 is_img=is_img,
                                                 first_party=top_url,
                                                 blocklist='easylist')
                is_ep_tracker = utils.is_tracker(url,
                                                 is_js=is_js,
                                                 is_img=is_img,
                                                 first_party=top_url,
                                                 blocklist='easyprivacy')
                is_tracker = is_el_tracker or is_ep_tracker
                url_data['is_tracker'] = is_tracker

            organization = utils.get_org(url)

            url_data['is_js'] = is_js
            url_data['is_img'] = is_img
            url_data['organization_name'] = organization

            response_data[url] = url_data
        cur.close()
        return dict(response_data)
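A usage sketch (the wrapper class name and site are invented; the method only needs self.connection to be a live DB-API connection):

db = CensusDatabase()  # hypothetical wrapper that opens self.connection
responses = db.get_all_third_party_responses_by_site("example.com", lazy=True)
for url, info in responses.items():
    print(url, info['url_domain'], info['is_js'], info['is_img'])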