def simple_file_run(self, img, download_path):
    """Run a reverse-image search for a single local file.

    Skips anything that is not a regular file or lacks an accepted image
    extension; otherwise creates a per-image folder under *download_path*,
    saves the search-result HTML there, and hands the page to
    ``self.analyse`` for parsing/downloading.
    """
    if not os.path.isfile(img):  # img is expected to be a full path
        return
    # Base name (no extension) of the image being uploaded.
    img_name = os.path.splitext(os.path.split(img)[1])[0]
    print("--> 正在处理图片: {} ".format(img_name))
    if not is_img(img, self.extention):
        print(f"{Fore.RED}文件 {img_name} 不是图片类型文件{Fore.RESET}")
        return
    # Dedicated sub-directory that receives everything fetched for this image.
    this_download_dir = os.path.join(
        download_path, img_name + "_search_data_folder")
    if not os.path.exists(this_download_dir):
        os.mkdir(this_download_dir)
    html_name = "{}.html".format(os.path.join(this_download_dir, img_name))
    # HTML source of the results page returned after uploading the image.
    html_source = self.upload_img_get_html(img)
    with open(html_name, 'a', encoding='utf-8', errors='ignore') as file:
        file.write("<!--下载源码时间: " + time.asctime() + " -->")
        file.write(html_source)
    # Parse the page, download images and write out the page text.
    self.analyse(html_source, this_download_dir,
                 this_download_dir + "/" + img_name)
    print("{}图片{}处理完成\n{}".format(Fore.GREEN, img_name, Fore.RESET))
def create_image_path(dir_path, url):
    """Map *url* to a deterministic local file path under *dir_path*.

    The filename is the SHA3-256 hex digest of the URL followed by the
    URL's lower-cased extension. Returns None (after printing a warning)
    when the extension is not an accepted image type.
    """
    if not is_img(url):
        print(Fore.RED + "Inappropriate file extension: " + url)
        return
    digest = hashlib.sha3_256(url.encode('utf-8')).hexdigest()
    extension = os.path.splitext(url)[-1]
    return os.path.join(dir_path, digest + extension.lower())
def simple_file_run(self, img, download_path):
    """Run a reverse-image search for a single local file, with retries.

    Skips non-files and non-image extensions; otherwise creates a per-image
    folder under *download_path*, uploads the image, and hands the result
    page to ``self.analyse``. The upload/analyse step is attempted up to
    three times, pausing one second after each failure.
    """
    if not os.path.isfile(img):  # img is expected to be a full path
        return
    # Base name (no extension) of the image being uploaded.
    img_name = os.path.splitext(os.path.split(img)[1])[0]
    print("--> 正在处理图片: {} ".format(img_name))
    if not is_img(img, self.extention):
        print(f"{Fore.RED}文件 {img_name} 不是图片类型文件{Fore.RESET}")
        return
    # Dedicated sub-directory that receives everything fetched for this image.
    this_download_dir = os.path.join(
        download_path, img_name + "_search_data_folder")
    if not os.path.exists(this_download_dir):
        os.mkdir(this_download_dir)
    max_attempts = 3  # same retry budget as the original while-counter
    for _attempt in range(max_attempts):
        try:
            # HTML source of the results page returned after uploading.
            html_source = self.upload_img_get_html(img)
            # Parse the page, download images and write out the page text.
            self.analyse(html_source, this_download_dir,
                         this_download_dir + "/" + img_name)
            print("{}图片{}处理完成\n{}".format(Fore.GREEN, img_name, Fore.RESET))
            break
        except Exception:
            # Fix: the original bare `except:` also swallowed
            # KeyboardInterrupt/SystemExit; catch Exception instead.
            traceback.print_exc()
            time.sleep(1)
import os
import sys

from utils import is_img

# Fixed content / style image locations for the UHD style-transfer sweep.
contentPath = "images/UHD_content/center_img"
stylePath = "images/UHD_style"

content_imgs = [x for x in os.listdir(contentPath) if is_img(x)]
style_imgs = [x for x in os.listdir(stylePath) if is_img(x)]

# Run WCT once for every (content, style) combination, content-major order.
for content_name in content_imgs:
    for style_name in style_imgs:
        script = "python WCT_my.py --cuda --UHD --UHD_contentPath images/UHD_content/center_img/ --fineSize=3000 --log_mark=20181114-1016 --picked_content_mark=%s --picked_style_mark=%s" % (
            content_name, style_name)
        os.system(script)
from PIL import Image as I
import sys
import os
from utils import is_img

pjoin = os.path.join


def crop_and_save(img_path):
    """Center-crop the image at *img_path* to a square and save it next to
    the original with a ``_center`` suffix inserted before the extension."""
    img = I.open(img_path)
    w, h = img.size
    center_img = img  # already square: saved unchanged
    if w > h:
        margin = int((w - h) / 2)
        center_img = img.crop((margin, 0, margin + h, h))
    elif w < h:
        margin = int((h - w) / 2)
        center_img = img.crop((0, margin, w, margin + w))
    root, ext = os.path.splitext(img_path)
    # Fix: the original `img_path.replace(ext, "_center" + ext)` rewrote the
    # FIRST occurrence of the extension text anywhere in the path (e.g. a
    # directory named "photos.jpg"); splitext targets only the real suffix.
    center_img.save(root + "_center" + ext)


inDir = sys.argv[1]
# Plain loop instead of a side-effect list comprehension.
for name in os.listdir(inDir):
    if is_img(name):
        crop_and_save(pjoin(inDir, name))
def __init__(self, pathC, pathS, shorter_side):
    """Collect image paths from the content (*pathC*) and style (*pathS*)
    directories; *shorter_side* is stored for later resizing."""
    def _collect(folder):
        # Keep only directory entries whose extension marks them as images.
        return [os.path.join(folder, fname)
                for fname in os.listdir(folder) if is_img(fname)]
    self.imgListC = _collect(pathC)
    self.imgListS = _collect(pathS)
    self.shorter_side = shorter_side
def __init__(self, img_dir, shorter_side):
    """Gather image paths from *img_dir* in a random order.

    img_dir: directory whose image files are indexed.
    shorter_side: target length of the shorter image edge (stored only).
    """
    self.img_list = [os.path.join(img_dir, i)
                     for i in os.listdir(img_dir) if is_img(i)]
    # Shuffle via a NumPy permutation (keeps np.random seeding behavior).
    random_order = np.random.permutation(len(self.img_list))
    # Fix: index the Python list directly so elements stay plain `str`;
    # the original round-trip through np.array produced numpy.str_ objects.
    self.img_list = [self.img_list[k] for k in random_order]
    self.shorter_side = shorter_side
def __init__(self, img_dir, shorter_side):
    """Index every image file directly under *img_dir*; store shorter_side."""
    self.img_list = [os.path.join(img_dir, entry)
                     for entry in filter(is_img, os.listdir(img_dir))]
    self.shorter_side = shorter_side
    # NOTE(review): this is the tail of a crop-and-resize helper whose `def`
    # line (defining `side` and `crop_img`) lies above this chunk.
    resize = (side, side)
    try:
        resized = cv2.resize(crop_img, resize, interpolation=cv2.INTER_AREA)
        return resized
    except Exception as err:
        # Report the offending image shape before re-raising to the caller.
        print "Resizing error. image shape:" + str(crop_img.shape)
        raise err


# Batch-convert every image under input_dir into a `size`-pixel square crop,
# mirroring the source directory layout. (Python 2 syntax: print statements.)
input_dir = "./images/original/"
output_dir = "./images/preprocessed/"
size = 32
for root, _, files in os.walk(input_dir):
    for file in files:
        if not is_img(file):
            continue
        # Path of this file relative to input_dir, reused on the output side.
        sem_root = root.replace(input_dir, "")
        sem_file = os.path.join(sem_root, file)
        input_img_path = os.path.join(input_dir, sem_file)
        output_img_path = os.path.join(output_dir, sem_file)
        print "Converting " + input_img_path + " => " + output_img_path
        img = cv2.imread(input_img_path, cv2.IMREAD_COLOR)
        try:
            cropped_img = crop_and_resize(img, size)
        except Exception as err:
            # First failure aborts the rest of this directory's files.
            print err
            break
from utils import make_dir, is_img

# Map each beer brand directory name to its class index (CSV label column).
source_dir = "./images/preprocessed/"
beer_indexs = {
    "Budweiser": 0,
    "Corona": 1,
    "Heineken": 2,
    "Hoegaarden": 3,
}
train_csv_path = "./texts/train_data2.csv"
test_csv_path = "./texts/test_data2.csv"

# Fix: `with` guarantees both CSVs are closed even if the walk raises; the
# original's trailing close() calls were skipped on any exception.
with open(train_csv_path, 'w') as f_train, open(test_csv_path, 'w') as f_test:
    for (beer, index) in beer_indexs.iteritems():  # Python 2 file (iteritems)
        beer_root = os.path.join(source_dir, beer)
        for root, _, files in os.walk(beer_root):
            for file in files:
                path = os.path.join(root, file)
                if is_img(path):
                    line = str(index) + "," + path + "\n"
                    # ~80/20 random train/test split per image.
                    if random.random() < 0.8:
                        f_train.write(line)
                    else:
                        f_test.write(line)
def load_samples(self):
    """Append the full path of every image file found in ``self.dir_AB`` to
    ``self.AB_paths``, examining at most cfg['max_dataset_size'] entries."""
    candidates = os.listdir(self.dir_AB)[:self.cfg['max_dataset_size']]
    self.AB_paths.extend(
        os.path.join(self.dir_AB, name) for name in candidates if is_img(name))
def is_img(self):
    """Return whether this object's URL points to an image, memoized.

    The result of ``utils.is_img`` is cached on ``self._is_img`` so the
    check runs at most once per instance.
    """
    if self._is_img is None:  # fix: identity check instead of `== None`
        # NOTE(review): `content_type` is not defined in this scope — it is
        # presumably meant to be an attribute such as `self._content_type`;
        # confirm against the rest of the class, since this branch currently
        # raises NameError when the cache is cold.
        self._is_img = utils.is_img(self._url, content_type)
    return self._is_img
def get_all_third_party_responses_by_site(self, top_url, lazy=False):
    """Return a dictionary containing third party data loaded on given top_url.

    Maps each third-party response URL to a dict with keys 'url_domain',
    'is_js', 'is_img', 'organization_name' and — unless *lazy* is true —
    'is_tracker' (an EasyList OR EasyPrivacy blocklist match).  Returns
    None when the public suffix of *top_url* cannot be determined.
    """
    top_url = 'http://' + top_url
    # Fetch every response URL plus its Content-Type header value; the two
    # LIKE parameters exclude responses served from the site's own domain.
    tp_query = "SELECT r.url, h.value FROM http_responses_view AS r " \
               "LEFT JOIN http_response_headers_view as h ON h.response_id = r.id " \
               " WHERE r.top_url LIKE %s AND " \
               "url not LIKE %s and h.name = 'Content-Type'"
    try:
        top_ps = utils.get_domain(top_url)
    except AttributeError:
        print("Error while finding public suffix of %s" % top_url)
        return None
    cur = self.connection.cursor()
    cur.itersize = 100000  # presumably a server-side cursor batch size — confirm driver
    try:
        cur.execute(tp_query, (top_url, top_ps))
    except Exception:
        # Fix: was a bare `except:`, which also caught KeyboardInterrupt /
        # SystemExit. Retry once after re-establishing the DB connection.
        self._reconnect()
        cur = self.connection.cursor()
        cur.itersize = 100000
        cur.execute(tp_query, (top_url, top_ps))
    # If no responses, then clearly this was a crawl failure. Raise exception
    #if cur.rowcount <= 0:
    #    raise CensusException("No responses found: Census crawl failed to ")
    response_data = defaultdict(dict)
    for url, content_type in cur:
        if utils.should_ignore(url):
            continue
        url_data = dict()
        url_ps = utils.get_domain(url)
        if url_ps == top_ps:
            # Same registrable domain as the site itself → first party; skip.
            continue
        url_data['url_domain'] = url_ps
        is_js = utils.is_js(url, content_type)
        is_img = utils.is_img(url, content_type)
        if not lazy:
            # Tracker if EITHER blocklist flags the URL.
            is_el_tracker = utils.is_tracker(url, is_js=is_js, is_img=is_img,
                                             first_party=top_url,
                                             blocklist='easylist')
            is_ep_tracker = utils.is_tracker(url, is_js=is_js, is_img=is_img,
                                             first_party=top_url,
                                             blocklist='easyprivacy')
            is_tracker = is_el_tracker or is_ep_tracker
            url_data['is_tracker'] = is_tracker
        organization = utils.get_org(url)
        url_data['is_js'] = is_js
        url_data['is_img'] = is_img
        url_data['organization_name'] = organization
        response_data[url] = url_data
    cur.close()
    return dict(response_data)