def __init__(self, url_file, user_agent_file, crawl_config):
    valid_instance(crawl_config, CD.CrawlConfig)
    self.crawl_config = CD.CrawlConfig()
    self.crawl_config.CopyFrom(crawl_config)
    # Prepare the input
    self.urls = filter(bool, open(url_file, 'r').read().split('\n'))
    self.user_agents = filter(bool, open(user_agent_file, 'r').read().split('\n'))
    # self.referers = filter(bool, open(referer_file, 'r').read().split('\n'))
    # Prepare the output directory
    crawl_type = None
    for user_agent in self.user_agents:
        if "bot" in user_agent:
            crawl_type = "bot"
            break
    if not crawl_type:
        crawl_type = "user"
    now = datetime.now().strftime("%Y%m%d-%H%M%S")
    self.base_dir = url_file + '.' + crawl_type + '.' + now + '.selenium.crawl/'
    mkdir_if_not_exist(self.base_dir)
    # Prepare log files
    # self.htmls_f = open(self.base_dir + 'html_path_list', 'a')
    self.md5_UA_filename = self.base_dir + 'md5_UA.log'
    self.crawl_log_filename = self.base_dir + 'crawl_log'
def visit_landing_url(self, landing_url_set, url_fetcher=None): """ @parameter landing_url_set: landing url set to visit url_fetcher: selenium handles to use for crawl """ valid_instance(landing_url_set, set) mkdir_if_not_exist(self.crawl_config.user_agent_md5_dir) # crawl web pages landing_url_set_size = len(landing_url_set) if landing_url_set_size < 8: record_maximum_threads = self.crawl_config.maximum_threads self.crawl_config.maximum_threads = 2 quit_fetcher_when_done = False if not url_fetcher: url_fetcher = UrlFetcher(self.crawl_config) quit_fetcher_when_done = True thread_computer = ThreadComputer(url_fetcher, 'fetch_url', landing_url_set) if quit_fetcher_when_done: url_fetcher.quit() if landing_url_set_size < 8: self.crawl_config.maximum_threads = record_maximum_threads # create and fill current_search, including urls, search_term etc. current_search = CD.CrawlSearchTerm() for p, s in thread_computer.result: result = current_search.result.add() result.CopyFrom(s) # update current_log if self.first: self.first = False self.current_log = CD.CrawlLog() result_search = self.current_log.result_search.add() result_search.CopyFrom(current_search)
def search_and_crawl(word_file, max_word_per_file=50): """ search words in word_file, get clickstring for search results and ads, then visit these clickstrings. @parameter word_file: the filename containing the words to search max_word_per_file: the maximum number of words to store in one crawl_log file """ # define constants user_UA = "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/" \ "537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36" user_suffix = "selenium.crawl/" now_suffix = datetime.now().strftime(".%Y%m%d-%H%M%S") # compute base_dir and start logging base_dir = '.'.join([word_file, user_suffix]) mkdir_if_not_exist(base_dir) logging.basicConfig(filename=base_dir+'running_log'+now_suffix, level=logging.DEBUG) logging.getLogger("global") # set crawl_config crawl_config = CD.CrawlConfig() crawl_config.maximum_threads = 6 crawl_config.user_agent = user_UA crawl_config.user_agent_md5_dir = base_dir + hex_md5(crawl_config.user_agent) \ + now_suffix + '/' crawl_config.browser_type = CD.CrawlConfig.CHROME # print crawl_config.user_agent words = SearchTerm(word_file) search = Search(crawl_config) crawl_config.result_type = CD.AD crawl_config.log_filename = 'ad_crawl_log' + now_suffix ad_visit = Visit(crawl_config, max_word_per_file) crawl_config.result_type = CD.SEARCH crawl_config.log_filename = 'search_crawl_log' + now_suffix search_visit = Visit(crawl_config, max_word_per_file) """ word_list = words.get_word_list() print 'word list size ', len(word_list) print word_list word_list = list() word_list.append('Essay Writing') word_list.append('P**n sale') for word in word_list: """ for word in words.get_word_list(): ad_set, search_set = search.search(word) # print clickstring_set ad_visit.visit(ad_set, word) search_visit.visit(search_set, word) words.next()
def save_binary_imgs(boundary_dir, thre):
    from util import mkdir_if_not_exist

    outdir = boundary_dir + "_binary"
    mkdir_if_not_exist(outdir)
    print("Result will be saved in %s" % outdir)

    fn_list = os.listdir(boundary_dir)
    for fn in tqdm(fn_list):
        raw_boundary_img = Image.open(os.path.join(boundary_dir, fn))
        out_img = binarize(raw_boundary_img, thre)
        out_img = frame_img(out_img)
        out_img.save(os.path.join(outdir, fn))
    print("Finished!!!")
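# binarize() and frame_img() are imported from elsewhere in the repo and are
# not shown here. The two helpers below are only an illustrative sketch of
# what they might do (threshold to {0, 255}, then force a one-pixel white
# border); the actual project implementations may differ.
import numpy as np
from PIL import Image


def binarize_sketch(img, thre):
    """Threshold a grayscale PIL image to a binary (0/255) image."""
    arr = np.array(img.convert("L"))
    return Image.fromarray(np.where(arr >= thre, 255, 0).astype(np.uint8))


def frame_img_sketch(img):
    """Set the outermost pixel ring to white so every region is closed."""
    arr = np.array(img)
    arr[0, :] = arr[-1, :] = 255
    arr[:, 0] = arr[:, -1] = 255
    return Image.fromarray(arr)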
def refine_by_bwboundary(segdir, bwbddir, dataset, min_thre, max_thre):
    basedir = os.path.split(segdir)[0]
    out_seg_dir = os.path.join(basedir, "refined_label")
    out_vis_dir = os.path.join(basedir, "refined_vis")
    mkdir_if_not_exist(out_seg_dir)
    mkdir_if_not_exist(out_vis_dir)
    print("Result will be saved in %s" % out_seg_dir)
    print("Result will be saved in %s" % out_vis_dir)

    segdir = os.path.join(basedir, "label")
    img_fn_list = os.listdir(segdir)
    for img_fn in tqdm(img_fn_list):
        segimg_fn = os.path.join(segdir, img_fn)
        # bwbdimg_fn = os.path.join(basedir, "bwboundary", img_fn)
        bwbdimg_fn = os.path.join(bwbddir, img_fn)
        segimg = np.array(Image.open(segimg_fn))
        bwbdimg = np.array(Image.open(bwbdimg_fn))

        cnter = Counter(bwbdimg.flatten())
        ok_id_list = [
            k for k, v in cnter.items()
            if v < max_thre and v > min_thre and k != 1
        ]

        res = np.copy(segimg)
        for ok_id in ok_id_list:
            # Overwrite each accepted boundary component with the majority
            # label found inside it.
            ok_idxes = np.where(bwbdimg == ok_id)
            cnter = Counter(segimg[ok_idxes].flatten())
            top_id, n_pixel_of_top_id = cnter.most_common()[0]
            res[ok_idxes] = top_id

        res = Image.fromarray(res)
        out_seg_fn = os.path.join(out_seg_dir, img_fn)
        res.save(out_seg_fn)

        out_vis_fn = os.path.join(out_vis_dir, img_fn)
        save_colorized_lbl(res, out_vis_fn, dataset)

    print("Finished!!!")
    return out_seg_dir
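# Example call. The paths and thresholds below are hypothetical; the real
# entry point that wires these arguments up lives elsewhere in the repo.
# Components whose pixel count lies strictly between min_thre and max_thre
# (and whose id is not 1) are relabeled with their majority class.
refined_dir = refine_by_bwboundary(
    segdir="outputs/run1/label",                 # hypothetical predicted-label dir
    bwbddir="outputs/run1/bwboundary_binary",    # hypothetical bwboundary dir
    dataset="nyu",
    min_thre=100,
    max_thre=50000,
)
print(refined_dir)  # .../refined_label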
def visit(self, clickstring_set, search_term): """ Count how many times this visit has been called, ie. how many words has been searched and visited so far. Note: some of the words might have empty advertisement clickstring_set, these words are counted but not logged. @parameter clickstring_set: the links to visit search_term: search term related to clickstring_set @return None or current_log_filename (from write_crawl_log()) """ self.counter += 1 clickstring_set_size = len(clickstring_set) if clickstring_set_size == 0: return None mkdir_if_not_exist(self.crawl_config.user_agent_md5_dir) # crawl web pages if clickstring_set_size < 8: record_maximum_threads = self.crawl_config.maximum_threads self.crawl_config.maximum_threads = 2 url_fetcher = UrlFetcher(self.crawl_config) thread_computer = ThreadComputer(url_fetcher, 'fetch_url', clickstring_set) url_fetcher.quit() if clickstring_set_size < 8: self.crawl_config.maximum_threads = record_maximum_threads # create and fill current_search, including urls, search_term etc. current_search = CD.CrawlSearchTerm() for p, s in thread_computer.result: result = current_search.result.add() result.CopyFrom(s) current_search.search_term = search_term current_search.result_type = self.crawl_config.result_type # update current_log if self.first: self.first = False self.current_log = CD.CrawlLog() result_search = self.current_log.result_search.add() result_search.CopyFrom(current_search) if self.counter % self.max_word_per_file == 0: return self.write_crawl_log()
def vis_using_Colorize(indir_list, outdir):
    indir = indir_list[0]
    # outdir = os.path.join(os.path.split(indir)[0], "vis_labels")
    mkdir_if_not_exist(outdir)

    for one_file in tqdm(os.listdir(indir)):
        fullpath = os.path.join(indir, one_file)
        hard_to_see_img = m.imread(fullpath)

        # outputs = outputs[0, :19].data.max(0)[1]
        # outputs = outputs.view(1, outputs.size()[0], outputs.size()[1])
        outputs = hard_to_see_img  # TODO this should be fixed
        output = Colorize()(outputs)
        output = np.transpose(output.cpu().numpy(), (1, 2, 0))
        img = Image.fromarray(output, "RGB")
        # PIL's resize expects (width, height); ndarray.shape is (height, width)
        img = img.resize((hard_to_see_img.shape[1], hard_to_see_img.shape[0]),
                         Image.NEAREST)

        outfn = os.path.join(outdir, one_file)
        # plt.savefig() only wrote an empty figure that img.save() immediately
        # overwrote, so the PIL save alone is kept:
        # plt.savefig(outfn, transparent=True, bbox_inches='tight', pad_inches=0)
        img.save(outfn)
def crawl(self): has_written = False for user_agent in self.user_agents: user_agent_md5 = hex_md5(user_agent) self.crawl_config.user_agent = user_agent self.crawl_config.user_agent_md5_dir = self.base_dir + user_agent_md5 + '/' # specify which type of browser to use set_browser_type(self.crawl_config) mkdir_if_not_exist(self.crawl_config.user_agent_md5_dir) # md5 - user agent mapping logs md5_UA_f = open(self.md5_UA_filename, 'a') # user agent md5_UA_f.write(user_agent_md5 + ":" + user_agent + "\n") md5_UA_f.close() # crawl web pages url_fetcher = UrlFetcher(self.crawl_config) thread_computer = ThreadComputer(url_fetcher, 'fetch_url', self.urls) url_fetcher.quit() # Write log for current user agent current_log = CD.CrawlLog() current_log_filename = self.crawl_config.user_agent_md5_dir + 'crawl_log' current_search = CD.CrawlSearchTerm() for p, s in thread_computer.result: result = current_search.result.add() result.CopyFrom(s) result_search = current_log.result_search.add() result_search.CopyFrom(current_search) write_proto_to_file(current_log, current_log_filename) # Write global crawl_log crawl_log = CD.CrawlLog() if has_written: read_proto_from_file(crawl_log, self.crawl_log_filename) else: has_written = True for r_s in current_log.result_search: result_search = crawl_log.result_search.add() result_search.CopyFrom(r_s) """ for s in current_log.result: result = crawl_log.result.add() result.CopyFrom(s) """ write_proto_to_file(crawl_log, self.crawl_log_filename)
def get_feature_mat_from_video(video_filename, output_dir='output'):
    yt_vid, extension = video_filename.split('/')[-1].split('.')
    assert extension in ['webm', 'mp4', '3gp']
    mkdir_if_not_exist(output_dir, False)
    output_filename = output_dir + '/' + yt_vid + '.npy'

    vid_reader = imageio.get_reader(video_filename, 'ffmpeg')
    img_list = get_img_list_from_vid_reader(vid_reader, extension)

    base_model = InceptionV3(include_top=True, weights='imagenet')
    model = Model(inputs=base_model.input,
                  outputs=base_model.get_layer('avg_pool').output)
    feature_mat = get_feature_mat(model, img_list)

    np.save(output_filename, feature_mat)
    return feature_mat
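# Example call (the path below is hypothetical): extracts InceptionV3
# 'avg_pool' features for every sampled frame and caches the matrix as
# output/<video_id>.npy before returning it.
features = get_feature_mat_from_video('videos/dQw4w9WgXcQ.mp4')
print(features.shape)  # (n_frames, 2048) for InceptionV3's avg_pool layer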
def collect_site_for_plot(site_set, outdir, mode="user"): """ Collect user and google observation for site in site_set. This is scheduled by cron job. In order to show how hash values of websites change over time. @parameter site_set: the set of urls to visit outdir: the output directory mode: which user agent to use, supported mode includes user, google, both """ valid_instance(site_set, set) mkdir_if_not_exist(outdir) user_UA = "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/" \ "537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36" google_UA = "AdsBot-Google (+http://www.google.com/adsbot.html)" crawl_config = CD.CrawlConfig() crawl_config.maximum_threads = 1 crawl_config.browser_type = CD.CrawlConfig.CHROME crawl_config.crawl_log_dir = outdir now_suffix = datetime.now().strftime(".%Y%m%d-%H%M%S") UAs = dict() if mode == "user": UAs["user"] = user_UA elif mode == "google": UAs["google"] = google_UA elif mode == "both": UAs["user"] = user_UA UAs["google"] = google_UA else: raise Exception("Unknown mode {0}".format(mode)) for mode in UAs: crawl_config.user_agent = UAs[mode] crawl_config.user_agent_md5_dir = outdir + hex_md5(crawl_config.user_agent) \ + now_suffix + '/' crawl_config.log_filename = mode + '_crawl_log' + now_suffix mode_visit = Visit(crawl_config) mode_visit.visit_landing_url(site_set) mode_visit.write_crawl_log(False)
def convert_40to13cls(target_dir):
    with open('./dataset/nyu_info.json', 'r') as f:
        paramdic = json.load(f)
    class_ind = np.array(paramdic['40to13cls'])

    base_dir, indir = os.path.split(target_dir)
    out_lbl_dir = os.path.join(base_dir, indir + "-13cls")
    out_vis_dir = os.path.join(base_dir, "vis-13cls")
    original_pngfn_list = os.listdir(target_dir)
    mkdir_if_not_exist(out_lbl_dir)
    mkdir_if_not_exist(out_vis_dir)

    for pngfn in tqdm(original_pngfn_list):
        fullpath = os.path.join(target_dir, pngfn)
        original_im = Image.open(fullpath)
        processed_im = swap_labels(np.array(original_im), class_ind)
        out_lbl_fn = os.path.join(out_lbl_dir, pngfn)
        processed_im.save(out_lbl_fn, 'PNG')

        out_vis_fn = os.path.join(out_vis_dir, pngfn)
        save_colorized_lbl(processed_im, out_vis_fn)
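# swap_labels() is defined elsewhere in the repo. The sketch below shows the
# usual lookup-table way to remap label ids (NYU 40-class -> 13-class) and is
# an assumption about its behaviour, not the project's actual implementation.
import numpy as np
from PIL import Image


def swap_labels_sketch(lbl_arr, class_ind):
    """Map each pixel's 40-class id to its 13-class id via a lookup table."""
    lut = np.asarray(class_ind, dtype=np.uint8)   # lut[old_id] == new_id
    remapped = lut[lbl_arr]                       # vectorized per-pixel remap
    return Image.fromarray(remapped)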
def revisit(crawl_log_file_list, word_file, n): """ visit landing urls in crawl_log_file n times @parameter crawl_log_file_list: list of filenames of crawl_log word_file: file containing words in crawl_log_file, used for creating base_dir n: number of times to visit """ # google_UA is not used in search and crawl. Used in later visit. google_UA = "AdsBot-Google (+http://www.google.com/adsbot.html)" google_suffix = 'google.crawl/' for i in range(int(n)): # the time label is set for each iteration of visit now_suffix = datetime.now().strftime(".%Y%m%d-%H%M%S") for crawl_log_file in crawl_log_file_list: # compute base_dir and start logging base_dir = '.'.join([word_file, google_suffix]) mkdir_if_not_exist(base_dir) logging.basicConfig(filename=base_dir+'running_log'+now_suffix, level=logging.DEBUG) logging.getLogger("global") # set crawl_config crawl_config = CD.CrawlConfig() crawl_config.maximum_threads = 6 crawl_config.user_agent = google_UA crawl_config.user_agent_md5_dir = base_dir + hex_md5(crawl_config.user_agent) \ + now_suffix + '/' crawl_config.browser_type = CD.CrawlConfig.CHROME google_crawl_log = crawl_log_file.split('/')[-1] + '.google' crawl_config.log_filename = google_crawl_log + now_suffix revisit = Visit(crawl_config) crawl_log = CD.CrawlLog() read_proto_from_file(crawl_log, crawl_log_file) landing_url_set = crawl_log_attr_set(crawl_log, "landing_url") revisit.visit_landing_url(landing_url_set) revisit.write_crawl_log(False)
def main(npydirs, out_directory='ensemble_results', method='averaging', mode='test'):
    """
    Ensemble.
    1. get all npy files from given directory.
    2. ensemble each file and output predict png file.
    """
    out_shape = (2048, 1024) if mode == 'valid' else (1280, 720)

    out_label_dir = os.path.join(out_directory, 'label')
    out_vis_dir = os.path.join(out_directory, 'vis')
    out_prob_dir = os.path.join(out_directory, 'prob')
    mkdir_if_not_exist(out_label_dir)
    mkdir_if_not_exist(out_vis_dir)
    mkdir_if_not_exist(out_prob_dir)

    print('- npy_directory_list')
    print(npydirs)
    print('- method')
    print(method)
    print('- mode')
    print(mode, out_shape)

    prob_filenames = os.listdir(npydirs[0])
    print(len(prob_filenames))

    print('Ensembling ...')
    for i, prob_filename in tqdm(enumerate(prob_filenames)):
        png_filename = prob_filename.replace('npy', 'png')
        prob_fns = [os.path.join(npydir, prob_filename) for npydir in npydirs]
        ensemble_predict(prob_fns,
                         os.path.join(out_label_dir, png_filename),
                         os.path.join(out_prob_dir, prob_filename),
                         os.path.join(out_vis_dir, png_filename),
                         method, out_shape)
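# ensemble_predict() is implemented elsewhere in the repo. The sketch below is
# a minimal illustration of the 'averaging' strategy it is asked to apply
# (mean of the per-model class-probability tensors, then per-pixel argmax);
# the function and argument names here are illustrative only.
import numpy as np


def average_ensemble_sketch(prob_fns):
    """Average per-model (n_class, H, W) probability maps and take argmax."""
    probs = [np.load(fn) for fn in prob_fns]   # one probability tensor per model
    mean_prob = np.mean(probs, axis=0)         # (n_class, H, W)
    label_map = np.argmax(mean_prob, axis=0)   # (H, W) predicted class ids
    return mean_prob, label_map.astype(np.uint8)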
def edge_detect(rgb_dir, min_canny_thre, max_canny_thre):
    base_dir = os.path.split(rgb_dir)[0]
    edge_dir = os.path.join(base_dir, "edges")
    laplacian_dir = os.path.join(edge_dir, "laplacian")
    canny_dir = os.path.join(edge_dir, "canny")
    sobel_dir = os.path.join(edge_dir, "sobel")
    mkdir_if_not_exist(laplacian_dir)
    mkdir_if_not_exist(canny_dir)
    mkdir_if_not_exist(sobel_dir)
    print("Result will be saved in %s" % edge_dir)

    fn_list = os.listdir(rgb_dir)
    for fn in tqdm(fn_list):
        img = cv2.imread(os.path.join(rgb_dir, fn))
        imgYUV = cv2.cvtColor(img, cv2.COLOR_BGR2YUV)
        imgY = imgYUV[:, :, 0]

        # Laplacian
        out_laplacian_im = cv2.Laplacian(imgY, cv2.CV_64F) + 128
        out_laplacian_fn = os.path.join(laplacian_dir, fn)
        save_img_by_PIL(out_laplacian_im, out_laplacian_fn)

        # Canny: the image followed by the low/high hysteresis thresholds
        out_canny_im = cv2.Canny(imgY, min_canny_thre, max_canny_thre)
        out_canny_fn = os.path.join(canny_dir, fn)
        save_img_by_PIL(out_canny_im, out_canny_fn)

        # Sobel
        dx = cv2.Sobel(imgY, cv2.CV_64F, 1, 0, ksize=3)
        dy = cv2.Sobel(imgY, cv2.CV_64F, 0, 1, ksize=3)
        out_sobel_im = np.sqrt(dx**2 + dy**2)
        out_sobel_fn = os.path.join(sobel_dir, fn)
        save_img_by_PIL(out_sobel_im, out_sobel_fn)
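# Example call (hypothetical path and thresholds): writes Laplacian, Canny,
# and Sobel edge maps for every image under <base>/edges/{laplacian,canny,sobel}/.
edge_detect("/data/mydataset/rgb", min_canny_thre=100, max_canny_thre=200)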
    '/data/unagi0/watanabe/DomainAdaptation/Segmentation/VisDA2017/cityscapes_gt/val'
)
parser.add_argument(
    '--vis_outdir',
    type=str,
    default=
    '/data/unagi0/watanabe/DomainAdaptation/Segmentation/VisDA2017/cityscapes_vis_gt/val'
)
args = parser.parse_args()

if args.dataset in ["city16", "synthia"]:
    info_json_fn = "./dataset/synthia2cityscapes_info.json"
else:
    info_json_fn = "./dataset/city_info.json"

# Save visualized predicted pixel labels (pngs)
with open(info_json_fn) as f:
    info_dic = json.load(f)

gtfn_list = os.listdir(args.gt_dir)
for gtfn in tqdm(gtfn_list):
    full_gtfn = os.path.join(args.gt_dir, gtfn)
    img = Image.open(full_gtfn)
    palette = np.array(info_dic['palette'], dtype=np.uint8)
    img.putpalette(palette.flatten())
    mkdir_if_not_exist(args.vis_outdir)
    vis_fn = os.path.join(args.vis_outdir, gtfn)
    img.save(vis_fn)
def main(): if len(sys.argv) != 5: print( "Usage: gen_imgs_from_bson.py <validpct> <inputdir> <traindir> <validdir>" ) sys.exit(1) validpct = float(sys.argv[1]) / 100 inputdir = sys.argv[2] traindir = sys.argv[3] validdir = sys.argv[4] trainfile = os.path.join(inputdir, 'train.bson') train_raw_dir = os.path.join(traindir, "raw") valid_raw_dir = os.path.join(validdir, "raw") # create categories folders categories = pd.read_csv(os.path.join(inputdir, 'category_names.csv'), index_col='category_id') for category in tqdm_notebook(categories.index): mkdir_if_not_exist(os.path.join(train_raw_dir, str(category))) mkdir_if_not_exist(os.path.join(valid_raw_dir, str(category))) num_products = 7069896 # 7069896 for train and 1768182 for test product_cnt = 0 bar = tqdm_notebook(total=num_products) with open(trainfile, 'rb') as trainbson: data = bson.decode_file_iter(trainbson) train_cats, valid_cats, image_counter = collections.Counter( ), collections.Counter(), collections.Counter() for prod in data: product_id = prod['_id'] category_id = prod['category_id'] # decide if this product will go into the validation or train data if random.random() < validpct: outdir = valid_raw_dir valid_cats[category_id] += 1 else: outdir = train_raw_dir train_cats[category_id] += 1 for picidx, pic in enumerate(prod['imgs']): filename = os.path.join(outdir, str(category_id), "{}.{}.jpg".format(product_id, picidx)) with open(filename, 'wb') as f: f.write(pic['picture']) image_counter[outdir] += 1 bar.update() product_cnt += 1 if product_cnt % 10000 == 0: print("converted {} products".format(product_cnt)) for name, cnt, dir_ in [("training", train_cats, train_raw_dir), ("validation", valid_cats, valid_raw_dir)]: print("{}: {} categories with {} products and {} images".format( name, len(cnt), sum(cnt.values()), image_counter[dir_]))
model_name += "-use_f2" print("=> loading checkpoint '{}'".format(args.trained_checkpoint)) if not os.path.exists(args.trained_checkpoint): raise OSError("%s does not exist!" % args.trained_checkpoint) checkpoint = torch.load(args.trained_checkpoint) train_args = checkpoint["args"] args.start_epoch = checkpoint['epoch'] print("----- train args ------") pprint(checkpoint["args"].__dict__, indent=4) print("-" * 50) print("=> loaded checkpoint '{}'".format(args.trained_checkpoint)) base_outdir = os.path.join(args.outdir, args.mode, model_name) mkdir_if_not_exist(base_outdir) json_fn = os.path.join(base_outdir, "param.json") check_if_done(json_fn) args.machine = os.uname()[1] save_dic_to_json(args.__dict__, json_fn) train_img_shape = tuple([int(x) for x in train_args.train_img_shape]) test_img_shape = tuple([int(x) for x in args.test_img_shape]) if "normalize_way" in train_args.__dict__.keys(): img_transform = get_img_transform(img_shape=train_img_shape, normalize_way=train_args.normalize_way) else: img_transform = get_img_transform(img_shape=train_img_shape)
model_f1.cuda() print("----- train args ------") pprint(checkpoint["args"].__dict__, indent=4) print("-" * 50) args.train_img_shape = checkpoint["args"].train_img_shape print("=> loaded checkpoint '{}'".format(args.trained_checkpoint)) indir, infn = os.path.split(args.trained_checkpoint) trained_mode = indir.split(os.path.sep)[-2] args.mode = "%s---%s-%s" % (trained_mode, args.tgt_dataset, args.split) model_name = infn.replace(".pth", "") base_outdir = os.path.join(args.outdir, args.mode, model_name) mkdir_if_not_exist(base_outdir) # Set TF-Logger tfconfigure(base_outdir, flush_secs=10) tflogger = AccumulatedTFLogger() json_fn = os.path.join(base_outdir, "param.json") check_if_done(json_fn) args.machine = os.uname()[1] save_dic_to_json(args.__dict__, json_fn) train_img_shape = tuple([int(x) for x in args.train_img_shape]) test_img_shape = tuple([int(x) for x in args.test_img_shape]) img_transform = Compose([ Scale(train_img_shape, Image.BILINEAR),
action='store_true', help='whether you save probability tensors') args = parser.parse_args() os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu config = { 'output_path': args.output_path, 'path': { 'log': args.output_path + 'log/', 'prob': args.output_path + 'prob/', 'label': args.output_path + 'label/' } } mkdir_if_not_exist(config['path']['log']) mkdir_if_not_exist(config['path']['label']) config['logger'] = Logger(logroot=config['path']['log'], filename=args.log_file, level='debug') config['logger'].logger.debug(str(args)) if not os.path.exists(args.trained_checkpoint): raise OSError('%s does not exist!' % args.trained_checkpoint) config['logger'].logger.debug('==> loading checkpoint: ' + args.trained_checkpoint) checkpoint = torch.load(args.trained_checkpoint) train_args = checkpoint['args'] G, F1, F2 = get_models(input_ch=train_args.input_ch, n_class=train_args.n_class,
        files = [os.path.join(vis_dir, filename) for vis_dir in vis_dirs]
        outimg = os.path.join(outdir, filename)
        merge_four_images(files, outimg)
    print("Finished! Result dir is %s" % outdir)


if __name__ == '__main__':
    args = sys.argv
    # The number of args needs to be 3.
    # merge_four_images(args[1:], 'sample_merged.png')
    # vis_dirs = ['/data/ugui0/dataset/adaptation/segmentation_test'] + args[1:]
    vis_dirs = ["/data/unagi0/dataset/NYUDv2/gupta/rgb/"]
    pred_base_dir = args[1]
    target_dir_list = ["vis", "depth", "boundary"]
    vis_dirs += [os.path.join(pred_base_dir, x) for x in target_dir_list]
    print(vis_dirs)

    # for i in range(20):
    #     outdir = 'merged_imgs/merged_imgs_{0}'.format(i)
    #     if os.path.exists(outdir):
    #         continue
    #     else:
    #         break
    outdir = os.path.join(pred_base_dir, "merged")
    mkdir_if_not_exist(outdir)
    main(vis_dirs, outdir)
def vis_with_legend(indir_list, outdir, label_list, raw_rgb_dir, raw_optional_img_dir=None, gt_dir=None, ext="pdf", title_names=None): N_CLASS = len(label_list) values = np.arange(N_CLASS) n_imgs = 1 + len(indir_list) if raw_optional_img_dir: n_imgs += 1 if gt_dir: n_imgs += 1 mkdir_if_not_exist(outdir) n_row = 2 n_col = int(round(float(n_imgs) / n_row)) # img_fn_list = os.listdir(raw_rgb_dir) img_fn_list = os.listdir(indir_list[0]) for one_img_fn in tqdm(img_fn_list): fig = plt.figure(figsize=(560 * n_col / 100, 425 * n_row / 100 * 1.2)) # sharex=True, sharey=True) ax_list = [] ax_list.append(fig.add_subplot(n_row, n_col, 1)) raw_img = Image.open(os.path.join(raw_rgb_dir, one_img_fn)) ax_list[0].imshow(raw_img) ax_list[0].axis("off") ax_list[0].set_xticklabels([]) ax_list[0].set_yticklabels([]) ax_list[0].set_aspect('equal') offset = 1 plt.axis('tight') if raw_optional_img_dir: ax_list.append(fig.add_subplot(n_row, n_col, offset + 1)) raw_img = Image.open(os.path.join(raw_optional_img_dir, one_img_fn)) ax_list[offset].imshow(raw_img, cmap='gray') ax_list[offset].axis("off") ax_list[offset].set_xticklabels([]) ax_list[offset].set_yticklabels([]) ax_list[offset].set_aspect('equal') plt.axis('tight') offset += 1 if gt_dir: ax_list.append(fig.add_subplot(n_row, n_col, offset + 1)) gt_img = Image.open(os.path.join(gt_dir, one_img_fn.replace("leftImg8bit", "gtFine_gtlabels"))) gt_img = np.array(gt_img, dtype=np.uint8) ax_list[offset].imshow(gt_img, vmin=0, vmax=N_CLASS - 1, interpolation='none', cmap="jet") ax_list[offset].axis("off") ax_list[offset].set_xticklabels([]) ax_list[offset].set_yticklabels([]) ax_list[offset].set_aspect('equal') plt.axis('tight') offset += 1 # ax_list[0].set_aspect('equal') for i, (indir, title_name) in enumerate(zip(indir_list, title_names[offset:])): # hard_to_see_img = m.imread(os.path.join(indir, one_img_fn)) hard_to_see_img = Image.open(os.path.join(indir, one_img_fn)).resize(raw_img.size) hard_to_see_img = np.array(hard_to_see_img) ax_list.append(fig.add_subplot(n_row, n_col, i + offset + 1)) # hsv = plt.get_cmap('hsv') # colors = hsv(np.linspace(0, 1.0, N_CLASS)) def discrete_cmap(N, base_cmap=None): """Create an N-bin discrete colormap from the specified input map""" # Note that if base_cmap is a string or None, you can simply do # return plt.cm.get_cmap(base_cmap, N) # The following works for string, None, or a colormap instance: base = plt.cm.get_cmap(base_cmap) color_list = base(np.linspace(0, 1, N)) cmap_name = base.name + str(N) return base.from_list(cmap_name, color_list, N) cmap = "gray" if "boundary" in title_name.lower() else "jet" vmax = 255 if "boundary" in title_name.lower() else N_CLASS - 1 im = ax_list[i + offset].imshow(hard_to_see_img.astype(np.uint8), vmin=0, vmax=vmax, interpolation='none', cmap=cmap) # cmap=discrete_cmap(N_CLASS, "jet")) ax_list[i + offset].axis("off") ax_list[i + offset].set_xticklabels([]) ax_list[i + offset].set_yticklabels([]) ax_list[i + offset].set_title(indir.replace("outputs/", "").replace("/label", "").replace("/", "\n"), fontsize=4) ax_list[i + offset].set_aspect('equal') plt.axis('tight') if title_names is not None: for i, title in enumerate(title_names): ax_list[i].set_title(title, fontsize=30) # fig.subplots_adjust(wspace=0, hspace=0) # fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None) fig.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0, hspace=0) fig.tight_layout(pad=0) # colors = [im.cmap(im.norm(value)) for value in values] # patches = 
[mpatches.Patch(color=colors[i], label=label_list[i]) for i in range(len(values))] # # lgd = fig.legend(handles=patches, labels=label_list, bbox_to_anchor=(1.05, 1), borderaxespad=0., # # fontsize=7, loc='upper left') # loc=2 # if n_col * 2 <= N_CLASS: # n_legend_col = n_col * 2 # else: # n_legend_col = N_CLASS # lgd = plt.legend(patches, label_list, loc='lower center', bbox_to_anchor=(0, 0, 1, 1), # bbox_transform=plt.gcf().transFigure, ncol=n_legend_col, fontsize=5) # fig.tight_layout() outfn = os.path.join(outdir, one_img_fn) outfn = os.path.splitext(outfn)[0] + '.%s' % ext fig.savefig(outfn, transparent=True, bbox_inches='tight', pad_inches=0) # fig.savefig(outfn, transparent=True, bbox_inches='tight', pad_inches=0, bbox_extra_artists=(lgd,), dpi=300) plt.close()
def fetch_url(self, url): while True: self.lock.acquire() if self.browser_queue.empty(): self.lock.release() time.sleep(5) else: browser = self.browser_queue.get() self.lock.release() break result = CD.CrawlResult() # record whether url loading failed! result.url = url result.url_md5 = hex_md5(url) result.success = True try: # This line is used to handle alert: <stay on this page> <leave this page> browser.get(result.url) browser.execute_script("window.onbeforeunload = function() {};") time.sleep(1) if self.crawl_config.browser_type == CD.CrawlConfig.CHROME and \ (('404 Not Found' in browser.title) \ or ('403' in browser.title) \ or ('Forbidden' in browser.title) \ or ('not available' in browser.title) \ or ('Problem loading page' in browser.title) \ or ('Page not found' in browser.title) \ or ('Error' in browser.title) \ or ('Access denied' in browser.title) \ or (browser.current_url == 'data:text/html,chromewebdata')): result.landing_url = browser.current_url result.landing_url_md5 = hex_md5(result.landing_url) result.success = False elif self.crawl_config.browser_type == CD.CrawlConfig.FIREFOX and \ (('404 Not Found' in browser.title) \ or ('403' in browser.title) \ or ('Forbidden' in browser.title) \ or ('not available' in browser.title) \ or ('Problem loading page' in browser.title) \ or ('Page not found' in browser.title) \ or ('Error' in browser.title) \ or ('Access denied' in browser.title)): result.landing_url = browser.current_url result.landing_url_md5 = hex_md5(result.landing_url) result.success = False else: ############# # md5 the original url url_md5_dir = self.crawl_config.user_agent_md5_dir + result.url_md5 + '/' mkdir_if_not_exist(url_md5_dir) # get the landing url result.landing_url = browser.current_url result.landing_url_md5 = hex_md5(result.landing_url) # get the whole page source response = browser.execute_script("return document.documentElement.innerHTML;") result.file_path = url_md5_dir + 'index.html' f = open(result.file_path, 'w') f.write(response.encode('utf-8')) f.close() browser.delete_all_cookies() if len(browser.window_handles) > 1: # close all the other windows current_window_handle = browser.current_window_handle for handle in browser.window_handles: if handle != current_window_handle: browser.switch_to_window(handle) browser.close() # switch back to the current window browser.switch_to_window(current_window_handle) except: result.success = False browser = restart_browser(self.crawl_config.browser_type, incognito=False, user_agent=self.crawl_config.user_agent, browser=browser) self.browser_queue.put(browser) logger = logging.getLogger("global") logger.info("the length of the browser_queue") logger.info(self.browser_queue.qsize()) return result
def vis_with_legend(indir_list, outdir, label_list, raw_rgb_dir, raw_optional_img_dir=None, gt_dir=None, ext="png", title_names=None, n_sample=10): N_CLASS = len(label_list) values = np.arange(N_CLASS) n_imgs = 1 + len(indir_list) if raw_optional_img_dir: n_imgs += 1 if gt_dir: n_imgs += 1 mkdir_if_not_exist(outdir) n_row = 1 n_col = int(round(float(n_imgs) / n_row)) # img_fn_list = os.listdir(raw_rgb_dir) img_fn_list = os.listdir(gt_dir) # img_fn_list = os.listdir(indir_list[0]) img_fn_list = random.sample(img_fn_list, n_sample) for one_img_fn in tqdm(img_fn_list): fig = plt.figure(figsize=(560 * n_col / 100, 425 * n_row / 100)) # sharex=True, sharey=True) ax_list = [] ax_list.append(fig.add_subplot(n_row, n_col, 1)) raw_img = Image.open(os.path.join(raw_rgb_dir, one_img_fn)) ax_list[0].imshow(raw_img) ax_list[0].axis("off") ax_list[0].set_xticklabels([]) ax_list[0].set_yticklabels([]) ax_list[0].set_aspect('equal') offset = 1 plt.axis('tight') if raw_optional_img_dir: ax_list.append(fig.add_subplot(n_row, n_col, offset + 1)) raw_img = Image.open(os.path.join(raw_optional_img_dir, one_img_fn)) ax_list[offset].imshow(raw_img, cmap='gray') ax_list[offset].axis("off") ax_list[offset].set_xticklabels([]) ax_list[offset].set_yticklabels([]) ax_list[offset].set_aspect('equal') plt.axis('tight') offset += 1 if gt_dir: ax_list.append(fig.add_subplot(n_row, n_col, offset + 1)) gt_img = Image.open( os.path.join( gt_dir, one_img_fn.replace("leftImg8bit", "gtFine_gtlabels"))) gt_img = np.array(gt_img, dtype=np.uint8) ax_list[offset].imshow(gt_img, vmin=0, vmax=N_CLASS - 1, interpolation='none', cmap="jet") ax_list[offset].axis("off") ax_list[offset].set_xticklabels([]) ax_list[offset].set_yticklabels([]) ax_list[offset].set_aspect('equal') plt.axis('tight') offset += 1 if title_names is not None: for i, title in enumerate(title_names): ax_list[i].set_title(title, fontsize=30) fig.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0, hspace=0) fig.tight_layout(pad=0) outfn = os.path.join(outdir, one_img_fn) outfn = os.path.splitext(outfn)[0] + '.%s' % ext fig.savefig(outfn, transparent=True, bbox_inches='tight', pad_inches=0) plt.close()
model.load_state_dict(checkpoint) print ("----- train args ------") pprint(train_args.__dict__, indent=4) print ("-" * 50) args.train_img_shape = train_args.train_img_shape print("=> loaded checkpoint '{}'".format(args.trained_checkpoint)) indir, infn = os.path.split(args.trained_checkpoint) trained_mode = indir.split(os.path.sep)[-2] args.mode = "%s---%s-%s" % (trained_mode, args.tgt_dataset, args.split) model_name = infn.replace(".pth", "") base_outdir = os.path.join(args.outdir, args.mode, model_name) mkdir_if_not_exist(base_outdir) json_fn = os.path.join(base_outdir, "param.json") check_if_done(json_fn) args.machine = os.uname()[1] save_dic_to_json(args.__dict__, json_fn) train_img_shape = tuple([int(x) for x in args.train_img_shape]) test_img_shape = tuple([int(x) for x in args.test_img_shape]) if "crop_size" in train_args.__dict__.keys() and train_args.crop_size > 0: train_img_shape = test_img_shape print ("train_img_shape was set to the same as test_img_shape") if "normalize_way" in train_args.__dict__.keys(): img_transform = get_img_transform(img_shape=train_img_shape, normalize_way=train_args.normalize_way)
def search_and_revisit(word_file, n, threads=6, ad_only=False): """ This function does the following things. 1. Search each word in word file. 2. Grab the top 200 returned results and corresponding ads 3. Visit all the results and ads with "chrome user agent", repeat n times 4. Visit all the landing pages in step 3 with "google ads bot user agent" @parameter word_file: the filename containing the words to search n: repeat step 3 for n times ad_only: Only retrieve the advertisements. In this case, we only view the first 5 pages. @output Following are output of this function Running log: [WORD_FILE].selenium.crawl/running_log.[SEARCH_TIME] "chrome user agent" result is: [WORD_FILE].selenium.crawl/ad_crawl_log.[SEARCH_TIME].[WORD_MD5] [WORD_FILE].selenium.crawl/search_crawl_log.[SEARCH_TIME].[WORD_MD5] [WORD_FILE].selenium.crawl/[WORD_MD5]/[UA_MD5].[SEARCH_TIME]/[URL_MD5]/index.html "google ads bot user agent" result is: [WORD_FILE].selenium.crawl/ad_crawl_log.[SEARCH_TIME].[WORD_MD5].google [WORD_FILE].selenium.crawl/search_crawl_log.[SEARCH_TIME].[WORD_MD5].google [WORD_FILE].selenium.crawl/[WORD_MD5]/[UA_MD5].[SEARCH_TIME].revisit.[REVISIT_TIME]/[URL_MD5]/index.html """ valid_instance(threads, int) # prepare search and visit user_UA = "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/" \ "537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36" user_suffix = "selenium.crawl/" search_now_suffix = datetime.now().strftime(".%Y%m%d-%H%M%S") word_md5_delimiter = "WORD_MD5" # compute base_dir and start logging base_dir = '.'.join([word_file, user_suffix]) mkdir_if_not_exist(base_dir) logging.basicConfig(filename=base_dir+'running_log'+search_now_suffix, level=logging.DEBUG) logging.getLogger("global") # set search and visit crawl_config search_config = CD.CrawlConfig() search_config.maximum_threads = threads search_config.user_agent = user_UA # number of top search results to be inspected if ad_only: search_config.count = 50 search_config.browser_type = CD.CrawlConfig.CHROME ad_crawl_config = CD.CrawlConfig() ad_crawl_config.CopyFrom(search_config) ad_crawl_config.result_type = CD.AD ad_crawl_config.crawl_log_dir = base_dir ad_log_filename_prefix = 'ad_crawl_log' + search_now_suffix ad_dir_prefix = base_dir + word_md5_delimiter + "/" + \ hex_md5(ad_crawl_config.user_agent) + search_now_suffix + '/' search_crawl_config = CD.CrawlConfig() search_crawl_config.CopyFrom(search_config) search_crawl_config.result_type = CD.SEARCH search_crawl_config.crawl_log_dir = base_dir search_log_filename_prefix = 'search_crawl_log' + search_now_suffix search_dir_prefix = base_dir + word_md5_delimiter + "/" + \ hex_md5(search_crawl_config.user_agent) + search_now_suffix + '/' # print crawl_config.user_agent words = SearchTerm(word_file) search = Search(search_config) ad_visit = Visit(ad_crawl_config, 1) search_visit = Visit(search_crawl_config, 1) # prepare the revisit google_ad_UA = "AdsBot-Google (+http://www.google.com/adsbot.html)" google_search_UA = "Googlebot/2.1 (+http://www.google.com/bot.html)" # set revisit crawl_config revisit_crawl_config = CD.CrawlConfig() revisit_crawl_config.maximum_threads = threads revisit_crawl_config.browser_type = CD.CrawlConfig.CHROME # base directory uses search_now_suffix to correlate these two revisit_crawl_config.crawl_log_dir = base_dir # search, visit and revisit each word for word in words.get_word_list(): print "Processing {0} word: {1}".format(words.current(), word) # update word_md5 related directories print word word_md5 = hex_md5(word) 
ad_crawl_config.log_filename = ad_log_filename_prefix + "." + word_md5 ad_crawl_config.user_agent_md5_dir = word_md5.join( ad_dir_prefix.split(word_md5_delimiter)) search_crawl_config.log_filename = search_log_filename_prefix + "." + word_md5 search_crawl_config.user_agent_md5_dir = word_md5.join( search_dir_prefix.split(word_md5_delimiter)) ad_visit.update_crawl_config(ad_crawl_config) search_visit.update_crawl_config(search_crawl_config) # search and crawl right_click = not ad_only ad_set, search_set = search.search(word, right_click) ad_crawl_log_filename = ad_visit.visit(ad_set, word) if ad_only: search_crawl_log_filename = None else: search_crawl_log_filename = search_visit.visit(search_set, word) # revisit crawl_log_file_list = list() if ad_crawl_log_filename: crawl_log_file_list.append(ad_crawl_log_filename) if search_crawl_log_filename: crawl_log_file_list.append(search_crawl_log_filename) for crawl_log_file in crawl_log_file_list: if crawl_log_file == ad_crawl_log_filename: revisit_crawl_config.user_agent = google_ad_UA else: revisit_crawl_config.user_agent = google_search_UA revisit_dir_prefix = base_dir + word_md5_delimiter + "/" + \ hex_md5(revisit_crawl_config.user_agent) + search_now_suffix revisit_crawl_config.log_filename = crawl_log_file.split('/')[-1] + '.google' revisit = Visit(revisit_crawl_config) crawl_log = CD.CrawlLog() read_proto_from_file(crawl_log, crawl_log_file) revisit.visit_landing_url_n_times(crawl_log, int(n), revisit_dir_prefix, word_md5, word_md5_delimiter) words.next() """
def vis_with_legend(indir_list, raw_rgb_dir, outdir, raw_gray_dir=None, gt_dir=None, ext="png"): n_imgs = 1 + len(indir_list) if raw_gray_dir: n_imgs += 1 if gt_dir: n_imgs += 1 mkdir_if_not_exist(outdir) n_row = 2 n_col = int(round(float(n_imgs) / n_row)) # img_fn_list = os.listdir(raw_rgb_dir) img_fn_list = os.listdir(indir_list[0]) for one_img_fn in tqdm(img_fn_list): fig = plt.figure() # sharex=True, sharey=True) ax_list = [] ax_list.append(fig.add_subplot(n_row, n_col, 1)) raw_img = Image.open(os.path.join(raw_rgb_dir, one_img_fn)) ax_list[0].imshow(raw_img) ax_list[0].axis("off") ax_list[0].set_xticklabels([]) ax_list[0].set_yticklabels([]) offset = 1 if raw_gray_dir: ax_list.append(fig.add_subplot(n_row, n_col, offset + 1)) raw_img = Image.open(os.path.join(raw_gray_dir, one_img_fn)) ax_list[offset].imshow(raw_img, cmap='gray') ax_list[offset].axis("off") ax_list[offset].set_xticklabels([]) ax_list[offset].set_yticklabels([]) offset += 1 if gt_dir: ax_list.append(fig.add_subplot(n_row, n_col, offset + 1)) gt_img = Image.open( os.path.join( gt_dir, one_img_fn.replace("leftImg8bit", "gtFine_gtlabels"))) ax_list[offset].imshow(gt_img, vmin=0, vmax=N_CLASS - 1, interpolation='none', cmap="jet") ax_list[offset].axis("off") ax_list[offset].set_xticklabels([]) ax_list[offset].set_yticklabels([]) offset += 1 # ax_list[0].set_aspect('equal') for i, indir in enumerate(indir_list): # hard_to_see_img = m.imread(os.path.join(indir, one_img_fn)) hard_to_see_img = Image.open(os.path.join( indir, one_img_fn)).resize(raw_img.size) hard_to_see_img = np.array(hard_to_see_img) ax_list.append(fig.add_subplot(n_row, n_col, i + offset + 1)) im = ax_list[i + offset].imshow(hard_to_see_img.astype(np.uint8), vmin=0, vmax=N_CLASS - 1, interpolation='none', cmap="jet") ax_list[i + offset].axis("off") ax_list[i + offset].set_xticklabels([]) ax_list[i + offset].set_yticklabels([]) ax_list[i + offset].set_title(indir.replace( "outputs/", "").replace("/label", "").replace("/", "\n"), fontsize=4) # ax_list[i + 1].set_aspect('equal') fig.subplots_adjust(wspace=0, hspace=0) colors = [im.cmap(im.norm(value)) for value in values] patches = [ mpatches.Patch(color=colors[i], label=label_list[i]) for i in range(len(values)) ] # lgd = fig.legend(handles=patches, labels=label_list, bbox_to_anchor=(1.05, 1), borderaxespad=0., # fontsize=7, loc='upper left') # loc=2 if n_col * 2 <= N_CLASS: n_legend_col = n_col * 2 else: n_legend_col = N_CLASS lgd = plt.legend(patches, label_list, loc='lower center', bbox_to_anchor=(0, 0, 1, 1), bbox_transform=plt.gcf().transFigure, ncol=n_legend_col, fontsize=5) # fig.tight_layout() outfn = os.path.join(outdir, one_img_fn) outfn = os.path.splitext(outfn)[0] + '.%s' % ext fig.savefig(outfn, transparent=True, bbox_inches='tight', pad_inches=0, bbox_extra_artists=(lgd, ), dpi=300) plt.close()
BASE_DIR = os.path.abspath("../..")
SUBMISSION_DIR = BASE_DIR + "/submission"
MODEL_DIR = BASE_DIR + "/model"
BEST_MODEL_FILE = MODEL_DIR + '/best_model.hdf5'
BEST_WEIGHTS_FILE = MODEL_DIR + '/best_weights.hdf5'
MODEL_FILE = MODEL_DIR + "/model.json"
CLASS_INDICES_FILE = MODEL_DIR + "/class_indices.json"
LOG_DIR = BASE_DIR + "/log"
TF_LOG_DIR = LOG_DIR + "/tf_log"
PY_LOG_DIR = LOG_DIR + "/py_log"
mkdir_if_not_exist(SUBMISSION_DIR)
mkdir_if_not_exist(MODEL_DIR)
mkdir_if_not_exist(TF_LOG_DIR)

# --------------------------------------------------
# dataset config
# --------------------------------------------------
NUM_TEST_PRODUCTS = 1768182
NUM_TEST_PICS = 3095080
NUM_TRAIN_IMGS = 11134709
NUM_VALID_IMGS = 1236584

if DEBUG:
    NUM_TEST_PRODUCTS = 1000
    NUM_TEST_PICS = 1714
    NUM_TRAIN_IMGS = 14741
lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) args.outdir = os.path.join( args.base_outdir, "%s-%s_only_%sch" % (args.src_dataset, args.split, args.input_ch)) args.pth_dir = os.path.join(args.outdir, "pth") if args.net in ["fcn", "psp"]: model_name = "%s-%s-res%s" % (args.savename, args.net, args.res) else: model_name = "%s-%s" % (args.savename, args.net) args.tflog_dir = os.path.join(args.outdir, "tflog", model_name) mkdir_if_not_exist(args.pth_dir) mkdir_if_not_exist(args.tflog_dir) json_fn = os.path.join(args.outdir, "param-%s.json" % model_name) check_if_done(json_fn) args.machine = os.uname()[1] save_dic_to_json(args.__dict__, json_fn) start_epoch = 0 train_img_shape = tuple([int(x) for x in args.train_img_shape]) img_transform_list = [ Scale(train_img_shape, Image.BILINEAR), ToTensor(), # Normalize([.485, .456, .406], [.229, .224, .225])
model_f2 = model_f1 mode = "%s-%s2%s-%s_%sch_Finetune_MFNet" % (args.src_dataset, args.src_split, args.tgt_dataset, args.tgt_split, args.input_ch) if args.net in ["fcn", "psp"]: model_name = "%s-%s-%s-res%s" % (detailed_method, args.savename, args.net, args.res) else: model_name = "%s-%s-%s" % (detailed_method, args.savename, args.net) outdir = os.path.join(args.base_outdir, mode) # Create Model Dir pth_dir = os.path.join(outdir, "pth") mkdir_if_not_exist(pth_dir) # Create Model Dir and Set TF-Logger tflog_dir = os.path.join(outdir, "tflog", model_name) mkdir_if_not_exist(tflog_dir) configure(tflog_dir, flush_secs=5) # Save param dic json_fn = os.path.join(outdir, "param-%s-finetune_MFNet.json" % model_name) check_if_done(json_fn) save_dic_to_json(args.__dict__, json_fn) train_img_shape = tuple([int(x) for x in args.train_img_shape]) use_crop = True if args.crop_size > 0 else False
args = parser.parse_args()

if args.dataset in ["city16", "synthia"]:
    info_json_fn = "./dataset/synthia2cityscapes_info.json"
elif args.dataset in ["nyu"]:
    info_json_fn = "./dataset/nyu_info.json"
elif args.dataset == "ir":
    info_json_fn = "./dataset/ir_info.json"
else:
    info_json_fn = "./dataset/city_info.json"

# Save visualized predicted pixel labels (pngs)
with open(info_json_fn) as f:
    info_dic = json.load(f)
palette = np.array(info_dic['palette'], dtype=np.uint8)

gt_dir = GT_DIR_DIC[args.dataset]
vis_outdir = os.path.join(
    os.path.split(gt_dir)[0],
    os.path.split(gt_dir)[1] + "_pretty")
print("OUTDIR is %s" % vis_outdir)
mkdir_if_not_exist(vis_outdir)

gtfn_list = os.listdir(gt_dir)
for gtfn in tqdm(gtfn_list):
    full_gtfn = os.path.join(gt_dir, gtfn)
    img = Image.open(full_gtfn).convert("P")
    img.putpalette(palette.flatten())
    vis_fn = os.path.join(vis_outdir, gtfn)
    img.save(vis_fn)
if not os.path.exists(args.trained_checkpoint): raise OSError("%s does not exist!" % args.trained_checkpoint) checkpoint = torch.load(args.trained_checkpoint) train_args = checkpoint["args"] args.start_epoch = checkpoint['epoch'] print ("----- train args ------") pprint(train_args.__dict__, indent=4) print ("-" * 50) print("=> loaded checkpoint '{}'".format(args.trained_checkpoint)) detailed_method = train_args.method + "-" + train_args.method_detail print ("method: %s" % detailed_method) base_outdir = os.path.join(args.outdir, args.mode, model_name) mkdir_if_not_exist(base_outdir) json_fn = os.path.join(base_outdir, "param.json") check_if_done(json_fn) args.machine = os.uname()[1] save_dic_to_json(args.__dict__, json_fn) train_img_shape = tuple([int(x) for x in train_args.train_img_shape]) test_img_shape = tuple([int(x) for x in args.test_img_shape]) # TODO if "normalize_way" in train_args.__dict__.keys(): img_transform = get_img_transform(img_shape=train_img_shape, normalize_way=train_args.normalize_way) else: img_transform = get_img_transform(img_shape=train_img_shape)
def vis_with_legend( indir_list, outdir, label_list, raw_rgb_dir, raw_optional_img_dir=None, gt_dir=None, boundary_dir=None, ext="png", title_names=None, n_sample=10, ): N_CLASS = len(label_list) values = np.arange(N_CLASS) n_imgs = 1 + len(indir_list) if raw_optional_img_dir: n_imgs += 1 if gt_dir: n_imgs += 1 if boundary_dir: n_imgs += 1 mkdir_if_not_exist(outdir) n_row = 1 # 2 n_col = int(round(float(n_imgs) / n_row)) # with open("/data/unagi0/dataset/SUNCG-Seg/data_goodlist_v2.txt") as f: with open( "/data/unagi0/watanabe/DomainAdaptation/Segmentation/VisDA2017/test_output/suncg-train_rgbhhab_only_3ch---suncg-train_rgbhha/normal-drn_d_38-20.tar/data_list.txt" ) as f: # with open( # "/data/unagi0/watanabe/DomainAdaptation/Segmentation/VisDA2017/test_output/suncg-train_rgbhha_only_6ch---suncg-train_rgbhha/b16-drn_d_38-10.tar/data_list.txt") as f: fn_id_list = [x.strip() for x in f.readlines()] fn_id_list = random.sample(fn_id_list, n_sample) # fn_id_list = ["6f905fac454cea2d4cf5fd4d83a83a69/000000"] for one_img_id in tqdm(fn_id_list): fig = plt.figure(figsize=(640 * n_col / 100, 480 * n_row / 100)) # sharex=True, sharey=True) ax_list = [] ax_list.append(fig.add_subplot(n_row, n_col, 1)) raw_img = Image.open(os.path.join(raw_rgb_dir, one_img_id + "_mlt.png")) ax_list[0].imshow(raw_img) ax_list[0].axis("off") ax_list[0].set_xticklabels([]) ax_list[0].set_yticklabels([]) ax_list[0].set_aspect('equal') offset = 1 plt.axis('tight') if raw_optional_img_dir: ax_list.append(fig.add_subplot(n_row, n_col, offset + 1)) raw_img = Image.open( os.path.join(raw_optional_img_dir, one_img_id + "_hha.png")) ax_list[offset].imshow(raw_img, cmap='gray') ax_list[offset].axis("off") ax_list[offset].set_xticklabels([]) ax_list[offset].set_yticklabels([]) ax_list[offset].set_aspect('equal') plt.axis('tight') offset += 1 if gt_dir: ax_list.append(fig.add_subplot(n_row, n_col, offset + 1)) gt_img = Image.open( os.path.join(gt_dir, one_img_id + "_category40.png")) gt_img = np.array(gt_img, dtype=np.uint8) ax_list[offset].imshow(gt_img, vmin=0, vmax=N_CLASS - 1, interpolation='none', cmap="jet") ax_list[offset].axis("off") ax_list[offset].set_xticklabels([]) ax_list[offset].set_yticklabels([]) ax_list[offset].set_aspect('equal') plt.axis('tight') offset += 1 if boundary_dir: ax_list.append(fig.add_subplot(n_row, n_col, offset + 1)) boundary_img = Image.open( os.path.join(boundary_dir, one_img_id + "_instance_boundary.png")) boundary_img = np.array(boundary_img, dtype=np.uint8) ax_list[offset].imshow(boundary_img, vmin=0, vmax=N_CLASS - 1, interpolation='none', cmap="gray") ax_list[offset].axis("off") ax_list[offset].set_xticklabels([]) ax_list[offset].set_yticklabels([]) ax_list[offset].set_aspect('equal') plt.axis('tight') offset += 1 if title_names is not None: for i, title in enumerate(title_names): ax_list[i].set_title(title, fontsize=30) # fig.subplots_adjust(wspace=0, hspace=0) # fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None) fig.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0, hspace=0) fig.tight_layout(pad=0) # colors = [im.cmap(im.norm(value)) for value in values] # patches = [mpatches.Patch(color=colors[i], label=label_list[i]) for i in range(len(values))] # # lgd = fig.legend(handles=patches, labels=label_list, bbox_to_anchor=(1.05, 1), borderaxespad=0., # # fontsize=7, loc='upper left') # loc=2 # if n_col * 2 <= N_CLASS: # n_legend_col = n_col * 2 # else: # n_legend_col = N_CLASS # lgd = plt.legend(patches, label_list, loc='lower center', 
bbox_to_anchor=(0, 0, 1, 1), # bbox_transform=plt.gcf().transFigure, ncol=n_legend_col, fontsize=5) # fig.tight_layout() outfn = os.path.join( outdir, os.path.split(one_img_id)[-2] + "_" + os.path.split(one_img_id)[-1]) outfn = os.path.splitext(outfn)[0] + '.%s' % ext fig.savefig(outfn, transparent=True, bbox_inches='tight', pad_inches=0) # fig.savefig(outfn, transparent=True, bbox_inches='tight', pad_inches=0, bbox_extra_artists=(lgd,), dpi=300) plt.close()
default=(1280, 720), nargs=2, help="W H") parser.add_argument( '--raw_img_indir', type=str, default=None, help= "input directory that contains raw imgs(valid:'/data/unagi0/watanabe/DomainAdaptation/Segmentation/VisDA2017/cityscapes_val_imgs', test:'/data/ugui0/dataset/adaptation/segmentation_test')" ) args = parser.parse_args() args.outimg_shape = [int(x) for x in args.outimg_shape] mkdir_if_not_exist(args.outdir) for one_file in tqdm(os.listdir(args.prob_indir)): one_npy_fn = os.path.join(args.prob_indir, one_file) outfn = os.path.join(args.outdir, one_file.replace("npy", "png")) # if os.path.exists(outfn): # continue one_prob = np.load(one_npy_fn) one_prob = softmax(one_prob) one_prob = np.transpose(one_prob, [1, 2, 0]) one_prob = np.expand_dims(one_prob, 0) _, h, w, n_class = one_prob.shape if args.raw_img_indir:
seed_everything(args.seed)
os.environ['PYTHONHASHSEED'] = str(args.seed)
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

config = {
    'output_path': args.output_path,
    'path': {
        'log': args.output_path + '/log/',
        'scalar': args.output_path + '/scalar/',
        'model': args.output_path + '/model/'
    },
    'is_writer': args.is_writer
}

# Create output dirs
mkdir_if_not_exist(config['path']['log'])
mkdir_if_not_exist(config['path']['scalar'])
mkdir_if_not_exist(config['path']['model'])

if config['is_writer']:
    config['writer'] = SummaryWriter(log_dir=config['path']['scalar'])
config['logger'] = Logger(logroot=config['path']['log'],
                          filename=args.log_file,
                          level='debug')
config['logger'].logger.debug(str(args))

# whether to resume training
start_epoch = 0
if args.resume:
    config['logger'].logger.debug('==> loading checkpoint: ' + args.resume)
    if not os.path.exists(args.resume):
        raise OSError("%s does not exist!" % args.resume)
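# seed_everything() is defined elsewhere in the repo; the sketch below shows
# the conventional way such a helper seeds Python, NumPy, and PyTorch for
# reproducibility. It is an assumption about its contents, not the project's
# actual implementation.
import os
import random

import numpy as np
import torch


def seed_everything_sketch(seed):
    """Seed every RNG the training loop may touch."""
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False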