def __init__(self, url_file, user_agent_file, crawl_config):
		valid_instance(crawl_config, CD.CrawlConfig)
		self.crawl_config = CD.CrawlConfig()
		self.crawl_config.CopyFrom(crawl_config)

		# Prepare the input
		self.urls = filter(bool, open(url_file, 'r').read().split('\n'))
		self.user_agents = filter(bool, open(user_agent_file, 'r').read().split('\n'))
		# self.referers = filter(bool, open(referer_file, 'r').read().split('\n'))

		# Prepare the output directory
		crawl_type = None
		for user_agent in self.user_agents:
			if "bot" in user_agent:
				crawl_type = "bot"
				break
		if not crawl_type:
			crawl_type = "user"
		now = datetime.now().strftime("%Y%m%d-%H%M%S")
		self.base_dir = url_file + '.' + crawl_type + '.' + now + '.selenium.crawl/'
		mkdir_if_not_exist(self.base_dir)

		# Prepare log files
		# self.htmls_f = open(self.base_dir + 'html_path_list', 'a')
		self.md5_UA_filename = self.base_dir + 'md5_UA.log'
		self.crawl_log_filename = self.base_dir + 'crawl_log'
	def visit_landing_url(self, landing_url_set, url_fetcher=None):
		"""
		@parameter
		landing_url_set: landing url set to visit
		url_fetcher: selenium handles to use for crawl
		"""
		valid_instance(landing_url_set, set)
		mkdir_if_not_exist(self.crawl_config.user_agent_md5_dir)
		# crawl web pages
		landing_url_set_size = len(landing_url_set)
		if landing_url_set_size < 8:
			record_maximum_threads = self.crawl_config.maximum_threads
			self.crawl_config.maximum_threads = 2
		quit_fetcher_when_done = False
		if not url_fetcher:
			url_fetcher = UrlFetcher(self.crawl_config)
			quit_fetcher_when_done = True
		thread_computer = ThreadComputer(url_fetcher, 'fetch_url',
				landing_url_set)
		if quit_fetcher_when_done:
			url_fetcher.quit()
		if landing_url_set_size < 8:
			self.crawl_config.maximum_threads = record_maximum_threads
		# create and fill current_search, including urls, search_term etc.
		current_search = CD.CrawlSearchTerm()
		for p, s in thread_computer.result:
			result = current_search.result.add()
			result.CopyFrom(s)
		# update current_log
		if self.first:
			self.first = False
			self.current_log = CD.CrawlLog()
		result_search = self.current_log.result_search.add()
		result_search.CopyFrom(current_search)
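The ThreadComputer class driving fetch_url is not included in these examples; the following is only a rough sketch, assuming it is a small thread pool that calls the named method on every item and collects (item, result) pairs in self.result (the class name and defaults below are made up):

import threading
import Queue as queue  # Python 2, matching the crawler code above


class ThreadComputerSketch(object):
    def __init__(self, obj, method_name, items, num_threads=6):
        self.result = []
        self._lock = threading.Lock()
        work = queue.Queue()
        for item in items:
            work.put(item)
        method = getattr(obj, method_name)

        def worker():
            while True:
                try:
                    item = work.get_nowait()
                except queue.Empty:
                    return
                res = method(item)
                with self._lock:
                    self.result.append((item, res))

        threads = [threading.Thread(target=worker) for _ in range(num_threads)]
        for t in threads:
            t.start()
        for t in threads:
            t.join()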
def search_and_crawl(word_file, max_word_per_file=50):
	"""
	Search the words in word_file, get the clickstrings for search results
	and ads, then visit those clickstrings.
	@parameter
	word_file: the filename containing the words to search
	max_word_per_file: the maximum number of words to store in one crawl_log file
	"""
	# define constants 
	user_UA = "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/" \
			"537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36"
	user_suffix = "selenium.crawl/"
	now_suffix = datetime.now().strftime(".%Y%m%d-%H%M%S")

	# compute base_dir and start logging
	base_dir = '.'.join([word_file, user_suffix])
	mkdir_if_not_exist(base_dir)
	logging.basicConfig(filename=base_dir+'running_log'+now_suffix, level=logging.DEBUG)
	logging.getLogger("global")

	# set crawl_config
	crawl_config = CD.CrawlConfig()
	crawl_config.maximum_threads = 6
	crawl_config.user_agent = user_UA
	crawl_config.user_agent_md5_dir = base_dir + hex_md5(crawl_config.user_agent) \
			+ now_suffix + '/'
	crawl_config.browser_type = CD.CrawlConfig.CHROME

	# print crawl_config.user_agent
	words = SearchTerm(word_file)
	search = Search(crawl_config)
	crawl_config.result_type = CD.AD
	crawl_config.log_filename = 'ad_crawl_log' + now_suffix
	ad_visit = Visit(crawl_config, max_word_per_file)
	crawl_config.result_type = CD.SEARCH
	crawl_config.log_filename = 'search_crawl_log' + now_suffix
	search_visit = Visit(crawl_config, max_word_per_file)
	"""
	word_list = words.get_word_list()
	print 'word list size ', len(word_list)
	print word_list
	word_list = list()
	word_list.append('Essay Writing')
	word_list.append('P**n sale')
	for word in word_list:
	"""
	for word in words.get_word_list():
		ad_set, search_set = search.search(word)
		# print clickstring_set
		ad_visit.visit(ad_set, word)
		search_visit.visit(search_set, word)
		words.next()
def save_binary_imgs(boundary_dir, thre):
    from util import mkdir_if_not_exist
    outdir = boundary_dir + "_binary"
    mkdir_if_not_exist(outdir)

    print("Result will be saved in %s" % outdir)

    fn_list = os.listdir(boundary_dir)

    for fn in tqdm(fn_list):
        raw_boundary_img = Image.open(os.path.join(boundary_dir, fn))
        out_img = binarize(raw_boundary_img, thre)
        out_img = frame_img(out_img)
        out_img.save(os.path.join(outdir, fn))

    print("Finished!!!")
Example 5
def refine_by_bwboundary(segdir, bwbddir, dataset, min_thre, max_thre):
    basedir = os.path.split(segdir)[0]
    out_seg_dir = os.path.join(basedir, "refined_label")
    out_vis_dir = os.path.join(basedir, "refined_vis")
    mkdir_if_not_exist(out_seg_dir)
    mkdir_if_not_exist(out_vis_dir)

    print("Result will be saved in %s" % out_seg_dir)
    print("Result will be saved in %s" % out_vis_dir)

    segdir = os.path.join(basedir, "label")
    img_fn_list = os.listdir(segdir)

    for img_fn in tqdm(img_fn_list):

        segimg_fn = os.path.join(segdir, img_fn)
        # bwbdimg_fn = os.path.join(basedir, "bwboundary", img_fn)
        bwbdimg_fn = os.path.join(bwbddir, img_fn)

        segimg = np.array(Image.open(segimg_fn))
        bwbdimg = np.array(Image.open(bwbdimg_fn))

        cnter = Counter(bwbdimg.flatten())
        ok_id_list = [
            k for k, v in cnter.items()
            if v < max_thre and v > min_thre and k != 1
        ]

        res = np.copy(segimg)
        for ok_id in ok_id_list:
            ok_idxes = np.where(bwbdimg == ok_id)
            cnter = Counter(segimg[ok_idxes].flatten())
            top_id, n_pixel_of_top_id = cnter.most_common()[0]
            res[ok_idxes] = top_id

        res = Image.fromarray(res)

        out_seg_fn = os.path.join(out_seg_dir, img_fn)
        res.save(out_seg_fn)

        out_vis_fn = os.path.join(out_vis_dir, img_fn)
        save_colorized_lbl(res, out_vis_fn, dataset)

    print("Finished!!!")

    return out_seg_dir
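A tiny made-up illustration of the relabeling step above: every connected-component id from the bwboundary image whose pixel count falls inside the threshold window has all of its pixels overwritten with the majority segmentation label found inside that component.

import numpy as np
from collections import Counter

seg = np.array([[0, 0, 2],
                [0, 0, 2],
                [1, 1, 1]])
comp = np.array([[5, 5, 5],
                 [5, 5, 5],
                 [1, 1, 1]])   # component 5 has six pixels, component id 1 is skipped

idx = np.where(comp == 5)
top_id, _ = Counter(seg[idx].flatten()).most_common()[0]
seg[idx] = top_id              # component 5 becomes all 0s, its majority label
print(seg)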
	def visit(self, clickstring_set, search_term):
		"""
		Count how many times visit has been called, i.e.
		how many words have been searched and visited so far.

		Note: some words might have an empty advertisement
		clickstring_set; these words are counted but not logged.
		@parameter
		clickstring_set: the links to visit
		search_term: search term related to clickstring_set
		@return
		None or current_log_filename (from write_crawl_log())
		"""
		self.counter += 1
		clickstring_set_size = len(clickstring_set)
		if clickstring_set_size == 0:
			return None
		mkdir_if_not_exist(self.crawl_config.user_agent_md5_dir)
		# crawl web pages
		if clickstring_set_size < 8:
			record_maximum_threads = self.crawl_config.maximum_threads
			self.crawl_config.maximum_threads = 2
		url_fetcher = UrlFetcher(self.crawl_config)
		thread_computer = ThreadComputer(url_fetcher, 'fetch_url',
				clickstring_set)
		url_fetcher.quit()
		if clickstring_set_size < 8:
			self.crawl_config.maximum_threads = record_maximum_threads
		# create and fill current_search, including urls, search_term etc.
		current_search = CD.CrawlSearchTerm()
		for p, s in thread_computer.result:
			result = current_search.result.add()
			result.CopyFrom(s)
		current_search.search_term = search_term
		current_search.result_type = self.crawl_config.result_type
		# update current_log
		if self.first:
			self.first = False
			self.current_log = CD.CrawlLog()
		result_search = self.current_log.result_search.add()
		result_search.CopyFrom(current_search)
		if self.counter % self.max_word_per_file == 0:
			return self.write_crawl_log()
def vis_using_Colorize(indir_list, outdir):
    indir = indir_list[0]
    # outdir = os.path.join(os.path.split(indir)[0], "vis_labels")
    mkdir_if_not_exist(outdir)

    for one_file in tqdm(os.listdir(indir)):
        fullpath = os.path.join(indir, one_file)
        hard_to_see_img = m.imread(fullpath)
        # outputs = outputs[0, :19].data.max(0)[1]
        # outputs = outputs.view(1, outputs.size()[0], outputs.size()[1])
        outputs = hard_to_see_img  # TODO this should be fixed
        output = Colorize()(outputs)
        output = np.transpose(output.cpu().numpy(), (1, 2, 0))
        img = Image.fromarray(output, "RGB")
        # PIL's resize expects a (width, height) tuple, so swap the numpy shape order
        img = img.resize((hard_to_see_img.shape[1], hard_to_see_img.shape[0]), Image.NEAREST)

        outfn = os.path.join(outdir, one_file)
        img.save(outfn)
	def crawl(self):
		has_written = False
		for user_agent in self.user_agents:
			user_agent_md5 = hex_md5(user_agent)
			self.crawl_config.user_agent = user_agent
			self.crawl_config.user_agent_md5_dir = self.base_dir + user_agent_md5 + '/'
			# specify which type of browser to use
			set_browser_type(self.crawl_config)
			mkdir_if_not_exist(self.crawl_config.user_agent_md5_dir)
			# md5 - user agent mapping logs
			md5_UA_f = open(self.md5_UA_filename, 'a')  # user agent
			md5_UA_f.write(user_agent_md5 + ":" + user_agent + "\n")
			md5_UA_f.close()
			# crawl web pages
			url_fetcher = UrlFetcher(self.crawl_config)
			thread_computer = ThreadComputer(url_fetcher, 'fetch_url', self.urls)
			url_fetcher.quit()
			# Write log for current user agent
			current_log = CD.CrawlLog()
			current_log_filename = self.crawl_config.user_agent_md5_dir + 'crawl_log'
			current_search = CD.CrawlSearchTerm()
			for p, s in thread_computer.result:
				result = current_search.result.add()
				result.CopyFrom(s)
			# add this user agent's accumulated results once, after the loop
			result_search = current_log.result_search.add()
			result_search.CopyFrom(current_search)
			write_proto_to_file(current_log, current_log_filename)
			# Write global crawl_log
			crawl_log = CD.CrawlLog()
			if has_written:
				read_proto_from_file(crawl_log, self.crawl_log_filename)
			else:
				has_written = True
			for r_s in current_log.result_search:
				result_search = crawl_log.result_search.add()
				result_search.CopyFrom(r_s)
			"""
			for s in current_log.result:
				result = crawl_log.result.add()
				result.CopyFrom(s)
			"""
			write_proto_to_file(crawl_log, self.crawl_log_filename)
def get_feature_mat_from_video(video_filename, output_dir='output'):
    yt_vid, extension = video_filename.split('/')[-1].split('.')

    assert extension in ['webm', 'mp4', '3gp']

    mkdir_if_not_exist(output_dir, False)

    output_filename = output_dir + '/' + yt_vid + '.npy'

    vid_reader = imageio.get_reader(video_filename, 'ffmpeg')

    img_list = get_img_list_from_vid_reader(vid_reader, extension)

    base_model = InceptionV3(include_top=True, weights='imagenet')
    model = Model(inputs=base_model.input, outputs=base_model.get_layer('avg_pool').output)

    feature_mat = get_feature_mat(model, img_list)

    np.save(output_filename, feature_mat)

    return feature_mat
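The get_feature_mat helper is not shown here; a rough sketch under the assumption that it resizes each frame to InceptionV3's 299x299 input, applies the Keras preprocessing, and runs the truncated model in batches to get one 2048-d 'avg_pool' vector per frame:

import numpy as np
from PIL import Image
from keras.applications.inception_v3 import preprocess_input


def get_feature_mat(model, img_list, batch_size=32):
    feats = []
    for i in range(0, len(img_list), batch_size):
        batch = np.stack([
            np.array(Image.fromarray(img).resize((299, 299)), dtype='float32')
            for img in img_list[i:i + batch_size]
        ])
        feats.append(model.predict(preprocess_input(batch)))
    return np.concatenate(feats, axis=0)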
Example 10
def collect_site_for_plot(site_set, outdir, mode="user"):
	"""
	Collect user and Google observations for each site in site_set.
	This is scheduled by a cron job, in order to show how the hash values
	of websites change over time.

	@parameter
	site_set: the set of urls to visit
	outdir: the output directory
	mode: which user agent to use; supported modes are user, google, and both
	"""
	valid_instance(site_set, set)
	mkdir_if_not_exist(outdir)

	user_UA = "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/" \
			"537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36"
	google_UA = "AdsBot-Google (+http://www.google.com/adsbot.html)"
	crawl_config = CD.CrawlConfig()
	crawl_config.maximum_threads = 1
	crawl_config.browser_type = CD.CrawlConfig.CHROME
	crawl_config.crawl_log_dir = outdir
	now_suffix = datetime.now().strftime(".%Y%m%d-%H%M%S")
	UAs = dict()
	if mode == "user":
		UAs["user"] = user_UA
	elif mode == "google":
		UAs["google"] = google_UA
	elif mode == "both":
		UAs["user"] =  user_UA
		UAs["google"] = google_UA
	else:
		raise Exception("Unknown mode {0}".format(mode))
	for mode in UAs:
		crawl_config.user_agent = UAs[mode]
		crawl_config.user_agent_md5_dir = outdir + hex_md5(crawl_config.user_agent) \
				+ now_suffix + '/'
		crawl_config.log_filename = mode + '_crawl_log' + now_suffix
		mode_visit = Visit(crawl_config)
		mode_visit.visit_landing_url(site_set)
		mode_visit.write_crawl_log(False)
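A cron-driven wrapper might invoke it roughly like this (the site set and output directory below are made up for illustration):

collect_site_for_plot({"http://example.com/"}, "site_observations/", mode="both")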
Example 11
def convert_40to13cls(target_dir):
    with open('./dataset/nyu_info.json', 'r') as f:
        paramdic = json.load(f)

    class_ind = np.array(paramdic['40to13cls'])

    base_dir, indir = os.path.split(target_dir)
    out_lbl_dir = os.path.join(base_dir, indir + "-13cls")
    out_vis_dir = os.path.join(base_dir, "vis-13cls")
    original_pngfn_list = os.listdir(target_dir)

    mkdir_if_not_exist(out_lbl_dir)
    mkdir_if_not_exist(out_vis_dir)

    for pngfn in tqdm(original_pngfn_list):
        fullpath = os.path.join(target_dir, pngfn)
        original_im = Image.open(fullpath)
        processed_im = swap_labels(np.array(original_im), class_ind)
        out_lbl_fn = os.path.join(out_lbl_dir, pngfn)
        processed_im.save(out_lbl_fn, 'PNG')

        out_vis_fn = os.path.join(out_vis_dir, pngfn)
        save_colorized_lbl(processed_im, out_vis_fn)
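swap_labels is assumed to be a small lookup that maps each NYU-40 label id to its 13-class id through the table loaded from nyu_info.json; a minimal sketch:

import numpy as np
from PIL import Image


def swap_labels(lbl_array, class_ind):
    remapped = class_ind[lbl_array]  # per-pixel lookup of the 13-class id
    return Image.fromarray(remapped.astype(np.uint8))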
def revisit(crawl_log_file_list, word_file, n):
	"""
	Visit the landing urls in each crawl_log file n times.
	@parameter
	crawl_log_file_list: list of filenames of crawl_log
	word_file: file containing words in crawl_log_file, used for creating base_dir
	n: number of times to visit
	"""
	# google_UA is not used in search and crawl. Used in later visit.
	google_UA = "AdsBot-Google (+http://www.google.com/adsbot.html)"
	google_suffix = 'google.crawl/'
	for i in range(int(n)):
		# the time label is set for each iteration of visit
		now_suffix = datetime.now().strftime(".%Y%m%d-%H%M%S")
		for crawl_log_file in crawl_log_file_list:
			# compute base_dir and start logging
			base_dir = '.'.join([word_file, google_suffix])
			mkdir_if_not_exist(base_dir)
			logging.basicConfig(filename=base_dir+'running_log'+now_suffix, level=logging.DEBUG)
			logging.getLogger("global")

			# set crawl_config
			crawl_config = CD.CrawlConfig()
			crawl_config.maximum_threads = 6
			crawl_config.user_agent = google_UA
			crawl_config.user_agent_md5_dir = base_dir + hex_md5(crawl_config.user_agent) \
					+ now_suffix + '/'
			crawl_config.browser_type = CD.CrawlConfig.CHROME

			google_crawl_log = crawl_log_file.split('/')[-1] + '.google'
			crawl_config.log_filename = google_crawl_log + now_suffix
			revisit = Visit(crawl_config)
			crawl_log = CD.CrawlLog()
			read_proto_from_file(crawl_log, crawl_log_file)
			landing_url_set = crawl_log_attr_set(crawl_log, "landing_url")
			revisit.visit_landing_url(landing_url_set)
			revisit.write_crawl_log(False)
Example 13
def main(npydirs,
         out_directory='ensemble_results',
         method='averaging', mode='test'):
    """
    Ensemble predictions.

    1. Get all npy files from the given directories.
    2. Ensemble each file and output a predicted png file.
    """

    out_shape = (2048, 1024) if mode == 'valid' else (1280, 720)

    out_label_dir = os.path.join(out_directory, 'label')
    out_vis_dir = os.path.join(out_directory, 'vis')
    out_prob_dir = os.path.join(out_directory, 'prob')

    mkdir_if_not_exist(out_label_dir)
    mkdir_if_not_exist(out_vis_dir)
    mkdir_if_not_exist(out_prob_dir)

    print('- npy_directory_list')
    print(npydirs)

    print('- method')
    print(method)

    print('- mode')
    print(mode, out_shape)

    prob_filenames = os.listdir(npydirs[0])

    print(len(prob_filenames))
    print('Ensembling ...')
    for i, prob_filename in tqdm(enumerate(prob_filenames)):
        png_filename = prob_filename.replace('npy', 'png')

        prob_fns = [os.path.join(npydir, prob_filename) for npydir in npydirs]

        ensemble_predict(prob_fns,
                         os.path.join(out_label_dir, png_filename),
                         os.path.join(out_prob_dir, prob_filename),
                         os.path.join(out_vis_dir, png_filename),
                         method, out_shape)
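ensemble_predict is not shown in this example; a simplified sketch for the 'averaging' method only, which averages the per-model class-probability tensors, argmaxes them into a label map, and writes the label and probability outputs (the colorized visualization written to out_vis_fn is omitted here):

import numpy as np
from PIL import Image


def ensemble_predict(prob_fns, out_label_fn, out_prob_fn, out_vis_fn,
                     method, out_shape):
    probs = np.stack([np.load(fn) for fn in prob_fns])  # (n_models, C, H, W)
    if method == 'averaging':
        mean_prob = probs.mean(axis=0)
    else:
        raise NotImplementedError("only 'averaging' is sketched here")
    np.save(out_prob_fn, mean_prob)
    label = mean_prob.argmax(axis=0).astype(np.uint8)
    Image.fromarray(label).resize(out_shape, Image.NEAREST).save(out_label_fn)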
def edge_detect(rgb_dir, min_canny_thre, max_canny_thre):
    base_dir = os.path.split(rgb_dir)[0]
    edge_dir = os.path.join(base_dir, "edges")
    laplacian_dir = os.path.join(edge_dir, "laplacian")
    canny_dir = os.path.join(edge_dir, "canny")
    sobel_dir = os.path.join(edge_dir, "sobel")

    mkdir_if_not_exist(laplacian_dir)
    mkdir_if_not_exist(canny_dir)
    mkdir_if_not_exist(sobel_dir)

    print("Result will be saved in %s" % edge_dir)

    fn_list = os.listdir(rgb_dir)

    for fn in tqdm(fn_list):
        img = cv2.imread(os.path.join(rgb_dir, fn))
        imgYUV = cv2.cvtColor(img, cv2.COLOR_BGR2YUV)
        imgY = imgYUV[:, :, 0]

        # Laplacian
        out_laplacian_im = cv2.Laplacian(imgY, cv2.CV_64F) + 128
        out_laplacian_fn = os.path.join(laplacian_dir, fn)
        save_img_by_PIL(out_laplacian_im, out_laplacian_fn)

        # Canny
        # cv2.Canny takes the two hysteresis thresholds directly
        out_canny_im = cv2.Canny(imgY, min_canny_thre, max_canny_thre)
        out_canny_fn = os.path.join(canny_dir, fn)
        save_img_by_PIL(out_canny_im, out_canny_fn)

        # Sobel
        dx = cv2.Sobel(imgY, cv2.CV_64F, 1, 0, ksize=3)
        dy = cv2.Sobel(imgY, cv2.CV_64F, 0, 1, ksize=3)
        out_sobel_im = np.sqrt(dx**2 + dy**2)
        out_sobel_fn = os.path.join(sobel_dir, fn)
        save_img_by_PIL(out_sobel_im, out_sobel_fn)
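save_img_by_PIL is assumed to clip the float edge response into the 8-bit range and write it out as a grayscale image; a minimal sketch:

import numpy as np
from PIL import Image


def save_img_by_PIL(img_array, out_fn):
    img_array = np.clip(img_array, 0, 255).astype(np.uint8)
    Image.fromarray(img_array).save(out_fn)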
    '/data/unagi0/watanabe/DomainAdaptation/Segmentation/VisDA2017/cityscapes_gt/val'
)
parser.add_argument(
    '--vis_outdir',
    type=str,
    default=
    '/data/unagi0/watanabe/DomainAdaptation/Segmentation/VisDA2017/cityscapes_vis_gt/val'
)

args = parser.parse_args()

if args.dataset in ["city16", "synthia"]:
    info_json_fn = "./dataset/synthia2cityscapes_info.json"
else:
    info_json_fn = "./dataset/city_info.json"

    # Save visualized predicted pixel labels(pngs)
with open(info_json_fn) as f:
    info_dic = json.load(f)

gtfn_list = os.listdir(args.gt_dir)

for gtfn in tqdm(gtfn_list):
    full_gtfn = os.path.join(args.gt_dir, gtfn)
    img = Image.open(full_gtfn)
    palette = np.array(info_dic['palette'], dtype=np.uint8)
    img.putpalette(palette.flatten())
    mkdir_if_not_exist(args.vis_outdir)
    vis_fn = os.path.join(args.vis_outdir, gtfn)
    img.save(vis_fn)
Example 16
def main():
    if len(sys.argv) != 5:
        print(
            "Usage: gen_imgs_from_bson.py <validpct> <inputdir> <traindir> <validdir>"
        )
        sys.exit(1)

    validpct = float(sys.argv[1]) / 100
    inputdir = sys.argv[2]
    traindir = sys.argv[3]
    validdir = sys.argv[4]

    trainfile = os.path.join(inputdir, 'train.bson')

    train_raw_dir = os.path.join(traindir, "raw")
    valid_raw_dir = os.path.join(validdir, "raw")

    # create categories folders
    categories = pd.read_csv(os.path.join(inputdir, 'category_names.csv'),
                             index_col='category_id')
    for category in tqdm_notebook(categories.index):
        mkdir_if_not_exist(os.path.join(train_raw_dir, str(category)))
        mkdir_if_not_exist(os.path.join(valid_raw_dir, str(category)))

    num_products = 7069896  # 7069896 for train and 1768182 for test
    product_cnt = 0
    bar = tqdm_notebook(total=num_products)
    with open(trainfile, 'rb') as trainbson:
        data = bson.decode_file_iter(trainbson)

        train_cats = collections.Counter()
        valid_cats = collections.Counter()
        image_counter = collections.Counter()

        for prod in data:
            product_id = prod['_id']
            category_id = prod['category_id']

            # decide if this product will go into the validation or train data
            if random.random() < validpct:
                outdir = valid_raw_dir
                valid_cats[category_id] += 1
            else:
                outdir = train_raw_dir
                train_cats[category_id] += 1

            for picidx, pic in enumerate(prod['imgs']):
                filename = os.path.join(outdir, str(category_id),
                                        "{}.{}.jpg".format(product_id, picidx))
                with open(filename, 'wb') as f:
                    f.write(pic['picture'])

                image_counter[outdir] += 1
            bar.update()
            product_cnt += 1
            if product_cnt % 10000 == 0:
                print("converted {} products".format(product_cnt))

        for name, cnt, dir_ in [("training", train_cats, train_raw_dir),
                                ("validation", valid_cats, valid_raw_dir)]:
            print("{}: {} categories with {} products and {} images".format(
                name, len(cnt), sum(cnt.values()), image_counter[dir_]))
    model_name += "-use_f2"

print("=> loading checkpoint '{}'".format(args.trained_checkpoint))
if not os.path.exists(args.trained_checkpoint):
    raise OSError("%s does not exist!" % args.trained_checkpoint)

checkpoint = torch.load(args.trained_checkpoint)
train_args = checkpoint["args"]
args.start_epoch = checkpoint['epoch']
print("----- train args ------")
pprint(checkpoint["args"].__dict__, indent=4)
print("-" * 50)
print("=> loaded checkpoint '{}'".format(args.trained_checkpoint))

base_outdir = os.path.join(args.outdir, args.mode, model_name)
mkdir_if_not_exist(base_outdir)

json_fn = os.path.join(base_outdir, "param.json")
check_if_done(json_fn)
args.machine = os.uname()[1]
save_dic_to_json(args.__dict__, json_fn)

train_img_shape = tuple([int(x) for x in train_args.train_img_shape])
test_img_shape = tuple([int(x) for x in args.test_img_shape])

if "normalize_way" in train_args.__dict__.keys():
    img_transform = get_img_transform(img_shape=train_img_shape,
                                      normalize_way=train_args.normalize_way)
else:
    img_transform = get_img_transform(img_shape=train_img_shape)
Example 18
    model_f1.cuda()

print("----- train args ------")
pprint(checkpoint["args"].__dict__, indent=4)
print("-" * 50)
args.train_img_shape = checkpoint["args"].train_img_shape
print("=> loaded checkpoint '{}'".format(args.trained_checkpoint))

indir, infn = os.path.split(args.trained_checkpoint)

trained_mode = indir.split(os.path.sep)[-2]
args.mode = "%s---%s-%s" % (trained_mode, args.tgt_dataset, args.split)
model_name = infn.replace(".pth", "")

base_outdir = os.path.join(args.outdir, args.mode, model_name)
mkdir_if_not_exist(base_outdir)

# Set TF-Logger
tfconfigure(base_outdir, flush_secs=10)
tflogger = AccumulatedTFLogger()

json_fn = os.path.join(base_outdir, "param.json")
check_if_done(json_fn)
args.machine = os.uname()[1]
save_dic_to_json(args.__dict__, json_fn)

train_img_shape = tuple([int(x) for x in args.train_img_shape])
test_img_shape = tuple([int(x) for x in args.test_img_shape])

img_transform = Compose([
    Scale(train_img_shape, Image.BILINEAR),
Example 19
                    action='store_true',
                    help='whether you save probability tensors')

args = parser.parse_args()
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

config = {
    'output_path': args.output_path,
    'path': {
        'log': args.output_path + 'log/',
        'prob': args.output_path + 'prob/',
        'label': args.output_path + 'label/'
    }
}

mkdir_if_not_exist(config['path']['log'])
mkdir_if_not_exist(config['path']['label'])

config['logger'] = Logger(logroot=config['path']['log'],
                          filename=args.log_file,
                          level='debug')
config['logger'].logger.debug(str(args))

if not os.path.exists(args.trained_checkpoint):
    raise OSError('%s does not exist!' % args.trained_checkpoint)
config['logger'].logger.debug('==> loading checkpoint: ' +
                              args.trained_checkpoint)
checkpoint = torch.load(args.trained_checkpoint)
train_args = checkpoint['args']
G, F1, F2 = get_models(input_ch=train_args.input_ch,
                       n_class=train_args.n_class,
        files = [os.path.join(vis_dir, filename) for vis_dir in vis_dirs]
        outimg = os.path.join(outdir, filename)
        merge_four_images(files, outimg)

    print("Finished! Result dir is %s" % outdir)


if __name__ == '__main__':
    args = sys.argv
    """num of args need to be 3."""

    # merge_four_images(args[1:], 'sample_merged.png')
    # vis_dirs = ['/data/ugui0/dataset/adaptation/segmentation_test'] + args[1:]
    vis_dirs = ["/data/unagi0/dataset/NYUDv2/gupta/rgb/"]
    pred_base_dir = args[1]
    target_dir_list = ["vis", "depth", "boundary"]
    vis_dirs += [os.path.join(pred_base_dir, x) for x in target_dir_list]
    print(vis_dirs)

    # for i in range(20):
    #     outdir = 'merged_imgs/merged_imgs_{0}'.format(i)
    #     if os.path.exists(outdir):
    #         continue
    #     else:
    #         break
    outdir = os.path.join(pred_base_dir, "merged")
    mkdir_if_not_exist(outdir)

    main(vis_dirs, outdir)
def vis_with_legend(indir_list, outdir, label_list, raw_rgb_dir, raw_optional_img_dir=None, gt_dir=None, ext="pdf",
                    title_names=None):
    N_CLASS = len(label_list)
    values = np.arange(N_CLASS)

    n_imgs = 1 + len(indir_list)
    if raw_optional_img_dir:
        n_imgs += 1
    if gt_dir:
        n_imgs += 1

    mkdir_if_not_exist(outdir)

    n_row = 2
    n_col = int(round(float(n_imgs) / n_row))

    # img_fn_list = os.listdir(raw_rgb_dir)
    img_fn_list = os.listdir(indir_list[0])

    for one_img_fn in tqdm(img_fn_list):
        fig = plt.figure(figsize=(560 * n_col / 100, 425 * n_row / 100 * 1.2))  # sharex=True, sharey=True)

        ax_list = []
        ax_list.append(fig.add_subplot(n_row, n_col, 1))
        raw_img = Image.open(os.path.join(raw_rgb_dir, one_img_fn))

        ax_list[0].imshow(raw_img)
        ax_list[0].axis("off")
        ax_list[0].set_xticklabels([])
        ax_list[0].set_yticklabels([])

        ax_list[0].set_aspect('equal')
        offset = 1
        plt.axis('tight')

        if raw_optional_img_dir:
            ax_list.append(fig.add_subplot(n_row, n_col, offset + 1))
            raw_img = Image.open(os.path.join(raw_optional_img_dir, one_img_fn))

            ax_list[offset].imshow(raw_img, cmap='gray')
            ax_list[offset].axis("off")
            ax_list[offset].set_xticklabels([])
            ax_list[offset].set_yticklabels([])

            ax_list[offset].set_aspect('equal')
            plt.axis('tight')
            offset += 1

        if gt_dir:
            ax_list.append(fig.add_subplot(n_row, n_col, offset + 1))
            gt_img = Image.open(os.path.join(gt_dir, one_img_fn.replace("leftImg8bit", "gtFine_gtlabels")))
            gt_img = np.array(gt_img, dtype=np.uint8)
            ax_list[offset].imshow(gt_img, vmin=0, vmax=N_CLASS - 1, interpolation='none', cmap="jet")
            ax_list[offset].axis("off")
            ax_list[offset].set_xticklabels([])
            ax_list[offset].set_yticklabels([])

            ax_list[offset].set_aspect('equal')
            plt.axis('tight')
            offset += 1

        # ax_list[0].set_aspect('equal')
        for i, (indir, title_name) in enumerate(zip(indir_list, title_names[offset:])):
            # hard_to_see_img = m.imread(os.path.join(indir, one_img_fn))
            hard_to_see_img = Image.open(os.path.join(indir, one_img_fn)).resize(raw_img.size)
            hard_to_see_img = np.array(hard_to_see_img)

            ax_list.append(fig.add_subplot(n_row, n_col, i + offset + 1))

            # hsv =  plt.get_cmap('hsv')
            # colors = hsv(np.linspace(0, 1.0, N_CLASS))
            def discrete_cmap(N, base_cmap=None):
                """Create an N-bin discrete colormap from the specified input map"""

                # Note that if base_cmap is a string or None, you can simply do
                #    return plt.cm.get_cmap(base_cmap, N)
                # The following works for string, None, or a colormap instance:

                base = plt.cm.get_cmap(base_cmap)
                color_list = base(np.linspace(0, 1, N))
                cmap_name = base.name + str(N)
                return base.from_list(cmap_name, color_list, N)

            cmap = "gray" if "boundary" in title_name.lower() else "jet"
            vmax = 255 if "boundary" in title_name.lower() else N_CLASS - 1

            im = ax_list[i + offset].imshow(hard_to_see_img.astype(np.uint8), vmin=0, vmax=vmax,
                                            interpolation='none',
                                            cmap=cmap)
            # cmap=discrete_cmap(N_CLASS, "jet"))



            ax_list[i + offset].axis("off")
            ax_list[i + offset].set_xticklabels([])
            ax_list[i + offset].set_yticklabels([])

            ax_list[i + offset].set_title(indir.replace("outputs/", "").replace("/label", "").replace("/", "\n"),
                                          fontsize=4)
            ax_list[i + offset].set_aspect('equal')
            plt.axis('tight')

        if title_names is not None:
            for i, title in enumerate(title_names):
                ax_list[i].set_title(title, fontsize=30)

        # fig.subplots_adjust(wspace=0, hspace=0)
        # fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
        fig.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0, hspace=0)
        fig.tight_layout(pad=0)

        # colors = [im.cmap(im.norm(value)) for value in values]
        # patches = [mpatches.Patch(color=colors[i], label=label_list[i]) for i in range(len(values))]
        # # lgd = fig.legend(handles=patches, labels=label_list, bbox_to_anchor=(1.05, 1), borderaxespad=0.,
        # #                  fontsize=7, loc='upper left')  # loc=2
        # if n_col * 2 <= N_CLASS:
        #     n_legend_col = n_col * 2
        # else:
        #     n_legend_col = N_CLASS
        # lgd = plt.legend(patches, label_list, loc='lower center', bbox_to_anchor=(0, 0, 1, 1),
        #                  bbox_transform=plt.gcf().transFigure, ncol=n_legend_col, fontsize=5)

        # fig.tight_layout()
        outfn = os.path.join(outdir, one_img_fn)
        outfn = os.path.splitext(outfn)[0] + '.%s' % ext

        fig.savefig(outfn, transparent=True, bbox_inches='tight', pad_inches=0)
        # fig.savefig(outfn, transparent=True, bbox_inches='tight', pad_inches=0, bbox_extra_artists=(lgd,), dpi=300)
        plt.close()
Example 22
	def fetch_url(self, url):
		while True:
			self.lock.acquire()
			if self.browser_queue.empty():
				self.lock.release()
				time.sleep(5)
			else:
				browser = self.browser_queue.get()
				self.lock.release()
				break
		result = CD.CrawlResult() # record whether url loading failed!
		result.url = url
		result.url_md5 = hex_md5(url)
		result.success = True
		try:
			# This line is used to handle alert: <stay on this page> <leave this page>
			browser.get(result.url)
			browser.execute_script("window.onbeforeunload = function() {};")
			time.sleep(1)
			if self.crawl_config.browser_type == CD.CrawlConfig.CHROME and \
					(('404 Not Found' in browser.title) \
					or ('403' in browser.title) \
					or ('Forbidden' in browser.title) \
					or ('not available' in browser.title) \
					or ('Problem loading page' in browser.title) \
					or ('Page not found' in browser.title) \
					or ('Error' in browser.title) \
					or ('Access denied' in browser.title) \
					or (browser.current_url == 'data:text/html,chromewebdata')):
				result.landing_url = browser.current_url
				result.landing_url_md5 = hex_md5(result.landing_url)
				result.success = False
			elif self.crawl_config.browser_type == CD.CrawlConfig.FIREFOX and \
					(('404 Not Found' in browser.title) \
					or ('403' in browser.title) \
					or ('Forbidden' in browser.title) \
					or ('not available' in browser.title) \
					or ('Problem loading page' in browser.title) \
					or ('Page not found' in browser.title) \
					or ('Error' in browser.title) \
					or ('Access denied' in browser.title)):
				result.landing_url = browser.current_url
				result.landing_url_md5 = hex_md5(result.landing_url)
				result.success = False
			else:
				#############
				# md5 the original url
				url_md5_dir = self.crawl_config.user_agent_md5_dir + result.url_md5 + '/'
				mkdir_if_not_exist(url_md5_dir)
				# get the landing url
				result.landing_url = browser.current_url
				result.landing_url_md5 = hex_md5(result.landing_url)
				# get the whole page source
				response = browser.execute_script("return document.documentElement.innerHTML;")
				result.file_path = url_md5_dir + 'index.html'
				f = open(result.file_path, 'w')
				f.write(response.encode('utf-8'))
				f.close()
			browser.delete_all_cookies()
			if len(browser.window_handles) > 1:
				# close all the other windows
				current_window_handle = browser.current_window_handle
				for handle in browser.window_handles:
					if handle != current_window_handle:
						browser.switch_to_window(handle)
						browser.close()
				# switch back to the current window
				browser.switch_to_window(current_window_handle)
		except:
			result.success = False
			browser = restart_browser(self.crawl_config.browser_type, incognito=False,
					user_agent=self.crawl_config.user_agent, browser=browser)
		self.browser_queue.put(browser)
		logger = logging.getLogger("global")
		logger.info("the length of the browser_queue")
		logger.info(self.browser_queue.qsize())
		return result
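restart_browser is not part of these examples either; a minimal sketch, assuming it quits the broken driver and starts a fresh Chrome instance with the same user agent (the Firefox path and any other options the real helper sets are omitted):

from selenium import webdriver


def restart_browser(browser_type, incognito, user_agent, browser):
    try:
        browser.quit()
    except Exception:
        pass
    options = webdriver.ChromeOptions()
    options.add_argument('--user-agent=%s' % user_agent)
    if incognito:
        options.add_argument('--incognito')
    # a CD.CrawlConfig.FIREFOX browser_type would need a FirefoxProfile instead
    return webdriver.Chrome(chrome_options=options)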
def vis_with_legend(indir_list,
                    outdir,
                    label_list,
                    raw_rgb_dir,
                    raw_optional_img_dir=None,
                    gt_dir=None,
                    ext="png",
                    title_names=None,
                    n_sample=10):
    N_CLASS = len(label_list)
    values = np.arange(N_CLASS)

    n_imgs = 1 + len(indir_list)
    if raw_optional_img_dir:
        n_imgs += 1
    if gt_dir:
        n_imgs += 1

    mkdir_if_not_exist(outdir)

    n_row = 1
    n_col = int(round(float(n_imgs) / n_row))

    # img_fn_list = os.listdir(raw_rgb_dir)
    img_fn_list = os.listdir(gt_dir)
    # img_fn_list = os.listdir(indir_list[0])
    img_fn_list = random.sample(img_fn_list, n_sample)

    for one_img_fn in tqdm(img_fn_list):
        fig = plt.figure(figsize=(560 * n_col / 100, 425 * n_row /
                                  100))  # sharex=True, sharey=True)

        ax_list = []
        ax_list.append(fig.add_subplot(n_row, n_col, 1))
        raw_img = Image.open(os.path.join(raw_rgb_dir, one_img_fn))

        ax_list[0].imshow(raw_img)
        ax_list[0].axis("off")
        ax_list[0].set_xticklabels([])
        ax_list[0].set_yticklabels([])

        ax_list[0].set_aspect('equal')
        offset = 1
        plt.axis('tight')

        if raw_optional_img_dir:
            ax_list.append(fig.add_subplot(n_row, n_col, offset + 1))
            raw_img = Image.open(os.path.join(raw_optional_img_dir,
                                              one_img_fn))

            ax_list[offset].imshow(raw_img, cmap='gray')
            ax_list[offset].axis("off")
            ax_list[offset].set_xticklabels([])
            ax_list[offset].set_yticklabels([])

            ax_list[offset].set_aspect('equal')
            plt.axis('tight')
            offset += 1

        if gt_dir:
            ax_list.append(fig.add_subplot(n_row, n_col, offset + 1))
            gt_img = Image.open(
                os.path.join(
                    gt_dir, one_img_fn.replace("leftImg8bit",
                                               "gtFine_gtlabels")))
            gt_img = np.array(gt_img, dtype=np.uint8)
            ax_list[offset].imshow(gt_img,
                                   vmin=0,
                                   vmax=N_CLASS - 1,
                                   interpolation='none',
                                   cmap="jet")
            ax_list[offset].axis("off")
            ax_list[offset].set_xticklabels([])
            ax_list[offset].set_yticklabels([])

            ax_list[offset].set_aspect('equal')
            plt.axis('tight')
            offset += 1

        if title_names is not None:
            for i, title in enumerate(title_names):
                ax_list[i].set_title(title, fontsize=30)

        fig.subplots_adjust(left=0,
                            bottom=0,
                            right=1,
                            top=1,
                            wspace=0,
                            hspace=0)
        fig.tight_layout(pad=0)

        outfn = os.path.join(outdir, one_img_fn)
        outfn = os.path.splitext(outfn)[0] + '.%s' % ext

        fig.savefig(outfn, transparent=True, bbox_inches='tight', pad_inches=0)
        plt.close()
    model.load_state_dict(checkpoint)

print ("----- train args ------")
pprint(train_args.__dict__, indent=4)
print ("-" * 50)
args.train_img_shape = train_args.train_img_shape
print("=> loaded checkpoint '{}'".format(args.trained_checkpoint))

indir, infn = os.path.split(args.trained_checkpoint)

trained_mode = indir.split(os.path.sep)[-2]
args.mode = "%s---%s-%s" % (trained_mode, args.tgt_dataset, args.split)
model_name = infn.replace(".pth", "")

base_outdir = os.path.join(args.outdir, args.mode, model_name)
mkdir_if_not_exist(base_outdir)

json_fn = os.path.join(base_outdir, "param.json")
check_if_done(json_fn)
args.machine = os.uname()[1]
save_dic_to_json(args.__dict__, json_fn)

train_img_shape = tuple([int(x) for x in args.train_img_shape])
test_img_shape = tuple([int(x) for x in args.test_img_shape])

if "crop_size" in train_args.__dict__.keys() and train_args.crop_size > 0:
    train_img_shape = test_img_shape
    print ("train_img_shape was set to the same as test_img_shape")

if "normalize_way" in train_args.__dict__.keys():
    img_transform = get_img_transform(img_shape=train_img_shape, normalize_way=train_args.normalize_way)
def search_and_revisit(word_file, n, threads=6, ad_only=False):
	"""
	This function does the following things.
	1. Search each word in word file.
	2. Grab the top 200 returned results and corresponding ads
	3. Visit all the results and ads with "chrome user agent", repeat n times
	4. Visit all the landing pages in step 3 with "google ads bot user agent"

	@parameter
	word_file: the filename containing the words to search
	n: repeat step 3 n times
	threads: the number of crawler threads to use
	ad_only: only retrieve the advertisements; in this case, we only view the first 5 pages

	@output
	Following are output of this function
	Running log:
	[WORD_FILE].selenium.crawl/running_log.[SEARCH_TIME]
	"chrome user agent" result is:
	[WORD_FILE].selenium.crawl/ad_crawl_log.[SEARCH_TIME].[WORD_MD5]
	[WORD_FILE].selenium.crawl/search_crawl_log.[SEARCH_TIME].[WORD_MD5]
	[WORD_FILE].selenium.crawl/[WORD_MD5]/[UA_MD5].[SEARCH_TIME]/[URL_MD5]/index.html
	"google ads bot user agent" result is:
	[WORD_FILE].selenium.crawl/ad_crawl_log.[SEARCH_TIME].[WORD_MD5].google
	[WORD_FILE].selenium.crawl/search_crawl_log.[SEARCH_TIME].[WORD_MD5].google
	[WORD_FILE].selenium.crawl/[WORD_MD5]/[UA_MD5].[SEARCH_TIME].revisit.[REVISIT_TIME]/[URL_MD5]/index.html
	"""
	valid_instance(threads, int)
	# prepare search and visit
	user_UA = "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/" \
			"537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36"
	user_suffix = "selenium.crawl/"
	search_now_suffix = datetime.now().strftime(".%Y%m%d-%H%M%S")
	word_md5_delimiter = "WORD_MD5"

	# compute base_dir and start logging
	base_dir = '.'.join([word_file, user_suffix])
	mkdir_if_not_exist(base_dir)
	logging.basicConfig(filename=base_dir+'running_log'+search_now_suffix, level=logging.DEBUG)
	logging.getLogger("global")

	# set search and visit crawl_config
	search_config = CD.CrawlConfig()
	search_config.maximum_threads = threads
	search_config.user_agent = user_UA
	# number of top search results to be inspected
	if ad_only:
		search_config.count = 50
	search_config.browser_type = CD.CrawlConfig.CHROME

	ad_crawl_config = CD.CrawlConfig()
	ad_crawl_config.CopyFrom(search_config)
	ad_crawl_config.result_type = CD.AD
	ad_crawl_config.crawl_log_dir = base_dir
	ad_log_filename_prefix = 'ad_crawl_log' + search_now_suffix
	ad_dir_prefix = base_dir + word_md5_delimiter + "/" + \
			hex_md5(ad_crawl_config.user_agent) + search_now_suffix + '/'
	search_crawl_config = CD.CrawlConfig()
	search_crawl_config.CopyFrom(search_config)
	search_crawl_config.result_type = CD.SEARCH
	search_crawl_config.crawl_log_dir = base_dir
	search_log_filename_prefix = 'search_crawl_log' + search_now_suffix
	search_dir_prefix = base_dir + word_md5_delimiter + "/" + \
			hex_md5(search_crawl_config.user_agent) + search_now_suffix + '/'

	# print crawl_config.user_agent
	words = SearchTerm(word_file)
	search = Search(search_config)
	ad_visit = Visit(ad_crawl_config, 1)
	search_visit = Visit(search_crawl_config, 1)

	# prepare the revisit
	google_ad_UA = "AdsBot-Google (+http://www.google.com/adsbot.html)"
	google_search_UA = "Googlebot/2.1 (+http://www.google.com/bot.html)"

	# set revisit crawl_config
	revisit_crawl_config = CD.CrawlConfig()
	revisit_crawl_config.maximum_threads = threads
	revisit_crawl_config.browser_type = CD.CrawlConfig.CHROME
	# base directory uses search_now_suffix to correlate these two
	revisit_crawl_config.crawl_log_dir = base_dir

	# search, visit and revisit each word
	for word in words.get_word_list():
		print "Processing {0} word: {1}".format(words.current(), word)
		# update word_md5 related directories
		print word
		word_md5 = hex_md5(word)
		ad_crawl_config.log_filename = ad_log_filename_prefix + "." + word_md5
		ad_crawl_config.user_agent_md5_dir = word_md5.join(
				ad_dir_prefix.split(word_md5_delimiter))
		search_crawl_config.log_filename = search_log_filename_prefix + "." + word_md5
		search_crawl_config.user_agent_md5_dir = word_md5.join(
				search_dir_prefix.split(word_md5_delimiter))
		ad_visit.update_crawl_config(ad_crawl_config)
		search_visit.update_crawl_config(search_crawl_config)
		
		# search and crawl
		right_click = not ad_only
		ad_set, search_set = search.search(word, right_click)
		ad_crawl_log_filename = ad_visit.visit(ad_set, word)
		if ad_only:
			search_crawl_log_filename = None
		else:
			search_crawl_log_filename = search_visit.visit(search_set, word)

		# revisit
		crawl_log_file_list = list()
		if ad_crawl_log_filename:
			crawl_log_file_list.append(ad_crawl_log_filename)
		if search_crawl_log_filename:
			crawl_log_file_list.append(search_crawl_log_filename)
		for crawl_log_file in crawl_log_file_list:
			if crawl_log_file == ad_crawl_log_filename:
				revisit_crawl_config.user_agent = google_ad_UA
			else:
				revisit_crawl_config.user_agent = google_search_UA
			revisit_dir_prefix = base_dir + word_md5_delimiter + "/" + \
					hex_md5(revisit_crawl_config.user_agent) + search_now_suffix
			revisit_crawl_config.log_filename = crawl_log_file.split('/')[-1] + '.google'
			revisit = Visit(revisit_crawl_config)
			crawl_log = CD.CrawlLog()
			read_proto_from_file(crawl_log, crawl_log_file)
			revisit.visit_landing_url_n_times(crawl_log, int(n), revisit_dir_prefix,
					word_md5, word_md5_delimiter)
		words.next()
		"""
def vis_with_legend(indir_list,
                    raw_rgb_dir,
                    outdir,
                    label_list,
                    raw_gray_dir=None,
                    gt_dir=None,
                    ext="png"):
    # label_list is needed below for N_CLASS, the color range and the legend
    N_CLASS = len(label_list)
    values = np.arange(N_CLASS)

    n_imgs = 1 + len(indir_list)
    if raw_gray_dir:
        n_imgs += 1
    if gt_dir:
        n_imgs += 1

    mkdir_if_not_exist(outdir)

    n_row = 2
    n_col = int(round(float(n_imgs) / n_row))

    # img_fn_list = os.listdir(raw_rgb_dir)
    img_fn_list = os.listdir(indir_list[0])

    for one_img_fn in tqdm(img_fn_list):
        fig = plt.figure()  # sharex=True, sharey=True)
        ax_list = []
        ax_list.append(fig.add_subplot(n_row, n_col, 1))
        raw_img = Image.open(os.path.join(raw_rgb_dir, one_img_fn))

        ax_list[0].imshow(raw_img)
        ax_list[0].axis("off")
        ax_list[0].set_xticklabels([])
        ax_list[0].set_yticklabels([])

        offset = 1

        if raw_gray_dir:
            ax_list.append(fig.add_subplot(n_row, n_col, offset + 1))
            raw_img = Image.open(os.path.join(raw_gray_dir, one_img_fn))

            ax_list[offset].imshow(raw_img, cmap='gray')
            ax_list[offset].axis("off")
            ax_list[offset].set_xticklabels([])
            ax_list[offset].set_yticklabels([])
            offset += 1

        if gt_dir:
            ax_list.append(fig.add_subplot(n_row, n_col, offset + 1))
            gt_img = Image.open(
                os.path.join(
                    gt_dir, one_img_fn.replace("leftImg8bit",
                                               "gtFine_gtlabels")))
            ax_list[offset].imshow(gt_img,
                                   vmin=0,
                                   vmax=N_CLASS - 1,
                                   interpolation='none',
                                   cmap="jet")
            ax_list[offset].axis("off")
            ax_list[offset].set_xticklabels([])
            ax_list[offset].set_yticklabels([])
            offset += 1

        # ax_list[0].set_aspect('equal')
        for i, indir in enumerate(indir_list):
            # hard_to_see_img = m.imread(os.path.join(indir, one_img_fn))
            hard_to_see_img = Image.open(os.path.join(
                indir, one_img_fn)).resize(raw_img.size)
            hard_to_see_img = np.array(hard_to_see_img)

            ax_list.append(fig.add_subplot(n_row, n_col, i + offset + 1))
            im = ax_list[i + offset].imshow(hard_to_see_img.astype(np.uint8),
                                            vmin=0,
                                            vmax=N_CLASS - 1,
                                            interpolation='none',
                                            cmap="jet")
            ax_list[i + offset].axis("off")
            ax_list[i + offset].set_xticklabels([])
            ax_list[i + offset].set_yticklabels([])
            ax_list[i + offset].set_title(indir.replace(
                "outputs/", "").replace("/label", "").replace("/", "\n"),
                                          fontsize=4)
            # ax_list[i + 1].set_aspect('equal')

        fig.subplots_adjust(wspace=0, hspace=0)

        colors = [im.cmap(im.norm(value)) for value in values]
        patches = [
            mpatches.Patch(color=colors[i], label=label_list[i])
            for i in range(len(values))
        ]
        # lgd = fig.legend(handles=patches, labels=label_list, bbox_to_anchor=(1.05, 1), borderaxespad=0.,
        #                  fontsize=7, loc='upper left')  # loc=2
        if n_col * 2 <= N_CLASS:
            n_legend_col = n_col * 2
        else:
            n_legend_col = N_CLASS
        lgd = plt.legend(patches,
                         label_list,
                         loc='lower center',
                         bbox_to_anchor=(0, 0, 1, 1),
                         bbox_transform=plt.gcf().transFigure,
                         ncol=n_legend_col,
                         fontsize=5)

        # fig.tight_layout()
        outfn = os.path.join(outdir, one_img_fn)
        outfn = os.path.splitext(outfn)[0] + '.%s' % ext

        fig.savefig(outfn,
                    transparent=True,
                    bbox_inches='tight',
                    pad_inches=0,
                    bbox_extra_artists=(lgd, ),
                    dpi=300)
        plt.close()
Example 27
BASE_DIR = os.path.abspath("../..")

SUBMISSION_DIR = BASE_DIR + "/submission"

MODEL_DIR = BASE_DIR + "/model"
BEST_MODEL_FILE = MODEL_DIR + '/best_model.hdf5'
BEST_WEIGHTS_FILE = MODEL_DIR + '/best_weights.hdf5'
MODEL_FILE = MODEL_DIR + "/model.json"
CLASS_INDICES_FILE = MODEL_DIR + "/class_indices.json"

LOG_DIR = BASE_DIR + "/log"
TF_LOG_DIR = LOG_DIR + "/tf_log"
PY_LOG_DIR = LOG_DIR + "/py_log"

mkdir_if_not_exist(SUBMISSION_DIR)
mkdir_if_not_exist(MODEL_DIR)
mkdir_if_not_exist(TF_LOG_DIR)

# --------------------------------------------------
# dataset config
# --------------------------------------------------
NUM_TEST_PRODUCTS = 1768182
NUM_TEST_PICS = 3095080
NUM_TRAIN_IMGS = 11134709
NUM_VALID_IMGS = 1236584

if DEBUG:
    NUM_TEST_PRODUCTS = 1000
    NUM_TEST_PICS = 1714
    NUM_TRAIN_IMGS = 14741
Example 28
                              lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay)

    args.outdir = os.path.join(
        args.base_outdir,
        "%s-%s_only_%sch" % (args.src_dataset, args.split, args.input_ch))
    args.pth_dir = os.path.join(args.outdir, "pth")

    if args.net in ["fcn", "psp"]:
        model_name = "%s-%s-res%s" % (args.savename, args.net, args.res)
    else:
        model_name = "%s-%s" % (args.savename, args.net)

    args.tflog_dir = os.path.join(args.outdir, "tflog", model_name)
    mkdir_if_not_exist(args.pth_dir)
    mkdir_if_not_exist(args.tflog_dir)

    json_fn = os.path.join(args.outdir, "param-%s.json" % model_name)
    check_if_done(json_fn)
    args.machine = os.uname()[1]
    save_dic_to_json(args.__dict__, json_fn)

    start_epoch = 0

train_img_shape = tuple([int(x) for x in args.train_img_shape])

img_transform_list = [
    Scale(train_img_shape, Image.BILINEAR),
    ToTensor(),
    # Normalize([.485, .456, .406], [.229, .224, .225])
    model_f2 = model_f1

mode = "%s-%s2%s-%s_%sch_Finetune_MFNet" % (args.src_dataset, args.src_split,
                                            args.tgt_dataset, args.tgt_split,
                                            args.input_ch)
if args.net in ["fcn", "psp"]:
    model_name = "%s-%s-%s-res%s" % (detailed_method, args.savename, args.net,
                                     args.res)
else:
    model_name = "%s-%s-%s" % (detailed_method, args.savename, args.net)

outdir = os.path.join(args.base_outdir, mode)

# Create Model Dir
pth_dir = os.path.join(outdir, "pth")
mkdir_if_not_exist(pth_dir)

# Create Model Dir and  Set TF-Logger
tflog_dir = os.path.join(outdir, "tflog", model_name)
mkdir_if_not_exist(tflog_dir)
configure(tflog_dir, flush_secs=5)

# Save param dic

json_fn = os.path.join(outdir, "param-%s-finetune_MFNet.json" % model_name)
check_if_done(json_fn)
save_dic_to_json(args.__dict__, json_fn)

train_img_shape = tuple([int(x) for x in args.train_img_shape])

use_crop = True if args.crop_size > 0 else False
args = parser.parse_args()

if args.dataset in ["city16", "synthia"]:
    info_json_fn = "./dataset/synthia2cityscapes_info.json"
elif args.dataset in ["nyu"]:
    info_json_fn = "./dataset/nyu_info.json"
elif args.dataset == "ir":
    info_json_fn = "./dataset/ir_info.json"
else:
    info_json_fn = "./dataset/city_info.json"

    # Save visualized predicted pixel labels(pngs)
with open(info_json_fn) as f:
    info_dic = json.load(f)
palette = np.array(info_dic['palette'], dtype=np.uint8)

gt_dir = GT_DIR_DIC[args.dataset]
vis_outdir = os.path.join(os.path.split(gt_dir)[0], os.path.split(gt_dir)[1] + "_pretty")
print ("OUTDIR is %s" % vis_outdir)
mkdir_if_not_exist(vis_outdir)

gtfn_list = os.listdir(gt_dir)

for gtfn in tqdm(gtfn_list):
    full_gtfn = os.path.join(gt_dir, gtfn)
    img = Image.open(full_gtfn).convert("P")
    img.putpalette(palette.flatten())
    vis_fn = os.path.join(vis_outdir, gtfn)
    img.save(vis_fn)
Example 31
if not os.path.exists(args.trained_checkpoint):
    raise OSError("%s does not exist!" % args.trained_checkpoint)

checkpoint = torch.load(args.trained_checkpoint)
train_args = checkpoint["args"]
args.start_epoch = checkpoint['epoch']
print ("----- train args ------")
pprint(train_args.__dict__, indent=4)
print ("-" * 50)
print("=> loaded checkpoint '{}'".format(args.trained_checkpoint))

detailed_method = train_args.method + "-" + train_args.method_detail
print ("method: %s" % detailed_method)

base_outdir = os.path.join(args.outdir, args.mode, model_name)
mkdir_if_not_exist(base_outdir)

json_fn = os.path.join(base_outdir, "param.json")
check_if_done(json_fn)
args.machine = os.uname()[1]
save_dic_to_json(args.__dict__, json_fn)

train_img_shape = tuple([int(x) for x in train_args.train_img_shape])
test_img_shape = tuple([int(x) for x in args.test_img_shape])

# TODO
if "normalize_way" in train_args.__dict__.keys():
    img_transform = get_img_transform(img_shape=train_img_shape, normalize_way=train_args.normalize_way)
else:
    img_transform = get_img_transform(img_shape=train_img_shape)
def vis_with_legend(
    indir_list,
    outdir,
    label_list,
    raw_rgb_dir,
    raw_optional_img_dir=None,
    gt_dir=None,
    boundary_dir=None,
    ext="png",
    title_names=None,
    n_sample=10,
):
    N_CLASS = len(label_list)
    values = np.arange(N_CLASS)

    n_imgs = 1 + len(indir_list)
    if raw_optional_img_dir:
        n_imgs += 1
    if gt_dir:
        n_imgs += 1
    if boundary_dir:
        n_imgs += 1
    mkdir_if_not_exist(outdir)

    n_row = 1  # 2
    n_col = int(round(float(n_imgs) / n_row))

    # with open("/data/unagi0/dataset/SUNCG-Seg/data_goodlist_v2.txt") as f:
    with open(
            "/data/unagi0/watanabe/DomainAdaptation/Segmentation/VisDA2017/test_output/suncg-train_rgbhhab_only_3ch---suncg-train_rgbhha/normal-drn_d_38-20.tar/data_list.txt"
    ) as f:
        # with open(
        #         "/data/unagi0/watanabe/DomainAdaptation/Segmentation/VisDA2017/test_output/suncg-train_rgbhha_only_6ch---suncg-train_rgbhha/b16-drn_d_38-10.tar/data_list.txt") as f:
        fn_id_list = [x.strip() for x in f.readlines()]
    fn_id_list = random.sample(fn_id_list, n_sample)
    # fn_id_list = ["6f905fac454cea2d4cf5fd4d83a83a69/000000"]

    for one_img_id in tqdm(fn_id_list):
        fig = plt.figure(figsize=(640 * n_col / 100, 480 * n_row /
                                  100))  # sharex=True, sharey=True)

        ax_list = []
        ax_list.append(fig.add_subplot(n_row, n_col, 1))
        raw_img = Image.open(os.path.join(raw_rgb_dir,
                                          one_img_id + "_mlt.png"))

        ax_list[0].imshow(raw_img)
        ax_list[0].axis("off")
        ax_list[0].set_xticklabels([])
        ax_list[0].set_yticklabels([])

        ax_list[0].set_aspect('equal')
        offset = 1
        plt.axis('tight')

        if raw_optional_img_dir:
            ax_list.append(fig.add_subplot(n_row, n_col, offset + 1))
            raw_img = Image.open(
                os.path.join(raw_optional_img_dir, one_img_id + "_hha.png"))

            ax_list[offset].imshow(raw_img, cmap='gray')
            ax_list[offset].axis("off")
            ax_list[offset].set_xticklabels([])
            ax_list[offset].set_yticklabels([])

            ax_list[offset].set_aspect('equal')
            plt.axis('tight')
            offset += 1

        if gt_dir:
            ax_list.append(fig.add_subplot(n_row, n_col, offset + 1))
            gt_img = Image.open(
                os.path.join(gt_dir, one_img_id + "_category40.png"))
            gt_img = np.array(gt_img, dtype=np.uint8)
            ax_list[offset].imshow(gt_img,
                                   vmin=0,
                                   vmax=N_CLASS - 1,
                                   interpolation='none',
                                   cmap="jet")
            ax_list[offset].axis("off")
            ax_list[offset].set_xticklabels([])
            ax_list[offset].set_yticklabels([])

            ax_list[offset].set_aspect('equal')
            plt.axis('tight')
            offset += 1

        if boundary_dir:
            ax_list.append(fig.add_subplot(n_row, n_col, offset + 1))
            boundary_img = Image.open(
                os.path.join(boundary_dir,
                             one_img_id + "_instance_boundary.png"))
            boundary_img = np.array(boundary_img, dtype=np.uint8)
            ax_list[offset].imshow(boundary_img,
                                   vmin=0,
                                   vmax=N_CLASS - 1,
                                   interpolation='none',
                                   cmap="gray")
            ax_list[offset].axis("off")
            ax_list[offset].set_xticklabels([])
            ax_list[offset].set_yticklabels([])

            ax_list[offset].set_aspect('equal')
            plt.axis('tight')
            offset += 1

        if title_names is not None:
            for i, title in enumerate(title_names):
                ax_list[i].set_title(title, fontsize=30)

        # fig.subplots_adjust(wspace=0, hspace=0)
        # fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
        fig.subplots_adjust(left=0,
                            bottom=0,
                            right=1,
                            top=1,
                            wspace=0,
                            hspace=0)
        fig.tight_layout(pad=0)

        # colors = [im.cmap(im.norm(value)) for value in values]
        # patches = [mpatches.Patch(color=colors[i], label=label_list[i]) for i in range(len(values))]
        # # lgd = fig.legend(handles=patches, labels=label_list, bbox_to_anchor=(1.05, 1), borderaxespad=0.,
        # #                  fontsize=7, loc='upper left')  # loc=2
        # if n_col * 2 <= N_CLASS:
        #     n_legend_col = n_col * 2
        # else:
        #     n_legend_col = N_CLASS
        # lgd = plt.legend(patches, label_list, loc='lower center', bbox_to_anchor=(0, 0, 1, 1),
        #                  bbox_transform=plt.gcf().transFigure, ncol=n_legend_col, fontsize=5)

        # fig.tight_layout()
        outfn = os.path.join(
            outdir,
            os.path.split(one_img_id)[-2] + "_" +
            os.path.split(one_img_id)[-1])
        outfn = os.path.splitext(outfn)[0] + '.%s' % ext

        fig.savefig(outfn, transparent=True, bbox_inches='tight', pad_inches=0)
        # fig.savefig(outfn, transparent=True, bbox_inches='tight', pad_inches=0, bbox_extra_artists=(lgd,), dpi=300)
        plt.close()
Example 33
                        default=(1280, 720),
                        nargs=2,
                        help="W H")
    parser.add_argument(
        '--raw_img_indir',
        type=str,
        default=None,
        help=
        "input directory that contains raw imgs(valid:'/data/unagi0/watanabe/DomainAdaptation/Segmentation/VisDA2017/cityscapes_val_imgs', test:'/data/ugui0/dataset/adaptation/segmentation_test')"
    )

    args = parser.parse_args()

    args.outimg_shape = [int(x) for x in args.outimg_shape]

    mkdir_if_not_exist(args.outdir)

    for one_file in tqdm(os.listdir(args.prob_indir)):
        one_npy_fn = os.path.join(args.prob_indir, one_file)
        outfn = os.path.join(args.outdir, one_file.replace("npy", "png"))

        #     if os.path.exists(outfn):
        #         continue

        one_prob = np.load(one_npy_fn)
        one_prob = softmax(one_prob)
        one_prob = np.transpose(one_prob, [1, 2, 0])
        one_prob = np.expand_dims(one_prob, 0)
        _, h, w, n_class = one_prob.shape

        if args.raw_img_indir:
Example 34
seed_everything(args.seed)
os.environ['PYTHONHASHSEED'] = str(args.seed)
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

config = {
    'output_path': args.output_path,
    'path': {
        'log': args.output_path + '/log/',
        'scalar': args.output_path + '/scalar/',
        'model': args.output_path + '/model/'
    },
    'is_writer': args.is_writer
}

# Create output Dir
mkdir_if_not_exist(config['path']['log'])
mkdir_if_not_exist(config['path']['scalar'])
mkdir_if_not_exist(config['path']['model'])
if config['is_writer']:
    config['writer'] = SummaryWriter(log_dir=config['path']['scalar'])
config['logger'] = Logger(logroot=config['path']['log'],
                          filename=args.log_file,
                          level='debug')
config['logger'].logger.debug(str(args))

# whether resume training
start_epoch = 0
if args.resume:
    config['logger'].logger.debug('==> loading checkpoint: ' + args.resume)
    if not os.path.exists(args.resume):
        raise OSError("%s does not exist!" % args.resume)