Example #1
0
def batch_one_image_dataset(global_X_path, global_Y_path, img_window_path, img_vlad_path, img_metadata_path, target, overlap_threshold=0.5):
    """Build the (X, Y) dataset rows for a single image and append them to the global files.

    Combines the image's parsed annotation metadata, its serialized
    candidate windows and their VLAD descriptors via get_data_set_X_Y,
    then appends the feature rows to global_X_path and the labels to
    global_Y_path.
    """
    # Ground-truth annotation (objects included) for this image.
    metadata = parse_image_metadata(file_path=img_metadata_path, parseObject=True)
    # Candidate windows and their matching VLAD vectors (row-aligned).
    windows = de_serialize_window(input_path=img_window_path)
    vlad_vectors = load_matrix(input_path=img_vlad_path).tolist()
    xy = get_data_set_X_Y(windows, vlad_vectors, metadata, target, overlap_threshold)
    features, labels = xy[0], xy[1]
    # Accumulate into the dataset files shared across all images.
    append_file(dest_file=global_X_path, strInput=features)
    append_file(dest_file=global_Y_path, strInput=labels)
Example #2
0
def batch_all_images(input_image_path, annotation_path, output_parent_path,
            unit_ratio_list, overlap_ratio, target, target_pos_path, vladVector, target_count=20, pca=True,
            k=30, max_iter=30, preVLAD=False, voca_path=None, dataset_mode=False, overlap_threshold=0.5):
    global_sift_path = '%sglobal_sift.txt' % (output_parent_path)
    image_name_list = get_target_pos_names(input_path=target_pos_path, target=target, target_count=target_count)
    sift_path_L = []
    if preVLAD:
        delete_file(file_path=global_sift_path)
        for image_name in image_name_list:
            metadata_path = '%s%s.xml' % (annotation_path, image_name)
            win_sift_path = batch_one_image_pre_VLAD(input_image_path, image_name, metadata_path, output_parent_path,
                unit_ratio_list, overlap_ratio)
            sift_path_L.append(win_sift_path)
            append_file(dest_file=global_sift_path, input_path=win_sift_path)
        print "----------pre-VLAD Done"
    else:
        all_dir = list_all_files(input_path=output_parent_path + 'windows/', onlyDir=True)
        for d in all_dir:
            sift_path_L.append("%s/windows/%s/temp_sift/" % (output_parent_path, d))
        print "----------pre-VLAD is enabled"
    if voca_path is None or not isfile(voca_path):
        print "~~~~~~~Learning vocabulary by the sift vectors of all windows of all images"
        vector_matrix = None
        if pca:
            vector_matrix = pca_dataset(input_path=global_sift_path)
        vocabulary = learn_vocabulary(input_path=global_sift_path,
                    k=k, max_iter=max_iter, single_file=True, vector_matrix=vector_matrix)
        save_matrix(v=vocabulary, output_path=voca_path)
        print "~~~~~~~Learning vocabulary done"
    elif vladVector:
        print "~~~~~~~Loading existing vocabulary"
        vocabulary = load_matrix(input_path=voca_path)
    if vladVector:
        for i in xrange(len(image_name_list)):
            image_name = image_name_list[i]
            output_path = "%swindows/%s/%s" % (output_parent_path, image_name, image_name)
            print "\t======Creating VLAD vectors"
            vlad_vector_batch(input_path=sift_path_L[i],
                    output_path=output_path, vocabulary=vocabulary)
            print "\t======VLAD Done for", image_name
    else:
        print "##########No VLAD vector generated...."
    if dataset_mode:
        print "^^^^^^^^^^Generate data set for global windows and VLAD"
        global_X_path = output_parent_path + "global_X.txt"
        global_Y_path = output_parent_path + "global_Y.txt"
        delete_file(global_X_path)
        delete_file(global_Y_path)
        for img_name in image_name_list:
            img_window_path = "%s/windows/%s/%s_windows.txt" % (output_parent_path, img_name , img_name)
            img_vlad_path = "%s/windows/%s/%s_vlad.txt" % (output_parent_path, img_name , img_name)
            metadata_path = '%s%s.xml' % (annotation_path, img_name)
            batch_one_image_dataset(global_X_path, global_Y_path, img_window_path,
                        img_vlad_path, metadata_path, target, overlap_threshold=overlap_threshold)
            print "\tData set done for", img_name
    print "....................All done"
Example #3
0
def batch_one_image_dataset(global_X_path,
                            global_Y_path,
                            img_window_path,
                            img_vlad_path,
                            img_metadata_path,
                            target,
                            overlap_threshold=0.5):
    """Append one image's (X, Y) dataset rows to the global dataset files.

    Reads the image's annotation XML, its serialized windows and its VLAD
    matrix, turns them into feature/label rows with get_data_set_X_Y, and
    appends those rows to global_X_path / global_Y_path.
    """
    # Inputs: annotations (with objects), candidate windows, VLAD rows.
    annotation = parse_image_metadata(file_path=img_metadata_path,
                                      parseObject=True)
    window_list = de_serialize_window(input_path=img_window_path)
    vlad_list = load_matrix(input_path=img_vlad_path).tolist()
    result = get_data_set_X_Y(window_list, vlad_list, annotation, target,
                              overlap_threshold)
    # result[0] holds the feature rows, result[1] the labels.
    append_file(dest_file=global_X_path, strInput=result[0])
    append_file(dest_file=global_Y_path, strInput=result[1])
def multi_thread_continous_download(url, file_name=None, overwrite=False, thread_num=4):
	"""Download *url* with *thread_num* resumable worker threads.

	Each worker downloads one byte range into its own chunk file; after all
	workers join, the chunks are verified and concatenated into file_name.
	With thread_num == 1 this delegates to the single-threaded variant.

	Bug fixed: when overwrite was True the file was removed and then
	os.path.getsize() was called on the now-missing file, raising
	FileNotFoundError. The size/rename logic now only runs when the
	existing file is kept.
	"""
	if thread_num == 1:
		single_thread_continous_download(url, file_name, overwrite)
	elif thread_num > 1:
		# Derive the file name from the last URL path component when absent.
		if file_name is None:
			file_name = url.rpartition('/')[-1]
		target_size = get_file_size(url)
		if target_size < 0:
			print("multi_thread_continous_download(): get_file_size() error!\n")
			return
		if os.path.exists(file_name):
			if overwrite:
				# Discard the old file and download from scratch.
				os.remove(file_name)
			else:
				current_size = os.path.getsize(file_name)
				# A stricter check would compare MD5 digests, but that needs
				# the whole target downloaded first -- wasteful for big files.
				if current_size == target_size:
					print("multi_thread_continous_download(): file %s already downloaded complete!" %(file_name))
					return
				# Same name but wrong size: rename the stale file aside and
				# re-download the target.
				print("multi_thread_continous_download(): file %s size exception, current_size != target_size" %(file_name))
				new_file_name = file_name + '_' + get_current_timestamp()
				os.rename(file_name, new_file_name)
				print("multi_thread_continous_download(): %s RENAMED TO %s" %(file_name, new_file_name))
		ranges = split_file_size(target_size, thread_num)
		thread_group = []
		for i in range(thread_num):
			# Each thread gets (url, chunk file name, start, end, resume offset).
			t = threading.Thread(target=sub_thread_continous_download, name="thread%d" % i, args=(url, get_file_name_split(file_name, i), ranges[i][0], ranges[i][1], ranges[i][2]))
			t.start()
			thread_group.append(t)
		for t in thread_group:
			t.join()
		# Verify every chunk before concatenating them into the final file.
		if check_file_integrity(file_name, target_size, thread_num):
			append_file(file_name, thread_num)
def multi_thread_download(url, file_name=None, overwrite=False, thread_num=4):
	"""Download *url* using *thread_num* parallel range-request threads,
	then stitch the per-thread chunk files into one output file.

	With thread_num == 1 this delegates to the single-threaded variant;
	thread_num < 1 is a no-op.
	"""
	if thread_num == 1:
		single_thread_download(url, file_name, overwrite)
		return
	if thread_num < 2:
		return
	# Fall back to the URL's last path component when no name was given.
	if file_name is None:
		file_name = url.rpartition('/')[-1]
	# Known limitation (kept from the original): if the existing file is
	# incomplete and overwrite is False, it is silently left as-is.
	if os.path.exists(file_name) and not overwrite:
		return
	target_size = get_file_size(url)
	if target_size < 0:
		print("multi_thread_download(): get_file_size() error!\n")
		return
	ranges = split_file_size(target_size, thread_num)
	workers = []
	for idx in range(thread_num):
		worker = threading.Thread(
			target=sub_thread_download,
			name="thread%d" % idx,
			args=(url, split_file_name(file_name, idx), ranges[idx][0], ranges[idx][1]))
		worker.start()
		workers.append(worker)
	for worker in workers:
		worker.join()
	append_file(file_name, thread_num, False)
Example #6
0
def batch_all_images(input_image_path,
                     annotation_path,
                     output_parent_path,
                     unit_ratio_list,
                     overlap_ratio,
                     target,
                     target_pos_path,
                     vladVector,
                     target_count=20,
                     pca=True,
                     k=30,
                     max_iter=30,
                     preVLAD=False,
                     voca_path=None,
                     dataset_mode=False,
                     overlap_threshold=0.5):
    global_sift_path = '%sglobal_sift.txt' % (output_parent_path)
    image_name_list = get_target_pos_names(input_path=target_pos_path,
                                           target=target,
                                           target_count=target_count)
    sift_path_L = []
    if preVLAD:
        delete_file(file_path=global_sift_path)
        for image_name in image_name_list:
            metadata_path = '%s%s.xml' % (annotation_path, image_name)
            win_sift_path = batch_one_image_pre_VLAD(input_image_path,
                                                     image_name, metadata_path,
                                                     output_parent_path,
                                                     unit_ratio_list,
                                                     overlap_ratio)
            sift_path_L.append(win_sift_path)
            append_file(dest_file=global_sift_path, input_path=win_sift_path)
        print "----------pre-VLAD Done"
    else:
        all_dir = list_all_files(input_path=output_parent_path + 'windows/',
                                 onlyDir=True)
        for d in all_dir:
            sift_path_L.append("%s/windows/%s/temp_sift/" %
                               (output_parent_path, d))
        print "----------pre-VLAD is enabled"
    if voca_path is None or not isfile(voca_path):
        print "~~~~~~~Learning vocabulary by the sift vectors of all windows of all images"
        vector_matrix = None
        if pca:
            vector_matrix = pca_dataset(input_path=global_sift_path)
        vocabulary = learn_vocabulary(input_path=global_sift_path,
                                      k=k,
                                      max_iter=max_iter,
                                      single_file=True,
                                      vector_matrix=vector_matrix)
        save_matrix(v=vocabulary, output_path=voca_path)
        print "~~~~~~~Learning vocabulary done"
    elif vladVector:
        print "~~~~~~~Loading existing vocabulary"
        vocabulary = load_matrix(input_path=voca_path)
    if vladVector:
        for i in xrange(len(image_name_list)):
            image_name = image_name_list[i]
            output_path = "%swindows/%s/%s" % (output_parent_path, image_name,
                                               image_name)
            print "\t======Creating VLAD vectors"
            vlad_vector_batch(input_path=sift_path_L[i],
                              output_path=output_path,
                              vocabulary=vocabulary)
            print "\t======VLAD Done for", image_name
    else:
        print "##########No VLAD vector generated...."
    if dataset_mode:
        print "^^^^^^^^^^Generate data set for global windows and VLAD"
        global_X_path = output_parent_path + "global_X.txt"
        global_Y_path = output_parent_path + "global_Y.txt"
        delete_file(global_X_path)
        delete_file(global_Y_path)
        for img_name in image_name_list:
            img_window_path = "%s/windows/%s/%s_windows.txt" % (
                output_parent_path, img_name, img_name)
            img_vlad_path = "%s/windows/%s/%s_vlad.txt" % (output_parent_path,
                                                           img_name, img_name)
            metadata_path = '%s%s.xml' % (annotation_path, img_name)
            batch_one_image_dataset(global_X_path,
                                    global_Y_path,
                                    img_window_path,
                                    img_vlad_path,
                                    metadata_path,
                                    target,
                                    overlap_threshold=overlap_threshold)
            print "\tData set done for", img_name
    print "....................All done"