def getAnnotationsData(files_metadata):
    """Load the contents of every JSON file listed in ``files_metadata``.

    Entries whose ``'extension'`` is not ``'json'`` are ignored. Every other
    entry is resolved to a path with ``getFilePath`` and parsed with
    ``load_json_from_file``.

    Args:
        files_metadata: iterable of mappings that provide at least the
            ``'extension'`` and ``'name'`` keys.

    Returns:
        dict mapping each JSON entry's ``'name'`` to its parsed content.
        Entries sharing a name overwrite one another; the last one wins.
    """
    loaded = {}
    for meta in files_metadata:
        if meta['extension'] != 'json':
            continue
        path = getFilePath(meta)
        loaded[meta['name']] = load_json_from_file(path)
    return loaded
def datasetClasses(dataset_name, vertical=True, total=None):
    """Print per-class object counts for a dataset and plot their distribution.

    The counts are read from the ``'objects number by class'`` entry of
    ``report_<dataset_name>.json``.

    Args:
        dataset_name: dataset identifier used to build the report file name
            and the ``pairs_<dataset_name>`` folder name.
        vertical: forwarded unchanged to ``plotClassesDistribution``.
        total: overall number of images to print. Any falsy value (``None``
            or ``0``) makes the function use ``len(Pairs(...))`` instead.
    """
    report = load_json_from_file('report_' + dataset_name + '.json')
    objects_by_class = report['objects number by class']

    print(len(objects_by_class), 'классов:')
    for class_name in objects_by_class:
        label = 'Класс "' + class_name + '":'
        print('{0:20}'.format(label), objects_by_class[class_name], 'изображений')
    print()

    # The pairs folder is only opened when the caller gave no usable total.
    if total:
        images_total = total
    else:
        images_total = len(Pairs('pairs_' + dataset_name))
    print('{0:20}'.format('Всего:'), images_total, 'изображений')
    print()

    plotClassesDistribution(objects_by_class, vertical)
def plotDatasetClassesDistribution(dataset_name, vertical=True):
    """Plot the per-class object counts stored in a dataset's report file.

    Args:
        dataset_name: dataset identifier; the report is read from
            ``report_<dataset_name>.json``.
        vertical: forwarded unchanged to ``plotClassesDistribution``.
    """
    classes_distribution = load_json_from_file(
        'report_' + dataset_name + '.json'
    )['objects number by class']
    plotClassesDistribution(classes_distribution, vertical)
def datasetMetrics(dataset_name, metric_name, log_plots=False, api=None, examples_amount=0, rows=None, columns=None, vertical=True, max_columns=20):
    """Print and plot one metric of a dataset report, filter by filter.

    The report is read from ``on_flow_by_classes_<dataset_name>_test.json``.
    Each top-level key is a JSON-encoded filter. For every filter, the
    entries stored under ``metric_name`` are rendered according to their
    ``'name'``:

    * ``'distribution'`` - plotted with ``plotDistribution``; optionally
      followed by example images for each interval that has examples.
    * ``'kmeans'`` - example image paths per cluster. This branch runs only
      when examples are requested.
    * ``'mean'`` - printed, or drawn as a histogram when ``metric_name`` is
      ``'mean_histogram'``.

    Entries with any other name are ignored.

    Args:
        dataset_name: dataset identifier used to build the report file name
            and the ``pairs_<dataset_name>`` folder name.
        metric_name: key of the metric to show inside each filter's report.
        log_plots: when true, non-zero values are passed through ``log``
            before plotting; zeros stay zero.
        api: name given to ``getDatasetSpecificApi``; falls back to
            ``dataset_name`` when falsy.
        examples_amount: ``'all'`` to show every example, a positive number
            to cap how many are shown, ``0`` (default) to show none.
        rows: not used by this function.
        columns: not used by this function.
        vertical: forwarded to ``plotDistribution``.
        max_columns: when a distribution has more intervals than this, the
            plot is given ``scaleFactor = ceil(intervals / max_columns)``.
    """
    if examples_amount == 'all':
        # None as a slice bound means "no limit" in the slices below.
        examples_amount = None
    # Decided once here; the original re-evaluated this at three sites.
    show_examples = examples_amount is None or examples_amount > 0

    report = load_json_from_file('on_flow_by_classes_' + dataset_name + '_test.json')
    dataset_specific_api = getDatasetSpecificApi(api or dataset_name)

    # Pairs are only needed (and only loaded) when examples will be shown.
    pairs = None
    if show_examples:
        pairs = Pairs('pairs_' + dataset_name, get_classes_function=dataset_specific_api.getClasses)

    if log_plots:
        preprocess = lambda x: log(x) if x != 0 else 0
    else:
        preprocess = lambda x: x

    for filter_as_string in report:
        print('Для изображений', presentFilter(json.loads(filter_as_string)) + ':')
        for metric in report[filter_as_string][metric_name]:
            name = metric['name']
            value = metric['value']

            if name == 'distribution':
                print('Распределение:')
                if len(value) > max_columns:
                    scaleFactor = ceil(len(value) / max_columns)
                else:
                    scaleFactor = 1
                plotDistribution(value, preprocess, vertical=vertical, scaleFactor=scaleFactor)

                if not show_examples:
                    continue
                for interval in value:
                    if 'examples' not in interval:
                        continue
                    examples = interval['examples']
                    if not examples:
                        # An empty list used to raise IndexError on examples[0].
                        continue
                    print('Примеры для интервала с', interval['from'], 'до', interval['to'], ':')
                    limited = examples[:examples_amount]
                    if len(examples[0]) == 1:
                        # Single-id examples are shown together in one call.
                        showPairsImages([pairs[example[0]] for example in limited])
                    else:
                        # Multi-id examples are shown one example per call.
                        for example in limited:
                            showPairsImages([pairs[i] for i in example])

            elif name == 'kmeans' and show_examples:
                print('Результаты кластеризации на', len(value), 'кластер' + properEnding(len(value)) + ':')
                for cluster_id in value:
                    print('Примеры изображений из кластера ' + cluster_id + ':')
                    example_pairs = [pairs[object_id] for object_id in value[cluster_id][:examples_amount]]
                    if example_pairs:
                        printPairsPaths(example_pairs)

            elif name == 'mean':
                if metric_name == 'mean_histogram':
                    plt.hist(value, bins=len(value))
                    plt.show()
                else:
                    print('Среднее значение:', value)
# if not block:
# break
# handle.write(block)


def getMaxNumberFileName(dir_path):
    """Return the largest integer found among file-name stems in ``dir_path``.

    The stem is the part of the name before the first ``'.'``. Entries whose
    stem is not an integer (for example hidden files such as ``.DS_Store``)
    are skipped instead of raising ``ValueError``.

    Args:
        dir_path: directory to scan.

    Returns:
        The maximum stem value, or ``0`` when no numbered entry exists.
    """
    result = 0
    for element in os.listdir(dir_path):
        try:
            number = int(element.split('.')[0])
        except ValueError:
            continue
        result = max(result, number)
    return result


def downloadImages(links, dir_path):
    """Download ``links`` into ``dir_path`` using a pool of ``threads`` workers.

    Each item is saved by ``downloadImage`` under its index in the original
    ``links`` sequence. The directory is created when missing. When it
    already exists and the module-level ``overwrite`` flag is false, the
    download resumes from ``threads`` items before the highest-numbered
    file already present.

    Args:
        links: sequence of items; each item is passed as-is to
            ``downloadImage``.
        dir_path: target directory.
    """
    from_index = 0
    if not os.path.exists(dir_path):
        os.mkdir(dir_path)
    elif not overwrite:
        last_saved_image_number = getMaxNumberFileName(dir_path)
        # NOTE(review): presumably steps back by the pool size because
        # downloads finish out of order, so the newest files may be
        # missing or incomplete - confirm.
        from_index = max(0, last_saved_image_number - threads)
    links = links[from_index:]

    # The context manager releases the worker threads even if a download
    # raises; the original never closed the pool.
    with ThreadPool(threads) as pool:
        downloads = pool.imap_unordered(
            lambda n_link: downloadImage(n_link[1], dir_path, str(n_link[0])),
            enumerate(links, start=from_index),
        )
        # Drain the iterator only to drive the progress bar.
        for _ in tqdm(downloads, total=len(links)):
            pass


links = list(load_json_from_file(input_path).items())
downloadImages(links, output_path)