예제 #1
0
def getAnnotationsData(files_metadata):
    """Load every JSON file described in ``files_metadata``.

    Each metadata entry is indexed by ``'extension'`` and ``'name'``; only
    entries whose extension equals ``'json'`` are loaded.

    Returns a dict mapping the entry's ``'name'`` to the parsed content
    returned by ``load_json_from_file``. Entries sharing a name overwrite
    each other, the last one winning.
    """
    loaded = {}
    for meta in files_metadata:
        if meta['extension'] != 'json':
            continue
        content = load_json_from_file(getFilePath(meta))
        loaded[meta['name']] = content
    return loaded
예제 #2
0
def datasetClasses(dataset_name, vertical=True, total=None):
    """Print the per-class counts of a dataset report and plot them.

    Reads ``report_<dataset_name>.json`` through ``load_json_from_file`` and
    uses its ``'objects number by class'`` entry.

    Args:
        dataset_name: name used to build the report file name and the
            ``pairs_<dataset_name>`` folder name.
        vertical: forwarded to ``plotClassesDistribution``.
        total: value printed as the overall amount; when falsy (``None`` or
            ``0``) ``len(Pairs('pairs_' + dataset_name))`` is printed instead.
    """
    report = load_json_from_file('report_' + dataset_name + '.json')
    counts_by_class = report['objects number by class']

    print(len(counts_by_class), 'классов:')
    for class_name, objects_count in counts_by_class.items():
        label = 'Класс "' + class_name + '":'
        print(label.ljust(20), objects_count, 'изображений')
    print()

    # The pairs folder is only opened when no usable ``total`` was supplied.
    overall = total if total else len(Pairs('pairs_' + dataset_name))
    print('Всего:'.ljust(20), overall, 'изображений')
    print()

    plotClassesDistribution(counts_by_class, vertical)
예제 #3
0
def plotDatasetClassesDistribution(dataset_name, vertical=True):
    """Plot the ``'objects number by class'`` entry of a dataset report.

    The report is read from ``report_<dataset_name>.json``; ``vertical`` is
    forwarded unchanged to ``plotClassesDistribution``.
    """
    counts_by_class = load_json_from_file(
        'report_' + dataset_name + '.json')['objects number by class']
    plotClassesDistribution(counts_by_class, vertical)
예제 #4
0
def _plotMetricDistribution(intervals, preprocess, vertical, max_columns):
    """Plot a 'distribution' metric value through ``plotDistribution``."""
    # With more than ``max_columns`` intervals a scale factor of
    # ceil(len / max_columns) is passed on; otherwise the factor is 1.
    if len(intervals) > max_columns:
        scale_factor = ceil(len(intervals) / max_columns)
    else:
        scale_factor = 1
    plotDistribution(intervals,
                     preprocess,
                     vertical=vertical,
                     scaleFactor=scale_factor)


def _showIntervalExamples(interval, pairs, examples_amount):
    """Show the example images attached to one distribution interval."""
    examples = interval.get('examples')
    if not examples:
        # No 'examples' key, or an empty list: there is nothing to show and
        # indexing the first example would raise IndexError.
        return
    print('Примеры для интервала с', interval['from'], 'до', interval['to'],
          ':')
    selected = examples[:examples_amount]
    if len(examples[0]) == 1:
        # Single-index examples are gathered and shown in one call.
        showPairsImages([pairs[example[0]] for example in selected])
    else:
        # Multi-index examples are shown one example per call.
        for example in selected:
            showPairsImages([pairs[i] for i in example])


def _printClusterExamples(clusters, pairs, examples_amount):
    """Print the paths of example pairs for every cluster of a 'kmeans' metric."""
    print('Результаты кластеризации на', len(clusters),
          'кластер' + properEnding(len(clusters)) + ':')
    for cluster_id, object_ids in clusters.items():
        print('Примеры изображений из кластера ' + cluster_id + ':')
        example_pairs = [
            pairs[object_id] for object_id in object_ids[:examples_amount]
        ]
        if example_pairs:
            printPairsPaths(example_pairs)


def datasetMetrics(dataset_name,
                   metric_name,
                   log_plots=False,
                   api=None,
                   examples_amount=0,
                   rows=None,
                   columns=None,
                   vertical=True,
                   max_columns=20):
    """Print and plot one metric of a dataset for every filter in its report.

    The report is read from ``on_flow_by_classes_<dataset_name>_test.json``.
    Its keys are JSON-encoded filters; each value is indexed by
    ``metric_name`` and yields a list of entries with ``'name'`` and
    ``'value'``. Entries named ``'distribution'``, ``'kmeans'`` and ``'mean'``
    are handled; any other name is ignored.

    Args:
        dataset_name: used to build the report file name and the
            ``pairs_<dataset_name>`` folder name.
        metric_name: key of the metric inside each filter's report. The value
            ``'mean_histogram'`` makes ``'mean'`` entries render as a
            histogram instead of a printed number.
        log_plots: when true, distribution values are passed through ``log``
            (zero stays zero) before plotting.
        api: name handed to ``getDatasetSpecificApi``; defaults to
            ``dataset_name`` when falsy.
        examples_amount: how many examples to show per interval / cluster.
            ``'all'`` (or ``None``) shows every example; ``0`` or a negative
            number shows none, and then the pairs folder is not opened.
        rows: unused by this function.
        columns: unused by this function.
        vertical: forwarded to ``plotDistribution``.
        max_columns: threshold above which a scale factor greater than 1 is
            passed to ``plotDistribution``.
    """
    examples_amount = None if (examples_amount == 'all') else examples_amount
    report_file_name = 'on_flow_by_classes_' + dataset_name + '_test.json'
    report = load_json_from_file(report_file_name)
    pairs_folder_name = 'pairs_' + dataset_name
    dataset_specific_api = getDatasetSpecificApi(api or dataset_name)

    # Decided once: examples are wanted when the amount is unlimited (None)
    # or positive. ``pairs`` is only touched when this flag is true.
    show_examples = (examples_amount is None) or (examples_amount > 0)
    pairs = None
    if show_examples:
        pairs = Pairs(pairs_folder_name,
                      get_classes_function=dataset_specific_api.getClasses)

    # The value transform depends only on ``log_plots``, so build it once.
    if log_plots:
        preprocess = lambda x: log(x) if x != 0 else 0
    else:
        preprocess = lambda x: x

    for filter_as_string in report:
        print('Для изображений',
              presentFilter(json.loads(filter_as_string)) + ':')
        for metric in report[filter_as_string][metric_name]:
            name = metric['name']
            value = metric['value']
            if name == 'distribution':
                print('Распределение:')
                _plotMetricDistribution(value, preprocess, vertical,
                                        max_columns)
                if show_examples:
                    for interval in value:
                        _showIntervalExamples(interval, pairs,
                                              examples_amount)
            elif name == 'kmeans' and show_examples:
                _printClusterExamples(value, pairs, examples_amount)
            elif name == 'mean':
                if metric_name == 'mean_histogram':
                    plt.hist(value, bins=len(value))
                    plt.show()
                else:
                    print('Среднее значение:', value)
예제 #5
0
        # 	if not block:
        # 		break
        # 	handle.write(block)


def getMaxNumberFileName(dir_path):
    """Return the largest integer file-name stem found in ``dir_path``.

    The stem is the part of each directory entry before its first ``'.'``.
    Entries whose stem is not an integer (hidden files such as
    ``.DS_Store``, stray notes, sub-directories with textual names) are
    skipped instead of aborting the scan with ``ValueError``.

    Returns ``0`` when the directory has no numerically named entry; the
    result is never negative.
    """
    result = 0
    for element in os.listdir(dir_path):
        stem = element.split('.')[0]
        try:
            number = int(stem)
        except ValueError:
            # Not a numbered file: ignore it rather than crash.
            continue
        result = max(result, number)
    return result


def downloadImages(links, dir_path):
    """Download every entry of ``links`` into ``dir_path`` using a thread pool.

    Each entry is handed to ``downloadImage`` together with ``dir_path`` and
    the entry's position in the original ``links`` sequence, as a string.

    The module-level ``overwrite`` and ``threads`` settings control the run:
    ``threads`` is the pool size, and when ``dir_path`` already exists and
    ``overwrite`` is false the download resumes from the highest numbered
    file found there, minus ``threads``.

    Args:
        links: sliceable sequence of entries to download.
        dir_path: target directory; created when it does not exist.
    """
    from_index = 0
    if not os.path.exists(dir_path):
        os.mkdir(dir_path)
    elif not overwrite:
        last_saved_image_number = getMaxNumberFileName(dir_path)
        # Step back by the pool size before resuming.
        # NOTE(review): presumably because up to ``threads`` downloads may
        # have been unfinished when the previous run stopped - confirm.
        from_index = max(0, last_saved_image_number - threads)
        links = links[from_index:]

    def download(n_link):
        number, link = n_link
        return downloadImage(link, dir_path, str(number))

    # The context manager releases the worker threads once every result has
    # been consumed, or as soon as a download raises.
    with ThreadPool(threads) as pool:
        results = pool.imap_unordered(download,
                                      enumerate(links, start=from_index))
        # The loop only drains the iterator so tqdm can show progress.
        for _ in tqdm(results, total=len(links)):
            pass


# Script entry point: load the mapping stored at ``input_path``, turn it into
# a list of (key, value) items and download them into ``output_path``.
# NOTE(review): ``input_path`` and ``output_path`` are defined outside the
# visible part of the file - confirm where they come from.
links = list(load_json_from_file(input_path).items())
downloadImages(links, output_path)