def init(filename=None):
    rootLogger = logging.getLogger()

    if filename is None:
        FileUtils.createDir('./logs')
        filename = os.path.abspath(
            './logs/' + datetime.now().strftime('%y-%m-%d_auto') + '.log')
        # already initialized for today's auto-generated log file
        if len(rootLogger.handlers) > 0:
            if os.path.exists(filename):
                return

    # remove previously attached handlers before re-configuring
    # (iterate over a copy, since removeHandler mutates the list)
    for each in list(rootLogger.handlers):
        rootLogger.removeHandler(each)

    logFormatter = logging.Formatter(
        "%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s]  %(message)s"
    )
    rootLogger.setLevel(logging.INFO)

    fileHandler = logging.FileHandler(filename)
    fileHandler.setFormatter(logFormatter)
    rootLogger.addHandler(fileHandler)

    consoleHandler = logging.StreamHandler()
    consoleHandler.setFormatter(logFormatter)
    rootLogger.addHandler(consoleHandler)
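
A minimal usage sketch of the logger setup above; it assumes init() and the standard logging module are importable in the calling script (the import itself is omitted):

# hypothetical usage sketch
init()  # creates ./logs/<yy-mm-dd>_auto.log and attaches file + console handlers
logging.info('experiment started')
init(filename='./logs/custom_run.log')  # explicit log file; existing handlers are replaced
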
Example n. 2
 def create_local(args):
     try:
         if args.name and len(args.name) > 0:
             filename = './tasks/' + args.report + '/runs/' + args.name + '/' + args.name + '.csv'
             if not os.path.exists(filename):
                 headers = args.params_report_local
                 with open(filename, 'w') as outfile:
                     FileUtils.lock_file(outfile)
                     outfile.write(','.join(headers) + '\n')
                     outfile.flush()
                     os.fsync(outfile)
                     FileUtils.unlock_file(outfile)
     except Exception as e:
         logging.error(str(e))
         exc_type, exc_value, exc_tb = sys.exc_info()
         logging.error('\n'.join(
             traceback.format_exception(exc_type, exc_value, exc_tb)))
Example n. 3
 def create(args):
     try:
         if args.report and len(args.report) > 0:
             filename = os.path.join('reports', args.report) + '.csv'
             if not os.path.exists(filename):
                 headers = args.params_report
                 if args.params_grid is not None:
                     headers += args.params_grid
                 with open(filename, 'w') as outfile:
                     FileUtils.lock_file(outfile)
                     outfile.write(','.join(headers) + '\n')
                     outfile.flush()
                     os.fsync(outfile)
                     FileUtils.unlock_file(outfile)
     except Exception as e:
         logging.error(str(e))
         exc_type, exc_value, exc_tb = sys.exc_info()
         logging.error('\n'.join(
             traceback.format_exception(exc_type, exc_value, exc_tb)))
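
A hedged sketch of how these CSV helpers appear to be driven: an argparse-style namespace carries the report name, run name and header lists. The concrete values below are illustrative only, and create_local assumes the run directory already exists:

# hypothetical invocation; field values are for illustration only
from types import SimpleNamespace

args = SimpleNamespace(
    report='triplet_experiments',
    name='run_001',
    params_report=['id', 'run_name', 'test_acc'],
    params_report_local=['id', 'epoch', 'test_acc'],
    params_grid=None)

create(args)        # writes reports/triplet_experiments.csv with a header row
create_local(args)  # writes ./tasks/triplet_experiments/runs/run_001/run_001.csv
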
Example n. 4
def process_dists(idx_start, y_each, y_list, path_embeddings, sample_count,
                  classes_size, embedding_size, triplet_similarity, mode):
    try:
        path_emb_json = f'{path_embeddings}/{y_each}.json'
        path_emb_mem = f'{path_embeddings}/{y_each}.mmap'
        path_dists_mem = f'{path_embeddings}/dists.mmap'

        dists_mem = np.memmap(path_dists_mem,
                              mode='r+',
                              dtype=np.float16,
                              shape=(sample_count, classes_size))

        emb_json = FileUtils.loadJSON(path_emb_json)
        emb_mem = np.memmap(path_emb_mem,
                            mode='r',
                            dtype=np.float16,
                            shape=(emb_json['count'], embedding_size))

        path_centroids_mem = f'{path_embeddings}/centroids.mmap'
        centroids_mem = np.memmap(path_centroids_mem,
                                  mode='r',
                                  dtype=np.float16,
                                  shape=(classes_size, embedding_size))

        for idx_y in y_list:
            np_class_centroids_tiled = np.tile(centroids_mem[idx_y],
                                               (emb_json['count'], 1))
            dists = get_distance(emb_mem, np_class_centroids_tiled,
                                 triplet_similarity, mode).tolist()
            dists_mem[idx_start:idx_start + emb_json['count'],
                      idx_y] = dists[:]
        #dists_mem.flush()
    except Exception as e:
        logging.error(str(e))
        exc_type, exc_value, exc_tb = sys.exc_info()
        logging.error('\n'.join(
            traceback.format_exception(exc_type, exc_value, exc_tb)))
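
process_dists (and calculate_accuracy further below) call a get_distance helper that is not part of this listing. A minimal sketch of a row-wise distance between two aligned embedding matrices, assuming triplet_similarity selects cosine versus euclidean and mode is ignored on CPU; the signature is inferred, not taken from the source:

# hypothetical sketch of the missing get_distance helper (inferred signature)
import numpy as np

def get_distance(a, b, triplet_similarity='cos', mode='cpu'):
    a = np.asarray(a, dtype=np.float32)
    b = np.asarray(b, dtype=np.float32)
    if triplet_similarity == 'cos':
        # cosine distance per row: 1 - dot(a, b) / (|a| * |b|)
        num = np.sum(a * b, axis=1)
        den = np.linalg.norm(a, axis=1) * np.linalg.norm(b, axis=1) + 1e-18
        return 1.0 - num / den
    # default: euclidean distance per row
    return np.linalg.norm(a - b, axis=1)
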
Example n. 5
parser.add_argument('-debug_batch_count', default=0, type=int) # 0 = release version

parser.add_argument('-embedding_size', default=32, type=int)

parser.add_argument('-gamma', default=0.0, type=float)
parser.add_argument('-C_0', default=0.0, type=float)
parser.add_argument('-C_n', default=5.0, type=float)
parser.add_argument('-C_interval', default=10000, type=int)
parser.add_argument('-C_start', default=0, type=int)

args, args_other = parser.parse_known_args()

path_sequence = f'./results/{args.sequence_name}'
args.run_name += ('-' + datetime.utcnow().strftime('%y-%m-%d--%H-%M-%S'))
path_run = f'./results/{args.sequence_name}/{args.run_name}'
FileUtils.createDir(path_run)
path_artifacts = f'./artifacts/{args.sequence_name}/{args.run_name}'
FileUtils.createDir(path_artifacts)
FileUtils.writeJSON(f'{path_run}/args.json', args.__dict__)

CsvUtils2.create_global(path_sequence)
CsvUtils2.create_local(path_sequence, args.run_name)

summary_writer = tensorboard_utils.CustomSummaryWriter(
    logdir=path_run
)

rootLogger = logging.getLogger()
logFormatter = logging.Formatter("%(asctime)s [%(process)d] [%(thread)d] [%(levelname)s]  %(message)s")
rootLogger.setLevel(logging.INFO)
Example n. 6
                    default=False,
                    type=lambda x: (str(x).lower() == 'true'))

args, args_other = parser.parse_known_args()
args = ArgsUtils.add_other_args(args, args_other)
args_other_names = ArgsUtils.extract_other_args_names(args_other)

if args.is_restricted_cpu:
    from cgroups import Cgroup  # pip install cgroups
    # sudo /home/ubuntu/anaconda3/bin/user_cgroups ubuntu
    # sudo /home/evalds/.conda/envs/conda_env/bin/user_cgroups evalds

# add all testable parameters to final report header
args.params_report += args_other_names

FileUtils.createDir('./reports')
FileUtils.createDir('./tasks')
FileUtils.createDir('./tasks/' + args.report)

logging_utils = LoggingUtils(filename=os.path.join('reports', args.report +
                                                   '.txt'))
ArgsUtils.log_args(args, 'taskgen.py', logging_utils)

task_settings = {'id': 0, 'repeat_id': 0}
tasks_settings_path = os.path.join('tasks', 'tasks.json')
if os.path.exists(tasks_settings_path):
    with open(tasks_settings_path, 'r') as outfile:
        tasks_settings_loaded = json.load(outfile)
        for key in tasks_settings_loaded:
            task_settings[key] = tasks_settings_loaded[key]
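
Only the read side of tasks.json is shown; a hypothetical counterpart that allocates the next task id and writes the settings back, reusing the same path variable:

# hypothetical write-back, mirroring the read above
task_settings['id'] += 1
with open(tasks_settings_path, 'w') as outfile:
    json.dump(task_settings, outfile, indent=4)
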
Example n. 7
    'score_best',
    'loss',
    'loss_dqn',
    'loss_inverse',
    'loss_forward',
    'cosine_distance'
]
if args.params_report is not None:
    for it in reversed(args.params_report):
        if it not in tmp:
            tmp.insert(0, it)

args.params_report = tmp
args.params_report_local = args.params_report

FileUtils.createDir('./tasks/' + args.report)
run_path = './tasks/' + args.report + '/runs/' + args.name

if os.path.exists(run_path):
    shutil.rmtree(run_path, ignore_errors=True)
    time.sleep(3)
    while os.path.exists(run_path):
        time.sleep(0.1)  # avoid a hot spin-wait while the old run directory disappears

FileUtils.createDir(run_path)
logging_utils = LoggingUtils(filename=os.path.join(run_path, 'log.txt'))
is_logged_cnorm = False

ArgsUtils.log_args(args, 'main.py', logging_utils)

CsvUtils.create_local(args)
Example n. 8
parser.add_argument('-hpc_mem',
                    help='HPC - override mem GB',
                    default=0,
                    type=int)

parser.add_argument('-is_hpc',
                    help='is HPC qsub tasks or local tasks',
                    default=True,
                    type=lambda x: (str(x).lower() == 'true'))

parser.add_argument('-hpc_queue', help='hpc queue', default='batch', type=str)

args, args_other = parser.parse_known_args()

FileUtils.createDir('./reports')
FileUtils.createDir('./tasks')
#FileUtils.createDir('./tasks/' + args.report)

if args.is_hpc:
    FileUtils.createDir(os.path.expanduser('~') + '/tmp')

logging_utils = LoggingUtils(name=os.path.join('reports', args.report +
                                               '.txt'))

task_settings = {'id': 0, 'repeat_id': 0}

hpc_settings_path = os.path.join('tasks', 'tasks.json')
if os.path.exists(hpc_settings_path):
    with open(hpc_settings_path, 'r') as outfile:
        hpc_settings_loaded = json.load(outfile)
Example n. 9
    def add_hparams(path_sequence, run_name, args_dict, metrics_dict, global_step):
        try:
            path_local_csv = f'{path_sequence}/{run_name}.csv'
            path_global_csv = f'{path_sequence}/sequence-{os.path.basename(path_sequence)}.csv'

            args_dict = copy.copy(args_dict)
            metrics_dict = copy.copy(metrics_dict)
            for each_dict in [args_dict, metrics_dict]:
                for key in list(each_dict.keys()):
                    # keep only scalar values that serialize cleanly to CSV
                    if not isinstance(each_dict[key],
                                      (float, int, str, np.floating, np.integer)):
                        del each_dict[key]

            for path_csv in [path_local_csv, path_global_csv]:

                if os.path.exists(path_csv):
                    with open(path_csv, 'r+') as outfile:
                        FileUtils.lock_file(outfile)
                        lines_all = outfile.readlines()
                        lines_all = [it.replace('\n', '').split(',') for it in lines_all if ',' in it]
                        if len(lines_all) == 0 or len(lines_all[0]) < 2:
                            headers = ['step'] + list(args_dict.keys()) + list(metrics_dict.keys())
                            headers = [str(it).replace(',', '_') for it in headers]
                            lines_all.append(headers)

                        values = [global_step] + list(args_dict.values()) + list(metrics_dict.values())
                        values = [str(it).replace(',', '_') for it in values]
                        if path_csv == path_local_csv:
                            lines_all.append(values)
                        else:
                            # global
                            existing_line_idx = -1
                            args_values = list(args_dict.values())
                            args_values = [str(it).replace(',', '_') for it in args_values]
                            for idx_line, line in enumerate(lines_all):
                                if len(line) > 1:
                                    is_match = True
                                    for idx_arg in range(len(args_values)):
                                        if line[idx_arg + 1] != args_values[idx_arg]:
                                            is_match = False
                                            break
                                    if is_match:
                                        existing_line_idx = idx_line
                                        break
                            if existing_line_idx >= 0:
                                lines_all[existing_line_idx] = values
                            else:
                                lines_all.append(values)

                        outfile.truncate(0)
                        outfile.seek(0)
                        outfile.flush()
                        rows = [','.join(it) for it in lines_all]
                        rows = [it for it in rows if len(it.replace('\n', '').strip()) > 0]
                        outfile.write('\n'.join(rows).strip())
                        outfile.flush()
                        os.fsync(outfile)
                        FileUtils.unlock_file(outfile)

        except Exception as e:
            logging.exception(e)
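
FileUtils.lock_file and FileUtils.unlock_file are used throughout the listing but never shown; a plausible POSIX sketch of just these two helpers, offered as an assumption rather than the project's actual implementation:

# assumed sketch of what FileUtils.lock_file / unlock_file likely wrap (POSIX advisory locks)
import fcntl

def lock_file(fp):
    # block until an exclusive lock on the open file is acquired
    fcntl.flock(fp.fileno(), fcntl.LOCK_EX)

def unlock_file(fp):
    fcntl.flock(fp.fileno(), fcntl.LOCK_UN)
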
Example n. 10
# /simpsons/test.mmap
# /simpsons/test.json
parser.add_argument('-path_output',
                    default='/Users/evalds/Downloads/simpsons_x/',
                    type=str)

# scale and squeeze images to this size
parser.add_argument('-size_img', default=128, type=int)
parser.add_argument('-thread_max', default=10, type=int)

parser.add_argument('-test_split', default=0.2, type=float)

args, args_other = parser.parse_known_args()

FileUtils.createDir(args.path_output)
logging_utils = LoggingUtils(
    f"{args.path_output}/simpsons-{datetime.now().strftime('%y-%m-%d_%H-%M-%S')}.log"
)

class_names = []
last_class_name = None

mmap_shape = [0, 3, args.size_img, args.size_img]

logging_utils.info(
    'moving test samples into train to change the task from classification to re-identification'
)

paths_files = FileUtils.listSubFiles(args.path_input_test)
for path_file in paths_files:
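
The body of the loop over paths_files is not included in this excerpt; a minimal sketch of moving each test image into the corresponding train class folder, where args.path_input_train and the class-subfolder layout are assumptions:

# hypothetical loop body; path_input_train and the folder layout are assumptions
import shutil

for path_file in paths_files:
    class_name = os.path.basename(os.path.dirname(path_file))
    path_target_dir = os.path.join(args.path_input_train, class_name)
    FileUtils.createDir(path_target_dir)
    shutil.move(path_file, os.path.join(path_target_dir, os.path.basename(path_file)))
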
Example n. 11
args_other_names = ArgsUtils.extract_other_args_names(args_other)

if len(args.datasource_include_test_class_ids) > 0:
    args.datasource_include_test_class_ids = ' '.join(
        args.datasource_include_test_class_ids)
if len(args.datasource_exclude_train_class_ids) > 0:
    args.datasource_exclude_train_class_ids = ' '.join(
        args.datasource_exclude_train_class_ids)

if args.hpc_queue == 'inf':
    args.hpc_gpu_max_queue = 0  # for old ones disable GPU

# add all testable parameters to final report header
args.params_report += args_other_names

FileUtils.createDir('./reports')
FileUtils.createDir('./tasks')
FileUtils.createDir('./tasks/' + args.report)
if args.is_hpc:
    FileUtils.createDir(os.path.expanduser('~') + '/tmp')

logging_utils = LoggingUtils(filename=os.path.join('reports', args.report +
                                                   '.txt'))
ArgsUtils.log_args(args, 'taskgen.py', logging_utils)

task_settings = {'id': 0, 'repeat_id': 0}
hpc_settings_path = os.path.join('tasks', 'tasks.json')
if os.path.exists(hpc_settings_path):
    with open(hpc_settings_path, 'r') as outfile:
        hpc_settings_loaded = json.load(outfile)
        for key in hpc_settings_loaded:
            task_settings[key] = hpc_settings_loaded[key]
Example n. 12
    def __init__(self, args, is_test_data):
        super().__init__()

        self.args = args
        self.is_test_data = is_test_data

        path_data = f'{self.args.path_data}/{self.args.datasource_type}'
        FileUtils.createDir(path_data)

        if not os.path.exists(
                f'{self.args.path_data}/{self.args.datasource_type}/lock'):
            with open(
                    f'{self.args.path_data}/{self.args.datasource_type}/lock',
                    'w') as fp_download_lock:
                fp_download_lock.write('')
            time.sleep(1.0)

        with open(f'{self.args.path_data}/{self.args.datasource_type}/lock',
                  'r+') as fp_download_lock:
            FileUtils.lock_file(fp_download_lock)

            transform_colors = torchvision.transforms.ToTensor()
            if self.args.datasource_is_grayscale:
                transform_colors = torchvision.transforms.Compose([
                    torchvision.transforms.Grayscale(),
                    torchvision.transforms.ToTensor()
                ])

            if self.args.datasource_type == 'fassion_mnist':
                self.dataset = torchvision.datasets.FashionMNIST(
                    path_data,
                    download=True,
                    train=not is_test_data,
                    transform=torchvision.transforms.ToTensor())
            elif self.args.datasource_type == 'mnist':
                self.dataset = torchvision.datasets.MNIST(
                    path_data,
                    download=True,
                    train=not is_test_data,
                    transform=torchvision.transforms.ToTensor())
            elif self.args.datasource_type == 'cifar_10':

                self.dataset = torchvision.datasets.CIFAR10(
                    path_data,
                    download=True,
                    train=not is_test_data,
                    transform=transform_colors)
            elif self.args.datasource_type == 'cifar_100':
                self.dataset = torchvision.datasets.CIFAR100(
                    path_data,
                    download=True,
                    train=not is_test_data,
                    transform=transform_colors)
            elif self.args.datasource_type == 'emnist':  # extended mnist https://arxiv.org/pdf/1702.05373.pdf
                self.dataset = torchvision.datasets.EMNIST(
                    path_data,
                    download=True,
                    split='balanced',
                    train=not is_test_data,
                    transform=torchvision.transforms.Compose([
                        lambda img: torchvision.transforms.functional.rotate(img, -90),
                        lambda img: torchvision.transforms.functional.hflip(img),
                        torchvision.transforms.ToTensor()
                    ]))

            FileUtils.unlock_file(fp_download_lock)

        self.classes = np.arange(np.array(self.dataset.targets).max() +
                                 1).tolist()
        groups = [{'samples': [], 'counter': 0} for _ in self.classes]

        for img, label_idx in self.dataset:
            groups[int(label_idx)]['samples'].append(img)

        # img tensor layout: (channels, height, width)
        args.input_size = img.size(1)  # spatial size
        args.input_features = img.size(0)  # channel count

        if not is_test_data:
            ids = [
                int(it) for it in self.args.datasource_exclude_train_class_ids
            ]
            ids = sorted(ids, reverse=True)
            for remove_id in ids:
                del self.classes[remove_id]
                del groups[remove_id]
        else:
            if len(self.args.datasource_include_test_class_ids):
                ids = set(self.classes) - set([
                    int(it)
                    for it in self.args.datasource_include_test_class_ids
                ])
                ids = list(ids)
                ids = sorted(ids, reverse=True)
                for remove_id in ids:
                    del self.classes[remove_id]
                    del groups[remove_id]

        self.classes = np.array(self.classes, dtype=int)
        self.size_samples = 0
        for idx, group in enumerate(groups):
            samples = group['samples']
            self.size_samples += len(samples)
        self.groups = groups

        # for debugging purposes
        # DEBUGGING
        if self.args.datasource_size_samples > 0:
            logging.info(
                f'debugging: reduced data size {self.args.datasource_size_samples}'
            )
            self.size_samples = self.args.datasource_size_samples

        logging.info(
            f'{self.args.datasource_type} {"test" if is_test_data else "train"}: classes: {len(groups)} total triplets: {self.size_samples}'
        )

        if not is_test_data:
            self.args.datasource_classes_train = len(
                groups)  # override class count

        if self.args.batch_size % self.args.triplet_positives != 0 or self.args.batch_size <= self.args.triplet_positives:
            logging.error(
                f'batch does not accommodate triplet_positives {self.args.batch_size} {self.args.triplet_positives}'
            )
            exit()
        self.reshuffle()
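
A hedged sketch of how a dataset like this is typically consumed; the class name and the batch layout are assumptions, not taken from the source:

# hypothetical usage; DatasetTriplets is an assumed name for the class whose __init__ is shown above
import torch.utils.data

dataset_train = DatasetTriplets(args, is_test_data=False)
loader_train = torch.utils.data.DataLoader(
    dataset_train,
    batch_size=args.batch_size,  # must be a multiple of args.triplet_positives (checked above)
    shuffle=False,  # ordering is assumed to be handled by the dataset's own reshuffle()
    drop_last=True)
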
Example n. 13
    def add_results(args, state):
        try:
            if args.report and len(args.report) > 0:
                filename = os.path.join('reports', args.report) + '.csv'

                if not os.path.exists(filename):
                    if not os.path.exists('./reports'):
                        os.mkdir('./reports')
                    with open(filename, 'w') as outfile:
                        FileUtils.lock_file(outfile)
                        outfile.write(','.join(args.params_report) + '\n')
                        outfile.flush()
                        os.fsync(outfile)
                        FileUtils.unlock_file(outfile)

                lines_all = []
                with open(filename, 'r+') as outfile:
                    FileUtils.lock_file(outfile)
                    raw_lines = outfile.readlines()
                    if len(raw_lines) > 0:
                        header_line = raw_lines[0].strip()
                        headers = header_line.split(',')
                    else:
                        headers = args.params_report
                        lines_all.append(headers)

                    for line in raw_lines:
                        line = line.strip()
                        if len(line) > 0 and ',' in line:
                            parts = line.split(',')
                            lines_all.append(parts)

                    line_new = []
                    for key in headers:
                        #! gather from state
                        if key in state:
                            line_new.append(str(state[key]))
                        # ! gather also from args
                        elif key in vars(args):
                            line_new.append(str(getattr(args, key)))
                        # ! if not found empty
                        else:
                            line_new.append('')

                    # look for existing line to override
                    part_idx_id = headers.index('id')
                    is_exist = False
                    try:
                        for idx_line in range(1, len(lines_all)):
                            parts = lines_all[idx_line]
                            part_id = parts[part_idx_id]
                            if str(args.id) == part_id.strip():
                                lines_all[idx_line] = line_new
                                is_exist = True
                                break
                    except Exception as e:
                        logging.error(str(e))
                        exc_type, exc_value, exc_tb = sys.exc_info()
                        logging.error('\n'.join(
                            traceback.format_exception(exc_type, exc_value, exc_tb)))

                    if not is_exist:
                        lines_all.append(line_new)

                    outfile.truncate(0)
                    outfile.seek(0)
                    outfile.flush()
                    rows = [','.join(it) for it in lines_all]
                    outfile.write('\n'.join(rows))
                    outfile.flush()
                    os.fsync(outfile)
                    FileUtils.unlock_file(outfile)
        except Exception as e:
            logging.error(str(e))
            exc_type, exc_value, exc_tb = sys.exc_info()
            logging.error('\n'.join(
                traceback.format_exception(exc_type, exc_value, exc_tb)))
Example n. 14
def calculate_accuracy(
        path_embeddings,
        meter_acc: tnt.meter.ClassErrorMeter,
        meter_auc: tnt.meter.AUCMeter,
        type='range',
        norm='l2',
        triplet_similarity='cos',
        mode='cpu',
        embedding_size=None,
        class_max_dist=None,  # precomputed
        class_centroids=None,  # precomputed
        y_list=None,  # precomputed
        sample_count=None,  # precomputed
        paths_embs_idx_path_pairs=None):  # precomputed

    paths_embs = FileUtils.listSubFiles(path_embeddings)

    # calculate centroids first
    if class_max_dist is None:
        class_centroids = {}
        class_max_dist = {}
        y_list = []
        paths_embs_idx_path_pairs = []
        sample_count = 0

        for path_emb in paths_embs:
            if path_emb.endswith('.json'):
                y_each = int(os.path.basename(path_emb).split('.')[0])
                path_emb_json = f'{path_embeddings}/{y_each}.json'
                path_emb_mem = f'{path_embeddings}/{y_each}.mmap'

                emb_json = FileUtils.loadJSON(path_emb_json)
                emb_mem = np.memmap(path_emb_mem,
                                    mode='r',
                                    dtype=np.float16,
                                    shape=(emb_json['count'], embedding_size))

                paths_embs_idx_path_pairs.append((sample_count, y_each))
                sample_count += emb_json['count']

                y_list += (np.ones(
                    (emb_json['count'], ), dtype=int) * y_each).tolist()

                class_centroids[y_each] = np.average(emb_mem, axis=0)
                if norm == 'l2':
                    class_centroids[y_each] = normalize_vec(
                        class_centroids[y_each])

                np_class_centroids_tiled = np.tile(class_centroids[y_each],
                                                   (len(emb_mem), 1))
                list_dists = get_distance(np_class_centroids_tiled, emb_mem,
                                          triplet_similarity, mode).tolist()
                list_dists = sorted(list_dists, reverse=False)
                # drop the top 10 percent of distances as they could contain noisy embeddings
                list_dists = list_dists[:max(2, int(len(list_dists) * 0.9))]
                class_max_dist[y_each] = list_dists[-1]  # largest remaining distance

    classes_size = int(np.max(y_list)) + 1

    # store distance matrix as memmap for optimization
    path_dists_mem = f'{path_embeddings}/dists.mmap'
    is_exist_dists_mem = os.path.exists(path_dists_mem)
    dists_mem = np.memmap(path_dists_mem,
                          mode='r+' if is_exist_dists_mem else 'w+',
                          dtype=np.float16,
                          shape=(sample_count, classes_size))
    #dists_mem.flush()

    path_centroids_mem = f'{path_embeddings}/centroids.mmap'
    is_exist_centroids_mem = os.path.exists(path_centroids_mem)
    centroids_mem = np.memmap(path_centroids_mem,
                              mode='r+' if is_exist_centroids_mem else 'w+',
                              dtype=np.float16,
                              shape=(classes_size, embedding_size))
    for key, value in class_centroids.items():
        centroids_mem[key] = value
    #centroids_mem.flush()

    if not is_exist_dists_mem:
        Parallel(n_jobs=multiprocessing.cpu_count() * 2, backend='threading')(
            delayed(process_dists)(idx_start, y_each, y_list, path_embeddings,
                                   sample_count, classes_size, embedding_size,
                                   triplet_similarity, mode)
            for idx_start, y_each in paths_embs_idx_path_pairs)

        dists_mem = np.memmap(path_dists_mem,
                              mode='r',
                              dtype=np.float16,
                              shape=(sample_count, classes_size))

    # iterate through the precomputed distances in chunks and feed the meters (memory optimization)
    chunk_size = 1024
    for idx_chunk_start in range(0, sample_count, chunk_size):
        idx_chunk_end = min(sample_count, idx_chunk_start + chunk_size)
        chunk_each_size = idx_chunk_end - idx_chunk_start

        if chunk_each_size == 0:
            break

        if type == 'range':
            predicted = np.zeros((chunk_each_size, classes_size), dtype=float)
        else:
            # start with a large distance so np.minimum can keep the closest class
            predicted = np.ones((chunk_each_size, classes_size), dtype=float) * 1e9
        target = np.zeros((chunk_each_size, classes_size), dtype=float)

        for idx_y in class_max_dist.keys():
            max_dist = class_max_dist[idx_y]
            for idx_class in range(chunk_each_size):
                target[idx_class, y_list[idx_chunk_start + idx_class]] = 1.0

            dists = dists_mem[idx_chunk_start:idx_chunk_end]

            if type == 'range':
                for idx_emb, dist in enumerate(dists):
                    if max_dist > dist[idx_y]:
                        predicted[idx_emb, idx_y] += 1.0
            else:
                predicted[:, idx_y] = np.minimum(
                    predicted[:, idx_y], dists[:, idx_y]
                )  # store for each class closest embedding with distance value

        if type == 'range':
            predicted = predicted / (np.sum(predicted, axis=1, keepdims=True) +
                                     1e-18)
        else:
            # TODO softmax/hardmax based accuracy
            idx_class = np.argmin(
                predicted, axis=1)  # for each sample select closest distance
            predicted = np.zeros_like(predicted)  # init probabilities vector
            predicted[
                np.arange(predicted.shape[0]),
                idx_class] = 1.0  # for each sample set prob 100% by columns
        y_chunk = np.array(y_list[idx_chunk_start:idx_chunk_end])
        meter_acc.add(predicted, y_chunk)

        # AUCMeter requires binary targets (0/1): 1 when the predicted class matches the label
        idxes_classes = np.argmax(predicted, axis=1)
        target_tp = np.array(np.equal(y_chunk, idxes_classes), dtype=int)
        meter_auc.add(np.max(predicted, axis=1), target_tp)

    return class_max_dist, class_centroids, y_list, sample_count, paths_embs_idx_path_pairs
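
A hedged usage sketch for calculate_accuracy with torchnet meters, matching the type hints in the signature; the embeddings directory and embedding_size below are placeholders:

# hypothetical invocation; path and embedding_size are placeholders
import torchnet as tnt

meter_acc = tnt.meter.ClassErrorMeter(accuracy=True)
meter_auc = tnt.meter.AUCMeter()

# the first call computes centroids and per-class max distances; later calls can pass them back in
class_max_dist, class_centroids, y_list, sample_count, pairs = calculate_accuracy(
    path_embeddings='./embeddings/epoch_1',
    meter_acc=meter_acc,
    meter_auc=meter_auc,
    type='range',
    norm='l2',
    triplet_similarity='cos',
    mode='cpu',
    embedding_size=32)

acc = meter_acc.value()[0]   # top-1 accuracy in percent
auc = meter_auc.value()[0]   # area under the ROC curve
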