Example 1
 def save(self):
     if self._id:
         command = '%s.save(%s)' % (self._mongocollection, str(self._get_hash()))
         if DBDEBUG: Logger.log_to_file(command)
         eval(command)
     else:
         command = '%s.insert(%s)' % (self._mongocollection, str(self._get_hash()))
         if DBDEBUG: Logger.log_to_file(command)
         self._id = eval(command)
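Note: the snippet above builds a MongoDB command as a string and runs it through eval(). A minimal eval-free sketch of the same save/insert logic, assuming self.db is a pymongo Database and self._collection_name holds the collection name as a string (both names are illustrative, not taken from the project):

 def save(self):
     # Dictionary-style collection lookup replaces eval() of a string command
     collection = self.db[self._collection_name]
     document = self._get_hash()
     if self._id:
         # replace_one(..., upsert=True) mirrors the legacy collection.save()
         collection.replace_one({'_id': self._id}, document, upsert=True)
     else:
         self._id = collection.insert_one(document).inserted_id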
Example 2
def app(symbols, start, end, threads, timeframe, folder, header):
    if start > end:
        return
    lock = threading.Lock()
    global day_counter
    total_days = how_many_days(start, end)

    if total_days == 0:
        return

    last_fetch = deque([], maxlen=5)
    update_progress(day_counter, total_days, -1, threads)

    def do_work(symbol, day, csv):
        global day_counter
        start_time = time.time()
        Logger.info("Fetching day {0}".format(day))
        try:
            csv.append(day, decompress(symbol, day, fetch_day(symbol, day)))
        except Exception as e:
            print("ERROR for {0}, {1} Exception : {2}".format(
                day, symbol, str(e)))
        elapsed_time = time.time() - start_time
        last_fetch.append(elapsed_time)
        with lock:
            day_counter += 1
        Logger.info("Day {0} fetched in {1}s".format(day, elapsed_time))

    futures = []

    with concurrent.futures.ThreadPoolExecutor(
            max_workers=threads) as executor:

        files = {
            symbol: CSVDumper(symbol, timeframe, start, end, folder, header)
            for symbol in symbols
        }

        for symbol in symbols:
            for day in days(start, end):
                futures.append(
                    executor.submit(do_work, symbol, day, files[symbol]))

        for future in concurrent.futures.as_completed(futures):
            if future.exception() is None:
                update_progress(day_counter, total_days, avg(last_fetch),
                                threads)
            else:
                Logger.error("An error happened while fetching data: ",
                             future.exception())

        Logger.info("Fetching data terminated")
        for file in files.values():
            file.dump()

    update_progress(day_counter, total_days, avg(last_fetch), threads)
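For reference, the ThreadPoolExecutor + as_completed pattern that app() relies on, reduced to a self-contained sketch with a placeholder work function (all names here are illustrative only):

import concurrent.futures
import threading
import time

counter = 0
lock = threading.Lock()

def work(item):
    global counter
    time.sleep(0.1)          # stand-in for fetch_day/decompress
    with lock:                # protect the shared progress counter
        counter += 1
    return item * 2

with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    futures = [executor.submit(work, i) for i in range(10)]
    for future in concurrent.futures.as_completed(futures):
        if future.exception() is None:
            print('done:', future.result(), 'progress:', counter)
        else:
            print('error:', future.exception())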
Example 3
File: Email.py Project: iamcm/core
 def send(self, subject, body):
     msg = MIMEText(body, 'html')
     msg['Subject'] = subject
     msg['From'] = self.sender
     msg['To'] = self.recipient
     
     try:
         s = smtplib.SMTP(self.host)
         if settings.EMAILUSERNAME and settings.EMAILPASSWORD:
             s.login(settings.EMAILUSERNAME, settings.EMAILPASSWORD)
         s.sendmail(self.sender, self.recipient, msg.as_string())
     except Exception:
         Logger.log_to_file(msg.as_string())
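A hedged variant of the same send logic using smtplib's context-manager support (Python 3.3+), so the connection is closed even when sendmail() raises; the parameter names stand in for the settings used above:

 import smtplib
 from email.mime.text import MIMEText

 def send_html_mail(host, sender, recipient, subject, body, username=None, password=None):
     msg = MIMEText(body, 'html')
     msg['Subject'] = subject
     msg['From'] = sender
     msg['To'] = recipient
     # The with-block calls quit() on success and on error alike
     with smtplib.SMTP(host) as s:
         if username and password:
             s.login(username, password)
         s.sendmail(sender, recipient, msg.as_string())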
Example 4
 def do_work(symbol, day, csv):
     global day_counter
     start_time = time.time()
     Logger.info("Fetching day {0}".format(day))
     try:
         csv.append(day, decompress(symbol, day, fetch_day(symbol, day)))
     except Exception as e:
         print("ERROR for {0}, {1} Exception : {2}".format(
             day, symbol, str(e)))
     elapsed_time = time.time() - start_time
     last_fetch.append(elapsed_time)
     with lock:
         day_counter += 1
     Logger.info("Day {0} fetched in {1}s".format(day, elapsed_time))
Example 5
 def delete_one(self, entity, _id):
     """
     Deletes a single entity from the datastore based on the id given
     
     entity should be a string
     _id should be the string entity id
     
     e.g.
     todoId = '5047b7bb37d5e64e9a4b1c74'
     EntityManager(_DBCON).delete_one('Todo', todoId)
     """
     command = 'self.db.%s.remove({"_id":ObjectId("%s")})' % (entity, str(_id))
     if DBDEBUG: Logger.log_to_file(command)
     eval(command)
     
Example 6
 def _get(self, _id):
     command = '%s.find_one({"_id":ObjectId("%s")})' % (self._mongocollection, str(_id))
     if DBDEBUG: Logger.log_to_file(command)
     entity = eval(command)
     
     if entity:
         setattr(self, '_id', entity.get('_id'))
         for f, val in self._fields:
             fieldtype = type(getattr(self, f))
             fieldvalue = entity.get(f)
             
             if fieldtype == list:
                 fieldlist = []
                 for el in fieldvalue:
                     if type(el) == dict and '__instanceOf__' in el:
                         command = '%s(self.db, ObjectId("%s"))' % (self._unicode_to_class(el['__instanceOf__'])
                                                                     , el['_id'])
                         if DBDEBUG: Logger.log_to_file(command)
                         el = eval(command)
                     
                     fieldlist.append(el)
                 fieldvalue = fieldlist
             elif type(fieldvalue) == dict and '__instanceOf__' in fieldvalue:
                 command = '%s(self.db, ObjectId("%s"))' % (self._unicode_to_class(fieldvalue['__instanceOf__'])
                                                             , fieldvalue['_id'])
                 if DBDEBUG: Logger.log_to_file(command)       
                 fieldvalue = eval(command)
             
             setattr(self, f, fieldvalue)
Example 7
def clustering(dataset_folder,
               dataset,
               window_size=10,
               hash_module=1024,
               threshold=26,
               input_limit=None):
    loader = Loader()
    pages = loader.load_pages(dataset_folder, dataset)
    pages = pages[:input_limit]

    logger = Logger.get_instance()
    logger.print('############### INIZIO PASSO 1 ####################', 1)
    hash_table = {}
    algoritmo = Algoritmo()
    hash_table = algoritmo.passo1(pages, window_size, hash_module)

    logger.print('############### FINE PASSO 1 ####################', 1)
    logger.print(hash_table, 3)

    logger.print('############### INIZIO PASSO 2 ####################', 1)
    hash_table = algoritmo.passo2(hash_table, threshold)

    ## TODO: testing passo1
    ## TODO: testing passo2
    ## TODO: testing passo3
    ## TODO: revisit how the hashes are computed; for now they depend heavily on the modulus we choose, also in light of the results we are getting

    logger.print('############### FINE PASSO 2 ####################', 1)
    logger.print(hash_table, 3)

    logger.print('############### INIZIO PASSO 3 ####################', 1)
    cluster = {}
    cluster = algoritmo.passo3(hash_table, pages, hash_module, window_size)

    logger.print('################ FINE PASSO 3 ####################', 1)
    logger.print('Numero cluster ' + str(len(cluster)), 2)
    logger.print('\nClusters: \n', 2)
    logger.print(cluster, 2)

    file = open("prediction.csv", "w")
    index_cluster = 0
    for key in cluster:
        logger.print("\ncluster\n", 3)
        for page in cluster[key]:
            file.write(page.name + ", " + str(index_cluster) + "\n")
        index_cluster += 1

    file.close()
Example 8
    def passo1(self, pages, window_size=10, hash_module=256):
        logger = Logger.get_instance()
        hash_table = {}
        for page in pages:
            logger.print("Processing page: " + page.name, 2)
            shingle_set = extract_shingle_set(page, window_size)
            shingle_vector = create_shingle_vector(shingle_set, hash_module)
            masked_shingle_vectors = k_shingle_cover(shingle_vector, 6)
            for masked_shingle_vector in masked_shingle_vectors:
                # Here comes the ugly part
                if (masked_shingle_vector.getContent() in hash_table):
                    hash_table[masked_shingle_vector.getContent(
                    )] = hash_table.get(masked_shingle_vector.getContent()) + 1
                else:
                    hash_table[masked_shingle_vector.getContent()] = 1

        return hash_table
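The core of passo1 is frequency counting over masked shingle vectors. As a point of comparison, the same counting idiom over plain w-shingles, written with collections.Counter (a standalone sketch on token lists, not the project's ShingleVector type):

from collections import Counter

def shingles(tokens, window_size):
    # The set of contiguous token windows (w-shingles) of one page
    return {tuple(tokens[i:i + window_size])
            for i in range(len(tokens) - window_size + 1)}

pages = [['a', 'b', 'c', 'd'], ['b', 'c', 'd', 'e']]
hash_table = Counter()
for tokens in pages:
    hash_table.update(shingles(tokens, window_size=3))
print(hash_table)   # each shingle mapped to how many pages contain it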
Example 9
def main(config):

	# environments
	make_dirs(config.save_path)
	make_dirs(os.path.join(config.save_path, 'logs/'))
	make_dirs(os.path.join(config.save_path, 'model/'))
	make_dirs(os.path.join(config.save_path, 'features/'))
	make_dirs(os.path.join(config.save_path, 'results/'))
	make_dirs(os.path.join(config.save_path, 'images/'))
	os.environ["CUDA_VISIBLE_DEVICES"] = '0,1'


	# loaders
    # data_aug = transforms.Compose([
    #
    # ])
	# transform_train = transforms.Compose(
    #     [XRayCenterCrop(),
    #      XRayResizer(config.image_size),
	# 	 ToPILImage(),
	# 	 histeq(),
	# 	 transforms.Grayscale(num_output_channels=3),
	# 	 transforms.ToTensor()])

	transform = torchvision.transforms.Compose([XRayCenterCrop(),
												XRayResizer(224),
												ToPILImage(),
												t.Grayscale(num_output_channels=3)
												])

	aug = torchvision.transforms.RandomApply([t.ColorJitter(brightness=0.5, contrast=0.7),
											  t.RandomRotation(120),
											  t.RandomResizedCrop(224, scale=(0.6, 1.0), ratio=(0.75, 1.33),
																  interpolation=2),
											  t.RandomHorizontalFlip(),
											  t.RandomVerticalFlip(),
											  ], p=0.5)
	aug = t.Compose([aug, t.ToTensor()])

	loader= dataset_loader(config, transform, aug)

	# base
	Base = base(config, loader)


	# logger
	logger = Logger(os.path.join(os.path.join(config.save_path, 'logs/'), 'logging.txt'))
	logger(config)


	if config.mode == 'train':

		# automatically resume model from the latest one
		start_train_epoch = 0
		if True:
			root, _, files = os_walk(Base.save_model_path)
			if len(files) > 0:
				# get indexes of saved models
				indexes = []
				for file in files:
					indexes.append(int(file.replace('.pkl', '').split('_')[-1]))

				# remove the bad-case and get available indexes
				model_num = len(Base.model_list)
				available_indexes = copy.deepcopy(indexes)
				for element in indexes:
					if indexes.count(element) < model_num:
						available_indexes.remove(element)

				available_indexes = sorted(list(set(available_indexes)), reverse=True)
				unavailable_indexes = list(set(indexes).difference(set(available_indexes)))

				if len(available_indexes) > 0:  # resume model from the latest model
					Base.resume_model(available_indexes[0])
					start_train_epoch = available_indexes[0]
					logger('Time: {}, automatically resume training from the latest step (model {})'.
						   format(time_now(), available_indexes[0]))
				else:  #
					logger('Time: {}, there are no available models'.format(time_now()))

		# train loop
		for current_step in range(start_train_epoch, config.joint_training_steps):

			# save model every step. extra models will be automatically deleted for saving storage
			Base.save_model(current_step)

			# evaluate reid
			# if (current_step+1)%10 ==0:
			# 	logger('**********' * 10 + 'evaluate' + '**********' * 10)
			# 	results = test(config, base, loader_target, True)
			# 	for key in list(results.keys()):
			# 		logger('Time: {}, {}, {}'.format(time_now(), key, results[key]))
			# 	logger('')
			logger('**********'*10 + 'train' + '**********'*10 )
			train_titles, train_values, val_titles, val_values = train_a_ep(config, Base, loader, current_step)
			logger('Time: {};  Step: {};  {}'.format(time_now(), current_step, analyze_names_and_meter(train_titles, train_values)))
			logger('Time: {};  Step: {};  {}'.format(time_now(), current_step, analyze_names_and_meter(val_titles, val_values)))
			logger('')
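The resume-from-latest-checkpoint block above (and its twins in the later examples) boils down to picking the highest step index for which every sub-model file was saved. A standalone sketch of that selection, with illustrative names:

import os

def latest_resumable_index(save_model_path, model_num):
    # Collect the step index encoded at the end of each saved '*.pkl' file
    files = [f for f in os.listdir(save_model_path) if f.endswith('.pkl')]
    indexes = [int(f.replace('.pkl', '').split('_')[-1]) for f in files]
    # Keep only indexes for which all model_num sub-models were written
    complete = sorted({i for i in indexes if indexes.count(i) >= model_num},
                      reverse=True)
    return complete[0] if complete else None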
Example 10
    os.makedirs(save_path)
    os.makedirs(save_path + '/plots')
    summary_writer = tf.summary.FileWriter(save_path,
                                           graph=tf.get_default_graph())
    with open(save_path + '/description.txt', 'w') as f:
        f.write(model.description)
    ##### test #####
    saver.save(sess, save_path + '/model.ckpt')
    ##### test #####
#%%
accum_l = 0
alpha = 0.9
sess.run(iterator_tr.initializer)
logger = Logger(cols=[
    'index', 'l', 'l_rank', 'l_att_span', 'l_att_global', 'l_att_dist', 'mF1',
    'mAP', 'lr'
],
                filename=save_path + '/log.csv',
                is_save=is_save)
eval_interval = max((n_iters // n_report), 500)
tic = time.clock()
tensors_zs = [img_ids_test, features_test, unseen_labels_test]
tensors_gzs = [img_ids_test, features_test, gzs_labels_test]
for i in range(n_iters):
    img_ids_tr_v, features_tr_v, labels_tr_v = sess.run(
        [img_ids_tr, features_tr, labels_tr])

    mask = np.sum(labels_tr_v > 0, 1) > 0
    if np.sum(mask) == 0:
        continue
    feed_dict = {
        model.features: features_tr_v[mask],
Example 11
from flask_oidc import OpenIDConnect
# Flask Login Path
from flask_bcrypt import Bcrypt
from flask_login import current_user, login_user, logout_user, LoginManager

# Application imports
from core.utils import Logger

app = Flask(__name__)
app.config.from_object("core.config.ProductionConfig")
# app.config.from_object("core.config.DevelopmentConfig")

oidc = OpenIDConnect(app)
login_manager = LoginManager(app)
bcrypt = Bcrypt(app)
logger = Logger().get_logger()


def oidc_loggedin():
    return oidc.user_loggedin


def oidc_isAdmin():
    if oidc_loggedin():
        isAdmin = oidc.user_getfield("isAdmin")
        if isAdmin == "yes":
            return True
    return False


app.jinja_env.globals['oidc_loggedin'] = oidc_loggedin
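One way the oidc_isAdmin() helper above might be wired into a route is as a small decorator; this is only a sketch, and the '/admin' route is purely illustrative:

from functools import wraps
from flask import abort

def admin_required(view):
    @wraps(view)
    def wrapped(*args, **kwargs):
        # Reject the request unless the OIDC token marks the user as admin
        if not oidc_isAdmin():
            abort(403)
        return view(*args, **kwargs)
    return wrapped

@app.route('/admin')
@admin_required
def admin_dashboard():
    return 'admins only'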
Example 12
from evaluation_metrics import precision_recall

import vertex

if len(sys.argv) < 3:
    print(
        "usage: main.py <datasets-folder> <dataset> <verbosity> <input-size>")
    exit()

dataset_folder = sys.argv[1]
dataset = sys.argv[2]

verbosity = 0
if len(sys.argv) > 3:
    verbosity = int(sys.argv[3])

if len(sys.argv) > 4:
    input_size = int(sys.argv[4])
else:
    input_size = None

#singleton initialization
logger = Logger(verbosity)

vertex.clustering(dataset_folder, dataset, 10, 1024, 26, input_size)

ground_truth_path = os.path.join(dataset_folder, dataset + "_ground_truth.csv")
precision, recall, f1 = precision_recall("prediction.csv", ground_truth_path)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1: ", f1)
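For comparison, the manual sys.argv handling above could be expressed with argparse; this sketch keeps the same positional arguments and defaults (argument names are illustrative):

import argparse

parser = argparse.ArgumentParser(
    description='Cluster pages and score the prediction against ground truth')
parser.add_argument('datasets_folder')
parser.add_argument('dataset')
parser.add_argument('verbosity', nargs='?', type=int, default=0)
parser.add_argument('input_size', nargs='?', type=int, default=None)
args = parser.parse_args()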
Example 13
def main(config):

    # environments
    make_dirs(config.save_path)
    make_dirs(os.path.join(config.save_path, 'logs/'))
    make_dirs(os.path.join(config.save_path, 'model/'))
    make_dirs(os.path.join(config.save_path, 'features/'))
    make_dirs(os.path.join(config.save_path, 'results/'))
    make_dirs(os.path.join(config.save_path, 'images/'))
    os.environ["CUDA_VISIBLE_DEVICES"] = '0,1'

    # loaders
    transform = torchvision.transforms.Compose([
        t.ToPILImage(),
    ])
    aug = torchvision.transforms.RandomApply([
        t.ColorJitter(brightness=0.5, contrast=0.7),
        t.RandomRotation(120),
        t.RandomResizedCrop(
            224, scale=(0.6, 1.0), ratio=(0.75, 1.33), interpolation=2),
        t.RandomHorizontalFlip(),
        t.RandomVerticalFlip(),
    ],
                                             p=0.5)
    # random_hist = t.RandomApply([histeq()], p=0.5)
    aug = t.Compose([
        aug,
        t.ToTensor(),
        t.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])
    loader = stain_dataloader(config, transform, aug)

    # base
    Base = base(config, loader)

    # logger
    logger = Logger(
        os.path.join(os.path.join(config.save_path, 'logs/'), 'logging.txt'))
    csv_log = open(os.path.join(config.save_path, 'logs/csv_log.csv'), 'w')
    logger(config)

    if config.mode == 'train':

        # automatically resume model from the latest one
        start_train_epoch = 0
        if True:
            root, _, files = os_walk(Base.save_model_path)
            if len(files) > 0:
                # get indexes of saved models
                indexes = []
                for file in files:
                    indexes.append(int(
                        file.replace('.pkl', '').split('_')[-1]))

                # remove the bad-case and get available indexes
                model_num = len(Base.model_list)
                available_indexes = copy.deepcopy(indexes)
                for element in indexes:
                    if indexes.count(element) < model_num:
                        available_indexes.remove(element)

                available_indexes = sorted(list(set(available_indexes)),
                                           reverse=True)
                unavailable_indexes = list(
                    set(indexes).difference(set(available_indexes)))

                if len(available_indexes
                       ) > 0:  # resume model from the latest model
                    Base.resume_model(available_indexes[0])
                    start_train_epoch = available_indexes[0]
                    logger(
                        'Time: {}, automatically resume training from the latest step (model {})'
                        .format(time_now(), available_indexes[0]))
                else:  #
                    logger('Time: {}, there are no available models'.format(time_now()))

        # train loop
        for current_step in range(start_train_epoch,
                                  config.joint_training_steps):

            # save model every step. extra models will be automatically deleted for saving storage
            Base.save_model(current_step)

            logger('**********' * 10 + 'train' + '**********' * 10)
            train_titles, train_values, val_titles, val_values, test_titles, test_values, metric_bag = train_a_ep(
                config, Base, loader, current_step)
            logger('Time: {};  Step: {};  {}'.format(
                time_now(), current_step,
                analyze_names_and_meter(train_titles, train_values)))
            logger('Time: {};  Step: {};  {}'.format(
                time_now(), current_step,
                analyze_names_and_meter(val_titles, val_values)))
            for i, _ in enumerate(metric_bag):
                metric_bag[i] = round(metric_bag[i], 3)
            logger(
                'Time: {};  Step: {};  AP:{}; AuC:{}, Precision:{}, Recall:{}, Sensitivity:{}, Specificity:{}, f1:{}'
                .format(time_now(), current_step, metric_bag[0], metric_bag[1],
                        metric_bag[2], metric_bag[3], metric_bag[4],
                        metric_bag[5], metric_bag[6]), '.3f')
            logger('')
            list_all = analyze_meter_4_csv(val_titles, val_values) + metric_bag
            csv_writer = csv.writer(csv_log)
            csv_writer.writerow(list_all)
            # csv_log.close()
            if (current_step + 1) % 10 == 0:
                logger('**********' * 10 + 'test' + '**********' * 10)
                test_titles, test_values, metric_bag = test_a_ep(
                    config, Base, loader, current_step)
                logger('Time: {};  Step: {};  {}'.format(
                    time_now(), current_step,
                    analyze_names_and_meter(test_titles, test_values)))
                logger('')
Example 14
    def get_all(self, entity, filter_criteria='', sort_by=[], skip=None, limit=None, count=False):
        """
        Get all or a selection of entities from the datastore. This returns
        a list of entities.
        
        Entity should be class object
        
        filter_criteria can be used to filter the results and should be
        a dictionary that adheres to the pymongo documentation
        http://api.mongodb.org/python/current/genindex.html
        {'name':'jim'}
        
        sort_by should be a list of tuples (attribute, direction)
        [
            ('name',1),
            ('age',1),
        ]
        
        skip and limit are both ints and are used for pagination

        count should be True if only a count of the results is required

        e.g.
        todos = EntityManager(_DBCON).get_all(Todo
                                            ,filter_criteria={'uid':self.session['uid']}
                                            ,sort_by=[('added', 1)]
                                            ,skip=20
                                            ,limit=10
                                            )
        """
        extraCriteria = ''

        if len(sort_by)>0:
            extraCriteria += '.sort(%s)' % str(sort_by)

        if skip:
            extraCriteria += '.skip(%s)' % str(skip)

        if limit:
            extraCriteria += '.limit(%s)' % str(limit)

        if count:
            extraCriteria += '.count()'
            
        command = 'self.db.%s.find(%s)%s' % (entity.__name__
                                                ,str(filter_criteria)
                                                , extraCriteria
                                            )

        if DBDEBUG: Logger.log_to_file(command)

        if count:
            return eval(command)
        else:
            entities = []
            for result in eval(command):
                e = entity(self.db)
                setattr(e, '_id', result.get('_id'))
                for f, val in e.fields:
                    setattr(e, f, result.get(f))
                entities.append(e)
            
            return entities
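The same query could be built without eval() by chaining pymongo cursor methods; a minimal sketch that returns raw documents rather than entity objects, assuming self.db is a pymongo Database (names mirror the snippet but are not guaranteed to match the project):

    def get_all_no_eval(self, entity, filter_criteria=None, sort_by=None,
                        skip=None, limit=None, count=False):
        collection = self.db[entity.__name__]
        if count:
            # count_documents replaces the legacy cursor.count()
            return collection.count_documents(filter_criteria or {})
        cursor = collection.find(filter_criteria or {})
        if sort_by:
            cursor = cursor.sort(sort_by)   # list of (field, direction) tuples
        if skip:
            cursor = cursor.skip(int(skip))
        if limit:
            cursor = cursor.limit(int(limit))
        return list(cursor)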
Example 15
def main(config):

    # environments
    make_dirs(config.save_path)
    make_dirs(os.path.join(config.save_path, 'logs/'))
    make_dirs(os.path.join(config.save_path, 'model/'))
    make_dirs(os.path.join(config.save_path, 'dataset/'))
    os.environ["CUDA_VISIBLE_DEVICES"] = '0,1'

    # loaders
    transform = torchvision.transforms.Compose([
        XRayResizer(config.image_size),
        CLAHE(clip_limit=4.0, tile_grid_size=(4, 4)),
        t.ToPILImage(),
    ])

    aug = torchvision.transforms.RandomApply([
        t.ColorJitter(brightness=0.5, contrast=0.7),
        t.RandomRotation(120),
        t.RandomResizedCrop(config.image_size,
                            scale=(0.6, 1.0),
                            ratio=(0.75, 1.33),
                            interpolation=2),
        t.RandomHorizontalFlip(),
        t.RandomVerticalFlip(),
    ],
                                             p=0.5)
    aug = t.Compose([
        aug,
        ZscoreNormalize(),
        t.ToTensor(),
    ])

    loader = dataset_loader(config, transform, aug)
    # base
    Base = base(config, loader)
    # logger
    logger = Logger(
        os.path.join(os.path.join(config.save_path, 'logs/'), 'logging.txt'))
    logger(config)

    # automatically resume model from the latest one
    start_epoch = 0
    pathologies = loader.train_set.dataset.pathologies
    count_train = count_instance_num(loader.train_set)
    count_val = count_instance_num(loader.val_set)
    logger(('all_train', len(loader.train_set) + len(loader.val_set), 'class:',
            pathologies, '  num:', count_train + count_val))
    logger(('train:', len(loader.train_set), 'class:', pathologies, '  num:',
            count_train))
    logger(('validation:', len(loader.val_set), 'class:', pathologies,
            '  num:', count_val))
    logger(pathologies)

    root, _, files = os_walk(Base.save_model_path)
    if len(files) > 0:
        # get indexes of saved models
        indexes = []
        for file in files:
            indexes.append(int(file.replace('.pkl', '').split('_')[-1]))

        # remove the bad-case and get available indexes
        model_num = len(Base.model_list)
        available_indexes = copy.deepcopy(indexes)
        for element in indexes:
            if indexes.count(element) < model_num:
                available_indexes.remove(element)

        available_indexes = sorted(list(set(available_indexes)), reverse=True)
        unavailable_indexes = list(
            set(indexes).difference(set(available_indexes)))

        if len(
                available_indexes
        ) > 0 and config.mode != '5fold':  # resume model from the latest model
            Base.resume_model(available_indexes[0])
            start_epoch = available_indexes[0]
            logger(
                'Time: {}, automatically resume training from the latest step (model {})'
                .format(time_now(), available_indexes[0]))
            logger('Time: {},read train indices from /dataset'.format(
                time_now()))
            logger('Time: {},read train indices from /dataset'.format(
                time_now()))
            loader.train_set.indices = np.load(
                os.path.join(config.save_path, 'dataset', 'train.npy'))
            loader.train_set.dataset.idxs = np.load(
                os.path.join(config.save_path, 'dataset', 'train_idx.npy'))
            loader.train_set.dataset.labels = np.load(
                os.path.join(config.save_path, 'dataset', 'train_labels.npy'))

            loader.val_set.indices = np.load(
                os.path.join(config.save_path, 'dataset', 'test.npy'), )
            loader.val_set.dataset.idxs = np.load(
                os.path.join(config.save_path, 'dataset', 'test_idx.npy'))
            loader.val_set.dataset.labels = np.load(
                os.path.join(config.save_path, 'dataset', 'test_labels.npy'))

            count_train = count_instance_num(loader.train_set)
            count_val = count_instance_num(loader.val_set)
            logger(('all: num:', count_train + count_val))
            logger(('train: num:', count_train))
            logger(('test: num:', count_val))
    else:
        logger('Time: {}, there are no available models'.format(time_now()))
        logger('Time: {},write train indices in /dataset/train.npy'.format(
            time_now()))
        logger('Time: {},write train indices in /dataset/train_idx.npy'.format(
            time_now()))
        logger(
            'Time: {},write train indices in /dataset/train_labels.npy'.format(
                time_now()))
        logger('Time: {},write test indices in /dataset/test.npy'.format(
            time_now()))
        logger('Time: {},write test indices in /dataset/test_idx.npy'.format(
            time_now()))
        logger(
            'Time: {},write test indices in /dataset/test_labels.npy'.format(
                time_now()))

        np.save(os.path.join(config.save_path, 'dataset', 'train.npy'),
                np.array(loader.train_set.indices))
        np.save(os.path.join(config.save_path, 'dataset', 'train_idx.npy'),
                np.array(loader.train_set.dataset.idxs))
        np.save(os.path.join(config.save_path, 'dataset', 'train_labels.npy'),
                loader.train_set.dataset.labels)
        np.save(os.path.join(config.save_path, 'dataset', 'test.npy'),
                np.array(loader.val_set.indices))
        np.save(os.path.join(config.save_path, 'dataset', 'test_idx.npy'),
                np.array(loader.val_set.dataset.idxs))
        np.save(os.path.join(config.save_path, 'dataset', 'test_labels.npy'),
                loader.val_set.dataset.labels)

    if config.mode == 'train':
        # get all the id in dataset
        dataset_to_split = [i for i, _ in enumerate(loader.all_set)]
        # random split them to 5 folds
        train_ids_by_fold = []
        test_ids_by_fold = []
        test_cache = []
        for data_id in range(5):
            train_cache = list(set(dataset_to_split) - set(test_cache))
            test_part = random.sample(train_cache,
                                      int(len(dataset_to_split) / 5))
            test_cache = test_cache + test_part
            train_part = list(set(dataset_to_split) - set(test_part))
            train_ids_by_fold.append(train_part)
            test_ids_by_fold.append(test_part)

        for fold_id in range(5):
            # re-initialize after final test
            start_epoch = 0
            Base = base(config, loader)
            loader.train_set.indices = train_ids_by_fold[fold_id]
            loader.val_set.indices = test_ids_by_fold[fold_id]

            logger('**********' * 3 + '5fold_train_fold_' + str(fold_id) +
                   '**********' * 3)
            for current_step in range(start_epoch,
                                      config.joint_training_steps):
                # save model every step. extra models will be automatically deleted for saving storage
                Base.save_model(current_step)
                logger('**********' * 3 + 'train' + '**********' * 3)
                train_titles, train_values = train_a_ep(
                    config, Base, loader, current_step)
                logger('Time: {};  Step: {};  {}'.format(
                    time_now(), current_step,
                    analyze_names_and_meter(train_titles, train_values)))
                logger('')
                if (current_step) % 3 == 0:
                    logger('**********' * 3 + 'test' + '**********' * 3)
                    test_titles, test_values, confusion_matrix, metric_values = test_a_ep(
                        config, Base, loader, current_step)
                    logger('Time: {};  Step: {};  {}'.format(
                        time_now(), current_step,
                        analyze_names_and_meter(test_titles, test_values)))
                    logger(
                        'Time: {};  Step: {}; acc:{}; Precision:{}, Recall:{}, f1:{},Specificity:{}, FPR:{}'
                        .format(time_now(), current_step, metric_values[0],
                                metric_values[1], metric_values[2],
                                metric_values[3], metric_values[4],
                                metric_values[5]), '.3f')
                    logger(confusion_matrix)
                    logger('')

    elif config.mode == 'test':
        logger('**********' * 3 + 'test' + '**********' * 3)
        test_titles, test_values, confusion_matrix, metric_values = test_a_ep(
            config, Base, loader, start_epoch)
        logger('Time: {};  Step: {};  {}'.format(
            time_now(), start_epoch,
            analyze_names_and_meter(test_titles, test_values)))

        logger(
            'Time: {};  Step: {}; acc:{}; Precision:{}, Recall:{}, f1:{}, Specificity:{}, FPR:{}'
            .format(time_now(), start_epoch, metric_values[0],
                    metric_values[1], metric_values[2], metric_values[3],
                    metric_values[4], metric_values[5]), '.3f')
        logger(confusion_matrix)
        logger('')

    elif config.mode == 'localize':
        logger('**********' * 3 + 'localize' + '**********' * 3)
        masks = [
            os.path.join("./datasets/Localize2/Masks", i)
            for i in os.listdir("./datasets/Localize2/Masks")
        ]
        masks.sort()

        test_titles, test_values, = localize_penumonia(config, Base, loader,
                                                       start_epoch)
        logger('Time: {};  Step: {};  {}'.format(
            time_now(), start_epoch,
            analyze_names_and_meter(test_titles, test_values)))
        # logger(confusion_matrix)
        logger('')
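The manual 5-fold split in the train branch above (random.sample over the remaining ids) amounts to a standard k-fold partition; for comparison, a sketch using sklearn.model_selection.KFold (assuming scikit-learn is available, which the snippet does not state):

from sklearn.model_selection import KFold

def five_fold_indices(n_samples, seed=0):
    # Returns (train_ids, test_ids) pairs covering range(n_samples),
    # analogous to train_ids_by_fold / test_ids_by_fold above.
    kfold = KFold(n_splits=5, shuffle=True, random_state=seed)
    samples = list(range(n_samples))
    return [([samples[i] for i in tr], [samples[i] for i in te])
            for tr, te in kfold.split(samples)]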