def save(self):
    if self._id:
        command = '%s.save(%s)' % (self._mongocollection, str(self._get_hash()))
        if DBDEBUG:
            Logger.log_to_file(command)
        eval(command)
    else:
        command = '%s.insert(%s)' % (self._mongocollection, str(self._get_hash()))
        if DBDEBUG:
            Logger.log_to_file(command)
        self._id = eval(command)
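# Hedged usage sketch (not from the original source): save() above expects the
# entity to provide _id, _mongocollection and _get_hash(). Assuming a base class
# wires those up, a hypothetical Todo entity would be persisted like this:
#
#     todo = Todo(db)              # fresh entity, _id is None
#     todo.title = 'write docs'
#     todo.save()                  # no _id yet -> builds an insert() command
#     todo.title = 'write tests'
#     todo.save()                  # _id now set -> builds a save() (update) command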
def app(symbols, start, end, threads, timeframe, folder, header):
    if start > end:
        return
    lock = threading.Lock()
    global day_counter
    total_days = how_many_days(start, end)
    if total_days == 0:
        return
    last_fetch = deque([], maxlen=5)
    update_progress(day_counter, total_days, -1, threads)

    def do_work(symbol, day, csv):
        global day_counter
        start_time = time.time()
        Logger.info("Fetching day {0}".format(day))
        try:
            csv.append(day, decompress(symbol, day, fetch_day(symbol, day)))
        except Exception as e:
            print("ERROR for {0}, {1} Exception : {2}".format(
                day, symbol, str(e)))
        elapsed_time = time.time() - start_time
        last_fetch.append(elapsed_time)
        with lock:
            day_counter += 1
        Logger.info("Day {0} fetched in {1}s".format(day, elapsed_time))

    futures = []
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=threads) as executor:
        files = {
            symbol: CSVDumper(symbol, timeframe, start, end, folder, header)
            for symbol in symbols
        }
        for symbol in symbols:
            for day in days(start, end):
                futures.append(
                    executor.submit(do_work, symbol, day, files[symbol]))
        for future in concurrent.futures.as_completed(futures):
            if future.exception() is None:
                update_progress(day_counter, total_days, avg(last_fetch),
                                threads)
            else:
                Logger.error("An error happened when fetching data: ",
                             future.exception())

    Logger.info("Fetching data terminated")
    for file in files.values():
        file.dump()
    update_progress(day_counter, total_days, avg(last_fetch), threads)
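# Hedged usage sketch (not from the original source): one possible call to app()
# for a small date range. The concrete timeframe value and header flag expected
# by CSVDumper are assumptions here; day_counter is the module-level counter the
# function updates.
#
#     from datetime import date
#
#     day_counter = 0
#     app(symbols=['EURUSD', 'GBPUSD'],
#         start=date(2017, 1, 2),
#         end=date(2017, 1, 6),
#         threads=4,
#         timeframe='tick',
#         folder='./download',
#         header=True)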
def send(self, subject, body):
    msg = MIMEText(body, 'html')
    msg['Subject'] = subject
    msg['From'] = self.sender
    msg['To'] = self.recipient
    try:
        s = smtplib.SMTP(self.host)
        if settings.EMAILUSERNAME and settings.EMAILPASSWORD:
            s.login(settings.EMAILUSERNAME, settings.EMAILPASSWORD)
        s.sendmail(self.sender, self.recipient, msg.as_string())
    except Exception:
        # delivery failed: keep a copy of the message in the log file
        Logger.log_to_file(msg.as_string())
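# Hedged usage sketch (not from the original source): the Mailer class name and
# its constructor arguments are assumptions; only send(), self.host, self.sender,
# self.recipient and the settings.EMAILUSERNAME / settings.EMAILPASSWORD
# credentials appear in the method above.
#
#     mailer = Mailer(host='localhost',
#                     sender='noreply@example.com',
#                     recipient='admin@example.com')
#     mailer.send('Nightly report', '<p>All jobs finished.</p>')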
def delete_one(self, entity, _id):
    """
    Deletes a single entity from the datastore based on the id given.

    entity should be a string
    _id should be the string entity id

    e.g.
        todoId = '5047b7bb37d5e64e9a4b1c74'
        EntityManager(_DBCON).delete_one('Todo', todoId)
    """
    command = 'self.db.%s.remove({"_id":ObjectId("%s")})' % (entity, str(_id))
    if DBDEBUG:
        Logger.log_to_file(command)
    eval(command)
def _get(self, _id):
    command = '%s.find_one({"_id":ObjectId("%s")})' % (self._mongocollection, str(_id))
    if DBDEBUG:
        Logger.log_to_file(command)
    entity = eval(command)
    if entity:
        setattr(self, '_id', entity.get('_id'))
        for f, val in self._fields:
            fieldtype = type(getattr(self, f))
            fieldvalue = entity.get(f)
            if fieldtype == list:
                fieldlist = []
                for el in fieldvalue:
                    if isinstance(el, dict) and '__instanceOf__' in el:
                        command = '%s(self.db, ObjectId("%s"))' % (
                            self._unicode_to_class(el['__instanceOf__']), el['_id'])
                        if DBDEBUG:
                            Logger.log_to_file(command)
                        el = eval(command)
                    fieldlist.append(el)
                fieldvalue = fieldlist
            elif isinstance(fieldvalue, dict) and '__instanceOf__' in fieldvalue:
                command = '%s(self.db, ObjectId("%s"))' % (
                    self._unicode_to_class(fieldvalue['__instanceOf__']), fieldvalue['_id'])
                if DBDEBUG:
                    Logger.log_to_file(command)
                fieldvalue = eval(command)
            setattr(self, f, fieldvalue)
def clustering(dataset_folder, dataset, window_size=10, hash_module=1024,
               threshold=26, input_limit=None):
    loader = Loader()
    pages = loader.load_pages(dataset_folder, dataset)
    pages = pages[:input_limit]
    logger = Logger.get_instance()

    logger.print('############### STEP 1 START ####################', 1)
    algoritmo = Algoritmo()
    hash_table = algoritmo.passo1(pages, window_size, hash_module)
    logger.print('############### STEP 1 END ####################', 1)
    logger.print(hash_table, 3)

    logger.print('############### STEP 2 START ####################', 1)
    hash_table = algoritmo.passo2(hash_table, threshold)
    # TODO: testing passo1
    # TODO: testing passo2
    # TODO: testing passo3
    # TODO: revisit how the hashes are built; for now they depend heavily on the
    #       modulus we choose, also in light of the results we are getting
    logger.print('############### STEP 2 END ####################', 1)
    logger.print(hash_table, 3)

    logger.print('############### STEP 3 START ####################', 1)
    cluster = algoritmo.passo3(hash_table, pages, hash_module, window_size)
    logger.print('################ STEP 3 END ####################', 1)

    logger.print('Number of clusters ' + str(len(cluster)), 2)
    logger.print('\nClusters: \n', 2)
    logger.print(cluster, 2)

    file = open("prediction.csv", "w")
    index_cluster = 0
    for key in cluster:
        logger.print("\ncluster\n", 3)
        for page in cluster[key]:
            file.write(page.name + ", " + str(index_cluster) + "\n")
        index_cluster += 1
    file.close()
def passo1(self, pages, window_size=10, hash_module=256):
    logger = Logger.get_instance()
    hash_table = {}
    for page in pages:
        logger.print("Processing page: " + page.name, 2)
        shingle_set = extract_shingle_set(page, window_size)
        shingle_vector = create_shingle_vector(shingle_set, hash_module)
        masked_shingle_vectors = k_shingle_cover(shingle_vector, 6)
        for masked_shingle_vector in masked_shingle_vectors:
            # Here comes the ugly part
            if masked_shingle_vector.getContent() in hash_table:
                hash_table[masked_shingle_vector.getContent()] = \
                    hash_table.get(masked_shingle_vector.getContent()) + 1
            else:
                hash_table[masked_shingle_vector.getContent()] = 1
    return hash_table
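# Design note (suggestion, not from the original source): the branch flagged as
# "the ugly part" in passo1 can be collapsed by using dict.get with a default
# value; the behaviour is identical. Self-contained sketch, with plain strings
# standing in for masked_shingle_vector.getContent():
def count_occurrences(contents):
    hash_table = {}
    for content in contents:
        # missing keys default to 0, so no explicit membership test is needed
        hash_table[content] = hash_table.get(content, 0) + 1
    return hash_table

# count_occurrences(['a', 'b', 'a'])  ->  {'a': 2, 'b': 1}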
def main(config):
    # environments
    make_dirs(config.save_path)
    make_dirs(os.path.join(config.save_path, 'logs/'))
    make_dirs(os.path.join(config.save_path, 'model/'))
    make_dirs(os.path.join(config.save_path, 'features/'))
    make_dirs(os.path.join(config.save_path, 'results/'))
    make_dirs(os.path.join(config.save_path, 'images/'))
    os.environ["CUDA_VISIBLE_DEVICES"] = '0,1'

    # loaders
    # data_aug = transforms.Compose([
    #
    # ])
    # transform_train = transforms.Compose(
    #     [XRayCenterCrop(),
    #      XRayResizer(config.image_size),
    #      ToPILImage(),
    #      histeq(),
    #      transforms.Grayscale(num_output_channels=3),
    #      transforms.ToTensor()])
    transform = torchvision.transforms.Compose([
        XRayCenterCrop(),
        XRayResizer(224),
        ToPILImage(),
        t.Grayscale(num_output_channels=3),
    ])
    aug = torchvision.transforms.RandomApply([
        t.ColorJitter(brightness=0.5, contrast=0.7),
        t.RandomRotation(120),
        t.RandomResizedCrop(224, scale=(0.6, 1.0), ratio=(0.75, 1.33),
                            interpolation=2),
        t.RandomHorizontalFlip(),
        t.RandomVerticalFlip(),
    ], p=0.5)
    aug = t.Compose([aug, t.ToTensor()])
    loader = dataset_loader(config, transform, aug)

    # base
    Base = base(config, loader)

    # logger
    logger = Logger(
        os.path.join(os.path.join(config.save_path, 'logs/'), 'logging.txt'))
    logger(config)

    if config.mode == 'train':
        # automatically resume model from the latest one
        start_train_epoch = 0
        if True:
            root, _, files = os_walk(Base.save_model_path)
            if len(files) > 0:
                # get indexes of saved models
                indexes = []
                for file in files:
                    indexes.append(int(file.replace('.pkl', '').split('_')[-1]))

                # remove the bad-case and get available indexes
                model_num = len(Base.model_list)
                available_indexes = copy.deepcopy(indexes)
                for element in indexes:
                    if indexes.count(element) < model_num:
                        available_indexes.remove(element)

                available_indexes = sorted(list(set(available_indexes)),
                                           reverse=True)
                unavailable_indexes = list(
                    set(indexes).difference(set(available_indexes)))

                if len(available_indexes) > 0:
                    # resume model from the latest model
                    Base.resume_model(available_indexes[0])
                    start_train_epoch = available_indexes[0]
                    logger('Time: {}, automatically resume training from the latest step (model {})'
                           .format(time_now(), available_indexes[0]))
                else:
                    logger('Time: {}, there are no available models'.format(time_now()))

        # train loop
        for current_step in range(start_train_epoch, config.joint_training_steps):
            # save model every step. extra models will be automatically deleted for saving storage
            Base.save_model(current_step)

            # evaluate reid
            # if (current_step + 1) % 10 == 0:
            #     logger('**********' * 10 + 'evaluate' + '**********' * 10)
            #     results = test(config, base, loader_target, True)
            #     for key in list(results.keys()):
            #         logger('Time: {}, {}, {}'.format(time_now(), key, results[key]))
            #     logger('')

            logger('**********' * 10 + 'train' + '**********' * 10)
            train_titles, train_values, val_titles, val_values = train_a_ep(
                config, Base, loader, current_step)
            logger('Time: {}; Step: {}; {}'.format(
                time_now(), current_step,
                analyze_names_and_meter(train_titles, train_values)))
            logger('Time: {}; Step: {}; {}'.format(
                time_now(), current_step,
                analyze_names_and_meter(val_titles, val_values)))
            logger('')
os.makedirs(save_path)
os.makedirs(save_path + '/plots')

summary_writer = tf.summary.FileWriter(save_path, graph=tf.get_default_graph())

with open(save_path + '/description.txt', 'w') as f:
    f.write(model.description)

##### test #####
saver.save(sess, save_path + '/model.ckpt')
##### test #####

#%%
accum_l = 0
alpha = 0.9
sess.run(iterator_tr.initializer)

logger = Logger(
    cols=[
        'index', 'l', 'l_rank', 'l_att_span', 'l_att_global', 'l_att_dist',
        'mF1', 'mAP', 'lr'
    ],
    filename=save_path + '/log.csv',
    is_save=is_save)

eval_interval = max((n_iters // n_report), 500)
tic = time.clock()

tensors_zs = [img_ids_test, features_test, unseen_labels_test]
tensors_gzs = [img_ids_test, features_test, gzs_labels_test]

for i in range(n_iters):
    img_ids_tr_v, features_tr_v, labels_tr_v = sess.run(
        [img_ids_tr, features_tr, labels_tr])
    mask = np.sum(labels_tr_v > 0, 1) > 0
    if np.sum(mask) == 0:
        continue
    feed_dict = {
        model.features: features_tr_v[mask],
from flask_oidc import OpenIDConnect

# Flask login path
from flask_bcrypt import Bcrypt
from flask_login import current_user, login_user, logout_user, LoginManager

# Application imports
from core.utils import Logger

app = Flask(__name__)
app.config.from_object("core.config.ProductionConfig")
# app.config.from_object("core.config.DevelopmentConfig")

oidc = OpenIDConnect(app)
login_manager = LoginManager(app)
bcrypt = Bcrypt(app)
logger = Logger().get_logger()


def oidc_loggedin():
    return oidc.user_loggedin


def oidc_isAdmin():
    if oidc_loggedin():
        isAdmin = oidc.user_getfield("isAdmin")
        if isAdmin == "yes":
            return True
    return False


app.jinja_env.globals['oidc_loggedin'] = oidc_loggedin
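# Hedged usage sketch (not part of the original app): a route that gates an
# admin page on oidc_isAdmin(). The '/admin' URL, the 'admin.html' template and
# the 'index' endpoint are illustrative assumptions.
from flask import redirect, render_template, url_for


@app.route('/admin')
def admin_dashboard():
    if not oidc_isAdmin():
        # non-admins are bounced back to a hypothetical 'index' endpoint
        return redirect(url_for('index'))
    return render_template('admin.html')

# In a template, the registered jinja global can gate navigation items:
#     {% if oidc_loggedin() %} ... {% endif %}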
from evaluation_metrics import precision_recall
import vertex

if len(sys.argv) < 3:
    print("usage: main.py <datasets-folder> <dataset> <verbosity> <input-size>")
    exit()

dataset_folder = sys.argv[1]
dataset = sys.argv[2]

verbosity = 0
if len(sys.argv) > 3:
    verbosity = int(sys.argv[3])

if len(sys.argv) > 4:
    input_size = int(sys.argv[4])
else:
    input_size = None

# singleton initialization
logger = Logger(verbosity)

vertex.clustering(dataset_folder, dataset, 10, 1024, 26, input_size)

ground_truth_path = os.path.join(dataset_folder, dataset + "_ground_truth.csv")
precision, recall, f1 = precision_recall("prediction.csv", ground_truth_path)

print("Precision: ", precision)
print("Recall: ", recall)
print("F1: ", f1)
def main(config):
    # environments
    make_dirs(config.save_path)
    make_dirs(os.path.join(config.save_path, 'logs/'))
    make_dirs(os.path.join(config.save_path, 'model/'))
    make_dirs(os.path.join(config.save_path, 'features/'))
    make_dirs(os.path.join(config.save_path, 'results/'))
    make_dirs(os.path.join(config.save_path, 'images/'))
    os.environ["CUDA_VISIBLE_DEVICES"] = '0,1'

    # loaders
    transform = torchvision.transforms.Compose([
        t.ToPILImage(),
    ])
    aug = torchvision.transforms.RandomApply([
        t.ColorJitter(brightness=0.5, contrast=0.7),
        t.RandomRotation(120),
        t.RandomResizedCrop(224, scale=(0.6, 1.0), ratio=(0.75, 1.33),
                            interpolation=2),
        t.RandomHorizontalFlip(),
        t.RandomVerticalFlip(),
    ], p=0.5)
    # random_hist = t.RandomApply([histeq()], p=0.5)
    aug = t.Compose([
        aug,
        t.ToTensor(),
        t.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])
    loader = stain_dataloader(config, transform, aug)

    # base
    Base = base(config, loader)

    # logger
    logger = Logger(
        os.path.join(os.path.join(config.save_path, 'logs/'), 'logging.txt'))
    csv_log = open(os.path.join(config.save_path, 'logs/csv_log.csv'), 'w')
    logger(config)

    if config.mode == 'train':
        # automatically resume model from the latest one
        start_train_epoch = 0
        if True:
            root, _, files = os_walk(Base.save_model_path)
            if len(files) > 0:
                # get indexes of saved models
                indexes = []
                for file in files:
                    indexes.append(int(file.replace('.pkl', '').split('_')[-1]))

                # remove the bad-case and get available indexes
                model_num = len(Base.model_list)
                available_indexes = copy.deepcopy(indexes)
                for element in indexes:
                    if indexes.count(element) < model_num:
                        available_indexes.remove(element)

                available_indexes = sorted(list(set(available_indexes)),
                                           reverse=True)
                unavailable_indexes = list(
                    set(indexes).difference(set(available_indexes)))

                if len(available_indexes) > 0:
                    # resume model from the latest model
                    Base.resume_model(available_indexes[0])
                    start_train_epoch = available_indexes[0]
                    logger('Time: {}, automatically resume training from the latest step (model {})'
                           .format(time_now(), available_indexes[0]))
                else:
                    logger('Time: {}, there are no available models'.format(time_now()))

        # train loop
        for current_step in range(start_train_epoch, config.joint_training_steps):
            # save model every step. extra models will be automatically deleted for saving storage
            Base.save_model(current_step)

            logger('**********' * 10 + 'train' + '**********' * 10)
            train_titles, train_values, val_titles, val_values, test_titles, test_values, metric_bag = train_a_ep(
                config, Base, loader, current_step)
            logger('Time: {}; Step: {}; {}'.format(
                time_now(), current_step,
                analyze_names_and_meter(train_titles, train_values)))
            logger('Time: {}; Step: {}; {}'.format(
                time_now(), current_step,
                analyze_names_and_meter(val_titles, val_values)))
            for i, _ in enumerate(metric_bag):
                metric_bag[i] = round(metric_bag[i], 3)
            logger(
                'Time: {}; Step: {}; AP:{}; AuC:{}, Precision:{}, Recall:{}, Sensitivity:{}, Specificity:{}, f1:{}'
                .format(time_now(), current_step, metric_bag[0], metric_bag[1],
                        metric_bag[2], metric_bag[3], metric_bag[4],
                        metric_bag[5], metric_bag[6]), '.3f')
            logger('')

            list_all = analyze_meter_4_csv(val_titles, val_values) + metric_bag
            csv_writer = csv.writer(csv_log)
            csv_writer.writerow(list_all)
            # csv_log.close()

            if (current_step + 1) % 10 == 0:
                logger('**********' * 10 + 'test' + '**********' * 10)
                test_titles, test_values, metric_bag = test_a_ep(
                    config, Base, loader, current_step)
                logger('Time: {}; Step: {}; {}'.format(
                    time_now(), current_step,
                    analyze_names_and_meter(test_titles, test_values)))
                logger('')
def get_all(self, entity, filter_criteria='', sort_by=[], skip=None,
            limit=None, count=False):
    """
    Get all or a selection of entities from the datastore.
    This returns a list of entities.

    entity should be the class object.

    filter_criteria can be used to filter the results and should be a
    dictionary that adheres to the pymongo documentation
    http://api.mongodb.org/python/current/genindex.html
        {'name': 'jim'}

    sort_by should be a list of (attribute, direction) tuples
        [
            ('name', 1),
            ('age', 1),
        ]

    skip and limit are both ints and are used for pagination.

    count should be True if only a count of the results is required.

    e.g.
        todos = EntityManager(_DBCON).get_all(Todo,
                                              filter_criteria={'uid': self.session['uid']},
                                              sort_by=[('added', 1)],
                                              skip=20,
                                              limit=10)
    """
    extraCriteria = ''
    if len(sort_by) > 0:
        extraCriteria += '.sort(%s)' % str(sort_by)
    if skip:
        extraCriteria += '.skip(%s)' % str(skip)
    if limit:
        extraCriteria += '.limit(%s)' % str(limit)
    if count:
        extraCriteria += '.count()'
    command = 'self.db.%s.find(%s)%s' % (entity.__name__, str(filter_criteria),
                                         extraCriteria)
    if DBDEBUG:
        Logger.log_to_file(command)
    if count:
        return eval(command)
    else:
        entities = []
        for result in eval(command):
            e = entity(self.db)
            setattr(e, '_id', result.get('_id'))
            for f, val in e.fields:
                setattr(e, f, result.get(f))
            entities.append(e)
        return entities
def main(config):
    # environments
    make_dirs(config.save_path)
    make_dirs(os.path.join(config.save_path, 'logs/'))
    make_dirs(os.path.join(config.save_path, 'model/'))
    make_dirs(os.path.join(config.save_path, 'dataset/'))
    os.environ["CUDA_VISIBLE_DEVICES"] = '0,1'

    # loaders
    transform = torchvision.transforms.Compose([
        XRayResizer(config.image_size),
        CLAHE(clip_limit=4.0, tile_grid_size=(4, 4)),
        t.ToPILImage(),
    ])
    aug = torchvision.transforms.RandomApply([
        t.ColorJitter(brightness=0.5, contrast=0.7),
        t.RandomRotation(120),
        t.RandomResizedCrop(config.image_size, scale=(0.6, 1.0),
                            ratio=(0.75, 1.33), interpolation=2),
        t.RandomHorizontalFlip(),
        t.RandomVerticalFlip(),
    ], p=0.5)
    aug = t.Compose([
        aug,
        ZscoreNormalize(),
        t.ToTensor(),
    ])
    loader = dataset_loader(config, transform, aug)

    # base
    Base = base(config, loader)

    # logger
    logger = Logger(
        os.path.join(os.path.join(config.save_path, 'logs/'), 'logging.txt'))
    logger(config)

    # automatically resume model from the latest one
    start_epoch = 0

    pathologies = loader.train_set.dataset.pathologies
    count_train = count_instance_num(loader.train_set)
    count_val = count_instance_num(loader.val_set)
    logger(('all_train', len(loader.train_set) + len(loader.val_set),
            'class:', pathologies, ' num:', count_train + count_val))
    logger(('train:', len(loader.train_set), 'class:', pathologies,
            ' num:', count_train))
    logger(('validation:', len(loader.val_set), 'class:', pathologies,
            ' num:', count_val))
    logger(pathologies)

    root, _, files = os_walk(Base.save_model_path)
    if len(files) > 0:
        # get indexes of saved models
        indexes = []
        for file in files:
            indexes.append(int(file.replace('.pkl', '').split('_')[-1]))

        # remove the bad-case and get available indexes
        model_num = len(Base.model_list)
        available_indexes = copy.deepcopy(indexes)
        for element in indexes:
            if indexes.count(element) < model_num:
                available_indexes.remove(element)

        available_indexes = sorted(list(set(available_indexes)), reverse=True)
        unavailable_indexes = list(
            set(indexes).difference(set(available_indexes)))

        if len(available_indexes) > 0 and config.mode != '5fold':
            # resume model from the latest model
            Base.resume_model(available_indexes[0])
            start_epoch = available_indexes[0]
            logger('Time: {}, automatically resume training from the latest step (model {})'
                   .format(time_now(), available_indexes[0]))
            logger('Time: {},read train indices from /dataset'.format(time_now()))
            logger('Time: {},read train indices from /dataset'.format(time_now()))
            loader.train_set.indices = np.load(
                os.path.join(config.save_path, 'dataset', 'train.npy'))
            loader.train_set.dataset.idxs = np.load(
                os.path.join(config.save_path, 'dataset', 'train_idx.npy'))
            loader.train_set.dataset.labels = np.load(
                os.path.join(config.save_path, 'dataset', 'train_labels.npy'))
            loader.val_set.indices = np.load(
                os.path.join(config.save_path, 'dataset', 'test.npy'))
            loader.val_set.dataset.idxs = np.load(
                os.path.join(config.save_path, 'dataset', 'test_idx.npy'))
            loader.val_set.dataset.labels = np.load(
                os.path.join(config.save_path, 'dataset', 'test_labels.npy'))

            count_train = count_instance_num(loader.train_set)
            count_val = count_instance_num(loader.val_set)
            logger(('all: num:', count_train + count_val))
            logger(('train: num:', count_train))
            logger(('test: num:', count_val))
    else:
        logger('Time: {}, there are no available models'.format(time_now()))
        logger('Time: {},write train indices in /dataset/train.npy'.format(time_now()))
        logger('Time: {},write train indices in /dataset/train_idx.npy'.format(time_now()))
        logger('Time: {},write train indices in /dataset/train_labels.npy'.format(time_now()))
        logger('Time: {},write test indices in /dataset/test.npy'.format(time_now()))
        logger('Time: {},write test indices in /dataset/test_idx.npy'.format(time_now()))
        logger('Time: {},write test indices in /dataset/test_labels.npy'.format(time_now()))
        np.save(os.path.join(config.save_path, 'dataset', 'train.npy'),
                np.array(loader.train_set.indices))
        np.save(os.path.join(config.save_path, 'dataset', 'train_idx.npy'),
                np.array(loader.train_set.dataset.idxs))
        np.save(os.path.join(config.save_path, 'dataset', 'train_labels.npy'),
                loader.train_set.dataset.labels)
        np.save(os.path.join(config.save_path, 'dataset', 'test.npy'),
                np.array(loader.val_set.indices))
        np.save(os.path.join(config.save_path, 'dataset', 'test_idx.npy'),
                np.array(loader.val_set.dataset.idxs))
        np.save(os.path.join(config.save_path, 'dataset', 'test_labels.npy'),
                loader.val_set.dataset.labels)

    if config.mode == 'train':
        # get all the ids in the dataset
        dataset_to_split = [i for i, _ in enumerate(loader.all_set)]

        # randomly split them into 5 folds
        train_ids_by_fold = []
        test_ids_by_fold = []
        test_cache = []
        for data_id in range(5):
            train_cache = list(set(dataset_to_split) - set(test_cache))
            test_part = random.sample(train_cache,
                                      int(len(dataset_to_split) / 5))
            test_cache = test_cache + test_part
            train_part = list(set(dataset_to_split) - set(test_part))
            train_ids_by_fold.append(train_part)
            test_ids_by_fold.append(test_part)

        for fold_id in range(5):
            # re-initialize after the final test
            start_epoch = 0
            Base = base(config, loader)
            loader.train_set.indices = train_ids_by_fold[fold_id]
            loader.val_set.indices = test_ids_by_fold[fold_id]
            logger('**********' * 3 + '5fold_train_fold_' + str(fold_id) +
                   '**********' * 3)

            for current_step in range(start_epoch, config.joint_training_steps):
                # save model every step. extra models will be automatically
                # deleted for saving storage
                Base.save_model(current_step)

                logger('**********' * 3 + 'train' + '**********' * 3)
                train_titles, train_values = train_a_ep(
                    config, Base, loader, current_step)
                logger('Time: {}; Step: {}; {}'.format(
                    time_now(), current_step,
                    analyze_names_and_meter(train_titles, train_values)))
                logger('')

                if (current_step) % 3 == 0:
                    logger('**********' * 3 + 'test' + '**********' * 3)
                    test_titles, test_values, confusion_matrix, metric_values = test_a_ep(
                        config, Base, loader, current_step)
                    logger('Time: {}; Step: {}; {}'.format(
                        time_now(), current_step,
                        analyze_names_and_meter(test_titles, test_values)))
                    logger(
                        'Time: {}; Step: {}; acc:{}; Precision:{}, Recall:{}, f1:{}, Specificity:{}, FPR:{}'
                        .format(time_now(), current_step, metric_values[0],
                                metric_values[1], metric_values[2],
                                metric_values[3], metric_values[4],
                                metric_values[5]), '.3f')
                    logger(confusion_matrix)
                    logger('')

    elif config.mode == 'test':
        logger('**********' * 3 + 'test' + '**********' * 3)
        test_titles, test_values, confusion_matrix, metric_values = test_a_ep(
            config, Base, loader, start_epoch)
        logger('Time: {}; Step: {}; {}'.format(
            time_now(), start_epoch,
            analyze_names_and_meter(test_titles, test_values)))
        logger(
            'Time: {}; Step: {}; acc:{}; Precision:{}, Recall:{}, f1:{}, Specificity:{}, FPR:{}'
            .format(time_now(), start_epoch, metric_values[0],
                    metric_values[1], metric_values[2], metric_values[3],
                    metric_values[4], metric_values[5]), '.3f')
        logger(confusion_matrix)
        logger('')

    elif config.mode == 'localize':
        logger('**********' * 3 + 'localize' + '**********' * 3)
        masks = [
            os.path.join("./datasets/Localize2/Masks", i)
            for i in os.listdir("./datasets/Localize2/Masks")
        ]
        masks.sort()
        test_titles, test_values = localize_penumonia(config, Base, loader,
                                                      start_epoch)
        logger('Time: {}; Step: {}; {}'.format(
            time_now(), start_epoch,
            analyze_names_and_meter(test_titles, test_values)))
        # logger(confusion_matrix)
        logger('')