def rate(title, body, source): # Get score from trained classifier clf = Classifier() classifier_score = clf.predict(title, body) # Get score from article searcher related_article, search_score = searcher_score(title, body) #search_score = min(0.9, search_score * 2) if source[:7] == "http://": source = source[7:] elif source[:8] == "https://": source = source[8:] first_slash_index = source.find("/") if first_slash_index != -1: source = source[:first_slash_index] print(source) is_trusted = is_from_trusted_source(source) print("Is trusted ", is_trusted) p = 0.55 q = 0.3 if is_trusted: return 10 * (classifier_score * p + (1 - p - q) + search_score * q), related_article else: return 10 * (classifier_score * 0.7 + search_score * 0.3), related_article
def main():
    """Run one hard-coded sample sentence through the trained classifier and print the result."""
    model = Classifier("data/best_model/model.meta", "data/best_model/model")
    sample = [["0", "I used to like cars."]]
    tokens = tokenize_sentences(sample)
    encoded, _ = encode_sentences(tokens, 32)
    result = model.infer(encoded)
    print(result)
def rate(title, body):
    """Blend the trained-classifier score (weight 0.8) with the article-searcher score (weight 0.2)."""
    # Score from the trained classifier.
    model_score = Classifier().predict(title, body)
    # Score from the article searcher.
    search_score = searcher_score(title, body)
    return model_score * 0.8 + search_score * 0.2
def main(fmodel, fvocab, rpath, wpath):
    """Run the trained discourse classifier over every .conll file in *rpath*.

    :param fmodel: path to the saved classifier model
    :param fvocab: path to the gzipped vocabulary file
    :param rpath: directory to read .conll files from
    :param wpath: directory the post-processed documents are written to
    """
    clf = Classifier()
    dr = DocReader()
    clf.loadmodel(fmodel)
    flist = [join(rpath, fname) for fname in listdir(rpath) if fname.endswith('conll')]
    # FIX: close the gzip handle (was left open) and use print() — the original
    # used Python-2 print statements, a syntax error under Python 3.
    with gzip.open(fvocab) as f:
        vocab = load(f)
    for fname in flist:
        print("Processing file: {}".format(fname))
        doc = dr.read(fname, withboundary=False)
        sg = SampleGenerator(vocab)
        sg.build(doc)
        M, _ = sg.getmat()
        predlabels = clf.predict(M)
        doc = postprocess(doc, predlabels)
        writedoc(doc, fname, wpath)
def run(args):
    """Load the saved config and best checkpoint, then run test_epoch on the test CSV."""
    with open(args.model_path + 'cfg.json') as f:
        cfg = edict(json.load(f))

    gpu_ids = [int(tok) for tok in args.device_ids.split(',')]
    available = torch.cuda.device_count()
    if available < len(gpu_ids):
        raise Exception('#available gpu : {} < --device_ids : {}'.format(
            available, len(gpu_ids)))
    device = torch.device('cuda:{}'.format(gpu_ids[0]))

    # Wrap in DataParallel and restore the best checkpoint's weights.
    net = Classifier(cfg)
    net = DataParallel(net, device_ids=gpu_ids).to(device).eval()
    ckpt = torch.load(os.path.join(args.model_path, 'best1.ckpt'),
                      map_location=device)
    net.module.load_state_dict(ckpt['state_dict'])

    test_loader = DataLoader(ImageDataset(args.in_csv_path, cfg, mode='test'),
                             batch_size=cfg.dev_batch_size,
                             num_workers=args.num_workers,
                             drop_last=False,
                             shuffle=False)
    test_epoch(cfg, args, net, test_loader, args.out_csv_path)
    print('Save best is step :', ckpt['step'], 'AUC :', ckpt['auc_dev_best'])
def _generate_next_sw_config(self, current_vertex_distributions, current_clustering, level_counter):
    """Build the SW configuration for the next sampling run from the current clustering result."""
    is_first_level = (level_counter == 1)

    if is_first_level:
        # Level 1 works on the raw corpus with fresh vertex distributions.
        graph_size = len(self.corpus)
        next_vertex_distributions = self._generate_initial_vertex_distributions()
    else:
        # Higher levels cluster the previous level's clusters.
        graph_size = len(current_clustering)
        next_vertex_distributions = _combine_vertex_distributions_given_clustering(
            current_vertex_distributions, current_clustering)

    if is_first_level:
        config = SWConfig(graph_size,
                          vertex_distributions=next_vertex_distributions,
                          documents=self.corpus.documents,
                          vocabularies=self.corpus.vocabularies,
                          level=level_counter)
    elif level_counter == 2:
        classifier = None
        if self._classifier_model_file is not None:
            classifier = Classifier(self._classifier_model_file)
        config = SWConfigLevel2(graph_size,
                                vertex_distributions=next_vertex_distributions,
                                documents=self.corpus.documents,
                                vocabularies=self.corpus.vocabularies,
                                level=level_counter,
                                classifier=classifier)
    # NOTE(review): for level_counter > 2 no `config` is assigned and the next
    # statement raises NameError — presumably the caller never goes past level 2;
    # confirm before relying on it.
    config.setup()
    return config
def sw_Process():
    """Run SW sampling for text segmentation on test document 2008080814."""
    [all_sentences, true_segment] = readingfiles.read_testing_file('2008080814')
    [transition_prob, length_prior,
     seg_num_prior] = readingfiles.load_model_parameters('preprocessing/model_segmenter.txt')
    classifier = Classifier('preprocessing/model_segmenter.txt')
    seg_model = SegmentationModel(all_sentences, transition_prob, length_prior,
                                  seg_num_prior, classifier)
    plotter = Plotter(seg_model, true_segment)

    # Chain graph: one edge between every pair of adjacent sentences.
    node_number = len(all_sentences)
    edges = [[i, i + 1] for i in range(node_number - 1)]

    print('Start Sampling')
    sw.sample(node_number,
              edges,
              seg_model.calculate_Qe,
              seg_model.target_evaluation_func,
              plotter.plot_callback,
              initial_clustering=None,
              monitor_statistics=seg_model.calculate_energy)
    print('Converged.')
    plotter.save()
def get_json_data():
    """Handle a POSTed browser-fingerprint JSON blob: geolocate the client IP,
    classify the user as new/returning, and index the enriched record into
    Elasticsearch.  Always responds with an empty body and HTTP 200.
    """
    if request.method == 'POST':
        data = request.get_json()

        # Get city, state and country for the client IP.
        # SECURITY NOTE(review): API key is hard-coded in the URL — move it to
        # configuration / an environment variable.
        url = "https://api.ipgeolocation.io/ipgeo?apiKey=2b1ee37501e64754b85f704fab4a5b82&ip=" + data[
            "ip"]
        resp = requests.get(url=url)
        info = resp.json()

        # Step 1: extract raw fingerprint fields used by the model.
        platform = data["platformmodel"]
        OS = data["os"].split("|")[0]
        timezone = int(data["timezone"])
        user_agent = data["user_agent"].split("|")[0].lower()
        browser = ""
        browser_version = ""
        try:
            browser, browser_version = [
                i.lower() for i in data["browser"].split()
            ]
        except ValueError:
            # FIX: was a bare `except:`; only the 2-way unpack can reasonably
            # fail here (no separate version token) — keep the whole string.
            browser = data["browser"].lower()
        channel, width, height = [
            int(i) for i in data["resolution"].split("|")
        ]
        vendor = data["vendor"]
        language = data["language"].lower()

        # Step 2: call the model to decide uniqueness and obtain the user id.
        obj = Classifier()
        boolean, id = obj.test(platform, OS, browser, timezone, width, height,
                               channel, user_agent, vendor, language)
        print(boolean, id)

        # Enrich with geo info and a timestamp, then drop the raw IP.
        data["country"] = info["country_name"]
        data["city"] = info["city"]
        data["state"] = info["state_prov"]
        data["time"] = datetime.datetime.now().isoformat()
        del data["ip"]
        print("********-------------*****")
        print("Data to send is ", data)
        print("*******--------------*****")
        es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
        res = es.index(index='my-index-000001', body=data)
    # FIX: return for every method so a non-POST request does not yield None.
    return '', 200
def main(ftrain, fdev=None, fmodel='model/model.pickle.gz'):
    """Train a Classifier on gzipped pickled data and save the model.

    :param ftrain: path to gzipped training data with 'data'/'labels' keys
    :param fdev: optional path to gzipped dev data in the same format
    :param fmodel: output path for the trained model
    """
    # FIX: Python-2 print statements -> print(); close the gzip handles.
    # Load data
    print('Loading training data ...')
    with gzip.open(ftrain) as f:
        data = load(f)
    M, labels = data['data'], data['labels']
    # Load dev data, if given.
    if fdev is not None:
        print('Loading dev data ...')
        with gzip.open(fdev) as f:
            devdata = load(f)
        devM, devlabels = devdata['data'], devdata['labels']
    else:
        devM, devlabels = None, None
    # Training with specified parameters
    print('Training ...')
    clf = Classifier()
    clf.train(M, labels, devM, devlabels)
    clf.savemodel(fmodel)
def train(opt):
    """Train a text classifier end to end: build the corpus, extract n-gram
    TF-IDF features, select features, fit the model, and persist both the
    model and the selected feature list.

    :param opt: options object; fields read here include corpus_root, encoding,
        tfidf_top_k, which_filter, mi_threshold, which_classifier and
        path_to_save_model.
    """
    # Prepare the training corpus (imports are local/conditional by design).
    print(options.TrainLogPrefix + "Prepare the training corpus begin!")
    from datasource.input_corpus import InputCorpus
    input_corpus = InputCorpus(opt.corpus_root, encoding=opt.encoding)
    print(options.TrainLogPrefix + "Prepare the training corpus end!")

    # Get the basic TF-IDF features over the corpus n-grams.
    print(options.TrainLogPrefix + "Get the basic tfidf features begin!")
    from feature.ngram_tfidf import NgramTfidf
    ngram_tfidf = NgramTfidf(input_corpus)
    ngram_tfidf.set_stopwords('./resource/stop_words_zh.utf8.txt')
    import numpy as np
    tfidf_mat, features = ngram_tfidf.get_tfidf_mat(top_k=opt.tfidf_top_k)
    tfidf_mat = np.asarray(tfidf_mat)
    features = np.asarray(features)
    # get_filenames_and_targets()[1] — presumably the per-document labels.
    targets = np.asarray(input_corpus.get_filenames_and_targets()[1])
    print(options.TrainLogPrefix + "Get the basic tfidf features end!")

    # Feature selection: mutual information or GBDT importance, per options.
    print(options.TrainLogPrefix + "Do feature selection begin!")
    if opt.which_filter == 'mi':
        from feature.feature_selection import MISelection as FeatureSelection
        feature_selector = FeatureSelection(tfidf_mat, targets,
                                            mi_threshold=opt.mi_threshold)
    else:
        from feature.feature_selection import GBDTSelection as FeatureSelection
        feature_selector = FeatureSelection(tfidf_mat, targets)
    # Boolean mask over feature columns.
    boolean_selection_index = feature_selector.get_boolean_selection_lst()
    filtered_tfidf_mat = tfidf_mat[:, boolean_selection_index]
    filtered_features = features[boolean_selection_index]
    print(options.TrainLogPrefix + "Do feature selection end!")

    # Train the chosen classifier and report its score.
    print(options.TrainLogPrefix + "Training model begin!")
    if opt.which_classifier == 'svm':
        from model.classifier import SVMClassifier as Classifier
    else:
        from model.classifier import GBDTClassifier as Classifier
    classifier_model = Classifier()
    from model.classifier import Scorer
    scorer = Scorer(classifier_model.get_model(), filtered_tfidf_mat, targets)
    print(options.TrainLogPrefix + "Training model end!")
    scorer.show_score()

    # Save the model (create the parent directory first).
    model_save_path = opt.path_to_save_model
    from utils import util
    util.mkdirs('/'.join(model_save_path.split('/')[:-1]))
    classifier_model.dump(filtered_tfidf_mat, targets, model_save_path)
    print(options.TrainLogPrefix + 'model save to ' + model_save_path)

    # Save the filtered features next to the model (suffix from options).
    filtered_features_save_path = opt.path_to_save_model + options.FeaturesSaveSuffix
    df_vec = ngram_tfidf.numDocsContainingFeatures(filtered_features)
    save_features_df(df_vec, filtered_features, len(tfidf_mat),
                     filtered_features_save_path)
def main(save_id, gen_p, train_p, eval_p, backbone_id, return_eval=False, use_bottleneck=True, file_id=""):
    """Generate IAD files, train, and/or evaluate a spatial-LSTM classifier.

    :param save_id: run identifier; also names the saved_models sub-directory
    :param gen_p: when True, generate the IAD files for both splits
    :param train_p: when True, train the spatial LSTM and save the model
    :param eval_p: when True, evaluate on train+evaluation splits
    :param backbone_id: backbone key passed to define_model()
    :param return_eval: when True, return the evaluation DataFrame instead of
        writing it to CSV
    :param use_bottleneck: forwarded to the Classifier constructor
    :param file_id: extra suffix for the output CSV name
    """
    print("save_id: {0}, train_p : {1}, eval_p: {2}, backbone_id: {3}, ".format(save_id, train_p, eval_p, backbone_id))

    # Backbone-specific hyper-parameters.
    from model_def import define_model
    model_dict = define_model(backbone_id)
    num_segments = model_dict["num_segments"]
    bottleneck_size = model_dict["bottleneck_size"]
    dense_sample = model_dict["dense_sample"]
    dense_rate = model_dict["dense_rate"]

    dir_name = os.path.join("saved_models", save_id)  # lfd_params
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    # NOTE(review): "../model" places the model file one level above dir_name —
    # presumably shared across runs; confirm this is intended.
    filename = os.path.join(dir_name, "../model")

    lfd_params = default_model_args(save_id=save_id, log_dir=dir_name,
                                    num_segments=num_segments,
                                    bottleneck_size=bottleneck_size,
                                    dense_sample=dense_sample,
                                    dense_rate=dense_rate)  # parse_model_args()

    if gen_p:
        # Feature-extractor-only model to produce IAD files for both splits.
        print("Generating ITR Files")
        model = Classifier(lfd_params, filename, backbone_id,
                           use_feature_extractor=True, use_spatial_lstm=False,
                           spatial_train=False, use_bottleneck=use_bottleneck)
        generate_iad_files(lfd_params, model, "train", backbone=backbone_id)
        generate_iad_files(lfd_params, model, "evaluation", backbone=backbone_id)

    if train_p:
        # Train only the spatial LSTM on the pre-generated IADs.
        model = Classifier(lfd_params, filename, backbone_id,
                           use_feature_extractor=False, use_spatial_lstm=True,
                           spatial_train=True, use_bottleneck=use_bottleneck)
        model = train(lfd_params, model, verbose=True, input_dtype="iad")
        model.save_model()

    if eval_p:
        # Evaluate the frozen spatial LSTM on both splits.
        model = Classifier(lfd_params, filename, backbone_id,
                           use_feature_extractor=False, use_spatial_lstm=True,
                           spatial_train=False, use_bottleneck=use_bottleneck)
        train_df = evaluate(lfd_params, model, mode="train", input_dtype="iad")
        train_df["mode"] = ["train"]*len(train_df)
        eval_df = evaluate(lfd_params, model, mode="evaluation", verbose=True,
                           input_dtype="iad")
        eval_df["mode"] = ["evaluation"] * len(eval_df)
        df = pd.concat([train_df, eval_df])

        if return_eval:
            return df

        df["repeat"] = ["1"]*len(df)
        out_filename = os.path.join(lfd_params.args.output_dir,
                                    "output_" + save_id + file_id+".csv")
        df.to_csv(out_filename)
        print("Output placed in: " + out_filename)
def build_model(cfg, paramsfile, device, device_ids=None):
    """Build a DataParallel-wrapped Classifier in eval mode from a checkpoint.

    :param cfg: model configuration object
    :param paramsfile: checkpoint path (raw state dict or wrapper dict)
    :param device: primary torch.device to load onto
    :param device_ids: GPU ids for DataParallel; None lets DataParallel pick
        all visible devices.
    """
    model = Classifier(cfg)
    # BUG FIX: `device_ids` was an undefined free name (NameError at runtime);
    # it is now an explicit, backward-compatible parameter.
    model = DataParallel(model, device_ids=device_ids).to(device).eval()
    ckpt = torch.load(paramsfile, map_location=device)
    # Checkpoints are either a wrapper dict with 'state_dict' or the state dict itself.
    state_dict = ckpt['state_dict'] if 'state_dict' in ckpt else ckpt
    model.module.load_state_dict(state_dict)
    if 'step' in ckpt and 'auc_dev_best' in ckpt:
        print(f"Using model '{paramsfile}' at step: {ckpt['step']} "
              f"with AUC: {ckpt['auc_dev_best']}")
    return model
def define_model(args, lfd_params, train, app=None, suffix=None, use_bottleneck=False, backbone=False):
    """Construct a Classifier or PolicyLearner configured for the given suffix.

    :param args: CLI args; args.model selects the backbone, args.app the app kind
    :param lfd_params: parameter bundle forwarded to the model
    :param train: whether the selected sub-network(s) should be trainable
    :param app: 'c' for classifier; anything else yields a policy learner
        (defaults to args.app)
    :param suffix: Suffix enum member selecting which sub-networks are active
    :param use_bottleneck: forwarded to the model constructor
    :param backbone: forwarded to make_model_name()
    :returns: the constructed model, or None for an unknown suffix
    """
    backbone_id = model_dict[args.model]
    filename = make_model_name(args, lfd_params, backbone=backbone)
    if app is None:
        app = args.app

    # Which sub-networks are active / trainable for this suffix.
    use_feature_extractor = False
    use_spatial = False
    use_pipeline = False
    use_temporal = False
    train_feature_extractor = False
    train_spatial = False
    train_pipeline = False
    train_temporal = False

    if suffix == Suffix.BACKBONE:
        use_feature_extractor = True
        use_spatial = True
        train_feature_extractor = train
        train_spatial = train
    elif suffix == Suffix.GENERATE_IAD:
        use_feature_extractor = True
        use_spatial = False
    elif suffix == Suffix.PIPELINE:
        use_pipeline = True
        train_pipeline = train
    elif suffix in [Suffix.LINEAR, Suffix.LINEAR_IAD, Suffix.LSTM_IAD, Suffix.LSTM, Suffix.TCN]:
        use_spatial = True
        train_spatial = train
    elif suffix == Suffix.DITRL:
        use_temporal = True
        train_temporal = train
    else:
        print(f"ERROR: execute.py: suffix '{suffix}' not available")
        return None

    # classifier
    # BUG FIX: was `suffix.GENERATE_IAD` (attribute lookup on the enum *member*,
    # which breaks for non-enum values of suffix); use the Suffix class.
    if app == 'c' or suffix in [Suffix.PIPELINE, Suffix.GENERATE_IAD]:
        return Classifier(lfd_params, filename, backbone_id, suffix,
                          use_feature_extractor=use_feature_extractor,
                          train_feature_extractor=train_feature_extractor,
                          use_bottleneck=use_bottleneck,
                          use_spatial=use_spatial, train_spatial=train_spatial,
                          use_pipeline=use_pipeline, train_pipeline=train_pipeline,
                          use_temporal=use_temporal, train_temporal=train_temporal)

    # policy_learner
    return PolicyLearner(lfd_params, filename, backbone_id, suffix,
                         use_feature_extractor=use_feature_extractor,
                         train_feature_extractor=train_feature_extractor,
                         use_bottleneck=use_bottleneck,
                         use_spatial=use_spatial, train_spatial=train_spatial,
                         use_pipeline=use_pipeline, train_pipeline=train_pipeline,
                         use_temporal=use_temporal, train_temporal=train_temporal,
                         train_policy=train)
def __init__(self):
    """Load the classifier with its example config and the best checkpoint from ../model."""
    model_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "model")
    with open(os.path.join(model_dir, "config/example.json")) as cfg_file:
        cfg = edict(json.load(cfg_file))

    self.model = Classifier(cfg)
    # Rebuild the heads for six single-logit outputs before loading weights.
    self.model.cfg.num_classes = [1, 1, 1, 1, 1, 1]
    self.model._init_classifier()
    self.model._init_attention_map()
    self.model._init_bn()

    # Eval mode on GPU when available, otherwise CPU.
    self.model = (self.model.eval().cuda()
                  if torch.cuda.is_available() else self.model.eval().cpu())

    weights = torch.load(os.path.join(model_dir, "model_best.pt"),
                         map_location=lambda storage, loc: storage)
    self.model.load_state_dict(weights)
def build_model(cfg, paramsfile):
    """Instantiate a CPU Classifier, load weights from *paramsfile*, and return it in eval mode."""
    net = Classifier(cfg).to('cpu')
    ckpt = torch.load(paramsfile, map_location='cpu')
    # The checkpoint is either a wrapper dict or the raw state dict itself.
    if 'state_dict' in ckpt:
        state_dict = ckpt['state_dict']
    else:
        state_dict = ckpt
    net.load_state_dict(state_dict)
    if 'step' in ckpt and 'auc_dev_best' in ckpt:
        print(f"Using model '{paramsfile}' at step: {ckpt['step']} "
              f"with AUC: {ckpt['auc_dev_best']}")
    return net.eval()
def profile_model(cfg, train_file, save_prefix, use_fl):
    """Profile one training batch with THOP and report/save total MACs and approximate FLOPs."""
    model = Classifier(cfg)
    loader = DataLoader(ImageDataset(train_file, cfg, mode='train'),
                        batch_size=cfg.train_batch_size,
                        num_workers=4,
                        drop_last=True,
                        shuffle=False)
    device = torch.device("cpu")
    custom_ops = {
        ExpPool: count_exp_pool,
        LinearPool: count_lin_pool,
        LogSumExpPool: count_log_sum_exp_pool,
        torch.nn.modules.activation.Sigmoid: count_sig,
    }

    # Profile a single batch only — per-batch cost is the same for all batches.
    for batch in loader:
        inputs = batch[0].to(device)
        macs, params = profile(model, inputs=(inputs, ), custom_ops=custom_ops)
        break

    steps = len(loader)
    if use_fl:
        # Federated setting: local epochs per communication round, times rounds.
        total_batches = steps * cfg.local_epoch * cfg.epoch
    else:
        total_batches = steps * cfg.epoch

    # When comparing MACs /FLOPs, we want the number to be implementation-agnostic
    # and as general as possible. THOP only counts multiplications, so FLOPs ~ 2 * MACs.
    total_macs = macs * total_batches
    total_flops_approx = 2 * total_macs
    total_macs_formatted, _ = clever_format([total_macs, params], "%.5f")
    total_flops_approx_formatted, _ = clever_format(
        [total_flops_approx, params], "%.5f")
    print(f"Total MACs: {total_macs_formatted}")
    print(f"Approximate Total FLOPs: {total_flops_approx_formatted}")

    # Persist the same summary to disk.
    with open(save_prefix, "w") as f:
        f.write(f"Total MACs: {total_macs_formatted}\n")
        f.write(f"Approximate Total FLOPs: {total_flops_approx_formatted}")
def exec_classifier_backbone(args):
    """Train (unless --eval_only) and evaluate the spatial classifier, writing results to CSV.

    :param args: CLI args; only args.eval_only is read here
    :returns: 0 on completion
    """
    # Train
    # BUG FIX: training was gated on `if args.eval_only:` — inverted relative to
    # the intent (the eval-only flag must *skip* training, not enable it).
    if not args.eval_only:
        model = Classifier(lfd_params, filename, backbone_id,
                           use_feature_extractor=False,
                           use_spatial=True, spatial_train=True,
                           use_bottleneck=use_bottleneck)
        model = train(lfd_params, model, verbose=True)
        model.save_model()

    # Evaluate a frozen model on both splits and concatenate the results.
    model = Classifier(lfd_params, filename, backbone_id,
                       use_feature_extractor=False,
                       use_spatial=True, spatial_train=False,
                       use_bottleneck=use_bottleneck)
    train_df = evaluate(lfd_params, model, mode="train")
    train_df["mode"] = ["train"] * len(train_df)
    eval_df = evaluate(lfd_params, model, mode="evaluation", verbose=True)
    eval_df["mode"] = ["evaluation"] * len(eval_df)
    df = pd.concat([train_df, eval_df])
    df["repeat"] = ["1"] * len(df)

    out_filename = os.path.join(lfd_params.args.output_dir,
                                "output_" + save_id + "_spatial.csv")
    df.to_csv(out_filename)
    print("Output placed in: " + out_filename)
    return 0
def main():
    """Three-step train/eval driver for a dual-backbone (densenet + resnet)
    chest-X-ray classifier with a fusion head.

    All settings come from the module-level `args`:
      step 1 trains the densenet branch (+fusion), step 2 the resnet branch
      (+fusion), step 3 fine-tunes all three jointly.  --evaluate runs a
      single validation pass for the selected step instead.
    """
    # Label set for the chosen dataset. ('No Fiding' spelling is kept as-is —
    # it must match the dataset files.)
    if args.dataset == 'ChestXray-NIHCC':
        if args.no_fiding:
            classes = [
                'Atelectasis', 'Cardiomegaly', 'Effusion', 'Infiltration',
                'Mass', 'Nodule', 'Pneumonia', 'Pneumothorax', 'Consolidation',
                'Edema', 'Emphysema', 'Fibrosis', 'Pleural_Thickening',
                'Hernia', 'No Fiding'
            ]
        else:
            classes = [
                'Atelectasis', 'Cardiomegaly', 'Effusion', 'Infiltration',
                'Mass', 'Nodule', 'Pneumonia', 'Pneumothorax', 'Consolidation',
                'Edema', 'Emphysema', 'Fibrosis', 'Pleural_Thickening',
                'Hernia'
            ]
    elif args.dataset == 'CheXpert-v1.0-small':
        classes = [
            'No Finding', 'Enlarged Cardiomediastinum', 'Cardiomegaly',
            'Lung Opacity', 'Lung Lesion', 'Edema', 'Consolidation',
            'Pneumonia', 'Atelectasis', 'Pneumothorax', 'Pleural Effusion',
            'Pleural Other', 'Fracture', 'Support Devices'
        ]
    else:
        print('--dataset incorrect')
        return

    torch.manual_seed(args.seed)
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False

    # Redirect stdout to a per-mode log file.
    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")
    pin_memory = True if use_gpu else False

    print("Initializing dataset: {}".format(args.dataset))
    # Resize to 556 then center-crop 512; ImageNet normalization stats.
    data_transforms = {
        'train':
        transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.Resize(556),
            transforms.CenterCrop(512),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]),
        'valid':
        transforms.Compose([
            transforms.Resize(556),
            transforms.CenterCrop(512),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]),
    }
    datasetTrain = DatasetGenerator(path_base=args.base_dir,
                                    dataset_file='train',
                                    transform=data_transforms['train'],
                                    dataset_=args.dataset,
                                    no_fiding=args.no_fiding)
    datasetVal = DatasetGenerator(path_base=args.base_dir,
                                  dataset_file='valid',
                                  transform=data_transforms['valid'],
                                  dataset_=args.dataset,
                                  no_fiding=args.no_fiding)
    train_loader = DataLoader(dataset=datasetTrain,
                              batch_size=args.train_batch,
                              shuffle=args.train_shuffle,
                              num_workers=args.workers,
                              pin_memory=pin_memory)
    valid_loader = DataLoader(dataset=datasetVal,
                              batch_size=args.valid_batch,
                              shuffle=args.valid_shuffle,
                              num_workers=args.workers,
                              pin_memory=pin_memory)

    # Densenet branch.  NOTE(review): `cfg` is overwritten by the resnet config
    # below, so later uses of `cfg` refer to the resnet one — confirm intended.
    with open(args.infos_densenet) as f:
        cfg = edict(json.load(f))
    print('Initializing densenet branch')
    model_dense = Classifier(cfg)
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model_dense.parameters()) / 1000000.0))

    # Resnet branch.
    with open(args.infos_resnet) as f:
        cfg = edict(json.load(f))
    print('Initializing resnet branch')
    model_res = Classifier(cfg)
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model_res.parameters()) / 1000000.0))

    # Fusion head over the concatenated branch features.
    print('Initializing fusion branch')
    model_fusion = Fusion(input_size=7424, output_size=len(classes))
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model_fusion.parameters()) / 1000000.0))

    # One optimizer (and scheduler) per branch.
    print("Initializing optimizers")
    optimizer_dense = init_optim(args.optim, model_dense.parameters(),
                                 args.learning_rate, args.weight_decay,
                                 args.momentum)
    optimizer_res = init_optim(args.optim, model_res.parameters(),
                               args.learning_rate, args.weight_decay,
                               args.momentum)
    optimizer_fusion = init_optim(args.optim, model_fusion.parameters(),
                                  args.learning_rate, args.weight_decay,
                                  args.momentum)
    criterion = nn.BCELoss()
    print("Initializing scheduler: {}".format(args.scheduler))
    if args.stepsize > 0:
        scheduler_dense = init_scheduler(args.scheduler, optimizer_dense,
                                         args.stepsize, args.gamma)
        scheduler_res = init_scheduler(args.scheduler, optimizer_res,
                                       args.stepsize, args.gamma)
        scheduler_fusion = init_scheduler(args.scheduler, optimizer_fusion,
                                          args.stepsize, args.gamma)

    start_epoch = args.start_epoch
    best_loss = np.inf

    # Optionally resume each branch from its own checkpoint.
    if args.resume_densenet:
        checkpoint_dense = torch.load(args.resume_densenet)
        model_dense.load_state_dict(checkpoint_dense['state_dict'])
        epoch_dense = checkpoint_dense['epoch']
        print("Resuming densenet from epoch {}".format(epoch_dense + 1))
    if args.resume_resnet:
        checkpoint_res = torch.load(args.resume_resnet)
        model_res.load_state_dict(checkpoint_res['state_dict'])
        epoch_res = checkpoint_res['epoch']
        print("Resuming resnet from epoch {}".format(epoch_res + 1))
    if args.resume_fusion:
        checkpoint_fusion = torch.load(args.resume_fusion)
        model_fusion.load_state_dict(checkpoint_fusion['state_dict'])
        epoch_fusion = checkpoint_fusion['epoch']
        print("Resuming fusion from epoch {}".format(epoch_fusion + 1))

    if use_gpu:
        model_dense = nn.DataParallel(model_dense).cuda()
        model_res = nn.DataParallel(model_res).cuda()
        model_fusion = nn.DataParallel(model_fusion).cuda()

    # Evaluation-only mode: single validation pass for the selected step.
    if args.evaluate:
        print("Evaluate only")
        if args.step == 1:
            valid('step1', model_dense, model_res, model_fusion, valid_loader,
                  criterion, args.print_freq, classes, cfg,
                  data_transforms['valid'])
        elif args.step == 2:
            valid('step2', model_dense, model_res, model_fusion, valid_loader,
                  criterion, args.print_freq, classes, cfg,
                  data_transforms['valid'])
        elif args.step == 3:
            valid('step3', model_dense, model_res, model_fusion, valid_loader,
                  criterion, args.print_freq, classes, cfg,
                  data_transforms['valid'])
        else:
            print('args.step not found')
        return

    if args.step == 1:
        #################################### DENSENET BRANCH INIT ##########################################
        start_time = time.time()
        train_time = 0
        best_epoch = 0
        print("==> Start training of densenet branch")
        # Freeze the resnet branch; train densenet + fusion.
        for p in model_dense.parameters():
            p.requires_grad = True
        for p in model_res.parameters():
            p.requires_grad = False
        for p in model_fusion.parameters():
            p.requires_grad = True
        for epoch in range(start_epoch, args.max_epoch):
            start_train_time = time.time()
            train('step1', model_dense, model_res, model_fusion, train_loader,
                  optimizer_dense, optimizer_res, optimizer_fusion, criterion,
                  args.print_freq, epoch, args.max_epoch, cfg,
                  data_transforms['train'])
            train_time += round(time.time() - start_train_time)
            # Validate every eval_step epochs and always on the final epoch.
            if args.eval_step > 0 and (epoch + 1) % args.eval_step == 0 or (
                    epoch + 1) == args.max_epoch:
                print("==> Validation")
                loss_val = valid('step1', model_dense, model_res, model_fusion,
                                 valid_loader, criterion, args.print_freq,
                                 classes, cfg, data_transforms['valid'])
                if args.stepsize > 0:
                    if args.scheduler == 'ReduceLROnPlateau':
                        scheduler_dense.step(loss_val)
                        scheduler_fusion.step(loss_val)
                    else:
                        scheduler_dense.step()
                        scheduler_fusion.step()
                is_best = loss_val < best_loss
                if is_best:
                    best_loss = loss_val
                    best_epoch = epoch + 1
                # DataParallel wraps the model, so unwrap .module on GPU.
                if use_gpu:
                    state_dict_dense = model_dense.module.state_dict()
                    state_dict_fusion = model_fusion.module.state_dict()
                else:
                    state_dict_dense = model_dense.state_dict()
                    state_dict_fusion = model_fusion.state_dict()
                save_checkpoint(
                    {
                        'state_dict': state_dict_dense,
                        'loss': best_loss,
                        'epoch': epoch,
                    }, is_best, args.save_dir,
                    'checkpoint_ep' + str(epoch + 1) + '.pth.tar', 'dense')
                save_checkpoint(
                    {
                        'state_dict': state_dict_fusion,
                        'loss': best_loss,
                        'epoch': epoch,
                    }, is_best, args.save_dir,
                    'checkpoint_ep' + str(epoch + 1) + '.pth.tar', 'fusion')
        print("==> Best Validation Loss {:.4%}, achieved at epoch {}".format(
            best_loss, best_epoch))
        elapsed = round(time.time() - start_time)
        elapsed = str(datetime.timedelta(seconds=elapsed))
        train_time = str(datetime.timedelta(seconds=train_time))
        print(
            "Dense branch finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}."
            .format(elapsed, train_time))
        #################################### DENSENET BRANCH END ##########################################
    elif args.step == 2:
        #################################### RESNET BRANCH INIT ##########################################
        start_time = time.time()
        train_time = 0
        best_epoch = 0
        print("==> Start training of local branch")
        # Freeze the densenet branch; train resnet + fusion.
        for p in model_dense.parameters():
            p.requires_grad = False
        for p in model_res.parameters():
            p.requires_grad = True
        for p in model_fusion.parameters():
            p.requires_grad = True
        for epoch in range(start_epoch, args.max_epoch):
            start_train_time = time.time()
            train('step2', model_dense, model_res, model_fusion, train_loader,
                  optimizer_dense, optimizer_res, optimizer_fusion, criterion,
                  args.print_freq, epoch, args.max_epoch, cfg,
                  data_transforms['train'])
            train_time += round(time.time() - start_train_time)
            if args.eval_step > 0 and (epoch + 1) % args.eval_step == 0 or (
                    epoch + 1) == args.max_epoch:
                print("==> Validation")
                loss_val = valid('step2', model_dense, model_res, model_fusion,
                                 valid_loader, criterion, args.print_freq,
                                 classes, cfg, data_transforms['valid'])
                if args.stepsize > 0:
                    if args.scheduler == 'ReduceLROnPlateau':
                        scheduler_res.step(loss_val)
                        scheduler_fusion.step(loss_val)
                    else:
                        scheduler_res.step()
                        scheduler_fusion.step()
                is_best = loss_val < best_loss
                if is_best:
                    best_loss = loss_val
                    best_epoch = epoch + 1
                if use_gpu:
                    state_dict_res = model_res.module.state_dict()
                    state_dict_fusion = model_fusion.module.state_dict()
                else:
                    state_dict_res = model_res.state_dict()
                    state_dict_fusion = model_fusion.state_dict()
                save_checkpoint(
                    {
                        'state_dict': state_dict_res,
                        'loss': best_loss,
                        'epoch': epoch,
                    }, is_best, args.save_dir,
                    'checkpoint_ep' + str(epoch + 1) + '.pth.tar', 'res')
                save_checkpoint(
                    {
                        'state_dict': state_dict_fusion,
                        'loss': best_loss,
                        'epoch': epoch,
                    }, is_best, args.save_dir,
                    'checkpoint_ep' + str(epoch + 1) + '.pth.tar', 'fusion')
        print("==> Best Validation Loss {:.4%}, achieved at epoch {}".format(
            best_loss, best_epoch))
        elapsed = round(time.time() - start_time)
        elapsed = str(datetime.timedelta(seconds=elapsed))
        train_time = str(datetime.timedelta(seconds=train_time))
        print(
            "Resnet branch finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}."
            .format(elapsed, train_time))
        #################################### RESNET BRANCH END ##########################################
    elif args.step == 3:
        #################################### FUSION BRANCH INIT ##########################################
        start_time = time.time()
        train_time = 0
        best_epoch = 0
        print("==> Start training of fusion branch")
        # Fine-tune all three branches jointly.
        for p in model_dense.parameters():
            p.requires_grad = True
        for p in model_res.parameters():
            p.requires_grad = True
        for p in model_fusion.parameters():
            p.requires_grad = True
        for epoch in range(start_epoch, args.max_epoch):
            start_train_time = time.time()
            train('step3', model_dense, model_res, model_fusion, train_loader,
                  optimizer_dense, optimizer_res, optimizer_fusion, criterion,
                  args.print_freq, epoch, args.max_epoch, cfg,
                  data_transforms['train'])
            train_time += round(time.time() - start_train_time)
            if args.eval_step > 0 and (epoch + 1) % args.eval_step == 0 or (
                    epoch + 1) == args.max_epoch:
                print("==> Validation")
                loss_val = valid('step3', model_dense, model_res, model_fusion,
                                 valid_loader, criterion, args.print_freq,
                                 classes, cfg, data_transforms['valid'])
                if args.stepsize > 0:
                    if args.scheduler == 'ReduceLROnPlateau':
                        scheduler_dense.step(loss_val)
                        scheduler_res.step(loss_val)
                        scheduler_fusion.step(loss_val)
                    else:
                        scheduler_dense.step()
                        scheduler_res.step()
                        scheduler_fusion.step()
                is_best = loss_val < best_loss
                if is_best:
                    best_loss = loss_val
                    best_epoch = epoch + 1
                if use_gpu:
                    state_dict_dense = model_dense.module.state_dict()
                    state_dict_res = model_res.module.state_dict()
                    state_dict_fusion = model_fusion.module.state_dict()
                else:
                    state_dict_dense = model_dense.state_dict()
                    state_dict_res = model_res.state_dict()
                    state_dict_fusion = model_fusion.state_dict()
                save_checkpoint(
                    {
                        'state_dict': state_dict_dense,
                        'loss': best_loss,
                        'epoch': epoch,
                    }, is_best, args.save_dir,
                    'checkpoint_ep' + str(epoch + 1) + '.pth.tar', 'dense')
                save_checkpoint(
                    {
                        'state_dict': state_dict_res,
                        'loss': best_loss,
                        'epoch': epoch,
                    }, is_best, args.save_dir,
                    'checkpoint_ep' + str(epoch + 1) + '.pth.tar', 'res')
                save_checkpoint(
                    {
                        'state_dict': state_dict_fusion,
                        'loss': best_loss,
                        'epoch': epoch,
                    }, is_best, args.save_dir,
                    'checkpoint_ep' + str(epoch + 1) + '.pth.tar', 'fusion')
        print("==> Best Validation Loss {:.4%}, achieved at epoch {}".format(
            best_loss, best_epoch))
        elapsed = round(time.time() - start_time)
        elapsed = str(datetime.timedelta(seconds=elapsed))
        train_time = str(datetime.timedelta(seconds=train_time))
        print(
            "Fusion branch finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}."
            .format(elapsed, train_time))
        #################################### FUSION BRANCH END ##########################################
    else:
        print('args.step not found')
pipeTest = DataPipeline( imagePath=testpath, imageChannels=imageChannels, searchPatternImage='*/*.jpg', augmentations=[], ) pipeTest.build(imageSize=(imageHeight, imageWidth), batchSize=batchSize, shuffle_buffer_size=200, shuffle=True) # Build model classy = Classifier( numClasses=pipeTest.nClasses, imageWidth=imageWidth, imageHeight=imageHeight, imageChannels=imageChannels, learnRate=0.001, lastTrainableLayers=2, ) logging.info(classy.model.summary()) classy.model.load_weights(os.path.join(outputDir, "weights.h5")) logging.info("Weights loaded!") intermediate = Model(inputs=[classy.model.input], outputs=classy.model.layers[-2].output) print(intermediate.summary()) for it, (imgs, labels) in enumerate(pipeTest.ds.take(31)): imgsCaf = 255 * imgs - tf.constant([123.68, 116.779, 103.939])
searchPatternImage='*/*.jpg', augmentations=[], ) pipeTest.build(imageSize=(imageHeight, imageWidth), batchSize=batchSize, shuffle_buffer_size=600, shuffle=False) # Build model assert pipeTest.nClasses == pipeTrain.nClasses, "Number of classes must match!" classy = Classifier( numClasses=pipeTrain.nClasses, imageWidth=imageWidth, imageHeight=imageHeight, imageChannels=imageChannels, learnRate=0.001, lastTrainableLayers=2, ) logging.info(classy.model.summary()) try: classy.model.load_weights(os.path.join(outputDir, "weights.h5")) logging.info("Weights loaded!") except: logging.warning("Couldnt load weights") # Model Training subitsTrain = np.ceil(pipeTrain.ndata / batchSize) subitsTest = np.ceil(pipeTest.ndata / batchSize)
# Script: train a convnet classifier on CIFAR-10 data, then classify the test
# images in 600 batches of 500 PNG files each.
from DataHandler import *
from model.nets import *
from model.train_functions import *
from model.classifier import Classifier
from scipy import misc
import numpy as np
import pandas as pd

# Load project parameters and the training/validation split.
params = read_params('Project_path/params.txt')
data = Cifar10_Data()
data.load_and_split(params['input_data_path'], params['labels_path'])
#xtrain=data.train_idxs
#xval=data.val_idxs
#batch = data.get_train_feed_dict('X','y','train',128)
#%%
# Train the classifier (convnet2 architecture) for 10 epochs.
cls = Classifier(params, data.Ndims, net=convnet2)
cls.train(data, epochs=10, batch_size=128)
#cls.load_weights_from_checkpoint(params['pre-traind_model_path'])
#%% Get The Test Data And Classify It
test_path = params['test_data_path']
labels = []
# Test images are named 1.png .. 300000.png; process them 500 at a time.
for batch_num in range(600):
    X = []
    for idx in range(500):
        img_path = test_path + str(batch_num * 500 + idx + 1) + '.png'
        X.append(misc.imread(img_path))
    X = np.array(X)
    # Standardize with the training-set statistics.
    X = (X - data.mean) / (data.std + 1e-7)
    preds = cls.predict(X)
    # NOTE(review): `preds` is overwritten each batch and `labels` stays empty
    # in this chunk — presumably the accumulation step lives past this view.
    preds = np.argmax(preds, axis=1)
def run(args):
    """Train a multi-label Classifier end-to-end from a JSON config.

    Builds train/dev dataloaders from CSVs, optionally resumes from
    `train.ckpt`, then for each epoch trains, evaluates (per-class loss,
    accuracy and ROC-AUC), logs to TensorBoard, and checkpoints both the
    rolling `train.ckpt` and rotating `best{k}.ckpt` files.

    Relies on module-level helpers defined elsewhere in this project:
    Classifier, ImageDataset, get_optimizer, lr_schedule, train_epoch,
    test_epoch, edict, copyfile, DataParallel, SummaryWriter, metrics.
    """
    with open(args.cfg_path) as f:
        cfg = edict(json.load(f))
        if args.verbose is True:
            print(json.dumps(cfg, indent=4))

    if not os.path.exists(args.save_path):
        os.mkdir(args.save_path)
    if args.logtofile is True:
        logging.basicConfig(filename=args.save_path + '/log.txt',
                            filemode="w", level=logging.INFO)
    else:
        logging.basicConfig(level=logging.INFO)

    # Persist the config next to the run outputs, unless resuming (in which
    # case the saved cfg.json from the earlier run is kept).
    if not args.resume:
        with open(os.path.join(args.save_path, 'cfg.json'), 'w') as f:
            json.dump(cfg, f, indent=1)

    device_ids = list(map(int, args.device_ids.split(',')))
    num_devices = torch.cuda.device_count()
    if num_devices < len(device_ids):
        raise Exception('#available gpu : {} < --device_ids : {}'.format(
            num_devices, len(device_ids)))
    # First listed device hosts the DataParallel master replica.
    device = torch.device('cuda:{}'.format(device_ids[0]))

    model = Classifier(cfg)
    if args.verbose is True:
        from torchsummary import summary
        if cfg.fix_ratio:
            h, w = cfg.long_side, cfg.long_side
        else:
            h, w = cfg.height, cfg.width
        summary(model.to(device), (3, h, w))
    model = DataParallel(model, device_ids=device_ids).to(device).train()
    # Optional warm start from a raw state_dict checkpoint.
    if args.pre_train is not None:
        if os.path.exists(args.pre_train):
            ckpt = torch.load(args.pre_train, map_location=device)
            model.module.load_state_dict(ckpt)
    optimizer = get_optimizer(model.parameters(), cfg)

    src_folder = os.path.dirname(os.path.abspath(__file__)) + '/../'
    dst_folder = os.path.join(args.save_path, 'classification')
    # rc, size = subprocess.getstatusoutput('dir --max-depth=0 %s | cut -f1'
    #                                       % src_folder)
    # if rc != 0:
    #     print(size)
    #     raise Exception('Copy folder error : {}'.format(rc))
    # rc, err_msg = subprocess.getstatusoutput('cp -R %s %s' % (src_folder,
    #                                                           dst_folder))
    # if rc != 0:
    #     raise Exception('copy folder error : {}'.format(err_msg))

    # Snapshot the exact data splits used for this run.
    copyfile(cfg.train_csv, os.path.join(args.save_path, 'train.csv'))
    copyfile(cfg.dev_csv, os.path.join(args.save_path, 'valid.csv'))

    dataloader_train = DataLoader(ImageDataset(cfg.train_csv, cfg,
                                               mode='train'),
                                  batch_size=cfg.train_batch_size,
                                  num_workers=args.num_workers,
                                  drop_last=True, shuffle=True)
    dataloader_dev = DataLoader(ImageDataset(cfg.dev_csv, cfg, mode='dev'),
                                batch_size=cfg.dev_batch_size,
                                num_workers=args.num_workers,
                                drop_last=False, shuffle=False)
    # Per-class label names used to tag TensorBoard scalars.
    dev_header = dataloader_dev.dataset._label_header

    summary_train = {'epoch': 0, 'step': 0}
    summary_dev = {'loss': float('inf'), 'acc': 0.0}
    summary_writer = SummaryWriter(args.save_path)
    epoch_start = 0
    best_dict = {
        "acc_dev_best": 0.0,
        "auc_dev_best": 0.0,
        "loss_dev_best": float('inf'),
        "fused_dev_best": 0.0,
        "best_idx": 1
    }

    # Resume training state (weights, counters, best metrics) from train.ckpt.
    if args.resume:
        ckpt_path = os.path.join(args.save_path, 'train.ckpt')
        ckpt = torch.load(ckpt_path, map_location=device)
        model.module.load_state_dict(ckpt['state_dict'])
        summary_train = {'epoch': ckpt['epoch'], 'step': ckpt['step']}
        best_dict['acc_dev_best'] = ckpt['acc_dev_best']
        best_dict['loss_dev_best'] = ckpt['loss_dev_best']
        best_dict['auc_dev_best'] = ckpt['auc_dev_best']
        epoch_start = ckpt['epoch']

    for epoch in range(epoch_start, cfg.epoch):
        # Step-wise LR schedule keyed on the epoch counter kept in
        # summary_train (updated inside train_epoch).
        lr = lr_schedule(cfg.lr, cfg.lr_factor, summary_train['epoch'],
                         cfg.lr_epochs)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

        summary_train, best_dict = train_epoch(
            summary_train, summary_dev, cfg, args, model,
            dataloader_train, dataloader_dev, optimizer,
            summary_writer, best_dict, dev_header)

        time_now = time.time()
        summary_dev, predlist, true_list = test_epoch(
            summary_dev, cfg, args, model, dataloader_dev)
        time_spent = time.time() - time_now

        # Per-class ROC-AUC on the dev predictions.
        auclist = []
        for i in range(len(cfg.num_classes)):
            y_pred = predlist[i]
            y_true = true_list[i]
            fpr, tpr, thresholds = metrics.roc_curve(
                y_true, y_pred, pos_label=1)
            auc = metrics.auc(fpr, tpr)
            auclist.append(auc)
        summary_dev['auc'] = np.array(auclist)

        loss_dev_str = ' '.join(
            map(lambda x: '{:.5f}'.format(x), summary_dev['loss']))
        acc_dev_str = ' '.join(
            map(lambda x: '{:.3f}'.format(x), summary_dev['acc']))
        auc_dev_str = ' '.join(
            map(lambda x: '{:.3f}'.format(x), summary_dev['auc']))

        logging.info('{}, Dev, Step : {}, Loss : {}, Acc : {}, Auc : {},'
                     'Mean auc: {:.3f} '
                     'Run Time : {:.2f} sec'.format(
                         time.strftime("%Y-%m-%d %H:%M:%S"),
                         summary_train['step'],
                         loss_dev_str,
                         acc_dev_str,
                         auc_dev_str,
                         summary_dev['auc'].mean(),
                         time_spent))

        for t in range(len(cfg.num_classes)):
            summary_writer.add_scalar('dev/loss_{}'.format(dev_header[t]),
                                      summary_dev['loss'][t],
                                      summary_train['step'])
            summary_writer.add_scalar('dev/acc_{}'.format(dev_header[t]),
                                      summary_dev['acc'][t],
                                      summary_train['step'])
            summary_writer.add_scalar('dev/auc_{}'.format(dev_header[t]),
                                      summary_dev['auc'][t],
                                      summary_train['step'])

        # Track the best dev metrics; only the metric named by cfg.best_target
        # triggers a "best" checkpoint, but all three bests are updated.
        save_best = False

        mean_acc = summary_dev['acc'][cfg.save_index].mean()
        if mean_acc >= best_dict['acc_dev_best']:
            best_dict['acc_dev_best'] = mean_acc
            if cfg.best_target == 'acc':
                save_best = True

        mean_auc = summary_dev['auc'][cfg.save_index].mean()
        if mean_auc >= best_dict['auc_dev_best']:
            best_dict['auc_dev_best'] = mean_auc
            if cfg.best_target == 'auc':
                save_best = True

        mean_loss = summary_dev['loss'][cfg.save_index].mean()
        if mean_loss <= best_dict['loss_dev_best']:
            best_dict['loss_dev_best'] = mean_loss
            if cfg.best_target == 'loss':
                save_best = True

        if save_best:
            # Rotating pool of top checkpoints: best1..best{save_top_k}.
            torch.save(
                {
                    'epoch': summary_train['epoch'],
                    'step': summary_train['step'],
                    'acc_dev_best': best_dict['acc_dev_best'],
                    'auc_dev_best': best_dict['auc_dev_best'],
                    'loss_dev_best': best_dict['loss_dev_best'],
                    'state_dict': model.module.state_dict()
                },
                os.path.join(args.save_path,
                             'best{}.ckpt'.format(best_dict['best_idx'])))
            best_dict['best_idx'] += 1
            if best_dict['best_idx'] > cfg.save_top_k:
                best_dict['best_idx'] = 1
            logging.info('{}, Best, Step : {}, Loss : {}, Acc : {},'
                         'Auc :{},Best Auc : {:.3f}'.format(
                             time.strftime("%Y-%m-%d %H:%M:%S"),
                             summary_train['step'],
                             loss_dev_str,
                             acc_dev_str,
                             auc_dev_str,
                             best_dict['auc_dev_best']))

        # Rolling checkpoint for --resume, overwritten every epoch.
        torch.save(
            {
                'epoch': summary_train['epoch'],
                'step': summary_train['step'],
                'acc_dev_best': best_dict['acc_dev_best'],
                'auc_dev_best': best_dict['auc_dev_best'],
                'loss_dev_best': best_dict['loss_dev_best'],
                'state_dict': model.module.state_dict()
            },
            os.path.join(args.save_path, 'train.ckpt'))
    summary_writer.close()
def run(args, val_h5_file):
    """Train the Classifier from pre-packed numpy/HDF5 CheXpert data.

    Variant of the CSV-based trainer: training images come from npy chunk
    files under `args.train_chunks`, labels from `train_labels.h5`, and the
    validation split from the already-open `val_h5_file`. Also threads the
    (q, k) hyperparameters and a squared-hinge loss into train/test epochs.

    Args:
        args: parsed CLI namespace (cfg_path, save_path, device_ids,
            train_chunks, chunk_count, q, k, ...).
        val_h5_file: open h5py File holding 'val' images and the three
            uncertainty-mapped label variants (ones/zeros/random).
    """
    with open(args.cfg_path) as f:
        cfg = edict(json.load(f))
        if args.verbose is True:
            print(json.dumps(cfg, indent=4))

    if not os.path.exists(args.save_path):
        os.mkdir(args.save_path)
    if args.logtofile is True:
        logging.basicConfig(filename=args.save_path + '/log.txt',
                            filemode="w", level=logging.INFO)
    else:
        logging.basicConfig(level=logging.INFO)

    if not args.resume:
        with open(os.path.join(args.save_path, 'cfg.json'), 'w') as f:
            json.dump(cfg, f, indent=1)

    device_ids = list(map(int, args.device_ids.split(',')))
    num_devices = torch.cuda.device_count()
    if num_devices < len(device_ids):
        raise Exception('#available gpu : {} < --device_ids : {}'.format(
            num_devices, len(device_ids)))
    device = torch.device('cuda:{}'.format(device_ids[0]))

    model = Classifier(cfg)
    if args.verbose is True:
        from torchsummary import summary
        if cfg.fix_ratio:
            h, w = cfg.long_side, cfg.long_side
        else:
            h, w = cfg.height, cfg.width
        summary(model.to(device), (3, h, w))
    model = DataParallel(model, device_ids=device_ids).to(device).train()
    if args.pre_train is not None:
        if os.path.exists(args.pre_train):
            ckpt = torch.load(args.pre_train, map_location=device)
            model.module.load_state_dict(ckpt)
    optimizer = get_optimizer(model.parameters(), cfg)

    #src_folder = os.path.dirname(os.path.abspath(__file__)) + '/../'
    #dst_folder = os.path.join(args.save_path, 'classification')
    #rc, size = subprocess.getstatusoutput('du --max-depth=0 %s | cut -f1' % src_folder)
    #if rc != 0: raise Exception('Copy folder error : {}'.format(rc))
    #rc, err_msg = subprocess.getstatusoutput('cp -R %s %s' % (src_folder, dst_folder))
    #if rc != 0: raise Exception('copy folder error : {}'.format(err_msg))
    #copyfile(cfg.train_csv, os.path.join(args.save_path, 'train.csv'))
    #copyfile(cfg.dev_csv, os.path.join(args.save_path, 'dev.csv'))

    # np_train_h5_file = np.array(train_h5_file['train'][:10000], dtype=np.uint8)
    # np_t_u_ones = np.array(train_h5_file['train_u_ones'][:10000], dtype=np.int8)
    # np_t_u_zeros = np.array(train_h5_file['train_u_zeros'][:10000], dtype=np.int8)
    # np_t_u_random = np.array(train_h5_file['train_u_random'][:10000], dtype=np.int8)

    # Materialize the validation images and the three uncertainty-label
    # policies (U-ones / U-zeros / U-random) into RAM.
    np_val_h5_file = np.array(val_h5_file['val'], dtype=np.uint8)
    np_v_u_ones = np.array(val_h5_file['val_u_ones'], dtype=np.int8)
    np_v_u_zeros = np.array(val_h5_file['val_u_zeros'], dtype=np.int8)
    np_v_u_random = np.array(val_h5_file['val_u_random'], dtype=np.int8)

    train_labels = {}
    with h5py.File(f'{args.train_chunks}/train_labels.h5', 'r') as fp:
        train_labels['train_u_ones'] = np.array(fp['train_u_ones'],
                                                dtype=np.int8)
        train_labels['train_u_zeros'] = np.array(fp['train_u_zeros'],
                                                 dtype=np.int8)
        train_labels['train_u_random'] = np.array(fp['train_u_random'],
                                                  dtype=np.int8)

    # Concatenate the 1-indexed npy chunk files into one training array.
    # NOTE(review): repeated np.concatenate re-copies the accumulated array per
    # chunk — acceptable for a small chunk_count, quadratic in the limit.
    np_train_samples = None
    for i in range(args.chunk_count):
        with open(f'{args.train_chunks}/chexpert_dset_chunk_{i+1}.npy',
                  'rb') as f:
            if np_train_samples is None:
                np_train_samples = np.load(f)
            else:
                np_train_samples = np.concatenate(
                    (np_train_samples, np.load(f)))

    dataloader_train = DataLoader(ImageDataset(
        [np_train_samples, train_labels], cfg, mode='train'),
        batch_size=cfg.train_batch_size,
        num_workers=args.num_workers,
        drop_last=True, shuffle=True)

    dataloader_dev = DataLoader(ImageDataset(
        [np_val_h5_file, np_v_u_zeros, np_v_u_ones, np_v_u_random],
        cfg, mode='val'),
        batch_size=cfg.dev_batch_size,
        num_workers=args.num_workers,
        drop_last=False, shuffle=False)

    #dev_header = dataloader_dev.dataset._label_header
    # Hard-coded CheXpert label names (14 observations) used for TensorBoard
    # scalar tags, in dataset column order.
    dev_header = [
        'No_Finding', 'Enlarged_Cardiomediastinum', 'Cardiomegaly',
        'Lung_Opacity', 'Lung_Lesion', 'Edema', 'Consolidation', 'Pneumonia',
        'Atelectasis', 'Pneumothorax', 'Pleural_Effusion', 'Pleural_Other',
        'Fracture', 'Support_Devices'
    ]
    print(f'dataloaders are set. train count: {np_train_samples.shape[0]}')
    logging.info("[LOGGING TEST]: dataloaders are set...")

    summary_train = {'epoch': 0, 'step': 0}
    summary_dev = {'loss': float('inf'), 'acc': 0.0}
    summary_writer = SummaryWriter(args.save_path)
    epoch_start = 0
    best_dict = {
        "acc_dev_best": 0.0,
        "auc_dev_best": 0.0,
        "loss_dev_best": float('inf'),
        "fused_dev_best": 0.0,
        "best_idx": 1
    }

    if args.resume:
        ckpt_path = os.path.join(args.save_path, 'train.ckpt')
        ckpt = torch.load(ckpt_path, map_location=device)
        model.module.load_state_dict(ckpt['state_dict'])
        summary_train = {'epoch': ckpt['epoch'], 'step': ckpt['step']}
        best_dict['acc_dev_best'] = ckpt['acc_dev_best']
        best_dict['loss_dev_best'] = ckpt['loss_dev_best']
        best_dict['auc_dev_best'] = ckpt['auc_dev_best']
        epoch_start = ckpt['epoch']

    # Broadcast the scalar q/k hyperparameters to one entry per class.
    q_list = []
    k_list = []
    for i in range(len(cfg.num_classes)):
        q_list.append(args.q)
        k_list.append(args.k)
    k_list = torch.FloatTensor(k_list)
    q_list = torch.FloatTensor(q_list)

    loss_sq_hinge = MultiClassSquaredHingeLoss()
    print('Everything is set starting to train...')
    before = datetime.datetime.now()

    for epoch in range(epoch_start, cfg.epoch):
        lr = lr_schedule(cfg.lr, cfg.lr_factor, summary_train['epoch'],
                         cfg.lr_epochs)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

        summary_train, best_dict = train_epoch(
            summary_train, summary_dev, cfg, args, model,
            dataloader_train, dataloader_dev, optimizer,
            summary_writer, best_dict, dev_header,
            q_list, k_list, loss_sq_hinge)

        time_now = time.time()
        summary_dev, predlist, true_list = test_epoch(
            summary_dev, cfg, args, model, dataloader_dev,
            q_list, k_list, loss_sq_hinge)
        time_spent = time.time() - time_now

        # Per-class ROC-AUC on the dev split.
        auclist = []
        for i in range(len(cfg.num_classes)):
            y_pred = predlist[i]
            y_true = true_list[i]
            fpr, tpr, thresholds = metrics.roc_curve(
                y_true, y_pred, pos_label=1)
            auc = metrics.auc(fpr, tpr)
            auclist.append(auc)
        summary_dev['auc'] = np.array(auclist)

        loss_dev_str = ' '.join(
            map(lambda x: '{:.5f}'.format(x), summary_dev['loss']))
        acc_dev_str = ' '.join(
            map(lambda x: '{:.3f}'.format(x), summary_dev['acc']))
        auc_dev_str = ' '.join(
            map(lambda x: '{:.3f}'.format(x), summary_dev['auc']))

        logging.info('{}, Dev, Step : {}, Loss : {}, Acc : {}, Auc : {},'
                     'Mean auc: {:.3f} '
                     'Run Time : {:.2f} sec'.format(
                         time.strftime("%Y-%m-%d %H:%M:%S"),
                         summary_train['step'],
                         loss_dev_str,
                         acc_dev_str,
                         auc_dev_str,
                         summary_dev['auc'].mean(),
                         time_spent))

        for t in range(len(cfg.num_classes)):
            summary_writer.add_scalar('dev/loss_{}'.format(dev_header[t]),
                                      summary_dev['loss'][t],
                                      summary_train['step'])
            summary_writer.add_scalar('dev/acc_{}'.format(dev_header[t]),
                                      summary_dev['acc'][t],
                                      summary_train['step'])
            summary_writer.add_scalar('dev/auc_{}'.format(dev_header[t]),
                                      summary_dev['auc'][t],
                                      summary_train['step'])

        # Same best-checkpoint policy as the CSV-based trainer: all three best
        # metrics tracked, cfg.best_target decides which one saves best{k}.ckpt.
        save_best = False
        mean_acc = summary_dev['acc'][cfg.save_index].mean()
        if mean_acc >= best_dict['acc_dev_best']:
            best_dict['acc_dev_best'] = mean_acc
            if cfg.best_target == 'acc':
                save_best = True

        mean_auc = summary_dev['auc'][cfg.save_index].mean()
        if mean_auc >= best_dict['auc_dev_best']:
            best_dict['auc_dev_best'] = mean_auc
            if cfg.best_target == 'auc':
                save_best = True

        mean_loss = summary_dev['loss'][cfg.save_index].mean()
        if mean_loss <= best_dict['loss_dev_best']:
            best_dict['loss_dev_best'] = mean_loss
            if cfg.best_target == 'loss':
                save_best = True

        if save_best:
            torch.save(
                {
                    'epoch': summary_train['epoch'],
                    'step': summary_train['step'],
                    'acc_dev_best': best_dict['acc_dev_best'],
                    'auc_dev_best': best_dict['auc_dev_best'],
                    'loss_dev_best': best_dict['loss_dev_best'],
                    'state_dict': model.module.state_dict()
                },
                os.path.join(args.save_path,
                             'best{}.ckpt'.format(best_dict['best_idx'])))
            best_dict['best_idx'] += 1
            if best_dict['best_idx'] > cfg.save_top_k:
                best_dict['best_idx'] = 1
            logging.info('{}, Best, Step : {}, Loss : {}, Acc : {},'
                         'Auc :{},Best Auc : {:.3f}'.format(
                             time.strftime("%Y-%m-%d %H:%M:%S"),
                             summary_train['step'],
                             loss_dev_str,
                             acc_dev_str,
                             auc_dev_str,
                             best_dict['auc_dev_best']))

        # Rolling resume checkpoint, overwritten each epoch.
        torch.save(
            {
                'epoch': summary_train['epoch'],
                'step': summary_train['step'],
                'acc_dev_best': best_dict['acc_dev_best'],
                'auc_dev_best': best_dict['auc_dev_best'],
                'loss_dev_best': best_dict['loss_dev_best'],
                'state_dict': model.module.state_dict()
            },
            os.path.join(args.save_path, 'train.ckpt'))

        print_remaining_time(before, epoch + 1, cfg.epoch,
                             additional='[training]')
    summary_writer.close()
class MDAIModel:
    """MD.ai deployment wrapper around the chest-X-ray Classifier.

    Loads config + checkpoint relative to this file, runs per-instance DICOM
    inference, and returns annotations with Grad-CAM / Integrated-Gradients
    explanation images.

    NOTE(review): `threshs` (per-class decision thresholds, length 6) is read
    from module scope — defined outside this class.
    """

    def __init__(self):
        # All assets (config, checkpoint) live next to this file.
        root_path = os.path.dirname(__file__)

        with open(os.path.join(root_path, "config/example.json")) as f:
            cfg = edict(json.load(f))

        self.model = Classifier(cfg)
        # Override to 6 binary heads, then rebuild the dependent submodules so
        # they match the new head count.
        self.model.cfg.num_classes = [1, 1, 1, 1, 1, 1]
        self.model._init_classifier()
        self.model._init_attention_map()
        self.model._init_bn()

        if torch.cuda.is_available():
            self.model = self.model.eval().cuda()
        else:
            self.model = self.model.eval().cpu()

        chkpt_path = os.path.join(root_path, "model_best.pt")
        # map_location keeps CPU-only hosts working with GPU-saved weights.
        self.model.load_state_dict(
            torch.load(chkpt_path, map_location=lambda storage, loc: storage)
        )

    def predict(self, data):
        """
        The input data has the following schema:

        {
            "instances": [
                {
                    "file": "bytes"
                    "tags": {
                        "StudyInstanceUID": "str",
                        "SeriesInstanceUID": "str",
                        "SOPInstanceUID": "str",
                        ...
                    }
                },
                ...
            ],
            "args": {
                "arg1": "str",
                "arg2": "str",
                ...
            }
        }

        Model scope specifies whether an entire study, series, or instance is
        given to the model. If the model scope is 'INSTANCE', then `instances`
        will be a single instance (list length of 1). If the model scope is
        'SERIES', then `instances` will be a list of all instances in a
        series. If the model scope is 'STUDY', then `instances` will be a list
        of all instances in a study.

        The additional `args` dict supply values that may be used in a given
        run.

        For a single instance dict, `files` is the raw binary data
        representing a DICOM file, and can be loaded using:
        `ds = pydicom.dcmread(BytesIO(instance["file"]))`.

        The results returned by this function should have the following
        schema:

        [
            {
                "type": "str", // 'NONE', 'ANNOTATION', 'IMAGE', 'DICOM', 'TEXT'
                "study_uid": "str",
                "series_uid": "str",
                "instance_uid": "str",
                "frame_number": "int",
                "class_index": "int",
                "data": {},
                "probability": "float",
                "explanations": [
                    {
                        "name": "str",
                        "description": "str",
                        "content": "bytes",
                        "content_type": "str",
                    },
                    ...
                ],
            },
            ...
        ]

        The DICOM UIDs must be supplied based on the scope of the label
        attached to `class_index`.
        """
        input_instances = data["instances"]
        input_args = data["args"]

        results = []

        for instance in input_instances:
            tags = instance["tags"]
            ds = pydicom.dcmread(BytesIO(instance["file"]))
            x = ds.pixel_array
            # Keep the unprocessed pixels for Grad-CAM overlay rendering.
            x_orig = x

            # preprocess image
            # convert grayscale to RGB
            # Upscale, contrast-normalize (CLAHE) on [0,1], downscale to the
            # model's 512x512 input, then map to [-1, 1].
            # NOTE(review): x / x.max() divides by zero on an all-black frame
            # — confirm inputs are guaranteed non-blank.
            x = cv2.resize(x, (1024, 1024))
            x = equalize_adapthist(x.astype(float) / x.max(), clip_limit=0.01)
            x = cv2.resize(x, (512, 512))
            x = x * 2 - 1
            # Replicate the single channel to 3 and add a batch dim:
            # shape (1, 3, 512, 512).
            x = np.array([[x, x, x]])
            x = torch.from_numpy(x).float()

            if torch.cuda.is_available():
                x = x.cuda()
            else:
                x = x.cpu()

            with torch.no_grad():
                logits, logit_maps = self.model(x)
            logits = torch.cat(logits, dim=1).detach().cpu()
            # Per-class probability relative to its tuned threshold: sigmoid
            # of (logit - thresh) is 0.5 exactly at the threshold.
            y_prob = torch.sigmoid(
                logits - torch.from_numpy(threshs).reshape((1, 6)))
            y_prob = y_prob.cpu().numpy()
            # Re-enable gradients on the input for the explainers below.
            x.requires_grad = True

            y_classes = y_prob >= 0.5
            class_indices = np.where(y_classes.astype("bool"))[1]

            if len(class_indices) == 0:
                # no outputs, return 'NONE' output type
                result = {
                    "type": "NONE",
                    "study_uid": tags["StudyInstanceUID"],
                    "series_uid": tags["SeriesInstanceUID"],
                    "instance_uid": tags["SOPInstanceUID"],
                    "frame_number": None,
                }
                results.append(result)
            else:
                # One annotation per positive class, each with two PNG
                # explanation images attached.
                for class_index in class_indices:
                    probability = y_prob[0][class_index]

                    gradcam = GradCam(self.model)
                    gradcam_output = gradcam.generate_cam(x, x_orig,
                                                          class_index)
                    gradcam_output_buffer = BytesIO()
                    gradcam_output.save(gradcam_output_buffer, format="PNG")

                    intgrad = IntegratedGradients(self.model)
                    intgrad_output = intgrad.generate_integrated_gradients(
                        x, class_index, 5)
                    intgrad_output_buffer = BytesIO()
                    intgrad_output.save(intgrad_output_buffer, format="PNG")

                    result = {
                        "type": "ANNOTATION",
                        "study_uid": tags["StudyInstanceUID"],
                        "series_uid": tags["SeriesInstanceUID"],
                        "instance_uid": tags["SOPInstanceUID"],
                        "frame_number": None,
                        "class_index": int(class_index),
                        "data": None,
                        "probability": float(probability),
                        "explanations": [
                            {
                                "name": "Grad-CAM",
                                "description": "Visualize how parts of the image affects neural network’s output by looking into the activation maps. From _Grad-CAM: Visual Explanations from Deep Networks via Gradient-based Localization_ (https://arxiv.org/abs/1610.02391)",
                                "content": gradcam_output_buffer.getvalue(),
                                "content_type": "image/png",
                            },
                            {
                                "name": "Integrated Gradients",
                                "description": "Visualize an average of the gradients along the construction of the input towards the decision. From _Axiomatic Attribution for Deep Networks_ (https://arxiv.org/abs/1703.01365)",
                                "content": intgrad_output_buffer.getvalue(),
                                "content_type": "image/png",
                            },
                        ],
                    }
                    results.append(result)

        return results
# NOTE(review): function fragment — the enclosing `def run(args):` and its
# `with open(args.cfg_path) as f:` header precede this chunk, and the body
# continues past it; indentation below is relative within the fragment.
# Modernized (f-string / assert) variant of the trainer setup.
cfg = edict(json.load(f))
if args.verbose is True:
    print(json.dumps(cfg, indent=4))

if not args.resume:
    with open(join(args.save_path, 'cfg.json'), 'w') as f:
        json.dump(cfg, f, indent=1)

device_ids = list(map(int, args.device_ids.split(',')))
num_devices = torch.cuda.device_count()
# NOTE(review): assert is stripped under `python -O` — a raise would be safer
# for this GPU-availability check.
assert num_devices >= len(device_ids), f"""
#available gpu : {num_devices} < --device_ids : {len(device_ids)}"""
device = torch.device(f"cuda:{device_ids[0]}")

model = Classifier(cfg)
if args.verbose:
    from torchsummary import summary
    # fix_ratio => square long_side input, else configured height x width.
    h, w = (cfg.long_side, cfg.long_side) if cfg.fix_ratio \
        else (cfg.height, cfg.width)
    summary(model.to(device), (3, h, w))
model = DataParallel(model, device_ids=device_ids).to(device)
# Optional warm start from a raw state_dict checkpoint.
if args.pre_train is not None:
    if exists(args.pre_train):
        ckpt = torch.load(args.pre_train, map_location=device)
        model.module.load_state_dict(ckpt)
optimizer = get_optimizer(model.parameters(), cfg)

trainset = ImageDataset(cfg.train_csv, cfg, mode='train')
""" Entry point for model infering. """ import os import pandas as pd from model.classifier import Classifier from data_utils.preprocessing import encode_sentences, tokenize_sentences classifier = Classifier("data/best_model/model.meta", "data/best_model/model") def main(): classifier = Classifier("data/best_model/model.meta", "data/best_model/model") sentences = [["0", "I used to like cars."]] tokenized_sentences = tokenize_sentences(sentences) encoded_sentences, _ = encode_sentences(tokenized_sentences, 32) result = classifier.infer(encoded_sentences) print(result) def classify(tweet_body): sentences = [["0", tweet_body]] tokenized_sentences = tokenize_sentences(sentences) encoded_sentences, _ = encode_sentences(tokenized_sentences, 32) result = classifier.infer(encoded_sentences) return result def _get_sentiment_vector_nasdaq(date, tweets_dir): all_data = pd.read_csv('./%s/%s.csv' % (tweets_dir, date))
def main(args):
    """Fine-tune a BERT classifier on MNLI with per-step diagnostics logging.

    Sets up data, seeding, optimizer with warmup, optionally warm-starts the
    BERT body and the classification head, then trains while logging loss,
    accuracy and classifier-head weight statistics via the Logger singleton.

    Args:
        args: parsed CLI namespace (bert_config_path, data_dir, vocab_path,
            batch_size, epoch, learning_rate, log_to, skip_steps, ...).
    """
    bert_config = BertConfig(args.bert_config_path)
    bert_config.print_config()

    if args.use_cuda and torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    processor = MnliDataProcessor(
        data_dir=args.data_dir,
        vocab_path=args.vocab_path,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        random_seed=args.random_seed
    )
    # NOTE(review): "num_labes" — typo for num_labels, kept as-is.
    num_labes = len(processor.get_labels())

    # Full determinism setup: Python, NumPy and Torch RNGs plus cuDNN flags.
    if args.random_seed is not None:
        random.seed(args.random_seed)
        np.random.seed(args.random_seed)
        torch.manual_seed(args.random_seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    train_data_generator = processor.data_generator(
        batch_size=args.batch_size,
        phase='train',
        epoch=args.epoch,
        shuffle=True
    )
    num_train_examples = processor.get_num_examples(phase='train')
    max_train_steps = args.epoch * num_train_examples // args.batch_size
    warmup_steps = int(max_train_steps * args.warmup_proportion)

    classifier = Classifier(bert_config, num_labes).to(device)
    # optimizer = torch.optim.Adam(classifier.parameters(), lr=args.learning_rate)
    optimizer = Optimizer(classifier, warmup_steps, max_train_steps,
                          args.learning_rate, args.weight_decay)

    # Warm-start the BERT encoder from pickled pre-training parameters.
    if args.init_pre_training_params:
        pre_training_params = load_pickle(args.init_pre_training_params)
        classifier.bert.load_state_dict(pre_training_params)

    # Temporal
    # NOTE(review): hard-coded absolute path to a user-specific checkpoint —
    # debugging aid that should not ship; also note the .t() transpose when
    # loading the head weights.
    cls_ckp = load_pickle('/home/cvds_lab/maxim/transformer_investigation/notebooks/ckp/classifier_ckp.pkl')
    classifier.cls_out.weight.data = torch.tensor(
        cls_ckp['cls_out_w'], dtype=torch.float32).t().to(device)
    classifier.cls_out.bias.data = torch.tensor(
        cls_ckp['cls_out_b'], dtype=torch.float32).to(device)
    # Temporal

    logfile = args.log_to
    Logger().add_log(logfile, ['epoch', 'step', 'loss', 'accuracy',
                               'cls_w_mean', 'cls_w_std', 'cls_w_min',
                               'cls_w_max', 'cls_b_mean', 'cls_b_std',
                               'cls_b_min', 'cls_b_max'])

    steps = 0
    total_loss, total_acc = [], []
    time_begin = time.time()
    # data_generator already folds args.epoch repetitions in, so a single
    # flat loop covers all epochs.
    for batch in train_data_generator():
        steps += 1
        # batch layout: [src_ids, position_ids, sentence_ids, input_mask,
        # labels].
        src_ids = torch.tensor(batch[0], dtype=torch.long).to(device)
        position_ids = torch.tensor(batch[1], dtype=torch.long).to(device)
        sentence_ids = torch.tensor(batch[2], dtype=torch.long).to(device)
        input_mask = torch.tensor(batch[3], dtype=torch.float32).to(device)
        labels = torch.tensor(batch[4], dtype=torch.long).to(device)

        optimizer.zero_grad()
        loss, _, accuracy = classifier(src_ids, position_ids, sentence_ids,
                                       input_mask, labels)
        loss.backward()
        # Custom optimizer takes the step index (for LR warmup/decay).
        optimizer.step(steps)

        current_example, current_epoch = processor.get_train_progress()
        Logger()[logfile]['epoch'].append(current_epoch)
        Logger()[logfile]['step'].append(steps)
        Logger()[logfile]['loss'].append(loss.item())
        Logger()[logfile]['accuracy'].append(accuracy.item())
        # Classifier-head weight statistics, recorded without autograd.
        with torch.no_grad():
            Logger()[logfile]['cls_w_mean'].append(classifier.cls_out.weight.mean().item())
            Logger()[logfile]['cls_w_std'].append(classifier.cls_out.weight.std().item())
            Logger()[logfile]['cls_w_min'].append(classifier.cls_out.weight.min().item())
            Logger()[logfile]['cls_w_max'].append(classifier.cls_out.weight.max().item())
            Logger()[logfile]['cls_b_mean'].append(classifier.cls_out.bias.mean().item())
            Logger()[logfile]['cls_b_std'].append(classifier.cls_out.bias.std().item())
            Logger()[logfile]['cls_b_min'].append(classifier.cls_out.bias.min().item())
            Logger()[logfile]['cls_b_max'].append(classifier.cls_out.bias.max().item())
        # Flush accumulated log rows periodically.
        if steps % 1000 == 0:
            Logger().log_all()

        # Console progress report every skip_steps steps, with rolling
        # averages reset after each report.
        if steps % args.skip_steps == 0:
            total_loss.append(loss.item())
            total_acc.append(accuracy.item())
            current_example, current_epoch = processor.get_train_progress()
            time_end = time.time()
            used_time = time_end - time_begin
            print('epoch: %d, progress: %d/%d, step: %d, ave loss: %f, '
                  'ave acc: %f, speed: %f steps/s' %
                  (current_epoch, current_example, num_train_examples, steps,
                   np.mean(total_loss).item(), np.mean(total_acc).item(),
                   args.skip_steps / used_time))
            total_loss, total_acc = [], []
            time_begin = time.time()
def run_fl(args):
    """Federated training of the Classifier across per-CSV clients.

    Each entry of cfg.train_csv becomes one client (named 'A', 'B', ...).
    For every communication round: each client trains locally for
    cfg.local_epoch epochs starting from the current global weights, then
    the local state_dicts are aggregated (FedAvg / weighted FedAvg /
    FedProx) into the new global model, which is evaluated on the shared
    dev set and checkpointed.
    """
    with open(args.cfg_path) as f:
        cfg = edict(json.load(f))
        if args.verbose is True:
            print(json.dumps(cfg, indent=4))

    if not os.path.exists(args.save_path):
        os.mkdir(args.save_path)
    if args.logtofile is True:
        logging.basicConfig(filename=args.save_path + '/log.txt',
                            filemode="w", level=logging.INFO)
    else:
        logging.basicConfig(level=logging.INFO)

    if not args.resume:
        with open(os.path.join(args.save_path, 'cfg.json'), 'w') as f:
            json.dump(cfg, f, indent=1)

    device_ids = list(map(int, args.device_ids.split(',')))
    num_devices = torch.cuda.device_count()
    if num_devices < len(device_ids):
        raise Exception('#available gpu : {} < --device_ids : {}'.format(
            num_devices, len(device_ids)))
    device = torch.device('cuda:{}'.format(device_ids[0]))

    # initialise global model
    model = Classifier(cfg).to(device).train()
    if args.verbose is True:
        from torchsummary import summary
        if cfg.fix_ratio:
            h, w = cfg.long_side, cfg.long_side
        else:
            h, w = cfg.height, cfg.width
        summary(model.to(device), (3, h, w))

    if args.pre_train is not None:
        if os.path.exists(args.pre_train):
            ckpt = torch.load(args.pre_train, map_location=device)
            model.load_state_dict(ckpt)

    # Archive a copy of the source tree next to the run outputs for
    # reproducibility (du is only a pre-flight sanity check).
    # NOTE(review): shell string built via %-interpolation — paths with spaces
    # or shell metacharacters will break; also POSIX-only.
    src_folder = os.path.dirname(os.path.abspath(__file__)) + '/../'
    dst_folder = os.path.join(args.save_path, 'classification')
    rc, size = subprocess.getstatusoutput('du --max-depth=0 %s | cut -f1'
                                          % src_folder)
    if rc != 0:
        raise Exception('Copy folder error : {}'.format(rc))
    else:
        print('Successfully determined size of directory')

    rc, err_msg = subprocess.getstatusoutput('cp -R %s %s' % (src_folder,
                                                              dst_folder))
    if rc != 0:
        raise Exception('copy folder error : {}'.format(err_msg))
    else:
        print('Successfully copied folder')

    # copy train files
    # One client per training CSV, keyed 'A', 'B', ... (caps at 26 clients).
    train_files = cfg.train_csv
    clients = {}
    for i, c in enumerate(string.ascii_uppercase):
        if i < len(train_files):
            clients[c] = {}
        else:
            break

    # initialise clients
    for i, client in enumerate(clients):
        copyfile(train_files[i],
                 os.path.join(args.save_path, f'train_{client}.csv'))
        clients[client]['dataloader_train'] =\
            DataLoader(
                ImageDataset(train_files[i], cfg, mode='train'),
                batch_size=cfg.train_batch_size,
                num_workers=args.num_workers, drop_last=True,
                shuffle=True
            )
        clients[client]['bytes_uploaded'] = 0.0
        clients[client]['epoch'] = 0

    copyfile(cfg.dev_csv, os.path.join(args.save_path, 'dev.csv'))
    dataloader_dev = DataLoader(ImageDataset(cfg.dev_csv, cfg, mode='dev'),
                                batch_size=cfg.dev_batch_size,
                                num_workers=args.num_workers,
                                drop_last=False, shuffle=False)
    dev_header = dataloader_dev.dataset._label_header

    # Current global weights, re-distributed to clients every round.
    w_global = model.state_dict()

    summary_train = {'epoch': 0, 'step': 0}
    summary_dev = {'loss': float('inf'), 'acc': 0.0}
    summary_writer = SummaryWriter(args.save_path)
    comm_rounds = cfg.epoch
    best_dict = {
        "acc_dev_best": 0.0,
        "auc_dev_best": 0.0,
        "loss_dev_best": float('inf'),
        "fused_dev_best": 0.0,
        "best_idx": 1
    }

    # Communication rounds loop
    for cr in range(comm_rounds):
        logging.info('{}, Start communication round {} of FL - {} ...'.format(
            time.strftime("%Y-%m-%d %H:%M:%S"), cr + 1, cfg.fl_technique))
        w_locals = []
        for client in clients:
            logging.info(
                '{}, Start local training process for client {}, communication round: {} ...'
                .format(time.strftime("%Y-%m-%d %H:%M:%S"), client, cr + 1))

            # Load previous current global model as start point
            model = Classifier(cfg).to(device).train()
            model.load_state_dict(w_global)

            # FedProx needs a frozen snapshot of the global weights for its
            # proximal regularization term.
            if cfg.fl_technique == "FedProx":
                global_weight_collector = get_global_weights(model, device)
            else:
                global_weight_collector = None

            optimizer = get_optimizer(model.parameters(), cfg)

            # local training loops
            for epoch in range(cfg.local_epoch):
                lr = lr_schedule(cfg.lr, cfg.lr_factor, epoch, cfg.lr_epochs)
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr

                summary_train, best_dict = train_epoch_fl(
                    summary_train, summary_dev, cfg, args, model,
                    clients[client]['dataloader_train'], dataloader_dev,
                    optimizer, summary_writer, best_dict, dev_header,
                    epoch, global_weight_collector)

                summary_train['step'] += 1

            # NOTE(review): sys.getsizeof on a state_dict measures only the
            # dict object, not the tensors — upload accounting is approximate.
            bytes_to_upload = sys.getsizeof(model.state_dict())
            clients[client]['bytes_uploaded'] += bytes_to_upload
            logging.info(
                '{}, Completed local rounds for client {} in communication round {}. '
                'Uploading {} bytes to server, {} bytes in total sent from client'
                .format(time.strftime("%Y-%m-%d %H:%M:%S"), client, cr + 1,
                        bytes_to_upload, clients[client]['bytes_uploaded']))

            w_locals.append(model.state_dict())

        # Server-side aggregation of the clients' local weights.
        if cfg.fl_technique == "FedAvg":
            w_global = fed_avg(w_locals)
        elif cfg.fl_technique == 'WFedAvg':
            w_global = weighted_fed_avg(w_locals, cfg.train_proportions)
        elif cfg.fl_technique == 'FedProx':
            # Use weighted FedAvg when using FedProx
            w_global = weighted_fed_avg(w_locals, cfg.train_proportions)

        # Test the performance of the averaged model
        avged_model = Classifier(cfg).to(device)
        avged_model.load_state_dict(w_global)
        time_now = time.time()
        summary_dev, predlist, true_list = test_epoch(
            summary_dev, cfg, args, avged_model, dataloader_dev)
        time_spent = time.time() - time_now

        # Per-class ROC-AUC of the averaged model on the dev set.
        auclist = []
        for i in range(len(cfg.num_classes)):
            y_pred = predlist[i]
            y_true = true_list[i]
            fpr, tpr, thresholds = metrics.roc_curve(
                y_true, y_pred, pos_label=1)
            auc = metrics.auc(fpr, tpr)
            auclist.append(auc)
        auc_summary = np.array(auclist)

        loss_dev_str = ' '.join(
            map(lambda x: '{:.5f}'.format(x), summary_dev['loss']))
        acc_dev_str = ' '.join(
            map(lambda x: '{:.3f}'.format(x), summary_dev['acc']))
        auc_dev_str = ' '.join(
            map(lambda x: '{:.3f}'.format(x), auc_summary))

        logging.info(
            '{}, Averaged Model -> Dev, Step : {}, Loss : {}, Acc : {}, Auc : {},'
            'Mean auc: {:.3f} '
            'Run Time : {:.2f} sec'.format(time.strftime("%Y-%m-%d %H:%M:%S"),
                                           summary_train['step'],
                                           loss_dev_str,
                                           acc_dev_str,
                                           auc_dev_str,
                                           auc_summary.mean(),
                                           time_spent))

        for t in range(len(cfg.num_classes)):
            summary_writer.add_scalar('dev/loss_{}'.format(dev_header[t]),
                                      summary_dev['loss'][t],
                                      summary_train['step'])
            summary_writer.add_scalar('dev/acc_{}'.format(dev_header[t]),
                                      summary_dev['acc'][t],
                                      summary_train['step'])
            summary_writer.add_scalar('dev/auc_{}'.format(dev_header[t]),
                                      auc_summary[t],
                                      summary_train['step'])

        # Best-checkpoint policy mirrors the centralized trainers, but saves
        # the averaged (global) model's weights.
        save_best = False
        mean_acc = summary_dev['acc'][cfg.save_index].mean()
        if mean_acc >= best_dict['acc_dev_best']:
            best_dict['acc_dev_best'] = mean_acc
            if cfg.best_target == 'acc':
                save_best = True
        mean_auc = auc_summary[cfg.save_index].mean()
        if mean_auc >= best_dict['auc_dev_best']:
            best_dict['auc_dev_best'] = mean_auc
            if cfg.best_target == 'auc':
                save_best = True
        mean_loss = summary_dev['loss'][cfg.save_index].mean()
        if mean_loss <= best_dict['loss_dev_best']:
            best_dict['loss_dev_best'] = mean_loss
            if cfg.best_target == 'loss':
                save_best = True

        if save_best:
            torch.save(
                {
                    'epoch': summary_train['epoch'],
                    'step': summary_train['step'],
                    'acc_dev_best': best_dict['acc_dev_best'],
                    'auc_dev_best': best_dict['auc_dev_best'],
                    'loss_dev_best': best_dict['loss_dev_best'],
                    'state_dict': avged_model.state_dict()
                },
                os.path.join(args.save_path,
                             'best{}.ckpt'.format(best_dict['best_idx'])))
            best_dict['best_idx'] += 1
            if best_dict['best_idx'] > cfg.save_top_k:
                best_dict['best_idx'] = 1
            logging.info('{}, Best, Step : {}, Loss : {}, Acc : {},'
                         'Auc :{},Best Auc : {:.3f}'.format(
                             time.strftime("%Y-%m-%d %H:%M:%S"),
                             summary_train['step'],
                             loss_dev_str,
                             acc_dev_str,
                             auc_dev_str,
                             best_dict['auc_dev_best']))

        # Rolling checkpoint of the averaged model, keyed by round index.
        torch.save(
            {
                'epoch': cr,
                'step': summary_train['step'],
                'acc_dev_best': best_dict['acc_dev_best'],
                'auc_dev_best': best_dict['auc_dev_best'],
                'loss_dev_best': best_dict['loss_dev_best'],
                'state_dict': avged_model.state_dict()
            },
            os.path.join(args.save_path, 'train.ckpt'))
class MDAIModel:
    """Wrapper exposing a multi-label chest X-ray classifier to the MD.ai
    model-deploy server (see the schema reference linked in `predict`).

    Loads the model configuration and checkpoint from the sibling "model"
    directory relative to this file.
    """

    def __init__(self):
        # "model" directory sits next to this file's parent directory.
        root_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "model")
        with open(os.path.join(root_path, "config/example.json")) as f:
            cfg = edict(json.load(f))
        self.model = Classifier(cfg)
        # Override the configured head layout with six binary (1-logit) tasks,
        # then rebuild the affected submodules so they match the checkpoint.
        # NOTE(review): calls into Classifier's private _init_* API — confirm
        # these remain valid on Classifier changes.
        self.model.cfg.num_classes = [1, 1, 1, 1, 1, 1]
        self.model._init_classifier()
        self.model._init_attention_map()
        self.model._init_bn()
        if torch.cuda.is_available():
            self.model = self.model.eval().cuda()
        else:
            self.model = self.model.eval().cpu()
        chkpt_path = os.path.join(root_path, "model_best.pt")
        # map_location keeps tensors on CPU during load; the model itself was
        # already moved to the target device above.
        self.model.load_state_dict(
            torch.load(chkpt_path, map_location=lambda storage, loc: storage))

    def predict(self, data):
        """
        See https://github.com/mdai/model-deploy/blob/master/mdai/server.py
        for details on the schema of `data` and the required schema of the
        outputs returned by this function.

        Non-DICOM files are skipped. For each DICOM instance, returns either a
        single "NONE" output (no class crossed its threshold) or one
        "ANNOTATION" output per predicted class, each with Grad-CAM and
        Integrated Gradients explanation images attached as PNG bytes.
        """
        input_files = data["files"]
        input_annotations = data["annotations"]
        input_args = data["args"]

        outputs = []

        for file in input_files:
            if file["content_type"] != "application/dicom":
                continue

            ds = pydicom.dcmread(BytesIO(file["content"]))
            x = ds.pixel_array
            # Keep the raw pixel array for overlaying the Grad-CAM heatmap.
            x_orig = x

            # preprocess image
            # convert grayscale to RGB
            # Upsample before adaptive histogram equalization, then downsample
            # to the network's 512x512 input; rescale intensities to [-1, 1].
            x = cv2.resize(x, (1024, 1024))
            x = equalize_adapthist(x.astype(float) / x.max(), clip_limit=0.01)
            x = cv2.resize(x, (512, 512))
            x = x * 2 - 1
            # Replicate the single channel into 3 channels and add a batch dim
            # -> shape (1, 3, 512, 512).
            x = np.array([[x, x, x]])
            x = torch.from_numpy(x).float()

            if torch.cuda.is_available():
                x = x.cuda()
            else:
                x = x.cpu()

            with torch.no_grad():
                logits, logit_maps = self.model(x)
                logits = torch.cat(logits, dim=1).detach().cpu()
                # `threshs` is a module-level array of 6 per-class decision
                # thresholds (defined elsewhere in this file — TODO confirm);
                # subtracting before sigmoid centers each class so the 0.5
                # cutoff below corresponds to its calibrated threshold.
                y_prob = torch.sigmoid(logits - torch.from_numpy(threshs).reshape((1, 6)))
                y_prob = y_prob.cpu().numpy()

            # Gradients are needed by GradCam / IntegratedGradients below;
            # the forward pass above was deliberately grad-free.
            x.requires_grad = True

            y_classes = y_prob >= 0.5
            # Column indices of positive classes for the single batch element.
            class_indices = np.where(y_classes.astype("bool"))[1]

            if len(class_indices) == 0:
                # no outputs, return 'NONE' output type
                output = {
                    "type": "NONE",
                    "study_uid": str(ds.StudyInstanceUID),
                    "series_uid": str(ds.SeriesInstanceUID),
                    "instance_uid": str(ds.SOPInstanceUID),
                    "frame_number": None,
                }
                outputs.append(output)
            else:
                # One annotation per positive class, each with two explanation
                # images rendered to in-memory PNG buffers.
                for class_index in class_indices:
                    probability = y_prob[0][class_index]

                    gradcam = GradCam(self.model)
                    gradcam_output = gradcam.generate_cam(
                        x, x_orig, class_index)
                    gradcam_output_buffer = BytesIO()
                    gradcam_output.save(gradcam_output_buffer, format="PNG")

                    intgrad = IntegratedGradients(self.model)
                    # 5 = number of interpolation steps along the baseline->input
                    # path (presumably; verify against IntegratedGradients).
                    intgrad_output = intgrad.generate_integrated_gradients(
                        x, class_index, 5)
                    intgrad_output_buffer = BytesIO()
                    intgrad_output.save(intgrad_output_buffer, format="PNG")

                    output = {
                        "type": "ANNOTATION",
                        "study_uid": str(ds.StudyInstanceUID),
                        "series_uid": str(ds.SeriesInstanceUID),
                        "instance_uid": str(ds.SOPInstanceUID),
                        "frame_number": None,
                        "class_index": int(class_index),
                        "data": None,
                        "probability": float(probability),
                        "explanations": [
                            {
                                "name": "Grad-CAM",
                                "description": "Visualize how parts of the image affects neural network’s output by looking into the activation maps. From _Grad-CAM: Visual Explanations from Deep Networks via Gradient-based Localization_ (https://arxiv.org/abs/1610.02391)",
                                "content": gradcam_output_buffer.getvalue(),
                                "content_type": "image/png",
                            },
                            {
                                "name": "Integrated Gradients",
                                "description": "Visualize an average of the gradients along the construction of the input towards the decision. From _Axiomatic Attribution for Deep Networks_ (https://arxiv.org/abs/1703.01365)",
                                "content": intgrad_output_buffer.getvalue(),
                                "content_type": "image/png",
                            },
                        ],
                    }
                    outputs.append(output)

        return outputs
train=False, transform=transforms.ToTensor()) dataloader = { 'train': torch.utils.data.DataLoader(train_dataset, batch_size=opts.batch_size, shuffle=True), 'test': torch.utils.data.DataLoader(test_dataset, batch_size=opts.batch_size, shuffle=False) } cvae = CVAE(opts.latent_size, device).to(device) dis = Discriminator().to(device) classifier = Classifier(opts.latent_size).to(device) classer = CLASSIFIERS().to(device) print(cvae) print(dis) print(classifier) optimizer_cvae = torch.optim.Adam(cvae.parameters(), lr=opts.lr, betas=(opts.b1, opts.b2), weight_decay=opts.weight_decay) optimizer_dis = torch.optim.Adam(dis.parameters(), lr=opts.lr, betas=(opts.b1, opts.b2), weight_decay=opts.weight_decay) optimizer_classifier = torch.optim.Adam(classifier.parameters(),
train_labels['train_u_ones'] = np.array(fp['train_u_ones'], dtype=np.int8) train_labels['train_u_zeros'] = np.array(fp['train_u_zeros'], dtype=np.int8) train_labels['train_u_random'] = np.array(fp['train_u_random'], dtype=np.int8) np_train_samples = None for i in range(args.chunk_count): with open(f'{args.train_chunks}/chexpert_dset_chunk_{i+1}.npy', 'rb') as f: if np_train_samples is None: np_train_samples = np.load(f) else: np_train_samples = np.concatenate((np_train_samples, np.load(f))) # device = torch.device(f'cuda:{args.gpu}') # load best chexpert model from normal print('loading network: '+ args.saved_model_path) model = Classifier(cfg) #model = DataParallel(model, device_ids=args.gpu).to(device) model = DataParallel(model, device_ids=[args.gpu]).to(device) ckpt = torch.load(args.saved_model_path, map_location=device) model.module.load_state_dict(ckpt['state_dict']) model.cuda() # dataloader_train = DataLoader( ImageDataset([np_train_samples, train_labels], cfg, mode='train'), batch_size=cfg.train_batch_size, num_workers=args.num_workers, drop_last=False, shuffle=False) dataloader_dev_val = DataLoader( ImageDataset([np_dev_val_h5_file, np_dev_val_u_zeros, np_dev_val_u_ones, np_dev_val_u_random], cfg, mode='val'), batch_size=cfg.dev_batch_size, num_workers=args.num_workers, drop_last=False, shuffle=False)