def process():
    #print("use:",layerfile()," info:",open(layerfile().replace(".pickle",".info"),'r').read().rstrip());
    N, X, Target = dataset.get()
    Em, Dm = diff_minimax(layers())
    Ep, Dp = perf()
    print(layerfile(), len(Ep) + len(Dp),
          ": minimax:", percent(Ep, Dp), "%",
          "Jall=", layer.J(X, Target, layers()),
          " ME:", percent(Em, Dm), "%")
def eval(net):
    data_root = "~/dataset/"
    val_loader = get(
        batch_size=256, data_root=data_root,
        train=False, val=True, shuffle=True)
    acc1, acc5 = eval_model(net, val_loader)
    print("acc1:{}, acc5:{}".format(acc1, acc5))
def get_latent(args):
    model = torch.load(args.model[0])
    model.eval()
    latents = []
    file_names = []
    for data_file in args.data_files:
        file_names.append(str(os.path.basename(data_file)))
        seq = torch.FloatTensor([dataset.get(data_file)])
        seq_len = [len(s) for s in seq]
        with torch.no_grad():
            mu, ln_var = model.encode(seq, seq_len)
        latents.append(mu[0].tolist())
    return latents, file_names
def get_datasets(self, *dataset_names, use_local=False):
    datasets = {name: dataset_desc.get(name) for name in dataset_names}
    if use_local:
        logger.info('use local dataset')
    else:  # we use remote dataset by default
        logger.info('use remote dataset')
        datasets = {
            name: create_remote_dataset(
                ds.servable_name,
                ds.nr_minibatch_in_epoch,
            )
            for name, ds in datasets.items()
        }
    return datasets
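# Hedged usage sketch for get_datasets above; the owning object and the
# dataset names are illustrative assumptions, not from the original source:
#   loaders = manager.get_datasets('train', 'valid')                 # remote (default)
#   local_loaders = manager.get_datasets('train', use_local=True)    # keep local descriptors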
def main():
    # Read world map
    world_map = imread('world_map2.jpg', as_gray=True)

    # Set waves dataset
    ds = get(lat, lon, level)
    ds = resize_with_extender(ds, world_map.shape, extender=ds.min())

    # Form color map
    plt.axis('off')
    plt.title('Color map', fontsize=20, pad=30)
    plt.imshow(world_map, cmap='gray')
    plt.imshow(ds, alpha=0.7, cmap='gist_heat')
    plt.colorbar(norm=Normalize(vmin=ds.min(), vmax=ds.max()), orientation='horizontal')
    plt.savefig(output_path, bbox_inches='tight', pad_inches=0.2)
    print(f"Congratulations! The color map was created. Check out {output_path}.")
def plot(preprocess):
    labels, data = dataset.get(subset="train", preprocess=preprocess,
                               categories=categories, verbose=True)
    labels = np.array(labels)

    print "Getting TF IDF weights"
    vec = TfidfVectorizer(max_df=0.5, max_features=10000, min_df=2,
                          stop_words='english', use_idf=True, ngram_range=(1, 1))
    X = vec.fit_transform(data)
    print(repr(X))

    print "Reducing dimensions to 50"
    X_reduced = TruncatedSVD(n_components=50, random_state=0).fit_transform(X)
    X_embedded = PCA(n_components=2).fit_transform(X_reduced)

    names = np.unique(labels)
    print names
    num_clusters = len(names)

    fig = plt.figure(frameon=False)
    colors = iter(cm.Spectral(np.linspace(0, 1, num_clusters)))
    for name in names:
        X = X_embedded[labels == name]
        plt.scatter(X[:, 0], X[:, 1], marker='x', label=name)
    plt.title("PCA (Preprocessed)" if preprocess else "PCA")
    plt.xticks([])
    plt.yticks([])
    plt.legend()
def learn(nhidden):
    X, Target = dataset.get(50)

    N = []
    # input layer
    N.append(X.shape[0])
    # hidden layer(s)
    N.append(nhidden)
    # output layer
    N.append(Target.shape[0])
    T = X.shape[1]

    # init
    layers = []
    for i in range(1, len(N)):
        layers.append(
            layer.Layer(N[i - 1], N[i], layer.activation('sigma'), 0.001))

    dirname = "{}/{}".format("results", nhidden)
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    scores = []
    iter = 0
    while not scores or (decreasing(scores) and iter < 200):
        scores.append(layer.J(X, Target, layers))
        if iter % 50 == 0:
            pfile = "{}/{}.pickle".format(dirname, iter)
            pickle.dump(layers, open(pfile, 'wb'))
            ifile = "{}/{}.info".format(dirname, iter)
            open(ifile, 'w').write("J=" + str(scores[-1]) + "\n")
            print("J=", scores[-1], pfile, ifile)
        layer.learn(X, Target, layers)
        iter = iter + 1

    print("J=", scores[-1])
    return scores[-1]
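# The loop above keeps training while the cost J is still decreasing. The
# 'decreasing' predicate is not shown in this snippet; a minimal sketch of
# one plausible definition (an assumption, not the original helper) is:
def decreasing(scores):
    # Treat fewer than two scores as "still decreasing" so training can start;
    # otherwise require the latest cost to be strictly below the previous one.
    if len(scores) < 2:
        return True
    return scores[-1] < scores[-2]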
def __init__(self, params):
    """
    Initialize trainer.
    """
    self.params = params

    # epoch / iteration size
    assert isinstance(config.epoch_size, int)
    assert config.epoch_size >= 1
    self.epoch_size = config.epoch_size

    # network and criterion
    net, criterion = model.get()
    self.net = net
    self.criterion = criterion

    # data iterators
    self.iterators = {}
    train_iter, valid_iter, SRC_TEXT, TGT_TEXT = dataset.get(params)
    self.iterators["train"] = train_iter
    self.iterators["valid"] = valid_iter
    self.num_train = len(train_iter)
    self.SRC_TEXT = SRC_TEXT
    self.TGT_TEXT = TGT_TEXT

    # Multi-GPU
    assert config.amp >= 1 or not config.fp16
    if config.multi_gpu and config.fp16 == False:
        logger.info("Using nn.parallel.DistributedDataParallel ...")
        self.net = nn.parallel.DistributedDataParallel(
            self.net,
            device_ids=[params.local_rank],
            output_device=params.local_rank
        )

    # set optimizers
    self.opt = optimizer.get(self.net)

    # Float16 / distributed
    if config.fp16:
        self.init_amp()
        if config.multi_gpu:
            logger.info("Using apex.parallel.DistributedDataParallel ...")
            self.net = apex.parallel.DistributedDataParallel(
                self.net, delay_allreduce=True)

    # validation metrics
    self.best_metrics = {}
    for k in config.valid_metrics.keys():
        factor = config.valid_metrics[k]
        self.best_metrics[k] = [config.init_metric * factor, factor]

    # early stopping metrics
    self.early_stopping_metrics = {}
    for k in self.best_metrics:
        self.early_stopping_metrics[k] = self.best_metrics[k]

    self.decrease_counts = 0
    self.decrease_counts_max = config.decrease_counts_max
    self.stopping_criterion = config.stopping_criterion
    if config.multi_gpu:
        self.should_terminate = torch.tensor(0).byte()
        self.should_terminate = self.should_terminate.cuda()
    else:
        self.should_terminate = False
    assert (
        self.stopping_criterion in self.best_metrics
    ) or (
        self.stopping_criterion is None
    )

    # training statistics
    self.epoch = 0
    self.n_iter = 0
    self.n_total_iter = 0
    self.n_sentences = 0
    self.stats = OrderedDict(
        [('processed_s', 0), ('processed_w', 0)] +
        [('MT-%s-%s-loss' % (config.SRC_LAN, config.TGT_LAN), [])] +
        [('MT-%s-%s-ppl' % (config.SRC_LAN, config.TGT_LAN), [])]
    )
    self.last_time = time.time()

    # reload potential checkpoints
    self.reload_checkpoint(network_only=config.reload_network_only)
# Author: Marius Maaland
#         Jonas Palm

from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import numpy as np

import dataset

preprocess = True  #("emails", "headers")

y_train, X_train_data = dataset.get(subset="train", preprocess=preprocess, verbose=True)
y_test, X_test_data = dataset.get(subset="test", preprocess=preprocess)

VEC_MAX_DF = 1.0
VEC_MIN_DF = 1
VEC_STOP_WORDS = 'english'


def print_dominant_words(vec, n):
    X_train = vec.fit_transform(X_train_data)
    clf = LogisticRegression()
    clf.fit(X_train, y_train)
    labels = np.unique(y_train)
    coefs = clf.coef_.argsort()[:, ::-1]
    terms = vec.get_feature_names()
    for i in range(len(labels)):
        # assumed completion (the original snippet is cut off here):
        # print the n highest-weighted terms for each class
        print("%s: %s" % (labels[i], ", ".join(terms[j] for j in coefs[i, :n])))
from time import time

import numpy as np

import dataset

#print "Loading features"

def make_string_label_dict(unique_string_labels):
    label_dict = dict()
    for i in range(unique_string_labels.size):
        label_dict[unique_string_labels[i]] = i
    return label_dict

# trunc_label specifies whether to truncate the label
# to the least common denominator for each usenet group
labels, data = dataset.get(truncate_label=False)
datapoints = len(data)
#print "Number of datapoints: ", datapoints

unique_labels, _ = np.unique(labels, return_inverse=True)
#print "- Labels:", unique_labels

# Create a dictionary with enumerated label names
label_dict = make_string_label_dict(unique_labels)
#print label_dict

# true_k holds the true number of clusters
true_k = np.unique(labels).shape[0]

# Calculate and print metrics to assess k-means
def assess_birch(estimator, num_clusters, data, labels):
    t0 = time()
    estimator.fit(data)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--task', required=True)
    parser.add_argument('--model', required=True)
    parser.add_argument('--preprocess-train-frac', type=float)
    parser.add_argument('--preprocess-test-frac', type=float)
    parser.add_argument('--model-train-frac', type=float)
    parser.add_argument('--model-valid-frac', type=float)
    parser.add_argument('--result-dir', default='./result')
    parser.add_argument('--n-trials', type=int)
    parser.add_argument('--n-jobs', type=int, default=1)
    parser.add_argument('--seed', type=int, default=1)
    args = parser.parse_args()
    logger = autogbt.logging.get_logger()
    logger.info(args)

    model = args.model
    task = args.task
    n_trials = args.n_trials
    seed = args.seed
    n_jobs = args.n_jobs
    model_train_frac = args.model_train_frac
    model_valid_frac = args.model_valid_frac

    with open('../.git/refs/heads/master') as fp:
        commit = next(fp).strip()
    res_dir = Path(args.result_dir) / commit
    res_dir.mkdir(parents=True, exist_ok=True)
    name = '-'.join(map(str, [model, task, n_trials, model_train_frac, seed]))
    result_path = res_dir / ('%s.csv' % (name))
    if result_path.exists():
        return

    res = []
    logger.info('load dataset %s' % task)
    logger.info('model %s' % model)
    cv = KFold(n_splits=5, shuffle=True, random_state=seed)
    train_X, train_y, test_X = dataset.get(task)
    start = time.time()
    prep = autogbt.Preprocessor(
        train_frac=args.preprocess_train_frac,
        test_frac=args.preprocess_test_frac,
        random_state=seed,
    )
    train_X, valid_X, train_y = prep.transform(train_X, test_X, train_y)
    if model == 'auto':
        sampler = TrainDataSampler(
            train_frac=model_train_frac,
            valid_frac=model_valid_frac,
            random_state=seed,
        )
        est = autogbt.AutoGBTClassifier(
            n_trials=n_trials,
            sampler=sampler,
            n_jobs=n_jobs,
            cv=cv,
            random_state=seed,
        )
        est.fit(train_X, train_y)
        score = est.best_score
    else:
        n_trials = 1
        model_train_frac = 1.0
        model_valid_frac = 1.0
        if model == 'xgb':
            import xgboost as xgb
            est = xgb.XGBClassifier(n_jobs=n_jobs, random_state=seed)
            pred = cross_val_predict(est, train_X, train_y, cv=cv,
                                     method='predict_proba')[:, 1]
            score = roc_auc_score(train_y, pred)
        if model == 'lgb':
            import lightgbm as lgb
            est = lgb.LGBMClassifier(n_jobs=n_jobs, random_state=seed)
            pred = cross_val_predict(est, train_X, train_y, cv=cv,
                                     method='predict_proba')[:, 1]
            score = roc_auc_score(train_y, pred)
    end = time.time()
    duration = end - start
    logger.info('CV AUC: %.6f' % score)
    res = pd.DataFrame([[
        task, model, n_trials,
        args.preprocess_train_frac, args.preprocess_test_frac,
        model_train_frac, model_valid_frac,
        duration, score, commit,
    ]], columns=[
        'dataset', 'model', 'n_trials',
        'preprocess_train_frac', 'preprocess_test_frac',
        'model_train_frac', 'model_valid_frac',
        'duration[s]', 'CV AUC', 'commit',
    ])
    res.to_csv(result_path, index=False)
def test_incremental():
    from common import config
    import model
    from utils import get_batch

    net, _ = model.get()
    net.eval()
    ckpt = torch.load("checkpoints/checkpoint_best_ppl.pth", map_location='cpu')

    # reload model parameters
    s_dict = {}
    for k in ckpt["net"]:
        new_k = k[7:]
        s_dict[new_k] = ckpt["net"][k]
    net.load_state_dict(s_dict)

    import dataset
    train_iter, _, SRC_TEXT, TGT_TEXT = dataset.get()
    #data_iter = iter(train_iter.get_iterator(True, True))
    #raw_batch = next(data_iter)
    src = np.arange(4, 4 + 2000).reshape(80, 25)
    tgt = np.arange(4, 4 + 2400).reshape(80, 30)
    raw_batch = dataset.Batch(
        torch.from_numpy(src).long(),
        torch.from_numpy(tgt).long()
    )
    batch = get_batch(raw_batch.src, raw_batch.tgt, SRC_TEXT.vocab, TGT_TEXT.vocab)
    for k, v in batch.items():
        try:
            print(k, v.size())
        except AttributeError:
            pass

    with torch.no_grad():
        enc_out = net.encode(src=batch['src'], src_mask=batch['src_mask'])

        # No incremental
        logits1 = net.decode(enc_out, batch['src_mask'], batch['tgt'], batch['tgt_mask'])
        logits1 = net.generator(logits1, log_prob=True)

        # Incremental
        print("Incremental encoding finished!")
        tlen = batch['tgt'].size(1)
        cache = {'cur_len': 0}
        logits2 = []
        for i in range(tlen):
            x = batch['tgt'][:, i].unsqueeze(-1)
            logit = net.decode(
                enc_out,
                batch['src_mask'],
                x,
                batch['tgt_mask'][:, i, :(i + 1)].unsqueeze(-2),
                cache
            )
            logit = net.generator(logit, log_prob=True)
            if i >= 0:
                ref = logits1[:, i, :]
                sys = logit.squeeze()
                ref_words = torch.topk(ref, 1)[1].squeeze()
                sys_words = torch.topk(sys, 1)[1].squeeze()
                print("Diff = {}".format(torch.sum(ref - sys).item()))
                print("Logits sys size : {}, Logits sys : {}".format(sys.size(), sys.sum().item()))
                print("Logits ref size : {}, Logits ref : {}".format(ref.size(), ref.sum().item()))
                if (ref_words == sys_words).all() == False:
                    print("F**k!")
                print("\n")
            logits2.append(logit)
            cache['cur_len'] = i + 1
        logits2 = torch.cat(logits2, dim=1).contiguous()
        print("Logits1: {}".format(torch.sum(logits1).item()))
        print("Logits2: {}".format(torch.sum(logits2).item()))
import pandas as pd

import dataset

src = dataset.uci_root / 'abalone'
dst = dataset.data_folder / 'dataset1'

if __name__ == '__main__':
    dataset.get(src / 'abalone.data', dst / 'abalone.data')
    dataset.get(src / 'abalone.names', dst / 'abalone.names')
    df = (pd
          .read_csv(dst / 'abalone.data', header=None)
          .pipe(lambda x: x[x[0].isin({'F', 'M'})]))
    X = df[[1, 2, 3, 4, 5, 6, 7]].values
    t = (df[0] == 'M').values
    y = df[8] <= 10
    dataset.save('dataset1', X, t, y)
def test_dropout(wdecay, lr, route_iter, model_name='dynamic_capsules',
                 epoch_stuff=[30, 60], reconstruct=False, loss_weights=None,
                 exp=False, model_to_test=None, res=False, dropout=0.5):
    out_dirs = []
    out_dir_meta = '../experiments/' + model_name + '_' + str(route_iter)
    num_epochs = epoch_stuff[1]
    if model_to_test is None:
        model_to_test = num_epochs - 1
    epoch_start = 0
    if exp:
        dec_after = ['exp', 0.96, epoch_stuff[0], 1e-6]
    else:
        dec_after = ['step', epoch_stuff[0], 0.1]

    lr = lr
    criterion = 'margin'
    criterion_str = criterion

    n_classes = 10
    save_after = 10
    init = False

    pre_pend = 'mnist'
    strs_append_list = [
        'reconstruct', reconstruct, 'shift', criterion_str, init,
        'wdecay', wdecay, num_epochs
    ] + dec_after + lr + [dropout]
    if loss_weights is not None:
        strs_append_list = strs_append_list + ['lossweights'] + loss_weights
    strs_append = '_' + '_'.join([str(val) for val in strs_append_list])

    out_dir_train = os.path.join(out_dir_meta, pre_pend + strs_append)
    final_model_file = os.path.join(out_dir_train, 'model_' + str(num_epochs - 1) + '.pt')
    print out_dir_train
    if os.path.exists(final_model_file):
        print 'skipping', final_model_file
        raw_input()

    model_file = None

    batch_size = 256
    batch_size_val = 256
    num_workers = 0

    data_transforms = {}
    data_transforms['train'] = transforms.Compose([
        transforms.RandomCrop(28, padding=2),
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, ))
    ])
    data_transforms['val'] = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, ))
    ])

    train_data = dataset.get(
        'mnist',
        dict(dir_data='../data/mnist_downloaded',
             train=True,
             transform=data_transforms['train']))
    test_data = dataset.get(
        'mnist',
        dict(dir_data='../data/mnist_downloaded',
             train=False,
             transform=data_transforms['val']))

    train_dataloader = torch.utils.data.DataLoader(train_data,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   num_workers=num_workers)
    test_dataloader = torch.utils.data.DataLoader(test_data,
                                                  batch_size=batch_size_val,
                                                  shuffle=False,
                                                  num_workers=num_workers)

    network_params = dict(n_classes=n_classes,
                          r=route_iter,
                          init=init,
                          reconstruct=reconstruct,
                          loss_weights=loss_weights,
                          dropout=dropout)

    util.makedirs(out_dir_train)

    train_params = dict(out_dir_train=out_dir_train,
                        train_data=train_data,
                        test_data=test_data,
                        batch_size=batch_size,
                        batch_size_val=batch_size_val,
                        num_epochs=num_epochs,
                        save_after=save_after,
                        disp_after=1,
                        plot_after=100,
                        test_after=1,
                        lr=lr,
                        dec_after=dec_after,
                        model_name=model_name,
                        criterion=criterion,
                        gpu_id=0,
                        num_workers=0,
                        model_file=model_file,
                        epoch_start=epoch_start,
                        network_params=network_params,
                        weight_decay=wdecay)

    test_params = dict(out_dir_train=out_dir_train,
                       model_num=model_to_test,
                       train_data=train_data,
                       test_data=test_data,
                       gpu_id=0,
                       model_name=model_name,
                       batch_size_val=batch_size_val,
                       criterion=criterion,
                       network_params=network_params)

    print train_params
    param_file = os.path.join(out_dir_train, 'params.txt')
    all_lines = []
    for k in train_params.keys():
        str_print = '%s: %s' % (k, train_params[k])
        print str_print
        all_lines.append(str_print)
    util.writeFile(param_file, all_lines)

    train_model_recon(**train_params)
misc.ensure_dir(args.logdir)
print("=================FLAGS==================")
for k, v in args.__dict__.items():
    print('{}: {}'.format(k, v))
print("========================================")

# seed
args.cuda = torch.cuda.is_available()
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# data loader and model
train_loader, test_loader = dataset.get(batch_size=args.batch_size,
                                        num_workers=1,
                                        num_samples=args.num_samples,
                                        data_augment=args.data_augment,
                                        validation=args.validation)
if args.validation or (args.num_samples != 500):
    Ntrain = len(train_loader.sampler.indices)
else:
    Ntrain = len(train_loader.dataset)
if args.validation:
    Ntest = len(test_loader.sampler.indices)
else:
    Ntest = len(test_loader.dataset)
model = model.stl10(n_channel=args.channel)
model = torch.nn.DataParallel(model, device_ids=range(args.ngpu))
    #Deltas[0, -50, 50],
    #sigma[20, 0.001, 100],
    #alpha[-1.5, -10, 0],
    n[1.5, 0.1, 10]
)""")

## Categories
catTitleCut = {
    'highR9_eb': ('high R9 barrel',  'subdet == subdet::Barrel && r9 == r9::High'),
    'highR9_ee': ('high R9 endcaps', 'subdet == subdet::Endcaps && r9 == r9::High'),
    'lowR9_eb':  ('low R9 barrel',   'subdet == subdet::Barrel && r9 == r9::Low'),
    'lowR9_ee':  ('low R9 endcaps',  'subdet == subdet::Endcaps && r9 == r9::Low'),
}

## Get data
data = dataset.get(
    tree=esChains.getChains('v4')['data'],
    variable=x,
    weight=w,
    categories=myCategories
)
data.SetName('realData')
data.SetTitle('scale real data 750/pb')
ws1.Import(data)

## Get data in categories
realData = {}
for cat, (title, cut) in catTitleCut.items():
    realData[cat] = data.reduce(Cut(cut), Name('data_' + cat), Title(title))

## Get MC
w.SetTitle('pileup.weightOOT')
data = dataset.get(
    tree=esChains.getChains('v4')['z'],
    variable=x,
    weight=w
)
    #Deltas[0, -50, 50],
    #sigma[20, 0.001, 100],
    #alpha[-1.5, -10, 0],
    n[1.5, 0.1, 10]
)""")

## Categories
catTitleCut = {
    'highR9_eb': ('high R9 barrel',  'subdet == subdet::Barrel && r9 == r9::High'),
    'highR9_ee': ('high R9 endcaps', 'subdet == subdet::Endcaps && r9 == r9::High'),
    'lowR9_eb':  ('low R9 barrel',   'subdet == subdet::Barrel && r9 == r9::Low'),
    'lowR9_ee':  ('low R9 endcaps',  'subdet == subdet::Endcaps && r9 == r9::Low'),
}

## Get data
data = dataset.get(
    tree=esChains.getChains('v4')['data'],
    variable=x,
    weight=w
)
data.SetName('realData')
data.SetTitle('scale real data 750/pb')
ws1.Import(data)

## Get data in categories
realData = {}
for cat, (title, cut) in catTitleCut.items():
    realData[cat] = data.reduce(Cut(cut), Name('data_' + cat), Title(title))

## Get MC
w.SetTitle('pileup.weightOOT')
data = dataset.get(
    tree=esChains.getChains('v4')['z'],
    variable=x,
    weight=w
)
def __init__(self, params):
    """
    Initialize trainer.
    """
    self.params = params

    # Initialize tensorboard writer
    train_log = SummaryWriter(
        os.path.join(config.tensorboard_log_path, "log", "train"))
    valid_log = SummaryWriter(
        os.path.join(config.tensorboard_log_path, "log", "valid"))
    self._tensorboard = TensorboardWriter(train_log, valid_log)

    # epoch / iteration size
    assert isinstance(config.epoch_size, int)
    assert config.epoch_size >= 1
    self.epoch_size = config.epoch_size

    # network and criterion
    net, criterion = model.get()
    self.net = net
    self.criterion = criterion

    # data iterators
    self.iterators = {}
    train_iter, valid_iter, SRC_TEXT, TGT_TEXT = dataset.get()
    self.iterators["train"] = train_iter
    self.iterators["valid"] = valid_iter
    self.num_train = len(train_iter)
    self.SRC_TEXT = SRC_TEXT
    self.TGT_TEXT = TGT_TEXT

    # Multi-GPU
    if config.multi_gpu:
        logger.info("Using nn.parallel.DistributedDataParallel ...")
        self.net = nn.parallel.DistributedDataParallel(
            self.net,
            device_ids=[params.local_rank],
            output_device=params.local_rank)
        """
        self.criterion = nn.parallel.DistributedDataParallel(
            self.criterion,
            device_ids=[params.local_rank],
            output_device=params.local_rank
        )
        """

    # set optimizers
    self.opt = optimizer.get(self.net)

    # validation metrics
    self.best_metrics = {}
    for k in config.valid_metrics.keys():
        factor = config.valid_metrics[k]
        self.best_metrics[k] = [config.init_metric * factor, factor]

    # training statistics
    self.epoch = 0
    self.n_iter = 0
    self.n_total_iter = 0
    self.n_sentences = 0
    self.stats = OrderedDict(
        [('processed_s', 0), ('processed_w', 0)] +
        [('MT-%s-%s-loss' % (config.SRC_LAN, config.TGT_LAN), [])] +
        [('MT-%s-%s-ppl' % (config.SRC_LAN, config.TGT_LAN), [])])
    self.last_time = time.time()

    # reload potential checkpoints
    self.reload_checkpoint()
def evaluate_kmeans():
    """ run kmeans implementation on dataset """
    import dataset
    from sklearn.feature_extraction.text import TfidfVectorizer

    def kmeans_args(k):
        # "++" variants use k-means++ seeding; "MB" variants use minibatch updates
        return {
            "KM":     {"n_clusters": k, "init": "k-means",   "minibatch": False},
            "KM++":   {"n_clusters": k, "init": "k-means++", "minibatch": False},
            "MBKM":   {"n_clusters": k, "init": "k-means",   "minibatch": True},
            "MBKM++": {"n_clusters": k, "init": "k-means++", "minibatch": True},
        }

    # get all unique categories in the dataset and shuffle the order
    labels, _ = dataset.get(subset="all")
    categories = np.unique(labels)
    np.random.shuffle(categories)
    print categories

    names = ["K"]
    for name in kmeans_args(2):
        names.append(name + " (mr)")
        names.append(name + " (time)")
        names.append(name + " (it)")
    print ", ".join(names)

    for k in range(2, 21):
        n = 0
        args = kmeans_args(k)
        # select k first categories from the list of all categories we
        # prepared above
        y, Xdata = dataset.get(categories=categories[:k], subset="all")
        vec = TfidfVectorizer(max_df=0.5, max_features=1000, min_df=2,
                              stop_words="english", use_idf=True)
        X = vec.fit_transform(Xdata)
        print "{},".format(k),
        for name in args:
            n += 1
            km = KMeans(**args[name])
            km.fit(X)
            mr = mistake_rate(km, k, y)
            time = km.avg_time
            iters = km.avg_iterations
            str = "{:.2f}, {:.2f}, {:.2f}".format(mr, time, iters)
            if n != len(args):
                str += ","
            print str,
            sys.stdout.flush()
        print ""
#!/usr/bin/env mdl
from megbrain.config import set_default_device
from megskull.graph import Function
from neupeak.utils.cli import load_network

import dataset
import cv2
import numpy as np

set_default_device('cpu0')

net = load_network(
    '/home/zhaojing/vehicle_pose/config/xception145/train_log/models/latest')
classify = Function().compile(net.outputs[0])

test_dataset = dataset.get('test')
x = test_dataset.get_epoch_minibatch_iter()

correct = [0, 0]
total_label = [0, 0]
total_pred = [0, 0]
for data in x:
    out = classify(data.data)
    #total += data.label.size
    for i in range(0, data.label.size):
        total_pred[out[i].argmax()] += 1
        total_label[data.label[i]] += 1
        if out[i].argmax() == data.label[i]:
            correct[data.label[i]] += 1
accuracy = [0, 0]
import pandas as pd

import dataset

src = dataset.uci_root / 'adult'
dst = dataset.data_folder / 'dataset2'

if __name__ == '__main__':
    dataset.get(src / 'adult.data', dst / 'adult.data')
    dataset.get(src / 'adult.test', dst / 'adult.test')
    dataset.get(src / 'adult.names', dst / 'adult.names')
    df = pd.concat([
        pd.read_csv(dst / 'adult.data', header=None),
        pd.read_csv(dst / 'adult.test', header=None, skiprows=1)
    ])
    higher_ed = {
        ' Assoc-acdm', ' Assoc-voc', ' Bachelors',
        ' Doctorate', ' Masters', ' Some-college'
    }
    high_income = {' >50K', ' >50K.'}
    X = pd.get_dummies(df[[0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]])
    t = df[3].isin(higher_ed)
    y = df[14].isin(high_income)
    dataset.save('dataset2', X, t, y)
args.ngpu = len(args.gpu)

# logger
misc.ensure_dir(args.loaddir)
misc.ensure_dir(args.savedir)
print("=================FLAGS==================")
for k, v in args.__dict__.items():
    print('{}: {}'.format(k, v))
print("========================================")

args.cuda = torch.cuda.is_available()
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

train_loader, test_loader = dataset.get(batch_size=args.batch_size,
                                        data_root=args.data_root,
                                        num_workers=4)

algo = {'fgsm': fgsm_gt, 'bim': ifgsm_gt, 'pgd': pgd_gt, 'wrm': wrm_gt}
# attack_algo = algo[args.attack_algo]
attack_algo = algo[args.attack_algo] if args.attack_algo is not None else None
defend_algo = algo[args.defend_algo] if args.defend_algo is not None else None
defend_name = "None" if args.defend_algo is None else args.defend_algo

if args.prune_algo == "l0proj":
    prune_algo = l0proj
elif args.prune_algo is None:
    prune_algo = None
elif args.prune_algo == "baseline":
    prune_algo = l0proj
# logger
misc.ensure_dir(args.logdir)
print("=================FLAGS==================")
for k, v in args.__dict__.items():
    print('{}: {}'.format(k, v))
print("========================================")

# seed
args.cuda = torch.cuda.is_available()
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# data loader and model
train_loader, test_loader = dataset.get(batch_size=args.batch_size, num_workers=1)
model = model.stl10(n_channel=args.channel)
model = torch.nn.DataParallel(model, device_ids=range(args.ngpu))
if args.cuda:
    model.cuda()

# optimizer
optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd)
decreasing_lr = list(map(int, args.decreasing_lr.split(',')))
print('decreasing_lr: ' + str(decreasing_lr))
best_acc, old_file = 0, None
t_begin = time.time()
try:
    # ready to go
    for epoch in range(args.epochs):
        model.train()
if args.premodel:
    model.load_weights(args.premodel)

#train
adam = optimizers.Adam(lr=args.lr)
model.compile(optimizer=adam,
              loss='mean_squared_error',
              metrics=[true_num, pred_num, mae, mse])

# serialize model to JSON
model_json = model.to_json()
with open("./logs/model.json", "w") as json_file:
    json_file.write(model_json)

checkpoint = ModelCheckpoint('./logs/models/weights_{epoch:02d}.hdf5',
                             verbose=1,
                             save_best_only=False,
                             save_weights_only=True,
                             mode='auto',
                             period=1)
data_generator = get('train')
valid_data = get_test('test')
model.fit_generator(data_generator,
                    validation_data=valid_data,
                    steps_per_epoch=config.per_epoch,
                    epochs=config.nr_epoch,
                    callbacks=[checkpoint])
def init_data(data_files, device):
    data_set = []
    for file_name in data_files:
        data_set.append(torch.FloatTensor(dataset.get(file_name)).to(device))
    return data_set
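# Minimal usage sketch for init_data above; the file names are placeholders
# and dataset.get is assumed to return a numeric array per file.
#   device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#   tensors = init_data(['sample_a.dat', 'sample_b.dat'], device)
#   print([t.shape for t in tensors])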