def __init__(self, batch_size, sq_pickle):
    self._batch_size = batch_size
    self._loader = data.loader("glove/glove_vocab.pickle", "glove/glove_vec")
    self._sq_dict = self._loader.load_pickle(sq_pickle)
    self.index = 0
    print("Create SimpleQuestion_Batcher")
def run(dataset='mnist', batch_size=64, n_features=200, n_layers=6, n_bins=4,
        optimizer='adam', learnrate=1e-4, dropout=0.9, exp_name='pixelCNN',
        exp_dir='~/experiments/conditional-pixelcnn/', cuda=True, resume=False):
    exp_name += '_%s_%ifeat_%ilayers_%ibins' % (dataset, n_features, n_layers, n_bins)
    exp_dir = os.path.join(os.path.expanduser(exp_dir), exp_name)
    if not os.path.isdir(exp_dir):
        os.makedirs(exp_dir)

    # Data loaders
    train_loader, val_loader, onehot_fcn, n_classes = data.loader(dataset, batch_size)

    if not resume:
        # Store experiment params in params.json
        params = {'batch_size': batch_size, 'n_features': n_features,
                  'n_layers': n_layers, 'n_bins': n_bins, 'optimizer': optimizer,
                  'learnrate': learnrate, 'dropout': dropout, 'cuda': cuda}
        with open(os.path.join(exp_dir, 'params.json'), 'w') as f:
            json.dump(params, f)

        # Model
        net = model.PixelCNN(1, n_classes, n_features, n_layers, n_bins, dropout)
    else:
        # if resuming, need to have params, stats and checkpoint files
        if not (os.path.isfile(os.path.join(exp_dir, 'params.json'))
                and os.path.isfile(os.path.join(exp_dir, 'stats.json'))
                and os.path.isfile(os.path.join(exp_dir, 'last_checkpoint'))):
            raise Exception('Missing param, stats or checkpoint file on resume')
        net = torch.load(os.path.join(exp_dir, 'last_checkpoint'))

    # Define loss fcn, incl. label formatting from input
    def input2label(x):
        return torch.squeeze(torch.round((n_bins - 1) * x).type(torch.LongTensor), 1)

    loss_fcn = torch.nn.NLLLoss2d()

    # Train
    train.fit(train_loader, val_loader, net, exp_dir, input2label, loss_fcn,
              onehot_fcn, n_classes, optimizer, learnrate=learnrate, cuda=cuda,
              resume=resume)

    # Generate some between-class examples
    generate_between_classes(net, [28, 28], [1, 7],
                             os.path.join(exp_dir, '1-7.jpeg'), n_classes, cuda)
    generate_between_classes(net, [28, 28], [3, 8],
                             os.path.join(exp_dir, '3-8.jpeg'), n_classes, cuda)
    generate_between_classes(net, [28, 28], [4, 9],
                             os.path.join(exp_dir, '4-9.jpeg'), n_classes, cuda)
    generate_between_classes(net, [28, 28], [5, 6],
                             os.path.join(exp_dir, '5-6.jpeg'), n_classes, cuda)
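# Note on input2label (illustrative only, toy values not from the dataset):
# it quantizes normalized pixel intensities in [0, 1] into n_bins discrete
# class indices of shape (N, H, W), which is the target format NLLLoss2d expects.
#
# import torch
# n_bins = 4
# x = torch.tensor([[[[0.0, 0.1, 0.5, 0.9, 1.0]]]])        # shape (N, 1, H, W)
# labels = torch.squeeze(torch.round((n_bins - 1) * x).long(), 1)
# print(labels)  # tensor([[[0, 0, 2, 3, 3]]]) -- per-pixel bin indices, (N, H, W)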
# Download GloVe 6B tokens, 300d word embeddings from
# https://nlp.stanford.edu/projects/glove/
# and put them into
embeddings_path = './data/glove.6B/glove.6B.300d.txt'

# Pick a dataset (uncomment the line you want)
# data_name = 'bbcsport-emd_tr_te_split.mat'
# data_name = 'twitter-emd_tr_te_split.mat'
# data_name = 'r8-emd_tr_te3.mat'
data_name = 'amazon-emd_tr_te_split.mat'
# data_name = 'classic-emd_tr_te_split.mat'
# data_name = 'ohsumed-emd_tr_te_ix.mat'

# Pick a seed
# 0-4 for bbcsport, twitter, amazon, classic
# r8 and ohsumed have pre-defined train/test splits - just set seed=0
seed = 0
p = 1

data = loader(data_path + data_name, embeddings_path, p=p, K_lda=5)
bow_data, y = data['X'], data['y']
topic_proportions = data['proportions']  # (n_docs, n_topics)

print("Size Amazon")
print(y.shape[0])

bow_train, bow_test, y_train, y_test = train_test_split(bow_data, y)
topic_train, topic_test = train_test_split(topic_proportions)

C = data['cost_T']
Creduced = C / C.max()
# model
model = SSD(opt.n_classes)
cfg = model.config
model.init_parameters(opt.pretrainedvgg)
criterion = MultiBoxLoss()
model.cuda()
criterion.cuda()
cudnn.benchmark = True
#print(cfg)
#print('')

# dataload
dataset = data.loader(cfg, opt.augmentation, opt.data_path, PRNG)
print('size of dataset:', len(dataset))

# optimizer
optimizer = optim.SGD(model.parameters(), lr=opt.lr, momentum=opt.momentum,
                      weight_decay=opt.weight_decay)


def train():
    model.train()
    dataloader = DataLoader(dataset=dataset, batch_size=opt.batch_size,
                            shuffle=True, num_workers=opt.threads,
                            pin_memory=True)
    iteration = opt.iter_start
    os.mkdir(opt.w_file_name)
    print('training....')
    while iteration < opt.iter_finish:
        for input, loc, label in dataloader:
def run(pixelcnn_ckpt, vgg_ckpt=None, adversarial_range=0.2,
        train_dataset='mnist', test_dataset='emnist', img_size=28,
        vgg_params={'batch_size': 16, 'base_f': 16, 'n_layers': 9,
                    'dropout': 0.8, 'optimizer': 'adam', 'learnrate': 1e-4},
        exp_name='domain-prior', exp_dir='~/experiments/domain-prior/',
        cuda=True, resume=False):
    # Set up experiment directory
    exp_name += '_%s-to-%s_vgg%i-%i_adv%.2f' % (
        train_dataset, test_dataset, vgg_params['n_layers'],
        vgg_params['base_f'], adversarial_range)
    exp_dir = os.path.join(os.path.expanduser(exp_dir), exp_name)
    if not os.path.isdir(exp_dir):
        os.makedirs(exp_dir)

    # Train a VGG classifier if not already done
    if vgg_ckpt is None:
        train_loader, val_loader, n_classes = data.loader(
            train_dataset, vgg_params['batch_size'])
        if not resume:
            with open(os.path.join(exp_dir, 'vgg_params.json'), 'w') as f:
                json.dump(vgg_params, f)
            vgg = model.VGG(img_size, 1, vgg_params['base_f'],
                            vgg_params['n_layers'], n_classes,
                            vgg_params['dropout'])
        else:
            vgg = torch.load(os.path.join(exp_dir, 'best_checkpoint'))
        train.fit(train_loader, val_loader, vgg, exp_dir,
                  torch.nn.CrossEntropyLoss(), vgg_params['optimizer'],
                  vgg_params['learnrate'], cuda, resume=resume)
    else:
        vgg = torch.load(vgg_ckpt)

    pixelcnn = torch.load(pixelcnn_ckpt)
    pixelcnn_params = os.path.join(os.path.dirname(pixelcnn_ckpt), 'params.json')
    with open(pixelcnn_params, 'r') as f:
        pixelcnn_params = json.load(f)
    n_bins = pixelcnn_params['n_bins']

    if cuda:
        vgg = vgg.cuda()
        pixelcnn = pixelcnn.cuda()

    # Run the datasets through the networks and calculate 3 pixelcnn losses:
    # 1. Average: mean across the image
    # 2. High-pass filtered: weight by difference to upper- and left- neighbors
    # 3. Saliency: weight by pixel saliency (vgg backprop-to-input)
    _, loader, _ = data.loader(train_dataset, 1)
    print('Calculating losses for ' + train_dataset)
    dom_avg, dom_hp, dom_sw, dom_sal, dom_var = calc_losses(
        vgg, pixelcnn, loader, n_bins, cuda)
    print('Calculating losses for adversarial images')
    adv_avg, adv_hp, adv_sw, adv_sal, adv_var = adversarial(
        vgg, pixelcnn, loader, n_bins, adversarial_range, cuda)
    _, loader, _ = data.loader(test_dataset, 1)
    print('Calculating losses for ' + test_dataset)
    ext_avg, ext_hp, ext_sw, ext_sal, ext_var = calc_losses(
        vgg, pixelcnn, loader, n_bins, cuda)

    # Loss histograms
    n_bins = 100
    all_losses = np.concatenate((dom_avg, adv_avg, ext_avg,
                                 dom_hp, adv_hp, ext_hp,
                                 dom_sw, adv_sw, ext_sw))
    edges = np.linspace(0, np.percentile(all_losses, 95), n_bins + 1)
    # average loss
    vis.histogram(dom_avg, edges, train_dataset + ' average loss', exp_dir)
    vis.histogram(adv_avg, edges, 'adversarial average loss', exp_dir)
    vis.histogram(ext_avg, edges, test_dataset + ' average loss', exp_dir)
    # high-pass weighted loss
    vis.histogram(dom_hp, edges, train_dataset + ' highpass loss', exp_dir)
    vis.histogram(adv_hp, edges, 'adversarial highpass loss', exp_dir)
    vis.histogram(ext_hp, edges, test_dataset + ' highpass loss', exp_dir)
    # saliency weighted loss
    vis.histogram(dom_sw, edges, train_dataset + ' saliency loss', exp_dir)
    vis.histogram(adv_sw, edges, 'adversarial saliency loss', exp_dir)
    vis.histogram(ext_sw, edges, test_dataset + ' saliency loss', exp_dir)
    # loss variances
    loss_variances = np.concatenate((dom_var, adv_var, ext_var))
    edges = np.linspace(0, np.percentile(loss_variances, 95), n_bins + 1)
    vis.histogram(dom_var, edges, train_dataset + ' loss variance', exp_dir)
    vis.histogram(adv_var, edges, 'adversarial loss variance', exp_dir)
    vis.histogram(ext_var, edges, test_dataset + ' loss variance', exp_dir)
    # Calculate epistemic uncertainties for each dataset
    _, loader, _ = data.loader(train_dataset, 1)
    dom_class_epi = epistemic(vgg, loader, cuda)
    adv_class_epi = epistemic_adversarial(vgg, adversarial_range, loader, cuda)
    _, loader, _ = data.loader(test_dataset, 1)
    ext_class_epi = epistemic(vgg, loader, cuda)

    # Classifier uncertainty histograms
    n_bins = 100
    all_class_epi = dom_class_epi + adv_class_epi + ext_class_epi
    edges = np.linspace(0, np.percentile(all_class_epi, 95), n_bins + 1)
    vis.histogram(dom_class_epi, edges,
                  train_dataset + ' classifier uncertainty', exp_dir)
    vis.histogram(adv_class_epi, edges,
                  'adversarial classifier uncertainty', exp_dir)
    vis.histogram(ext_class_epi, edges,
                  test_dataset + ' classifier uncertainty', exp_dir)

    # ROC curves
    vis.roc(dom_avg, ext_avg, 'out-of-domain: average loss', exp_dir)
    vis.roc(dom_hp, ext_hp, 'out-of-domain: high-pass filtered loss', exp_dir)
    vis.roc(dom_sw, ext_sw, 'out-of-domain: saliency-weighted loss', exp_dir)
    vis.roc(dom_class_epi, ext_class_epi,
            'out-of-domain: epistemic uncertainty', exp_dir)
    vis.roc(dom_avg, adv_avg, 'adversarial: average loss', exp_dir)
    vis.roc(dom_hp, adv_hp, 'adversarial: high-pass filtered loss', exp_dir)
    vis.roc(dom_sw, adv_sw, 'adversarial: saliency-weighted loss', exp_dir)
    vis.roc(dom_class_epi, adv_class_epi,
            'adversarial: epistemic uncertainty', exp_dir)
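# The body of calc_losses is not shown here. As a rough sketch of the second
# weighting scheme described in the comments above (weight the per-pixel PixelCNN
# loss by the intensity difference to the upper and left neighbors), one could do
# something like the following. The function name, padding and normalization are
# assumptions for illustration, not this repository's implementation.

import torch


def highpass_weighted_loss(per_pixel_nll, img):
    """Hypothetical sketch: weight per-pixel NLL by local intensity changes.

    per_pixel_nll, img: tensors of shape (N, 1, H, W).
    """
    # Absolute differences to the left and upper neighbors (zero at the border)
    dx = torch.zeros_like(img)
    dy = torch.zeros_like(img)
    dx[:, :, :, 1:] = (img[:, :, :, 1:] - img[:, :, :, :-1]).abs()
    dy[:, :, 1:, :] = (img[:, :, 1:, :] - img[:, :, :-1, :]).abs()
    weights = dx + dy
    weights = weights / (weights.sum(dim=(2, 3), keepdim=True) + 1e-8)
    # Weighted mean of the loss over each image -> one scalar per image, shape (N,)
    return (weights * per_pixel_nll).sum(dim=(2, 3)).squeeze(1)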
def main():
    input("### Press *Enter* to load .las files with Depth and Gamma Ray tracks ### ... ")
    path1 = input("### Please copy-paste or type the path to the *first* .las file ### ")
    path2 = input("### Please copy-paste or type the path to the *second* .las file ### ")

    # load data
    las1, las2 = data.loader(path1, path2)

    # extend the shorter las
    las1 = data.las1_extension(las1, las2)

    # estimate Vshale
    las1, las2 = data.GR_to_Vshale(las1, las2)

    # estimate SGR
    method = ("### Enter GR to Vshale conversion method. The options are: 'linear', "
              "'larionov_young', 'larionov_old', 'steiber', 'clavier'. The default "
              "method is 'linear' ###..... ")
    SGR = data.SGR_estimator(las1, las2, data.throw(las2),
                             method=get_with_default(str, method, 'linear'))

    # estimate buoyancy pressure or AFPD for Yielding and Bretan, in psi
    BP_Y = data.BP_critical_yielding(SGR, data.Zmax_estimator(las2, data.throw(las2)))
    BP_B = data.BP_critical_bretan(SGR, data.Zmax_estimator(las2, data.throw(las2)))

    # estimate mercury-air fault permeability using the Sperrevik (2002) equation
    Zf_0 = ("### Enter integer value representing the depth of faulting or Zf "
            "(from Sperrevik, 2002) in meters for the first depth point. The default "
            "value is *100* meters ###...... ")
    Kf_ma = data.Kf_ma(SGR, data.Zmax_estimator(las2, data.throw(las2)),
                       Zf_0=get_with_default(int, Zf_0, 100))

    # estimate Pf_hw - hydrocarbon-water threshold capillary pressure. The fluid pair
    # can be different; however, the user should know the input parameters
    # for fluid tension, contact angle and density
    Y_HW = ("### Enter integer value representing a fluid tension for HC and water in "
            "dynes/cm. The default value for light oil is *30* dynes/cm ###...... ")
    Y_MA = ("### Enter integer value representing a fluid tension for mercury and air "
            "in dynes/cm. The default value is *480* dynes/cm ###...... ")
    THETA_HW = ("### Enter integer value representing a contact angle for HC and water, "
                "degrees. The default value for light oil is *30* degrees ###...... ")
    THETA_MA = ("### Enter integer value representing a contact angle for mercury and "
                "air, degrees. The default value is *40* degrees ###...... ")
    Pf_hw = data.Pf_hw(data.Pf_ma(Kf_ma),
                       Y_HW=get_with_default(int, Y_HW, 30),
                       Y_MA=get_with_default(int, Y_MA, 480),
                       THETA_HW=get_with_default(int, THETA_HW, 30),
                       THETA_MA=get_with_default(int, THETA_MA, 40))

    # estimate HC column height in meters using three methods:
    # Sperrevik (2002), Yielding (2012) and Bretan (2003)
    DEN_WATER = ("### Enter float value representing a water density, g/cm^3. The "
                 "default value is *1.030* g/cm^3 ###...... ")
    DEN_HW = ("### Enter float value representing a HC density, g/cm^3. The default "
              "value is *0.700* g/cm^3 ###...... ")
    HCCH_S, HCCH_Y, HCCH_B = data.column_height(
        Pf_hw, BP_Y, BP_B,
        DEN_WATER=get_with_default(float, DEN_WATER, 1.030),
        DEN_HW=get_with_default(float, DEN_HW, 0.700))

    # making a dataframe
    input("Now, let's create a dataframe with X coordinate - Fault throw, m; "
          "Y coordinate - Depth, m; and five properties: SGR, Kfma, HCCH_Sperrevik, "
          "HCCH_Yielding and HCCH_Bretan... ### Press *Enter* to continue ###")
    df = data.dataframe(data.throw(las2), las2, SGR, Kf_ma, HCCH_S, HCCH_Y, HCCH_B)
    print(df.tail(10))

    # plot
    input("Now, let's plot and save five triangle plots in any folder. X - Fault "
          "throw, m; Y - Depth, m; and five properties: SGR, Kfma, HCCH_Sperrevik, "
          "HCCH_Yielding and HCCH_Bretan... ### Press *Enter* to continue ###")
    output_path = ("### Please copy-paste or type a *PATH* to an output folder. "
                   "Default path is the current folder *./* ### ")
    plot.plotHCCH(df, output_path=get_with_default(str, output_path, './'))
    plot.plotKf(df, output_path=get_with_default(str, output_path, './'))
    plot.plotSGR(df, output_path=get_with_default(str, output_path, './'))
    print("####################### Thank you for using the Fault Triangle script "
          "##########################")
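# The gamma-ray-index to Vshale conversion behind data.GR_to_Vshale is not shown.
# The method names offered in the prompt ('linear', 'larionov_young', 'larionov_old',
# 'steiber', 'clavier') usually refer to the standard petrophysical relations below.
# This is a generic, hedged sketch (hypothetical function name), not this script's
# implementation.

import numpy as np


def vshale_from_gr(gr, gr_min, gr_max, method='linear'):
    """Generic sketch of common gamma-ray-index (IGR) to Vshale conversions."""
    igr = np.clip((gr - gr_min) / (gr_max - gr_min), 0.0, 1.0)  # gamma-ray index
    if method == 'linear':
        return igr
    if method == 'larionov_young':   # Larionov (1969), Tertiary (younger) rocks
        return 0.083 * (2 ** (3.7 * igr) - 1)
    if method == 'larionov_old':     # Larionov (1969), older rocks
        return 0.33 * (2 ** (2 * igr) - 1)
    if method == 'steiber':          # Steiber (1970)
        return igr / (3 - 2 * igr)
    if method == 'clavier':          # Clavier et al. (1971)
        return 1.7 - np.sqrt(3.38 - (igr + 0.7) ** 2)
    raise ValueError('unknown method: %s' % method)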
import sys

import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.tensorboard import SummaryWriter
from torch.nn.functional import cross_entropy, log_softmax
from torch_scatter import scatter_log_softmax

from model import Model, LAYERS, CHANNELS
from data import loader, upload

BATCH = 64
MOMENTUM = 0.9
WEIGHT_DECAY = 1e-4
TRAIN = loader('../data/train.gz', BATCH)
VALIDATE = loader('../data/validate.gz', BATCH)
T_MAX = 2000
LR = 0.01


def compute_loss(model, example):
    nodes, sources, targets, rules, graph, y = upload(example)
    logits = model(nodes, sources, targets, rules)
    # log-softmax over the candidates belonging to each graph (grouped by `graph`)
    log_probs = scatter_log_softmax(logits, graph, dim=0)
    return -log_probs[y].mean()


def validate(model):
    loss = 0
    count = 0
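# For readers unfamiliar with torch_scatter's scatter_log_softmax: it computes a
# log-softmax independently within each group of entries sharing the same index,
# so compute_loss normalizes logits per graph rather than across the whole batch.
# A small self-contained illustration with toy values (not from the training data):
#
# import torch
# from torch_scatter import scatter_log_softmax
#
# # Two graphs in one flat batch: entries 0-2 belong to graph 0, entries 3-4 to graph 1
# logits = torch.tensor([1.0, 2.0, 0.5, 3.0, 1.0])
# graph = torch.tensor([0, 0, 0, 1, 1])
# log_probs = scatter_log_softmax(logits, graph, dim=0)
# print(log_probs.exp())  # ~[0.23, 0.63, 0.14, 0.88, 0.12] -- each group sums to 1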
C = data['cost_T']
Data = data['X']
y = data['y']
k = np.unique(y).shape[0]

## Cluster tf:
print("Processing tf-clustering")
distInfoFinal, distPvalue = kcluster.kclustertf(k, Data, y)
print("DistInfo :", distInfoFinal)
print("Dist P-Value:", distPvalue)

print("Processing LDA")
k_Lda = k
p = 1
data_lda = loader(data_path + data_matrix, embeddings_path, p=p, K_lda=k)
C_lda = data_lda['cost_T']
Data_lda = data_lda['proportions']
y_lda = data_lda['y']
distInfoFinal, distPvalue = kcluster.kclusterLDA(k, Data_lda, y_lda)
print("DistInfo :", distInfoFinal)
print("Dist P-Value:", distPvalue)

nameWasse = "bbcsportCluster/"
nameDist = "ArrayDistExp"
nameInfo = "ArrayDistInfoExp"
namePvalue = "ArrayPvalExp"

print("Processing Wasserstein")
arrDist = np.zeros((25, 25))
arrDistInfo = np.zeros((25, 25))
import vgg19

# Build the model
cnn = vgg19.build_model()
pretrained = pickle.load(open(os.path.join(pretrained_dir, 'vgg19.pkl'), 'rb'))
lasagne.layers.set_all_param_values(cnn['prob'], pretrained['param values'])


# Extract the features
def frame_feature(x):
    return numpy.array(
        lasagne.layers.get_output(cnn['fc7'], x, deterministic=True).eval())


batch_size = 100  # limit video length to avoid out-of-memory errors
dataloader = data.loader()
nfiles = len(dataloader.filenames)
features_path = os.path.join(os.curdir, 'framefeatures')
if not os.path.isdir(features_path):
    os.mkdir(features_path)

startAt = 0  # in case this crashes partway through
for idx in range(startAt, nfiles):
    print(('idx %i / %i') % (idx, nfiles))
    video, label, group, clip = dataloader.get(idx)
    filename = 'l' + str(label) + '_g' + str(group) + '_c' + str(clip) + '.pkl'
    features = numpy.zeros((video.shape[0], 4096), dtype='float32')
    for f in range(0, video.shape[0], batch_size):
        batch_idx = range(f, numpy.min([f + batch_size, video.shape[0]]))
        features[batch_idx] = frame_feature(video[batch_idx])
    pickle.dump(features, open(os.path.join(features_path, filename), 'wb'))
parser.add_argument('--d_embed', type=int, default=100)
parser.add_argument('--rnn_type', type=str, default='LSTM')
parser.add_argument('--d_hidden', type=int, default=1000)
parser.add_argument('--n_layers', type=int, default=3)
parser.add_argument('--p_dropout', type=float, default=0.2)
parser.add_argument('--bptt', type=int, default=35)
parser.add_argument('--cuda', action='store_true', default=True)
parser.add_argument('--epochs', type=int, default=10)
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument('--learning_rate', type=float, default=0.01)
parser.add_argument('--log_interval', type=int, default=50)
parser.add_argument('--resume', action='store_true', default=False)
args = parser.parse_args()

if args.train_path:
    train = data.loader(args.train_path)
    n_vocab = len(train.vocabulary)

if args.resume:
    with open(args.model_path, 'rb') as f:
        model = torch.load(f)
else:
    model = model.lm(n_vocab, args.d_embed, args.rnn_type, args.d_hidden,
                     args.n_layers, args.p_dropout)
if args.cuda:
    model.cuda()

if args.train_path:
    if args.valid_path:
        valid = data.loader(args.valid_path, vocab=train.vocabulary)
    else: