def nested_sampling(self, angle_times, save_path, filename, dynamic=False):
    """Runs nested sampling on simulated data of the sample.

    Args:
        angle_times (list): points and times for each angle to simulate.
        save_path (str): path to directory to save corner plot to.
        filename (str): file name to use when saving corner plot.
        dynamic (bool): whether to use static or dynamic nested sampling.

    """
    # Simulate data for the sample.
    model, data = simulate(self.structure, angle_times)

    # The structure was defined in refnx.
    if isinstance(self.structure, refnx.reflect.Structure):
        dataset = refnx.dataset.ReflectDataset([data[:, 0], data[:, 1], data[:, 2]])
        objective = refnx.analysis.Objective(model, dataset)

    # The structure was defined in Refl1D.
    elif isinstance(self.structure, refl1d.model.Stack):
        objective = bumps.fitproblem.FitProblem(model)

    # Otherwise, the structure is invalid.
    else:
        raise RuntimeError('invalid structure given')

    # Sample the objective using nested sampling.
    sampler = Sampler(objective)
    fig = sampler.sample(dynamic=dynamic)

    # Save the sampling corner plot.
    save_path = os.path.join(save_path, self.name)
    save_plot(fig, save_path, filename + '_nested_sampling')
def sample(self, num: int = None, length: int = None) -> List[str]:
    if num is None:
        num = self.hparams.bs
    if length is None:
        length = self.hparams.seq_len

    dataset: StarTrekCharGenerationDataset = self.datasets[Splits.train]
    token_start = dataset.vocab.stoi[dataset.vocab.start]
    x = torch.from_numpy(np.array([token_start] * num)).long()
    x = x.reshape(num, 1)
    x = x.to(self.device)

    self.net.eval()
    with torch.no_grad():
        states = None
        history = [x]
        for _ in tqdm(range(length), disable=(not self.hparams.verbose)):
            n_look_back = length if self.hparams.model == "tcn" else 1
            inputs = torch.cat(history[-n_look_back:], dim=-1)
            logits, states = self.net(inputs, states)
            next_logits = logits[:, -1, :]
            history.append(Sampler.temperature(next_logits))

    history = history[1:]  # Omit start tokens
    outputs = torch.stack(history).squeeze().transpose(0, 1)
    outputs = outputs.cpu()
    return [dataset.sequence_to_text(outputs[i, :]) for i in range(num)]
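# The generation loop above calls Sampler.temperature(next_logits), which is not
# shown in this snippet. A minimal sketch of what such a temperature-based
# categorical sampling step might look like, assuming logits of shape
# (batch, vocab); the function name, default temperature, and return shape of
# (batch, 1) are assumptions, not the project's actual API.
import torch

def temperature_sample(logits: torch.Tensor, temperature: float = 1.0) -> torch.Tensor:
    # Hypothetical stand-in for Sampler.temperature: scale the logits,
    # convert to probabilities, then draw one token index per row.
    probs = torch.softmax(logits / temperature, dim=-1)
    return torch.multinomial(probs, num_samples=1)  # shape: (batch, 1)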
def nested_sampling(self, angle_times, save_path, filename, dynamic=False):
    """Runs nested sampling on simulated data of the sample.

    Args:
        angle_times (list): points and counting times for each measurement angle to simulate.
        save_path (str): path to directory to save corner plot to.
        filename (str): name of file to save corner plot to.
        dynamic (bool): whether to use static or dynamic nested sampling.

    """
    objective = bumps.fitproblem.FitProblem(self.experiment)

    # Sample the objective using nested sampling.
    sampler = Sampler(objective)
    fig = sampler.sample(dynamic=dynamic)

    # Save the sampling corner plot.
    save_path = os.path.join(save_path, self.name)
    save_plot(fig, save_path, 'nested_sampling_' + filename)
def nested_sampling(self, contrasts, angle_times, save_path, filename,
                    underlayers=None, dynamic=False):
    """Runs nested sampling on simulated data of the lipid sample.

    Args:
        contrasts (list): SLDs of contrasts to simulate.
        angle_times (list): points and times for each angle to simulate.
        save_path (str): path to directory to save corner plot to.
        filename (str): file name to use when saving corner plot.
        underlayers (list): thickness and SLD of each underlayer to add.
        dynamic (bool): whether to use static or dynamic nested sampling.

    """
    # Create objectives for each contrast to sample with.
    objectives = []
    for contrast in contrasts:
        # Simulate an experiment using the given contrast.
        sample = self._using_conditions(contrast, underlayers)
        model, data = simulate(sample, angle_times, scale=1, bkg=5e-6, dq=2)
        dataset = refnx.dataset.ReflectDataset([data[:, 0], data[:, 1], data[:, 2]])
        objectives.append(refnx.analysis.Objective(model, dataset))

    # Combine objectives into a single global objective.
    global_objective = refnx.analysis.GlobalObjective(objectives)

    # Exclude certain parameters if underlayers are being used.
    if underlayers is None:
        global_objective.varying_parameters = lambda: self.params
    else:
        global_objective.varying_parameters = lambda: self.underlayer_params

    # Sample the objective using nested sampling.
    sampler = Sampler(global_objective)
    fig = sampler.sample(dynamic=dynamic)

    # Save the sampling corner plot.
    save_path = os.path.join(save_path, self.name)
    save_plot(fig, save_path, 'nested_sampling_' + filename)
def main(args):
    # build search space
    data = load_data(args.dataset, args.seed)
    ss, _ = pruning_search_space_by_eda(data)

    if data.setting == 'inductive':
        trainer = InductiveTrainer()
    else:
        trainer = TransductiveTrainer()

    sampler = Sampler(args.dataset, ss)

    archs = []
    val_scores = []
    test_scores = []

    # init training data for GBDT
    sampled_archs = sampler.sample(3000)

    i = 0
    while i < len(sampled_archs):
        arch = sampled_archs[i]
        data = sampler.load_data(arch)
        try:
            model = sampler.build_model(arch, data.x.shape[1], int(max(data.y)) + 1)
            trainer.init_trainer(model, arch[7], arch[6])
            val_score = trainer.train(data)
            test_score = trainer.test(data)
        except RuntimeError as e:
            if "cuda" in str(e) or "CUDA" in str(e):  # CUDA OOM, sample another arch
                print(e)
                sampled_archs += sampler.sample(1)
                i += 1
                continue
            else:
                raise e

        archs.append(arch)
        val_scores.append(val_score)
        test_scores.append(test_score)
        print(arch, f'real val score: {val_score} | real test score: {test_score}')
        print(f'Number of evaluated archs: {len(archs)}')

        i += 1
        if i % 500 == 0:
            print(f'Round {i // 500} | best test score: {max(test_scores)}')
        if i >= 2000:
            break
def main(device):
    parser = argparse.ArgumentParser("LINE training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger, log_path = create_logger(cfg)
    logger.info(cfg)

    graph = networkx.read_edgelist(cfg.DATA.GRAPH_PATH,
                                   create_using=networkx.DiGraph(),
                                   nodetype=None,
                                   data=[('weight', int)])

    model = LINE(graph, cfg).to(device)
    v2ind, ind2v = model.get_mapping()
    sampler = Sampler(graph, v2ind, batch_size=cfg.SAMPLE.BATCHSIZE)
    criterion = KLLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=cfg.WORD2VEC.LR)

    train(sampler, model, cfg, criterion, optimizer, device)

    embedding = model.get_embedding()
    eval_embedding(embedding, cfg.DATA.LABEL_PATH, logger)
    vis_embedding(embedding, cfg.DATA.LABEL_PATH, log_path)
def bayes(times, angle_splits, save_path):
    with open(os.path.join(save_path, 'YIG_sample', 'bayes.csv'), 'w') as file:
        for time in times:
            angle_times = [(angle, points, time * split)
                           for angle, points, split in angle_splits]

            sample = SampleYIG(vary=False)
            sample.Pt_thick.range(0, 0.2)

            models, datasets = simulate_magnetic(test(sample, 0.01638), angle_times,
                                                 scale=1, bkg=5e-7, dq=2,
                                                 pp=True, pm=False, mp=False, mm=True)

            mm = models[0].probe.xs[0]
            pp = models[1].probe.xs[3]

            probe = refl1d.probe.PolarizedQProbe(xs=(mm, None, None, pp), name='Probe')
            experiment = refl1d.experiment.Experiment(sample=sample.structure, probe=probe)

            sampler = Sampler(bumps.fitproblem.FitProblem(experiment))
            logz_1 = sampler.sample(verbose=False, return_evidence=True)

            sample = SampleYIG(vary=False)
            sample.Pt_mag.value = 0
            sample.Pt_thick.range(0, 0.2)

            experiment = refl1d.experiment.Experiment(sample=sample.structure, probe=probe)

            sampler = Sampler(bumps.fitproblem.FitProblem(experiment))
            logz_2 = sampler.sample(verbose=False, return_evidence=True)

            # Twice the difference in log-evidence between the two models,
            # i.e. twice the log Bayes factor.
            factor = 2 * (logz_1 - logz_2)

            print(factor)
            file.write('{0},{1}\n'.format(time, factor))
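# The value written to bayes.csv is 2 * (logz_1 - logz_2), i.e. twice the log
# Bayes factor. A hedged helper for reading that number on the usual
# Kass-Raftery scale; the thresholds below are the standard published ones and
# the function itself is illustrative, not part of this project.
def interpret_bayes_factor(two_ln_k: float) -> str:
    # Kass & Raftery (1995) evidence categories for 2*ln(K).
    if two_ln_k < 2:
        return 'not worth more than a bare mention'
    if two_ln_k < 6:
        return 'positive'
    if two_ln_k < 10:
        return 'strong'
    return 'very strong'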
import os, sys
sys.path.append(os.getcwd())

from utils import Sampler
import h5py
import numpy as np
import json

max_step = 5
seg_len = 128
mel_band = 80
lin_band = 513
n_samples = 2000000
dset = 'train'

if __name__ == '__main__':
    if len(sys.argv) < 3:
        print('usage: python3 make_single_samples.py [in_h5py_path] [out_json_path]')
        exit(0)

    sampler = Sampler(sys.argv[1], max_step=max_step, seg_len=seg_len, dset=dset)
    samples = [sampler.sample_single()._asdict() for _ in range(n_samples)]

    with open(sys.argv[2], 'w') as f_json:
        json.dump(samples, f_json, indent=4, separators=(',', ': '))
def main(args):
    # build search space
    data = load_data(args.dataset, args.seed)
    ss, _ = pruning_search_space_by_eda(data)

    if data.setting == 'inductive':
        trainer = InductiveTrainer()
    else:
        trainer = TransductiveTrainer()

    sampler = Sampler(args.dataset, ss)

    archs = []
    val_scores = []
    top_archs = []
    top_val_scores = []
    top_test_scores = []

    # init training data for GBDT
    sampled_archs = sampler.sample(args.n)

    i = 0
    while i < len(sampled_archs):
        arch = sampled_archs[i]
        data = sampler.load_data(arch)
        try:
            model = sampler.build_model(arch, data.x.shape[1], int(max(data.y)) + 1)
            trainer.init_trainer(model, arch[7], arch[6])
            val_score = trainer.train(data)
        except RuntimeError as e:
            if "cuda" in str(e) or "CUDA" in str(e):  # CUDA OOM, sample another arch
                print(e)
                sampled_archs += sampler.sample(1)
                i += 1
                continue
            else:
                raise e

        archs.append(arch)
        val_scores.append(val_score)
        print(arch, f'real val score: {val_score}')
        print(f'Number of evaluated archs: {len(archs)}')
        i += 1

    # train GBDT predictor
    for iter_round in range(1, args.iterations + 1):
        print(f'Iteration round {iter_round}, retraining model and sampling archs...',
              datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

        # train GBDT
        X = [[str(e) for e in row] for row in archs]
        y = np.array(val_scores)
        train_pool = Pool(X, y, cat_features=[i for i in range(len(X[0]))])
        # X = lgb.Dataset(pd.DataFrame(X, columns=ss.keys()), label=np.array(val_scores))
        # gbdt_model = lgb.train(gbdt_params, X, args.gbdt_num_boost_round, categorical_feature=ss.keys())
        gbdt_model = CatBoostRegressor(learning_rate=args.gbdt_lr, verbose=False)
        gbdt_model.fit(train_pool)

        # pruning search space
        ss = pruning_search_space_by_shap(archs, gbdt_model, ss, args.p)
        sampler.update_search_space(ss)

        # predict some archs
        sampled_archs = sampler.sample(args.m)
        X = [[str(e) for e in row] for row in sampled_archs]
        test_pool = Pool(X, cat_features=[i for i in range(len(X[0]))])
        predicted_val_scores = gbdt_model.predict(test_pool)

        # sort the archs according to the predicted value
        zipped = zip(sampled_archs, predicted_val_scores)
        zipped = sorted(zipped, key=lambda e: e[1], reverse=True)  # sort in decreasing order
        sampled_archs, predicted_val_scores = zip(*zipped)
        sampled_archs, predicted_val_scores = list(sampled_archs), list(predicted_val_scores)

        print(f'Iteration round {iter_round}, evaluating top k archs on valid set',
              datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

        # evaluate top k archs
        i = 0
        while i < len(sampled_archs):
            arch = sampled_archs[i]
            data = sampler.load_data(arch)
            try:
                model = sampler.build_model(arch, data.x.shape[1], int(max(data.y)) + 1)
                trainer.init_trainer(model, arch[7], arch[6])
                val_score = trainer.train(data)
                predicted_val_score = predicted_val_scores[i]
            except RuntimeError as e:
                if "cuda" in str(e) or "CUDA" in str(e):  # CUDA OOM, sample another arch
                    print(e)
                    sampled_archs += sampler.sample(1)
                    i += 1
                    continue
                else:
                    raise e

            archs.append(arch)
            val_scores.append(val_score)
            print(arch, f'predicted val score: {predicted_val_score} | real val score: {val_score}')
            print(f'Number of evaluated archs: {len(archs)}')

            if i + 1 >= args.k:
                break
            i += 1

        # sort all the evaluated archs
        zipped = zip(archs, val_scores)
        zipped = sorted(zipped, key=lambda e: e[1], reverse=True)
        archs, val_scores = zip(*zipped)
        archs, val_scores = list(archs), list(val_scores)

        print(f'Iteration round {iter_round}, evaluating top k_test archs on test set',
              datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

        # evaluate top k_test archs on test set
        i = 0
        while i < len(archs):
            arch = archs[i]
            data = sampler.load_data(arch)
            try:
                model = sampler.build_model(arch, data.x.shape[1], int(max(data.y)) + 1)
                trainer.init_trainer(model, arch[7], arch[6])
                val_score = trainer.train(data)
                test_score, z = trainer.test(data, return_logits=True)
                pickle.dump((z, data.y[data.test_mask]),
                            open(f'embeddings/{args.dataset}_AutoGRL-round{iter_round}-top{i + 1}.pt', 'wb'))
            except RuntimeError as e:
                if "cuda" in str(e) or "CUDA" in str(e):  # CUDA OOM, sample another arch
                    print(e)
                    i += 1
                    continue
                else:
                    raise e

            top_archs.append(arch)
            top_val_scores.append(val_score)
            top_test_scores.append(test_score)
            print(arch)
            print(f'Testing... round {iter_round} | arch top {i + 1} | real val score {val_score} | real test score {test_score}',
                  datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

            if i + 1 >= args.k_test:  # only test top k_test models for every round
                break
            i += 1

        zipped = zip(top_val_scores, top_test_scores)
        zipped = sorted(zipped, key=lambda e: e[0], reverse=True)
        best_val_score, corr_test_score = zipped[0][0], zipped[0][1]

        # logging
        print(f'Iteration {iter_round} | best val score {best_val_score} | corresponding test score {corr_test_score} | best test score {max(top_test_scores)}',
              datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

        pickle.dump((ss, sampler, trainer, archs, val_scores, gbdt_model, sampled_archs,
                     predicted_val_scores, top_val_scores, top_test_scores),
                    open(f'cache/gbdt/{args.dataset}_seed{args.seed}_round{iter_round}.pt', 'wb'))
num_descriptors = 361
epoch_number = 60
is_test = False

if is_test:
    gt_path = 'datasets/rrc-text-videos/ch3_test/'  # test
    descriptors_path = 'extracted_descriptors/extracted_descriptors_' + str(num_descriptors) + '_test'  # test
else:
    gt_path = 'datasets/rrc-text-videos/ch3_train/'  # train
    descriptors_path = 'extracted_descriptors/extracted_descriptors_' + str(num_descriptors) + '_dist'  # train

annotations_paths = glob(gt_path + '*.xml')
# annotations_path = ["datasets/rrc-text-videos/ch3_test/Video_49_6_4_GT.xml"]

sampler = Sampler(weights_path='models/models_361_dropout/model-epoch-' + str(epoch_number) + '.pth',
                  num_descriptors=num_descriptors, hidden_size=256, input_size=6)

with open("results-" + str(num_descriptors) + "-" + str(epoch_number) + ".txt", "a") as f:
    if is_test:
        f.write("Results for test split\n")
    else:
        f.write("Results for train split\n")

for annotations_path in annotations_paths:
    acc = mm.MOTAccumulator(auto_id=True)

    with open("results-" + str(num_descriptors) + "-" + str(epoch_number) + ".txt", "a") as f:
        f.write("Results for file " + annotations_path + "\n")

    video_path = annotations_path.replace("_GT.xml", ".mp4")
    video_name = video_path.split('/')[-1].replace('.mp4', '')
for video_path in video_paths:
    print(video_path)

    if not os.path.isdir('images'):
        os.mkdir('images')
    files = glob.glob('images/*')
    for f in files:
        os.remove(f)

    video_name = video_path.split('/')[-1].replace('.mp4', '')
    # voc_path = 'datasets/rrc-text-videos/ch3_train/' + video_name + '_GT.txt'
    voc_path = 'datasets/rrc-text-videos/ch3_test/' + video_name + '_GT_voc.txt'
    cap = cv2.VideoCapture(video_path)

    sampler = Sampler(weights_path=weights_path, num_descriptors=num_descriptors,
                      hidden_size=256, input_size=6)

    tracked_detections = [[] for i in range(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)))]
    _, inp = cap.read()

    queries = set()
    with open(voc_path) as f:
        lines = f.readlines()
    for line in lines:
        word = line.split(',')[-1]
        word = word.translate(trans).lower()
        queries.add(word)

    for word in queries:
    readable = [{
        "question": question,
        "_id": articles.id[i],
        "title": articles.title[i],
        "answer": h_answers_bool[i]
    } for i in range(len(articles.title))]

    return readable


if __name__ == "__main__":
    bqa = BooleanQA.from_pretrained(BOOLEAN_MODEL)
    aqa = AbstractiveQA.from_pretrained(ABSTRACTIVE_MODEL)
    eqa = ExtractiveQA.from_pretrained(EXTRACTIVE_MODEL)

    samplerAQA = Sampler(boolean_tokenizer=BOOLEAN_MODEL,
                         abstractive_tokenizer=ABSTRACTIVE_MODEL)
    samplerEQA = Sampler(boolean_tokenizer=BOOLEAN_MODEL,
                         extractive_tokenizer=EXTRACTIVE_MODEL)

    question = "Does sugar increase diabetes?"

    print('=' * 80)
    print('ABSTRACTIVE BOOLEAN MODEL')
    print('_' * 80)
    answer = abstractive_pipeline(question, aqa, bqa, samplerAQA)
    print()
    cat(answer)

    print('=' * 80)
    print('EXTRACTIVE BOOLEAN MODEL')
import sys

from utils import Sampler
import h5py
import numpy as np

max_step = 5
seg_len = 128
mel_band = 80
lin_band = 1025
batch_size = 16
n_batches = 100000

if __name__ == '__main__':
    if len(sys.argv) < 3:
        print('usage: python3 make_batches.py [in_h5py_path] [out_h5py_path]')
        exit(0)

    sampler = Sampler(sys.argv[1], max_step=max_step, seg_len=seg_len)

    with h5py.File(sys.argv[2], 'w') as f_h5:
        for i in range(n_batches):
            samples = {
                'X_i_t': {
                    'mel': np.empty(shape=(batch_size, seg_len, mel_band), dtype=np.float32),
                    #'lin': np.empty(shape=(batch_size, seg_len, lin_band), dtype=np.float32)
                },
                'X_i_tk': {
                    'mel': np.empty(shape=(batch_size, seg_len, mel_band), dtype=np.float32),
                    #'lin': np.empty(shape=(batch_size, seg_len, lin_band), dtype=np.float32)
                },
                'X_i_tk_prime': {
                    'mel': np.empty(shape=(batch_size, seg_len, mel_band), dtype=np.float32),
                    #'lin': np.empty(shape=(batch_size, seg_len, lin_band), dtype=np.float32)
                },