def nested_sampling(self, angle_times, save_path, filename, dynamic=False):
    """Runs nested sampling on simulated data of the sample.

    Args:
        angle_times (list): points and times for each angle to simulate.
        save_path (str): path to directory to save corner plot to.
        filename (str): file name to use when saving corner plot.
        dynamic (bool): whether to use static or dynamic nested sampling.

    """
    # Simulate data for the sample.
    model, data = simulate(self.structure, angle_times)

    # The structure was defined in refnx.
    if isinstance(self.structure, refnx.reflect.Structure):
        dataset = refnx.dataset.ReflectDataset([data[:, 0], data[:, 1], data[:, 2]])
        objective = refnx.analysis.Objective(model, dataset)

    # The structure was defined in Refl1D.
    elif isinstance(self.structure, refl1d.model.Stack):
        objective = bumps.fitproblem.FitProblem(model)

    # Otherwise, the structure is invalid.
    else:
        raise RuntimeError('invalid structure given')

    # Sample the objective using nested sampling.
    sampler = Sampler(objective)
    fig = sampler.sample(dynamic=dynamic)

    # Save the sampling corner plot.
    save_path = os.path.join(save_path, self.name)
    save_plot(fig, save_path, filename + '_nested_sampling')
def sample(self, num: int = None, length: int = None) -> List[str]:
    if num is None:
        num = self.hparams.bs
    if length is None:
        length = self.hparams.seq_len

    dataset: StarTrekCharGenerationDataset = self.datasets[Splits.train]
    token_start = dataset.vocab.stoi[dataset.vocab.start]
    x = torch.from_numpy(np.array([token_start] * num)).long()
    x = x.reshape(num, 1)
    x = x.to(self.device)

    self.net.eval()
    with torch.no_grad():
        states = None
        history = [x]
        for _ in tqdm(range(length), disable=(not self.hparams.verbose)):
            n_look_back = length if self.hparams.model == "tcn" else 1
            inputs = torch.cat(history[-n_look_back:], dim=-1)
            logits, states = self.net(inputs, states)
            next_logits = logits[:, -1, :]
            history.append(Sampler.temperature(next_logits))

    history = history[1:]  # Omit start tokens
    outputs = torch.stack(history).squeeze().transpose(0, 1)
    outputs = outputs.cpu()
    return [dataset.sequence_to_text(outputs[i, :]) for i in range(num)]
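# The generation loop above calls Sampler.temperature(next_logits), which is not
# shown in this snippet. A minimal sketch of what such a temperature-based
# categorical sampling step might look like, assuming logits of shape
# (batch, vocab); the function name, default temperature, and return shape of
# (batch, 1) are assumptions, not the project's actual API.
import torch

def temperature_sample(logits: torch.Tensor, temperature: float = 1.0) -> torch.Tensor:
    # Hypothetical stand-in for Sampler.temperature: scale the logits,
    # convert to probabilities, then draw one token index per row.
    probs = torch.softmax(logits / temperature, dim=-1)
    return torch.multinomial(probs, num_samples=1)  # shape: (batch, 1)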
def nested_sampling(self, angle_times, save_path, filename, dynamic=False):
    """Runs nested sampling on simulated data of the sample.

    Args:
        angle_times (list): points and counting times for each measurement angle to simulate.
        save_path (str): path to directory to save corner plot to.
        filename (str): name of file to save corner plot to.
        dynamic (bool): whether to use static or dynamic nested sampling.

    """
    objective = bumps.fitproblem.FitProblem(self.experiment)

    # Sample the objective using nested sampling.
    sampler = Sampler(objective)
    fig = sampler.sample(dynamic=dynamic)

    # Save the sampling corner plot.
    save_path = os.path.join(save_path, self.name)
    save_plot(fig, save_path, 'nested_sampling_' + filename)
def nested_sampling(self, contrasts, angle_times, save_path, filename,
                    underlayers=None, dynamic=False):
    """Runs nested sampling on simulated data of the lipid sample.

    Args:
        contrasts (list): SLDs of contrasts to simulate.
        angle_times (list): points and times for each angle to simulate.
        save_path (str): path to directory to save corner plot to.
        filename (str): file name to use when saving corner plot.
        underlayers (list): thickness and SLD of each underlayer to add.
        dynamic (bool): whether to use static or dynamic nested sampling.

    """
    # Create objectives for each contrast to sample with.
    objectives = []
    for contrast in contrasts:
        # Simulate an experiment using the given contrast.
        sample = self._using_conditions(contrast, underlayers)
        model, data = simulate(sample, angle_times, scale=1, bkg=5e-6, dq=2)
        dataset = refnx.dataset.ReflectDataset([data[:, 0], data[:, 1], data[:, 2]])
        objectives.append(refnx.analysis.Objective(model, dataset))

    # Combine objectives into a single global objective.
    global_objective = refnx.analysis.GlobalObjective(objectives)

    # Exclude certain parameters if underlayers are being used.
    if underlayers is None:
        global_objective.varying_parameters = lambda: self.params
    else:
        global_objective.varying_parameters = lambda: self.underlayer_params

    # Sample the objective using nested sampling.
    sampler = Sampler(global_objective)
    fig = sampler.sample(dynamic=dynamic)

    # Save the sampling corner plot.
    save_path = os.path.join(save_path, self.name)
    save_plot(fig, save_path, 'nested_sampling_' + filename)
def main(args):
    # build search space
    data = load_data(args.dataset, args.seed)
    ss, _ = pruning_search_space_by_eda(data)

    if data.setting == 'inductive':
        trainer = InductiveTrainer()
    else:
        trainer = TransductiveTrainer()

    sampler = Sampler(args.dataset, ss)

    archs = []
    val_scores = []
    test_scores = []

    # init training data for GBDT
    sampled_archs = sampler.sample(3000)

    i = 0
    while i < len(sampled_archs):
        arch = sampled_archs[i]
        data = sampler.load_data(arch)
        try:
            model = sampler.build_model(arch, data.x.shape[1], int(max(data.y)) + 1)
            trainer.init_trainer(model, arch[7], arch[6])
            val_score = trainer.train(data)
            test_score = trainer.test(data)
        except RuntimeError as e:
            if "cuda" in str(e) or "CUDA" in str(e):  # CUDA OOM, sample another arch
                print(e)
                sampled_archs += sampler.sample(1)
                i += 1
                continue
            else:
                raise e

        archs.append(arch)
        val_scores.append(val_score)
        test_scores.append(test_score)
        print(arch, f'real val score: {val_score} | real test score: {test_score}')
        print(f'Number of evaluated archs: {len(archs)}')

        i += 1
        if i % 500 == 0:
            print(f'Round {i // 500} | best test score: {max(test_scores)}')
        if i >= 2000:
            break
def main(device):
    parser = argparse.ArgumentParser("LINE training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger, log_path = create_logger(cfg)
    logger.info(cfg)

    graph = networkx.read_edgelist(cfg.DATA.GRAPH_PATH,
                                   create_using=networkx.DiGraph(),
                                   nodetype=None,
                                   data=[('weight', int)])

    model = LINE(graph, cfg).to(device)
    v2ind, ind2v = model.get_mapping()
    sampler = Sampler(graph, v2ind, batch_size=cfg.SAMPLE.BATCHSIZE)
    criterion = KLLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=cfg.WORD2VEC.LR)

    train(sampler, model, cfg, criterion, optimizer, device)

    embedding = model.get_embedding()
    eval_embedding(embedding, cfg.DATA.LABEL_PATH, logger)
    vis_embedding(embedding, cfg.DATA.LABEL_PATH, log_path)
def bayes(times, angle_splits, save_path):
    with open(os.path.join(save_path, 'YIG_sample', 'bayes.csv'), 'w') as file:
        for time in times:
            angle_times = [(angle, points, time * split)
                           for angle, points, split in angle_splits]

            sample = SampleYIG(vary=False)
            sample.Pt_thick.range(0, 0.2)

            models, datasets = simulate_magnetic(test(sample, 0.01638), angle_times,
                                                 scale=1, bkg=5e-7, dq=2,
                                                 pp=True, pm=False, mp=False, mm=True)

            mm = models[0].probe.xs[0]
            pp = models[1].probe.xs[3]

            probe = refl1d.probe.PolarizedQProbe(xs=(mm, None, None, pp), name='Probe')
            experiment = refl1d.experiment.Experiment(sample=sample.structure, probe=probe)

            sampler = Sampler(bumps.fitproblem.FitProblem(experiment))
            logz_1 = sampler.sample(verbose=False, return_evidence=True)

            sample = SampleYIG(vary=False)
            sample.Pt_mag.value = 0
            sample.Pt_thick.range(0, 0.2)

            experiment = refl1d.experiment.Experiment(sample=sample.structure, probe=probe)

            sampler = Sampler(bumps.fitproblem.FitProblem(experiment))
            logz_2 = sampler.sample(verbose=False, return_evidence=True)

            # Twice the difference in log-evidence between the two models,
            # i.e. twice the log Bayes factor.
            factor = 2 * (logz_1 - logz_2)

            print(factor)
            file.write('{0},{1}\n'.format(time, factor))
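# The value written to bayes.csv is 2 * (logz_1 - logz_2), i.e. twice the log
# Bayes factor. A hedged helper for reading that number on the usual
# Kass-Raftery scale; the thresholds below are the standard published ones and
# the function itself is illustrative, not part of this project.
def interpret_bayes_factor(two_ln_k: float) -> str:
    # Kass & Raftery (1995) evidence categories for 2*ln(K).
    if two_ln_k < 2:
        return 'not worth more than a bare mention'
    if two_ln_k < 6:
        return 'positive'
    if two_ln_k < 10:
        return 'strong'
    return 'very strong'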
import os, sys
sys.path.append(os.getcwd())

from utils import Sampler
import h5py
import numpy as np
import json

max_step = 5
seg_len = 128
mel_band = 80
lin_band = 513
n_samples = 2000000
dset = 'train'

if __name__ == '__main__':
    if len(sys.argv) < 3:
        print('usage: python3 make_single_samples.py [in_h5py_path] [out_json_path]')
        exit(0)

    sampler = Sampler(sys.argv[1], max_step=max_step, seg_len=seg_len, dset=dset)
    samples = [sampler.sample_single()._asdict() for _ in range(n_samples)]

    with open(sys.argv[2], 'w') as f_json:
        json.dump(samples, f_json, indent=4, separators=(',', ': '))
def main(args):
    # build search space
    data = load_data(args.dataset, args.seed)
    ss, _ = pruning_search_space_by_eda(data)

    if data.setting == 'inductive':
        trainer = InductiveTrainer()
    else:
        trainer = TransductiveTrainer()

    sampler = Sampler(args.dataset, ss)

    archs = []
    val_scores = []
    top_archs = []
    top_val_scores = []
    top_test_scores = []

    # init training data for GBDT
    sampled_archs = sampler.sample(args.n)

    i = 0
    while i < len(sampled_archs):
        arch = sampled_archs[i]
        data = sampler.load_data(arch)
        try:
            model = sampler.build_model(arch, data.x.shape[1], int(max(data.y)) + 1)
            trainer.init_trainer(model, arch[7], arch[6])
            val_score = trainer.train(data)
        except RuntimeError as e:
            if "cuda" in str(e) or "CUDA" in str(e):  # CUDA OOM, sample another arch
                print(e)
                sampled_archs += sampler.sample(1)
                i += 1
                continue
            else:
                raise e

        archs.append(arch)
        val_scores.append(val_score)
        print(arch, f'real val score: {val_score}')
        print(f'Number of evaluated archs: {len(archs)}')
        i += 1

    # train GBDT predictor
    for iter_round in range(1, args.iterations + 1):
        print(f'Iteration round {iter_round}, retraining model and sampling archs...',
              datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

        # train GBDT
        X = [[str(e) for e in row] for row in archs]
        y = np.array(val_scores)
        train_pool = Pool(X, y, cat_features=[i for i in range(len(X[0]))])
        # X = lgb.Dataset(pd.DataFrame(X, columns=ss.keys()), label=np.array(val_scores))
        # gbdt_model = lgb.train(gbdt_params, X, args.gbdt_num_boost_round, categorical_feature=ss.keys())
        gbdt_model = CatBoostRegressor(learning_rate=args.gbdt_lr, verbose=False)
        gbdt_model.fit(train_pool)

        # pruning search space
        ss = pruning_search_space_by_shap(archs, gbdt_model, ss, args.p)
        sampler.update_search_space(ss)

        # predict some archs
        sampled_archs = sampler.sample(args.m)
        X = [[str(e) for e in row] for row in sampled_archs]
        test_pool = Pool(X, cat_features=[i for i in range(len(X[0]))])
        predicted_val_scores = gbdt_model.predict(test_pool)

        # sort the archs according to the predicted value
        zipped = zip(sampled_archs, predicted_val_scores)
        zipped = sorted(zipped, key=lambda e: e[1], reverse=True)  # sort in decreasing order
        sampled_archs, predicted_val_scores = zip(*zipped)
        sampled_archs, predicted_val_scores = list(sampled_archs), list(predicted_val_scores)

        print(f'Iteration round {iter_round}, evaluating top k archs on valid set',
              datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

        # evaluate top k archs
        i = 0
        while i < len(sampled_archs):
            arch = sampled_archs[i]
            data = sampler.load_data(arch)
            try:
                model = sampler.build_model(arch, data.x.shape[1], int(max(data.y)) + 1)
                trainer.init_trainer(model, arch[7], arch[6])
                val_score = trainer.train(data)
                predicted_val_score = predicted_val_scores[i]
            except RuntimeError as e:
                if "cuda" in str(e) or "CUDA" in str(e):  # CUDA OOM, sample another arch
                    print(e)
                    sampled_archs += sampler.sample(1)
                    i += 1
                    continue
                else:
                    raise e

            archs.append(arch)
            val_scores.append(val_score)
            print(arch, f'predicted val score: {predicted_val_score} | real val score: {val_score}')
            print(f'Number of evaluated archs: {len(archs)}')

            if i + 1 >= args.k:
                break
            i += 1

        # sort all the evaluated archs
        zipped = zip(archs, val_scores)
        zipped = sorted(zipped, key=lambda e: e[1], reverse=True)
        archs, val_scores = zip(*zipped)
        archs, val_scores = list(archs), list(val_scores)

        print(f'Iteration round {iter_round}, evaluating top k_test archs on test set',
              datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

        # evaluate top k_test archs on test set
        i = 0
        while i < len(archs):
            arch = archs[i]
            data = sampler.load_data(arch)
            try:
                model = sampler.build_model(arch, data.x.shape[1], int(max(data.y)) + 1)
                trainer.init_trainer(model, arch[7], arch[6])
                val_score = trainer.train(data)
                test_score, z = trainer.test(data, return_logits=True)
                pickle.dump((z, data.y[data.test_mask]),
                            open(f'embeddings/{args.dataset}_AutoGRL-round{iter_round}-top{i + 1}.pt', 'wb'))
            except RuntimeError as e:
                if "cuda" in str(e) or "CUDA" in str(e):  # CUDA OOM, sample another arch
                    print(e)
                    i += 1
                    continue
                else:
                    raise e

            top_archs.append(arch)
            top_val_scores.append(val_score)
            top_test_scores.append(test_score)
            print(arch)
            print(f'Testing... round {iter_round} | arch top {i + 1} | real val score {val_score} | real test score {test_score}',
                  datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

            if i + 1 >= args.k_test:  # only test top k_test models for every round
                break
            i += 1

        zipped = zip(top_val_scores, top_test_scores)
        zipped = sorted(zipped, key=lambda e: e[0], reverse=True)
        best_val_score, corr_test_score = zipped[0][0], zipped[0][1]

        # logging
        print(f'Iteration {iter_round} | best val score {best_val_score} | corresponding test score {corr_test_score} | best test score {max(top_test_scores)}',
              datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

        pickle.dump((ss, sampler, trainer, archs, val_scores, gbdt_model, sampled_archs,
                     predicted_val_scores, top_val_scores, top_test_scores),
                    open(f'cache/gbdt/{args.dataset}_seed{args.seed}_round{iter_round}.pt', 'wb'))
num_descriptors = 361
epoch_number = 60
is_test = False

if is_test:
    gt_path = 'datasets/rrc-text-videos/ch3_test/'  # test
    descriptors_path = 'extracted_descriptors/extracted_descriptors_' + str(num_descriptors) + '_test'  # test
else:
    gt_path = 'datasets/rrc-text-videos/ch3_train/'  # train
    descriptors_path = 'extracted_descriptors/extracted_descriptors_' + str(num_descriptors) + '_dist'  # train

annotations_paths = glob(gt_path + '*.xml')
# annotations_path = ["datasets/rrc-text-videos/ch3_test/Video_49_6_4_GT.xml"]

sampler = Sampler(weights_path='models/models_361_dropout/model-epoch-' + str(epoch_number) + '.pth',
                  num_descriptors=num_descriptors, hidden_size=256, input_size=6)

with open("results-" + str(num_descriptors) + "-" + str(epoch_number) + ".txt", "a") as f:
    if is_test:
        f.write("Results for test split\n")
    else:
        f.write("Results for train split\n")

for annotations_path in annotations_paths:
    acc = mm.MOTAccumulator(auto_id=True)

    with open("results-" + str(num_descriptors) + "-" + str(epoch_number) + ".txt", "a") as f:
        f.write("Results for file " + annotations_path + "\n")

    video_path = annotations_path.replace("_GT.xml", ".mp4")
    video_name = video_path.split('/')[-1].replace('.mp4', '')
for video_path in video_paths:
    print(video_path)

    if not os.path.isdir('images'):
        os.mkdir('images')
    files = glob.glob('images/*')
    for f in files:
        os.remove(f)

    video_name = video_path.split('/')[-1].replace('.mp4', '')
    # voc_path = 'datasets/rrc-text-videos/ch3_train/' + video_name + '_GT.txt'
    voc_path = 'datasets/rrc-text-videos/ch3_test/' + video_name + '_GT_voc.txt'
    cap = cv2.VideoCapture(video_path)

    sampler = Sampler(weights_path=weights_path, num_descriptors=num_descriptors,
                      hidden_size=256, input_size=6)

    tracked_detections = [[] for i in range(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)))]
    _, inp = cap.read()

    queries = set()
    with open(voc_path) as f:
        lines = f.readlines()
    for line in lines:
        word = line.split(',')[-1]
        word = word.translate(trans).lower()
        queries.add(word)

    for word in queries:
    readable = [{
        "question": question,
        "_id": articles.id[i],
        "title": articles.title[i],
        "answer": h_answers_bool[i]
    } for i in range(len(articles.title))]

    return readable


if __name__ == "__main__":
    bqa = BooleanQA.from_pretrained(BOOLEAN_MODEL)
    aqa = AbstractiveQA.from_pretrained(ABSTRACTIVE_MODEL)
    eqa = ExtractiveQA.from_pretrained(EXTRACTIVE_MODEL)

    samplerAQA = Sampler(boolean_tokenizer=BOOLEAN_MODEL,
                         abstractive_tokenizer=ABSTRACTIVE_MODEL)
    samplerEQA = Sampler(boolean_tokenizer=BOOLEAN_MODEL,
                         extractive_tokenizer=EXTRACTIVE_MODEL)

    question = "Does sugar increase diabetes?"

    print('=' * 80)
    print('ABSTRACTIVE BOOLEAN MODEL')
    print('_' * 80)
    answer = abstractive_pipeline(question, aqa, bqa, samplerAQA)
    print()
    cat(answer)

    print('=' * 80)
    print('EXTRACTIVE BOOLEAN MODEL')
import sys

from utils import Sampler
import h5py
import numpy as np

max_step = 5
seg_len = 128
mel_band = 80
lin_band = 1025
batch_size = 16
n_batches = 100000

if __name__ == '__main__':
    if len(sys.argv) < 3:
        print('usage: python3 make_batches.py [in_h5py_path] [out_h5py_path]')
        exit(0)

    sampler = Sampler(sys.argv[1], max_step=max_step, seg_len=seg_len)

    with h5py.File(sys.argv[2], 'w') as f_h5:
        for i in range(n_batches):
            samples = {
                'X_i_t': {
                    'mel': np.empty(shape=(batch_size, seg_len, mel_band), dtype=np.float32),
                    #'lin': np.empty(shape=(batch_size, seg_len, lin_band), dtype=np.float32)
                },
                'X_i_tk': {
                    'mel': np.empty(shape=(batch_size, seg_len, mel_band), dtype=np.float32),
                    #'lin': np.empty(shape=(batch_size, seg_len, lin_band), dtype=np.float32)
                },
                'X_i_tk_prime': {
                    'mel': np.empty(shape=(batch_size, seg_len, mel_band), dtype=np.float32),
                    #'lin': np.empty(shape=(batch_size, seg_len, lin_band), dtype=np.float32)
                },