Example #1
    def load_ith_file(self):
        results_files = os.listdir(self.results_dir)
        print(results_files)  # list the available results files
        results_path = os.path.join(self.results_dir, results_files[self.i])
        print("Loading results file {}".format(results_path))
        dat = np.load(results_path, allow_pickle=True)
        if self.verbose:
            print('configuration: {}'.format(dat['conf']))

        self.pred_train = dat['y_prime_train']
        self.truth_train = dat['y_gold_train']
        self.disruptive_train = dat['disruptive_train']
        self.pred_test = dat['y_prime_test']
        self.truth_test = dat['y_gold_test']
        self.disruptive_test = dat['disruptive_test']
        # [()] unwraps the 0-d object arrays produced by np.savez
        self.shot_list_test = ShotList(dat['shot_list_test'][()])
        self.shot_list_train = ShotList(dat['shot_list_train'][()])
        self.saved_conf = dat['conf'][()]
        # all files must agree on T_warning due to output of truth vs.
        # normalized shot ttd.
        self.conf['data']['T_warning'] = self.saved_conf['data']['T_warning']
        for mode in ['test', 'train']:
            print('{}: loaded {} shots ({} disruptive)'.format(
                mode, self.get_num_shots(mode),
                self.get_num_disruptive_shots(mode)))
        if self.verbose:
            self.print_conf()
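
The [()] indexing in the snippet above is easy to miss: np.savez wraps arbitrary Python objects (dicts, ShotList instances) in 0-d object arrays, and [()] unwraps them again. A minimal, self-contained sketch (the file name and config dict are hypothetical):

import numpy as np

conf = {'data': {'T_warning': 1.024}}            # hypothetical config dict
np.savez('results.npz', conf=conf)               # stored as a 0-d object array
dat = np.load('results.npz', allow_pickle=True)
print(dat['conf'].shape)   # () -- a 0-d array wrapping the dict
print(dat['conf'][()])     # [()] unwraps it: {'data': {'T_warning': 1.024}}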
Example #2
    def preprocess_from_files(self, shot_files, use_shots):
        # all shots, including invalid ones
        all_signals = self.conf['paths']['all_signals']
        shot_list = ShotList()
        shot_list.load_from_shot_list_files_objects(shot_files, all_signals)
        shot_list_picked = shot_list.random_sublist(use_shots)

        # empty
        used_shots = ShotList()

        # leave two cores free for the rest of the system
        use_cores = max(1, mp.cpu_count() - 2)
        pool = mp.Pool(use_cores)
        print('running in parallel on {} processes'.format(pool._processes))
        start_time = time.time()
        for (i, shot) in enumerate(pool.imap_unordered(
                self.preprocess_single_file, shot_list_picked)):
            # serial alternative:
            # for (i, shot) in enumerate(
            #         map(self.preprocess_single_file, shot_list_picked)):
            sys.stdout.write('\r{}/{}'.format(i + 1, len(shot_list_picked)))
            used_shots.append_if_valid(shot)

        pool.close()
        pool.join()
        print('\nFinished preprocessing {} files in {} seconds'.format(
            len(shot_list_picked), time.time() - start_time))
        print('Omitted {} shots of {} total.'.format(
            len(shot_list_picked) - len(used_shots), len(shot_list_picked)))
        print('{}/{} disruptive shots'.format(
            used_shots.num_disruptive(), len(used_shots)))
        if len(used_shots) == 0:
            print("WARNING: All shots were omitted, please ensure raw data "
                  "is complete and available at {}.".format(
                      self.conf['paths']['signal_prepath']))
        return used_shots
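
Note that imap_unordered yields results in completion order, not submission order, which is why shots are appended via append_if_valid rather than placed by index. The close()/join() shutdown above can also be written with a context manager; a minimal sketch with a hypothetical worker (in Python 3 the with-block calls terminate() on exit, which is safe here because every result is consumed inside the block):

import multiprocessing as mp

def square(x):          # stand-in for preprocess_single_file
    return x * x

if __name__ == '__main__':
    with mp.Pool(max(1, mp.cpu_count() - 2)) as pool:
        results = list(pool.imap_unordered(square, range(8)))
    print(sorted(results))   # arrival order is nondeterministic, so sort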
Example #3
    def load_shotlists(self):
        path = self.get_shot_list_path()
        # latin1 encoding permits unpickling data written under Python 2
        data = np.load(path, encoding="latin1", allow_pickle=True)
        shot_list_train = data['shot_list_train'][()]
        shot_list_validate = data['shot_list_validate'][()]
        shot_list_test = data['shot_list_test'][()]
        if isinstance(shot_list_train, ShotList):
            return shot_list_train, shot_list_validate, shot_list_test
        else:
            return (ShotList(shot_list_train), ShotList(shot_list_validate),
                    ShotList(shot_list_test))
Example #4
    def preprocess_from_files(self, shot_files, use_shots):
        # all shots, including invalid ones
        all_signals = self.conf['paths']['all_signals']
        shot_list = ShotList()
        shot_list.load_from_shot_list_files_objects(shot_files, all_signals)
        shot_list_picked = shot_list.random_sublist(use_shots)

        # empty
        used_shots = ShotList()

        # TODO(KGF): generalize the following line to perform well on
        # architectures other than CPUs, e.g. KNLs
        # min( <desired-maximum-process-count>, max(1,mp.cpu_count()-2) )
        use_cores = max(1, mp.cpu_count() - 2)
        pool = mp.Pool(use_cores)
        print('Running in parallel on {} processes'.format(pool._processes))
        start_time = time.time()
        for (i, shot) in enumerate(
                pool.imap_unordered(self.preprocess_single_file,
                                    shot_list_picked)):
            # serial alternative:
            # for (i, shot) in enumerate(
            #         map(self.preprocess_single_file, shot_list_picked)):
            sys.stdout.write('\r{}/{}'.format(i + 1, len(shot_list_picked)))
            used_shots.append_if_valid(shot)

        pool.close()
        pool.join()
        print('\nFinished preprocessing {} files in {} seconds'.format(
            len(shot_list_picked),
            time.time() - start_time))
        print('Using {} shots ({} disruptive shots)'.format(
            len(used_shots), used_shots.num_disruptive()))
        print('Omitted {} shots of {} total shots'.format(
            len(shot_list_picked) - len(used_shots), len(shot_list_picked)))
        print(
            'Omitted {} disruptive shots of {} total disruptive shots'.format(
                shot_list_picked.num_disruptive() -
                used_shots.num_disruptive(),
                shot_list_picked.num_disruptive()))

        if len(used_shots) == 0:
            print("WARNING: All shots were omitted, please ensure raw data "
                  " is complete and available at {}.".format(
                      self.conf['paths']['signal_prepath']))
        return used_shots
Example #5
def create_shot_list_tmp(original_shot, time_points, sigs=None):
    shot_list_tmp = ShotList()
    T = len(original_shot.ttd)
    # np.int was removed in NumPy 1.24; use the builtin int as the dtype
    t_range = np.linspace(0, T - 1, time_points, dtype=int)
    for t in t_range:
        new_shot = copy.copy(original_shot)
        assert new_shot.augmentation_fn is None
        new_shot.augmentation_fn = partial(hide_signal_data, t=t,
                                           sigs_to_hide=sigs)
        # new_shot.number = original_shot.number
        shot_list_tmp.append(new_shot)
    return shot_list_tmp, t_range
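
copy.copy produces shallow copies, so the assert guards against reusing a shot that already carries an augmentation_fn; functools.partial then freezes the per-time-point arguments so the function can later be called with just the shot. A small sketch of that pattern (hide is a hypothetical stand-in for hide_signal_data):

from functools import partial

def hide(shot, t=0, sigs_to_hide=None):
    print('hiding signals from t={} in shot {}'.format(t, shot))

augmentation_fn = partial(hide, t=3)
augmentation_fn('1234')   # -> hiding signals from t=3 in shot 1234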
Example #6
    def train_on_files(self,
                       shot_files,
                       use_shots,
                       all_machines,
                       verbose=False):
        conf = self.conf
        all_signals = conf['paths']['all_signals']
        shot_list = ShotList()
        shot_list.load_from_shot_list_files_objects(shot_files, all_signals)
        shot_list_picked = shot_list.random_sublist(use_shots)

        previously_saved, machines_saved = self.previously_saved_stats()
        machines_to_compute = all_machines - machines_saved
        recompute = conf['data']['recompute_normalization']
        if recompute:
            machines_to_compute = all_machines
            previously_saved = False
        if not previously_saved or len(machines_to_compute) > 0:
            if previously_saved:
                self.load_stats(verbose=True)
            print('computing normalization for machines {}'.format(
                machines_to_compute))
            use_cores = max(1, mp.cpu_count() - 2)
            pool = mp.Pool(use_cores)
            print('running in parallel on {} processes'.format(
                pool._processes))
            start_time = time.time()

            for (i, stats) in enumerate(
                    pool.imap_unordered(self.train_on_single_shot,
                                        shot_list_picked)):
                # for (i,stats) in
                # enumerate(map(self.train_on_single_shot,shot_list_picked)):
                if stats.machine in machines_to_compute:
                    self.incorporate_stats(stats)
                    self.machines.add(stats.machine)
                sys.stdout.write('\r' +
                                 '{}/{}'.format(i + 1, len(shot_list_picked)))
            pool.close()
            pool.join()
            print('\nFinished training normalizer on '
                  '{} files in {} seconds'.format(len(shot_list_picked),
                                                  time.time() - start_time))
            self.save_stats(verbose=True)
        else:
            self.load_stats(verbose=verbose)
        # print representation of trained Normalizer to stdout:
        # Machine, NormalizerName, per-signal normalization stats/params
        if verbose:
            g.print_unique(self)
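
incorporate_stats is not shown here, but the surrounding loop is a plain accumulate-then-save pattern: per-shot statistics stream out of the pool and are folded into per-machine aggregates before save_stats persists them. A generic sketch of that pattern (RunningMean and the sample data are illustrative, not the repo's actual statistics):

class RunningMean:
    def __init__(self):
        self.count, self.total = 0, 0.0

    def incorporate(self, value):
        self.count += 1
        self.total += value

    @property
    def mean(self):
        return self.total / max(1, self.count)

acc = {}                                   # one accumulator per machine
for machine, value in [('d3d', 1.0), ('jet', 2.0), ('d3d', 3.0)]:
    acc.setdefault(machine, RunningMean()).incorporate(value)
print(acc['d3d'].mean)                     # 2.0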
Example #7
#####################################################
#                 NORMALIZATION                     #
#####################################################
# TODO(KGF): identical in at least 3x files in examples/
# make sure preprocessing has been run, and is saved as a file
if task_index == 0:
    # TODO(KGF): check tuple unpack
    (shot_list_train, shot_list_validate,
     shot_list_test) = guarantee_preprocessed(conf)
comm.Barrier()
# after the barrier, every rank loads the now-cached preprocessed shot lists
(shot_list_train, shot_list_validate,
 shot_list_test) = guarantee_preprocessed(conf)

shot_list = sum([l.filter_by_number([shot_num])
                 for l in [shot_list_train, shot_list_validate,
                           shot_list_test]], ShotList())
assert len(shot_list) == 1
# for s in shot_list.shots:
#     s.restore()


def chunks(l, n):
    """Return successive n-sized chunks from l."""
    return [l[i:i + n] for i in range(0, len(l), n)]
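# usage sketch:
#   chunks(list(range(7)), 3) -> [[0, 1, 2], [3, 4, 5], [6]]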


def hide_signal_data(shot, t=0, sigs_to_hide=None):
    # freeze every selected signal at its value at time index t
    for sig in shot.signals:
        if sigs_to_hide is None or sig in sigs_to_hide:
            shot.signals_dict[sig][t:, :] = shot.signals_dict[sig][t, :]
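
hide_signal_data clamps every selected signal from time index t onward to its value at t, emulating a sensor that goes dark mid-shot. The effect on a raw array (a hypothetical 5-step, 2-channel signal):

import numpy as np

sig = np.arange(10.0).reshape(5, 2)   # rows are time steps
sig[2:, :] = sig[2, :]                # the same clamping hide_signal_data applies
print(sig[-1])                        # [4. 5.] -- frozen at the t=2 sample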
Example #8
def apply_bleed_in(conf, shot_list_train, shot_list_validate, shot_list_test):
    # fixed seed for reproducible sampling of bleed-in shots
    np.random.seed(2)
    num = conf['data']['bleed_in']
    # new_shots = []
    if num > 0:
        shot_list_bleed = ShotList()
        print('applying bleed in with {} disruptive shots\n'.format(num))
        # num_total = len(shot_list_test)
        num_d = shot_list_test.num_disruptive()
        # num_nd = num_total - num_d
        assert num_d >= num, (
            "Not enough disruptive shots {} to cover bleed in {}".format(
                num_d, num))
        num_sampled_d = 0
        num_sampled_nd = 0
        while num_sampled_d < num:
            s = shot_list_test.sample_shot()
            shot_list_bleed.append(s)
            if conf['data']['bleed_in_remove_from_test']:
                shot_list_test.remove(s)
            if s.is_disruptive:
                num_sampled_d += 1
            else:
                num_sampled_nd += 1
        print("Sampled {} shots, {} disruptive, {} nondisruptive".format(
            num_sampled_nd + num_sampled_d, num_sampled_d, num_sampled_nd))
        print("Before adding: training shots: {} validation shots: {}".format(
            len(shot_list_train), len(shot_list_validate)))
        assert num_sampled_d == num
        # add bleed-in shots to training and validation set repeatedly
        if conf['data']['bleed_in_equalize_sets']:
            print("Applying equalized bleed in")
            for shot_list_curr in [shot_list_train, shot_list_validate]:
                for i in range(len(shot_list_curr)):
                    s = shot_list_bleed.sample_shot()
                    shot_list_curr.append(s)
        elif conf['data']['bleed_in_repeat_fac'] > 1:
            repeat_fac = conf['data']['bleed_in_repeat_fac']
            print("Applying bleed in with repeat factor {}".format(repeat_fac))
            num_to_sample = int(round(repeat_fac * len(shot_list_bleed)))
            for i in range(num_to_sample):
                s = shot_list_bleed.sample_shot()
                shot_list_train.append(s)
                shot_list_validate.append(s)
        else:  # add each shot only once
            print("Applying bleed in without repetition")
            for s in shot_list_bleed:
                shot_list_train.append(s)
                shot_list_validate.append(s)
        print("After adding: training shots: {} validation shots: {}".format(
            len(shot_list_train), len(shot_list_validate)))
        print("Added bleed in shots to training and validation sets")
        # if num_d > 0:
        #     for i in range(num):
        #         s = shot_list_test.sample_single_class(True)
        #         shot_list_train.append(s)
        #         shot_list_validate.append(s)
        #         if conf['data']['bleed_in_remove_from_test']:
        #             shot_list_test.remove(s)
        # else:
        #     print('No disruptive shots in test set, omitting bleed in')
        # if num_nd > 0:
        #     for i in range(num):
        #         s = shot_list_test.sample_single_class(False)
        #         shot_list_train.append(s)
        #         shot_list_validate.append(s)
        #         if conf['data']['bleed_in_remove_from_test']:
        #             shot_list_test.remove(s)
        # else:
        #     print('No nondisruptive shots in test set, omitting bleed in')
    return shot_list_train, shot_list_validate, shot_list_test
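
A hedged usage sketch showing the conf keys the function reads (the values are illustrative, and the three shot lists are assumed to come from guarantee_preprocessed):

conf = {'data': {'bleed_in': 5,
                 'bleed_in_remove_from_test': True,
                 'bleed_in_equalize_sets': False,
                 'bleed_in_repeat_fac': 1}}
shot_list_train, shot_list_validate, shot_list_test = apply_bleed_in(
    conf, shot_list_train, shot_list_validate, shot_list_test)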
Example #9
custom_path = None
if only_predict:
    custom_path = sys.argv[1]
shot_num = int(sys.argv[2])
print("predicting using path {} on shot {}".format(custom_path, shot_num))

assert only_predict
#####################################################
#                 NORMALIZATION                     #
#####################################################
# make sure preprocessing has been run, and is saved as a file
if task_index == 0:
    (shot_list_train, shot_list_validate,
     shot_list_test) = guarantee_preprocessed(conf)
comm.Barrier()
# after the barrier, every rank loads the now-cached preprocessed shot lists
(shot_list_train, shot_list_validate,
 shot_list_test) = guarantee_preprocessed(conf)

shot_list = sum([l.filter_by_number([shot_num])
                 for l in [shot_list_train, shot_list_validate,
                           shot_list_test]], ShotList())
assert len(shot_list) == 1
# for s in shot_list.shots:
#     s.restore()

def chunks(l, n):
    """Return successive n-sized chunks from l."""
    return [l[i:i + n] for i in range(0, len(l), n)]

def hide_signal_data(shot, t=0, sigs_to_hide=None):
    # freeze every selected signal at its value at time index t
    for sig in shot.signals:
        if sigs_to_hide is None or sig in sigs_to_hide:
            shot.signals_dict[sig][t:, :] = shot.signals_dict[sig][t, :]

def create_shot_list_tmp(original_shot, time_points, sigs=None):
    shot_list_tmp = ShotList()