Example #1
0
    def preprocess_from_files(self,shot_files,use_shots):
        """Preprocess a random selection of shots in parallel worker processes.

        The complete shot list is loaded from ``shot_files`` using the signals
        configured under ``conf['paths']['all_signals']``; a sublist is drawn
        with ``random_sublist(use_shots)`` and every drawn shot is passed to
        ``self.preprocess_single_file`` through a multiprocessing pool.

        Args:
            shot_files: forwarded to
                ``ShotList.load_from_shot_list_files_objects``.
            use_shots: forwarded to ``ShotList.random_sublist`` (presumably
                the number of shots to sample -- confirm against ShotList).

        Returns:
            A ``ShotList`` holding the shots accepted by ``append_if_valid``.
        """
        #all shots, including invalid ones
        all_signals = self.conf['paths']['all_signals']
        shot_list = ShotList()
        shot_list.load_from_shot_list_files_objects(shot_files,all_signals)
        shot_list_picked = shot_list.random_sublist(use_shots)
        num_picked = len(shot_list_picked)

        #empty
        used_shots = ShotList()

        # leave two cores free for the rest of the system, but always use one
        use_cores = max(1,mp.cpu_count()-2)
        pool = mp.Pool(use_cores)
        print('running in parallel on {} processes'.format(use_cores))
        start_time = time.time()
        try:
            for (i,shot) in enumerate(pool.imap_unordered(self.preprocess_single_file,shot_list_picked)):
                # i is zero-based; report the number of shots completed so far
                sys.stdout.write('\r{}/{}'.format(i+1,num_picked))
                sys.stdout.flush()
                used_shots.append_if_valid(shot)
            pool.close()
        except BaseException:
            # do not leave worker processes behind when a shot fails or the
            # run is interrupted
            pool.terminate()
            raise
        finally:
            pool.join()

        # leading newline terminates the '\r' progress line
        print('\nFinished Preprocessing {} files in {} seconds'.format(num_picked,time.time()-start_time))
        print('Omitted {} shots of {} total.'.format(num_picked - len(used_shots),num_picked))
        print('{}/{} disruptive shots'.format(used_shots.num_disruptive(),len(used_shots)))
        if len(used_shots) == 0:
            print("WARNING: All shots were omitted, please ensure raw data is complete and available at {}.".format(self.conf['paths']['signal_prepath']))
        return used_shots
Example #2
0
    def preprocess_from_files(self,shot_files,use_shots):
        """Preprocess a random selection of shots in parallel worker processes.

        The complete shot list is loaded from ``shot_files`` using the signals
        configured under ``conf['paths']['all_signals']``; a sublist is drawn
        with ``random_sublist(use_shots)`` and every drawn shot is passed to
        ``self.preprocess_single_file`` through a multiprocessing pool.

        Args:
            shot_files: forwarded to
                ``ShotList.load_from_shot_list_files_objects``.
            use_shots: forwarded to ``ShotList.random_sublist`` (presumably
                the number of shots to sample -- confirm against ShotList).

        Returns:
            A ``ShotList`` holding the shots accepted by ``append_if_valid``.
        """
        #all shots, including invalid ones
        all_signals = self.conf['paths']['all_signals']
        shot_list = ShotList()
        shot_list.load_from_shot_list_files_objects(shot_files,all_signals)
        shot_list_picked = shot_list.random_sublist(use_shots)
        num_picked = len(shot_list_picked)

        #empty
        used_shots = ShotList()

        # leave two cores free for the rest of the system, but always use one
        use_cores = max(1,mp.cpu_count()-2)
        pool = mp.Pool(use_cores)
        print('running in parallel on {} processes'.format(use_cores))
        start_time = time.time()
        try:
            for (i,shot) in enumerate(pool.imap_unordered(self.preprocess_single_file,shot_list_picked)):
                # i is zero-based; report the number of shots completed so far
                sys.stdout.write('\r{}/{}'.format(i+1,num_picked))
                sys.stdout.flush()
                used_shots.append_if_valid(shot)
            pool.close()
        except BaseException:
            # do not leave worker processes behind when a shot fails or the
            # run is interrupted
            pool.terminate()
            raise
        finally:
            pool.join()

        # leading newline terminates the '\r' progress line
        print('\nFinished Preprocessing {} files in {} seconds'.format(num_picked,time.time()-start_time))
        print('Omitted {} shots of {} total.'.format(num_picked - len(used_shots),num_picked))
        print('{}/{} disruptive shots'.format(used_shots.num_disruptive(),len(used_shots)))
        if len(used_shots) == 0:
            print("WARNING: All shots were omitted, please ensure raw data is complete and available at {}.".format(self.conf['paths']['signal_prepath']))
        return used_shots
Example #3
0
    def train_on_files(self,
                       shot_files,
                       use_shots,
                       all_machines,
                       verbose=False):
        """Compute, or load, normalization statistics for the given machines.

        Statistics are recomputed when nothing was saved previously, when some
        machine in ``all_machines`` is missing from the saved set, or when
        ``conf['data']['recompute_normalization']`` is truthy. Otherwise the
        saved statistics are loaded unchanged.

        Args:
            shot_files: forwarded to
                ``ShotList.load_from_shot_list_files_objects``.
            use_shots: forwarded to ``ShotList.random_sublist`` (presumably
                the number of shots to sample -- confirm against ShotList).
            all_machines: machines that need statistics; must support ``-``
                with the saved-machine collection returned by
                ``previously_saved_stats`` (presumably a set).
            verbose: forwarded to ``load_stats`` on the load-only path; when
                truthy, ``g.print_unique(self)`` is called at the end.
        """
        conf = self.conf
        all_signals = conf['paths']['all_signals']
        shot_list = ShotList()
        shot_list.load_from_shot_list_files_objects(shot_files, all_signals)
        shot_list_picked = shot_list.random_sublist(use_shots)
        num_picked = len(shot_list_picked)

        previously_saved, machines_saved = self.previously_saved_stats()
        machines_to_compute = all_machines - machines_saved
        recompute = conf['data']['recompute_normalization']
        if recompute:
            # forced recomputation ignores whatever was saved before
            machines_to_compute = all_machines
            previously_saved = False
        if not previously_saved or len(machines_to_compute) > 0:
            if previously_saved:
                # keep the stats of already-computed machines and extend them
                self.load_stats(verbose=True)
            print('computing normalization for machines {}'.format(
                machines_to_compute))
            # leave two cores free for the rest of the system
            use_cores = max(1, mp.cpu_count() - 2)
            pool = mp.Pool(use_cores)
            print('running in parallel on {} processes'.format(use_cores))
            start_time = time.time()

            try:
                for (i, stats) in enumerate(
                        pool.imap_unordered(self.train_on_single_shot,
                                            shot_list_picked)):
                    if stats.machine in machines_to_compute:
                        self.incorporate_stats(stats)
                        self.machines.add(stats.machine)
                    # i is zero-based; report the number of completed shots
                    sys.stdout.write('\r' +
                                     '{}/{}'.format(i + 1, num_picked))
                    sys.stdout.flush()
                pool.close()
            except BaseException:
                # do not leave worker processes behind on failure/interrupt
                pool.terminate()
                raise
            finally:
                pool.join()
            # single format string: passing two print() arguments inserted an
            # extra space between "on" and the file count
            print('\nFinished Training Normalizer on '
                  '{} files in {} seconds'.format(num_picked,
                                                  time.time() - start_time))
            self.save_stats(verbose=True)
        else:
            self.load_stats(verbose=verbose)
        # print representation of trained Normalizer to stdout:
        # Machine, NormalizerName, per-signal normalization stats/params
        if verbose:
            g.print_unique(self)
Example #4
0
    def preprocess_from_files(self, shot_files, use_shots):
        """Preprocess a random selection of shots in parallel worker processes.

        The complete shot list is loaded from ``shot_files`` using the signals
        configured under ``conf['paths']['all_signals']``; a sublist is drawn
        with ``random_sublist(use_shots)`` and every drawn shot is passed to
        ``self.preprocess_single_file`` through a multiprocessing pool.
        Summary counts (used, omitted, disruptive) are printed afterwards.

        Args:
            shot_files: forwarded to
                ``ShotList.load_from_shot_list_files_objects``.
            use_shots: forwarded to ``ShotList.random_sublist`` (presumably
                the number of shots to sample -- confirm against ShotList).

        Returns:
            A ``ShotList`` holding the shots accepted by ``append_if_valid``.
        """
        # all shots, including invalid ones
        all_signals = self.conf['paths']['all_signals']
        shot_list = ShotList()
        shot_list.load_from_shot_list_files_objects(shot_files, all_signals)
        shot_list_picked = shot_list.random_sublist(use_shots)
        num_picked = len(shot_list_picked)

        # empty
        used_shots = ShotList()

        # TODO(KGF): generalize the following line to perform well on
        # architectures other than CPUs, e.g. KNLs
        # min( <desired-maximum-process-count>, max(1,mp.cpu_count()-2) )
        use_cores = max(1, mp.cpu_count() - 2)
        pool = mp.Pool(use_cores)
        print('Running in parallel on {} processes'.format(use_cores))
        start_time = time.time()
        try:
            for (i, shot) in enumerate(
                    pool.imap_unordered(self.preprocess_single_file,
                                        shot_list_picked)):
                # i is zero-based; report the number of completed shots
                sys.stdout.write('\r{}/{}'.format(i + 1, num_picked))
                sys.stdout.flush()
                used_shots.append_if_valid(shot)
            pool.close()
        except BaseException:
            # do not leave worker processes behind on failure/interrupt
            pool.terminate()
            raise
        finally:
            pool.join()

        num_used = len(used_shots)
        disruptive_picked = shot_list_picked.num_disruptive()
        disruptive_used = used_shots.num_disruptive()
        print('\nFinished preprocessing {} files in {} seconds'.format(
            num_picked,
            time.time() - start_time))
        print('Using {} shots ({} disruptive shots)'.format(
            num_used, disruptive_used))
        print('Omitted {} shots of {} total shots'.format(
            num_picked - num_used, num_picked))
        print(
            'Omitted {} disruptive shots of {} total disruptive shots'.format(
                disruptive_picked - disruptive_used,
                disruptive_picked))

        if num_used == 0:
            # adjacent literals are concatenated; keep exactly one space at
            # the seam
            print("WARNING: All shots were omitted, please ensure raw data "
                  "is complete and available at {}.".format(
                      self.conf['paths']['signal_prepath']))
        return used_shots
Example #5
0
    def train_on_files(self,shot_files,use_shots,all_machines):
        """Compute, or load, normalization statistics for the given machines.

        Statistics are recomputed when nothing was saved previously, when some
        machine in ``all_machines`` is missing from the saved set, or when
        ``conf['data']['recompute_normalization']`` is truthy. Otherwise the
        saved statistics are loaded unchanged. The normalizer is printed at
        the end in either case.

        Args:
            shot_files: forwarded to
                ``ShotList.load_from_shot_list_files_objects``.
            use_shots: forwarded to ``ShotList.random_sublist`` (presumably
                the number of shots to sample -- confirm against ShotList).
            all_machines: machines that need statistics; must support ``-``
                with the saved-machine collection returned by
                ``previously_saved_stats`` (presumably a set).
        """
        conf = self.conf
        all_signals = conf['paths']['all_signals']
        shot_list = ShotList()
        shot_list.load_from_shot_list_files_objects(shot_files,all_signals)
        shot_list_picked = shot_list.random_sublist(use_shots)
        num_picked = len(shot_list_picked)

        previously_saved,machines_saved = self.previously_saved_stats()
        machines_to_compute = all_machines - machines_saved
        recompute = conf['data']['recompute_normalization']
        if recompute:
            # forced recomputation ignores whatever was saved before
            machines_to_compute = all_machines
            previously_saved = False

        if not previously_saved or len(machines_to_compute) > 0:
            if previously_saved:
                # keep the stats of already-computed machines and extend them
                self.load_stats()
            print('computing normalization for machines {}'.format(machines_to_compute))
            # leave two cores free for the rest of the system
            use_cores = max(1,mp.cpu_count()-2)
            pool = mp.Pool(use_cores)
            print('running in parallel on {} processes'.format(use_cores))
            start_time = time.time()

            try:
                for (i,stats) in enumerate(pool.imap_unordered(self.train_on_single_shot,shot_list_picked)):
                    if stats.machine in machines_to_compute:
                        self.incorporate_stats(stats)
                        self.machines.add(stats.machine)
                    # i is zero-based; report the number of completed shots
                    sys.stdout.write('\r' + '{}/{}'.format(i+1,num_picked))
                    sys.stdout.flush()
                pool.close()
            except BaseException:
                # do not leave worker processes behind on failure/interrupt
                pool.terminate()
                raise
            finally:
                pool.join()

            # leading newline terminates the '\r' progress line
            print('\nFinished Training Normalizer on {} files in {} seconds'.format(num_picked,time.time()-start_time))
            self.save_stats()
        else:
            self.load_stats()
        print(self)