def __init__(self, scenario_name: str):
    """
    Load the scenario configuration and prepare all of its scenes.

    Every scene listed in the configuration gets its task names replaced by
    the task configurations read through the File_Manager. The scenes are
    stored by name, their lengths are recorded and collect_dictionaries()
    is called at the end.

    Args:
        scenario_name: Name handed to File_Manager.read_scenario_config().
    """
    self.scenario_name = scenario_name
    fm = File_Manager()
    self.scenario_cfgs = fm.read_scenario_config(self.scenario_name)
    self.current_epoch = self.get_cfgs('current_epoch', default=0)
    self.optimizer_type = self.get_cfgs('optimizer_type')

    self.scenes = OrderedDict()
    for scene in self.get_cfgs('scenes'):
        name = scene['name']
        defaults = scene['task_defaults'] if 'task_defaults' in scene else {}

        # Swap the listed task names for their loaded configurations
        loaded_tasks = {}
        for task_name in scene['tasks']:
            loaded_tasks[task_name] = fm.read_task_config(
                task_name=task_name,
                scenario=self,
                scene_defaults=defaults,
            )
        scene['tasks'] = loaded_tasks
        self.scenes[name] = scene

    lengths = []
    for name, scene in self.scenes.items():
        lengths.append({'name': name, 'len': len(scene)})
    self.scenario_lengths = lengths

    self.collect_dictionaries()
    self.__current_scene = None
def end_epoch(self, summary: dict, scene_name: str):
    """
    Finish an epoch: fill in the summary and write the collected CSV columns.

    Args:
        summary: Dictionary that is updated in place with the dataset name,
            the experiment name and a fresh 'modalities' entry. Every
            modality then reports into it via report_epoch_summary().
        scene_name: Prefix for the name of the written annotation file.

    Returns:
        The summary object that was passed in.

    Raises:
        ValueError: If an explicit CSV modality has content that is neither
            a pandas Series nor a pandas DataFrame.
    """
    summary['dataset_name'] = self.dataset_name
    summary['experiment_name'] = self.experiment_name
    summary['modalities'] = defaultdict(dict)

    raw_csv_data = []
    for modality in self.modalities.values():
        raw_csv_data.extend(modality.get_runtime_values())
        modality.report_epoch_summary(summary)

        if modality.is_explicit_modality() and modality.is_csv():
            content = modality.content
            if isinstance(content, pd.Series):
                raw_csv_data.append(content)
            elif isinstance(content, pd.DataFrame):
                raw_csv_data.extend(content[c] for c in content)
            else:
                raise ValueError(f'The content type of {modality.get_name()} is not implemented')

    def column_order(values):
        # Unnamed entries sort first. The leading flag keeps a missing name from
        # ever being compared with a real one, which raises TypeError in Python 3.
        name = getattr(values, 'name', None)
        return (0, '') if name is None else (1, name)

    # We want the column estimates to be close to each other in the final output
    raw_csv_data.sort(key=column_order)

    File_Manager().write_csv_annotation(
        annotations=pd.concat(raw_csv_data, axis=1),
        dataset_name=self.dataset_name,
        experiment_file_name=f'{scene_name}_{self.get_cfgs("annotations_path")}',
    )
    return summary
def init_dictionary(self):
    """
    Write the dictionary of this modality to the log directory.

    Raises:
        Exception: If no dictionary has been set on the modality.
    """
    if self.dictionary is None:
        raise Exception(f'No dictionary has been initiated for {self.get_name()}')

    File_Manager().write_dictionary2logdir(dictionary=self.dictionary, modality_name=self.get_name())
def setup_annotations(self):
    """
    Read the annotations CSV of this dataset into self.annotations.

    When the first read returns None the user is informed, the zip file at
    the configured 'annotations_url' is downloaded and the read is repeated.
    With the 'test_run' config set, only rows whose first index level is
    below 100 are kept.
    """
    rel_path = self.get_cfgs('annotations_path')
    fm = File_Manager()

    def read_annotations():
        return fm.read_csv_annotations(
            dataset_name=self.dataset_name,
            annotations_rel_path=rel_path,
            multi_view_per_sample=self.multi_view_per_sample,
        )

    self.annotations = read_annotations()
    if self.annotations is None:
        annotations_url = self.get_cfgs('annotations_url')
        available_csvs = fm.get_available_csvs(self.dataset_name)
        available_csvs_str = '\', \''.join(available_csvs)
        message = '"%s" does not exist among the available datasets: \'%s\'.\nDownloading from:\n %s' % (
            rel_path, available_csvs_str, annotations_url)
        Console_UI().inform_user(message)

        fm.download_zip_file(
            url=annotations_url,
            dataset_name=self.get_dataset_name(),
        )
        self.annotations = read_annotations()

    if self.get_cfgs('test_run'):
        first_level = self.annotations.index.get_level_values(0)
        self.annotations = self.annotations[first_level < 100]
def __init__(self, test_mode=False, test_init_values=None):
    """
    Set up the global configuration and the Console_UI/File_Manager built on it.

    Args:
        test_mode: When true the configuration is taken from
            test_init_values instead of the parsed arguments, and the method
            returns right after Console_UI and File_Manager were created.
        test_init_values: Configuration used in test mode. When omitted a
            new empty dict is created for every call.
    """
    if test_mode:
        # A `{}` default would be evaluated once and, being stored in
        # self.cfgs, shared between every instance created without a value.
        self.cfgs = {} if test_init_values is None else test_init_values
        Console_UI(self.get('log_level', 'warning'), globalConfigs=self)
        self.sub_log_path = self.get('sub_log_path', 'sub_log_not_set')

        File_Manager(annotations_root=self.get('annotation_root'),
                     log_folder=self.get('log_folder'),
                     scenario_log_root=self.get('scenario_log_root'),
                     resume_prefix=self.get('resume'),
                     resume_scene=self.get('resume_scene'),
                     tmp_root=self.get('tmp_root'),
                     model_zoo_root=self.get('model_zoo_root'),
                     global_cfgs=self)
        # NOTE(review): test mode returns before __forward_noise is set below - confirm that is intended
        return

    args = self.parse_argument()
    self.cfgs = args.__dict__

    Console_UI(self.get('log_level', 'info'), globalConfigs=self)
    self.read_environment_variables()

    self.start_time = datetime.now(pytz.timezone('Europe/Stockholm')).strftime('%Y%m%d/%H.%M')
    self.sub_log_path = os.path.join(self.get('scenario'), self.start_time)

    if self.get('resume') is not None:
        self.prep_resume()

    fm = File_Manager(scenario_log_root=self.scenario_log_root,
                      log_folder=self.log_folder,
                      annotations_root=self.get('annotation_root'),
                      resume_prefix=self.get('resume'),
                      resume_scene=self.get('resume_scene'),
                      tmp_root=self.get('tmp_root'),
                      model_zoo_root=self.get('model_zoo_root'),
                      global_cfgs=self)

    # Keep a record of the exact call and the resulting configuration
    setup_data = {'call': ' '.join(sys.argv), 'setup': self.cfgs}
    fm.log_setup(data=setup_data, name='base_setup.yaml')

    self.__forward_noise = 0
def load(self):
    """
    Load a stored state dictionary into the layers of this network.

    Nothing is changed when the File_Manager returns None for this network.

    Raises:
        RuntimeError: If the state dictionary can't be loaded into the
            layers; the original error is attached as the cause.
    """
    neural_net_name = self.get_name()
    state_dict = File_Manager().load_pytorch_neural_net(neural_net_name=neural_net_name)
    if state_dict is None:
        return

    try:
        # As the save is done at the layers level: neural_net.layers.state_dict()
        # we need to load it from the layers
        self.layers.load_state_dict(state_dict)
    except RuntimeError as e:
        raise RuntimeError(f'Failed to load dictionary for {neural_net_name} \nError message: {e}') from e
def init_dictionary(self):
    """
    Write a table of the CSV columns, their mapped labels and their position
    to the log directory, unless the 'skip_dictionary_save' config is set.
    """
    skip_save = self.get_cfgs('skip_dictionary_save', default=False)
    if not skip_save:
        columns = list(self.csv_columns)
        labels = [self.column_map[c] for c in self.csv_columns]
        positions = range(len(self.csv_columns))
        column_dictionary = pd.DataFrame({
            'columns': columns,
            'labels': labels,
            'index': positions,
        })
        File_Manager().write_dictionary2logdir(dictionary=column_dictionary, modality_name=self.get_name())
def append_suggested_dictionary(self, dataset_name, modality_name, FMSingleton=None):
    """
    Read the stored dictionary of a modality and keep it as a suggestion.

    Args:
        dataset_name: Name of the dataset the dictionary belongs to.
        modality_name: Name of the modality; it is lower-cased before use.
        FMSingleton: Object providing read_dictionary(); a File_Manager is
            created when omitted.

    Returns:
        self, so that calls can be chained.

    Raises:
        AssertionError: If one of the names is not a string.
    """
    assert isinstance(modality_name, str), f'Modality name is not a string "{modality_name}"'
    # Report the offending dataset name, not the modality name
    assert isinstance(dataset_name, str), f'Dataset name is not a string "{dataset_name}"'

    modality_name = modality_name.lower()
    if FMSingleton is None:
        FMSingleton = File_Manager()

    suggested_dictionary = FMSingleton.read_dictionary(
        dataset_name=dataset_name,
        modality_name=modality_name,
    )
    if suggested_dictionary is not None:
        self.__suggested_dictionaries[modality_name].append(suggested_dictionary)

    return self
def init_dictionary(self):
    """
    Make sure this modality has a dictionary.

    An existing dictionary is left untouched. Otherwise the stored one is
    read and copied to the log directory, or - when none is stored - a new
    one is made with make_dictionary() and written for the dataset.
    """
    if self.dictionary is not None:
        return

    fm = File_Manager()
    self.dictionary = fm.read_dictionary(dataset_name=self.dataset_name, modality_name=self.get_name())
    if self.dictionary is not None:
        fm.write_dictionary2logdir(dictionary=self.dictionary, modality_name=self.get_name())
        return

    # no dictionary
    self.dictionary = self.make_dictionary()
    fm.write_dictionary(
        dictionary=self.dictionary,
        dataset_name=self.dataset_name,
        modality_name=self.get_name(),
    )
def run(config_manager, logger):
    """
    Transcribe speech files forever, polling for new ones every 10 seconds.

    Returns early (after logging an error) when the File_Manager reports
    that the configured directories do not exist.
    """
    logger.msg('loading configuration file...')
    config = config_manager.get_config()

    logger.msg('ensuring that file directories exist...')
    file_manager = File_Manager(config['speech'], config['text'])
    if not file_manager.directories_exist():
        logger.err('Please check the directories entered into the config file: ' + config_file_name)
        return

    logger.msg('waiting for files to transcribe...')
    # The transcriber is created once and reused for every file
    transcriber = Transcriber(config['username'], config['password'], file_manager)

    while True:
        # Only the files that still lack a text file are handled
        pending = file_manager.speech_files_without_text_files()
        for speech_file in pending:
            logger.msg('Transcribing: ' + speech_file + '...')
            transcriber.transcribe(speech_file)
            logger.msg(speech_file + ' transcribed')

        logger.msg('waiting for files to transcribe...')
        sleep(10)
def get_dataset_cfgs(self, dataset_name):
    """
    Read a dataset config and expand its "same_as_X" modality shortcuts.

    When the modalities are identical for two experiments the config file
    may just state "modalities": "same_as_X" (e.g. X = "train"). Such
    entries are replaced here by the modalities of experiment X.

    Raises:
        KeyError: If the experiment X is not part of the config.
    """
    prefix = 'same_as_'
    dataset_cfgs = File_Manager().read_dataset_config(dataset_name)
    experiments = dataset_cfgs['experiments']

    for experiment_cfgs in experiments.values():
        modalities = experiment_cfgs['modalities']
        if not isinstance(modalities, str) or not modalities.startswith(prefix):
            continue

        other_experiment = modalities[len(prefix):]
        if other_experiment not in experiments:
            raise KeyError('Could not find the modality \'%s\' among the modalities: \'%s\'' %
                           (other_experiment, '\', \''.join(experiments.keys())))
        experiment_cfgs['modalities'] = experiments[other_experiment]['modalities']

    return dataset_cfgs
def get_dataset(
    self,
    dataset_name,
    batch_size_multiplier: float,
):
    """
    Return the dataset with the given (case-insensitive) name.

    A known dataset only gets its batch size multiplier updated. An unknown
    one is created as a CSV_Dataset, provided that it is among the
    predefined dataset definitions, and then remembered.

    Raises:
        Exception: If the name is not among the predefined datasets.
    """
    fixed_name = dataset_name.lower()
    if fixed_name in self.datasets:
        self.datasets[fixed_name].set_batch_size_multiplier(batch_size_multiplier)
        return self.datasets[fixed_name]

    predefined_datasets = File_Manager().get_dataset_definitions()
    if fixed_name not in predefined_datasets:
        raise Exception('The dataset \'%s\' is not among the predefined sets: \'%s\'' %
                        (dataset_name, '\', \''.join(predefined_datasets)))

    from .csv_dataset import CSV_Dataset as Dataset
    self.datasets[fixed_name] = Dataset(
        dataset_name=fixed_name,
        batch_size_multiplier=batch_size_multiplier,
    )
    return self.datasets[fixed_name]
def save(self, scene_name='last'):
    """
    Hand this network over to the File_Manager for saving.

    Args:
        scene_name: Passed on as the last argument of
            save_pytorch_neural_net(); defaults to 'last'.
    """
    fm = File_Manager()
    neural_net_name = self.get_name()
    fm.save_pytorch_neural_net(neural_net_name, self, scene_name)
def collect_dictionaries(self):
    """
    Check all the datasets for common items, e.g. body part, and then create
    a general dictionary for all of them.

    The datasets used by the tasks of the scenario are scanned for
    modalities of a type that has a dictionary. For each such modality the
    stored dictionaries and the values of the annotation columns it refers
    to are handed to the Dictionary_Generator.

    Raises:
        Exception: If the experiments of a dataset config can't be scanned.
        ValueError: If the annotations of an experiment can't be read.
        KeyError: If a "same_as_X" shortcut refers to an unknown experiment
            or a 'column_name' is missing from the annotations.
        IndexError: If a listed column is missing from the annotations or
            the modality defines neither 'columns' nor 'column_name'.
    """
    datasets = []
    for scene in self.scenario_cfgs['scenes']:
        for task in scene['tasks'].values():
            if task['dataset_name'] not in datasets:
                datasets.append(task['dataset_name'])

    configs = {}
    for dataset_name in datasets:
        configs[dataset_name] = File_Manager().read_dataset_config(dataset_name)

    modalities_with_dictionaries = [
        'one_vs_rest',
        'bipolar',
        'multi_bipolar',
    ]  # TODO: add 'hierarchical_label' but this has some fancy logic :-S

    dictionary_candidates = []
    for dataset_name in datasets:
        config = configs[dataset_name]
        try:
            for experiment in config['experiments'].values():
                if not isinstance(experiment['modalities'], dict):
                    continue
                for name, cfg in experiment['modalities'].items():
                    if cfg['type'].lower() in modalities_with_dictionaries and name not in dictionary_candidates:
                        dictionary_candidates.append(name)
        except Exception as e:
            raise Exception(f'Failed to get dictionary for {dataset_name}: {e}') from e

    # Store all the different values available for this modality into the dictionary singleton that
    # keeps track of the unique values
    dg = Dictionary_Generator()
    for modality_name in dictionary_candidates:
        for dataset_name in datasets:
            dg.append_suggested_dictionary(dataset_name=dataset_name, modality_name=modality_name)

            experiments = configs[dataset_name]['experiments']
            for experiment in experiments.values():
                annotations = File_Manager().read_csv_annotations(
                    dataset_name,
                    annotations_rel_path=experiment['annotations_path'],
                    # Multi-view argument should be irrelevant for this
                )
                if annotations is None:
                    raise ValueError(
                        f'Could not find the dataset: {dataset_name} in {experiment["annotations_path"]}')

                modalities = experiment['modalities']
                # Resolve every "same_as_X" shortcut (not only "same_as_train_set"),
                # in line with what get_dataset_cfgs does for the dataset configs
                if isinstance(modalities, str) and modalities.startswith('same_as_'):
                    other_experiment = modalities[len('same_as_'):]
                    if other_experiment not in experiments:
                        raise KeyError(
                            f'Could not find the experiment \'{other_experiment}\' referred to by'
                            f' \'{modalities}\' in {dataset_name}')
                    modalities = experiments[other_experiment]['modalities']

                # A remaining string can't hold modality configs - `in` would merely do a substring test
                if isinstance(modalities, str):
                    continue

                if modality_name not in modalities:
                    continue

                modality_cfgs = modalities[modality_name]
                if 'column_name' in modality_cfgs:
                    # Bound before the try so that the handlers can always report it
                    colname = modality_cfgs['column_name']
                    try:
                        dg.append_values(modality_name=modality_name, values=annotations[colname])
                    except KeyError:
                        Console_UI().warn_user(f'Got a key annotation exception for {colname}')
                        Console_UI().warn_user(modality_cfgs)
                        Console_UI().warn_user(annotations.columns)
                        raise
                    except Exception:
                        Console_UI().warn_user(f'Got an annotation exception for {colname}')
                        Console_UI().warn_user(modality_cfgs)
                        Console_UI().warn_user(annotations)
                        raise
                elif 'columns' in modality_cfgs:
                    for column_name in modality_cfgs['columns']:
                        if isinstance(column_name, dict):
                            assert 'csv_name' in column_name, \
                                f'The column doesn\'t have the expected csv_name element, got: {column_name}'
                            column_name = column_name['csv_name']

                        if column_name not in annotations:
                            # Suggest a few similarly named columns to ease debugging
                            n = 3 if len(annotations.columns) < 10 else ceil(len(annotations.columns) / 3)
                            closest = get_close_matches(
                                word=column_name,
                                possibilities=annotations.columns,
                                n=n,
                            )
                            closest = ', '.join(closest)
                            raise IndexError(f'The {column_name} from {modality_name} doesn\'t exist.' +
                                             f' Closest matching are: {closest}')

                        dg.append_values(modality_name=modality_name, values=annotations[column_name])
                else:
                    raise IndexError(f'Expected {modality_name} to have either columns or column_name defined')
# Script: creates a Tk root window, asks for the number of folders and then
# calls File_Manager.folder_name_and_extension() followed by File_Manager.sort().
import os
import Tkinter as tk
from Tkinter import *
import shutil
import pyglet
from file_manager import File_Manager
import Tkinter, Tkconstants, tkFileDialog

# Root window with a minimum size of 300x300
root = Tk()
root.minsize(300, 300)

# NOTE(review): the Tkinter/tkFileDialog module names suggest Python 2, where
# input() evaluates the typed text as an expression - confirm, and consider
# int(raw_input(...)) if only a number is expected here.
number_of_folders = input("number_of_folders")

foo = File_Manager(number_of_folders)
foo.folder_name_and_extension()
foo.sort()
def get_graph_cfgs(self, graph_name):
    """
    Read the config of a graph and return it after it has been passed
    through fix_experiment_modalities().
    """
    raw_cfgs = File_Manager().read_graph_config(graph_name)
    return self.fix_experiment_modalities(raw_cfgs)
def print_table(table):
    # Print one "name[0] - freq - code" row per entry of the table, ordered
    # by the first element of each entry's name, highest first.
    for x in sorted(table, key=lambda leaf:leaf.name[0], reverse=True):
        print x.name[0], ' - ', x.freq, ' - ', x.code

###########################################################################################
# MAIN PROGRAM
###########################################################################################

print "Program start."

# Input image to encode
img = get_image('lena_gray.tiff')
# img = get_image('c.jpg')

manager = File_Manager('test')

# presumably S holds the symbols and H how often each one occurs - verify
# against get_symbols_and_occurrences
S, H = get_symbols_and_occurrences(img)
D = get_tuple_array(S, H)

# Time only the construction of the Huffman tree/code
t0 = time()
Tree, Code = get_huffman(D)
t1 = time()

#print_table(Code)
print '[Time] Huffman = ', t1 - t0

# Lookup table from the first element of each entry's name to its code
keys = [c.name[0] for c in Code]
values = [c.code for c in Code]
dicc = dict(zip(keys, values))
def run_scene(self, start_epoch=0):
    """
    Run all repeats and epochs of this scene, then validate, test and save.

    Args:
        start_epoch: Epochs whose overall index (e + r * self.epochs) is
            below this value are not run; only Scene.iteration_counter is
            advanced by self.epoch_size for each of them.

    NOTE(review): the nesting below was reconstructed from a source without
    indentation - confirm where the memory usage logging and the 'last'
    saves sit relative to the loops.
    """
    logged_memory_usage = False
    ui = Console_UI()
    ui.overall_total_epochs = self.epochs
    ui.overall_total_repeats = self.repeat
    Global_Cfgs().set_forward_noise(
        self.get_cfgs('forward_noise', default=0))
    for r in range(0, self.repeat):
        ui.overall_repeat = r
        # Averaging is applied to the main task at the start of every repeat but the first
        if (self.stochastic_weight_averaging and r > 0):
            self.tasks[self.main_task].stochastic_weight_average()

        for e in range(0, self.epochs):
            ui.overall_epoch = e
            if start_epoch > e + r * self.epochs:
                # Skipped epoch: only keep the iteration counter in step
                Scene.iteration_counter += self.epoch_size
            else:
                for task in self.tasks.values():
                    task.update_learning_rate(self.get_learning_rate(e))

                for _ in range(self.epoch_size):
                    for key, task in self.tasks.items():
                        if self.should_task_run(task_name=key, task=task):
                            task.step(
                                iteration_counter=Scene.iteration_counter,
                                scene_name=self.scene_name)
                    Scene.iteration_counter += 1

                    # The memory usage profile of every task is written and shown only once
                    if logged_memory_usage is False:
                        for key in self.tasks.keys():
                            task = self.tasks[key]
                            memory_usage = task.get_memory_usage_profile()
                            File_Manager().write_usage_profile(
                                scene_name=self.scene_name,
                                task=key,
                                memory_usage=memory_usage,
                            )
                            ui.inform_user(
                                f'\n Memory usage for {self.scene_name}::{key}\n'
                            )
                            ui.inform_user(memory_usage)
                        logged_memory_usage = True

                for task in self.tasks.values():
                    task.save(scene_name='last')
                # Not really helping with just emptying cache - we need to add something more
                # removing as this may be the cause for errors
                # torch.cuda.empty_cache()
    ui.reset_overall()

    # Note that the evaluation happens after this step and therefore averaging may hurt the performance
    if self.stochastic_weight_averaging_last:
        self.tasks[self.main_task].stochastic_weight_average()
        for task in self.tasks.values():
            task.save(scene_name='last')

    for task in self.tasks.values():
        task.validate(iteration_counter=Scene.iteration_counter,
                      scene_name=self.scene_name)
        task.test(iteration_counter=Scene.iteration_counter,
                  scene_name=self.scene_name)

    # Save all tasks before entering the next scene
    for task in self.tasks.values():
        task.save(scene_name=self.scene_name)
        # Called for its side effect on every graph of the task
        [g.dropModelNetworks() for g in task.graphs.values()]