def manage_config() -> dict:
    """Build the training configuration.

    Parse a config file (if given), overwrite with command line arguments
    and return everything as a dictionary of different config groups.

    Returns
    -------
    dict
        Config options grouped by section (general / model / train / ...).
    """
    # Options that must be provided either via config file or command line.
    required_args = {"embedding_size", "hidden_size", "num_layers", "corpus_dir"}

    # Mapping from config section name to the options belonging to it.
    arg_groups = {
        "general": {"recoding_type"},
        "model": {"embedding_size", "hidden_size", "num_layers", "dropout"},
        "train": {
            "weight_decay", "learning_rate", "batch_size", "num_epochs", "clip",
            "print_every", "eval_every", "model_save_path", "device", "model_name"
        },
        "logging": {"log_dir"},
        "corpus": {"corpus_dir", "max_seq_len"},
        "recoding": {
            "step_type", "num_samples", "mc_dropout", "prior_scale", "hidden_size",
            "weight_decay", "data_noise", "share_anchor", "use_cross_entropy"
        },
        "step": {"predictor_layers", "window_size", "step_size", "hidden_size"},
    }

    # ConfigSetup merges config file and CLI values and validates required args.
    setup = ConfigSetup(init_argparser(), required_args, arg_groups)

    return setup.config_dict
def manage_config() -> dict:
    """Build the evaluation configuration.

    Parse a config file (if given), overwrite with command line arguments
    and return everything as a dictionary of different config groups.

    Returns
    -------
    dict
        Config options grouped into "general" and "optional".
    """
    # All of these must be supplied via config file or command line.
    required_args = {
        "corpus_dir", "max_seq_len", "batch_size", "models", "device", "give_gold"
    }
    # The required options form the "general" group; "ttest" is optional.
    arg_groups = {"general": required_args, "optional": {"ttest"}}

    parser = init_argparser()
    setup = ConfigSetup(parser, required_args, arg_groups)

    return setup.config_dict
def manage_parameter_search_config() -> dict:
    """Build the parameter-search configuration.

    Parse a config file (if given), overwrite with command line arguments
    and return everything as a dictionary of different config groups. The
    default model parameters are additionally loaded from the JSON file
    referenced by the "model_config" option.

    Returns
    -------
    dict
        Grouped config options, plus a "default_parameters" entry holding
        the parsed model config JSON.
    """
    required_args = {"num_epochs", "num_trials", "model_config", "out_dir"}
    arg_groups = {
        "general": {
            "parameter_search", "corpus_dir", "num_epochs", "num_trials",
            "model_config", "out_dir"
        },
    }

    parser = init_argparser()
    config_dict = ConfigSetup(parser, required_args, arg_groups).config_dict

    # Read the default model hyper-parameters from the referenced JSON file.
    model_config_path = config_dict["general"]["model_config"]
    with open(model_config_path, "r") as model_config:
        config_dict["default_parameters"] = json.load(model_config)

    return config_dict
def main():
    """Run the Gulordava number-agreement experiments with and without
    diagnostic-classifier interventions.

    Loads a labeled corpus and three copies of the same LSTM language model
    (one unmodified baseline, one with interventions applied at the subject
    position, one with interventions at every time step), then measures
    number-agreement accuracy and the influence of interventions on
    perplexity.
    """
    # Manage config
    required_args = {'model', 'vocab', 'lm_module', 'corpus_path', 'classifiers'}
    arg_groups = {
        'model': {'model', 'vocab', 'lm_module', 'device'},
        'corpus': {'corpus_path', 'reset_states'},
        'interventions': {'step_size', 'classifiers', 'init_states',
                          'intervention_points', 'masking', 'redecode'},
    }
    argparser = init_argparser()
    config_object = ConfigSetup(argparser, required_args, arg_groups)
    config_dict = config_object.config_dict

    # Load data: Corpus, models, diagnostic classifiers.
    # Three separate model instances are needed because the intervention
    # mechanisms mutate the model they are applied to.
    corpus = convert_to_labeled_corpus(config_dict["corpus"]["corpus_path"])
    basic_model = import_model_from_json(
        **config_dict["model"], model_constructor=InterventionLSTM
    )
    subj_intervention_model = import_model_from_json(
        **config_dict["model"], model_constructor=InterventionLSTM
    )
    global_intervention_model = import_model_from_json(
        **config_dict["model"], model_constructor=InterventionLSTM
    )

    # Retrieve relevant config options
    step_size = config_dict["interventions"]["step_size"]
    classifier_paths = config_dict["interventions"]["classifiers"]
    intervention_points = config_dict["interventions"]["intervention_points"]
    masking = config_dict["interventions"]["masking"]
    # Bug fix: this previously read the "masking" key, so the redecode
    # option was silently ignored and always mirrored masking.
    redecode = config_dict["interventions"]["redecode"]
    reset_states = config_dict["corpus"]["reset_states"]

    # Load classifiers and apply intervention mechanisms
    classifiers = {path: DCTrainer.load_classifier(path) for path in classifier_paths}
    subj_mechanism = SubjectLanguageModelMechanism(
        subj_intervention_model, classifiers, intervention_points, step_size,
        masking=masking, redecode=redecode
    )
    global_mechanism = LanguageModelMechanism(
        global_intervention_model, classifiers, intervention_points, step_size,
        masking=masking, redecode=redecode
    )
    subj_intervention_model = subj_mechanism.apply()
    global_intervention_model = global_mechanism.apply()
    init_states = InitStates(basic_model)

    # 1. Experiment: Replicate Gulordava findings
    # In what percentage of cases does the LM assign a higher probability to
    # the grammatically correct sentence?
    print("\n\nReplicating Gulordava Number Agreement experiment...")
    measure_num_agreement_accuracy(basic_model, corpus, init_states=init_states,
                                   reset_states=reset_states)

    # 2. Experiment: Assess the influence of interventions on LM perplexity
    print("\n\nAssessing influence of interventions on perplexities...")
    measure_influence_on_perplexity(basic_model, subj_intervention_model,
                                    global_intervention_model, corpus, init_states)

    # 3. Experiment: Repeat the 1. Experiment but measure the influence of
    # interventions on the subject position / on every position
    print("\n\nReplicating Gulordava Number Agreement experiment with interventions...")
    print("With interventions at the subject position...")
    measure_num_agreement_accuracy(subj_intervention_model, corpus,
                                   init_states=init_states, reset_states=reset_states)
    print("With interventions at every time step...")
    measure_num_agreement_accuracy(global_intervention_model, corpus,
                                   init_states=init_states, reset_states=reset_states)
type=int, help='(optional) Size of training set to train on.'
               # NOTE(review): the implicit string concatenation below has no
               # separating space, so the rendered help reads "...train
               # on.Defaults to -1..." — same issue in the next argument.
               'Defaults to -1, i.e. the full training set.')
    # (The enclosing parser-builder function starts outside this view; the
    # fragment above is the tail of an earlier add_argument(...) call.)
    from_cmd.add_argument(
        '--train_test_split', type=float,
        help='(optional) Percentage of data set split into train/test set.'
             'Defaults to 0.9, indicating a 90/10 train/test split.')

    return parser


if __name__ == '__main__':
    # Script entry point: train a diagnostic classifier on extracted
    # activations using the options gathered by ConfigSetup.

    # Options that must be supplied via config file or command line.
    required_args = {
        'activations_dir', 'activation_names', 'output_dir', 'classifier_type'
    }
    # Group options by the component that consumes them: constructor kwargs
    # for DCTrainer vs. kwargs for its train() call.
    arg_groups = {
        'dc_trainer': {
            'activations_dir', 'activation_names', 'output_dir',
            'classifier_type', 'labels'
        },
        'classify': {'train_subset_size', 'train_test_split'},
    }

    argparser = init_argparser()
    config_object = ConfigSetup(argparser, required_args, arg_groups)
    config_dict = config_object.config_dict

    # Build the trainer from the 'dc_trainer' group and train with the
    # 'classify' group options.
    dc_trainer = DCTrainer(**config_dict['dc_trainer'])
    dc_trainer.train(**config_dict['classify'])
from diagnnose.config.arg_parser import create_arg_parser
from diagnnose.config.setup import ConfigSetup
from diagnnose.corpora.import_corpus import import_corpus_from_path
from diagnnose.extractors.base_extractor import Extractor
from diagnnose.models.import_model import import_model_from_json
from diagnnose.models.language_model import LanguageModel
from diagnnose.typedefs.corpus import Corpus

if __name__ == '__main__':
    # Script entry point: extract activations from a language model run
    # over a corpus, driven entirely by grouped config options.
    arg_groups = {'model', 'activations', 'corpus', 'extract'}
    arg_parser, required_args = create_arg_parser(arg_groups)
    setup = ConfigSetup(arg_parser, required_args, arg_groups)
    config_dict = setup.config_dict

    # NOTE(review): elsewhere in this codebase import_model_from_json is
    # called with **-expanded kwargs; here the whole 'model' group is passed
    # positionally — confirm which signature this diagnnose version expects.
    model: LanguageModel = import_model_from_json(config_dict['model'])
    corpus: Corpus = import_corpus_from_path(**config_dict['corpus'])

    # Run the extraction with the 'activations' group configuring the
    # Extractor and the 'extract' group configuring the extraction call.
    extractor = Extractor(model, corpus, **config_dict['activations'])
    extractor.extract(**config_dict['extract'])