Example #1
0
def manage_config() -> dict:
    """
    Parse a config file (if given), overwrite with command line arguments and return everything as dictionary
    of different config groups.
    """
    # Arguments that must be present either in the config file or on the command line.
    required_args = {"embedding_size", "hidden_size", "num_layers", "corpus_dir"}

    # Mapping of config group name -> argument names that belong to that group.
    arg_groups = {
        "general": {"recoding_type"},
        "model": {"embedding_size", "hidden_size", "num_layers", "dropout"},
        "train": {
            "weight_decay", "learning_rate", "batch_size", "num_epochs",
            "clip", "print_every", "eval_every", "model_save_path", "device",
            "model_name"
        },
        "logging": {"log_dir"},
        "corpus": {"corpus_dir", "max_seq_len"},
        "recoding": {
            "step_type", "num_samples", "mc_dropout", "prior_scale",
            "hidden_size", "weight_decay", "data_noise", "share_anchor",
            "use_cross_entropy"
        },
        "step": {"predictor_layers", "window_size", "step_size", "hidden_size"},
    }

    parser = init_argparser()
    setup = ConfigSetup(parser, required_args, arg_groups)

    return setup.config_dict
Example #2
0
def manage_config() -> dict:
    """
    Parse a config file (if given), overwrite with command line arguments and return everything as dictionary
    of different config groups.
    """
    # Arguments that must be supplied via config file or command line.
    required_args = {
        "corpus_dir", "max_seq_len", "batch_size", "models", "device",
        "give_gold"
    }
    # All required args form the "general" group; "ttest" is the only optional one.
    arg_groups = {"general": required_args, "optional": {"ttest"}}

    parser = init_argparser()
    setup = ConfigSetup(parser, required_args, arg_groups)

    return setup.config_dict
Example #3
0
def manage_parameter_search_config() -> dict:
    """
    Parse a config file (if given), overwrite with command line arguments and return everything as dictionary
    of different config groups.
    """
    required_args = {"num_epochs", "num_trials", "model_config", "out_dir"}
    arg_groups = {
        "general": {
            "parameter_search", "corpus_dir", "num_epochs", "num_trials",
            "model_config", "out_dir"
        },
    }

    parser = init_argparser()
    setup = ConfigSetup(parser, required_args, arg_groups)
    config = setup.config_dict

    # Load the default model hyper-parameters from the referenced json file
    # and attach them under a dedicated top-level key.
    with open(config["general"]["model_config"], "r") as model_config_file:
        config["default_parameters"] = json.load(model_config_file)

    return config
def main():
    """
    Run the number-agreement intervention experiments: load corpus, models and
    diagnostic classifiers from the config, apply intervention mechanisms, and
    measure agreement accuracy and perplexity with and without interventions.
    """
    # Manage config
    required_args = {'model', 'vocab', 'lm_module', 'corpus_path', 'classifiers'}
    arg_groups = {
        'model': {'model', 'vocab', 'lm_module', 'device'},
        'corpus': {'corpus_path', 'reset_states'},
        'interventions': {'step_size', 'classifiers', 'init_states', 'intervention_points', 'masking', 'redecode'},
    }
    argparser = init_argparser()
    config_object = ConfigSetup(argparser, required_args, arg_groups)
    config_dict = config_object.config_dict

    # Load data: Corpus, models, diagnostic classifiers
    corpus = convert_to_labeled_corpus(config_dict["corpus"]["corpus_path"])

    # Three separately-loaded copies of the same model: an untouched baseline
    # plus two that will receive intervention mechanisms (subject position vs.
    # every time step).
    basic_model = import_model_from_json(
        **config_dict["model"], model_constructor=InterventionLSTM
    )
    subj_intervention_model = import_model_from_json(
        **config_dict["model"], model_constructor=InterventionLSTM
    )
    global_intervention_model = import_model_from_json(
        **config_dict["model"], model_constructor=InterventionLSTM
    )

    # Retrieve relevant config options
    step_size = config_dict["interventions"]["step_size"]
    classifier_paths = config_dict["interventions"]["classifiers"]
    intervention_points = config_dict["interventions"]["intervention_points"]
    masking = config_dict["interventions"]["masking"]
    # Bug fix: previously read "masking" twice, so the redecode option was
    # silently ignored. Read it from its own key.
    redecode = config_dict["interventions"]["redecode"]
    reset_states = config_dict["corpus"]["reset_states"]

    # Load classifiers and apply intervention mechanisms
    classifiers = {path: DCTrainer.load_classifier(path) for path in classifier_paths}
    subj_mechanism = SubjectLanguageModelMechanism(
        subj_intervention_model, classifiers, intervention_points, step_size, masking=masking, redecode=redecode
    )
    global_mechanism = LanguageModelMechanism(
        global_intervention_model, classifiers, intervention_points, step_size, masking=masking, redecode=redecode
    )
    subj_intervention_model = subj_mechanism.apply()
    global_intervention_model = global_mechanism.apply()
    init_states = InitStates(basic_model)

    # 1. Experiment: Replicate Gulordava findings
    # In what percentage of cases does the LM assign a higher probability to the grammatically correct sentence?
    print("\n\nReplicating Gulordava Number Agreement experiment...")
    measure_num_agreement_accuracy(basic_model, corpus, init_states=init_states, reset_states=reset_states)

    # 2. Experiment: Assess the influence of interventions on LM perplexity
    print("\n\nAssessing influence of interventions on perplexities...")
    measure_influence_on_perplexity(basic_model, subj_intervention_model, global_intervention_model, corpus, init_states)

    # 3. Experiment: Repeat the 1. Experiment but measure the influence of interventions on the subject position /
    # on every position
    print("\n\nReplicating Gulordava Number Agreement experiment with interventions...")
    print("With interventions at the subject position...")
    measure_num_agreement_accuracy(subj_intervention_model, corpus, init_states=init_states, reset_states=reset_states)
    print("With interventions at every time step...")
    measure_num_agreement_accuracy(global_intervention_model, corpus, init_states=init_states, reset_states=reset_states)
Example #5
0
                          type=int,
                          help='(optional) Size of training set to train on.'
                          'Defaults to -1, i.e. the full training set.')
    from_cmd.add_argument(
        '--train_test_split',
        type=float,
        help='(optional) Percentage of data set split into train/test set.'
        'Defaults to 0.9, indicating a 90/10 train/test split.')

    return parser


if __name__ == '__main__':
    # Arguments that must come from the config file or the command line.
    required_args = {
        'activations_dir', 'activation_names', 'output_dir', 'classifier_type'
    }
    # Config group -> argument names belonging to it.
    arg_groups = {
        'dc_trainer': {
            'activations_dir', 'activation_names', 'output_dir',
            'classifier_type', 'labels'
        },
        'classify': {'train_subset_size', 'train_test_split'},
    }

    parser = init_argparser()
    config = ConfigSetup(parser, required_args, arg_groups).config_dict

    # Build the diagnostic-classifier trainer and run training.
    trainer = DCTrainer(**config['dc_trainer'])
    trainer.train(**config['classify'])
Example #6
0
from diagnnose.config.arg_parser import create_arg_parser
from diagnnose.config.setup import ConfigSetup
from diagnnose.corpora.import_corpus import import_corpus_from_path
from diagnnose.extractors.base_extractor import Extractor
from diagnnose.models.import_model import import_model_from_json
from diagnnose.models.language_model import LanguageModel
from diagnnose.typedefs.corpus import Corpus

if __name__ == '__main__':
    # Config groups this extraction script needs.
    groups = {'model', 'activations', 'corpus', 'extract'}
    parser, needed_args = create_arg_parser(groups)

    config = ConfigSetup(parser, needed_args, groups).config_dict

    # Load the language model and the corpus to extract activations from.
    lm: LanguageModel = import_model_from_json(config['model'])
    data: Corpus = import_corpus_from_path(**config['corpus'])

    # Run the activation extraction over the corpus.
    extractor = Extractor(lm, data, **config['activations'])
    extractor.extract(**config['extract'])