Example #1
def main():
    args = parse_args()

    results = []
    for test_name, encs in sent_tests.items():
        encs = get_encodings(args,
                             encs,
                             debias=args.debias,
                             equalize=args.equalize)
        esize, pval = weat.run_test(encs,
                                    n_samples=args.n_samples,
                                    parametric=args.parametric)
        targ1 = list(encs['targ1']['encs'].values())
        targ2 = list(encs['targ2']['encs'].values())
        attr1 = list(encs['attr1']['encs'].values())
        attr2 = list(encs['attr2']['encs'].values())
        targets = [targ1, targ2]
        attributes = [attr1, attr2]
        weat_score, effect_size = binary_weat(targets, attributes)
        results.append("{}: esize={} pval={} | w_score={} esize={}".format(
            test_name, esize, pval, weat_score, effect_size))

    for result in results:
        logger.info(result)
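
The encs dictionary unpacked above has a fixed layout: four keys (targ1, targ2, attr1, attr2), each holding a category label and an encs mapping from sentence IDs to embedding vectors. A minimal sketch of that payload with toy 768-dimensional vectors (the dimensionality and the IDs are assumptions for illustration):

import numpy as np

# Hypothetical minimal payload in the layout consumed by weat.run_test and
# unpacked in the example above: id -> embedding for each of the four sets.
encs = {
    key: {
        "category": key,
        "encs": {"sent_{}".format(i): np.random.rand(768) for i in range(3)},
    }
    for key in ("targ1", "targ2", "attr1", "attr2")
}

targ1 = list(encs["targ1"]["encs"].values())  # same unpacking as above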
Example #2
    def evaluate(self):
        args = self.args
        os.makedirs(args.results_dir, exist_ok=True)
        results_path = os.path.join(args.results_dir, args.output_name)
        results = []
        all_tests_dict = dict()

        for filename in self.filenames:
            sent_file = os.path.join(DATA_DIR, filename)
            data = load_json(sent_file)
            encs = self.get_encodings(data)
            esize, pval = weat.run_test(encs,
                                        n_samples=args.n_samples,
                                        parametric=args.parametric)

            result = "{}: esize={} pval={}".format(filename, esize, pval)
            print(filename, result)
            results.append(result)
            test_results = {"esize": esize, "pval": pval}

            all_tests_dict[filename] = test_results

        # print and save results
        for result in results:
            logger.info(result)
        save_dict_to_json(all_tests_dict, results_path)
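
save_dict_to_json is called but not defined in these examples; a minimal sketch of such a helper, assuming it simply serializes the results dict (only the name and call signature come from the example, the body is an assumption):

import json

def save_dict_to_json(d, path):
    # Write the results dict to a JSON file; default=float casts numpy
    # scalars (e.g. effect sizes) to plain floats so they serialize.
    with open(path, "w") as f:
        json.dump(d, f, indent=2, default=float)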
Example #3
def evaluate(args, word_level=False):
	'''Evaluate bias level with given definitional sentence pairs.'''
	results_path = os.path.join(args.results_dir, args.output_name)

	if (not args.encode_only):
		if (os.path.exists(results_path)):
			print("Results already evaluated in {}".format(results_path))
			return
		if (not os.path.exists(args.results_dir)):
			os.makedirs(args.results_dir)

	results = []
	all_tests_dict = dict()

	tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
	print("tokenizer: {}".format(tokenizer==None))
	#gender_subspace = None
	# if (args.debias):
	# 	gender_subspace = compute_gender_dir(DEVICE, tokenizer, bert_encoder, def_pairs, 
	# 		args.max_seq_length, k=args.num_dimension, load=True, task=args.model, word_level=word_level, keepdims=True)
	# 	logger.info("Computed (gender) bias direction")

	with open(args.gendered_words_filename, "r") as f:
		gender_specific_words = json.load(f)
	specific_set = set(gender_specific_words)

	abs_esizes = []
	for test_id in ['6', '6b', '7', '7b', '8', '8b']:
		filename = "sent-weat{}.jsonl".format(test_id)
		sent_file = os.path.join(DATA_DIR, filename)
		data = load_json(sent_file)

		encs = get_encodings(args, data)
		if (args.encode_only):
			if (args.debias):
				outfile_name = 'debiased_encs{}.pkl'.format(test_id)
			else:
				outfile_name = 'biased_encs{}.pkl'.format(test_id)

			with open(os.path.join(args.results_dir, outfile_name), 'wb') as outfile:
				pickle.dump(encs, outfile)
			continue
		'''
		encs: targ1, targ2, attr1, attr2
		         -> category
		         -> encs
		         	-> (id1, sent1_emb), (id2, sent2_emb), ...
		'''

		esize, pval = weat.run_test(encs, n_samples=args.n_samples, parametric=args.parametric)
		abs_esizes.append(abs(esize))

		result = "{}: esize={} pval={}".format(filename, esize, pval)
		print(filename, result)
		results.append(result)
		test_results = {"esize": esize, "pval": pval}
		
		all_tests_dict[filename] = test_results
	avg_absesize = np.mean(np.array(abs_esizes))
	print("Averge of Absolute esize: {}".format(avg_absesize))
	all_tests_dict['avg_absesize'] = avg_absesize

	if (args.encode_only):
		return
	
	# print and save results
	for result in results:
		logger.info(result)
	save_dict_to_json(all_tests_dict, results_path)

	return
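
After the loop, all_tests_dict maps each test file to its effect size and p-value, with the aggregate stored under avg_absesize. Illustrative contents of what gets saved to results_path (the numbers are invented):

# Example shape of the saved results dict (values are made up):
all_tests_dict = {
    "sent-weat6.jsonl":  {"esize": 0.84, "pval": 0.012},
    "sent-weat6b.jsonl": {"esize": -0.31, "pval": 0.240},
    # ... one entry per test id in ['6', '6b', '7', '7b', '8', '8b'] ...
    "avg_absesize": 0.57,
}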
Example #4
		if (args.encode_only):
			if (args.debias):
				outfile_name = 'debiased_encs{}.pkl'.format(test_id)
			else:
				outfile_name = 'biased_encs{}.pkl'.format(test_id)

			with open(os.path.join(args.results_dir, outfile_name), 'wb') as outfile:
				pickle.dump(encs, outfile)
			continue
		'''
		encs: targ1, targ2, attr1, attr2
		         -> category
		         -> encs
		         	-> (id1, sent1_emb), (id2, sent2_emb), ...
		'''
		esize, pval = weat.run_test(encs, n_samples=args.n_samples, parametric=args.parametric)
		
		# weat_score, effect_size = run_binary_weat_test(encs)
		# results.append("{}: esize={} pval={} | w_score={} esize={}".format(filename, 
		# 	esize, pval, weat_score, effect_size))
		# test_results = {"esize": esize, "pval": pval, "weat_score": weat_score, "effect_size": effect_size}

		result = "{}: esize={} pval={}".format(filename, esize, pval)
		print(filename, result)
		results.append(result)
		test_results = {"esize": esize, "pval": pval}
		
		all_tests_dict[filename] = test_results

	if (args.encode_only):
		return
	# print and save results
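
The encode-only branch above pickles the raw encodings instead of scoring them, so they can be evaluated in a separate pass. A minimal sketch of reloading such a dump (the directory and test id are assumptions):

import os
import pickle

# Reload encodings written by the encode-only branch (hypothetical paths).
results_dir = "results"
test_id = "6"
with open(os.path.join(results_dir, "biased_encs{}.pkl".format(test_id)), "rb") as f:
    encs = pickle.load(f)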
Example #5
def main(arguments):
    ''' Main logic: parse args for tests to run and which models to evaluate '''
    log.basicConfig(format='%(asctime)s: %(message)s', datefmt='%m/%d %I:%M:%S %p', level=log.INFO)

    args = handle_arguments(arguments)
    if args.seed >= 0:
        log.info('Seeding random number generators with {}'.format(args.seed))
        random.seed(args.seed)
        np.random.seed(args.seed)
    maybe_make_dir(args.exp_dir)
    if args.log_file:
        log.getLogger().addHandler(log.FileHandler(args.log_file))
    log.info("Parsed args: \n%s", args)

    all_tests = sorted(
        [
            entry[:-len(TEST_EXT)]
            for entry in os.listdir(args.data_dir)
            if not entry.startswith('.') and entry.endswith(TEST_EXT)
        ],
        key=test_sort_key
    )
    log.debug('Tests found:')
    for test in all_tests:
        log.debug('\t{}'.format(test))

    tests = split_comma_and_check(args.tests, all_tests, "test") if args.tests is not None else all_tests
    log.info('Tests selected:')
    for test in tests:
        log.info('\t{}'.format(test))

    models = split_comma_and_check(args.models, MODEL_NAMES, "model") if args.models is not None else MODEL_NAMES
    log.info('Models selected:')
    for model in models:
        log.info('\t{}'.format(model))


    results = []
    for model_name in models:
        # Different models have different interfaces for things, but generally want to:
        # - if saved vectors aren't there:
        #    - load the model
        #    - load the test data
        #    - encode the vectors
        #    - dump the files into some storage
        # - else load the saved vectors
        log.info('Running tests for model {}'.format(model_name))

        if model_name == ModelName.BOW.value:
            model_options = ''
            if args.glove_path is None:
                raise Exception('glove_path must be specified for {} model'.format(model_name))
        elif model_name == ModelName.INFERSENT.value:
            if args.glove_path is None:
                raise Exception('glove_path must be specified for {} model'.format(model_name))
            if args.infersent_dir is None:
                raise Exception('infersent_dir must be specified for {} model'.format(model_name))
            model_options = ''
        elif model_name == ModelName.GENSEN.value:
            if args.glove_h5_path is None:
                raise Exception('glove_h5_path must be specified for {} model'.format(model_name))
            if args.gensen_dir is None:
                raise Exception('gensen_dir must be specified for {} model'.format(model_name))
            gensen_version_list = split_comma_and_check(args.gensen_version, GENSEN_VERSIONS, "gensen_prefix")
            if len(gensen_version_list) > 2:
                raise ValueError('gensen_version can only have one or two elements')
            model_options = 'version=' + args.gensen_version
        elif model_name == ModelName.GUSE.value:
            model_options = ''
        elif model_name == ModelName.COVE.value:
            if args.cove_encs is None:
                raise Exception('cove_encs must be specified for {} model'.format(model_name))
            model_options = ''
        elif model_name == ModelName.ELMO.value:
            model_options = 'time_combine={};layer_combine={}'.format(
                args.time_combine_method, args.layer_combine_method)
        elif model_name == ModelName.BERT.value:
            model_options = 'version=' + args.bert_version
        elif model_name == ModelName.OPENAI.value:
            if args.openai_encs is None:
                raise Exception('openai_encs must be specified for {} model'.format(model_name))
            model_options = ''
        else:
            raise ValueError("Model %s not found!" % model_name)

        model = None

        for test in tests:
            log.info('Running test {} for model {}'.format(test, model_name))
            enc_file = os.path.join(args.exp_dir, "%s.%s.h5" % (
                "%s;%s" % (model_name, model_options) if model_options else model_name,
                test))
            if not args.ignore_cached_encs and os.path.isfile(enc_file):
                log.info("Loading encodings from %s", enc_file)
                encs = load_encodings(enc_file)
                encs_targ1 = encs['targ1']
                encs_targ2 = encs['targ2']
                encs_attr1 = encs['attr1']
                encs_attr2 = encs['attr2']
            else:
                # load the test data
                encs = load_json(os.path.join(args.data_dir, "%s%s" % (test, TEST_EXT)))

                # load the model and do model-specific encoding procedure
                log.info('Computing sentence encodings')
                if model_name == ModelName.BOW.value:
                    encs_targ1 = bow.encode(encs["targ1"]["examples"], args.glove_path)
                    encs_targ2 = bow.encode(encs["targ2"]["examples"], args.glove_path)
                    encs_attr1 = bow.encode(encs["attr1"]["examples"], args.glove_path)
                    encs_attr2 = bow.encode(encs["attr2"]["examples"], args.glove_path)

                elif model_name == ModelName.INFERSENT.value:
                    if model is None:
                        model = infersent.load_infersent(args.infersent_dir, args.glove_path, train_data='all',
                                                         use_cpu=args.use_cpu)
                    model.build_vocab(
                        [
                            example
                            for k in ('targ1', 'targ2', 'attr1', 'attr2')
                            for example in encs[k]['examples']
                        ],
                        tokenize=True)
                    log.info("Encoding sentences for test %s with model %s...", test, model_name)
                    encs_targ1 = infersent.encode(model, encs["targ1"]["examples"])
                    encs_targ2 = infersent.encode(model, encs["targ2"]["examples"])
                    encs_attr1 = infersent.encode(model, encs["attr1"]["examples"])
                    encs_attr2 = infersent.encode(model, encs["attr2"]["examples"])

                elif model_name == ModelName.GENSEN.value:
                    if model is None:
                        gensen_1 = gensen.GenSenSingle(
                            model_folder=args.gensen_dir,
                            filename_prefix=gensen_version_list[0],
                            pretrained_emb=args.glove_h5_path,
                            cuda=not args.use_cpu)
                        model = gensen_1

                        if len(gensen_version_list) == 2:
                            gensen_2 = gensen.GenSenSingle(
                                model_folder=args.gensen_dir,
                                filename_prefix=gensen_version_list[1],
                                pretrained_emb=args.glove_h5_path,
                                cuda=not args.use_cpu)
                            model = gensen.GenSen(gensen_1, gensen_2)

                    vocab = gensen.build_vocab([
                        s
                        for set_name in ('targ1', 'targ2', 'attr1', 'attr2')
                        for s in encs[set_name]["examples"]
                    ])

                    model.vocab_expansion(vocab)

                    encs_targ1 = gensen.encode(model, encs["targ1"]["examples"])
                    encs_targ2 = gensen.encode(model, encs["targ2"]["examples"])
                    encs_attr1 = gensen.encode(model, encs["attr1"]["examples"])
                    encs_attr2 = gensen.encode(model, encs["attr2"]["examples"])

                elif model_name == ModelName.GUSE.value:
                    model = hub.Module("https://tfhub.dev/google/universal-sentence-encoder/2")
                    if args.use_cpu:
                        kwargs = dict(device_count={'GPU': 0})
                    else:
                        kwargs = dict()
                    config = tf.ConfigProto(**kwargs)
                    config.gpu_options.per_process_gpu_memory_fraction = 0.5  # allocate at most 50% of GPU memory
                    config.gpu_options.allow_growth = True  # grow GPU memory allocation as needed
                    with tf.Session(config=config) as session:
                        session.run([tf.global_variables_initializer(), tf.tables_initializer()])
                        def guse_encode(sents):
                            encs_node = model(sents)
                            encs = session.run(encs_node)
                            encs_d = {sents[j]: enc for j, enc in enumerate(np.array(encs).tolist())}
                            return encs_d

                        encs_targ1 = guse_encode(encs["targ1"]["examples"])
                        encs_targ2 = guse_encode(encs["targ2"]["examples"])
                        encs_attr1 = guse_encode(encs["attr1"]["examples"])
                        encs_attr2 = guse_encode(encs["attr2"]["examples"])

                elif model_name == ModelName.COVE.value:
                    load_encs_from = os.path.join(args.cove_encs, "%s.encs" % test)
                    encs = load_jiant_encodings(load_encs_from, n_header=1)

                elif model_name == ModelName.ELMO.value:
                    kwargs = dict(time_combine_method=args.time_combine_method,
                                  layer_combine_method=args.layer_combine_method)
                    encs_targ1 = elmo.encode(encs["targ1"]["examples"], **kwargs)
                    encs_targ2 = elmo.encode(encs["targ2"]["examples"], **kwargs)
                    encs_attr1 = elmo.encode(encs["attr1"]["examples"], **kwargs)
                    encs_attr2 = elmo.encode(encs["attr2"]["examples"], **kwargs)

                elif model_name == ModelName.BERT.value:
                    model, tokenizer = bert.load_model(args.bert_version)
                    encs_targ1 = bert.encode(model, tokenizer, encs["targ1"]["examples"])
                    encs_targ2 = bert.encode(model, tokenizer, encs["targ2"]["examples"])
                    encs_attr1 = bert.encode(model, tokenizer, encs["attr1"]["examples"])
                    encs_attr2 = bert.encode(model, tokenizer, encs["attr2"]["examples"])

                elif model_name == ModelName.OPENAI.value:
                    load_encs_from = os.path.join(args.openai_encs, "%s.encs" % test)
                    #encs = load_jiant_encodings(load_encs_from, n_header=1, is_openai=True)
                    encs = load_encodings(load_encs_from)
                    encs_targ1 = encs["targ1"]["encs"]
                    encs_targ2 = encs["targ2"]["encs"]
                    encs_attr1 = encs["attr1"]["encs"]
                    encs_attr2 = encs["attr2"]["encs"]

                else:
                    raise ValueError("Model %s not found!" % model_name)

                encs["targ1"]["encs"] = encs_targ1
                encs["targ2"]["encs"] = encs_targ2
                encs["attr1"]["encs"] = encs_attr1
                encs["attr2"]["encs"] = encs_attr2

                log.info("\tDone!")
                if not args.dont_cache_encs:
                    log.info("Saving encodings to %s", enc_file)
                    save_encodings(encs, enc_file)

            enc = next(iter(encs["targ1"]["encs"].values()))
            d_rep = enc.size if isinstance(enc, np.ndarray) else len(enc)

            # run the test on the encodings
            log.info("Running SEAT...")
            log.info("Representation dimension: {}".format(d_rep))
            esize, pval = weat.run_test(encs, n_samples=args.n_samples, parametric=args.parametric)
            results.append(dict(
                model=model_name,
                options=model_options,
                test=test,
                p_value=pval,
                effect_size=esize,
                num_targ1=len(encs['targ1']['encs']),
                num_targ2=len(encs['targ2']['encs']),
                num_attr1=len(encs['attr1']['encs']),
                num_attr2=len(encs['attr2']['encs'])))

        log.info("Model: %s", model_name)
        log.info('Options: {}'.format(model_options))
        for r in results:
            log.info("\tTest {test}:\tp-val: {p_value:.9f}\tesize: {effect_size:.2f}".format(**r))

    if args.results_path is not None:
        log.info('Writing results to {}'.format(args.results_path))
        with open(args.results_path, 'w') as f:
            writer = DictWriter(f, fieldnames=results[0].keys(), delimiter='\t')
            writer.writeheader()
            for r in results:
                writer.writerow(r)
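
main takes the argument list explicitly rather than reading sys.argv itself, so the usual entry point would pass the command-line arguments through. A minimal sketch (an assumption, since the entry point is not shown in the example):

import sys

if __name__ == "__main__":
    main(sys.argv[1:])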