def arg_l_arg_r_pairs_vector(args_file, file_contains_context=False, has_header=True):
    """Build the class-label vector and the triple index for an args file.

    Rows are read through ``tio.read_args_w_ctx`` when
    ``file_contains_context`` is true, otherwise through
    ``tio.read_args_wo_ctx`` with an empty string substituted for the
    missing context.  Each ``(ctx, arg_l, arg_r)`` triple is registered in a
    fresh ``td.TripleDict``; the entailment column is mapped to ``1`` when
    it equals ``'true'`` (ignoring case and surrounding whitespace) and to
    ``0`` otherwise.

    A row whose returned id is already covered by the label list is treated
    as a duplicate: it is reported on stderr and skipped, and a second
    message is printed when its label disagrees with the stored one.
    (Assumes ``TripleDict.add`` returns the existing id for a known triple
    and the next consecutive id for a new one -- TODO confirm.)

    NOTE(review): a function with this same name is defined again further
    down in this file; if both are at module level the later one is the
    binding that remains after import.

    :param args_file: path handed to the ``tio`` reader.
    :param file_contains_context: whether rows carry a context column.
    :param has_header: forwarded to the ``tio`` reader.
    :return: ``(vec, d_triples)`` where ``vec`` is a ``float64`` numpy array
        of the collected labels and ``d_triples`` is the ``TripleDict``.
    """
    logging.info("creating arg pairs class vector '{}'".format(args_file))
    labels = []

    def to_label(text):
        # Only the literal 'true' (case-insensitive, trimmed) counts as 1.
        return int(text.strip().lower() == 'true')

    if file_contains_context:
        rows = tio.read_args_w_ctx(args_file, has_header=has_header)
    else:
        pairs = tio.read_args_wo_ctx(args_file, has_header=has_header)
        # Pad every row with an empty context so both sources look alike.
        rows = (('', left, right, flag) for left, right, flag in pairs)

    d_triples = td.TripleDict()
    duplicates = 0
    contradicting_duplicates = 0

    for ctx, arg_l, arg_r, entailing in rows:
        idx = d_triples.add((ctx, arg_l, arg_r))

        if idx >= len(labels):
            # First occurrence of this triple: record its label.
            labels.append(to_label(entailing))
            continue

        # The id already has a label, so this row repeats an earlier triple.
        label = to_label(entailing)
        print("omitting duplicate example: '{} {} {} {}' ".format(
            ctx, arg_l, arg_r, entailing), file=sys.stderr)
        duplicates += 1
        if labels[idx] != label:
            print("duplicate example has different label: '{}' vs. '{}'".format(
                labels[idx], label), file=sys.stderr)
            contradicting_duplicates += 1

    vec = np.array(labels, dtype=np.float64)
    logging.info("finished creating arg pairs class vector '{}'".format(args_file))
    logging.info("found {} duplicate examples with {} having contradicting labels.".format(
        duplicates, contradicting_duplicates))
    return vec, d_triples
def get_train_test_indexes_presplit(d_triples,
                                    train_file='../data/updates/4/args_v2_am.tsv',
                                    test_file='../data/updates/4/args_v2_nz.tsv'):
    """Return the triple ids of a pre-split train file and test file.

    Both files are read with ``tio.read_args_w_ctx(..., has_header=False)``.
    For every row the ``(ctx, arg_l, arg_r)`` triple is looked up with
    ``d_triples.get_triple_id`` and the result is collected in file order;
    the fourth column of each row is ignored.

    :param d_triples: object providing ``get_triple_id(triple)``
        (presumably the ``TripleDict`` built when the data was loaded --
        verify against the caller).
    :param train_file: path of the training split; defaults to the path
        that was previously hard-coded.
    :param test_file: path of the test split; defaults to the path that
        was previously hard-coded.
    :return: ``(train_ids, test_ids)``, two lists of ids.
    """

    def ids_from(path):
        # One id per row, in the order the rows appear in the file.
        return [
            d_triples.get_triple_id((ctx, arg_l, arg_r))
            for ctx, arg_l, arg_r, __ in tio.read_args_w_ctx(path, has_header=False)
        ]

    train_ids = ids_from(train_file)
    test_ids = ids_from(test_file)
    return train_ids, test_ids
def arg_l_arg_r_pairs_vector(args_file, file_contains_context=False, has_header=True):
    """Read an args file and return its label vector plus triple dictionary.

    The file is read with ``tio.read_args_w_ctx`` if
    ``file_contains_context`` is set, else with ``tio.read_args_wo_ctx``,
    in which case an empty-string context is prepended to every row.  Every
    ``(ctx, arg_l, arg_r)`` triple is added to a new ``td.TripleDict``.  The
    entailment field becomes ``1`` if, after stripping and lower-casing, it
    equals ``'true'``; anything else becomes ``0``.

    When the id returned by ``add`` is smaller than the number of labels
    gathered so far, the row is counted as a duplicate, reported on stderr
    and not appended; a differing label is reported and counted separately.
    (Presumably ``TripleDict.add`` hands out consecutive ids and returns the
    old id for a repeated triple -- TODO confirm.)

    NOTE(review): an earlier definition with the same name appears above in
    this file; this later definition rebinds the name if both are at module
    level.

    :param args_file: path passed to the ``tio`` reader.
    :param file_contains_context: True if each row has a context column.
    :param has_header: passed through to the ``tio`` reader.
    :return: tuple ``(vec, d_triples)``; ``vec`` is a ``np.float64`` array
        built from the collected 0/1 labels.
    """
    logging.info("creating arg pairs class vector '{}'".format(args_file))
    class_values = []

    def with_blank_context(triples):
        # Give context-free rows the same 4-field shape as contextual ones.
        for left, right, entail in triples:
            yield '', left, right, entail

    if not file_contains_context:
        raw_rows = tio.read_args_wo_ctx(args_file, has_header=has_header)
        examples = with_blank_context(raw_rows)
    else:
        examples = tio.read_args_w_ctx(args_file, has_header=has_header)

    d_triples = td.TripleDict()
    contradicting_duplicates = 0
    duplicates = 0

    for ctx, arg_l, arg_r, entailing in examples:
        triple_id = d_triples.add((ctx, arg_l, arg_r))
        seen_before = triple_id < len(class_values)

        if seen_before:
            normalized = entailing.strip().lower()
            label = 1 if normalized == 'true' else 0
            message = "omitting duplicate example: '{} {} {} {}' ".format(
                ctx, arg_l, arg_r, entailing)
            print(message, file=sys.stderr)
            duplicates += 1

            stored = class_values[triple_id]
            if stored != label:
                # Same triple, conflicting gold label: keep the first one.
                message = "duplicate example has different label: '{}' vs. '{}'".format(
                    stored, label)
                print(message, file=sys.stderr)
                contradicting_duplicates += 1
        else:
            normalized = entailing.strip().lower()
            class_values.append(1 if normalized == 'true' else 0)

    vec = np.array(class_values, dtype=np.float64)
    logging.info(
        "finished creating arg pairs class vector '{}'".format(args_file))
    logging.info(
        "found {} duplicate examples with {} having contradicting labels.".format(
            duplicates, contradicting_duplicates))
    return vec, d_triples