def save_tsv(filename, essay_code, sentences, dist, label_preds):
    """
    Write the predicted structure of one essay to a tab-separated file

    One header row is written, followed by one row per sentence with the
    columns: essay code, unit id (1-based), text, target, relation, drop_flag.

    Args:
        filename (str): path of the output file (opened in "w" mode)
        essay_code (str): value written in the first column of every row
        sentences (list[str]): unit texts, in order
        dist (list[int]): relative offset from each unit to its target;
            an offset of 0 means the unit points to itself (root)
        label_preds (list[str]): relation labels, consumed in order, one for
            each unit that is neither "non-arg comp." nor a root

    Raises:
        AssertionError: if the number of labels consumed differs from
            len(label_preds)
    """
    header = ["essay code", "unit id", "text", "target", "relation", "drop_flag"]
    rep = TreeBuilder(dist)
    component_labels = rep.auto_component_labels(AC_breakdown=True)

    label_idx = 0
    # the context manager closes the file even if a row fails to be built
    with open(filename, "w") as f:
        f.write("\t".join(header) + "\n")
        for i, sentence in enumerate(sentences):
            unit_id = i + 1
            if component_labels[i] == "non-arg comp.":
                # dropped unit: no target, no relation
                target, relation, drop_flag = "", "", "TRUE"
            else:
                drop_flag = "FALSE"
                target_id = unit_id + dist[i]
                if target_id == unit_id:
                    # points to itself, i.e., root: no outgoing link
                    target, relation = "", ""
                else:
                    target = str(target_id)
                    relation = label_preds[label_idx]
                    label_idx += 1
            row = [essay_code, str(unit_id), sentence, target, relation, drop_flag]
            f.write("\t".join(row) + "\n")

    # every predicted label must have been attached to exactly one link
    assert label_idx == len(label_preds)
def structured_output_quality(links) -> (List, float, float, float, List):
    """
    Measure the structural quality of the outputs of a set of essays

    Component labels are inferred for every essay. Depth and leaf proportion
    are evaluated only for the outputs that form a tree.

    Args:
        links (list): one linking output per essay; each element is passed
            to TreeBuilder as-is

    Returns:
        tuple of five elements:
            component_labels (list): inferred component labels, one entry per essay
            tree ratio (float): number of tree outputs / number of essays
                (0.0 when links is empty)
            average depth (float): mean depth over the tree outputs
                (0.0 when no output forms a tree)
            average leaf proportion (float): mean leaf proportion over the
                tree outputs (0.0 when no output forms a tree)
            all_depths (list): depth of every tree output, in essay order
    """
    component_labels = []
    all_depths = []
    n_trees = 0
    depth_total = 0
    leaf_prop_total = 0

    for essay_links in links:
        rep = TreeBuilder(essay_links)
        component_labels.append(rep.auto_component_labels(AC_breakdown=True))

        if rep.is_tree():
            n_trees += 1
            # evaluate this only when the output forms a tree
            depth, leaf_prop = rep.tree_depth_and_leaf_proportion()
            depth_total += depth
            all_depths.append(depth)
            leaf_prop_total += leaf_prop

    n_essays = len(links)
    # guard both denominators: an empty input or a tree-less input would
    # otherwise raise ZeroDivisionError
    tree_ratio = float(n_trees) / float(n_essays) if n_essays else 0.0
    avg_depth = float(depth_total) / float(n_trees) if n_trees else 0.0
    avg_leaf_prop = float(leaf_prop_total) / float(n_trees) if n_trees else 0.0

    return component_labels, tree_ratio, avg_depth, avg_leaf_prop, all_depths
def create_pairwise_data(sentences, dist):
    """
    Build the (source, target) sentence pairs used for link labelling

    Sentences labelled "non-arg comp." and sentences that point to
    themselves (roots) produce no pair.

    Args:
        sentences (list[str]): sentences in order
        dist (list[int]): relative offset from each sentence to its target

    Returns:
        list[tuple(str,str)]: one (source sentence, target sentence) pair per
            linked sentence, in sentence order
    """
    labels = TreeBuilder(dist).auto_component_labels(AC_breakdown=True)

    pairs = []
    for idx, source in enumerate(sentences):
        if labels[idx] == "non-arg comp.":
            continue  # not part of the argument, nothing to link

        target_idx = idx + dist[idx]
        if target_idx == idx:
            continue  # points to itself, i.e., a root

        pairs.append((source, sentences[target_idx]))
    return pairs
# assertion to check whether we have included non-arg-units here assert (len(sentences) == len(rel_distances)) assert (len(sentences) == len(rel_labels)) # determine where to save the file if args.split: split_folder = check_train_or_test(split_info, essay.essay_code) assert (split_folder != None) split_folder = split_folder + "/" else: split_folder = "" # no split information provided # component labels rep = TreeBuilder(rel_distances) component_labels = rep.auto_component_labels(AC_breakdown=True) # save to file save_content_to_file( args.out_dir + "linking/" + split_folder.lower() + essay.essay_code + ".sentences", sentences) save_content_to_file( args.out_dir + "linking/" + split_folder.lower() + essay.essay_code + ".vectors", vectors.tolist()) save_content_to_file( args.out_dir + "linking/" + split_folder.lower() + essay.essay_code + ".rel_distances", rel_distances) # save_content_to_file(args.out_dir + "linking/" + split_folder.lower() + essay.essay_code + ".rel_labels", rel_labels) save_content_to_file( args.out_dir + "linking/" + split_folder.lower() + essay.essay_code + ".component_labels", component_labels)