def structured_output_quality(links) -> (List, float, float, float, List):
    """
    Infer component labels automatically from the structure and summarise
    how tree-like the predicted structures are.

    Args:
        links: sequence with one entry per essay; each entry is handed to
            ``TreeBuilder`` as-is.

    Returns:
        A 5-tuple ``(component_labels, tree_ratio, avg_depth,
        avg_leaf_prop, all_depths)`` where

        * ``component_labels`` holds, per essay, the result of
          ``auto_component_labels(AC_breakdown=True)``;
        * ``tree_ratio`` is the fraction of essays whose output forms a tree
          (``0.0`` when ``links`` is empty);
        * ``avg_depth`` and ``avg_leaf_prop`` are averaged over the
          tree-shaped outputs only (``0.0`` when there is none);
        * ``all_depths`` lists the depth of every tree-shaped output, in
          input order.
    """
    component_labels = []
    all_depths = []
    leaf_prop_total = 0
    n_essays = len(links)

    for essay_links in links:
        rep = TreeBuilder(essay_links)
        component_labels.append(rep.auto_component_labels(AC_breakdown=True))
        if not rep.is_tree():
            continue
        # evaluate this only when the output forms a tree
        depth, leaf_prop = rep.tree_depth_and_leaf_proportion()
        all_depths.append(depth)
        leaf_prop_total += leaf_prop

    # Exactly one depth is recorded per tree-shaped output.
    n_trees = len(all_depths)

    # Guard both denominators: an empty input or a run in which no output is
    # a tree used to raise ZeroDivisionError here.
    tree_ratio = float(n_trees) / float(n_essays) if n_essays else 0.0
    if n_trees:
        avg_depth = float(sum(all_depths)) / float(n_trees)
        avg_leaf_prop = float(leaf_prop_total) / float(n_trees)
    else:
        avg_depth = 0.0
        avg_leaf_prop = 0.0

    return component_labels, tree_ratio, avg_depth, avg_leaf_prop, all_depths
# to get non_AC samples non_ac_samples.extend(essay.get_non_ACS("original", False)) # about tree structure if args.original_order: directions_with_non_AC = essay.get_rel_distances( "original", include_non_arg_units=True)[0] else: directions_with_non_AC = essay.get_rel_distances( "reordering", include_non_arg_units=True)[0] # reordering try: rep = TreeBuilder( directions_with_non_AC) # distances between sentences except: print("Distance error", essay.essay_code) depth, leaf_ratio = rep.tree_depth_and_leaf_proportion() leaf_prop.append(leaf_ratio) print("> Corpus", directory) print("> items", len(essays)) print("> Common Stats") print(" \t\t\t \tsum \tmax \tmin \tavg \tstdev") print_stats("# Sentences\t", n_sentences) print_stats("# Tokens\t", n_tokens) print_stats("# Arg. components", n_ACs) print_stats("# Non-arg. comp.", n_non_ACs) print("> Relations") print_stats("# Support\t", n_sup) print_stats("# Detail\t", n_det) print_stats("# Attack\t", n_att)