Ejemplo n.º 1
0
def read_label(idx):
    """
    Look up ``idx`` in the inverted GATT label table.

    The GATT label YAML is read, passed through ``cleanse_dict`` and then
    through ``invert_dict``. Before the lookup, the number of keys of the
    inverted table and the sorted keys themselves are printed.

    :param idx: key into the inverted GATT table
    :return: the value stored under ``idx``
    """
    label_path = "../../data/label/applicability/labels/GATT.yaml"
    inverted = invert_dict(cleanse_dict(read_yaml(label_path)))

    inverted_keys = inverted.keys()
    print(len(inverted_keys))
    print(sorted(inverted_keys))
    return inverted[idx]
Ejemplo n.º 2
0
def rehash_arts_in_text(indices, yaml_path="labels/GATT.yaml"):
    """
    Translate integer labels into the article names found at those positions.

    e.g. [0, 2] -> ['Article I', 'Article II']

    :param indices: iterable of integer positions into the label keys
    :param yaml_path: path of the label YAML file to read
    :return: list with one name per entry of ``indices``, in the same order
    """
    # Keys of the cleansed label table, addressed by position.
    names_by_position = get_keys(cleanse_dict(read_yaml(yaml_path)))
    return [names_by_position[position] for position in indices]
Ejemplo n.º 3
0
def filter_panel_eng(list_of_urls, idx):
    """
    Pick the panel report URL for dispute ``idx`` out of ``list_of_urls``.

    Search order:
      1. a URL (among those kept by ``filter_eng``) whose file name is one
         of the expected panel report names for ``idx``;
      2. the same search repeated for every key of the first ``LinkedPanel``
         entry of ``../../data/info.yaml`` that contains ``idx``;
      3. a guessed URL: ``<idx>R.pdf`` placed next to a fallback URL that was
         remembered during step 1.

    :param list_of_urls: candidate URLs
    :param idx: dispute number used to build the expected file names
    :return: URL of the panel report (found or guessed)
    :raises ValueError: if nothing matched and ``filter_eng`` kept no URL
        from which a location could be guessed
    """
    # URLs always use forward slashes, so join with posixpath rather than
    # the platform-dependent os.path.
    import posixpath

    # File-name endings that identify a panel report once prefixed with the
    # dispute number.
    report_suffixes = ("R.pdf", "R.PDF", "RW.pdf", "R-00.pdf", "R-01.pdf")

    def _find_panel(urls, ds_idx):
        """Return (panel report URL or None, fallback URL or None)."""
        expected_names = {"{}{}".format(ds_idx, suffix)
                          for suffix in report_suffixes}
        panel_url = None
        print("Panel reports: ")
        snippet = None
        for url in filter_eng(urls):
            file_name = url.split("/")[-1]
            print("url", file_name)
            # Prefer a URL containing both 'WT' and a 'Q'/'q' as the
            # fallback; a later one replaces an earlier one.
            if ('Q' in url or 'q' in url) and 'WT' in url:
                snippet = url

            if file_name in expected_names:
                panel_url = url
                break

            # Otherwise fall back to the first URL seen.
            if not snippet:
                snippet = url
            print("urlSnippet", snippet)
            print("panel_url", panel_url)
        return panel_url, snippet

    panel_url, fallback_url = _find_panel(list_of_urls, idx)

    if not panel_url:
        group = None
        info = read_yaml("../../data/info.yaml")
        # presumably each LinkedPanel entry is a dict keyed by dispute
        # number; verify against info.yaml
        for link in info['LinkedPanel']:
            if idx in link:
                group = list(link.keys())
                print("group", group)
                break
        if group is not None:
            for one in group:
                print("linked", one)
                # Only the match matters here; the guess below is always
                # based on the fallback from the search for idx itself.
                panel_url, _ = _find_panel(list_of_urls, one)
                print("newly found panelURL", panel_url)
                if panel_url:
                    break

    if not panel_url:
        if fallback_url is None:
            raise ValueError(
                "no URL available to guess the panel report location "
                "for {}".format(idx))
        # Guess: same directory as the fallback URL, canonical file name.
        panel_url = posixpath.join('/'.join(fallback_url.split('/')[:-1]),
                                   '{}R.pdf'.format(idx))
        print("final_decision: ", panel_url)

    return panel_url
Ejemplo n.º 4
0
from prep.factual.after_panel.extract import extract_factual_manual, locate_chapter_II, locate_chapter_III


def read_label(idx):
    """
    Look up ``idx`` in the inverted GATT label table.

    The GATT label YAML is read, passed through ``cleanse_dict`` and then
    through ``invert_dict``. Before the lookup, the number of keys of the
    inverted table and the sorted keys themselves are printed.

    :param idx: key into the inverted GATT table
    :return: the value stored under ``idx``
    """
    label_path = "../../data/label/applicability/labels/GATT.yaml"
    inverted = invert_dict(cleanse_dict(read_yaml(label_path)))

    inverted_keys = inverted.keys()
    print(len(inverted_keys))
    print(sorted(inverted_keys))
    return inverted[idx]


if __name__ == "__main__":
    pkl_path = "../../data/factual/after_panel/factual.pkl"
    info = read_yaml("../../data/info.yaml")
    panel_exist = info['Panel']['ds_numb']
    mutual_agree = info['Panel']['mutual_agree']
    WPF = info['Panel']['WPF']
    print(WPF)
    LinkedPanel = info['LinkedPanel']
    LinkedOmit = info['LinkedOmit']
    for idx in panel_exist:
        if check_already_exist(pkl_path, idx):
            continue
        elif idx in mutual_agree:
            continue
        elif idx in WPF:
            print(idx, "in WPF")
            continue
        elif idx in LinkedOmit:
Ejemplo n.º 5
0
"""Convert Multi Label Dataset to One-Label"""

import os
import json
from utils.misc.yaml import read_yaml
from utils.misc.dict import get_keys
from utils.misc.json import write_json_line_by_line

from prep.label.cite.parse import cleanse_dict

# Show the working directory the script runs in.
print(os.getcwd())

# NOTE(review): multi_label_json is not used in the visible part of the
# script.
multi_label_json = "entire_data.json"
# NOTE(review): the absolute paths below tie the script to one machine.
provision_path = "/home/zachary/projects/DeepWTO/data/provision/gatt.yaml"
provision = read_yaml(provision_path)

# Print the provision keys, how many there are, and the first entry.
provision_names = list(provision.keys())
print(provision_names)
print(len(provision_names))
print(provision[provision_names[0]])


# Load the GATT citability labels, cleanse them and print their keys.
gatt = read_yaml("/home/zachary/projects/DeepWTO/data/label/citability/labels"
                 "/GATT.yaml")
gatt = cleanse_dict(gatt)
gatt_keys = get_keys(gatt)
print(gatt_keys)
# Position of "Article I" within the label keys.
art_id = gatt_keys.index("Article I")
print(art_id)

# Print whether the provision keys and the label keys compare equal.
print(provision_names == gatt_keys)
Ejemplo n.º 6
0
from utils.misc.yaml import read_yaml

# Mapping read from notable.yaml; every title used below maps to a
# comma-separated string of dispute identifiers.
read = read_yaml("notable.yaml")

# Union of the identifiers listed under the selected titles.
ds_numbs = set()
titles = ['III', 'III:1', 'III:2', 'III:4', 'III:5', 'III:7']
for title in titles:
    ds_numbs = ds_numbs.union(set(read[title].split(',')))

# NOTE: entries that differ only by surrounding whitespace are still distinct
# here, so this count can include duplicates; the final result below is
# de-duplicated through set(joint).
ds_numbs = sorted(list(ds_numbs))
print(len(ds_numbs))
print(ds_numbs)

# Drop the two-character prefix and keep the number as an int.
ds_numbs_wo_DS_str = []
for ds_numb in ds_numbs:
    # Splitting on ',' leaves whitespace around the pieces. strip() removes
    # any amount of it on either side, so int() below no longer fails on
    # extra or trailing spaces.
    stripped = ds_numb.strip()
    if stripped != ds_numb:
        ds_numb = stripped
        print(ds_numb)
    ds_numbs_wo_DS_str.append(int(ds_numb[2:]))

# Dispute numbers listed under Panel in info.yaml.
comparee = read_yaml("../../info.yaml")
comparee = comparee['Panel']['ds_numb']

# Numbers present in both sources (duplicates are removed when printing).
joint = [elem for elem in ds_numbs_wo_DS_str if elem in comparee]

if __name__ == "__main__":
    unique_joint = sorted(set(joint))
    print(unique_joint)
    print(len(unique_joint))
Ejemplo n.º 7
0
    write_yaml(cited, "./stat.yaml")


def sort_yaml(yaml_path):
    """
    Read the YAML file at ``yaml_path``, sort its entries by value in
    descending order, then print them.

    All documents in the file are merged into one mapping; a key that appears
    in more than one document keeps the value from the last one.

    :param yaml_path: path of the YAML file to read
    :return: None. only prints.
    """
    result = dict()
    # The loader yields documents lazily, so they must be consumed while the
    # file is still open; the with-block also guarantees the handle is closed.
    with open(yaml_path, "r") as stream:
        # safe_load_all builds plain YAML types only and, unlike
        # yaml.load_all without a Loader, works on current PyYAML releases.
        # NOTE(review): if the file relies on Python-specific tags, pass an
        # explicit Loader to yaml.load_all instead.
        for doc in yaml.safe_load_all(stream):
            # An empty document is loaded as None; there is nothing to merge.
            if doc is None:
                continue
            for k, v in doc.items():
                result[k] = v

    sorted_result = sorted(result.items(), key=lambda kv: kv[1], reverse=True)
    print(sorted_result)


if __name__ == "__main__":
    # main()
    # sort_yaml("stat.yaml")
    # Read the dispute lists from info.yaml.
    info = read_yaml("../info.yaml")
    # NOTE(review): panel and appellate are not used in the visible part of
    # this block.
    panel = info['Panel']['ds_numb']
    appellate = info['AppellateBody']['ds_numb']
    # Print the LinkedPanel entry.
    linked = info['LinkedPanel']
    print(linked)
Ejemplo n.º 8
0
def query(article, yaml_path):
    """
    Print the entry stored under ``article`` in the YAML file at
    ``yaml_path``.

    :param article: key to look up in the loaded data
    :param yaml_path: path of the YAML file to read
    :return: None. only prints.
    """
    print(read_yaml(yaml_path)[article])
Ejemplo n.º 9
0
                        "labels/GATT.yaml"):
    """
    From the integer list of labels, change the int into article name
    e/g : [0,2] -> ['Article I', 'Article II']
    """
    gatt_info = read_yaml(yaml_path)
    gatt_dict = cleanse_dict(gatt_info)
    gatt_keys_in_int = get_keys(gatt_dict)
    in_text = []
    for index in indices:
        in_text.append(gatt_keys_in_int[index])
    return in_text
    

if __name__ == "__main__":
    # Manual check: load the GATT labels and print each intermediate result.
    gatt = read_yaml("labels/GATT.yaml")
    gatt = cleanse_dict(gatt)
    print(gatt)
    gatt_keys = get_keys(gatt)
    print(gatt_keys)
    # One key per line.
    for key in gatt_keys:
        print(key)
    # Inverted table, its sorted keys, and how many keys there are.
    inv_gatt = invert_dict(gatt)
    inv_gatt_keys = sorted(list(inv_gatt.keys()))
    print(inv_gatt)
    print(inv_gatt_keys)
    print(len(inv_gatt_keys))
    
    # Names for labels 0 and 1, relying on the function's default yaml_path.
    print(rehash_arts_in_text([0, 1]))
    # Dump
    # path_to_dump = "../../dataset/citability/GATT_523/label.pkl"
Ejemplo n.º 10
0
from utils.misc.yaml import read_yaml

# Mapping loaded from the AD label file; each value is split on ', ' below.
read = read_yaml("labels/AD.yaml")

if __name__ == "__main__":
    # For every key, split its value on ', ' and drop the first two
    # characters of each piece.
    new = {
        key: [ds[2:] for ds in read[key].split(', ')]
        for key in read.keys()
    }
    print(new.keys())
    # Last entry listed under 'Article XXIV:12'.
    print(new['Article XXIV:12'][-1])