Python save_annot_csv 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: qanom.annotations.common

메소드/함수: save_annot_csv

hotexamples.com에서의 예제들: 7

Python save_annot_csv - 7개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python qanom.annotations.common.save_annot_csv의 평가가 가장 높은 실제 사용 예제들입니다. 예제를 평가해 주시면 예제의 품질을 개선하는 데 도움이 됩니다.

예제 #1

파일 보기

def generate_final_annotation_files() -> NoReturn:
    """
    Generate the final gold annotation files.

    For every arbitration CSV that has a matching generation CSV:
    1. take the arbitration annotations and add the predicates with
       isVerbal==false,false from generation (those that haven't been sent
       to consolidation),
    2. anonymize worker-ids,
    3. adjust the CSV columns,
    and save the result in the final directory, with the ``arbit.`` file-name
    prefix replaced by ``annot.final.``.
    """
    gen_dir_path = "files/annotations/gold_set/generation/corrected_filtered"
    arb_dir_path = "files/annotations/gold_set/arbitration"
    dest_path = "files/annotations/gold_set/final"
    arb_prefix = "arbit."

    def arb_name_to_gen_name(name: str) -> str:
        # replace the first dot-separated component of the name with 'annot'
        return '.'.join(['annot'] + name.split('.')[1:])

    arb_names = [fn for fn in os.listdir(arb_dir_path) if fn.endswith(".csv")]
    # list the generation directory once instead of once per candidate file
    gen_names = set(os.listdir(gen_dir_path))
    ann_files = [(os.path.join(arb_dir_path, fn),
                  os.path.join(gen_dir_path, arb_name_to_gen_name(fn)))
                 for fn in arb_names
                 if arb_name_to_gen_name(fn) in gen_names]
    # prepare worker anonymization (dataset-wide)
    anonymization: Dict[str, str] = get_anonymization(all_worker_ids)
    for arb_fn, gen_fn in ann_files:
        arb_df = read_annot_csv(arb_fn)
        gen_df = read_annot_csv(gen_fn)
        # combine arb with (false,false) predicates from gen
        combined_df = combine_to_final_annot(arb_df=arb_df, gen_df=gen_df)
        # make internal aesthetic modifications in the DataFrame
        final_df = convert_to_final_annot(combined_df, anonymization)
        # save
        fn = os.path.basename(arb_fn)
        # Remove the prefix and put the new one. str.lstrip() must not be used
        # here: it strips any leading characters from the given *set*, so
        # e.g. "arbit.train.csv" would be mangled into "n.csv".
        if fn.startswith(arb_prefix):
            fn = fn[len(arb_prefix):]
        fn = 'annot.final.' + fn
        dest_fn = os.path.join(dest_path, fn)
        save_annot_csv(final_df, dest_fn)

예제 #2

파일 보기

def postprocess_annotation_files(
    orig_dir: str,
    dest_dir: str,
    process_annot_func: Callable[[
        pd.DataFrame,
    ], pd.DataFrame],
    file_name_modification_func: Callable[[
        str,
    ], str] = lambda s: s) -> NoReturn:
    """
    Apply a processing function to every ``.csv`` annotation file in a
    directory and export the results to another directory.

    :param orig_dir: input directory holding the annotation files to process
    :param dest_dir: output directory for the processed annotation files
    :param process_annot_func: receives an annot_df and returns the processed
    annot_df that will be exported
    :param file_name_modification_func: maps a source file-name to the
    file-name used in dest_dir (identity by default)
    :return:
    """
    # snapshot the directory listing before any file is written
    csv_names = [
        name for name in os.listdir(orig_dir) if name.endswith(".csv")
    ]
    for csv_name in csv_names:
        source_path = os.path.join(orig_dir, csv_name)
        processed_df = process_annot_func(read_annot_csv(source_path))
        # destination keeps the source base name, optionally modified
        target_name = file_name_modification_func(
            os.path.basename(source_path))
        dest_fn = os.path.join(dest_dir, target_name)
        save_annot_csv(processed_df, dest_fn)
        print(f"exported annotations to {dest_fn}")

예제 #3

파일 보기

def generate_pruned_dupl_annot() -> NoReturn:
    """
    Build the final wikinews.dev.5 annotation file from its duplicated
    generation and arbitration files.

    Both inputs are read, passed to prune_duplicated_annot, and the result is
    saved to the gold-set ``final`` directory.
    """
    # generation is read first, then arbitration
    pruned_df = prune_duplicated_annot(
        read_annot_csv(
            "files/annotations/gold_set/generation/corrected_filtered/annot.dupl.wikinews.dev.5.csv"
        ),
        read_annot_csv(
            "files/annotations/gold_set/arbitration/arbit.dupl.wikinews.dev.5.csv"
        ),
    )
    save_annot_csv(
        pruned_df,
        "files/annotations/gold_set/final/annot.final.wikinews.dev.5.csv")

예제 #4

파일 보기

def fix_annot_with_nmr_blacklist(orig_annot_fn: str,
                                 dest_dir: str) -> NoReturn:
    """
    Run an annotation file through remove_NMR_cases_from_annotations and save
    the result under the same file name in ``dest_dir``.

    :param orig_annot_fn: path of the annotation CSV to read
    :param dest_dir: directory that receives the filtered file
    """
    cleaned_df = remove_NMR_cases_from_annotations(
        read_annot_csv(orig_annot_fn))
    # keep the original base name, only the directory changes
    target_path = os.path.join(dest_dir, os.path.basename(orig_annot_fn))
    save_annot_csv(cleaned_df, target_path)

예제 #5

파일 보기

def jsonl_file_to_csv(qasrl_v2_fn: str, dest_dir: str) -> NoReturn:
    """
    Convert a JSON-lines file into an annotation CSV in ``dest_dir``.

    Each line is parsed as JSON and turned into a DataFrame by
    sentence_json_to_df; the per-line frames are concatenated, passed through
    decode_qasrl and saved. The output keeps the input's base name with its
    last dot-separated suffix replaced by ``.csv``.

    :param qasrl_v2_fn: path of the input JSON-lines file
    :param dest_dir: directory that receives the CSV file
    """
    with open(qasrl_v2_fn, encoding='latin-1') as jsonl_file:
        sentence_frames = [
            sentence_json_to_df(json.loads(raw_line))
            for raw_line in jsonl_file
        ]
        annot_df = pd.concat(sentence_frames, ignore_index=True, sort=False)
    # drop everything after the last dot of the base name, then add ".csv"
    stem = os.path.basename(qasrl_v2_fn).rpartition('.')[0]
    dest_fn = os.path.join(dest_dir, stem + ".csv")
    save_annot_csv(decode_qasrl(annot_df), dest_fn)

예제 #6

파일 보기

def fix_annot_with_corrected(
        orig_annot_fn: str,
        corrected_annot_fn: str,
        dest_dir: str = "files/annotations/production/corrected") -> NoReturn:
    """
    Apply re-annotation corrections to an annotation file, drop rows flagged
    as invalid prompts, and save the result under the same file name in
    ``dest_dir``.

    :param orig_annot_fn: path of the original annotation CSV
    :param corrected_annot_fn: path of the CSV holding the corrected
    annotations
    :param dest_dir: directory that receives the corrected file
    """
    merged_df = replace_some_annotations(read_annot_csv(orig_annot_fn),
                                         read_annot_csv(corrected_annot_fn))
    # beyond the re-annotation correction, flag prompts that are currently
    # invalid (the `invalid_prompt` column is read right below)
    flagged_df = find_invalid_prompts(merged_df)
    is_valid = ~flagged_df["invalid_prompt"]
    # helper columns are not part of the exported file
    helper_columns = ["corrected_verb_form", "invalid_prompt"]
    final_df = flagged_df[is_valid].drop(helper_columns, axis=1)
    # export with the original file name, in the destination folder
    dest_fn = os.path.join(dest_dir, os.path.basename(orig_annot_fn))
    save_annot_csv(final_df, dest_fn)

예제 #7

파일 보기

def generate_final_train_annotations() -> NoReturn:
    """
    Produce the final train-set annotation files.

    Every ``.csv`` file in the filtered train-set directory is converted by
    convert_to_final_annot (worker-id anonymization and CSV column
    adjustments) and saved under the same name in the final directory.
    """
    orig_train_dir_path = "files/annotations/train_set/filtered"
    dest_path = "files/annotations/train_set/final"
    csv_paths = []
    for name in os.listdir(orig_train_dir_path):
        if name.endswith('.csv'):
            csv_paths.append(os.path.join(orig_train_dir_path, name))
    # one anonymization mapping shared by the whole dataset
    anonymization: Dict[str, str] = get_anonymization(all_worker_ids)
    for csv_path in csv_paths:
        final_df = convert_to_final_annot(read_annot_csv(csv_path),
                                          anonymization)
        # same base name, destination directory
        dest_fn = os.path.join(dest_path, os.path.basename(csv_path))
        save_annot_csv(final_df, dest_fn)