def get_clean_clues(json_file_or_json_dir,
                    load_from_json_files: bool = False,
                    verify=True) -> Tuple[Dict[str, List[BaseClue]], List[BaseClue]]:
    """Load the cleaned Guardian clue set.

    Either deserialize a single pre-cleaned json file (default) or rebuild the
    set from the raw per-puzzle json directory via orig_get_clean_clues.
    Returns (solution -> clues map, flat clue list).
    """
    if load_from_json_files:
        soln_to_clue_map, all_clue_list = orig_get_clean_clues(json_file_or_json_dir)
    else:
        with open(json_file_or_json_dir, 'r') as f:
            all_clue_list = json.load(f)
        all_clue_list = list(map(CleanGuardianClue.from_json, all_clue_list))
        soln_to_clue_map = make_stc_map(all_clue_list)

    # add indices (the dataset note is omitted for the clean set)
    for idx, c in enumerate(all_clue_list):
        c.idx = idx
        # if not strip_identifying_info:
        #     c.dataset = json_output_dir

    # print the distribution
    ctr = Counter()
    for c in all_clue_list:
        ctr[len(c.lengths)] += 1
    log.info(ctr)

    # Verify that the map and the flat list cover the same clues
    assert sum(map(len, soln_to_clue_map.values())) == len(all_clue_list)
    if verify:
        assert len(all_clue_list) == 142380, 'Your clues do not match the ones in Decrypting paper'
        log.info('Clue list length matches Decrypting paper expected length')

    return soln_to_clue_map, all_clue_list
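
# Usage sketch for get_clean_clues. The paths below are illustrative placeholders,
# not files shipped with this module; point them at your own data.
def _example_get_clean_clues():
    # default path: a single json file of serialized CleanGuardianClue records
    stc_map, clues = get_clean_clues('data/clean_guardian_clues.json')
    # alternative: rebuild the clean set from the raw per-puzzle json directory
    stc_map_raw, clues_raw = get_clean_clues('data/guardian_raw/', load_from_json_files=True)
    # stc_map maps each solution string to the list of clues with that answer
    return stc_map, clues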
def load_splits_from_json(json_file_name) -> SplitReturn:
    """Load pre-generated train/val/test splits from a single json file.

    Returns (solution -> clues map, flat clue list, (train, val, test)).
    """
    try:
        log.info(f'Loading splits directly from given json files. Using {json_file_name}')
        with open(json_file_name, 'r') as f:
            splits_dict = json.load(f)
    except FileNotFoundError:
        log.error('Json not found. Did you unzip as per the readme?')
        raise

    split_tuple = splits_dict['train'], splits_dict['val'], splits_dict['test']
    clue_list_tuple = tuple(list(map(CleanGuardianClue.from_json, split)) for split in split_tuple)
    all_clues = [c for clue_list in clue_list_tuple for c in clue_list]
    soln_to_clue_map = make_stc_map(all_clues)
    # note that there will be no indices in these clues

    # print the distribution
    ctr = Counter()
    for c in all_clues:
        ctr[len(c.lengths)] += 1
    log.info(ctr)

    # Verify that the map and the flat list cover the same clues
    assert sum(map(len, soln_to_clue_map.values())) == len(all_clues)
    assert len(all_clues) == 142380, 'Your clues do not match the ones in Decrypting paper'
    log.info('Clue list length matches Decrypting paper expected length')

    check_splits(all_clues, clue_list_tuple)

    return soln_to_clue_map, all_clues, clue_list_tuple
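
# Usage sketch for load_splits_from_json. The filename is a placeholder; the real
# path comes from the repo's data download/unzip step described in the readme.
def _example_load_splits():
    stc_map, all_clues, (train, val, test) = load_splits_from_json('data/disjoint_split.json')
    # the three split lists partition the full clue list
    assert len(train) + len(val) + len(test) == len(all_clues)
    return train, val, test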
def orig_get_clean_clues(json_output_dir,
                         do_filter_dupes=True,
                         verify=True,
                         strip_identifying_info=False) -> Tuple[Dict[str, List[BaseClue]], List[BaseClue]]:
    """Parse the raw Guardian puzzle json dump and build the clean clue set.

    Optionally strips identifying metadata and filters near-duplicate clues.
    Returns (solution -> clues map, flat clue list).
    """
    log.info(f'loading from {json_output_dir}')

    # map from puz_id => List[GuardianClue]
    # (defaultdict(None) has no default factory, so it behaves like a plain dict)
    parsed_puzzles: Dict[str, List[GuardianClue]] = defaultdict(None)

    # load the full glob of puzzle json files
    if strip_identifying_info:
        clue_cls = CleanGuardianClue
    else:
        clue_cls = GuardianClue
    all_clue_list = all_json_files_to_json_list(json_output_dir,
                                                subsite="cryptic",
                                                puzzle_dict=parsed_puzzles,
                                                skip_if_in_dict=True,
                                                verify=verify,
                                                clue_cls=clue_cls)

    soln_to_clue_map = make_stc_map(all_clue_list)

    # Remove anything that is exactly the same up to small diffs
    # (removes 1610 normalized clues)
    if do_filter_dupes:
        soln_to_clue_map, all_clue_list = filter_clues(soln_to_clue_map)

    return soln_to_clue_map, all_clue_list
def make_disjoint_split(all_clues: List[BaseClue], seed=42) -> Tuple[List[BaseClue], ...]:
    """Produce a disjoint train/val/test split keyed on the answer.

    All clues whose solutions share the same first two characters land in the
    same split, so no answer appears in more than one split. Three of the five
    hash buckets go to train, one to val, one to test.
    """
    soln_to_clue_map = make_stc_map(all_clues)

    train, val, test = [], [], []
    for k, v in soln_to_clue_map.items():
        # the builtin hash() is not deterministic across python runs, so use safe_hash
        h = safe_hash(k[:2]) % 5
        if h < 3:
            train.extend(v)
        elif h < 4:
            val.extend(v)
        else:
            test.extend(v)

    out_tuple = train, val, test
    rng = random.Random(seed)
    for l in out_tuple:
        rng.shuffle(l)

    check_splits(all_clues, out_tuple)

    return out_tuple
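
# make_disjoint_split requires safe_hash to be deterministic across python runs,
# since the builtin hash() of a str is randomly salted per process. A minimal
# sketch of such a helper, assuming an md5 digest over the utf-8 bytes; the
# actual safe_hash used by this repo may be implemented differently.
def _example_safe_hash(s: str) -> int:
    import hashlib
    return int(hashlib.md5(s.encode('utf-8')).hexdigest(), 16)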
def get_clean_xd_clues(filename,
                       remove_if_not_in_dict=True,
                       do_filter_dupes=True) -> Tuple[Dict[str, List[BaseClue]], List[BaseClue]]:
    """Load and filter the xd (American crossword, ACW) clue set.

    Returns (solution -> clues map, flat clue list).
    """
    logging.info(f'loading xd (ACW) set from {filename}')
    all_clue_list = xd_load_and_filter_clues(filename,
                                             remove_if_not_in_dict=remove_if_not_in_dict,
                                             strip_trailing_period=True,
                                             remove_questions=True,
                                             remove_likely_abbreviations=True,
                                             remove_fillin=True)

    # generate soln to clue map: soln (str) -> List[gc]
    soln_to_clue_map = make_stc_map(all_clue_list)

    # Remove anything that is exactly the same up to small diffs
    # removes 1610 normalized clues
    if do_filter_dupes:
        soln_to_clue_map, all_clue_list = filter_clues(soln_to_clue_map)

    # add indices and a note about the source dataset
    for idx, c in enumerate(all_clue_list):
        c.idx = idx
        c.dataset = filename

    # print the distribution
    ctr = Counter()
    for c in all_clue_list:
        ctr[len(c.lengths)] += 1
    logging.info(ctr)

    # Verify that the map and the flat list cover the same clues
    assert sum(map(len, soln_to_clue_map.values())) == len(all_clue_list)

    return soln_to_clue_map, all_clue_list
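
# Usage sketch for get_clean_xd_clues. The filename is a placeholder for whatever
# xd clue dump xd_load_and_filter_clues expects in your setup.
def _example_get_clean_xd_clues():
    stc_map, clues = get_clean_xd_clues('data/xd_clues.tsv')
    # each surviving clue gets an index and a record of the source file
    assert all(c.dataset == 'data/xd_clues.tsv' for c in clues)
    return stc_map, clues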