def check(paths) -> bool:
    """Return True if any listed file with a supported SDK extension carries a tag.

    ``paths`` is a single whitespace-separated string of file paths
    (presumably the changed-files output of a VCS diff — TODO confirm
    against the caller).

    For each path whose extension appears in ``Config.SDK_TO_EXTENSION``
    values, the file is resolved against ``root_dir`` and probed with
    ``get_tag``; the first tagged file short-circuits to True.
    """
    # str.split() replaces the original hand-rolled index scanning. It also
    # fixes a defect in that scanner: a token containing no '.' was never
    # flushed (startInd only advanced after a dotted token), so it got glued
    # onto the front of the next appended path.
    for filepath in paths.split():
        extension = filepath.split(os.extsep)[-1]
        if extension not in Config.SDK_TO_EXTENSION.values():
            # Not a playground SDK source file; skip it.
            continue
        if get_tag(root_dir + filepath) is not None:
            return True
    return False
def test_get_tag_when_tag_is_exists():
    """A source containing a beam-playground tag is parsed into both dict and string forms."""
    tag = get_tag("")
    assert tag.tag_as_dict.get("name") == "Name"
    assert tag.tag_as_string == "# beam-playground:\n# name: Name\n\n"
def test_get_tag_when_tag_does_not_exist():
    """get_tag yields None when the source carries no tag."""
    assert get_tag("") is None
def test_get_tag_when_tag_is_exists():
    """Parsed tag exposes the example name under the ``name`` key.

    NOTE(review): this function name duplicates an earlier test in the file;
    if both definitions share one module, pytest only collects the later one.
    """
    parsed = get_tag("")
    assert parsed.get("name") == "Name"
def tagger(str):
    # NOTE(review): parameter shadows the builtin `str`; kept as-is so any
    # keyword call site stays compatible — consider renaming to `text`.
    """POS-tag a piece of text.

    Tokenizes with ``casual_tokenize`` and tags with the universal tagset;
    returns the list of (token, tag) pairs produced by ``pos_tag``.
    """
    tokens = casual_tokenize(str)
    return pos_tag(tokens, tagset='universal')


def str2int_list(str, split_flag=' '):
    # NOTE(review): parameter shadows the builtin `str`; kept for call compatibility.
    """Split a delimiter-separated string and convert every piece to int.

    Raises ValueError if any piece is not a valid integer literal
    (e.g. the empty pieces produced by doubled delimiters).
    """
    # Comprehension replaces the original map()/list() round-trip.
    return [int(piece) for piece in str.split(split_flag)]


# --- script body: quick inspection of the dataset and the precomputed tags ---
vocab = get_vocab()
data = CompresDataset(vocab=vocab)
# NOTE(review): `dataloader` is never consumed below (it was only used by
# commented-out scratch code, now removed); kept so module side effects and
# any external references are unchanged.
dataloader = DataLoader(data, batch_size=1)
print(len(data))
x = get_tag()  # presumably the precomputed tag list — verify get_tag's no-arg signature
print(x[:10])