コード例 #1
0
def make_joint_corpus(datasets, joint):
    """Reset the joint corpus files and refill them from each dataset.

    For every split ('dev', 'test', 'train', 'train-all') the file
    ``data/<joint>/raw/<split>.txt`` is deleted if it already exists;
    otherwise its parent directory is created when missing. Afterwards
    ``append_tags(name, joint, split)`` is called once per entry of
    ``datasets``.

    Args:
        datasets: iterable of dataset names; it is iterated once per split.
        joint: name of the joint corpus, used as the directory component.

    NOTE(review): ``append_tags`` is defined elsewhere; presumably it
    appends the dataset's content to the joint file -- confirm there.
    """
    for split in ('dev', 'test', 'train', 'train-all'):
        target = 'data/{}/raw/{}.txt'.format(joint, split)
        target_dir = os.path.dirname(target)
        if os.path.exists(target):
            # Start from a clean file so earlier runs do not leak in.
            os.remove(target)
        elif not os.path.exists(target_dir):
            os.makedirs(target_dir)
        for corpus in datasets:
            append_tags(corpus, joint, split)
コード例 #2
0
ファイル: convert_corpus.py プロジェクト: leezqcst/ID-CNN-CWS
def make_joint_corpus(datasets, joint):
    """Prepare ``data/<joint>/raw/<part>.txt`` and feed every dataset into it.

    Each of the parts 'dev', 'test', 'train' and 'train-all' is handled in
    turn: an existing output file is removed, or, when there is no such
    file and its directory is absent, the directory is created. Then
    ``append_tags(name, joint, part)`` runs for each name in ``datasets``.

    Args:
        datasets: iterable of dataset names, walked once for every part.
        joint: joint corpus name forming part of the output path.

    NOTE(review): what ``append_tags`` writes is not visible here; verify
    against its definition.
    """

    def _reset(path):
        # An existing file implies its folder exists, so only one of the
        # two actions is ever needed.
        if os.path.exists(path):
            os.remove(path)
            return
        folder = os.path.dirname(path)
        if not os.path.exists(folder):
            os.makedirs(folder)

    for part in ('dev', 'test', 'train', 'train-all'):
        _reset('data/{}/raw/{}.txt'.format(joint, part))
        for name in datasets:
            append_tags(name, joint, part)
コード例 #3
0
ファイル: data-prepare.py プロジェクト: zhdbwe/fastNLP
def make_joint_corpus(datasets, joint):
    """Reset the joint corpus files under ``data_path`` and refill them.

    For each split ("dev", "test", "train", "train-all") the file
    ``<data_path>/<joint>/raw/<split>.txt`` is removed when present;
    when absent, its directory is created if it does not exist yet.
    ``append_tags`` is then invoked once per dataset with the dataset's
    ``raw`` directory, the joint ``raw`` directory, the dataset name,
    the split and ``encode="utf-8"``.

    Args:
        datasets: iterable of dataset names; iterated once per split.
        joint: name of the joint corpus directory below ``data_path``.

    NOTE(review): ``data_path`` and ``append_tags`` come from module
    scope outside this block; their exact semantics should be confirmed
    there.
    """
    for split in ("dev", "test", "train", "train-all"):
        joint_file = "{}/{}/raw/{}.txt".format(data_path, joint, split)
        if not os.path.exists(joint_file):
            # Nothing to delete; just make sure the folder is there.
            if not os.path.exists(os.path.dirname(joint_file)):
                os.makedirs(os.path.dirname(joint_file))
        else:
            os.remove(joint_file)
        for corpus in datasets:
            source_dir = os.path.join(data_path, corpus, "raw")
            target_dir = os.path.dirname(joint_file)
            append_tags(source_dir, target_dir, corpus, split, encode="utf-8")