Example #1
0
    def do_check(self, data_path_list=[]):
        """Validate the structure of question/answer YAML files.

        Args:
            data_path_list: Paths to check. When empty, the ``data_paths``
                option of the ``local`` config section is used instead. The
                default list is only ever rebound, never mutated, so sharing
                it between calls is harmless.

        Each problem found is reported through ``self._error``. Any
        ``Exception`` raised along the way is logged with ``LOG.error`` and
        not re-raised.
        """
        try:
            if not data_path_list:
                data_path_list = common.Cfg().get('local', 'data_paths')
            data_path_list = common.expand_path(data_path_list)
            data_path_list = common.get_yml_path_list(data_path_list)
            for data_path in data_path_list:
                print('')
                LOG.info('Checking %s' % data_path)

                with open(data_path, 'r') as f:
                    # safe_load builds plain Python objects only and, unlike
                    # a bare yaml.load(), does not need the Loader argument
                    # that PyYAML >= 6 makes mandatory.
                    qas = yaml.safe_load(f)

                for qa in qas:
                    if not qa:
                        self._error(qa, 'qa is none')
                        # Nothing to inspect; calling .values() on an empty
                        # entry would abort the remaining checks.
                        continue
                    for value in qa.values():
                        if not value:
                            self._error(qa, 'value is none')
                            continue
                        for item in value:
                            if isinstance(item, dict):
                                self._error(qa, 'item is dict')
                            if not item:
                                self._error(qa, 'item is none')
                # NOTE(review): logged even when _error was called above,
                # unless _error raises -- confirm the intended semantics.
                LOG.info('Check Passed!')

        except Exception as e:
            LOG.error(e)
Example #2
0
def get_path_list():
    """Return the markdown path list derived from the configured data paths.

    Reads the ``data_paths`` option of the ``local`` config section, passes
    it through ``common.expand_path`` and then ``common.get_md_path_list``.
    """
    configured = common.Cfg().get('local', 'data_paths')
    return common.get_md_path_list(common.expand_path(configured))
Example #3
0
    def do_transform(self, file_path_list=[]):
        """Convert raw text files into question/answer YAML files.

        Every path returned by ``common.expand_path(file_path_list)`` is
        read, scrubbed by an ordered series of regex substitutions, split
        into question/answer groups and written to ``<file_path>.yml``.

        Raises:
            Exception: if an input file has no content.
        """
        # Ordered (pattern, replacement) pairs. Later rules rely on the
        # output of earlier ones, so the sequence must not be reordered.
        substitutions = (
            # Drop every character outside U+000A..U+007E, then all '#'.
            (r'[^\u000A-\u007E]', ''),
            (r'#*', ''),
            # Every ':' becomes ','.
            (r':', ','),
            # Collapse repeated punctuation. The '::*' rule cannot match
            # any more (no ':' is left); kept to mirror the full sequence.
            (r'//*', r'/'),
            (r'!!*', '!'),
            (r',,*', ','),
            (r'::*', ':'),
            (r';;*', ';'),
            (r'\?\?*', '?'),
            # Collapse repeated brackets, then delete the bracketed span
            # (greedy: first opener to last closer on the same line).
            (r'\(\(*', '('),
            (r'\)\)*', ')'),
            (r'\(.*\)', ''),
            (r'\[\[*', '['),
            (r'\]\]*', ']'),
            (r'\[.*\]', ''),
            (r'\{\{*', '{'),
            (r'\}\}*', '}'),
            (r'{.*}', ''),
            (r'\<\<*', '<'),
            (r'\>\>*', '>'),
            (r'<.*>', ''),
            # Collapse runs of spaces. Without re.MULTILINE the two anchored
            # rules only touch the start and the end of the whole text.
            (r'  *', ' '),
            (r'^ *', ''),
            (r' *$', ''),
            # Break the line after '? ', '! ' and '. '.
            (r'\? ', r'?\n'),
            (r'! ', r'!\n'),
            (r'\. ', r'.\n'),
            # Delete from '&' to the last ';' on a line.
            (r'&.*;', ''),
            # Collapse blank lines, drop lines of 140+ characters, drop
            # letterless lines, strip leading non-alphanumerics per line.
            (r'\n\n*\n', '\n'),
            (r'.{140,9999}\n', ''),
            (r'\n[^a-zA-Z]*\n', r'\n'),
            (r'\n[^a-zA-Z0-9]*', r'\n'),
            # One blank line between a '?' line and the next non-'?' line;
            # two blank lines between a non-'?' line and the next '?' line.
            (r'(.*\?\n)(.*[^\?]\n)', r'\1\n\2'),
            (r'(.*[^\?]\n)(.*\?\n)', r'\1\n\n\2'),
        )

        for src_path in common.expand_path(file_path_list):
            LOG.info('Transforming %s' % src_path)
            with open(src_path, 'r') as reader:
                content = reader.read()
            if not content:
                raise Exception('empty file!')

            for pattern, replacement in substitutions:
                content = re.sub(pattern, replacement, content)

            # Groups are separated by two blank lines. Inside a group the
            # first blank-line-separated part holds the questions and the
            # last part holds the answers.
            qa_groups = []
            for chunk in content.split('\n\n\n'):
                parts = chunk.split('\n\n')
                qa_groups.append(
                    (parts[0].split('\n'), parts[-1].split('\n')))

            with open(src_path + '.yml', 'w') as writer:
                for questions, answers in qa_groups:
                    writer.write('\n- que:\n')
                    for line in questions:
                        if line:
                            writer.write('  - %s\n' % line)
                    writer.write('  ans:\n')
                    for line in answers:
                        if line:
                            writer.write('  - %s\n' % line)
Example #4
0
    def do_transform(self, file_path_list=[]):
        """Copy raw text files to ``self.out_path`` and convert them to YAML.

        Runs in three passes over the paths returned by
        ``common.expand_path(file_path_list)``:

        1. Copy each file into ``self.out_path`` unless a file with that
           name is already there.
        2. Rewrite each copy in place after an ordered series of regex
           substitutions.
        3. Split each rewritten copy into question/answer groups and write
           them to a ``.yml`` file beside the copy.

        Raises:
            Exception: if a copied file has no content.
        """
        # Ordered (pattern, replacement) pairs. Later rules rely on the
        # output of earlier ones, so the sequence must not be reordered.
        substitutions = (
            # Drop every character outside U+000A..U+007E, then all '#'.
            (r'[^\u000A-\u007E]', ''),
            (r'#*', ''),
            # Every ':' becomes ','.
            (r':', ','),
            # Collapse repeated punctuation. The '::*' rule cannot match
            # any more (no ':' is left); kept to mirror the full sequence.
            (r'//*', r'/'),
            (r'!!*', '!'),
            (r',,*', ','),
            (r'::*', ':'),
            (r';;*', ';'),
            (r'\?\?*', '?'),
            # Collapse repeated brackets, then delete the bracketed span
            # (greedy: first opener to last closer on the same line).
            (r'\(\(*', '('),
            (r'\)\)*', ')'),
            (r'\(.*\)', ''),
            (r'\[\[*', '['),
            (r'\]\]*', ']'),
            (r'\[.*\]', ''),
            (r'\{\{*', '{'),
            (r'\}\}*', '}'),
            (r'{.*}', ''),
            (r'\<\<*', '<'),
            (r'\>\>*', '>'),
            (r'<.*>', ''),
            # Collapse runs of spaces. Without re.MULTILINE the two anchored
            # rules only touch the start and the end of the whole text.
            (r'  *', ' '),
            (r'^ *', ''),
            (r' *$', ''),
            # Break the line after '? ', '! ' and '. '.
            (r'\? ', r'?\n'),
            (r'! ', r'!\n'),
            (r'\. ', r'.\n'),
            # Delete from '&' to the last ';' on a line.
            (r'&.*;', ''),
            # Collapse blank lines, drop lines of 140+ characters, drop
            # letterless lines, strip leading non-alphanumerics per line.
            (r'\n\n*\n', '\n'),
            (r'.{140,9999}\n', ''),
            (r'\n[^a-zA-Z]*\n', r'\n'),
            (r'\n[^a-zA-Z0-9]*', r'\n'),
            # One blank line between a '?' line and the next non-'?' line;
            # two blank lines between a non-'?' line and the next '?' line.
            (r'(.*\?\n)(.*[^\?]\n)', r'\1\n\2'),
            (r'(.*[^\?]\n)(.*\?\n)', r'\1\n\n\2'),
        )

        # Copy file
        staged_paths = []
        for file_path in common.expand_path(file_path_list):
            file_name = os.path.basename(file_path)
            new_file_path = os.path.join(self.out_path, file_name)
            if not os.path.exists(new_file_path):
                LOG.info('Copy %s' % file_path)
                shutil.copyfile(file_path, new_file_path)
            staged_paths.append(new_file_path)

        # Initialization
        for file_path in staged_paths:
            LOG.info('Initializing %s' % file_path)
            with open(file_path, 'r') as fr:
                text = fr.read()
            if not text:
                raise Exception('empty file!')
            for pattern, replacement in substitutions:
                text = re.sub(pattern, replacement, text)
            with open(file_path, 'w') as fw:
                fw.write(text)

        # Transform
        for file_path in staged_paths:
            LOG.info('Transforming %s' % file_path)
            with open(file_path, 'r') as fr:
                text = fr.read()

            # Groups are separated by two blank lines. Inside a group the
            # first blank-line-separated part holds the questions and the
            # last part holds the answers.
            qa_groups = []
            for chunk in text.split('\n\n\n'):
                parts = chunk.split('\n\n')
                qa_groups.append(
                    (parts[0].split('\n'), parts[-1].split('\n')))

            # file_path already lies inside self.out_path, so the derived
            # name is used as-is. Joining it onto self.out_path again would
            # duplicate the directory whenever out_path is relative.
            yml_path = os.path.splitext(file_path)[0] + '.yml'
            with open(yml_path, 'w') as fw:
                fw.write('qas:\n')
                for questions, answers in qa_groups:
                    fw.write('\n- que:\n')
                    for que in questions:
                        if que:
                            fw.write('  - %s\n' % que)
                    fw.write('  ans:\n')
                    for ans in answers:
                        if ans:
                            fw.write('  - %s\n' % ans)