def std_srt_to_text_line(url):
    """Return the plain text of an SRT subtitle file (for subtitles only).

    Lines are read through the ``dal`` I/O wrapper.  Empty lines, timing
    lines (those containing ``-->``) and lines for which
    ``is_content_nums`` is true are dropped; ``<i>``/``</i>`` tags are
    removed from the remaining lines.

    Args:
        url: Value stored under the ``'name'`` key of the reader settings
            (presumably the path of the subtitle file -- confirm).

    Returns:
        The kept lines joined with ``'\\n'`` after filtering: ``]`` becomes
        ``.``, ``[`` and the note sign are removed, and ``~...~`` spans and
        ``<tag>...</tag>`` spans are replaced with a single space.  When the
        reader returns nothing the result is an empty string; the reader's
        error report is not inspected.
    """
    sets = dal.get_utf8_template()
    sets['name'] = url
    readed_lst, _err = dal.efile2list(sets)

    purged_lst = []
    for at in readed_lst or ():
        at_copy = at.replace('\r', '').replace('\n', '')
        # Skip blanks, timing lines and numeric lines (in this order, so
        # is_content_nums only ever sees non-empty, non-timing text).
        if not at_copy or '-->' in at_copy or is_content_nums(at_copy):
            continue
        purged_lst.append(at_copy.replace('<i>', '').replace('</i>', ''))

    # The lines are joined with a marker instead of a newline, so the
    # non-greedy ``.*?`` patterns below are able to match across several
    # subtitle lines ('.' does not match '\n').
    one_line = '@@@@'.join(purged_lst)

    # Filtration
    one_line = one_line.replace(']', '.').replace('[', '')
    one_line = one_line.replace('♪', '')

    # TODO(zaqwes): rm links
    # Raw strings: '\~' and '\<' are not valid string escape sequences.
    one_line = re.sub(r'\~.*?\~', ' ', one_line)
    one_line = re.sub(r'\<.*?\</.*?\>', ' ', one_line)

    # Now split the text back into separate lines.
    return '\n'.join(one_line.split('@@@@'))
def save_result(branch_name, what_save):
    """Write *what_save* to ``<branch_name>.json`` in two sections.

    Each section is a header line followed by the output of
    ``to_json.save_process_result``: first ``# Unrolled:`` with
    ``raw=False``, then ``# Packed:`` with ``raw=True``.  The first header
    is written with ``howOpen='w'``; every later write uses ``'a'``
    (presumably the file open mode, i.e. truncate first, then append).

    Args:
        branch_name: Base name of the output file; ``'.json'`` is appended.
        what_save: Data forwarded unchanged to
            ``to_json.save_process_result``.
    """
    # Conversion to JSON
    out_cfg = iow.get_utf8_template()
    out_cfg['coding'] = 'utf_8'
    out_cfg['name'] = branch_name + '.json'
    out_cfg['howOpen'] = 'w'

    sections = (
        ('\r# Unrolled:\n', False),
        ('\r# Packed:\n', True),
    )
    for header, is_raw in sections:
        iow.list2file(out_cfg, [header])
        # Everything after the very first header write must be appended.
        out_cfg['howOpen'] = 'a'
        to_json.save_process_result(what_save, out_cfg, raw=is_raw)
def assignBranch(self, str_branch_name):
    """Attach the cursor to a node (branch); create the node if missing.

    The branch directory is created with ``os.mkdir`` and a fresh index
    file containing ``{}`` is written into it.  If that step raises
    ``OSError`` the branch is taken to exist already and a notice is
    printed.  The index file is then read and parsed as JSON into
    ``self._branch_cash``.

    Args:
        str_branch_name: Name of the branch; stored in
            ``self._current_node_name``.

    Returns:
        None.
    """
    self._current_node_name = str_branch_name
    self._branch_cash = {}

    io_cfg = dal.get_utf8_template()
    io_cfg['name'] = (
        self._get_real_branch_name() + '/' + self._kForwardIndexName)

    try:
        os.mkdir(self._get_real_branch_name())
        # New branch: start it off with an empty JSON object as its index.
        io_cfg['howOpen'] = 'w'
        dal.list2file(io_cfg, ["{}"])
    except OSError:
        print('Branch is exist')

    # Load the index.
    index_lines = dal.file2list(io_cfg)
    index_json = ' '.join(index_lines)

    # TODO(zaqwes): slow operation (several seconds) -- what to do about it?
    self._branch_cash = json.loads(index_json)
def saveBranchCash(self):
    """Write ``self._branch_cash`` to the branch's index file as JSON.

    The cache is serialized with sorted keys and a two-space indent and
    handed to ``dal.list2file`` as a single-item list, with ``howOpen``
    set to ``'w'``.

    Returns:
        None.
    """
    serialized = json.dumps(self._branch_cash, sort_keys=True, indent=2)

    index_cfg = dal.get_utf8_template()
    index_cfg['name'] = (
        self._get_real_branch_name() + '/' + self._kForwardIndexName)
    index_cfg['howOpen'] = 'w'

    dal.list2file(index_cfg, [serialized])
def parser_target_for_spider(target_fname):
    """Parse a spider target file and lazily yield its jobs.

    Everything after ``#`` on a line is dropped and lines that end up empty
    are skipped.  The first remaining line must be a node line; every
    following non-node line is an address that belongs to the most recent
    node and may carry a settings section starting at the first ``[``.

    An empty file (or one holding only comments) is reported as a format
    error with code 2 instead of raising ``IndexError``.

    TODO: make the to-text converters customizable.

    Args:
        target_fname: Name of the target file to read.

    Yields:
        ``(job, code, report)`` tuples:

        * ``(None, 1, report)`` -- the file could not be read; parsing
          stops.
        * ``(None, 2, report)`` -- the file is empty or does not start with
          a node line (parsing stops), or a node name is repeated (parsing
          continues).
        * ``((node, url, index, params), 0, report)`` -- one job; ``index``
          is the 1-based position of the address within its node,
          ``params`` is the parsed settings section (``'{}'`` when the line
          has none) and ``report`` is the settings parser's message or
          ``None``.
    """
    sets = dal.get_utf8_template()
    sets['name'] = target_fname
    list_lines, err = dal.efile2list(sets)
    if err[0]:
        rpt = err[1]
        yield None, 1, rpt
        return

    # OK to process: strip the comments, then drop the empty lines.
    list_without_comments = [
        remove_forward_and_back_spaces(line.split('#')[0])
        for line in list_lines
    ]
    result_job_list = [line for line in list_without_comments if line]

    # The first informational line must be a node name.  The emptiness
    # check comes first so that indexing [0] can never fail.
    if not result_job_list or not is_node(result_job_list[0]):
        rpt = 'target_fname: '+target_fname+ \
            '. Неверный формат файла - первое имя узла должно быть до адресов.'+ \
            'Либо файл с заданиями пуст.'
        code_err = 2
        yield None, code_err, rpt
        return

    current_node = get_node_name(result_job_list[0])
    i = 0
    nodes = []
    for at in result_job_list:
        if is_node(at):
            current_node = get_node_name(at)
            if current_node not in nodes:
                nodes.append(current_node)
            else:
                code_err = 2
                yield None, code_err, 'Name node: ['+current_node+ \
                    ']\n'+"\tError: Node name need be unic."
            # Address numbering restarts for every node line.
            i = 0
            continue

        i += 1

        # Extract the handler settings, if the line carries any.
        pos_first_settings_item = at.find('[')
        if pos_first_settings_item == -1:
            url = remove_forward_and_back_spaces(at)
            yield (current_node, url, i, '{}'), 0, None
            continue

        url = remove_forward_and_back_spaces(at[:pos_first_settings_item])
        params = at[pos_first_settings_item:]
        params, code_err, rpt = _parse_target_params(params)
        if code_err != 0 and rpt:
            rpt = 'Name node: ['+current_node+']\nUrl: ['+url+']\n'+rpt
        # NOTE(review): the parser's code_err is not propagated -- the job
        # is always yielded with status 0; confirm that this is intended.
        yield (current_node, url, i, params), 0, rpt
def _save_temp_file(fname, text_content):
    """Write *text_content* to the file *fname* via ``list2file``.

    Args:
        fname: Stored under the ``'name'`` key of the writer settings.
        text_content: Passed straight to ``list2file`` (presumably a list
            of lines -- confirm against the writer).
    """
    writer_cfg = get_utf8_template()
    writer_cfg['howOpen'] = 'w'
    writer_cfg['name'] = fname
    list2file(writer_cfg, text_content)
def read_utf_txt_file(fname):
    """Read the file *fname* with the UTF-8 settings template.

    Args:
        fname: Stored under the ``'name'`` key of the reader settings.

    Returns:
        Whatever ``dal.file2list`` returns for those settings (presumably
        the list of the file's lines).
    """
    reader_cfg = dal.get_utf8_template()
    reader_cfg['name'] = fname
    content = dal.file2list(reader_cfg)
    return content
def write_result_file(result_list, fname):
    """Write *result_list* to the file *fname* via ``dal.list2file``.

    Args:
        result_list: Items handed to ``dal.list2file``.
        fname: Stored under the ``'name'`` key of the writer settings.
    """
    writer_cfg = dal.get_utf8_template()
    writer_cfg['name'] = fname
    writer_cfg['howOpen'] = 'w'
    dal.list2file(writer_cfg, result_list)
# TODO(zaqwes): Добавить маркер в закр. комментарий enable = True close_comment_idx.append(i) open_comment_idx.append(i) # Добавление if enable and '#' not in at and at and '"""' not in at: result.append( _kContentMarker+_remove_end_spaces(at)) else: result.append(_remove_end_spaces(at)) return result, open_comment_idx, close_comment_idx if __name__=='__main__': sets = iow.get_utf8_template() sets['name'] = 'bale_mkt.py' mkt_in_list = iow.file2list(sets) mkt_in_list, open_comment_idx, close_comment_idx = _remove_long_comments(mkt_in_list) # Дополнительная маркеровка # Классы, интерфейсы, методы, переменные markered_code = [] for at in mkt_in_list: if _kContentMarker in at: if 'interface' in at: markered_code.append('I:'+at) elif '(' in at: markered_code.append('M:'+at)
def file_as_list(file_name):
    """Read *file_name* with the UTF-8 settings template.

    NOTE(review): ``io`` is expected to be the project's I/O wrapper bound
    to that name, not the standard-library ``io`` module, which has no
    ``get_utf8_template`` -- confirm the import.

    Args:
        file_name: Stored under the ``'name'`` key of the reader settings.

    Returns:
        Whatever ``io.file2list`` returns for those settings (presumably
        the list of the file's lines).
    """
    reader_cfg = io.get_utf8_template()
    reader_cfg['name'] = file_name
    lines = io.file2list(reader_cfg)
    return lines
def write_source(fname, list_lines):
    """Write *list_lines* to the file *fname* via ``iow.list2file``.

    Args:
        fname: Stored under the ``'name'`` key of the writer settings.
        list_lines: Items handed to ``iow.list2file``.
    """
    writer_cfg = iow.get_utf8_template()
    writer_cfg['howOpen'] = 'w'
    writer_cfg['name'] = fname
    iow.list2file(writer_cfg, list_lines)
def get_file_content(fname):
    """Read the file *fname* with the UTF-8 settings template.

    Args:
        fname: Stored under the ``'name'`` key of the reader settings.

    Returns:
        Whatever ``iow.file2list`` returns for those settings (presumably
        the list of the file's lines).
    """
    reader_cfg = iow.get_utf8_template()
    reader_cfg['name'] = fname
    return iow.file2list(reader_cfg)
# coding: utf-8
"""Print an ``<entry> as <name>`` line for every entry of a text file.

Created on 18.04.2013

@author: кей
"""
import dals.os_io.io_wrapper as dal


def convert_one_line(msg):
    """Print ``<entry> as <last dotted component>`` for one input line.

    Only the text before the first ``;`` is used.  If that part is empty
    nothing is printed.

    Args:
        msg: One line of input text.
    """
    copy_line = msg.split(';')[0]
    if copy_line:
        name = copy_line.split('.')[-1]
        print(copy_line + ' as ' + name)


if __name__ == '__main__':
    sets = dal.get_utf8_template()
    sets['name'] = 'test_import_to_jy.txt'
    readed = dal.file2list(sets)
    # An explicit loop instead of map(): the call is made only for its
    # printing side effect, and a lazy map() would never run it.
    for line in readed:
        convert_one_line(line)
    print('Done')