Ejemplo n.º 1
0
def std_srt_to_text_line(url):
    """Extract the text lines of a subtitle file as one newline-joined string.

    Intended for subtitle (SRT-style) files only.

    Args:
        url: Name of the file to read; stored under the 'name' key of the
            settings template passed to ``dal.efile2list``.

    Returns:
        The kept lines joined with '\\n'. An empty string is returned when
        nothing was read or every line was filtered out.
    """
    sets = dal.get_utf8_template()
    sets['name'] = url

    # The error part of the result is not inspected here: when nothing was
    # read the function simply returns an empty string.
    readed_lst, _err = dal.efile2list(sets)

    purged_lst = []
    for at in readed_lst or ():
        at_copy = at.replace('\r', '').replace('\n', '')
        # Skip blank lines, timing lines ('-->') and lines that
        # is_content_nums() flags (presumably cue counters - confirm).
        if not at_copy or '-->' in at_copy or is_content_nums(at_copy):
            continue
        purged_lst.append(at_copy.replace('<i>', '').replace('</i>', ''))

    # Join everything into a single string so that the regexes below can
    # match markup spanning several subtitle lines; '@@@@' marks the
    # original line boundaries and is turned back into '\n' at the end.
    one_line = '@@@@'.join(purged_lst)

    # Filtration
    one_line = one_line.replace(']', '.').replace('[', '')
    one_line = one_line.replace('♪', '')

    # TODO(zaqwes): rm links
    # Raw strings: '\~', '\<' and '\>' are not valid string escapes, so the
    # non-raw literals trigger warnings on modern Python. The patterns
    # themselves are unchanged.
    one_line = re.sub(r'\~.*?\~', ' ', one_line)
    one_line = re.sub(r'\<.*?\</.*?\>', ' ', one_line)

    return '\n'.join(one_line.split('@@@@'))
Ejemplo n.º 2
0
def save_result(branch_name, what_save):
    """Dump *what_save* into '<branch_name>.json' in two labelled sections.

    The file is first opened with 'w' for the "Unrolled" header, and every
    later write uses 'a'. The same data is written twice through
    ``to_json.save_process_result``: once with raw=False and once with
    raw=True (under the "Packed" header).
    """
    settings = iow.get_utf8_template()
    # Target is a JSON file named after the branch.
    settings['name'] = branch_name+'.json'
    settings['coding'] = 'utf_8'

    # (open mode for the header, header line, raw flag for the payload)
    sections = (
        ('w', '\r# Unrolled:\n', False),
        ('a', '\r# Packed:\n', True),
    )
    for header_mode, header, is_raw in sections:
        settings['howOpen'] = header_mode
        iow.list2file(settings, [header])
        # Everything after the very first header is appended.
        settings['howOpen'] = 'a'
        to_json.save_process_result(what_save, settings, raw=is_raw)
Ejemplo n.º 3
0
 def assignBranch(self, str_branch_name):#void
     """Attach the cursor to a branch node; create the node if it is missing.

     Stores *str_branch_name* as the current node name and resets the
     in-memory branch cache. If the branch directory can be created, an
     empty JSON object ("{}") is written as its forward index. If the
     directory already exists, the forward index file is read and parsed
     into ``self._branch_cash``.

     Args:
         str_branch_name: Name of the branch node to attach to.

     Raises:
         OSError: if the branch directory cannot be created for any reason
             other than "already exists" (e.g. permissions, missing parent).
     """
     # Local import so this method does not depend on the module's import
     # block.
     import errno

     self._current_node_name = str_branch_name
     self._branch_cash = {}
     findex_name = self._get_real_branch_name()+'/'+self._kForwardIndexName
     sets = dal.get_utf8_template()
     sets['name'] = findex_name
     try:
         os.mkdir(self._get_real_branch_name())
     except OSError as e:
         # Only "directory already exists" means the branch is present;
         # any other failure must not be mistaken for an existing branch.
         if e.errno != errno.EEXIST:
             raise
         # Single parenthesised argument prints the same on Python 2 and 3.
         print('Branch is exist')
         # Load the index
         readed_list = dal.file2list(sets)
         branch_in_json = ' '.join(readed_list)

         # TODO(zaqwes): slow operation (several seconds) - what to do?
         self._branch_cash = json.loads(branch_in_json)
     else:
         # Fresh branch: start with an empty forward index.
         sets['howOpen'] = 'w'
         dal.list2file(sets, ["{}"])
Ejemplo n.º 4
0
 def saveBranchCash(self):#void
     """Write the in-memory branch cache to the forward index file as JSON.

     The cache is serialised with sorted keys and a 2-space indent, and the
     file is opened with 'w'.
     """
     serialized = json.dumps(self._branch_cash, sort_keys=True, indent=2)
     settings = dal.get_utf8_template()
     settings['name'] = '/'.join(
             [self._get_real_branch_name(), self._kForwardIndexName])
     settings['howOpen'] = 'w'
     dal.list2file(settings, [serialized])
Ejemplo n.º 5
0
def parser_target_for_spider(target_fname):
    """Parse a spider target file and yield one result triple per entry.

    The file is read through ``dal.efile2list``. Everything after a '#' on a
    line is dropped, the rest is trimmed, and empty lines are discarded. The
    first remaining line must be a node line (as decided by ``is_node``);
    all following non-node lines are treated as URLs of the current node,
    optionally followed by a settings part starting at the first '['.

    Yields:
        Triples ``(job, code, report)``:
          * ``(None, 1, report)`` - the file could not be read;
          * ``(None, 2, report)`` - the file is empty, does not start with a
            node line, or a node name is repeated;
          * ``((node, url, index, params), 0, report)`` - one URL entry.
            ``index`` counts URLs within the node starting at 1. ``params``
            is whatever ``_parse_target_params`` returned, or the string
            '{}' when the line has no settings part.

    TODO:
        Make the to-text converters customisable.
    """
    sets = dal.get_utf8_template()
    sets['name'] = target_fname
    list_lines, err = dal.efile2list(sets)
    if err[0]:
        yield None, 1, err[1]
        return

    # Strip comments and surrounding spaces, then drop empty lines.
    stripped_lines = (
        remove_forward_and_back_spaces(line.split('#')[0])
        for line in list_lines)
    result_job_list = [line for line in stripped_lines if line]

    # The first informative line must be a node name. An empty job list is
    # reported through the same error instead of raising IndexError.
    if not result_job_list or not is_node(result_job_list[0]):
        rpt = ('target_fname: '+target_fname+
               '. Неверный формат файла - первое имя узла должно быть до адресов.'+
               'Либо файл с заданиями пуст.')
        code_err = 2
        yield None, code_err, rpt
        return

    current_node = get_node_name(result_job_list[0])
    i = 0
    nodes = []
    for at in result_job_list:
        if is_node(at):
            current_node = get_node_name(at)
            if current_node not in nodes:
                nodes.append(current_node)
            else:
                # Duplicate node name: report it, but keep parsing.
                code_err = 2
                yield None, code_err, ('Name node: ['+current_node+
                        ']\n'+"\tError: Node name need be unic.")
            # URL numbering restarts with every node line.
            i = 0
        else:
            i += 1
            # Split off the settings part, if present.
            pos_first_settings_item = at.find('[')
            if pos_first_settings_item != -1:
                url = remove_forward_and_back_spaces(
                        at[:pos_first_settings_item])
                params = at[pos_first_settings_item:]
                params, code_err, rpt = _parse_target_params(params)
                if code_err != 0 and rpt:
                    rpt = 'Name node: ['+current_node+']\nUrl: ['+url+']\n'+rpt
                # NOTE(review): the entry is yielded with code 0 even when
                # parameter parsing reported an error; only the report text
                # carries the problem. Kept as in the original - confirm.
                yield (current_node, url, i, params), 0, rpt
            else:
                url = remove_forward_and_back_spaces(at)
                yield (current_node, url, i, '{}'), 0, None
Ejemplo n.º 6
0
def _save_temp_file(fname, text_content):
    """Pass *text_content* to ``list2file`` for the file *fname*, mode 'w'."""
    write_settings = get_utf8_template()
    write_settings['howOpen'] = 'w'
    write_settings['name'] = fname
    list2file(write_settings, text_content)
def read_utf_txt_file(fname):
    """Return what ``dal.file2list`` reads from *fname* with the UTF-8 template."""
    read_settings = dal.get_utf8_template()
    read_settings['name'] = fname
    content = dal.file2list(read_settings)
    return content
def write_result_file(result_list, fname):
    """Pass *result_list* to ``dal.list2file`` for the file *fname*, mode 'w'."""
    write_settings = dal.get_utf8_template()
    write_settings['name'] = fname
    write_settings['howOpen'] = 'w'
    dal.list2file(write_settings, result_list)
Ejemplo n.º 9
0
            # TODO(zaqwes): Добавить маркер в закр. комментарий
            enable = True
            close_comment_idx.append(i)
            open_comment_idx.append(i)
            
        # Добавление
        if enable and '#' not in at and at and '"""' not in at:
            result.append(
                    _kContentMarker+_remove_end_spaces(at)) 
        else:
            result.append(_remove_end_spaces(at))     
  
    return result, open_comment_idx, close_comment_idx

if __name__=='__main__':
    # Read the hard-coded input file through the UTF-8 settings template.
    sets = iow.get_utf8_template()
    sets['name'] = 'bale_mkt.py'
    
    mkt_in_list = iow.file2list(sets)
    mkt_in_list, open_comment_idx, close_comment_idx = _remove_long_comments(mkt_in_list)
    
    # Additional marking
    # Classes, interfaces, methods, variables
    markered_code = []
    for at in mkt_in_list:
        # Only lines carrying the content marker are classified.
        if _kContentMarker in at:
            if 'interface' in at:
                # 'I:' prefix: line mentions 'interface'.
                markered_code.append('I:'+at)
            elif '(' in at:
                # 'M:' prefix: line contains '(' (presumably a method - confirm).
                markered_code.append('M:'+at)
                
Ejemplo n.º 10
0
def file_as_list(file_name):
    """Return what ``io.file2list`` reads from *file_name* with the UTF-8 template."""
    read_settings = io.get_utf8_template()
    read_settings['name'] = file_name
    lines = io.file2list(read_settings)
    return lines
def write_source(fname, list_lines):
    """Pass *list_lines* to ``iow.list2file`` for the file *fname*, mode 'w'."""
    write_settings = iow.get_utf8_template()
    write_settings['howOpen'] = 'w'
    write_settings['name'] = fname
    iow.list2file(write_settings, list_lines)
def get_file_content(fname):
    """Return what ``iow.file2list`` reads from *fname* with the UTF-8 template."""
    read_settings = iow.get_utf8_template()
    read_settings['name'] = fname
    return iow.file2list(read_settings)
Ejemplo n.º 13
0
# coding: utf-8
'''
Created on 18.04.2013

@author: кей
'''

import dals.os_io.io_wrapper as dal

def convert_one_line(msg):
    """Print '<statement> as <name>' for one input line.

    Everything from the first ';' onward is discarded. The alias name is the
    text after the last '.' of what remains (the whole remainder when it has
    no '.'). Nothing is printed when the part before ';' is empty.

    Args:
        msg: One line of input text.
    """
    copy_line = msg.partition(';')[0]
    if copy_line:
        name = copy_line.rpartition('.')[2]
        # A single parenthesised argument prints identically whether print
        # is the Python 2 statement or the Python 3 function.
        print(copy_line+' as '+name)

if __name__=='__main__':
    # Read the hard-coded input file through the UTF-8 settings template.
    sets = dal.get_utf8_template()
    sets['name'] = 'test_import_to_jy.txt'
    readed = dal.file2list(sets)

    # Explicit loop instead of map(): the call is made only for its side
    # effect (printing), and map() is lazy on Python 3, so it would never
    # invoke the function there.
    for line in readed:
        convert_one_line(line)

    # Parenthesised form prints the same on Python 2 and 3.
    print('Done')