def create_compact_html_files(letter):

    cf = config.ConfigFile()
    text_files_path = (cf.configfile[cf.computername]['original_files_path'])
    sections_and_pbs = []

    # open the specific html file
    file_name = letter + ".html"
    text_file_path = text_files_path + file_name
    with open(text_file_path, 'r') as f:
        soup = BeautifulSoup(f, 'html.parser')
        soup_sections_and_pbs = soup.select(
            ".section,[title='page break']")  #pb = page break
        for soup_section_or_pb in soup_sections_and_pbs:
            # unwrap any tag with the class 'hang' as we don't need it
            if soup_section_or_pb.select_one(".hang"):
                soup_section_or_pb.select_one(".hang").unwrap()
            sections_and_pbs.append(soup_section_or_pb)

    # create a list of keys for each section
    # Headword-PageNumber-Sequential Number for the letter E
    # For example "Engari-027-031"
    page_numbers = get_page_numbers(letter, sections_and_pbs)
    od = OrderedDict(sorted(Counter(page_numbers).items()))
    pprint.pprint(od)
    return True
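The key format described in the comments (Headword-PageNumber-Sequential Number, e.g. "Engari-027-031") is not assembled in this snippet; a minimal sketch of how such keys could be built, with hypothetical inputs:

# Hypothetical sketch: build an "Engari-027-031"-style section key.
def make_section_key(headword, page_number, sequence_number):
    # zero-pad both numbers to three digits so the keys sort as strings
    return f"{headword}-{page_number:03}-{sequence_number:03}"

# make_section_key("Engari", 27, 31) -> "Engari-027-031"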
Example #2
def get_open_compounds_list(file_id='hpk_tauira'):

    # the file_id is used for the list of teina

    HPK_OPEN_COMPOUNDS_FILE_NAME = "hpk_open_compounds.txt"
    OTHER_OPEN_COMPOUNDS_FILE_NAME = "other_open_compounds.txt"

    cf = config.ConfigFile()
    text_files_path = (cf.configfile[cf.computername]['text_files_path'])

    open_compounds_list = []

    hpk_open_compounds_file_path = text_files_path + HPK_OPEN_COMPOUNDS_FILE_NAME

    other_open_compounds_file_path = text_files_path + OTHER_OPEN_COMPOUNDS_FILE_NAME

    with open(hpk_open_compounds_file_path, 'r') as f:
        for line in f:
            open_compounds_list.append(line.replace('\n', ''))

    with open(other_open_compounds_file_path, 'r') as f:
        for line in f:
            open_compounds_list.append(line.replace('\n', ''))

    # add any teina that are themselves open compounds
    # and have a big brother in the list of open compounds
    for big_brother, little_brothers in teina.teina[file_id]:
        if big_brother in open_compounds_list:
            for little_brother in little_brothers:
                if ' ' in little_brother:
                    open_compounds_list.append(little_brother)
        else:
            # big brother not in the list of open compounds
            if ' ' in big_brother:
                print("Must add " + big_brother + " to open compounds")
                return False
            else:
                # not an open compound but it could have open compound teina
                for little_brother in little_brothers:
                    if ' ' in little_brother:
                        open_compounds_list.append(little_brother)

    # sort the list by length (longest first) so that a short compound
    # like 'the banana' is never matched before a longer compound
    # like 'longer version of the banana'

    open_compounds_list.sort(key=len, reverse=True)
    pprint.pprint(open_compounds_list)
    return open_compounds_list
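A small demonstration of why the longest-first sort matters, using the comment's own example and plain substring matching (the line is hypothetical):

# Hypothetical sketch: longest-first ordering stops a short compound
# from shadowing a longer compound that contains it.
compounds = ["the banana", "longer version of the banana"]
compounds.sort(key=len, reverse=True)

line = "we found the longer version of the banana here"
for compound in compounds:
    if compound in line:
        print("matched:", compound)  # the longer compound is found first
        break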
Example #3
def get_non_maori_words(file_id):

    TEXT_EXTENSION = "txt"
    # TAUIRA_FILE_ID = "hpk_tauira" # duplicated with the choices in the call

    cf = config.ConfigFile()
    text_files_path = (cf.configfile[cf.computername]['text_files_path'])
    text_file_path = text_files_path + file_id + os.extsep + TEXT_EXTENSION

    with open(text_file_path, 'r') as f:
        for line_number, line in enumerate(f):
            maori_words = re.findall(maori_regex.maori_word, line, re.VERBOSE | re.IGNORECASE)
            all_words = re.findall(r"\w+", line)
                                                    
            for word in all_words:
                if word not in maori_words and not is_number(word):
                    print(word)
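The is_number helper is not shown in this example; a minimal sketch of what it presumably does:

def is_number(word):
    # Hypothetical sketch: treat anything float() accepts as a number.
    try:
        float(word)
        return True
    except ValueError:
        return False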
Example #4
def pickle_to_yaml(file_name):

    cf = config.ConfigFile()
    pickle_files_path = (cf.configfile[cf.computername]['pickle_files_path'])
    pickle_file_path = pickle_files_path + file_name + os.extsep + PICKLE_EXTENSION

    with open(pickle_file_path, 'rb') as pickle_file:
        file_to_process = pickle.load(pickle_file)

    yaml_files_path = (cf.configfile[cf.computername]['yaml_files_path'])
    yaml_file_path = yaml_files_path + file_name + os.extsep + YAML_EXTENSION

    # use the 'unsafe' loader because the custom Text Chunk type
    # would not round-trip with the safe loader
    yaml = ruamel.yaml.YAML(typ='unsafe')

    with open(yaml_file_path, "w") as yaml_file:
        yaml.dump(file_to_process, yaml_file)
    return True
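An alternative to typ='unsafe' is to register the custom type with a safer loader via register_class; a sketch, assuming the chunk is a regular class (TextChunk here is a hypothetical stand-in for the real Text Chunk type):

import ruamel.yaml

class TextChunk:
    # hypothetical stand-in for the real Text Chunk type
    def __init__(self, text, start, end):
        self.text = text
        self.start = start
        self.end = end

yaml = ruamel.yaml.YAML(typ='safe')
yaml.register_class(TextChunk)  # dump/load now work without typ='unsafe'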
Example #5
def get_all_entries():

    all_entries = {}
    Word_ID = namedtuple('Word_ID', 'root_number trunk branch_number twig twig_number')
    
    #gather all the parts and make one large ordered dictionary
    cf = config.ConfigFile()
    json_path = (cf.configfile[cf.computername]['json_path'])

    for letter in pū.dictionary_letters:
        print('gathering json', letter)
        json_filename = letter + ".json"
        full_json_path = json_path + json_filename
        with open(full_json_path, 'r') as f:
            word_trees_from_json = json.load(f)

        word_trees_from_json = {
            Word_ID(**ast.literal_eval(k)): v
            for k, v in word_trees_from_json.items()}
        all_entries.update(word_trees_from_json)
    return OrderedDict(sorted(all_entries.items(), key=mw.get_dict_sort_key))
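For this to work, each JSON key must be the repr of a dict whose keys match the Word_ID fields; for example (the values are hypothetical):

import ast
from collections import namedtuple

Word_ID = namedtuple('Word_ID', 'root_number trunk branch_number twig twig_number')

# a JSON key stored as the repr of a dict...
key = "{'root_number': 1, 'trunk': 'aka', 'branch_number': 2, 'twig': '', 'twig_number': 0}"
# ...round-trips back into a Word_ID:
word_id = Word_ID(**ast.literal_eval(key))
print(word_id.trunk)  # -> aka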
Example #6
def process_source_file(file_name):

    cf = config.ConfigFile()
    source_files_path = (cf.configfile[cf.computername]['source_files_path'])
    source_file_path = source_files_path + file_name + os.extsep + TEXT_EXTENSION

    # the dictionary to hold the results
    results = {}

    # read in each line from the source file
    with open(source_file_path, 'r') as f:
        source_file = f.readlines()

    for line_number, line in enumerate(source_file, start=1):
        results[f'{line_number:05}'] = line.split()

    print(results)
    post_gateway_files_path = (cf.configfile[cf.computername]['post_gateway_files_path'])
    post_gateway_file_path = post_gateway_files_path + file_name + \
                             os.extsep + YAML_EXTENSION
    with open(post_gateway_file_path, "w") as myfile:
        yaml.dump(results, myfile, allow_unicode=True)
    return True
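The zero-padded keys keep the YAML mapping in numeric line order when its keys are sorted as strings; for example:

# f'{line_number:05}' pads to five digits, so string order == line order
print(f'{7:05}')    # -> 00007
print(f'{123:05}')  # -> 00123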
Example #7
At the end we update file and folder names
and update the .aff file
'''

import os
import config
import hunspell
from datetime import datetime
import release_utilities as ru
import maoriword as mw
import common_word_division_errors as cwde

IR = "ir"

cf = config.ConfigFile()
internal_releases_files_path = (
    cf.configfile[cf.computername]['internal_releases_files_path'])
baseline_files_path = cf.configfile[cf.computername]['baseline_files_path']


def verify_internal_release():

    # get the internal release folder (to be tested)
    untested_release = ru.get_untested_release()

    if untested_release is None:
        print("No release found to test")
        return False
    else:
        untested_release_folder_name = IR + "_" + untested_release[0] + "_" + \
Example #8
r'''
Function:
Version: 1.0.0
Created: Tuyj
Created date: 2015/4/1
'''
from _env import addPaths

addPaths(".")
import unittest, json, requests
import config as config

from init.Oauth import getAccesssToken, headers
from init.initalize import returnValue, address_host, address_http, host

cfg = config.ConfigFile('test.cfg', ensure_ascii=False)
gParams = cfg.getValue(["post2"])
gIndex = 0


class LuaTest(unittest.TestCase):
    def setUp(self):
        print '-------------- start --------------\n'

    def testComm(self):

        global gIndex
        print '-------------- executing URL --------------\n'
        # get the access token (str)
        access_str = getAccesssToken(address_host)
        payload_1 = {'access_token': access_str}
Example #9
def create_excel_headword_file(letter):

    cf = config.ConfigFile()
    text_files_path = (cf.configfile[cf.computername]['original_files_path'])
    # sections_and_pbs = []

    # open the specific html file
    file_name = letter + ".html"
    text_file_path = text_files_path + file_name
    with open(text_file_path, 'r') as f:
        soup = BeautifulSoup(f, 'html.parser')
        sections_and_pbs = soup.select(".section,[title='page break']") #pb = page break

    # get page numbers and headwords
    page_numbers_and_headwords = get_page_numbers_and_headwords(letter, sections_and_pbs)
    # od = OrderedDict(sorted(Counter(page_numbers_and_headwords).items()))
    pprint.pprint(page_numbers_and_headwords)

    # write the Excel file
    wb = Workbook()
    excel_files_path = (cf.configfile[cf.computername]['excel_files_path'])
    excel_file_name = letter + ".xlsx"
    excel_file_path = excel_files_path + excel_file_name

    # sort the page numbers so sheet titles (start page + offset)
    # line up with the pages in order
    page_numbers = sorted(set(x[0] for x in page_numbers_and_headwords))

    for page_counter, page_number in enumerate(page_numbers):
        headwords_for_page = [x[1] for x in page_numbers_and_headwords if x[0] == page_number]

        #get worksheet title
        if page_counter == 0:
            active_worksheet = wb.active
            active_worksheet.title = str(start_pages[letter])
        else:
            active_worksheet = wb.create_sheet(title=str(start_pages[letter] + page_counter))

        # data validation
        dv = DataValidation(type="list", formula1='"yes,no,adjust"', allow_blank=True)
        active_worksheet.add_data_validation(dv)

        #named styles
        style_title = NamedStyle(name="style_title")
        style_title.font = Font(bold=True, color=colours[letter], italic=True)

        # openpyxl raises ValueError if 'style_title' is already registered
        # (the style is re-created on every pass through the loop)
        try:
            wb.add_named_style(style_title)
        except ValueError:
            pass

        # titles
        active_worksheet["A1"] = "Entry"
        active_worksheet["A1"].style = 'style_title'
        active_worksheet["B1"] = "Headword"
        active_worksheet["B1"].style = 'style_title'
        active_worksheet["C1"] = "Status"
        active_worksheet["C1"].style = 'style_title'
        active_worksheet["D1"] = "Adjusted"
        active_worksheet["D1"].style = 'style_title'

        for counter, headword in enumerate(headwords_for_page, 1):
            active_worksheet.cell(row=counter + 1, column=1, value=counter)
            active_worksheet.cell(row=counter + 1, column=1).font = Font(bold=True)
            active_worksheet.cell(row=counter + 1, column=2, value=headword)
            dv.add("C"+str(counter + 1)) # couldn't use row column syntax

        rows = range(1, len(headwords_for_page) + 2)  # data rows plus the title row
        columns = range(1, 4 + 1)
        for row in rows:
            for col in columns:
                active_worksheet.cell(row, col).alignment = Alignment(horizontal='center', vertical='center')

        active_worksheet.sheet_format.defaultRowHeight = 22
        active_worksheet.sheet_format.baseColWidth = 12
        active_worksheet.sheet_view.zoomScale = 140
        active_worksheet.sheet_view.showGridLines = False
        active_worksheet.sheet_properties.tabColor = colours[letter]
    wb.save(filename=excel_file_path)
    return True
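The try/except above can be avoided by registering the named style once, before the page loop; a sketch using the same names as the function above:

from openpyxl.styles import NamedStyle, Font

# Sketch: register the style once per workbook, outside the page loop,
# then assign it by name exactly as the function already does.
style_title = NamedStyle(name="style_title")
style_title.font = Font(bold=True, color=colours[letter], italic=True)
wb.add_named_style(style_title)  # runs once, so no ValueError to swallow

# inside the loop, assignment by name is unchanged:
# active_worksheet["A1"].style = 'style_title'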
Example #10
                                        int(t), res[6]) == 0:
                    mutexLock.release()
                    # the update failed; reconnect to sqlserver
                    self.upload_result = False
                    continue
                mutexLock.release()
                # delete locally the rows that have been uploaded
                gpioDB.deleteByRealDateLogId(res[0])
            time.sleep(int(cfg.getUploadInterval()))


if __name__ == '__main__':
    # release all of the gpio ports first so no other application holds them
    gpio.unexportAllGPIO(gpioTuple)

    cfg = config.ConfigFile()
    # print some of the parameters from the config file
    print(cfg.getRemoteIp())
    print(cfg.getRemotePort())
    print(cfg.getRemoteUser())
    print(cfg.getRemotePassword())

    # thread lock
    # the heartbeat and the data upload both use sqlserver from different
    # threads, so a lock guards against them contending for the connection
    mutexLock = threading.Lock()

    # thread that updates the local mysql database
    mysqlThread = UpdateMysqlThread()
    # heartbeat thread: sends data to sqlserver
    heartbeatThread = HeartbeatThread()
    # thread that uploads the local gpio state to sqlserver
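The comments describe serialising sqlserver access with mutexLock across threads; a minimal sketch of that pattern (upload_row and its body are hypothetical):

import threading

mutexLock = threading.Lock()

def upload_row(row):
    # 'with' acquires and releases the lock even if the update raises,
    # so the heartbeat and upload threads never hit sqlserver together.
    with mutexLock:
        pass  # hypothetical placeholder for the sqlserver update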
Example #11
def process_text_file(file_id, first_line, last_line):

    first_line = int(first_line)
    last_line = int(last_line)

    if first_line > last_line:
        import sys
        print("First Line can't be greater than Last Line")
        sys.exit()

    TEXT_EXTENSION = "txt"
    PICKLE_EXTENSION = "p"

    cf = config.ConfigFile()
    text_files_path = (cf.configfile[cf.computername]['text_files_path'])
    text_file_path = text_files_path + file_id + os.extsep + TEXT_EXTENSION

    # get the open compounds list to use to search for
    ocs = get_open_compounds.get_open_compounds_list(file_id)

    # the dictionary to hold the results
    chunked_lines = {}

    # create a list of tuples (line number, line)
    # containing the lines we want to chunk
    with open(text_file_path, 'r') as f:
        text_file_to_check = f.readlines()
        lines_in_file = len(text_file_to_check)
        text_file_to_check.insert(0, None)  # align index numbers with line numbers

    lines_to_check = []

    if first_line == 0 and last_line == 0:
        first_line_to_use = 1
        last_line_to_use = lines_in_file
    else:
        first_line_to_use = first_line
        last_line_to_use = min(last_line, lines_in_file)

    for x in range(first_line_to_use, last_line_to_use + 1):
        lines_to_check.append((x, text_file_to_check[x]))

    for line_number, line in lines_to_check:
        print("=============== " + str(line_number) + " ==============")
        print(line)

        # initialise the dictionary entry for this line
        chunked_lines[line_number] = []

        # Group 1 - open compounds
        CHUNK_TYPE = "oc"
        for oc in ocs:
            regex_string = maori_regex.get_oc_regex(oc)
            oc_matches = re.finditer(regex_string, line)
            for oc_match in oc_matches:
                print(line_number, oc_match)
                try:
                    return_from_create_Text_Chunk = create_Text_Chunk(
                        chunked_lines[line_number],
                        oc_match.group(1),
                        oc_match.start(1),
                        oc_match.end(1),
                        CHUNK_TYPE)
                except NameError:
                    print("something has gone wrong")
                else:
                    if return_from_create_Text_Chunk:
                        print(return_from_create_Text_Chunk)
                        chunked_lines[line_number].append(
                            return_from_create_Text_Chunk)
                    else:
                        print("all inside")

        # Groups 2 to 8 - static regexes
        for chunk_type, regex_string in maori_regex.static_regexes:
            chunk_matches = re.finditer(regex_string, line, re.VERBOSE)
            for chunk_match in chunk_matches:
                try:
                    return_from_create_Text_Chunk = create_Text_Chunk(
                        chunked_lines[line_number],
                        chunk_match.group(1),
                        chunk_match.start(1),
                        chunk_match.end(1),
                        chunk_type)
                except NameError:
                    if chunk_type.startswith("misc"):
                        # this is what is left over so if it
                        # overlaps with anything else we have
                        # made a mistake
                        print("something HAS gone wrong")
                    else:
                        print("something MAY have gone wrong")
                else:
                    if return_from_create_Text_Chunk:
                        print(return_from_create_Text_Chunk)
                        chunked_lines[line_number].append(
                            return_from_create_Text_Chunk)
                    else:
                        print(chunk_match.group(1), chunk_match.start(1),
                              chunk_match.end(1), chunk_type)
                        print("all inside")

    from operator import itemgetter
    for k, v in chunked_lines.items():
        sorted_chunks = sorted(v, key=itemgetter(1))
        pprint.pprint(sorted_chunks)
        print("==================================")
        recreated_line = ''
        for chunk in sorted_chunks:
            recreated_line = recreated_line + chunk.text_chunk
        if text_file_to_check[k].lower() != recreated_line.lower():
            print('ERROR')
            print(text_file_to_check[k])
            print(recreated_line)

    import pickle
    pickle_files_path = (cf.configfile[cf.computername]['pickle_files_path'])
    pickle_file_path = pickle_files_path + file_id + os.extsep + PICKLE_EXTENSION

    with open(pickle_file_path, "wb") as pickle_file:
        pickle.dump(chunked_lines, pickle_file)
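create_Text_Chunk and its return type are not shown; given the call sites above and the later sorted(v, key=itemgetter(1)), the chunk type plausibly looks like this (a hedged sketch, not the author's definition):

from collections import namedtuple

# Field order matters: index 1 (start) is what itemgetter(1) sorts on,
# and .text_chunk is what the line-recreation step concatenates.
Text_Chunk = namedtuple('Text_Chunk', 'text_chunk start end chunk_type')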
Example #12
import sys
import os
import multiprocessing

# add ./libs/ to module path
sys.path.append(os.path.join(sys.path[0], 'libs'))

# handle arguments
import argparse
parser = argparse.ArgumentParser(description='peer2backup background service')
parser.add_argument('-c', '--config',
                    default=os.path.join(sys.path[0], 'peer2backup.ini'),
                    help='Configuration File')
args = parser.parse_args()

# load configuration
import config
configdb_path = config.ConfigFile(args.config)
configdb = config.ConfigDb(configdb_path)
http_port = configdb.Get('http.server.port', 9336)
num_worker_threads = configdb.Get('worker.threads.count', multiprocessing.cpu_count())

import key
keydb_path = configdb.Get('keydb.path', os.path.join(sys.path[0], 'keydb.sqlite'))
keydb = key.KeyDb(keydb_path)

import auth
authdb_path = configdb.Get('authdb.path', os.path.join(sys.path[0], 'authdb.sqlite'))
authdb = auth.AuthDb(authdb_path)
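ConfigDb.Get is used throughout with a key and a fallback; a hypothetical sketch of the contract those calls imply:

# Hypothetical sketch of the Get(key, default) contract used above:
class ConfigDbSketch:
    def __init__(self, values=None):
        self._values = dict(values or {})

    def Get(self, key, default=None):
        # return the stored value for key, or the default if absent
        return self._values.get(key, default)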

def _CreateWorkerQueueKey(self):
    keychars = list('~!@#$%^&*()_+1234567890-=QWERTYUIOP{}|qwertyuiop[]\\ASDFGHJKL:"asdfghjkl;\'ZXCVBNM<>?zxcvbnm,./ ') # typable ASCII characters
    new_key = []
Example #13
    def doReadConfig(self):
        '''read the configuration file'''
        self.setStatus('read the config file: ' + self.rcfile_name)
        self.rcfile = config.ConfigFile(self.rcfile_name)
        self.rebuildModelView()