def create_compact_html_files(letter):

    cf = config.ConfigFile()
    text_files_path = (cf.configfile[cf.computername]['original_files_path'])

    sections_and_pbs = []

    # open the specific html file
    file_name = letter + ".html"
    text_file_path = text_files_path + file_name

    with open(text_file_path, 'r') as f:
        soup = BeautifulSoup(f, "html.parser")
        # pb = page break
        soup_sections_and_pbs = soup.select(".section,[title='page break']")
        for soup_section_or_pb in soup_sections_and_pbs:
            # unwrap the tag with the class 'hang' as we don't need it
            if soup_section_or_pb.select_one(".hang"):
                soup_section_or_pb.select_one(".hang").unwrap()
            sections_and_pbs.append(soup_section_or_pb)

    # create a list of keys for each section:
    # Headword-PageNumber-SequentialNumber,
    # for example "Engari-027-031" for the letter E
    page_numbers = get_page_numbers(letter, sections_and_pbs)
    od = OrderedDict(sorted(Counter(page_numbers).items()))
    pprint.pprint(od)

    return True
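
# A minimal sketch of the key format described in the comment above
# (Headword-PageNumber-SequentialNumber); the helper name and zero-padding
# widths are assumptions drawn from the "Engari-027-031" example:
def make_section_key(headword, page_number, sequence_number):
    return f"{headword}-{page_number:03}-{sequence_number:03}"

assert make_section_key("Engari", 27, 31) == "Engari-027-031"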
def get_open_compounds_list(file_id='hpk_tauira'):
    # the file_id is used for the list of teina

    HPK_OPEN_COMPOUNDS_FILE_NAME = "hpk_open_compounds.txt"
    OTHER_OPEN_COMPOUNDS_FILE_NAME = "other_open_compounds.txt"

    cf = config.ConfigFile()
    text_files_path = (cf.configfile[cf.computername]['text_files_path'])

    open_compounds_list = []

    hpk_open_compounds_file_path = \
        text_files_path + HPK_OPEN_COMPOUNDS_FILE_NAME
    other_open_compounds_file_path = \
        text_files_path + OTHER_OPEN_COMPOUNDS_FILE_NAME

    with open(hpk_open_compounds_file_path, 'r') as f:
        for line in f:
            open_compounds_list.append(line.replace('\n', ''))

    with open(other_open_compounds_file_path, 'r') as f:
        for line in f:
            open_compounds_list.append(line.replace('\n', ''))

    # add any teina that are themselves open compounds
    # and have a big brother in the list of open compounds
    for big_brother, little_brothers in teina.teina[file_id]:
        if big_brother in open_compounds_list:
            for little_brother in little_brothers:
                if ' ' in little_brother:
                    open_compounds_list.append(little_brother)
        else:
            # big brother not in the list of open compounds
            if ' ' in big_brother:
                print("Must add " + big_brother + " to open compounds")
                return False
            else:
                # not an open compound, but it could have open compound teina
                for little_brother in little_brothers:
                    if ' ' in little_brother:
                        open_compounds_list.append(little_brother)

    # sort the list by length (longest first) so that, for example,
    # finding 'the banana' never pre-empts finding
    # 'longer version of the banana'
    open_compounds_list.sort(key=len, reverse=True)

    pprint.pprint(open_compounds_list)

    return open_compounds_list
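
# A small illustration (hypothetical phrases) of why the list above is
# sorted longest-first: searching for the longer compound before its prefix
# stops 'the banana' from shadowing 'longer version of the banana'.
phrases = ['the banana', 'longer version of the banana']
phrases.sort(key=len, reverse=True)
assert phrases == ['longer version of the banana', 'the banana']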
def get_non_maori_words(file_id):

    TEXT_EXTENSION = "txt"
    # TAUIRA_FILE_ID = "hpk_tauira"  # duplicated with the choices in the call

    cf = config.ConfigFile()
    text_files_path = (cf.configfile[cf.computername]['text_files_path'])
    text_file_path = text_files_path + file_id + os.extsep + TEXT_EXTENSION

    with open(text_file_path, 'r') as f:
        for line_number, line in enumerate(f):
            maori_words = re.findall(maori_regex.maori_word, line,
                                     re.VERBOSE | re.IGNORECASE)
            all_words = re.findall(r"\w+", line)
            for word in all_words:
                if word not in maori_words and not is_number(word):
                    print(word)
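
# is_number() is called above but not defined in this snippet; a minimal
# sketch of what it is assumed to do:
def is_number(word):
    """Return True if the token parses as a number."""
    try:
        float(word)
        return True
    except ValueError:
        return False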
def pickle_to_yaml(file_name):

    cf = config.ConfigFile()

    pickle_files_path = (cf.configfile[cf.computername]['pickle_files_path'])
    pickle_file_path = pickle_files_path + file_name + os.extsep + PICKLE_EXTENSION

    with open(pickle_file_path, 'rb') as pickle_file:
        file_to_process = pickle.load(pickle_file)

    yaml_files_path = (cf.configfile[cf.computername]['yaml_files_path'])
    yaml_file_path = yaml_files_path + file_name + os.extsep + YAML_EXTENSION

    # used 'unsafe' because I couldn't get the Text Chunk to work ...
    yaml = ruamel.yaml.YAML(typ='unsafe')
    with open(yaml_file_path, "w") as yaml_file:
        yaml.dump(file_to_process, yaml_file)

    return True
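
# PICKLE_EXTENSION and YAML_EXTENSION are referenced above but never set in
# this snippet; module-level constants are assumed, with the pickle value
# matching the local one in process_text_file() and the YAML value a guess:
PICKLE_EXTENSION = "p"
YAML_EXTENSION = "yaml"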
def get_all_entries():

    all_entries = {}
    Word_ID = namedtuple('Word_ID',
                         'root_number trunk branch_number twig twig_number')

    # gather all the parts and make one large ordered dictionary
    cf = config.ConfigFile()
    json_path = (cf.configfile[cf.computername]['json_path'])

    for letter in pū.dictionary_letters:
        print('gathering json', letter)
        json_filename = letter + ".json"
        full_json_path = json_path + json_filename
        with open(full_json_path, 'r') as f:
            word_trees_from_json = json.load(f)
        word_trees_from_json = {Word_ID(**ast.literal_eval(k)): v
                                for k, v in word_trees_from_json.items()}
        all_entries.update(word_trees_from_json)

    return OrderedDict(sorted(all_entries.items(), key=mw.get_dict_sort_key))
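
# A self-contained sketch of the key round-trip get_all_entries() relies
# on: each JSON key is assumed to be the repr of a dict of Word_ID fields,
# which ast.literal_eval parses back into kwargs for the namedtuple.
import ast
from collections import namedtuple

Word_ID = namedtuple('Word_ID',
                     'root_number trunk branch_number twig twig_number')
key = Word_ID(root_number=1, trunk='aka', branch_number=0,
              twig='', twig_number=0)
json_key = repr(dict(key._asdict()))
assert Word_ID(**ast.literal_eval(json_key)) == key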
def process_source_file(file_name):

    cf = config.ConfigFile()

    source_files_path = (cf.configfile[cf.computername]['source_files_path'])
    source_file_path = source_files_path + file_name + os.extsep + TEXT_EXTENSION

    # the dictionary to hold the results
    results = {}

    # read in each line from the source file
    with open(source_file_path, 'r') as f:
        source_file = f.readlines()

    for line_number, line in enumerate(source_file, start=1):
        results[f'{line_number:05}'] = line.split()

    print(results)

    post_gateway_files_path = (
        cf.configfile[cf.computername]['post_gateway_files_path'])
    post_gateway_file_path = post_gateway_files_path + file_name + \
        os.extsep + YAML_EXTENSION

    with open(post_gateway_file_path, "w") as myfile:
        yaml.dump(results, myfile, allow_unicode=True)

    return True
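
# A minimal sketch (hypothetical input) of the mapping built above:
# zero-padded five-digit line numbers mapped to the whitespace-split
# tokens of each source line.
sample = {f'{n:05}': line.split()
          for n, line in enumerate(["tēnā koe", "kia ora"], start=1)}
assert sample == {'00001': ['tēnā', 'koe'], '00002': ['kia', 'ora']}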
At the end we update file and folder names and update the .aff file
'''

import os
import config
import hunspell
from datetime import datetime
import release_utilities as ru
import maoriword as mw
import common_word_division_errors as cwde

IR = "ir"

cf = config.ConfigFile()
internal_releases_files_path = (
    cf.configfile[cf.computername]['internal_releases_files_path'])
baseline_files_path = cf.configfile[cf.computername]['baseline_files_path']


def verify_internal_release():

    # get the internal release folder (to be tested)
    untested_release = ru.get_untested_release()

    if untested_release is None:
        print("No release found to test")
        return False
    else:
        untested_release_folder_name = IR + "_" + untested_release[0] + "_" + \
r'''
Function:
Version: 1.0.0
Created: Tuyj
Created date: 2015/4/1
'''
from _env import addPaths
addPaths(".")

import unittest, json, requests
import config as config
from init.Oauth import getAccesssToken, headers
from init.initalize import returnValue, address_host, address_http, host

cfg = config.ConfigFile('test.cfg', ensure_ascii=False)
gParams = cfg.getValue(["post2"])
gIndex = 0


class LuaTest(unittest.TestCase):

    def setUp(self):
        print('-------------- start --------------\n')

    def testComm(self):
        global gIndex
        print('-------------- executing URL --------------\n')
        # get the access token (str)
        access_str = getAccesssToken(address_host)
        payload_1 = {'access_token': access_str}
def create_excel_headword_file(letter):

    cf = config.ConfigFile()
    text_files_path = (cf.configfile[cf.computername]['original_files_path'])

    # open the specific html file
    file_name = letter + ".html"
    text_file_path = text_files_path + file_name

    with open(text_file_path, 'r') as f:
        soup = BeautifulSoup(f, "html.parser")
        # pb = page break
        sections_and_pbs = soup.select(".section,[title='page break']")

    # get page numbers and headwords
    page_numbers_and_headwords = get_page_numbers_and_headwords(
        letter, sections_and_pbs)
    pprint.pprint(page_numbers_and_headwords)

    # write the Excel file
    wb = Workbook()

    excel_files_path = (cf.configfile[cf.computername]['excel_files_path'])
    excel_file_name = letter + ".xlsx"
    excel_file_path = excel_files_path + excel_file_name

    # sorted so that sheet titles line up with page order
    page_numbers = sorted(set(x[0] for x in page_numbers_and_headwords))

    for page_counter, page_number in enumerate(page_numbers, 0):
        headwords_for_page = [x[1] for x in page_numbers_and_headwords
                              if x[0] == page_number]

        # get worksheet title
        if page_counter == 0:
            active_worksheet = wb.active
            active_worksheet.title = str(start_pages[letter])
        else:
            active_worksheet = wb.create_sheet(
                title=str(start_pages[letter] + page_counter))

        # data validation
        dv = DataValidation(type="list", formula1='"yes,no,adjust"',
                            allow_blank=True)
        active_worksheet.add_data_validation(dv)

        # named styles
        style_title = NamedStyle(name="style_title")
        style_title.font = Font(bold=True, color=colours[letter], italic=True)
        # wrapped in try/except because the workbook sometimes reports that
        # style_title is already registered
        try:
            wb.add_named_style(style_title)
        except ValueError:
            pass

        # titles
        active_worksheet["A1"] = "Entry"
        active_worksheet["A1"].style = 'style_title'
        active_worksheet["B1"] = "Headword"
        active_worksheet["B1"].style = 'style_title'
        active_worksheet["C1"] = "Status"
        active_worksheet["C1"].style = 'style_title'
        active_worksheet["D1"] = "Adjusted"
        active_worksheet["D1"].style = 'style_title'

        for counter, headword in enumerate(headwords_for_page, 1):
            active_worksheet.cell(row=counter + 1, column=1, value=counter)
            active_worksheet.cell(row=counter + 1, column=1).font = Font(bold=True)
            active_worksheet.cell(row=counter + 1, column=2, value=headword)
            dv.add("C" + str(counter + 1))  # couldn't use row/column syntax

        rows = range(1, len(headwords_for_page) + 1 + 1)
        columns = range(1, 4 + 1)
        for row in rows:
            for col in columns:
                active_worksheet.cell(row, col).alignment = Alignment(
                    horizontal='center', vertical='center')

        active_worksheet.sheet_format.defaultRowHeight = 22
        active_worksheet.sheet_format.baseColWidth = 12
        active_worksheet.sheet_view.zoomScale = 140
        active_worksheet.sheet_view.showGridLines = False
        active_worksheet.sheet_properties.tabColor = colours[letter]

    wb.save(filename=excel_file_path)

    return True
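
# An alternative (sketch) to the try/except guard above: openpyxl exposes
# the names of registered styles via Workbook.named_styles, so a style can
# be registered once, only when it is not already present.
def add_named_style_once(wb, style):
    """Register a NamedStyle only if the workbook doesn't already hold it."""
    if style.name not in wb.named_styles:
        wb.add_named_style(style)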
int(t), res[6]) == 0:
                mutexLock.release()
                # updating the data failed, so reconnect to SQL Server
                self.upload_result = False
                continue
            mutexLock.release()
            # delete the uploaded data locally once the update is done
            gpioDB.deleteByRealDateLogId(res[0])
            time.sleep(int(cfg.getUploadInterval()))


if __name__ == '__main__':
    # release all the GPIO ports first, so no other application holds them
    gpio.unexportAllGPIO(gpioTuple)

    cfg = config.ConfigFile()

    # print some of the parameters from the configuration file
    print(cfg.getRemoteIp())
    print(cfg.getRemotePort())
    print(cfg.getRemoteUser())
    print(cfg.getRemotePassword())

    # thread lock: both the heartbeat and the data upload operate on
    # SQL Server from different threads, so access has to be serialised
    mutexLock = threading.Lock()

    # thread that updates the local MySQL database
    mysqlThread = UpdateMysqlThread()
    # heartbeat thread that sends data to SQL Server
    heartbeatThread = HeartbeatThread()
    # upload the local GPIO state to SQL Server
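
# A self-contained sketch of the locking pattern described above: the
# heartbeat and upload threads share one SQL Server connection, so each
# wraps its access in the same lock (guarded_sql_call is hypothetical).
import threading

shared_lock = threading.Lock()

def guarded_sql_call(action):
    """Run a SQL Server operation while holding the shared lock."""
    with shared_lock:
        return action()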
def process_text_file(file_id, first_line, last_line):

    first_line = int(first_line)
    last_line = int(last_line)

    if first_line > last_line:
        import sys
        print("First Line can't be greater than Last Line")
        sys.exit()

    TEXT_EXTENSION = "txt"
    PICKLE_EXTENSION = "p"

    cf = config.ConfigFile()
    text_files_path = (cf.configfile[cf.computername]['text_files_path'])
    text_file_path = text_files_path + file_id + os.extsep + TEXT_EXTENSION

    # get the open compounds list to use to search for
    ocs = get_open_compounds.get_open_compounds_list(file_id)

    # the dictionary to hold the results
    chunked_lines = {}

    # create a list of tuples (line number, line)
    # containing the lines we want to chunk
    with open(text_file_path, 'r') as f:
        text_file_to_check = f.readlines()

    lines_in_file = len(text_file_to_check)
    text_file_to_check.insert(0, None)  # align index number with line number

    lines_to_check = []
    if first_line == 0 and last_line == 0:
        first_line_to_use = 1
        last_line_to_use = lines_in_file
    else:
        first_line_to_use = first_line
        last_line_to_use = min(last_line, lines_in_file)

    for x in range(first_line_to_use, last_line_to_use + 1):
        lines_to_check.append((x, text_file_to_check[x]))

    for line_number, line in lines_to_check:
        print("=============== " + str(line_number) + " ==============")
        print(line)

        # initialise dictionary value
        chunked_lines[line_number] = []

        # Group 1 - Open Compounds
        CHUNK_TYPE = "oc"
        for oc in ocs:
            regex_string = maori_regex.get_oc_regex(oc)
            oc_matches = re.finditer(regex_string, line)
            for oc_match in oc_matches:
                print(line_number, oc_match)
                try:
                    return_from_create_Text_Chunk = \
                        create_Text_Chunk(chunked_lines[line_number],
                                          oc_match.group(1),
                                          oc_match.start(1),
                                          oc_match.end(1),
                                          CHUNK_TYPE)
                except NameError:
                    print("something has gone wrong")
                else:
                    if return_from_create_Text_Chunk:
                        print(return_from_create_Text_Chunk)
                        chunked_lines[line_number].append(
                            return_from_create_Text_Chunk)
                    else:
                        print("all inside")

        # Group 2 to Group 8
        for chunk_type, regex_string in maori_regex.static_regexes:
            chunk_matches = re.finditer(regex_string, line, re.VERBOSE)
            for chunk_match in chunk_matches:
                try:
                    return_from_create_Text_Chunk = \
                        create_Text_Chunk(chunked_lines[line_number],
                                          chunk_match.group(1),
                                          chunk_match.start(1),
                                          chunk_match.end(1),
                                          chunk_type)
                except NameError:
                    if chunk_type.startswith("misc"):
                        # this is what is left over, so if it overlaps with
                        # anything else we have made a mistake
                        print("something HAS gone wrong")
                    else:
                        print("something MAY have gone wrong")
                else:
                    if return_from_create_Text_Chunk:
                        print(return_from_create_Text_Chunk)
                        chunked_lines[line_number].append(
                            return_from_create_Text_Chunk)
                    else:
                        print(chunk_match.group(1), chunk_match.start(1),
                              chunk_match.end(1), chunk_type)
                        print("all inside")

    from operator import itemgetter
    for k, v in chunked_lines.items():
        sorted_chunks = sorted(v, key=itemgetter(1))
        pprint.pprint(sorted_chunks)
        print("==================================")

        # rebuild the line from its chunks and confirm nothing was lost
        recreated_line = ''
        for chunk in sorted_chunks:
            recreated_line = recreated_line + chunk.text_chunk
        if text_file_to_check[k].lower() != recreated_line.lower():
            print('ERROR')
            print(text_file_to_check[k])
            print(recreated_line)

    import pickle
    pickle_files_path = (cf.configfile[cf.computername]['pickle_files_path'])
    pickle_file_path = pickle_files_path + file_id + os.extsep + PICKLE_EXTENSION
    with open(pickle_file_path, "wb") as pickle_file:
        pickle.dump(chunked_lines, pickle_file)
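
# Usage sketch: first_line == last_line == 0 is the signal, handled above,
# to chunk the whole file; "hpk_tauira" is the default corpus id used by
# get_open_compounds_list().
if __name__ == '__main__':
    process_text_file("hpk_tauira", 0, 0)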
# add ./libs/ to module path
sys.path.append(os.path.join(sys.path[0], 'libs'))

# handle arguments
import argparse
parser = argparse.ArgumentParser(description='peer2backup background service')
parser.add_argument('-c', '--config',
                    default=os.path.join(sys.path[0], 'peer2backup.ini'),
                    help='Configuration File',
                    )
args = parser.parse_args()

# load configuration
import config
configdb_path = config.ConfigFile(args.config)
configdb = config.ConfigDb(configdb_path)
http_port = configdb.Get('http.server.port', 9336)
num_worker_threads = configdb.Get('worker.threads.count',
                                  multiprocessing.cpu_count())

import key
keydb_path = configdb.Get('keydb.path',
                          os.path.join(sys.path[0], 'keydb.sqlite'))
keydb = key.KeyDb(keydb_path)

import auth
authdb_path = configdb.Get('authdb.path',
                           os.path.join(sys.path[0], 'authdb.sqlite'))
authdb = auth.AuthDb(authdb_path)


def _CreateWorkerQueueKey(self):
    # typable ASCII characters
    keychars = list('~!@#$%^&*()_+1234567890-=QWERTYUIOP{}|qwertyuiop[]\\'
                    'ASDFGHJKL:"asdfghjkl;\'ZXCVBNM<>?zxcvbnm,./ ')
    new_key = []
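
# A sketch (assumption) of how the truncated generator above might finish:
# draw characters at random from keychars with the stdlib secrets module.
# The key length is not shown in the original, so 32 is a guess.
import secrets

def _sketch_worker_queue_key(keychars, length=32):
    return ''.join(secrets.choice(keychars) for _ in range(length))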
def doReadConfig(self):
    '''read the configuration file'''
    self.setStatus('read the config file: ' + self.rcfile_name)
    self.rcfile = config.ConfigFile(self.rcfile_name)
    self.rebuildModelView()