import logcontrol
import multiprocessing
import os
import sys
import time
from typing import Dict, List, Optional

LOG_DIR = os.path.join(freezehelper.executable_dir, "logs")
MAIN_LOG_PATH = os.path.join(LOG_DIR, "example_log.txt")
WORKER_LOG_DIR = os.path.join(LOG_DIR, "workers")

if freezehelper.is_child_process():
    # Worker processes can each get a unique log file when set in this way (outside of the __main__ check)
    logcontrol.set_log_file(os.path.join(WORKER_LOG_DIR, f"{os.getpid()}_log.txt"), roll_count=0)
    print(f"{os.getpid()} (child process)")
else:
    logcontrol.set_log_file(MAIN_LOG_PATH)
    print(f"{os.getpid()} (parent process)")


def lines_containing_string_in_file(
    search_string: str,
    filepath: str,
    encoding: Optional[str] = None,
    errors: Optional[str] = None,
) -> List[str]:
    """Return lines in a file that contain a given string.

    Lines are returned exactly as read from the file, so each one keeps
    its trailing newline (if it had one).

    :param search_string: str item to search for
    :param filepath: str or Path-like object file to search in
    :param encoding: optional str encoding to pass to open() call
    :param errors: optional str error handling to pass to open() call
    :return: list of strings that were found
    """
    with open(filepath, mode="r", encoding=encoding, errors=errors) as file_handle:
        # Substring match (not a regex); an empty search_string matches every line.
        return [line for line in file_handle if search_string in line]


if __name__ == "__main__":
    logcontrol.set_log_file(MAIN_LOG_PATH)

    # Need at least a search string and one file path after the program name.
    if len(sys.argv) < 3:
        print(
            f"Usage: {freezehelper.executable_path} <search string> <filepath> [<filepath> ...]"
        )
        sys.exit(1)

    provided_search_string = sys.argv[1]
    filepaths = sys.argv[2:]

    # Using thread pools to manage work
    tm = threadmanager.ThreadManager("example")
    tm.add_pool("files")
    thread_references: Dict[str, threadmanager.classes.TimedThread] = {}
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with threadmanager. # If not, see <https://www.gnu.org/licenses/>. """This is an extremely basic example""" import logcontrol import logging import threadmanager import time logfile = "example.log" logger = logging.getLogger() logcontrol.set_log_file(logfile, max_size=64000) logcontrol.set_level(logging.DEBUG) def continuous_func(work_time: float): """A function that repeats until a stop is requested""" return_value: int = 0 while tm.go: log_time("continuous_func - doing work") time.sleep(work_time) return_value += 1 log_time("returning from continuous_func() as go is False") return return_value def fibonacci(input_number: int):