def pandas_view_debug(object, width=1000):
    """Log *object* (typically a DataFrame) at DEBUG level with the pandas
    display width adjusted first.

    Args:
        object: Value to emit through the module logger.
        width: Desired pandas display width.  The value 500 acts as a
            sentinel meaning "no fixed width" (pandas auto-detects).
    """
    # Width 500 requests pandas' automatic terminal-width detection.
    display_width = None if width == 500 else width
    pandas.set_option('display.width', display_width)
    logger.getLogger().debug(object)
def run(self):
    """Consumer process loop: pull text batches from the input queue, run
    them through self.processText, and push the results onto the save queue.

    An empty batch is the shutdown sentinel.  Any unexpected exception is
    logged with a traceback and the process exits with status -1.
    """
    # signal.signal(signal.SIGTERM,consumer_exit)
    try:
        # Per-process logger: one log file per consumer, named by process number.
        self.logger = logger.getLogger( logging.INFO, "tfidf." + str(self.processNo) + ".log")
        self.logger.info("Consumer_" + str(self.processNo) + " is start!")
        # self.logger = multiprocessing.get_logger()
        # NOTE(review): jieba is configured inside the worker process —
        # presumably because tokenizer state is not shared across processes.
        jieba.setLogLevel(logging.INFO)
        jieba.load_userdict("dict.txt")
        while (True):
            datas = self.queue.get()
            if 0 == len(datas):
                # An empty list is the sentinel telling this worker to stop.
                self.logger.error("null list exit")
                break
            # saveID(datas[0])
            datas = self.processText(datas)
            self.save_queue.put(datas)
            # saveID(0-datas[0])
    except SystemExit:
        self.logger.info("process exit with sys.exit()")
        exit(0)
    except:
        # Broad catch: log the traceback and exit non-zero so the parent
        # can detect the failure.
        error = traceback.format_exc()
        self.logger.error(str(self.processNo) + " Error")
        self.logger.error(error)
        exit(-1)
def run(self):
    """Producer process loop: resume from the saved checkpoint, re-enqueue any
    unfinished task IDs, then stream text batches onto the queue ID by ID.
    """
    # Each process maintains its own independent MySQL connection;
    # sharing one connection across the whole process tree means the
    # connection breaks when any single process is restarted.
    # Initialize ID to the very first ID.
    ID = startID
    # Respond to SIGTERM by running self.exit.
    signal.signal(signal.SIGTERM, self.exit)
    # Obtain the logger.
    self.logger = logger.getLogger(logging.INFO, "tfidf.producer.log")
    self.logger.info("Processer is start!")
    # Total size of the table:
    # DATA_MAXSIZE = rawdataDB.zl_project.select().count()
    # Load the checkpoint: unfinished task ids plus the ID progress marker.
    tID, tIDs = getIDFromFile()
    # self.logger.info("getID and IDs:"+str(tID)+"-"+str(tIDs))
    sID, sIDs = getIDsFromIndex(tID, tIDs)
    self.logger.info("getID and IDs:" + str(sID) + "-" + str(sIDs))
    if sID == -1:
        # Fresh task: start from the beginning.
        self.logger.info("新任务,从头执行")
    else:
        if sID <= startID - DATA_SIZE:
            # Invalid checkpoint ID: restart from the beginning.
            self.logger.info("错误的sID,从头开始")
        else:
            # Checkpoint recovered: resume from sID and replay the
            # unfinished task IDs first.
            self.logger.info("获取任务进度成功,在" + str(sID) + "处开始")
            ID = sID
            for i in sIDs:
                self.logger.info("处理一些未完成的任务ID:" + str(i))
                texts = self.getTextsFromID(i)
                self.queue.put(texts)
    try:
        # m = "Producer process: "
        while True:
            # Once all data is produced, idle here until the main
            # process terminates the tree.
            if DATA_MAXSIZE < ID:
                self.logger.info("ID is on " + str(ID) + " \ Task is over sleep to wait exit")
                time.sleep(10)
                continue
            # self.logger.info("getting texts from ID:"+str(ID))
            texts = self.getTextsFromID(ID)
            # if 0 == len(text):
            #     continue
            self.queue.put(texts)
            # Progress logging: one line every 1000 IDs.
            # NOTE(review): under Python 3, ID / 100 is a float, so the
            # "% 10 == 0" cadence assumes ID is a multiple of 100 — confirm.
            IID = ID / 100
            if IID % 10 == 0:
                self.logger.info(str(ID) + "/" + str(DATA_MAXSIZE))
            # end
            ID += self.size
    except SystemExit:
        self.logger.info("process exit with sys.exit()")
        exit(0)
    except:
        # Broad catch: log the traceback and exit non-zero so the parent
        # can detect the failure.
        error = traceback.format_exc()
        self.logger.error("Producer error")
        self.logger.error(error)
        exit(-1)
#!/usr/bin/env python # -*- coding: UTF-8 -*- from context import resource_manager import pandas import numpy from tools import logger import numpy as np import matplotlib.pyplot as plt log = logger.getLogger() def plot_image_file(img): plt.imshow(img) plt.show() def plot_image(narray, w='', h=''): log.info("plot image array:" + str(narray.shape)) if w is not '': narray = narray.reshape(w, h) plt.imshow(narray) plt.show() def plot_rho_delta(rho, delta): ''' Plot scatter diagram for rho-delta points Args: rho : rho list
#!/usr/bin/python3 import tools.logger as logger import paramiko import time # setup logging paramiko.util.log_to_file('./logs/SSH.log') LOG = logger.getLogger('Connector') ssh_client = paramiko.SSHClient() ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) def run_cmd(host, user, passwd, cmd_list, client=ssh_client, port_number=22): if type(cmd_list) is str: cmd_list = [cmd_list] try: client.connect(hostname=host, port=port_number, username=user, password=passwd, allow_agent=False) shell = client.invoke_shell() LOG.info('Successfully connected to {0}'.format(host)) shell.recv(1024) shell.send('environment no more\r\n') time.sleep(0.5) shell.recv(1024) except: LOG.warning('Connection to {0} failed'.format(host)) try:
#!/usr/bin/python3 import tools.logger as logger import paramiko import time # setup logging paramiko.util.log_to_file('./logs/SSH.log') LOG = logger.getLogger('Connector') ssh_client = paramiko.SSHClient() ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) def run_cmd(host, user, passwd, cmd_list, client=ssh_client, port_number=22): if type(cmd_list) is str: cmd_list = [cmd_list] try: client.connect( hostname=host, port=port_number, username=user, password=passwd, allow_agent=False) shell = client.invoke_shell() LOG.info('Successfully connected to {0}'.format(host)) shell.recv(1024) shell.send('environment no more\r\n') time.sleep(0.5) shell.recv(1024) except: LOG.warning('Connection to {0} failed'.format(host)) try:
def run(self):
    """Saver process loop: persist incoming batches and checkpoint progress.

    Pulls batches from self.save_queue, writes them with self.saveData,
    tracks completed IDs, and periodically stores progress so the job can
    resume after a restart.  When everything up to DATA_MAXSIZE is saved,
    it signals the main process (via killandExit) to tear the whole
    process tree down.
    """
    try:
        # At start-up the "last completed" pointer must sit one step
        # BEFORE the first ID, otherwise the first batch would be skipped.
        self.ID = startID - DATA_SIZE
        self.IDs = set()
        # Initialize the logger.
        self.logger = logger.getLogger(logging.INFO, "tfidf.saver.log")
        self.logger.info("Saver is start!")
        # Restore progress so incoming IDs line up with what was already
        # saved.  Reading the checkpoint here is necessary: if progress
        # recovery fails, this ID is what the next checkpoint writes back.
        saveID, saveIDs = getIDFromFile()
        self.logger.info("saver get ID and IDs:" + str(saveID) + "-" + str(saveIDs))
        if saveID > 0:
            self.ID = saveID
            self.IDs = set(saveIDs)
        # Respond to SIGTERM by running self.exit.
        signal.signal(signal.SIGTERM, self.exit)
    except:
        self.logger.info("process exit with sys.exit()")
        pid = getPidFromFile()
        killandExit(pid)
        self.logger.info("tfidfMaker error!!!! exit!!!!")
    try:
        while (True):
            data = self.save_queue.get()
            if data is None:
                continue
            if 0 == len(data):
                continue
            if -1 == data[0]:
                # Sentinel batch: checkpoint progress and stop.
                setIDToFile(self.ID, self.IDs)
                break
            self.saveData(data)
            self.IDs.add(data[0])
            # self.logger.info("save over ID="+str(data[0]))
            self.ID, self.IDs = manageIndex(self.ID, self.IDs)
            # Progress log and checkpoint: once every 5000 IDs.
            ID_log = self.ID / 100
            if 0 == ID_log % 50:
                self.logger.info("save index on " + str(self.ID))
                ###
                setIDToFile(self.ID, self.IDs)
                # self.logger.info("saver storeID :"+str(self.ID)+"-"+str(self.IDs))
            # All data processed: send SIGTERM to the main process and
            # terminate the whole process tree.
            if self.ID + DATA_SIZE >= DATA_MAXSIZE:
                self.logger.info("tfidfMaker over!!!! exit!!!!")
                self.ID = DATA_MAXSIZE
                # BUG FIX: the original wrote "self.ID = set()" here,
                # clobbering the ID assigned on the line above; clearing
                # the pending-ID set was clearly intended (it mirrors the
                # initialization at the top of this method).
                self.IDs = set()
                pid = getPidFromFile()
                killandExit(pid)
    except SystemExit:
        self.logger.info("process exit with sys.exit()")
        exit(0)
    except:
        # Persist progress before dying so the run can be resumed.
        setIDToFile(self.ID, self.IDs)
        self.logger.error("saver storeID :" + str(self.ID) + "-" + str(self.IDs))
        error = traceback.format_exc()
        self.logger.error("saver error")
        self.logger.error(error)
        exit(-1)
# 全局变量 #### ################################################################ PROCESS_SIZE = 7 # 处理词所用进程数 #### DATA_MAXSIZE = 16145057 # 数据库大小 #### # rawdataDB.zl_project.select().count() #### # 每次处理,储存,获取的大小.一旦开始不能随意更换 #### DATA_SIZE = 100 # 试验性的,可以调大一点 #### QUEUE_MAXSIZE = 2 # tests传输队列大小 #### S_QUEUE_MAXSIZE = 1 # datas存储传输队列大小 #### startID = 3512466 # 非空数据开始索引 #### ################################################################ ################################################################ # Get Logger log_ = logger.getLogger(logging.INFO, "tfidf.main.log") log_.info("start process") # init process queues = initQueue() # queue = multiprocessing.Queue(QUEUE_MAXSIZE) # save_queue = multiprocessing.Queue(S_QUEUE_MAXSIZE) processes = initProcesses(queues) # process_list,process_producer,process_saver = initProcesses() # start process startProcesses(processes) log_.info("start process over") try:
def pandas_view_info(object, width=1000):
    """Log *object* (typically a DataFrame) at INFO level after tuning
    pandas' display width.

    Args:
        object: Value to emit through the module logger.
        width: Desired pandas display width; 500 is treated as a sentinel
            meaning "no fixed width" (pandas auto-detects).
    """
    if width == 500:
        # 500 requests pandas' automatic terminal-width detection.
        pandas.set_option('display.width', None)
    else:
        pandas.set_option('display.width', width)
    logger.getLogger().info(object)