Ejemplo n.º 1
0
def pandas_view_debug(object,width=1000):
    """Log *object* at DEBUG level via the project logger, widening
    pandas' display so wide frames are not line-wrapped.

    A ``width`` of 500 acts as a sentinel for "unlimited"
    (``display.width`` is set to ``None``); any other value is applied
    as given.
    """
    display_width = None if width == 500 else width
    pandas.set_option('display.width', display_width)
    logger.getLogger().debug(object)
Ejemplo n.º 2
0
 def run(self):
     """Consumer worker entry point.

     Pulls batches of raw texts from ``self.queue``, runs them through
     ``self.processText`` and forwards the results to ``self.save_queue``.
     An empty batch is the shutdown sentinel and ends the loop.
     """
     try:
         # Per-process logger: each consumer writes its own log file.
         self.logger = logger.getLogger(
             logging.INFO, "tfidf." + str(self.processNo) + ".log")
         self.logger.info("Consumer_" + str(self.processNo) + " is start!")
         # Quieten jieba and load the custom dictionary once per process.
         jieba.setLogLevel(logging.INFO)
         jieba.load_userdict("dict.txt")
         while True:
             datas = self.queue.get()
             if not datas:
                 # Empty list is the poison pill sent by the producer.
                 self.logger.error("null list exit")
                 break
             datas = self.processText(datas)
             self.save_queue.put(datas)
     except SystemExit:
         self.logger.info("process exit with sys.exit()")
         exit(0)
     except Exception:
         # Narrowed from a bare ``except:`` so KeyboardInterrupt and
         # friends are not swallowed; log the traceback and abort.
         error = traceback.format_exc()
         self.logger.error(str(self.processNo) + " Error")
         self.logger.error(error)
         exit(-1)
Ejemplo n.º 3
0
    def run(self):
        """Producer entry point.

        Restores any saved progress, re-queues unfinished task ids, then
        streams batches of ``self.size`` ids into ``self.queue`` until the
        cursor passes ``DATA_MAXSIZE``; after that it idles until killed.
        """
        # Each process keeps its own independent MySQL connection —
        # sharing one across the whole process tree leaves the connection
        # broken after a child process is restarted.
        # Default: start from the first id.
        ID = startID
        # Let the main process terminate us via SIGTERM.
        signal.signal(signal.SIGTERM, self.exit)
        self.logger = logger.getLogger(logging.INFO, "tfidf.producer.log")
        self.logger.info("Processer is start!")
        # Recover the saved cursor (tID) and the unfinished task ids (tIDs).
        tID, tIDs = getIDFromFile()
        sID, sIDs = getIDsFromIndex(tID, tIDs)
        self.logger.info("getID and IDs:" + str(sID) + "-" + str(sIDs))
        if sID == -1:
            # -1 means no saved progress: fresh run from the beginning.
            self.logger.info("新任务,从头执行")
        else:
            if sID <= startID - DATA_SIZE:
                # Saved cursor lies before the data window: ignore it.
                self.logger.info("错误的sID,从头开始")
            else:
                self.logger.info("获取任务进度成功,在" + str(sID) + "处开始")
                ID = sID
                # Re-dispatch the ids that were in flight when we stopped.
                for i in sIDs:
                    self.logger.info("处理一些未完成的任务ID:" + str(i))
                    texts = self.getTextsFromID(i)
                    self.queue.put(texts)
        try:
            while True:
                if DATA_MAXSIZE < ID:
                    # Everything dispatched: idle here until the main
                    # process tears the tree down.  (Message previously
                    # used a backslash continuation that embedded raw
                    # tabs into the log line.)
                    self.logger.info("ID is on " + str(ID) +
                                     " Task is over sleep to wait exit")
                    time.sleep(10)
                    continue
                texts = self.getTextsFromID(ID)
                self.queue.put(texts)
                # Progress log every 1000 ids.  Must be floor division:
                # with Python 3's true division ``ID / 100`` is a float
                # and ``% 10 == 0`` almost never holds, silencing the log.
                IID = ID // 100
                if IID % 10 == 0:
                    self.logger.info(str(ID) + "/" + str(DATA_MAXSIZE))
                ID += self.size
        except SystemExit:
            self.logger.info("process exit with sys.exit()")
            exit(0)
        except Exception:
            # Narrowed from a bare ``except:``; log traceback and abort.
            error = traceback.format_exc()
            self.logger.error("Producer error")
            self.logger.error(error)
            exit(-1)
Ejemplo n.º 4
0
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
from context import resource_manager
import pandas
import numpy
from tools import logger
import numpy as np
import matplotlib.pyplot as plt

log = logger.getLogger()


def plot_image_file(img):
    """Render an already-loaded image with matplotlib and block on show()."""
    plt.imshow(img)
    plt.show()


def plot_image(narray, w='', h=''):
    """Display *narray* with matplotlib, optionally reshaping it first.

    Args:
        narray: image data as a numpy array.
        w, h: optional target shape; the empty string (the default) means
            "plot as-is".  Both are expected together when used.
    """
    log.info("plot image array:" + str(narray.shape))
    # Was ``w is not ''``: identity comparison against a literal is a
    # SyntaxWarning on Python 3.8+ and implementation-dependent; value
    # equality is what is meant here.
    if w != '':
        narray = narray.reshape(w, h)
    plt.imshow(narray)
    plt.show()


def plot_rho_delta(rho, delta):
    '''
    Plot scatter diagram for rho-delta points

    Args:
        rho   : rho list
Ejemplo n.º 5
0
#!/usr/bin/python3
import tools.logger as logger
import paramiko
import time

# Route paramiko's internal logging to a dedicated file; application
# messages go through the project logger below.
paramiko.util.log_to_file('./logs/SSH.log')
LOG = logger.getLogger('Connector')

# Module-wide SSH client shared as the default for run_cmd().
# NOTE(review): AutoAddPolicy accepts unknown host keys without
# verification — convenient, but open to MITM; confirm this is intended.
ssh_client = paramiko.SSHClient()
ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())


def run_cmd(host, user, passwd, cmd_list, client=ssh_client, port_number=22):
    if type(cmd_list) is str:
        cmd_list = [cmd_list]
    try:
        client.connect(hostname=host,
                       port=port_number,
                       username=user,
                       password=passwd,
                       allow_agent=False)
        shell = client.invoke_shell()
        LOG.info('Successfully connected to {0}'.format(host))
        shell.recv(1024)
        shell.send('environment no more\r\n')
        time.sleep(0.5)
        shell.recv(1024)
    except:
        LOG.warning('Connection to {0} failed'.format(host))
    try:
Ejemplo n.º 6
0
#!/usr/bin/python3
import tools.logger as logger
import paramiko
import time

# Route paramiko's internal logging to a dedicated file; application
# messages go through the project logger below.
paramiko.util.log_to_file('./logs/SSH.log')
LOG = logger.getLogger('Connector')

# Module-wide SSH client shared as the default for run_cmd().
# NOTE(review): AutoAddPolicy accepts unknown host keys without
# verification — convenient, but open to MITM; confirm this is intended.
ssh_client = paramiko.SSHClient()
ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

def run_cmd(host, user, passwd, cmd_list, client=ssh_client, port_number=22):
    if type(cmd_list) is str:
        cmd_list = [cmd_list]
    try:
        client.connect(
            hostname=host,
            port=port_number,
            username=user,
            password=passwd,
            allow_agent=False)
        shell = client.invoke_shell()
        LOG.info('Successfully connected to {0}'.format(host))
        shell.recv(1024)
        shell.send('environment no more\r\n')
        time.sleep(0.5)
        shell.recv(1024)
    except:
        LOG.warning('Connection to {0} failed'.format(host))
    try:
Ejemplo n.º 7
0
 def run(self):
     """Saver entry point.

     Restores persisted save-progress, then consumes processed batches
     from ``self.save_queue``, stores each one, and advances the
     contiguous-progress cursor ``self.ID`` plus the set ``self.IDs`` of
     out-of-order finished ids.  Once everything up to ``DATA_MAXSIZE``
     is saved it kills the whole process tree.
     """
     try:
         # The "done up to" cursor must point one batch BEFORE the first
         # id, otherwise the very first batch would be skipped.
         self.ID = startID - DATA_SIZE
         self.IDs = set()
         self.logger = logger.getLogger(logging.INFO, "tfidf.saver.log")
         self.logger.info("Saver is start!")
         # Reload saved progress so incoming ids line up with it.  The
         # reloaded values are also needed to re-save progress if the
         # recovery itself fails.
         saveID, saveIDs = getIDFromFile()
         self.logger.info("saver get ID and IDs:" + str(saveID) + "-" +
                          str(saveIDs))
         if saveID > 0:
             self.ID = saveID
             self.IDs = set(saveIDs)
         # Let the main process stop us via SIGTERM.
         signal.signal(signal.SIGTERM, self.exit)
     except Exception:
         self.logger.info("process exit with sys.exit()")
         pid = getPidFromFile()
         killandExit(pid)
         self.logger.info("tfidfMaker error!!!! exit!!!!")
     try:
         while True:
             data = self.save_queue.get()
             if data is None:
                 continue
             if 0 == len(data):
                 continue
             if -1 == data[0]:
                 # Sentinel batch: persist progress and stop.
                 setIDToFile(self.ID, self.IDs)
                 break
             self.saveData(data)
             self.IDs.add(data[0])
             self.ID, self.IDs = manageIndex(self.ID, self.IDs)
             # Progress log every 5000 ids.  Floor division: Python 3's
             # ``/`` yields a float and would defeat the modulo check.
             ID_log = self.ID // 100
             if 0 == ID_log % 50:
                 self.logger.info("save index on " + str(self.ID))
             setIDToFile(self.ID, self.IDs)
             # All data saved: signal the main process and terminate
             # the whole process tree.
             if self.ID + DATA_SIZE >= DATA_MAXSIZE:
                 self.logger.info("tfidfMaker over!!!! exit!!!!")
                 self.ID = DATA_MAXSIZE
                 # Bug fix: this line previously read ``self.ID = set()``
                 # which clobbered the cursor just assigned above; the
                 # pending-id set is what should be cleared.
                 self.IDs = set()
                 pid = getPidFromFile()
                 killandExit(pid)
     except SystemExit:
         self.logger.info("process exit with sys.exit()")
         exit(0)
     except Exception:
         # Persist progress before dying so a restart can resume.
         setIDToFile(self.ID, self.IDs)
         self.logger.error("saver storeID :" + str(self.ID) + "-" +
                           str(self.IDs))
         error = traceback.format_exc()
         self.logger.error("saver error")
         self.logger.error(error)
         exit(-1)
Ejemplo n.º 8
0
# Global configuration                                        ####
################################################################

PROCESS_SIZE = 7  # number of worker processes for text processing
DATA_MAXSIZE = 16145057  # total number of rows in the database
# rawdataDB.zl_project.select().count()
# Batch size used for fetching, processing and saving.  Must NOT be
# changed once a run has started, or saved progress becomes inconsistent.
DATA_SIZE = 100  # experimental value; could be raised
QUEUE_MAXSIZE = 2  # capacity of the texts transfer queue
S_QUEUE_MAXSIZE = 1  # capacity of the save-results transfer queue
startID = 3512466  # first id with non-empty data
################################################################
################################################################

# Get Logger
log_ = logger.getLogger(logging.INFO, "tfidf.main.log")

log_.info("start process")

# init process
queues = initQueue()
# queue = multiprocessing.Queue(QUEUE_MAXSIZE)
# save_queue = multiprocessing.Queue(S_QUEUE_MAXSIZE)
processes = initProcesses(queues)
# process_list,process_producer,process_saver = initProcesses()
# start process
startProcesses(processes)

log_.info("start process over")

try:
Ejemplo n.º 9
0
def pandas_view_info(object,width=1000):
    """Log *object* at INFO level via the project logger, widening
    pandas' display first.

    ``width == 500`` is a sentinel meaning unlimited (``display.width``
    becomes ``None``); any other value is applied directly.
    """
    pandas.set_option('display.width', width if width != 500 else None)
    logger.getLogger().info(object)