コード例 #1
0
ファイル: websocket_tail.py プロジェクト: zhoufaye/rest_xops
 def local_tail(self, logfile, webuser, poll_interval=0.1):
     """Stream a local log file to a web user until a stop flag is set.

     The file is read from the beginning and every line is forwarded with
     ``self.send_message``. Once the end of the file is reached the loop
     keeps polling for new lines until the global flag
     ``'deploy_<webuser>'`` becomes truthy, at which point a final notice
     is sent and the method returns.

     Args:
         logfile: Path of the text file to follow.
         webuser: Identifier of the recipient; also used to build the
             name of the stop flag.
         poll_interval: Seconds to sleep when no new line is available.
             The previous implementation re-read the file in a tight loop,
             which keeps a CPU core busy for as long as the file is idle.

     Any exception (for example a missing file) is not raised; it is
     forwarded to the web user through ``self.send_message``.
     """
     # Function-scope import so this method does not depend on the
     # module's import block.
     import time

     stop_key = 'deploy_' + str(webuser)
     # Set up the cross-module global store used to break the loop.
     # NOTE(review): presumably gl._init() (re)creates the shared dict, which
     # would also drop flags belonging to other users - confirm in gl.
     gl._init()
     gl.set_value(stop_key, False)
     try:
         with open(logfile, 'rt') as f:
             f.seek(0, 0)
             while True:
                 is_stop = gl.get_value(stop_key)
                 line = f.readline()
                 if line:
                     self.send_message(webuser, line)
                 elif is_stop:
                     # Only stop once the file is drained: lines that are
                     # still pending are forwarded even after the flag is set.
                     self.send_message(webuser, '[INFO]文件监视结束..')
                     break
                 else:
                     # Nothing new yet: back off instead of busy-waiting.
                     time.sleep(poll_interval)
     except Exception as e:
         self.send_message(webuser, e)
コード例 #2
0
ファイル: websocket_tail.py プロジェクト: zeroCamus/rest_xops
 def remote_tail(self,
                 host,
                 port,
                 user,
                 passwd,
                 logfile,
                 webuser,
                 filter_text=None):
     """Follow a log file on a remote host over SSH and relay its output.

     Opens a password-authenticated SSH session, runs ``tail -f`` on
     ``logfile`` (optionally piped through ``grep``) and forwards each
     chunk of output to ``webuser`` with ``self.send_message``. The SSH
     client is stored in the global store under ``'tail_<webuser>'`` so
     that other code can close it, which is how the tail gets interrupted.

     Args:
         host: Remote hostname or address.
         port: SSH port.
         user: SSH username.
         passwd: SSH password.
         logfile: Path of the remote file to follow. It is passed to the
             remote shell as one quoted literal word, so shell expansion
             (globs, ``~``) is not performed on it.
         webuser: Identifier of the recipient of the relayed output.
         filter_text: Optional ``grep`` pattern; when falsy no filtering
             is applied.

     Exceptions are not raised; they are forwarded to the web user. The
     SSH client is always closed before returning.
     """
     # Function-scope import so this method does not depend on the
     # module's import block.
     import shlex

     try:
         self.client = paramiko.SSHClient()
         self.client.load_system_host_keys()
         # NOTE(review): AutoAddPolicy silently trusts unknown host keys,
         # which leaves the connection open to man-in-the-middle attacks.
         self.client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
         self.client.connect(hostname=host,
                             port=port,
                             username=user,
                             password=passwd)
         interact = SSHClientInteraction(self.client,
                                         timeout=10,
                                         display=False)
         # Wait for output containing '#'.
         # NOTE(review): presumably a root shell prompt - confirm for
         # accounts whose prompt ends in '$'.
         interact.expect('.*#.*')
         # Legacy clean-up kept so previously accepted input yields the
         # same text; the actual protection is the quoting below.
         logfile = logfile.strip().replace('&&',
                                           '').replace('||',
                                                       '').replace('|', '')
         self.send_message(webuser, '[INFO][%s@%s]开始监控日志' % (user, host))
         # Publish the client in the cross-module global store so the tail
         # can be stopped from elsewhere by closing it.
         gl._init()
         gl.set_value('tail_' + str(webuser), self.client)
         # Both values are caller-supplied and end up in a shell command
         # line: quote them so ';', backticks, '$(...)', redirections or
         # newlines cannot inject commands, and use '--' so a leading '-'
         # is not parsed as an option.
         command = 'tail -f -- %s' % shlex.quote(logfile)
         if filter_text:
             filter_text_re = filter_text.strip().replace('&&', '').replace(
                 '||', '').replace('|', '')
             command += '|grep --color=never -- %s' % shlex.quote(
                 filter_text_re)
         interact.send(command)
         interact.tail(
             output_callback=lambda m: self.send_message(webuser, m))
     except Exception as e:
         self.send_message(webuser, e)
     finally:
         try:
             self.client.close()
         except Exception as e:
             self.send_message(webuser, e)
コード例 #3
0
from model import Discriminator, resnet_56, resnet_56_sparse
from data import cifar10

from resnet import ResNet18, ResNet50
from resnet_sprase import ResNet18_sprase, ResNet50_sprase

from collections import OrderedDict
import numpy as np
from torch.autograd import Variable
from resnet_imagenet import resnet101

import torchvision.datasets as datasets
import torchvision.transforms as transforms

import utils.globalvar as gl
gl._init()
import time
import logging
import sys

# The NVIDIA DALI imports are mandatory for this script: if any of them is
# missing, re-raise ImportError with an installation hint.
try:
    import nvidia.dali.plugin.pytorch as plugin_pytorch
    from nvidia.dali.pipeline import Pipeline
    import nvidia.dali.ops as ops
    import nvidia.dali.types as types
except ImportError:
    raise ImportError(
        "Please install DALI from https://www.github.com/NVIDIA/DALI to run this example."
    )

# NOTE(review): hard-coded value; presumably the number of GPUs used by the
# code further down - confirm where it is read.
num_gpu = 4
コード例 #4
0
                    help='Use multi-processing distributed training to launch '
                         'N processes per node, which has N GPUs. This is the '
                         'fastest way to use PyTorch for either single node or '
                         'multi node data parallel training')
# Pruning-related command-line options.
parser.add_argument('--channel_removed_ratio',default=0.2,type=float,help='removed ratio.')
parser.add_argument('--spatial_removed_ratio',default=0.2,type=float,help='removed ratio.')
parser.add_argument('--Is_spatial',action='store_true',help='use spatial module or not,default is channel with conv.')
parser.add_argument('--lasso',action='store_true',help='add l1 regularization to channel module.')
parser.add_argument('--l1_coe',default=1e-8,type=float,help='coe of l1 regularization.')
parser.add_argument('--show',action='store_true',help='show model architecture.')
parser.add_argument('--flops',action='store_true',help='calc flops given a pretrained model.')
parser.add_argument('--debug',action='store_true',help='debug.')
# Arguments are parsed at import time, so the settings below are module-level
# side effects.
args = parser.parse_args()
# Sets CUDA_VISIBLE_DEVICES from --gpu (declared outside this excerpt).
# NOTE(review): if args.gpu is None this stores the string "None" - confirm
# the option's default.
os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
best_acc1 = 0
# Copy the pruning settings into the gvar store.
# NOTE(review): presumably so model code in other modules can read them
# without receiving args - confirm against the gvar module and its readers.
gvar._init()
gvar.set_value('removed_ratio_c',args.channel_removed_ratio)
gvar.set_value('removed_ratio_s',args.spatial_removed_ratio)
gvar.set_value('is_spatial',args.Is_spatial) 
def main():
    if not os.path.isdir(args.save_dir):
    	os.makedirs(args.save_dir)
    
    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
コード例 #5
0
    def post(self, request, format=None):
        """Dispatch a project operation selected by ``request.data['excu']``.

        Actions handled in the visible part of this method:

        * ``init``       - initialise the project repository.
        * ``deploy``     - create a deploy record and start a deployment.
        * ``rollback``   - roll back using an existing record.
        * ``deploymsg``  - start tailing a deployment log to the web user.
        * ``readlog``    - return a deployment log file (only when
          ``scenario == 1``).
        * ``app_start`` / ``app_stop`` - run the given command on a host.
        * ``tail_start`` / ``tail_stop`` - start / stop remote log tailing.

        Args:
            request: The incoming request; ``request.data`` carries the
                action name and its parameters, ``request.user.username``
                identifies the web user.
            format: Not used in the visible part of this method.

        Returns:
            An ``XopsResponse`` for every branch, except ``readlog`` which
            returns a ``FileResponse`` on success.

        NOTE(review): the ``init``, ``deploy`` and ``rollback`` branches read
        ``request.data[...]`` outside any ``try``, so a missing key is
        presumably propagated to the framework - confirm the intended
        behaviour. The local name ``id`` shadows the builtin throughout.
        """
        if request.data['excu'] == 'init':
            # Project initialisation
            id = request.data['id']
            result = self.repo_init(id)
            if result.exited == 0:
                Project.objects.filter(id=id).update(status='Succeed')
                info_logger.info('初始化项目:' + str(id) + ',执行成功!')
                http_status = OK
                msg = '初始化成功!'
            else:
                error_logger.error('初始化项目:%s 执行失败! 错误信息:%s' %
                                   (str(id), result.stderr))
                http_status = BAD
                msg = '初始化项目:%s 执行失败! 错误信息:%s' % (str(id), result.stderr)

            return XopsResponse(msg, status=http_status)

        elif request.data['excu'] == 'deploy':
            # Deployment
            id = request.data['id']
            webuser = request.user.username
            alias = request.data['alias']
            self.start_time = time.strftime("%Y%m%d%H%M%S", time.localtime())
            # Record id is "<alias>_<timestamp>"; it also names the log file.
            record_id = str(alias) + '_' + str(self.start_time)
            name = '部署_' + record_id
            # The record and the project are marked 'Failed' up front.
            # NOTE(review): presumably DeployExcu updates them on success -
            # confirm.
            DeployRecord.objects.create(name=name,
                                        alias=alias,
                                        status='Failed',
                                        project_id=int(id))
            Project.objects.filter(id=id).update(last_task_status='Failed')
            local_log_path = self._path.rstrip('/') + '/' + str(
                id) + '_' + str(request.data['alias']) + '/logs'
            log = local_log_path + '/' + record_id + '.log'
            version = request.data['version'].strip()
            serverid = request.data['server_ids']
            deploy = DeployExcu(webuser, record_id, id)
            deploy.start(log, version, serverid, record_id, webuser,
                         self.start_time)
            return XopsResponse(record_id)

        elif request.data['excu'] == 'rollback':
            # Rollback
            id = request.data['id']
            project_id = request.data['project_id']
            alias = request.data['alias']
            self.start_time = time.strftime("%Y%m%d%H%M%S", time.localtime())
            record_id = str(alias) + '_' + str(self.start_time)
            log = self._path.rstrip('/') + '/' + str(project_id) + '_' + str(
                alias) + '/logs/' + record_id + '.log'
            self.do_rollback(id, log, record_id)
            return XopsResponse(record_id)

        elif request.data['excu'] == 'deploymsg':
            # Read deployment console messages
            try:
                id = request.data['id']
                alias = request.data['alias']
                record = request.data['record']
                scenario = int(request.data['scenario'])
                logfile = self._path.rstrip('/') + '/' + str(id) + '_' + str(
                    alias) + '/logs/' + record + '.log'
                webuser = request.user.username
                # NOTE(review): debug output left in; consider a logger.
                print(webuser)
                msg = Tailf()
                # Only scenario 0 starts a tail; any other value does nothing
                # here and still reports success below.
                if scenario == 0:
                    # Reset this user's stop flag before tailing.
                    gl._init()
                    gl.set_value('deploy_' + str(webuser), False)
                    msg.local_tailf(logfile, webuser)
                http_status = OK
                request_status = '执行成功!'
            except Exception as e:
                # Every failure is reported with the same message,
                # whatever its actual cause.
                http_status = BAD
                request_status = '执行错误:日志文件可能不存在!'
                print(e)
            return XopsResponse(request_status, status=http_status)

        # NOTE(review): 'scenario' is compared with the int 1 here, while the
        # 'deploymsg' branch converts it with int(); a string "1" would not
        # match this branch - confirm how the client sends it.
        elif request.data['excu'] == 'readlog' and request.data[
                'scenario'] == 1:
            # Read deployment log
            try:
                id = request.data['id']
                alias = request.data['alias']
                record = request.data['record']
                # NOTE(review): the path is assembled from request values
                # without normalisation - check for path traversal.
                logfile = self._path.rstrip('/') + '/' + str(id) + '_' + str(
                    alias) + '/logs/' + record + '.log'
                response = FileResponse(open(logfile, 'rb'))
                response['Content-Type'] = 'text/plain'
                return response
            except Exception:
                http_status = BAD
                request_status = '执行错误:文件不存在!'
            return XopsResponse(request_status, status=http_status)

        elif request.data['excu'] == 'app_start':
            # Start application
            try:
                app_start = request.data['app_start']
                host = request.data['host']
                webuser = request.user.username
                auth_info, auth_key = auth_init(host)
                connect = Shell(auth_info,
                                connect_timeout=5,
                                connect_kwargs=auth_key)
                # Only '&&' and '||' are removed from the command.
                # NOTE(review): other shell metacharacters (';', '|',
                # backticks, '$()') pass through unchanged.
                app_start = app_start.strip().replace('&&',
                                                      '').replace('||', '')
                connect.run(app_start, ws=True, webuser=webuser)
                # NOTE(review): not reached if run() raises, so the
                # connection is then left for garbage collection.
                connect.close()
                http_status = OK
                request_status = '执行成功!'
            except Exception as e:
                http_status = BAD
                request_status = '执行错误:' + str(e)
            return XopsResponse(request_status, status=http_status)

        elif request.data['excu'] == 'app_stop':
            # Stop application (same flow as 'app_start')
            try:
                app_stop = request.data['app_stop']
                host = request.data['host']
                webuser = request.user.username
                auth_info, auth_key = auth_init(host)
                connect = Shell(auth_info,
                                connect_timeout=5,
                                connect_kwargs=auth_key)
                app_stop = app_stop.strip().replace('&&', '').replace('||', '')
                connect.run(app_stop, ws=True, webuser=webuser)
                connect.close()
                http_status = OK
                request_status = '执行成功!'
            except Exception as e:
                http_status = BAD
                request_status = '执行错误:' + str(e)
            return XopsResponse(request_status, status=http_status)

        elif request.data['excu'] == 'tail_start':
            # Log monitoring
            try:
                # NOTE(review): str() turns a None filter into the text
                # 'None', which is truthy - confirm what the client sends
                # when no filter is wanted.
                filter_text = str(request.data['filter'])
                app_log_file = request.data['app_log_file']
                host = request.data['host']
                webuser = request.user.username
                # 'host' arrives as a DeviceInfo id and is replaced by that
                # device's hostname; an unknown id raises IndexError, which
                # is reported through the except branch.
                device_info = DeviceInfo.objects.filter(id=int(host)).values()
                host = device_info[0]['hostname']
                auth_type = device_info[0]['auth_type']
                connect_info = ConnectionInfo.objects.filter(
                    hostname=host, auth_type=auth_type).values()
                user = connect_info[0]['username']
                passwd = connect_info[0]['password']
                port = connect_info[0]['port']
                tail = Tailf()
                tail.remote_tail(host,
                                 port,
                                 user,
                                 passwd,
                                 app_log_file,
                                 webuser,
                                 filter_text=filter_text)
                http_status = OK
                request_status = '执行成功!'
            except Exception as e:
                http_status = BAD
                request_status = str(e)
            return XopsResponse(request_status, status=http_status)

        elif request.data['excu'] == 'tail_stop':
            # Stop log monitoring
            try:
                webuser = request.user.username
                # The global store may not have been initialised yet; in that
                # case, or when no entry exists for this user, nothing is
                # closed and success is still reported.
                if hasattr(gl, '_global_dict'):
                    tail_key = 'tail_' + str(webuser)
                    if tail_key in gl._global_dict.keys():
                        # NOTE(review): presumably the SSH client stored by
                        # the tailing code under this key - confirm.
                        client = gl.get_value('tail_' + str(webuser))
                        client.close()
                http_status = OK
                request_status = '执行成功!'
            except Exception as e:
                http_status = BAD
                request_status = str(e)
            return XopsResponse(request_status, status=http_status)