def run(self):
    # todo: 1) do all ansible calls through subprocess
    #       2) move to Python 3 and asyncIO all in one thread + executors
    #          ... -> eliminate multiprocessing here,
    #          ... possible to use simple logging, with redis handler
    self.log.info("Creating VM Spawner, HealthChecker, Terminator")
    self.spawner = Spawner(self.opts)
    self.checker = HealthChecker(self.opts)
    self.terminator = Terminator(self.opts)
    self.vm_manager = VmManager(
        opts=self.opts,
        logger=self.log,
    )
    self.vm_manager.post_init()

    self.log.info("Starting up VM EventHandler")
    self.event_handler = EventHandler(self.opts,
                                      vmm=self.vm_manager,
                                      terminator=self.terminator)
    self.event_handler.post_init()
    self.event_handler.start()

    self.log.info("Starting up VM Master")
    self.vm_master = VmMaster(self.opts,
                              vmm=self.vm_manager,
                              spawner=self.spawner,
                              checker=self.checker)
    self.vm_master.start()

    setproctitle("Copr VMM base process")
def test_obfuscate_process_password():
    original_title = setproctitle.getproctitle()

    setproctitle.setproctitle("pgcli user=root password=secret host=localhost")
    obfuscate_process_password()
    title = setproctitle.getproctitle()
    expected = "pgcli user=root password=xxxx host=localhost"
    assert title == expected

    setproctitle.setproctitle("pgcli user=root password=top secret host=localhost")
    obfuscate_process_password()
    title = setproctitle.getproctitle()
    expected = "pgcli user=root password=xxxx host=localhost"
    assert title == expected

    setproctitle.setproctitle("pgcli user=root password=top secret")
    obfuscate_process_password()
    title = setproctitle.getproctitle()
    expected = "pgcli user=root password=xxxx"
    assert title == expected

    setproctitle.setproctitle("pgcli postgres://root:secret@localhost/db")
    obfuscate_process_password()
    title = setproctitle.getproctitle()
    expected = "pgcli postgres://root:xxxx@localhost/db"
    assert title == expected

    setproctitle.setproctitle(original_title)
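The test above exercises an obfuscate_process_password() helper that is not shown here. A minimal sketch that would satisfy these assertions (an assumption, not necessarily the project's actual implementation):

import re
from setproctitle import getproctitle, setproctitle

def obfuscate_process_password():
    # Assumed implementation: mask the password embedded in the current process title.
    title = getproctitle()
    if '://' in title:
        # URL form, e.g. postgres://user:password@host/db
        title = re.sub(r":(.*):(.*)@", r":\1:xxxx@", title)
    elif "=" in title:
        # key=value form: mask everything between "password=" and the next key or end of string
        title = re.sub(r"password=(.+?)((\s[a-zA-Z]+=)|$)", r"password=xxxx\2", title)
    setproctitle(title)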
def supervise(pid, job_id, timeout=1, log_file=None):
    """
    Supervise a job process, entering a loop that ends only when the job
    terminates.

    :param pid: the process id
    :type pid: int
    :param job_id: the job id
    :type job_id: int
    :param timeout: timeout value in seconds
    :type timeout: float
    :param str log_file: Optional log file location. If specified, log
        messages will be appended to this file. If not specified, log
        messages will be printed to the console.
    """
    # Set the name of this process (as reported by /bin/ps)
    setproctitle('openquake supervisor for job_id=%s job_pid=%s'
                 % (job_id, pid))
    ignore_sigint()

    if log_file is not None:
        logging.root.addHandler(SupervisorLogFileHandler(job_id, log_file))
    else:
        logging.root.addHandler(SupervisorLogStreamHandler(job_id))

    supervisor = SupervisorLogMessageConsumer(job_id, pid, timeout)
    supervisor.run()
def main():
    """
    Main program which is called when the clacks agent process gets started.
    It does the main forking / OS related tasks.
    """
    # Set process list title
    os.putenv('SPT_NOENV', 'non_empty_value')
    setproctitle("clacks-agent")

    # Initialize core environment
    env = Environment.getInstance()
    if not env.base:
        env.log.critical("Clacks agent needs a 'core.base' to operate on")
        exit(1)

    env.log.info("Clacks %s is starting up (server id: %s)" % (VERSION, env.id))

    if env.config.get('core.profile'):
        import cProfile
        import clacks.common.lsprofcalltree
        p = cProfile.Profile()
        p.runctx('mainLoop(env)', globals(), {'env': env})
        #pylint: disable=E1101
        k = clacks.common.lsprofcalltree.KCacheGrind(p)
        data = open('prof.kgrind', 'w+')
        k.output(data)
        data.close()
    else:
        mainLoop(env)
def main() -> None:
    '''Runs server'''
    # Parse options
    define('production', default=False, help='run in production mode', type=bool)
    options.parse_command_line()

    # Set server name
    pname = settings.process_name if settings.process_name else None
    if pname:
        setproctitle(pname)

    # Register IRC server
    server = IRCServer(settings=ircdsettings)
    for address, port in ircdsettings['listen']:
        server.listen(port, address=address)

    # Start profiling
    if settings.profiling:
        import yappi
        yappi.start()

    # Setup autoreload
    autoreload.start()

    # Run application
    IOLoop.instance().start()
def main():
    parser = setup_parser()
    argcomplete.autocomplete(parser)
    options = parser.parse_args()
    _setup_logger(options)

    # Support the deprecated -c option
    if getattr(options, 'config', None) is not None:
        options.configs.append(options.config)

    if options.subparser in ('report', 'logs', 'metrics', 'run'):
        _default_region(options)
        _default_account_id(options)

    try:
        command = options.command
        if not callable(command):
            command = getattr(
                importlib.import_module(command.rsplit('.', 1)[0]),
                command.rsplit('.', 1)[-1])

        # Set the process name to something cleaner
        process_name = [os.path.basename(sys.argv[0])]
        process_name.extend(sys.argv[1:])
        setproctitle(' '.join(process_name))

        command(options)
    except Exception:
        if not options.debug:
            raise
        traceback.print_exc()
        pdb.post_mortem(sys.exc_info()[-1])
def main():
    from solarsan import logging
    logger = logging.getLogger(__name__)
    from solarsan.cluster.models import Peer
    from solarsan.conf import rpyc_conn_config
    from rpyc.utils.server import ThreadedServer
    #from rpyc.utils.server import ThreadedZmqServer, OneShotZmqServer
    from setproctitle import setproctitle
    from .service import CLIService
    import rpyc

    title = 'SolarSan CLI'
    setproctitle('[%s]' % title)

    local = Peer.get_local()
    cluster_iface_bcast = local.cluster_nic.broadcast

    # Allow all public attrs, because exposed_ is stupid and should be a
    # f*****g decorator.
    #t = ThreadedZmqServer(CLIService, port=18863,
    #t = OneShotZmqServer(CLIService, port=18863,
    t = ThreadedServer(CLIService, port=18863,
                       registrar=rpyc.utils.registry.UDPRegistryClient(
                           ip=cluster_iface_bcast,
                           #logger=None,
                           logger=logger,
                       ),
                       auto_register=True,
                       logger=logger,
                       #logger=None,
                       protocol_config=rpyc_conn_config)
    t.start()
def supervise(pid, job_id, timeout=1, log_file=None):
    """
    Supervise a job process, entering a loop that ends only when the job
    terminates.

    :param int pid: the process id
    :param int job_id: the job id
    :param float timeout: timeout value in seconds
    :param str log_file: Optional log file location. If specified, log
        messages will be appended to this file. If not specified, log
        messages will be printed to the console.
    """
    the_job = OqJob.objects.get(id=job_id)
    calc_id = the_job.calculation.id
    if the_job.hazard_calculation is not None:
        calc_domain = 'hazard'
    else:
        calc_domain = 'risk'

    # Set the name of this process (as reported by /bin/ps)
    setproctitle('openquake supervisor for %s calc_id=%s job_pid=%s'
                 % (calc_domain, calc_id, pid))

    ignore_sigint()
    start_logging(calc_id, calc_domain, log_file)

    supervisor = SupervisorLogMessageConsumer(job_id, pid, timeout)
    supervisor.run()
def __init__(self, name=None, description=None, epilog=None, debug_flag=True):
    self.db = ConfigDB()
    self.name = os.path.basename(sys.argv[0])

    reload(sys)
    sys.setdefaultencoding('utf-8')
    setproctitle('%s %s' % (self.name, ' '.join(sys.argv[1:])))
    signal.signal(signal.SIGINT, self.SIGINT)

    if name is None:
        name = self.name

    self.logger = SoundforestLogger()
    self.log = self.logger.default_stream

    self.parser = argparse.ArgumentParser(
        prog=name,
        description=description,
        epilog=epilog,
        add_help=True,
        conflict_handler='resolve',
    )
    self.subcommand_parser = None
    self.subcommands = None

    if debug_flag:
        self.parser.add_argument('--debug', action='store_true', help='Show debug messages')
def main(self):
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', metavar='CONFIG', default=DEFAULT_CONFIGFILE,
                        help='Middleware config file')
    parser.add_argument('-p', type=int, metavar='PORT', default=5500,
                        help="WebSockets server port")
    args = parser.parse_args()

    configure_logging('/var/log/containerd.log', 'DEBUG')
    setproctitle.setproctitle('containerd')

    gevent.signal(signal.SIGTERM, self.die)
    gevent.signal(signal.SIGQUIT, self.die)

    self.config = args.c
    self.init_datastore()
    self.init_dispatcher()
    self.init_mgmt()
    self.init_nat()
    self.init_ec2()
    self.logger.info('Started')

    # WebSockets server
    kwargs = {}
    s4 = WebSocketServer(('', args.p), ServerResource({
        '/console': ConsoleConnection,
    }, context=self), **kwargs)

    s6 = WebSocketServer(('::', args.p), ServerResource({
        '/console': ConsoleConnection,
    }, context=self), **kwargs)

    serv_threads = [gevent.spawn(s4.serve_forever),
                    gevent.spawn(s6.serve_forever)]
    gevent.joinall(serv_threads)
def main(): setproctitle.setproctitle("swankvm") parser = argparse.ArgumentParser(description='runkvm arguments.') parser.add_argument('--testname', '-t', action='store', help='The name of the test to run.') parser.add_argument('--hostname', '-H', action='store', default='', help='The name of the host to run.') parser.add_argument('--compile', action="store_true", help='compile the source on host <hostname>.') parser.add_argument('--install', action="store_true", help='run make install module_install .') parser.add_argument('--x509', action="store_true", help='tell the guest to setup the X509 certs in NSS.') parser.add_argument('--final', action="store_true", help='run final.sh on the host.') parser.add_argument('--reboot', action="store_true", help='first reboot the host') # unused parser.add_argument('--timer', default=120, help='timeout for each command for expect.') args = parser.parse_args() if args.final: prompt = "\[root@%s %s\]# "%(args.hostname, args.testname) child = connect_to_kvm(args, prompt) else : child = connect_to_kvm(args) if not child: sys.exit("Failed to launch/connect to %s - aborted"%args.hostname) if args.compile: compile_on(args,child) if args.install: make_install(args,child) if (args.testname and not args.final): run_test(args,child) if args.final: run_final(args,child)
def run(self):
    container = create_container(self.config)

    install_plugins(container, self.config.get('plugins', {}))
    install_interfaces(container, self.config.get('interfaces', {}))

    for cls_name in self.args.get('--interface', ()):
        cls = import_object(cls_name)
        container.install(cls)

    if self.args.get('--debug'):
        from gevent.backdoor import BackdoorServer
        backdoor = BackdoorServer(('127.0.0.1', 5005), locals={'container': container})
        gevent.spawn(backdoor.serve_forever)

    def handle_signal():
        logger.info('caught SIGINT/SIGTERM, pid=%s', os.getpid())
        container.stop()
        container.join()
        sys.exit(0)

    gevent.signal(signal.SIGINT, handle_signal)
    gevent.signal(signal.SIGTERM, handle_signal)

    setproctitle('lymph-instance (identity: %s, endpoint: %s, config: %s)' % (
        container.identity,
        container.endpoint,
        self.config.source,
    ))

    container.start(register=not self.args.get('--isolated', False))
    if self.args.get('--reload'):
        set_source_change_callback(container.stop)
    container.join()
def init(self):
    global use_setproctitle
    if use_setproctitle:
        setproctitle("mongodb_log %s" % self.topic)

    self.mongoconn = Connection(self.mongodb_host, self.mongodb_port)
    self.mongodb = self.mongoconn[self.mongodb_name]
    # set_profiling_level is a method; call it rather than assigning to it
    self.mongodb.set_profiling_level(SLOW_ONLY)
    self.collection = self.mongodb[self.collname]
    self.collection.count()

    self.queue.cancel_join_thread()

    rospy.init_node(WORKER_NODE_NAME % (self.nodename_prefix, self.id, self.collname),
                    anonymous=False)

    self.subscriber = None
    while not self.subscriber:
        try:
            msg_class, real_topic, msg_eval = rostopic.get_topic_class(self.topic, blocking=True)
            self.subscriber = rospy.Subscriber(real_topic, msg_class, self.enqueue, self.topic)
        except rostopic.ROSTopicIOException:
            print("FAILED to subscribe, will keep trying %s" % self.name)
            time.sleep(randint(1, 10))
        except rospy.ROSInitException:
            print("FAILED to initialize, will keep trying %s" % self.name)
            time.sleep(randint(1, 10))
            self.subscriber = None
def __init__(self, stream, gate):
    self.stream = stream
    self.gate = gate
    aj.master = False
    os.setpgrp()
    setproctitle.setproctitle(
        '%s worker [%s]' % (
            sys.argv[0],
            self.gate.name
        )
    )
    set_log_params(tag=self.gate.log_tag)
    init_log_forwarding(self.send_log_event)
    logging.info(
        'New worker "%s" PID %s, EUID %s, EGID %s',
        self.gate.name,
        os.getpid(),
        os.geteuid(),
        os.getegid(),
    )
    self.context = Context(parent=aj.context)
    self.context.session = self.gate.session
    self.context.worker = self
    self.handler = HttpMiddlewareAggregator([
        AuthenticationMiddleware.get(self.context),
        CentralDispatcher.get(self.context),
    ])
    self._master_config_reloaded = Event()
def __init__(self, name=None, description=None, epilog=None, debug_flag=True, subcommands=False):
    self.name = os.path.basename(sys.argv[0])
    setproctitle('%s %s' % (self.name, ' '.join(sys.argv[1:])))
    signal.signal(signal.SIGINT, self.SIGINT)

    reload(sys)
    sys.setdefaultencoding('utf-8')

    if name is None:
        name = self.name

    # Set to True to avoid any messages from self.message to be printed
    self.silent = False

    self.logger = Logger(self.name)
    self.log = self.logger.default_stream

    self.parser = argparse.ArgumentParser(
        prog=name,
        description=description,
        epilog=epilog,
        add_help=True,
        conflict_handler='resolve',
    )

    if debug_flag:
        self.parser.add_argument('--debug', action='store_true', help='Show debug messages')

    if subcommands:
        self.commands = {}
        self.command_parsers = self.parser.add_subparsers(
            dest='command',
            help='Please select one command mode below',
            title='Command modes'
        )
def _worker(in_queue, out_queue, worker_id):
    try:
        import setproctitle
        setproctitle.setproctitle("imageWorker")
    except ImportError:
        pass

    done = False
    while not done:
        if not in_queue.empty():
            obj = in_queue.get()
            # if a bool is passed down the queue, set the done flag
            if isinstance(obj, bool):
                print "got a bool down the pipe; shutting down"
                done = True
                import sys
                #sys.exit()
            else:
                url, batch_id = obj
                w, h, buffers = _downloadImage(url, worker_id)
                if w != None:
                    #print "putting loaded buffers [%d] on out queue" % len(buffers)
                    out_queue.put((url, batch_id, w, h, buffers))
                else:
                    #print "putting blank buffer on out queue."
                    out_queue.put((url, batch_id, 0, 0, []))
        pygame.time.wait(SLEEP_TIME)
def run(self, *args, **kwargs):
    """
    The Node main method, running in a child process (similar to Process.run()
    but also accepts args).
    A child class can override this method, but it needs to call
    super().run(*args, **kwargs) for the node to start properly and call
    update() as expected.
    :param args: arguments to pass to update()
    :param kwargs: keyword arguments to pass to update()
    :return: last exitcode returned by update()
    """
    # TODO : make use of the arguments ? since run is now the target for Process...
    exitstatus = None  # keeping the semantic of multiprocessing.Process : running process has None

    if setproctitle and self.new_title:
        setproctitle.setproctitle("{0}".format(self.name))

    print('[{proc}] Proc started as [{pid}]'.format(proc=self.name, pid=self.ident))

    with self.context_manager(*args, **kwargs) as cm:
        if cm:
            cmargs = maybe_tuple(cm)
            # prepending context manager, to be able to access it from target
            args = cmargs + args
        exitstatus = self.eventloop(*args, **kwargs)

    logging.debug("[{self.name}] Proc exited.".format(**locals()))
    return exitstatus  # returning last exit status from the update function
def run(self, host=None, port=None, debug=None, workers=None):
    """
    Start the server.
    :param host: IP address to listen on
    :param port: port to listen on
    :param debug: whether to run in debug mode
    :param workers: number of workers
    :return:
    """
    self._validate_cmds()

    if host is None:
        host = constants.SERVER_HOST
    if port is None:
        port = constants.SERVER_PORT
    if debug is not None:
        self.debug = debug

    workers = workers if workers is not None else 1

    logger.info('Running server on %s, debug: %s, workers: %s',
                (host, port), self.debug, workers)

    self._prepare_server((host, port))

    setproctitle.setproctitle(self._make_proc_name('master'))
    # Signals can only be set up in the main thread
    self._handle_parent_proc_signals()
    self._spawn_workers(workers, self._worker_run)
def daemonize_server(port_or_path, fix_title=False):
    process_id = os.fork()
    if process_id < 0:
        raise Error('Unable to fork')
    elif process_id != 0:
        return

    # noinspection PyNoneFunctionAssignment,PyArgumentList
    process_id = os.setsid()
    if process_id == -1:
        sys.exit(1)

    for fd in range(3, resource.getrlimit(resource.RLIMIT_NOFILE)[0]):
        try:
            os.close(fd)
        except OSError:
            pass

    devnull = os.devnull if hasattr(os, 'devnull') else '/dev/null'
    devnull_fd = os.open(devnull, os.O_RDWR)
    for fd in range(3):
        # noinspection PyTypeChecker
        os.dup2(devnull_fd, fd)

    os.umask(0o27)
    os.chdir('/')

    if fix_title and setproctitle is not None:
        # noinspection PyCallingNonCallable
        setproctitle('papa daemon from %s' % os.path.basename(sys.argv[0]))

    socket_server(port_or_path)
def run(self):
    setproctitle('satori: {0}'.format(self.name))
    logging.info('%s starting', self.name)

    signal(SIGTERM, self.handle_signal)
    signal(SIGINT, self.handle_signal)

    # let ssl register OpenSSL callbacks, so that they do not interfere with callbacks from OpenSSL.crypto
    import ssl
    # let pyOpenSSL register OpenSSL callbacks
    import OpenSSL.SSL
    import OpenSSL.crypto
    # tell libpq not to register OpenSSL callbacks - hopefully no DB connection has been created yet
    libpq = ctypes.cdll.LoadLibrary('libpq.so')
    libpq.PQinitSSL(0)

    try:
        self.do_run()
    except SystemExit:
        logging.info('%s exited (SystemExit)', self.name)
    except:
        logging.exception('%s exited with error', self.name)
    else:
        logging.info('%s exited', self.name)
def start_worker_for_queue(flow='simple_queue_processor', queue='zmon:queue:default', **execution_context):
    """
    Starting execution point to the workflows
    """
    known_flows = {'simple_queue_processor': flow_simple_queue_processor}
    if flow not in known_flows:
        logger.exception('Bad role: %s' % flow)
        sys.exit(1)

    logger.info('Starting worker with pid=%s, flow type: %s, queue: %s, execution_context: %s',
                os.getpid(), flow, queue, execution_context)

    setproctitle.setproctitle('zmon-worker {} {}'.format(flow, queue))

    # start Flow Reactor here
    FlowControlReactor.get_instance().start()

    exit_code = 0
    try:
        known_flows[flow](queue=queue, **execution_context)
    except (KeyboardInterrupt, SystemExit):
        logger.warning('Caught user signal to stop consumer: finishing!')
    except Exception:
        logger.exception('Exception in start_worker(). Details: ')
        exit_code = 2
    finally:
        FlowControlReactor.get_instance().stop()
        sys.exit(exit_code)
def run(self): """Runs the worker and consumes messages from RabbitMQ. Returns only after `shutdown()` is called. """ # Lazy import setproctitle. # There is bug with the latest version of Python with # uWSGI and setproctitle combination. # Watch: https://github.com/unbit/uwsgi/issues/1030 from setproctitle import setproctitle setproctitle("kuyruk: worker on %s" % self.queue) self._setup_logging() signal.signal(signal.SIGINT, self._handle_sigint) signal.signal(signal.SIGTERM, self._handle_sigterm) signal.signal(signal.SIGHUP, self._handle_sighup) signal.signal(signal.SIGUSR1, self._handle_sigusr1) signal.signal(signal.SIGUSR2, self._handle_sigusr2) self._started = os.times()[4] for f in (self._watch_load, self._shutdown_timer): t = threading.Thread(target=f) t.daemon = True t.start() signals.worker_start.send(self.kuyruk, worker=self) self._consume_messages() signals.worker_shutdown.send(self.kuyruk, worker=self) logger.debug("End run worker")
def run_rule_async(rule_name, settings):
    setproctitle("inferno - %s" % rule_name)
    signal.signal(signal.SIGHUP, signal.SIG_IGN)
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    signal.signal(signal.SIGTERM, signal.SIG_IGN)

    rules = get_rules_by_name(
        rule_name, settings['rules_directory'], immediate=False)
    if rules and len(rules) > 0:
        rule = rules[0]
    else:
        log.error('No rule exists with rule_name: %s' % rule_name)
        raise Exception('No rule exists with rule_name: %s' % rule_name)

    pid_dir = pid.pid_dir(settings)
    log.info("Running %s" % rule.name)
    try:
        pid.create_pid(pid_dir, rule, str(os.getpid()))
        execute_rule(rule, settings)
    except Exception as e:
        log.exception('%s: %s', rule_name, e)
        if not rule.retry:
            pid.create_last_run(pid_dir, rule)
    else:
        pid.create_last_run(pid_dir, rule)
    finally:
        pid.remove_pid(pid_dir, rule)
        os._exit(0)
def run(self): self._name = "BuildActor-{0:d} job {1}".format(self.pid, self.job_id) setproctitle.setproctitle('mob2_build') logging.config.dictConfig(self._log_conf) self._log = logging.getLogger(__name__) #change the status to aware the job that this job is currently building job = self.get_job() job.status.state = Status.BUILDING job.save() self.make_job_environement(job) os.chdir(job.dir) #import data needed for the job #build the cmdline??? seulement pour ClJob ??? #ou action generique de job et joue sur le polymorphism? #perform data conversion #how to decide which data must be convert? # the acces log must record # the submited jobs to mobyle # or # the submitted job to execution? # #acc_log = logging.getLogger( 'access') #acc_log.info( "test access log {0}".format(self._name)) #the monitor is now aware of the new status job.status.state = Status.TO_BE_SUBMITTED job.save() self._log.info( "{0} put job {1} with status {2} in table".format(self._name, job.id, job.status))
def _set_process_title():
    try:
        import setproctitle
    except ImportError:
        pass
    else:
        setproctitle.setproctitle("kupfer")
def ensure_running(config):
    """
    Verify that there is an automount daemon servicing a mountpoint.
    If there isn't, start one.
    If we're configured to run in the foreground, this method never returns.
    """
    mountpoint_dir = config['mountpoint_dir']

    # is the daemon running?
    procs = watchdog.find_by_attrs("syndicate-automount-daemon", {"mounts": mountpoint_dir})
    if len(procs) > 0:
        # it's running
        print "Syndicate automount daemon already running for %s (PID(s): %s)" % \
              (mountpoint_dir, ",".join([str(watchdog.get_proc_pid(p)) for p in procs]))
        return True

    if config.get("foreground", None):
        main(config)
    else:
        logfile_path = None
        pidfile_path = config.get("pidfile", None)
        if config.has_key("logdir"):
            logfile_path = os.path.join(config['logdir'], "syndicated.log")

        title = watchdog.attr_proc_title("syndicate-automount-daemon", {"mounts": mountpoint_dir})
        setproctitle.setproctitle(title)

        daemon.daemonize(lambda: main(config),
                         logfile_path=logfile_path, pidfile_path=pidfile_path)

    return True
def run(self, debug=None):
    """
    :param debug:
    :return:
    """
    self._validate_cmds()

    if debug is not None:
        self.debug = debug

    if os.getenv(constants.WORKER_ENV_KEY) != 'true':
        # parent process
        logger.info('Connect to server, debug: %s, workers: %s',
                    self.debug, self.spawn_count)

        # set the process title
        setproctitle.setproctitle(self._make_proc_name('worker:master'))
        # Signals can only be set up in the main thread
        self._handle_parent_proc_signals()
        self._spawn_workers(self.spawn_count)
    else:
        # child process
        setproctitle.setproctitle(self._make_proc_name('worker:worker'))
        self._worker_run()
def run(self): setproctitle("Event Handler") self.do_recycle_proc = Recycle(terminator=self.terminator, recycle_period=self.recycle_period) self.do_recycle_proc.start() self.start_listen()
def __init__(self, name=None, description=None, epilog=None, debug_flag=True):
    self.name = os.path.basename(sys.argv[0])
    setproctitle('%s %s' % (self.name, ' '.join(sys.argv[1:])))
    signal.signal(signal.SIGINT, self.SIGINT)

    reload(sys)
    sys.setdefaultencoding('utf-8')

    if name is None:
        name = self.name

    # Set to True to avoid any messages from self.message to be printed
    self.silent = False

    self.logger = Logger(self.name)
    self.log = self.logger.default_stream

    self.subcommand_parser = None
    self.parser = argparse.ArgumentParser(
        prog=name,
        description=description,
        formatter_class=argparse.RawTextHelpFormatter,
        epilog=epilog,
        add_help=True,
        conflict_handler='resolve',
    )

    if debug_flag:
        self.parser.add_argument('--debug', action='store_true', help='Show debug messages')

    self.parser.add_argument('--insecure', action='store_false', help='No HTTPS certificate validation')
    self.parser.add_argument('-B', '--browser',
                             choices=('chrome', 'chromium', 'firefox'),
                             help='Browser for cookie stealing')
def __init__(self, config, runner, pilot_id, rpc=None, debug=False,
             run_timeout=180, backoff_delay=1):
    self.config = config
    self.runner = runner
    self.pilot_id = pilot_id
    self.hostname = gethostname()
    self.rpc = rpc
    self.debug = debug
    self.run_timeout = run_timeout
    self.backoff_delay = backoff_delay
    self.resource_interval = 1.0  # seconds between resource measurements
    self.running = True
    self.tasks = {}

    try:
        setproctitle('iceprod2_pilot({})'.format(pilot_id))
    except Exception:
        pass

    logger.warning('pilot_id: %s', self.pilot_id)
    logger.warning('hostname: %s', self.hostname)

    # hint at resources for pilot
    # don't pass them as raw, because that overrides condor
    if 'resources' in config['options']:
        for k in config['options']['resources']:
            v = config['options']['resources'][k]
            name = 'NUM_' + k.upper()
            if k in ('cpu', 'gpu'):
                name += 'S'
            os.environ[name] = str(v)

    self.resources = Resources(debug=self.debug)

    self.start_time = time.time()
def loop(args): # create config and model collection objects, and retrieve the run config configs = {} models = {} configs.update({'run': RunConfig(args.config_file)}) # set GPU-related environmental options and config settings os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) if args.gpu is not None else '' setproctitle('RGN ' + configs['run'].names['run'] + ' on ' + os.getenv('CUDA_VISIBLE_DEVICES', 'CPU')) # derived files and directories base_dir = args.base_directory run_dir = os.path.join(base_dir, RUNS_DIRNAME, configs['run'].names['run'], configs['run'].names['dataset']) data_dir = os.path.join(base_dir, DATAS_DIRNAME, configs['run'].names['dataset']) checkpoints_dir = os.path.join(run_dir, CHECKPOINTS_DIRNAME, '') logs_dir = os.path.join(run_dir, LOGS_DIRNAME, '') stdout_err_file = os.path.join(base_dir, LOGS_DIRNAME, configs['run'].names['run'] + '.log') alphabet_file = os.path.join(data_dir, ALPHABETS_DIRNAME, configs['run'].names['alphabet'] + '.csv') if configs['run'].names['alphabet'] is not None else None # this is all for evaluation models (including training, so training_batch_size is for evaluation) full_training_glob = os.path.join(data_dir, FULL_TRAINING_DIRNAME, configs['run'].io['full_training_glob']) sample_training_glob = os.path.join(data_dir, FULL_TRAINING_DIRNAME, configs['run'].io['sample_training_glob']) training_batch_size = configs['run'].evaluation['num_training_samples'] training_invocations = configs['run'].evaluation['num_training_invocations'] validation_glob = os.path.join(data_dir, SAMPLE_VALIDATION_DIRNAME, configs['run'].io['sample_validation_glob']) validation_batch_size = configs['run'].evaluation['num_validation_samples'] validation_invocations = configs['run'].evaluation['num_validation_invocations'] testing_glob = os.path.join(data_dir, FULL_TESTING_DIRNAME, configs['run'].io['full_testing_glob']) testing_batch_size = configs['run'].evaluation['num_testing_samples'] testing_invocations = configs['run'].evaluation['num_testing_invocations'] if not args.prediction_only: eval_num_epochs = None else: eval_num_epochs = 1 training_batch_size = validation_batch_size = testing_batch_size = 1 training_invocations = validation_invocations = testing_invocations = 1 # redirect stdout/err to file sys.stderr.flush() if not os.path.exists(os.path.dirname(stdout_err_file)): os.makedirs(os.path.dirname(stdout_err_file)) stdout_err_file_handle = open(stdout_err_file, 'w') os.dup2(stdout_err_file_handle.fileno(), sys.stderr.fileno()) sys.stdout = stdout_err_file_handle # select device placement taking into consideration the interaction between training and evaluation models if configs['run'].computing['training_device'] == 'GPU' and configs['run'].computing['evaluation_device'] == 'GPU': fod_training = {'/cpu:0': ['point_to_coordinate']} fod_evaluation = {'/cpu:0': ['point_to_coordinate']} dd_training = '' dd_evaluation = '' elif configs['run'].computing['training_device'] == 'GPU' and configs['run'].computing['evaluation_device'] == 'CPU': fod_training = {'/cpu:0': ['point_to_coordinate', 'loss_history']} fod_evaluation = {} dd_training = '' dd_evaluation = '/cpu:0' else: fod_training = {} fod_evaluation = {} dd_training = '/cpu:0' dd_evaluation = '/cpu:0' # create models configuration templates configs.update({'training': RGNConfig(args.config_file, {'name': 'training', 'dataFilesGlob': full_training_glob, 'checkpointsDirectory': checkpoints_dir, 'logsDirectory': logs_dir, 'fileQueueCapacity': configs['run'].queueing['training_file_queue_capacity'], 
'batchQueueCapacity': configs['run'].queueing['training_batch_queue_capacity'], 'minAfterDequeue': configs['run'].queueing['training_min_after_dequeue'], 'shuffle': configs['run'].queueing['training_shuffle'], 'tertiaryNormalization': configs['run'].loss['training_tertiary_normalization'], 'batchDependentNormalization': configs['run'].loss['training_batch_dependent_normalization'], 'alphabetFile': alphabet_file, 'functionsOnDevices': fod_training, 'defaultDevice': dd_training, 'fillGPU': args.fill_gpu})}) configs.update({'evaluation': RGNConfig(args.config_file, {'fileQueueCapacity': configs['run'].queueing['evaluation_file_queue_capacity'], 'batchQueueCapacity': configs['run'].queueing['evaluation_batch_queue_capacity'], 'minAfterDequeue': configs['run'].queueing['evaluation_min_after_dequeue'], 'shuffle': configs['run'].queueing['evaluation_shuffle'], 'tertiaryNormalization': configs['run'].loss['evaluation_tertiary_normalization'], 'batchDependentNormalization': configs['run'].loss['evaluation_batch_dependent_normalization'], 'alphabetFile': alphabet_file, 'functionsOnDevices': fod_evaluation, 'defaultDevice': dd_evaluation, 'numEpochs': eval_num_epochs, 'bucketBoundaries': None})}) # Override included evaluation models with list from command-line if specified (assumes none are included and then includes ones that are specified) if args.evaluation_model: for prefix in ['', 'un']: for group in ['training', 'validation', 'testing']: configs['run'].evaluation.update({'include_' + prefix + 'weighted_' + group: False}) for entry in args.evaluation_model: configs['run'].evaluation.update({'include_' + entry: True}) # Override other command-lind arguments if args.gpu_fraction: configs['training'].computing.update({'gpu_fraction': args.gpu_fraction}) if args.milestone: configs['run'].optimization.update({'validation_milestone': dict(args.milestone)}) # Ensure that correct validation reference is chosen if not predicting, and turn off evaluation loss if predicting if not args.prediction_only: if ((not configs['run'].evaluation['include_weighted_validation']) and configs['run'].optimization['validation_reference'] == 'weighted') or \ ((not configs['run'].evaluation['include_unweighted_validation']) and configs['run'].optimization['validation_reference'] == 'unweighted'): raise RuntimeError('Chosen validation reference is not included in run.') else: configs['evaluation'].loss['include'] = False # rescaling needed to adjust for how frequently loss_history is updated if configs['training'].curriculum['behavior'] == 'loss_change': configs['training'].curriculum[ 'change_num_iterations'] //= configs['run'].io['evaluation_frequency'] # result must be >=1 configs['evaluation'].curriculum['change_num_iterations'] //= configs['run'].io['evaluation_frequency'] # ditto # create training model models = {} models.update({'training': RGNModel('training', configs['training'])}) print('*** training configuration ***') pprint(configs['training'].__dict__) # create weighted training evaluation model (conditional) if configs['run'].evaluation['include_weighted_training']: configs.update({'eval_wt_train': deepcopy(configs['evaluation'])}) configs['eval_wt_train'].io['name'] = 'evaluation_wt_training' configs['eval_wt_train'].io['data_files_glob'] = sample_training_glob configs['eval_wt_train'].optimization['batch_size'] = training_batch_size configs['eval_wt_train'].queueing['num_evaluation_invocations'] = training_invocations models.update({'eval_wt_train': RGNModel('evaluation', configs['eval_wt_train'])}) 
print('\n\n\n*** weighted training evaluation configuration ***') pprint(configs['eval_wt_train'].__dict__) # create weighted validation evaluation model (conditional) if configs['run'].evaluation['include_weighted_validation']: configs.update({'eval_wt_val': deepcopy(configs['evaluation'])}) configs['eval_wt_val'].io['name'] = 'evaluation_wt_validation' configs['eval_wt_val'].io['data_files_glob'] = validation_glob configs['eval_wt_val'].optimization['batch_size'] = validation_batch_size configs['eval_wt_val'].queueing['num_evaluation_invocations'] = validation_invocations if configs['run'].optimization['validation_reference'] == 'weighted': configs['eval_wt_val'].curriculum['update_loss_history'] = True models.update({'eval_wt_val': RGNModel('evaluation', configs['eval_wt_val'])}) print('\n\n\n*** weighted validation evaluation configuration ***') pprint(configs['eval_wt_val'].__dict__) # create weighted testing evaluation model (conditional) if configs['run'].evaluation['include_weighted_testing']: configs.update({'eval_wt_test': deepcopy(configs['evaluation'])}) configs['eval_wt_test'].io['name'] = 'evaluation_wt_testing' configs['eval_wt_test'].io['data_files_glob'] = testing_glob configs['eval_wt_test'].optimization['batch_size'] = testing_batch_size configs['eval_wt_test'].queueing['num_evaluation_invocations'] = testing_invocations models.update({'eval_wt_test': RGNModel('evaluation', configs['eval_wt_test'])}) print('\n\n\n*** weighted testing evaluation configuration ***') pprint(configs['eval_wt_test'].__dict__) # create equivalents for unweighted loss if there's a curriculum. if configs['training'].curriculum['mode'] is not None: # create unweighted training evaluation model (conditional) if configs['run'].evaluation['include_unweighted_training']: configs.update({'eval_unwt_train': deepcopy(configs['evaluation'])}) configs['eval_unwt_train'].io['name'] = 'evaluation_unwt_training' configs['eval_unwt_train'].io['data_files_glob'] = sample_training_glob configs['eval_unwt_train'].optimization['batch_size'] = training_batch_size configs['eval_unwt_train'].queueing['num_evaluation_invocations'] = training_invocations configs['eval_unwt_train'].curriculum['mode'] = None configs['eval_unwt_train'].curriculum['behavior'] = None models.update({'eval_unwt_train': RGNModel('evaluation', configs['eval_unwt_train'])}) # create unweighted validation evaluation model (conditional) if configs['run'].evaluation['include_unweighted_validation']: configs.update({'eval_unwt_val': deepcopy(configs['evaluation'])}) configs['eval_unwt_val'].io['name'] = 'evaluation_unwt_validation' configs['eval_unwt_val'].io['data_files_glob'] = validation_glob configs['eval_unwt_val'].optimization['batch_size'] = validation_batch_size configs['eval_unwt_val'].queueing['num_evaluation_invocations'] = validation_invocations configs['eval_unwt_val'].curriculum['mode'] = None configs['eval_unwt_val'].curriculum['behavior'] = None if configs['run'].optimization['validation_reference'] == 'unweighted': configs['eval_unwt_val'].curriculum['update_loss_history'] = True models.update({'eval_unwt_val': RGNModel('evaluation', configs['eval_unwt_val'])}) # create unweighted testing evaluation model (conditional) if configs['run'].evaluation['include_unweighted_testing']: configs.update({'eval_unwt_test': deepcopy(configs['evaluation'])}) configs['eval_unwt_test'].io['name'] = 'evaluation_unwt_testing' configs['eval_unwt_test'].io['data_files_glob'] = testing_glob configs['eval_unwt_test'].optimization['batch_size'] = 
testing_batch_size configs['eval_unwt_test'].queueing['num_evaluation_invocations'] = testing_invocations configs['eval_unwt_test'].curriculum['mode'] = None configs['eval_unwt_test'].curriculum['behavior'] = None models.update({'eval_unwt_test': RGNModel('evaluation', configs['eval_unwt_test'])}) # start head model and related prep stdout_err_file_handle.flush() session = models['training'].start(models.values()) global_step = models['training'].current_step(session) current_log_step = (global_step // configs['run'].io['prediction_frequency']) + 1 log_dir = os.path.join(run_dir, str(current_log_step)) restart = False # predict or train depending on set mode behavior if args.prediction_only: try: while not models['training'].is_done(): predict_and_log(log_dir, configs, models, session) except tf.errors.OutOfRangeError: pass except: print('Unexpected error: ', sys.exc_info()[0]) raise finally: if models['training']._is_started: models['training'].finish(session, save=False) stdout_err_file_handle.close() else: # clean up post last checkpoint residue if any if global_step != 0: # remove future directories last_log_step = sorted([int(os.path.basename(os.path.normpath(dir))) for dir in glob(os.path.join(run_dir, '*[0-9]'))])[-1] for step in range(current_log_step + 1, last_log_step + 1): rmtree(os.path.join(run_dir, str(step))) # remove future log entries in current log files log_file = os.path.join(log_dir, 'error.log') if os.path.exists(log_file): with open(log_file, 'rw+') as f: while True: new_line = f.readline().split() if len(new_line) > 1: step = int(new_line[1]) if step == global_step: f.truncate() break else: # reached end without seeing global_step, means checkpoint is ahead of last recorded log entry break # training loop try: while not models['training'].is_done(): # Train for one step global_step, ids = models['training'].train(session) # Set and create logging directory and files if needed log_dir = os.path.join(run_dir, str((global_step // configs['run'].io['prediction_frequency']) + 1)) log_file = os.path.join(log_dir, 'error.log') if not os.path.exists(log_dir): os.makedirs(log_dir) # Evaluate error, get diagnostics, and raise exceptions if necessary if global_step % configs['run'].io['evaluation_frequency'] == 0: diagnostics = evaluate_and_log(log_file, configs, models, session) # restart if a milestone is missed val_ref_set_prefix = 'un' if configs['run'].optimization['validation_reference'] == 'unweighted' else '' min_loss_achieved = diagnostics[val_ref_set_prefix + 'wt_val_loss']['min_tertiary_loss_achieved_all'] for step, loss in configs['run'].optimization['validation_milestone'].iteritems(): if global_step >= step and min_loss_achieved > loss: raise MilestoneError('Milestone at step ' + str(global_step) + \ ' missed because minimum loss achieved so far is ' + str(min_loss_achieved)) # restart if gradients are zero if (diagnostics['min_grad'] == 0 and diagnostics['max_grad'] == 0) or \ (configs['run'].evaluation['include_diagnostics'] and (np.isnan(diagnostics['min_grad']) or np.isnan(diagnostics['max_grad']))): raise DeadGradientError('Gradient is dead.') # Predict structures. Currently assumes that weighted training and validation models are available, and fails if they're not. 
if global_step % configs['run'].io['prediction_frequency'] == 0: predict_and_log(log_dir, configs, models, session) # Checkpoint if global_step % configs['run'].io['checkpoint_frequency'] == 0: models['training'].save(session) except tf.errors.OutOfRangeError: print('Epoch limit reached.') except (tf.errors.InvalidArgumentError, DeadGradientError): # InvalidArgumentError is usually triggered by a nan models['training'].finish(session, save=False) if args.restart_on_dead_gradient: print('Nan or dead gradient encountered; model will be resumed from last checkpoint if one exists, or restarted from scratch otherwise.') if not os.path.isdir(checkpoints_dir): for sub_dir in next(os.walk(run_dir))[1]: rmtree(os.path.join(run_dir, sub_dir)) # erase all old directories restart = True else: print('Nan or dead gradient encountered; model will be terminated.') except MilestoneError: models['training'].finish(session, save=False) if args.restart_on_missed_milestone: print('Milestone missed; model will be restarted from scratch with an incremented seed.') for sub_dir in next(os.walk(run_dir))[1]: rmtree(os.path.join(run_dir, sub_dir)) # erase all old directories # modify configuration file with new seed old_seed = configs['training'].initialization['graph_seed'] new_seed = old_seed + args.seed_increment for line in fileinput.input(args.config_file, inplace=True): print line.replace('randSeed ' + str(old_seed), 'randSeed ' + str(new_seed)), restart = True else: print('Milestone missed; model will be terminated.') except: print('Unexpected error: ', sys.exc_info()[0]) raise finally: # Wrap up (ask threads to stop, save final checkpoint, etc.) if models['training']._is_started: models['training'].finish(session, save=args.checkpoint_on_finish) stdout_err_file_handle.close() return restart
def set_proc_name(name):
    import setproctitle
    setproctitle.setproctitle(name)
def setproctitle(self, title=""): setproctitle('odoo: %s %s %s' % (self.__class__.__name__, self.pid, title))
import numpy as np
from PIL import Image

import caffe
import setproctitle
import os, sys
import surgery, score
import tools
from copy import copy
import time
import setup

setproctitle.setproctitle(os.path.basename(os.getcwd()))

caffe_root = '/home/cv/hdl/caffe'
models = '{}/models'.format(caffe_root)
voc_dir = '{}/data/pascal/VOC/VOC2010'.format(caffe_root)
snapshot = 'snapshot'

part1 = 'head'
part2 = 'torso'
joint_parts = 'head+torso'
parts = [part1, part2, joint_parts]

weights = 'vgg16fc.caffemodel'

classes = np.asarray([
    'background', 'head', 'torso', 'head+torso', 'left arm', 'right arm',
    'arms', 'left leg', 'right leg', 'legs', 'person'
])

device = sys.argv[1]
if len(sys.argv) > 2:
    is_resume = sys.argv[2] == '-resume' and int(sys.argv[3]) % 4000 == 0
    if is_resume:
def appendproctitle(name):
    '''
    Append "name" to the current process title
    '''
    if HAS_SETPROCTITLE:
        setproctitle.setproctitle(setproctitle.getproctitle() + ' ' + name)
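The HAS_SETPROCTITLE flag used above is presumably set at import time; a minimal sketch of that assumed module-level setup:

# Assumed module-level guard for the optional dependency (not taken verbatim from the source)
try:
    import setproctitle
    HAS_SETPROCTITLE = True
except ImportError:
    HAS_SETPROCTITLE = False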
def main(): args = parse() args_pt = copy.deepcopy(args) args_teacher = copy.deepcopy(args) # Load a conf file if args.resume: conf = load_config(os.path.join(os.path.dirname(args.resume), 'conf.yml')) for k, v in conf.items(): if k != 'resume': setattr(args, k, v) recog_params = vars(args) # Automatically reduce batch size in multi-GPU setting if args.n_gpus > 1: args.batch_size -= 10 args.print_step //= args.n_gpus # Compute subsampling factor subsample_factor = 1 subsample_factor_sub1 = 1 subsample_factor_sub2 = 1 subsample = [int(s) for s in args.subsample.split('_')] if args.conv_poolings and 'conv' in args.enc_type: for p in args.conv_poolings.split('_'): subsample_factor *= int(p.split(',')[0].replace('(', '')) else: subsample_factor = np.prod(subsample) if args.train_set_sub1: if args.conv_poolings and 'conv' in args.enc_type: subsample_factor_sub1 = subsample_factor * np.prod(subsample[:args.enc_n_layers_sub1 - 1]) else: subsample_factor_sub1 = subsample_factor if args.train_set_sub2: if args.conv_poolings and 'conv' in args.enc_type: subsample_factor_sub2 = subsample_factor * np.prod(subsample[:args.enc_n_layers_sub2 - 1]) else: subsample_factor_sub2 = subsample_factor skip_thought = 'skip' in args.enc_type # Load dataset train_set = Dataset(corpus=args.corpus, tsv_path=args.train_set, tsv_path_sub1=args.train_set_sub1, tsv_path_sub2=args.train_set_sub2, dict_path=args.dict, dict_path_sub1=args.dict_sub1, dict_path_sub2=args.dict_sub2, nlsyms=args.nlsyms, unit=args.unit, unit_sub1=args.unit_sub1, unit_sub2=args.unit_sub2, wp_model=args.wp_model, wp_model_sub1=args.wp_model_sub1, wp_model_sub2=args.wp_model_sub2, batch_size=args.batch_size * args.n_gpus, n_epochs=args.n_epochs, min_n_frames=args.min_n_frames, max_n_frames=args.max_n_frames, sort_by_input_length=True, short2long=True, sort_stop_epoch=args.sort_stop_epoch, dynamic_batching=args.dynamic_batching, ctc=args.ctc_weight > 0, ctc_sub1=args.ctc_weight_sub1 > 0, ctc_sub2=args.ctc_weight_sub2 > 0, subsample_factor=subsample_factor, subsample_factor_sub1=subsample_factor_sub1, subsample_factor_sub2=subsample_factor_sub2, discourse_aware=args.discourse_aware, skip_thought=skip_thought) dev_set = Dataset(corpus=args.corpus, tsv_path=args.dev_set, tsv_path_sub1=args.dev_set_sub1, tsv_path_sub2=args.dev_set_sub2, dict_path=args.dict, dict_path_sub1=args.dict_sub1, dict_path_sub2=args.dict_sub2, nlsyms=args.nlsyms, unit=args.unit, unit_sub1=args.unit_sub1, unit_sub2=args.unit_sub2, wp_model=args.wp_model, wp_model_sub1=args.wp_model_sub1, wp_model_sub2=args.wp_model_sub2, batch_size=args.batch_size * args.n_gpus, min_n_frames=args.min_n_frames, max_n_frames=args.max_n_frames, shuffle=True if args.discourse_aware else False, ctc=args.ctc_weight > 0, ctc_sub1=args.ctc_weight_sub1 > 0, ctc_sub2=args.ctc_weight_sub2 > 0, subsample_factor=subsample_factor, subsample_factor_sub1=subsample_factor_sub1, subsample_factor_sub2=subsample_factor_sub2, discourse_aware=args.discourse_aware, skip_thought=skip_thought) eval_sets = [] for s in args.eval_sets: eval_sets += [Dataset(corpus=args.corpus, tsv_path=s, dict_path=args.dict, nlsyms=args.nlsyms, unit=args.unit, wp_model=args.wp_model, batch_size=1, discourse_aware=args.discourse_aware, skip_thought=skip_thought, is_test=True)] args.vocab = train_set.vocab args.vocab_sub1 = train_set.vocab_sub1 args.vocab_sub2 = train_set.vocab_sub2 args.input_dim = train_set.input_dim # Load a LM conf file for LM fusion & LM initialization if not args.resume and (args.lm_fusion or args.lm_init): if 
args.lm_fusion: lm_conf = load_config(os.path.join(os.path.dirname(args.lm_fusion), 'conf.yml')) elif args.lm_init: lm_conf = load_config(os.path.join(os.path.dirname(args.lm_init), 'conf.yml')) args.lm_conf = argparse.Namespace() for k, v in lm_conf.items(): setattr(args.lm_conf, k, v) assert args.unit == args.lm_conf.unit assert args.vocab == args.lm_conf.vocab # Set save path if args.resume: save_path = os.path.dirname(args.resume) dir_name = os.path.basename(save_path) else: dir_name = set_asr_model_name(args, subsample_factor) save_path = mkdir_join(args.model_save_dir, '_'.join( os.path.basename(args.train_set).split('.')[:-1]), dir_name) save_path = set_save_path(save_path) # avoid overwriting # Set logger logger = set_logger(os.path.join(save_path, 'train.log'), key='training') # Model setting model = SkipThought(args, save_path) if skip_thought else Speech2Text(args, save_path) if args.resume: # Set optimizer epoch = int(args.resume.split('-')[-1]) optimizer = set_optimizer(model, optimizer='sgd' if epoch > conf['convert_to_sgd_epoch'] else conf['optimizer'], lr=float(conf['learning_rate']), # on-the-fly weight_decay=float(conf['weight_decay'])) # Restore the last saved model model, checkpoint = load_checkpoint(model, args.resume, resume=True) optimizer = checkpoint['optimizer'] epoch = checkpoint['epoch'] step = checkpoint['step'] metric_dev_best = checkpoint['metric_dev_best'] # Resume between convert_to_sgd_epoch -1 and convert_to_sgd_epoch if epoch == conf['convert_to_sgd_epoch']: optimizer = set_optimizer(model, optimizer='sgd', lr=float(args.learning_rate), weight_decay=float(conf['weight_decay'])) optimizer = LRScheduler(optimizer, lr_max=args.learning_rate, decay_type='epoch', decay_start_epoch=0, decay_rate=0.5, lower_better=True) logger.info('========== Convert to SGD ==========') else: # Save the conf file as a yaml file save_config(vars(args), os.path.join(save_path, 'conf.yml')) if args.lm_fusion: save_config(args.lm_conf, os.path.join(save_path, 'conf_lm.yml')) # Save the nlsyms, dictionar, and wp_model if args.nlsyms: shutil.copy(args.nlsyms, os.path.join(save_path, 'nlsyms.txt')) for sub in ['', '_sub1', '_sub2']: if getattr(args, 'dict' + sub): shutil.copy(getattr(args, 'dict' + sub), os.path.join(save_path, 'dict' + sub + '.txt')) if getattr(args, 'unit' + sub) == 'wp': shutil.copy(getattr(args, 'wp_model' + sub), os.path.join(save_path, 'wp' + sub + '.model')) for k, v in sorted(vars(args).items(), key=lambda x: x[0]): logger.info('%s: %s' % (k, str(v))) # Count total parameters for n in sorted(list(model.num_params_dict.keys())): nparams = model.num_params_dict[n] logger.info("%s %d" % (n, nparams)) logger.info("Total %.2f M parameters" % (model.total_parameters / 1000000)) logger.info(model) # Initialize with pre-trained model's parameters if args.pretrained_model and os.path.isfile(args.pretrained_model): # Load the ASR model conf_pt = load_config(os.path.join(os.path.dirname(args.pretrained_model), 'conf.yml')) for k, v in conf_pt.items(): setattr(args_pt, k, v) model_pt = Speech2Text(args_pt) model_pt, _ = load_checkpoint(model_pt, args.pretrained_model) # Overwrite parameters only_enc = (args.enc_n_layers != args_pt.enc_n_layers) or ( args.unit != args_pt.unit) or args_pt.ctc_weight == 1 param_dict = dict(model_pt.named_parameters()) for n, p in model.named_parameters(): if n in param_dict.keys() and p.size() == param_dict[n].size(): if only_enc and 'enc' not in n: continue if args.lm_fusion_type == 'cache' and 'output' in n: continue p.data = 
param_dict[n].data logger.info('Overwrite %s' % n) epoch, step = 0, 0 metric_dev_best = 10000 # Set optimizer optimizer = set_optimizer(model, optimizer=args.optimizer, lr=float(args.learning_rate), weight_decay=float(args.weight_decay)) # Wrap optimizer by learning rate scheduler noam = 'transformer' in args.enc_type or args.dec_type == 'transformer' optimizer = LRScheduler(optimizer, lr_max=float(args.learning_rate), decay_type=args.decay_type, decay_start_epoch=args.decay_start_epoch, decay_rate=args.decay_rate, decay_patient_n_epochs=args.decay_patient_n_epochs, lower_better=True, best_value=metric_dev_best, model_size=args.d_model, warmup_start_lr=args.warmup_start_learning_rate, warmup_n_steps=args.warmup_n_steps, lr_factor=args.learning_rate_factor, noam=noam) # Load the teacher ASR model teacher = None teacher_lm = None if args.teacher and os.path.isfile(args.teacher): conf_teacher = load_config(os.path.join(os.path.dirname(args.teacher), 'conf.yml')) for k, v in conf_teacher.items(): setattr(args_teacher, k, v) # Setting for knowledge distillation args_teacher.ss_prob = 0 args.lsm_prob = 0 teacher = Speech2Text(args_teacher) teacher, _ = load_checkpoint(teacher, args.teacher) # Load the teacher LM if args.teacher_lm and os.path.isfile(args.teacher_lm): conf_lm = load_config(os.path.join(os.path.dirname(args.teacher_lm), 'conf.yml')) args_lm = argparse.Namespace() for k, v in conf_lm.items(): setattr(args_lm, k, v) teacher_lm = select_lm(args_lm) teacher_lm, _ = load_checkpoint(teacher_lm, args.teacher_lm) # GPU setting if args.n_gpus >= 1: model = CustomDataParallel(model, device_ids=list(range(0, args.n_gpus, 1)), deterministic=False, benchmark=True) model.cuda() if teacher is not None: teacher.cuda() if teacher_lm is not None: teacher_lm.cuda() logger.info('PID: %s' % os.getpid()) logger.info('USERNAME: %s' % os.uname()[1]) # Set process name if args.job_name: setproctitle(args.job_name) else: setproctitle(dir_name) # Set reporter reporter = Reporter(save_path, tensorboard=True) if args.mtl_per_batch: # NOTE: from easier to harder tasks tasks = [] if 1 - args.bwd_weight - args.ctc_weight - args.sub1_weight - args.sub2_weight > 0: tasks += ['ys'] if args.bwd_weight > 0: tasks = ['ys.bwd'] + tasks if args.ctc_weight > 0: tasks = ['ys.ctc'] + tasks if args.lmobj_weight > 0: tasks = ['ys.lmobj'] + tasks for sub in ['sub1', 'sub2']: if getattr(args, 'train_set_' + sub): if getattr(args, sub + '_weight') - getattr(args, 'ctc_weight_' + sub) > 0: tasks = ['ys_' + sub] + tasks if getattr(args, 'ctc_weight_' + sub) > 0: tasks = ['ys_' + sub + '.ctc'] + tasks else: tasks = ['all'] start_time_train = time.time() start_time_epoch = time.time() start_time_step = time.time() not_improved_n_epochs = 0 pbar_epoch = tqdm(total=len(train_set)) accum_n_tokens = 0 while True: # Compute loss in the training set batch_train, is_new_epoch = train_set.next() accum_n_tokens += sum([len(y) for y in batch_train['ys']]) # Change mini-batch depending on task for task in tasks: if skip_thought: loss, reporter = model(batch_train['ys'], ys_prev=batch_train['ys_prev'], ys_next=batch_train['ys_next'], reporter=reporter) else: loss, reporter = model(batch_train, reporter=reporter, task=task, teacher=teacher, teacher_lm=teacher_lm) # loss /= args.accum_grad_n_steps if len(model.device_ids) > 1: loss.backward(torch.ones(len(model.device_ids))) else: loss.backward() loss.detach() # Trancate the graph if args.accum_grad_n_tokens == 0 or accum_n_tokens >= args.accum_grad_n_tokens: if args.clip_grad_norm > 0: 
torch.nn.utils.clip_grad_norm_(model.module.parameters(), args.clip_grad_norm) optimizer.step() optimizer.zero_grad() accum_n_tokens = 0 loss_train = loss.item() del loss reporter.step() step += args.n_gpus if step % args.print_step == 0: # Compute loss in the dev set batch_dev = dev_set.next()[0] # Change mini-batch depending on task for task in tasks: if skip_thought: loss, reporter = model(batch_dev['ys'], ys_prev=batch_dev['ys_prev'], ys_next=batch_dev['ys_next'], reporter=reporter, is_eval=True) else: loss, reporter = model(batch_dev, reporter=reporter, task=task, is_eval=True) loss_dev = loss.item() del loss reporter.step(is_eval=True) duration_step = time.time() - start_time_step if args.input_type == 'speech': xlen = max(len(x) for x in batch_train['xs']) ylen = max(len(y) for y in batch_train['ys']) elif args.input_type == 'text': xlen = max(len(x) for x in batch_train['ys']) ylen = max(len(y) for y in batch_train['ys_sub1']) logger.info("step:%d(ep:%.2f) loss:%.3f(%.3f)/lr:%.5f/bs:%d/xlen:%d/ylen:%d (%.2f min)" % (step, epoch + train_set.epoch_detail, loss_train, loss_dev, optimizer.lr, len(batch_train['utt_ids']), xlen, ylen, duration_step / 60)) start_time_step = time.time() pbar_epoch.update(len(batch_train['utt_ids'])) # Save fugures of loss and accuracy if step % (args.print_step * 10) == 0: reporter.snapshot() model.module.plot_attention() # Save checkpoint and evaluate model per epoch if is_new_epoch: epoch += 1 duration_epoch = time.time() - start_time_epoch logger.info('========== EPOCH:%d (%.2f min) ==========' % (epoch, duration_epoch / 60)) if epoch < args.eval_start_epoch: # Save the model save_checkpoint(model, save_path, optimizer, epoch, step, metric_dev_best, remove_old_checkpoints=not noam) reporter._epoch += 1 # TODO(hirofumi): fix later else: start_time_eval = time.time() # dev if args.metric == 'edit_distance': if args.unit in ['word', 'word_char']: metric_dev = eval_word([model.module], dev_set, recog_params, epoch=epoch)[0] logger.info('WER (%s): %.2f %%' % (dev_set.set, metric_dev)) elif args.unit == 'wp': metric_dev, cer_dev = eval_wordpiece([model.module], dev_set, recog_params, epoch=epoch) logger.info('WER (%s): %.2f %%' % (dev_set.set, metric_dev)) logger.info('CER (%s): %.2f %%' % (dev_set.set, cer_dev)) elif 'char' in args.unit: metric_dev, cer_dev = eval_char([model.module], dev_set, recog_params, epoch=epoch) logger.info('WER (%s): %.2f %%' % (dev_set.set, metric_dev)) logger.info('CER (%s): %.2f %%' % (dev_set.set, cer_dev)) elif 'phone' in args.unit: metric_dev = eval_phone([model.module], dev_set, recog_params, epoch=epoch) logger.info('PER (%s): %.2f %%' % (dev_set.set, metric_dev)) elif args.metric == 'ppl': metric_dev = eval_ppl([model.module], dev_set, batch_size=args.batch_size)[0] logger.info('PPL (%s): %.2f' % (dev_set.set, metric_dev)) elif args.metric == 'loss': metric_dev = eval_ppl([model.module], dev_set, batch_size=args.batch_size)[1] logger.info('Loss (%s): %.2f' % (dev_set.set, metric_dev)) else: raise NotImplementedError(args.metric) reporter.epoch(metric_dev) # Update learning rate optimizer.decay(epoch=epoch, value=metric_dev) if metric_dev < metric_dev_best: metric_dev_best = metric_dev not_improved_n_epochs = 0 logger.info('||||| Best Score |||||') # Save the model save_checkpoint(model, save_path, optimizer, epoch, step, metric_dev_best, remove_old_checkpoints=not noam) # test for s in eval_sets: if args.metric == 'edit_distance': if args.unit in ['word', 'word_char']: wer_test = eval_word([model.module], s, recog_params, 
epoch=epoch)[0] logger.info('WER (%s): %.2f %%' % (s.set, wer_test)) elif args.unit == 'wp': wer_test, cer_test = eval_wordpiece([model.module], s, recog_params, epoch=epoch) logger.info('WER (%s): %.2f %%' % (s.set, wer_test)) logger.info('CER (%s): %.2f %%' % (s.set, cer_test)) elif 'char' in args.unit: wer_test, cer_test = eval_char([model.module], s, recog_params, epoch=epoch) logger.info('WER (%s): %.2f %%' % (s.set, wer_test)) logger.info('CER (%s): %.2f %%' % (s.set, cer_test)) elif 'phone' in args.unit: per_test = eval_phone([model.module], s, recog_params, epoch=epoch) logger.info('PER (%s): %.2f %%' % (s.set, per_test)) elif args.metric == 'ppl': ppl_test = eval_ppl([model.module], s, batch_size=args.batch_size)[0] logger.info('PPL (%s): %.2f' % (s.set, ppl_test)) elif args.metric == 'loss': loss_test = eval_ppl([model.module], s, batch_size=args.batch_size)[1] logger.info('Loss (%s): %.2f' % (s.set, loss_test)) else: raise NotImplementedError(args.metric) else: not_improved_n_epochs += 1 # start scheduled sampling if args.ss_prob > 0: model.module.scheduled_sampling_trigger() duration_eval = time.time() - start_time_eval logger.info('Evaluation time: %.2f min' % (duration_eval / 60)) # Early stopping if not_improved_n_epochs == args.not_improved_patient_n_epochs: break # Convert to fine-tuning stage if epoch == args.convert_to_sgd_epoch: optimizer = set_optimizer(model, optimizer='sgd', lr=args.learning_rate, weight_decay=float(args.weight_decay)) optimizer = LRScheduler(optimizer, lr_max=args.learning_rate, decay_type='epoch', decay_start_epoch=0, decay_rate=0.5, lower_better=True) logger.info('========== Convert to SGD ==========') pbar_epoch = tqdm(total=len(train_set)) if epoch == args.n_epochs: break start_time_step = time.time() start_time_epoch = time.time() duration_train = time.time() - start_time_train logger.info('Total time: %.2f hour' % (duration_train / 3600)) if reporter.tensorboard: reporter.tf_writer.close() pbar_epoch.close() return save_path
#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-

__author__ = "Shrinidhi Rao"
__license__ = "GPL"
__email__ = "*****@*****.**"

import os
import sys

sys.path.append(os.sep.join(os.path.abspath(__file__).split(os.sep)[:-3]))

import lib.common.system_utils
import setproctitle
import simplejson
import cherrypy

setproctitle.setproctitle("web_api_server")
cherrypy._cpserver.Server.thread_pool = 30


class host_details(object):
    @cherrypy.expose
    def index(self):
        details = lib.common.system_utils.get_local_host_details()
        return simplejson.dumps(details)


if __name__ == '__main__':
    cherrypy.tree.mount(host_details(), '/')
    cherrypy.engine.start()
    cherrypy.engine.block()
# Copyright Niantic 2019. Patent Pending. All rights reserved.
#
# This software is licensed under the terms of the Monodepth2 licence
# which allows for non-commercial use only, the full terms of which are made
# available in the LICENSE file.

from __future__ import absolute_import, division, print_function

from trainer import Trainer
from options import MonodepthOptions
import setproctitle

options = MonodepthOptions()
opts = options.parse()
setproctitle.setproctitle(opts.model_name)

if __name__ == "__main__":
    trainer = Trainer(opts)
    trainer.train()
def post_worker_init(dummy_worker):
    setproctitle.setproctitle(
        settings.GUNICORN_WORKER_READY_PREFIX + setproctitle.getproctitle()
    )
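Gunicorn picks this hook up from a configuration module. Below is a minimal sketch of how such a hook might be wired up; the GUNICORN_WORKER_READY_PREFIX value and the settings layout are assumptions, not part of the original snippet.

# gunicorn.conf.py -- hypothetical wiring for the post_worker_init hook above
import setproctitle

# Assumed constant; in the original it lives in a Django-style settings module.
GUNICORN_WORKER_READY_PREFIX = "[ready] "

bind = "0.0.0.0:8000"
workers = 2

def post_worker_init(worker):
    # Prefix the title Gunicorn already set, so `ps` shows which workers
    # have finished initialising the application.
    setproctitle.setproctitle(GUNICORN_WORKER_READY_PREFIX + setproctitle.getproctitle())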
def __init__(self, verbose=False, log_dir=None, console_log=False, run_dir=None, config_file=None, persistence_file=None, test_dir=None): setproctitle.setproctitle('openrazer-daemon') # pylint: disable=no-member # Expanding ~ as python doesn't do it by default, also creating dirs if needed try: if log_dir is not None: log_dir = os.path.expanduser(log_dir) os.makedirs(log_dir, exist_ok=True) if run_dir is not None: run_dir = os.path.expanduser(run_dir) os.makedirs(run_dir, exist_ok=True) except NotADirectoryError as e: print("Failed to create {}".format(e.filename), file=sys.stderr) sys.exit(1) if config_file is not None: config_file = os.path.expanduser(config_file) if not os.path.exists(config_file): print("Config file {} does not exist.".format(config_file), file=sys.stderr) sys.exit(1) if persistence_file is not None: persistence_file = os.path.expanduser(persistence_file) if not os.path.exists(persistence_file): print("Persistence file {} does not exist.".format( persistence_file), file=sys.stderr) sys.exit(1) self._test_dir = test_dir self._run_dir = run_dir self._config_file = config_file self._config = configparser.ConfigParser() self.read_config(config_file) self._persistence_file = persistence_file self._persistence = configparser.ConfigParser() self._persistence.status = {"changed": False} self.read_persistence(persistence_file) # Logging log_level = logging.INFO if verbose or self._config.getboolean('General', 'verbose_logging'): log_level = logging.DEBUG self.logger = self._create_logger(log_dir, log_level, console_log) # Check for plugdev group if not self._check_plugdev_group(): self.logger.critical("User is not a member of the plugdev group") self.logger.critical( "Please run the command 'sudo gpasswd -a $USER plugdev' and then reboot!" ) sys.exit(1) # Setup DBus to use gobject main loop dbus.mainloop.glib.threads_init() dbus.mainloop.glib.DBusGMainLoop(set_as_default=True) super().__init__('/org/razer') self._init_signals() self._main_loop = GLib.MainLoop() # Listen for input events from udev self._init_udev_monitor() # Load Classes self._device_classes = openrazer_daemon.hardware.get_device_classes() self.logger.info("Initialising Daemon (v%s). Pid: %d", __version__, os.getpid()) self._init_screensaver_monitor() self._razer_devices = DeviceCollection() self._load_devices(first_run=True) # Add DBus methods methods = { # interface, method, callback, in-args, out-args ('razer.devices', 'getDevices', self.get_serial_list, None, 'as'), ('razer.devices', 'supportedDevices', self.supported_devices, None, 's'), ('razer.devices', 'enableTurnOffOnScreensaver', self.enable_turn_off_on_screensaver, 'b', None), ('razer.devices', 'getOffOnScreensaver', self.get_off_on_screensaver, None, 'b'), ('razer.devices', 'syncEffects', self.sync_effects, 'b', None), ('razer.devices', 'getSyncEffects', self.get_sync_effects, None, 'b'), ('razer.daemon', 'version', self.version, None, 's'), ('razer.daemon', 'stop', self.stop, None, None), } for m in methods: self.logger.debug("Adding {}.{} method to DBus".format(m[0], m[1])) self.add_dbus_method(m[0], m[1], m[2], in_signature=m[3], out_signature=m[4]) self._collecting_udev = False self._collecting_udev_devices = [] self._init_autosave_persistence() # TODO remove self.sync_effects( self._config.getboolean('Startup', 'sync_effects_enabled'))
def _set_process_title(self):
    setproctitle('lymph-instance (identity: %s, endpoint: %s, config: %s)' % (
        self.container.identity,
        self.container.endpoint,
        self.config.source,
    ))
def setproctitle(title: str) -> None:
    # _setproctitle_enabled and setproctitle_module are expected to be set up
    # at module import time (see the sketch below).
    if _setproctitle_enabled:
        setproctitle_module.setproctitle(title)
    else:
        # logger.warn() is deprecated in favour of logger.warning()
        logger.warning(f"setproctitle not enabled for process {title}")
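The names _setproctitle_enabled, setproctitle_module and logger are not defined in the snippet itself. A minimal sketch of the kind of module-level guard they imply is shown here; the exact mechanism in the original project is an assumption.

import logging

logger = logging.getLogger(__name__)

# Import setproctitle optionally so the wrapper degrades gracefully
# when the C extension is not installed.
try:
    import setproctitle as setproctitle_module
    _setproctitle_enabled = True
except ImportError:
    setproctitle_module = None
    _setproctitle_enabled = False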
def run(self):
    setproctitle.setproctitle('stream_server')
    self.serve_forever()
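Renaming the process inside run() is the usual pattern when the class is a multiprocessing.Process (or similar) subclass, so each child shows up under its own name in ps. A self-contained sketch of that pattern, not taken from the original project:

import multiprocessing
import time

import setproctitle


class StreamServer(multiprocessing.Process):
    def run(self):
        # run() executes in the child process, so only the child's title changes.
        setproctitle.setproctitle('stream_server')
        time.sleep(30)  # stand-in for serve_forever()


if __name__ == '__main__':
    StreamServer().start()  # `ps -ef | grep stream_server` now shows the child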
return def remove_custom_place(self, removeButton): treeselection = self.custom_places_tree.get_selection() currentiter = treeselection.get_selected()[1] if currentiter: self.custom_places_model.remove(currentiter) return def save_custom_places(self, treemodel, path, iter=None, new_order=None): if not iter or self.custom_places_model.get_value(iter, 1): treeiter = self.custom_places_model.get_iter_first() custom_places_names = [] custom_places_paths = [] while treeiter: custom_places_names = custom_places_names + [ self.custom_places_model.get_value(treeiter, 0) ] custom_places_paths = custom_places_paths + [ self.custom_places_model.get_value(treeiter, 1) ] treeiter = self.custom_places_model.iter_next(treeiter) self.places_settings.set_strv("custom-paths", custom_places_paths) self.places_settings.set_strv("custom-names", custom_places_names) if __name__ == "__main__": setproctitle.setproctitle('mintmenu-preferences') preferences = mintMenuPreferences() Gtk.main()
#
#
import ctypes
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import setproctitle as setPT

import lora as lora_gr   # from GNURadio library
from lora_id import *    # from InterDigital customized library
from utils import *

if __name__ == '__main__':
    setPT.setproctitle('lora-tester-channelizer-py')
    print "Process name: " + str(get_proc_name())
    print "***********************************"

    # ----------------------------------------------
    # GDB ATTACH (DEBUGGING or performance monitoring)
    # ----------------------------------------------
    GDB_ATTACH = 0
    if GDB_ATTACH:
        print('Blocked waiting for GDB attach (pid = %d) ' % (os.getpid(),)
              + '. Press ENTER after GDB is attached.')
        sys.stdout.flush()
        raw_input()

    # Read from dataset (returns numpy), before channelizer
    dataset = read_complex_array("../data/lora-99-100.sigmf-data")
    fileName_out = '../data/py_lora_output_resampler'
import logging
import platform
import setproctitle
import flaskr
import sys
import nw_logging

# TODO: app.py sets up the logging system and the basic OS-level configuration here.


def validate_python() -> None:
    """Validate that the right Python version is running."""
    # REQUIRED_PYTHON_VER is expected to be defined elsewhere in the project.
    if sys.version_info[:3] < REQUIRED_PYTHON_VER:
        print("ninewatt Device requires at least Python {}.{}.{}".format(
            *REQUIRED_PYTHON_VER))
        sys.exit(1)


def main():
    flask_app = flaskr.create_app()
    flask_app.debug = True
    flask_app.run(host="localhost", port="5000")


if __name__ == "__main__":
    if platform.system() == "Linux":
        setproctitle.setproctitle('ninewatt_app')
    sys.exit(main())
from trainer import *
import setproctitle

if __name__ == "__main__":
    args = pblm.argparser(prefix='mnist', gan_type='ACGAN', opt='adam',
                          batch_size_test=10, proj=50, norm_train='l2_normal',
                          norm_test='l2', epsilon=1.58, seed=0)
    kwargs = pblm.args2kwargs(args)

    setproctitle.setproctitle('python')
    print("saving file to {}".format(args.proctitle))

    saved_filepath = './saved_log/' + args.proctitle
    model_filepath = os.path.dirname('./models/' + args.proctitle)
    if not os.path.exists(saved_filepath):
        os.makedirs(saved_filepath)
    if not os.path.exists(model_filepath):
        os.makedirs(model_filepath)
    model_path = './models/' + args.proctitle

    train_res = open(saved_filepath + '/train_res.txt', "w")
    test_res = open(saved_filepath + '/test_res.txt', "w")

    # load the data
    if args.prefix == "mnist":
if GetLastError() == ERROR_ALREADY_EXISTS:
    lock_file_validation = False
else:
    lock_file_validation = True

# run persepolis mainwindow
if lock_file_validation:
    from persepolis.scripts import initialization
    from persepolis.scripts.mainwindow import MainWindow

    # set "persepolis" name for this process in linux and bsd
    if os_type == 'Linux' or os_type == 'FreeBSD' or os_type == 'OpenBSD':
        try:
            from setproctitle import setproctitle
            setproctitle("persepolis")
        except:
            from persepolis.scripts import logger
            logger.sendToLog('setproctitle is not installed!', "ERROR")

    from PyQt5.QtWidgets import QApplication
    from PyQt5.QtGui import QFont
    from PyQt5.QtCore import QCoreApplication, QSettings
    from persepolis.gui.palettes import DarkRedPallete, DarkBluePallete, ArcDarkRedPallete, ArcDarkBluePallete, LightRedPallete, LightBluePallete
    from persepolis.scripts.bubble import notifySend
    from persepolis.scripts.error_window import ErrorWindow
    import traceback

    # load persepolis_settings
    persepolis_setting = QSettings('persepolis_download_manager', 'persepolis')
import torch import torch.nn as nn import numpy as np from arch.FastDVDNet import FastDVDNet from utils.data_utils import * from utils.file_utils import * import argparse from tensorboardX import SummaryWriter from torch.utils.data import Dataset, DataLoader from data_provider import Video_Provider import os, sys, shutil import torch.optim as optim import time import setproctitle setproctitle.setproctitle('ZhangBin') def args_parser(): parser = argparse.ArgumentParser() parser.add_argument('--dataset_path', '-dp', default='/media/sde/zb/rnn-cnn/vimeo_septuplet/sequences', help='the path of vimeo-90k') parser.add_argument('--txt_path', '-tp', default='/media/sde/zb/rnn-cnn/vimeo_septuplet', help='the path of train/eval txt file') parser.add_argument('--batch_size', '-bs', default=64, type=int, help='batch size') parser.add_argument('--frames', '-f', default=5, type=int) parser.add_argument('--im_size', '-s', default=96, type=int) parser.add_argument('--learning_rate', '-lr', default=1e-4, type=float) parser.add_argument('--num_worker', '-nw', default=4, type=int, help='number of workers to load data by dataloader') parser.add_argument('--restart', '-r', action='store_true', help='whether to restart the train process') parser.add_argument('--eval', '-e', action='store_true', help='whether to work on the eval mode') parser.add_argument('--cuda', action='store_true', help='whether to train the network on the GPU, default is mGPU') parser.add_argument('--max_epoch', default=100, type=int) return parser.parse_args()
def main(): parser = argparse.ArgumentParser() parser.add_argument('--batchSz', type=int, default=10) parser.add_argument('--dice', action='store_true') parser.add_argument('--ngpu', type=int, default=1) parser.add_argument('--nEpochs', type=int, default=300) parser.add_argument('--start-epoch', default=0, type=int, metavar='N', help='manual epoch number (useful on restarts)') parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)') parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', help='evaluate model on validation set') parser.add_argument('-i', '--inference', default='', type=str, metavar='PATH', help='run inference on data set and save results') # 1e-8 works well for lung masks but seems to prevent # rapid learning for nodule masks parser.add_argument('--weight-decay', '--wd', default=1e-8, type=float, metavar='W', help='weight decay (default: 1e-8)') parser.add_argument('--no-cuda', action='store_true') parser.add_argument('--save') parser.add_argument('--seed', type=int, default=1) parser.add_argument('--opt', type=str, default='adam', choices=('sgd', 'adam', 'rmsprop')) args = parser.parse_args() best_prec1 = 100. args.cuda = not args.no_cuda and torch.cuda.is_available() args.save = args.save or 'work/vnet.base.{}'.format(datestr()) nll = True if args.dice: nll = False weight_decay = args.weight_decay setproctitle.setproctitle(args.save) torch.manual_seed(args.seed) if args.cuda: torch.cuda.manual_seed(args.seed) print("build vnet") model = vnet.VNet(elu=False, nll=nll) batch_size = args.ngpu * args.batchSz gpu_ids = range(args.ngpu) model = nn.parallel.DataParallel(model, device_ids=gpu_ids) if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] best_prec1 = checkpoint['best_prec1'] model.load_state_dict(checkpoint['state_dict']) print("=> loaded checkpoint '{}' (epoch {})".format( args.evaluate, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) else: model.apply(weights_init) if nll: train = train_nll test = test_nll class_balance = True else: train = train_dice test = test_dice class_balance = False print(' + Number of params: {}'.format( sum([p.data.nelement() for p in model.parameters()]))) if args.cuda: model = model.cuda() if os.path.exists(args.save): shutil.rmtree(args.save) os.makedirs(args.save, exist_ok=True) # LUNA16 dataset isotropically scaled to 2.5mm^3 # and then truncated or zero-padded to 160x128x160 normMu = [-642.794] normSigma = [459.512] normTransform = transforms.Normalize(normMu, normSigma) trainTransform = transforms.Compose([transforms.ToTensor(), normTransform]) testTransform = transforms.Compose([transforms.ToTensor(), normTransform]) #if ct_targets == nodule_masks: # masks = lung_masks #else: masks = None if args.inference != '': if not args.resume: print("args.resume must be set to do inference") exit(1) kwargs = {'num_workers': 1} if args.cuda else {} src = args.inference dst = args.save inference_batch_size = args.ngpu root = os.path.dirname(src) images = os.path.basename(src) dataset = dset.LUNA16(root=root, images=images, transform=testTransform, split=target_split, mode="infer") loader = DataLoader(dataset, batch_size=inference_batch_size, shuffle=False, collate_fn=noop, **kwargs) inference(args, loader, model, trainTransform) return kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda 
else {} print("loading training set") trainSet = dset.LUNA16(root='/content/drive/luna16', images=ct_images, targets=ct_targets, mode="train", transform=trainTransform, class_balance=class_balance, split=target_split, seed=args.seed, masks=masks) trainLoader = DataLoader(trainSet, batch_size=batch_size, shuffle=True, **kwargs) print("loading test set") testLoader = DataLoader(dset.LUNA16(root='/content/drive/luna16', images=ct_images, targets=ct_targets, mode="test", transform=testTransform, seed=args.seed, masks=masks, split=target_split), batch_size=batch_size, shuffle=False, **kwargs) target_mean = trainSet.target_mean() bg_weight = target_mean / (1. + target_mean) fg_weight = 1. - bg_weight print(bg_weight) class_weights = torch.FloatTensor([bg_weight, fg_weight]) if args.cuda: class_weights = class_weights.cuda() if args.opt == 'sgd': optimizer = optim.SGD(model.parameters(), lr=1e-1, momentum=0.99, weight_decay=weight_decay) elif args.opt == 'adam': optimizer = optim.Adam(model.parameters(), weight_decay=weight_decay) elif args.opt == 'rmsprop': optimizer = optim.RMSprop(model.parameters(), weight_decay=weight_decay) trainF = open(os.path.join(args.save, 'train.csv'), 'w') testF = open(os.path.join(args.save, 'test.csv'), 'w') err_best = 100. for epoch in range(1, args.nEpochs + 1): adjust_opt(args.opt, optimizer, epoch) train(args, epoch, model, trainLoader, optimizer, trainF, class_weights) err = test(args, epoch, model, testLoader, optimizer, testF, class_weights) is_best = False if err < best_prec1: is_best = True best_prec1 = err save_checkpoint( { 'epoch': epoch, 'state_dict': model.state_dict(), 'best_prec1': best_prec1 }, is_best, args.save, "vnet") os.system('./plot.py {} {} &'.format(len(trainLoader), args.save)) trainF.close() testF.close()
def main(): setproctitle('train ' + os.path.split(os.path.realpath(__file__))[0]) parser = argparse.ArgumentParser() parser.add_argument('--resume_weights', '-r', default=None, type=str, help='a number for certain pth file.') parser.add_argument('--resume', action='store_true', help='Resume the model from save.') parser.add_argument('--base_model', '-m', default=None) parser.add_argument('--save_dir', '-sd', default='./exp') parser.add_argument('--output_name', '-on', default='default') parser.add_argument('--log_dump_iter', default=500, type=int) parser.add_argument('--batch_size_per_gpu', default=8, type=int) parser.add_argument('--debug_dir', '-dd', default=None) parser.add_argument('--dataset', default=None, type=str) parser.add_argument('--init_weights', default='../pths/R-50.pkl', type=str, help='Official code require it for init network.') parser.add_argument( '--epochs', type=int, default=8) # iteration = epoch * instance_number / batch_size parser.add_argument('--save_per_epoch', '-spe', type=int, default=-1) parser.add_argument('--flip_JSD', action='store_true', help='Semi-supervised learning enable.') parser.add_argument( '--flip_JSD_0g', action='store_true', help='Semi-supervised learning enable with zero grad on flipped input.' ) parser.add_argument('--recursive', action='store_true', help='recursive mode enable.') parser.add_argument('--bit', action='store_true', help='group_norm ResNet50 enable.') parser.add_argument('--resnext50_32x4d', action='store_true', help='ResNeXt50 enable.') parser.add_argument('--resnext101_32x8d', action='store_true', help='ResNeXt101 enable.') parser.add_argument('--diff_loss', action='store_true', help='Diff_loss learning enable.') parser.add_argument('--flip_aug', action='store_true', help='') parser.add_argument('--FA_heavy', action='store_true', help='heavy flip_aug') args = parser.parse_args() updateConfig_train(args)
def _setproctitle(title):
    setproctitle("gunicorn: %s" % title)
def main(args): setproctitle.setproctitle('quakenet_eval') if args.n_clusters == None: raise ValueError('Define the number of clusters with --n_clusters') ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir) cfg = config.Config() cfg.batch_size = 1 cfg.n_clusters = args.n_clusters cfg.add = 1 cfg.n_clusters += 1 cfg.n_epochs = 1 # Remove previous output directory if os.path.exists(args.output): shutil.rmtree(args.output) os.makedirs(args.output) if args.plot: os.makedirs(os.path.join(args.output, "viz")) # data pipeline data_pipeline = DataPipeline(args.dataset, config=cfg, is_training=False) samples = { 'data': data_pipeline.samples, 'cluster_id': data_pipeline.labels, 'start_time': data_pipeline.start_time, 'end_time': data_pipeline.end_time } # set up model and validation metrics model = models.get(args.model, samples, cfg, args.checkpoint_dir, is_training=False) if args.max_windows is None: max_windows = 2**31 else: max_windows = args.max_windows # Dictonary to store info on detected events events_dic = { "start_time": [], "end_time": [], "utc_timestamp": [], "cluster_id": [], "clusters_prob": [] } # Create catalog name in which the events are stored output_catalog = os.path.join(args.output, 'catalog_detection.csv') print('Catalog created to store events', output_catalog) # Run ConvNetQuake with tf.Session() as sess: coord = tf.train.Coordinator() tf.initialize_local_variables().run() threads = tf.train.start_queue_runners(sess=sess, coord=coord) model.load(sess, args.step) print('Predicting using model at step {}'.format( sess.run(model.global_step))) step = tf.train.global_step(sess, model.global_step) n_events = 0 idx = 0 time_start = time.time() while True: try: # Fetch class_proba and label to_fetch = [ samples['data'], model.layers['class_prob'], model.layers['class_prediction'], samples['start_time'], samples['end_time'] ] sample, class_prob_, cluster_id, start_time, end_time = sess.run( to_fetch) # # Keep only clusters proba, remove noise proba clusters_prob = class_prob_[0, 1::] cluster_id -= 1 # label for noise = -1, label for cluster \in {0:n_clusters} is_event = cluster_id[0] > -1 if is_event: n_events += 1 idx += 1 if idx % 1000 == 0: print("processed {} windows".format(idx)) if is_event: events_dic["start_time"].append(UTCDateTime(start_time)) events_dic["end_time"].append(UTCDateTime(end_time)) events_dic["utc_timestamp"].append( (start_time + end_time) / 2.0) events_dic["cluster_id"].append(cluster_id[0]) events_dic["clusters_prob"].append(list(clusters_prob)) if idx >= max_windows: print("stopped after {} windows".format(max_windows)) print("found {} events".format(n_events)) break except KeyboardInterrupt: print("processed {} windows, found {} events".format( idx + 1, n_events)) print("Run time: ", time.time() - time_start) except tf.errors.OutOfRangeError: print('Evaluation completed ({} epochs).'.format(cfg.n_epochs)) break print('joining data threads') m, s = divmod(time.time() - time_start, 60) print("Prediction took {} min {} seconds".format(m, s)) coord.request_stop() coord.join(threads) # Dump dictionary into csv file df = pd.DataFrame.from_dict(events_dic) df.to_csv(output_catalog)
def main(): # Workaround for development modpath = os.path.realpath( os.path.join( os.path.dirname(os.path.realpath(__file__)), '..', )) if modpath not in sys.path: sys.path.insert(0, modpath) parser = argparse.ArgumentParser() parser.add_argument('restart', nargs='?') parser.add_argument('--pidfile', '-P', action='store_true') parser.add_argument('--disable-loop-monitor', '-L', action='store_true') parser.add_argument('--loop-debug', action='store_true') parser.add_argument('--overlay-dirs', '-o', action='append') parser.add_argument('--debug-level', choices=[ 'TRACE', 'DEBUG', 'INFO', 'WARN', 'ERROR', ], default='DEBUG') parser.add_argument('--log-handler', choices=[ 'console', 'file', ], default='console') args = parser.parse_args() _logger = logger.Logger('middleware', args.debug_level) _logger.getLogger() pidpath = '/var/run/middlewared.pid' if args.restart: if os.path.exists(pidpath): with open(pidpath, 'r') as f: pid = int(f.read().strip()) try: os.kill(pid, 15) except ProcessLookupError as e: if e.errno != errno.ESRCH: raise if 'file' in args.log_handler: _logger.configure_logging('file') stream = _logger.stream() if stream is not None: sys.stdout = sys.stderr = stream elif 'console' in args.log_handler: _logger.configure_logging('console') else: _logger.configure_logging('file') setproctitle.setproctitle('middlewared') # Workaround to tell django to not set up logging on its own os.environ['MIDDLEWARED'] = str(os.getpid()) if args.pidfile: with open(pidpath, "w") as _pidfile: _pidfile.write(f"{str(os.getpid())}\n") Middleware( loop_debug=args.loop_debug, loop_monitor=not args.disable_loop_monitor, overlay_dirs=args.overlay_dirs, debug_level=args.debug_level, ).run()
import traceback

# Suppress GTK deprecation warnings
warnings.filterwarnings("ignore")

gi.require_version("Gtk", "3.0")
gi.require_version('XApp', '1.0')
from gi.repository import Gtk, Gdk, Gio, XApp, GdkPixbuf, GLib, Pango

from common import *
import mpv
from imdb import IMDb

setproctitle.setproctitle("hypnotix")

# i18n
APP = 'hypnotix'
LOCALE_DIR = "/usr/share/locale"
locale.bindtextdomain(APP, LOCALE_DIR)
gettext.bindtextdomain(APP, LOCALE_DIR)
gettext.textdomain(APP)
_ = gettext.gettext

PROVIDER_OBJ, PROVIDER_NAME = range(2)
PROVIDER_TYPE_ID, PROVIDER_TYPE_NAME = range(2)
GROUP_OBJ, GROUP_NAME = range(2)
CHANNEL_OBJ, CHANNEL_NAME, CHANNEL_LOGO = range(3)
def serve(self, args): threading.currentThread().setName('master') if SETPROCTITLE: setproctitle.setproctitle(args.process_name + ' master %s' % ' '.join(sys.argv[1:])) # Initialize logging, keep this at the beginning! self.init_logging(args.log_level) for f in glob.glob(os.path.join(args.socket_path, 'rest*.sock')): os.unlink(f) for f in glob.glob(os.path.join(args.socket_path, 'notify*.sock')): os.unlink(f) # Initialize translations self.translations = self.get_translations(args.translations_path) if not self.translations: logging.warn( 'no po files found, no translations will be available') else: # TODO: lazy-logging, info message? logging.debug("translations available for: '%s'", ', '.join(self.translations.keys())) if not UJSON: warnings.warn( 'ujson module is not available, falling back to slower stdlib json implementation' ) logging.info('starting kopano-mfr') # Fake exit queue. queue = multiprocessing.JoinableQueue(1) queue.put(True) workers = [] for n in range(args.workers): rest_runner = Runner(queue, self.run_rest, 'rest', args.process_name, n) rest_process = multiprocessing.Process(target=rest_runner.run, name='rest{}'.format(n), args=(args.socket_path, n, args)) workers.append(rest_process) notify_runner = Runner(queue, self.run_notify, 'notify', args.process_name, n) notify_process = multiprocessing.Process(target=notify_runner.run, name='notify{}'.format(n), args=(args.socket_path, n, args)) workers.append(notify_process) for worker in workers: worker.daemon = True worker.start() if args.insecure: logging.warning( 'insecure mode - TLS client connections are susceptible to man-in-the-middle attacks and safety checks are off - this is not suitable for production use' ) if args.with_experimental: logging.warning('experimental endpoints are enabled') if args.with_metrics: if PROMETHEUS: if not os.environ.get('prometheus_multiproc_dir'): logging.error('please export "prometheus_multiproc_dir"') sys.exit(-1) # Spawn the metrics process later, so we can pass along worker name and pids. monitor_workers = [(worker.name, worker.pid) for worker in workers] # Include master process. monitor_workers.append(('master', os.getpid())) metrics_runner = Runner(queue, self.run_metrics, 'metrics', args.process_name, 0) metrics_process = multiprocessing.Process( target=metrics_runner.run, args=(args.socket_path, args, monitor_workers)) metrics_process.daemon = True metrics_process.start() workers.append(metrics_process) else: logging.error( 'please install prometheus client python bindings') sys.exit(-1) signal.signal(signal.SIGCHLD, self.sigchld) signal.signal(signal.SIGTERM, self.sigterm) try: while self.running: signal.pause() except KeyboardInterrupt: self.running = False logging.info('keyboard interrupt') logging.info('starting shutdown') signal.signal(signal.SIGCHLD, signal.SIG_IGN) if not self.abnormal_shutdown: # Flush queue, to tell workers to cleanly exit. queue.get() try: queue.task_done() except ValueError: # NOTE(longsleep): If a process encountered an error taks_done() was # already called, thus it errors which is ok and can be ignored. pass # Wait for workers to exit. deadline = time.monotonic() + 5 done = [] while deadline > time.monotonic(): ready = multiprocessing.connection.wait([ worker.sentinel for worker in workers if worker.sentinel not in done ], timeout=1) done.extend(ready) if len(done) == len(workers): break # Kill off workers which did not exit. 
kill = len(done) != len(workers) for worker in workers: if kill and worker.is_alive(): if self.abnormal_shutdown: logging.critical('killing worker: %d', worker.pid) os.kill(worker.pid, signal.SIGKILL) else: logging.warn('terminating worker: %d', worker.pid) worker.terminate() if args.with_metrics and PROMETHEUS: prometheus_multiprocess.mark_process_dead(worker.pid) worker.join() # Cleanup potentially left over sockets. sockets = [] for n in range(args.workers): sockets.append('rest%d.sock' % n) for n in range(args.workers): sockets.append('notify%d.sock' % n) for socket in sockets: # noqa: F402 try: unix_socket = os.path.join(args.socket_path, socket) os.unlink(unix_socket) except OSError as err: if err.errno != errno.ENOENT: logging.warn( 'failed to remove socket %s on shutdown, error: %s', unix_socket, err) logging.info('shutdown complete')
        'alarm proxy host, agent pull config and push alarm to this proxy host, eg: 127.0.0.1:9090',
    default='127.0.0.1:9090')
parser.add_argument('-f', '--file',
                    help="log file for agent to watch, eg: ./xtop.log",
                    default='/chain/log/xtop.log')
parser.add_argument('--nodaemon', action='store_true', help='start as no-daemon mode')
args = parser.parse_args()

# set process title
proc_title = 'topargus-agent: '
for i in range(len(sys.argv)):
    proc_title = '{0} {1}'.format(proc_title, sys.argv[i])
setproctitle.setproctitle(proc_title)

if args.nodaemon:
    print("start as no-daemon mode")
else:
    # slog must not be used before daemon_init
    print("start as daemon mode")
    try:
        daemon.daemon_init()
    except RuntimeError as e:
        print(e, file=sys.stderr)
        raise SystemExit(1)

# attention: must run after daemon_init
slogging.start_log_monitor()
def main(config_path, model_save_path, gpu_indices): # Load a config file (.yml) with open(config_path, "r") as f: config = yaml.load(f) params = config['param'] # Except for a blank class if params['label_type_main'] == 'word_freq10': if params['train_data_size'] == 'train100h': params['num_classes_main'] = 7213 elif params['train_data_size'] == 'train460h': params['num_classes_main'] = 18641 elif params['train_data_size'] == 'train960h': params['num_classes_main'] = 26642 else: raise TypeError if params['label_type_sub'] == 'character': params['num_classes_sub'] = 28 elif params['label_type_sub'] == 'character_capital_divide': if params['train_data_size'] == 'train100h': params['num_classes_sub'] = 72 elif params['train_data_size'] == 'train460h': params['num_classes_sub'] = 77 elif params['train_data_size'] == 'train960h': params['num_classes_sub'] = 77 else: raise TypeError # Model setting model = MultitaskCTC(encoder_type=params['encoder_type'], input_size=params['input_size'], splice=params['splice'], num_stack=params['num_stack'], num_units=params['num_units'], num_layers_main=params['num_layers_main'], num_layers_sub=params['num_layers_sub'], num_classes_main=params['num_classes_main'], num_classes_sub=params['num_classes_sub'], main_task_weight=params['main_task_weight'], lstm_impl=params['lstm_impl'], use_peephole=params['use_peephole'], parameter_init=params['weight_init'], clip_grad_norm=params['clip_grad_norm'], clip_activation=params['clip_activation'], num_proj=params['num_proj'], weight_decay=params['weight_decay']) # Set process name setproctitle( 'libri_' + model.name + '_' + params['train_data_size'] + '_' + params['label_type_main'] + '_' + params['label_type_sub']) model.name += '_' + str(params['num_units']) model.name += '_main' + str(params['num_layers_main']) model.name += '_sub' + str(params['num_layers_sub']) model.name += '_' + params['optimizer'] model.name += '_lr' + str(params['learning_rate']) if params['num_proj'] != 0: model.name += '_proj' + str(params['num_proj']) if params['dropout'] != 0: model.name += '_drop' + str(params['dropout']) if params['num_stack'] != 1: model.name += '_stack' + str(params['num_stack']) if params['weight_decay'] != 0: model.name += '_wd' + str(params['weight_decay']) model.name += '_main' + str(params['main_task_weight']) if len(gpu_indices) >= 2: model.name += '_gpu' + str(len(gpu_indices)) # Set save path model.save_path = mkdir_join( model_save_path, 'ctc', params['label_type_main'] + '_' + params['label_type_sub'], params['train_data_size'], model.name) # Reset model directory model_index = 0 new_model_path = model.save_path while True: if isfile(join(new_model_path, 'complete.txt')): # Training of the first model have been finished model_index += 1 new_model_path = model.save_path + '_' + str(model_index) elif isfile(join(new_model_path, 'config.yml')): # Training of the first model have not been finished yet model_index += 1 new_model_path = model.save_path + '_' + str(model_index) else: break model.save_path = mkdir(new_model_path) # Save config file shutil.copyfile(config_path, join(model.save_path, 'config.yml')) sys.stdout = open(join(model.save_path, 'train.log'), 'w') # TODO(hirofumi): change to logger do_train(model=model, params=params, gpu_indices=gpu_indices)
return model if __name__ == "__main__": # parse python script input parameters parser = argparse.ArgumentParser() args = add_args(parser) logging.info(args) worker_number = 1 process_id = 0 # customize the process name str_process_name = "Fedml (single):" + str(process_id) setproctitle.setproctitle(str_process_name) # customize the log format logging.basicConfig(level=logging.INFO, # logging.basicConfig(level=logging.DEBUG, format=str( process_id) + ' - %(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', datefmt='%a, %d %b %Y %H:%M:%S') hostname = socket.gethostname() logging.info("#############process ID = " + str(process_id) + ", host name = " + hostname + "########" + ", process ID = " + str(os.getpid()) + ", process Name = " + str(psutil.Process(os.getpid()))) # initialize the wandb machine learning experimental tracking platform (https://www.wandb.com/). if process_id == 0:
def main(argv): parser = argparse.ArgumentParser( description= 'update_whole_seq_db version %s.\nProcess all sequences in the waiting queue table NewSequencesTable.\nShould be run daily.' % __version__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--port', help='postgres port', default=5432, type=int) parser.add_argument('--host', help='postgres host', default=None) parser.add_argument( '--server-type', help= 'server type (develop/main/test). overridden by --database/user/password', default='main') parser.add_argument('--database', help='postgres database') parser.add_argument('--user', help='postgres user (to override --server-type)') parser.add_argument('--password', help='postgres password (to override --server-type)') parser.add_argument('--proc-title', help='name of the process (to view in ps aux)') parser.add_argument('--debug-level', help='debug level (1 for debug ... 9 for critical)', default=2, type=int) parser.add_argument( '--no-delete', help='do not delete from new sequences queue (NewSequencesTable).', action='store_true') parser.add_argument( '-w', '--wholeseqdb', help='name of the whole sequence database (i.e. SILVA/GREENGENES)', default='SILVA') parser.add_argument('-f', '--wholeseq-file', help='name of the whole sequence fasta file', required=True) parser.add_argument( '--update-all', help= "update all dbbact sequences (recalculate). If not set, will just update new dbbact sequences", action='store_true') args = parser.parse_args(argv) SetDebugLevel(args.debug_level) # set the process name for ps aux if args.proc_title: setproctitle.setproctitle(args.proc_title) # get the database connection con, cur = db_access.connect_translator_db(server_type=args.server_type, database=args.database, user=args.user, password=args.password, port=args.port, host=args.host) update_whole_seq_db(con, cur, args.wholeseq_file, seqdbname=args.wholeseqdb, check_exists=not args.update_all, no_delete=args.no_delete)