Example #1
    def run(self):
        # todo: 1) do all ansible calls through subprocess
        # 2) move to Python 3 and asyncIO all in one thread + executors
        # ... -> eliminate multiprocessing here,
        # ... possible to use simple logging, with redis handler

        self.log.info("Creating VM Spawner, HealthChecker, Terminator")
        self.spawner = Spawner(self.opts)
        self.checker = HealthChecker(self.opts)
        self.terminator = Terminator(self.opts)
        self.vm_manager = VmManager(
            opts=self.opts, logger=self.log,
        )
        self.vm_manager.post_init()
        self.log.info("Starting up VM EventHandler")
        self.event_handler = EventHandler(self.opts,
                                          vmm=self.vm_manager,
                                          terminator=self.terminator)
        self.event_handler.post_init()
        self.event_handler.start()

        self.log.info("Starting up VM Master")
        self.vm_master = VmMaster(self.opts,
                                  vmm=self.vm_manager,
                                  spawner=self.spawner,
                                  checker=self.checker)
        self.vm_master.start()
        setproctitle("Copr VMM base process")
Example #2
def test_obfuscate_process_password():
    original_title = setproctitle.getproctitle()

    setproctitle.setproctitle("pgcli user=root password=secret host=localhost")
    obfuscate_process_password()
    title = setproctitle.getproctitle()
    expected = "pgcli user=root password=xxxx host=localhost"
    assert title == expected

    setproctitle.setproctitle("pgcli user=root password=top secret host=localhost")
    obfuscate_process_password()
    title = setproctitle.getproctitle()
    expected = "pgcli user=root password=xxxx host=localhost"
    assert title == expected

    setproctitle.setproctitle("pgcli user=root password=top secret")
    obfuscate_process_password()
    title = setproctitle.getproctitle()
    expected = "pgcli user=root password=xxxx"
    assert title == expected

    setproctitle.setproctitle("pgcli postgres://root:secret@localhost/db")
    obfuscate_process_password()
    title = setproctitle.getproctitle()
    expected = "pgcli postgres://root:xxxx@localhost/db"
    assert title == expected

    setproctitle.setproctitle(original_title)
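
The test above only pins down the expected behaviour. For orientation, here is a minimal sketch of what an obfuscate_process_password helper could look like, assuming a regex-based rewrite of the current process title (a sketch built against the assertions above, not necessarily pgcli's actual implementation):

import re
import setproctitle

def obfuscate_process_password():
    # Sketch only: mask any password visible in the current process title.
    title = setproctitle.getproctitle()
    if '://' in title:
        # URL-style DSN: mask the segment between a ':' and the following '@'
        title = re.sub(r':[^:@/]+@', ':xxxx@', title)
    elif '=' in title:
        # key=value style: mask everything after "password=" up to the next
        # "key=" token or the end of the string
        title = re.sub(r'password=(.+?)((\s[a-zA-Z]+=)|$)', r'password=xxxx\2', title)
    setproctitle.setproctitle(title)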
Example #3
def supervise(pid, job_id, timeout=1, log_file=None):
    """
    Supervise a job process, entering a loop that ends only when the job
    terminates.

    :param int pid:
        the process id
    :param int job_id:
        the job id
    :param float timeout:
        timeout value in seconds
    :param str log_file:
        Optional log file location. If specified, log messages will be appended
        to this file. If not specified, log messages will be printed to the
        console.
    """
    # Set the name of this process (as reported by /bin/ps)
    setproctitle('openquake supervisor for job_id=%s job_pid=%s'
                 % (job_id, pid))
    ignore_sigint()

    if log_file is not None:
        logging.root.addHandler(SupervisorLogFileHandler(job_id, log_file))
    else:
        logging.root.addHandler(SupervisorLogStreamHandler(job_id))

    supervisor = SupervisorLogMessageConsumer(job_id, pid, timeout)
    supervisor.run()
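
The supervise() variants in this listing call an ignore_sigint() helper that is not shown. A plausible one-liner, assuming its only job is to keep an interactive Ctrl-C from reaching the supervisor (an assumption, not necessarily the OpenQuake code):

import signal

def ignore_sigint():
    # Assumed behaviour: ignore SIGINT in the supervisor process so that a
    # Ctrl-C only affects the supervised job, not the supervisor itself.
    signal.signal(signal.SIGINT, signal.SIG_IGN)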
Example #4
def main():
    """ Main program which is called when the clacks agent process gets started.
        It does the main forking and OS-related tasks. """

    # Set process list title
    os.putenv('SPT_NOENV', 'non_empty_value')
    setproctitle("clacks-agent")

    # Initialize core environment
    env = Environment.getInstance()
    if not env.base:
        env.log.critical("Clacks agent needs a 'core.base' to operate on")
        exit(1)

    env.log.info("Clacks %s is starting up (server id: %s)" % (VERSION, env.id))

    if env.config.get('core.profile'):
        import cProfile
        import clacks.common.lsprofcalltree
        p = cProfile.Profile()
        p.runctx('mainLoop(env)', globals(), {'env': env})
        #pylint: disable=E1101
        k = clacks.common.lsprofcalltree.KCacheGrind(p)
        data = open('prof.kgrind', 'w+')
        k.output(data)
        data.close()
    else:
        mainLoop(env)
Example #5
def main() -> None:
    '''Runs server'''

    # Parse options
    define('production',
           default=False,
           help='run in production mode',
           type=bool)
    options.parse_command_line()

    # Set server name
    pname = settings.process_name if settings.process_name else None
    if pname:
        setproctitle(pname)

    # Register IRC server
    server = IRCServer(settings = ircdsettings)
    for address, port in ircdsettings['listen']:
        server.listen(port, address = address)

    # Start profiling
    if settings.profiling:
        import yappi
        yappi.start()

    # Setup autoreload
    autoreload.start()

    # Run application
    IOLoop.instance().start()
Example #6
def main():
    parser = setup_parser()
    argcomplete.autocomplete(parser)
    options = parser.parse_args()

    _setup_logger(options)

    # Support the deprecated -c option
    if getattr(options, 'config', None) is not None:
        options.configs.append(options.config)

    if options.subparser in ('report', 'logs', 'metrics', 'run'):
        _default_region(options)
        _default_account_id(options)

    try:
        command = options.command
        if not callable(command):
            command = getattr(
                importlib.import_module(command.rsplit('.', 1)[0]),
                command.rsplit('.', 1)[-1])

        # Set the process name to something cleaner
        process_name = [os.path.basename(sys.argv[0])]
        process_name.extend(sys.argv[1:])
        setproctitle(' '.join(process_name))
        command(options)
    except Exception:
        if not options.debug:
            raise
        traceback.print_exc()
        pdb.post_mortem(sys.exc_info()[-1])
Example #7
def main():
    from solarsan import logging
    logger = logging.getLogger(__name__)
    from solarsan.cluster.models import Peer
    from solarsan.conf import rpyc_conn_config
    from rpyc.utils.server import ThreadedServer
    #from rpyc.utils.server import ThreadedZmqServer, OneShotZmqServer
    from setproctitle import setproctitle
    from .service import CLIService
    import rpyc

    title = 'SolarSan CLI'
    setproctitle('[%s]' % title)

    local = Peer.get_local()
    cluster_iface_bcast = local.cluster_nic.broadcast
    # Allow all public attrs, because exposed_ is stupid and should be a
    # f*****g decorator.
    #t = ThreadedZmqServer(CLIService, port=18863,
    #t = OneShotZmqServer(CLIService, port=18863,
    t = ThreadedServer(CLIService, port=18863,
                       registrar=rpyc.utils.registry.UDPRegistryClient(ip=cluster_iface_bcast,
                                                                       #logger=None,
                                                                       logger=logger,
                                                                       ),
                       auto_register=True,
                       logger=logger,
                       #logger=None,
                       protocol_config=rpyc_conn_config)
    t.start()
Example #8
def supervise(pid, job_id, timeout=1, log_file=None):
    """
    Supervise a job process, entering a loop that ends only when the job
    terminates.

    :param int pid:
        the process id
    :param int job_id:
        the job id
    :param float timeout:
        timeout value in seconds
    :param str log_file:
        Optional log file location. If specified, log messages will be appended
        to this file. If not specified, log messages will be printed to the
        console.
    """
    the_job = OqJob.objects.get(id=job_id)
    calc_id = the_job.calculation.id
    if the_job.hazard_calculation is not None:
        calc_domain = 'hazard'
    else:
        calc_domain = 'risk'
    # Set the name of this process (as reported by /bin/ps)
    setproctitle('openquake supervisor for %s calc_id=%s job_pid=%s'
                 % (calc_domain, calc_id, pid))
    ignore_sigint()

    start_logging(calc_id, calc_domain, log_file)

    supervisor = SupervisorLogMessageConsumer(job_id, pid, timeout)

    supervisor.run()
Example #9
    def __init__(self, name=None, description=None, epilog=None, debug_flag=True):
        self.db = ConfigDB()
        self.name = os.path.basename(sys.argv[0])

        reload(sys)
        sys.setdefaultencoding('utf-8')

        setproctitle('%s %s' % (self.name, ' '.join(sys.argv[1:])))
        signal.signal(signal.SIGINT, self.SIGINT)

        if name is None:
            name = self.name

        self.logger = SoundforestLogger()
        self.log = self.logger.default_stream

        self.parser = argparse.ArgumentParser(
            prog=name,
            description=description,
            epilog=epilog,
            add_help=True,
            conflict_handler='resolve',
        )
        self.subcommand_parser = None
        self.subcommands = None

        if debug_flag:
            self.parser.add_argument('--debug', action='store_true', help='Show debug messages')
Example #10
    def main(self):
        parser = argparse.ArgumentParser()
        parser.add_argument('-c', metavar='CONFIG', default=DEFAULT_CONFIGFILE, help='Middleware config file')
        parser.add_argument('-p', type=int, metavar='PORT', default=5500, help="WebSockets server port")
        args = parser.parse_args()
        configure_logging('/var/log/containerd.log', 'DEBUG')
        setproctitle.setproctitle('containerd')

        gevent.signal(signal.SIGTERM, self.die)
        gevent.signal(signal.SIGQUIT, self.die)

        self.config = args.c
        self.init_datastore()
        self.init_dispatcher()
        self.init_mgmt()
        self.init_nat()
        self.init_ec2()
        self.logger.info('Started')

        # WebSockets server
        kwargs = {}
        s4 = WebSocketServer(('', args.p), ServerResource({
            '/console': ConsoleConnection,
        }, context=self), **kwargs)

        s6 = WebSocketServer(('::', args.p), ServerResource({
            '/console': ConsoleConnection,
        }, context=self), **kwargs)

        serv_threads = [gevent.spawn(s4.serve_forever), gevent.spawn(s6.serve_forever)]
        gevent.joinall(serv_threads)
Example #11
def main():
    setproctitle.setproctitle("swankvm")
    parser = argparse.ArgumentParser(description='runkvm arguments.')
    parser.add_argument('--testname', '-t', action='store', help='The name of the test to run.')
    parser.add_argument('--hostname', '-H', action='store', default='', help='The name of the host to run.')
    parser.add_argument('--compile', action="store_true", help='compile the source on host <hostname>.')
    parser.add_argument('--install', action="store_true", help='run make install module_install .')
    parser.add_argument('--x509', action="store_true", help='tell the guest to setup the X509 certs in NSS.')
    parser.add_argument('--final', action="store_true", help='run final.sh on the host.')
    parser.add_argument('--reboot', action="store_true", help='first reboot the host')
    # unused parser.add_argument('--timer', default=120, help='timeout for each command for expect.')
    args = parser.parse_args()

    if args.final:
        prompt = r"\[root@%s %s\]# " % (args.hostname, args.testname)
        child = connect_to_kvm(args, prompt)
    else:
        child = connect_to_kvm(args)

    if not child:
        sys.exit("Failed to launch/connect to %s - aborted" % args.hostname)

    if args.compile:
        compile_on(args, child)

    if args.install:
        make_install(args, child)

    if args.testname and not args.final:
        run_test(args, child)

    if args.final:
        run_final(args, child)
Example #12
    def run(self):
        container = create_container(self.config)
        install_plugins(container, self.config.get('plugins', {}))
        install_interfaces(container, self.config.get('interfaces', {}))

        for cls_name in self.args.get('--interface', ()):
            cls = import_object(cls_name)
            container.install(cls)

        if self.args.get('--debug'):
            from gevent.backdoor import BackdoorServer
            backdoor = BackdoorServer(('127.0.0.1', 5005), locals={'container': container})
            gevent.spawn(backdoor.serve_forever)

        def handle_signal():
            logger.info('caught SIGINT/SIGTERM, pid=%s', os.getpid())
            container.stop()
            container.join()
            sys.exit(0)
        gevent.signal(signal.SIGINT, handle_signal)
        gevent.signal(signal.SIGTERM, handle_signal)

        setproctitle('lymph-instance (identity: %s, endpoint: %s, config: %s)' % (
            container.identity,
            container.endpoint,
            self.config.source,
        ))

        container.start(register=not self.args.get('--isolated', False))

        if self.args.get('--reload'):
            set_source_change_callback(container.stop)

        container.join()
Example #13
    def init(self):
        global use_setproctitle
        if use_setproctitle:
            setproctitle("mongodb_log %s" % self.topic)

        self.mongoconn = Connection(self.mongodb_host, self.mongodb_port)
        self.mongodb = self.mongoconn[self.mongodb_name]
        self.mongodb.set_profiling_level = SLOW_ONLY

        self.collection = self.mongodb[self.collname]
        self.collection.count()

        self.queue.cancel_join_thread()

        rospy.init_node(WORKER_NODE_NAME % (self.nodename_prefix, self.id, self.collname),
                        anonymous=False)

        self.subscriber = None
        while not self.subscriber:
            try:
                msg_class, real_topic, msg_eval = rostopic.get_topic_class(self.topic, blocking=True)
                self.subscriber = rospy.Subscriber(real_topic, msg_class, self.enqueue, self.topic)
            except rostopic.ROSTopicIOException:
                print("FAILED to subscribe, will keep trying %s" % self.name)
                time.sleep(randint(1,10))
            except rospy.ROSInitException:
                print("FAILED to initialize, will keep trying %s" % self.name)
                time.sleep(randint(1,10))
                self.subscriber = None
Example #14
    def __init__(self, stream, gate):
        self.stream = stream
        self.gate = gate
        aj.master = False
        os.setpgrp()
        setproctitle.setproctitle(
            '%s worker [%s]' % (
                sys.argv[0],
                self.gate.name
            )
        )
        set_log_params(tag=self.gate.log_tag)
        init_log_forwarding(self.send_log_event)

        logging.info(
            'New worker "%s" PID %s, EUID %s, EGID %s',
            self.gate.name,
            os.getpid(),
            os.geteuid(),
            os.getegid(),
        )

        self.context = Context(parent=aj.context)
        self.context.session = self.gate.session
        self.context.worker = self
        self.handler = HttpMiddlewareAggregator([
            AuthenticationMiddleware.get(self.context),
            CentralDispatcher.get(self.context),
        ])

        self._master_config_reloaded = Event()
Example #15
    def __init__(self, name=None, description=None, epilog=None, debug_flag=True, subcommands=False):
        self.name = os.path.basename(sys.argv[0])
        setproctitle('%s %s' % (self.name, ' '.join(sys.argv[1:])))
        signal.signal(signal.SIGINT, self.SIGINT)

        reload(sys)
        sys.setdefaultencoding('utf-8')

        if name is None:
            name = self.name

        # Set to True to avoid any messages from self.message to be printed
        self.silent = False

        self.logger = Logger(self.name)
        self.log = self.logger.default_stream

        self.parser = argparse.ArgumentParser(
            prog=name,
            description=description,
            epilog=epilog,
            add_help=True,
            conflict_handler='resolve',
        )
        if debug_flag:
            self.parser.add_argument('--debug', action='store_true', help='Show debug messages')

        if subcommands:
            self.commands = {}
            self.command_parsers = self.parser.add_subparsers(
                dest='command', help='Please select one command mode below',
                title='Command modes'
            )
Example #16
def _worker(in_queue, out_queue, worker_id):
    try:
        import setproctitle
        setproctitle.setproctitle("imageWorker")
    except ImportError:
        pass

    done = False
    while not done:
        if not in_queue.empty():
            obj = in_queue.get()
            # if a bool is passed down the queue, set the done flag
            if isinstance(obj, bool):
                print "got a bool down the pipe; shutting down"
                done = True
                import sys
                #sys.exit()
            else:
                url, batch_id = obj
                
                w, h, buffers = _downloadImage(url, worker_id)
                if w is not None:
                    #print "putting loaded buffers [%d] on out queue" % len(buffers)
                    out_queue.put((url, batch_id, w, h, buffers))
                else:
                    #print "putting blank buffer on out queue."
                    out_queue.put((url, batch_id, 0, 0, []))
        pygame.time.wait(SLEEP_TIME)
Example #17
    def run(self, *args, **kwargs):
        """
        The Node main method, running in a child process (similar to Process.run(), but it also accepts args).
        A child class can override this method, but it needs to call super().run(*args, **kwargs)
        for the node to start properly and call update() as expected.
        :param args: arguments to pass to update()
        :param kwargs: keyword arguments to pass to update()
        :return: last exitcode returned by update()
        """
        # TODO : make use of the arguments ? since run is now the target for Process...

        exitstatus = None  # keeping the semantic of multiprocessing.Process : running process has None

        if setproctitle and self.new_title:
            setproctitle.setproctitle("{0}".format(self.name))

        print('[{proc}] Proc started as [{pid}]'.format(proc=self.name, pid=self.ident))

        with self.context_manager(*args, **kwargs) as cm:
            if cm:
                cmargs = maybe_tuple(cm)
                # prepending context manager, to be able to access it from target
                args = cmargs + args

            exitstatus = self.eventloop(*args, **kwargs)

            logging.debug("[{self.name}] Proc exited.".format(**locals()))
            return exitstatus  # returning last exit status from the update function
Example #18
    def run(self, host=None, port=None, debug=None, workers=None):
        """
        Start the server.
        :param host: IP address to listen on
        :param port: port to listen on
        :param debug: whether to run in debug mode
        :param workers: number of workers
        :return:
        """
        self._validate_cmds()

        if host is None:
            host = constants.SERVER_HOST
        if port is None:
            port = constants.SERVER_PORT
        if debug is not None:
            self.debug = debug

        workers = workers if workers is not None else 1

        logger.info('Running server on %s, debug: %s, workers: %s',
                    (host, port), self.debug, workers)

        self._prepare_server((host, port))
        setproctitle.setproctitle(self._make_proc_name('master'))
        # signals can only be set up in the main thread
        self._handle_parent_proc_signals()
        self._spawn_workers(workers, self._worker_run)
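
This example and Example #27 below both call a _make_proc_name() helper whose definition is not included. A hedged sketch, assuming it simply tags the given role onto the original command line so both stay visible in ps output (hypothetical, not the project's actual helper):

    def _make_proc_name(self, subtitle):
        # Hypothetical sketch: tag the role onto the original invocation,
        # e.g. "master [/usr/bin/python server.py --port 8000]".
        import sys
        return '%s [%s]' % (subtitle, ' '.join([sys.executable] + sys.argv))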
Example #19
def daemonize_server(port_or_path, fix_title=False):
    process_id = os.fork()
    if process_id < 0:
        raise Error('Unable to fork')
    elif process_id != 0:
        return

    # noinspection PyNoneFunctionAssignment,PyArgumentList
    process_id = os.setsid()
    if process_id == -1:
        sys.exit(1)

    for fd in range(3, resource.getrlimit(resource.RLIMIT_NOFILE)[0]):
        try:
            os.close(fd)
        except OSError:
            pass

    devnull = os.devnull if hasattr(os, 'devnull') else '/dev/null'
    devnull_fd = os.open(devnull, os.O_RDWR)
    for fd in range(3):
        # noinspection PyTypeChecker
        os.dup2(devnull_fd, fd)

    os.umask(0o27)
    os.chdir('/')
    if fix_title and setproctitle is not None:
        # noinspection PyCallingNonCallable
        setproctitle('papa daemon from %s' % os.path.basename(sys.argv[0]))
    socket_server(port_or_path)
Example #20
    def run(self):
        setproctitle('satori: {0}'.format(self.name))

        logging.info('%s starting', self.name)

        signal(SIGTERM, self.handle_signal)
        signal(SIGINT, self.handle_signal)

        # let ssl register OpenSSL callbacks, so that they do not interfere with callbacks from OpenSSL.crypto
        import ssl

        # let pyOpenSSL register OpenSSL callbacks
        import OpenSSL.SSL
        import OpenSSL.crypto

        # tell libpq not to register OpenSSL callbacks - hopefully no DB connection has been created yet
        libpq = ctypes.cdll.LoadLibrary('libpq.so')
        libpq.PQinitSSL(0)


        try:
            self.do_run()
        except SystemExit:
            logging.info('%s exited (SystemExit)', self.name)
        except:
            logging.exception('%s exited with error', self.name)
        else:
            logging.info('%s exited', self.name)
Example #21
def start_worker_for_queue(flow='simple_queue_processor', queue='zmon:queue:default', **execution_context):
    """
    Entry point for starting the worker workflows.
    """

    known_flows = {'simple_queue_processor': flow_simple_queue_processor}

    if flow not in known_flows:
        logger.exception('Bad role: %s' % flow)
        sys.exit(1)

    logger.info('Starting worker with pid=%s, flow type: %s, queue: %s, execution_context: %s', os.getpid(), flow,
                queue, execution_context)
    setproctitle.setproctitle('zmon-worker {} {}'.format(flow, queue))

    # start Flow Reactor here
    FlowControlReactor.get_instance().start()

    exit_code = 0
    try:

        known_flows[flow](queue=queue, **execution_context)

    except (KeyboardInterrupt, SystemExit):
        logger.warning('Caught user signal to stop consumer: finishing!')
    except Exception:
        logger.exception('Exception in start_worker(). Details: ')
        exit_code = 2
    finally:
        FlowControlReactor.get_instance().stop()
        sys.exit(exit_code)
Example #22
    def run(self):
        """Runs the worker and consumes messages from RabbitMQ.
        Returns only after `shutdown()` is called.

        """
        # Lazy import setproctitle.
        # There is a bug with the latest version of Python in
        # combination with uWSGI and setproctitle.
        # Watch: https://github.com/unbit/uwsgi/issues/1030
        from setproctitle import setproctitle
        setproctitle("kuyruk: worker on %s" % self.queue)

        self._setup_logging()

        signal.signal(signal.SIGINT, self._handle_sigint)
        signal.signal(signal.SIGTERM, self._handle_sigterm)
        signal.signal(signal.SIGHUP, self._handle_sighup)
        signal.signal(signal.SIGUSR1, self._handle_sigusr1)
        signal.signal(signal.SIGUSR2, self._handle_sigusr2)

        self._started = os.times()[4]

        for f in (self._watch_load, self._shutdown_timer):
            t = threading.Thread(target=f)
            t.daemon = True
            t.start()

        signals.worker_start.send(self.kuyruk, worker=self)
        self._consume_messages()
        signals.worker_shutdown.send(self.kuyruk, worker=self)

        logger.debug("End run worker")
Example #23
def run_rule_async(rule_name, settings):
    setproctitle("inferno - %s" % rule_name)
    signal.signal(signal.SIGHUP, signal.SIG_IGN)
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    signal.signal(signal.SIGTERM, signal.SIG_IGN)

    rules = get_rules_by_name(
        rule_name, settings['rules_directory'], immediate=False)
    if rules and len(rules) > 0:
        rule = rules[0]
    else:
        log.error('No rule exists with rule_name: %s' % rule_name)
        raise Exception('No rule exists with rule_name: %s' % rule_name)

    pid_dir = pid.pid_dir(settings)
    log.info("Running %s" % rule.name)
    try:
        pid.create_pid(pid_dir, rule, str(os.getpid()))
        execute_rule(rule, settings)
    except Exception as e:
        log.exception('%s: %s', rule_name, e)
        if not rule.retry:
            pid.create_last_run(pid_dir, rule)
    else:
        pid.create_last_run(pid_dir, rule)
    finally:
        pid.remove_pid(pid_dir, rule)
        os._exit(0)
Example #24
    def run(self):
        self._name = "BuildActor-{0:d} job {1}".format(self.pid, self.job_id)
        setproctitle.setproctitle('mob2_build')

        logging.config.dictConfig(self._log_conf)
        self._log = logging.getLogger(__name__)

        # change the status to make the job aware that it is currently being built
        job = self.get_job()
        job.status.state = Status.BUILDING
        job.save()

        self.make_job_environement(job)
        os.chdir(job.dir)

        # import data needed for the job

        # build the cmdline??? only for ClJob???
        # or a generic job action relying on polymorphism?

        # perform data conversion
        # how to decide which data must be converted?

        # the access log must record
        # the submitted jobs to mobyle
        #  or
        # the submitted job to execution?
        #
        # acc_log = logging.getLogger('access')
        # acc_log.info("test access log {0}".format(self._name))

        # the monitor is now aware of the new status
        job.status.state = Status.TO_BE_SUBMITTED
        job.save()
        self._log.info("{0} put job {1} with status {2} in table".format(self._name, job.id, job.status))
Example #25
def _set_process_title():
    try:
        import setproctitle
    except ImportError:
        pass
    else:
        setproctitle.setproctitle("kupfer")
Example #26
def ensure_running( config ):
   """
   Verify that there is an automount daemon servicing a mountpoint.
   If there isn't, start one.
   If we're configured to run in the foreground, this method never returns.
   """
   
   mountpoint_dir = config['mountpoint_dir']
   
   # is the daemon running?
   procs = watchdog.find_by_attrs( "syndicate-automount-daemon", {"mounts": mountpoint_dir} )
   if len(procs) > 0:
      # it's running
      print "Syndicate automount daemon already running for %s (PID(s): %s)" % (mountpoint_dir, ",".join( [str(watchdog.get_proc_pid(p)) for p in procs] ))
      return True
   
   if config.get("foreground", None):
      main( config )
      
   else:
      logfile_path = None 
      pidfile_path = config.get("pidfile", None)
      
      if config.has_key("logdir"):
         logfile_path = os.path.join( config['logdir'], "syndicated.log" )
      
      title = watchdog.attr_proc_title( "syndicate-automount-daemon", {"mounts" : mountpoint_dir} )
      setproctitle.setproctitle( title )
      
      daemon.daemonize( lambda: main(config), logfile_path=logfile_path, pidfile_path=pidfile_path )
      
      return True
Example #27
    def run(self, debug=None):
        """

        :param debug:
        :return:
        """
        self._validate_cmds()

        if debug is not None:
            self.debug = debug

        if os.getenv(constants.WORKER_ENV_KEY) != 'true':
            # parent (master) process
            logger.info('Connect to server, debug: %s, workers: %s',
                        self.debug, self.spawn_count)

            # set the process title
            setproctitle.setproctitle(self._make_proc_name('worker:master'))
            # signals can only be set up in the main thread
            self._handle_parent_proc_signals()
            self._spawn_workers(self.spawn_count)
        else:
            # child (worker) process
            setproctitle.setproctitle(self._make_proc_name('worker:worker'))
            self._worker_run()
Example #28
    def run(self):
        setproctitle("Event Handler")

        self.do_recycle_proc = Recycle(terminator=self.terminator, recycle_period=self.recycle_period)
        self.do_recycle_proc.start()

        self.start_listen()
Example #29
    def __init__(self, name=None, description=None, epilog=None, debug_flag=True):
        self.name = os.path.basename(sys.argv[0])
        setproctitle('%s %s' % (self.name, ' '.join(sys.argv[1:])))
        signal.signal(signal.SIGINT, self.SIGINT)

        reload(sys)
        sys.setdefaultencoding('utf-8')

        if name is None:
            name = self.name

        # Set to True to avoid any messages from self.message to be printed
        self.silent = False

        self.logger = Logger(self.name)
        self.log = self.logger.default_stream

        self.subcommand_parser = None
        self.parser = argparse.ArgumentParser(
            prog=name,
            description=description,
            formatter_class=argparse.RawTextHelpFormatter,
            epilog=epilog,
            add_help=True,
            conflict_handler='resolve',
        )
        if debug_flag:
            self.parser.add_argument('--debug', action='store_true', help='Show debug messages')

        self.parser.add_argument('--insecure', action='store_false', help='No HTTPS certificate validation')
        self.parser.add_argument('-B', '--browser',
            choices=('chrome', 'chromium', 'firefox'),
            help='Browser for cookie stealing'
        )
Example #30
    def __init__(self, config, runner, pilot_id, rpc=None, debug=False,
                 run_timeout=180, backoff_delay=1):
        self.config = config
        self.runner = runner
        self.pilot_id = pilot_id
        self.hostname = gethostname()
        self.rpc = rpc
        self.debug = debug
        self.run_timeout = run_timeout
        self.backoff_delay = backoff_delay
        self.resource_interval = 1.0  # seconds between resource measurements

        self.running = True
        self.tasks = {}

        try:
            setproctitle('iceprod2_pilot({})'.format(pilot_id))
        except Exception:
            pass

        logger.warning('pilot_id: %s', self.pilot_id)
        logger.warning('hostname: %s', self.hostname)

        # hint at resources for pilot
        # don't pass them as raw, because that overrides condor
        if 'resources' in config['options']:
            for k in config['options']['resources']:
                v = config['options']['resources'][k]
                name = 'NUM_'+k.upper()
                if k in ('cpu','gpu'):
                    name += 'S'
                os.environ[name] = str(v)
        self.resources = Resources(debug=self.debug)

        self.start_time = time.time()
Example #31
def loop(args):
    # create config and model collection objects, and retrieve the run config
    configs = {}
    models  = {}
    configs.update({'run': RunConfig(args.config_file)})

    # set GPU-related environmental options and config settings
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) if args.gpu is not None else ''
    setproctitle('RGN ' + configs['run'].names['run'] + ' on ' + os.getenv('CUDA_VISIBLE_DEVICES', 'CPU'))

    # derived files and directories
    base_dir        = args.base_directory
    run_dir         = os.path.join(base_dir, RUNS_DIRNAME,        configs['run'].names['run'], configs['run'].names['dataset'])
    data_dir        = os.path.join(base_dir, DATAS_DIRNAME,       configs['run'].names['dataset'])
    checkpoints_dir = os.path.join(run_dir,  CHECKPOINTS_DIRNAME, '')
    logs_dir        = os.path.join(run_dir,  LOGS_DIRNAME,        '')
    stdout_err_file = os.path.join(base_dir, LOGS_DIRNAME,        configs['run'].names['run'] + '.log')
    alphabet_file   = os.path.join(data_dir, ALPHABETS_DIRNAME,   configs['run'].names['alphabet'] + '.csv') if configs['run'].names['alphabet'] is not None else None

    # this is all for evaluation models (including training, so training_batch_size is for evaluation)
    full_training_glob     = os.path.join(data_dir, FULL_TRAINING_DIRNAME,     configs['run'].io['full_training_glob'])
    sample_training_glob   = os.path.join(data_dir, FULL_TRAINING_DIRNAME,   configs['run'].io['sample_training_glob'])
    training_batch_size    = configs['run'].evaluation['num_training_samples']
    training_invocations   = configs['run'].evaluation['num_training_invocations']

    validation_glob        = os.path.join(data_dir, SAMPLE_VALIDATION_DIRNAME, configs['run'].io['sample_validation_glob'])
    validation_batch_size  = configs['run'].evaluation['num_validation_samples']
    validation_invocations = configs['run'].evaluation['num_validation_invocations']

    testing_glob           = os.path.join(data_dir, FULL_TESTING_DIRNAME,      configs['run'].io['full_testing_glob'])
    testing_batch_size     = configs['run'].evaluation['num_testing_samples']
    testing_invocations    = configs['run'].evaluation['num_testing_invocations']

    if not args.prediction_only:
        eval_num_epochs = None
    else:
        eval_num_epochs = 1
        training_batch_size = validation_batch_size = testing_batch_size = 1
        training_invocations = validation_invocations = testing_invocations = 1

    # redirect stdout/err to file
    sys.stderr.flush()
    if not os.path.exists(os.path.dirname(stdout_err_file)): os.makedirs(os.path.dirname(stdout_err_file))
    stdout_err_file_handle = open(stdout_err_file, 'w')
    os.dup2(stdout_err_file_handle.fileno(), sys.stderr.fileno())
    sys.stdout = stdout_err_file_handle

    # select device placement taking into consideration the interaction between training and evaluation models
    if configs['run'].computing['training_device'] == 'GPU' and configs['run'].computing['evaluation_device'] == 'GPU':
        fod_training   = {'/cpu:0': ['point_to_coordinate']}
        fod_evaluation = {'/cpu:0': ['point_to_coordinate']}
        dd_training   = ''
        dd_evaluation = ''
    elif configs['run'].computing['training_device'] == 'GPU' and configs['run'].computing['evaluation_device'] == 'CPU':
        fod_training   = {'/cpu:0': ['point_to_coordinate', 'loss_history']}
        fod_evaluation = {}
        dd_training   = ''
        dd_evaluation = '/cpu:0'
    else:
        fod_training   = {}
        fod_evaluation = {}
        dd_training   = '/cpu:0'
        dd_evaluation = '/cpu:0'

    # create models configuration templates
    configs.update({'training': RGNConfig(args.config_file, 
                                          {'name':                        'training',
                                           'dataFilesGlob':               full_training_glob,
                                           'checkpointsDirectory':        checkpoints_dir,
                                           'logsDirectory':               logs_dir,
                                           'fileQueueCapacity':           configs['run'].queueing['training_file_queue_capacity'],
                                           'batchQueueCapacity':          configs['run'].queueing['training_batch_queue_capacity'],
                                           'minAfterDequeue':             configs['run'].queueing['training_min_after_dequeue'],
                                           'shuffle':                     configs['run'].queueing['training_shuffle'],
                                           'tertiaryNormalization':       configs['run'].loss['training_tertiary_normalization'],
                                           'batchDependentNormalization': configs['run'].loss['training_batch_dependent_normalization'],
                                           'alphabetFile':                alphabet_file,
                                           'functionsOnDevices':          fod_training,
                                           'defaultDevice':               dd_training,
                                           'fillGPU':                     args.fill_gpu})})

    configs.update({'evaluation': RGNConfig(args.config_file, 
                                            {'fileQueueCapacity':           configs['run'].queueing['evaluation_file_queue_capacity'],
                                             'batchQueueCapacity':          configs['run'].queueing['evaluation_batch_queue_capacity'],
                                             'minAfterDequeue':             configs['run'].queueing['evaluation_min_after_dequeue'],
                                             'shuffle':                     configs['run'].queueing['evaluation_shuffle'],
                                             'tertiaryNormalization':       configs['run'].loss['evaluation_tertiary_normalization'],
                                             'batchDependentNormalization': configs['run'].loss['evaluation_batch_dependent_normalization'],
                                             'alphabetFile':                alphabet_file,
                                             'functionsOnDevices':          fod_evaluation,
                                             'defaultDevice':               dd_evaluation,
                                             'numEpochs':                   eval_num_epochs,
                                             'bucketBoundaries':            None})})

    # Override included evaluation models with list from command-line if specified (assumes none are included and then includes ones that are specified)
    if args.evaluation_model:
        for prefix in ['', 'un']:
            for group in ['training', 'validation', 'testing']:
                configs['run'].evaluation.update({'include_' + prefix + 'weighted_' + group: False})
        for entry in args.evaluation_model:
            configs['run'].evaluation.update({'include_' + entry: True})

    # Override other command-line arguments
    if args.gpu_fraction: configs['training'].computing.update({'gpu_fraction': args.gpu_fraction})
    if args.milestone: configs['run'].optimization.update({'validation_milestone': dict(args.milestone)})

    # Ensure that correct validation reference is chosen if not predicting, and turn off evaluation loss if predicting
    if not args.prediction_only:
        if ((not configs['run'].evaluation['include_weighted_validation'])   and configs['run'].optimization['validation_reference'] == 'weighted') or \
           ((not configs['run'].evaluation['include_unweighted_validation']) and configs['run'].optimization['validation_reference'] == 'unweighted'):
            raise RuntimeError('Chosen validation reference is not included in run.')
    else:
        configs['evaluation'].loss['include'] = False

    # rescaling needed to adjust for how frequently loss_history is updated
    if configs['training'].curriculum['behavior'] == 'loss_change': 
        configs['training'].curriculum[  'change_num_iterations'] //= configs['run'].io['evaluation_frequency'] # result must be >=1
        configs['evaluation'].curriculum['change_num_iterations'] //= configs['run'].io['evaluation_frequency'] # ditto

    # create training model
    models = {}
    models.update({'training': RGNModel('training', configs['training'])})
    print('*** training configuration ***')
    pprint(configs['training'].__dict__)

    # create weighted training evaluation model (conditional)
    if configs['run'].evaluation['include_weighted_training']:
        configs.update({'eval_wt_train': deepcopy(configs['evaluation'])})
        configs['eval_wt_train'].io['name'] = 'evaluation_wt_training'
        configs['eval_wt_train'].io['data_files_glob'] = sample_training_glob
        configs['eval_wt_train'].optimization['batch_size'] = training_batch_size
        configs['eval_wt_train'].queueing['num_evaluation_invocations'] = training_invocations
        models.update({'eval_wt_train': RGNModel('evaluation', configs['eval_wt_train'])})
        print('\n\n\n*** weighted training evaluation configuration ***')
        pprint(configs['eval_wt_train'].__dict__)

    # create weighted validation evaluation model (conditional)
    if configs['run'].evaluation['include_weighted_validation']:
        configs.update({'eval_wt_val': deepcopy(configs['evaluation'])})
        configs['eval_wt_val'].io['name'] = 'evaluation_wt_validation'
        configs['eval_wt_val'].io['data_files_glob'] = validation_glob
        configs['eval_wt_val'].optimization['batch_size'] = validation_batch_size
        configs['eval_wt_val'].queueing['num_evaluation_invocations'] = validation_invocations
        if configs['run'].optimization['validation_reference'] == 'weighted': 
            configs['eval_wt_val'].curriculum['update_loss_history'] = True
        models.update({'eval_wt_val': RGNModel('evaluation', configs['eval_wt_val'])})
        print('\n\n\n*** weighted validation evaluation configuration ***')
        pprint(configs['eval_wt_val'].__dict__)

    # create weighted testing evaluation model (conditional)
    if configs['run'].evaluation['include_weighted_testing']:
        configs.update({'eval_wt_test': deepcopy(configs['evaluation'])})
        configs['eval_wt_test'].io['name'] = 'evaluation_wt_testing'
        configs['eval_wt_test'].io['data_files_glob'] = testing_glob
        configs['eval_wt_test'].optimization['batch_size'] = testing_batch_size
        configs['eval_wt_test'].queueing['num_evaluation_invocations'] = testing_invocations
        models.update({'eval_wt_test': RGNModel('evaluation', configs['eval_wt_test'])})
        print('\n\n\n*** weighted testing evaluation configuration ***')
        pprint(configs['eval_wt_test'].__dict__)

    # create equivalents for unweighted loss if there's a curriculum.
    if configs['training'].curriculum['mode'] is not None:
        # create unweighted training evaluation model (conditional)
        if configs['run'].evaluation['include_unweighted_training']:
            configs.update({'eval_unwt_train': deepcopy(configs['evaluation'])})
            configs['eval_unwt_train'].io['name'] = 'evaluation_unwt_training'
            configs['eval_unwt_train'].io['data_files_glob'] = sample_training_glob
            configs['eval_unwt_train'].optimization['batch_size'] = training_batch_size
            configs['eval_unwt_train'].queueing['num_evaluation_invocations'] = training_invocations
            configs['eval_unwt_train'].curriculum['mode'] = None
            configs['eval_unwt_train'].curriculum['behavior'] = None
            models.update({'eval_unwt_train': RGNModel('evaluation', configs['eval_unwt_train'])})
        
        # create unweighted validation evaluation model (conditional)
        if configs['run'].evaluation['include_unweighted_validation']:
            configs.update({'eval_unwt_val': deepcopy(configs['evaluation'])})
            configs['eval_unwt_val'].io['name'] = 'evaluation_unwt_validation'
            configs['eval_unwt_val'].io['data_files_glob'] = validation_glob
            configs['eval_unwt_val'].optimization['batch_size'] = validation_batch_size
            configs['eval_unwt_val'].queueing['num_evaluation_invocations'] = validation_invocations
            configs['eval_unwt_val'].curriculum['mode'] = None
            configs['eval_unwt_val'].curriculum['behavior'] = None
            if configs['run'].optimization['validation_reference'] == 'unweighted': 
                configs['eval_unwt_val'].curriculum['update_loss_history'] = True
            models.update({'eval_unwt_val': RGNModel('evaluation', configs['eval_unwt_val'])})

        # create unweighted testing evaluation model (conditional)
        if configs['run'].evaluation['include_unweighted_testing']:
            configs.update({'eval_unwt_test': deepcopy(configs['evaluation'])})
            configs['eval_unwt_test'].io['name'] = 'evaluation_unwt_testing'
            configs['eval_unwt_test'].io['data_files_glob'] = testing_glob
            configs['eval_unwt_test'].optimization['batch_size'] = testing_batch_size
            configs['eval_unwt_test'].queueing['num_evaluation_invocations'] = testing_invocations
            configs['eval_unwt_test'].curriculum['mode'] = None
            configs['eval_unwt_test'].curriculum['behavior'] = None
            models.update({'eval_unwt_test': RGNModel('evaluation', configs['eval_unwt_test'])})

    # start head model and related prep
    stdout_err_file_handle.flush()
    session = models['training'].start(models.values())
    global_step = models['training'].current_step(session)
    current_log_step = (global_step // configs['run'].io['prediction_frequency']) + 1
    log_dir = os.path.join(run_dir, str(current_log_step))
    restart = False

    # predict or train depending on set mode behavior
    if args.prediction_only:
        try:
            while not models['training'].is_done():
                predict_and_log(log_dir, configs, models, session)
        except tf.errors.OutOfRangeError:
            pass
        except:
            print('Unexpected error: ', sys.exc_info()[0])
            raise
        finally:
            if models['training']._is_started: models['training'].finish(session, save=False)
            stdout_err_file_handle.close()
    else:
        # clean up post last checkpoint residue if any
        if global_step != 0:
            # remove future directories
            last_log_step = sorted([int(os.path.basename(os.path.normpath(dir))) for dir in glob(os.path.join(run_dir, '*[0-9]'))])[-1]
            for step in range(current_log_step + 1, last_log_step + 1): rmtree(os.path.join(run_dir, str(step))) 

            # remove future log entries in current log files
            log_file = os.path.join(log_dir, 'error.log')
            if os.path.exists(log_file):
                with open(log_file, 'r+') as f:
                    while True:
                        new_line = f.readline().split()
                        if len(new_line) > 1:
                            step = int(new_line[1])
                            if step == global_step:
                                f.truncate()
                                break
                        else: # reached end without seeing global_step, means checkpoint is ahead of last recorded log entry
                            break

        # training loop
        try:
            while not models['training'].is_done():
                # Train for one step
                global_step, ids = models['training'].train(session)

                # Set and create logging directory and files if needed
                log_dir = os.path.join(run_dir, str((global_step // configs['run'].io['prediction_frequency']) + 1))
                log_file = os.path.join(log_dir, 'error.log')
                if not os.path.exists(log_dir): os.makedirs(log_dir)

                # Evaluate error, get diagnostics, and raise exceptions if necessary
                if global_step % configs['run'].io['evaluation_frequency'] == 0:
                    diagnostics = evaluate_and_log(log_file, configs, models, session)

                    # restart if a milestone is missed
                    val_ref_set_prefix = 'un' if configs['run'].optimization['validation_reference'] == 'unweighted' else ''
                    min_loss_achieved = diagnostics[val_ref_set_prefix + 'wt_val_loss']['min_tertiary_loss_achieved_all']
                    for step, loss in configs['run'].optimization['validation_milestone'].iteritems():
                        if global_step >= step and min_loss_achieved > loss:
                            raise MilestoneError('Milestone at step ' + str(global_step) + \
                                                 ' missed because minimum loss achieved so far is ' + str(min_loss_achieved))

                    # restart if gradients are zero
                    if (diagnostics['min_grad'] == 0 and diagnostics['max_grad'] == 0) or \
                       (configs['run'].evaluation['include_diagnostics'] and (np.isnan(diagnostics['min_grad']) or np.isnan(diagnostics['max_grad']))):
                        raise DeadGradientError('Gradient is dead.')

                # Predict structures. Currently assumes that weighted training and validation models are available, and fails if they're not.
                if global_step % configs['run'].io['prediction_frequency'] == 0:
                    predict_and_log(log_dir, configs, models, session)

                # Checkpoint
                if global_step % configs['run'].io['checkpoint_frequency'] == 0:
                    models['training'].save(session)

        except tf.errors.OutOfRangeError:
            print('Epoch limit reached.')

        except (tf.errors.InvalidArgumentError, DeadGradientError): # InvalidArgumentError is usually triggered by a nan
            models['training'].finish(session, save=False)

            if args.restart_on_dead_gradient:
                print('Nan or dead gradient encountered; model will be resumed from last checkpoint if one exists, or restarted from scratch otherwise.')        
                if not os.path.isdir(checkpoints_dir):
                    for sub_dir in next(os.walk(run_dir))[1]: rmtree(os.path.join(run_dir, sub_dir)) # erase all old directories    
                restart = True
            else:
                print('Nan or dead gradient encountered; model will be terminated.')        

        except MilestoneError:
            models['training'].finish(session, save=False)

            if args.restart_on_missed_milestone:
                print('Milestone missed; model will be restarted from scratch with an incremented seed.')
                
                for sub_dir in next(os.walk(run_dir))[1]: rmtree(os.path.join(run_dir, sub_dir)) # erase all old directories

                # modify configuration file with new seed
                old_seed = configs['training'].initialization['graph_seed']
                new_seed = old_seed + args.seed_increment
                for line in fileinput.input(args.config_file, inplace=True):
                    print line.replace('randSeed ' + str(old_seed), 'randSeed ' + str(new_seed)),
                
                restart = True
            else:
                print('Milestone missed; model will be terminated.')
            
        except:
            print('Unexpected error: ', sys.exc_info()[0])
            raise

        finally:
            # Wrap up (ask threads to stop, save final checkpoint, etc.)
            if models['training']._is_started: models['training'].finish(session, save=args.checkpoint_on_finish)
            stdout_err_file_handle.close()
    
    return restart
Example #32
def set_proc_name(name):
    import setproctitle
    setproctitle.setproctitle(name)
Example #33
    def setproctitle(self, title=""):
        setproctitle('odoo: %s %s %s' %
                     (self.__class__.__name__, self.pid, title))
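
For illustration, a self-contained sketch of the effect of that wrapper, using a hypothetical WorkerHTTP class and pid (illustrative values, not taken from the odoo code base):

from setproctitle import setproctitle, getproctitle

class WorkerHTTP(object):
    # Hypothetical stand-in so the wrapper above can be exercised directly.
    pid = 4242

    def setproctitle(self, title=""):
        setproctitle('odoo: %s %s %s' %
                     (self.__class__.__name__, self.pid, title))

WorkerHTTP().setproctitle('(idle)')
print(getproctitle())  # -> "odoo: WorkerHTTP 4242 (idle)"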
Example #34
import numpy as np
from PIL import Image
import caffe
import setproctitle
import os, sys
import surgery, score
import tools
from copy import copy
import time
import setup

setproctitle.setproctitle(os.path.basename(os.getcwd()))

caffe_root = '/home/cv/hdl/caffe'
models = '{}/models'.format(caffe_root)
voc_dir = '{}/data/pascal/VOC/VOC2010'.format(caffe_root)
snapshot = 'snapshot'
part1 = 'head'
part2 = 'torso'
joint_parts = 'head+torso'
parts = [part1, part2, joint_parts]
weights = 'vgg16fc.caffemodel'
classes = np.asarray([
    'background', 'head', 'torso', 'head+torso', 'left arm', 'right arm',
    'arms', 'left leg', 'right leg', 'legs', 'person'
])

device = sys.argv[1]
if len(sys.argv) > 2:
    is_resume = sys.argv[2] == '-resume' and int(sys.argv[3]) % 4000 == 0
    if is_resume:
Example #35
def appendproctitle(name):
    '''
    Append "name" to the current process title
    '''
    if HAS_SETPROCTITLE:
        setproctitle.setproctitle(setproctitle.getproctitle() + ' ' + name)
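
A hedged usage sketch of appendproctitle, assuming HAS_SETPROCTITLE is True (i.e. the optional setproctitle import succeeded); the titles used are illustrative only:

import setproctitle

setproctitle.setproctitle('my-daemon')
appendproctitle('MWorker-0')
# ps/top would now show the title "my-daemon MWorker-0"
print(setproctitle.getproctitle())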
Example #36
def main():

    args = parse()
    args_pt = copy.deepcopy(args)
    args_teacher = copy.deepcopy(args)

    # Load a conf file
    if args.resume:
        conf = load_config(os.path.join(os.path.dirname(args.resume), 'conf.yml'))
        for k, v in conf.items():
            if k != 'resume':
                setattr(args, k, v)
    recog_params = vars(args)

    # Automatically reduce batch size in multi-GPU setting
    if args.n_gpus > 1:
        args.batch_size -= 10
        args.print_step //= args.n_gpus

    # Compute subsampling factor
    subsample_factor = 1
    subsample_factor_sub1 = 1
    subsample_factor_sub2 = 1
    subsample = [int(s) for s in args.subsample.split('_')]
    if args.conv_poolings and 'conv' in args.enc_type:
        for p in args.conv_poolings.split('_'):
            subsample_factor *= int(p.split(',')[0].replace('(', ''))
    else:
        subsample_factor = np.prod(subsample)
    if args.train_set_sub1:
        if args.conv_poolings and 'conv' in args.enc_type:
            subsample_factor_sub1 = subsample_factor * np.prod(subsample[:args.enc_n_layers_sub1 - 1])
        else:
            subsample_factor_sub1 = subsample_factor
    if args.train_set_sub2:
        if args.conv_poolings and 'conv' in args.enc_type:
            subsample_factor_sub2 = subsample_factor * np.prod(subsample[:args.enc_n_layers_sub2 - 1])
        else:
            subsample_factor_sub2 = subsample_factor

    skip_thought = 'skip' in args.enc_type

    # Load dataset
    train_set = Dataset(corpus=args.corpus,
                        tsv_path=args.train_set,
                        tsv_path_sub1=args.train_set_sub1,
                        tsv_path_sub2=args.train_set_sub2,
                        dict_path=args.dict,
                        dict_path_sub1=args.dict_sub1,
                        dict_path_sub2=args.dict_sub2,
                        nlsyms=args.nlsyms,
                        unit=args.unit,
                        unit_sub1=args.unit_sub1,
                        unit_sub2=args.unit_sub2,
                        wp_model=args.wp_model,
                        wp_model_sub1=args.wp_model_sub1,
                        wp_model_sub2=args.wp_model_sub2,
                        batch_size=args.batch_size * args.n_gpus,
                        n_epochs=args.n_epochs,
                        min_n_frames=args.min_n_frames,
                        max_n_frames=args.max_n_frames,
                        sort_by_input_length=True,
                        short2long=True,
                        sort_stop_epoch=args.sort_stop_epoch,
                        dynamic_batching=args.dynamic_batching,
                        ctc=args.ctc_weight > 0,
                        ctc_sub1=args.ctc_weight_sub1 > 0,
                        ctc_sub2=args.ctc_weight_sub2 > 0,
                        subsample_factor=subsample_factor,
                        subsample_factor_sub1=subsample_factor_sub1,
                        subsample_factor_sub2=subsample_factor_sub2,
                        discourse_aware=args.discourse_aware,
                        skip_thought=skip_thought)
    dev_set = Dataset(corpus=args.corpus,
                      tsv_path=args.dev_set,
                      tsv_path_sub1=args.dev_set_sub1,
                      tsv_path_sub2=args.dev_set_sub2,
                      dict_path=args.dict,
                      dict_path_sub1=args.dict_sub1,
                      dict_path_sub2=args.dict_sub2,
                      nlsyms=args.nlsyms,
                      unit=args.unit,
                      unit_sub1=args.unit_sub1,
                      unit_sub2=args.unit_sub2,
                      wp_model=args.wp_model,
                      wp_model_sub1=args.wp_model_sub1,
                      wp_model_sub2=args.wp_model_sub2,
                      batch_size=args.batch_size * args.n_gpus,
                      min_n_frames=args.min_n_frames,
                      max_n_frames=args.max_n_frames,
                      shuffle=True if args.discourse_aware else False,
                      ctc=args.ctc_weight > 0,
                      ctc_sub1=args.ctc_weight_sub1 > 0,
                      ctc_sub2=args.ctc_weight_sub2 > 0,
                      subsample_factor=subsample_factor,
                      subsample_factor_sub1=subsample_factor_sub1,
                      subsample_factor_sub2=subsample_factor_sub2,
                      discourse_aware=args.discourse_aware,
                      skip_thought=skip_thought)
    eval_sets = []
    for s in args.eval_sets:
        eval_sets += [Dataset(corpus=args.corpus,
                              tsv_path=s,
                              dict_path=args.dict,
                              nlsyms=args.nlsyms,
                              unit=args.unit,
                              wp_model=args.wp_model,
                              batch_size=1,
                              discourse_aware=args.discourse_aware,
                              skip_thought=skip_thought,
                              is_test=True)]

    args.vocab = train_set.vocab
    args.vocab_sub1 = train_set.vocab_sub1
    args.vocab_sub2 = train_set.vocab_sub2
    args.input_dim = train_set.input_dim

    # Load a LM conf file for LM fusion & LM initialization
    if not args.resume and (args.lm_fusion or args.lm_init):
        if args.lm_fusion:
            lm_conf = load_config(os.path.join(os.path.dirname(args.lm_fusion), 'conf.yml'))
        elif args.lm_init:
            lm_conf = load_config(os.path.join(os.path.dirname(args.lm_init), 'conf.yml'))
        args.lm_conf = argparse.Namespace()
        for k, v in lm_conf.items():
            setattr(args.lm_conf, k, v)
        assert args.unit == args.lm_conf.unit
        assert args.vocab == args.lm_conf.vocab

    # Set save path
    if args.resume:
        save_path = os.path.dirname(args.resume)
        dir_name = os.path.basename(save_path)
    else:
        dir_name = set_asr_model_name(args, subsample_factor)
        save_path = mkdir_join(args.model_save_dir, '_'.join(
            os.path.basename(args.train_set).split('.')[:-1]), dir_name)
        save_path = set_save_path(save_path)  # avoid overwriting

    # Set logger
    logger = set_logger(os.path.join(save_path, 'train.log'), key='training')

    # Model setting
    model = SkipThought(args, save_path) if skip_thought else Speech2Text(args, save_path)

    if args.resume:
        # Set optimizer
        epoch = int(args.resume.split('-')[-1])
        # noam is referenced later in the training loop, so define it here as well
        noam = 'transformer' in args.enc_type or args.dec_type == 'transformer'
        optimizer = set_optimizer(model,
                                  optimizer='sgd' if epoch > conf['convert_to_sgd_epoch'] else conf['optimizer'],
                                  lr=float(conf['learning_rate']),  # on-the-fly
                                  weight_decay=float(conf['weight_decay']))

        # Restore the last saved model
        model, checkpoint = load_checkpoint(model, args.resume, resume=True)
        optimizer = checkpoint['optimizer']
        epoch = checkpoint['epoch']
        step = checkpoint['step']
        metric_dev_best = checkpoint['metric_dev_best']

        # Resuming at the boundary between epoch convert_to_sgd_epoch - 1 and convert_to_sgd_epoch: re-apply the switch to SGD
        if epoch == conf['convert_to_sgd_epoch']:
            optimizer = set_optimizer(model,
                                      optimizer='sgd',
                                      lr=float(args.learning_rate),
                                      weight_decay=float(conf['weight_decay']))
            optimizer = LRScheduler(optimizer,
                                    lr_max=args.learning_rate,
                                    decay_type='epoch',
                                    decay_start_epoch=0,
                                    decay_rate=0.5,
                                    lower_better=True)
            logger.info('========== Convert to SGD ==========')
    else:
        # Save the conf file as a yaml file
        save_config(vars(args), os.path.join(save_path, 'conf.yml'))
        if args.lm_fusion:
            save_config(args.lm_conf, os.path.join(save_path, 'conf_lm.yml'))

        # Save the nlsyms, dictionary, and wp_model
        if args.nlsyms:
            shutil.copy(args.nlsyms, os.path.join(save_path, 'nlsyms.txt'))
        for sub in ['', '_sub1', '_sub2']:
            if getattr(args, 'dict' + sub):
                shutil.copy(getattr(args, 'dict' + sub), os.path.join(save_path, 'dict' + sub + '.txt'))
            if getattr(args, 'unit' + sub) == 'wp':
                shutil.copy(getattr(args, 'wp_model' + sub), os.path.join(save_path, 'wp' + sub + '.model'))

        for k, v in sorted(vars(args).items(), key=lambda x: x[0]):
            logger.info('%s: %s' % (k, str(v)))

        # Count total parameters
        for n in sorted(list(model.num_params_dict.keys())):
            nparams = model.num_params_dict[n]
            logger.info("%s %d" % (n, nparams))
        logger.info("Total %.2f M parameters" % (model.total_parameters / 1000000))
        logger.info(model)

        # Initialize with pre-trained model's parameters
        if args.pretrained_model and os.path.isfile(args.pretrained_model):
            # Load the ASR model
            conf_pt = load_config(os.path.join(os.path.dirname(args.pretrained_model), 'conf.yml'))
            args_pt = argparse.Namespace()
            for k, v in conf_pt.items():
                setattr(args_pt, k, v)
            model_pt = Speech2Text(args_pt)
            model_pt, _ = load_checkpoint(model_pt, args.pretrained_model)

            # Overwrite parameters
            only_enc = (args.enc_n_layers != args_pt.enc_n_layers) or (
                args.unit != args_pt.unit) or args_pt.ctc_weight == 1
            param_dict = dict(model_pt.named_parameters())
            for n, p in model.named_parameters():
                if n in param_dict.keys() and p.size() == param_dict[n].size():
                    if only_enc and 'enc' not in n:
                        continue
                    if args.lm_fusion_type == 'cache' and 'output' in n:
                        continue
                    p.data = param_dict[n].data
                    logger.info('Overwrite %s' % n)

        epoch, step = 0, 0
        metric_dev_best = 10000

        # Set optimizer
        optimizer = set_optimizer(model,
                                  optimizer=args.optimizer,
                                  lr=float(args.learning_rate),
                                  weight_decay=float(args.weight_decay))

        # Wrap optimizer by learning rate scheduler
        noam = 'transformer' in args.enc_type or args.dec_type == 'transformer'
        optimizer = LRScheduler(optimizer,
                                lr_max=float(args.learning_rate),
                                decay_type=args.decay_type,
                                decay_start_epoch=args.decay_start_epoch,
                                decay_rate=args.decay_rate,
                                decay_patient_n_epochs=args.decay_patient_n_epochs,
                                lower_better=True,
                                best_value=metric_dev_best,
                                model_size=args.d_model,
                                warmup_start_lr=args.warmup_start_learning_rate,
                                warmup_n_steps=args.warmup_n_steps,
                                lr_factor=args.learning_rate_factor,
                                noam=noam)

    # Load the teacher ASR model
    teacher = None
    teacher_lm = None
    if args.teacher and os.path.isfile(args.teacher):
        conf_teacher = load_config(os.path.join(os.path.dirname(args.teacher), 'conf.yml'))
        args_teacher = argparse.Namespace()
        for k, v in conf_teacher.items():
            setattr(args_teacher, k, v)
        # Setting for knowledge distillation
        args_teacher.ss_prob = 0
        args.lsm_prob = 0
        teacher = Speech2Text(args_teacher)
        teacher, _ = load_checkpoint(teacher, args.teacher)

        # Load the teacher LM
        if args.teacher_lm and os.path.isfile(args.teacher_lm):
            conf_lm = load_config(os.path.join(os.path.dirname(args.teacher_lm), 'conf.yml'))
            args_lm = argparse.Namespace()
            for k, v in conf_lm.items():
                setattr(args_lm, k, v)
            teacher_lm = select_lm(args_lm)
            teacher_lm, _ = load_checkpoint(teacher_lm, args.teacher_lm)

    # GPU setting
    if args.n_gpus >= 1:
        model = CustomDataParallel(model,
                                   device_ids=list(range(0, args.n_gpus, 1)),
                                   deterministic=False,
                                   benchmark=True)
        model.cuda()
        if teacher is not None:
            teacher.cuda()
        if teacher_lm is not None:
            teacher_lm.cuda()

    logger.info('PID: %s' % os.getpid())
    logger.info('HOSTNAME: %s' % os.uname()[1])

    # Set process name
    if args.job_name:
        setproctitle(args.job_name)
    else:
        setproctitle(dir_name)

    # Set reporter
    reporter = Reporter(save_path, tensorboard=True)

    if args.mtl_per_batch:
        # NOTE: from easier to harder tasks
        tasks = []
        if 1 - args.bwd_weight - args.ctc_weight - args.sub1_weight - args.sub2_weight > 0:
            tasks += ['ys']
        if args.bwd_weight > 0:
            tasks = ['ys.bwd'] + tasks
        if args.ctc_weight > 0:
            tasks = ['ys.ctc'] + tasks
        if args.lmobj_weight > 0:
            tasks = ['ys.lmobj'] + tasks
        for sub in ['sub1', 'sub2']:
            if getattr(args, 'train_set_' + sub):
                if getattr(args, sub + '_weight') - getattr(args, 'ctc_weight_' + sub) > 0:
                    tasks = ['ys_' + sub] + tasks
                if getattr(args, 'ctc_weight_' + sub) > 0:
                    tasks = ['ys_' + sub + '.ctc'] + tasks
    else:
        tasks = ['all']

    start_time_train = time.time()
    start_time_epoch = time.time()
    start_time_step = time.time()
    not_improved_n_epochs = 0
    pbar_epoch = tqdm(total=len(train_set))
    accum_n_tokens = 0
    while True:
        # Compute loss in the training set
        batch_train, is_new_epoch = train_set.next()
        accum_n_tokens += sum([len(y) for y in batch_train['ys']])

        # Change mini-batch depending on task
        for task in tasks:
            if skip_thought:
                loss, reporter = model(batch_train['ys'],
                                       ys_prev=batch_train['ys_prev'],
                                       ys_next=batch_train['ys_next'],
                                       reporter=reporter)
            else:
                loss, reporter = model(batch_train, reporter=reporter, task=task,
                                       teacher=teacher, teacher_lm=teacher_lm)
            # loss /= args.accum_grad_n_steps
            if len(model.device_ids) > 1:
                loss.backward(torch.ones(len(model.device_ids)))
            else:
                loss.backward()
            loss.detach()  # Truncate the graph
            if args.accum_grad_n_tokens == 0 or accum_n_tokens >= args.accum_grad_n_tokens:
                if args.clip_grad_norm > 0:
                    torch.nn.utils.clip_grad_norm_(model.module.parameters(), args.clip_grad_norm)
                optimizer.step()
                optimizer.zero_grad()
                accum_n_tokens = 0
            loss_train = loss.item()
            del loss
        reporter.step()
        step += args.n_gpus

        if step % args.print_step == 0:
            # Compute loss in the dev set
            batch_dev = dev_set.next()[0]
            # Change mini-batch depending on task
            for task in tasks:
                if skip_thought:
                    loss, reporter = model(batch_dev['ys'],
                                           ys_prev=batch_dev['ys_prev'],
                                           ys_next=batch_dev['ys_next'],
                                           reporter=reporter,
                                           is_eval=True)
                else:
                    loss, reporter = model(batch_dev, reporter=reporter, task=task,
                                           is_eval=True)
                loss_dev = loss.item()
                del loss
            reporter.step(is_eval=True)

            duration_step = time.time() - start_time_step
            if args.input_type == 'speech':
                xlen = max(len(x) for x in batch_train['xs'])
                ylen = max(len(y) for y in batch_train['ys'])
            elif args.input_type == 'text':
                xlen = max(len(x) for x in batch_train['ys'])
                ylen = max(len(y) for y in batch_train['ys_sub1'])
            logger.info("step:%d(ep:%.2f) loss:%.3f(%.3f)/lr:%.5f/bs:%d/xlen:%d/ylen:%d (%.2f min)" %
                        (step, epoch + train_set.epoch_detail,
                         loss_train, loss_dev,
                         optimizer.lr, len(batch_train['utt_ids']),
                         xlen, ylen, duration_step / 60))
            start_time_step = time.time()
        pbar_epoch.update(len(batch_train['utt_ids']))

        # Save figures of loss and accuracy
        if step % (args.print_step * 10) == 0:
            reporter.snapshot()
            model.module.plot_attention()

        # Save checkpoint and evaluate model per epoch
        if is_new_epoch:
            epoch += 1
            duration_epoch = time.time() - start_time_epoch
            logger.info('========== EPOCH:%d (%.2f min) ==========' % (epoch, duration_epoch / 60))

            if epoch < args.eval_start_epoch:
                # Save the model
                save_checkpoint(model, save_path, optimizer,
                                epoch, step, metric_dev_best,
                                remove_old_checkpoints=not noam)
                reporter._epoch += 1
                # TODO(hirofumi): fix later
            else:
                start_time_eval = time.time()
                # dev
                if args.metric == 'edit_distance':
                    if args.unit in ['word', 'word_char']:
                        metric_dev = eval_word([model.module], dev_set, recog_params,
                                               epoch=epoch)[0]
                        logger.info('WER (%s): %.2f %%' % (dev_set.set, metric_dev))
                    elif args.unit == 'wp':
                        metric_dev, cer_dev = eval_wordpiece([model.module], dev_set, recog_params,
                                                             epoch=epoch)
                        logger.info('WER (%s): %.2f %%' % (dev_set.set, metric_dev))
                        logger.info('CER (%s): %.2f %%' % (dev_set.set, cer_dev))
                    elif 'char' in args.unit:
                        metric_dev, cer_dev = eval_char([model.module], dev_set, recog_params,
                                                        epoch=epoch)
                        logger.info('WER (%s): %.2f %%' % (dev_set.set, metric_dev))
                        logger.info('CER (%s): %.2f %%' % (dev_set.set, cer_dev))
                    elif 'phone' in args.unit:
                        metric_dev = eval_phone([model.module], dev_set, recog_params,
                                                epoch=epoch)
                        logger.info('PER (%s): %.2f %%' % (dev_set.set, metric_dev))
                elif args.metric == 'ppl':
                    metric_dev = eval_ppl([model.module], dev_set, batch_size=args.batch_size)[0]
                    logger.info('PPL (%s): %.2f' % (dev_set.set, metric_dev))
                elif args.metric == 'loss':
                    metric_dev = eval_ppl([model.module], dev_set, batch_size=args.batch_size)[1]
                    logger.info('Loss (%s): %.2f' % (dev_set.set, metric_dev))
                else:
                    raise NotImplementedError(args.metric)
                reporter.epoch(metric_dev)

                # Update learning rate
                optimizer.decay(epoch=epoch, value=metric_dev)

                if metric_dev < metric_dev_best:
                    metric_dev_best = metric_dev
                    not_improved_n_epochs = 0
                    logger.info('||||| Best Score |||||')

                    # Save the model
                    save_checkpoint(model, save_path, optimizer,
                                    epoch, step, metric_dev_best,
                                    remove_old_checkpoints=not noam)

                    # test
                    for s in eval_sets:
                        if args.metric == 'edit_distance':
                            if args.unit in ['word', 'word_char']:
                                wer_test = eval_word([model.module], s, recog_params,
                                                     epoch=epoch)[0]
                                logger.info('WER (%s): %.2f %%' % (s.set, wer_test))
                            elif args.unit == 'wp':
                                wer_test, cer_test = eval_wordpiece([model.module], s, recog_params,
                                                                    epoch=epoch)
                                logger.info('WER (%s): %.2f %%' % (s.set, wer_test))
                                logger.info('CER (%s): %.2f %%' % (s.set, cer_test))
                            elif 'char' in args.unit:
                                wer_test, cer_test = eval_char([model.module], s, recog_params,
                                                               epoch=epoch)
                                logger.info('WER (%s): %.2f %%' % (s.set, wer_test))
                                logger.info('CER (%s): %.2f %%' % (s.set, cer_test))
                            elif 'phone' in args.unit:
                                per_test = eval_phone([model.module], s, recog_params,
                                                      epoch=epoch)
                                logger.info('PER (%s): %.2f %%' % (s.set, per_test))
                        elif args.metric == 'ppl':
                            ppl_test = eval_ppl([model.module], s, batch_size=args.batch_size)[0]
                            logger.info('PPL (%s): %.2f' % (s.set, ppl_test))
                        elif args.metric == 'loss':
                            loss_test = eval_ppl([model.module], s, batch_size=args.batch_size)[1]
                            logger.info('Loss (%s): %.2f' % (s.set, loss_test))
                        else:
                            raise NotImplementedError(args.metric)
                else:
                    not_improved_n_epochs += 1

                    # start scheduled sampling
                    if args.ss_prob > 0:
                        model.module.scheduled_sampling_trigger()

                duration_eval = time.time() - start_time_eval
                logger.info('Evaluation time: %.2f min' % (duration_eval / 60))

                # Early stopping
                if not_improved_n_epochs == args.not_improved_patient_n_epochs:
                    break

                # Convert to fine-tuning stage
                if epoch == args.convert_to_sgd_epoch:
                    optimizer = set_optimizer(model,
                                              optimizer='sgd',
                                              lr=args.learning_rate,
                                              weight_decay=float(args.weight_decay))
                    optimizer = LRScheduler(optimizer,
                                            lr_max=args.learning_rate,
                                            decay_type='epoch',
                                            decay_start_epoch=0,
                                            decay_rate=0.5,
                                            lower_better=True)
                    logger.info('========== Convert to SGD ==========')

            pbar_epoch = tqdm(total=len(train_set))

            if epoch == args.n_epochs:
                break

            start_time_step = time.time()
            start_time_epoch = time.time()

    duration_train = time.time() - start_time_train
    logger.info('Total time: %.2f hour' % (duration_train / 3600))

    if reporter.tensorboard:
        reporter.tf_writer.close()
    pbar_epoch.close()

    return save_path
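# A minimal, self-contained sketch (not part of the training script above; model,
# optimizer, and the accum_grad_n_tokens threshold are illustrative placeholders)
# of the token-count-based gradient accumulation pattern used in the loop above:
# gradients accumulate across mini-batches until roughly accum_grad_n_tokens
# target tokens have been seen, then a single clipped optimizer step is taken.
import torch
import torch.nn as nn

model = nn.Linear(8, 8)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
accum_grad_n_tokens = 64  # hypothetical threshold
accum_n_tokens = 0
for xs, ys in [(torch.randn(4, 8), torch.randn(4, 8)) for _ in range(10)]:
    accum_n_tokens += ys.numel()
    loss = nn.functional.mse_loss(model(xs), ys)
    loss.backward()  # gradients keep accumulating until the threshold is reached
    if accum_n_tokens >= accum_grad_n_tokens:
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
        optimizer.step()
        optimizer.zero_grad()
        accum_n_tokens = 0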
#!/usr/bin/env python2.7
#-*- coding: utf-8 -*-
__author__ = "Shrinidhi Rao"
__license__ = "GPL"
__email__ = "*****@*****.**"

import os
import sys

sys.path.append(os.sep.join(os.path.abspath(__file__).split(os.sep)[:-3]))
import lib.common.system_utils
import setproctitle

import simplejson
import cherrypy
setproctitle.setproctitle("web_api_server")

cherrypy._cpserver.Server.thread_pool = 30
class host_details(object):
  @cherrypy.expose
  def index(self):
    details = lib.common.system_utils.get_local_host_details()
    return(simplejson.dumps(details))



if (__name__ == '__main__'):
  cherrypy.tree.mount(host_details(),'/')
  cherrypy.engine.start()
  cherrypy.engine.block()
Exemple #38
0
# Copyright Niantic 2019. Patent Pending. All rights reserved.
#
# This software is licensed under the terms of the Monodepth2 licence
# which allows for non-commercial use only, the full terms of which are made
# available in the LICENSE file.

from __future__ import absolute_import, division, print_function

from trainer import Trainer
from options import MonodepthOptions
import setproctitle

options = MonodepthOptions()
opts = options.parse()
setproctitle.setproctitle(opts.model_name)

if __name__ == "__main__":
    trainer = Trainer(opts)
    trainer.train()
def post_worker_init(dummy_worker):
    setproctitle.setproctitle(
        settings.GUNICORN_WORKER_READY_PREFIX + setproctitle.getproctitle()
    )
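# A minimal sketch of how a hook like the one above is typically wired up
# (assumption: a plain gunicorn config module passed with `-c gunicorn_conf.py`;
# the prefix constant is a placeholder standing in for
# settings.GUNICORN_WORKER_READY_PREFIX).
import setproctitle

GUNICORN_WORKER_READY_PREFIX = '[ready] '  # hypothetical placeholder


def post_worker_init(worker):
    # Prepend a marker once the worker has finished booting, so ready workers
    # can be told apart in `ps` output.
    setproctitle.setproctitle(
        GUNICORN_WORKER_READY_PREFIX + setproctitle.getproctitle()
    )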
Exemple #40
0
    def __init__(self,
                 verbose=False,
                 log_dir=None,
                 console_log=False,
                 run_dir=None,
                 config_file=None,
                 persistence_file=None,
                 test_dir=None):

        setproctitle.setproctitle('openrazer-daemon')  # pylint: disable=no-member

        # Expanding ~ as python doesn't do it by default, also creating dirs if needed
        try:
            if log_dir is not None:
                log_dir = os.path.expanduser(log_dir)
                os.makedirs(log_dir, exist_ok=True)
            if run_dir is not None:
                run_dir = os.path.expanduser(run_dir)
                os.makedirs(run_dir, exist_ok=True)
        except NotADirectoryError as e:
            print("Failed to create {}".format(e.filename), file=sys.stderr)
            sys.exit(1)

        if config_file is not None:
            config_file = os.path.expanduser(config_file)
            if not os.path.exists(config_file):
                print("Config file {} does not exist.".format(config_file),
                      file=sys.stderr)
                sys.exit(1)

        if persistence_file is not None:
            persistence_file = os.path.expanduser(persistence_file)
            if not os.path.exists(persistence_file):
                print("Persistence file {} does not exist.".format(
                    persistence_file),
                      file=sys.stderr)
                sys.exit(1)

        self._test_dir = test_dir
        self._run_dir = run_dir

        self._config_file = config_file
        self._config = configparser.ConfigParser()
        self.read_config(config_file)

        self._persistence_file = persistence_file
        self._persistence = configparser.ConfigParser()
        self._persistence.status = {"changed": False}
        self.read_persistence(persistence_file)

        # Logging
        log_level = logging.INFO
        if verbose or self._config.getboolean('General', 'verbose_logging'):
            log_level = logging.DEBUG
        self.logger = self._create_logger(log_dir, log_level, console_log)

        # Check for plugdev group
        if not self._check_plugdev_group():
            self.logger.critical("User is not a member of the plugdev group")
            self.logger.critical(
                "Please run the command 'sudo gpasswd -a $USER plugdev' and then reboot!"
            )
            sys.exit(1)

        # Setup DBus to use gobject main loop
        dbus.mainloop.glib.threads_init()
        dbus.mainloop.glib.DBusGMainLoop(set_as_default=True)
        super().__init__('/org/razer')

        self._init_signals()
        self._main_loop = GLib.MainLoop()

        # Listen for input events from udev
        self._init_udev_monitor()

        # Load Classes
        self._device_classes = openrazer_daemon.hardware.get_device_classes()

        self.logger.info("Initialising Daemon (v%s). Pid: %d", __version__,
                         os.getpid())
        self._init_screensaver_monitor()

        self._razer_devices = DeviceCollection()
        self._load_devices(first_run=True)

        # Add DBus methods
        methods = {
            # interface, method, callback, in-args, out-args
            ('razer.devices', 'getDevices', self.get_serial_list, None, 'as'),
            ('razer.devices', 'supportedDevices', self.supported_devices, None,
             's'),
            ('razer.devices', 'enableTurnOffOnScreensaver',
             self.enable_turn_off_on_screensaver, 'b', None),
            ('razer.devices', 'getOffOnScreensaver',
             self.get_off_on_screensaver, None, 'b'),
            ('razer.devices', 'syncEffects', self.sync_effects, 'b', None),
            ('razer.devices', 'getSyncEffects', self.get_sync_effects, None,
             'b'),
            ('razer.daemon', 'version', self.version, None, 's'),
            ('razer.daemon', 'stop', self.stop, None, None),
        }

        for m in methods:
            self.logger.debug("Adding {}.{} method to DBus".format(m[0], m[1]))
            self.add_dbus_method(m[0],
                                 m[1],
                                 m[2],
                                 in_signature=m[3],
                                 out_signature=m[4])

        self._collecting_udev = False
        self._collecting_udev_devices = []

        self._init_autosave_persistence()

        # TODO remove
        self.sync_effects(
            self._config.getboolean('Startup', 'sync_effects_enabled'))
Exemple #41
0
    def _set_process_title(self):
        setproctitle('lymph-instance (identity: %s, endpoint: %s, config: %s)' % (
            self.container.identity,
            self.container.endpoint,
            self.config.source,
        ))
Exemple #42
0
def setproctitle(title: str) -> None:
    if _setproctitle_enabled:
        setproctitle_module.setproctitle(title)
    else:
        logger.warn(f"setproctitle not enabled for process {title}")
Exemple #43
0
    def run(self):
        setproctitle.setproctitle('stream_server')
        self.serve_forever()
Exemple #44
0
        return

    def remove_custom_place(self, removeButton):
        treeselection = self.custom_places_tree.get_selection()
        currentiter = treeselection.get_selected()[1]
        if currentiter:
            self.custom_places_model.remove(currentiter)
        return

    def save_custom_places(self, treemodel, path, iter=None, new_order=None):
        if not iter or self.custom_places_model.get_value(iter, 1):
            treeiter = self.custom_places_model.get_iter_first()
            custom_places_names = []
            custom_places_paths = []
            while treeiter:
                custom_places_names = custom_places_names + [
                    self.custom_places_model.get_value(treeiter, 0)
                ]
                custom_places_paths = custom_places_paths + [
                    self.custom_places_model.get_value(treeiter, 1)
                ]
                treeiter = self.custom_places_model.iter_next(treeiter)
            self.places_settings.set_strv("custom-paths", custom_places_paths)
            self.places_settings.set_strv("custom-names", custom_places_names)


if __name__ == "__main__":
    setproctitle.setproctitle('mintmenu-preferences')
    preferences = mintMenuPreferences()
    Gtk.main()
#
#

import ctypes
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
import setproctitle as setPT
import lora as lora_gr  # from GNURadio library
from lora_id import *  # from InterDigital customized library
from utils import *


if __name__ == '__main__':
    setPT.setproctitle('lora-tester-channelizer-py')
    print "Process name:  " + str(get_proc_name())
    print "***********************************"

    # ----------------------------------------------
    # GDB ATTACH (DEBUGGING or performance monitoring)
    # ----------------------------------------------
    GDB_ATTACH = 0
    if (GDB_ATTACH):
        print ('Blocked waiting for GDB attach (pid = %d) ' % (os.getpid(),) + '. Press ENTER after GDB is attached.')
        sys.stdout.flush()
        raw_input()

    # Read from dataset (returns numpy)
    dataset = read_complex_array("../data/lora-99-100.sigmf-data")  # before channelizer
    fileName_out = '../data/py_lora_output_resampler'
Exemple #46
0
import logging
import platform
import setproctitle
import flaskr
import sys
import nw_logging

# TODO: the logging system and basic OS-level setup are configured here in app.py


def validate_python() -> None:
    """Validate that the right Python version is running."""
    if sys.version_info[:3] < REQUIRED_PYTHON_VER:
        print("ninewatt Device requires at least Python {}.{}.{}".format(
            *REQUIRED_PYTHON_VER))
        sys.exit(1)


def main():
    flask_app = flaskr.create_app()
    flask_app.debug = True
    flask_app.run(host="localhost", port="5000")


if __name__ == "__main__":

    if platform.system() == "Linux":
        setproctitle.setproctitle('ninewatt_app')

    sys.exit(main())
Exemple #47
0
from trainer import *
import setproctitle

if __name__ == "__main__":
    args = pblm.argparser(prefix='mnist',
                          gan_type='ACGAN',
                          opt='adam',
                          batch_size_test=10,
                          proj=50,
                          norm_train='l2_normal',
                          norm_test='l2',
                          epsilon=1.58,
                          seed=0)

    kwargs = pblm.args2kwargs(args)
    setproctitle.setproctitle('python')
    print("saving file to {}".format(args.proctitle))

    saved_filepath = ('./saved_log/' + args.proctitle)
    model_filepath = os.path.dirname('./models/' + args.proctitle)
    if not os.path.exists(saved_filepath):
        os.makedirs(saved_filepath)
    if not os.path.exists(model_filepath):
        os.makedirs(model_filepath)
    model_path = ('./models/' + args.proctitle)

    train_res = open(saved_filepath + '/train_res.txt', "w")
    test_res = open(saved_filepath + '/test_res.txt', "w")

    # load the data
    if args.prefix == "mnist":
Exemple #48
0
    if GetLastError() == ERROR_ALREADY_EXISTS:
        lock_file_validation = False
    else:
        lock_file_validation = True

# run persepolis mainwindow
if lock_file_validation:
    from persepolis.scripts import initialization
    from persepolis.scripts.mainwindow import MainWindow

    # set "persepolis" name for this process in linux and bsd
    if os_type == 'Linux' or os_type == 'FreeBSD' or os_type == 'OpenBSD':
        try:
            from setproctitle import setproctitle
            setproctitle("persepolis")
        except:
            from persepolis.scripts import logger
            logger.sendToLog('setproctitle is not installed!', "ERROR")

from PyQt5.QtWidgets import QApplication
from PyQt5.QtGui import QFont
from PyQt5.QtCore import QCoreApplication, QSettings
from persepolis.gui.palettes import DarkRedPallete, DarkBluePallete, ArcDarkRedPallete, ArcDarkBluePallete, LightRedPallete, LightBluePallete
from persepolis.scripts.bubble import notifySend
from persepolis.scripts.error_window import ErrorWindow
import traceback

# load persepolis_settings
persepolis_setting = QSettings('persepolis_download_manager', 'persepolis')
Exemple #49
0
import torch
import torch.nn as nn
import numpy as np
from arch.FastDVDNet import FastDVDNet
from utils.data_utils import *
from utils.file_utils import *
import argparse
from tensorboardX import SummaryWriter
from torch.utils.data import Dataset, DataLoader
from data_provider import Video_Provider
import os, sys, shutil
import torch.optim as optim
import time
import setproctitle

setproctitle.setproctitle('ZhangBin')

def args_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset_path', '-dp', default='/media/sde/zb/rnn-cnn/vimeo_septuplet/sequences', help='the path of vimeo-90k')
    parser.add_argument('--txt_path', '-tp', default='/media/sde/zb/rnn-cnn/vimeo_septuplet', help='the path of train/eval txt file')
    parser.add_argument('--batch_size', '-bs', default=64, type=int, help='batch size')
    parser.add_argument('--frames', '-f', default=5, type=int)
    parser.add_argument('--im_size', '-s', default=96, type=int)
    parser.add_argument('--learning_rate', '-lr', default=1e-4, type=float)
    parser.add_argument('--num_worker', '-nw', default=4, type=int, help='number of workers to load data by dataloader')
    parser.add_argument('--restart', '-r', action='store_true', help='whether to restart the train process')
    parser.add_argument('--eval', '-e', action='store_true', help='whether to work on the eval mode')
    parser.add_argument('--cuda', action='store_true', help='whether to train the network on the GPU, default is mGPU')
    parser.add_argument('--max_epoch', default=100, type=int)
    return parser.parse_args()
Exemple #50
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchSz', type=int, default=10)
    parser.add_argument('--dice', action='store_true')
    parser.add_argument('--ngpu', type=int, default=1)
    parser.add_argument('--nEpochs', type=int, default=300)
    parser.add_argument('--start-epoch',
                        default=0,
                        type=int,
                        metavar='N',
                        help='manual epoch number (useful on restarts)')
    parser.add_argument('--resume',
                        default='',
                        type=str,
                        metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('-e',
                        '--evaluate',
                        dest='evaluate',
                        action='store_true',
                        help='evaluate model on validation set')
    parser.add_argument('-i',
                        '--inference',
                        default='',
                        type=str,
                        metavar='PATH',
                        help='run inference on data set and save results')

    # 1e-8 works well for lung masks but seems to prevent
    # rapid learning for nodule masks
    parser.add_argument('--weight-decay',
                        '--wd',
                        default=1e-8,
                        type=float,
                        metavar='W',
                        help='weight decay (default: 1e-8)')
    parser.add_argument('--no-cuda', action='store_true')
    parser.add_argument('--save')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--opt',
                        type=str,
                        default='adam',
                        choices=('sgd', 'adam', 'rmsprop'))
    args = parser.parse_args()
    best_prec1 = 100.
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    args.save = args.save or 'work/vnet.base.{}'.format(datestr())
    nll = True
    if args.dice:
        nll = False
    weight_decay = args.weight_decay
    setproctitle.setproctitle(args.save)

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    print("build vnet")
    model = vnet.VNet(elu=False, nll=nll)
    batch_size = args.ngpu * args.batchSz
    gpu_ids = range(args.ngpu)
    model = nn.parallel.DataParallel(model, device_ids=gpu_ids)

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.evaluate, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        model.apply(weights_init)

    if nll:
        train = train_nll
        test = test_nll
        class_balance = True
    else:
        train = train_dice
        test = test_dice
        class_balance = False

    print('  + Number of params: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))
    if args.cuda:
        model = model.cuda()

    if os.path.exists(args.save):
        shutil.rmtree(args.save)
    os.makedirs(args.save, exist_ok=True)

    # LUNA16 dataset isotropically scaled to 2.5mm^3
    # and then truncated or zero-padded to 160x128x160
    normMu = [-642.794]
    normSigma = [459.512]
    normTransform = transforms.Normalize(normMu, normSigma)

    trainTransform = transforms.Compose([transforms.ToTensor(), normTransform])
    testTransform = transforms.Compose([transforms.ToTensor(), normTransform])
    #if ct_targets == nodule_masks:
    #    masks = lung_masks
    #else:
    masks = None

    if args.inference != '':
        if not args.resume:
            print("args.resume must be set to do inference")
            exit(1)
        kwargs = {'num_workers': 1} if args.cuda else {}
        src = args.inference
        dst = args.save
        inference_batch_size = args.ngpu
        root = os.path.dirname(src)
        images = os.path.basename(src)
        dataset = dset.LUNA16(root=root,
                              images=images,
                              transform=testTransform,
                              split=target_split,
                              mode="infer")
        loader = DataLoader(dataset,
                            batch_size=inference_batch_size,
                            shuffle=False,
                            collate_fn=noop,
                            **kwargs)
        inference(args, loader, model, trainTransform)
        return

    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    print("loading training set")
    trainSet = dset.LUNA16(root='/content/drive/luna16',
                           images=ct_images,
                           targets=ct_targets,
                           mode="train",
                           transform=trainTransform,
                           class_balance=class_balance,
                           split=target_split,
                           seed=args.seed,
                           masks=masks)
    trainLoader = DataLoader(trainSet,
                             batch_size=batch_size,
                             shuffle=True,
                             **kwargs)
    print("loading test set")
    testLoader = DataLoader(dset.LUNA16(root='/content/drive/luna16',
                                        images=ct_images,
                                        targets=ct_targets,
                                        mode="test",
                                        transform=testTransform,
                                        seed=args.seed,
                                        masks=masks,
                                        split=target_split),
                            batch_size=batch_size,
                            shuffle=False,
                            **kwargs)

    target_mean = trainSet.target_mean()
    bg_weight = target_mean / (1. + target_mean)
    fg_weight = 1. - bg_weight
    print(bg_weight)
    class_weights = torch.FloatTensor([bg_weight, fg_weight])
    if args.cuda:
        class_weights = class_weights.cuda()

    if args.opt == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=1e-1,
                              momentum=0.99,
                              weight_decay=weight_decay)
    elif args.opt == 'adam':
        optimizer = optim.Adam(model.parameters(), weight_decay=weight_decay)
    elif args.opt == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(),
                                  weight_decay=weight_decay)

    trainF = open(os.path.join(args.save, 'train.csv'), 'w')
    testF = open(os.path.join(args.save, 'test.csv'), 'w')
    err_best = 100.
    for epoch in range(1, args.nEpochs + 1):
        adjust_opt(args.opt, optimizer, epoch)
        train(args, epoch, model, trainLoader, optimizer, trainF,
              class_weights)
        err = test(args, epoch, model, testLoader, optimizer, testF,
                   class_weights)
        is_best = False
        if err < best_prec1:
            is_best = True
            best_prec1 = err
        save_checkpoint(
            {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1
            }, is_best, args.save, "vnet")
        os.system('./plot.py {} {} &'.format(len(trainLoader), args.save))

    trainF.close()
    testF.close()
Exemple #51
0
def main():
    setproctitle('train ' + os.path.split(os.path.realpath(__file__))[0])
    parser = argparse.ArgumentParser()
    parser.add_argument('--resume_weights',
                        '-r',
                        default=None,
                        type=str,
                        help='a number for certain pth file.')

    parser.add_argument('--resume',
                        action='store_true',
                        help='Resume the model from save.')

    parser.add_argument('--base_model', '-m', default=None)
    parser.add_argument('--save_dir', '-sd', default='./exp')
    parser.add_argument('--output_name', '-on', default='default')

    parser.add_argument('--log_dump_iter', default=500, type=int)

    parser.add_argument('--batch_size_per_gpu', default=8, type=int)

    parser.add_argument('--debug_dir', '-dd', default=None)
    parser.add_argument('--dataset', default=None, type=str)
    parser.add_argument('--init_weights',
                        default='../pths/R-50.pkl',
                        type=str,
                        help='Official code require it for init network.')

    parser.add_argument(
        '--epochs', type=int,
        default=8)  # iteration = epoch * instance_number / batch_size
    parser.add_argument('--save_per_epoch', '-spe', type=int, default=-1)
    parser.add_argument('--flip_JSD',
                        action='store_true',
                        help='Semi-supervised learning enable.')
    parser.add_argument(
        '--flip_JSD_0g',
        action='store_true',
        help='Semi-supervised learning enable with zero grad on flipped input.'
    )
    parser.add_argument('--recursive',
                        action='store_true',
                        help='recursive mode enable.')
    parser.add_argument('--bit',
                        action='store_true',
                        help='group_norm ResNet50 enable.')
    parser.add_argument('--resnext50_32x4d',
                        action='store_true',
                        help='ResNeXt50 enable.')
    parser.add_argument('--resnext101_32x8d',
                        action='store_true',
                        help='ResNeXt101 enable.')

    parser.add_argument('--diff_loss',
                        action='store_true',
                        help='Diff_loss learning enable.')
    parser.add_argument('--flip_aug', action='store_true', help='')
    parser.add_argument('--FA_heavy',
                        action='store_true',
                        help='heavy flip_aug')

    args = parser.parse_args()
    updateConfig_train(args)
    def _setproctitle(title):
        setproctitle("gunicorn: %s" % title)
def main(args):
    setproctitle.setproctitle('quakenet_eval')

    if args.n_clusters is None:
        raise ValueError('Define the number of clusters with --n_clusters')

    ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir)

    cfg = config.Config()
    cfg.batch_size = 1
    cfg.n_clusters = args.n_clusters
    cfg.add = 1
    cfg.n_clusters += 1
    cfg.n_epochs = 1

    # Remove previous output directory
    if os.path.exists(args.output):
        shutil.rmtree(args.output)
    os.makedirs(args.output)
    if args.plot:
        os.makedirs(os.path.join(args.output, "viz"))

    # data pipeline
    data_pipeline = DataPipeline(args.dataset, config=cfg, is_training=False)
    samples = {
        'data': data_pipeline.samples,
        'cluster_id': data_pipeline.labels,
        'start_time': data_pipeline.start_time,
        'end_time': data_pipeline.end_time
    }

    # set up model and validation metrics
    model = models.get(args.model,
                       samples,
                       cfg,
                       args.checkpoint_dir,
                       is_training=False)

    if args.max_windows is None:
        max_windows = 2**31
    else:
        max_windows = args.max_windows

    # Dictionary to store info on detected events
    events_dic = {
        "start_time": [],
        "end_time": [],
        "utc_timestamp": [],
        "cluster_id": [],
        "clusters_prob": []
    }

    # Create catalog name in which the events are stored
    output_catalog = os.path.join(args.output, 'catalog_detection.csv')
    print('Catalog created to store events', output_catalog)

    # Run ConvNetQuake
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        tf.initialize_local_variables().run()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        model.load(sess, args.step)
        print('Predicting using model at step {}'.format(
            sess.run(model.global_step)))

        step = tf.train.global_step(sess, model.global_step)

        n_events = 0
        idx = 0
        time_start = time.time()
        while True:
            try:
                # Fetch class_proba and label
                to_fetch = [
                    samples['data'], model.layers['class_prob'],
                    model.layers['class_prediction'], samples['start_time'],
                    samples['end_time']
                ]
                sample, class_prob_, cluster_id, start_time, end_time = sess.run(
                    to_fetch)

                # Keep only the cluster probabilities, drop the noise probability
                clusters_prob = class_prob_[0, 1::]
                cluster_id -= 1

                # label for noise = -1, label for cluster \in {0:n_clusters}

                is_event = cluster_id[0] > -1
                if is_event:
                    n_events += 1

                idx += 1
                if idx % 1000 == 0:
                    print("processed {} windows".format(idx))

                if is_event:
                    events_dic["start_time"].append(UTCDateTime(start_time))
                    events_dic["end_time"].append(UTCDateTime(end_time))
                    events_dic["utc_timestamp"].append(
                        (start_time + end_time) / 2.0)
                    events_dic["cluster_id"].append(cluster_id[0])
                    events_dic["clusters_prob"].append(list(clusters_prob))

                if idx >= max_windows:
                    print("stopped after {} windows".format(max_windows))
                    print("found {} events".format(n_events))
                    break

            except KeyboardInterrupt:
                print("processed {} windows, found {} events".format(
                    idx + 1, n_events))
                print("Run time: ", time.time() - time_start)

            except tf.errors.OutOfRangeError:
                print('Evaluation completed ({} epochs).'.format(cfg.n_epochs))
                break

        print('joining data threads')
        m, s = divmod(time.time() - time_start, 60)
        print("Prediction took {} min {} seconds".format(m, s))
        coord.request_stop()
        coord.join(threads)

    # Dump dictionary into csv file
    df = pd.DataFrame.from_dict(events_dic)
    df.to_csv(output_catalog)
Exemple #54
0
def main():
    # Workaround for development
    modpath = os.path.realpath(
        os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            '..',
        ))
    if modpath not in sys.path:
        sys.path.insert(0, modpath)

    parser = argparse.ArgumentParser()
    parser.add_argument('restart', nargs='?')
    parser.add_argument('--pidfile', '-P', action='store_true')
    parser.add_argument('--disable-loop-monitor', '-L', action='store_true')
    parser.add_argument('--loop-debug', action='store_true')
    parser.add_argument('--overlay-dirs', '-o', action='append')
    parser.add_argument('--debug-level',
                        choices=[
                            'TRACE',
                            'DEBUG',
                            'INFO',
                            'WARN',
                            'ERROR',
                        ],
                        default='DEBUG')
    parser.add_argument('--log-handler',
                        choices=[
                            'console',
                            'file',
                        ],
                        default='console')
    args = parser.parse_args()

    _logger = logger.Logger('middleware', args.debug_level)
    _logger.getLogger()

    pidpath = '/var/run/middlewared.pid'

    if args.restart:
        if os.path.exists(pidpath):
            with open(pidpath, 'r') as f:
                pid = int(f.read().strip())
            try:
                os.kill(pid, 15)
            except ProcessLookupError as e:
                if e.errno != errno.ESRCH:
                    raise

    if 'file' in args.log_handler:
        _logger.configure_logging('file')
        stream = _logger.stream()
        if stream is not None:
            sys.stdout = sys.stderr = stream
    elif 'console' in args.log_handler:
        _logger.configure_logging('console')
    else:
        _logger.configure_logging('file')

    setproctitle.setproctitle('middlewared')
    # Workaround to tell django to not set up logging on its own
    os.environ['MIDDLEWARED'] = str(os.getpid())

    if args.pidfile:
        with open(pidpath, "w") as _pidfile:
            _pidfile.write(f"{str(os.getpid())}\n")

    Middleware(
        loop_debug=args.loop_debug,
        loop_monitor=not args.disable_loop_monitor,
        overlay_dirs=args.overlay_dirs,
        debug_level=args.debug_level,
    ).run()
Exemple #55
0
import traceback

# Suppress GTK deprecation warnings
warnings.filterwarnings("ignore")

gi.require_version("Gtk", "3.0")
gi.require_version('XApp', '1.0')
from gi.repository import Gtk, Gdk, Gio, XApp, GdkPixbuf, GLib, Pango

from common import *

import mpv

from imdb import IMDb

setproctitle.setproctitle("hypnotix")

# i18n
APP = 'hypnotix'
LOCALE_DIR = "/usr/share/locale"
locale.bindtextdomain(APP, LOCALE_DIR)
gettext.bindtextdomain(APP, LOCALE_DIR)
gettext.textdomain(APP)
_ = gettext.gettext

PROVIDER_OBJ, PROVIDER_NAME = range(2)
PROVIDER_TYPE_ID, PROVIDER_TYPE_NAME = range(2)

GROUP_OBJ, GROUP_NAME = range(2)
CHANNEL_OBJ, CHANNEL_NAME, CHANNEL_LOGO = range(3)
Exemple #56
0
    def serve(self, args):
        threading.currentThread().setName('master')
        if SETPROCTITLE:
            setproctitle.setproctitle(args.process_name +
                                      ' master %s' % ' '.join(sys.argv[1:]))

        # Initialize logging, keep this at the beginning!
        self.init_logging(args.log_level)

        for f in glob.glob(os.path.join(args.socket_path, 'rest*.sock')):
            os.unlink(f)
        for f in glob.glob(os.path.join(args.socket_path, 'notify*.sock')):
            os.unlink(f)

        # Initialize translations
        self.translations = self.get_translations(args.translations_path)

        if not self.translations:
            logging.warn(
                'no po files found, no translations will be available')
        else:
            # TODO: lazy-logging, info message?
            logging.debug("translations available for: '%s'",
                          ', '.join(self.translations.keys()))

        if not UJSON:
            warnings.warn(
                'ujson module is not available, falling back to slower stdlib json implementation'
            )

        logging.info('starting kopano-mfr')

        # Fake exit queue.
        queue = multiprocessing.JoinableQueue(1)
        queue.put(True)

        workers = []
        for n in range(args.workers):
            rest_runner = Runner(queue, self.run_rest, 'rest',
                                 args.process_name, n)
            rest_process = multiprocessing.Process(target=rest_runner.run,
                                                   name='rest{}'.format(n),
                                                   args=(args.socket_path, n,
                                                         args))
            workers.append(rest_process)
            notify_runner = Runner(queue, self.run_notify, 'notify',
                                   args.process_name, n)
            notify_process = multiprocessing.Process(target=notify_runner.run,
                                                     name='notify{}'.format(n),
                                                     args=(args.socket_path, n,
                                                           args))
            workers.append(notify_process)

        for worker in workers:
            worker.daemon = True
            worker.start()

        if args.insecure:
            logging.warning(
                'insecure mode - TLS client connections are susceptible to man-in-the-middle attacks and safety checks are off - this is not suitable for production use'
            )

        if args.with_experimental:
            logging.warning('experimental endpoints are enabled')

        if args.with_metrics:
            if PROMETHEUS:
                if not os.environ.get('prometheus_multiproc_dir'):
                    logging.error('please export "prometheus_multiproc_dir"')
                    sys.exit(-1)

                # Spawn the metrics process later, so we can pass along worker name and pids.
                monitor_workers = [(worker.name, worker.pid)
                                   for worker in workers]
                # Include master process.
                monitor_workers.append(('master', os.getpid()))
                metrics_runner = Runner(queue, self.run_metrics, 'metrics',
                                        args.process_name, 0)
                metrics_process = multiprocessing.Process(
                    target=metrics_runner.run,
                    args=(args.socket_path, args, monitor_workers))
                metrics_process.daemon = True
                metrics_process.start()
                workers.append(metrics_process)
            else:
                logging.error(
                    'please install prometheus client python bindings')
                sys.exit(-1)

        signal.signal(signal.SIGCHLD, self.sigchld)
        signal.signal(signal.SIGTERM, self.sigterm)

        try:
            while self.running:
                signal.pause()
        except KeyboardInterrupt:
            self.running = False
            logging.info('keyboard interrupt')

        logging.info('starting shutdown')

        signal.signal(signal.SIGCHLD, signal.SIG_IGN)

        if not self.abnormal_shutdown:
            # Flush queue, to tell workers to cleanly exit.
            queue.get()
            try:
                queue.task_done()
            except ValueError:
                # NOTE(longsleep): If a process encountered an error, task_done() was
                # already called, so this call raises ValueError, which is fine to ignore.
                pass

        # Wait for workers to exit.
        deadline = time.monotonic() + 5
        done = []
        while deadline > time.monotonic():
            ready = multiprocessing.connection.wait([
                worker.sentinel
                for worker in workers if worker.sentinel not in done
            ],
                                                    timeout=1)
            done.extend(ready)
            if len(done) == len(workers):
                break

        # Kill off workers which did not exit.
        kill = len(done) != len(workers)
        for worker in workers:
            if kill and worker.is_alive():
                if self.abnormal_shutdown:
                    logging.critical('killing worker: %d', worker.pid)
                    os.kill(worker.pid, signal.SIGKILL)
                else:
                    logging.warn('terminating worker: %d', worker.pid)
                    worker.terminate()
            if args.with_metrics and PROMETHEUS:
                prometheus_multiprocess.mark_process_dead(worker.pid)
            worker.join()

        # Cleanup potentially left over sockets.
        sockets = []
        for n in range(args.workers):
            sockets.append('rest%d.sock' % n)
        for n in range(args.workers):
            sockets.append('notify%d.sock' % n)
        for socket_name in sockets:
            try:
                unix_socket = os.path.join(args.socket_path, socket_name)
                os.unlink(unix_socket)
            except OSError as err:
                if err.errno != errno.ENOENT:
                    logging.warning(
                        'failed to remove socket %s on shutdown, error: %s',
                        unix_socket, err)

        logging.info('shutdown complete')
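
The metrics worker started via run_metrics above is not included in this snippet. As a rough sketch only (assuming the prometheus_client package and an exported prometheus_multiproc_dir, which the code above checks for; serve_metrics and the port are illustrative names, not part of the original), such a worker could aggregate and expose the per-process metrics like this:

from prometheus_client import CollectorRegistry, start_http_server
from prometheus_client import multiprocess

def serve_metrics(port=9090):
    # Collect metrics written by every worker process into a single registry.
    registry = CollectorRegistry()
    multiprocess.MultiProcessCollector(registry)
    # Serve the aggregated metrics over HTTP for Prometheus to scrape.
    start_http_server(port, registry=registry)
    # A real worker would then block (for example with signal.pause()) to keep serving.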
Exemple #57
0
        'alarm proxy host, agent pull config and push alarm to this proxy host, eg: 127.0.0.1:9090',
        default='127.0.0.1:9090')
    parser.add_argument('-f',
                        '--file',
                        help="log file for agent to watch, eg: ./xtop.log",
                        default='/chain/log/xtop.log')
    parser.add_argument('--nodaemon',
                        action='store_true',
                        help='start as no-daemon mode')
    args = parser.parse_args()

    # set process title
    proc_title = 'topargus-agent: '
    for arg in sys.argv:
        proc_title = '{0} {1}'.format(proc_title, arg)
    setproctitle.setproctitle(proc_title)

    if args.nodaemon:
        print("start as no-daemon mode")
    else:
        # do not use slog before daemon_init
        print("start as daemon mode")
        try:
            daemon.daemon_init()
        except RuntimeError as e:
            print(e, file=sys.stderr)
            raise SystemExit(1)

    # attention: must be called after daemon_init
    slogging.start_log_monitor()
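
daemon.daemon_init() above is a project-specific helper whose body is not shown here. A minimal sketch of conventional double-fork daemonization (an assumption for illustration, not the project's actual implementation) would look roughly like this:

import os
import sys

def daemon_init():
    # First fork: let the parent return to the shell immediately.
    if os.fork() > 0:
        os._exit(0)
    os.setsid()  # become session leader and detach from the controlling terminal
    # Second fork: ensure the daemon can never re-acquire a controlling terminal.
    if os.fork() > 0:
        os._exit(0)
    os.chdir('/')
    os.umask(0o22)
    # Redirect the standard streams to /dev/null.
    devnull = os.open(os.devnull, os.O_RDWR)
    for fd in (sys.stdin.fileno(), sys.stdout.fileno(), sys.stderr.fileno()):
        os.dup2(devnull, fd)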
Exemple #58
0
def main(config_path, model_save_path, gpu_indices):

    # Load a config file (.yml)
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)
        params = config['param']

    # Class counts below exclude the CTC blank label
    if params['label_type_main'] == 'word_freq10':
        if params['train_data_size'] == 'train100h':
            params['num_classes_main'] = 7213
        elif params['train_data_size'] == 'train460h':
            params['num_classes_main'] = 18641
        elif params['train_data_size'] == 'train960h':
            params['num_classes_main'] = 26642
    else:
        raise TypeError

    if params['label_type_sub'] == 'character':
        params['num_classes_sub'] = 28
    elif params['label_type_sub'] == 'character_capital_divide':
        if params['train_data_size'] == 'train100h':
            params['num_classes_sub'] = 72
        elif params['train_data_size'] == 'train460h':
            params['num_classes_sub'] = 77
        elif params['train_data_size'] == 'train960h':
            params['num_classes_sub'] = 77
    else:
        raise TypeError

    # Model setting
    model = MultitaskCTC(encoder_type=params['encoder_type'],
                         input_size=params['input_size'],
                         splice=params['splice'],
                         num_stack=params['num_stack'],
                         num_units=params['num_units'],
                         num_layers_main=params['num_layers_main'],
                         num_layers_sub=params['num_layers_sub'],
                         num_classes_main=params['num_classes_main'],
                         num_classes_sub=params['num_classes_sub'],
                         main_task_weight=params['main_task_weight'],
                         lstm_impl=params['lstm_impl'],
                         use_peephole=params['use_peephole'],
                         parameter_init=params['weight_init'],
                         clip_grad_norm=params['clip_grad_norm'],
                         clip_activation=params['clip_activation'],
                         num_proj=params['num_proj'],
                         weight_decay=params['weight_decay'])

    # Set process name
    setproctitle(
        'libri_' + model.name + '_' + params['train_data_size'] + '_' +
        params['label_type_main'] + '_' + params['label_type_sub'])

    model.name += '_' + str(params['num_units'])
    model.name += '_main' + str(params['num_layers_main'])
    model.name += '_sub' + str(params['num_layers_sub'])
    model.name += '_' + params['optimizer']
    model.name += '_lr' + str(params['learning_rate'])
    if params['num_proj'] != 0:
        model.name += '_proj' + str(params['num_proj'])
    if params['dropout'] != 0:
        model.name += '_drop' + str(params['dropout'])
    if params['num_stack'] != 1:
        model.name += '_stack' + str(params['num_stack'])
    if params['weight_decay'] != 0:
        model.name += '_wd' + str(params['weight_decay'])
    model.name += '_main' + str(params['main_task_weight'])
    if len(gpu_indices) >= 2:
        model.name += '_gpu' + str(len(gpu_indices))

    # Set save path
    model.save_path = mkdir_join(
        model_save_path, 'ctc',
        params['label_type_main'] + '_' + params['label_type_sub'],
        params['train_data_size'], model.name)

    # Pick a fresh model directory (append an index when earlier runs exist)
    model_index = 0
    new_model_path = model.save_path
    while True:
        if isfile(join(new_model_path, 'complete.txt')):
            # Training of this model has already finished
            model_index += 1
            new_model_path = model.save_path + '_' + str(model_index)
        elif isfile(join(new_model_path, 'config.yml')):
            # Training of this model has not finished yet
            model_index += 1
            new_model_path = model.save_path + '_' + str(model_index)
        else:
            break
    model.save_path = mkdir(new_model_path)

    # Save config file
    shutil.copyfile(config_path, join(model.save_path, 'config.yml'))

    sys.stdout = open(join(model.save_path, 'train.log'), 'w')
    # TODO(hirofumi): change to logger
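    # From this point on, print output (training progress) goes to train.log in the model directory.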
    do_train(model=model, params=params, gpu_indices=gpu_indices)
Exemple #59
0
    return model


if __name__ == "__main__":

    # parse python script input parameters
    parser = argparse.ArgumentParser()
    args = add_args(parser)
    logging.info(args)

    worker_number = 1
    process_id = 0
    # customize the process name
    str_process_name = "Fedml (single):" + str(process_id)
    setproctitle.setproctitle(str_process_name)

    # customize the log format (switch level to logging.DEBUG for verbose output)
    logging.basicConfig(level=logging.INFO,
                        format=str(process_id) +
                        ' - %(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
                        datefmt='%a, %d %b %Y %H:%M:%S')
    hostname = socket.gethostname()
    logging.info("#############process ID = " + str(process_id) +
                 ", host name = " + hostname + "########" +
                 ", process ID = " + str(os.getpid()) +
                 ", process Name = " + str(psutil.Process(os.getpid())))

    # initialize the wandb machine learning experiment tracking platform (https://www.wandb.com/).
    if process_id == 0:
Exemple #60
0
def main(argv):
    parser = argparse.ArgumentParser(
        description=
        'update_whole_seq_db version %s.\nProcess all sequences in the waiting queue table NewSequencesTable.\nShould be run daily.'
        % __version__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--port', help='postgres port', default=5432, type=int)
    parser.add_argument('--host', help='postgres host', default=None)
    parser.add_argument(
        '--server-type',
        help=
        'server type (develop/main/test). overridden by --database/user/password',
        default='main')
    parser.add_argument('--database', help='postgres database')
    parser.add_argument('--user',
                        help='postgres user (to override --server-type)')
    parser.add_argument('--password',
                        help='postgres password (to override --server-type)')
    parser.add_argument('--proc-title',
                        help='name of the process (to view in ps aux)')
    parser.add_argument('--debug-level',
                        help='debug level (1 for debug ... 9 for critical)',
                        default=2,
                        type=int)
    parser.add_argument(
        '--no-delete',
        help='do not delete from new sequences queue (NewSequencesTable).',
        action='store_true')

    parser.add_argument(
        '-w',
        '--wholeseqdb',
        help='name of the whole sequence database (i.e. SILVA/GREENGENES)',
        default='SILVA')
    parser.add_argument('-f',
                        '--wholeseq-file',
                        help='name of the whole sequence fasta file',
                        required=True)
    parser.add_argument(
        '--update-all',
        help=
        "update all dbbact sequences (recalculate). If not set, will just update new dbbact sequences",
        action='store_true')
    args = parser.parse_args(argv)

    SetDebugLevel(args.debug_level)
    # set the process name for ps aux
    if args.proc_title:
        setproctitle.setproctitle(args.proc_title)

    # get the database connection
    con, cur = db_access.connect_translator_db(server_type=args.server_type,
                                               database=args.database,
                                               user=args.user,
                                               password=args.password,
                                               port=args.port,
                                               host=args.host)

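    # Update the whole-sequence matches; only new dbbact sequences are processed unless --update-all is set.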
    update_whole_seq_db(con,
                        cur,
                        args.wholeseq_file,
                        seqdbname=args.wholeseqdb,
                        check_exists=not args.update_all,
                        no_delete=args.no_delete)