Example #1
def checkmultiprocess(ipqueue,cacheResult):
    if ipqueue.qsize() == 0:
        return
    processlist = []
    "如果ip数小于512,只使用一个子进程,否则则使用指定进程数,每个进程处理平均值的数量ip"
    max_threads = g_maxthreads
    maxprocess = g_useprocess
    if ipqueue.qsize() < g_maxthreads:
        max_threads = ipqueue.qsize()
        maxprocess = 1
    else:
        max_threads = (ipqueue.qsize() + g_useprocess) / g_useprocess
        if max_threads > g_maxthreads:
            max_threads = g_maxthreads
    #multiprocessing.log_to_stderr(logging.DEBUG)
    for i in xrange(0,maxprocess):
        p = Process(target=callsingleprocess,args=(ipqueue,cacheResult,max_threads))
        p.daemon = True
        processlist.append(p)
        p.start()
    
    try:
        for p in processlist:
            p.join()
    except KeyboardInterrupt:
        PRINT("need wait all process end...")
        for p in processlist:
            if p.is_alive():
                p.terminate()  
Example #2
    def __init__( self, url, dirlist, workers, logfile ):
        """ initialize main object and configure state """
        self.state = {
            "dirs"    : Queue(),
            "results" : Queue(),
            "url"     : url,
            "procs"   : [],
            "workers" : workers,
            "logfile" : logfile
        }

        for worker in range( 0, int( self.state["workers"] ) + 1 ):
            """ start worker processes """
            proc = Process( target=self.requester )
            self.state["procs"].append( proc )
            proc.start()
        
        """ start a single printer process """
        proc = Process( target=self.printer )
        self.state["procs"].append( proc )
        proc.start()
        self.dircount = 0 #number of jobs is number of dirs
        
        for directory in open( dirlist, "r" ).readlines():
            self.state["dirs"].put( directory.rstrip( "\n" ) )
            self.dircount += 1
def load_link(browser, link):
    ''' Return true if load successful, false otherwise. '''

    while True:

        p = Process(target=browser_get, args=(browser, link))
        p.start()
        p.join(LOAD_TIME)
        if p.is_alive():
            p.terminate()
        else:
            break

    wait_time = READY_TIME

    while True:

        start_time = time.time()

        ''' Wait for page to have completely loaded. '''
        while True:
            state = browser.execute_script('return document.readyState;')
            if state == 'complete':
                return True
            if time.time() - start_time > wait_time:
                logging.info("Document %s not ready after %ds", link, wait_time)
                break
            time.sleep(1)

        wait_time = wait_time * READY_RATIO
        if wait_time > MAX_READY_TIME * READY_RATIO:
            logging.error("Skipping document %s.  Was never ready.", link)
            return False
        else:
            logging.info("Increasing wait time to %ds", wait_time)
 def scanner_network(self,gateway):
     scan = ''
     config_gateway = gateway.split('.')
     del config_gateway[-1]
     for i in config_gateway:
         scan += str(i) + '.'
     gateway = scan
     ranger = str(self.ip_range.text()).split('-')
     jobs = []
     manager = Manager()
     on_ips = manager.dict()
     for n in xrange(int(ranger[0]),int(ranger[1])):
         ip='%s{0}'.format(n)%(gateway)
         p = Process(target=self.working,args=(ip,on_ips))
         jobs.append(p)
         p.start()
     for i in jobs: i.join()
     for i in on_ips.values():
         Headers = []
         n = i.split('|')
         self.data['IPaddress'].append(n[0])
         self.data['MacAddress'].append(n[1])
         self.data['Hostname'].append('<unknown>')
         for n, key in enumerate(reversed(self.data.keys())):
             Headers.append(key)
             for m, item in enumerate(self.data[key]):
                 item = QTableWidgetItem(item)
                 item.setTextAlignment(Qt.AlignVCenter | Qt.AlignCenter)
                 self.tables.setItem(m, n, item)
     Headers = []
     for key in reversed(self.data.keys()):
         Headers.append(key)
     self.tables.setHorizontalHeaderLabels(Headers)
class KeepAliveClientTest(TestCase):

    server_address = ("127.0.0.1", 65535)

    def __init__(self, *args, **kwargs):
        super(KeepAliveClientTest, self).__init__(*args, **kwargs)
        self.server_process = Process(target=self._run_server)

    def setUp(self):
        super(KeepAliveClientTest, self).setUp()
        self.client = Client(["%s:%d" % self.server_address])
        self.server_process.start()
        time.sleep(.10)

    def tearDown(self):
        self.server_process.terminate()
        super(KeepAliveClientTest, self).tearDown()

    def _run_server(self):
        self.server = BaseHTTPServer.HTTPServer(self.server_address, ClientAddressRequestHandler)
        self.server.handle_request()

    def test_client_keepalive(self):
        for x in range(10):
            result = self.client.sql("select * from fake")

            another_result = self.client.sql("select again from fake")
            self.assertEqual(result, another_result)
Example #6
def start_short_timeout_app_process():
    # XXX DO NOT FORGET TO KILL THE PROCESS IF THE TEST DOES NOT SUCCEED
    p = Process(target=start_short_timeout_example_server)
    p.start()
    sleep()
    check_connection()
    return p
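
The comment above warns that the caller must kill the process if the test does not succeed. A minimal, hypothetical test sketch (the test body itself is omitted) would wrap the helper in try/finally:

def test_short_timeout_server():
    p = start_short_timeout_app_process()
    try:
        pass  # ... exercise the short-timeout server here ...
    finally:
        # Always clean up, even if the test body raises.
        p.terminate()
        p.join()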
Example #7
    def benchmark(self, request, pk):
        queryset = Attempt.objects.all()
        attempt = get_object_or_404(queryset, id=pk)
        serializer = AttemptSerializer(attempt)

        # check payload
        payload = dict(request.data)
        if 'database' not in payload or 'benchmark' not in payload:
            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
        # run benchmark
        process = Process(target = utils.run_benchmark, args = (pk, payload['database'], payload['benchmark']))
        process.start()
        # utils.run_benchmark(pk, payload['database'], payload['benchmark'])
        # should know the deployer id
        deployer_id = 1
        log_file_path = os.path.join(os.path.dirname(__file__), os.pardir, 'vagrant', str(deployer_id) + '.log')
        
        def stream_response_generator():
            last_line_no = 0
            while process.is_alive():
                time.sleep(1)
                with open(log_file_path, 'r') as log_file:
                    content = log_file.readlines()
                    line_no = len(content)
                    if line_no > last_line_no:
                        yield ''.join(content[last_line_no:])
                        last_line_no = line_no
            time.sleep(1)
            with open(log_file_path, 'r') as log_file:
                content = log_file.readlines()
                line_no = len(content)
                if line_no > last_line_no:
                    yield ''.join(content[last_line_no:])

        return StreamingHttpResponse(stream_response_generator())
Example #8
def start_echo_server_process():
    # XXX DO NOT FORGET TO KILL THE PROCESS IF THE TEST DOES NOT SUCCEED
    sleep()
    p = Process(target=start_echo_server)
    p.start()
    sleep(1.5)
    return p
Example #9
    def run(self, tasks, render, update, render_args=(), render_kwargs={}, update_args=(), update_kwargs={}):

        # establish ipc queues using a manager process
        task_queue = SimpleQueue()
        result_queue = SimpleQueue()

        # start process to generate image samples
        producer = Process(target=self._producer, args=(tasks, task_queue))
        producer.start()

        # start worker processes
        workers = []
        for pid in range(self._processes):
            p = Process(target=self._worker, args=(render, render_args, render_kwargs, task_queue, result_queue))
            p.start()
            workers.append(p)

        # consume results
        for _ in tasks:
            result = result_queue.get()
            update(result, *update_args, **update_kwargs)

        # shutdown workers
        for _ in workers:
            task_queue.put(None)
def nct_tagging(index_name, host, port_no, process_ids,
                stopwords, umls, pos, nprocs=1):

    # open the clinical trial ids file to process
    nct_ids = []
    for line in open(process_ids, 'rb'):
        nct_ids.append(line.strip())

    # Check if index exists
    index = es_index.ElasticSearch_Index(index_name, host=host, port=port_no)
    index.add_field('ec_tags_umls', term_vector=True)

    # Get clinical
    # process each clinical trial and store to XML file
    log.info('processing clinical trials')
    procs = []
    chunksize = int(math.ceil(len(nct_ids) / float(nprocs)))
    for i in xrange(nprocs):
        p = Process(target=_worker, args=(nct_ids[chunksize * i:chunksize * (i + 1)],
                                          index_name, host, port_no,
                                          stopwords, umls, pos, (i + 1)))
        procs.append(p)
        p.start()

    for p in procs:
        p.join()
Example #11
def send_probe_requests(interface=None, ssid=None):

    # initialize shared memory
    results = Queue()

    # start sniffer before sending out probe requests
    p = Process(target=sniffer, args=(interface, results,))
    p.start()

    # give sniffer a chance to initialize so that we don't miss
    # probe responses
    time.sleep(3)

    # send out probe requests... sniffer will catch any responses
    ProbeReq(ssid=ssid, interface=interface)

    # make sure to get results from shared memory before allowing 
    # sniffer to join with parent process 
    probe_responses = results.get()

    # join sniffer with its parent process
    p.join()

    # return results
    return probe_responses
def processFiles(patch_dir):
    root = os.getcwd()
    glbl.data_dirs = {}
    if root != patch_dir: working_path = root+"/"+patch_dir
    else: working_path = root

    for path, dirs, files in os.walk(working_path):
        if len(dirs) == 0: glbl.data_dirs[path] = ''
    

    # Multiprocessing Section
    #########################################
    Qids = glbl.data_dirs.keys()
    manager = Manager()                                      # creates shared memory manager object
    results = manager.dict()                                 # Add dictionary to manager, so it can be accessed across processes
    nextid = Queue()                                         # Create Queue object to serve as shared id generator across processes
    for qid in Qids: nextid.put(qid)                         # Load the ids to be tested into the Queue
    for x in range(0,multiprocessing.cpu_count()):           # Create one process per logical CPU
        p = Process(target=processData, args=(nextid,results)) # Assign process to the processData function, passing in the Queue and shared dictionary
        glbl.jobs.append(p)                                   # Add the process to a list of running processes
        p.start()                                             # Start process running
    for j in glbl.jobs:
        j.join()                                              # For each process, join them back to main, blocking on each one until finished
    
    # write out results
    c = 1
    sets = results.keys()
    sets.sort()
    for x in sets:
        if results[x] != 'None':
            FINAL = open('result'+str(c)+'.txt','w')
            n = "\n************************************************************************************************\n"
            FINAL.write(n+"* "+x+'    *\n'+n+results[x]+"\n")
            FINAL.close()     
            c += 1
Example #13
def main():

    warnings.filterwarnings("ignore", "Degrees of freedom <= 0 for slice", RuntimeWarning)

    options = getoptions()

    setuplogger(options['log'], options['logfile'], logging.INFO)

    total_procs = options['nprocs'] * options['total_instances']
    start_offset = options['instance_id'] * options['nprocs']

    exit_code = 0

    if options['nprocs'] == 1:
        createsummary(options, None, None)
    else:
        proclist = []
        for procid in xrange(options['nprocs']):
            p = Process( target=createsummary, args=(options, total_procs, start_offset + procid) )
            p.start()
            proclist.append(p)

        for proc in proclist:
            proc.join()
            exit_code += proc.exitcode

    sys.exit(exit_code)
Example #14
def genPairs(PNGMaps, compareMaps):
    pairA = []
    pairB = []
    # Maximum possible fitness
    totalFitness = len(compareMaps)
    threadsA = []
    threadsB = []
    # Thread safe way to get parent PNGMaps
    queueA = Queue()
    queueB = Queue()
    # Create a list of threads to get a PNGMap
    for listMap in PNGMaps:
        threadA = Process(target=randPair , args=(PNGMaps, compareMaps, totalFitness, queueA))
        threadB = Process(target=randPair , args=(PNGMaps, compareMaps, totalFitness, queueB))
        threadA.start()
        threadB.start()
        threadsA.append(threadA)
        threadsB.append(threadB)
    # Get the parents from the queues
    while not len(pairA) == len(PNGMaps):
        pairA.append(queueA.get())
    while not len(pairB) == len(PNGMaps):
        pairB.append(queueB.get())
    # Join the threads with the current one
    for thread in threadsA:
        thread.join()
    for thread in threadsB:
        thread.join()
    # Return the pair of PNGMaps
    return pairA, pairB
def main():
	# Number of worker processes; don't change this, because each one calculates keys for exactly 100 games
	# (you can change this if you know how, I'm too euphoric right now to make it more flexible)
	start = time();
	threads = 10;
	for line in sys.stdin:
		# Parsing the stdin
		encryptedMessage,encryptedGames = line.strip().split(':');
		encryptedGames = encryptedGames.split('~');
		# Queue with keys
		q = Queue();
		# Threads
		for i in range(10):
			p = Process(target=keysFinder, args=(encryptedGames[i*100:(i+1)*100],q));
			p.start();
		# Number of threads already finished
		finished = 0;
		keys = [];
		while finished < threads:
			keys += q.get();
			finished+=1;

		# From all keys, try which one decrypts a valid message
		em = binascii.unhexlify(encryptedMessage);
		found = False;
		for key in keys:
			x = AES.new(key);
			dec = x.decrypt(em);
			if (isCorrect(dec)):
				found = True;
				# Make unpadding and print. Voila!
				print removePadding(dec.strip());
	if (sys.argv[1] == 'benchmark'):
		print "Time elapsed: ",time()-start;
Example #16
def start_workers(config):
    '''
    Picks up all the external system configuration from the config file and starts up as many processes as non-default sections in the config.
    The following elements are required from the default configuration section :
    - solr_url : base url of the solr server.
    - nova_db_server : IP or hostname of the nova controller.
    - nova_db_port : Port of the nova db to which the workers should connect.For nova+mysql this would be 3306.
    - nova_db_creds : credentials in the format user:password
    - amqp_server : IP or hostname of the amqp server. Usually, this is same as the nova controller.
    - amqp_port : Port of the AMQP server. If using RMQ this should be 5672.
    - amqp_creds : credentials in the format user:password
    
    Each non-default section of the config should represent a resource type that this system monitors. Each individual worker corresponds to
    a resource type and is run in a separate python process.
    '''
 
    logUtils.setup_logging(config)
    global _LOGGER
    _LOGGER = logUtils.get_logger(__name__)
    for section in config.sections():
        process = Process(target=worker.run, args=(config, section,))
        process.daemon = True
        process.start()
        _LOGGER.info('Started worker process - ' + str(process.pid))
        _PROCESSES.append(process)
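
As a rough illustration of the configuration the docstring above describes, the sketch below builds a minimal config in code; every host, port, and credential is a placeholder, and the single non-default section ("instances") merely stands in for one monitored resource type:

from configparser import ConfigParser

EXAMPLE_CONFIG = """
[DEFAULT]
solr_url = http://localhost:8983/solr
nova_db_server = 127.0.0.1
nova_db_port = 3306
nova_db_creds = nova:secret
amqp_server = 127.0.0.1
amqp_port = 5672
amqp_creds = guest:guest

[instances]
"""

config = ConfigParser()
config.read_string(EXAMPLE_CONFIG)
# start_workers(config) would then spawn one daemon worker process per
# non-default section ("instances" in this sketch).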
class ClockInfoUpdater(object):
    def __init__(self):
        self.weather_parent_pipe, weather_child_pipe = Pipe()
        weather = WeatherAPIClient(weather_child_pipe)
        self.weather_api_client = Process(target=weather.run_forever)
        self.weather_api_client.start()

        self.traffic_parent_pipe, traffic_child_pipe = Pipe()
        traffic = TrafficAPIClient(traffic_child_pipe)
        self.traffic_api_client = Process(target=traffic.run_forever)
        self.traffic_api_client.start()

    def run(self, clock_info, update_freq):
        now = datetime.now()
        last_update = clock_info.get('last_update_time')
        if last_update:
            update_time_delta = now - last_update
            if update_time_delta.total_seconds() < update_freq:
                return False
        clock_info['last_update_time'] = now
        update_time(clock_info, now)
        update_weather(clock_info, now, self.weather_parent_pipe)
        update_color(clock_info, now)
        update_traffic(clock_info, now, self.traffic_parent_pipe)
        return True
Example #18
class TCPServer(object):
    def __init__(self, port):
        self.port = int(port)

    def start(self):
        def go(port):
            from httpretty import HTTPretty
            HTTPretty.disable()
            import socket
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.bind(('localhost', port))
            s.listen(True)
            conn, addr = s.accept()

            while True:
                data = conn.recv(1024)
                conn.send(b"RECEIVED: " + bytes(data))

            conn.close()

        args = [self.port]
        self.process = Process(target=go, args=args)
        self.process.start()
        time.sleep(0.4)

    def stop(self):
        try:
            os.kill(self.process.pid, 9)
        except OSError:
            self.process.terminate()
        finally:
            self.is_running = False
Example #19
def run_parkinglot_expt(net, n):
    "Run experiment"

    seconds = args.time

    # Start the bandwidth and cwnd monitors in the background
    monitor = Process(target=monitor_devs_ng,
            args=('%s/bwm.txt' % args.dir, 1.0))
    monitor.start()
    start_tcpprobe()

    # Get receiver and clients
    recvr = net.getNodeByName('receiver')
    sender1 = net.getNodeByName('h1')

    # Start the receiver
    port = 5001
    recvr.cmd('iperf -s -p', port,
              '> %s/iperf_server.txt' % args.dir, '&')

    waitListening(sender1, recvr, port)

    # TODO: start the sender iperf processes and wait for the flows to finish
    # Hint: Use getNodeByName() to get a handle on each sender.
    # Hint: Use sendCmd() and waitOutput() to start iperf and wait for them to finish
    # Hint: waitOutput waits for the command to finish allowing you to wait on a particular process on the host
    # iperf command to start flow: 'iperf -c %s -p %s -t %d -i 1 -yc > %s/iperf_%s.txt' % (recvr.IP(), 5001, seconds, args.dir, node_name)
    # Hint (not important): You may use progress(t) to track your experiment progress

    recvr.cmd('kill %iperf')

    # Shut down monitors
    monitor.terminate()
    stop_tcpprobe()
Example #20
def webgui(args):
    os.environ["FWDB_CONFIG"] = json.dumps(get_lp(args).to_dict())
    from fireworks.flask_site.app import app
    if args.wflowquery:
        app.BASE_Q_WF = json.loads(args.wflowquery)
    if args.fwquery:
        app.BASE_Q = json.loads(args.fwquery)
        if "state" in app.BASE_Q:
            app.BASE_Q_WF["state"] = app.BASE_Q["state"]

    if not args.server_mode:
        from multiprocessing import Process
        p1 = Process(
            target=app.run,
            kwargs={"host": args.host, "port": args.port, "debug": args.debug})
        p1.start()
        import webbrowser
        time.sleep(2)
        webbrowser.open("http://{}:{}".format(args.host, args.port))
        p1.join()
    else:
        from fireworks.flask_site.app import bootstrap_app
        try:
            from fireworks.flask_site.gunicorn import (
                StandaloneApplication, number_of_workers)
        except ImportError:
            import sys
            sys.exit("Gunicorn is required for server mode. "
                     "Install using `pip install gunicorn`.")
        options = {
            'bind': '%s:%s' % (args.host, args.port),
            'workers': number_of_workers(),
        }
        StandaloneApplication(bootstrap_app, options).run()
Example #21
def start_schedulers(options):
    apps = [app.strip() for app in options.scheduler.split(',')]
    try:
        from multiprocessing import Process
    except:
        sys.stderr.write('Sorry, -K only supported for python 2.6-2.7\n')
        return
    processes = []
    code = "from gluon import current;current._scheduler.loop()"
    for app in apps:
        if not check_existent_app(options, app):
            print "Application '%s' doesn't exist, skipping" % (app)
            continue
        print 'starting scheduler for "%s"...' % app
        args = (app,True,True,None,False,code)
        logging.getLogger().setLevel(options.debuglevel)
        p = Process(target=run, args=args)
        processes.append(p)
        print "Currently running %s scheduler processes" % (len(processes))
        p.start()
        print "Processes started"
    for p in processes:
        try:
            p.join()
        except (KeyboardInterrupt, SystemExit):
            print "Processes stopped"
        except:
            p.terminate()
            p.join()
Example #22
class FakeProcess:
    '''
    Runs an instance of multiprocessing.Process, which displays fake results based on PySystemMock.fakeCommandResult{},
    or based on a generic countdown using the command string, in the event that fakeCommandResult{} doesn't match.
    This class functions as an adapter from multiprocessing.Process() to subprocess.Popen(), which the caller will expect.
    '''
    stdout = FakeStdout()  # can be read by callers as if it were a subprocess.Popen.stdout object
    process = None

    MOCK_STEPS_ITERATIONS = 5

    def start(self, command, fakeCommandResults):
        fakeCommandResult = self.getFakeResultForCommand(command, fakeCommandResults)
        self.process = Process(target=writeFakeCommandResultsToPipe, args=(self.stdout.writer, fakeCommandResult))
        self.process.start()

    def getFakeResultForCommand(self, command, fakeCommandResults):
        for regex in fakeCommandResults:
            match = re.search(regex, command.__str__())
            if match:
                return fakeCommandResults[regex].split('\n')
        return ["processing %s [%d]..." % (command, i) for i in range(self.MOCK_STEPS_ITERATIONS, 0, -1)]

    def poll(self):
        return self.process.exitcode

    def wait(self):
        # multiprocessing.Process has no wait(); join and report the exit
        # code the way subprocess.Popen.wait() would.
        self.process.join()
        return self.process.exitcode

    def terminate(self):
        self.process.terminate()
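
A hypothetical usage sketch of the FakeProcess adapter above; the command string and the fakeCommandResults mapping are invented for illustration, and FakeStdout / writeFakeCommandResultsToPipe are assumed to come from the surrounding mock module:

fake = FakeProcess()
fake.start("ping -c 3 example.com", {r"^ping": "reply 1\nreply 2\nreply 3"})
print(fake.wait())   # blocks like subprocess.Popen.wait() and reports the exit code
print(fake.poll())   # exit code again, the way Popen.poll() would report it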
    def serve(self):
        """Start a fixed number of worker threads and put client into a queue"""

        #this is a shared state that can tell the workers to exit when set as false
        self.isRunning.value = True

        #first bind and listen to the port
        self.serverTransport.listen()

        #fork the children
        for i in range(self.numWorkers):
            try:
                w = Process(target=self.workerProcess)
                w.daemon = True
                w.start()
                self.workers.append(w)
            except (Exception) as x:
                logging.exception(x)

        #wait until the condition is set by stop()

        while True:

            self.stopCondition.acquire()
            try:
                self.stopCondition.wait()
                break
            except (SystemExit, KeyboardInterrupt):
                break
            except (Exception) as x:
                logging.exception(x)

        self.isRunning.value = False
Example #24
class MultiProcessPlot(object):
	## Initialization
	def __init__(self):
		self.plotpipe, PlotterPipe = Pipe()
		## Called process for plotting
		self.plotter = ProcessPlotter()
		## Process holder
		self.plotprocess = Process(target = self.plotter, args = (PlotterPipe, ))
		self.plotprocess.daemon = True
		self.plotprocess.start()

	## Plot function
	def plot(self, finished=False):
		send = self.plotpipe.send

		if finished:
			send(None)
		else:
			if not LoopCounter % plotRefreshPeriod:
				reset = 1
			else:
				reset = 0

			## Compose data for pipe
			data = [reset,
					MessageMeasurement.pose2d.x, MessageMeasurement.pose2d.y, MessageMeasurement.pose2d.theta,
					MessageEKF.odompose2d.x, MessageEKF.odompose2d.y, MessageEKF.odompose2d.theta,
					MessageEKF.ekfpose2d.x, MessageEKF.ekfpose2d.y, MessageEKF.ekfpose2d.theta]
			# print(MessageEKF.ekfpose2d.x, MessageEKF.ekfpose2d.y, MessageEKF.ekfpose2d.theta) # //VB
			# print(MessageEKF.odompose2d.x, MessageEKF.odompose2d.y, MessageEKF.odompose2d.theta) # //VB
			## Send data through pipe
			send(data)
			## Reset global flags to receive new input
			global flagSubscriber1, flagSubscriber2
			flagSubscriber1 = False
			flagSubscriber2 = False
Example #25
    def _find_active_serial_ports_from(self, wait_duration, device_files):
        """
        Find the active USB serial ports.

        This spawns a process that actually does the work.

        Args:
            wait_duration:
                How long to wait for serial port activity; passed on to
                the spawned search process.
            device_files (list of strings):
                List of device files that will be checked for serial ports.
                Note that any other device file than ttyUSBx will be ignored.

        Returns:
            Queue that receives the device files which have an active
            serial port, for example ["ttyUSB2", "ttyUSB4", "ttyUSB7"].

        """
        serial_results = Queue()

        serial_finder = Process(
            target=TopologyBuilder._get_active_serial_device_files,
            args=(self, serial_results, wait_duration, device_files))
        if self._verbose:
            print "Serial thread - Finding active serial ports"

        logging.info("Finding active serial ports")
        serial_finder.start()

        return serial_results
Example #26
def apply_update(fname, status):
    # As soon as python-apt closes its opened files on object deletion,
    # we can drop this fork workaround. As long as they keep their files
    # open, we run the code in its own fork; then the files are closed on
    # process termination and we can remount the filesystem read-only
    # without errors.
    p = Process(target=_apply_update, args=(fname, status))
    with rw_access("/", status):
        try:
            t_ver = get_target_version(fname)
        except BaseException:
            status.log('Reading xml-file failed!')
            return

        try:
            c_ver = get_current_version()
        except IOError as e:
            status.log('get current version failed: ' + str(e))
            c_ver = ""

        pre_sh(c_ver, t_ver, status)
        p.start()
        p.join()
        status.log("cleanup /var/cache/apt/archives")
        # don't use execute() here, it results in an error that the apt-cache
        # is locked. We currently don't understand this behaviour :(
        os.system("apt-get clean")
        if p.exitcode != 0:
            raise Exception(
                "Applying update failed. See logfile for more information")
        post_sh(c_ver, t_ver, status)
Example #27
 def start_parser_process(self):
     if self.mp_mode:
         from multiprocessing import Process, Event
     else:
         from multiprocessing.dummy import Process, Event
     waiting_shutdown_event = Event()
     if self.mp_mode:
         bot = self.bot.__class__(
             network_result_queue=self.network_result_queue,
             parser_result_queue=self.parser_result_queue,
             waiting_shutdown_event=waiting_shutdown_event,
             shutdown_event=self.shutdown_event,
             parser_mode=True,
             meta=self.bot.meta)
     else:
         # In non-multiprocess mode we start the `run_process`
         # method in a new semi-process (actually a thread).
         # Because `run_process` runs on the main spider instance,
         # all changes made in handlers are applied to the main
         # spider instance, which allows supporting deprecated
         # spiders that do not know about multiprocessing mode.
         bot = self.bot
         bot.network_result_queue = self.network_result_queue
         bot.parser_result_queue = self.parser_result_queue
         bot.waiting_shutdown_event = waiting_shutdown_event
         bot.shutdown_event = self.shutdown_event
         bot.meta = self.bot.meta
     proc = Process(target=bot.run_parser)
     if not self.mp_mode:
         proc.daemon = True
     proc.start()
     return waiting_shutdown_event, proc
Example #28
class ArtBox(object):
    def __init__(self, width, height):
        self._pen_comms = Pipe()
        self._paper_comms = Pipe()
        self._pen_ear, self._pen_mouth = Pipe()
        self._paper_ear, self._paper_mouth = Pipe()
        self._pen = pen.Pen()
        self._paper = paper.Paper(width=width, height=height)
        self._proc = Process(target=self._pen, args=(self._pen_comms, self._paper_comms))
        self._proc.daemon = True

    def add_resource_folder(self, folder_name):
        pyglet.resource.path.append(folder_name)
        pyglet.resource.reindex()

    def precache(self, asset_dict):
        for key in asset_dict:
            attributes = asset_dict[key]
            if len(attributes) == 1:
                self._paper._handle_command(Nibs.Cache(key, attributes[0]))
            elif len(attributes) == 2:
                self._paper._handle_command(Nibs.Cache(key, attributes[0], attributes[1]))

    def open(self):
        self._proc.start()
        self._paper.unfurl(self._pen_comms, self._paper_comms)
        self._proc.join(1)
        if self._proc.exitcode is None:
            self._proc.terminate()
Example #29
def connect_multiprocess(service = VoidService, config = {}, remote_service = VoidService, remote_config = {}, args={}):
    """starts an rpyc server on a new process, bound to an arbitrary port,
    and connects to it over a socket. Basically a copy of connect_thread().
    However, if args is used and the values are shared-memory objects, then
    changes will be bi-directional; that is, we now have access to shared memory.

    :param service: the local service to expose (defaults to Void)
    :param config: configuration dict
    :param remote_service: the remote service to expose (of the server; defaults to Void)
    :param remote_config: remote configuration dict (of the server)
    :param args: dict of local vars to pass to new connection, form {'name':var}

    Contributed by *@tvanzyl*
    """
    from multiprocessing import Process

    listener = socket.socket()
    listener.bind(("localhost", 0))
    listener.listen(1)

    def server(listener=listener, args=args):
        client = listener.accept()[0]
        listener.close()
        conn = connect_stream(SocketStream(client), service = remote_service, config = remote_config)
        try:
            for k in args:
                conn._local_root.exposed_namespace[k] = args[k]
            conn.serve_all()
        except KeyboardInterrupt:
            interrupt_main()

    t = Process(target = server)
    t.start()
    host, port = listener.getsockname()
    return connect(host, port, service = service, config = config)
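
A minimal usage sketch of the shared-memory behaviour described in the docstring (assuming rpyc is installed and connect_multiprocess is in scope); because a multiprocessing.Value lives in shared memory, changes made by the spawned server are visible locally as well:

from multiprocessing import Value

counter = Value('i', 0)                              # shared-memory integer
conn = connect_multiprocess(args={'counter': counter})
# The server process now sees `counter` in its exposed namespace; any update
# it makes there is reflected in this local Value too.
conn.close()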
def run_stock_parser():
    symbol_q = Queue()
    price_q = Queue()

    stock_symbols = []
    with open('symbols.txt', 'r') as symfile:
        for n, line in enumerate(symfile):
            sym = line.strip()
            if sym:
                stock_symbols.append(sym)

    ncpu = len([x for x in open('/proc/cpuinfo').read().split('\n')\
                if x.find('processor') == 0])

    pool = [Process(target=read_stock_worker, args=(symbol_q, price_q, )) for _ in range(ncpu * 4)]

    for p in pool:
        p.start()
    output = Process(target=write_output_file, args=(price_q, ))
    output.start()

    for symbol in stock_symbols:
        symbol_q.put(symbol)
    symbol_q.put(_sentinel)
    for p in pool:
        p.join()
    price_q.put(_sentinel)
    output.join()
Example #31
    def test_performance(self):
        self.form = MainController()
        self.cfc = self.form.compare_frame_controller
        self.stc = self.form.simulator_tab_controller
        self.gtc = self.form.generator_tab_controller

        self.form.add_signalfile(get_path_for_data_file("esaver.coco"))
        self.sframe = self.form.signal_tab_controller.signal_frames[0]
        self.sim_frame = self.form.simulator_tab_controller
        self.form.ui.tabWidget.setCurrentIndex(3)
        self.cfc.proto_analyzer.auto_assign_labels()

        self.network_sdr_plugin_sender = NetworkSDRInterfacePlugin(raw_mode=True)

        part_a = Participant("Device A", shortname="A", color_index=0)
        part_b = Participant("Device B", shortname="B", color_index=1)
        part_b.simulate = True

        self.form.project_manager.participants.append(part_a)
        self.form.project_manager.participants.append(part_b)
        self.form.project_manager.project_updated.emit()

        sniffer = ProtocolSniffer(100, 0.01, 0.01, 0.1, 5, "FSK", 1,
                                  NetworkSDRInterfacePlugin.NETWORK_SDR_NAME, BackendHandler(),
                                  network_raw_mode=True)
        sender = EndlessSender(BackendHandler(), NetworkSDRInterfacePlugin.NETWORK_SDR_NAME)

        simulator = Simulator(self.stc.simulator_config, self.gtc.modulators, self.stc.sim_expression_parser,
                              self.form.project_manager, sniffer=sniffer, sender=sender)

        pause = 100
        msg_a = SimulatorMessage(part_b,
                                 [1, 0] * 16 + [1, 1, 0, 0] * 8 + [0, 0, 1, 1] * 8 + [1, 0, 1, 1, 1, 0, 0, 1, 1, 1] * 4,
                                 pause=pause, message_type=MessageType("empty_message_type"), source=part_a)

        msg_b = SimulatorMessage(part_a,
                                 [1, 0] * 16 + [1, 1, 0, 0] * 8 + [1, 1, 0, 0] * 8 + [1, 0, 1, 1, 1, 0, 0, 1, 1, 1] * 4,
                                 pause=pause, message_type=MessageType("empty_message_type"), source=part_b)

        self.stc.simulator_config.add_items([msg_a, msg_b], 0, None)
        self.stc.simulator_config.update_active_participants()

        port = self.get_free_port()
        sniffer = simulator.sniffer
        sniffer.rcv_device.set_server_port(port)

        self.network_sdr_plugin_sender.client_port = port

        sender = simulator.sender
        port = self.get_free_port()
        sender.device.set_client_port(port)
        sender.device._VirtualDevice__dev.name = "simulator_sender"

        current_index = Value("L")
        elapsed = Value("f")
        target_num_samples = 13600 + pause
        receive_process = Process(target=receive, args=(port, current_index, target_num_samples, elapsed))
        receive_process.daemon = True
        receive_process.start()

        # Ensure receiver is running
        time.sleep(2)

        # spy = QSignalSpy(self.network_sdr_plugin_receiver.rcv_index_changed)
        simulator.start()

        modulator = Modulator("test_modulator")
        modulator.samples_per_symbol = 100
        modulator.carrier_freq_hz = 55e3

        # yappi.start()

        self.network_sdr_plugin_sender.send_raw_data(modulator.modulate(msg_a.encoded_bits), 1)
        time.sleep(0.5)
        # send some zeros to simulate the end of a message
        self.network_sdr_plugin_sender.send_raw_data(np.zeros(self.num_zeros_for_pause, dtype=np.complex64), 1)
        time.sleep(0.5)
        receive_process.join(15)

        logger.info("PROCESS TIME: {0:.2f}ms".format(elapsed.value))

        # self.assertEqual(current_index.value, target_num_samples)
        self.assertLess(elapsed.value, 200)

        # timeout = spy.wait(2000)
        # yappi.get_func_stats().print_all()
        # yappi.get_thread_stats().print_all()
Example #32
    """
    # Register the agent
    gr = register_message()

    # Listen on the queue until a 0 arrives
    fin = False
    while not fin:
        while cola.empty():
            pass
        v = cola.get()
        if v == 0:
            fin = True
        else:
            print(v)

            # Selfdestruct
            # requests.get(InfoAgent.stop)


if __name__ == '__main__':
    # Start the behaviors
    ab1 = Process(target=agentbehavior1, args=(cola1, ))
    ab1.start()

    # Start the server
    app.run(host=hostname, port=port)

    # Wait for the behaviors to finish
    ab1.join()
    logger.info('The End')
Example #33
							processKeys(actions[name]["down"])	
		else:
			if actions[name]['parent']=='' or (actions[name]['parent'] in keyEvents and not keyEvents[actions[name]['parent']][0]):
				if name not in keyEvents or not keyEvents[name][0]:
					keyEvents[name] = (True, newOutput[1])
					if actions[name]["repeat"]:
						threading.Thread(target=repeatProcessKeys, args=(name,)).start()
					else:
						processKeys(actions[name]["down"])				



frames = Queue()
frames.put([None,None])
camProcess = Process(target=getFrames, args=(frames,))
camProcess.start()

inputs = Queue()

config = open("paperTyper.cfg", "r")
config = config.readlines()
####USE CONFIG
actions = dict()
print config
for c in config:
	c = c.replace('\n', '')
	collumn = c.split(';')
	print collumn
	parent = ''
	for col in collumn:
		print col
Example #34

def consumer(q):
    while True:
        res = q.get()
        if res == None: break
        time.sleep(1)
        print('Consumer ate %s' % res)
        q.task_done()


if __name__ == "__main__":
    q = JoinableQueue()
    # producers
    p = Process(target=producer, args=(q, "cabbage"))
    p1 = Process(target=producer, args=(q, "meat"))
    p2 = Process(target=producer, args=(q, "stinky tofu"))
    # consumers
    c = Process(target=consumer, args=(q, ))
    c2 = Process(target=consumer, args=(q, ))
    c.daemon = True
    c2.daemon = True
    p.start()
    p1.start()
    p2.start()
    c.start()
    c2.start()
    p.join()
    p1.join()
    p2.join()
    print("主")
Example #35
def validate(dataset, dataloader, model, cfg, epoch=-1):
    # switch to evaluate mode
    logger = logging.getLogger('global')
    torch.cuda.set_device(0)
    model.cuda()
    # os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    model.eval()

    total_rc = 0
    total_gt = 0
    area_extents = np.asarray(cfg['shared']['area_extents']).reshape(-1, 2)
    bev_extents = area_extents[[0, 2]]

    score_threshold = cfg['test_rpn_proposal_cfg']['score_threshold']
    valid_samples = 0
    native_code_copy = _sys_init.root_dir(
    ) + '/experiments/predictions/kitti_native_eval/'
    evaluator_utils.copy_kitti_native_code(native_code_copy)
    predictions_3d_dir = evaluator_utils.get_kitti_predictions(
        score_threshold, epoch)

    logger.info('start validate')
    for iter, _input in enumerate(dataloader):
        gt_boxes = _input[9]
        voxel_with_points = _input[6]
        batch_size = voxel_with_points.shape[0]
        # assert batch_size == 1
        img_ids = _input[10]

        x = {
            'cfg': cfg,
            'image': _input[0],
            'points': _input[1],
            'indices': _input[2],
            'num_pts': _input[3],
            'leaf_out': _input[4],
            'voxel_indices': _input[5],
            'voxel_points': torch.autograd.Variable(_input[6]).cuda(),
            'ground_plane': _input[7],
            'gt_bboxes_2d': _input[8],
            'gt_bboxes_3d': _input[9],
            'num_divisions': _input[11]
        }

        t0 = time.time()
        outputs = model(x)
        outputs = outputs['predict']
        t2 = time.time()
        proposals = outputs[0].data.cpu().numpy()

        if torch.is_tensor(gt_boxes):
            gt_boxes = gt_boxes.cpu().numpy()

        for b_ix in range(batch_size):
            rois_per_points_cloud = proposals[proposals[:, 0] == b_ix]
            if gt_boxes.shape[0] != 0:
                gts_per_points_cloud = gt_boxes[b_ix]

                rois_per_points_cloud_anchor = box_3d_encoder.box_3d_to_anchor(
                    rois_per_points_cloud[:, 1:1 + 7])
                gts_per_points_cloud_anchor = box_3d_encoder.box_3d_to_anchor(
                    gts_per_points_cloud)
                rois_per_points_cloud_bev, _ = anchor_projector.project_to_bev(
                    rois_per_points_cloud_anchor, bev_extents)
                gts_per_points_cloud_bev, _ = anchor_projector.project_to_bev(
                    gts_per_points_cloud_anchor, bev_extents)

                # rpn recall
                num_rc, num_gt = bbox_helper.compute_recall(
                    rois_per_points_cloud_bev, gts_per_points_cloud_bev)
                total_gt += num_gt
                total_rc += num_rc

                if args.visual:
                    calib_dir = os.path.join(args.datadir, 'training/calib',
                                             '%06d.txt' % (img_ids[b_ix]))
                    calib = Calibration(calib_dir)

                    # Show all LiDAR points. Draw 3d box in LiDAR point cloud
                    print(
                        ' -------- LiDAR points and 3D boxes in velodyne coordinate --------'
                    )
                    show_lidar_with_numpy_boxes(x['points'][b_ix, :,
                                                            0:3].numpy(),
                                                gts_per_points_cloud,
                                                calib,
                                                save_figure=False,
                                                color=(1, 1, 1))
                    input()

                    score_filter = rois_per_points_cloud[:,
                                                         -1] > score_threshold
                    print('img: {}, proposals shape:{}'.format(
                        img_ids[b_ix],
                        rois_per_points_cloud[score_filter].shape))

                    img = x['image'][b_ix].numpy() * 255.
                    img = img.astype(np.uint8)
                    img = np.array(np.transpose(img, (1, 2, 0)))
                    show_image_with_boxes(
                        img,
                        rois_per_points_cloud[score_filter, 1:1 + 7],
                        calib,
                        True,
                        save_figure=args.save_as_figure,
                        save_figure_dir=args.figdir,
                        img_name='img_%06d.jpg' % (img_ids[b_ix]))
                    # input()
                    #
                    show_lidar_with_numpy_boxes(
                        x['points'][b_ix, :, 0:3].numpy(),
                        rois_per_points_cloud[score_filter, 1:1 + 7][:10],
                        calib,
                        save_figure=args.save_as_figure,
                        save_figure_dir=args.figdir,
                        img_name='points_%06d.jpg' % (img_ids[b_ix]),
                        color=(1, 1, 1))
                    input()
                    # anchors = outputs[1]
                    # total_anchors, _ = anchors.shape
                    # idx = np.random.choice(total_anchors, 50)
                    # show_lidar_with_numpy_boxes(x['points'][b_ix, :, 0:3].numpy(), anchors[idx, :], calib, save_figure=False,
                    #                             color=(1, 1, 1))
                    # input()

            valid, total_samples = evaluator_utils.save_predictions_in_kitti_format(
                dataset, rois_per_points_cloud[:, 1:], img_ids[b_ix],
                predictions_3d_dir, score_threshold)
            valid_samples += valid
            logger.info('valid samples: %d/%d' %
                        (valid_samples, total_samples))
        logger.info('Test valid instance: [%d/%d] Time: %.3f %d/%d' %
                    (iter, len(dataloader), t2 - t0, total_rc, total_gt))
        log_helper.print_speed(iter + 1, t2 - t0, len(dataloader))

    logger.info('rpn300 recall=%f' % (total_rc / total_gt))
    evaluate_name = dataset.id2names[1] + '_' + dataset.split

    # Create separate processes to run the native evaluation
    native_eval_proc = Process(target=evaluator_utils.run_kitti_native_script,
                               args=(native_code_copy, evaluate_name,
                                     score_threshold, epoch))
    native_eval_proc_05_iou = Process(
        target=evaluator_utils.run_kitti_native_script_with_05_iou,
        args=(native_code_copy, evaluate_name, score_threshold, epoch))
    # Don't call join on this because we do not want to block;
    # this will cause one zombie process - should be fixed later.
    native_eval_proc.start()
    native_eval_proc_05_iou.start()
    # evaluator_utils.run_kitti_native_script(native_code_copy, evaluate_name, score_threshold, epoch)
    # evaluator_utils.run_kitti_native_script_with_05_iou(native_code_copy, evaluate_name, score_threshold, epoch)
    return total_rc / total_gt
 def start(self):
     # Start multiprocess crawling
     for category_name in self.selected_categories:
         proc = Process(target=self.crawling, args=(category_name,))
         proc.start()
Example #37
def backup_service():
    p = Process(target=backup_scheduler, args=(backup_func,))
    p.start()
Example #38
def do_process():
    print('Parent process is running %s' % os.getpid())
    p = Process(target=child_process, name='test')
    p.start()
    p.join()
Example #39
    response = response.encode('utf-8')

    while True:
        FSM = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        FSM.connect((FSMHOST, FSMPORT))
        FSM.sendall(response)
        FSMdata = FSM.recv(16384).decode('utf-8')
        FSM.close()
        GSM = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        GSM.connect((GSMHOST, GSMPORT))
        GSM.sendall(response)
        GSMdata = GSM.recv(16384).decode('utf-8')
        GSM.close()
        FSMq.put(FSMdata)
        GSMq.put(GSMdata)


if __name__ == '__main__':
    FSMq = Queue()
    GSMq = Queue()
    STATUSq = Queue()
    p1 = Process(target=TCPClient, args=(
        FSMq,
        GSMq,
    ))
    p2 = Process(target=drawgraph, args=(
        FSMq,
        GSMq,
    ))
    p1.start()
    p2.start()
def train_model_on_stage(stage, model, fast_start=False):
    test_mode = False
    split = True
    import time

    batch_size = get_batch_size_for_stage(stage)
    if not fast_start:
        #START THE DATA GENERATION PROCESS
        data_gen_process = Process(target=data_generator_fn.main,
                                   args=[stage,
                                         np.random.randint(0, 10000)])
        data_gen_process.start()

        print 'waiting for data generator process to finish first chunk'
        while data_gen_process.is_alive():
            time.sleep(1)
    print 'starting training'

    # assert split == True, 'TODO: write code for no split'

    if split:
        Xpos = np.load(r"D:\Dlung\Xpositive_temp_v5_" + str(stage) + ".npy")
        IXpos = np.load(r"D:\Dlung\Ixpositive_temp_v5_" + str(stage) + ".npy")
        split1 = int(.75 * Xpos.shape[0])
        Xtrain1 = Xpos[:split1]
        Xvalid1 = Xpos[split1:]
        IXtrain1 = IXpos[:split1]
        IXvalid1 = IXpos[split1:]
        del Xpos, IXpos

        train_generator_75 = get_generator_static(Xtrain1,
                                                  IXtrain1,
                                                  augment=True,
                                                  batch_size=batch_size)
        valid_generator_75 = get_generator_static(Xvalid1,
                                                  IXvalid1,
                                                  augment=True,
                                                  batch_size=batch_size)
    else:
        Xpos = np.load(r"D:\Dlung\Xpositive_temp_v5_" + str(stage) + ".npy")
        IXpos = np.load(r"D:\Dlung\Ixpositive_temp_v5_" + str(stage) + ".npy")
        train_generator_75 = get_generator_static(Xpos,
                                                  IXpos,
                                                  augment=True,
                                                  batch_size=batch_size)

    #combined_generator = combine_generators(train_generator_ez, valid_generator_ez, frac1=.5)
    name = 'model_des_v34_repl_' + str(stage) + '_{epoch:02d}.h5'
    chkp = ModelCheckpoint(filepath=name)
    # lr_reducer = ReduceLROnPlateau(monitor='loss', factor=.5, patience=2,min_lr=1e-5,epsilon=1e-2,verbose=1)
    # csv_logger = CSVLogger(filename = 'model_des_v35_relu_' + str(stage) + '_trainlog.csv',append=True)

    if stage == 32:
        lr_schedule = LearningRateScheduler(stage_1_lr_schedule)
        nb_epoch = 15
        samples_per_epoch = 150
    else:
        lr_schedule = LearningRateScheduler(stage_2_lr_schedule)
        nb_epoch = 25
        samples_per_epoch = 150

    print 'restarting data gen process'
    data_gen_process = Process(target=data_generator_fn.main,
                               args=[stage, np.random.randint(0, 10000)])
    data_gen_process.start()

    for epoch in range(nb_epoch):
        #check after each epoch if the data is done and if so reload it
        if not data_gen_process.is_alive():
            #reload data
            print 'RELOADING DATA'
            if split:
                del Xtrain1, Xvalid1
                Xpos = np.load(r"D:\Dlung\Xpositive_temp_v5_" + str(stage) +
                               ".npy")
                IXpos = np.load(r"D:\Dlung\Ixpositive_temp_v5_" + str(stage) +
                                ".npy")

                #if split
                split1 = int(.75 * Xpos.shape[0])
                Xtrain1 = Xpos[:split1]
                Xvalid1 = Xpos[split1:]
                IXtrain1 = IXpos[:split1]
                IXvalid1 = IXpos[split1:]
                del Xpos, IXpos

                train_generator_75 = get_generator_static(
                    Xtrain1, IXtrain1, augment=True, batch_size=batch_size)
                valid_generator_75 = get_generator_static(
                    Xvalid1, IXvalid1, augment=True, batch_size=batch_size)
            else:
                Xpos = np.load(r"D:\Dlung\Xpositive_temp_v5_" + str(stage) +
                               ".npy")
                IXpos = np.load(r"D:\Dlung\Ixpositive_temp_v5_" + str(stage) +
                                ".npy")
                train_generator_75 = get_generator_static(
                    Xpos, IXpos, augment=True, batch_size=batch_size)

            #restart data generator
            data_gen_process = Process(
                target=data_generator_fn.main,
                args=[stage, np.random.randint(0, 10000)])
            data_gen_process.start()
        else:
            print 'data generator still running'

        print 'epoch', epoch, 'model lr', model.optimizer.lr.get_value()
        if split:
            model.fit_generator(
                train_generator_75,
                samples_per_epoch=samples_per_epoch * batch_size,
                nb_epoch=epoch + 1,
                callbacks=[chkp, lr_schedule],
                validation_data=valid_generator_75,
                nb_val_samples=samples_per_epoch * batch_size / 2,
                initial_epoch=epoch)
        else:
            model.fit_generator(train_generator_75,
                                samples_per_epoch=samples_per_epoch *
                                batch_size,
                                nb_epoch=epoch + 1,
                                callbacks=[chkp, lr_schedule],
                                initial_epoch=epoch)

    return model
def three_det_main(random_seed=-1,
                   noise_random_seed=-1,
                   output_file_name='default.hdf'):

    # -------------------------------------------------------------------------
    # Preliminaries
    # -------------------------------------------------------------------------

    # Disable output buffering ('flush' option is not available for Python 2)
    # sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
    sys.stdout = Unbuffered(sys.stdout)

    # Start the stopwatch
    script_start = time.time()

    print('')
    print('GENERATE A GW DATA SAMPLE FILE')
    print('')

    # -------------------------------------------------------------------------
    # Parse the command line arguments
    # -------------------------------------------------------------------------

    # Set up the parser and add arguments
    parser = argparse.ArgumentParser(description='Generate a GW data sample.')
    parser.add_argument('--config-file',
                        help='Name of the JSON configuration file which '
                        'controls the sample generation process.',
                        default='default.json')

    # Parse the arguments that were passed when calling this script
    print('Parsing command line arguments...', end=' ')
    command_line_arguments = vars(parser.parse_args())
    print('Done!')

    # -------------------------------------------------------------------------
    # Read in JSON config file specifying the sample generation process
    # -------------------------------------------------------------------------

    # Build the full path to the config file
    json_config_name = command_line_arguments['config_file']
    json_config_path = os.path.join('.', 'config_files', json_config_name)

    # Read the JSON configuration into a dict
    print('Reading and validating the JSON configuration file...', end=' ')
    config = read_json_config(json_config_path)
    print('Done!')

    # -------------------------------------------------------------------------
    # Read in INI config file specifying the static_args and variable_args
    # -------------------------------------------------------------------------

    # Build the full path to the waveform params file
    ini_config_name = config['waveform_params_file_name']
    ini_config_path = os.path.join('.', 'config_files', ini_config_name)

    # Read in the variable_arguments and static_arguments
    print('Reading and validating the INI configuration file...', end=' ')
    variable_arguments, static_arguments = read_ini_config(ini_config_path)
    print('Done!\n')

    # -------------------------------------------------------------------------
    # Shortcuts and random seed
    # -------------------------------------------------------------------------

    # Set the random seed for this script
    if random_seed == -1:
        np.random.seed(config['random_seed'])
        random_seed = config['random_seed']
    else:
        np.random.seed(random_seed)

    if noise_random_seed == -1:
        noise_random_seed = config['noise_random_seed']

    # Define some useful shortcuts
    max_runtime = config['max_runtime']
    bkg_data_dir = config['background_data_directory']

    # -------------------------------------------------------------------------
    # Construct a generator for sampling waveform parameters
    # -------------------------------------------------------------------------

    # Initialize a waveform parameter generator that can sample injection
    # parameters from the distributions specified in the config file
    waveform_parameter_generator = \
        WaveformParameterGenerator(config_file=ini_config_path,
                                   random_seed=random_seed)

    # Wrap it in a generator expression so that we can easily sample
    # from it by calling next(waveform_parameters)
    waveform_parameters = \
        (waveform_parameter_generator.draw() for _ in iter(int, 1))

    # -------------------------------------------------------------------------
    # Construct a generator for sampling valid noise times
    # -------------------------------------------------------------------------

    # If the 'background_data_directory' is None, we will use synthetic noise
    if config['background_data_directory'] is None:

        print('Using synthetic noise! (background_data_directory = None)\n')

        # Create an iterator that returns a fake "event time", which we will
        # use as a seed for the RNG to ensure the reproducibility of the
        # generated synthetic noise.
        # For the HDF file path that contains that time, we always yield
        # None, so that we know that we need to generate synthetic noise.
        noise_times = ((1000000000 + _, None) for _ in count())

    # Otherwise, we set up a timeline object for the background noise, that
    # is, we read in all HDF files in the raw_data_directory and figure out
    # which parts of it are useable (i.e., have the right data quality and
    # injection bits set as specified in the config file).
    else:

        print('Using real noise from LIGO recordings! '
              '(background_data_directory = {})'.format(bkg_data_dir))
        print('Reading in raw data. This may take several minutes...', end=' ')

        # Create a timeline object by running over all HDF files once
        noise_timeline = NoiseTimeline(background_data_directory=bkg_data_dir,
                                       random_seed=random_seed)

        # Create a noise time generator so that we can sample valid noise times
        # simply by calling next(noise_time_generator)
        delta_t = int(static_arguments['noise_interval_width'] / 2)
        noise_times = (noise_timeline.sample(delta_t=delta_t,
                                             dq_bits=config['dq_bits'],
                                             inj_bits=config['inj_bits'],
                                             return_paths=True)
                       for _ in iter(int, 1))

        print('Done!\n')

    # -------------------------------------------------------------------------
    # Define a convenience function to generate arguments for the simulation
    # -------------------------------------------------------------------------

    # Prevent the waveform parameter generator from drawing new parameter
    # values for every generated sample (i.e., here we fix the parameters for
    # the whole file)
    sample_params = next(waveform_parameters)

    def generate_arguments(injection=True):

        # Only sample waveform parameters if we are making an injection
        waveform_params = sample_params if injection else None

        # Return all necessary arguments as a dictionary
        return dict(static_arguments=static_arguments,
                    event_tuple=next(noise_times),
                    waveform_params=waveform_params,
                    noise_random_seed=noise_random_seed)

    # -------------------------------------------------------------------------
    # Finally: Create our samples!
    # -------------------------------------------------------------------------

    # Keep track of all the samples (and parameters) we have generated
    samples = dict(injection_samples=[], noise_samples=[])
    injection_parameters = dict(injection_samples=[], noise_samples=[])

    # The procedure for generating samples with and without injections is
    # mostly the same; the only real difference is which arguments_generator
    # we have to use:
    for sample_type in ('injection_samples', 'noise_samples'):

        # ---------------------------------------------------------------------
        # Define some sample_type-specific shortcuts
        # ---------------------------------------------------------------------

        if sample_type == 'injection_samples':
            print('Generating samples containing an injection...')
            n_samples = config['n_injection_samples']
            arguments_generator = \
                (generate_arguments(injection=True) for _ in iter(int, 1))

        else:
            print('Generating samples *not* containing an injection...')
            n_samples = config['n_noise_samples']
            arguments_generator = \
                (generate_arguments(injection=False) for _ in iter(int, 1))

        # ---------------------------------------------------------------------
        # If we do not need to generate any samples, skip ahead:
        # ---------------------------------------------------------------------

        if n_samples == 0:
            print('Done! (n_samples=0)\n')
            continue

        # ---------------------------------------------------------------------
        # Initialize queues for the simulation arguments and the results
        # ---------------------------------------------------------------------

        # Initialize a Queue and fill it with as many arguments as we
        # want to generate samples
        arguments_queue = Queue()
        for i in range(n_samples):
            arguments_queue.put(next(arguments_generator))

        # Initialize a Queue and a list to store the generated samples
        results_queue = Queue()
        results_list = []

        # ---------------------------------------------------------------------
        # Use process-based multiprocessing to generate samples in parallel
        # ---------------------------------------------------------------------

        # Use a tqdm context manager for the progress bar
        tqdm_args = dict(total=n_samples, ncols=80, unit='sample')
        with tqdm(**tqdm_args) as progressbar:

            # Keep track of all running processes
            list_of_processes = []

            # While we haven't produced as many results as desired, keep going
            while len(results_list) < n_samples:

                # -------------------------------------------------------------
                # Loop over processes to see if anything finished or got stuck
                # -------------------------------------------------------------

                for process_dict in list_of_processes:

                    # Get the process object and its current runtime
                    process = process_dict['process']
                    runtime = time.time() - process_dict['start_time']

                    # Check if the process is still running when it should
                    # have terminated already (according to max_runtime)
                    if process.is_alive() and (runtime > max_runtime):

                        # Kill process that's been running too long
                        process.terminate()
                        process.join()
                        list_of_processes.remove(process_dict)

                        # Add new arguments to queue to replace the failed ones
                        new_arguments = next(arguments_generator)
                        arguments_queue.put(new_arguments)

                    # If process has terminated already
                    elif not process.is_alive():

                        # If the process failed, add new arguments to queue
                        if process.exitcode != 0:
                            new_arguments = next(arguments_generator)
                            arguments_queue.put(new_arguments)

                        # Remove process from the list of running processes
                        list_of_processes.remove(process_dict)

                # -------------------------------------------------------------
                # Start new processes if necessary
                # -------------------------------------------------------------

                # Start new processes until the arguments_queue is empty, or
                # we have reached the maximum number of processes
                while (arguments_queue.qsize() > 0
                       and len(list_of_processes) < config['n_processes']):

                    # Get arguments from queue and start new process
                    arguments = arguments_queue.get()
                    p = Process(target=queue_worker,
                                kwargs=dict(arguments=arguments,
                                            results_queue=results_queue))

                    # Remember this process and its starting time
                    process_dict = dict(process=p, start_time=time.time())
                    list_of_processes.append(process_dict)

                    # Finally, start the process
                    p.start()

                # -------------------------------------------------------------
                # Move results from results_queue to results_list
                # -------------------------------------------------------------

                # Without this part, the results_queue blocks the worker
                # processes so that they won't terminate
                while results_queue.qsize() > 0:
                    results_list.append(results_queue.get())

                # Update the progress bar based on the number of results
                progressbar.update(len(results_list) - progressbar.n)

                # Sleep for some time before we check the processes again
                time.sleep(0.5)

        # ---------------------------------------------------------------------
        # Process results in the results_list
        # ---------------------------------------------------------------------

        # Separate the samples and the injection parameters
        samples[sample_type], injection_parameters[sample_type] = \
            zip(*results_list)

        # Sort all results by the event_time
        idx = np.argsort([_['event_time'] for _ in list(samples[sample_type])])
        samples[sample_type] = \
            list([samples[sample_type][i] for i in idx])
        injection_parameters[sample_type] = \
            list([injection_parameters[sample_type][i] for i in idx])

        print('Sample generation completed!\n')

    # -------------------------------------------------------------------------
    # Compute the normalization parameters for this file
    # -------------------------------------------------------------------------

    print('Computing normalization parameters for sample...', end=' ')

    # Gather all samples (with and without injection) in one list
    all_samples = list(samples['injection_samples'] + samples['noise_samples'])

    # Group all samples by detector
    h1_samples = [_['h1_strain'] for _ in all_samples]
    l1_samples = [_['l1_strain'] for _ in all_samples]
    v1_samples = [_['v1_strain'] for _ in all_samples]

    # Stack recordings along first axis
    h1_samples = np.vstack(h1_samples)
    l1_samples = np.vstack(l1_samples)
    v1_samples = np.vstack(v1_samples)

    # Compute the mean and standard deviation for each detector as the median
    # of the means / standard deviations of the individual samples. This is
    # more robust to outliers than computing "global" parameters by
    # concatenating all samples and treating them as a single, long time series.
    normalization_parameters = \
        dict(h1_mean=np.median(np.mean(h1_samples, axis=1), axis=0),
             l1_mean=np.median(np.mean(l1_samples, axis=1), axis=0),
             v1_mean=np.median(np.mean(v1_samples, axis=1), axis=0),
             h1_std=np.median(np.std(h1_samples, axis=1), axis=0),
             l1_std=np.median(np.std(l1_samples, axis=1), axis=0),
             v1_std=np.median(np.std(v1_samples, axis=1), axis=0))

    print('Done!\n')

    # -------------------------------------------------------------------------
    # Create a SampleFile dict from list of samples and save it as an HDF file
    # -------------------------------------------------------------------------

    print('Saving the results to HDF file ...', end=' ')

    # Initialize the dictionary that we use to create a SampleFile object
    sample_file_dict = dict(command_line_arguments=command_line_arguments,
                            injection_parameters=dict(),
                            injection_samples=dict(),
                            noise_samples=dict(),
                            normalization_parameters=normalization_parameters,
                            static_arguments=static_arguments)

    # Collect and add samples (with and without injection)
    for sample_type in ('injection_samples', 'noise_samples'):
        for key in ('event_time', 'h1_strain', 'l1_strain', 'v1_strain'):
            if samples[sample_type]:
                value = np.array([_[key] for _ in list(samples[sample_type])])
            else:
                value = None
            sample_file_dict[sample_type][key] = value

    # Collect and add injection_parameters (ignore noise samples here, because
    # for those, the injection_parameters are always None)
    other_keys = [
        'h1_signal', 'h1_snr', 'l1_signal', 'l1_snr', 'v1_signal', 'v1_snr',
        'scale_factor'
    ]
    for key in list(variable_arguments + other_keys):
        if injection_parameters['injection_samples']:
            value = np.array(
                [_[key] for _ in injection_parameters['injection_samples']])
        else:
            value = None
        sample_file_dict['injection_parameters'][key] = value

    # Construct the path for the output HDF file
    if output_file_name == 'default.hdf':
        output_file_name = config['output_file_name']
    output_dir = os.path.join('.', 'output')
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    sample_file_path = os.path.join(output_dir, output_file_name)

    # Create the SampleFile object and save it to the specified output file
    sample_file = SampleFile(data=sample_file_dict)
    sample_file.to_hdf(file_path=sample_file_path)

    print('Done!')

    # Get file size in MB and print the result
    sample_file_size = os.path.getsize(sample_file_path) / 1024**2
    print('Size of resulting HDF file: {:.2f}MB'.format(sample_file_size))
    print('')

    # -------------------------------------------------------------------------
    # Postliminaries
    # -------------------------------------------------------------------------

    # PyCBC always creates a copy of the waveform parameters file, which we
    # can delete at the end of the sample generation process
    duplicate_path = os.path.join('.', config['waveform_params_file_name'])
    if os.path.exists(duplicate_path):
        os.remove(duplicate_path)

    # Print the total run time
    print('Total runtime: {:.1f} seconds!'.format(time.time() - script_start))
    print('')
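
The generation loop above is a reusable pattern: a task queue feeds a bounded set of worker processes, finished results are drained into a list, and a watchdog terminates and replaces any worker that exceeds max_runtime. Below is a minimal, self-contained sketch of that pattern; slow_worker and its task payloads are made-up stand-ins, not part of the script above.

import time
from multiprocessing import Process, Queue

def slow_worker(arguments, results_queue):
    """Illustrative worker: sleep a bit, then report a result."""
    time.sleep(arguments['duration'])
    results_queue.put(arguments['duration'])

if __name__ == '__main__':
    n_tasks, n_processes, max_runtime = 8, 4, 2.0
    arguments_queue, results_queue, results = Queue(), Queue(), []
    for i in range(n_tasks):
        arguments_queue.put(dict(duration=0.2 * (i % 3)))

    running = []  # dicts of {'process': Process, 'start_time': float}
    while len(results) < n_tasks:
        # Reap finished or stuck workers; requeue work for failures
        for d in list(running):
            process, runtime = d['process'], time.time() - d['start_time']
            if process.is_alive() and runtime > max_runtime:
                process.terminate()
                process.join()
                running.remove(d)
                arguments_queue.put(dict(duration=0.1))  # replacement task
            elif not process.is_alive():
                if process.exitcode != 0:
                    arguments_queue.put(dict(duration=0.1))
                running.remove(d)
        # Start new workers while there is work and capacity
        while arguments_queue.qsize() > 0 and len(running) < n_processes:
            arguments = arguments_queue.get()
            process = Process(target=slow_worker,
                              args=(arguments, results_queue))
            running.append(dict(process=process, start_time=time.time()))
            process.start()
        # Drain results so workers never block on a full results queue
        while results_queue.qsize() > 0:
            results.append(results_queue.get())
        time.sleep(0.1)

    print('collected', len(results), 'results')
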
def run(*args):

    if 1 in args:
        start = time.time()

        f(10_000_000)
        f(10_000_000)
        f(10_000_000)
        # f(10_000_000)
        print("1. No parallel time:  {}".format(time.time() - start))

    if 2 in args:
        start = time.time()
        t1 = Thread(target=f, args=(10_000_000,))
        t2 = Thread(target=f, args=(10_000_000,))
        t3 = Thread(target=f, args=(10_000_000,))
        # t4 = Thread(target=f, args=(10_000_000,))

        t1.start(), t2.start(), t3.start()
        t1.join(), t2.join(), t3.join()
        print("2. Threads time:      {}".format(time.time() - start))

    if 3 in args:
        start = time.time()
        with ThreadPoolExecutor(max_workers=4) as pool:
            # Submit the callable and its argument; calling f inside submit()
            # would run it in this thread and defeat the pool.
            futures = [pool.submit(f, 10_000_000) for _ in range(16)]
            for future in futures:
                future.result()
        print("3. Thread pool time:  {}".format(time.time() - start))

    if 4 in args:
        start = time.time()
        p1 = Process(target=f, args=(10_000_000,))
        p2 = Process(target=f, args=(10_000_000,))
        p3 = Process(target=f, args=(10_000_000,))
        # p4 = Process(target=f, args=(10_000_000,))

        p1.start(), p2.start(), p3.start()
        p1.join(), p2.join(), p3.join()
        print("4. Process time:      {}".format(time.time() - start))

    if 5 in args:
        start = time.time()
        with ProcessPoolExecutor(max_workers=4) as pool:
            # Submit the callable and its argument so the work runs in the
            # worker processes rather than in the parent.
            futures = [pool.submit(f, 10_000_000) for _ in range(16)]
            for future in futures:
                future.result()
        print("5. Process pool time: {}".format(time.time() - start))
Beispiel #43
0
def monitor_nodes_status(celery_app):
    def update_nodes_status(event):
        node_id = event.get('hostname')
        db_manager.update_one('nodes', id=node_id, values={
            'status': NodeStatus.ONLINE
        })

    def update_nodes_status_online(event):
        other.info(f"{event}")

    with celery_app.connection() as connection:
        recv = celery_app.events.Receiver(connection, handlers={
            'worker-heartbeat': update_nodes_status,
            # 'worker-online': update_nodes_status_online,
        })
        recv.capture(limit=None, timeout=None, wakeup=True)


# run scheduler as a separate process
scheduler.run()

# monitor node status
p_monitor = Process(target=monitor_nodes_status, args=(celery_app,))
p_monitor.start()

# create folder if it does not exist
if not os.path.exists(PROJECT_LOGS_FOLDER):
    os.makedirs(PROJECT_LOGS_FOLDER)

if __name__ == '__main__':
    # run app instance
    app.run(host=FLASK_HOST, port=FLASK_PORT, threaded=True)
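
Beispiel #43 runs a Celery event receiver in a child process so worker heartbeats can update node status without blocking the Flask app. Below is a minimal sketch of that monitor-in-a-subprocess pattern; the broker URL and the handler body are placeholders, not taken from the snippet above.

from multiprocessing import Process

from celery import Celery

def monitor(app):
    def on_heartbeat(event):
        # Placeholder handler: a real one would update a node-status store
        print('heartbeat from', event.get('hostname'))

    with app.connection() as connection:
        recv = app.events.Receiver(connection,
                                   handlers={'worker-heartbeat': on_heartbeat})
        recv.capture(limit=None, timeout=None, wakeup=True)

if __name__ == '__main__':
    app = Celery(broker='redis://localhost:6379/0')  # assumed broker URL
    p_monitor = Process(target=monitor, args=(app,), daemon=True)
    p_monitor.start()
    p_monitor.join()  # runs until interrupted, like the snippet above
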
Beispiel #44
0
def overlap(cibra, ciket, nmo, nocc, s=None):
    '''Overlap between two CISD wavefunctions.

    Args:
        s : 2D array
            The overlap matrix of non-orthogonal one-particle basis
    '''
    if s is None:
        return dot(cibra, ciket, nmo, nocc)

    DEBUG = True

    nvir = nmo - nocc
    nov = nocc * nvir
    bra0, bra1, bra2 = cisdvec_to_amplitudes(cibra, nmo, nocc)
    ket0, ket1, ket2 = cisdvec_to_amplitudes(ciket, nmo, nocc)

    # Sort the ket orbitals so that the orbitals in bra map one-to-one to the
    # orbitals in ket.
    if ((not DEBUG) and abs(numpy.linalg.det(s[:nocc, :nocc]) - 1) < 1e-2
            and abs(numpy.linalg.det(s[nocc:, nocc:]) - 1) < 1e-2):
        ket_orb_idx = numpy.where(abs(s) > 0.9)[1]
        s = s[:, ket_orb_idx]
        oidx = ket_orb_idx[:nocc]
        vidx = ket_orb_idx[nocc:] - nocc
        ket1 = ket1[oidx[:, None], vidx]
        ket2 = ket2[oidx[:, None, None, None], oidx[:, None, None],
                    vidx[:, None], vidx]

    ooidx = numpy.tril_indices(nocc, -1)
    vvidx = numpy.tril_indices(nvir, -1)
    bra2aa = bra2 - bra2.transpose(1, 0, 2, 3)
    bra2aa = lib.take_2d(bra2aa.reshape(nocc**2,
                                        nvir**2), ooidx[0] * nocc + ooidx[1],
                         vvidx[0] * nvir + vvidx[1])
    ket2aa = ket2 - ket2.transpose(1, 0, 2, 3)
    ket2aa = lib.take_2d(ket2aa.reshape(nocc**2,
                                        nvir**2), ooidx[0] * nocc + ooidx[1],
                         vvidx[0] * nvir + vvidx[1])

    occlist0 = numpy.arange(nocc).reshape(1, nocc)
    occlists = numpy.repeat(occlist0, 1 + nov + bra2aa.size, axis=0)
    occlist0 = occlists[:1]
    occlist1 = occlists[1:1 + nov]
    occlist2 = occlists[1 + nov:]

    ia = 0
    for i in range(nocc):
        for a in range(nocc, nmo):
            occlist1[ia, i] = a
            ia += 1

    ia = 0
    for i in range(nocc):
        for j in range(i):
            for a in range(nocc, nmo):
                for b in range(nocc, a):
                    occlist2[ia, i] = a
                    occlist2[ia, j] = b
                    ia += 1

    na = len(occlists)
    if DEBUG:
        trans = numpy.empty((na, na))
        for i, idx in enumerate(occlists):
            s_sub = s[idx].T.copy()
            minors = s_sub[occlists]
            trans[i, :] = numpy.linalg.det(minors)

        # Mimic the transformation einsum('ab,ap->pb', FCI, trans).
        # The wavefunction FCI has the [excitation_alpha,excitation_beta]
        # representation.  The zero blocks like FCI[S_alpha,D_beta],
        # FCI[D_alpha,D_beta], are explicitly excluded.
        bra_mat = numpy.zeros((na, na))
        bra_mat[0, 0] = bra0
        bra_mat[0, 1:1 + nov] = bra_mat[1:1 + nov, 0] = bra1.ravel()
        bra_mat[0, 1 + nov:] = bra_mat[1 + nov:, 0] = bra2aa.ravel()
        bra_mat[1:1 + nov, 1:1 + nov] = bra2.transpose(0, 2, 1,
                                                       3).reshape(nov, nov)
        ket_mat = numpy.zeros((na, na))
        ket_mat[0, 0] = ket0
        ket_mat[0, 1:1 + nov] = ket_mat[1:1 + nov, 0] = ket1.ravel()
        ket_mat[0, 1 + nov:] = ket_mat[1 + nov:, 0] = ket2aa.ravel()
        ket_mat[1:1 + nov, 1:1 + nov] = ket2.transpose(0, 2, 1,
                                                       3).reshape(nov, nov)
        ovlp = lib.einsum('ab,ap,bq,pq->', bra_mat, trans, trans, ket_mat)

    else:
        nov1 = 1 + nov
        noovv = bra2aa.size
        bra_SS = numpy.zeros((nov1, nov1))
        bra_SS[0, 0] = bra0
        bra_SS[0, 1:] = bra_SS[1:, 0] = bra1.ravel()
        bra_SS[1:, 1:] = bra2.transpose(0, 2, 1, 3).reshape(nov, nov)
        ket_SS = numpy.zeros((nov1, nov1))
        ket_SS[0, 0] = ket0
        ket_SS[0, 1:] = ket_SS[1:, 0] = ket1.ravel()
        ket_SS[1:, 1:] = ket2.transpose(0, 2, 1, 3).reshape(nov, nov)

        trans_SS = numpy.empty((nov1, nov1))
        trans_SD = numpy.empty((nov1, noovv))
        trans_DS = numpy.empty((noovv, nov1))
        occlist01 = occlists[:nov1]
        for i, idx in enumerate(occlist01):
            s_sub = s[idx].T.copy()
            minors = s_sub[occlist01]
            trans_SS[i, :] = numpy.linalg.det(minors)

            minors = s_sub[occlist2]
            trans_SD[i, :] = numpy.linalg.det(minors)

            s_sub = s[:, idx].copy()
            minors = s_sub[occlist2]
            trans_DS[:, i] = numpy.linalg.det(minors)

        ovlp = lib.einsum('ab,ap,bq,pq->', bra_SS, trans_SS, trans_SS, ket_SS)
        ovlp += lib.einsum('ab,a ,bq, q->', bra_SS, trans_SS[:, 0], trans_SD,
                           ket2aa.ravel())
        ovlp += lib.einsum('ab,ap,b ,p ->', bra_SS, trans_SD, trans_SS[:, 0],
                           ket2aa.ravel())

        ovlp += lib.einsum(' b, p,bq,pq->', bra2aa.ravel(), trans_SS[0, :],
                           trans_DS, ket_SS)
        ovlp += lib.einsum(' b, p,b ,p ->', bra2aa.ravel(), trans_SD[0, :],
                           trans_DS[:, 0], ket2aa.ravel())

        ovlp += lib.einsum('a ,ap, q,pq->', bra2aa.ravel(), trans_DS,
                           trans_SS[0, :], ket_SS)
        ovlp += lib.einsum('a ,a , q, q->', bra2aa.ravel(), trans_DS[:, 0],
                           trans_SD[0, :], ket2aa.ravel())

        # FIXME: whether to approximate the overlap between double excitation coefficients
        if numpy.linalg.norm(bra2aa) * numpy.linalg.norm(ket2aa) < 1e-4:
            # Skip the overlap if coefficients of double excitation are small enough
            pass
        if (abs(numpy.linalg.det(s[:nocc, :nocc]) - 1) < 1e-2
                and abs(numpy.linalg.det(s[nocc:, nocc:]) - 1) < 1e-2):
            # If the overlap matrix close to identity enough, use the <D|D'> overlap
            # for orthogonal single-particle basis to approximate the overlap
            # for non-orthogonal basis.
            ovlp += numpy.dot(bra2aa.ravel(), ket2aa.ravel()) * trans_SS[0,
                                                                         0] * 2
        else:
            from multiprocessing import sharedctypes, Process
            buf_ctypes = sharedctypes.RawArray('d', noovv)
            trans_ket = numpy.ndarray(noovv, buffer=buf_ctypes)

            def trans_dot_ket(i0, i1):
                for i in range(i0, i1):
                    s_sub = s[occlist2[i]].T.copy()
                    minors = s_sub[occlist2]
                    trans_ket[i] = numpy.linalg.det(minors).dot(ket2aa.ravel())

            nproc = lib.num_threads()
            if nproc > 1:
                seg = (noovv + nproc - 1) // nproc
                ps = []
                for i0, i1 in lib.prange(0, noovv, seg):
                    p = Process(target=trans_dot_ket, args=(i0, i1))
                    ps.append(p)
                    p.start()
                [p.join() for p in ps]
            else:
                trans_dot_ket(0, noovv)

            ovlp += numpy.dot(bra2aa.ravel(), trans_ket) * trans_SS[0, 0] * 2

    return ovlp
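
The parallel branch above shares its output buffer between processes by wrapping a multiprocessing.sharedctypes.RawArray in a NumPy array, so each child writes its segment of the result in place. A minimal sketch of that trick in isolation; the fill worker and the array size are illustrative only.

import numpy
from multiprocessing import Process, sharedctypes

def fill(i0, i1, buf):
    # Wrap the shared buffer as a NumPy array and write this segment in place
    out = numpy.ndarray(len(buf), buffer=buf)
    for i in range(i0, i1):
        out[i] = i * i

if __name__ == '__main__':
    n, nproc = 16, 4
    buf = sharedctypes.RawArray('d', n)          # shared float64 buffer
    seg = (n + nproc - 1) // nproc
    ps = [Process(target=fill, args=(i0, min(i0 + seg, n), buf))
          for i0 in range(0, n, seg)]
    for p in ps:
        p.start()
    for p in ps:
        p.join()
    print(numpy.ndarray(n, buffer=buf))          # all segments filled by children
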
Beispiel #45
0
class Server(object):
    def __init__(self,
                 config: Config,
                 signals: np.ndarray,
                 output=None,
                 interval=1000,
                 new_reference_signal_each_k=5,
                 max_iterations=1200,
                 max_retries=10,
                 beta=1.0,
                 error_on=False,
                 error_start=0,
                 error_duration=10,
                 network_id='1'):
        """
            Creates a Server object using:
            str:host\t host address
            int:port\t port where server runs
            int:id\t ID of the Server (used in numpy to locate position in matrix)
            np.array: adjacency\t Adjacency matrix of the whole/sub system
            int:signal\t Initial value for the node to start
            dict:out_neighbors\t contains all outneighbor host addresses in key property
            bool: instant_start\t whether the server should start immediately

            Returns: Server(object).
        """

        logger = logging.getLogger(name='Server.__init__')

        manager = Manager(
        )  # used to synchronize data between processes and threads
        self._host = config.host
        self._port = config.port
        self.__address = (config.host, config.port)
        self._adjacency = config.adjacency
        self._id = config.id
        self._laplacian = utility.calculate_laplacian(self._adjacency)
        if not utility.check_laplacian(self._laplacian):
            raise BaseException("No valid laplacian")
        # self._beta = 1/np.max(np.linalg.eigvals(self._laplacian)) # calculates beta
        self._alpha = utility.calculate_beta(self._laplacian)
        self._beta = beta
        self._laplacian_proportional = np.dot(self._alpha, self._laplacian)
        self._laplacian_integral = np.dot(self._beta,
                                          self._laplacian_proportional)
        self._server_socket = socket(AF_INET, SOCK_STREAM)
        self._server_socket.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1)
        self._server_socket.bind(self.__address)
        self._j = J(0, signals[0], config.out_neighbors)
        self._neighbor_states = manager.dict()
        self._neighbor_states_lock = Lock()
        self._interval = interval
        self._interval_sec = float(interval / 1000.0)
        self.__signals = signals
        self._new_reference_signal_each_k = new_reference_signal_each_k
        self.__output = output
        self.__max_iterations = max_iterations
        self.__running = False
        self.__neighbor_out_connections = manager.dict()
        self._max_retries = max_retries
        self.__error_on = error_on
        self.__error_start = error_start
        self.__error_duration = error_duration
        self.__API_URL = 'http://10.0.2.2:7071'
        self.__NETWORK_ID = network_id
        self.__API_QUEUE = manager.list()
        self.__API_QUEUE_LOCK = Lock()

        self.__tl = None  # allows to start recurring threads in a given time interval
        if config.instant_start:
            self.start()
        # logger.debug(self)
        logger.warn(f"Using beta {self._beta:24.20f}")

    def start(self):
        ''' Start the server, the accept process, and the broadcast threads. '''
        if self.__running:
            return
        try:
            self.__running = True
            self._server_socket.listen(500)
            self.accept_process = Process(target=self.accept_connections)
            self.accept_process.daemon = True
            self.accept_process.start()
            self.__tl = Timeloop()
            self.__tl._add_job(self.broadcast,
                               interval=timedelta(milliseconds=self._interval))
            self.__tl._add_job(self.callApi, interval=timedelta(seconds=2))
            self.__tl.start()
        except BaseException as e:
            logging.getLogger(name='start').error(str(e))
            self.stop()

    def stop(self):
        ''' Stopping all processes and threads. '''
        if not self.__running:
            return
        self.__running = False
        # self.accept_process.terminate()
        self.__tl.stop()
        self.accept_process.join(2)
        self._server_socket.close()

    def accept_connections(self):
        ''' Waits for new connections. Creates a handling Thread for all new connections. '''
        logger = logging.getLogger(name='Server:accept_connections')
        while True:
            try:
                logger.info("Waiting for new connection...")
                client, client_address = self._server_socket.accept()
                if self._j.k() < self.__max_iterations:
                    thread = Thread(target=self.handle_connection,
                                    args=(client, client_address))
                    thread.daemon = True
                    logger.info(
                        f"Retrieving information from\t{client_address}\tat host {self.__address}"
                    )
                    thread.start()
                else:
                    return
            except IOError as e:
                if e.errno == errno.EPIPE:
                    return
                logger.error(str(e))

    def handle_connection(self, client: socket, client_address: (str, int)):
        ''' Handles each incoming connection and stores the message. '''
        logger = logging.getLogger(name='Server:handle_connection')
        while self._j.k() < self.__max_iterations:
            try:
                msg = self.receive(client)
                sender = msg[0]
                key = int(msg[2])
                state_x = msg[1]
                state_q = msg[3]

                logger.debug(f"{self._id} received:\t{msg}")

                # Only update the tuple if state is defined in message
                with self._neighbor_states_lock:
                    if (sender, key) in self._neighbor_states:
                        states_tuple = self._neighbor_states[(sender, key)]
                        if states_tuple[0] != None:
                            state_x = states_tuple[0]
                        if states_tuple[1] != None:
                            state_q = states_tuple[1]
                    self._neighbor_states[(sender, key)] = (
                        state_x, state_q
                    )  # sample item created: ((host='127.0.0.1', k=4), (state_x=451.75, state_q=-67.6))

                logger.info(
                    f"[{self._host}]: Stored [{(sender, key)}]: {str((state_x,state_q))}"
                )
                logger.debug(
                    f"Node {self._id} has states {self._neighbor_states}")
            except OSError as e:
                if e.errno == errno.EPIPE:
                    break
                else:
                    logger.error(e)

    # @self.__tl.job(interval=timedelta(milliseconds=args.time))
    def broadcast(self):
        ''' Gets called by the Timeloop class. Creates a Thread which handles computation of new state and broadcasting. '''
        try:
            if self._j.k() < self.__max_iterations:
                # logging.getLogger(name='Server:broadcast').info("broadcast job current time: %s with data: %s" % (time.ctime(), str(self._neighbor_states)))
                # thread = Thread(target=self.broadcast_thread) #, args=(self._j, self._neighbor_states, self._adjacency)
                # thread.daemon = True
                # thread.start()
                self.broadcast_thread()
            else:
                self.stop()
        except OSError as e:
            if e.errno == errno.EPIPE or e.errno == errno.ECONNRESET:
                self.stop()
            else:
                raise e

    def broadcast_thread(self):
        ''' Initially sends message to all neighbor nodes. Calculates new state each round and distributes it. '''
        k = self._j.k()

        logger = logging.getLogger(name='Server:broadcast_thread')

        if k == 0:
            logger.info(f"Node {self._id} initially broadcasts its value.")
            self.distribute_states(state_x=self._j.state())

        logger.debug(
            f"{self._id}: states: after init\t{self._neighbor_states}")

        # Check if all neighbor states of current k are known, waits otherwise for interval max. 5 tries
        for i in range(self._max_retries):
            with self._neighbor_states_lock:
                if len(
                        dict(
                            filter(lambda o: o[0][1] == k and o[1][0] != None,
                                   self._neighbor_states.items()))) == len(
                                       self._j.neighbors):
                    break
            if i == self._max_retries - 1:
                logger.warn(f"Node {self._id}: RESETTING at position 1")
                return
            time.sleep(self._interval_sec)
        logger.debug(
            f"{self._id}: states: after state_x\t{self._neighbor_states}")

        x = np.zeros(np.shape(self._adjacency)[0])
        current_neighbor_states = {}
        with self._neighbor_states_lock:
            for neighbor_at_k_tuple, value_tuple in self._neighbor_states.items(
            ):
                id = self._j.neighbors[neighbor_at_k_tuple[0]]
                if neighbor_at_k_tuple[1] == k:
                    np.put(x, int(id), value_tuple[0])
                    current_neighbor_states[
                        neighbor_at_k_tuple[0]] = value_tuple[0]

        np.put(x, self._id, self._j.state())

        # calculate state_q
        with self._j.lock:
            my_q = utility.calculate_qi(self._id, self._laplacian_integral, x,
                                        self._j.q())
            self._j.set_q(my_q)
        self.distribute_states(state_q=my_q)

        # getting qs from other states
        q = np.zeros(np.shape(x))
        np.put(q, self._id, my_q)
        # Check if values -> state_q is present at key -> k of current iteration
        for i in range(self._max_retries):
            with self._neighbor_states_lock:
                if len(
                        dict(
                            filter(lambda o: o[0][1] == k and o[1][1] != None,
                                   self._neighbor_states.items()))) == len(
                                       self._j.neighbors):
                    break
            if i == self._max_retries - 1:
                # reset q if aborted
                with self._j.lock:
                    self._j.set_q(self._j.q() - my_q)
                logger.warn(f"Node {self._id}: RESETTING at position 2")
                return
            time.sleep(self._interval_sec)

        logger.debug(
            f"{self._id}: states: after state_q\t{self._neighbor_states}")
        with self._neighbor_states_lock:
            for neighbor_at_k_tuple, value_tuple in self._neighbor_states.items(
            ):
                id = self._j.neighbors[neighbor_at_k_tuple[0]]
                logger.info(f"{neighbor_at_k_tuple[0]} > {id}")
                if neighbor_at_k_tuple[1] == k:
                    np.put(q, int(id), value_tuple[1])
        # logger.info(f"{self._id}: xm: {str(x)}")
        # logger.info(f"{self._id}: qm: {str(q)}")
        # Generating q and broadcasting it
        sig_nr = int(self._j.k() / self._new_reference_signal_each_k)
        self._j.set_reference_signal(self._j.reference_signal())
        if k > 0 and sig_nr < 200:
            if k % self._new_reference_signal_each_k == 0:
                self._j.set_reference_signal(self.__signals[sig_nr])

        x_new = utility.calculate_iteration(self._id, self._laplacian_integral,
                                            self._laplacian_proportional, x, q,
                                            self._j.reference_signal(),
                                            self._j.diff(), self._alpha,
                                            self._beta)
        self._j.increment_k()
        self._j.set_state(x_new)
        self.distribute_states(state_x=x_new)
        logger.debug(
            f" UPDATED: Node {self._id}: {self._j.state()} | Others: {self._neighbor_states}"
        )

        # add log to api queue
        with self.__API_QUEUE_LOCK:
            self.__API_QUEUE.append({
                'nodeId':
                self._host,
                'port':
                self._port,
                'state':
                float(np.real(x_new)),
                'neighborStates':
                current_neighbor_states,
                'iteration':
                self._j.k(),
                'timestamp':
                datetime.utcnow().__str__(),
                'networkId':
                self.__NETWORK_ID,
                'referenceSignal':
                self._j.reference_signal()
            })

        # log to output if specified
        if self.__output != None:
            try:
                self.__output.write(str(float(self._j.state())) + '\n')
            except BaseException as e:
                logger.error("Unable to wirte to output.\n" + str(e))
        # Remove all data from previous calculations (session_key < k).
        with self._neighbor_states_lock:
            states_to_remove = dict(
                filter(
                    lambda elem: elem[0][1] < self._j.k(
                    ),  # elem[0] is key of dict and consists of node name and session key k
                    self._neighbor_states.items()))

            for state in states_to_remove.items():
                try:
                    del self._neighbor_states[state[0]]
                except KeyError:
                    logger.warn(f"Key '{state[0]}' not found.")

    def distribute_states(self, state_x: float = None, state_q: float = None):
        ''' Sends the current state to all neighbor nodes. '''
        if self._j.k() >= self.__max_iterations:
            return
        for neighbor in self._j.neighbors.keys():
            try:
                if neighbor not in self.__neighbor_out_connections:
                    if (neighbor, self.__max_iterations
                        ) not in self._neighbor_states:
                        client_socket = socket(AF_INET, SOCK_STREAM)
                        client_socket.connect((neighbor, self._port))
                        self.__neighbor_out_connections[
                            neighbor] = client_socket
                    else:
                        continue

                # adding error to own signal if selected
                if state_x != None:
                    error = 0.0
                    if self.__error_on:
                        if self._j.k() >= self.__error_start and self._j.k(
                        ) < self.__error_start + self.__error_duration:
                            error = utility.calculate_error(self._j.k())
                            logging.debug(
                                f"Adding error on node {self._id}: {error}")
                    state_x += error

                if state_x != None:
                    state_x = float(np.real(state_x))
                if state_q != None:
                    state_q = float(np.real(state_q))

                self.send(self.__neighbor_out_connections[neighbor],
                          message=(self._host, state_x, self._j.k(), state_q))
            except OSError as e:
                logging.getLogger(name='Server:distribute_state').debug(str(e))
                try:
                    self.__neighbor_out_connections[neighbor].close()
                except Exception as e:
                    pass
                finally:
                    del self.__neighbor_out_connections[neighbor]
                    logging.getLogger(name='Server:distribute_state').warn(
                        'Deleting socket connection for ' + neighbor)
            # finally:
            #     client_socket.close()

    def send(self, channel: socket, message: object):
        ''' Sends a message to another socket using JSON. '''
        try:
            msg = json.dumps(message)
            logging.info(f"sending data from {self._host}:\t{msg}")
            channel.send(struct.pack("i", len(msg)) + bytes(msg, "utf8"))
            return True
        except OSError as e:
            logging.error(e)
            return False

    def receive(self, channel: socket) -> (str, float, int):
        ''' Receives a message from another socket using JSON. '''
        recv = channel.recv(struct.calcsize("i"))
        if len(recv) < 4:
            raise OSError(errno.EPIPE, 'Empty message size', str(len(recv)))
        size = struct.unpack("i", recv)[0]
        data = ""
        while len(data) < size:
            msg = channel.recv(size - len(data))
            if not msg:
                return None
            data += msg.decode("utf8")
        logging.info(f"receiving data at {self._host}:\t{str(data)}")
        return json.loads(data.strip())

    def callApi(self):
        try:
            with self.__API_QUEUE_LOCK:
                data = list(self.__API_QUEUE)
                self.__API_QUEUE[:] = []
            if len(data) > 0:
                requests.post(f"{self.__API_URL}/api/log", json=data)
        except ValueError as ve:
            logging.getLogger(name='callApi: ').error(ve)
        except ConnectionError as ce:
            logging.getLogger(name='callApi: ').error(ce)
        except TimeoutError as te:
            logging.getLogger(name='callApi: ').error(te)
        except requests.exceptions.RequestException as re:
            logging.getLogger(name='callApi: ').error(re)
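
send() and receive() above frame every JSON message with a fixed-size length header so the reader knows exactly how many bytes to expect. A minimal sketch of that framing on a local socket pair, which stands in for the real client/server connection:

import json
import socket
import struct

def send_msg(channel, message):
    payload = json.dumps(message).encode('utf8')
    channel.sendall(struct.pack('i', len(payload)) + payload)

def recv_msg(channel):
    header = channel.recv(struct.calcsize('i'))
    size = struct.unpack('i', header)[0]
    data = b''
    while len(data) < size:
        chunk = channel.recv(size - len(data))
        if not chunk:
            raise ConnectionError('socket closed mid-message')
        data += chunk
    return json.loads(data.decode('utf8'))

if __name__ == '__main__':
    a, b = socket.socketpair()                    # stands in for a TCP link
    send_msg(a, ('127.0.0.1', 451.75, 4, -67.6))  # (host, state_x, k, state_q)
    print(recv_msg(b))                            # ['127.0.0.1', 451.75, 4, -67.6]
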
Beispiel #46
0
    def run_program(self, dataset, parameters):
        self.logger.info("Starting run\nParameters:\n{}".format("\n".join(
            ["\t{}: {}".format(k, v) for k, v in parameters.items()])))
        self.logger.info(
            "Distributing load over {} cores".format(NUM_OF_WORKERS))

        kg_i, kg_s = dataset

        # fit model
        t0 = timer()

        # MP manager
        manager = Manager()

        # generate semantic item sets from sampled graph
        si_sets = manager.dict(generate_semantic_item_sets(kg_i))

        # generate common behaviour sets
        work = manager.Queue()
        keys = list(si_sets.keys())
        slices = self.diagonal_matrix_slicer(keys)

        cbs_sets = manager.list()
        pool = []
        for i in range(NUM_OF_WORKERS):
            p = Process(target=generate_common_behaviour_sets,
                        args=(si_sets, cbs_sets, work,
                              parameters["similarity_threshold"]))
            p.daemon = True
            p.start()
            pool.append(p)

        for slce in slices:
            work.put(slce)

        for p in pool:
            work.put(None)

        # join shared variables
        for p in pool:
            p.join()

        # extend common behaviour sets
        cbs_size = 2
        cbs_sets_extended = manager.list(cbs_sets)
        while cbs_size < parameters["max_cbs_size"]:
            func = partial(extend_common_behaviour_sets, cbs_sets_extended,
                           parameters["similarity_threshold"])

            slices = self.diagonal_matrix_slicer(cbs_sets_extended)
            cbs_sets_extention = manager.list()
            with Pool(processes=NUM_OF_WORKERS) as pool:
                it = pool.imap_unordered(func=func, iterable=slices)

                while True:
                    try:
                        cbs_subset = next(it)
                        cbs_sets_extention.extend(cbs_subset)
                    except StopIteration:
                        break

            cbs_sets.extend(cbs_sets_extention)
            cbs_sets_extended = cbs_sets_extention
            cbs_size *= 2

        # generate semantic item sets from sampled graph association rules
        rules = manager.list()
        work = manager.Queue()
        size = max(1, floor(len(cbs_sets) / NUM_OF_WORKERS))
        slices = [slice(i, i + size) for i in range(0, len(cbs_sets), size)]

        pool = []
        for i in range(NUM_OF_WORKERS):
            p = Process(target=generate_semantic_association_rules,
                        args=(kg_i, kg_s, cbs_sets, work, rules,
                              parameters["minimal_local_support"]))
            p.daemon = True
            p.start()
            pool.append(p)

        for slce in slices:
            work.put(slce)

        for p in pool:
            work.put(None)

        # join shared variables
        for p in pool:
            p.join()

        # calculate support and confidence, skip those not meeting minimum requirements
        final_rule_set = manager.list()
        work = manager.Queue()
        size = max(1, floor(len(rules) / NUM_OF_WORKERS))
        slices = [slice(i, i + size) for i in range(0, len(rules), size)]

        pool = []
        for i in range(NUM_OF_WORKERS):
            p = Process(target=evaluate_rules,
                        args=(kg_i, rules, work, final_rule_set,
                              parameters["minimal_support"],
                              parameters["minimal_confidence"]))

            p.daemon = True
            p.start()
            pool.append(p)

        for slce in slices:
            work.put(slce)

        for p in pool:
            work.put(None)

        # join shared variables
        for p in pool:
            p.join()

        # sorting rules on both support and confidence
        final_rule_set.sort(key=itemgetter(2, 1), reverse=True)

        # time took
        t1 = timer()
        dt = t1 - t0
        print("  Program completed in {:.3f} ms".format(dt))

        print("  Found {} rules".format(len(final_rule_set)))
        return final_rule_set
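
Each stage of run_program() uses the same idiom: fill a Manager queue with work slices, append one None sentinel per worker, and let every worker exit when it pulls a sentinel. A minimal, self-contained sketch of that idiom with a toy squaring worker:

from multiprocessing import Manager, Process

def worker(work, results):
    # Pull items until the None sentinel arrives, then exit
    while True:
        item = work.get()
        if item is None:
            break
        results.append(item * item)

if __name__ == '__main__':
    n_workers = 4
    manager = Manager()
    work = manager.Queue()
    results = manager.list()

    pool = [Process(target=worker, args=(work, results), daemon=True)
            for _ in range(n_workers)]
    for p in pool:
        p.start()

    for item in range(10):
        work.put(item)
    for _ in pool:
        work.put(None)            # one sentinel per worker

    for p in pool:
        p.join()
    print(sorted(results))        # [0, 1, 4, ..., 81]
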
Beispiel #47
0
def develop():

    param_grid = [
        {  # rbf: C overfit, gamma underfit
            "scaler": [
                "StandardScaler"
            ],  # this is framework featured parameter, not belongs to the model
            "C": [1.1, 1.5, 2.0],  # >1. overfit
            "kernel": ["rbf"],
            "degree": [
                3
            ],  # Note: Degree of the polynomial kernel function ('poly'), ignored by all other kernels.
            "gamma": [0.02, 0.015, 0.01, 0.005],  # <.03 underfit
            "coef0":
            [0.0],  # Note: It is only significant in 'poly' and 'sigmoid'.
            "probability": [True],
            "shrinking": [True],
            "tol": [1e-3],
            "cache_size": [1024],
            "class_weight": ["auto"],
            "verbose": [False],
            "max_iter": [-1],
            "random_state": [None]
        },
        {  # rbf: C underfit, gamma overfit
            "scaler": [
                "StandardScaler"
            ],  # this is framework featured parameter, not belongs to the model
            "C": [0.5, 0.55, 0.6, 0.65],  # <1. underfit
            "kernel": ["rbf"],
            "degree": [
                3
            ],  # Note: Degree of the polynomial kernel function ('poly'), ignored by all other kernels.
            "gamma": [0.01, 0.015, 0.02, 0.025],  # >.03 overfit
            "coef0":
            [0.0],  # Note: It is only significant in 'poly' and 'sigmoid'.
            "probability": [True],
            "shrinking": [True],
            "tol": [1e-3],
            "cache_size": [1024],
            "class_weight": ["auto"],
            "verbose": [False],
            "max_iter": [-1],
            "random_state": [None]
        },
        {  # poly
            "scaler": [
                "StandardScaler"
            ],  # this is framework featured parameter, not belongs to the model
            "C": [0.7, 0.9, 1.1, 1.5],
            "kernel": ["poly"],
            "degree": [
                4, 5
            ],  # Note: Degree of the polynomial kernel function ('poly'), ignored by all other kernels.
            "gamma": [0.01, 0.015, 0.02, 0.025],
            "coef0":
            [0.0],  # Note: It is only significant in 'poly' and 'sigmoid'.
            "probability": [True],
            "shrinking": [True],
            "tol": [1e-3],
            "cache_size": [1024],
            "class_weight": ["auto"],
            "verbose": [False],
            "max_iter": [-1],
            "random_state": [None]
        },
        {  # linear
            "scaler": [
                "StandardScaler"
            ],  # this is framework featured parameter, not belongs to the model
            "C": [0.9, 1.1, 1.5, 2, 2.5],
            "kernel": ["linear"],
            "degree": [
                3
            ],  # Note: Degree of the polynomial kernel function ('poly'), ignored by all other kernels.
            "gamma": [0.02, 0.025, 0.3, 0.035],
            "coef0":
            [0.0],  # Note: It is only significant in 'poly' and 'sigmoid'.
            "probability": [True],
            "shrinking": [True],
            "tol": [1e-3],
            "cache_size": [1024],
            "class_weight": ["auto"],
            "verbose": [False],
            "max_iter": [-1],
            "random_state": [None]
        }
    ]
    param_list = list(ParameterGrid(param_grid))

    process_list = []
    for param_dict in param_list:
        process = Process(target=train_validate_test, args=(param_dict, ))
        process_list.append(process)

    for process in process_list:
        process.start()

    for process in process_list:
        process.join()
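
develop() starts one process per parameter combination, which can oversubscribe the machine for large grids; a process pool caps the parallelism instead. A minimal sketch under that assumption; the toy train_validate_test and the small grid below are placeholders, not the real ones used above.

from concurrent.futures import ProcessPoolExecutor

from sklearn.model_selection import ParameterGrid

def train_validate_test(params):
    # Placeholder "training" step; the real one would fit and score a model
    return params['C'] * params['gamma']

if __name__ == '__main__':
    grid = ParameterGrid({'C': [0.5, 1.0, 2.0], 'gamma': [0.01, 0.02]})
    with ProcessPoolExecutor(max_workers=4) as pool:
        for params, score in zip(grid, pool.map(train_validate_test, grid)):
            print(params, score)
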
def main():
    start = datetime.datetime.now()
    if len(sys.argv) != 5:
        logging.info('please input args: car_path, road_path, cross_path, answerPath')
        exit(1)

    car_path = sys.argv[1]
    road_path = sys.argv[2]
    cross_path = sys.argv[3]
    answer_path = sys.argv[4]

    global car
    global road
    global cross
    global answerPath

    car = input_txt(car_path).reshape(-1, 5)
    road = input_txt(road_path).reshape(-1, 7)
    cross = input_txt(cross_path).reshape(-1, 5)
    answerPath = answer_path

    logging.info("car_path is %s" % (car_path))
    logging.info("road_path is %s" % (road_path))
    logging.info("cross_path is %s" % (cross_path))
    logging.info("answer_path is %s" % (answer_path))

    # total number of intersections
    cross_number = len(cross)
    # number of roads
    road_number = len(road)
    # number of cars
    car_number = len(car)

    cross_frequency(cross_number)  # initialize count_cross_frequency
    road_frequency(road_number)  # initialize count_road_frequency

    # cross_adjacency_matrix = np.ones((cross_number + 1, cross_number + 1))
    # cross_adjacency_matrix = float('inf') * cross_adjacency_matrix
    cross_adjacency_high_speed = np.ones((cross_number + 1, cross_number + 1))
    cross_adjacency_high_speed = float('inf') * cross_adjacency_high_speed
    cross_adjacency_slow_speed = np.ones((cross_number + 1, cross_number + 1))
    cross_adjacency_slow_speed = float('inf') * cross_adjacency_slow_speed
    # cross_adjacency_infrequent = np.ones((cross_number + 1, cross_number + 1))
    # cross_adjacency_infrequent = float('inf') * cross_adjacency_infrequent
    # cross_adjacency_wide_road = np.ones((cross_number + 1, cross_number + 1))
    # cross_adjacency_wide_road = float('inf') * cross_adjacency_wide_road

    # Build the intersection adjacency matrix (values are distances; -1 means not connected)
    for i in range(cross_number):
        for j in range(1, 5):
            if cross[i][j] == -1:
                continue
            for x in range(cross_number):
                for y in range(1, 5):
                    if cross[i][j] == cross[x][y]:  # find adjacent intersections
                        for r in range(road_number):
                            if road[r][0] == cross[i][j] and i != x:
                                if road[r][6] == 0 and road[r][5] == cross[i][0]:
                                    continue
                                else:
                                    # cross_adjacency_matrix[i + 1][x + 1] = (
                                    #         road[r][1] / (0.95 * road[r][2] * (road[r][3])))  # distance between intersections
                                    cross_adjacency_high_speed[i + 1][x + 1] = (
                                            10 / (1.5 * road[r][2] * (road[r][3])))  # fast route
                                    cross_adjacency_slow_speed[i + 1][x + 1] = (road[r][2] / (road[r][3]))  # slow route
                                    # cross_adjacency_wide_road[i + 1][x + 1] = 10/road[r][3]

    # Re-evaluate the weights 2019-3-18
    # print(cross_adjacency_wide_road)
    # print(cross_adjacency_matrix)
    # print(cross_adjacency_infrequent)
    # cam = pd.DataFrame(cross_adjacency_matrix)
    # cam.to_csv('cam.csv')

    end1 = datetime.datetime.now()-start
    print(end1)#0.282849s

    # Generate routes that pass through the crossings

    # # dict of shortest-distance node paths
    # shortest_distance = Manager().dict()
    # dict of fastest node paths
    high_speed = Manager().dict()
    # dict of slowest node paths
    slow_speed = Manager().dict()
    # # dict of low-frequency node paths
    # low_frequency = Manager().dict()
    # dict of wide-road paths
    wide_road = {}

    # p1 = Process(target=map,args=(cross_number, cross_adjacency_matrix,shortest_distance,cross))  # normal route
    p2 = Process(target=map,args=(cross_number, cross_adjacency_high_speed, high_speed,cross))  # fastest route
    p3 = Process(target=map,args=(cross_number, cross_adjacency_slow_speed, slow_speed,cross))  # slowest route
    # p1.start()
    p2.start()
    p3.start()

    # p1.join()
    p2.join()
    p3.join()
    # map(cross_number, cross_adjacency_wide_road, wide_road)  # widest-road route
    # print(shortest_distance)
    end2 = datetime.datetime.now() - start
    print(end2)#1.59.01s

    # dict mapping intersection pair -> road
    cross_road = {}
    for i in range(road_number):
        if road[i][6] == 1:
            cross_road[str(road[i][4]) + '-' + str(road[i][5])] = road[i][0]
            cross_road[str(road[i][5]) + '-' + str(road[i][4])] = road[i][0]
        else:
            cross_road[str(road[i][4]) + '-' + str(road[i][5])] = road[i][0]

    answer = []  # normal paths
    answer_high_speed = []  # fast paths
    answer_slow_speed = []  # slow paths
    answer_low_frequency = []  # low-frequency paths
    answer_wide_road = []  # wide-road paths

    # Generate the road path for each car
    # generating_path(car, answer, shortest_distance, cross_road,
    #                 count_road_frequency,count_cross_frequency)  # normal route
    generating_path(car, answer_high_speed, high_speed, cross_road,
                    count_road_frequency,count_cross_frequency)  # fastest route
    generating_path(car, answer_slow_speed, slow_speed, cross_road,
                    count_road_frequency,count_cross_frequency)  # slowest route
    # generating_path(car, answer_wide_road, wide_road, cross_road,
    #                 count_road_frequency, count_cross_frequency)  # wide-road route

    end3 = datetime.datetime.now() - start
    print(end3)#2.03.45

    ################################### vehicles by direction ################

    # key: east-west / north-south    value: array of vehicles in that direction
    directionMap = direction(cross_number, car)

    ################################### vehicles by direction ################



    ################################ lowest-frequency route generation ###########################################
    # for i in range(int(road_number / 3)):  # setting: fraction of all roads whose driving permission is revoked
    #     max_frequency_road = 0
    #     index_frequency_road = 0
    #     for i in range(len(count_road_frequency)):
    #         if count_road_frequency[i] < float('inf'):
    #             if count_road_frequency[i] > max_frequency_road:
    #                 max_frequency_road = count_road_frequency[i]
    #                 index_frequency_road = i
    #     count_road_frequency[index_frequency_road] = 999999
    # # print(count_road_frequency)
    #
    # for i in range(cross_number):
    #     for j in range(1, 5):
    #         if cross[i][j] == -1:
    #             continue
    #         for x in range(cross_number):
    #             for y in range(1, 5):
    #                 if cross[i][j] == cross[x][y]:  # find adjacent intersections
    #                     for r in range(road_number):
    #                         if road[r][0] == cross[i][j] and i != x:
    #                             if road[r][6] == 0 and road[r][5] == cross[i][0]:
    #                                 continue
    #                             else:
    #                             cross_adjacency_infrequent[i + 1][x + 1] = count_road_frequency[r]/road[r][3] # road driven least often
    #
    # # low-frequency route: revoke driving permission on routes used too often
    # # for item in cross_adjacency_infrequent
    # #     if item >= cross_adjacency_infrequent.reshape(1,-1)
    #
    # map(cross_number, cross_adjacency_infrequent, low_frequency)  # low-frequency route
    # #print(count_cross_frequency)
    # generating_path(car_number, answer_low_frequency, low_frequency, cross_road
    #                 , count_road_frequency, count_cross_frequency)  # low-frequency route
    #
    # end4 = datetime.datetime.now() - start
    # print(end4)
    ################################# lowest-frequency route generation (end) ################################################

    # answer map: carId -> answer entry
    answerMap = {}

    # dict storing each car's high-speed route
    answerHighMap = {}
    for item in answer_high_speed:
        answerHighMap.setdefault(item[0], item)

    # dict storing each car's low-speed route
    answerSlowMap = {}
    for item in answer_slow_speed:
        answerSlowMap.setdefault(item[0], item)

    # dict storing each road's length
    roadMap = {}
    for roadItem in road:
        roadMap.setdefault(roadItem[0], roadItem[1])

    # dict storing the cars, key: carId, value: car
    carMap = {}
    for carItem in car:
        carMap.setdefault(carItem[0], carItem)

    ################################################### split by east-west / north-south direction ##############################################

    planTime = 0
    # whether this is the first dispatch batch
    firstStartCar = 0

    # outer-loop iteration counter
    tempCount = 0

    for key, values in directionMap.items():
        # classify all north-south and east-west cars by distance

        # # distance dict for all cars
        # carDistance = {}
        # for item in values:
        #     # look up the car's route by carId
        #     answerNormalDis = answerNormalMap.get(item[0])
        #     sumRoadLen = 0
        #     for i in range(2, len(answerNormalDis)):
        #         roadLen = roadMap.get(answerNormalDis[i])
        #         sumRoadLen += roadLen
        #     # record each car's total route length
        #     carDistance.setdefault(item[0], sumRoadLen)
        #
        # # collect all the route lengths
        # carDistanceArray = carDistance.values()
        #
        # # de-duplicate the list
        # carDistanceArray = list(set(carDistanceArray))
        #
        # # sort the list
        # # carDistanceArray = sorted(carDistanceArray, reverse=True)
        # carDistanceArray = sorted(carDistanceArray)
        #
        # # list of cars ordered by ascending route length
        # sortCarDistanceArray = []
        # for carDistanceItem in carDistanceArray:
        #     for key, values in carDistance.items():
        #         if values == carDistanceItem:
        #             tempCar = carMap.get(key)
        #             sortCarDistanceArray.append(tempCar)

        # collect the distinct speeds of all cars
        speedArray = []
        maxSpeed = 0

        for item in values:
            carSpeed = item[3]
            flag = 0
            for tempSpeed in speedArray:
                if tempSpeed == carSpeed:
                    flag = 1
            if flag == 0:
                # track the maximum speed
                if carSpeed > maxSpeed:
                    maxSpeed = carSpeed
                speedArray.append(carSpeed)

        halfMaxSpeed = int(maxSpeed / 2)
        halfMaxSpeed_8_3 = int(maxSpeed * (3/8))
        halfMaxSpeed_8_5 = int(maxSpeed * (5/8))

        # car dispatch counter
        carStartCount = 1

        if tempCount == 0:
            shardCount = 130
        else:
            shardCount = 120

        # if planTime > 0:
        #     planTime += 38

        # split the dispatch by speed
        for item in values:

            if carStartCount % shardCount == 0:
                # the last cars in the batch depart at the same time
                if tempCount == 0:
                    if carStartCount <= int(0.15*values.__len__()):
                        planTime += 3.5
                    else:
                        planTime += 4.5

                elif tempCount >= 1:
                    if carStartCount <= int(0.8*values.__len__()):
                        planTime += 4
                    elif carStartCount <= int(0.9*values.__len__()):
                        planTime += 3

            # get the car's ID and speed
            carId = item[0]
            carSpeed = item[3]

            # if carSpeed < halfMaxSpeed:
            #     car = answerHighMap.get(carId)
            # else:
            #     car = answerSlowMap.get(carId)

            if carStartCount < int(0.2*values.__len__()):
                car = answerHighMap.get(carId)
            elif carStartCount < int(0.8*values.__len__()):
                # split by speed
                if carSpeed == 4 or carSpeed == 6:
                    if carStartCount % 7 == 0:
                        car = answerHighMap.get(carId)
                    else:
                        car = answerHighMap.get(carId)
                else:
                    if carStartCount % 7 == 0:
                        car = answerHighMap.get(carId)
                    else:
                        car = answerSlowMap.get(carId)
            else:
                car = answerHighMap.get(carId)

            # set the car's planTime: current time slice + max speed - car speed
            # so faster cars depart earlier and slower cars queue behind them
            if planTime < item[4]:
                car[1] = item[4]
            else:
                car[1] = int(planTime + maxSpeed - carSpeed)
            answerMap.setdefault(carId, car)
            carStartCount += 1

        tempCount +=1
    #################################################### split by direction (end) ###########################################

    result = []
    for item in answerMap.values():
        result.append(item)

    output_txt(answerPath, result)  # write the output file

    end5 = datetime.datetime.now() - start
    print(end5)
    print(len(directionMap['NorthAndSouth']))
    print(len(directionMap['EastAndWest']))
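The departure rule above (current time slice + max speed - car speed) is what staggers cars inside a batch. A minimal, stand-alone sketch with made-up numbers shows the effect: within one batch the fastest cars get the earliest slots and slower cars line up behind them.

# Stand-alone sketch of the staggering rule above, with hypothetical numbers.
max_speed = 8
plan_time = 10
for car_speed in (8, 6, 4, 2):
    depart = int(plan_time + max_speed - car_speed)
    print("speed={} departs at t={}".format(car_speed, depart))
# speed=8 departs at t=10 ... speed=2 departs at t=16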
Beispiel #49
0
def main():
    args = parser.parse_args()
    matcher_name = args.matcher
    adapter = args.adapter
    trimFirst = args.trim_first
    trimLast = args.trim_last
    trimTo = args.trim_to
    inFilePath = args.in_file
    outFilePath = args.out_file
    maxThread = args.workers
    chunk = args.chunk
    debugLimit = args.debug_limit

    print()
    if trimFirst == 0:
        print(f"Not trimming any initial bases")
    else:
        print(f"Trimming the first {trimFirst} bases")
    print(f"Trimming adapter: {adapter}")
    # if version == 2:
    print(f"The matcher '{matcher_name}' is used to find the adapter")
    # print(f'Considering only first {matchOnly} bases of adapter: {adapter[:matchOnly]}')
    # if version < 3:
    #     print(f'Considering {len(adapters)} possible variants of the adapter')
    # else:
    #     print(f'Using Levenshtein-Damerau distance to find adapter variants')
    print(f"Trimming all bases after the adapter (if present)")
    if trimLast == 0:
        print(f"Not trimming any other bases after adapter removal")
    else:
        print(f"Trimming the last {trimLast} bases after adapter removal")
    print(f"Saving to file: {outFilePath}")
    print("Used",
          f"{maxThread} workers" if maxThread > 0 else "sequential version")
    print()

    # get the matcher function
    matcher_builder = MATCHER_BUILDER[matcher_name]
    matcher = matcher_builder(adapter, args)

    if maxThread > 0:
        # build the parallel topology
        process = [None] * maxThread
        queues1 = [None] * maxThread
        queues2 = [None] * maxThread
        for i in range(maxThread):
            queues1[i] = Queue()
            queues2[i] = Queue()
            out_queue = queues2[i]
            process[i] = Process(
                target=worker_fun,
                args=(queues1[i], out_queue, trimFirst, trimLast, trimTo,
                      matcher),
            )
            process[i].start()
        collector = Process(target=collector_fun, args=(outFilePath, queues2))
        collector.start()

        # start file read
        t_start = time.perf_counter() * 1000
        with open(inFilePath, "r+b") as infile:
            t = 0
            sequence = ff.readfastq_iter(infile,
                                         fbufsize=50000,
                                         _entrypos=entrypos_c)
            for i, seq in enumerate(sequence):
                p = i % chunk
                if p == 0:
                    partition = [None] * chunk

                if i == debugLimit:
                    break

                partition[p] = seq

                if p == chunk - 1:
                    queues1[t].put(partition)
                    t = (t + 1) % maxThread
            if p < chunk - 1:
                partition = partition[:p]
                queues1[t].put(partition)

        print(f"Sent {i} elements to the workers")
        for q in queues1:
            q.put(EOF)

        print("Wait process")
        for p in process:
            p.join()
        collector.join()
        t_end = time.perf_counter() * 1000
        time_match = math.floor(t_end - t_start)

        print(f"Matching time: {time_match}")
    else:
        # Sequential version
        t_start = time.perf_counter() * 1000
        with open(inFilePath, "r+b") as infile:
            sequence = ff.readfastq_iter(infile,
                                         fbufsize=50000,
                                         _entrypos=entrypos_c)
            with open(outFilePath, "w") as outFile:
                for i, seq in enumerate(sequence):
                    comment = seq[0].decode("utf-8")
                    line = seq[1].decode("utf-8")
                    quality = seq[2].decode("utf-8")
                    match = matcher(line)
                    tFirst = 0
                    tLast = 0
                    count = 0

                    if trimTo and match:
                        lineLen = len(line[:match])
                        while lineLen > trimTo:
                            if count % 2:
                                tFirst += 1
                            else:
                                tLast += 1
                            count += 1
                            lineLen -= 1

                    tFirst = max(tFirst, trimFirst)
                    tLast = max(tLast, trimLast)

                    if match:
                        line = line[tFirst:match - tLast]
                        quality = quality[tFirst:match - tLast]
                    else:
                        line = line[tFirst:tFirst + trimTo]
                        quality = quality[tFirst:tFirst + trimTo]

                    outFile.write(f"@{comment}\n{line}\n+\n{quality}\n")

        t_end = time.perf_counter() * 1000
        time_match = math.floor(t_end - t_start)
        print(f"Processed {i} elements")
        print(f"Matching time: {time_match}")

    # Align results
    if args.aligner:
        print("Start alignment")

    # Align results
    if args.aligner in ("bowtie", "bowtie_htseq"):
        bowtie(args)

    if args.aligner == "bowtie_htseq":
        htseq(args)
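The worker and collector processes wired up in main() rely on worker_fun, collector_fun and the EOF sentinel, which are defined elsewhere in this program. The following is only a hypothetical sketch of that pair, inferred from how the queues are used above; the trimming step and the EOF handling are assumptions.

EOF = None  # assumed sentinel

def worker_fun(in_queue, out_queue, trim_first, trim_last, trim_to, matcher):
    # Consume chunks of reads, trim each one, forward the results, pass EOF on.
    while True:
        chunk = in_queue.get()
        if chunk is EOF:
            out_queue.put(EOF)
            break
        trimmed = []
        for seq in chunk:
            line = seq[1].decode("utf-8")
            match = matcher(line)
            end = match if match else len(line)
            trimmed.append(line[trim_first:max(end - trim_last, trim_first)])
        out_queue.put(trimmed)

def collector_fun(out_path, queues):
    # Drain every worker queue into one output file until each one sends EOF.
    pending = list(queues)
    with open(out_path, "w") as out:
        while pending:
            for q in list(pending):
                item = q.get()
                if item is EOF:
                    pending.remove(q)
                else:
                    out.write("\n".join(item) + "\n")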
Beispiel #50
0
def fill_template(template_name,
                  context,
                  img_category='Figure',
                  output_format='odt'):
    """
    Fill a document with data and convert it to the requested format.

    Returns an absolute path to the generated file.
    """

    if not isinstance(context, Context):
        context = Context(context)

    context['output_format'] = output_format
    context['img_category'] = img_category

    source_file = find_template_file(template_name)
    source_extension = os.path.splitext(source_file)[1]
    source = zipfile.ZipFile(source_file, 'r')

    dest_file = NamedTemporaryFile(delete=False, suffix=source_extension)
    dest = zipfile.ZipFile(dest_file, 'w')

    manifest_data = ''
    for name in source.namelist():
        data = source.read(name)
        if name.endswith('.xml'):
            data = smart_str(data)

        if any(name.endswith(file) for file in ('content.xml', 'styles.xml')):
            template = Template(fix_inline_tags(data))
            data = template.render(context)
        elif name == 'META-INF/manifest.xml':
            manifest_data = data[:-20]  # Cut off the closing </manifest> tag
            continue  # We will append it at the very end
        dest.writestr(name, smart_bytes(data))

    for _, image in context.dicts[0].get(IMAGES_CONTEXT_KEY, {}).items():
        filename = os.path.basename(image.name)
        ext = os.path.splitext(filename)[1][1:]
        manifest_data += (
            '<manifest:file-entry '
            'manifest:media-type="image/%(ext)s" '
            'manifest:full-path="Pictures/%(filename)s"/>\n') % locals()
        image.open()
        dest.writestr('Pictures/%s' % filename, image.read())
        image.close()

    manifest_data += '</manifest:manifest>'
    dest.writestr('META-INF/manifest.xml', manifest_data)

    source.close()
    dest.close()

    if source_extension[1:] != output_format:
        results = Queue()
        convertor = Process(target=_convert_subprocess,
                            args=(str(dest_file.name), output_format, results))
        convertor.start()
        return results.get()
    else:
        return dest_file.name
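The _convert_subprocess helper launched above is defined elsewhere; the sketch below is only a guess at one way it could work, using the LibreOffice headless converter as a placeholder. Running it in a child Process keeps a crashing or hanging converter from taking down the caller, and the Queue carries the resulting path back.

import os
import subprocess

def _convert_subprocess(source_path, output_format, results):
    # Hypothetical converter: transform the temporary file and push the
    # resulting path back to the parent through the queue.
    out_dir = os.path.dirname(source_path)
    subprocess.run(['libreoffice', '--headless', '--convert-to', output_format,
                    '--outdir', out_dir, source_path], check=True)
    results.put(os.path.splitext(source_path)[0] + '.' + output_format)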
def multiple_tcp_serve(server_settings: Dict[str, Any], workers: int) -> None:
    """启动一个多进程的tcp服务,他们共享同一个socket对象.

    用multiple模式在每个子进程执行tcp服务,当执行完成后统一的回收资源

    Params:

        server_settings (Dicct[str, Any]) : - 每个单一进程的设置,
        workers (int) : - 执行的进程数

    """
    server_settings['reuse_port'] = True
    server_settings['run_multiple'] = True

    # Handling when custom socket is not provided.
    if server_settings.get('sock') is None:
        sock = socket()
        sock.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1)
        sock.bind((server_settings['host'], server_settings['port']))
        sock.set_inheritable(True)
        server_settings['sock'] = sock
        server_settings['host'] = None
        server_settings['port'] = None

    def sig_handler(signal: Any, frame: Any):
        """向子进程传送SIGTERM信号,用于关闭所有子进程中运行的事件循环.

        Params:

            signal (Any) : - 要处理的信号
            frame (Any) : - 执行到的帧
        """
        status = []
        for process in processes:
            statu = process.is_alive()
            status.append(statu)
            if statu:
                os.kill(process.pid, SIGTERM)

        if any(status):
            logger.info(
                """Received signal {}. Shutting down. You may need to enter Ctrl+C again.
                """.format(Signals(signal).name))
        else:
            raise MultipleProcessDone("all processes have exited")

    signal_func(SIGINT, sig_handler)
    signal_func(SIGTERM, sig_handler)

    processes = []

    for _ in range(workers):
        process = Process(target=tcp_serve, kwargs=server_settings)
        process.daemon = True
        process.start()
        processes.append(process)
    try:
        # busy-wait until the signal handler raises MultipleProcessDone
        while True:
            pass
    except MultipleProcessDone as done:
        logger.info(str(done))
    except Exception as e:
        raise e
    finally:
        for process in processes:
            process.join()

        # after join() has synchronized, terminate() only runs once the processes have finished
        for process in processes:
            process.terminate()
        server_settings.get('sock').close()
        logger.info("Shutting down done.")
def crawl_article_process():
    p = Process(target=crawl_article_pro, name="crawl_article")
    p.start()
    p.join()
Beispiel #53
0
from multiprocessing import Process

import os
import time


def info(title):
    print(title)
    print('module name:', __name__)
    if hasattr(os, 'getppid'):
        print('parent process: {}'.format(os.getppid()))
    print('process id: {}'.format(os.getpid()))


def f(name):
    info('function f')
    time.sleep(2)
    print('hello {}'.format(name))


if __name__ == '__main__':
    info('main line')
    p = Process(target=f, args=('bob', ))
    p.start()
    p.join()
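A small follow-up sketch, reusing f and Process from the example above: after join() the parent can inspect the child's pid and exit status (0 means f() returned normally).

if __name__ == '__main__':
    p = Process(target=f, args=('alice', ))
    p.start()
    p.join()
    # p.exitcode is set once the child has terminated.
    print('child pid {} exited with code {}'.format(p.pid, p.exitcode))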
    def background(self):
        t = Process(target=self.socketIO.wait)
        # t.daemon = True  # Process has no setDaemon(); set the daemon attribute instead
        t.start()
def main():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        print(
            "Warning: video source not assigned, default webcam will be used.")
        video_src = 0

    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    while True:
        # Read a frame; crop it or flip it to suit your needs.
        frame_got, frame = cap.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

            tm.start()
            marks = mark_detector.detect_marks(face_img)
            tm.stop()

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

            # Uncomment following line to show facebox.
            # mark_detector.draw_box(frame, [facebox])

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))

            # Uncomment following line to draw pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Draw the stabilized pose annotation on the frame.
            pose_estimator.draw_annotation_box(frame,
                                               steady_pose[0],
                                               steady_pose[1],
                                               color=(128, 255, 128))

            # Uncomment following line to draw head axes on frame.
            # pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
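The box_process above runs a get_face worker that is defined elsewhere in the project. A hypothetical sketch of its queue loop follows; extract_cnn_facebox is an assumption about the detector's face-box method.

def get_face(detector, img_queue, box_queue):
    # Pull frames from img_queue, detect at most one face box, push it (or None).
    while True:
        image = img_queue.get()
        box = detector.extract_cnn_facebox(image)  # assumed MarkDetector API
        box_queue.put(box)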
from celery import Celery, chain
from multiprocessing import Process
from functools import partial
from src.app.schedule import schedule_task

app = Celery("tasks", include=["src.logic.unsplash_image"])

app.config_from_object("src.configuration.celeryconfig")


def worker():
    w = app.Worker(
        include=["src.app.schedule"],
        loglevel="INFO",
    )
    w.start()


def beat():
    app.conf.beat_schedule = schedule_task
    app.conf.timezone = "UTC"
    b = partial(app.Beat, loglevel="debug")
    b().run()


if __name__ == "__main__":
    w = Process(target=worker)
    b = Process(target=beat)
    w.start()
    b.start()
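The launcher above returns from __main__ right after starting the two children. If the parent should stay in the foreground until both exit, a variant of the same block can join them; this is only a sketch, assuming nothing else needs to run in the parent.

if __name__ == "__main__":
    w = Process(target=worker)
    b = Process(target=beat)
    w.start()
    b.start()
    # Block the parent until the worker and the beat scheduler both exit.
    w.join()
    b.join()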
def run(args):

	# setup
	N, D = args.N, args.D
	n_examples = args.n_train_examples
	n_threads = args.n_threads
	n_test_iter = args.n_test_examples
	policy_network = alphatsp.util.get_policy_network(args.policy_network)

	# generate examples
	print("Generating examples and training...")

	manager = Manager()
	train_queue = manager.Queue()
	model_queue = manager.Queue()

	model_queue.put(policy_network)

	producers = []
	for _ in range(n_threads):
		producers.append(Process(target=generate_examples, args=(train_queue, n_examples//n_threads, N, D, args)))

	for p in producers:
		p.start()

	c = Process(target=trainer, args=(train_queue, model_queue, args))
	c.start()

	for p in producers:
		p.join()
	train_queue.put(None)

	c.join()
	train_losses = model_queue.get()
	policy_network = model_queue.get()

	# display training loss
	plt.scatter(x=np.arange(len(train_losses)), y=train_losses, marker='.')
	plt.title("Loss")
	plt.xlabel("examples")
	plt.ylabel("loss")
	plt.savefig("saves/loss_parallel.png")

	# test policy network vs other solvers
	print("Testing...")
	policy_lens, policymcts_lens, mcts_lens, greedy_lens, exact_lens = [], [], [], [], []
	for _ in range(n_test_iter):

		tsp = alphatsp.tsp.TSP(N, D)

		# policy only
		policy_solver = alphatsp.solvers.policy_solvers.PolicySolver(args, tsp, policy_network)
		policy_tour, policy_tour_len = policy_solver.solve()

		# policy + mcts
		policymcts_solver = alphatsp.solvers.policy_solvers.PolicyMCTSSolver(args, tsp, policy_network)
		policymcts_tour, policymcts_tour_len = policymcts_solver.solve()

		# mcts
		mcts_solver = alphatsp.solvers.mcts.MCTSSolver(args, tsp)
		mcts_tour, mcts_tour_len = mcts_solver.solve()

		# benchmarks
		greedy_tour, greedy_tour_len = alphatsp.solvers.heuristics.nearest_greedy(tsp)
		exact_tour, exact_tour_len = alphatsp.solvers.exact.exact(tsp)

		# log lengths
		policy_lens.append(policy_tour_len)
		policymcts_lens.append(policymcts_tour_len)
		mcts_lens.append(mcts_tour_len)
		greedy_lens.append(greedy_tour_len)
		exact_lens.append(exact_tour_len)

	# average results
	policy_avg     = np.mean(policy_lens)
	policymcts_avg = np.mean(policymcts_lens)
	mcts_avg       = np.mean(mcts_lens)
	greedy_avg     = np.mean(greedy_lens)
	exact_avg      = np.mean(exact_lens)

	# print results
	print("\nResults:")
	print(f"Policy:\t\t{policy_avg}")
	print(f"Policy+MCTS:\t{policymcts_avg}")
	print(f"MCTS:\t\t{mcts_avg}")
	print(f"Greedy:\t\t{greedy_avg}")
	print(f"Exact:\t\t{exact_avg}")

	# save network
	torch.save(policy_network.state_dict(), "saves/policy_network.pth")
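The producer/consumer contract used above is easy to lose in the details: generate_examples fills train_queue, trainer drains it until the parent pushes a None sentinel, then hands the losses and the network back through model_queue. Below is a self-contained toy sketch of the same wiring, with placeholder payloads instead of the real alphatsp functions.

import random
from multiprocessing import Manager, Process

def generate_examples(train_queue, n_examples, N, D, args=None):
    # Producer: push placeholder training examples onto the shared queue.
    for _ in range(n_examples):
        train_queue.put([random.random() for _ in range(D)])

def trainer(train_queue, model_queue, args=None):
    # Consumer: "train" until the parent pushes the None sentinel, then hand the
    # collected losses and the (placeholder) trained model back via model_queue.
    losses = []
    while True:
        example = train_queue.get()
        if example is None:
            break
        losses.append(sum(example))      # stand-in for a real training step
    model_queue.put(losses)
    model_queue.put("trained-model")     # stand-in for the updated policy network

if __name__ == "__main__":
    manager = Manager()
    train_q, model_q = manager.Queue(), manager.Queue()
    producers = [Process(target=generate_examples, args=(train_q, 5, 10, 2))
                 for _ in range(2)]
    for p in producers:
        p.start()
    c = Process(target=trainer, args=(train_q, model_q))
    c.start()
    for p in producers:
        p.join()
    train_q.put(None)                    # sentinel: no more examples
    c.join()
    print(model_q.get(), model_q.get())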
Beispiel #59
0
def write(q):
    print('writer child process started: %s' % (os.getpid()))
    for chr in ["A", "B", "C", "D"]:
        q.put(chr)
        time.sleep(1)
    print("writer child process finished: %s" % (os.getpid()))


def read(q):
    print("reader child process started: %s" % (os.getpid()))
    while True:
        value = q.get(True)
        print("value = " + value)
    print("reader child process finished: %s" % (os.getpid()))  # never reached: the loop above runs forever


if __name__ == '__main__':
    # the parent process creates the queue and passes it to the child processes
    q = Queue()
    wp = Process(target=write, args=(q, ))
    rp = Process(target=read, args=(q, ))
    wp.start()
    rp.start()
    # wait for the writer to finish
    wp.join()
    # the reader runs an infinite loop and can never be joined, so force-terminate it
    rp.terminate()

    print("parent process finished")
Beispiel #60
0
def process_dump(
    input_file,
    template_file,
    out_file,
    file_size,
    file_compress,
    process_count,
    html_safe,
):
    """
    :param input_file: name of the wikipedia dump file; '-' to read from stdin
    :param template_file: optional file with template definitions.
    :param out_file: directory where to store extracted data, or '-' for stdout
    :param file_size: max size of each extracted file, or None for no max (one file)
    :param file_compress: whether to compress files with bzip.
    :param process_count: number of extraction processes to spawn.
    :param html_safe: whether to escape HTML entities in the extracted text.
    """
    global knownNamespaces
    global templateNamespace, templatePrefix
    global moduleNamespace, modulePrefix

    urlbase = ""  # This is obtained from <siteinfo>

    input = decode_open(input_file)

    # collect siteinfo
    for line in input:
        line = line  # .decode('utf-8')
        m = tagRE.search(line)
        if not m:
            continue
        tag = m.group(2)
        if tag == "base":
            # discover urlbase from the xml dump file
            # /mediawiki/siteinfo/base
            base = m.group(3)
            urlbase = base[:base.rfind("/")]
        elif tag == "namespace":
            knownNamespaces.add(m.group(3))
            if re.search('key="10"', line):
                templateNamespace = m.group(3)
                templatePrefix = templateNamespace + ":"
            elif re.search('key="828"', line):
                moduleNamespace = m.group(3)
                modulePrefix = moduleNamespace + ":"
        elif tag == "/siteinfo":
            break

    if expand_templates:
        # preprocess
        template_load_start = default_timer()
        if template_file and os.path.exists(template_file):
            logging.info(
                "Preprocessing '%s' to collect template definitions: this may take some time.",
                template_file,
            )
            file = decode_open(template_file)
            templates = load_templates(file)
            file.close()
        else:
            if input_file == "-":
                # can't scan then reset stdin; must error w/ suggestion to specify template_file
                raise ValueError(
                    "to use templates with stdin dump, must supply explicit template-file"
                )
            logging.info(
                "Preprocessing '%s' to collect template definitions: this may take some time.",
                input_file,
            )
            templates = load_templates(input, template_file)
            input.close()
            input = decode_open(input_file)
        template_load_elapsed = default_timer() - template_load_start
        logging.info("Loaded %d templates in %.1fs", templates,
                     template_load_elapsed)

    if out_file == "-":
        output = sys.stdout
        if file_compress:
            logging.warning(
                "writing to stdout, so no output compression (use an external tool)"
            )
    else:
        nextFile = NextFile(out_file)
        output = OutputSplitter(nextFile, file_size, file_compress)

    # process pages
    logging.info("Starting page extraction from %s.", input_file)
    extract_start = default_timer()

    # Parallel Map/Reduce:
    # - pages to be processed are dispatched to workers
    # - a reduce process collects the results, sort them and print them.

    maxsize = 10 * process_count
    # output queue
    output_queue = Queue(maxsize=maxsize)

    # Reduce job that sorts and prints output
    reduce = Process(target=reduce_process, args=(output_queue, output))
    reduce.start()

    # initialize jobs queue
    jobs_queue = Queue(maxsize=maxsize)

    # start worker processes
    logging.info("Using %d extract processes.", process_count)
    workers = []
    for _ in range(max(1, process_count)):
        extractor = Process(target=extract_process,
                            args=(jobs_queue, output_queue, html_safe))
        extractor.daemon = True  # only live while parent process lives
        extractor.start()
        workers.append(extractor)

    # Mapper process

    # we collect individual lines, since str.join() is significantly faster
    # than concatenation
    page = []
    id = ""
    revid = ""
    last_id = ""
    ordinal = 0  # page count
    inText = False
    redirect = False
    for line in input:
        if "<" not in line:  # faster than doing re.search()
            if inText:
                page.append(line)
            continue
        m = tagRE.search(line)
        if not m:
            continue
        tag = m.group(2)
        if tag == "page":
            page = []
            redirect = False
        elif tag == "id" and not id:
            id = m.group(3)
        elif tag == "id" and id:  # <revision> <id></id> </revision>
            revid = m.group(3)
        elif tag == "title":
            title = m.group(3)
        elif tag == "redirect":
            redirect = True
        elif tag == "text":
            inText = True
            line = line[m.start(3):m.end(3)]
            page.append(line)
            if m.lastindex == 4:  # open-close
                inText = False
        elif tag == "/text":
            if m.group(1):
                page.append(m.group(1))
            inText = False
        elif inText:
            page.append(line)
        elif tag == "/page":
            colon = title.find(":")
            if (colon < 0 or (title[:colon] in acceptedNamespaces)
                    and id != last_id and not redirect
                    and not title.startswith(templateNamespace)):
                job = (id, revid, urlbase, title, page, ordinal)
                jobs_queue.put(job)  # goes to any available extract_process
                last_id = id
                ordinal += 1
            id = ""
            revid = ""
            page = []

    input.close()

    # signal termination
    for _ in workers:
        jobs_queue.put(None)
    # wait for workers to terminate
    for w in workers:
        w.join()

    # signal end of work to reduce process
    output_queue.put(None)
    # wait for it to finish
    reduce.join()

    if output != sys.stdout:
        output.close()
    extract_duration = default_timer() - extract_start
    extract_rate = ordinal / extract_duration
    logging.info(
        "Finished %d-process extraction of %d articles in %.1fs (%.1f art/s)",
        process_count,
        ordinal,
        extract_duration,
        extract_rate,
    )
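The extract_process and reduce_process workers dispatched above are defined elsewhere in the extractor. A hypothetical sketch of their queue protocol (None as the termination sentinel, (ordinal, text) pairs so the reducer can restore page order) could look like the following, with the actual article extraction replaced by a placeholder.

def extract_process(jobs_queue, output_queue, html_safe):
    # Map side: pull jobs until the None sentinel, emit (ordinal, text) pairs.
    # html_safe is forwarded to the real extractor; unused in this placeholder.
    while True:
        job = jobs_queue.get()
        if job is None:
            break
        id, revid, urlbase, title, page, ordinal = job
        text = "".join(page)          # stand-in for the real article extraction
        output_queue.put((ordinal, text))

def reduce_process(output_queue, output):
    # Reduce side: buffer out-of-order results and write them back in page order.
    next_ordinal = 0
    pending = {}
    while True:
        item = output_queue.get()
        if item is None:
            break
        ordinal, text = item
        pending[ordinal] = text
        while next_ordinal in pending:
            output.write(pending.pop(next_ordinal))
            next_ordinal += 1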