Example #1
 def __init__(self, number_of_threads=10):
     self.log = logger.get("infra")
     self.log.info("Initiating TaskManager with %d threads" %
                   number_of_threads)
     self.number_of_threads = number_of_threads
     self.pool = Executors.newFixedThreadPool(self.number_of_threads)
     self.futures = dict()
Example #2
 def __init__(self, sentiworddicname, amplifiersname, decrementersname):
     self.sentiworddic = importsentixcomplex(sentiworddicname)
     self.amplifiers = loadlines(amplifiersname)
     self.decrementers = loadlines(decrementersname)
     # start thread pool
     self.k = available_cpu_count()
     self.pool = Executors.newFixedThreadPool(self.k)
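available_cpu_count() is used by several examples here but never defined. Under Jython it presumably just asks the JVM for its processor count; a minimal sketch, assuming that behavior:

# Hypothetical stand-in for the undefined available_cpu_count helper.
from java.lang import Runtime

def available_cpu_count():
    # Number of processors the JVM may use; same value Examples 10 and 30 query directly.
    return Runtime.getRuntime().availableProcessors()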
Example #3
def main(opts):
        
    # set up our channel
    conn_factory = ConnectionFactory()
    conn_factory.setUri(config['RABBITMQ_URI'])
    conn = conn_factory.newConnection()
    channel = conn.createChannel()
    channel.queueDeclare(opts.queue_name, False, False, False, None)
    channel.basicQos(1)  # prefetch 1: deliver only one message before it is acknowledged
    
    workers = [PdfExtractor(channel, opts) for i in xrange(opts.workers)]    
    
    log.info("creating pool with %d threads" % opts.workers)
    tpool = Executors.newFixedThreadPool(opts.workers)

    log.info("executing threads")
    futures = tpool.invokeAll(workers)

    log.info("shutting down thread pool")
    tpool.shutdown()

    try:
        if not tpool.awaitTermination(50, TimeUnit.SECONDS):
            log.info("thread pool not shutting down; trying again")
            tpool.shutdownNow()
            if not tpool.awaitTermination(50, TimeUnit.SECONDS):
                log.error("Pool did not terminate")
    except InterruptedException:
        log.info("exception during thread pool shutdown; trying again")
        tpool.shutdownNow()
        Thread.currentThread().interrupt()    
Example #4
	def __init__(self, sentiworddicname, amplifiersname, decrementersname):
		self.sentiworddic = importsentixcomplex(sentiworddicname)
		self.amplifiers = loadlines(amplifiersname)
		self.decrementers = loadlines(decrementersname)
		# start thread pool
		self.k = available_cpu_count()
		self.pool = Executors.newFixedThreadPool(self.k)
Example #5
    def load_data(self):
        executors = []
        num_executors = 5
        doc_executors = 5
        pool = Executors.newFixedThreadPool(5)

        self.num_items = self.total_num_items / doc_executors
        for i in xrange(doc_executors):
            executors.append(
                GleambookUser_Docloader(self.bucket,
                                        self.num_items,
                                        self.items_start_from +
                                        i * self.num_items,
                                        batch_size=2000))
            executors.append(
                GleambookMessages_Docloader(self.msg_bucket,
                                            self.num_items,
                                            self.items_start_from +
                                            i * self.num_items,
                                            batch_size=2000))
        futures = pool.invokeAll(executors)

        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        self.updates_from = self.items_start_from
        self.deletes_from = self.items_start_from + self.total_num_items / 10
        self.items_start_from += self.total_num_items
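shutdown_and_await_termination(pool, timeout) is called here and in later examples but never defined. It presumably follows the standard two-phase ExecutorService shutdown that Example 3 writes out inline; a sketch under that assumption:

# Sketch of the undefined shutdown_and_await_termination helper,
# modeled on the inline shutdown sequence in Example 3.
from java.lang import Thread, InterruptedException
from java.util.concurrent import TimeUnit

def shutdown_and_await_termination(pool, timeout_seconds):
    pool.shutdown()  # stop accepting new tasks
    try:
        if not pool.awaitTermination(timeout_seconds, TimeUnit.SECONDS):
            pool.shutdownNow()  # cancel tasks still running
            if not pool.awaitTermination(timeout_seconds, TimeUnit.SECONDS):
                print "Pool did not terminate"
    except InterruptedException:
        pool.shutdownNow()
        Thread.currentThread().interrupt()  # preserve the interrupt status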
Example #6
def main(opts):

    # set up our channel
    conn_factory = ConnectionFactory()
    conn_factory.setUri(config['RABBITMQ_URI'])
    conn = conn_factory.newConnection()
    channel = conn.createChannel()
    channel.queueDeclare(opts.queue_name, False, False, False, None)
    channel.basicQos(1)
    # prefetch 1: deliver only one message before it is acknowledged

    workers = [PdfExtractor(channel, opts) for i in xrange(opts.workers)]

    log.info("creating pool with %d threads" % opts.workers)
    tpool = Executors.newFixedThreadPool(opts.workers)

    log.info("executing threads")
    futures = tpool.invokeAll(workers)

    log.info("shutting down thread pool")
    tpool.shutdown()

    try:
        if not tpool.awaitTermination(50, TimeUnit.SECONDS):
            log.info("thread pool not shutting down; trying again")
            tpool.shutdownNow()
            if not tpool.awaitTermination(50, TimeUnit.SECONDS):
                log.error("Pool did not terminate")
    except InterruptedException:
        log.info("exception during thread pool shutdown; trying again")
        tpool.shutdownNow()
        Thread.currentThread().interrupt()
Example #7
 def __init__(self, number_of_threads=10):
     self.log = logging.getLogger("infra")
     self.log.debug("Initiating TaskManager with {0} threads"
                    .format(number_of_threads))
     self.number_of_threads = number_of_threads
     self.pool = Executors.newFixedThreadPool(self.number_of_threads)
     self.futures = {}
     self.tasks = []
Example #8
 def setUp(self):
     super(SuperIndexSearcherTest, self).setUp()
     self.executor = Executors.newFixedThreadPool(5)
     indexDirectory = SimpleFSDirectory(File(self.tempdir))
     conf = IndexWriterConfig(Version.LUCENE_4_10_0, MerescoStandardAnalyzer())
     self.writer = IndexWriter(indexDirectory, conf)
     self.reader = DirectoryReader.open(self.writer, True)
     self.sis = SuperIndexSearcher(self.reader)
Example #9
	def __init__(self, sentiworddicname=None, amplifiersname=None, decrementersname=None, serializedDictsName=None):
		if serializedDictsName is not None:
			(self.sentiworddic,self.amplifiers,self.decrementers) = loaddictionariesfromfile(serializedDictsName)
		else:
			self.sentiworddic = importsentixcomplex(sentiworddicname)
			self.amplifiers = loadlines(amplifiersname)
			self.decrementers = loadlines(decrementersname)
		# start thread pool
		self.k = available_cpu_count()
		self.pool = Executors.newFixedThreadPool(self.k)
Example #10
	def play(self):
		num_threads = Runtime.getRuntime().availableProcessors()
		executor = Executors.newFixedThreadPool(num_threads)
		callables = [_Worker(start_pos) for start_pos in self.positions]
		futures = executor.invokeAll(callables)
		# calculate stats
		for future in futures:
			worker = future.get()
			self.process_scores(worker)
		executor.shutdown()
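invokeAll() takes java.util.concurrent.Callable instances, and future.get() returns whatever call() returns — here the worker itself, so process_scores() can read its accumulated state. _Worker is not shown; its shape is presumably something like this sketch:

# Hypothetical outline of the _Worker callable used above.
from java.util.concurrent import Callable

class _Worker(Callable):
    def __init__(self, start_pos):
        self.start_pos = start_pos
        self.scores = []

    def call(self):
        # ... play out games from self.start_pos, filling self.scores ...
        return self  # handed back by future.get() for stats processing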
Example #11
    def testParallelMultiSort(self):
        """
        test a variety of sorts using a parallel multisearcher
        """
        threadPool = Executors.newFixedThreadPool(self.getRandomNumber(2, 8), NamedThreadFactory("testParallelMultiSort"))
        searcher = IndexSearcher(MultiReader([self.searchX.getIndexReader(),
                                              self.searchY.getIndexReader()]),
                                 threadPool)
        self._runMultiSorts(searcher, False)

        threadPool.shutdown()
        threadPool.awaitTermination(1000L, TimeUnit.MILLISECONDS)
Example #12
 def getDataWriter(self, meta):
     """ generated source for method getDataWriter """
     if self.writer is None:
         with lock_for_object(self):  # double-checked locking for lazy init
             if self.writer is None:
                 self.writerExecutor = Executors.newFixedThreadPool(
                     Runtime.getRuntime().availableProcessors())
                 self.writer = DefaultTableStoreWriter(
                     self.asyncClient, config.getDataTableName(),
                     WriterConfig(), None, self.writerExecutor)
     return TableStoreDataWriter(self.writer,
                                 self.config.getDataTableName(), meta)
Example #13
def newFixedThreadPool(n_threads=0, name="jython-worker"):
    """ Return an ExecutorService whose Thread instances belong
      to the same group as the caller's Thread, and therefore will
      be interrupted when the caller is.
      n_threads: number of threads to use.
                 If zero, use as many as available CPUs.
      If negative, use the available CPU count minus its
                 absolute value, but at least one. """
    if n_threads <= 0:
        n_threads = max(1,
                        Runtime.getRuntime().availableProcessors() + n_threads)
    return Executors.newFixedThreadPool(n_threads,
                                        ThreadFactorySameGroup(name))
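ThreadFactorySameGroup is not defined in this snippet. To honor the docstring it presumably creates each pool thread inside the calling thread's ThreadGroup; a sketch, assuming that intent:

# Hypothetical ThreadFactorySameGroup matching the docstring's contract.
from java.lang import Thread
from java.util.concurrent import ThreadFactory

class ThreadFactorySameGroup(ThreadFactory):
    def __init__(self, name):
        # Capture the caller's group so pool threads share its interruption.
        self.group = Thread.currentThread().getThreadGroup()
        self.name = name
        self.count = 0

    def newThread(self, runnable):
        self.count += 1
        return Thread(self.group, runnable, "%s-%i" % (self.name, self.count))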
Example #14
 def run_queries(self):
     num_executors = 1
     executors = []
     pool = Executors.newFixedThreadPool(num_executors)
     for i in xrange(num_executors):
         executors.append(
             QueryRunner(random.choice(self.queries), self.num_query,
                         self.cbas_util))
     futures = pool.invokeAll(executors)
     for future in futures:
         print future.get(num_executors, TimeUnit.SECONDS)
     print "Executors completed!!"
     shutdown_and_await_termination(pool, num_executors)
Example #15
def runBlockMatching(params_list, image_pairs):
	MAX_CONCURRENT = 20
	
	block_matching_inputs = zip(image_pairs, params_list)

	pool = Executors.newFixedThreadPool(MAX_CONCURRENT)
	block_matchers = [BlockMatcher(pair[0], pair[1], params) for (pair, params) in block_matching_inputs]
	futures = pool.invokeAll(block_matchers)

	for future in futures:
		print future.get(5, TimeUnit.SECONDS)

	shutdownAndAwaitTermination(pool, 5)
Example #16
    def __init__(self, state, burpCallbacks):
        """
        Main constructor. Creates a fixed thread pool for threaded
        operations such as issuing multiple HTTP requests. All calls to
        methods of this class whose names end in "Clicked" run in an
        independent thread to avoid locking up the Burp UI.

        Args:
            state: the state object.
            burpCallbacks: the burp callbacks object.
        """

        self.state = state
        self.burpCallbacks = burpCallbacks
        self.lock = Lock()
        self.extensions = []

        self.maxConcurrentRequests = 8

        # Avoid instantiating during unit test as it is not needed.
        if not utility.INSIDE_UNIT_TEST:
            self.state.executorService = Executors.newFixedThreadPool(16)
            self.state.fuzzExecutorService = Executors.newFixedThreadPool(16)

            # Beware: if the second argument to two of these importBurpExtension
            # calls is the same, the same extension will be loaded twice. The fix
            # is to recompile the JARs so the classes do not share a name.
            log("[+] Loading Backslash Powered Scanner")
            self.extensions.append(
                ("bps",
                 utility.importBurpExtension(
                     "lib/backslash-powered-scanner-fork.jar",
                     'burp.BackslashBurpExtender', burpCallbacks)))

            # log("[+] Loading SHELLING")
            # self.extensions.append(("shelling", utility.importBurpExtension("lib/shelling.jar", 'burp.BurpExtender', burpCallbacks)))

            log("[+] Loading ParamMiner")
            self.extensions.append(
                ("paramminer",
                 utility.importBurpExtension("lib/param-miner-fork.jar",
                                             'paramminer.BurpExtender',
                                             burpCallbacks)))
Example #17
    def searcher(self):
        if not self._reopenSearcher:
            return self._searcher

        if self._settings.multithreaded:
            if self._executor:
                self._executor.shutdown()
            self._executor = Executors.newFixedThreadPool(
                self._numberOfConcurrentTasks)
            self._searcher = SuperIndexSearcher(self._reader, self._executor, self._numberOfConcurrentTasks)
        else:
            self._searcher = IndexSearcher(self._reader)
        self._searcher.setSimilarity(self._similarity)
        self._reopenSearcher = False
        return self._searcher
Example #18
 def __init__(self,
              sentiworddicname=None,
              amplifiersname=None,
              decrementersname=None,
              serializedDictsName=None):
     if serializedDictsName is not None:
         (self.sentiworddic, self.amplifiers,
          self.decrementers) = loaddictionariesfromfile(serializedDictsName)
     else:
         self.sentiworddic = importsentixcomplex(sentiworddicname)
         self.amplifiers = loadlines(amplifiersname)
         self.decrementers = loadlines(decrementersname)
     # start thread pool
     self.k = available_cpu_count()
     self.pool = Executors.newFixedThreadPool(self.k)
Example #19
 def start(cls):
     try:
         inetSocketAddress = InetSocketAddress(cls.host, cls.port)
         cls.httpServer = HttpServer.create(inetSocketAddress,
                                            cls.socketBackLog)
         cls.httpServer.createContext("/callback", CallbackHandler())
         cls.httpServer.createContext("/start", StartHandler())
         cls.httpServer.setExecutor(
             Executors.newFixedThreadPool(cls.poolsize))
         cls.httpServer.start()
         logger.info("HTTPServerCallback is listening on %s %s" %
                     (cls.host, cls.port))
     except IOException, e:
         logger.error('(start) %s : stacktrace=%s' % (cls.__name__, e))
         raise UnboundLocalError('(start) %s : stacktrace=%s' %
                                 (cls.__name__, e))
Example #20
def getShiftFromViews(v1, v2):
    # Thread pool
    exe = Executors.newFixedThreadPool(
        Runtime.getRuntime().availableProcessors())
    try:
        # PCM: phase correlation matrix
        pcm = PhaseCorrelation2.calculatePCM(
            v1, v2, ArrayImgFactory(FloatType()), FloatType(),
            ArrayImgFactory(ComplexFloatType()), ComplexFloatType(), exe)
        # Minimum image overlap to consider, in pixels
        minOverlap = v1.dimension(0) / 10
        # Returns an instance of PhaseCorrelationPeak2
        peak = PhaseCorrelation2.getShift(pcm, v1, v2, nHighestPeaks,
                                          minOverlap, True, True, exe)
    except Exception, e:
        print e
Example #21
    def searcher(self):
        if not self._reopenSearcher:
            return self._searcher

        if self._settings.multithreaded:
            if self._executor:
                self._executor.shutdown()
            self._executor = Executors.newFixedThreadPool(
                self._numberOfConcurrentTasks)
            self._searcher = SuperIndexSearcher(self._reader, self._executor,
                                                self._numberOfConcurrentTasks)
        else:
            self._searcher = IndexSearcher(self._reader)
        self._searcher.setSimilarity(self._similarity)
        self._reopenSearcher = False
        return self._searcher
Example #22
def runBlockMatchingAll(wafer_title):
	MAX_CONCURRENT = 40
	params = BlockMatcherParameters(wafer_title=wafer_title)
	start = 0
	finish = len(os.listdir(params.input_folder))
	neighbors = 2
	image_pairs = make_image_pairs(start, finish, neighbors)
	# neighbors = 2
	# 1200, 2400, 3600, 4500, .. 
	# neighbors = 4
	# 1210 - 1213
	# 1267 - 1270
	# neighbors = 3
	# 1334 - 1336


	# Log file
	t = time.localtime()
	ts = str(t[0]) + str(t[1]) + str(t[2]) + str(t[3]) + str(t[4]) + str(t[5])
	writefile = params.output_folder + ts + "_block_matching_loop_log.txt"
	wf = open(writefile, 'w')
	wf.write(time.asctime() + "\n")
	wf.write(writefile + "\n")
	param_values = vars(params)
	for key in param_values:
		wf.write(key + "\t" + str(param_values[key]) + "\n")
	wf.write("B_idx\t" + 
			"A_idx\t" + 
			"B_file\t" + 
			"A_file\t" + 
			"matches\t" + 
			"smooth_removed\t" + 
			"max_displacement\t" + 
			"eff_dist\t" + 
			"eff_sigma\t" +
			"mesh\n")

	pool = Executors.newFixedThreadPool(MAX_CONCURRENT)
	block_matchers = [BlockMatcher(pair[0], pair[1], params, wf) for pair in image_pairs]
	futures = pool.invokeAll(block_matchers)

	for future in futures:
		print future.get(5, TimeUnit.SECONDS)

	wf.write(time.asctime() + "\n")
	wf.close()
	shutdownAndAwaitTermination(pool, 5)
Example #23
    def update_data(self):
        pool = Executors.newFixedThreadPool(5)
        executors = []
        num_executors = 5
        doc_executors = 4

        executors.append(
            GleambookUser_Docloader(self.bucket, 2 * self.num_items / 10,
                                    self.updates_from, "update"))
        #         executors.append(GleambookUser_Docloader(bucket, num_items/10, deletes_from,"delete"))
        executors.append(
            GleambookMessages_Docloader(self.msg_bucket,
                                        2 * self.num_items / 10,
                                        self.updates_from, "update"))
        #         executors.append(GleambookMessages_Docloader(msg_bucket, num_items/10, deletes_from,"delete"))
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
Example #24
def register(view_index, filepaths, modelclass, csv_dir, params):
    n_threads = Runtime.getRuntime().availableProcessors()
    exe = Executors.newFixedThreadPool(n_threads)
    try:
        name = "matrices-view-%i" % view_index
        matrices = loadMatrices(name, csv_dir)
        if not matrices:
            matrices = computeForwardTransforms(filepaths[view_index],
                                                klb_loader, getCalibration,
                                                csv_dir, exe, modelclass,
                                                params)
            saveMatrices(name, matrices, csv_dir)  # persist the computed matrices
    finally:
        exe.shutdown()

    transforms = asBackwardConcatTransforms(matrices,
                                            transformclass=Translation3D)
    path_transforms = dict(izip(filepaths[view_index], transforms))
    registered_loader = RegisteredLoader(klb_loader, path_transforms)

    return Load.lazyStack(filepaths[view_index], registered_loader)
Example #25
    def load(self, k, v, docs=10000, server="localhost", bucket="default"):
        cluster = CouchbaseCluster.create(server)
        cluster.authenticate("Administrator", "password")
        bucket = cluster.openBucket(bucket)

        pool = Executors.newFixedThreadPool(5)
        docloaders = []
        num_executors = 5
        total_num_executors = 5
        num_docs = docs / total_num_executors
        for i in xrange(total_num_executors):
            docloaders.append(
                DocloaderTask(bucket, num_docs, i * num_docs, k, v))
        futures = pool.invokeAll(docloaders)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)

        print "Executors completed!!"
        shutdown_and_await_termination(pool, 5)
        bucket.close()
        cluster.disconnect()
Example #26
    def execute(self, frees, threads):
        """
        Begins multithreaded execution

        Parameters
        ----------
        frees: list(Freeway)
            list of freeways to begin multithreaded execution
        threads:
            number of threads
        """
        from java.util.concurrent import Executors, ExecutorCompletionService
        pool = Executors.newFixedThreadPool(threads)
        ecs = ExecutorCompletionService(pool)
        for f in frees:
            ecs.submit(f)

        submitted = len(frees)
        while submitted > 0:
            result = ecs.take().get()
            print str(result)
            submitted -= 1
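ExecutorCompletionService.take() returns futures in completion order, not submission order, so faster freeways print first. Each Freeway must be submittable, i.e. a java.util.concurrent.Callable; a hedged usage sketch with a hypothetical stand-in:

# FakeFreeway is a stand-in for illustration only, not the real Freeway class.
from java.util.concurrent import Callable

class FakeFreeway(Callable):
    def __init__(self, ident):
        self.ident = ident

    def call(self):
        # Simulate one freeway's computation and return its result.
        return "freeway %d done" % self.ident

# e.g. from the owning object:
#   self.execute([FakeFreeway(i) for i in xrange(4)], threads=2)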
Example #27
# Extract red channel: alpha:0, red:1, green:2, blue:3
red = Converters.argbChannel(img, 1)

# Cut out two overlapping ROIs
r1 = Rectangle(1708, 680, 1792, 1760)
r2 = Rectangle(520, 248, 1660, 1652)
cut1 = Views.zeroMin(
    Views.interval(red, [r1.x, r1.y],
                   [r1.x + r1.width - 1, r1.y + r1.height - 1]))
cut2 = Views.zeroMin(
    Views.interval(red, [r2.x, r2.y],
                   [r2.x + r2.width - 1, r2.y + r2.height - 1]))

# Thread pool
exe = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors())

try:
    # PCM: phase correlation matrix
    pcm = PhaseCorrelation2.calculatePCM(cut1, cut2,
                                         ArrayImgFactory(FloatType()),
                                         FloatType(),
                                         ArrayImgFactory(ComplexFloatType()),
                                         ComplexFloatType(), exe)

    # Number of phase correlation peaks to check with cross-correlation
    nHighestPeaks = 10

    # Minimum image overlap to consider, in pixels
    minOverlap = cut1.dimension(0) / 10
Example #28
    def test_ups_volume(self):
        nodes_in_cluster = [self.servers[0]]
        print "Start Time: %s" % str(
            time.strftime("%H:%M:%S", time.gmtime(time.time())))

        ########################################################################################################################
        self.log.info("Add a KV nodes - 2")
        self.query_node = self.servers[1]
        rest = RestConnection(self.servers[1])
        rest.set_data_path(data_path=self.servers[1].data_path,
                           index_path=self.servers[1].index_path,
                           cbas_path=self.servers[1].cbas_path)
        result = self.add_node(self.servers[1], rebalance=False)
        self.assertTrue(result, msg="Failed to add N1QL/Index node.")

        self.log.info("Add a KV nodes - 3")
        rest = RestConnection(self.servers[2])
        rest.set_data_path(data_path=self.kv_servers[1].data_path,
                           index_path=self.kv_servers[1].index_path,
                           cbas_path=self.kv_servers[1].cbas_path)
        result = self.add_node(self.kv_servers[1],
                               services=["kv"],
                               rebalance=False)
        self.assertTrue(result, msg="Failed to add KV node.")

        self.log.info("Add one more KV node")
        rest = RestConnection(self.servers[3])
        rest.set_data_path(data_path=self.kv_servers[3].data_path,
                           index_path=self.kv_servers[3].index_path,
                           cbas_path=self.kv_servers[3].cbas_path)
        result = self.add_node(self.kv_servers[3],
                               services=["kv"],
                               rebalance=False)
        self.assertTrue(result, msg="Failed to add KV node.")

        self.log.info("Add one more KV node")
        rest = RestConnection(self.servers[4])
        rest.set_data_path(data_path=self.kv_servers[4].data_path,
                           index_path=self.kv_servers[4].index_path,
                           cbas_path=self.kv_servers[4].cbas_path)
        result = self.add_node(self.kv_servers[4],
                               services=["kv"],
                               rebalance=False)
        self.assertTrue(result, msg="Failed to add KV node.")

        nodes_in_cluster = nodes_in_cluster + [
            self.servers[1], self.servers[2], self.servers[3], self.servers[4]
        ]
        ########################################################################################################################
        self.log.info("Step 2: Create Couchbase buckets.")
        self.create_required_buckets()

        ########################################################################################################################
        self.log.info(
            "Step 3: Create 10M docs (~1k each) across 8 couchbase buckets.")
        env = DefaultCouchbaseEnvironment.builder().mutationTokensEnabled(
            True).computationPoolSize(5).socketConnectTimeout(
                100000).connectTimeout(100000).maxRequestLifetime(
                    TimeUnit.SECONDS.toMillis(300)).build()
        cluster = CouchbaseCluster.create(env, self.master.ip)
        cluster.authenticate("Administrator", "password")
        bucket = cluster.openBucket("GleambookUsers")
        msg_bucket = cluster.openBucket("GleambookMessages")

        pool = Executors.newFixedThreadPool(5)
        items_start_from = 0
        total_num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 5
        num_items = total_num_items / num_executors
        for i in xrange(doc_executors):
            executors.append(
                GleambookUser_Docloader(bucket, num_items,
                                        items_start_from + i * num_items))
            executors.append(
                GleambookMessages_Docloader(msg_bucket, num_items,
                                            items_start_from + i * num_items))
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items / 10
        items_start_from += total_num_items

        ########################################################################################################################
        self.log.info("Step 6: Verify the items count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 8: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4

        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, updates_from,
                                    "update"))
        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, deletes_from,
                                    "delete"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        updates_from, "update"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        deletes_from, "delete"))
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
        ########################################################################################################################
        self.log.info("Step 9: Connect cbas buckets.")
        self.connect_cbas_buckets()
        self.sleep(10, "Wait for the ingestion to complete")

        ########################################################################################################################
        self.log.info("Step 10: Verify the items count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info(
            "Step 12: When 11 is in progress do a KV Rebalance in of 1 nodes.")
        rest = RestConnection(self.servers[5])
        rest.set_data_path(data_path=self.servers[5].data_path,
                           index_path=self.servers[5].index_path,
                           cbas_path=self.servers[5].cbas_path)
        rebalance = self.cluster.async_rebalance(nodes_in_cluster,
                                                 [self.servers[5]], [])
        nodes_in_cluster += [self.servers[5]]
        ########################################################################################################################
        self.log.info("Step 11: Create 10M docs.")
        pool = Executors.newFixedThreadPool(5)
        total_num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4
        num_items = total_num_items / doc_executors
        for i in xrange(doc_executors):
            executors.append(
                GleambookUser_Docloader(bucket, num_items,
                                        items_start_from + i * num_items))
            executors.append(
                GleambookMessages_Docloader(msg_bucket, num_items,
                                            items_start_from + i * num_items))
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items / 10
        items_start_from += total_num_items

        ########################################################################################################################
        self.log.info("Step 13: Wait for rebalance to complete.")
        rebalance.get_result()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
        self.sleep(20)

        ########################################################################################################################
        self.log.info("Step 14: Verify the items count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 15: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4

        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, updates_from,
                                    "update"))
        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, deletes_from,
                                    "delete"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        updates_from, "update"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        deletes_from, "delete"))

        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
        ########################################################################################################################
        self.log.info(
            "Step 16: Verify Results that 1M docs gets deleted from analytics datasets."
        )
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 17: Disconnect CBAS buckets.")
        self.disconnect_cbas_buckets()

        ########################################################################################################################
        self.log.info("Step 18: Create 10M docs.")
        pool = Executors.newFixedThreadPool(5)
        total_num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4
        num_items = total_num_items / doc_executors
        for i in xrange(doc_executors):
            executors.append(
                GleambookUser_Docloader(bucket, num_items,
                                        items_start_from + i * num_items))
            executors.append(
                GleambookMessages_Docloader(msg_bucket, num_items,
                                            items_start_from + i * num_items))
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items / 10
        items_start_from += total_num_items

        ########################################################################################################################
        self.log.info("Step 20: Verify the docs count.")
        self.validate_items_count()

        ########################################################################################################################
        pool = Executors.newFixedThreadPool(5)
        executors = []
        num_executors = 5

        self.log.info(
            "Step 22: When 21 is in progress do a KV Rebalance out of 2 nodes."
        )
        rebalance = self.cluster.async_rebalance(nodes_in_cluster, [],
                                                 self.servers[1:3])
        nodes_in_cluster = [
            node for node in nodes_in_cluster if node not in self.servers[1:3]
        ]

        futures = pool.invokeAll(executors)
        self.log.info("Step 23: Wait for rebalance.")
        rebalance.get_result()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
        self.sleep(20)

        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        ########################################################################################################################
        self.log.info("Step 24: Create 10M docs.")
        pool = Executors.newFixedThreadPool(5)
        total_num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 6
        doc_executors = 4
        num_items = total_num_items / doc_executors
        for i in xrange(doc_executors):
            executors.append(
                GleambookUser_Docloader(bucket, num_items,
                                        items_start_from + i * num_items))
            executors.append(
                GleambookMessages_Docloader(msg_bucket, num_items,
                                            items_start_from + i * num_items))

        ##################################################### NEED TO BE UPDATED ##################################################################
        self.log.info(
            "Step 25: When 24 is in progress do a KV Rebalance in of 2 nodes.")
        for node in self.servers[1:3]:
            rest = RestConnection(node)
            rest.set_data_path(data_path=node.data_path,
                               index_path=node.index_path,
                               cbas_path=node.cbas_path)
        rebalance = self.cluster.async_rebalance(nodes_in_cluster,
                                                 self.servers[1:3], [])
        nodes_in_cluster = nodes_in_cluster + self.servers[1:3]
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items / 10
        items_start_from += total_num_items

        self.log.info("Step 27: Wait for rebalance to complete.")
        rebalance.get_result()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")

        ########################################################################################################################
        self.log.info("Step 28: Verify the docs count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 29: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4

        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, updates_from,
                                    "update"))
        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, deletes_from,
                                    "delete"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        updates_from, "update"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        deletes_from, "delete"))

        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
        ########################################################################################################################
        self.log.info("Step 30: Verify the docs count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 31: Create 10M docs.")
        pool = Executors.newFixedThreadPool(5)
        total_num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4
        num_items = total_num_items / doc_executors
        for i in xrange(doc_executors):
            executors.append(
                GleambookUser_Docloader(bucket, num_items,
                                        items_start_from + i * num_items))
            executors.append(
                GleambookMessages_Docloader(msg_bucket, num_items,
                                            items_start_from + i * num_items))

        ###################################################### NEED TO BE UPDATED ##################################################################
        self.log.info(
            "Step 32: When 31 is in progress do a KV Rebalance out of 2 nodes."
        )
        rebalance = self.cluster.async_rebalance(nodes_in_cluster, [],
                                                 self.servers[1:3])
        nodes_in_cluster = [
            node for node in nodes_in_cluster if node not in self.servers[1:3]
        ]
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items / 10
        items_start_from += total_num_items
        ########################################################################################################################
        self.log.info("Step 33: Wait for rebalance to complete.")
        rebalance.get_result()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
        self.sleep(20)

        ########################################################################################################################
        self.log.info("Step 34: Verify the docs count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 35: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4

        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, updates_from,
                                    "update"))
        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, deletes_from,
                                    "delete"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        updates_from, "update"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        deletes_from, "delete"))

        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        ########################################################################################################################
        self.log.info("Step 36: Verify the docs count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 37: Create 10M docs.")
        pool = Executors.newFixedThreadPool(5)
        total_num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4
        num_items = total_num_items / doc_executors
        for i in xrange(doc_executors):
            executors.append(
                GleambookUser_Docloader(bucket, num_items,
                                        items_start_from + i * num_items))
            executors.append(
                GleambookMessages_Docloader(msg_bucket, num_items,
                                            items_start_from + i * num_items))

        ###################################################### NEED TO BE UPDATED ##################################################################
        self.log.info(
            "Step 38: When 37 is in progress do a CBAS SWAP Rebalance of 2 nodes."
        )
        for node in self.cbas_servers[-1:]:
            rest = RestConnection(node)
            rest.set_data_path(data_path=node.data_path,
                               index_path=node.index_path,
                               cbas_path=node.cbas_path)
        rebalance = self.cluster.async_rebalance(nodes_in_cluster,
                                                 [self.servers[6]],
                                                 [self.servers[5]],
                                                 services=["kv"],
                                                 check_vbucket_shuffling=False)
        nodes_in_cluster += [self.servers[6]]
        nodes_in_cluster.remove(self.servers[5])
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        ########################################################################################################################
        self.log.info("Step 39: Wait for rebalance to complete.")
        rebalance.get_result()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
        self.sleep(20)

        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items / 10
        items_start_from += total_num_items

        ########################################################################################################################
        self.log.info("Step 40: Verify the docs count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 41: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4

        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, updates_from,
                                    "update"))
        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, deletes_from,
                                    "delete"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        updates_from, "update"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        deletes_from, "delete"))

        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        ########################################################################################################################
        self.log.info("Step 42: Verify the docs count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 43: Create 10M docs.")
        pool = Executors.newFixedThreadPool(5)
        total_num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4
        num_items = total_num_items / doc_executors
        for i in xrange(doc_executors):
            executors.append(
                GleambookUser_Docloader(bucket, num_items,
                                        items_start_from + i * num_items))
            executors.append(
                GleambookMessages_Docloader(msg_bucket, num_items,
                                            items_start_from + i * num_items))

        ###################################################### NEED TO BE UPDATED ##################################################################
        self.log.info("Step 44: When 43 is in progress do a KV Rebalance IN.")
        rest = RestConnection(self.servers[5])
        rest.set_data_path(data_path=self.servers[5].data_path,
                           index_path=self.servers[5].index_path,
                           cbas_path=self.servers[5].cbas_path)
        rebalance = self.cluster.async_rebalance(nodes_in_cluster,
                                                 [self.servers[5]], [],
                                                 services=["kv"])
        nodes_in_cluster += [self.servers[5]]
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        ########################################################################################################################
        self.log.info("Step 45: Wait for rebalance to complete.")
        rebalance.get_result()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
        self.sleep(20)

        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items / 10
        items_start_from += total_num_items

        ########################################################################################################################
        self.log.info("Step 46: Verify the docs count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 47: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4

        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, updates_from,
                                    "update"))
        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, deletes_from,
                                    "delete"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        updates_from, "update"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        deletes_from, "delete"))

        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        ########################################################################################################################
        self.log.info("Step 48: Verify the docs count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 49: Create 10M docs.")
        pool = Executors.newFixedThreadPool(5)
        total_num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4
        num_items = total_num_items / doc_executors
        for i in xrange(doc_executors):
            executors.append(
                GleambookUser_Docloader(bucket, num_items,
                                        items_start_from + i * num_items))
            executors.append(
                GleambookMessages_Docloader(msg_bucket, num_items,
                                            items_start_from + i * num_items))

        ########################################################################################################################
        self.log.info(
            "Step 50: When 49 is in progress do a KV+CBAS Rebalance OUT.")
        rest = RestConnection(self.servers[6])
        rest.set_data_path(data_path=self.servers[6].data_path,
                           index_path=self.servers[6].index_path,
                           cbas_path=self.servers[6].cbas_path)
        rebalance = self.cluster.async_rebalance(nodes_in_cluster, [],
                                                 [self.servers[6]])
        nodes_in_cluster.remove(self.servers[6])

        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        ########################################################################################################################
        self.log.info("Step 51: Wait for rebalance to complete.")
        rebalance.get_result()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
        self.sleep(20)
        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items / 10
        items_start_from += total_num_items

        ########################################################################################################################
        self.log.info("Step 52: Verify the docs count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 53: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4

        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, updates_from,
                                    "update"))
        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, deletes_from,
                                    "delete"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        updates_from, "update"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        deletes_from, "delete"))

        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        ########################################################################################################################
        self.log.info("Step 54: Verify the docs count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 55: Create 10M docs.")
        pool = Executors.newFixedThreadPool(5)
        total_num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4
        num_items = total_num_items / doc_executors
        for i in xrange(doc_executors):
            executors.append(
                GleambookUser_Docloader(bucket, num_items,
                                        items_start_from + i * num_items))
            executors.append(
                GleambookMessages_Docloader(msg_bucket, num_items,
                                            items_start_from + i * num_items))

        ########################################################################################################################
        self.log.info(
            "Step 56: When 55 is in progress do a KV+CBAS SWAP Rebalance.")
        rest = RestConnection(self.servers[7])
        rest.set_data_path(data_path=self.servers[7].data_path,
                           index_path=self.servers[7].index_path,
                           cbas_path=self.servers[7].cbas_path)
        rebalance = self.cluster.async_rebalance(nodes_in_cluster,
                                                 [self.servers[7]],
                                                 [self.servers[6]])
        #         rebalance.get_result()
        nodes_in_cluster.remove(self.servers[6])
        nodes_in_cluster += [self.servers[7]]

        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        ########################################################################################################################
        self.log.info("Step 57: Wait for rebalance to complete.")
        rebalance.get_result()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=240)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
        self.sleep(20)

        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items / 10
        items_start_from += total_num_items

        ########################################################################################################################
        self.log.info("Step 58: Verify the docs count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 59: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4

        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, updates_from,
                                    "update"))
        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, deletes_from,
                                    "delete"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        updates_from, "update"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        deletes_from, "delete"))

        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        ########################################################################################################################
        self.log.info("Step 60: Verify the docs count.")
        self.validate_items_count()

        bucket.close()
        msg_bucket.close()
        cluster.disconnect()

        print "End Time: %s" % str(
            time.strftime("%H:%M:%S", time.gmtime(time.time())))
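
These steps call shutdown_and_await_termination(pool, timeout) throughout, importing it from a local shutdown module that is not part of this listing. A minimal Jython version, following the standard shutdown recipe from the java.util.concurrent.ExecutorService javadoc (an assumption about the helper's actual contents), might look like this:

from java.util.concurrent import TimeUnit
from java.lang import InterruptedException, Thread

def shutdown_and_await_termination(pool, timeout):
    # stop accepting new tasks, then wait up to `timeout` seconds
    pool.shutdown()
    try:
        if not pool.awaitTermination(timeout, TimeUnit.SECONDS):
            pool.shutdownNow()  # cancel tasks that are still running
            if not pool.awaitTermination(timeout, TimeUnit.SECONDS):
                print "Pool did not terminate"
    except InterruptedException:
        pool.shutdownNow()
        Thread.currentThread().interrupt()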
	
	# read full input data
	f = open('texts.csv','r')
	cr = csv.reader(f)
	texts = []
	for r in cr:
		if len(r) > 0:
			texts.append(r[0])
	n = len(texts)
	
	# get CPU count
	k = available_cpu_count()
	tdatas = []
	
	# split the work (optimal static splitting)
	# implicit assumption: atomic units of work of equal weight (not true in this case)
	for i in range(k):
		tdatas.append(texts[(n*i)/k : (n*(i+1))/k])
	
	#######################################
	
	pool = Executors.newFixedThreadPool(k)
	workers = [Worker(sentiworddic,amplifiers,decrementers,i,tdatas[i]) for i in range(k)]
	futures = pool.invokeAll(workers)
	
	for future in futures:
		f = future.get(5, TimeUnit.SECONDS)
		resd[f.tid] = f.result
	
	# shutdown_and_await_termination(pool, 5)
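
The loop above reads f.tid and f.result from whatever each future returns, so each Worker must be a Callable whose call() returns self. A hedged sketch of that contract (the real Worker is not shown here; score_text is a hypothetical per-text scorer):

from java.util.concurrent import Callable

class Worker(Callable):
    # sketch: only the interface (tid, result, call() returning self)
    # is visible from the surrounding code
    def __init__(self, sentiworddic, amplifiers, decrementers, tid, texts):
        self.sentiworddic = sentiworddic
        self.amplifiers = amplifiers
        self.decrementers = decrementers
        self.tid = tid
        self.texts = texts
        self.result = None

    def call(self):
        # score_text is hypothetical: any per-text sentiment scorer fits
        self.result = [score_text(t, self.sentiworddic, self.amplifiers,
                                  self.decrementers) for t in self.texts]
        return self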
Ejemplo n.º 30
0
		if j < len(server.protocols) - 1:
			sys.stdout.write(', ')
		sys.stdout.write(str(protocol))
	print '.'

#
# Scheduler
#

scheduler.start()

#
# Tasks
#

fixed_executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() * 2 + 1)
if len(tasks) > 0:
	futures = []
	start_time = System.currentTimeMillis()
	print 'Executing %s startup tasks...' % len(tasks)
	for task in tasks:
		futures.append(fixed_executor.submit(task))
	for future in futures:
		try:
			future.get()
		except:
			pass  # ignore individual task failures; keep starting up
	print 'Finished all startup tasks in %s seconds.' % ((System.currentTimeMillis() - start_time) / 1000.0)

for application in applications:
	applicationService = ApplicationService(application)
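
The startup-task block sizes its pool at availableProcessors() * 2 + 1, a common choice when tasks spend most of their time blocked on I/O. The tasks themselves must be Runnable (or Callable) objects; a minimal hedged sketch of one (the real tasks are defined elsewhere in the application):

from java.lang import Runnable

class StartupTask(Runnable):
    # hypothetical example task, for illustration only
    def __init__(self, name):
        self.name = name

    def run(self):
        print 'startup task %s running' % self.name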
Ejemplo n.º 31
0
    def process(self, dataSource, progressBar):
        
        def addArtifact(file, message):
            art = file.newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_INTERESTING_FILE_HIT)
            att = BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_SET_NAME,
                    VadCheckModuleFactory.moduleName, message)
            art.addAttribute(att)

        start = time.clock()

        # get or create "Transcribed" tag
        tagsManager = Case.getCurrentCase().getServices().getTagsManager()
        tagTranscribed = getOrAddTag(tagsManager, "Transcribed")

        self.log(Level.INFO, "Starting vad_check_ingest with settings minPercVoiced " + str(self.local_settings.minPercVoiced) + 
                " minTotalVoiced " + str(self.local_settings.minTotalVoiced))

        fileManager = Case.getCurrentCase().getServices().getFileManager()
        # get all files
        files = fileManager.findFiles(dataSource, "%")
        numFiles = len(files)
        self.log(Level.INFO, "found " + str(numFiles) + " files")
        progressBar.switchToDeterminate(4)

        ffmpeg_clock_start = time.clock()
        pool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors())
        futures = pool.invokeAll(map(lambda file: RunProcessAVFile(file, self.local_settings.minTotalVoiced, self), files))
        pool.shutdownNow()
        filesForVoiceClassification = filter(lambda x: x is not None, map(lambda future: future.get(), futures)) 
        ffmpeg_clock_end = time.clock()
        self.log(Level.INFO, "ffmpeg conversions completed in " + str(ffmpeg_clock_end - ffmpeg_clock_start) + "s")

        if len(filesForVoiceClassification) == 0:
            self.log(Level.INFO, "No files contain speech.")
            return IngestModule.ProcessResult.OK

        tmpFiles = map(lambda x: x[1], filesForVoiceClassification)

        fileListTxt = writeListToFile(tmpFiles, "filelist.txt")

        self.log(Level.INFO, "Files to classify speech/not speech:\n" + "\n".join(tmpFiles))
        progressBar.progress("Running voice activity detection on " + str(len(tmpFiles)) + " files. Be patient, this may take a while.", 1)
        # now run all files of interest through ina_speech_segmenter to detect voice activity
        try:
            ina_run_time = runInaSpeechSegmener(fileListTxt, self)
            self.log(Level.INFO, "ina_speech_segmenter completed in " + str(ina_run_time) + "s")
        except SubprocessError:
            self.log(Level.INFO, "inaSpeechSegmenter failed")
            return IngestModule.ProcessResult.ERROR

        progressBar.progress("Importing " + str(len(filesForVoiceClassification)) + " csv  files", 2)
        
        filesForDeepspeech = []
        for file, tmpFile, duration in filesForVoiceClassification:
            tmpFileBase, _ = os.path.splitext(tmpFile)
            csvFile = tmpFileBase + ".csv"
            total_voiced, total_female, total_male = processInaSpeechSegmenterCSV(csvFile, self)
            perc_voiced_frames = total_voiced / duration * 100

            if ((perc_voiced_frames > self.local_settings.minPercVoiced ) and
                    (total_voiced > self.local_settings.minTotalVoiced)):
                self.log(Level.INFO, "Found an audio file with speech: " + file.getName())         
                
                addArtifact(file, "Audio file with speech")    
                if total_male > 0:
                    addArtifact(file, "Audio file with speech - male")
                if total_female > 0:
                    addArtifact(file, "Audio file with speech - female")

                filesForDeepspeech.append((file, tmpFile))
            else:
                self.log(Level.INFO, "Audio file " + file.getName() + "doesn't match conditions. perc_voiced_frames = " + str(perc_voiced_frames)+
                    "total_voiced = " + str(total_voiced))

            # Fire an event to notify the UI and others that there is a new artifact
            IngestServices.getInstance().fireModuleDataEvent(
                ModuleDataEvent(VadCheckModuleFactory.moduleName,
                    BlackboardArtifact.ARTIFACT_TYPE.TSK_INTERESTING_FILE_HIT, None))
        
        progressBar.progress("Transcribing " + str(len(filesForDeepspeech)) + " files. Be patient, this may take a while.", 3)
			
        if self.local_settings.runVadTranscriber and len(filesForDeepspeech) > 0:
            tmpFiles = map(lambda x: x[1], filesForDeepspeech)
            try:
                deepspeech_clock_start = time.clock()
                #transcribe all files in one go
                transcribeFiles(fileListTxt, self.local_settings.vadTranscriberLanguage, self.local_settings.showTextSegmentStartTime, self)
                importTranscribedTextFiles(filesForDeepspeech, self, VadCheckModuleFactory,
                                            tagsManager,  tagTranscribed)
                deepspeech_clock_end = time.clock()
                self.log(Level.INFO, "deepspeech completed in " + str(deepspeech_clock_end - deepspeech_clock_start) + "s")
            except SubprocessError:
                self.log(Level.INFO, "deepspeech failed")
                return IngestModule.ProcessResult.ERROR

        end = time.clock()
        self.log(Level.INFO, "Vad_check_ingest completed in " + str(end-start) + "s")
        return IngestModule.ProcessResult.OK
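
pool.invokeAll(map(...)) above requires every mapped element to be a java.util.concurrent.Callable, and the later filter shows that call() must return either None or a (file, tmpFile, duration) tuple. A hedged sketch of that contract (RunProcessAVFile's real body is not in this listing; convert_with_ffmpeg is hypothetical):

from java.util.concurrent import Callable

class RunProcessAVFile(Callable):
    def __init__(self, file, minTotalVoiced, module):
        self.file = file
        self.minTotalVoiced = minTotalVoiced
        self.module = module

    def call(self):
        # convert_with_ffmpeg is hypothetical: extract a wav suitable for
        # inaSpeechSegmenter and report its duration, or return None when
        # ffmpeg cannot handle this file
        converted = convert_with_ffmpeg(self.file)
        if converted is None:
            return None
        tmp_path, duration = converted
        return (self.file, tmp_path, duration)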
Ejemplo n.º 32
0
 def __init__(self, number_of_threads=10):
     self.number_of_threads = number_of_threads
     self.pool = Executors.newFixedThreadPool(self.number_of_threads)
     self.futures = {}
     self.tasks = []
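
The fragment shows only the constructor. A hedged usage sketch, assuming the enclosing class is named TaskManager and that callers submit java.util.concurrent.Callable tasks:

from java.util.concurrent import Callable

class Echo(Callable):
    # hypothetical task used only for illustration
    def __init__(self, value):
        self.value = value

    def call(self):
        return self.value

manager = TaskManager(number_of_threads=4)
future = manager.pool.submit(Echo("hello"))
manager.futures["echo-1"] = future
print future.get()  # prints: hello
manager.pool.shutdown()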
Ejemplo n.º 33
0
	with open(configurationFile) as data_file:
		json_string = data_file.read()   
except EnvironmentError, err:
	print str(err)
	usage()
	sys.exit(3)

try:
	config = json.loads(json_string.decode('utf-8'))
except:
	print "JSON from file '" + configurationFile + "' is malformed."
	e = sys.exc_info()[0]
	print str(e)
	sys.exit(4)

pool = Executors.newFixedThreadPool(len(config["input"]))
ecs  = ExecutorCompletionService(pool)

def scheduler(roots):
    for inputConfig in roots:
        yield inputConfig

def getClassByName(module, className):
    if not module:
        if className.startswith("services."):
            className = className.split("services.")[1]
        l = className.split(".")
        m = __services__[l[0]]
        return getClassByName(m, ".".join(l[1:]))
    elif "." in className:
        l = className.split(".")
  "angle_epsilon": 0.02, # in radians. 0.05 is 2.8 degrees, 0.02 is 1.1 degrees
  "len_epsilon_sq": pow(somaDiameter, 2), # in calibrated units, squared
}

# RANSAC parameters: reduce list of pointmatches to a spatially coherent subset
paramsModel = {
  "maxEpsilon": somaDiameter, # max allowed alignment error in calibrated units (a distance)
  "minInlierRatio": 0.0000001, # ratio inliers/candidates
  "minNumInliers": 5, # minimum number of good matches to accept the result
  "n_iterations": 2000, # for estimating the model
  "maxTrust": 4, # for rejecting candidates
}

# Joint dictionary of parameters
params = {}
params.update(paramsDoG)
params.update(paramsFeatures)
params.update(paramsModel)

# The model type to fit. Could also be:
# TranslationModel3D, SimilarityModel3D, AffineModel3D
modelclass = RigidModel3D

n_threads = Runtime.getRuntime().availableProcessors()
exe = Executors.newFixedThreadPool(n_threads)
csv_dir = "/tmp/"

registered = registeredView(img_filenames, img_loader, getCalibration, modelclass, csv_dir, exe, params)

IL.wrap(registered, "registered").show()
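
The script never releases exe once registration finishes. A common pattern in Fiji Jython scripts is to guard the call with try/finally, sketched here with the same two calls:

try:
    registered = registeredView(img_filenames, img_loader, getCalibration,
                                modelclass, csv_dir, exe, params)
    IL.wrap(registered, "registered").show()
finally:
    exe.shutdown()  # release the pooled threads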
    def testEmptyFieldSort(self):
        """
        test sorting when the sort field is empty (undefined) for some of the
        documents
        """
        sort = self.sort

        sort.setSort(SortField("string", SortField.Type.STRING))
        self._assertMatches(self.full, self.queryF, sort, "ZJI")

        sort.setSort(SortField("string", SortField.Type.STRING, True))
        self._assertMatches(self.full, self.queryF, sort, "IJZ")
    
        sort.setSort(SortField("int", SortField.Type.INT))
        self._assertMatches(self.full, self.queryF, sort, "IZJ")

        sort.setSort(SortField("int", SortField.Type.INT, True))
        self._assertMatches(self.full, self.queryF, sort, "JZI")

        sort.setSort(SortField("float", SortField.Type.FLOAT))
        self._assertMatches(self.full, self.queryF, sort, "ZJI")

        # using a nonexistent field as first sort key shouldn't make a
        # difference:
        sort.setSort([SortField("nosuchfield", SortField.Type.STRING),
                      SortField("float", SortField.Type.FLOAT)])
        self._assertMatches(self.full, self.queryF, sort, "ZJI")

        sort.setSort(SortField("float", SortField.Type.FLOAT, True))
        self._assertMatches(self.full, self.queryF, sort, "IJZ")

        # When a field is None for both documents, the next SortField should
        # be used. 
        # Works for
        sort.setSort([SortField("int", SortField.Type.INT),
                      SortField("string", SortField.Type.STRING),
                      SortField("float", SortField.Type.FLOAT)])
        self._assertMatches(self.full, self.queryG, sort, "ZWXY")

        # Reverse the last criterion to make sure the test didn't pass by
        # chance
        sort.setSort([SortField("int", SortField.Type.INT),
                      SortField("string", SortField.Type.STRING),
                      SortField("float", SortField.Type.FLOAT, True)])
        self._assertMatches(self.full, self.queryG, sort, "ZYXW")

        # Do the same for a ParallelMultiSearcher

        threadPool = Executors.newFixedThreadPool(self.getRandomNumber(2, 8), NamedThreadFactory("testEmptyFieldSort"))
        parallelSearcher=IndexSearcher(self.full.getIndexReader(), threadPool)

        sort.setSort([SortField("int", SortField.Type.INT),
                      SortField("string", SortField.Type.STRING),
                      SortField("float", SortField.Type.FLOAT)])
        self._assertMatches(parallelSearcher, self.queryG, sort, "ZWXY")

        sort.setSort([SortField("int", SortField.Type.INT),
                      SortField("string", SortField.Type.STRING),
                      SortField("float", SortField.Type.FLOAT, True)])
        self._assertMatches(parallelSearcher, self.queryG, sort, "ZYXW")

        threadPool.shutdown()
        threadPool.awaitTermination(1000L, TimeUnit.MILLISECONDS)
Ejemplo n.º 36
0
DEFAULT_ACCOUNTING = ('idv', '0')
DEFAULT_SIZE = (480, 640)

CoordinateSystems = enum('AREA', 'LATLON', 'IMAGE')
AREA = CoordinateSystems.AREA
LATLON = CoordinateSystems.LATLON
IMAGE = CoordinateSystems.IMAGE

Places = enum(ULEFT='Upper Left', CENTER='Center')
ULEFT = Places.ULEFT
CENTER = Places.CENTER

MAX_CONCURRENT = 5

pool = Executors.newFixedThreadPool(MAX_CONCURRENT)

ecs = ExecutorCompletionService(pool)

def _satBandUrl(**kwargs):
    # needs at least server, port, debug, user, and proj
    # follow AddeImageChooser.appendMiscKeyValues in determining which extra keys to add
    satbandUrlFormat = "adde://%(server)s/text?&FILE=SATBAND&COMPRESS=gzip&PORT=%(port)s&DEBUG=%(debug)s&VERSION=1&USER=%(user)s&PROJ=%(proj)s"
    return satbandUrlFormat % kwargs
    
# NOTE: remember that a Callable task returns a result: Future.get() yields
# whatever call() returned, while a Future wrapping a plain Runnable
# returns null from get().
class _SatBandReq(Callable):
    def __init__(self, url):
        self.url = url
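    def call(self):
        # Hypothetical completion -- the example is truncated at this point.
        # Fetches the SATBAND text from the ADDE url built by _satBandUrl;
        # urllib2 is available under Jython 2.x. A real implementation would
        # also gunzip the response, since the url requests COMPRESS=gzip.
        import urllib2
        return urllib2.urlopen(self.url).read()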
Ejemplo n.º 37
0
def run(title):
    gd = GenericDialog("Record Window")
    gd.addMessage("Maximum number of frames to record.\nZero means infinite, interrupt with ESC key.")
    gd.addNumericField("Max. frames:", 50, 0)
    gd.addNumericField("Milisecond interval:", 300, 0)
    gd.addSlider("Start in (seconds):", 0, 20, 5)
    frames = []
    titles = []
    for f in Frame.getFrames():
        if f.isEnabled() and f.isVisible():
            frames.append(f)
            titles.append(f.getTitle())
    gd.addChoice("Window:", titles, titles[0])
    gd.addCheckbox("To file", False)
    gd.showDialog()
    if gd.wasCanceled():
        return
    n_frames = int(gd.getNextNumber())
    interval = gd.getNextNumber() / 1000.0  # in seconds
    frame = frames[gd.getNextChoiceIndex()]
    delay = int(gd.getNextNumber())
    tofile = gd.getNextBoolean()

    dir = None
    if tofile:
        dc = DirectoryChooser("Directory to store image frames")
        dir = dc.getDirectory()
        if dir is None:
            return  # dialog canceled

    snaps = []
    borders = None
    executors = Executors.newFixedThreadPool(1)
    try:
        while delay > 0:
            IJ.showStatus("Starting in " + str(delay) + "s.")
            time.sleep(1)  # one second
            delay -= 1

        IJ.showStatus("Capturing frame borders...")
        bounds = frame.getBounds()
        robot = Robot()
        frame.toFront()
        time.sleep(0.5)  # half a second
        borders = robot.createScreenCapture(bounds)

        IJ.showStatus("Recording " + frame.getTitle())

        # Set box to the inside borders of the frame
        insets = frame.getInsets()
        box = bounds.clone()
        box.x = insets.left
        box.y = insets.top
        box.width -= insets.left + insets.right
        box.height -= insets.top + insets.bottom

        start = System.currentTimeMillis() / 1000.0  # in seconds
        last = start
        intervals = []
        real_interval = 0
        i = 1
        fus = None
        if tofile:
            fus = []

        # 0 n_frames means continuous acquisition
        while 0 == n_frames or (len(snaps) < n_frames and last - start < n_frames * interval):
            now = System.currentTimeMillis() / 1000.0  # in seconds
            real_interval = now - last
            if real_interval >= interval:
                last = now
                img = snapshot(frame, box)
                if tofile:
                    fus.append(executors.submit(Saver(i, dir, bounds, borders, img, insets)))  # will flush img
                    i += 1
                else:
                    snaps.append(img)
                intervals.append(real_interval)
            else:
                time.sleep(interval / 5)
            # interrupt capturing:
            if IJ.escapePressed():
                IJ.showStatus("Recording user-interrupted")
                break

        # debug:
        # print "insets:", insets
        # print "bounds:", bounds
        # print "box:", box
        # print "snap dimensions:", snaps[0].getWidth(), snaps[0].getHeight()

        # Create stack
        stack = None
        if tofile:
            for fu in fus:  # wait on the Saver futures (snaps is empty in tofile mode)
                fu.get()  # wait on all
            stack = VirtualStack(bounds.width, bounds.height, None, dir)
            files = File(dir).list(TifFilter())
            Arrays.sort(files)
            for f in files:
                stack.addSlice(f)
        else:
            stack = ImageStack(bounds.width, bounds.height, None)
            t = 0
            for snap, real_interval in zip(snaps, intervals):
                bi = BufferedImage(bounds.width, bounds.height, BufferedImage.TYPE_INT_RGB)
                g = bi.createGraphics()
                g.drawImage(borders, 0, 0, None)
                g.drawImage(snap, insets.left, insets.top, None)
                stack.addSlice(str(IJ.d2s(t, 3)), ImagePlus("", bi).getProcessor())
                t += real_interval
                snap.flush()
                bi.flush()

        borders.flush()

        ImagePlus(frame.getTitle() + " recording", stack).show()
        IJ.showStatus("Done recording " + frame.getTitle())
    except Exception, e:
        print "Some error ocurred:"
        print e.printStackTrace()
        IJ.showStatus("")
        if borders is not None:
            borders.flush()
        for snap in snaps:
            snap.flush()
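
In tofile mode the loop hands each snapshot to executors.submit(Saver(...)), but the Saver class is not shown. A hedged sketch of what it plausibly does (composite the cached borders with one snapshot and write a numbered TIFF, so the later TifFilter/VirtualStack pickup finds the files; all names here are assumptions):

from java.lang import Runnable
from java.awt.image import BufferedImage
from ij import ImagePlus
from ij.io import FileSaver

class Saver(Runnable):
    def __init__(self, i, dir, bounds, borders, img, insets):
        self.i = i
        self.dir = dir
        self.bounds = bounds
        self.borders = borders
        self.img = img
        self.insets = insets

    def run(self):
        # composite window borders and snapshot into one RGB image
        bi = BufferedImage(self.bounds.width, self.bounds.height,
                           BufferedImage.TYPE_INT_RGB)
        g = bi.createGraphics()
        g.drawImage(self.borders, 0, 0, None)
        g.drawImage(self.img, self.insets.left, self.insets.top, None)
        g.dispose()
        FileSaver(ImagePlus('', bi)).saveAsTiff(
            self.dir + 'snap-%05d.tif' % self.i)
        self.img.flush()  # the caller notes "will flush img"
        bi.flush()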
from downloader import Downloader
from shutdown import shutdown_and_await_termination
from java.util.concurrent import Executors, TimeUnit

MAX_CONCURRENT = 3
SITES = [
    "http://www.cnn.com/",
    "http://www.nytimes.com/",
    "http://www.washingtonpost.com/",
    "http://www.dailycamera.com/",
    "http://www.timescall.com/",
]

pool = Executors.newFixedThreadPool(MAX_CONCURRENT)
downloaders = [Downloader(url) for url in SITES]
futures = pool.invokeAll(downloaders)

for future in futures:
    print future.get(5, TimeUnit.SECONDS)

shutdown_and_await_termination(pool, 5)
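
Downloader is imported from a local module that is not part of this listing. A minimal hedged implementation consistent with how it is used (a Callable whose string result is printed from future.get()):

from java.util.concurrent import Callable
import urllib2

class Downloader(Callable):
    def __init__(self, url):
        self.url = url

    def call(self):
        # fetch the page and return a short report string
        data = urllib2.urlopen(self.url).read()
        return '%s: %d bytes' % (self.url, len(data))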
Ejemplo n.º 39
0
    def __del__(self):
        del self.result

        

from java.util.concurrent import Executors, ExecutorCompletionService
import java.lang.Runtime as JavaRunTime
rt = JavaRunTime.getRuntime()
# over 2 because most intel machines report double the number of real cores, 
# returned by this function call, -1 because we want to leave something for WAT/ResSim/the OS.
MAX_THREADS = max(1, rt.availableProcessors()/2 - 1) 

...

pool = Executors.newFixedThreadPool(numThreads)
ecs = ExecutorCompletionService(pool)
jobs = list()

def runKnnAndStorageAreas(tableLabel, modelFPart, ...):
    # function to do compute
    # call kNN compute functions and post-processors for interpolations
    ...
    return listOfTimeSeriesContainers

for task in listOfTasksCsvFile:
    ...
    # get the parameters from task
    ...
    jobs.append(ThreadWrapper(runKnnAndStorageAreas, tableLabel,
        modelFPart, ..., jobName=tableLabel))
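
ThreadWrapper is likewise not shown; from its use it must adapt a Python function plus its arguments into a Callable for the completion service. A hedged sketch:

from java.util.concurrent import Callable

class ThreadWrapper(Callable):
    # wraps a Python function and its arguments as a Callable;
    # jobName is kept for progress reporting
    def __init__(self, fn, *args, **kwargs):
        self.fn = fn
        self.args = args
        self.jobName = kwargs.get('jobName', '')

    def call(self):
        return self.fn(*self.args)

Presumably each job is then handed to the completion service with ecs.submit(job) and the results drained in completion order via ecs.take().get().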
# Propagate before or propagate after
# TO DO

# Apply transforms to patches
progress = 0
for mesh, layer in zip(meshes, layers):
	Utils.log("Applying transforms to patches...")
	IJ.showProgress(0, len(layers))

	mlt = MovingLeastSquaresTransform2()
	mlt.setModel(AffineModel2D)
	mlt.setAlpha(2.0)
	mlt.setMatches(mesh.getVA().keySet())

	# ElasticLayerAlignment uses newer concurrent methods for this
	pool = Executors.newFixedThreadPool(MAX_NUM_THREADS)
	patch_transforms = []
	patches = layer.getDisplayables(Patch)
	for patch in patches:
		pt = PatchTransform(patch, mlt.copy())
		patch_transforms.append(pt)
	futures = pool.invokeAll(patch_transforms)
	for future in futures:
		print future.get(5, TimeUnit.SECONDS)
	shutdown_and_await_termination(pool, 5)

	for vd in vector_data:
		vd.apply(layer, inf_area, mlt)

	progress += 1
	IJ.showProgress(progress, len(layers))
Ejemplo n.º 41
0
    indices = range(1, search.numNeighbors())
    if furthest:
      indices.reverse()
    # Make as many constellations as possible, up to n_max
    count = 0
    for i, k in combinations(indices, 2):
      p1, d1 = search.getPosition(i), search.getSquareDistance(i)
      p2, d2 = search.getPosition(k), search.getSquareDistance(k)
      cons = Constellation(peak, p1, d1, p2, d2)
      if cons.angle > 0.25 and count < n_max:
        count += 1
        yield cons
    """


exe = Executors.newFixedThreadPool(4)

try:
    # A map of image indices and collections of DoG peaks in calibrated 3D coordinates
    # (Must be calibrated, or the KDTree radius search wouldn't work as intended.)
    futures = [
        exe.submit(Task(getDoGPeaks, img, calibration))
        for img in [img1, img2]
    ]
    soma_detections = {ti: f.get() for ti, f in enumerate(futures)}

    for ti, peaks in soma_detections.iteritems():
        print "Found %i peaks in %i" % (len(peaks), ti)

    # Extract features from the detected soma:
    # Each feature is a constellation of a soma position and two other nearby somas.
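
exe.submit(Task(getDoGPeaks, img, calibration)) above relies on a small adapter that turns a deferred Python function call into a Callable, a common idiom in Fiji scripting. A hedged sketch of such a Task class:

from java.util.concurrent import Callable

class Task(Callable):
    # adapts a deferred Python function call into a java Callable
    def __init__(self, fn, *args):
        self.fn = fn
        self.args = args

    def call(self):
        return self.fn(*self.args)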
Ejemplo n.º 42
0
    with open(configurationFile) as data_file:
        json_string = data_file.read()
except EnvironmentError, err:
    print str(err)
    usage()
    sys.exit(3)

try:
    config = json.loads(json_string.decode('utf-8'))
except:
    print "JSON from file '" + configurationFile + "' is malformed."
    e = sys.exc_info()[0]
    print str(e)
    sys.exit(4)

pool = Executors.newFixedThreadPool(len(config["input"]))
ecs = ExecutorCompletionService(pool)


def scheduler(roots):
    for inputConfig in roots:
        yield inputConfig


def getClassByName(module, className):
    if not module:
        if className.startswith("services."):
            className = className.split("services.")[1]
        l = className.split(".")
        m = __services__[l[0]]
        return getClassByName(m, ".".join(l[1:]))
Ejemplo n.º 43
0
    cr = csv.reader(f)
    texts = []
    for r in cr:
        if len(r) > 0:
            texts.append(r[0])
    n = len(texts)

    # get CPU count
    k = available_cpu_count()
    tdatas = []

    # split the work (optimal static splitting)
    # implicit assumption: atomic units of work of equal weight (not true in this case)
    for i in range(k):
        tdatas.append(texts[(n * i) / k:(n * (i + 1)) / k])

    #######################################

    pool = Executors.newFixedThreadPool(k)
    workers = [
        Worker(sentiworddic, amplifiers, decrementers, i, tdatas[i])
        for i in range(k)
    ]
    futures = pool.invokeAll(workers)

    for future in futures:
        f = future.get(5, TimeUnit.SECONDS)
        resd[f.tid] = f.result

    # shutdown_and_await_termination(pool, 5)
Ejemplo n.º 44
0
def run(title):
    gd = GenericDialog('Record Window')
    gd.addMessage(
        "Maximum number of frames to record.\nZero means infinite, interrupt with ESC key."
    )
    gd.addNumericField('Max. frames:', 50, 0)
    gd.addNumericField('Millisecond interval:', 300, 0)
    gd.addSlider('Start in (seconds):', 0, 20, 5)
    frames = []
    titles = []
    for f in Frame.getFrames():
        if f.isEnabled() and f.isVisible():
            frames.append(f)
            titles.append(f.getTitle())
    gd.addChoice('Window:', titles, titles[0])
    gd.addCheckbox("To file", False)
    gd.showDialog()
    if gd.wasCanceled():
        return
    n_frames = int(gd.getNextNumber())
    interval = gd.getNextNumber() / 1000.0  # in seconds
    frame = frames[gd.getNextChoiceIndex()]
    delay = int(gd.getNextNumber())
    tofile = gd.getNextBoolean()

    dir = None
    if tofile:
        dc = DirectoryChooser("Directory to store image frames")
        dir = dc.getDirectory()
        if dir is None:
            return  # dialog canceled

    snaps = []
    borders = None
    executors = Executors.newFixedThreadPool(1)
    try:
        while delay > 0:
            IJ.showStatus('Starting in ' + str(delay) + 's.')
            time.sleep(1)  # one second
            delay -= 1

        IJ.showStatus('Capturing frame borders...')
        bounds = frame.getBounds()
        robot = Robot()
        frame.toFront()
        time.sleep(0.5)  # half a second
        borders = robot.createScreenCapture(bounds)

        IJ.showStatus("Recording " + frame.getTitle())

        # Set box to the inside borders of the frame
        insets = frame.getInsets()
        box = bounds.clone()
        box.x = insets.left
        box.y = insets.top
        box.width -= insets.left + insets.right
        box.height -= insets.top + insets.bottom

        start = System.currentTimeMillis() / 1000.0  # in seconds
        last = start
        intervals = []
        real_interval = 0
        i = 1
        fus = None
        if tofile:
            fus = []

        # 0 n_frames means continuous acquisition
        while 0 == n_frames or (len(snaps) < n_frames
                                and last - start < n_frames * interval):
            now = System.currentTimeMillis() / 1000.0  # in seconds
            real_interval = now - last
            if real_interval >= interval:
                last = now
                img = snapshot(frame, box)
                if tofile:
                    fus.append(
                        executors.submit(
                            Saver(i, dir, bounds, borders, img,
                                  insets)))  # will flush img
                    i += 1
                else:
                    snaps.append(img)
                intervals.append(real_interval)
            else:
                time.sleep(interval / 5)
            # interrupt capturing:
            if IJ.escapePressed():
                IJ.showStatus("Recording user-interrupted")
                break

        # debug:
        #print "insets:", insets
        #print "bounds:", bounds
        #print "box:", box
        #print "snap dimensions:", snaps[0].getWidth(), snaps[0].getHeight()

        # Create stack
        stack = None
        if tofile:
            for fu in fus:  # wait on the Saver futures (snaps is empty in tofile mode)
                fu.get()  # wait on all
            stack = VirtualStack(bounds.width, bounds.height, None, dir)
            files = File(dir).list(TifFilter())
            Arrays.sort(files)
            for f in files:
                stack.addSlice(f)
        else:
            stack = ImageStack(bounds.width, bounds.height, None)
            t = 0
            for snap, real_interval in zip(snaps, intervals):
                bi = BufferedImage(bounds.width, bounds.height,
                                   BufferedImage.TYPE_INT_RGB)
                g = bi.createGraphics()
                g.drawImage(borders, 0, 0, None)
                g.drawImage(snap, insets.left, insets.top, None)
                stack.addSlice(str(IJ.d2s(t, 3)),
                               ImagePlus('', bi).getProcessor())
                t += real_interval
                snap.flush()
                bi.flush()

        borders.flush()

        ImagePlus(frame.getTitle() + " recording", stack).show()
        IJ.showStatus('Done recording ' + frame.getTitle())
    except Exception, e:
        print "Some error ocurred:"
        print e.printStackTrace()
        IJ.showStatus('')
        if borders is not None: borders.flush()
        for snap in snaps:
            snap.flush()
    def test_volume(self):
        nodes_in_cluster = [self.servers[0]]
        print "Start Time: %s" % str(
            time.strftime("%H:%M:%S", time.gmtime(time.time())))

        ########################################################################################################################
        self.log.info("Add a N1QL/Index nodes")
        self.query_node = self.servers[1]
        rest = RestConnection(self.query_node)
        rest.set_data_path(data_path=self.query_node.data_path,
                           index_path=self.query_node.index_path,
                           cbas_path=self.query_node.cbas_path)
        result = self.add_node(self.query_node, rebalance=False)
        self.assertTrue(result, msg="Failed to add N1QL/Index node.")

        self.log.info("Add a KV nodes")
        result = self.add_node(self.servers[2],
                               services=["kv"],
                               rebalance=True)
        self.assertTrue(result, msg="Failed to add KV node.")

        nodes_in_cluster = nodes_in_cluster + [
            self.servers[1], self.servers[2]
        ]
        ########################################################################################################################
        self.log.info("Step 2: Create Couchbase buckets.")
        self.create_required_buckets()
        for node in nodes_in_cluster:
            NodeHelper.do_a_warm_up(node)
            NodeHelper.wait_service_started(node)
        ########################################################################################################################
        self.log.info(
            "Step 3: Create 10M docs, averaging 1KB each, across 8 Couchbase buckets."
        )
        env = DefaultCouchbaseEnvironment.builder().mutationTokensEnabled(
            True).computationPoolSize(5).socketConnectTimeout(
                100000).connectTimeout(100000).maxRequestLifetime(
                    TimeUnit.SECONDS.toMillis(300)).build()
        cluster = CouchbaseCluster.create(env, self.master.ip)
        cluster.authenticate("Administrator", "password")
        bucket = cluster.openBucket("GleambookUsers")

        pool = Executors.newFixedThreadPool(5)
        items_start_from = 0
        total_num_items = self.input.param("num_items", 5000)

        executors = []
        num_executors = 5
        doc_executors = 5
        num_items = total_num_items / doc_executors
        for i in xrange(doc_executors):
            executors.append(
                GleambookUser_Docloader(bucket,
                                        num_items,
                                        items_start_from + i * num_items,
                                        batch_size=2000))
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items / 10
        items_start_from += total_num_items
        ########################################################################################################################
        self.sleep(120, "Sleeping after 1st cycle.")
        self.log.info("Step 8: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4

        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, updates_from,
                                    "update"))
        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, deletes_from,
                                    "delete"))
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        ########################################################################################################################
        self.sleep(120, "Sleeping after 2nd cycle.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 5
        num_items = total_num_items / doc_executors

        for i in xrange(doc_executors):
            executors.append(
                GleambookUser_Docloader(bucket,
                                        num_items,
                                        items_start_from + i * num_items,
                                        batch_size=2000))
        rebalance = self.cluster.async_rebalance(nodes_in_cluster,
                                                 [self.servers[3]], [])
        futures = pool.invokeAll(executors)

        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
        rebalance.get_result()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")

        bucket.close()
        cluster.disconnect()

        print "End Time: %s" % str(
            time.strftime("%H:%M:%S", time.gmtime(time.time())))
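
GleambookUser_Docloader and GleambookMessages_Docloader are not shown in this listing. A hedged sketch of the user loader, assuming the Couchbase Java SDK 2.x document API (the key prefix and field names are invented for illustration; the messages loader would follow the same pattern against its own bucket):

from java.util.concurrent import Callable
from com.couchbase.client.java.document import JsonDocument
from com.couchbase.client.java.document.json import JsonObject

class GleambookUser_Docloader(Callable):
    def __init__(self, bucket, num_items, start_from, op_type="create",
                 batch_size=2000):
        self.bucket = bucket
        self.num_items = num_items
        self.start_from = start_from
        self.op_type = op_type
        self.batch_size = batch_size

    def call(self):
        for i in xrange(self.num_items):
            key = "User-%d" % (self.start_from + i)
            if self.op_type == "create":
                doc = JsonObject.create().put("id", key)
                self.bucket.upsert(JsonDocument.create(key, doc))
            elif self.op_type == "update":
                doc = JsonObject.create().put("id", key).put("updated", True)
                self.bucket.upsert(JsonDocument.create(key, doc))
            elif self.op_type == "delete":
                self.bucket.remove(key)
        return "%s: %d ops" % (self.op_type, self.num_items)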
Ejemplo n.º 46
0
def recompress(run_id, conf):
    """Proceed to recompression of a run.

    Arguments:
        run_id: The run id
        conf: configuration dictionary
    """

    common.log('INFO', 'Recompress step: Starting', conf)

    # Check if input root fastq root data exists
    if not common.is_dir_exists(FASTQ_DATA_PATH_KEY, conf):
        error("FASTQ data directory does not exist",
              "FASTQ data directory does not exist: " + conf[FASTQ_DATA_PATH_KEY], conf)
        return False

    start_time = time.time()
    fastq_input_dir = conf[FASTQ_DATA_PATH_KEY] + '/' + run_id

    # initial du for comparing with ending disk usage
    previous_du_in_bytes = common.du(fastq_input_dir)

    # get information about compression type
    compression_type = conf[RECOMPRESS_COMPRESSION_KEY]
    compression_level = conf[RECOMPRESS_COMPRESSION_LEVEL_KEY]
    compression_info_tuple = get_info_from_file_type(compression_type, compression_level)

    if compression_info_tuple is None:
        error("Unknown compression type",
              "Unknown compression type: " + compression_type, conf)
        return False

    (compression_type_result, output_file_extension, output_compression_command, output_decompression_command,
     compression_level_argument) = compression_info_tuple

    # The following list contains the file types to recompress
    types_to_recompress = ["fastq.gz", "fastq"]

    # list of programs that must exist in PATH before execution
    program_set = {"bash", "tee", "touch", "chmod", "md5sum", output_compression_command, output_decompression_command}

    # get list of file to process
    input_files = []
    for extension in types_to_recompress:

        input_files.extend(list_files(fastq_input_dir, extension))
        simple_extension = os.path.splitext(extension)[-1][1:]
        extension_info_tuple = get_info_from_file_type(simple_extension)

        if extension_info_tuple is None:
            error("Unknown extension type",
                  "Unknown extension type: " + extension, conf)
            return False

        program_set.add(extension_info_tuple[3])

    # actual program list check
    for program in program_set:
        if not common.exists_in_path(program):
            error("Can't find all needed commands in PATH env var",
                  "Can't find all needed commands in PATH env var. Unable to find: " + program + " command.", conf)
            return False

    # Create an executor to run the recompression workers in parallel
    executor = Executors.newFixedThreadPool(int(conf[RECOMPRESS_THREADS_KEY]))
    workers = []

    # process each fastq and fastq.gz recursively in each fastq directory
    for input_file in input_files:

        simple_extension = os.path.splitext(input_file)[-1][1:]

        # get info about the type of input file
        extension_info_tuple = get_info_from_file_type(simple_extension)
        if extension_info_tuple is None:
            error("Unknown extension type",
                  "Unknown extension type: " + simple_extension, conf)
            return False

        input_decompression_command = extension_info_tuple[3]

        # get the file base name and build the output_file name; a file already ending in .fastq is ready to serve as base_input_file
        base_input_file = input_file[0: input_file.index(".fastq") + 6]
        output_file = base_input_file + "." + output_file_extension

        # Skip if the output_file already exists
        if not os.path.exists(output_file):

            # Create worker then execute in thread
            worker = Worker(input_file, output_file, input_decompression_command, output_compression_command,
                            output_decompression_command,
                            compression_level_argument,
                            common.is_conf_value_equals_true(RECOMPRESS_DELETE_ORIGINAL_FASTQ_KEY, conf))
            workers.append(worker)
            executor.execute(worker)

        else:
            common.log("WARNING", "Recompress step: Omitting processing file " + input_file + ". The associated output file " + output_file + " already exists.", conf)

    # Wait for all threads to finish
    executor.shutdown()
    while not executor.isTerminated():
        time.sleep(1)

    # Check whether any worker failed
    for worker in workers:
        if not worker.is_successful():
            error(worker.get_error_message(),
                  worker.get_long_error_message(), conf)
            return False

    # check new disk usage
    df_in_bytes = common.df(fastq_input_dir)
    du_in_bytes = common.du(fastq_input_dir)
    previous_du = previous_du_in_bytes / (1024 * 1024)
    df = df_in_bytes / (1024 * 1024 * 1024)
    du = du_in_bytes / (1024 * 1024)

    common.log("WARNING", "Recompress step: output disk free after step: " + str(df_in_bytes), conf)
    common.log("WARNING", "Recompress step: space previously used: " + str(previous_du_in_bytes), conf)
    common.log("WARNING", "Recompress step: space now used by step: " + str(du_in_bytes), conf)

    duration = time.time() - start_time

    msg = 'Ending recompression for run ' + run_id + '.' + \
          '\nJob finished at ' + common.time_to_human_readable(time.time()) + \
          ' without error in ' + common.duration_to_human_readable(duration) + '. '

    msg += '\n\nAfter recompress step FASTQ folder is now %.2f MB (previously %.2f MB) and %.2f GB still free.' % (
        du, previous_du, df)

    common.send_msg('[Aozan] Ending recompress for run ' + run_id + ' on ' +
                    common.get_instrument_name(run_id, conf), msg, False, conf)
    common.log('INFO', 'Recompress step: successful in ' + common.duration_to_human_readable(duration), conf)
    return True
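
The Worker consumed by executor.execute(worker) is not shown; from its use it must be a Runnable that records success or failure for the checks that follow the pool shutdown. A hedged sketch (the real pipeline, with md5sums and tee, is more involved):

import os
import subprocess
from java.lang import Runnable

class Worker(Runnable):
    def __init__(self, input_file, output_file, input_decompression_command,
                 output_compression_command, output_decompression_command,
                 compression_level_argument, delete_original):
        self.input_file = input_file
        self.output_file = output_file
        # assumed shape of the pipeline: decompress to stdout, recompress
        self.cmd = '%s -c %s | %s %s > %s' % (
            input_decompression_command, input_file,
            output_compression_command, compression_level_argument,
            output_file)
        self.delete_original = delete_original
        self.successful = True
        self.error_message = ''

    def run(self):
        try:
            code = subprocess.call(['bash', '-c', self.cmd])
            if code != 0:
                raise OSError('exit code %d' % code)
            if self.delete_original:
                os.remove(self.input_file)
        except Exception, e:
            self.successful = False
            self.error_message = str(e)

    def is_successful(self):
        return self.successful

    def get_error_message(self):
        return self.error_message

    def get_long_error_message(self):
        return 'Recompression of %s failed: %s' % (self.input_file,
                                                   self.error_message)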