Example #1
def run(log_level='INFO', parallel=0):
    logformat = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(level=log_level, format=logformat)
    logger = logging.getLogger(__name__)
    print(PROGNAME + ' v' + VERSION + ' started.')
    feeds = []
    if parallel <= 1:
        for feeditem in CONFIG.cp.sections():
            feed = Feed(feeditem)
            feed.parse_feed()
            feeds.append(feed)
        for feed in feeds:
            if len(feed.entries) > 0:
                logger.info('Storing ' + str(len(feed.entries)) + ' items' +
                            ' from ' + feed.name + ' on ' + feed.imap.host + '.')
                for entry in feed.entries:
                    feed.imap.store_entry(feed, entry)
                feed.new_to_cache()
    else:
        logging.warning('Using ' + str(parallel) + ' connections for ' +
                        'fetching and storing. This can cause issues on ' +
                        'some IMAP servers. Use at your own risk!')
        future_data = []
        with ThreadPoolExecutor(max_workers=parallel) as executor:
            for feeditem in CONFIG.cp.sections():
                feed = Feed(feeditem)
                feeds.append(feed)
            future_data = {executor.submit(parallel_parse, feed): feed for feed in feeds}
        for feed in as_completed(future_data):
            pass
        for feed in feeds:
            with ThreadPoolExecutor(max_workers=parallel) as executor:
                future_data = {executor.submit(feed.imap.store_entry, feed, entry): entry for entry in feed.entries}
            for nothing in as_completed(future_data):
                pass
Example #2
    def process(self, asgs):
        original_count = len(asgs)
        asgs = [a for a in asgs if a['SuspendedProcesses']]
        self.delay = self.data.get('delay', 30)
        self.log.debug("Filtered from %d to %d suspended asgs" % (
            original_count, len(asgs)))

        with self.executor_factory(max_workers=3) as w:
            futures = {}
            for a in asgs:
                futures[w.submit(self.resume_asg_instances, a)] = a
            for f in as_completed(futures):
                if f.exception():
                    log.error("Traceback resume asg:%s instances error:%s" % (
                        futures[f]['AutoScalingGroupName'],
                        f.exception()))
                    continue

        log.debug("Sleeping for asg health check grace")
        time.sleep(self.delay)

        with self.executor_factory(max_workers=3) as w:
            futures = {}
            for a in asgs:
                futures[w.submit(self.resume_asg, a)] = a
            for f in as_completed(futures):
                if f.exception():
                    log.error("Traceback resume asg:%s error:%s" % (
                        futures[f]['AutoScalingGroupName'],
                        f.exception()))
Example #3
def create_tasks(taskgraph, label_to_taskid):
    # TODO: use the taskGroupId of the decision task
    task_group_id = slugid()
    taskid_to_label = {t: l for l, t in label_to_taskid.iteritems()}

    session = requests.Session()

    # Default HTTPAdapter uses 10 connections. Mount custom adapter to increase
    # that limit. Connections are established as needed, so using a large value
    # should not negatively impact performance.
    http_adapter = requests.adapters.HTTPAdapter(pool_connections=CONCURRENCY,
                                                 pool_maxsize=CONCURRENCY)
    session.mount('https://', http_adapter)
    session.mount('http://', http_adapter)

    decision_task_id = os.environ.get('TASK_ID')

    with futures.ThreadPoolExecutor(CONCURRENCY) as e:
        fs = {}

        # We can't submit a task until its dependencies have been submitted.
        # So our strategy is to walk the graph and submit tasks once all
        # their dependencies have been submitted.
        #
        # Using visit_postorder() here isn't the most efficient: we'll
        # block waiting for dependencies of task N to submit even though
        # dependencies for task N+1 may be finished. If we need to optimize
        # this further, we can build a graph of task dependencies and walk
        # that.
        for task_id in taskgraph.graph.visit_postorder():
            task_def = taskgraph.tasks[task_id].task
            attributes = taskgraph.tasks[task_id].attributes
            # if this task has no dependencies, make it depend on this decision
            # task so that it does not start immediately; and so that if this loop
            # fails halfway through, none of the already-created tasks run.
            if decision_task_id and not task_def.get('dependencies'):
                task_def['dependencies'] = [decision_task_id]

            task_def['taskGroupId'] = task_group_id
            task_def['schedulerId'] = '-'

            # Wait for dependencies before submitting this.
            deps_fs = [fs[dep] for dep in task_def.get('dependencies', [])
                       if dep in fs]
            for f in futures.as_completed(deps_fs):
                f.result()

            fs[task_id] = e.submit(_create_task, session, task_id,
                                   taskid_to_label[task_id], task_def)

            # Schedule tasks as many times as task_duplicates indicates
            for i in range(1, attributes.get('task_duplicates', 1)):
                # We use slugid() since we want a distinct task id
                fs[task_id] = e.submit(_create_task, session, slugid(),
                                       taskid_to_label[task_id], task_def)

        # Wait for all futures to complete.
        for f in futures.as_completed(fs.values()):
            f.result()
Example #4
def test_pure(client):
    N = 10
    with client.get_executor() as e:
        fs = [e.submit(get_random) for i in range(N)]
        res = [fut.result() for fut in as_completed(fs)]
        assert len(set(res)) < len(res)
    with client.get_executor(pure=False) as e:
        fs = [e.submit(get_random) for i in range(N)]
        res = [fut.result() for fut in as_completed(fs)]
        assert len(set(res)) == len(res)
Example #5
def joined_map_seq(futures):
    """Wait for all futures to finish, then yield their results in the original order.

    :param futures: an iterable of Future objects
    :return: a generator over the futures' results
    """
    # Block until every future has completed before yielding anything.
    for _ in as_completed(futures):
        pass
    for future in futures:
        res = future.result()
        yield res
Example #6
    def get_elb_bucket_locations(self):
        session = local_session(self.manager.session_factory)
        client = session.client('elb')

        # Try to use the cache if it exists
        elbs = self.manager._cache.get(
            {'region': self.manager.config.region, 'resource': 'elb'})

        # Sigh, post query refactor reuse, we can't save our cache here
        # as that resource manager does extra lookups on tags. Not
        # worth paginating, since with cache usage we have full set in
        # mem.
        if elbs is None:
            p = client.get_paginator('describe_load_balancers')
            results = p.paginate()
            elbs = results.build_full_result().get(
                'LoadBalancerDescriptions', ())
            self.log.info("Queried %d elbs", len(elbs))
        else:
            self.log.info("Using %d cached elbs", len(elbs))

        get_elb_attrs = functools.partial(
            _query_elb_attrs, self.manager.session_factory)

        with self.executor_factory(max_workers=2) as w:
            futures = []
            for elb_set in chunks(elbs, 100):
                futures.append(w.submit(get_elb_attrs, elb_set))
            for f in as_completed(futures):
                if f.exception():
                    log.error("Error while scanning elb log targets: %s" % (
                        f.exception()))
                    continue
                for tgt in f.result():
                    yield tgt
Example #7
    def _process_bucket(self, b, p, key_log, w):
        count = 0

        for key_set in p:
            keys = self.get_keys(b, key_set)
            count += len(keys)
            futures = []

            for batch in chunks(keys, size=100):
                if not batch:
                    continue
                futures.append(w.submit(self.process_chunk, batch, b))

            for f in as_completed(futures):
                if f.exception():
                    log.exception("Exception Processing bucket:%s key batch %s" % (
                        b['Name'], f.exception()))
                    continue
                r = f.result()
                if r:
                    key_log.add(r)

            # Log completion at info level, progress at debug level
            if key_set['IsTruncated']:
                log.debug('Scan progress bucket:%s keys:%d remediated:%d ...',
                          b['Name'], count, key_log.count)
            else:
                log.info('Scan Complete bucket:%s keys:%d remediated:%d',
                         b['Name'], count, key_log.count)

        b['KeyScanCount'] = count
        b['KeyRemediated'] = key_log.count
        return {
            'Bucket': b['Name'], 'Remediated': key_log.count, 'Count': count}
Example #8
File: cli.py Project: jdsn/rapport
    def create(self):
        results = {}
        with futures.ThreadPoolExecutor(max_workers=4) as executor:
            plugin_futures = dict((executor.submit(p.collect, self.timeframe), p) for p in self.plugins)
            for future in futures.as_completed(plugin_futures):
                plugin = plugin_futures[future]
                try:
                    if rapport.config.get_int("rapport", "verbosity") >= 2:
                        print "Result for {0}: {1}".format(plugin.alias, future.result())
                    template = rapport.template.get_template(plugin, "text")
                    if template:
                        results[plugin] = template.render(future.result())
                except Exception as e:
                    print >>sys.stderr, "Failed plugin {0}:{1}: {2}!".format(plugin, plugin.alias, e)

        # TODO: Generate mail template

        # Print results sorted by plugin appearance in config file (i.e. init order):
        for plugin in self.plugins:
            try:
                print results[plugin]
            except KeyError as e:
                # A missing result for plugins means an exception happened
                # above, which already printed an error message, thus:
                pass
Example #9
    def add_tags_to_results(self, client, elbs):
        """
        Gets the tags for the ELBs and adds them to
        the result set.
        """
        elb_names = [elb['LoadBalancerName'] for elb in elbs]
        names_to_tags = {}
        fn = partial(self.process_tags, client=client)
        futures = []
        with self.executor_factory(max_workers=3) as w:
            # max 20 ELBs per call (API limitation)
            for elb_names_chunk in chunks(elb_names, size=20):
                futures.append(
                    w.submit(fn, elb_names_chunk))

        for f in as_completed(futures):
            if f.exception():
                self.log.exception("Exception Processing ELB: %s" % (
                    f.exception()))
                continue
            r = f.result()
            if r:
                names_to_tags.update(r)

        for elb in elbs:
            elb['Tags'] = names_to_tags[elb['LoadBalancerName']]
Example #10
    def process(self, volumes):
        original_count = len(volumes)
        volumes = [v for v in volumes
                   if not v['Encrypted'] or not v['Attachments']]
        log.debug(
            "EncryptVolumes filtered from %d to %d "
            "unencrypted attached volumes" % (
                original_count, len(volumes)))

        # Group volumes by instance id
        instance_vol_map = {}
        for v in volumes:
            instance_id = v['Attachments'][0]['InstanceId']
            instance_vol_map.setdefault(instance_id, []).append(v)

        # Query instances to find current instance state
        self.instance_map = {
            i['InstanceId']: i for i in query_instances(
                local_session(self.manager.session_factory),
                InstanceIds=instance_vol_map.keys())}

        with self.executor_factory(max_workers=10) as w:
            futures = {}
            for instance_id, vol_set in instance_vol_map.items():
                futures[w.submit(
                    self.process_volume, instance_id, vol_set)] = instance_id

            for f in as_completed(futures):
                if f.exception():
                    instance_id = futures[f]
                    log.error(
                        "Exception processing instance:%s volset: %s \n %s" % (
                            instance_id, instance_vol_map[instance_id],
                            f.exception()))
Example #11
    def process(self, buckets):
        results = []
        with self.executor_factory(max_workers=3) as w:
            futures = {}
            for b in buckets:
                futures[w.submit(self.process_bucket, b)] = b
            for f in as_completed(futures):
                b = futures[f]
                if f.exception():
                    self.log.error(
                        "Error on bucket:%s region:%s policy:%s error: %s",
                        b['Name'], b.get('Location', 'unknown'),
                        self.manager.data.get('name'), f.exception())
                    self.denied_buckets.append(b['Name'])
                    continue
                result = f.result()
                if result:
                    results.append(result)

        if self.denied_buckets and self.manager.log_dir:
            with open(
                    os.path.join(
                        self.manager.log_dir, 'denied.json'), 'w') as fh:
                json.dump(self.denied_buckets, fh, indent=2)
            self.denied_buckets = []
        return results
Example #12
    def _process_identify_futures(self, futures, opts, instances):
        self.out.debug('scan._process_identify_futures')
        checkpoint = datetime.now()

        i = 0
        to_scan = {}
        cancelled = False
        for future in as_completed(futures):

            if common.shutdown:
                if not cancelled:
                    # map() is lazy on Python 3, so cancel each future explicitly.
                    for fut in futures:
                        fut.cancel()
                    cancelled = True

                continue

            url = future.url
            try:
                cms_name, result_tuple = future.result(timeout=opts['timeout_host'])

                if cms_name is not None:
                    if cms_name not in to_scan:
                        to_scan[cms_name] = []

                    to_scan[cms_name].append(result_tuple)
            except:
                f.exc_handle(url, self.out, self.app.testing)

            i += 1

        if to_scan:
            self._process_scan(opts, instances, to_scan)
            to_scan = {}
Example #13
    def start_upload(self):
        """Method to start upload"""
        LOGGER.warning("Starting to upload %d sequences...", len(self.sequences))
        user = self.login_controller.login()
        with THREAD_LOCK:
            total = 0
            for sequence in self.sequences:
                total = total + len(sequence.visual_items)
            self.progress_bar = tqdm(total=total)

        sequence_operation = SequenceUploadOperation(self,
                                                     user.access_token,
                                                     self.max_workers)

        with ThreadPoolExecutor(max_workers=1) as executors:
            futures = [executors.submit(sequence_operation.upload,
                                        sequence) for sequence in self.sequences]
            report = []
            for future in as_completed(futures):
                success, sequence = future.result()
                report.append((success, sequence))
                if success:
                    LOGGER.warning("    Uploaded sequence from %s, "
                                   "the sequence will be available after "
                                   "processing at %s", sequence.path,
                                   self.login_controller.osc_api.sequence_link(sequence))
                else:
                    LOGGER.warning("    Failed to upload sequence at %s. Restart the script in "
                                   "order to finish you upload for this sequence.", sequence.path)
            LOGGER.warning("Finished uploading")
            self.progress_bar.close()
Example #14
    def resources(self, query=None):
        client = local_session(self.manager.session_factory).client('config')
        paginator = client.get_paginator('list_discovered_resources')
        pages = paginator.paginate(
            resourceType=self.manager.get_model().config_type)
        results = []

        with self.manager.executor_factory(max_workers=5) as w:
            ridents = pager(pages, self.retry)
            resource_ids = [
                r['resourceId'] for r in ridents.get('resourceIdentifiers', ())]
            self.manager.log.debug(
                "querying %d %s resources",
                len(resource_ids),
                self.manager.__class__.__name__.lower())

            for resource_set in chunks(resource_ids, 50):
                futures = []
                futures.append(w.submit(self.get_resources, resource_set))
                for f in as_completed(futures):
                    if f.exception():
                        self.manager.log.error(
                            "Exception getting resources from config \n %s" % (
                                f.exception()))
                        continue
                    results.extend(f.result())
        return results
Example #15
  def testWaitingForSomeButNotAllConcurrentFutureInvocations(self):
    pool = logging_pool.pool(test_constants.THREAD_CONCURRENCY)
    request = b'\x67\x68'
    expected_response = self._handler.handle_unary_unary(request, None)
    response_futures = [None] * test_constants.THREAD_CONCURRENCY
    lock = threading.Lock()
    test_is_running_cell = [True]
    def wrap_future(future):
      def wrap():
        try:
          return future.result()
        except grpc.RpcError:
          with lock:
            if test_is_running_cell[0]:
              raise
          return None
      return wrap

    multi_callable = _unary_unary_multi_callable(self._channel)
    for index in range(test_constants.THREAD_CONCURRENCY):
      inner_response_future = multi_callable.future(
          request,
          metadata=(
              (b'test',
               b'WaitingForSomeButNotAllConcurrentFutureInvocations'),))
      outer_response_future = pool.submit(wrap_future(inner_response_future))
      response_futures[index] = outer_response_future

    some_completed_response_futures_iterator = itertools.islice(
        futures.as_completed(response_futures),
        test_constants.THREAD_CONCURRENCY // 2)
    for response_future in some_completed_response_futures_iterator:
      self.assertEqual(expected_response, response_future.result())
    with lock:
      test_is_running_cell[0] = False
Example #16
    def process(self, asgs):
        msg_tmpl = self.data.get(
            'msg',
            'AutoScaleGroup does not meet org tag policy: {op}@{stop_date}')
        
        op = self.data.get('op', 'suspend')
        tag = self.data.get('tag', DEFAULT_TAG)
        date = self.data.get('days', 4)
        
        n = datetime.now(tz=tzutc())
        stop_date = n + timedelta(days=date)
        msg = msg_tmpl.format(
            op=op, stop_date=stop_date.strftime('%Y/%m/%d'))

        self.log.info("Tagging %d asgs for %s on %s" % (
            len(asgs), op, stop_date.strftime('%Y/%m/%d')))

        futures = {}
        with self.executor_factory(max_workers=10) as w:
            for a in asgs:
                futures[w.submit(self.process_asg, a, msg)] = a

        for f in as_completed(futures):
            if f.exception():
                log.exception("Exception processing asg:%s" % (
                    futures[f]['AutoScalingGroupName']))
                continue
Example #17
def access(config, accounts=()):
    """Check iam permissions for log export access in each account"""
    config = validate.callback(config)
    accounts_report = []

    def check_access(account):
        accounts_report.append(account)
        session = get_session(account['role'])
        identity = session.client('sts').get_caller_identity()
        account['account_id'] = identity['Account']
        account.pop('groups')
        account.pop('role')
        client = session.client('iam')
        policy_arn = identity['Arn']
        if policy_arn.count('/') > 1:
            policy_arn = policy_arn.rsplit('/', 1)[0]
        if ':sts:' in policy_arn:
            policy_arn = policy_arn.replace(':sts', ':iam')
        if ':assumed-role' in policy_arn:
            policy_arn = policy_arn.replace(':assumed-role', ':role')
        evaluation = client.simulate_principal_policy(
            PolicySourceArn=policy_arn,
            ActionNames=['logs:CreateExportTask'])['EvaluationResults']
        account['access'] = evaluation[0]['EvalDecision']

    with ThreadPoolExecutor(max_workers=16) as w:
        futures = {}
        for account in config.get('accounts', ()):
            if accounts and account['name'] not in accounts:
                continue
            futures[w.submit(check_access, account)] = None
        for f in as_completed(futures):
            pass
    accounts_report.sort(key=operator.itemgetter('access'), reverse=True)
    print(tabulate(accounts_report, headers='keys'))
Example #18
	def updateall(self,db,settings):
		#ref: http://pythonhosted.org/futures/
		from concurrent import futures
		from datetime import datetime,timedelta
		print("invoked")
		self.log.info("Update all method invoked")
		nobj= Notification()
		# loop through feeds that were not updated in the last 15 mins
		#http://stackoverflow.com/questions/4541629/how-to-create-a-datetime-equal-to-15-minutes-ago
		try:
			tilltime = Helper.datetotimestamp(datetime.now()-timedelta(minutes=15))
			feedlist = []
			for feed in db.feeds.find({'lastupdated':{'$lt':tilltime}},{'items':0}):
				# and call updatefeed for each feedurl
				feedlist.append(feed['_id'])
			
			if len(feedlist) > 0:
				with futures.ThreadPoolExecutor(max_workers=5) as executor:
					future_to_url = dict((executor.submit(self.updatefeed,db, feed, settings), feed) for feed in feedlist)

				for future in futures.as_completed(future_to_url):
					feed = future_to_url[future]
					if future.exception() is not None:
						err = '%r generated an exception: %s' % (feed,future.exception())
						nobj.adderror(err)
						print(err)
					else:
						print('%r page is done' % (feed))
		except Exception,err:
			nobj.adderror(str(err))
			self.log.error("Update all feeds failed with error %s",str(err))
    def handle(self, *args, **options):

        city_names = options['cities']
        n_threads = options['n_threads']

        if city_names[0] == 'ALL':
            city_names = services.get_active_city_names()

        # Instantiate weather service
        weather_service = weather.OpenWeatherMapService()

        def fetch_weather_update(city_name):
            try:
                made_update = services.fetch_weather_update(city_name=city_name,
                                                            weather_service=weather_service)
            except services.errors.CityNotFound:
                return self.style.ERROR(f'City "{city_name}" does not exist')
            except services.errors.CityIsDisabled:
                return self.style.ERROR(f'City "{city_name}" is disabled')
            if made_update:
                return self.style.SUCCESS('Successfully fetched new weather update for city ' + \
                                          f'"{city_name}"')
            else:
                return self.style.WARNING(f'No new weather update found for city "{city_name}"')

        pool = futures.ThreadPoolExecutor(n_threads)
        jobs = []

        for city_name in city_names:
            jobs.append(pool.submit(fetch_weather_update, city_name))

        for job in futures.as_completed(jobs):
            self.stdout.write(job.result())
Example #20
    def process(self, buckets):
        from c7n.mu import LambdaManager
        from c7n.ufuncs.s3crypt import get_function
        func = get_function(
            None, self.data.get('role', self.manager.config.assume_role))

        # Publish function to all of our buckets regions
        region_funcs = {}
        regions = set([
            b.get('LocationConstraint', 'us-east-1') for b in buckets])
        for r in regions:
            lambda_mgr = LambdaManager(
                functools.partial(self.manager.session_factory, region=r))
            region_funcs[r] = lambda_mgr.publish(func)

        with self.executor_factory(max_workers=3) as w:
            results = []
            futures = []
            for b in buckets:
                futures.append(
                    w.submit(
                        self.process_bucket,
                        region_funcs[b.get('LocationConstraint', 'us-east-1')],
                        b))
            for f in as_completed(futures):
                if f.exception():
                    log.exception(
                        "Error attaching lambda-encrypt %s" % (f.exception()))
                    continue
                results.append(f.result())
            return filter(None, results)
Example #21
def generate_normalized_wiggle_files(project_folder, max_proc):
    parameter_dict = _read_parameters(project_folder)
    # create normalized coverage folder if it does not exist
    wiggle_folder = "{}/normalized_coverage".format(project_folder)
    if not exists(wiggle_folder):
        makedirs(wiggle_folder)
    # Generate coverage files in parallel
    print("** Generating normalized coverage files for {} libraries...".format(
          len(parameter_dict["libraries"])), flush=True)
    t_start = time()
    with futures.ProcessPoolExecutor(
            max_workers=max_proc) as executor:
        future_to_lib_name = {
            executor.submit(
                _generate_normalized_wiggle_file_for_lib, lib_name,
                lib["bam_file"], parameter_dict["paired_end"],
                parameter_dict["max_insert_size"], lib["size_factor"],
                wiggle_folder): lib_name for lib_name, lib
            in parameter_dict["libraries"].items()}
    for future in futures.as_completed(future_to_lib_name):
        lib_name = future_to_lib_name[future]
        print("* Coverage files for library {} generated.".format(lib_name),
              flush=True)
    t_end = time()
    print("Coverage file generation finished in {} seconds.".format(
        t_end-t_start), flush=True)
Example #22
    def process(self, resources):
        # Legacy
        msg = self.data.get('msg')
        msg = self.data.get('value') or msg
        
        tag = self.data.get('tag', DEFAULT_TAG)
        tag = self.data.get('key') or tag

        # Support setting multiple tags in a single go with a mapping
        tags = self.data.get('tags')

        if tags is None:
            tags = []
        else:
            tags = [{'Key': k, 'Value': v} for k, v in tags.items()]

        if msg:
            tags.append({'Key': tag, 'Value': msg})

        batch_size = self.data.get('batch_size', self.batch_size)

        with self.executor_factory(max_workers=self.concurrency) as w:
            futures = []
            for resource_set in utils.chunks(resources, size=batch_size):
                futures.append(
                    w.submit(self.process_resource_set, resource_set, tags))

            for f in as_completed(futures):
                if f.exception():
                    self.log.error(
                        "Exception removing tags: %s on resources:%s \n %s" % (
                            tags, self.id_key, f.exception()))
Example #23
    def process(self, resources):
        
        # Move this to policy? / no resources bypasses actions?
        if not len(resources):
            return
        
        msg_tmpl = self.data.get(
            'msg',
            'Resource does not meet policy: {op}@{action_date}')

        op = self.data.get('op', 'stop')
        tag = self.data.get('tag', DEFAULT_TAG)
        date = self.data.get('days', 4)
        
        n = datetime.now(tz=tzutc())
        action_date = n + timedelta(days=date)
        msg = msg_tmpl.format(
            op=op, action_date=action_date.strftime('%Y/%m/%d'))

        self.log.info("Tagging %d resources for %s on %s" % (
            len(resources), op, action_date.strftime('%Y/%m/%d')))

        tags = [{'Key': tag, 'Value': msg}]
        
        with self.executor_factory(max_workers=2) as w:
            futures = []
            for resource_set in utils.chunks(resources, size=200):
                futures.append(
                    w.submit(self.process_resource_set, resource_set, tags))

            for f in as_completed(futures):
                if f.exception():
                    self.log.error(
                        "Exception tagging resource set: %s  \n %s" % (
                            tags, f.exception()))
Example #24
def record_set(session_factory, bucket, key_prefix, start_date):
    """Retrieve all s3 records for the given policy output url

    From the given start date.
    """

    s3 = local_session(session_factory).client('s3')

    records = []
    key_count = 0

    marker = key_prefix.strip("/") + "/" + start_date.strftime(
         '%Y/%m/%d/00') + "/resources.json.gz"

    p = s3.get_paginator('list_objects').paginate(
        Bucket=bucket,
        Prefix=key_prefix.strip('/') + '/',
        Marker=marker
    )

    with ThreadPoolExecutor(max_workers=20) as w:
        for key_set in p:
            if 'Contents' not in key_set:
                continue
            keys = [k for k in key_set['Contents']
                    if k['Key'].endswith('resources.json.gz')]
            key_count += len(keys)
            futures = map(lambda k: w.submit(get_records, bucket, k, session_factory), keys)

            for f in as_completed(futures):
                records.extend(f.result())

    log.info("Fetched %d records across %d files" % (
        len(records), key_count))
    return records
Example #25
    def process_messages(self, messages):
        future_to_message = {}
        to_delete = []

        self.logger.debug('processing %d messages', len(messages))
        for message in messages:
            # ThreadPoolExecutor will throw a RuntimeError if we try
            # to submit while it's shutting down. If we encounter a
            # RuntimeError, immediately stop trying to submit new tasks;
            # they will get requeued after the interval configured on the
            # queue's policy.
            try:
                future = self.pool.submit(self.func, message)
            except RuntimeError:
                self.logger.exception('cannot submit jobs to pool')
                raise
            else:
                future_to_message[future] = message

        for future in futures.as_completed(future_to_message,
                                           timeout=self.timeout):
            message = future_to_message[future]
            try:
                future.result()
            except:
                self.logger.exception('exception processing message %s',
                                      message['MessageId'])
            else:
                to_delete.append(message)

        return to_delete
Example #26
def run(config, use, output_dir, accounts, tags,
        region, policy, policy_tags, cache_period, metrics, dryrun, debug, verbose):
    """run a custodian policy across accounts"""
    accounts_config, custodian_config, executor = init(
        config, use, debug, verbose, accounts, tags, policy, policy_tags=policy_tags)
    policy_counts = Counter()
    with executor(max_workers=WORKER_COUNT) as w:
        futures = {}
        for a in accounts_config['accounts']:
            for r in resolve_regions(region or a.get('regions', ())):
                futures[w.submit(
                    run_account,
                    a, r,
                    custodian_config,
                    output_dir,
                    cache_period,
                    metrics,
                    dryrun,
                    debug)] = (a, r)

        for f in as_completed(futures):
            a, r = futures[f]
            if f.exception():
                if debug:
                    raise
                log.warning(
                    "Error running policy in %s @ %s exception: %s",
                    a['name'], r, f.exception())
                continue

            for p, count in f.result().items():
                policy_counts[p] += count

    log.info("Policy resource counts %s" % policy_counts)
Example #27
    def _poll_run_states(self):

        # in every iteration the states of all unfinished runs are requested once
        while not self._shutdown.is_set():
            start = time()
            states = {}

            with self._web_interface._unfinished_runs_lock:
                for run_id in self._web_interface._unfinished_runs.keys():
                    state_future = self._state_poll_executor.submit(
                        self._web_interface._is_finished, run_id)
                    states[state_future] = run_id

            # Collect states of runs
            for state_future in as_completed(states.keys()):

                run_id = states[state_future]
                state = state_future.result()

                if state == "FINISHED" or state == "UNKNOWN":
                    self._web_interface._download_result_async(run_id)

                elif state == "ERROR":
                    self._web_interface._run_failed(run_id)

            end = time()
            duration = end - start
            if duration < self._result_poll_interval and not self._shutdown.is_set():
                self._shutdown.wait(self._result_poll_interval - duration)
Example #28
    def test_no_timeout(self):
        def wait_test():
            while not future1._waiters:
                pass
            call1.set_can()
            call2.set_can()

        call1 = Call(manual_finish=True)
        call2 = Call(manual_finish=True)
        try:
            future1 = self.executor.submit(call1)
            future2 = self.executor.submit(call2)

            t = threading.Thread(target=wait_test)
            t.start()
            completed = set(futures.as_completed(
                    [CANCELLED_AND_NOTIFIED_FUTURE,
                     EXCEPTION_FUTURE,
                     SUCCESSFUL_FUTURE,
                     future1, future2]))
            self.assertEqual(set(
                    [CANCELLED_AND_NOTIFIED_FUTURE,
                     EXCEPTION_FUTURE,
                     SUCCESSFUL_FUTURE,
                     future1, future2]),
                    completed)
        finally:
            call1.close()
            call2.close()
Example #29
    def process(self, resources, event=None):
        days = self.data.get('days', 14)
        duration = timedelta(days)

        self.metric = self.data['name']
        self.end = datetime.utcnow()
        self.start = self.end - duration
        self.period = int(self.data.get('period', duration.total_seconds()))
        self.statistics = self.data.get('statistics', 'Average')
        self.model = self.manager.get_model()
        self.op = OPERATORS[self.data.get('op', 'less-than')]
        self.value = self.data['value']

        ns = self.data.get('namespace')
        if not ns:
            ns = getattr(self.model, 'default_namespace', None)
            if not ns:
                ns = self.DEFAULT_NAMESPACE[self.model.service]
        self.namespace = ns

        self.log.debug("Querying metrics for %d", len(resources))
        matched = []
        with self.executor_factory(max_workers=3) as w:
            futures = []
            for resource_set in chunks(resources, 50):
                futures.append(
                    w.submit(self.process_resource_set, resource_set))

            for f in as_completed(futures):
                if f.exception():
                    self.log.warning(
                        "CW Retrieval error: %s" % f.exception())
                    continue
                matched.extend(f.result())
        return matched
Example #30
    def process(self, resources):
        resources = self.filter_table_state(
            resources, self.valid_status)
        if not len(resources):
            return

        c = local_session(self.manager.session_factory).client('dynamodb')
        futures = {}

        prefix = self.data.get('prefix', 'Backup')

        with self.executor_factory(max_workers=2) as w:
            for t in resources:
                futures[w.submit(
                    c.create_backup,
                    BackupName=snapshot_identifier(
                        prefix, t['TableName']),
                    TableName=t['TableName'])] = t
            for f in as_completed(futures):
                t = futures[f]
                if f.exception():
                    self.manager.log.warning(
                        "Could not complete DynamoDB backup table:%s", t)
                    continue
                arn = f.result()['BackupDetails']['BackupArn']
                t['c7n:BackupArn'] = arn
Example #31
def play(num_questions, path_directory, path_screenshot):
    # Create a new Quiz
    quiz = Quiz(path_directory)
    # Create a ThreadPool to parallelize the work from here on
    pool = ThreadPoolExecutor(max_workers=4)

    # For each question
    for i in range(1, num_questions + 1):
        # Waiting for user input...
        try:
            c = input("Press enter to evaluate a new question, e to exit: ")
        except KeyboardInterrupt:
            # If CTRL+C, break
            print("")  # Go to a new line
            break
        # If the user decided to exit, break
        if c == 'e': break

        # If we are here to evaluate only one question
        if path_screenshot:
            # Load the screenshot from disk as cv2 grey object
            screenshot = Screenshot.load_image(path_screenshot)
        # Or we have to process an entire directory of questions
        elif path_directory:
            # Go through all screenshots in the specified directory
            filename = f"{path_directory}/Question-{i}.png"
            screenshot = Screenshot.load_image(filename)
        # If there is neither a screenshot nor a directory
        else:
            # Define the path for a new screenshot file
            filename = f"{quiz.folder_name}/Question-{i}.png"
            # Take a black-n-white screenshot
            screenshot = Screenshot.take_screenshot(filename)

        # Create a new Question object and extract the question from the screenshot
        question = quiz.new_question(Screenshot.extract_question(screenshot))

        # Extract all three answers from the screenshot, in three different threads
        future_answers = {
            pool.submit(Screenshot.extract_answer, screenshot, question,
                        position): position
            for position in range(3)
        }
        future_question = pool.submit(Sanitize.clean_question, question)
        # Wait until all threads are done
        for future in as_completed(future_answers):
            pass

        print(f"\nQuestion n.{i}: {question.get_text()}")
        print(
            f"Answers: [{question.get_answer(0).get_text()}, {question.get_answer(1).get_text()}, {question.get_answer(2).get_text()}]"
        )

        # Briefly ... later

        # Define the query URL
        query_url = Scraping.define_url(question.get_text())
        # Get search results from Google
        google_results = Scraping.search(query_url)

        # Parallelize the pattern matching process with all the results
        future_guess = {
            pool.submit(Scraping.guess_answer, google_results,
                        question.get_answer(position)): position
            for position in range(3)
        }

        # If at least one result was found
        for future in as_completed(future_guess):
            if future.result():
                question.one_match = True

        # If at least one answer has a match
        if question.one_match:
            # Get and print the answer with the highest matches number
            guessed = question.get_answer_max_matches()
            print(
                f"\n{Style.BRIGHT}{Fore.GREEN}{guessed.get_text():>40} {Fore.CYAN}{guessed.get_matches():<40}{Fore.RESET}{Style.RESET_ALL}"
            )
            # Save a reference to the guessed answer (non-zero indexed)
            question.set_guessed_answer(question.answers.index(guessed) + 1)
        else:
            print(
                f"{Style.DIM}{Fore.YELLOW}No match found, trying a more in depth analysis...{Fore.RESET}{Style.RESET_ALL}\n"
            )
            # Perform a query built concatenating the question and each answer

            # Print sort-of table header (17+Answer+17, Score+5, 1+Results+2, Total)
            # I know it's ugly, maybe I'll use Rich lib
            print(
                f"{Style.BRIGHT}                 Answer                 Score      Results  Total{Style.RESET_ALL}"
            )

            # Check if this is a usual question or a "negated" question (explanation below)
            question.usual_question = "NON" not in question.get_text()

            # Parallelize the pattern matching process with all the results
            future_calculation = {
                pool.submit(
                    Scraping.calculate_concat,
                    question.get_text() if question.usual_question else
                    question.get_text().replace("NON", ''),
                    question.get_answer(position)): position
                for position in range(3)
            }

            for future in as_completed(future_calculation):
                print(future.result())

            # If the answers scored the same, then something went wrong
            if question.get_answer(0).score == question.get_answer(
                    1).score == question.get_answer(2).score:
                print(
                    f"\n{Style.BRIGHT}{Fore.RED}Choose a random answer, the search was not successful!{Fore.RESET}{Style.RESET_ALL}"
                )  # why not suggest a random answer?
            # Otherwise, let's assume the answer with the highest score is fair
            else:
                guessed = question.get_answer_max_score(
                ) if question.usual_question else question.get_answer_min_score(
                )
                print(
                    f"\n{Style.BRIGHT}{Fore.GREEN}{guessed.get_text():>40} {Fore.CYAN}{guessed.score:<40}{Fore.RESET}{Style.RESET_ALL}"
                )
                # Save a reference to the guessed answer (non-zero indexed)
                question.set_guessed_answer(
                    question.answers.index(guessed) + 1)

            # About the algorithm for "negated" questions ("which of these... not..."):
            # if the question asks for the answer that does NOT belong to a certain
            # category, then a Google search is executed concatenating the question
            # (without the "not" word, "NON" in Italian) and each answer, and the
            # answer that obtains the minimum score, instead of the maximum, is taken.

        # Save the (real) correct answer for debug and analysis purpose,
        # but only if the report file doesn't already exist
        if not quiz.report_exists:
            question.set_correct_answer(
                int(input("\nWhat was the correct answer? (1,2,3): ")))

        # Print a bunch (80) of underscore to separate different question
        print(
            "________________________________________________________________________________\n"
        )

    # Shutdown the ThreadPool
    pool.shutdown()

    # Save the report if it doesn't already exist
    if not quiz.report_exists:
        quiz.save_report()
Example #32
    def map_unordered(cls,
                      function,
                      items,
                      multiprocess=False,
                      file=None,
                      step=100,
                      ipython_widget=False,
                      multiprocessing_start_method=None):
        """Map function over items, reporting the progress.

        Does a `map` operation while displaying a progress bar with
        percentage complete. The map operation may run in arbitrary order
        on the items, and the results may be returned in arbitrary order.

        ::

            def work(i):
                print(i)

            ProgressBar.map(work, range(50))

        Parameters
        ----------
        function : function
            Function to call for each step

        items : sequence
            Sequence where each element is a tuple of arguments to pass to
            *function*.

        multiprocess : bool, int, optional
            If `True`, use the `multiprocessing` module to distribute each task
            to a different processor core. If a number greater than 1, then use
            that number of cores.

        ipython_widget : bool, optional
            If `True`, the progress bar will display as an IPython
            notebook widget.

        file : writable file-like, optional
            The file to write the progress bar to.  Defaults to
            `sys.stdout`.  If ``file`` is not a tty (as determined by
            calling its `isatty` member, if any), the scrollbar will
            be completely silent.

        step : int, optional
            Update the progress bar at least every *step* steps (default: 100).
            If ``multiprocess`` is `True`, this will affect the size
            of the chunks of ``items`` that are submitted as separate tasks
            to the process pool.  A large step size may make the job
            complete faster if ``items`` is very long.

        multiprocessing_start_method : str, optional
            Useful primarily for testing; if in doubt leave it as the default.
            When using multiprocessing, certain anomalies occur when starting
            processes with the "spawn" method (the only option on Windows);
            other anomalies occur with the "fork" method (the default on
            Linux).
        """
        # concurrent.futures import here to avoid import failure when running
        # in pyodide/Emscripten
        from concurrent.futures import ProcessPoolExecutor, as_completed

        results = []

        if file is None:
            file = _get_stdout()

        with cls(len(items), ipython_widget=ipython_widget, file=file) as bar:
            if bar._ipython_widget:
                chunksize = step
            else:
                default_step = max(int(float(len(items)) / bar._bar_length), 1)
                chunksize = min(default_step, step)
            if not multiprocess or multiprocess < 1:
                for i, item in enumerate(items):
                    results.append(function(item))
                    if (i % chunksize) == 0:
                        bar.update(i)
            else:
                ctx = multiprocessing.get_context(multiprocessing_start_method)
                kwargs = dict(mp_context=ctx)

                with ProcessPoolExecutor(
                        max_workers=(int(multiprocess)
                                     if multiprocess is not True else None),
                        **kwargs) as p:
                    for i, f in enumerate(
                            as_completed(
                                p.submit(function, item) for item in items)):
                        bar.update(i)
                        results.append(f.result())

        return results
Example #33
            data = {
                'code': file.replace('.jpg', ''),
                'image': file_path,
                'image_type': 'BASE64',
                'options': {
                    'max_face_num': 1,
                    'face_field': "age,beauty,gender",
                },
                'aip': aip_list[it],
                'mongo': mongo_conn
            }
            it += 1
            it %= len(aip_list)
            # Submit the task to the thread pool
            work = executor.submit(detect_face, data)
            all_work.append(work)
        else:
            util.print_e('The album contains a folder: {}'.format(file))
        util.process_bar(index + 1, len(file_list),
                         'Submitted {} tasks'.format(index + 1))

    # Wait for all tasks in the thread pool to finish
    util.print_a('Waiting for all tasks to finish')
    rowcount = completed_work = 0
    for work in as_completed(all_work):
        # Rely on the blocking behavior of as_completed to collect each result
        rowcount += work.result()
        completed_work += 1
        util.process_bar(completed_work, len(file_list),
                         'Completed %d tasks, modified %d records' % (completed_work, rowcount))
Example #34
def scrap(start=1):
    # Choose the start blocks to scrap
    if start > 1:  # Continue from the last break
        results = np.load('results' + str(start) + '.npy').item()
    else:  # Start from the beginning of the block
        results = dict()

    # Save the current block
    checkpoint = start

    try:
        for i in range(0, (len(urls) - start) // fact + 1):
            start_time = time.time()

            # Multiple threads for scrap
            with ThreadPoolExecutor(max_workers=max_workers) as executor:

                end = start + fact * (i + 1)
                if end > len(urls):
                    end = len(urls) + 1

                # Create threads
                futures = {
                    executor.submit(loadurl, url)
                    for url in urls[checkpoint:end]
                }

                # as_completed() gives you the threads once finished
                for f in as_completed(futures):

                    # Get the results
                    rs = f.result()

                    if rs[1] in results.keys():
                        results[rs[1]] += rs[0]
                    else:
                        results[rs[1]] = rs[0]

                elapsed_time = time.time() - start_time

                # Calculate time elapsed for this scrap
                e = int(time.time() - start_time)

                # Everything okay, update the latest checkpoint
                checkpoint = end

                # Save result to file
                np.save('results' + str(checkpoint) + '.npy', results)

                # Only keep the lastest three files
                remove('results' + str(checkpoint - fact * 3) + '.npy')

                print(
                    "Range: " + str(checkpoint) + " ",
                    '{:02d}:{:02d}:{:02d}'.format(e // 3600, (e % 3600 // 60),
                                                  e % 60))
    except:
        if debug:
            traceback.print_exc()
        print("Exception, re-call function main(%s)" % (str(checkpoint)))
        time.sleep(3)

        # Exception happend, restart from the last checkpoint
        scrap(checkpoint)
Example #35
def main():
    executor = futures.ProcessPoolExecutor(max_workers=4)
    args = range(10)
    fs = [executor.submit(func, arg) for arg in args]
    for f in futures.as_completed(fs):
        print(f.result())
Example #36
def sync(
    api: BaiduPCSApi,
    localdir: str,
    remotedir: str,
    max_workers: int = CPU_NUM,
    slice_size: int = DEFAULT_SLICE_SIZE,
    show_progress: bool = True,
):
    localdir = Path(localdir).as_posix()
    remotedir = Path(remotedir).as_posix()

    is_file = api.is_file(remotedir)
    assert not is_file, "remotedir must be a directory"

    if not api.exists(remotedir):
        all_pcs_files = {}
    else:
        all_pcs_files = {
            pcs_file.path[len(remotedir) + 1:]: pcs_file
            for pcs_file in recursive_list(api, remotedir)
        }

    fts: List[FromTo] = []
    check_list: List[Tuple[str, PcsFile]] = []
    all_localpaths = set()
    for localpath in walk(localdir):
        path = localpath[len(localdir) + 1:]
        all_localpaths.add(path)

        if path not in all_pcs_files:
            fts.append(FromTo(localpath, join_path(remotedir, path)))
        else:
            check_list.append((localpath, all_pcs_files[path]))

    semaphore = Semaphore(max_workers)
    with ThreadPoolExecutor(max_workers=CPU_NUM) as executor:
        tasks = {}
        for lp, pf in check_list:
            semaphore.acquire()
            fut = executor.submit(sure_release, semaphore, check_file_md5, lp,
                                  pf)
            tasks[fut] = (lp, pf)

        for fut in as_completed(tasks):
            is_equal = fut.result()
            lp, pf = tasks[fut]
            if not is_equal:
                fts.append(FromTo(lp, pf.path))

    _upload(
        api,
        fts,
        max_workers=max_workers,
        slice_size=slice_size,
        ignore_existing=False,
        show_progress=show_progress,
    )

    to_deletes = []
    for rp in all_pcs_files.keys():
        if rp not in all_localpaths:
            to_deletes.append(all_pcs_files[rp].path)

    if to_deletes:
        api.remove(*to_deletes)
        print(f"Delete: [i]{len(to_deletes)}[/i] remote paths")
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y_true.reshape(-1, 1),
                                                    test_size=0.15,
                                                    random_state=1869097)

N_list = [5, 10, 15, 20, 25, 30]
K = 5

print("N = [5, 10, 15, 20, 25, 30]")
print("sigma = [0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.5, 1.8]")
print("rho = [0.00001, 0.00005, 0.0001, 0.0005, 0.001]\n")
ncpus = cpu_count()
results = []

with ProcessPoolExecutor(max_workers=ncpus) as executor:
    futures = list(
        (executor.submit(grid_search, X_train, y_train, N, K) for N in N_list))

for future in as_completed(futures):
    results += future.result()

final_res = pd.DataFrame(results,
                         columns=[
                             'gradient', 'K', 'N', 'sigma', 'rho', 'success',
                             'train_error', 'train_error_fit',
                             'validation_error', 'time_exec(s)', 'nfev', 'nit',
                             'njev'
                         ])

final_res.to_csv('KFOLD_MLP.csv', index=False)
Example #38
                package=package, base=base.fullname))
            continue
        build_order = get_build_order(package, orig)

        up_map = {orig: base.fullname}
        if args.remote is not None:
            print('Update from "%s" to "%s"' % (orig, base.fullname))

        for group in build_order:
            with ThreadPoolExecutor(max_workers=4) as executor:
                pkgs = (executor.submit(Worker.run, worker,
                                        ConanPackge(package),
                                        package_urls[package], up_map,
                                        args.remote) for package in group)
                add = {}
                for future in as_completed(pkgs):
                    version = future.result()
                    if version is not None:
                        add[package] = version
                        if args.remote is not None:
                            print('Update from "%s" to "%s"' % (orig, version))
                up_map.update(add)

        conanfile = os.path.join(package, 'conanfile.txt')
        if os.path.isfile(conanfile):
            with open(conanfile, 'r') as content_file:
                content = content_file.read()
            for old, val in up_map.items():
                content = content.replace('\n' + old, '\n' + val)
            with open(conanfile, 'w') as content_file:
                content_file.write(content)
Example #39
    def as_completed(self):
        for f in futures.as_completed(self._tasks):
            self._tasks.remove(f)
            yield f.result()
Example #40
from concurrent.futures import ProcessPoolExecutor, wait, ALL_COMPLETED, FIRST_COMPLETED, \
    as_completed
import time, random


def do_sth(i):
    time.sleep(random.random() * 2)
    return i * i


if __name__ == '__main__':
    ppe = ProcessPoolExecutor(3)

    objs = [ppe.submit(do_sth, i) for i in range(1, 5)]
    future_iterator = as_completed(objs)  # personal note: "as completed" means each future is yielded when it completes
    for future in future_iterator:
        print(future.result())
"""
9
4
16
1
"""

"""
小结
标准库模块 concurrent.futures 还提供了两个函数:
wait() 和 as_completed()

as_completed(fs, timeout=None):
该函数用于将指定的 Future 实例对象序列转换为迭代器。
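To complement the summary above, here is a minimal sketch (the square helper and the timings are illustrative only) that contrasts wait() with as_completed() and shows the optional timeout argument:

from concurrent.futures import ThreadPoolExecutor, as_completed, wait, FIRST_COMPLETED
import time


def square(i):
    time.sleep(0.1 * i)
    return i * i


with ThreadPoolExecutor(max_workers=3) as executor:
    futures = [executor.submit(square, i) for i in range(1, 5)]

    # wait() blocks until the condition is met and returns (done, not_done) sets
    done, not_done = wait(futures, return_when=FIRST_COMPLETED)
    print(len(done), "finished,", len(not_done), "still pending")

    # as_completed() yields futures in completion order; it raises
    # concurrent.futures.TimeoutError if the timeout elapses first
    for future in as_completed(futures, timeout=10):
        print(future.result())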
Example #41
0
def warmup(data, out_trace, registries, threads):
    dedup = {}
    total_cnt = 0
    trace = {}
    results = []
    process_data = []
    global ring
    ring = HashRing(nodes=registries)
    manifs_cnt = 0

    for request in data:
        unique = True
        manifs = False
        if request['method'] == 'GET':
            if 'manifest' in request['uri']:
                manifs = True
            uri = request['uri']
            layer_id = uri.split('/')[-1]
            total_cnt += 1
            try:
                # a successful lookup means this layer/manifest was seen before
                dedup[layer_id]
                unique = False
            except KeyError:
                dedup[layer_id] = 1
                if manifs:
                    manifs_cnt += 1

            if unique:
                # which registry should store this layer/manifest?
                registry_tmp = ring.get_node(layer_id)
                process_data.append((registry_tmp, request))

    print("total warmup unique requests:", len(process_data))
    print("unique manifest cnt: ", manifs_cnt)

    # split the request list into sublists of n requests each
    n = 100
    process_slices = [process_data[i:i + n] for i in range(0, len(process_data), n)]
    for s in process_slices:
        with ProcessPoolExecutor(max_workers=threads) as executor:
            futures = [executor.submit(send_warmup_thread, req) for req in s]
            for future in as_completed(futures):
                try:
                    x = future.result()
                    for k in x['trace']:
                        if x['trace'][k] != 'bad':
                            trace[k] = x['trace'][k]
                    results.append(x['result'])
                except Exception as e:
                    print('warmup: something generated an exception: %s' % e)
        stats(results)

    with open(out_trace, 'w') as f:
        json.dump(trace, f)

    stats(results)
    with open('warmup_push_performance.json', 'w') as f:
        json.dump(results, f)

    print("max threads: " + str(threads))
    print('unique count: ' + str(len(dedup)))
    print('total count: ' + str(total_cnt))
    print("total warmup unique requests (for GET layer/manifest requests): " + str(len(process_data)))
Example #42
0

executor = ThreadPoolExecutor(max_workers=2)
# submit() hands the callable to the thread pool and returns immediately
# task1 = executor.submit(get_html, (3))  # the returned task1 is a Future object
# task2 = executor.submit(get_html, (2))

# collect the return values of the tasks that have completed
urls = [3, 2, 4]
all_task = [executor.submit(get_html, (url)) for url in urls]
# # Wait for the futures in the given sequence to complete
# wait(all_task, return_when=FIRST_COMPLETED)
# print("main")

# whichever task finishes first is printed first
for future in as_completed(all_task):
    data = future.result()
    print("get {} page".format(data))
"""
Output:
get page 2 success
get 2 page
get page 3 success
get 3 page
get page 4 success
get 4 page
"""
# executor.map also retrieves the completed task results,
# but data is printed in the same order as urls
# for data in executor.map(get_html, urls):
#     print("get {} page".format(data))
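Following the comments above, a small self-contained sketch (the get_html stand-in below just sleeps, so it is illustrative only) contrasts the completion-order output of as_completed() with the submission-order output of executor.map():

from concurrent.futures import ThreadPoolExecutor, as_completed
import time


def get_html(seconds):
    # stand-in for a real page fetch: sleep, then report which "page" it was
    time.sleep(seconds)
    return seconds


urls = [3, 2, 4]
with ThreadPoolExecutor(max_workers=2) as executor:
    tasks = [executor.submit(get_html, url) for url in urls]

    # as_completed: whichever task finishes first is yielded first (2, 3, 4 here)
    for future in as_completed(tasks):
        print("get {} page".format(future.result()))

    # executor.map: results come back in the same order as urls (3, 2, 4)
    for data in executor.map(get_html, urls):
        print("get {} page".format(data))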
Example #43
0
    def blocks(self,
               start=None,
               stop=None,
               max_batch_size=None,
               threading=False,
               thread_num=8,
               only_ops=False,
               only_virtual_ops=False):
        """ Yields blocks starting from ``start``.

            :param int start: Starting block
            :param int stop: Stop at this block
            :param int max_batch_size: only for appbase nodes. When not None, batch calls are used.
                Cannot combine with threading
            :param bool threading: Enables threading. Cannot be combined with batch calls
            :param int thread_num: Defines the number of threads, when `threading` is set.
            :param bool only_ops: Only yielding operations, when set to True (default: False)
            :param bool only_virtual_ops: Only yield virtual operations (default: False)

            .. note:: If you want instant confirmation, you need to instantiate
                      :class:`beem.blockchain.Blockchain` with
                      ``mode="head"``; otherwise, the call will wait until
                      confirmed in an irreversible block.

        """
        # Let's find out how often blocks are generated!
        current_block = self.get_current_block()
        current_block_num = current_block.block_num
        if not start:
            start = current_block_num
        head_block_reached = False
        # We are going to loop indefinitely
        while True:

            # Get chain properties to identify the current head block
            if stop:
                head_block = stop
            else:
                current_block_num = self.get_current_block_num()
                head_block = current_block_num
            if threading and FUTURES_MODULE and not head_block_reached:
                pool = ThreadPoolExecutor(max_workers=thread_num + 1)
                # disable autoclean
                auto_clean = current_block.get_cache_auto_clean()
                current_block.set_cache_auto_clean(False)
                latest_block = 0
                for blocknum in range(start, head_block + 1, thread_num):
                    futures = []
                    i = blocknum
                    while i < blocknum + thread_num and i <= head_block:
                        futures.append(
                            pool.submit(Block,
                                        i,
                                        only_ops=only_ops,
                                        only_virtual_ops=only_virtual_ops,
                                        steem_instance=self.steem))
                        i += 1
                    results = [r.result() for r in as_completed(futures)]
                    block_nums = []
                    for b in results:
                        block_nums.append(int(b.identifier))
                        if latest_block < int(b.identifier):
                            latest_block = int(b.identifier)
                    from operator import itemgetter
                    blocks = sorted(results, key=itemgetter('id'))
                    for b in blocks:
                        yield b
                    current_block.clear_cache_from_expired_items()
                if latest_block < head_block:
                    for blocknum in range(latest_block, head_block + 1):
                        block = Block(blocknum,
                                      only_ops=only_ops,
                                      only_virtual_ops=only_virtual_ops,
                                      steem_instance=self.steem)
                        yield block
                current_block.set_cache_auto_clean(auto_clean)
            elif max_batch_size is not None and (
                    head_block -
                    start) >= max_batch_size and not head_block_reached:
                if not self.steem.is_connected():
                    return None
                self.steem.rpc.set_next_node_on_empty_reply(False)
                latest_block = start - 1
                batches = max_batch_size
                for blocknumblock in range(start, head_block + 1, batches):
                    # Get full block
                    if (head_block - blocknumblock) < batches:
                        batches = head_block - blocknumblock + 1
                    for blocknum in range(blocknumblock,
                                          blocknumblock + batches - 1):
                        if only_virtual_ops:
                            if self.steem.rpc.get_use_appbase():
                                # self.steem.rpc.get_ops_in_block({"block_num": blocknum, 'only_virtual': only_virtual_ops}, api="account_history", add_to_queue=True)
                                self.steem.rpc.get_ops_in_block(
                                    blocknum,
                                    only_virtual_ops,
                                    add_to_queue=True)
                            else:
                                self.steem.rpc.get_ops_in_block(
                                    blocknum,
                                    only_virtual_ops,
                                    add_to_queue=True)
                        else:
                            if self.steem.rpc.get_use_appbase():
                                self.steem.rpc.get_block(
                                    {"block_num": blocknum},
                                    api="block",
                                    add_to_queue=True)
                            else:
                                self.steem.rpc.get_block(blocknum,
                                                         add_to_queue=True)
                        latest_block = blocknum
                    if batches >= 1:
                        latest_block += 1
                    if latest_block <= head_block:
                        if only_virtual_ops:
                            if self.steem.rpc.get_use_appbase():
                                # self.steem.rpc.get_ops_in_block({"block_num": blocknum, 'only_virtual': only_virtual_ops}, api="account_history", add_to_queue=False)
                                block_batch = self.steem.rpc.get_ops_in_block(
                                    blocknum,
                                    only_virtual_ops,
                                    add_to_queue=False)
                            else:
                                block_batch = self.steem.rpc.get_ops_in_block(
                                    blocknum,
                                    only_virtual_ops,
                                    add_to_queue=False)
                        else:
                            if self.steem.rpc.get_use_appbase():
                                block_batch = self.steem.rpc.get_block(
                                    {"block_num": latest_block},
                                    api="block",
                                    add_to_queue=False)
                            else:
                                block_batch = self.steem.rpc.get_block(
                                    latest_block, add_to_queue=False)
                        if not bool(block_batch):
                            raise BatchedCallsNotSupported()
                        blocknum = latest_block - len(block_batch) + 1
                        if not isinstance(block_batch, list):
                            block_batch = [block_batch]
                        for block in block_batch:
                            if self.steem.rpc.get_use_appbase():
                                if only_virtual_ops:
                                    block = block["ops"]
                                else:
                                    block = block["block"]
                            block["id"] = blocknum
                            yield Block(block,
                                        only_ops=only_ops,
                                        only_virtual_ops=only_virtual_ops,
                                        steem_instance=self.steem)
                            blocknum += 1
            else:
                # Blocks from start until head block
                for blocknum in range(start, head_block + 1):
                    # Get full block
                    block = self.wait_for_and_get_block(
                        blocknum,
                        only_ops=only_ops,
                        only_virtual_ops=only_virtual_ops)
                    yield block
            # Set new start
            start = head_block + 1
            head_block_reached = True

            if stop and start > stop:
                # raise StopIteration
                return

            # Sleep for one block
            time.sleep(self.block_interval)
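A brief usage sketch for the generator above, assuming beem is installed; the mode="head" argument follows the docstring, while the default node selection and the exact block range are assumptions:

from beem import Steem
from beem.blockchain import Blockchain

stm = Steem()  # assumption: the library's default public nodes are reachable
chain = Blockchain(steem_instance=stm, mode="head")  # "head" avoids waiting for irreversibility

start = chain.get_current_block_num() - 20
# threading cannot be combined with max_batch_size (see the docstring above)
for block in chain.blocks(start=start, stop=start + 20, threading=True, thread_num=8):
    print(block.block_num)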
Example #44
0
def get_multi_thread(nums):
    with ThreadPoolExecutor() as e:
        futures = [e.submit(fibonacci, num) for num in nums]
        for future in as_completed(futures):
            print(future.result())
def transform_data(from_client,
                   from_project,
                   from_logstore,
                   from_time,
                   to_time=None,
                   to_client=None,
                   to_project=None,
                   to_logstore=None,
                   shard_list=None,
                   config=None,
                   batch_size=None,
                   compress=None,
                   cg_name=None,
                   c_name=None,
                   cg_heartbeat_interval=None,
                   cg_data_fetch_interval=None,
                   cg_in_order=None,
                   cg_worker_pool_size=None):
    """
    Transform data from one logstore to another (which may be the same logstore
    or one in a different region). The time range refers to the time the logs
    were received on the server side.

    """
    if not config:
        logger.info(
            "transform_data: config is not configured, use copy data by default."
        )
        return copy_data(from_client,
                         from_project,
                         from_logstore,
                         from_time,
                         to_time=to_time,
                         to_client=to_client,
                         to_project=to_project,
                         to_logstore=to_logstore,
                         shard_list=shard_list,
                         batch_size=batch_size,
                         compress=compress)

    to_client = to_client or from_client

    # increase the timeout to 2 min at least
    from_client.timeout = max(from_client.timeout, 120)
    to_client.timeout = max(to_client.timeout, 120)
    to_project = to_project or from_project
    to_logstore = to_logstore or from_logstore

    if not cg_name:
        # batch mode
        to_time = to_time or "end"
        cpu_count = multiprocessing.cpu_count() * 2
        shards = from_client.list_shards(from_project,
                                         from_logstore).get_shards_info()
        current_shards = [str(shard['shardID']) for shard in shards]
        target_shards = _parse_shard_list(shard_list, current_shards)

        worker_size = min(cpu_count, len(target_shards))

        result = dict()
        total_count = 0
        total_removed = 0
        with ProcessPoolExecutor(max_workers=worker_size) as pool:
            futures = [
                pool.submit(transform_worker,
                            from_client,
                            from_project,
                            from_logstore,
                            shard,
                            from_time,
                            to_time,
                            config,
                            to_client,
                            to_project,
                            to_logstore,
                            batch_size=batch_size,
                            compress=compress) for shard in target_shards
            ]

            for future in as_completed(futures):
                if future.exception():
                    logger.error(
                        "get error when transforming data: {0}".format(
                            future.exception()))
                else:
                    partition, count, removed, processed, failed = future.result(
                    )
                    total_count += count
                    total_removed += removed
                    if count:
                        result[partition] = {
                            "total_count": count,
                            "transformed": processed,
                            "removed": removed,
                            "failed": failed
                        }

        return LogResponse({}, {"total_count": total_count, "shards": result})

    else:
        # consumer group mode
        c_name = c_name or "transform_data_{0}".format(
            multiprocessing.current_process().pid)
        cg_heartbeat_interval = cg_heartbeat_interval or 20
        cg_data_fetch_interval = cg_data_fetch_interval or 2
        cg_in_order = False if cg_in_order is None else cg_in_order
        cg_worker_pool_size = cg_worker_pool_size or 3

        option = LogHubConfig(
            from_client._endpoint,
            from_client._accessKeyId,
            from_client._accessKey,
            from_project,
            from_logstore,
            cg_name,
            c_name,
            cursor_position=CursorPosition.SPECIAL_TIMER_CURSOR,
            cursor_start_time=from_time,
            cursor_end_time=to_time,
            heartbeat_interval=cg_heartbeat_interval,
            data_fetch_interval=cg_data_fetch_interval,
            in_order=cg_in_order,
            worker_pool_size=cg_worker_pool_size)

        TransformDataConsumer.set_transform_options(config, to_client,
                                                    to_project, to_logstore)

        result = {"total_count": 0, "shards": {}}
        l = RLock()

        def status_updator(shard_id,
                           count=0,
                           removed=0,
                           processed=0,
                           failed=0):
            logger.info(
                "status update is called, shard: {0}, count: {1}, removed: {2}, processed: {3}, failed: {4}"
                .format(shard_id, count, removed, processed, failed))

            with l:
                result["total_count"] += count
                if shard_id in result["shards"]:
                    data = result["shards"][shard_id]
                    result["shards"][shard_id] = {
                        "total_count": data["total_count"] + count,
                        "transformed": data["transformed"] + processed,
                        "removed": data["removed"] + removed,
                        "failed": data["failed"] + failed
                    }
                else:
                    result["shards"][shard_id] = {
                        "total_count": count,
                        "transformed": processed,
                        "removed": removed,
                        "failed": failed
                    }

        worker = ConsumerWorker(TransformDataConsumer,
                                consumer_option=option,
                                args=(status_updator, ))
        worker.start()

        try:
            while worker.is_alive():
                worker.join(timeout=60)
            logger.info(
                "transform_data: worker exit unexpected, try to shutdown it")
            worker.shutdown()
        except KeyboardInterrupt:
            logger.info("transform_data: *** try to exit **** ")
            print("try to stop transforming data.")
            worker.shutdown()
            worker.join(timeout=120)

        return LogResponse({}, result)
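A minimal usage sketch of the batch mode above; the endpoint, credentials, project/logstore names, and the time strings are placeholders, and the LogClient constructor arguments are assumed from the aliyun-log SDK:

from aliyun.log import LogClient

# placeholders: use a real endpoint and real credentials
client = LogClient("cn-hangzhou.log.aliyuncs.com", "<access_key_id>", "<access_key_secret>")

# batch mode: with no config and no cg_name, the call falls back to copy_data()
res = transform_data(client,
                     "source-project",
                     "source-logstore",
                     from_time="2023-01-01 00:00:00+8:00",
                     to_time="2023-01-02 00:00:00+8:00",
                     to_project="target-project",
                     to_logstore="target-logstore")
# res is a LogResponse whose body summarizes the processed counts per shard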
 def test_duplicate_futures(self):
     # Issue 20367. Duplicate futures should not raise exceptions or give
     # duplicate responses.
     future1 = self.executor.submit(time.sleep, 2)
     completed = [f for f in futures.as_completed([future1,future1])]
     self.assertEqual(len(completed), 1)
Example #47
0
def process_args(args):
    """Perform the actual processing according to the arguments"""
    # verbosity
    if args.verbose == 1:
        logging.basicConfig(stream=sys.stdout, level=logging.WARNING)
    elif args.verbose >= 2:
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    if args.blacklist:
        args.blacklist = load_blacklist(args.blacklist)
    # processing according to mutually exclusive options
    # read url list from input file
    if args.inputfile and args.feed is False and args.sitemap is False:
        inputdict = load_input_dict(args.inputfile, args.blacklist)
        url_processing_pipeline(args, inputdict)
    # fetch urls from a feed or a sitemap
    elif args.feed or args.sitemap:
        # load input URLs
        if args.inputfile:
            input_urls = load_input_urls(args.inputfile)
        elif args.feed:
            input_urls = [args.feed]
        elif args.sitemap:
            input_urls = [args.sitemap]
        # link discovery and storage
        inputdict = None
        with ThreadPoolExecutor(max_workers=args.parallel) as executor:
            if args.feed:
                future_to_url = {
                    executor.submit(find_feed_urls, url): url
                    for url in input_urls
                }
            elif args.sitemap:
                future_to_url = {
                    executor.submit(sitemap_search,
                                    url,
                                    target_lang=args.target_language): url
                    for url in input_urls
                }
            # process results one-by-one, i.e. in parallel
            for future in as_completed(future_to_url):
                if future.result() is not None:
                    inputdict = convert_inputlist(args.blacklist,
                                                  future.result(),
                                                  args.url_filter, inputdict)
                    url_processing_pipeline(args, inputdict)
                    inputdict = None
    # read files from an input directory
    elif args.inputdir:
        file_processing_pipeline(args)
    # read from input directly
    else:
        # process input URL
        if args.URL:
            inputdict = convert_inputlist(args, [args.URL], None)
            url_processing_pipeline(args, inputdict)  # process single url
        # process input on STDIN
        else:
            # file type and unicode check
            try:
                htmlstring = sys.stdin.read()
            except UnicodeDecodeError:
                sys.exit('ERROR: system, file type or buffer encoding')
            # process
            result = examine(htmlstring, args, url=args.URL)
            write_result(result, args)
Example #48
0
import socket
from concurrent.futures import ThreadPoolExecutor
ss = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
addr = ('127.0.0.1',9327)
ss.connect(addr)

def recv(ss):
    while 1:
        msg=ss.recv(65535)
        print(msg.decode())

def send(ss):
    while 1:
        msg = input('Enter a message: ')
        ss.send(msg.encode())

pools = ThreadPoolExecutor(max_workers=20)
from concurrent.futures import as_completed
function_list = [recv,send]
pools_list = list()
for i in function_list:
    pools_list.append(pools.submit(i,ss))
for i in as_completed(pools_list):
    i.result()


Example #49
0
def main(args: argparse.Namespace):
    if args.pool_executor == 'process':
        executor = ProcessPoolExecutor(max_workers=args.max_workers)
    elif args.pool_executor == 'thread':
        executor = ThreadPoolExecutor(max_workers=args.max_workers)
    else:
        raise ValueError(args.pool_executor)

    lines = [line for line in tqdm(args.input)]

    with executor:
        try:
            futs_map = {
                executor.submit(proc_sample, args.url, line_text):
                (line_no, line_text)
                for line_no, line_text in tqdm(enumerate(lines, start=1),
                                               total=len(lines))
                if line_no >= args.begin_line and (
                    args.end_line <= 0 or line_no <= args.end_line)
            }
            prog_bar = tqdm(total=len(lines))
            prog_bar.update(args.begin_line - 1)
            for fut in as_completed(futs_map):
                line_no, line_text = futs_map[fut]
                try:
                    result = fut.result()
                except requests.HTTPError as err:
                    # http status code 5xx
                    if err.response.status_code in range(500, 600):
                        prog_bar.write('Tokenization error:\n'
                                       '    CoreNLP server internal error: {}\n'
                                       '    offending sample: line {}'.format(err, line_no),
                                       file=sys.stderr)
                        if not args.ignore_5xx:
                            raise
                    # http status code 4xx
                    elif err.response.status_code in range(400, 500):
                        prog_bar.write('Tokenization error:\n'
                                       '    CoreNLP server received a bad request: {}\n'
                                       '    offending sample: line {}'.format(err, line_no),
                                       file=sys.stderr)
                        if not args.ignore_4xx:
                            raise
                    else:  # other HTTP status codes
                        prog_bar.write('Tokenization error, aborting the task:\n'
                                       '    CoreNLP server returned an error: {}\n'
                                       '    offending sample: line {}\n'
                                       '{}'.format(err, line_no, line_text),
                                       file=sys.stderr)
                        raise
                except KeyboardInterrupt:
                    raise  # re-raise
                except Exception as err:  # other exceptions
                    prog_bar.write('Tokenization failed, aborting the task: {}\n'
                                   '    offending sample: line {}\n'
                                   '{}'.format(err, line_no, line_text),
                                   file=sys.stderr)
                    raise
                else:
                    print(json.dumps(result, ensure_ascii=False),
                          file=args.output)
                    prog_bar.update()

        except KeyboardInterrupt:
            pass
Example #50
0
def iter_documentation_builders(
    datasets: Optional[List[str]] = None,
    *,
    doc_util_paths: Optional[doc_utils.DocUtilPaths] = None,
) -> Iterator[BuilderDocumentation]:
    """Create dataset documentation string for given datasets.

  Args:
    datasets: list of datasets for which to create documentation. If None, then
      all available datasets will be used.
    doc_util_paths: Additional path for visualization, nightly info,...

  Yields:
    builder_documentation: The documentation information for each builder
  """
    print('Retrieving the list of builders...')
    datasets = datasets or _all_tfds_datasets()

    # pytype: disable=attribute-error
    if doc_util_paths.fig_base_path:
        visu_doc_util = doc_utils.VisualizationDocUtil(
            base_path=doc_util_paths.fig_base_path,
            base_url=doc_util_paths.fig_base_url,
        )
    else:
        visu_doc_util = None

    if doc_util_paths.df_base_path:
        df_doc_util = doc_utils.DataframeDocUtil(
            base_path=doc_util_paths.df_base_path,
            base_url=doc_util_paths.df_base_url,
        )
    else:
        df_doc_util = None

    if doc_util_paths.fig_base_path:
        nightly_doc_util = doc_utils.NightlyDocUtil(
            path=doc_util_paths.nightly_path, )
    else:
        nightly_doc_util = None
    # pytype: enable=attribute-error

    document_single_builder_fn = functools.partial(
        _document_single_builder,
        visu_doc_util=visu_doc_util,
        df_doc_util=df_doc_util,
        nightly_doc_util=nightly_doc_util,
    )

    # Document all builders
    print(f'Document {len(datasets)} builders...')
    with futures.ThreadPoolExecutor(
            max_workers=_WORKER_COUNT_DATASETS) as tpool:
        tasks = [
            tpool.submit(document_single_builder_fn, name) for name in datasets
        ]
        for future in tqdm.tqdm(futures.as_completed(tasks), total=len(tasks)):
            builder_doc = future.result()
            if builder_doc is None:  # Builder filtered
                continue
            else:
                tqdm.tqdm.write(
                    f'Documentation generated for {builder_doc.name}...')
                yield builder_doc
    print('All builder documentations generated!')
Example #51
0
def upload(
    api: BaiduPCSApi,
    from_to_list: List[FromTo],
    ondup: str = "overwrite",
    encrypt_key: Any = None,
    salt: Any = None,
    encrypt_type: EncryptType = EncryptType.No,
    max_workers: int = CPU_NUM,
    slice_size: int = DEFAULT_SLICE_SIZE,
    ignore_existing: bool = True,
    show_progress: bool = True,
):
    """Upload from_tos

    Args:
        max_workers (int): The number of concurrent workers.
        slice_size (int): The slice size used when uploading in slices.
        ignore_existing (bool): Skip local paths whose remote path already exists.
        show_progress (bool): Show upload progress.
    """

    excepts = {}
    semaphore = Semaphore(max_workers)
    with _progress:
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futs = {}
            for from_to in from_to_list:
                semaphore.acquire()
                task_id = None
                if show_progress:
                    task_id = _progress.add_task("upload",
                                                 start=False,
                                                 title=from_to.from_)

                fut = executor.submit(
                    sure_release,
                    semaphore,
                    upload_file,
                    api,
                    from_to,
                    ondup,
                    encrypt_key=encrypt_key,
                    salt=salt,
                    encrypt_type=encrypt_type,
                    slice_size=slice_size,
                    ignore_existing=ignore_existing,
                    task_id=task_id,
                )
                futs[fut] = from_to

            for fut in as_completed(futs):
                e = fut.exception()
                if e is not None:
                    from_to = futs[fut]
                    excepts[from_to] = e

    # Summary
    if excepts:
        table = Table(title="Upload Error", box=SIMPLE, show_edge=False)
        table.add_column("From", justify="left", overflow="fold")
        table.add_column("To", justify="left", overflow="fold")
        table.add_column("Error", justify="left")

        for from_to, e in sorted(excepts.items()):
            # fill all three declared columns (From, To, Error)
            table.add_row(from_to.from_, from_to.to_, Text(str(e), style="red"))

        _progress.console.print(table)
Example #52
0
    def __init__(self, dirName, nEvent=-1, **kwargs):
        super(HEPCNNSplitDataset, self).__init__()
        syslogger = kwargs['syslogger'] if 'syslogger' in kwargs else None
        nWorkers = kwargs['nWorkers'] if 'nWorkers' in kwargs else 8

        if syslogger: syslogger.update(annotation='open file ' + dirName)
        self.dirName = dirName
        self.maxEventsList = [
            0,
        ]
        self.imagesList = []
        self.labelsList = []
        self.weightsList = []
        self.fileIdx = -1

        if syslogger: syslogger.update(annotation='read files')

        nEventsTotal = 0
        for fileName in sorted(listdir(self.dirName)):
            if not fileName.endswith('h5'): continue
            data = h5py.File(self.dirName + '/' + fileName, 'r')
            suffix = "_val" if 'images_val' in data['all_events'] else ""

            images = (
                fileName, 'all_events/images' + suffix
            )  ## Keep the filename and image path only, and load them later with multiproc.
            #images = data['all_events/images'+suffix]
            labels = data['all_events/labels' + suffix]
            weights = data['all_events/weights' + suffix]

            if nEvent > 0:
                #images  = images[:nEvent-nEventsTotal] ## We'll do this step after (re)loading the images
                labels = labels[:nEvent - nEventsTotal]
                weights = weights[:nEvent - nEventsTotal]

            nEventsInFile = len(weights)
            nEventsTotal += nEventsInFile
            self.maxEventsList.append(nEventsTotal)

            labels = torch.Tensor(labels[()])
            weights = torch.Tensor(weights[()])
            ## We will do this step for images later

            self.imagesList.append(images)
            self.labelsList.append(labels)
            self.weightsList.append(weights)

            if nEvent > 0 and nEventsTotal >= nEvent: break

        if syslogger: syslogger.update(annotation='Convert images to Tensor')

        env_kmp = environ['KMP_AFFINITY'] if 'KMP_AFFINITY' in environ else None
        environ['KMP_AFFINITY'] = 'none'
        jobs = []
        with futures.ProcessPoolExecutor(max_workers=nWorkers) as pool:
            for fileIdx in range(len(self.maxEventsList) - 1):
                job = pool.submit(self.imageToTensor, fileIdx)
                jobs.append(job)

            for job in futures.as_completed(jobs):
                fileIdx, images = job.result()
                self.imagesList[fileIdx] = images
        if env_kmp is not None: environ['KMP_AFFINITY'] = env_kmp

        for fileIdx in range(len(self.maxEventsList) - 1):
            #images  = torch.Tensor(self.imagesList[fileIdx][()])
            images = self.imagesList[fileIdx]
            self.shape = images.shape

            if self.shape[-1] <= 5:
                ## actual format was NHWC. convert to pytorch native format, NCHW
                images = images.permute(0, 3, 1, 2)
                self.shape = images.shape
                if syslogger:
                    syslogger.update(annotation="Convert image format")

            self.imagesList[fileIdx] = images
            self.channel, self.height, self.width = self.shape[1:]

        if nEvent > 0:
            images = images[:nEvent - nEventsTotal]
Example #53
0
    def _crack_http_auth(self, url, logfile, threads=20, exit_on_success=True):
        """ check for http auth type and crack login """

        futures = deque()
        headers = {'User-Agent': f"'{self.useragent}'"}
        s = requests.session()
        h = s.head(url, verify=False, headers=headers).headers
        auth_header = ''
        if 'WWW-Authenticate' in h:
            auth_header = h['WWW-Authenticate']

        def crack(s, url, h, a, u, p):
            code = s.head(url, verify=False, headers=h,
                          auth=a(f'{u}', f'{p}')).status_code
            if code == 200:
                return f'Login found: {u}:{p}'
            return

        if 'Basic realm' in auth_header:
            auth_type = HTTPBasicAuth
        elif 'Digest realm' in auth_header:
            auth_type = HTTPDigestAuth
        else:
            # todo: proxy auth etc.
            return

        # single username + single password
        if self.opts['user'] and self.opts['pass']:
            us = self.opts['user']
            pw = self.opts['pass']
            r = s.head(url,
                       headers=headers,
                       verify=False,
                       auth=auth_type(f'{us}', f'{pw}'))
            if r.status_code == 200:
                self._log(logfile, f'Login found: {us}:{pw}')
                if exit_on_success:
                    return

        # single username + password list
        if self.opts['user'] and self.opts['plists']:
            us = self.opts['user']
            for pwlist in self.opts['plists']:
                pws = self._read_file(pwlist)
                with cf.ThreadPoolExecutor(threads) as exe:
                    for pw in pws:
                        futures.append(
                            exe.submit(crack, s, url, headers, auth_type, us,
                                       pw))
                    for r in cf.as_completed(futures):
                        if r.result():
                            self._log(logfile, f'{r.result()}')
                            if exit_on_success:
                                return
        futures = []

        # username list + password list
        if self.opts['ulists'] and self.opts['plists']:
            for uslist in self.opts['ulists']:
                for pwlist in self.opts['plists']:
                    usrs = self._read_file(uslist)
                    pws = self._read_file(pwlist)
                    with cf.ThreadPoolExecutor(threads) as exe:
                        for us in usrs:
                            for pw in pws:
                                futures.append(
                                    exe.submit(crack, s, url, headers,
                                               auth_type, us, pw))
                            for r in cf.as_completed(futures):
                                if r.result():
                                    self._log(logfile, f'{r.result()}')
                                    if exit_on_success:
                                        return

        return
Example #54
0
    k = 0
    for i in range(int(math.floor(a/2))):
        k += 2
        ll.append(k)
    if a%2 != 0:
        ll.append(k+1) 
    for k in range(len(chh)):
       print ('Block%d : start!' % k)
       file_dir_Block = r'%s/Block%d' %  (file_dir,k)
       if not os.path.exists(file_dir_Block):
           os.makedirs(file_dir_Block)    
       data1 = data[data.chrom.isin(chh[k])] 
       value = chh[k]        
       with ProcessPoolExecutor(max_workers=len(chh[k])) as pool:
            futures = [pool.submit(test,data1,value,neighbor_region,file_dir_Block,colname,i) for i in ll]                               
            for j in as_completed(futures):
                print(j.result())    

    elapsed = (time.clock() - start)
    print("Time used: %d s" % round(elapsed,4))
    
    del data
    del data1

    ####################################    
    #
    print("union neighbor methFeature: Start!" )

    meragefiledir = r'%s' % file_dir
    filenames=os.listdir(meragefiledir)
    
Example #55
0
def train(walker, lr_file, ckpt_dir, checkpoint, options):
    vocab_size = walker.walk_nodes_size
    num_steps_per_epoch = int(
        vocab_size * options.train_workers /
        options.batch_size)  # a rough estimate of the steps per epoch in RWR
    iter_epochs = options.iter_epoches
    iter_steps = round(
        iter_epochs *
        num_steps_per_epoch)  # iter_epoches should be big enough to converge.
    decay_epochs = options.decay_epochs
    decay_steps = round(decay_epochs * num_steps_per_epoch)
    ckpt_steps = round(options.ckpt_epochs * num_steps_per_epoch)
    initial_learning_rate = options.learning_rate
    decay_rate = options.decay_rate

    LR = utils.LearningRateGenerator(
        initial_learning_rate=initial_learning_rate,
        initial_steps=0,
        decay_rate=decay_rate,
        decay_steps=decay_steps,
        iter_steps=iter_steps)

    with tf.Graph().as_default(), tf.device(
            '/gpu:0' if options.using_gpu else '/cpu:0'):

        global_step = tf.Variable(0, trainable=False, name="global_step")
        batch_single_size = options.walk_times * options.walk_length  # !
        # inputs(center_nodes), labels(context_nodes), neg_labels(neg_nodes)
        inputs = tf.placeholder(tf.int32,
                                shape=[options.batch_size],
                                name='inputs')  # center_nodes
        labels = tf.placeholder(tf.int32,
                                shape=[options.batch_size, batch_single_size],
                                name='labels')  # context_nodes
        neg_labels = tf.placeholder(
            tf.int32,
            shape=[options.batch_size, options.negative],
            name='neg_labels')  # neg_nodes
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        model = SGNS(vocab_size=vocab_size,
                     embedding_size=options.embedding_size,
                     batch_size=options.batch_size,
                     batch_single_size=batch_single_size,
                     local_weight=options.local_weight,
                     global_weight=options.global_weight)

        train_op, loss = model.train(inputs, labels, neg_labels, global_step,
                                     learning_rate)

        # Create a saver.
        saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=6)

        summary_op = tf.summary.merge_all()

        # Build an initialization operation to run below.
        init_op = tf.global_variables_initializer()

        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU implementations.
        config = tf.ConfigProto(
            allow_soft_placement=options.allow_soft_placement,
            log_device_placement=options.log_device_placement)
        config.gpu_options.per_process_gpu_memory_fraction = options.gpu_memory_fraction
        config.gpu_options.allow_growth = options.allow_growth
        # config.gpu_options.visible_device_list = visible_device_list

        with tf.Session(config=config) as sess:
            # first_step = 0
            if checkpoint == '0':  # new train
                sess.run(init_op)

            elif checkpoint == '-1':  # choose the latest one
                ckpt = tf.train.get_checkpoint_state(ckpt_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    # new_saver = tf.train.import_meta_graph(ckpt.model_checkpoint_path + '.meta')
                    # Restores from checkpoint
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    # global_step_for_restore = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                    # first_step = int(global_step_for_restore) + 1
                else:
                    logger.warning('No checkpoint file found')
                    return
            else:
                if os.path.exists(
                        os.path.join(ckpt_dir,
                                     'model.ckpt-' + checkpoint + '.index')):
                    # new_saver = tf.train.import_meta_graph(
                    #     os.path.join(ckpt_dir, 'model.ckpt-' + checkpoint + '.meta'))
                    saver.restore(
                        sess, os.path.join(ckpt_dir,
                                           'model.ckpt-' + checkpoint))
                    # first_step = int(checkpoint) + 1
                else:
                    logger.warning(
                        'checkpoint {} not found'.format(checkpoint))
                    return

            summary_writer = tf.summary.FileWriter(ckpt_dir, sess.graph)

            ## train
            executor_workers = options.train_workers - 1
            if executor_workers > 0:
                futures = set()
                executor = ThreadPoolExecutor(max_workers=executor_workers)
                for _ in range(executor_workers):
                    future = executor.submit(
                        _train_thread_body,
                        RWRGenerator(walker, options.batch_size,
                                     options.walk_times, options.walk_workers),
                        inputs, labels, neg_labels, sess, train_op,
                        global_step, learning_rate, LR)
                    logger.info("open a new training thread: %s" % future)
                    futures.add(future)
            last_loss_time = time.time() - options.loss_interval
            last_summary_time = time.time() - options.summary_interval
            last_decay_time = last_checkpoint_time = time.time()
            last_decay_step = last_summary_step = last_checkpoint_step = 0
            rwrgenerator = RWRGenerator(walker, options.batch_size,
                                        options.walk_times,
                                        options.walk_workers)
            while True:
                start_time = time.time()
                batch_inputs, batch_labels, batch_neg_labels = rwrgenerator.next_batch(
                )
                feed_dict = {
                    inputs: batch_inputs,
                    labels: batch_labels,
                    neg_labels: batch_neg_labels,
                    learning_rate: LR.learning_rate
                }
                _, loss_value, cur_step = sess.run(
                    [train_op, loss, global_step], feed_dict=feed_dict)
                now = time.time()

                assert not np.isnan(
                    loss_value), 'Model diverged with loss = NaN'

                epoch, epoch_step = divmod(cur_step, num_steps_per_epoch)

                if now - last_loss_time >= options.loss_interval:
                    format_str = '%s: step=%d(%d/%d), lr=%.6f, loss=%.6f, duration/step=%.4fs'
                    logger.info(format_str %
                                (time.strftime('%Y-%m-%d %H:%M:%S',
                                               time.localtime(time.time())),
                                 cur_step, epoch_step, epoch, LR.learning_rate,
                                 loss_value, now - start_time))
                    last_loss_time = time.time()
                if now - last_summary_time >= options.summary_interval or cur_step - last_summary_step >= options.summary_steps or cur_step >= iter_steps:
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, cur_step)
                    last_summary_time = time.time()
                    last_summary_step = cur_step
                ckpted = False
                # Save the model checkpoint periodically. (named 'model.ckpt-global_step.meta')
                if now - last_checkpoint_time >= options.ckpt_interval or cur_step - last_checkpoint_step >= ckpt_steps or cur_step >= iter_steps:
                    vecs, global_step_value = sess.run(
                        [model.vectors, global_step], feed_dict=feed_dict)
                    # vecs,weights,biases = sess.run([model.vectors,model.context_weights,model.context_biases],
                    #                              feed_dict=feed_dict)
                    checkpoint_path = os.path.join(ckpt_dir, 'model.ckpt')
                    utils.save_word2vec_format_and_ckpt(
                        options.vectors_path, vecs, checkpoint_path, sess,
                        saver, global_step_value)
                    # save_word2vec_format(vectors_path+".contexts", weights, walker.idx_nodes)
                    # save_word2vec_format(vectors_path+".context_biases", np.reshape(biases,[-1,1]), walker.idx_nodes)
                    last_checkpoint_time = time.time()
                    last_checkpoint_step = global_step_value
                    ckpted = True
                # update learning rate
                if ckpted or now - last_decay_time >= options.decay_interval or (
                        decay_steps > 0
                        and cur_step - last_decay_step >= decay_steps):
                    lr_info = np.loadtxt(lr_file, dtype=float)
                    if np.abs(lr_info[1] - decay_epochs) > 1e-6:
                        decay_epochs = lr_info[1]
                        decay_steps = round(decay_epochs * num_steps_per_epoch)
                    if np.abs(lr_info[2] - decay_rate) > 1e-6:
                        decay_rate = lr_info[2]
                    if np.abs(lr_info[3] - iter_epochs) > 1e-6:
                        iter_epochs = lr_info[3]
                        iter_steps = round(iter_epochs * num_steps_per_epoch)
                    if np.abs(lr_info[0] - initial_learning_rate) > 1e-6:
                        initial_learning_rate = lr_info[0]
                        LR.reset(initial_learning_rate=initial_learning_rate,
                                 initial_steps=cur_step,
                                 decay_rate=decay_rate,
                                 decay_steps=decay_steps,
                                 iter_steps=iter_steps)
                    else:
                        LR.exponential_decay(cur_step,
                                             decay_rate=decay_rate,
                                             decay_steps=decay_steps,
                                             iter_steps=iter_steps)
                    last_decay_time = time.time()
                    last_decay_step = cur_step
                if cur_step >= LR.iter_steps:
                    break

            summary_writer.close()
            if executor_workers > 0:
                logger.info("waiting the training threads finished:")
                try:
                    for future in as_completed(futures):
                        logger.info(future)
                except KeyboardInterrupt:
                    print("stopped by hand.")
Example #56
0
    print(f'========= stderr of {binary}:')
    print(result[2])


if __name__ == "__main__":
    clean_binary_tests()
    build_tests()
    binaries = test_binaries(exclude=[r'test_regression-.*', r'near_rpc_error_macro-.*'])
    print(f'========= collected {len(binaries)} test binaries:')
    print('\n'.join(binaries))

    completed = 0
    fails = []
    with ThreadPoolExecutor(max_workers=workers()) as executor:
        future_to_binary = {executor.submit(run_test, binary): binary for binary in binaries}
        for future in as_completed(future_to_binary):
            completed += 1
            binary_full_name = future_to_binary[future]
            binary = os.path.basename(binary_full_name)
            result = future.result()
            if result[0] != 0:
                fails.append((binary_full_name, result))
            else:
                show_test_result(binary, result)

    print(f"========= finished run {completed} test binaries")
    if fails:
        if len(fails) <= RERUN_THRESHOLD:
            # if there are only a few failures, rerun them sequentially to avoid potential timeouts
            new_fails = []
            for f in fails:
Example #57
0
def fetch_graph_and_labels(parameters, graph_config):
    decision_task_id = find_decision_task(parameters, graph_config)

    # First grab the graph and labels generated during the initial decision task
    full_task_graph = get_artifact(decision_task_id,
                                   "public/full-task-graph.json")
    logger.info("Load taskgraph from JSON.")
    _, full_task_graph = TaskGraph.from_json(full_task_graph)
    label_to_taskid = get_artifact(decision_task_id,
                                   "public/label-to-taskid.json")

    logger.info("Fetching additional tasks from action and cron tasks.")
    # fetch everything in parallel; this avoids serializing any delay in downloading
    # each artifact (such as waiting for the artifact to be mirrored locally)
    with futures.ThreadPoolExecutor(CONCURRENCY) as e:
        fetches = []

        # fetch any modifications made by action tasks and swap out new tasks
        # for old ones
        def fetch_action(task_id):
            logger.info(
                "fetching label-to-taskid.json for action task {}".format(
                    task_id))
            try:
                run_label_to_id = get_artifact(task_id,
                                               "public/label-to-taskid.json")
                label_to_taskid.update(run_label_to_id)
            except HTTPError as e:
                if e.response.status_code != 404:
                    raise
                logger.debug("No label-to-taskid.json found for {}: {}".format(
                    task_id, e))

        head_rev_param = "{}head_rev".format(
            graph_config["project-repo-param-prefix"])

        namespace = "{}.v2.{}.revision.{}.taskgraph.actions".format(
            graph_config["trust-domain"],
            parameters["project"],
            parameters[head_rev_param],
        )
        for task_id in list_tasks(namespace):
            fetches.append(e.submit(fetch_action, task_id))

        # Similarly for cron tasks..
        def fetch_cron(task_id):
            logger.info(
                "fetching label-to-taskid.json for cron task {}".format(
                    task_id))
            try:
                run_label_to_id = get_artifact(task_id,
                                               "public/label-to-taskid.json")
                label_to_taskid.update(run_label_to_id)
            except HTTPError as e:
                if e.response.status_code != 404:
                    raise
                logger.debug("No label-to-taskid.json found for {}: {}".format(
                    task_id, e))

        namespace = "{}.v2.{}.revision.{}.cron".format(
            graph_config["trust-domain"],
            parameters["project"],
            parameters[head_rev_param],
        )
        for task_id in list_tasks(namespace):
            fetches.append(e.submit(fetch_cron, task_id))

        # now wait for each fetch to complete, raising an exception if there
        # were any issues
        for f in futures.as_completed(fetches):
            f.result()

    return (decision_task_id, full_task_graph, label_to_taskid)
Example #58
0
     schedule_type='exponential')
 sys.stderr.write("lr={}\n".format(lr))
 with ThreadPoolExecutor(max_workers=num_jobs) as executor:
     job_pool = []
     sys.stderr.write("Num jobs = {}\n".format(num_jobs))
     sys.stderr.flush()
     for job_id in range(1, num_jobs + 1):
         frame_shift = num_archives_processed % args.frame_subsampling_factor
         p = executor.submit(run_job, num_jobs, job_id, dirname,
                             iter_no, model_file, lr, frame_shift,
                             egs_dir, num_archives,
                             num_archives_processed, "16,8",
                             cuda_cmd)
         num_archives_processed += 1
         job_pool.append(p)
     for p in as_completed(job_pool):
         if p.result() != 0:
             quit(p.result())
 model_list = [
     os.path.join(dirname, "{}.{}.pt".format(iter_no, job_id))
     for job_id in range(1, num_jobs + 1)
 ]
 process_out = subprocess.run([
     *cuda_cmd.split(),
     "{}/log/merge.{}.log".format(dirname, iter_no + 1), model_file,
     "--dir", dirname, "--mode", "merge", "--new-model",
     os.path.join(dirname,
                  "{}.pt".format(iter_no + 1)), ",".join(model_list)
 ])
 if process_out.returncode != 0:
     quit(process_out.returncode)
Example #59
0
        urllib.request.urlopen(request, timeout=args.timeout, context=context)

        start_time = time.time()
        success_login = False

        if len(password) > 1:
            log.debug("total data in wordlist: " + str(len(password)) +
                      " words")
        log.info("starting a login brute force")

        with ThreadPoolExecutor(max_workers=args.thread) as executor:
            processed = (executor.submit(login, args.url, args.usr, pwd,
                                         args.timeout, args.proxy)
                         for pwd in password)

            for i, process in enumerate(as_completed(processed)):
                if len(password) > 1:
                    print("[{}][INFO] testing {} password".format(
                        datetime.now().strftime("%H:%M:%S"), i),
                          end="\r")

                process = process.result()
                if process is not False:
                    success_login = True
                    password = process
                    break

            if success_login is True:
                log.success(
                    "successfully entered into the target dashboard with username \""
                    + args.usr + "\" and password \"" + password + "\"")
Example #60
0
    def run_python_tests(self,
                         tests=None,
                         test_objects=None,
                         subsuite=None,
                         verbose=False,
                         jobs=None,
                         exitfirst=False,
                         extra=None,
                         **kwargs):

        self.activate_virtualenv()
        if test_objects is None:
            from moztest.resolve import TestResolver
            resolver = self._spawn(TestResolver)
            # If we were given test paths, try to find tests matching them.
            test_objects = resolver.resolve_tests(paths=tests, flavor='python')
        else:
            # We've received test_objects from |mach test|. We need to ignore
            # the subsuite because python-tests don't use this key like other
            # harnesses do and |mach test| doesn't realize this.
            subsuite = None

        mp = TestManifest()
        mp.tests.extend(test_objects)

        filters = []
        if subsuite == 'default':
            filters.append(mpf.subsuite(None))
        elif subsuite:
            filters.append(mpf.subsuite(subsuite))

        tests = mp.active_tests(filters=filters,
                                disabled=False,
                                python=self.virtualenv_manager.version_info[0],
                                **mozinfo.info)

        if not tests:
            submsg = "for subsuite '{}' ".format(subsuite) if subsuite else ""
            message = "TEST-UNEXPECTED-FAIL | No tests collected " + \
                      "{}(Not in PYTHON_UNITTEST_MANIFESTS?)".format(submsg)
            self.log(logging.WARN, 'python-test', {}, message)
            return 1

        parallel = []
        sequential = []
        os.environ.setdefault('PYTEST_ADDOPTS', '')

        if extra:
            os.environ['PYTEST_ADDOPTS'] += " " + " ".join(extra)

        if exitfirst:
            sequential = tests
            os.environ['PYTEST_ADDOPTS'] += " -x"
        else:
            for test in tests:
                if test.get('sequential'):
                    sequential.append(test)
                else:
                    parallel.append(test)

        self.jobs = jobs or cpu_count()
        self.terminate = False
        self.verbose = verbose

        return_code = 0

        def on_test_finished(result):
            output, ret, test_path = result

            for line in output:
                self.log(logging.INFO, 'python-test', {'line': line.rstrip()},
                         '{line}')

            if ret and not return_code:
                self.log(logging.ERROR, 'python-test', {
                    'test_path': test_path,
                    'ret': ret
                }, 'Setting retcode to {ret} from {test_path}')
            return return_code or ret

        with ThreadPoolExecutor(max_workers=self.jobs) as executor:
            futures = [
                executor.submit(self._run_python_test, test)
                for test in parallel
            ]

            try:
                for future in as_completed(futures):
                    return_code = on_test_finished(future.result())
            except KeyboardInterrupt:
                # Hack to force stop currently running threads.
                # https://gist.github.com/clchiou/f2608cbe54403edb0b13
                executor._threads.clear()
                thread._threads_queues.clear()
                raise

        for test in sequential:
            return_code = on_test_finished(self._run_python_test(test))
            if return_code and exitfirst:
                break

        self.log(logging.INFO, 'python-test', {'return_code': return_code},
                 'Return code from mach python-test: {return_code}')
        return return_code