コード例 #1
0
    def put_outbound_callable(self,
                              jobid,
                              serialized,
                              meta=None,
                              call_kwargs=None,
                              early_ack=False,
                              job_unique_id=None):
        '''
        Build a `RemoteExec.callCode` job around `serialized` and queue it
        via `self.put_outbound_raw()`.

        `serialized` is sent as the `code_struct` keyword argument of the
        remote call; every entry of `call_kwargs` is merged on top of it
        (so a `code_struct` key in `call_kwargs` takes precedence).
        `meta` is forwarded to `buildjob` as `additionalData`.

        `meta` and `call_kwargs` default to None rather than `{}` so that a
        single dict object is never shared between calls; None is treated
        as an empty dict.
        '''
        self.log.info("Dispatching new callable job")

        # Fresh dict per call - never hand a shared default object to buildjob.
        if meta is None:
            meta = {}

        call_kwargs_out = {'code_struct': serialized}
        if call_kwargs:
            call_kwargs_out.update(call_kwargs)

        raw_job = buildjob(
            module='RemoteExec',
            call='callCode',
            dispatchKey="rwp-rpc-system",
            jobid=jobid,
            kwargs=call_kwargs_out,
            additionalData=meta,
            postDelay=0,
            early_ack=early_ack,
            serialize=self.pluginName,
            unique_id=job_unique_id,
        )

        self.put_outbound_raw(raw_job)
コード例 #2
0
def exposed_head(url, ref):
    '''
    Queue a `getHeadTitleChromium` job for url `url`, passing the
    referrer `ref`, then print every response read back from the RPC
    interface.

    The polling loop has no exit condition; it runs until interrupted.
    '''
    interface = common.get_rpyc.RemoteJobInterface("Test_Interface!")
    print('wat?')
    print(interface)

    job = buildjob(
        module='SmartWebRequest',
        call='getHeadTitleChromium',
        dispatchKey="fetcher",
        jobid=-1,
        args=[url, ref],
        kwargs={},
        additionalData={'mode': 'fetch'},
        postDelay=0,
        unique_id=url,
    )
    interface.put_job(job)

    while True:
        try:
            resp = interface.get_job()
            print_response(resp)
        except queue.Empty:
            print("No response yet?")
        else:
            # Nothing came back this round: back off before polling again.
            if not resp:
                time.sleep(1)
コード例 #3
0
    def __blocking_dispatch_call_local(self,
                                       remote_cls,
                                       call_kwargs,
                                       meta=None,
                                       expect_partials=False):
        '''
        Serialize `remote_cls`, wrap it in a `RemoteExec.callCode` job and
        dispatch it synchronously through a `RemoteFetchInterface`.

        `call_kwargs` entries are merged over the `code_struct` keyword
        argument carrying the serialized class. `meta` is forwarded to
        `buildjob` as `additionalData`. The job id is taken from
        `self.job_counter`, which is incremented.

        The response is tagged with the job id and fed through
        `self.process_response_items()` as a preloaded return. When
        `expect_partials` is false the first processed item is returned;
        otherwise the result of `process_response_items()` is returned
        as-is.
        '''
        self.log.info("Dispatching new callable job to local executor")

        print("Kwargs:", call_kwargs)
        scls = rpc_serialize.serialize_class(remote_cls)
        call_kwargs_out = {'code_struct': scls}
        call_kwargs_out.update(call_kwargs)

        # Debug output describing the local executor module.
        print(local_exec)
        print(dir(local_exec))

        jid = self.job_counter
        self.job_counter += 1

        raw_job = buildjob(
            module='RemoteExec',
            call='callCode',
            dispatchKey="rwp-rpc-system",
            jobid=jid,
            kwargs=call_kwargs_out,
            additionalData=meta,
            postDelay=0,
            early_ack=False,
            serialize=self.pluginName,
            unique_id=None,
        )

        rpc_interface = common.get_rpyc.RemoteFetchInterface()
        try:
            rpc_interface.check_ok()
            ret = rpc_interface.dispatch_request(raw_job)
        finally:
            # Always release the interface, even when the health check or
            # the dispatch itself raises.
            rpc_interface.close()

        ret['jobid'] = jid

        ret = self.process_response_items([jid],
                                          expect_partials,
                                          preload_rets=[ret])
        if not expect_partials:
            ret = next(ret)
        return ret
コード例 #4
0
    def put_outbound_fetch_job(self, jobid, joburl):
        '''
        Build a `WebRequest.getItem` job for `joburl`, tagged with `jobid`,
        and queue it via `self.put_outbound_raw()`.
        '''
        self.log.info("Dispatching new fetch job")

        fetch_job = buildjob(
            module='WebRequest',
            call='getItem',
            dispatchKey="rwp-rpc-system",
            jobid=jobid,
            args=[joburl],
            kwargs={},
            additionalData={'mode': 'fetch'},
            postDelay=0,
        )
        self.put_outbound_raw(fetch_job)
コード例 #5
0
def exposed_test_chromium_fetch():
    '''
    Run a test-fetch with the chromium remote rendering system.

    Four test jobs are built, but only the `getItemChromium` and `getItem`
    jobs are queued. Responses are then polled for up to 900 iterations,
    sleeping one second whenever nothing is available.
    '''
    print("Chromium Test")

    rpc_interface = common.get_rpyc.RemoteJobInterface("TestInterface")
    rpc_interface.check_ok()
    print("RPC:", rpc_interface)

    print("Dispatching job engine")

    def make_test_job(module, call, args, kwargs):
        # Every test job shares the same routing key, job id and metadata.
        return buildjob(module=module,
                        call=call,
                        dispatchKey="lolwattttt",
                        jobid="lolwat",
                        args=args,
                        kwargs=kwargs,
                        additionalData={'herp': 'derp'},
                        postDelay=0)

    # The first two jobs are constructed but currently never queued.
    phantom_head_job = make_test_job(
        'NUWebRequest',
        'getHeadTitlePhantomJS',
        ['http://www.google.com', 'http://www.goat.com'],
        {},
    )
    chromium_head_job = make_test_job(
        'WebRequest',
        'getHeadTitleChromium',
        [],
        {
            'url': 'http://www.google.com',
            'referrer': 'http://www.goat.com',
        },
    )

    chromium_item_job = make_test_job(
        'WebRequest',
        'getItemChromium',
        [],
        {'itemUrl': 'http://www.google.com'},
    )
    plain_item_job = make_test_job(
        'WebRequest',
        'getItem',
        [],
        {
            'itemUrl':
            'http://imgsv.imaging.nikon.com/lineup/dslr/d600/img/sample01/img_01_l.jpg'
        },
    )

    # rpc_interface.put_job(phantom_head_job)
    # rpc_interface.put_job(chromium_head_job)
    rpc_interface.put_job(chromium_item_job)
    rpc_interface.put_job(plain_item_job)

    for _ in range(60 * 15):
        try:
            response = rpc_interface.get_job()
            if not response:
                print("No tmp:", response)
                time.sleep(1)
                continue
            print("response!")
            dump_response(response)
        except queue.Empty:
            time.sleep(1)
コード例 #6
0
ファイル: NuHeader.py プロジェクト: woebbi/ReadableWebProxy
	def put_job(self, put=3):
		'''
		Pick up to `put` unvalidated NuReleaseItem rows and queue a
		`getHeadTitleChromium` job for each of them through `self.rpc`.

		Candidates are the union of the most recent rows overall and the
		rows first seen within the last 72 hours. Rows with more resolved
		entries are preferred (2, then 1, then 0), with random selection
		inside each group.

		Returns the number of jobs queued, or None when there were no
		candidate rows at all.
		'''
		with db.session_context() as db_sess:
			self.log.info("Loading rows to fetch..")
			cutoff = datetime.datetime.now() - datetime.timedelta(hours=72)

			# NOTE(review): `== False` presumably builds a SQL expression
			# (assuming NuReleaseItem is a SQLAlchemy model) - do not
			# replace it with `is False` / `not`.
			recentq = (
				db_sess.query(db.NuReleaseItem)
				.outerjoin(db.NuResolvedOutbound)
				.filter(db.NuReleaseItem.validated == False)
				.filter(db.NuReleaseItem.first_seen >= cutoff)
				.options(joinedload('resolved'))
				.order_by(desc(db.NuReleaseItem.first_seen))
				.group_by(db.NuReleaseItem.id)
				.limit(max(100, put*10))
			)

			bulkq = (
				db_sess.query(db.NuReleaseItem)
				.outerjoin(db.NuResolvedOutbound)
				.filter(db.NuReleaseItem.validated == False)
				.options(joinedload('resolved'))
				.order_by(desc(db.NuReleaseItem.first_seen))
				.group_by(db.NuReleaseItem.id)
				.limit(max(100, put*6))
			)

			bulkset   = bulkq.all()
			recentset = recentq.all()

			self.log.info("Have %s recent items, %s long-term items to fetch", len(recentset), len(bulkset))

			# De-duplicate on row id; entries from the recent set replace
			# bulk entries with the same id.
			by_id = {}
			for row in bulkset + recentset:
				by_id[row.id] = row
			candidates = list(by_id.values())
			self.log.info("Total items after filtering for uniqueness %s", len(candidates))

			if not candidates:
				self.log.info("No jobs to remote HEAD.")
				return

			# We pick a large number of items, and randomly choose some of them.
			# This lets us weight the fetch preferentially to the recent items, but still
			# have some variability.
			# We prefer to fetch items that'll resolve as fast as possible.
			by_resolves = {2: [], 1: [], 0: []}
			for row in candidates:
				resolve_count = len(row.resolved)
				if resolve_count in by_resolves:
					by_resolves[resolve_count].append(row)

			chosen = random.sample(by_resolves[2], min(put, len(by_resolves[2])))
			for bucket in (by_resolves[1], by_resolves[0]):
				shortfall = put - len(chosen)
				if shortfall > 0:
					chosen.extend(random.sample(bucket, min(shortfall, len(bucket))))

			dispatched = 0
			active = set()
			bad_referrers = (
				"http://www.novelupdates.com",
				"https://www.novelupdates.com",
				"https://www.novelupdates.com/",
				"http://www.novelupdates.com/",
			)

			for have in chosen:
				if len(list(have.resolved)) >= 3:
					raise RuntimeError("Overresolved item that's not valid.")

				if have.referrer in bad_referrers:
					self.log.error("Wat?")
					self.log.error("Bad Referrer URL got into the input queue!")
					self.log.error("Id: %s, ref: %s", have.id, have.referrer)
					# Drop the row together with everything resolved for it.
					for bad_resolve in have.resolved:
						db_sess.delete(bad_resolve)
					db_sess.delete(have)
					db_sess.commit()
					continue

				if have.fetch_attempts > MAX_TOTAL_FETCH_ATTEMPTS:
					self.log.error("Wat?")
					self.log.error("Item fetched too many times!")
					self.log.error("Id: %s", have.id)
					self.log.error("Attempted more then %s resolves. Disabling.", MAX_TOTAL_FETCH_ATTEMPTS)
					have.reviewed = 'rejected'
					have.validated = True
					db_sess.commit()
					continue

				# Only one job per wrapper URL in a single batch.
				wrapper_url = have.outbound_wrapper
				if wrapper_url in active:
					continue
				active.add(wrapper_url)

				have.fetch_attempts += 1
				db_sess.commit()

				self.log.info("Putting job for url '%s', with %s resolves so far", have.outbound_wrapper, len(have.resolved))
				self.log.info("Referring page '%s'", have.referrer)

				raw_job = buildjob(
					module='WebRequest',
					call='getHeadTitleChromium',
					dispatchKey="fetcher",
					jobid=-1,
					args=[],
					kwargs={
						"url": have.outbound_wrapper,
						"referrer": have.referrer,
						"title_timeout": 30,
					},
					additionalData={
						'mode': 'fetch',
						'wrapper_url': have.outbound_wrapper,
						'referrer': have.referrer,
					},
					postDelay=0,
					unique_id=have.outbound_wrapper,
					serialize='Nu-Header',
				)

				self.rpc.put_job(raw_job)
				dispatched += 1

		return dispatched