def test_retrieve(url, debug=True, rss_debug=False):

    # try:
    #     WebMirror.SpecialCase.startAmqpFetcher()
    # except RuntimeError:  # Fetcher already started
    #     pass

    if rss_debug:
        print("Debugging RSS")
        flags.RSS_DEBUG = True

    parsed = urllib.parse.urlparse(url)
    root = urllib.parse.urlunparse((parsed[0], parsed[1], "", "", "", ""))

    # Row object built here is only printed for debugging below; it is never
    # added to the database session.
    new = db.WebPages(
        url       = url,
        starturl  = root,
        netloc    = parsed.netloc,
        distance  = 50000,
        is_text   = True,
        priority  = 500000,
        type      = 'unknown',
        fetchtime = datetime.datetime.now(),
    )

    if debug:
        print(new)

    try:
        archiver = SiteArchiver(None, db.get_db_session(), None)
        job = archiver.synchronousJobRequest(url, ignore_cache=True)
    except Exception:
        traceback.print_exc()
    finally:
        db.delete_db_session()
def doCall(self):
    self.log.info("Calling job %s", self.job_name)
    session = db.get_db_session()
    item = session.query(db.PluginStatus) \
        .filter(db.PluginStatus.plugin_name == self.job_name) \
        .one()
    if item.is_running:
        session.commit()
        self.log.error("Plugin %s is already running! Not doing re-entrant call!", self.job_name)
        return

    item.is_running = True
    item.last_run = datetime.datetime.now()
    session.commit()

    try:
        self._doCall()
    except Exception:
        item.last_error = datetime.datetime.now()
        item.last_error_msg = traceback.format_exc()
        raise
    finally:
        item2 = session.query(db.PluginStatus) \
            .filter(db.PluginStatus.plugin_name == self.job_name) \
            .one()
        item2.is_running = False
        item2.last_run_end = datetime.datetime.now()
        session.commit()
        db.delete_db_session()

    self.log.info("Job %s complete.", self.job_name)
def resetRunStates():
    print("JobSetup call resetting run-states!")
    session = db.get_db_session()
    session.query(db.PluginStatus).update({db.PluginStatus.is_running: False})
    session.commit()
    db.delete_db_session()
    print("Run-states reset.")
def __init__(self, job_name):
    if job_name not in CALLABLE_LUT:
        raise JobNameException("Callable '%s' is not in the class lookup table: '%s'!" % (job_name, CALLABLE_LUT))

    self.runModule = CALLABLE_LUT[job_name]
    self.job_name = job_name

    session = db.get_db_session()
    try:
        query = session.query(db.PluginStatus) \
            .filter(db.PluginStatus.plugin_name == job_name)
        have = query.scalar()
        if not have:
            new = db.PluginStatus(plugin_name=job_name)
            session.add(new)
            session.commit()
    except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.InvalidRequestError):
        session.rollback()
    finally:
        db.delete_db_session()
def initializeStartUrls(rules):
    print("Initializing all start URLs in the database")
    sess = db.get_db_session()
    for ruleset in [rset for rset in rules if rset['starturls']]:
        for starturl in ruleset['starturls']:
            have = sess.query(db.WebPages) \
                .filter(db.WebPages.url == starturl) \
                .count()
            if not have:
                netloc = urlFuncs.getNetLoc(starturl)
                new = db.WebPages(
                    url=starturl,
                    starturl=starturl,
                    netloc=netloc,
                    type=ruleset['type'],
                    priority=db.DB_IDLE_PRIORITY,
                    distance=db.DB_DEFAULT_DIST,
                    normal_fetch_mode=ruleset['normal_fetch_mode'],
                )
                print("Missing start-url for address: '{}'".format(starturl))
                sess.add(new)
                try:
                    sess.commit()
                except sqlalchemy.exc.SQLAlchemyError:
                    print("Failure inserting start url for address: '{}'".format(starturl))
                    sess.rollback()
    sess.close()
    db.delete_db_session()
def resetInProgress():
    print("Resetting any stalled downloads from the previous session.")
    sess = db.get_db_session()
    sess.query(db.WebPages) \
        .filter(
            (db.WebPages.state == "fetching")           |
            (db.WebPages.state == "processing")         |
            (db.WebPages.state == "specialty_deferred") |
            (db.WebPages.state == "specialty_ready")
        ) \
        .update({db.WebPages.state: "new"})
    sess.commit()
    sess.close()
    db.delete_db_session()
def get_times(self):
    conn = database.get_db_session()
    aps = conn.execute("SELECT job_state FROM apscheduler_jobs;")
    update_times = []
    # Each result row is a one-element tuple holding the pickled APScheduler job state.
    for blob, in aps:
        job_dict = pickle.loads(blob)
        update_times.append((job_dict['id'], job_dict['next_run_time'].isoformat()))

    data = {
        "update-times": update_times,
    }
    database.delete_db_session()
    return pack_message("system-update-times", data)
def do_task(self):
    db_handle = db.get_db_session()
    hadjob = False
    try:
        self.archiver = WebMirror.Engine.SiteArchiver(
            self.cookie_lock,
            new_job_queue=self.new_job_queue,
            response_queue=self.resp_queue,
            db_interface=db_handle,
        )
        hadjob = self.archiver.taskProcess()
    finally:
        # Clear out the sqlalchemy state
        db_handle.expunge_all()
        db.delete_db_session()

    return hadjob
def __del__(self):
    db.delete_db_session()
def __del__(self):
    db.delete_db_session(postfix='nu_forwarder')