Example #1
0
def test_retrieve(url, debug=True, rss_debug=False):
	"""
	Fetch a single URL synchronously through a SiteArchiver, bypassing the cache.

	A `db.WebPages` row describing the URL is constructed, but within this
	function it is only printed (when `debug` is true); it is not added to
	a session here.

	Args:
		url: The URL to retrieve.
		debug: When true, print the constructed `db.WebPages` object.
		rss_debug: When true, print a notice and set `flags.RSS_DEBUG = True`.

	Any exception raised while creating the archiver or running the request
	is printed with `traceback.print_exc()` and not re-raised. The database
	session is released via `db.delete_db_session()` in all cases.
	"""

	# NOTE: the call to WebMirror.SpecialCase.startAmqpFetcher() (which was
	# wrapped in an `except RuntimeError` for the already-started case) is
	# currently disabled in this function.

	if rss_debug:
		print("Debugging RSS")
		flags.RSS_DEBUG = True

	url_parts = urllib.parse.urlparse(url)
	# Scheme + netloc only: path, params, query and fragment are blanked.
	site_root = urllib.parse.urlunparse(
		(url_parts.scheme, url_parts.netloc, "", "", "", "")
	)

	page_row = db.WebPages(
		url       = url,
		starturl  = site_root,
		netloc    = url_parts.netloc,
		distance  = 50000,
		is_text   = True,
		priority  = 500000,
		type      = 'unknown',
		fetchtime = datetime.datetime.now(),
		)

	if debug:
		print(page_row)

	try:
		fetcher = SiteArchiver(None, db.get_db_session(), None)
		fetcher.synchronousJobRequest(url, ignore_cache=True)
	except Exception:
		traceback.print_exc()
	finally:
		db.delete_db_session()
Example #2
0
    def doCall(self):
        """
        Run this job once, recording its state in the PluginStatus table.

        The row whose `plugin_name` equals `self.job_name` is looked up with
        `Query.one()`, so exactly one such row must exist. If that row is
        already flagged `is_running`, an error is logged and the job is NOT
        run. Otherwise the row is flagged running, `self._doCall()` is
        invoked, and afterwards the row is flagged not-running with
        `last_run_end` set, whether or not the call raised.

        Raises:
            Whatever `self._doCall()` raises. Before re-raising, the time and
            formatted traceback are stored in `last_error` and
            `last_error_msg`.

        The database session obtained here is always released with
        `db.delete_db_session()`, including on the "already running" early
        return and when the initial lookup fails.
        """

        self.log.info("Calling job %s", self.job_name)
        session = db.get_db_session()
        try:
            item = session.query(db.PluginStatus).filter(
                db.PluginStatus.plugin_name == self.job_name).one()
            if item.is_running:
                session.commit()
                self.log.error(
                    "Plugin %s is already running! Not doing re-entrant call!",
                    self.job_name)
                return

            item.is_running = True
            item.last_run = datetime.datetime.now()
            session.commit()

            try:
                self._doCall()
            except Exception:
                # Not committed here; the commit in the `finally` below runs
                # on the same session before the exception propagates.
                item.last_error = datetime.datetime.now()
                item.last_error_msg = traceback.format_exc()
                raise
            finally:
                # The row is re-queried rather than reusing `item`.
                # NOTE(review): presumably to guard against `_doCall()`
                # having invalidated the earlier object - confirm.
                item2 = session.query(db.PluginStatus).filter(
                    db.PluginStatus.plugin_name == self.job_name).one()
                item2.is_running = False
                item2.last_run_end = datetime.datetime.now()
                session.commit()
        finally:
            # Release the session on every exit path.
            db.delete_db_session()
        self.log.info("Job %s complete.", self.job_name)
Example #3
0
def resetRunStates():
    """
    Set `is_running` to False on every `db.PluginStatus` row.

    Issues one bulk UPDATE over the whole table, commits it, and then
    releases the session with `db.delete_db_session()`. Progress is
    reported on stdout before and after.
    """
    print("JobSetup call resetting run-states!")

    sess = db.get_db_session()
    all_plugins = sess.query(db.PluginStatus)
    cleared_flag = {db.PluginStatus.is_running: False}
    all_plugins.update(cleared_flag)
    sess.commit()

    db.delete_db_session()
    print("Run-states reset.")
Example #4
0
    def __init__(self, job_name):
        """
        Bind this instance to `job_name` and make sure it has a status row.

        Args:
            job_name: Key into `CALLABLE_LUT`; the mapped value is stored as
                `self.runModule`.

        Raises:
            JobNameException: if `job_name` is not a key of `CALLABLE_LUT`.

        If no `db.PluginStatus` row with this `plugin_name` exists, one is
        added and committed. `OperationalError` and `InvalidRequestError`
        from SQLAlchemy are handled by rolling back the session and are not
        re-raised. The session is released in all cases.
        """

        if job_name not in CALLABLE_LUT:
            raise JobNameException(
                "Callable '%s' is not in the class lookup table: '%s'!" %
                (job_name, CALLABLE_LUT))

        self.runModule = CALLABLE_LUT[job_name]
        self.job_name = job_name

        sess = db.get_db_session()
        try:
            existing = sess.query(db.PluginStatus).filter(
                db.PluginStatus.plugin_name == job_name).scalar()
            if not existing:
                sess.add(db.PluginStatus(plugin_name=job_name))
                sess.commit()
        except (sqlalchemy.exc.OperationalError,
                sqlalchemy.exc.InvalidRequestError):
            sess.rollback()
        finally:
            db.delete_db_session()
Example #5
0
def initializeStartUrls(rules):
    """
    Ensure every start URL named in `rules` has a row in `db.WebPages`.

    Args:
        rules: Iterable of ruleset mappings. Each is read for the keys
            'starturls', 'type' and 'normal_fetch_mode'. Rulesets whose
            'starturls' value is falsy are skipped.

    For each start URL with no existing `db.WebPages` row (matched on
    `url`), a new row is added with the URL as its own `starturl`,
    `db.DB_IDLE_PRIORITY` priority and `db.DB_DEFAULT_DIST` distance.
    A commit is issued after every URL. If it fails with a SQLAlchemy
    error, the failure is printed, the session is rolled back, and
    processing continues with the next URL.

    The session is closed and released even if an unexpected exception
    escapes the loop.
    """
    print("Initializing all start URLs in the database")
    sess = db.get_db_session()
    try:
        for ruleset in rules:
            if not ruleset['starturls']:
                continue

            for starturl in ruleset['starturls']:
                have = (
                    sess.query(db.WebPages)
                    .filter(db.WebPages.url == starturl)
                    .count()
                )
                if not have:
                    netloc = urlFuncs.getNetLoc(starturl)
                    new = db.WebPages(
                        url=starturl,
                        starturl=starturl,
                        netloc=netloc,
                        type=ruleset['type'],
                        priority=db.DB_IDLE_PRIORITY,
                        distance=db.DB_DEFAULT_DIST,
                        normal_fetch_mode=ruleset['normal_fetch_mode'],
                    )
                    print("Missing start-url for address: '{}'".format(starturl))
                    sess.add(new)
                try:
                    sess.commit()
                # The exception base class lives in `sqlalchemy.exc`;
                # referencing it on the top-level package would itself raise
                # AttributeError while this handler is being matched.
                except sqlalchemy.exc.SQLAlchemyError:
                    print("Failure inserting start url for address: '{}'".format(
                        starturl))

                    sess.rollback()
    finally:
        sess.close()
        db.delete_db_session()
Example #6
0
def resetInProgress():
    """
    Return every `db.WebPages` row in an in-flight state back to "new".

    Rows whose `state` is "fetching", "processing", "specialty_deferred"
    or "specialty_ready" are updated in a single bulk UPDATE, which is then
    committed. The session is closed and released afterwards.
    """
    print("Resetting any stalled downloads from the previous session.")

    sess = db.get_db_session()
    pages = sess.query(db.WebPages)

    state_col = db.WebPages.state
    in_flight = (
        (state_col == "fetching")
        | (state_col == "processing")
        | (state_col == "specialty_deferred")
        | (state_col == "specialty_ready")
    )

    pages.filter(in_flight).update({db.WebPages.state : "new"})
    sess.commit()
    sess.close()
    db.delete_db_session()
Example #7
0
    def get_times(self):
        """
        Build a "system-update-times" message from the APScheduler job table.

        Reads the `job_state` column of every row in `apscheduler_jobs`,
        unpickles it, and collects `(id, next_run_time)` pairs, where the
        time is rendered with `isoformat()`. A job whose `next_run_time` is
        None (APScheduler uses this for paused jobs) is reported with None
        as its time instead of raising AttributeError.

        Returns:
            The result of `pack_message("system-update-times", data)`, where
            `data` is `{"update-times": [(job_id, iso_time_or_None), ...]}`.

        The database session is released even if reading or unpickling a
        row fails.
        """
        conn = database.get_db_session()
        try:
            aps = conn.execute("SELECT job_state FROM apscheduler_jobs;")

            update_times = []
            for blob, in aps:
                # NOTE(security): pickle.loads can execute arbitrary code.
                # This is only safe while the apscheduler_jobs table is
                # writable solely by trusted code.
                job_dict = pickle.loads(blob)
                next_run = job_dict['next_run_time']
                next_run_iso = next_run.isoformat() if next_run is not None else None
                update_times.append((job_dict['id'], next_run_iso))
        finally:
            database.delete_db_session()

        data = {
            "update-times": update_times,
        }

        return pack_message("system-update-times", data)
Example #8
0
    def do_task(self):
        """
        Run one `taskProcess()` pass on a freshly built SiteArchiver.

        A new `WebMirror.Engine.SiteArchiver` is created with this object's
        cookie lock and queues plus a database session, and stored on
        `self.archiver`.

        Returns:
            Whatever `self.archiver.taskProcess()` returns.
            NOTE(review): presumably a flag for whether a job was handled;
            confirm against SiteArchiver.

        Exceptions from constructing the archiver or from `taskProcess()`
        propagate to the caller after the session cleanup has run.
        """

        session = db.get_db_session()
        try:
            self.archiver = WebMirror.Engine.SiteArchiver(
                self.cookie_lock,
                new_job_queue=self.new_job_queue,
                response_queue=self.resp_queue,
                db_interface=session,
            )
            return self.archiver.taskProcess()
        finally:
            # Clear out the sqlalchemy state before handing the session back.
            session.expunge_all()
            db.delete_db_session()
Example #9
0
 def __del__(self):
     """Call `db.delete_db_session()` when this object is finalized."""
     db.delete_db_session()
Example #10
0
	def __del__(self):
		"""
		Call `db.delete_db_session(postfix='nu_forwarder')` when this object is finalized.

		NOTE(review): `postfix` presumably selects which named session to
		release; confirm against `db.delete_db_session`.
		"""
		db.delete_db_session(postfix='nu_forwarder')