def call_url(self, url, insecure=False):
    ''' Dedicated method to query a URL.

    It is important to use this method as it queries URLs with a
    defined user-agent header, thus informing the projects we query
    what our intentions are.

    :arg url: the url to request (get).
    :type url: str
    :kwarg insecure: if True, disable TLS certificate verification
        for this request. Defaults to False.
    :type insecure: bool
    :return: the response corresponding to the request made, or the
        raw content for FTP urls
    :return type: requests.Response
    '''
    user_agent = 'Anitya %s at upstream-monitoring.org' % \
        pkg_resources.get_distribution('anitya').version
    from_email = anitya_config.get('ADMIN_EMAIL')

    if '*' in url:
        url = self.expand_subdirs(url)

    if url.startswith('ftp://') or url.startswith('ftps://'):
        socket.setdefaulttimeout(30)

        req = urllib2.Request(url)
        req.add_header('User-Agent', user_agent)
        req.add_header('From', from_email)
        resp = urllib2.urlopen(req)
        content = resp.read()

        return content
    else:
        headers = {
            'User-Agent': user_agent,
            'From': from_email,
        }

        # Works around https://github.com/kennethreitz/requests/issues/2863
        # Currently, requests does not start new TCP connections based on
        # TLS settings. This means that if a connection is ever started to
        # a host with `verify=False`, further requests to that
        # (scheme, host, port) combination will also be insecure, even if
        # `verify=True` is passed to requests.
        #
        # This starts a new session which is immediately discarded when the
        # request is insecure. We don't get to pool connections for these
        # requests, but it stops us from making insecure requests by
        # accident. This can be removed in requests-3.0.
        if insecure:
            with requests.Session() as r_session:
                resp = r_session.get(
                    url, headers=headers, timeout=60, verify=False)
        else:
            resp = http_session.get(
                url, headers=headers, timeout=60, verify=True)

        return resp
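# A minimal usage sketch for call_url(); the URLs are illustrative and a
# concrete backend subclass would normally be used instead of BaseBackend.
if __name__ == '__main__':
    backend = BaseBackend()

    # A plain https request goes through the shared, pooled session
    resp = backend.call_url('https://example.com/releases/')
    print(resp.status_code)

    # A host with broken TLS gets a throwaway session, so the shared pool
    # never caches an insecure connection (see the workaround above)
    resp = backend.call_url('https://self-signed.example.com/', insecure=True)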
def setUp(self):
    super(BaseBackendTests, self).setUp()

    self.backend = backends.BaseBackend()
    self.headers = {
        'User-Agent': 'Anitya {0} at upstream-monitoring.org'.format(
            anitya.app.__version__),
        'From': config.get('ADMIN_EMAIL'),
    }
def admin(self):
    """
    Determine if this user is an administrator.

    Returns:
        bool: True if the user is an administrator.
    """
    return six.text_type(self.id) in anitya_config.get('ANITYA_WEB_ADMINS', [])
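# A self-contained sketch of the same membership check with made-up ids; it
# shows why the id is cast to text first: ANITYA_WEB_ADMINS holds strings,
# while user ids are typically UUID objects.
import six

def is_configured_admin(user_id, config):
    return six.text_type(user_id) in config.get('ANITYA_WEB_ADMINS', [])

assert is_configured_admin(42, {'ANITYA_WEB_ADMINS': ['42']})
assert not is_configured_admin(42, {})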
def setUp(self):
    super(BaseBackendTests, self).setUp()

    self.backend = backends.BaseBackend()
    self.headers = {
        "User-Agent": "Anitya {0} at release-monitoring.org".format(
            anitya.app.__version__
        ),
        "From": config.get("ADMIN_EMAIL"),
    }
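# A sketch of a test that could build on this fixture; the patch target and
# the exact call signature are assumptions about the module under test, not
# the project's actual test suite.
import mock

class CallUrlTests(BaseBackendTests):
    def test_call_url_sends_default_headers(self):
        with mock.patch.object(backends, "http_session") as mock_session:
            self.backend.call_url("https://example.com")
        mock_session.get.assert_called_once_with(
            "https://example.com",
            headers=self.headers,
            timeout=60,
            verify=True,
        )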
def is_admin(self):
    """
    Determine if this user is an administrator. Sets the admin flag
    if the user is preconfigured as an administrator.

    Returns:
        bool: True if the user is an administrator.
    """
    if not self.admin:
        if six.text_type(self.id) in anitya_config.get("ANITYA_WEB_ADMINS", []):
            self.admin = True
    return self.admin
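# A self-contained sketch of the caching behaviour is_admin() adds over the
# plain property above: once the configured check succeeds, the flag is kept
# on the model and the config is no longer consulted. Names are illustrative.
class FakeUser:
    def __init__(self, id, admin=False):
        self.id = id
        self.admin = admin

    def is_admin(self, config):
        if not self.admin and str(self.id) in config.get("ANITYA_WEB_ADMINS", []):
            self.admin = True
        return self.admin

user = FakeUser(id=42)
assert user.is_admin({"ANITYA_WEB_ADMINS": ["42"]})
assert user.is_admin({})  # still True: the flag was cached on the model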
def main(debug, feed):
    ''' Retrieve all the packages and for each of them update the release
    version.
    '''
    time = arrow.utcnow().datetime
    db.initialize(config)
    session = db.Session()
    run = db.Run(status='started')
    session.add(run)
    session.commit()

    LOG.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    if debug:
        # Console handler
        chand = logging.StreamHandler()
        chand.setLevel(logging.INFO)
        chand.setFormatter(formatter)
        LOG.addHandler(chand)

    # Save the logs in a file
    fhand = logging.FileHandler('/var/tmp/anitya_cron.log')
    fhand.setLevel(logging.INFO)
    fhand.setFormatter(formatter)
    LOG.addHandler(fhand)

    if feed:
        projects = list(projects_by_feed(session))
        session.commit()
    else:
        # Get all projects that are ready for a check
        projects = db.Project.query.order_by(
            sa.func.lower(db.Project.name)
        ).filter(db.Project.next_check < time).all()

    project_ids = [project.id for project in projects]

    N = config.get('CRON_POOL', 10)
    LOG.info("Launching pool (%i) to update %i projects", N, len(project_ids))
    p = multiprocessing.Pool(N)
    p.map(update_project, project_ids)

    run = db.Run(status='ended')
    session.add(run)
    session.commit()
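# A sketch of how main() might be wired to a command line; the two flags map
# onto its parameters, but the argparse wiring itself is an assumption rather
# than the project's actual entry point.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Anitya version-check cron')
    parser.add_argument(
        '--debug', action='store_true', help='also log to the console')
    parser.add_argument(
        '--feed', action='store_true',
        help='only check projects recently seen in release feeds')
    args = parser.parse_args()
    main(args.debug, args.feed)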
def run(self):
    """
    Start the check run. The run is made of three stages:

    1. Preparation - get current date, clear counters, prepare the queue
       of projects
    2. Execution - process every project in the queue
    3. Finalize - create `db.Run` entry with counters and time
    """
    # 1. Preparation phase
    # We must convert it to datetime for comparison with sqlalchemy TIMESTAMP column
    session = db.Session()
    time = arrow.utcnow().datetime
    self.clear_counters()

    queue = self.construct_queue(time)
    total_count = len(queue)

    if not queue:
        return

    # 2. Execution
    _log.info(
        "Starting check on {} for total of {} projects".format(time, total_count)
    )
    pool_size = config.get("CRON_POOL", 10)
    pool = ThreadPoolExecutor(pool_size)
    pool.map(self.update_project, queue)
    # Wait for every worker to finish before reading the counters
    pool.shutdown(wait=True)

    # 3. Finalize
    _log.info(
        "Check done. Checked ({}): error ({}), success ({}), ratelimit ({})".format(
            total_count,
            self.error_counter,
            self.success_counter,
            self.ratelimit_counter,
        )
    )
    run = db.Run(
        created_on=time,
        total_count=total_count,
        error_count=self.error_counter,
        ratelimit_count=self.ratelimit_counter,
        success_count=self.success_counter,
    )
    session.add(run)
    session.commit()
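# A self-contained sketch of the thread-safe counters run() reads once the
# pool has drained; update_project() would bump them from worker threads.
# The helper class is illustrative, not the project's implementation.
import threading

class CheckCounters:
    def __init__(self):
        self._lock = threading.Lock()
        self.clear()

    def clear(self):
        with self._lock:
            self.error_counter = 0
            self.success_counter = 0
            self.ratelimit_counter = 0

    def count_success(self):
        # += on an attribute is not atomic across threads, hence the lock
        with self._lock:
            self.success_counter += 1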
def is_delete_candidate(self, project: db.Project) -> bool:
    """
    Check if this project is a candidate for deletion.

    A project is a candidate for deletion if its error_counter has already
    reached the configured threshold and the project has no mapping. If a
    mapping exists but the project doesn't have any versions, it is still
    a candidate for deletion.

    Args:
        project: Project to check

    Returns:
        True if the project is a candidate for deletion, False otherwise.
    """
    if project.error_counter < config.get("CHECK_ERROR_THRESHOLD"):
        return False

    packages = db.Packages.query.filter(db.Packages.project_id == project.id).all()
    if packages:
        # A mapped project is only deleted if it never retrieved any version
        return not project.versions
    return True
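# A sketch of how a cleanup pass might use is_delete_candidate(); the method
# name, session handling, and log message are assumptions for illustration.
def delete_stale_projects(self):
    session = db.Session()
    for project in db.Project.query.all():
        if self.is_delete_candidate(project):
            _log.info("Deleting stale project %s", project.name)
            session.delete(project)
    session.commit()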
def run(self):
    """
    Start the check run. The run is made of three stages:

    1. Preparation - get current date, clear counters, prepare the queue
       of projects
    2. Execution - process every project in the queue
    3. Finalize - create `db.Run` entry with counters and time
    """
    # 1. Preparation phase
    # We must convert it to datetime for comparison with sqlalchemy TIMESTAMP column
    session = db.Session()
    time = arrow.utcnow().datetime
    self.clear_counters()

    queue = self.construct_queue(time)
    total_count = len(queue)
    projects_left = len(queue)
    projects_iter = iter(queue)

    if not queue:
        return

    # 2. Execution
    _log.info(
        "Starting check on {} for total of {} projects".format(time, total_count)
    )
    futures = {}
    pool_size = config.get("CRON_POOL")
    timeout = config.get("CHECK_TIMEOUT")
    with ThreadPoolExecutor(pool_size) as pool:
        # Wait till every project in the queue is checked
        while projects_left:
            for project in projects_iter:
                future = pool.submit(self.update_project, project)
                futures[future] = project
                if len(futures) > pool_size:
                    break  # limit job submissions
            # Wait for jobs that aren't completed yet
            try:
                for future in as_completed(futures, timeout=timeout):
                    projects_left -= 1  # one project down
                    # log any exception
                    if future.exception():
                        try:
                            future.result()
                        except Exception as e:
                            _log.exception(e)
                    del futures[future]
                    break  # give a chance to add more jobs
            except TimeoutError:
                projects_left -= 1
                _log.info("Thread was killed because the execution took too long.")
                with self.error_counter_lock:
                    self.error_counter += 1

    # 3. Finalize
    _log.info(
        "Check done. Checked ({}): error ({}), success ({}), limit ({})".format(
            total_count,
            self.error_counter,
            self.success_counter,
            self.ratelimit_counter,
        )
    )
    run = db.Run(
        created_on=time,
        total_count=total_count,
        error_count=self.error_counter,
        ratelimit_count=self.ratelimit_counter,
        success_count=self.success_counter,
    )
    session.add(run)
    session.commit()
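# A self-contained toy version of the submission pattern above: keep at most
# pool_size + 1 jobs in flight, reap finished ones with as_completed(), and
# count a timeout as a failed check. Everything here is illustrative.
from concurrent.futures import ThreadPoolExecutor, as_completed, TimeoutError
import time as time_module

def check(project):
    time_module.sleep(0.1)  # stand-in for a real version check
    return project

def run_bounded(queue, pool_size=4, timeout=5):
    done, failed = 0, 0
    items = iter(queue)
    futures = {}
    left = len(queue)
    with ThreadPoolExecutor(pool_size) as pool:
        while left:
            for item in items:
                futures[pool.submit(check, item)] = item
                if len(futures) > pool_size:
                    break  # keep the in-flight window bounded
            try:
                for future in as_completed(futures, timeout=timeout):
                    left -= 1
                    done += 1
                    del futures[future]
                    break  # go back and top the window up
            except TimeoutError:
                left -= 1
                failed += 1
    return done, failed

print(run_bounded(list(range(10))))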
import logging

import pkg_resources
import requests

from anitya.config import config as anitya_config
from anitya.lib.exceptions import AnityaPluginException
from anitya.lib.versions import RpmVersion
import six

REGEX = anitya_config["DEFAULT_REGEX"]

# Default headers for requests
REQUEST_HEADERS = {
    "User-Agent": "Anitya %s at release-monitoring.org"
    % pkg_resources.get_distribution("anitya").version,
    "From": anitya_config.get("ADMIN_EMAIL"),
}

_log = logging.getLogger(__name__)

# Use a common http session, so we don't have to go re-establishing https
# connections over and over and over again.
http_session = requests.session()


class BaseBackend(object):
    """
    The base class that all the different backends should extend.

    Attributes:
import logging

import pkg_resources
import requests

from anitya.config import config as anitya_config
from anitya.lib.exceptions import AnityaPluginException
from anitya.lib.versions import RpmVersion
import six

REGEX = anitya_config['DEFAULT_REGEX']

# Default headers for requests
REQUEST_HEADERS = {
    'User-Agent': 'Anitya %s at upstream-monitoring.org' %
    pkg_resources.get_distribution('anitya').version,
    'From': anitya_config.get('ADMIN_EMAIL'),
}

_log = logging.getLogger(__name__)

# Use a common http session, so we don't have to go re-establishing https
# connections over and over and over again.
http_session = requests.session()


class BaseBackend(object):
    ''' The base class that all the different backends should extend.

    Attributes:
        name (str): The backend name. This is displayed to the user and used in