import scrapy


class PivotalSpider(scrapy.Spider):
    # Configure Datadog as soon as the spider class is loaded.
    utils.init_datadog()

    name = "pivotal_security"
    start_urls = ['https://pivotal.io/security']

    def parse(self, response):
        rows = response.css('table tr')
        entries = 0
        if len(rows) <= 0:
            utils.Report.warning(
                'There are no rows to go through... Perhaps the syntax has changed?')

        for cve in rows:
            fields = cve.css('td')
            try:
                date = serialize_date(
                    utils.get_string(fields[0].css('::text').extract()))
            except (IndexError, ValueError):
                # Header rows have no <td> cells and the date column may not parse.
                continue

            reference = utils.get_string(fields[2].css('a ::text').extract())
            url = utils.get_string(fields[2].xpath('a//@href').extract())
            description = utils.get_string(fields[4].css('::text').extract())

            if len(reference) > 0 and len(url) > 0:
                entries += 1
            else:
                print('Invalid CVE entry detected. Skipping.')
                continue

            # Only report CVEs published inside the reporting time window.
            if utils.start_date() <= date:
                leak = {
                    'date': date,
                    'reference': reference,
                    'url': 'https://pivotal.io' + url,
                    'description': description
                }
                if not utils.event_exists(leak['reference']):
                    print('Adding new CVE event: {}'.format(leak['reference']))
                    utils.send_event('pivotal', leak)
                else:
                    print('CVE {} already registered. Skipping.'.format(
                        leak['reference']))

        if entries <= 0:
            print('There has been an issue parsing the page.')
            utils.Report.critical(
                'No entries could be detected! Please check the page manually.')
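# The utils helpers used above are project-specific and not shown in this
# section. A minimal sketch of what init_datadog/send_event/event_exists could
# look like on top of the official datadog client follows; the function names
# mirror the calls above, but the tagging scheme and lookback window are
# assumptions, not the project's actual implementation.
import time

from datadog import api, initialize


def init_datadog():
    # With no arguments, initialize() reads DATADOG_API_KEY / DATADOG_APP_KEY
    # from the environment.
    initialize()


def send_event(source, leak):
    # Publish the CVE as a Datadog event tagged with its reference id.
    api.Event.create(
        title='New CVE published: {}'.format(leak['reference']),
        text='{}\n{}'.format(leak['description'], leak['url']),
        tags=['source:{}'.format(source),
              'cve:{}'.format(leak['reference'])])


def event_exists(reference, lookback_days=30):
    # Look for a recent event already tagged with this CVE id.
    now = int(time.time())
    events = api.Event.query(
        start=now - lookback_days * 86400,
        end=now,
        tags=['cve:{}'.format(reference)])
    return len(events.get('events', [])) > 0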
import scrapy
from scrapy import Request


class ClusterSpider(scrapy.Spider):
    # Configure Datadog as soon as the spider class is loaded.
    utils.init_datadog()

    name = 'cluster'
    allowed_domains = ['app.compose.io']
    start_urls = ['https://app.compose.io/session/new']

    def parse(self, response):
        # Log in to Compose by submitting the login form with credentials
        # taken from the Scrapy settings.
        yield scrapy.FormRequest.from_response(
            response,
            formxpath='//form[@class="login-form"]',
            formdata={
                'user[email]': self.settings.get('COMPOSE_EMAIL'),
                'user[password]': self.settings.get('COMPOSE_PASSWORD'),
            },
            callback=self.after_login)

    def after_login(self, response):
        url = 'https://app.compose.io/{}/clusters/{}'.format(
            self.settings.get('COMPOSE_ACCOUNT_NAME'),
            self.settings.get('COMPOSE_CLUSTER_ID'))
        yield Request(url=url, callback=self.action)

    def action(self, response):
        try:
            scraped_cluster = ClusterScraper(response)
        except Exception as e:
            utils.Report.critical(
                'Could not obtain cluster name! {}'.format(e))
            return

        # Save the per-host metrics to Datadog.
        for host_name, host_metrics in scraped_cluster.metrics.items():
            for resource_name, resource_metrics in host_metrics.items():
                for name, value in resource_metrics.items():
                    metric_name = 'compose.cluster.host.{}.{}'.format(
                        resource_name, name)
                    metric_unit = ClusterScraper.UNITS_FROM_NAME[name]
                    utils.send_metric(
                        metric_name=metric_name,
                        metric_description=metric_name + ' description',
                        metric_unit=metric_unit,
                        metric_value=value,
                        cluster_name=scraped_cluster.name,
                        host_name=host_name)

        yield dict(
            cluster_name=scraped_cluster.name,
            cluster_metrics=scraped_cluster.metrics)
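# utils.send_metric is another project helper that is not shown here. A minimal
# sketch of how it could forward a gauge to Datadog with the official client
# follows; the parameter names mirror the call above, but the tag scheme is an
# assumption.
from datadog import api


def send_metric(metric_name, metric_description, metric_unit,
                metric_value, cluster_name, host_name):
    # metric_description and metric_unit belong to metric metadata; they could
    # be pushed separately via api.Metadata.update, which is omitted here.
    api.Metric.send(
        metric=metric_name,
        points=metric_value,
        host=host_name,
        tags=['cluster:{}'.format(cluster_name),
              'unit:{}'.format(metric_unit)])


# The spider can then be run with the credentials supplied as Scrapy settings,
# for example:
#   scrapy crawl cluster \
#       -s COMPOSE_EMAIL=user@example.com \
#       -s COMPOSE_PASSWORD=secret \
#       -s COMPOSE_ACCOUNT_NAME=my-account \
#       -s COMPOSE_CLUSTER_ID=abc123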
from datetime import datetime, timedelta

from datadog import api
from dateutil.relativedelta import relativedelta


def print_requests(env, month_start, month_end, timezone):
    title = "Data for '%s' using timezone: '%s'" % (env.upper(), timezone)
    print(title)
    print("=" * len(title))
    print("Month,Total Requests (approximate)")

    for month in range(month_start, month_end + 1):
        # Build the month boundaries as local wall-clock times, then convert
        # them to UTC for the Datadog query. The year is hardcoded to 2017.
        start = datetime.min.replace(year=2017, month=month, day=1)
        end = start + relativedelta(months=1) - timedelta(seconds=1)
        start_utc = adjust_datetime_to_utc(start, timezone)
        end_utc = adjust_datetime_to_utc(end, timezone)

        query = "integral(avg:nginx.requests{environment:%s})" % env
        results = api.Metric.query(start=start_utc.strftime('%s'),
                                   end=end_utc.strftime('%s'),
                                   query=query)
        data = results['series'][0].get('pointlist', [])
        # The integral is cumulative, so the last point holds the month total.
        print("%s,%s" % (start.strftime('%b'), int(data[-1][1])))


if __name__ == "__main__":
    args = _get_args()
    month_start = get_date(args.month_start).month
    month_end = get_date(args.month_end).month

    config = get_config(args.config)
    init_datadog(config)

    print_requests(args.env, month_start, month_end, ENV_TZ[args.env])
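# adjust_datetime_to_utc is a project helper that is not shown in this section.
# A minimal sketch of one possible implementation with pytz follows; treating
# the naive datetime as local wall-clock time in the given zone is an
# assumption.
import pytz


def adjust_datetime_to_utc(naive_dt, timezone_name):
    # Attach the named timezone to the naive datetime, then convert to UTC.
    local_tz = pytz.timezone(timezone_name)
    return local_tz.localize(naive_dt).astimezone(pytz.utc)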