import scrapy

# `utils` and `serialize_date` are project-local helpers imported in the
# original module; they are not shown in this snippet.


class PivotalSpider(scrapy.Spider):
    utils.init_datadog()

    name = "pivotal_security"
    start_urls = ['https://pivotal.io/security']

    def parse(self, response):
        rows = response.css('table tr')
        entries = 0

        if len(rows) <= 0:
            utils.Report.warning(
                'There are no rows to go through... Perhaps the syntax has changed?'
            )

        for cve in rows:
            fields = cve.css('td')

            try:
                date = serialize_date(
                    utils.get_string(fields[0].css('::text').extract()))
            except ValueError:
                continue

            reference = utils.get_string(fields[2].css('a ::text').extract())
            url = utils.get_string(fields[2].xpath('a//@href').extract())
            description = utils.get_string(fields[4].css('::text').extract())

            if len(reference) > 0 and len(url) > 0:
                entries += 1
            else:
                print('Invalid CVE has been detected.')
                continue

            # Only report leaks published within the configured reporting window.
            if utils.start_date() <= date:
                leak = {
                    'date': date,
                    'reference': reference,
                    'url': 'https://pivotal.io' + url,
                    'description': description
                }

                if not utils.event_exists(leak['reference']):
                    print('Adding new CVE event. {}'.format(leak['reference']))
                    utils.send_event('pivotal', leak)
                else:
                    print('CVE {} already registered. Skipping.'.format(
                        leak['reference']))

        if entries <= 0:
            print('There has been an issue parsing the page.')
            utils.Report.critical(
                'No entries could be detected! Please check the page manually.'
            )
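
The spider above leans on several project-local helpers (utils.get_string, serialize_date, utils.start_date, utils.send_event) that are not shown. A minimal sketch of what they might look like in a utils module, assuming the official datadog Python client, is given below; the names, date format and 30-day window are illustrative rather than the original implementation.

from datetime import datetime, timedelta

from datadog import api, initialize


def init_datadog():
    # Reads DATADOG_API_KEY / DATADOG_APP_KEY from the environment.
    initialize()


def get_string(extracted):
    # Scrapy's extract() returns a list of text nodes; join and strip them.
    return ''.join(extracted).strip()


def serialize_date(text):
    # Date format is an assumption; a ValueError here lets the spider skip
    # header rows and other non-date cells.
    return datetime.strptime(text, '%d %B %Y')


def start_date(days=30):
    # Start of the reporting window (hypothetical 30-day default).
    return datetime.utcnow() - timedelta(days=days)


def send_event(source, leak):
    # Forward the CVE to Datadog as an event.
    api.Event.create(
        title='New CVE: {}'.format(leak['reference']),
        text='{}\n{}'.format(leak['description'], leak['url']),
        tags=['source:{}'.format(source)])
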
Example #2
class ClusterSpider(scrapy.Spider):
    utils.init_datadog()

    name = 'cluster'
    allowed_domains = ['app.compose.io']
    start_urls = ['https://app.compose.io/session/new']

    def parse(self, response):
        yield scrapy.FormRequest.from_response(
            response,
            formxpath='//form[@class="login-form"]',
            formdata={
                'user[email]': self.settings.get('COMPOSE_EMAIL'),
                'user[password]': self.settings.get('COMPOSE_PASSWORD'),
            },
            callback=self.after_login)

    def after_login(self, response):
        url = 'https://app.compose.io/{}/clusters/{}'.format(
            self.settings.get('COMPOSE_ACCOUNT_NAME'),
            self.settings.get('COMPOSE_CLUSTER_ID'))
        yield scrapy.Request(url=url, callback=self.action)

    def action(self, response):
        try:
            scraped_cluster = ClusterScraper(response)
        except Exception as e:
            utils.Report.critical(
                'Could not obtain cluster name! {}'.format(e))
            return

        # Save the metrics to Datadog
        for host_name, host_metrics in scraped_cluster.metrics.items():
            for resource_name, resource_metrics in host_metrics.items():
                for name, value in resource_metrics.items():
                    metric_name = 'compose.cluster.host.{}.{}'.format(
                        resource_name, name)
                    metric_unit = ClusterScraper.UNITS_FROM_NAME[name]
                    utils.send_metric(
                        metric_name=metric_name,
                        metric_description=metric_name + ' description',
                        metric_unit=metric_unit,
                        metric_value=value,
                        cluster_name=scraped_cluster.name,
                        host_name=host_name)

        yield dict(
            cluster_name=scraped_cluster.name,
            cluster_metrics=scraped_cluster.metrics)
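
utils.send_metric is another project-local helper. A plausible sketch, assuming it submits a gauge through the official datadog client (the tag name is illustrative):

from datadog import api


def send_metric(metric_name, metric_description, metric_unit, metric_value,
                cluster_name, host_name):
    # Submit a single gauge point; Metric.send timestamps a bare value
    # with the current time.
    api.Metric.send(
        metric=metric_name,
        points=metric_value,
        host=host_name,
        type='gauge',
        tags=['cluster:{}'.format(cluster_name)])
    # metric_description and metric_unit could be attached as metric
    # metadata; that step is omitted to keep the sketch small.
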
Example #3
from datetime import datetime, timedelta

from datadog import api
from dateutil.relativedelta import relativedelta


def print_requests(env, month_start, month_end, timezone):
    title = "Data for '%s' using timezone: '%s'" % (env.upper(), timezone)
    print(title)
    print("=" * len(title))
    print("Month,Total Requests (approximate)")

    for month in range(month_start, month_end + 1):
        start = datetime.min.replace(year=2017, month=month, day=1)
        start_utc = adjust_datetime_to_utc(start, timezone)
        end_utc = start_utc + relativedelta(months=1) - timedelta(seconds=1)
        query = "integral(avg:nginx.requests{environment:%s})" % env
        # strftime('%s') produces epoch seconds (POSIX platforms only).
        results = api.Metric.query(start=start_utc.strftime('%s'),
                                   end=end_utc.strftime('%s'),
                                   query=query)
        data = results['series'][0].get('pointlist', [])

        print("%s,%s" % (start.strftime('%b'), int(data[-1][1])))


if __name__ == "__main__":

    args = _get_args()

    month_start = get_date(args.month_start).month
    month_end = get_date(args.month_end).month

    config = get_config(args.config)
    init_datadog(config)

    print_requests(args.env, month_start, month_end, ENV_TZ[args.env])
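
print_requests relies on a few helpers defined elsewhere in the project (_get_args, get_date, get_config, init_datadog, adjust_datetime_to_utc, ENV_TZ). A rough sketch of two of them, assuming pytz for the timezone conversion and a config dict with api_key/app_key entries (both assumptions):

import pytz
from datadog import initialize


def adjust_datetime_to_utc(naive_dt, timezone):
    # Interpret the naive datetime in the given timezone, then convert to UTC.
    return pytz.timezone(timezone).localize(naive_dt).astimezone(pytz.utc)


def init_datadog(config):
    # The config keys are an assumption about the project's config layout.
    initialize(api_key=config['api_key'], app_key=config['app_key'])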