Example #1
def testFilenameBuilder_CompleteParameterSet(self):
    fake_filepath = utils.build_filename('/tmp/path/', '2014-02-01', '30d',
                                         'iad01', 'comcast', 'us',
                                         'download_throughput', '-fake.txt')
    expected_filepath = '/tmp/path/2014-02-01+30d_iad01_us_comcast_download_throughput-fake.txt'
    self.assertEqual(expected_filepath, fake_filepath)
Example #2
def testFilenameBuilder_ParameterSetMissingOptionalValues(self):
    """Tests that omitted options are handled properly when building filenames.

    Where not specified in the selector file, a parameter will be passed
    as None. The None value should be skipped in filename building, so this
    unit test checks that optional parameters that are not defined are
    handled properly by `build_filename`.
    """
    fake_filepath = utils.build_filename('/tmp/path/', '2015-02-01', '31d',
                                         None, None, 'ca', 'minimum_rtt',
                                         '-bigquery.sql')
    expected_filepath = '/tmp/path/2015-02-01+31d_ca_minimum_rtt-bigquery.sql'
    self.assertEqual(expected_filepath, fake_filepath)
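Taken together, these two tests pin down the filename scheme: the date and duration are joined with '+', each remaining defined field is appended with a '_' separator (with country ordered before provider), and None values are skipped. A minimal sketch of a `build_filename` consistent with the expected outputs above might look like the following; it is inferred from the tests, not the project's actual implementation:

def build_filename(output_dir, date, duration, site, client_provider,
                   client_country, metric, suffix):
    # Sketch only: build '<date>+<duration>' and append the defined optional
    # fields. Note the output order: country precedes provider, even though
    # the parameter list passes provider first.
    name = '{0}+{1}'.format(date, duration)
    for field in (site, client_country, client_provider, metric):
        if field is not None:  # omitted optional values are skipped
            name += '_' + field
    return output_dir + name + suffix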
Example #3
def main(args):
    selector_queue = Queue.Queue()
    logger = setup_logger(args.verbosity)

    selectors = selectors_from_files(args.selector_in)
    # The selectors were likely provided in order. Shuffle them to get better
    # concurrent distribution on BigQuery tables.
    selectors = shuffle_selectors(selectors)

    ip_translator_factory = iptranslation.IPTranslationStrategyFactory()
    mlab_site_resolver = mlab.MLabSiteResolver()
    for data_selector in selectors:
        thread_metadata = {
            'date': data_selector.start_time.strftime('%Y-%m-%d-%H%M%S'),
            'duration': duration_to_string(data_selector.duration),
            'site': data_selector.site,
            'client_provider': data_selector.client_provider,
            'client_country': data_selector.client_country,
            'metric': data_selector.metric
        }
        data_filepath = utils.build_filename(
            args.output, thread_metadata['date'], thread_metadata['duration'],
            thread_metadata['site'], thread_metadata['client_provider'],
            thread_metadata['client_country'], thread_metadata['metric'],
            '-raw.csv')
        if not args.ignorecache and utils.check_for_valid_cache(data_filepath):
            logger.info(('Raw data file found (%s); assuming it is a '
                         'cached copy of the same data and moving on. Use '
                         '--ignorecache to suppress this behavior.'),
                        data_filepath)
            continue

        logger.debug('Did not find existing data file: %s', data_filepath)
        logger.debug(
            ('Generating query for subset of {site}, {client_provider}, '
             '{date}, {duration}.').format(**thread_metadata))

        data_selector.ip_translation_spec.params['maxmind_dir'] = (
            args.maxminddir)

        try:
            ip_translator = ip_translator_factory.create(
                data_selector.ip_translation_spec)
            bq_query_string = generate_query(
                data_selector, ip_translator, mlab_site_resolver)
        except MLabServerResolutionFailed as caught_error:
            logger.error('Failed to resolve M-Lab servers: %s', caught_error)
            # This error is fatal, so bail out here.
            return None
        except Exception as caught_error:
            logger.error('Failed to generate queries: %s', caught_error)
            continue

        if args.savequery:
            bigquery_filepath = utils.build_filename(
                args.output, thread_metadata['date'],
                thread_metadata['duration'], thread_metadata['site'],
                thread_metadata['client_provider'],
                thread_metadata['client_country'], thread_metadata['metric'],
                '-bigquery.sql')
            write_bigquery_to_file(bigquery_filepath, bq_query_string)
        if not args.dryrun:
            # Offer the queue a tuple of the BQ statement, metadata, output
            # filepath, and a boolean indicating that the query has not yet
            # been attempted (failed queries are pushed back onto the queue).
            selector_queue.put((bq_query_string, thread_metadata, data_filepath,
                                False))
        else:
            logger.warn(
                'Dry-run flag caught; built the query and reached the point '
                'where it would be posted. Moving on.')
    try:
        if not args.dryrun:
            logger.info('Finished processing selector files, approximately %d '
                        'queries to be performed.', selector_queue.qsize())
            if not os.path.exists(args.credentials_filepath):
                logger.warn(
                    'No Google API credentials appear to exist; the next '
                    'step will be an authentication mechanism for its API.')

            try:
                google_auth_config = external.GoogleAPIAuth(
                    args.credentials_filepath,
                    is_headless=args.noauth_local_webserver)
            except external.APIConfigError:
                logger.error(
                    'Could not find developer project, please create one in '
                    'Developer Console to continue. (See README.md)')
                return None

            while not selector_queue.empty():
                thread_monitor = process_selector_queue(selector_queue,
                                                        google_auth_config)

                for (existing_thread, external_query_handler) in thread_monitor:
                    existing_thread.join()
                    # Join all defined attributes of thread_metadata into a
                    # user-friendly notification string.
                    identifier_string = ', '.join(filter(
                        None, thread_metadata.values()))

                    if (not external_query_handler.has_succeeded and
                            not external_query_handler.has_failed):
                        selector_queue.put(external_query_handler.queue_set)
                    elif external_query_handler.has_failed:
                        logger.debug('Fatal error on %s, moving along.',
                                     identifier_string)
                    else:
                        logger.debug('Successfully retrieved %s.',
                                     identifier_string)

    except KeyboardInterrupt:
        logger.error('Caught interruption, shutting down now.')

    return False
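The queue protocol described in the comments above amounts to a single-retry loop: each entry ends in a boolean "already attempted" flag, and an entry that neither succeeded nor fatally failed is pushed back onto the queue. A standalone sketch of that pattern follows; `run_query` and the entry layout are illustrative assumptions here, not the project's API:

import Queue

def drain_with_retry(selector_queue, run_query):
    # Sketch only: 'run_query' is a hypothetical callable that returns True
    # on success. Entries mirror the (query, metadata, filepath, attempted)
    # tuples enqueued by main() above.
    while not selector_queue.empty():
        query, metadata, filepath, attempted = selector_queue.get()
        if run_query(query, filepath):
            continue  # success, nothing to re-queue
        if not attempted:
            # First failure: re-queue with the flag flipped so the query
            # gets exactly one more attempt at the end of the queue.
            selector_queue.put((query, metadata, filepath, True))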
Example #4
def main(args):
    selector_queue = Queue.Queue()
    logger = setup_logger(args.verbosity)

    selectors = selectors_from_files(args.selector_in)
    # The selectors were likely provided in order. Shuffle them to get better
    # concurrent distribution on BigQuery tables.
    selectors = shuffle_selectors(selectors)

    ip_translator_factory = iptranslation.IPTranslationStrategyFactory()
    mlab_site_resolver = mlab.MLabSiteResolver()
    for selector in selectors:
        thread_metadata = {
            'date': selector.start_time.strftime('%Y-%m-%d-%H%M%S'),
            'duration': duration_to_string(selector.duration),
            'site': selector.site,
            'client_provider': selector.client_provider,
            'client_country': selector.client_country,
            'metric': selector.metric
        }
        data_filepath = utils.build_filename(
            args.output, thread_metadata['date'], thread_metadata['duration'],
            thread_metadata['site'], thread_metadata['client_provider'],
            thread_metadata['client_country'], thread_metadata['metric'],
            '-raw.csv')
        if not args.ignorecache and utils.check_for_valid_cache(data_filepath):
            logger.info(('Raw data file found (%s); assuming it is a '
                         'cached copy of the same data and moving on. Use '
                         '--ignorecache to suppress this behavior.'),
                        data_filepath)
            continue

        logger.debug('Did not find existing data file: %s', data_filepath)
        logger.debug(
            ('Generating query for subset of {site}, {client_provider}, '
             '{date}, {duration}.').format(**thread_metadata))

        selector.ip_translation_spec.params['maxmind_dir'] = args.maxminddir

        try:
            ip_translator = ip_translator_factory.create(
                selector.ip_translation_spec)
            bq_query_string, bq_table_span = generate_query(
                selector, ip_translator, mlab_site_resolver)
        except MLabServerResolutionFailed as caught_error:
            logger.error('Failed to resolve M-Lab servers: %s', caught_error)
            # This error is fatal, so bail out here.
            return None
        except Exception as caught_error:
            logger.error('Failed to generate queries: %s', caught_error)
            continue

        if args.savequery:
            bigquery_filepath = utils.build_filename(
                args.output, thread_metadata['date'],
                thread_metadata['duration'], thread_metadata['site'],
                thread_metadata['client_provider'],
                thread_metadata['client_country'], thread_metadata['metric'],
                '-bigquery.sql')
            write_bigquery_to_file(bigquery_filepath, bq_query_string)
        if not args.dryrun:
            # Offer the queue a tuple of the BQ statement, BQ table span,
            # metadata, output filepath, and a boolean indicating that the
            # query has not yet been attempted (failed queries are pushed
            # back onto the queue).
            selector_queue.put((bq_query_string, bq_table_span,
                                thread_metadata, data_filepath, False))
        else:
            logger.warn(
                'Dry-run flag caught; built the query and reached the point '
                'where it would be posted. Moving on.')
    try:
        if not args.dryrun:
            logger.info(
                'Finished processing selector files, approximately %d '
                'queries to be performed.', selector_queue.qsize())
            if not os.path.exists(args.credentials_filepath):
                logger.warn(
                    'No Google API credentials appear to exist; the next '
                    'step will be an authentication mechanism for its API.')

            try:
                google_auth_config = external.GoogleAPIAuth(
                    args.credentials_filepath,
                    is_headless=args.noauth_local_webserver)
            except external.APIConfigError:
                logger.error(
                    'Could not find developer project, please create one in '
                    'Developer Console to continue. (See README.md)')
                return None

            while not selector_queue.empty():
                thread_monitor = process_selector_queue(
                    selector_queue,
                    google_auth_config,
                    batchmode=args.batchmode)

                for (existing_thread,
                     external_query_handler) in thread_monitor:
                    existing_thread.join()
                    # Join all defined attributes of thread_metadata into a
                    # user-friendly notification string.
                    identifier_string = ', '.join(
                        filter(None, thread_metadata.values()))

                    if (not external_query_handler.has_succeeded
                            and not external_query_handler.has_failed):
                        selector_queue.put(external_query_handler.queue_set)
                    elif external_query_handler.has_failed:
                        logger.debug('Fatal error on %s, moving along.',
                                     identifier_string)
                    else:
                        logger.debug('Successfully retrieved %s.',
                                     identifier_string)

    except KeyboardInterrupt:
        logger.error('Caught interruption, shutting down now.')

    return False
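One detail shared by both variants of main() is how the log identifier is built: filter(None, thread_metadata.values()) drops falsy entries, so optional fields left as None never appear in the message. A quick illustration with invented values:

thread_metadata = {
    'date': '2015-02-01-000000',
    'duration': '31d',
    'site': None,               # optional fields may arrive as None
    'client_provider': None,
    'client_country': 'ca',
    'metric': 'minimum_rtt',
}
# filter(None, ...) discards the None entries before joining.
identifier_string = ', '.join(filter(None, thread_metadata.values()))
# e.g. '31d, ca, minimum_rtt, 2015-02-01-000000' (dict ordering is
# arbitrary under the Python 2 this code targets)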