def testRandomSelectConstrained(self):
    """Check that RandomSelect respects MAX_REPEAT when repeats are required.

    Ten picks from a pool of five elements cannot all be distinct, so some
    values must repeat; no value may appear more than selectors.MAX_REPEAT
    times in the result.
    """
    elements = range(5)
    result = selectors.RandomSelect(elements, 10)
    self.assertEqual(len(result), 10)
    # The repeat limit is checked for every candidate value, not just a
    # sample of them, so a violation on any element is caught.
    for value in elements:
        occurrences = sum(1 for x in result if x == value)
        self.assertLessEqual(
            occurrences, selectors.MAX_REPEAT,
            'value %r was selected %d times' % (value, occurrences))
def testRandomSelectVeryConstrained(self):
    """Test to make sure we don't infinite loop if count > len(elements)*3.

    Twenty picks from a two-element pool can only be satisfied by
    repeating values beyond selectors.MAX_REPEAT, so the call must still
    return the requested number of results.
    """
    elements = range(2)
    result = selectors.RandomSelect(elements, 20)
    self.assertEqual(len(result), 20)
    # range(2) only yields 0 and 1, so those are the values to count.
    zeros = [x for x in result if x == 0]
    ones = [x for x in result if x == 1]
    # Compare counts (not the lists themselves) against the limit. Only the
    # larger count is asserted: how the picks split between the two values
    # is random, so requiring both to exceed the limit could fail
    # spuriously.
    # NOTE(review): presumes MAX_REPEAT < 10 (half of the 20 picks) - confirm.
    self.assertGreater(max(len(zeros), len(ones)), selectors.MAX_REPEAT)
def GetTestsFromSource(self, source, count=50, select_mode=None):
    """Build the list of test records for a data source.

    Host entries are loaded for the source, converted into records, and
    then reduced to `count` entries with one of the selection strategies.
    Three kinds of input data are supported, which is what makes this
    tricky:
      - List of domains
      - List of hosts
      - List of record_type + hosts

    Args:
      source: A source name (str) that has been configured.
      count: Number of tests to generate from the source (int)
      select_mode: automatic, weighted, random, chunk (str)

    Returns:
      A list of record tuples in the form of (req_type, hostname)

    Raises:
      ValueError: If no usable records are found from the data source, or
        if the final selection mode is not weighted, chunk or random.
    """
    # Sources without a config entry never request duplicates.
    allow_dupes = False
    if source in self.source_config:
        allow_dupes = self.source_config[source].get(
            'include_duplicates', False)

    host_entries = self._GetHostsFromSource(source)
    if not host_entries:
        raise ValueError(
            'Unable to generate records from %s (nothing found)' % source)

    source_name = self.GetNameForSource(source)
    self.msg('Generating tests from %s (%s records, selecting %s %s)'
             % (source_name, len(host_entries), count, select_mode))
    records, are_records_fqdn = self._CreateRecordsFromHostEntries(
        host_entries, include_duplicates=allow_dupes)

    # Settle on a concrete strategy when the caller left the choice open.
    mode = select_mode
    if mode in ('weighted', 'automatic', None):
        if allow_dupes:
            # include_duplicates sources (cachemiss, cachehit, etc.) follow
            # different rules: chunk when there are enough records,
            # otherwise random.
            mode = 'random' if count > len(records) else 'chunk'
        elif len(records) == len(set(records)):
            mode = 'weighted'
        else:
            # Data containing duplicates is selected randomly; only mention
            # it when the caller explicitly asked for weighted.
            if mode == 'weighted':
                self.msg(
                    '%s data contains duplicates, switching select_mode to random'
                    % source)
            mode = 'random'

    self.msg('Selecting %s out of %s sanitized records (%s mode).'
             % (count, len(records), mode))
    if mode == 'weighted':
        chosen = selectors.WeightedDistribution(records, count)
    elif mode == 'chunk':
        chosen = selectors.ChunkSelect(records, count)
    elif mode == 'random':
        chosen = selectors.RandomSelect(
            records, count, include_duplicates=allow_dupes)
    else:
        raise ValueError('No such final selection mode: %s' % mode)

    # For custom filenames
    if source not in self.source_config:
        self.source_config[source] = {'synthetic': True}

    if not are_records_fqdn:
        return chosen

    # NOTE(review): despite the flag's name, a truthy value is treated as
    # "mostly bare domain names" here - confirm against
    # _CreateRecordsFromHostEntries.
    self.source_config[source]['full_hostnames'] = False
    self.msg(
        '%s input appears to be predominantly domain names. Synthesizing FQDNs'
        % source)
    # Keep hostnames that already match FQDN_RE; generate one for the rest.
    return [
        (req_type,
         host if addr_util.FQDN_RE.match(host)
         else self._GenerateRandomHostname(host))
        for (req_type, host) in chosen
    ]
def testRandomSelect(self):
    """Check that RandomSelect returns the requested count in shuffled order.

    Ten picks are requested from a ten-element pool; the result must have
    ten entries and must not simply be the input in its original order.
    """
    elements = range(10)
    result = selectors.RandomSelect(elements, 10)
    self.assertEqual(len(result), 10)
    # Compare list to list: on Python 3 a list never equals a range object,
    # which would make this assertion pass unconditionally.
    # NOTE(review): a random selection could in principle reproduce the
    # original order, so this check is probabilistic by nature.
    self.assertNotEqual(list(result), list(range(10)))