Beispiel #1
0
def test_strings_from_regex_digit():
    """A single digit character class yields the ten digits, in order."""
    expected_strings = list('0123456789')

    generator, size = strings_from_regex('[0-9]')

    assert size == 10
    assert list(generator) == expected_strings
Beispiel #2
0
    def _get_primary_keys(self, table_name, num_rows):
        """Return the primary key and amount of values for the requested table.

        Args:
            table_name (str):
                Name of the table to get the primary keys from.
            num_rows (str):
                Number of ``primary_keys`` to generate.

        Returns:
            tuple (str, pandas.Series):
                primary key name and primary key values. If the table has no primary
                key, ``(None, None)`` is returned.

        Raises:
            ValueError:
                If the ``metadata`` contains invalid types or subtypes, or if
                there are not enough primary keys left on any of the generators.
            NotImplementedError:
                If the primary key subtype is a ``datetime``.
        """
        primary_key = self.metadata.get_primary_key(table_name)

        field = self.metadata.get_fields(table_name)[primary_key]

        generator = self._primary_key_generators.get(table_name)

        if generator is None:
            if field['type'] != 'id':
                raise ValueError('Only columns with type `id` can be primary keys')

            subtype = field.get('subtype', 'integer')
            if subtype == 'integer':
                generator = itertools.count()
                remaining = np.inf
            elif subtype == 'string':
                regex = field.get('regex', r'^[a-zA-Z]+$')
                generator, remaining = utils.strings_from_regex(regex)
            elif subtype == 'datetime':
                raise NotImplementedError('Datetime ids are not yet supported')
            else:
                raise ValueError('Only `integer` or `string` id columns are supported.')

            self._primary_key_generators[table_name] = generator
            self._remaining_primary_keys[table_name] = remaining

        else:
            remaining = self._remaining_primary_keys[table_name]

        if remaining < num_rows:
            raise ValueError(
                'Not enough unique values for primary key of table {}'
                ' to generate {} samples.'.format(table_name, num_rows)
            )

        self._remaining_primary_keys[table_name] -= num_rows
        primary_key_values = pd.Series([x for i, x in zip(range(num_rows), generator)])

        return primary_key_values
Beispiel #3
0
def test_strings_from_regex_repeat_digit():
    """One to three digits gives 10 + 100 + 1000 strings, from '0' to '999'."""
    generator, size = strings_from_regex(r'\d{1,3}')

    assert size == 1110

    produced = list(generator)
    assert (produced[0], produced[-1]) == ('0', '999')
Beispiel #4
0
    def _make_ids(cls, field_metadata, length):
        field_subtype = field_metadata.get('subtype', 'integer')
        if field_subtype == 'string':
            regex = field_metadata.get('regex', '[a-zA-Z]+')
            generator, max_size = strings_from_regex(regex)
            if max_size < length:
                raise ValueError(
                    ('Unable to generate {} unique values for regex {}, the '
                     'maximum number of unique values is {}.').format(
                         length, regex, max_size))
            values = [next(generator) for _ in range(length)]

            return pd.Series(list(values)[:length])
        else:
            return pd.Series(np.arange(length))
Beispiel #5
0
def test_strings_from_regex_literal():
    """A plain literal pattern produces only the literal itself."""
    literal = 'abcd'

    generator, size = strings_from_regex(literal)

    assert size == 1
    assert list(generator) == [literal]
Beispiel #6
0
def test_strings_from_regex_repeat_literal():
    """A literal repeated one to three times yields each repetition once."""
    expected_strings = ['a', 'aa', 'aaa']

    generator, size = strings_from_regex('a{1,3}')

    assert size == 3
    assert list(generator) == expected_strings