def setup_grouped_dataframe(self, count):
    """Generate a mock dataframe with a fixed set of grouped columns.

    Five columns (string, int, float, bool, datetime) are each declared
    with an ``option_count`` of 3, plus a plain ``int`` column named
    ``metrics``.

    Args:
        count: value assigned to the DFMock ``count`` attribute
            (presumably the number of rows to generate - confirm
            against dfmock).

    Returns:
        The ``dataframe`` attribute of the DFMock instance after
        ``generate_dataframe()`` has run.
    """
    # (column name, dfmock option type) pairs, in output column order.
    grouped_specs = (
        ("string_col", "string"),
        ("int_col", "int"),
        ("float_col", "float"),
        ("bool_col", "bool"),
        ("datetime_col", "datetime"),
    )
    column_layout = {
        col_name: {"option_count": 3, "option_type": col_type}
        for col_name, col_type in grouped_specs
    }
    column_layout["metrics"] = "int"

    generator = DFMock()
    generator.count = count
    generator.columns = column_layout
    generator.generate_dataframe()
    return generator.dataframe
def mock_publish(self, partition_types: Dict[str, str], bucket="safebucketname", key='safekeyprefixname/safedatasetname'):
    """Publish a small mocked dataframe and return what was published.

    A 10-row DFMock dataframe is generated with one column per entry in
    ``partition_types`` (each declared with an ``option_count`` of 3)
    plus an ``int`` column named ``metrics``, then handed to ``publish``.

    Args:
        partition_types: mapping of partition column name to dfmock
            option type; its keys become the partition list.
        bucket: accepted for backward compatibility but NOT used - the
            bucket always comes from ``MockHelper.s3_bucket``.
        key: key passed through to ``publish``.

    Returns:
        Tuple ``(bucket, df, partitions, published_files)`` where
        ``bucket`` is the MockHelper bucket, ``df`` is the generated
        dataframe, ``partitions`` is the list of partition column names
        and ``published_files`` is whatever ``publish`` returned.
    """
    # The helper is needed only for the bucket it exposes; its own
    # dataframe is not used here.
    # NOTE(review): presumably s3=True provisions a mocked bucket -
    # confirm against MockHelper.
    mocker = MockHelper(count=100, s3=True)

    # One option-typed column per partition, plus one actual data column.
    # A "metrics" entry in partition_types would be overwritten here.
    columns = {
        column_name: {"option_count": 3, "option_type": option_type}
        for column_name, option_type in partition_types.items()
    }
    columns["metrics"] = "int"

    # Generate the dataframe we will write.
    dfmock = DFMock()
    dfmock.count = 10
    dfmock.columns = columns
    dfmock.generate_dataframe()
    df = dfmock.dataframe

    # Deliberately replaces the caller-supplied bucket name (see docstring).
    bucket = mocker.s3_bucket
    partitions = list(partition_types)

    published_files = publish(bucket=bucket,
                              key=key,
                              dataframe=df,
                              partitions=partitions)

    return bucket, df, partitions, published_files
def setup_grouped_dataframe(count: int = 100, columns: Dict = None):
    """ Build a mock dataframe with dfmock.

    Args:
        count (int): value assigned to the DFMock ``count`` attribute
            (the row length to generate)
        columns (Dict): column names mapped to dfmock type specs; when
            omitted or empty, a default layout is used: five grouped
            columns (string, int, float, bool, datetime) with an
            ``option_count`` of 3, a free ``string`` column ``text_col``
            and an ``int`` column ``metrics``

    Returns:
        The ``dataframe`` attribute of the DFMock instance after
        ``generate_dataframe()`` has run
    """
    if columns:
        layout = columns
    else:
        # (column name, dfmock option type) pairs, in output column order.
        grouped_specs = (
            ("string_col", "string"),
            ("int_col", "int"),
            ("float_col", "float"),
            ("bool_col", "bool"),
            ("datetime_col", "datetime"),
        )
        layout = {
            col_name: {"option_count": 3, "option_type": col_type}
            for col_name, col_type in grouped_specs
        }
        layout["text_col"] = "string"
        layout["metrics"] = "int"

    generator = DFMock()
    generator.count = count
    generator.columns = layout
    generator.generate_dataframe()
    return generator.dataframe