Esempio n. 1
0
 def setup_grouped_dataframe(self, count):
     """Build a mocked dataframe with one grouped column per basic type.

     Args:
         count: value assigned to the DFMock ``count`` attribute
             (presumably the number of rows to generate -- confirm)

     Returns:
         The ``dataframe`` attribute of the DFMock after generation.
     """
     # (column name, dfmock option type) for every grouped column
     grouped = (
         ("string_col", "string"),
         ("int_col", "int"),
         ("float_col", "float"),
         ("bool_col", "bool"),
         ("datetime_col", "datetime"),
     )

     schema = {}
     for column, kind in grouped:
         schema[column] = {"option_count": 3, "option_type": kind}
     # plain (non-grouped) data column
     schema["metrics"] = "int"

     mock = DFMock()
     mock.count = count
     mock.columns = schema
     mock.generate_dataframe()
     return mock.dataframe
Esempio n. 2
0
    def mock_publish(self,
                     partition_types: Dict[str, str],
                     bucket="safebucketname",
                     key='safekeyprefixname/safedatasetname'):
        """Publish a small mocked dataframe and return what was published.

        Args:
            partition_types (Dict[str, str]): maps each partition column name
                to the dfmock option type used to generate that column
            bucket: kept for interface compatibility only; the value passed in
                is always replaced by the ``s3_bucket`` of the MockHelper
                created inside this method
            key: key handed to ``publish`` unchanged

        Returns:
            Tuple ``(bucket, dataframe, partitions, published_files)`` where
            ``bucket`` is the MockHelper bucket, ``dataframe`` is the mocked
            dataframe that was published, ``partitions`` is the list of
            partition column names and ``published_files`` is whatever
            ``publish`` returned.
        """
        # NOTE(review): presumably s3=True makes the helper stand up the
        # mocked bucket used below -- confirm against MockHelper.
        mocker = MockHelper(count=100, s3=True)
        partitions = list(partition_types)

        dfmock = DFMock()
        dfmock.count = 10

        # one grouped column per requested partition
        columns = {
            column_name: {
                "option_count": 3,
                "option_type": option_type
            }
            for column_name, option_type in partition_types.items()
        }

        # add one actual data column, called metrics
        columns["metrics"] = "int"

        dfmock.columns = columns
        dfmock.generate_dataframe()

        # dataframe we will write
        df = dfmock.dataframe
        # the caller-supplied bucket is intentionally overridden here
        bucket = mocker.s3_bucket

        published_files = publish(bucket=bucket,
                                  key=key,
                                  dataframe=df,
                                  partitions=partitions)

        return bucket, df, partitions, published_files
Esempio n. 3
0
def setup_grouped_dataframe(count: int = 100, columns: Dict = None):
    """ Generates a mock dataframe through dfmock

    Args:
        count (int): Row length to generate on the dataframe
        columns (Dict): column names mapped to dfmock type specs; when
            missing or empty, a default set of grouped columns is used

    Returns:
        The dataframe produced by the configured DFMock
    """
    if columns:
        schema = columns
    else:
        # default layout: one 3-option grouped column per basic type ...
        schema = {
            name: {"option_count": 3, "option_type": kind}
            for name, kind in (
                ("string_col", "string"),
                ("int_col", "int"),
                ("float_col", "float"),
                ("bool_col", "bool"),
                ("datetime_col", "datetime"),
            )
        }
        # ... plus two plain data columns
        schema["text_col"] = "string"
        schema["metrics"] = "int"

    mock = DFMock()
    mock.count = count
    mock.columns = schema
    mock.generate_dataframe()
    return mock.dataframe