Ejemplo n.º 1
0
    def test_is_stratifier(self):
        # Only the fixture carrying the stratifier decoration should be
        # reported as a stratifier; the undecorated string and the string
        # with some other decorator must not be.
        self.assertTrue(Channel(self.stratified_string).is_stratifier)

        for non_stratifier in (self.undecorated_string,
                               self.arbitrary_decorator_string):
            self.assertFalse(Channel(non_stratifier).is_stratifier)
Ejemplo n.º 2
0
    def test_str_returns_decorated_string(self):
        # str() of a Channel must give back exactly the string it was built
        # from, whatever decoration (if any) that string carries.
        fixtures = (
            self.stratified_string,
            self.undecorated_string,
            self.arbitrary_decorator_string,
        )
        for source in fixtures:
            self.assertEqual(str(Channel(source)), source)
Ejemplo n.º 3
0
    def test_decorator_detection(self):
        # Each fixture string paired with the decorator the Channel is
        # expected to expose for it (None when there is no decoration).
        expectations = (
            (self.stratified_string, 's'),
            (self.undecorated_string, None),
            (self.arbitrary_decorator_string, 'p'),
        )
        for source, expected_decorator in expectations:
            self.assertEqual(Channel(source).decorator, expected_decorator)
Ejemplo n.º 4
0
    def test_deconstruct_channel_string_regression(self):
        # An undecorated string comes back whole as the name, with no
        # decorator.
        plain = 'Asteroid Mining'
        pieces = Channel.deconstruct_channel_string(channel=plain)
        self.assertEqual(pieces['name'], plain)
        self.assertEqual(pieces['decorator'], None)

        # Decorated strings are expected to split at the colon into
        # decorator and name.
        for decorated in ('s:Asteroid Mining', 'd:Asteroid Mining'):
            expected_decorator, expected_name = decorated.split(':')
            pieces = Channel.deconstruct_channel_string(channel=decorated)
            self.assertEqual(pieces['name'], expected_name)
            self.assertEqual(pieces['decorator'], expected_decorator)
Ejemplo n.º 5
0
    def test_construct_channel_string_regression(self):
        # With the decorator omitted or explicitly None, the name is
        # expected back unchanged.
        plain_name = 'Asteroid Mining'
        self.assertEqual(
            Channel.construct_channel_string(name=plain_name), plain_name)
        self.assertEqual(
            Channel.construct_channel_string(name=plain_name, decorator=None),
            plain_name)

        # With a decorator, the result is expected as "<decorator>:<name>".
        decorated_name = 'Carbonate Processing'
        for prefix in ('s', 'h'):
            built = Channel.construct_channel_string(
                name=decorated_name, decorator=prefix)
            self.assertEqual(built, '%s:%s' % (prefix, decorated_name))
    def test_df_initialization_with_decorated_stratifiers(self):
        # Decorate the first column of the fixture dataframe with 's' and
        # check that the wrapper lists that column, under its original
        # (undecorated) name, as its only stratifier.
        column_names = list(self.dataframe.columns)
        undecorated_names = deepcopy(column_names)
        column_names[0] = str(Channel.construct_channel_string(
            name=column_names[0], decorator='s'))
        self.dataframe.columns = column_names

        wrapper = DataFrameWrapper(dataframe=self.dataframe)
        self.assertEqual(list(wrapper.stratifiers), undecorated_names[0:1])
    def __init__(self, filename=None, dataframe=None, stratifiers=None):
        """Build the wrapper from a dataframe or from a file on disk.

        Exactly one of ``filename`` and ``dataframe`` must be given.

        Args:
            filename: Path of a file to load. Its extension (without the
                dot) must equal ``self.CSV``; the file is then read with
                ``pd.read_csv``.
            dataframe: Dataframe to wrap. A copy is stored, so the column
                renaming and index reset below are applied to the copy.
            stratifiers: Optional column name, or list of column names, to
                use as stratifiers. When omitted or empty, the stratifiers
                are the columns that ``Channel`` reports as stratifiers,
                and those columns are renamed to ``Channel.name``.

        Raises:
            ValueError: If neither or both of ``filename`` and
                ``dataframe`` are given.
            UnsupportedFileType: If the extension of ``filename`` does not
                equal ``self.CSV``.
            MissingRequiredData: If a requested stratifier is not a column
                of the dataframe.
        """
        # Both "neither given" and "both given" are rejected.
        if (filename is None) == (dataframe is None):
            raise ValueError(
                'exactly one of filename or dataframe must be provided')

        if dataframe is not None:
            self._dataframe = dataframe.copy()
        else:
            _, file_type = os.path.splitext(filename)
            file_type = file_type.replace('.', '')
            if file_type == self.CSV:
                self._dataframe = pd.read_csv(filename)
            else:
                raise self.UnsupportedFileType(
                    'Unsupported file type for reading: %s' % file_type)
        self._dataframe.reset_index(drop=True, inplace=True)

        if not stratifiers:
            # Determine the stratifying channels from the column names and
            # strip the stratifying decoration from those columns. Columns
            # that are not stratifiers keep their name as-is.
            stratifiers = []
            renamed_columns = []
            for column in self._dataframe.columns:
                channel = Channel(column)
                if channel.is_stratifier:
                    stratifiers.append(channel.name)
                    renamed_columns.append(channel.name)
                else:
                    renamed_columns.append(column)
            self._dataframe.columns = renamed_columns
        else:
            # Use the provided stratifier list; a single non-list value is
            # treated as one stratifier name.
            if not isinstance(stratifiers, list):
                stratifiers = [stratifiers]
            missing_stratifiers = [
                s for s in stratifiers if s not in self._dataframe.columns
            ]
            if missing_stratifiers:
                raise self.MissingRequiredData(
                    'Specified stratifier(s): %s not in dataframe.' %
                    missing_stratifiers)

        # Every column that is not a stratifier is a data channel.
        self.stratifiers = sorted(set(stratifiers))
        self.data_channels = sorted(
            set(self._dataframe.columns) - set(self.stratifiers))