Exemple #1
0
 def encoder(self, data):
     """Build the lookup stored on ``self._encoder`` from ``data``.

     Args:
         data: Either a list, whose items are mapped to their position in
             the list, or a dict (``OrderedDict`` included), which is
             wrapped as given.

     Raises:
         TypeError: If ``data`` is neither a list nor a dict.
     """
     if isinstance(data, list):
         # enumerate() yields (index, item); taking .inv exposes item -> index.
         self._encoder = OneToOne(enumerate(data)).inv
     elif isinstance(data, dict):
         # OrderedDict is a dict subclass, so this branch covers it too.
         self._encoder = OneToOne(data)
     else:
         # Raise explicitly: ``assert False`` is stripped under ``python -O``,
         # which would silently leave ``_encoder`` unset.
         raise TypeError(
             "encoder data must be a list or a dict, got "
             + type(data).__name__
         )
Exemple #2
0
    def __init__(self, labels=None, reserved_mappings=None, filepath=None):
        '''
        Create an empty encoder and merge in up to three label sources.

        Sources are merged in this order: reserved_mappings, labels, then
        whatever self.load_labels(filepath) returns.

        Arguments:
            labels=None, list(strings):
                A list of potentially non-unique strings representing categorical labels.
                None or an empty list adds nothing.
            reserved_mappings=None, dict({str:int}):
                a dictionary mapping of text to integer numbers.
                NOTE: only the keys are forwarded to merge_labels; the integer
                values are discarded at this point.
            filepath=None:
                When not None, it is passed to self.load_labels and the result
                is merged last.
        '''
        self.num_classes = 0
        self._encodings = OneToOne()
        # None replaces the former mutable defaults ([] and {}), which were
        # shared between every call of this constructor.
        if reserved_mappings is not None and len(reserved_mappings) > 0:
            # list() over a dict keeps only its keys.
            self.merge_labels(list(reserved_mappings))
        if labels is not None and len(labels) > 0:
            self.merge_labels(labels)
        if filepath is not None:
            self.merge_labels(self.load_labels(filepath))
Exemple #3
0
class ParameterKeyEscaper:
    """
    Makes the fields name ready for use with MongoDB and Mongoengine
    . and $ are replaced with their codes
    __ and leading _ are escaped
    Since % is used as an escape character the % is also escaped

    Escaped form produced by ``escape``:
      * a literal ``%`` becomes ``%%``
      * ``.`` becomes ``%2E``, ``$`` becomes ``%24``, ``__`` becomes ``%_%_``
      * a leading ``_`` that is still present afterwards becomes ``%_``
    """

    _mapping = OneToOne({".": "%2E", "$": "%24", "__": "%_%_"})

    @classmethod
    def escape(cls, value):
        """
        Quote a parameter key

        The value is stripped of surrounding whitespace before quoting.
        Raises a ValidationError when value is None (other values, including
        the empty string, are quoted as-is).
        """
        if value is None:
            raise errors.bad_request.ValidationError("Key cannot be empty")

        # Double the escape character first so that the codes inserted below
        # are the only single "%" left in the result.
        value = value.strip().replace("%", "%%")

        for c, r in cls._mapping.items():
            value = value.replace(c, r)

        # A leading "__" was already rewritten above, so this only fires for
        # a single leading underscore.
        if value.startswith("_"):
            value = "%_" + value[1:]

        return value

    @classmethod
    def _unescape(cls, value):
        """ Replace every escape code in value with the text it stands for """
        for c, r in cls._mapping.inv.items():
            value = value.replace(c, r)
        return value

    @classmethod
    def unescape(cls, value):
        """ Unquote a quoted parameter key """
        # Splitting on "%%" isolates the literal percent signs; the segments
        # in between contain escape codes only.
        parts = [cls._unescape(part) for part in value.split("%%")]

        # The leading-underscore marker must be looked for in the first
        # segment, before the literal "%" are put back. Checking the joined
        # string would also match a key that really started with "%_"
        # (quoted as "%%_...") and wrongly turn it into "_...".
        if parts[0].startswith("%_"):
            parts[0] = "_" + parts[0][2:]

        return "%".join(parts)
Exemple #4
0
class ParameterKeyEscaper:
    """
    Quote and unquote parameter keys.

    "." and "$" are swapped for the codes listed in _mapping; a literal "%"
    is doubled so that it cannot be confused with the start of a code.
    """

    _mapping = OneToOne({".": "%2E", "$": "%24"})

    @classmethod
    def escape(cls, value):
        """ Quote a parameter key (surrounding whitespace is stripped first) """
        quoted = value.strip().replace("%", "%%")
        for raw, code in cls._mapping.items():
            quoted = quoted.replace(raw, code)
        return quoted

    @classmethod
    def _unescape(cls, value):
        """ Swap every code found in value back to its original character """
        restored = value
        for code, raw in cls._mapping.inv.items():
            restored = restored.replace(code, raw)
        return restored

    @classmethod
    def unescape(cls, value):
        """ Unquote a quoted parameter key """
        # "%%" marks a literal percent sign: decode the text between those
        # markers, then put a single "%" back at each of them.
        segments = value.split("%%")
        return "%".join(cls._unescape(segment) for segment in segments)
Exemple #5
0
def test_one_to_one():
    """Check that a OneToOne and its ``inv`` view stay mirrored under mutation."""
    mapping = OneToOne({1: 2})

    def check(expected, expected_inv):
        # Compare the forward mapping and its inverse in a single assertion.
        assert (mapping, mapping.inv) == (expected, expected_inv)

    check({1: 2}, {2: 1})

    # Item assignment and clear().
    mapping[2] = 3
    check({1: 2, 2: 3}, {3: 2, 2: 1})
    mapping.clear()
    check({}, {})

    # Overwriting a key, then re-using an existing value under a new key.
    mapping[1] = 1
    check({1: 1}, {1: 1})
    mapping[1] = 2
    check({1: 2}, {2: 1})
    mapping[3] = 2
    check({3: 2}, {2: 3})
    del mapping[3]
    check({}, {})

    # Writing and deleting through the inverse view.
    mapping[1] = 2
    mapping.inv[2] = 3
    check({3: 2}, {2: 3})
    del mapping.inv[2]
    check({}, {})

    # A copy carries its own inverse.
    assert OneToOne({1: 2, 3: 4}).copy().inv == {2: 1, 4: 3}

    # pop() on either side, and popitem().
    mapping[1] = 2
    mapping.pop(1)
    check({}, {})
    mapping[1] = 2
    mapping.inv.pop(2)
    check({}, {})
    mapping[1] = 2
    mapping.popitem()
    check({}, {})

    # setdefault() without a default stores None on the other side.
    mapping.setdefault(1)
    check({1: None}, {None: 1})
    mapping.inv.setdefault(2)
    check({1: None, None: 2}, {None: 1, 2: None})

    # update() with both a dict and keyword arguments.
    mapping.clear()
    mapping.update({1: 2}, cat="dog")
    check({1: 2, "cat": "dog"}, {2: 1, "dog": "cat"})

    # try various overlapping values
    overlapping = OneToOne({'a': 0, 'b': 0})
    assert len(overlapping) == len(overlapping.inv) == 1

    overlapping['c'] = 0
    assert len(overlapping) == len(overlapping.inv) == 1
    assert overlapping.inv[0] == 'c'

    overlapping.update({'z': 0, 'y': 0})
    assert len(overlapping) == len(overlapping.inv) == 1

    # test out unique classmethod
    with pytest.raises(ValueError):
        OneToOne.unique({'a': 0, 'b': 0})
Exemple #6
0
def test_one_to_one():
    """Walk a OneToOne through the dict API, checking both directions each time."""
    forward = OneToOne({1: 2})

    def expect(fwd, inv):
        # Both views are compared together after every mutation.
        assert (forward, forward.inv) == (fwd, inv)

    expect({1: 2}, {2: 1})

    # __setitem__ / clear
    forward[2] = 3
    expect({1: 2, 2: 3}, {3: 2, 2: 1})
    forward.clear()
    expect({}, {})

    # Replacing the value of a key, then moving a value to another key.
    forward[1] = 1
    expect({1: 1}, {1: 1})
    forward[1] = 2
    expect({1: 2}, {2: 1})
    forward[3] = 2
    expect({3: 2}, {2: 3})
    del forward[3]
    expect({}, {})

    # Mutating through .inv is reflected in the forward mapping.
    forward[1] = 2
    forward.inv[2] = 3
    expect({3: 2}, {2: 3})
    del forward.inv[2]
    expect({}, {})

    # copy() keeps the inverse consistent.
    assert OneToOne({1: 2, 3: 4}).copy().inv == {2: 1, 4: 3}

    # pop / inv.pop / popitem each empty a one-item mapping.
    forward[1] = 2
    forward.pop(1)
    expect({}, {})
    forward[1] = 2
    forward.inv.pop(2)
    expect({}, {})
    forward[1] = 2
    forward.popitem()
    expect({}, {})

    # setdefault with no default argument pairs the key with None.
    forward.setdefault(1)
    expect({1: None}, {None: 1})
    forward.inv.setdefault(2)
    expect({1: None, None: 2}, {None: 1, 2: None})

    # update accepts a dict plus keyword arguments.
    forward.clear()
    forward.update({1: 2}, cat="dog")
    expect({1: 2, "cat": "dog"}, {2: 1, "dog": "cat"})
Exemple #7
0
 def __init__(self, labels):
     """Derive the class tuple and the class -> index lookup from ``labels``.

     ``labels`` is sorted and passed through ``np.unique``; the result is
     stored as the tuple ``self.classes``. ``self._encoder`` is the inverse
     of the (index, class) enumeration of that tuple.
     """
     ordered_labels = sorted(labels)
     self.classes = tuple(np.unique(ordered_labels))
     index_to_class = OneToOne(enumerate(self.classes))
     self._encoder = index_to_class.inv
     # File name kept on the instance; not used inside this method.
     self.fname = 'label_encoder.json'
Exemple #8
0
class LabelEncoder:
    '''
    Two-way mapping between labels and consecutive integer codes.

    Codes are handed out by merge_labels, starting at 0, in the order in which
    new labels are first seen. The mapping lives in self._encodings (label ->
    code); its .inv view gives code -> label.
    '''

    def __init__(self, labels=None, reserved_mappings=None, filepath=None):
        '''
        Create an empty encoder and merge in up to three label sources.

        Sources are merged in this order: reserved_mappings, labels, then
        whatever self.load_labels(filepath) returns.

        Arguments:
            labels=None, list(strings):
                A list of potentially non-unique strings representing categorical labels.
                None or an empty list adds nothing.
            reserved_mappings=None, dict({str:int}):
                a dictionary mapping of text to integer numbers.
                NOTE: only the keys are merged; the integer values are
                discarded and codes are assigned by merge_labels.
            filepath=None:
                When not None, labels are loaded from it with load_labels and
                merged last.
        '''
        self.num_classes = 0
        self._encodings = OneToOne()
        # None replaces the former mutable defaults ([] and {}), which were
        # shared between every call of this constructor.
        if reserved_mappings is not None and len(reserved_mappings) > 0:
            # list() over a dict keeps only its keys.
            self.merge_labels(list(reserved_mappings))
        if labels is not None and len(labels) > 0:
            self.merge_labels(labels)
        if filepath is not None:
            self.merge_labels(self.load_labels(filepath))

    def filter(self, data_df, text_label_col='family', int_label_col=None):
        '''
        Filter a dataframe to include only rows corresponding to labels in the encoder. Useful for preprocessing a target domain dataset for a model trained on source domain labels.

        When int_label_col is given (truthy), rows are matched on that column
        against the known integer codes; otherwise rows are matched on
        text_label_col against the known text labels.
        '''
        # Read-only access, so the internal mapping is used directly instead
        # of deep-copying it.
        if int_label_col:
            column = int_label_col
            whitelist = list(self._encodings.inv)
        else:
            column = text_label_col
            whitelist = list(self._encodings)
        return data_df[data_df[column].isin(whitelist)]

    def transform(self, labels):
        '''Return the list of codes for labels; an unknown label raises KeyError.'''
        return [self._encodings[label] for label in labels]

    def inv_transform(self, encoded_labels):
        '''Return the list of labels for encoded_labels; an unknown code raises KeyError.'''
        return [self._encodings.inv[code] for code in encoded_labels]

    def merge_labels(self, labels=None):
        '''
        Labels can be list, or a dict where the keys are str
        Iterates through labels or unique values that dont already exist in encoder.

        Each new label receives the next free code (self.num_classes), in the
        sorted order produced by np.unique. None adds nothing.
        '''
        if labels is None:
            return
        # list() over a dict yields its keys; np.unique de-duplicates and sorts.
        for label in np.unique(list(labels)):
            if label not in self._encodings:
                self._encodings[label] = self.num_classes
                self.num_classes += 1

    def load_labels(self, filepath):
        '''Return what load_label_encodings_from_file reads from filepath.'''
        return load_label_encodings_from_file(filepath)

    def save_labels(self, filepath):
        '''Write a copy of the current encodings to filepath.'''
        save_label_encodings_to_file(self.get_encodings(), filepath)

    def get_encodings(self):
        '''Return a deep copy of the mapping, so callers cannot alter the encoder.'''
        return copy.deepcopy(self._encodings)

    def __len__(self):
        # Number of known labels; no copy is needed just to count them.
        return len(self._encodings)

    def __repr__(self):
        # JSON rendering of the label -> code mapping.
        return json.dumps(self._encodings, indent=2)
Exemple #9
0
def test_one_to_one():
    """Verify that every mutation of a OneToOne is mirrored by its inverse."""
    pairs = OneToOne({1: 2})

    def verify(want, want_inv):
        # One assertion covers the mapping and its inverse together.
        assert (pairs, pairs.inv) == (want, want_inv)

    verify({1: 2}, {2: 1})

    # Adding a second pair, then wiping everything.
    pairs[2] = 3
    verify({1: 2, 2: 3}, {3: 2, 2: 1})
    pairs.clear()
    verify({}, {})

    # A key mapped to itself, re-mapped, and a value taken over by a new key.
    pairs[1] = 1
    verify({1: 1}, {1: 1})
    pairs[1] = 2
    verify({1: 2}, {2: 1})
    pairs[3] = 2
    verify({3: 2}, {2: 3})
    del pairs[3]
    verify({}, {})

    # Assignment and deletion via the inverse side.
    pairs[1] = 2
    pairs.inv[2] = 3
    verify({3: 2}, {2: 3})
    del pairs.inv[2]
    verify({}, {})

    # The inverse of a copy matches the copied content.
    assert OneToOne({1: 2, 3: 4}).copy().inv == {2: 1, 4: 3}

    # Removal helpers: pop on each side, then popitem.
    pairs[1] = 2
    pairs.pop(1)
    verify({}, {})
    pairs[1] = 2
    pairs.inv.pop(2)
    verify({}, {})
    pairs[1] = 2
    pairs.popitem()
    verify({}, {})

    # setdefault without a default inserts None as the partner.
    pairs.setdefault(1)
    verify({1: None}, {None: 1})
    pairs.inv.setdefault(2)
    verify({1: None, None: 2}, {None: 1, 2: None})

    # update with a positional dict and a keyword pair.
    pairs.clear()
    pairs.update({1: 2}, cat="dog")
    verify({1: 2, "cat": "dog"}, {2: 1, "dog": "cat"})