Beispiel #1
0
        def to_dense(x):
            """Encode one raw set value as a dense boolean membership vector.

            ``x`` is passed to ``set_str_to_idx`` together with the vocabulary
            mapping ``metadata['str2idx']`` and the configured tokenizer
            (both taken from the enclosing scope). Whatever that helper
            returns is used to index the output vector -- presumably integer
            vocabulary positions; confirm against ``set_str_to_idx``.

            Returns:
                A 1-D boolean array with ``len(metadata['str2idx'])`` entries,
                True at every indexed position and False elsewhere.
            """
            feature_vector = set_str_to_idx(
                x, metadata['str2idx'], preprocessing_parameters['tokenizer'])

            # Allocate as bool up front using the builtin type: the ``np.bool``
            # alias was deprecated in NumPy 1.20 and removed in 1.24, so the
            # former ``astype(np.bool)`` raises AttributeError on those
            # releases.
            set_vector = np.zeros((len(metadata['str2idx']), ), dtype=bool)
            set_vector[feature_vector] = True
            return set_vector
Beispiel #2
0
        def to_vector(set_str):
            """Encode one raw bag value as a dense vector of occurrence counts.

            ``set_str`` is passed to ``set_str_to_idx`` with the vocabulary
            mapping ``metadata["str2idx"]`` and the configured tokenizer
            (both taken from the enclosing scope). The items it returns are
            tallied and each tally is stored at the matching position.

            Returns:
                A 1-D ``float32`` array with ``len(metadata["str2idx"])``
                entries; positions that were never produced stay at zero.
            """
            vocab_size = len(metadata["str2idx"])
            counts_vector = np.zeros((vocab_size, ), dtype=np.float32)

            occurrences = Counter(
                set_str_to_idx(
                    set_str,
                    metadata["str2idx"],
                    preprocessing_parameters["tokenizer"],
                )
            )

            # Write each tally into its own slot.
            for position, how_many in occurrences.items():
                counts_vector[position] = how_many

            return counts_vector
Beispiel #3
0
    def feature_data(column, metadata, preprocessing_parameters):
        """Build a dense count matrix for a column of raw bag values.

        Every element of ``column`` is passed to ``set_str_to_idx`` with
        ``metadata['str2idx']`` and ``preprocessing_parameters['tokenizer']``;
        the returned items are tallied and written into that element's row.

        Args:
            column: iterable of raw values that also supports ``len()``.
            metadata: mapping holding the vocabulary under ``'str2idx'``.
            preprocessing_parameters: mapping holding the ``'tokenizer'``
                setting forwarded to ``set_str_to_idx``.

        Returns:
            A ``float32`` array of shape
            ``(len(column), len(metadata['str2idx']))``.
        """
        n_rows = len(column)
        vocab_size = len(metadata['str2idx'])
        counts = np.zeros((n_rows, vocab_size), dtype=np.float32)

        for row, raw_value in enumerate(column):
            occurrences = Counter(
                set_str_to_idx(
                    raw_value,
                    metadata['str2idx'],
                    preprocessing_parameters['tokenizer'],
                )
            )
            # Store each tally in the cell for (row, position).
            for position, how_many in occurrences.items():
                counts[row, position] = how_many

        return counts
Beispiel #4
0
    def feature_data(column, metadata, preprocessing_parameters):
        """Build a dense boolean membership matrix for a column of raw sets.

        Every element of ``column`` is mapped through ``set_str_to_idx`` with
        ``metadata['str2idx']`` and ``preprocessing_parameters['tokenizer']``;
        the result is used to index that element's row and switch those
        cells on. Presumably the helper yields integer vocabulary positions
        -- confirm against ``set_str_to_idx``.

        Args:
            column: object exposing ``.map`` and ``len()`` (e.g. a pandas
                Series -- TODO confirm with callers).
            metadata: mapping holding the vocabulary under ``'str2idx'``.
            preprocessing_parameters: mapping holding the ``'tokenizer'``
                setting forwarded to ``set_str_to_idx``.

        Returns:
            A boolean array of shape
            ``(len(column), len(metadata['str2idx']))``.
        """
        feature_vector = np.array(
            column.map(
                lambda x: set_str_to_idx(x, metadata[
                    'str2idx'], preprocessing_parameters['tokenizer'])))

        # Allocate as bool directly using the builtin type: the ``np.bool``
        # alias was deprecated in NumPy 1.20 and removed in 1.24, and this
        # also avoids building a float64 matrix only to cast it afterwards.
        set_matrix = np.zeros(
            (len(column), len(metadata['str2idx'])), dtype=bool)

        for i, indices in enumerate(feature_vector):
            set_matrix[i, indices] = True

        return set_matrix
Beispiel #5
0
    def feature_data(column, metadata, preprocessing_parameters):
        """Build a dense count matrix for a column of raw bag values.

        Every element of ``column`` is passed to ``set_str_to_idx`` with
        ``metadata['str2idx']`` and ``preprocessing_parameters['format']``;
        the returned items are tallied and the tallies are written into that
        element's row.

        Args:
            column: iterable of raw values that also supports ``len()``.
            metadata: mapping holding the vocabulary under ``'str2idx'``.
            preprocessing_parameters: mapping holding the ``'format'``
                setting forwarded to ``set_str_to_idx``.

        Returns:
            A float array of shape
            ``(len(column), len(metadata['str2idx']))``.
        """
        bag_matrix = np.zeros((len(column), len(metadata['str2idx'])),
                              dtype=float)

        # Walk the values positionally instead of using ``column[i]``: on a
        # pandas Series ``column[i]`` is a label lookup, which raises or picks
        # the wrong row whenever the index is not exactly 0..n-1 (e.g. after
        # filtering or shuffling). For lists and default-indexed Series the
        # result is unchanged.
        for i, set_str in enumerate(column):
            col_counter = Counter(
                set_str_to_idx(set_str, metadata['str2idx'],
                               preprocessing_parameters['format']))
            bag_matrix[i,
                       list(col_counter.keys())] = list(col_counter.values())

        return bag_matrix