def to_dense(x):
    """Encode one raw set string as a dense boolean membership vector.

    ``metadata``, ``preprocessing_parameters`` and ``set_str_to_idx`` are
    not parameters; they are resolved from the enclosing/module scope.

    Args:
        x: Raw value handed to ``set_str_to_idx`` together with the
            ``'str2idx'`` vocabulary and the configured ``'tokenizer'``.

    Returns:
        A 1-D boolean ``numpy`` array of length ``len(metadata['str2idx'])``
        that is ``True`` at every position returned by ``set_str_to_idx``.
    """
    vocab = metadata['str2idx']
    # Presumably integer positions into ``vocab`` -- confirm against
    # ``set_str_to_idx``; they are used directly as a NumPy index below.
    feature_vector = set_str_to_idx(
        x, vocab, preprocessing_parameters['tokenizer'])
    # Allocate as the builtin ``bool`` dtype: the ``np.bool`` alias was
    # removed in NumPy 1.24, and this also avoids a float temporary.
    set_vector = np.zeros((len(vocab),), dtype=bool)
    set_vector[feature_vector] = True
    return set_vector
def to_vector(set_str):
    """Turn one raw string into a float32 bag-of-items count vector.

    ``metadata``, ``preprocessing_parameters`` and ``set_str_to_idx`` are
    resolved from the enclosing/module scope.

    Args:
        set_str: Raw value handed to ``set_str_to_idx`` together with the
            ``"str2idx"`` vocabulary and the configured ``"tokenizer"``.

    Returns:
        A 1-D ``float32`` array of length ``len(metadata["str2idx"])`` whose
        entry at each returned index is the number of times that index
        occurred; all other entries are zero.
    """
    vocab = metadata["str2idx"]
    occurrences = Counter(
        set_str_to_idx(set_str, vocab, preprocessing_parameters["tokenizer"])
    )
    bag_vector = np.zeros((len(vocab),), dtype=np.float32)
    # Keys and counts are read in the same iteration order, so they line up.
    positions = list(occurrences)
    bag_vector[positions] = [occurrences[pos] for pos in positions]
    return bag_vector
def feature_data(column, metadata, preprocessing_parameters):
    """Build a float32 count matrix with one row per entry of ``column``.

    Args:
        column: Sized iterable of raw values; each is passed to
            ``set_str_to_idx``.
        metadata: Mapping providing the ``'str2idx'`` vocabulary, whose
            length fixes the number of matrix columns.
        preprocessing_parameters: Mapping providing the ``'tokenizer'``
            setting forwarded to ``set_str_to_idx``.

    Returns:
        A ``(len(column), len(metadata['str2idx']))`` ``float32`` array where
        entry ``[i, j]`` is how many times index ``j`` was returned for the
        ``i``-th value; untouched entries stay zero.
    """
    vocab = metadata['str2idx']
    bag_matrix = np.zeros((len(column), len(vocab)), dtype=np.float32)
    for row, raw_value in enumerate(column):
        tally = Counter(
            set_str_to_idx(
                raw_value, vocab, preprocessing_parameters['tokenizer']
            )
        )
        # Keys and counts are read in the same iteration order.
        hit_columns = list(tally)
        bag_matrix[row, hit_columns] = [tally[col] for col in hit_columns]
    return bag_matrix
def feature_data(column, metadata, preprocessing_parameters):
    """Build a boolean membership matrix with one row per entry of ``column``.

    Args:
        column: Object exposing ``.map`` and ``len()`` (e.g. a pandas
            Series -- assumption, confirm against callers); every value is
            passed to ``set_str_to_idx``.
        metadata: Mapping providing the ``'str2idx'`` vocabulary, whose
            length fixes the number of matrix columns.
        preprocessing_parameters: Mapping providing the ``'tokenizer'``
            setting forwarded to ``set_str_to_idx``.

    Returns:
        A ``(len(column), len(metadata['str2idx']))`` boolean array that is
        ``True`` at ``[i, j]`` when index ``j`` was returned for the
        ``i``-th value of ``column``.
    """
    vocab = metadata['str2idx']
    # ``np.array`` makes the per-row results positionally iterable
    # regardless of the index labels carried by the mapped column.
    feature_vector = np.array(
        column.map(
            lambda x: set_str_to_idx(
                x, vocab, preprocessing_parameters['tokenizer'])))

    # Allocate as the builtin ``bool`` dtype: the ``np.bool`` alias was
    # removed in NumPy 1.24, and this also avoids a float64 temporary.
    set_matrix = np.zeros((len(column), len(vocab)), dtype=bool)
    for row, indices in enumerate(feature_vector):
        set_matrix[row, indices] = True
    return set_matrix
def feature_data(column, metadata, preprocessing_parameters):
    """Build a float count matrix with one row per entry of ``column``.

    Args:
        column: Sized iterable of raw values; each is passed to
            ``set_str_to_idx``.
        metadata: Mapping providing the ``'str2idx'`` vocabulary, whose
            length fixes the number of matrix columns.
        preprocessing_parameters: Mapping providing the ``'format'``
            setting forwarded to ``set_str_to_idx``.

    Returns:
        A ``(len(column), len(metadata['str2idx']))`` array of Python
        ``float`` dtype where entry ``[i, j]`` is how many times index ``j``
        was returned for the ``i``-th value; untouched entries stay zero.
    """
    vocab = metadata['str2idx']
    bag_matrix = np.zeros((len(column), len(vocab)), dtype=float)
    # Iterate positionally instead of ``column[i]``: on a pandas Series
    # integer subscripts are label lookups, so a non-default index would
    # raise KeyError or silently read a different row.
    for i, set_str in enumerate(column):
        col_counter = Counter(
            set_str_to_idx(set_str, vocab, preprocessing_parameters['format']))
        # Keys and values of a Counter iterate in matching order.
        bag_matrix[i, list(col_counter.keys())] = list(col_counter.values())
    return bag_matrix