def remove_punctuation(table: str, column: str):
    """Load *column* from *table*, apply the RemovePunctuation transform, and store the result."""
    frame = Dataframe()
    loader = LoadFromDatabase(table=table, column=column)
    saver = LoadToDatabase(table=table, column=column)
    op = RemovePunctuation()
    saver.visit(op.visit(loader.visit(frame)))
def condense_simple(table: str, column: str, numeric_feature: str = "median"):
    """Load *column* from *table*, apply the CondenseSimple transform, and store the result."""
    frame = Dataframe()
    loader = LoadFromDatabase(table=table, column=column)
    saver = LoadToDatabase(table=table, column=column)
    op = CondenseSimple(column, numeric_feature)  # renamed local: no longer shadows this function
    saver.visit(op.visit(loader.visit(frame)))
def maxabsscaler(table: str, column: str, copy: bool = True):
    """Load *column* from *table*, apply the MaxAbsScaler transform, and store the result."""
    frame = Dataframe()
    loader = LoadFromDatabase(table=table, column=column)
    saver = LoadToDatabase(table=table, column=column)
    op = MaxAbsScaler(copy)
    saver.visit(op.visit(loader.visit(frame)))
def mask(table: str, column: str, condition: str):
    """Load *column* from *table*, apply the Mask transform with *condition*, and store the result."""
    frame = Dataframe()
    loader = LoadFromDatabase(table=table, column=column)
    saver = LoadToDatabase(table=table, column=column)
    op = Mask(condition, column)  # renamed local: no longer shadows this function
    saver.visit(op.visit(loader.visit(frame)))
def uppercase(table: str, column: str):
    """Load *column* from *table*, apply the UpperCase transform, and store the result."""
    frame = Dataframe()
    loader = LoadFromDatabase(table=table, column=column)
    saver = LoadToDatabase(table=table, column=column)
    op = UpperCase()  # renamed local: no longer shadows this function
    saver.visit(op.visit(loader.visit(frame)))
def ngram(table: str, column: str, value: int):
    """Load *column* from *table*, apply the NGram transform with n = *value*, and store the result."""
    frame = Dataframe()
    loader = LoadFromDatabase(table=table, column=column)
    saver = LoadToDatabase(table=table, column=column)
    op = NGram(n_gram_value=value)  # renamed local: no longer shadows this function
    saver.visit(op.visit(loader.visit(frame)))
def fillempty(table: str, column: str, feature_type: str, value: float):
    """Load *column* from *table*, apply the FillEmptyCells transform, and store the result."""
    frame = Dataframe()
    loader = LoadFromDatabase(table=table, column=column)
    saver = LoadToDatabase(table=table, column=column)
    op = FillEmptyCells(column, feature_type, value)  # renamed local: no longer shadows this function
    saver.visit(op.visit(loader.visit(frame)))
def remove_character(table: str, column: str, char: str):
    """Load *column* from *table*, apply the RemoveChar transform for *char*, and store the result."""
    frame = Dataframe()
    loader = LoadFromDatabase(table=table, column=column)
    saver = LoadToDatabase(table=table, column=column)
    op = RemoveChar(char=char)
    saver.visit(op.visit(loader.visit(frame)))
def text_binary(table: str, column: str):
    """Load *column* from *table*, apply the TextToBinary transform, and store the result."""
    frame = Dataframe()
    loader = LoadFromDatabase(table=table, column=column)
    saver = LoadToDatabase(table=table, column=column)
    op = TextToBinary(column=column)
    saver.visit(op.visit(loader.visit(frame)))
def stem(table: str, column: str):
    """Load *column* from *table*, apply the Stemmer transform, and store the result."""
    frame = Dataframe()
    loader = LoadFromDatabase(table=table, column=column)
    saver = LoadToDatabase(table=table, column=column)
    op = Stemmer()  # renamed local: no longer shadows this function
    saver.visit(op.visit(loader.visit(frame)))
def mean_word(table: str, column: str):
    """Load *column* from *table*, apply the MeanWord transform, and store the result."""
    frame = Dataframe()
    loader = LoadFromDatabase(table=table, column=column)
    saver = LoadToDatabase(table=table, column=column)
    op = MeanWord(column=column)
    saver.visit(op.visit(loader.visit(frame)))
def start_number(table: str, column: str):
    """Load *column* from *table*, apply the StartWithNumber transform, and store the result."""
    frame = Dataframe()
    loader = LoadFromDatabase(table=table, column=column)
    saver = LoadToDatabase(table=table, column=column)
    op = StartWithNumber(column=column)
    saver.visit(op.visit(loader.visit(frame)))
def join(table: str, column: str, char: str):
    """Load *column* from *table*, apply the JoinOperation transform with *char*, and store the result."""
    frame = Dataframe()
    loader = LoadFromDatabase(table=table, column=column)
    saver = LoadToDatabase(table=table, column=column)
    op = JoinOperation(value=char)  # renamed local: no longer shadows this function
    saver.visit(op.visit(loader.visit(frame)))
def character_sum(table: str, column: str):
    """Load *column* from *table*, apply the CharacterSum transform, and store the result."""
    dataframe = Dataframe()
    download = LoadFromDatabase(table=table, column=column)
    upload = LoadToDatabase(table=table, column=column)
    # Renamed from `sum`: the original bound the transform to a local named
    # `sum`, shadowing the builtin sum() for the rest of the function body.
    char_sum = CharacterSum(column=column)
    dataframe = download.visit(dataframe)
    dataframe = char_sum.visit(dataframe)
    upload.visit(dataframe)
def normalize(table: str, column: str):
    """Load *column* from *table*, apply the Normalizer transform, and store the result."""
    frame = Dataframe()
    loader = LoadFromDatabase(table=table, column=column)
    saver = LoadToDatabase(table=table, column=column)
    op = Normalizer()  # renamed local: no longer shadows this function
    saver.visit(op.visit(loader.visit(frame)))
def labelencode(table: str, column: str, mode: str = "shuffle"):
    """Load *column* from *table*, apply the LabelEncoder transform, and store the result."""
    frame = Dataframe()
    loader = LoadFromDatabase(table=table, column=column)
    saver = LoadToDatabase(table=table, column=column)
    op = LabelEncoder(mode, column)
    saver.visit(op.visit(loader.visit(frame)))
def lemmatize(table: str, column: str):
    """Load *column* from *table*, apply the Lemmatizer transform, and store the result."""
    frame = Dataframe()
    loader = LoadFromDatabase(table=table, column=column)
    saver = LoadToDatabase(table=table, column=column)
    op = Lemmatizer()  # renamed local: no longer shadows this function
    saver.visit(op.visit(loader.visit(frame)))
def interpolate(table: str, column: str, method: str = "linear"):
    """Load *column* from *table*, apply the Interpolate transform with *method*, and store the result."""
    frame = Dataframe()
    loader = LoadFromDatabase(table=table, column=column)
    saver = LoadToDatabase(table=table, column=column)
    op = Interpolate(method)  # renamed local: no longer shadows this function
    saver.visit(op.visit(loader.visit(frame)))
def sort(table: str, column: str, mode: str = "shuffle"):
    """Load *column* from *table*, apply the Sort transform with *mode*, and store the result."""
    frame = Dataframe()
    loader = LoadFromDatabase(table=table, column=column)
    saver = LoadToDatabase(table=table, column=column)
    op = Sort(mode, column)  # renamed local: no longer shadows this function
    saver.visit(op.visit(loader.visit(frame)))
def minmaxscaler(table: str, column: str, feature_range: tuple = (0, 1), copy: bool = True):
    """Load *column* from *table*, apply the MinMaxScaler transform, and store the result.

    ``feature_range`` was previously annotated ``bytearray``, which contradicts
    its default value ``(0, 1)``; the annotation is corrected to ``tuple``.
    The default and runtime behavior are unchanged.
    """
    dataframe = Dataframe()
    download = LoadFromDatabase(table=table, column=column)
    upload = LoadToDatabase(table=table, column=column)
    scaler = MinMaxScaler(feature_range, copy)
    dataframe = download.visit(dataframe)
    dataframe = scaler.visit(dataframe)
    upload.visit(dataframe)
def split(table: str, column: str, id_split: dict, mode: str = "sequential"):
    """Load *column* from *table*, apply the Split transform, and store the result."""
    frame = Dataframe()
    loader = LoadFromDatabase(table=table, column=column)
    saver = LoadToDatabase(table=table, column=column)
    splitter = Split(id_split, mode)  # renamed local: no longer shadows this function
    frame = loader.visit(frame)
    # Split.visit returns a dict of feature sets: {name, data}
    frames = splitter.visit(frame)
    saver.visit(frames)
def standardscale(table: str, column: str, copy: bool = True, with_mean: bool = True, with_std: bool = True):
    """Load *column* from *table*, apply the StandardScaler transform, and store the result.

    ``with_std`` was the only unannotated parameter in this module; the
    ``bool`` annotation is added for consistency with the sibling pipelines.
    Default and behavior are unchanged.
    """
    dataframe = Dataframe()
    download = LoadFromDatabase(table=table, column=column)
    upload = LoadToDatabase(table=table, column=column)
    scaler = StandardScaler(copy, with_mean, with_std)
    dataframe = download.visit(dataframe)
    dataframe = scaler.visit(dataframe)
    upload.visit(dataframe)
def labelbinarize(table: str, column: str, neg_label: int = 0, pos_label: int = 1, sparse_output: bool = False):
    """Load *column* from *table*, apply the LabelBinarizer transform, and store the result."""
    frame = Dataframe()
    loader = LoadFromDatabase(table=table, column=column)
    saver = LoadToDatabase(table=table, column=column)
    op = LabelBinarizer(neg_label, pos_label, sparse_output)
    saver.visit(op.visit(loader.visit(frame)))
def onehot(table: str, column: str, categories: str = "auto", sparse: bool = True, n_values: str = "auto", categorical_features: str = "all"):
    """Load *column* from *table*, apply the OneHotEncoder transform, and store the result."""
    frame = Dataframe()
    loader = LoadFromDatabase(table=table, column=column)
    saver = LoadToDatabase(table=table, column=column)
    op = OneHotEncoder(categories, sparse, n_values, categorical_features)  # renamed local: no longer shadows this function
    saver.visit(op.visit(loader.visit(frame)))
def condense(table: str, column: str, sequential: bool = False, numeric_feature: str = "median", save_index: bool = True, string_feature: str = "join"):
    """Load *column* from *table*, apply the Condense transform, and store the result."""
    frame = Dataframe()
    loader = LoadFromDatabase(table=table, column=column)
    saver = LoadToDatabase(table=table, column=column)
    op = Condense(column, sequential, numeric_feature, save_index, string_feature)  # renamed local: no longer shadows this function
    saver.visit(op.visit(loader.visit(frame)))
def nl_processor(table: str, column: str, extraction_target: str = "word", extraction_type: str = "bow", measure: "str | None" = None, n_gram: "tuple | None" = None):
    """Load *column* from *table*, apply the NaturalLanguageProcessor transform, and store the result.

    Annotation fixes only — defaults and behavior are unchanged:
    - ``measure`` was annotated ``str`` with a ``None`` default; now optional.
    - ``n_gram`` was annotated ``bytearray`` with a ``None`` default; the
      sibling ``ngram`` pipeline treats n-gram config as a value/tuple, so
      ``bytearray`` is clearly wrong — annotated as an optional tuple.
      TODO(review): confirm the exact expected shape against
      NaturalLanguageProcessor's constructor.
    """
    dataframe = Dataframe()
    download = LoadFromDatabase(table=table, column=column)
    upload = LoadToDatabase(table=table, column=column)
    processor = NaturalLanguageProcessor(column, extraction_target, extraction_type, measure, n_gram)
    dataframe = download.visit(dataframe)
    dataframe = processor.visit(dataframe)
    upload.visit(dataframe)