예제 #1
0
def remove_punctuation(table: str, column: str):
    """Run a ``RemovePunctuation`` step between a database load and store.

    A fresh ``Dataframe`` is handed to ``LoadFromDatabase.visit``, the
    result to ``RemovePunctuation.visit`` and that result to
    ``LoadToDatabase.visit``.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    frame = Dataframe()
    source = LoadFromDatabase(table=table, column=column)
    sink = LoadToDatabase(table=table, column=column)
    stripper = RemovePunctuation()

    # Stages run strictly in this order; each one receives the previous
    # stage's return value.
    for stage in (source, stripper, sink):
        frame = stage.visit(frame)
예제 #2
0
def condense_simple(table: str, column: str, numeric_feature: str = "median"):
    """Run a ``CondenseSimple`` step between a database load and store.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages and as the
            first positional argument of ``CondenseSimple``.
        numeric_feature: Second positional argument of ``CondenseSimple``;
            defaults to ``"median"``.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    data = Dataframe()
    reader = LoadFromDatabase(table=table, column=column)
    writer = LoadToDatabase(table=table, column=column)
    # Named differently from the enclosing function so the local does not
    # hide it inside this scope.
    condenser = CondenseSimple(column, numeric_feature)

    data = reader.visit(data)
    data = condenser.visit(data)
    writer.visit(data)
예제 #3
0
def maxabsscaler(table: str, column: str, copy: bool = True):
    """Run a ``MaxAbsScaler`` step between a database load and store.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages.
        copy: Sole positional argument of ``MaxAbsScaler``; defaults to
            ``True``.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    frame = Dataframe()
    source = LoadFromDatabase(table=table, column=column)
    sink = LoadToDatabase(table=table, column=column)
    max_abs = MaxAbsScaler(copy)

    # Load, scale, store: each stage consumes the previous stage's output.
    for stage in (source, max_abs, sink):
        frame = stage.visit(frame)
예제 #4
0
def mask(table: str, column: str, condition: str):
    """Run a ``Mask`` step between a database load and store.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages and as the
            second positional argument of ``Mask``.
        condition: First positional argument of ``Mask``.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    data = Dataframe()
    reader = LoadFromDatabase(table=table, column=column)
    writer = LoadToDatabase(table=table, column=column)
    masker = Mask(condition, column)

    data = reader.visit(data)
    data = masker.visit(data)
    writer.visit(data)
예제 #5
0
def uppercase(table: str, column: str):
    """Run an ``UpperCase`` step between a database load and store.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    frame = Dataframe()
    source = LoadFromDatabase(table=table, column=column)
    sink = LoadToDatabase(table=table, column=column)
    upper = UpperCase()

    # Stages are visited in order, threading the frame through each one.
    for stage in (source, upper, sink):
        frame = stage.visit(frame)
예제 #6
0
def ngram(table: str, column: str, value: int):
    """Run an ``NGram`` step between a database load and store.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages.
        value: Passed to ``NGram`` as ``n_gram_value``.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    data = Dataframe()
    reader = LoadFromDatabase(table=table, column=column)
    writer = LoadToDatabase(table=table, column=column)
    n_gram_step = NGram(n_gram_value=value)

    data = reader.visit(data)
    data = n_gram_step.visit(data)
    writer.visit(data)
예제 #7
0
def fillempty(table: str, column: str, feature_type: str, value: float):
    """Run a ``FillEmptyCells`` step between a database load and store.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages and as the
            first positional argument of ``FillEmptyCells``.
        feature_type: Second positional argument of ``FillEmptyCells``.
        value: Third positional argument of ``FillEmptyCells``.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    frame = Dataframe()
    source = LoadFromDatabase(table=table, column=column)
    sink = LoadToDatabase(table=table, column=column)
    filler = FillEmptyCells(column, feature_type, value)

    # Load, fill, store: each stage consumes the previous stage's output.
    for stage in (source, filler, sink):
        frame = stage.visit(frame)
예제 #8
0
def remove_character(table: str, column: str, char: str):
    """Run a ``RemoveChar`` step between a database load and store.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages.
        char: Passed to ``RemoveChar`` as ``char``.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    data = Dataframe()
    reader = LoadFromDatabase(table=table, column=column)
    writer = LoadToDatabase(table=table, column=column)
    char_remover = RemoveChar(char=char)

    data = reader.visit(data)
    data = char_remover.visit(data)
    writer.visit(data)
예제 #9
0
def text_binary(table: str, column: str):
    """Run a ``TextToBinary`` step between a database load and store.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages and to
            ``TextToBinary``.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    frame = Dataframe()
    source = LoadFromDatabase(table=table, column=column)
    sink = LoadToDatabase(table=table, column=column)
    to_binary = TextToBinary(column=column)

    # Stages are visited in order, threading the frame through each one.
    for stage in (source, to_binary, sink):
        frame = stage.visit(frame)
예제 #10
0
def stem(table: str, column: str):
    """Run a ``Stemmer`` step between a database load and store.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    data = Dataframe()
    reader = LoadFromDatabase(table=table, column=column)
    writer = LoadToDatabase(table=table, column=column)
    stemmer = Stemmer()

    data = reader.visit(data)
    data = stemmer.visit(data)
    writer.visit(data)
예제 #11
0
def mean_word(table: str, column: str):
    """Run a ``MeanWord`` step between a database load and store.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages and to
            ``MeanWord``.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    frame = Dataframe()
    source = LoadFromDatabase(table=table, column=column)
    sink = LoadToDatabase(table=table, column=column)
    mean_word_step = MeanWord(column=column)

    # Load, transform, store: each stage consumes the previous output.
    for stage in (source, mean_word_step, sink):
        frame = stage.visit(frame)
예제 #12
0
def start_number(table: str, column: str):
    """Run a ``StartWithNumber`` step between a database load and store.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages and to
            ``StartWithNumber``.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    data = Dataframe()
    reader = LoadFromDatabase(table=table, column=column)
    writer = LoadToDatabase(table=table, column=column)
    starts_with_number = StartWithNumber(column=column)

    data = reader.visit(data)
    data = starts_with_number.visit(data)
    writer.visit(data)
예제 #13
0
def join(table: str, column: str, char: str):
    """Run a ``JoinOperation`` step between a database load and store.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages.
        char: Passed to ``JoinOperation`` as ``value``.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    frame = Dataframe()
    source = LoadFromDatabase(table=table, column=column)
    sink = LoadToDatabase(table=table, column=column)
    joiner = JoinOperation(value=char)

    # Stages are visited in order, threading the frame through each one.
    for stage in (source, joiner, sink):
        frame = stage.visit(frame)
예제 #14
0
def character_sum(table: str, column: str):
    """Run a ``CharacterSum`` step between a database load and store.

    A fresh ``Dataframe`` is handed to ``LoadFromDatabase.visit``, the
    result to ``CharacterSum.visit`` and that result to
    ``LoadToDatabase.visit``.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages and to
            ``CharacterSum``.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    dataframe = Dataframe()
    download = LoadFromDatabase(table=table, column=column)
    upload = LoadToDatabase(table=table, column=column)
    # Deliberately not called ``sum``: that name would hide the builtin
    # for the rest of this function.
    char_sum = CharacterSum(column=column)

    dataframe = download.visit(dataframe)
    dataframe = char_sum.visit(dataframe)
    upload.visit(dataframe)
예제 #15
0
def normalize(table: str, column: str):
    """Run a ``Normalizer`` step between a database load and store.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    data = Dataframe()
    reader = LoadFromDatabase(table=table, column=column)
    writer = LoadToDatabase(table=table, column=column)
    normalizer = Normalizer()

    data = reader.visit(data)
    data = normalizer.visit(data)
    writer.visit(data)
예제 #16
0
def labelencode(table: str, column: str, mode: str = "shuffle"):
    """Run a ``LabelEncoder`` step between a database load and store.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages and as the
            second positional argument of ``LabelEncoder``.
        mode: First positional argument of ``LabelEncoder``; defaults to
            ``"shuffle"``.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    frame = Dataframe()
    source = LoadFromDatabase(table=table, column=column)
    sink = LoadToDatabase(table=table, column=column)
    encoder = LabelEncoder(mode, column)

    # Load, encode, store: each stage consumes the previous stage's output.
    for stage in (source, encoder, sink):
        frame = stage.visit(frame)
예제 #17
0
def lemmatize(table: str, column: str):
    """Run a ``Lemmatizer`` step between a database load and store.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    data = Dataframe()
    reader = LoadFromDatabase(table=table, column=column)
    writer = LoadToDatabase(table=table, column=column)
    lemmatizer = Lemmatizer()

    data = reader.visit(data)
    data = lemmatizer.visit(data)
    writer.visit(data)
예제 #18
0
def interpolate(table: str, column: str, method: str = "linear"):
    """Run an ``Interpolate`` step between a database load and store.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages.
        method: Sole positional argument of ``Interpolate``; defaults to
            ``"linear"``.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    frame = Dataframe()
    source = LoadFromDatabase(table=table, column=column)
    sink = LoadToDatabase(table=table, column=column)
    interpolator = Interpolate(method)

    # Stages are visited in order, threading the frame through each one.
    for stage in (source, interpolator, sink):
        frame = stage.visit(frame)
예제 #19
0
def sort(table: str, column: str, mode: str = "shuffle"):
    """Run a ``Sort`` step between a database load and store.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages and as the
            second positional argument of ``Sort``.
        mode: First positional argument of ``Sort``; defaults to
            ``"shuffle"``.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    data = Dataframe()
    reader = LoadFromDatabase(table=table, column=column)
    writer = LoadToDatabase(table=table, column=column)
    sorter = Sort(mode, column)

    data = reader.visit(data)
    data = sorter.visit(data)
    writer.visit(data)
예제 #20
0
def minmaxscaler(table: str,
                 column: str,
                 feature_range: tuple = (0, 1),
                 copy: bool = True):
    """Run a ``MinMaxScaler`` step between a database load and store.

    A fresh ``Dataframe`` is handed to ``LoadFromDatabase.visit``, the
    result to ``MinMaxScaler.visit`` and that result to
    ``LoadToDatabase.visit``.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages.
        feature_range: First positional argument of ``MinMaxScaler``;
            defaults to the tuple ``(0, 1)``. Annotated as ``tuple`` to
            agree with that default (it was previously annotated
            ``bytearray``).
        copy: Second positional argument of ``MinMaxScaler``; defaults to
            ``True``.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    dataframe = Dataframe()
    download = LoadFromDatabase(table=table, column=column)
    upload = LoadToDatabase(table=table, column=column)
    scaler = MinMaxScaler(feature_range, copy)

    dataframe = download.visit(dataframe)
    dataframe = scaler.visit(dataframe)
    upload.visit(dataframe)
예제 #21
0
def split(table: str, column: str, id_split: dict, mode: str = "sequential"):
    """Run a ``Split`` step between a database load and store.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages.
        id_split: First positional argument of ``Split``.
        mode: Second positional argument of ``Split``; defaults to
            ``"sequential"``.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    frame = Dataframe()
    source = LoadFromDatabase(table=table, column=column)
    sink = LoadToDatabase(table=table, column=column)
    splitter = Split(id_split, mode)

    frame = source.visit(frame)

    # Per the original author's note, the split step yields a dict of
    # feature sets shaped as {name, data}; that object is what gets handed
    # to the store stage. (Not verifiable from this function alone.)
    feature_sets = splitter.visit(frame)

    sink.visit(feature_sets)
예제 #22
0
def standardscale(table: str,
                  column: str,
                  copy: bool = True,
                  with_mean: bool = True,
                  with_std: bool = True):
    """Run a ``StandardScaler`` step between a database load and store.

    A fresh ``Dataframe`` is handed to ``LoadFromDatabase.visit``, the
    result to ``StandardScaler.visit`` and that result to
    ``LoadToDatabase.visit``.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages.
        copy: First positional argument of ``StandardScaler``; defaults to
            ``True``.
        with_mean: Second positional argument of ``StandardScaler``;
            defaults to ``True``.
        with_std: Third positional argument of ``StandardScaler``;
            defaults to ``True``. Now annotated ``bool`` like the other
            flags.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    dataframe = Dataframe()
    download = LoadFromDatabase(table=table, column=column)
    upload = LoadToDatabase(table=table, column=column)
    scaler = StandardScaler(copy, with_mean, with_std)

    dataframe = download.visit(dataframe)
    dataframe = scaler.visit(dataframe)
    upload.visit(dataframe)
예제 #23
0
def labelbinarize(
    table: str,
    column: str,
    neg_label: int = 0,
    pos_label: int = 1,
    sparse_output: bool = False,
):
    """Run a ``LabelBinarizer`` step between a database load and store.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages.
        neg_label: First positional argument of ``LabelBinarizer``;
            defaults to ``0``.
        pos_label: Second positional argument of ``LabelBinarizer``;
            defaults to ``1``.
        sparse_output: Third positional argument of ``LabelBinarizer``;
            defaults to ``False``.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    frame = Dataframe()
    source = LoadFromDatabase(table=table, column=column)
    sink = LoadToDatabase(table=table, column=column)
    binarizer = LabelBinarizer(neg_label, pos_label, sparse_output)

    # Load, binarize, store: each stage consumes the previous output.
    for stage in (source, binarizer, sink):
        frame = stage.visit(frame)
예제 #24
0
def onehot(
    table: str,
    column: str,
    categories: str = "auto",
    sparse: bool = True,
    n_values: str = "auto",
    categorical_features: str = "all",
):
    """Run a ``OneHotEncoder`` step between a database load and store.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages.
        categories: First positional argument of ``OneHotEncoder``;
            defaults to ``"auto"``.
        sparse: Second positional argument of ``OneHotEncoder``; defaults
            to ``True``.
        n_values: Third positional argument of ``OneHotEncoder``; defaults
            to ``"auto"``.
        categorical_features: Fourth positional argument of
            ``OneHotEncoder``; defaults to ``"all"``.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    data = Dataframe()
    reader = LoadFromDatabase(table=table, column=column)
    writer = LoadToDatabase(table=table, column=column)
    encoder = OneHotEncoder(categories, sparse, n_values, categorical_features)

    data = reader.visit(data)
    data = encoder.visit(data)
    writer.visit(data)
예제 #25
0
def condense(
    table: str,
    column: str,
    sequential: bool = False,
    numeric_feature: str = "median",
    save_index: bool = True,
    string_feature: str = "join",
):
    """Run a ``Condense`` step between a database load and store.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages and as the
            first positional argument of ``Condense``.
        sequential: Second positional argument of ``Condense``; defaults
            to ``False``.
        numeric_feature: Third positional argument of ``Condense``;
            defaults to ``"median"``.
        save_index: Fourth positional argument of ``Condense``; defaults
            to ``True``.
        string_feature: Fifth positional argument of ``Condense``;
            defaults to ``"join"``.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    frame = Dataframe()
    source = LoadFromDatabase(table=table, column=column)
    sink = LoadToDatabase(table=table, column=column)
    condenser = Condense(
        column,
        sequential,
        numeric_feature,
        save_index,
        string_feature,
    )

    # Stages are visited in order, threading the frame through each one.
    for stage in (source, condenser, sink):
        frame = stage.visit(frame)
예제 #26
0
def nl_processor(
    table: str,
    column: str,
    extraction_target: str = "word",
    extraction_type: str = "bow",
    measure: str = None,
    n_gram: bytearray = None,
):
    """Run a ``NaturalLanguageProcessor`` step between a load and store.

    Args:
        table: Forwarded as ``table`` to both database stages.
        column: Forwarded as ``column`` to both database stages and as the
            first positional argument of ``NaturalLanguageProcessor``.
        extraction_target: Second positional argument of the processor;
            defaults to ``"word"``.
        extraction_type: Third positional argument of the processor;
            defaults to ``"bow"``.
        measure: Fourth positional argument of the processor; defaults to
            ``None``.
        n_gram: Fifth positional argument of the processor; defaults to
            ``None``.

    Returns:
        None. The value returned by the final ``visit`` is not used.
    """
    # NOTE(review): ``measure`` and ``n_gram`` default to None although they
    # are annotated ``str`` / ``bytearray`` - confirm the intended types.
    data = Dataframe()
    reader = LoadFromDatabase(table=table, column=column)
    writer = LoadToDatabase(table=table, column=column)
    nlp_step = NaturalLanguageProcessor(
        column,
        extraction_target,
        extraction_type,
        measure,
        n_gram,
    )

    data = reader.visit(data)
    data = nlp_step.visit(data)
    writer.visit(data)