class Parameters:
    """Declarative parameters for a hashed term-frequency step."""
    # Column holding the token array to be hashed.
    inputCol = Parameter(str, 'tokens', 'Input column', input_column=True,
                         input_dtype=Parameter.T_ARRAY_STRING)
    # Column that receives the term-frequency vectors.
    outputCol = Parameter(str, 'tf', 'Output column', output_column=True)
    # 2**18 hash buckets by default.
    numFeatures = Parameter(int, 1 << 18, 'Number of features')
    binary = Parameter(bool, False, 'Binary')
class Parameters:
    """Declarative parameters for a plain-text tokenisation step."""
    # Raw text column to split into tokens.
    inputCol = Parameter(str, 'text', 'Input column', input_column=True,
                         input_dtype=Parameter.T_STRING)
    # Column that receives the resulting token array.
    outputCol = Parameter(str, 'tokens', 'Output column', output_column=True)
class Parameters:
    """Declarative parameters for a vector-assembly step.

    Fix: the output label read 'Output1 column', an apparent typo — every
    sibling Parameters class in this file labels it 'Output column'.
    """
    # NOTE(review): declared with type `str` and a singular default although
    # the name is plural — confirm whether this should be list-typed with
    # input_multiple=True like the other assembler Parameters class.
    inputCols = Parameter(str, 'vectors', 'Input columns', input_column=True,
                          input_dtype=Parameter.T_VECTOR)
    # Column that receives the assembled vector.
    outputCol = Parameter(str, 'assembled_vector', 'Output column',
                          output_column=True)
class Parameters:
    """Declarative parameters for a string-indexing step."""
    # Categorical string column to index.
    inputCol = Parameter(str, 'category', 'Input column', input_column=True,
                         input_dtype=Parameter.T_STRING)
    # Column that receives the numeric category index.
    outputCol = Parameter(str, 'category_index', 'Output column',
                          output_column=True)
class Parameters:
    """Declarative parameters for an n-gram construction step."""
    # Size of the n-grams to build.
    n = Parameter(int, 2, 'N')
    # NOTE(review): default column name is 'text' while the declared dtype is
    # an array of strings — confirm the default against upstream output names.
    inputCol = Parameter(str, 'text', 'Input column', input_column=True,
                         input_dtype=Parameter.T_ARRAY_STRING)
    outputCol = Parameter(str, 'tokens', 'Output column', output_column=True)
class Parameters:
    """Declarative parameters for an IDF weighting step."""
    # Term-frequency vector column produced by an upstream TF step.
    inputCol = Parameter(str, 'tf', 'Input column', input_column=True,
                         input_dtype=Parameter.T_VECTOR)
    # Column that receives the TF-IDF weighted vectors.
    outputCol = Parameter(str, 'tfidf', 'Output column', output_column=True)
    minDocFreq = Parameter(int, 0, 'Minimum document frequency')
class Parameters:
    """Declarative parameters for a stop-word removal step."""
    inputCol = Parameter(str, 'text', 'Input column', input_column=True,
                         input_dtype=Parameter.T_ARRAY_STRING)
    outputCol = Parameter(str, 'tokens', 'Output column', output_column=True)
    # Disabled parameter kept for reference:
    # 'stopWords': Parameter(list, None, 'Stopwords list')
    caseSensitive = Parameter(bool, False, 'Case sensitive')
class Parameters:
    """Declarative parameters for a naive-Bayes style estimator."""
    featuresCol = Parameter(str, 'features', 'Feature column', input_column=True)
    labelCol = Parameter(str, 'label', 'Label column', input_column=True)
    # Disabled parameters kept for reference:
    # predictionCol = Parameter(str, 'prediction', 'Prediction column')
    # probabilityCol = Parameter(str, 'probability', 'Probability column')
    # rawPredictionCol = Parameter(str, 'rawPrediction', 'Raw probability column')
    weightCol = Parameter(str, 'weight', 'Weight Column')
    smoothing = Parameter(float, 1.0, 'Smoothing')
class Parameters:
    """Declarative parameters for a token-scoring / top-terms display step.

    Fix: the default for ``sortingType`` was 'Term_Freq', which is not one of
    the offered ``items``, so the initial value could never be re-selected
    from the list. It now defaults to the first listed option.
    """
    # How many of the highest-scoring entries to show.
    cutoff = Parameter(int, 50, 'Top scores to display:')
    inputCol = Parameter(str, 'tokens', 'Input column', input_column=True,
                         input_dtype=Parameter.T_STRING)
    sortingType = Parameter(
        str, 'Term Frequency', 'Choose a sorting type',
        items=['Term Frequency', 'Chi', 't-score', 'Non perfect Chi'])
class Parameters:
    """Declarative parameters for a regularised linear estimator."""
    featuresCol = Parameter(str, 'features', 'Feature column', input_column=True)
    labelCol = Parameter(str, 'label', 'Label column', input_column=True)
    # Disabled parameter kept for reference:
    # weightCol = Parameter(str, 'weight', 'Weight Column')
    maxIter = Parameter(int, 100, 'Maximal iteration')
    regParam = Parameter(float, 0.0, 'Regression Parameter')
    elasticNetParam = Parameter(float, 0.0, 'Elastic Net Parameter')
    # Convergence tolerance.
    tol = Parameter(float, 0.000001, 'tol')
    fitIntercept = Parameter(bool, True, 'Fit intercept')
    standardization = Parameter(bool, False, 'Standardization')
    solver = Parameter(str, 'auto', 'Solver')
    aggregationDepth = Parameter(int, 2, 'Aggregation depth')
class Parameters:
    """Declarative parameters for a word2vec embedding step.

    Fixes: the output label read 'Output1 column' (siblings use
    'Output column'), and 'Number of partitions ' had a trailing space.
    """
    inputCol = Parameter(str, 'tokens', 'Input column', input_column=True,
                         input_dtype=Parameter.T_ARRAY_STRING)
    outputCol = Parameter(str, 'vector', 'Output column', output_column=True)
    # Dimensionality of the learned vectors.
    vectorSize = Parameter(int, 100, 'Vector size')
    minCount = Parameter(int, 5, 'Minimum count')
    numPartitions = Parameter(int, 1, 'Number of partitions')
    stepSize = Parameter(float, 0.025, 'Step size')
    maxIter = Parameter(int, 1, 'Maximum Iteration')
    # None means no fixed seed.
    seed = Parameter(int, None, 'Seed')
    windowSize = Parameter(int, 5, 'Window size')
    maxSentenceLength = Parameter(int, 1000, 'Maximum sentence length')
class Parameters:
    """Declarative parameters for a count-vectorizer style step.

    Fixes: a trailing comma after the ``minTF`` assignment made that
    attribute a 1-tuple wrapping the Parameter instead of the Parameter
    itself; and the output label read 'Output1 column' (siblings use
    'Output column').
    """
    inputCol = Parameter(str, 'tokens', 'Input column', input_column=True,
                         input_dtype=Parameter.T_ARRAY_STRING)
    outputCol = Parameter(str, 'vector', 'Output column', output_column=True)
    minTF = Parameter(float, 1.0, 'Minimum term frequency')
    minDF = Parameter(float, 1.0, 'Minimum document frequency')
    # 2**18 terms by default.
    vocabSize = Parameter(int, 1 << 18, 'Vocabulary size')
    binary = Parameter(bool, False, 'Binary')
class Parameters:
    """Declarative parameters for a regex-based tokenisation step."""
    inputCol = Parameter(str, 'text', 'Input column', input_column=True,
                         input_dtype=Parameter.T_STRING)
    outputCol = Parameter(str, 'tokens', 'Output column', output_column=True)
    minTokenLength = Parameter(int, 1, 'Minimum token length')
    # Presumably RegexTokenizer semantics: True = pattern matches the
    # separators, False = pattern matches the tokens — confirm downstream.
    gaps = Parameter(bool, True, 'Gaps?')
    # Default splits on runs of whitespace.
    pattern = Parameter(str, '\\s+', 'Pattern')
    toLowercase = Parameter(bool, True, 'Convert to lower case?')
class Parameters:
    """Declarative parameters for assembling columns into one feature vector."""
    # Multi-select: the user may choose any number of input columns.
    # NOTE(review): the mutable [] default is shared at class level — assumed
    # safe because Parameter treats it as a declarative default; confirm.
    inputCols = Parameter(list, [], 'Input columns', input_column=True,
                          input_multiple=True)
    outputCol = Parameter(str, 'features', 'Output column', output_column=True)
class Parameters:
    """Declarative parameters for a logistic-regression style estimator."""
    featuresCol = Parameter(str, 'features', 'Features column', input_column=True)
    labelCol = Parameter(str, 'label', 'Label column', input_column=True)
    # Disabled parameters kept for reference:
    # predictionCol = Parameter(str, 'prediction', 'Prediction column')
    # probabilityCol = Parameter(str, "probability", 'Probability column')
    # rawPredictionCol = Parameter(str, 'rawPrediction', 'Raw probability column')
    weightCol = Parameter(str, 'weight', 'Weight column')
    maxIter = Parameter(int, 100, 'Maximum iteration')
    regParam = Parameter(float, 0.0, 'Regression Parameter')
    elasticNetParam = Parameter(float, 0.0, 'Elastic Net Parameter')
    # Convergence tolerance.
    tol = Parameter(float, 0.000001, 'tol')
    fitIntercept = Parameter(bool, True, 'Fit intercept')
    threshold = Parameter(float, 0.5, 'Threshold')
    # Disabled parameter kept for reference:
    # thresholds = Parameter(list, None, 'Thresholds')  # list[float]
    standardization = Parameter(bool, True, 'Standardization')
    aggregationDepth = Parameter(int, 2, 'Aggregation depth')
    family = Parameter(str, 'auto', 'Family')
class Parameters:
    """Single generic input-column parameter declaration."""
    inputCol = Parameter(str, 'input', 'Input column', input_column=True)
class Parameters:
    """Declarative parameters for a decision-tree style classifier."""
    featuresCol = Parameter(str, 'features', 'Features column', input_column=True)
    labelCol = Parameter(str, 'label', 'Label column', input_column=True)
    # NOTE(review): the three columns below are flagged input_column=True even
    # though their names suggest model outputs — confirm this is intentional.
    predictionCol = Parameter(str, 'prediction', 'Prediction column',
                              input_column=True)
    probabilityCol = Parameter(str, "probability", 'Probability column',
                               input_column=True)
    rawPredictionCol = Parameter(str, 'rawPrediction', 'Raw prediction column',
                                 input_column=True)
    maxDepth = Parameter(int, 5, 'Maximal depth')
    maxBins = Parameter(int, 32, 'Maximal bins')
    minInstancesPerNode = Parameter(int, 1, 'Minimum instance per node')
    minInfoGain = Parameter(float, 0.0, 'Minimum Information gain')
    maxMemoryInMB = Parameter(int, 256, 'Maximal Memory (MB)')
    cacheNodeIds = Parameter(bool, False, 'Cache node ids')
    checkpointInterval = Parameter(int, 10, 'Checkpoint interval')
    impurity = Parameter(str, 'gini', 'Impurity')
    # None means no fixed seed.
    seed = Parameter(int, None, 'Seed')
class Parameters:
    """Declarative parameter controlling sampling of unlinked reports."""
    select_ratio = Parameter(int, 1, 'Selection ratio for unlinked reports')
class Parameters:
    """Declarative parameters for a group-and-collect step."""
    # Column whose values define the groups.
    groupCol = Parameter(str, 'id', 'Group column', input_column=True)
    # Column that receives the collected list per group.
    outputCol = Parameter(str, 'list', 'List column after grouping',
                          output_column=True)
class Parameters:
    """Declarative parameter naming the join key column."""
    # NOTE: attribute name shadows the builtin `id`, but only inside this
    # declarative namespace.
    id = Parameter(str, '_id', 'ID column to join on', input_column=True)
class Parameters:
    """Declarative parameters for a train/test split step (defaults 90/10)."""
    train_weight = Parameter(float, 0.9, 'Train weight of split ratio')
    test_weight = Parameter(float, 0.1, 'Test weight of split ratio')
class Parameters:
    """Declarative parameters for an encoding step with drop-last option."""
    dropLast = Parameter(bool, True, 'Drop the last category')
    # NOTE(review): the label contains a bare '%s' placeholder — confirm it is
    # formatted by the caller before being displayed.
    inputCol = Parameter(str, 'tokens', 'Input column (%s)', input_column=True,
                         input_dtype=Parameter.T_ARRAY_STRING)
    outputCol = Parameter(str, 'features', 'Output column', output_column=True)