Beispiel #1
0
 class Parameters:
     """Declares the ``inputCol``, ``outputCol``, ``numFeatures`` and ``binary`` parameters.

     NOTE(review): the positional arguments of ``Parameter`` look like
     (value type, default value, display label) -- confirm against its
     definition, which is not visible here.
     """

     # Column wiring: one input column, one output column.
     inputCol = Parameter(
         str,
         'tokens',
         'Input column',
         input_column=True,
         input_dtype=Parameter.T_ARRAY_STRING,
     )
     outputCol = Parameter(
         str,
         'tf',
         'Output column',
         output_column=True,
     )

     # Tuning options; 2 ** 18 == 262144.
     numFeatures = Parameter(int, 2 ** 18, 'Number of features')
     binary = Parameter(bool, False, 'Binary')
Beispiel #2
0
 class Parameters:
     """Declares the ``inputCol`` and ``outputCol`` parameters.

     NOTE(review): the positional arguments of ``Parameter`` look like
     (value type, default value, display label) -- confirm against its
     definition, which is not visible here.
     """

     inputCol = Parameter(
         str,
         'text',
         'Input column',
         input_column=True,
         input_dtype=Parameter.T_STRING,
     )
     outputCol = Parameter(
         str,
         'tokens',
         'Output column',
         output_column=True,
     )
Beispiel #3
0
 class Parameters:
     """Declares the ``inputCols`` and ``outputCol`` parameters.

     NOTE(review): the positional arguments of ``Parameter`` look like
     (value type, default value, display label) -- confirm against its
     definition, which is not visible here.
     """

     inputCols = Parameter(
         str,
         'vectors',
         'Input columns',
         input_column=True,
         input_dtype=Parameter.T_VECTOR,
     )
     # NOTE(review): the label reads 'Output1 column'; possibly a typo for
     # 'Output column'. Kept verbatim because it is a runtime string.
     outputCol = Parameter(
         str,
         'assembled_vector',
         'Output1 column',
         output_column=True,
     )
Beispiel #4
0
 class Parameters:
     """Declares the ``inputCol`` and ``outputCol`` parameters.

     NOTE(review): the positional arguments of ``Parameter`` look like
     (value type, default value, display label) -- confirm against its
     definition, which is not visible here.
     """

     inputCol = Parameter(
         str,
         'category',
         'Input column',
         input_column=True,
         input_dtype=Parameter.T_STRING,
     )
     outputCol = Parameter(
         str,
         'category_index',
         'Output column',
         output_column=True,
     )
 class Parameters:
     """Declares the ``n``, ``inputCol`` and ``outputCol`` parameters.

     NOTE(review): the positional arguments of ``Parameter`` look like
     (value type, default value, display label) -- confirm against its
     definition, which is not visible here.
     """

     n = Parameter(int, 2, 'N')

     # Column wiring: one input column, one output column.
     inputCol = Parameter(
         str,
         'text',
         'Input column',
         input_column=True,
         input_dtype=Parameter.T_ARRAY_STRING,
     )
     outputCol = Parameter(
         str,
         'tokens',
         'Output column',
         output_column=True,
     )
Beispiel #6
0
 class Parameters:
     """Declares the ``inputCol``, ``outputCol`` and ``minDocFreq`` parameters.

     NOTE(review): the positional arguments of ``Parameter`` look like
     (value type, default value, display label) -- confirm against its
     definition, which is not visible here.
     """

     # Column wiring: one input column, one output column.
     inputCol = Parameter(
         str,
         'tf',
         'Input column',
         input_column=True,
         input_dtype=Parameter.T_VECTOR,
     )
     outputCol = Parameter(
         str,
         'tfidf',
         'Output column',
         output_column=True,
     )

     minDocFreq = Parameter(int, 0, 'Minimum document frequency')
 class Parameters:
     """Declares the ``inputCol``, ``outputCol`` and ``caseSensitive`` parameters.

     NOTE(review): the positional arguments of ``Parameter`` look like
     (value type, default value, display label) -- confirm against its
     definition, which is not visible here.
     """

     # Column wiring: one input column, one output column.
     inputCol = Parameter(
         str,
         'text',
         'Input column',
         input_column=True,
         input_dtype=Parameter.T_ARRAY_STRING,
     )
     outputCol = Parameter(
         str,
         'tokens',
         'Output column',
         output_column=True,
     )

     # Disabled in the original source (kept for reference):
     # 'stopWords': Parameter(list, None, 'Stopwords list')
     caseSensitive = Parameter(bool, False, 'Case sensitive')
Beispiel #8
0
    class Parameters:
        """Declares the ``featuresCol``, ``labelCol``, ``weightCol`` and ``smoothing`` parameters.

        NOTE(review): the positional arguments of ``Parameter`` look like
        (value type, default value, display label) -- confirm against its
        definition, which is not visible here.
        """

        # Columns flagged as inputs.
        featuresCol = Parameter(
            str,
            'features',
            'Feature column',
            input_column=True,
        )
        labelCol = Parameter(
            str,
            'label',
            'Label column',
            input_column=True,
        )

        # Disabled in the original source (kept for reference):
        # predictionCol =  Parameter(str, 'prediction', 'Prediction column')
        # probabilityCol =  Parameter(str, 'probability', 'Probability column')
        # rawPredictionCol =  Parameter(str, 'rawPrediction', 'Raw probability column')

        # Unlike the two columns above, weightCol is not flagged input_column.
        weightCol = Parameter(str, 'weight', 'Weight Column')

        smoothing = Parameter(float, 1.0, 'Smoothing')
Beispiel #9
0
 class Parameters:
     """Declares the ``cutoff``, ``inputCol`` and ``sortingType`` parameters.

     NOTE(review): the positional arguments of ``Parameter`` look like
     (value type, default value, display label) -- confirm against its
     definition, which is not visible here.
     """

     cutoff = Parameter(int, 50, 'Top scores to display:')

     inputCol = Parameter(
         str,
         'tokens',
         'Input column',
         input_column=True,
         input_dtype=Parameter.T_STRING,
     )

     # NOTE(review): the second argument 'Term_Freq' is not one of the
     # strings listed in ``items`` ('Term Frequency' is the closest) --
     # confirm whether that mismatch is intended before changing either.
     sortingType = Parameter(
         str,
         'Term_Freq',
         'Choose a sorting type',
         items=[
             'Term Frequency',
             'Chi',
             't-score',
             'Non perfect Chi',
         ],
     )
Beispiel #10
0
    class Parameters:
        """Declares the column parameters and the fitting options listed below.

        NOTE(review): the positional arguments of ``Parameter`` look like
        (value type, default value, display label) -- confirm against its
        definition, which is not visible here.
        """

        # Columns flagged as inputs.
        featuresCol = Parameter(
            str,
            'features',
            'Feature column',
            input_column=True,
        )
        labelCol = Parameter(
            str,
            'label',
            'Label column',
            input_column=True,
        )
        # Disabled in the original source (kept for reference):
        # weightCol =  Parameter(str, 'weight', 'Weight Column')

        # Fitting options.
        maxIter = Parameter(int, 100, 'Maximal iteration')
        regParam = Parameter(float, 0.0, 'Regression Parameter')
        elasticNetParam = Parameter(float, 0.0, 'Elastic Net Parameter')
        tol = Parameter(float, 1e-6, 'tol')
        fitIntercept = Parameter(bool, True, 'Fit intercept')
        standardization = Parameter(bool, False, 'Standardization')
        solver = Parameter(str, 'auto', 'Solver')
        aggregationDepth = Parameter(int, 2, 'Aggregation depth')
 class Parameters:
     """Declares the column parameters and the training options listed below.

     NOTE(review): the positional arguments of ``Parameter`` look like
     (value type, default value, display label) -- confirm against its
     definition, which is not visible here.
     """

     # Column wiring: one input column, one output column.
     inputCol = Parameter(
         str,
         'tokens',
         'Input column',
         input_column=True,
         input_dtype=Parameter.T_ARRAY_STRING,
     )
     # NOTE(review): the label reads 'Output1 column'; possibly a typo for
     # 'Output column'. Kept verbatim because it is a runtime string.
     outputCol = Parameter(
         str,
         'vector',
         'Output1 column',
         output_column=True,
     )

     # Training options.
     vectorSize = Parameter(int, 100, 'Vector size')
     minCount = Parameter(int, 5, 'Minimum count')
     numPartitions = Parameter(int, 1, 'Number of partitions ')
     stepSize = Parameter(float, 0.025, 'Step size')
     maxIter = Parameter(int, 1, 'Maximum Iteration')
     seed = Parameter(int, None, 'Seed')
     windowSize = Parameter(int, 5, 'Window size')
     maxSentenceLength = Parameter(int, 1000, 'Maximum sentence length')
 class Parameters:
     """Declares the column parameters plus ``minTF``, ``minDF``, ``vocabSize`` and ``binary``.

     NOTE(review): the positional arguments of ``Parameter`` look like
     (value type, default value, display label) -- confirm against its
     definition, which is not visible here.
     """

     # Column wiring: one input column, one output column.
     inputCol = Parameter(
         str,
         'tokens',
         'Input column',
         input_column=True,
         input_dtype=Parameter.T_ARRAY_STRING,
     )
     # NOTE(review): the label reads 'Output1 column'; possibly a typo for
     # 'Output column'. Kept verbatim because it is a runtime string.
     outputCol = Parameter(
         str,
         'vector',
         'Output1 column',
         output_column=True,
     )

     # No trailing comma after the call: a trailing comma would bind minTF
     # to a 1-tuple ``(Parameter(...),)`` rather than to the Parameter itself,
     # making it inconsistent with every sibling attribute.
     minTF = Parameter(float, 1.0, 'Minimum term frequency')
     minDF = Parameter(float, 1.0, 'Minimum document frequency')
     vocabSize = Parameter(int, 1 << 18, 'Vocabulary size')
     binary = Parameter(bool, False, 'Binary')
 class Parameters:
     """Declares the column parameters and the tokenizing options listed below.

     NOTE(review): the positional arguments of ``Parameter`` look like
     (value type, default value, display label) -- confirm against its
     definition, which is not visible here.
     """

     # Column wiring: one input column, one output column.
     inputCol = Parameter(
         str,
         'text',
         'Input column',
         input_column=True,
         input_dtype=Parameter.T_STRING,
     )
     outputCol = Parameter(
         str,
         'tokens',
         'Output column',
         output_column=True,
     )

     # Tokenizing options; the pattern string is backslash + 's+'.
     minTokenLength = Parameter(int, 1, 'Minimum token length')
     gaps = Parameter(bool, True, 'Gaps?')
     pattern = Parameter(str, '\\s+', 'Pattern')
     toLowercase = Parameter(bool, True, 'Convert to lower case?')
 class Parameters:
     """Declares the ``inputCols`` (list-valued, multiple) and ``outputCol`` parameters.

     NOTE(review): the positional arguments of ``Parameter`` look like
     (value type, default value, display label) -- confirm against its
     definition, which is not visible here.
     """

     inputCols = Parameter(
         list,
         [],
         'Input columns',
         input_column=True,
         input_multiple=True,
     )
     outputCol = Parameter(
         str,
         'features',
         'Output column',
         output_column=True,
     )
    class Parameters:
        """Declares the column parameters and the fitting options listed below.

        NOTE(review): the positional arguments of ``Parameter`` look like
        (value type, default value, display label) -- confirm against its
        definition, which is not visible here.
        """

        # Columns flagged as inputs.
        featuresCol = Parameter(
            str,
            'features',
            'Features column',
            input_column=True,
        )
        labelCol = Parameter(
            str,
            'label',
            'Label column',
            input_column=True,
        )

        # Disabled in the original source (kept for reference):
        # predictionCol = Parameter(str, 'prediction', 'Prediction column')
        # probabilityCol = Parameter(str, "probability", 'Probability column')
        # rawPredictionCol = Parameter(str, 'rawPrediction', 'Raw probability column')

        # Unlike the two columns above, weightCol is not flagged input_column.
        weightCol = Parameter(str, 'weight', 'Weight column')

        # Fitting options.
        maxIter = Parameter(int, 100, 'Maximum iteration')
        regParam = Parameter(float, 0.0, 'Regression Parameter')
        elasticNetParam = Parameter(float, 0.0, 'Elastic Net Parameter')
        tol = Parameter(float, 1e-6, 'tol')
        fitIntercept = Parameter(bool, True, 'Fit intercept')
        threshold = Parameter(float, 0.5, 'Threshold')
        # Disabled in the original source (kept for reference):
        # thresholds = Parameter(list, None, 'Thresholds')  # list[float]
        standardization = Parameter(bool, True, 'Standardization')
        aggregationDepth = Parameter(int, 2, 'Aggregation depth')
        family = Parameter(str, 'auto', 'Family')
Beispiel #16
0
 class Parameters:
     """Declares a single ``inputCol`` parameter flagged as an input column.

     NOTE(review): the positional arguments of ``Parameter`` look like
     (value type, default value, display label) -- confirm against its
     definition, which is not visible here.
     """

     inputCol = Parameter(
         str,
         'input',
         'Input column',
         input_column=True,
     )
 class Parameters:
     """Declares five column parameters and the tree options listed below.

     NOTE(review): the positional arguments of ``Parameter`` look like
     (value type, default value, display label) -- confirm against its
     definition, which is not visible here.
     """

     # All five columns below are flagged input_column=True.
     # NOTE(review): the prediction/probability/rawPrediction columns are
     # marked as inputs here -- confirm that is intended.
     featuresCol = Parameter(
         str,
         'features',
         'Features column',
         input_column=True,
     )
     labelCol = Parameter(
         str,
         'label',
         'Label column',
         input_column=True,
     )
     predictionCol = Parameter(
         str,
         'prediction',
         'Prediction column',
         input_column=True,
     )
     probabilityCol = Parameter(
         str,
         'probability',
         'Probability column',
         input_column=True,
     )
     rawPredictionCol = Parameter(
         str,
         'rawPrediction',
         'Raw prediction column',
         input_column=True,
     )

     # Tree options.
     maxDepth = Parameter(int, 5, 'Maximal depth')
     maxBins = Parameter(int, 32, 'Maximal bins')
     minInstancesPerNode = Parameter(int, 1, 'Minimum instance per node')
     minInfoGain = Parameter(float, 0.0, 'Minimum Information gain')
     maxMemoryInMB = Parameter(int, 256, 'Maximal Memory (MB)')
     cacheNodeIds = Parameter(bool, False, 'Cache node ids')
     checkpointInterval = Parameter(int, 10, 'Checkpoint interval')
     impurity = Parameter(str, 'gini', 'Impurity')
     seed = Parameter(int, None, 'Seed')
 class Parameters:
     """Declares a single integer ``select_ratio`` parameter.

     NOTE(review): the positional arguments of ``Parameter`` look like
     (value type, default value, display label) -- confirm against its
     definition, which is not visible here.
     """

     select_ratio = Parameter(
         int,
         1,
         'Selection ratio for unlinked reports',
     )
 class Parameters:
     """Declares the ``groupCol`` (input) and ``outputCol`` (output) parameters.

     NOTE(review): the positional arguments of ``Parameter`` look like
     (value type, default value, display label) -- confirm against its
     definition, which is not visible here.
     """

     groupCol = Parameter(
         str,
         'id',
         'Group column',
         input_column=True,
     )
     outputCol = Parameter(
         str,
         'list',
         'List column after grouping',
         output_column=True,
     )
 class Parameters:
     """Declares a single ``id`` parameter flagged as an input column.

     NOTE(review): the positional arguments of ``Parameter`` look like
     (value type, default value, display label) -- confirm against its
     definition, which is not visible here.
     """

     # The attribute name shadows the builtin ``id`` inside this class body;
     # it is part of the public interface, so it is kept as is.
     id = Parameter(
         str,
         '_id',
         'ID column to join on',
         input_column=True,
     )
Beispiel #21
0
 class Parameters:
     """Declares the ``train_weight`` and ``test_weight`` float parameters.

     NOTE(review): the positional arguments of ``Parameter`` look like
     (value type, default value, display label) -- confirm against its
     definition, which is not visible here.
     """

     train_weight = Parameter(
         float,
         0.9,
         'Train weight of split ratio',
     )
     test_weight = Parameter(
         float,
         0.1,
         'Test weight of split ratio',
     )
Beispiel #22
0
 class Parameters:
     """Declares the ``dropLast``, ``inputCol`` and ``outputCol`` parameters.

     NOTE(review): the positional arguments of ``Parameter`` look like
     (value type, default value, display label) -- confirm against its
     definition, which is not visible here.
     """

     dropLast = Parameter(bool, True, 'Drop the last category')

     # NOTE(review): the label contains a literal '%s'; presumably it is
     # formatted elsewhere -- confirm. Kept verbatim.
     inputCol = Parameter(
         str,
         'tokens',
         'Input column (%s)',
         input_column=True,
         input_dtype=Parameter.T_ARRAY_STRING,
     )
     outputCol = Parameter(
         str,
         'features',
         'Output column',
         output_column=True,
     )