Example #1
0
 def prepare_data(self):
     """Compute the derived views of ``self.value_list`` once and cache them.

     On the first call this sets ``self.histogram_list`` and
     ``self.numeric_list``, then either ``self.sample_list`` (when
     ``self.is_numeric()`` is true) or ``self.value_text`` (otherwise), and
     finally sets ``self.is_prepared`` so that later calls return immediately.

     Note: ``sc`` is neither a parameter nor an attribute here; it has to be
     resolvable from the enclosing module scope (it is used as an object
     exposing ``parallelize``).
     """
     if self.is_prepared:
         return

     self.histogram_list = Utils.get_distribution(self.value_list)
     self.numeric_list = Utils.clean_examples_numeric(self.value_list)

     if self.is_numeric():
         numeric_count = len(self.numeric_list)
         if numeric_count:
             # Target roughly 100 sampled values.  The fraction is clamped to
             # 1.0 because sampling without replacement expects a probability
             # in [0, 1]; with fewer than 100 values every value is kept.
             fraction = min(1.0, 100.0 / numeric_count)
             self.sample_list = sc.parallelize(self.numeric_list).sample(False, fraction).collect()
         else:
             # Nothing to sample; also avoids dividing by zero above.
             self.sample_list = []
     else:
         # Each value is wrapped in single spaces and the pieces are glued
         # together in list order.  The list already lives on the driver, so
         # a local join yields the same string without a Spark round trip and
         # produces "" (instead of raising) for an empty list.
         self.value_text = "".join([" %s " % x for x in self.value_list])

     self.is_prepared = True
    def __init__(self, column, train_examples_map, sc):
        """Record a test column together with the training examples map.

        :param column: object exposing ``value_list``, ``semantic_type``,
            ``name`` and an ``is_numeric()`` method; its values become the
            test examples and its semantic type the true label.
        :param train_examples_map: stored unchanged as
            ``self.train_examples_map``.
        :param sc: stored unchanged as ``self.sc`` (presumably a
            SparkContext -- confirm against the caller).
        """
        examples = column.value_list

        # Inputs kept exactly as they were handed in.
        self.train_examples_map = train_examples_map
        self.sc = sc

        # Plain copies of the column's descriptive fields.
        self.test_examples = examples
        self.true_label = column.semantic_type
        self.name = column.name

        # Views derived from the test examples.
        self.numeric_test_examples = Utils.clean_examples_numeric(examples)
        self.is_numeric = column.is_numeric()
        self.hist_examples = Utils.get_distribution(examples)