Exemple #1
0
    def get_table(self, iterator, target_name=None):
        """Build a feature matrix and a target vector from row records.

        Every record produced by ``iterator`` is indexed by feature name.
        Each value is cast with ``TypeConverter.cast`` using the type stored
        in ``self.metadata[name]['type']``.

        Args:
            iterator: iterable of records supporting ``record[name]`` for
                every name returned by ``self.get_feature_names()``.
            target_name: name of the feature whose values go to the target
                vector instead of the feature rows. With the default
                ``None`` (or a name that is not a feature) the target
                vector stays empty.

        Returns:
            A ``(X, y)`` tuple of numpy arrays: ``X`` holds one row per
            record (target column excluded), ``y`` holds the target values.
        """
        columns = self.get_feature_names()

        features, targets = [], []
        caster = TypeConverter()
        for record in iterator:
            feature_row = []
            for column in columns:
                value = caster.cast(self.metadata[column]['type'], record[column])
                # The target column is collected separately from the features.
                sink = targets if column == target_name else feature_row
                sink.append(value)
            features.append(feature_row)
        return np.array(features), np.array(targets)
Exemple #2
0
    def get_dataset(self, iterator, target_name=None):
        """Vectorize row records into a dense feature matrix.

        Every record produced by ``iterator`` is indexed by feature name.
        Each value is cast with ``TypeConverter.cast`` using the type stored
        in ``self.metadata[name]['type']``. The resulting per-row dicts are
        fed to a ``DictVectorizer`` (presumably scikit-learn's -- confirm
        against the file's imports).

        Args:
            iterator: iterable of records supporting ``record[name]`` for
                every name returned by ``self.get_feature_names()``.
            target_name: name of the feature whose values are collected as
                targets instead of being vectorized. With the default
                ``None`` the target list stays empty.

        Returns:
            A ``(y, X, feature_names)`` tuple: ``y`` is the list of target
            values, ``X`` is the dense array from the vectorizer, and
            ``feature_names`` is the list of column names the vectorizer
            produced.
        """
        feature_names = self.get_feature_names()
        X = []
        y = []
        converter = TypeConverter()
        for row_value in iterator:
            row = {}
            for name in feature_names:
                value = converter.cast(self.metadata[name]['type'], row_value[name])
                if name == target_name:
                    y.append(value)
                else:
                    row[name] = value
            X.append(row)

        vec = DictVectorizer()
        dataset = vec.fit_transform(X)
        # get_feature_names() was deprecated in scikit-learn 1.0 and removed
        # in 1.2; prefer the replacement when present and keep returning a
        # plain list so callers see the same type as before.
        if hasattr(vec, 'get_feature_names_out'):
            feature_names = vec.get_feature_names_out().tolist()
        else:
            feature_names = vec.get_feature_names()
        return y, dataset.toarray(), feature_names
Exemple #3
0
 def combine(self, key, values):
     """Summarise the values collected for ``key``.

     Each value is first converted with ``TypeConverter.type``. The summary
     ("resume") dict then holds:

     * ``'num-values'`` -- number of values,
     * ``'frequency'`` -- result of ``self._freq_count`` on the converted
       values, with the entry for the empty string removed,
     * ``'N/A'`` -- the frequency of the empty string (0 when absent),
     * ``'type'`` -- result of ``TypeConverter.get_type`` on the values,
     * ``'min'``, ``'max'``, ``'sum'`` -- only when the type is ``'int'``
       or ``'float'``, computed over the non-empty values.

     Args:
         key: identifier passed through unchanged to the result.
         values: iterable of raw values for this key.

     Returns:
         A ``(key, resume)`` tuple.

     Raises:
         ValueError: if the type is numeric but every value is the empty
             string (``min``/``max`` of an empty sequence).
     """
     resume = {}
     tc = TypeConverter()

     # Convert the raw values. A real list is required: the values are
     # measured with len() and traversed several times below, and on
     # Python 3 map() would only give a one-shot iterator.
     values = [tc.type(value) for value in values]

     resume['num-values'] = len(values)
     frequency = self._freq_count(values)
     resume['frequency'] = frequency
     # The empty string marks a missing value: report its count separately
     # and keep it out of the frequency table.
     try:
         resume['N/A'] = frequency['']
     except KeyError:
         resume['N/A'] = 0
     else:
         del frequency['']

     resume['type'] = tc.get_type(values)

     if resume['type'] in ('int', 'float'):
         # Materialised as a list so min, max and sum each see every value
         # (a lazy filter() would be exhausted after the first call).
         defined_values = [v for v in values if v != '']
         resume['min'] = min(defined_values)
         resume['max'] = max(defined_values)
         resume['sum'] = sum(defined_values)

     return (key, resume)