def run(self):
    """Extract the configured features from one dataset and write them out.

    Steps, in order:

    1. Load the dataset with ``self.getDataset()``.
    2. If ``self.ignoreFeatures`` is not an empty list, copy each of those
       columns from the intermediate data (``read_intermediate_train`` when
       ``self.getTrain`` is truthy, ``read_intermediate_valid`` otherwise)
       into the dataset.
    3. Run ``f.FeatureMapper(self.dataToWrite)`` over the dataset.
    4. Write the result with the matching ``data_io.write_intermediate_*``
       function.

    Side effect: every entry of ``self.dataToWrite`` whose first item is
    listed in ``self.ignoreFeatures`` is modified in place -- item 1 is set
    to item 0 and item 2 is replaced by an identity ``SimpleTransform``.
    """
    print("Reading in the data")
    dataset = self.getDataset()
    # Names are captured before the entries are (possibly) rewritten below;
    # item 0 of an entry is never modified here.
    featureNames = [spec[0] for spec in self.dataToWrite]

    if self.ignoreFeatures != []:
        load_intermediate = (
            data_io.read_intermediate_train
            if self.getTrain
            else data_io.read_intermediate_valid
        )
        intermediate = load_intermediate()

        # Reuse the stored columns instead of recomputing them.
        for name in self.ignoreFeatures:
            dataset[name] = intermediate[name]

        # Point each reused feature at its own column and pass it through
        # unchanged. NOTE(review): item 1 is presumably the source column
        # name and item 2 the transformer -- confirm against FeatureMapper.
        for spec in self.dataToWrite:
            if spec[0] in self.ignoreFeatures:
                spec[1] = spec[0]
                spec[2] = f.SimpleTransform(transformer=f.ff.identity)

    print("Extracting features and transforming")
    mapper = f.FeatureMapper(self.dataToWrite)
    transformedDataset = mapper.transform(dataset)

    print("Saving the data")
    store_intermediate = (
        data_io.write_intermediate_train
        if self.getTrain
        else data_io.write_intermediate_valid
    )
    store_intermediate(featureNames, transformedDataset, dataset)
def run(self):
    """Train the pipeline on the training dataset and save the fitted model.

    Steps, in order:

    1. Load the training data with ``self.getTrainingDataset()``.
    2. If ``f.preprocessedFeatures`` is not an empty list, copy each of
       those columns from ``data_io.read_intermediate_train()`` into the
       training data.
    3. Read the targets and remap every value ``x`` of ``target.Target`` to
       ``x * (x + 1) / 2`` when ``self.directionForward`` is truthy, or to
       ``-x * (x - 1) / 2`` otherwise.
    4. Fit the pipeline returned by ``self.getPipeline(features)``, print
       the ``feature_importances_`` of its last step, and save it with
       ``data_io.save_model``.

    Side effect: ``f.features`` is modified in place -- for every entry
    whose first item is listed in ``f.preprocessedFeatures``, item 1 is set
    to item 0 and item 2 is replaced by an identity ``SimpleTransform``.
    """
    features = f.features
    train = self.getTrainingDataset()

    print("Reading preprocessed features")
    preprocessed = f.preprocessedFeatures
    if preprocessed != []:
        intermediate = data_io.read_intermediate_train()

        # Reuse the stored columns instead of recomputing them.
        for name in preprocessed:
            train[name] = intermediate[name]

        # Point each reused feature at its own column and pass it through
        # unchanged. NOTE(review): item 1 is presumably the source column
        # name and item 2 the transformer -- confirm against the pipeline.
        for spec in features:
            if spec[0] in preprocessed:
                spec[1] = spec[0]
                spec[2] = f.SimpleTransform(transformer=f.ff.identity)

    print("Reading targets")
    target = data_io.read_train_target()

    print("Extracting features and training model")
    classifier = self.getPipeline(features)

    # `/` is kept exactly as written. For integer x the product of two
    # consecutive integers is even, so the halving is exact.
    if self.directionForward:
        remap = lambda x: x * (x + 1) / 2
    else:
        remap = lambda x: -x * (x - 1) / 2
    finalTarget = [remap(value) for value in target.Target]

    classifier.fit(train, finalTarget)

    # steps[-1][1] is the second item of the last (name, step) entry.
    last_step = classifier.steps[-1][1]
    print(last_step.feature_importances_)

    print("Saving the classifier")
    data_io.save_model(classifier)