def run(self):
    """Extract features from the dataset and save them as intermediate data.

    Steps:
      1. Load the dataset via ``self.getDataset()``.
      2. For every name in ``self.ignoreFeatures``, copy that column from
         the previously written intermediate data into the dataset, and
         rewrite the matching ``self.dataToWrite`` entry so the mapper
         passes the column through an identity transform.
      3. Run ``f.FeatureMapper`` over ``self.dataToWrite``.
      4. Write the result with the train or validation writer, selected
         by ``self.getTrain``.

    Side effects: mutates matching entries of ``self.dataToWrite`` in
    place, prints progress messages, and writes intermediate data through
    ``data_io``.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Reading in the data")
    dataset = self.getDataset()
    # Captured before dataToWrite entries are rewritten below; only items
    # 1 and 2 of an entry are ever changed, so the names are unaffected.
    featureNames = [entry[0] for entry in self.dataToWrite]

    # Truthiness check: skips the intermediate read for any empty (or None)
    # container, not only for a literal empty list.
    if self.ignoreFeatures:
        # NOTE(review): getTrain is tested as an attribute, not called --
        # confirm it is a boolean flag rather than a method.
        if self.getTrain:
            intermediate = data_io.read_intermediate_train()
        else:
            intermediate = data_io.read_intermediate_valid()

        # Reuse the stored columns for the ignored features.
        for name in self.ignoreFeatures:
            dataset[name] = intermediate[name]

        # Point each ignored feature's entry at the column of the same
        # name and replace its transformer with an identity transform.
        # Entries must be mutable sequences for this assignment to work.
        for element in self.dataToWrite:
            if element[0] in self.ignoreFeatures:
                element[1] = element[0]
                element[2] = f.SimpleTransform(transformer=f.ff.identity)

    print("Extracting features and transforming")
    featureMapper = f.FeatureMapper(self.dataToWrite)
    transformedDataset = featureMapper.transform(dataset)

    print("Saving the data")
    if self.getTrain:
        data_io.write_intermediate_train(featureNames, transformedDataset, dataset)
    else:
        data_io.write_intermediate_valid(featureNames, transformedDataset, dataset)
def run(self):
    """Predict on the validation dataset and write the submission.

    Steps:
      1. Load the validation dataset via ``self.getValidationDataset()``.
      2. For every name in ``f.preprocessedFeatures``, copy that column
         from the intermediate validation data into the dataset.
      3. Load the model with ``data_io.load_model()`` and call its
         ``predict`` on the dataset.
      4. Flatten the predictions and pass them to
         ``data_io.write_submission``.

    Side effects: prints progress messages and writes the submission
    through ``data_io``.
    """
    valid = self.getValidationDataset()

    # Truthiness check: skips the intermediate read for any empty (or None)
    # container, not only for a literal empty list.
    if f.preprocessedFeatures:
        intermediate = data_io.read_intermediate_valid()
        for name in f.preprocessedFeatures:
            valid[name] = intermediate[name]

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Loading the classifier")
    classifier = data_io.load_model()

    print("Making predictions")
    # flatten() is called on whatever predict() returns before writing.
    predictions = classifier.predict(valid).flatten()

    print("Writing predictions to file")
    data_io.write_submission(predictions)