def extract_train_features():
    """Extract features for the training pairs, save them, and return them.

    Builds a ``feature_extractor()``, reads the training pairs, target and
    type info through ``data_io``, runs ``fit_transform`` on them, and wraps
    the result in a DataFrame that keeps the index of the training pairs.
    Column names are the first element of each entry in the extractor's
    ``features`` list. Progress and the elapsed time (in minutes, measured
    from the start of the call) are printed along the way.

    Returns:
        The DataFrame of extracted features that was passed to
        ``data_io.save_train_features``.
    """
    started_at = time.time()
    extractor = feature_extractor()
    column_names = [entry[0] for entry in extractor.features]

    print("Reading in the training data")
    pairs = data_io.read_train_pairs()
    target = data_io.read_train_target()
    # For a quick trial run, slice both inputs here, e.g. .iloc[1:7].

    print("Extracting features: " + str(pairs.shape))
    type_map = data_io.read_train_info()
    feature_values = extractor.fit_transform(pairs, target, type_map=type_map)

    minutes = float(time.time() - started_at) / 60.0
    print("Features extracted in " + str(minutes) + " Minutes")

    print("Saving features")
    frame = pd.DataFrame(feature_values, index=pairs.index)
    frame.columns = column_names
    data_io.save_train_features(frame, target.Target)
    return frame
def extract_train_features():
    """Compute, persist and return the feature table for the training set.

    Steps, in order:
      1. create the extractor via ``feature_extractor()`` and collect one
         column name per entry of ``extractor.features`` (element 0 of each);
      2. load the training pairs and the training target with ``data_io``;
      3. call ``fit_transform`` with the pairs, the target and the type map
         returned by ``data_io.read_train_info()``;
      4. print how long everything since the start of the call took;
      5. build a DataFrame indexed like the training pairs, name its
         columns, save it with the ``Target`` attribute of the target, and
         return it.
    """
    t0 = time.time()
    fx = feature_extractor()
    names = [spec[0] for spec in fx.features]

    print("Reading in the training data")
    train_pairs = data_io.read_train_pairs()
    train_target = data_io.read_train_target()
    # Debug aid: restrict train_pairs / train_target to .iloc[1:7] here.

    shape_text = str(train_pairs.shape)
    print("Extracting features: " + shape_text)
    raw = fx.fit_transform(
        train_pairs, train_target, type_map=data_io.read_train_info())

    seconds = float(time.time() - t0)
    print("Features extracted in " + str(seconds / 60.0) + " Minutes")

    print("Saving features")
    table = pd.DataFrame(raw, index=train_pairs.index)
    table.columns = names
    data_io.save_train_features(table, train_target.Target)
    return table
def _join_with_old_features(X, old_path):
    """Load the features stored at *old_path* and join *X* onto them.

    Prints the shape of *X* before the join and the shape of the joined
    result afterwards, then returns the joined result (``X_old.join(X)``).
    Presumably both objects are pandas DataFrames, so the join aligns on
    the index -- confirm against ``data_io.load_features``.
    """
    X_old = data_io.load_features(old_path)
    print(X.shape)
    X = X_old.join(X)
    print(X.shape)
    return X


def main():
    """Merge freshly extracted features with the old CSV feature files.

    The training features are joined onto the old training features and
    saved together with the training target; the validation features are
    then joined onto the old validation features and saved.

    Exits with status 1 (after printing a message) when a feature file
    cannot be loaded.
    """
    # Local import so the block does not depend on a file-level ``import sys``.
    import sys

    y = data_io.read_train_target()
    X = data_io.load_train_features()
    if X is None:
        print("No feature file found!")
        sys.exit(1)
    X = _join_with_old_features(
        X, "./Models/old_csv/features_train_en_python.csv")
    data_io.save_train_features(X, y)

    X = data_io.load_valid_features()
    # Assumes load_valid_features signals a missing file with None, like
    # load_train_features above -- TODO confirm. Without this guard a None
    # result would surface as an AttributeError on ``X.shape``.
    if X is None:
        print("No feature file found!")
        sys.exit(1)
    X = _join_with_old_features(
        X, "./Models/old_csv/features_valid_en_python.csv")
    data_io.save_valid_features(X)
def _join_with_old_features(X, old_path):
    """Load the features stored at *old_path* and join *X* onto them.

    Prints the shape of *X* before the join and the shape of the joined
    result afterwards, then returns the joined result (``X_old.join(X)``).
    Presumably both objects are pandas DataFrames, so the join aligns on
    the index -- confirm against ``data_io.load_features``.
    """
    X_old = data_io.load_features(old_path)
    print(X.shape)
    X = X_old.join(X)
    print(X.shape)
    return X


def main():
    """Merge freshly extracted features with the old CSV feature files.

    The training features are joined onto the old training features and
    saved together with the training target; the validation features are
    then joined onto the old validation features and saved.

    Exits with status 1 (after printing a message) when a feature file
    cannot be loaded.
    """
    # Local import so the block does not depend on a file-level ``import sys``.
    import sys

    y = data_io.read_train_target()
    X = data_io.load_train_features()
    if X is None:
        print("No feature file found!")
        sys.exit(1)
    X = _join_with_old_features(
        X, "./Models/old_csv/features_train_en_python.csv")
    data_io.save_train_features(X, y)

    X = data_io.load_valid_features()
    # Assumes load_valid_features signals a missing file with None, like
    # load_train_features above -- TODO confirm. Without this guard a None
    # result would surface as an AttributeError on ``X.shape``.
    if X is None:
        print("No feature file found!")
        sys.exit(1)
    X = _join_with_old_features(
        X, "./Models/old_csv/features_valid_en_python.csv")
    data_io.save_valid_features(X)
srch_length_of_stay_features.SrchLengthOfStayFeatures(self.X), srch_booking_window_features.SrchBookingWindowFeatures(self.X), ] return map(self.transformer, feature_list) if __name__ == "__main__": parser = argparse.ArgumentParser(description="Generate features using train/test data") parser.add_argument("--test", action="store_true", default=False, help="Weather to use test data", required=False) result = parser.parse_args() if result.test: print("Reading test data") data = data_io.read_test() else: print("Reading training data") data = data_io.read_train() fm = FeatureExtractor(data) derived_features = fm.feature_extractor() data.fillna(0, inplace=True) data = pandas.concat([data] + derived_features, axis=1) if result.test: data_io.save_test_features(data) else: data_io.save_train_features(data)