Ejemplo n.º 1
0
def extract_train_features():
    """Extract features for the training pairs, save them and return them.

    Reads the training pairs and targets through ``data_io``, runs the
    pipeline returned by ``feature_extractor()`` over them, and wraps the
    result in a DataFrame that reuses the row index of the input pairs.
    The columns are labelled with the first element of each entry in the
    pipeline's ``features`` list.  The frame is saved together with the
    ``Target`` column of the targets and then returned.
    """
    started_at = time.time()
    pipeline = feature_extractor()
    # Column labels: first element of each entry in the feature list.
    column_names = [entry[0] for entry in pipeline.features]

    print("Reading in the training data")
    pairs = data_io.read_train_pairs()
    target = data_io.read_train_target()

    print("Extracting features: " + str(pairs.shape))
    type_info = data_io.read_train_info()
    matrix = pipeline.fit_transform(pairs, target, type_map=type_info)

    # The timer covers construction, reading and extraction (not saving).
    minutes = float(time.time() - started_at) / 60.0
    print("Features extracted in " + str(minutes) + " Minutes")

    print("Saving features")
    feature_frame = pd.DataFrame(matrix, index=pairs.index)
    feature_frame.columns = column_names
    data_io.save_train_features(feature_frame, target.Target)

    return feature_frame
Ejemplo n.º 2
0
def extract_train_features():
    """Build, persist and return the training feature table.

    The pipeline comes from ``feature_extractor()``; the training pairs,
    targets and type information come from ``data_io``.  The transformed
    output is placed in a DataFrame indexed like the input pairs, with one
    column label per pipeline entry (the entry's first element), and is
    saved alongside the ``Target`` column of the targets.
    """
    t0 = time.time()
    extractor = feature_extractor()
    # Each entry of ``extractor.features`` carries its label at position 0.
    labels = [spec[0] for spec in extractor.features]

    print("Reading in the training data")
    train_pairs = data_io.read_train_pairs()
    train_target = data_io.read_train_target()

    print("Extracting features: " + str(train_pairs.shape))
    transformed = extractor.fit_transform(
        train_pairs,
        train_target,
        type_map=data_io.read_train_info(),
    )

    # Elapsed time is measured from before the extractor was constructed.
    seconds = float(time.time() - t0)
    print("Features extracted in " + str(seconds / 60.0) + " Minutes")

    print("Saving features")
    table = pd.DataFrame(transformed, index=train_pairs.index)
    table.columns = labels
    data_io.save_train_features(table, train_target.Target)

    return table
Ejemplo n.º 3
0
def main():
    """Join the current feature files onto previously saved feature CSVs.

    For the training set and then the validation set, the current features
    are loaded through ``data_io``, joined onto the older features read
    from ``./Models/old_csv/`` with ``X_old.join(X)``, and written back with
    the matching ``data_io.save_*_features`` call.  The shape of the current
    features is printed before and after each join.

    Exits with status 1 if a current feature file could not be loaded.
    """
    # Local import keeps this block self-contained; sys.exit is preferred
    # over the site-provided ``exit`` helper, which is not always available.
    import sys

    y = data_io.read_train_target()
    X = data_io.load_train_features()
    if X is None:
        print("No feature file found!")
        sys.exit(1)

    X_old = data_io.load_features("./Models/old_csv/features_train_en_python.csv")
    # print(...) with one argument behaves the same on Python 2 and 3.
    print(X.shape)
    X = X_old.join(X)
    print(X.shape)
    data_io.save_train_features(X, y)

    X = data_io.load_valid_features()
    # NOTE(review): assumes load_valid_features() signals a missing file
    # with None, like load_train_features() above -- confirm in data_io.
    if X is None:
        print("No feature file found!")
        sys.exit(1)

    X_old = data_io.load_features("./Models/old_csv/features_valid_en_python.csv")
    print(X.shape)
    X = X_old.join(X)
    print(X.shape)
    data_io.save_valid_features(X)
Ejemplo n.º 4
0
def main():
    """Join the current feature files onto previously saved feature CSVs.

    For the training set and then the validation set, the current features
    are loaded through ``data_io``, joined onto the older features read
    from ``./Models/old_csv/`` with ``X_old.join(X)``, and written back with
    the matching ``data_io.save_*_features`` call.  The shape of the current
    features is printed before and after each join.

    Exits with status 1 if a current feature file could not be loaded.
    """
    # Local import keeps this block self-contained; sys.exit is preferred
    # over the site-provided ``exit`` helper, which is not always available.
    import sys

    y = data_io.read_train_target()
    X = data_io.load_train_features()
    if X is None:
        print("No feature file found!")
        sys.exit(1)

    X_old = data_io.load_features(
        "./Models/old_csv/features_train_en_python.csv")
    # print(...) with one argument behaves the same on Python 2 and 3.
    print(X.shape)
    X = X_old.join(X)
    print(X.shape)
    data_io.save_train_features(X, y)

    X = data_io.load_valid_features()
    # NOTE(review): assumes load_valid_features() signals a missing file
    # with None, like load_train_features() above -- confirm in data_io.
    if X is None:
        print("No feature file found!")
        sys.exit(1)

    X_old = data_io.load_features(
        "./Models/old_csv/features_valid_en_python.csv")
    print(X.shape)
    X = X_old.join(X)
    print(X.shape)
    data_io.save_valid_features(X)
Ejemplo n.º 5
0
          srch_length_of_stay_features.SrchLengthOfStayFeatures(self.X),
          srch_booking_window_features.SrchBookingWindowFeatures(self.X),
          ]

      return map(self.transformer, feature_list)
  


if __name__ == "__main__":
    # Command-line entry point: read the train (default) or test data,
    # compute the derived features, append them column-wise to the raw
    # data and save the combined table through data_io.
    parser = argparse.ArgumentParser(
        description="Generate features using train/test data")
    parser.add_argument("--test", action="store_true", default=False,
                        help="Whether to use test data", required=False)
    result = parser.parse_args()

    if result.test:
        print("Reading test data")
        data = data_io.read_test()
    else:
        print("Reading training data")
        data = data_io.read_train()

    fm = FeatureExtractor(data)
    # Materialise the derived features now.  If feature_extractor() returns
    # a lazy iterator (e.g. ``map`` on Python 3), ``[data] + ...`` below
    # would raise TypeError, and the features would otherwise be computed
    # only after ``fillna`` has already modified ``data`` in place.
    derived_features = list(fm.feature_extractor())
    data.fillna(0, inplace=True)
    data = pandas.concat([data] + derived_features, axis=1)

    if result.test:
        data_io.save_test_features(data)
    else:
        data_io.save_train_features(data)