def evalOne(enabledColumns): features = [all_features[i] for i in range(0, len(all_features)) if enabledColumns[i]] Y = [] P = [] for group in range(0,5): # print("Test group " + str(group + 1)) trainStationList = [] testStationList = [] for i in range(0,5): if i == group: testStationList.extend(groups[i]) else: trainStationList.extend(groups[i]) trainStations = set(float(station) for station in trainStationList) # reorder train stations # print("\ttrainStationList:" + str(trainStationList)) trainStationList = [s for s in all_stations if float(s) in trainStations] # print("\ttrainStationList:" + str(trainStationList)) testStations = set(float(station) for station in testStationList) # print("\ttestStationList:" + str(testStationList)) trainX, testX, trainY, testY, trainLocation, testLocation = splitDataForXValidationWithLocation(trainStations, testStations, "location", data, features, "target") train_lower = [float(trainStationList[i]) for i in range(0, len(trainStationList)) if i < (len(trainStationList) / 2.0)] # train_upper = [float(trainStationList[i]) for i in range(0, len(trainStationList)) if i >= (len(trainStationList) / 2.0)] test_lower = [float(testStationList[i]) for i in range(0, len(testStationList)) if i < (len(testStationList) / 2.0)] # test_upper = [float(testStationList[i]) for i in range(0, len(testStationList)) if i >= (len(testStationList) / 2.0)] trainY = [] for l in trainLocation: if l in train_lower: trainY.append(0) else: trainY.append(1) testY = [] for l in testLocation: if l in test_lower: testY.append(0) else: testY.append(1) model = RandomForestClassifier(random_state=42, n_estimators=20, max_depth=9, n_jobs=-1) model.fit(trainX, trainY) predY = model.predict(testX) Y.extend(testY) P.extend(predY) f1 = f1_score(Y, P) accuracy = accuracy_score(Y, P) return f1, accuracy
if i == group: testStationList.extend(groups[i]) else: trainStationList.extend(groups[i]) trainStations = set(float(station) for station in trainStationList) # reorder train stations print("\ttrainStationList:" + str(trainStationList)) trainStationList = [s for s in all_stations if float(s) in trainStations] print("\ttrainStationList:" + str(trainStationList)) testStations = set(float(station) for station in testStationList) print("\ttestStationList:" + str(testStationList)) trainX, testX, trainY, testY, trainLocation, testLocation = splitDataForXValidationWithLocation(trainStations, testStations, "location", data, columns, "target") train_lower = [float(trainStationList[i]) for i in range(0, len(trainStationList)) if i < (len(trainStationList) / 2.0)] train_upper = [float(trainStationList[i]) for i in range(0, len(trainStationList)) if i >= (len(trainStationList) / 2.0)] test_lower = [float(testStationList[i]) for i in range(0, len(testStationList)) if i < (len(testStationList) / 2.0)] test_upper = [float(testStationList[i]) for i in range(0, len(testStationList)) if i >= (len(testStationList) / 2.0)] trainY = [] for l in trainLocation: if l in train_lower: trainY.append(0) else: trainY.append(1) testY = []