import sys

import file
import plot
import precision
import train


def estimate_price(km, theta0, theta1):
    return theta0 + theta1 * km


def usage():
    print("Usage: " + sys.argv[0] + " [-b]")
    sys.exit(2)


# Accept either no argument or a single "-b" flag enabling the bonus output.
if len(sys.argv) != 1 and (len(sys.argv) != 2 or sys.argv[1] != "-b"):
    usage()
bonus = len(sys.argv) == 2

# Ask for a mileage until the user types a valid number.
loop = True
while loop:
    try:
        km = float(input("km: "))
        loop = False
    except ValueError:
        print("km must be a number")

theta0, theta1 = file.read_theta()
data = file.read_data()

# Scale the input mileage with the same min/max bounds used during training.
min_km = min(data, key=lambda t: t[0])[0]
max_km = max(data, key=lambda t: t[0])[0]
km = train.scale_km(km, min_km, max_km)

price = estimate_price(km, theta0, theta1)
print("estimated price: %.2f" % price)

if bonus:
    # Bonus: report the model's R² and draw the regression line over the data.
    scaled_data = train.scale_data(data)
    p = precision.r_squared(scaled_data, theta0, theta1)
    print("precision: %.2f%%" % (p * 100))
    plot.draw(data, theta0, theta1)
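# The predictor above relies on helpers from the train and precision modules
# that are not shown in this snippet. Below is a minimal sketch of what they
# likely look like given how they are called (min-max scaling of the mileage,
# and an R² score over the already-scaled data). These are assumptions about
# scale_km() and r_squared(), not the repository's actual implementations.

def scale_km(km, min_km, max_km):
    # Min-max normalisation: maps the range [min_km, max_km] onto [0, 1].
    return (km - min_km) / (max_km - min_km)


def r_squared(scaled_data, theta0, theta1):
    # Coefficient of determination: R² = 1 - SS_res / SS_tot over (km, price)
    # pairs that were scaled the same way as the training data.
    prices = [price for _, price in scaled_data]
    mean_price = sum(prices) / len(prices)
    ss_res = sum((price - (theta0 + theta1 * km)) ** 2 for km, price in scaled_data)
    ss_tot = sum((price - mean_price) ** 2 for price in prices)
    return 1 - ss_res / ss_tot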
import os
import sys

import numpy as np

import file
import logreg_predict

# usage() and error() are defined elsewhere in this script (not shown here).

subject_list = ["Arithmancy", "Astronomy", "Herbology",
                "Defense Against the Dark Arts", "Divination", "Muggle Studies",
                "Ancient Runes", "History of Magic", "Transfiguration", "Potions",
                "Care of Magical Creatures", "Charms", "Flying"]
feature_number = len(subject_list)  # assumed: one feature per subject


def ask_marks():
    # Initialisation assumed (the original snippet starts after it):
    # row 0 keeps the bias term 1.0, rows 1..n receive the scaled marks.
    mark_matrix = np.ones((feature_number + 1, 1))
    i = 1
    print("Fill your marks")
    for subject in subject_list:
        loop = True
        while loop:
            try:
                mark = float(input(subject + ": "))
                if mark < 0.0 or mark > 20.0:
                    raise ValueError()
                mark_matrix[i][0] = mark / 20.0
                i += 1
                loop = False
            except ValueError:
                print("mark must be a number between 0 and 20")
    return mark_matrix


if __name__ == '__main__':
    if len(sys.argv) != 2:
        usage()
    if not os.path.isfile(sys.argv[1]):
        error('no such file: %s' % sys.argv[1])
    mark_matrix = ask_marks()
    theta_data = file.read_theta(sys.argv[1], feature_number + 1)
    houses = logreg_predict.logreg_predict(mark_matrix, theta_data)
    print("Sorting Hat: Hum, you will go to... " + houses[0] + "!")
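# logreg_predict.logreg_predict() is not shown in these snippets. The sketch
# below is a one-vs-all prediction consistent with how it is called: X is a
# (n_features+1, n_samples) matrix whose first row is the bias, and theta_data
# is assumed to map each house name to its theta vector of length
# n_features+1. The actual layout returned by file.read_theta() may differ.

import numpy as np


def logreg_predict(X, theta_data):
    houses = list(theta_data.keys())
    # One row of scores per house: sigmoid(theta · x) for every sample.
    thetas = np.array([theta_data[h] for h in houses])        # (n_houses, n_features+1)
    scores = 1.0 / (1.0 + np.exp(-thetas.dot(np.asarray(X))))  # (n_houses, n_samples)
    # For each sample, keep the house with the highest probability.
    return [houses[i] for i in np.argmax(scores, axis=0)]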
import os
import sys

import numpy as np

import file
import histogram
import logreg_train

# usage(), error() and the logreg_predict() function itself are defined
# earlier in this file and are not shown in this snippet.


def read_data(path, feature_number, mean_features):
    # Signature inferred from the call below; the dataset parsing that
    # builds `data` is omitted in the original snippet.
    try:
        ...
    except:
        error("invalid dataset")
    return data


if __name__ == '__main__':
    if len(sys.argv) != 3:
        usage()
    if not os.path.isfile(sys.argv[1]):
        error('no such file: %s' % sys.argv[1])
    if not os.path.isfile(sys.argv[2]):
        error('no such file: %s' % sys.argv[2])

    # Recover the training-set statistics so the test set can be completed
    # (missing values) and scaled exactly like the training data.
    train_file = "resources/dataset_train.csv"
    header_histo, features_histo = histogram.read_data(train_file)
    feature_number = len(header_histo)
    mean_features = logreg_train.calc_mean_features(features_histo, feature_number)

    data = read_data(sys.argv[1], feature_number, mean_features)
    train_data = logreg_train.read_data(train_file, feature_number, mean_features)

    # Min-max scale with the training-set bounds, then prepend a row of ones
    # for the bias term.
    min_matrix = np.min(train_data["Features"], axis=1).reshape(-1, 1)
    max_matrix = np.max(train_data["Features"], axis=1).reshape(-1, 1)
    data = logreg_train.scale(data, min_matrix, max_matrix)
    data = np.vstack((np.matrix(np.ones(len(data[0]))), data))

    theta_data = file.read_theta(sys.argv[2], feature_number + 1)  # +1 for the bias
    houses = logreg_predict(data, theta_data)
    file.write_houses(houses)
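# logreg_train.scale() is not shown either. Given the per-feature min/max
# column vectors computed above, it presumably applies the same min-max
# scaling to every column of the feature matrix. A sketch under that
# assumption (features shaped (n_features, n_samples), bounds shaped
# (n_features, 1)); the repository's real signature may differ.

import numpy as np


def scale(features, min_matrix, max_matrix):
    # Broadcasting divides each feature row by its own training-set range,
    # so train and test data end up on the same scale.
    return (np.asarray(features) - min_matrix) / (max_matrix - min_matrix)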