def print_linear_regression_output(self):
    """Print the regression inputs, the predictions and the fitted line.

    The column stored under ``self.x_axis``, the column stored under
    ``self.y_axis`` and ``self.predicted_y`` are handed to
    ``TextUtils.print_dict``; afterwards the slope and y-intercept are
    printed as a formula.
    """
    slope = self.slope
    intercept = self.y_intercept
    table = {
        self.x_axis: self.data[self.x_axis],
        self.y_axis: self.data[self.y_axis],
        constant.PREDICTION_EXPORT_COL: self.predicted_y,
    }
    TextUtils.print_dict(table)
    print(f"Formula: Y = {slope} * X + {intercept}, slope of the line: {slope}, Y Intercept of the Line {intercept}")
def _ask_column_type(header):
    """Prompt until the user names a valid type for ``header``; return it lower-cased."""
    while True:
        answer = input(
            f"what is the type of the column {header.strip()}? (int, float, string)"
        )
        is_valid = (
            TextUtils.checks_int(answer)
            or TextUtils.checks_str(answer)
            or TextUtils.checks_float(answer)
        )
        if is_valid:
            return answer.lower()
        print(f"please enter a valid input for {header}! (int, float, string)")


def define_data_types(data_types, headers):
    """Return the column data types, asking the user when none were supplied.

    Args:
        data_types: List of already known data types. When it is empty the
            user is prompted, and any answers are appended to this same list.
        headers: Column headers; one type is requested per header.

    Returns:
        ``data_types`` unchanged when it is non-empty; an empty list when the
        user answers yes to the ``constant.ARE_ALL_INTEGERS`` prompt;
        otherwise ``data_types`` filled with one lower-cased type per header.
    """
    # Types were supplied by the caller: nothing to ask.
    if len(data_types) != 0:
        return data_types

    while True:
        reply = input(constant.ARE_ALL_INTEGERS)
        if TextUtils.checks_yes(reply):
            return []
        if TextUtils.checks_no(reply):
            for header in headers:
                data_types.append(_ask_column_type(header))
            return data_types
        # Neither yes nor no: complain and ask again.
        print(constant.INVALID_YES_NO)
def get_valid_axis(text, data_frame):
    """Ask for a column index until an int or float column is chosen.

    Args:
        text: Prompt passed to ``ExceptionUtils.select_int``.
        data_frame: Object exposing ``headers`` and ``data_types``.

    Returns:
        The selected index. Any selection is accepted when the data frame
        declares no data types.
    """
    last_index = len(data_frame.headers) - 1
    while True:
        axis = ExceptionUtils.select_int(text, 0, last_index)
        accepted = (
            len(data_frame.data_types) == 0
            or TextUtils.checks_int(data_frame.data_types[axis])
            or TextUtils.checks_float(data_frame.data_types[axis])
        )
        if accepted:
            return axis
        print(
            f"Invalid Selection {axis}, Please select column of type Integer/Float"
        )
def export_linear_regression_output_all(self):
    """Run and export a linear regression for every eligible pair of columns.

    Every ordered pair of headers is visited. A pair is skipped when both
    headers compare equal, or when data types are declared and either
    column is a string column. One timestamp string, computed up front, is
    passed to every export.
    """
    stamp = datetime.now().strftime(constant.DATE_FOTMAT)
    for first, first_header in enumerate(self.headers):
        for second, second_header in enumerate(self.headers):
            if first_header == second_header:
                continue
            # With declared types, both columns must be non-string.
            if len(self.data_types) != 0 and (
                TextUtils.checks_str(self.data_types[first])
                or TextUtils.checks_str(self.data_types[second])
            ):
                continue
            self.run_linear_regression(first, second)
            self.export_linear_regression_output(stamp)
def main():
    """Run the data-set selection menu until the user chooses to exit.

    Option 1 opens the default data set, option 2 asks for a path and a
    delimiter, option 3 leaves the loop. A data frame that was created
    successfully (truthy) is handed to ``use_data_frame``.
    """
    while True:
        TextUtils.print_menu(constant.MENU_SELECT_DATA_SET)
        choice = ExceptionUtils.select_int(
            constant.MENU_SELECT_DATA_SET_INPUT, 1, 3)
        if choice == 3:
            return
        if choice == 1:
            source = constant.DEFAULT_DATA_SET_FILE
            separator = ","
            types = constant.DEFAULT_DATA_SET_DATA_TYPES
        elif choice == 2:
            source = input(constant.ENTER_PATH)
            separator = input(constant.ENTER_DELIMITER)
            types = []
        else:
            continue

        print(f"Opening CSV {source}")
        frame = create_data_frame(source, separator, types)
        if frame:
            use_data_frame(frame)
def use_data_frame(data_frame):
    """Run the per-data-set menu until the user chooses to go back.

    Menu options:
        1  print the data
        2  print the headings with their types
        3  print deviation calculations for a chosen column
        4  plot the normal distribution of a chosen column
        5  export the normal distribution of a chosen column
        6  export the normal distribution of all columns
        7  run a regression and print its output
        8  run a regression and export its output
        9  run a regression and plot its output
        10 export the regression output for all columns
        11 leave the menu

    Args:
        data_frame: The data frame object the menu actions operate on.
    """
    leave = 11
    while True:
        TextUtils.print_menu(constant.MENU_USE_DATA_SET)
        choice = ExceptionUtils.select_int(constant.MENU_USE_DATA_SET_INPUT, 1, leave)
        if choice == leave:
            return

        if choice == 1:
            TextUtils.print_dict(data_frame.data)
        elif choice == 2:
            data_frame.print_headings_with_type()
        elif choice in (3, 4, 5):
            # Single-column actions share the same column prompt.
            data_frame.print_headings_with_type()
            column = get_valid_axis(constant.GET_COLUMN, data_frame)
            if choice == 3:
                data_frame.print_deviation_calculations(column)
            elif choice == 4:
                data_frame.plot_normal_distribution(column)
            else:
                stamp = datetime.now().strftime(constant.DATE_FOTMAT)
                data_frame.export_normal_distribution(column, stamp)
        elif choice == 6:
            data_frame.export_all_normal_distribution()
        elif choice in (7, 8, 9):
            # Regression actions all start by running the regression.
            run_regression(data_frame)
            if choice == 7:
                data_frame.print_linear_regression_output()
            elif choice == 8:
                stamp = datetime.now().strftime(constant.DATE_FOTMAT)
                data_frame.export_linear_regression_output(stamp)
            else:
                data_frame.plot_linear_regression_output()
        elif choice == 10:
            data_frame.export_linear_regression_output_all()
def process_row(self, row):
    """Convert the raw values of one row to their declared column types.

    With no declared data types every value must convert to an int.
    Otherwise each value is converted according to the entry of
    ``self.data_types`` at the same index; string columns keep the
    stripped text.

    Args:
        row: Sequence of raw string values for one row.

    Returns:
        The list of converted values, or ``False`` when the row is dropped
        because a value fails conversion, the number of data types differs
        from the number of headers, or the row holds more values than there
        are declared data types. A message is printed for every dropped row.
    """
    processed_row = []

    if len(self.data_types) == 0:
        for value in row:
            text = value.strip()
            # Convert once and reuse the result instead of converting twice.
            converted = ExceptionUtils.convert_to_int(text)
            if converted is None:
                print(f"could not convert a value in the row, dropping row as data type is incorrect: int, value {text}, row: {row}")
                return False
            processed_row.append(converted)
        return processed_row

    if len(self.headers) != len(self.data_types):
        print(f"The amount of data types does not match the amount of headers, dropping row {row}")
        return False

    # A row with extra values would index past the end of self.data_types;
    # drop it like any other malformed row instead of raising IndexError.
    if len(row) > len(self.data_types):
        print(f"The row has more values than there are data types, dropping row {row}")
        return False

    for i, value in enumerate(row):
        text = value.strip()
        data_type = self.data_types[i]

        if TextUtils.checks_int(data_type):
            converted = ExceptionUtils.convert_to_int(text)
            if converted is not None:
                processed_row.append(converted)
                continue
        if TextUtils.checks_str(data_type):
            processed_row.append(text)
            continue
        if TextUtils.checks_float(data_type):
            converted = ExceptionUtils.convert_to_float(text)
            if converted is not None:
                processed_row.append(converted)
                continue

        print(f"could not convert a value in the row, dropping row as data type is incorrect: {data_type} index of value: {i}, value {text}, row: {row}")
        return False

    return processed_row
def export_all_normal_distribution(self):
    """Export the normal distribution of every non-string column.

    When no data types are declared every column is exported. All exports
    receive the same timestamp string, computed once up front.
    """
    stamp = datetime.now().strftime(constant.DATE_FOTMAT)
    for column, _ in enumerate(self.headers):
        # Skip columns declared as strings.
        if len(self.data_types) != 0 and TextUtils.checks_str(self.data_types[column]):
            continue
        self.export_normal_distribution(column, stamp)