def fit(X, y, theta, alpha, num_iters, auto_stop=False):
    """
    fit a dataset with only one parameter (using linear_regression)

    Runs at most num_iters gradient-descent steps with learning rate alpha.
    theta is a two-item mutable sequence [theta0, theta1]; it is updated in
    place and is also the object returned.
    X must expose .shape (presumably a 1-D numpy array of the feature, with
    y the matching targets -- confirm against the caller).

    The loop ends early in two cases:
      - the latest cost exceeds the first recorded cost by more than
        const.INCREASE_THRESHOLD (an error is logged);
      - auto_stop is true and the cost did not drop by more than
        const.STOP_THRESHOLD * alpha since the previous iteration.

    return (theta, cost_history)
    """
    m = X.shape[0]
    # Loop-invariant step factor; same evaluation order as alpha * 1 / m.
    step = alpha / m
    J_history = []
    for iter_ in range(num_iters):
        # Compute the residual once, before theta is touched, so both
        # parameters are updated from the same prediction.
        residual = predict(X, theta) - y
        grad0 = sum(residual)
        grad1 = sum(residual * X)
        theta[0] = theta[0] - step * grad0
        theta[1] = theta[1] - step * grad1

        J_history.append(cost(X, y, theta))
        if len(J_history) < 2:
            # The checks below need at least two recorded costs.
            continue

        # test if the cost increased compared to the first recorded cost
        if J_history[-1] - const.INCREASE_THRESHOLD > J_history[0]:
            logerr(
                'wrong value of alpha (%f), the cost inscrease -> stop fit' %
                (alpha))
            return theta, J_history

        # test if the fit is done
        if auto_stop and \
                J_history[-2] <= J_history[-1] + (const.STOP_THRESHOLD * alpha):
            loginfo('auto stopped at %d iterations' % (iter_))
            return theta, J_history
    return theta, J_history
def export_theta(filename, theta):
    """
    write theta as JSON in filename

    On an OS-level failure (missing directory, path component that is not a
    directory, permission problem, ...) an error is logged and nothing else
    happens; the success message is only logged when the file was written.
    """
    try:
        with open(filename, 'w') as f:
            json.dump(theta, f)
    except OSError:
        # NotADirectoryError and FileNotFoundError are builtins (subclasses
        # of OSError), not attributes of the os module: referencing them as
        # os.* raised AttributeError instead of handling the failure.
        logerr('unable to write theta in', filename)
        return
    loginfo('export theta in %s' % (filename))
def start_predict(all_args):
    """
    print the estimated price for every requested mileage

    theta comes from get_theta_in_args(all_args); the process exits with
    status 1 when it is None. When all_args['data_predict']['value'] is None
    a single integer mileage is read from stdin (exit status 1 if it is not
    an int) and stored back into all_args.
    """
    theta = get_theta_in_args(all_args)
    if theta is None:
        exit(1)
    loginfo('using theta ->', theta)

    predict_arg = all_args['data_predict']
    if predict_arg['value'] is None:
        # Nothing given on the command line: ask the user interactively.
        try:
            predict_arg['value'] = [int(input('car km (int): '))]
        except ValueError:
            logerr('km should be an int')
            exit(1)

    for km in predict_arg['value']:
        estimated_price = predict(km, theta)
        print('for %8dkm -> estimated price: %d' % (km, estimated_price))
def setarg(all_args, arg):
    """
    store one 'name=value' command line argument in all_args

    all_args maps keys to dicts holding 'argnames' (accepted names), 'type'
    (target type handed to convert_from_str) and 'value' (filled in here).
    arg must contain exactly one '='.

    return True when the value was converted and stored, False (after
    logging an error) when the argument is malformed, unknown, or its value
    cannot be converted.
    """
    parts = arg.split('=')
    # Exactly two parts <=> exactly one '=' in arg.
    if len(parts) == 2:
        arg_name, arg_value = parts
        for arg_dict in all_args.values():
            if arg_name not in arg_dict['argnames']:
                continue
            # Convert once and reuse the result: index 0 is the success
            # flag, index 1 the converted value.
            conversion = convert_from_str(arg_value, arg_dict['type'])
            if conversion[0]:
                arg_dict['value'] = conversion[1]
                return True
            logerr('in arg %s -> unable to convert %s to %s' %
                   (arg, arg_value, str(arg_dict['type'])))
            return False
    logerr('invalid argument ->', arg)
    return False
def import_theta(filename):
    """
    load theta from the JSON file filename

    return the decoded theta once check_theta accepts it, or None (after an
    error log from here or from check_theta) when filename is not a file,
    its content is not valid JSON, or the decoded value is rejected.
    """
    if not os.path.isfile(filename):
        logerr('cannot import theta: %s is not a file' % (filename))
        return None

    with open(filename, 'r') as theta_file:
        try:
            loaded = json.load(theta_file)
        except ValueError:
            # json decoding errors are ValueError subclasses
            logerr('cannot import theta')
            return None

    if check_theta(loaded):
        loginfo('import theta from %s' % (filename))
        return loaded
    return None
def check_theta(theta):
    """
    check theta values

    theta must be a list of exactly two items convertible to float. On
    success both items are replaced in place by their float value and True
    is returned. Otherwise an error is logged, theta is left untouched and
    False is returned.
    """
    if not isinstance(theta, list):
        # One-item tuple on purpose: with a bare value, a tuple theta would
        # be unpacked as the format arguments and raise TypeError.
        logerr('theta is not a list %s' % (theta,))
        return False
    # Compare by value: 'is not 2' tests identity against an int literal.
    if len(theta) != 2:
        logerr('invalid theta size (%d: excpected 2) -> %s' % (len(theta),
                                                              theta))
        return False
    try:
        # Both conversions are evaluated before any assignment, so a failure
        # on the second item does not leave theta half-converted.
        theta[0], theta[1] = float(theta[0]), float(theta[1])
    except (ValueError, TypeError):
        logerr('cannot convert theta values to float %s' % (theta,))
        return False
    return True
def import_data(all_args):
    """
    read the csv named by all_args['data_filename']['value']

    The columns named by all_args['data_km']['value'] and
    all_args['data_price']['value'] are extracted as numpy arrays.

    return (data, X, y) with data the full DataFrame, or (None, None, None)
    after an error log when the csv cannot be parsed, is empty, or one of
    the two columns is missing.
    """
    failure = (None, None, None)
    try:
        data = pd.read_csv(all_args['data_filename']['value'])
    except (pd.errors.ParserError, pd.errors.EmptyDataError):
        logerr('unable to read the csv:', all_args['data_filename']['value'])
        return failure

    # Same extraction for both columns: km first (X), then price (y).
    extracted = []
    for column_key in ('data_km', 'data_price'):
        try:
            extracted.append(np.array(data[all_args[column_key]['value']]))
        except KeyError:
            logerr('invalid column %s in the csv: %s' %
                   (all_args[column_key]['value'],
                    all_args['data_filename']['value']))
            return failure

    X, y = extracted
    return data, X, y
logerr('km should be an int') exit(1) for i in all_args['data_predict']['value']: print('for %8dkm -> estimated price: %d' % (i, predict(i, theta))) if __name__ == '__main__': for arg in sys.argv[1:]: if arg in ['--usage']: args.print_usage(all_args, sys.argv[0]) exit(0) elif not args.setarg(all_args, arg): exit(1) if all_args['theta']['value'] is not None and not check_theta( all_args['theta']['value']): exit(1) if all_args['data_predict']['value'] is not None: if type(all_args['data_predict']['value']) is int: all_args['data_predict']['value'] = [ all_args['data_predict']['value'] ] for data in all_args['data_predict']['value']: if type(data) is not int: logerr('we can predict only with int data') exit(1) start_predict(all_args)