def select_mvar_model_from_cross_validation(datafile, max_degree, folder=10):
    data = read_data(datafile)
    global_minimum_error = np.inf
    global_optimal_theta = 0
    global_optimal_degree = 0
    testing_error_collection = {}
    training_error_collection = {}
    time_collection = {}
    for degree_index in range(1, max_degree + 1):
        time_start = time.time()
        # Track the best fold result for this degree separately from the
        # global optimum across all degrees.
        minimum_error = np.inf
        minimum_training_error = np.inf
        optimal_theta = 0
        for testing_data_index in range(folder):
            testing_data, testing_size, training_data, training_size = cross_validation(
                data, testing_data_index)
            theta, training_error, testing_error = mvar_regression(
                testing_data, testing_size, training_data, training_size, degree_index)
            if testing_error < minimum_error:
                minimum_error = testing_error
                minimum_training_error = training_error
                optimal_theta = theta
                print "Min error: %s" % str(minimum_error)
        testing_error_collection[degree_index] = minimum_error
        training_error_collection[degree_index] = minimum_training_error
        if minimum_error < global_minimum_error:
            global_minimum_error = minimum_error
            global_optimal_theta = optimal_theta
            global_optimal_degree = degree_index
        time_end = time.time()
        time_cost = time_end - time_start
        time_collection[degree_index] = time_cost
    print "Global Optimal Theta: %s" % global_optimal_theta
    print "Global Optimal Degree: %s" % global_optimal_degree
    return testing_error_collection, training_error_collection, time_collection

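# Hypothetical usage sketch (not part of the original code): it assumes
# matplotlib.pyplot is available as plt, and simply plots the per-degree
# testing/training errors returned by the function above.
def plot_mvar_cross_validation_errors(datafile, max_degree=6):
    testing_errors, training_errors, times = select_mvar_model_from_cross_validation(
        datafile, max_degree)
    degrees = sorted(testing_errors.keys())
    plt.plot(degrees, [testing_errors[d] for d in degrees], label="testing error")
    plt.plot(degrees, [training_errors[d] for d in degrees], label="training error")
    plt.xlabel("degree")
    plt.ylabel("error")
    plt.legend(loc="best")
    plt.show()
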
def run(N):
    """ Runs N episodes of a given length and then runs a demo with greedy policy """
    agent = Agent()
    data = read_data('./data/q.dat')
    if data is not None:
        agent.Q = data
    for i in range(N):
        bot = Bot()
        run_episode(bot, agent, None, draw=False, policy='eps_greedy')
        # if bot.center[1] > 7:
        print "robot moved on: %i steps" % bot.center[1]
    pg.init()
    pg.display.init()
    surf = pg.display.set_mode((800, 600))
    surf.fill((0, 0, 0))
    pg.display.flip()
    print "Surf1:", surf
    bot = Bot()
    bot.info()
    run_episode(bot, agent, surf, draw=True, policy='eps_greedy', episode_len=60)
    print "Robot's moves:\n", bot.path
    print "Robot walked %i m" % bot.center[1]
    print "Last state value=%.1f" % agent.get_state_value(bot.get_state())
    write_data(agent.Q, "data/q.dat")
    write_path(agent.Q_values, "data/path.csv")

def convert(folder_name):
    os.chdir(folder_name)
    las_filename = ''
    try:
        las_filename = [x for x in os.listdir() if x[-3:] == 'las'][0]
    except IndexError:
        # No .las file in this folder; nothing to convert.
        return
    if os.path.exists('failed.txt') or not os.path.exists('cropped_size.txt'):
        # This probably indicates the crop failed. In this case, remove the
        # las file so it doesn't take up space.
        os.remove(las_filename)
        return
    out_filename = las_filename.split('.')[0] + '.data'
    with open('cropped_size.txt', 'r') as f:
        [desired_rows, desired_cols, channels] = [int(x) for x in f.readline().split(',')]
    success = convert_las_to_matrix_and_store(las_filename, desired_rows,
                                              desired_cols, out_filename)
    os.remove(las_filename)
    if success:
        # Open up the file and count the zeros in the matrix.
        # If there are too many, reject it.
        m = read_data(out_filename)
        zeros = m.size - numpy.count_nonzero(m)
        if zeros < 125000:
            with open('pickled', 'w') as f:
                f.write('')  # This file indicates success to the pipeline
        else:
            with open('failed.txt', 'a') as f:
                f.write("Found " + str(zeros) + " zeros")

def DrawPIVPlot(files, bg_a, points):
    # reading saved data and creating vector labels
    x, y, u, v, mask = tools.read_data(files)
    label = []
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            label.append(f'Ux:{u[i, j]:6.4f} , Uy:{v[i, j]:6.4f} (mm/s)')
    # plotting the results (fig and ax are used and returned below, so they
    # must be created here rather than left commented out)
    fig, ax = plt.subplots()
    ax.imshow(np.flipud(bg_a), cmap='gray')
    plt.draw()
    q = ax.quiver(x, y, u, v, color='b', units='xy', minlength=0.1, minshaft=1.2)
    ax.set_title('Velocity Field', size=16)
    patches = [Polygon(points, closed=True)]
    p = PatchCollection(patches, alpha=1.0)
    ax.add_collection(p)
    ax.axis([0, 780, 0, 580])
    plt.xlabel('x (mm)', size=14, labelpad=2)
    plt.ylabel('y (mm)', size=14, labelpad=-10)
    return fig, q, label

def initialize_clustering(data_path, train_percent, clustering_algorithm, number_of_clusters):
    df_pre, df_raw = read_data(data_path=data_path)
    records = make_records(df_pre=df_pre, df_raw=df_raw)
    train_records, test_records = divide_train_test(
        records=records, train_percent=train_percent)
    return get_clusters(clustering_algorithm, train_records, number_of_clusters)

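# Hypothetical usage sketch (not in the original source): the constants mirror
# the values used in the __main__ block further below (QA-samples.xlsx, an 80%
# train split, 900 clusters, LDA-based clustering), assuming get_lda is
# imported as it is there.
clusters = initialize_clustering(
    data_path="QA-samples.xlsx",
    train_percent=0.8,
    clustering_algorithm=get_lda(False),
    number_of_clusters=900)
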
def run(nbr_episodes, mode):
    if mode == "forward":
        Q = read_data("data/forward.dat")
    elif mode == "right":
        Q = read_data("data/right.dat")
    elif mode == "left":
        Q = read_data("data/left.dat")
    else:
        print "Unknown mode:", mode
        return None
    seed()
    for i in range(nbr_episodes):
        D = Dor()
        run_episodes(mode, episode_len, D, Q)
        print "Dor coordinates:", D.CoM, "orientation:%i%%" % degrees(D.orientation)
        for action in D.actions:
            print action
    # clean_q(Q)
    write_data(Q, "data/" + mode + ".dat")

def make_front_and_back_examples(in_dir, out_dir):
    """Generate front-view and back-view image matrices for every sample."""
    for f in os.listdir(in_dir):
        fp = os.path.join(in_dir, f)
        fn = f.split('.')[0]
        data = tools.read_data(fp)
        np.save(os.path.join(out_dir, fn + '_front.npy'), np.rot90(data[:, :, 0]))
        np.save(os.path.join(out_dir, fn + '_back.npy'), np.rot90(data[:, :, 31]))

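# Hypothetical usage sketch (not part of the original): the directory names
# are placeholders; it only makes sure the output directory exists before
# calling make_front_and_back_examples.
if __name__ == '__main__':
    in_dir = 'data/volumes'       # placeholder input directory
    out_dir = 'data/projections'  # placeholder output directory
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    make_front_and_back_examples(in_dir, out_dir)
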
def select_linear_model(datafile, reduce_training_data=False):
    data = read_data(datafile)
    one_vector = np.ones((len(data), 1), dtype=np.float)
    Z = np.column_stack((one_vector, data))
    k = 10
    plt.scatter(data[:, 0], data[:, 1], color="red")
    minimum_error = np.inf
    optimal_parameter = 0
    for testing_index in range(k):
        testing_data, testing_size, training_data, training_size = cross_validation(
            data, testing_index)
        print "Training size = %s" % str(training_size)
        if reduce_training_data:
            print "Data reduced..."
            reduced_training_data_count = int(training_size * 0.75)
            training_size = reduced_training_data_count
            print "Reduced training data size = %s" % str(training_size)
            training_data = np.copy(training_data[:reduced_training_data_count, :])
        parameters = normal_equation(training_data)
        predicted_by_training_data = create_polynomial_regression_function(
            training_data[:, 0], parameters)
        # Compare the predictions against the training targets (column 1) and
        # normalise by the training size once.
        training_residual = predicted_by_training_data - training_data[:, 1]
        training_error = np.dot(training_residual.T, training_residual) / training_size
        testing_data_x = testing_data[:, 0]
        testing_data_y = testing_data[:, 1]
        predicted_testing_data = create_polynomial_regression_function(
            testing_data_x, parameters)
        testing_error = np.dot(
            (predicted_testing_data - testing_data_y).T,
            (predicted_testing_data - testing_data_y)) / testing_size
        if testing_error < minimum_error:
            minimum_error = testing_error
            optimal_parameter = parameters
        print "Training Error %s" % training_error
        print "Testing Error %s" % testing_error
        print "Parameter %s " % parameters
        print "\n"
    x_max = int(max(data[:, 0])) + 2
    x_min = int(min(data[:, 0])) - 2
    x = np.array([i for i in range(x_min, x_max)])
    y = create_polynomial_regression_function(x, optimal_parameter)
    plt.plot(x, y)
    print "The minimum testing error we've got from this training is: \n %s" % minimum_error
    print "Model parameters got from this training data is: \n %s" % optimal_parameter
    print "\n"

def analyze_dual_regression(datafile, lambda_, sigma):
    data = read_data(datafile)
    minimum_error = np.inf
    time_start = time.time()
    for folder_index in range(10):
        testing_data, testing_size, training_data, training_size = cross_validation(
            data, folder_index)
        # Use the regularisation and kernel-width arguments that were passed
        # in rather than hard-coded values.
        testing_error = dual_regression(training_data, testing_data,
                                        lambda_=lambda_, sigma=sigma)
        if testing_error < minimum_error:
            minimum_error = testing_error
    time_end = time.time()
    print "time_costing %s" % str(time_end - time_start)
    print "Minimum Testing Error %s" % str(minimum_error)

def update_tables():
    if Path(data_cleaned_dir).is_dir():
        check()
        print('Updating tables...')
        excel_file_paths = sorted(
            data_raw_dir.glob('**/*.xlsx'), reverse=True)[:MAX_NUMBER_OF_MONTHS]
        latest_excel_file_path = excel_file_paths[0]
        max_date = latest_excel_file_path.name.split('.')[0]
        date_str_list = [p.name.split('.')[0] for p in excel_file_paths[1:]]
        df_product_price, df_compound = tools.read_data(latest_excel_file_path)
        df_compound = (
            df_compound.drop_duplicates().loc[lambda x: x.Date == max_date])
        selected_columns = [
            '투여', '식약분류', '주성분코드', '제품코드', '제품명', '업체명',
            '규격', '단위', '전문/일반', '비고'
        ]
        df_product = (df_product_price
                      .loc[lambda x: (x.비고.isna()) & (x.Date == max_date)]
                      [selected_columns]
                      .drop_duplicates()
                      .reset_index()
                      .drop('index', axis='columns')
                      .set_index('제품코드'))
        df_price_new = (df_product_price[['제품코드', 'Date', '상한금액']]
                        .drop_duplicates()
                        .reset_index()
                        .drop('index', axis='columns')
                        .assign(상한금액=lambda x: pd.to_numeric(
                            x.상한금액.apply(lambda v: None if type(v) is not int else v),
                            errors='coerce')))
        df_price_old = (pd.read_pickle(f'{data_cleaned_dir}/price.pickle')
                        .reset_index()
                        .loc[lambda x: x.Date.isin(date_str_list)])
        df_price = pd.concat([df_price_new, df_price_old]).set_index('제품코드')
        df_compound.to_pickle(data_compound_path)
        df_product.to_pickle(data_product_path)
        df_price.to_pickle(data_price_path)
        print('Tables have been updated.')
    else:
        tools.check_data_raw()

def draw_scatter_graph(files, save=False):
    '''
    Plot a scatter graph for each dataset. If save is True, this function
    also saves the resulting figure as "data_scatter_plot.png".
    '''
    graph_index = 1
    for f in files:
        data = read_data(f)
        # Draw each dataset in its own subplot, indexed by graph_index.
        plt.subplot(2, 2, graph_index)
        plt.title(f)
        plt.scatter(data[:, 0], data[:, 1], color="red")
        graph_index += 1
    if save:
        plt.savefig("data_scatter_plot.png")
    plt.show()

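# Hypothetical usage sketch (not in the original source): the file names are
# placeholders. draw_scatter_graph lays the datasets out on a 2x2 subplot
# grid, so it expects at most four files per call.
example_files = ["dataset1.txt", "dataset2.txt", "dataset3.txt", "dataset4.txt"]
draw_scatter_graph(example_files, save=True)
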
class Test_Lgoin(unittest.TestCase):
    def setUp(self):
        self.login = Login()
        self.log = GetLog.get_logger()

    @parameterized.expand(read_data("login.yaml"))
    def test_login(self, mobile, password):
        result = self.login.login_inter(mobile, password)
        self.log.info("Login result: {}".format(result.json()))
        print("Login result:", result.json())
        assert_common(self, result)
        # Extract the token and append it to the request headers.
        token = result.json()["data"]
        self.log.info("Token value: {}".format(token))
        api.headers["Authorization"] = "Bearer " + token
        print("Headers after appending the token:", api.headers)

def select_ploynomial_models(data_file, max_degree=6, save=False, reduce_training_data=False):
    '''
    Use 10-fold cross-validation to test polynomial models.
    '''
    data = read_data(data_file)
    # Define the range of x to draw the model graph.
    if max_degree < 1:
        raise ValueError("max_degree should be at least 1")
    # If max_degree == 1, this function only draws one graph; otherwise it
    # draws multiple graphs.
    if max_degree == 1:
        optimal_parameters, testing_error = select_polynomial_model_from_cross_validation(
            data, 1, reduce_training_data=reduce_training_data)
    else:
        optimal_parameters = 0
        minimum_testing_error = np.inf
        optimal_degree = 0
        # Collect the smallest error for each degree for plotting a
        # degree-vs-error line graph.
        error_collections = {}
        training_error_collections = {}
        for i in range(1, max_degree + 1):
            parameters, testing_error, training_error = select_polynomial_model_from_cross_validation(
                data, i, reduce_training_data=reduce_training_data)
            # Save degree and error in a dict.
            error_collections[i] = testing_error
            training_error_collections[i] = training_error
            if testing_error < minimum_testing_error:
                optimal_degree = i
                minimum_testing_error = testing_error
                optimal_parameters = parameters
    if save:
        plt.savefig("polynomial_model_graph.png")
    # return optimal_parameters, optimal_degree
    plt.scatter(data[:, 0], data[:, 1], color="red")
    x_max = int(max(data[:, 0])) + 2
    x_min = int(min(data[:, 0])) - 2
    x = np.array([i for i in range(x_min, x_max)])
    y = create_polynomial_regression_function(x, optimal_parameters)
    plt.plot(x, y)
    print "Optimal Degree %s" % str(optimal_degree)
    print "Optimal Parameters %s" % str(optimal_parameters)
    return error_collections, training_error_collections

def given_linear_regression(datafile):
    '''
    Compare the performance of my function with the given regression function.
    '''
    # Import data
    data = read_data(datafile)
    row, col = data.shape
    testing_data, testing_size, training_data, training_size = cross_validation(
        data, 1)
    # Define the range of x
    x_max = int(max(data[:, 0])) + 2
    x_min = int(min(data[:, 0])) - 2
    x = np.array([i for i in range(x_min, x_max)])
    x_length = len(x)
    one_vector_ = np.ones((x_length, 1), dtype=np.float)
    X = np.column_stack((one_vector_, x.reshape(x_length, 1)))
    # Draw my function's fitting result graph.
    # plt.subplot(2, 1, 1)
    # plot_regression_model(
    #     "My Method",
    #     training_data,
    #     training_size,
    #     testing_data,
    #     testing_size,
    #     1)
    # Draw the given function's fitting result graph.
    one_vector = np.ones((training_size, 1), dtype=np.float)
    one_vector_test = np.ones((testing_size, 1), dtype=np.float)
    Z = np.column_stack((one_vector, training_data[:, 0]))
    Z_test = np.column_stack((one_vector_test, testing_data[:, 0]))
    # print Z
    clf = linear_model.LinearRegression()
    clf.fit(Z, training_data[:, 1])
    print("Residual sum of squares: %.2f"
          % np.mean((clf.predict(Z_test) - testing_data[:, 1]) ** 2))
    # Use clf.predict so the fitted intercept is included in the plotted line.
    y = clf.predict(X)
    y_training = clf.predict(Z)
    plt.scatter(data[:, 0], data[:, 1], color="red")
    plt.plot(x, y)
    x_test = testing_data[:, 0]
    y_test = testing_data[:, 1]

def test_movement():
    data = tools.read_data('data/sensor.dat')
    x_p = 0
    y_p = 0
    z_p = 0
    for (step, reading) in enumerate(data):
        print step
        # if step == 10:
        #     break
        r1 = reading['odometry']['r1']
        r2 = reading['odometry']['r2']
        t = reading['odometry']['t']
        x = x_p + t * math.cos(z_p + r1)
        y = y_p + t * math.sin(z_p + r1)
        z = z_p + r1 + r2
        x_p = x
        y_p = y
        z_p = z
        drawing.draw_state_for_me(step, x, y, z, '/testm/')

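# Minimal sketch (not part of the original code) of the odometry motion model
# used in the loop above, factored into a reusable helper: rotate by r1,
# translate by t along the new heading, then rotate by r2.
def odometry_step(pose, r1, t, r2):
    x_p, y_p, z_p = pose
    x = x_p + t * math.cos(z_p + r1)
    y = y_p + t * math.sin(z_p + r1)
    z = z_p + r1 + r2
    return (x, y, z)
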
def analyze_stochastic_gradient_descent(datafile):
    '''
    Compute the minimum testing error and the corresponding parameters found
    by stochastic gradient descent under 10-fold cross-validation, and report
    how long the search took.
    '''
    time_start = time.time()
    minimum_error = np.inf
    optimal_parameter = 0
    data = read_data(datafile)
    for testing_data_index in range(10):
        testing_data, testing_size, training_data, training_size = cross_validation(
            data, testing_data_index)
        parameters = iterative_compute_gd(training_data, 2)
        y = predicted_value_of_dual_regression(parameters, testing_data[:, :-1], 2)
        testing_error = compute_testing_error(y, testing_data[:, -1])
        if testing_error < minimum_error:
            minimum_error = testing_error
            optimal_parameter = parameters
    time_end = time.time()
    time_cost = time_end - time_start
    print time_cost, minimum_error, optimal_parameter

import tools
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

if __name__ == '__main__':
    args = tools.parse_args_visualize()
    model = tools.load_model(args['load_path'])
    data, target = tools.read_data(args['data_path'])
    sns.set_style('darkgrid')
    sns.scatterplot(x=data, y=target, label='Data')
    reg_x = np.arange(min(data), max(data), 5)
    reg_y = [model.predict(i) for i in reg_x]
    plt.plot(reg_x, reg_y, color='red', label='Regression')
    plt.legend(loc='best')
    plt.savefig('plot.png')
    print('Plot saved to plot.png')

def get_landmarks(self):
    pass

def add_landmarks(self, numLand=1):
    pass

def predict(self, numLand, pose, noise):
    pass

def correct(self, numPose, numLand, pose, noise):
    pass

if __name__ == "__main__":
    data = tools.read_data('data/sensor.dat')
    world = tools.read_world('data/world.dat')
    f = plt.Figure(figsize=(5, 5), dpi=100)
    f.set_visible(False)
    plt.axes()
    # Accommodate landmarks to plot...
    lx = []
    ly = []
    for landmark in world:
        lx.append(landmark['x'])
        ly.append(landmark['y'])
    landmarks = [lx, ly]
    print "Beginning EKFSlam Test"
    # Keep track of the observed landmarks

def initialize():
    if not Path(data_cleaned_dir).is_dir():
        Path(data_cleaned_dir).mkdir(parents=True, exist_ok=True)
    excel_file_paths = sorted(
        list(data_raw_dir.glob('**/*.xlsx')), reverse=True)[:MAX_NUMBER_OF_MONTHS]
    df_list = [
        tools.read_data(data_path) for data_path in tqdm(excel_file_paths)
    ]
    df_product_price, df_compound = map(pd.concat, list(zip(*df_list)))
    max_date = df_product_price.Date.max()
    df_compound = (
        df_compound
        .drop_duplicates()
        .loc[lambda x: x.Date == max_date]
    )
    selected_columns = [
        '투여', '식약분류', '주성분코드', '제품코드', '제품명', '업체명',
        '규격', '단위', '전문/일반', '비고'
    ]
    df_product = (
        df_product_price
        .loc[lambda x: (x.비고.isna()) & (x.Date == max_date)]
        [selected_columns]
        .drop_duplicates()
        .reset_index()
        .drop('index', axis='columns')
        .set_index('제품코드')
    )
    df_price = (
        df_product_price
        [['제품코드', 'Date', '상한금액']]
        .drop_duplicates()
        .reset_index()
        .drop('index', axis='columns')
        .assign(상한금액=lambda x: pd.to_numeric(
            x.상한금액.apply(lambda v: None if type(v) is not int else v),
            errors='coerce'
        ))
        .set_index('제품코드')
    )
    df_compound.to_pickle(data_compound_path)
    df_product.to_pickle(data_product_path)
    df_price.to_pickle(data_price_path)

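# Hypothetical usage sketch (not in the original source): one plausible flow
# is to build the cleaned pickles once with initialize() and then refresh them
# with update_tables() (defined earlier) when new monthly Excel files arrive.
if __name__ == '__main__':
    initialize()
    update_tables()
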
from algorithms.hierarchical import hierarchical
from algorithms.incremental import incremental
from algorithms.lda.lda import get_lda
from clustering_test import preform_test
from similarity_algorithms import euclidean_distance
from tools import get_clusters, make_records, read_data, divide_train_test

if __name__ == '__main__':
    data_path = "QA-samples.xlsx"
    train_percent = 0.8
    number_of_clusters = 900
    clustering_algorithm = get_lda(False)
    df_pre, df_raw = read_data(data_path=data_path)
    records = make_records(df_pre=df_pre, df_raw=df_raw)[:100]
    train_records, test_records = divide_train_test(
        records=records, train_percent=train_percent)
    clusters = get_clusters(clustering_algorithm, train_records, number_of_clusters)
    is_lda = (clustering_algorithm == get_lda(True)
              or clustering_algorithm == get_lda(False))
    preform_test(clusters, test_records, euclidean_distance,
                 clustering_algorithm_name=clustering_algorithm.__name__,
                 is_lda=is_lda, number_of_clusters=number_of_clusters)

import pickle
import pdb
import os

import numpy as np

import tools
import view
from sklearn.svm import SVC
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.svm.libsvm import decision_function

# CREATE RESULT FOLDER
if not os.path.exists("results"):
    os.makedirs("results")

# READ DATA
print("LOAD/READ DATA")
xtrain, ytrain, xtest, ytest = tools.read_data()
numbers = [4, 9]
x, y = tools.choose_numbers(xtrain, ytrain, numbers)
xt, yt = tools.choose_numbers(xtest, ytest, numbers)
print("LOAD/READ DATA --- DONE!")

# TRAIN SVM
clf = SVC()
clf.fit(x, y)

# GENERATE RANDOM SAMPLES
samplesize = 5000
samples = np.random.uniform(-1., 1., (samplesize, len(x[0])))
# np.random.uniform(0., 1., (samplesize, len(x[0])))

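# Hypothetical follow-up sketch (not in the original snippet): score the
# trained SVC on the held-out digits chosen above using the standard
# scikit-learn score() API.
accuracy = clf.score(xt, yt)
print("Test accuracy on digits {}: {:.3f}".format(numbers, accuracy))
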
import numpy as np
import pickle
import pdb
import os

import tools
import view
from SVM import *
from POIM import *
from Motif import *

# CREATE RESULT FOLDER
if not os.path.exists("results"):
    os.makedirs("results")

# READ DATA
print("LOAD/READ DATA")
xtrain, xtest, y_train, ytest = tools.read_data(job="read", lines=10000)
print("LOAD/READ DATA --- DONE!")

# TRAIN SVM
print("TRAIN SVM")
Cobj = SVM(xtrain, y_train)
Cobj.train(C=1.)
Cobj.svm_save("results/svm_trained.pkl")

# COMPUTE gPOIM
print("COMPUTE gPOIM")
small_k = 2
Pobj = gPOIM()
Pobj.set_up(Cobj, samplesize=100, small_k=small_k)
Pobj.save("results/gPOIM.pkl")
