def iteration_step(training_data, input_coordinates, structure, C_old, U_old, k,
                   shape_of_grid, time_frame_sums, T, W, ES, valid_timesteps,
                   evaluation_dataset, edges_of_cell):
    """
    input: training_data, input_coordinates, structure, C_old, U_old, k,
           shape_of_grid
               forwarded unchanged to mdl.model_creation(); C_old and U_old
               are the centres and weights from the previous iteration
           time_frame_sums, T, W, ES, valid_timesteps
               forwarded to fm.chosen_period() together with the per-frame
               sums of the model histogram computed here
           evaluation_dataset, edges_of_cell
               forwarded to ev.evaluation_step()
    output: sum_of_amplitudes fourth value returned by fm.chosen_period()
            C, U, COV, density_integrals
                              model parts returned by mdl.model_creation()
            W, ES, P          second, third and first value returned by
                              fm.chosen_period()
            diff              value returned by ev.evaluation_step()
    uses: mdl.model_creation(), fm.chosen_period(), ev.evaluation_step(),
          np.ndim(), np.sum()
    objective: to rebuild the model for the given structure and k, re-run the
               period selection on the model's per-frame sums and evaluate
               the new model
    """
    # testing how the "strength" of the periods changes when a cluster is added
    model = mdl.model_creation(input_coordinates, structure, training_data,
                               C_old, U_old, k, shape_of_grid)
    hist_freqs, C, U, COV, density_integrals = model

    # sum over every axis except the first one, leaving one value per entry
    # of axis 0 (presumably the time frames -- confirm against mdl)
    non_leading_axes = tuple(range(1, np.ndim(hist_freqs)))
    time_frame_freqs = np.sum(hist_freqs, axis=non_leading_axes)

    period_selection = fm.chosen_period(T, time_frame_sums, time_frame_freqs,
                                        W, ES, valid_timesteps)
    P, W, ES, sum_of_amplitudes = period_selection

    diff = ev.evaluation_step(evaluation_dataset, C, COV, density_integrals,
                              structure, k, edges_of_cell)
    # end of testing
    return (sum_of_amplitudes, C, U, COV, density_integrals, W,
            ES, P, diff)
def best_diff(training_data, domain_coordinates, domain_values, frequencies, k,
              new_structure, params, transformation, eval_dataset,
              repetitions=21):
    """
    input: training_data, domain_coordinates
               each converted with dio.create_X(., new_structure, transformation)
           domain_values    forwarded to iteration_step()
           frequencies      forwarded to fm.chosen_period()
           k                forwarded to iteration_step() and returned
           new_structure    structure used for dio.create_X() and iteration_step()
           params           forwarded to iteration_step()
           transformation   forwarded to dio.create_X()
           eval_dataset     indexable with at least 4 entries; entries 0 and 2
                            are converted with dio.create_X(), entries 1 and 3
                            are passed on untouched
           repetitions      positive integer, how many times the model is
                            built (default 21, the former hard-coded value)
    output: diff, C, densities, COV  results of the chosen iteration_step() run
            the_period               first value of fm.chosen_period() computed
                                     from the chosen run's `difference`
            k                        as given
            heights                  result of the chosen iteration_step() run
    raises: ValueError when repetitions < 1
    uses: dio.create_X(), iteration_step(), fm.chosen_period(),
          np.argsort(), np.asarray()
    objective: to build the model several times (the clustering can fail or
               vary between runs) and keep the run whose difference to the
               training data is the median one
    """
    if repetitions < 1:
        raise ValueError('repetitions must be at least 1')

    X = dio.create_X(training_data, new_structure, transformation)
    DOMAIN = dio.create_X(domain_coordinates, new_structure, transformation)
    eval_domain = (dio.create_X(eval_dataset[0], new_structure, transformation),
                   eval_dataset[1],
                   dio.create_X(eval_dataset[2], new_structure, transformation),
                   eval_dataset[3])

    list_of_others = []
    list_of_diffs = []
    list_of_differences = []
    # NOTE(review): this call passes 7 arguments and unpacks 6 results; it
    # presumes an iteration_step() with that contract is the one in scope.
    for _ in range(repetitions):
        diff_j, C_j, densities_j, COV_j, difference_j, heights_j = \
            iteration_step(DOMAIN, domain_values, X, k, new_structure, params,
                           eval_domain)
        list_of_diffs.append(diff_j)
        list_of_others.append((diff_j, C_j, densities_j, COV_j, k, heights_j))
        list_of_differences.append(difference_j)

    # Index of the run with the median difference.  The previous
    # np.where(np.median(...))[0][0] tested the truth value of a scalar and so
    # always produced 0 (or failed on a zero median).  A stable sort plus the
    # middle position selects an actual run, also for an even number of runs
    # (upper median) and with NaN diffs (sorted last).
    # Assumes every diff_j is a scalar -- TODO confirm.
    order = np.argsort(np.asarray(list_of_diffs), kind='mergesort')
    chosen_model = int(order[len(order) // 2])

    # the second returned value (sum of amplitudes) is not used in this version
    the_period, _ = fm.chosen_period(domain_coordinates[:, 0],
                                     list_of_differences[chosen_model],
                                     frequencies)
    diff, C, densities, COV, k, heights = list_of_others[chosen_model]
    return diff, C, densities, COV, the_period, k, heights
def whole_initialization(training_data, k, edges_of_cell, longest, shortest,
                         training_dataset):
    """
    input: training_data     forwarded to first_structure(),
                             grid.time_space_positions() and
                             mdl.model_creation()
           k                 positive integer, number of clusters
           edges_of_cell     forwarded to grid.time_space_positions()
           longest           float, length of the longest wanted period
           shortest          float, length of the shortest wanted period
           training_dataset  forwarded to grid.time_space_positions()
    output: input_coordinates, overall_sum, structure, C, U, shape_of_grid,
            time_frame_sums, T, W, ES, P, COV, density_integrals,
            valid_timesteps (in this order)
            C, U, COV and density_integrals are the placeholder -1 when
            len(shape_of_grid[0]) == 1, because no model is created then
    uses: first_structure(), grid.time_space_positions(),
          mdl.model_creation(), first_time_frame_freqs(),
          fm.build_frequencies(), fm.chosen_period()
    objective: to perform the first iteration step and to initialize the
               variables of the learning loop
    """
    print('starting learning iteration: 0 (initialization)')
    structure = first_structure(training_data)

    grid_description = grid.time_space_positions(edges_of_cell, training_data,
                                                 training_dataset)
    (input_coordinates, time_frame_sums, overall_sum,
     shape_of_grid, T, valid_timesteps) = grid_description

    if len(shape_of_grid[0]) != 1:
        # the returned histogram is not needed during initialization
        _, C, U, COV, density_integrals = mdl.model_creation(
            input_coordinates, structure, training_data,
            0, 0,  # C_in and U_in: there is no previous iteration
            k, shape_of_grid)
    else:
        # no model is created in this case; -1 marks the missing parts
        C = U = COV = density_integrals = -1

    time_frame_freqs = first_time_frame_freqs(overall_sum, shape_of_grid[0])
    W = fm.build_frequencies(longest, shortest)
    ES = -1  # no previous error
    # the fourth returned value is not needed here
    P, W, ES, _ = fm.chosen_period(T, time_frame_sums, time_frame_freqs[0],
                                   W, ES, valid_timesteps)

    print('used structure: ' + str(structure))
    print('leaving learning iteration: 0 (initialization)')
    return (input_coordinates, overall_sum, structure, C,
            U, shape_of_grid, time_frame_sums, T, W, ES, P, COV,
            density_integrals, valid_timesteps)
def proposed_method(domain_coordinates, domain_values, training_data,
                    eval_dataset, params, evaluation):
    """
    input: domain_coordinates numpy array, its column 0 is passed to
                              fm.chosen_period() as the time positions
           domain_values      numpy array, values belonging to
                              domain_coordinates
           training_data      forwarded to dio.create_X(),
                              it.first_structure(), best_diff() and
                              step_evaluation()
           eval_dataset       indexable with at least 4 entries (used directly
                              only when evaluation[0] is False, otherwise
                              forwarded)
           params             forwarded to the model-building functions
           evaluation         indexable with at least 8 entries:
               [0] False -> build one model for the given structure and k,
                   otherwise -> search for the structure
               [1] edges_of_cell (not read here)
               [2] edges_of_big_cell (not read here)
               [3] transformation
               [4] max_number_of_periods, upper limit for len(structure[1])
                   (read only during the structure search)
               [5] (longest, shortest), forwarded to it.build_frequencies()
                   (read only during the structure search)
               [6] structure (read only when [0] is False)
               [7] k, number of clusters
    output: C, densities, COV, k, structure, heights
    raises: ValueError when the frequency of the first chosen period is not
            among the built frequencies
    uses: dio.create_X(), iteration_step(), it.build_frequencies(),
          it.first_structure(), fm.chosen_period(), bs.radius(), best_diff(),
          step_evaluation(), np.mean(), np.isclose(), np.delete()
    objective: to learn model parameters
    """
    transformation = evaluation[3]
    k = evaluation[7]

    # `== False` (rather than `not ...`) is kept on purpose: a flag such as
    # None must still select the structure search below.
    if evaluation[0] == False:
        structure = evaluation[6]
        X = dio.create_X(training_data, structure, transformation)
        DOMAIN = dio.create_X(domain_coordinates, structure, transformation)
        eval_domain = (dio.create_X(eval_dataset[0], structure, transformation),
                       eval_dataset[1],
                       dio.create_X(eval_dataset[2], structure, transformation),
                       eval_dataset[3])
        # NOTE(review): this call passes 7 arguments and unpacks 6 results; it
        # presumes an iteration_step() with that contract is the one in scope.
        _, C, densities, COV, _, heights = iteration_step(
            DOMAIN, domain_values, X, k, structure, params, eval_domain)
        return C, densities, COV, k, structure, heights

    max_number_of_periods = evaluation[4]
    longest, shortest = evaluation[5]

    # initialization
    frequencies = it.build_frequencies(longest, shortest)
    structure = it.first_structure(training_data)
    if structure[0] == 0 and structure[1] == []:
        # there is nothing to cluster, so a structure with one 'circle' has to
        # be created before clustering
        # Centre the signal on its mean.  The former
        # `domain_values / len(domain_values)` divided element-wise and only
        # rescaled the signal instead of removing its average.
        average = np.mean(domain_values)
        the_period = fm.chosen_period(domain_coordinates[:, 0],
                                      domain_values - average,
                                      frequencies)[0]
        structure[1].append(bs.radius(the_period, structure))
        structure[2].append(the_period)

        # Remove the frequency belonging to the chosen period.  An exact
        # float comparison can miss it by a rounding error of 1 / (1 / w), so
        # a tight relative tolerance is used; a real miss still raises
        # ValueError as list.remove() did.
        candidates = np.asarray(frequencies)
        matches = np.flatnonzero(
            np.isclose(candidates, 1.0 / the_period, rtol=1e-9, atol=0.0))
        if matches.size == 0:
            raise ValueError('frequency of the chosen period ' + str(the_period)
                             + ' is not among the tested frequencies')
        frequencies = np.delete(candidates, matches[0], axis=0)
        print('nothing to cluster, periodicity ' + str(the_period) + ' chosen and the corresponding frequency removed')

    # create model
    diff, C, densities, COV, the_period, k, heights = best_diff(
        training_data, domain_coordinates, domain_values, frequencies, k,
        structure, params, transformation, eval_dataset)

    # keep extending the structure until step_evaluation() signals the end or
    # the allowed number of periods is reached
    jump_out = 0
    while jump_out == 0:
        jump_out, diff, C, densities, COV, the_period, structure, frequencies, k, heights = \
            step_evaluation(diff, C, densities, COV, the_period, structure,
                            frequencies, training_data, domain_coordinates,
                            domain_values, transformation, k, params, heights,
                            eval_dataset)
        print('structure: ' + str(structure) + ', number of clusters: ' + str(k) + ', and difference to training data: ' + str(diff))
        if len(structure[1]) >= max_number_of_periods:
            jump_out = 1
    return C, densities, COV, k, structure, heights