def test_matrix_from_files(self):
    """Verify the shape of the matrix built from the 'original' folder.

    The matrix returned by ``mu.create_matrix_from_filelist`` is expected
    to have a first dimension equal to the number of listed files and a
    second dimension equal to the requested sampling size.
    """
    expected_samples = 50
    source_dir = current_dir + '/original'
    listed_files = mu.listfilesinfolder(source_dir)

    matrix = mu.create_matrix_from_filelist(listed_files, expected_samples)

    self.assertEqual(matrix.shape, (len(listed_files), expected_samples))
def main():
    """Run the matching script end to end.

    Steps, in order:
      1. Look up the data locations and print their attributes.
      2. Read the origin and filtered file lists and turn each into a
         matrix with a fixed sampling size.
      3. Decompose the origin matrix with ``gs.gram_schmidt`` into a
         ``(q, r)`` pair and express the filtered matrix in the same
         basis via ``q.T . filtered``.
      4. For every column of the origin ``r`` matrix, print whatever
         ``__find_best_match`` returns against the projected filtered data.
    """
    sampling_size = 20000

    locations = mu.getlocationofdata()
    print(locations.__dict__)

    origin_files = mu.readlistoffiles(locations.originfiles)
    filtered_files = mu.readlistoffiles(locations.filteredfiles)

    origin_matrix = mu.create_matrix_from_filelist(origin_files, sampling_size)
    filtered_matrix = mu.create_matrix_from_filelist(
        filtered_files, sampling_size
    )

    # NOTE(review): presumably a QR-style factorisation (orthonormal q,
    # coefficient matrix r) -- confirm against gs.gram_schmidt.
    q_origin, r_origin = gs.gram_schmidt(origin_matrix)
    print(q_origin.shape, r_origin.shape)

    # Project the filtered data onto the basis derived from the origin data.
    r_filtered = np.dot(q_origin.T, filtered_matrix)
    print("r_filtered:", r_filtered.shape)
    print("r_origin:", r_origin.shape)

    # Iterating over the transpose walks r_origin column by column.
    for column in r_origin.T:
        print("found:", __find_best_match(column, r_filtered))