def main():
    dataset_type = sys.argv[1]
    direc = sys.argv[2]
    path = sys.argv[3]
    length = int(sys.argv[4])
    num_normaldist_ave = 3

    seqs, _, _ = read_sequences(dataset_type, direc=direc,
                                feature_normalization=True)
    print(len(seqs))
    seqs = augment_data(seqs, length, num_normaldist_ave=num_normaldist_ave)
    print(len(seqs))

    for i, (sample_name, seq) in enumerate(seqs.items()):
        if i % 5 == 0:
            # Entries at positions i % 5 == 0 are the original samples:
            # remember the name so the augmented copies that follow can
            # reference their source.
            original_name = sample_name
        elif i % 5 == 1:
            dic = dict(augmented_seq=seq,
                       sample_name=sample_name,
                       original_name=original_name,
                       distribution="uniform_random")
            f_path = os.path.join(path, sample_name)
            file_utils.save_pickle(f_path, dic)
        else:
            # This branch currently stores the same metadata as the
            # i % 5 == 1 branch, including distribution="uniform_random".
            dic = dict(augmented_seq=seq,
                       sample_name=sample_name,
                       original_name=original_name,
                       distribution="uniform_random")
            f_path = os.path.join(path, sample_name)
            file_utils.save_pickle(f_path, dic)
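# A minimal invocation sketch for the script above, assuming the module is run
# directly. The file name "augment_dataset.py" and the argument values are
# hypothetical; the argument order follows the sys.argv accesses in main()
# (dataset type, input directory, output directory, length). The original
# module may already define its own entry-point guard elsewhere.
#
#   python augment_dataset.py 6DMG /path/to/dataset /path/to/output 150
#
if __name__ == "__main__":
    main()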
def run(dataset_type, dataset_location, sigma, triangular, output_dir,
        output_filename_format, data_augmentation_size, num_process, hdf5):
    ########
    # Create the output directory and back up the configuration file to it
    ########
    os.makedirs(output_dir, exist_ok=True)
    shutil.copy(os.path.abspath(sys.argv[2]),
                os.path.join(output_dir, os.path.basename(sys.argv[2])))

    assert others.is_valid_dataset_type(dataset_type)
    output_dir = os.path.abspath(output_dir)
    assert os.path.isdir(output_dir)

    ########
    # Prepare time-series data
    ########
    seqs, sample_names, labels_str, _ = read_sequences(dataset_type, dataset_location)
    print("%d samples." % len(seqs))

    ########
    # Global Alignment Kernel execution
    ########
    start = time.time()
    gram = gak.gram_gak(seqs, sigma, triangular, num_process=num_process)
    end = time.time()

    ########
    # Output to a file
    ########
    output_filename_format = output_filename_format.replace(
        "${sigma}", str(sigma)).replace("${triangular}", str(triangular))
    if hdf5:
        log_file = os.path.join(output_dir, output_filename_format + ".hdf5")
        timelog = log_file.replace(".hdf5", ".timelog")
    else:
        log_file = os.path.join(output_dir, output_filename_format + ".pkl")
        timelog = log_file.replace(".pkl", ".timelog")
    file_utils.save_new_result(log_file, dataset_type, gram, sample_names, hdf5=hdf5)

    duration = end - start
    num_samples = len(sample_names)
    with open(timelog, 'w') as time_fd:
        time_fd.write("gram_gak_start: %d\n" % start)
        time_fd.write("gram_gak_end: %d\n" % end)
        time_fd.write("gram_gak_duration: %d\n" % duration)
        time_fd.write("num_samples: %d\n" % num_samples)
        time_fd.write("average_time_per_gak: %.5f\n"
                      % (duration / (num_samples ** 2) * num_process))
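# Illustration of the "${sigma}"/"${triangular}" placeholder expansion used in
# run() above. This helper is hypothetical and not part of the pipeline; the
# template string is an assumed example modelled on result file names that
# appear elsewhere in this repository (e.g. the test fixture paths).
def _example_output_name():
    fmt = "gram_upperChar_sigma${sigma}_triangular${triangular}"  # assumed template
    name = fmt.replace("${sigma}", str(30)).replace("${triangular}", str(None))
    # With hdf5=True, run() would write "<output_dir>/" + name + ".hdf5"
    # plus a matching ".timelog" with the timing summary.
    return name  # "gram_upperChar_sigma30_triangularNone"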
def setUp(self):
    pickle_or_hdf5_location = "results/6DMG/30/t1/gram_upperChar_sigma30_triangularNone_t1_noaugmentation.hdf5"
    dataset_location = "/Users/ngym/Lorincz-Lab/project/fast_time-series_data_classification/dataset/6DMG_mat_112712/matR_char"
    loaded_data = file_utils.load_hdf5(os.path.abspath(pickle_or_hdf5_location))
    gram_matrices = loaded_data['gram_matrices']
    self.gram = gram_matrices[0]['original']
    self.sample_names = loaded_data['sample_names']
    self.lmbd = 0.5

    dataset_type = loaded_data['dataset_type']
    sample_names = [s.split('/')[-1].split('.')[0]
                    for s in loaded_data['sample_names']]
    seqs, key_to_str, _ = read_sequences(dataset_type, direc=dataset_location)
    seqs = filter_samples(seqs, sample_names)
    key_to_str = filter_samples(key_to_str, self.sample_names)
    labels = list(key_to_str.values())
    tmp = list(labels)
    counter = Counter(tmp)
    #self.size_groups = [counter[label] for label in sorted(set(tmp), key=tmp.index)]
    self.size_groups = [15] * 26
def run(dataset_type, dataset_location, fold_count, fold_to_drop, params,
        output_dir, output_filename_format, output_file,
        data_augmentation_size):
    ########
    # Create the output directory and back up the configuration file to it
    ########
    os.makedirs(output_dir, exist_ok=True)
    try:
        shutil.copy(os.path.abspath(sys.argv[2]),
                    os.path.join(output_dir, os.path.basename(sys.argv[2])))
    except shutil.SameFileError:
        pass
    dataset_location = os.path.abspath(dataset_location)
    output_dir = os.path.abspath(output_dir)
    assert os.path.isdir(output_dir)

    main_start = os.times()

    ########
    # Prepare time-series data
    ########
    seqs, sample_names, labels_str, _ = read_sequences(dataset_type, dataset_location)
    print("%d samples." % len(seqs))

    if data_augmentation_size != 1:
        # Augment only train and validation data.
        # Test data is not augmented.
        folds = k_fold_cross_validation.get_kfolds(dataset_type, sample_names, fold_count)
        test_indices = folds[fold_to_drop - 1]
        train_validation_indices = np.delete(np.arange(len(seqs)), test_indices)
        train_validation_seqs = [seqs[i] for i in train_validation_indices]
        train_validation_sample_names = [sample_names[i]
                                         for i in train_validation_indices]
        train_validation_labels_str = [labels_str[i]
                                       for i in train_validation_indices]

        augmentation_magnification = 1.2
        train_validation_seqs, train_validation_sample_names, \
            labels_str_tr_val_augmented, flag_augmented = augment_data(
                train_validation_seqs,
                train_validation_sample_names,
                train_validation_labels_str,
                augmentation_magnification,
                rand_uniform=True,
                num_normaldist_ave=data_augmentation_size - 2)

        test_seqs = [seqs[i] for i in test_indices]
        test_labels_str = [labels_str[i] for i in test_indices]

        lb = LabelBinarizer()
        lb.fit(labels_str)
        Y_test = lb.transform(test_labels_str)
        Y_tr_val = lb.transform(labels_str_tr_val_augmented)

        time_dim = max([seq.shape[0] for seq in train_validation_seqs + test_seqs])
        pad_value = -4444
        train_validation_seqs = pad_sequences(
            [seq.tolist() for seq in train_validation_seqs],
            maxlen=time_dim, dtype='float32', padding='post', value=pad_value)
        test_seqs = pad_sequences(
            [seq.tolist() for seq in test_seqs],
            maxlen=time_dim, dtype='float32', padding='post', value=pad_value)
    else:
        folds = k_fold_cross_validation.get_kfolds(dataset_type, sample_names, fold_count)
        test_indices = folds[fold_to_drop - 1]
        train_validation_indices = np.delete(np.arange(len(seqs)), test_indices)
        train_validation_seqs = [seqs[i] for i in train_validation_indices]
        train_validation_sample_names = [sample_names[i]
                                         for i in train_validation_indices]
        train_validation_labels_str = [labels_str[i]
                                       for i in train_validation_indices]

        test_seqs = [seqs[i] for i in test_indices]
        test_labels_str = [labels_str[i] for i in test_indices]

        lb = LabelBinarizer()
        lb.fit(labels_str)
        Y_test = lb.transform(test_labels_str)
        Y_tr_val = lb.transform(train_validation_labels_str)

        time_dim = max([seq.shape[0] for seq in train_validation_seqs + test_seqs])
        pad_value = -4444
        train_validation_seqs = pad_sequences(
            [seq.tolist() for seq in train_validation_seqs],
            maxlen=time_dim, dtype='float32', padding='post', value=pad_value)
        test_seqs = pad_sequences(
            [seq.tolist() for seq in test_seqs],
            maxlen=time_dim, dtype='float32', padding='post', value=pad_value)

    modelfile_hdf5 = os.path.join(output_dir, output_filename_format + "_model.hdf5")

    # pre-processing
    feat_dim = seqs[0].shape[1]
    input_shape = (time_dim, feat_dim)

    K.clear_session()

    # build network
    rnn_ = rnn.Rnn(input_shape, pad_value,
                   params['rnn_units'], params['dense_units'], 'tanh',
                   params['rnn'], params['dropout'], params['implementation'],
                   params['bidirectional'], params['batchnormalization'])
    model = rnn_.create_RNN_base_network()
    model.add(Dense(Y_tr_val.shape[1], activation="softmax"))
    callbacks = [EarlyStopping(patience=params['patience']),
                 ModelCheckpoint(filepath=modelfile_hdf5, save_best_only=True)]
    loss_weights = None
    optimizer = RMSprop(clipnorm=1.)
    model.compile(loss=params['loss_function'], optimizer=optimizer)
    model.fit(train_validation_seqs, Y_tr_val,
              validation_split=0.1,
              shuffle=True,
              nb_epoch=params['epochs'],
              batch_size=512,
              verbose=1,
              callbacks=callbacks)

    time_pred_start = os.times()
    test_preds = model.predict_on_batch(test_seqs)
    time_pred_end = os.times()
    main_end = os.times()

    model.load_weights(modelfile_hdf5)
    roc_auc = roc_auc_score(y_true=Y_test, y_score=test_preds)
    test_preds_ = np.array([[1 if prob == max(probs) else 0 for prob in probs]
                            for probs in test_preds])
    f1 = f1_score(Y_test, test_preds_, average='weighted')

    num_calculated_sequences = len(test_seqs)

    virtual_prediction_duration = time_pred_end.user - time_pred_start.user \
        + time_pred_end.system - time_pred_start.system
    elapsed_prediction_duration = time_pred_end.elapsed - time_pred_start.elapsed
    virtual_classification_duration = 0
    elapsed_classification_duration = 0

    prediction = {}
    prediction['basics'] = {}
    prediction['basics']['number_of_calculated_sequences'] = len(test_seqs)
    prediction['all'] = {}
    prediction['all']['virtual_prediction_duration'] = virtual_prediction_duration
    prediction['all']['elapsed_prediction_duration'] = elapsed_prediction_duration
    prediction['each_seq'] = {}
    prediction['each_seq']['virtual_prediction_duration_per_calculated_sequence'] = \
        virtual_prediction_duration / num_calculated_sequences
    prediction['each_seq']['elapsed_prediction_duration_per_calculated_sequence'] = \
        elapsed_prediction_duration / num_calculated_sequences

    classification = {}
    classification['basics'] = {}
    classification['basics']['roc_auc'] = roc_auc
    classification['basics']['f1'] = f1
    classification['all'] = {}
    classification['all']['virtual_classification_duration'] = virtual_classification_duration
    classification['all']['elapsed_classification_duration'] = elapsed_classification_duration
    classification['each_seq'] = {}
    classification['each_seq']['virtual_classification_duration_per_calculated_sequence'] = \
        virtual_classification_duration / num_calculated_sequences
    classification['each_seq']['elapsed_classification_duration_per_calculated_sequence'] = \
        elapsed_classification_duration / num_calculated_sequences

    dic = dict(prediction=prediction, classification=classification)

    out_path = os.path.join(output_dir, output_file)
    file_utils.save_json(out_path, dic)
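# A sketch of the params dictionary this run() dereferences. The keys are
# exactly those read in the function body; the example values are assumptions
# for illustration only (the real values come from the backed-up
# configuration file).
example_params = {
    'rnn': 'LSTM',                 # passed to rnn.Rnn
    'rnn_units': [10],             # assumed shape of the units settings
    'dense_units': [3],
    'dropout': 0.3,
    'implementation': 1,
    'bidirectional': False,
    'batchnormalization': True,
    'patience': 2,                 # EarlyStopping patience
    'epochs': 100,                 # nb_epoch in model.fit
    'loss_function': 'categorical_crossentropy',
}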
def run(pickle_or_hdf5_location, dataset_location, fold_count, fold_to_drop,
        algorithm, params, output_dir, output_filename_format, output_file):
    ########
    # Create the output directory and back up the configuration file to it
    ########
    os.makedirs(output_dir, exist_ok=True)
    try:
        shutil.copy(os.path.abspath(sys.argv[2]),
                    os.path.join(output_dir, os.path.basename(sys.argv[2])))
    except shutil.SameFileError:
        pass

    hdf5 = pickle_or_hdf5_location[-4:] == "hdf5"

    check_fold(fold_count, fold_to_drop, hdf5)
    check_algorithm(algorithm)
    check_params(algorithm, params)

    pickle_or_hdf5_location = os.path.abspath(pickle_or_hdf5_location)
    dataset_location = os.path.abspath(dataset_location)
    output_dir = os.path.abspath(output_dir)
    assert os.path.isdir(output_dir)
    assert os.path.exists(pickle_or_hdf5_location)

    ########
    # Load complete GRAM matrix
    ########
    time_main_start = os.times()
    hdf5 = pickle_or_hdf5_location[-4:] == "hdf5"
    if hdf5:
        loaded_data = file_utils.load_hdf5(pickle_or_hdf5_location)
    else:
        loaded_data = file_utils.load_pickle(pickle_or_hdf5_location)
        check_pickle_format(loaded_data)

    dataset_type = loaded_data['dataset_type']
    if dataset_type == 'UCIauslan':
        loaded_sample_names = loaded_data['sample_names']
    else:
        loaded_sample_names = [s.split('/')[-1].split('.')[0]
                               for s in loaded_data['sample_names']]
    gram_matrices = loaded_data['gram_matrices']
    if len(gram_matrices) == 1:
        gram = gram_matrices[0]['original']
    else:
        gram = gram_matrices[-1]['completed_npsd']

    # drop elements
    if fold_count == 0:
        gram_drop = gram
    else:
        folds = k_fold_cross_validation.get_kfolds(dataset_type, loaded_sample_names, fold_count)
        indices_to_drop = folds[fold_to_drop - 1]
        gram_drop, dropped_elements = make_matrix_incomplete.gram_drop_samples(
            gram, indices_to_drop)

    ########
    # Prepare time-series data
    ########
    seqs, sample_names, labels_str, _ = read_sequences(dataset_type, dataset_location)
    seqs = filter_samples(seqs, sample_names, loaded_sample_names)
    labels_str = filter_samples(labels_str, sample_names, loaded_sample_names)

    ########
    # Execute Matrix Completion
    ########
    train_start = None
    train_end = None
    if algorithm == "gak":
        ########
        # Baseline GAK
        ########
        gram_completed, time_completion_start, time_completion_end \
            = matrix_completion.gak_matrix_completion(
                gram_drop, seqs, indices_to_drop,
                sigma=params['sigma'], triangular=params['triangular'])
        action = "GAK sigma: " + str(params['sigma']) \
            + " triangular: " + str(params['triangular'])
        output_filename_format = output_filename_format.replace(
            "${sigma}", str(params['sigma'])).replace(
                "${triangular}", str(params['triangular']))
    elif algorithm in {"softimpute", "knn", "iterativesvd"}:
        ########
        # Baselines: SoftImpute, KNN, IterativeSVD
        ########
        if algorithm == "softimpute":
            func = matrix_completion.softimpute_matrix_completion
            action = "Softimpute"
            print('running SoftImpute')
        elif algorithm == "knn":
            func = matrix_completion.knn_matrix_completion
            action = "KNN"
            print('running KNN')
        elif algorithm == "iterativesvd":
            func = matrix_completion.iterativesvd_matrix_completion
            action = "IterativeSVD"
            print('running IterativeSVD')
        else:
            print("unsupported fancyimpute algorithm")
            exit(-1)
        flag_test = np.zeros(len(seqs))
        flag_test[indices_to_drop] = 1
        drop_flag_matrix = create_true_GAK_flag_matrix(1 - params['gak_rate'], flag_test)
        for i in range(len(seqs)):
            drop_flag_matrix[i, i] = 1
            for j in range(i + 1):
                if i not in indices_to_drop and j not in indices_to_drop:
                    drop_flag_matrix[i, j] = 1
                    drop_flag_matrix[j, i] = 1
        print(len(seqs) ** 2)
        print(np.count_nonzero(drop_flag_matrix))
        gram_completed, time_completion_start, time_completion_end \
            = func(gram_drop, seqs,
                   sigma=params['sigma'],
                   triangular=params['triangular'],
                   num_process=params['num_process'],
                   drop_flag_matrix=drop_flag_matrix)
    elif algorithm == "rnn":
        ########
        # Our Scheme, Siamese Recurrent Neural Network
        ########
        modelfile_hdf5 = os.path.join(output_dir, output_filename_format + "_model.hdf5")
        logfile_loss = os.path.join(output_dir, output_filename_format + ".losses")
        gram_completed, time_train_start, time_train_end, \
            time_completion_start, time_completion_end \
            = matrix_completion.rnn_matrix_completion(
                gram_drop, seqs,
                params['epochs'], params['patience'],
                params['epoch_start_from'],
                logfile_loss, modelfile_hdf5,
                params['rnn'], params['rnn_units'], params['dense_units'],
                params['dropout'], params['implementation'],
                params['bidirectional'], params['batchnormalization'],
                params['mode'], params['loss_function'],
                params['loss_weight_ratio'], labels_str,
                params['siamese_joint_method'],
                params['siamese_arms_activation'],
                trained_modelfile_hdf5=params['trained_modelfile_hdf5'])
        action = "SiameseRNN"
    elif algorithm == "fast_rnn":
        ########
        # Our Scheme, Fast Siamese Recurrent Neural Network
        ########
        modelfile_hdf5 = os.path.join(output_dir, output_filename_format + "_model.hdf5")
        logfile_loss = os.path.join(output_dir, output_filename_format + ".losses")
        gram_completed, time_completion_start, time_completion_end \
            = matrix_completion.fast_rnn_matrix_completion(
                gram_drop, seqs,
                params['rnn'], params['rnn_units'], params['dense_units'],
                params['dropout'], params['implementation'],
                params['bidirectional'], params['batchnormalization'],
                params['loss_function'],
                params['siamese_arms_activation'],
                params['siamese_joint_method'],
                trained_modelfile_hdf5=params['trained_modelfile_hdf5'])
        action = "FastSiameseRNN"
    else:
        assert False

    ########
    # Make the completed matrix positive semidefinite, if it is not.
    ########
    # eigenvalue check
    time_npsd_start = os.times()
    gram_completed_npsd = nearest_positive_semidefinite.nearest_positive_semidefinite(
        gram_completed)
    time_npsd_end = os.times()

    ########
    # Save results
    ########
    if hdf5:
        log_file = os.path.join(output_dir, output_filename_format + ".hdf5")
    else:
        log_file = os.path.join(output_dir, output_filename_format + ".pkl")
    action += " " + time.asctime(time.localtime())
    file_utils.append_and_save_result(log_file, loaded_data, gram_drop,
                                      gram_completed, gram_completed_npsd,
                                      indices_to_drop, action, hdf5=hdf5)

    # calculate errors
    mse, mse_dropped, mae, mae_dropped, \
        relative, relative_dropped = calculate_errors(
            gram, gram_completed_npsd, dropped_elements)

    time_main_end = os.times()

    # save run times and errors
    num_calculated_elements = len(dropped_elements) - len(indices_to_drop) // 2
    num_dropped_sequences = len(indices_to_drop)
    out_path = os.path.join(output_dir, output_file)
    file_utils.save_analysis(out_path,
                             len(dropped_elements),
                             num_dropped_sequences,
                             num_calculated_elements,
                             time_completion_start, time_completion_end,
                             time_npsd_start, time_npsd_end,
                             time_main_start, time_main_end,
                             mse, mse_dropped,
                             mae, mae_dropped,
                             relative, relative_dropped)
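# For reference, a sketch of the per-algorithm params keys that run() above
# dereferences. The mapping is illustrative (e.g. it could back a
# check_params-style validation); it is not the project's own check_params
# implementation, and only the key names are taken from the code.
EXPECTED_PARAM_KEYS = {
    "gak": {"sigma", "triangular"},
    "softimpute": {"sigma", "triangular", "num_process", "gak_rate"},
    "knn": {"sigma", "triangular", "num_process", "gak_rate"},
    "iterativesvd": {"sigma", "triangular", "num_process", "gak_rate"},
    "rnn": {"epochs", "patience", "epoch_start_from", "rnn", "rnn_units",
            "dense_units", "dropout", "implementation", "bidirectional",
            "batchnormalization", "mode", "loss_function", "loss_weight_ratio",
            "siamese_joint_method", "siamese_arms_activation",
            "trained_modelfile_hdf5"},
    "fast_rnn": {"rnn", "rnn_units", "dense_units", "dropout", "implementation",
                 "bidirectional", "batchnormalization", "loss_function",
                 "siamese_arms_activation", "siamese_joint_method",
                 "trained_modelfile_hdf5"},
}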
def test_read_sequences(self):
    seqs, key_to_str, _ = rs.read_sequences(self.dataset_type, direc=self.direc)
    labels = key_to_str.values()
    c = Counter(labels)
    print(c)
def run(pickle_or_hdf5_location, dataset_location, fold_to_test, fold_to_tv,
        fold_count, params, output_dir, output_filename_format,
        data_augmentation_size):
    os.makedirs(output_dir, exist_ok=True)
    shutil.copy(os.path.abspath(sys.argv[2]),
                os.path.join(output_dir, os.path.basename(sys.argv[2])))

    hdf5 = pickle_or_hdf5_location[-4:] == "hdf5"
    if hdf5:
        loaded_data = file_utils.load_hdf5(os.path.abspath(pickle_or_hdf5_location))
    else:
        loaded_data = file_utils.load_pickle(os.path.abspath(pickle_or_hdf5_location))

    dataset_type = loaded_data['dataset_type']
    sample_names = [s.split('/')[-1].split('.')[0]
                    for s in loaded_data['sample_names']]
    gram_matrices = loaded_data['gram_matrices']
    gram = gram_matrices[0]['original']
    sample_names = loaded_data['sample_names']

    folds = k_fold_cross_validation.get_kfolds(dataset_type, sample_names, fold_count)
    folds = np.array(folds)
    test_indices = np.concatenate(folds[fold_to_test])
    tv_indices = np.concatenate(folds[fold_to_tv])
    fold_for_gram = np.delete(np.arange(fold_count), fold_to_test + fold_to_tv)
    gram_indices = np.concatenate(folds[fold_for_gram]).astype(int)

    seqs, key_to_str, _ = read_sequences(dataset_type, dataset_location)

    augmentation_magnification = 1.2
    seqs, key_to_str, flag_augmented = augment_data(
        seqs, key_to_str,
        augmentation_magnification,
        rand_uniform=True,
        num_normaldist_ave=data_augmentation_size - 2)

    seqs = filter_samples(seqs, sample_names)
    key_to_str = filter_samples(key_to_str, sample_names)

    logfile_hdf5 = os.path.join(output_dir, output_filename_format + "_model.hdf5")
    logfile_loss = os.path.join(output_dir, output_filename_format + ".losses")
    output_file = os.path.join(output_dir, output_filename_format + ".json")

    (roc_auc_score, f1_score) = KSS_unsupervised_alpha_prediction.get_classification_error(
        gram,
        gram_indices,
        tv_indices,
        test_indices,
        list(seqs.values()),
        params['epochs'],
        params['patience'],
        logfile_hdf5,
        logfile_loss,
        params['rnn'],
        params['rnn_units'],
        params['dense_units'],
        params['dropout'],
        params['implementation'],
        params['bidirectional'],
        params['batchnormalization'],
        params['mode'],
        list(key_to_str.values()),
        params['lmbd'],
        params['top_activation'])

    print(pickle_or_hdf5_location
          + " roc_auc_score: " + str(roc_auc_score)
          + " f1_score: " + str(f1_score))
    dic = dict(roc_auc_score=roc_auc_score, f1_score=f1_score)
    file_utils.save_json(output_file, dic)
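def _example_fold_split():
    # Illustration (not part of the original module) of the fold bookkeeping in
    # run() above, assuming fold_to_test and fold_to_tv are lists of fold
    # indices, as their use in `fold_to_test + fold_to_tv` implies.
    import numpy as np
    fold_count, fold_to_test, fold_to_tv = 5, [0], [1]
    fold_for_gram = np.delete(np.arange(fold_count), fold_to_test + fold_to_tv)
    # fold 0 is held out for testing, fold 1 for train/validation of the alpha
    # prediction, and folds 2-4 remain for the precomputed Gram matrix part.
    return fold_for_gram  # array([2, 3, 4])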
def run(pickle_or_hdf5_location, dataset_location, fold_count, fold_to_drop,
        params, output_dir, output_filename_format, output_file,
        data_augmentation_size):
    os.makedirs(output_dir, exist_ok=True)
    try:
        shutil.copy(os.path.abspath(sys.argv[2]),
                    os.path.join(output_dir, os.path.basename(sys.argv[2])))
    except shutil.SameFileError:
        pass

    hdf5 = pickle_or_hdf5_location[-4:] == "hdf5"
    check_fold(fold_count, fold_to_drop, hdf5)

    pickle_or_hdf5_location = os.path.abspath(pickle_or_hdf5_location)
    dataset_location = os.path.abspath(dataset_location)
    output_dir = os.path.abspath(output_dir)
    assert os.path.isdir(output_dir)
    assert os.path.exists(pickle_or_hdf5_location)

    main_start = os.times()

    hdf5 = pickle_or_hdf5_location[-4:] == "hdf5"
    if hdf5:
        loaded_data = file_utils.load_hdf5(pickle_or_hdf5_location)
    else:
        loaded_data = file_utils.load_pickle(pickle_or_hdf5_location)

    dataset_type = loaded_data['dataset_type']
    if dataset_type == 'UCIauslan':
        loaded_sample_names = loaded_data['sample_names']
    else:
        loaded_sample_names = [s.split('/')[-1].split('.')[0]
                               for s in loaded_data['sample_names']]
    gram_matrices = loaded_data['gram_matrices']
    if len(gram_matrices) == 1:
        gram = gram_matrices[0]['original']
    else:
        gram = gram_matrices[-1]['completed_npsd']

    # drop elements
    if fold_count == 0:
        gram_drop = gram
    else:
        folds = k_fold_cross_validation.get_kfolds(dataset_type, loaded_sample_names, fold_count)
        indices_to_drop = folds[fold_to_drop - 1]
        gram_drop, dropped_elements = make_matrix_incomplete.gram_drop_samples(
            gram, indices_to_drop)

    seqs, sample_names, labels_str, _ = read_sequences(dataset_type, dataset_location)
    seqs = filter_samples(seqs, sample_names, loaded_sample_names)
    labels_str = filter_samples(labels_str, sample_names, loaded_sample_names)

    train_start = None
    train_end = None

    modelfile_hdf5 = os.path.join(output_dir, output_filename_format + "_model.hdf5")
    logfile_loss = os.path.join(output_dir, output_filename_format + ".losses")

    # pre-processing
    num_seqs = len(seqs)
    time_dim = max([seq.shape[0] for seq in seqs])
    pad_value = -4444
    seqs = pad_sequences([seq.tolist() for seq in seqs],
                         maxlen=time_dim, dtype='float32',
                         padding='post', value=pad_value)
    feat_dim = seqs[0].shape[1]
    input_shape = (time_dim, feat_dim)

    K.clear_session()

    # build network
    model = siamese_rnn_branch.SiameseRnnBranch(
        input_shape, pad_value,
        params['rnn_units'],
        params['dense_units'],
        params['rnn'],
        params['dropout'],
        params['implementation'],
        params['bidirectional'],
        params['batchnormalization'],
        params['loss_function'],
        params['siamese_joint_method'],
        params['trained_modelfile_hdf5'],
        siamese_arms_activation=params['siamese_arms_activation'])

    test_indices = indices_to_drop
    train_validation_indices = np.delete(np.arange(len(seqs)), test_indices)
    train_validation_seqs = seqs[train_validation_indices]
    test_seqs = seqs[test_indices]

    train_validation_features = model.predict(train_validation_seqs)

    time_pred_start = os.times()
    test_features = model.predict(test_seqs)
    time_pred_end = os.times()

    labels = np.array(labels_str)
    train_validation_labels = labels[train_validation_indices]
    test_labels = labels[test_indices]

    auc, f1, time_classification_start, time_classification_end = \
        linear_svm.compute_classification_errors(train_validation_features,
                                                 train_validation_labels,
                                                 test_features,
                                                 test_labels)

    main_end = os.times()

    num_calculated_sequences = len(test_seqs)

    virtual_prediction_duration = time_pred_end.user - time_pred_start.user \
        + time_pred_end.system - time_pred_start.system
    elapsed_prediction_duration = time_pred_end.elapsed - time_pred_start.elapsed
    virtual_classification_duration = time_classification_end.user - time_classification_start.user \
        + time_classification_end.system - time_classification_start.system
    elapsed_classification_duration = time_classification_end.elapsed - time_classification_start.elapsed

    prediction = {}
    prediction['basics'] = {}
    prediction['basics']['number_of_calculated_sequences'] = len(test_seqs)
    prediction['all'] = {}
    prediction['all']['virtual_prediction_duration'] = virtual_prediction_duration
    prediction['all']['elapsed_prediction_duration'] = elapsed_prediction_duration
    prediction['each_seq'] = {}
    prediction['each_seq']['virtual_prediction_duration_per_calculated_sequence'] = \
        virtual_prediction_duration / num_calculated_sequences
    prediction['each_seq']['elapsed_prediction_duration_per_calculated_sequence'] = \
        elapsed_prediction_duration / num_calculated_sequences

    classification = {}
    classification['basics'] = {}
    classification['basics']['roc_auc'] = auc
    classification['basics']['f1'] = f1
    classification['all'] = {}
    classification['all']['virtual_classification_duration'] = virtual_classification_duration
    classification['all']['elapsed_classification_duration'] = elapsed_classification_duration
    classification['each_seq'] = {}
    classification['each_seq']['virtual_classification_duration_per_calculated_sequence'] = \
        virtual_classification_duration / num_calculated_sequences
    classification['each_seq']['elapsed_classification_duration_per_calculated_sequence'] = \
        elapsed_classification_duration / num_calculated_sequences

    dic = dict(prediction=prediction, classification=classification)

    lsvm_out_path = os.path.join(output_dir, output_file)
    file_utils.save_json(lsvm_out_path, dic)
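def _example_durations():
    # Illustration (not part of the original module) of how the duration
    # figures above are derived from os.times(): "virtual" time is CPU time
    # (user + system) consumed by the process, "elapsed" is wall-clock time.
    import os
    t_start = os.times()
    _ = sum(i * i for i in range(10000))  # stand-in for model.predict(...)
    t_end = os.times()
    virtual = (t_end.user - t_start.user) + (t_end.system - t_start.system)
    elapsed = t_end.elapsed - t_start.elapsed
    return virtual, elapsed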