Example #1
def main():
    dataset_type = sys.argv[1]
    direc = sys.argv[2]
    path = sys.argv[3]
    length = int(sys.argv[4])
    num_normaldist_ave = 3

    seqs, _, _ = read_sequences(dataset_type,
                                direc=direc,
                                feature_normalization=True)
    print(len(seqs))
    seqs = augment_data(seqs, length, num_normaldist_ave=num_normaldist_ave)

    print(len(seqs))
    for i, (sample_name, seq) in enumerate(seqs.items()):
        if i % 5 == 0:
            # The first sequence in each block of 5 is the original sample.
            original_name = sample_name
        elif i % 5 == 1:
            # The second is the uniform-random augmentation.
            dic = dict(augmented_seq=seq,
                       sample_name=sample_name,
                       original_name=original_name,
                       distribution="uniform_random")
            f_path = os.path.join(path, sample_name)
            file_utils.save_pickle(f_path, dic)
        else:
            # The remaining num_normaldist_ave (= 3) sequences per block are the
            # normal-distribution augmentations (distribution label assumed).
            dic = dict(augmented_seq=seq,
                       sample_name=sample_name,
                       original_name=original_name,
                       distribution="normal")
            f_path = os.path.join(path, sample_name)
            file_utils.save_pickle(f_path, dic)
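
Each augmented sequence is saved as a pickle holding the dict built above. A minimal sketch of reading one of those files back, assuming file_utils.save_pickle simply wraps pickle.dump:

import pickle

def load_augmented_sample(f_path):
    # f_path is one of the files written by main() above.
    with open(f_path, 'rb') as f:
        dic = pickle.load(f)
    # Keys written by main(): augmented_seq, sample_name, original_name, distribution.
    return dic['augmented_seq'], dic['original_name'], dic['distribution']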
Example #2
def run(dataset_type, dataset_location, sigma, triangular, output_dir,
        output_filename_format, data_augmentation_size,
        num_process, hdf5):
    ########
    # Create output directory and backup the configuration file to the directory
    ########
    os.makedirs(output_dir, exist_ok=True)
    shutil.copy(os.path.abspath(sys.argv[2]),
                os.path.join(output_dir, os.path.basename(sys.argv[2])))

    assert others.is_valid_dataset_type(dataset_type)

    output_dir = os.path.abspath(output_dir)
    assert os.path.isdir(output_dir)

    ########
    # Prepare time-series data
    ########
    
    seqs, sample_names, labels_str, _ = read_sequences(dataset_type, dataset_location)

    print("%d samples." % len(seqs))

    ########
    # Global Alignment Kernel execution
    ########
        
    start = time.time()
    gram = gak.gram_gak(seqs, sigma, triangular,
                        num_process=num_process)
    end = time.time()

    ########
    # Output to a file
    ########

    output_filename_format = output_filename_format.replace(
        "${sigma}", str(sigma)).replace("${triangular}", str(triangular))
    if hdf5:
        log_file = os.path.join(output_dir, output_filename_format + ".hdf5")
        timelog = log_file.replace(".hdf5", ".timelog")
    else:
        log_file = os.path.join(output_dir, output_filename_format + ".pkl")
        timelog = log_file.replace(".pkl", ".timelog")
    file_utils.save_new_result(log_file, dataset_type, gram, sample_names, hdf5=hdf5)

    duration = end - start
    num_samples = len(sample_names)
    with open(timelog, 'w') as time_fd:
        time_fd.write("gram_gak_start: %d\n" % start)
        time_fd.write("gram_gak_end: %d\n" % end)
        time_fd.write("gram_gak_duration: %d\n" % duration)
        time_fd.write("num_samples: %d\n" % num_samples)
        time_fd.write("average_time_per_gak: %.5f\n" % (duration / (num_samples ** 2) * num_process))
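
The .timelog written above is a plain text file of "key: value" lines; a minimal sketch of parsing it back into a dict (the field names are exactly those written by run()):

def parse_timelog(timelog_path):
    # Fields: gram_gak_start, gram_gak_end, gram_gak_duration,
    # num_samples, average_time_per_gak.
    stats = {}
    with open(timelog_path) as f:
        for line in f:
            key, value = line.strip().split(": ")
            stats[key] = float(value)
    return stats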
Example #3
    def setUp(self):
        pickle_or_hdf5_location = "results/6DMG/30/t1/gram_upperChar_sigma30_triangularNone_t1_noaugmentation.hdf5"
        dataset_location = "/Users/ngym/Lorincz-Lab/project/fast_time-series_data_classification/dataset/6DMG_mat_112712/matR_char"

        loaded_data = file_utils.load_hdf5(os.path.abspath(pickle_or_hdf5_location))
        gram_matrices = loaded_data['gram_matrices']
        self.gram = gram_matrices[0]['original']
        self.sample_names = loaded_data['sample_names']
        self.lmbd = 0.5

        dataset_type = loaded_data['dataset_type']
        sample_names = [s.split('/')[-1].split('.')[0] for s in loaded_data['sample_names']]
        seqs, key_to_str, _ = read_sequences(dataset_type, direc=dataset_location)
        seqs = filter_samples(seqs, sample_names)
        key_to_str = filter_samples(key_to_str, self.sample_names)
        labels = list(key_to_str.values())
        tmp = list(labels)
        counter = Counter(tmp)
        #self.size_groups = [counter[label] for label in sorted(set(tmp), key=tmp.index)]
        self.size_groups = [15] * 26
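
The hard-coded size_groups of [15] * 26 assumes 26 character classes with 15 samples each; the commented-out line gives the general computation from the labels. A minimal sketch of that general form:

from collections import Counter

def compute_size_groups(labels):
    # Number of samples per label, ordered by first appearance in labels.
    counter = Counter(labels)
    return [counter[label] for label in sorted(set(labels), key=labels.index)]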
Example #4
def run(dataset_type, dataset_location, fold_count, fold_to_drop, params,
        output_dir, output_filename_format, output_file,
        data_augmentation_size):
    ########
    # Create output directory and backup the configuration file to the directory
    ########
    os.makedirs(output_dir, exist_ok=True)
    try:
        shutil.copy(os.path.abspath(sys.argv[2]),
                    os.path.join(output_dir, os.path.basename(sys.argv[2])))
    except shutil.SameFileError:
        pass

    dataset_location = os.path.abspath(dataset_location)
    output_dir = os.path.abspath(output_dir)
    assert os.path.isdir(output_dir)

    main_start = os.times()

    ########
    # Prepare time-series data
    ########
    seqs, sample_names, labels_str, _ = read_sequences(dataset_type,
                                                       dataset_location)

    print("%d samples." % len(seqs))

    if data_augmentation_size != 1:
        # Augment only train and validation data.
        # Test data is not augmented.
        folds = k_fold_cross_validation.get_kfolds(dataset_type, sample_names,
                                                   fold_count)

        test_indices = folds[fold_to_drop - 1]
        train_validation_indices = np.delete(np.arange(len(seqs)),
                                             test_indices)

        train_validation_seqs = [seqs[i] for i in train_validation_indices]
        train_validation_sample_names = [
            sample_names[i] for i in train_validation_indices
        ]
        train_validation_labels_str = [
            labels_str[i] for i in train_validation_indices
        ]

        augmentation_magnification = 1.2
        train_validation_seqs, train_validation_sample_names, \
        labels_str_tr_val_augmented, flag_augmented = augment_data(
            train_validation_seqs,
            train_validation_sample_names,
            train_validation_labels_str,
            augmentation_magnification,
            rand_uniform=True,
            num_normaldist_ave=data_augmentation_size - 2)

        test_seqs = [seqs[i] for i in test_indices]
        test_labels_str = [labels_str[i] for i in test_indices]

        lb = LabelBinarizer()
        lb.fit(labels_str)

        Y_test = lb.transform(test_labels_str)
        Y_tr_val = lb.transform(labels_str_tr_val_augmented)

        time_dim = max(
            [seq.shape[0] for seq in train_validation_seqs + test_seqs])
        pad_value = -4444
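        # Sentinel used to pad all sequences to a common length time_dim; the same
        # value is passed to rnn.Rnn below, presumably so padded steps can be masked.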

        train_validation_seqs = pad_sequences(
            [seq.tolist() for seq in train_validation_seqs],
            maxlen=time_dim,
            dtype='float32',
            padding='post',
            value=pad_value)
        test_seqs = pad_sequences([seq.tolist() for seq in test_seqs],
                                  maxlen=time_dim,
                                  dtype='float32',
                                  padding='post',
                                  value=pad_value)
    else:
        folds = k_fold_cross_validation.get_kfolds(dataset_type, sample_names,
                                                   fold_count)

        test_indices = folds[fold_to_drop - 1]
        train_validation_indices = np.delete(np.arange(len(seqs)),
                                             test_indices)

        train_validation_seqs = [seqs[i] for i in train_validation_indices]
        train_validation_sample_names = [
            sample_names[i] for i in train_validation_indices
        ]
        train_validation_labels_str = [
            labels_str[i] for i in train_validation_indices
        ]

        test_seqs = [seqs[i] for i in test_indices]
        test_labels_str = [labels_str[i] for i in test_indices]

        lb = LabelBinarizer()
        lb.fit(labels_str)

        Y_test = lb.transform(test_labels_str)
        Y_tr_val = lb.transform(train_validation_labels_str)

        time_dim = max(
            [seq.shape[0] for seq in train_validation_seqs + test_seqs])
        pad_value = -4444

        train_validation_seqs = pad_sequences(
            [seq.tolist() for seq in train_validation_seqs],
            maxlen=time_dim,
            dtype='float32',
            padding='post',
            value=pad_value)
        test_seqs = pad_sequences([seq.tolist() for seq in test_seqs],
                                  maxlen=time_dim,
                                  dtype='float32',
                                  padding='post',
                                  value=pad_value)

    modelfile_hdf5 = os.path.join(output_dir,
                                  output_filename_format + "_model.hdf5")

    # pre-processing

    feat_dim = seqs[0].shape[1]
    input_shape = (time_dim, feat_dim)

    K.clear_session()

    # build network
    rnn_ = rnn.Rnn(input_shape, pad_value, params['rnn_units'],
                   params['dense_units'], 'tanh', params['rnn'],
                   params['dropout'], params['implementation'],
                   params['bidirectional'], params['batchnormalization'])
    model = rnn_.create_RNN_base_network()
    model.add(Dense(Y_tr_val.shape[1], activation="softmax"))

    callbacks = [
        EarlyStopping(patience=params['patience']),
        ModelCheckpoint(filepath=modelfile_hdf5, save_best_only=True)
    ]

    loss_weights = None
    optimizer = RMSprop(clipnorm=1.)
    model.compile(loss=params['loss_function'], optimizer=optimizer)

    model.fit(train_validation_seqs,
              Y_tr_val,
              validation_split=0.1,
              shuffle=True,
              epochs=params['epochs'],
              batch_size=512,
              verbose=1,
              callbacks=callbacks)

    # Restore the best weights saved by ModelCheckpoint before predicting.
    model.load_weights(modelfile_hdf5)

    time_pred_start = os.times()
    test_preds = model.predict_on_batch(test_seqs)
    time_pred_end = os.times()

    main_end = os.times()

    roc_auc = roc_auc_score(y_true=Y_test, y_score=test_preds)
    test_preds_ = np.array([[1 if prob == max(probs) else 0 for prob in probs]
                            for probs in test_preds])
    f1 = f1_score(Y_test, test_preds_, average='weighted')

    num_calculated_sequences = len(test_seqs)

    virtual_prediction_duration = time_pred_end.user - time_pred_start.user + time_pred_end.system - time_pred_start.system
    elapsed_prediction_duration = time_pred_end.elapsed - time_pred_start.elapsed

    virtual_classification_duration = 0
    elapsed_classification_duration = 0

    prediction = {}

    prediction['basics'] = {}
    prediction['basics']['number_of_calculated_sequences'] = len(test_seqs)

    prediction['all'] = {}
    prediction['all'][
        'virtual_prediction_duration'] = virtual_prediction_duration
    prediction['all'][
        'elapsed_prediction_duration'] = elapsed_prediction_duration

    prediction['each_seq'] = {}
    prediction['each_seq'][
        'virtual_prediction_duration_per_calculated_sequence'] = virtual_prediction_duration / num_calculated_sequences
    prediction['each_seq'][
        'elapsed_prediction_duration_per_calculated_sequence'] = elapsed_prediction_duration / num_calculated_sequences

    classification = {}

    classification['basics'] = {}
    classification['basics']['roc_auc'] = roc_auc
    classification['basics']['f1'] = f1

    classification['all'] = {}
    classification['all'][
        'virtual_classification_duration'] = virtual_classification_duration
    classification['all'][
        'elapsed_classification_duration'] = elapsed_classification_duration

    classification['each_seq'] = {}
    classification['each_seq'][
        'virtual_classification_duration_per_calculated_sequence'] = virtual_classification_duration / num_calculated_sequences
    classification['each_seq'][
        'elapsed_classification_duration_per_calculated_sequence'] = elapsed_classification_duration / num_calculated_sequences

    dic = dict(prediction=prediction, classification=classification)

    ###
    out_path = os.path.join(output_dir, output_file)
    file_utils.save_json(out_path, dic)
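
run() ends by saving the prediction and classification dictionaries as JSON. A minimal sketch of reading that file back, assuming file_utils.save_json writes plain JSON:

import json

def load_run_summary(out_path):
    # Keys written above: 'prediction' and 'classification', each holding
    # 'basics', 'all' and 'each_seq' sub-dicts.
    with open(out_path) as f:
        summary = json.load(f)
    return (summary['classification']['basics']['roc_auc'],
            summary['classification']['basics']['f1'])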
Example #5
def run(pickle_or_hdf5_location, dataset_location, fold_count, fold_to_drop,
        algorithm, params, output_dir, output_filename_format, output_file):
    ########
    # Create output directory and backup the configuration file to the directory
    ########
    os.makedirs(output_dir, exist_ok=True)
    try:
        shutil.copy(os.path.abspath(sys.argv[2]),
                    os.path.join(output_dir, os.path.basename(sys.argv[2])))
    except shutil.SameFileError:
        pass
    hdf5 = pickle_or_hdf5_location[-4:] == "hdf5"
    check_fold(fold_count, fold_to_drop, hdf5)
    check_algorithm(algorithm)
    check_params(algorithm, params)

    pickle_or_hdf5_location = os.path.abspath(pickle_or_hdf5_location)
    dataset_location = os.path.abspath(dataset_location)
    output_dir = os.path.abspath(output_dir)
    assert os.path.isdir(output_dir)
    assert os.path.exists(pickle_or_hdf5_location)

    ########
    # Load complete GRAM matrix
    ########
    time_main_start = os.times()

    if hdf5:
        loaded_data = file_utils.load_hdf5(pickle_or_hdf5_location)
    else:
        loaded_data = file_utils.load_pickle(pickle_or_hdf5_location)
        check_pickle_format(loaded_data)

    dataset_type = loaded_data['dataset_type']
    if dataset_type == 'UCIauslan':
        loaded_sample_names = loaded_data['sample_names']
    else:
        loaded_sample_names = [
            s.split('/')[-1].split('.')[0] for s in loaded_data['sample_names']
        ]
    gram_matrices = loaded_data['gram_matrices']
    if len(gram_matrices) == 1:
        gram = gram_matrices[0]['original']
    else:
        gram = gram_matrices[-1]['completed_npsd']

    # drop elements
    if fold_count == 0:
        # No folds to drop; keep the full Gram matrix and record empty drops.
        gram_drop = gram
        indices_to_drop = []
        dropped_elements = []
    else:
        folds = k_fold_cross_validation.get_kfolds(dataset_type,
                                                   loaded_sample_names,
                                                   fold_count)
        indices_to_drop = folds[fold_to_drop - 1]
        gram_drop, dropped_elements = make_matrix_incomplete.gram_drop_samples(
            gram, indices_to_drop)

    ########
    # Prepare time-series data
    ########
    seqs, sample_names, labels_str, _ = read_sequences(dataset_type,
                                                       dataset_location)

    seqs = filter_samples(seqs, sample_names, loaded_sample_names)
    labels_str = filter_samples(labels_str, sample_names, loaded_sample_names)

    ########
    # Execute Matrix Completion
    ########
    train_start = None
    train_end = None
    if algorithm == "gak":
        ########
        # Baseline GAK
        ########
        gram_completed, time_completion_start, time_completion_end \
            = matrix_completion.gak_matrix_completion(
                gram_drop, seqs, indices_to_drop,
                sigma=params['sigma'], triangular=params['triangular'])
        action = "GAK sigma: " + str(params['sigma']) + " triangular: " + str(
            params['triangular'])
        output_filename_format = output_filename_format.replace(
            "${sigma}",
            str(params['sigma'])).replace("${triangular}",
                                          str(params['triangular']))
    elif algorithm in {"softimpute", "knn", "iterativesvd"}:
        ########
        # Baseline SoftImpute, KNN, IterativeSVD
        ########
        if algorithm == "softimpute":
            func = matrix_completion.softimpute_matrix_completion
            action = "Softimpute"
            print('running SoftImpute')
        elif algorithm == "knn":
            func = matrix_completion.knn_matrix_completion
            action = "KNN"
            print('running KNN')
        elif algorithm == "iterativesvd":
            func = matrix_completion.iterativesvd_matrix_completion
            action = "IterativeSVD"
            print('running IterativeSVD')
        else:
            print("unsupported fancyimpute algorithm")
            exit(-1)
        flag_test = np.zeros(len(seqs))
        flag_test[indices_to_drop] = 1
        drop_flag_matrix = create_true_GAK_flag_matrix(1 - params['gak_rate'],
                                                       flag_test)
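        # Always keep the true GAK values on the diagonal and between pairs of
        # retained (non-dropped) samples; only entries involving dropped samples
        # are subject to the gak_rate-based random selection above.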
        for i in range(len(seqs)):
            drop_flag_matrix[i, i] = 1
            for j in range(i + 1):
                if i not in indices_to_drop and j not in indices_to_drop:
                    drop_flag_matrix[i, j] = 1
                    drop_flag_matrix[j, i] = 1

        print(len(seqs)**2)
        print(np.count_nonzero(drop_flag_matrix))
        gram_completed, time_completion_start, time_completion_end \
            = func(gram_drop,
                   seqs,
                   sigma=params['sigma'],
                   triangular=params['triangular'],
                   num_process=params['num_process'],
                   drop_flag_matrix=drop_flag_matrix)
    elif algorithm == "rnn":
        ########
        # Our Scheme, Siamese Recurrent Neural Network
        ########
        modelfile_hdf5 = os.path.join(output_dir,
                                      output_filename_format + "_model.hdf5")
        logfile_loss = os.path.join(output_dir,
                                    output_filename_format + ".losses")
        gram_completed, time_train_start, time_train_end, \
            time_completion_start, time_completion_end \
            = matrix_completion.rnn_matrix_completion(
                gram_drop,
                seqs,
                params['epochs'],
                params['patience'],
                params['epoch_start_from'],
                logfile_loss,
                modelfile_hdf5,
                params['rnn'],
                params['rnn_units'],
                params['dense_units'],
                params['dropout'],
                params['implementation'],
                params['bidirectional'],
                params['batchnormalization'],
                params['mode'],
                params['loss_function'],
                params['loss_weight_ratio'],
                labels_str,
                params['siamese_joint_method'],
                params['siamese_arms_activation'],
                trained_modelfile_hdf5=params['trained_modelfile_hdf5'])
        action = "SiameseRNN"
    elif algorithm == "fast_rnn":
        ########
        # Our Scheme, Fast Siamese Recurrent Neural Network
        ########
        modelfile_hdf5 = os.path.join(output_dir,
                                      output_filename_format + "_model.hdf5")
        logfile_loss = os.path.join(output_dir,
                                    output_filename_format + ".losses")
        gram_completed, time_completion_start, time_completion_end \
            = matrix_completion.fast_rnn_matrix_completion(
                gram_drop,
                seqs,
                params['rnn'],
                params['rnn_units'],
                params['dense_units'],
                params['dropout'],
                params['implementation'],
                params['bidirectional'],
                params['batchnormalization'],
                params['loss_function'],
                params['siamese_arms_activation'],
                params['siamese_joint_method'],
                trained_modelfile_hdf5=params['trained_modelfile_hdf5'])
        action = "FastSiameseRNN"
    else:
        assert False

    ########
    # Make the completed matrix positive semidefinite, if it is not.
    ########

    # eigenvalue check
    time_npsd_start = os.times()
    gram_completed_npsd = nearest_positive_semidefinite.nearest_positive_semidefinite(
        gram_completed)
    time_npsd_end = os.times()

    ########
    # Save results
    ########
    if hdf5:
        log_file = os.path.join(output_dir, output_filename_format + ".hdf5")
    else:
        log_file = os.path.join(output_dir, output_filename_format + ".pkl")
    action += " " + time.asctime(time.localtime())
    file_utils.append_and_save_result(log_file,
                                      loaded_data,
                                      gram_drop,
                                      gram_completed,
                                      gram_completed_npsd,
                                      indices_to_drop,
                                      action,
                                      hdf5=hdf5)

    # calculate errors
    mse, mse_dropped, mae, mae_dropped, \
        relative, relative_dropped = calculate_errors(gram, gram_completed_npsd, dropped_elements)

    time_main_end = os.times()

    # save run times and errors
    num_calculated_elements = len(dropped_elements) - len(indices_to_drop) // 2
    num_dropped_sequences = len(indices_to_drop)
    out_path = os.path.join(output_dir, output_file)
    file_utils.save_analysis(out_path, len(dropped_elements),
                             num_dropped_sequences, num_calculated_elements,
                             time_completion_start, time_completion_end,
                             time_npsd_start, time_npsd_end, time_main_start,
                             time_main_end, mse, mse_dropped, mae, mae_dropped,
                             relative, relative_dropped)
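
A minimal sketch of calling run() for the GAK baseline branch; the paths and values below are placeholders, and only the params keys read in that branch (sigma and triangular) are filled in:

params = {'sigma': 30, 'triangular': None}
run(pickle_or_hdf5_location="results/gram_original.hdf5",  # placeholder path
    dataset_location="dataset/",                           # placeholder path
    fold_count=5,
    fold_to_drop=1,
    algorithm="gak",
    params=params,
    output_dir="results/gak_completion",
    output_filename_format="gram_sigma${sigma}_triangular${triangular}",
    output_file="analysis.json")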
Example #6
    def test_read_sequences(self):
        seqs, key_to_str, _ = rs.read_sequences(self.dataset_type,
                                                direc=self.direc)
        labels = key_to_str.values()
        c = Counter(labels)
        print(c)
Example #7
def run(pickle_or_hdf5_location, dataset_location, fold_to_test, fold_to_tv,
        fold_count, params,
        output_dir, output_filename_format, data_augmentation_size):
    os.makedirs(output_dir, exist_ok=True)
    shutil.copy(os.path.abspath(sys.argv[2]), os.path.join(output_dir, os.path.basename(sys.argv[2])))
    hdf5 = pickle_or_hdf5_location[-4:] == "hdf5"
    if hdf5:
        loaded_data = file_utils.load_hdf5(os.path.abspath(pickle_or_hdf5_location))
    else:
        loaded_data = file_utils.load_pickle(os.path.abspath(pickle_or_hdf5_location))

    dataset_type = loaded_data['dataset_type']
    sample_names = [s.split('/')[-1].split('.')[0] for s in loaded_data['sample_names']]

    gram_matrices = loaded_data['gram_matrices']
    gram = gram_matrices[0]['original']
    
    sample_names = loaded_data['sample_names']
    
    folds = k_fold_cross_validation.get_kfolds(dataset_type, sample_names, fold_count)
    folds = np.array(folds)
    test_indices = np.concatenate(folds[fold_to_test])
    tv_indices = np.concatenate(folds[fold_to_tv])
    fold_for_gram = np.delete(np.arange(fold_count), fold_to_test + fold_to_tv)
    gram_indices = np.concatenate(folds[fold_for_gram]).astype(int)
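    # Three-way split of the folds: fold_to_test indexes the test samples,
    # fold_to_tv the training/validation samples, and the remaining folds
    # (fold_for_gram) index the part of the Gram matrix handed to
    # KSS_unsupervised_alpha_prediction.get_classification_error below.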
    
    seqs, key_to_str, _ = read_sequences(dataset_type, dataset_location)
    augmentation_magnification = 1.2
    seqs, key_to_str, flag_augmented = augment_data(seqs, key_to_str,
                                                    augmentation_magnification,
                                                    rand_uniform=True,
                                                    num_normaldist_ave=data_augmentation_size - 2)

    
    seqs = filter_samples(seqs, sample_names)
    key_to_str = filter_samples(key_to_str, sample_names)

    logfile_hdf5 = os.path.join(output_dir, output_filename_format + "_model.hdf5")
    logfile_loss = os.path.join(output_dir, output_filename_format + ".losses")
    output_file  = os.path.join(output_dir, output_filename_format + ".json")
    
    (roc_auc_score, f1_score) = KSS_unsupervised_alpha_prediction.get_classification_error(
        gram,
        gram_indices,
        tv_indices,
        test_indices,
        list(seqs.values()),
        params['epochs'],
        params['patience'],
        logfile_hdf5,
        logfile_loss,
        params['rnn'],
        params['rnn_units'],
        params['dense_units'],
        params['dropout'],
        params['implementation'],
        params['bidirectional'],
        params['batchnormalization'],
        params['mode'],
        list(key_to_str.values()),
        params['lmbd'],
        params['top_activation'])

    print(pickle_or_hdf5_location + " roc_auc_score: " + str(roc_auc_score) + " f1_score: " + str(f1_score))
    dic = dict(roc_auc_score=roc_auc_score,
               f1_score=f1_score)
    
    file_utils.save_json(output_file, dic)
Example #8
def run(pickle_or_hdf5_location, dataset_location, fold_count, fold_to_drop,
        params, output_dir, output_filename_format, output_file,
        data_augmentation_size):
    os.makedirs(output_dir, exist_ok=True)
    try:
        shutil.copy(os.path.abspath(sys.argv[2]),
                    os.path.join(output_dir, os.path.basename(sys.argv[2])))
    except shutil.SameFileError:
        pass
    hdf5 = pickle_or_hdf5_location[-4:] == "hdf5"
    check_fold(fold_count, fold_to_drop, hdf5)

    pickle_or_hdf5_location = os.path.abspath(pickle_or_hdf5_location)
    dataset_location = os.path.abspath(dataset_location)
    output_dir = os.path.abspath(output_dir)
    assert os.path.isdir(output_dir)
    assert os.path.exists(pickle_or_hdf5_location)

    main_start = os.times()

    if hdf5:
        loaded_data = file_utils.load_hdf5(pickle_or_hdf5_location)
    else:
        loaded_data = file_utils.load_pickle(pickle_or_hdf5_location)

    dataset_type = loaded_data['dataset_type']
    if dataset_type == 'UCIauslan':
        loaded_sample_names = loaded_data['sample_names']
    else:
        loaded_sample_names = [
            s.split('/')[-1].split('.')[0] for s in loaded_data['sample_names']
        ]
    gram_matrices = loaded_data['gram_matrices']
    if len(gram_matrices) == 1:
        gram = gram_matrices[0]['original']
    else:
        gram = gram_matrices[-1]['completed_npsd']

    # drop elements
    if fold_count == 0:
        # No folds to drop; keep the full Gram matrix and record no dropped indices.
        gram_drop = gram
        indices_to_drop = []
    else:
        folds = k_fold_cross_validation.get_kfolds(dataset_type,
                                                   loaded_sample_names,
                                                   fold_count)
        indices_to_drop = folds[fold_to_drop - 1]
        gram_drop, dropped_elements = make_matrix_incomplete.gram_drop_samples(
            gram, indices_to_drop)

    seqs, sample_names, labels_str, _ = read_sequences(dataset_type,
                                                       dataset_location)

    seqs = filter_samples(seqs, sample_names, loaded_sample_names)
    labels_str = filter_samples(labels_str, sample_names, loaded_sample_names)

    train_start = None
    train_end = None

    modelfile_hdf5 = os.path.join(output_dir,
                                  output_filename_format + "_model.hdf5")
    logfile_loss = os.path.join(output_dir, output_filename_format + ".losses")

    # pre-processing
    num_seqs = len(seqs)
    time_dim = max([seq.shape[0] for seq in seqs])
    pad_value = -4444
    seqs = pad_sequences([seq.tolist() for seq in seqs],
                         maxlen=time_dim,
                         dtype='float32',
                         padding='post',
                         value=pad_value)
    feat_dim = seqs[0].shape[1]
    input_shape = (time_dim, feat_dim)

    K.clear_session()

    # build network
    model = siamese_rnn_branch.SiameseRnnBranch(
        input_shape,
        pad_value,
        params['rnn_units'],
        params['dense_units'],
        params['rnn'],
        params['dropout'],
        params['implementation'],
        params['bidirectional'],
        params['batchnormalization'],
        params['loss_function'],
        params['siamese_joint_method'],
        params['trained_modelfile_hdf5'],
        siamese_arms_activation=params['siamese_arms_activation'])

    test_indices = indices_to_drop
    train_validation_indices = np.delete(np.arange(len(seqs)), test_indices)

    train_validation_seqs = seqs[train_validation_indices]
    test_seqs = seqs[test_indices]

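    # The pre-trained Siamese arm is used as a fixed feature extractor here;
    # the extracted features are then classified with a linear SVM below.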
    train_validation_features = model.predict(train_validation_seqs)

    time_pred_start = os.times()
    test_features = model.predict(test_seqs)
    time_pred_end = os.times()

    labels = np.array(labels_str)
    train_validation_labels = labels[train_validation_indices]
    test_labels = labels[test_indices]


    auc, f1, time_classification_start, time_classification_end = \
                    linear_svm.compute_classification_errors(train_validation_features,
                                                             train_validation_labels,
                                                             test_features,
                                                             test_labels)

    main_end = os.times()

    num_calculated_sequences = len(test_seqs)

    virtual_prediction_duration = time_pred_end.user - time_pred_start.user + time_pred_end.system - time_pred_start.system
    elapsed_prediction_duration = time_pred_end.elapsed - time_pred_start.elapsed

    virtual_classification_duration = time_classification_end.user - time_classification_start.user + time_classification_end.system - time_classification_start.system
    elapsed_classification_duration = time_classification_end.elapsed - time_classification_start.elapsed

    prediction = {}

    prediction['basics'] = {}
    prediction['basics']['number_of_calculated_sequences'] = len(test_seqs)

    prediction['all'] = {}
    prediction['all'][
        'virtual_prediction_duration'] = virtual_prediction_duration
    prediction['all'][
        'elapsed_prediction_duration'] = elapsed_prediction_duration

    prediction['each_seq'] = {}
    prediction['each_seq'][
        'virtual_prediction_duration_per_calculated_sequence'] = virtual_prediction_duration / num_calculated_sequences
    prediction['each_seq'][
        'elapsed_prediction_duration_per_calculated_sequence'] = elapsed_prediction_duration / num_calculated_sequences

    classification = {}

    classification['basics'] = {}
    classification['basics']['roc_auc'] = auc
    classification['basics']['f1'] = f1

    classification['all'] = {}
    classification['all'][
        'virtual_classification_duration'] = virtual_classification_duration
    classification['all'][
        'elapsed_classification_duration'] = elapsed_classification_duration

    classification['each_seq'] = {}
    classification['each_seq'][
        'virtual_classification_duration_per_calculated_sequence'] = virtual_classification_duration / num_calculated_sequences
    classification['each_seq'][
        'elapsed_classification_duration_per_calculated_sequence'] = elapsed_classification_duration / num_calculated_sequences

    dic = dict(prediction=prediction, classification=classification)

    ###
    lsvm_out_path = os.path.join(output_dir, output_file)
    file_utils.save_json(lsvm_out_path, dic)