def leave_one_out(
            newpath,
            path,
            hyperparameters,
            iedb_data_file,
            mhc_sequence_fasta_file=None,
            peptide_length = 9,
            max_sequence_length=None,
            nb_epoch=10, max_ic50 = 5000.0,):




    log_transformed_ic50_cutoff = 1 - np.log(500)/np.log(max_ic50)

    create_fasta_file(path, remove_residues = True, consensus_cutoff = 0)

    ##Load files
    allele_binding_data, df = load_binding_data(iedb_data_file)
    allele_sequence_data, max_allele_length = load_allele_sequence_data(mhc_sequence_fasta_file)
    max_sequence_length= max_allele_length

    #optimizer = keras.optimizers.Adagrad(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
    #optimizer = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
    #optimizer = keras.optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=1e-6)
    ##build models


    for learn_rate in [ 0.001]:
        for peptide_activation in [ 'tanh']:
            for mhc_activation in [ 'tanh']:
                total_AUC_score = []
                print (peptide_activation, mhc_activation, learn_rate)
                optimizer = keras.optimizers.RMSprop(lr=learn_rate, rho=0.9, epsilon=1e-6)
                graph = build_graph_native_sequence_model(
                                                    maxlen_mhc = max_sequence_length,
                                                    optimizer=optimizer,
                                                    mhc_activation=mhc_activation,
                                                    peptide_activation= peptide_activation )
                initial_weights = graph.get_weights()

                allele_list =  create_allele_list(allele_binding_data, allele_sequence_data)
                formatted_allele_list = ['A0101','A0201','A2902','A3101','A6801','B0801','B1501','B1801','B2705','B3901']
                #formatted_allele_list = ['B2705']
                # formatted_allele_list = ['A0101','A0201','A0202','A0203',
                #                         'A0205','A0206','A0207','A0211',
                #                         'A0212','A0216','A0219','A0301',
                #                         'A1101','A2301','A2402','A2403',
                #                         'A2501','A2601','A2602','A2603',
                #                         'A2902','A3001','A3002','A3101',
                #                         'A3201','A3301','A6801','A6802',
                #                         'A6901','A8001','B0702','B0801',
                #                         'B0802','B0803','B1501','B1502',
                #                         'B1503','B1509','B1517','B1801',
                #                         'B2705','B3501','B3801','B3901',
                #                         'B4001','B4002','B4402','B4403',
                #                         'B4501','B4601','B4801','B5101',
                #                         'B5301','B5401','B5701','B5801',
                #                         'B5802','B7301']
                formatted_allele_list = allele_list
                ##leave one out validaiton
                for allele in allele_list:
                    if not os.path.exists(newpath):
                        os.makedirs(newpath)
                    f = open(newpath+ '/' + allele,'wb')
                    #split into training_list and test_list, where the test_list consists of a single allele
                    training_list = create_allele_list(allele_binding_data, allele_sequence_data)
                    training_list.remove(allele)
                    peptides_train, mhc_train, Y_train = get_model_data(training_list,
                                                                        allele_sequence_data,
                                                                        allele_binding_data,
                                                                        dense_mhc_model=None,
                                                                        peptide_length = peptide_length,
                                                                        mhc_length=max_sequence_length,
                                                                        mhc_dense = None
                                                                        )
                    peptides_test, mhc_test, Y_test = get_model_data([allele],
                                                                    allele_sequence_data,
                                                                    allele_binding_data,
                                                                    dense_mhc_model=None,
                                                                    peptide_length = peptide_length,
                                                                    mhc_length=max_sequence_length,
                                                                    mhc_dense=None)

                    #convert ic50 values into binary binders(1) or non-binders(0)
                    Y_true = 1 * np.greater(Y_test,log_transformed_ic50_cutoff)
                    Y_train = 1 * np.greater(Y_train,log_transformed_ic50_cutoff)

                    history = LossHistory()
                    history.get_data(peptides_train, mhc_train, Y_train, peptides_test, mhc_test, Y_true)

                    if(not np.all([Y_true[0]]*len(Y_true) == Y_true) ):  #check if all values in Y_true are not the same

                        graph.set_weights(initial_weights)

                        graph.fit(
                                {'peptide':peptides_train,'mhc':mhc_train, 'output': Y_train},
                                batch_size=32,
                                nb_epoch=nb_epoch,
                                verbose = 1,
                                callbacks = [history]
                            )

                        Y_true = 1 * np.greater(Y_test,log_transformed_ic50_cutoff)



                        Y_pred = graph.predict({'peptide':peptides_test,'mhc':mhc_test})['output']
                        Y_pred = Y_pred.reshape(Y_pred.shape[0])

                        AUC = roc_auc_score(Y_true, Y_pred)
                        #convert Y_pred to binary
                        Y_pred_binary = 1 * np.greater(Y_pred,log_transformed_ic50_cutoff)
                        ACC = accuracy_score(Y_true, Y_pred_binary)

                        print ("Allele: ",allele, "\t#entries :", len(peptides_test) ,"\tAUC: ", AUC, "\tACC:", ACC)
                        total_AUC_score.append(AUC)
                        print(history.test_error,'\n',history.training_error, file=f)
                        print ("\nAllele: ",allele, "\t#entries :", len(peptides_test) ,"\tAUC: ", AUC, "\tACC:", ACC, file=f)
                        f.close()

        print (1 - sum(total_AUC_score)/len(total_AUC_score))
def leave_one_out(
            path,
            hyperparameters,
            iedb_data_file,
            mhc_sequence_fasta_file=None,
            peptide_length = 9,
            max_sequence_length=None,
            nb_epoch=30, max_ic50 = 5000.0,):

    log_transformed_ic50_cutoff = 1 - np.log(500)/np.log(max_ic50)

    create_fasta_file(path, remove_residues = True, consensus_cutoff = 0)

    ##Load files
    allele_binding_data, df = load_binding_data(iedb_data_file)
    allele_sequence_data, max_allele_length = load_allele_sequence_data(mhc_sequence_fasta_file)
    max_sequence_length= max_allele_length

    for learn_rate in [ 0.001]:
        for peptide_activation in [ 'tanh']:
            for mhc_activation in [ 'tanh']:
                optimizer = keras.optimizers.RMSprop(lr=learn_rate, rho=0.9, epsilon=1e-6)

                print (peptide_activation, mhc_activation, learn_rate)

                graph = ffn_matrix(  maxlen_mhc = max_sequence_length,
                                                    optimizer=optimizer,
                                                    mhc_activation=mhc_activation,
                                                    peptide_activation= peptide_activation )
                initial_weights = graph.get_weights()

                allele_list =  create_allele_list(allele_binding_data, allele_sequence_data)
                formatted_allele_list = ['A0101', 'A0201']

                ##leave one out validaiton
                for allele in formatted_allele_list:
                    print(allele)
                    #split into training_list and test_list, where the test_list consists of a single allele
                    training_list = create_allele_list(allele_binding_data, allele_sequence_data)
                    training_list.remove(allele)
                    peptides_train, mhc_train, Y_train = get_model_data(training_list,
                                                                        allele_sequence_data,
                                                                        allele_binding_data,
                                                                        dense_mhc_model=None,
                                                                        peptide_length = peptide_length,
                                                                        mhc_length=max_sequence_length,
                                                                        mhc_dense = None
                                                                        )
                    peptides_test, mhc_test, Y_test = get_model_data([allele],
                                                                    allele_sequence_data,
                                                                    allele_binding_data,
                                                                    dense_mhc_model=None,
                                                                    peptide_length = peptide_length,
                                                                    mhc_length=max_sequence_length,
                                                                    mhc_dense=None)

                    #convert ic50 values into binary binders(1) or non-binders(0)
                    Y_true = 1 * np.greater(Y_test,log_transformed_ic50_cutoff)
                    Y_train = 1 * np.greater(Y_train,log_transformed_ic50_cutoff)

                    if(not np.all([Y_true[0]]*len(Y_true) == Y_true) ):  #check if all values in Y_true are not the same

                        graph.set_weights(initial_weights)
                        history = LossHistory()
                        history.get_data(peptides_train, mhc_train, Y_train, peptides_test, mhc_test, Y_true)
                        graph.fit(
                                {'peptide':peptides_train,'mhc':mhc_train, 'output': Y_train},
                                batch_size=32,
                                nb_epoch=nb_epoch,
                                verbose = 1,
                                callbacks=[history]
                            )