Beispiel #1
0
def get_aa_probs(pdb_id, wildtype, mutation, position):
    """Run the CNN model on the residue at ``position`` of protein ``pdb_id``.

    Args:
        pdb_id: Identifier handed to ``create_batch`` to select the protein.
        wildtype: Wild-type residue label; only used in the log line.
        mutation: Mutant residue label; only used in the log line.
        position: 1-based position (anything ``int()`` accepts); entry
            ``position - 1`` of the batch data is fed to the model.

    Returns:
        Whatever ``CNNModel.predict`` returns for the single selected entry
        (presumably per-amino-acid probabilities -- confirm against the
        model implementation).
    """
    getIndividualProteins = GetIndividualProteins()
    file_path = FLAGS.input_features

    # Build the batch factory for this protein from the feature files.
    batchFactory = create_batch(file_path, pdb_id)

    # Log the mutation as wildtype, position, mutation.
    print("PDB: {}, mutation: {}{}{}".format(pdb_id, wildtype, position,
                                             mutation))
    print("size of data: {}".format(batchFactory.data_size()))

    # Pull the whole data set as a single batch.
    batch, _ = batchFactory.next(int(batchFactory.data_size()))
    # Select the entry for the mutated position (1-based -> 0-based).
    aa_data = batch['data'][int(position) - 1]

    # Use a fresh graph so repeated calls do not accumulate ops.
    with tf.Graph().as_default():
        model = CNNModel()
        session = tf.Session()
        try:
            model.batch_size = 1
            logits = model.predict(session, [aa_data])
        finally:
            # The session owns native resources; release them even when
            # prediction fails instead of leaking one session per call.
            session.close()

        # Drop the references to the helper objects before returning.
        del batchFactory, getIndividualProteins
        return logits
Beispiel #2
0
def main(hparams):
    """Train or test the GAN, depending on ``hparams.train``.

    Args:
        hparams: Parsed arguments. Reads ``save_dir``, ``cnn_model_dir``,
            ``batch_size``, ``data_dir``, ``valid``, ``train`` and (in test
            mode) ``load_dir``; the whole namespace is also forwarded to
            ``pl.Trainer.from_argparse_args``.
    """
    # Keep only the single best checkpoint, judged by lowest validation loss.
    best_ckpt = ModelCheckpoint(
        filepath=hparams.save_dir,
        save_top_k=1,
        verbose=True,
        monitor='val_loss',
        mode='min',
    )

    # Load the pretrained CNN model and pick out the two parts used below.
    pretrained_cnn = CNNModel.load_from_checkpoint(hparams.cnn_model_dir)
    feature_extractor = pretrained_cnn.cnn
    classifier = pretrained_cnn.classification

    gan_data = GANDataModule(
        hparams.batch_size,
        feature_extractor,
        hparams.data_dir,
        hparams.valid,
    )

    tb_logger = TensorBoardLogger(save_dir="./lightning_logs", name='gan_logs')
    trainer_overrides = {
        'checkpoint_callback': best_ckpt,
        'logger': tb_logger,
        'progress_bar_refresh_rate': 50,
    }
    trainer = pl.Trainer.from_argparse_args(hparams, **trainer_overrides)

    # The explicit comparison is kept on purpose: a plain truthiness test
    # would behave differently for non-bool values of ``hparams.train``.
    if hparams.train == True:  # noqa: E712
        gan = GAN(classifier, hparams)
        trainer.fit(gan, datamodule=gan_data)
        return

    restored_gan = GAN.load_from_checkpoint(checkpoint_path=hparams.load_dir)
    trainer.test(restored_gan, datamodule=gan_data)
Beispiel #3
0
 def setUpClass(cls):
     """Load the data splits from ``get_data_4d`` and build the model under test.

     The six values returned by ``get_data_4d()`` are stored on the class as
     train/valid/test datasets and labels, then wrapped in a ``DataHolder``
     that is passed, with a default ``Config``, to ``CNNModel``.
     """
     splits = get_data_4d()
     (cls.train_dataset, cls.train_labels,
      cls.valid_dataset, cls.valid_labels,
      cls.test_dataset, cls.test_labels) = splits

     holder = DataHolder(
         cls.train_dataset, cls.train_labels,
         cls.valid_dataset, cls.valid_labels,
         cls.test_dataset, cls.test_labels,
     )
     default_config = Config()
     cls.model = CNNModel(default_config, holder)
Beispiel #4
0
def predict_ddg(input_dir_features, pdb_id, mutations, data_set_name):
    """Annotate each mutation row with model and unfolded-state probabilities.

    Args:
        input_dir_features: Feature directory forwarded to ``prepare_batch``.
        pdb_id: 4-character PDB id, optionally followed by a one-character
            chain id (5 characters in total).
        mutations: DataFrame with the columns ``'wildtype'``, ``'position'``,
            ``'mutation'`` and ``'chain'``. ``position`` may carry a trailing
            insertion code (e.g. ``"27A"``).
        data_set_name: Forwarded to ``unfolded_prob``.

    Returns:
        A DataFrame with one row per successfully processed mutation, with
        the added columns ``w_prob``, ``m_prob``, ``m_u_prob`` and
        ``w_u_prob``; an empty list when no mutation could be processed.

    Raises:
        MissingResidueError: If a mutated residue is absent from the parsed
            structure.
        AssertionError: If the structure and the mutation record disagree on
            the wild-type residue.
    """
    mutation_dataframe = []

    # A 5-character id carries the chain as its last character.
    chain_id = None
    if len(pdb_id) == 5:
        chain_id = pdb_id[4]
        pdb_id = pdb_id[:4]

    batch_factory = prepare_batch(input_dir_features=input_dir_features,
                                  pdb_id=pdb_id)

    print(batch_factory.data_size())
    batch, _ = batch_factory.next(batch_factory.data_size())

    chain_ids = batch['chain_ids']

    print("Chain_id: {}".format(chain_ids))

    # Extract index of first residue from PDB - and attempt to use this as
    # offset into model
    mmcif_parser = Bio.PDB.MMCIFParser()

    cif_path = os.path.join(FLAGS.pdb_dir, pdb_id.lower() + ".cif")

    if not os.path.exists(cif_path):
        # Download into the same directory the parser reads from below;
        # a different hard-coded target would leave cif_path missing.
        pdbl = PDBList()
        pdbl.retrieve_pdb_file(pdb_id, pdir=FLAGS.pdb_dir)

    structure = mmcif_parser.get_structure(pdb_id, cif_path)

    # Loop through all rows
    for _, mutation in mutations.iterrows():
        wt, res_id, mutant, chain = mutation[[
            'wildtype', 'position', 'mutation', 'chain'
        ]]
        print(wt, res_id, mutant)

        # Positions may arrive as integers; normalise so the string handling
        # below works for both representations.
        res_id = str(res_id)

        # Split the position into a numeric index and an insertion code.
        icode = ' '
        if res_id.isdigit():
            res_index = int(res_id)
        else:
            res_index = re.match(r"\d+", res_id).group(0)
            icode = res_id.replace(res_index, "")
            res_index = int(res_index)

        try:
            # Extract residue in PDB
            pdb_res = structure[0][chain][(' ', res_index, icode)]
        except KeyError:
            raise MissingResidueError("Missing residue: " +
                                      str((' ', res_index, icode)) +
                                      ". Perhaps a removed HETATM?")

        # Check that PDB and mutation record agree on wt
        assert (Bio.PDB.Polypeptide.three_to_one(pdb_res.get_resname()) == wt)

        # NOTE(review): only the commented-out offset logic below would use
        # this value; the model input is still indexed by res_index - 1.
        chain_res_index = structure[0][chain].get_list().index(pdb_res)

        try:
            mutant_index = Bio.PDB.Polypeptide.one_to_index(mutant)
            wt_index = Bio.PDB.Polypeptide.one_to_index(wt)

            with tf.Graph().as_default():
                model = CNNModel()
                session = tf.Session()
                try:
                    logits = model.predict(
                        session, [batch['data'][res_index - 1]])[0][0]
                finally:
                    # Close the session so one is not leaked per mutation.
                    session.close()
                # wildtype and mutant probability:
                print("Wildtype prob: {} and mutation prob: {}.".format(
                    logits[wt_index], logits[mutant_index]))
                mutation['w_prob'] = logits[wt_index]
                mutation['m_prob'] = logits[mutant_index]
                print(mutation)

                # Add unfolded chain mutations
                mutation['m_u_prob'] = unfolded_prob(data_set_name, mutant)
                mutation['w_u_prob'] = unfolded_prob(data_set_name, wt)
                print(mutation)
            mutation_dataframe.append(pd.DataFrame(mutation).transpose())

        except Exception as e:
            # Best effort: report the failure and move on to the next row.
            print(e)
            continue
        # This is difficult without chain_ids.
        #model_chain_index_offset = np.nonzero(chain_ids==chain_id)[0][0]

        #model_res_index = model_chain_index_offset + chain_res_index
    if len(mutation_dataframe) > 0:
        return pd.concat(mutation_dataframe)
    return []
    # TODO: explain!
    ''' model_sequence = ""
    for index in np.argmax(batch["model_output"], axis=1):
        if index < 20:
            model_sequence += Bio.PDB.Polypeptide.index_to_one(index)
        else:
            model_sequence += 'X'
    #assert(model_sequence[model_res_index] == wt)

    wt_aa_index = Bio.PDB.Polypeptide.one_to_index(wt)
    mutant_aa_index = Bio.PDB.Polypeptide.one_to_index(mutant) '''
    ''' wt_aa_index = Bio.PDB.Polypeptide.one_to_index(wt)
Beispiel #5
0
# Decay-rate search: draw random decay rates, add the 0.96 reference value,
# and train/validate one model per rate in ascending order.
number_of_exp = 10
DECAY = np.append(np.random.random_sample([number_of_exp]), 0.96)
number_of_exp += 1  # account for the extra 0.96 entry
DECAY.sort()
results, duration, info = [], [], []

for i, de in enumerate(DECAY):
    print("\n ({0} of {1})".format(i + 1, number_of_exp))
    my_config = Config(tunning=True, decay_rate=de)
    # Record every attribute of the configuration as "name: value" text.
    info.append(["%s: %s" % pair for pair in vars(my_config).items()])
    my_model = CNNModel(my_config, my_dataholder)
    train_model(my_model, my_dataholder, 10001, 1000, False)
    duration.append(get_time(train_model, 10001))
    results.append(check_valid(my_model))

DECAY = list(DECAY)
# Tuples compare element-wise, so the validation score decides first.
best_result = max(zip(results, DECAY, duration, info))
result_string = """In an experiment with {0} decay rate values
the best one is {1} with valid accuracy = {2}.
\nThe training takes {3:.2f} seconds using the following params:
\n{4}""".format(number_of_exp,
                best_result[1],
                best_result[0],
                best_result[2],
Beispiel #6
0
import flask
import werkzeug
import time
import os
from flask import Flask, flash, request, redirect, url_for
from werkzeug.utils import secure_filename
from Face_dectection import faceDetection
from CNN import CNNModel
from ImageProcess import ImageProcess
from SVM import SVM
import cv2


# Module-level model instances, created once when this module is imported.
cnn = CNNModel()
svm = SVM()

app = flask.Flask(__name__)
# Flask signs session/flash cookies with this key, so it should not be a
# constant checked into the source. Read it from FLASK_SECRET_KEY when set;
# the previous hard-coded value stays as the fallback for compatibility.
app.secret_key = os.environ.get("FLASK_SECRET_KEY", "key")
@app.route('/', methods = ['GET', 'POST'])
def requestCheck():
    if request.method == 'POST':
        # check if the post request has the file part
        if 'image' not in request.files:
            return "No file part"
        file = request.files['image']
        # if user does not select file, browser also
        # submit an empty part without filename
        if file.filename == '':
            return 'No selected file'
        if file and file.filename:
            filename = secure_filename(file.filename)