コード例 #1
0
ファイル: serve.py プロジェクト: AlecBold/ImageSearch
 def __init__(self, *args, directory=None, **kwargs):
     """Attach the prediction model and searchable image names, then init the base.

     ``directory`` is accepted as a keyword but is neither stored nor
     forwarded; all other arguments go to the parent constructor unchanged.
     """
     model = PredictModel()
     image_names = get_names_of_images()
     self.predict_model, self.search_entities = model, image_names
     # Parent initialisation runs last, after both attributes are set.
     super().__init__(*args, **kwargs)
コード例 #2
0
def _zero_token_mask(x):
    """Build the model ``mask`` argument for a batch of token ids.

    Positions where ``x`` equals 0 become 1.0, all others 0.0, and two
    singleton axes are inserted (``[:, newaxis, newaxis, :]``).
    """
    seq = tf.cast(tf.math.equal(x, 0), tf.float32)
    return seq[:, tf.newaxis, tf.newaxis, :]


def _evaluate_task(model, dataset, task_index):
    """Return ``(auc, accuracy)`` for output column ``task_index`` on ``dataset``.

    The model is run with ``training=False``; the selected logit column is
    passed through a sigmoid before both metrics are computed.
    """
    y_true = []
    y_preds = []
    for x, adjoin_matrix, y in dataset:
        preds = model(x,
                      mask=_zero_token_mask(x),
                      adjoin_matrix=adjoin_matrix,
                      training=False)
        y_true.append(y.numpy())
        y_preds.append(preds[:, task_index].numpy())
    y_true = np.concatenate(y_true, axis=0).reshape(-1)
    y_preds = np.concatenate(y_preds, axis=0).reshape(-1)
    y_preds = tf.sigmoid(y_preds).numpy()
    auc = roc_auc_score(y_true, y_preds)
    accuracy = keras.metrics.binary_accuracy(y_true, y_preds).numpy()
    return auc, accuracy


def main(seed):
    """Jointly fine-tune one ``PredictModel`` on six classification datasets.

    Output column ``i`` of the model is trained and evaluated on dataset ``i``
    of ``task_names``. Every training step uses the next batch of the first
    dataset plus one batch drawn from each of the other five, and minimises
    the weighted sum of the six binary cross-entropy losses. After each of
    the 100 epochs the test AUC and accuracy of every task are printed.

    Args:
        seed: Seed passed to ``np.random.seed`` and ``tf.random.set_seed``.

    Returns:
        The highest test AUC reached on ``task`` (the first dataset) over all
        epochs.
    """
    import os  # local import: only needed to build portable dataset paths

    # os.environ['CUDA_VISIBLE_DEVICES'] = "1"
    # Order matters: position i is the model output column used for that task.
    task_names = ['Ames', 'BBB', 'FDAMDD', 'H_HT', 'Pgp_inh', 'Pgp_sub']
    # The first task's loss is up-weighted 10x; the others are unweighted.
    loss_weights = [10, 1, 1, 1, 1, 1]

    task = 'Ames'
    print(task)
    primary_index = task_names.index(task)

    # Available architecture presets; pick one via ``arch`` below.
    medium2 = {'name': 'Medium', 'num_layers': 6, 'num_heads': 8,
               'd_model': 256, 'path': 'medium_weights2', 'addH': True}
    small = {'name': 'Small', 'num_layers': 3, 'num_heads': 4,
             'd_model': 128, 'path': 'small_weights', 'addH': True}
    medium = {'name': 'Medium', 'num_layers': 6, 'num_heads': 8,
              'd_model': 256, 'path': 'medium_weights', 'addH': True}
    large = {'name': 'Large', 'num_layers': 12, 'num_heads': 12,
             'd_model': 516, 'path': 'large_weights', 'addH': True}
    medium_without_H = {'name': 'Medium', 'num_layers': 6, 'num_heads': 8,
                        'd_model': 256, 'path': 'weights_without_H',
                        'addH': False}
    medium_balanced = {'name': 'Medium', 'num_layers': 6, 'num_heads': 8,
                       'd_model': 256, 'path': 'weights_balanced',
                       'addH': True}
    medium_without_pretrain = {'name': 'Medium', 'num_layers': 6,
                               'num_heads': 8, 'd_model': 256,
                               'path': 'medium_without_pretraining_weights',
                               'addH': True}

    arch = medium
    pretraining = True
    trained_epoch = 6

    num_layers = arch['num_layers']
    num_heads = arch['num_heads']
    d_model = arch['d_model']
    addH = arch['addH']

    dff = d_model * 2
    vocab_size = 17

    np.random.seed(seed=seed)
    tf.random.set_seed(seed=seed)

    # Load the datasets in task order. os.path.join replaces the literal
    # 'data\clf\X.txt' strings, which relied on invalid escape sequences and
    # only resolved on Windows.
    train_datasets = []
    test_datasets = []
    for name in task_names:
        train_ds, test_ds, _unused_val_ds = Graph_Classification_Dataset(
            os.path.join('data', 'clf', name + '.txt'),
            smiles_field='SMILES',
            label_field='Label',
            addH=addH).get_data()
        train_datasets.append(train_ds)
        test_datasets.append(test_ds)

    # One sample batch, used only to call the models once before weights are
    # loaded into them.
    x, adjoin_matrix, y = next(iter(train_datasets[0].take(1)))
    mask = _zero_token_mask(x)
    model = PredictModel(num_layers=num_layers,
                         d_model=d_model,
                         dff=dff,
                         num_heads=num_heads,
                         vocab_size=vocab_size,
                         dense_dropout=0.2)

    if pretraining:
        encoder_weights = arch['path'] + '/bert_weights_encoder{}_{}.h5'.format(
            arch['name'], trained_epoch)

        # Load the full pretrained BERT weights, then re-save only the
        # encoder so it can be loaded into the prediction model.
        temp = BertModel(num_layers=num_layers,
                         d_model=d_model,
                         dff=dff,
                         num_heads=num_heads,
                         vocab_size=vocab_size)
        temp(x, mask=mask, training=True, adjoin_matrix=adjoin_matrix)
        temp.load_weights(
            arch['path'] +
            '/bert_weights{}_{}.h5'.format(arch['name'], trained_epoch))
        temp.encoder.save_weights(encoder_weights)
        del temp

        model(x, mask=mask, training=True, adjoin_matrix=adjoin_matrix)
        model.encoder.load_weights(encoder_weights)
        print('load_wieghts')

    optimizer = tf.keras.optimizers.Adam(learning_rate=10e-5)
    loss_object = tf.keras.losses.BinaryCrossentropy(from_logits=True)

    auc = 0

    for epoch in range(100):
        for primary_batch in train_datasets[0]:
            # A fresh iterator is created for every auxiliary dataset at each
            # step, so each contributes the first batch that pass produces.
            batches = [primary_batch]
            batches.extend(next(iter(ds)) for ds in train_datasets[1:])

            with tf.GradientTape() as tape:
                loss = 0
                for idx, (bx, b_adjoin, by) in enumerate(batches):
                    preds = model(bx,
                                  mask=_zero_token_mask(bx),
                                  training=True,
                                  adjoin_matrix=b_adjoin)
                    loss = loss + loss_object(by, preds[:, idx]) * loss_weights[idx]

            # Gradient computation and the update happen outside the tape
            # context so they are not themselves recorded.
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
        print('epoch: ', epoch, 'loss: {:.4f}'.format(loss.numpy().item()))

        for idx, test_ds in enumerate(test_datasets):
            auc_new, test_accuracy = _evaluate_task(model, test_ds, idx)
            # The second task's log line has no space before the colon; the
            # per-task formats are kept exactly as they were.
            auc_format = 'test auc:{:.4f}' if idx == 1 else 'test auc :{:.4f}'
            print(auc_format.format(auc_new),
                  'test accuracy:{:.4f}'.format(test_accuracy))
            if idx == primary_index:
                # Track the best AUC of the primary task so the return value
                # reflects training instead of staying at its initial 0.
                auc = max(auc, auc_new)

    return auc
コード例 #3
0
ファイル: serve.py プロジェクト: AlecBold/ImageSearch
class Handler(BaseHTTPRequestHandler):
    """HTTP request handler for the image-search client.

    GET serves the whitelisted client files and media images; POST to
    ``/similar`` runs a nearest-image search on the uploaded ``image`` form
    field and returns an HTML page listing the results.
    """

    # Request path -> file (relative to the current directory) for the client.
    CLIENT_FILES = {
        '/': 'client/index.html',
        '/index.html': 'client/index.html',
        '/style.css': 'client/style.css',
    }

    # Media requests must match this pattern: /media/<word>/<name>.<png|jpg|jpeg>
    MEDIA_PATH = r'\/media\/(\w)+\/(\w|[А-Яа-я])+\.(png|jpg|jpeg)$'

    # %-style template; expects the keys ``source_image`` and ``images``.
    SIMILAR_IMAGES_TEMPLATE = '''\
        <!DOCTYPE html>
        <html lang="ru">
        <head>
            <meta charset="UTF-8">
            <meta name="viewport" content="width=device-width, initial-scale=1.0">
            <meta http-equiv="X-UA-Compatible" content="ie=edge">
            <link href="https://fonts.googleapis.com/css?family=Oswald:300,400,500&display=swap" rel="stylesheet">
            <link rel="stylesheet" href="./style.css">
            <title>ImgRec</title>
        </head>
        <body>
            <h1>Similar Images</h1>
            <hr>
            <ul class="images-list">
                <li>
                    <img src="%(source_image)s" />
                    <span>source image \n distance: 0</span>
                </li>
                %(images)s
            </ul>
        </body>
        </html>
    '''

    def __init__(self, *args, directory=None, **kwargs):
        """Create the model and image-name list, then initialise the base class.

        ``directory`` is accepted but neither stored nor forwarded.
        """
        # NOTE(review): the model is constructed for every handler instance;
        # if construction is expensive, consider sharing one instance.
        self.predict_model = PredictModel()
        self.search_entities = get_names_of_images()
        super().__init__(*args, **kwargs)

    def __set_headers(self, content_type, content_length):
        """Send a 200 status line plus Content-type and Content-Length headers."""
        self.send_response(200)
        self.send_header('Content-type', content_type)
        self.send_header('Content-Length', content_length)
        self.end_headers()

    def __resolve_get_path(self):
        """Map ``self.path`` to a local file path, or ``None`` if not servable."""
        file_path = os.curdir + os.sep

        if self.path in self.CLIENT_FILES:
            return file_path + self.CLIENT_FILES[self.path]

        if re.match(self.MEDIA_PATH, self.path):
            return file_path + self.path

        return None

    def do_GET(self):
        """Serve a whitelisted client or media file; reply 404 otherwise."""
        file_path = self.__resolve_get_path()

        if not file_path:
            self.send_error(404, f'File Not Found: {self.path}')
            return

        # Only the read is guarded: a failure while writing the response must
        # not trigger a second (404) response after headers were already sent.
        try:
            with open(file_path, 'rb') as file_binary:
                payload = file_binary.read()
        except IOError:
            self.send_error(404, f'File Not Found: {self.path}')
            return

        content_type, _ = mimetypes.guess_type(file_path)
        # guess_type returns None for unknown extensions; never send "None".
        self.__set_headers(content_type or 'application/octet-stream',
                           len(payload))
        self.wfile.write(payload)

    def do_POST(self):
        """Handle ``POST /similar``: search similar images and render them.

        Replies 404 for any other path and 400 when the form has no usable
        ``image`` field, so the client always receives a response.
        """
        if self.path != '/similar':
            self.send_error(404, f'File Not Found: {self.path}')
            return

        form = cgi.FieldStorage(fp=self.rfile,
                                headers=self.headers,
                                environ={
                                    'REQUEST_METHOD':
                                    'POST',
                                    'CONTENT_TYPE':
                                    self.headers['Content-Type'],
                                })

        try:
            image = form['image']
        except (KeyError, TypeError):
            # KeyError: field missing; TypeError: body is not an indexable form.
            self.send_error(400, 'Missing "image" form field')
            return

        if getattr(image, 'file', None) is None:
            self.send_error(400, 'Invalid "image" form field')
            return

        image_data = image.file.read()
        # A field without a filename would make guess_type raise.
        image_mime = None
        if image.filename:
            image_mime, _ = mimetypes.guess_type(image.filename)
        if image_mime is None:
            image_mime = 'application/octet-stream'
        image_base64 = str(base64.b64encode(image_data), 'utf-8')
        similar_images, distances = self.predict_model.search_nearest(
            self.search_entities, image_data)

        # NOTE(review): ``new_folder`` is not defined in this class; presumably
        # a module-level name - confirm it exists where this file is used.
        image_items = '\n'.join(
            f'''<li><img src="media/{new_folder}/{img}" /><div>distance:{dist}</div></li>'''
            for img, dist in zip(similar_images, distances))

        content = (self.SIMILAR_IMAGES_TEMPLATE % {
            'images':
            image_items,
            'source_image':
            f'data:{image_mime};charset=utf-8;base64, {image_base64}'
        })

        body = content.encode('UTF-8', 'replace')
        self.__set_headers('text/html;charset=utf-8', len(body))

        self.wfile.write(body)
コード例 #4
0
ファイル: main.py プロジェクト: 1ofsomepeople/flask-learn
    parser.add_argument("--n_layer", type=int, default=2)
    parser.add_argument("--num_nodes", type=int, default=17531)

    # result args
    parser.add_argument("--result_folder", type=str)
    parser.add_argument("--gpu", type=str, default="0")

    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    if not os.path.exists(args.result_folder):
        os.mkdir(args.result_folder)

    args.log = os.path.join(args.result_folder, args.log)

    src_id, dst_id = load_graph()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if args.model == "lr":
        model = LinearRegression(3, args.hid_c, args.h_step)
    elif args.model == "norm_lr":
        model = NormalizeLR(3, args.hid_c, args.h_step)
    else:
        model = PredictModel(args.model, src_id, dst_id, 3, args.hid_c,
                             args.h_step, args.n_layer, device)

    train_main(model, args, True)
    # test_main(args.log, model)
コード例 #5
0
def main(seed):
    """Fine-tune ``PredictModel`` on one classification task and report test AUC.

    Trains for up to 100 epochs, keeping the weights with the best validation
    AUC and stopping once more than 20 consecutive epochs bring no
    improvement. The best weights are then reloaded and evaluated on the test
    split.

    Args:
        seed: Seed passed to ``np.random.seed`` and ``tf.random.set_seed``;
            also part of the saved file names.

    Returns:
        ROC AUC on the test split for the best-validation weights.
    """
    import os  # local import: only needed to create the weights directory

    # tasks = ['Ames', 'BBB', 'FDAMDD', 'H_HT', 'Pgp_inh', 'Pgp_sub']
    # os.environ['CUDA_VISIBLE_DEVICES'] = "1"

    task = 'FDAMDD'
    print(task)

    # Available architecture presets; pick one via ``arch`` below.
    small = {'name': 'Small', 'num_layers': 3, 'num_heads': 2,
             'd_model': 128, 'path': 'small_weights', 'addH': True}
    medium = {'name': 'Medium', 'num_layers': 6, 'num_heads': 8,
              'd_model': 256, 'path': 'medium_weights', 'addH': True}
    large = {'name': 'Large', 'num_layers': 12, 'num_heads': 12,
             'd_model': 512, 'path': 'large_weights', 'addH': True}

    arch = medium
    pretraining = True
    trained_epoch = 10

    num_layers = arch['num_layers']
    num_heads = arch['num_heads']
    d_model = arch['d_model']
    addH = arch['addH']

    dff = d_model * 2
    vocab_size = 17

    np.random.seed(seed=seed)
    tf.random.set_seed(seed=seed)
    # addH follows the selected architecture instead of a hard-coded True.
    train_dataset, test_dataset, val_dataset = Graph_Classification_Dataset(
        'data/clf/{}.csv'.format(task),
        smiles_field='SMILES',
        label_field='Label',
        addH=addH).get_data()

    def zero_token_mask(tokens):
        """Return the model mask: 1.0 where ``tokens`` is 0, with two extra axes."""
        seq = tf.cast(tf.math.equal(tokens, 0), tf.float32)
        return seq[:, tf.newaxis, tf.newaxis, :]

    def predict_all(dataset):
        """Return flattened ``(labels, sigmoid(model outputs))`` over ``dataset``."""
        labels = []
        outputs = []
        for bx, b_adjoin, by in dataset:
            batch_preds = model(bx, mask=zero_token_mask(bx),
                                adjoin_matrix=b_adjoin, training=False)
            labels.append(by.numpy())
            outputs.append(batch_preds.numpy())
        labels = np.concatenate(labels, axis=0).reshape(-1)
        outputs = np.concatenate(outputs, axis=0).reshape(-1)
        return labels, tf.sigmoid(outputs).numpy()

    # One sample batch, used only to call the models once before weights are
    # loaded into them.
    x, adjoin_matrix, y = next(iter(train_dataset.take(1)))
    mask = zero_token_mask(x)
    model = PredictModel(num_layers=num_layers, d_model=d_model, dff=dff,
                         num_heads=num_heads, vocab_size=vocab_size,
                         dense_dropout=0.5)

    if pretraining:
        encoder_weights = arch['path'] + '/bert_weights_encoder{}_{}.h5'.format(
            arch['name'], trained_epoch)

        # Load the full pretrained BERT weights, then re-save only the
        # encoder so it can be loaded into the prediction model.
        temp = BertModel(num_layers=num_layers, d_model=d_model, dff=dff,
                         num_heads=num_heads, vocab_size=vocab_size)
        temp(x, mask=mask, training=True, adjoin_matrix=adjoin_matrix)
        temp.load_weights(arch['path'] + '/bert_weights{}_{}.h5'.format(
            arch['name'], trained_epoch))
        temp.encoder.save_weights(encoder_weights)
        del temp

        model(x, mask=mask, training=True, adjoin_matrix=adjoin_matrix)
        model.encoder.load_weights(encoder_weights)
        print('load_wieghts')

    optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
    loss_object = tf.keras.losses.BinaryCrossentropy(from_logits=True)

    weights_path = 'classification_weights/{}_{}.h5'.format(task, seed)
    # Saving fails if the target directory is missing, so create it up front.
    os.makedirs('classification_weights', exist_ok=True)

    auc = -10
    stopping_monitor = 0
    for epoch in range(100):
        accuracy_object = tf.keras.metrics.BinaryAccuracy()
        for x, adjoin_matrix, y in train_dataset:
            with tf.GradientTape() as tape:
                preds = model(x, mask=zero_token_mask(x), training=True,
                              adjoin_matrix=adjoin_matrix)
                loss = loss_object(y, preds)
            # Gradient computation and the update happen outside the tape
            # context so they are not themselves recorded.
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            # The model emits logits (the loss uses from_logits=True), so they
            # are converted to probabilities before thresholding accuracy.
            accuracy_object.update_state(y, tf.sigmoid(preds))
        print('epoch: ', epoch, 'loss: {:.4f}'.format(loss.numpy().item()),
              'accuracy: {:.4f}'.format(accuracy_object.result().numpy().item()))

        y_true, y_preds = predict_all(val_dataset)
        auc_new = roc_auc_score(y_true, y_preds)
        val_accuracy = keras.metrics.binary_accuracy(y_true, y_preds).numpy()
        print('val auc:{:.4f}'.format(auc_new),
              'val accuracy:{:.4f}'.format(val_accuracy))

        if auc_new > auc:
            auc = auc_new
            stopping_monitor = 0
            # NOTE(review): trained_epoch appears twice in the file name; a
            # trailing 'pretraining' marker was passed but never formatted in.
            # The existing name is kept so saved results stay findable.
            np.save('{}/{}{}{}{}{}'.format(arch['path'], task, seed,
                                           arch['name'], trained_epoch,
                                           trained_epoch),
                    [y_true, y_preds])
            model.save_weights(weights_path)
            print('save model weights')
        else:
            stopping_monitor += 1
        print('best val auc: {:.4f}'.format(auc))
        if stopping_monitor > 0:
            print('stopping_monitor:', stopping_monitor)
        if stopping_monitor > 20:
            break

    # Evaluate the best-validation checkpoint on the held-out test split.
    model.load_weights(weights_path)
    y_true, y_preds = predict_all(test_dataset)
    test_auc = roc_auc_score(y_true, y_preds)
    test_accuracy = keras.metrics.binary_accuracy(y_true, y_preds).numpy()
    print('test auc:{:.4f}'.format(test_auc),
          'test accuracy:{:.4f}'.format(test_accuracy))

    return test_auc
コード例 #6
0
def main(seed):
    """Fine-tune ``PredictModel`` on one regression task and report R2 / MSE.

    Trains for up to 100 epochs with a mean-squared-error loss, keeping the
    weights with the best validation R2 and stopping once more than 20
    consecutive epochs bring no improvement. The best weights are then
    reloaded and evaluated on the test split. Printed MSE values are
    multiplied by ``value_range ** 2``.

    Args:
        seed: Seed passed to ``tf.random.set_seed``; also part of the saved
            predictions file name.

    Returns:
        The best validation R2 reached during training.
    """
    # tasks = ['caco2', 'logD', 'logS', 'PPB', 'tox']
    keras.backend.clear_session()
    os.environ['CUDA_VISIBLE_DEVICES'] = "0"

    # Available architecture presets; pick one via ``arch`` below.
    small = {'name': 'Small', 'num_layers': 3, 'num_heads': 4,
             'd_model': 128, 'path': 'small_weights', 'addH': True}
    medium = {'name': 'Medium', 'num_layers': 6, 'num_heads': 8,
              'd_model': 256, 'path': 'medium_weights', 'addH': True}
    medium2 = {'name': 'Medium', 'num_layers': 6, 'num_heads': 8,
               'd_model': 256, 'path': 'medium_weights2', 'addH': True}
    large = {'name': 'Large', 'num_layers': 12, 'num_heads': 12,
             'd_model': 576, 'path': 'large_weights', 'addH': True}
    medium_without_H = {'name': 'Medium', 'num_layers': 6, 'num_heads': 8,
                        'd_model': 256, 'path': 'weights_without_H',
                        'addH': False}
    medium_without_pretrain = {'name': 'Medium', 'num_layers': 6,
                               'num_heads': 8, 'd_model': 256,
                               'path': 'medium_without_pretraining_weights',
                               'addH': True}

    arch = medium

    pretraining = True

    trained_epoch = 10
    task = 'PPB'
    print(task)

    num_layers = arch['num_layers']
    num_heads = arch['num_heads']
    d_model = arch['d_model']
    addH = arch['addH']

    dff = d_model * 2
    vocab_size = 17

    tf.random.set_seed(seed=seed)
    # Keep the dataset object itself: both get_data() and value_range() are
    # called on it (calling get_data() twice in a chain fails on the tuple).
    graph_dataset = Graph_Regression_Dataset('data/reg/{}.txt'.format(task),
                                             smiles_field='SMILES',
                                             label_field='Label',
                                             addH=addH)
    train_dataset, test_dataset, val_dataset = graph_dataset.get_data()
    value_range = graph_dataset.value_range()

    def zero_token_mask(tokens):
        """Return the model mask: 1.0 where ``tokens`` is 0, with two extra axes."""
        seq = tf.cast(tf.math.equal(tokens, 0), tf.float32)
        return seq[:, tf.newaxis, tf.newaxis, :]

    def predict_all(dataset):
        """Return flattened ``(labels, model outputs)`` over ``dataset``."""
        labels = []
        outputs = []
        for bx, b_adjoin, by in dataset:
            batch_preds = model(bx, mask=zero_token_mask(bx),
                                adjoin_matrix=b_adjoin, training=False)
            labels.append(by.numpy())
            outputs.append(batch_preds.numpy())
        labels = np.concatenate(labels, axis=0).reshape(-1)
        outputs = np.concatenate(outputs, axis=0).reshape(-1)
        return labels, outputs

    # One sample batch, used only to call the models once before weights are
    # loaded into them.
    x, adjoin_matrix, y = next(iter(train_dataset.take(1)))
    mask = zero_token_mask(x)
    model = PredictModel(num_layers=num_layers, d_model=d_model, dff=dff,
                         num_heads=num_heads, vocab_size=vocab_size,
                         dense_dropout=0.15)
    if pretraining:
        encoder_weights = arch['path'] + '/bert_weights_encoder{}_{}.h5'.format(
            arch['name'], trained_epoch)

        # Load the full pretrained BERT weights, then re-save only the
        # encoder so it can be loaded into the prediction model.
        temp = BertModel(num_layers=num_layers, d_model=d_model, dff=dff,
                         num_heads=num_heads, vocab_size=vocab_size)
        temp(x, mask=mask, training=True, adjoin_matrix=adjoin_matrix)
        temp.load_weights(arch['path'] + '/bert_weights{}_{}.h5'.format(
            arch['name'], trained_epoch))
        temp.encoder.save_weights(encoder_weights)
        del temp

        model(x, mask=mask, training=True, adjoin_matrix=adjoin_matrix)
        model.encoder.load_weights(encoder_weights)
        print('load_wieghts')

    optimizer = tf.keras.optimizers.Adam(learning_rate=10e-5)

    weights_path = 'regression_weights/{}.h5'.format(task)
    # Saving fails if the target directory is missing, so create it up front.
    os.makedirs('regression_weights', exist_ok=True)

    r2 = -10
    stopping_monitor = 0
    for epoch in range(100):
        mse_object = tf.keras.metrics.MeanSquaredError()
        for x, adjoin_matrix, y in train_dataset:
            with tf.GradientTape() as tape:
                preds = model(x, mask=zero_token_mask(x), training=True,
                              adjoin_matrix=adjoin_matrix)
                loss = tf.reduce_mean(tf.square(y - preds))
            # Gradient computation and the update happen outside the tape
            # context so they are not themselves recorded.
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            mse_object.update_state(y, preds)
        print('epoch: ', epoch, 'loss: {:.4f}'.format(loss.numpy().item()),
              'mse: {:.4f}'.format(
                  mse_object.result().numpy().item() * (value_range**2)))

        y_true, y_preds = predict_all(val_dataset)
        r2_new = r2_score(y_true, y_preds)

        val_mse = keras.metrics.MSE(y_true, y_preds).numpy() * (value_range**2)
        print('val r2: {:.4f}'.format(r2_new), 'val mse:{:.4f}'.format(val_mse))
        if r2_new > r2:
            r2 = r2_new
            stopping_monitor = 0
            # NOTE(review): trained_epoch appears twice in the file name; a
            # trailing 'pretraining' marker was passed but never formatted in.
            # The existing name is kept so saved results stay findable.
            np.save('{}/{}{}{}{}{}'.format(arch['path'], task, seed,
                                           arch['name'], trained_epoch,
                                           trained_epoch),
                    [y_true, y_preds])
            model.save_weights(weights_path)
        else:
            stopping_monitor += 1
        print('best r2: {:.4f}'.format(r2))
        if stopping_monitor > 0:
            print('stopping_monitor:', stopping_monitor)
        if stopping_monitor > 20:
            break

    # Evaluate the best-validation checkpoint on the held-out test split.
    model.load_weights(weights_path)
    y_true, y_preds = predict_all(test_dataset)

    test_r2 = r2_score(y_true, y_preds)
    test_mse = keras.metrics.MSE(y_true, y_preds).numpy() * (value_range**2)
    print('test r2:{:.4f}'.format(test_r2), 'test mse:{:.4f}'.format(test_mse))

    return r2
コード例 #7
0
import json
import logging

from flask import Flask, request, Response

from model import PredictModel

# Log to a single stream handler at INFO level using "LEVEL:message" lines.
handlers = [logging.StreamHandler()]
logging.basicConfig(
    level=logging.INFO,
    format='%(levelname)s:%(message)s',
    handlers=handlers,
)

# The Flask application and the model instance used by the request handlers.
app = Flask(__name__)
model = PredictModel()
logging.info(model)


@app.route('/api/v1/predict', methods=['POST'])
def predict():
    """Return the model prediction for the ``"input"`` field of a JSON body.

    Returns:
        A JSON response ``{"prediction": <first element of model.predict>}``,
        or a 400 JSON error when the body is not a JSON object containing an
        ``"input"`` key.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so every bad request gets the same explicit 400 below.
    data = request.get_json(silent=True)
    logging.info(data)
    if not isinstance(data, dict) or "input" not in data:
        error = {"error": 'request body must be a JSON object with an "input" field'}
        return Response(json.dumps(error), status=400,
                        mimetype='application/json')

    label = model.predict(data["input"])
    logging.info(label)
    # NOTE(review): label[0] must be JSON-serializable; confirm what
    # model.predict returns.
    output_data = {"prediction": label[0]}
    return Response(json.dumps(output_data), mimetype='application/json')


if __name__ == '__main__':
    # The server listens on every interface (0.0.0.0). Debug mode would expose
    # the interactive debugger, which allows arbitrary code execution, to
    # anyone who can reach the port, so it is kept off here.
    app.run(port=8083, debug=False, host='0.0.0.0')