Beispiel #1
0
def main():
    """Run the ELMo multi-class pipeline: load, truncate, train, evaluate.

    Each text is cut down to its first ``max_len`` whitespace-separated
    tokens and reshaped into a single-column object array before being fed
    to the model. Evaluation results on the test split are printed to stdout.
    """
    # Input / output locations
    train_dir = "./data/multi_train.csv"
    test_dir = "./data/multi_test.csv"
    model_dir = "./model_save"

    # Training configuration
    target_names = ['0', '1', '2', '3']
    n_classes = len(target_names)
    max_len = 50
    hidden_units = 64
    epoch = 1
    batch = 128

    def to_clipped_column(texts):
        """Keep the first ``max_len`` tokens of each text; return an (n, 1) object array."""
        clipped = [' '.join(text.split()[:max_len]) for text in texts]
        return np.array(clipped, dtype=object)[:, np.newaxis]

    print("0. Setting Environment")
    set_env()

    print("1. load data")
    train_x, train_y, test_x, test_y, val_x, val_y = load_data(
        train_dir, test_dir, n_classes)

    print("2. pre processing")
    # Convert all three splits to plain lists first, then truncate each one.
    train_x, val_x, test_x = train_x.tolist(), val_x.tolist(), test_x.tolist()
    train_x = to_clipped_column(train_x)
    val_x = to_clipped_column(val_x)
    test_x = to_clipped_column(test_x)

    print("3. build model")
    model = ELMo(hidden_units=hidden_units,
                 data_type="multi",
                 category_size=n_classes)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    fit_options = {
        "x": train_x,
        "y": train_y,
        "epochs": epoch,
        "batch_size": batch,
        "validation_data": (val_x, val_y),
        "callbacks": create_callbacks(model_dir),
    }
    model.fit(**fit_options)

    print("4. evaluation")
    accuracy, cf_matrix, report = Evaluation(
        model, test_x, test_y).eval_classification(data_type="multi")
    for heading, payload in (
            ("## Target Names : ", target_names),
            ("## Classification Report \n", report),
            ("## Confusion Matrix \n", cf_matrix),
            ("## Accuracy \n", accuracy)):
        print(heading, payload)
def main():
    """Run the TextCNN multi-class pipeline using an embedding matrix from file.

    The embedding matrix is built by ``text_to_vector`` from the tokenizer's
    word index and the vector file at ``embedding_dir``; the model is created
    with ``flag="pre_training"``. Evaluation results on the test split are
    printed to stdout.
    """
    # Input / output locations
    train_dir = "./data/multi_train.csv"
    test_dir = "./data/multi_test.csv"
    model_dir = "./model_save"
    embedding_dir = "./glove.6B.50d.txt"

    # Training configuration
    target_names = ['0', '1', '2', '3']
    n_classes = len(target_names)
    embedding_dim = 50
    epoch = 1
    batch = 256

    print("0. Setting Environment")
    set_env()

    print("1. load data")
    train_x, train_y, test_x, test_y, val_x, val_y = load_data(
        train_dir, test_dir, n_classes)

    print("2. pre processing")
    train_x, test_x, val_x, tokenizer = pre_processing(train_x, test_x, val_x)

    print("3. text to vector")
    embedding_matrix = text_to_vector(
        tokenizer.word_index, embedding_dir, word_dimension=embedding_dim)

    print("4. build model")
    # The sequence length is taken from the second axis of the processed
    # training data.
    model = TextCNN(
        sequence_len=train_x.shape[1],
        embedding_matrix=embedding_matrix,
        embedding_dim=embedding_dim,
        filter_sizes=[3, 4, 5],
        flag="pre_training",
        data_type="multi",
        category_num=n_classes,
    )
    model.compile(
        optimizer="adam",
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    callbacks = create_callbacks(model_dir)

    model.fit(
        x=train_x,
        y=train_y,
        epochs=epoch,
        batch_size=batch,
        validation_data=(val_x, val_y),
        callbacks=callbacks,
    )

    print("5. evaluation")
    accuracy, cf_matrix, report = Evaluation(
        model, test_x, test_y).eval_classification(data_type="multi")
    for heading, payload in (
            ("## Target Names : ", target_names),
            ("## Classification Report \n", report),
            ("## Confusion Matrix \n", cf_matrix),
            ("## Accuracy \n", accuracy)):
        print(heading, payload)
def main():
    """Run the TextCNN binary pipeline with ``flag="self_training"``.

    In this mode the ``embedding_matrix`` argument receives an integer,
    ``len(tokenizer.word_index) + 1``, instead of a matrix. Evaluation
    results on the test split are printed to stdout.
    """
    # Input / output locations
    train_dir = "../data/binary_train.csv"
    test_dir = "../data/binary_test.csv"
    model_dir = "./model_save"

    # Training configuration
    epoch = 2
    batch = 256

    print("0. Setting Environment")
    set_env()

    print("1. load data")
    train_x, train_y, test_x, test_y, val_x, val_y = load_data(
        train_dir, test_dir)

    print("2. pre processing")
    train_x, test_x, val_x, tokenizer = pre_processing(train_x, test_x, val_x)

    print("3. build model")
    model = TextCNN(
        sequence_len=train_x.shape[1],
        embedding_matrix=len(tokenizer.word_index) + 1,
        embedding_dim=300,
        filter_sizes=[3, 4, 5],
        flag="self_training",
        data_type="binary",
    )
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )

    fit_options = {
        "x": train_x,
        "y": train_y,
        "epochs": epoch,
        "batch_size": batch,
        "validation_data": (val_x, val_y),
        "callbacks": create_callbacks(model_dir),
    }
    model.fit(**fit_options)

    print("4. evaluation")
    accuracy, cf_matrix, report = Evaluation(
        model, test_x, test_y).eval_classification(data_type="binary")
    for heading, payload in (
            ("## Classification Report \n", report),
            ("## Confusion Matrix \n", cf_matrix),
            ("## Accuracy \n", accuracy)):
        print(heading, payload)
Beispiel #4
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# Author: "Chris Ward" <*****@*****.**>


from datetime import datetime
import os

from utils import set_env

from metrique.utils import debug_setup

# Shared test setup: logger, environment mapping and filesystem locations.
# Logger for 'metrique' at level 10, writing to stdout and not to a file.
logger = debug_setup('metrique', level=10, log2stdout=True, log2file=False)

# set_env() returns a mapping; it is indexed by key below.
env = set_env()
# Short alias for os.path.exists.
exists = os.path.exists

# Directory containing this file; 'cubes' and 'fixtures' are resolved under it.
testroot = os.path.dirname(os.path.abspath(__file__))
cubes = os.path.join(testroot, 'cubes')
fixtures = os.path.join(testroot, 'fixtures')
# Raises KeyError if the mapping has no 'METRIQUE_CACHE' entry.
cache_dir = env['METRIQUE_CACHE']
# NOTE(review): read from os.environ rather than from `env`; os.environ.get
# returns None when METRIQUE_ETC is unset.
etc_dir = os.environ.get('METRIQUE_ETC')

# NOTE(review): os.path.join raises TypeError if etc_dir is None, so this
# module fails at import time unless METRIQUE_ETC is set (presumably by
# set_env() above - confirm).
default_config = os.path.join(etc_dir, 'metrique.json')


def db_tester(proxy):
    from metrique.utils import ts2dt
    from metrique import metrique_object as O
import ogusa
import run_ogusa
from taxcalc import Policy, Calculator

# Key names this module expects; the code that checks them is outside this view.
EXPECTED_KEYS = (
    'policy',
    'consumption',
    'behavior',
    'growdiff_baseline',
    'growdiff_response',
    'gdp_elasticity',
)
TEST_FAIL = False

from utils import set_env
# Copy every entry of the mapping returned by set_env() into module globals.
# NOTE(review): REDISGREEN_URL and MOCK_CELERY are not defined in the visible
# code; presumably they are injected by this call - confirm.
globals().update(set_env())
# The same URL is used for both the broker and the result backend.
celery_app = Celery('tasks2', broker=REDISGREEN_URL, backend=REDISGREEN_URL)
if MOCK_CELERY:
    # Module-level settings assigned only on the mock path.
    CELERY_ALWAYS_EAGER = True
    BROKER_BACKEND = 'memory'
    CELERY_EAGER_PROPAGATES_EXCEPTIONS = True

    def task(func):
        """Return a Namespace whose ``delay`` attribute is ``func`` itself.

        Calling ``.delay(...)`` on the result therefore invokes ``func``
        directly in the current process.
        """
        celery_app_like = Namespace(delay=func)
        return celery_app_like

    # Replace the Celery app created above with a stand-in that exposes only
    # ``task``, so ``@celery_app.task`` resolves to the function defined here.
    celery_app = Namespace(task=task)

#Create a Public Use File object
# presumably a random-number seed; its consumer is outside this view - confirm
rn_seed = 80
if not MOCK_CELERY:
Beispiel #6
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# Author: "Chris Ward" <*****@*****.**>

import os

from utils import set_env
from metrique.utils import debug_setup

# Shared test setup: logger, environment mapping and filesystem locations.
# Logger for 'metrique' at level 10, writing to stdout and not to a file.
logger = debug_setup('metrique', level=10, log2stdout=True, log2file=False)

# set_env() returns a mapping; it is indexed by key below.
env = set_env()
# Short alias for os.path.exists.
exists = os.path.exists

# Directory containing this file; 'cubes' and 'fixtures' are resolved under it.
testroot = os.path.dirname(os.path.abspath(__file__))
cubes = os.path.join(testroot, 'cubes')
fixtures = os.path.join(testroot, 'fixtures')
# Raises KeyError if the mapping has no 'METRIQUE_CACHE' entry.
cache_dir = env['METRIQUE_CACHE']

def test_parse_fields():
    from metrique.parse import parse_fields

    OK_list = ['a', 'b', 'c']
    OK_dct = {'a': 1, 'b': 1, 'c': 1}
    all_ = '~'
    fields_str = 'c  , b  , a  '
    fields_list = ['a  ', 'c ', '  b']
    fields_dct = {' a': 1, 'b ': 1, 'c  ': 1}
    fields_dct_BAD = {' a': None, 'b ': 1, 'c  ': 1}
Beispiel #7
0
def main():
    """Run the BERT multi-class pipeline: load, featurize, train, evaluate.

    Each text is cut down to its first ``max_len`` whitespace-separated
    tokens, turned into examples, and converted to (input ids, input masks,
    segment ids, labels) before training. The checkpoint callback monitors
    ``val_acc`` and saves only the best model. Evaluation results on the test
    split are printed to stdout.
    """
    # Input / output locations
    train_dir = "../data/multi_train.csv"
    test_dir = "../data/multi_test.csv"
    model_dir = "./model_save"

    # Training configuration
    target_names = ['0', '1', '2', '3']
    n_classes = len(target_names)
    max_len = 50
    hidden_units = 64
    epoch = 2
    batch = 256

    def to_clipped_column(texts):
        """Keep the first ``max_len`` tokens of each text; return an (n, 1) object array."""
        clipped = [' '.join(text.split()[:max_len]) for text in texts]
        return np.array(clipped, dtype=object)[:, np.newaxis]

    print("0. Setting Environment")
    set_env()

    print("1. load data")
    train_x, train_y, test_x, test_y, val_x, val_y = load_data(
        train_dir, test_dir, n_classes)

    print("2. pre processing")
    # Convert all three splits to plain lists first, then truncate each one.
    train_x, val_x, test_x = train_x.tolist(), val_x.tolist(), test_x.tolist()
    train_x = to_clipped_column(train_x)
    val_x = to_clipped_column(val_x)
    test_x = to_clipped_column(test_x)

    tokenizer = create_tokenizer_from_hub_module()

    train_examples = convert_text_to_examples(train_x, train_y)
    val_examples = convert_text_to_examples(val_x, val_y)
    test_examples = convert_text_to_examples(test_x, test_y)

    train_input_ids, train_input_masks, train_segment_ids, train_labels = (
        convert_examples_to_features(tokenizer, train_examples, max_len))
    val_input_ids, val_input_masks, val_segment_ids, val_labels = (
        convert_examples_to_features(tokenizer, val_examples, max_len))
    test_input_ids, test_input_masks, test_segment_ids, test_labels = (
        convert_examples_to_features(tokenizer, test_examples, max_len))

    print("3. build model")
    model = BERT(max_len, data_type="multi", category_size=n_classes)
    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    # NOTE(review): `sess` is not bound inside this function; presumably a
    # module-level session object - confirm.
    initialize_vars(sess)

    # Keras fills in the {epoch} / {val_acc} placeholders when saving.
    checkpoint_path = (
        model_dir + "/model-weights.{epoch:02d}-{val_acc:.6f}.hdf5")
    cp_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        monitor='val_acc',
        save_best_only=True,
        verbose=1,
    )

    train_inputs = [train_input_ids, train_input_masks, train_segment_ids]
    val_inputs = [val_input_ids, val_input_masks, val_segment_ids]
    model.fit(
        train_inputs, train_labels,
        validation_data=(val_inputs, val_labels),
        epochs=epoch,
        batch_size=batch,
        callbacks=[cp_callback],
    )

    print("4. evaluation")
    # NOTE(review): evaluation is given test_y, not the test_labels produced
    # by feature conversion - confirm this is intended.
    test_inputs = [test_input_ids, test_input_masks, test_segment_ids]
    accuracy, cf_matrix, report = Evaluation(
        model, test_inputs, test_y).eval_classification_bert(data_type="multi")
    for heading, payload in (
            ("## Classification Report \n", report),
            ("## Confusion Matrix \n", cf_matrix),
            ("## Accuracy \n", accuracy)):
        print(heading, payload)
import os
import uuid
import threading
from threading import Thread, Event, Lock
import time

from flask import Flask, request, make_response
import pandas as pd
import taxcalc
from pandas.util.testing import assert_frame_equal
import requests
import redis
from retrying import retry

from utils import set_env
globals().update(set_env())
from celery_tasks import (celery_app, dropq_task_async,
                                          dropq_task_small_async,
                                          ogusa_async, elasticity_gdp_task_async,
                                          btax_async, example_async, MOCK_CELERY)



# Flask application object for this module.
app = Flask(__name__)

# Base URL string; where it is used is outside this view.
server_url = "http://localhost:5050"

# Queue name string; presumably the Celery queue inspected via redis - confirm.
queue_name = "celery"
# Redis client bound to host "redis", port 6379.
client = redis.Redis(host="redis", port=6379)
# Module-level dicts that start empty; the code that fills them is outside
# this view.
RUNNING_JOBS = {}
TRACKING_TICKETS = {}
Beispiel #9
0
                        comscore = ascore
                is_best = comscore > best_score
                best_score = max(comscore,best_score)
                infostr = {'Epoch:  {:.4f}   loss: {:.4f},gs: {:.4f},bs:{:.4f} ,ms:{:.4f},as:{:.4f}'.format(epoch+1,val_loss,val_score,best_score,mscore,ascore)}
                logging.info(infostr)
                save_checkpoint(
                        {'epoch': epoch + 1,
                        'state_dict': model.module.state_dict(),
                        'optimizer' : optimizer.state_dict(),
                        'scheduler' : scheduler.state_dict(),
                        'best_score': best_score
                        }, is_best, outdir=conf['outdir'])
        test(model , train_loader)
    print('Best val acc: {}'.format(best_score))
    return 0


# Script entry point: build the config, pass it through the set_* helpers in
# order, then hand it to main().
if __name__ == '__main__':

    # get configs and set envs
    # `conf` is bound at module level here, so it is also visible as a global.
    conf = get_config()
    set_env(conf)
    # generate outdir name
    set_outdir(conf)
    # Set the logger
    set_logger(conf)
    # Run the training entry point with the prepared config.
    main(conf)



Beispiel #10
0
# Switch the working directory to `bd` (defined outside this view) if needed.
if os.getcwd() != bd:
    os.chdir(bd)

# NOTE(review): imported after the chdir above; presumably `utils` is resolved
# relative to the working directory - confirm.
import utils

# Settings passed to every utils.* call below. Several paths contain a "{}"
# placeholder; presumably filled in per cross-validation fold by utils -
# confirm.
info_dict = dict(base_dir=bd,
                 gtoi_res="dev_for_container/gtoi_res",
                 itod_res="dev_for_container/fxvb_run_cv_{}",
                 data_file="brain_snp_covars_meancentered_scaled.h5",
                 cv_idx="dev_for_container/cv_splits.csv",
                 bf_cv_file="dev_for_container/bf_info_cv_{}_for_fxvb.csv",
                 fxvb_cv="dev_for_container/fxvb_run_cv_{}",
                 read_imaging=True,
                 read_genomic=False,
                 read_depvar=True)

# set some required variables
# 10e4 is 1e5, so nsnp == 100000.
nsnp, nimg = int(10e4), 170
# load data
data = utils.set_env(info_dict)
# get the prediction accuracies
classif_results, roc_auc_insample, roc_auc_outsample = utils.predict_all_folds(
    info_dict, data, "beta", False)
# get the pip values for plotting on the brain
df_with_itod_pips = utils.get_itod_pip(info_dict, data, nimg)
# get pip values for plotting the manhattan plots
gtoi_pip = utils.get_gtoi_pip(info_dict, 1, nsnp, nimg)
# bayes factor for each brain region after the first regression
gtoi_bf = utils.get_gtoi_bayes_factor(info_dict, data, nimg)