import numpy as np
from gensim.models import Word2Vec
from keras.callbacks import TensorBoard
from sklearn import metrics
from sklearn.model_selection import train_test_split

import dtoc  # project data-access module (defined elsewhere in the repo)


def evaluate(start_date, end_date, all_dtoc=False):
    df = dtoc.sample(40000, start_date, end_date)
    if all_dtoc:
        # keep only the records flagged as non-DTOC (is_dtoc == 0)
        df = df[df['is_dtoc'] == 0]
    # columns 4:16 hold the 12 diagnosis codes
    sample_x = vectorization(df.iloc[:, 4:16], Word2Vec.load("diag2vec.model"))
    model = load_model()
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=[f1, 'acc'])
    return model.evaluate(sample_x, df['is_dtoc'])
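# The compile() call in evaluate() references an `f1` metric that is defined
# elsewhere in the repo. The sketch below is an assumption of what it looks
# like: a standard batch-wise Keras-backend F1 for a binary classifier; the
# actual implementation may differ.
from keras import backend as K


def f1(y_true, y_pred):
    # binarise the predictions, then compute precision and recall on the batch
    y_pred_pos = K.round(K.clip(y_pred, 0, 1))
    tp = K.sum(K.round(K.clip(y_true * y_pred_pos, 0, 1)))
    precision = tp / (K.sum(y_pred_pos) + K.epsilon())
    recall = tp / (K.sum(K.round(K.clip(y_true, 0, 1))) + K.epsilon())
    return 2 * precision * recall / (precision + recall + K.epsilon())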
def train():
    ## load data
    df = dtoc.sample(40000)
    train_df, val_df = train_test_split(df, test_size=0.08, random_state=2018)

    diag_cols = ['diag1', 'diag2', 'diag3', 'diag4', 'diag5', 'diag6',
                 'diag7', 'diag8', 'diag9', 'diag10', 'diag11', 'diag12', 'age']
    train_X = train_df[diag_cols]
    # train_X = train_df['age'].values
    train_y = train_df['is_dtoc'].values
    val_X = val_df[diag_cols]
    # val_X = val_df['age'].values
    val_y = val_df['is_dtoc'].values

    # embed_size = 150      # how big each variable vector is
    # max_features = 10000  # how many unique codes to use (ICD codes + all ages)
    # max_len = 13          # max number of variables in one record
    # embedding_matrix = get_embeddings_matrix(df)

    tbCallBack = TensorBoard(log_dir='./Graph', histogram_freq=0,
                             write_graph=True, write_images=True)

    # load word2vec model
    EMBEDDING_MODEL_FILE = 'diag2vec.model'
    wv_model = Word2Vec.load(EMBEDDING_MODEL_FILE)

    # model = model_LR()
    model = model_LSTM()
    # model = model_CNN()

    model.fit(vectorization(train_X, wv_model), train_y,
              batch_size=512, epochs=20,
              validation_data=(vectorization(val_X, wv_model), val_y),
              callbacks=[tbCallBack])

    pred_val_y = model.predict(vectorization(val_X, wv_model),
                               batch_size=1024, verbose=1)
    for thresh in np.arange(0.1, 0.501, 0.01):
        thresh = np.round(thresh, 2)
        print("F1 score at threshold {0} is {1}".format(
            thresh, metrics.f1_score(val_y, (pred_val_y > thresh).astype(int))))
    return model
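# train() selects model_LSTM(), which is defined elsewhere in the repo. As an
# assumption based on the commented-out constants above (embed_size=150,
# max_len=13) and the word2vec-vectorised inputs, a minimal sketch could look
# like the following; the real architecture may differ.
from keras.models import Sequential
from keras.layers import LSTM, Bidirectional, Dense, Dropout


def model_LSTM(max_len=13, embed_size=150):
    # each record is a sequence of 13 vectorised variables (12 diagnoses + age)
    model = Sequential()
    model.add(Bidirectional(LSTM(64), input_shape=(max_len, embed_size)))
    model.add(Dropout(0.2))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=[f1, 'acc'])
    return model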
def main():
    df = dtoc.sample(40000, '2010-01-01', '2018-01-01')
    train_df, val_df = train_test_split(df, test_size=0.1)
    wv_model = Word2Vec.load("diag2vec.model")
    # columns 4:16 hold the 12 diagnosis codes
    validation_data = (vectorization(val_df.iloc[:, 4:16], wv_model),
                       np.array(val_df['is_dtoc']))
    # expects a train() variant that accepts the prepared training frame,
    # the word2vec model and the validation data, and returns the fitted model
    model = train(train_df, wv_model, validation_data)
    ## save weights
    model.save_weights('lstm_weights_before2018.h5')
    ## save structure
    with open('lstm_architecture_before2018.json', 'w') as f:
        f.write(model.to_json())
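# evaluate() and predict_evaluate() rely on a load_model() helper that is not
# shown here. The sketch below assumes it rebuilds the network from the JSON
# architecture and HDF5 weights written by main(); the real helper (and its
# filenames) may differ.
from keras.models import model_from_json


def load_model(architecture_file='lstm_architecture_before2018.json',
               weights_file='lstm_weights_before2018.h5'):
    with open(architecture_file) as f:
        model = model_from_json(f.read())
    model.load_weights(weights_file)
    return model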
def predict_evaluate():
    df = dtoc.sample(40000)
    val_y = df['is_dtoc']
    sample_x = vectorization(df.iloc[:, 4:16], Word2Vec.load("diag2vec.model"))
    model = load_model()
    # model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[f1, 'acc'])
    pred_val_y = model.predict(sample_x)
    thresholds = []
    for thresh in np.arange(0.1, 0.501, 0.01):
        thresh = np.round(thresh, 2)
        res = metrics.f1_score(val_y, (pred_val_y > thresh).astype(int))
        thresholds.append([thresh, res])
        print("F1 score at threshold {0} is {1}".format(thresh, res))
    return thresholds
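# Illustrative entry point, not part of the original module: one plausible way
# to chain the steps above. The evaluation dates are placeholders.
if __name__ == '__main__':
    main()              # fit on admissions sampled from 2010-2018 and save the model
    predict_evaluate()  # sweep decision thresholds on a fresh sample
    print(evaluate('2018-01-01', '2019-01-01'))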