Python dataLoader Examples, util.dataLoader Python Examples

Example #1

0

Show file

File: app.py Project: superaja/automlizer

def update_graph(selected_algo):
    """Build the chart rows for ``selected_algo``.

    Loads the pipeline data, profiles the hyper-parameters of the selected
    algorithm and lays the resulting charts out two per row.

    Args:
        selected_algo: Algorithm identifier forwarded to ``hyperProfiler``
            and ``hyperprofilercharts``.

    Returns:
        A ``(cat_rows, num_rows)`` tuple of lists of ``html.Div``; the first
        is built from the first value returned by ``hyperprofilercharts``
        and the second from the second value.
    """
    df_pipe, _ = dataLoader('pipeline.csv', 'ref.csv')
    # hyperProfiler receives a copy so the loaded frame itself is not
    # handed to it.
    d1, d2 = hyperProfiler(selected_algo, df_pipe.copy())
    plnc, plnn = hyperprofilercharts(d1, d2, selected_algo)
    return _pair_into_rows(plnc), _pair_into_rows(plnn)


def _pair_into_rows(charts):
    """Group ``charts`` two per ``row`` Div, each preceded by a line break.

    A trailing chart left over from an odd count gets a row of its own
    instead of being discarded.
    """
    charts = list(charts)
    return [
        html.Div([html.Br(),
                  html.Div(charts[start:start + 2], className='row')])
        for start in range(0, len(charts), 2)
    ]

Example #2

0

Show file

File: app.py Project: superaja/automlizer

def long_process(gen, X, y, metric, algo):
    """Run ``gen`` TPOT rounds and publish progress into the global ``stat``.

    Each round takes the semaphore, runs ``runTPOT``, reloads the pipeline
    CSVs, recomputes the KPIs and the top-``n`` pipelines, and writes the
    results into ``stat``. The semaphore is released even when a round
    raises, so a failed run does not leave the resource locked for every
    later call.

    Args:
        gen: Number of rounds to run.
        X: Passed through to ``runTPOT``.
        y: Passed through to ``runTPOT``.
        metric: Passed through to ``runTPOT``.
        algo: Passed to ``runTPOT`` and ``kpi``; ``'Classifier'`` selects the
            largest scores as the top pipelines, anything else the smallest.

    Returns:
        A tuple ``(elapsed_microseconds, stat)`` where the first item is
        ``stat['time_end'] - stat['time_start']`` in microseconds.

    Raises:
        Exception: If the semaphore is already locked on entry.
    """
    global stat
    score = []
    pipeline_name = []
    if semaphore.is_locked():
        raise Exception('Resource is locked')
    total_pipes = 0

    for i in range(gen):
        semaphore.lock()
        try:
            stat['status'] = 'Started'
            po, po_score, pipes = runTPOT(X, y, metric, algo)
            createsklearnPipeline(po, total_pipes)
            total_pipes = total_pipes + pipes
            df_pipe, df_ref = dataLoader('pipeline.csv', 'ref.csv')
            pct_algo, pct_hyper = percentStatus(df_pipe, df_ref)
            highest_score, evalgo, evalpipes, evalhyper = kpi(df_pipe, algo)
            df_pipeline_score = df_pipe[['PIPELINE',
                                         'SCORE']].copy().drop_duplicates()
            # Build new lists rather than extending in place, so a list
            # already stored in ``stat`` by an earlier round is not mutated.
            pipeline_name = pipeline_name + list(df_pipeline_score['PIPELINE'])
            score = score + list(df_pipeline_score['SCORE'])
            print(len(score))
            # Top-n pipelines.
            # NOTE(review): ``n`` is not defined in this function; it is
            # presumably a module-level setting - confirm.
            df_top_10 = df_pipeline_score[['PIPELINE',
                                           'SCORE']].copy().drop_duplicates()
            ranked = df_top_10.sort_values(['SCORE'], ascending=False)
            if algo == 'Classifier':
                df_top_10 = ranked.nlargest(n, ['SCORE'])
            else:
                df_top_10 = ranked.nsmallest(n, ['SCORE'])
            # Update the real-time dict.
            stat['top_n_pipe'] = [
                "P-" + str(e) for e in list(df_top_10['PIPELINE'])
            ]
            stat['top_n_score'] = list(df_top_10['SCORE'])
            stat['pipeline'] = pipeline_name
            stat['score'] = score
            stat['highest_score'] = highest_score
            stat['evalgo'] = evalgo
            stat['evalpipes'] = evalpipes
            stat['evalhyper'] = evalhyper
            stat['pct_algo'] = pct_algo
            stat['pct_hyper'] = pct_hyper
            stat['gen'] = i + 1
            # NOTE(review): time_start is stamped at the END of the round,
            # so the returned duration only spans the unlock call below.
            # Kept as-is because callers may rely on it - confirm intent.
            stat['time_start'] = datetime.datetime.now()
        finally:
            # Always release, otherwise one failed round blocks all
            # subsequent calls with 'Resource is locked'.
            semaphore.unlock()
        stat['time_end'] = datetime.datetime.now()
    stat['status'] = 'Completed'
    # With gen == 0 the loop never runs, so this relies on ``stat`` already
    # holding 'time_start' and 'time_end'.
    return (stat['time_end'] - stat['time_start']).total_seconds() * 1e6, stat

Example #3

0

Show file

File: app.py Project: superaja/automlizer

def algo_hist_detail_display(status):
    """Return the per-algorithm histogram row, or a loader until ready.

    Args:
        status: Run status string; only ``'Completed'`` triggers rendering.

    Returns:
        An ``html.Div`` with class ``row`` wrapping the output of
        ``algoHistDetails`` when ``status`` is ``'Completed'``; otherwise a
        ``dcc.Loading`` component showing a progress message.
    """
    if status != 'Completed':
        # Not finished yet: show the loading placeholder.
        waiting_message = html.Div(
            [html.H6("Generating Individual Algorithm Histograms....")])
        return dcc.Loading(type='graph', children=waiting_message)

    pipelines, _ = dataLoader('pipeline.csv', 'ref.csv')
    # Keep only the rows whose PP_FLAG column equals 'N'.
    flagged_n = pipelines[pipelines['PP_FLAG'] == 'N']
    return html.Div(algoHistDetails(flagged_n), className='row')

Example #4

0

Show file

File: app.py Project: superaja/automlizer

def update_pipeline(selectedData):
    """Return the algorithm options and hyper-parameter table for a selection.

    Args:
        selectedData: Selection payload; the pipeline label is read from
            ``selectedData['points'][0]['y']``.

    Returns:
        A tuple ``(radio_o, radio_v, table)``: the two values returned by
        ``algoOptions`` and the result of ``hyperTable``, both computed on
        the rows of the selected pipeline.

    Raises:
        PreventUpdate: If nothing is selected (``None``, empty payload or an
            empty ``points`` list).
    """
    # Guard before touching the disk: no selection means no CSV load, and an
    # empty ``points`` list no longer ends in an IndexError.
    if not selectedData or not selectedData.get('points'):
        raise PreventUpdate

    df_pipe, _ = dataLoader('pipeline.csv', 'ref.csv')
    df_pipeline_table = df_pipe[[
        'PIPELINE', 'ALGO_NAME', 'HYPER_NAME', 'HYPER_VALUE'
    ]]

    label = selectedData['points'][0]['y']
    # Drop the first two characters (presumably a "P-" style prefix - confirm
    # against the chart that produces the labels) and parse the number.
    pipe = int(float(label[2:]))

    # One mask, two independent filtered frames, so neither consumer can
    # affect the data handed to the other.
    is_selected = df_pipeline_table['PIPELINE'] == pipe
    radio_o, radio_v = algoOptions(df_pipeline_table[is_selected])
    return radio_o, radio_v, hyperTable(df_pipeline_table[is_selected])

Example #5

0

Show file

import model
import tensorflow as tf
import config
import util
import time
import numpy as np
import os

# Prediction script: builds the prediction graph, restores the model and
# prints, for each test batch, the raw input and the decoded label sequence.

C = config.Config()
# NOTE(review): this rebinds `model` from the imported module to an instance,
# so the `model` module is no longer reachable by that name below.
model = model.Model()
labels = util.labelGenerator()
# NOTE(review): `dataLoader` is not used again in the visible script.
dataLoader = util.dataLoader(labels.label2id)
compute_graph = tf.get_default_graph()

# Load the serialized test arrays from the directory/file names in the config.
X_test = np.load(os.path.join(C.numpy_serialized_path, C.x_test_serialized))
y_test = np.load(os.path.join(C.numpy_serialized_path, C.y_test_serialized))

# NOTE(review): the keyword is spelled `barchsize`; confirm it matches the
# parameter name declared by util.batchGenerator (possible typo of batchsize).
batchLoader = util.batchGenerator(X_test,y_test,barchsize=1)
# NOTE(review): `batchIter` is not used below; the loop iterates `batchLoader`
# directly. If the iterator really is endless, that loop never terminates.
batchIter = iter(batchLoader) # batchLoader is an endless iterator providing X,Y

with compute_graph.as_default():
    model.lstm_crf_predict() # Define graph
    model.load_word2vec() # Initialize all variables
    model.restore()
    trainHelper = util.trainHelper(model.word2vector) # Train helper does the padding
    for x,y in batchLoader:
        # Keep an untouched copy of the input for printing; `x` is replaced
        # by the output of generateBatch on the next line.
        x_raw = x.copy()
        x,y,sequence_length,seq_max_len = trainHelper.generateBatch(x,y)
        viterbi_sequence = model.predict(x,sequence_length,seq_max_len)
        # Print the first raw sample and the labels for its predicted ids.
        print(x_raw[0])
        print(labels.ID2label(viterbi_sequence[0][0]))