Python data_prep Exemples, functions.data_prep Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : app.py Projet : ashutoshsingh19/Covid_edtech

def update_figure(selected_dropdown1, selected_dropdown2, val, fr):
    """Build the trend figure for one platform's dataset.

    The dataset for ``selected_dropdown1`` is optionally filtered on its
    ``username`` column, passed through ``data_prep``, and the
    ``selected_dropdown2`` column of the result is decomposed with an
    additive ``seasonal_decompose``. Only the trend component is plotted.

    Args:
        selected_dropdown1: Platform name; must be a key of the lookup
            table below. Also the value compared against ``username``.
        selected_dropdown2: Column of the prepared frame to decompose;
            also interpolated into the figure title.
        val: ``1`` keeps rows whose ``username`` equals the platform name,
            ``2`` keeps rows whose ``username`` differs from it, any other
            value keeps every row.
        fr: Forwarded as ``period`` to ``seasonal_decompose``.

    Returns:
        A figure dict with ``'data'`` (one scatter trace) and ``'layout'``.

    Raises:
        KeyError: If ``selected_dropdown1`` is not a known platform.
    """
    dropdown = {
        'Coursera': cou_n,
        'edX': edx_n,
        'Khan Academy': kha_n,
        'Pluralsight': plu_n,
        'Skillshare': ski_n,
        'Udacity': uda_n,
        'Udemy': ude_n,
        'Skype': sky_n,
        'Zoom': zoo_n,
        'TED Talks': ted_n,
    }
    df = dropdown[selected_dropdown1]
    if val == 1:
        df1 = df[df['username'] == selected_dropdown1]
    elif val == 2:
        df1 = df[df['username'] != selected_dropdown1]
    else:
        df1 = df
    test = data_prep(df1)
    test_i = seasonal_decompose(test[selected_dropdown2],
                                model='additive',
                                period=fr)
    t1 = pd.DataFrame(test_i.trend)
    # The index is converted to plain datetimes for plotting; this presumes
    # data_prep yields a period-based index -- TODO confirm.
    t1.index = t1.index.to_timestamp().to_pydatetime()

    figure = {
        'data': [{
            'x': t1.index,
            'y': t1['trend'],
            # NOTE(review): 'range_x' looks like a plotly.express argument
            # rather than a scatter-trace attribute -- confirm it has any
            # effect here. Kept unchanged.
            'range_x': [t1.index.min(), t1.index.max()],
            'type': 'scatter',
            # Scatter mode flags are 'lines', 'markers', 'text' or 'none';
            # the previous value 'marker' was not a valid flag.
            'mode': 'markers',
            'opacity': 0.7,
        }],
        'layout': {
            'title': 'Trend analysis of 2019-20 period for {}'.format(
                selected_dropdown2),
            'xaxis': {
                'title': 'Timeline',
            },
        },
    }
    return figure

Exemple #2

0

Afficher le fichier

Fichier : main.py Projet : raquelaoki/MLCB2019

# Stage switches for the pipeline.
RUN_CREATE_FEATURE_DATASET = False
RUN_CREATE_ROC_CAUSAL_DATASET = False
# RUN_CREATE_FEATURE_DATASET also needs to be true for this one.
RUN_EXPERIMENTS = False

# Latent dimensions for the Deconfounder Algorithm (DA).
k_mf_, k_pca_, k_ac_ = [40], [40], [10]

# Dataset locations (Windows-style separators, as in the original).
filename = "data\\tcga_train_gexpression_cgc_7k.txt"  # _2
filename_gamma = "results\\gamma.txt"

#Running Factor Analysis Models + Predictive Check + outcome model in all patients
if RUN_ALL:
    data = pd.read_csv(filename, sep=';')
    #data = data.iloc[0:500, 0:100]
    train, j, v, y01, abr, colnames = fc.data_prep(data)
    #j: rows, v: columns, y01: initial label, abr: cancer type, colnames: genes names
    '''
    1) Run factor model;
    2) Do predictive check;
    3) If pass on predictive ckeck, run outcome model
    4) Save results and predictions for ROC curve
    '''
    df_gamma = pd.read_csv(filename_gamma, sep=';')
    gamma = []
    cil = []
    cip = []
    id2 = []

    if RUN_MF:
        for k_mf in k_mf_:

Exemple #3

0

Afficher le fichier

import functions

# TODO: change variable names to be more intuitive.

# Switch to the directory holding the raw CSV downloads before importing.
chdir("/home/cree/Downloads/br_econ/")

# NOTE(review): `count` is not used anywhere in the visible part of the
# script -- confirm whether later code needs it.
count = 0
# These empty dicts are overwritten by the import_data calls just below.
mun, state = {}, {}

# Both results are queried with `.get(<csv path>)` further down.
mun = functions.import_data("mun")
state = functions.import_data("state")

# Switch to the project working directory for the rest of the script.
chdir("/home/cree/workspace/econometrics/")

# municipal dataframes
mun_renda = functions.data_prep(mun.get('mun/Renda_municipios - Renda familiar - per capita - media.csv'))
mun_ensino = functions.data_prep(mun.get('mun/mun_media_anos_de_estudos_25_anos_+_todos.csv'))
# Unlike its neighbours, this table is used without going through data_prep.
mun_saude = mun.get('mun/mun_Mortalidade infantil (por mil nascidos vivos)_1970-2000.csv')
mun_populacao_censo  = functions.data_prep(mun.get('mun/populacao_municipal.csv'))
# Split the income and education frames at row/label 3: the `_2` variables
# get copies of the `[3:]` slice, the original names are rebound to `[:3]`.
# NOTE(review): `.ix` was deprecated in pandas 0.20 and removed in 1.0; the
# right replacement (`.loc` vs `.iloc`) depends on the index type, which is
# not visible here -- confirm before migrating.
mun_renda_2, mun_ensino_2 = mun_renda.ix[3:].copy(),mun_ensino.ix[3:].copy()
mun_renda, mun_ensino = mun_renda.ix[:3],mun_ensino.ix[:3]

# state dataframes
state_renda = functions.data_prep(state.get('state/estado_Renda domiciliar per capita - media_1976 - 2014.csv'))
state_ensino = functions.data_prep(state.get('state/Anos de estudo - media - pessoas 25 anos e mais - 1981 - 2014.csv'))
state_ensino_mulheres = functions.data_prep(state.get('state/Anos de estudo - media - pessoas 25 anos e mais - mulheres 1981 -2014.csv'))
state_populacao_anual = functions.data_prep(state.get('state/populcao_estado_1980-2014.csv'))
# Used without going through data_prep.
state_populacao_censo = state.get('state/populacao_residente_estado_1970-2000.csv')

# compute infant mortality by state
# Start from a copy of the identifying columns of the census table.
df9 = state_populacao_censo[['Sigla', 'Código', 'Estado']].copy()

Exemple #4

0

Afficher le fichier

Fichier : app.py Projet : ashutoshsingh19/Covid_edtech

def update_figure(selected_dropdown1, val):
    """Build the stacked-bar and bubble figures for one platform.

    The first figure stacks, per index entry of the ``data_prep`` output,
    the ``label`` column ("Covid") on top of ``tweet_counter - label``
    ("Non Covid"). The second figure plots every row with ``label == 1``
    at its ``timestamp``, using the character length of ``text`` both as
    the y value and as the marker area.

    Args:
        selected_dropdown1: Platform name; must be a key of the lookup
            table below. Also the value compared against ``username``.
        val: ``1`` keeps rows whose ``username`` equals the platform name,
            ``2`` keeps rows whose ``username`` differs from it, any other
            value keeps every row.

    Returns:
        A ``(figure1, figure2)`` tuple of ``go.Figure`` objects.

    Raises:
        KeyError: If ``selected_dropdown1`` is not a known platform.
    """
    dropdown = {
        'Coursera': cou_n,
        'edX': edx_n,
        'Khan Academy': kha_n,
        'Pluralsight': plu_n,
        'Skillshare': ski_n,
        'Udacity': uda_n,
        'Udemy': ude_n,
        'Skype': sky_n,
        'Zoom': zoo_n,
        'TED Talks': ted_n,
    }
    df = dropdown[selected_dropdown1]

    if val == 1:
        df1 = df[df['username'] == selected_dropdown1]
        var = selected_dropdown1 + ' handle'
    elif val == 2:
        df1 = df[df['username'] != selected_dropdown1]
        var = 'Users'
    else:
        df1 = df
        var = 'Both (Platform and user)'

    test = data_prep(df1)
    # The index is converted to plain datetimes for plotting; this presumes
    # data_prep yields a period-based index -- TODO confirm.
    test.index = test.index.to_timestamp().to_pydatetime()

    # Computed once; used for both the bar heights and the hover text.
    non_covid = test['tweet_counter'] - test['label']

    trace1 = go.Bar(x=test.index,
                    y=non_covid,
                    name='Non Covid',
                    hovertext=non_covid)
    trace2 = go.Bar(x=test.index,
                    y=test['label'],
                    name='Covid',
                    hovertext=test['label'])

    figure1 = go.Figure(
        data=[trace1, trace2],
        layout=go.Layout(
            barmode='stack',
            title=
            'Analysis for Covid and Non-Covid related tweets from {} on a weekly basis '
            .format(var),
            plot_bgcolor='white'))

    df2 = df1[df1['label'] == 1]

    # Character length of each tweet text (not a word count).
    wc = [len(text) for text in df2['text']]
    timestamps = list(df2['timestamp'])

    # max() on an empty list raises ValueError, which used to break the
    # callback whenever the selection had no label == 1 rows. A sizeref of
    # zero is also avoided, since marker sizes are scaled by dividing by it.
    largest = max(wc, default=0)
    sizeref = 2. * largest / (40.**2) if largest else 1

    trace3 = go.Scatter(x=timestamps,
                        y=wc,
                        mode='markers',
                        marker={
                            'color': '#ff471a',
                            'size': wc,
                            'sizemode': 'area',
                            'sizeref': sizeref,
                            'sizemin': 4
                        })

    figure2 = go.Figure(data=[trace3],
                        layout=go.Layout(
                            plot_bgcolor='white',
                            title='Covid related tweets on timeline'))

    return figure1, figure2

Exemple #5

0

Afficher le fichier

Fichier : app.py Projet : ashutoshsingh19/Covid_edtech

    'Pluralsight': plu_n,
    'Skillshare': ski_n,
    'Udacity': uda_n,
    'Udemy': ude_n,
    'Skype': sky_n,
    'Zoom': zoo_n,
    'TED Talks': ted_n
}

# Flask server instance that the Dash application is attached to.
server = flask.Flask(__name__)
app = dash.Dash(__name__, server=server)

# Names of the available datasets, in dict_main's key order.
data = [*dict_main]

# Prepared frame for the first dataset.
channels = data_prep(dict_main[data[0]])

app.layout = html.Div([
    html.H1('COVID Edu-Tech', style={'textAlign': 'center'}),
    html.Div([
        html.Div([
            html.Label([
                "Select Platform",
                dcc.Dropdown(id='data-dropdown',
                             options=[{
                                 'label': label,
                                 'value': label
                             } for label in data],
                             value=list(dict_main.keys())[0],
                             multi=False,
                             searchable=False,