Ejemplo n.º 1
0
def draw_stacked_chart(df, file_name, minimum_feature_contribution):
    """Draw a stacked bar chart of the significant features of ``df`` and save it as HTML.

    The columns to plot are chosen by ``va_utils.get_significant_features``;
    the values it returns for the remaining features are plotted as a single
    combined 'everything-else' series. The chart is written to
    ``<cwd>/<OUTPUT_DIR_NAME>/<file_name without extension>_stacked_chart.html``.

    Args:
        df: DataFrame with a 'Date' column (used for the x axis and for the
            date range in the title) plus the feature columns.
        file_name: Name of the input file; its stem (extension removed) is
            used for the chart title and for the output file name.
        minimum_feature_contribution: Threshold passed through unchanged to
            ``va_utils.get_significant_features``.

    Returns:
        None. The chart is saved to disk as a side effect.
    """
    logger.info('drawing stacked chart for %s', file_name)

    # The middle element of the helper's return value is not needed here.
    col_list, _remaining_features, remaining_features_values = va_utils.get_significant_features(df, minimum_feature_contribution)

    dates = df['Date']
    X = list(dates)
    data = OrderedDict((col, df[col]) for col in col_list)
    # finally add the remaining features as a combined single column
    data['everything-else'] = remaining_features_values

    # splitext removes the extension whatever its length; slicing off a fixed
    # four characters only works for three-letter extensions.
    feature_name = os.path.splitext(file_name)[0]
    # os.getcwd() is already absolute, so no leading separator is needed.
    chart_file_name = os.path.join(os.getcwd(), OUTPUT_DIR_NAME, feature_name + '_stacked_chart.html')
    output_file(chart_file_name)

    # Positional access: the first/last rows are wanted regardless of how the
    # DataFrame index is labelled (e.g. after filtering or re-indexing).
    title = feature_name.upper() + ' distribution from ' + str(dates.iloc[0]) + ' to ' + str(dates.iloc[-1])
    bar = Bar(data, X, title=title, stacked=True, legend='bottom_left', tools='hover,pan,wheel_zoom,box_zoom,reset,resize', width=1300, height=500)
    save(bar)
    logger.info('saved the chart in %s', chart_file_name)
Ejemplo n.º 2
0
def model_tsa(df, file_name, minimum_feature_contribution):
    #first get a list of features to model
    #each significant feature (protocol or application) would be modeled as an ARIMA (auto regressive moving average)
    #the modeling artifacts i.e. charts, sumary etc would be stored in a directory by feature name
    logger.info('Begin feature extraction...')
    col_list, remaining_features, remaining_features_values = va_utils.get_significant_features(df, minimum_feature_contribution)

    #store the results in a dataframe
    df_output = pd.DataFrame()
    feature_name_list = []
    model_name_list   = []
    MAE_list          = []
    model_selection_list = []
    
    predicted_col_list_with_data = ['Date'] + col_list
    df_predictions = pd.DataFrame(columns = predicted_col_list_with_data )
    logger.info('columns in oredicted df...')
    logger.info(predicted_col_list_with_data)
    df3 = df_predictions
    #df_w_predictions = copy.deepcopy(df[col_list])

    for col in col_list:
        logger.info('-------- modeling ' + col + '-------------')
        curr_dir = os.getcwd()
        try:
            feature, model_names, models, results, MAE, predicted_dates, predicted, model_selection = model_feature(file_name, df, col)
            feature_name_list += [feature]*len(model_names)
            model_name_list   += model_names
            MAE_list          += MAE
            model_selection_list += model_selection
            df_predictions['Date'] = predicted_dates
            df_predictions[feature] = predicted
        except Exception, e:
            logger.info('Could not model feature ' + col)
            logger.info('Exception: ' + str(e))
            os.chdir(curr_dir)