예제 #1
0
def plotYear(year, component="H"):
    """Plot one year of station 'CMB' data with detected outliers blanked.

    The data is obtained from ``magdasDB.getMinData`` with the target time
    zone 'Asia/Colombo'. Outliers in ``component`` are detected with two
    filters (ABNORMAL_IGNORE_BY_MIN_MAX with min=40000 / max=45000, and
    Z_SCORE with threshold=3), the flagged samples are replaced by NaN, and
    the frame together with the outliers is handed to
    ``plotter.yearly_graph``.

    Args:
        year: Year to plot. Must be a string, because it is concatenated
            into the progress message (e.g. ``'2016'``).
        component: Name of the column to clean and plot. Defaults to "H".

    Returns:
        None. The function prints a progress line and delegates plotting.
    """
    import common_DataProcess as processor
    import common_MagdasDB as magdasDB
    import pytz
    import basic_variation_plot as plotter
    import numpy as np

    print('Plotting year : ' + year + ', component : ' + component)
    dataFrame = magdasDB.getMinData(
        'CMB', year, targetTimeZone=pytz.timezone('Asia/Colombo'))

    # Combine a fixed value-range filter with a z-score filter.
    ab_ignore_min_max_filter = processor.OutlierFilter(
        processor.FilterType.ABNORMAL_IGNORE_BY_MIN_MAX, min=40000, max=45000)
    z_score = processor.OutlierFilter(processor.FilterType.Z_SCORE,
                                      threshold=3)
    filter_list = processor.FilterList(
        ab_ignore_min_max=ab_ignore_min_max_filter, z_score=z_score)
    outliers = processor.get_outliers_multiple_filter(dataFrame, component,
                                                      filter_list)

    # Blank the flagged samples so they leave gaps in the plotted series.
    # NOTE(review): assumes outliers.index holds labels of dataFrame -- confirm.
    dataFrame.loc[outliers.index, component] = np.nan

    plotter.yearly_graph(dataFrame, component, outliers=outliers)
예제 #2
0
def processArgvs(argvs):
    """Dispatch on the first command-line argument.

    Recognised forms of ``argvs[0]``:

    * ``"stl"``: print the station list via ``magdasDB.printStationList``.
    * ``"YYYY-M-D"`` (three or more '-' separated parts): fetch that day's
      data for station 'CMB', blank the outliers of component 'H' found by a
      NORMAL_DISTRIBUTION filter (SD_range_scalar=3) and run
      ``plotter.dailyVariationAnalyzes``.
    * ``"YYYY-M"``: only prints which month would be collected.
    * four characters accepted by ``is_number``: only prints which year
      would be collected.

    Anything else, including an empty argument list, prints a usage error.

    Args:
        argvs: Sequence of argument strings; only the first item is examined.

    Returns:
        None.
    """
    usage_error = (
        'Incorrect combination of parameters. First paramter supposed to be a date.'
    )

    # An empty argument list would otherwise fail with IndexError on argvs[0].
    if not argvs:
        print(usage_error)
        return

    import common_MagdasDB as magdasDB

    first = argvs[0]
    if first == "stl":
        magdasDB.printStationList()
    elif '-' in first:
        parts = first.split("-")
        year = parts[0]
        # Single-digit months/days are zero padded to two characters.
        month = parts[1].zfill(2)

        if len(parts) > 2:
            # Only the day branch needs these modules, so import them here.
            # pytz is imported locally so the function does not depend on a
            # module-level import being present.
            import numpy as np
            import pytz
            import common_DataProcess as processor
            import noon_study_plot as plotter

            component = 'H'
            day = parts[2].zfill(2)
            print('Collecting data for day ' + year + '-' + month + '-' + day)

            dataFrame = magdasDB.getMinData(
                'CMB',
                year,
                month,
                day,
                targetTimeZone=pytz.timezone('Asia/Colombo'))

            normal_distribution_filter = processor.OutlierFilter(
                processor.FilterType.NORMAL_DISTRIBUTION, SD_range_scalar=3)
            filter_list = processor.FilterList(
                normal_disb=normal_distribution_filter)
            outliers = processor.get_outliers_multiple_filter(
                dataFrame, component, filter_list)

            # Blank the flagged samples before they are analysed/plotted.
            dataFrame.loc[outliers.index, component] = np.nan

            plotter.dailyVariationAnalyzes(dataFrame, component, outliers)
        else:
            print('Collecting data for month ' + year + '-' + month)

    elif len(first) == 4 and is_number(first):
        year = first
        print('Collecting data for year ' + year)
    else:
        print(usage_error)
예제 #3
0
# This script takes no command-line arguments: the procedure to run is chosen
# by editing the `command` assignment below.
print('This python is script not developed to use with command line arguments.')
print('Procedure should be specified in this file.')


# Exactly one of the following assignments should be active.
#command = 'generate_outliers'
command = 'check_written_outliers'

if command == 'check_written_outliers':

    # Read back the confirmed outliers stored for 'CMB' / 'Min' and show them.
    import common_file as file_access

    outliers = file_access.read_confirmed_outliers('CMB', 'Min')

    print(outliers)

elif command == 'generate_outliers':

    # Fetch the data of 2016-03-31 for 'CMB' and pass it to save_outliers.
    import common_DataProcess as processor
    import common_MagdasDB as magdasDB
    import pytz

    dataFrame = magdasDB.getMinData(
        'CMB',
        '2016',
        '03',
        '31',
        targetTimeZone=pytz.timezone('Asia/Colombo'),
    )

    processor.save_outliers('CMB', dataFrame)
예제 #4
0
def plotDay(year, month, day, component="H"):
    """Plot one day of station 'CMB' data with detected outliers blanked.

    The data is obtained from ``magdasDB.getMinData`` with the target time
    zone 'Asia/Colombo'. A component-specific set of outlier filters is then
    applied, the flagged samples are replaced by NaN, and the frame plus the
    outliers are handed to ``plotter.daily_graph``.

    Filter sets per component:

    * ``'H'`` / ``'F'``: UNREAL_TOTAL_FIELD (total_field_min=40000,
      total_field_max=43000).
    * ``'D'``: ABNORMAL_IGNORE_BY_MIN_MAX (min=-10000, max=45000),
      ABNORMAL_IGNORE_BY_SUDDEN_INCREASE (threshold=10) and Z_SCORE
      (threshold=3).
    * ``'Z'``: ABNORMAL_IGNORE_BY_MIN_MAX (min=-10000, max=45000) and
      ABNORMAL_IGNORE_BY_SUDDEN_INCREASE (threshold=100).

    For any other component the data is still fetched, but nothing is
    filtered or plotted.

    Args:
        year: Year as a string (it is concatenated into the progress message).
        month: Month as a string.
        day: Day as a string.
        component: Name of the column to clean and plot. Defaults to "H".

    Returns:
        None.
    """
    import common_DataProcess as processor
    import common_MagdasDB as magdasDB
    import numpy as np
    import basic_variation_plot as plotter
    import pytz

    print('Plotting day : ' + year + '-' + month + '-' + day +
          ', component : ' + component)
    dataFrame = magdasDB.getMinData(
        'CMB', year, month, day, targetTimeZone=pytz.timezone('Asia/Colombo'))

    filter_type = processor.FilterType

    # Choose the filters for this component. The dict keys become the keyword
    # names passed to FilterList, in insertion order.
    if component in ('H', 'F'):
        filters = {
            'unreal_total_field': processor.OutlierFilter(
                filter_type.UNREAL_TOTAL_FIELD,
                total_field_min=40000,
                total_field_max=43000),
        }
    elif component == 'D':
        filters = {
            'ab_ignore_min_max': processor.OutlierFilter(
                filter_type.ABNORMAL_IGNORE_BY_MIN_MAX,
                min=-10000,
                max=45000),
            'ab_ignore_sudden_inc': processor.OutlierFilter(
                filter_type.ABNORMAL_IGNORE_BY_SUDDEN_INCREASE,
                threshold=10),
            'z_score': processor.OutlierFilter(filter_type.Z_SCORE,
                                               threshold=3),
        }
    elif component == 'Z':
        filters = {
            'ab_ignore_min_max': processor.OutlierFilter(
                filter_type.ABNORMAL_IGNORE_BY_MIN_MAX,
                min=-10000,
                max=45000),
            'ab_ignore_sudden_inc': processor.OutlierFilter(
                filter_type.ABNORMAL_IGNORE_BY_SUDDEN_INCREASE,
                threshold=100),
        }
    else:
        # No filter set is defined for this component: nothing is plotted.
        return

    # Shared tail: detect outliers, blank them, plot.
    filter_list = processor.FilterList(**filters)
    outliers = processor.get_outliers_multiple_filter(
        dataFrame, component, filter_list)
    # NOTE(review): assumes outliers.index holds labels of dataFrame -- confirm.
    dataFrame.loc[outliers.index, component] = np.nan
    plotter.daily_graph(dataFrame, component, outliers)
예제 #5
0
    import numpy as np
    import comp_compare_plot as plotter

    day = '2016-3-9'
    print('Comparing three components for date ' + day)
    parts = day.split("-")

    year = parts[0]
    month = parts[1]
    day = parts[2]
    if len(month) < 2:
        month = '0' + month
    if len(day) < 2:
        day = '0' + day

    dataFrame = magdasDB.getMinData(
        'CMB', year, month, day, targetTimeZone=pytz.timezone('Asia/Colombo'))

    component = 'H'

    unreal_total_field = processor.OutlierFilter(
        processor.FilterType.UNREAL_TOTAL_FIELD,
        total_field_min=40000,
        total_field_max=43000)
    filter_list = processor.FilterList(unreal_total_field=unreal_total_field)
    outliers = processor.get_outliers_multiple_filter(dataFrame, component,
                                                      filter_list)
    dataFrame.loc[outliers.index, component] = np.nan
    plotter.component_compare_daily_all(dataFrame, outliers)

if command == 'compare_monthly_components':
    month = day = '2016-3-9'