def plotYear(year, component="H"):
    """Plot one year of CMB minute data for ``component`` with outliers masked.

    Minute data for station 'CMB' is fetched with
    ``common_MagdasDB.getMinData`` using the Asia/Colombo time zone as the
    target time zone. Outliers are collected with two filters combined in one
    ``FilterList``: ABNORMAL_IGNORE_BY_MIN_MAX (min=40000, max=45000) and
    Z_SCORE (threshold=3). The flagged rows are set to NaN in the component
    column and the frame plus the outliers are passed to
    ``basic_variation_plot.yearly_graph``.

    Args:
        year: Year to plot. It is concatenated into the progress message, so
            it must be a string; it is passed unchanged to ``getMinData``.
        component: Name of the data column to filter and plot.

    Returns:
        None. The function prints a progress line and triggers the plot.
    """
    # Dependencies are imported at call time, as in the rest of this file.
    import common_DataProcess as processor
    import common_MagdasDB as magdasDB
    import pytz
    import basic_variation_plot as plotter
    import numpy as np

    print('Plotting year : ' + year + ', component : ' + component)
    dataFrame = magdasDB.getMinData(
        'CMB', year, targetTimeZone=pytz.timezone('Asia/Colombo'))

    ab_ignore_min_max_filter = processor.OutlierFilter(
        processor.FilterType.ABNORMAL_IGNORE_BY_MIN_MAX, min=40000, max=45000)
    z_score = processor.OutlierFilter(
        processor.FilterType.Z_SCORE, threshold=3)
    filter_list = processor.FilterList(
        ab_ignore_min_max=ab_ignore_min_max_filter, z_score=z_score)
    outliers = processor.get_outliers_multiple_filter(
        dataFrame, component, filter_list)

    # Blank the flagged rows in the plotted column; the outliers themselves
    # are still handed to the plotter separately.
    dataFrame.loc[outliers.index, component] = np.nan
    plotter.yearly_graph(dataFrame, component, outliers=outliers)
def processArgvs(argvs):
    """Dispatch on the first command-line style argument.

    Recognised forms of ``argvs[0]``:

    * ``"stl"`` -- print the station list via ``common_MagdasDB``.
    * ``"YYYY-M-D"`` (three or more ``-`` separated parts) -- load that day's
      CMB minute data (target time zone Asia/Colombo), set the rows flagged
      by the NORMAL_DISTRIBUTION filter (SD_range_scalar=3) to NaN in the
      'H' column and call ``noon_study_plot.dailyVariationAnalyzes``.
    * ``"YYYY-M"`` -- only prints the month being requested; no processing is
      implemented for it.
    * four characters accepted by ``is_number`` -- only prints the year being
      requested; no processing is implemented for it.

    Anything else, including an empty ``argvs``, prints a usage hint.

    Project modules are imported only inside the branch that needs them, so
    argument validation does not depend on them being importable.

    Args:
        argvs: Sequence of argument strings; only the first one is read.

    Returns:
        None.
    """
    usage_hint = (
        'Incorrect combination of parameters. First paramter supposed to be a date.'
    )

    # Guard against an empty argument list instead of raising IndexError.
    if not argvs:
        print(usage_hint)
        return

    arg = argvs[0]
    if arg == "stl":
        import common_MagdasDB as magdasDB
        magdasDB.printStationList()
    elif '-' in arg:
        parts = arg.split("-")
        year = parts[0]
        month = parts[1].zfill(2)
        if len(parts) > 2:
            day = parts[2].zfill(2)
            print('Collecting data for day ' + year + '-' + month + '-' + day)

            import numpy as np
            import pytz  # was used without being imported in this function
            import common_DataProcess as processor
            import common_MagdasDB as magdasDB
            import noon_study_plot as plotter

            component = 'H'
            dataFrame = magdasDB.getMinData(
                'CMB', year, month, day,
                targetTimeZone=pytz.timezone('Asia/Colombo'))
            normal_distribution_filter = processor.OutlierFilter(
                processor.FilterType.NORMAL_DISTRIBUTION, SD_range_scalar=3)
            filter_list = processor.FilterList(
                normal_disb=normal_distribution_filter)
            outliers = processor.get_outliers_multiple_filter(
                dataFrame, component, filter_list)
            # Blank the flagged rows before plotting.
            dataFrame.loc[outliers.index, component] = np.nan
            plotter.dailyVariationAnalyzes(dataFrame, component, outliers)
        else:
            print('Collecting data for month ' + year + '-' + month)
    # NOTE(review): is_number is not defined in this block; presumably it is
    # provided elsewhere in the module -- confirm.
    elif len(arg) == 4 and is_number(arg):
        year = arg
        print('Collecting data for year ' + year)
    else:
        print(usage_hint)
# Manual driver: the procedure to run is chosen by editing `command` below
# rather than through command line arguments.
print('This python is script not developed to use with command line arguments.')
print('Procedure should be specified in this file.')

# Keep exactly one of these assignments active.
# command = 'generate_outliers'
command = 'check_written_outliers'

if command == 'check_written_outliers':
    # Calls common_file.read_confirmed_outliers for 'CMB' / 'Min' and prints
    # whatever it returns.
    import common_file as file_access

    outliers = file_access.read_confirmed_outliers('CMB', 'Min')
    print(outliers)
elif command == 'generate_outliers':
    # Loads the CMB minute data of 2016-03-31 (target time zone Asia/Colombo)
    # and passes it to processor.save_outliers.
    import common_DataProcess as processor
    import common_MagdasDB as magdasDB
    import pytz

    dataFrame = magdasDB.getMinData(
        'CMB', '2016', '03', '31',
        targetTimeZone=pytz.timezone('Asia/Colombo'))
    processor.save_outliers('CMB', dataFrame)
def _dayFilterList(processor, component):
    """Return the FilterList plotDay applies to ``component``.

    ``processor`` is the imported ``common_DataProcess`` module. Returns
    None when no filter set is defined for the component.
    """
    make_filter = processor.OutlierFilter
    kinds = processor.FilterType

    if component in ('H', 'F'):
        unreal_total_field = make_filter(
            kinds.UNREAL_TOTAL_FIELD,
            total_field_min=40000, total_field_max=43000)
        return processor.FilterList(unreal_total_field=unreal_total_field)

    if component == 'D':
        ab_ignore_min_max_filter = make_filter(
            kinds.ABNORMAL_IGNORE_BY_MIN_MAX, min=-10000, max=45000)
        ab_ignore_sudden_inc = make_filter(
            kinds.ABNORMAL_IGNORE_BY_SUDDEN_INCREASE, threshold=10)
        z_score = make_filter(kinds.Z_SCORE, threshold=3)
        return processor.FilterList(
            ab_ignore_min_max=ab_ignore_min_max_filter,
            ab_ignore_sudden_inc=ab_ignore_sudden_inc,
            z_score=z_score)

    if component == 'Z':
        ab_ignore_min_max_filter = make_filter(
            kinds.ABNORMAL_IGNORE_BY_MIN_MAX, min=-10000, max=45000)
        ab_ignore_sudden_inc = make_filter(
            kinds.ABNORMAL_IGNORE_BY_SUDDEN_INCREASE, threshold=100)
        return processor.FilterList(
            ab_ignore_min_max=ab_ignore_min_max_filter,
            ab_ignore_sudden_inc=ab_ignore_sudden_inc)

    return None


def plotDay(year, month, day, component="H"):
    """Plot one day of CMB minute data for ``component`` with outliers masked.

    Minute data for station 'CMB' is fetched with
    ``common_MagdasDB.getMinData`` using the Asia/Colombo time zone as the
    target time zone. The outlier filters depend on the component:

    * 'H' / 'F': UNREAL_TOTAL_FIELD (total_field_min=40000,
      total_field_max=43000).
    * 'D': ABNORMAL_IGNORE_BY_MIN_MAX (min=-10000, max=45000),
      ABNORMAL_IGNORE_BY_SUDDEN_INCREASE (threshold=10) and Z_SCORE
      (threshold=3).
    * 'Z': ABNORMAL_IGNORE_BY_MIN_MAX (min=-10000, max=45000) and
      ABNORMAL_IGNORE_BY_SUDDEN_INCREASE (threshold=100).

    The flagged rows are set to NaN in the component column and the frame
    plus the outliers are passed to ``basic_variation_plot.daily_graph``.

    Any other component is reported on stdout and nothing is loaded or
    plotted.

    Args:
        year: Year as a string.
        month: Month as a string, passed unchanged to ``getMinData``.
        day: Day as a string, passed unchanged to ``getMinData``.
        component: Name of the data column to filter and plot.

    Returns:
        None.
    """
    print('Plotting day : ' + year + '-' + month + '-' + day +
          ', component : ' + component)

    # Reject unsupported components before importing the project modules or
    # loading data; previously the data was loaded and then silently dropped.
    if component not in ('H', 'F', 'D', 'Z'):
        print('No daily plot is defined for component : ' + component)
        return

    import common_DataProcess as processor
    import common_MagdasDB as magdasDB
    import numpy as np
    import basic_variation_plot as plotter
    import pytz

    dataFrame = magdasDB.getMinData(
        'CMB', year, month, day,
        targetTimeZone=pytz.timezone('Asia/Colombo'))

    filter_list = _dayFilterList(processor, component)
    outliers = processor.get_outliers_multiple_filter(
        dataFrame, component, filter_list)

    # Blank the flagged rows in the plotted column; the outliers themselves
    # are still handed to the plotter separately.
    dataFrame.loc[outliers.index, component] = np.nan
    plotter.daily_graph(dataFrame, component, outliers)
# NOTE(review): this fragment uses `magdasDB`, `processor`, `pytz` and
# `command` without binding them here -- presumably they are imported or
# assigned earlier in the file (possibly in an enclosing `if command == ...`
# block that is not visible); confirm before running it on its own.
import numpy as np
import comp_compare_plot as plotter

# Date to analyse, written as year-month-day without zero padding.
day = '2016-3-9'
print('Comparing three components for date ' + day)

# Split the date on '-'; note that `day` is rebound to the third part.
parts = day.split("-")
year = parts[0]
month = parts[1]
day = parts[2]
# Prefix month/day with '0' when they are shorter than two characters.
if len(month) < 2:
    month = '0' + month
if len(day) < 2:
    day = '0' + day

# Fetch the CMB minute data for that date with Asia/Colombo as the target
# time zone.
dataFrame = magdasDB.getMinData(
    'CMB', year, month, day,
    targetTimeZone=pytz.timezone('Asia/Colombo'))

# Outliers are detected on the 'H' column only, using the UNREAL_TOTAL_FIELD
# filter with limits 40000 and 43000.
component = 'H'
unreal_total_field = processor.OutlierFilter(
    processor.FilterType.UNREAL_TOTAL_FIELD,
    total_field_min=40000, total_field_max=43000)
filter_list = processor.FilterList(unreal_total_field=unreal_total_field)
outliers = processor.get_outliers_multiple_filter(dataFrame, component,
    filter_list)

# Set the flagged rows of the 'H' column to NaN, then hand the frame and the
# outliers to the comparison plot.
dataFrame.loc[outliers.index, component] = np.nan
plotter.component_compare_daily_all(dataFrame, outliers)

# NOTE(review): as written this binds both `month` and `day` to the same
# date string; it looks like an unfinished or truncated branch -- confirm
# against the full file.
if command == 'compare_monthly_components':
    month = day = '2016-3-9'