def main2():
    """Plot the 'Last' column of BITSTAMP/USD with two moving-average overlays.

    The data comes from ``readstkData`` and the averages from ``movingaverage``
    using the module-level window lengths ``MA1`` and ``MA2``. The figure is
    shown with ``plt.show()``; nothing is returned.
    """
    data = readstkData("BITSTAMP/USD")
    last_values = data.Last.values

    # Compute both averages up front, materialised as lists for plotting.
    short_avg = list(movingaverage(last_values, MA1))
    long_avg = list(movingaverage(last_values, MA2))

    plt.plot(data['Last'], color='blue', label='Original')

    # Each average is plotted against the index with the first (window - 1)
    # entries skipped.
    overlays = (
        (MA1, short_avg, 'red', 'Rolling Mean 10d'),
        (MA2, long_avg, 'green', 'Rolling Mean 50d'),
    )
    for window, averaged, colour, caption in overlays:
        plt.plot(data['Last'].index[window - 1:], averaged,
                 color=colour, label=caption)

    plt.legend(loc='best')
    plt.title('Plot price & mean')
    plt.show()
def process_data_rhs(data_2D, num_yrs, data_type, start_year, end_year):
    """Process data needed for plotting on the right-hand-side plot.

    One value is computed per row of ``data_2D`` and the resulting series is
    smoothed with ``movingaverage`` using 7 normalized weights built from
    ``n_take_k(6, i)``.

    data_2D    -- 2-D array indexed as data_2D[row, :]
    num_yrs    -- (int) number of leading rows reduced for every data_type
                  except 'precip'
    data_type  -- (str) selects the per-row reduction:
                  'default' / 'discharge' -> mean, 'minT' -> min,
                  'maxT' -> max, 'precip' -> sum
    start_year, end_year -- accepted for interface compatibility; not used

    Returns the smoothed series with the padding removed.
    Raises ValueError (a subclass of Exception) for an unknown data_type.
    """
    import numpy as np

    row_reducers = {
        'default': np.mean,
        'discharge': np.mean,
        'minT': np.min,
        'maxT': np.max,
    }

    # Fail fast, before any work is done, and say what was wrong.
    if data_type != 'precip' and data_type not in row_reducers:
        message = '%s %s' % (
            data_type, ' data_type not defined in process_data function')
        print(message)
        raise ValueError(message)

    from movingaverage import movingaverage, n_take_k

    averaging_window = 7
    window_raw = np.array(
        [n_take_k(averaging_window - 1, i) for i in range(averaging_window)],
        dtype=float)
    window = window_raw / np.sum(window_raw)  # normalized weights

    if data_type == 'precip':
        # NOTE(review): this sums every row of data_2D, not only the first
        # num_yrs rows used by the other data types; kept as it was.
        yearly = list(np.sum(data_2D, axis=1))
    else:
        reduce_row = row_reducers[data_type]
        yearly = [reduce_row(data_2D[i, :]) for i in range(num_yrs)]

    # Repeat the first and last `averaging_window` values on either side so
    # the filter has data at both ends, then cut that padding off again.
    padded = (yearly[:averaging_window] + yearly
              + yearly[-averaging_window:])
    return movingaverage(padded, window)[averaging_window:-averaging_window]
def get_transition(X, Y, window_size):
    """Return the transition index found in the smoothed slope changes.

    The slope changes of (X, Y) come from ``get_slope_changes``, are smoothed
    with ``movingaverage.movingaverage`` over ``window_size``, and the result
    is passed to ``get_index_of_trans``.

    X, Y        -- data forwarded unchanged to ``get_slope_changes``
    window_size -- window passed to the moving average. This used to be
                   overwritten with a hard-coded 5 inside the function, so the
                   caller's value had no effect; it is now honoured. Pass 5 to
                   get the previous behaviour.
    """
    slope_changes = get_slope_changes(X, Y)
    slopes_avg = movingaverage.movingaverage(slope_changes, window_size)
    return get_index_of_trans(slopes_avg)
def process_data_bottom(data_2D, num_yrs, data_type, start_year, end_year):
    """Returns 3 data sets for plotting on bottom strip.

    The rows of ``data_2D`` are split into an early, a middle and a late
    group at ``(end_year - start_year) // 3`` and
    ``2 * (end_year - start_year) // 3``. For each group the column-wise mean
    over the first 365 columns is computed and smoothed with ``movingaverage``
    using 51 normalized weights built from ``n_take_k(50, i)``.

    data_2D    -- 2-D array indexed as data_2D[rows, column], with at least
                  365 columns
    num_yrs, data_type -- accepted for interface compatibility; not used
    start_year, end_year -- (int) only their difference is used, to place
                  the two split points

    Returns (data_early, data_mid, data_late).
    """
    import numpy as np
    from movingaverage import movingaverage, n_take_k

    days_per_row = 365
    averaging_window = 51
    # Floor division keeps these usable as indices under true division too.
    half_window = averaging_window // 2

    window_raw = np.array(
        [n_take_k(averaging_window - 1, i) for i in range(averaging_window)],
        dtype=float)
    window = window_raw / np.sum(window_raw)  # normalized weights

    one_third = (end_year - start_year) // 3
    two_thirds = 2 * (end_year - start_year) // 3

    def smoothed_daily_mean(rows):
        """Column-wise mean of `rows`, smoothed with wrap-around padding."""
        daily = [np.mean(rows[:, i]) for i in range(days_per_row)]
        # Wrap half a window from each end so the filter is fed real data at
        # the first and last columns. The earlier code prepended one value
        # too few (it stopped at column 363), so the padding was lopsided.
        # NOTE(review): the slice below assumes movingaverage returns output
        # aligned with, and as long as, its input -- confirm.
        padded = daily[-half_window:] + daily + daily[:half_window]
        return movingaverage(padded, window)[
            half_window:days_per_row + half_window]

    # Contiguous groups: the earlier `one_third + 1` / `two_thirds + 1` start
    # indices left the rows at one_third and two_thirds out of every group.
    data_early = smoothed_daily_mean(data_2D[0:one_third])
    data_mid = smoothed_daily_mean(data_2D[one_third:two_thirds])
    data_late = smoothed_daily_mean(data_2D[two_thirds:])

    return data_early, data_mid, data_late
def smooth(l):
    """Return ``l`` passed through ``movingaverage`` with a window argument of 10.

    The call sets ``data_is_list=True`` and ``avoid_fp_drift=False``; whatever
    ``movingaverage`` returns is handed back unchanged.
    """
    window = 10
    options = {'data_is_list': True, 'avoid_fp_drift': False}
    return movingaverage(l, window, **options)
# Eliminate any values in the first 200 bins that are less than 10.
# `edit` holds the positions to drop. np.delete takes positions (not values)
# and returns a new array, so its result has to be assigned back; the earlier
# code marked the entries with -1 and then discarded the np.delete result,
# leaving the markers in the data.
edit = [i for i in range(min(200, len(counts))) if counts[i] < 10]
counts = np.delete(counts, edit)

# Eliminate any extreme values. Larger data sets should allow stricter and
# more complicated functions as filter parameters; the default keeps a value
# only when it lies within moving average +/- half the moving average.

# Create a list of moving averages.
ma_counts = ma.movingaverage(counts, 10)

# Make ma_counts and counts the same length by adding zeros to the end of
# the list.
a = len(ma_counts)
b = len(counts)
c = b - a
zeros = [0] * c
ma_counts.extend(zeros)

# Both bounds must hold. The earlier `or` accepted practically every value,
# so nothing was filtered. Entries paired with the zero padding have an
# empty (0, 0) band and are therefore dropped.
counts1 = []
for value, average in zip(counts, ma_counts):
    half_band = average / 2
    if average - half_band < value < average + half_band:
        counts1.append(value)

# Construct channels array that is the correct length.
y = len(counts1)
def matrix_from_xls(file_w_path, column=0, xcycle=365, day_of_year_start=1,
                    skip=0, filetype='csv', data_type='annual',
                    leap_yr='none', read_date_column=False, date_column=0,
                    movingaveragevec='none', missing_data_flag='none'):
    #Roy Haggerty, 2014
    """Reads timeseries sheet (csv, xls/xlsx, google). Returns 2-D numpy array.

    If using google sheet, use filetype == 'gsheet'. Make sure gspread is
    within your path. Need to add gspread from github, and import username
    and password.

    file_w_path -- (str) filename including path of file. If google sheet,
                   then key.

    keyword arguments:
    column -- (int) column number for data (default 0)
    xcycle -- (int) how many numbers in each row (default 365)
    day_of_year_start -- (int) for a timeseries, day of year start of 2D
                         array (default 1)
    skip -- (int) how many numbers to skip before using data (default 0)
    filetype -- (str) type of file csv, xls, gsheet. Unless it is 'gsheet'
                the value is replaced by the extension of file_w_path
                (default csv)
    data_type -- (str) type of data annual, daily; not used by this
                 function (default annual)
    leap_yr -- (str) how to deal with leap years, none or remove
               (default none)
    read_date_column -- (bool) data contain date col True or False
                        (default False)
    date_column -- (int) column where dates are found (default 0)
    movingaveragevec -- second argument handed to movingaverage(); only
                        applied when reading a csv file with a date column,
                        and skipped when 'none' (default none)
    missing_data_flag -- integer flag that identifies missing or bad data
                         (default none)

    Returns:
    With read_date_column: the tuple (start_year, end_year, data_2D(...)).
    Without it: the result of data_2D(...) alone.
    NOTE(review): any other file extension falls through and returns None.

    Errors raised while reading a google sheet are reported on stdout and
    then re-raised unchanged, so the original message and traceback survive.
    """
    import numpy as np
    import xlrd
    import pandas as pd
    from movingaverage import movingaverage

    if read_date_column:
        # presumably the date column precedes the data column and is moved
        # out of the frame, shifting the data one position left -- confirm
        data_col_num = column - 1

    def as_numeric_days(frame):
        """Coerce values to numbers and set the index time of day to 00."""
        frame = frame.convert_objects(convert_numeric=True)
        frame.index = pd.to_datetime(frame.index.date)
        return frame

    def dated_result(frame, smooth):
        """Build (start_year, end_year, 2-D data) from a date-indexed frame."""
        ts = pd.Series(frame.iloc[:, data_col_num], frame.index)
        start_date, start_year, end_year = start_end_info(
            ts, skip=skip, day_of_year_start=day_of_year_start,
            xcycle=xcycle)
        data_yr_tmp = timeseries(ts, leap_yr=leap_yr, missing_data='bfill',
                                 start_date=start_date,
                                 missing_data_flag=missing_data_flag)
        if smooth and movingaveragevec != 'none':
            data_yr_tmp = movingaverage(data_yr_tmp, movingaveragevec)
        return start_year, end_year, data_2D(data_yr_tmp, skip, xcycle)

    if filetype != 'gsheet':
        # unless it is a google sheet, get filetype from the file extension
        filetype = file_w_path.rsplit('.')[-1].lower()

    if filetype == 'csv':
        if read_date_column:
            df = pd.read_csv(file_w_path, index_col=date_column,
                             parse_dates=[date_column])
            return dated_result(as_numeric_days(df), smooth=True)
        data_tmp = np.array(np.genfromtxt(file_w_path, delimiter=',',
                                          skip_header=1))  # Read csv file
        return data_2D(data_tmp[:, column], skip, xcycle)

    elif filetype in ('xls', 'xlsx'):
        # xlsx is promised by the docstring but was never matched before
        # get 0th sheet, column, starting at 1st row
        sheetnum = 0
        rowstart = 1
        if read_date_column:
            df = pd.read_excel(file_w_path, sheetname=sheetnum,
                               header=rowstart - 1, index_col=date_column)
            return dated_result(as_numeric_days(df), smooth=False)
        workbook = xlrd.open_workbook(file_w_path)
        data_yr_tmp = np.array(
            workbook.sheet_by_index(sheetnum).col_values(column)[rowstart:])
        return data_2D(data_yr_tmp, skip, xcycle)

    elif filetype == 'gsheet':
        import imp
        try:
            import gspread
            # gspread is available at https://github.com/burnash/gspread
            # credentials are read from a fixed local file
            ui = imp.load_source('userinfo', 'C:\\keys\\userinfo.py')
            gc = gspread.login(ui.userid, ui.pw)
            sheet = gc.open_by_key(file_w_path).sheet1
            data_str = np.array(sheet.get_all_values())
            if read_date_column:
                dates = data_str[1:, date_column].astype(str)
                df = pd.DataFrame(data_str[1:, date_column + 1:],
                                  index=pd.to_datetime(dates))
                df = df.convert_objects(convert_numeric=True)
                print(df.head())
                print(df.index)
                print(data_col_num)
                # start/end information is now computed here as well; this
                # branch used start_date, start_year and end_year without
                # defining them and failed with a NameError.
                return dated_result(df, smooth=False)
            # builtin float is what the removed np.float alias pointed to
            data_yr_tmp = data_str[1:, column].astype(float)
            return data_2D(data_yr_tmp, skip, xcycle)
        except ImportError:
            print('\nGSPREAD library is not available.')
            print('make sure gspread is loaded and placed in the path')
            print('see gspread docs at https://github.com/burnash/gspread\n')
            raise
        except gspread.exceptions.SpreadsheetNotFound:
            print('\nGOOGLE sheet not found')
            print('check google key\n')
            raise
        except gspread.exceptions.AuthenticationError:
            print('\nLOGIN to google failed. Make sure your username')
            print('and password are correctly provided.\n')
            raise
        except IndexError:
            print('\nSOMETHING appears to be wrong with spreadsheet or requested column\n')
            raise
        except ValueError:
            print('\nEXPECTING float in spreadsheet but found other variable type\n')
            raise
        except Exception:
            print('unknown error importing gspread module or reading data')
            raise
def matrix_from_xls(file_w_path, column=0, xcycle=365, day_of_year_start=1,
                    skip=0, filetype='csv', data_type='annual',
                    leap_yr='none', read_date_column=False, date_column=0,
                    movingaveragevec='none', missing_data_flag='none'):
    #Roy Haggerty, 2014
    """Reads timeseries sheet (csv, xls/xlsx, google). Returns 2-D numpy array.

    If using google sheet, use filetype == 'gsheet'. Make sure gspread is
    within your path. Need to add gspread from github, and import username
    and password.

    file_w_path -- (str) filename including path of file. If google sheet,
                   then key.

    keyword arguments:
    column -- (int) column number for data (default 0)
    xcycle -- (int) how many numbers in each row (default 365)
    day_of_year_start -- (int) for a timeseries, day of year start of 2D
                         array (default 1)
    skip -- (int) how many numbers to skip before using data (default 0)
    filetype -- (str) type of file csv, xls, gsheet. Unless it is 'gsheet'
                the value is replaced by the extension of file_w_path
                (default csv)
    data_type -- (str) type of data annual, daily; not used by this
                 function (default annual)
    leap_yr -- (str) how to deal with leap years, none or remove
               (default none)
    read_date_column -- (bool) data contain date col True or False
                        (default False)
    date_column -- (int) column where dates are found (default 0)
    movingaveragevec -- second argument handed to movingaverage(); only
                        applied when reading a csv file with a date column,
                        and skipped when 'none' (default none)
    missing_data_flag -- integer flag that identifies missing or bad data
                         (default none)

    Returns:
    With read_date_column: the tuple (start_year, end_year, data_2D(...)).
    Without it: the result of data_2D(...) alone.
    NOTE(review): any other file extension falls through and returns None.

    Errors raised while reading a google sheet are reported on stdout and
    then re-raised unchanged, so the original message and traceback survive.
    """
    import numpy as np
    import xlrd
    import pandas as pd
    from movingaverage import movingaverage

    if read_date_column:
        # presumably the date column precedes the data column and is moved
        # out of the frame, shifting the data one position left -- confirm
        data_col_num = column - 1

    def as_numeric_days(frame):
        """Coerce values to numbers and set the index time of day to 00."""
        frame = frame.convert_objects(convert_numeric=True)
        frame.index = pd.to_datetime(frame.index.date)
        return frame

    def dated_result(frame, smooth):
        """Build (start_year, end_year, 2-D data) from a date-indexed frame."""
        ts = pd.Series(frame.iloc[:, data_col_num], frame.index)
        start_date, start_year, end_year = start_end_info(
            ts, skip=skip, day_of_year_start=day_of_year_start,
            xcycle=xcycle)
        data_yr_tmp = timeseries(ts, leap_yr=leap_yr, missing_data='bfill',
                                 start_date=start_date,
                                 missing_data_flag=missing_data_flag)
        if smooth and movingaveragevec != 'none':
            data_yr_tmp = movingaverage(data_yr_tmp, movingaveragevec)
        return start_year, end_year, data_2D(data_yr_tmp, skip, xcycle)

    if filetype != 'gsheet':
        # unless it is a google sheet, get filetype from the file extension
        filetype = file_w_path.rsplit('.')[-1].lower()

    if filetype == 'csv':
        if read_date_column:
            df = pd.read_csv(file_w_path, index_col=date_column,
                             parse_dates=[date_column])
            return dated_result(as_numeric_days(df), smooth=True)
        data_tmp = np.array(np.genfromtxt(file_w_path, delimiter=',',
                                          skip_header=1))  # Read csv file
        return data_2D(data_tmp[:, column], skip, xcycle)

    elif filetype in ('xls', 'xlsx'):
        # xlsx is promised by the docstring but was never matched before
        # get 0th sheet, column, starting at 1st row
        sheetnum = 0
        rowstart = 1
        if read_date_column:
            df = pd.read_excel(file_w_path, sheetname=sheetnum,
                               header=rowstart - 1, index_col=date_column)
            return dated_result(as_numeric_days(df), smooth=False)
        workbook = xlrd.open_workbook(file_w_path)
        data_yr_tmp = np.array(
            workbook.sheet_by_index(sheetnum).col_values(column)[rowstart:])
        return data_2D(data_yr_tmp, skip, xcycle)

    elif filetype == 'gsheet':
        import imp
        try:
            import gspread
            # gspread is available at https://github.com/burnash/gspread
            # credentials are read from a fixed local file
            ui = imp.load_source('userinfo', 'C:\\keys\\userinfo.py')
            gc = gspread.login(ui.userid, ui.pw)
            sheet = gc.open_by_key(file_w_path).sheet1
            data_str = np.array(sheet.get_all_values())
            if read_date_column:
                dates = data_str[1:, date_column].astype(str)
                df = pd.DataFrame(data_str[1:, date_column + 1:],
                                  index=pd.to_datetime(dates))
                df = df.convert_objects(convert_numeric=True)
                print(df.head())
                print(df.index)
                print(data_col_num)
                # start/end information is now computed here as well; this
                # branch used start_date, start_year and end_year without
                # defining them and failed with a NameError.
                return dated_result(df, smooth=False)
            # builtin float is what the removed np.float alias pointed to
            data_yr_tmp = data_str[1:, column].astype(float)
            return data_2D(data_yr_tmp, skip, xcycle)
        except ImportError:
            print('\nGSPREAD library is not available.')
            print('make sure gspread is loaded and placed in the path')
            print('see gspread docs at https://github.com/burnash/gspread\n')
            raise
        except gspread.exceptions.SpreadsheetNotFound:
            print('\nGOOGLE sheet not found')
            print('check google key\n')
            raise
        except gspread.exceptions.AuthenticationError:
            print('\nLOGIN to google failed. Make sure your username')
            print('and password are correctly provided.\n')
            raise
        except IndexError:
            print('\nSOMETHING appears to be wrong with spreadsheet or requested column\n')
            raise
        except ValueError:
            print('\nEXPECTING float in spreadsheet but found other variable type\n')
            raise
        except Exception:
            print('unknown error importing gspread module or reading data')
            raise