Example #1
def main2():
    data = readstkData("BITSTAMP/USD")
    #data_log = np.log(data.Last.values)
    # rollmean10 = data_log.rolling(10).mean() #pd.rolling_mean(data['Last'], window=10)
    # rollmean5 = data_log.rolling(5).mean() #pd.rolling_mean(data['Last'], window=5)
    Av1 = list(movingaverage(data.Last.values, MA1))
    Av2 = list(movingaverage(data.Last.values, MA2))
    #Av2 = movingaverage(data.Last.values, MA2)
    #rollstd = pd.rolling_std(data['Last'], window=12)
    plt.plot(data['Last'], color='blue', label='Original')
    plt.plot(data['Last'].index[MA1 - 1:],
             Av1,
             color='red',
             label='Rolling Mean %dd' % MA1)
    plt.plot(data['Last'].index[MA2 - 1:],
             Av2,
             color='green',
             label='Rolling Mean %dd' % MA2)
    #plt.plot(Av1, color='green', label='Rolling Mean 5d')
    #plt.plot(rollstd, color='black', label = 'Rolling Std')
    plt.legend(loc='best')

    # plt.plot(data['Last'])
    plt.title('Plot price & mean')
    plt.show()
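The MA1 and MA2 constants and the movingaverage helper used above are defined elsewhere in that project. Judging from the index slicing (index[MA1 - 1:] against the list of averages), that helper appears to take an integer window and return len(values) - window + 1 points; a minimal sketch consistent with that behaviour (not the project's actual code) would be:

import numpy as np

MA1, MA2 = 10, 50  # assumed window lengths, matching the plot labels

def movingaverage(values, window):
    # Unweighted moving average; 'valid' convolution returns
    # len(values) - window + 1 points, which lines up with index[window - 1:].
    weights = np.ones(window) / window
    return np.convolve(values, weights, mode='valid')

The later examples import a different movingaverage (one that takes a weight vector and appears to keep the series length), so this sketch only applies to Example #1.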
Example #2
def process_data_rhs(data_2D, num_yrs, data_type, start_year, end_year):
    """
    Process data needed for plotting
    """

    import numpy as np
    from movingaverage import movingaverage, n_take_k

    ##########################################################
    # Assemble the data needed on the right-hand-side plot.  #
    # This may differ with data_type, but details will need  #
    # to be defined here.                                    #
    ##########################################################
    data_set_rhs = np.empty([1])

    averaging_window = 7
    window_raw = np.array([n_take_k(averaging_window - 1, i) for i in range(averaging_window)])
    window = window_raw / np.sum(window_raw)  # normalized binomial weights
    if data_type in ('default', 'discharge'):
        yearly_avg = [np.mean(data_2D[i, :]) for i in range(num_yrs)]
        yearly_avg = movingaverage(
            yearly_avg[:averaging_window] + yearly_avg + yearly_avg[-averaging_window:],
            window)[averaging_window:-averaging_window]
        data_set_rhs = yearly_avg
        
    elif data_type == 'minT':
        yearly_min = [np.min(data_2D[i,:]) for i in range(num_yrs)]  
        yearly_min = movingaverage(
            yearly_min[:averaging_window] + yearly_min + yearly_min[-averaging_window:],
            window)[averaging_window:-averaging_window]
        data_set_rhs = yearly_min
    
    elif data_type == 'maxT':      
        yearly_max = [np.max(data_2D[i,:]) for i in range(num_yrs)]  
        yearly_max = movingaverage(
            yearly_max[:averaging_window] + yearly_max + yearly_max[-averaging_window:],
            window)[averaging_window:-averaging_window]
        data_set_rhs = yearly_max
               
    elif data_type == 'precip':
        precip_sum = list(np.sum(data_2D,axis=1)) 
        precip_sum = movingaverage(
            precip_sum[:averaging_window] + precip_sum + precip_sum[-averaging_window:],
            window)[averaging_window:-averaging_window]
        data_set_rhs = precip_sum
        
    else:
        raise Exception('%s data_type not defined in process_data_rhs' % data_type)
        
    return data_set_rhs
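Here n_take_k is presumably the binomial coefficient C(n, k), so for averaging_window = 7 the raw window is [1, 6, 15, 20, 15, 6, 1] and the normalized weights sum to 1. A quick self-contained check, with a stand-in for the project's n_take_k:

import numpy as np
from math import factorial

def n_take_k(n, k):
    # Stand-in for the helper in movingaverage.py: binomial coefficient C(n, k)
    return factorial(n) // (factorial(k) * factorial(n - k))

window_raw = np.array([n_take_k(6, i) for i in range(7)])
print(window_raw)                             # [ 1  6 15 20 15  6  1]
print(window_raw / float(window_raw.sum()))   # binomial weights, sum to 1.0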
Example #3
def get_transition(X, Y, window_size=5):

    slope_changes = get_slope_changes(X, Y)
    slopes_avg = movingaverage.movingaverage(slope_changes, window_size)
    
#    pl.plot(np.absolute(slopes_avg))
#    pl.legend()
    trans = get_index_of_trans(slopes_avg)  
    return trans
Example #4
def process_data_bottom(
            data_2D, num_yrs, data_type, start_year, end_year
            ):
    """
    Returns 3 data sets for plotting on bottom strip
    """
    
    import numpy as np
    from movingaverage import movingaverage, n_take_k
    
    ##########################################################
    #   Prep the data for the bottom strip                   #
    ##########################################################
    
    averaging_window = 51
    window_raw = np.array([n_take_k(averaging_window - 1, i) for i in range(averaging_window)])
    window = window_raw / np.sum(window_raw)  # normalized binomial weights

    # Calculate moving averages (using binomial filter - in movingaverage).
    # Prepend and append half of the averaging window to the data so that the
    #   moving average at early and late times is correct.

    one_third = (end_year - start_year) // 3
    two_thirds = 2 * (end_year - start_year) // 3
    half_window = averaging_window // 2

    data_early = movingaverage([np.mean(data_2D[0:one_third, i]) for i in range(365 - half_window, 365)] +
                               [np.mean(data_2D[0:one_third, i]) for i in range(365)] +
                               [np.mean(data_2D[0:one_third, i]) for i in range(0, half_window)],
                               window)[half_window : 365 + half_window]
    data_mid   = movingaverage([np.mean(data_2D[one_third + 1:two_thirds, i]) for i in range(365 - half_window, 365)] +
                               [np.mean(data_2D[one_third + 1:two_thirds, i]) for i in range(365)] +
                               [np.mean(data_2D[one_third + 1:two_thirds, i]) for i in range(0, half_window)],
                               window)[half_window : 365 + half_window]
    data_late  = movingaverage([np.mean(data_2D[two_thirds + 1:, i]) for i in range(365 - half_window, 365)] +
                               [np.mean(data_2D[two_thirds + 1:, i]) for i in range(365)] +
                               [np.mean(data_2D[two_thirds + 1:, i]) for i in range(0, half_window)],
                               window)[half_window : 365 + half_window]
                               
    return data_early, data_mid, data_late       
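The wrap-around padding above treats each 365-day climatology as periodic: half an averaging window is copied from the end of the year onto the front (and vice versa) before smoothing, and the padding is trimmed off afterwards. A self-contained toy of the same idea, using np.convolve in 'valid' mode to stand in for the module's weighted movingaverage:

import numpy as np

# toy day-of-year signal and a 7-point binomial window (weights sum to 1)
days = np.arange(365)
signal = 10.0 + np.sin(2 * np.pi * days / 365.0)
window = np.array([1, 6, 15, 20, 15, 6, 1]) / 64.0
half = len(window) // 2

# wrap half a window from each end so days 0 and 364 are smoothed with their
# calendar neighbours, then the 'valid' convolution trims the padding back off
padded = np.concatenate([signal[-half:], signal, signal[:half]])
smoothed = np.convolve(padded, window, mode='valid')
assert smoothed.shape == signal.shape   # still 365 values, no edge artefacts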
Example #5
def smooth(l):
    return movingaverage(l, 10, data_is_list=True, avoid_fp_drift=False)
#Eliminate any values in the first 200 bins that are less than 10
edit = []
for i in range(0, 200):
	if counts[i] < 10:
		edit.append(i)

counts = np.delete(counts, edit)  # drop the flagged bins

#Eliminate any extreme values, larger data sets should allow stricter and more complicated
#function as filter paramaters, default moving average +- half moving average

#create a list of moving averages
ma_counts = list(ma.movingaverage(counts, 10))

#pad ma_counts with zeros so it is the same length as counts
c = len(counts) - len(ma_counts)
ma_counts.extend([0] * c)

counts1 = []
for j in range(0, len(counts)):
	if counts[j] > ma_counts[j] - (ma_counts[j]/2) and counts[j] < ma_counts[j] + (ma_counts[j]/2):
		counts1.append(counts[j])
		
#construct channels array that is the correct length
y = len(counts1)
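The loop above keeps only the counts that lie within half a moving average of the local moving average. With numpy the same band filter can be written without the explicit loop; a sketch, assuming counts and ma_counts are already the same length:

import numpy as np

counts_arr = np.asarray(counts, dtype=float)
ma_arr = np.asarray(ma_counts, dtype=float)

# keep values strictly inside (ma - ma/2, ma + ma/2), as in the loop above
keep = (counts_arr > ma_arr - ma_arr / 2) & (counts_arr < ma_arr + ma_arr / 2)
counts1 = counts_arr[keep].tolist()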
Example #6
def matrix_from_xls(
    file_w_path,
    column=0,
    xcycle=365,
    day_of_year_start=1,
    skip=0,
    filetype='csv',
    data_type='annual',
    leap_yr='none',
    read_date_column=False,
    date_column=0,
    movingaveragevec='none',
    missing_data_flag='none'
    ):
    #Roy Haggerty, 2014
    """Reads timeseries sheet (csv, xls/xlsx, google). Returns 2-D numpy array.
    
    If using a Google sheet, set filetype='gsheet' and make sure gspread is
    on your path. gspread needs to be installed from GitHub, and a username
    and password must be importable.
    
    file_w_path -- (str) filename including path of file. If google sheet, then key.
    
    keyword arguments:
    column -- (int) column number for data (default 0)
    xcycle -- (int) how many numbers in each row (default 365)
    day_of_year_start -- (int) for a timeseries, day of year start of 2D array (default 1)
    skip -- (int) how many numbers to skip before using data (default 0)
    filetype -- (str) type of file csv, xls, gsheet. (default csv)
    data_type -- (str) type of data annual, daily (default annual)
    leap_yr -- (str) how to deal with leap years, none or remove (default none)
    read_date_column -- (bool) data contain date col True or False (default False)
    date_column -- (int) column where dates are found (default 0)
    movingaveragevec -- weights applied as a moving average to the data (default none)
    missing_data_flag -- integer flag that identifies missing or bad data (default none)
    """
    
    import numpy as np
    import xlrd
    import pandas as pd
    from movingaverage import movingaverage
    
    if read_date_column:
        data_col_num = column - 1
    
    if filetype != 'gsheet':  # unless it is a Google sheet, get filetype from the file extension
        filetype = file_w_path.rsplit('.')[-1]

    if filetype == 'csv':
        if read_date_column:
            df = pd.read_csv(file_w_path, index_col=date_column, parse_dates=[date_column])
            df = df.convert_objects(convert_numeric=True)
            df.index  = pd.to_datetime(df.index.date)  #convert to Timestamp, set time to 00
            ts = pd.Series(df.iloc[:,data_col_num],df.index)
            start_date, start_year, end_year \
                        = start_end_info(ts, skip=skip, \
                          day_of_year_start=day_of_year_start, xcycle=xcycle)
            data_yr_tmp = timeseries(ts,leap_yr=leap_yr,missing_data='bfill',
                        start_date = start_date, missing_data_flag = missing_data_flag)
            if movingaveragevec != 'none':
                data_yr_tmp = movingaverage(data_yr_tmp, movingaveragevec) 
            return start_year, end_year, data_2D(data_yr_tmp,skip,xcycle)
        else:
            data_tmp = np.array(np.genfromtxt(file_w_path, delimiter=',',skip_header=1)) # Read csv file
            data_yr_tmp = data_tmp[:,column]
            return data_2D(data_yr_tmp,skip,xcycle)
    elif filetype == 'xls':
        workbook = xlrd.open_workbook(file_w_path)
        # get 0th sheet, column, starting at 1st row
        sheetnum = 0
        rowstart = 1
        if read_date_column:
            df = pd.read_excel(file_w_path, sheetname=sheetnum, header=rowstart-1, index_col=date_column)
            df = df.convert_objects(convert_numeric=True)
            df.index  = pd.to_datetime(df.index.date) #convert to Timestamp, set time to 00
            ts = pd.Series(df.iloc[:,data_col_num],df.index)
            start_date, start_year, end_year \
                        = start_end_info(ts, skip=skip, \
                          day_of_year_start=day_of_year_start, xcycle=xcycle)
            data_yr_tmp = timeseries(ts,leap_yr=leap_yr,missing_data='bfill',
                        start_date = start_date, missing_data_flag = missing_data_flag)
            return start_year, end_year, data_2D(data_yr_tmp,skip,xcycle)
        else:
            data_yr_tmp = np.array(workbook.sheet_by_index(sheetnum).col_values(column)[rowstart:])
            return data_2D(data_yr_tmp,skip,xcycle)
    elif filetype == 'gsheet':
        import imp
        try:
            import gspread  # gspread is available at https://github.com/burnash/gspread
            ui = imp.load_source('userinfo', 'C:\\keys\\userinfo.py')
            gc = gspread.login(ui.userid,ui.pw)
            sheet = gc.open_by_key(file_w_path).sheet1
            if read_date_column:
                data_str = np.array(sheet.get_all_values())
                dates = data_str[1:,date_column].astype(str)
                df = pd.DataFrame(data_str[1:,date_column+1:],index=pd.to_datetime(dates))
                df = df.convert_objects(convert_numeric=True)
                ts = pd.Series(df.iloc[:,data_col_num],df.index)
                start_date, start_year, end_year \
                            = start_end_info(ts, skip=skip, \
                              day_of_year_start=day_of_year_start, xcycle=xcycle)
                data_yr_tmp = timeseries(ts,leap_yr=leap_yr,missing_data='bfill',
                            start_date = start_date, missing_data_flag = missing_data_flag)
                return start_year, end_year, data_2D(data_yr_tmp,skip,xcycle)
            else:
                data_str = np.array(sheet.get_all_values())
                data_yr_tmp = data_str[1:,column].astype(float)
                return data_2D(data_yr_tmp,skip,xcycle)
        except ImportError:
            print '\nGSPREAD library is not available.'
            print 'make sure gspread is loaded and placed in the path'
            print 'see gspread docs at https://github.com/burnash/gspread\n'
            raise ImportError()
        except gspread.exceptions.SpreadsheetNotFound:
            print '\nGOOGLE sheet not found'
            print 'check google key\n'
            raise gspread.exceptions.SpreadsheetNotFound()
        except gspread.exceptions.AuthenticationError:
            print '\nLOGIN to google failed. Make sure your username'
            print 'and password are correctly provided.\n'
            raise gspread.exceptions.AuthenticationError()
        except IndexError:
            print '\nSOMETHING appears to be wrong with spreadsheet or requested column\n'
            raise IndexError()
        except ValueError:
            print '\nEXPECTING float in spreadsheet but found other variable type\n'
            raise
        except:
            print 'unknown error importing gspread module or reading data'
            raise Exception()
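A hypothetical call for the CSV path, just to show how the keyword arguments fit together (the file name, column layout, and flag value below are placeholders, not something from the original project):

# hypothetical usage: a daily CSV with a date column followed by one data column
start_year, end_year, data = matrix_from_xls(
    'discharge_daily.csv',        # placeholder file name
    column=1,                     # data sit in the column after the dates
    xcycle=365,                   # one row of the 2-D array per 365-day year
    day_of_year_start=274,        # e.g. a water year starting 1 October
    filetype='csv',
    leap_yr='remove',
    read_date_column=True,
    date_column=0,
    missing_data_flag=-999)       # treat -999 as missing data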