Code Example #1
import pandas as pd  # pandas < 0.18 API: pd.rolling_mean / pd.expanding_* live at module level


def iter_over_groups(data, group, features, key):
    """Build lagged rolling/expanding features separately for each value of `key`."""
    frames = []
    for x in group:
        subset = data[data[key] == x].copy()
        for i in features:
            suffix = i.lower() + '_' + key[0].lower()
            # NOTE: the window sizes (7, 10, 3) do not match the 'avg_10'/'sum_15' labels
            subset['avg_10' + suffix] = pd.rolling_mean(subset[i], 7).shift(1)
            subset['sum_15' + suffix] = pd.rolling_mean(subset[i], 10).shift(1)
            subset['avg_3' + suffix] = pd.rolling_mean(subset[i], 3).shift(1)
            subset['expand' + suffix] = pd.expanding_mean(subset[i]).shift(1)
            subset['expand_sum' + suffix] = pd.expanding_sum(subset[i]).shift(1)
        frames.append(subset)
    # combine all per-group frames with a fresh integer index
    return pd.concat(frames, ignore_index=True)
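
The manual per-group loop above can be collapsed on modern pandas (>= 1.0, where the `pd.rolling_*`/`pd.expanding_*` helpers no longer exist). A minimal sketch under that assumption, reusing the `data`, `features`, and `key` names and keeping the original (mismatched) column labels; `iter_over_groups_modern` is a hypothetical name:

import pandas as pd

def iter_over_groups_modern(data, features, key):
    out = data.copy()
    grouped = out.groupby(key)
    for i in features:
        suffix = i.lower() + '_' + key[0].lower()
        # transform preserves row alignment, so no append/concat step is needed
        out['avg_10' + suffix] = grouped[i].transform(lambda s: s.rolling(7).mean().shift(1))
        out['expand_sum' + suffix] = grouped[i].transform(lambda s: s.expanding().sum().shift(1))
    return out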
Code Example #2
File: strategy.py Project: garyjoy/pyswing
    def analyse(self):

        # Logger.log(logging.INFO, "Analyse Strategy", {"scope":__name__, "Rule 1":self._rule1, "Rule 2":self._rule2, "Rule 3":self._rule3, "Type":self._type})

        connection = sqlite3.connect(pyswing.database.pySwingDatabase)
        query = self.analyseStrategySql % (self._rule1, self._rule2, self._rule3, self._exit, self._type)
        self._strategyData = read_sql_query(query, connection, 'Date')
        self._strategyData['ExitValueAfterCosts'] = self._strategyData['ExitValue'] - 0.2
        connection.close()

        exitValueDataFrame = self._strategyData.ix[:,'ExitValueAfterCosts']

        mean = exitValueDataFrame.mean()
        median = exitValueDataFrame.median()
        sum = exitValueDataFrame.sum()
        count = exitValueDataFrame.count()

        # the divisor 10 appears to assume roughly ten years of trade history (an inference, not stated in the source)
        tradesPerYear = count / 10
        sharpeRatio = sqrt(tradesPerYear) * exitValueDataFrame.mean() / exitValueDataFrame.std()

        self._strategyData["Sum"] = expanding_sum(exitValueDataFrame)
        self._strategyData["Max"] = expanding_max(self._strategyData["Sum"])
        self._strategyData["Min"] = expanding_min(self._strategyData["Sum"])
        self._strategyData["DD"] = self._strategyData["Max"] - self._strategyData["Min"]

        # draw-down: how far the running-sum equity curve sits below its running maximum
        runningSum = expanding_sum(exitValueDataFrame)
        max2here = expanding_max(runningSum)
        dd2here = runningSum - max2here
        drawDown = dd2here.min()

        Logger.log(logging.INFO, "Analysing Strategy", {"scope":__name__, "Rule 1":self._rule1, "Rule 2":self._rule2, "Rule 3":self._rule3, "Exit":self._exit, "Type":self._type, "Mean":str(mean), "Median":str(median), "Sum":str(sum), "Count":str(count), "SharpeRatio":str(sharpeRatio), "DrawDown":str(drawDown)})

        connection = sqlite3.connect(pyswing.database.pySwingDatabase)
        c = connection.cursor()

        deleteSql = self.deleteStrategySql % (pyswing.globals.pySwingStrategy, self._rule1, self._rule2, self._rule3, self._exit, self._type)
        c.executescript(deleteSql)
        connection.commit()

        insertSql = self.insertStrategySql % (pyswing.globals.pySwingStrategy, self._rule1, self._rule2, self._rule3, self._exit, self._type, str(mean), str(median), str(sum), str(count), str(sharpeRatio), str(drawDown))
        c.executescript(insertSql)
        connection.commit()

        c.close()
        connection.close()
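
`expanding_sum` and `expanding_max` were removed in pandas 1.0; a sketch of the same draw-down figure with the modern `.expanding()` accessor (reusing the `exitValueDataFrame` name from the method above):

runningSum = exitValueDataFrame.expanding().sum()
drawDown = (runningSum - runningSum.expanding().max()).min()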
Code Example #3
File: drawdown.py Project: ctmg/research_templates
import numpy as np
import pandas as pd


def all_dd(df):
    
    df.reset_index(level=0, inplace=True)
    df['start'] = pd.NaT
    df['end'] = pd.NaT
    df['valley'] = pd.NaT
    df['length_of_dd'] = np.nan
    df['dd'] = np.nan
    df['max_dd'] = np.nan

    for x in df.index[2:]:
        test = df.ix[x-1:x, 'indd']
        
        if (test.notnull()[x] and test.isnull()[x-1]):
            df.ix[x,'start'] = df.ix[x, 'Date']
            
        if (test.notnull()[x-1] and test.isnull()[x]): 
            s = df.ix[:x,'start'].last_valid_index()  
            df.ix[s:x-1,'end'] = df.ix[x-1, 'Date']
            # find the valley: the low point of the vami curve within the drawdown
            v = df.ix[s:x-1,'vami'].idxmin()
            df.ix[s:x-1, 'valley'] = df.ix[v,'Date']            
            df.ix[x-1,'length_of_dd'] = len(df[s:x-1])+1
            # running sum of returns across the drawdown
            df.ix[s:x-1, 'dd'] = pd.expanding_sum(df.ix[s:x-1,'ror'])
            df.ix[x-1, 'max_dd'] = np.exp(df.ix[s:v,'ror'].sum())
        
        if (x == len(df)-1 and test.notnull()[x]):
            s = df.ix[:x,'start'].last_valid_index()
            df.ix[s:x,'end'] = df.ix[x, 'Date']
            # find the valley: the low point of the vami curve within the drawdown
            v = df.ix[s:x,'vami'].idxmin()
            df.ix[s:x, 'valley'] = df.ix[v,'Date']  
            df.ix[x,'length_of_dd'] = len(df[s:x])+1
            # running sum of returns across the drawdown
            df.ix[s:x, 'dd'] = pd.expanding_sum(df.ix[s:x,'ror'])
            df.ix[x, 'max_dd'] = np.exp(df.ix[s:v,'ror'].sum())
             

    # forward-fill the start date across each drawdown period
    df['start'] = df['start'][df['indd'].notnull()].fillna(method='ffill')
    return df
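
The column semantics are only implied by the code: `ror` looks like log returns, `vami` a value-added index, and `indd` is non-null while in a drawdown. A minimal, hypothetical invocation under those assumptions (old-pandas era, since the function relies on `.ix` and `pd.expanding_sum`):

dates = pd.date_range('2015-01-31', periods=6, freq='M')
df = pd.DataFrame({'ror': [0.01, -0.02, -0.01, 0.03, 0.02, -0.01]},
                  index=pd.Index(dates, name='Date'))
df['vami'] = 1000 * np.exp(df['ror'].cumsum())                   # equity curve from log returns
df['indd'] = df['vami'].where(df['vami'] < df['vami'].cummax())  # NaN outside drawdowns
dd_table = all_dd(df)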
Code Example #4
File: cf.py Project: epifanio/ecoop-binder
 def expanding_smoother(self, data, stype='mean', min_periods=None, freq=None):
     """
     Perform an expanding smoothing on the data; for complete help refer to
     http://pandas.pydata.org/pandas-docs/dev/computation.html

     :param data: pandas dataframe input data
     :param stype: smoothing type (one of the suffixes below, e.g. 'sum')
     :param min_periods: minimum number of observations in the window
     :param freq: frequency

     smoothing types:
     expanding_count    Number of non-null observations
     expanding_sum      Sum of values
     expanding_mean     Mean of values
     expanding_median   Arithmetic median of values
     expanding_min      Minimum
     expanding_max      Maximum
     expanding_std      Unbiased standard deviation
     expanding_var      Unbiased variance
     expanding_skew     Unbiased skewness (3rd moment)
     expanding_kurt     Unbiased kurtosis (4th moment)
     """
     if stype == 'count':
         newy = pd.expanding_count(data, min_periods=min_periods, freq=freq)
     elif stype == 'sum':
         newy = pd.expanding_sum(data, min_periods=min_periods, freq=freq)
     elif stype == 'mean':
         newy = pd.expanding_mean(data, min_periods=min_periods, freq=freq)
     elif stype == 'median':
         newy = pd.expanding_median(data, min_periods=min_periods, freq=freq)
     elif stype == 'min':
         newy = pd.expanding_min(data, min_periods=min_periods, freq=freq)
     elif stype == 'max':
         newy = pd.expanding_max(data, min_periods=min_periods, freq=freq)
     elif stype == 'std':
         newy = pd.expanding_std(data, min_periods=min_periods, freq=freq)
     elif stype == 'var':
         newy = pd.expanding_var(data, min_periods=min_periods, freq=freq)
     elif stype == 'skew':
         newy = pd.expanding_skew(data, min_periods=min_periods, freq=freq)
     elif stype == 'kurt':
         newy = pd.expanding_kurt(data, min_periods=min_periods, freq=freq)
     else:
         raise ValueError('unknown smoothing type: %r' % stype)
     return newy
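
All of these `pd.expanding_*` helpers were deprecated in pandas 0.18 and removed in 1.0 (the `freq` argument disappeared with them). A minimal sketch of the same dispatch on the modern `.expanding()` accessor, relying on the fact that the method names (`count`, `sum`, `mean`, `median`, `min`, `max`, `std`, `var`, `skew`, `kurt`) match the old suffixes; `expanding_smoother_modern` is a hypothetical name:

def expanding_smoother_modern(data, stype='mean', min_periods=1):
    # e.g. stype='sum' resolves to data.expanding(min_periods=1).sum()
    return getattr(data.expanding(min_periods=min_periods), stype)()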
Code Example #5
def load_volume_profile(uni_df, start, end, freq='30Min'):
    date = start
    result_dfs = list()
    while date < end:
        dateStr = date.strftime('%Y%m%d')
        year = dateStr[0:4]
        price_dir = PRICE_BASE_DIR + year
        volume_file = price_dir + "/" + dateStr + ".equ_volume_profiles_20d.rev.csv"
        print("Loading {}".format(volume_file))
        try:
            volume_df = pd.read_csv(volume_file, header=0, index_col=['sid'])
        except IOError:
            print "File not found: {}".format(volume_file)
            date += timedelta(days=1)
            continue

        print "stacking..."
        volume_df = volume_df.stack()
        volume_df = volume_df.reset_index()

        volume_df = volume_df[
            (volume_df['level_1'] != 'med_open_volume')
            & (volume_df['level_1'] != 'med_close_volume') &
            (volume_df['level_1'] != 'med_cum_pre_mkt_volume') &
            (volume_df['level_1'] != 'med_cum_post_mkt_volume')]
        timemap = dict()
        print "parsing dates..."
        for rawtime in volume_df['level_1'].unique():
            val = None
            try:
                val = dateparser.parse(dateStr + " " + rawtime[:-2] + ":" +
                                       rawtime[-2:])
            except Exception:
                # label is not a parseable time of day; leave it unmapped
                pass
            timemap[rawtime] = val

        print "mapping dates..."
        volume_df['iclose_ts'] = volume_df['level_1'].apply(
            lambda x: timemap[x])
        volume_df['date'] = date
        volume_df.set_index(keys=['date', 'sid'], inplace=True)
        print "merging..."
        volume_df = pd.merge(uni_df,
                             volume_df,
                             how='inner',
                             left_index=True,
                             right_index=True,
                             sort=True,
                             suffixes=['', '_dead'])
        volume_df.reset_index(inplace=True)
        grouped = volume_df.groupby('sid')
        print "accumulating volumes..."
        for name, group in grouped:
            group['med_cum_volume'] = pd.expanding_sum(group[0])
            del group[0]
            group['sid'] = name
            group = group.reset_index()
            #            print group.head()
            group.set_index('iclose_ts', inplace=True)
            group_df = group.resample(freq,
                                      how='last',
                                      closed='right',
                                      label='right')
            #            print group_df.head()
            result_dfs.append(group_df)

        date += timedelta(days=1)

    result_df = pd.concat(result_dfs)
    result_df = result_df.reset_index()
    print(result_df.head())
    result_df['iclose_ts'] = result_df['level_0']
    del result_df['level_0']
    result_df.set_index(keys=['iclose_ts', 'sid'], inplace=True)
    result_df = remove_dup_cols(result_df)
    return result_df
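
The per-`sid` loop above uses `pd.expanding_sum(group[0])` for the cumulative intraday volume; on modern pandas the accumulation itself is a one-liner (a sketch, assuming the raw volume column really is labelled `0`, and ignoring NaN-handling differences between `expanding_sum` and `cumsum`):

volume_df['med_cum_volume'] = volume_df.groupby('sid')[0].cumsum()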
Code Example #6
df

# <headingcell level=3>

# Generate a Linear Regression of the data

# <codecell>

from datetime import timedelta
import numpy as np
from scipy.stats import linregress
tdf = df.groupby(['Patient', 'Visit'], as_index=False).first()

date_based = pd.pivot_table(tdf[tdf['Date'].notnull()], rows = 'Date', 
                            cols = 'Prediction',
                            values = 'Patient',
                            aggfunc = 'count')
date_cum = pd.expanding_sum(date_based)['2013-10':]

date_cum['Total'] = date_cum.sum(axis=1)
td = date_cum[['Total']].reset_index()
td['dDate'] = (td['Date'] - pd.to_datetime('2013-7-1')).apply(lambda x: x / np.timedelta64(1, 'D'))

m, b, r, p, e = linregress(td['dDate'], td['Total'])
# invert the fitted line (y = m*x + b) to estimate the day count at which the cumulative total reaches len(tdf)
num_days = (len(tdf) - b) / m
nd = pd.DataFrame({
                       'Date':pd.date_range(start = '2013-7-1', 
                                            freq = 'M',
                                            periods = np.ceil(num_days/30))
                       })
nd['dDate'] = (nd['Date'] - pd.to_datetime('2013-7-1')).apply(lambda x: x / np.timedelta64(1, 'D'))
nd['GuessNum'] = m*nd['dDate'] + b
nd = nd.set_index('Date')
Code Example #7
# Difference functions allow us to identify seasonal changes when we see repeated up or downswings.
# An example from FiveThirtyEight:
# http://i2.wp.com/espnfivethirtyeight.files.wordpress.com/2015/03/casselman-datalab-wsj2.png?quality=90&strip=all&w=575&ssl=1

'''
Pandas Expanding Functions

In addition to the set of rolling_* functions, Pandas also 
provides a similar collection of expanding_* functions, which, 
instead of using a window of N values, uses all values up until 
that time.
'''


pd.expanding_mean(daily_store_sales)  # mean of all daily sales from the first date up to each date
pd.expanding_sum(daily_store_sales)   # running total of daily sales up to each date
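
# On pandas >= 0.18 the expanding_* helpers are spelled via the .expanding()
# accessor instead (a sketch against the same daily_store_sales series):
daily_store_sales.expanding().mean()
daily_store_sales.expanding().sum()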

'''
EXERCISES

1. Plot the distribution of sales by month and compare the effect of promotions.
hint: try using hue in sns
2. Are sales more correlated with the prior date, a similar date last year, or a similar date last month?
3. Plot the 15 day rolling mean of customers in the stores.
4. Identify the date with largest drop in sales from the same date in the previous week.
5. Compute the total sales up until Dec. 2014.
6. When were the largest differences between 15-day moving/rolling averages? HINT: Using rolling_mean and diff
'''

# Plot the distribution of sales by month and compare the effect of promotions
sns.factorplot(
Code Example #8
File: useful.py Project: garyjoy/pyswing

from pyswing.AskHorse import askHorse
args = "-n asx".split()
askHorse(args)


from pyswing.AnalyseStrategies import analyseStrategies
args = "-n ftse -s v1.2 -r 0.4 -t 500".split
analyseStrategies(args)


# Run me to populate (emptying to begin with) the historic trades table using the strategies in active strategies (which must be put in there manually)...
from pyswing.GenerateHistoricTradesForActiveStrategies import generateHistoricTradesForActiveStrategies
args = "-n ftse".split()
generateHistoricTradesForActiveStrategies(args)


# Run me to chart the (distinct) results in active strategy
import pyswing.database
import sqlite3
from pandas.io.sql import read_sql_query
from pandas import expanding_sum
connection = sqlite3.connect(pyswing.database.pySwingDatabase)
query = ("select t.matchDate as Date, t.code as Code, t.type as Type, t.ExitValue as ExitValue from ( select distinct matchDate, Code, type, exitValue from historicTrades order by matchDate asc) t")
cbaEquityData = read_sql_query(query, connection, 'Date')
connection.close()
cbaEquityData['ExitValueAfterCosts'] = cbaEquityData['ExitValue'] - 0.2
exitValueDataFrame = cbaEquityData.ix[:,'ExitValueAfterCosts']
cbaEquityData["Sum"] = expanding_sum(exitValueDataFrame)
cbaEquityData.query("Date > '2005-01-01 00:00:00'").plot(y=['Sum'], title='v1.4')
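
`expanding_sum` no longer exists on modern pandas; there the running equity column is just a cumulative sum (a sketch, ignoring NaN-handling differences):

cbaEquityData["Sum"] = cbaEquityData['ExitValueAfterCosts'].cumsum()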
Code Example #9
df=pd.concat(l,axis=0)

df.to_csv("/Users/juju/Dropbox/Sent_to_STR_ZHU/miami_aggby_post_month.csv")

#### In Excel, arrange the column names

df = pd.read_csv("/Users/juju/Dropbox/Sent_to_STR_ZHU/chicago_aggby_post_month.csv",sep=',',header=0,infer_datetime_format=True,parse_dates=['review_month'])

df["review_month"] = df["review_month"].dt.to_period("M")

l=[]
g = df.groupby("shareid")

for name,group in g:
    group["accum_rating"] = pd.expanding_mean(group["month_rating_mean"])
    group["num_of_reviews"] = pd.expanding_sum(group["monthly_reviews"])
    group["num_of_responses"] = pd.expanding_sum(group["monthly_hotel_response"])
    group["num_of_partnerships"] = pd.expanding_sum(group["partnerships"])
    group["num_of_solo"] = pd.expanding_sum(group["solo"])
    group["num_of_couple"] = pd.expanding_sum(group["couple"])
    group["num_of_family"] = pd.expanding_sum(group["family"])
    group["num_of_business"] = pd.expanding_sum(group["business"])
    group["shareid"]=name
    l.append(group)
    
df_2= pd.concat(l,axis=0)
df_2.to_csv("/Users/juju/Dropbox/Sent_to_STR_ZHU/chicago_aggby_post_month_2.csv", index = False)
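
A sketch of the same per-hotel accumulation without the explicit loop, for modern pandas (>= 0.18), using the column names above:

g = df.groupby("shareid")
df["num_of_reviews"] = g["monthly_reviews"].cumsum()
# the expanding mean carries a group level that must be dropped to align back to df
df["accum_rating"] = g["month_rating_mean"].expanding().mean().reset_index(level=0, drop=True)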


# Aggregate by stayed month
Code Example #10
File: useful.py Project: garyjoy/pyswing
    y=['Close', 'SMA_200'], title='Testing')

from pyswing.AskHorse import askHorse
args = "-n asx".split()
askHorse(args)

from pyswing.AnalyseStrategies import analyseStrategies
args = "-n ftse -s v1.2 -r 0.4 -t 500".split
analyseStrategies(args)

# Run me to populate (emptying to begin with) the historic trades table using the strategies in active strategies (which must be put in there manually)...
from pyswing.GenerateHistoricTradesForActiveStrategies import generateHistoricTradesForActiveStrategies
args = "-n ftse".split()
generateHistoricTradesForActiveStrategies(args)

# Run me to chart the (distinct) results in active strategy
import pyswing.database
import sqlite3
from pandas.io.sql import read_sql_query
from pandas import expanding_sum
connection = sqlite3.connect(pyswing.database.pySwingDatabase)
query = (
    "select t.matchDate as Date, t.code as Code, t.type as Type, t.ExitValue as ExitValue from ( select distinct matchDate, Code, type, exitValue from historicTrades order by matchDate asc) t"
)
cbaEquityData = read_sql_query(query, connection, 'Date')
connection.close()
cbaEquityData['ExitValueAfterCosts'] = cbaEquityData['ExitValue'] - 0.2
exitValueDataFrame = cbaEquityData.ix[:, 'ExitValueAfterCosts']
cbaEquityData["Sum"] = expanding_sum(exitValueDataFrame)
cbaEquityData.query("Date > '2005-01-01 00:00:00'").plot(y=['Sum'],
                                                         title='v1.4')
Code Example #11
    def analyse(self):

        # Logger.log(logging.INFO, "Analyse Strategy", {"scope":__name__, "Rule 1":self._rule1, "Rule 2":self._rule2, "Rule 3":self._rule3, "Type":self._type})

        connection = sqlite3.connect(pyswing.database.pySwingDatabase)
        query = self.analyseStrategySql % (self._rule1, self._rule2,
                                           self._rule3, self._exit, self._type)
        self._strategyData = read_sql_query(query, connection, 'Date')
        self._strategyData[
            'ExitValueAfterCosts'] = self._strategyData['ExitValue'] - 0.2
        connection.close()

        exitValueDataFrame = self._strategyData.ix[:, 'ExitValueAfterCosts']

        mean = exitValueDataFrame.mean()
        median = exitValueDataFrame.median()
        sum = exitValueDataFrame.sum()
        count = exitValueDataFrame.count()

        tradesPerYear = count / 10
        sharpeRatio = sqrt(tradesPerYear) * exitValueDataFrame.mean(
        ) / exitValueDataFrame.std()

        self._strategyData["Sum"] = expanding_sum(exitValueDataFrame)
        self._strategyData["Max"] = expanding_max(self._strategyData["Sum"])
        self._strategyData["Min"] = expanding_min(self._strategyData["Sum"])
        self._strategyData[
            "DD"] = self._strategyData["Max"] - self._strategyData["Min"]

        runningSum = expanding_sum(exitValueDataFrame)
        max2here = expanding_max(runningSum)
        dd2here = runningSum - max2here
        drawDown = dd2here.min()

        Logger.log(
            logging.INFO, "Analysing Strategy", {
                "scope": __name__,
                "Rule 1": self._rule1,
                "Rule 2": self._rule2,
                "Rule 3": self._rule3,
                "Exit": self._exit,
                "Type": self._type,
                "Mean": str(mean),
                "Median": str(median),
                "Sum": str(sum),
                "Count": str(count),
                "SharpeRatio": str(sharpeRatio),
                "DrawDown": str(drawDown)
            })

        connection = sqlite3.connect(pyswing.database.pySwingDatabase)
        c = connection.cursor()

        deleteSql = self.deleteStrategySql % (
            pyswing.globals.pySwingStrategy, self._rule1, self._rule2,
            self._rule3, self._exit, self._type)
        c.executescript(deleteSql)
        connection.commit()

        insertSql = self.insertStrategySql % (
            pyswing.globals.pySwingStrategy, self._rule1, self._rule2,
            self._rule3, self._exit, self._type, str(mean), str(median),
            str(sum), str(count), str(sharpeRatio), str(drawDown))
        c.executescript(insertSql)
        connection.commit()

        c.close()
        connection.close()
Code Example #12
def prepare_data(kind):
    if kind == 'f':  # If fall
        putList, takeList = fall_df_list, fall_list
    else:  # If ADL
        putList, takeList = adl_df_list, adl_list

    start = time.time()  # Timer for testing

    for i in range(len(takeList)):  # iterate through the takeList (fall_list or adl_list)

        my_df = takeList[i].copy()  # copy dataframe from list
        new_df = pd.DataFrame()  # placeholder

        for trial in trials:  # Iterate through trials (1-5)

            # Get relevant trial data
            trial_df = my_df[my_df['trial'] == trial]

            tempdf = pd.DataFrame()  # dataframe for putting into filter
            # Low-pass Butterworth filter and remove bias
            tempdf['ax'], tempdf['ay'], tempdf['az'] = trial_df[
                'x1'], trial_df['y1'], trial_df['z1']
            tempdf = tempdf.reset_index(drop=True)
            tempdf['fx'] = pd.Series(
                butter_lowpass_filter(trial_df['x1'], cutoff, fs, order))
            tempdf['fy'] = pd.Series(
                butter_lowpass_filter(trial_df['y1'], cutoff, fs, order))
            tempdf['fz'] = pd.Series(
                butter_lowpass_filter(trial_df['z1'], cutoff, fs, order))
            tempdf['bx'] = tempdf['fx'].diff()
            tempdf['by'] = tempdf['fy'].diff()
            tempdf['bz'] = tempdf['fz'].diff()

            tempdf = tempdf.reset_index(drop=True)
            trial_df = trial_df.reset_index(drop=True)
            tempdf['gx'], tempdf['gy'], tempdf['gz'] = trial_df[
                'x2'], trial_df['y2'], trial_df['z2']

            # Rolling averages
            tempdf['y_roll'] = pd.Series(tempdf['by'].rolling(200).mean())
            tempdf['fy_roll'] = pd.Series(tempdf['fy'].rolling(200).mean())
            tempdf['gy_roll'] = pd.Series(tempdf['gy'].rolling(200).mean())

            # Rolling standard deviations
            tempdf['bx_std'] = tempdf['bx'].rolling(200).std()
            tempdf['by_std'] = tempdf['by'].rolling(200).std()
            tempdf['bz_std'] = tempdf['bz'].rolling(200).std()
            tempdf['fx_std'] = tempdf['fx'].rolling(200).std()
            tempdf['fy_std'] = tempdf['fy'].rolling(200).std()
            tempdf['fz_std'] = tempdf['fz'].rolling(200).std()
            tempdf['gx_std'] = tempdf['gx'].rolling(200).std()
            tempdf['gy_std'] = tempdf['gy'].rolling(200).std()
            tempdf['gz_std'] = tempdf['gz'].rolling(200).std()

            # Integrals via the trapezoidal rule: rolling(2).sum()/2 averages
            # adjacent samples, and 1/200 s is the sample spacing
            tempdf['xsum'] = pd.expanding_sum(
                ((abs(tempdf['ax']).rolling(2).sum() / 2) *
                 (1 / 200)).fillna(0))
            tempdf['ysum'] = pd.expanding_sum(
                ((abs(tempdf['ay']).rolling(2).sum() / 2) *
                 (1 / 200)).fillna(0))
            tempdf['zsum'] = pd.expanding_sum(
                ((abs(tempdf['az']).rolling(2).sum() / 2) *
                 (1 / 200)).fillna(0))
            tempdf['time'] = 1 / 200
            tempdf['time'] = pd.expanding_sum(tempdf['time'])
            # C10 Signal Magnitude Area
            tempdf['SigMagArea'] = (tempdf['xsum'] + tempdf['ysum'] +
                                    tempdf['zsum']) / tempdf['time']
            # C11
            tempdf['HorizSigMagArea'] = (tempdf['xsum'] +
                                         tempdf['zsum']) / tempdf['time']
            # Sum vector magnitude
            tempdf['vm'] = np.sqrt(tempdf['fx']**2 + tempdf['fy']**2 +
                                   tempdf['fz']**2)
            # Maximum peak to peak acceleration amplitude
            tempdf['Amax'] = (tempdf['vm'].rolling(200).max())
            tempdf['Amin'] = (tempdf['vm'].rolling(200).min())
            # C3
            tempdf['peak_diff'] = tempdf['Amax'] - tempdf['Amin']
            # Angle from horizontal to z-axis
            tempdf['angle_from_horiz'] = np.arctan2(
                np.sqrt(tempdf['fx']**2 + tempdf['fz']**2),
                -tempdf['fy']) * 180 / np.pi
            tempdf['angle_std'] = tempdf['angle_from_horiz'].rolling(200).std()

            # had to make versions of this to put into sliding window, will change once I
            # confirm they're the same as the others below
            tempdf['horiz_std_mag9'] = np.sqrt(tempdf['fx_std']**2 +
                                               tempdf['fz_std']**2)
            tempdf['horiz_vector_mag9'] = np.sqrt(tempdf['fx']**2 +
                                                  tempdf['fz']**2)
            tempdf['std_mag9'] = np.sqrt(tempdf['fx_std']**2 +
                                         tempdf['fy_std']**2 +
                                         tempdf['fz_std']**2)
            tempdf['diff_std_mag9'] = np.sqrt(tempdf['bx_std']**2 +
                                              tempdf['by_std']**2 +
                                              tempdf['bz_std']**2)
            tempdf['horiz_mag2'] = np.sqrt(tempdf['bx']**2 + tempdf['bz']**2)
            tempdf['horiz_std_mag2'] = np.sqrt(tempdf['bx_std']**2 +
                                               tempdf['bz_std']**2)
            tempdf['vector_mag2'] = np.sqrt(tempdf['bx']**2 + tempdf['by']**2 +
                                            tempdf['bz']**2)

            tempdf['gyro_horiz_std_mag'] = np.sqrt(tempdf['gx_std']**2 +
                                                   tempdf['gz_std']**2)
            tempdf['gyro_vector_mag'] = np.sqrt(tempdf['gx']**2 +
                                                tempdf['gy']**2 +
                                                tempdf['gz']**2)
            tempdf['gyro_horiz_mag'] = np.sqrt(tempdf['gx']**2 +
                                               tempdf['gz']**2)
            tempdf['gyro_std_mag'] = np.sqrt(tempdf['gx_std']**2 +
                                             tempdf['gy_std']**2 +
                                             tempdf['gz_std']**2)

            tempdf = pd.concat([
                tempdf.reset_index(drop=True), trial_df[[
                    'activity', 'subject', 'trial'
                ]].reset_index(drop=True)
            ],
                               axis=1)
            new_df = pd.concat([new_df.reset_index(drop=True), tempdf])

            sliding_window(tempdf, kind)

        # differential vector mag
        new_df['vector_mag'] = np.sqrt(new_df['fx']**2 + new_df['fy']**2 +
                                       new_df['fz']**2)
        # C2
        new_df['horiz_mag'] = np.sqrt(new_df['fx']**2 + new_df['fz']**2)
        #
        new_df['vert'] = new_df['by'] - new_df['y_roll']
        new_df['vert2'] = new_df['ay'] - new_df['y_roll']
        new_df['vert3'] = new_df['fy'] - new_df['fy_roll']
        # C9
        new_df['std_mag2'] = np.sqrt(new_df['bx_std']**2 +
                                     new_df['by_std']**2 + new_df['bz_std']**2)

        putList.append(new_df.fillna(0))

    print('Completed... It took', time.time() - start, 'seconds.')
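
`pd.expanding_sum` (used above for the integrals and the elapsed-time column) was removed in pandas 1.0; the modern spelling is the `.expanding()` accessor (a sketch against the `tempdf` names from the function):

tempdf['xsum'] = ((abs(tempdf['ax']).rolling(2).sum() / 2) * (1 / 200)).fillna(0).expanding().sum()
tempdf['time'] = 1 / 200
tempdf['time'] = tempdf['time'].expanding().sum()  # cumulative elapsed time in seconds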
Code Example #13
File: tdx_formula.py Project: yl365/tdx_formula
 def SUM(self, param):
     if param[1] == 0:
         return pd.expanding_sum(param[0])
     return pd.rolling_sum(param[0], param[1])
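
In the TDX formula language `SUM(X, N)` with `N == 0` conventionally means the sum over all bars so far, which is why the zero case dispatches to the expanding sum. A sketch of the same method on modern pandas (>= 1.0), where both module-level helpers are gone:

 def SUM(self, param):
     if param[1] == 0:
         return param[0].expanding().sum()  # N == 0: cumulative sum over all bars
     return param[0].rolling(param[1]).sum()  # otherwise: N-bar rolling sum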