def get_ppo(data):
    """Compute the percentage price oscillator (PPO) for an OHLC dataframe.

    :param data: a dataframe in OHLC format
    :return: a concatenated Pandas series with the PPO and signal values
    :raises EmptyDataError: if ``data`` is None
    :raises IndicatorException: if ``TA.PPO`` returns None
    """
    if data is None:
        raise EmptyDataError('[!] Invalid data value')

    ppo = TA.PPO(data)
    if ppo is not None:
        return ppo
    raise IndicatorException
def get_vwap(data):
    """Compute the volume weighted average price (VWAP) for an OHLC dataframe.

    :param data: a dataframe in OHLC format
    :return: a Pandas series
    :raises EmptyDataError: if ``data`` is None
    :raises IndicatorException: if ``TA.VWAP`` returns None
    """
    if data is None:
        raise EmptyDataError('[!] Invalid data value')

    vwap = TA.VWAP(data)
    if vwap is not None:
        return vwap
    raise IndicatorException
def get_macd(data, period_fast=12, period_slow=26):
    """Calculate the moving average convergence-divergence for values of given dataframe.

    The look-back periods are exposed as optional parameters so this
    definition has the same signature as the other ``get_macd`` variant;
    calling it with only ``data`` keeps using 12 and 26.

    :param data: a dataframe in OHLC format
    :param period_fast: look-back length passed to ``TA.MACD`` for the fast moving average
    :param period_slow: look-back length passed to ``TA.MACD`` for the slow moving average
    :return: a concatenated Pandas series with the MACD and signal values
    :raises EmptyDataError: if ``data`` is None
    :raises IndicatorException: if ``TA.MACD`` returns None
    """
    if data is None:
        raise EmptyDataError('[!] Invalid data value')

    result = TA.MACD(data, period_fast, period_slow)
    if result is None:
        raise IndicatorException
    return result
def get_kama(data):
    """Compute the Kaufman adaptive moving average (KAMA) for an OHLC dataframe.

    :param data: a dataframe in OHLC format
    :return: a Pandas series
    :raises EmptyDataError: if ``data`` is None
    :raises IndicatorException: if ``TA.KAMA`` returns None
    """
    if data is None:
        raise EmptyDataError('[!] Invalid data value')

    kama = TA.KAMA(data)
    if kama is not None:
        return kama
    raise IndicatorException
def get_zlema(data):
    """Compute the zero lag exponential moving average (ZLEMA) for an OHLC dataframe.

    :param data: a dataframe in OHLC format
    :return: a Pandas series
    :raises EmptyDataError: if ``data`` is None
    :raises IndicatorException: if ``TA.ZLEMA`` returns None
    """
    if data is None:
        raise EmptyDataError('[!] Invalid data value')

    zlema = TA.ZLEMA(data)
    if zlema is not None:
        return zlema
    raise IndicatorException
def get_fve(data):
    """Compute the finite volume element (FVE) of an OHLC dataframe.

    :param data: a dataframe in OHLC format
    :return: a Pandas series
    :raises EmptyDataError: if ``data`` is None
    :raises IndicatorException: if ``TA.FVE`` returns None
    """
    if data is None:
        raise EmptyDataError('[!] Invalid data value')

    fve = TA.FVE(data)
    if fve is not None:
        return fve
    raise IndicatorException
def get_trix(data):
    """Compute the triple exponential moving average oscillator (TRIX) for an OHLC dataframe.

    :param data: a dataframe in OHLC format
    :return: a Pandas series
    :raises EmptyDataError: if ``data`` is None
    :raises IndicatorException: if ``TA.TRIX`` returns None
    """
    if data is None:
        raise EmptyDataError('[!] Invalid data value')

    trix = TA.TRIX(data)
    if trix is not None:
        return trix
    raise IndicatorException
def get_squeeze(data):
    """Compute the squeeze momentum indicator of an OHLC dataframe via ``TA.SQZMI``.

    :param data: a dataframe in OHLC format
    :return: a Pandas series
    :raises EmptyDataError: if ``data`` is None
    :raises IndicatorException: if ``TA.SQZMI`` returns None
    """
    if data is None:
        raise EmptyDataError('[!] Invalid data value')

    squeeze = TA.SQZMI(data)
    if squeeze is not None:
        return squeeze
    raise IndicatorException
def get_apz(data):
    """Compute the adaptive price zone (APZ) of an OHLC dataframe.

    :param data: a dataframe in OHLC format
    :return: a concatenated Pandas series
    :raises EmptyDataError: if ``data`` is None
    :raises IndicatorException: if ``TA.APZ`` returns None
    """
    if data is None:
        raise EmptyDataError('[!] Invalid data value')

    apz = TA.APZ(data)
    if apz is not None:
        return apz
    raise IndicatorException
def get_ichimoku(data):
    """Compute the Ichimoku cloud of an OHLC dataframe.

    :param data: a dataframe in OHLC format
    :return: a concatenated Pandas series
    :raises EmptyDataError: if ``data`` is None
    :raises IndicatorException: if ``TA.ICHIMOKU`` returns None
    """
    if data is None:
        raise EmptyDataError('[!] Invalid data value')

    cloud = TA.ICHIMOKU(data)
    if cloud is not None:
        return cloud
    raise IndicatorException
def get_smm(data):
    """Compute the simple moving median (SMM) for an OHLC dataframe.

    :param data: a dataframe in OHLC format
    :return: a Pandas series
    :raises EmptyDataError: if ``data`` is None
    :raises IndicatorException: if ``TA.SMM`` returns None
    """
    if data is None:
        raise EmptyDataError('[!] Invalid data value')

    smm = TA.SMM(data)
    if smm is not None:
        return smm
    raise IndicatorException
def get_evmacd(data):
    """Compute the elastic volume-weighted MACD for an OHLC dataframe.

    :param data: a dataframe in OHLC format
    :return: a concatenated Pandas series with the EVMACD and signal values
    :raises EmptyDataError: if ``data`` is None
    :raises IndicatorException: if ``TA.EV_MACD`` returns None
    """
    if data is None:
        raise EmptyDataError('[!] Invalid data value')

    evmacd = TA.EV_MACD(data)
    if evmacd is not None:
        return evmacd
    raise IndicatorException
def get_sar(data):
    """Compute the stop and reverse (SAR) for an OHLC dataframe.

    :param data: a dataframe in OHLC format
    :return: a Pandas series
    :raises EmptyDataError: if ``data`` is None
    :raises IndicatorException: if ``TA.SAR`` returns None
    """
    if data is None:
        raise EmptyDataError('[!] Invalid data value')

    sar = TA.SAR(data)
    if sar is not None:
        return sar
    raise IndicatorException
def get_stochd(data):
    """Compute the stochastic oscillator %D for an OHLC dataframe.

    :param data: a dataframe in OHLC format
    :return: a Pandas series
    :raises EmptyDataError: if ``data`` is None
    :raises IndicatorException: if ``TA.STOCHD`` returns None
    """
    if data is None:
        raise EmptyDataError('[!] Invalid data value')

    stochd = TA.STOCHD(data)
    if stochd is not None:
        return stochd
    raise IndicatorException
def get_adx(data):
    """Compute the ADX of an OHLC dataframe.

    :param data: a dataframe in OHLC format
    :return: a Pandas series
    :raises EmptyDataError: if ``data`` is None
    :raises IndicatorException: if ``TA.ADX`` returns None
    """
    if data is None:
        raise EmptyDataError('[!] Invalid data value')

    adx = TA.ADX(data)
    if adx is not None:
        return adx
    raise IndicatorException
def get_pivot_fib(data):
    """Compute the Fibonacci pivot points of an OHLC dataframe.

    :param data: a dataframe in OHLC format
    :return: a concatenated Pandas series with 9 elements
    :raises EmptyDataError: if ``data`` is None
    :raises IndicatorException: if ``TA.PIVOT_FIB`` returns None
    """
    if data is None:
        raise EmptyDataError('[!] Invalid data value')

    pivots = TA.PIVOT_FIB(data)
    if pivots is not None:
        return pivots
    raise IndicatorException
def get_percent_b(data):
    """Compute the Bollinger band percent b for an OHLC dataframe.

    :param data: a dataframe in OHLC format
    :return: a Pandas series
    :raises EmptyDataError: if ``data`` is None
    :raises IndicatorException: if ``TA.PERCENT_B`` returns None
    """
    if data is None:
        raise EmptyDataError('[!] Invalid data value')

    percent_b = TA.PERCENT_B(data)
    if percent_b is not None:
        return percent_b
    raise IndicatorException
def get_dmi(data):
    """Compute the directional movement indicator (DMI) of an OHLC dataframe.

    :param data: a dataframe in OHLC format
    :return: a Pandas series
    :raises EmptyDataError: if ``data`` is None
    :raises IndicatorException: if ``TA.DMI`` returns None
    """
    if data is None:
        raise EmptyDataError('[!] Invalid data value')

    dmi = TA.DMI(data)
    if dmi is not None:
        return dmi
    raise IndicatorException
def get_bbandwidth(data):
    """Compute the Bollinger bandwidth for an OHLC dataframe.

    :param data: a dataframe in OHLC format
    :return: a Pandas series
    :raises EmptyDataError: if ``data`` is None
    :raises IndicatorException: if ``TA.BBWIDTH`` returns None
    """
    if data is None:
        raise EmptyDataError('[!] Invalid data value')

    bandwidth = TA.BBWIDTH(data)
    if bandwidth is not None:
        return bandwidth
    raise IndicatorException
def get_ssma(data):
    """Compute the smoothed simple moving average (SSMA) for an OHLC dataframe.

    :param data: a dataframe in OHLC format
    :return: a Pandas series
    :raises EmptyDataError: if ``data`` is None
    :raises IndicatorException: if ``TA.SSMA`` returns None
    """
    if data is None:
        raise EmptyDataError('[!] Invalid data value')

    ssma = TA.SSMA(data)
    if ssma is not None:
        return ssma
    raise IndicatorException
def get_tmf(data):
    """Compute Twigg's money flow (TMF) of an OHLC dataframe.

    :param data: a dataframe in OHLC format
    :return: a concatenated Pandas series
    :raises EmptyDataError: if ``data`` is None
    :raises IndicatorException: if ``TA.TMF`` returns None
    """
    if data is None:
        raise EmptyDataError('[!] Invalid data value')

    tmf = TA.TMF(data)
    if tmf is not None:
        return tmf
    raise IndicatorException
def get_ift_rsi(data):
    """Compute the inverse Fisher transform of the relative strength index for an OHLC dataframe.

    :param data: a dataframe in OHLC format
    :return: a Pandas series
    :raises EmptyDataError: if ``data`` is None
    :raises IndicatorException: if ``TA.IFT_RSI`` returns None
    """
    if data is None:
        raise EmptyDataError('[!] Invalid data value')

    ift_rsi = TA.IFT_RSI(data)
    if ift_rsi is not None:
        return ift_rsi
    raise IndicatorException
def get_macd(data, period_fast=12, period_slow=26):
    """Compute the moving average convergence-divergence (MACD) for an OHLC dataframe.

    :param data: a dataframe in OHLC format
    :param period_fast: number of days to look back for the fast moving average
    :param period_slow: number of days to look back for the slow moving average
    :return: a concatenated Pandas series with the MACD and signal values
    :raises EmptyDataError: if ``data`` is None
    :raises IndicatorException: if ``TA.MACD`` returns None
    """
    if data is None:
        raise EmptyDataError("[!] Invalid data value")

    macd = TA.MACD(data, period_fast, period_slow)
    if macd is not None:
        return macd
    raise IndicatorException
def transform(self, X: pd.DataFrame = None):
    """
    Returns a self.key column from an input Dataframe

    Parameters:
    ------------
    X : pd.DataFrame
        pandas Dataframe from which we need to get a column

    Returns:
    ------------
    pd.DataFrame
        one-column DataFrame, X[[self.key]]

    Raises:
    ------------
    EmptyDataError
        if X is None or has no rows
    KeyError
        if self.key is not a column of X
    """
    # X defaults to None, so guard it explicitly: a missing argument is then
    # reported as empty input instead of a TypeError from len(None).
    if X is None or len(X) == 0:
        raise EmptyDataError("Input dataset is empty!")

    try:
        return X[[self.key]]
    except KeyError as e:
        # Chain the original lookup error so the traceback keeps its cause.
        raise KeyError(
            "Input column {} does not exist in the input Dataset!".format(
                self.key)) from e
def get_er(data):
    """Compute the Kaufman efficiency ratio (ER) for an OHLC dataframe.

    Example readings: bullish +0.67, bearish -0.67.

    :param data: a dataframe in OHLC format
    :return: a Pandas series
    :raises EmptyDataError: if ``data`` is None
    :raises IndicatorException: if ``TA.ER`` returns None
    """
    if data is None:
        raise EmptyDataError("[!] Invalid data value")

    ratio = TA.ER(data)
    if ratio is not None:
        return ratio
    raise IndicatorException
def _getArcpyTypesAndConversionFromDf(colsAndTypesDict:dict): if len(colsAndTypesDict) < 1: raise EmptyDataError('No columns or types in this DataFrame') textTypeList = ['object'] intTypeList = ['int64'] floatTypeList = ['float64'] arcpySafeColsAndTypes = {} colsAfterAndBefore = {} for key, value in colsAndTypesDict.items(): #converting pandas datatype to arcpy datatype if value in textTypeList: colType = 'TEXT' elif value in intTypeList: colType = 'LONG' elif value in floatTypeList: colType = 'DOUBLE' else: colType = 'TEXT' #renaming the key so that there are no spaces in it key = key.strip() # print(f'Stripped: {key}') # underscoredName = re.sub(r'[:,]', '', key) underscoredName = re.sub(r'[\s/\-:,\(\)\.]', '_', key) # print(f'First Pass: {underscoredName}') #replacing a group of underscores with only one underscoredName = re.sub(r'_+', '_', underscoredName) # print(f'Second Pass: {underscoredName}') #removing trailing underscores from columns if underscoredName.endswith('_'): underscoredName = underscoredName[:-1] if len(underscoredName) > 64: underscoredName = underscoredName[:64] # underscoredName = re.sub(r'_') #adding arcpy safe names and types to another dict arcpySafeColsAndTypes.update({underscoredName: colType}) colsAfterAndBefore.update({underscoredName: key}) return arcpySafeColsAndTypes, colsAfterAndBefore
def detect_colspecs(self, infer_nrows=100, skiprows=None):
    """Infer fixed-width column boundaries from a sample of rows.

    Every character position that holds a non-delimiter character in at
    least one sampled row is marked as occupied; the returned spans are the
    maximal runs of occupied positions.

    :param infer_nrows: forwarded to ``self.get_rows``
    :param skiprows: forwarded to ``self.get_rows``
    :return: list of ``(start, end)`` index pairs, one per detected column
    :raises EmptyDataError: if ``self.get_rows`` yields no rows
    """
    # Backslash-escape each delimiter character so it is literal inside the
    # regex character class.
    escaped = "".join(f"\\{ch}" for ch in self.delimiter)
    field_re = re.compile("([^" + escaped + "]+)")

    sample = self.get_rows(infer_nrows, skiprows)
    if not sample:
        raise EmptyDataError("No rows from which to infer column width")

    # Width is measured before comments are cut off; the extra slot keeps the
    # last position unoccupied so every run has a closing edge.
    widest = max(len(row) for row in sample)
    occupied = np.zeros(widest + 1, dtype=int)

    if self.comment is not None:
        uncommented = []
        for row in sample:
            uncommented.append(row.partition(self.comment)[0])
        sample = uncommented

    for row in sample:
        for match in field_re.finditer(row):
            begin, end = match.span()
            occupied[begin:end] = 1

    # Compare each position with its left neighbour: a change marks an edge.
    previous = np.roll(occupied, 1)
    previous[0] = 0
    edges = np.where((occupied ^ previous) == 1)[0]

    starts = edges[::2]
    stops = edges[1::2]
    return list(zip(starts, stops))
def filter_rows_by_group_func(df, group, col, func, op=operator.gt, value=None, quantile=None):
    """
    Keep only the rows whose group passes a per-group statistic test.

    ``func`` is applied to column ``col`` within each ``group``; a group is
    kept when ``op(statistic, cutoff)`` is true. The cutoff is ``value``, or,
    when ``quantile`` is given, that quantile of the per-group statistics.

    For example, this could be used to filter out all entries for drugs which
    have a low variance of IC-50 scores across cell lines.

    :param df: input dataframe
    :param group: column (or key) to group by
    :param col: column the statistic is computed on
    :param func: callable, the string "mad" (median absolute deviation from
        statsmodels), or any other string, which is evaluated as a Python
        expression
    :param op: comparison applied as ``op(group_stats, cutoff)``
    :param value: fixed cutoff, used when ``quantile`` is None
    :param quantile: if not None, the cutoff is this quantile of the group statistics
    :return: the filtered dataframe
    :raises EmptyDataError: if no rows survive the filter
    """
    # resolve the statistic to a callable
    if func == "mad":
        from statsmodels import robust
        func = robust.mad
    elif isinstance(func, str):
        # NOTE(review): eval() executes arbitrary code; only pass trusted
        # strings here.
        func = eval(func)

    # one statistic per group
    group_stats = df.groupby(group)[col].apply(func)

    # choose the cutoff: explicit value unless a quantile was requested
    cutoff_value = value if quantile is None else group_stats.quantile(quantile)

    # ids of the groups passing the comparison
    passing = op(group_stats, cutoff_value)
    kept_groups = group_stats.loc[passing].index

    filtered = df[df[group].isin(kept_groups)]

    # an empty result is treated as an error
    if not filtered.empty:
        return filtered
    raise EmptyDataError("No data remaining after filter applied")
def _compile_columns(file_list): from pandas.errors import EmptyDataError # Read one line from each dataframe dfsamples = [] for file in file_list: try: dfsamples.append(pd.read_csv( file, index_col=None, header=0, nrows=1, comment='#')) except EmptyDataError: raise EmptyDataError('Summary file empty: \n{}'.format(file)) # Compare number of columns ncols = [df.shape[1] for df in dfsamples] if not all([n==ncols[0] for n in ncols]): warnings.warn('The dataframes to compile do not have the same number of columns.') # Compile all columns columns = pd.concat(dfsamples, axis=0, sort=False).columns.to_list() return columns
def fit(self, X: pd.DataFrame = None, y: str = None):
    """
    Uses a pd.get_dummies method to do a OHE on self.key column
    Also saves a self.columns variable (list of final columns AFTER the OHE)

    Parameters:
    ------------
    X : pd.DataFrame (default = None)
        pandas dataframe input data (train) that should contain a self.key column

    y : str (default = None)
        target column name (needed only for a compatibility with other sklearn transformers!)

    Returns:
    ------------
    self, with self.columns set to the list of dummy column names

    Raises:
    ------------
    EmptyDataError
        if X is None or has no rows
    KeyError
        if self.key is not a column of X
    """
    # X defaults to None, so guard it explicitly: a missing argument is then
    # reported as empty input instead of a TypeError from len(None).
    if X is None or len(X) == 0:
        raise EmptyDataError("Input dataset is empty!")

    try:
        encoded = pd.get_dummies(X[[self.key]], prefix=self.key)
    except KeyError as e:
        # Chain the original lookup error so the traceback keeps its cause.
        raise KeyError(
            "Input column {} does not exist in the input Dataset!".format(
                self.key)) from e

    self.columns = list(encoded.columns)
    return self