Python eval 예제들, pandas.eval Python 예제들

예제 #1

0

파일 보기

파일: test_query_eval.py 프로젝트: bashtage/pandas

    def test_nested_scope(self):
        engine = self.engine
        parser = self.parser

        skip_if_no_pandas_parser(parser)

        df = DataFrame(np.random.randn(5, 3))
        df2 = DataFrame(np.random.randn(5, 3))
        expected = df[(df > 0) & (df2 > 0)]

        result = df.query('(@df > 0) & (@df2 > 0)', engine=engine,
                          parser=parser)
        assert_frame_equal(result, expected)

        result = pd.eval('df[df > 0 and df2 > 0]', engine=engine,
                         parser=parser)
        assert_frame_equal(result, expected)

        result = pd.eval('df[df > 0 and df2 > 0 and df[df > 0] > 0]',
                         engine=engine, parser=parser)
        expected = df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)]
        assert_frame_equal(result, expected)

        result = pd.eval('df[(df>0) & (df2>0)]', engine=engine, parser=parser)
        expected = df.query('(@df>0) & (@df2>0)', engine=engine, parser=parser)
        assert_frame_equal(result, expected)

예제 #2

0

파일 보기

파일: test_query_eval.py 프로젝트: bashtage/pandas

    def test_nested_scope(self):
        from pandas.core.computation.ops import UndefinedVariableError
        engine = self.engine
        parser = self.parser
        # smoke test
        x = 1  # noqa
        result = pd.eval('x + 1', engine=engine, parser=parser)
        assert result == 2

        df = DataFrame(np.random.randn(5, 3))
        df2 = DataFrame(np.random.randn(5, 3))

        # don't have the pandas parser
        with pytest.raises(SyntaxError):
            df.query('(@df>0) & (@df2>0)', engine=engine, parser=parser)

        with pytest.raises(UndefinedVariableError):
            df.query('(df>0) & (df2>0)', engine=engine, parser=parser)

        expected = df[(df > 0) & (df2 > 0)]
        result = pd.eval('df[(df > 0) & (df2 > 0)]', engine=engine,
                         parser=parser)
        assert_frame_equal(expected, result)

        expected = df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)]
        result = pd.eval('df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)]',
                         engine=engine, parser=parser)
        assert_frame_equal(expected, result)

예제 #3

0

파일 보기

파일: comparing.py 프로젝트: griverorz/RecordLinkage

def _squared_distance(s1, s2, a=0, b=1):

	expr = '((s2-s1)-a)**2*b**2'

	# PANDAS BUG?
	# return pandas.eval(expr, engine=None)
	try:
		return pandas.eval(expr, engine='numexpr')
	except ImportError:
		return pandas.eval(expr, engine='python')

예제 #4

0

파일 보기

파일: comparing.py 프로젝트: griverorz/RecordLinkage

def _linear_distance(s1, s2, a=0, b=1):

	expr = 'abs(((s2-s1)-a)*b)'

	# PANDAS BUG?
	# return pandas.eval(expr, engine=None)

	try:
		return pandas.eval(expr, engine='numexpr')
	except ImportError:
		return pandas.eval(expr, engine='python')

예제 #5

0

파일 보기

파일: comparing.py 프로젝트: griverorz/RecordLinkage

def _haversine_distance(lat1, lng1, lat2, lng2):

	# degrees to radians conversion
	to_rad = 1/360*np.pi*2

	# numeric expression to use with numexpr package
	expr = '2*6371*arcsin(sqrt((sin((lat2*to_rad-lat1*to_rad)/2))**2+cos(lat1*to_rad)*cos(lat2*to_rad)*(sin((lng2*to_rad-lng1*to_rad)/2))**2))'

	# PANDAS BUG?
	# return pandas.eval(expr, engine=None)
	try:
		return pandas.eval(expr, engine='numexpr')
	except ImportError:
		return pandas.eval(expr, engine='python')

예제 #6

0

파일 보기

파일: expr.py 프로젝트: FashtimeDotCom/pandas

 def visit_Subscript(self, node, **kwargs):
     value = self.visit(node.value)
     slobj = self.visit(node.slice)
     result = pd.eval(slobj, local_dict=self.env, engine=self.engine,
                      parser=self.parser)
     try:
         # a Term instance
         v = value.value[result]
     except AttributeError:
         # an Op instance
         lhs = pd.eval(value, local_dict=self.env, engine=self.engine,
                       parser=self.parser)
         v = lhs[result]
     name = self.env.add_tmp(v)
     return self.term_type(name, env=self.env)

예제 #7

0

파일 보기

파일: ops.py 프로젝트: patcpsc/pandas

    def evaluate(self, env, engine, parser, term_type, eval_in_python):
        """Evaluate a binary operation *before* being passed to the engine.

        Parameters
        ----------
        env : Scope
        engine : str
        parser : str
        term_type : type
        eval_in_python : list

        Returns
        -------
        term_type
            The "pre-evaluated" expression as an instance of ``term_type``
        """
        if engine == "python":
            res = self(env)
        else:
            # recurse over the left/right nodes
            left = self.lhs.evaluate(
                env, engine=engine, parser=parser, term_type=term_type, eval_in_python=eval_in_python
            )
            right = self.rhs.evaluate(
                env, engine=engine, parser=parser, term_type=term_type, eval_in_python=eval_in_python
            )

            # base cases
            if self.op in eval_in_python:
                res = self.func(left.value, right.value)
            else:
                res = pd.eval(self, local_dict=env, engine=engine, parser=parser)

        name = env.add_tmp(res)
        return term_type(name, env=env)

예제 #8

0

파일 보기

파일: test_query_eval.py 프로젝트: bashtage/pandas

 def test_eval_resolvers_as_list(self):
     # GH 14095
     df = DataFrame(np.random.randn(10, 2), columns=list('ab'))
     dict1 = {'a': 1}
     dict2 = {'b': 2}
     assert (df.eval('a + b', resolvers=[dict1, dict2]) ==
             dict1['a'] + dict2['b'])
     assert (pd.eval('a + b', resolvers=[dict1, dict2]) ==
             dict1['a'] + dict2['b'])

예제 #9

0

파일 보기

파일: distance.py 프로젝트: J535D165/recordlinkage

def _haversine_distance(lat1, lng1, lat2, lng2):

    # degrees to radians conversion
    to_rad = np.deg2rad(1)

    # numeric expression to use with numexpr package
    expr = '2*6371*arcsin(sqrt((sin((lat2*to_rad-lat1*to_rad)/2))**2+cos(lat1*to_rad)*cos(lat2*to_rad)*(sin((lng2*to_rad-lng1*to_rad)/2))**2))'

    return pandas.eval(expr)

예제 #10

0

파일 보기

파일: numeric.py 프로젝트: J535D165/recordlinkage

def _step_sim(d, offset=0, origin=0):
    # scale is not an argument

    if offset < 0:
        raise ValueError("The offset must be positive.")

    expr = 'abs(d - origin) <= offset'

    return pandas.eval(expr).astype(np.int64)

예제 #11

0

파일 보기

파일: run_alerts.py 프로젝트: robertdavidwest/keen

def _check_rule(data, rule):
    data = deepcopy(data)
    exclude_column = 'exclude-' + rule['alertName']
    idx = data[exclude_column] != True
    data = data[idx]

    check = pd.eval(rule['formula'])
    if check.any():
        campaigns = list(set(data[check].campaign))
        msg = make_alert_msg(rule['alertName'], campaigns)
        return msg

예제 #12

0

파일 보기

파일: numeric.py 프로젝트: J535D165/recordlinkage

def _linear_sim(d, scale, offset=0, origin=0):

    if offset < 0:
        raise ValueError("The offset must be positive.")

    if scale <= 0:
        raise ValueError("The scale must be larger than 0. ")

    d = (abs(d - origin)).clip(offset, offset + 2 * scale)

    expr = '1 - (d-offset)/(2*scale)'

    return pandas.eval(expr)

예제 #13

0

파일 보기

파일: numeric.py 프로젝트: J535D165/recordlinkage

def _gauss_sim(d, scale, offset=0, origin=0):

    if offset < 0:
        raise ValueError("The offset must be positive.")

    if scale <= 0:
        raise ValueError("The scale must be larger than 0. ")

    d = (abs(d - origin)).clip(offset, None)

    # solve y=exp(-x^2*a) if 1/2 = exp(-x^2/scale^2)
    expr = '2**(-((d-offset)/scale)**2)'

    return pandas.eval(expr)

예제 #14

0

파일 보기

파일: sql4pandas.py 프로젝트: keeganmccallum/sql4pandas

            def _case(case):
                as_name, else_stmt, stmts = \
                    case['as_name'], case.get('else_stmt', None), case['stmts']

                # make a copy of a column in the data frame and use it as a base
                col = self._curr_val.iloc[:, 0].copy()
                if else_stmt is not None:
                    else_val = _get_val(*else_stmt)
                    col.loc[:] = else_val
                else:
                    # default to NULL as no else val specified
                    col.loc[:] = None

                for (ev_str, identifiers), stmt in stmts:
                    print ev_str
                    idx = pd.eval(ev_str, local_dict=id_dict(identifiers))
                    val = _get_val(*stmt)
                    col[idx] = val[idx]

                self._curr_val[as_name] = col

예제 #15

0

파일 보기

파일: numeric.py 프로젝트: J535D165/recordlinkage

def _squared_sim(d, scale, offset=0, origin=0):

    if offset < 0:
        raise ValueError("The offset must be positive.")

    if scale <= 0:
        raise ValueError("The scale must be larger than 0. ")

    d = (abs(d - origin)).clip(offset, offset + np.sqrt(2) * scale)
    # solve y=1-ad^2 given y(d=scale)=0.5
    # 1-y = ad^2
    # a = (1-y)/d^2

    # fill y=0.5 and d = scale
    # a = (1-0.5)/scale^2
    # a = 1/(2*scale^2)
    # y = 1 - 1/2*(d/scale)^2
    # d = sqrt(2)*scale is the point where similarity is zero.

    expr = '1 - 1/2*exp(2*log((d-offset)/scale))'

    return pandas.eval(expr)

예제 #16

0

파일 보기

파일: distance.py 프로젝트: J535D165/recordlinkage

def _1d_distance(s1, s2):

    return pandas.eval("s2-s1")

예제 #17

0

파일 보기

파일: sql4pandas.py 프로젝트: keeganmccallum/sql4pandas

 def _operation(op):
     as_name, expr = op['as_name'], op['expr']
     ev_str, identifiers = expr
     col = pd.eval(ev_str, local_dict=id_dict(identifiers))
     self._curr_val[as_name] = col

예제 #18

0

파일 보기

 def time_eval_frame_chained_cmp_all_threads(self):
     pd.eval('df < df2 < df3 < df4')

예제 #19

0

파일 보기

 def testit():
     a, b = 1, 2  # noqa
     res = pd.eval('a + b', engine=engine, parser=parser)
     assert res == 3

예제 #20

0

파일 보기

파일: test_compat.py 프로젝트: cgrin/pandas

 def testit():
     a, b = 1, 2
     res = pd.eval('a + b', engine=engine, parser=parser)
     tm.assert_equal(res, 3)

예제 #21

0

파일 보기

파일: eval.py 프로젝트: scarrucciu/pandas

 def time_eval_frame_and_all_threads(self):
     pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)')

예제 #22

0

파일 보기

파일: Numpy_and_Panadas.py 프로젝트: Xinwei-Long/Python_Review

ax = data.plot(style=['-', '--', ':'])
ax.lines[0].set_alpha(0.3)

#get the formation of date index
dataA.index.time
dataA.indx.dayofweek
dataA.index.weekday

###High-performance Pandas: eval(), query()
#eval():string expressions to efficiently compute operations using DataFrames
rng = np.random.RandomState(42)
df1, df2, df3, df4, df5 = (pd.DataFrame(rng.randint(0, 1000, (100, 3)))
                           for i in range(5))
#arithmetic operators
result1 = -df1 * df2 / (df3 + df4) - df5
result2 = pd.eval('-df1 * df2 / (df3 + df4) - df5')
np.allclose(result1, result2)  #same result
#comparison: include chained comparision
result1 = (df1 < df2) & (df2 <= df3) & (df3 != df4)
result2 = pd.eval('df1 < df2 <= df3 != df4')
#Bitwise:& and |
result2 = pd.eval('(df1<0.5) & (df2<0.5) | (df3<df4)')
#use of the literal and and or in Boolean expressions:
result2 = pd.eval('(df1<0.5) and (df2<0.5) or (df3<df4)')
#Object attributes and indices.
result2 = pd.eval('df2.T[0] + df3.iloc[1]')

#DF.eval() for columns-wise operation
df = pd.DataFrame(rng.rand(1000, 3), columns=['A', 'B', 'C'])
result2 = df.eval('(A+B)/(C-1)')

예제 #23

0

파일 보기

파일: eval.py 프로젝트: mwaskom/pandas

 def time_add(self, engine, threads):
     pd.eval('self.df + self.df2 + self.df3 + self.df4', engine=engine)

예제 #24

0

파일 보기

파일: filtering-df.py 프로젝트: anarinsk/bpt-pandas

#%% 
import pandas as pd 
import numpy as np 


# %%
nrows, ncols = 1000000, 100 
df_A, df_B, df_C, df_D = (pd.DataFrame(np.random.randn(nrows, ncols)) for i in range(4)) 

# %%
%timeit df_A + df_B + df_C + df_D

# %%
%timeit pd.eval('df_A + df_B + df_C + df_D')

# %%
df = pd.read_csv('/home/bpt-pandas/data/hotel_bookings.csv')

# %%
df.query('adults >2 and lead_time < 40')
# %%

fifa_df = pd.read_csv("/home/bpt-pandas/data/fifa-data.csv", usecols=["Name", "Age", "Nationality", 
                                           "Club", "Overall", "Value", "Wage"])

fifa_df
#%%
fifa_df[['Value', 'Wage']] = fifa_df[['Value', 'Wage']] \
                             .apply(lambda s: s.replace('[\€,)]','', regex=True))

fifa_df['Value'] = fifa_df['Value'].replace({'K':'*1e3', 'M':'*1e6'}, regex=True).map(pd.eval).astype(float)

예제 #25

0

파일 보기

    def to_df(self):
        """
        returns a pandas DataFrame object based on parsed data from a
        Commodity object's HTML
        """
        try:
            df = pd.read_html(self.response)
            df = df[0]  # Ignore footer table
            if S.DBG_ICOM:
                df.to_csv(S.WORK_DIR + "/" + self.name + ".inf")
            price = df['Price'][0]
            # print self.name, type(price), price
            if math.isnan(price):
                # No result found
                return None
            df["Date"] = pd.to_datetime(df["Date"])
            df.insert(0, "Commodity", np.nan)
            df["Commodity"] = self.name
            df.insert(6, "Close", np.nan)
            df["Close"] = df["Price"]
            df.insert(7, "Volume", np.nan)

            if self.name.startswith('USD'):
                df['Volume'] = 0
            elif self.name.startswith('FTFBM'):
                df['Volume'] = df["Vol."]
            else:
                mp = {'K': ' * 10**3', 'M': ' * 10**6'}
                # vol = df['Vol.'][0]
                # print type(vol), vol
                df['Vol.'] = df['Vol.'].replace('-', '0.1K')
                df['Vol.'] = df['Vol.'].replace(
                    0, '0.1K')  # replace all 0 vol with 100 shares
                '''
                Convert k to 1000 and m to 1000000
                Important: Can only support max 5 months of EOD to convert
                '''
                df["Volume"] = pd.eval(df["Vol."].replace(
                    mp.keys(), mp.values(),
                    regex=True).str.replace(r'[^\d\.\*]+', ''))

            df.drop('Price', axis=1, inplace=True)
            df.drop('Change %', axis=1, inplace=True)
            if 'Vol.' in df.columns:
                # FOREX has no "Vol." column
                df.drop('Vol.', axis=1, inplace=True)
            df.sort_values(by='Date', inplace=True)
        except ValueError as ve:
            df = 'ValueError'
            self.csverr = self.name + ": ValueError (No data for date range) " + ' (' + str(
                ve) + ')'
            if S.DBG_ICOM:
                with open(S.WORK_DIR + "value.err", 'ab') as f:
                    f.write('\n=============================\n')
                    f.write(self.name + "\n")
                    f.write(self.response)
        except Exception as e:
            # This happens when records being processed are larger than 3 months data,
            # try reducing the period
            if S.DBG_ICOM:
                with open(S.WORK_DIR + "value.err", 'ab') as f:
                    f.write('\n=============================\n')
                    f.write(self.name + "\n")
                    f.write(self.response)
            self.csverr = self.name + ":" + self.start + "," + self.end + ":" + str(
                e)
            df = 'Exception'
            # raise e

        return df

예제 #26

0

파일 보기

파일: query_tools.py 프로젝트: agunawan95/FlowAnalysis

def update(df, formulas):
    for k, v in formulas.items():
        df[k] = pd.eval(v)

예제 #27

0

파일 보기

def test_invalid_numexpr_version(engine, parser):
    if engine == "numexpr":
        pytest.importorskip("numexpr")
    a, b = 1, 2  # noqa:F841
    res = pd.eval("a + b", engine=engine, parser=parser)
    assert res == 3

예제 #28

0

파일 보기

파일: operator.py 프로젝트: actcwlf/panelexpr

 def eval(self, lope, rope):
     return pd.eval('lope / rope')

예제 #29

0

파일 보기

파일: Python Analysis INC5000 2019.py 프로젝트: DCCP80/Analysis-of-Inc5000-2019-Companies-and-Census-Data

EDCTN.hist(bins=30, figsize=(40, 35), xrot=90)
plt.suptitle('25 and Older Education Obtained', va='baseline', size=32)
plt.rcParams.update({'font.size': 16})

# In[162]:

POVERTY.drop('StateFIPS', axis=1).hist(bins=30, figsize=(20, 15), xrot=90)
plt.suptitle('Those Living Under $30,962 per Year', va='baseline', size=24)
plt.rcParams.update({'font.size': 16})

# In[160]:

POV_CALC = POVERTY.drop(['StateFIPS', 'Prcnt_UND_30962'], axis=1)
POV_POP = pd.merge(POV_CALC, POP, on='state', how='outer')

POV_POP['under_thrsh_pr100k'] = pd.eval(
    '(POV_POP.Under_30962/POV_POP.POP_2018)*100000')
POV_POP['under_thrsh_pr100k'].drop(POV_POP.index[39]).hist(bins=30,
                                                           figsize=(20, 15),
                                                           xrot=90)

# In[151]:

columns = [
    "state", "high_school_dipl", "No_schooling", "Master_degree",
    "Associate_degree", "Bachelor_degree", "Doctorate"
]
columns2 = [
    'state', 'HighSchool_pr100k', 'NoSch_pr100k', 'MasterDeg_pr100k',
    'AssociateDeg_pr100k', 'BachDeg_pr100k', 'Doctorate_pr100k'
]
TRG_EDCTN = EDUCATION.loc[:, columns]

예제 #30

0

파일 보기

파일: 30-highPerformance-pandas.py 프로젝트: behrouzmadahian/python

print(max_expr, '\n')
'''
The benefit here is that Numexpr evaluates the expression in a way that does not use full-sized
temporary arrays, and thus can be much more efficient than NumPy, especially for large arrays. 
'''
print('pandas.eval() for Efficient Operations:\n')
nrows, ncols = 100000, 100
df1, df2, df3, df4 = (pd.DataFrame(rng.rand(nrows, ncols))
                      for i in range(4))
t1 = time.time()
stmt = df1 + df2 + df3 + df4
t2 = time.time()
print('Total time taken = ', t2-t1)

t1 = time.time()
stmt1 = pd.eval('df1 + df2 + df3 + df4')
t2 = time.time()
print('Total time taken pd.eval= ', t2-t1)
# numpy.allclose: returns true if two arrays are element-wise equal within a tolerance!
print('Operations supported by pd.eval(): supports a wide range of operations. \n')
results = pd.eval('df1 < df2 <= df3 != df4')
print('Object attributes and indices')
print('pd.eval() supports access to object attributes via the obj.attr syntax, and indexes via the obj[index] syntax:\n')
results2 = pd.eval('df2.T[0] + df3.iloc[1]')
'''
Other operations such as function calls, conditional statements, loops, 
and other more involved constructs are currently not implemented in pd.eval().
'''
print('DataFrame.eval() for Column-Wise Operations:\n')
print('The benefit of the myDf.eval() method is that columns can be referred to by name. ')
df = pd.DataFrame(rng.rand(1000, 3), columns=['A', 'B', 'C'])

예제 #31

0

파일 보기

파일: test_compat.py 프로젝트: AllenDowney/pandas

 def testit():
     a, b = 1, 2  # noqa
     res = pd.eval('a + b', engine=engine, parser=parser)
     assert res == 3

예제 #32

0

파일 보기

def score_gekko(team_projections,
                team_id,
                opponent_scoring,
                scoring_categories,
                date_range,
                roster_makeup,
                date_last_use_actuals=None,
                roster_change_set=None,
                actual_scores=None):

    # if we don't have any actuals, lets return 0 for all stats
    if actual_scores is None:
        actual_scores = defaultdict(lambda: 0)

    m = GEKKO(remote=False, server='http://localhost:8083')
    m.options.SOLVER = 1

    # with roster changes we make changes, so let's copy the projections
    current_projections = team_projections.copy()
    # projections for players who may play.  changes with roster changes during period
    projections_with_added_players = team_projections.copy()

    rewards = defaultdict(list)
    player_vars = defaultdict(dict)

    rc_dict = defaultdict(list)
    if roster_change_set:
        rc_dict = _roster_changes_as_day_dict(roster_change_set)

    for game_day_idx, game_day in enumerate(date_range):
        for rc in rc_dict[game_day.date()]:
            # TODO should really figure out how to deal with this.  sometimes it is string, sometimes list.
            # i think has to do with serializing via jsonpickle
            with suppress(Exception):
                rc.in_projections['eligible_positions'] = pd.eval(
                    rc.in_projections['eligible_positions'])
            # add player in projections to projection dataframe
            current_projections = current_projections.append(rc.in_projections)
            projections_with_added_players = projections_with_added_players.append(
                rc.in_projections)
            current_projections.drop(rc.out_player_id, inplace=True)
            # current_projections.sort_values(by='fpts', ascending=False, inplace=True)

        teams_playing_today = nhl_scraper._teams_playing_one_day(
            game_day.to_pydatetime().date())
        game_day_players = current_projections[
            current_projections.team_id.isin(teams_playing_today)]
        scores = {}
        players = {}
        for position in roster_makeup.keys():
            available_for_position = game_day_players[
                game_day_players.eligible_positions.map(
                    set([position]).issubset)]
            if len(available_for_position) > 0:
                players[position] = list(available_for_position.index)
                scores[position] = available_for_position[scoring_categories]

        vars_by_player = defaultdict(list)
        for position in players:
            for player in players[position]:
                player_var = m.Var(1,
                                   0,
                                   1,
                                   True,
                                   name=f"{game_day_idx}_{position}_{player}")
                if position not in player_vars[game_day_idx]:
                    player_vars[game_day_idx][position] = []
                player_vars[game_day_idx][position].append(player_var)
                vars_by_player[player].append(player_var)
                for category in scoring_categories:
                    rewards[category].append(
                        player_var * game_day_players.loc[player, category])
            # limit amount players to roster size allowed for position
            if position in player_vars[game_day_idx]:
                m.Equation(
                    m.sum(player_vars[game_day_idx][position]) <=
                    roster_makeup[position])

        # for players with multiple eligible positions, make sure only appear once
        for player_id, player_info in game_day_players.iterrows():
            eligible_positions = player_info.eligible_positions
            if len(eligible_positions) > 1:
                m.Equation(m.sum(vars_by_player[player_id]) <= 1)

    for category in scoring_categories:
        m.Obj(-1 *
              (1 /
               (1 + e**(-(m.sum(rewards[category]) + actual_scores[category] -
                          opponent_scoring[category])))))

    result = None
    try:
        m.solve(disp=False)
    except Exception as ex:
        print('Exception')
        logger.exception("Exception in gekko scoring", ex)
        time.sleep(1)
        try:
            m.solve()
        except Exception as ex2:
            logger.exception("Exception in retry gekko scoring", ex2)
            raise ex2

    rostered_players = []
    for game_day_idx, game_day in enumerate(date_range):
        for position in roster_makeup.keys():
            if position in player_vars[game_day_idx]:
                for player in player_vars[game_day_idx][position]:
                    if player.value[0] == 1:
                        attrs = player.name.split("_")
                        player_id = int(attrs[-1])
                        position = player.name.split("_")[-2].upper()
                        rostered_players.append(
                            [player_id, position, 'p', game_day])
    results = pd.DataFrame(
        rostered_players,
        columns=['player_id', 'rostered_position', 'score_type', 'play_date'])
    results = results.join(projections_with_added_players[scoring_categories],
                           on='player_id')
    return results

예제 #33

0

파일 보기

파일: eval.py 프로젝트: mwaskom/pandas

 def time_chained_cmp(self, engine, threads):
     pd.eval('self.df < self.df2 < self.df3 < self.df4', engine=engine)

예제 #34

0

파일 보기

 },
                           inplace=True)
 for comb in range(combinations):
     #print(comb)
     if comb == 0:
         campos = [
             i for i in range(len(stage[period_back].columns) - 1)
         ]
         campos.pop(0)
         for i in range(1, len(campos) - 2, 3):
             campos.remove(i)
         print(campos)
         campos_print = campos[:]
         campos_print.insert(0, campos[-2])
         campos_print.pop(-2)
         stage2 = stage[period_back].iloc[:, pd.eval(campos_print)]
         stage2.to_csv('S:\proyecto2\csv\SCENES\scene' +
                       frec_scene + '-' + str(
                           (max_period - period_back + 1)) + '_' +
                       desc[comb] + '.csv',
                       index=False)
     if comb == 1:
         for i in range(3, campos[-1] - 1, 3):
             campos.remove(i)
             campos_print = campos[:]
             campos_print.insert(0, campos[-2])
             campos_print.pop(-2)
             stage2 = stage[period_back].iloc[:,
                                              pd.eval(campos_print)]
             stage2.to_csv('S:\proyecto2\csv\SCENES\scene' +
                           frec_scene + '-' + str(

예제 #35

0

파일 보기

    def _signal(self,
                df_sys,
                fast,
                slow,
                trade="buy",
                df_price=None,
                signal="entry",
                price="open"):
        """
        A function to generate entry and exit signals individually based on two
        signals crossing one another. The direction in which the cross is
        evaluated is defined by the `trade` value ('buy' or 'sell')

        Parameters
        ----------
        df_sys : pandas.core.frame.DataFrame
            A pandas dataframe indexed by timestamp at consisten intervals
            containing two columns each containing a signal's values.
        fast : str
            The column label for the signal defined by a short time frame
            moving average.
        slow : str
            The column label for the signals defined by a long timeframe
            moving average.
        trade : {'buy', 'sell'}
            The trade directions against which the signal crosses should be
            evaluated against.
        df_price : pandas.core.frame.DataFrame
            The dataframe containing the entry or exit price that should be
            matched against the given timestamp directly following the session
            in which the signal cross occured.
        signal : {'entry', 'exit'}
            Whether to generate price points for a trades entry or exit.
        price : str
            The column label for the chose entry or exit price.

        Returns
        -------
        pandas.core.frame.DataFrame
            A dataframe with a series of entry or exit prices that are
            generated by two signals crossing one another.

        Notes
        -----
        Optimisations yield the following improvements:
        1. 1min 28s ± 5.46 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
        2. 53.4 s ± 1.12 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
        3. 10.8 s ± 400 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
        """
        if trade == "buy":
            df_sys.eval(f"sys = {fast} > {slow}", inplace=True)
        elif trade == "sell":
            df_sys.eval(f"sys = {fast} < {slow}", inplace=True)

        df_sys["prev_sys"] = df_sys["sys"].shift(2)
        df_sys["curr_sys"] = df_sys["sys"].shift(1)

        if signal == "entry":
            df_sys[signal] = pd.eval(
                "(df_sys.curr_sys == 1) & (df_sys.prev_sys == 0)")
        elif signal == "exit":
            df_sys[signal] = pd.eval(
                "(df_sys.curr_sys == 0) & (df_sys.prev_sys == 1)")

        df_sys.index.rename("timestamp", inplace=True)
        en_ex_prep = df_sys[df_sys[signal] ==
                            True][signal].copy().reset_index()
        del df_sys

        # datetime column label differs due to different setups.
        en_ex_price = en_ex_prep.merge(df_price[price],
                                       how="left",
                                       left_on="timestamp",
                                       right_index=True,
                                       validate="1:1")

        s = en_ex_price.rename(
            columns={
                "timestamp": f"{signal}_dt",
                f"{price}": f"{signal}_price",
                f"{signal}": f"{signal}_type"
            })

        return s

예제 #36

0

파일 보기

파일: test_query_eval.py 프로젝트: bogdantalpiga/ClientChurnRate

 def check_raise_on_panel4d_with_multiindex(self, parser, engine):
     tm.skip_if_no_ne()
     p4d = tm.makePanel4D(7)
     p4d.items = tm.makeCustomIndex(len(p4d.items), nlevels=2)
     with tm.assertRaises(NotImplementedError):
         pd.eval('p4d + 1', parser=parser, engine=engine)

예제 #37

0

파일 보기

파일: eval.py 프로젝트: scarrucciu/pandas

 def time_eval_frame_and_python(self):
     pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)', engine='python')

예제 #38

0

파일 보기

x
x = rng.rand(1E6//4)
x




#pandas.eval() 
import pandas as pd
nrows, ncols = 100000, 100
nrows, ncols
rng = np.random.RandomState(42)
rng
df1, df2, df3, df4 = (pd.DataFrame(rng.rand(nrows, ncols)) for i in range(4))
df1.head()
%timeit df1 + df2 + df3 + df4
%timeit pd.eval('df1+df2+df3+df4')

np.allclose(df1+df2+df3+df4, pd.eval('df1+df2+df3+df4'))

#Arithmetic Operators

#Comparison

#Bitwise

#Object Attributes


#Columnwise Operations

예제 #39

0

파일 보기

 def time_eval_frame_add_one_thread(self):
     pd.eval('df + df2 + df3 + df4')

예제 #40

0

파일 보기

파일: thermalModel_groupA.py 프로젝트: kelvinxuande/thermal-ann-lite

def load_csv(filename, features_list, mode):
    df = pd.read_csv(filename, header=12, dtype=str)
    # df = df.loc[:, data_list]
    df.dropna(axis=0, how='any', inplace=True)

    # minute, second seperation:
    df['Program time'] = df['Program time'].str.split(':')
    df[['minute', 'second']] = pd.DataFrame(df['Program time'].values.tolist(),
                                            index=df.index)
    df['new_set'] = pd.eval(
        'df.minute.str.contains("00") and df.second.str.contains("00.0")',
        engine='python')
    df.drop(columns=['Program time'], inplace=True)

    if (mode == 0):
        search_list = ['AhCha', 'AhDch', 'Temp', 'minute', 'second']
    elif (mode == 1):
        search_list = ['Current', 'Voltage', 'Temp', 'minute', 'second']
    elif (mode == 2):
        search_list = [
            'Current', 'Voltage', 'AhCha', 'AhDch', 'Temp', 'minute', 'second'
        ]
    df[search_list] = df[search_list].apply(pd.to_numeric, errors='raise')
    df[search_list] = df[search_list].astype('float64',
                                             copy=True,
                                             errors='raise')

    df.reset_index(drop=True, inplace=True)
    # get index of new sets
    # this indexes to the next needs to be added with 30 secs (see above)
    set_index = df.index[df['new_set']].tolist()
    del set_index[0:3]
    second_increment = [
        round(df['second'][i - 1] - df['second'][i - 2], 2) for i in set_index
    ]
    # index of new sets and everything after, increase by second_increment
    for index in range(len(second_increment)):
        if (index != len(second_increment) - 1):
            df['second'][set_index[index]:set_index[index + 1]] = df['second'][
                set_index[index]:set_index[index +
                                           1]] + second_increment[index]
        else:
            df['second'][set_index[index]:] = df['second'][
                set_index[index]:] + second_increment[index]

    # now, we form a list of new sets - 1
    prev_index = [i - 1 for i in set_index]
    # take values of indexes new set - 1, add to indexes next set and everything after
    seconds_summation = [(df['minute'][i] * 60) + df['second'][i]
                         for i in prev_index]
    for index in range(len(seconds_summation)):
        df['second'][set_index[index]:] = df['second'][
            set_index[index]:] + seconds_summation[index]
    # finally, convert all minutes to seconds
    df['second'] = df['second'] + df['minute'] * 60

    # do some clean-ups
    df.drop(columns=['minute', 'new_set'], inplace=True)
    df['Amb'] = df['Temp'].min()

    if (mode == 0):
        df = df[['second', 'AhCha', 'AhDch', 'Amb', 'Temp']]
    elif (mode == 1):
        df = df[['second', 'Current', 'Voltage', 'Amb', 'Temp']]
    elif (mode == 2):
        df = df[[
            'second', 'Current', 'Voltage', 'AhCha', 'AhDch', 'Amb', 'Temp'
        ]]

    df.columns = features_list
    return df

예제 #41

0

파일 보기

 def time_eval_frame_mult_python_one_thread(self):
     pd.eval('df * df2 * df3 * df4', engine='python')

예제 #42

0

파일 보기

 def time_eval_frame_chained_cmp_python(self):
     pd.eval('df < df2 < df3 < df4', engine='python')

예제 #43

0

파일 보기

파일: sql4pandas.py 프로젝트: keeganmccallum/sql4pandas

 def _where(cond):
     ev_str, identifiers = cond
     index = pd.eval(ev_str, local_dict=id_dict(identifiers))
     self._curr_val = self._curr_val[index]

예제 #44

0

파일 보기

 def test_raise_on_panel_with_multiindex(self, parser, engine):
     p = tm.makePanel(7)
     p.items = tm.makeCustomIndex(len(p.items), nlevels=2)
     with pytest.raises(NotImplementedError):
         pd.eval('p + 1', parser=parser, engine=engine)

예제 #45

0

파일 보기

 def time_eval_frame_mult_one_thread(self):
     pd.eval('df * df2 * df3 * df4')

예제 #46

0

파일 보기

 def time_eval_frame_and_all_threads(self):
     pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)')

예제 #47

0

파일 보기

파일: test_query_eval.py 프로젝트: BobMcFry/pandas

 def test_raise_on_panel_with_multiindex(self, parser, engine):
     p = tm.makePanel(7)
     p.items = tm.makeCustomIndex(len(p.items), nlevels=2)
     with pytest.raises(NotImplementedError):
         pd.eval('p + 1', parser=parser, engine=engine)

예제 #48

0

파일 보기

파일: store.py 프로젝트: alanhdu/pandasRAM

 def _eval(self, key, where):
     match = arithOp.search(where)
     column = where[:match.start()].strip()
     other = where[match.start():]
     c = self[key + "/" + column]
     return c[pd.eval("c" + other)].index

예제 #49

0

파일 보기

파일: test_query_eval.py 프로젝트: RogerThomas/pandas

 def check_raise_on_panel4d_with_multiindex(self, parser, engine):
     tm.skip_if_no_ne()
     p4d = tm.makePanel4D(7)
     p4d.items = tm.makeCustomIndex(len(p4d.items), nlevels=2)
     with tm.assertRaises(NotImplementedError):
         pd.eval('p4d + 1', parser=parser, engine=engine)

예제 #50

0

파일 보기

 def time_eval_frame_mult_all_threads(self):
     pd.eval('df * df2 * df3 * df4')

예제 #51

0

파일 보기

파일: eval.py 프로젝트: mwaskom/pandas

 def time_and(self, engine, threads):
     pd.eval('(self.df > 0) & (self.df2 > 0) & '
             '(self.df3 > 0) & (self.df4 > 0)', engine=engine)

예제 #52

0

파일 보기

 def time_eval_frame_add_python_one_thread(self):
     pd.eval('df + df2 + df3 + df4', engine='python')

예제 #53

0

파일 보기

파일: eval.py 프로젝트: mwaskom/pandas

 def time_mult(self, engine, threads):
     pd.eval('self.df * self.df2 * self.df3 * self.df4', engine=engine)

예제 #54

0

파일 보기

파일: eval.py 프로젝트: scarrucciu/pandas

 def time_eval_frame_add_python_one_thread(self):
     pd.eval('df + df2 + df3 + df4', engine='python')

예제 #55

0

파일 보기

파일: test_compat.py 프로젝트: wuthmonehnin/pandas

 def testit():
     a, b = 1, 2
     res = pd.eval('a + b', engine=engine, parser=parser)
     tm.assert_equal(res, 3)

예제 #56

0

파일 보기

 def time_eval_frame_and_python(self):
     pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)',
             engine='python')

예제 #57

0

파일 보기

mask = (x>0.5) & (y<0.5)
tmp1 = (x>0.5)
tmp2 = (y<0.5)
mask = tmp1 & tmp2

import numexpr
mask_numexpr = numexpr.evaluate('(x>0.5) & (y<0.5)')
np.allclose(mask, mask_numexpr)

nrows, ncols = 10000, 100
rng = np.random.RandomState(42)
df1, df2, df3, df4 = (pd.DataFrame(rng.rand(nrows, ncols)) for i in range(4))
df1.head()

df1 + df2 + df3 + df4
pd.eval('df1 + df2 + df3 + df4')
df1, df2, df3, df4, df5 = (pd.DataFrame(rng.randint(0,1000, (100,3))) for i in range(5))
result1 = -df1*df2/(df3+df4) - df5
result2 = pd.eval('-df1*df2/(df3+df4) - df5')
np.allclose(result1, result2)
result1 = (df1 < 0.5) & (df2 < 0.5) | (df3 < df4)
result2 = pd.eval('(df1 < 0.5) & (df2 < 0.5) | (df3 < df4)')
np.allclose(result1, result2)
result3 = pd.eval('(df1 < 0.5) and (df2 < 0.5) or (df3 < df4)')
np.allclose(result1, result3)
result1 = df2.T[0] + df2.iloc[1]
result2 = pd.eval('df2.T[0] + df2.iloc[1]')
np.allclose(result1, result2)

df = pd.DataFrame(rng.rand(1000, 3), columns = ['A', 'B', 'C'])
df.head()

예제 #58

0

파일 보기

 def time_eval_frame_add_all_threads(self):
     pd.eval('df + df2 + df3 + df4')

예제 #59

0

파일 보기

파일: data_load.py 프로젝트: wgwangang/ICE-Talk

def load_data(hp, mode="train", audio_extension='.wav'):
    '''Loads data
      Args:
          mode: "train" / "validation" / "synthesis" / "demo".
    '''
    assert mode in ('train', 'synthesis', 'validation', 'demo')
    logging.info('Start loading data in mode: %s' % (mode))
    get_speaker_codes = (hp.multispeaker != []
                         )  ## False if hp.multispeaker is empty list

    dataset_df_path = os.path.join(hp.featuredir, 'dataset_' + mode + '.csv')

    # In demo mode, we change the "dataset" with only one line each time and do not want to use always the same df
    if False:  #os.path.exists(dataset_df_path) and mode != 'demo':
        dataset_df = pd.read_csv(dataset_df_path)

        dataset = {}
        #import pdb;pdb.set_trace()

        # this does not work in train mode because of  problem with doing pd.eval() with bytes
        try:
            dataset['texts'] = np.array(
                [pd.eval(e) for e in dataset_df['texts'].tolist()])
        except AttributeError:
            #that is why we do this
            dataset['texts'] = np.array(
                [ast.literal_eval(e) for e in dataset_df['texts'].tolist()])
            # I think this cause an error when trying training:
            # tensorflow.python.framework.errors_impl.InvalidArgumentError: Input to DecodeRaw has length 105 that is not a multiple of 4, the size of int32

        dataset['fpaths'] = dataset_df['fpaths'].tolist(
        )  ## at synthesis, fpaths only a way to get bases -- wav files probably do not exist
        dataset['text_lengths'] = dataset_df['text_lengths'].tolist(
        )  ## only used in training (where length information lost due to string format) - TODO: good motivation for this format?
        dataset['audio_lengths'] = dataset_df['audio_lengths'].tolist(
        )  ## might be []
        dataset['label_lengths'] = dataset_df['label_lengths'].tolist(
        )  ## might be []

        if get_speaker_codes:
            dataset['speakers'] = dataset_df['speakers'].tolist()
        if hp.use_external_durations:
            dataset['durations'] = dataset_df['durations'].tolist()

        return dataset
    else:
        if mode in ['synthesis', 'demo']:
            get_speaker_codes = False  ## never read speaker from transcript for synthesis -- take user-specified speaker instead

        # Load vocabulary
        char2idx, idx2char = load_vocab(hp)

        if mode in ["train", "validation"]:
            transcript = os.path.join(hp.transcript)
        elif mode == 'synthesis':
            transcript = os.path.join(hp.test_transcript)
        else:
            transcript = './demo/transcript.csv'

        if hp.multispeaker:
            speaker2ix = dict(zip(hp.speaker_list,
                                  range(len(hp.speaker_list))))

        fpaths, text_lengths, texts, speakers, durations = [], [], [], [], []
        audio_lengths, label_lengths = [], []
        lines = codecs.open(transcript, 'r', 'utf-8').readlines()

        too_long_count_frames = 0
        too_long_count_text = 0
        no_data_count = 0

        nframes = 0  ## default 'False' value
        for line in tqdm(lines, desc='load_data'):
            line = line.strip('\n\r |')
            if line == '':
                continue
            fields = line.strip().split("|")

            assert len(fields) >= 1, fields
            if len(fields) > 1:
                assert len(fields) >= 3, fields

            fname = fields[0]
            if len(fields) > 1:
                unnorm_text, norm_text = fields[1:3]
            else:
                norm_text = None  # to test if audio only

            if hp.validpatt:
                if mode == "train":
                    if hp.validpatt in fname:
                        continue
                elif mode == "validation":
                    if hp.validpatt not in fname:
                        continue

            if len(fields) >= 4:
                phones = fields[3]

            if norm_text is None:
                letters_or_phones = [
                ]  #  [0] ## dummy 'text' (1 character of padding) where we are using audio only
            elif hp.input_type == 'phones':
                if 'speaker_dependent_phones' in hp.multispeaker:
                    speaker_code = speaker
                else:
                    speaker_code = ''
                phones = phones_normalize(
                    phones, char2idx, speaker_code=speaker_code
                )  # in case of phones, all EOS markers are assumed included
                letters_or_phones = [char2idx[char] for char in phones]
            elif hp.input_type == 'letters':
                text = text_normalize(norm_text, hp) + "E"  # E: EOS
                letters_or_phones = [char2idx[char] for char in text]

            text_length = len(letters_or_phones)

            if text_length > hp.max_N:
                #print('number of letters/phones for %s is %s, exceeds max_N %s: skip it'%(fname, text_length, hp.max_N))
                too_long_count_text += 1
                continue

            if mode in ["train", "validation"] and os.path.exists(
                    hp.coarse_audio_dir):
                mel = "{}/{}".format(hp.coarse_audio_dir, fname + ".npy")
                if not os.path.exists(mel):
                    logging.debug('no file %s' % (mel))
                    no_data_count += 1
                    continue
                nframes = np.load(mel).shape[0]
                if nframes > hp.max_T:
                    #print('number of frames for %s is %s, exceeds max_T %s: skip it'%(fname, nframes, hp.max_T))
                    too_long_count_frames += 1
                    continue
                audio_lengths.append(nframes)

            texts.append(np.array(letters_or_phones, np.int32))

            fpath = os.path.join(hp.waveforms, fname + audio_extension)
            fpaths.append(fpath)
            text_lengths.append(text_length)

            ## get speaker before phones in case need to get speaker-dependent phones
            if get_speaker_codes:
                assert len(fields) >= 5, fields
                speaker = fields[4]
                speaker_ix = speaker2ix[speaker]
                speakers.append(np.array(speaker_ix, np.int32))

            if hp.merlin_label_dir:  ## only get shape here -- get the data later
                try:
                    label_length, label_dim = np.load("{}/{}".format(
                        hp.merlin_label_dir,
                        basename(fpath) + ".npy")).shape
                except TypeError:
                    label_length, label_dim = np.load("{}/{}".format(
                        hp.merlin_label_dir,
                        basename(fpath.decode('utf-8')) + ".npy")).shape
                label_lengths.append(label_length)
                assert label_dim == hp.merlin_lab_dim

            if hp.use_external_durations:
                assert len(fields) >= 6, fields
                duration_data = fields[5]
                duration_data = [
                    int(value)
                    for value in re.split('\s+', duration_data.strip(' '))
                ]
                duration_data = np.array(duration_data, np.int32)
                if hp.merlin_label_dir:
                    duration_data = duration_data[
                        duration_data >
                        0]  ## merlin label contains no skipped items
                    assert len(duration_data) == label_length, (
                        len(duration_data), label_length, fpath)
                else:
                    assert len(duration_data) == text_length, (
                        len(duration_data), text_length, fpath)
                if nframes:
                    assert duration_data.sum() == nframes * hp.r, (
                        duration_data.sum(), nframes * hp.r)
                durations.append(duration_data)

            # !TODO! check this -- duplicated!?
            # if hp.merlin_label_dir: ## only get shape here -- get the data later
            #     label_length, _ = np.load("{}/{}".format(hp.merlin_label_dir, basename(fpath)+".npy")).shape
            #     label_lengths.append(label_length)

        #import pdb;pdb.set_trace()

        if mode == "validation":
            if len(texts) == 0:
                logging.error(
                    'No validation sentences collected: maybe the validpatt %s matches no training data file names?'
                    % (hp.validpatt))
                sys.exit(1)

        logging.info('Loaded data for %s sentences' % (len(texts)))
        logging.info('Sentences skipped with missing features: %s' %
                     (no_data_count))
        logging.info('Sentences skipped with > max_T (%s) frames: %s' %
                     (hp.max_T, too_long_count_frames))
        logging.info(
            'Additional sentences skipped with > max_N (%s) letters/phones: %s'
            % (hp.max_N, too_long_count_text))

        if mode == 'train' and hp.n_utts > 0:
            n_utts = hp.n_utts
            assert n_utts <= len(fpaths)
            logging.info('Take first %s (n_utts) sentences for training' %
                         (n_utts))
            fpaths = fpaths[:n_utts]
            text_lengths = text_lengths[:n_utts]
            texts = texts[:n_utts]
            if get_speaker_codes:
                speakers = speakers[:n_utts]
            if audio_lengths:
                audio_lengths = audio_lengths[:n_utts]
            if label_lengths:
                label_lengths = label_lengths[:n_utts]

        if mode == 'train':
            ## Return string representation which will be parsed with tf's decode_raw:
            texts = [text.tostring() for text in texts]
            if get_speaker_codes:
                speakers = [speaker.tostring() for speaker in speakers]
            if hp.use_external_durations:
                durations = [d.tostring() for d in durations]

        if mode in ['validation', 'synthesis', 'demo']:
            ## Prepare a batch of 'stacked texts' (matrix with number of rows==synthesis batch size, and each row an array of integers)
            stacked_texts = np.zeros((len(texts), hp.max_N), np.int32)
            for i, text in enumerate(texts):
                stacked_texts[i, :len(text)] = text
            texts = stacked_texts

            if hp.use_external_durations:
                stacked_durations = np.zeros((len(texts), hp.max_T, hp.max_N),
                                             np.int32)
                for i, dur in enumerate(durations):
                    duration_matrix = durations_to_hard_attention_matrix(dur)
                    duration_matrix = end_pad_for_reduction_shape_sync(
                        duration_matrix, hp)
                    duration_matrix = duration_matrix[0::hp.r, :]
                    m, n = duration_matrix.shape
                    stacked_durations[i, :m, :n] = duration_matrix
                durations = stacked_durations

        dataset = {}
        dataset['texts'] = texts
        dataset[
            'fpaths'] = fpaths  ## at synthesis, fpaths only a way to get bases -- wav files probably do not exist
        dataset[
            'text_lengths'] = text_lengths  ## only used in training (where length information lost due to string format) - TODO: good motivation for this format?
        dataset['audio_lengths'] = audio_lengths  ## might be []
        dataset['label_lengths'] = label_lengths  ## might be []

        dataset_df = dataset.copy()

        try:
            dataset_df['texts'] = dataset_df['texts'].tolist()
        except:
            # It is already a list
            pass
        try:
            if len(dataset_df['audio_lengths']) == 0:
                dataset_df['audio_lengths'] = [0] * len(dataset_df['texts'])
            if len(dataset_df['label_lengths']) == 0:
                dataset_df['label_lengths'] = [0] * len(dataset_df['texts'])
            if not os.path.exists(hp.featuredir): os.makedirs(hp.featuredir)
            pd.DataFrame.to_csv(pd.DataFrame.from_records(dataset_df),
                                dataset_df_path)
        except:
            import pdb
            pdb.set_trace()

        if get_speaker_codes:
            dataset['speakers'] = speakers
        if hp.use_external_durations:
            dataset['durations'] = durations
        return dataset

예제 #60

0

파일 보기

파일: benchmark.py 프로젝트: tschm/tcapy

    def calculate_benchmark(self,
                            trade_order_df=None,
                            market_df=None,
                            trade_order_name=None,
                            mid=None,
                            bid=None,
                            ask=None,
                            bid_mid_bp=None,
                            ask_mid_bp=None,
                            overwrite_bid_ask=None):
        # if not (self._check_calculate_benchmark(trade_order_name=trade_order_name)): return trade_order_df

        if mid is None: mid = self._mid
        if bid is None: bid = self._bid
        if ask is None: ask = self._ask
        if bid_mid_bp is None: bid_mid_bp = self._bid_mid_bp
        if ask_mid_bp is None: ask_mid_bp = self._ask_mid_bp
        if overwrite_bid_ask is None:
            overwrite_bid_ask = self._overwrite_bid_ask

        bid_mid_bp = float(bid_mid_bp)
        ask_mid_bp = float(ask_mid_bp)

        # market_df_list = [market_df]

        if mid not in market_df.columns:
            market_df[mid] = (market_df[bid].values +
                              market_df[ask].values) / 2.0

        # Calculate the bid-mid and ask-mid spreads from market data
        if bid in market_df.columns and ask in market_df.columns and not (
                overwrite_bid_ask):
            # market_df[bid + '_' + mid + '_spread'] = (market_df[bid].values / market_df[mid].values) - 1.0
            # market_df[ask + '_' + mid + '_spread'] = (market_df[mid].values / market_df[ask].values) - 1.0
            market_df[bid + '_' + mid + '_spread'] = pd.eval(
                '(market_df.bid / market_df.mid) - 1.0')
            market_df[ask + '_' + mid + '_spread'] = pd.eval(
                '(market_df.mid / market_df.ask) - 1.0')

        # If we have been asked to overwrite bid/ask columns with an artificial proxy
        elif bid in market_df.columns and ask in market_df.columns and overwrite_bid_ask:
            # otherwise if we don't have sufficient bid/ask data (and only mid data), or if we want to forecibly overwrite it,
            # create a synthetic bid/ask and use the user specified spread
            market_df[bid + '_' + mid + '_spread'] = -bid_mid_bp / 10000.0
            market_df[ask + '_' + mid + '_spread'] = -ask_mid_bp / 10000.0
            # market_df[bid] = (market_df[mid].values) * (1.0 - bid_mid_bp / 10000.0)
            # market_df[ask] = (market_df[mid].values) / (1.0 - ask_mid_bp / 10000.0)
            # market_df[bid + '_' + mid + '_spread'] = pd.eval('-bid_mid_bp / 10000.0')
            # market_df[ask + '_' + mid + '_spread'] = pd.eval('-ask_mid_bp / 10000.0')
            market_df[bid] = pd.eval(
                '(market_df[mid]) * (1.0 - bid_mid_bp / 10000.0)')
            market_df[ask] = pd.eval(
                '(market_df[mid]) / (1.0 - ask_mid_bp / 10000.0)')

        # If we only have the mid column
        elif mid in market_df.columns and bid not in market_df.columns and ask not in market_df.columns:
            market_df[bid + '_' + mid + '_spread'] = -bid_mid_bp / 10000.0
            market_df[ask + '_' + mid + '_spread'] = -ask_mid_bp / 10000.0
            # market_df[bid] = (market_df[mid].values) * (1.0 - bid_mid_bp / 10000.0)
            # market_df[ask] = (market_df[mid].values) / (1.0 - ask_mid_bp / 10000.0)
            # market_df[bid + '_' + mid + '_spread'] = pd.eval('-bid_mid_bp / 10000.0')
            # market_df[ask + '_' + mid + '_spread'] = pd.eval('-ask_mid_bp / 10000.0')
            market_df[bid] = pd.eval(
                '(market_df.mid) * (1.0 - bid_mid_bp / 10000.0)')
            market_df[ask] = pd.eval(
                '(market_df.mid) / (1.0 - ask_mid_bp / 10000.0)')
        else:
            LoggerManager().getLogger(__name__).warn(
                "Couldn't calculate spread from mid, check market data has appropriate fields."
            )

        return trade_order_df, market_df