Example #1
0
def most_common_lineup_position(retro):
    """Return each batter's most common lineup position.

    Accepts either a datascience ``_Table`` or a pandas DataFrame with
    ``Batter_ID``, ``Lineup_Order`` and ``Inning`` columns; returns the
    same kind of object it was given.
    """
    was_table = isinstance(retro, _Table)
    if was_table:
        retro = retro.to_df()

    # Count plate appearances per (batter, lineup slot), surface the slot
    # as a regular column, label the count 'PA', and sort most-common first.
    counts = retro.groupby(['Batter_ID', 'Lineup_Order'])['Inning'].count()
    counts = counts.reset_index(level='Lineup_Order')
    counts = counts.rename(columns={'Inning': 'PA'})
    counts = counts.sort_values('PA', ascending=False)

    # Because rows are sorted by PA descending, the first occurrence of each
    # batter is their most common slot; drop the duplicates that follow.
    keep = ~counts.index.duplicated(keep='first')
    result = counts.loc[keep, ['Lineup_Order']].sort_index()

    if was_table:
        return _Table.from_df(result.reset_index())
    return result
Example #2
0
def fast_run_expectancy(retro, re):
    """Attach run-expectancy columns to play-by-play data.

    Looks up each play's (Outs, Start_Bases) state and its resulting
    (Outs + Event_Outs, End_Bases) state in the run-expectancy matrix
    ``re``, storing them as ``Run_Expectancy`` and ``Run_Expectancy_Next``.

    NOTE: when ``retro`` is passed as a DataFrame, the new columns are
    added to the caller's object in place.

    Accepts and returns either datascience ``_Table``s or pandas objects.
    """
    TABLE_FLAG = False
    if isinstance(retro, _Table):
        TABLE_FLAG = True
        retro = retro.to_df()
        re = re.to_df()

    re = re.set_index(['Outs', 'Start_Bases'])

    # Build current out-runner states
    idx = list(zip(retro['Outs'], retro['Start_Bases']))
    # Extract run potentials
    retro['Run_Expectancy'] = re.loc[idx].values

    next_outs = retro['Outs'] + retro['Event_Outs']
    # Build next out-runner states
    idx = list(zip(next_outs, retro['End_Bases']))
    # Extract run potentials
    retro['Run_Expectancy_Next'] = re.loc[idx].values

    # When the inning ends, there are 3 outs.  That state is not in the run
    # expectancy matrix, so inning-ending plate appearances look up as NaN;
    # an ended inning is worth 0 future runs.
    # Fix: assign the filled column back instead of calling
    # fillna(inplace=True) on a column selection — that is chained
    # assignment, which is unreliable and deprecated under pandas
    # copy-on-write semantics.
    retro['Run_Expectancy_Next'] = retro['Run_Expectancy_Next'].fillna(0)

    return _Table.from_df(retro) if TABLE_FLAG else retro
def fill_null(table, fill_column=None, fill_value=None, fill_method=None):
    """Fill missing values in a table (or in one of its columns).

    If ``fill_column`` is given, only that column is filled and returned;
    otherwise the whole table is filled.  Returns the same kind of object
    (datascience ``_Table`` or pandas) that was passed in.
    """
    is_table = isinstance(table, _Table)
    df = table.to_df() if is_table else table
    target = df if fill_column is None else df[fill_column]
    filled = target.fillna(value=fill_value, method=fill_method)
    return _Table.from_df(filled) if is_table else filled
def get_first_from_group(table, groupby):
    """Keep the first row of each group defined by the ``groupby`` key(s).

    Rows are sorted by the key(s) first, so "first" means first in the
    sorted order.  Returns the same kind of object (datascience ``_Table``
    or pandas) that was passed in.
    """
    is_table = isinstance(table, _Table)
    df = table.to_df() if is_table else table
    ordered = df.sort_values(groupby)
    firsts = ordered.drop_duplicates(subset=groupby, keep='first')
    return _Table.from_df(firsts) if is_table else firsts
Example #5
0
def merge(t1, t2, on, how='outer', fillna=True):
    """Merge two tables on the column(s) named by ``on``.

    The return type mirrors ``t1``: a datascience ``_Table`` comes back as
    a ``_Table``, anything else as a pandas DataFrame.  Missing values in
    the merged result are replaced with 0 when ``fillna`` is truthy.
    """
    was_table = isinstance(t1, _Table)
    if was_table:
        t1 = t1.to_df()
    if isinstance(t2, _Table):
        t2 = t2.to_df()

    merged = _pd.merge(t1, t2, how=how, left_on=on, right_on=on)
    if fillna:
        merged = merged.fillna(0)

    return _Table.from_df(merged) if was_table else merged
Example #6
0
def merge(t1, t2, on, how='outer', fillna=True):
    """Merge two tables on the column(s) named by ``on``.

    The return type mirrors ``t1``: a datascience ``Table`` comes back as
    a ``Table``, anything else as a pandas DataFrame.  Missing values in
    the merged result are replaced with 0 when ``fillna`` is truthy.
    """
    import pandas as pd
    from datascience import Table

    was_table = isinstance(t1, Table)
    if was_table:
        t1 = t1.to_df()
    if isinstance(t2, Table):
        t2 = t2.to_df()

    merged = pd.merge(t1, t2, how=how, left_on=on, right_on=on)
    if fillna:
        merged = merged.fillna(0)

    return Table.from_df(merged) if was_table else merged
def multi_sort(table, by, descending=True, na_position='first'):
    """Sort a datascience Table by one or more columns.

    ``descending=True`` (the default) sorts largest-first; NaNs are placed
    according to ``na_position``.  Returns a new ``_Table``.
    """
    ordered = table.to_df().sort_values(
        by, ascending=not descending, na_position=na_position)
    return _Table.from_df(ordered)
def concat(table_list):
    """Vertically stack a sequence of datascience Tables into one."""
    frames = [tbl.to_df() for tbl in table_list]
    return _Table.from_df(_pd.concat(frames))
def fill_null(table, value=None, method=None):
    """Fill missing values in a datascience Table and return a new Table."""
    filled = table.to_df().fillna(value=value, method=method)
    return _Table.from_df(filled)
# Residual Oxygen (%), Moisture (%) and Hexanal (ppm) are characteristics of
# already-aged samples, so they are of little use for predicting shelf life;
# drop them from further analysis.
print(
    "Also Residual Oxygen, Moisture (%) and Hexanal (ppm) are charecteristics of aged samples. Hence is of little use for prediction of shelf life. hence droping those columns also from further analysis"
)

ProductTable = ProductTable.drop('Moisture (%)')
ProductTable = ProductTable.drop('Residual Oxygen (%)')
ProductTable = ProductTable.drop('Hexanal (ppm)')
# Study Number / Sample ID do not affect shelf life; their drops were left
# disabled below (the triple-quoted string is dead, commented-out code).
'''ProductTable=ProductTable.drop('Study Number')
ProductTable=ProductTable.drop('Sample ID')'''
# Drop duplicate rows, if any, by round-tripping through pandas.
pandasDF = ProductTable.to_df()
pandasDF.drop_duplicates(keep='first',
                         inplace=True)  # inplace=True mutates pandasDF directly
ProductTable = Table.from_df(pandasDF)
'''ProductTable=ProductTable.move_column('Difference From Fresh',0)
ProductTable=ProductTable.move_column('Sample Age (Weeks)',1)
ProductTable=ProductTable.move_column('Processing Agent Stability Index',2)
ProductTable=ProductTable.move_column('Process Type',3)'''

#--------------------------------------------------------------------------------------------------------------------
# ONE-HOT ENCODING (custom): one boolean indicator column per 'Process Type'.
#--------------------------------------------------------------------------------------------------------------------
ProductTable.append_column('ProcessTypeC', False)
ProductTable.append_column('ProcessTypeB', False)
ProductTable.append_column('ProcessTypeA', False)
# NOTE(review): only ProcessTypeA and ProcessTypeC are assigned here, so
# ProcessTypeB stays all-False — the source appears truncated; confirm a
# matching 'B' assignment exists elsewhere.
ProductTable['ProcessTypeA'] = ProductTable.apply(
    lambda x: True if x == 'A' else False, 'Process Type')
ProductTable['ProcessTypeC'] = ProductTable.apply(
    lambda x: True if x == 'C' else False, 'Process Type')
Example #11
0
File: b2.py Project: yifanwu/b2
 def from_df(self, df):
     """Wrap a pandas DataFrame as a datascience Table and register it.

     Converts ``df`` to a ``Table``, then hands it to
     ``create_with_table_wrap`` together with a name.
     NOTE(review): ``find_name()`` presumably recovers the caller's
     variable name for the DataFrame — confirm against its definition.
     """
     # a pandas df
     table = Table.from_df(df)
     df_name = find_name()
     return self.create_with_table_wrap(table, df_name)