# Example 1
def onehot2transactional(df):
    """Convert a one-hot encoded DataFrame back into a transaction list.

    Parameters
    ----------
    df : pandas.DataFrame
        One-hot encoded matrix (0/1 or boolean values); column names are
        the item names.

    Returns
    -------
    list of list
        One list of item names per row of ``df``.

    Raises
    ------
    ValueError
        If ``df`` does not look one-hot encoded (maximum value != 1).
    """
    # Validate with an explicit exception: `assert` is stripped under -O,
    # so it must not be used for input validation.
    if df.max().max() != 1:
        raise ValueError('Not onehot encoded')
    te = TransactionEncoder()
    # Populate the fitted attributes by hand so inverse_transform works
    # without ever calling fit() on transactional data.
    te.columns_ = list(df.columns)
    te.columns_mapping_ = {i: c for i, c in enumerate(te.columns_)}
    return te.inverse_transform(df.values)
# Example 2
# Topic: Association Rules - transaction data
# -----------------------------
#libraries
import numpy as np
import pandas as pd

from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Sample market-basket data: each inner list is one transaction.
dataset = [
    ['Apple', 'Beer', 'Rice', 'Chicken'],
    ['Apple', 'Beer', 'Rice'],
    ['Apple', 'Beer'],
    ['Apple', 'Bananas'],
    ['Milk', 'Beer', 'Rice', 'Chicken'],
    ['Milk', 'Beer', 'Rice'],
    ['Milk', 'Beer'],
    ['Apple', 'Bananas'],
]

dataset
te = TransactionEncoder()
te.fit(dataset)                  # learn the set of unique item names
te_ary = te.transform(dataset)   # boolean one-hot matrix, one row per transaction
te_ary
# The boolean dtype keeps memory low on large datasets; cast to int
# whenever a classic 0/1 representation is preferred.
te_ary.astype("int")

# Item names, in the column order of the one-hot matrix.
te.columns_

# Round-trip: one-hot rows back into transaction lists via inverse_transform.
first4 = te_ary[:4]
te.inverse_transform(first4)
# [['Apple', 'Beer', 'Chicken', 'Rice'], ['Apple', 'Beer', 'Rice'],
#  ['Apple', 'Beer'], ['Apple', 'Bananas']]
def test_inverse_transform():
    """Round-trip check: transform then inverse_transform recovers each
    transaction with its items alphabetically sorted.

    The original version referenced undefined names ``data_sorted`` and
    ``expect`` (and an unimported ``np``), so it raised NameError; this
    version builds both the input and the expectation from ``dataset``.
    """
    oht = TransactionEncoder()
    onehot = oht.fit(dataset).transform(dataset)
    # inverse_transform emits items in column order, i.e. alphabetically.
    expected = [sorted(transaction) for transaction in dataset]
    assert expected == oht.inverse_transform(onehot)
# Example 4
# Show every column when printing wide DataFrames.
pd.set_option('display.max_columns', None)
# -----
# Nine transactions over the items I1..I5.
transactions = [
    ['I1', 'I2', 'I5'], ['I2', 'I4'], ['I2', 'I3'],
    ['I1', 'I2', 'I4'], ['I1', 'I3'], ['I2', 'I3'],
    ['I1', 'I3'], ['I1', 'I2', 'I3', 'I5'], ['I1', 'I2', 'I3'],
]
transactions
# ----
encoder = TransactionEncoder()
onehot = encoder.fit(transactions).transform(transactions)
onehot
encoder.columns_
df = pd.DataFrame(onehot, columns=encoder.columns_)
df
# True/False marks each item's presence in each transaction.
df.shape
# Recover the original transactions from the one-hot matrix.
all_rows = onehot[:]
encoder.inverse_transform(all_rows)

#%%% Frequent itemsets - the key step
min_support_level = 0.01
frequent_itemsets = apriori(df, min_support=min_support_level, use_colnames=True)
frequent_itemsets
print(frequent_itemsets)  # DataFrame with the itemsets

#%%%% Rules filtered on the support metric
support3 = association_rules(frequent_itemsets, metric="support", min_threshold=.3)
print(support3)
print(support3[['antecedents', 'consequents', 'support', 'confidence']])
# ---
support2 = association_rules(frequent_itemsets, metric="support", min_threshold=.2)
print(support2[['antecedents', 'consequents', 'support', 'confidence']])
def test_inverse_transform():
    """Verify that inverse_transform undoes transform on ``dataset``.

    The original body used ``data_sorted`` and ``expect`` without defining
    them (NameError) and relied on an unimported ``np``; here both sides of
    the comparison are derived directly from ``dataset``.
    """
    oht = TransactionEncoder()
    encoded = oht.fit(dataset).transform(dataset)
    # Items come back in column (alphabetical) order, so sort the
    # reference transactions the same way before comparing.
    reference = [sorted(items) for items in dataset]
    assert oht.inverse_transform(encoded) == reference