def onehot2transactional(df):
    """Convert a one-hot encoded DataFrame back to transactional form.

    Parameters
    ----------
    df : pandas.DataFrame
        One-hot matrix: one row per transaction, one column per item,
        cells are 0/1 (or False/True).

    Returns
    -------
    list of list of str
        For each row, the column names whose cell is truthy, in column order
        (same output shape as ``TransactionEncoder.inverse_transform``).

    Raises
    ------
    ValueError
        If the frame does not look one-hot encoded (max value != 1).
    """
    # Validate with a real exception, not `assert` (asserts vanish under -O).
    if df.max().max() != 1:
        raise ValueError('Not onehot encoded')
    # inverse_transform is just "truthy cell -> column name"; do it directly
    # instead of hand-wiring TransactionEncoder's private fitted attributes.
    cols = list(df.columns)
    return [[cols[i] for i, flag in enumerate(row) if flag]
            for row in df.values]
#Topic: AR - transacation data #----------------------------- #libraries from mlxtend.preprocessing import TransactionEncoder dataset = [['Apple', 'Beer', 'Rice', 'Chicken'], ['Apple', 'Beer', 'Rice'], ['Apple', 'Beer'], ['Apple', 'Bananas'], ['Milk', 'Beer', 'Rice', 'Chicken'], ['Milk', 'Beer', 'Rice'], ['Milk', 'Beer'], ['Apple', 'Bananas']] dataset te = TransactionEncoder() te_ary = te.fit(dataset).transform(dataset) te_ary #The NumPy array is boolean for the sake of memory efficiency when working with large datasets. If a classic integer representation is desired instead, we can just convert the array to the appropriate type te_ary.astype("int") #After fitting, the unique column names that correspond to the data array shown above can be accessed via the columns_ attribute: te.columns_ #if we desire, we can turn the one-hot encoded array back into a transaction list of lists via the inverse_transform function: first4 = te_ary[:4] te.inverse_transform(first4) #[['Apple', 'Beer', 'Chicken', 'Rice'],['Apple', 'Beer', 'Rice'],['Apple', 'Beer'],['Apple', 'Bananas']]
def test_inverse_transform():
    """Round-trip check: encode module-level ``dataset``, then invert it.

    The original version referenced undefined names (``data_sorted``,
    ``expect``) and an un-imported ``np``; it is rebuilt here so the
    one-hot matrix and the expected output are derived from ``dataset``.
    """
    oht = TransactionEncoder()
    onehot = oht.fit(dataset).transform(dataset)
    # inverse_transform emits each transaction's items in column order,
    # and columns_ is sorted — so sorting each input transaction gives
    # the expected round-trip result.
    expected = [sorted(transaction) for transaction in dataset]
    assert [list(t) for t in oht.inverse_transform(onehot)] == expected
# These names were used below but never imported anywhere in the file
# (only TransactionEncoder was) — without them this cell raises NameError.
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

pd.set_option('display.max_columns', None)  # show every column when printing

# -----
# Second worked example: classic I1..I5 transaction set.
transactions = [['I1', 'I2', 'I5'], ['I2', 'I4'], ['I2', 'I3'],
                ['I1', 'I2', 'I4'], ['I1', 'I3'], ['I2', 'I3'],
                ['I1', 'I3'], ['I1', 'I2', 'I3', 'I5'],
                ['I1', 'I2', 'I3']]
transactions  # bare expression: notebook-style inspection
# ----
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
te_ary
te.columns_
df = pd.DataFrame(te_ary, columns=te.columns_)
df  # one-hot matrix: True/False marks each item's presence per transaction
df.shape
# get back original transactions
orgtrans1 = te_ary[:]
te.inverse_transform(orgtrans1)
# %%%
# Frequent itemsets — the key step; a low threshold keeps nearly everything.
support_threshold = 0.01
frequent_itemsets = apriori(df, min_support=support_threshold,
                            use_colnames=True)
frequent_itemsets
print(frequent_itemsets)  # dataframe with the itemsets
# %%%% - Support
# Rules filtered by support at two thresholds for comparison.
support3 = association_rules(frequent_itemsets, metric="support",
                             min_threshold=.3)
print(support3)
print(support3[['antecedents', 'consequents', 'support', 'confidence']])
# ---
support2 = association_rules(frequent_itemsets, metric="support",
                             min_threshold=.2)
print(support2[['antecedents', 'consequents', 'support', 'confidence']])