def get_random_transactions(transaction_number=500,
                            max_item_per_transaction=100,
                            max_key_length=50,
                            key_alphabet=string.ascii_letters,
                            universe_size=1000):
    '''Generate a random list of transactions, each one a set of items.

    A universe of `universe_size` items is built first, then
    `transaction_number` transactions are drawn from it with the `random`
    module (seed it beforehand to get reproducible output).

    :param transaction_number: number of transactions to generate.
    :param max_item_per_transaction: upper bound on the number of items
        drawn for one transaction (the lower bound is 0). The bound is
        capped at the size of the universe, because more items than exist
        cannot be sampled without replacement.
    :param max_key_length: maximum length of a generated key (minimum is
        1). Ignored when `key_alphabet` is None.
    :param key_alphabet: sequence of characters keys are built from
        (default is ascii letters). If None, the integers of
        range(universe_size) are used as the items instead.
    :param universe_size: number of items in the universe.
    :returns: a list of `transaction_number` sets.

    Randomly generated keys are not guaranteed to be unique, so a
    transaction can hold fewer distinct items than were sampled.
    '''
    if key_alphabet is None:
        words = list(range(universe_size))
    else:
        words = [
            ''.join(random.choice(key_alphabet)
                    for _ in range(random.randint(1, max_key_length)))
            for _ in range(universe_size)
        ]

    # random.sample raises ValueError when asked for more elements than
    # the population holds, so never request more than len(words).
    largest_draw = min(max_item_per_transaction, len(words))

    return [
        set(random.sample(words, random.randint(0, largest_draw)))
        for _ in range(transaction_number)
    ]
def get_random_transactions(transaction_number=500,
                            max_item_per_transaction=100,
                            max_key_length=50,
                            key_alphabet=string.ascii_letters,
                            universe_size=1000):
    '''Generate a random list of transactions, each one a set of items.

    A universe of `universe_size` items is built first, then
    `transaction_number` transactions are drawn from it with the `random`
    module (seed it beforehand to get reproducible output).

    :param transaction_number: number of transactions to generate.
    :param max_item_per_transaction: upper bound on the number of items
        drawn for one transaction (the lower bound is 0). The bound is
        capped at the size of the universe, because more items than exist
        cannot be sampled without replacement.
    :param max_key_length: maximum length of a generated key (minimum is
        1). Ignored when `key_alphabet` is None.
    :param key_alphabet: sequence of characters keys are built from
        (default is ascii letters). If None, the integers of
        range(universe_size) are used as the items instead.
    :param universe_size: number of items in the universe.
    :returns: a list of `transaction_number` sets.

    Randomly generated keys are not guaranteed to be unique, so a
    transaction can hold fewer distinct items than were sampled.
    '''
    if key_alphabet is None:
        words = list(range(universe_size))
    else:
        words = [
            ''.join(random.choice(key_alphabet)
                    for _ in range(random.randint(1, max_key_length)))
            for _ in range(universe_size)
        ]

    # random.sample raises ValueError when asked for more elements than
    # the population holds, so never request more than len(words).
    largest_draw = min(max_item_per_transaction, len(words))

    return [
        set(random.sample(words, random.randint(0, largest_draw)))
        for _ in range(transaction_number)
    ]
def test_itemset_perf(perf_round=10, sparse=True, seed=None):
    '''Non-scientifically compare the speed of the item set miners.

    Runs `perf_round` rounds of each of four configurations: FP-Growth
    with pruning, FP-Growth without pruning, Relim, and SAM. For each one
    it prints the total wall-clock time and the number of frequent item
    sets reported by the last round.

    One random set of transactions is created up front and shared by all
    configurations.

    :param perf_round: number of rounds run per configuration. If it is
        zero or negative no round runs and the "Computed ..." line is
        skipped, since there is no result to report.
    :param sparse: if True, use 500 transactions over a universe of 2000
        items with a support of 10. If False, use 75 transactions over a
        universe of 110 items with a support of 25; the random
        transactions are then more dense, i.e., some elements appear in
        almost all transactions.
    :param seed: passed to `random.seed` so the same sample can be
        obtained across multiple calls.
    '''
    random.seed(seed)

    if sparse:
        universe_size = 2000
        transaction_number = 500
        support = 10
    else:
        universe_size = 110
        transaction_number = 75
        support = 25

    transactions = get_random_transactions(
        transaction_number=transaction_number,
        universe_size=universe_size,
        key_alphabet=None)
    print('Random transactions generated with seed {0}\n'.format(seed))

    def run_rounds(label, run_once):
        # Time `perf_round` calls of `run_once` and print the summary.
        # `run_once` must return a pair whose first element is the number
        # of frequent item sets found; the second element is unused here.
        n = None
        start = time()
        for i in range(perf_round):
            n, _report = run_once()
            print('Done round {0}'.format(i))
        end = time()
        print('{0} took: {1}'.format(label, end - start))
        # With no rounds there is no count; the original code crashed
        # here on an unbound `n`.
        if perf_round > 0:
            print('Computed {0} frequent item sets.'.format(n))

    # NOTE(review): the meaning of the leading `False` argument is defined
    # by the test_* helpers elsewhere in this file -- confirm there.
    run_rounds(
        'FP-Growth (pruning on)',
        lambda: test_fpgrowth(False, transactions, support, pruning=True))
    run_rounds(
        'FP-Growth (pruning off)',
        lambda: test_fpgrowth(False, transactions, support, pruning=False))
    run_rounds(
        'Relim',
        lambda: test_relim(False, transactions, support))
    run_rounds(
        'Sam',
        lambda: test_sam(False, transactions, support))
def test_itemset_perf(perf_round=10, sparse=True, seed=None):
    '''Non-scientifically compare the speed of the item set miners.

    Runs `perf_round` rounds of each of four configurations: FP-Growth
    with pruning, FP-Growth without pruning, Relim, and SAM. For each one
    it prints the total wall-clock time and the number of frequent item
    sets reported by the last round.

    One random set of transactions is created up front and shared by all
    configurations.

    :param perf_round: number of rounds run per configuration. If it is
        zero or negative no round runs and the "Computed ..." line is
        skipped, since there is no result to report.
    :param sparse: if True, use 500 transactions over a universe of 2000
        items with a support of 10. If False, use 75 transactions over a
        universe of 110 items with a support of 25; the random
        transactions are then more dense, i.e., some elements appear in
        almost all transactions.
    :param seed: passed to `random.seed` so the same sample can be
        obtained across multiple calls.
    '''
    random.seed(seed)

    if sparse:
        universe_size = 2000
        transaction_number = 500
        support = 10
    else:
        universe_size = 110
        transaction_number = 75
        support = 25

    transactions = get_random_transactions(
        transaction_number=transaction_number,
        universe_size=universe_size,
        key_alphabet=None)
    print('Random transactions generated with seed {0}\n'.format(seed))

    def run_rounds(label, run_once):
        # Time `perf_round` calls of `run_once` and print the summary.
        # `run_once` must return a pair whose first element is the number
        # of frequent item sets found; the second element is unused here.
        n = None
        start = time()
        for i in range(perf_round):
            n, _report = run_once()
            print('Done round {0}'.format(i))
        end = time()
        print('{0} took: {1}'.format(label, end - start))
        # With no rounds there is no count; the original code crashed
        # here on an unbound `n`.
        if perf_round > 0:
            print('Computed {0} frequent item sets.'.format(n))

    # NOTE(review): the meaning of the leading `False` argument is defined
    # by the test_* helpers elsewhere in this file -- confirm there.
    run_rounds(
        'FP-Growth (pruning on)',
        lambda: test_fpgrowth(False, transactions, support, pruning=True))
    run_rounds(
        'FP-Growth (pruning off)',
        lambda: test_fpgrowth(False, transactions, support, pruning=False))
    run_rounds(
        'Relim',
        lambda: test_relim(False, transactions, support))
    run_rounds(
        'Sam',
        lambda: test_sam(False, transactions, support))