Beispiel #1
0
def measure(sequences, min_sup=0.5):
    """Return the average duration of ten SPAM runs over *sequences*.

    For every run the word bitmaps are rebuilt from *sequences* and a new
    ``SPAM`` instance is created with *min_sup*; only the call to
    ``SPAM.spam`` itself is timed (via ``timeit.timeit`` with ``number=1``).

    Args:
        sequences: Input passed unchanged to ``generate_words_bitmaps``.
        min_sup: First argument forwarded to the ``SPAM`` constructor
            (presumably the minimum support threshold).

    Returns:
        The arithmetic mean of the ten timings reported by ``timeit``.
    """

    def _time_single_run():
        # Bitmap generation and SPAM construction stay outside the timed call.
        bitmaps = generate_words_bitmaps(sequences)
        miner = SPAM(min_sup, bitmaps)
        return timeit.timeit(miner.spam, number=1)

    durations = [_time_single_run() for _ in range(10)]
    return sum(durations) / len(durations)
Beispiel #2
0
def start(minsup, data_path):
    """Mine frequent patterns from the data at *data_path* and save them.

    Sequences are built with ``DataSequence.data_sequence_factory`` using the
    ``'customers'`` and ``'texts'`` entries of ``TEST``, converted to bitmaps,
    and handed to ``SPAM``. Each translated pattern is written on its own
    line to a file named ``frequent_patterns`` in the current working
    directory (the file is opened in ``'w'`` mode, so it is overwritten).

    Args:
        minsup: First argument forwarded to the ``SPAM`` constructor.
        data_path: Forwarded as ``path`` to the sequence factory.
    """
    sequences = DataSequence.data_sequence_factory(
        customers=TEST['customers'],
        texts=TEST['texts'],
        path=data_path,
    )
    miner = SPAM(minsup, generate_words_bitmaps(sequences))
    mined = miner.spam()

    with open("frequent_patterns", 'w') as out:
        out.writelines(
            str(translated) + '\n' for translated in translate_patterns(mined)
        )
Beispiel #3
0
def measure_spam(number_of_items, number_of_sequences,
                 number_of_customers, min_items_in_transaction,
                 max_items_in_transaction, min_sup):
    """Return the average duration of ten SPAM runs on generated data.

    Each of the ten runs generates a fresh set of test sequences with
    ``generate_test_sequeneces`` (all five size arguments are forwarded
    positionally, in the order received), builds the word bitmaps and a new
    ``SPAM`` instance, and times only the ``SPAM.spam`` call using
    ``timeit.timeit`` with ``number=1``.

    Args:
        number_of_items, number_of_sequences, number_of_customers,
        min_items_in_transaction, max_items_in_transaction: Forwarded
            unchanged to ``generate_test_sequeneces``.
        min_sup: First argument forwarded to the ``SPAM`` constructor.

    Returns:
        The arithmetic mean of the ten timings reported by ``timeit``.
    """

    def _time_single_run():
        # Data generation and bitmap construction are not part of the timing.
        generated = generate_test_sequeneces(
            number_of_items, number_of_sequences, number_of_customers,
            min_items_in_transaction, max_items_in_transaction)
        miner = SPAM(min_sup, generate_words_bitmaps(generated))
        return timeit.timeit(miner.spam, number=1)

    durations = [_time_single_run() for _ in range(10)]
    return sum(durations) / len(durations)
Beispiel #4
0
from read_data import generate_test_sequeneces
from bitmap import generate_words_bitmaps
import timeit
import matplotlib.pyplot as plt


def translate_patterns(frequent_patterns):
    """Map every item of every pattern through ``DataSequence.get_words``.

    Only the first element of each pattern (``pattern[0]``) is used; it is
    iterated as a sequence of itemsets, each itemset being an iterable of
    items. Any further elements of a pattern are ignored.

    Args:
        frequent_patterns: Iterable of indexable patterns.

    Returns:
        A list with one entry per pattern; each entry is a list of itemsets,
        and each itemset is a list of ``DataSequence.get_words(item)``
        results, in the original order.
    """
    return [
        [
            [DataSequence.get_words(item) for item in itemset]
            for itemset in pattern[0]
        ]
        for pattern in frequent_patterns
    ]


if __name__ == "__main__":
    # Script entry point: mine the simple sample sequences with a fixed
    # threshold argument of 0.5 and dump the translated patterns to disk.
    # NOTE(review): generate_simple_sequeneces, SPAM and DataSequence are not
    # among the imports visible at the top of this file - confirm they are in
    # scope before running.
    sequences = generate_simple_sequeneces()
    bitmaps_for_words_ids = generate_words_bitmaps(sequences)

    spam_alg = SPAM(0.5, bitmaps_for_words_ids)
    frequent_patterns = spam_alg.spam()

    # One translated pattern per line; 'w' mode overwrites any previous file.
    with open("frequent_patterns", 'w') as file:
        file.writelines(
            str(pattern) + '\n'
            for pattern in translate_patterns(frequent_patterns)
        )
Beispiel #5
0
def measure_memory(sequences, min_sup=0.5):
    """Run one complete SPAM pass over *sequences* and discard the result.

    The function itself records nothing and returns ``None``; presumably it
    is meant to be executed under an external memory profiler - TODO confirm.

    Args:
        sequences: Input passed unchanged to ``generate_words_bitmaps``.
        min_sup: First argument forwarded to the ``SPAM`` constructor.
    """
    # Intermediate objects are kept in named locals so that their lifetimes
    # span the whole run.
    word_bitmaps = generate_words_bitmaps(sequences)
    miner = SPAM(min_sup, word_bitmaps)
    miner.spam()