def experiment(transactions, epsilon, delta, mu, supp, rep):
    """Compare frequent itemsets mined from two differently sized samples.

    Every repetition draws one sample whose size comes from
    ``toivonen_bound`` and one whose size comes from ``dbound_bound``, mines
    both with ``fim`` (``zmin=2``) and counts the itemsets both results share
    according to ``compare_fis``.  Progress is printed to stdout.

    The function also reads the module-level ``d_index`` when computing the
    d-bound.

    Args:
        transactions: Collection of transactions to sample from.
        epsilon: Forwarded to both bound functions.
        delta: Forwarded to both bound functions.
        mu: Forwarded to ``toivonen_threshold``.
        supp: Minimum support handed to ``fim`` for both samples.
        rep: Number of repetitions to run.

    Returns:
        The column-wise mean over all repetitions of
        ``[epsilon, delta, mu, t_bound, t_fi_size, threshold, d_bound,
        d_fi_size, fi_common_size, percent, milliseconds]``, converted with
        ``tolist()``.  With ``rep == 0`` there are no rows to average and
        NumPy yields NaN.
    """
    results = []
    # ``range`` instead of the Python-2-only ``xrange`` so that the function
    # runs on both Python 2 and Python 3.
    for r in range(rep):
        ts = time.time()

        # Compute the bounds (kept inside the loop so that they are part of
        # the measured duration, exactly as before)
        t_bound = toivonen_bound(epsilon, delta)
        d_bound = dbound_bound(len(transactions), epsilon, delta,
                               float(d_index), 0.5)
        print(
            '[+] Performing experiment {}/{}. (epsilon: {}, delta: {}, mu: {}, support: {})'
            .format(r + 1, rep, epsilon, delta, mu, supp))

        # Draw a sample --- Toivonen's bound
        t_sample = sample_n(transactions, t_bound)
        threshold = toivonen_threshold(len(t_sample), mu)
        print('[|] Toivonen\'s bound sample size: {:,} ({})'.format(
            len(t_sample), threshold))
        t_fi = fim(t_sample, supp=supp, zmin=2)
        t_fi_size = len(t_fi)
        print('[|] Frequent itemsets found {}'.format(t_fi_size))

        # Draw a sample --- d-bound
        d_sample = sample_n(transactions, d_bound)
        print('[|] d-bound sample size: {:,}'.format(len(d_sample)))
        d_fi = fim(d_sample, supp=supp, zmin=2)
        d_fi_size = len(d_fi)
        print('[|] Frequent itemsets found {}'.format(d_fi_size))

        print('[+] Comparing the frequent itemsets')
        fi_common = compare_fis(t_fi, d_fi)
        fi_common_size = len(fi_common)
        # Average of the overlap percentage as seen from either result set
        percent = numpy.mean([
            percentage(t_fi_size, fi_common_size),
            percentage(d_fi_size, fi_common_size)
        ])
        print('[|] {} frequent itemsets in common ({:.2f}%)'.format(
            fi_common_size, percent))

        # Finish the experiment
        te = time.time()
        milliseconds = (te - ts) * 1000.0
        print('[+] Experiment duration: {:.4f} ms'.format(milliseconds))
        print('')

        results.append([
            epsilon, delta, mu, t_bound, t_fi_size, threshold, d_bound,
            d_fi_size, fi_common_size, percent, milliseconds
        ])

    return numpy.mean(results, axis=0).tolist()
def _get_frequent_candidates(transactions, min_support, max_words):
    """
    Run the frequent itemset miner ``fim.fim`` on the given transactions.

    Input:
        transactions: list of tuples representing the items of each
                      transaction
        min_support: minimum support of an item set
                     (positive: percentage, negative: absolute number)
        max_words: maximum number of items in each frequent set

    Output:
        list of extracted frequent itemsets in the form (itemset, support)
    """
    mining_options = {'supp': min_support, 'zmax': max_words}
    return fim.fim(transactions, **mining_options)
# print(test_pids_cat2[0]) # print(test_known_tracks[test_pids_cat2[0]]) # print([x[1] for x in test_known_tracks[test_pids_cat2[0]]]) # # print("start") sequences = urm_to_sequences(urm_pos=dr.get_position_matrix(position_type='last'), target_list=[x[1] for x in test_known_tracks[test_pids_cat2[0]]], min_common=1) # for s in sequences: print(s) # for s in sequences[0:2]: # print("seuences:", s) # print("maximal") seq = fim(sequences[0:2], target='maximal', supp=-2, zmin=2, report='a') # for s in seq: # print("max>", s) # print("normale") # seq = fim(sequences[0:10], supp=-2, zmin=2, report='a') # print("norm", seq) # print("prefixspan") sequences_for_prefix = urm_to_sequences(urm_pos=dr.get_position_matrix(position_type='last'), target_list=[x[1] for x in test_known_tracks[test_pids_cat2[0]]], min_common=1, list_of_list_of_listss=True) model = PrefixSpan.train(sequences_for_prefix, minSupport=0.1, maxPatternLength=250) result = model.freqSequences().collect()
# --- save generated pattern spectrum --- if cnt > 0 and pspfn != "": # if file name for pattern spectrum t = time() # start timer, print log message stderr.write('writing %s ... ' % pspfn) with open(pspfn, 'w') as out: # write pattern spectrum to a file for s in sorted([(z,c,psp[z,c]) for z,c in psp]): out.write(('%d'+pssep+'%d'+pssep+'%.16g\n') % s) stderr.write('[%d signature(s)]' % len(psp)) stderr.write(' done [%.2fs].\n' % (time()-t)) # --- analyze original data set --- if len(args) < 2: exit() # check for an output file name t = time() # start timer, print log message stderr.write('analyzing original data ... ') pats = fim(tracts, target, supp, zmin, zmax, 'a', border=border) stderr.write('[%d pattern(s)]' % len(pats)) stderr.write(' done [%.2fs].\n' % (time()-t)) # --- pattern set reduction --- if pred != 'x': # if to filter with pattern spectrum t = time() # start timer, print log message stderr.write('reducing pattern set ... ') pats = patred(pats, pred, border, False) stderr.write('[%d pattern(s)]' % len(pats)) stderr.write(' done [%.2fs].\n' % (time()-t)) # --- write output file --- t = time() # start timer, print log message stderr.write('writing %s ... ' % args[1]) with open(args[1], 'w') as out:
# Demo selector, driven by the first command-line argument:
#   tid < -2 : print the fpgrowth docstring
#   tid = -2 : print the eclat docstring
#   tid = -1 : print the apriori docstring
#   tid =  0 : run apriori on the toy transactions
#   tid =  1 : run eclat on the toy transactions
#   tid =  2 : run fpgrowth on the toy transactions
#   tid >= 3 : run fim (report='#') on the toy transactions
tid = int(argv[1])
if tid < -2:
    print(fpgrowth.__doc__)
elif tid < -1:
    print(eclat.__doc__)
elif tid < 0:
    print(apriori.__doc__)
else:
    # small fixed transaction database used by every demo run
    tracts = [[1, 2, 3],
              [1, 4, 5],
              [2, 3, 4],
              [1, 2, 3, 4],
              [2, 3],
              [1, 2, 4],
              [4, 5],
              [1, 2, 3, 4],
              [3, 4, 5],
              [1, 2, 3]]
    print('transactions:')
    for t in tracts:
        print(t)
    # Results are printed with print(r) rather than the statement form
    # `print r`: for a single value both produce the same output on
    # Python 2, and only the call form is valid on Python 3.
    if tid < 1:
        print('apriori(tracts, supp=-3, zmin=2):')
        for r in apriori(tracts, supp=-3, zmin=2):
            print(r)
    elif tid < 2:
        print('eclat(tracts, supp=-3, zmin=2):')
        for r in eclat(tracts, supp=-3, zmin=2):
            print(r)
    elif tid < 3:
        print('fpgrowth(tracts, supp=-3, zmin=2):')
        for r in fpgrowth(tracts, supp=-3, zmin=2):
            print(r)
    else:
        print('fim(tracts, supp=-3, zmin=2, report=\'#\'):')
        for r in fim(tracts, supp=-3, zmin=2, report='#'):
            print(r)
print(fpgrowth.__doc__) elif tid < -1: print(eclat.__doc__) elif tid < 0: print(apriori.__doc__) else: tracts = [ [ 1, 2, 3 ], [ 1, 4, 5 ], [ 2, 3, 4 ], [ 1, 2, 3, 4 ], [ 2, 3 ], [ 1, 2, 4 ], [ 4, 5 ], [ 1, 2, 3, 4 ], [ 3, 4, 5 ], [ 1, 2, 3 ] ] print('transactions:') for t in tracts: print(t) if tid < 1: print ('apriori(tracts, supp=-3, zmin=2):') for r in apriori(tracts, supp=-3, zmin=2): print r elif tid < 2: print ('eclat(tracts, supp=-3, zmin=2):') for r in eclat(tracts, supp=-3, zmin=2): print r elif tid < 3: print ('fpgrowth(tracts, supp=-3, zmin=2):') for r in fpgrowth(tracts, supp=-3, zmin=2): print r else: print ('fim(tracts, supp=-3, zmin=2, report=\'#\'):') for r in fim(tracts, supp=-3, zmin=2, report='#'): print r
# human initial state
# NOTE(review): the compartments look like fractions of a population scaled
# by |opt_repH| -- confirm against the model definition.
I0 = data_case[0] / abs(opt_repH)
E0 = 2 * data_case[0] / abs(opt_repH)
S0 = 1.0 - I0 - E0

# mosquito initial state
A0 = 1.0
S0_m = 1.0 / abs(opt_mu_m)
E0_m = 0.0
I0_m = 0.0

ini = [S0, E0, I0, A0, S0_m, E0_m, I0_m]
# one time point every 7 units, from 0 up to (but excluding) 7 * (53 - sim_start)
time_step = np.array(range(0, 7 * (53 - sim_start), 7))

# simulated case data
dengue = dengue_model()
opt_res = dengue.ode_run(dengue.model, ini, time_step, param)
# Cumulative trapezoidal integrals of column 1 of the solution, scaled by
# |opt_repH| * alpha and prefixed with 0; the second one leaves out the last
# time point so that the difference below spans one step.
res_inc_t1 = np.append(np.array([0]),
                       spi.cumtrapz(abs(opt_repH) * alpha * opt_res[:, 1]))
res_inc_t0 = np.append(np.array([0]),
                       spi.cumtrapz(abs(opt_repH) * alpha * opt_res[0:-1, 1]))
data_mw = 7 * (res_inc_t1 + 6.0 / 7.0) - 7 * np.append(
    np.array([0]), (res_inc_t0 + 6.0 / 7.0))
# print 'data_mw', data_mw

# fisher information matrix
fim_dengue = fim(dengue.model, time_step)
# NOTE(review): the next line rebinds the name `fim` from the callable used
# just above to the computed matrix, so `fim(...)` cannot be called again
# afterwards.  Left unchanged because later code may read `fim` as the matrix.
fim = fim_dengue.Fim_x(param, data_mw)  # default pert = 0.01
fim_r = fim_dengue.Fim_rank(fim)  # set the default tolerance to match matlab

# print() calls instead of Python-2-only print statements.  The two blanks
# after "rank:" reproduce what `print 'rank: ', fim_r` used to emit.
print(fim)
print('rank:  %s' % (fim_r,))
#-----------------------------------------------------------------------
# Benchmark driver: builds random transactions, dumps them to data.txt and
# times fim followed by each individual mining algorithm.

if __name__ == '__main__':
    # number of timing repetitions; a single run unless given on the command line
    runs = 1 if len(argv) <= 1 else int(argv[1])

    # 1000 random transactions over the items 1..100, each item kept when
    # random() falls below 0.1
    tracts = [[i+1 for i in range(100) if random() < 0.1]
              for k in range(1000)]

    # one transaction per line, every item followed by a blank
    with open('data.txt', 'w') as out:
        for t in tracts:
            out.write(''.join(['%d ' % i for i in t]) + '\n')

    stderr.write('frequent item sets:\n')
    stderr.write('fim ... ')
    t = time()
    for r in range(runs):
        pypats = fim(tracts, supp=-2, zmin=2, report='a')
    stderr.write('done [%.3fs].\n' % (time()-t))
    # fim's patterns as a set of (sorted item tuple, second result field) pairs
    ref = {(tuple(sorted(s)), x) for s, x in pypats}
    stderr.write('\n')

    # time every algorithm with the same settings as the fim run above
    algorithms = (('apriori', apriori), ('eclat', eclat),
                  ('fpgrowth', fpgrowth), ('sam', sam), ('relim', relim))
    for p, f in algorithms:
        stderr.write(p + ' (all)\n')
        stderr.write('python ... ')
        t = time()
        for r in range(runs):
            pypats = f(tracts, supp=-2, zmin=2, report='a')
        stderr.write('done [%.3fs].\n' % (time()-t))