def assign_servers_test_output(df_train, df_test, percentile, confidence,
                               apps_server):
    """Mine app-pair association rules from the training window and apply
    them to the test window to produce server assignments and latency stats.

    :param df_train: training DataFrame; must have Date, pairs, pairs2,
        norm_latency, Duration and Packets columns
    :param df_test: test DataFrame forwarded to server_association()
    :param percentile: quantile of pair frequency used as the FP-growth
        support threshold
    :param confidence: minimum association-rule confidence
    :param apps_server: app/server mapping forwarded to the helper functions
    :return: (server_df, server_assignments, total_latency,
        total_latency_model, avg_latency, avg_latency_model)
    """
    df_train['hour'] = None
    df_train['hour'] = pd.DatetimeIndex(df_train['Date']).hour
    data_l = list(df_train['pairs'])
    # Aggregate per pair: how often it occurs, its mean normalized latency,
    # and its total duration / packet counts.
    pairs_count = (df_train.groupby('pairs2').agg({
        'Date': 'count',
        'norm_latency': 'mean',
        'Duration': 'sum',
        'Packets': 'sum'
    }).reset_index())
    pairs_count.columns = [
        'pairs', 'frequency', 'avg_norm_latency', 'total_duration',
        'total_packets'
    ]
    # sum of all duration time divided by sum of all packets transfered for
    # that pair.  NOTE(review): the code actually divides each pair's
    # duration by the *grand total* of packets (``.sum()``), not by that
    # pair's own packet count as this comment claims — confirm intent.
    pairs_count['norm_latency'] = (
        pairs_count['total_duration'] / pairs_count['total_packets'].sum()
    ) * 100
    # Use the chosen frequency quantile as FP-growth's support threshold.
    per_n = (pairs_count['frequency'].quantile(percentile))
    patterns = pyfpgrowth.find_frequent_patterns(data_l, per_n)
    rules = pyfpgrowth.generate_association_rules(patterns, confidence)
    # format the rules, bring back in the other info on latency rank
    formated_rules = format_rules(rules, df_train, apps_server)
    # now we make the server assignments based on the training rules applied
    # to the test data
    server_df, server_assignments, total_latency, total_latency_model, avg_latency, avg_latency_model = server_association(
        formated_rules, df_test, apps_server)
    # return(formated_rules)
    return (server_df, server_assignments, total_latency, total_latency_model,
            avg_latency, avg_latency_model)
def upload():
    """Handle a CSV upload, mine association rules with FP-growth, persist
    the run (file name, support, confidence, rules) and render the results.

    Expects a POST with an 'inputFile' file field plus 'support' and
    'confidence' form fields given as integer percentages.
    """
    # path_parent = os.path.dirname(os.getcwd())
    # os.chdir(path_parent)
    if request.method == "POST":
        if request.files:
            file = request.files['inputFile']
            support = request.form['support']
            confidence = request.form['confidence']
            file.save(os.path.join(app.config["FILE_UPLOADS"], file.filename))
            # NOTE(review): assumes FILE_UPLOADS points at ./uploads —
            # confirm, otherwise the save path and read path diverge.
            with open('./uploads/' + file.filename, newline='') as f:
                reader = csv.reader(f)
                data = list(reader)
            transactions = data
            # Support/confidence arrive as percentages; scale accordingly.
            patterns = pyfpgrowth.find_frequent_patterns(
                transactions, len(transactions) * int(support) / 100)
            rules = pyfpgrowth.generate_association_rules(
                patterns, int(confidence) / 100)
            # return str(rules)
            res = rules
            newFile = File(name=file.filename, sup=support, con=confidence,
                           result=res)
            db.session.add(newFile)
            db.session.commit()
            # BUG FIX: was `result=result` — `result` is undefined here and
            # raised NameError; the mined rules live in `res`.
            return render_template("data.html", result=res)
def popupmsg():
    """Show a Tk popup with an association-rule suggestion for the goal
    currently selected in the `list1_goal` listbox.

    Relies on module-level state: `s` (input to pyfpgrowth), `list1_goal`
    (a Tk Listbox) and `a` (sequence of goal records indexed by selection).
    """
    # NOTE(review): generate_association_rules normally takes *patterns*,
    # not raw transactions — confirm `s` already holds frequent patterns.
    rules = pyfpgrowth.generate_association_rules(s, 0.33)
    # Order rules by their (consequent, confidence) value, descending, so
    # the strongest suggestion is found first.
    k = dict(sorted(rules.items(), key=operator.itemgetter(1), reverse=True))
    ls = list(k.keys())
    #print(ls)
    lgid = list1_goal.curselection()
    #print(lgid)
    n_gid = int(lgid[0])
    # NOTE(review): curselection() indices are already 0-based, so the -1
    # here may be off by one (selection 0 maps to a[-1]) — verify against
    # how `a` is populated.
    gid = a[n_gid - 1]
    print("gid:" + str(gid))
    print("s :" + str(s))
    print("rules : " + str(rules))
    sug = "goal id :" + str(gid[0]) + " suggestion: " + str(
        k[gid][0]) + "probabilty :" + str(k[gid][1])
    '''for i in range(len(rules)):
        print(rules[gid][0][0])
        if rules[i][0][0]==gid:
            print(rules[i][1][1])'''
    '''i=0
    for key, value in k.items():
        if gid == ls[i][0]:
            print(k[gid][0])
            #print(value[i][0])
            #elif gif in value[][0]
        i+=1
    '''
    # Build and show the suggestion popup.
    popup = tk.Tk()
    popup.wm_title("suggestion")
    label = tk.Label(popup, text=sug)
    label.pack(side="top", fill="x", pady=10)
    B1 = tk.Button(popup, text="Okay", command=popup.destroy)
    B1.pack()
    popup.mainloop()
def generate_rules(dataframe, information=None, minsupport=25,
                   minconfidence=0.55, satisfied_value=3):
    '''
    Mine association rules from user/product ratings with FP-growth.

    :param dataframe: it should includes userID, productID, rating
        (iterated via ``itertuples`` as reviewerID / productID / rating)
    :param information: the transactions we already have; appended to the
        ones derived from the dataframe when given
    :param minsupport: the itemset at least shows up minsupport times
    :param minconfidence: the min possibility that the consumer will apply
        the rules
    :param satisfied_value: consider the user likes the product only when
        the rating is >= this value
    :return: rules: the association rules (pyfpgrowth dict)
    '''
    # BUG FIX: `information=[]` was a mutable default argument; default to
    # None and treat it as "no extra transactions".
    raw_dict = {}
    # transform dataframe to {user: {product: rating}} keeping only
    # satisfied ratings
    for row in dataframe.itertuples():
        raw_dict.setdefault(row.reviewerID, {})
        if float(row.rating) >= satisfied_value:
            raw_dict[row.reviewerID].update({row.productID: row.rating})
    # One transaction per user: the products they liked.
    transaction = [list(products.keys()) for products in raw_dict.values()]
    if information:
        transaction.extend(information)
    # pp.pprint(transaction)
    # pp.pprint(len(transaction))
    # generate the rules
    patterns = pyfpgrowth.find_frequent_patterns(transaction, minsupport)
    rules = pyfpgrowth.generate_association_rules(patterns, minconfidence)
    return rules
def categorize_queries(self, **data):
    """
    Executes Apriori or FP-growth (per ``self.alg``) over the transactions
    built from ``self.df``/``self.col`` and assigns a category per row.

    Keyword arguments:
        min_support -- minimum support; an absolute count that is converted
                       to a ratio for apriori (default 10).
        min_lift -- The minimum lift of relations (float). (>1 is likely)
        min_probability -- minimum probability for FP-growth rules
                           (default 0.5).
        min_confidence -- The minimum confidence of relations (float).

    Side effects: sets ``self.categories``, ``self.counts`` and adds the
    ``match_queries`` / ``category`` columns to ``self.df``.
    """
    min_support = data.get('min_support', 10)
    min_lift = data.get('min_lift', 1)
    min_probability = data.get('min_probability', 0.5)
    min_confidence = data.get('min_confidence', 0)
    print("Converting to transactions.")
    transactions, match_queries = self.create_transactions(
        self.df, self.col)
    if self.alg.lower() == "apriori":
        print("Running Apriori")
        # apriori() wants support as a ratio, so scale the absolute count.
        min_support = float(min_support / len(transactions))
        results = list(
            apriori(transactions,
                    min_support=min_support,
                    min_confidence=min_confidence,
                    min_lift=min_lift,
                    max_length=None))
        print("Making Categories")
        self.categories = [' '.join(list(l.items)) for l in results]
    elif self.alg.lower() == "fpgrowth":
        print("Running FPGrpwth")
        results = list(
            pg.generate_association_rules(
                pg.find_frequent_patterns(transactions, min_support),
                min_probability))
        print("Making Categories")
        # NOTE(review): list(rules_dict) yields only the antecedent tuples;
        # consequents/confidences are discarded here — confirm intended.
        self.categories = [' '.join(l) for l in results]
    else:
        raise Exception(
            "{} is not one of the available algorithms (`apriori`, `fpgrowth`)"
            .format(self.alg))
    print('Total Categories: {}'.format(len(set(self.categories))))
    self.df['match_queries'] = match_queries
    self.df['category'] = self.df.match_queries.map(
        lambda x: self.match_labels(x, self.categories))
    self.counts = pd.DataFrame(self.df.category.value_counts())
def find_rules(data, support_threshold, confidence_threshold):
    """Mine frequent patterns from `data`, then derive and return the
    association rules meeting `confidence_threshold`."""
    frequent = find_frequent_patterns(transactions=data,
                                      support_threshold=support_threshold)
    return generate_association_rules(
        patterns=frequent, confidence_threshold=confidence_threshold)
def fpgrouth(id, dataset, principal):
    """Run FP-growth (support 3, confidence 0.6) on the selected dataset and
    build the template context with the average rule confidence.

    :param id: dataset selector passed to pickdataset()
    :param dataset: raw dataset collection pickdataset() chooses from
    :param principal: True -> primary-algorithm context keys, else the
        comparison keys ('resultado2', 'pasos2', ...)
    :return: dict of context variables for the results template
    """
    pasos = "Dataset Cargado" + '\n'
    dataset = pickdataset(int(id), dataset)
    patterns = pyfpgrowth.find_frequent_patterns(dataset, 3)
    rules = pyfpgrowth.generate_association_rules(patterns, 0.6)
    pasos += "Encuentros: " + '\n'
    pasos += str(patterns) + '\n'
    # FIX/idiom: each rule value is a (consequent, confidence) tuple, so the
    # confidence is read directly instead of being re-parsed out of the
    # tuple's string representation (str -> split(',') -> split('(')).
    avgReal = 0
    for i in rules.values():
        avgReal += float(i[1])
    # NOTE(review): this still raises ZeroDivisionError when no rule reaches
    # the 0.6 confidence bar — confirm upstream guarantees rules exist.
    avgReal = str((avgReal / len(rules.values())) * 100) + '% Confianza'
    reglas = str(rules)
    img = 'No aplica'
    if principal:
        context = {
            'algoritmoPrincipal': 'FP-growth',
            'resultado': avgReal,
            'pasos': pasos,
            'reglas': reglas,
            'img': img
        }
    else:
        context = {
            'algoritmoComparar': 'FP-growth',
            'resultado2': avgReal,
            'pasos2': pasos,
            'reglas2': reglas,
            'img2': img
        }
    return context
def run_rule_manually(self):
    """Manually (re)run rule mining for each record in the recordset:
    Apriori or FP-growth depending on `rule_type`, store the formatted
    results, push them to the website, and open the rules list view.

    :return: an ir.actions.act_window descriptor for the rules tree/form view
    """
    for record in self:
        transactions = self.get_sale_data()
        if (record.rule_type == 'apriori'):
            # apriori() takes min_support as a ratio directly.
            results = self.format_rules(
                list(
                    apriori(transactions,
                            min_support=record.min_supp,
                            min_confidence=record.min_conf)))
            self.update_rule(results, 'apriori')
        else:
            # FP-growth takes an absolute count, so scale the ratio by the
            # number of transactions.
            totalRow = len(transactions)
            results = self.format_rules_fp(
                pyfpgrowth.generate_association_rules(
                    pyfpgrowth.find_frequent_patterns(
                        transactions, totalRow * record.min_supp),
                    record.min_conf))
            self.update_rule(results, 'fpgrowth')
        self.update_on_web()
    return {
        'type': 'ir.actions.act_window',
        'name': 'View Rules',
        'view_type': 'form',
        'view_mode': 'tree,form',
        'res_model': 'data.mining.show',
        'target': 'current',
    }
def fpGrowth(transactions):
    """Mine frequent patterns (support >= 1) and rules (confidence >= 0.2),
    print both, and return a formatted summary string."""
    frequent = pyfpgrowth.find_frequent_patterns(transactions, 1)
    mined_rules = pyfpgrowth.generate_association_rules(frequent, 0.2)
    print(frequent)
    print(mined_rules)
    # Same text as "\nFrecuencias\n<patterns>\nReglas\n<rules>".
    return "\n".join(["", "Frecuencias", str(frequent), "Reglas",
                      str(mined_rules)])
def demo():
    """FP-growth demo on a fixed basket list (support 2, confidence 0.7)."""
    baskets = [
        [1, 2, 5], [2, 4], [2, 3],
        [1, 2, 4], [1, 3], [2, 3],
        [1, 3], [1, 2, 3, 5], [1, 2, 3],
    ]
    frequent = pyfpgrowth.find_frequent_patterns(baskets, 2)
    print(pyfpgrowth.generate_association_rules(frequent, 0.7))
def fpgrowth(seq, **kwargs):
    """Mine association rules from `seq` with FP-growth.

    Required kwargs (consumed): 'sup' (support count, int-coercible) and
    'conf' (minimum confidence, float-coercible).
    Returns a list of [antecedent, consequent, confidence] triples.
    """
    import pyfpgrowth
    support_count = int(kwargs.pop('sup'))
    min_confidence = float(kwargs.pop('conf'))
    frequent = pyfpgrowth.find_frequent_patterns(seq, support_count)
    mined = pyfpgrowth.generate_association_rules(frequent, min_confidence)
    return [[antecedent, consequent, confidence]
            for antecedent, (consequent, confidence) in mined.items()]
def pattern_mine(trans, support, confidence):
    """Mine frequent patterns and association rules from transactions.

    input: [[term1, term2, ...], [term1, term2, ...] ...]
    output: (patterns dict, rules dict) from pyfpgrowth
    """
    frequent = pyfpgrowth.find_frequent_patterns(trans, support)
    return frequent, pyfpgrowth.generate_association_rules(frequent,
                                                           confidence)
def AssociationRule(Voicing, support, confidence):
    """Mine frequent patterns and association rules per voicing group.

    :param Voicing: dict mapping a voicing key to its list of transactions
    :param support: support given as a fraction of the group's size
    :param confidence: minimum rule confidence
    :return: (Voicing_patterns, Voicing_rules) dicts keyed like `Voicing`
    """
    # BUG FIX: these dicts only existed as commented-out lines, so the
    # assignments below raised NameError (unless identically-named globals
    # happened to exist).
    Voicing_patterns = {}
    Voicing_rules = {}
    for key, transactions in Voicing.items():
        # Scale the fractional support by the group size for pyfpgrowth.
        patterns = pyfpgrowth.find_frequent_patterns(
            transactions, len(transactions) * support)
        Voicing_patterns[key] = patterns
        Voicing_rules[key] = pyfpgrowth.generate_association_rules(
            patterns, confidence)
    return Voicing_patterns, Voicing_rules
def generate_rules(data: pd.DataFrame,
                   support_threshold: int = 1,
                   confidence_threshold: float = 0.3) -> tuple:
    """Mine the 'items' column for frequent patterns and association rules.

    Returns the (patterns, rules) pair produced by pyfpgrowth.
    """
    frequent = fp.find_frequent_patterns(
        data['items'], support_threshold=support_threshold)
    mined = fp.generate_association_rules(
        frequent, confidence_threshold=confidence_threshold)
    return frequent, mined
def proses(id):
    """Re-run mining for the stored upload `id`: read its CSV from ./uploads,
    mine patterns at the stored support percentage, and return the rules
    (confidence 0.5) as a string."""
    record = File.query.get(id)
    with open('./uploads/' + record.name, newline='') as handle:
        transactions = list(csv.reader(handle))
    # The stored support is a percentage of the transaction count.
    min_count = len(transactions) * record.sup / 100
    frequent = pyfpgrowth.find_frequent_patterns(transactions, min_count)
    return str(pyfpgrowth.generate_association_rules(frequent, 0.5))
def dev_association(data):
    """Build per-cell itemsets from a 3-D binary array and mine them with
    both FP-growth and Apriori.

    :param data: 3-D array-like; axis 0 indexes devices (assumes exactly 6,
        per the hard-coded `count` list — TODO confirm), axes 1/2 index cells
    :return: (fp patterns, rules, per-device counts, raw apriori results,
        apriori rules with device-0 antecedents filtered out)
    """
    a = 0
    test_14 = 0
    # Per-device activation counter (assumes data.shape[0] == 6).
    count = [0, 0, 0, 0, 0, 0]
    # One itemset per (i, j) cell: the devices active at that cell.
    itemset = [[] for x in range(data.shape[1] * data.shape[2])]
    for i in range(data.shape[1]):
        for j in range(data.shape[2]):
            for z in range(data.shape[0]):
                if data[z, i, j] == 1:
                    itemset[a].append(z)
                    count[z] = count[z] + 1
            # Count cells where devices 1 and 4 are both active.
            if data[1, i, j] == data[4, i, j] == 1:
                test_14 += 1
            a += 1
    # count = [count[z]/data.shape[1]*data.shape[2] for z in range(len(count))]
    print(test_14)
    patterns = fpgrowth.find_frequent_patterns(itemset, 50)
    rules = fpgrowth.generate_association_rules(patterns, .2)
    association_rules = apriori(itemset,
                                min_support=0.002,
                                min_confidence=0.4,
                                min_lift=2,
                                min_length=2.5)
    association_results = list(association_rules)
    associ_rules = []
    # Flatten each apriori record into [[antecedent..., consequent...], conf].
    for item in association_results:
        # associ_rules = []
        pair = []
        for i in item[0]:
            pair.append(i)
        support = item[1]
        # NOTE(review): this loop variable shadows the FP-growth `rules`
        # above, so the `rules` returned below is actually the *last apriori
        # ordered-statistic*, not the FP-growth rules dict — confirm intent.
        for rules in item[2]:
            tmp = []
            for i in rules[0]:
                tmp.append(i)
            for i in rules[1]:
                tmp.append(i)
            associ_rules.append([tmp, rules[2]])
    dev_rules_no_0 = []
    dev_rules = np.array(associ_rules)
    print(dev_rules[0])
    print(dev_rules[0, 0])
    # print(dev_rules[0,0,0])
    print(dev_rules[0, 0][0])
    # Drop rules whose first antecedent is device 0; split the remainder
    # into ([antecedents], [consequent], confidence).
    for item in dev_rules:
        if item[0][0] == 0:
            continue
        dev_rules_no_0.append([item[0][0:-1], [item[0][-1]], item[1]])
    return patterns, rules, count, association_results, dev_rules_no_0
def get_scores(transactions, resume_words):
    """Suggest words for a resume, scored from association rules mined over
    all resumes' word transactions.

    :param transactions: one word-list per resume
    :param resume_words: the words already present in the target resume
    :return: set of suggested words not already in the resume
    """
    num_resumes = len(transactions)
    suggestion_scores = collections.Counter()
    # A pattern must appear in at least 2/3 of the resumes.
    patterns = fpg.find_frequent_patterns(transactions, num_resumes / 1.5)
    rules = fpg.generate_association_rules(patterns, 0.5)
    for antecedent, consequent in rules.items():
        # Only rules whose antecedent is fully covered by this resume count;
        # score = pattern support * rule confidence, keyed by the suggested
        # itemset (consequent is an (items, confidence) pair).
        if set(antecedent).issubset(resume_words) and antecedent in patterns:
            suggestion_scores[consequent[0]] += ((patterns[antecedent] *
                                                  consequent[1]))
    # Union all itemsets that scored >= 3.  NOTE(review): set.union(*[])
    # raises TypeError when nothing qualifies — confirm callers guarantee at
    # least one qualifying suggestion.
    suggestions = set.union(*[set(x) for x in suggestion_scores
                              if suggestion_scores[x] >= 3])
    return {x for x in suggestions if x not in resume_words}
def main():
    """Compare two Apriori variants and FP-growth on the market-basket CSV."""
    df = pd.read_csv(r"MarketBasket/Market_Basket_Optimisation.csv",
                     header=None)
    efficient = transcationGenerator(df, "efficient")
    non_efficient = transcationGenerator(df, "non-efficient")
    banner = '-' * 20
    print(banner, 'Apriori', banner)
    apriori_one(efficient, support=0.05, confidence=0.3)
    print(banner, 'Apriori', banner)
    apriori_two(non_efficient, 0.05, "confidence", 0.3)
    print(banner, 'FP-GROWTH', banner)
    frequent = fp.find_frequent_patterns(efficient, 20)
    mined = fp.generate_association_rules(frequent, 0.3)
    print('关联规则:', '\n', mined)
def botnet():
    """Build (ip, user-agent, target) transactions from the sample log and
    print FP-growth association rules (support 3, confidence 0.9)."""
    transactions = []
    with open("../data/KnowledgeGraph/sample7.txt") as f:
        for raw in f:
            ip, ua, target = raw.strip('\n').split(',')
            print("Add (%s %s %s)" % (ip, ua, target))
            transactions.append([ip, ua, target])
    frequent = pyfpgrowth.find_frequent_patterns(transactions, 3)
    print(pyfpgrowth.generate_association_rules(frequent, 0.9))
def fit(self, transactions):
    """Build an item -> similar-items mapping from FP-growth rules.

    Every pair of items co-occurring in a rule (antecedent + consequent)
    is treated as similar.  Returns {item: set(similar items)}.
    """
    frequent = pyfpgrowth.find_frequent_patterns(transactions,
                                                 self.min_support)
    mined = pyfpgrowth.generate_association_rules(frequent,
                                                  self.min_confidence)
    item_sim_sets = {}
    for antecedent, (consequent, _confidence) in mined.items():
        members = antecedent + consequent
        for member in members:
            bucket = item_sim_sets.setdefault(member, set())
            bucket.update(other for other in members if other != member)
    return item_sim_sets
def get_rules(transactions):
    """Mine frequent patterns (min count 400) then rules (confidence 0.7),
    logging how long each phase takes."""
    def _elapsed(since):
        # Seconds since `since`, rounded for the log line.
        return round(time.time() - since, 4)

    print('generating patterns...')
    started = time.time()
    frequent = pyfpgrowth.find_frequent_patterns(transactions, 400)
    print(f'patterns generated (time taken : {_elapsed(started)} seconds)')
    print('generating rules...')
    started = time.time()
    mined = pyfpgrowth.generate_association_rules(frequent, 0.7)
    print(f'rules generated (time taken : {_elapsed(started)} seconds)')
    return mined
def fpgrowth(dataset):
    """Build string transactions from the first 20 columns of each row
    (skipping NaN cells), then mine itemsets (count >= 100) and rules
    (confidence >= 0.4) and print both."""
    transactions = []
    for row in range(dataset.shape[0]):
        basket = [str(dataset.values[row, col])
                  for col in range(20)
                  if str(dataset.values[row, col]) != 'nan']
        transactions.append(basket)
    # print(transactions)
    itemsets = pyfpgrowth.find_frequent_patterns(transactions, 100)
    mined = pyfpgrowth.generate_association_rules(itemsets, 0.4)
    print("频繁项集:", itemsets)
    print("关联规则:", mined)
def get_rules():
    """Read transactions from NewRulesForME.csv, mine frequent patterns
    (count >= 3) and rules (confidence >= 0.60), and print them."""
    # FIX: use a context manager so the handle is closed (it previously
    # leaked), and drop the stray trailing ''' that opened an unterminated
    # string literal after the loop.
    with open("NewRulesForME.csv") as csv_file:
        reader = csv.reader(csv_file, delimiter=',')
        transactions = [row for row in reader]
    patterns = pyfpgrowth.find_frequent_patterns(transactions, 3)
    rules = pyfpgrowth.generate_association_rules(patterns, 0.60)
    print(rules)
    for i in rules:
        print(i)
def rules(request):
    """Django view: mine association rules (support 2, confidence 1) from
    the dataset and render rules.html with them."""
    transactions = data_set()
    frequent = pyfpgrowth.find_frequent_patterns(transactions, 2)
    mined = pyfpgrowth.generate_association_rules(frequent, 1)
    print(mined)
    return render(request, 'rules.html', {'rules': mined})
def AssociationRule(ip_urls, support, minConf):
    """Mine IP <-> URL-path association rules from (ip, url_path) pairs.

    IPs are encoded as negative integer ids and URL paths as positive ids so
    the two kinds can be told apart in the mined itemsets.

    :param ip_urls: iterable of (ip, url_path) pairs
    :param support: FP-growth support count
    :param minConf: minimum rule confidence
    """
    ips = set([items[0] for items in ip_urls])
    url_paths = set([items[1] for items in ip_urls])
    ips_map = {ip: -i - 1 for i, ip in enumerate(ips)}
    urls_map = {urlpath: i + 1 for i, urlpath in enumerate(url_paths)}
    # BUG FIX: the URL id must come from `urls_map`; the original called
    # .get() on `url_paths`, which is a set and has no .get(), raising
    # AttributeError.
    transactions = [[ips_map.get(items[0]), urls_map.get(items[1])]
                    for items in ip_urls]
    patterns = pyfpgrowth.find_frequent_patterns(transactions, support)
    rules = pyfpgrowth.generate_association_rules(patterns, minConf)
    # Partition single-item rule antecedents by sign: negative -> IPs,
    # positive -> URLs.
    ip = []
    url = []
    for (i, ) in rules.keys():
        if i < 0:
            ip.append(i)
        else:
            url.append(i)
    # NOTE(review): nothing is returned, so `ip`/`url` are discarded —
    # confirm whether a return statement went missing.
def printFP_GrowthResult(transactions, support, confidence):
    """Mine with FP-growth and pretty-print the frequent itemsets and rules.

    Python 2 syntax (print statements) — keep in mind when porting.

    :param transactions: list of transactions
    :param support: fractional support; scaled by len(transactions) before
        being passed to pyfpgrowth
    :param confidence: minimum rule confidence
    """
    patterns = pyfpgrowth.find_frequent_patterns(transactions,
                                                 support * len(transactions))
    rules = pyfpgrowth.generate_association_rules(patterns, confidence)
    print '\n\nFP Growth algorithm:'
    print 'min_support: ', support
    print 'min_confidence: ', confidence
    print '\nFrequent item set:( size:', len(patterns), ')'
    # Trailing commas keep everything on one bracketed line.
    print '[',
    for key in patterns:
        print key, ',',
    print ']'
    print '\nRules:'
    for key in rules:
        print key, '->', rules[key][0], ', confidence:', rules[key][1]
def solve(self):
    """Mine high-confidence (0.9) rules from vote records in A.csv
    (support count 150), write them to result.txt, and return them as a
    list of [antecedent, consequent] pairs."""
    transaction = pd.read_csv('A.csv').values
    patterns = pyfpgrowth.find_frequent_patterns(transaction, 150)
    print(patterns)
    rules = pyfpgrowth.generate_association_rules(patterns, 0.9)
    result_list = [[list(antecedent), list(value[0])]
                   for antecedent, value in rules.items()]
    print(result_list)
    # One rule pair per line, same text as the original concatenation loop.
    text = ''.join(str(item) + '\n' for item in result_list)
    with open('result.txt', 'w') as file:
        file.write(text)
    return result_list
def get_patterns_rules(txn_list, FPOF_flag=0, OutliernessDegree_flag=0,
                       min_sup=np.nan, min_conf=np.nan):
    """Run FP-growth over `txn_list` and return either the frequent patterns
    (FPOF) or the high-confidence rules (outlierness degree).

    :param txn_list: list of transactions to mine
    :param FPOF_flag: when 1, return the frequent patterns only
    :param OutliernessDegree_flag: when 1, return the association rules
    :param min_sup: minimum support as a fraction of len(txn_list)
    :param min_conf: minimum rule confidence
    :return: patterns dict, rules dict, or None when neither flag is set
    """
    print("FPGrowth rule-mining in progress ......")
    # Run PFGRWOTH ALGO - Get the Frequent patterns
    minsup = min_sup
    minconf = min_conf
    print("Min Support: ", minsup, ", Min Confidence: ", minconf)
    # BUG FIX: the support base was len(modelinput), an undefined global in
    # this scope; the fractional support should be scaled by the size of the
    # transaction list actually being mined.
    leninput = len(txn_list)
    if FPOF_flag == 1:
        patterns = pyfpgrowth.find_frequent_patterns(txn_list,
                                                     minsup * leninput)
        print("Number of patterns (Frequent Itemsets): ", len(patterns))
        print("FPGrowth rule-mining completed")
        return patterns
    if OutliernessDegree_flag == 1:
        patterns = pyfpgrowth.find_frequent_patterns(txn_list,
                                                     minsup * leninput)
        rules = pyfpgrowth.generate_association_rules(patterns, minconf)
        print("Number of patterns (Frequent Itemsets): ", len(patterns),
              ", Number of high-confidence rules: ", len(rules))
        print("FPGrowth rule-mining completed")
        return rules
def generate_C2C_rules(data_list, support_ratio=0.00003, confidence_ratio=0.3):
    """Mine category-to-category rules and average the confidence of every
    (trigger, target) category pair across all rules it appears in.

    :return: defaultdict {trigger_category: {target_category: mean_conf}}
    """
    support_count = support_ratio * len(data_list)
    frequent = pyfpgrowth.find_frequent_patterns(data_list, support_count)
    mined = pyfpgrowth.generate_association_rules(frequent, confidence_ratio)
    pair_counts = Counter()
    confidence_sums = defaultdict(lambda: 0)
    # Accumulate confidence per (trigger, target) pair over every rule in
    # which the pair occurs.
    for antecedent, (consequent, confidence) in mined.items():
        for trigger in antecedent:
            for target in consequent:
                confidence_sums[(trigger, target)] += confidence
                pair_counts.update([(trigger, target)])
    averaged = {pair: total / pair_counts[pair]
                for pair, total in confidence_sums.items()}
    # Re-key into a nested trigger -> {target: mean confidence} mapping.
    new_rules = defaultdict(lambda: dict())
    for (trigger, target), mean_conf in averaged.items():
        new_rules[trigger].update({target: mean_conf})
    return new_rules
def on_input(data1, data2):
    """Join product names onto sales orders, group product names per order,
    mine rules (support 8, confidence 0.8), and send the short rules to the
    'output' port.

    :param data1: CSV text with at least SALESORDERID and PRODUCTID columns
    :param data2: CSV text mapping PRODUCTID to PRODUCTNAME
    """
    soid_data = {}
    final = ""
    df1 = pandas.read_csv(io.StringIO(data1), low_memory=False)
    df2 = pandas.read_csv(io.StringIO(data2), low_memory=False)
    df1 = df1.merge(df2, on='PRODUCTID')
    df3 = df1[['SALESORDERID', 'PRODUCTNAME']]
    df3.to_csv('itemSet.csv', index=False)
    # FIX: mode='rU' is deprecated and removed in Python 3.11; plain text
    # mode performs the same universal-newline handling.
    with open('itemSet.csv') as f:
        reader = csv.reader(f)
        for n, row in enumerate(reader):
            if not n:  # Skip header row (n = 0).
                continue
            soid, pname = row
            if soid not in soid_data:
                soid_data[soid] = list()
            soid_data[soid].append((pname))
    patterns = pyfpgrowth.find_frequent_patterns(soid_data.values(), 8)
    rules = pyfpgrowth.generate_association_rules(patterns, 0.8)
    # BUG FIX: dict.iteritems() does not exist in Python 3 (AttributeError);
    # use .items().
    for key, value in rules.items():
        # Only forward compact rules: <=2 antecedents, single consequent.
        if (len(key) <= 2 and len(value[0]) <= 1):
            final = final + str(key) + ":" + str(value[0]) + "\n"
    api.send("output", "Rules :" + final)
import pyfpgrowth # In[15]: patterns = pyfpgrowth.find_frequent_patterns(dataset, 1000) patterns # In[18]: rules5 = pyfpgrowth.generate_association_rules(patterns, 0.5) rules5 # In[19]: patterns500 = pyfpgrowth.find_frequent_patterns(dataset, 500) patterns500 # In[20]: rules500_5 = pyfpgrowth.generate_association_rules(patterns500, 0.5) rules500_5
# Python 2 script: builds (ip, user-agent, target) transactions from a
# sample log and prints FP-growth association rules (support 3,
# confidence 0.9).  Same logic as the py3 botnet() helper elsewhere.
import pyfpgrowth

transactions=[]
with open("../data/KnowledgeGraph/sample7.txt") as f:
    for line in f:
        # Each log line is "ip,user-agent,target".
        line=line.strip('\n')
        ip,ua,target=line.split(',')
        print "Add (%s %s %s)" % (ip,ua,target)
        transactions.append([ip,ua,target])

patterns = pyfpgrowth.find_frequent_patterns(transactions, 3)
rules = pyfpgrowth.generate_association_rules(patterns, 0.9)
print rules