def pronostico(self):
    periodos = dict(
        lluvia=0, sequia=0, optimo=0, normal=0,
        pico_lluvia=self._pico_lluvia['dia'] if self._pico_lluvia else 0)
    # Map each Clima state to its counter key.
    claves = {
        Clima.CLIMAS['NORMAL']: 'normal',
        Clima.CLIMAS['LLUVIA']: 'lluvia',
        Clima.CLIMAS['SEQUIA']: 'sequia',
        Clima.CLIMAS['OPTIMO']: 'optimo',
    }
    # Group the registry by state, then split each state's days into runs
    # of consecutive days (index - day is constant within one run).
    for estado, group_estados in itertools.groupby(
            sorted(self._registro, key=lambda e: e['estado']),
            lambda e: e['estado']):
        for _, group_periodos in itertools.groupby(
                enumerate(sorted(group_estados, key=lambda e: e['dia'])),
                lambda x: x[0] - x[1]['dia']):
            clave = claves.get(estado)
            if clave is not None:
                periodos[clave] += len(list(group_periodos))
    return periodos
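# The nested groupby in pronostico relies on a standard idiom: within a
# run of consecutive days, index - day is constant, so pairing each day
# with its enumerate() index splits the sorted sequence into runs. A
# minimal self-contained illustration (toy data, not from the snippet):
from itertools import groupby

dias = [1, 2, 3, 7, 8, 12]
runs = [[dia for _, dia in grupo]
        for _, grupo in groupby(enumerate(dias), lambda x: x[0] - x[1])]
print(runs)  # [[1, 2, 3], [7, 8], [12]]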
def most_common(d: dict, topn=10) -> list:
    '''List the topn most common elements and their counts, from the most
    common to the least. If topn is None or 0, list all element counts.'''
    if topn is None or topn == 0:
        return sorted(d.items(), key=itemgetter(1), reverse=True)
    return heapq.nlargest(topn, d.items(), key=itemgetter(1))
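# Usage sketch for most_common, assuming a plain word -> count dict
# (the toy counts below are made up for illustration):
word_counts = {'the': 12, 'a': 7, 'of': 5, 'zebra': 1}
print(most_common(word_counts, topn=2))     # [('the', 12), ('a', 7)]
print(most_common(word_counts, topn=None))  # all counts, descending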
def recommondation(user_id, user_dict, K):
    rank = defaultdict(int)
    W = itemCF(user_dict)
    # user_dict[user_id] holds (item_id, score) tuples, so membership must
    # be tested against the ids, not the tuples.
    rated = {i for i, _ in user_dict[user_id]}
    for i, score in user_dict[user_id]:
        for j, wj in sorted(W[i].items(), key=itemgetter(1), reverse=True)[0:K]:
            if j in rated:
                continue
            rank[j] += score * wj
    l = sorted(rank.items(), key=itemgetter(1), reverse=True)[0:20]
    return l
def recommend(user_id, user_dict, K):
    rank = defaultdict(int)
    l = list()
    W = measureSimilarity(user_dict)
    # Membership must test movie ids, not the (id, score) tuples stored
    # in user_dict[user_id].
    rated = {i for i, _ in user_dict[user_id]}
    for i, score in user_dict[user_id]:  # i is a movie id rated by this user, score its rating
        # sorted() returns a list of (movie_id, weight) tuples
        for j, wj in sorted(W[i].items(), key=itemgetter(1), reverse=True)[0:K]:
            if j in rated:
                continue
            # For each movie the user has rated, take the K most similar
            # movies and accumulate the user's predicted interest in each
            # unseen candidate; the weighted sums over all rated movies,
            # sorted, give the top-n recommendations.
            rank[j] += score * wj
    l = sorted(rank.items(), key=itemgetter(1), reverse=True)[0:20]
    return l
def recommend2(user_id, user_dict, K, topN):
    rank = defaultdict(int)
    W = measureSimilarity(user_dict)
    rated = {i for i, _ in user_dict[user_id]}  # ids only, not (id, score) tuples
    for i, score in user_dict[user_id]:
        for j, wj in sorted(W[i].items(), key=itemgetter(1), reverse=True)[0:K]:
            if j in rated:
                continue
            rank[j] += score * wj
    l = sorted(rank.items(), key=itemgetter(1), reverse=True)[0:topN]
    print('user_id ' + str(user_id) + ' : ')
    print(l)
    for item in l:
        oldFile.write(str(user_id) + ' | ' + str(item[0]))
        oldFile.write("\n")
def Recommendation(train, user_id, W, K=10):
    rank = dict()
    ru = train[user_id]
    for i in ru:
        for j, wj in sorted(W[i].items(), key=itemgetter(1), reverse=True)[0:K]:
            if j in ru:
                continue
            if j not in rank:
                rank[j] = 0
            rank[j] += wj
    # An early `return rank.items()` made the code below unreachable and
    # returned unsorted items; the sorted top-10 return is the intent.
    end = 10
    if len(rank) < end:
        end = len(rank)
    return sorted(rank.items(), key=itemgetter(1), reverse=True)[0:end]
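# A minimal usage sketch for Recommendation, assuming `train` maps each
# user to a list of item ids and `W` is an item-item similarity matrix
# as nested dicts (toy values, made up for illustration):
train = {'u1': ['a', 'b']}
W = {
    'a': {'b': 0.9, 'c': 0.5},
    'b': {'a': 0.9, 'c': 0.4},
}
print(Recommendation(train, 'u1', W, K=2))  # [('c', 0.9)]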
def trainInMemory():
    print("TEST IN MEMORY")
    test_set = [({word: (word in word_tokenize(x[0])) for word in _ALL_WORDS}, x[1])
                for x in __FILE_TRAIN]
    training_set = apply_features(extractFeature, test_set)
    classifier = NaiveBayesClassifier.train(training_set)
    featurized_test_sentence = {
        word.lower(): (word in word_tokenize(_TEXT.lower(), language=_LANGUAGE))
        for word in _ALL_WORDS
    }
    print("Classification: %s \nAccuracy: %.4f \n" %
          (classifier.classify(featurized_test_sentence),
           nltk.classify.accuracy(classifier, test_set)))
    dictProbs = classifier.prob_classify(featurized_test_sentence)
    # probLabels = getProbabilitiesAllLabels(dictProbs)  # TODO: fix
    probLabels = []
    for label in dictProbs.samples():
        probLabels.append((label, dictProbs.prob(label)))
    probLabels.sort(key=itemgetter(0))  # sort alphabetically by label
    print(probLabels)
    for content in probLabels:
        print("%.5f\t- %s" % (content[1], content[0]))
def most_recent_sessions(self, sessions, number):
    '''
    Find the most recent sessions in the given set.

    Parameters
    --------
    sessions: set of session ids
    number: how many of the most recent sessions to keep

    Returns
    --------
    out : set
    '''
    sample = set()
    tuples = list()
    for session in sessions:
        time = self.session_time.get(session)
        if time is None:
            print(' EMPTY TIMESTAMP!! ', session)
        tuples.append((session, time))
    tuples = sorted(tuples, key=itemgetter(1), reverse=True)
    cnt = 0
    for element in tuples:
        cnt = cnt + 1
        if cnt > number:
            break
        sample.add(element[0])
    return sample
def idf_analysis(articles, content):
    dict_map = wordgram_map(articles)
    dict_idf = OrderedDict()
    dict_file = wordgram_analyze(content)
    for dict_elem in dict_file.keys():
        # The arguments were separated by a period instead of a comma.
        dict_idf[dict_elem] = idf_value(len(articles), dict_map, dict_elem)
    return OrderedDict(sorted(dict_idf.items(),
                              key=_operator.itemgetter(1), reverse=True))
def condition_minable(contents):
    dirpath = "../learning/data/preferences_doc/"  # It seems like using DF not IDF?!
    title = text_handler.extract_html_title(contents)
    text = filter.filter_html_contents(contents)
    pref_dict = preference_analyzer.preference_from_text(
        text, dirpath,
        filter_files='../data/filters/stop-words_english_en.txt',
        text_title=title)
    import _operator
    from collections import OrderedDict
    sorted_pref_dict = OrderedDict(
        sorted(pref_dict.items(), key=_operator.itemgetter(1), reverse=True))
    print(sorted_pref_dict)
    # feature_list = []
    # iter_cnt = 0
    # for pref_key, pref_val in sorted_pref_dict.items():
    #     if iter_cnt >= 10:
    #         break
    #     # feature_list.append(math_util.sigmoid(hash(pref_key) % 2**32))
    #     feature_list.append(hash(pref_key))
    #     feature_list.append(pref_val)
    #     iter_cnt += 1
    #
    # from learning.svm import generate_dataset
    # generate_dataset.generate_classify_data(feature_list, 10)
    return True
def search(prefix, usage, match):
    "Returns refnames starting with or containing prefix (case-insensitive)"
    if match == "startswith":
        cypher = cypher_search_refname_v1 % {'search_type': "STARTS WITH"}
    else:
        cypher = cypher_search_refname_v1 % {'search_type': "CONTAINS"}
    result = shareds.driver.session().run(cypher, prefix=prefix, usage=usage)
    records = []
    for rec in result:
        name = rec['name']
        source = rec['source']
        basename = rec['basename']
        num_neighbors = rec['num_neighbors']
        records.append(
            dict(name=name,
                 source=source,
                 basename=basename,
                 is_basename=(basename is None),
                 num_neighbors=num_neighbors))
        # records.append(surroundedBy=sorted(places1, key=lambda x: x['name']))
    return {
        "status": "OK",
        "statusText": "OK",
        "resultCount": len(records),
        "records": sorted(records, key=itemgetter("name")),
    }
def cn_predict(G):
    start_cn = datetime.now()
    # print('Common neighbor prediction starting...')
    out = open('./predictions/common_neighbor.csv', 'w')
    outN = open('./predictions/common_neighbor_with_name.csv', 'w')
    hop2s = dict()
    neighbors = dict()
    cn_sim = defaultdict(dict)
    left_set = [n for n, d in G.nodes(data=True) if d['bipartite'] == 0]
    right_set = [n for n, d in G.nodes(data=True) if d['bipartite'] == 1]
    dictionary = {}
    out.write('(left_element, right_element)')
    out.write(",")
    out.write('Probability')
    out.write("\n")
    for left_element in left_set:
        # print('snp {} -- '.format(len(G[left_element])))
        hop2s[left_element] = getAdj2(G, list(set(G[left_element])), 1)
        # print('snp hop 2 {} -- '.format(len(hop2s[left_element])))
        for right_element in right_set:
            # print('cancer {} -- '.format(len(G[right_element])))
            neighbors[right_element] = list(set(G[right_element]))
            if not (left_element, right_element) in G.edges:
                cn_sim[left_element][right_element] = common_neighbors(
                    hop2s[left_element], neighbors[right_element])
                if cn_sim[left_element][right_element] > 0:
                    dictionary.update({
                        (left_element, right_element):
                        cn_sim[left_element][right_element]
                    })
    for k, v in sorted(dictionary.items(), key=itemgetter(1), reverse=True):
        out.write(str((k[0], k[1])))
        out.write(",")
        out.write(str(cn_sim[k[0]][k[1]]))
        out.write("\n")
        # outN.write(str((df_nodes[k[0]], df_nodes[k[1]])))
        # outN.write(",")
        # outN.write(str(cn_sim[k[0]][k[1]]))
        # outN.write("\n")
    # print('Common neighbor prediction finished successfully')
    end_cn = datetime.now()
    # print('Common neighbor duration: {}'.format(end_cn - start_cn), "\n")
    return dictionary
def recommond(resultPath, user_count, user_dict, K, topN):
    W = measureSimilarity(user_dict)
    f = open(resultPath, "w")
    user_id = 1
    while user_id <= user_count:
        # rank must be reset for every user; it is easy to put this
        # statement in the wrong place
        rank = defaultdict(int)
        rated = {i for i, _ in user_dict[user_id]}  # ids only, not (id, score) tuples
        for i, score in user_dict[user_id]:
            for j, wj in sorted(W[i].items(), key=itemgetter(1), reverse=True)[0:K]:
                if j in rated:
                    continue
                rank[j] += score * wj
        l = sorted(rank.items(), key=itemgetter(1), reverse=True)[0:topN]
        for item in l:
            f.write(str(user_id) + ' | ' + str(item[0]))
            f.write("\n")
        user_id += 1
    f.close()
def build_page_lists():
    all_reviews = []
    one_star_review_list = []
    with open(modified_reviews_json_path) as file:
        review_data = json.loads(file.read())
        for item in review_data:
            if review_data[item]["unixReviewTime"] is None:
                time = 9999999999
            else:
                time = review_data[item]["unixReviewTime"]
            view_item = {
                "by": review_data[item]["reviewerName"],
                "descendants": 0,
                "id": item,
                "score": review_data[item]["score"],
                "time": time,
                "title": review_data[item]["reviewText"],
                "type": review_data[item]["rating"],
                "url": "http://www.google.com"
            }
            if review_data[item]["rating"] == 1.0:
                one_star_review_list.append(view_item)
            all_reviews.append(view_item)
    list_of_review_item_ids = []
    for review_item in sorted(one_star_review_list, key=itemgetter("score"),
                              reverse=True):
        list_of_review_item_ids.append(review_item["id"])
    return list_of_review_item_ids, all_reviews
def average_grade_at_all_disciplines(self):
    # sd - the list of students with grades assigned at various disciplines
    sd = self.__student_discipline_repo.get_all()
    studentsId = []
    studentsIdDisc = []
    for i in sd:
        idC = i.get_student_id()  # current student id
        p = 0
        for j in studentsId:
            if j == idC:  # the id is already in the result list
                p = 1
        if p == 0:
            studentsId.append(idC)
    for i in studentsId:
        suma = 0
        nr = 0
        for j in sd:
            if i == j.get_student_id():
                suma = suma + j.get_grade()
                nr = nr + 1
        studentsIdDisc.append((i, suma / nr))
    # sorted() returns a new list; the result must be assigned back.
    studentsIdDisc = sorted(studentsIdDisc, key=itemgetter(1), reverse=True)
    return studentsIdDisc
def most_recent_sessions(self, sessions, number):
    '''
    Desc: Find the most recent sessions in the corpus.

    Input
    --------
    sessions: set of session ids
    number: number of sessions that we want to keep

    Output
    --------
    set of sessions
    '''
    sample = set()
    tuples = list()
    for session in sessions:
        time = self.session_time.get(session)
        if time is None:
            print(' EMPTY TIMESTAMP!! ', session)
        tuples.append((session, time))
    tuples = sorted(tuples, key=itemgetter(1), reverse=True)
    cnt = 0
    for element in tuples:
        cnt = cnt + 1
        if cnt > number:
            break
        sample.add(element[0])
    return sample
def filterByType(self, type):
    all = self.__repo.getAll()
    rez = []
    for e in all:
        if e.get_type() == type:
            rez.append([e.get_id(), e.get_type(), e.get_price()])
    rez = sorted(rez, key=itemgetter(2))
    return rez
def Recommend(user, train, W, K=5):
    rank = dict()
    interact_items = train[user]
    for v, wuv in sorted(W[user].items(), key=itemgetter(1), reverse=True)[0:K]:
        for item in train[v]:
            if item in interact_items:
                continue
            if item not in rank:
                rank[item] = 0
            rank[item] += wuv * 1  # implicit feedback: rating r_vi = 1
    # Return at most the 10 highest-scored unseen items.
    end = 10
    if len(rank) < end:
        end = len(rank)
    return sorted(rank.items(), key=itemgetter(1), reverse=True)[0:end]
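# Minimal usage sketch for the user-based Recommend above, assuming
# `train` maps users to item lists and `W` is a user-user similarity
# matrix as nested dicts (toy values, for illustration only):
train = {'u1': ['a'], 'u2': ['a', 'b'], 'u3': ['c']}
W = {'u1': {'u2': 0.8, 'u3': 0.1}}
print(Recommend('u1', train, W, K=2))  # [('b', 0.8), ('c', 0.1)]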
def filterByType(self, type):
    all = self.__repo.getAll()
    rez = []
    for e in all:
        if e.get_type() == type:
            rez.append([e.get_id(), e.get_type(), e.get_price()])
    rez = sorted(rez, key=itemgetter(2))
    return rez
def filterGrade(self, grade):
    all = self.get_all()
    rez = []
    for g in all:
        if int(g.get_grade()) > grade:
            rez.append([g.get_stud().get_name(), str(g.get_grade()).strip()])
    rez = sorted(rez, key=itemgetter(1))
    return rez
def getplace(id):
    print('id:', id)
    result = shareds.driver.session().run(cypher_getplace, id=id).single()
    print('result:', result)
    if not result:
        return dict(status="Error", resultCount=0)
    p = result.get('p')
    largerPlaces = result['largerPlaces']
    smallerPlaces = result['smallerPlaces']
    places1 = []
    for h1, largerPlace, id2 in largerPlaces:
        if largerPlace is None:
            break
        name2 = largerPlace['pname']
        type2 = largerPlace['type']
        place = dict(name=name2, type=type2, id=id2)
        datetype = h1['datetype']
        if datetype:
            date1 = h1['date1']
            date2 = h1['date2']
            d = DateRange(datetype, date1, date2)
            timespan = str(d)
            date1 = DateRange.DateInt(h1['date1']).long_date()
            date2 = str(DateRange.DateInt(h1['date2']))
            place['datetype'] = datetype
            place['date1'] = date1
            place['date2'] = date2
            place['timespan'] = timespan
        places1.append(place)
    places2 = []
    for h2, smallerPlace, id2 in smallerPlaces:
        if smallerPlace is None:
            break
        name2 = smallerPlace['pname']
        type2 = smallerPlace['type']
        place = dict(name=name2, type=type2, id=id2)
        datetype = h2['datetype']
        if datetype:
            date1 = h2['date1']
            date2 = h2['date2']
            d = DateRange(datetype, date1, date2)
            timespan = str(d)
            date1 = str(DateRange.DateInt(h2['date1']))
            date2 = str(DateRange.DateInt(h2['date2']))
            place['datetype'] = datetype
            place['date1'] = date1
            place['date2'] = date2
            place['timespan'] = timespan
        places2.append(place)
    # names = [dict(name=pn['name'], lang=pn['lang']) for pn in result['names']]
    place = PlaceBl.from_node(p)
    place.names = [PlaceName.from_node(pn) for pn in result['names']]
    print(smallerPlaces)
    if smallerPlaces == [[None, None, None]]:
        smallerPlaces = []
    # Note: this first assignment is immediately overwritten by the
    # sorted places2 list on the next line.
    place.surrounds = [PlaceName.from_node(p2) for (h2, p2, id2) in smallerPlaces]
    place.surrounds = sorted(places2, key=itemgetter('name'))
    return {
        "status": "OK",
        "statusText": "OK",
        "resultCount": 1,
        "place": place,
    }
def aa_predict(G):
    start_aa = datetime.now()
    # print('Adamic_adar prediction starting...')
    out = open('./predictions/adamic_adar.csv', 'w')
    outN = open('./predictions/adamic_adar_with_name.csv', 'w')
    hop2s = dict()
    neighbors = dict()
    aa_sim = defaultdict(dict)
    left_set = [n for n, d in G.nodes(data=True) if d['bipartite'] == 0]
    right_set = [n for n, d in G.nodes(data=True) if d['bipartite'] == 1]
    # print('left side: ', len(left_set))
    dictionary = {}
    out.write('(left_element, right_element)')
    out.write(",")
    out.write('Probability')
    out.write("\n")
    exception_count = 0
    for left_element in left_set:
        hop2s[left_element] = getAdj2(G, list(set(G[left_element])), 1)
        for right_element in right_set:
            neighbors[right_element] = list(set(G[right_element]))
            if not (left_element, right_element) in G.edges:
                try:
                    aa_sim[left_element][right_element] = adamic_adar(
                        hop2s[left_element], neighbors[right_element], G)
                    if aa_sim[left_element][right_element] > 0:
                        dictionary.update({
                            (left_element, right_element):
                            aa_sim[left_element][right_element]
                        })
                except Exception:
                    exception_count += 1
    print(exception_count)
    for k, v in sorted(dictionary.items(), key=itemgetter(1), reverse=True):
        out.write(str((k[0], k[1])))
        out.write(",")
        out.write(str(aa_sim[k[0]][k[1]]))
        out.write("\n")
        # outN.write(str((df_nodes[k[0]], df_nodes[k[1]])))
        # outN.write(",")
        # outN.write(str(aa_sim[k[0]][k[1]]))
        # outN.write("\n")
    # print('Adamic-adar prediction finished successfully')
    end_aa = datetime.now()
    # print('Adamic-adar duration: {}'.format(end_aa - start_aa), "\n")
    return dictionary
def overDueRentals(self):
    stat = []
    for r in self.getRentals():
        if r.getReturnedDate() is None:
            stat.append({
                "rentalID": r.getRentalID(),
                # timedelta.days is an attribute, not a method
                "overdue": (date.today() - r.getDueDate()).days
            })
    return sorted(stat, key=itemgetter("overdue"), reverse=True)
def test_getter_multiple_gest(self):
    import _operator as operator

    class A(object):
        pass

    a = A()
    a.x = 'X'
    a.y = 'Y'
    a.z = 'Z'
    assert operator.attrgetter('x', 'z', 'y')(a) == ('X', 'Z', 'Y')
    e = raises(TypeError, operator.attrgetter, ('x', (), 'y'))
    assert str(e.value) == "attribute name must be a string, not 'tuple'"

    data = list(map(str, range(20)))
    assert operator.itemgetter(2, 10, 5)(data) == ('2', '10', '5')
    raises(TypeError, operator.itemgetter(2, 'x', 5), data)
def usage():
    mypath = sys.argv[0]
    myname = mypath[mypath.rindex('\\') + 1:] if '\\' in mypath else mypath
    print('Usage:\n\npython ' + myname + ' <product name> <index file path>\n')
    print('For Example:')
    for (pro, path) in sorted(productDict.items(), key=itemgetter(0)):
        print('python ' + myname + ' ' + pro + ' ' + path)
    print('python ' + myname + ' all ----This will take a long, long time')
    print('\nRequires: python3; pywin32; Windows OS; Office Word')
def order(self):
    l = []
    for b in self.__repob.getAll():
        for r in self.__repor.getAll():
            if r.getID() == b.getRID():
                l.append([int(b.getBID()), int(b.getTimes()) * int(r.getDist())])
    l = sorted(l, key=itemgetter(1), reverse=True)
    for i in l:
        print(i)
def usage():
    mypath = sys.argv[0]
    myname = mypath[mypath.rindex('\\') + 1:] if '\\' in mypath else mypath
    print('Usage:\n\npython ' + myname + ' <product name> <index file path>\n')
    print('For Example:')
    for (pro, path) in sorted(productDict.items(), key=itemgetter(0)):
        print('python ' + myname + ' ' + pro + ' ' + path)
    print('python ' + myname + ' all ----This will take a long, long time')
    print('\nRequires: python3; pywin32; Windows OS; Office Word')
def _evaluate_method(G, k, method):
    kf = KFold(n_splits=k, shuffle=True)
    precision_sum = 0
    auc_sum = 0
    print(tabulate([[f'Starting calculating {method}']], tablefmt='grid'))
    iterator = 0
    for train_index, test_index in kf.split(list(G.edges)):
        G_train = G.copy()
        np_edges = np.array(list(G.edges))
        test_edges = np_edges[test_index]
        G_train.remove_edges_from(test_edges)
        print('Iteration %i / %i :' % (iterator, k))
        # -------------------------------------------------------------------
        if method == 'jc':
            predicted = pr.jc_predict(G_train)
        elif method == 'aa':
            predicted = pr.aa_predict(G_train)
        elif method == 'cn':
            predicted = pr.cn_predict(G_train)
        elif method == 'pa':
            predicted = pr.pa_predict(G_train)
        else:
            raise Exception('Entered method is not valid', method)
        # -------------------------------------------------------------------
        test_edge_set = set(map(tuple, test_edges))
        precision = len(set(predicted.keys()) & test_edge_set) / len(
            set(predicted.keys()))
        precision_sum += precision
        print('precision: ', precision)
        # -------------------------------------------------------------------
        # Labels must be tested against the tuple set; `label in test_edges`
        # on a numpy array does elementwise comparison, not row matching.
        score_algo, label_algo = zip(
            *[(float(score), label in test_edge_set) for label, score in sorted(
                predicted.items(), key=itemgetter(1), reverse=True)])
        # Compute the ROC AUC Score
        fpr_algo, tpr_algo, _ = roc_curve(label_algo, score_algo)
        auc_algo = roc_auc_score(label_algo, score_algo)
        print("auc: ", auc_algo)
        auc_sum += auc_algo
        # -------------------------------------------------------------------
        iterator += 1
        print('---' * 20)
    overall_precision = precision_sum / k
    overall_auc = auc_sum / k
    print(tabulate([[
        "%i-fold evaluation overall precision: %f" % (k, overall_precision),
        "%i-fold evaluation overall auc: %f" % (k, overall_auc)
    ]], tablefmt='jira'))
    headers = ['overall_precision', 'overall_auc']
    table = [[overall_precision, overall_auc]]
    print(tabulate(table, headers, tablefmt="pipe"))
    # Note: fpr_algo/tpr_algo come from the last fold only.
    return [overall_precision, overall_auc, fpr_algo, tpr_algo]
def printTags(self):
    s_arr = []
    self.sortedTags = []
    si_tags = list(self.s_tags.items())
    si_tags.sort(key=_operator.itemgetter(1))
    si_tags.reverse()
    for i in si_tags:
        s_arr.append("{0} ({1})".format(i[0], i[1]))
        self.sortedTags.append(i[0])
    return s_arr
def Recommend(user, P, Q, train, K=30):
    rank = dict()
    userItems = train[user]
    for item in Q.keys():
        if item in userItems:
            continue
        rank[item] = 0
        for f in range(0, F):
            rank[item] += P[user][f] * Q[item][f]
    return sorted(rank.items(), key=itemgetter(1), reverse=True)[0:K]
def Recommend(graph, maxUserId, user, k=10):
    # maxUserId = max(train.keys())
    ret = dict()
    rank = PersonalRank(graph, user)
    for id, value in rank.items():
        if id > maxUserId:  # this is important: ids above maxUserId are items
            if id not in graph[user]:
                ret[id - maxUserId] = value
    return sorted(ret.items(), key=itemgetter(1), reverse=True)[0:k]
def Recommend(user, P, Q, train, K=30):
    rank = dict()
    userItems = train[user]
    for item in Q.keys():
        if item in userItems:
            continue
        rank[item] = 0
        for f in range(0, F):
            rank[item] += P[user][f] * Q[item][f]
    return sorted(rank.items(), key=itemgetter(1), reverse=True)[0:K]
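# Minimal usage sketch for the latent-factor Recommend above. F (the
# number of latent factors) is a module-level global in the snippet, so
# it is set explicitly here; P/Q hold toy factor vectors for illustration:
F = 2
train = {'u1': ['a']}
P = {'u1': [0.5, 1.0]}
Q = {'a': [1.0, 1.0], 'b': [0.2, 0.6], 'c': [0.8, 0.1]}
print(Recommend('u1', P, Q, train, K=2))  # [('b', 0.7...), ('c', 0.5)]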
def accuracy(self):
    res = []
    for i in range(1, self.n_estimators):
        for j in range(1, self.min_samples_leaf):
            self.classification = RandomForestClassifier(
                n_estimators=i,
                random_state=self.r_random_state,
                min_samples_leaf=j)
            self._modeling(self.train[:testing_training_number],
                           self.label[:testing_training_number])
            res.append((i, j, self.score(self.train[testing_training_number:],
                                         self.label[testing_training_number:])))
    return max(res, key=itemgetter(2))
def mostRentedAuthors2(self):
    """Return a list of authors with their respective numbers of rentals,
    in descending order."""
    stat = []
    for b in self.getBooks():
        rentals = self.filterRentals2(None, None, b.getAuthor())
        rents = len(rentals)
        stat.append({"author": b.getAuthor(), "rents": rents})
    return sorted(stat, key=itemgetter("rents"), reverse=True)
def filterGrade(self, grade):
    all = self.get_all()
    rez = []
    for g in all:
        if int(g.get_grade()) > grade:
            rez.append([g.get_stud().get_name(), str(g.get_grade()).strip()])
    rez = sorted(rez, key=itemgetter(1))
    return rez
def pa_predict(G):
    start_pa = datetime.now()
    # print('Preferential_attachment prediction starting...')
    dictionary = {}
    out = open('./predictions/preferential_attachment.csv', 'w')
    outN = open('./predictions/preferential_attachment_with_name.csv', 'w')
    neighbors_right_element = dict()
    neighbors_left_element = dict()
    pa_sim = defaultdict(dict)
    left_set = [n for n, d in G.nodes(data=True) if d['bipartite'] == 0]
    right_set = [n for n, d in G.nodes(data=True) if d['bipartite'] == 1]
    out.write('(left_element, right_element)')
    out.write(",")
    out.write('Probability')
    out.write("\n")
    for left_element in left_set:
        # hop2s[left_element] = getAdj2(G, list(set(G[left_element])), 1)
        neighbors_left_element[left_element] = list(set(G[left_element]))
        for right_element in right_set:
            neighbors_right_element[right_element] = list(set(G[right_element]))
            if not (left_element, right_element) in G.edges:
                pa_sim[left_element][right_element] = preferential_attachment(
                    neighbors_left_element[left_element],
                    neighbors_right_element[right_element])
                if pa_sim[left_element][right_element] > 0:
                    dictionary.update({
                        (left_element, right_element):
                        pa_sim[left_element][right_element]
                    })
    for k, v in sorted(dictionary.items(), key=itemgetter(1), reverse=True):
        out.write(str((k[0], k[1])))
        out.write(",")
        out.write(str(pa_sim[k[0]][k[1]]))
        out.write("\n")
        # outN.write(str((df_nodes[k[0]], df_nodes[k[1]])))
        # outN.write(",")
        # outN.write(str(pa_sim[k[0]][k[1]]))
        # outN.write("\n")
    # print('Preferential_attachment prediction finished successfully')
    end_pa = datetime.now()
    # print('Preferential attachment duration: {}'.format(end_pa - start_pa), "\n")
    return dictionary
def jc_predict(G):
    start_jc = datetime.now()
    # print('Jaccard prediction starting...')
    dictionary = {}
    out = open('./predictions/jaccard.csv', 'w')
    outN = open('./predictions/jaccard_with_name.csv', 'w')
    hop2s = dict()
    neighbors = dict()
    jaccard_sim = defaultdict(dict)
    left_set = [n for n, d in G.nodes(data=True) if d['bipartite'] == 0]
    right_set = [n for n, d in G.nodes(data=True) if d['bipartite'] == 1]
    out.write('(left_element, right_element)')
    out.write(",")
    out.write('Probability')
    out.write("\n")
    exception_count = 0
    for left_element in left_set:
        hop2s[left_element] = getAdj2(G, list(set(G[left_element])), 1)
        for right_element in right_set:
            neighbors[right_element] = list(set(G[right_element]))
            if not (left_element, right_element) in G.edges:
                try:
                    jaccard_sim[left_element][right_element] = jaccard(
                        hop2s[left_element], neighbors[right_element])
                    if jaccard_sim[left_element][right_element] > 0:
                        dictionary.update({
                            (left_element, right_element):
                            jaccard_sim[left_element][right_element]
                        })
                except Exception:
                    exception_count += 1
    print(exception_count)
    for k, v in sorted(dictionary.items(), key=itemgetter(1), reverse=True):
        out.write(str((k[0], k[1])))
        out.write(",")
        out.write(str(jaccard_sim[k[0]][k[1]]))
        out.write("\n")
        # outN.write(str((df_nodes[k[0]], df_nodes[k[1]])))
        # outN.write(",")
        # outN.write(str(jaccard_sim[k[0]][k[1]]))
        # outN.write("\n")
    # print('Jaccard prediction finished successfully')
    end_jc = datetime.now()
    # print('Jaccard duration: {}'.format(end_jc - start_jc), "\n")
    return dictionary
def get_serial_parametr(self, item_pattern):
    if item_pattern in self.__serial_params_cache:
        return self.__serial_params_cache[item_pattern]
    search_format = re.compile(item_pattern.format(r"(?P<param_idx>\d+)"))
    matches = []
    for name, value in par_map.items():
        m = re.match(search_format, name)
        if m:
            matches.append((name, value, int(m.group('param_idx'))))
    matches = sorted(matches, key=itemgetter(2))
    # Store the result so the cache lookup above can ever hit; the
    # original checked the cache but never populated it.
    param = SerialParam._create_from_matches(matches)
    self.__serial_params_cache[item_pattern] = param
    return param
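# A small self-contained demo of the serial-parameter lookup pattern
# above; par_map and the "channel_{}_gain" template are hypothetical
# names used only for illustration:
import re
from operator import itemgetter

par_map = {'channel_2_gain': 0.5, 'channel_0_gain': 1.0, 'channel_1_gain': 0.7}
pattern = re.compile('channel_{}_gain'.format(r"(?P<param_idx>\d+)"))
matches = []
for name, value in par_map.items():
    m = pattern.match(name)
    if m:
        matches.append((name, value, int(m.group('param_idx'))))
print(sorted(matches, key=itemgetter(2)))
# [('channel_0_gain', 1.0, 0), ('channel_1_gain', 0.7, 1), ('channel_2_gain', 0.5, 2)]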
def tfidf_analyze(self, path=None, text=None):
    """
    Calculate the TF-IDF values of a document, given either its path or
    its raw text.

    :param path: Target document's path.
    :param text: Raw text to analyze when no path is given.
    :return: TF-IDF dictionary of the document, sorted by value, descending.
    """
    if path is not None:
        dict_tf = self.tf_analyze(path=path)
        dict_idf = self.idf_analyze(path=path)
    else:
        dict_tf = self.tf_analyze(text=text)
        dict_idf = self.idf_analyze(text=text)
    tf_idf_dict = OrderedDict()
    for tf_elem in dict_tf.keys():
        tf_idf_dict[tf_elem] = dict_tf[tf_elem] * dict_idf[tf_elem]
    sorted_tf_idf_dict = OrderedDict(
        sorted(tf_idf_dict.items(), key=_operator.itemgetter(1), reverse=True))
    return sorted_tf_idf_dict
def idf_analyze(self, path=None, text=None):
    """
    Get the IDF values of the words in a document.

    :param path: Target document path.
    :param text: Raw text to analyze when no path is given.
    :return: Dictionary mapping each word to its IDF value, sorted by
             value, descending.
    """
    # dict_map = self.wordgram_map(dirpath)
    if path is not None:
        contents = fileio.read_file(path)
    elif path is None and text is not None:
        contents = text
    else:
        return None
    dict_file = ngram.wordgram_analyze(contents)
    dict_idf = OrderedDict()
    for dict_elem in dict_file.keys():
        dict_idf[dict_elem] = self.idf_value(self.dict_map, dict_elem)
    sorted_idf_dict = OrderedDict(
        sorted(dict_idf.items(), key=_operator.itemgetter(1), reverse=True))
    return sorted_idf_dict
def tf_analyze(self, path=None, text=None):
    """
    Return the TF value dictionary for a document.

    :param path: Path of the document to analyze.
    :param text: Raw text to analyze when no path is given.
    :return: Sorted dictionary of TF values, descending.
    """
    if path is not None:
        contents = fileio.read_file(path)
    elif path is None and text is not None:
        contents = text
    else:
        return None
    dictionary = ngram.wordgram_analyze(contents)
    tf_dict = OrderedDict()
    for k, v in dictionary.items():
        tf_dict[str(k)] = self.tf_value(dictionary, str(k))
    sorted_tf_dict = OrderedDict(
        sorted(tf_dict.items(), key=_operator.itemgetter(1), reverse=True))
    return sorted_tf_dict
def classTopN(Q, data='movies.dat', output='topN.txt'):
    top = dict()
    movies = dict()
    k = 10
    with open(data, 'r', encoding='utf8') as handle:
        for line in handle:
            itemId, name, genre = line.split('::')[0:3]
            movies[int(itemId)] = name + ' ' + genre
    for i in range(0, F):
        temp = {}
        for item in Q.keys():
            temp[item] = Q[item][i]
        tempTop = sorted(temp.items(), key=itemgetter(1), reverse=True)[0:k]
        top[i] = tempTop
        with open(output, 'a') as outputFile:
            outputFile.write(str(i) + '**********')
            for id, score in tempTop:
                outputFile.write(movies[id])
            outputFile.write('*****************')
    return top
def average_grade_at_all_disciplines(self):
    # sd - the list of students with grades assigned at various disciplines
    sd = self.__student_discipline_repo.get_all()
    studentsId = []
    studentsIdDisc = []
    for i in sd:
        idC = i.get_student_id()  # current student id
        p = 0
        for j in studentsId:
            if j == idC:  # the id is already in the result list
                p = 1
        if p == 0:
            studentsId.append(idC)
    for i in studentsId:
        suma = 0
        nr = 0
        for j in sd:
            if i == j.get_student_id():
                suma = suma + j.get_grade()
                nr = nr + 1
        studentsIdDisc.append((i, suma / nr))
    # sorted() returns a new list; the result must be assigned back.
    studentsIdDisc = sorted(studentsIdDisc, key=itemgetter(1), reverse=True)
    return studentsIdDisc
def __init__(self, **kwargs):
    field_names = tuple(map(itemgetter(0), self._fields))
    for key, value in kwargs.items():
        if key in field_names:
            setattr(self, key, value)
    super().__init__()
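# Minimal sketch of how the keyword-filtering __init__ above behaves;
# Record and its _fields layout are assumptions for illustration only:
from operator import itemgetter

class Record:
    _fields = (('name', str), ('age', int))

    def __init__(self, **kwargs):
        field_names = tuple(map(itemgetter(0), self._fields))
        for key, value in kwargs.items():
            if key in field_names:
                setattr(self, key, value)
        super().__init__()

r = Record(name='Ada', age=36, ignored='dropped')
print(r.name, r.age)          # Ada 36
print(hasattr(r, 'ignored'))  # False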
q1 = input("have you seen cartoon characters ?")
if q1 == "yes":
    user_answer.append(1)
else:
    user_answer.append(0)

q1 = input("have you seen any cars ?")
if q1 == "yes":
    user_answer.append(1)
else:
    user_answer.append(0)

q1 = input("have you seen any wars ?")
if q1 == "yes":
    user_answer.append(1)
else:
    user_answer.append(0)

final_data = list()
# Calculate the similarity between the user-answer vector and every
# genre vector in the data set, then sort and pick the highest one (K=1).
for k, v in dataset.items():
    sim_of_gen = GetCosSimilarityForGenres(user_answer, v)
    final_data.append([k, sim_of_gen])
sorted_data = sorted(final_data, key=itemgetter(1), reverse=True)

# print the result
print("Your Film Is Classified as : ", sorted_data[0][0],
      ", by : ", round(sorted_data[0][1] * 100), "%")
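# GetCosSimilarityForGenres is not defined in this snippet; a minimal
# sketch of what it presumably computes (cosine similarity between two
# binary genre vectors) would be:
import math

def GetCosSimilarityForGenres(u, v):
    # cos(u, v) = dot(u, v) / (|u| * |v|); 0.0 when either vector is zero
    dot = sum(a * b for a, b in zip(u, v))
    norm = math.sqrt(sum(a * a for a in u)) * math.sqrt(sum(b * b for b in v))
    return dot / norm if norm else 0.0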
def report(self):
    all = self.product
    all = sorted(all, key=itemgetter(3), reverse=True)
    return all
def splitattribute():
    queue = []
    # `list` here appears to be a module-level dataset variable that
    # shadows the builtin, as left in the original code.
    root.data = list
    queue.append(root)
    count = 0
    while len(queue) > 0:
        current = queue.pop(0)
        data = current.data
        leftsplit = []
        rightsplit = []
        data1 = sorted(data, key=itemgetter(0))
        data2 = sorted(data, key=itemgetter(1))
        records = []
        records.append(data1)
        records.append(data2)
        (bestt, besti, bestabove, bestbelow) = bestsplit(records)
        for i in range(len(data)):
            if data[i][besti] <= bestt:
                leftsplit.append(data[i])
            else:
                rightsplit.append(data[i])
        count = count + 1
        current.threshold = bestt
        current.attributeindex = besti
        if len(leftsplit) > 0 and bestbelow.count(0) < 3:
            l = node()
            l.data = leftsplit
            l.parent = current
            l.classval = bestbelow
            l.d = current.d + 1
            current.left = l
            queue.append(l)
        else:
            # leaf node: not split further
            l = node()
            l.data = leftsplit
            l.parent = current
            l.classval = bestbelow
            current.left = l
            l.d = current.d + 1
        if len(rightsplit) > 0 and bestabove.count(0) < 3:
            r = node()
            r.data = rightsplit
            r.parent = current
            r.classval = bestabove
            r.d = current.d + 1
            current.right = r
            queue.append(r)
        else:
            # leaf node: not split further
            r = node()
            r.data = rightsplit
            r.parent = current
            r.classval = bestabove
            r.d = current.d + 1
            current.right = r