Code example #1
File: clima.py  Project: romicasal/vulcano
    def pronostico(self):
        periodos = dict(
            lluvia=0,
            sequia=0,
            optimo=0,
            normal=0,
            pico_lluvia=self._pico_lluvia['dia'] if self._pico_lluvia else 0)

        # Group records by weather state, then split each state's records into
        # runs of consecutive days: (enumeration index - day) stays constant
        # within a run of consecutive days.
        for estado, group_estados in itertools.groupby(
                sorted(self._registro, key=lambda e: e['estado']),
                lambda e: e['estado']):
            for _, group_periodos in itertools.groupby(
                    enumerate(sorted(group_estados, key=lambda e: e['dia'])),
                    lambda x: x[0] - x[1]['dia']):
                run_length = len(list(group_periodos))
                if estado == Clima.CLIMAS['NORMAL']:
                    periodos['normal'] += run_length
                elif estado == Clima.CLIMAS['LLUVIA']:
                    periodos['lluvia'] += run_length
                elif estado == Clima.CLIMAS['SEQUIA']:
                    periodos['sequia'] += run_length
                elif estado == Clima.CLIMAS['OPTIMO']:
                    periodos['optimo'] += run_length

        return periodos
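The inner groupby here leans on a classic idiom: grouping by enumeration index minus value keeps consecutive integers together. A minimal standalone sketch of just that idiom (the data is made up):

import itertools

days = [1, 2, 3, 7, 8, 10]
runs = [
    [d for _, d in group]
    for _, group in itertools.groupby(enumerate(days), lambda x: x[0] - x[1])
]
print(runs)  # [[1, 2, 3], [7, 8], [10]]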
Code example #2
File: container.py  Project: cypro666/magic3
import heapq
from operator import itemgetter

def most_common(d: dict, topn=10) -> list:
    ''' List the topn most common elements and their counts, from the most
        common to the least. If topn is None or 0, list all element counts. '''

    if topn is None or topn == 0:
        return sorted(d.items(), key=itemgetter(1), reverse=True)

    return heapq.nlargest(topn, d.items(), key=itemgetter(1))
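Assuming the function above, a quick usage sketch (the counts dict is illustrative):

counts = {'a': 5, 'b': 2, 'c': 9, 'd': 1}
print(most_common(counts, topn=2))     # [('c', 9), ('a', 5)]
print(most_common(counts, topn=None))  # full list, most common first

heapq.nlargest is O(n log k), so for small topn it avoids the full O(n log n) sort.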
Code example #3
def recommondation(user_id, user_dict, K):
    # itemCF() (defined elsewhere in this project) builds the item-item
    # similarity matrix W; requires defaultdict and itemgetter imports.
    rank = defaultdict(int)
    W = itemCF(user_dict)
    for i, score in user_dict[user_id]:
        for j, wj in sorted(W[i].items(), key=itemgetter(1),
                            reverse=True)[0:K]:
            if j in user_dict[user_id]:
                continue
            rank[j] += score * wj
    l = sorted(rank.items(), key=itemgetter(1), reverse=True)[0:20]
    return l
Code example #4
def recommend(user_id, user_dict, K):
    rank = defaultdict(int)
    l = list()
    W = measureSimilarity(user_dict)
    for i, score in user_dict[user_id]:  # i: a movie id the user has rated; score: that rating
        for j, wj in sorted(W[i].items(), key=itemgetter(1),
                            reverse=True)[0:K]:  # sorted() returns a list of (movie_id, weight) tuples
            if j in user_dict[user_id]:
                continue
            # For every movie id1 the user has rated, take the K movies most
            # similar to id1, accumulate score * similarity as the user's
            # interest in each unseen candidate, then sort the weighted sums
            # to recommend the top N movies.
            rank[j] += score * wj
    l = sorted(rank.items(), key=itemgetter(1), reverse=True)[0:20]
    return l
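The weighted-sum step above is easy to check by hand. A standalone sketch with made-up data (user_dict and W are illustrative; note that the membership test here unpacks the (movie, score) pairs, which the bare `j in user_dict[user_id]` test in the examples above does not):

from collections import defaultdict
from operator import itemgetter

user_dict = {1: [(10, 5.0), (20, 3.0)]}               # user 1 rated movies 10 and 20
W = {10: {20: 0.9, 30: 0.4}, 20: {10: 0.9, 30: 0.7}}  # item-item similarities

rank = defaultdict(float)
for i, score in user_dict[1]:
    for j, wj in sorted(W[i].items(), key=itemgetter(1), reverse=True)[:2]:
        if any(j == seen for seen, _ in user_dict[1]):
            continue
        rank[j] += score * wj

print(sorted(rank.items(), key=itemgetter(1), reverse=True))
# [(30, 4.1)]  ->  5.0 * 0.4  +  3.0 * 0.7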
Code example #5
def recommend2(user_id, user_dict, K, topN):
    rank = defaultdict(int)
    W = measureSimilarity(user_dict)
    for i, score in user_dict[user_id]:
        for j, wj in sorted(W[i].items(), key=itemgetter(1),
                            reverse=True)[0:K]:
            if j in user_dict[user_id]:
                continue
            rank[j] += score * wj
    l = sorted(rank.items(), key=itemgetter(1), reverse=True)[0:topN]
    print('user_id ' + str(user_id) + ' : ')
    print(l)
    for item in l:
        # oldFile is a module-level file handle opened elsewhere.
        oldFile.write(str(user_id) + ' | ' + str(item[0]))
        oldFile.write("\n")
Code example #6
def Recommendation(train,user_id,W,K=10):
    rank=dict()
    ru=train[user_id]
    for i in ru:
        for j,wj in sorted(W[i].items(),key=itemgetter(1),reverse=True)[0:K]:
            if j in ru:
                continue
            if j not in rank:
                rank[j]=0
            rank[j]+=wj
    # return rank.items()  # early return left over from debugging; it made the code below dead
    end = 10
    if len(rank) < end:
        end = len(rank)
    return sorted(rank.items(), key=itemgetter(1), reverse=True)[0:end]
Code example #7
def trainInMemory():
    print("TEST IN MEMORY")

    test_set = [({word: (word in word_tokenize(x[0]))
                  for word in _ALL_WORDS}, x[1]) for x in __FILE_TRAIN]
    training_set = apply_features(extractFeature, test_set)
    classifier = NaiveBayesClassifier.train(training_set)

    featurized_test_sentence = {
        word.lower(): (word in word_tokenize(_TEXT.lower(),
                                             language=_LANGUAGE))
        for word in _ALL_WORDS
    }

    print("Classification: %s \nAccuracy: %.4f \n" %
          (classifier.classify(featurized_test_sentence),
           nltk.classify.accuracy(classifier, test_set)))

    dictProbs = classifier.prob_classify(featurized_test_sentence)

    # probLabels = getProbabilitiesAllLabels(dictProbs)  # TODO: fix this helper
    probLabels = []
    for label in dictProbs.samples():
        probLabels.append((label, dictProbs.prob(label)))
    probLabels.sort(key=itemgetter(0))  #(key=lambda tup: tup[0])
    print(probLabels)
    for content in probLabels:
        print("%.5f\t- %s" % (content[1], content[0]))
Code example #8
File: sknn.py  Project: rn5l/session-rec
    def most_recent_sessions(self, sessions, number):
        '''
        Find the most recent sessions in the given set

        Parameters
        --------
        sessions: set of session ids
        number: how many of the most recent sessions to keep

        Returns
        --------
        out : set
        '''
        sample = set()

        tuples = list()
        for session in sessions:
            time = self.session_time.get(session)
            if time is None:
                print(' EMPTY TIMESTAMP!! ', session)
            tuples.append((session, time))

        tuples = sorted(tuples, key=itemgetter(1), reverse=True)
        #print 'sorted list ', sortedList
        cnt = 0
        for element in tuples:
            cnt = cnt + 1
            if cnt > number:
                break
            sample.add(element[0])
        #print 'returning sample of size ', len(sample)
        return sample
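Sorting the whole list only to keep the top `number` entries can be replaced by a partial selection. A sketch of an equivalent helper (assuming, as above, that every session has a timestamp in `session_time`):

import heapq
from operator import itemgetter

def most_recent_sessions_nlargest(session_time, sessions, number):
    # O(n log k) selection instead of an O(n log n) full sort
    pairs = ((s, session_time[s]) for s in sessions)
    return {s for s, _ in heapq.nlargest(number, pairs, key=itemgetter(1))}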
Code example #9
def idf_analysis(articles, content):
    dict_map = wordgram_map(articles)
    dict_idf = OrderedDict()
    dict_file = wordgram_analyze(content)
    for dict_elem in dict_file.keys():
        dict_idf[dict_elem] = idf_value(len(articles), dict_map, dict_elem)
    return OrderedDict(sorted(dict_idf.items(),key=_operator.itemgetter(1),reverse=True))
Code example #10
File: web_robot.py  Project: ParkJinSang/Logle
def condition_minable(contents):
    dirpath = "../learning/data/preferences_doc/"

    # It seems like using DF not IDF?!
    title = text_handler.extract_html_title(contents)
    text = filter.filter_html_contents(contents)
    pref_dict = preference_analyzer.preference_from_text(text, dirpath, filter_files='../data/filters/stop-words_english_en.txt', text_title=title)

    import _operator
    from collections import OrderedDict
    sorted_pref_dict = OrderedDict(sorted(pref_dict.items(), key=_operator.itemgetter(1), reverse=True))
    print(sorted_pref_dict)

#     feature_list = []
#     iter_cnt = 0
#     for pref_key, pref_val in sorted_pref_dict.items():
#         if iter_cnt >= 10:
#             break
# #        feature_list.append(math_util.sigmoid(hash(pref_key) % 2**32))
#         feature_list.append(hash(pref_key))
#         feature_list.append(pref_val)
#         iter_cnt += 1
#
#     from learning.svm import generate_dataset
#     generate_dataset.generate_classify_data(feature_list, 10)

    return True
Code example #11
File: refnameapi_v1.py  Project: hexaroi/stk-upload
def search(prefix, usage, match):
    "Returns refnames starting with or containing prefix (case-insensitive)"
    if match == "startswith":
        cypher = cypher_search_refname_v1 % {'search_type': "STARTS WITH"}
    else:
        cypher = cypher_search_refname_v1 % {'search_type': "CONTAINS"}
    result = shareds.driver.session().run(cypher, prefix=prefix, usage=usage)
    records = []
    for rec in result:
        name = rec['name']
        source = rec['source']
        basename = rec['basename']
        num_neighbors = rec['num_neighbors']
        records.append(
            dict(name=name,
                 source=source,
                 basename=basename,
                 is_basename=(basename is None),
                 num_neighbors=num_neighbors))


#    records.append(surroundedBy=sorted(places1,key=lambda x:x['name']))
    return {
        "status": "OK",
        "statusText": "OK",
        "resultCount": len(records),
        "records": sorted(records, key=itemgetter("name")),
    }
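As this example shows, itemgetter also accepts string keys, so it sorts lists of dicts directly; a tiny standalone sketch:

from operator import itemgetter

people = [{"name": "Virtanen"}, {"name": "Aalto"}, {"name": "Mäkinen"}]
print(sorted(people, key=itemgetter("name")))
# [{'name': 'Aalto'}, {'name': 'Mäkinen'}, {'name': 'Virtanen'}]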
Code example #12
def cn_predict(G):
    start_cn = datetime.now()

    # print('Common neighbor prediction starting...')

    out = open('./predictions/common_neighbor.csv', 'w')
    outN = open('./predictions/common_neighbor_with_name.csv', 'w')
    hop2s = dict()
    neighbors = dict()
    cn_sim = defaultdict(dict)
    sortDic = {}

    left_set = [n for n, d in G.nodes(data=True) if d['bipartite'] == 0]
    right_set = [n for n, d in G.nodes(data=True) if d['bipartite'] == 1]

    dictionary = {}
    out.write('(left_element, right_element)')
    out.write(",")
    out.write('Probability')
    out.write("\n")

    # outN.write('(left_element, right_element)')
    # outN.write(",")
    # outN.write('Probability')
    # outN.write("\n")

    for left_element in left_set:
        # print('snp {} -- '.format(len(G[left_element])))
        hop2s[left_element] = getAdj2(G, list(set(G[left_element])), 1)
        # print('snp hop 2 {} -- '.format(len(hop2s[left_element])))
        for right_element in right_set:
            # print('cancer {} -- '.format(len(G[right_element])))
            neighbors[right_element] = list(set(G[right_element]))
            if not (left_element, right_element) in G.edges:
                cn_sim[left_element][right_element] = common_neighbors(
                    hop2s[left_element], neighbors[right_element])

                # if (left_element, right_element) in edge_subset:
                #   print((left_element, right_element), cn_sim[left_element][right_element])
                if cn_sim[left_element][right_element] > 0:
                    dictionary.update({
                        (left_element, right_element):
                        cn_sim[left_element][right_element]
                    })

    for k, v in sorted(dictionary.items(), key=itemgetter(1), reverse=True):
        # print(k[0],v)
        out.write(str((k[0], k[1])))
        out.write(",")
        out.write(str(cn_sim[k[0]][k[1]]))
        out.write("\n")
    #
    #     outN.write(str((df_nodes[k[0]], df_nodes[k[1]])))
    #     outN.write(",")
    #     outN.write(str(cn_sim[k[0]][k[1]]))
    #     outN.write("\n")
    # print('Common neighbor prediction finished successfully')
    end_cn = datetime.now()
    # print('Common neighbor duration: {}'.format(end_cn - start_cn), "\n")
    out.close()
    outN.close()
    return dictionary
Code example #13
File: itemCF.py  Project: whulyx/MyItemCFAndCrawler
def recommond(resultPath, user_count, user_dict, K, topN):
    W = measureSimilarity(user_dict)
    f = open(resultPath, "w")
    user_id = 1
    while user_id <= user_count:
        rank = defaultdict(int)  # reset the rank per user; easy to misplace this outside the loop
        for i, score in user_dict[user_id]: 
            for j, wj in sorted(W[i].items(), key = itemgetter(1), reverse=True)[0:K]: 
                if j in user_dict[user_id]:
                    continue
                rank[j] += score * wj       
        l = sorted(rank.items(), key = itemgetter(1), reverse = True)[0:topN]
        for item in l:
            f.write(str(user_id) + ' | ' + str(item[0]))
            f.write("\n")
        user_id += 1
    f.close()
Code example #14
File: functions.py  Project: michaelkm03/OneStarData
def build_page_lists():
    all_reviews = []
    one_star_review_list = []

    with open(modified_reviews_json_path) as file:
        review_data = json.loads(file.read())

        for item in review_data:
            if review_data[item]["unixReviewTime"] is None:
                time = 9999999999  # sentinel value for missing timestamps
            else:
                time = review_data[item]["unixReviewTime"]
            view_item = {
                "by": review_data[item]["reviewerName"],
                "descendants": 0,
                "id": item,
                "score": review_data[item]["score"],
                "time": time,
                "title": review_data[item]["reviewText"],
                "type": review_data[item]["rating"],
                "url": "http://www.google.com"
            }
            if review_data[item]["rating"] == 1.0:
                one_star_review_list.append(view_item)
            all_reviews.append(view_item)

    list_of_review_item_ids = []
    for review_item in sorted(one_star_review_list, key=itemgetter("score"), reverse=True):
        list_of_review_item_ids.append(review_item["id"])
    return list_of_review_item_ids, all_reviews
Code example #15
    def average_grade_at_all_disciplines(self):
        sd = self.__student_discipline_repo.get_all(
        )  # sd - list of student records with grades assigned across disciplines
        studentsId = []
        studentsIdDisc = []
        for i in sd:
            idC = i.get_student_id()  # current student id
            p = 0
            for j in studentsId:
                if j == idC:  # this id is already in the result list
                    p = 1

            if p == 0:
                studentsId.append(idC)
        for i in studentsId:
            suma = 0
            nr = 0
            for j in sd:
                if i == j.get_student_id():
                    suma = suma + j.get_grade()
                    nr = nr + 1
            studentsIdDisc.append((i, suma / nr))

        studentsIdDisc.sort(key=itemgetter(1), reverse=True)  # sorted()'s return value was being discarded
        return studentsIdDisc
Code example #16
File: cknn.py  Project: alirezagharahi/d_sknn
    def most_recent_sessions( self, sessions, number ):
        '''
        Desc: Find the most recent sessions in the corpus
        
        Input
        --------
        sessions: set of session ids
        number: how many of the most recent sessions to keep
        
        Output 
        --------
        set of sessions            
        '''
        sample = set()

        tuples = list()
        for session in sessions:
            time = self.session_time.get( session )
            if time is None:
                print(' EMPTY TIMESTAMP!! ', session)
            tuples.append((session, time))
            
        tuples = sorted(tuples, key=itemgetter(1), reverse=True)
        cnt = 0
        for element in tuples:
            cnt = cnt + 1
            if cnt > number:
                break
            sample.add( element[0] )
            
        return sample
Code example #17
File: Controller.py  Project: andidh/Python
 def filterByType(self, type):
     all = self.__repo.getAll()
     rez = []
     for e in all:
         if e.get_type() == type:
             rez.append([e.get_id(), e.get_type(), e.get_price()])
     rez = sorted(rez, key = itemgetter(2))
     return rez
Code example #18
def Recommend(user, train, W, K=5):
    rank = dict()
    interact_items = train[user]
    for v, wuv in sorted(W[user].items(), key=itemgetter(1),
                         reverse=True)[0:K]:
        for item in train[v]:
            if item in interact_items:
                continue
            if item not in rank:
                rank[item] = 0
            rank[item] += wuv * 1  # implicit feedback: every interaction counts as a rating of 1
#     return rank
    end = 10
    #     return rank.items()
    if len(rank) < end:
        end = len(rank)
    return sorted(rank.items(), key=itemgetter(1), reverse=True)[0:end]
Code example #19
File: Gcntr.py  Project: andidh/Python
 def filterGrade(self, grade):
     all = self.get_all()
     rez = []
     for g in all:
         if int(g.get_grade()) > grade:
             rez.append([g.get_stud().get_name(), str(g.get_grade()).strip()])
     rez = sorted(rez, key = itemgetter(1))
     return rez
Code example #20
File: refplaceeapi_v1.py  Project: hexaroi/stk-upload
def getplace(id):
    print('id:',id)
    result = shareds.driver.session().run(cypher_getplace,id=id).single()
    print('result:',result)
    if not result: return dict(status="Error",resultCount=0)
    p = result.get('p')
    largerPlaces = result['largerPlaces']
    smallerPlaces = result['smallerPlaces']
    places1 = []
    for h1,largerPlace,id2 in largerPlaces: 
        if largerPlace is None: break
        name2 = largerPlace['pname']
        type2 = largerPlace['type']
        place = dict(name=name2,type=type2,id=id2)
        datetype = h1['datetype']
        if datetype:
            date1 = h1['date1']
            date2 = h1['date2']
            d = DateRange(datetype, date1, date2)
            timespan = str(d)
            date1 = DateRange.DateInt(h1['date1']).long_date()
            date2 = str(DateRange.DateInt(h1['date2']))
            place['datetype'] = datetype
            place['date1'] = date1
            place['date2'] = date2
            place['timespan'] = timespan
        places1.append(place)
    places2 = []
    for h2,smallerPlace,id2 in smallerPlaces: 
        if smallerPlace is None: break
        name2 = smallerPlace['pname']
        type2 = smallerPlace['type']
        place = dict(name=name2,type=type2,id=id2)
        datetype = h2['datetype']
        if datetype:
            date1 = h2['date1']
            date2 = h2['date2']
            d = DateRange(datetype, date1, date2)
            timespan = str(d)
            date1 = str(DateRange.DateInt(h2['date1']))
            date2 = str(DateRange.DateInt(h2['date2']))
            place['datetype'] = datetype
            place['date1'] = date1
            place['date2'] = date2
            place['timespan'] = timespan
        places2.append(place)
    #names = [dict(name=pn['name'],lang=pn['lang']) for pn in result['names']]
    place = PlaceBl.from_node(p)
    place.names = [PlaceName.from_node(pn) for pn in result['names']]
    print(smallerPlaces)
    if smallerPlaces == [[None, None, None]]: smallerPlaces = []
    # place.surrounds = [PlaceName.from_node(p2) for (h2, p2, id2) in smallerPlaces]
    # (dead store: immediately overwritten by the sorted dicts below)
    place.surrounds = sorted(places2, key=itemgetter('name'))
    return {
        "status": "OK",
        "statusText": "OK",
        "resultCount": 1,
        "place": place,
    }
Code example #21
def aa_predict(G):
    start_aa = datetime.now()

    # print('Adamic_adar prediction starting...')

    out = open('./predictions/adamic_adar.csv', 'w')
    outN = open('./predictions/adamic_adar_with_name.csv', 'w')
    hop2s = dict()
    neighbors = dict()
    aa_sim = defaultdict(dict)
    sortDic = {}
    left_set = [n for n, d in G.nodes(data=True) if d['bipartite'] == 0]
    right_set = [n for n, d in G.nodes(data=True) if d['bipartite'] == 1]
    # print('left side: ', left_set.__len__())
    dictionary = {}
    out.write('(left_element, right_element)')
    out.write(",")
    out.write('Probability')
    out.write("\n")

    # outN.write('(left_element, right_element)')
    # outN.write(",")
    # outN.write('Probability')
    # outN.write("\n")
    exception_count = 0
    for left_element in left_set:
        hop2s[left_element] = getAdj2(G, list(set(G[left_element])), 1)
        for right_element in right_set:
            neighbors[right_element] = list(set(G[right_element]))
            if not (left_element, right_element) in G.edges:
                try:
                    aa_sim[left_element][right_element] = adamic_adar(
                        hop2s[(left_element)], neighbors[(right_element)], G)
                    if aa_sim[left_element][right_element] > 0:
                        # print(left_element, right_element, aa_sim[left_element][right_element])
                        dictionary.update({
                            (left_element, right_element):
                            aa_sim[left_element][right_element]
                        })
                except Exception:
                    exception_count += 1
                    print(exception_count)

    for k, v in sorted(dictionary.items(), key=itemgetter(1), reverse=True):
        # print(k[0],v)
        out.write(str((k[0], k[1])))
        out.write(",")
        out.write(str(aa_sim[k[0]][k[1]]))
        out.write("\n")

        # outN.write(str((df_nodes[k[0]], df_nodes[k[1]])))
        # outN.write(",")
        # outN.write(str(aa_sim[k[0]][k[1]]))
        # outN.write("\n")
    # print('Adamic-adar prediction finished successfully')
    end_aa = datetime.now()
    # print('Adamic-adar duration: {}'.format(end_aa - start_aa), "\n")
    out.close()
    outN.close()
    return dictionary
Code example #22
 def overDueRentals(self):
     stat = []
     for r in self.getRentals():
         if r.getReturnedDate() is None:
             stat.append({
                 "rentalID": r.getRentalID(),
                 "overdue": (date.today() - r.getDueDate()).days  # .days is an attribute, not a method
             })
     return sorted(stat, key=itemgetter("overdue"), reverse=True)
Code example #23
    def test_getter_multiple_gest(self):
        import _operator as operator

        class A(object):
            pass

        a = A()
        a.x = 'X'
        a.y = 'Y'
        a.z = 'Z'

        assert operator.attrgetter('x', 'z', 'y')(a) == ('X', 'Z', 'Y')
        e = raises(TypeError, operator.attrgetter, ('x', (), 'y'))
        assert str(e.value) == "attribute name must be a string, not 'tuple'"

        data = list(map(str, range(20)))
        assert operator.itemgetter(2, 10, 5)(data) == ('2', '10', '5')
        raises(TypeError, operator.itemgetter(2, 'x', 5), data)
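Beyond extracting tuples, a multi-index getter is a convenient multi-key sort key; a small standalone sketch:

from operator import itemgetter

rows = [('b', 2), ('a', 2), ('a', 1)]
# itemgetter(1, 0) returns a (second, first) tuple per row, so this sorts
# by the second field and breaks ties on the first.
print(sorted(rows, key=itemgetter(1, 0)))
# [('a', 1), ('a', 2), ('b', 2)]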
Code example #24
 def order(self):
     l=[]
     for b in self.__repob.getAll():
         for r in self.__repor.getAll():
             if r.getID()==b.getRID():
                 l.append([int(b.getBID()),int(b.getTimes())*int(r.getDist())])
     l = sorted(l, key=itemgetter(1), reverse=True)
     for i in l:
         print(i)
Code example #25
File: htmlToDoc.py  Project: snowyxx/MyTest
def usage():
    mypath = sys.argv[0]
    myname = mypath[mypath.rindex('\\') + 1:] if '\\' in mypath else mypath
    print('Usage:\n\npython ' + myname + ' <product name> <index file path>\n')
    print('For example:')
    for (pro, path) in sorted(productDict.items(), key=itemgetter(0)):
        print('python ' + myname + ' ' + pro + ' ' + path)
    print('python ' + myname + ' all ----This will take a very long time')
    print('\nRequires: python3; pywin32; Windows OS; Office Word')
Code example #26
def _evaluate_method(G, k, method):
    kf = KFold(n_splits=k, shuffle=True)
    precision_sum = 0
    auc_sum = 0
    print(tabulate([[f'Starting to calculate {method}']], tablefmt='grid'))
    iterator = 0
    for train_index, test_index in kf.split(list(G.edges)):
        G_train = G.copy()
        np_edges = np.array(list(G.edges))
        test_edges = np_edges[test_index]
        G_train.remove_edges_from(test_edges)
        # print('G_train(node, edge): ', G_train.number_of_nodes(), G_train.number_of_edges())
        print('Iteration %i / %i :' % (iterator, k))
        # -------------------------------------------------------------------
        if method == 'jc':
            predicted = pr.jc_predict(G_train)
        elif method == 'aa':
            predicted = pr.aa_predict(G_train)
        elif method == 'cn':
            predicted = pr.cn_predict(G_train)
        elif method == 'pa':
            predicted = pr.pa_predict(G_train)
        else:
            raise Exception('Entered method is not valid', method)
        # -------------------------------------------------------------------
        precision = (len(set(predicted.keys()) & set(map(tuple, test_edges)))
                     / len(set(predicted.keys())))
        precision_sum += precision
        print('precision: ', precision)

        # -------------------------------------------------------------------

        score_algo, label_algo = zip(
            *[(float(score), label in test_edges) for label, score in sorted(
                predicted.items(), key=itemgetter(1), reverse=True)])
        # Compute the ROC AUC Score
        fpr_algo, tpr_algo, _ = roc_curve(label_algo, score_algo)
        auc_algo = roc_auc_score(label_algo, score_algo)
        print("auc: ", auc_algo)
        auc_sum += auc_algo
        # -------------------------------------------------------------------
        iterator += 1
        print('---' * 20)
    overall_precision = precision_sum / k
    overall_auc = auc_sum / k
    print(
        tabulate([[
            "%i-fold evaluation overall precision: %f" % (k, overall_precision),
            "%i-fold evaluation overall auc: %f" % (k, overall_auc)
        ]],
                 tablefmt='jira'))
    headers = ['overall_precision', 'overall_auc']
    table = [[overall_precision, overall_auc]]
    print(tabulate(table, headers, tablefmt="pipe"))
    return [overall_precision, overall_auc, fpr_algo, tpr_algo]
Code example #27
 def printTags(self):
     s_arr = []
     self.sortedTags = []
     si_tags = list(self.s_tags.items())
     si_tags.sort(key=_operator.itemgetter(1), reverse=True)
     for i in si_tags:
         s_arr.append("{0} ({1})".format(i[0], i[1]))
         self.sortedTags.append(i[0])
     return s_arr
Code example #28
File: LFM.py  Project: wei-yao/recommend_algorithms
def Recommend(user, P, Q,train, K=30):
    rank = dict()
    userItems = train[user]
    for item in Q.keys():
        if item in userItems:
            continue
        rank[item] = 0
        for f in range(0, F):  # F: number of latent factors (module-level constant)
            rank[item] += P[user][f] * Q[item][f]
    return sorted(rank.items(), key=itemgetter(1), reverse=True)[0:K]
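The score in this Recommend is simply the dot product of the user and item factor vectors; a tiny numeric sketch (P, Q, and F here are made up):

from operator import itemgetter

F = 2
P = {'u1': [0.5, 1.0]}                    # user factor vectors
Q = {'m1': [1.0, 0.0], 'm2': [0.2, 0.8]}  # item factor vectors

rank = {item: sum(P['u1'][f] * Q[item][f] for f in range(F)) for item in Q}
print(sorted(rank.items(), key=itemgetter(1), reverse=True))
# [('m2', 0.9), ('m1', 0.5)]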
Code example #29
def Recommend(graph,maxUserId,user, k=10):
#     maxUserId=max(train.keys())
    ret=dict()
    rank=PersonalRank(graph, user)
    for id, value in rank.items():
        if id > maxUserId:
            # item nodes are stored in the graph with ids offset by maxUserId
            if id not in graph[user]:
                ret[id - maxUserId] = value
    return sorted(ret.items(),key=itemgetter(1),reverse=True)[0:k]
Code example #30
File: titanic.py  Project: pokk/algorithm
    def accuracy(self):
        res = []

        for i in range(1, self.n_estimators):
            for j in range(1, self.min_samples_leaf):
                self.classification = RandomForestClassifier(
                    n_estimators=i,
                    random_state=self.r_random_state,
                    min_samples_leaf=j)
                self._modeling(self.train[:testing_training_number],
                               self.label[:testing_training_number])
                res.append((i, j,
                            self.score(self.train[testing_training_number:],
                                       self.label[testing_training_number:])))

        return max(res, key=itemgetter(2))
Code example #31
 def mostRentedAuthors2(self):
     """
     function to return a list of authors with the numbers of rentals respectively in descending order
     """
     stat = []
     for b in self.getBooks():
         rentals = self.filterRentals2(None, None, b.getAuthor())
         rents = len(rentals)
         stat.append({"author": b.getAuthor(), "rents": rents})
     return sorted(stat, key=itemgetter("rents"), reverse=True)
Code example #32
def pa_predict(G):
    start_pa = datetime.now()
    # print('Preferential_attachment prediction starting...')
    dictionary = {}
    out = open('./predictions/preferential_attachment.csv', 'w')
    outN = open('./predictions/preferential_attachment_with_name.csv', 'w')
    hop2s = dict()
    neighbors_right_element = dict()
    neighbors_left_element = dict()
    pa_sim = defaultdict(dict)
    sortDic = {}
    left_set = [n for n, d in G.nodes(data=True) if d['bipartite'] == 0]
    right_set = [n for n, d in G.nodes(data=True) if d['bipartite'] == 1]

    out.write('(left_element, right_element)')
    out.write(",")
    out.write('Probability')
    out.write("\n")

    # outN.write('(left_element, right_element)')
    # outN.write(",")
    # outN.write('Probability')
    # outN.write("\n")

    for left_element in left_set:
        # hop2s[left_element] = getAdj2(G, list(set(G[left_element])), 1)
        neighbors_left_element[left_element] = list(set(G[left_element]))
        for right_element in right_set:
            neighbors_right_element[right_element] = list(set(
                G[right_element]))
            if not (left_element, right_element) in G.edges:
                pa_sim[left_element][right_element] = preferential_attachment(
                    neighbors_left_element[(left_element)],
                    neighbors_right_element[(right_element)])
                if pa_sim[left_element][right_element] > 0:
                    dictionary.update({
                        (left_element, right_element):
                        pa_sim[left_element][right_element]
                    })

    for k, v in sorted(dictionary.items(), key=itemgetter(1), reverse=True):
        # print(k[0],v)
        out.write(str((k[0], k[1])))
        out.write(",")
        out.write(str(pa_sim[k[0]][k[1]]))
        out.write("\n")

        # outN.write(str((df_nodes[k[0]], df_nodes[k[1]])))
        # outN.write(",")
        # outN.write(str(pa_sim[k[0]][k[1]]))
        # outN.write("\n")
    # print('Preferential_attachment prediction finished successfully')
    end_pa = datetime.now()
    # print('Preferential_attachment duration: {}'.format(end_pa - start_pa), "\n")
    out.close()
    outN.close()
    return dictionary
Code example #33
def jc_predict(G):
    start_jc = datetime.now()

    # print('Jaccard prediction starting...')
    dictionary = {}
    out = open('./predictions/jaccard.csv', 'w')
    outN = open('./predictions/jaccard_with_name.csv', 'w')
    hop2s = dict()
    neighbors = dict()
    jaccard_sim = defaultdict(dict)
    left_set = [n for n, d in G.nodes(data=True) if d['bipartite'] == 0]
    right_set = [n for n, d in G.nodes(data=True) if d['bipartite'] == 1]

    out.write('(left_element, right_element)')
    out.write(",")
    out.write('Probability')
    out.write("\n")

    # outN.write('(left_element, right_element)')
    # outN.write(",")
    # outN.write('Probability')
    # outN.write("\n")
    exception_count = 0
    for left_element in left_set:
        hop2s[left_element] = getAdj2(G, list(set(G[left_element])), 1)
        for right_element in right_set:
            neighbors[right_element] = list(set(G[right_element]))
            if not (left_element, right_element) in G.edges:
                try:
                    jaccard_sim[left_element][right_element] = jaccard(
                        hop2s[(left_element)], neighbors[(right_element)])
                    if jaccard_sim[left_element][right_element] > 0:
                        dictionary.update({
                            (left_element, right_element):
                            jaccard_sim[left_element][right_element]
                        })
                except Exception:
                    exception_count += 1
                    print(exception_count)

    for k, v in sorted(dictionary.items(), key=itemgetter(1), reverse=True):
        # print(k[0],v)
        out.write(str((k[0], k[1])))
        out.write(",")
        out.write(str(jaccard_sim[k[0]][k[1]]))
        out.write("\n")

        # outN.write(str((df_nodes[k[0]], df_nodes[k[1]])))
        # outN.write(",")
        # outN.write(str(jaccard_sim[k[0]][k[1]]))
        # outN.write("\n")
    # print('Jaccard prediction finished successfully')
    end_jc = datetime.now()
    # print('Jaccard duration: {}'.format(end_jc - start_jc), "\n")
    out.close()
    outN.close()
    return dictionary
Code example #34
 def get_serial_parametr(self, item_pattern):
     if item_pattern in self.__serial_params_cache:
         return self.__serial_params_cache[item_pattern]
     search_format = re.compile(item_pattern.format(r"(?P<param_idx>\d+)"))
     matches = []
     for name, value in par_map.items():
         m = re.match(search_format, name)
         if m:
             matches.append((name, value, int(m.group('param_idx'))))
     matches = sorted(matches, key=itemgetter(2))
     return SerialParam._create_from_matches(matches)
Code example #35
File: text_analyzer.py  Project: ParkJinSang/Logle
    def tfidf_analyze(self, path=None, text=None):
        """
        Calculate TF-IDF value of the document which is in the certain directory.
        :param path: Target document's path
        :return: TF-IDF dictionary of the document.
        """

        if path is not None:
            dict_tf = self.tf_analyze(path=path)
            dict_idf = self.idf_analyze(path=path)
        else:
            dict_tf = self.tf_analyze(text=text)
            dict_idf = self.idf_analyze(text=text)

        tf_idf_dict = OrderedDict()
        for tf_elem in dict_tf.keys():
            tf_idf_dict[tf_elem] = dict_tf[tf_elem] * dict_idf[tf_elem]

        sorted_tf_idf_dict = OrderedDict(sorted(tf_idf_dict.items(), key=_operator.itemgetter(1), reverse=True))
        return sorted_tf_idf_dict
Code example #36
File: text_analyzer.py  Project: ParkJinSang/Logle
    def idf_analyze(self, path=None, text=None):
        """
        Get the IDF values from documents below directory path.
        :param path: Target document path.
        :return: Dictionary of words mapped word as a key and IDF value as a value.
        """
        # dict_map = self.wordgram_map(dirpath)

        if path is not None:
            contents = fileio.read_file(path)
        elif path is None and text is not None:
            contents = text
        else:
            return None

        dict_file = ngram.wordgram_analyze(contents)
        dict_idf = OrderedDict()
        for dict_elem in dict_file.keys():
            dict_idf[dict_elem] = self.idf_value(self.dict_map, dict_elem)
        sorted_idf_dict = OrderedDict(sorted(dict_idf.items(), key=_operator.itemgetter(1), reverse=True))
        return sorted_idf_dict
Code example #37
File: text_analyzer.py  Project: ParkJinSang/Logle
    def tf_analyze(self, path=None, text=None):
        """
        Return TF value dictionary in the files inside directory path.
        :param path: Directory path to analyze.
        :param contents: Contents to analyze tf value.
        :return: Sorted dictionary of TF.
        """

        if path is not None:
            contents = fileio.read_file(path)
        elif path is None and text is not None:
            contents = text
        else:
            return None

        dictionary = ngram.wordgram_analyze(contents)
        tf_dict = OrderedDict()
        for k, v in dictionary.items():
            tf_dict[str(k)] = self.tf_value(dictionary, str(k))
        sorted_tf_dict = OrderedDict(sorted(tf_dict.items(), key=_operator.itemgetter(1), reverse=True))
        return sorted_tf_dict
Code example #38
File: LFM.py  Project: wei-yao/recommend_algorithms
def classTopN(Q, data='movies.dat', output='topN.txt'):
    top = dict()
    movies = dict()
    k = 10
    with open(data, 'r', encoding='utf8') as handle:
        for line in handle:
            itemId, name, genre = line.split('::')[0:3]
            # print(itemId)
            movies[int(itemId)] = name + '  ' + genre
    for i in range(0, F):  # F: number of latent factors (module-level constant)
        temp = {}
        for item in Q.keys():
            temp[item] = Q[item][i]
        tempTop = sorted(temp.items(), key=itemgetter(1), reverse=True)[0:k]
        top[i] = tempTop

        with open(output, 'a') as outputFile:
            outputFile.write(str(i) + '**********')
            for id, score in tempTop:
                outputFile.write(movies[id])
            outputFile.write('*****************')
    return top
Code example #39
 def __init__(self, **kwargs):
     field_names = tuple(map(itemgetter(0), self._fields))
     for key, value in kwargs.items():
         if key in field_names:
             setattr(self, key, value)
     super().__init__()
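A sketch of how a class might consume this initializer (the FieldInit/Point names and the (name, default) field layout are illustrative assumptions):

from operator import itemgetter

class FieldInit:
    _fields = ()

    def __init__(self, **kwargs):
        field_names = tuple(map(itemgetter(0), self._fields))
        for key, value in kwargs.items():
            if key in field_names:
                setattr(self, key, value)
        super().__init__()

class Point(FieldInit):
    _fields = (('x', 0), ('y', 0))  # (name, default) pairs; itemgetter(0) pulls the names

p = Point(x=3, y=4, z=9)  # z is silently ignored: not listed in _fields
print(p.x, p.y)           # 3 4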
Code example #40
q1 = input("have you seen cartoon characters ?")
if q1=="yes":
    user_answer.append(1)
else:
    user_answer.append(0)

    q1 = input("have you seen any cars ?")
if q1=="yes":
    user_answer.append(1)
else:
    user_answer.append(0)

q1 = input("have you seen any wars ?")
if q1=="yes":
    user_answer.append(1)
else:
    user_answer.append(0)


user_answer
final_data = list()
# calculate the similarity between the user answers vector and all the data set genres.
# then sort them and pick the highest one. (K=1)
for k,v in dataset.items():
    sim_of_gen = GetCosSimilarityForGenres(user_answer,v)
    final_data.append([k,sim_of_gen])
sorted_data = sorted(final_data,key=itemgetter(1),reverse=True)

# print the result
print("Your film is classified as: ", sorted_data[0][0], ", by: ", round(sorted_data[0][1] * 100), "%")
    
Code example #41
File: contr.py  Project: andidh/Python
 def report(self):
     all = self.product
     all = sorted(all, key=itemgetter(3), reverse=True)  # sort by the fourth field, descending
     return all
Code example #42
def splitattribute():
    # Breadth-first construction of a decision tree over two attributes.
    queue = []
    root.data = list  # 'list' here is the module-level training data (it shadows the builtin)
    queue.append(root)
    count = 0
    while len(queue) > 0:

        current = queue.pop(0)

        data = current.data
        leftsplit = []
        rightsplit = []
        # candidate orderings by attribute 0 and attribute 1
        data1 = sorted(data, key=itemgetter(0))
        data2 = sorted(data, key=itemgetter(1))

        records = [data1, data2]
        (bestt, besti, bestabove, bestbelow) = bestsplit(records)

        for i in range(len(data)):
            if data[i][besti] <= bestt:
                leftsplit.append(data[i])
            else:
                rightsplit.append(data[i])

        count = count + 1
        current.threshold = bestt
        current.attributeindex = besti

        # Both original branches built identical child nodes; the only
        # difference was whether the child is queued for further splitting,
        # so the node construction is shared here.
        l = node()
        l.data = leftsplit
        l.parent = current
        l.classval = bestbelow
        l.d = current.d + 1
        current.left = l
        if len(leftsplit) > 0 and bestbelow.count(0) < 3:
            queue.append(l)

        r = node()
        r.data = rightsplit
        r.parent = current
        r.classval = bestabove
        r.d = current.d + 1
        current.right = r
        if len(rightsplit) > 0 and bestabove.count(0) < 3:
            queue.append(r)