Esempi in Python per DB.getAuthors

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: DBUtil

Classe/tipologia: DB

Metodo/funzione: getAuthors

Esempi su hotexamples.com: 2

DB.getAuthors in Python: 2 esempi trovati. Questi sono i migliori esempi reali in Python per DBUtil.DB.getAuthors, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

DB(3)

connect(3)

getAuthors(2)

addCard(1)

checkin(1)

close(1)

getPapers(1)

getPatents(1)

getProjects(1)

idInDB(1)

showPoints(1)

Esempio n. 1

Mostra file

 def get_author_by_sql(self, typee, ids):
     """Return ``DB().getAuthors(typee, ids)`` using a freshly built ``DB``.

     Per the original inline note, the information is fetched through MySQL.
     ``typee`` and ``ids`` are forwarded unchanged; the result is whatever
     ``DB.getAuthors`` returns.
     """
     return DB().getAuthors(typee, ids)

Esempio n. 2

Mostra file

File: similarity.py Progetto: coolsnake/TonrnadoRecomendServer

class Recommander(object):
    """Similarity-based lookup of papers, patents, projects and experts.

    The instance holds a word-vector model, one Annoy index per document
    kind, a text-to-vector converter, a tokenizer/filter, a ``DB`` handle and
    an in-memory feature index used by :meth:`filter`.
    """

    def __init__(self, vec_file, pap, pat, pro):
        """Load the model, the three indexes and the feature index.

        Args:
            vec_file: word2vec file, loaded with ``binary=True``.
            pap: argument passed to ``AnnoyIndexer.load`` for the paper index.
            pat: same, for the patent index.
            pro: same, for the project index.
        """
        # self.wm = gensim.models.KeyedVectors.load_word2vec_format(vec_file,binary=True)
        self.wm = gensim.models.word2vec.Word2Vec.load_word2vec_format(
            vec_file, binary=True)
        self.paper_index = AnnoyIndexer()
        self.paper_index.load(pap)
        self.patent_index = AnnoyIndexer()
        self.patent_index.load(pat)
        self.project_index = AnnoyIndexer()
        self.project_index.load(pro)
        self.t2v = Convert2Vec(self.wm)
        self.cuttor = FilterCut()
        self.db = DB()
        self.featureIndex = self.buildFeatureIndex()

    def buildFeatureIndex(self):
        """Read the three feature files and index each with loadFeature.

        Returns:
            dict with keys ``'paper'``, ``'patent'`` and ``'project'``; each
            value is the index produced by :meth:`loadFeature`.
        """
        featureFiles = (
            ('paper',
             "/testdata400/data/recommender/data0828/feature/paper_feature.txt"),
            ('patent',
             "/testdata400/data/recommender/data0828/feature/patent_feature.txt"),
            ('project',
             "/testdata400/data/recommender/data0828/feature/project_feature.txt"),
        )
        featureIndex = {}
        for typee, path in featureFiles:
            # ``with`` guarantees the handle is closed even if parsing fails.
            with open(path, 'r') as featureFile:
                featureIndex[typee] = self.loadFeature(featureFile)
        return featureIndex

    def loadFeature(self, file):
        """Build inverted indexes from tab-separated feature lines.

        Each line is split on tabs: column 0 is the document id and columns
        1-4 are its field, type, province and unit.

        Args:
            file: an open file (or any iterable of text lines).

        Returns:
            dict with keys ``'field'``, ``'type'``, ``'province'`` and
            ``'unit'``; each maps a column value to the list of document ids
            carrying it, in file order.

        Raises:
            IndexError: if a non-empty line has fewer than five columns.
        """
        columns = (('field', 1), ('type', 2), ('province', 3), ('unit', 4))
        index = dict((name, {}) for name, _ in columns)
        for line in file:
            # Drop the line terminator so the last column does not end in
            # '\n' (otherwise its keys could never match a filter value).
            line = line.rstrip('\r\n')
            if not line:
                continue  # tolerate blank lines, e.g. at end of file
            feature = line.split('\t')
            docId = feature[0]
            for name, column in columns:
                index[name].setdefault(feature[column], []).append(docId)
        return index

    # Filter papers, projects and patents.
    def filter(self, typee, topDocs, filterParams, topN):
        """Keep the documents of ``topDocs`` that satisfy every active filter.

        Args:
            typee: document kind, a key of ``self.featureIndex``.
            topDocs: sequence of ``(doc_id, similarity)`` pairs.
            filterParams: indexable of four values, in the order field, type,
                province, unit. An empty string or ``'-1'`` disables that
                filter. The field filter is never applied when ``typee`` is
                ``'project'``.
            topN: scanning stops as soon as this many documents are kept.

        Returns:
            list of the surviving entries of ``topDocs`` in original order.
            A filter value that is absent from the feature index matches no
            document, so the result is empty.
        """
        allowedIds = set(docId for docId, _ in topDocs)
        for position, name in enumerate(('field', 'type', 'province', 'unit')):
            value = filterParams[position]
            if value == '' or value == '-1':
                continue
            if name == 'field' and typee == 'project':
                continue
            # ``.get`` turns an unknown value into "no match" instead of the
            # KeyError the direct lookup used to raise.
            allowedIds.intersection_update(
                self.featureIndex[typee][name].get(value, ()))
        result = []
        for doc in topDocs:
            if doc[0] in allowedIds:
                result.append(doc)
            if len(result) == topN:
                break
        return result

    # Legacy variant kept for reference: does not filter by region and
    # returns every matching document rather than only the top-n ones.
    # def filterForExpert(self, typee, topDocs, filterParams):
    #     topDocIds = [i for i,j in topDocs]
    #     if not (filterParams[0] == '' or filterParams[
    #         0] == '-1' or typee == 'project'):  # field; skipped for projects; '' or -1 means no filtering
    #         if filterParams[0] not in self.featureIndex[typee]['field']:
    #             topDocIds = []
    #         topDocIds = list(set(topDocIds).intersection(self.featureIndex[typee]['field'][filterParams[0]]))
    #     if not (filterParams[1] == '' or filterParams[1] == '-1'):  # type
    #         if filterParams[1] not in self.featureIndex[typee]['type']:
    #             topDocIds = []
    #         topDocIds = list(set(topDocIds).intersection(self.featureIndex[typee]['type'][filterParams[1]]))
    #     if not (filterParams[3] == '' or filterParams[3] == '-1'):  # unit
    #         if filterParams[3] not in self.featureIndex[typee]['unit']:
    #             topDocIds = []
    #         topDocIds = list(set(topDocIds).intersection(self.featureIndex[typee]['unit'][filterParams[3]]))
    #     result = []
    #
    #     topDocsMap = {}
    #     for i in range(len(topDocs)):
    #         topDocsMap[topDocs[i][0]]=topDocs[i][1]
    #     for id in topDocIds:
    #         listTemp = [id,topDocsMap[id]]
    #         result.append(listTemp)
    #     return result

    def most_similar_paper(self, text, topn=10):
        """Vectorise ``text`` and query the paper index for ``topn`` hits."""
        vec = self.t2v.text2v(text, self.cuttor)
        return self.paper_index.most_similar(vec, topn)

    def most_similar_patent(self, text, topn=10):
        """Vectorise ``text`` and query the patent index for ``topn`` hits."""
        vec = self.t2v.text2v(text, self.cuttor)
        return self.patent_index.most_similar(vec, topn)

    def most_similar_project(self, text, topn=10):
        """Vectorise ``text`` and query the project index for ``topn`` hits."""
        vec = self.t2v.text2v(text, self.cuttor)
        return self.project_index.most_similar(vec, topn)

    def getSimExpertsIds(self, topDocs):
        """Collect weighted similarities per expert from the top documents.

        Args:
            topDocs: dict mapping a document kind to a sequence of
                ``(doc_id, similarity)`` pairs.

        Returns:
            ``(expertMap, expertInfoOut)`` where ``expertMap`` maps an expert
            id to the list of its weighted similarities, and
            ``expertInfoOut`` maps an expert id to a list of
            ``[kind + rank, weighted similarity, author position]`` entries.
            ``self.db.getAuthors`` is used as a mapping from document id to
            an ordered sequence of expert ids.
        """
        # Weight by author position; only the first four authors count.
        authorSeqWeiht = [1.0, 0.85, 0.7, 0.5]
        expertInfoOut = {}
        expertMap = {}
        for typee in topDocs:
            docs = topDocs[typee]
            rankOf = {}
            simOf = {}
            for rank, (docId, score) in enumerate(docs):
                rankOf[docId] = rank            # last occurrence wins
                simOf.setdefault(docId, score)  # first occurrence wins
            ids = [docId for docId, _ in docs]
            docExpertIds = self.db.getAuthors(typee, ids)
            for docId in docExpertIds:
                if not self.db.idInDB(typee, docId):
                    # %-formatting keeps the message text and also works when
                    # the id is not a string.
                    print("docId:%sis not in db" % (docId,))
                    continue
                expertIds = docExpertIds[docId]
                # Documents missing from topDocs fall back to 1.0.
                sim = simOf[docId] * 1.0 if docId in simOf else 1.0
                # zip() stops after four authors (length of the weight list).
                for seq, (expertId, weight) in enumerate(
                        zip(expertIds, authorSeqWeiht)):
                    weighted = sim * weight
                    expertInfoOut.setdefault(expertId, []).append(
                        [typee + str(rankOf[docId]), weighted, seq])
                    expertMap.setdefault(expertId, []).append(weighted)
        return expertMap, expertInfoOut

    # Design note from the original author: when extracting experts from the
    # documents, some experts rank high without the province filter but low
    # with it. To avoid that, the idea was to extract experts from documents
    # that are not filtered by region, filter the experts by region
    # afterwards, and, if fewer than topN remain, take the rest from the
    # region-filtered documents.
    #
    # The author also noted this function needs refactoring, which was
    # postponed because the requirements were expected to change.
    def most_similar_expert(self, topPapers, topPatents, topProjects,
                            filterParams, expertTopN):
        """Rank experts from the filtered top papers, patents and projects.

        Reads ``len`` and ``coe`` from the ``[global]`` section of
        ``config.ini`` in the working directory. An expert's score is the
        best weighted similarity plus ``coe`` times each of the next best
        ones, using at most ``len`` similarities in total.

        Args:
            topPapers, topPatents, topProjects: sequences of
                ``(doc_id, similarity)`` pairs.
            filterParams: passed to :meth:`filter` for every document kind.
            expertTopN: number of experts to return.

        Returns:
            list of ``(expert_id, score)`` tuples, best first. The details of
            the returned experts are also written to a log file by
            :meth:`printOut`.
        """
        with open("config.ini", 'r') as configFile:
            config = ConfigParser.ConfigParser()
            config.readfp(configFile)
        # Number of documents taken into account per expert.
        LEN = int(config.get('global', 'len'))
        # Coefficient applied to similarities from the second document on.
        COE = float(config.get('global', 'coe'))
        topDocs = {}
        topDocs['paper'] = self.filter('paper', topPapers, filterParams, 50)
        topDocs['patent'] = self.filter('patent', topPatents, filterParams, 50)
        topDocs['project'] = self.filter('project', topProjects, filterParams,
                                         15)
        # expertMap: expert id -> list of similarities of its documents.
        expertMap, expertInfoOut = self.getSimExpertsIds(topDocs)
        expertScoreMap = {}  # expert id -> score
        for expert, sims in expertMap.items():
            sims.sort(reverse=True)
            score = sims[0]
            # max(LEN, 1) keeps the slice empty for LEN <= 1.
            for extra in sims[1:max(LEN, 1)]:
                score = score + COE * extra
            expertScoreMap[expert] = score
        result = sorted(expertScoreMap.items(),
                        key=lambda item: item[1],
                        reverse=True)[0:expertTopN]
        out = [{expert: expertInfoOut[expert]}
               for expert, _ in result if expert in expertInfoOut]
        self.printOut(out, LEN)
        return result

    def printOut(self, out, l):
        """Write the per-expert document details to a timestamped log file.

        The file is ``log/<YYYY-mm-dd HH-MM-SS>.txt`` relative to the working
        directory; its name is also printed. The ``log`` directory is created
        when missing.

        Args:
            out: list of single-entry dicts ``{expert_id: entries}`` where
                each entry is ``[doc label, similarity, author position]``.
                The entry lists are replaced in place by their ``l`` best
                entries, sorted by similarity.
            l: maximum number of entries kept per expert.
        """
        import os  # local import: the file's import block is not visible

        name = str('log/' + time.strftime("%Y-%m-%d %H-%M-%S" +
                                          ".txt", time.localtime()))
        print(name)
        logDir = os.path.dirname(name)
        if not os.path.isdir(logDir):
            os.makedirs(logDir)
        for expert in out:
            for expertId in expert:
                expert[expertId] = sorted(expert[expertId],
                                          key=lambda doc: doc[1],
                                          reverse=True)[0:l]
        with open(name, 'w') as output:
            for expert in out:
                for expertId in expert:
                    docs = expert[expertId]
                    # Expert id, then one row each for document labels,
                    # similarities and author positions.
                    output.write("%s\n" % (expertId,))
                    output.write(''.join(
                        doc[0] + '                  ' for doc in docs) + '\n')
                    output.write(''.join(
                        str(doc[1]) + '             ' for doc in docs) + '\n')
                    output.write(''.join(
                        str(doc[2]) + '                            '
                        for doc in docs) + '\n')
                    output.write("\n")

    # Earlier version kept for reference; it predates getSimExpertsIds
    # returning a tuple.
    # def most_similar_expert(self, text, topDocs):
    #     expertMap = self.getSimExpertsIds(topDocs)  # expert id -> list of document similarities
    #     expertScoreMap = {}  # expert id -> score
    #     for expert in expertMap:
    #         expertMap[expert].sort(reverse=True)
    #         sim = expertMap[expert][0]
    #         for i in range(1, len(expertMap[expert])):
    #             if i >= 4:
    #                 break
    #             sim = sim + 0.04 * expertMap[expert][i]
    #         expertScoreMap[expert] = sim
    #     return sorted(expertScoreMap.items(), key=lambda item: item[1], reverse=True)

    def get_model(self):
        """Return the loaded word-vector model (``self.wm``)."""
        return self.wm

    def get_cuttor(self):
        """Return the ``FilterCut`` instance (``self.cuttor``)."""
        return self.cuttor