def get_publications_topics_topics(
    authorParams: AuthorParam, probability: Optional[float]
):
    """Assemble the aggregation pipeline that counts pairs of distinct topics.

    Documents carrying both ``pubid`` and ``topics`` are joined with the
    ``contexts`` collection on their own ``_id``; each topic of the document
    is thereby paired with each topic of the joined ``cont`` document.  Pairs
    whose titles are equal are discarded, the rest are normalised so that
    ``topic1 >= topic2``, collapsed to one entry per source document and then
    counted.

    Args:
        authorParams: forwarded to ``filter_by_pubs_acc`` and
            ``_add_topic2pipeline``, which contribute their own stages.
        probability: when truthy, topics with ``probability`` below this
            value are dropped on both sides of the join.

    Returns:
        list of aggregation stages.  The final ``$project`` emits ``topic``,
        ``count``, ``crosstopics`` (``topic``/``pubs``/``count`` items) and
        ``pubs`` (the union of all ``crosstopics.pubs``).
    """

    def pick_ordered(when_ge, otherwise):
        # Chooses between the two titles depending on "$topic1 >= $topic2".
        return {
            '$cond': [{'$gte': ["$topic1", "$topic2"]}, when_ge, otherwise]}

    stages = [
        {"$match": {"pubid": {"$exists": 1}, "topics": {'$exists': 1}}},
        {"$project": {"pubid": 1, "topics": 1}},
    ]
    stages.extend(filter_by_pubs_acc(authorParams))
    stages.append({"$project": {"pub": 0}})
    stages.append({"$unwind": "$topics"})
    if probability:
        stages.append(
            {"$match": {"topics.probability": {"$gte": probability}}})

    # Left-hand topic of the pair (read later as "$topic.title").
    stages.extend(_add_topic2pipeline(authorParams))

    # Self-join on _id against `contexts` to obtain the right-hand topics.
    stages.append({
        '$lookup': {
            'from': 'contexts',
            'localField': '_id',
            'foreignField': '_id',
            'as': 'cont',
        }})
    stages.append({'$unwind': "$cont"})
    stages.append({'$unwind': "$cont.topics"})
    if probability:
        stages.append(
            {"$match": {"cont.topics.probability": {"$gte": probability}}})

    # Right-hand topic of the pair (read later as "$cont_topic.title").
    stages.extend(
        _add_topic2pipeline(
            authorParams,
            localField='cont.topics._id',
            as_field='cont_topic'))

    pair_key = {
        "topic1": pick_ordered("$topic1", "$topic2"),
        "topic2": pick_ordered("$topic2", "$topic1"),
        "cont_id": "$_id",
    }
    union_of_pubs = {
        "$reduce": {
            "input": "$crosstopics",
            "initialValue": [],
            "in": {"$setUnion": ["$$value", "$$this.pubs"]},
        }}

    stages.extend([
        {'$project': {
            "pubid": 1,
            "topic1": "$topic.title",
            "topic2": "$cont_topic.title"}},
        # A topic paired with itself is not a cross-topic relation.
        {'$match': {'$expr': {'$ne': ["$topic1", "$topic2"]}}},
        # One entry per (ordered pair, source document).
        {'$group': {"_id": pair_key, "pubid": {"$first": "$pubid"}}},
        {"$sort": {"_id": 1}},
        # Number of documents per ordered pair.
        {"$group": {
            "_id": {"topic1": "$_id.topic1", "topic2": "$_id.topic2"},
            "count": {"$sum": 1},
            "pubid": {"$push": "$pubid"}}},
        {"$sort": {"count": -1, "_id": 1}},
        # Fold the pairs under their first topic.
        {'$group': {
            '_id': "$_id.topic1",
            "count": {"$sum": "$count"},
            'crosstopics': {
                '$push': {
                    "topic": "$_id.topic2",
                    "pubs": "$pubid",
                    "count": "$count"}}}},
        {"$sort": {"count": -1, "_id": 1}},
        {"$project": {
            "topic": "$_id",
            "_id": 0,
            "count": 1,
            "crosstopics": 1,
            "pubs": union_of_pubs}},
    ])
    return stages
def get_frags_topics(
    topn: Optional[int],
    authorParams: AuthorParam,
    probability: Optional[float],
):
    """Assemble the aggregation pipeline counting topics per fragment number.

    Documents that have both ``frag_num`` and ``topics`` are unwound by
    topic, counted per (topic title, ``frag_num``) and then rolled up per
    topic title with a ``frags`` breakdown.

    Args:
        topn: when truthy, a trailing ``$limit`` keeps only this many rows.
        authorParams: forwarded to ``filter_by_pubs_acc`` and
            ``_add_topic2pipeline``, which contribute their own stages.
        probability: when truthy, topics with ``probability`` below this
            value are dropped.

    Returns:
        list of aggregation stages; the result is sorted by ``count``
        descending, then ``_id`` ascending.
    """
    has_required_fields = {
        '$match': {
            'frag_num': {'$exists': 1},
            'topics': {'$exists': 1},
        }}
    keep_needed_fields = {
        '$project': {'pubid': 1, 'frag_num': 1, 'topics': 1}}

    stages = [has_required_fields, keep_needed_fields]
    stages.extend(filter_by_pubs_acc(authorParams))
    stages.append({'$unwind': '$topics'})
    if probability:
        stages.append(
            {'$match': {'topics.probability': {'$gte': probability}}})
    stages.extend(_add_topic2pipeline(authorParams))

    # First count per (title, fragment), then roll the fragments up by title.
    count_per_fragment = {
        '$group': {
            '_id': {'_id': '$topic.title', 'frag_num': '$frag_num'},
            'count': {'$sum': 1},
        }}
    rollup_per_topic = {
        '$group': {
            '_id': '$_id._id',
            'count': {'$sum': '$count'},
            'frags': {
                '$push': {'frag_num': '$_id.frag_num', 'count': '$count'}},
        }}
    by_count_then_id = {'$sort': {'count': -1, '_id': 1}}
    stages.extend([count_per_fragment, rollup_per_topic, by_count_then_id])

    if topn:
        stages.append({'$limit': topn})
    return stages
def get_frags_cocitauthors_topics(
    topn: Optional[int],
    authorParams: AuthorParam,
    probability: Optional[float],
):
    """Build the aggregation pipeline of topics per co-cited author.

    Documents that have ``cocit_authors``, ``frag_num`` and ``topics`` are
    unwound by author and by topic, de-duplicated per
    (author, topic title, document ``_id``), counted per (author, topic) and
    finally rolled up per author.

    Args:
        topn: when truthy, a trailing ``$limit`` keeps only this many rows.
        authorParams: forwarded to ``filter_by_pubs_acc`` and
            ``_add_topic2pipeline``, which contribute their own stages.
        probability: when truthy, topics with ``probability`` below this
            value are dropped.

    Returns:
        list of aggregation stages.  Result rows carry ``_id`` (the co-cited
        author), ``count``, ``topics`` and ``conts`` (union of the per-topic
        context lists), sorted by ``count`` descending, then ``_id``.
    """
    pipeline = [
        {"$match": {
            "cocit_authors": {"$exists": 1},
            "frag_num": {"$exists": 1},
            "topics": {'$exists': 1}}},
        {"$project": {
            "pubid": 1, "cocit_authors": 1, "frag_num": 1, "topics": 1}},
    ]
    pipeline += filter_by_pubs_acc(authorParams)
    pipeline += [
        {"$unwind": "$cocit_authors"},
        {"$unwind": "$topics"},
    ]
    if probability:
        pipeline += [
            {"$match": {"topics.probability": {"$gte": probability}}},
        ]
    # Must extend, not rebind: a plain assignment here would throw away every
    # stage assembled above (match, projection, author filter, unwinds and
    # the probability filter).
    pipeline += _add_topic2pipeline(authorParams)
    pipeline += [
        # One entry per (author, topic title, source document).
        {"$group": {
            "_id": {
                "cocit_authors": "$cocit_authors",
                "topic": "$topic.title",
                "cont_id": "$_id"},
            "cont": {"$first": {"pubid": "$pubid", "frag_num": "$frag_num"}},
        }},
        {"$sort": {"_id": 1}},
        # Count documents per (author, topic).
        # NOTE(review): "cnt" reads "$count", but the preceding $group only
        # emits `_id` and `cont`, so "cnt" looks like it is always missing --
        # confirm the intended source field.
        {"$group": {
            "_id": {
                "cocit_authors": "$_id.cocit_authors",
                "topic": "$_id.topic"},
            "count": {"$sum": 1},
            "frags": {'$push': {"fn": "$cont.frag_num", "cnt": "$count"}},
            "conts": {
                "$push": {
                    "cont_id": "$_id.cont_id",
                    "pubid": "$cont.pubid",
                    "frag_num": "$cont.frag_num"}},
        }},
        # Roll the topics up under each author.
        {'$group': {
            "_id": "$_id.cocit_authors",
            "count": {"$sum": "$count"},
            "topics": {
                "$push": {
                    "topic": "$_id.topic",
                    "count": "$count",
                    "frags": "$frags"}},
            "conts2": {"$push": "$conts"},
        }},
        # Flatten the list of per-topic context lists into one set.
        {"$project": {
            "count": 1,
            "topics": 1,
            "conts": {
                "$reduce": {
                    "input": "$conts2",
                    "initialValue": [],
                    "in": {"$setUnion": ["$$value", "$$this"]}}},
        }},
        {"$sort": {"count": -1, "_id": 1}},
    ]
    if topn:
        pipeline += [{'$limit': topn}]
    return pipeline
"_id": { "ngram": "$ngrams._id", "topic": "$topics._id", "cont_id": "$_id"}, "cont": {"$first": {"pubid": "$pubid", "frag_num": "$frag_num"}}, 'count': {'$sum': "$ngrams.cnt"}, 'ngrm': {'$first': "$ngrm"},}}, {"$sort": {"_id": 1}}, {"$group": { "_id": { "ngram": "$_id.ngram", "topic": "$_id.topic"}, "count": {"$sum": "$count"}, "frags": {'$push': {"fn": "$cont.frag_num", "cnt": "$count"}}, 'ngrm': {'$first': "$ngrm"},}},] pipeline += _add_topic2pipeline(authorParams, localField='_id.topic') pipeline += [ {'$group': { "_id": "$_id.ngram", "title": {"$first": "$ngrm.title"}, "type": {"$first": "$ngrm.type"}, "nka": {"$first": "$ngrm.nka"}, "count": {"$sum": "$count"}, "topics": { "$push": { "topic": "$topic.title", "count": "$count", "frags": "$frags"}},}}, {"$project": {"_id": 0,}}, {"$sort": {"count": -1, "title": 1, 'type': 1}}, ] if topn:
def get_top_topics_publications(
    topn: Optional[int],
    authorParams: AuthorParam,
    probability: Optional[float],
):
    """Build the aggregation pipeline of the most frequent topics.

    Documents that have ``topics`` are unwound by topic and grouped by topic
    title; each group gets the number of unwound rows, the mean and
    population standard deviation of ``topics.probability`` and the set of
    ``pubid`` values.

    Args:
        topn: when truthy, a ``$limit`` keeps only this many topics (applied
            after sorting and before the final projection).
        authorParams: forwarded to ``filter_by_pubs_acc`` and
            ``_add_topic2pipeline``, which contribute their own stages.
        probability: when truthy, topics with ``probability`` below this
            value are dropped.

    Returns:
        list of aggregation stages.  Result rows carry ``topic``,
        ``count_pubs``, ``count_conts``, ``probability_avg``,
        ``probability_stddev`` (both rounded to 2 digits) and ``pubs``.
    """
    pipeline = [
        {'$match': {"topics": {'$exists': 1}}},
        {'$project': {
            'prefix': 0, 'suffix': 0, 'exact': 0,
            'cocit_authors': 0, "ngrams": 0}},
    ]
    pipeline += filter_by_pubs_acc(authorParams)
    pipeline += [
        {'$unwind': '$topics'},
    ]
    if probability:
        pipeline += [
            {'$match': {"topics.probability": {'$gte': probability}}},
        ]
    pipeline += _add_topic2pipeline(authorParams)
    pipeline += [
        {'$group': {
            # Group by the title of the topic attached by
            # `_add_topic2pipeline`, the same way the sibling builders in
            # this file read it ("$topic.title").  The previous key,
            # "$topics.title", ignored that stage's output.
            # NOTE(review): assumes the default output field of
            # `_add_topic2pipeline` is `topic` -- confirm against the helper.
            '_id': '$topic.title',
            'count': {'$sum': 1},
            'probability_avg': {'$avg': '$topics.probability'},
            'probability_stddev': {'$stdDevPop': '$topics.probability'},
            "pubs": {'$addToSet': '$pubid'},
        }},
        {'$sort': {'count': -1, '_id': 1}},
    ]
    if topn:
        pipeline += [{'$limit': topn}]
    pipeline += [
        {'$project': {
            "topic": "$_id",
            "_id": 0,
            "count_pubs": {"$size": "$pubs"},
            "count_conts": "$count",
            "probability_avg": {"$round": ["$probability_avg", 2]},
            "probability_stddev": {"$round": ["$probability_stddev", 2]},
            "pubs": "$pubs",
        }},
    ]
    return pipeline