예제 #1
0
def get_publications_topics_topics(
  authorParams: AuthorParam, probability:Optional[float]
):
  """Assemble the aggregation pipeline that counts co-occurring topic pairs.

  Every topic of an input document is paired with every topic of the
  ``contexts`` document that has the same ``_id``. Pairs of two different
  titles are counted once per document and then folded under their first
  topic.

  Args:
    authorParams: forwarded unchanged to ``filter_by_pubs_acc`` and
      ``_add_topic2pipeline``.
    probability: when truthy, topics whose ``probability`` is below this
      value are dropped on both sides of the pairing.

  Returns:
    The list of aggregation stages. The last stage emits ``topic``,
    ``count``, ``crosstopics`` and ``pubs``.
  """
  def at_least(field):
    # $match stage keeping entries whose `field` reaches the requested minimum.
    return {"$match": {field: {"$gte": probability}}}

  stages = [
    {"$match": {"pubid": {"$exists": 1}, "topics": {"$exists": 1}}},
    {"$project": {"pubid": 1, "topics": 1}},
  ]
  stages.extend(filter_by_pubs_acc(authorParams))
  stages.append({"$project": {"pub": 0}})
  stages.append({"$unwind": "$topics"})
  if probability:
    stages.append(at_least("topics.probability"))
  stages.extend(_add_topic2pipeline(authorParams))

  # Join the `contexts` document with the same _id and flatten its topics, so
  # each row carries one (own topic, joined topic) combination.
  join_contexts = {
    "$lookup": {
      "from": "contexts", "localField": "_id", "foreignField": "_id",
      "as": "cont"}}
  stages.append(join_contexts)
  stages.append({"$unwind": "$cont"})
  stages.append({"$unwind": "$cont.topics"})
  if probability:
    stages.append(at_least("cont.topics.probability"))
  stages.extend(
    _add_topic2pipeline(
      authorParams, localField="cont.topics._id", as_field="cont_topic"))

  titles_only = {
    "$project": {
      "pubid": 1, "topic1": "$topic.title", "topic2": "$cont_topic.title"}}
  different_titles = {"$match": {"$expr": {"$ne": ["$topic1", "$topic2"]}}}

  # The greater title always lands in topic1 and the lesser in topic2, so
  # (A, B) and (B, A) fall into the same group; cont_id keeps one row per
  # source document.
  pair_per_document = {
    "$group": {
      "_id": {
        "topic1": {
          "$cond": [{"$gte": ["$topic1", "$topic2"]}, "$topic1", "$topic2"]},
        "topic2": {
          "$cond": [{"$gte": ["$topic1", "$topic2"]}, "$topic2", "$topic1"]},
        "cont_id": "$_id"},
      "pubid": {"$first": "$pubid"}}}

  # Count documents per pair and remember the publication of each of them.
  count_per_pair = {
    "$group": {
      "_id": {"topic1": "$_id.topic1", "topic2": "$_id.topic2"},
      "count": {"$sum": 1}, "pubid": {"$push": "$pubid"}}}

  # Fold the pairs under their first topic.
  fold_by_first_topic = {
    "$group": {
      "_id": "$_id.topic1", "count": {"$sum": "$count"},
      "crosstopics": {
        "$push": {
          "topic": "$_id.topic2", "pubs": "$pubid", "count": "$count"}}}}

  # `pubs` is the union of the publication lists of all cross topics.
  shape_result = {
    "$project": {
      "topic": "$_id", "_id": 0, "count": 1, "crosstopics": 1,
      "pubs": {
        "$reduce": {
          "input": "$crosstopics", "initialValue": [],
          "in": {"$setUnion": ["$$value", "$$this.pubs"]}}}}}

  stages.extend([
    titles_only,
    different_titles,
    pair_per_document,
    {"$sort": {"_id": 1}},
    count_per_pair,
    {"$sort": {"count": -1, "_id": 1}},
    fold_by_first_topic,
    {"$sort": {"count": -1, "_id": 1}},
    shape_result,
  ])
  return stages
예제 #2
0
def get_frags_topics(
  topn:Optional[int], authorParams: AuthorParam, probability:Optional[float]
):
  """Assemble the aggregation pipeline that counts topics per fragment.

  Args:
    topn: when truthy, a trailing ``$limit`` keeps only that many topics.
    authorParams: forwarded unchanged to ``filter_by_pubs_acc`` and
      ``_add_topic2pipeline``.
    probability: when truthy, topics whose ``probability`` is below this
      value are dropped.

  Returns:
    The list of aggregation stages. Each result has the topic title as
    ``_id``, the total ``count`` and a ``frags`` list of per-fragment counts.
  """
  stages = [
    {"$match": {"frag_num": {"$exists": 1}, "topics": {"$exists": 1}}},
    {"$project": {"pubid": 1, "frag_num": 1, "topics": 1}},
  ]
  stages.extend(filter_by_pubs_acc(authorParams))
  stages.append({"$unwind": "$topics"})
  if probability:
    stages.append({"$match": {"topics.probability": {"$gte": probability}}})
  stages.extend(_add_topic2pipeline(authorParams))

  # First count rows per (topic title, fragment number) ...
  count_per_fragment = {
    "$group": {
      "_id": {"_id": "$topic.title", "frag_num": "$frag_num"},
      "count": {"$sum": 1}}}
  # ... then fold the fragments under their topic, summing the counts.
  fold_by_topic = {
    "$group": {
      "_id": "$_id._id", "count": {"$sum": "$count"},
      "frags": {"$push": {"frag_num": "$_id.frag_num", "count": "$count"}}}}

  stages.append(count_per_fragment)
  stages.append(fold_by_topic)
  stages.append({"$sort": {"count": -1, "_id": 1}})
  if topn:
    stages.append({"$limit": topn})
  return stages
예제 #3
0
def get_frags_cocitauthors_topics(
  topn:Optional[int], authorParams: AuthorParam, probability:Optional[float]
):
  """Build the aggregation pipeline that counts topics per co-cited author.

  Args:
    topn: when truthy, a trailing ``$limit`` keeps only that many authors.
    authorParams: forwarded unchanged to ``filter_by_pubs_acc`` and
      ``_add_topic2pipeline``.
    probability: when truthy, topics whose ``probability`` is below this
      value are dropped.

  Returns:
    The list of aggregation stages. Each result has the co-cited author as
    ``_id``, the total ``count``, the per-topic breakdown in ``topics`` and
    the merged list of contributing documents in ``conts``.
  """
  pipeline = [
    {"$match": {
      "cocit_authors": {"$exists": 1}, "frag_num": {"$exists": 1},
      "topics": {'$exists': 1}}},
    {"$project": {
      "pubid": 1, "cocit_authors": 1, "frag_num": 1,
      "topics": 1}},]
  pipeline += filter_by_pubs_acc(authorParams)
  pipeline += [
    {"$unwind": "$cocit_authors"},
    {"$unwind": "$topics"},]
  if probability:
    pipeline += [
      {"$match": {"topics.probability": {"$gte": probability}}, }]

  # Append the topic stages. A plain assignment here would throw away every
  # stage built above (match, projection, filter, unwinds, threshold).
  pipeline += _add_topic2pipeline(authorParams)

  pipeline += [
    # One row per (author, topic, source document).
    {"$group": {
      "_id": {
        "cocit_authors": "$cocit_authors", "topic": "$topic.title",
        "cont_id": "$_id"},
      "cont": {"$first": {"pubid": "$pubid", "frag_num": "$frag_num"}},}},
    {"$sort": {"_id": 1}},
    # Count source documents per (author, topic).
    # NOTE(review): the preceding $group emits no `count` field, so "$count"
    # looks unresolved when `cnt` is pushed here - confirm the intended value.
    {"$group": {
      "_id": {
        "cocit_authors": "$_id.cocit_authors", "topic": "$_id.topic"},
      "count": {"$sum": 1},
      "frags": {'$push': {"fn": "$cont.frag_num", "cnt": "$count"}},
      "conts": {
        "$push": {
          "cont_id": "$_id.cont_id", "pubid": "$cont.pubid",
          "frag_num": "$cont.frag_num"}},}},
    # Fold the topics under their author; conts2 is a list of lists.
    {'$group': {
        "_id": "$_id.cocit_authors",
        "count": {"$sum": "$count"},
        "topics": {
          "$push": {"topic": "$_id.topic", "count": "$count", "frags": "$frags"}},
        "conts2": {"$push": "$conts"},}},
    # Flatten conts2 into a single de-duplicated list.
    {"$project": {
        "count": 1, "topics": 1,
        "conts": {
            "$reduce": {
               "input": "$conts2", "initialValue": [],
            "in": {"$setUnion": ["$$value", "$$this"]}}}}},
    {"$sort": {"count": -1, "_id": 1}},
  ]
  if topn:
    pipeline += [{'$limit': topn}]
  return pipeline
예제 #4
0
      "_id": {
        "ngram": "$ngrams._id",
        "topic": "$topics._id",
        "cont_id": "$_id"},
      "cont": {"$first": {"pubid": "$pubid", "frag_num": "$frag_num"}},
      'count': {'$sum': "$ngrams.cnt"},
      'ngrm': {'$first': "$ngrm"},}},
    {"$sort": {"_id": 1}},
    {"$group": {
      "_id": {
        "ngram": "$_id.ngram", "topic": "$_id.topic"},
      "count": {"$sum": "$count"},
      "frags": {'$push': {"fn": "$cont.frag_num", "cnt": "$count"}},
      'ngrm': {'$first': "$ngrm"},}},]

  pipeline += _add_topic2pipeline(authorParams, localField='_id.topic')

  pipeline += [
    {'$group': {
      "_id": "$_id.ngram",
      "title": {"$first": "$ngrm.title"},
      "type": {"$first": "$ngrm.type"},
      "nka": {"$first": "$ngrm.nka"},
      "count": {"$sum": "$count"},
      "topics": {
        "$push": {
          "topic": "$topic.title", "count": "$count", "frags": "$frags"}},}},
    {"$project": {"_id": 0,}},
    {"$sort": {"count": -1, "title": 1, 'type': 1}},
  ]
  if topn:
예제 #5
0
def get_top_topics_publications(topn: Optional[int], authorParams: AuthorParam,
                                probability: Optional[float]):
    """Assemble the aggregation pipeline that ranks topics by occurrence.

    Args:
        topn: when truthy, a ``$limit`` keeps only that many topics.
        authorParams: forwarded unchanged to ``filter_by_pubs_acc`` and
            ``_add_topic2pipeline``.
        probability: when truthy, topics whose ``probability`` is below this
            value are dropped.

    Returns:
        The list of aggregation stages. The last stage emits ``topic``,
        ``count_pubs``, ``count_conts``, ``probability_avg``,
        ``probability_stddev`` and ``pubs``.
    """
    has_topics = {'$match': {"topics": {'$exists': 1}}}
    # Exclude fields that none of the stages below reference.
    drop_unused = {
        '$project': {
            'prefix': 0,
            'suffix': 0,
            'exact': 0,
            'cocit_authors': 0,
            "ngrams": 0,
        }
    }
    stages = [has_topics, drop_unused]
    stages.extend(filter_by_pubs_acc(authorParams))
    stages.append({'$unwind': '$topics'})

    if probability:
        stages.append(
            {'$match': {"topics.probability": {'$gte': probability}}})

    stages.extend(_add_topic2pipeline(authorParams))

    # NOTE(review): this groups on "$topics.title", whereas the other pipeline
    # builders here group on "$topic.title" after _add_topic2pipeline -
    # confirm which field actually carries the title.
    per_topic = {
        '$group': {
            '_id': '$topics.title',
            'count': {'$sum': 1},
            'probability_avg': {'$avg': '$topics.probability'},
            'probability_stddev': {'$stdDevPop': '$topics.probability'},
            "pubs": {'$addToSet': '$pubid'},
        }
    }
    stages.append(per_topic)
    stages.append({'$sort': {'count': -1, '_id': 1}})
    if topn:
        stages.append({'$limit': topn})

    # Rename the fields and round the statistics to two decimals.
    shape_result = {
        '$project': {
            "topic": "$_id",
            "_id": 0,
            "count_pubs": {"$size": "$pubs"},
            "count_conts": "$count",
            "probability_avg": {"$round": ["$probability_avg", 2]},
            "probability_stddev": {"$round": ["$probability_stddev", 2]},
            "pubs": "$pubs",
        }
    }
    stages.append(shape_result)
    return stages