Exemple #1
0
def get_top_detail_bund_refauthors(
  topn: Optional[int], authorParams: AuthorParam
):
  """Start a pipeline over documents that have both `exact` and `bundles`.

  NOTE(review): this excerpt stops right after the optional publication
  filter; no further stages and no `return` are visible, and `topn` is not
  used in the lines shown.
  """
  pipeline = [
    {'$match': {'exact': {'$exists': 1}, 'bundles': {'$exists': 1}}},]
  # Stages from filter_by_pubs_acc are appended only when the result is truthy.
  if filter_pipeline := filter_by_pubs_acc(authorParams):
    pipeline += filter_pipeline
Exemple #2
0
def get_publications_cocitauthors(
  authorParams: AuthorParam, topn_auth:Optional[int]
):
  """Start a pipeline over documents that carry a `cocit_authors` field.

  NOTE(review): only the opening of the function is visible in this excerpt;
  there is no `return`, and `topn_auth` is not used in the lines shown.
  """
  pipeline = [
    {"$match": {"cocit_authors": {"$exists": 1}}}]
  # Stages from filter_by_pubs_acc are appended only when the result is truthy.
  if filter_pipeline := filter_by_pubs_acc(authorParams):
    pipeline += filter_pipeline
Exemple #3
0
def get_publications_ngramms(
  topn:Optional[int], authorParams: AuthorParam, ngrammParam: NgrammParam,
  topn_gramm:Optional[int]
):
  """Start a pipeline over documents that carry an `ngrams` field.

  NOTE(review): the excerpt ends after the optional publication filter; no
  `return` is visible, and `topn`, `ngrammParam` and `topn_gramm` are not used
  in the lines shown.
  """
  pipeline = [
    {"$match": {"ngrams": {"$exists": 1}}}]
  # Stages from filter_by_pubs_acc are appended only when the result is truthy.
  if filter_pipeline := filter_by_pubs_acc(authorParams):
    pipeline += filter_pipeline
Exemple #4
0
def get_frags_cocitauthors_cocitauthors(
  topn:Optional[int], authorParams: AuthorParam
):
  """Start a pipeline over documents having `cocit_authors` and `frag_num`.

  Only `pubid`, `cocit_authors` and `frag_num` (plus the implicit `_id`) are
  kept by the initial projection.

  NOTE(review): the excerpt ends after the optional publication filter; no
  `return` is visible and `topn` is not used in the lines shown.
  """
  pipeline = [
    {'$match': {
      'cocit_authors': {'$exists': 1}, 'frag_num': {'$exists': 1},}},
    {'$project': {'pubid': 1, 'cocit_authors': 1, 'frag_num': 1}},]
  if filter_pipiline := filter_by_pubs_acc(authorParams):
    pipeline += filter_pipiline
    # Drop the `pub` field again, but only when filter stages were added
    # (presumably those stages are what introduce `pub` -- TODO confirm).
    pipeline += [{'$project': {'pub': 0}},]
Exemple #5
0
def get_pos_neg_topics(
  authorParams: AuthorParam, probability:Optional[float]
):
  """Start a pipeline over documents having `positive_negative` and `topics`.

  Only `pubid`, `positive_negative` and `topics` (plus the implicit `_id`)
  are kept by the initial projection.

  NOTE(review): the excerpt ends after the optional publication filter; no
  `return` is visible and `probability` is not used in the lines shown.
  """
  pipeline = [
    {'$match': {
      'positive_negative': {'$exists': 1},
      'topics': {'$exists': 1}}},
    {'$project': {
      'pubid': 1, 'positive_negative': 1, 'topics': 1}},]
  # Stages from filter_by_pubs_acc are appended only when the result is truthy.
  if filter_pipeline := filter_by_pubs_acc(authorParams):
    pipeline += filter_pipeline
Exemple #6
0
def get_frag_pos_neg_contexts(authorParams: AuthorParam):
  """Build the pipeline listing contexts per fragment and pos/neg class.

  Documents having both `positive_negative` and `frag_num` are selected, run
  through the stages returned by `filter_by_pubs_acc(authorParams)`, grouped
  by (class, fragment) and finally folded into one document per fragment.

  Returns:
    A list of stage dicts. Result documents look like
    {'frag_num': ..., 'classes': [{'pos_neg', 'pubids', 'intxtids'}, ...]}
    and are ordered by `frag_num` ascending.
  """
  # `positive_negative.val` serves as an index into this label table.
  class_label = {
    '$arrayElemAt': [
      ['neutral', 'positive', 'positive', 'negative', 'negative'],
      '$positive_negative.val']}

  select_stage = {'$match': {
    'positive_negative': {'$exists': 1}, 'frag_num': {'$exists': 1}}}
  trim_stage = {
    '$project': {'pubid': 1, 'positive_negative': 1, 'frag_num': 1}}

  pipeline = [select_stage, trim_stage]
  pipeline.extend(filter_by_pubs_acc(authorParams))

  # Unique publications and contexts for every (class, fragment) pair.
  pipeline.append({'$group': {
    '_id': {'class_pos_neg': class_label, 'frag_num': '$frag_num'},
    'pubids': {'$addToSet': '$pubid'},
    'intxtids': {'$addToSet': '$_id'}}})
  pipeline.append({'$sort': {'_id.class_pos_neg': -1}})
  # Collect the classes that belong to each fragment.
  pipeline.append({'$group': {
    '_id': '$_id.frag_num',
    'classes': {'$push': {
      'pos_neg': '$_id.class_pos_neg',
      'pubids': '$pubids', 'intxtids': '$intxtids'}}}})
  pipeline.append(
    {'$project': {'_id': 0, 'frag_num': '$_id', 'classes': '$classes'}})
  pipeline.append({'$sort': {'frag_num': 1}})
  return pipeline
Exemple #7
0
def get_refauthors_part(topn:int, authorParams: AuthorParam):
  """Build the pipeline that aggregates citation contexts per bundle author.

  Args:
    topn: when truthy, a final `$limit` with this value is appended.
    authorParams: forwarded to `filter_by_pubs_acc`.

  Returns:
    A list of stage dicts. Result documents carry `author`, `cits`,
    `cits_all`, `bunds_cnt`, `pubs`, `total_cits`, `total_pubs`, `pos_neg`
    and `frags`.
  """
  excluded = ('prefix', 'suffix', 'exact', 'topics', 'ngrams')
  pipeline = [
    {'$match': {'exact': {'$exists': 1}}},
    {'$project': dict.fromkeys(excluded, 0)},
  ]
  pipeline.extend(filter_by_pubs_acc(authorParams))

  # One document per (context, bundle, bundle author); the bundle with id
  # 'nUSJrP' is skipped.
  join_stages = [
    {'$unwind': '$bundles'},
    {'$match': {'bundles': {'$ne': 'nUSJrP'}}},
    {'$lookup': {
      'from': 'bundles', 'localField': 'bundles', 'foreignField': '_id',
      'as': 'bun'}},
    {'$unwind': '$bun'},
    {'$unwind': '$bun.authors'},
  ]
  by_author = {'$group': {
    '_id': '$bun.authors', 'cits': {'$addToSet': '$_id'},
    'cits_all': {'$sum': 1}, 'pubs': {'$addToSet': '$pubid'},
    'bunds_ids': {'$addToSet': '$bundles'},
    'bunds': {
      '$addToSet': {
        '_id': '$bun._id', 'total_cits': '$bun.total_cits',
        'total_pubs': '$bun.total_pubs'}},
    'pos_neg': {'$push': '$positive_negative'},
    'frags': {'$push': '$frag_num'}}}
  # Replace the collected sets with their sizes / sums.
  summary = {'$project': {
    '_id': 0, 'author': '$_id', 'cits': {'$size': '$cits'},
    'cits_all': '$cits_all', 'bunds_cnt': {'$size': '$bunds_ids'},
    'pubs': {'$size': '$pubs'}, 'total_cits': {'$sum': '$bunds.total_cits'},
    'total_pubs': {'$sum': '$bunds.total_pubs'}, 'pos_neg': 1, 'frags': 1}}
  ordering = {
    '$sort': {'cits_all': -1, 'cits': -1, 'pubs': -1, 'author': 1}}

  pipeline.extend(join_stages)
  pipeline.extend([by_author, summary, ordering])
  if topn:
    pipeline.append({'$limit': topn})
  return pipeline
Exemple #8
0
def get_publications_topics_topics(
  authorParams: AuthorParam, probability:Optional[float]
):
  """Build the pipeline that counts pairs of topics sharing a context.

  Every context is joined with itself (via the `contexts` collection) so
  that each pair of its topics can be counted once; pairs are then folded
  per first topic.

  Args:
    authorParams: forwarded to `filter_by_pubs_acc` and `_add_topic2pipeline`.
    probability: when truthy, topics with a lower `probability` are dropped
      on both sides of the self-join.

  Returns:
    A list of stage dicts. Result documents carry `topic`, `count`,
    `crosstopics` and the union `pubs`.
  """
  def _threshold(path):
    # A falsy probability means "no threshold": contribute no stage at all.
    if not probability:
      return []
    return [{"$match": {path: {"$gte": probability}}}]

  pipeline = [
    {"$match": {"pubid": {"$exists": 1}, "topics": {'$exists': 1}}},
    {"$project": {"pubid": 1, "topics": 1}},
  ]
  pipeline.extend(filter_by_pubs_acc(authorParams))
  pipeline.append({"$project": {"pub": 0}})
  pipeline.append({"$unwind": "$topics"})
  pipeline.extend(_threshold("topics.probability"))
  pipeline.extend(_add_topic2pipeline(authorParams))

  # Self-join: bring in the same context again to reach its other topics.
  pipeline.extend([
    {'$lookup': {
      'from': 'contexts', 'localField': '_id', 'foreignField': '_id',
      'as': 'cont'}},
    {'$unwind': "$cont"},
    {'$unwind': "$cont.topics"},
  ])
  pipeline.extend(_threshold("cont.topics.probability"))
  pipeline.extend(_add_topic2pipeline(
    authorParams, localField='cont.topics._id', as_field='cont_topic'))

  titles = {'$project': {
    "pubid": 1, "topic1": "$topic.title",
    "topic2": "$cont_topic.title"}}
  distinct_pair = {'$match': {'$expr': {'$ne': ["$topic1", "$topic2"]}}}
  # Order each pair (larger title first) so (a, b) and (b, a) collapse.
  per_context = {'$group': {
    "_id": {
      "topic1": {
        '$cond': [{'$gte': ["$topic1", "$topic2"]}, "$topic1", "$topic2"]},
      "topic2": {
        '$cond': [{'$gte': ["$topic1", "$topic2"]}, "$topic2", "$topic1"]},
      "cont_id": "$_id"},
    "pubid": {"$first": "$pubid"}}}
  per_pair = {"$group": {
    "_id": {"topic1": "$_id.topic1", "topic2": "$_id.topic2"},
    "count": {"$sum": 1}, "pubid": {"$push": "$pubid"}}}
  per_topic = {'$group': {
    '_id': "$_id.topic1", "count": {"$sum": "$count"},
    'crosstopics': {
      '$push': {
        "topic": "$_id.topic2", "pubs": "$pubid", "count": "$count"}}}}
  # `pubs` becomes the union of the publication lists of all cross topics.
  result = {"$project": {
    "topic": "$_id", "_id": 0, "count": 1, "crosstopics": 1,
    "pubs": {
      "$reduce": {
        "input": "$crosstopics", "initialValue": [],
        "in": {"$setUnion": ["$$value", "$$this.pubs"]}}}}}

  pipeline.extend([
    titles,
    distinct_pair,
    per_context,
    {"$sort": {"_id": 1}},
    per_pair,
    {"$sort": {"count": -1, "_id": 1}},
    per_topic,
    {"$sort": {"count": -1, "_id": 1}},
    result,
  ])
  return pipeline
Exemple #9
0
def get_refbindles(topn:Optional[int], authorParams: AuthorParam):
  """Build the pipeline that aggregates citation contexts per bundle.

  Args:
    topn: when truthy, a final `$limit` with this value is appended.
    authorParams: forwarded to `filter_by_pubs_acc`.

  Returns:
    A list of stage dicts. Result documents carry `cits`, `pubs`,
    `pubs_ids`, `pos_neg`, `frags` and the bundle's `total_cits`,
    `total_pubs`, `year`, `authors` and `title`.
  """
  excluded = ('prefix', 'suffix', 'exact', 'topics', 'ngrams')
  pipeline = [
    {'$match': {'exact': {'$exists': 1}}},
    {'$project': dict.fromkeys(excluded, 0)},
  ]
  pipeline.extend(filter_by_pubs_acc(authorParams))

  # One document per (context, bundle); the bundle 'nUSJrP' is skipped.
  pipeline.append({'$unwind': '$bundles'})
  pipeline.append({'$match': {'bundles': {'$ne': 'nUSJrP'}}})

  by_bundle = {'$group': {
    '_id': '$bundles', 'cits': {'$sum': 1}, 'pubs': {'$addToSet': '$pubid'},
    'pos_neg': {'$push': '$positive_negative'},
    'frags': {'$push': '$frag_num'}, }}
  counts = {'$project': {
    'cits': 1, 'pubs': {'$size': '$pubs'}, "pubs_ids": '$pubs', 'pos_neg': 1,
    'frags': 1}}
  # Attach the bundle document to read its descriptive fields.
  join_bundle = {'$lookup': {
    'from': 'bundles', 'localField': '_id', 'foreignField': '_id',
    'as': 'bundle'}}
  flatten = {'$project': {
    'cits': 1, 'pubs': 1, 'pubs_ids': 1,
    'total_cits': '$bundle.total_cits', 'total_pubs': '$bundle.total_pubs',
    'year': '$bundle.year', 'authors': '$bundle.authors',
    'title': '$bundle.title', 'pos_neg': 1, 'frags': 1, }}

  pipeline.extend([
    by_bundle,
    counts,
    join_bundle,
    {'$unwind': '$bundle'},
    flatten,
    {'$sort': {'cits': -1, 'pubs': -1, 'title': 1}},
  ])
  if topn:
    pipeline.append({'$limit': topn})
  return pipeline
Exemple #10
0
def get_ref_auth4ngramm_tops(
  topn:Optional[int], authorParams: AuthorParam
):
  """Build the pipeline collecting contexts (topics, ngrams) per bundle author.

  Args:
    topn: when truthy, a final `$limit` with this value is appended.
    authorParams: forwarded to `filter_by_pubs_acc`.

  Returns:
    A list of stage dicts.

  NOTE(review): the output key is spelled 'aurhor' (sic); it is kept as-is
  because consumers may read that exact key.
  NOTE(review): the `$group` stage emits only `_id`, `pubs` and `conts`, so
  the `$bundle.*` paths in the final `$project` look like they resolve to
  missing fields (and the `title` sort key with them) -- confirm intent.
  """
  pipeline = [
    {'$match': {'exact': {'$exists': 1}}},
    {'$project': dict.fromkeys(('prefix', 'suffix', 'exact'), 0)},
  ]
  pipeline.extend(filter_by_pubs_acc(authorParams))

  # One document per (context, bundle, bundle author); 'nUSJrP' is skipped.
  join_stages = [
    {'$unwind': '$bundles'},
    {'$match': {'bundles': {'$ne': 'nUSJrP'}}},
    {'$lookup': {
      'from': 'bundles', 'localField': 'bundles', 'foreignField': '_id',
      'as': 'bundle'}},
    {'$unwind': '$bundle'},
    {'$unwind': '$bundle.authors'},
  ]
  by_author = {'$group': {
    '_id': '$bundle.authors', 'pubs': {'$addToSet': '$pubid'}, 'conts': {
      '$addToSet': {
        'cid': '$_id', 'topics': '$topics', 'ngrams': '$ngrams'}}}}
  shape = {'$project': {
    '_id': 0, 'aurhor': '$_id', 'cits': {'$size': '$conts'},
    'pubs': {'$size': '$pubs'}, 'pubs_ids': '$pubs', 'conts': '$conts',
    'total_cits': '$bundle.total_cits', 'total_pubs': '$bundle.total_pubs',
    'year': '$bundle.year', 'authors': '$bundle.authors',
    'title': '$bundle.title', }}

  pipeline.extend(join_stages)
  pipeline.append(by_author)
  pipeline.append(shape)
  pipeline.append({'$sort': {'cits': -1, 'pubs': -1, 'title': 1}})
  if topn:
    pipeline.append({'$limit': topn})
  return pipeline
Exemple #11
0
def get_frags_topics(
  topn:Optional[int], authorParams: AuthorParam, probability:Optional[float]
):
  """Build the pipeline counting topic occurrences overall and per fragment.

  Args:
    topn: when truthy, a final `$limit` with this value is appended.
    authorParams: forwarded to `filter_by_pubs_acc` and `_add_topic2pipeline`.
    probability: when truthy, topics with a lower `probability` are dropped.

  Returns:
    A list of stage dicts. Result documents have the topic title as `_id`,
    a total `count` and `frags` = [{'frag_num', 'count'}, ...].
  """
  pipeline = [
    {'$match': {'frag_num': {'$exists': 1}, 'topics': {'$exists': 1}}},
    {'$project': {'pubid': 1, 'frag_num': 1, 'topics': 1}},
  ]
  pipeline.extend(filter_by_pubs_acc(authorParams))
  pipeline.append({'$unwind': '$topics'})
  if probability:
    pipeline.append(
      {'$match': {'topics.probability': {'$gte': probability}}})
  pipeline.extend(_add_topic2pipeline(authorParams))

  # Count per (topic, fragment) first, then roll up per topic.
  per_topic_frag = {'$group': {
    '_id': {'_id': '$topic.title', 'frag_num': '$frag_num'},
    'count': {'$sum': 1,}}}
  per_topic = {'$group': {
    '_id': '$_id._id', 'count': {'$sum': '$count'},
    'frags': {'$push': {'frag_num': '$_id.frag_num', 'count': '$count',}},}}
  pipeline.extend([
    per_topic_frag, per_topic, {'$sort': {'count': -1, '_id': 1}}])

  if topn:
    pipeline.append({'$limit': topn})
  return pipeline
Exemple #12
0
def get_frags_ngramms(
  topn:Optional[int], authorParams: AuthorParam, ngrammParam: NgrammParam
):
  """Build the pipeline summing n-gram counts overall and per fragment.

  Args:
    topn: when truthy, a final `$limit` with this value is appended.
    authorParams: forwarded to `filter_by_pubs_acc`.
    ngrammParam: unless `ngrammParam.is_empty()`, the stage returned by
      `get_ngramm_filter(ngrammParam, 'ngramm')` is inserted after the join.

  Returns:
    A list of stage dicts. Result documents carry `title`, `type`, `nka`,
    `count` and `frags`.
  """
  select = {'$match': {
    'frag_num': {'$exists': 1}, 'ngrams': {'$exists': 1}}}
  # The `ngrams` array travels under the name `linked_paper` from here on.
  rename = {'$project': {
    'pubid': 1, 'frag_num': 1, 'linked_paper': '$ngrams'}}
  pipeline = [select, rename]
  pipeline.extend(filter_by_pubs_acc(authorParams))

  per_ngram_frag = {'$group': {
    '_id': {'_id': '$linked_paper._id', 'frag_num': '$frag_num'},
    'count': {'$sum': '$linked_paper.cnt'},}}
  per_ngram = {'$group': {
    '_id': '$_id._id', 'count': {'$sum': '$count'},
    'frags': {'$push': {'frag_num': '$_id.frag_num', 'count': '$count',}},}}
  join_ngramm = {'$lookup': {
    'from': 'n_gramms', 'localField': '_id', 'foreignField': '_id',
    'as': 'ngramm'}}
  pipeline.extend([
    {'$unwind': '$linked_paper'},
    per_ngram_frag,
    per_ngram,
    {'$sort': {'count': -1, '_id': 1}},
    join_ngramm,
    {'$unwind': '$ngramm'},
  ])

  if not ngrammParam.is_empty():
    pipeline.append(get_ngramm_filter(ngrammParam, 'ngramm'))

  pipeline.append({'$project': {
    'title': '$ngramm.title', 'type': '$ngramm.type', 'nka': '$ngramm.nka',
    'count': '$count', 'frags': '$frags'}})

  if topn:
    pipeline.append({'$limit': topn})
  return pipeline
Exemple #13
0
def get_frags_cocitauthors_ngramms(
  topn: Optional[int], authorParams: AuthorParam, ngrammParam: NgrammParam
):
  """Build the pipeline relating co-cited authors to n-grams and fragments.

  Args:
    topn: when truthy, a final `$limit` with this value is appended.
    authorParams: forwarded to `filter_by_pubs_acc`.
    ngrammParam: unless `ngrammParam.is_empty()`, the stage returned by
      `get_ngramm_filter(ngrammParam, 'ngrm')` is inserted after the join.

  Returns:
    A list of stage dicts. Result documents have the co-cited author as
    `_id` plus `count`, `ngrms` and the merged `conts` list.
  """
  pipeline = [
    {"$match": {
      "cocit_authors": {"$exists": 1}, "frag_num": {"$exists": 1},
      "ngrams": {'$exists': 1}}},
    {"$project": {
      "pubid": 1, "cocit_authors": 1, "frag_num": 1, "ngrams": 1}},
  ]
  pipeline.extend(filter_by_pubs_acc(authorParams))

  # One document per (context, co-cited author, n-gram) with the n-gram
  # document attached as `ngrm`.
  pipeline.append({"$unwind": "$cocit_authors"})
  pipeline.append({"$unwind": "$ngrams"})
  pipeline.append({'$lookup': {
    'from': 'n_gramms', 'localField': 'ngrams._id',
    'foreignField': '_id', 'as': 'ngrm'}})
  pipeline.append({'$unwind': '$ngrm'})
  if not ngrammParam.is_empty():
    pipeline.append(get_ngramm_filter(ngrammParam, 'ngrm'))

  per_context = {"$group": {
    "_id": {
      "cocit_authors": "$cocit_authors", "ngram": "$ngrams._id",
      "cont_id": "$_id"},
    "cont": {"$first": {"pubid": "$pubid", "frag_num": "$frag_num"}},
    'count': {'$sum': "$ngrams.cnt"},
    'ngrm': {'$first': "$ngrm"},}}
  per_author_ngram = {"$group": {
    "_id": {
      "cocit_authors": "$_id.cocit_authors", "ngram": "$_id.ngram"},
    "count": {"$sum": "$count"},
    "frags": {'$push': {"fn": "$cont.frag_num", "cnt": "$count"}},
    "conts": {
      "$push": {
        "cont_id": "$_id.cont_id", "pubid": "$cont.pubid",
        "frag_num": "$cont.frag_num"}},
    'ngrm': {'$first': "$ngrm"},}}
  per_author = {'$group': {
    "_id": "$_id.cocit_authors",
    "count": {"$sum": "$count"},
    "ngrms": {
      "$push": {"ngrm": "$ngrm", "count": "$count", "frags": "$frags"}},
    "conts2": {"$push": "$conts"},}}
  # Flatten the list of per-n-gram context lists into one set-union list.
  merge_contexts = {"$project": {
    "count": 1, "ngrms": 1,
    "conts": {
      "$reduce": {
        "input": "$conts2", "initialValue": [],
        "in": {"$setUnion": ["$$value", "$$this"]}}}}}

  pipeline.extend([
    per_context,
    {"$sort": {"_id": 1}},
    per_author_ngram,
    per_author,
    merge_contexts,
    {"$sort": {"count": -1, "_id": 1}},
  ])
  if topn:
    pipeline.append({'$limit': topn})
  return pipeline
Exemple #14
0
def get_top_cocitauthors_publications(topn: Optional[int],
                                      authorParams: AuthorParam):
    """Build the pipeline listing publications per co-cited author.

    Args:
        topn: when truthy, a `$limit` with this value is inserted right after
            the sort (before the final projection).
        authorParams: forwarded to `filter_by_pubs_acc`.

    Returns:
        A list of stage dicts. Result documents carry `name`, `count` (the
        size of `pubs`) and `pubs`.
    """
    excluded = ('prefix', 'suffix', 'exact', 'ngrams', "topics")
    pipeline = [
        {'$match': {'cocit_authors': {'$exists': 1}}},
        {'$project': dict.fromkeys(excluded, 0)},
    ]
    pipeline.extend(filter_by_pubs_acc(authorParams))

    by_author = {
        '$group': {
            '_id': '$cocit_authors',
            'count': {'$sum': 1},
            'pubs': {'$addToSet': '$pubid'},
        }
    }
    pipeline.append({'$unwind': '$cocit_authors'})
    pipeline.append(by_author)
    # Ordering (and the optional cut-off) use the per-row count from the
    # group stage; the projection below then reports the number of pubs.
    pipeline.append({'$sort': {'count': -1, '_id': 1}})
    if topn:
        pipeline.append({'$limit': topn})

    pipeline.append({
        '$project': {
            "name": "$_id",
            "_id": 0,
            "count": {"$size": "$pubs"},
            "pubs": "$pubs",
        }
    })
    return pipeline
Exemple #15
0
def get_frags_cocitauthors_topics(
  topn:Optional[int], authorParams: AuthorParam, probability:Optional[float]
):
  """Build the pipeline relating co-cited authors to topics and fragments.

  Args:
    topn: when truthy, a final `$limit` with this value is appended.
    authorParams: forwarded to `filter_by_pubs_acc` and `_add_topic2pipeline`.
    probability: when truthy, topics with a lower `probability` are dropped.

  Returns:
    A list of stage dicts. Result documents have the co-cited author as
    `_id` plus `count`, `topics` and the merged `conts` list.
  """
  pipeline = [
    {"$match": {
      "cocit_authors": {"$exists": 1}, "frag_num": {"$exists": 1},
      "topics": {'$exists': 1}}},
    {"$project": {
      "pubid": 1, "cocit_authors": 1, "frag_num": 1,
      "topics": 1}},]
  pipeline += filter_by_pubs_acc(authorParams)
  pipeline += [
    {"$unwind": "$cocit_authors"},
    {"$unwind": "$topics"},]
  if probability:
    pipeline += [
      {"$match": {"topics.probability": {"$gte": probability}}, }]

  # Extend -- do not rebind -- the pipeline: a plain assignment here would
  # throw away every stage built above (match, filter, unwinds, threshold).
  pipeline += _add_topic2pipeline(authorParams)

  pipeline += [
    # One document per (co-cited author, topic, context).
    {"$group": {
      "_id": {
        "cocit_authors": "$cocit_authors", "topic": "$topic.title",
        "cont_id": "$_id"},
      "cont": {"$first": {"pubid": "$pubid", "frag_num": "$frag_num"}},}},
    {"$sort": {"_id": 1}},
    # NOTE(review): the previous stage emits no `count` field, so the
    # "cnt": "$count" below looks like it references a missing field --
    # confirm whether a per-context count was intended.
    {"$group": {
      "_id": {
        "cocit_authors": "$_id.cocit_authors", "topic": "$_id.topic"},
      "count": {"$sum": 1},
      "frags": {'$push': {"fn": "$cont.frag_num", "cnt": "$count"}},
      "conts": {
        "$push": {
          "cont_id": "$_id.cont_id", "pubid": "$cont.pubid",
          "frag_num": "$cont.frag_num"}},}},
    {'$group': {
        "_id": "$_id.cocit_authors",
        "count": {"$sum": "$count"},
        "topics": {
          "$push": {"topic": "$_id.topic", "count": "$count", "frags": "$frags"}},
        "conts2": {"$push": "$conts"},}},
    # Flatten the list of per-topic context lists into one set-union list.
    {"$project": {
        "count": 1, "topics": 1,
        "conts": {
            "$reduce": {
               "input": "$conts2", "initialValue": [],
            "in": {"$setUnion": ["$$value", "$$this"]}}}}},
    {"$sort": {"count": -1, "_id": 1}},
  ]
  if topn:
    pipeline += [{'$limit': topn}]
  return pipeline
Exemple #16
0
def get_top_cocitauthors(topn: Optional[int], authorParams: AuthorParam):
    """Build the pipeline ranking co-cited authors by number of contexts.

    Only documents with `frag_num` greater than 0 and a `cocit_authors`
    field are considered.

    Args:
        topn: when truthy, a final `$limit` with this value is appended.
        authorParams: forwarded to `filter_by_pubs_acc`.

    Returns:
        A list of stage dicts. Result documents have the author as `_id`
        plus `count` and the set `conts` of context ids.
    """
    excluded = (
        'prefix', 'suffix', 'exact', 'positive_negative', 'bundles',
        'ngrams', 'topics')
    select = {
        '$match': {
            'frag_num': {'$gt': 0},
            'cocit_authors': {'$exists': 1},
        }
    }
    pipeline = [select, {'$project': dict.fromkeys(excluded, 0)}]
    pipeline.extend(filter_by_pubs_acc(authorParams))

    by_author = {
        '$group': {
            '_id': '$cocit_authors',
            'count': {'$sum': 1},
            'conts': {'$addToSet': '$_id'},
        }
    }
    pipeline.append({'$unwind': '$cocit_authors'})
    pipeline.append(by_author)
    pipeline.append({'$sort': {'count': -1, '_id': 1}})
    if topn:
        pipeline.append({'$limit': topn})
    return pipeline
Exemple #17
0
def get_top_cocitrefs2(topn: Optional[int], authorParams: AuthorParam):
    """Start a pipeline over documents having both `bundles` and `frag_num`.

    Only `pubid`, `bundles` and `frag_num` (plus the implicit `_id`) are kept
    by the initial projection.

    NOTE(review): the excerpt ends after the optional publication filter; no
    `return` is visible and `topn` is not used in the lines shown.
    """
    pipeline = [
        {
            '$match': {
                'bundles': {
                    '$exists': 1
                },
                'frag_num': {
                    '$exists': 1
                }
            }
        },
        {
            '$project': {
                'pubid': 1,
                'bundles': 1,
                'frag_num': 1
            }
        },
    ]

    # NOTE(review): the local name `filter` shadows the builtin of the same
    # name inside this function.
    if filter := filter_by_pubs_acc(authorParams):
        pipeline += filter
Exemple #18
0
def get_frags_ngramms_cocitauthors(
  topn: Optional[int], authorParams: AuthorParam, ngrammParam: NgrammParam
):
  """Build the pipeline relating n-grams to co-cited authors and fragments.

  Args:
    topn: when truthy, a final `$limit` with this value is appended.
    authorParams: forwarded to `filter_by_pubs_acc`.
    ngrammParam: unless `ngrammParam.is_empty()`, the stage returned by
      `get_ngramm_filter(ngrammParam, 'ngrm')` is inserted after the join.

  Returns:
    A list of stage dicts. Result documents carry `title`, `type`, `nka`,
    `count` and `auths` (the `_id` is projected away).
  """
  pipeline = [
    {"$match": {
      "cocit_authors": {"$exists": 1}, "frag_num": {"$exists": 1},
      "ngrams": {'$exists': 1}}},
    {"$project": {
      "pubid": 1, "cocit_authors": 1, "frag_num": 1, "ngrams": 1}},
  ]
  pipeline.extend(filter_by_pubs_acc(authorParams))

  # One document per (context, co-cited author, n-gram) with the n-gram
  # document attached as `ngrm`.
  pipeline.append({"$unwind": "$cocit_authors"})
  pipeline.append({"$unwind": "$ngrams"})
  pipeline.append({'$lookup': {
    'from': 'n_gramms', 'localField': 'ngrams._id',
    'foreignField': '_id', 'as': 'ngrm'}})
  pipeline.append({'$unwind': '$ngrm'})
  if not ngrammParam.is_empty():
    pipeline.append(get_ngramm_filter(ngrammParam, 'ngrm'))

  per_context = {"$group": {
    "_id": {
      "ngram": "$ngrams._id", "cocit_authors": "$cocit_authors",
      "cont_id": "$_id"},
    "cont": {"$first": {"pubid": "$pubid", "frag_num": "$frag_num"}},
    'count': {'$sum': "$ngrams.cnt"},
    'ngrm': {'$first': "$ngrm"},}}
  per_ngram_author = {"$group": {
    "_id": {
      "ngram": "$_id.ngram", "cocit_authors": "$_id.cocit_authors"},
    "count": {"$sum": "$count"},
    "frags": {'$push': {"fn": "$cont.frag_num", "cnt": "$count"}},
    'ngrm': {'$first': "$ngrm"},}}
  per_ngram = {'$group': {
    "_id": "$_id.ngram",
    "title": {"$first": "$ngrm.title"},
    "type": {"$first": "$ngrm.type"},
    "nka": {"$first": "$ngrm.nka"},
    "count": {"$sum": "$count"},
    "auths": {
      "$push": {
        "auth": "$_id.cocit_authors", "count": "$count", "frags": "$frags"}},}}

  pipeline.extend([
    per_context,
    {"$sort": {"_id": 1}},
    per_ngram_author,
    per_ngram,
    {"$project": {"_id": 0,}},
    {"$sort": {"count": -1, "title": 1, 'type': 1}},
  ])
  if topn:
    pipeline.append({'$limit': topn})
  return pipeline
Exemple #19
0
def get_frag_pos_neg_cocitauthors2(
  topn: Optional[int], authorParams: AuthorParam
):
  """Build the pipeline listing contexts for each pair of co-cited authors.

  Every context is joined with itself (via the `contexts` collection) and
  only pairs with author1 < author2 are kept, so each pair appears once.

  Args:
    topn: when truthy, a final `$limit` with this value is appended.
    authorParams: forwarded to `filter_by_pubs_acc`.

  Returns:
    A list of stage dicts. Result documents carry `cocitpair`
    ({'author1', 'author2'}), `count` and `conts`.
  """
  kept = {
    'pubid': 1, 'positive_negative': 1, 'cocit_authors': 1, 'frag_num': 1}
  pipeline = [
    {'$match': {
      'positive_negative': {'$exists': 1}, 'cocit_authors': {'$exists': 1}}},
    {'$project': dict(kept)},
  ]
  pipeline.extend(filter_by_pubs_acc(authorParams))

  # Self-join: pair every co-cited author with the others of the context.
  self_join = [
    {'$project': {'pub': 0}},
    {'$unwind': '$cocit_authors'},
    {'$lookup': {
      'from': 'contexts', 'localField': '_id', 'foreignField': '_id',
      'as': 'cont'}},
    {'$project': {**kept, 'cont.cocit_authors': 1}},
    {'$unwind': '$cont'},
    {'$unwind': '$cont.cocit_authors'},
    {"$match": {"$expr": {"$lt": ["$cocit_authors", "$cont.cocit_authors"]}}},
  ]
  as_pair = {"$project": {
    "_id": {
      "author1": "$cocit_authors", "author2": "$cont.cocit_authors",
      "cont_id": "$_id"},
    "cont": {
      "pubid": "$pubid", "frag_num": "$frag_num",
      'positive_negative': '$positive_negative'}}}
  per_pair = {'$group': {
    '_id': {
      'author1': '$_id.author1',
      'author2': '$_id.author2'},
    'count': {'$sum': 1},
    'conts': {'$push': {
      'cont_id': '$_id.cont_id', 'pubid': '$cont.pubid',
      'frag_num': '$cont.frag_num',
      'positive_negative': '$cont.positive_negative',},},}}
  result = {'$project': {
    '_id': 0,
    'cocitpair': {
      'author1': '$_id.author1', 'author2': '$_id.author2'},
    'count': '$count', 'conts': '$conts', }}

  pipeline.extend(self_join)
  pipeline.extend([
    as_pair,
    {'$sort': {'_id': 1}},
    per_pair,
    {'$sort': {'count': -1, '_id': 1}},
    result,
  ])
  if topn:
    pipeline.append({'$limit': topn})
  return pipeline
Exemple #20
0
def get_frags_cocitauthors(
  topn:Optional[int], authParams: AuthorParam
) -> List[dict]:
  """Build the pipeline counting co-cited authors with their fragments.

  Only documents with `frag_num` greater than 0 and a `cocit_authors` field
  are considered.

  Args:
    topn: when truthy, a final `$limit` with this value is appended.
    authParams: forwarded to `filter_by_pubs_acc`.

  Returns:
    A list of stage dicts. Result documents have the author as `_id` plus
    `count` and the list `frags` of fragment numbers.
  """
  select = {
    '$match': {'frag_num': {'$gt': 0}, 'cocit_authors': {'$exists': True}}}
  trim = {'$project': dict.fromkeys(('prefix', 'suffix', 'exact'), False)}
  by_author = {'$group': {
    '_id': '$cocit_authors', 'count': {'$sum': 1},
    'frags': {'$push': '$frag_num'}}}

  pipeline = [select, trim]
  pipeline.extend(filter_by_pubs_acc(authParams))
  pipeline.append({'$unwind': '$cocit_authors'})
  pipeline.append(by_author)
  pipeline.append({'$sort': {'count': -1, '_id': 1}})
  if topn:
    pipeline.append({'$limit': topn})
  return pipeline
Exemple #21
0
def get_pos_neg_ngramms(
  topn: Optional[int], authorParams: AuthorParam, ngrammParam: NgrammParam
):
  """Start a pipeline over documents having `positive_negative` and `ngrams`.

  After the publication filter each n-gram entry is unwound and joined with
  its document from the `n_gramms` collection (as `ngrm`).

  NOTE(review): the excerpt ends after the optional n-gram filter; no
  `return` is visible and `topn` is not used in the lines shown.
  """
  pipeline = [
    {'$match': {
      'positive_negative': {'$exists': True},
      'ngrams': {'$exists': True},}},
    {'$project': {
      'pubid': True, 'positive_negative': True, 'ngrams': True}},]
  pipeline += filter_by_pubs_acc(authorParams)
  pipeline += [
    {'$unwind': '$ngrams'},
    {'$lookup': {
      'from': 'n_gramms', 'localField': 'ngrams._id',
      'foreignField': '_id', 'as': 'ngrm'}},
    {'$unwind': '$ngrm'},]
  # The n-gram filter stage is added only when get_ngramm_filter returns a
  # truthy value.
  if nf := get_ngramm_filter(ngrammParam, 'ngrm'):
    pipeline += [nf]
Exemple #22
0
def get_frags_topics_ngramms(
  authorParams: AuthorParam, ngrammParam: NgrammParam,
  probability: Optional[float], topn_crpssgramm:Optional[int]
):
  """Start a pipeline over documents having `topics`, `frag_num` and `ngrams`.

  After the publication filter each n-gram entry is unwound and joined with
  its document from the `n_gramms` collection (as `ngrm`).

  NOTE(review): the excerpt ends after the optional n-gram filter; no
  `return` is visible, and `probability` and `topn_crpssgramm` are not used
  in the lines shown.
  """
  pipeline = [
    {"$match": {
      "topics": {"$exists": 1}, "frag_num": {"$exists": 1},
      "ngrams": {'$exists': 1}}},
    {"$project": {"pubid": 1, "topics": 1, "frag_num": 1, "ngrams": 1}},]
  pipeline += filter_by_pubs_acc(authorParams)
  pipeline += [
    {"$unwind": "$ngrams"},
    {'$lookup': {
      'from': 'n_gramms', 'localField': 'ngrams._id',
      'foreignField': '_id', 'as': 'ngrm'}},
    {'$unwind': '$ngrm'},
  ]
  # The n-gram filter stage is added only when get_ngramm_filter returns a
  # truthy value.
  if nf := get_ngramm_filter(ngrammParam, 'ngrm'):
    pipeline += [nf]
Exemple #23
0
def get_pos_neg_contexts(authorParams: AuthorParam):
  """Build the pipeline grouping contexts and publications by pos/neg class.

  Args:
    authorParams: forwarded to `filter_by_pubs_acc`.

  Returns:
    A list of stage dicts. Result documents carry `class_pos_neg`,
    `cont_cnt`, `pub_cnt`, `pubids` and `contids`, ordered by
    `class_pos_neg` descending.
  """
  # `positive_negative.val` serves as an index into this label table.
  class_label = {
    '$arrayElemAt': [
      ['neutral', 'positive', 'positive', 'negative', 'negative'],
      '$positive_negative.val']}

  pipeline = [
    {'$match': {'positive_negative': {'$exists': True}}},
    {'$project': dict.fromkeys(('pubid', 'positive_negative'), True)},
  ]
  pipeline.extend(filter_by_pubs_acc(authorParams))

  by_class = {'$group': {
    '_id': class_label,
    'pubids': {'$addToSet': '$pubid'},
    'contids': {'$addToSet': '$_id'}}}
  with_counts = {'$project': {
    '_id': False, 'class_pos_neg': '$_id',
    'cont_cnt': {'$size': '$contids'}, 'pub_cnt': {'$size': '$pubids'},
    'pubids': '$pubids', 'contids': '$contids'}}
  pipeline.extend([by_class, with_counts, {'$sort': {'class_pos_neg': -1}}])
  return pipeline
Exemple #24
0
def get_top_ngramms_publications(topn: Optional[int],
                                 authorParams: AuthorParam,
                                 ngrammParam: NgrammParam):
    """Start a pipeline over documents that carry an `ngrams` field.

    After the publication filter each n-gram entry is unwound and joined
    with its document from the `n_gramms` collection (as `ngrm`).

    NOTE(review): the excerpt ends after the optional n-gram filter; no
    `return` is visible and `topn` is not used in the lines shown.
    """
    pipeline = [
        {
            '$match': {
                "ngrams": {
                    '$exists': 1
                }
            }
        },
        {
            # Drop fields that the visible stages below do not read.
            '$project': {
                'prefix': 0,
                'suffix': 0,
                'exact': 0,
                'cocit_authors': 0,
                "topics": 0
            }
        },
    ]
    pipeline += filter_by_pubs_acc(authorParams)

    pipeline += [
        {
            '$unwind': '$ngrams'
        },
        {
            '$lookup': {
                'from': 'n_gramms',
                'localField': 'ngrams._id',
                'foreignField': '_id',
                'as': 'ngrm'
            }
        },
        {
            '$unwind': '$ngrm'
        },
    ]
    # The n-gram filter stage is added only when get_ngramm_filter returns a
    # truthy value.
    if nf := get_ngramm_filter(ngrammParam, 'ngrm'):
        pipeline += [nf]
Exemple #25
0
def get_frags_ngramms_ngramms_branch_root(
  topn:Optional[int], authorParams: AuthorParam, ngrammParam: NgrammParam
):
  """Start a pipeline summing n-gram counts over contexts with `frag_num` > 0.

  N-grams are grouped by id with their summed `cnt` and the set of context
  ids, sorted by count, then joined with the `n_gramms` collection (as
  `ngramm`).

  NOTE(review): the excerpt ends after the optional n-gram filter; no
  `return` is visible and `topn` is not used in the lines shown.
  """
  pipeline = [
    {'$match': {'ngrams': {'$exists': 1}, 'frag_num': {'$gt': 0}}},]

  pipeline += filter_by_pubs_acc(authorParams)
  pipeline += [
    # The `ngrams` array travels under the name `linked_paper` from here on.
    {'$project': {'linked_paper': '$ngrams'}},
    {'$unwind': '$linked_paper'},
    {'$group': {
      '_id': '$linked_paper._id', 'count': {'$sum': '$linked_paper.cnt'},
      'cont_ids': {'$addToSet': '$_id'},}},
    {'$sort': {'count': -1, '_id': 1}},
    {'$lookup': {
      'from': 'n_gramms', 'localField': '_id', 'foreignField': '_id',
      'as': 'ngramm'}},
    {'$unwind': '$ngramm'},
  ]

  # The n-gram filter stage is added only when get_ngramm_filter returns a
  # truthy value.
  if nf := get_ngramm_filter(ngrammParam, 'ngramm'):
    pipeline += [nf]
Exemple #26
0
def get_pos_neg_cocitauthors(
  topn:Optional[int], authorParams: AuthorParam
):
  """Build the pipeline listing co-cited authors for each pos/neg class.

  Args:
    topn: when truthy, each class keeps only the first `topn` authors
      (via `$slice`); otherwise the full list is returned.
    authorParams: forwarded to `filter_by_pubs_acc`.

  Returns:
    A list of stage dicts. Result documents carry `class_pos_neg` and
    `cocitauthors` = [{'author', 'count'}, ...], ordered by
    `class_pos_neg` descending.
  """
  # `positive_negative.val` serves as an index into this label table.
  class_label = {
    '$arrayElemAt': [
      ['neutral', 'positive', 'positive', 'negative', 'negative'],
      '$positive_negative.val']}
  kept = ('pubid', 'positive_negative', 'cocit_authors')

  pipeline = [
    {'$match': {
      'positive_negative': {'$exists': True},
      'cocit_authors': {'$exists': True}}},
    {'$project': dict.fromkeys(kept, True)},
  ]
  pipeline.extend(filter_by_pubs_acc(authorParams))

  per_class_author = {'$group': {
    '_id': {'pos_neg': class_label, 'title': '$cocit_authors'},
    'coauthor_cnt': {'$sum': 1}}}
  # Authors are pushed in the order produced by the preceding sort.
  per_class = {'$group': {
    '_id': '$_id.pos_neg', 'cocitauthor': {
      '$push': {'author': '$_id.title', 'count': '$coauthor_cnt'}}, }}
  pipeline.extend([
    {'$unwind': '$cocit_authors'},
    per_class_author,
    {'$sort': {'coauthor_cnt': -1, '_id.title': 1}},
    per_class,
  ])

  authors_expr = (
    {'$slice': ['$cocitauthor', topn]} if topn else '$cocitauthor')
  pipeline.append({
    '$project': {
      '_id': False, 'class_pos_neg': '$_id', 'cocitauthors': authors_expr}})
  pipeline.append({'$sort': {'class_pos_neg': -1}})
  return pipeline
Exemple #27
0
def get_top_topics_publications(topn: Optional[int], authorParams: AuthorParam,
                                probability: Optional[float]):
    """Build the pipeline with per-topic context/publication statistics.

    Args:
        topn: when truthy, a `$limit` with this value is inserted right after
            the sort (before the final projection).
        authorParams: forwarded to `filter_by_pubs_acc` and
            `_add_topic2pipeline`.
        probability: when truthy, topics with a lower `probability` are
            dropped.

    Returns:
        A list of stage dicts. Result documents carry `topic`, `count_pubs`,
        `count_conts`, `probability_avg`, `probability_stddev` (both rounded
        to 2 places) and `pubs`.

    NOTE(review): the group key is '$topics.title', whereas other builders
    in this file group on '$topic.title' after `_add_topic2pipeline` --
    confirm which field actually holds the title here.
    """
    excluded = ('prefix', 'suffix', 'exact', 'cocit_authors', "ngrams")
    pipeline = [
        {'$match': {"topics": {'$exists': 1}}},
        {'$project': dict.fromkeys(excluded, 0)},
    ]
    pipeline.extend(filter_by_pubs_acc(authorParams))
    pipeline.append({'$unwind': '$topics'})
    if probability:
        pipeline.append(
            {'$match': {"topics.probability": {'$gte': probability}}})
    pipeline.extend(_add_topic2pipeline(authorParams))

    by_topic = {
        '$group': {
            '_id': '$topics.title',
            'count': {'$sum': 1},
            'probability_avg': {'$avg': '$topics.probability'},
            'probability_stddev': {'$stdDevPop': '$topics.probability'},
            "pubs": {'$addToSet': '$pubid'},
        }
    }
    pipeline.append(by_topic)
    pipeline.append({'$sort': {'count': -1, '_id': 1}})
    if topn:
        pipeline.append({'$limit': topn})

    pipeline.append({
        '$project': {
            "topic": "$_id",
            "_id": 0,
            "count_pubs": {"$size": "$pubs"},
            "count_conts": "$count",
            "probability_avg": {"$round": ["$probability_avg", 2]},
            "probability_stddev": {"$round": ["$probability_stddev", 2]},
            "pubs": "$pubs",
        }
    })
    return pipeline
Exemple #28
0
def get_pos_neg_pubs(authorParams: AuthorParam):
  """Start a pipeline over documents that carry a `positive_negative` field.

  NOTE(review): the excerpt ends after the optional publication filter; no
  further stages and no `return` are visible in the lines shown.
  """
  pipeline = [
    {'$match': {'positive_negative': {'$exists': 1}, }},]
  # Stages from filter_by_pubs_acc are appended only when the result is truthy.
  if filter_pipeline := filter_by_pubs_acc(authorParams):
    pipeline += filter_pipeline