Example #1
0
def get_cmp_authors_cont(ap1: AuthorParam, ap2: AuthorParam, word: str,
                         field_col: FieldsSet, ngrmpr: Optional[NgrammParam],
                         probability: Optional[float]) -> list:
    """Build the two-author comparison pipeline for a single field.

    Both author parameters are asserted to satisfy ``only_one()``. Each one
    is resolved to an ``(atype, name)`` pair with ``get_qual_auth()`` and the
    pairs are handed, together with the remaining arguments, to
    ``get_cmp_authors_ref_cont``.

    Returns:
        Whatever ``get_cmp_authors_ref_cont`` returns, unchanged.
    """
    assert ap1.only_one()
    assert ap2.only_one()
    first_type, first_name = ap1.get_qual_auth()
    second_type, second_name = ap2.get_qual_auth()
    pipeline = get_cmp_authors_ref_cont(
        first_type, first_name, second_type, second_name,
        word, field_col, ngrmpr, probability)
    return pipeline
Example #2
0
async def _req_common2authors(
    authorParams1: AuthorParam = Depends(depAuthorParamOnlyOne),
    authorParams2: AuthorParam = Depends(depAuthorParamOnlyOne2),
    ngrmpr: NgrammParam = Depends(depNgrammParamReq),
    probability: Optional[float] = .5,
    _debug_option: Optional[DebugOption] = None,
    slot: Slot = Depends(Slot.req2slot)):
    """Report, per field set, the words two authors have in common.

    Flow:
      * ``get_cmp_authors`` supplies one aggregation pipeline per key.
      * ``DebugOption.pipeline`` returns those pipelines as-is.
      * When both author parameters compare equal, no query is run and
        every ``FieldsSet`` member gets ``common=1, union=1``.
      * ``DebugOption.raw_out`` returns the raw documents of every pipeline.
      * Otherwise each pipeline is run on ``slot.mdb.publications`` and
        ``collect_cmp_vals`` yields two key->value mappings, one per author.

    Returns:
        A dict with ``author1``/``author2`` descriptors plus one entry per
        pipeline key holding ``common`` (size of the key intersection),
        ``union`` (size of the key union) and ``common_words`` (entries for
        the shared keys in sorted order).
    """
    pipelines = get_cmp_authors(authorParams1, authorParams2, ngrmpr,
                                probability)
    if _debug_option == DebugOption.pipeline:
        return pipelines

    atype1, name1 = authorParams1.get_qual_auth()
    atype2, name2 = authorParams2.get_qual_auth()
    authors = dict(author1=dict(atype=atype1, name=name1),
                   author2=dict(atype=atype2, name=name2))

    if authorParams1 == authorParams2:
        # Every field set maps to this one shared dict object.
        identical = dict(common=1, union=1)
        return dict(**authors, **{fld: identical for fld in FieldsSet})

    coll: Collection = slot.mdb.publications

    if _debug_option == DebugOption.raw_out:
        raw = {}
        for key, pipeline in pipelines.items():
            raw[key] = [doc async for doc in coll.aggregate(pipeline)]
        return raw

    vals = {}
    for key, pipeline in pipelines.items():
        cnts1, cnts2 = await collect_cmp_vals(
            atype1, name1, atype2, name2, coll.aggregate(pipeline))
        shared = cnts1.keys() & cnts2.keys()
        either = cnts1.keys() | cnts2.keys()

        is_ngram = key == FieldsSet.ngram
        if is_ngram:
            # Number of leading characters cut from each n-gram key;
            # presumably a "<ltype>" prefix plus one separator - TODO confirm.
            prefix_len = len(ngrmpr.ltype.value) + 1

        common_words = []
        for token in sorted(shared):
            shown = token[prefix_len:] if is_ngram else token
            common_words.append(
                dict(word=shown, author1=cnts1[token], author2=cnts2[token]))

        vals[key] = dict(common=len(shared),
                         union=len(either),
                         common_words=common_words)

    return dict(**authors, **vals)
Example #3
0
def get_cmp_authors(ap1: AuthorParam, ap2: AuthorParam, ngrmpr: NgrammParam,
                    probability: float) -> Dict[str, list]:
    """Build one comparison pipeline per ``FieldsSet`` member.

    Both author parameters are asserted to satisfy ``only_one()`` and are
    resolved to ``(atype, name)`` pairs with ``get_qual_auth()``.

    Returns:
        A dict keyed by each ``FieldsSet`` member whose value is the result
        of ``get_cmp_authors_ref`` for that member.
    """
    assert ap1.only_one()
    assert ap2.only_one()
    first_type, first_name = ap1.get_qual_auth()
    second_type, second_name = ap2.get_qual_auth()
    return {
        fld_set: get_cmp_authors_ref(first_type, first_name,
                                     second_type, second_name,
                                     fld_set, ngrmpr, probability)
        for fld_set in FieldsSet
    }
Example #4
0
async def _req_pubs_refauthors(
  top_auth:Optional[int]=3,
  authorParams:AuthorParam=Depends(),
  _debug_option:Optional[DebugOption]=None,
  slot:Slot=Depends(Slot.req2slot)
):
  """List publications together with the referenced-author rows of each one.

  The shared pipeline tail comes from ``get_refauthors_part`` called with
  an empty ``AuthorParam()``; the author filter is applied when selecting
  the publications, through ``filter_acc_dict(authorParams)``.
  ``DebugOption.pipeline`` returns that pipeline tail without querying.

  Returns:
    A list of ``{pubid, name, ref_authors}`` dicts, one per publication
    that has a ``name`` field, iterated in ascending ``_id`` order.
    ``ref_authors`` holds the aggregation rows from ``contexts`` for that
    publication with ``pos_neg`` and ``frags`` removed.
  """
  pipeline = get_refauthors_part(top_auth, AuthorParam())
  if _debug_option == DebugOption.pipeline:
    return pipeline

  publications: Collection = slot.mdb.publications
  contexts: Collection = slot.mdb.contexts

  query = {'name': {'$exists': 1}, **filter_acc_dict(authorParams)}
  pubs_cursor = publications.find(
    query, projection={'_id': True, 'name': True},
    sort=[('_id', ASCENDING)])

  result = []
  async for pub in pubs_cursor:
    pub_id = pub['_id']
    # Restrict the shared pipeline to the contexts of this publication.
    per_pub_pipeline = [{'$match': {'pubid': pub_id}}] + pipeline
    rows = []
    async for row in contexts.aggregate(per_pub_pipeline):
      for dropped in ('pos_neg', 'frags'):
        row.pop(dropped, None)
      rows.append(row)
    result.append({'pubid': pub_id, 'name': pub['name'], 'ref_authors': rows})

  return result
Example #5
0
async def _req_common2authors_field(
    field: FieldsSet,
    authorParams1: AuthorParam,
    authorParams2: AuthorParam,
    word: Optional[str],
    *,
    ngrmpr: Optional[NgrammParam] = None,
    probability: Optional[float] = None,
    slot: Slot,
    _debug_option: Optional[DebugOption] = None,
):
    """Compare two authors on one field, including the contexts per word.

    The pipeline is built by ``get_cmp_authors_cont`` and run on
    ``slot.mdb.publications`` with ``allowDiskUse=True``.
    ``DebugOption.pipeline`` returns the pipeline, ``DebugOption.raw_out``
    the raw aggregation documents.

    Returns:
        A dict with ``author1``/``author2`` descriptors, ``common`` (size of
        the key intersection), ``union`` (size of the key union) and
        ``common_words``: for every shared key in sorted order, the word and,
        per author, its ``cnt`` plus the sorted ``conts``.

    NOTE(review): when ``field`` is ``FieldsSet.ngram`` the code reads
    ``ngrmpr.ltype.value``, so ``ngrmpr`` must not be None in that case.
    """
    pipeline = get_cmp_authors_cont(authorParams1, authorParams2, word, field,
                                    ngrmpr, probability)
    if _debug_option == DebugOption.pipeline:
        return pipeline

    coll: Collection = slot.mdb.publications
    cursor = coll.aggregate(pipeline, allowDiskUse=True)
    if _debug_option == DebugOption.raw_out:
        return [doc async for doc in cursor]

    atype1, name1 = authorParams1.get_qual_auth()
    atype2, name2 = authorParams2.get_qual_auth()

    (set1, conts1), (set2, conts2) = await collect_cmp_vals_conts(
        atype1, name1, atype2, name2, cursor)
    shared = set1.keys() & set2.keys()
    either = set1.keys() | set2.keys()
    ordered = sorted(shared)

    # Leading characters to cut from every key; non-zero only for n-grams.
    if field == FieldsSet.ngram:
        prefix_len = len(ngrmpr.ltype.value) + 1
    else:
        prefix_len = 0

    common_words = []
    for token in ordered:
        first = dict(cnt=set1[token], conts=sorted(conts1.get(token, ())))
        second = dict(cnt=set2[token], conts=sorted(conts2.get(token, ())))
        common_words.append(
            dict(word=token[prefix_len:], author1=first, author2=second))

    return dict(author1=dict(atype=atype1, name=name1),
                author2=dict(atype=atype2, name=name2),
                common=len(shared),
                union=len(either),
                common_words=common_words)
Example #6
0
async def _req_compare2authors(
    authorParams1: AuthorParam = Depends(depAuthorParamOnlyOne),
    authorParams2: AuthorParam = Depends(depAuthorParamOnlyOne2),
    ngrmpr: NgrammParam = Depends(depNgrammParamReq),
    probability: Optional[float] = .5,
    _debug_option: Optional[DebugOption] = None,
    slot: Slot = Depends(Slot.req2slot)):
    """Compute comparison values for two authors, per field set.

    Flow:
      * ``get_cmp_authors`` supplies one aggregation pipeline per key.
      * ``DebugOption.pipeline`` returns those pipelines as-is.
      * When both author parameters compare equal, no query is run and
        every ``FieldsSet`` member gets
        ``common=1, union=1, yaccard=1, jensen_shannon=0``.
      * ``DebugOption.raw_out`` returns the raw documents of every pipeline.
      * Otherwise each pipeline is run on ``slot.mdb.publications`` and its
        cursor is passed to ``calc_cmp_vals`` together with the key.

    Returns:
        A dict with ``author1``/``author2`` descriptors plus one entry per
        pipeline key holding the ``calc_cmp_vals`` result.
    """
    pipelines = get_cmp_authors(authorParams1, authorParams2, ngrmpr,
                                probability)
    if _debug_option == DebugOption.pipeline:
        return pipelines

    atype1, name1 = authorParams1.get_qual_auth()
    atype2, name2 = authorParams2.get_qual_auth()
    authors = dict(author1=dict(atype=atype1, name=name1),
                   author2=dict(atype=atype2, name=name2))

    if authorParams1 == authorParams2:
        # Every field set maps to this one shared dict object.
        identical = dict(common=1, union=1, yaccard=1, jensen_shannon=0)
        return dict(**authors, **{fld: identical for fld in FieldsSet})

    coll: Collection = slot.mdb.publications

    if _debug_option == DebugOption.raw_out:
        raw = {}
        for key, pipeline in pipelines.items():
            raw[key] = [doc async for doc in coll.aggregate(pipeline)]
        return raw

    vals = {}
    for key, pipeline in pipelines.items():
        vals[key] = await calc_cmp_vals(
            atype1, name1, atype2, name2, coll.aggregate(pipeline), key)

    return dict(**authors, **vals)
Example #7
0
def filter_acc_dict(ap: AuthorParam) -> Dict[str, str]:
  """Filter by author, cited, citing.

  Returns an empty dict when ``ap.is_empty()`` is true. Otherwise maps
  ``uni_authors`` / ``uni_cited`` / ``uni_citing`` to ``ap.author`` /
  ``ap.cited`` / ``ap.citing`` respectively, leaving out falsy values.
  """
  if ap.is_empty():
    return {}
  candidates = (
    ('authors', ap.author),
    ('cited', ap.cited),
    ('citing', ap.citing),
  )
  match = {}
  for suffix, val in candidates:
    if val:
      match[f'uni_{suffix}'] = val
  return match
Example #8
0
def filter_by_pubs_acc(authParams: AuthorParam) -> List[dict]:
  """Build pipeline stages that filter documents by their publication.

  Returns an empty list when ``authParams.is_empty()`` is true. Otherwise
  returns three stages: a ``$lookup`` joining ``publications`` on
  ``pubid`` == ``_id`` into ``pub``, an ``$unwind`` of ``$pub``, and a
  ``$match`` on the ``filter_acc_dict`` conditions with every key prefixed
  by ``pub.``.
  """
  if authParams.is_empty():
    return []

  pub_conditions = {
    f'pub.{key}': val
    for key, val in filter_acc_dict(authParams).items()
  }
  join_stage = {
    '$lookup': {
      'from': 'publications',
      'localField': 'pubid',
      'foreignField': '_id',
      'as': 'pub',
    },
  }
  return [join_stage, {'$unwind': '$pub'}, {'$match': pub_conditions}]
Example #9
0
def filter_by_topic(
  ap: AuthorParam, *, as_field:str= 'topic'
):
  """Build a ``$match`` stage that ORs the author conditions on a sub-field.

  Returns an empty list when ``ap.is_empty()`` is true. Otherwise returns a
  one-stage pipeline whose ``$or`` holds one ``{as_field}.uni_author`` /
  ``uni_cited`` / ``uni_citing`` condition for each truthy value among
  ``ap.author``, ``ap.cited`` and ``ap.citing``.

  NOTE(review): if ``is_empty()`` is false while all three values are
  falsy, the ``$or`` list is empty; whether that can happen is not visible
  here.
  """
  if ap.is_empty():
    return []
  candidates = (
    ('author', ap.author),
    ('cited', ap.cited),
    ('citing', ap.citing),
  )
  alternatives = []
  for fld, val in candidates:
    if val:
      alternatives.append({f'{as_field}.uni_{fld}': val})
  return [{'$match': {"$or": alternatives}}]
Example #10
0
def depAuthorParamOnlyOne2(
    ap:AuthorParamOnlyOne2=Depends()
) -> AuthorParam:
  """Convert the second-author parameters into a plain ``AuthorParam``.

  Copies ``author2`` / ``cited2`` / ``citing2`` of ``ap`` into the
  ``author`` / ``cited`` / ``citing`` fields of a new ``AuthorParam``.
  """
  return AuthorParam(
    author=ap.author2,
    cited=ap.cited2,
    citing=ap.citing2,
  )
Example #11
0
def depAuthorParamOnlyOne(
  authorParams:AuthorParamOnlyOne=Depends()
) -> AuthorParam:
  """Convert ``AuthorParamOnlyOne`` into a plain ``AuthorParam``.

  The new ``AuthorParam`` is constructed from the keyword arguments given
  by ``authorParams.dict()``.
  """
  fields = authorParams.dict()
  return AuthorParam(**fields)