Exemple #1
0
def 뉴스TBL에_뉴스URL이_존재하면_뉴스원본TBL의_파싱완료를_True로_일괄업뎃(
        dbg_on=False,
        shown_cnt=1,
        사전검증=False):
    """Flag PARSING_TBL documents as parsed when their URL exists in PARSED_TBL.

    Collects the distinct ``뉴스_url`` values of PARSED_TBL documents that
    have that field, then sets ``파싱완료`` to ``True`` on every PARSING_TBL
    document whose ``뉴스_url`` is in that list. No documents are inserted
    (``upsert=False``).

    Args:
        dbg_on: Debug flag forwarded to the ``dbg`` and ``mg`` helpers.
        shown_cnt: Forwarded to ``mg.distinct``.
        사전검증: Forwarded to ``mg.update_many``.
    """
    # The dbg helpers are kept even though their results are unused here:
    # they may emit debug output as a side effect.
    whoami = dbg.whoami(sys.modules[__name__].__file__,
                        inspect.stack()[0][3], dbg_on)
    inputs = dbg.inputs(inspect.currentframe(), dbg_on)

    # Step 1: every URL that already has a parsed document.
    query = {'뉴스_url': {'$exists': 1}}
    뉴스url_li = mg.distinct(db명=DB명,
                           tbl명=PARSED_TBL,
                           col명='뉴스_url',
                           query=query,
                           dbg_on=dbg_on,
                           shown_cnt=shown_cnt)

    # Step 2: mark the matching source documents as parsed.
    query = {'뉴스_url': {'$in': 뉴스url_li}}
    update = {'$set': {'파싱완료': True}}
    mg.update_many(db명=DB명,
                   tbl명=PARSING_TBL,
                   query=query,
                   update=update,
                   upsert=False,
                   dbg_on=dbg_on,
                   사전검증=사전검증)
Exemple #2
0
def RD_TBL에서_뉴스XX의_뉴스id가_존재하면_뉴스TBL에_수집완료True를_업뎃(
        타겟col명='뉴스제목',
        analyzerMethod='srl',
        dbg_on=False,
        shown_cnt=1,
        사전검증=False):
    """Set the "<타겟col명>_ETRI언어분석_수집완료" flag on '뉴스' documents.

    The ids to flag come from
    ``RD_TBL에서_ETRI언어분석수집이_정상완료된_뉴스idli로딩``; every '뉴스'
    document whose ``_id`` is in that list gets the flag set to ``True``.
    Afterwards the flagged documents are queried back via ``mg.find`` as a
    verification step (the result is not returned).

    Args:
        타겟col명: Column name prefix used to build the flag column name.
        analyzerMethod: Forwarded to the id-loading helper.
        dbg_on: Debug flag forwarded to the ``dbg`` and ``mg`` helpers.
        shown_cnt: Forwarded to the id-loading helper.
        사전검증: Forwarded to ``mg.update_many``.
    """
    whoami = dbg.whoami(sys.modules[__name__].__file__,
                        inspect.stack()[0][3], dbg_on)
    inputs = dbg.inputs(inspect.currentframe(), dbg_on)

    완료_id_목록 = RD_TBL에서_ETRI언어분석수집이_정상완료된_뉴스idli로딩(
        타겟col명=타겟col명,
        analyzerMethod=analyzerMethod,
        dbg_on=dbg_on,
        shown_cnt=shown_cnt)
    플래그col명 = 타겟col명 + '_ETRI언어분석_수집완료'
    id_조건 = {'$in': 완료_id_목록}

    # Flag every matching news document; never insert new ones.
    mg.update_many(
        db명=DB명,
        tbl명='뉴스',
        query={'_id': id_조건},
        update={'$set': {플래그col명: True}},
        upsert=False,
        dbg_on=dbg_on,
        사전검증=사전검증,
    )

    # Verification: read back the documents that now carry the flag.
    print(''.join(['\n', '*' * 60, inspect.stack()[0][3], '_검증']))
    mg.find(
        db명=DB명,
        tbl명='뉴스',
        query={'_id': id_조건, 플래그col명: True},
        projection=None,
        dbg_on=dbg_on,
        컬럼순서li=[],
        df보고형태='df',
    )
Exemple #3
0
def 뉴스_원본TBL에_있는_뉴스url이_화면배치TBL에도_있다면_수집완료True_업뎃(dbg_on=False, 사전검증=False):
    """Flag COLLECTING_TBL documents as collected when their href is a known news URL.

    Reads the distinct ``뉴스_url`` values from PARSING_TBL and sets
    ``수집완료`` to ``True`` on every COLLECTING_TBL document whose ``href``
    is one of them. No documents are inserted (``upsert=False``).

    Args:
        dbg_on: Debug flag forwarded to the ``mg`` helpers.
        사전검증: Forwarded to ``mg.update_many``.
    """
    banner = '\n' + '=' * 60 + inspect.stack()[0][3]
    print(banner)

    수집된_url_목록 = mg.distinct(
        db명=DB명,
        tbl명=PARSING_TBL,
        col명='뉴스_url',
        query=None,
        dbg_on=dbg_on,
        shown_cnt=1,
    )

    mg.update_many(
        db명=DB명,
        tbl명=COLLECTING_TBL,
        query={'href': {'$in': 수집된_url_목록}},
        update={'$set': {'수집완료': True}},
        upsert=False,
        dbg_on=dbg_on,
        사전검증=사전검증,
    )
Exemple #4
0
def 화면배치TBL의_수집일시를_뉴스원본TBL에_업뎃(dbg_on=False, 사전검증=False):
    """Copy each href's earliest '수집일시' from '화면배치' into PARSING_TBL.

    Loads ``_id``, ``href`` and ``수집일시`` from the '화면배치' collection,
    keeps the first row per ``href`` after sorting by ``href`` then
    ``수집일시``, and for each such row sets ``수집일시`` and ``화면배치id``
    on the PARSING_TBL documents whose ``뉴스_url`` equals that ``href``.
    The update uses ``upsert=True``, so a missing URL results in a new
    PARSING_TBL document.

    When ``dbg_on`` is truthy, the PARSING_TBL documents linked to the last
    processed row are queried back as a spot check.

    Args:
        dbg_on: Debug flag forwarded to the ``mg`` helpers; also enables
            the final verification query.
        사전검증: Forwarded to ``mg.update_many``.
    """
    func_name = inspect.stack()[0][3]
    print('\n' + '='*60 + func_name)

    projection = {'_id': 1, 'href': 1, '수집일시': 1}
    df = mg.find(db명=DB명, tbl명='화면배치', query=None, projection=projection,
                 dbg_on=dbg_on, 컬럼순서li=[], df보고형태='df')
    # NOTE(review): assumes mg.find(df보고형태='df') returns a pandas DataFrame
    # (or None) -- confirm. With no rows there is nothing to copy, and the
    # sort below could fail on missing columns, so stop early.
    if df is None or len(df) == 0:
        return

    # Sorting ascending and keeping the first row per href selects the
    # smallest '수집일시' for each URL.
    df = df.sort_values(['href', '수집일시'])
    df = df.drop_duplicates(subset=['href'], keep='first')
    dicli = df.to_dict('records')
    dicli_len = len(dicli)

    for i, d in enumerate(dicli, start=1):
        print('\n' + '-'*60 + '{}/{}, 수집일시:{}'.format(i, dicli_len, d['수집일시']))
        query = {'뉴스_url': d['href']}
        update = {'$set': {'수집일시': d['수집일시'], '화면배치id': d['_id']}}
        mg.update_many(db명=DB명, tbl명=PARSING_TBL, query=query, update=update,
                       upsert=True, dbg_on=dbg_on, 사전검증=사전검증)

    # Spot check on the last processed record only; guarded so an empty
    # record list can never reach an undefined loop variable.
    if dbg_on and dicli:
        last = dicli[-1]
        print('\n' + '='*60 + func_name + '_검증')
        query = {'화면배치id': last['_id']}
        projection = {'_id': 1, '화면배치id': 1, '수집일시': 1, '뉴스_url': 1}
        mg.find(db명=DB명, tbl명=PARSING_TBL, query=query, projection=projection,
                dbg_on=dbg_on, 컬럼순서li=[], df보고형태='dicli')
Exemple #5
0
def TBL의_뉴스id가_RDTBL에도_존재한다면_파싱완료는_True여야한다(
        타겟col명,
        analyzerMethod='srl',
        subMethod='WSD',
        texttype='NNG',
        dbg_on=False):
    """Set the parse-complete flag on RD_TBL명 documents that TBL명 already covers.

    Steps:
      1. Collect the distinct ``뉴스id`` values of TBL명 documents whose
         parsed-result column is not ``None``.
      2. Collect the ``_id`` values of RD_TBL명 documents whose ``뉴스id`` is
         in that list and whose analysis-result column is not ``None``.
      3. Set the parse-complete column to ``True`` on those RD_TBL명
         documents (no upsert).

    Column names are built from the arguments:
      * analysis result: ``타겟col명 + analyzerMethod + '_res'``
      * parsed result: ``타겟col명 + analyzerMethod + '_' + subMethod + texttype + 'li'``
      * parse-complete flag: analysis-result name + ``'파싱완료'``

    Args:
        타겟col명: Column name prefix.
        analyzerMethod: Analyzer method part of the column names.
        subMethod: Sub-method part of the parsed-result column name.
        texttype: Text-type part of the parsed-result column name.
        dbg_on: Debug flag forwarded to the ``mg`` helpers.
    """
    파싱타겟col명 = 타겟col명 + analyzerMethod + '_res'
    파싱저장col명 = 타겟col명 + analyzerMethod + '_' + subMethod + texttype + 'li'
    파싱완료col명 = 파싱타겟col명 + '파싱완료'

    # Step 1: news ids that already have a parsed result in TBL명.
    query = {파싱저장col명: {'$ne': None}}
    뉴스id_li = mg.distinct(db명=DB명,
                          tbl명=TBL명,
                          col명='뉴스id',
                          query=query,
                          dbg_on=dbg_on,
                          shown_cnt=1)

    # Step 2: restrict to RD_TBL명 documents for those ids. Previously this
    # filter was built but query=None was passed, so every document in
    # RD_TBL명 ended up flagged regardless of whether TBL명 contained it.
    query = {'뉴스id': {'$in': 뉴스id_li}, 파싱타겟col명: {'$ne': None}}
    뉴스id_RD_li = mg.distinct(db명=DB명,
                             tbl명=RD_TBL명,
                             col명='_id',
                             query=query,
                             dbg_on=dbg_on,
                             shown_cnt=1)
    print(len(뉴스id_RD_li))

    # Step 3: flag the selected documents as parse-complete.
    query = {'_id': {'$in': 뉴스id_RD_li}}
    update = {'$set': {파싱완료col명: True}}
    mg.update_many(db명=DB명,
                   tbl명=RD_TBL명,
                   query=query,
                   update=update,
                   upsert=False,
                   dbg_on=dbg_on,
                   사전검증=False)