コード例 #1
0
def Main(site, db_name, runDate, targetDate):
    """Crawl Naver news for one target date, upload it to MongoDB and report on Slack.

    Args:
        site: Label used only in the Slack messages.
        db_name: Name of the database to write into.
        runDate: Object with ``strftime`` (e.g. a ``datetime``); reported to
            Slack formatted as ``%Y%m%d``.
        targetDate: Passed through to ``Main_Naver`` and echoed in the Slack
            messages.

    The crawl result is expected to be two objects supporting ``len()`` and
    ``to_dict('records')`` -- presumably pandas DataFrames; verify against
    ``Main_Naver``.
    """
    mongodb = dh.ToMongoDB(*dh.AWS_MongoDB_Information())
    try:
        useDb = dh.Use_Database(mongodb, db_name)
        slack = cb.Slacker(cb.slacktoken())
        slack.chat.post_message('# general', 'Start : {}, targetData : {} '.format(site, targetDate))

        startTime = datetime.now()
        newsDf, commentsDf = Main_Naver(targetDate)
        middleTime = datetime.now()
        # Keep middleTime as a timestamp: it is needed again below to
        # measure how long the upload took.
        runningTime = middleTime - startTime

        print('Start Uploading')
        newsRecords = newsDf.to_dict('records')
        commentRecords = commentsDf.to_dict('records')
        # PyMongo's insert_many rejects an empty document list, so a day
        # without news or comments must skip the insert instead of crashing.
        if newsRecords:
            dh.Use_Collection(useDb, 'newsNaver2018').insert_many(newsRecords)
        if commentRecords:
            dh.Use_Collection(useDb, 'comments2018').insert_many(commentRecords)
        print('End Uploading')
        endTime = datetime.now()
        uploadTime = endTime - middleTime

        outcome_info = '{}, news : {}, comment : {}'.format(site, len(newsDf), len(commentsDf))
        date_info = 'run date : {}, target date : {}'.format(runDate.strftime('%Y%m%d'), targetDate)
        time_info = 'running time : {}, uploading time : {}'.format(runningTime, uploadTime)
        slack.chat.post_message('# general', outcome_info)
        slack.chat.post_message('# general', date_info)
        slack.chat.post_message('# general', time_info)
        slack.chat.post_message('# general', 'Complete Upload In AWS Mongodb')
    finally:
        # Release the connection even when crawling, uploading or Slack fails.
        mongodb.close()
コード例 #2
0
ファイル: Basic_Module.py プロジェクト: drumcap/vanillaPython
def Read_Comments2(row):
    """Load the stored comments matching one news row into a DataFrame.

    Args:
        row: Mapping-like object providing ``'site'``, ``'category'``,
            ``'date'`` and ``'rank'``; ``'rank'`` is converted with ``int()``
            before being used in the query.

    Returns:
        A pandas DataFrame built from every document in the ``comments``
        collection of ``hy_db`` that matches those four fields. Its shape is
        printed before returning.
    """
    import pandas as pd
    import Database_Handler as dh

    mongodb = dh.ToMongoDB(*dh.GCP_MongoDB_Information())
    try:
        useDB = dh.Use_Database(mongodb, 'hy_db')
        commentCollection = dh.Use_Collection(useDB, 'comments')
        info = {
            'site': row['site'],
            'category': row['category'],
            'date': row['date'],
            'rank': int(row['rank']),
        }
        # Materialise the cursor while the connection is still open.
        commentsForNews = pd.DataFrame(list(commentCollection.find(info)))
    finally:
        # A new connection is opened on every call, so it must be released
        # here or repeated calls pile up open connections.
        mongodb.close()

    print(commentsForNews.shape)
    return commentsForNews
コード例 #3
0
def GetNumberOfCommentInDB(row):
    """Count stored comments for one news row and record mismatches on the news document.

    Args:
        row: Object providing ``'site'``, ``'category'``, ``'date'``,
            ``'rank'``, ``'id'`` and ``'number_of_crawled_comment'`` by key,
            plus a numeric ``name`` attribute (presumably a pandas Series
            from a row-wise ``DataFrame.apply`` -- verify against caller).

    The comments matching site/category/date/rank are counted in the
    ``comments`` collection of ``hy_db``. When that count differs from
    ``row['number_of_crawled_comment']``, ``real_number_of_comment`` is set
    on the news document whose ``_id`` is ``ObjectId(row['id'])`` -- in
    ``newsDaum`` when the site is ``'daum'``, otherwise in ``newsNaver``.
    Returns ``None``.
    """
    import Database_Handler as dh
    from bson import ObjectId

    mongodb = dh.ToMongoDB(*dh.GCP_MongoDB_Information())
    try:
        useDB = dh.Use_Database(mongodb, 'hy_db')
        commentCollection = dh.Use_Collection(useDB, 'comments')
        info = {
            'site': row['site'],
            'category': row['category'],
            'date': row['date'],
            'rank': row['rank'],
        }
        # NOTE(review): Cursor.count() is deprecated in newer PyMongo releases
        # in favour of Collection.count_documents(); left as-is because the
        # rest of this file uses the legacy API -- confirm the driver version.
        realNumCount = commentCollection.find(info).count()

        oid = ObjectId(row['id'])
        newsCollectionName = 'newsDaum' if row['site'] == 'daum' else 'newsNaver'
        newsCollection = dh.Use_Collection(useDB, newsCollectionName)
        if realNumCount != row['number_of_crawled_comment']:
            newsCollection.update_one(
                {'_id': oid},
                {'$set': {'real_number_of_comment': realNumCount}})
    finally:
        # Each call opens its own connection; close it so per-row use does
        # not leak one connection per row.
        mongodb.close()

    # Lightweight progress indicator: print every 100th row label.
    if row.name % 100 == 0:
        print(row.name)
コード例 #4
0
        if isElementPresent(driver, 'tag_relate') == False:
            keywords = 'NaN'
        else:
            element = WebDriverWait(driver, 3).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'tag_relate')))
            keywords = driver.find_elements_by_class_name('tag_relate')
            keywords = list(map(lambda x: x.text, keywords))
            keywords = list(map(lambda x: re.sub('#', '', x), keywords))
        driver.quit()
    return keywords


if __name__ == '__main__':
    site = 'Naver'
    collection = 'newsNaver'
    mongodb = dh.ToMongoDB(*dh.AWS_MongoDB_Information())
    dbname = 'hy_db'
    useDb = dh.Use_Database(mongodb, dbname)
    slack = cb.Slacker(cb.slacktoken())
    useCollection = dh.Use_Collection(useDb, collection)
    dataList = useCollection.find({'site': site})
    for data in dataList:
        if not 'keywords' in data.keys():
            keywords = SearchKeywordsFromDaumForNaver2(data['title'])
            useCollection.update({"_id": data['_id']},
                                 {'$set': {
                                     "keywords": keywords
                                 }})
            print(keywords)

        elif 'keywords' in data.keys() and data['keywords'] == 'NaN':