Esempio n. 1
0
    def __init__(self):
        """Open the MySQL and MongoDB handles and cache the keyword list.

        Connects to the ``youtube`` MySQL database on 127.0.0.1:3306, pings
        the connection with ``reconnect=True`` and keeps a cursor on
        ``self.cur``. Then obtains a MongoDB handle from
        ``connectMongo(True)`` and stores the distinct ``keyWord`` values of
        its ``keyWords`` collection on ``self.wordList``.
        """
        mysql_settings = {
            "host": "127.0.0.1",
            "port": 3306,
            "user": "******",
            "password": "",
            "db": "youtube",
            "charset": "utf8",
        }
        self.mysql_connection = pymysql.connect(**mysql_settings)
        self.mysql_connection.ping(reconnect=True)
        self.cur = self.mysql_connection.cursor()

        # Connect to MongoDB.
        # NOTE(review): the meaning of the `True` argument is defined by
        # connectMongo itself - confirm there.
        self.db = connectMongo(True)

        self.collection = self.db["keyWords"]
        distinct_keywords = self.collection.distinct("keyWord")
        self.wordList = list(distinct_keywords)
Esempio n. 2
0
import sys

sys.path.append("./../")

from db.mongodb import connectMongo
from db.mongoquery import mongoQuery
import threading
import time

# MongoDB handle shared by the module-level collections below.
# NOTE(review): the meaning of the `True` argument is defined in db.mongodb - confirm.
db = connectMongo(True)
# Handle looked up under the name "resources".
collection = db["resources"]

# Second lookup of the same "resources" name; this is the handle that
# insertCollection() writes to.
resourcesCollection = db["resources"]


def insertCollection(item):
    """Tag a resource document with its ``part`` and upsert it.

    The part is ``"clothes"`` when that substring occurs in ``item["_id"]``
    and ``"GB"`` otherwise. The document whose ``_id`` matches is updated in
    ``resourcesCollection``, or created because ``upsert=True`` is passed.

    Any exception raised by the write is printed and swallowed, so the call
    is best-effort. The ``item["_id"]`` lookup happens before the ``try``,
    so an item without that key raises to the caller.
    """
    doc_id = item["_id"]
    part = "clothes" if "clothes" in doc_id else "GB"

    selector = {"_id": doc_id}
    change = {"$set": {"part": part}}
    try:
        resourcesCollection.update_one(selector, change, upsert=True)
    except Exception as err:
        print(err)


def main():
Esempio n. 3
0
    "https://www.youtube.com/channel/UCp2Fm1fzjSAMmlnZ8F-C1nA",
    "https://www.youtube.com/channel/UCK_a_kGsvmKct-6b3TcUzmA",
    "https://www.youtube.com/channel/UCFMubAzy5RcTrLigSRA5jQg",
    "https://www.youtube.com/channel/UCyoLstvUOn_0D646NWwomdA",
    "https://www.youtube.com/channel/UCrPo31V8wpuuCMseyzEDZMQ"
]
sys.path.append("./..")
from db.mongodb import connectMongo
from spider.youtubedeep import YouTuBe
import time
from fake_useragent import UserAgent
import logging
from multiprocessing.pool import ThreadPool
import threading

# MongoDB handle shared by the module-level collections below.
# NOTE(review): the meaning of the `True` argument is defined in db.mongodb - confirm.
mongoDB = connectMongo(True)
# Queried by readMongoUrl() for entries whose "getData" field is False.
youtubeUrl = mongoDB["youtubeUrl"]
# `resource` and `collection` are both looked up under the name "resources".
resource = mongoDB["resources"]
collection = mongoDB["resources"]
# Module-level YouTuBe instance (class imported from spider.youtubedeep).
youtubeObj = YouTuBe()
import requests

# NOTE(review): presumably the platform id for YouTube - confirm against callers.
platId = 1


def readMongoUrl():
    while True:
        resultList = list(youtubeUrl.find({"getData": False}).limit(4))
        if not resultList:
            print("没有需要相关挖掘的url")
            time.sleep(60)
Esempio n. 4
0
import sys

sys.setrecursionlimit(1000000)  # e.g. set to one million here
from db.mongodb import connectMongo

import threading
import multiprocessing
from tools.translate.translateYoudao import *
from fake_useragent import UserAgent

# Service base URLs.
mmsDomain = "http://mms.gloapi.com/"
cmmsDomain = "http://cmms.gloapi.com/"

# Debug mode is on only when the first command-line argument is "debug".
# The argument is required: running without one raises IndexError.
debug_flag = sys.argv[1] == "debug"

# MongoDB handle, opened with the debug flag.
mongodb = connectMongo(debug_flag)

# Keyword information
keyWordCollection = mongodb["keyWords"]
formeryoutubecollection = mongodb["formeryoutube"]
youtubeUrl = mongodb["youtubeUrl"]
platId = 1

# Black/white list
blackWhiteCollection = mongodb["blackWhite"]
# Blacklist word list
blackList = list(
    blackWhiteCollection.distinct("word", {
        "isBlack": True,
        "platId": 1,
        "part": "GB"
Esempio n. 5
0
 def connectMongo(self):
     """Return the handle produced by calling ``connectMongo(True)``.

     Inside a method body the bare name ``connectMongo`` is looked up in
     module scope, not on the class, so this does not call itself.
     """
     return connectMongo(True)