def __init__(self):
    """Open the MySQL and MongoDB handles and cache the distinct keywords.

    Attributes set:
        mysql_connection: pymysql connection to the ``youtube`` database
            on 127.0.0.1:3306.
        cur: cursor created from ``mysql_connection``.
        db: whatever ``connectMongo(True)`` returns.
        collection: the ``keyWords`` collection of ``db``.
        wordList: list of the distinct ``keyWord`` values in ``collection``.
    """
    mysql_settings = {
        'host': '127.0.0.1',
        'port': 3306,
        'user': '******',
        'password': '',
        'db': 'youtube',
        'charset': 'utf8',
    }
    self.mysql_connection = pymysql.connect(**mysql_settings)
    # ping(reconnect=True) asks pymysql to re-open the link if it is closed.
    self.mysql_connection.ping(reconnect=True)
    self.cur = self.mysql_connection.cursor()

    # Connect to MongoDB.
    # NOTE(review): the meaning of the ``True`` flag is defined by
    # connectMongo, which is not visible here -- confirm.
    self.db = connectMongo(True)
    self.collection = self.db["keyWords"]
    distinct_keywords = self.collection.distinct("keyWord")
    self.wordList = list(distinct_keywords)
import sys sys.path.append("./../") from db.mongodb import connectMongo from db.mongoquery import mongoQuery import threading import time db = connectMongo(True) collection = db["resources"] resourcesCollection = db["resources"] def insertCollection(item): if "clothes" in item["_id"]: part = "clothes" else: part = "GB" try: resourcesCollection.update_one({"_id": item["_id"]}, {"$set": { "part": part }}, upsert=True) except Exception as e: print(e) def main():
"https://www.youtube.com/channel/UCp2Fm1fzjSAMmlnZ8F-C1nA", "https://www.youtube.com/channel/UCK_a_kGsvmKct-6b3TcUzmA", "https://www.youtube.com/channel/UCFMubAzy5RcTrLigSRA5jQg", "https://www.youtube.com/channel/UCyoLstvUOn_0D646NWwomdA", "https://www.youtube.com/channel/UCrPo31V8wpuuCMseyzEDZMQ" ] sys.path.append("./..") from db.mongodb import connectMongo from spider.youtubedeep import YouTuBe import time from fake_useragent import UserAgent import logging from multiprocessing.pool import ThreadPool import threading mongoDB = connectMongo(True) youtubeUrl = mongoDB["youtubeUrl"] resource = mongoDB["resources"] collection = mongoDB["resources"] youtubeObj = YouTuBe() import requests platId = 1 def readMongoUrl(): while True: resultList = list(youtubeUrl.find({"getData": False}).limit(4)) if not resultList: print("没有需要相关挖掘的url") time.sleep(60)
import sys sys.setrecursionlimit(1000000) # 例如这里设置为一百万 from db.mongodb import connectMongo import threading import multiprocessing from tools.translate.translateYoudao import * from fake_useragent import UserAgent mmsDomain = "http://mms.gloapi.com/" cmmsDomain = "http://cmms.gloapi.com/" debug_flag = True if sys.argv[1] == "debug" else False # mongodb mongodb = connectMongo(debug_flag) # 关键字信息 keyWordCollection = mongodb["keyWords"] formeryoutubecollection = mongodb["formeryoutube"] youtubeUrl = mongodb["youtubeUrl"] platId = 1 # 黑白名单 blackWhiteCollection = mongodb["blackWhite"] # 黑名单列表 blackList = list( blackWhiteCollection.distinct("word", { "isBlack": True, "platId": 1, "part": "GB"
def connectMongo(self):
    """Return the result of calling the module-level ``connectMongo(True)``.

    NOTE(review): assuming this is a method on a class, the bare name in the
    body resolves to the module-level ``connectMongo`` function rather than
    to this method -- confirm the enclosing scope.
    """
    return connectMongo(True)