import re
from lxml import etree
import requests
import time
from threading import Thread

from crawler import PostsCrawler
from mysql_manager import MysqlManager

max_threads = 10
interval = 20
mysql_mgr = MysqlManager(max_threads)


def post_crawl_task(topic):
    # Fetch the first page of this topic
    post_crawler = PostsCrawler()
    post_crawler.get_content(topic['url'], 1)
    posts = post_crawler.get_posts()

    # Number of pages in this topic
    page_count = post_crawler.get_max_page()
    print(topic['url'])
    print('page count', page_count)

    # Fetch the remaining pages of this topic
    if page_count > 1:
        for i in range(2, page_count + 1):
            post_crawler.get_content(topic['url'], i)
            posts += post_crawler.get_posts()
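# The file above imports Thread but the visible portion never spawns a thread.
# A minimal sketch of how post_crawl_task might be fanned out over worker
# threads; `topics`, the batching scheme, and using `interval` as a delay
# between batches are assumptions, not part of the original file.
def crawl_topics(topics):
    for batch_start in range(0, len(topics), max_threads):
        batch = topics[batch_start:batch_start + max_threads]
        threads = [Thread(target=post_crawl_task, args=(t,)) for t in batch]
        for t in threads:
            t.start()
        for t in threads:
            # Wait for the whole batch before starting the next one,
            # keeping at most max_threads requests in flight.
            t.join()
        time.sleep(interval)  # assumed: polite pause between batches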
import json

from mysql_manager import MysqlManager

mysql = MysqlManager(4)

with open('videos.json', 'r') as f:
    i = 1
    while True:
        print("Parse json: ", i)
        i += 1
        line = f.readline()
        if not line:
            break
        if len(line) < 10:  # skip blank or junk lines
            continue
        # urls = re.findall('http://v3-dy.ixigua.com[^\"]+', json_str)
        obj = json.loads(line)
        # Path: aweme_list -> [n] -> video -> play_addr -> url_list
        i_url = 0
        for v in obj['aweme_list']:
            # print("-----", i_url)
            try:
                url = v['video']['play_addr']['url_list'][0]
            except Exception as err:
                print("parse error ", i, " index: ", i_url)
            i_url += 1
            # print(url)
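# The loop above extracts each play URL but never stores or fetches it.
# A minimal sketch of downloading collected URLs with requests; the chunked
# streaming approach and the output file naming are assumptions, not from
# the original script.
import requests

def download_videos(urls):
    for n, url in enumerate(urls):
        resp = requests.get(url, stream=True, timeout=30)
        resp.raise_for_status()
        with open('video_{}.mp4'.format(n), 'wb') as out:
            # Stream in chunks so large videos are not held in memory at once
            for chunk in resp.iter_content(chunk_size=8192):
                out.write(chunk)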
import re
from lxml import etree
import requests
import time

import global_var
from mysql_manager import MysqlManager

mysql_mgr = MysqlManager(4)


class BoardsCrawler:
    # No trailing slash here: base_url supplies the separator itself
    # (the original 'http://www.newsmth.net/' produced a double slash)
    domain = 'http://www.newsmth.net'
    base_url = domain + '/nForum/section/{}?ajax'

    def __init__(self, interval=1):
        self.interval = interval

    def get_board_of_section(self, section_idx):
        url = self.base_url.format(section_idx)
        response = requests.get(url, headers=global_var.newsmth_headers)
        time.sleep(self.interval)  # throttle between requests
        self.content = response.text
        self.tree = etree.HTML(self.content)

    def get_board_list(self, etr_obj=None):
        if etr_obj is None:
            etr_obj = self.tree
        elements = etr_obj.xpath(
            '//table[@class="board-list corner"]/tbody/tr')
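# A minimal usage sketch for BoardsCrawler. get_board_list is truncated above,
# so treating its result as the matched <tr> row elements is an assumption.
crawler = BoardsCrawler(interval=2)
crawler.get_board_of_section(1)  # fetch section 1 of the forum, one request
rows = crawler.get_board_list()  # assumed to yield the board-list table rows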
def __init__(self, limit=500):
    self.reply_limit = limit
    self.mm = MysqlManager(4)
import re
from lxml import etree
import requests
from threading import Thread
import time
import html

from mysql_manager import MysqlManager
from crawler import PostsCrawler

max_threads = 10
wait_duration = 20
mysql_mgr = MysqlManager(10)


def post_crawl_task(topic):
    # Fetch the first page of this topic
    post_crawler = PostsCrawler()
    post_crawler.get_content(topic['url'], 1)
    posts = post_crawler.get_posts()

    # Number of pages in this topic
    page_count = post_crawler.get_max_page()

    # Fetch the remaining pages of this topic
    if page_count > 1:
        for i in range(2, page_count + 1):
            post_crawler.get_content(topic['url'], i)
            posts += post_crawler.get_posts()
            break  # stops after the first extra page, so only page 2 is fetched

    # Insert posts of a topic
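# The function above ends at "# Insert posts of a topic" without the insertion
# itself. A hedged sketch of what that step might look like, assuming
# MysqlManager exposes an execute-style method; `execute`, the table name, and
# the column layout below are all hypothetical, not from the original project.
def insert_posts(topic, posts):
    for post in posts:
        mysql_mgr.execute(  # hypothetical MysqlManager API
            'INSERT INTO posts (topic_url, content) VALUES (%s, %s)',
            (topic['url'], html.unescape(post)))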
def __init__(self, limit=200):
    self.reply_limit = limit
    self.mm = MysqlManager(4)
    self.post = {}