Example #1
File: dfw.py Project: vanway/dfw
def add_user():
    if request.method == 'POST':
        # 'RechargeMoney' is listed twice in the original source
        key_list = ['AccountName', 'Password', 'UserName', 'RechargeMoney',
                    'RechargeMoney', 'Mobile', 'IDCard', 'Star']
        item = [request.form[key] for key in key_list]
        print MysqlManager.insert_user(item)
        return "ok"
Example #2
    def process_item(self, item, spider):
        if 'image_urls' in item:
            dir_path = '{0}/../../LofterImageSets'.format(FILE_PATH)
            if not os.path.exists(dir_path):
                os.makedirs(dir_path)
            for image_url in item['image_urls']:
                image_name = image_url.split('/')[-1]
                file_path = '%s/%s' % (dir_path, image_name)
                if os.path.exists(file_path):
                    continue

                # database operation
                category_list = item['image_category']
                category = []
                for cat in category_list:
                    category.append(cat.strip('#'))
                category_tag = ','.join(category)

                image_set_name = item['query_url'][0].split('/')[-1]
                description = item['description'][0].encode('utf-8')
                # note: '* 2' repeats the six fields inside a single tuple
                field_item = [(image_set_name, image_name, category_tag, description, image_url, item['query_url'][0]) * 2]
                MysqlManager.insert_items_into_photos(field_item)
                with open(file_path, 'wb') as handle:
                    response = requests.get(image_url, stream=True)
                    for block in response.iter_content(1024):
                        if not block:
                            break
                        handle.write(block)

        return item
Example #3
File: dfw.py Project: conkty/dfw
def get_server_stream():
    result = MysqlManager.get_trade_stream(["server"])
    return ujson.dumps(result, ensure_ascii=False)
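The dfw.py handlers read request.form and return plain strings, which suggests Flask-style view functions; the excerpts omit the route decorators and app setup. Below is a minimal wiring sketch, assuming Flask and that the functions above are importable from dfw (both assumptions, not shown in the source):

# Hypothetical wiring; the dfw.py excerpts do not show their route setup.
from flask import Flask
from dfw import add_user, get_server_stream  # assumed module layout

app = Flask(__name__)
app.add_url_rule('/add_user', view_func=add_user, methods=['GET', 'POST'])
app.add_url_rule('/server_stream', view_func=get_server_stream)

if __name__ == '__main__':
    app.run(debug=True)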
Example #4
File: dfw.py Project: vanway/dfw
def chat(ws):
    users[ws.id] = ws
    print ws.id

    while True:
        msg = ws.receive()
        if msg is not None:
            print msg
            typex, obj, content = "", "", ""
            try:
                typex, obj, content = msg.split(',')
            except:
                pass
            if typex == '1' and obj == 'user':
                global user_match_dict
                user_match_dict[ws.id] = content
                user = pass_users[content]
                total_trade = [user[0], user[2], user[3], user[3] - user[2]]
                msg = ujson.dumps({'total_trade': total_trade},
                                  ensure_ascii=False)
                users[ws.id].send(msg)
            if g_is_open:
                if typex == "2":
                    now_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                             time.localtime())
                    user_trade_info[ws.id] = [
                        now_time, pass_users[user_match_dict[ws.id]][3],
                        cur_price, cur_price, 0
                    ]

                    trade_stream = [
                        now_time, pass_users[user_match_dict[ws.id]][0], typex,
                        "user", cur_price, "1"
                    ]
                    MysqlManager.insert_stream_trade(trade_stream)
                    trade_stream = [
                        now_time, pass_users[user_match_dict[ws.id]][0], typex,
                        "server", cur_price, "1"
                    ]
                    MysqlManager.insert_stream_trade(trade_stream)

                    msg = ujson.dumps({'single_trade': user_trade_info[ws.id]},
                                      ensure_ascii=False)
                    users[ws.id].send(msg)
                if typex == "3":
                    now_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                             time.localtime())
                    user_trade_info[ws.id] = [
                        now_time, pass_users[user_match_dict[ws.id]][3],
                        cur_price, cur_price, 0
                    ]

                    trade_stream = [
                        now_time, pass_users[user_match_dict[ws.id]][0], typex,
                        "user", cur_price, "1"
                    ]
                    MysqlManager.insert_stream_trade(trade_stream)
                    trade_stream = [
                        now_time, pass_users[user_match_dict[ws.id]][0], typex,
                        "server", cur_price, "1"
                    ]
                    MysqlManager.insert_stream_trade(trade_stream)

                    msg = ujson.dumps({'single_trade': user_trade_info[ws.id]},
                                      ensure_ascii=False)
                    users[ws.id].send(msg)
                if typex == "4":
                    MysqlManager.update_item(
                        [user_trade_info[ws.id][-1], user_match_dict[ws.id]])

                    now_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                             time.localtime())
                    trade_stream = [
                        now_time, pass_users[user_match_dict[ws.id]][0], typex,
                        "user", cur_price, "1"
                    ]
                    MysqlManager.insert_stream_trade(trade_stream)
                    trade_stream = [
                        now_time, pass_users[user_match_dict[ws.id]][0], typex,
                        "server", cur_price, "1"
                    ]
                    MysqlManager.insert_stream_trade(trade_stream)

                    user = MysqlManager.get_user_by_name(
                        [user_match_dict[ws.id]])[1:]
                    total_trade = [
                        user[0], user[2], user[3], user[3] - user[2]
                    ]
                    msg = ujson.dumps({'total_trade': total_trade},
                                      ensure_ascii=False)
                    users[ws.id].send(msg)
                    del user_trade_info[ws.id]
        else:
            break

    del users[ws.id]
    if ws.id in user_trade_info:
        del user_trade_info[ws.id]
Example #5
def get_admin():
    return MysqlManager.get_admin()[1:]
Example #6
import json
from mysql_manager import MysqlManager

mysql = MysqlManager(4)

with open('videos.json', 'r') as f:
    i = 1
    while True:
        print("Parse json: ", i)
        i += 1
        line = f.readline()

        if not line:
            break

        if len(line) < 10:
            continue

        # urls = re.findall('http://v3-dy.ixigua.com[^\"]+', json_str)
        obj = json.loads(line)

        # aweme_list->[n]->video->play_addr->url_list
        i_url = 0
        for v in obj['aweme_list']:
            # print("-----", i_url)
            try:
                url = v['video']['play_addr']['url_list'][0]
            except Exception as err:
                print("parse error ", i, " index: ", i_url)
            i_url += 1
            # print(url)
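The snippet above parses the play URLs but does not show what happens to them; presumably they are stored or handed to the downloader shown in Example #19. A minimal sketch (an assumption, not from the source) that collects the URLs so they can be passed on:

# Hypothetical glue code: gather the parsed play URLs into a list.
import json

urls = []
with open('videos.json', 'r') as f:
    for line in f:
        if len(line) < 10:
            continue
        obj = json.loads(line)
        for v in obj.get('aweme_list', []):
            try:
                urls.append(v['video']['play_addr']['url_list'][0])
            except (KeyError, IndexError):
                pass  # skip entries without a playable address

# for i, url in enumerate(urls):
#     download_video(i, url)  # helper from Example #19, assumed importable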
Example #7
import re
from lxml import etree
import requests

import time

import html

from pic_downloader import pic_downloader

from mysql_manager import MysqlManager

mysql_mgr = MysqlManager(4)


class PostsCrawler:

    domain = 'https://www.newsmth.net'
    pattern = re.compile('<.*?>')

    def get_content(self, topic_url, page):
        querystring = {"ajax": "", "p": str(page)}
        url = self.domain + topic_url
        r = requests.get(url, params=querystring)
        self.html = r.text
        pic_downloader().get_media_files(r.text)
        self.tree = etree.HTML(r.text)
        time.sleep(1)

    def get_max_page(self):
        pages = self.tree.xpath('//ol[@class="page-main"][1]/li')
Example #8
class WeiboCrawler():

    cookie_filename = 'cookie'

    data_dir = './data'

    login_url = "https://passport.weibo.cn/sso/login"

    payload = "username={}&password={}&savestate=1&mainpageflag=1&entry=mweibo&ec=0".format(
        '18600663368', 'Xi@oxiang66')

    login_headers = {
        'origin': "https://passport.weibo.cn",
        'user-agent':
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
        'content-type': "application/x-www-form-urlencoded",
        'accept': "*/*",
        'referer': "https://passport.weibo.cn/signin/login",
        'accept-encoding': "gzip, deflate, br",
        'accept-language': "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
        'cache-control': "no-cache"
    }

    post_url = 'https://m.weibo.cn/detail/{}'
    reply_url_0 = 'https://m.weibo.cn/comments/hotflow?id={}&mid={}&max_id_type=0'
    reply_url_1 = 'https://m.weibo.cn/comments/hotflow?id={}&mid={}&max_id={}&max_id_type=0'

    comments = []

    pattern = re.compile('<.*?>')

    def __init__(self, limit=500):
        self.reply_limit = limit
        self.mm = MysqlManager(4)

    def cookie_exist(self):
        return os.path.isfile(self.cookie_filename)

    def cookie_valid(self):
        cookie_mid_time = os.path.getmtime(self.cookie_filename)
        return cookie_mid_time + 86400 * 2 > time.time()

    def load_cookie(self):
        with open(self.cookie_filename, 'r') as f:
            cookie = f.read()
        self.login_headers['cookie'] = cookie
        return cookie

    def do_login(self):
        response = requests.post(self.login_url,
                                 data=self.payload,
                                 headers=self.login_headers,
                                 allow_redirects=False)

        cookie = ''

        # cookies.iteritems() is Python 2 only; items() works under Python 3
        for k, v in response.cookies.items():
            cookie += k + '=' + v + ';'
        cookie = cookie[:-1]

        with open(self.cookie_filename, 'w') as f:
            f.write(cookie)

        self.login_headers['cookie'] = cookie

    def login(self):
        # Check whether cookie is existed and valid
        if self.cookie_exist() and self.cookie_valid():
            cookie = self.load_cookie()
            return

        # Call login API, login and save cookie
        self.do_login()

    def assure_data_dir(self):
        if not os.path.exists(self.data_dir):
            os.makedirs(self.data_dir)

    def cleanup_text(self, text):
        return self.pattern.sub('', text)

    def save_data(self, filename, data):
        self.assure_data_dir()
        with open(self.data_dir + '/{}.json'.format(filename), 'w') as f:
            f.write(data)

    def extract_var(self, html):
        return re.findall(
            r'var\s\$render_data\s=\s(\[[\s\S]*\])\[0\]\s\|\|\s\{\}\;',
            html)[0]

    # Wed Jan 16 00:00:52 +0800 2019
    # 2019-01-16 00:00:52
    def convert_time_format(self, ts):
        return datetime.datetime.strptime(
            ts, "%a %b %d %H:%M:%S %z %Y").strftime('%Y-%m-%d %H:%M:%S')

    def get_post(self, id):
        url = self.post_url.format(id)
        response = requests.get(url, headers=self.login_headers)
        post_data_str = self.extract_var(response.text)
        post_data = json.loads(post_data_str)[0]['status']

        self.post = {}

        print(post_data['created_at'])
        print(self.convert_time_format(post_data['created_at']))

        self.post['id'] = post_data['id']
        self.post['created_at'] = self.convert_time_format(
            post_data['created_at'])
        self.post['text'] = self.cleanup_text(post_data['text'])
        self.post['reposts_count'] = post_data['reposts_count']
        self.post['comments_count'] = post_data['comments_count']
        self.post['attitudes_count'] = post_data['attitudes_count']
        post_data_user = post_data['user']
        self.post['profile_image_url'] = post_data_user['profile_image_url']
        self.post['user_id'] = post_data_user['id']
        self.post['screen_name'] = post_data_user['screen_name']
        self.save_data(self.post['id'], post_data_str)
        self.mm.insert_data('post', self.post)

        post_pics = pic_downloader().get_media_files(post_data['pics'])
        for pic in post_pics:
            p = {}
            p['post_id'] = id
            p['url'] = pic
            self.mm.insert_data('pic', p)

    def get_comments(self, id, max_id):
        if max_id == 0:
            url = self.reply_url_0.format(id, id)
        else:
            url = self.reply_url_1.format(id, id, max_id)

        response = requests.get(url, headers=self.login_headers)
        reply_json_obj = json.loads(response.text)

        reply_data = reply_json_obj['data']['data']

        for r in reply_data:
            # build a fresh dict per reply; reusing one dict object would make
            # every appended comment alias the last entry
            comment = {}
            comment['created_at'] = self.convert_time_format(r['created_at'])
            comment['id'] = r['id']
            comment['post_id'] = id
            comment['text'] = self.cleanup_text(r['text'])
            r_data_user = r['user']
            comment['profile_image_url'] = r_data_user['profile_image_url']
            comment['user_id'] = r_data_user['id']
            comment['screen_name'] = r_data_user['screen_name']
            self.comments.append(comment)
            self.mm.insert_data('comment', comment)

        self.save_data(self.post['id'] + '-{}'.format(max_id), response.text)

        if len(self.comments) >= reply_json_obj['data']['total_number']:
            return

        if self.reply_limit != 0 and len(self.comments) > int(self.reply_limit):
            return

        time.sleep(2)
        self.get_comments(self.post['id'], reply_json_obj['data']['max_id'])
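For reference, a minimal usage sketch of the class above, assuming it is importable as WeiboCrawler and that MysqlManager(4) can reach the database; the post id below is a placeholder, not a real one:

# Hypothetical driver; not part of the original module.
crawler = WeiboCrawler(limit=500)
crawler.login()                              # reuse a cached cookie when possible
crawler.get_post('4321098765432109')         # placeholder weibo post id
crawler.get_comments(crawler.post['id'], 0)  # crawl comments from the first page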
Example #9
File: dfw.py Project: conkty/dfw
def modify_user_star():
    if request.method == "POST":
        user_name = request.form["UserName"]
        star = request.form["Star"]
        print MysqlManager.user_modify_star(user_name, star)
        return "ok"
Example #10
def get_admin():
    return MysqlManager.get_admin()[1:]
Example #11
File: dfw.py Project: vanway/dfw
def delete_user():
    if request.method == 'POST':
        user_name = request.form['UserName']
        MysqlManager.user_delete(user_name)
        return 'ok'
Example #12
File: dfw.py Project: vanway/dfw
def modify_user_star():
    if request.method == 'POST':
        user_name = request.form['UserName']
        star = request.form['Star']
        print MysqlManager.user_modify_star(user_name, star)
        return 'ok'
Example #13
File: dfw.py Project: vanway/dfw
def add_user_money():
    if request.method == 'POST':
        user_name = request.form['UserName']
        add_money = request.form['Money']
        print MysqlManager.user_add_money(user_name, add_money)
        return 'ok'
Example #14
File: dfw.py Project: vanway/dfw
def get_server_stream():
    result = MysqlManager.get_trade_stream(["server"])
    return ujson.dumps(result, ensure_ascii=False)
Example #15
File: dfw.py Project: conkty/dfw
def add_user():
    if request.method == "POST":
        key_list = ["AccountName", "Password", "UserName", "RechargeMoney", "RechargeMoney", "Mobile", "IDCard", "Star"]
        item = [request.form[key] for key in key_list]
        print MysqlManager.insert_user(item)
        return "ok"
Example #16
File: dfw.py Project: conkty/dfw
def add_user_money():
    if request.method == "POST":
        user_name = request.form["UserName"]
        add_money = request.form["Money"]
        print MysqlManager.user_add_money(user_name, add_money)
        return "ok"
Example #17
def get_users():
    return dict([(item[1], item[1:]) for item in MysqlManager.get_users()])
Example #18
File: dfw.py Project: conkty/dfw
def delete_user():
    if request.method == "POST":
        user_name = request.form["UserName"]
        MysqlManager.user_delete(user_name)
        return "ok"
Example #19
    'connection': "keep-alive",
    'cache-control': "no-cache",
    'upgrade-insecure-requests': "1",
    'user-agent':
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
    'accept':
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    'accept-encoding': "gzip, deflate",
    'accept-language': "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
    'range': "bytes=524288-524288",
    'if-range': "\"9056E63E897E90A3BFB2619B86481603\""
}

dirname = './dy_videos/'

mysql = MysqlManager(4)

# cur_url = 'http://v3-dy.ixigua.com/41774b984e4022ca52a4795be488dec9/5bb667a3/video/m/2200fd0bcf7114241858464e7ee8e62a2ef115bf46e00004b570145264e/'


def download_video(index, url):
    file_name = url[url.rindex('/', 0, -1) + 1:-1]
    file_name = hashlib.md5(file_name.encode('utf8')).hexdigest() + '.mp4'

    print('Start downloading ', file_name)
    r = requests.get(url, stream=True)
    # download started
    with open(dirname + file_name, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024 * 1024):
            if chunk:
                f.write(chunk)
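The module does not show how download_video() is invoked. A hedged sketch of driving it with a thread pool (the URL list and worker count are assumptions):

# Hypothetical driver for download_video(); ensure the target directory exists.
import os
from concurrent.futures import ThreadPoolExecutor

os.makedirs(dirname, exist_ok=True)
urls = []  # fill with play URLs, e.g. those parsed in Example #6

with ThreadPoolExecutor(max_workers=4) as pool:
    for i, url in enumerate(urls):
        pool.submit(download_video, i, url)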
Example #20
    def __init__(self, limit=500):
        self.reply_limit = limit
        self.mm = MysqlManager(4)
Example #21
headers = {
    'host': "v3-dy.ixigua.com",
    'connection': "keep-alive",
    'cache-control': "no-cache",
    'upgrade-insecure-requests': "1",
    'user-agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
    'accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    'accept-encoding': "gzip, deflate",
    'accept-language': "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
    'range': "bytes=524288-524288",
    'if-range': "\"9056E63E897E90A3BFB2619B86481603\""
}

dirname = './dy_videos/'

mysql = MysqlManager(4)

# cur_url = 'http://v3-dy.ixigua.com/41774b984e4022ca52a4795be488dec9/5bb667a3/video/m/2200fd0bcf7114241858464e7ee8e62a2ef115bf46e00004b570145264e/'

async def download_coroutine(index, url):
    file_name = url[url.rindex('/', 0, -1) + 1:-1]
    file_name = hashlib.md5(file_name.encode('utf8')).hexdigest() + '.mp4'

    print('Start downloading ', file_name)
    # note: requests is blocking, so this coroutine does not yield control
    # while downloading
    r = requests.get(url, stream=True)
    # download started
    with open(dirname + file_name, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024 * 1024):
            if chunk:
                f.write(chunk)
    msg = 'Finished downloading %s' % file_name
Example #22
import re
from lxml import etree
import requests
import time
import global_var

from mysql_manager import MysqlManager

mysql_mgr = MysqlManager(4)


class BoardsCrawler:
    domain = 'http://www.newsmth.net'

    base_url = domain + '/nForum/section/{}?ajax'

    def __init__(self, interval=1):
        self.interval = interval

    def get_board_of_section(self, section_idx):
        url = self.base_url.format(section_idx)
        response = requests.get(url, headers=global_var.newsmth_headers)
        time.sleep(self.interval)
        self.content = response.text
        self.tree = etree.HTML(self.content)

    def get_board_list(self, etr_obj=None):
        if etr_obj is None:
            etr_obj = self.tree
        elements = etr_obj.xpath(
            '//table[@class="board-list corner"]/tbody/tr')
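A short usage sketch of BoardsCrawler, assuming get_board_list() goes on to return the parsed rows (the excerpt is cut off before that) and that section index 1 exists:

# Hypothetical usage; the excerpt above is truncated before get_board_list() returns.
crawler = BoardsCrawler(interval=1)
crawler.get_board_of_section(1)    # fetch and parse one section page
boards = crawler.get_board_list()  # parse the board table rows
print(boards)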
Example #23
    def __init__(self, limit=200):
        self.reply_limit = limit
        self.mm = MysqlManager(4)
        self.post = {}
Example #24
def get_users():
    return dict([(item[1], item[1:]) for item in MysqlManager.get_users()])
Example #25
import re
from lxml import etree
import requests
from threading import Thread
import time
import html
from mysql_manager import MysqlManager
from crawler import PostsCrawler

max_threads = 10
wait_duration = 20

mysql_mgr = MysqlManager(10)

def post_crawl_task(topic):
    # Get 1st page of this topic
    post_crawler = PostsCrawler()
    post_crawler.get_content(topic['url'], 1)
    posts = post_crawler.get_posts()

    # Get number of pages of this topic
    page_count = post_crawler.get_max_page()

    # Get the rest posts of this topic
    if page_count > 1:
        for i in range(2, page_count + 1):
            post_crawler.get_content(topic['url'], i)
            posts += post_crawler.get_posts()
            break  # note: the source stops after the second page

    # Insert post of a topic
Example #26
import re
from lxml import etree
import requests
import time
from threading import Thread

from crawler import PostsCrawler
from mysql_manager import MysqlManager

max_threads = 10
interval = 20
mysql_mgr = MysqlManager(max_threads)


def post_crawl_task(topic):
    # Get 1st page of this topic
    post_crawler = PostsCrawler()
    post_crawler.get_content(topic['url'], 1)
    posts = post_crawler.get_posts()

    # Get number of pages of this topic
    page_count = post_crawler.get_max_page()

    print(topic['url'])
    print('page count', page_count)

    # Get the rest posts of this topic
    if page_count > 1:
        for i in range(2, page_count + 1):
            post_crawler.get_content(topic['url'], i)
            posts += post_crawler.get_posts()
Example #27
File: dfw.py Project: conkty/dfw
def chat(ws):
    users[ws.id] = ws
    print ws.id

    while True:
        msg = ws.receive()
        if msg is not None:
            print msg
            typex, obj, content = "", "", ""
            try:
                typex, obj, content = msg.split(",")
            except:
                pass
            if typex == "1" and obj == "user":
                global user_match_dict
                user_match_dict[ws.id] = content
                user = pass_users[content]
                total_trade = [user[0], user[2], user[3], user[3] - user[2]]
                msg = ujson.dumps({"total_trade": total_trade}, ensure_ascii=False)
                users[ws.id].send(msg)
            if g_is_open:
                if typex == "2":
                    now_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                    user_trade_info[ws.id] = [now_time, pass_users[user_match_dict[ws.id]][3], cur_price, cur_price, 0]

                    trade_stream = [now_time, pass_users[user_match_dict[ws.id]][0], typex, "user", cur_price, "1"]
                    MysqlManager.insert_stream_trade(trade_stream)
                    trade_stream = [now_time, pass_users[user_match_dict[ws.id]][0], typex, "server", cur_price, "1"]
                    MysqlManager.insert_stream_trade(trade_stream)

                    msg = ujson.dumps({"single_trade": user_trade_info[ws.id]}, ensure_ascii=False)
                    users[ws.id].send(msg)
                if typex == "3":
                    now_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                    user_trade_info[ws.id] = [now_time, pass_users[user_match_dict[ws.id]][3], cur_price, cur_price, 0]

                    trade_stream = [now_time, pass_users[user_match_dict[ws.id]][0], typex, "user", cur_price, "1"]
                    MysqlManager.insert_stream_trade(trade_stream)
                    trade_stream = [now_time, pass_users[user_match_dict[ws.id]][0], typex, "server", cur_price, "1"]
                    MysqlManager.insert_stream_trade(trade_stream)

                    msg = ujson.dumps({"single_trade": user_trade_info[ws.id]}, ensure_ascii=False)
                    users[ws.id].send(msg)
                if typex == "4":
                    MysqlManager.update_item([user_trade_info[ws.id][-1], user_match_dict[ws.id]])

                    now_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                    trade_stream = [now_time, pass_users[user_match_dict[ws.id]][0], typex, "user", cur_price, "1"]
                    MysqlManager.insert_stream_trade(trade_stream)
                    trade_stream = [now_time, pass_users[user_match_dict[ws.id]][0], typex, "server", cur_price, "1"]
                    MysqlManager.insert_stream_trade(trade_stream)

                    user = MysqlManager.get_user_by_name([user_match_dict[ws.id]])[1:]
                    total_trade = [user[0], user[2], user[3], user[3] - user[2]]
                    msg = ujson.dumps({"total_trade": total_trade}, ensure_ascii=False)
                    users[ws.id].send(msg)
                    del user_trade_info[ws.id]
        else:
            break

    del users[ws.id]
    if ws.id in user_trade_info:
        del user_trade_info[ws.id]