Example #1
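# Worker setup: Redis queues for results (up) and commands (down), a 9600-baud
# serial link on /dev/ttyS0; start() is kicked off immediately.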
 def __init__(self):
     self.trigger = False
     self.result_queue = RedisQueue(Config.UP_QUEUE_NAME)
     self.command_queue = RedisQueue(Config.DOWN_QUEUE_NAME)
     self.port = serial.Serial("/dev/ttyS0",
                               9600,
                               parity=serial.PARITY_NONE,
                               stopbits=serial.STOPBITS_ONE,
                               bytesize=serial.EIGHTBITS,
                               timeout=Config.SERIAL_WAIT)
     self.start()
Example #2
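    # Sender thread setup: Redis queues for normal and retry data, the energy
    # API endpoint, and a console-mode flag parsed from the config.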
    def __init__(self, status_queue, stop_event, config):
        super(Sender, self).__init__()

        self.normal_data_queue = RedisQueue('normal')
        self.retry_data_queue = RedisQueue('retry')
        self.status_queue = status_queue
        self.stop_event = stop_event

        self.base_url = config["api_url"]
        self.key = config["key"]
        self.store_energy_url = self.base_url + "/v2/energy"
        self.backup_file = "backup"
        self.console_mode = config["console_mode"] == "true"

        self.connected = False
Example #3
    def __init__(self, time_execution_in_sec, chart_title, slave, *args,
                 **kwargs):
        super(MyTaskSet, self).__init__(time_execution_in_sec, chart_title,
                                        slave, *args, **kwargs)
        self.running = True
        self.slave = slave
        self.code = None

        self.queue_chart = RedisQueue(name="data_chart",
                                      namespace="data_chart")
        self.queue_tasks = RedisQueue(name="data_tasks",
                                      namespace="data_tasks")
        self.chart = ReportCharts(time_execution_in_sec, chart_title,
                                  self.slave)
        self.db = create_engine(self.config["database"]["db_string"])
Example #4
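# Wires a Redis-backed comment queue to a PRAW Reddit client authenticated
# from config constants.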
def main():
    rq = RedisQueue('reddit-book-stream', host=REDIS_HOST, port=REDIS_PORT)
    reddit = praw.Reddit(user_agent=USER_AGENT,
                         client_id=CLIENT_ID,
                         client_secret=CLIENT_SECRET,
                         username=USERNAME,
                         password=PASSWORD)
Example #5
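# Sogou WeChat spider setup: search keyword, browser-like default headers,
# a shared requests Session, a Redis queue and a MySQL store.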
 def __init__(self):
     self.base_url = "https://weixin.sogou.com/weixin"
     self.keyword = KEY
     self.headers = {
         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
         'Accept-Encoding': 'gzip, deflate, br',
         'Accept-Language': 'zh-CN,zh;q=0.9',
         'Cache-Control': 'max-age=0',
         'Connection': 'keep-alive',
         'Cookie': COOKIES,
         'Host': 'weixin.sogou.com',
         'Upgrade-Insecure-Requests': '1',
         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
     }
     self.session = Session()
     self.queue = RedisQueue()
     self.mysql = Mysql()
Example #6
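    # Bridges the Redis command/result queues to a WebSocket connection;
    # run_forever is retried every five seconds if it drops.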
    def __init__(self):
        self.command_queue = RedisQueue(Config.DOWN_QUEUE_NAME)
        self.result_queue = RedisQueue(Config.UP_QUEUE_NAME)

        self.socket = websocket.WebSocketApp(HOST,
                                             on_open=self.on_open,
                                             on_message=self.on_message,
                                             on_error=self.on_error,
                                             on_close=self.on_close)

        while True:
            try:
                self.socket.run_forever(ping_interval=100)
            except Exception:
                pass  # swallow errors and retry the connection
            time.sleep(5)
Example #7
def main():
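    # Consumer: block on the 'jobs' queue and parse records for one country
    # per JSON job until the queue runs dry.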
    with RecordsDB() as records_db:
        records_parser = RecordsParser(records_db)

        with CountriesDB() as countries_db:
            queue = RedisQueue(name='jobs',
                               namespace='queue',
                               decode_responses=True)
            job_in_json = queue.wait_and_dequeue()

            while job_in_json is not None:

                job = json.loads(job_in_json)

                country_id = job['country_id']
                country_name = countries_db.get_country_from_id(country_id)
                num_records = job['num_records']

                if country_name is None:
                    raise Exception("Country name cannot be None!")

                records_parser.get_records(country=country_name,
                                           country_id=country_id,
                                           max_records=num_records)

                job_in_json = queue.wait_and_dequeue()
Example #8
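# Producer: enqueues one JSON job per country, each requesting 5000 records.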
def populate_job_queue():
    queue = RedisQueue('jobs')
    with CountriesDB() as countries_db:

        countries = countries_db.get_countries()
        for country in countries:
            job = {'country_id': country[0], 'num_records': 5000}
            job_in_json = json.dumps(job)
            queue.enqueue(job_in_json)
Example #9
    def __init__(self, status_queue, config, stop_event):
        super().__init__()

        self.energy_data_queue = RedisQueue('normal')
        self.status_queue = status_queue
        self.reader = self.init_reader()
        self.solar_ip = config['solar_ip']
        self.solar_url = self.solar_ip + config['solar_url']
        self.stop_event = stop_event
        self.console_mode = config["console_mode"] == "true"
Example #10
 def __init__(self, name, collector):
     if SCHEDULER_PERSIST:  # in distributed / persistent mode, use the Redis-backed queue
         self.queue = RedisQueue(name=name)
         # and Redis as the deduplication container
         self._filter_container = RedisFilterContainer()
     else:
         self.queue = Queue()
         # otherwise a plain Python set() handles deduplication
         self._filter_container = NoramlFilterContainer()
     # the collector counts duplicate requests
     self.collector = collector
Example #11
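    # Simulated meter: seeds random starting totals for usage, redelivery,
    # solar and gas.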
    def __init__(self, stop_event):
        super().__init__()

        self.energy_data_queue = RedisQueue('normal')
        self.stop_event = stop_event
        self.default_message = self.get_default_message()

        self.total_usage = random.randint(1000, 5000)
        self.total_redelivery = random.randint(1000, 5000)
        self.total_solar = random.randint(1000, 5000)
        self.total_gas = random.randint(1000, 5000)
Example #12
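# Registers a subscription for service_name/custom_key on the shared Redis
# queue; callbacks can optionally be marshalled to the main thread.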
 def __init__(self,
              redis_mgr,
              service_name,
              custom_key,
              func_name,
              callback_to_main_thread=False):
     self.service_name = service_name
     self.func_name = func_name
     self.redis_queue = RedisQueue(redis_mgr)
     self.custom_key = custom_key
     self.redis_queue.subscribe(self.service_name, custom_key)
     self.callback_to_main_thread = callback_to_main_thread
Example #13
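    # A3C actor thread: per-thread game state and logging counters, plus a
    # shared Redis queue for coordination.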
    def __init__(self, thread_index, global_network):

        self.thread_index = thread_index
        self.local_network = global_network
        self.game_state = GameState()
        self.local_t = 0

        # for log
        self.episode_reward = 0.0
        self.episode_start_time = 0.0
        self.prev_local_t = 0

        self.rq = RedisQueue(REDIS_QUEUE_NAME)
        return
Example #14
def main():
    comment_queue = RedisQueue('reddit-book-stream',
                               host=REDIS_HOST,
                               port=REDIS_PORT)
    reddit = praw.Reddit(user_agent=USER_AGENT,
                         client_id=CLIENT_ID,
                         client_secret=CLIENT_SECRET,
                         username=USERNAME,
                         password=PASSWORD)

    # stream comments from r/all; pause_after < 0 allows faster streaming
    for comment in reddit.subreddit('all').stream.comments(pause_after=-1):
        if comment and mentions_book(comment.body):
            comment_queue.put(comment.id)
            print(comment.id)
            print(f'reddit.com/api/info?id=t1_{comment.id}')
Example #15
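    # A3C trainer setup (legacy TensorFlow, Python 2): global network, RMSProp
    # optimiser, PARALLEL_SIZE actor threads, summaries, and a Redis queue.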
    def __init__(self):
        self.device = '/gpu:0' if USE_GPU else '/cpu:0'
        self.stop_requested = False
        self.global_t = 0
        if USE_LSTM:
            self.global_network = A3CLSTMNetwork(STATE_DIM, STATE_CHN, ACTION_DIM, self.device, -1)
        else:
            self.global_network = A3CFFNetwork(STATE_DIM, STATE_CHN, ACTION_DIM, self.device)
        self.global_network.create_loss(ENTROPY_BETA)

        self.initial_learning_rate = log_uniform(INITIAL_ALPHA_LOW, INITIAL_ALPHA_HIGH, INITIAL_ALPHA_LOG_RATE)
        print 'initial_learning_rate:', self.initial_learning_rate
        self.learning_rate_input = tf.placeholder('float')
        self.optimizer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate_input,
                                                   decay=RMSP_ALPHA, momentum=0.0, epsilon=RMSP_EPSILON)

        grads_and_vars = self.optimizer.compute_gradients(
            self.global_network.total_loss, self.global_network.get_vars())
        self.apply_gradients = self.optimizer.apply_gradients(grads_and_vars)

        self.actor_threads = []
        for i in range(PARALLEL_SIZE):
            actor_thread = A3CActorThread(i, self.global_network)
            self.actor_threads.append(actor_thread)

        self.sess = tf.InteractiveSession()
        self.sess.run(tf.initialize_all_variables())

        self.reward_input = tf.placeholder(tf.float32)
        tf.scalar_summary('reward', self.reward_input)

        self.time_input = tf.placeholder(tf.float32)
        tf.scalar_summary('living_time', self.time_input)

        self.summary_op = tf.merge_all_summaries()
        self.summary_writer = tf.train.SummaryWriter(LOG_FILE, self.sess.graph)

        self.saver = tf.train.Saver()
        self.restore()

        self.lock = threading.Lock()
        self.rq = RedisQueue(REDIS_QUEUE_NAME)
        self.train_count = 0
        return
Example #16
async def main():
    msg = "stockx crawler starting!"
    print(msg)
    logging.info(msg)

    q = RedisQueue('rq')

    # create the client session and schedule one crawl task per listing page
    async with aiohttp.ClientSession() as client:
        tasks = []
        for k, v in URL.items():
            for page in range(1, 25):
                api_url = DOMAIN + v + str(page)
                tasks.append(asyncio.create_task(spiderList(client, api_url, q)))
                await asyncio.sleep(10)

        # wait for every task, not just the last one created
        done, pending = await asyncio.wait(tasks)
        if not pending:
            print('[Crawl complete] All crawl tasks have finished')
            logging.info('[Crawl complete] All crawl tasks have finished')
Example #17
async def main(loop):
    print("开始爬虫")
    # 等待mysql连接好
    pool = await aiomysql.create_pool(host=conf.database['host'],
                                      port=conf.database['port'],
                                      user=conf.database['user'],
                                      password=conf.database['passwd'],
                                      db=conf.database['db'],
                                      loop=loop)

    q = RedisQueue('rq')

    tasks = []
    for k, v in URL.items():
        for page in range(30):
            api_url = DOMAIN + v + str(page)
            tasks.append(asyncio.create_task(spiderList(pool, api_url, q)))
            await asyncio.sleep(1)

    # wait for every task, not just the last one created
    done, pending = await asyncio.wait(tasks)
    if not pending:
        print('[Crawl complete] All crawl tasks have finished')
        logging.info('[Crawl complete] All crawl tasks have finished')
Example #18
 def test_failure(self):
     qkey = 'test:failure:queue'
     tid = '12'
     rq = RedisQueue(self.rc, 1, 2)
     self.rc.lpush(qkey, tid)
     tid0 = rq.safe_pop(qkey)
     self.assertEqual(tid, tid0)
     ## Popping another task too fast, before the task timeout has been
     ## reached.
     tid1 = rq.safe_pop(qkey)
     self.assertIsNone(tid1)
     ## Supposing the worker had died before finishing the task, we can take
     ## it again after the task timeout.
     time.sleep(2)
     tid2 = rq.safe_pop(qkey)
     self.assertEqual(tid, tid2)
     ## Marking the task as done should make it impossible to retrieve the
     ## same task.
     rq.mark_done(qkey, tid2)
     time.sleep(2)
     tid3 = rq.safe_pop(qkey)
     self.assertIsNone(tid3)
     self.rc.delete(qkey, "%s:done" % qkey)
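The test above implies a lease-style queue: safe_pop hands out a task but leaves it recoverable until a timeout passes, and mark_done retires it for good. Here is a minimal sketch of that behaviour, assuming redis-py; the meaning of the constructor arguments and all internals are inferred from the test, not taken from the actual implementation.

class RedisQueue:
    def __init__(self, rc, poll_interval, task_timeout):
        self.rc = rc                        # shared redis.Redis client
        self.poll_interval = poll_interval  # assumed meaning (unused in this sketch)
        self.task_timeout = task_timeout    # lease length in seconds

    def safe_pop(self, qkey):
        # Rotate head to tail so the task survives a worker crash.
        tid = self.rc.rpoplpush(qkey, qkey)
        if tid is None:
            return None
        if isinstance(tid, bytes):
            tid = tid.decode()
        if self.rc.sismember('%s:done' % qkey, tid):
            self.rc.lrem(qkey, 0, tid)  # drop finished tasks for good
            return None
        # SET NX EX acquires the lease only if nobody else holds it.
        if self.rc.set('%s:lease:%s' % (qkey, tid), 1, nx=True,
                       ex=self.task_timeout):
            return tid
        return None

    def mark_done(self, qkey, tid):
        self.rc.sadd('%s:done' % qkey, tid)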
Example #19
def push_to_queue(queue_name, items):
    queue = RedisQueue(queue_name)
    for item in items:
        queue.put(item)
Example #20
# -*- coding:utf-8 -*-
__author__ = '张全亮'
import requests
import urllib3
import math
import time
import datetime
from multiprocessing.dummy import Pool

import hashlib

urllib3.disable_warnings()
from logger import Logger
from redis_queue import RedisQueue

yz = RedisQueue('yz')
yz_rec = RedisQueue('yz_rec')
logger = Logger()
"""校验订单规则,每页查找订单,找到符合条件的结束翻页查找"""


def check_pay(order_sn, pdduid, kdtsessionid):
    cookie = 'KDTSESSIONID={}'.format(kdtsessionid)
    first_url = 'https://h5.youzan.com/v2/trade/order/list.json?perpage=20&page=1&type=all'
    headers = {
        "User-Agent":
        "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.76 Mobile Safari/537.36",
        'Cookie': cookie
    }
    res = requests.get(first_url, headers=headers, verify=False)
    if '页面已被删除' in res.text:
Example #21
#!/usr/bin/env python
# coding:utf-8
# Copyright (C) dirlt

from redis_queue import RedisQueue

command_queue = RedisQueue('command')
command_queue.put('trigger')
Example #22
import datetime
from mysql_db import db_insert
from flask import Flask, jsonify, request, redirect, render_template
from redis_queue import RedisQueue

app = Flask(__name__)

# order placement spiders
from pdd_spider import pdd_main
from yz_spider import yz_main

# order status queries
from pdd_query import pdd_pass_query
from yz_query import yz_pass_query

pdd = RedisQueue('pdd')
yz = RedisQueue('yz')

"""拼多多下单爬虫"""


def pdd_spider(pdduid, accesstoken, goods_url, amount, order_number):
    result = pdd_main(pdduid, accesstoken, goods_url, amount, order_number)
    return result


"""有赞下单爬虫"""


def yz_spider(pdduid, kdtsessionid, goods_url, amount, order_number):
    result = yz_main(pdduid, kdtsessionid, goods_url, amount, order_number)
    return result
Example #23
def add_job_to_queue(country_id, num_records):
    queue = RedisQueue('jobs')
    job = {'country_id': country_id, 'num_records': num_records}
    job_in_json = json.dumps(job)
    queue.enqueue(job_in_json)
Example #24
# -*- coding:utf-8 -*-

from redis_queue import RedisQueue
import time

q = RedisQueue('rq')
while True:
    result = q.get_wait()
    if not result:
        break
    print("output.py: data {} out of queue {}".format(result,
                                                      time.strftime("%c")))
    time.sleep(2)
Example #25
 def __init__(self, redis_mgr):
     self.redis_mgr = redis_mgr
     self.queue = RedisQueue(redis_mgr)
Example #26
# -*- coding:utf-8 -*-

from redis_queue import RedisQueue
import time

q = RedisQueue('rq')  # create a queue named rq
for i in ["a", "b", "c", "d", "e", "f"]:
    q.put(i)
    print("input.py: data {} enqueue {}".format(i, time.strftime("%c")))
    time.sleep(1)
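Examples #24 and #26 above form a consumer/producer pair around put and get_wait. Below is a minimal RedisQueue sketch compatible with that pair, assuming redis-py; only put and get_wait mirror the examples, the rest is illustrative.

import redis

class RedisQueue:
    def __init__(self, name, namespace='queue', **redis_kwargs):
        self._db = redis.Redis(**redis_kwargs)
        self.key = '%s:%s' % (namespace, name)

    def put(self, item):
        # append to the tail of the Redis list
        self._db.rpush(self.key, item)

    def get_wait(self, timeout=5):
        # block until an item arrives at the head, or give up after timeout
        item = self._db.blpop(self.key, timeout=timeout)
        return item[1] if item else None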
Example #27
 def __init__(self, *args, **kwargs):
     self.singleton = Singleton()
     self.queue_chart = RedisQueue(name="data_chart",
                                   namespace="data_chart")
     super(WebServerClass, self).__init__(*args, **kwargs)
Example #28
# -*- coding:utf-8 -*-
__author__ = '张全亮'
import requests
import urllib3
from bs4 import BeautifulSoup
from multiprocessing.dummy import Pool

urllib3.disable_warnings()
import re, datetime, time
from logger import Logger
from mysql_db import db_insert
from redis_queue import RedisQueue

pdd = RedisQueue('pdd')
pdd_rec = RedisQueue('pdd_rec')
logger = Logger()

"""自动5星好评"""


def evaluation(pdduid, accesstoken, goods_id, order_sn):
    url = 'https://mobile.yangkeduo.com/proxy/api/v2/order/goods/review?pdduid={}'.format(pdduid)
    cookie = 'pdd_user_id={}; PDDAccessToken={};'.format(pdduid, accesstoken)
    headers = {
        'accesstoken': accesstoken,
        'Accept': 'text/html, application/xhtml+xml, application/xml; q=0.9, */*; q=0.8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0',
        'Cookie': cookie
    }
    data = {
        "goods_id": goods_id,
Example #29
class Spider():
    base_url = 'https://weixin.sogou.com/weixin'
    keyword = 'NBA'
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,ja;q=0.4,zh-TW;q=0.2,mt;q=0.2',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Cookie': 'IPLOC=CN1100; SUID=6FEDCF3C541C940A000000005968CF55; SUV=1500041046435211; ABTEST=0|1500041048|v1; SNUID=CEA85AE02A2F7E6EAFF9C1FE2ABEBE6F; weixinIndexVisited=1; JSESSIONID=aaar_m7LEIW-jg_gikPZv; ld=Wkllllllll2BzGMVlllllVOo8cUlllll5G@HbZllll9lllllRklll5@@@@@@@@@@; LSTMV=212%2C350; LCLKINT=4650; ppinf=5|1500042908|1501252508|dHJ1c3Q6MToxfGNsaWVudGlkOjQ6MjAxN3x1bmlxbmFtZTo1NDolRTUlQjQlOTQlRTUlQkElODYlRTYlODklOEQlRTQlQjglQTglRTklOUQlOTklRTglQTclODV8Y3J0OjEwOjE1MDAwNDI5MDh8cmVmbmljazo1NDolRTUlQjQlOTQlRTUlQkElODYlRTYlODklOEQlRTQlQjglQTglRTklOUQlOTklRTglQTclODV8dXNlcmlkOjQ0Om85dDJsdUJfZWVYOGRqSjRKN0xhNlBta0RJODRAd2VpeGluLnNvaHUuY29tfA; pprdig=ppyIobo4mP_ZElYXXmRTeo2q9iFgeoQ87PshihQfB2nvgsCz4FdOf-kirUuntLHKTQbgRuXdwQWT6qW-CY_ax5VDgDEdeZR7I2eIDprve43ou5ZvR0tDBlqrPNJvC0yGhQ2dZI3RqOQ3y1VialHsFnmTiHTv7TWxjliTSZJI_Bc; sgid=27-27790591-AVlo1pzPiad6EVQdGDbmwnvM; PHPSESSID=mkp3erf0uqe9ugjg8os7v1e957; SUIR=CEA85AE02A2F7E6EAFF9C1FE2ABEBE6F; sct=11; ppmdig=1500046378000000b7527c423df68abb627d67a0666fdcee; successCount=1|Fri, 14 Jul 2017 15:38:07 GMT',
        'Host': 'weixin.sogou.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'
    }
    session = Session()
    queue = RedisQueue()
    mysql = MySQL()

    def get_proxy(self):
        try:
            r = requests.get(PROXY_POOL_URL)
            if r.status_code == 200:
                print('Get Proxy', r.text)
                return r.text
            return None
        except requests.ConnectionError:
            return None

    def start(self):
        # initialisation: apply the default headers to the whole session
        self.session.headers.update(self.headers)
        start_url = self.base_url + '?' + urlencode({'query': self.keyword, 'type': 2})
        weixin_request = WeixinRequest(url=start_url, callback=self.parse_index, need_proxy=True)
        # schedule the first request
        self.queue.add(weixin_request)

    def parse_index(self, response):
        doc = pq(response.text)
        items = doc('.news-box .news-list li .txt-box h3 a').items()
        for item in items:
            url = item.attr['href']
            weixin_request = WeixinRequest(url=url, callback=self.parse_detail)
            yield weixin_request
        next = doc('#sogou_next').attr('href')
        if next:
            url = self.base_url + str(next)
            weixin_request = WeixinRequest(url=url, callback=self.parse_index, need_proxy=True)
            yield weixin_request

    def parse_detail(self, response):
        doc = pq(response.text)
        data = {
            'title': doc('.rich_media_title').text(),
            'content': doc('.rich_media_content').text(),
            'date': doc('#post-date').text(),
            'nickname': doc('#js_profile_qrcode > div > strong').text(),
            'wechat': doc('#js_profile_qrcode > div > p:nth-child(3) > span').text()
        }
        yield data

    def request(self, weixin_request):
        try:
            if weixin_request.need_proxy:
                proxy = self.get_proxy()
                if proxy:
                    proxies = {
                        'http': 'http://' + proxy,
                        'https': 'https://' + proxy
                    }
                    return self.session.send(weixin_request.prepare(),
                        timeout=weixin_request.timeout, allow_redirects=False, proxies=proxies)
            return self.session.send(weixin_request.prepare(), timeout=weixin_request.timeout, allow_redirects=False)
        except (ConnectionError, ReadTimeout) as e:
            print(e.args)
            return False

    def error(self, weixin_request):
        weixin_request.fail_time += 1
        print('Request Failed', weixin_request.fail_time, 'Times', weixin_request.url)
        if weixin_request.fail_time < MAX_FAILED_TIME:
            self.queue.add(weixin_request)

    def schedule(self):
        while not self.queue.empty():
            weixin_request = self.queue.pop()
            callback = weixin_request.callback
            print('Schedule', weixin_request.url)
            r = self.request(weixin_request)
            if r and r.status_code in VALID_STATUSES:
                results = list(callback(r))
                if results:
                    for result in results:
                        print('New Result', result)
                        if isinstance(result, WeixinRequest):
                            self.queue.add(result)
                        if isinstance(result, dict):
                            self.mysql.insert('articles', result)
                else:
                    self.error(weixin_request)
            else:
                self.error(weixin_request)

    def run(self):
        self.start()
        self.schedule()
Example #30
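# Binds a Redis-backed queue to a topic; the Redis host, port and db are
# configurable.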
 def __init__(self, topic, host='localhost', port=6379, db=0):
     self.rqueue = RedisQueue(topic, 1, host=host, port=port, db=db)