Code example #1
File: receiver.py (project: rossdylan/netcrawl)
from pprint import pprint, pformat

class Receiver(object):
    def __init__(self, redis_host):
        self.output_queue = RedisQueue(redis_host, "outqueue")

    def run(self):
        while True:
            result = self.output_queue.get().data
            pprint(result)
            print "---"

    def run_dump(self):
        dumpfile = open("netcrawl.log", "w")
        while True:
            result = self.output_queue.get().data
            pprint(result)
            dumpfile.write(pformat(result) + "\n")
            dumpfile.flush()
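None of these listings show RedisQueue itself; it is the classic redis-py list recipe, sketched below. Constructor signatures vary between projects (netcrawl passes (redis_host, name) and wraps payloads in an object with a .data attribute; others pass (name, namespace=..., host=..., port=..., db=...)), so treat this as an illustration of the shape, not the exact class each snippet imports.

import redis

class RedisQueue(object):
    """Simple FIFO queue backed by a Redis list (illustrative sketch)."""

    def __init__(self, name, namespace='queue', host='localhost',
                 port=6379, db=0):
        self._db = redis.StrictRedis(host=host, port=port, db=db)
        self.key = '%s:%s' % (namespace, name)

    def qsize(self):
        return self._db.llen(self.key)      # number of queued items

    def empty(self):
        return self.qsize() == 0

    def put(self, item):
        self._db.rpush(self.key, item)      # append at the tail

    def get(self, block=True, timeout=0):
        # Returns bytes (callers decode), or None on timeout.
        if block:
            item = self._db.blpop(self.key, timeout)
            if item:
                item = item[1]              # blpop returns (key, value)
            return item
        return self._db.lpop(self.key)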
Code example #2
class RedisMessageProvider(MessageProvider):
    def __init__(self, host, port, queue_name):
        self.queue = RedisQueue(name=queue_name,
                                namespace='queue',
                                host=host,
                                port=port)
        self.queue.wait_for()

    def get_message(self):
        return self.queue.get()
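A minimal usage sketch for this provider; the host, port, queue name, and handle step are placeholders, and wait_for() is presumably the wrapper's "block until the queue is reachable" helper:

provider = RedisMessageProvider('localhost', 6379, 'inbound')
while True:
    message = provider.get_message()   # blocks if the wrapper's get() blocks
    handle(message)                    # hypothetical processing step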
Code example #3
import time
import urllib2

class FetcherWorker:

    def __init__(self, in_queue_namespace, out_queue_namespace, apikey):
        self.in_queue_namespace = in_queue_namespace
        self.out_queue_namespace = out_queue_namespace
        self.apikey = apikey

        self.in_queue = RedisQueue(in_queue_namespace)
        self.out_queue = RedisQueue(out_queue_namespace)

        print "Fetcher loaded with apikey", self.apikey

    def run(self):
        while True:
            base_url = self.in_queue.get()

            if base_url == "None":
                # forward the end-of-queue marker for the parsers, then stop
                self.out_queue.put("None")
                break

            url = base_url + self.apikey

            t1 = time.time()

            print "fetching try 1", url

            # NB: urllib2.urlopen raises HTTPError for non-2xx responses,
            # so the else branch below is effectively dead code; see the
            # retry sketch after this listing.
            resp = urllib2.urlopen(url)
            if resp.code == 200:
                text = resp.read()
                self.out_queue.put(text)
            else:
                print 'failed once', url
                time.sleep(10)
                print "fetching try 2", url
                resp = urllib2.urlopen(url)
                if resp.code == 200:
                    text = resp.read()
                    self.out_queue.put(text)

            print "done fetching"

            # make sure we don't reuse the same API key within 2 seconds
            t2 = time.time()
            if t2 - t1 < 2.0:
                time.sleep(2.0 - (t2 - t1))
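Because urllib2.urlopen signals HTTP failures by raising HTTPError/URLError rather than returning a response with a non-200 code, a retry that actually fires has to catch the exception. A hedged sketch of the same two-attempt fetch (function name and defaults are mine, not the project's):

import time
import urllib2

def fetch_with_retry(url, attempts=2, backoff=10.0):
    for attempt in range(attempts):
        try:
            return urllib2.urlopen(url).read()
        except urllib2.URLError:           # HTTPError subclasses URLError
            if attempt + 1 < attempts:
                time.sleep(backoff)        # wait before retrying
    return None                            # every attempt failed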
Code example #4
File: crawler.py (project: rossdylan/netcrawl)
import requests

# WebPage is netcrawl's page-model class; RedisQueue its queue wrapper
class Crawler(object):
    def __init__(self, redis_host, depth=10):   # NB: depth is unused in this excerpt
        self.links_queue = RedisQueue(redis_host, "linksqueue")
        self.pages_queue = RedisQueue(redis_host, "pagesqueue")

    def run(self):
        while True:
            link = self.links_queue.get().data
            try:
                page = WebPage(requests.get(link).text, link, 80)
            except Exception:
                print("Exception GETing {0}".format(link))
                continue
            self.pages_queue.put(page.to_dict())
Code example #5
File: middlewares.py (project: Kylinlin/spiders)
    def process_request_origin(self, request, spider):
        redis = RedisQueue('proxy_ip')
        if not redis.empty():
            proxy_ip = redis.get()
        else:
            proxy_ip = get_ip()

        proxy_para = {
                'ip_port': proxy_ip,
                'user_pass': ''
            }
        request.meta['proxy'] = "http://%s" % proxy_para['ip_port']
        # NB: use b64encode rather than encodestring, which appends a newline
        # that corrupts the header; and only send the header when credentials
        # are actually set (the original tested `is not None` against '')
        if proxy_para['user_pass']:
            encoded_user_pass = base64.b64encode(proxy_para['user_pass'])
            request.headers['Proxy-Authorization'] = 'Basic ' + encoded_user_pass
        print "*********************** RedisProxyMiddleware Using proxy ip: %s *****" % proxy_para['ip_port']
        redis.put(proxy_ip)   # recycle the proxy for later requests
Code example #6
File: indexer.py (project: rossdylan/netcrawl)
import pyelasticsearch

class Indexer(object):
    def __init__(self, redis_host, es_urls):
        self.pages_queue = RedisQueue(redis_host, "pagesqueue") # take pages out of this queue
        self.links_queue = RedisQueue(redis_host, "linksqueue") # put links into this queue
        self.connection = pyelasticsearch.ElasticSearch(es_urls)
        try:
            self.connection.create_index("webpages")
        except pyelasticsearch.ElasticHttpError:
            pass  # the index may already exist
    def run(self):
        while True:
            result = self.pages_queue.get().data
            result['tags'] = genTags(result['html'])
            self.connection.index('webpages', 'webpage', result, id=result['ip'])
            print('Indexed {0}'.format(result['ip']))
            for link in result['links']:
                self.links_queue.put(link)
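Examples #1, #4, and this one are the moving parts of netcrawl's loop: the Crawler turns links into pages, and the Indexer indexes pages while feeding newly discovered links back. A minimal driver sketch; the module paths and hosts are assumptions, not the project's actual entry point:

import threading

from crawler import Crawler   # hypothetical module paths
from indexer import Indexer

if __name__ == '__main__':
    crawler = Crawler('localhost')                              # links -> pages
    indexer = Indexer('localhost', ['http://localhost:9200/'])  # pages -> links
    threading.Thread(target=crawler.run).start()
    indexer.run()   # keep one stage in the main thread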
Code example #7
class DatastoreWriterWorker:
    def __init__(self, in_queue_namespace):

        self.in_queue_namespace = in_queue_namespace

        self.in_queue = RedisQueue(in_queue_namespace)

    def run(self):

        while True:
            json_doc = self.in_queue.get()

            if json_doc == "None":
                break

            print "DatastoreWriterWorker got", json_doc
            print
            print "Write to KV store, Fluentd, and MySQL"
            print
            print
Code example #8
import cPickle
import sqlite3
import time
import zlib

# geturls() and bfdone are defined elsewhere in the source file

def main():

    done_que = RedisQueue('seed')
    run_que = RedisQueue('run')

    run_que.flushdb()

    conn = sqlite3.connect('site_data.db')
    conn.execute(
        "create table if not exists mainpages (id integer primary key autoincrement, url TEXT, headers TEXT, content BLOB)"
    )

    spend = 0
    cnt = 0
    size = 0
    while True:

        data = cPickle.loads(done_que.get())
        st = time.time()
        urls = geturls(data['url'], data['content'])
        if len(urls) == 0:
            continue

        for url in urls:
            # bfdone is presumably a Bloom filter of already-crawled URLs
            # (see the sketch below)
            if url not in bfdone:
                run_que.put(url)

        gziphtml = sqlite3.Binary(zlib.compress(data['content']))
        size += len(gziphtml)
        conn.execute(
            "insert into mainpages (url,headers,content) values (?,?,?)",
            (data['url'], str(data['headers']), gziphtml))

        et = time.time()
        spend += (et - st)
        cnt += 1
        if cnt % 10 == 0:
            print "cost:", spend / cnt, cnt, done_que.qsize(), size / 1024 / 1024
            conn.commit()
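bfdone never appears in the excerpt; it is presumably a module-level Bloom filter of URLs already crawled. A plain set has the same membership interface for small runs, with the Bloom-filter version noted as an assumption:

bfdone = set()   # stand-in; bounded-memory version below

# with the pybloom package (an assumption, not confirmed by the source):
#   from pybloom import BloomFilter
#   bfdone = BloomFilter(capacity=10000000, error_rate=0.001)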
Code example #9
class ParserWorker():

    def __init__(self, in_queue_namespace, out_queue_namespace):
        
        self.in_queue_namespace = in_queue_namespace
        self.out_queue_namespace = out_queue_namespace

        self.in_queue = RedisQueue(in_queue_namespace)
        self.out_queue = RedisQueue(out_queue_namespace)

        print "Parser worker loaded"

    def run(self):

        while True:
            xml_text = self.in_queue.get()
            print "Received XML"
            if xml_text == "None":
                self.out_queue.put("None")
                break

            json_doc = DataParser.parse_get_state_stats_resp(xml_text)
            print "Made JSON"
            self.out_queue.put(json_doc)
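The FetcherWorker (example #3), the DatastoreWriterWorker (example #7), and this ParserWorker chain together through named queues, each forwarding the "None" sentinel so the whole pipeline drains and stops. A wiring sketch with assumed queue names, endpoint, and key; in practice the stages presumably run as separate processes, though sequential calls also work because the queues buffer in Redis:

seed = RedisQueue('urls')
seed.put("http://api.example.com/stats?key=")   # assumed endpoint
seed.put("None")                                # end-of-queue marker

FetcherWorker('urls', 'xml', apikey='SECRET').run()   # urls -> raw XML
ParserWorker('xml', 'json').run()                     # XML  -> JSON docs
DatastoreWriterWorker('json').run()                   # JSON -> datastore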
Code example #10
import os
import sys

from RedisQueue import RedisQueue

# usage: test.py [filename]
if __name__ == '__main__':
    if len(sys.argv) > 1:
        filename = sys.argv[1]
    else:
        with open("/home/anmol/minor/mpu/outputs/next_name",
                  "r+") as name_file:
            name = name_file.readline().split()[0]
            name_file.seek(0)
            name_file.write(str(int(name) + 1))
            filename = name + ".csv"

    f = open("/home/anmol/minor/mpu/outputs/" + filename, "w+")
    f.write("Timestamp, Accel_x, Accel_y, Accel_z\n")

    q = RedisQueue('test')
    print("Starting saving script")
    try:
        while True:
            data = q.get()
            if data.decode('utf-8') == 'finished':
                break
            else:
                f.write(data.decode('utf-8'))
    finally:
        f.close()
        os.system("bzip2 /home/anmol/minor/mpu/outputs/{}".format(filename))
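The producer feeding the 'test' queue is not shown. A sketch of its presumable shape, with read_accel() standing in for the real MPU-6050 sampling code:

import time
from RedisQueue import RedisQueue

q = RedisQueue('test')
for _ in range(1000):                # sample count chosen arbitrarily here
    ax, ay, az = read_accel()        # hypothetical accelerometer read
    q.put("%f, %f, %f, %f\n" % (time.time(), ax, ay, az))
q.put('finished')                    # sentinel the saving script breaks on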
Code example #11
#!/usr/bin/python
from RedisQueue import RedisQueue
import subprocess
import json
import base64

q = RedisQueue('messages',
               namespace='ansible',
               host='internal-redis.ovmdvp.0001.use2.cache.amazonaws.com',
               port=6379,
               db=1)

while True:
    res = q.get()
    message = json.loads(res)
    subprocess.Popen([
        "/home/ubuntu/ansible-bot/message_bridge/run_ansible_controller.sh",
        message['response_id'], message['playbook'],
        base64.b64encode(res)
    ])
Code example #12
from RedisQueue import RedisQueue
q = RedisQueue('test')

while True:
    item = q.get()
    print item, "received from queue"
    if item == "None":
        break
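This is the consumer half of the classic RedisQueue demo; the matching producer is a few lines (a sketch, message text arbitrary):

from RedisQueue import RedisQueue

q = RedisQueue('test')
for i in range(10):
    q.put("message %d" % i)
q.put("None")   # the consumer loop above breaks on this sentinel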
Code example #13
# excerpt; assumes imports of multiprocessing, sqlite3, pandas as pd,
# time.sleep, sklearn.svm.SVR, and sklearn's train_test_split. Written
# against the old pandas .ix indexer (use .loc/.iloc on modern pandas).
class machine(multiprocessing.Process):
    def __init__(self, df):
        multiprocessing.Process.__init__(self)

        self.df = df

    def run(self):
        sleep(10)
        self.last_mean = .015
        self.q = RedisQueue('test')
        print('start')
        self.conn = sqlite3.connect("data.db")
        while not self.q.empty():
            # undo the str()-serialized feature tuple pushed by the driver
            # (code example #17): strip the repr wrapping, then split on ', '
            features = str(self.q.get())[3:-2].replace("'","").split(', ')
            self.features = list(features)
            for self.hold_time in ['_10']:
                df = self.df[self.features+['stock_perc_change'+self.hold_time, 'abnormal_perc_change'+self.hold_time]]
                targets = [self.df['stock_perc_change'+self.hold_time], self.df['abnormal_perc_change'+self.hold_time]]
                positive_dfs = []
                negative_dfs = []
                for i in range(8):
                    a_train, a_test, b_train, b_test = train_test_split(df.ix[:,:-2], df.ix[:,-2:], test_size=.4)

                    self.train(a_train, b_train)
                    test_result, negative_df, positive_df = self.test(a_test, b_test)
                    if test_result:
                        positive_dfs.append(positive_df)
                        negative_dfs.append(negative_df)
                    else:
                        break

                if test_result:
                    self.get_result(pd.concat(positive_dfs), pd.concat(negative_dfs))

    def train(self, a_train, b_train):
        self.clf = SVR(C=1.0, epsilon=0.2)

        self.clf.fit(a_train, b_train['abnormal_perc_change'+self.hold_time])


    def test(self, a_test, b_test):

        a_test['Predicted'] = self.clf.predict(a_test)
        a_test['Actual_stock_perc_change'+self.hold_time] = b_test['stock_perc_change'+self.hold_time]
        a_test['Actual_abnormal_perc_change'+self.hold_time] = b_test['abnormal_perc_change'+self.hold_time]

        if len(a_test['Predicted'].unique())<40:
            return False, None, None

        a_test = a_test.sort_values(by='Predicted')

        return True, a_test.ix[:,-3:].head(20), a_test.ix[:,-3:].tail(20)

    def get_result(self, df_p, df_n):

        p_result = df_p.describe()
        n_result = df_n.describe()

        if p_result.ix['mean','Actual_abnormal_perc_change_10']<0 or n_result.ix['mean','Actual_abnormal_perc_change_10']>0:
            return
        if p_result.ix['50%','Actual_abnormal_perc_change_10']<0 or n_result.ix['50%','Actual_abnormal_perc_change_10']>0:
            return

        store_me = False
        if p_result.ix['mean','Actual_abnormal_perc_change_10']>self.last_mean:
            self.last_mean = p_result.ix['mean','Actual_abnormal_perc_change_10']
            store_me = True

        p_result.index = p_result.index + '_pos'
        n_result.index = n_result.index+'_neg'

        p_result = p_result.stack().reset_index()
        p_result.index = p_result['level_1'] +'-'+ p_result['level_0']
        p_result = p_result[0]

        n_result = n_result.stack().reset_index()
        n_result.index = n_result['level_1'] +'-'+ n_result['level_0']
        n_result = n_result[0]


        result = p_result.append(n_result)
        result = pd.DataFrame(result).T
        self.model_name = str(self.features)[1:-1]+'__'+self.hold_time[1:]
        result['features'] = self.model_name
        if store_me:

            result.to_sql('results', self.conn, index = False, if_exists='append')
            self.store_machine()

    def store_machine(self):
        df = self.df[self.features]
        target = self.df['abnormal_perc_change'+self.hold_time]

        self.clf = SVR(C=1.0, epsilon=0.2)

        self.clf.fit(df, target)
        from sklearn.externals import joblib
        joblib.dump(self.clf, 'machines/'+self.model_name)
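The repr surgery in run() is the fragile half of a round-trip: the driver (example #17) pushes feature tuples through an implicit str() conversion. If both sides can change, json makes the round-trip explicit; a sketch:

import json

# producer side:  q.put(json.dumps(list(feature)))
# consumer side:
features = json.loads(q.get())   # back to a list of column names, no slicing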
Code example #14
        print_err(uname)
        print_err("Something wrong when try to get user's followed topics")
        time.sleep(random.uniform(0, 5))

    return user_questions


if __name__ == '__main__':
    q = RedisQueue('follow_question_queue')
    sleep_time = 0
    db = MongoClient().zhihu.zhihu_follow_questions
    while True:
        if q.empty():
            print('Finished at %s' % str(datetime.datetime.now()))
            print('Waiting ...')
        uname = q.get()       # blocks here until a new name arrives
        uname = uname.decode()
        if db.find({'_id': uname}).count() > 0:
            continue

        try:
            with timeout(seconds=40):
                all_questions = get_user_questions(uname)
                if all_questions == {}:
                    continue
                elif all_questions is None:
                    sleep_time += random.uniform(1, 5)
                    print_err('Sleeping for %0.2f seconds' % sleep_time)
                    time.sleep(sleep_time)
                else:
                    db.insert(all_questions)
Code example #15
    # create the anomaly detection instance
    print('Instantiating anomaly detection algorithm...')
    anomaly_detection = AnomalyDetection()
    anomaly_detection.load_model('anomaly_detection.csv')
    anomaly_detection.train_forest()
    print('Finished instantiating anomaly detection algorithm')

    # initialize the forwards-backwards pipelines
    print('Initializing pipelines...')
    redis_packet_queue = RedisQueue('packet_worker_queue')
    redis_results_queue = RedisQueue('packet_results_queue')
    print('Pipelines initialized, awaiting messages...')

    while True:
        # fetch a packet
        recv_info = redis_packet_queue.get()

        if not recv_info:
            continue

        # load the packet as a JSON object and decode the features
        parsed_json = json.loads(recv_info)
        json_decoded_packet = parsed_json['packet']
        json_decoded_packid = parsed_json['id']
        json_decoded_src = parsed_json['src']
        json_decoded_dst = parsed_json['dst']

        # perform isolation, observe if outlier
        result = anomaly_detection.predict_data(json_decoded_packet)

        if isinstance(result, list):
Code example #16
File: account_login.py (project: Tencent-Luis/python)
# excerpt; assumes imports of base64, json, threading, time, urllib3 plus
# project-defined redis_conn and author_login
q = RedisQueue('account_login', **redis_conn)
http = urllib3.PoolManager(num_pools=50)

def worker(value):
    params = {}
    params['account_login'] = base64.encodestring(value)
    r = http.request('POST', author_login, params)

    # server-side failure: push the value back onto the queue
    if r.status != 200:
        q.put(value)

    # IP whitelist check failed: push the value back onto the queue
    # (r.data is the raw body; parsing it as JSON is an assumption here)
    if json.loads(r.data)['status'] == 10002:
        q.put(value)
    print r.data

while True:
    # time.sleep(1);
    if q.empty():
        print 'empty queue'
        break

    s = q.qsize()
    for i in range(0,s):
        value = q.get()
        t = threading.Thread(target=worker, args=(value,))
        t.start()
        if threading.active_count() >= 500:
            time.sleep(1)
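Throttling on threading.active_count() works, but a bounded pool states the intent directly. A sketch with concurrent.futures (stdlib on Python 3; the `futures` backport on Python 2), reusing q and worker from above:

from concurrent.futures import ThreadPoolExecutor

with ThreadPoolExecutor(max_workers=500) as pool:   # at most 500 concurrent workers
    while not q.empty():
        pool.submit(worker, q.get())                # surplus tasks queue inside the pool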
Code example #17
    combinations = []
    q = RedisQueue('test')
    #for permute_length in range(3,7):
    for permute_length in range(3,10):
        for feature in list(itertools.combinations(k_best_features, r=permute_length)):

            combinations.append(feature)

    shuffle(combinations)

    print('starting', len(combinations))
    input()   # pause: press Enter before feeding the queue

    # drain anything left over from a previous run
    while not q.empty():
        q.get()
    print("empty")

    for i in range(6):
        x = machine(df)
        print('starting...')
        x.start()

    for feature in combinations:
        q.put(feature)
    print('all put')

    while not q.empty():
        try:
Code example #18
import urllib2

from RedisQueue import RedisQueue

redis = RedisQueue('0', 'jandan')


def user_agent(url):
    proxy_handler = urllib2.ProxyHandler({'http': '127.0.0.1:8080'})
    opener = urllib2.build_opener(proxy_handler)
    urllib2.install_opener(opener)
    req_header = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0'
    }
    req_timeout = 20
    req = urllib2.Request(url, None, req_header)
    page = urllib2.urlopen(req, None, req_timeout)
    html = page
    return html


# outer loop keeps polling; busy-waits whenever the queue is empty
while True:
    while not redis.empty():
        down_url = redis.get()
        print(down_url)
        try:
            data = user_agent(down_url).read()
            with open('./' + down_url[-11:], 'wb') as code:
                code.write(data)
            redis.pop()   # wrapper-specific: drop the entry once the download succeeded
        except Exception:
            pass          # leave the URL queued on failure and move on
Code example #19
File: clean_redis_queue_2.py (project: juntiedt2/JU_1)
#!/usr/bin/env python

# UniPi Python Control Panel
# clean_redis_queue_2.py
# uses Python 3.5 up
# Author: Johannes Untiedt
# Version 10.0 from 26.03.2018

import time
from RedisQueue import RedisQueue

if __name__ == '__main__':
    print("clean_redis_queue_2 started")
    q = RedisQueue('ws_2')
    while not q.empty():
        message = q.get()
        print(message)
    print("redis_queue_2 cleaned")
Code example #20
File: master.py (project: madaoCN/pythonCodes)
import urllib2
from RedisQueue import RedisQueue
redis = RedisQueue('jandan3')

def user_agent(url):
    req_header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0'}
    req_timeout = 20
    req = urllib2.Request(url, None, req_header)
    page = urllib2.urlopen(req, None, req_timeout)
    html = page
    return html

while not redis.empty():
    down_url = redis.get()
    data = user_agent(down_url).read()
    with open('D:/Python/picture' + '/' + down_url[-11:], 'wb') as code:
        code.write(data)
    print down_url
Code example #21
from RedisQueue import RedisQueue

q = RedisQueue('test')
q.put("你好")                      # enqueue a UTF-8 string ("hello")
print(q.get().decode('utf-8'))     # values come back as bytes, so decode
Code example #22
from ChatRoomApiManager import ChatRoomApiManager
from RedisQueue import RedisQueue
import time

time.sleep(1)

chatRoomApi = ChatRoomApiManager()
token = chatRoomApi.get_token()

q = RedisQueue('bot_response')


while True:
    time.sleep(3)
    content = q.get()
    if content is None:   # wrapper may return None when nothing is queued
        continue
    chatRoomApi.post_message(token, content)
Code example #23
File: UniPi_sender.py (project: juntiedt2/JU_1)
# UniPi Python Control Panel
# UniPi_sender.py
# uses Python 3.5 up
# Author: Johannes Untiedt
# Version 10.0 from 24.03.2018

from UniPi_interface_class import *
from RedisQueue import RedisQueue

q = RedisQueue('ws_3')
ui = unipi_interface("10.0.0.52","8080")
print("UniPi_sender started")

while True:
    while not q.empty():
        msg = q.get()
        #print(msg, " ", type(msg))
        # NB: eval() executes arbitrary code pulled off the queue;
        # ast.literal_eval would parse the dict literal without running anything
        msg = eval(msg)
        #print(msg, " ", type(msg))

        if msg['event'] == 'change_R1' and msg['data'] == 'on':
            ui.set_relay(1, 1, 0)
        elif msg['event'] == 'change_R1' and msg['data'] == 'off':
            ui.set_relay(1, 0, 0)
        elif msg['event'] == 'change_R2' and msg['data'] == 'on':
            ui.set_relay(2, 1, 0)
        elif msg['event'] == 'change_R2' and msg['data'] == 'off':
            ui.set_relay(2, 0, 0)
        elif msg['event'] == 'change_R3' and msg['data'] == 'on':
            ui.set_relay(3, 1, 0)
        elif msg['event'] == 'change_R3' and msg['data'] == 'off':
            ui.set_relay(3, 0, 0)