Example 1
    def parse(self, response):  # callback invoked once the response is fetched
        sel = database.conn(self)
        jsons = json.loads(response.body)
        content_all = {}  # dict of per-worker results
        dict_index = 0  # index into the results dict
        # print('alive')
        # print(jsons['workers']['data'][0])
        # print('die')
        # print(jsons['workers']['data'][1])
        item = SparkpoolItem()
        for worker in jsons['workers']['data']:
            if worker['hashrate'] <= 0:
                content = {}
                content["computer_name"] = worker['rig']
                # content["computer_name_num"] = re.search(r'[1-9]\d*', content["computer_name"]).group()  # numeric part of the computer name
                content['bar_id'] = self.bar_id[response.url]

                # query the port id that has to be closed
                sel.cursor.execute("select bp.id from board_list as b INNER join board_port_list as bp on b.id = bp.board_id where bp.close_time>0 and b.bar_id = "+str(content['bar_id'])+" and FIND_IN_SET('"+str(content['computer_name'])+"',bp.comp_id)")
                port_id = sel.cursor.fetchone()
                if port_id is None:
                    content['port_id'] = 0
                else:
                    content['port_id'] = port_id[0]

                content_all[dict_index] = content  # append to the results dict
                dict_index += 1

        item['scan_url'] = response.url  # URL crawled in this run
        item['scan_content'] = json.dumps(content_all, ensure_ascii=False)  # serialized as JSON

        yield item
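All of these examples call database.conn(self) to attach a MySQL connection and cursor to the spider or pipeline instance. The helper itself is not shown in the source; a minimal sketch of what it might look like, assuming pymysql, with placeholder connection settings:

import pymysql

def conn(obj):
    # attach a MySQL connection and cursor to the given spider/pipeline instance
    # host, user, password and database name are placeholders, not from the source
    obj.db = pymysql.connect(host='127.0.0.1', user='root', password='secret',
                             db='pool_monitor', charset='utf8mb4')
    obj.cursor = obj.db.cursor()
    return obj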
Example 2
    def __init__(self):  # query the database for the URLs to crawl
        self = database.conn(self)  # database instance
        # self.cursor.execute("select url,bar_id from url_list where (url like '%f2pool%' or url like '%vvpool%')")
        self.cursor.execute("select url,bar_id from url_list where id = 14")
        info = self.cursor.fetchall()
        for vo in info:
            # list of crawl start URLs
            self.start_urls.append(vo[0])
            # also collect the board ids of this cafe as a comma-separated string
            self.cursor.execute("select id from board_list where bar_id = "+str(vo[1]))
            board_info = self.cursor.fetchall()
            board_list = ''
            for vo2 in board_info:
                board_list += str(vo2[0])+','
            board_list = board_list[:-1]
            # fetch all machine names attached to those boards
            self.cursor.execute("select comp_id from board_port_list where board_id in ("+board_list+")")
            machine_info = self.cursor.fetchall()
            machine_list = ''
            for vo3 in machine_info:
                if vo3[0] != '':
                    machine_list += str(vo3[0])+','
            machine_list = machine_list[:-1]
            machine_list = machine_list.split(',')
            # map: internet-cafe id -> list of its machine names
            self.bar_id[vo[1]] = machine_list
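The net effect of Example 2 is to fill two structures on the spider: start_urls with the rows from url_list, and bar_id mapping each cafe id to the flattened list of machine names read from board_port_list. A sketch of the resulting state, with made-up values (URL, id and names are hypothetical):

# After __init__ has run, the spider might hold something like:
#   self.start_urls == ['https://www.f2pool.com/eth/some-account']
#   self.bar_id     == {7: ['pc01', 'pc02', 'pc03']}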
Example 3
    def __init__(self):  # query the database for the URLs to crawl
        self = database.conn(self)  # database instance
        self.cursor.execute("select url,bar_id from url_list where url like '%sparkpool%'")
        info = self.cursor.fetchall()
        for vo in info:
            before = re.search('.*(?=/#)', vo[0]).group()  # part of the URL before '/#'
            after = re.search('(?<=0x).*', vo[0]).group()  # wallet address after '0x'
            self.start_urls.append(before+'/api/page/miner?value='+after)  # rewrite to the JSON API endpoint
            self.bar_id[vo[0]] = vo[1]  # map original URL -> internet-cafe id
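A quick sketch of what the two regexes in Example 3 produce for a hypothetical sparkpool wallet URL (the address below is made up, not from the source):

import re

url = 'https://www.sparkpool.com/#/miner/0x1234abcd'  # hypothetical url_list row
before = re.search('.*(?=/#)', url).group()           # 'https://www.sparkpool.com'
after = re.search('(?<=0x).*', url).group()           # '1234abcd'
print(before + '/api/page/miner?value=' + after)
# https://www.sparkpool.com/api/page/miner?value=1234abcd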
Example 4
    def parse(self, response):  # callback invoked once the response is fetched

        sel = database.conn(self)
        content_all = {}  # dict of per-row results
        dict_index = 0  # index into the results dict
        item = F2poolItem()  # single item holding all rows of this page
        for vo in response.xpath('//table[@id="workers"]/tbody/tr'):  # extract worker rows via XPath
            content = {}
            computer_name = vo.xpath('td[1]')
            content["computer_name"] = computer_name.xpath('string(.)').extract()[0]
            # content["computer_name_num"] = re.search('[1-9]\d*', content["computer_name"]).group() #电脑名称取数字
            # 默认网吧id 0 ,根据电脑名称遍历所有网吧机器,获取对应网吧id
            content["bar_id"] = 0
            for idx in self.bar_id:
                if content["computer_name"].lower() in self.bar_id[idx]:
                    content["bar_id"] = idx
                    break
            
            default_24_min = vo.xpath('td[4]')
            content["default_24_min"] = default_24_min.xpath('string(.)').extract()[0]
            # ak value is in column 5
            content["time_local"] = vo.xpath('td[5]/span[1]/script').re(r'\d+\.?\d*')
            # eth value is in column 6
            content["time_local6"] = vo.xpath('td[6]/span[1]/script').re(r'\d+\.?\d*')

            # note: the == '' comparison binds only to default_24_min, not to the first two operands
            if content['time_local'] or content["time_local6"] or content["default_24_min"] == '':
                # query the port id that has to be closed
                sel.cursor.execute("select bp.id from board_list as b INNER join board_port_list as bp on b.id = bp.board_id where bp.close_time>0 and b.bar_id = "+str(content['bar_id'])+" and FIND_IN_SET('"+str(content['computer_name'])+"',bp.comp_id)")
                port_id = sel.cursor.fetchone()
                print(port_id)
                if port_id is None:
                    content['port_id'] = 0
                else:
                    content['port_id'] = port_id[0]
            else:
                content['port_id'] = 0

            content_all[dict_index] = content  # append to the results dict
            dict_index += 1

        item['scan_url'] = response.url  # URL crawled in this run
        item['scan_content'] = json.dumps(content_all, ensure_ascii=False)  # serialized as JSON

        yield item
Example 5
    def parse(self, response):  # callback invoked once the response is fetched
        sel = database.conn(self)
        content_all = {}  # dict of per-row results
        dict_index = 0  # index into the results dict
        item = UupoolItem()  # single item holding all rows of this page
        for vo in response.xpath('//*[@id="online-list"]/tr'):  # extract rows via XPath
            content = {}
            computer_name = vo.xpath('td[2]')

            content["computer_name"] = computer_name.xpath(
                'string(.)').extract()[0]

            # default bar_id is 0; walk every cafe's machine list to find the one containing this computer name
            content["bar_id"] = 0
            for idx in self.bar_id:
                if content["computer_name"].lower() in self.bar_id[idx]:
                    content["bar_id"] = idx
                    break

            # content["computer_name_num"] = re.search('[1-9]\d*', content["computer_name"]).group() #电脑名称取数字
            # content["bar_id"] = self.bar_id[response.url]

            # query the port id that has to be closed
            # print('sql:'+"select bp.id from board_list as b INNER join board_port_list as bp on b.id = bp.board_id where bp.close_time>0 and b.bar_id = "+str(content['bar_id'])+" and FIND_IN_SET('"+str(content['computer_name'])+"',bp.comp_id)")
            sel.cursor.execute(
                "select bp.id from board_list as b INNER join board_port_list as bp on b.id = bp.board_id where bp.close_time>0 and b.bar_id = "
                + str(content['bar_id']) + " and FIND_IN_SET('" +
                str(content['computer_name']) + "',bp.comp_id)")
            port_id = sel.cursor.fetchone()
            if port_id is None:
                content['port_id'] = 0
            else:
                content['port_id'] = port_id[0]

            content_all[dict_index] = content  # append to the results dict
            dict_index += 1

        item['scan_url'] = response.url  # URL crawled in this run
        item['scan_content'] = json.dumps(content_all,
                                          ensure_ascii=False)  # serialized as JSON
        yield item
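Every example builds the FIND_IN_SET lookup by string concatenation. A parameterized variant of the same query (a sketch, not taken from the source; it assumes a DB-API cursor such as pymysql's, which uses %s placeholders):

sql = ("select bp.id from board_list as b "
       "inner join board_port_list as bp on b.id = bp.board_id "
       "where bp.close_time > 0 and b.bar_id = %s "
       "and FIND_IN_SET(%s, bp.comp_id)")
sel.cursor.execute(sql, (content['bar_id'], content['computer_name']))
port_id = sel.cursor.fetchone()
content['port_id'] = port_id[0] if port_id else 0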
Example 6
    def process_item(self, item, spider):
        self = database.conn(self)  # database instance
        if spider.name in ('f2pool', 'sparkpool', 'uupool'):

            content = item['scan_content']

            content_data = json.loads(content)
            comp_name_list = []  # all computer names reported by the third-party site
            port_list = []  # ports already written in this run; duplicates are written only once
            now_bar_id = 0  # internet cafe currently being processed
            now_bar_comp_list = []  # (port id, computer names) pairs of the current cafe
            all_bar_comp_name = []  # all computer names of the current cafe
            no_find_list = []  # names present in the DB but missing from the third-party site

            for info in content_data:
                computer_name = content_data[info]['computer_name']
                bar_id = content_data[info]['bar_id']
                port_id = content_data[info]['port_id']
                comp_name_list.append(computer_name)

                # when the cafe changes, load all of its ports and computer names from the DB
                if now_bar_id != bar_id:
                    now_bar_comp_list = []
                    now_bar_id = bar_id
                    self.cursor.execute(
                        "select bp.id,bp.comp_id from board_list as b INNER join board_port_list as bp on b.id = bp.board_id where bp.close_time>0 and b.bar_id = "
                        + str(bar_id) + " and bp.comp_id <> ''")
                    bar_comp_list = self.cursor.fetchall()
                    for comp_list in bar_comp_list:
                        foo = []
                        foo.append(comp_list[0])
                        foo.append(comp_list[1])
                        for voo in comp_list[1].split(','):
                            all_bar_comp_name.append(voo)
                        now_bar_comp_list.append(foo)

                # write each port id only once
                if port_id not in port_list:
                    # only write non-zero port ids
                    if port_id != 0:
                        port_list.append(port_id)
                        sql = "INSERT INTO close_list(bar_id, computer_name ,board_port_id,created_at) VALUES  ('" + str(
                            bar_id) + "','" + computer_name + "','" + str(
                                port_id) + "',NOW())"
                        self.cursor.execute(sql)
            print('comp_name_list', comp_name_list)
            # collect every computer name that did not show up on the site into no_find_list
            #for comp in all_bar_comp_name:
            #    if comp not in comp_name_list:
            #        no_find_list.append(comp)

            # look up the port for each machine in the not-found list
            #for comp in no_find_list:
            #    for vo in now_bar_comp_list:
            #        # found the port, write it to the DB
            #        if comp in vo[1].split(','):
            #            # write each port id only once
            #            if vo[0] not in port_list:
            #                # only write non-zero port ids
            #                if port_id != 0:
            #                    port_list.append(vo[0])
            #                    sql = "INSERT INTO close_list(bar_id, computer_name ,board_port_id,created_at) VALUES ('"+str(bar_id)+"','"+comp+"','"+str(vo[0])+"',NOW())"
            #                    self.cursor.execute(sql)

            try:
                self.db.commit()
            except Exception as e:
                print(e)
                self.db.rollback()
        elif spider.name == 'vvpool':
            pass
        return item
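Example 6 is a Scrapy item pipeline, so it only runs if it is enabled in the project settings. A minimal sketch; the module and class names below are placeholders, not taken from the source:

# settings.py
ITEM_PIPELINES = {
    'poolmonitor.pipelines.CloseListPipeline': 300,  # hypothetical path to the pipeline class in Example 6
}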