def readPage(self, response):
    """Schedule one detail request per listing chunk, then ask for more results.

    The response body is split on the literal ``<cord>`` marker. Every chunk
    is parsed into a ``Host`` only to read the ``alias`` and ``suffixId``
    that make up the detail URL; the raw chunk itself is stored under
    ``meta["item"]`` for the ``readDetailHost`` callback.

    If the body contains a ``div.dcontain`` element with an ``id``, the first
    such id is written to ``meta["data"]["of"]`` and that form data is posted
    to the "load more" endpoint, with this method as the callback again.
    """
    # NOTE(review): indentation was lost in the source; the "load more" part
    # is assumed to run once, after the loop over chunks -- confirm.
    shared_meta = response.meta

    for chunk in response.body.split("<cord>"):
        host = Host()
        host.parse(chunk)
        shared_meta["item"] = chunk
        detail_url = 'https://vinalo.com/%s-%s' % (host.alias, host.suffixId)
        yield scrapy.Request(
            url=detail_url,
            callback=self.readDetailHost,
            meta=shared_meta,
        )

    container_ids = (
        Selector(text=response.body).css('div.dcontain::attr(id)').extract()
    )
    if len(container_ids) > 0:
        form = shared_meta["data"]
        form["of"] = container_ids[0]
        yield scrapy.FormRequest(
            url='https://vinalo.com/loadh/morekqloai',
            callback=self.readPage,
            formdata=form,
            meta=shared_meta,
        )
def description(self, response):
    """Read the description from a host page and store it when it is not empty.

    ``response.meta`` must provide ``'data'`` (printed for tracing) and
    ``'id'`` (passed to ``Host.updateDescription`` together with the text).
    """
    request_meta = response.meta
    store = Host()

    print('=============================')
    print(request_meta['data'])

    text = store.getDescription(response)
    if text != "":
        store.updateDescription(request_meta['id'], text)
def readDetailHost(self, response):
    """Persist a host from its detail page and emit its profile image URL.

    The raw listing chunk in ``meta["item"]`` is parsed into a ``Host``.
    The result of ``self.checkNextPage(meta)`` is always yielded first. When
    ``checkExisted()`` is true nothing else happens. Otherwise the method
    yields ``{'image_urls': [...]}``, replaces ``image_profile`` with
    ``<sha1 hex digest>.jpg``, parses the page content and inserts the host.
    A positive insert id increments ``self.state[meta["cityName"]]`` and
    links the host to each positive tag id in ``listTagId``.
    """
    meta = response.meta
    raw_item = meta["item"]
    host = Host()
    host.parse(raw_item)
    print("===https://vinalo.com/%s-%s" % (host.alias, host.crawler))

    yield self.checkNextPage(meta)

    if host.checkExisted():
        return

    # Yield the original URL before the attribute is overwritten below.
    yield {'image_urls': [host.image_profile]}
    digest = hashlib.sha1(host.image_profile).hexdigest()
    host.image_profile = '%s.jpg' % digest

    host.parseContent(response)
    new_id = host.insertDB()
    if new_id > 0:
        city = meta["cityName"]
        self.state[city] = self.state.get(city, 0) + 1

        # NOTE(review): indentation was lost in the source; tag links are
        # assumed to be created only for a newly inserted host -- confirm.
        tag_links = ObjectTag()
        for tag_id in host.listTagId:
            if tag_id > 0:
                tag_links.insertNewObjectTag(new_id, tag_id)
def readPage(self, response):
    """Schedule detail requests for a listing page and track paging state.

    The body is split on ``<cord>`` and empty chunks are dropped.
    ``meta["last_item"]`` starts as ``False`` and any stale ``next_page``
    entry is removed. With no chunks left, ``last_item`` becomes ``True`` and
    the result of ``self.checkNextPage(meta)`` is yielded.

    While iterating, ``last_item`` is set on the final chunk. A chunk that
    contains ``<viewkq>`` is cut at that marker, and the first
    ``div.dcontain`` id found in the body (if any) is stored as
    ``meta["next_page"]``. Each chunk is stored in ``meta["item"]`` and a
    detail request is yielded for it.

    Finally, when ``next_page`` is set, it is written to
    ``meta["data"]["of"]`` and the form is posted to the "load more"
    endpoint, with this method as callback and ``self.download_errback`` as
    error handler.
    """
    # NOTE(review): indentation was lost in the source; the nesting below is
    # the most plausible reading (selector lookup inside the '<viewkq>'
    # branch, "next page" request after the loop) -- confirm.
    chunks = [piece for piece in response.body.split("<cord>") if piece]
    meta = response.meta
    meta["last_item"] = False
    meta.pop('next_page', None)

    if not chunks:
        meta["last_item"] = True
        yield self.checkNextPage(meta)

    final_index = len(chunks) - 1
    for position, chunk in enumerate(chunks):
        if position == final_index:
            meta["last_item"] = True
        if chunk == "":
            continue

        if '<viewkq>' in chunk:
            chunk = chunk.split('<viewkq>')[0]
            pager_ids = (
                Selector(text=response.body)
                .css('div.dcontain::attr(id)')
                .extract()
            )
            print(pager_ids)
            if len(pager_ids) > 0:
                meta["next_page"] = pager_ids[0]

        host = Host()
        host.parse(chunk)
        meta["item"] = chunk
        link = 'https://vinalo.com/%s-%s' % (host.alias, host.suffixId)
        yield scrapy.Request(
            url=link,
            callback=self.readDetailHost,
            meta=meta,
            dont_filter=True,
        )

    if "next_page" in meta:
        print('========next page=========')
        form = meta["data"]
        form["of"] = meta["next_page"]
        more_url = 'https://vinalo.com/loadh/morekqloai'
        yield scrapy.FormRequest(
            url=more_url,
            callback=self.readPage,
            formdata=form,
            meta=meta,
            dont_filter=True,
            errback=lambda failure: self.download_errback(failure, more_url),
        )
def start_requests(self):
    """Yield one unfiltered request per row returned by ``Host.getItems()``.

    The URL is built from ``row[1]`` and ``row[2]``; ``row[0]`` is passed on
    as ``meta['id']`` and the whole row as ``meta['data']``. Responses are
    handled by ``self.description``.
    """
    for row in Host().getItems():
        yield scrapy.Request(
            'https://vinalo.com/%s-%s' % (row[1], row[2]),
            callback=self.description,
            dont_filter=True,
            meta={'id': row[0], 'data': row},
        )
def start_requests(self):
    """Walk the stored hosts in batches of 10 and request each host page.

    Batches come from ``Host.getListHost(offset, 10)`` and the walk stops at
    the first empty batch. For every row the URL is built from ``row[1]``
    and ``row[2]``; ``row[0]``, ``row[3]`` and the row itself travel in
    ``meta`` as ``'id'``, ``'tag'`` and ``'data'``. Responses are handled by
    ``self.keyword``.
    """
    host = Host()
    page_size = 10
    offset = 0
    while True:
        rows = host.getListHost(offset, page_size)
        if len(rows) <= 0:
            break
        for row in rows:
            yield scrapy.Request(
                'https://vinalo.com/%s-%s' % (row[1], row[2]),
                callback=self.keyword,
                dont_filter=True,
                meta={'id': row[0], 'tag': row[3], 'data': row},
            )
        offset += page_size
def parseHostXml(cls, hostPath, hosts):
    """Parse the host XML file at ``hostPath`` and append each host to ``hosts``.

    Returns ``True`` when every child of the root is a ``host`` node that
    ``__parseHostNode`` accepts. Returns ``False`` (after logging an error)
    when the file does not exist, no root can be obtained, a child has a
    different tag, or a host node fails to parse. Hosts appended before a
    failure stay in ``hosts``.
    """
    if not os.path.exists(hostPath):
        cls._logger.error("parse host xml: file is not exist")
        return False

    root = Common.getRoot(hostPath)
    if root is None:
        cls._logger.error("parser host xml: host xml format is not valid.")
        return False

    for node in Common.getChild(root):
        if node.tag != "host":
            cls._logger.error("parser host xml: child node name is not host.")
            return False

        parsed = Host()
        if cls.__parseHostNode(node, parsed):
            hosts.append(parsed)
            continue

        cls._logger.error("parser host xml: host node format is not valid.")
        return False

    return True
class PingNetworkController:
    """Runs one ``PingThread`` per address in the network of a ``Host``."""

    def __init__(self, ip, mask):
        """Build the ``Host`` for ``ip`` / ``mask`` and keep it as ``self.host``."""
        self.host = Host(ip, mask)

    def pingAllNetworkThreads(self):
        """Start a thread per address, wait for all of them, print the live hosts.

        Addresses come from ``self.host.listIpRange()``. Once every thread has
        been joined, each entry of ``self.host.listHostUp`` is printed,
        followed by a final marker line.
        """
        # Create the threads: one per IP contained in the host's network.
        workers = [
            PingThread(address, self.host)
            for address in self.host.listIpRange()
        ]

        # Start all threads before joining any of them so they run together.
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

        # presumably listHostUp is filled in by the threads -- verify in PingThread
        for ip in self.host.listHostUp:
            print(f"Este host está ligado {ip} ")
        print("terminou")
def on_get(self, req, res):
    """Return the hosts of the requesting user as a list of dicts.

    The user name is read from ``req.context['user'].name``. On success the
    serialised hosts are stored in ``req.context['result']`` and the status
    is set to 200. Any failure still answers 404, exactly as before, but the
    exception is now logged instead of being discarded silently.
    """
    # Local import: the file's import block is not visible from this block.
    import logging

    try:
        user_name = req.context['user'].name
        query = Host.objects(user=user_name)  # .allow_filtering()
        hosts = [host.to_dict() for host in query.all()]
        res.status = falcon.HTTP_200
        req.context['result'] = hosts
    except Exception:
        # Keep the best-effort 404 for the client, but record the traceback
        # so that real faults are not hidden behind "not found".
        logging.getLogger(__name__).exception("on_get: failed to list hosts")
        res.status = falcon.HTTP_404
def readDetailHost(self, response):
    """Insert the host described by ``meta["item"]`` unless it already exists.

    The raw chunk is parsed into a ``Host``. When ``checkExisted()`` is true
    the method stops. Otherwise the page content is parsed and the host is
    inserted; a positive insert id increments the counter kept in
    ``self.state`` under ``meta["cityName"]``.
    """
    raw_item = response.meta["item"]
    host = Host()
    host.parse(raw_item)

    if host.checkExisted():
        return

    host.parseContent(response)
    new_id = host.insertDB()
    if new_id > 0:
        city = response.meta["cityName"]
        self.state[city] = self.state.get(city, 0) + 1
def keyword(self, response):
    """Print the row carried in ``meta['data']`` and hand the response to
    ``Host.getKeyword``.
    """
    request_meta = response.meta
    store = Host()

    print('=============================')
    print(request_meta['data'])

    store.getKeyword(response)
def __init__(self, ip, mask):
    """Build a ``Host`` from ``ip`` and ``mask`` and keep it as ``self.host``."""
    self.host = Host(ip, mask)
def host_create():
    """Create one host, or a numbered series of hosts, from the JSON payload.

    The payload is read from ``request.json['host']``. Users whose
    ``usertype`` is not ``'super'`` get an empty JSON object and nothing is
    stored.

    When both ``name`` and ``host`` contain ``[``, ``-`` and ``]`` (for
    example ``web[1-3]`` and ``10.0.0.[11-13]``), one host is created per
    pair of numbers from the two inclusive ranges, and the response is
    ``{'hosts': [...]}``. Otherwise a single host is created straight from
    the payload and the response is ``{'host': {...}}``.

    Raises ``KeyError`` when a payload field is missing, and ``ValueError``
    or ``IndexError`` when a bracket range is malformed.
    """
    username = current_user.username
    usertype = current_user.usertype
    payload = request.json['host']
    # The payload carries ``auth_password``; the previous debug print dumped
    # locals() and therefore wrote the credential to stdout. Only the caller
    # identity is printed now.
    print('host_add: username=%s usertype=%s' % (username, usertype))

    if usertype != 'super':
        return jsonify({})

    # Read every field up front so that a missing key fails before any
    # database work, on both code paths.
    name = payload['name']
    address = payload['host']
    port = payload['port']
    auth_username = payload['auth_username']
    auth_password = payload['auth_password']
    ram_capacity = payload['ram_capacity']
    ram_reserved = payload['ram_reserved']

    is_series = (
        all(mark in name for mark in '[-]')
        and all(mark in address for mark in '[-]')
    )

    if is_series:
        name_base, name_bounds = _parse_bracket_range(name)
        host_base, host_bounds = _parse_bracket_range(address)

        created = []
        # zip() stops at the shorter range, so unequal ranges are truncated.
        for name_no, host_no in zip(range(*name_bounds), range(*host_bounds)):
            now = datetime.utcnow()
            record = Host(
                name='%s%i' % (name_base, name_no),
                host='%s%i' % (host_base, host_no),
                port=port,
                auth_username=auth_username,
                auth_password=auth_password,
                ram_capacity=ram_capacity,
                ram_reserved=ram_reserved,
                created=now,
                updated=now,
            )
            db.session.add(record)
            created.append(record)
        # NOTE(review): indentation was lost in the source; a single commit
        # after the loop is assumed -- confirm.
        db.session.commit()

        data = {'hosts': [object_to_dict(record) for record in created]}
    else:
        payload['created'] = payload['updated'] = datetime.utcnow()
        record = Host(**payload)
        db.session.add(record)
        db.session.commit()
        data = {'host': object_to_dict(record)}

    return jsonify(data)


def _parse_bracket_range(value):
    """Split ``'web[1-3]'`` into ``('web', [1, 4])``, ready for ``range(*...)``.

    The text before the first ``[`` is the base. The numbers between the
    brackets are split on ``-`` and the second one is incremented so that
    the range includes the upper bound. The bounds are built as a real list
    so the increment also works where ``map`` returns an iterator.
    """
    opening = value.find('[')
    closing = value.find(']')
    bounds = [int(part) for part in value[opening + 1:closing].strip().split('-')]
    bounds[1] += 1
    return value[:opening], bounds