def process_racecoord(self, mv_id):
    handler = parsers.RaceCoordXMLParser()
    util.http_request('/getRaceCoord.php?mv_id=%d' % mv_id, handler, conf,
                      logger=logger)
    self.route['race_coord'] = handler.race_coord
def process_marshvariants(self):
    handler = parsers.MarshVariantsXMLParser(current_time)
    util.http_request('/getMarshVariants.php', handler, conf, logger=logger)
    self.marsh_variants = handler.marsh_variants
def load(self):
    handler = parsers.StopsSubXMLParser()
    util.http_request('/station.php', handler, conf, logger)
    for st_id in handler.stations:
        station = handler.stations[st_id]
        station['in_moscow_region'] = self.in_moscow_region(station['location'])
        self.stations[st_id] = station
    self.load_old()
def process_racecard(self, mr_id, mv_id):
    handler = parsers.RaceCardXMLParser()
    util.http_request('/getRaceCard.php?mv_id=%d' % mv_id, handler, conf,
                      logger=logger)
    self.route['race_card'] = handler.race_card
    for item in self.json['add_racecard']:
        if mr_id == item['mr_id']:
            self.route['race_card'][item['direction']].insert(item['index'],
                                                              item['item'])
    for item in self.json['change_racecard']:
        if mr_id == item['mr_id']:
            self.route['race_card'][item['direction']][item['index']]['st_id'] = item['st_id']
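# Hedged sketch of the shared helper the transit snippets above assume:
# util.http_request(path, handler, conf, logger) apparently fetches an XML
# document and streams it through the caller-supplied SAX handler. The
# conf['base_url'] key, the urlopen call, and the function name below are
# illustrative assumptions, not the project's actual implementation.
import xml.sax
try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib2 import urlopen         # Python 2

def http_request_sketch(path, handler, conf, logger=None):
    url = conf['base_url'] + path
    if logger:
        logger.info('GET %s' % url)
    body = urlopen(url).read()
    # Feed the raw XML to the SAX ContentHandler the caller provided.
    xml.sax.parseString(body, handler)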
def check_res(new_node):
    import time
    new_ip = new_node.split("@")[1]
    code, data = http_request(new_ip, cbase_port, (cbase_user, cbase_pwd),
                              'GET', '/pools/default/rebalanceProgress', {})
    assert code == 200
    obj = json.loads(data)
    while obj["status"] == "none":
        time.sleep(1)  # avoid busy-waiting on the progress endpoint
        _, data = http_request(new_ip, cbase_port, (cbase_user, cbase_pwd),
                               'GET', '/pools/default/rebalanceProgress', {})
        obj = json.loads(data)
def process_stops(self):
    handler = parsers.StopsXMLParser()
    util.http_request('/getStops.php', handler, conf, logger=logger)
    self.stations = handler.stations
    for item in self.json['add_stations']:
        self.stations[item['id']] = {
            'name': item['name'],
            'location': {'lat': item['lat'], 'long': item['long']},
            'tags': set()
        }
def update_keywords_in_article(article, article_data):
    if article_data is None:
        article_data = util.http_request(article.url)
    keyword_count_list = count_keywords_in_text(article_data,
                                                logic.getKeywordDict())
    article.keywords = create_list_of_keywords(keyword_count_list)
    article.weight = calculate_keywords_weight(keyword_count_list)
def test_http_request2(url):
    method = 'GET'
    params = {'key1': 'aaa', 'key2': 'bbb', 'key3': ['a1', 'a2', 'a3']}
    #params = None
    headers = {'User-Agent': 'Mozilla/5.0 (x128) TestAgent/1.0'}
    res = util.http_request(url, method, params, 'user1', '1111', headers)
    text = str(res.getcode()) + '\n'
    text += res.read().decode('utf-8')
    return text
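# Hedged reconstruction of the signature test_http_request2 exercises above:
# util.http_request(url, method, params, user, password, headers) returning a
# urllib-style response object. Basic auth and the doseq param encoding are
# assumptions for illustration; the real helper may differ.
import base64
try:
    from urllib.parse import urlencode           # Python 3
    from urllib.request import Request, urlopen
except ImportError:
    from urllib import urlencode                 # Python 2
    from urllib2 import Request, urlopen

def http_request_sketch(url, method, params, user, password, headers):
    body = None
    if params is not None:
        encoded = urlencode(params, doseq=True)  # doseq expands list values
        if method == 'GET':
            url = url + '?' + encoded
        else:
            body = encoded.encode('utf-8')
    req = Request(url, data=body, headers=dict(headers))
    auth = base64.b64encode(('%s:%s' % (user, password)).encode('utf-8'))
    req.add_header('Authorization', 'Basic ' + auth.decode('ascii'))
    return urlopen(req)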
def check_container():
    for name in name_ips:
        ip = get_container_host(name)
        _, data = http_request(ip, PORT, user=(USER, PASSWORD), method='GET',
                               path='/container/status/%s' % name)
        res = eval(data)  # response is a Python literal; assumes a trusted service
        if res["response"]["status"] == "stopped":
            _, data = http_request(ip, PORT, user=(USER, PASSWORD),
                                   method='POST', path='/container/start',
                                   data={"containerName": name})
            res = eval(data)
            if res["status"] != "started":
                raise Exception("%s container cannot start" % name)
def add_node(cluster_name):
    new_container = get_new_container_name(cluster_name)
    new_ip = name_ips[new_container]
    old_ip = name_ips[list(get_old_container_name())[0]]
    data = {"hostname": new_ip, "user": cbase_user, "password": cbase_pwd}
    code, data = http_request(old_ip, cbase_port, (cbase_user, cbase_pwd),
                              'POST', '/controller/addNode', data)
    assert code == 200
    update_old_container_names(new_container)
    obj = json.loads(data)
    return obj["otpNode"]
def rebalance(new_node):
    pair = new_node.split("@")
    node_flag = pair[0]
    new_ip = pair[1]
    names = get_old_container_name()
    nodes = []
    for name in names:
        nodes.append("%s@%s" % (node_flag, name_ips[name]))
    str_nodes = ','.join(nodes)
    data = {"knownNodes": str_nodes}
    code, data = http_request(new_ip, cbase_port, (cbase_user, cbase_pwd),
                              'POST', '/controller/rebalance', data)
    assert code == 200
def _has_talkback(url, data=None):
    if data is None:
        data = http_request(url)
    # One way to detect talkbacks is to look at the text of the <a> tags.
    links = get_link_texts(data)
    for link in links:
        con = "".join(link).lower()
        for x in ["comments", "talkback", "trackback", "talk-back",
                  "track-back", "comentarios"]:
            if (con.find(x) != -1 and con.find("commentary") == -1
                    and con.find("commented") == -1):
                logging.debug("TalkBack: Found '%s' in '%s'. Has talkbacks."
                              % (x, con))
                return True
    # The second way is to look for a <form> with ID "comment" or "something-back".
    if search_comment_forms(data):
        return True
    return False
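# Hedged sketch of the two helpers _has_talkback relies on; the originals are
# not shown, so these regex-based stand-ins are assumptions for illustration.
import re

def get_link_texts(data):
    # Text content of every <a>...</a> element in the page.
    return re.findall(r'<a\b[^>]*>(.*?)</a>', data, re.I | re.S)

def search_comment_forms(data):
    # True if any <form> id looks like "comment" or "something-back".
    for form_id in re.findall(r'<form\b[^>]*\bid=["\']([^"\']+)', data, re.I):
        form_id = form_id.lower()
        if form_id == 'comment' or form_id.endswith('-back'):
            return True
    return False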
def fetchByUrl(NEWS_URL, language):
    articles = []
    dom = minidom.parseString(util.http_request(NEWS_URL))
    for node in dom.getElementsByTagName('item'):
        # The source is the last part of the title; the rest is the title itself.
        arr = node.getElementsByTagName('title')[0].firstChild.data.rsplit('-', 1)
        newArticle = common.Article()
        newArticle.title = arr[0].strip()
        logging.debug("Parsing article: '%s'" % newArticle.title)
        # Replace the ' mark with its HTML encoding and save in source.
        newArticle.source = arr[1].strip()
        url = node.getElementsByTagName('link')[0].firstChild.data
        newArticle.url = url
        # Skip articles that are already in the DB.
        if isURLInDB(url):
            logging.debug("Article already in the DB, skipping")
            continue
        # Extract the text: keep everything before the "..." marker.
        rawDescription = node.getElementsByTagName('description')[0].firstChild.data
        #newArticle.raw = rawDescription  # used for debugging
        description = util.extract_text(rawDescription).split("...")[0].strip()
        if description.find(newArticle.source) > -1:
            newArticle.desc = description.split(newArticle.source)[1]
        else:
            newArticle.desc = description
        datestring = node.getElementsByTagName('pubDate')[0].firstChild.data
        if datestring != '':
            newArticle.created = datetime.datetime.strptime(
                datestring, '%a, %d %b %Y %H:%M:%S GMT+00:00')
        soup = BeautifulSoup(rawDescription)
        thumbnail = soup.find('img')
        if thumbnail:
            try:
                newArticle.pic_url = thumbnail['src']
            except:
                pass
        #@@ari
        newArticle.language = language
        articles.append(newArticle)
    return articles
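# Hedged sketch of the isURLInDB check used by both feed fetchers; assuming an
# App Engine datastore query in the same style process_article uses below for
# Sources. The "Articles" kind name is a guess for illustration.
from google.appengine.ext import db

def isURLInDB(url):
    return db.GqlQuery("SELECT * FROM Articles WHERE url=:url",
                       url=url).get() is not None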
def check_cbase():
    port = 8091
    user = '******'
    password = cluster_name
    bad_nodes = []
    for ip in name_ips.values():
        code, data = http_request(ip, port, user=(user, password),
                                  method='GET', path='/nodeStatuses')
        if code == 200:
            res = eval(data)
            for k, v in res.items():
                if v['status'] != 'healthy':
                    bad_nodes.append(k)
                    break
    print 'bad nodes:'
    print bad_nodes
def main(argv):
    remoteIP, filename, path, remotehost = util.get_remotehost(argv[1])
    print 'remote: ', remoteIP, filename, path, remotehost
    s = RawSocket()
    # Three-way handshake.
    s.connect(remoteIP, 80)
    # Download the file.
    print 'tcp handshake finished, start http request......'
    s.send(util.http_request(path, remotehost))
    data = s.receive()
    # Extract the body from the HTTP response.
    res = data.split("\r\n\r\n", 1)[-1]
    if not data.startswith("HTTP/1.1 200 OK"):
        print 'http request failed'
        s.close('send')
        sys.exit(1)
    with open(filename, "w") as f:
        f.write(res)
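# Hedged sketch of the request builder the raw-socket snippet above relies on:
# util.http_request(path, remotehost) presumably returns a ready-to-send
# HTTP/1.1 GET string. This stand-in is an assumption, not the actual util.
def http_request_sketch(path, remotehost):
    return ('GET %s HTTP/1.1\r\n'
            'Host: %s\r\n'
            'Connection: close\r\n'
            '\r\n' % (path, remotehost))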
def expand_container(cluster_name):
    def check_res(cluster_name):
        import time
        time.sleep(5)
        _, data = http_request(MAIN_HOST, PORT, user=(USER, PASSWORD),
                               method='GET',
                               path='/containerCluster/createResult/%s' % cluster_name)
        obj = json.loads(data)
        retry_count = 20
        while obj['response']['code'] != '000000' and retry_count > 0:
            time.sleep(3)
            _, data = http_request(MAIN_HOST, PORT, user=(USER, PASSWORD),
                                   method='GET',
                                   path='/containerCluster/createResult/%s' % cluster_name)
            obj = json.loads(data)
            retry_count -= 1
        # Check the result code rather than the retry counter, so success on
        # the final retry is not reported as a failure.
        if obj['response']['code'] != '000000':
            raise Exception("expand container failed")

    data = {
        "containerClusterName": cluster_name,
        "componentType": "cbase",
        "networkMode": "ip",
        "nodeCount": "1",
        "image": "10.160.140.32:5000/lihanlin1/cbase:V4",
    }
    code, data = http_request(MAIN_HOST, PORT, (USER, PASSWORD), 'POST',
                              '/containerCluster/node', data)
    assert code == 200
    check_res(cluster_name)
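# Hedged end-to-end usage of the Couchbase expansion helpers above, under the
# assumption that expand_container, add_node, rebalance, and check_res live in
# one module; 'my-cluster' is a placeholder name.
expand_container('my-cluster')     # start one more cbase container
otp_node = add_node('my-cluster')  # register it with the existing cluster
rebalance(otp_node)                # trigger a rebalance across all known nodes
check_res(otp_node)                # poll until rebalance progress is reported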
def fetchByUrlYT(NEWS_URL, language):
    articles = []
    dom = minidom.parseString(util.http_request(NEWS_URL))
    for node in dom.getElementsByTagName('entry'):
        newArticle = common.Article()
        newArticle.title = node.getElementsByTagName('title')[0].firstChild.data
        logging.debug("Parsing article: '[YouTube] %s'" % newArticle.title)
        newArticle.source = "YouTube"
        url = node.getElementsByTagName('link')[0].getAttribute('href')
        newArticle.url = url
        # Skip articles that are already in the DB.
        if isURLInDB(url):
            logging.debug("Article already in the DB, skipping")
            continue
        # Extract the text.
        rawDescription = node.getElementsByTagName('content')[0].firstChild.data
        #newArticle.raw = rawDescription  # used for debugging
        #description = rawDescription.split('tyle="font-size: 12px; margin: 3px 0px;"><span>')[1].split('</span></div></td>')[0].strip()
        description = util.extract_text(rawDescription).strip()[len(newArticle.title):]
        newArticle.desc = description
        datestring = node.getElementsByTagName('updated')[0].firstChild.data
        if datestring != '':
            newArticle.created = datetime.datetime.strptime(
                datestring, '%Y-%m-%dT%H:%M:%S.000Z')
        thumbnail = re.findall(r'img alt="" src="(http://i.ytimg.com/[^"]+)"',
                               rawDescription)
        if len(thumbnail) > 0:
            newArticle.pic_url = thumbnail[0]
        #@@ari
        newArticle.language = language
        articles.append(newArticle)
    return articles
def process_article(article):
    logging.debug("Got article: %s (%s)" % (article.source, article.url))
    has_talkbacks = False
    volatile = False
    create_new_source = True
    article_data = None
    # Check whether the article has a known source.
    source = db.GqlQuery("SELECT * FROM Sources WHERE name=:source",
                         source=article.source).get()
    if source is not None:
        # Reuters and the Guardian are big enough sources to get delicate handling.
        if source.name.lower() == 'reuters':
            logging.debug("A reuters article")
            article_data = http_request(article.url)
            has_talkbacks = _has_talkback_reuters(article.url, article_data)
            create_new_source = False
        elif source.name.lower().find('guardian') != -1:
            logging.debug("A guardian article")
            article_data = http_request(article.url)
            has_talkbacks = _has_talkback_guardian(article.url, article_data)
            create_new_source = False
        else:
            if not source.volatile:
                logging.debug("known source, has talkbacks = %s" % source.has_tkbks)
                article.source_weight = source.weight
                has_talkbacks = source.has_tkbks
                create_new_source = False
            else:
                logging.debug("volatile source (%s)" % source.name)
                volatile = True
    if create_new_source:
        source = Sources(name=article.source)
        article_data = http_request(article.url)
        has_talkbacks = _has_talkback(url=article.url, data=article_data)
        # Log the freshly computed value; source.has_tkbks is not set yet
        # for a newly created source.
        logging.debug("A new source = %s, has talkbacks = %s"
                      % (article.source, has_talkbacks))
    # If this is not a volatile source, add it to the db.
    if not volatile:
        source.has_tkbks = has_talkbacks
        source.put()
    # If the source has no talkbacks, don't process the article.
    if not has_talkbacks:
        logging.debug("Article has no talkbacks")
        return False
    article.source_weight = source.weight
    # Process the keywords in the article.
    keywords.update_keywords_in_article(article, article_data)
    logging.debug("Article with src_weight = %d and keywords_weight = %d"
                  % (article.source_weight, article.weight))
    if article.weight > 0:
        logging.debug("Article was added")
        return True
    else:
        logging.debug("No keywords. Article was not added")
        return False
def load(self):
    handler = parsers.MarshesSubXMLParser(logger)
    util.http_request('/', handler, conf, logger)
    self.marshes = handler.marshes
    self.load_old()
def process_stops(self):
    handler = parsers.StopsXMLParser()
    util.http_request('/getStops.php', handler, conf, logger)
    self.stations = handler.stations
def process_marshes(self):
    handler = parsers.MarshesXMLParser(set([7]), logger=logger)
    util.http_request('/getMarshes.php', handler, conf, logger=logger)
    self.marshes = handler.marshes
def process_racecoord(self, mv_id):
    handler = parsers.RaceCoordXMLParser()
    util.http_request('/getRaceCoord.php?mv_id=%d' % mv_id, handler, conf,
                      logger)
    self.route['race_coord'] = handler.race_coord
def process_marshvariants(self):
    handler = parsers.MarshVariantsXMLParser(current_time)
    util.http_request('/getMarshVariants.php', handler, conf, logger)
    self.marsh_variants = handler.marsh_variants
def _has_talkback_guardian(url, data):
    if data is None:
        data = http_request(url)
    return data.find('Comments in chronological order') != -1
def _has_talkback_reuters(url, data):
    if data is None:
        data = http_request(url)
    # Log the same path the return expression tests.
    logging.debug("Reuters returning %s" % (data.find('/article/comments/') != -1))
    return data.find('/article/comments/') != -1