def create_range(self, name, start, end, disable=True, should_print=True):
    """Create a ``range`` object and optionally print the formatted reply.

    Issues a ``create`` request for the ``range`` object type using this
    instance's ``hostname`` and ``auth``.

    :param name: value sent as ``name``.
    :param start: value sent as ``start_addr``.
    :param end: value sent as ``end_addr``.
    :param disable: value sent as ``disable``.
    :param should_print: the formatted response is printed only when this
        is exactly ``True``.
    :returns: ``None``.
    """
    payload = {
        'name': name,
        'start_addr': start,
        'end_addr': end,
        'disable': disable,
    }
    response = make_request(
        "range",
        'create',
        data=payload,
        hostname=self.hostname,
        auth=self.auth,
    )
    # The response is formatted unconditionally, even if nothing is printed.
    formatted = self.get_output(response, self.o_format, self.delimeter)
    # TODO: Print the object after a get range
    # Really should refactor this into its own module
    if should_print is True:
        print(formatted)
def create_host(self, name, ipv4addrs, mac, view, network_block=False,
                should_print=False):
    """Create a ``record:host`` object and optionally print the reply.

    :param name: value sent as ``name``.
    :param ipv4addrs: address for the host. The literal string
        ``'nextavailableip'`` combined with a ``network_block`` is rewritten
        to ``'func:nextavailableip:<network_block>'``.
    :param mac: added to the address entry as ``mac`` only when truthy.
    :param view: value sent as ``view``.
    :param network_block: block used for the next-available-IP rewrite;
        ``False`` disables the rewrite.
    :param should_print: the formatted response is printed only when this
        is exactly ``True``.
    :returns: ``None``.
    """
    if ipv4addrs == 'nextavailableip' and network_block is not False:
        ipv4addrs = 'func:nextavailableip:{}'.format(network_block)

    address = {'ipv4addr': ipv4addrs}
    if mac:
        address['mac'] = mac

    payload = {
        'ipv4addrs': [address],
        'name': name,
        'view': view,
    }
    response = make_request(
        "record:host", 'create', data=payload,
        hostname=self.hostname, auth=self.auth)
    formatted = self.get_output(response, self.o_format, self.delimeter)
    if should_print is True:
        print(formatted)
def create_zoneauth(self, zone, view, ns_group=None, grid_primary=None,
                    members=None, should_print=False):
    """Create a ``zone_auth`` object and report the outcome on stdout.

    :param zone: value sent as ``fqdn``.
    :param view: value sent as ``view``.
    :param ns_group: sent as ``ns_group`` only when truthy.
    :param grid_primary: when truthy, sent as
        ``grid_primary=[{'name': grid_primary}]``.
    :param members: accepted for interface compatibility; not used by this
        method. The default is ``None`` rather than a shared mutable list.
    :param should_print: accepted for interface compatibility; not used by
        this method (the outcome is always printed).
    :returns: ``None``.
    :raises SystemExit: with status 2 when inspecting the response raises.
    """
    payload = {'fqdn': zone, 'view': view}
    if grid_primary:
        payload['grid_primary'] = [{'name': grid_primary}]
    if ns_group:
        payload['ns_group'] = ns_group

    ret_obj = make_request("zone_auth", 'create', data=payload,
                           hostname=self.hostname, auth=self.auth)
    try:
        if ret_obj.status_code == 201:
            print("Successfully created zone_auth: {0}".format(zone))
        else:
            print("Unable to create zone_auth")
            print(ret_obj.json()['text'])
    except Exception:
        # Anything that goes wrong while reading the response is treated
        # as a failed creation and terminates the process.
        print("Unable to create zone_auth")
        sys.exit(2)
def user_skills(d):
    """Fetch the member's skills and store them under ``d[u"skills"]``.

    Requests ``/v3.0.0/members/<handle>/skills/`` for the quoted
    ``d[u"handle"]``, takes ``result.content.skills`` from the JSON reply,
    removes the ``hidden`` key from every skill entry and writes the
    result back into ``d`` in place.

    :param d: user document; must contain ``u"handle"``.
    :returns: ``None``.
    """
    path = u"/v3.0.0/members/%s/skills/" % quote(d[u"handle"])
    body = urllib2.urlopen(common.make_request(path)).read()
    skills = common.to_json(body)[u"result"][u"content"][u"skills"]
    for entry in skills.values():
        del entry[u"hidden"]
    d[u"skills"] = skills
def user_skills(d):
    """Fetch the member's skills and store them under ``d[u"skills"]``.

    Requests ``/v3/members/<handle>/skills/`` for the quoted
    ``d[u"handle"]``, decodes the reply as UTF-8, takes
    ``result.content.skills`` from the JSON, removes the ``hidden`` key
    from every skill entry and writes the result back into ``d`` in place.

    :param d: user document; must contain ``u"handle"``.
    :returns: ``None``.
    """
    path = u"/v3/members/%s/skills/" % quote_handle(d[u"handle"])
    request = common.make_request(path)
    text = common.open_request_and_read(request).decode("utf-8")
    skills = common.to_json(text)[u"result"][u"content"][u"skills"]
    for entry in skills.values():
        del entry[u"hidden"]
    d[u"skills"] = skills
def search_by_zone(self, zone, view, should_return=False):
    """Look up ``zone_auth`` objects by FQDN and view.

    :param zone: value substituted for ``fqdn`` in the query string.
    :param view: value substituted for ``view`` in the query string.
    :param should_return: when falsy, the formatted response is printed
        before returning; when truthy, nothing is printed.
    :returns: the object returned by ``make_request`` in both cases.
    """
    # NOTE(review): zone and view are interpolated into the query string
    # as-is, without URL-encoding.
    query = 'zone_auth?fqdn={0}&view={1}'.format(zone, view)
    ret_obj = make_request(query, 'get', hostname=self.hostname,
                           auth=self.auth)
    if not should_return:
        print(self.get_output(ret_obj, self.o_format, self.delimeter))
    return ret_obj
def create_networkcontainer(self, network, comment="", should_print=False):
    """Create a ``networkcontainer`` object and optionally print the reply.

    :param network: value sent as ``network``.
    :param comment: value sent as ``comment``.
    :param should_print: the formatted response is printed only when this
        is exactly ``True``.
    :returns: ``None``.
    """
    payload = {'network': network, 'comment': comment}
    response = make_request("networkcontainer", 'create', data=payload,
                            hostname=self.hostname, auth=self.auth)
    # Formatting happens regardless of whether the result is printed.
    formatted = self.get_output(response, self.o_format, self.delimeter)
    if should_print is True:
        print(formatted)
def extra_info(d, category):
    """Fetch one category of member info and store it under ``d[category]``.

    Requests ``/v3.0.0/members/<handle>/<category>/`` for the quoted
    ``d[u"handle"]``, takes ``result.content`` from the JSON reply, strips
    the bookkeeping keys listed below and writes the rest into ``d``.

    :param d: user document; must contain ``u"handle"``.
    :param category: path segment to request and key to store under.
    :returns: ``None``.
    :raises KeyError: if any of the stripped keys is absent from the reply.
    """
    path = u"/v3.0.0/members/%s/%s/" % (quote(d[u"handle"]), category)
    body = urllib2.urlopen(common.make_request(path)).read()
    info = common.to_json(body)[u"result"][u"content"]
    # Removed in this order so the first missing key is the one reported.
    for key in (u"handle", u"userId", u"createdBy", u"createdAt",
                u"updatedBy", u"updatedAt"):
        del info[key]
    d[category] = info
def create_range(self, name, start, end, disable=True, should_print=True):
    """Send a ``create`` request for a ``range`` object.

    The request carries ``name``, ``start_addr``, ``end_addr`` and
    ``disable`` and is made with this instance's ``hostname`` and ``auth``.

    :param name: name of the range.
    :param start: first address, sent as ``start_addr``.
    :param end: last address, sent as ``end_addr``.
    :param disable: sent as ``disable``.
    :param should_print: print the formatted response only when exactly
        ``True``.
    :returns: ``None``.
    """
    fields = dict(name=name, start_addr=start, end_addr=end, disable=disable)
    reply = make_request("range", 'create', data=fields,
                         hostname=self.hostname, auth=self.auth)
    text = self.get_output(reply, self.o_format, self.delimeter)
    # TODO: Print the object after a get range
    # Really should refactor this into its own module
    if should_print is True:
        print(text)
def create_networkcontainer(self, network, comment="", should_print=False):
    """Send a ``create`` request for a ``networkcontainer`` object.

    :param network: sent as ``network``.
    :param comment: sent as ``comment``.
    :param should_print: print the formatted response only when exactly
        ``True``.
    :returns: ``None``.
    """
    fields = dict(network=network, comment=comment)
    reply = make_request(
        "networkcontainer",
        'create',
        data=fields,
        hostname=self.hostname,
        auth=self.auth,
    )
    text = self.get_output(reply, self.o_format, self.delimeter)
    if should_print is True:
        print(text)
def create_network(self, network, comment="", disable=False, members=None,
                   should_print=False):
    """Create a ``network`` object and optionally print the formatted reply.

    :param network: value sent as ``network``.
    :param comment: value sent as ``comment``.
    :param disable: value sent as ``disable``.
    :param members: value sent as ``members``. When omitted (``None``) a
        fresh empty list is sent, so no list object is shared between
        calls.
    :param should_print: the formatted response is printed only when this
        is exactly ``True``.
    :returns: ``None``.
    """
    payload = {
        'network': network,
        'comment': comment,
        'disable': disable,
        # A new list per call replaces the former shared ``[]`` default.
        'members': [] if members is None else members,
    }
    response = make_request("network", 'create', data=payload,
                            hostname=self.hostname, auth=self.auth)
    formatted = self.get_output(response, self.o_format, self.delimeter)
    if should_print is True:
        print(formatted)
def extra_info(d, category):
    """Fetch one category of member info and store it under ``d[category]``.

    Requests ``/v3/members/<handle>/<category>/`` for the quoted
    ``d[u"handle"]``, decodes the reply as UTF-8, takes ``result.content``
    from the JSON, strips bookkeeping keys and writes the rest into ``d``.

    :param d: user document; must contain ``u"handle"``.
    :param category: path segment to request and key to store under.
    :returns: ``None``.
    :raises Exception: when the reply's content is ``None``.
    :raises KeyError: if a mandatory bookkeeping key is absent.
    """
    path = u"/v3/members/%s/%s/" % (quote_handle(d[u"handle"]), category)
    text = common.open_request_and_read(
        common.make_request(path)).decode("utf-8")
    info = common.to_json(text)[u"result"][u"content"]
    if info is None:
        raise Exception(u"Failed to get `%s` info for %s"
                        % (category, d[u"handle"]))

    for key in (u"handle", u"userId"):
        del info[key]
    # ``handleLower`` is optional, unlike the other stripped keys.
    info.pop(u"handleLower", None)
    for key in (u"createdBy", u"createdAt", u"updatedBy", u"updatedAt"):
        del info[key]

    d[category] = info
def create_network(self, network, comment="", disable=False, members=None,
                   should_print=False):
    """Send a ``create`` request for a ``network`` object.

    :param network: sent as ``network``.
    :param comment: sent as ``comment``.
    :param disable: sent as ``disable``.
    :param members: sent as ``members``. ``None`` (the default) means an
        empty list, created fresh for each call instead of reusing one
        mutable default object.
    :param should_print: print the formatted response only when exactly
        ``True``.
    :returns: ``None``.
    """
    if members is None:
        # Avoid the shared-mutable-default pitfall of ``members=[]``.
        members = []
    fields = dict(network=network, comment=comment, disable=disable,
                  members=members)
    reply = make_request(
        "network",
        'create',
        data=fields,
        hostname=self.hostname,
        auth=self.auth,
    )
    text = self.get_output(reply, self.o_format, self.delimeter)
    if should_print is True:
        print(text)
from common import make_request

# Page that lists every Pokémon by name.
pokemon_url = 'http://bulbapedia.bulbagarden.net/wiki/List_of_Pok%C3%A9mon_by_name'

if __name__ == '__main__':
    # Fetch the page; the result is queried below with a CSS selector.
    html = make_request(pokemon_url)

    # Keep the text of each content link whose title marks it as a
    # Pokémon page; links without a title attribute are ignored.
    pokemons = [
        link.text
        for link in html.cssselect('#mw-content-text a')
        if 'title' in link.attrib and ' (Pokémon)' in link.attrib['title']
    ]

    print('n pokemons', len(pokemons))

    # Save the names to a text file, one per line.
    with open('pokemons.txt', 'w') as f:
        f.write('\n'.join(pokemons))
def get_photo_albums_count(access_token):
    """Return the ``GET_ALBUMS_COUNT`` method's response as an ``int``.

    :param access_token: token passed to ``common.create_method_url``.
    :returns: ``int`` conversion of the reply's ``'response'`` field.
    """
    reply = common.make_request(
        common.create_method_url(GET_ALBUMS_COUNT, access_token))
    return int(reply['response'])
def main(): common.prepare(use_proxy=g_config.use_proxy) client = MongoClient() db = client.topcoder print "Crawling users..." print "Current:", db.users.count() if g_config.recrawl_all: print "Recrawl all users" if g_config.recheck_invalid_handles: print "Recheck invalid handles" invalid = set() def add_invalid_handle(hdl): invalid.add(hdl) with open(INVALID_HANDLES_FPATH, "w") as fp: for h in sorted(invalid): try: fp.write(h.encode("utf-8") + '\n') except UnicodeDecodeError: pass if os.path.exists(INVALID_HANDLES_FPATH): for line in open(INVALID_HANDLES_FPATH): line = line.strip() if line: invalid.add(line.decode("utf-8")) handles = set() query = {u"handle": None} field = {u"_id": 1} nb_challeges = db.challenges.count() for index, challenge in enumerate(db.challenges.find()): if (index + 1) % 100 == 0: print "Challenges: %d/%d" % (index + 1, nb_challeges) for reg in challenge[u"registrants"]: handle = reg[u"handle"].lower() for ch in ur" \/": if ch in handle: continue if handle in invalid: continue if handle in handles: continue if not g_config.recrawl_all: query[u"handle"] = handle if db.users.find_one(query, field) is not None: continue handles.add(handle) if g_config.recheck_invalid_handles or g_config.recrawl_all: handles.update(invalid) invalid = set() if os.path.exists(INVALID_HANDLES_FPATH): os.rename(INVALID_HANDLES_FPATH, INVALID_HANDLES_FPATH + ".bak") print len(handles), "users to be crawled" print "-----" for index, handle in enumerate(handles): print "[%d/%d]" % (index + 1, len(handles)), handle while True: try: try: quoted = quote_handle(handle) except KeyError: add_invalid_handle(handle) break request = common.make_request(u"/v3/members/" + quoted) s = common.open_request_and_read(request).decode("utf-8") d = common.to_json(s)[u"result"][u"content"] try: refine_user(d) user_skills(d) user_stats(d) user_external_accounts(d) except: traceback.print_exc() add_invalid_handle(handle) common.random_sleep(DOZE) break db.users.insert_one(d) 
common.random_sleep(DOZE) break except urllib2.HTTPError, e: if e.code in (404, 403,): add_invalid_handle(handle) common.random_sleep(DOZE) break else: print "HTTP Error", e.code, e.msg print e.geturl() print e.fp.read() except KeyboardInterrupt: return except:
def login(code, redirect_uri):
    """Exchange an authorization code for a ``User``.

    Formats ``TOKEN_URL_PATTERN`` with the configured app id and secret,
    the given ``code`` and ``redirect_uri``, requests it, and builds a
    ``User`` from the reply.

    :param code: authorization code placed into the token URL.
    :param redirect_uri: redirect URI placed into the token URL.
    :returns: ``User(access_token, int(user_id), int(expires_in))``.
    """
    token_url = TOKEN_URL_PATTERN.format(
        config.app_id, config.app_secret, code, redirect_uri)
    reply = common.make_request(token_url)
    token = reply['access_token']
    user_id = int(reply['user_id'])
    expires_in = int(reply['expires_in'])
    return User(token, user_id, expires_in)
def main(): common.prepare(use_proxy=g_config.use_proxy) client = MongoClient() db = client.topcoder print "Crawling users..." print "Current:", db.users.count() if g_config.recrawl_all: print "Recrawl all users" if g_config.recheck_invalid_handles: print "Recheck invalid handles" invalid = set() def add_invalid_handle(hdl): invalid.add(hdl) with open(INVALID_HANDLES_FPATH, "w") as fp: for h in sorted(invalid): try: fp.write(h.encode("utf-8") + '\n') except UnicodeDecodeError: pass if os.path.exists(INVALID_HANDLES_FPATH): for line in open(INVALID_HANDLES_FPATH): line = line.strip() if line: invalid.add(line.decode("utf-8")) handles = set() query = {u"handle": None} field = {u"_id": 1} nb_challeges = db.challenges.count() for index, challenge in enumerate(db.challenges.find()): if (index + 1) % 100 == 0: print "Challenges: %d/%d" % (index + 1, nb_challeges) for reg in challenge[u"registrants"]: handle = reg[u"handle"].lower() for ch in ur" \/": if ch in handle: continue if handle in invalid: continue if handle in handles: continue if not g_config.recrawl_all: query[u"handle"] = handle if db.users.find_one(query, field) is not None: continue handles.add(handle) if g_config.recheck_invalid_handles or g_config.recrawl_all: handles.update(invalid) invalid = set() if os.path.exists(INVALID_HANDLES_FPATH): os.rename(INVALID_HANDLES_FPATH, INVALID_HANDLES_FPATH + ".bak") print len(handles), "users to be crawled" print "-----" for index, handle in enumerate(handles): print "[%d/%d]" % (index + 1, len(handles)), handle while True: try: try: quoted = quote_handle(handle) except KeyError: add_invalid_handle(handle) break request = common.make_request(u"/v3/members/" + quoted) s = common.open_request_and_read(request).decode("utf-8") d = common.to_json(s)[u"result"][u"content"] try: refine_user(d) user_skills(d) user_stats(d) user_external_accounts(d) except: traceback.print_exc() add_invalid_handle(handle) common.random_sleep(DOZE) break db.users.insert_one(d) 
common.random_sleep(DOZE) break except urllib2.HTTPError, e: if e.code in ( 404, 403, ): add_invalid_handle(handle) common.random_sleep(DOZE) break else: print "HTTP Error", e.code, e.msg print e.geturl() print e.fp.read() except KeyboardInterrupt: return except:
def get_audio_count(access_token, user_id):
    """Return the ``GET_AUDIO_COUNT`` method's response as an ``int``.

    :param access_token: token passed to ``common.create_method_url``.
    :param user_id: passed to the method URL as ``owner_id``.
    :returns: ``int`` conversion of the reply's ``'response'`` field.
    """
    method_url = common.create_method_url(
        GET_AUDIO_COUNT, access_token, owner_id=user_id)
    reply = common.make_request(method_url)
    return int(reply['response'])
def main():
    """Crawl member profiles for every challenge registrant not yet stored.

    Steps:

    1. Read the ``proxy`` flag from ``config/users.ini`` and prepare
       ``common`` with it.
    2. Load the set of known-invalid handles from
       ``config/invalid_handles`` (one UTF-8 handle per line), if present.
    3. Collect the lower-cased handle of every registrant in
       ``db.challenges``, skipping handles that contain a space, ``/`` or
       a backslash, are known-invalid, or already exist in ``db.users``.
    4. For each handle, request ``/v3.0.0/members/<handle>``, refine the
       document, attach skills and insert it into ``db.users``.

    A 403/404 reply marks the handle invalid and rewrites the
    invalid-handles file. Other HTTP errors are printed and the request is
    retried immediately; any other exception is printed and retried after
    a pause.

    :returns: ``None``.
    """
    invalid_path = "config/invalid_handles"

    config = ConfigParser.RawConfigParser()
    config.read("config/users.ini")
    use_proxy = config.getboolean("default", "proxy")
    common.prepare(use_proxy=use_proxy)

    client = MongoClient()
    db = client.topcoder

    print("Crawling users...")
    print("Current: %s" % db.users.count())

    # Handles are kept as unicode in memory and as UTF-8 on disk, so they
    # compare cleanly against the unicode handles read from the database.
    invalid = set()
    if os.path.exists(invalid_path):
        with open(invalid_path) as fp:
            for line in fp:
                line = line.strip()
                if line:
                    invalid.add(line.decode("utf-8"))

    def mark_invalid(hdl):
        """Record ``hdl`` as invalid and rewrite the invalid-handles file."""
        invalid.add(hdl)
        with open(invalid_path, "w") as out:
            for h in sorted(invalid):
                out.write(h.encode("utf-8") + '\n')

    handles = set()
    for challenge in db.challenges.find():
        for reg in challenge["registrants"]:
            handle = reg["handle"].lower()
            if u' ' in handle or u'/' in handle or u'\\' in handle:
                continue
            if handle in invalid or handle in handles:
                continue
            if db.users.find_one({u"handle": handle}):
                continue
            handles.add(handle)

    print("%d users to be crawled." % len(handles))
    print("-----")

    for handle in handles:
        print(handle)

        while True:
            try:
                try:
                    quoted = quote(handle)
                except KeyError:
                    # NOTE(review): presumably ``quote`` is urllib.quote,
                    # which raises KeyError for non-ASCII unicode input in
                    # Python 2. Retrying cannot help, so the handle is
                    # recorded as invalid instead of looping forever.
                    mark_invalid(handle)
                    break

                request = common.make_request(u"/v3.0.0/members/" + quoted)
                s = urllib2.urlopen(request).read().decode("utf-8")
                d = common.to_json(s)[u"result"][u"content"]

                refine_user(d)
                user_skills(d)

                db.users.insert_one(d)

                common.random_sleep(1)
                break

            except urllib2.HTTPError as e:
                if e.code in (404, 403):
                    mark_invalid(handle)
                    common.random_sleep(1)
                    break

                # Any other HTTP status: report it and retry the handle.
                print("HTTP Error %s %s" % (e.code, e.msg))
                print(e.geturl())
                print(e.fp.read())

            except Exception as e:
                # Unknown failure: report it, back off, then retry.
                print("An unknown exception occurred.")
                print(e)
                common.random_sleep(20)