Code Example #1
File: network.py Project: rtucker-mozilla/bloxtool
 def create_range(
         self,
         name,
         start,
         end,
         disable=True,
         should_print=True):
     url = "range"
     data = {}
     data['name'] = name
     data['start_addr'] = start
     data['end_addr'] = end
     data['disable'] = disable
     ret = make_request(
         url,
         'create',
         data=data,
         hostname=self.hostname,
         auth=self.auth
     )
     output = self.get_output(ret, self.o_format, self.delimeter)
     # TODO: Print the object after a get range
     # Really should refactor this into its own module
     if should_print is True:
         print output
Code Example #2
 def create_host(self,
                 name,
                 ipv4addrs,
                 mac,
                 view,
                 network_block=False,
                 should_print=False):
     url = "record:host"
     data = {}
     addrobj = {}
     if ipv4addrs == 'nextavailableip' and network_block is not False:
         ipv4addrs = 'func:nextavailableip:{}'.format(network_block)
     addrobj['ipv4addr'] = ipv4addrs
     if mac:
         addrobj['mac'] = mac
     data['ipv4addrs'] = [addrobj]
     data['name'] = name
     addrobj['ipv4addr'] = ipv4addrs
     data['view'] = view
     ret = make_request(url,
                        'create',
                        data=data,
                        hostname=self.hostname,
                        auth=self.auth)
     output = self.get_output(ret, self.o_format, self.delimeter)
     if should_print is True:
         print output
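
When ipv4addrs is the literal string 'nextavailableip' and a network_block is supplied, create_host rewrites the address into Infoblox's function-call syntax so the appliance allocates the next free address in that block. The following self-contained sketch rebuilds the record:host payload exactly as the method above does; the CIDR, MAC, and host name are made-up illustration values, and in bloxtool the resulting dict would be handed to make_request() (not shown here).

# Illustrative values only: the CIDR, MAC, and host name are hypothetical.
ipv4addrs = 'nextavailableip'
network_block = '10.20.30.0/24'
mac = 'aa:bb:cc:dd:ee:ff'

if ipv4addrs == 'nextavailableip' and network_block is not False:
    # Infoblox function syntax: allocate the next free address in the block
    ipv4addrs = 'func:nextavailableip:{}'.format(network_block)

addrobj = {'ipv4addr': ipv4addrs}
if mac:
    addrobj['mac'] = mac

data = {
    'name': 'host1.example.com',
    'view': 'default',
    'ipv4addrs': [addrobj],
}
print(data)  # ipv4addr ends up as 'func:nextavailableip:10.20.30.0/24'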
Code Example #3
 def create_zoneauth(self,
                     zone,
                     view,
                     ns_group=None,
                     grid_primary=None,
                     members=[],
                     should_print=False):
     url = "zone_auth"
     data = {}
     data['fqdn'] = zone
     data['view'] = view
     if grid_primary:
         data['grid_primary'] = [{'name': grid_primary}]
     if ns_group:
         data['ns_group'] = ns_group
     ret_obj = make_request(url,
                            'create',
                            data=data,
                            hostname=self.hostname,
                            auth=self.auth)
     try:
         if ret_obj.status_code == 201:
             print "Successfully created zone_auth: {0}".format(zone)
         else:
             print "Unable to create zone_auth"
             print ret_obj.json()['text']
     except Exception, e:
         print "Unable to create zone_auth"
         sys.exit(2)
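
Note that a single grid_primary name is wrapped in a one-element list of dicts before it is sent, while ns_group is passed through as a plain string. Below is a short self-contained sketch of the zone_auth payload built above; the zone, view, and member name are made-up values, and the actual HTTP call happens inside make_request(), which is not shown here.

# Illustrative zone_auth payload; zone, view, and grid member are hypothetical.
zone = 'example.com'
view = 'default'
grid_primary = 'gridmaster.example.com'

data = {'fqdn': zone, 'view': view}
if grid_primary:
    data['grid_primary'] = [{'name': grid_primary}]   # list of {'name': ...} dicts

print(data)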
Code Example #4
def user_skills(d):
    quoted = quote(d[u"handle"])
    request = common.make_request(u"/v3.0.0/members/%s/skills/" % quoted)
    skills = common.to_json(urllib2.urlopen(request).read())
    skills = skills[u"result"][u"content"][u"skills"]

    for dd in skills.values():
        del dd[u"hidden"]

    d[u"skills"] = skills
Code Example #5
def user_skills(d):
    handle = quote_handle(d[u"handle"])
    request = common.make_request(u"/v3/members/%s/skills/" % handle)
    raw = common.open_request_and_read(request).decode("utf-8")
    skills = common.to_json(raw)
    skills = skills[u"result"][u"content"][u"skills"]

    for dd in skills.values():
        del dd[u"hidden"]

    d[u"skills"] = skills
Code Example #6
 def search_by_zone(self, zone, view, should_return=False):
     url = 'zone_auth?fqdn={0}&view={1}'.format(zone, view)
     ret_obj = make_request(url,
                            'get',
                            hostname=self.hostname,
                            auth=self.auth)
     if should_return:
         return ret_obj
     else:
         print self.get_output(ret_obj, self.o_format, self.delimeter)
     return ret_obj
Code Example #7
File: users.py Project: vanxining/TopcoderCrawler
def user_skills(d):
    handle = quote_handle(d[u"handle"])
    request = common.make_request(u"/v3/members/%s/skills/" % handle)
    raw = common.open_request_and_read(request).decode("utf-8")
    skills = common.to_json(raw)
    skills = skills[u"result"][u"content"][u"skills"]

    for dd in skills.values():
        del dd[u"hidden"]

    d[u"skills"] = skills
Code Example #8
 def create_networkcontainer(self, network, comment="", should_print=False):
     url = "networkcontainer"
     data = {}
     data['network'] = network
     data['comment'] = comment
     ret = make_request(url,
                        'create',
                        data=data,
                        hostname=self.hostname,
                        auth=self.auth)
     output = self.get_output(ret, self.o_format, self.delimeter)
     if should_print is True:
         print output
Code Example #9
def extra_info(d, category):
    quoted = quote(d[u"handle"])
    request = common.make_request(u"/v3.0.0/members/%s/%s/" % (quoted, category))
    info = common.to_json(urllib2.urlopen(request).read())[u"result"][u"content"]

    del info[u"handle"]
    del info[u"userId"]

    del info[u"createdBy"]
    del info[u"createdAt"]
    del info[u"updatedBy"]
    del info[u"updatedAt"]

    d[category] = info
Code Example #10
 def create_range(self, name, start, end, disable=True, should_print=True):
     url = "range"
     data = {}
     data['name'] = name
     data['start_addr'] = start
     data['end_addr'] = end
     data['disable'] = disable
     ret = make_request(url,
                        'create',
                        data=data,
                        hostname=self.hostname,
                        auth=self.auth)
     output = self.get_output(ret, self.o_format, self.delimeter)
     # TODO: Print the object after a get range
     # Really should refactor this into its own module
     if should_print is True:
         print output
Code Example #11
 def create_networkcontainer(
         self,
         network,
         comment="",
         should_print=False):
     url = "networkcontainer"
     data = {}
     data['network'] = network
     data['comment'] = comment
     ret = make_request(
         url,
         'create',
         data=data,
         hostname=self.hostname,
         auth=self.auth
     )
     output = self.get_output(ret, self.o_format, self.delimeter)
     if should_print is True:
         print output
Code Example #12
 def create_network(self,
                    network,
                    comment="",
                    disable=False,
                    members=[],
                    should_print=False):
     url = "network"
     data = {}
     data['network'] = network
     data['comment'] = comment
     data['disable'] = disable
     data['members'] = members
     ret = make_request(url,
                        'create',
                        data=data,
                        hostname=self.hostname,
                        auth=self.auth)
     output = self.get_output(ret, self.o_format, self.delimeter)
     if should_print is True:
         print output
Code Example #13
File: users.py Project: vanxining/TopcoderCrawler
def extra_info(d, category):
    handle = quote_handle(d[u"handle"])
    request = common.make_request(u"/v3/members/%s/%s/" % (handle, category))
    raw = common.open_request_and_read(request).decode("utf-8")
    info = common.to_json(raw)[u"result"][u"content"]

    if info is None:
        raise Exception(u"Failed to get `%s` info for %s" % (category, d[u"handle"]))

    del info[u"handle"]
    del info[u"userId"]

    if u"handleLower" in info:
        del info[u"handleLower"]

    del info[u"createdBy"]
    del info[u"createdAt"]
    del info[u"updatedBy"]
    del info[u"updatedAt"]

    d[category] = info
Code Example #14
def extra_info(d, category):
    handle = quote_handle(d[u"handle"])
    request = common.make_request(u"/v3/members/%s/%s/" % (handle, category))
    raw = common.open_request_and_read(request).decode("utf-8")
    info = common.to_json(raw)[u"result"][u"content"]

    if info is None:
        raise Exception(u"Failed to get `%s` info for %s" %
                        (category, d[u"handle"]))

    del info[u"handle"]
    del info[u"userId"]

    if u"handleLower" in info:
        del info[u"handleLower"]

    del info[u"createdBy"]
    del info[u"createdAt"]
    del info[u"updatedBy"]
    del info[u"updatedAt"]

    d[category] = info
Code Example #15
File: network.py Project: rtucker-mozilla/bloxtool
 def create_network(
         self,
         network,
         comment="",
         disable=False,
         members=[],
         should_print=False):
     url = "network"
     data = {}
     data['network'] = network
     data['comment'] = comment
     data['disable'] = disable
     data['members'] = members
     ret = make_request(
         url,
         'create',
         data=data,
         hostname=self.hostname,
         auth=self.auth
     )
     output = self.get_output(ret, self.o_format, self.delimeter)
     if should_print is True:
         print output
Code Example #16
File: example.py Project: frnsys/scraper
from common import make_request

# target site
pokemon_url = 'http://bulbapedia.bulbagarden.net/wiki/List_of_Pok%C3%A9mon_by_name'

if __name__ == '__main__':
    pokemons = []

    # get the html object
    html = make_request(pokemon_url)

    # access elements by css selector,
    # iterate over matches
    for link in html.cssselect('#mw-content-text a'):
        # some additional filtering
        if 'title' not in link.attrib:
            continue
        if ' (Pokémon)' in link.attrib['title']:
            pokemons.append(link.text)

    print('n pokemons', len(pokemons))

    # save to text file
    with open('pokemons.txt', 'w') as f:
        f.write('\n'.join(pokemons))
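
The scraper depends on a make_request() helper from common.py that is not included in the example; judging by the .cssselect() call, it must return a parsed lxml HTML tree. Below is a plausible sketch of such a helper, assuming requests and lxml; the real implementation in frnsys/scraper may differ.

# Assumed implementation of common.make_request() for this scraper; the real
# helper is not shown. It fetches a page and returns an lxml element tree so
# callers can use .cssselect() (which also needs the cssselect package).
import requests
import lxml.html

def make_request(url):
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    return lxml.html.fromstring(resp.content)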
Code Example #17
File: photos.py Project: ZuTa/vkbackuper
def get_photo_albums_count(access_token):
    url = common.create_method_url(GET_ALBUMS_COUNT, access_token)

    res = common.make_request(url)

    return int(res['response'])
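
The vkbackuper snippets rely on two helpers from common.py that are not shown: create_method_url() builds a VK API method URL and make_request() fetches it and decodes the JSON response. A plausible sketch of both follows, assuming the standard https://api.vk.com/method/<name> URL scheme; the helper bodies, and the value of constants such as GET_ALBUMS_COUNT (presumably a VK method name like 'photos.getAlbumsCount'), are guesses.

# Assumed helpers for the vkbackuper examples (Python 2, to match the code
# above); only the VK URL scheme is standard, the implementations are guesses.
import json
import urllib
import urllib2

def create_method_url(method, access_token, **params):
    params['access_token'] = access_token
    return 'https://api.vk.com/method/{0}?{1}'.format(
        method, urllib.urlencode(params))

def make_request(url):
    # VK returns JSON; callers index the parsed dict, e.g. res['response']
    return json.loads(urllib2.urlopen(url).read())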
Code Example #18
File: users.py Project: vanxining/TopcoderCrawler
def main():
    common.prepare(use_proxy=g_config.use_proxy)

    client = MongoClient()
    db = client.topcoder

    print "Crawling users..."
    print "Current:", db.users.count()

    if g_config.recrawl_all:
        print "Recrawl all users"

    if g_config.recheck_invalid_handles:
        print "Recheck invalid handles"

    invalid = set()

    def add_invalid_handle(hdl):
        invalid.add(hdl)

        with open(INVALID_HANDLES_FPATH, "w") as fp:
            for h in sorted(invalid):
                try:
                    fp.write(h.encode("utf-8") + '\n')
                except UnicodeDecodeError:
                    pass

    if os.path.exists(INVALID_HANDLES_FPATH):
        for line in open(INVALID_HANDLES_FPATH):
            line = line.strip()
            if line:
                invalid.add(line.decode("utf-8"))

    handles = set()

    query = {u"handle": None}
    field = {u"_id": 1}

    nb_challeges = db.challenges.count()
    for index, challenge in enumerate(db.challenges.find()):
        if (index + 1) % 100 == 0:
            print "Challenges: %d/%d" % (index + 1, nb_challeges)

        for reg in challenge[u"registrants"]:
            handle = reg[u"handle"].lower()

            for ch in ur" \/":
                if ch in handle:
                    continue  # note: skips only this inner character loop, not the handle

            if handle in invalid:
                continue

            if handle in handles:
                continue

            if not g_config.recrawl_all:
                query[u"handle"] = handle
                if db.users.find_one(query, field) is not None:
                    continue

            handles.add(handle)

    if g_config.recheck_invalid_handles or g_config.recrawl_all:
        handles.update(invalid)
        invalid = set()

        if os.path.exists(INVALID_HANDLES_FPATH):
            os.rename(INVALID_HANDLES_FPATH, INVALID_HANDLES_FPATH + ".bak")

    print len(handles), "users to be crawled"
    print "-----"

    for index, handle in enumerate(handles):
        print "[%d/%d]" % (index + 1, len(handles)), handle

        while True:
            try:
                try:
                    quoted = quote_handle(handle)
                except KeyError:
                    add_invalid_handle(handle)

                    break

                request = common.make_request(u"/v3/members/" + quoted)
                s = common.open_request_and_read(request).decode("utf-8")
                d = common.to_json(s)[u"result"][u"content"]

                try:
                    refine_user(d)
                    user_skills(d)
                    user_stats(d)
                    user_external_accounts(d)
                except:
                    traceback.print_exc()

                    add_invalid_handle(handle)

                    common.random_sleep(DOZE)
                    break

                db.users.insert_one(d)

                common.random_sleep(DOZE)
                break
            except urllib2.HTTPError, e:
                if e.code in (404, 403,):
                    add_invalid_handle(handle)

                    common.random_sleep(DOZE)
                    break
                else:
                    print "HTTP Error", e.code, e.msg
                    print e.geturl()
                    print e.fp.read()
            except KeyboardInterrupt:
                return
            except:
Code Example #19
File: example.py Project: frnsys/scraper
from common import make_request

# target site
pokemon_url = 'http://bulbapedia.bulbagarden.net/wiki/List_of_Pok%C3%A9mon_by_name'


if __name__ == '__main__':
    pokemons = []

    # get the html object
    html = make_request(pokemon_url)

    # access elements by css selector,
    # iterate over matches
    for link in html.cssselect('#mw-content-text a'):
        # some additional filtering
        if 'title' not in link.attrib:
            continue
        if ' (Pokémon)' in link.attrib['title']:
            pokemons.append(link.text)

    print('n pokemons', len(pokemons))

    # save to text file
    with open('pokemons.txt', 'w') as f:
        f.write('\n'.join(pokemons))
Code Example #20
File: auth.py Project: ZuTa/vkbackuper
def login(code, redirect_uri):
    parsed = common.make_request(TOKEN_URL_PATTERN.format(config.app_id, config.app_secret, code, redirect_uri))

    return User(parsed['access_token'], int(parsed['user_id']), int(parsed['expires_in']))
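
TOKEN_URL_PATTERN is not defined in this snippet. Given the four positional format() arguments (app id, app secret, authorization code, redirect URI) and VK's documented OAuth flow, it is presumably something like the following; treat this as a hedged guess rather than the project's actual constant.

# Assumed value of TOKEN_URL_PATTERN, inferred from the format() call above
# and VK's OAuth access_token endpoint; the real constant is not shown.
TOKEN_URL_PATTERN = ("https://oauth.vk.com/access_token"
                     "?client_id={0}&client_secret={1}"
                     "&code={2}&redirect_uri={3}")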
Code Example #21
def main():
    common.prepare(use_proxy=g_config.use_proxy)

    client = MongoClient()
    db = client.topcoder

    print "Crawling users..."
    print "Current:", db.users.count()

    if g_config.recrawl_all:
        print "Recrawl all users"

    if g_config.recheck_invalid_handles:
        print "Recheck invalid handles"

    invalid = set()

    def add_invalid_handle(hdl):
        invalid.add(hdl)

        with open(INVALID_HANDLES_FPATH, "w") as fp:
            for h in sorted(invalid):
                try:
                    fp.write(h.encode("utf-8") + '\n')
                except UnicodeDecodeError:
                    pass

    if os.path.exists(INVALID_HANDLES_FPATH):
        for line in open(INVALID_HANDLES_FPATH):
            line = line.strip()
            if line:
                invalid.add(line.decode("utf-8"))

    handles = set()

    query = {u"handle": None}
    field = {u"_id": 1}

    nb_challeges = db.challenges.count()
    for index, challenge in enumerate(db.challenges.find()):
        if (index + 1) % 100 == 0:
            print "Challenges: %d/%d" % (index + 1, nb_challeges)

        for reg in challenge[u"registrants"]:
            handle = reg[u"handle"].lower()

            for ch in ur" \/":
                if ch in handle:
                    continue  # note: skips only this inner character loop, not the handle

            if handle in invalid:
                continue

            if handle in handles:
                continue

            if not g_config.recrawl_all:
                query[u"handle"] = handle
                if db.users.find_one(query, field) is not None:
                    continue

            handles.add(handle)

    if g_config.recheck_invalid_handles or g_config.recrawl_all:
        handles.update(invalid)
        invalid = set()

        if os.path.exists(INVALID_HANDLES_FPATH):
            os.rename(INVALID_HANDLES_FPATH, INVALID_HANDLES_FPATH + ".bak")

    print len(handles), "users to be crawled"
    print "-----"

    for index, handle in enumerate(handles):
        print "[%d/%d]" % (index + 1, len(handles)), handle

        while True:
            try:
                try:
                    quoted = quote_handle(handle)
                except KeyError:
                    add_invalid_handle(handle)

                    break

                request = common.make_request(u"/v3/members/" + quoted)
                s = common.open_request_and_read(request).decode("utf-8")
                d = common.to_json(s)[u"result"][u"content"]

                try:
                    refine_user(d)
                    user_skills(d)
                    user_stats(d)
                    user_external_accounts(d)
                except:
                    traceback.print_exc()

                    add_invalid_handle(handle)

                    common.random_sleep(DOZE)
                    break

                db.users.insert_one(d)

                common.random_sleep(DOZE)
                break
            except urllib2.HTTPError, e:
                if e.code in (
                        404,
                        403,
                ):
                    add_invalid_handle(handle)

                    common.random_sleep(DOZE)
                    break
                else:
                    print "HTTP Error", e.code, e.msg
                    print e.geturl()
                    print e.fp.read()
            except KeyboardInterrupt:
                return
            except:
Code Example #22
File: audio.py Project: ZuTa/vkbackuper
def get_audio_count(access_token, user_id):
    url = common.create_method_url(GET_AUDIO_COUNT, access_token, owner_id=user_id)

    res = common.make_request(url)

    return int(res['response'])
Code Example #23
def main():
    config = ConfigParser.RawConfigParser()
    config.read("config/users.ini")

    use_proxy = config.getboolean("default", "proxy")
    common.prepare(use_proxy=use_proxy)

    client = MongoClient()
    db = client.topcoder

    print "Crawling users..."
    print "Current:", db.users.count()

    invalid = set()

    if os.path.exists("config/invalid_handles"):
        for line in open("config/invalid_handles"):
            line = line.strip()
            if line:
                invalid.add(line)

    handles = set()

    for challenge in db.challenges.find():
        for reg in challenge["registrants"]:
            handle = reg["handle"].lower()

            if u' ' in handle or u'/' in handle or u'\\' in handle:
                continue

            if handle in invalid:
                continue

            if handle in handles:
                continue

            if db.users.find_one({u"handle": handle}):
                continue

            handles.add(handle)

    print len(handles), "users to be crawled."
    print "-----"

    for handle in handles:
        print handle

        while True:
            try:
                request = common.make_request(u"/v3.0.0/members/" + quote(handle))
                s = urllib2.urlopen(request).read().decode("utf-8")

                d = common.to_json(s)[u"result"][u"content"]
                refine_user(d)

                user_skills(d)

                db.users.insert_one(d)

                common.random_sleep(1)
                break

            except urllib2.HTTPError, e:
                if e.code == 404 or e.code == 403:
                    invalid.add(handle)

                    with open("config/invalid_handles", "w") as fp:
                        for h in sorted(invalid):
                            fp.write(h + '\n')

                    common.random_sleep(1)
                    break
                else:
                    print "HTTP Error", e.code, e.msg
                    print e.geturl()
                    print e.fp.read()
            except Exception, e:
                print "An unknown exception occurred."
                print e

            common.random_sleep(20)