def main(): for user in create_login_payload(): logging.info('Opening a Grafana session...') session = Session() login(session, user) if check_initialized(session): logging.info('Grafana has already been initialized, skipping!') return logging.info('Attempting to add configured datasource...') r = session.post('{url}/api/datasources'.format(url=GRAFANA_URL), json=create_datasource_payload()) logging.debug('Response: %r', r.json()) r.raise_for_status() for path in sorted(glob.glob('{dir}/*.json'.format(dir=DASHBOARDS_DIR))): logging.info('Creating dashboard from file: {path}'.format(path=path)) r = session.post('{url}/api/dashboards/db'.format(url=GRAFANA_URL), json=create_dashboard_payload(path)) logging.debug('Response: %r', r.json()) r.raise_for_status() logging.info('Ending %r session...', user.get('user')) session.get('{url}/logout'.format(url=GRAFANA_URL)) logging.info('Finished successfully.')
def vote_login(username, passwd):
    """Log in to the univs site.

    Args:
        username: the account name
        passwd: the password
    Returns:
        the logged-in session, None if the SSO login is rejected,
        or 1 on a connection error / timeout
    """
    try:
        s = Session()
        sso_url = "http://uzone.univs.cn/sso.action"
        sso_data = {"gUser.loginName": username, "gUser.password": passwd}
        r = s.post(sso_url, data=sso_data)
        if "<code>0</code>" not in r.text:  # SSO did not report success
            return None
        res1 = s.get("http://mzml.univs.cn:8081/common/checkcode")
        code = json.loads(res1.content)
        check_sso_url = ("http://uzone.univs.cn/checkSSOLogin.action"
                         "?token=%s&subSiteId=%s&checkCode=%s"
                         "&returnUrl=http://mzml.univs.cn:8081/land.html")
        res2 = s.get(check_sso_url % (code["data"]["date"],
                                      code["data"]["subSiteId"],
                                      code["data"]["checkout"]))
        codes = res2.url
        sign_in = "http://mzml.univs.cn:8081/user/sigin"
        query = codes.split("?")[1].split("&")
        sign_data = {
            "uid": query[1].split("=")[1],
            "token": code["data"]["date"],
            "checkcode": query[0].split("=")[1],
        }
        s.post(sign_in, data=sign_data)
        return s
    except (ConnectionError, Timeout):
        logging.exception("Timeout: " + username)
        return 1
def sendMicrosoft(filename, help_text, email, name): br = Session() hostUrl = "https://www.microsoft.com/en-us/security/portal/submission/submit.aspx" br.headers.update({'referer': hostUrl}) page = br.get(hostUrl) br.get("http://c.microsoft.com/trans_pixel.aspx") # get additional cookies page = BeautifulSoup(page.text, 'html.parser') form = page.find('form', id='Newsubmission') form_data = dict([(el['name'], el.get('value', None)) for el in form.find_all('input') if el.has_attr('name')]) form_data["Name"] = email form_data["Product"] = "Windows Server Antimalware" form_data["Comments"] = help_text form_data["Priority"] = 2 response = br.post( hostUrl, data=form_data, files={u'File': open(filename, 'rb')}) text = response.text.encode('utf-8') result = text.find('window.location.href="SubmissionHistory.aspx') if result != -1: sub_url = text[result + 44:] sub_url = "/SubmissionHistory.aspx" + sub_url[:sub_url.find('"')] url = response.url[:response.url.rfind('/')] + sub_url return 0, "Success! Your status is <a href='%s'>here</a>" % url else: logger.warning("Microsoft error: %s" % text) return 1, "Something wrong: %s" % text
def hit_example_com(self):
    try:
        start_time = time()
        session = Session()
        http_adapter = HTTPAdapter(max_retries=0)
        session.mount('http://', http_adapter)
        session.mount('https://', http_adapter)
        session.get("http://www.example.com", timeout=30)
        # # print("Doing a task that is not a request...")
        # login = Login()
        # r = login.sw_valid_login(GC.USERNAME, GC.PASSWORD, "http://www.sowatest.com")
        stats_latency['latency'].append(time() - start_time)
        events.request_success.fire(request_type="Transaction",
                                    name="hit_sowatest",
                                    response_time=time() - start_time,
                                    response_length=0)
        session.close()
        # # Assert Section
        # assert r.status_code == 200
        # assert "Access Denied" in str(html.fromstring(r.text).xpath("//title/text()"))
        # assert '<div id="blockedBanner">' in r.text
    except Exception as e:
        """
        * *request_type*: Request type method used
        * *name*: Path to the URL that was called (or override name if it was used in the call to the client)
        * *response_time*: Time in milliseconds until exception was thrown
        * *exception*: Exception instance that was thrown
        """
        events.request_failure.fire(request_type="Transaction",
                                    name="hit_sowatest",
                                    response_time=time() - start_time,
                                    exception=e)
def get(self): session = Session() availabilityJSON = session.get(iPhone6AvailabilityURL).content availabilityDict = json.loads(availabilityJSON) caStoresXML = session.get(appleCAStoreURL).content # self.write(caStoresXML) storesDict = xmltodict.parse(caStoresXML)["records"]["country"] # self.dumpJSON(storesDict) ontarioStoresList = [] for eachStateDict in storesDict["state"]: if eachStateDict["@name"] == "Ontario": ontarioStoresList = eachStateDict["store"] # self.dumpJSON(ontarioStoresList) # self.write(storeNameForStoreID(ontarioStoresList, "R447")) # # logging.info(availabilityDict) lastUpdatedTimestamp = availabilityDict.pop("updated") storeIDs = availabilityDict.keys() for storeID in storeIDs: phonesDictInThisStore = availabilityDict[storeID] phoneKeys = phonesDictInThisStore.keys() for eachPhoneKey in phoneKeys: if (phonesDictInThisStore[eachPhoneKey] == True) and (eachPhoneKey in targetModels) and (storeID in targetStores): sendEmail(storeNameForStoreID(ontarioStoresList, storeID), eachPhoneKey) if eachPhoneKey in iphone6Dictionary: replaceKeyInDictionary(phonesDictInThisStore, eachPhoneKey, iphone6Dictionary[eachPhoneKey]) for storeID in storeIDs: replaceKeyInDictionary(availabilityDict, storeID, storeNameForStoreID(ontarioStoresList, storeID)) availabilityDict["_updated"] = lastUpdatedTimestamp orderedDict = collections.OrderedDict(sorted(availabilityDict.items())) self.dumpJSON(orderedDict)
class JoolaBaseClient(object):
    def __init__(self, base_url, credentials=None, api_token=None, **kwargs):
        self.base_url = str(base_url)
        self.session = Session()
        self.session.mount('http://', CachingHTTPAdapter())
        self.session.mount('https://', CachingHTTPAdapter())
        if api_token:
            self.session.auth = APITokenAuth(api_token)
        elif credentials:
            self.session.auth = credentials

    def list(self):
        return self.session.get(self.base_url)

    def get(self, lookup):
        return self.session.get('%s%s' % (self.base_url, str(lookup)))

    def insert(self, **kwargs):
        return self.session.post(self.base_url, data=kwargs)

    def patch(self, lookup, **kwargs):
        return self.session.patch('%s%s' % (self.base_url, str(lookup)), data=kwargs)

    def delete(self, lookup):
        return self.session.delete('%s%s' % (self.base_url, str(lookup)))
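# A minimal usage sketch for JoolaBaseClient above. The base URL and token are
# placeholders, and APITokenAuth / CachingHTTPAdapter are assumed to come from
# the surrounding project, not from requests itself.
client = JoolaBaseClient('https://api.example.com/v1/items/', api_token='my-token')
all_items = client.list().json()        # GET the whole collection
one_item = client.get('42').json()      # GET a single resource by lookup key
client.insert(name='new item')          # POST form-encoded data
client.patch('42', name='renamed')      # PATCH an existing resource
client.delete('42')                     # DELETE it again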
def get_all_setlists(artist, page_number, sets_per_page): headers = {'Accept': 'application/json'} url = "http://api.setlist.fm/rest/0.1/search/setlists?artistName={0}&p={1}".format(artist, page_number) session = Session() response = session.get(url, headers=headers) data = response.json() setlists = data['setlists']['setlist'] total = data['setlists']['@total'] total_pages = math.ceil(int(total) / sets_per_page) # Continue to make requests until max setlists are downloaded for page in range(page_number + 1, total_pages + 1): print('{0} Page {1}'.format(artist, page)) url = "http://api.setlist.fm/rest/0.1/search/setlists?artistName={0}&p={1}".format(artist, page) response = session.get(url, headers=headers) data = response.json() # If more than one result, concatenate lists, else append element to list. if type(data['setlists']['setlist']) is list: setlists = setlists + data['setlists']['setlist'] elif type(data['setlists']['setlist']) is dict: setlists.append(data['setlists']['setlist']) return setlists
def search(apn): s = Session() s.mount('https://', HTTPSAdapter()) url = 'https://www.acgov.org/ptax_pub_app/RealSearch.do' data = { 'displayApn': apn, 'situsStreetNumber': '', 'situsStreetName': '', 'situsStreetSuffix': '', 'situsUnitNumber': '', 'situsCity': '', 'searchBills': 'Search', 'showHistory': 'N', } headers = { 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36', } s.get('https://www.acgov.org/ptax_pub_app/RealSearchInit.do?showSearchParmsFromLookup=true', headers = headers) r = s.post(url, data = data) try: os.mkdir('results') except OSError: pass f = open(os.path.join('results', apn), 'w') f.write(r.text) f.close()
def upload(cls, url, f, filename=None, session=None): if session is None: session = Session() should_close = True if isinstance(f, six.string_types): if not filename: filename = os.path.basename(f) f = open(f, 'rb') should_close = True try: if 'csrftoken' not in session.cookies: session.get('http://databasin.org') r = session.post( url, data={'csrfmiddlewaretoken': session.cookies['csrftoken']}, files={'file': (filename, f)} ) raise_for_authorization(r, session.client.username is not None) r.raise_for_status() o = urlparse(url) return cls.get( '{0}://{1}{2}'.format(o.scheme, o.netloc, TEMPORARY_FILE_DETAIL_PATH.format(uuid=r.json()['uuid'])), session=session, lazy=False ) finally: if should_close: f.close()
def gen_session():
    session = Session()
    url = 'http://www.sccredit.gov.cn/queryInfo.do?behavior=enterSearch&panel=corp'
    try:
        session.get(url, timeout=3)
        return session
    except Exception:
        # Swallow connection problems; the caller gets None when the warm-up request fails.
        pass
def test_login_logout():
    s = Session()
    rt = s.get(url('fl'))
    assert_response(rt, 'Login OK')
    rt = s.get(url('whoami'))
    assert_response(rt, common.FAKE_USER)
    s.get(url('logout'))
    rt = s.get(url('whoami'))
    assert_response(rt, None, status=401)
def update_analytics(self): session = Session() if 'TWITLOG_COOKIES' in os.environ: cookies = json.loads(os.environ['TWITLOG_COOKIES']) session.cookies.update(cookies) else: print 'Fetching homepage for auth token' res = session.get('https://twitter.com') body = BeautifulSoup(res.text) input_ = body.find(lambda tag: tag.name == 'input' and tag.get('name') == 'authenticity_token') authe_token = input_['value'] print 'Logging into account' res = session.post('https://twitter.com/sessions', data={ 'session[username_or_email]': self.args.username, 'session[password]': self.args.password, 'return_to_ssl': 'true', 'scribe_log': '', 'redirect_after_login': '******', 'authenticity_token': authe_token, }) cookies = dict(session.cookies.iteritems()) print print 'export TWITLOG_COOKIES=\'%s\'' % json.dumps(cookies) print with self.db.connect() as con: for tid, old_json in con.execute(''' SELECT tweet.id, last.json FROM tweets as tweet LEFT JOIN tweet_metrics as last ON tweet.last_metrics_id = last.id WHERE tweet.last_metrics_id IS NULL OR last.created_at > datetime('now','-1 day') ORDER BY tweet.id DESC '''): res = session.get('https://twitter.com/i/tfb/v1/tweet_activity/web/poll/%s' % tid) new_metrics = {k: int(v) for k, v in res.json()['metrics']['all'].iteritems()} new_metrics.pop('Engagements', None) # Just a total of the others. new_json = json.dumps(new_metrics, sort_keys=True) changed = new_json != old_json print tid, new_json if changed else 'unchanged' if changed: mid = con.insert('tweet_metrics', { 'tweet_id': tid, 'json': new_json, }) con.update('tweets', {'last_metrics_id': mid}, {'id': tid}) con.commit()
class Compranet(FishFinder): def setup(self): self.session = Session() self.session.get('http://compranet-pa.funcionpublica.gob.mx/PAAASOP/buscador.jsp') self.post_url = 'http://compranet-pa.funcionpublica.gob.mx/PAAASOP/DownloadArchivo' def write_xls(self, data, filename): with open(filename, 'wb') as f: f.write(data) def search(self, query): """ Submit a search query and return results """ params = { 'ocultarParam':'0', 'ocultarDetalle':'1', 'cveEntFederativa':'0', 'cveDependencia':'0', 'concepto': query, 'valCompraDirPyme':'1000', 'entidadesSelect':'0', 'dependenciasSelect':'0' } r = self.session.post(self.post_url, params = params) return r.content def test(self, result, query): """ With our results, test whethere the query was legitimate. 0 = No Results 1 = Pass 2 = Needs More """ xls = xlrd.open_workbook(file_contents=result) sheet = xls.sheet_by_index(0) nrows = sheet.nrows if nrows == 1: print "%s has no results" % query return 0 elif nrows < 2501: filename = "data/%s.xls" % query print "Writing %s" % filename self.write_xls(result, filename) return 1 else: print "%s has too many results" % query return 2
class TheSubDBProvider(Provider): """TheSubDB Provider.""" languages = {Language.fromthesubdb(l) for l in language_converters['thesubdb'].codes} required_hash = 'thesubdb' server_url = 'http://api.thesubdb.com/' subtitle_class = TheSubDBSubtitle def __init__(self): self.session = None def initialize(self): self.session = Session() self.session.headers['User-Agent'] = ('SubDB/1.0 (subliminal/%s; https://github.com/Diaoul/subliminal)' % __short_version__) def terminate(self): self.session.close() def query(self, hash): # make the query params = {'action': 'search', 'hash': hash} logger.info('Searching subtitles %r', params) r = self.session.get(self.server_url, params=params, timeout=10) # handle subtitles not found and errors if r.status_code == 404: logger.debug('No subtitles found') return [] r.raise_for_status() # loop over languages subtitles = [] for language_code in r.text.split(','): language = Language.fromthesubdb(language_code) subtitle = self.subtitle_class(language, hash) logger.debug('Found subtitle %r', subtitle) subtitles.append(subtitle) return subtitles def list_subtitles(self, video, languages): return [s for s in self.query(video.hashes['thesubdb']) if s.language in languages] def download_subtitle(self, subtitle): logger.info('Downloading subtitle %r', subtitle) params = {'action': 'download', 'hash': subtitle.hash, 'language': subtitle.language.alpha2} r = self.session.get(self.server_url, params=params, timeout=10) r.raise_for_status() subtitle.content = fix_line_ending(r.content)
class HTTPRestore(object):
    def __init__(self, host, port, site, pgdata=None):
        self.log = logging.getLogger("HTTPRestore")
        self.host = host
        self.port = port
        self.site = site
        self.pgdata = pgdata
        self.session = Session()

    def list_basebackups(self):
        uri = "http://" + self.host + ":" + str(self.port) + "/" + self.site + "/basebackups"
        response = self.session.get(uri)
        basebackups = []
        for basebackup, values in response.json()["basebackups"].items():
            basebackups.append({"name": basebackup, "size": values["size"]})
        return basebackups

    def show_basebackup_list(self):
        basebackups = self.list_basebackups()
        line = "Available %r basebackups:" % self.site
        print(line)
        print("=" * len(line))
        print("basebackup\t\tsize")
        for r in basebackups:
            print("{}\t{}".format(r["name"], r["size"]))

    def get_basebackup_file(self, basebackup):
        uri = "http://" + self.host + ":" + str(self.port) + "/" + self.site + "/basebackups/" + basebackup
        response = self.session.get(uri, stream=True)
        if response.status_code != 200:
            raise Error("Incorrect basebackup: {!r} or site: {!r} defined".format(basebackup, self.site))
        basebackup_path = os.path.join(self.pgdata, "base.tar.xz")
        store_response_to_file(basebackup_path, response)
        tar = tarfile.TarFile(fileobj=open(basebackup_path, "rb"))
        return response.headers["x-pghoard-start-wal-segment"], basebackup_path, tar

    def get_archive_file(self, filename, target_path, path_prefix=None):
        start_time = time.time()
        self.log.debug("Getting archived file: %r, target_path: %r, path_prefix: %r",
                       filename, target_path, path_prefix)
        uri = "http://" + self.host + ":" + str(self.port) + "/" + self.site + "/" + filename
        if not path_prefix:
            final_target_path = os.path.join(os.getcwd(), target_path)
        else:
            final_target_path = os.path.join(path_prefix, target_path)
        headers = {"x-pghoard-target-path": final_target_path}
        response = self.session.get(uri, headers=headers, stream=True)
        self.log.debug("Got archived file: %r, %r status_code: %r took: %.2fs",
                       filename, target_path, response.status_code, time.time() - start_time)
        return response.status_code in (200, 206)
class ModelRegistryClient(object):
    def __init__(self, host_and_port):
        self.endpoint = "http://%s/api/v1/models/" % host_and_port
        self.session = Session()

    def retrieve_model(self, slug, timestamp, destination):
        response = self.session.get("%s/%s/%s/default/model.bin" % (self.endpoint, slug, timestamp))
        response.raise_for_status()
        data = StringIO.StringIO(response.content)
        z = zipfile.ZipFile(data)
        z.extractall(destination)
        metadata = self.session.get("%s/%s/%s/default/metadata" % (self.endpoint, slug, timestamp))
        metadata.raise_for_status()
        return metadata.json()
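# A minimal usage sketch for ModelRegistryClient above; the host, slug and
# timestamp are placeholders for whatever the registry actually serves.
client = ModelRegistryClient("registry.example.com:8080")
metadata = client.retrieve_model("my-model", "20200101T000000", "/tmp/my-model")
print(metadata)  # JSON metadata returned by the registry for the extracted model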
def establish_moodle_session(user, passwd): session = Session() response = session.get('https://www.moodle.tum.de') response = session.get('https://www.moodle.tum.de/Shibboleth.sso/Login?providerId=https://tumidp.lrz.de/idp/shibboleth&target=https://www.moodle.tum.de/auth/shibboleth/index.php') response = session.post('https://tumidp.lrz.de/idp/profile/SAML2/Redirect/SSO?execution=e1s1', data={'j_username': user, 'j_password': passwd, '_eventId_proceed':''}) parsed = html.fromstring(response.text) session.post('https://www.moodle.tum.de/Shibboleth.sso/SAML2/POST', data={'RelayState': parsed.forms[0].fields['RelayState'], 'SAMLResponse': parsed.forms[0].fields['SAMLResponse']}) return session
class Uploader:
    def __init__(self):
        self.__s = Session()
        self.__header = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.10 Safari/537.36"}
        self.__url = "http://webhd1.ttu.edu.tw/"
        self.__index = "index.php"
        self.__main = "main/"
        self.__showhd = "showhd.php"
        self.__upload = "upload.php"
        self.__sharefile = "sharefile.php"
        self.__login_data = {"ID": "", "PWD": "", "Submit": "登入"}
        self.__upload_form = {"GoUpload": "1", "MAX_FILE_SIZE": "102400000",
                              "fname1": "", "fname2": "", "fname3": "", "fname4": "",
                              "orgfn1": "", "orgfn2": "", "orgfn3": "", "orgfn4": ""}
        self.__file_path = "C:\\fakepath\\"
        self.__file_name = ""
        self.__files = {"userfile1": "", "userfile2": "", "userfile3": "", "userfile4": ""}
        self.__share_form = {"fname": "", "idname": "", "ShareKey": "", "Confirm": "確認送出"}
        self.__share_idname = ""
        self.__s.headers.update(self.__header)

    def login(self):
        self.__response = self.__s.get(self.__url + self.__index)
        self.__login_data["ID"] = ""
        self.__login_data["PWD"] = ""
        self.__response = self.__s.post(self.__url + self.__index, data=self.__login_data)

    def upload(self):
        self.__response = self.__s.get(self.__url + self.__main + self.__showhd,
                                       params={"Action": "Upload"})
        self.__upload_form["fname1"] = self.__file_name
        self.__upload_form["orgfn1"] = self.__file_path + self.__file_name
        self.__files["userfile1"] = open(self.__file_name, "rb")
        self.__response = self.__s.post(self.__url + self.__main + self.__upload,
                                        data=self.__upload_form, files=self.__files)

    def share(self):
        self.__response = self.__s.get(self.__url + self.__main + self.__showhd,
                                       params={"Action": "ShareFile", "fname": self.__file_name})
        self.__share_form["fname"] = self.__file_name
        self.__share_form["idname"] = self.__share_idname
        self.__response = self.__s.post(self.__url + self.__main + self.__sharefile,
                                        data=self.__share_form)
def test_saves_content_as_gzip(self): s = Session() cassette_name = 'handles_digest_auth' match = ['method', 'uri', 'digest-auth'] with Betamax(s).use_cassette(cassette_name, match_requests_on=match): r = s.get('https://httpbin.org/digest-auth/auth/user/passwd', auth=HTTPDigestAuth('user', 'passwd')) assert r.ok assert r.history[0].status_code == 401 s = Session() with Betamax(s).use_cassette(cassette_name, match_requests_on=match): r = s.get('https://httpbin.org/digest-auth/auth/user/passwd', auth=HTTPDigestAuth('user', 'passwd')) assert r.json() is not None
def get_maomaos(location_id=12030): start = 0 maomaos = [] ids = [] while(True): print(start) if start > 29: break sess = Session() res = sess.get(api(start=start, location_id=12030), headers=HEADERS, timeout=50, allow_redirects=False) table = BeautifulSoup(res.content, "html.parser").find('table') trs = table.findAll('tr') for i, tr in enumerate(trs): img_url = to_img_full_url(tr.find('img').attrs['src']) detail_url = BASE_URL + tr.find('a').attrs['href'] desc, _, location, _, breed, _ = tr.find( 'div').text.strip().split('\n\t\t\t\t\t\t\t') detail_res = sess.get( detail_url, headers=HEADERS, timeout=50, allow_redirects=False) if detail_res.status_code == 301: continue soup = BeautifulSoup(detail_res.content, "html.parser") detail_table, info_table, _ = soup.findAll('table') img_tr, content_tr = detail_table.findAll("tr") if len( detail_table.findAll("tr")) == 2 else [[]] + detail_table.findAll("tr") info = [td.text.strip() for td in info_table.findAll("td")] info_dict = dict(zip(info[0::2], info[1::2])) posted = to_str_date(info_dict['Posted']) print(desc) img_urls = [to_img_full_url(img.attrs['src']) for img in img_tr.findAll('img')] if not isinstance(img_tr, list) else ["https://ws1.sinaimg.cn/large/006tNc79gy1fovo1rjkghj305o05p40b.jpg"] id = detail_url.split('/')[3] if id in ids: break ids.append(id) maomaos.append({'id': detail_url.split('/')[3], 'title': tr.find('img').attrs['alt'], "desc": desc, 'location': location, 'order': i + start * 15, 'posted': to_str_date(info_dict['Posted']), 'updated': to_str_date(info_dict['Updated']), 'breed': breed, 'price': tr.find('span').text, 'content': content_tr.text.strip(), 'main_img_url': img_url, 'detail_url': detail_url, 'img_urls': img_urls, 'is_bicolor': is_bi_color((desc + ' ' + tr.find('img').attrs['alt']).lower())}) time.sleep(2) write_json("data/maomaos.json", maomaos) leancloud_objects = [leancloud_object( "Maomao", maomao, id_key="id") for maomao in maomaos] leancloud.Object.save_all([leancloud_object( "Maomao", maomao, id_key="id") for maomao in maomaos]) start += 10
class LocustioWebActions: def __init__(self): self.session = Session() def start_locust(self): if not os.path.exists(GC.RESULTS_BASE_PATH): os.makedirs(GC.RESULTS_BASE_PATH) current_date_time = "{}_{}".format(strftime("%x").replace("/", "."), strftime("%X")) latest_result_folder = "{}/{}".format(GC.RESULTS_BASE_PATH, current_date_time) os.makedirs(latest_result_folder) form_data = {"locust_count": LC.USERS, "hatch_rate": LC.RAMPUP} self.session.post("http://localhost:8089/swarm", data=form_data) return latest_result_folder def stop_locust(self): response = self.session.get("http://localhost:8089/stop") print("Response from stop: {}".format(response.content)) def reset_locust(self): response = self.session.get("http://localhost:8089/stats/reset") print("Response from Reset stats: {}".format(response.content)) def get_request_stats_csv(self): response = self.session.get("http://localhost:8089/stats/requests/csv") return response.content def get_stats_distribution_csv(self): response = self.session.get("http://localhost:8089/stats/distribution/csv") return response.content def get_exceptions_csv(self): response = self.session.get("http://localhost:8089/exceptions/csv") return response.content def get_starting_info(self): with open(GC.STARTING_INFO_FILE_PATH, "rb") as f: starting_info = pickle.load(f) return starting_info def kill_master(self): starting_info = self.get_starting_info() os.kill(starting_info["pid"], signal.SIGTERM)
class BaseClass:
    """Base crawler for Baidu search result pages."""

    def __init__(self, keywords):
        self.session = Session()
        self.headers = {
            "User-Agent": generate_user_agent(),
            "Accept": "*/*",
            "Accept-Encoding": "gzip, deflate, sdch, br",
            "Accept-Language": "zh-CN,zh;q=0.8",
            "Connection": "keep-alive"
        }
        self.session.headers.update(self.headers)  # attach the prepared headers to the session
        self.base_url = 'http://baidu.com/s?'
        self.key_words = 'ie=utf-8&wd={}&rn=50'.format(keywords)

    def request(self, url):
        """Fetch a page and return its raw content."""
        response = self.session.get(url)
        response.encoding = 'utf-8'
        return response.content

    @staticmethod
    def parser(html, xpath):
        """Parse the page source with the given XPath expression."""
        html = re.sub('<em>|</em>|<em class>', '', html)
        source = etree.HTML(html)
        return source.xpath(xpath)

    def output(self, url, xpath):
        """Return the HTML tags matched by the XPath expression."""
        return self.parser(self.request(url).decode(), xpath)
def get_response(url, **kwargs):
    header_info = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/44.0.2403.157 Safari/537.36'
    }
    if 'retries' in kwargs:
        retries = kwargs.pop('retries')
    else:
        retries = 3
    kwargs.setdefault('headers', header_info)  # use the default headers unless the caller supplied some
    if 'sess' in kwargs:
        sess = kwargs.pop('sess')
    else:
        sess = Session()
    if 'timeout' not in kwargs:
        kwargs['timeout'] = 10
    response = None
    try:
        response = sess.get(url, **kwargs)
    except Timeout as e:
        if retries > 0:
            # Retry with the same session and one fewer attempt remaining.
            kwargs['retries'] = retries - 1
            kwargs['sess'] = sess
            response = get_response(url, **kwargs)
        else:
            print(e)
    return response
def _serializeIngredients(self): """Convert children ingredients in triples""" res = [] session = Session() nutritionalInformations = {} ingredients = [] for ingredient in self.ingredients: response = session.get(config.USDA_API.format(config.USDA_API_KEY, ingredient['food'])).json() ing = Ingredient(name=response.get('report').get('food').get('name'), quantity=ingredient['quantity'], nutrients=response.get('report').get('food').get('nutrients')) nutritionalInformations = self._calculateNutrients(ingredient=ing, data=nutritionalInformations) ingredients.append(ing) ingredientList = IngredientList(ingredients) res.append((self.uri, SFO.ingredients, ingredientList.getURI())) res.extend(ingredientList.serialize()) res.extend(self._parseNutritionTable(nutritionalInformations, res)) return res
def login(self, username, password): s = Session() # login over bogi.ru params = {"login": username, "password": password} r1 = s.post(self.login_url, params, verify=False) # in case of failed login, bogi redirects to: # http://www.lostfilm.tv/blg.php?code=6&text=incorrect%20login/password if r1.request.url != self.login_url: url = urlparse(r1.url) if url.netloc == self.netloc: query = parse_qs(url.query) code = int(query.get('code', ['-1'])[0]) text = query.get('text', "-") r1.encoding = 'windows-1251' message = r1.text raise LostFilmTVLoginFailedException(code, text, message) else: raise LostFilmTVLoginFailedException(-1, None, None) # callback to lostfilm.tv soup = BeautifulSoup(r1.text) inputs = soup.findAll("input") action = soup.find("form")['action'] cparams = dict([(i['name'], i['value']) for i in inputs if 'value' in i.attrs]) s.post(action, cparams, verify=False) # call to profile page r3 = s.get(self.profile_url) # read required params self.c_uid = s.cookies['uid'] self.c_pass = s.cookies['pass'] self.c_usess = self.search_usess_re.findall(r3.text)[0]
class Site: def __init__(self, username, password): self.username = username self.password = password self.session = None def url(self): return "http://{}/collection/all".format(HOST) def login(self): self.session = Session() # drupal requires that you first GET the form r = self.session.get(self.url()) # then POST to it s = self.session.post( self.url(), data={ 'name': self.username, 'pass': self.password, 'form_id': 'user_login', 'op': 'Log in', }, headers={ 'referer': self.url(), } ) print("=== logged in ===") return self.session def get_session(self): if self.session is not None: return self.session self.session = self.login() return self.session def get_collection_page(self, page): return CollectionPage(self.session, page)
def get_url_page(self, url="http://www.optimalstackfacts.org/"):
    for _ in range(3):
        # proxies_url = choice(self.proxies_url_list)
        proxies_url = "http://82.209.49.200:8080"
        proxies = {
            # "http": "http://*****:*****@93.127.146.106:80/",
            "http": proxies_url,
            "https": proxies_url
        }
        try:
            session = Session()
            r = session.get(url, proxies=proxies, headers=self.headers, timeout=10)
            # r = requests.get(url, proxies=proxies,)
            print(r.status_code)
            if r.status_code in [200, 301]:
                page = r.content
                r.cookies.clear()
                r.close()
                return page
            else:
                r.cookies.clear()
                r.close()
        except Exception:
            # Ignore proxy/connection failures and retry on the next loop iteration.
            pass
def get_service(hass, config): """ Get the NMA notification service. """ if not validate_config(config, {DOMAIN: [CONF_API_KEY]}, _LOGGER): return None try: # pylint: disable=unused-variable from requests import Session except ImportError: _LOGGER.exception( "Unable to import requests. " "Did you maybe not install the 'Requests' package?") return None nma = Session() response = nma.get(_RESOURCE + 'verify', params={"apikey": config[DOMAIN][CONF_API_KEY]}) tree = ET.fromstring(response.content) if tree[0].tag == 'error': _LOGGER.error("Wrong API key supplied. %s", tree[0].text) else: return NmaNotificationService(config[DOMAIN][CONF_API_KEY])
class NmaNotificationService(BaseNotificationService): """ Implements notification service for NMA. """ def __init__(self, api_key): # pylint: disable=no-name-in-module, unused-variable from requests import Session self._api_key = api_key self._data = {"apikey": self._api_key} self.nma = Session() def send_message(self, message="", **kwargs): """ Send a message to a user. """ title = kwargs.get(ATTR_TITLE) self._data['application'] = 'home-assistant' self._data['event'] = title self._data['description'] = message self._data['priority'] = 0 response = self.nma.get(_RESOURCE + 'notify', params=self._data) tree = ET.fromstring(response.content) if tree[0].tag == 'error': _LOGGER.exception( "Unable to perform request. Error: %s", tree[0].text)
def getReferer(url, referer):
    useragent = (
        "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.14) Gecko/20080418 Ubuntu/7.10 (gutsy) Firefox/2.0.0.14"
    )
    session = Session()
    session.headers.update({"referer": referer, "user-agent": useragent})
    return session.get(url)
def test_session():
    s = Session()
    s.proxies = proxies
    s.get(url_get)
class HosszupuskaProvider(Provider, ProviderSubtitleArchiveMixin): """Hosszupuska Provider.""" languages = {Language('hun', 'HU')} | {Language(l) for l in [ 'hun', 'eng' ]} video_types = (Episode,) server_url = 'http://hosszupuskasub.com/' subtitle_class = HosszupuskaSubtitle hearing_impaired_verifiable = False multi_result_throttle = 2 # seconds def initialize(self): self.session = Session() self.session.headers = {'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")} def terminate(self): self.session.close() def get_language(self, text): if text == '1.gif': return Language.fromhosszupuska('hu') if text == '2.gif': return Language.fromhosszupuska('en') return None def query(self, series, season, episode, year=None, video=None): # Search for s01e03 instead of s1e3 seasona = "%02d" % season episodea = "%02d" % episode series = fix_inconsistent_naming(series) seriesa = series.replace(' ', '+').replace('\'', '') # get the episode page logger.info('Getting the page for episode %s', episode) url = self.server_url + "sorozatok.php?cim=" + seriesa + "&evad="+str(seasona) + \ "&resz="+str(episodea)+"&nyelvtipus=%25&x=24&y=8" logger.info('Url %s', url) r = self.session.get(url, timeout=10).content i = 0 soup = ParserBeautifulSoup(r, ['lxml']) table = soup.find_all("table")[9] subtitles = [] # loop over subtitles rows for row in table.find_all("tr"): i = i + 1 if "this.style.backgroundImage='url(css/over2.jpg)" in str(row) and i > 5: datas = row.find_all("td") # Currently subliminal not use these params, but maybe later will come in handy # hunagrian_name = re.split('s(\d{1,2})', datas[1].find_all('b')[0].getText())[0] # Translator of subtitle # sub_translator = datas[3].getText() # Posting date of subtitle # sub_date = datas[4].getText() sub_year = sub_english_name = sub_version = None # Handle the case when '(' in subtitle if datas[1].getText().count('(') == 2: sub_english_name = re.split('s(\d{1,2})e(\d{1,2})', datas[1].getText())[3] if datas[1].getText().count('(') == 3: sub_year = re.findall(r"(?<=\()(\d{4})(?=\))", datas[1].getText().strip())[0] sub_english_name = re.split('s(\d{1,2})e(\d{1,2})', datas[1].getText().split('(')[0])[0] if not sub_english_name: continue sub_season = int((re.findall('s(\d{1,2})', datas[1].find_all('b')[0].getText(), re.VERBOSE)[0]) .lstrip('0')) sub_episode = int((re.findall('e(\d{1,2})', datas[1].find_all('b')[0].getText(), re.VERBOSE)[0]) .lstrip('0')) if sub_season == season and sub_episode == episode: sub_language = self.get_language(datas[2].find_all('img')[0]['src'].split('/')[1]) sub_downloadlink = datas[6].find_all('a')[1]['href'] sub_id = sub_downloadlink.split('=')[1].split('.')[0] if datas[1].getText().count('(') == 2: sub_version = datas[1].getText().split('(')[1].split(')')[0] if datas[1].getText().count('(') == 3: sub_version = datas[1].getText().split('(')[2].split(')')[0] # One subtitle can be used for several releases sub_releases = [s.strip() for s in sub_version.split(',')] subtitle = self.subtitle_class(sub_language, sub_downloadlink, sub_id, sub_english_name.strip(), sub_season, sub_episode, sub_version, sub_releases, sub_year, asked_for_release_group=video.release_group, asked_for_episode=episode) logger.debug('Found subtitle: %r', subtitle) subtitles.append(subtitle) return subtitles def list_subtitles(self, video, languages): titles = [video.series] + video.alternative_series for title in titles: subs = self.query(title, video.season, video.episode, video.year, video=video) if subs: return subs time.sleep(self.multi_result_throttle) 
def download_subtitle(self, subtitle): r = self.session.get(subtitle.page_link, timeout=10) r.raise_for_status() # open the archive archive_stream = io.BytesIO(r.content) if is_rarfile(archive_stream): logger.debug('Archive identified as rar') archive = RarFile(archive_stream) elif is_zipfile(archive_stream): logger.debug('Archive identified as zip') archive = ZipFile(archive_stream) else: raise ProviderError('Unidentified archive type') subtitle.content = self.get_subtitle_from_archive(subtitle, archive)
from utils import choose_mod parser = ArgumentParser(add_help=False) parser.add_argument( "url", nargs="?", default="https://courses.softlab.ntua.gr/pl2/2019b/exercises/combmod.php", ) args = parser.parse_args() session = Session() i = 1 while True: response = session.get(args.url) soup = BeautifulSoup(markup=response.text, features="lxml") N = int(soup.find(id="N").text) K = int(soup.find(id="K").text) P = int(soup.find(id="P").text) print(f"Round {i}, C({N}, {K}) modulo {P}") answer = choose_mod(N, K, P) print(f"Answer: {answer}") response = session.post(args.url, data={"answer": answer}) soup = BeautifulSoup(markup=response.text, features="lxml") right = soup.find(attrs={"class": "right"}) if right: print(right.text) else:
def main(): sess = Session() # 存放此次登录的 cookie # === read xls === speed_level = input("搜索速度等级(1至60,默认为20):") if not speed_level: speed_level = "20" print(speed_level) print("读xls电话列…") if len(sys.argv)<=1: raise Exception("没有输入 xls 文件") print("文件名: " + sys.argv[1]) wb = xlrd.open_workbook(sys.argv[1]) sheet1 = wb.sheet_by_index(0) tels = filter_tels(sheet1) print("搜寻到可用的电话号码数: " + str(len(tels))) # === logging === print("登录账户…") verify_code = input("输入你当前的验证码:") while not verify_code: verify_code = input("输入你当前的验证码:") LOGIN_INFO.update({"code": verify_code}) resp = sess.post(URL_LOGIN, data=LOGIN_INFO, headers=req_headers) if not is_success(resp.status_code): raise Exception("登录失败。(%s)" % resp.status_code) # === requests === print("查询数据…") print("设置时间起始终止, 输入格式为:年年年年-月月-日日, 然后回车。") time_begin = input("起始日期: ") time_end = input("终止日期: ") if time_begin: time_begin = arrow.get(time_begin) time_begin = time_begin.format("YYYY-MM-DD HH:mm:ss") print("起始时间为: " + time_begin) if time_end: time_end = arrow.get(time_end) time_end = time_end.format("YYYY-MM-DD HH:mm:ss") print("结束时间为: " + time_end) # 产生文件名,然后写入 xls 表的首行 file_name = generate_new_xls_filename() print("输出文件: " + file_name) doc = xlwt.Workbook() sheet = doc.add_sheet("sheet1") # 写入第一行,列名 for n in range(len(PAGE_ROW)): sheet.write(0,n,PAGE_ROW[n]) doc.save(file_name) current_line = 1 # 当前 xls 写的行数 for current_tel in tels: # FIXME only fetch the first page resp = sess.get(URL_QUERY_ACCOUNT_PURCHASE_INFO_WITH_TIME_RANGE, params={ "purchaseDatebegin":time_begin, "purchaseDateend":time_end, "account": current_tel }, headers=req_headers, timeout=REQ_TIMEOUT) if not is_success(resp.status_code): raise Exception("请求数据时返回状态错误, code: {code}, account: {account}".format( code=resp.status_code, account=current_tel )) rst = parse_account_info(resp.content) for an_order in rst: order_id = an_order[1] data_to_write = [current_tel, order_id] print((current_tel, order_id)) resp = sess.get(URL_QUERY_ORDER_DETAIL, params={"account":current_tel, "id":order_id}, headers=req_headers, timeout=REQ_TIMEOUT) new_rst = parse_purchase_info(resp.content) data_to_write.append(new_rst[1]) data_to_write += [an_order[0], an_order[2], an_order[3], an_order[4]] data_to_write.append(new_rst[0]) for j in range(len(data_to_write)): sheet.write(current_line, j, data_to_write[j]) current_line += 1 # 行数增加 doc.save(file_name) print("写入%s" % current_tel) random_pause(speed_level)
class Icinga2Api(object): """ Main Class to implement the Icinga2 API Client """ module = None def __init__(self): """ Initialize all needed Variables """ self.icinga_host = module.params.get("host") self.icinga_port = module.params.get("port") self.icinga_username = module.params.get("username") self.icinga_password = module.params.get("password") self.state = module.params.get("state") self.hostname = module.params.get("hostname") self.hostnames = module.params.get("hostnames") self.start_time = module.params.get("start_time") self.end_time = module.params.get("end_time") self.duration = module.params.get("duration") self.object_type = module.params.get("object_type") self.all_services = module.params.get("all_services") self.author = module.params.get("author") self.comment = module.params.get("comment") self.fixed = module.params.get("fixed") self.filter_vars = None self.trigger_name = None self.icinga_url = "{0}:{1}/v1".format(self.icinga_host, self.icinga_port) self.connection = Session() self.connection.headers.update({'Accept': 'application/json'}) self.connection.auth = (self.icinga_username, self.icinga_password) urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) def run(self): res = dict(changed=False, ansible_module_results="none") print("hostname : {} ({})".format(self.hostname, type(self.hostname))) print("hostnames : {} ({})".format(self.hostnames, type(self.hostnames))) if self.hostname and self.hostnames: module.fail_json(msg=("Please choose whether to set downtimes for " "'hostname' or 'hostnames'. " "Both at the same time is not supported.")) if len(self.hostnames) != 0: res['changed'] = True r = dict() if iter(self.hostnames): for h in self.hostnames: r[h] = dict() if self.__host_exists(h): """ """ payload = { 'type': self.object_type, 'filter': "host.name == \"{}\"".format(h), 'author': self.author, 'comment': self.comment, 'start_time': self.start_time, 'end_time': self.end_time, 'duration': self.duration } if self.fixed: payload.update(fixed=True) else: payload.update(fixed=False) if self.filter_vars: payload.update(filter_vars=self.filter_vars) if self.trigger_name: payload.update(trigger_name=self.trigger_name) if self.object_type == 'Host' and self.all_services is True: payload.update(all_services=True) module.log(msg="downtime for: {}".format(h)) module.log(msg="payload: {}".format(payload)) code, msg = self.__schedule_downtime(payload) module.log(msg="{}: {}".format(code, msg)) r[h] = dict( msg=msg, status_code=code, ) else: module.log(msg="404: host {} is not known".format(h)) r[h] = dict( msg="host {} is not known".format(h), status_code=404, ) res['result'] = r elif len(self.hostname) != 0: pass else: print("hoo") # print(res) # result = dict(changed=True, # ansible_module_results="Downtimes removed", # result=dict(req.json(), status_code=req.status_code)) return res def __call_url(self, method='GET', path=None, data=None, headers=None): """ """ if headers is None: headers = { 'Accept': 'application/json', 'X-HTTP-Method-Override': method, } url = "{0}/{1}".format(self.icinga_url, path) print(url) self.connection.headers.update(headers) try: if (method == 'GET'): ret = self.connection.get(url, verify=False) self.connection.close() elif (method == 'POST'): self.connection.close() ret = self.connection.post(url, data=data, verify=False) else: print("unsupported") ret.raise_for_status() # print("------------------------------------------------------------------") # print(" text : {}".format(ret.text)) # print(" headers : {}".format(ret.headers)) 
# print(" code : {}".format(ret.status_code)) # print("------------------------------------------------------------------") return ret.status_code, json.loads(ret.text) except Exception as e: print(e) raise def __host_exists(self, hostname): """ """ code = 0 data = { "type": "Host", "attrs": ["name"], "filter": "match(\"{0}\", host.name)".format(hostname), } code, ret = self.__call_url(method='POST', path="objects/hosts", data=module.jsonify(data), headers={ 'Accept': 'application/json', 'X-HTTP-Method-Override': 'GET' }) results = ret['results'] if (code == 200 and len(results) != 0): # code = results[0]['code'] # status = results[0]['status'] attrs = results[0]['attrs'] if attrs.get('name') == hostname: return True return False def __schedule_downtime(self, data): """ """ code = 0 status = "no status available" path = 'actions/schedule-downtime' code, ret = self.__call_url(method='POST', path=path, data=module.jsonify(data), headers={ 'Accept': 'application/json', 'X-HTTP-Method-Override': 'POST' }) results = ret['results'] if (len(results) != 0): # print(json.dumps(results[0])) code = int(results[0]['code']) status = results[0]['status'] return code, status
class User(object): """ class for user credentials and sending and posting requests Attributes ---------- session : requests.Session username : str logged_in : bool Boolean standing for login state. True if logged in """ def __init__(self): super(User, self).__init__() self.session = Session() self.logged_in = False self.username = None def __del__(self): try: self.session.close() except TypeError: pass def login(self, username): """ Sets the attributes according to login """ self.username = username self.logged_in = True return self def logout(self): """ When logging out """ self.logged_in = False def check_login(self): """ Raise an error if user is not logged in """ if self.logged_in is False: raise AUTHError('%s is not logged in.' % self.username) def post(self, url, **kwargs): """ Wrap session post """ response = self.session.post(url, **kwargs) return response def get(self, url, **kwargs): """ Wrap session get """ response = self.session.get(url, **kwargs) return response @staticmethod def check_response(response): """ Check for errors in a REST call """ if response.ok: return response.json() else: response.raise_for_status()
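# A minimal usage sketch for the User wrapper above; the URL is a placeholder
# and AUTHError is assumed to come from the surrounding project.
user = User().login("alice")
user.check_login()                                   # raises AUTHError if not logged in
data = User.check_response(user.get("https://httpbin.org/get"))
user.logout()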
class Base: """访问教务系统的基类""" def __init__(self, user_dictionary, debug=False): if debug: self._open_debug() self.cache = Session() # 登陆教务系统 message = self.cache.post( # "http://mjwgl.ahnu.edu.cn/login/check.shtml", # 旧接口 "http://mjwgl.ahnu.edu.cn/login/remotelogin", data={ "username": user_dictionary["username"], "password": user_dictionary["password"], "usertype": "stu", "device": "aphone", }, headers={ "Host": "mjwgl.ahnu.edu.cn", }) self.sessionid = message.cookies["PHPSESSID"] assert message.json()["success"] == "success", message.json( )["message"] def _open_debug(self): logging.basicConfig( level=logging.DEBUG, format='[%(levelname)s]-[%(asctime)s] %(message)s', datefmt='%Y-%m-%d %H:%M:%S', ) def get_page(self, target_url): """GET 获取页面内容""" if target_url.startswith("http://"): rep = self.cache.get(target_url, allow_redirects=False) else: rep = self.cache.get("http://mjwgl.ahnu.edu.cn/" + target_url, allow_redirects=False) assert rep.status_code == 200, "未登陆" return rep.content.decode("UTF-8") def post_data(self, target_url, data=None, json=None, **kwargs): if target_url.startswith("http://"): rep = self.cache.post(target_url, data=data, json=json, allow_redirects=False, **kwargs) else: rep = self.cache.post("http://mjwgl.ahnu.edu.cn" + target_url, data=data, json=json, allow_redirects=False, **kwargs) assert rep.status_code == 200, "未登陆" return rep.json() def get_url(self, op: str) -> str: """ 根据操作的拼音缩写获取对应的URL 如: 课表查询: kbcx """ rep = self.cache.post("http://mjwgl.ahnu.edu.cn/appdata.shtml", { "requesttype": op, "sessionid": self.sessionid }, allow_redirects=False) self.sessionid = rep.cookies["PHPSESSID"] return rep.headers["Location"]
class LoginGithub(object): def __init__(self, username="******", password="******"): self.username = username self.password = password self.login_url = 'https://github.com/login' self.post_url = 'https://github.com/session' self.profile_url = 'https://github.com/settings/profile' # session维持会话, 不用cookies self.session = Session() self.headers = { 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36', 'Referer': 'https://github.com', } def token(self): response = self.session.get(self.login_url, headers=self.headers) selector = etree.HTML(response.text) token_value = selector.xpath( '//div[@id="login"]/form/input[2]/@value')[0] print(token_value) return token_value def login(self): post_data = { 'authenticity_token': self.token(), 'commit': 'Sign in', 'login': self.username, 'password': self.password, 'utf8': '✓', } response = self.session.post(self.post_url, data=post_data, headers=self.headers) if response.status_code == 200: print('登录成功') self.parse_html(response.text) response = self.session.get(self.profile_url, headers=self.headers) if response.status_code == 200: self.profile(response.text) def parse_html(self, html): select = etree.HTML(html) # obj = BeautifulSoup(html, 'html.parser') # # //*[@id="dashboard"]/div[2]/div[7] # dynamics = select.xpath( # '//*[@id="dashboard"]/div[2]/div[contains(@class, "watch_started")]') # div = obj.find('div', {'id': 'dashboard'}) # print(div) # l2_div = div.find('div', {'class': 'news'}) # print(l2_div) # divs = l2_div.find_all('div', {'class': 'watch_started'}) # print(divs) # dashboard > div.news.column.two-thirds > div:nth-child(7) if dynamics: for dynamic in dynamics: """只看started, 还有follow 类的""" user = dynamic.xpath( './/div[contains(@class, "width-full")]/div[contains(@class, "flex-items-baseline")]/div/a[1]/text()' ).strip() started_item = dynamic.xpath( './/div[contains(@class, "width-full")]/div[contains(@class, "flex-items-baseline")]/div/a[2]/text()' ).strip() print('{} started {}'.format(user, started_item)) def profile(self, html): selector = etree.HTML(html) user_profile_email = selector.xpath( '//select[@id="user_profile_email"]/option[last()]/text()') print('Your email is {}'.format(user_profile_email))
IGNORED_UIDS = [ uid for uid, username in PLEX_USERS.items() if username.lower() in USERNAME_IGNORE ] IGNORED_UIDS.extend((int(ACCOUNT.id), 0)) # Get the Tautulli history. PARAMS = { 'cmd': 'get_users_table', 'order_column': 'last_seen', 'order_dir': 'asc', 'length': 200, 'apikey': TAUTULLI_APIKEY } TAUTULLI_USERS = [] try: GET = SESSION.get(TAUTULLI_URL.rstrip('/') + '/api/v2', params=PARAMS).json()['response']['data']['data'] for user in GET: if user['user_id'] in IGNORED_UIDS: continue elif IGNORE_NEVER_SEEN and not user['last_seen']: continue TAUTULLI_USERS.append(user) except Exception as e: exit("Tautulli API 'get_users_table' request failed. Error: {}.".format(e)) def time_format(total_seconds): # Display user's last history entry days = total_seconds // 86400 hours = (total_seconds - days * 86400) // 3600 minutes = (total_seconds - days * 86400 - hours * 3600) // 60
import pandas as pd symbols = [] topCoins = [] url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest' parameters = {'start': '1', 'limit': '1000', 'convert': 'USD'} headers = { 'Accepts': 'application/json', 'X-CMC_PRO_API_KEY': '7b96af50-71b9-48c8-8211-3396385f4b08', } session = Session() session.headers.update(headers) try: response = session.get(url, params=parameters) data = json.loads(response.text) d = data['data'] #f=d[0]['slug'] for index in range(len(d)): topCoins.append([d[index]['slug'], d[index]['symbol']]) for index in range(len(d)): symbols.append(d[index]['symbol']) #print(topCoins) print(len(topCoins)) print(topCoins) except (ConnectionError, Timeout, TooManyRedirects) as e: print(e) with open('TopCoins.csv', 'w') as filehandle:
def _collect_sapcloudconnector(self): # # Uses monitoring API: # https://help.sap.com/viewer/cca91383641e40ffbe03bdc78f00f681/Cloud/en-US/f6e7a7bc6af345d2a334c2427a31d294.html # # Configuring : Make port 8443 available. add this to users.xml and restart SCC. # # <user username="******" password="******" roles="sccmonitoring"/> # cloud_connector_url = "{0}:{1}/".format(self.url, "8443").replace( "http://", "https://") self.log.debug( "{0}: Trying to connect to sapcloudconnector on url: {1}".format( self.host, cloud_connector_url)) health_url = cloud_connector_url + "exposed?action=ping" # # 1 second timeout to connect, 30 to read data. # status_code = 0 session = Session() session.auth = HTTPBasicAuth(self.user, self.password) session.timeout = (1, 30) urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) try: health = session.get(cloud_connector_url) status_code = health.status_code except Exception: self.log.debug( "{0}: No SAP Cloud connector found on url: {1}".format( self.host, health_url)) status_code = 500 if status_code == 200: self.log.info( "{0}: Got health from cloud connector on url: {1}".format( self.host, health_url)) external_id = str(self._scc_external_id()) component_data = { "name": "SCC", "description": "SAP Cloud Connector", # "type": "SAP Cloud Connector", # "sid": "SCC", "host": self.host, # "system_number": "99", # "version": "v1", "domain": self.domain, "environment": self.stackstate_environment, "tags": self.tags # "labels": [] } self.log.debug("{0}: -----> component_data : {1}".format( self.host, component_data)) self.log.debug("{0}: -----> external_id : {1}".format( self.host, external_id)) self.component(external_id, "sap-cloud-connector", component_data) # define relation cloud connector --> host # is hosted on source_id = external_id target_id = self._host_external_id() relation_data = {} self.relation(source_id, target_id, "is hosted on", relation_data) # define scc status event self.event({ "timestamp": int(time.time()), "source_type_name": "SAP:scc state", # "source_type_name": "SAP:host instance", "msg_title": "SCC status update.", "msg_text": "", "host": self.host, "tags": ["instance_id:99", "status:sapcontrol-green"] }) # # Lists sub accounts to the SAP Cloud and connection tunnels # subaccount_url = cloud_connector_url + "api/monitoring/subaccounts" subaccount_reply = session.get(subaccount_url) if subaccount_reply.status_code == 200: reply = subaccount_reply.text.encode('utf-8') self.log.debug( "{0}: Sub accounts reply from cloud connector : {1}". 
format(self.host, reply)) subaccounts = json.loads(subaccount_reply.text) self.log.debug( "{0}: JSON sub accounts from cloud connector : {1}".format( self.host, subaccounts)) for subaccount in subaccounts["subaccounts"]: self.log.debug("{0}: subaccount: {1}".format( self.host, subaccount)) # define cloud connector component subaccount_name = str(subaccount.get("displayName")) # display name is not always setup if subaccount_name == "None": subaccount_name = str(subaccount.get("subaccount")) external_id = str( self._scc_subaccount_external_id( subaccount.get("subaccount"))) tunnel = subaccount.get("tunnel") component_data = { "name": subaccount_name, "description": str(subaccount.get("description")), "state": str(tunnel.get("state")), "connectedSince": str(tunnel.get("connectedSince")), "connections": str(tunnel.get("connections")), "user": str(tunnel.get("user")), "regionHost": str(subaccount.get("regionHost")), "subaccount": str(subaccount.get("subaccount")), "locationID": str(subaccount.get("locationID")), "layer": "SAP SCC Sub Accounts", "domain": self.domain, "environment": self.stackstate_environment, "host": self.host, "tags": self.tags # "labels": [] } self.log.debug("{0}: -----> component_data : {1}".format( self.host, component_data)) self.log.debug("{0}: -----> external_id : {1}".format( self.host, external_id)) self.component(external_id, "sap-scc-subaccount", component_data) # define relation cloud connector --> host # is hosted on source_id = external_id target_id = self._scc_external_id() relation_data = {} self.relation(source_id, target_id, "is_setup_on", relation_data) # define cloud connector status event tunnel_status = self._scc_subaccount_status( tunnel.get("state")) self.event({ "timestamp": int(time.time()), "source_type_name": "SAP:scc subaccount state", "msg_title": "SAP Cloud Connector '{0}' status update.".format( subaccount_name), "msg_text": "", "host": self.host, "tags": [ "status:{0}".format(tunnel_status), "subaccount_name:{0}".format(subaccount_name) ] }) else: if subaccount_reply.status_code == 400: msg = "{0}: SAP Cloud connector monitoring sub account page not " \ "supported in this version of SCC.".format(self.host) self.log.info(msg) else: status = subaccount_reply.status_code self.log.error( "{0}: No SAP Cloud connector sub account found. Status code: {1}" .format(self.host, status)) # # List backend SAP systems and virtual names. 
# backends_url = cloud_connector_url + "api/monitoring/connections/backends" backends_reply = session.get(backends_url) if backends_reply.status_code == 200: reply = backends_reply.text.encode('utf-8') self.log.debug( "{0}: Backends reply from cloud connector : {1}".format( self.host, reply)) backends = json.loads(backends_reply.text) self.log.info( "{0}: JSON backends from cloud connector : {1}".format( self.host, backends)) for subaccount in backends["subaccounts"]: # subaccount["regionHost"] # subaccount["subaccount"] # subaccount["locationID"] virtualbackend = str(subaccount.get("virtualBackend")) for backend in subaccount["backendConnections"]: external_id = self._scc_backend_external_id( subaccount["subaccount"], virtualbackend) component_data = { "virtualBackend": virtualbackend, "internalBackend": str(backend.get("internalBackend")), "protocol": str(backend.get("protocol")), "idle": str(backend.get("idle")), "active": str(backend.get("active")), "labels": [], "layer": "SAP SCC Back-ends", "domain": self.domain, "environment": self.stackstate_environment, "tags": self.tags } self.log.debug("{0}: ------> external_id : {1}".format( self.host, external_id)) self.component(external_id, "sap-cloud", component_data) # define relation cloud connector --> host # is hosted on source_id = external_id target_id = self._scc_subaccount_external_id( subaccount["subaccount"]) relation_data = {} self.relation(source_id, target_id, "is connected to", relation_data) self.event({ "timestamp": int(time.time()), "source_type_name": "SAP:cloud component state", "msg_title": "SAP Cloud Connector '{0}' status update.".format( backend["virtualBackend"]), "msg_text": "", "host": self.host, "tags": [ "active:{0}".format(backend["active"]), "idle:{0}".format(backend["idle"]) ] }) else: if backends_reply.status_code == 400: msg = "{0}: SAP Cloud connector monitoring backend page not supported " \ "in this version of SCC.".format(self.host) self.log.info(msg) else: status = backends_reply.status_code self.log.error( "{0}: No SAP Cloud connector backends found. Status code: {1}" .format(self.host, status)) if status_code == 401: msg = "{0}: Authentication failed, check your config.yml and SCC users.xml " \ "for corresponding username and password.".format(self.host) self.log.error(msg) session.close()
class Crawler: '''Blueprint for creating new crawlers''' def __init__(self): self._destroyed = False self.executor = futures.ThreadPoolExecutor(max_workers=3) # Initialize cloudscrapper try: self.scraper = cloudscraper.create_scraper(browser={ 'platform': 'linux', 'mobile': False }) except Exception as err: logger.exception('Failed to initialize cloudscraper') self.scraper = Session() # end try # Must resolve these fields inside `read_novel_info` self.novel_title = 'N/A' self.novel_author = 'N/A' self.novel_cover = None self.is_rtl = False # Each item must contain these keys: # `id` - 1 based index of the volume # `title` - the volume title (can be ignored) self.volumes = [] # Each item must contain these keys: # `id` - 1 based index of the chapter # `title` - the title name # `volume` - the volume id of this chapter # `volume_title` - the volume title (can be ignored) # `url` - the link where to download the chapter self.chapters = [] # Other stuffs - not necessary to resolve from crawler instance. self.home_url = '' self.novel_url = '' self.last_visited_url = None # end def def destroy(self): self._destroyed = True self.volumes.clear() self.chapters.clear() self.scraper.close() self.executor.shutdown(False) # end def # ------------------------------------------------------------------------- # # Implement these methods # ------------------------------------------------------------------------- # @abstractmethod def initialize(self): pass # end def @abstractmethod def login(self, email, password): pass # end def @abstractmethod def logout(self): pass # end def @abstractmethod def search_novel(self, query): '''Gets a list of results matching the given query''' pass # end def @abstractmethod def read_novel_info(self): '''Get novel title, autor, cover etc''' pass # end def @abstractmethod def download_chapter_body(self, chapter): '''Download body of a single chapter and return as clean html format.''' pass # end def def get_chapter_index_of(self, url): '''Return the index of chapter by given url or 0''' url = (url or '').strip().strip('/') for chapter in self.chapters: if chapter['url'] == url: return chapter['id'] # end if # end for return 0 # end def # ------------------------------------------------------------------------- # # Helper methods to be used # ------------------------------------------------------------------------- # @property def headers(self): return self.scraper.headers.copy() # end def @property def cookies(self): return {x.name: x.value for x in self.scraper.cookies} # end def def absolute_url(self, url, page_url=None): url = (url or '').strip() if not page_url: page_url = self.last_visited_url # end if if not url or len(url) == 0: return None elif url.startswith('//'): return self.home_url.split(':')[0] + ':' + url elif url.find('//') >= 0: return url elif url.startswith('/'): return self.home_url + url[1:] elif page_url: return page_url.strip('/') + '/' + url else: return self.home_url + url # end if # end def def is_relative_url(self, url): page = urlparse(self.novel_url) url = urlparse(url) return (page.hostname == url.hostname and url.path.startswith(page.path)) # end def def get_response(self, url, **kargs): if self._destroyed: return None # end if kargs = kargs or dict() # kargs['verify'] = kargs.get('verify', False) kargs['timeout'] = kargs.get('timeout', 150) # in seconds self.last_visited_url = url.strip('/') response = self.scraper.get(url, **kargs) response.encoding = 'utf-8' self.cookies.update({x.name: x.value for x in response.cookies}) 
response.raise_for_status() return response # end def def submit_form(self, url, data={}, multipart=False, headers={}): '''Submit a form using post request''' if self._destroyed: return None # end if headers.update({ 'Content-Type': 'multipart/form-data' if multipart else 'application/x-www-form-urlencoded; charset=UTF-8', }) response = self.scraper.post(url, data=data, headers=headers) response.encoding = 'utf-8' self.cookies.update({x.name: x.value for x in response.cookies}) response.raise_for_status() return response # end def def get_soup(self, *args, **kwargs): parser = kwargs.pop('parser', None) response = self.get_response(*args, **kwargs) return self.make_soup(response, parser) # end def def make_soup(self, response, parser=None): html = response.content.decode('utf-8', 'ignore') soup = BeautifulSoup(html, parser or 'lxml') if not soup.find('body'): raise ConnectionError('HTML document was not loaded properly') # end if return soup # end def def get_json(self, *args, **kargs): response = self.get_response(*args, **kargs) return response.json() # end def def download_cover(self, output_file): response = self.get_response(self.novel_cover) with open(output_file, 'wb') as f: f.write(response.content) # end with # end def # ------------------------------------------------------------------------- # blacklist_patterns = [ r'^[\W\D]*(volume|chapter)[\W\D]+\d+[\W\D]*$', ] bad_tags = [ 'noscript', 'script', 'iframe', 'form', 'hr', 'img', 'ins', 'button', 'input', 'amp-auto-ads', 'pirate' ] block_tags = ['h3', 'div', 'p'] def is_blacklisted(self, text): if len(text.strip()) == 0: return True # end if for pattern in self.blacklist_patterns: if re.search(pattern, text, re.IGNORECASE): return True # end if # end for return False # end def def clean_contents(self, div): if not div: return div # end if div.attrs = {} for tag in div.find_all(True): if isinstance(tag, Comment): tag.extract() # Remove comments elif tag.name == 'br': next_tag = getattr(tag, 'next_sibling') if next_tag and getattr(next_tag, 'name') == 'br': tag.extract() # end if elif tag.name in self.bad_tags: tag.extract() # Remove bad tags elif not tag.text.strip(): tag.extract() # Remove empty tags elif self.is_blacklisted(tag.text): tag.extract() # Remove blacklisted contents elif hasattr(tag, 'attrs'): tag.attrs = {} # Remove attributes # end if # end for return div # end def def extract_contents(self, tag, level=0): body = [] if level == 0: self.clean_contents(tag) # end if for elem in tag.contents: if self.block_tags.count(elem.name): body += self.extract_contents(elem, level + 1) continue # end if text = '' if not elem.name: text = str(elem).strip() else: text = '<%s>%s</%s>' text = text % (elem.name, elem.text.strip(), elem.name) # end if if text: body.append(text) # end if # end for if level > 0: return body else: return [x for x in body if len(x.strip())] # end if # end def def cleanup_text(self, text): return re.sub(u'[⺀-⺙⺛-⻳⼀-⿕々〇〡-〩〸-〺〻㐀-䶵一-鿃豈-鶴侮-頻並-龎]', '', str(text), flags=re.UNICODE)
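# A hedged sketch of how a site-specific crawler might build on the Crawler
# base class above. The URL and CSS selectors are hypothetical placeholders,
# and the remaining abstract methods (login, logout, search_novel) are omitted
# for brevity.
class ExampleCrawler(Crawler):
    base_url = 'https://example.com/'

    def initialize(self):
        self.home_url = self.base_url

    def read_novel_info(self):
        soup = self.get_soup(self.novel_url)
        self.novel_title = soup.select_one('h1.title').text.strip()
        self.novel_author = soup.select_one('a.author').text.strip()
        self.volumes.append({'id': 1})
        for idx, a in enumerate(soup.select('ul.chapter-list a')):
            self.chapters.append({
                'id': idx + 1,
                'volume': 1,
                'title': a.text.strip(),
                'url': self.absolute_url(a['href']),
            })

    def download_chapter_body(self, chapter):
        soup = self.get_soup(chapter['url'])
        content = soup.select_one('div.chapter-content')
        return ''.join(self.extract_contents(content))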
class OneDrive: """ Downloads shared file/folder to localhost with persisted structure. params: `str:url`: url to the shared one drive folder or file `str:path`: local filesystem path methods: `download() -> None`: fire async download of all files found in URL """ def __init__(self, url=None, path=None): if not (url and path): raise ValueError("URL to shared resource or path to download is missing.") self.url = url self.path = path self.prefix = "https://api.onedrive.com/v1.0/shares/" self.suffix = "/root?expand=children" self.session = Session() self.session.headers.update( { "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36" " (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36" } ) def _token(self, url): return "u!" + b64encode(url.encode()).decode() def _traverse_url(self, url, name=""): """ Traverse the folder tree and store leaf urls with filenames """ r = self.session.get(f"{self.prefix}{self._token(url)}{self.suffix}").json() name = name + os.sep + r["name"] # shared file if not r["children"]: file = {} file["name"] = name.lstrip(os.sep) file["url"] = r["@content.downloadUrl"] self.to_download.append(file) print(f"Found {file['name']}") # shared folder for child in r["children"]: if "folder" in child: self._traverse_url(child["webUrl"], name) if "file" in child: file = {} file["name"] = (name + os.sep + child["name"]).lstrip(os.sep) file["url"] = child["@content.downloadUrl"] self.to_download.append(file) print(f"Found {file['name']}") async def _download_file(self, file, session): async with session.get(file["url"], timeout=None) as r: filename = os.path.join(self.path, file["name"]) os.makedirs(os.path.dirname(filename), exist_ok=True) async with aiofiles.open(filename, "wb") as f: async for chunk in r.content.iter_chunked(1024 * 16): if chunk: await f.write(chunk) self.downloaded += 1 progress = int(self.downloaded / len(self.to_download) * 100) print(f"Download progress: {progress}%") async def _downloader(self): async with aiohttp.ClientSession() as session: await asyncio.wait( [self._download_file(file, session) for file in self.to_download] ) def download(self): print("Traversing public folder\n") self.to_download = [] self.downloaded = 0 self._traverse_url(self.url) print("\nStarting async download\n") asyncio.get_event_loop().run_until_complete(self._downloader())
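# Hedged usage sketch for the OneDrive helper above. The shared link is a
# placeholder; any publicly shared OneDrive file or folder URL should work,
# and the local path is assumed to be writable.
if __name__ == "__main__":
    shared_url = "https://1drv.ms/f/s!EXAMPLE-SHARE-TOKEN"  # hypothetical shared-folder link
    OneDrive(url=shared_url, path="./onedrive_download").download()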
from bs4 import BeautifulSoup as bs
from requests import Session

url = 'http://torlinkbgs6aabns.onion/'
s = Session()
s.proxies = {'http': 'socks5h://127.0.0.1:9052'}
tor_links = s.get(url)
page = bs(tor_links.text, 'html.parser')  # parse the response body, not the Response object
links = page.find('div', {'id': 'links'})
xpto = links.find_all(['h3', 'a'])
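# A small follow-up sketch: walk the collected <h3> section headings and <a>
# anchors and print the onion links found under each heading. Assumes the
# page layout observed above (a "links" container with headings and anchors).
for tag in xpto:
    if tag.name == 'h3':
        print('== {0} =='.format(tag.get_text(strip=True)))
    else:
        print('{0} -> {1}'.format(tag.get_text(strip=True), tag.get('href')))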
class ThreatConnect: """ """ def __init__(self, api_aid=None, api_sec=None, api_org=None, api_url=None, api_token=None, api_token_expires=None): """ """ # logger self.log_level = { 'debug': logging.DEBUG, 'info': logging.INFO, 'warning': logging.WARNING, 'error': logging.ERROR, 'critical': logging.CRITICAL } self.formatter = logging.Formatter( '%(asctime)s - %(name)s - %(levelname)s - %(message)s (%(funcName)s:%(lineno)d)' ) self.tcl = tc_logger() # debugging self._memory_monitor = True # credentials self._api_aid = api_aid self._api_sec = api_sec self._api_token = api_token self._api_token_expires = api_token_expires # user defined values self._api_org = api_org self._api_url = api_url self._api_result_limit = 200 # default values self._activity_log = False self._api_request_timeout = 30 self._api_retries = 5 # maximum of 5 minute window self._api_sleep = 59 # seconds self._bulk_on_demand = False self._enable_report = False self._indicators_regex = indicators_regex self._proxies = {'https': None} self._retype = type(re.compile('')) # config items self._report = [] self._verify_ssl = False # initialize request session handle self._session = Session() # instantiate report object self.report = Report() # save custom types for later self._indicator_parser = IndicatorObjectParser(self) # # Memory Testing # # self._p = psutil.Process(os.getpid()) # self._memory = self._p.memory_info().rss # @property def indicator_parser(self): return self._indicator_parser def _renew_token(self): """ { "success":true, "apiToken":"2:1:-1:1474673195:poZAT:syqtNUKnGn9ZijE5hQ5/D99aD8dIEdgdDCIMbjk2Poc\u003d", "apiTokenExpires":"1474673195" } """ # make api call to get new token url = '{0!s}{1!s}'.format(self._api_url, '/appAuth') payload = {'expiredToken': self._api_token} token_response = self._session.get(url, params=payload, verify=self._verify_ssl, timeout=self._api_request_timeout, proxies=self._proxies, stream=False) if token_response.status_code == 401: if 'application/json' in token_response.headers['content-type']: err_data = token_response.json().get('message') else: err_data = token_response.text err = 'Could not refresh ThreatConnect Token ({}).'.format( err_data) raise RuntimeError(err) # bcs - return new token and set expiration date token_data = token_response.json() self._api_token = token_data['apiToken'] self._api_token_expires = int(token_data['apiTokenExpires']) def _api_request_headers(self, ro): """ """ timestamp = int(time.time()) if self._api_token is not None and self._api_token_expires is not None: window_padding = 15 # bcs - possible configuration option current_time = int(time.time()) - window_padding if (int(self._api_token_expires) < current_time): self._renew_token() authorization = 'TC-Token {0}'.format(self._api_token) elif self._api_aid is not None and self._api_sec is not None: signature = "{0}:{1}:{2}".format(ro.path_url, ro.http_method, timestamp) # python 2.7, does not work on 3.x and not tested on 2.6 # hmac_signature = hmac.new(self._api_sec, signature, digestmod=hashlib.sha256).digest() # authorization = 'TC {0}:{1}'.format(self._api_aid, base64.b64encode(hmac_signature)) # python 3.x hmac_signature = hmac.new(self._api_sec.encode(), signature.encode(), digestmod=hashlib.sha256).digest() authorization = 'TC {0}:{1}'.format( self._api_aid, base64.b64encode(hmac_signature).decode()) ro.add_header('Timestamp', timestamp) ro.add_header('Authorization', authorization) def api_filter_handler(self, resource_obj, filter_objs): """ """ data_set = None if not filter_objs: # build api 
call (no filters) default_request_object = resource_obj.default_request_object data_set = self.api_response_handler(resource_obj, default_request_object) else: # # process each filter added to the resource object for retrieve # first_run = True # # each resource object can have x filter objects with an operator to join or intersect results # for filter_obj in filter_objs: obj_list = [ ] # temp storage for results on individual filter objects owners = filter_obj.owners if len(owners) == 0: # handle filters with no owners owners = [self._api_org] # use default org # iterate through all owners for o in owners: self.tcl.debug('owner: {0!s}'.format(o)) if len(filter_obj) > 0: # request object are for api filters for ro in filter_obj: if ro.owner_allowed: ro.set_owner(o) if hasattr(filter_obj, 'api_entity'): results = self.api_response_handler( resource_obj, ro, api_entity=filter_obj.api_entity) else: results = self.api_response_handler( resource_obj, ro) if ro.resource_type not in [ ResourceType.OWNERS, ResourceType.VICTIMS, ResourceType.BATCH_JOBS ]: # TODO: should this be done? # post filter owners for obj in results: if obj.owner_name.upper() != o.upper(): results.remove(obj) obj_list.extend(results) else: ro = filter_obj.default_request_object if ro.owner_allowed: ro.set_owner(o) if hasattr(filter_obj, 'api_entity'): results = self.api_response_handler( resource_obj, ro, api_entity=filter_obj.api_entity) else: results = self.api_response_handler( resource_obj, ro) if ro.resource_type not in [ ResourceType.OWNERS, ResourceType.VICTIMS ]: # TODO: should this be done? # post filter owners for obj in results: if obj.owner_name.upper() != o.upper(): results.remove(obj) obj_list.extend(results) # # post filters # pf_obj_set = set(obj_list) self.tcl.debug('count before post filter: {0:d}'.format( len(obj_list))) for pfo in filter_obj.post_filters: self.tcl.debug('pfo: {0!s}'.format(pfo)) # # Report Entry # report_entry = ReportEntry() report_entry.add_post_filter_object(pfo) # current post filter method filter_method = getattr(resource_obj, pfo.method) # current post filter results post_filter_results = set( filter_method(pfo.filter, pfo.operator, pfo.description)) pf_obj_set = pf_obj_set.intersection( post_filter_results) self.report.add(report_entry) # set obj_list to post_filter results if filter_obj.post_filters_len > 0: obj_list = list(pf_obj_set) self.tcl.debug('count after post filter: {0:d}'.format( len(obj_list))) # no need to join or intersect on first run if first_run: data_set = set(obj_list) first_run = False continue # # depending on the filter type the result will be intersected or joined # if filter_obj.operator is FilterSetOperator.AND: data_set = data_set.intersection(obj_list) elif filter_obj.operator is FilterSetOperator.OR: data_set.update(set(obj_list)) # # only add to report if these results should be tracked (exclude attribute, tags, etc) # self.report.add_filtered_results(len(data_set)) # # after intersection or join add the objects to the resource object # for obj in data_set: resource_obj.add_obj(obj) def api_request(self, ro, log=True): """ """ api_response = None fail_msg = None h_content_length = None h_content_type = None start = datetime.now() # # enable activity log # if self._activity_log: ro.enable_activity_log() # # prepare request # url = '{0!s}{1!s}'.format(self._api_url, ro.request_uri) api_request = Request(ro.http_method, url, data=ro.body, params=ro.payload) request_prepped = api_request.prepare() # # generate headers # 
ro.set_path_url(request_prepped.path_url) self._api_request_headers(ro) request_prepped.prepare_headers(ro.headers) # # Debug # if log: self.tcl.debug('request_object: {0!s}'.format(ro)) self.tcl.debug('url: {0!s}'.format(url)) self.tcl.debug('path url: {0!s}'.format(request_prepped.path_url)) # # api request (gracefully handle temporary communications issues with the API) # for i in range(1, self._api_retries + 1, 1): try: api_response = self._session.send( request_prepped, verify=self._verify_ssl, timeout=self._api_request_timeout, proxies=self._proxies, stream=ro.stream) break except exceptions.ReadTimeout as e: self.tcl.error('Error: {0!s}'.format(e)) self.tcl.error( 'The server may be experiencing delays at the moment.') self.tcl.info( 'Pausing for {0!s} seconds to give server time to catch up.' .format(self._api_sleep)) time.sleep(self._api_sleep) self.tcl.info('Retry {0!s} ....'.format(i)) if i == self._api_retries: self.tcl.critical('Exiting: {0!s}'.format(e)) raise RuntimeError(e) except exceptions.ConnectionError as e: self.tcl.error('Error: {0!s}'.format(e)) self.tcl.error('Connection Error. The server may be down.') self.tcl.info( 'Pausing for {0!s} seconds to give server time to catch up.' .format(self._api_sleep)) time.sleep(self._api_sleep) self.tcl.info('Retry {0!s} ....'.format(i)) if i == self._api_retries: self.tcl.critical('Exiting: {0!s}'.format(e)) raise RuntimeError(e) except socket.error as e: self.tcl.critical('Exiting: {0!s}'.format(e)) raise RuntimeError(e) # # header values # if 'content-length' in api_response.headers: h_content_length = api_response.headers['content-length'] if 'content-type' in api_response.headers: h_content_type = api_response.headers['content-type'] # # raise exception on *critical* errors # non_critical_errors = [ b'The MD5 for this File is invalid, a File with this MD5 already exists', # 400 (application/json) b'The SHA-1 for this File is invalid, a File with this SHA-1 already exists', # 400 (application/json) b'The SHA-256 for this File is invalid, a File with this SHA-256 already exists', # 400 (application/json) b'The requested resource was not found', # 404 (application/json) b'Could not find resource for relative', # 500 (text/plain) b'The requested Security Label was not removed - access was denied', # 401 (application/json) ] # # TODO: work out some logic to improve the API error handling, possible area where API could improve # # valid status codes 200, 201, 202 # if api_response.status_code in [400, 401, 403, 500, 503]: if api_response.status_code not in [200, 201, 202]: # check for non critical errors that have bad status codes nce_found = False fail_msg = api_response.content for nce in non_critical_errors: # api_response_dict['message'] not in non_critical_errors: if re.findall(nce, api_response.content): nce_found = True break if ro.failure_callback is not None: ro.failure_callback(api_response.status_code) # raise error on bad status codes that are not defined as nce if not nce_found: self.tcl.critical('Status Code: {0:d}'.format( api_response.status_code)) self.tcl.critical('Failed API Response: {0!s}'.format( api_response.content)) if ro.failure_callback is not None: ro.failure_callback(api_response.status_code) raise RuntimeError(api_response.content) # # set response encoding (best guess) # if api_response.encoding is None: ## api_response.encoding = api_response.apparent_encoding api_response.encoding = 'utf-8' # using apparent encoding is costly with bulk # # Debug # if log: self.tcl.debug('url: %s', 
api_response.url) self.tcl.debug('status_code: %s', api_response.status_code) self.tcl.debug('content-length: %s', h_content_length) self.tcl.debug('content-type: %s', h_content_type) # # Report # self.report.add_api_call() # count api calls self.report.add_request_time(datetime.now() - start) if log: self.tcl.debug('Request Time: {0!s}'.format(datetime.now() - start)) if self._enable_report: report_entry = ReportEntry() report_entry.add_request_object(ro) report_entry.set_request_url(api_response.url) report_entry.set_status_code(api_response.status_code) report_entry.set_failure_msg(fail_msg) self.report.add(report_entry) # # return response # return api_response def api_response_handler(self, resource_obj, ro, api_entity=None): """ """ # # initialize vars # api_response_dict = {} obj_list = [] # only track filter counts on request from this method ro.enable_track() # # debug # self.tcl.debug('Results Limit: {0!s}'.format(self._api_result_limit)) # only resource supports pagination if ro.resource_pagination: ro.set_result_limit(self._api_result_limit) ro.set_result_start(0) else: ro.set_remaining_results(1) while ro.remaining_results > 0: # # api request # api_response = self.api_request(ro) # self.tcl.debug('Results Content: {0!s}'.format(api_response.content)) self.tcl.debug('Status Code: {0!s}'.format( api_response.status_code)) self.tcl.debug('Content Type: {0!s}'.format( api_response.headers['content-type'])) # # Process API response # if api_response.headers['content-type'] == 'application/json': api_response_dict = api_response.json() # try and free memory for next api request api_response.close() del api_response # doesn't appear to clear memory # # BULK INDICATOR (does not have status) # if 'indicator' in api_response_dict: if ro.resource_type == ResourceType.INDICATORS: data = api_response_dict['indicator'] for item in data: obj_list.append( parse_typed_indicator( item, resource_obj, ro.description, ro.request_uri, self._indicators_regex, indicator_parser=self.indicator_parser)) if len(obj_list) % 500 == 0: self.tcl.debug('obj_list len: {0!s}'.format( len(obj_list))) elif api_response_dict['status'] == 'Failure': # handle failed request (404 Resource not Found) if 'message' in api_response_dict: self.tcl.error('{0!s} "{1!s}"'.format( api_response_dict['message'], ro.description)) ro.set_remaining_results(0) continue # # ADVERSARIES # elif ro.resource_type == ResourceType.ADVERSARIES: data = api_response_dict['data']['adversary'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_group(item, ResourceType.ADVERSARIES, resource_obj, ro.description, ro.request_uri)) # # CAMPAIGNS # elif ro.resource_type == ResourceType.CAMPAIGNS: data = api_response_dict['data']['campaign'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_group(item, ResourceType.CAMPAIGNS, resource_obj, ro.description, ro.request_uri)) # # INDICATORS # elif ro.resource_type == ResourceType.INDICATORS: data = api_response_dict['data']['indicator'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_typed_indicator( item, resource_obj, ro.description, ro.request_uri, self._indicators_regex, indicator_parser=self.indicator_parser)) # # ADDRESSES # elif ro.resource_type == ResourceType.ADDRESSES: data = api_response_dict['data']['address'] if not isinstance(data, list): data = [data] # for single results to be a 
list for item in data: obj_list.append( parse_typed_indicator( item, resource_obj, ro.description, ro.request_uri, self._indicators_regex, indicator_parser=self.indicator_parser)) # # DOCUMENTS # elif ro.resource_type == ResourceType.DOCUMENTS: data = api_response_dict['data']['document'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_group(item, ResourceType.DOCUMENTS, resource_obj, ro.description, ro.request_uri)) # # EMAILS # elif ro.resource_type == ResourceType.EMAILS: data = api_response_dict['data']['email'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_group(item, ResourceType.EMAILS, resource_obj, ro.description, ro.request_uri)) # # EMAIL ADDRESSES # elif ro.resource_type == ResourceType.EMAIL_ADDRESSES: data = api_response_dict['data']['emailAddress'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_typed_indicator( item, resource_obj, ro.description, ro.request_uri, self._indicators_regex, indicator_parser=self.indicator_parser)) # # CUSTOM INDICATORS # elif ro.resource_type == ResourceType.CUSTOM_INDICATORS: # api_entity MUST be provided for Custom Indicators data = api_response_dict['data'][api_entity] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_typed_indicator( item, resource_obj, ro.description, ro.request_uri, self._indicators_regex, indicator_parser=self.indicator_parser)) # # GROUPS # elif ro.resource_type == ResourceType.GROUPS: data = api_response_dict['data']['group'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: if item.get('type') in self.group_types: obj_list.append( parse_group(item, ResourceType.GROUPS, resource_obj, ro.description, ro.request_uri)) else: self.tcl.debug('Skipping unsupported Group Type') # # FILES # elif ro.resource_type == ResourceType.FILES: data = api_response_dict['data']['file'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_typed_indicator( item, resource_obj, ro.description, ro.request_uri, self._indicators_regex, indicator_parser=self.indicator_parser)) # # HOSTS # elif ro.resource_type == ResourceType.HOSTS: data = api_response_dict['data']['host'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_typed_indicator( item, resource_obj, ro.description, ro.request_uri, self._indicators_regex, indicator_parser=self.indicator_parser)) # # DNSResolutions # elif ro.resource_type == ResourceType.DNS_RESOLUTIONS: data = api_response_dict['data']['dnsResolution'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: if 'addresses' in item: # don't process dns resolutions that have no addresses obj_list.append(parse_dns_resolution(item)) # # INCIDENTS # elif ro.resource_type == ResourceType.INCIDENTS: data = api_response_dict['data']['incident'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_group(item, ResourceType.INCIDENTS, resource_obj, ro.description, ro.request_uri)) # # METRICS # # elif ro.resource_type == ResourceType.OWNER_METRICS: # data = api_response_dict['data']['ownerMetric'] # if not isinstance(data, list): # data = [data] # for single 
results to be a list # for item in data: # obj_list.append( # parse_metrics(item, resource_obj, ro.description, ro.request_uri)) # # MINE # # elif ro.resource_type == ResourceType.OWNER_MINE: # data = api_response_dict['data']['owner'] # if not isinstance(data, list): # data = [data] # for single results to be a list # for item in data: # obj_list.append( # parse_metrics(item, resource_obj, ro.description, ro.request_uri)) # # MEMBERS # # elif ro.resource_type == ResourceType.OWNER_MEMBERS: # data = api_response_dict['data']['user'] # if not isinstance(data, list): # data = [data] # for single results to be a list # for item in data: # obj_list.append( # parse_metrics(item, resource_obj, ro.description, ro.request_uri)) # # OWNERS # elif ro.resource_type == ResourceType.OWNERS: data = api_response_dict['data']['owner'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_owner(item, resource_obj, ro.description, ro.request_uri)) # # SIGNATURES # elif ro.resource_type == ResourceType.SIGNATURES: data = api_response_dict['data']['signature'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_group(item, ResourceType.SIGNATURES, resource_obj, ro.description, ro.request_uri)) # # TASKS # elif ro.resource_type == ResourceType.TASKS: data = api_response_dict['data']['task'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_task(item, ResourceType.TASKS, resource_obj, ro.description, ro.request_uri)) # # THREATS # elif ro.resource_type == ResourceType.THREATS: data = api_response_dict['data']['threat'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_group(item, ResourceType.THREATS, resource_obj, ro.description, ro.request_uri)) # # URLS # elif ro.resource_type == ResourceType.URLS: data = api_response_dict['data']['url'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_typed_indicator( item, resource_obj, ro.description, ro.request_uri, self._indicators_regex, indicator_parser=self.indicator_parser)) # # VICTIMS # elif ro.resource_type == ResourceType.VICTIMS: data = api_response_dict['data']['victim'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: # victims data comes back with no owner, manually add owner here item['owner'] = ro.owner obj_list.append( parse_victim(item, resource_obj, ro.description, ro.request_uri)) # # BatchJobs # elif ro.resource_type == ResourceType.BATCH_JOBS: data = api_response_dict['data']['batchStatus'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: # victims data comes back with no owner, manually add owner here item['owner'] = ro.owner obj_list.append( parse_batch_job(item, resource_obj, ro.description, ro.request_uri)) elif api_response.headers['content-type'] == 'text/plain': self.tcl.error('{0!s} "{1!s}"'.format(api_response.content, ro.description)) ro.set_remaining_results(0) continue # add_obj resource_pagination if required if ro.resource_pagination: # get the number of results returned by the api if ro.result_start == 0: ro.set_remaining_results( api_response_dict['data']['resultCount'] - ro.result_limit) else: ro.set_remaining_results(ro.remaining_results - ro.result_limit) # increment the start position 
ro.set_result_start(ro.result_start + ro.result_limit) else: ro.set_remaining_results(0) self.tcl.debug('Result Count: {0!s}'.format(len(obj_list))) self.report.add_unfiltered_results(len(obj_list)) return obj_list # # api / sdk settings # def result_pagination(self, ro, identifier): data = [] ro.set_result_limit(self._api_result_limit) ro.set_result_start(0) while ro.remaining_results > 0: api_response = self.api_request(ro) if api_response.headers['content-type'] != 'application/json': break api_response_dict = api_response.json() if api_response_dict['status'] != 'Success': break data.extend(api_response_dict['data'][identifier]) # get the number of results returned by the api if ro.result_start == 0: ro.set_remaining_results( api_response_dict['data']['resultCount'] - ro.result_limit) else: ro.set_remaining_results(ro.remaining_results - ro.result_limit) # increment the start position ro.set_result_start(ro.result_start + ro.result_limit) return data def report_enable(self): """ """ self._enable_report = True def report_disable(self): """ """ self._enable_report = False def set_activity_log(self, data_bool): """ enable or disable api activity log """ if isinstance(data_bool, bool): self._activity_log = data_bool def set_api_request_timeout(self, data_int): """ set timeout value for the requests module """ if isinstance(data_int, int): self._api_request_timeout = data_int else: raise AttributeError(ErrorCodes.e0110.value.format(data_int)) def set_api_retries(self, data): """ set the number of api retries before exception is raised """ if isinstance(data, int): self._api_retries = data else: raise AttributeError(ErrorCodes.e0120.value.format(data)) def set_api_sleep(self, data): """ set the amount of time between retries """ if isinstance(data, int): self._api_sleep = data else: raise AttributeError(ErrorCodes.e0130.value.format(data)) def set_api_result_limit(self, data_int): """ set the number of result to return per api request (500 max) """ if isinstance(data_int, int): self._api_result_limit = data_int else: raise AttributeError(ErrorCodes.e0140.value.format(data_int)) def set_proxies(self, proxy_address, proxy_port, proxy_user=None, proxy_pass=None): """ define proxy server to use with the requests module """ # "http": "http://*****:*****@10.10.1.10:3128/", # accept host with http(s) or without proxy_method = 'http://' if re.match('^http', proxy_address): proxy_method, proxy_host = proxy_address.split('//') proxy_method += '//' proxy_address = proxy_host # TODO: add validation if proxy_user is not None and proxy_pass is not None: self._proxies['https'] = '{0!s}{1!s}:{2!s}@{3!s}:{4!s}'.format( proxy_method, proxy_user, proxy_pass, proxy_address, proxy_port) else: self._proxies['https'] = '{0!s}{1!s}:{2!s}'.format( proxy_method, proxy_address, proxy_port) def get_proxies(self): """ get proxy settings """ return self._proxies def set_tcl_file(self, fqpn, level='info'): """ set the log file destination and log level """ file_path = os.path.dirname(fqpn) if os.access(file_path, os.W_OK): if self.tcl.level > self.log_level[level]: self.tcl.setLevel(self.log_level[level]) if self._api_token is not None: fh = ApiLoggingHandler(fqpn, self) else: fh = FileHandler(fqpn) # fh.set_name('tc_log_file') # not supported in python 2.6 if level in self.log_level.keys(): fh.setLevel(self.log_level[level]) else: fh.setLevel(self.log_level['info']) fh.setFormatter(self.formatter) self.tcl.addHandler(fh) # def set_tcl_level(self, level): # """ """ # if level in self.log_level.keys(): # if self.tcl.level 
> self.log_level[level]: # self.tcl.setLevel(self.log_level[level]) # self.tcl.handlers[0].setLevel(self.log_level[level]) def set_tcl_console_level(self, level): """ set the console log level """ if level in self.log_level.keys(): if self.tcl.level > self.log_level[level]: self.tcl.setLevel(self.log_level[level]) ch = logging.StreamHandler() # ch.set_name('console') # not supported in python 2.6 ch.setLevel(self.log_level[level]) ch.setFormatter(self.formatter) self.tcl.addHandler(ch) def set_indicator_regex(self, type_enum, compiled_regex): """ overwrite default SDK regex """ self.tcl.debug('overwrite regex for {0!s}'.format(type_enum.name)) if not isinstance(type_enum, IndicatorType): raise AttributeError(ErrorCodes.e0150.value.format(type_enum)) if not isinstance(compiled_regex, list): compiled_regex = [compiled_regex] cr_list = [] for cr in compiled_regex: if isinstance(cr, self._retype): cr_list.append(cr) else: raise AttributeError(ErrorCodes.e0160.value.format(cr)) self._indicators_regex[type_enum.name] = cr_list @property def group_types(self): """Return all defined ThreatConnect Group types. Returns: (list): A list of ThreatConnect Group types. """ return [ 'Adversary', 'Campaign', 'Document', 'Email', # 'Event', 'Incident', # 'Intrusion Set', 'Signature', # 'Report', 'Threat', 'Task' ] # # Resources # def adversaries(self): """ return an adversary container object """ self._indicator_parser.init() return Adversaries(self) def bulk(self): """ return a bulk container object """ self._indicator_parser.init() return Bulk(self) def bulk_indicators(self, on_demand=False): """ return a bulk indicator container object """ self._indicator_parser.init() return BulkIndicators(self, on_demand) def campaigns(self): """ return an adversary container object """ self._indicator_parser.init() return Campaigns(self) def documents(self): """ return a document container object """ self._indicator_parser.init() return Documents(self) def emails(self): """ return an email container object """ self._indicator_parser.init() return Emails(self) def groups(self): """ return an group container object """ self._indicator_parser.init() return Groups(self) def incidents(self): """ return an incident container object """ self._indicator_parser.init() return Incidents(self) def indicators(self): """ return an indicator container object """ self._indicator_parser.init() return Indicators(self) def owners(self): """ return an owner container object """ self._indicator_parser.init() return Owners(self) def signatures(self): """ return a signature container object """ self._indicator_parser.init() return Signatures(self) def tasks(self): """ return a task container object """ self._indicator_parser.init() return Tasks(self) def threats(self): """ return a threat container object """ self._indicator_parser.init() return Threats(self) def victims(self): """ return a victim container object """ self._indicator_parser.init() return Victims(self) def batch_jobs(self): """ return a batch container object """ self._indicator_parser.init() return BatchJobs(self)
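# Hedged usage sketch for the ThreatConnect client above. The URL, credentials
# and proxy values are placeholders; only methods defined in the class are used.
tc = ThreatConnect(api_aid='00000000-0000-0000-0000-000000000000',  # placeholder access id
                   api_sec='placeholder-secret-key',
                   api_org='Example Org',
                   api_url='https://api.threatconnect.example/api')
tc.set_api_result_limit(500)        # page size per API request (500 max)
tc.set_api_request_timeout(60)      # seconds
tc.set_proxies('10.10.1.10', 3128, proxy_user='user', proxy_pass='pass')
tc.set_tcl_console_level('debug')   # verbose SDK logging to the console
tc.report_enable()                  # collect per-request report entries
indicators = tc.indicators()        # container object; filters are added before retrieval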
class TitloviProvider(Provider, ProviderSubtitleArchiveMixin): subtitle_class = TitloviSubtitle languages = {Language.fromtitlovi(l) for l in language_converters['titlovi'].codes} | {Language.fromietf('sr-Latn')} server_url = 'https://titlovi.com' search_url = server_url + '/titlovi/?' download_url = server_url + '/download/?type=1&mediaid=' def initialize(self): self.session = Session() self.session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ' \ '(KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36' logger.debug('User-Agent set to %s', self.session.headers['User-Agent']) self.session.headers['Referer'] = self.server_url logger.debug('Referer set to %s', self.session.headers['Referer']) def terminate(self): self.session.close() def query(self, languages, title, season=None, episode=None, year=None, video=None): items_per_page = 10 current_page = 1 used_languages = languages lang_strings = [str(lang) for lang in used_languages] # handle possible duplicate use of Serbian Latin if "sr" in lang_strings and "sr-Latn" in lang_strings: logger.info('Duplicate entries <Language [sr]> and <Language [sr-Latn]> found, filtering languages') used_languages = filter(lambda l: l != Language.fromietf('sr-Latn'), used_languages) logger.info('Filtered language list %r', used_languages) # convert list of languages into search string langs = '|'.join(map(str, [l.titlovi for l in used_languages])) # set query params params = {'prijevod': title, 'jezik': langs} is_episode = False if season and episode: is_episode = True params['s'] = season params['e'] = episode if year: params['g'] = year # loop through paginated results logger.info('Searching subtitles %r', params) subtitles = [] while True: # query the server try: r = self.session.get(self.search_url, params=params, timeout=10) r.raise_for_status() soup = BeautifulSoup(r.content, 'lxml') # number of results result_count = int(soup.select_one('.results_count b').string) except: result_count = None # exit if no results if not result_count: if not subtitles: logger.debug('No subtitles found') else: logger.debug("No more subtitles found") break # number of pages with results pages = int(math.ceil(result_count / float(items_per_page))) # get current page if 'pg' in params: current_page = int(params['pg']) try: sublist = soup.select('section.titlovi > ul.titlovi > li') for sub in sublist: # subtitle id sid = sub.find(attrs={'data-id': True}).attrs['data-id'] # get download link download_link = self.download_url + sid # title and alternate title match = title_re.search(sub.a.string) if match: _title = match.group('title') alt_title = match.group('altitle') else: continue # page link page_link = self.server_url + sub.a.attrs['href'] # subtitle language match = lang_re.search(sub.select_one('.lang').attrs['src']) if match: try: # decode language lang = Language.fromtitlovi(match.group('lang')+match.group('script')) except ValueError: continue # relase year or series start year match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string) if match: r_year = int(match.group('year')) # fps match = fps_re.search(sub.select_one('.fps').string) if match: fps = match.group('fps') # releases releases = str(sub.select_one('.fps').parent.contents[0].string) # handle movies and series separately if is_episode: # season and episode info sxe = sub.select_one('.s0xe0y').string r_season = None r_episode = None if sxe: match = season_re.search(sxe) if match: r_season = int(match.group('season')) match = episode_re.search(sxe) 
if match: r_episode = int(match.group('episode')) subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title, alt_title=alt_title, season=r_season, episode=r_episode, year=r_year, fps=fps, asked_for_release_group=video.release_group, asked_for_episode=episode) else: subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title, alt_title=alt_title, year=r_year, fps=fps, asked_for_release_group=video.release_group) logger.debug('Found subtitle %r', subtitle) # prime our matches so we can use the values later subtitle.get_matches(video) # add found subtitles subtitles.append(subtitle) finally: soup.decompose() # stop on last page if current_page >= pages: break # increment current page params['pg'] = current_page + 1 logger.debug('Getting page %d', params['pg']) return subtitles def list_subtitles(self, video, languages): season = episode = None if isinstance(video, Episode): title = video.series season = video.season episode = video.episode else: title = video.title return [s for s in self.query(languages, fix_inconsistent_naming(title), season=season, episode=episode, year=video.year, video=video)] def download_subtitle(self, subtitle): r = self.session.get(subtitle.download_link, timeout=10) r.raise_for_status() # open the archive archive_stream = io.BytesIO(r.content) if is_rarfile(archive_stream): logger.debug('Archive identified as rar') archive = RarFile(archive_stream) elif is_zipfile(archive_stream): logger.debug('Archive identified as zip') archive = ZipFile(archive_stream) else: subtitle.content = r.content if subtitle.is_valid(): return subtitle.content = None raise ProviderError('Unidentified archive type') subs_in_archive = archive.namelist() # if Serbian lat and cyr versions are packed together, try to find right version if len(subs_in_archive) > 1 and (subtitle.language == 'sr' or subtitle.language == 'sr-Cyrl'): self.get_subtitle_from_bundled_archive(subtitle, subs_in_archive, archive) else: # use default method for everything else subtitle.content = self.get_subtitle_from_archive(subtitle, archive) def get_subtitle_from_bundled_archive(self, subtitle, subs_in_archive, archive): sr_lat_subs = [] sr_cyr_subs = [] sub_to_extract = None for sub_name in subs_in_archive: if not ('.cyr' in sub_name or '.cir' in sub_name): sr_lat_subs.append(sub_name) if ('.cyr' in sub_name or '.cir' in sub_name) and not '.lat' in sub_name: sr_cyr_subs.append(sub_name) if subtitle.language == 'sr': if len(sr_lat_subs) > 0: sub_to_extract = sr_lat_subs[0] if subtitle.language == 'sr-Cyrl': if len(sr_cyr_subs) > 0: sub_to_extract = sr_cyr_subs[0] logger.info(u'Using %s from the archive', sub_to_extract) subtitle.content = fix_line_ending(archive.read(sub_to_extract))
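# A standalone sketch of the Latin/Cyrillic selection rule used in
# get_subtitle_from_bundled_archive above, applied to a plain list of names.
def pick_serbian_subtitle(names, want_cyrillic=False):
    latin = [n for n in names if '.cyr' not in n and '.cir' not in n]
    cyrillic = [n for n in names if ('.cyr' in n or '.cir' in n) and '.lat' not in n]
    candidates = cyrillic if want_cyrillic else latin
    return candidates[0] if candidates else None

print(pick_serbian_subtitle(['movie.lat.srt', 'movie.cir.srt']))        # movie.lat.srt
print(pick_serbian_subtitle(['movie.lat.srt', 'movie.cir.srt'], True))  # movie.cir.srt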
from requests import Session ses = Session() ses.trust_env = False r = ses.get('http://127.0.0.1:2600/salt') if r.status_code >= 400: raise ValueError(f'Get {r.status_code} {r.reason}')
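# An equivalent sketch using requests' built-in helper instead of the manual
# status check above: raise_for_status() raises HTTPError for any 4xx/5xx reply.
from requests import HTTPError

try:
    r = ses.get('http://127.0.0.1:2600/salt')
    r.raise_for_status()
except HTTPError as err:
    raise ValueError(str(err))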
class EDSM: _TIMEOUT = 10 FAKE = ['CQC', 'Training', 'Destination'] # Fake systems that shouldn't be sent to EDSM def __init__(self): self.result = {'img': None, 'url': None, 'done': True} self.syscache = set() # Cache URLs of systems with known coordinates self.session = Session() self.lastship = None # Description of last ship that we sent to EDSM # Can't be in class definition since can only call PhotoImage after window is created EDSM._IMG_KNOWN = tk.PhotoImage( data= 'R0lGODlhEAAQAMIEAFWjVVWkVWS/ZGfFZ////////////////yH5BAEKAAQALAAAAAAQABAAAAMvSLrc/lAFIUIkYOgNXt5g14Dk0AQlaC1CuglM6w7wgs7rMpvNV4q932VSuRiPjQQAOw==' ) # green circle EDSM._IMG_UNKNOWN = tk.PhotoImage( data= 'R0lGODlhEAAQAKEDAGVLJ+ddWO5fW////yH5BAEKAAMALAAAAAAQABAAAAItnI+pywYRQBtA2CtVvTwjDgrJFlreEJRXgKSqwB5keQ6vOKq1E+7IE5kIh4kCADs=' ) # red circle EDSM._IMG_NEW = tk.PhotoImage( data= 'R0lGODlhEAAQAMZwANKVHtWcIteiHuiqLPCuHOS1MN22ZeW7ROG6Zuu9MOy+K/i8Kf/DAuvCVf/FAP3BNf/JCf/KAPHHSv7ESObHdv/MBv/GRv/LGP/QBPXOPvjPQfjQSvbRSP/UGPLSae7Sfv/YNvLXgPbZhP7dU//iI//mAP/jH//kFv7fU//fV//ebv/iTf/iUv/kTf/iZ/vgiP/hc/vgjv/jbfriiPriiv7ka//if//jd//sJP/oT//tHv/mZv/sLf/rRP/oYv/rUv/paP/mhv/sS//oc//lkf/mif/sUf/uPv/qcv/uTv/uUv/vUP/qhP/xP//pm//ua//sf//ubf/wXv/thv/tif/slv/tjf/smf/yYP/ulf/2R//2Sv/xkP/2av/0gP/ylf/2df/0i//0j//0lP/5cP/7a//1p//5gf/7ev/3o//2sf/5mP/6kv/2vP/3y//+jP///////////////////////////////////////////////////////////////yH5BAEKAH8ALAAAAAAQABAAAAePgH+Cg4SFhoJKPIeHYT+LhVppUTiPg2hrUkKPXWdlb2xHJk9jXoNJQDk9TVtkYCUkOy4wNjdGfy1UXGJYOksnPiwgFwwYg0NubWpmX1ArHREOFYUyWVNIVkxXQSoQhyMoNVUpRU5EixkcMzQaGy8xhwsKHiEfBQkSIg+GBAcUCIIBBDSYYGiAAUMALFR6FAgAOw==' ) EDSM._IMG_ERROR = tk.PhotoImage( data= 'R0lGODlhEAAQAKEBAAAAAP///////////yH5BAEKAAIALAAAAAAQABAAAAIwlBWpeR0AIwwNPRmZuVNJinyWuClhBlZjpm5fqnIAHJPtOd3Hou9mL6NVgj2LplEAADs=' ) # BBC Mode 5 '?' 
# Call an EDSM endpoint with args (which should be quoted) def call(self, endpoint, args, check_msgnum=True): try: url = 'https://www.edsm.net/%s?commanderName=%s&apiKey=%s&fromSoftware=%s&fromSoftwareVersion=%s' % ( endpoint, urllib2.quote(config.get('edsm_cmdrname').encode('utf-8')), urllib2.quote(config.get('edsm_apikey')), urllib2.quote(applongname), urllib2.quote(appversion), ) + args r = self.session.get(url, timeout=EDSM._TIMEOUT) r.raise_for_status() reply = r.json() if not check_msgnum: return reply (msgnum, msg) = reply['msgnum'], reply['msg'] except: if __debug__: print_exc() raise Exception(_("Error: Can't connect to EDSM")) # Message numbers: 1xx = OK, 2xx = fatal error, 3xx = error (but not generated in practice), 4xx = ignorable errors if msgnum // 100 not in (1, 4): raise Exception(_('Error: EDSM {MSG}').format(MSG=msg)) else: return reply # Just set link without doing a lookup def link(self, system_name): self.cancel_lookup() if system_name in self.FAKE: self.result = { 'img': '', 'url': None, 'done': True, 'uncharted': False } else: self.result = { 'img': '', 'url': 'https://www.edsm.net/show-system?systemName=%s' % urllib2.quote(system_name), 'done': True, 'uncharted': False } def lookup(self, system_name, known=0): self.cancel_lookup() if system_name in self.FAKE: self.result = { 'img': '', 'url': None, 'done': True, 'uncharted': False } elif known or system_name in self.syscache: self.result = { 'img': EDSM._IMG_KNOWN, 'url': 'https://www.edsm.net/show-system?systemName=%s' % urllib2.quote(system_name), 'done': True, 'uncharted': False } else: self.result = { 'img': EDSM._IMG_ERROR, 'url': 'https://www.edsm.net/show-system?systemName=%s' % urllib2.quote(system_name), 'done': True, 'uncharted': False } data = self.call('api-v1/system', '&sysname=%s&coords=1' % urllib2.quote(system_name), check_msgnum=False) if data == -1 or not data: # System not present - but don't create it on the assumption that the caller will self.result['img'] = EDSM._IMG_NEW self.result['uncharted'] = True elif data.get('coords'): self.result['img'] = EDSM._IMG_KNOWN self.syscache.add(system_name) else: self.result['img'] = EDSM._IMG_UNKNOWN self.result['uncharted'] = True # Asynchronous version of the above def start_lookup(self, system_name, known=0): self.cancel_lookup() if system_name in self.FAKE: self.result = { 'img': '', 'url': None, 'done': True, 'uncharted': False } elif known or system_name in self.syscache: self.result = { 'img': EDSM._IMG_KNOWN, 'url': 'https://www.edsm.net/show-system?systemName=%s' % urllib2.quote(system_name), 'done': True, 'uncharted': False } else: self.result = { 'img': '', 'url': 'https://www.edsm.net/show-system?systemName=%s' % urllib2.quote(system_name), 'done': False, 'uncharted': False } self.thread = threading.Thread(target=self.worker, name='EDSM worker', args=(system_name, self.result)) self.thread.daemon = True self.thread.start() def cancel_lookup(self): self.thread = None # orphan any existing thread self.result = { 'img': '', 'url': None, 'done': True } # orphan existing thread's results def worker(self, system_name, result): try: data = self.call('api-v1/system', '&sysname=%s&coords=1' % urllib2.quote(system_name), check_msgnum=False) if data == -1 or not data: # System not present - create it result['img'] = EDSM._IMG_NEW result['uncharted'] = True elif data.get('coords'): result['img'] = EDSM._IMG_KNOWN self.syscache.add(system_name) else: result['img'] = EDSM._IMG_UNKNOWN result['uncharted'] = True except: if __debug__: print_exc() 
result['img'] = EDSM._IMG_ERROR result['done'] = True # Send flight log and also do lookup def writelog(self, timestamp, system_name, coordinates, shipid=None): if system_name in self.FAKE: self.result = { 'img': '', 'url': None, 'done': True, 'uncharted': False } return self.result = { 'img': EDSM._IMG_ERROR, 'url': 'https://www.edsm.net/show-system?systemName=%s' % urllib2.quote(system_name), 'done': True, 'uncharted': False } args = '&systemName=%s&dateVisited=%s' % ( urllib2.quote(system_name), urllib2.quote( time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(timestamp))), ) if coordinates: args += '&x=%.3f&y=%.3f&z=%.3f' % coordinates if shipid: args += '&shipId=%d' % shipid reply = self.call('api-logs-v1/set-log', args) if reply.get('systemCreated'): self.result['img'] = EDSM._IMG_NEW else: self.result['img'] = EDSM._IMG_KNOWN self.syscache.add(system_name) def setranks(self, ranks): args = '' if ranks: for k, v in ranks.iteritems(): if v is not None: args += '&%s=%s' % (k, urllib2.quote('%d;%d' % v)) if args: self.call('api-commander-v1/set-ranks', args) def setcredits(self, credits): if credits: self.call('api-commander-v1/set-credits', '&balance=%d&loan=%d' % credits) def setshipid(self, shipid): if shipid is not None: self.call('api-commander-v1/set-ship-id', '&shipId=%d' % shipid) def updateship(self, shipid, shiptype, props=[]): if shipid is not None and shiptype: args = '&shipId=%d&type=%s' % (shipid, shiptype) for (slot, thing) in props: args += '&%s=%s' % (slot, urllib2.quote(unicode(thing))) self.call('api-commander-v1/update-ship', args) def sellship(self, shipid): if shipid is not None: self.call('api-commander-v1/sell-ship', '&shipId=%d' % shipid)
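# Hedged usage sketch for the EDSM helper above. It assumes a Tk root window
# already exists (the PhotoImage icons are created in __init__) and that
# 'edsm_cmdrname' and 'edsm_apikey' are set in the plugin config; the system
# name and coordinates below are placeholder values.
edsm = EDSM()
edsm.start_lookup('Shinrarta Dezhra')             # asynchronous lookup in a worker thread
while not edsm.result['done']:
    time.sleep(0.1)                               # poll until the worker fills in the result
print('%s -> %s' % (edsm.result['url'], 'uncharted' if edsm.result['uncharted'] else 'charted'))
edsm.writelog(time.time(), 'Shinrarta Dezhra', (55.7, 17.6, 27.2))   # placeholder coordinates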
class PodnapisiProvider(Provider): """Podnapisi Provider.""" languages = ({Language('por', 'BR'), Language('srp', script='Latn')} | { Language.fromalpha2(l) for l in language_converters['alpha2'].codes }) server_url = 'https://www.podnapisi.net/subtitles/' subtitle_class = PodnapisiSubtitle def __init__(self): self.session = None def initialize(self): self.session = Session() self.session.headers['User-Agent'] = self.user_agent def terminate(self): self.session.close() def query(self, language, keyword, season=None, episode=None, year=None): # set parameters, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164#p212652 params = {'sXML': 1, 'sL': str(language), 'sK': keyword} is_episode = False if season and episode: is_episode = True params['sTS'] = season params['sTE'] = episode if year: params['sY'] = year # loop over paginated results logger.info('Searching subtitles %r', params) subtitles = [] pids = set() while True: # query the server r = self.session.get(self.server_url + 'search/old', params=params, timeout=10) r.raise_for_status() xml = etree.fromstring(r.content) # exit if no results if not int(xml.find('pagination/results').text): logger.debug('No subtitles found') break # loop over subtitles for subtitle_xml in xml.findall('subtitle'): # read xml elements pid = subtitle_xml.find('pid').text # ignore duplicates, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164&start=10#p213321 if pid in pids: continue language = Language.fromietf( subtitle_xml.find('language').text) hearing_impaired = 'n' in (subtitle_xml.find('flags').text or '') page_link = subtitle_xml.find('url').text releases = [] if subtitle_xml.find('release').text: for release in subtitle_xml.find('release').text.split(): release = re.sub(r'\.+$', '', release) # remove trailing dots release = ''.join( filter(lambda x: ord(x) < 128, release)) # remove non-ascii characters releases.append(release) title = subtitle_xml.find('title').text season = int(subtitle_xml.find('tvSeason').text) episode = int(subtitle_xml.find('tvEpisode').text) year = int(subtitle_xml.find('year').text) if is_episode: subtitle = self.subtitle_class(language, hearing_impaired, page_link, pid, releases, title, season=season, episode=episode, year=year) else: subtitle = self.subtitle_class(language, hearing_impaired, page_link, pid, releases, title, year=year) logger.debug('Found subtitle %r', subtitle) subtitles.append(subtitle) pids.add(pid) # stop on last page if int(xml.find('pagination/current').text) >= int( xml.find('pagination/count').text): break # increment current page params['page'] = int(xml.find('pagination/current').text) + 1 logger.debug('Getting page %d', params['page']) return subtitles def list_subtitles(self, video, languages): season = episode = None if isinstance(video, Episode): titles = [video.series] + video.alternative_series season = video.season episode = video.episode else: titles = [video.title] + video.alternative_titles for title in titles: subtitles = [ s for l in languages for s in self.query( l, title, season=season, episode=episode, year=video.year) ] if subtitles: return subtitles return [] def download_subtitle(self, subtitle): # download as a zip logger.info('Downloading subtitle %r', subtitle) r = self.session.get(self.server_url + subtitle.pid + '/download', params={'container': 'zip'}, timeout=10) r.raise_for_status() # open the zip with ZipFile(io.BytesIO(r.content)) as zf: if len(zf.namelist()) > 1: raise ProviderError('More than one file to unzip') subtitle.content = 
fix_line_ending(zf.read(zf.namelist()[0]))
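# Hedged usage sketch for PodnapisiProvider. The keyword, language and year are
# example values; query() only needs a language and a search keyword, so no
# Video object is required here.
provider = PodnapisiProvider()
provider.initialize()
try:
    found = provider.query(Language('por', 'BR'), 'Example Movie', year=2015)
    for sub in found[:3]:
        provider.download_subtitle(sub)   # fills sub.content with the unzipped subtitle
finally:
    provider.terminate()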
class PlateResolver: def __init__(self, canton, stat_queue): if canton not in ['AG', 'LU', 'SH', 'ZG', 'ZH']: raise ValueError('unsupported canton') self.__canton = canton self.__session = Session() self.__session.headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64, x64; Trident/7.0; rv:11.0) like Gecko'} self.__submit_page = None self.__result_page = None self.__stat_queue = stat_queue def __get_auth_token(self): return self.__session.cookies.get_dict().get('.AUTOINDEXAUTH') def __check_auth_token(self): if self.__get_auth_token() is None: raise RuntimeError('token expired') def __get_remaining_tries(self): tries_search = re.search(r'(\d+)/(\d+)$', self.__submit_page.find('span', id='LabelAnzahl').contents[0]) return int(tries_search.group(2)) - int(tries_search.group(1)) def __login(self): while self.__get_auth_token() is None: login_page = None solution = None is_first_solution = None while solution is None: response = self.__session.get('https://www.viacar.ch/eindex/Login.aspx?Kanton=' + self.__canton) login_page = BeautifulSoup(response.text, 'lxml') captcha_oracle = CaptchaOracle() start_time = int(time()) while solution is None and int(time()) - start_time < 60: response = self.__session.get( 'https://www.viacar.ch/eindex/' + login_page.find('img', id='SecBild').get('src'), headers={'Referer': 'https://www.viacar.ch/eindex/Login.aspx?Kanton=' + self.__canton}, stream=True) if response.status_code == 200: recognized_text = Captcha(response.raw).solve() if captcha_oracle.add_possible_solution(recognized_text): (solution, is_first_solution) = captcha_oracle.guess_solution() sleep(3) response = self.__session.post('https://www.viacar.ch/eindex/Login.aspx?Kanton=' + self.__canton, data={ '__VIEWSTATE': login_page.find('input', id='__VIEWSTATE').get('value'), '__VIEWSTATEGENERATOR': login_page.find('input', id='__VIEWSTATEGENERATOR').get( 'value'), '__EVENTVALIDATION': login_page.find('input', id='__EVENTVALIDATION').get( 'value'), login_page.find('input', type='text').get('id'): solution }) if self.__get_auth_token() is not None: self.__stat_queue.put(1 if is_first_solution else 2) self.__submit_page = BeautifulSoup(response.text, 'lxml') else: self.__stat_queue.put(0) def __reset_remaining_tries(self): auth_token = self.__get_auth_token() self.__session.cookies.set('ViaInd' + self.__canton, 'Anzahl=0&Date=' + date.today().strftime('%d.%m.%Y') + '&de-CH=de-CH', domain='www.viacar.ch', path='/') self.__session.get('https://www.viacar.ch/eindex/Login.aspx?Kanton=' + self.__canton) self.__session.cookies.set('.AUTOINDEXAUTH', auth_token, domain='www.viacar.ch', path='/') sleep(3) def __request_submit_page(self): if self.__get_remaining_tries() <= 1: self.__reset_remaining_tries() response = self.__session.post('https://www.viacar.ch/eindex/Result.aspx', data={ '__VIEWSTATE': self.__result_page.find('input', id='__VIEWSTATE').get( 'value'), '__VIEWSTATEGENERATOR': self.__result_page.find('input', id='__VIEWSTATEGENERATOR').get( 'value'), '__EVENTVALIDATION': self.__result_page.find('input', id='__EVENTVALIDATION').get( 'value') }) self.__check_auth_token() self.__submit_page = BeautifulSoup(response.text, 'lxml') def __prepare_submit(self): self.__login() if self.__get_auth_token() is None else self.__request_submit_page() def __submit(self, plate): self.__session.post('https://www.viacar.ch/eindex/Search.aspx', data={ '__VIEWSTATE': self.__submit_page.find('input', id='__VIEWSTATE').get('value'), '__VIEWSTATEGENERATOR': self.__submit_page.find('input', 
id='__VIEWSTATEGENERATOR').get( 'value'), '__EVENTVALIDATION': self.__submit_page.find('input', id='__EVENTVALIDATION').get( 'value'), 'TextBoxKontrollschild': plate }) self.__check_auth_token() response = self.__session.get('https://www.viacar.ch/eindex/Result.aspx') self.__check_auth_token() self.__result_page = BeautifulSoup(response.text, 'lxml') def __parse_result_page(self): if self.__result_page.find(string=re.compile('key was not present in the dictionary')) is not None: self.__submit_page = self.__result_page return None owners = [] for owner in self.__result_page.find_all(bgcolor='whitesmoke'): owners.append(VehicleOwner(owner)) return owners def get_vehicle_owner(self, plate): if plate < 1 or plate > 999999: raise ValueError('plate must be in range [1,999999]') while True: try: self.__prepare_submit() owners = None while owners is None: self.__submit(plate) owners = self.__parse_result_page() return owners except RuntimeError: self.__session.cookies.clear()
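# Hedged usage sketch for PlateResolver. The stat queue collects CAPTCHA
# statistics (0 = failed login attempt, 1 = first guess correct, 2 = later
# guess correct); get_vehicle_owner() returns a list of VehicleOwner objects.
from queue import Queue

stats = Queue()
resolver = PlateResolver('ZH', stats)
owners = resolver.get_vehicle_owner(12345)   # numeric plate in the range [1, 999999]
for owner in owners:
    print(owner)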
class Client: def __init__(self, server, dataset=None, token=None, verify=True): """ Client constructor. The first ``Client`` you create will be stored as the default ``Client`` to be used with all ``neuprint-python`` functions if you don't explicitly specify one. Args: server: URL of neuprintHttp server token: neuPrint token. Either pass explitily as an argument or set as ``NEUPRINT_APPLICATION_CREDENTIALS`` environment variable. Your token can be retrieved by clicking on your account in the NeuPrint web interface. verify: If ``True`` (default), enforce signed credentials. dataset: The dataset to run all queries against, e.g. 'hemibrain'. If not provided, the server will use a default dataset for all queries. """ if not token: token = os.environ.get('NEUPRINT_APPLICATION_CREDENTIALS') if not token: raise RuntimeError("No token provided. Please provide one or set NEUPRINT_APPLICATION_CREDENTIALS") if ':' in token: try: token = ujson.loads(token)['token'] except Exception: raise RuntimeError("Did not understand token. Please provide the entire JSON document or (only) the complete token string") token = token.replace('"', '') if '://' not in server: server = 'https://' + server elif server.startswith('http://'): raise RuntimeError("Server must be https, not http") elif not server.startswith('https://'): protocol = server.split('://')[0] raise RuntimeError(f"Unknown protocol: {protocol}") # Remove trailing backslash while server.endswith('/'): server = server[:-1] self.server = server self.session = Session() self.session.headers.update({"Authorization": "Bearer " + token, "Content-type": "application/json"}) # If the connection fails, retry a couple times. retries = Retry(connect=2, backoff_factor=0.1) self.session.mount('https://', HTTPAdapter(max_retries=retries)) self.verify = verify if not verify: urllib3.disable_warnings(InsecureRequestWarning) all_datasets = [*self.fetch_datasets().keys()] if len(all_datasets) == 0: raise RuntimeError(f"The neuprint server {self.server} has no datasets!") if len(all_datasets) == 1 and not dataset: self.dataset = all_datasets[0] logger.info(f"Initializing neuprint.Client with dataset: {self.dataset}") elif dataset in all_datasets: self.dataset = dataset else: raise RuntimeError(f"Dataset '{dataset}' does not exist on" f" the neuprint server ({self.server}).\n" f"Available datasets: {all_datasets}") # Set this as the default client if there isn't one already global DEFAULT_NEUPRINT_CLIENT if DEFAULT_NEUPRINT_CLIENT is None: set_default_client(self) from .queries.general import fetch_meta from .queries.rois import _all_rois_from_meta # Pre-cache these metadata fields, # to avoid re-fetching them for many queries that need them. 
self.meta = fetch_meta(client=self) self.primary_rois = sorted(self.meta['primaryRois']) self.all_rois = _all_rois_from_meta(self.meta) def __repr__(self): s = f'Client("{self.server}", "{self.dataset}"' if not self.verify: s += ", verify=False" s += ")" return s @verbose_errors def _fetch(self, url, json=None, ispost=False): if ispost: r = self.session.post(url, json=json, verify=self.verify) else: assert json is None, "Can't provide a body via GET method" r = self.session.get(url, verify=self.verify) r.raise_for_status() return r def _fetch_raw(self, url, json=None, ispost=False): return self._fetch(url, json=json, ispost=ispost).content def _fetch_json(self, url, json=None, ispost=False): r = self._fetch(url, json=json, ispost=ispost) return ujson.loads(r.content) ## ## CUSTOM QUERIES ## ## Note: Transaction queries are not implemented here. See admin.py ## def fetch_custom(self, cypher, dataset="", format='pandas'): """ Query the neuprint server with a custom Cypher query. Args: cypher: A cypher query string dataset: *Deprecated. Please provide your dataset as a Client constructor argument.* Which neuprint dataset to query against. If None provided, the client's default dataset is used. format: Either ``'pandas'`` or ``'json'``. Whether to load the results into a ``pandas.DataFrame``, or return the server's raw JSON response as a Python ``dict``. Returns: Either json or DataFrame, depending on ``format``. """ url = f"{self.server}/api/custom/custom" return self._fetch_cypher(url, cypher, dataset, format) def _fetch_cypher(self, url, cypher, dataset, format='pandas'): """ Fetch cypher from an endpoint. Called by fetch_custom and by Transaction queries. """ assert format in ('json', 'pandas') if set("‘’“”").intersection(cypher): msg = ("Your cypher query contains 'smart quotes' (e.g. ‘foo’ or “foo”)," " which are not valid characters in cypher." " Please replace them with ordinary quotes (e.g. 'foo' or \"foo\").\n" "Your query was:\n" + cypher) raise RuntimeError(msg) dataset = dataset or self.dataset cypher = indent(dedent(cypher), ' ') logger.debug(f"Performing cypher query against dataset '{dataset}':\n{cypher}") result = self._fetch_json(url, json={"cypher": cypher, "dataset": dataset}, ispost=True) if format == 'json': return result df = pd.DataFrame(result['data'], columns=result['columns']) return df ## ## API-META ## def fetch_available(self): """ Fetch the list of REST API endpoints supported by the server. """ return self._fetch_json(f"{self.server}/api/available") def fetch_help(self): """ Fetch auto-generated REST API documentation, as YAML text. """ return self._fetch_raw(f"{self.server}/api/help/swagger.yaml").decode('utf-8') def fetch_server_info(self): """ Returns whether or not the server is public. """ return self._fetch_json(f"{self.server}/api/serverinfo")['IsPublic'] def fetch_version(self): """ Returns the version of the ``neuPrintHTTP`` server. """ return self._fetch_json(f"{self.server}/api/version")['Version'] @lru_cache(None) def fetch_neuron_keys(self): """ Returns all available :Neuron properties in the database. Cached. """ # Fetch available keys c = """ MATCH (n :`Neuron`) UNWIND KEYS(n) AS k RETURN DISTINCT k AS neuron_fields """ raw = self.fetch_custom(c, format='json') return [r[0] for r in raw['data']] ## ## DB-META ## def fetch_database(self): """ Fetch the address of the neo4j database that the neuprint server is using. 
""" return self._fetch_json(f"{self.server}/api/dbmeta/database") def fetch_datasets(self): """ Fetch basic information about the available datasets on the server. """ return self._fetch_json(f"{self.server}/api/dbmeta/datasets") def fetch_instances(self): """ Fetch secondary data instances avaiable through neupint http """ return self._fetch_json(f"{self.server}/api/dbmeta/instances") def fetch_db_version(self): """ Fetch the database version """ return self._fetch_json(f"{self.server}/api/dbmeta/version")['Version'] ## ## USER ## def fetch_profile(self): """ Fetch basic information about your user profile, including your access level. """ return self._fetch_json(f"{self.server}/profile") def fetch_token(self): """ Fetch your user authentication token. Note: This method just echoes the token back to you for debug purposes. To obtain your token for the first time, use the neuprint explorer web UI to login and obtain your token as explained elsewhere in this documentation. """ return self._fetch_json(f"{self.server}/token")['token'] ## ## Cached ## def fetch_daily_type(self, format='pandas'): """ Return information about today's cell type of the day. The server updates the completeness numbers each day. A different cell type is randomly picked and an exemplar is chosen from this type. Returns: If ``format='json'``, a dictionary is returned with keys ``['info', 'connectivity', 'skeleton']``. If ``format='pandas'``, three values are returned: ``(info, connectivity, skeleton)``, where ``connectivity`` and ``skeleton`` are DataFrames. """ assert format in ('json', 'pandas') url = f"{self.server}/api/cached/dailytype?dataset={self.dataset}" result = self._fetch_json(url, ispost=False) if format == 'json': return result conn_df = pd.DataFrame(result['connectivity']['data'], columns=result['connectivity']['columns']) skel_df = pd.DataFrame(result['skeleton']['data'], columns=result['skeleton']['columns']) return result['info'], conn_df, skel_df def fetch_roi_completeness(self, format='pandas'): """ Fetch the pre-computed traced "completeness" statistics for each primary ROI in the dataset. The completeness statistics indicate how many synapses belong to Traced neurons. Note: These results are not computed on-the-fly. They are computed periodically and cached. """ assert format in ('json', 'pandas') url = f"{self.server}/api/cached/roicompleteness?dataset={self.dataset}" result = self._fetch_json(url, ispost=False) if format == 'json': return result df = pd.DataFrame(result['data'], columns=result['columns']) return df def fetch_roi_connectivity(self, format='pandas'): """ Fetch the pre-computed connectivity statistics between primary ROIs in the dataset. Note: These results are not computed on-the-fly. They are computed periodically and cached. """ assert format in ('json', 'pandas') url = f"{self.server}/api/cached/roiconnectivity?dataset={self.dataset}" result = self._fetch_json(url, ispost=False) if format == 'json': return result # Example result: # { # "roi_names": [['ME(R)', "a'L(L)", 'aL(L)', ...]], # "weights": { # 'EPA(R)=>gL(L)': {'count': 7, 'weight': 1.253483174941712}, # 'EPA(R)=>gL(R)': {'count': 29, 'weight': 2.112117795621343}, # 'FB=>AB(L)': {'count': 62, 'weight': 230.11732347331355}, # 'FB=>AB(R)': {'count': 110, 'weight': 496.733276906109}, # ... 
# } # } weights = [(*k.split('=>'), v['count'], v['weight']) for k,v in result["weights"].items()] df = pd.DataFrame(weights, columns=['from_roi', 'to_roi', 'count', 'weight']) return df ## ## ROI MESHES ## def fetch_roi_mesh(self, roi, export_path=None): """ Fetch a mesh for the given ROI, in ``.obj`` format. Args: roi: Name of an ROI export_path: Optional. Writes the ``.obj`` file to the given path. Returns: bytes The contents of the fetched ``.obj`` mesh file. Note: ROI meshes are intended for visualization only. (They are not suitable for quantitative analysis.) """ url = f"{self.server}/api/roimeshes/mesh/{self.dataset}/{roi}" data = self._fetch_raw(url, ispost=False) if export_path: with open(export_path, 'wb') as f: f.write(data) return data ## ## SKELETONS ## def fetch_skeleton(self, body, heal=False, export_path=None, format='pandas', with_distances=False): """ Fetch the skeleton for a neuron or segment. Args: body (int): A neuron or segment ID heal (bool): If ``True`` and the skeleton is fragmented, 'heal' it by connecting its fragments into a single tree. The fragments are joined by selecting the minimum spanning tree after joining all fragments via their pairwise nearest neighbors. See :py:func:`.heal_skeleton()` for more details. If you want the healing procedure to refrain from connecting very distant fragments, set ``heal`` to a maximum allowed distance, e.g. ``heal=1000.0`` format (str): Either 'pandas', 'swc' (similar to CSV), or 'nx' (``networkx.DiGraph``). export_path (str): Optional. Writes the ``.swc`` file to disk. (SWC format is written, regardless of the returned ``format``.) with_distances: Only valid when format is ``pandas`` or ``nx``. If True, a 'distance' column (or edge attribute) will be added to the dataframe (or nx.Graph), indicating the distances from each node to its parent node. In DataFrame results, root nodes will be assigned a distance of ``np.inf``. Distances are computed AFTER healing is performed. Distances will not be present in any exported SWC file. Returns: Either a string (swc), a DataFrame (pandas), or ``networkx.DiGraph`` (nx). See also: - :py:func:`.heal_skeleton()` - :py:func:`.skeleton_df_to_nx()` - :py:func:`.skeleton_df_to_swc()` """ from .skeleton import skeleton_df_to_nx, heal_skeleton, skeleton_df_to_swc, skeleton_swc_to_df, calc_segment_distances try: body = int(body) except ValueError: raise RuntimeError(f"Please pass an integer body ID, not '{body}'") assert format in ('swc', 'pandas', 'nx'), f'Invalid format: {format}' assert not with_distances or format in ('pandas', 'nx'), \ f"The with_distances option can only be used with the 'pandas' or 'nx' output formats, not {format}" url = f"{self.server}/api/skeletons/skeleton/{self.dataset}/{body}?format=swc" swc = self._fetch_raw(url, ispost=False).decode('utf-8') if heal or format != 'swc': df = skeleton_swc_to_df(swc) if heal: df = heal_skeleton(df, heal) if export_path or format == 'swc': swc = skeleton_df_to_swc(df) if export_path: with open(export_path, 'w') as f: f.write(swc) if format == 'swc': return swc if format == 'pandas': if with_distances: df['distance'] = calc_segment_distances(df) return df if format == 'nx': return skeleton_df_to_nx(df, with_distances=with_distances) raise AssertionError('Should not get here.') ## ## RAW KEY-VALUE ## def fetch_raw_keyvalue(self, instance, key): """ Fetch a value from the ``neuprintHTTP`` server. The data address is given by both the instance name and key. (For admins and experts only.) 
""" url = f"{self.server}/api/raw/keyvalue/key/{instance}/{key}" return self._fetch_raw(url, ispost=False) def post_raw_keyvalue(self, instance, key, value): """ Post a value from the ``neuprintHTTP`` server. The data address is given by both the instance name and key. (For admins and experts only.) """ assert isinstance(value, bytes) url = f"{self.server}/api/raw/keyvalue/key/{instance}/{key}" r = self.session.post(url, data=value, verify=self.verify) r.raise_for_status()
class TVsubtitlesProvider(Provider): """TVsubtitles Provider.""" languages = {Language('por', 'BR')} | { Language(l) for l in [ 'ara', 'bul', 'ces', 'dan', 'deu', 'ell', 'eng', 'fin', 'fra', 'hun', 'ita', 'jpn', 'kor', 'nld', 'pol', 'por', 'ron', 'rus', 'spa', 'swe', 'tur', 'ukr', 'zho' ] } video_types = (Episode, ) server_url = 'http://www.tvsubtitles.net/' subtitle_class = TVsubtitlesSubtitle def __init__(self): self.session = None def initialize(self): self.session = Session() self.session.headers[ 'User-Agent'] = 'Subliminal/%s' % __short_version__ def terminate(self): self.session.close() @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME) def search_show_id(self, series, year=None): """Search the show id from the `series` and `year`. :param str series: series of the episode. :param year: year of the series, if any. :type year: int :return: the show id, if any. :rtype: int """ # make the search logger.info('Searching show id for %r', series) r = self.session.post(self.server_url + 'search.php', data={'q': series}, timeout=10) r.raise_for_status() # get the series out of the suggestions soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser']) show_id = None for suggestion in soup.select('div.left li div a[href^="/tvshow-"]'): match = link_re.match(suggestion.text) if not match: logger.error('Failed to match %s', suggestion.text) continue if match.group('series').lower() == series.lower(): if year is not None and int(match.group('first_year')) != year: logger.debug('Year does not match') continue show_id = int(suggestion['href'][8:-5]) logger.debug('Found show id %d', show_id) break return show_id @region.cache_on_arguments(expiration_time=EPISODE_EXPIRATION_TIME) def get_episode_ids(self, show_id, season): """Get episode ids from the show id and the season. :param int show_id: show id. :param int season: season of the episode. :return: episode ids per episode number. 
:rtype: dict """ # get the page of the season of the show logger.info('Getting the page of show id %d, season %d', show_id, season) r = self.session.get(self.server_url + 'tvshow-%d-%d.html' % (show_id, season), timeout=10) soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser']) # loop over episode rows episode_ids = {} for row in soup.select('table#table5 tr'): # skip rows that do not have a link to the episode page if not row('a', href=episode_id_re): continue # extract data from the cells cells = row('td') episode = int(cells[0].text.split('x')[1]) episode_id = int(cells[1].a['href'][8:-5]) episode_ids[episode] = episode_id if episode_ids: logger.debug('Found episode ids %r', episode_ids) else: logger.warning('No episode ids found') return episode_ids def query(self, show_id, series, season, episode, year=None): # get the episode ids episode_ids = self.get_episode_ids(show_id, season) if episode not in episode_ids: logger.error('Episode %d not found', episode) return [] # get the episode page logger.info('Getting the page for episode %d', episode_ids[episode]) r = self.session.get(self.server_url + 'episode-%d.html' % episode_ids[episode], timeout=10) soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser']) # loop over subtitles rows subtitles = [] for row in soup.select('.subtitlen'): # read the item language = Language.fromtvsubtitles(row.h5.img['src'][13:-4]) subtitle_id = int(row.parent['href'][10:-5]) page_link = self.server_url + 'subtitle-%d.html' % subtitle_id rip = row.find('p', title='rip').text.strip() or None release = row.find('h5').text.strip() or None subtitle = self.subtitle_class(language, page_link, subtitle_id, series, season, episode, year, rip, release) logger.debug('Found subtitle %s', subtitle) subtitles.append(subtitle) return subtitles def list_subtitles(self, video, languages): # lookup show_id titles = [video.series] + video.alternative_series show_id = None for title in titles: show_id = self.search_show_id(title, video.year) if show_id is not None: break # query for subtitles with the show_id if show_id is not None: subtitles = [ s for s in self.query(show_id, title, video.season, video.episode, video.year) if s.language in languages and s.episode == video.episode ] if subtitles: return subtitles else: logger.error('No show id found for %r (%r)', video.series, {'year': video.year}) return [] def download_subtitle(self, subtitle): # download as a zip logger.info('Downloading subtitle %r', subtitle) r = self.session.get(self.server_url + 'download-%d.html' % subtitle.subtitle_id, timeout=10) r.raise_for_status() # open the zip with ZipFile(io.BytesIO(r.content)) as zf: if len(zf.namelist()) > 1: raise ProviderError('More than one file to unzip') subtitle.content = fix_line_ending(zf.read(zf.namelist()[0]))
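# A rough usage sketch for TVsubtitlesProvider above; in subliminal the provider is
# normally driven through list_subtitles()/download_subtitle() with a Video object,
# so the series name, season, and episode below are placeholders.
provider = TVsubtitlesProvider()
provider.initialize()
try:
    show_id = provider.search_show_id('Breaking Bad')
    if show_id is not None:
        subs = provider.query(show_id, 'Breaking Bad', 1, 1)
        if subs:
            provider.download_subtitle(subs[0])   # fills subs[0].content
finally:
    provider.terminate()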
def main(): sock = None udp_remote_address = None rxbuff = list() # UDP incoming buffer txbuff = list() # UDP outcoming buffer # Loop with authentication to TecoRoute service while True: try: user = '******' password = '******' plc = 'L2_0202' session = Session() rs = session.get('http://77.236.203.188:61682/INDEX.XML', headers={ 'User-Agent': 'tecoroute', 'x-aplic': 'AKRCON tecoroute', 's-tcm': 'NT_Key', 'n-user': tc_secret(user) }) print(rs.content) hash1 = sha1((rs.text[:8] + password).encode()).hexdigest().upper() rs = session.put('http://77.236.203.188:61682/IAM.TXT', data=hash1 + '\r\n', headers={'User-Agent': 'tecoroute'}) print(rs.content) rs = session.put('http://77.236.203.188:61682/PLC.TXT', data=tc_secret(plc) + '\r\n', headers={'User-Agent': 'tecoroute'}) print(rs.content) sock = socket.socket(type=socket.SOCK_DGRAM) sock.setblocking(False) sock.bind(('', 50000)) sleep(10) # Loop for send/receive data while True: # Receive UDP data receiving = True while receiving: try: data, udp_remote_address = sock.recvfrom( 65507) # Max UDP packet size except BlockingIOError: receiving = False else: rxbuff.append(data) # Send UDP data if udp_remote_address and txbuff: for data in txbuff: sock.sendto(data, udp_remote_address) del txbuff[:] rxdata = b'' for data in rxbuff: rxdata += tc_encode(data) #if not rxdata: # rxdata = b"\x01\x00\x01\x00\xf9\xf8\xe7\xe6\x81\x17\x16\xff" print('Sending', rxdata[:10]) rs = session.get( 'http://77.236.203.188:61682/DATA.BIN', data=rxdata, headers={ 'Cache-Control': 'no-cache', 'Content-Type': 'binary', 'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko', 'u-tcm': 'U-TCM' }) txbuff.append(tc_decode(rs.content)) print('Received', txbuff[-1][:10]) sleep(1) except ConnectionError as e: if sock: sock.close() raise e sleep(10)
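# The TecoRoute login in main() above is a simple challenge-response: the first
# eight characters of the INDEX.XML response act as the challenge, and the client
# answers with the uppercase hex SHA-1 of challenge + password via PUT /IAM.TXT.
# A standalone sketch of just that hashing step (the values are made up):
from hashlib import sha1

challenge = 'A1B2C3D4'   # stands in for rs.text[:8] from INDEX.XML
password = 'secret'      # placeholder PLC user password
answer = sha1((challenge + password).encode()).hexdigest().upper()
print(answer)            # this string, followed by '\r\n', is PUT to /IAM.TXT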
def get_remaining_meals(s: Session): page = bs(s.get(meal_url).content, 'html.parser') status = page.find_all('td', attrs={'data-title': 'Status:'}) any10 = re.search(r'.*=([0-9]+)', status[0].text).group(1) meal50 = re.search(r'.*= ([0-9]+)', status[1].text).group(1) return any10, meal50
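# Hedged usage sketch: get_remaining_meals() expects an already-authenticated
# requests Session and relies on a module-level `meal_url` (and `bs`/`re` imports)
# defined elsewhere in this snippet's module, so the login step is only indicated.
s = Session()
# ... perform the site's login flow on `s` here ...
any10, meal50 = get_remaining_meals(s)
print('any-10 meals remaining:', any10, '| meal-50 remaining:', meal50)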
class DNACHelper(object): """Utility class for interacting with Onboarding service on DNA-C. The only class level attribute stored is the Northbound REST API client. """ address = '' user = '' password = '' client = None _rest_base_api = 'api/v1' _auth_uri = 'api/system/v1/auth/login' def __init__(self, address='', user='', password=''): """Initializes NB REST API client for DNA-C. Performs initial auth login and token exchange. Specify DNA-C instance by providing pyATS device instance OR all of the following: address, user, password. Args: device (Device, optional): pyATS device object for DNA-C cluster address (str, optional): DNA-C address user (str, optional): DNA-C login user password (str, optional): DNA-C login password Raises: ConnectionError: failed to authenticate client """ self.address = address self.user = user self.password = password self._create_client() def _create_client(self): """Initializes REST client Raises: ConnectionError: failed to authenticate client """ self.client = Session() # Disable server authentication from client-side self.client.verify = False self._gen_token() def _gen_token(self): """Performs auth login, extracts JWT, and sets JWT in header""" # Set login authorization required for token generation self.client.auth = HTTPBasicAuth(self.user, self.password) # Attempt login self.client.headers.update({'Content-Type': 'application/json'}) resp = self.client.get('https://{}/{}'.format(self.address, self._auth_uri)) if (resp.status_code != 200) or ('set-cookie' not in resp.headers): logger.error('Failed to initialize client') logger.debug(resp) raise ConnectionRefusedError("HTTP Status %s" % resp.status_code) # Set session cookie to JWT retrieved from response header cookie = SimpleCookie() cookie.load(resp.headers['set-cookie']) client_cookies = {key: morsel.value for key, morsel in cookie.items()} self.client.cookies.update(client_cookies) def _call_api(self, request_type, path, params=None, data=None, files=None): """Calls REST API with provided information Args: request_type (str): GET, POST, PUT, DELETE, HEAD, and OPTIONS path (str): REST API path (e.g. 
onboarding/pnp-device) params (dict, optional): Dictionary to send in the query string data (dict, optional): Dictionary to send in the body of the Request Returns: requests.Response: requests response object None: upon call failure """ url = 'https://{}/{}/{}'.format(self.address, self._rest_base_api, path) if not hasattr(self.client, request_type.lower()): raise ValueError("request_type (%s) unsupported" % request_type) send_kwargs = dict(url=url, params=params, data=data, files=files) if files: if isinstance(send_kwargs["files"], dict): fd = send_kwargs["files"] if len(fd) == 1 and isinstance(list(fd.values())[0], tuple): ft = fd[list(fd.keys())[0]] fd[list(fd.keys())[0]] = ft[:1] + (open(ft[1], "rb"), ) + ft[2:] else: send_kwargs["files"] = { key: open(val, "rb") for key, val in fd.items() } if isinstance(send_kwargs["files"], str): send_kwargs["files"] = open(send_kwargs["files"], "rb") if self.client.headers.get("Content-Type") == "application/json": self.client.headers.pop("Content-Type") response = getattr(self.client, request_type.lower())(**send_kwargs) self.client.headers.update({"Content-Type": "application/json"}) if not response or response.status_code not in (200, 204): logger.error("API call failed") logger.debug(response) return None return response def get_device(self, serialnumber=None, state=None, limit=None): request_type = 'GET' path = 'onboarding/pnp-device' payload = {} if serialnumber: payload.update(serialNumber=serialnumber) if state: payload.update(state=state) if limit: payload.update(limit=limit) response = self._call_api(request_type, path, params=payload) if not response or response.status_code not in (200, 204): return '' response_body = response.json() if not response_body: return '' return response_body def delete_device(self, device_id): """Deletes the specified device from DNA-C database API: onboarding/pnp-device Args: device_id (str): Device ID Returns: bool: True if deleted successfully, False otherwise """ request_type = 'DELETE' path = 'onboarding/pnp-device/{deviceId}'.format(deviceId=device_id) response = self._call_api(request_type, path) if not response or response.status_code not in (200, 204): return False return True def post_file(self, namespace, file): request_type = 'POST' path = 'file/' + namespace data = {} files = {"fileUpload": file} # self.client.headers['Content-Type']='multipart/form-data' response = self._call_api(request_type, path, files=files) if not response or response.status_code not in (200, 204): return '' response_body = response.json() if not response_body: return '' return response_body def get_files(self, namespace): request_type = 'GET' path = 'file/namespace/' + namespace response = self._call_api(request_type, path) if not response or response.status_code not in (200, 204): return "" return response.json()['response'] def post_workflow(self, workflow): request_type = 'POST' path = 'onboarding/pnp-workflow' response = self._call_api(request_type, path, data=json.dumps(workflow)) if not response or response.status_code not in (200, 204): return response return response.json() def post_project(self, project): request_type = 'POST' path = 'onboarding/pnp-project' response = self._call_api(request_type, path, data=json.dumps(project)) if not response or response.status_code not in (200, 204): return response return response.json() def post_device_claim(self, claim_data): request_type = 'POST' path = 'onboarding/pnp-device/claim' response = self._call_api(request_type, path, data=json.dumps(claim_data)) if not response or 
response.status_code not in (200, 204): return response return response.json() def delete_config(self, id): request_type = 'DELETE' path = 'file/{fileId}'.format(fileId=id) response = self._call_api(request_type, path)
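# Hedged usage sketch for DNACHelper above. The address, credentials, and the
# 'Unclaimed' state value are placeholders; _create_client() runs automatically in
# __init__ and raises ConnectionRefusedError if the token exchange fails.
dnac = DNACHelper(address='10.0.0.1', user='admin', password='password')
devices = dnac.get_device(state='Unclaimed')
print(devices)                               # raw JSON body, or '' on failure
uploaded = dnac.post_file('config', '/tmp/device-config.txt')   # placeholder path
print(uploaded)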
def close(self): pass if __name__ == '__main__': #main() d = b"\x09\x08\x05\x04\x03\xcc\x63\x32\x01\x98\x97\x95\x28\x1c\x18\x03\xc5\xc0\xbc\xa3\x65\x60\x5c\x3f\x01\xfc\xf8\xd7\x99\x94\x90\x6b\x2d\x2b\x27\xf1\xb3\xae\xaa\x70\x32\x2d\x29\xeb\xad\xab\xa7\x68\x2a\x28\x24\xe2\xa4\x9f\x9b\x55\x17\x12\xbb\xa4" print(tc_decode(d + d)) exit() ses = Session() ses.mount('http+teco', TecoHttpAdapter()) #rs = ses.get('http+teco://77.236.203.188:61682/INDEX.XML', # headers={'User-Agent': 'tecoroute', 'x-aplic': 'AKRCON tecoroute', 's-tcm': 'NT_Key', # 'n-user': tc_secret('TRCtest')}) rs = ses.get('http+teco://www.example.com') print(rs) """string = b"\x01\x00\xfd\xfc\xfb\xec\x83\x7a\x71\x08\x07\x05\x98\x8c\x88\xa9\x82\x4f\x9d\x86" dec = tc_decode(string) for i in range(len(dec)): print(i, hex(dec[i])) exit()""" """# These two lines enable debugging at httplib level (requests->urllib3->http.client) # You will see the REQUEST, including HEADERS and DATA, and RESPONSE with HEADERS but without DATA. # The only thing missing will be the response.body which is not logged. try: import http.client as http_client except ImportError: # Python 2 import httplib as http_client http_client.HTTPConnection.debuglevel = 1
class LegendasTVProvider(Provider): """LegendasTV Provider. :param str username: username. :param str password: password. """ languages = { Language.fromlegendastv(l) for l in language_converters['legendastv'].codes } server_url = 'http://legendas.tv/' subtitle_class = LegendasTVSubtitle def __init__(self, username=None, password=None): # Provider needs UNRAR installed. If not available raise ConfigurationError try: rarfile.custom_check(rarfile.UNRAR_TOOL) except rarfile.RarExecError: raise ConfigurationError('UNRAR tool not available') if any((username, password)) and not all((username, password)): raise ConfigurationError('Username and password must be specified') self.username = username self.password = password self.logged_in = False self.session = None def initialize(self): self.session = Session() self.session.headers[ 'User-Agent'] = 'Subliminal/%s' % __short_version__ # login if self.username and self.password: logger.info('Logging in') data = { '_method': 'POST', 'data[User][username]': self.username, 'data[User][password]': self.password } r = self.session.post(self.server_url + 'login', data, allow_redirects=False, timeout=10) raise_for_status(r) soup = ParserBeautifulSoup(r.content, ['html.parser']) if soup.find('div', {'class': 'alert-error'}, string=re.compile(u'Usuário ou senha inválidos')): raise AuthenticationError(self.username) logger.debug('Logged in') self.logged_in = True def terminate(self): # logout if self.logged_in: logger.info('Logging out') r = self.session.get(self.server_url + 'users/logout', allow_redirects=False, timeout=10) raise_for_status(r) logger.debug('Logged out') self.logged_in = False self.session.close() @staticmethod def is_valid_title(title, title_id, sanitized_title, season, year): """Check if is a valid title.""" sanitized_result = sanitize(title['title']) if sanitized_result != sanitized_title: logger.debug("Mismatched title, discarding title %d (%s)", title_id, sanitized_result) return # episode type if season: # discard mismatches on type if title['type'] != 'episode': logger.debug( "Mismatched 'episode' type, discarding title %d (%s)", title_id, sanitized_result) return # discard mismatches on season if 'season' not in title or title['season'] != season: logger.debug('Mismatched season %s, discarding title %d (%s)', title.get('season'), title_id, sanitized_result) return # movie type else: # discard mismatches on type if title['type'] != 'movie': logger.debug( "Mismatched 'movie' type, discarding title %d (%s)", title_id, sanitized_result) return # discard mismatches on year if year is not None and 'year' in title and title['year'] != year: logger.debug("Mismatched movie year, discarding title %d (%s)", title_id, sanitized_result) return return True @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME, should_cache_fn=lambda value: value) def search_titles(self, title, season, title_year): """Search for titles matching the `title`. For episodes, each season has it own title :param str title: the title to search for. :param int season: season of the title :param int title_year: year of the title :return: found titles. 
:rtype: dict """ titles = {} sanitized_titles = [sanitize(title)] ignore_characters = {'\'', '.'} if any(c in title for c in ignore_characters): sanitized_titles.append( sanitize(title, ignore_characters=ignore_characters)) for sanitized_title in sanitized_titles: # make the query if season: logger.info('Searching episode title %r for season %r', sanitized_title, season) else: logger.info('Searching movie title %r', sanitized_title) r = self.session.get(self.server_url + 'legenda/sugestao/{}'.format(sanitized_title), timeout=10) raise_for_status(r) results = json.loads(r.text) # loop over results for result in results: source = result['_source'] # extract id title_id = int(source['id_filme']) # extract type title = {'type': type_map[source['tipo']]} # extract title, year and country name, year, country = title_re.match( source['dsc_nome']).groups() title['title'] = name # extract imdb_id if source['id_imdb'] != '0': if not source['id_imdb'].startswith('tt'): title['imdb_id'] = 'tt' + source['id_imdb'].zfill(7) else: title['imdb_id'] = source['id_imdb'] # extract season if title['type'] == 'episode': if source['temporada'] and source['temporada'].isdigit(): title['season'] = int(source['temporada']) else: match = season_re.search(source['dsc_nome_br']) if match: title['season'] = int(match.group('season')) else: logger.debug( 'No season detected for title %d (%s)', title_id, name) # extract year if year: title['year'] = int(year) elif source['dsc_data_lancamento'] and source[ 'dsc_data_lancamento'].isdigit(): # year is based on season air date hence the adjustment title['year'] = int( source['dsc_data_lancamento']) - title.get( 'season', 1) + 1 # add title only if is valid # Check against title without ignored chars if self.is_valid_title(title, title_id, sanitized_titles[0], season, title_year): titles[title_id] = title logger.debug('Found %d titles', len(titles)) return titles @region.cache_on_arguments( expiration_time=timedelta(minutes=15).total_seconds()) def get_archives(self, title_id, language_code, title_type, season, episode): """Get the archive list from a given `title_id`, `language_code`, `title_type`, `season` and `episode`. :param int title_id: title id. :param int language_code: language code. :param str title_type: episode or movie :param int season: season :param int episode: episode :return: the archives. 
:rtype: list of :class:`LegendasTVArchive` """ archives = [] page = 0 while True: # get the archive page url = self.server_url + 'legenda/busca/-/{language}/-/{page}/{title}'.format( language=language_code, page=page, title=title_id) r = self.session.get(url) raise_for_status(r) # parse the results soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser']) for archive_soup in soup.select( 'div.list_element > article > div > div.f_left'): # create archive archive = LegendasTVArchive( archive_soup.a['href'].split('/')[2], archive_soup.a.text, 'pack' in archive_soup.parent['class'], 'destaque' in archive_soup.parent['class'], self.server_url + archive_soup.a['href'][1:]) # clean name of path separators and pack flags clean_name = archive.name.replace('/', '-') if archive.pack and clean_name.startswith('(p)'): clean_name = clean_name[3:] # guess from name guess = guessit(clean_name, {'type': title_type}) # episode if season and episode: # discard mismatches on episode in non-pack archives # Guessit may return int for single episode or list for multi-episode # Check if archive name has multiple episodes releases on it if not archive.pack and 'episode' in guess: wanted_episode = set(episode) if isinstance( episode, list) else {episode} archive_episode = guess['episode'] if isinstance( guess['episode'], list) else {guess['episode']} if not wanted_episode.intersection(archive_episode): logger.debug( 'Mismatched episode %s, discarding archive: %s', guess['episode'], clean_name) continue # extract text containing downloads, rating and timestamp data_text = archive_soup.find('p', class_='data').text # match downloads archive.downloads = int( downloads_re.search(data_text).group('downloads')) # match rating match = rating_re.search(data_text) if match: archive.rating = int(match.group('rating')) # match timestamp and validate it time_data = { k: int(v) for k, v in timestamp_re.search( data_text).groupdict().items() } archive.timestamp = pytz.timezone( 'America/Sao_Paulo').localize(datetime(**time_data)) if archive.timestamp > datetime.utcnow().replace( tzinfo=pytz.utc): raise ProviderError('Archive timestamp is in the future') # add archive logger.info( 'Found archive for title %d and language %d at page %s: %s', title_id, language_code, page, archive) archives.append(archive) # stop on last page if soup.find('a', attrs={'class': 'load_more'}, string='carregar mais') is None: break # increment page count page += 1 logger.debug('Found %d archives', len(archives)) return archives def download_archive(self, archive): """Download an archive's :attr:`~LegendasTVArchive.content`. :param archive: the archive to download :attr:`~LegendasTVArchive.content` of. 
:type archive: :class:`LegendasTVArchive` """ logger.info('Downloading archive %s', archive.id) r = self.session.get(self.server_url + 'downloadarquivo/{}'.format(archive.id)) raise_for_status(r) # open the archive archive_stream = io.BytesIO(r.content) if is_rarfile(archive_stream): logger.debug('Identified rar archive') archive.content = RarFile(archive_stream) elif is_zipfile(archive_stream): logger.debug('Identified zip archive') archive.content = ZipFile(archive_stream) else: raise ValueError('Not a valid archive') def query(self, language, title, season=None, episode=None, year=None): # search for titles titles = self.search_titles(title, season, year) subtitles = [] # iterate over titles for title_id, t in titles.items(): logger.info('Getting archives for title %d and language %d', title_id, language.legendastv) archives = self.get_archives(title_id, language.legendastv, t['type'], season, episode) if not archives: logger.info('No archives found for title %d and language %d', title_id, language.legendastv) # iterate over title's archives for a in archives: # compute an expiration time based on the archive timestamp expiration_time = (datetime.utcnow().replace(tzinfo=pytz.utc) - a.timestamp).total_seconds() # attempt to get the releases from the cache cache_key = releases_key.format(archive_id=a.id, archive_name=a.name) releases = region.get(cache_key, expiration_time=expiration_time) # the releases are not in cache or cache is expired if releases == NO_VALUE: logger.info('Releases not found in cache') # download archive self.download_archive(a) # extract the releases releases = [] for name in a.content.namelist(): # discard the legendastv file if name.startswith('Legendas.tv'): continue # discard hidden files if os.path.split(name)[-1].startswith('.'): continue # discard non-subtitle files if not name.lower().endswith(SUBTITLE_EXTENSIONS): continue releases.append(name) # cache the releases region.set(cache_key, releases) # iterate over releases for r in releases: subtitle = self.subtitle_class(language, t['type'], t['title'], t.get('year'), t.get('imdb_id'), t.get('season'), a, r) logger.debug('Found subtitle %r', subtitle) subtitles.append(subtitle) return subtitles def list_subtitles(self, video, languages): season = episode = None if isinstance(video, Episode): titles = [video.series] + video.alternative_series season = video.season episode = video.episode else: titles = [video.title] + video.alternative_titles for title in titles: subtitles = [ s for l in languages for s in self.query( l, title, season=season, episode=episode, year=video.year) ] if subtitles: return subtitles return [] def download_subtitle(self, subtitle): # download archive in case we previously hit the releases cache and didn't download it if subtitle.archive.content is None: self.download_archive(subtitle.archive) # extract subtitle's content subtitle.content = fix_line_ending( subtitle.archive.content.read(subtitle.name))
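# A rough usage sketch for LegendasTVProvider above, assuming UNRAR is installed
# and valid legendas.tv credentials; the title, season, episode, and year values
# are placeholders. Language comes from babelfish with the 'legendastv' converter,
# as used inside the class.
provider = LegendasTVProvider(username='user', password='pass')
provider.initialize()
try:
    subs = provider.query(Language('por', 'BR'), 'Westworld',
                          season=1, episode=1, year=2016)
    if subs:
        provider.download_subtitle(subs[0])   # extracts content from the archive
finally:
    provider.terminate()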
class Kaufland(Shop): search_url_prefix = 'https://shop.kaufland.de/search?pageSize=48&sort=relevance&text=' def __init__(self, email, password, captcha_service, cookie_file="kl_cookies"): self.logger = Logger('Kaufland') self.captcha_service = captcha_service self.base_url = 'https://shop.kaufland.de' self.login_url = "https://shop.kaufland.de/login" self.account_url = "https://shop.kaufland.de/my-account" self.take_url = 'https://shop.kaufland.de/cart/modify' self.basket_url = 'https://shop.kaufland.de/cart' self.driver = webdriver.PhantomJS(executable_path='/usr/local/bin/phantomjs') # self.driver = webdriver.Chrome('./chromedriver') self.driver.set_window_size(1280, 1024) Shop.__init__(self, email, password, cookie_file) @staticmethod def search_url(name): return Kaufland.search_url_prefix + quote(name.encode('utf-8')) def login(self): self.logger.info("Logging in...") self.session = Session() self.driver.get(self.account_url) time.sleep(2) x = self.driver.find_element_by_id('kLoginForm') x.find_element_by_id('j_username').send_keys(self.email) x.find_element_by_id('j_password').send_keys(self.password) x.find_element_by_tag_name('button').click() time.sleep(3) self.new_session_with_cookies(self.driver.get_cookies()) self.save_session() def is_logged_in(self, html=None): if not html: html = self.session.get(self.account_url).text return html.find('Abmelden') > 0 def save_session(self): with open(self.cookie_file, 'w') as f: pickle.dump(self.session.cookies, f) def load_session(self): try: with open(self.cookie_file) as f: cookies = pickle.load(f) self.session = Session() self.session.cookies = cookies return self.is_logged_in() except IOError: return False def get(self, url): html = self.session.get(url).text if self.is_logged_in(html): self.save_session() return html self.login() html = self.session.get(url).text if self.is_logged_in(html): self.save_session() return html self.logger.error("Can not log in") exit(1) def cart(self): blob = BeautifulSoup(self.get(self.basket_url), "html.parser") r = blob.select('section.product-list') if len(r) == 0: return [] r = r[0] ids = [] for i in r.findAll('article'): a = i.find('a') link = urllib.parse.urljoin(self.base_url, a['href']) title = i.find('p', {'class': 'product-list__title'}).text.strip() amount = i.find('div', {'class': 'product-list__amount'}) article_id = amount['data-dynamicblock'] amount = int(amount.find('input', {'name': 'quantity'}).get('value')) price = i.find('div', {'data-dynamiccontent': 'prices'}) red = price.find('span', {'class': 'product-list__reduced-price'}) if red: price = red price = price.text.replace('€', '').strip() price = int(float(price) * 100) title = unicodedata.normalize('NFKC', title) item = ShopItem(article_id, amount, title, price, link) ids.append(item) return ids def search(self, term, sub_term=None): html = self.get(Kaufland.search_url(term)) ids = self.parse_search(html) split_terms = [x for x in re.split('-| |\n', term) if len(x) > 1] if 0 < len(ids) < 48: return self.order_by_matches(split_terms, ids) if sub_term and len(ids) == 0: return self.search(term + " " + sub_term) if len(split_terms) > 1: ids = [] for criteria in split_terms: if len(criteria) > 1: ids += self.search(criteria) return self.order_by_matches(split_terms, ids, max=20, perfect=0.6, cut_off=0.25) def parse_search(self, html): blob = BeautifulSoup(html, "html.parser") ids = [] r = blob.select('div.productmatrix') if len(r) > 0: r = r[0] for i in r.findAll('article'): a = i.find('a') article_id = 
i['data-dynamicblock'].split('_')[0] link = urllib.parse.urljoin(self.base_url, a['href']) title = a.find('p', {'class': 'product-tile__infos--title'}).text.strip() price = a.find('div', {'class': 'product-tile__price--regular'}) if not price: price = a.find('div', {'class': 'product-tile__price--reduced'}) price = price.text.replace('€', '').strip() price = int(float(price) * 100) title = unicodedata.normalize('NFKC', title) item = ShopItem(article_id, 1, title, price, link) ids.append(item) return ids def order_by_matches(self, terms, ids, max=None, perfect=None, cut_off=None): if len(ids) == 0: return [] normal_fit = {} perfect_fit = {} normal_ids = [] perfect_ids = [] for item in ids: if item in normal_ids or item in perfect_ids: continue match = len([x for x in terms if x.lower() in item.name.lower()]) if not cut_off or match > len(terms) * cut_off: normal_ids.append(item) normal_fit[item] = match if perfect and match > len(terms) * perfect: perfect_ids.append(item) perfect_fit[item] = match if len(perfect_fit) > 0: normal_ids = perfect_ids normal_fit = perfect_fit ordered = sorted(normal_ids, key=normal_fit.__getitem__, reverse=True) if max: ordered = ordered[:max] return ordered def take(self, item): html = self.get(Kaufland.search_url(item.name)) blob = BeautifulSoup(html, "html.parser") token = blob.find('input', {'name': 'CSRFToken'}).get('value') self.session.post(self.take_url, data=[ ('qty', item.amount), ('productCodePost', item.article_id), ('pageTemplate', 'producttile'), ('CSRFToken', token), ]) self.save_session() def shelf_life(self, item_link): pass
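# Hedged usage sketch for the Kaufland shop client above. Login is driven through
# PhantomJS, so the executable path hard-coded in __init__ must exist; the email,
# password, and search term are placeholders, and the captcha service is unused in
# the code shown, so None is passed.
shop = Kaufland('user@example.com', 'secret', captcha_service=None)
items = shop.search('Milch') or []
for item in items[:5]:
    print(item.name)
if items:
    shop.take(items[0])      # adds the best match to the cart
print(shop.cart())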
class Provider(BaseProvider): """ he.net provider """ def __init__(self, config): super(Provider, self).__init__(config) self.domain = self.domain self.domain_id = None def authenticate(self): """ """ # Create the session GET the login page to retrieve a session cookie self.session = Session() self.session.get("https://dns.he.net/") # Hit the login page with authentication info to login the session login_response = self.session.post( "https://dns.he.net", data={ "email": self._get_provider_option('auth_username') or '', "pass": self._get_provider_option('auth_password') or '' }) # Parse in the HTML, if the div containing the error message is found, error html = BeautifulSoup(login_response.content, "html.parser") if html.find("div", {"id": "dns_err"}) is not None: logger.warning("HE login failed, check HE_USER and HE_PASS") return False # Make an authenticated GET to the DNS management page zones_response = self.session.get("https://dns.he.net") html = BeautifulSoup(zones_response.content, "html.parser") zone_img = html.find("img", {"name": self.domain, "alt": "delete"}) # If the tag couldn't be found, error, otherwise, return the value of the tag if zone_img is None: logger.warning("Domain {0} not found in account".format( self.domain)) raise AssertionError("Domain {0} not found in account".format( self.domain)) self.domain_id = zone_img["value"] logger.debug("HENET domain ID: {}".format(self.domain_id)) return True # Create record. If record already exists with the same content, do nothing def create_record(self, type, name, content): logger.debug("Creating record for zone {0}".format(name)) # Pull a list of records and check for ours records = self.list_records(type=type, name=name, content=content) if len(records) >= 1: logger.warning("Duplicate record {} {} {}, NOOP".format( type, name, content)) return True data = { "account": "", "menu": "edit_zone", "Type": type, "hosted_dns_zoneid": self.domain_id, "hosted_dns_recordid": "", "hosted_dns_editzone": "1", "Priority": "", "Name": name, "Content": content, "TTL": "3600", "hosted_dns_editrecord": "Submit" } ttl = self._get_lexicon_option('ttl') if ttl: if ttl <= 0: data['TTL'] = "3600" else: data['TTL'] = str(ttl) prio = self._get_lexicon_option('priority') if prio: if prio <= 0: data['Priority'] = "10" else: data['Priority'] = str(prio) create_response = self.session.post("https://dns.he.net/index.cgi", data=data) # Pull a list of records and check for ours records = self.list_records(name=name) if len(records) >= 1: logger.info("Successfully added record {}".format(name)) return True else: logger.info("Failed to add record {}".format(name)) return False # List all records. Return an empty list if no records found. # type, name and content are used to filter records. # If possible filter during the query, otherwise filter after response is # received. 
def list_records(self, type=None, name=None, content=None, id=None): records = [] # Make an authenticated GET to the DNS management page edit_response = self.session.get( "https://dns.he.net/?hosted_dns_zoneid={0}&menu=edit_zone&hosted_dns_editzone" .format(self.domain_id)) # Parse the HTML response, and list the table rows for DNS records html = BeautifulSoup(edit_response.content, "html.parser") def is_dns_tr_type(klass): return klass and re.compile("dns_tr").search(klass) records = html.findAll("tr", class_=is_dns_tr_type) # If the tag couldn't be found, error, otherwise, return the value of the tag if records is None or len(records) == 0: logger.warning("Domains not found in account") else: new_records = [] for dns_tr in records: tds = dns_tr.findAll("td") # Process HTML in the TR children to derive each object rec = {} rec['zone_id'] = tds[0].string rec['id'] = tds[1].string rec['name'] = tds[2].string # the 4th entry is a comment type_elem = tds[3].find("span", class_='rrlabel') if type_elem: rec['type'] = type_elem.string else: rec['type'] = None rec['ttl'] = tds[4].string if tds[5].string != '-': rec['priority'] = tds[5] rec['content'] = tds[6].string if tds[7].string == '1': rec['is_dynamic'] = True else: rec['is_dynamic'] = False rec = self._clean_TXT_record(rec) new_records.append(rec) records = new_records if id: logger.debug("Filtering {} records by id: {}".format( len(records), id)) records = [record for record in records if record['id'] == id] if type: logger.debug("Filtering {} records by type: {}".format( len(records), type)) records = [ record for record in records if record['type'] == type ] if name: logger.debug("Filtering {} records by name: {}".format( len(records), name)) if name.endswith('.'): name = name[:-1] records = [ record for record in records if name in record['name'] ] if content: logger.debug("Filtering {} records by content: {}".format( len(records), content.lower())) records = [ record for record in records if record['content'].lower() == content.lower() ] logger.debug("Final records ({}): {}".format( len(records), records)) return records # Create or update a record. def update_record(self, identifier, type=None, name=None, content=None): # Delete record if it exists self.delete_record(identifier, type, name, content) return self.create_record(type, name, content) # Delete an existing record. # If record does not exist, do nothing. def delete_record(self, identifier=None, type=None, name=None, content=None): delete_record_ids = [] if not identifier: records = self.list_records(type, name, content) delete_record_ids = [record['id'] for record in records] else: delete_record_ids.append(identifier) logger.debug("Record IDs to delete: {}".format(delete_record_ids)) for rec_id in delete_record_ids: # POST to the DNS management UI with form values to delete the record delete_response = self.session.post("https://dns.he.net/index.cgi", data={ "menu": "edit_zone", "hosted_dns_zoneid": self.domain_id, "hosted_dns_recordid": rec_id, "hosted_dns_editzone": "1", "hosted_dns_delrecord": "1", "hosted_dns_delconfirm": "delete" }) # Parse the HTML response, if the <div> tag indicating success isn't found, error html = BeautifulSoup(delete_response.content, "html.parser") if html.find("div", {"id": "dns_status"}) is None: logger.warning("Unable to delete record {}".format(rec_id)) return False return True
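# Hedged sketch of how the he.net provider above is typically driven. In lexicon,
# `config` is constructed by the framework (it supplies auth_username/auth_password
# and the domain); the variable below is only a stand-in to illustrate the call
# order, and the record name/content values are placeholders.
provider = Provider(config)   # `config` is supplied by lexicon, not shown here
if provider.authenticate():
    provider.create_record('TXT', '_acme-challenge.example.com', 'token-value')
    print(provider.list_records(type='TXT'))
    provider.delete_record(type='TXT', name='_acme-challenge.example.com')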
def get_subway_realtime_position(self, subway_name: str, start_index: int = 0, end_index: int = 1000) -> List[Train]: """Get realtime train position for subway line. Specification: http://data.seoul.go.kr/dataList/OA-12601/A/1/datasetView.do """ if self.api_key == SAMPLE_API_KEY: start_index = 0 end_index = 5 s = Session() retries = Retry(status_forcelist=[503]) s.mount(SUBWAY_BASE_URL, HTTPAdapter(max_retries=retries)) url = SUBWAY_BASE_URL + SUBWAY_REALTIME_POSITION_URL.format( api_key=self.api_key, format='json', subway_name=subway_name, start_index=start_index, end_index=end_index, ) r = s.get(url) d = r.json() if 'realtimePositionList' not in d: """ code: 'INFO-200' message: '해당하는 데이터가 없습니다.' Returned when the subway line has ended operations for the day. """ if d['status'] == 500 and d['code'] == 'INFO-200': return [] raise Exception(f"{d['code']}: {d['message']}") else: trains = [] for t in d['realtimePositionList']: data = { 'subway_id': t['subwayId'], 'subway_name': t['subwayNm'], 'station_id': t['statnId'], 'station_name': t['statnNm'], 'terminal_station_id': t['statnTid'], 'terminal_station_name': t['statnTnm'], 'number': t['trainNo'], 'status': t['trainSttus'], 'direction': Direction(int(t['updnLine'])), 'updated_at': datetime.strptime(t['recptnDt'], '%Y-%m-%d %H:%M:%S'), 'express': t['directAt'] == '1', 'last': t['lstcarAt'] == '1', } trains.append(Train(**data)) return trains
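# Hedged usage sketch for get_subway_realtime_position() above. It is a method, so
# an owning client class with `api_key` set is assumed; `SeoulSubwayClient` is a
# hypothetical name, and the line name is passed exactly as the Seoul open-data API
# expects it. SAMPLE_API_KEY is the module-level constant referenced in the method.
client = SeoulSubwayClient(api_key=SAMPLE_API_KEY)   # hypothetical class name
for train in client.get_subway_realtime_position('2호선'):
    print(train.number, train.station_name, train.direction)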