Example #1
0
def is_internal_url(url, reference):
    '''Check whether ``url`` belongs to the same site as ``reference``.

    Both URLs are parsed with ``utils.parse_url`` and their ``origin``
    attributes are compared for equality.

    Raises InvalidUrlError when ``url`` does not start with ``'http'``.
    '''
    # Validate first, so a non-absolute url is always reported as
    # InvalidUrlError instead of being handed to the parser beforehand.
    if not url.startswith('http'):
        raise InvalidUrlError
    refobj = utils.parse_url(reference)
    urlobj = utils.parse_url(url)
    return urlobj.origin == refobj.origin
Example #2
0
def absolute_to_relative(url, reference):
    '''Return the path of an absolute ``url`` when it shares its origin
    with ``reference``; return None when the origins differ.

    Raises InvalidUrlError when ``url`` does not start with ``'http'``.
    '''
    if not url.startswith('http'):
        raise InvalidUrlError
    base = utils.parse_url(reference)
    target = utils.parse_url(url)
    same_origin = target.origin == base.origin
    return target.path if same_origin else None
Example #3
0
 def _patch_uris(self, uris, handler="/XMLRPC"):
     """Point one URI, or every URI in a list, at ``handler``.

     Each URI is split with ``utils.parse_url``, the element at index 2 is
     overwritten with ``handler`` and the parts are reassembled with
     ``utils.unparse_url``.

     Returns a list of patched URIs when ``uris`` is a list, otherwise a
     single patched URI.
     """
     def patch(uri):
         parts = list(utils.parse_url(uri))
         parts[2] = handler
         return utils.unparse_url(parts)

     # isinstance also recognises list subclasses, which the former
     # ``type(uris) == type([])`` comparison treated as a single URI.
     if isinstance(uris, list):
         return [patch(uri) for uri in uris]
     return patch(uris)
Example #4
0
 def _patch_uris(self, uris, handler="/XMLRPC"):
     """Rewrite the handler part of a URI or of each URI in a list.

     A URI is parsed with ``utils.parse_url``; index 2 of the parsed parts
     is replaced by ``handler`` and the result is rebuilt with
     ``utils.unparse_url``.

     Returns a list when ``uris`` is a list, otherwise one patched URI.
     """
     def with_handler(uri):
         pieces = list(utils.parse_url(uri))
         pieces[2] = handler
         return utils.unparse_url(pieces)

     # A list (including subclasses, unlike the old exact type comparison)
     # is patched element by element; anything else is one URI.
     if isinstance(uris, list):
         return [with_handler(uri) for uri in uris]
     return with_handler(uris)
Example #5
0
 def test_parse_url(self):
     """An issue URL is parsed into the repository slug and issue number."""
     issue_url = "https://github.com/org_name/repo_name/issues/123"
     expected = ("org_name/repo_name", "123")
     self.assertEqual(utils.parse_url(issue_url), expected)
 def _post(self, url, data=None, headers=None):
     """Send a form-encoded POST request to ``url``.

     ``data`` is url-encoded into the request body when truthy; ``headers``
     are extra request headers. The connection is always an
     HTTPSConnection; the ``is_ssl`` flag returned by ``parse_url`` is not
     consulted.

     Returns a ``(connection, response)`` tuple.
     """
     host, port, uri, is_ssl = parse_url(url)
     conn = HTTPSConnection(host, port, timeout=self.timeout)
     # Build a private header dict: updating the argument in place used to
     # modify the caller's dict and the shared ``{}`` default.
     request_headers = {} if headers is None else dict(headers)
     request_headers.update({'Content-type': 'application/x-www-form-urlencoded'})
     body = urlencode(data) if data else None
     conn.request('POST', '/' + uri, body, request_headers)
     response = conn.getresponse()
     return conn, response
Example #7
0
 def insert(self, bl_type, url):
     """Add the query part of ``url`` to the ``bl_type`` blacklist.

     The url is split with ``utils.parse_url`` and normalised through
     ``generate_query_parameters``. If the blacklist type, domain or path
     is not present in ``self.blacklists`` nothing is stored.
     """
     domain, url_path, query = utils.parse_url(url)
     domain, url_path, query = self.generate_query_parameters(
         domain, url_path, query)
     try:
         bucket = self.blacklists[bl_type][domain][url_path]
         bucket.add(query)
     except KeyError:
         return None
Example #8
0
def save_pdf(s, html, login=False):
    """
    Save the PDF linked from a page into the current directory.

    @params s: requests session used for the download
    @params html: HTML of the fetched page
    @params login: whether connected to the campus network (default: not
                   connected); selects which host the PDF is fetched from

    @return name of the saved PDF; '' when the file already exists or no
            title could be extracted from the page; None when the page
            contains no 'pdfdown' link
    """
    url_lst = re.findall(r'<a target="_blank" href="&#xA;(.+)">', html)
    title_matches = re.findall(r'id="chTitle">(.+)</span>', html)

    # Keep only the fragments of the title matched by re_words.
    title = ''
    if title_matches:
        title = ''.join(re.findall(re_words, title_matches[0]))

    for url in url_lst:
        if 'pdfdown' not in url:
            continue
        if not title:
            # Without a title there is no file name; this used to fail with
            # a TypeError (list + str) or write to a bare '.pdf'.
            print('No title found, skip download')
            return ''

        pdf_url = parse_url(url)
        filename = title + '.pdf'
        if filename in os.listdir():
            print('{} have been in directory'.format(filename))
            return ''

        print('Download ing')
        if login:
            host = 'http://gb.oversea.cnki.net'
        else:
            host = 'http://gb-oversea-cnki-net.wvpn.ncu.edu.cn'
        response = s.get(host + pdf_url, stream=True)
        # Stream the body to disk in 1 KiB chunks.
        with open(filename, 'wb') as file:
            for data in response.iter_content(chunk_size=1024):
                file.write(data)
        print('success')
        return filename
Example #9
0
    def save_page(self, url, html):
        '''Insert or update the stored content for ``url``.

        The page is looked up by the origin and path of the parsed url. A
        missing row is inserted; an existing row is updated with the new
        content, its SHA-1, a fresh expiry and an attempt count of 0.
        Any exception is logged and swallowed.
        '''
        try:
            urlobj = utils.parse_url(url)
            hsh = hashlib.sha1(html.encode('utf-8')).hexdigest()

            current = self.db.get_one('''
            select page_id, page_sha1 from page
            where
            site_hostname = %s and
            page_path = %s''', (urlobj.origin, urlobj.path))

            expires = datetime.datetime.now() + datetime.timedelta(0, default_expiry_time) #secs

            logging.info('Saving page: %s', url)
            if not current:
                # TODO paths should always be present beforehand
                self.db.put('''
                insert into page
                (site_hostname, page_path, page_content, page_sha1,
                page_expires, page_expiresevery, page_attempts) values
                (%s, %s, %s, %s, %s, %s, %s)
                ''', (urlobj.origin, urlobj.path, html, hsh, expires, default_expiry_time, 0))
            else:
                pageid = current[0]
                # Parameters follow the placeholder order: page_attempts (0)
                # comes before the page_id used in the WHERE clause. They
                # were previously swapped.
                self.db.put('''
                update page
                set
                site_hostname = %s, page_content = %s, page_sha1 = %s,
                page_expires = %s, page_expiresevery = %s, page_attempts = %s
                where
                page_id = %s
                ''', (urlobj.origin, html, hsh, expires, default_expiry_time, 0, pageid))
        except Exception:
            logging.error('Could not save page', exc_info=True)
 def cdn_connect(self):
     """
     Create the connection object for the CDN service from ``self.cdn_url``
     and mark the CDN as enabled.
     """
     cdn_host, cdn_port, _cdn_uri, _is_ssl = parse_url(self.cdn_url)
     connection = self.conn_class(cdn_host, cdn_port, timeout=self.timeout)
     self.cdn_connection = connection
     self.cdn_enabled = True
    def _extract_url_parameters(self, url):
        '''
        describe the query parameters of an osdd url

        note: parameters are not always emitted as param={thing?}; a plain
              param=thing also occurs and is probably a hardcoded term.
              TODO: manage that (ex: ?product=MOD021QA&amp;collection={mp:collection?})

        returns a list with one tidy_dict per query parameter holding its
        name, the parser namespaces, the namespace prefix, the type and the
        format found by joining the items after the name with ':'
        '''
        assert url, 'No URL'

        # first pass: [query param key, prefix, type] for every parameter
        triples = []
        for key, value in parse_url(url).iteritems():
            triples.append([key] + list(self._extract_parameter_type(value)))

        # second pass: one description dict per parameter
        described = []
        for triple in triples:
            described.append(tidy_dict({
                "name": triple[0],
                "namespaces": self.parser._namespaces,
                "prefix": triple[1],
                "type": triple[2],
                "format": self._parameter_formats.get(':'.join(triple[1:]))
            }))
        return described
Example #12
0
 def cdn_connect(self):
     """
     Build the CDN connection with ``self.conn_class`` using the host and
     port parsed from ``self.cdn_url``, then flag the CDN as enabled.
     """
     parsed_host, parsed_port, _uri, _ssl = parse_url(self.cdn_url)
     self.cdn_connection = self.conn_class(
         parsed_host, parsed_port, timeout=self.timeout)
     self.cdn_enabled = True
Example #13
0
def read_dataset(base_dir, play, args):
    """Load the preprocessed tracking data of one play.

    Files are read from ``<base_dir>/datasets/preprocessed/<play>`` (the
    path is passed through ``parse_url`` when ``args.env`` is "heroku").
    ``infile_list.txt`` names the per-team CSV files, whose names look like
    ``<team>_<color>.csv``.

    Returns ``(df_dict, color_dict, events_df)``: a DataFrame per team with
    team-prefixed columns (time and ball columns are left untouched), a
    display color per team (white and lightgray become black), and the
    events table with its "Team" column set to "Liverpool".
    """
    data_dir = os.path.join(base_dir, "datasets", "preprocessed", play)
    if args.env == "heroku":
        data_dir = parse_url(data_dir)

    listing = pd.read_csv(os.path.join(data_dir, "infile_list.txt"),
                          header=None, names=["infile"])
    infile_list = listing.infile.tolist()

    def name_parts(infile):
        # "<team>_<color>.csv" -> ["<team>", "<color>"]
        return infile.replace(".csv", "").split("_")

    df_dict = {}
    for infile in infile_list:
        team = name_parts(infile)[0]
        df_dict[team] = pd.read_csv(os.path.join(data_dir, infile),
                                    index_col=[0])

    # rename columns
    shared_columns = ("Time [s]", "ball_x", "ball_y")
    for team, df in df_dict.items():
        df.columns = [
            c if c in shared_columns else f"{team}_{c}" for c in df.columns
        ]

    color_dict = {}
    for infile in infile_list:
        parts = name_parts(infile)
        team = parts[0]
        color = parts[1]
        color_dict[team] = "black" if color in ("white", "lightgray") else color

    events_df = pd.read_csv(os.path.join(data_dir, "events.csv"))
    events_df["Team"] = "Liverpool"

    return df_dict, color_dict, events_df
    def _extract_url_parameters(self, url):
        '''
        build a description for every query parameter of an osdd url

        note: a parameter is not always emitted as param={thing?}; it can
              also be param=thing, which is probably a hardcoded term.
              TODO: manage that (ex: ?product=MOD021QA&amp;collection={mp:collection?})

        returns a list of tidy_dict results, one per query parameter, with
        the keys name, namespaces, prefix, type and format
        '''
        assert url, 'No URL'

        # [query param key, prefix, type] for each namespaced parameter
        typed_params = []
        for param_name, raw_value in parse_url(url).iteritems():
            type_info = list(self._extract_parameter_type(raw_value))
            typed_params.append([param_name] + type_info)

        results = []
        for entry in typed_params:
            format_key = ':'.join(entry[1:])
            results.append(tidy_dict({
                "name": entry[0],
                "namespaces": self.parser._namespaces,
                "prefix": entry[1],
                "type": entry[2],
                "format": self._parameter_formats.get(format_key)
            }))
        return results
 def _get(self, url, params=None, headers={}):
     """Send a GET request to ``url`` over an HTTPSConnection.

     Truthy ``params`` are url-encoded and appended as the query string.
     Returns a ``(connection, response)`` tuple.
     """
     host, port, uri, is_ssl = parse_url(url)
     conn = HTTPSConnection(host, port, timeout=self.timeout)
     target = '/' + uri
     if params:
         target = target + '?' + urlencode(params)
     conn.request('GET', target, headers=headers)
     return conn, conn.getresponse()
 def __init__(self, username, api_key, authurl=default_authurl):
     """Store the auth headers and pick the connection class for ``authurl``."""
     self.authurl = authurl
     self.headers = {
         'x-auth-user': username,
         'x-auth-key': api_key,
         'User-Agent': user_agent,
     }
     self.host, self.port, self.uri, self.is_ssl = parse_url(self.authurl)
     # HTTPS when the parsed auth url is flagged as SSL, plain HTTP otherwise.
     self.conn_class = HTTPSConnection if self.is_ssl else HTTPConnection
 def __init__(self, repo, ref=None, token=None):
     """Remember the repository as "user/repo", plus the token and ref.

     A ``repo`` containing more than one '/' is treated as a full URL and
     reduced to its user and repository parts via ``parse_url``.
     """
     slug = repo
     if repo.count('/') > 1:
         _host, owner, project = parse_url(repo)
         slug = '{}/{}'.format(owner, project)
     self.repo = slug
     self.token = token
     self.ref = ref  # branch/tag/commit
Example #18
0
    def __init__(self):
        """Parse the command line and store the endpoint URL.

        Exits with status 1 when ``utils.parse_url`` returns None for the
        given URL.
        """
        parser = self.build_argparse()
        cli_args = parser.parse_args()

        endpoint = utils.parse_url(cli_args.url)
        self.GQL_ENDPOINT = endpoint
        if endpoint is None:
            print("URL {} is not valid!".format(cli_args.url))
            exit(1)
Example #19
0
 def cdn_connect(self):
     """
     Create the CDN connection object, HTTPS or HTTP depending on the
     parsed ``self.cdn_url``, and mark the CDN as enabled.
     """
     host, port, _cdn_uri, use_ssl = parse_url(self.cdn_url)
     if use_ssl:
         connection_factory = HTTPSConnection
     else:
         connection_factory = HTTPConnection
     self.cdn_connection = connection_factory(host, port)
     self.cdn_enabled = True
 def __init__(self, username, api_key, authurl=default_authurl):
     """Keep the credentials as request headers and parse ``authurl``."""
     self.authurl = authurl
     self.headers = {
         'x-auth-user': username,
         'x-auth-key': api_key,
         'User-Agent': user_agent,
     }
     parsed = parse_url(self.authurl)
     (self.host, self.port, self.uri, self.is_ssl) = parsed
     if self.is_ssl:
         self.conn_class = HTTPSConnection
     else:
         self.conn_class = HTTPConnection
Example #21
0
 def cdn_connect(self):
     """
     Set up ``self.cdn_connection`` for the host and port of
     ``self.cdn_url`` and switch ``self.cdn_enabled`` on.
     """
     cdn_host, cdn_port, _uri, secure = parse_url(self.cdn_url)
     connection_type = HTTPSConnection if secure else HTTPConnection
     self.cdn_connection = connection_type(cdn_host, cdn_port)
     self.cdn_enabled = True
Example #22
0
def capture(request):
    """Render a screenshot of the requested url and return it as a response.

    Parameters are read from the query string and the POST body (POST
    values win). ``url`` is mandatory; ``method``, ``selector``, ``data``,
    ``waitfor``, ``wait``, ``render`` (default 'png'), ``size``, ``crop``,
    ``width`` and ``height`` are optional and passed to
    ``casperjs_capture``.

    Returns HttpResponseBadRequest for a missing or unreversible url and
    for capture, resize or image-format errors. With ``render == "html"``
    the image is embedded base64-encoded in a small self-printing HTML
    page; otherwise the raw image is returned with the matching mimetype.
    """
    # Merge both QueryDict into dict
    parameters = dict(request.GET.items())
    parameters.update(request.POST.items())

    url = parameters.get('url')
    if not url:
        return HttpResponseBadRequest(_('Missing url parameter'))
    try:
        url = parse_url(request, url)
    except NoReverseMatch:
        error_msg = _("URL '%s' invalid (could not reverse)") % url
        return HttpResponseBadRequest(error_msg)

    method = parameters.get('method', request.method)
    selector = parameters.get('selector')
    data = parameters.get('data')
    waitfor = parameters.get('waitfor')
    wait = parameters.get('wait')
    render = parameters.get('render', 'png')
    size = parameters.get('size')
    crop = parameters.get('crop')

    # Missing or non-numeric dimensions fall back to None.
    try:
        width = int(parameters.get('width', ''))
    except ValueError:
        width = None
    try:
        height = int(parameters.get('height', ''))
    except ValueError:
        height = None

    stream = StringIO()
    try:
        casperjs_capture(stream, url, method=method.lower(), width=width,
                         height=height, selector=selector, data=data,
                         size=size, waitfor=waitfor, crop=crop, render=render,
                         wait=wait)
    except CaptureError as e:
        return HttpResponseBadRequest(e)
    except ImportError:
        error_msg = _('Resize not supported (PIL not available)')
        return HttpResponseBadRequest(error_msg)
    except UnsupportedImageFormat:
        # Translate the constant template first and interpolate afterwards;
        # formatting inside _() would look up an already-filled string.
        error_msg = _('Unsupported image format: %s') % render
        return HttpResponseBadRequest(error_msg)

    if render == "html":
        response = HttpResponse(mimetype='text/html')
        body = """<html><body onload="window.print();">
                <img src="data:image/png;base64,%s"/></body></html>
                """ % base64.encodestring(stream.getvalue())
        response.write(body)
    else:
        response = HttpResponse(mimetype=image_mimetype(render))
        response.write(stream.getvalue())

    return response
Example #23
0
 def insert(self, payload, url):
     """
     Store ``payload`` under the domain and path parameters derived from
     ``url`` and return the result of executing the insert query.

     TODO: to allow searching query parameters in any order, sets may be
     usable; this needs investigating. For now the query arguments are
     sorted.
     """
     dn, path, qp = utils.parse_url(url)
     key = self.generate_query_parameters(dn, path, qp)
     domain, path_parameters = key
     bound = self._insert_query.bind((domain, path_parameters, payload))
     return self._session.execute(bound)
 def __init__(self, repo, ref=None, token=None):
     """Store the repository name ("user/repo"), access token and ref."""
     is_full_url = repo.count('/') > 1
     if is_full_url:
         # A full URL is reduced to its user and repository parts.
         _host, _user, _repo = parse_url(repo)
         repo_name = '{}/{}'.format(_user, _repo)
     else:
         repo_name = repo
     self.repo = repo_name
     self.token = token
     self.ref = ref
Example #25
0
    def get_vedio_info(cls, url):
        """Fetch information about a video (aid, cid, like count, etc.).

        The aid is the result of ``parse_url(url)`` with all lowercase
        letters removed; it is sent to the bilibili "view" web interface.
        """
        video_id = parse_url(url)
        aid = re.sub('[a-z]+', '', video_id)
        api_url = "https://api.bilibili.com/x/web-interface/view"
        return get_page(api_url, params={'aid': aid})
 def __init__(self, repo, ref=None, token=None):
     """Set up the importer for ``repo`` at ``ref`` with ``token``.

     ``repo`` may be "user/repo" or a longer URL, which is shortened to
     that form through ``parse_url``.
     """
     short_name = repo
     if repo.count('/') > 1:
         _host, _user, _repo = parse_url(repo)
         short_name = '{}/{}'.format(_user, _repo)
     self.repo = short_name
     self.token = token
     self.ref = ref
def bot():
    for comment in reddit.subreddit(sys.argv[1]).stream.comments():
        if comment.author.name == "Congress_Bill_Bot":
            continue

        urls = utils.find_urls(comment.body)

        if len(urls) > 0:
            bills = []

            for url in urls:
                url = url.replace(")", "")
                if "congress.gov/bill" not in url:
                    continue

                print "\n***************URL*****************"
                print "Working on comment: " + comment.permalink(fast=True)

                print "Working on: " + url
                congress, bill_id = utils.parse_url(url)

                print "Found Congress: " + congress + ", and Bill: " + bill_id

                print "Adding bill to list."
                bill = pp.get_bill(congress, bill_id)
                print "Got Bill titled: " + bill.title

                bills.append(bill)

            if len(bills) > 0:
                print "---------- Working on Bills! ----------"
                reply = ""
                for bill in bills:
                    print "Adding bill to reply: " + bill.title
                    reply = reply + "  \n*****  \n" + utils.format_comment_from_bill(bill)
                    analytics.db_insert(bill, comment.author.name, comment.subreddit, "C")

                comment.reply(reply)
                print "I replied to: https://reddit.com" + comment.permalink()


        elif "+/u/Congress_Bill_Bot [[" in comment.body:
            print "************SUMMONED*************"
            print "Comment: " + comment.permalink(fast=True)

            try:
                congress, bill_id = re.search(r'\[\[(.*?)\]\]', comment.body).group(1).lower().replace(" ", "").replace(".", "").split(",")
                bill = pp.get_bill(congress, bill_id)
                reply = utils.format_comment_from_bill(bill)
                analytics.db_insert(bill, comment.author.name, comment.subreddit, "C")
            
                comment.reply(reply)
            
                print "I replied to: " + comment.permalink()
            except:
                comment.reply("Sorry, I couldn't seem to find that bill.")
 def __init__(self, repo, ref=None, token=None):
     """Store repository name, token, the fixed GitLab host and the ref.

     A ``repo`` with more than one '/' is parsed as a URL and reduced to
     "user/repo".
     """
     slug = repo
     if repo.count('/') > 1:
         _host, _user, _repo = parse_url(repo)
         slug = '{}/{}'.format(_user, _repo)
     self.repo = slug
     self.token = token
     self.host = "https://gitlab.cern.ch"
     self.ref = ref  # branch/tag/commit
 def __init__(self, username, api_key, authurl=us_authurl, timeout=15,
              useragent=user_agent):
     """Record credentials, timeout and the connection class for ``authurl``."""
     self.authurl = authurl
     self.headers = {
         'x-auth-user': username,
         'x-auth-key': api_key,
         'User-Agent': useragent,
     }
     self.timeout = timeout
     parsed = parse_url(self.authurl)
     (self.host, self.port, self.uri, self.is_ssl) = parsed
     self.conn_class = get_conn_class(self.is_ssl)
 def __init__(self, repo, ref=None, token=None):
     """Set repository ("user/repo"), token, GitLab host and ref."""
     looks_like_url = repo.count('/') > 1
     if looks_like_url:
         _host, _user, _repo = parse_url(repo)
         repo_name = '{}/{}'.format(_user, _repo)
     else:
         repo_name = repo
     self.repo = repo_name
     self.token = token
     self.host = "https://gitlab.cern.ch"
     self.ref = ref  # branch/tag/commit
Example #31
0
def get_urls(fname):
    """Collect the paper detail URLs from ``<fname>/brief.html``.

    Every href value containing 'detail' is passed through ``parse_url``;
    every second result (starting with the first) is returned.
    """
    # All papers for the keyword; the result page has to be saved to
    # <fname>/brief.html by hand beforehand.
    page_path = '{}/brief.html'.format(fname)
    with open(page_path, 'r', encoding='utf8') as f:
        html = f.read()

    hrefs = re.findall(r'(?<=href=\").+?(?=\")|(?<=href=\').+?(?=\')', html)

    urls = []
    for href in hrefs:
        if 'detail' in href:
            urls.append(parse_url(href))

    return urls[::2]
Example #32
0
 def delete_page(self, url):
     '''Delete the stored page matching the path and origin of ``url``.

     Any exception is logged and swallowed.
     '''
     try:
         urlobj = utils.parse_url(url)
         self.db.put('''
         delete from page
         where page_path = %s
         and
         site_hostname = %s''', (urlobj.path, urlobj.origin))
     except Exception:
         # The exception object was never used; this form also parses on
         # Python 3, unlike ``except Exception, e``.
         logging.error('Could not delete page', exc_info=True)
Example #33
0
 def _authenticate(self):
     """
     Authenticate, store the returned CDN url and token, then open the
     storage connection and, if a CDN url was returned, the CDN connection.
     """
     url, self.cdn_url, self.token = self.auth.authenticate()
     self.connection_args = parse_url(url)
     use_ssl = self.connection_args[3]
     self.conn_class = HTTPSConnection if use_ssl else HTTPConnection
     self.http_connect()
     if self.cdn_url:
         self.cdn_connect()
Example #34
0
    def factory(url, ref=None, token=None):
        """Return the importer matching the host of ``url``.

        A host containing "gitlab" yields a GitlabImporter, one containing
        "github" a GithubImporter; any other host yields None.
        """
        host, user, repo = parse_url(url)
        repo_name = "/".join([user, repo])

        if "gitlab" in host:
            return GitlabImporter(repo_name, ref, token)
        if "github" in host:
            return GithubImporter(repo_name, ref, token)
        return None
Example #35
0
    def cdn_connect(self):
        """
        Setup the http connection instance for the CDN service.

        The connection class is chosen from the SSL flag of the parsed
        ``self.cdn_url``; interpreters older than Python 2.6 use the
        THTTP(S)Connection variants.
        """
        (host, port, cdn_uri, is_ssl) = parse_url(self.cdn_url)
        # Indented with spaces only: the tab that used to start this line
        # is a TabError on Python 3.
        if version_info[0] <= 2 and version_info[1] < 6:
            self.conn_class = is_ssl and THTTPSConnection or THTTPConnection
        else:
            self.conn_class = is_ssl and HTTPSConnection or HTTPConnection
        self.cdn_connection = self.conn_class(host, port, timeout=self.timeout)
        self.cdn_enabled = True
Example #36
0
 def _authenticate(self):
     """
     Run the authentication and configure this instance from its result:
     token, CDN url, parsed connection arguments and connection class.
     """
     auth_result = self.auth.authenticate()
     (url, self.cdn_url, self.token) = auth_result
     self.connection_args = parse_url(url)
     # Index 3 of the parsed url is the SSL flag.
     if self.connection_args[3]:
         self.conn_class = HTTPSConnection
     else:
         self.conn_class = HTTPConnection
     self.http_connect()
     if self.cdn_url:
         self.cdn_connect()
Example #37
0
    def __init__(self, username, api_key, authurl=default_authurl, timeout=5):
        """Store credentials and timeout and choose the connection class.

        The auth url is parsed into host, port, uri and SSL flag.
        Interpreters older than Python 2.6 use the THTTP(S)Connection
        variants.
        """
        self.authurl = authurl
        self.headers = dict()
        self.headers['x-auth-user'] = username
        self.headers['x-auth-key'] = api_key
        self.headers['User-Agent'] = user_agent
        # Indented with spaces only: the tab-indented lines that used to be
        # in this method are a TabError on Python 3.
        self.timeout = timeout
        (self.host, self.port, self.uri, self.is_ssl) = parse_url(self.authurl)
        if version_info[0] <= 2 and version_info[1] < 6:
            self.conn_class = self.is_ssl and THTTPSConnection or THTTPConnection
        else:
            self.conn_class = self.is_ssl and HTTPSConnection or HTTPConnection
Example #38
0
 def __init__(self, username, api_key, authurl=default_authurl, timeout=5):
     """Keep credentials, timeout and the connection class for ``authurl``."""
     self.authurl = authurl
     self.headers = {
         'x-auth-user': username,
         'x-auth-key': api_key,
         'User-Agent': user_agent,
     }
     self.timeout = timeout
     (self.host, self.port, self.uri, self.is_ssl) = parse_url(self.authurl)
     # Interpreters older than Python 2.6 get the THTTP(S)Connection classes.
     legacy_python = version_info[0] <= 2 and version_info[1] < 6
     if legacy_python:
         if self.is_ssl:
             self.conn_class = THTTPSConnection
         else:
             self.conn_class = THTTPConnection
     else:
         if self.is_ssl:
             self.conn_class = HTTPSConnection
         else:
             self.conn_class = HTTPConnection
Example #39
0
def make_query_string(nas_ip, username,method, action, url, source,visited_at):
    """Build the INSERT statement that logs one web request into ``weblogs``.

    Single quotes are removed from ``url``. The fixed columns are followed
    by one extra column per item of ``parse_url(url)``.

    Returns the statement as a string.
    """
    def quote(value):
        # Render a value as a single-quoted SQL literal, doubling embedded
        # quotes so it cannot terminate the literal early.
        return "'{0}'".format("{0}".format(value).replace("'", "''"))

    url = url.replace("'","")
    names = "nas_ip, username, ip,url, visited_at,method, action "
    values = "{0}, {1},{2}, {3}, {4}, {5}, {6} ".format(
        quote(nas_ip), quote(username), quote(source), quote(url),
        quote(visited_at), quote(method), quote(action))
    # NOTE(review): column names from parse_url are still interpolated
    # verbatim, and quote doubling does not cover backends that also treat
    # backslash as an escape; prefer driver-level parameter binding where
    # this statement is executed.
    for (k, v) in parse_url(url).items():
        names += " ,{0}".format(k)
        values += " ," + quote(v)

    query_string = "INSERT INTO weblogs({0}) values({1})".format(names, values)
    # debug(query_string)
    return query_string
Example #40
0
 def lookup_url(self, url):
     """Report which blacklists contain ``url``.

     Returns ``{'status': True, 'blacklists': [...]}`` when at least one
     blacklist holds the url's query part under its domain and path, and
     ``{'status': False}`` otherwise.
     """
     dn, path, qp = utils.parse_url(url)
     dn, path, qp = self.generate_query_parameters(dn, path, qp)
     match = []
     for bl_type, bl in self.blacklists.iteritems():
         if qp in bl[dn][path]:
             match.append(bl_type)
     if not match:
         return {'status': False}
     return {'status': True, 'blacklists': match}
Example #41
0
    def _authenticate(self):
        """
        Authenticate, keep the returned token and parsed connection
        arguments, choose the connection class and open the connection.
        """
        (url, self.token) = self.auth.authenticate()
        self.connection_args = parse_url(url)

        use_ssl = self.connection_args[3]
        # Interpreters older than Python 2.6 get the THTTP(S)Connection classes.
        if version_info[0] <= 2 and version_info[1] < 6:
            if use_ssl:
                self.conn_class = THTTPSConnection
            else:
                self.conn_class = THTTPConnection
        else:
            if use_ssl:
                self.conn_class = HTTPSConnection
            else:
                self.conn_class = HTTPConnection
        self.http_connect()
Example #42
0
def create_report():
    """Show the report form and, on a valid POST, store a new custom report.

    A valid submission is saved and redirected to the custom report index;
    anything else renders the creation form.
    """
    form = CustomReportForm(request.form)
    if request.method != 'POST' or not form.validate():
        return render_template('reports/create_report.html', form=form)

    custom_report = CustomReport()
    custom_report.name = form.name.data
    custom_report.url = utils.parse_url(form.url.data)
    custom_report.description = form.description.data

    db_session.add(custom_report)
    db_session.commit()
    return redirect(url_for('custom_report.custom_report_index'))
Example #43
0
 def get_page(self, url):
     '''Return the stored content for the path and origin of ``url``.

     Returns whatever ``self.db.get_one`` yields for the lookup, or None
     when an exception occurred (the exception is logged).
     '''
     try:
         url = utils.parse_url(url)
         content = self.db.get_one('''
         select page_content from page
         where
         page_path = %s and
         site_hostname = %s
         ''', (url.path, url.origin))
         return content
     except Exception:
         # The exception object was never used; this form also parses on
         # Python 3, unlike ``except Exception, e``.
         logging.error('Could not get page', exc_info=True)
         return None
Example #44
0
 def proxy_zhimas(self):
     """Yield "ip:port" strings from the zhimacangku proxy API reply.

     Nothing is yielded when the reply's ``code`` is not 0 or its
     ``success`` field is the string 'false'.
     """
     url = ('http://webapi.http.zhimacangku.com/getip?num=1&type=2&pro='
            '&city=0&yys=0&port=1&pack=19967&ts=0&ys=0&cs=1&lb=1&sb=0&pb=45&mr=1&regions=')
     # presumably parse_url fetches the url and returns the body text,
     # since the result is fed to json.loads — confirm against its definition
     resp = parse_url(url)
     print(resp)
     payload = json.loads(resp)
     code = payload.get('code')
     success = payload.get('success')
     if code == 0 and success != 'false':
         for entry in payload.get('data'):
             yield entry.get('ip') + ':' + str(entry.get('port'))
Example #45
0
    def _authenticate(self):
        """
        Authenticate, store the CDN url and token, derive the connection
        arguments from the adjusted storage url and open the connections.
        """
        auth_result = self.auth.authenticate()
        (storage_url, self.cdn_url, self.token) = auth_result
        storage_url = self._set_storage_url(storage_url)
        self.connection_args = parse_url(storage_url)

        # Index 3 of the parsed url is the SSL flag.
        use_ssl = self.connection_args[3]
        self.conn_class = get_conn_class(use_ssl)

        self.http_connect()
        if self.cdn_url:
            self.cdn_connect()
Example #46
0
def run_command(cmd, internal=False, retval=False, progress=False):
    """Send ``cmd`` to the DotCloud API and print or act on the reply.

    cmd      -- command object; JSON-encoded and url-quoted into the query.
    internal -- when True the command is sent as the ``c`` query parameter
                instead of ``q``, and the API key bootstrap is skipped.
    retval   -- not used in this function.
    progress -- when True, progress markers are written to stderr.

    The HTTP(S) connection is created once and cached in the module-level
    ``_request``. A reply of type 'cmd' is handed to ``run_remote`` and its
    result returned; otherwise the reply's data is printed. A socket error
    terminates through ``utils.die``.
    """
    global _request
    # First non-internal call without a key file: fetch the key first.
    if internal is False and not os.path.exists(config.CONFIG_KEY):
        run_command('get_key', True)
    data = None
    cfg = config.load_user_config()
    url = utils.parse_url(cfg.url)
    # Reuse the cached connection if one exists, else create and cache it.
    if _request:
        req = _request
    else:
        if url['scheme'].lower() == 'https':
            req = https.HTTPSConnection(url['host'], int(url['port'] or 443))
        else:
            req = httplib.HTTPConnection(url['host'], int(url['port'] or 80))
        _request = req
    original_cmd = cmd
    cmd = urllib.quote(json.dumps(cmd))
    query = '{0}run?{1}={2}'.format(url['path'] or '/', 'c' if internal is True else 'q', cmd)
    headers = sign_request(cfg.apikey, 'GET', query)
    headers.update({
        'User-Agent': 'dotcloud/cli (version: {0})'.format(VERSION),
        'X-DotCloud-Version': VERSION
        })
    trace_id = None
    try:
        req.request('GET', query, headers=headers)
        resp = req.getresponse()
        info = resp.getheader('X-Dotcloud-Info')
        trace_id = resp.getheader('X-Dotcloud-TraceID')
        data = resp.read()
        req.close()
        # Export mode: dump the raw reply and stop.
        if _export is True:
            print data
            return
        if info:
            utils.warning(info.replace(';', '\n'))
        if _trace and trace_id:
            utils.info('TraceID for "{0}": {1}'.format(
                original_cmd, trace_id))
        data = json.loads(data)
        # A 'cmd' reply is handed to run_remote.
        if data['type'] == 'cmd':
            return run_remote(data['data'])
        if 'data' in data and len(data['data']) > 0:
            if progress:
                sys.stderr.write('\r')
            print data['data']
        elif progress:
            sys.stderr.write('.')
    except socket.error, e:
        utils.die('Cannot reach DotCloud service ("{0}").\n' \
                'Please check the connectivity and try again.'.format(str(e)))
Example #47
0
    def _authenticate(self):
        """
        Authenticate and set this instance up from the result: token,
        parsed connection arguments, connection class and open connection.
        """
        auth_result = self.auth.authenticate()
        (url, self.token) = auth_result
        self.connection_args = parse_url(url)

        # Interpreters older than Python 2.6 get the THTTP(S)Connection classes.
        legacy_python = version_info[0] <= 2 and version_info[1] < 6
        if legacy_python:
            if self.connection_args[3]:
                self.conn_class = THTTPSConnection
            else:
                self.conn_class = THTTPConnection
        else:
            if self.connection_args[3]:
                self.conn_class = HTTPSConnection
            else:
                self.conn_class = HTTPConnection
        self.http_connect()
Example #48
0
def process_site(browser, url):
    """Process pages starting at ``url``, pausing one second per iteration.

    An unexpected browser alert closes the browser, creates a new one and
    retries; a timeout is logged, recorded through
    ``datastore.failed_attempt`` and retried.

    NOTE(review): nothing visible here changes ``url`` or uses ``site``,
    ``path``, ``links`` or ``source``, so the loop repeats on the same url;
    the rest of the loop body may be missing from this view.
    """
    while url:
        time.sleep(1) # rate limit

        site, path = utils.parse_url(url)
        try:
            links, source = process_page(browser, url)
        except selenium.common.exceptions.UnexpectedAlertPresentException, e:
            # Replace the browser and retry the same url.
            browser.close()
            browser = create_browser()
            continue
        except selenium.common.exceptions.TimeoutException:
            logging.info('Timeout on page %s', url)
            datastore.failed_attempt(url)
            continue
Example #49
0
    def create(url, ref=None):
        """Return an importer for the repository at ``url``.

        A host containing "gitlab" yields a GitlabImporter using the
        configured GitLab OAuth token; a host containing "github" yields a
        GithubImporter without a token; any other host yields None.
        """
        host, user, repo = parse_url(url)
        repo_name = "/".join([user, repo])

        if "gitlab" in host:
            from gitlab_importer import GitlabImporter
            token = current_app.config.get('GITLAB_OAUTH_ACCESS_TOKEN')
            return GitlabImporter(repo_name, ref, token)

        if "github" in host:
            from github_importer import GithubImporter
            # No token is passed for GitHub.
            return GithubImporter(repo_name, ref, token=None)

        return None
Example #50
0
    def authenticate(self):
        """
        Initiates authentication with the remote service and returns a
        two-tuple containing the DNS management URL and session token.

        The URL is built from the scheme and path of the
        ``x-server-management-url`` response header and the module-level
        ``dns_management_host`` (prefixed with 'lon.' when the header's
        host contains 'lon.').

        Raises AuthenticationFailed on a 401 response, ResponseError on
        any other non-2XX response and AuthenticationError when either
        expected header is missing.
        """
        auth_token = None
        dns_management_url = None

        conn = self.conn_class(self.host, self.port, timeout=self.timeout)
        try:
            conn.request('GET', '/' + self.uri, headers=self.headers)
            response = conn.getresponse()
            response.read()

            # A status code of 401 indicates that the supplied credentials
            # were not accepted by the authentication service.
            if response.status == 401:
                raise AuthenticationFailed()

            # Raise an error for any response that is not 2XX
            if response.status // 100 != 2:
                raise ResponseError(response.status, response.reason)

            for hdr in response.getheaders():
                header_name = hdr[0].lower()
                if header_name == "x-auth-token":
                    auth_token = hdr[1]
                elif header_name == "x-server-management-url":
                    (pnetloc, pport,
                     puri, pis_ssl) = parse_url(hdr[1])

                    _dns_management_host = dns_management_host
                    if 'lon.' in pnetloc:
                        _dns_management_host = 'lon.' + _dns_management_host

                    if pis_ssl:
                        scheme = "https://"
                    else:
                        scheme = "http://"
                    dns_management_url = (scheme + _dns_management_host +
                                          "/" + puri)
        finally:
            # Close the connection on the error paths as well.
            conn.close()

        # Both values must be present; the former test on a two-element
        # tuple was always true and could never detect a missing header.
        if not (auth_token and dns_management_url):
            raise AuthenticationError("Invalid response from the "
                                      "authentication service.")

        return (dns_management_url, auth_token)
    def create(url, ref='master'):
        """Create a GitLab or GitHub importer for the repository at ``url``.

        Returns None when the host contains neither "gitlab" nor "github".
        """
        host, user, repo = parse_url(url)
        repo_name = "/".join([user, repo])

        if "gitlab" in host:
            from gitlab_importer import GitlabImporter
            gitlab_token = current_app.config.get('GITLAB_OAUTH_ACCESS_TOKEN')
            return GitlabImporter(repo_name, ref, token=gitlab_token)

        if "github" in host:
            from github_importer import GithubImporter
            # No token is passed for GitHub.
            return GithubImporter(repo_name, ref, token=None)

        return None
Example #52
0
    def authenticate(self):
        """
        Initiates authentication with the remote service and returns a
        two-tuple containing the DNS management URL and session token.

        The URL combines the scheme and path taken from the
        ``x-server-management-url`` response header with the module-level
        ``dns_management_host`` ('lon.' is prepended when the header's host
        contains 'lon.').

        Raises AuthenticationFailed on a 401 response, ResponseError on
        any other non-2XX response and AuthenticationError when the token
        or the management URL header is missing.
        """
        auth_token = None
        dns_management_url = None

        conn = self.conn_class(self.host, self.port, timeout=self.timeout)
        try:
            conn.request('GET', '/' + self.uri, headers=self.headers)
            response = conn.getresponse()
            response.read()

            # A status code of 401 indicates that the supplied credentials
            # were not accepted by the authentication service.
            if response.status == 401:
                raise AuthenticationFailed()

            # Raise an error for any response that is not 2XX
            if response.status // 100 != 2:
                raise ResponseError(response.status, response.reason)

            for hdr in response.getheaders():
                header_name = hdr[0].lower()
                if header_name == "x-auth-token":
                    auth_token = hdr[1]
                elif header_name == "x-server-management-url":
                    (pnetloc, pport, puri, pis_ssl) = parse_url(hdr[1])

                    _dns_management_host = dns_management_host
                    if 'lon.' in pnetloc:
                        _dns_management_host = 'lon.' + _dns_management_host

                    if pis_ssl:
                        scheme = "https://"
                    else:
                        scheme = "http://"
                    dns_management_url = (scheme + _dns_management_host +
                                          "/" + puri)
        finally:
            # Release the connection even when one of the errors above fires.
            conn.close()

        # The former ``not (a, b)`` check tested a non-empty tuple and so
        # never triggered; require both values explicitly.
        if not (auth_token and dns_management_url):
            raise AuthenticationError("Invalid response from the "
                                      "authentication service.")

        return (dns_management_url, auth_token)
Example #53
0
def bot():
    """Reply to congress.gov bill submissions streamed from a subreddit.

    The subreddit name is taken from ``sys.argv[1]``. Every streamed
    submission whose URL contains ``congress.gov/bill`` is parsed into a
    congress / bill id pair, the bill is looked up through ``pp.get_bill``,
    a comment built by ``utils.format_comment_from_bill`` is posted as a
    reply, and the reply is recorded with ``analytics.db_insert``.
    Submissions with any other URL are skipped.
    """
    for submission in reddit.subreddit(sys.argv[1]).stream.submissions():
        if "congress.gov/bill" not in submission.url:
            continue

        congress, bill_id = utils.parse_url(submission.url)
        bill = pp.get_bill(congress, bill_id)
        comment = utils.format_comment_from_bill(bill)
        submission.reply(comment)

        # Parenthesized single-argument print behaves the same on Python 2
        # and Python 3; the bare print statement is a SyntaxError on 3.
        print("I replied to: " + submission.shortlink)

        # NOTE(review): the meaning of the "S" marker is defined by
        # analytics.db_insert, which is not visible here.
        analytics.db_insert(bill, submission.author.name, submission.subreddit,
                            "S")
 def __init__(self, username, api_key, authurl=chouti_authurl, timeout=5,
              useragent=user_agent):
     """Record the auth settings and choose the HTTP connection class.

     Stores ``authurl`` and ``timeout``, builds the request headers from
     ``username``, ``api_key`` and ``useragent``, splits ``authurl`` into
     host / port / uri / is_ssl with ``parse_url``, and sets
     ``self.conn_class`` according to the interpreter version and whether
     the URL is SSL.
     """
     self.authurl = authurl
     self.timeout = timeout
     self.headers = {
         'x-auth-user': username,
         'x-auth-key': api_key,
         'User-Agent': useragent,
     }

     parsed = parse_url(self.authurl)
     (self.host, self.port, self.uri, self.is_ssl) = parsed

     if version_info[0] <= 2 and version_info[1] < 6:
         # Compatibility path for Python versions below 2.6.
         if self.is_ssl:
             self.conn_class = THTTPSConnection
         else:
             self.conn_class = THTTPConnection
     else:
         # Use the regular http client connection classes.
         if self.is_ssl:
             self.conn_class = HTTPSConnection
         else:
             self.conn_class = HTTPConnection
 def __init__(self, username, api_key, authurl=us_authurl, timeout=15,
              useragent=user_agent,  auth_version=None, storage_region=None, servicenet=None):
     """Record the auth settings and choose the HTTP connection class.

     Stores ``authurl``, ``auth_version``, ``storage_region``,
     ``servicenet`` and ``timeout`` on the instance, builds the request
     headers from ``username``, ``api_key`` and ``useragent``, and splits
     ``authurl`` into host / port / uri / is_ssl with ``parse_url``.
     ``self.conn_class`` is then set from the interpreter version and the
     SSL flag.
     """
     self.authurl = authurl
     self.auth_version = auth_version
     self.storage_region = storage_region
     self.servicenet = servicenet
     self.timeout = timeout

     request_headers = dict()
     request_headers['x-auth-user'] = username
     request_headers['x-auth-key'] = api_key
     request_headers['User-Agent'] = useragent
     self.headers = request_headers

     self.host, self.port, self.uri, self.is_ssl = parse_url(self.authurl)

     # Interpreters older than 2.6 get the T* connection classes; the
     # class is looked up only in the branch that is actually taken.
     pre_26 = version_info[0] <= 2 and version_info[1] < 6
     if pre_26 and self.is_ssl:
         self.conn_class = THTTPSConnection
     elif pre_26:
         self.conn_class = THTTPConnection
     elif self.is_ssl:
         self.conn_class = HTTPSConnection
     else:
         self.conn_class = HTTPConnection
Example #56
0
def prejoin_workspace(workspace_url, dir_to_share, api_args):
    """Validate a workspace URL and sync its anonymous permissions.

    Parses ``workspace_url`` and fetches the workspace. When the
    anonymous-user permissions requested in ``api_args`` differ from the
    ones on the fetched workspace, the new permissions are written into
    the workspace body and pushed with ``update_workspace``. The workspace
    is then recorded via ``utils.add_workspace_to_persistent_json``.

    Returns the value produced by ``utils.parse_url`` on success. Returns
    False when the URL cannot be parsed, when fetching the workspace
    raises, or when the fetch response code is 400 or above (in which case
    the workspace is also removed from the persistent data on a
    best-effort basis).
    """
    try:
        result = utils.parse_url(workspace_url)
    except Exception as e:
        msg.error(unicode(e))
        return False
    try:
        w = get_workspace_by_url(workspace_url)
    except Exception as e:
        editor.error_message('Error opening url %s: %s' %
                             (workspace_url, str(e)))
        return False

    if w.code >= 400:
        # The workspace could not be fetched: forget it locally. Each
        # removal is best-effort because either entry may be absent.
        try:
            d = utils.get_persistent_data()
            try:
                del d['workspaces'][result['owner']][result['name']]
            except Exception:
                pass
            try:
                del d['recent_workspaces'][workspace_url]
            except Exception:
                pass
            utils.update_persistent_data(d)
        except Exception as e:
            msg.debug(unicode(e))
        return False

    msg.debug('workspace: %s', json.dumps(w.body))
    anon_perms = w.body.get('perms', {}).get('AnonymousUser', [])
    msg.debug('api args: %s' % api_args)
    new_anon_perms = api_args.get('perms', {}).get('AnonymousUser', [])
    # TODO: prompt/alert user if going from private to public
    if set(anon_perms) != set(new_anon_perms):
        msg.debug(str(anon_perms), str(new_anon_perms))
        # The read above tolerates a body without 'perms', so the write
        # must as well; plain indexing would raise KeyError in that case.
        w.body.setdefault('perms', {})['AnonymousUser'] = new_anon_perms
        response = update_workspace(w.body['owner'], w.body['name'], w.body)
        msg.debug(str(response.body))
    utils.add_workspace_to_persistent_json(w.body['owner'], w.body['name'],
                                           workspace_url, dir_to_share)
    return result
Example #57
0
    def __init__(self):
        """Parse the command-line options and store them on the instance.

        Registers the ``--max-threads``, ``--key``, ``--schema``,
        ``--attack`` and ``--type`` options, hands the parser to
        ``self.build_argparse`` for further setup, and parses the command
        line. The URL is validated with ``utils.parse_url``; when that
        returns None an error is printed and the process exits with
        status 1.
        """
        option_specs = (
            ("--max-threads", {"type": int}),
            ("--key", {"required": True}),
            ("--schema", {"required": True}),
            ("--attack", {"required": True, "type": str,
                          "choices": list(self.PAYLOADS_FOLDER.keys())}),
            ("--type", {"type": str}),
        )
        parser = argparse.ArgumentParser()
        for flag, options in option_specs:
            parser.add_argument(flag, **options)

        # NOTE(review): args.url is presumably registered by
        # build_argparse, which is not visible here -- confirm.
        parser = self.build_argparse(parser)
        args = parser.parse_args()

        self.GQL_ENDPOINT = utils.parse_url(args.url)
        if self.GQL_ENDPOINT is None:
            print("URL {} is not valid!".format(args.url))
            exit(1)

        self.SCHEMA = args.schema
        self.KEY = args.key
        self.TYPE = args.type
        self.ATTACK = args.attack
        # Keep the existing THREADS value unless a non-zero --max-threads
        # was supplied.
        self.THREADS = args.max_threads or self.THREADS
Example #58
0
 def __init__(self,
              username,
              api_key,
              authurl=chouti_authurl,
              timeout=5,
              useragent=user_agent):
     """Store the auth request settings and pick a connection class.

     The request headers are built from ``username``, ``api_key`` and
     ``useragent``. ``authurl`` is split by ``parse_url`` into host, port,
     uri and an SSL flag, and ``self.conn_class`` is chosen from the
     interpreter version together with that flag.
     """
     self.authurl = authurl

     headers = dict()
     for header_name, header_value in (('x-auth-user', username),
                                       ('x-auth-key', api_key),
                                       ('User-Agent', useragent)):
         headers[header_name] = header_value
     self.headers = headers

     self.timeout = timeout
     (self.host, self.port, self.uri, self.is_ssl) = parse_url(self.authurl)

     if version_info[0] <= 2 and version_info[1] < 6:
         # Compatibility with Python versions below 2.6.
         if self.is_ssl:
             connection_class = THTTPSConnection
         else:
             connection_class = THTTPConnection
     else:
         # Use the http client connection classes.
         if self.is_ssl:
             connection_class = HTTPSConnection
         else:
             connection_class = HTTPConnection
     self.conn_class = connection_class
Example #59
0
    def _get_github_license(self, home_url):
        """Download and decode the license file of a GitHub repository.

        When ``home_url`` already contains ``api.github.com/repos`` it is
        used as the base of the license endpoint. Otherwise the lower-cased
        path returned by ``utils.parse_url`` is appended to
        ``https://api.github.com/repos``. The endpoint is requested with
        ``self.custom_header``.

        Returns the base64-decoded value of the response's ``content``
        entry, or False when the response has no ``content`` entry.
        """
        logger.info("Downloading license file from Github")

        if "api.github.com/repos" in home_url:
            license_url = "{}/license".format(home_url)
        else:
            repo_path = utils.parse_url(home_url,
                                        only_domain=False,
                                        only_path=True).lower()
            license_url = "https://api.github.com/repos{}/license".format(
                repo_path)

        response = utils.request("GET",
                                 license_url,
                                 custom_headers=self.custom_header)
        if "content" not in response:
            return False
        return base64.b64decode(response["content"])