Example #1
    def get(self, data=None):
        logger.debug("GET %s", self.url)
        req = urllib.request.Request(url=self.url,
                                     data=data,
                                     headers=self.headers)
        try:
            if self.opener:
                request = self.opener.open(req, timeout=self.timeout)
            else:
                request = urllib.request.urlopen(req, timeout=self.timeout)
        except (urllib.error.HTTPError, CertificateError) as error:
            if error.code == 401:
                raise UnauthorizedAccess("Access to %s denied" % self.url)
            else:
                raise HTTPError("Request to %s failed: %s" % (self.url, error))
        except (socket.timeout, urllib.error.URLError) as error:
            raise HTTPError("Unable to connect to server %s: %s" %
                            (self.url, error))
        if request.getcode() > 200:
            logger.debug("Server responded with status code %s",
                         request.getcode())
        try:
            self.total_size = int(request.info().get("Content-Length").strip())
        except AttributeError:
            logger.warning("Failed to read response's content length")
            self.total_size = 0

        self.response_headers = request.getheaders()
        self.status_code = request.getcode()
        if self.status_code > 299:
            logger.warning("Request responded with code %s", self.status_code)
        self.content = b"".join(self._iter_chunks(request))
        self.info = request.info()
        request.close()
        return self
Example #2
def submit_external_job(request, application_path):
  ParametersFormSet = formset_factory(ParameterForm, extra=0)

  if application_path.startswith('abfs:/') and not application_path.startswith('abfs://'):
    application_path = application_path.replace("abfs:/", "abfs://")
  elif application_path.startswith('s3a:/') and not application_path.startswith('s3a://'):
    application_path = application_path.replace('s3a:/', 's3a://')
  else:
    application_path = "/" + application_path

  if application_path.startswith("abfs://"):
    application_path = abfspath(application_path)

  if request.method == 'POST':
    params_form = ParametersFormSet(request.POST)

    if params_form.is_valid():
      mapping = dict([(param['name'], param['value']) for param in params_form.cleaned_data])
      mapping['dryrun'] = request.POST.get('dryrun_checkbox') == 'on'
      application_name = os.path.basename(application_path)
      application_class = Bundle if application_name == 'bundle.xml' else Coordinator if application_name == 'coordinator.xml' else get_workflow()
      mapping[application_class.get_application_path_key()] = os.path.dirname(application_path)

      try:
        submission = Submission(request.user, fs=request.fs, jt=request.jt, properties=mapping)
        job_id = submission.run(application_path)
      except RestException as ex:
        detail = ex._headers.get('oozie-error-message', ex)
        if 'Max retries exceeded with url' in str(detail):
          detail = '%s: %s' % (_('The Oozie server is not running'), detail)
        LOG.exception(smart_str(detail))
        raise PopupException(_("Error submitting job %s") % (application_path,), detail=detail)

      jsonify = request.POST.get('format') == 'json'
      if jsonify:
        return JsonResponse({'status': 0, 'job_id': job_id, 'type': 'external_workflow'}, safe=False)
      else:
        request.info(_('Oozie job submitted'))
        view = 'list_oozie_bundle' if application_name == 'bundle.xml' else 'list_oozie_coordinator' if application_name == 'coordinator.xml' else 'list_oozie_workflow'
        return redirect(reverse('oozie:%s' % view, kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s') % params_form.errors)
  else:
    parameters = Submission(request.user, fs=request.fs, jt=request.jt).get_external_parameters(application_path)
    initial_params = ParameterForm.get_initial_params(parameters)
    params_form = ParametersFormSet(initial=initial_params)

  popup = render('editor/submit_job_popup.mako', request, {
                   'params_form': params_form,
                   'name': _('Job'),
                   'action': reverse('oozie:submit_external_job', kwargs={'application_path': application_path}),
                   'show_dryrun': os.path.basename(application_path) != 'bundle.xml',
                   'return_json': request.GET.get('format') == 'json'
                 }, force_template=True).content
  return JsonResponse(popup, safe=False)
Example #3
def get_request_filename(request):
    '''Figure out the filename for an HTTP download.'''
    # Check to see if a filename is specified in the HTTP headers.
    if 'Content-Disposition' in request.info():
        disposition = request.info()['Content-Disposition']
        pieces = re.split(r'\s*;\s*', disposition)
        for piece in pieces:
            if piece.startswith('filename='):
                return piece[len('filename='):]
    # If no filename was specified, pick a reasonable default.
    return os.path.basename(urlsplit(request.url).path) or 'index.html'
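
A minimal usage sketch for the helper above; the URL and printed result are illustrative, not from the source:

import urllib.request

with urllib.request.urlopen("https://example.com/files/report.pdf") as request:
    # Falls back to the URL's path basename when no Content-Disposition is set.
    print(get_request_filename(request))  # e.g. "report.pdf"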
Example #4
    def get(self, data=None):
        logger.debug("GET %s", self.url)
        req = urllib.request.Request(url=self.url,
                                     data=data,
                                     headers=self.headers)
        try:
            if self.opener:
                request = self.opener.open(req, timeout=self.timeout)
            else:
                request = urllib.request.urlopen(req, timeout=self.timeout)
        except (urllib.error.HTTPError, CertificateError) as error:
            raise HTTPError("Unavailable url %s: %s" % (self.url, error))
        except (socket.timeout, urllib.error.URLError) as error:
            raise HTTPError("Unable to connect to server %s: %s" %
                            (self.url, error))
        if request.getcode() > 200:
            logger.debug("Server responded with status code %s",
                         request.getcode())
        try:
            total_size = request.info().get("Content-Length").strip()
            total_size = int(total_size)
        except AttributeError:
            logger.warning("Failed to read response's content length")
            total_size = 0

        self.response_headers = request.getheaders()
        self.status_code = request.getcode()
        if self.status_code > 299:
            logger.warning("Request responded with code %s", self.status_code)
        chunks = []
        while 1:
            if self.stop_request and self.stop_request.is_set():
                self.content = ""
                return self
            try:
                chunk = request.read(self.buffer_size)
            except socket.timeout:
                logger.error("Request timed out")
                self.content = ""
                return self
            self.downloaded_size += len(chunk)
            if self.thread_queue:
                self.thread_queue.put(
                    (chunk, self.downloaded_size, total_size))
            else:
                chunks.append(chunk)
            if not chunk:
                break
        request.close()
        self.content = b"".join(chunks)
        self.info = request.info()
        return self
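
The Content-Length handling in these snippets can be exercised on its own. A standalone sketch with a placeholder URL, showing why AttributeError is caught: when the header is missing, .get() returns None, and None has no .strip():

import urllib.request

with urllib.request.urlopen("https://example.com/data.bin") as request:
    try:
        total_size = int(request.info().get("Content-Length").strip())
    except AttributeError:
        # Header absent: .get() returned None
        total_size = 0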
Example #5
def rerun_oozie_coordinator(request, job_id, app_path=None):
  oozie_coordinator = check_job_access_permission(request, job_id)
  check_job_edition_permission(oozie_coordinator, request.user)
  ParametersFormSet = formset_factory(ParameterForm, extra=0)
  if app_path is None:
    app_path = oozie_coordinator.coordJobPath
  else:
    app_path = urllib.parse.unquote(app_path)
  return_json = request.GET.get('format') == 'json'

  if request.method == 'POST':
    params_form = ParametersFormSet(request.POST)
    rerun_form = RerunCoordForm(request.POST, oozie_coordinator=oozie_coordinator)

    if sum([rerun_form.is_valid(), params_form.is_valid()]) == 2:
      args = {}
      args['deployment_dir'] = app_path

      params = {
        'type': 'action',
        'scope': ','.join(oozie_coordinator.aggreate(rerun_form.cleaned_data['actions'])),
        'refresh': rerun_form.cleaned_data['refresh'],
        'nocleanup': rerun_form.cleaned_data['nocleanup'],
      }

      properties = dict([(param['name'], param['value']) for param in params_form.cleaned_data])

      _rerun_coordinator(request, job_id, args, params, properties)

      if rerun_form.cleaned_data['return_json']:
        return JsonResponse({'status': 0, 'job_id': job_id}, safe=False)
      else:
        request.info(_('Coordinator re-running.'))
        return redirect(reverse('oozie:list_oozie_coordinator', kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s') % smart_unicode(rerun_form.errors))
      return list_oozie_coordinator(request, job_id)
  else:
    rerun_form = RerunCoordForm(oozie_coordinator=oozie_coordinator, return_json=return_json)
    initial_params = ParameterForm.get_initial_params(oozie_coordinator.conf_dict)
    params_form = ParametersFormSet(initial=initial_params)

    return render('dashboard/rerun_coord_popup.mako', request, {
                   'rerun_form': rerun_form,
                   'params_form': params_form,
                   'action': reverse('oozie:rerun_oozie_coord', kwargs={'job_id': job_id, 'app_path': urllib.parse.quote(app_path.encode('utf-8'), safe=SAFE_CHARACTERS_URI_COMPONENTS)}),
                   'return_json': return_json,
                   'is_mini': request.GET.get('is_mini', False),
                 }, force_template=True)
Example #6
def rerun_oozie_bundle(request, job_id, app_path):
  oozie_bundle = check_job_access_permission(request, job_id)
  check_job_edition_permission(oozie_bundle, request.user)
  ParametersFormSet = formset_factory(ParameterForm, extra=0)
  app_path = urllib.parse.unquote(app_path)
  if request.method == 'POST':
    params_form = ParametersFormSet(request.POST)
    rerun_form = RerunBundleForm(request.POST, oozie_bundle=oozie_bundle)

    if sum([rerun_form.is_valid(), params_form.is_valid()]) == 2:
      args = {}
      args['deployment_dir'] = app_path

      params = {
        'coord-scope': ','.join(rerun_form.cleaned_data['coordinators']),
        'refresh': rerun_form.cleaned_data['refresh'],
        'nocleanup': rerun_form.cleaned_data['nocleanup'],
      }

      if rerun_form.cleaned_data['start'] and rerun_form.cleaned_data['end']:
        date = {
            'date-scope':
                '%(start)s::%(end)s' % {
                    'start': utc_datetime_format(rerun_form.cleaned_data['start']),
                    'end': utc_datetime_format(rerun_form.cleaned_data['end'])
                }
        }
        params.update(date)

      properties = dict([(param['name'], param['value']) for param in params_form.cleaned_data])

      _rerun_bundle(request, job_id, args, params, properties)

      request.info(_('Bundle re-running.'))
      return redirect(reverse('oozie:list_oozie_bundle', kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s') % (rerun_form.errors,))
      return list_oozie_bundle(request, job_id)
  else:
    rerun_form = RerunBundleForm(oozie_bundle=oozie_bundle)
    initial_params = ParameterForm.get_initial_params(oozie_bundle.conf_dict)
    params_form = ParametersFormSet(initial=initial_params)

    return render('dashboard/rerun_bundle_popup.mako', request, {
                   'rerun_form': rerun_form,
                   'params_form': params_form,
                   'action': reverse('oozie:rerun_oozie_bundle', kwargs={'job_id': job_id, 'app_path': urllib.parse.quote(app_path.encode('utf-8'), safe=SAFE_CHARACTERS_URI_COMPONENTS)}),
                 }, force_template=True)
Example #7
def manage_oozie_jobs(request, job_id, action):
  if request.method != 'POST':
    raise PopupException(_('Use a POST request to manage an Oozie job.'))

  job = check_job_access_permission(request, job_id)
  check_job_edition_permission(job, request.user)

  response = {'status': -1, 'data': ''}

  try:
    oozie_api = get_oozie(request.user)
    params = None

    if action == 'change':
      pause_time_val = request.POST.get('pause_time')
      if request.POST.get('clear_pause_time') == 'true':
        pause_time_val = ''

      end_time_val = request.POST.get('end_time')
      if end_time_val:
        end_time_val = convert_to_server_timezone(end_time_val, TIME_ZONE.get())
      if pause_time_val:
        pause_time_val = convert_to_server_timezone(pause_time_val, TIME_ZONE.get())
      params = {'value': 'endtime=%s;pausetime=%s;concurrency=%s' % (
          end_time_val, pause_time_val, request.POST.get('concurrency'))}
    elif action == 'ignore':
      oozie_api = get_oozie(request.user, api_version="v2")
      params = {
        'type': 'action',
        'scope': ','.join(job.aggreate(request.POST.get('actions').split())),
      }

    response['data'] = oozie_api.job_control(job_id, action, parameters=params)

    response['status'] = 0
    if 'notification' in request.POST:
      request.info(_(request.POST.get('notification')))
  except RestException as ex:
    ex_message = ex.message
    if ex._headers.get('oozie-error-message'):
      ex_message = ex._headers.get('oozie-error-message')
    msg = "Error performing %s on Oozie job %s: %s." % (action, job_id, ex_message)
    LOG.exception(msg)

    response['data'] = _(msg)

  return JsonResponse(response)
Example #8
def update_best_videos(recent_videos):
    global best_videos
    for best_video in best_videos:
        '''minimal_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(minutes=BIG_DELAY)
        seconds_minimal = time.mktime(minimal_date.timetuple())
        seconds_best = time.mktime(best_video.published_at.timetuple())
        if seconds_minimal > seconds_best:
            continue'''
        recent_videos.append(best_video)
    best_videos = []

    ids = ','.join(str(video.video_id) for video in recent_videos)
    GET_QUERY = 'https://www.googleapis.com/youtube/v3/videos?part=statistics&id=' + ids + '&key=' + API_KEY
    request = urllib.request.urlopen(GET_QUERY)
    videos = json.loads(request.read().decode(
        request.info().get_param('charset') or 'utf-8'))
    for video in videos['items']:
        index = -1
        for i in range(len(recent_videos)):
            if recent_videos[i].video_id == video['id']:
                index = i
                break
        assert (index != -1)
        rvideo = recent_videos[index]
        new_video = Video(rvideo.title, rvideo.published_at, rvideo.video_id,
                          int(video['statistics']['commentCount']))
        best_videos.append(new_video)

    best_videos.sort()
    best_videos = best_videos[:MAX_BEST_VIDEOS_COUNT]
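
The decode idiom above, charset taken from the response with a UTF-8 fallback, also works outside this function; a minimal sketch with a placeholder endpoint:

import json
import urllib.request

with urllib.request.urlopen("https://example.com/api.json") as request:
    # get_param("charset") reads the charset parameter of the Content-Type header.
    charset = request.info().get_param("charset") or "utf-8"
    payload = json.loads(request.read().decode(charset))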
Example #9
def fetch_data(cmdline_again, keys):
    identifier = cmdline_again.strip('/')

    lhostmatch = identifier.find("aos://16777343")

    if lhostmatch != -1 and keys[0] == 'name':
        # If player plays on localhost then just pass this info and done
        return ['(Playing on localhost)', '-', '-', '-', '-', '-']
    else:
        try:
            # Request json serverlist that buildandshoot hosts.
            request = urllib.request.urlopen("https://services.buildandshoot.com/serverlist.json", context=SSLCONTEXT)
        except urllib.error.URLError:
            logger.warning('No internet connection.')
            time.sleep(5)
            return

        data = request.read()
        encoding = request.info().get_content_charset('utf-8')
        serverlist = json.loads(data.decode(encoding))

        try:
            # Deliberately iterates one index past the end: the IndexError
            # handler below doubles as the "no matching server" exit.
            for server_num in range(0, len(serverlist)+1):
                presence_info = []

                if serverlist[server_num]['identifier'] == identifier:
                    current_server = serverlist[server_num]
                    for variable in keys:
                        presence_info.append(current_server[variable])

                if len(presence_info) == 6:
                    return presence_info
        except IndexError:
            return ['(Server is not broadcasting to master server)', '-', '-', '-', '-', '-']
Example #10
 def get_data_source_one(self):
     """Retrieves Data from the first Yahoo Finance source"""
     data = 'http://finance.yahoo.com/webservice/v1/symbols/' + self.stock + '/quote?format=json&view=detail'
     request = urllib.request.urlopen(data)
     response = request.read()
     charset = request.info().get_content_charset('utf-8')
     self.data_s1 = json.loads(response.decode(charset))
Example #11
def grab_random_picture(subreddit):
    print("=> Grabbing random top wallpaper from /r/{}".format(sys.argv[1]))

    request = urllib.request.urlopen(
        'https://www.reddit.com/r/{}/top/.json?count=100&limit=100&t=month'.
        format(subreddit))
    data = json.loads(request.read().decode(request.info().get_param('charset')
                                            or 'utf-8'))

    posts = data['data']['children']

    # randrange avoids randint's inclusive upper bound, which could index past the list
    index = random.randrange(len(posts))

    post = posts[index]['data']

    url = post['url']

    if urllib.parse.urlparse(post['url']).netloc == 'imgur.com' and (
            not url.endswith('.png') and not url.endswith('.jpg')
            and not url.endswith('.jpeg') and not url.endswith('.gif')):
        url = '{}.png'.format(url)

    print(post['title'])
    print(url)

    return url
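
A hypothetical call for the function above; the subreddit name is an assumption, and the function still relies on its json, random, sys, urllib.request and urllib.parse imports:

wallpaper_url = grab_random_picture("wallpapers")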
Example #12
 def findpost(subredditpage):
     for post in reddit.subreddit(subredditpage).hot(limit=1000):
         if post.score > 5 and post.is_self is False and post.id not in post_ids:
             # Check if post is an image
             url = post.url
             request = urllib.request.urlopen(url)
             mime = request.info()['Content-type']
             if mime.endswith("png") or mime.endswith(
                     "jpeg") or mime.endswith("jpg") or mime.endswith(
                         "gif"):
                 # success link is an image
                 credit = post.author.name
                 # save picture
                 image = f"{directory}posts/post{count}.jpg"
                 urllib.request.urlretrieve(url, image)
                 # return dictionary with required info to post located picture
                 if subredditpage == sub_reddit_1:
                     caption = random.choice(caption_list_1)
                     post_ids.append(post.id)
                     result = {
                         "credit": credit,
                         "image": image,
                         "caption": caption
                     }
                     return result
                 elif subredditpage == sub_reddit_2:
                     caption = random.choice(caption_list_2)
                     post_ids.append(post.id)
                     result = {
                         "credit": credit,
                         "image": image,
                         "caption": caption
                     }
                     return result
Example #13
def update_best_videos(recent_videos):
    global best_videos
    for best_video in best_videos:
        """minimal_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(minutes=BIG_DELAY)
        seconds_minimal = time.mktime(minimal_date.timetuple())
        seconds_best = time.mktime(best_video.published_at.timetuple())
        if seconds_minimal > seconds_best:
            continue"""
        recent_videos.append(best_video)
    best_videos = []

    ids = ",".join(str(video.video_id) for video in recent_videos)
    GET_QUERY = "https://www.googleapis.com/youtube/v3/videos?part=statistics&id=" + ids + "&key=" + API_KEY
    request = urllib.request.urlopen(GET_QUERY)
    videos = json.loads(request.read().decode(request.info().get_param("charset") or "utf-8"))
    for video in videos["items"]:
        index = -1
        for i in range(len(recent_videos)):
            if recent_videos[i].video_id == video["id"]:
                index = i
                break
        assert index != -1
        rvideo = recent_videos[index]
        new_video = Video(rvideo.title, rvideo.published_at, rvideo.video_id, int(video["statistics"]["commentCount"]))
        best_videos.append(new_video)

    best_videos.sort()
    best_videos = best_videos[:MAX_BEST_VIDEOS_COUNT]
Example #14
def get_html(url):
    tries = 5
    # Build our request
    req = urllib.request.Request(url)
    # Accept gzipped content
    req.add_header('Accept-Encoding', 'gzip')
    # Fake user agent
    req.add_header('User-Agent', USER_AGENT)
    while tries > 0:
        try:
            request = urllib.request.urlopen(req)
            break
        except socket.timeout:
            tries -= 1
        except urllib.error.URLError as error:
            if isinstance(error.reason, socket.timeout):
                tries -= 1
            else:
                print("URL error: " + str(error.reason))
                quit()
    else:
        # All retries timed out and `request` was never assigned.
        print("Request timed out after 5 tries")
        quit()
    if request.info().get('Content-Encoding') == 'gzip':
        buffer = BytesIO(request.read())
        uncompressed_buffer = gzip.GzipFile(fileobj=buffer)
        html = BeautifulSoup(uncompressed_buffer.read(), 'lxml')
    else:
        html = BeautifulSoup(request.read(), "lxml")
    request.close()
    return html
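
The gzip branch can be reproduced without BeautifulSoup; a minimal sketch of the same Content-Encoding check, with a placeholder URL:

import gzip
import urllib.request
from io import BytesIO

req = urllib.request.Request("https://example.com/",
                             headers={"Accept-Encoding": "gzip"})
with urllib.request.urlopen(req) as request:
    body = request.read()
    # Only decompress when the server actually honoured the request.
    if request.info().get("Content-Encoding") == "gzip":
        body = gzip.GzipFile(fileobj=BytesIO(body)).read()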
Example #15
def rerun_oozie_job(request, job_id, app_path=None):
  ParametersFormSet = formset_factory(ParameterForm, extra=0)
  oozie_workflow = check_job_access_permission(request, job_id)
  check_job_edition_permission(oozie_workflow, request.user)
  if app_path is None:
    app_path = oozie_workflow.appPath
  else:
    app_path = urllib.parse.unquote(app_path)
  return_json = request.GET.get('format') == 'json'

  if request.method == 'POST':
    rerun_form = RerunForm(request.POST, oozie_workflow=oozie_workflow)
    params_form = ParametersFormSet(request.POST)

    if sum([rerun_form.is_valid(), params_form.is_valid()]) == 2:
      args = {}

      if request.POST.get('rerun_form_choice') == 'fail_nodes':
        args['fail_nodes'] = 'true'
      else:
        args['skip_nodes'] = ','.join(rerun_form.cleaned_data['skip_nodes'])
      args['deployment_dir'] = app_path

      mapping = dict([(param['name'], param['value']) for param in params_form.cleaned_data])

      _rerun_workflow(request, job_id, args, mapping)

      if rerun_form.cleaned_data['return_json']:
        return JsonResponse({'status': 0, 'job_id': job_id}, safe=False)
      else:
        request.info(_('Workflow re-running.'))
        return redirect(reverse('oozie:list_oozie_workflow', kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s %s') % (rerun_form.errors, params_form.errors))
  else:
    rerun_form = RerunForm(oozie_workflow=oozie_workflow, return_json=return_json)
    initial_params = ParameterForm.get_initial_params(oozie_workflow.conf_dict)
    params_form = ParametersFormSet(initial=initial_params)

    return render('dashboard/rerun_workflow_popup.mako', request, {
                   'rerun_form': rerun_form,
                   'params_form': params_form,
                   'action': reverse('oozie:rerun_oozie_job', kwargs={'job_id': job_id, 'app_path': urllib.parse.quote(app_path.encode('utf-8'), safe=SAFE_CHARACTERS_URI_COMPONENTS) }),
                   'return_json': return_json,
                   'is_mini': request.GET.get('is_mini', False),
                 }, force_template=True)
Example #16
 def __init__(self, url, path, threads):
     
     self.url        = url
     self.path       = path
     self.threads    = threads
     self.filename   = url.split('/')[-1]
     request         = urllib.request.urlopen(self.url)
     metadata        = request.info()
     self.filesize   = int(metadata.get_all("Content-Length")[0])
Example #17
 def get_data_source_two(self):
     """Retrieves Data from the second Yahoo Finance source"""
     data = "http://query.yahooapis.com/v1/public/yql?q=" \
            "SELECT%20*%20FROM%20yahoo.finance.quotes%20WHERE%20symbol%20IN%20('" + self.stock + "')" \
            "&format=json&env=http://datatables.org/alltables.env"
     request = urllib.request.urlopen(data)
     response = request.read()
     charset = request.info().get_content_charset('utf-8')
     self.data_s2 = json.loads(response.decode(charset))
Example #18
def get_request_filename(request):
    '''Figure out the filename for an HTTP download.'''
    # Check to see if a filename is specified in the HTTP headers.
    if 'Content-Disposition' in request.info():
        disposition = request.info()['Content-Disposition']
        pieces = re.split(r'\s*;\s*', disposition)
        for piece in pieces:
            if piece.startswith('filename='):
                filename = piece[len('filename='):]
                # Strip exactly one " from each end.
                if filename.startswith('"'):
                    filename = filename[1:]
                if filename.endswith('"'):
                    filename = filename[:-1]
                # Interpret backslashed quotes.
                filename = filename.replace('\\"', '"')
                return filename
    # If no filename was specified, pick a reasonable default.
    return os.path.basename(urlsplit(request.url).path) or 'index.html'
Example #19
def get_request_filename(request):
    '''Figure out the filename for an HTTP download.'''
    # Check to see if a filename is specified in the HTTP headers.
    if 'Content-Disposition' in request.info():
        disposition = request.info()['Content-Disposition']
        pieces = re.split(r'\s*;\s*', disposition)
        for piece in pieces:
            if piece.startswith('filename='):
                filename = piece[len('filename='):]
                # Strip exactly one " from each end.
                if filename.startswith('"'):
                    filename = filename[1:]
                if filename.endswith('"'):
                    filename = filename[:-1]
                # Interpret backslashed quotes.
                filename = filename.replace('\\"', '"')
                return filename
    # If no filename was specified, pick a reasonable default.
    return os.path.basename(urlsplit(request.url).path) or 'index.html'
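
To try the quote stripping without a live server, one can stand in for the response object; the stub below is purely illustrative (request.info() really does return an email.message.Message):

from email.message import Message
from types import SimpleNamespace

info = Message()
info["Content-Disposition"] = 'attachment; filename="report.txt"'
# Fake object exposing the two attributes get_request_filename() uses.
fake_request = SimpleNamespace(info=lambda: info, url="https://example.com/dl")
print(get_request_filename(fake_request))  # -> report.txt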
Example #20
def embed_images(chapter, book):
    global finishedimages
    global imageUID
    content = chapter.content.split(sep='src="')
    output = content[0]
    for x in range(1, len(content)):
        imageurl = content[x][:content[x].find('"')]
        request = get_url(imageurl)
        if request != 0:
            headers = request.info()
            mediatype = headers["Content-Type"]
            extension = ""
            if mediatype == "text/plain":
                extension = ".txt"
            if mediatype == "text/css":
                extension = ".css"
            if mediatype == "image/png":
                extension = ".png"
            if mediatype == "image/jpeg":
                extension = ".jpg"
            if mediatype == "image/webp":
                extension = ".webp"
            if mediatype == "image/bmp":
                extension = ".bmp"
            if mediatype == "image/gif":
                extension = ".gif"
            if mediatype == "image/svg+xml":
                extension = ".svg"
            if mediatype == "image/tiff":
                extension = ".tiff"
            #imagefile = "images/" + urllib.parse.quote(imageurl, safe='') + extension

            if imageurl not in finishedimages:
                imagefile = "images/" + str(imageUID) + extension
                item = epub.EpubItem(uid=imagefile,
                                     file_name=imagefile,
                                     media_type=mediatype,
                                     content=request.read())
                book.add_item(item)
                finishedimages[imageurl] = str(imageUID)
                imageUID += 1
                print("embedded " + mediatype + " file")
            else:
                imagefile = "images/" + finishedimages[imageurl] + extension

        else:
            imagefile = imageurl
        sectionoutput = 'src="' + imagefile + content[x][content[x].find('"'):]
        output = output + sectionoutput
    chapter.content = output
Example #21
    def get(self, data=None):
        logger.debug("GET %s", self.url)
        try:
            req = urllib.request.Request(url=self.url,
                                         data=data,
                                         headers=self.headers)
        except ValueError as ex:
            raise HTTPError("Failed to create HTTP request to %s: %s" %
                            (self.url, ex)) from ex
        try:
            if self.opener:
                request = self.opener.open(req, timeout=self.timeout)
            else:
                request = urllib.request.urlopen(req, timeout=self.timeout)  # pylint: disable=consider-using-with
        except (urllib.error.HTTPError, CertificateError) as error:
            if error.code == 401:
                raise UnauthorizedAccess("Access to %s denied" %
                                         self.url) from error
            raise HTTPError("%s" % error, code=error.code) from error
        except (socket.timeout, urllib.error.URLError) as error:
            raise HTTPError("Unable to connect to server %s: %s" %
                            (self.url, error)) from error

        self.response_headers = request.getheaders()
        self.status_code = request.getcode()
        if self.status_code > 299:
            logger.warning("Request responded with code %s", self.status_code)

        try:
            self.total_size = int(request.info().get("Content-Length").strip())
        except AttributeError:
            self.total_size = 0

        self.content = b"".join(self._iter_chunks(request))
        self.info = request.info()
        request.close()
        return self
Example #22
def extract_from_url(url, progress_bar=None):
    request = urllib.request.urlopen(url)
    if progress_bar is not None:
        progress_bar.desc = filename = request.info().get_filename()
        progress_bar.total = int(request.headers.get("Content-length", 0))
    with libarchive.stream_reader(request.fp) as stream:
        for member in stream:
            if member.isdir:
                Path(member.path).mkdir()
                continue
            with open(member.path, "wb") as fp:
                for block in member.get_blocks():
                    fp.write(block)
                    if progress_bar is not None:
                        progress_bar.update(len(block))
Example #23
def listhosts(subdomain):
    headers = {"Authorization": "Bearer " + env('TOKEN')}
    url = 'https://api.digitalocean.com/v2/domains/' + (
        env('DOMAIN')) + '/records'

    req = urllib.request.Request(url, headers=headers)
    request = urllib.request.urlopen(req)

    data = json.loads(request.read().decode(request.info().get_param('charset')
                                            or 'utf-8'))
    for i in data['domain_records']:
        for k, v in i.items():
            if v == subdomain:
                recid = i['id']
                return recid
Example #24
	def get_data(self, docid, fields):
		uri = self.solr + self.core + '/get?id=' + urllib.parse.quote(docid) + '&fl=' + ','.join(fields)

		request = urllib.request.urlopen(uri)
		encoding = request.info().get_content_charset('utf-8')
		data = request.read()
		request.close()

		solr_doc = json.loads(data.decode(encoding))

		data = None
		if 'doc' in solr_doc:
			data = solr_doc['doc']

		return data
Example #25
	def get_data(self, docid, fields):
		uri = self.solr + self.core + '/get?id=' + urllib.parse.quote(docid) + '&fl=' + ','.join(fields)

		request = urllib.request.urlopen(uri)
		encoding = request.info().get_content_charset('utf-8')
		data = request.read()
		request.close()

		solr_doc = json.loads(data.decode(encoding))

		data = None
		if 'doc' in solr_doc:
			data = solr_doc['doc']

		return data
Example #26
def curl():
    if len(sys.argv) > 0:
        # Remove the file name
        del sys.argv[0]
        if len(sys.argv) == 0:
            print("MyCurl: Supply a URL to retrieve")
            return

    if sys.argv[0] == '-I':
        # Only headers
        if len(sys.argv) > 1:
            if is_http(sys.argv[1]):
                print(urllib.request.urlopen(sys.argv[1]).info())
            else:
                print("MyCurl: Only http or https URLs can be requested")
        else:
            print("MyCurl: Supply a URL to retrieve")
    elif sys.argv[0] == '-i':
        # Header and body
        if len(sys.argv) > 1:
            if is_http(sys.argv[1]):
                request = urllib.request.urlopen(sys.argv[1])
                html = request.read()
                try:
                    html = str(html, encoding='utf-8')
                    print(request.info())
                    print(html)
                except Exception:
                    print("NON-TEXTUAL CONTENT")
            else:
                print("MyCurl: Only http or https URLs can be requested")
        else:
            print("MyCurl: Supply a URL to retrieve")
    else:
        # Just body
        if '-' in sys.argv[0]:
            print("MyCurl: Only -i and -I arguments are supported")
        else:
            if is_http(sys.argv[0]):
                html = urllib.request.urlopen(sys.argv[0]).read()
                try:
                    html = str(html, encoding='utf-8')
                    print(html)
                except Exception:
                    print("NON-TEXTUAL CONTENT")
            else:
                print("MyCurl: Only http or https URLs can be requested")
Example #27
def command(uri, qs):
    print('command()', uri)

    url = 'http://' + config['WEBIF']['HOST'] + '/' + uri

    try:
        request = urllib.request.urlopen(url,
                                         timeout=int(
                                             config['WEBIF']['TIMEOUT']))
        mimetype = request.info()['Content-Type']
        response = request.read()
    except (urllib.error.HTTPError, urllib.error.URLError) as err:
        error('command()', 'urllib.error', err)

        return False

    return {'data': response, 'headers': {'Content-Type': mimetype}}
Example #28
    def get(self, data=None):
        logger.debug("GET %s", self.url)
        req = urllib.request.Request(url=self.url, data=data, headers=self.headers)
        try:
            request = urllib.request.urlopen(req, timeout=self.timeout)
        except (urllib.error.HTTPError, CertificateError) as error:
            logger.error("Unavailable url (%s): %s", self.url, error)
        except (socket.timeout, urllib.error.URLError) as error:
            logger.error("Unable to connect to server (%s): %s", self.url, error)
        else:
            # The response code is available via getcode() but should be 200
            # if there is no exception.
            # logger.debug("Got response code: %s", request.getcode())
            try:
                total_size = request.info().get('Content-Length').strip()
                total_size = int(total_size)
            except AttributeError:
                logger.warning("Failed to read response's content length")
                total_size = 0

            chunks = []
            while 1:
                if self.stop_request and self.stop_request.is_set():
                    self.content = ''
                    return self
                try:
                    chunk = request.read(self.buffer_size)
                except socket.timeout:
                    logger.error("Request timed out")
                    self.content = ''
                    return self
                self.downloaded_size += len(chunk)
                if self.thread_queue:
                    self.thread_queue.put(
                        (chunk, self.downloaded_size, total_size)
                    )
                else:
                    chunks.append(chunk)
                if not chunk:
                    break
            request.close()
            self.content = b''.join(chunks)
        return self
Example #29
async def test_relation(model, app):
    haproxy = model.applications["haproxy"]
    haproxy_unit = haproxy.units[0]

    config = await app.get_config()
    subdomain = config["proxy-subdomain"]["value"]
    address = f"http://{subdomain}.{haproxy_unit.public_address}.xip.io/admin"
    print(f"Checking address: {address}")
    request = urllib.request.urlopen(address)
    info = request.info()
    print(f"Info: {info}")
    assert request.getcode() == 200
    server_id = "not found"
    for item in info.values():
        if "SERVERID" in item:
            server_id = item.split(";")[0]
        else:
            continue
    print(f"server_id: {server_id}")
    assert subdomain in server_id
Example #30
def download_file(url, filename):
    try:
        # request data from url
        request = urllib.request.urlopen(url)
        header = request.info()

        # writing data to local file...
        print('Loading file from url.', end='')
        chunk_size = 1024
        # Context manager guarantees the file is closed even on errors.
        with open(filename, 'wb') as local_file:
            while True:
                chunk = request.read(chunk_size)
                # ...as long as there is data
                if not chunk:
                    break
                local_file.write(chunk)
                print('.', end='')
    except IOError:
        print('Problem reading url: ', url)
    print('')
Example #31
 def callback():
     progress = Progressbar(root,
                            orient=HORIZONTAL,
                            length=100,
                            mode='determinate')
     progress.pack()
     progress.place(x=10, y=200)
     request = urllib.request.urlopen(url)
     filename = str(file_name_field.get())  # filename will be the basepath of the url
     meta = request.info()  # info from the request object, used to determine the content size
     file_size = int(meta['Content-Length'])
     buffer_size = file_size / segments
     file_name1.configure(text="Filename: " + filename)
     print("File size : {0:.2f}".format(float(
         (file_size / 1024) / 1024)))  ## get file size in MB
     file_download_size.configure(
         text="File Size: {0:.2f} MB".format((file_size / 1024) / 1024))
     total_download = 0
     fd = open(filename, 'wb')  ## save file to current working directory
     while total_download != file_size:
         buffer = request.read(
             int(buffer_size))  ## reading files upto buffer_size
         fd.write(buffer)
         total_download += len(buffer)
         cur_download.configure(text="Downloaded {0}%".format(
             math.trunc((int(total_download) / int(file_size)) * 100)))
         progress['value'] = math.trunc(
             (int(total_download) / int(file_size)) *
             100)  ## To retrieve percentage of download
     print("Download success")
     fd.close()
     file_download_size.destroy()
     file_name1.destroy()
     progress.destroy()
     cur_download.destroy()
     messagebox.showinfo(
         "File Downloaded",
         "Your file {0} has been saved in current directory".format(
             filename))
Example #32
def make_http_request_urllib(logger, url, method):
    url = _prepare_url_before_http_request(logger, url, method)

    req = urllib.request.Request(url,
                                 data=None,
                                 headers={'User-Agent': get_user_agent()},
                                 method=method)

    logger.debug("urllib.request.urlopen ({}) method={}".format(url, method))
    try:
        with urllib.request.urlopen(
                req,
                context=TRequestPolicy.SSL_CONTEXT,
                timeout=TRequestPolicy.HTTP_TIMEOUT) as request:
            data = '' if method == "HEAD" else request.read()
            headers = request.info()
            TRequestPolicy.register_successful_request()
            return request.geturl(), headers, data
    except UnicodeError as exp:
        raise RobotHttpException(
            "cannot redirect to cyrillic web domains or some unicode error",
            url, 520, method)
    except (ConnectionError, http.client.HTTPException) as exp:
        raise RobotHttpException(str(exp), url, 520, method)
    except socket.timeout as exp:
        logger.error("socket timeout, while getting {}: {}".format(url, exp))
        raise RobotHttpException("socket.timeout", url, 504, method)
    except urllib.error.HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be handled first;
        # otherwise the URLError clause below would swallow it.
        if e.code == 503:
            TRequestPolicy.deal_with_http_code_503(logger)
        if e.code == 405 and method == "HEAD":
            return make_http_request_urllib(logger, url, "GET")
        raise RobotHttpException("{} extype:{}".format(str(e), type(e)), url,
                                 e.code, method)
    except urllib.error.URLError as exp:
        code = exp.code if hasattr(exp, 'code') else -1
        raise RobotHttpException("{} extype:{}".format(str(exp), type(exp)),
                                 url, code, method)
Example #33
def find_recent_videos():
    global last_update

    now = last_update.strftime("%Y-%m-%dT%H:%M:%S")
    last_update = datetime.datetime.now(datetime.timezone.utc)
    GET_QUERY = 'https://www.googleapis.com/youtube/v3/search?publishedAfter=' + str(
        now) + 'Z&order=relevance&part=snippet&maxResults=50&key=' + API_KEY

    request = urllib.request.urlopen(GET_QUERY)
    videos = json.loads(request.read().decode(
        request.info().get_param('charset') or 'utf-8'))
    recent_videos = []
    for video in videos['items']:
        try:
            title = video['snippet']['title']
            published_at = parse_youtube_time(video['snippet']['publishedAt'])
            video_id = video['id']['videoId']
            recent_videos.append(Video(title, published_at, video_id))
        except Exception:
            # Skip malformed items (e.g. results without a videoId).
            continue
    return recent_videos
Example #34
    def get_json(self, url):
        """
        Fetch JSON data from a web server and return a dictionary with same.

        :param str url: URL where to fetch the JSON data from
        :return: a dictionary with JSON data
        :rtype: dict
        """
       
        json_data = dict()

        try:
            request = urllib.request.urlopen(url)
        except urllib.error.URLError as e:
            self.logger.error("{}: Error opening url: {}".format(type(e).__name__, url))
            return json_data

        data = request.read()
        encoding = request.info().get_content_charset('utf-8')
        json_data = json.loads(data.decode(encoding))
        return json_data
Example #35
def download_file(request, output_file, stdout=sys.stdout):
    digest = hashlib.sha1()
    file_size_str = request.info().get("Content-Length")
    file_size = int(file_size_str) if file_size_str is not None else None
    bytes_read = 0
    while True:
        buf = request.read(4096)
        if not buf:
            break
        digest.update(buf)
        if output_file:
            output_file.write(buf)
        bytes_read += len(buf)
        percentage = ""
        kb_downloaded = format_bytes(bytes_read)
        total_kb = ""
        if file_size:
            percentage = " {}%".format(round(100 * bytes_read / file_size))
            total_kb = "/" + format_bytes(file_size)
        print("downloaded{} {}{}".format(percentage, kb_downloaded, total_kb), file=stdout)
    return digest.hexdigest()
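
A hypothetical way to drive the function above: hash a download while writing it to disk. The URL and filename are placeholders, and the function still needs its hashlib import plus the format_bytes helper from the original project:

import sys
import urllib.request

with urllib.request.urlopen("https://example.com/archive.tar.gz") as request:
    with open("archive.tar.gz", "wb") as output_file:
        # Progress lines go to stderr so stdout stays clean.
        sha1 = download_file(request, output_file, stdout=sys.stderr)
print("sha1:", sha1)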
Example #36
def download_file(request, output_file, stdout=sys.stdout):
    digest = hashlib.sha1()
    file_size_str = request.info().get('Content-Length')
    file_size = int(file_size_str) if file_size_str is not None else None
    bytes_read = 0
    while True:
        buf = request.read(4096)
        if not buf:
            break
        digest.update(buf)
        if output_file:
            output_file.write(buf)
        bytes_read += len(buf)
        percentage = ''
        kb_downloaded = format_bytes(bytes_read)
        total_kb = ''
        if file_size:
            percentage = ' {}%'.format(round(100 * bytes_read / file_size))
            total_kb = '/' + format_bytes(file_size)
        print('downloaded{} {}{}'.format(percentage, kb_downloaded, total_kb),
              file=stdout)
    return digest.hexdigest()
Example #37
    def get(self, data=None):
        req = urllib.request.Request(url=self.url,
                                     data=data,
                                     headers=self.headers)
        try:
            request = urllib.request.urlopen(req, timeout=self.timeout)
        except (urllib.error.HTTPError, CertificateError) as e:
            logger.error("Unavailable url (%s): %s", self.url, e)
        except (socket.timeout, urllib.error.URLError) as e:
            logger.error("Unable to connect to server (%s): %s", self.url, e)
        else:
            try:
                total_size = request.info().get('Content-Length').strip()
                total_size = int(total_size)
            except AttributeError:
                total_size = 0

            chunks = []
            while 1:
                if self.stop_request and self.stop_request.is_set():
                    self.content = ''
                    return self
                try:
                    chunk = request.read(self.buffer_size)
                except socket.timeout as e:
                    logger.error("Request timed out")
                    self.content = ''
                    return self
                self.downloaded_size += len(chunk)
                if self.thread_queue:
                    self.thread_queue.put(
                        (chunk, self.downloaded_size, total_size))
                else:
                    chunks.append(chunk)
                if not chunk:
                    break
            request.close()
            self.content = b''.join(chunks)
        return self
Example #38
    def get(self, data=None):
        req = urllib.request.Request(url=self.url, data=data, headers=self.headers)
        try:
            request = urllib.request.urlopen(req, timeout=self.timeout)
        except (urllib.error.HTTPError, CertificateError) as e:
            logger.error("Unavailable url (%s): %s", self.url, e)
        except (socket.timeout, urllib.error.URLError) as e:
            logger.error("Unable to connect to server (%s): %s", self.url, e)
        else:
            try:
                total_size = request.info().get('Content-Length').strip()
                total_size = int(total_size)
            except AttributeError:
                total_size = 0

            chunks = []
            while 1:
                if self.stop_request and self.stop_request.is_set():
                    self.content = ''
                    return self
                try:
                    chunk = request.read(self.buffer_size)
                except socket.timeout as e:
                    logger.error("Request timed out")
                    self.content = ''
                    return self
                self.downloaded_size += len(chunk)
                if self.thread_queue:
                    self.thread_queue.put(
                        (chunk, self.downloaded_size, total_size)
                    )
                else:
                    chunks.append(chunk)
                if not chunk:
                    break
            request.close()
            self.content = b''.join(chunks)
        return self
Example #39
    def get_content_by_url(self, url: str) -> str:
        """Вернет распарсенный контент извлеченный из указанного урла.

        Подбирает подходящий шаблон обработки страницы и передает его в парсер, который извлечет нужную информацию и вернет ее в метод.
        Метод используется в основном для получения извлеченной информации из страницы и передачи ее в другие методы,
        которые будут осуществлять сохранение этого контента.

        :param url: Адрес страницы которую требуется распарсить и сохранить из нее контент.
        :return: Готовый к сохранению контент страницы.
        """
        hostname = urllib.parse.urlparse(url).hostname
        request = urllib.request.urlopen(url)
        # Попытка чтения страницы.
        try:
            # Приведение оригинальной кодировки страницы к правильному строковому виду.
            html = request.read().decode(request.info().get_charsets()[-1])
        except Exception:
            # В случае возникновения проблем с получением страницы или с преобразованием кодировки просто прервем работу.
            return ''

        extractor = ExtractorContent(**self.get_template_by_hostname(hostname))
        extractor.feed(html)
        extractor.close()
        return extractor.get_text_content()
Example #40
def find_recent_videos():
    global last_update

    now = last_update.strftime("%Y-%m-%dT%H:%M:%S")
    last_update = datetime.datetime.now(datetime.timezone.utc)
    GET_QUERY = (
        "https://www.googleapis.com/youtube/v3/search?publishedAfter="
        + str(now)
        + "Z&order=relevance&part=snippet&maxResults=50&key="
        + API_KEY
    )

    request = urllib.request.urlopen(GET_QUERY)
    videos = json.loads(request.read().decode(request.info().get_param("charset") or "utf-8"))
    recent_videos = []
    for video in videos["items"]:
        try:
            title = video["snippet"]["title"]
            published_at = parse_youtube_time(video["snippet"]["publishedAt"])
            video_id = video["id"]["videoId"]
            recent_videos.append(Video(title, published_at, video_id))
        except Exception:
            # Skip malformed items (e.g. results without a videoId).
            continue
    return recent_videos
Example #41
    'download_link': {'$exists': True},
    'file_name': {'$exists': False},
    'error': {'$exists': False},
    'size':{'$gt': 30}
}, no_cursor_timeout=True)

counter = 0
for doc in docs_arr:
    counter += 1
    docId = doc['_id']
    doc.pop("_id", None)
    download_link = HOST + doc['download_link']
    print(str(counter) + '. ' + download_link)
    try:
        with urllib.request.urlopen(download_link) as request:
            request_filename_re = re.search('filename="([^"]*)"', str(request.info()))
            if request_filename_re is not None:
                request_filename = request_filename_re.group(1)
                filename, file_extension = os.path.splitext(request_filename)
            else:
                file_extension = re.search('application/(.*)', str(request.info())).group(1)
            md5 = hashlib.md5()
            md5.update(download_link.encode())
            local_file_name = md5.hexdigest() + file_extension
            with open(FILES_DIR + local_file_name, 'w+b') as local_file:
                local_file.write(request.read())
                file_size = os.path.getsize(FILES_DIR + local_file_name)
                doc['file_real_size'] = file_size
                doc['file_name'] = local_file_name
                doc['file_extension'] = file_extension
                documents.update({'_id': docId}, doc)