Beispiel #1
0
 def register(self, identity: Identity, public_key: PKey) -> None:
     """Upload ``public_key`` by POSTing it as JSON to ``self.list_url``.

     The payload carries the key fingerprint as ``title`` and the
     OpenSSH-formatted key as ``key``.

     :param identity: passed through to ``request`` for the call
     :param public_key: the key to register
     :raises DuplicatePublicKeyError: when the server answers 422 with a
         JSON error on field ``key`` whose message is
         ``key is already in use``
     :raises urllib.error.HTTPError: for every other HTTP failure,
         including 422 responses whose body cannot be interpreted
     """
     logger = self.logger.getChild('register')
     title = get_key_fingerprint(public_key)
     data = json.dumps({
         'title': title,
         'key': format_openssh_pubkey(public_key)
     })
     try:
         request(identity, self.list_url, 'POST', data=data.encode())
     except urllib.error.HTTPError as e:
         if e.code != 422:
             raise
         content_type = e.headers.get('Content-Type')
         mimetype, options = parse_options_header(content_type)
         if mimetype != 'application/json':
             raise
         charset = options.get('charset', 'utf-8')
         response = None
         try:
             content_body = e.read().decode(charset)
         except (LookupError, ValueError):
             # Unknown charset or undecodable bytes: the body is useless,
             # so fall through and re-raise the HTTP error itself.
             content_body = None
         if content_body is not None:
             logger.debug('response body:\n%s', content_body)
             try:
                 response = json.loads(content_body)
             except ValueError:
                 # Malformed JSON must not mask the original HTTP error.
                 response = None
         errors = response.get('errors') if isinstance(response, dict) else None
         if not isinstance(errors, list):
             errors = []
         for error in errors:
             if not isinstance(error, dict) or error.get('field') != 'key':
                 continue
             message = error.get('message')
             if not isinstance(message, str):
                 # e.g. JSON null; nothing to compare against
                 continue
             message = message.strip().lower()
             if message == 'key is already in use':
                 raise DuplicatePublicKeyError(message) from e
         # No duplicate-key error found: propagate the 422 unchanged.
         raise
Beispiel #2
0
 def assertErrorsContain(self, body, key, value=None):  # noqa
     """Check that ``body['errors']`` has an entry whose ``name`` is ``key``.

     Only the first matching entry is considered.  When ``value`` is given,
     that entry's ``description`` must equal it.  The test fails if no
     entry matches.
     """
     match = next(
         (entry for entry in body['errors'] if entry.get('name') == key),
         None,
     )
     if match is None:
         self.fail(str(body) + " does not contain " + key)
         return
     if value is not None:
         self.assertEqual(match.get('description'), value)
Beispiel #3
0
 def assertErrorsContain(self, body, key, value=None):  # noqa
     """Check that ``body['errors']`` has an entry whose ``name`` is ``key``.

     Only the first matching entry is considered.  When ``value`` is given,
     that entry's ``description`` must equal it.  The test fails if no
     entry matches.
     """
     match = next(
         (entry for entry in body['errors'] if entry.get('name') == key),
         None,
     )
     if match is None:
         self.fail(str(body) + " does not contain " + key)
         return
     if value is not None:
         self.assertEqual(match.get('description'), value)
Beispiel #4
0
 def register(self, identity: Identity, public_key: PKey) -> None:
     """Upload ``public_key`` by POSTing it as JSON to ``self.list_url``.

     The payload carries the key fingerprint as ``title`` and the
     OpenSSH-formatted key as ``key``.

     :param identity: passed through to ``request`` for the call
     :param public_key: the key to register
     :raises DuplicatePublicKeyError: when the server answers 422 with a
         JSON error on field ``key`` whose message is
         ``key is already in use``
     :raises urllib.error.HTTPError: for every other HTTP failure,
         including 422 responses whose body cannot be interpreted
     """
     logger = self.logger.getChild('register')
     title = get_key_fingerprint(public_key)
     data = json.dumps({
         'title': title,
         'key': format_openssh_pubkey(public_key)
     })
     try:
         request(identity, self.list_url, 'POST', data=data.encode())
     except urllib.error.HTTPError as e:
         if e.code != 422:
             raise
         content_type = e.headers.get('Content-Type')
         mimetype, options = parse_options_header(content_type)
         if mimetype != 'application/json':
             raise
         charset = options.get('charset', 'utf-8')
         response = None
         try:
             content_body = e.read().decode(charset)
         except (LookupError, ValueError):
             # Unknown charset or undecodable bytes: the body is useless,
             # so fall through and re-raise the HTTP error itself.
             content_body = None
         if content_body is not None:
             logger.debug('response body:\n%s', content_body)
             try:
                 response = json.loads(content_body)
             except ValueError:
                 # Malformed JSON must not mask the original HTTP error.
                 response = None
         errors = response.get('errors') if isinstance(response, dict) else None
         if not isinstance(errors, list):
             errors = []
         for error in errors:
             if not isinstance(error, dict) or error.get('field') != 'key':
                 continue
             message = error.get('message')
             if not isinstance(message, str):
                 # e.g. JSON null; nothing to compare against
                 continue
             message = message.strip().lower()
             if message == 'key is already in use':
                 raise DuplicatePublicKeyError(message) from e
         # No duplicate-key error found: propagate the 422 unchanged.
         raise
    def _request_credentials(self, authorization_key):
        """Exchange ``authorization_key`` for an access token at ``self.TOKEN_URL``.

        Sends a form-encoded ``authorization_code`` grant request and returns
        ``{'access_token': ...}`` taken from the JSON response.

        Raises ``Exception`` carrying the server's ``error_description`` or
        ``error`` field (or the HTTP error text when the error body is not a
        JSON object) if the token endpoint answers with an HTTP error.
        """
        headers = {
            'Content-Type': 'application/x-www-form-urlencoded'
        }

        body = {
            'code': authorization_key,
            'client_id': self.CLIENT_ID,
            'client_secret': self.CLIENT_NOT_SO_SECRET,
            'grant_type': 'authorization_code'
        }
        # urlencode() returns str, but POST data must be bytes on Python 3.
        # The encoded form is pure ASCII (everything else is percent-escaped).
        body = urllib.parse.urlencode(body).encode('ascii')

        request = urllib.request.Request(self.TOKEN_URL, data=body, headers=headers)

        try:
            response = utils.urlopen(request)

        except urllib.error.HTTPError as e:
            try:
                error = json.load(e)
            except ValueError:
                # Error body is not JSON (e.g. an HTML error page).
                error = None
            if not isinstance(error, dict):
                error = {}
            raise Exception(error.get('error_description') or error.get('error') or str(e)) from e

        data = json.load(response)

        return {
            'access_token': data['access_token']
        }
Beispiel #6
0
def _show_json_error(reply):
    """Report a server error reply to the user.

    If ``reply`` parses as a JSON object, its ``error`` field and, when
    non-empty, its ``message`` field are written to stderr.  Any other
    reply (not JSON, or JSON that is not an object) is printed verbatim
    on stdout prefixed with ``error:``.
    """
    try:
        error = json.loads(reply)
    except (TypeError, ValueError):
        # TypeError: reply is not str/bytes at all (e.g. None).
        error = None
    if not isinstance(error, dict):
        # Valid JSON such as a list or bare string has no fields to show.
        print("error: {}".format(reply))
        return
    sys.stderr.write("error: lnt server: {}\n".format(error.get('error')))
    message = error.get('message', '')
    if message:
        sys.stderr.write("{}\n".format(message))
    def post(self, uri, data=None, files=None, verify=False):
        """
            Sends a POST request.

            @param uri: Uri of Service API.
            @param data: Requesting Data, serialised with json.dumps. Default: None
            @param files: Passed through to requests.post. Default: None
            @param verify: Passed through to requests.post. Default: False

            @return: The response text run through self._parse.

            @raise NetworkAPIClientError: Client failed to access the API.
        """
        # Bound up-front so the ``finally`` block can tell whether a response
        # was ever received; otherwise a transport failure inside
        # requests.post would surface as an UnboundLocalError instead.
        response = None
        try:

            response = requests.post(
                self._url(uri),
                data=json.dumps(data),
                files=files,
                auth=self._auth_basic(),
                headers=self._header(),
                verify=verify
            )

            response.raise_for_status()

            return self._parse(response.text)

        except HTTPError as exc:
            error = self._parse(response.text)
            self.logger.error(error)
            raise NetworkAPIClientError(error.get('detail', '')) from exc
        finally:
            self.logger.info('URI: %s', uri)
            if response is not None:
                self.logger.info('Status Code: %s', response.status_code)
                self.logger.info('X-Request-Id: %s',
                                 response.headers.get('x-request-id'))
                self.logger.info('X-Request-Context: %s',
                                 response.headers.get('x-request-context'))
Beispiel #8
0
 def get_embedded_errors(self):
     """Yield a ``self.ServerError`` for each entry of the body's ``errors``.

     ``self.error_body`` is decoded and parsed as JSON.  String entries
     become ``ServerError(text)``; dict entries become
     ``ServerError(message, mapping, code)`` with ``mapping`` and ``code``
     defaulting to ``None``.  Entries of any other type are skipped.
     Nothing is yielded when there is no body, it cannot be decoded or
     parsed, or it carries no iterable ``errors`` member.
     """
     if self.error_body is None:
         return
     try:
         errors = json.loads(self.error_body.decode())["errors"]
     except (UnicodeError, json.JSONDecodeError, KeyError, TypeError):
         # TypeError: the top-level JSON value is not an object, so the
         # ["errors"] lookup itself is invalid.
         return
     try:
         entries = iter(errors)
     except TypeError:
         # e.g. {"errors": null}
         return
     for error in entries:
         if isinstance(error, str):
             yield self.ServerError(error)
         elif isinstance(error, dict):
             yield self.ServerError(
                 error["message"],
                 error.get("mapping", None),
                 error.get("code", None),
             )
Beispiel #9
0
def is_error_response(url, response):
    """Tell whether the JSON text ``response`` carries an ``error`` member.

    Returns ``False`` when ``response`` is not a JSON object or has no
    truthy ``error``.  Otherwise returns a truthy value describing the
    error: the ``message`` of an error object (or ``'error'`` when it has
    none), the error itself when it is a string, and ``'error'`` for any
    other truthy payload.

    ``url`` is accepted for interface compatibility and is not used.
    """
    try:
        data = json.loads(response)
    except (TypeError, ValueError):
        return False
    if not isinstance(data, dict):
        return False

    error = data.get('error')
    if not error:
        return False
    if isinstance(error, dict):
        return error.get('message') or 'error'
    if isinstance(error, str):
        # A plain-string error used to be swallowed and reported as
        # "not an error"; surface it as the message instead.
        return error
    return 'error'
Beispiel #10
0
 def __init__(self, e: urllib.error.HTTPError = None, message: str = None):
     """Build the error from an explicit ``message`` or from the body of ``e``.

     With ``message`` given, it is stored as both ``msg`` and ``code``,
     ``state`` is ``False`` and ``e`` is ignored.  Otherwise the body of
     ``e`` is read as UTF-8 JSON and ``info``/``msg``/``code``/``state``
     are taken from the ``errors``/``message``/``code``/``status`` members
     of its ``error`` object.  When the body cannot be interpreted that
     way, those attributes keep their defaults (``{}`` and empty strings)
     instead of being left undefined.
     """
     # NOTE(review): the original body started with a bare ``super()``
     # expression, which does nothing.  The base class is not visible here,
     # so no base initialiser is invoked -- confirm whether one is needed.
     if message is not None:
         self.state = False
         self.hdrs = None
         self.fp = None
         self.filename = None
         self.info = {}
         self.msg = message
         self.code = message
         return

     # Mirror the wrapped error where it has these attributes so both
     # construction paths define the same set of attributes.
     self.hdrs = getattr(e, "hdrs", None)
     self.fp = getattr(e, "fp", None)
     self.filename = getattr(e, "filename", None)
     self.info = {}
     self.msg = ""
     self.code = ""
     self.state = ""
     if e is None:
         return

     try:
         error = json.loads(e.read().decode("utf-8"))["error"]
     except (ValueError, KeyError, TypeError):
         # ValueError covers undecodable bytes and malformed JSON;
         # KeyError/TypeError cover a body without an "error" member.
         return
     if not isinstance(error, dict):
         return
     self.info = error.get("errors", {})
     self.msg = error.get("message", "")
     self.code = error.get("code", "")
     self.state = error.get("status", "")
Beispiel #11
0
    def __init__(self, http_error):
        """Wrap ``http_error`` and extract details from its JSON body.

        The base ``HTTPError`` is initialised from ``http_error``.  The body
        is then read and, when it is JSON with an ``error`` object, its
        ``code``, ``message`` and ``errors`` members populate
        ``statuscode``, ``errormessage`` and ``statusdesc`` (the ``errors``
        items joined with `` | ``).  When the body cannot be interpreted,
        the attributes fall back to the HTTP code, the HTTP message and an
        empty description, so they are always defined.
        """
        urllib.error.HTTPError.__init__(self, http_error.filename, http_error.code, http_error.msg, http_error.hdrs, http_error.fp)

        # Defaults first: callers can rely on these attributes existing.
        self.statuscode = self.code
        self.statusdesc = ''
        self.errormessage = self.msg

        try:
            error = json.loads(self.read())['error']
        except Exception:
            # Parsing the body is best-effort; keep the defaults.
            return
        if not isinstance(error, dict):
            return

        self.statuscode = error.get('code', self.statuscode)
        self.errormessage = error.get('message', self.errormessage)
        details = error.get('errors') or []
        if not isinstance(details, (list, tuple)):
            details = [details]
        # Items are not guaranteed to be strings; str.join would raise.
        self.statusdesc = ' | '.join(str(item) for item in details)
Beispiel #12
0
 def _parse_error(self, xml_content):
     """
     Take the XML content as string and extract the PrestaShop error.

     The content is run through ``self._parse``; the message is looked up
     under ``prestashop -> errors -> error``.  A list of errors yields
     every message followed by a space; a single error yields its message.

     @param xml_content: xml content returned by the PS server as string
     @return prestashop_error_message, or '' when no message can be found
     """
     node = self._parse(xml_content)
     for key in ('prestashop', 'errors', 'error'):
         if not isinstance(node, dict):
             # Unexpected shape (e.g. an empty element parsed as None).
             return ''
         node = node.get(key, {})

     if isinstance(node, list):
         return ''.join(
             '%s ' % (item.get('message') if isinstance(item, dict) else item,)
             for item in node
         )
     if isinstance(node, dict):
         message = node.get('message')
         # Always hand back a string, even when the message is absent.
         return '' if message is None else message
     return ''
Beispiel #13
0
  def handle_api_error(self, rbody, rcode, resp):
    """Raise the exception matching an API error response.

    ``resp['error']`` must be a dict; its ``message``, ``param`` and
    ``code`` members are forwarded to the exception.  HTTP 400/404 raise
    ``InvalidRequestError``, 401 raises ``AuthenticationError``, 402 raises
    ``CardError`` and every other code raises ``APIError``.  A response
    without a usable ``error`` object also raises ``APIError``.
    """
    try:
      error = resp['error']
    except (KeyError, TypeError):
      error = None
    if not isinstance(error, dict):
      # Covers a missing member as well as a payload such as a bare string,
      # which would otherwise fail on ``.get`` below.
      raise APIError("Invalid response object from API: %r (HTTP response code was %d)" % (rbody, rcode), rbody, rcode, resp)

    if rcode in [400, 404]:
      raise InvalidRequestError(error.get('message'), error.get('param'), rbody, rcode, resp)
    elif rcode == 401:
      raise AuthenticationError(error.get('message'), rbody, rcode, resp)
    elif rcode == 402:
      raise CardError(error.get('message'), error.get('param'), error.get('code'), rbody, rcode, resp)
    else:
      raise APIError(error.get('message'), rbody, rcode, resp)
Beispiel #14
0
    def handle_api_error(self, rbody, rcode, resp):
        """Raise the exception matching an API error response.

        ``resp['error']`` must be a dict; its ``message``, ``param`` and
        ``code`` members are forwarded to the exception.  HTTP 400/404 raise
        ``InvalidRequestError``, 401 raises ``AuthenticationError``, 402
        raises ``CardError`` and every other code raises ``APIError``.  A
        response without a usable ``error`` object also raises ``APIError``.
        """
        try:
            error = resp['error']
        except (KeyError, TypeError):
            error = None
        if not isinstance(error, dict):
            # Covers a missing member as well as a payload such as a bare
            # string, which would otherwise fail on ``.get`` below.
            raise APIError(
                "Invalid response object from API: %r (HTTP response code was %d)"
                % (rbody, rcode), rbody, rcode, resp)

        if rcode in [400, 404]:
            raise InvalidRequestError(error.get('message'), error.get('param'),
                                      rbody, rcode, resp)
        elif rcode == 401:
            raise AuthenticationError(error.get('message'), rbody, rcode, resp)
        elif rcode == 402:
            raise CardError(error.get('message'), error.get('param'),
                            error.get('code'), rbody, rcode, resp)
        else:
            raise APIError(error.get('message'), rbody, rcode, resp)
Beispiel #15
0
def _small_indexing(user, fs, client, source, destination, index_name):
    """Create the collection ``index_name`` and index ``source`` into it.

    For path-based sources the file size is checked against
    ``MAX_UPLOAD_SIZE`` first.  ``query`` sources are fed through
    ``CollectionManagerController.update_data_from_hive``, ``manual``
    sources index nothing, and everything else is sent to
    ``client.index``.  If indexing raises, the index is deleted again
    (best effort) and the exception is re-raised.

    Returns a dict with ``status``, ``on_success_url``, ``pub_sub_url``
    and the list of error messages reported by ``client.index``.
    """
    kwargs = {}
    errors = []

    if source['inputFormat'] not in ('manual', 'table', 'query_handle'):
        file_stats = fs.stats(urllib_unquote(source["path"]))
        if file_stats.size > MAX_UPLOAD_SIZE:
            raise PopupException(_('File size is too large to handle!'))

    indexer = MorphlineIndexer(user, fs)
    fields = indexer.get_field_list(destination['columns'])
    _create_solr_collection(user, fs, client, destination, index_name, kwargs)

    if source['inputFormat'] == 'file':
        kwargs['separator'] = source['format']['fieldSeparator']
        data = fs.read(urllib_unquote(source["path"]), 0, MAX_UPLOAD_SIZE)

    if client.is_solr_six_or_more():
        kwargs.update(processor='tolerant', map='NULL:')

    try:
        if source['inputFormat'] == 'query':
            query = source['query']
            query_id = query['id'] if query.get('id') else query

            document = Document2.objects.document(user=user, doc_id=query_id)
            notebook = Notebook(document=document).get_data()
            request = MockedDjangoRequest(user=user)
            snippet = notebook['snippets'][0]

            searcher = CollectionManagerController(user)
            columns = [
                column['name'] for column in fields
                if column['name'] != 'hue_id'
            ]

            # Assumes handle still live
            def fetch_handle(rows, start_over):
                return get_api(request, snippet).fetch_result(
                    notebook, snippet, rows=rows, start_over=start_over)

            searcher.update_data_from_hive(index_name,
                                           columns,
                                           fetch_handle=fetch_handle,
                                           indexing_options=kwargs)
            # TODO if rows == MAX_ROWS truncation warning
        elif source['inputFormat'] != 'manual':
            # 'manual' needs no indexing at all; every other format is
            # pushed through the client.
            response = client.index(name=index_name, data=data, **kwargs)
            reported = response['responseHeader'].get('errors', [])
            errors = [item.get('message', '') for item in reported]
    except Exception as e:
        try:
            client.delete_index(index_name, keep_config=False)
        except Exception as e2:
            LOG.warning(
                'Error while cleaning-up config of failed collection creation %s: %s'
                % (index_name, e2))
        raise e

    on_success_url = reverse('indexer:indexes', kwargs={'index': index_name})
    return {
        'status': 0,
        'on_success_url': on_success_url,
        'pub_sub_url': 'assist.collections.refresh',
        'errors': errors
    }
Beispiel #16
0
 def get_error(self, errors, name):  # noqa
     """Return the first entry of ``errors`` whose ``name`` equals ``name``.

     Fails the test when no entry matches.
     """
     missing = object()
     hit = next(
         (entry for entry in errors if name == entry.get('name')),
         missing,
     )
     if hit is missing:
         self.fail('no error for {0}'.format(name))
         return None
     return hit
Beispiel #17
0
 def assertError(self, errors, name, description):  # noqa
     """Fail unless some entry of ``errors`` has this name and description."""
     found = any(
         description == entry.get('description') and name == entry.get('name')
         for entry in errors
     )
     if not found:
         self.fail('no error ({}, {}) in {}'.format(name, description, errors))
Beispiel #18
0
 def assertCorniceRequired(self, error, key):  # noqa
     """Assert ``error`` is a 'Required' error reported for ``key``."""
     expectations = (('description', 'Required'), ('name', key))
     for field, expected in expectations:
         self.assertEqual(error.get(field), expected)
Beispiel #19
0
 def assertCorniceMissing(self, error, key):  # noqa
     """Assert ``error`` is a '<key> is missing' error reported for ``key``."""
     description = error.get('description')
     self.assertEqual(description, key + ' is missing')
     reported_name = error.get('name')
     self.assertEqual(reported_name, key)
Beispiel #20
0
 def assertCorniceMissing(self, error, key):  # noqa
     """Assert ``error`` is a '<key> is missing' error reported for ``key``."""
     description = error.get('description')
     self.assertEqual(description, key + ' is missing')
     reported_name = error.get('name')
     self.assertEqual(reported_name, key)
Beispiel #21
0
 def assertCorniceRequired(self, error, key):  # noqa
     """Assert ``error`` is a 'Required' error reported for ``key``."""
     expectations = (('description', 'Required'), ('name', key))
     for field, expected in expectations:
         self.assertEqual(error.get(field), expected)
Beispiel #22
0
 def assertError(self, errors, name, description):  # noqa
     """Fail unless some entry of ``errors`` has this name and description."""
     found = any(
         description == entry.get('description') and name == entry.get('name')
         for entry in errors
     )
     if not found:
         self.fail('no error ({}, {}) in {}'.format(name, description, errors))
Beispiel #23
0
def crawl(crawler_id):
    """Run the two-phase crawl job tracked by the ``Crawler`` row ``crawler_id``.

    Phase 1 ("Crawling") seeds a work queue with the artist ids found on the
    Melon genre pages listed in ``urls`` and lets ``crawler.thread`` worker
    threads process ``(crawl_type, crawl_id, depth, kwargs)`` items, collecting
    music/album/artist relations in memory.

    Phase 2 ("Relating") splits the collected relations into chunks and lets
    worker threads attach them to the ``Music``, ``Album`` and ``GroupArtist``
    model rows.

    Throughout, the ``Crawler`` row is updated with status, progress, remaining
    time, a detail string and elapsed time.  The job stops early when
    ``crawler.cancel`` is set or when a worker reports an exception, in which
    case the traceback text is stored in ``crawler.error``.
    """
    # Poll until the Crawler row can be loaded
    # (presumably it may not be committed yet when this starts -- confirm).
    while True:
        crawler = Crawler.objects.filter(pk=crawler_id).first()
        if crawler is not None:
            break
        time.sleep(0.1)
    crawler.status = 'Crawling'
    crawler.started = datetime.datetime.now()
    crawler.progress = 0
    crawler.save()

    # Both phases use the same thread count.  Crawling is weighted 10:1
    # against relating when splitting the overall progress between them.
    crawling_thread_num = crawler.thread
    relating_thread_num = crawler.thread
    whole_task = 10 / crawling_thread_num + 1 / relating_thread_num
    crawling_ratio = 10 / crawling_thread_num / whole_task
    relating_ratio = 1 / relating_thread_num / whole_task

    options = webdriver.ChromeOptions()
    options.add_argument('headless')
    options.add_argument('window-size=1920x1080')
    options.add_argument('disable-gpu')
    options.add_argument('User-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KTHML, like Gecko) Chrome/61.0.3163.100 Safari/537.36')

    # Genre listing pages used to collect the initial artist ids.
    urls = (
        'https://www.melon.com/genre/song_list.htm?gnrCode=GN0900&steadyYn=Y',
        'https://www.melon.com/genre/song_list.htm?gnrCode=GN1000&steadyYn=Y',
        'https://www.melon.com/genre/song_list.htm?gnrCode=GN1100&steadyYn=Y',
        'https://www.melon.com/genre/song_list.htm?gnrCode=GN1200&steadyYn=Y',
        'https://www.melon.com/genre/song_list.htm?gnrCode=GN1300&steadyYn=Y',
        'https://www.melon.com/genre/song_list.htm?gnrCode=GN1400&steadyYn=Y',
    )

    # State shared between the main thread and the workers.
    crawled_ids = {'music': set(), 'album': set(), 'artist': set()}
    relations = {'music': {}, 'album': {}, 'artist': {}}
    fully_crawled_artist_ids = set()
    queue = Queue()
    error = Queue()
    threading_lock = threading.Lock()
    max_depth = crawler.level

    def update_crawler():
        """Refresh the detail and elapsed fields and save the Crawler row."""
        crawler.detail = f'music {len(crawled_ids["music"])}, album {len(crawled_ids["album"])}, artist {len(crawled_ids["artist"])}'
        crawler.elapsed = time.time() - crawler.started.timestamp()
        crawler.save()

    def is_crawled(crawl_type, crawl_id, simple):
        """Return True if the id was already visited, else record it and return False.

        For artists a ``simple`` visit is recorded in ``crawled_ids`` only,
        while a non-simple visit is also recorded in
        ``fully_crawled_artist_ids``; an artist first seen in simple mode can
        therefore still be visited once more in non-simple mode.
        """
        if 'artist' == crawl_type:
            with threading_lock:
                if crawl_id in fully_crawled_artist_ids:
                    return True
                if simple and crawl_id in crawled_ids[crawl_type]:
                        return True
                crawled_ids[crawl_type].add(crawl_id)
                if not simple:
                    fully_crawled_artist_ids.add(crawl_id)
            return False
        with threading_lock:
            if crawl_id in crawled_ids[crawl_type]:
                return True
            crawled_ids[crawl_type].add(crawl_id)
        return False

    def emtpy_kwarg_list(id_list):
        """Pair every id with an empty kwargs dict, as expected for work items."""
        return [(i, {}) for i in id_list]

    def worker(worker_id):
        """Phase-1 worker: process queue items until a ``crawl_type is None`` sentinel."""
        print(f'Worker {worker_id} Spawned')
        driver = webdriver.Chrome(driver_path, chrome_options=options)

        def close_driver():
            try:
                driver.close()
            except Exception as e:
                print(e)
                pass
        while True:
            crawl_type, crawl_id, depth, kwargs = queue.get()
            if crawl_type is None:
                break
            try:
                # Run Work Items
                to_do_ids = {'music': set(), 'album': set(), 'artist': set()}
                if crawl_type == 'music':
                    music_id, album_id, artist_ids = crawl_music(worker_id, crawl_id, **kwargs)
                    relations['music'][music_id] = (album_id, artist_ids)
                    to_do_ids['album'] = [(album_id, {})]
                    to_do_ids['artist'] = emtpy_kwarg_list(artist_ids)
                elif crawl_type == 'album':
                    album_id, artist_ids, music_ids, music_ratings, music_artist_ids = crawl_album(worker_id, crawl_id)
                    if artist_ids:
                        relations['album'][album_id] = artist_ids
                    to_do_ids['artist'] = emtpy_kwarg_list(artist_ids)
                    to_do_ids['music'] = [(music_id, {'album_id': album_id, 'rating': music_ratings[music_id], 'artist_ids': music_artist_ids[music_id]}) for music_id in music_ids]
                elif crawl_type == 'artist':
                    # The last argument is True once depth exceeds crawler.level
                    # (presumably selects a reduced crawl -- confirm against crawl_artist).
                    artist_id, album_ids, member_ids = crawl_artist(worker_id, crawl_id, driver, depth > max_depth)
                    if member_ids:
                        relations['artist'][artist_id] = member_ids
                    to_do_ids['album'] = emtpy_kwarg_list(album_ids)
                    to_do_ids['artist'] = emtpy_kwarg_list(member_ids)
                else:
                    raise Exception(f"Illegal argument crawl_type: {crawl_type}")

                # Add Work Items
                # NOTE(review): this loop rebinds crawl_type, crawl_id and kwargs,
                # so an exception raised here is reported below with the values
                # of the follow-up item rather than the item taken from the queue.
                for crawl_type in ('music', 'album', 'artist'):
                    for crawl_id, kwargs in to_do_ids[crawl_type]:
                        if not is_crawled(crawl_type, crawl_id, depth + 1 > max_depth):
                            queue.put((crawl_type, crawl_id, depth + 1, kwargs))
            except:
                # Alert to Main Thread That An Exception Has Occurred
                error.put(f'{traceback.format_exc()}\n{(crawl_type, crawl_id, depth, kwargs)} on Worker {worker_id}')
                break
            finally:
                queue.task_done()
        close_driver()
        print(f'Worker {worker_id} Buried...')

    # Spawn Worker Threads
    workers = []
    for i in range(crawling_thread_num):
        t = threading.Thread(target=worker, args=(i + 1,))
        workers.append(t)
        t.daemon = True
        t.start()

    def join():
        """Drop pending items, send one sentinel per worker and wait for them to exit."""
        with queue.mutex:
            queue.queue.clear()
        for _ in range(len(workers)):
            queue.put((None, None, None, None))
        for th in workers:
            th.join()

    # Gather Initial Artist Ids
    artist_ids = set()
    for url in urls:
        tree = get_tree(url)

        artists = tree.xpath('/html/body/div/div[3]/div/div/div[7]/form/div/table/tbody/tr[*]/td[5]/div/div/div[2]/a')
        for artist in artists:
            artist_ids.add(get_id_lxml(artist))

    # Put Initial Work Items
    # One seed artist is queued at a time; the next one is only queued after
    # the queue has fully drained.
    last_time = time.time()
    for i, artist_id in enumerate(artist_ids):
        crawler.refresh_from_db()
        # Cancel
        if crawler.cancel:
            crawler.status = 'Canceled'
            crawler.remain = None
            update_crawler()
            join()
            print('Crawling Canceled')
            return

        # Update Progress
        progress = crawling_ratio * i / len(artist_ids)
        crawler.status = 'Crawling'
        crawler.progress = 100 * progress
        current_time = time.time()
        if progress != 0:
            crawler.remain = (current_time - last_time) / progress * (1 - progress)
        update_crawler()

        # Put Work Item
        if not is_crawled('artist', artist_id, False):
            queue.put(('artist', artist_id, 0, {}))

            # Wait While Observing Errors
            # task_done() is never called on the error queue, so its
            # unfinished_tasks counter is non-zero once any worker has
            # reported an exception.
            while queue.unfinished_tasks:
                if error.unfinished_tasks:
                    crawler.status = 'Error Occurred'
                    error_message = error.get()
                    print(error_message)
                    crawler.error = error_message
                    crawler.remain = None
                    update_crawler()
                    join()
                    print('Crawling Error Occurred')
                    return
                time.sleep(1)

    # Crawling Finish
    crawler.status = 'Relation Constructing'
    crawler.progress = 50
    crawler.remain = None
    update_crawler()
    join()
    print('Crawling Finished')

    # Phase 2 starts with fresh queues; worker() and join() are redefined
    # below for the relation-building work.
    queue = Queue()
    error = Queue()

    def worker(worker_id):
        """Phase-2 worker: apply chunks of relations to the models until a ``None`` sentinel."""
        print(f'Worker {worker_id} Spawned')

        while True:
            chunk = queue.get()
            if chunk is None:
                break
            print(f'{chunk[0][0]} {chunk[0][1]}...{len(chunk)}')
            for model_type, model_id, arg1, arg2 in chunk:
                try:
                    if model_type == 'music':
                        # arg1 is the album id, arg2 the artist ids
                        music = Music.objects.get(original_id=model_id)
                        music.album = Album.objects.get(original_id=arg1)
                        for artist_id in arg2:
                            music.artists.add(Artist.objects.get(original_id=artist_id))
                        music.save()
                    elif model_type == 'album':
                        # arg1 holds the artist ids
                        album = Album.objects.get(original_id=model_id)
                        for artist_id in arg1:
                            album.artists.add(Artist.objects.get(original_id=artist_id))
                        album.save()
                    elif model_type == 'artist':
                        # arg1 holds the member ids
                        artist = GroupArtist.objects.get(original_id=model_id)
                        for member_id in arg1:
                            artist.members.add(Artist.objects.get(original_id=member_id))
                        artist.save()
                    else:
                        raise Exception(f"Illegal argument model_type: {model_type}")
                    time.sleep(0.05)
                except:
                    # Alert to Main Thread That An Exception Has Occurred
                    # The break only abandons the rest of this chunk; the
                    # worker keeps taking further chunks from the queue.
                    error.put(f'{traceback.format_exc()}\n{(model_type, model_id, arg1, arg2)} on Worker {worker_id}')
                    break
            queue.task_done()
        print(f'Worker {worker_id} Buried...')

    # Spawn Worker Threads
    workers = []
    for i in range(relating_thread_num):
        t = threading.Thread(target=worker, args=(i + 1,))
        workers.append(t)
        t.daemon = True
        t.start()

    def join():
        """Drop pending chunks, send one ``None`` sentinel per worker and wait for them."""
        with queue.mutex:
            queue.queue.clear()
        for _ in range(len(workers)):
            queue.put(None)
        for th in workers:
            th.join()

    # Make Work Items
    # Each item is a list of up to chunk_size (model_type, model_id, arg1, arg2) tuples.
    chunk_size = 10
    items = []
    music_list = list(relations['music'].items())
    for i in range(0, len(music_list), chunk_size):
        music_chunk = music_list[i:i+chunk_size]
        items.append([('music', music_id, album_id, artist_ids) for music_id, (album_id, artist_ids) in music_chunk])
    album_list = list(relations['album'].items())
    for i in range(0, len(album_list), chunk_size):
        album_chunk = album_list[i:i+chunk_size]
        items.append([('album', album_id, artist_ids, None) for album_id, artist_ids in album_chunk])
    artist_list = list(relations['artist'].items())
    for i in range(0, len(artist_list), chunk_size):
        artist_chunk = artist_list[i:i+chunk_size]
        items.append([('artist', artist_id, member_ids, None) for artist_id, member_ids in artist_chunk])
    shuffle(items)

    def provider():
        """Feed every chunk into the phase-2 queue."""
        for chunk in items:
            queue.put(chunk)

    # Put and Wait
    t = threading.Thread(target=provider)
    t.daemon = True
    t.start()
    total = len(items)
    last_time = time.time()
    # NOTE(review): the provider runs in its own thread, so this loop can be
    # skipped entirely if unfinished_tasks is still 0 when first checked -- confirm.
    while queue.unfinished_tasks:
        crawler.refresh_from_db()
        # Cancel
        if crawler.cancel:
            crawler.status = 'Canceled'
            crawler.remain = None
            update_crawler()
            join()
            print('Crawling Canceled')
            return

        # Update Progress
        progress = crawling_ratio + relating_ratio * (total - queue.unfinished_tasks) / total
        crawler.status = 'Relating'
        crawler.progress = 100 * progress
        current_time = time.time()
        if progress - crawling_ratio != 0:
            crawler.remain = (current_time - last_time) / (progress - crawling_ratio) * (1 - progress)
        update_crawler()

        # Check for completion or worker errors once per second, up to ten
        # times, before refreshing the progress fields again.
        for _ in range(10):
            if not queue.unfinished_tasks:
                break
            if error.unfinished_tasks:
                crawler.status = 'Error Occurred'
                error_message = error.get()
                print(error_message)
                crawler.error = error_message
                crawler.remain = None
                update_crawler()
                join()
                print('Relating Error Occurred')
                return
            time.sleep(1)

    # Relating Finish
    crawler.status = 'Finished'
    crawler.progress = 100
    crawler.remain = None
    update_crawler()
    join()
    print('Entire Crawling Finished')
Beispiel #24
0
 def assertCorniceNotInEnum(self, error, key):  # noqa
     """Assert ``error`` is an 'is not one of' error reported for ``key``."""
     description = error.get('description')
     self.assertIn('is not one of', description)
     reported_name = error.get('name')
     self.assertEqual(reported_name, key)
Beispiel #25
0
 def get_error(self, errors, name):  # noqa
     """Return the first entry of ``errors`` whose ``name`` equals ``name``.

     Fails the test when no entry matches.
     """
     missing = object()
     hit = next(
         (entry for entry in errors if name == entry.get('name')),
         missing,
     )
     if hit is missing:
         self.fail('no error for {0}'.format(name))
         return None
     return hit