コード例 #1
0
    def __init__(self, config=None):
        """Build a Sklik API client and log it in.

        The session token returned by the login call is kept on the
        instance; it stays ``None`` until the login has succeeded.

        :param config: Sklik API client configuration instance; its
            ``namespace``, ``debug``, ``username`` and ``password``
            attributes are read here
        :raises SklikApiError: when no config is given, or when the login
            call answers with a status other than 200, 400 or 401
        :raises ArgumentError: when the login call answers with status 400
        :raises AuthenticationError: when the login call answers with
            status 401
        """
        self.__session = None

        if not config:
            raise SklikApiError("No config given")

        self.__proxy = _create_server_proxy(
            config.namespace,
            verbose=config.debug,
            allow_none=True,
        )

        response = self.__proxy.client.login(config.username, config.password)
        status = response["status"]

        # Anything but 200 is a failure; pick the most specific error type.
        if status != 200:
            if status == 400:
                raise ArgumentError(response["statusMessage"],
                                    response["errors"])
            if status == 401:
                raise AuthenticationError(response["statusMessage"])
            raise SklikApiError(response["statusMessage"])

        self.__session = response["session"]
コード例 #2
0
    def __init__(self, config=None):
        """Build a Sklik API client, remember its config and log it in.

        Keyword arguments:

            config: sklik API client configuration instance; its
                ``namespace``, ``username`` and ``password`` attributes
                are read here

        Raises ``SklikApiError`` when no config is given or the login
        status is not one of 200/400/401, ``ArgumentError`` on status 400
        and ``AuthenticationError`` on status 401.
        """
        self.__session = None

        if not config:
            raise SklikApiError("No config given")
        self.__config = config

        settings = self.__config
        self.__proxy = ServerProxy(settings.namespace, allow_none=True)
        reply = self.__proxy.client.login(settings.username,
                                          settings.password)

        # Map the API status code onto the matching exception type.
        status = reply["status"]
        if status == 400:
            raise ArgumentError(reply["statusMessage"], reply["errors"])
        if status == 401:
            raise AuthenticationError(reply["statusMessage"])
        if status != 200:
            raise SklikApiError(reply["statusMessage"])

        self.__session = reply["session"]
コード例 #3
0
    def __init__(self, url, parent=None):
        """Instantiates a Link object

        Args:
            url (str): The URL string either relative or absolute
            parent (Link, optional): If this is None, then the current Link
                will serve as the root, else a parent will be assigned
                to the current Link instance and ``url`` is resolved
                against ``parent.url``

        Raises:
            ArgumentError: If ``parent`` is given but is not a Link
        """
        # validate args
        # Compare against None explicitly: a falsy non-Link value (e.g. ""
        # or 0) must be rejected here rather than slipping through and
        # failing later on ``parent.level``.
        if parent is not None:
            if not isinstance(parent, Link):
                # Wrap in a 1-tuple so a tuple argument is formatted
                # instead of being unpacked by the % operator.
                raise ArgumentError('Link class should be instantiated with '
                    'a valid parent Link object. Got this: %s' % (parent,))
            url = URLUtils.absolute(parent.url, url)

        # link info
        self.parent = parent
        self.url = URLUtils.normalize(url)
        # NOTE(review): the domain is derived from the un-normalized url
        # while the fields below use self.url -- confirm this is intended.
        self.domain = URLUtils.get_domain(url)
        self.is_valid = URLUtils.is_valid(self.url)
        self.id = URLUtils.hash(self.url)
        # the root link sits at level 0; every child is one level deeper
        self.level = 0 if parent is None else parent.level + 1
コード例 #4
0
 def add_hand(self, hand: Hand) -> None:
     """Give the player a Hand, filling the first free hand slot.

     ``hand1`` is filled before ``hand2``. When both slots are already
     occupied an ArgumentError is raised and nothing is changed.
     """
     if not self.hand1:
         self.hand1 = hand
         return

     if not self.hand2:
         self.hand2 = hand
         return

     raise ArgumentError(
         "Trying to add a hand to player who already has 2 hands. A player can't have a third hand."
     )
コード例 #5
0
    def __del__(self):
        """Logs out of the API session, if one was opened.

        Returns immediately when no session token is stored. Otherwise the
        logout call is issued and a non-200 status is mapped onto
        ArgumentError (400), AuthenticationError (401) or SklikApiError
        (anything else).

        Note: Python does not propagate exceptions raised inside
        ``__del__``; they are reported on stderr and otherwise ignored,
        so callers cannot catch the errors raised here.
        """

        # Identity check: None is a singleton, and ``==`` could be
        # overridden by whatever object is stored as the session.
        if self.__session is None:
            return

        res = self.__proxy.client.logout(self.__session)

        if res["status"] == 400:
            raise ArgumentError(res["statusMessage"], res["errors"])
        elif res["status"] == 401:
            raise AuthenticationError(res["statusMessage"])
        elif res["status"] != 200:
            raise SklikApiError(res["statusMessage"])
コード例 #6
0
    def _get_nodes_and_marks(self, string):
        """
            Split an edge description into its two nodes and the edge
            token between them.

            Each entry of POSSIBLE_EDGES is tried in order; the first one
            that occurs in the string wins. Surrounding spaces are
            stripped from both node names.

            Parameters:
                string: str

            Returns: tuple of (node_1, edge, node_2)

            Raises:
                ArgumentError if none of the possible edges occurs.
        """
        for edge in self.POSSIBLE_EDGES:
            pieces = string.split(edge)

            if len(pieces) >= 2:
                left, right = pieces[0], pieces[1]
                return left.strip(' '), edge, right.strip(' ')

        message = "{} has an unrecognized edge. Possible edges: {}".format(
            string, self.POSSIBLE_EDGES)
        raise ArgumentError(message)
コード例 #7
0
    def set_out_of(self, node):
        """
            Put a tail mark on the side of the given node.

            Parameters:
                node: str
                    Must be one of this object's two nodes.

            Raises:
                ArgumentError if node is neither of the two nodes.
        """
        endpoints = (self.node_1, self.node_2)
        if node not in endpoints:
            raise ArgumentError('Node {} not found'.format(node))

        # Select which mark attribute belongs to the requested node.
        mark_attr = 'node_1_mark' if node == self.node_1 else 'node_2_mark'
        setattr(self, mark_attr, PartialAncestralGraph.TAIL)
コード例 #8
0
    def set_into(self, node):
        """
            Put an arrowhead mark on the side of the given node: the left
            arrowhead for the first node, the right arrowhead for the
            second node.

            Parameters:
                node: str
                    Must be one of this object's two nodes.

            Raises:
                ArgumentError if node is neither of the two nodes.
        """
        endpoints = (self.node_1, self.node_2)
        if node not in endpoints:
            raise ArgumentError('Node {} not found'.format(node))

        if node == self.node_1:
            self.node_1_mark = PartialAncestralGraph.LEFT_ARROWHEAD
            return

        self.node_2_mark = PartialAncestralGraph.RIGHT_ARROWHEAD
コード例 #9
0
    def out_of(self, node):
        """
            Tell whether the mark on the side of the given node is a tail.

            Parameters:
                node: str
                    Must be one of this object's two nodes.
            Returns: bool
                True when the mark next to the node is a tail,
                False otherwise.

            Raises:
                ArgumentError if node is neither of the two nodes.
        """
        endpoints = (self.node_1, self.node_2)
        if node not in endpoints:
            raise ArgumentError('Node {} not found'.format(node))

        return bool(
            (node == self.node_1
             and self.node_1_mark == PartialAncestralGraph.TAIL)
            or (node == self.node_2
                and self.node_2_mark == PartialAncestralGraph.TAIL)
        )
コード例 #10
0
    def has_word(self, word):
        """Tells whether a given word occurs in the text of this page

        The lookup is a substring test against ``self.text_content``.

        Args:
            word (str): A non empty stop word

        Returns:
            bool: True, if the word occurs in the text content; else False

        Raises:
            ArgumentError: If ``word`` is empty
        """
        if word:
            return word in self.text_content

        # an empty word is rejected rather than trivially matching
        raise ArgumentError('Param "word" cannot be empty')
コード例 #11
0
    def into(self, node):
        """
            Tell whether the mark on the side of the given node is an
            arrowhead (left arrowhead for the first node, right arrowhead
            for the second node).

            Parameters:
                node: str
                    Must be one of this object's two nodes.
            Returns: bool
                True when the matching arrowhead is set next to the node,
                False otherwise.

            Raises:
                ArgumentError if node is neither of the two nodes.
        """
        endpoints = (self.node_1, self.node_2)
        if node not in endpoints:
            raise ArgumentError('Node {} not found'.format(node))

        return bool(
            (node == self.node_1
             and self.node_1_mark == PartialAncestralGraph.LEFT_ARROWHEAD)
            or (node == self.node_2
                and self.node_2_mark == PartialAncestralGraph.RIGHT_ARROWHEAD)
        )
コード例 #12
0
def validate_config(config):
    """Validate the ``metadata`` section of a configuration object.

    Checks are performed in this order:

    1. ``metadata["name"]`` consists of 1 to 63 letters, digits or hyphens.
    2. ``metadata["regions"]`` holds exactly two entries.
    3. Every region is among the names returned by EC2
       ``describe_regions`` (queried through ``config.bastion.region``).
    4. ``list_clusters`` can be called on the EKS client of every region.

    Args:
        config: Object exposing ``yaml`` (a mapping with a ``"metadata"``
            entry) and ``bastion.region``.

    Raises:
        Exception: If the name is malformed, a region is unknown, or the
            EKS service cannot be reached in a region (the underlying
            error is attached as ``__cause__``).
        ArgumentError: If the number of regions is not exactly 2.
    """
    metadata = config.yaml["metadata"]

    # fullmatch, unlike match with a "$" anchor, also rejects a name that
    # ends with a newline character.
    if not re.fullmatch(r"[-A-Za-z0-9]{1,63}", metadata["name"]):
        raise Exception(
            "Name must be at least 1 character in length letters and numbers")

    if len(metadata["regions"]) != 2:
        raise ArgumentError("Please specify exactly 2 regions")

    ec2 = boto3.client("ec2", region_name=config.bastion.region)
    ec2_regions = ec2.describe_regions()["Regions"]
    region_names = [val["RegionName"] for val in ec2_regions]

    for region in metadata["regions"]:
        # Checking region validity first for faster response
        if region not in region_names:
            raise Exception(f"Region \"{region}\" is not valid")

        try:
            eks = boto3.client("eks", region_name=region)
            eks.list_clusters()
        except Exception as ex:
            # keep the original failure reachable for debugging
            raise Exception(
                f"Can't access EKS service in \"{region}\"") from ex
コード例 #13
0
    def __init__(self, link):
        """Instantiates a HTMLPage object

        Fetches ``link.url`` with an HTTP GET and stores the status code,
        the UTF-8 encoded body, its text rendering, the response encoding
        and the links found on the page.

        Args:
            link (Link): A valid Link object

        Raises:
            ArgumentError: If ``link`` is not a Link or is not valid
        """
        # validate args
        if not isinstance(link, Link) or not link.is_valid:
            # Wrap in a 1-tuple so a tuple argument is formatted instead
            # of being unpacked by the % operator (which raised TypeError).
            raise ArgumentError('HTMLPage class should be instantiated with '
                'a valid Link object. Got this: %s' % (link,))
        self.link = link

        # fetch the actual webpage
        response = requests.get(link.url)
        self.status_code = response.status_code
        # the body is kept as UTF-8 bytes, whatever the response encoding
        self.html_content = response.text.encode('utf8')
        self.text_content = HTMLUtils.html_to_text(self.html_content)
        self.encoding = response.encoding

        # fetch all child links
        self.child_links = self._get_all_links()
コード例 #14
0
    def search(self,
               search_code,
               search_type,
               start_url,
               max_level,
               stop_words=None,
               allowed_domains=None,
               persist=True):
        """Starts from the given URL, performs a Breadth First Search (BFS)
        or a Depth First Search (DFS) and continues until no links are
        left within the maximum level or until one of the given stop words
        is encountered

        Args:
            search_code (str): A unique string identifying the
                current search request
            search_type (str): Allowed values are "BFS" or "DFS"
                (case-insensitive)
            start_url (str): Search starting point
            max_level (int): The maximum level until which the search should
                be performed
            stop_words (list(str), optional): The list of words which is to be
                checked for to halt the current search
            allowed_domains (list(str), optional): The list of domains to
                restrict the URLs to, while performing the search. The
                domain of ``start_url`` is always allowed; the caller's
                list is not modified
            persist (bool, optional): If True, the search results will be
                persisted in the database

        Returns:
            set(Link): The unique Link objects visited while performing
                the search

        Raises:
            ArgumentError: If arguments are faulty
            PersistenceExecuteError: For DB Query Execution issues
            PersistenceError: For any other DB related issue
        """
        from collections import deque

        # validate args
        # Values are wrapped in 1-tuples so that a tuple argument is
        # formatted into the message instead of breaking the % operator.
        if not isinstance(search_type, str) or \
        search_type.upper() not in ['BFS', 'DFS']:
            raise ArgumentError('Param "search_type" must be either of '
                '"bfs" or "dfs". Got: %s' % (search_type,))
        if not isinstance(start_url, str):
            # report the offending value itself; no Link exists yet here
            raise ArgumentError('Param "start_url" must be a string. '
                'Got: %s' % (start_url,))
        start_link = Link(start_url)
        if not start_link.is_valid:
            raise ArgumentError('Param "start_url" must be a valid URL. '
                'Got: %s' % (start_link.url,))
        if not isinstance(max_level, int):
            raise ArgumentError('Param "max_level" must be an integer. '
                'Got: %s' % (max_level,))

        # The validation above is case-insensitive, so the traversal
        # decision must be as well ("bfs" must not silently run as DFS).
        breadth_first = search_type.upper() == 'BFS'

        if stop_words is None:
            stop_words = ()

        # Work on a private copy: the start domain is appended below and
        # must leak neither into the caller's list nor into later calls.
        allowed_domains = [] if allowed_domains is None \
            else list(allowed_domains)

        # add the current domain to the list of allowed domains
        if start_link.domain not in allowed_domains:
            allowed_domains.append(start_link.domain)

        if persist:
            search_info = {
                'search_code': search_code,
                'search_type': search_type,
                'start_url': start_url,
                'max_level': max_level,
                'crawled_date_time': datetime.today()
            }

            # save job details to DB
            try:
                self.db.insert('CRAWL.INFO', search_info)
                self.db.commit()
            except PersistenceExecuteError:
                self.db.rollback()
                logging.error(
                    'Insertion to CRAWL.INFO table failed for user: %s',
                    self.user_id)
                raise

        # initiate the data structures
        # NOTE(review): de-duplication relies on how Link implements
        # equality/hashing -- confirm it compares by URL.
        visited = set()
        # deque gives O(1) removal at both ends (list.pop(0) is O(n))
        queue = deque([start_link])
        stop_word_hit = False

        # perform search
        while queue and not stop_word_hit:
            # BFS consumes the oldest entry, DFS the most recent one
            link = queue.popleft() if breadth_first else queue.pop()

            if not link.is_valid or link in visited:
                continue

            # visit the current link
            visited.add(link)

            if persist:
                # save the link to DB
                link_data = {
                    'search_code': search_code,
                    'id': link.id,
                    'url': link.url,
                    'level': link.level,
                    # falsy parent is stored as-is, else the parent's id
                    'parent_id': link.parent and link.parent.id
                }
                try:
                    self.db.insert('CRAWL.DATA', link_data)
                except PersistenceExecuteError:
                    logging.error(
                        'Insertion to CRAWL.DATA table failed for user: %s',
                        self.user_id)
                    raise

            # parse current page and get all child links
            page = HTMLPage(link)
            if page.status_code == 200 and link.level < max_level:
                queue.extend(page.get_links(allowed_domains))
            logging.info(page)

            # check if the current page has one of the given stop words
            stop_word_hit = any(page.has_word(word) for word in stop_words)

        self.db.commit()
        return visited