Exemple #1
0
 def get_async_node_data(self, async_data: IAsyncResult) -> str:
     """Resolve an asynchronous node read and return its data as text.

     Blocks on ``async_data.get()``, which is expected to yield a
     two-item result whose first item is the raw payload. The payload is
     decoded as UTF-8 and every newline is replaced with ``<br>``.

     If the read raises ``NoNodeError`` the error is handed to
     ``ErrorCodes.make_graceful`` and the process exits with
     ``ErrorCodes.NO_NODE.value``.
     """
     try:
         raw, _stat = async_data.get()
     except NoNodeError as missing:
         ErrorCodes.make_graceful(missing, "No node found")
         sys.exit(ErrorCodes.NO_NODE.value)
     text = raw.decode("utf-8")
     return text.replace("\n", "<br>")
Exemple #2
0
def main():
    """Run the export: read the arguments, build the exporter, write JSON.

    Exits the process with ``ErrorCodes.WRONG_NUM_ARGUMENTS.value`` when
    an ``IndexError`` is raised and with
    ``ErrorCodes.NOT_IMPLEMENTED.value`` when a ``NotImplementedError``
    is raised.
    """
    try:
        host, zk_root, credentials, destination_file = get_args()
        export = ZkTreeExport.new(host, zk_root, credentials, destination_file)
        export.to_json()
    except IndexError as err:
        # sys.exit() accepts at most one argument, so the human-readable
        # message goes to make_graceful and only the code goes to exit.
        ErrorCodes.make_graceful(err, "wrong number of arguments")
        sys.exit(ErrorCodes.WRONG_NUM_ARGUMENTS.value)
    except NotImplementedError as err:
        ErrorCodes.make_graceful(err, "some function was not implemented")
        sys.exit(ErrorCodes.NOT_IMPLEMENTED.value)
Exemple #3
0
 def start_kazoo(host: str, credentials: str) -> KazooClient:
     """Create a Kazoo client, register digest credentials and connect.

     Args:
         host: Value passed to ``KazooClient(hosts=...)``.
         credentials: Credential string registered under the ``digest``
             auth scheme.

     Returns:
         The connected ``KazooClient``.

     If the connection is not established within 10 seconds the error is
     handed to ``ErrorCodes.make_graceful`` and the process exits with
     ``ErrorCodes.KAZOO_TIMEOUT.value``.
     """
     zk_client = KazooClient(hosts=host)
     zk_client.add_auth_async("digest", credentials)
     try:
         # start() waits for the connection and raises the handler's
         # timeout exception when it is not established in time; waiting
         # on the start_async() event alone returns silently on timeout,
         # which made the except branch below unreachable.
         zk_client.start(timeout=10)
     except KazooTimeoutError as err:
         ErrorCodes.make_graceful(err, "Zookeeper server timed out")
         sys.exit(ErrorCodes.KAZOO_TIMEOUT.value)
     logger.info("Zookeeper connection established")
     return zk_client
Exemple #4
0
 def __processBaseURL(self):
     '''
     Private member function that processes the base URL (self._url).

     The URL is first probed through __getDataFromURL. When the probe reports
     an error, a broken URLLinks object is built, the error is printed and
     re-raised through __raiseError, and the broken object is returned.

     Otherwise the page is downloaded and parsed, a URLLinks object is built
     for it, and one child URLLinks object is attached for every extracted
     link whose resolved address contains the base URL. The resulting object
     becomes the only entry of level 0 in Deadcheck.__levelBasedLinks; nothing
     is returned in this case.
     '''
     baseUrl = self.__dict__['_url']

     probeStart = time.time()
     handle = self.__getDataFromURL(baseUrl)
     probeEnd = time.time()
     dlTime = probeEnd - probeStart

     if self.__checkIfError(handle):
         # HTTP errors get their description from ErrorCodes; every other
         # kind of error carries its own description in handle[1].
         if handle[0] == 'HTTPError':
             einfo = ErrorCodes(int(handle[1])).getError()[1]
         else:
             einfo = handle[1]
         statusText = handle[0] + ' : ' + handle[1]
         brokenLink = URLLinks(baseUrl, None, baseUrl, None,
                               isProcessed=True, isBroken=True,
                               size='<Unknown>', dlTime=dlTime, checkTime=dlTime,
                               lastModified='<Unknwon>', info=einfo,
                               status=statusText, lType='<Unknwon>')
         self.__printError(statusText + ' : ' + einfo)
         self.__raiseError(handle, baseUrl)
         return brokenLink

     # No error reported: download the page again, timing this fetch.
     ts = time.time()
     htmlData = urllib2.urlopen(baseUrl)
     fetchEnd = time.time()
     tree = etree.HTML(htmlData.read())
     dlTime = fetchEnd - ts
     title = self.__getURLTitle(tree)
     anchors = self.__links(tree)
     (linkType, lastModified, size) = self.__getURLInfo(handle)
     pageLink = URLLinks(baseUrl, title, baseUrl, title,
                         isProcessed=True, isBroken=False, size=size, dlTime=dlTime,
                         lastModified=lastModified, info='Successfully Processed',
                         status='OK', lType=linkType)

     for anchor in anchors:
         target = str(anchor.attrib['href']).strip()
         # Fragment / relative links and links that do not contain the base
         # URL are resolved against the base URL.
         if target.startswith(('#', '.')) or baseUrl not in target:
             target = urlparse.urljoin(baseUrl, target)

         if baseUrl in target:
             pageLink.addChild(URLLinks(baseUrl, title, target, anchor.text))

     checkEnd = time.time()
     pageLink.setCheckTime(checkEnd - ts)
     Deadcheck.__levelBasedLinks[0] = [pageLink]
Exemple #5
0
def get_args():
    """Read and validate the command-line arguments from ``sys.argv``.

    Exactly three positional arguments are required: the ZooKeeper
    connection string (split by ``parse_zk_string``), the credentials
    and the destination path.

    Returns:
        A ``(host, zk_path, credentials, destination)`` tuple.

    Raises:
        IndexError: If the argument count is wrong; a usage line is
            printed first.

    If ``parse_zk_string`` raises ``NotADirectoryError`` the error is
    handed to ``ErrorCodes.make_graceful`` and the process exits with
    ``ErrorCodes.NOT_A_DIRECTORY.value``.
    """
    # TODO: Implement argument parsing for future options
    if len(sys.argv) != 4:
        print(
            f"\nUsage: python3 {basename(__file__)} "
            "host:port/root credentials /path/to/export"
        )
        raise IndexError("Wrong number of arguments")

    zk_string = sys.argv[1]
    try:
        host, zk_path = parse_zk_string(zk_string)
    except NotADirectoryError as err:
        # zk_path is never bound when parsing fails, so the message
        # reports the raw argument the user supplied instead.
        ErrorCodes.make_graceful(err, f"{zk_string} is not a directory")
        sys.exit(ErrorCodes.NOT_A_DIRECTORY.value)

    credentials = sys.argv[2]
    destination = sys.argv[3]
    return (host, zk_path, credentials, destination)
Exemple #6
0
 def __raiseError(self, value, *url):
     '''
     Private member function that turns an error description back into an exception.

     value[0] names the kind of error and value[1] carries its detail. The
     first extra positional argument is used as the URL when an HTTPError is
     raised. Kinds other than the four handled below raise nothing and the
     function simply returns.
     '''
     kind = value[0]
     if kind == 'HTTPError':
         code = int(value[1])
         raise urllib2.HTTPError(url[0], code, ErrorCodes(code).getError(), None, None)
     if kind == 'URLError':
         raise urllib2.URLError(value[1])
     if kind == 'HTTPException':
         raise httplib.HTTPException(value[1])
     if kind == 'Generic Exception':
         raise Exception(kind + ' : ' + value[1])
Exemple #7
0
    def new(cls, host: str, zk_root: str, credentials: str,
            destination: str) -> "ZkTreeExport":
        """Build an exporter, connect it and verify the destination.

        Creates the instance from ``zk_root`` and ``destination``,
        attaches the client returned by ``ZkTreeExport.start_kazoo`` and
        then runs ``ZkTreeExport.test_write_permission`` on the
        destination.

        Returns:
            The initialised instance.

        The process exits with ``ErrorCodes.IS_A_DIRECTORY.value`` or
        ``ErrorCodes.NO_WRITE_PERMISSION.value`` when the permission
        test raises ``IsADirectoryError`` or ``PermissionError``.
        """
        instance = cls(zk_root, destination)
        instance.zk_client = ZkTreeExport.start_kazoo(host, credentials)
        try:
            ZkTreeExport.test_write_permission(destination)
            logger.debug("Write permission successful.")
        except IsADirectoryError as err:
            # f-string so the actual path is reported rather than the
            # literal text "{destination}".
            ErrorCodes.make_graceful(err, f"{destination} is a directory")
            sys.exit(ErrorCodes.IS_A_DIRECTORY.value)
        except PermissionError as err:
            ErrorCodes.make_graceful(err,
                                     f"no write permission in {destination}")
            sys.exit(ErrorCodes.NO_WRITE_PERMISSION.value)

        return instance
def AutoLogin():
    """Log in with the built-in test account and download every ticket.

    Requests the first page of tickets. When the response status is not
    200 the status is passed to ``ErrorCodes``, the function waits five
    seconds, clears the screen and returns ``None``.

    Otherwise the screen is cleared and the remaining pages are fetched
    until as many tickets as the reported ``count`` have been collected
    or a page comes back empty.

    Returns:
        A list with one entry per ticket (empty when the account has no
        tickets), or ``None`` when the first request fails.
    """
    # NOTE(review): these credentials are embedded in the source; base64 is
    # an encoding, not encryption. They should be moved to configuration or
    # the environment and rotated.
    credentials = (
        base64.b64decode(b'YWJoaXNoZWtwYWh1amFAaG90bWFpbC5jb20=').decode('utf-8'),
        base64.b64decode(b'UElSQVRFU29mVEhFY2FyaWJiZWFu').decode('utf-8'),
    )
    session = requests.Session()
    session.auth = credentials
    zendesk = 'https://thedottedline.zendesk.com/api/v2/tickets.json?page='

    response = session.get(zendesk + '1')
    if response.status_code != 200:
        # Call the error code method from errorCode.py
        ErrorCodes(response.status_code)
        # Give the user time to read the error before clearing the screen
        sleep(5)
        clear()
        return None

    clear()
    first_page = response.json()
    total_records = first_page['count']

    # This check used to sit inside the download loop, which is never
    # entered for an empty account, so the message could not be shown.
    if total_records == 0:
        print(
            'this account has zero tickets. requsting tickets dislay will result in blank output'
        )
        return []

    # The first response already holds page 1, so it is not requested again.
    tickets = list(first_page['tickets'])
    page = 2
    while len(tickets) < total_records:
        page_tickets = session.get(zendesk + str(page)).json()['tickets']
        if not page_tickets:
            # The API returned fewer tickets than it announced; stop rather
            # than requesting empty pages forever.
            break
        tickets.extend(page_tickets)
        page += 1
    return tickets
Exemple #9
0
 def __analyze(self, url):
     '''
     Private member function that analyzes a single URL and returns a URLLinks object for it.

     The URL is first probed through __getDataFromURL. When the probe reports
     an error, a URLLinks object marked as broken is returned.

     Otherwise the page is downloaded and parsed, and the returned URLLinks
     object gets one child for every extracted link whose resolved address
     contains the network location of the given URL.
     '''
     probeStart = time.time()
     handle = self.__getDataFromURL(url)
     probeEnd = time.time()
     dlTime = probeEnd - probeStart

     if self.__checkIfError(handle):
         # HTTP errors get their description from ErrorCodes; every other
         # kind of error carries its own description in handle[1].
         if handle[0] == 'HTTPError':
             einfo = ErrorCodes(int(handle[1])).getError()[1]
         else:
             einfo = handle[1]
         statusText = handle[0] + ' : ' + handle[1]
         return URLLinks(url, None, url, None, isProcessed=True, isBroken=True,
                         size='<Unknown>', dlTime=dlTime, checkTime=dlTime,
                         lastModified='<Unknwon>', info=einfo,
                         status=statusText, lType='<Unknwon>')

     # No error reported: download the page again, timing this fetch.
     ts = time.time()
     htmlData = urllib2.urlopen(url)
     fetchEnd = time.time()
     tree = etree.HTML(htmlData.read())
     dlTime = fetchEnd - ts
     title = self.__getURLTitle(tree)
     anchors = self.__links(tree)
     (linkType, lastModified, size) = self.__getURLInfo(handle)
     pageLink = URLLinks(url, title, url, title, isProcessed=True, isBroken=False,
                         size=size, dlTime=dlTime, lastModified=lastModified,
                         info='Successfully Processed', status='OK', lType=linkType)

     for anchor in anchors:
         target = str(anchor.attrib['href']).strip()
         # Fragment / relative links and links that do not contain the URL
         # are resolved against it.
         if target.startswith(('#', '.')) or url not in target:
             target = urlparse.urljoin(url, target)

         if urlparse.urlparse(url).netloc in target:
             pageLink.addChild(URLLinks(url, title, target, anchor.text))

     checkEnd = time.time()
     pageLink.setCheckTime(checkEnd - ts)

     return pageLink
Exemple #10
0
 def process(self):
     '''
     Method called on the Deadcheck object from the main script to process the links
     level by level, up to the configured depth.

     For every URLLinks object stored for a level, each child link is taken in turn and,
     unless it is exempted or was already processed, it is fetched and checked.

     Based on the outcome, the child object is updated through the set methods of the
     URLLinks class (info, status, broken flag, timings, size, last-modified, type).

     Each successfully processed page gets its own URLLinks object carrying the links
     extracted from it; that object is stored under the next level.

     These stored objects are what the loop works through on the next depth / level.

     Nothing is returned.
     '''
     self.__loadExempt()
     if ( self.get_depth() == 0 ):
         # NOTE(review): __analyze is called without arguments here; confirm
         # that its signature in this class accepts that.
         self.__analyze() 
     else:
         for level in range(self.get_depth()+1):
             # Start an empty bucket for the pages discovered on this level.
             Deadcheck.__levelBasedLinks[level+1] = []
             for vobj in self.getAll()[level]:
                 for obj in vobj.getChildren():
                     # t1 marks the start of the whole check for this child link.
                     t1 = time.time()
                     (url, title) = obj.get()
                     #if ( not Deadcheck.__ProcessedLinks.has_key(url) and not self.__checkExempt(url) and 'javascript' not in url.lower()):
                     # dict.has_key exists only in Python 2.
                     if ( not Deadcheck.__ProcessedLinks.has_key(url) and not self.__checkExempt(url) ):
                         # Record the URL (as extracted, before any JavaScript
                         # cleanup) so it is not processed a second time.
                         Deadcheck.__ProcessedLinks[url] = 1
                         # Process javascript:openWindow type URL to extract necessary links. 
                         self.__printMessage("Processing Link : " + url);
                         if ( 'javascript' in url.lower()):
                             url = self.__cleanupJavaScript(url)
                             
                         # Probe the URL and time the probe.
                         ts = time.time()
                         handle = self.__getDataFromURL(url)
                         ted = time.time()
                         if ( self.__checkIfError(handle)):
                             # HTTP errors get their description from ErrorCodes;
                             # other errors carry it in handle[1].
                             if ( handle[0] == 'HTTPError'):
                                 eCode = ErrorCodes(int(handle[1]))
                                 einfo = eCode.getError()[1]
                             else:
                                 einfo = handle[1]
                             obj.setInfo(einfo)
                             obj.setProcessed(True)
                             obj.setBroken(True)
                             obj.setStatus(handle[0] + ' : ' + str(handle[1]))
                             obj.setDLTime(ted-ts)
                             obj.setSize('<Unknown>')
                             obj.setLastModified('<Unknown>')
                             obj.setType('<Unknown>')
                             obj.setCheckTime(ted-ts)
                             
                             # Python 2 print statement.
                             print 'Broken ' + str(obj.get()) 
                             self.__printError('Broken Link ' + str(obj.get()));
                         else:
                             # No error reported: download the page again and
                             # time this second fetch.
                             ts = time.time()
                             htmlData = urllib2.urlopen(url)
                             ted = time.time()
                             data = etree.HTML(htmlData.read())
                             dlTime = ted - ts
                             title = self.__getURLTitle(data)
                             links = self.__links(data)
                             (lTtype, lastChagned, size) = self.__getURLInfo(htmlData)
                             status = 'OK'
                             urlObj = URLLinks(url, title, url, title, isProcessed=True, isBroken=False, size=size, dlTime=dlTime, lastModified=lastChagned, 
                                               info='Successfully Processed', status=status, lType=lTtype)
                             
                             for link in links:
                                 cLink = str(link.attrib['href']).lstrip().rstrip()
                                 # Fragment / relative links and links that do not
                                 # contain the page URL are resolved against it.
                                 if ( cLink.startswith('#') or cLink.startswith('.') or cLink.startswith('..') or url not in cLink):
                                     cLink = urlparse.urljoin(url, cLink)
                                 
                                 # Keep only links whose address contains the
                                 # network location of the base URL (self._url).
                                 if ( urlparse.urlparse(self.__dict__['_url']).netloc in cLink):
                                     cTitle = link.text
                                     temp = URLLinks(url, title, cLink, cTitle, status='UNPROCESSED')
                                     urlObj.addChild(temp)
                             te = time.time()
                             cTime = te - ts
                             urlObj.setCheckTime(cTime)
                             # Queue the page so its children are handled on the next level.
                             Deadcheck.__levelBasedLinks[level+1].append(urlObj)
                             t2 = time.time()
                             obj.setInfo('Successfully Processed.')
                             obj.setProcessed(True)
                             obj.setBroken(False)
                             obj.setStatus('OK')
                             obj.setDLTime(dlTime)
                             obj.setSize(size)
                             obj.setLastModified(lastChagned)
                             obj.setType(lTtype)
                             obj.setCheckTime(t2-t1)
                     else:
                             # Reached when the URL is exempted or was already processed.
                             if ( self.__checkExempt(url)):
                                 obj.setInfo('Exempted based on the Input file : ' + self.__dict__['_exempt'])
                                 obj.setStatus('EXEMPTED')
                                 self.__printWarning("URL Exempted : " + url);
                             # Only reachable for a non-exempt URL that is already
                             # in __ProcessedLinks; the test is case-sensitive,
                             # unlike the lower-cased test used above.
                             elif ( 'javascript' in url ):
                                 obj.setInfo('Javascript Links are not processed. Implementation underway.')
                                 obj.setStatus('WARNING')
                             else:
                                 obj.setInfo('URL Already Processed. Will not be processed again.')
                                 obj.setStatus('SKIPPED')
                                 self.__printWarning("Skipping URL : " + url);    
                             # Nothing was downloaded for this link, so no
                             # measurements are recorded.
                             obj.setProcessed(True)
                             obj.setBroken(False)
                             obj.setDLTime(None)
                             obj.setSize(None)
                             obj.setLastModified(None)
                             obj.setType(None)
                             obj.setCheckTime(None)
Exemple #11
0
def Login():
    """Prompt for Zendesk login details and download the account's tickets.

    The user is asked for a domain name, an email address and a password
    while fewer than three attempts have been counted. In the success
    branch every ticket page is downloaded and the collected tickets are
    returned. When the loop ends without returning, ``InvalidAttempts``
    is called with the attempt counter.

    NOTE(review): this function is incomplete as it stands. Part of its
    text has been replaced by ``******`` (see the password prompt below),
    so the code that creates ``session``, sends the first request, tests
    the response and updates ``counter`` is not present.
    """
    # Set counter to 0
    counter = 0
    # If user does not make 3 incorrect attempts continue
    while counter < 3:
        print('https://{DomainName}.zendesk.com')
        # Get user's login details
        domain = input("Please enter the domain name (without curly braces): ")
        email = input("Please enter your email address: ")
        # NOTE(review): the source is damaged from here up to the ``else``
        # below: the getpass() call is never closed and runs straight into
        # the text of a failure message, and the ``if`` that the ``else``
        # belongs to is missing. Restore the original code before relying
        # on this function.
        password = getpass("Please enter your password: "******"Please check the data you entered."
                  "\nDomain Name: " + domain + "\nEmail ID: " + email +
                  "\nPassword: "******"\nTotal attempts remaining: " +
                  str(3 - counter))
        else:
            clear()
            # Check the number of pages
            DATA = response.json()
            total_records = DATA['count']
            # Calculate the number of pages
            Total_Pages = int(total_records / 100) + 1
            # Pre-fill both lists with the first response as a placeholder;
            # DataArray entries are overwritten with tickets below.
            DataArray = [DATA] * (total_records)
            RecordArray = [DATA] * Total_Pages
            PageCount = 1
            recordcounter = 0
            # Put all the data in arrays
            while recordcounter < total_records:
                # RecordArray is only used to repeat the body once per page;
                # the loop variable is overwritten with the page just fetched.
                for records in RecordArray:
                    zendesk = 'https://' + domain + '.zendesk.com/api/v2/tickets.json?page='
                    url = zendesk + str(PageCount)
                    response = session.get(url)
                    # get all the ticket records
                    records = response.json()
                    # Get individual ticket records
                    for data in records['tickets']:
                        DataArray[recordcounter] = data
                        # Increment the record counter
                        recordcounter += 1
                    # Increment the to next page
                    PageCount += 1
            # Set counter to 4 to break while loop
            counter = 4
            # display a message if no tickets were found
            if total_records == 0:
                print(
                    'this account has zero tickets. requsting tickets dislay will result in blank output'
                )
            # Return the ticket data
            return DataArray

    # If the user made 3 invalid attempts, close the program.
    InvalidAttempts(counter)