Example #1
from time import sleep

from splinter import Browser


def xfinity(browser=None):
    if not browser:
        print ("Making browser...")
        browser = Browser('phantomjs')
    print ("Trying google.com...")
    browser.visit('http://google.com/')
    if 'google.' in browser.url:
        print ("google.com connected :)")
        return

    print ("Sign up...")
    browser.click_link_by_partial_text('Sign up')
    print ("Filling form...")
    browser.select("rateplanid", "spn")
    browser.check('spn_terms')
    browser.fill('spn_postal', '12345')
    browser.fill('spn_email', '*****@*****.**')
    print ("Submitting...")
    sleep(3) # it did not work without the sleeps
    browser.find_by_css('.startSessionButton').type(' \n')
    sleep(7)
    browser.ensure_success_response()  # implemented only by some splinter drivers (e.g. zope.testbrowser)
    print (browser.screenshot())
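
A minimal sketch of how the function above might be driven, assuming splinter is
installed and a PhantomJS binary is on the PATH (any other splinter-supported
driver name can be substituted; PhantomJS itself is no longer maintained):

from splinter import Browser

if __name__ == '__main__':
    # Reuse one browser for the whole session; splinter's Browser supports the
    # context-manager protocol and quits the driver on exit.
    with Browser('phantomjs') as shared_browser:
        xfinity(browser=shared_browser)
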
import os
import socket
import threading
import time

from bs4 import BeautifulSoup
from lxml import etree, html
from pyvirtualdisplay import Display
from selenium import webdriver
from splinter import Browser

# scream (logging), analyze_tag, parse_number, freeze, developer_revealed and
# the module globals (use_splinter, splinter__driver, timeout, force_raise,
# resume_stage, repos_reported_execution_error) come from the surrounding
# project and are assumed to be defined above this excerpt.


class GeneralGetter(threading.Thread):
    finished = False
    repository = None
    repo = None
    github_client = None
    display = None
    browser = None
    splinter__browser = None

    def __init__(self, threadId, repository, repo, github_client):
        scream.say('Initiating GeneralGetter, running __init__ procedure.')
        self.threadId = threadId
        threading.Thread.__init__(self)
        self.daemon = True
        self.finished = False
        self.repository = repository
        self.repo = repo
        self.github_client = github_client

    def run(self):
        global use_splinter

        scream.cout('GeneralGetter starts work...')
        self.finished = False
        # One display/driver per getter is reasonable: each thread works its
        # jobs sequentially, so this is the finest partitioning of drivers we
        # can use; several threads sharing one virtual display would be pointless.
        if use_splinter:
            self.initiate_splinter()
        else:
            self.initiate_selenium()
        # now it's OK to start retrieving data; let's go!
        self.get_data()

    def initiate_splinter(self):
        scream.say('Initiating splinter...')
        self.splinter__browser = Browser(splinter__driver, wait_time=timeout)
        scream.say('Splinter ready for action')

    def initiate_selenium(self):
        scream.say('Initiating selenium...')
        self.display = Display(visible=0, size=(800, 600))
        self.display.start()
        self.browser = webdriver.Firefox()
        self.browser.implicitly_wait(15)
        scream.say('Selenium ready for action')

    def analyze_with_splinter(self, repository):
        result = dict()
        scream.say('Starting webinterpret for ' + repository.html_url + '..')
        assert repository is not None
        url = repository.html_url
        assert url is not None

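        # Retry loop: re-scrape the page until the repo is classified
        # (404 / EMPTY) or all counters are parsed; transient driver and
        # network errors only delay the next attempt (unless force_raise is set).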
        while True:
            try:
                try:
                    # set_page_load_timeout() lives on the underlying selenium
                    # driver object, which splinter exposes as .driver
                    self.splinter__browser.driver.set_page_load_timeout(15)
                except Exception:
                    scream.say('driver does not support set_page_load_timeout')

                try:
                    self.splinter__browser.ensure_success_response()
                except Exception:
                    scream.say('driver does not support ensure_success_response')

                self.splinter__browser.visit(url)
                scream.say('Data from web retrieved')

                if splinter__driver in ('firefox', 'chrome', 'phantomjs'):
                    # all webdriver-backed splinter browsers expose the rendered
                    # page through the .html property
                    doc = html.document_fromstring(unicode(self.splinter__browser.html))
                elif splinter__driver == 'zope.testbrowser':
                    #splinter__browser.set_handle_robots(False)
                    doc = html.document_fromstring(unicode(self.splinter__browser.html.decode('utf-8')))
                else:
                    assert False  # remaining drivers are not supported yet
                scream.log_debug(str(url), True)
                scream.say('Continue to work on ' + url)
                scream.say('Page source sent further')

                #splinter__browser.screenshot(name=repository.key, suffix='.png')

                scream.say('Verify if 404 (repo deleted) otherwise keep on going')
                parallax = doc.xpath('//div[@id="parallax_illustration"]')

                if parallax:
                    scream.say('Verified that 404 (repo deleted)')
                    result['status'] = '404'
                    break

                scream.say('Verified that not 404')

                scream.say('Verify if repo empty otherwise keep on going')
                repo_empty = doc.xpath('//div[@class="blankslate has-fixed-width"]')

                if repo_empty:
                    scream.say('Verified that repo is empty')
                    result['status'] = 'EMPTY'
                    break

                scream.say('Verified that repo not empty')

                if splinter__driver == 'phantomjs':
                    #WebDriverWait(splinter__browser, 10).until(waiter)
                    while True:
                        scream.say("Wait for the AJAX to do the magic")
                        if self.splinter__browser.is_element_not_present_by_xpath('//span[@class="octicon octicon-organization"]//..//..//text()[normalize-space(.)="Fetching contributors"]', wait_time=5):
                            break
                        else:
                            scream.say("AJAX didnt work on time")
                    doc = html.document_fromstring(unicode(self.splinter__browser.html))

                assert "Fetching contributors" not in doc

                ns = doc.xpath('//ul[@class="numbers-summary"]')
                sunken = doc.xpath('//ul[@class="sunken-menu-group"]')
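                # numbers-summary is the strip with commits/branches/releases/
                # contributors; sunken-menu-group is the sidebar holding the
                # Issues and Pull Requests counters (GitHub's page layout when
                # this scraper was written)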

                scream.say('XPath search done for ' + url + ' .. moving on to BeautifulSoup..')
                scream.say('Element found?: ' + str(len(ns) == 1))

                element = ns[0]
                element_sunken = sunken[0]
                local_soup = BeautifulSoup(etree.tostring(element))
                local_soup_sunken = BeautifulSoup(etree.tostring(element_sunken))

                enumarables = local_soup.findAll("li")
                enumarables_more = local_soup_sunken.findAll("li")

                commits = enumarables[0]
                scream.say('enumarables[0]')
                commits_number = analyze_tag(commits.find("span", {"class": "num"}))
                scream.say('analyze_tag finished execution for commits_number')
                scream.say('Before parse number: ' + str(commits_number))
                result['commits'] = parse_number(commits_number)
                scream.log_debug(result['commits'], True)
                scream.say('enumarables[1]')
                branches = enumarables[1]
                branches_number = analyze_tag(branches.find("span", {"class": "num"}))
                scream.say('Before parse number: ' + str(branches_number))
                result['branches'] = parse_number(branches_number)
                scream.log_debug(result['branches'], True)
                scream.say('enumarables[2]')
                releases = enumarables[2]
                releases_number = analyze_tag(releases.find("span", {"class": "num"}))
                scream.say('Before parse number: ' + str(releases_number))
                result['releases'] = parse_number(releases_number)
                scream.log_debug(result['releases'], True)

                scream.say('enumarables[3]')
                contributors = enumarables[3]
                contributors_number = analyze_tag(contributors.find("span", {"class": "num"}))
                scream.say('Before parse number: ' + str(contributors_number))
                result['contributors'] = parse_number(contributors_number)
                scream.log_debug(result['contributors'], True)

                result['issues'] = 0
                result['pulls'] = 0

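                # scan the sidebar items and pick counters by aria-label;
                # issues/pulls stay 0 when the sidebar omits them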
                for enumerable___ in enumarables_more:
                    if enumerable___.get("aria-label") == "Pull Requests":
                        pulls_tag = enumerable___
                        pulls_number = analyze_tag(pulls_tag.find("span", {"class": "counter"}))
                        scream.say('Before parse number: ' + str(pulls_number))
                        result['pulls'] = parse_number(pulls_number)
                    elif enumerable___.get("aria-label") == "Issues":
                        issues_tag = enumerable___
                        issues_number = analyze_tag(issues_tag.find("span", {"class": "counter"}))
                        scream.say('Before parse number: ' + str(issues_number))
                        result['issues'] = parse_number(issues_number)
                
                result['status'] = 'OK'
                break
            except TypeError as ot:
                scream.say(str(ot))
                scream.say('Scrambled results (TypeError). GitHub may be down. Retrying..')
                time.sleep(5.0)
                if force_raise:
                    raise
            except Exception as e:
                scream.say(str(e))
                scream.say('No response from the browser driver. Retrying..')
                time.sleep(2.0)
                if force_raise:
                    raise

        assert 'status' in result
        return result


    def analyze_with_selenium(self, repository):
        result = dict()
        scream.say('Starting webinterpret for ' + repository.html_url + '..')
        assert repository is not None
        url = repository.html_url
        assert url is not None
        while True:
            try:
                self.browser.set_page_load_timeout(15)
                self.browser.get(url)
                scream.say('Data from web retrieved')
                doc = html.document_fromstring(unicode(self.browser.page_source))
                scream.log_debug(str(url), True)
                scream.say('Continue to work on ' + url)
                scream.say('Page source sent further')

                scream.say('Verify if 404 (repo deleted) otherwise keep on going')
                parallax = doc.xpath('//div[@id="parallax_illustration"]')

                if parallax:
                    scream.say('Verified that 404 (repo deleted)')
                    result['status'] = '404'
                    break

                scream.say('Verified that not 404')

                scream.say('Verify if repo empty otherwise keep on going')
                repo_empty = doc.xpath('//div[@class="blankslate has-fixed-width"]')

                if repo_empty:
                    scream.say('Verified that repo is empty')
                    result['status'] = 'EMPTY'
                    break

                scream.say('Verified that repo not empty')

                ns = doc.xpath('//ul[@class="numbers-summary"]')
                sunken = doc.xpath('//ul[@class="sunken-menu-group"]')

                scream.say('XPath search done for ' + url + ' .. moving on to BeautifulSoup..')
                scream.say('Element found?: ' + str(len(ns) == 1))

                element = ns[0]
                element_sunken = sunken[0]
                local_soup = BeautifulSoup(etree.tostring(element))
                local_soup_sunken = BeautifulSoup(etree.tostring(element_sunken))

                enumarables = local_soup.findAll("li")
                enumarables_more = local_soup_sunken.findAll("li")

                commits = enumarables[0]
                scream.say('enumarables[0]')
                commits_number = analyze_tag(commits.find("span", {"class": "num"}))
                scream.say('analyze_tag finished execution for commits_number')
                scream.say('Before parse number: ' + str(commits_number))
                result['commits'] = parse_number(commits_number)
                scream.log_debug(result['commits'], True)
                scream.say('enumarables[1]')
                branches = enumarables[1]
                branches_number = analyze_tag(branches.find("span", {"class": "num"}))
                scream.say('Before parse number: ' + str(branches_number))
                result['branches'] = parse_number(branches_number)
                scream.log_debug(result['branches'], True)
                scream.say('enumarables[2]')
                releases = enumarables[2]
                releases_number = analyze_tag(releases.find("span", {"class": "num"}))
                scream.say('Before parse number: ' + str(releases_number))
                result['releases'] = parse_number(releases_number)
                scream.log_debug(result['releases'], True)
                scream.say('enumarables[3]')
                contributors = enumarables[3]
                contributors_number = analyze_tag(contributors.find("span", {"class": "num"}))
                scream.say('Before parse number: ' + str(contributors_number))
                result['contributors'] = parse_number(contributors_number)
                scream.log_debug(result['contributors'], True)

                result['issues'] = 0
                result['pulls'] = 0

                for enumerable___ in enumarables_more:
                    if enumerable___.get("aria-label") == "Pull Requests":
                        pulls_tag = enumerable___
                        pulls_number = analyze_tag(pulls_tag.find("span", {"class": "counter"}))
                        scream.say('Before parse number: ' + str(pulls_number))
                        result['pulls'] = parse_number(pulls_number)
                    elif enumerable___.get("aria-label") == "Issues":
                        issues_tag = enumerable___
                        issues_number = analyze_tag(issues_tag.find("span", {"class": "counter"}))
                        scream.say('Before parse number: ' + str(issues_number))
                        result['issues'] = parse_number(issues_number)
                
                result['status'] = 'OK'
                break
            except TypeError as ot:
                scream.say(str(ot))
                scream.say('Scrambled results (TypeError). GitHub may be down. Retrying..')
                time.sleep(5.0)
                if force_raise:
                    raise
            except Exception as e:
                scream.say(str(e))
                scream.say('No response from selenium. Retrying..')
                time.sleep(2.0)
                if force_raise:
                    raise

        assert 'status' in result
        return result


    def is_finished(self):
        return bool(self.finished)

    def set_finished(self, finished):
        scream.say('Marking the thread ' + str(self.threadId) + ' as finished..')
        self.finished = finished

    def cleanup(self):
        global use_splinter

        if use_splinter:
            scream.say("Cleanup of splinter")
            try:
                self.splinter__browser.quit()
            except Exception:
                scream.say('Did my best to clean up')
        else:
            try:
                self.browser.close()
                self.browser.quit()
                self.display.stop()
                self.display.popen.kill()
            except Exception:
                scream.say('Did my best to clean up after selenium and pyvirtualdisplay')
                if force_raise:
                    raise
        scream.say('Marking thread on ' + self.repo.getKey() + ' as finished..')
        self.finished = True
        scream.say('Thread on ' + self.repo.getKey() + ' will end when run() returns')
        # note: threading.Thread has no terminate() method, so there is
        # nothing more to do here; the thread simply finishes


    '''
    def build_list_of_programmers(result_set_programmers,
                                  repo_key, repository)
    returns a dict (GitHub user name -> User object), 1..1:
    the key is a string contributor username (login),
    the value is the actual PyGithub User instance.
    '''
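    # Example of the returned shape (hypothetical login):
    #   {'octocat': <github.NamedUser.NamedUser instance>}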
    def build_list_of_programmers(self, result_set_programmers, repo_key, repository):
        result_set = dict()
        contributors__ = result_set_programmers

        while True:
            result_set.clear()
            try:
                for contributor in contributors__:
                    result_set[contributor.login] = contributor
                break
            except TypeError as e:
                scream.log_error('Repo + Contributor TypeError, or pagination through' +
                                 ' contributors gave an error. ' + repo_key + ', error({0})'.
                                 format(str(e)), True)
                repos_reported_execution_error.write(repo_key + os.linesep)
                if force_raise:
                    raise
                #break
            except socket.timeout as e:
                scream.log_error('Timeout while revealing details, ' +
                                 'error({0})'.format(str(e)), True)
                freeze('build_list_of_programmers will retry')
                if force_raise:
                    raise
                #break
            except Exception as e:
                scream.log_error('Exception while revealing details, ' +
                                 'error({0})'.format(str(e)), True)
                freeze('build_list_of_programmers will retry')
                if force_raise:
                    raise
                #break
        return result_set


    def get_data(self):
        global resume_stage

        scream.say('Executing inside-thread method get_data() for: ' + str(self.threadId))
        if resume_stage in [None, 'contributors']:
            #try:
            scream.ssay('Checking size of a ' + str(self.repo.getKey()) + ' team')
            # 1. Team size of a repository
            self.contributors = self.repository.get_contributors()
            assert self.contributors is not None

            self.repo_contributors = set()
            self.contributors_static = self.build_list_of_programmers(self.contributors, self.repo.getKey(), self.repository)
            for contributor in self.contributors_static.items():
                scream.log_debug('move with contributor to next from contributors_static.items()', True)
                while True:
                    scream.say('Inside the per-contributor retry loop')
                    try:
                        self.contributor_login = contributor[0]
                        self.contributor_object = contributor[1]
                        scream.say(str(self.contributor_login))
                        self.repo_contributors.add(self.contributor_login)
                        scream.say(str(self.repo_contributors))
                        developer_revealed(threading.current_thread(), self.repository, self.repo, self.contributor_object)
                        scream.say('Finished revealing developer')
                        break
                    except TypeError as e:
                        scream.log_error('Repo + Contributor TypeError, or pagination through' +
                                         ' contributors gave an error. ' + self.repo.getKey() + ', error({0})'.
                                         format(str(e)), True)
                        repos_reported_execution_error.write(self.repo.getKey() + os.linesep)
                        if force_raise:
                            raise
                        #break
                    except socket.timeout as e:
                        scream.log_error('Timeout while revealing details, ' +
                                         'error({0})'.format(str(e)), True)
                        freeze('socket.timeout in paginate through x contributors')
                        if force_raise:
                            raise
                        #break
                    except Exception as e:
                        scream.log_error('Exception while revealing details, ' +
                                         'error({0})'.format(str(e)), True)
                        freeze(str(e) + ' in paginate through x contributors')
                        if force_raise:
                            raise
                        #break

            assert self.repo_contributors is not None
            self.repo.setContributors(self.repo_contributors)
            self.repo.setContributorsCount(len(self.repo_contributors))
            scream.log('Added contributors of count: ' + str(len(self.repo_contributors)) + ' to repo ' + self.repo.getKey())
        self.cleanup()
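
A hypothetical way to drive the class above, assuming PyGithub is installed,
that the module globals mentioned earlier are configured, and stubbing the
project's repo wrapper, whose real class is not part of this excerpt (only the
methods GeneralGetter actually calls are stubbed):

from github import Github


class RepoStub(object):
    # Hypothetical stand-in for the project's repo wrapper; illustration only.
    def __init__(self, key):
        self.key = key

    def getKey(self):
        return self.key

    def setContributors(self, contributors):
        self.contributors = contributors

    def setContributorsCount(self, count):
        self.contributors_count = count


github_client = Github('<access token>')
repository = github_client.get_repo('octocat/Hello-World')
getter = GeneralGetter(1, repository, RepoStub('octocat/Hello-World'), github_client)
getter.start()
getter.join()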