def xfinity(browser=None):
    """Auto-connect through an Xfinity WiFi captive portal.

    If *browser* is not given, a PhantomJS splinter Browser is created.
    google.com is visited first: when it loads un-redirected we are already
    online and the function returns immediately.  Otherwise the captive
    portal's sign-up form is filled in and submitted, and a screenshot path
    is printed for debugging.
    """
    if not browser:
        print ("Making browser...")
        browser = Browser('phantomjs')

    print ("Trying google.com...")
    browser.visit('http://google.com/')
    # No portal redirect -> already connected, nothing else to do.
    if 'google.' in browser.url:
        print ("google.com connected :)")
        return

    print ("Sign up...")
    browser.click_link_by_partial_text('Sign up')

    print ("Filling form...")
    browser.select("rateplanid", "spn")
    browser.check('spn_terms')
    browser.fill('spn_postal', '12345')
    browser.fill('spn_email', '*****@*****.**')

    print ("Submitting...")
    sleep(3)  # portal JS needs a moment; submitting immediately fails
    browser.find_by_css('.startSessionButton').type(' \n')
    sleep(7)  # give the session time to be established
    browser.ensure_success_response()
    print (browser.screenshot())
class GeneralGetter(threading.Thread):
    """Worker thread that harvests statistics for one GitHub repository.

    Depending on the module-level ``use_splinter`` flag it drives either a
    splinter Browser or a raw selenium Firefox (inside a pyvirtualdisplay
    virtual X display), scrapes the repository page for commit/branch/
    release/contributor counts, and reveals contributor details via the
    module-level ``developer_revealed`` helper.

    NOTE(review): relies on many module globals defined elsewhere in the
    file: ``scream`` (logger), ``use_splinter``, ``splinter__driver``,
    ``timeout``, ``force_raise``, ``resume_stage``, ``key``,
    ``repos_reported_execution_error``, ``freeze``, ``analyze_tag``,
    ``parse_number``, ``developer_revealed``.
    """

    # Class-level defaults; instance attributes shadow these in __init__/run.
    finished = False            # True once get_data()/cleanup() completed
    repository = None           # PyGithub repository object (presumably) — confirm with caller
    repo = None                 # project-local repo wrapper (has getKey/setContributors)
    github_client = None        # PyGithub client handle (unused in this class body)
    display = None              # pyvirtualdisplay.Display (selenium mode only)
    browser = None              # selenium webdriver.Firefox (selenium mode only)
    splinter__browser = None    # splinter Browser (splinter mode only)

    def __init__(self, threadId, repository, repo, github_client):
        """Store work-item references; heavy browser setup is deferred to run()."""
        scream.say('Initiating GeneralGetter, running __init__ procedure.')
        self.threadId = threadId
        threading.Thread.__init__(self)
        # Daemon thread: does not block interpreter shutdown.
        self.daemon = True
        self.finished = False
        self.repository = repository
        self.repo = repo
        self.github_client = github_client

    def run(self):
        """Thread entry point: set up one browser/driver, then fetch the data."""
        global use_splinter
        scream.cout('GeneralGetter starts work...')
        self.finished = False
        # it is quite reasonable to initiate a display driver for selenium
        # per one getter, threads work on jobs linear so its the max partition of driver
        # we can allow, multiple threads working on one virtual display - its without sense
        if use_splinter:
            self.initiate_splinter()
        else:
            self.initiate_selenium()
        # now its ok to start retrieving data.. allonsy !
        self.get_data()

    def initiate_splinter(self):
        """Create the splinter Browser using module-level driver name and timeout."""
        scream.say('Initiating splinter...')
        self.splinter__browser = Browser(splinter__driver, wait_time=timeout)
        scream.say('Splinter ready for action')

    def initiate_selenium(self):
        """Start a hidden virtual display and a Firefox webdriver inside it."""
        scream.say('Initiating selenium...')
        self.display = Display(visible=0, size=(800, 600))
        self.display.start()
        self.browser = webdriver.Firefox()
        # Implicit wait: element lookups poll up to 15 s before failing.
        self.browser.implicitly_wait(15)
        scream.say('Selenium ready for action')

    def analyze_with_splinter(self, repository):
        """Scrape *repository*'s GitHub page with splinter.

        Returns a dict with 'status' ('404', 'EMPTY' or 'OK') and, on 'OK',
        integer 'commits', 'branches', 'releases', 'contributors', 'issues'
        and 'pulls' counts.  Retries forever on errors (unless the global
        ``force_raise`` is set, in which case exceptions propagate).
        """
        result = dict()
        scream.say('Starting webinterpret for ' + repository.html_url + '..')
        assert repository is not None
        url = repository.html_url
        assert url is not None
        # Retry loop: only a successful scrape (or a recognized 404/EMPTY
        # page) breaks out.
        while True:
            try:
                # Best-effort pre-flight calls; failures are silently logged
                # because not every splinter driver implements them.
                try:
                    self.splinter__browser.set_page_load_timeout(15)
                except:
                    scream.say('')
                try:
                    self.splinter__browser.ensure_success_response()
                except:
                    scream.say('')
                self.splinter__browser.visit(url)
                scream.say('Data from web retrieved')
                # Each splinter driver exposes the page source differently.
                if splinter__driver == 'firefox':
                    doc = html.document_fromstring(unicode(self.splinter__browser.page_source))
                elif splinter__driver == 'chrome':
                    doc = html.document_fromstring(unicode(self.splinter__browser.html))
                elif splinter__driver == 'phantomjs':
                    doc = html.document_fromstring(unicode(self.splinter__browser.html))
                elif splinter__driver == 'zope.testbrowser':
                    #splinter__browser.set_handle_robots(False)
                    doc = html.document_fromstring(unicode(self.splinter__browser.html.decode('utf-8')))
                else:
                    assert False  # rest of browser not yet supported..
                scream.log_debug(str(url), True)
                scream.say('Continue to work on ' + url)
                scream.say('Page source sent further')
                #splinter__browser.screenshot(name=repository.key, suffix='.png')
                scream.say('Verify if 404 (repo deleted) otherwise keep on going')
                # GitHub's 404 page carries this parallax illustration div.
                parallax = doc.xpath('//div[@id="parallax_illustration"]')
                if (len(parallax) > 0):
                    scream.say('Verified that 404 (repo deleted)')
                    result['status'] = '404'
                    break
                scream.say('Verified that not 404')
                scream.say('Verify if repo empty otherwise keep on going')
                # Empty repositories show a "blankslate" placeholder block.
                repo_empty = doc.xpath('//div[@class="blankslate has-fixed-width"]')
                if (len(repo_empty) > 0):
                    scream.say('Verified that repo is empty')
                    result['status'] = 'EMPTY'
                    break
                scream.say('Verified that repo not empty')
                if splinter__driver == 'phantomjs':
                    # PhantomJS renders JS: wait until the "Fetching
                    # contributors" AJAX placeholder disappears, then re-read
                    # the rendered HTML.
                    #WebDriverWait(splinter__browser, 10).until(waiter)
                    while True:
                        scream.say("Wait for the AJAX to do the magic")
                        if self.splinter__browser.is_element_not_present_by_xpath('//span[@class="octicon octicon-organization"]//..//..//text()[normalize-space(.)="Fetching contributors"]', wait_time=5):
                            break
                        else:
                            scream.say("AJAX didnt work on time")
                    doc = html.document_fromstring(unicode(self.splinter__browser.html))
                    assert "Fetching contributors" not in doc
                # numbers-summary: commits/branches/releases/contributors bar;
                # sunken-menu-group: left sidebar with issues/pull counters.
                ns = doc.xpath('//ul[@class="numbers-summary"]')
                sunken = doc.xpath('//ul[@class="sunken-menu-group"]')
                scream.say('XPath made some search for ' + url + ' .. move on to bsoup..')
                scream.say('Xpath done searching')
                scream.say('Element found?: ' + str(len(ns) == 1))
                element = ns[0]
                element_sunken = sunken[0]
                # Hand the lxml elements to BeautifulSoup for tag scanning.
                local_soup = BeautifulSoup(etree.tostring(element))
                local_soup_sunken = BeautifulSoup(etree.tostring(element_sunken))
                enumarables = local_soup.findAll("li")
                enumarables_more = local_soup_sunken.findAll("li")
                # The four <li> items appear in fixed order:
                # 0=commits, 1=branches, 2=releases, 3=contributors.
                commits = enumarables[0]
                scream.say('enumarables[0]')
                commits_number = analyze_tag(commits.find("span", {"class": "num"}))
                scream.say('analyze_tag finished execution for commits_number')
                scream.say('Before parse number: ' + str(commits_number))
                result['commits'] = parse_number(commits_number)
                scream.log_debug(result['commits'], True)
                scream.say('enumarables[1]')
                branches = enumarables[1]
                branches_number = analyze_tag(branches.find("span", {"class": "num"}))
                scream.say('Before parse number: ' + str(branches_number))
                result['branches'] = parse_number(branches_number)
                scream.log_debug(result['branches'], True)
                scream.say('enumarables[2]')
                releases = enumarables[2]
                releases_number = analyze_tag(releases.find("span", {"class": "num"}))
                scream.say('Before parse number: ' + str(releases_number))
                result['releases'] = parse_number(releases_number)
                scream.log_debug(result['releases'], True)
                scream.say('enumarables[3]')
                contributors = enumarables[3]
                contributors_number = analyze_tag(contributors.find("span", {"class": "num"}))
                scream.say('Before parse number: ' + str(contributors_number))
                result['contributors'] = parse_number(contributors_number)
                scream.log_debug(result['contributors'], True)
                # Defaults in case the sidebar lacks the counters.
                result['issues'] = 0
                result['pulls'] = 0
                for enumerable___ in enumarables_more:
                    if enumerable___["aria-label"] == "Pull Requests":
                        pulls_tag = enumerable___
                        pulls_number = analyze_tag(pulls_tag.find("span", {"class": "counter"}))
                        scream.say('Before parse number: ' + str(pulls_number))
                        result['pulls'] = parse_number(pulls_number)
                    elif enumerable___["aria-label"] == "Issues":
                        issues_tag = enumerable___
                        issues_number = analyze_tag(issues_tag.find("span", {"class": "counter"}))
                        scream.say('Before parse number: ' + str(issues_number))
                        result['issues'] = parse_number(issues_number)
                result['status'] = 'OK'
                break
            except TypeError as ot:
                scream.say(str(ot))
                scream.say('Scrambled results (TypeError). Maybe GitHub down. Retry')
                time.sleep(5.0)
                if force_raise:
                    raise
            except Exception as e:
                scream.say(str(e))
                scream.say('No response from selenium. Retry')
                time.sleep(2.0)
                if force_raise:
                    raise
        assert 'status' in result
        return result

    def analyze_with_selenium(self, repository):
        """Scrape *repository*'s GitHub page with selenium/Firefox.

        Same contract as :meth:`analyze_with_splinter`: returns a dict with
        'status' plus the scraped counters; retries forever unless the
        global ``force_raise`` is set.
        """
        result = dict()
        scream.say('Starting webinterpret for ' + repository.html_url + '..')
        assert repository is not None
        url = repository.html_url
        assert url is not None
        while True:
            try:
                self.browser.set_page_load_timeout(15)
                self.browser.get(url)
                scream.say('Data from web retrieved')
                doc = html.document_fromstring(unicode(self.browser.page_source))
                scream.log_debug(str(url), True)
                scream.say('Continue to work on ' + url)
                scream.say('Page source sent further')
                scream.say('Verify if 404 (repo deleted) otherwise keep on going')
                # GitHub's 404 page carries this parallax illustration div.
                parallax = doc.xpath('//div[@id="parallax_illustration"]')
                if (len(parallax) > 0):
                    scream.say('Verified that 404 (repo deleted)')
                    result['status'] = '404'
                    break
                scream.say('Verified that not 404')
                scream.say('Verify if repo empty otherwise keep on going')
                repo_empty = doc.xpath('//div[@class="blankslate has-fixed-width"]')
                if (len(repo_empty) > 0):
                    scream.say('Verified that repo is empty')
                    result['status'] = 'EMPTY'
                    break
                scream.say('Verified that repo not empty')
                ns = doc.xpath('//ul[@class="numbers-summary"]')
                sunken = doc.xpath('//ul[@class="sunken-menu-group"]')
                scream.say('XPath made some search for ' + url + ' .. move on to bsoup..')
                scream.say('Xpath done searching')
                scream.say('Element found?: ' + str(len(ns) == 1))
                element = ns[0]
                element_sunken = sunken[0]
                local_soup = BeautifulSoup(etree.tostring(element))
                local_soup_sunken = BeautifulSoup(etree.tostring(element_sunken))
                enumarables = local_soup.findAll("li")
                enumarables_more = local_soup_sunken.findAll("li")
                # Fixed order: 0=commits, 1=branches, 2=releases, 3=contributors.
                commits = enumarables[0]
                scream.say('enumarables[0]')
                commits_number = analyze_tag(commits.find("span", {"class": "num"}))
                scream.say('analyze_tag finished execution for commits_number')
                scream.say('Before parse number: ' + str(commits_number))
                result['commits'] = parse_number(commits_number)
                scream.log_debug(result['commits'], True)
                scream.say('enumarables[1]')
                branches = enumarables[1]
                branches_number = analyze_tag(branches.find("span", {"class": "num"}))
                scream.say('Before parse number: ' + str(branches_number))
                result['branches'] = parse_number(branches_number)
                scream.log_debug(result['branches'], True)
                scream.say('enumarables[2]')
                releases = enumarables[2]
                releases_number = analyze_tag(releases.find("span", {"class": "num"}))
                scream.say('Before parse number: ' + str(releases_number))
                result['releases'] = parse_number(releases_number)
                scream.log_debug(result['releases'], True)
                scream.say('enumarables[3]')
                contributors = enumarables[3]
                contributors_number = analyze_tag(contributors.find("span", {"class": "num"}))
                scream.say('Before parse number: ' + str(contributors_number))
                result['contributors'] = parse_number(contributors_number)
                scream.log_debug(result['contributors'], True)
                result['issues'] = 0
                result['pulls'] = 0
                for enumerable___ in enumarables_more:
                    if enumerable___["aria-label"] == "Pull Requests":
                        pulls_tag = enumerable___
                        pulls_number = analyze_tag(pulls_tag.find("span", {"class": "counter"}))
                        scream.say('Before parse number: ' + str(pulls_number))
                        result['pulls'] = parse_number(pulls_number)
                    elif enumerable___["aria-label"] == "Issues":
                        issues_tag = enumerable___
                        issues_number = analyze_tag(issues_tag.find("span", {"class": "counter"}))
                        scream.say('Before parse number: ' + str(issues_number))
                        result['issues'] = parse_number(issues_number)
                result['status'] = 'OK'
                break
            except TypeError as ot:
                scream.say(str(ot))
                scream.say('Scrambled results (TypeError). Maybe GitHub down. Retry')
                time.sleep(5.0)
                if force_raise:
                    raise
            except Exception as e:
                scream.say(str(e))
                scream.say('No response from selenium. Retry')
                time.sleep(2.0)
                if force_raise:
                    raise
        assert 'status' in result
        return result

    def is_finished(self):
        """Return the finished flag, coerced to False when it is None."""
        return self.finished if self.finished is not None else False

    def set_finished(self, finished):
        """Set the finished flag (logged for thread-pool bookkeeping)."""
        scream.say('Marking the thread ' + str(self.threadId) + ' as finished..')
        self.finished = finished

    def cleanup(self):
        """Tear down the browser/display, mark the thread finished, terminate.

        Best-effort: shutdown errors are swallowed (splinter mode) or
        re-raised only when the global ``force_raise`` is set.
        NOTE(review): ``self.terminate()`` is not a ``threading.Thread``
        method — presumably provided elsewhere; verify it exists.
        """
        global use_splinter
        if use_splinter:
            scream.say("Cleanup of splinter")
            try:
                self.splinter__browser.quit()
            except:
                scream.say('Did my best to clean up')
        else:
            try:
                self.browser.close()
                self.browser.quit()
                self.display.stop()
                # Kill the virtual display's underlying process as well.
                self.display.popen.kill()
            except:
                scream.say('Did my best to clean up after selenium and pyvirtualdisplay')
                if force_raise:
                    raise
        scream.say('Marking thread on ' + self.repo.getKey() + ' as finished..')
        self.finished = True
        scream.say('Terminating thread on ' + self.repo.getKey() + ' ...')
        self.terminate()

    '''
    def build_list_of_programmers(result_set_programmers, repo_key, repository)
    returns dict (github user name -> User object) 1..1
    key is a string contributor username (login)
    second object is actuall PyGithub User instance, meow !
    '''
    def build_list_of_programmers(self, result_set_programmers, repo_key, repository):
        """Materialize a paginated contributor iterable into {login: user}.

        Retries the whole pagination from scratch on failure (the dict is
        cleared each attempt).  NOTE(review): the except blocks reference
        the module-global ``key`` instead of the ``repo_key`` parameter —
        looks like a bug; confirm which was intended.
        """
        result_set = dict()
        contributors__ = result_set_programmers
        while True:
            # Start each attempt clean so a partial page never leaks through.
            result_set.clear()
            try:
                for contributor in contributors__:
                    result_set[contributor.login] = contributor
                break
            except TypeError as e:
                scream.log_error('Repo + Contributor TypeError, or paginated through' +
                                 ' contributors gave error. ' + key + ', error({0})'.format(str(e)), True)
                repos_reported_execution_error.write(key + os.linesep)
                if force_raise:
                    raise
                #break
            except socket.timeout as e:
                scream.log_error('Timeout while revealing details.. ' + ', error({0})'.format(str(e)), True)
                freeze('build_list_of_programmers will retry')
                if force_raise:
                    raise
                #break
            except Exception as e:
                scream.log_error('Exception while revealing details.. ' + ', error({0})'.format(str(e)), True)
                freeze('build_list_of_programmers will retry')
                if force_raise:
                    raise
                #break
        return result_set

    def get_data(self):
        """Fetch contributors for the repository and reveal each developer.

        Runs only when the module-global ``resume_stage`` is None or
        'contributors'; always finishes with :meth:`cleanup`.
        """
        global resume_stage
        scream.say('Executing inside-thread method get_data() for: ' + str(self.threadId))
        if resume_stage in [None, 'contributors']:
            #try:
            scream.ssay('Checking size of a ' + str(self.repo.getKey()) + ' team')
            '1. Team size of a repository'
            self.contributors = self.repository.get_contributors()
            assert self.contributors is not None
            self.repo_contributors = set()
            self.contributors_static = self.build_list_of_programmers(self.contributors, self.repo.getKey(), self.repository)
            for contributor in self.contributors_static.items():
                scream.log_debug('move with contributor to next from contributors_static.items()', True)
                # Per-contributor retry loop: only a successful reveal breaks.
                while True:
                    scream.say('Inside while True: (line 674)')
                    try:
                        self.contributor_login = contributor[0]
                        self.contributor_object = contributor[1]
                        scream.say(str(self.contributor_login))
                        self.repo_contributors.add(self.contributor_login)
                        scream.say(str(self.repo_contributors))
                        developer_revealed(threading.current_thread(), self.repository, self.repo, self.contributor_object)
                        scream.say('Finished revealing developer')
                        break
                    except TypeError as e:
                        scream.log_error('Repo + Contributor TypeError, or paginated through' +
                                         ' contributors gave error. ' + key + ', error({0})'.format(str(e)), True)
                        repos_reported_execution_error.write(key + os.linesep)
                        if force_raise:
                            raise
                        #break
                    except socket.timeout as e:
                        scream.log_error('Timeout while revealing details.. ' + ', error({0})'.format(str(e)), True)
                        freeze('socket.timeout in paginate through x contributors')
                        if force_raise:
                            raise
                        #break
                    except Exception as e:
                        scream.log_error('Exception while revealing details.. ' + ', error({0})'.format(str(e)), True)
                        freeze(str(e) + ' in paginate through x contributors')
                        if force_raise:
                            raise
                        #break
            assert self.repo_contributors is not None
            self.repo.setContributors(self.repo_contributors)
            self.repo.setContributorsCount(len(self.repo_contributors))
            scream.log('Added contributors of count: ' + str(len(self.repo_contributors)) + ' to a repo ' + key)
        # NOTE(review): the collapsed source is ambiguous on whether cleanup()
        # sits inside the if-block; placed at method level so the browser is
        # torn down on every path — confirm against the original file.
        self.cleanup()