def download_linke(coords, proxy, port, saveFile, saveMode):

    print proxy, port
    print proxy != ""

    url = "http://www.soda-is.com/eng/services/service_invoke/gui.php?" + "xml_descript=soda_tl.xml&Submit2=Month"

    session = Session()
    session.verify = False

    if proxy != "":
        proxies = {proxy: port}
        session.proxies = proxies

    br = RoboBrowser(session=session, parser="lxml")
    br.open(url)

    linke_form = br.get_forms()[1]

    num = len(coords)
    index = 0

    with open(saveFile, saveMode) as f:
        try:
            for coord in coords:
                inlon, inlat = coord
                linke_form["lat"].value = inlat
                linke_form["lon"].value = inlon

                sf = linke_form.submit_fields.getlist("execute")
                br.submit_form(linke_form, submit=sf[0])

                linke_table = br.find("table", {"cellspacing": "0", "cellpadding": "2"})

                linkes = get_monthly_linke_str(get_linke_values(linke_table))
                s = "%s,%s,%s\n" % (format(inlon, "0.5f"), format(inlat, "0.5f"), linkes)

                if len(s) > 48:
                    f.write(s)
                    print "Done with point %i of %i: (%s, %s)" % (
                        index + 1,
                        num,
                        format(inlon, "0.5f"),
                        format(inlat, "0.5f"),
                    )

                index += 1

                br.back()

            print "DONE!"

        except Exception as e:

            not_dl = list(coords[index:])
            with open(saveFile + "_notdownloaded.txt", "w") as nd:
                for c in not_dl:
                    nd.write("%s,%s\n" % (str(c[0]), str(c[1])))
            print e
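
A minimal usage sketch for the function above; the coordinate pairs, output filename, and empty proxy settings are illustrative assumptions (coords are (lon, lat) tuples, and an empty proxy string skips the proxy branch).

# Hedged usage sketch: sample (lon, lat) pairs and filenames are assumptions.
sample_coords = [(-105.27055, 40.01499), (2.35222, 48.85661)]
download_linke(sample_coords, proxy="", port="", saveFile="linke_values.csv", saveMode="w")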
Example #2
File: utils.py Project: Kuppey/Shiinabot
def scrape_site(url, cookie_file="", ses=False, is_rss=False):
    from http.cookiejar import LWPCookieJar
    from robobrowser import RoboBrowser
    from requests import Session
    s = Session()
    if cookie_file:
        s.cookies = LWPCookieJar(cookie_file)
        try:
            s.cookies.load(ignore_discard=True)
        except:
            # Cookies don't exist yet
            pass
    s.headers['User-Agent'] = 'Mozilla/5.0 (X11; Ubuntu; rv:39.0)'
    s.headers['Accept'] = 'text/html'
    s.headers['Connection'] = 'keep-alive'
    if is_rss:
        parser = 'xml'
    else:
        parser = 'html5lib'
    browser = RoboBrowser(session=s,
                          parser=parser)
    browser.open(url)
    if ses:
        return browser, s
    else:
        return browser
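
A short sketch of the two call styles supported above: browser only, or browser plus the underlying Session when ses=True so cookies can be persisted. The URLs and cookie file name are placeholders.

# Placeholders only; scrape_site loads an LWPCookieJar when cookie_file is given.
rss_browser = scrape_site("https://example.com/feed.xml", is_rss=True)
browser, session = scrape_site("https://example.com", cookie_file="cookies.lwp", ses=True)
session.cookies.save(ignore_discard=True)  # persist cookies for the next run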
Example #3
    def take_action(self, parsed_args):
        config_dir = '~/.kddcup2015-cli'
        config_dir = os.path.expanduser(config_dir)

        if parsed_args.topN:
            topN = int(parsed_args.topN)
        else:
            topN = 10

        base = 'https://www.kddcup2015.com'
        rank_url = '/'.join([base, 'submission-rank.html'])

        browser = RoboBrowser()
        browser.open(rank_url)

        html_str = str(browser.parsed)
        html = pq(html_str)

        country_teams = list(
            map(lambda x: x.text.strip(), html('.country_team')[:topN]))
        scores = list(
            map(lambda x: x.text.strip(), html('.td_result')[:topN]))
        entries = list(
            map(lambda x: x.text.strip(), html('.td_result + td')[:topN]))
        last_subs = list(
            map(lambda x: x.text.strip(), html('.td_result + td + td')[:topN]))

        return (
            ('Team', 'Score', 'Entries', 'Last Submission UTC'),
            (list(zip(country_teams, scores, entries, last_subs)))
        )
Example #4
def parseWeek(year, week):
    """
    parses a specific week on http://rotoguru1.com/cgi-bin/fyday.pl?week={}&year={}&game=fd&scsv=1
    which contains a csv of the fan duel player prices
    stores this info in fanduel_prices collection
    """
    logger = makeLogger(str(year) + '_' + str(week), r'./logs_RotoFDStats/')

    startTime = datetime.now()

    logger.debug('Starting %d', year)

    client = MongoClient('localhost', 27017)
    db = client['nfl_data']
    col_fanduel_prices = db['fanduel_prices']

    if col_fanduel_prices.find({'year': year, 'week': week}).count():
        logger.debug('Already parsed %d %d', year, week)
        closeLogger(logger)
        return None

    wait = random.uniform(1.5,3.5)
    logger.debug('Waiting %f', wait)
    time.sleep(wait)

    logger.debug('Opening main page')
    browser = RoboBrowser(history=False, parser='html.parser', user_agent=get_user_agent(logger), timeout=10)
    url = "http://rotoguru1.com/cgi-bin/fyday.pl?week={}&year={}&game=fd&scsv=1".format(week, year)
    browser = open_or_follow_link(logger, browser, 'open', url)

    docs = []
    try:
        data = browser.find('pre').text
        lines = data.split('\n')
        header = lines[0]
        header = header.split(';')
        lines = lines[1:]
        for line in lines:
            doc = {}
            if not line:
                continue
            for index, each in enumerate(line.split(';')):
                doc[cleanKey(header[index])] = convertToNumber(each)
            docs.append(doc)
    except:
        logger.exception("Parse fail: %s", url)
    
    try:
        logger.debug('Bulk Creating docs')
        col_fanduel_prices.insert_many(docs)
    except:
        logger.exception('insert_many error')

    logger.debug('parseWeek time elapsed: ' + str(datetime.now() - startTime))

    closeLogger(str(year) + '_' + str(week))
Example #5
def get_bracket_data(year):
    url = 'http://espn.go.com/mens-college-basketball/tournament/bracket/_/id/{}22/'.format(year)
    b = RoboBrowser()
    b.open(url)
    data = []
    for item in b.find_all(attrs={'class': 'match'}):
        t1, t2 = [(get_id(a['href']), a['title']) for a in item('a')]
        s1, s2 = ' '.join(item.find('dd').stripped_strings).split()
        data.append([t1, t2, s1, s2])
    return data
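
A brief usage sketch; the year is arbitrary, and each row follows the [(team1_id, team1_title), (team2_id, team2_title), score1, score2] layout built above.

# Illustrative only; 2015 is an arbitrary tournament year.
for (t1, t2, s1, s2) in get_bracket_data(2015):
    print("{} {}-{} {}".format(t1[1], s1, s2, t2[1]))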
Example #6
def get_mp3_url(lecture_url):
    browser = RoboBrowser()
    browser.open(lecture_url)

    link = browser.get_link(href=re.compile("\\.mp3$"))

    if link is not None:
        return link["href"]
    else:
        return None
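
A quick call sketch; the lecture URL is a placeholder, and the helper returns either the href of the first link ending in ".mp3" or None.

# Placeholder URL; handle the None case when no .mp3 link is present.
mp3 = get_mp3_url("https://example.edu/lectures/intro")
if mp3 is not None:
    print("MP3 found at: " + mp3)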
Example #7
File: robobrowser.py Project: 18F/calc
class RoboBrowserTestCase(StaticLiveServerTestCase, base.AbstractBrowser):
    def setUp(self):
        super().setUp()
        self.browser = RoboBrowser(history=True, parser='html.parser')

    def load(self, url):
        self.browser.open(self.live_server_url + url)

    def get_title(self):
        return self.browser.find('title').text

    def get_form(self, selector):
        return RoboBrowserForm(self.browser, selector)
Example #8
    def testMethod_findXssFailuresInAForm_shouldOnlySaveXssFailuresThatAreNotAlreadyInTheList(self):
        url = "http://www.remikya.com/Controllers/SearchController.php"
        xssFinder = XssFinder(url)
        browser = RoboBrowser()
        browser.open(url)
        form = browser.get_form(id="form")

        xssFinder.findXssFailuresInAForm(browser, form)
        xssFinder.findXssFailuresInAForm(browser, form)
        xssFinderListLength = len(xssFinder.getListOfLinks())

        EXPECTED_ANSWER = 1
        self.assertEqual(EXPECTED_ANSWER, xssFinderListLength)
Example #9
    def take_action(self, parsed_args):
        config_dir = '~/.kddcup2015-cli'
        config_dir = os.path.expanduser(config_dir)

        if os.path.isdir(config_dir):
            config = ConfigParser.ConfigParser(allow_no_value=True)
            config.readfp(open(config_dir + '/config'))

            if parsed_args.username:
                username = parsed_args.username
            else:
                username = config.get('user', 'username')

            if parsed_args.password:
                password = parsed_args.password
            else:
                password = config.get('user', 'password')

        entry = parsed_args.entry
        message = parsed_args.message

        base = 'https://www.kddcup2015.com'
        login_url = '/'.join([base, 'user-ajaxlogin.html'])
        submit_url = '/'.join([base, 'submission-make.html'])
        submission_url = '/'.join(([base, 'submission.html']))

        browser = RoboBrowser()

        response = browser.session.post(
            login_url, dict(email=username, pwd=password)).json()

        if response['rs'] == 'error':
            self.app.stdout.write(response['msg'])

        browser.open(submit_url)

        form = browser.get_form()

        form['_f'].value = open(entry)

        if message:
            form['description'] = message

        browser.submit_form(form)

        sleep(5)

        browser.open(submission_url)

        html_str = str(browser.parsed)
        html = pq(html_str)

        times = list(map(
            lambda x: datetime_parser.parse(x.text),
            html('.td_result +td+td+td+td')))

        newest_index = times.index(max(times))

        score = html('.td_result')[newest_index * 2].text.strip()
        self.app.stdout.write(score + '\n')
Example #10
def scrape_snotel_sites(url=None):
    if not url:
        url = "http://www.wcc.nrcs.usda.gov/nwcc/yearcount?network=sntl&counttype=statelist&state="
    browser = RoboBrowser(parser="html5lib")
    browser.open(url)
    browser.response.raise_for_status()
    table = browser.find_all("table")[4]
    sites = [] # list of sites with name and code
    cols = [t.text.strip() for t in table.tr.find_all("th")]
    for row in table.find_all("tr"):
        if row.td and row.td.text.strip() == 'SNTL':
            items = [i.text.strip() for i in row.find_all("td")]
            sites.append(dict(zip(cols, items)))
    return sites
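
A short sketch of consuming the result; each dict is keyed by the header cells of the NRCS table, so the exact column names depend on that page and are not assumed here.

# Each entry maps the table's header cells to the row's values for SNTL stations.
sites = scrape_snotel_sites()
print("Found {} SNOTEL sites".format(len(sites)))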
Example #11
	def lookUpNetCTLPan(self, sequ):
		seq = '>seq' + '\n' + sequ
		browser = RoboBrowser(user_agent='Mozilla/5.0', history=True)
		browser.allow_redirects = True
		Query = "http://tools.immuneepitope.org/stools/netchop/netchop.do?app=netchop"
		browser.open(Query)
		net_form = browser.get_form(action="upload-submit.do")
		net_form['sequences'].value = seq
		net_form['formtype'].value = 'netctlpan_select'
		net_form['length'].value = '9'
		net_form['species'].value = "human"
		net_form['supertype'].value = 'A2'
		net_form['allele'].value = "HLA-A02:01"  # self.amerLength
		print(net_form)

		browser.submit_form(net_form, submit="Submit")
		print(browser)

		table_form = browser.get_form(action="tableViewctlpan.do?thePage=1")
		print(table_form)

		return
Example #12
File: bscrape.py Project: baldegg/bscrape
def scrape(q):

	query = q
	ph = re.compile('(\(\d{3}\)\ \d{3}-\d{4})')
	ad = re.compile('[A-Z]{2}\ (\d{5})')
	site = re.compile('(?<=\?q=).*(?=&sa)')
	result = {
	'name':'!NO DATA!',
	'address':'!NO DATA!',
	'phone':'!NO DATA!',
	'website':'!NO DATA!',
	'blurb':'!NO DATA!'
	}
	# uses RoboBrowser to submit a Google search
	browser = RoboBrowser(user_agent='Firefox', parser='html.parser')
	browser.open('http://google.com/')

	# fill in the search form with the query and submit via the btnG button
	form = browser.get_form(action='/search')
	form['q'].value = query

	browser.submit_form(form, form.submit_fields['btnG'])




	result['query']=query
	if browser.find("div", {"class" : "_B5d"}):
		result['name'] = browser.find("div", {"class" : "_B5d"}).text.encode('utf-8')
		stuff = browser.find("div", {"class" : "_uXc"})

		address = stuff.find(text=ad)
		if address:
			result['address']=address.encode('utf-8')

		phone = stuff.find(text=ph)
		if phone:
			result['phone']=phone.encode('utf-8')

		blurb = stuff.find("span")
		if blurb:
			result['blurb'] = blurb.text.encode('utf-8')

		website = stuff.find("a", string="Website")
		if website:
			website = website.get('href').encode('utf-8')
			result['website'] = site.search(website).group()


	print result
	delay = random.randint(5,10)
	print "Waiting " + str(delay) + " seconds..."
	time.sleep(delay)
	return result
Example #13
def get_schedule(employee_info,shift_period):
    browser=RoboBrowser(parser='lxml')
    login_handler(browser,employee_info)
    
    start_of_week=shift_period['start_of_week']
    end_of_week=shift_period['end_of_week']
    browser.open('https://www.rsishifts.com/Schedules/SchedulePrintByUser.aspx?'
            'StartDate='+start_of_week+'&EndDate='+end_of_week)
    
    employee_name=employee_info['employee_name']
    schedule=find_schedule(browser,employee_name)
    if (schedule):
        return convert_schedule_to_datetime(start_of_week,schedule)
    else:
        return False
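
A hedged call sketch; only the keys read inside get_schedule (employee_name, start_of_week, end_of_week) are shown, the date format is an assumption based on the query string, and whatever credentials login_handler expects (not shown) would also need to be in employee_info.

# Hypothetical inputs; the date format and any extra credential keys are assumptions.
employee_info = {'employee_name': 'Jane Doe'}  # plus whatever login_handler needs
shift_period = {'start_of_week': '2019-06-02', 'end_of_week': '2019-06-08'}
schedule = get_schedule(employee_info, shift_period)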
Example #14
 def get_digitised_pages(self, entity_id=None):
     '''
     Returns the number of pages (images) in a digitised file.
     Note that you don't need a session id to access these pages,
     so there's no need to go through get_url().
     '''
     # url = 'http://recordsearch.naa.gov.au/scripts/Imagine.asp?B={}&I=1&SE=1'.format(entity_id)
     url = 'http://recordsearch.naa.gov.au/SearchNRetrieve/Interface/ViewImage.aspx?B={}'.format(entity_id)
     br = RoboBrowser(parser='lxml')
     br.open(url)
     try:
         pages = int(br.find('span', attrs={'id': "lblEndPage"}).string)
     except AttributeError:
         pages = 0
     return pages
Example #15
    def testMethod_getAllFieldNamesInAForm_shouldReturnTheNameAttributeOfAllFormNodes(self):
        url = "http://www.remikya.com/Controllers/LoginController.php"
        xssFinder = XssFinder(url)
        browser = RoboBrowser()
        browser.open(url)
        form = browser.get_form(action="/Controllers/LoginController.php")

        fieldNames = xssFinder.getAllFieldNamesFromAForm(form)

        FIRST_EXPECTED_ANSWER = "nom_utilisateur"
        SECOND_EXPECTED_ANSWER = "mot_de_passe"
        THIRD_EXPECTED_ANSWER = "Connecter"
        self.assertEqual(FIRST_EXPECTED_ANSWER, fieldNames[0])
        self.assertEqual(SECOND_EXPECTED_ANSWER, fieldNames[1])
        self.assertEqual(THIRD_EXPECTED_ANSWER, fieldNames[2])
Example #16
class StatusChecker(object):
    def __init__(self, config):
        self.browser = RoboBrowser()
        self.config = config

    def post(self, url, data):
        full_url = self.base_url + url
        data_json = json.dumps(data)
        return self.session.post(full_url, data=data_json)

    def get(self, url):
        full_url = self.base_url + url
        return self.session.get(full_url)

    def status(self, username, password, name=None):
        status = Status(name or username)

        self.browser.open(self.base_url)
        login = self.post("/login", dict(
            username=username,
            lastName=password,
            password=password,
            rememberMe=False))
        login_json = login.json()
        if not login_json['success']:
            print "Unable to log in: {}".format(login_json['error'])
            print login.text
            return status
        account_summary = self.get('/account/summary')
        try:
            account_summary.json()
        except:
            print "Account summary text:"
            print account_summary.text
            raise
        status.fees_cents = account_summary.json()['accountSummary']['fees']
        status_response = self.get('/loans/0/20/Status')
        for loan in status_response.json()['loans']:
            status.add_loan(loan)
        return status

    @property
    def session(self):
        return self.browser.session

    @property
    def base_url(self):
        return self.config['library']['base_url']
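
A minimal sketch of driving the class above; the config layout mirrors the base_url property, and the credentials are placeholders.

# Placeholder config and credentials; status() returns a Status object with fees and loans.
config = {'library': {'base_url': 'https://catalog.example-library.org'}}
checker = StatusChecker(config)
patron_status = checker.status('patron-id', 'patron-pin')
print(patron_status.fees_cents)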
Example #17
class FakeMail(object):
    def __init__(self):
        self.browser = RoboBrowser(history=True)
        self.browser.open('http://10minutemail.com/')
        with open('10minmail.txt', 'w') as f:
            f.write(str(self.browser.parsed))
        if self.browser.get_link('Blocked'):
            raise BlockedException('too many login attempts')


    def get_address(self):
        address = self.browser.find("div", {"id": "copyAddress"})
        print address

    def read_mail(self):
        pass
Example #18
 def __init__(self):
     self.browser = RoboBrowser(history=True)
     self.browser.open('http://10minutemail.com/')
     with open('10minmail.txt', 'w') as f:
         f.write(str(self.browser.parsed))
     if self.browser.get_link('Blocked'):
         raise BlockedException('too many login attempts')
Example #19
    def __init__(self, email, passwd=None, metrics=None, dt1=None, dt2=None, write_out=False, filename=None):
        self.email = email
        if passwd is None:
            from getpass import getpass
            self.passwd = getpass('Password for %s: ' % self.email)
        else:
            self.passwd = passwd
        if metrics is None:
            self.metrics = ['steps', 'distance', 'floors', 'active-minutes', 'calories-burned', 'heart-rate']
        else:
            self.metrics = metrics
        if dt2 is None:
            self.dt2 = datetime.now()
        else:
            self.dt2 = dt2
        if dt1 is None:
            from datetime import timedelta

            self.dt1 = self.dt2 - timedelta(days=7)
        else:
            self.dt1 = dt1
        self.browser = RoboBrowser(parser='lxml')
        self.write_out = write_out
        self.filename = filename
        self.date, self.metric, self.data = (None, None, None)
Example #20
 def __init__(self, appname, username, password, *args, **kwargs):
     self.loggedin=False
     self.browser=RoboBrowser(history=True)
     self.appname=appname
     self.username=username
     self.password=password
     self.baseurl = 'https://' + self.appname + '.infusionsoft.com'
Example #21
 def __init__(self):
     # Browse url :
     self.result = None
     self.browser = RoboBrowser(parser="html.parser")
     self.browser.session.headers = config.headers
     # Mount with custom SSL Adapter
     self.browser.session.mount('https://', HTTPSAdapter())
Example #22
 def __init__(self, user_id, password):
     self.user_id = user_id
     self.password = password
     self.browser = RoboBrowser()
     self.run_id = ''
     self.pre_id = ''
     self.res = {}
Example #23
File: pipwin.py Project: pbrod/pipwin
def build_cache():
    """
    Get current data from the website http://www.lfd.uci.edu/~gohlke/pythonlibs/

    Returns
    -------
    Dictionary containing package details
    """

    data = {}

    soup = RoboBrowser()
    soup.open(MAIN_URL)
    links = soup.find(class_="pylibs").find_all("a")
    for link in links:
        if link.get("onclick") is not None:
            jsfun = link.get("onclick").split('"')
            mlstr = jsfun[0].split("(")[1].strip()[1:-2]
            ml = list(map(int, mlstr.split(",")))
            mi = jsfun[1]
            url = parse_url(ml, mi)

            # Details = [package, version, pyversion, --, arch]
            details = url.split("/")[-1].split("-")
            pkg = details[0].lower().replace("_", "-")

            # Not using EXEs and ZIPs
            if len(details) != 5:
                continue
            # arch = win32 / win_amd64 / any
            arch = details[4]
            arch = arch.split(".")[0]
            # ver = cpXX / pyX / pyXXx
            pkg_ver = details[1]
            py_ver = details[2]

            py_ver_key = py_ver + "-" + arch

            if pkg in data.keys():
                if py_ver_key in data[pkg].keys():
                    data[pkg][py_ver_key].update({pkg_ver: url})
                else:
                    data[pkg][py_ver_key] = {pkg_ver: url}
            else:
                data[pkg] = {py_ver_key: {pkg_ver: url}}

    return data
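
A small sketch of reading the nested cache built above; the package name is arbitrary, and the nesting follows data[package]["pyversion-arch"][version] = url.

# Illustrative lookup; "numpy" is an arbitrary package name.
cache = build_cache()
for py_arch, versions in cache.get("numpy", {}).items():
    for version, url in versions.items():
        print("{} {} {}".format(py_arch, version, url))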
Example #24
def scrape_cache(query, total):
	
	browser = RoboBrowser()
	
	listings = []
	for i in range(1, total, 14):
		offset = i				# which listing to start at per page. Increment by 14
		browser.open('http://www.bing.com/search?q=%s&first=%d' % (query, offset))
	
		# Database Schema - A sqlite database is used to make data queries more efficient.
		# id (Primary Key) - ID of the item
		# orig_url - Original URL of the site.
		# cache_url - Cached URL of the site.
		# desc - Quick description of the site.
	
		# grab all search attribute strings
		capt_list = browser.select('.b_caption')
		for capt in capt_list:
			# start a new listing
			listing = {}
			
			# display original url
			listing['orig_url'] = re.sub('<[^>]*>', '', str(capt.select('cite')[0]))
			
			# display description
			listing['desc'] = capt.p.string
			
			# '|' delimited list, containing the ids needed to cache
			id_string = capt.select('div.b_attribution')[0].get('u')
			print(id_string)
			if (id_string != None):
				ids = id_string.split('|')
				listing['cache_url'] = "http://cc.bingj.com/cache.aspx?q=%s&d=%s&mkt=en-US&setlang=en-US&w=%s" % (query, ids[2], ids[3])
			else:
				listing['cache_url'] = None
			
			print(listing)
			listings.append(listing)
		
		print(":: End of dump %d" % i)
		
		# delay between page grabs
		time.sleep(1)
	
	# listings is given as an output object
	return(listings)
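
A sketch of persisting the listings into the sqlite layout described in the schema comment above; the table and database names are assumptions, and the "desc" column is quoted because it collides with an SQL keyword.

import sqlite3

def save_listings(listings, db_path="bing_cache.db"):
    # Table layout follows the schema comment in scrape_cache; names are assumptions.
    conn = sqlite3.connect(db_path)
    conn.execute('CREATE TABLE IF NOT EXISTS listings '
                 '(id INTEGER PRIMARY KEY, orig_url TEXT, cache_url TEXT, "desc" TEXT)')
    conn.executemany('INSERT INTO listings (orig_url, cache_url, "desc") VALUES (?, ?, ?)',
                     [(l['orig_url'], l['cache_url'], l['desc']) for l in listings])
    conn.commit()
    conn.close()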
Example #25
File: tipper.py Project: Kricki/kicktipper
    def __init__(self, name):
        self._name = name
        self._url = "https://www.kicktipp.de/" + self._name + "/"
        self._url_login = self._url + "profil/login"
        self._url_logout = self._url + "profil/logout"
        self._url_tippabgabe = self.url + "tippabgabe"

        self._browser = RoboBrowser()
Example #26
	def __init__(self, un="*****@*****.**", pw='62IS1DSDBgyTM8b7GUl', appname='if188', **kwargs):
		self.un = un
		self.pw = pw
		self.appname=appname
		self.client_id="aa8fnmbza344ypd9anqeq62v"
		self.secret="VsNrwPpHDN"
		self.redirect_uri = "http://jlmarks.org/infusionsoftcallback"
		self.browser = RoboBrowser(history=True)
Example #27
    def __init__(self, output_path=None, username=None, password=None):
        self.browser = RoboBrowser(history=True)
        self.output_path = output_path or tempfile.TemporaryDirectory().name

        self.username = username or os.environ['STITCHBOT_USERNAME']
        self.password = password or os.environ['STITCHBOT_PASSWORD']

        self.logger = logger.getChild('StitchBot')
Example #28
def main():
    args = docopt(__doc__, version="dailyprogrammer-dl v{}".format(__version__))

    # Configure logging
    logLevel = logging.INFO #default
    if args['--verbose']:
        logLevel = logging.DEBUG
    elif args['--quiet']:
        logLevel = logging.ERROR

    logging.basicConfig(format='%(levelname)s: %(message)s', level=logLevel)
    logging.debug(args)

    # Process command line arguments
    challengeURL = args['<challengeurl>']

    # Parse project page for title and description
    logging.info("Parsing daily challenge: {}".format(challengeURL))
    browser = RoboBrowser()
    browser.session.headers['User-Agent'] = "dailyprogrammer-dl v{} by /u/zod77".format(__version__)
    browser.open(challengeURL)
    title = browser.find('a',class_='title').string
    description = browser.find_all('div',class_="md")
    description = description[1]
    descriptionHTML = "".join(str(t) for t in description.contents) # remove outer <div>

    projectName = generateProjectName(title)

    # Init project skeleton
    logging.info("Generating project")
    projectPath = os.path.abspath(projectName)
    os.mkdir(projectPath)

    # Write out project files
    pyTemplate = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)),"boilerplate.txt"))
    shutil.copy(pyTemplate, os.path.join(projectPath,"{}.py".format(projectName)))

    # Generate README.md
    h = html2text.HTML2Text()
    descriptionMD = h.handle(descriptionHTML)
    readme = os.path.join(projectPath,"README.md")
    with open(readme, "w") as f:
        f.write(descriptionMD)

    return
Example #29
                      offset_inicio:sub_trecho.find(string_fim):].lstrip()


def create_dict_from_resultset(resultset, fields):
    result = {}
    for tag in resultset:  # TODO: change to something like --> field = ([f for field in fields] in str(tag.next_sibling.next_element)):
        for key in fields:
            if str(fields[key]).lstrip() in str(tag):
                result[key] = str(tag.next_sibling).replace("\n", "").lstrip()
    return result


#------- LOGIN --------------------
session = Session()
session.verify = False
browser = RoboBrowser(session=session, parser="html5lib")
browser.open("https://ditec.pf.gov.br/sistemas/criminalistica/meus_dados.php")
form_login = browser.get_form()
form_login["usuario"].value = config.LOGIN["SISCRIM"]["USER"]
form_login["senha"].value = config.LOGIN["SISCRIM"]["PASS"]
browser.submit_form(form_login)

#------- FILL IN MATERIAL --------------------
numero_material = "846/2017"
form_mat1 = browser.get_form()
form_mat1["tipo_busca"].value = "numero_material"
form_mat1["numero_busca"].value = numero_material
browser.submit_form(form_mat1)

#------- NAVIGATE TO MATERIAL --------------------
browser.follow_link(browser.get_link(str(numero_material)))
Example #30
class Siding:

    def __init__(self, usuario, contraseña):
        self.browser = RoboBrowser(parser="html.parser")
        self.usuario = usuario
        self.contraseña = contraseña
        self.ramos_alumno = defaultdict(dict)
        self.ramos_administrador = defaultdict(dict)
        self.ramos_ayudante = defaultdict(dict)

    def __repr__(self):
        return "Siding - " + self.usuario

    def login(self):
        "Inicia sesion en Siding"
        self.browser.open("http://www.ing.uc.cl/")
        forma = self.browser.get_form(id="form-siding")
        forma["login"] = self.usuario
        forma["passwd"] = self.contraseña
        self.browser.submit_form(forma) 
        if "Datos de ingreso incorrectos" in self.browser.parsed.text:
            raise CredencialesIncorrectas()

    def cargar_ramos(self):
        self.browser.open(
            "https://intrawww.ing.puc.cl/siding/" +
            "dirdes/ingcursos/cursos/index.phtml")
        ramos = self.browser.find(class_="ColorFondoZonaTrabajo")
        ramos = ramos.find_all("tr")[1:]
        comenzar = False
        lista = None
        for ramo in ramos:
            titulo = ramo.find("td").text.strip()
            if titulo == "":
                continue
            if "Cursos donde es alumno" in titulo:
                comenzar = True
                dic = self.ramos_alumno
                continue
            if "Cursos donde es administrador" in titulo:
                dic = self.ramos_administrador
                continue
            if "Cursos donde es ayudante" in titulo:
                dic = self.ramos_ayudante
                continue
            if not comenzar:
                continue
            titulo = titulo.split()
            sigla = titulo[0]
            seccion = titulo[1].split(".")[1]
            nombre = " ".join(titulo[2:])
            local_link = ramo.find("a")
            if local_link is not None:
                link = "https://intrawww.ing.puc.cl" + local_link.get("href")
                id_ = link.split("=")[-1]
            else:
                link = local_link
                id_ = None
            dic_ramo = {
                "sigla": sigla,
                "nombre": nombre,
                "seccion": seccion,
                "link": link,
                "id": id_
                }
            dic[sigla][seccion] = dic_ramo

    def subir_anuncio(self, sigla, seccion, asunto, mensaje):
        ramo = self.ramos_administrador[sigla][seccion]
        link = "https://intrawww.ing.puc.cl/siding/dirdes/ingcursos/" + \
               "cursos/index.phtml?accion_curso=avisos&acc_aviso=nuevo" + \
               "&id_curso_ic={}".format(ramo["id"])
        form = None
        while form is None:
            self.browser.open(link)
            form = self.browser.get_form(
                action="?accion_curso=avisos&acc_aviso=ingresar_aviso&" + \
                "id_curso_ic={}".format(ramo["id"]))
        form["asunto"].value = sigla + " - Nuevo aviso - " + asunto
        form["contenido_aviso"].value = mensaje
        print(form["asunto"].value)
        print(form["contenido_aviso"].value)
        #self.browser.submit_form(form)

    def subir_anuncio_multiple(self, sigla, secciones, asunto, mensaje):
        for seccion in secciones:
            self.subir_anuncio(sigla, seccion, asunto, mensaje)
        print("Se han subido todos los anuncios")
Example #31
var = 1402713001
csv_ofile = open("student_data_IT_4.csv", 'w', newline='')
writer = csv.writer(csv_ofile)
writer.writerow([
    'Name', 'Father', 'roll_no', 'student_no', 'branch', 'year', 'section',
    'current_sem', 'DOB', 'category', 'hostler', 'Addmission_mod', 'contact',
    'parent_contact', 'address', 'email', '10th%', '12th%', 'B_tech%',
    'sem_1_marks', 'sem_1_attendance', 'sem_2_marks', 'sem_2_attendance',
    'sem_3_marks', 'sem_3_attendance', 'sem_4_marks', 'sem_4_attendance',
    'sem_5_marks', 'sem_5_attendance', 'sem_6_marks', 'sem_6_attendance',
    'sem_7_marks', 'sem_7_attendance', 'sem_8_marks', 'sem_8_attendance'
])

for i in range(120):
    br = RoboBrowser()
    br.open("http://10.10.156.201/login-student.php")
    form = br.get_form('password-form')
    form['username1'].value = var
    form['password'].value = str(var)
    br.submit_form(form)
    container = br.find_all('table')
    if len(container) != 0 and len(container) == 6:
        row = []
        row.append(extract_special(container[2]))
        row.append(extractor(container[3]))
        row.append(extract_special(container[4]))

        relevant = important(extractor(container[5]))
        row.append(relevant)
Example #32
def parseYear(team_name, year_url, year):
    """
    parses a schedule for a specific year on http://www.pro-football-reference.com/years/{YEAR}/games.htm
    follows all the "boxscore" links (column[3]) to get stadium and weather conditions (game_info)
    stores schedule info in nfl_data.schedule
    stores game_info in nfl_data.game_info with schedule ids
    """
    logger = makeLogger(
        cleanKey(team_name) + '_' + str(year), r'./logs_pfrTeamStats/')

    startTime = datetime.now()

    logger.debug('Starting %d', year)

    schedule_list = []
    gameInfo_list = []

    client = MongoClient('localhost', 27017)
    db = client['nfl_data']
    col_team_stats_weekly = db['team_stats_weekly']

    #need to fix this to actually detect duplicate
    # if col_team_stats_weekly.find({'year': year}).count():
    #     logger.debug('Already parsed %s', year)
    #     closeLogger(logger)
    #     return None

    wait = random.uniform(1.5, 3.5)
    logger.debug('Waiting %f', wait)
    time.sleep(wait)

    logger.debug('Opening main page')
    browser = RoboBrowser(history=False,
                          parser='html5lib',
                          user_agent=get_user_agent(logger),
                          timeout=10)
    browser = open_or_follow_link(logger, browser, 'open', year_url)
    table = browser.find(id='games')
    rows = table.find_all('tr')
    header = [
        cleanKey(each.attrs['data-stat']) for each in rows[0].find_all('th')
    ]
    rows = rows[1:]

    row_dicts = []
    for index, row in enumerate(rows):
        logger.debug('Row %d of %d', index, len(rows))
        try:
            week_number = convertToNumber(row.find('th').text)
            row_values = [
                convertToNumber(value.text) for value in row.find_all('td')
            ]
            row_values.insert(0, week_number)
            row_dict = dict(zip(header, row_values))
            row_dict['year'] = year
            row_dict['team_name'] = team_name
            row_dict['year_url'] = year_url

            if row_dict['game_date'].lower() == 'playoffs':
                continue

            row_dicts.append(row_dict)
        except:
            logger.exception(row)

    logger.debug('team_stats_weekly.insert_many')

    col_team_stats_weekly.insert_many(row_dicts)

    logger.debug('parseYear time elapsed: ' + str(datetime.now() - startTime))

    closeLogger(logger)
Example #33
def mainScript(host, username, password, flashFirmware, upgradeFilename, flashSleepDelay, activeMethod, activeCommand, splitCommand, ddnsService, connectRetryDelay, interCommandDelay):
    br = RoboBrowser(history=True, parser="html.parser", timeout=15)

    print('{0:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + ' ' + _("Authenticating"))
    srp6authenticate(br, host, username, password)
    print('{0:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + ' GETing : http://' + host + ' to acquire authenticated CSRFtoken')
    br.open('http://' + host)
    print('{0:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + ' GET completed: ' + str(br.response))
    if activeMethod == 'VodafoneDDNS' or activeMethod == 'VodafoneDDNS2':
        token = br.find(lambda tag: tag.has_attr('name') and tag.has_attr('type') and tag['name'] == 'CSRFtoken')['value']
    else:
        token = br.find(lambda tag: tag.has_attr('name') and tag['name'] == 'CSRFtoken')['content']
    print('{0:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + ' Got authenticated CSRFtoken: ' + token)
    success = False
    if flashFirmware:
        print('{0:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + ' Flash Firmware option is enabled. Activemethod = ' + activeMethod)
        if activeMethod == 'VodafoneDDNS': # DGA0130 Vodafone NZ VANT-9 Ultra Hub
            upgradeurlpostfix = '/modals/upgrade.lp?action=upgradefw'
        elif activeMethod == 'VodafoneDDNS2': # DNA0130 Vodafone NZ VBNT-Z Ultrahub Plus
            upgradeurlpostfix = '/modals/settings/firmwareUpdate.lp?action=upgradefw'
        else:
            upgradeurlpostfix = '/modals/gateway-modal.lp?action=upgradefw'
        filedata = {'CSRFtoken': token, 'upgradefile': ('test.rbi', open(upgradeFilename, 'rb'))}
        print('{0:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + ' POSTing firmware to: ' + 'http://' + host + upgradeurlpostfix)
        r = br.session.post('http://' + host + upgradeurlpostfix, files=filedata)
        print('{0:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + ' Firmware POST completed: ' + str(br.response))
        br._update_state(r)
        print(r.text)
        if r.text == '{ "success":"true" }':
            print('{0:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + ' ' + _("Modem reports flashing commenced successfully"))
            success = True
            print('{0:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + ' ' + _("Waiting for reboot... Sleeping for %s s") % (flashSleepDelay))
            time.sleep(int(flashSleepDelay))
    else:
        success = True

    if success:
        backUp = False
        attempt = 0
        while not backUp:
            attempt += 1
            print('{0:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + ' ' + _("Connect attempt %i") % (attempt))
            try:
                br.open('http://' + host)
                print ('{0:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + ' Response: ' + str(br.response))
                if br.response.ok:
                    backUp = True
            except Exception:
                print('{0:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + ' ' + _('Failed to connect, attempt %i.  Retrying') % (attempt))
                time.sleep(int(connectRetryDelay))
                pass

        print('{0:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + ' ' + _("Modem up"))

    if not splitCommand:
        runCommand(br, host, token, activeMethod, activeCommand, ddnsService)
    else:
        print('{0:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + ' ' + _("Splitting command up using semicolons"))
        for subCommand in [s for s in activeCommand.split(';') if len(s) > 0]:
            runCommand(br, host, token, activeMethod, subCommand, ddnsService)
            print('{0:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + ' ' + _("Sleeping...") + str(int(interCommandDelay)) + ' seconds')
            time.sleep(int(interCommandDelay))

    result = '{0:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + ' ' + _("Please try a ssh connection now to ") + host + _(" with username root and password root (change password immediately with passwd!)  Rebooting your modem now is recommended to stop any services that have been disabled.")
    print(result)
    return result
Example #34
import sys
import os

from robobrowser import RoboBrowser

# Authentication info is taken from environment variables.
AMAZON_EMAIL = "*****@*****.**"
AMAZON_PASSWORD = "******"
#AMAZON_EMAIL = os.environ['AMAZON_EMAIL']
#AMAZON_PASSWORD = os.environ['AMAZON_PASSWORD']

# Create a RoboBrowser object.
browser = RoboBrowser(
    parser='html.parser',  # Parser for Beautiful Soup to use.
    # Use a regular browser's User-Agent (here, Firefox's) to avoid the
    # "cookies are disabled" message that blocks login.
    user_agent=
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:45.0) Gecko/20100101 Firefox/45.0'
)


def main():
    # Open the order history page.
    print('Navigating...', file=sys.stderr)
    browser.open('https://www.amazon.co.jp/gp/css/order-history')

    # Confirm we have been redirected to the sign-in page.
    assert 'Amazonサインイン' in browser.parsed.title.string

    # name="signIn" というサインインフォームを埋める。
    # フォームのname属性の値はブラウザーの開発者ツールで確認できる。
Example #35
File: hallon.py Project: molobrakos/hallon
"""
extract usage data from hallon mobile broadband
"""

from robobrowser import RoboBrowser
from bs4 import BeautifulSoup as Soup
from os import path
import sys
import yaml
import json

with open(path.join(path.dirname(sys.argv[0]),
                    ".hallon-credentials.yaml")) as f:
    CREDENTIALS = yaml.safe_load(f)

URL = "https://www.hallon.se/mina-sidor"
br = RoboBrowser(parser="lxml")
br.open(URL)
form = br.get_form(action="/logga-in")
form["UserName"].value = CREDENTIALS["username"]
form["Password"].value = CREDENTIALS["password"]
br.submit_form(form)

usage = br.select("p.usage")[0].text.replace(",", ".").split()

remaining = round(float(usage[0]), 2)
total = int(usage[2])
used = round(float(total-remaining), 2)
used_pct = round(used*100/total, 1)
days_remaining = int(br.select("p.usage-daysleft")[0].text.split()[0])

print(json.dumps({"total": total,
Example #36
class MullVad:
    accountnumber = "6798499523758101"
    website = "https://www.mullvad.net/account/login/"
    br = RoboBrowser(parser='html.parser', history=True)

    #wallet = Wallet()

    #Login with given accountnumber
    def login(self):
        self.br.open(self.website)
        form = self.br.get_form()
        form['account_number'].value = self.accountnumber
        self.br.session.headers['Referer'] = self.website
        self.br.submit_form(form)

    #Purchase 1 month VPN
    def purchase(self):
        form = self.br.get_form()
        form['months'].value = "1"
        self.br.session.headers['Referer'] = self.br.url
        self.br.submit_form(form)
        month_price = ""
        bitcoin_address = ""
        payment_info_page = str(self.br.parsed)
        #Get the price for one month and bitcoin address from html code
        for line in payment_info_page.split("\n"):
            if "1 month = " in line:
                month_price = line.strip().split(" ")[3]
            if 'input readonly' in line:
                bitcoin_address_line = line.strip().split(" ")[3].split("=")[1]
                bitcoin_address = bitcoin_address_line.partition(
                    '"')[-1].rpartition('"')[0]
        print(month_price)
        print(bitcoin_address)
        #if pay(month_price, bitcoin_address):
        #	setupVPN()
        #else:
        #	print("Error: payment failed")

    #Pay for 1 month using bitcoins and the electrum wallet
    def pay(self, price, bitcoin_address):
        #Start electrum daemon
        os.system('electrum --testnet daemon start')
        #Load electrum default wallet
        os.system('electrum --testnet daemon load_wallet')
        #Check balance in wallet is enough for payment
        balance = os.popen('electrum --testnet getbalance').read()
        balance = float(
            balance.split("\n")[1].split(":")[1].replace('"', "").replace(
                " ", "").replace(",", ""))
        print(balance)
        if balance >= price:
            transaction = os.popen('electrum --testnet payto ' +
                                   bitcoin_address + ' ' + str(price) +
                                   '| electrum --testnet  broadcast -').read()
            #Check if transaction was successful and return state of transaction
            transaction_complete = transaction.find('true')
            if transaction_complete == -1:
                transaction_complete = False
            else:
                transaction_complete = True
            print('transaction = ' + str(transaction_complete))
            return transaction_complete
        else:
            print('Insufficient balance, transaction cancelled')
            return False

    #Setup the VPN
    def setupVPN():
        print("Time to setup the vpn!")
Example #37
from robobrowser import RoboBrowser
from tqdm import tqdm
import requests
import re

browser = RoboBrowser(parser='html.parser')

#lines = ["Alabama",  "Alaska",  "Arizona",  "Arkansas",  "California",  "Colorado",  "Connecticut",  "Delaware",  "Florida",  "Georgia",  "Hawaii",  "Idaho",  "Illinois",  "Indiana",  "Iowa",  "Kansas",  "Kentucky",  "Louisiana",  "Maine",  "Maryland",  "Massachusetts",  "Michigan",  "Minnesota",  "Mississippi",  "Missouri",  "Montana",  "Nebraska",  "Nevada",  "New Hampshire",  "New Jersey",  "New Mexico",  "New York",  "North Carolina",  "North Dakota",  "Ohio",  "Oklahoma",  "Oregon",  "Pennsylvania",  "Rhode Island",  "South Carolina",  "South Dakota",  "Tennessee",  "Texas",  "Utah",  "Vermont",  "Virginia",  "Washington",  "West Virginia",  "Wisconsin",  "Wyoming"]

url = 'https://greatnonprofits.org/state/'
second = '/sort:review_count/direction:desc/page:'


def linkcleaner(links):
    newlist = []
    links = list(set(links))
    for link in links:
        if 'GreatNonprofits' not in link:
            if (link != "http://twitter.com/"
                    and link != "http://twitter.com/share"
                    and link != "https://twitter.com/"
                    and link != "https://twitter.com/share"
                    and link != "https://twitter.com/?lang=en"
                    and link != "http://twitter.com/?lang=en"
                    and link != "//twitter.com/share"):
                if ' ' not in link and 'status' not in link and 'search' not in link and 'intent/' not in link and 'hashtag/' not in link and 'share?' not in link:
                    newlist.append(link)
    return newlist


links_by_state = dict()
Example #38
import json
import random 
import time
import requests
from bs4 import BeautifulSoup
from robobrowser import RoboBrowser

# ====================================== #
# =========== LOGIN AND AUTH =========== #
# ====================================== #
br = RoboBrowser()
br.open('https://dragcave.net/')
form = br.get_form()
print('==================')
form['username'] = input('Username: ')
form['password'] = input('Password: ')
print('==================')
br.submit_form(form)

# =====================================#
# =========== MODE STARTUP =========== #
# =====================================#
f = open('eggpedia.json', 'r')
eggpedia = json.load(f)
f.close()

biome_codes = {
'Coast':'1',
'Desert':'2',
'Forest':'3',
'Jungle':'4',
Example #39
class Scrape(object):
    def __init__(self, cookie):
        self._cookies = parse_cookie(cookie)
        self._data_dir = self._get_data_dir()
        self._ts = datetime.datetime.now().isoformat()

        self.measures = None
        self.props = []

        self.browser = RoboBrowser(
            history=True,
            parser='html5lib',
            user_agent=UA)
        self.browser.session.cookies.update(self._cookies)

    def _get_data_dir(self):
        dirname = os.path.dirname(os.path.abspath(__file__))
        if 'site-packages' not in dirname:
            data_dir = os.path.normpath(os.path.join(dirname, '..', 'data'))
        else:
            data_dir = os.path.normpath(os.path.join(os.getcwd(), 'data'))

        mkdir(data_dir)
        return data_dir


    def fetch_measures(self):
        self.browser.open(URLS.get('measures'))

        # get measures
        measures_raw = self.browser.find(id='ListElections1__ctl0')

        pattern = re.compile(r'^PROPOSITION (?P<prop>[0-9]+) - (?P<description>.*)')
        measures = []
        for a in measures_raw.find_all('a'):
            measure = pattern.match(a.text).groupdict()
            measure['url'] = a['href']
            measures.append(measure)

        self.measures = {
            'timestamp': self._ts,
            'measures': measures,
        }

        with open(os.path.join(self._data_dir, 'measures.json'), 'w') as f:
            f.write(json.dumps(self.measures))




    def fetch_prop(self, prop, url):

        link =  os.path.join(URLS['measures'], url)

        self.browser.open(link)

        cf = self.browser.find(text='Campaign Finance:')
        body = cf.parent.parent

        tables = body.find_all('table')
        new = []
        data = {
            'prop': prop,
            'committees': {},
        }
        for table in tables:
            if table.find('span', text='COMMITTEE ID'):
                for row in table.find_all('tr'):
                    if row.find(text='COMMITTEE ID'):
                        continue
                    cols = row.find_all('td')
                    c = {}
                    c_id = cols[0].find('span').text
                    a = cols[1].find('a')
                    c['name'] = a.text
                    c['link'] = a['href']
                    c['position'] = cols[2].find('span').text
                    data['committees'][c_id] = c

        data['timestamp'] = self._ts

        prop_dir = os.path.join(self._data_dir, prop)
        mkdir(prop_dir)

        with open(os.path.join(prop_dir, 'prop.json'), 'w') as f:
            f.write(json.dumps(data))

        self.props.append(data)
        return data


    def fetch_committee(self, prop, committee_id, link):
        committee_dir = os.path.join(self._data_dir, prop, committee_id)
        mkdir(committee_dir)
        '''
        url =  'http://cal-access.sos.ca.gov%s' % link
        print link

        self.browser.open(url)
        cf = self.browser.find(text='Campaign Finance:')
        body = cf.parent.parent
        '''

        '''

            'contributions',
            'expenditures',
            'late1',
            'late2',
            'late3',

        'http://cal-access.sos.ca.gov/Campaign/Committees/DetailContributionsReceivedExcel.aspx?id=1406518&session=2017',
        'http://cal-access.sos.ca.gov/Campaign/Committees/DetailContributionsMadeExcel.aspx?id=1406518&session=2017',
        'http://cal-access.sos.ca.gov/Campaign/Committees/DetailExpendituresMadeExcel.aspx?id=1406518&session=2017',
        'http://cal-access.sos.ca.gov/Campaign/Committees/DetailLateExcel.aspx?id=1406518&session=2017&view=LATE1',
        'http://cal-access.sos.ca.gov/Campaign/Committees/DetailLateExcel.aspx?id=1406518&session=2017&view=LATE2',
        'http://cal-access.sos.ca.gov/Campaign/Committees/DetailLateExcel.aspx?id=1406518&session=2017&view=LATE3',
        '''

        links =  {
            'contributions_received': '%(prefix)sDetailContributionsReceivedExcel.aspx?id=%(id)s&session=2017' % {
                'prefix': URLS.get('committees'), 
                'id': committee_id,
            },
            'contributions_made': '%(prefix)sDetailContributionsMadeExcel.aspx?id=%(id)s&session=2017' % {
                'prefix': URLS.get('committees'), 
                'id': committee_id,
            },
            'expenditures_made': '%(prefix)sDetailExpendituresMadeExcel.aspx?id=%(id)s&session=2017' % {
                'prefix': URLS.get('committees'), 
                'id': committee_id,
            },
            'late_and_5k_plus_contributions_received': '%(prefix)sDetailLateExcel.aspx?id=%(id)s&session=2017&view=LATE1' % {
                'prefix': URLS.get('committees'), 
                'id': committee_id,
            },
            'late_contributions_made': '%(prefix)sDetailLateExcel.aspx?id=%(id)s&session=2017&view=LATE2' % {
                'prefix': URLS.get('committees'), 
                'id': committee_id,
            },
            'late_independent_expenditures': '%(prefix)sDetailLateExcel.aspx?id=%(id)s&session=2017&view=LATE3' % {
                'prefix': URLS.get('committees'), 
                'id': committee_id,
            },
        }

        data = {}
        data['timestamp'] = self._ts
        for kind, link in links.iteritems():
            self.browser.open(link)
            csv_data = self.browser.find('body').text
            with open(os.path.join(committee_dir, '%s.csv' % kind), 'w') as f:
                f.write(csv_data)

            with open(os.path.join(committee_dir, '%s.csv' % kind), 'r') as f:
                #reader = csv.DictReader(f, delimiter='\t')
                reader = csv.reader(f, delimiter='\t')
                header = next(reader, None)
                rows = []
                for row in reader:
                    rows.append(row)

                data[kind] = {
                    'header': header,
                    'data': rows,
                }
            time.sleep(THROTTLE_TIME)

        with open(os.path.join(committee_dir, 'committee.json'), 'w') as f:
            f.write(json.dumps(data))
Example #40
    for x in college:
        for u in range(low, high):

            # IF condition to concatenate USN
            if u < 10:
                usn = x + year + branch + '00' + str(u)
            elif u < 100:
                usn = x + year + branch + '0' + str(u)
            else:
                usn = x + year + branch + str(u)

            # opens the vtu result login page, gets the usn and opens the result page
            url = "http://results.vtu.ac.in/vitaviresultcbcs/index.php"
            if semc == '7':
                url = "http://results.vtu.ac.in/vitaviresultnoncbcs/index.php"
            br = RoboBrowser()
            br.open(url)
            form = br.get_form()
            form['lns'].value = usn
            br.submit_form(form)
            soup = br.parsed

            # Finds all the table elements and stores in array tds
            tds = soup.findAll('td')
            ths = soup.findAll('th')
            divs = soup.findAll('div', attrs={'class': 'col-md-12'})
            divCell = soup.findAll('div', attrs={'class': 'divTableCell'})

            try:
                sem = divs[5].div.text
                sem = sem.strip('Semester : ')
Example #41
File: idne.py Project: CroMarmot/idne
def cli(prob_id, filename):
    # get the latest submission id, so the new submission must have a different id
    last_id, b, c, d, e = get_latest_verdict(config.username)

    # Browse to Codeforces
    browser = RoboBrowser(parser = 'html.parser')
    browser.open('http://codeforces.com/enter')

    enter_form = browser.get_form('enterForm')
    enter_form['handleOrEmail'] = config.username
    enter_form['password'] = config.password
    browser.submit_form(enter_form)

    try:
        checks = list(map(lambda x: x.getText()[1:].strip(),
                          browser.select('div.caption.titled')))
        if config.username not in checks:
            click.secho('Login Failed.. Wrong password.', fg = 'red')
            return
    except Exception as e:
        click.secho('Login Failed.. Maybe wrong id/password.', fg = 'red')
        return
    click.secho('[{0}] login successful! '.format(config.username), fg = 'green')
    click.secho('Submitting [{1}] for problem [{0}]'.format(prob_id, filename), fg = 'green')
    browser.open('https://codeforces.com/contest/'+prob_id[:-1]+'/problem/'+prob_id[-1])
    submit_form = browser.get_form(class_ = 'submitForm')
    try:
        submit_form['sourceFile'] = filename
    except Exception as e:
        click.secho('File {0} not found in current directory'.format(filename))
        return
    browser.submit_form(submit_form)

    if browser.url[-3:] != '/my':
        click.secho('Failed submission, probably you have submit the same file before', fg = 'red')
        return

    click.secho('[{0}] submitted ...'.format(filename), fg = 'green')
    hasStarted = False
    while True:
        id_, verdict_, time_, memory_, passedTestCount_ = get_latest_verdict(config.username)
        if id_ != last_id and verdict_ != 'TESTING' and verdict_ != None:
            if verdict_ == 'OK':
                click.secho('OK - Passed {} tests'.format(passedTestCount_), fg = 'green')
            else:
                click.secho("{} on test {}".format(verdict_, passedTestCount_ + 1), fg = 'red')
            click.secho('{} MS | {} KB'.format(time_, memory_), fg = ('green' if verdict_ == 'OK' else 'red'))
            break
        elif verdict_ == 'TESTING' and (not hasStarted):
            click.secho("Judgment has begun", fg='green')
            hasStarted = True
        time.sleep(0.5)
Example #42
from robobrowser import RoboBrowser
br = RoboBrowser()
br.open("https://<url>")
form = br.get_form()
form['username'] = "******"
form['password'] = "******"
br.submit_form(form)
print(str(br.parsed))
Example #43
def run(wait):
    """Starts the scrapping proccess.
    creates a process per year between minyear and maxyear
    """

    logger = makeLogger('main', r'./logs_pfrTeamStats/')

    startTime = datetime.now()

    logger.debug('start time: ' + str(startTime))

    logger.debug('waiting %d seconds', wait)
    time.sleep(wait)

    logger.debug('Opening main page')
    browser = RoboBrowser(history=False,
                          parser='html5lib',
                          user_agent=get_user_agent(logger),
                          timeout=10)
    browser = open_or_follow_link(
        logger, browser, 'open',
        "http://www.pro-football-reference.com/teams/")
    table_body = browser.find(id='teams_active').find('tbody')
    rows = table_body.find_all('tr')

    team_url_tups = []

    for index, row in enumerate(rows):
        logger.debug('Row %d of %d', index, len(rows))
        try:
            team_link = row.find('th').find('a')
            if team_link:
                team_url = 'http://www.pro-football-reference.com' + team_link[
                    'href']
                team_name = team_link.text
                team_url_tups.append((team_url, team_name))
        except:
            logger.exception(row)

    pool = Pool(processes=int(get_proxy_count() / 2.5))
    results = []

    for team_url, team_name in team_url_tups:
        #print parseTeam(team_url, team_name)
        results.append(pool.apply_async(parseTeam, (
            team_url,
            team_name,
        )))

    # Prevents any more tasks from being submitted to the pool. Once all the tasks
    # have been completed the worker processes will exit.
    pool.close()
    # Wait for the worker processes to exit. One must call close() or terminate()
    # before using join().
    pool.join()

    year_url_tups = []
    for result in results:
        year_url_tup = result.get()
        if year_url_tup:
            year_url_tups += (year_url_tup)

    logger.debug('Done gathering %d year urls', len(year_url_tups))

    pool = Pool(processes=int(get_proxy_count() / 2))

    logger.debug('Shuffling year_urls')
    random.shuffle(year_url_tups)
    logger.debug('Starting to parse year_urls')
    for team_name, year_url, year in year_url_tups:
        #parseYear(team_name, year_url, year)
        pool.apply_async(parseYear, (
            team_name,
            year_url,
            year,
        ))

    # Prevents any more tasks from being submitted to the pool. Once all the tasks
    # have been completed the worker processes will exit.
    pool.close()
    # Wait for the worker processes to exit. One must call close() or terminate()
    # before using join().
    pool.join()

    logger.debug('run time: ' + str(datetime.now() - startTime))

    closeLogger('main')
Example #44
## search terms
terms = [
    'chafee', 'clinton', "o'malley", 'sanders', 'webb', 'warren', 'bush',
    'carson', 'christie', 'cruz', 'fiorina', 'gilmore', 'graham', 'huckabee',
    'jindal', 'kasich', 'pataki', 'paul', 'perry', 'rubio', 'santorum',
    'trump', 'walker', 'romney', 'election', 'presidential', 'cycle',
    'primary', 'primaries', 'candidate', 'race'
]

## dates to search in 2015
months, days = range(1, 9), range(1, 32)
dates = itertools.product(months, days)

## search the archives for potentially relevant material
browser = RoboBrowser(history=True)
relevant_urls = []
bad_urls = []
for date in dates:
    m, d = date[0], date[1]
    archive_url = 'http://www.wsj.com/public/page/archive-2015-' + str(
        m) + '-' + str(d) + '.html'
    try:
        browser.open(archive_url)
        articles = browser.find_all('h2')
        for article in articles:
            if any(word in article.get_text().lower() for word in terms):
                relevant_urls.append(article.find('a').get('href'))
    except:
        bad_urls.append(archive_url)
        pass
Example #45
0


class Paper:
    no = 0
    title = ""
    author = ""
    journal = ""
    ISSN = ""
    year = 0
    cited = 0
    doi = ""
    IF = 0


browser = RoboBrowser(history=True)
browser.open("http://apps.webofknowledge.com/WOS_GeneralSearch_input.do;jsessionid=C0869EEDE01F91FB8B7F92ED05EB8972?product=WOS&search_mode=GeneralSearch&SID=E1XUuzYJfkILEXCQF8V&preferencesSaved=")

papers = []

if (len(sys.argv) == 2):
    filename = str(sys.argv[1])
else:
    filename = "files/top20.csv"

df = pd.read_csv(filename, header=0)
titles = df.values[:, 0]

fileparts = filename.split('.')
fileresult = fileparts[0] + "_result.csv"
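
# Once `papers` has been filled in from the Web of Science results, the rows
# can be written next to the input file (a sketch using the Paper fields
# defined above; the column order is an assumption):
# rows = [[p.no, p.title, p.author, p.journal, p.ISSN, p.year, p.cited, p.doi,
#          p.IF] for p in papers]
# pd.DataFrame(rows,
#              columns=["no", "title", "author", "journal", "ISSN", "year",
#                       "cited", "doi", "IF"]).to_csv(fileresult, index=False)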
Example #46
0
r
r.cookies
r = requests.get("http://www.google.com")
r
r.cookies
r.url
r = requests.get("http://www.google.com", redirect=False)
requests.request?
r = requests.get("http://www.google.com", allow_redirects=False)
r.url
r.status_code
r.headers["location"]
requests.request?
import robobrowser
from robobrowser import RoboBrowser
b = RoboBrowser(parser="lxml.html")
b
b.open("http://www.chandrashekar.info")
b.url
b.contents
b.response
b.response.status_code
b.links
dir(b)
b.get_links()
b = RoboBrowser(parser="lxml")
b.open("http://www.chandrashekar.info")
b.get_links()
b.get_links()
b.forms
dir(b)
Example #47
0
 def setUp(self):
     super().setUp()
     self.browser = RoboBrowser(history=True, parser='html.parser')
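
 # A minimal follow-up test, sketched under the assumption that the suite can
 # reach an external page; the URL and expected status code are placeholders.
 def test_open_page(self):
     self.browser.open('https://example.com')
     self.assertEqual(self.browser.response.status_code, 200)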
Example #48
0
File: spoj.py  Project: yusufsholeh/kactl
import os
import shutil

from robobrowser import RoboBrowser

extension = {
    "C++": "cpp",
    "C": "c",
    "C++14": "cpp",
    "Java": "java",
    "Python": "py",
    "CPP": "cpp",
    "JAVA": "java"
}
username = raw_input("Enter your spoj username:")
password = raw_input("Enter your spoj password:")
browser = RoboBrowser(parser="html5lib")
browser.open('http://www.spoj.com/')
form = browser.get_form(id='login-form')
form['login_user'].value = username
form['password'].value = password
browser.submit_form(form)
browser.open('http://www.spoj.com/myaccount')
problems = browser.find(id="user-profile-tables").find_all('td')

try:
    os.mkdir("spoj_solutions")
except:
    shutil.rmtree("spoj_solutions")
    os.mkdir("spoj_solutions")

for problem in problems:
Example #49
0
def get_non_legistar_entries(past_entries, city, search_regex):
    positive_results = []
    new_agendas = []

    browser = RoboBrowser(history=True)
    header = {
        'User-Agent':
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
        'Accept-Encoding': 'none',
        'Accept-Language': 'en-US,en;q=0.8',
        'Connection': 'keep-alive'
    }
    s = requests.Session()
    s.headers = header
    browser = RoboBrowser(session=s, parser="lxml")
    agenda_url = city["agenda_site"]

    #non-Legistar sites need to be very specific - these sites could throw anything at you.
    #if you need to add another city, follow this format: add an
    #elif city["short"] == "<shortname>" branch below that opens the agenda page,
    #finds the agenda links, and appends (meetingid, tweet_text, url) to
    #positive_results (a skeleton is sketched after this function).
    if city["short"] == "berkeley":
        try:
            browser.open(agenda_url)
            links = browser.find_all("a", title="Agenda")
        except:
            print("There was a problem opening the URL: " + agenda_url)
            print("Aborting search for agendas from " + city["name"])
            return [], []

        for link in links:
            url = city["root_site"] + str(link['href'])
            meetingid = url[url.rfind("/") + 1:url.rfind(".aspx")]
            #print(meetingid)
            if not any(meetingid in entry for entry in past_entries):

                new_agendas = new_agendas + [meetingid]
                browser.follow_link(link)
                content = str(browser.response.content)
                content = content.lower()
                content = content[content.find("innercontentcontainer"):]

                term_match = []
                m = re.findall(search_regex, content.lower())
                if m is not None and len(m) > 0:
                    term_match = term_match + list(set(m))

                browser.back()

                if (len(term_match) > 0):
                    page_body = str(browser.response.content)
                    index1 = page_body.find(meetingid)
                    page_body = page_body[0:index1]
                    index2 = page_body.rfind("<tr>")
                    page_body = page_body[index2:]
                    deets = re.findall('[\\d]+/[\\d]+', page_body)
                    meeting_date = deets[0]
                    matches = ""
                    for term in set(term_match):
                        for bogus in ['-', ' ']:
                            if bogus in term:
                                term = term.replace(bogus, "")
                        matches = matches + "#" + term + ", "
                    positive_results.append(
                        (meetingid, "#" + city["short"] + " #" +
                         city["hash_tag"] + " city meeting on " +
                         meeting_date + " about " + matches, url))

    elif city["short"] == "berkeleyprc" or city["short"] == "berkeleyp&j":
        try:
            browser.open(agenda_url)
            links = browser.find_all("a", title=re.compile(".genda"))
        except:
            print("There was a problem opening the URL: " + agenda_url)
            print("Aborting search for agendas from " + city["name"])
            return [], []

        for link in links:
            meetingid = str(link)
            url = city["root_site"] + str(link['href']).replace(" ", "%20")
            #print(url)
            pdf_index = url.rfind(".pdf")
            if pdf_index < 0:
                meetingid = url[url.rfind("/") + 1:]
                if not any(meetingid in entry for entry in past_entries):
                    new_agendas = new_agendas + [meetingid]
                continue
            meetingid = url[url.rfind("/") + 1:pdf_index]
            if not any(meetingid in entry for entry in past_entries):
                new_agendas = new_agendas + [meetingid]
                browser.follow_link(link)
                content = browser.response.content

                term_match = search_pdf(meetingid, content, search_regex)

                browser.back()
                if (len(term_match) > 0):
                    searchdex = str(link['title'])
                    deets = searchdex.split()
                    meeting_date = deets[0].lower()
                    for bogus in string.ascii_letters:
                        if bogus in meeting_date:
                            meeting_date = meeting_date.replace(bogus, "")

                    matches = ""
                    for term in set(term_match):
                        for bogus in ['-', ' ']:
                            if bogus in term:
                                term = term.replace(bogus, "")
                        matches = matches + "#" + term + ", "
                    positive_results.append(
                        (meetingid,
                         "#" + city["short"] + " #" + city["hash_tag"] +
                         " mtg on " + meeting_date + " about " + matches, url))
    else:
        return [], []

    return new_agendas, positive_results
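
# Skeleton for adding another city branch to get_non_legistar_entries
# (illustrative only: the "springfield" short name and the link/title
# patterns are assumptions, not a real configuration).
#
#     elif city["short"] == "springfield":
#         try:
#             browser.open(agenda_url)
#             links = browser.find_all("a", title=re.compile(".genda"))
#         except:
#             print("There was a problem opening the URL: " + agenda_url)
#             print("Aborting search for agendas from " + city["name"])
#             return [], []
#         for link in links:
#             url = city["root_site"] + str(link['href'])
#             meetingid = url[url.rfind("/") + 1:]
#             if not any(meetingid in entry for entry in past_entries):
#                 new_agendas = new_agendas + [meetingid]
#                 browser.follow_link(link)
#                 term_match = re.findall(search_regex,
#                                         str(browser.response.content).lower())
#                 browser.back()
#                 if len(term_match) > 0:
#                     positive_results.append(
#                         (meetingid, "#" + city["short"] + " #" +
#                          city["hash_tag"] + " meeting about #" +
#                          ", #".join(set(term_match)), url))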
Example #50
0
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 03 15:29:35 2017

@author: d_floriello

enel scraper
"""

from robobrowser import RoboBrowser
from selenium import webdriver
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary

browser = RoboBrowser()
login_url = 'https://smistaweb.enel.it/tpauth/JavaNotEnabled.html'
browser.open(login_url)
form = browser.get_form(id='form_id')

browser = webdriver.Firefox()
browser.get(login_url)

#Profilo: Z:\Lorenzo\Entrust Profile\GABRIELE BERTHOLET.epf
#Password: Axopower_123

form['profile'].value = "Z:\Lorenzo\Entrust Profile\GABRIELE BERTHOLET.epf"
form['password'].value = "Axopower_123"
browser.submit_form(form)

binary = FirefoxBinary("C:/Program Files (x86)/Mozilla Firefox/firefox.exe")

login_url = 'https://smistaweb.enel.it/tpauth/JavaNotEnabled.html'
Example #51
0
for i, line in enumerate(f):
    movie_id = line.replace('\n', '')
    if i + 1 < start_line:
        continue
    if i + 1 > end_line:
        break

    retry_count = 0
    while True:
        retry_count += 1
        if retry_count >= 20:
            break
            
        print(i + 1, movie_id, end='\t')
        try:
            browser = RoboBrowser(history=True, parser='html.parser', timeout=10)

            browser.open('http://www.imdb.com/title/'+movie_id)
            poster_tag = str(browser.find(class_=re.compile(r'\bposter\b')))

            browser.select('id.titleDetails')
            country = str(browser.find(href=re.compile(r'\?country')).text)
            print(country, end='\t')

            browser.select('id.titleStoryLine')
            genres = browser.find_all(href=re.compile(r'\?ref_=tt_stry_gnr'))

            for j in range(len(genres)):
                genres[j] = str(genres[j]).split('> ')[1].split('<')[0]
            genres = str(genres).replace('[', '').replace(']', '')
            genres = genres.replace(', ', ':')
Example #52
0
import re
import config

from robobrowser import RoboBrowser

br = RoboBrowser()
br.open(
    "https://192.168.100.1:6082/php/uid.php?vsys=1&rule=2&url=http://chitkara.cloud/ChitkaraLocalCloud/home.php",
    verify=False)
form = br.get_form()
form['user'] = config.DATACOUP_USERNAME
form['passwd'] = config.DATACOUP_PASSWORD
br.submit_form(form)

src = str(br.parsed)

start = '<title>'
end = '</title>'

result = re.search('%s(.*)%s' % (start, end), src).group(1)

print(result)
Example #53
0
class TinderApi():
    def __init__(self, data_folder):
        self.get_headers = {
            'app_version': '6.9.4',
            'platform': 'ios',
            "User-agent": "Tinder/7.5.3 (iPhone; iOS 10.3.2; Scale/2.00)",
            "Accept": "application/json"
        }
        self.get_message_headers = {
            "accept": "application/json",
            "platform": "web",
            "tinder-version": "2.46.1"
        }

        self.headers = self.get_headers.copy()
        self.headers['content-type'] = "application/json"
        self.host = "https://api.gotinder.com"
        self.browser = RoboBrowser()
        self.data_folder = data_folder
        self.page_token = None

    def get_person_data(self, data):
        if "user" in data:
            person = data['user']
            type = "recommendation"
        elif 'person' in data:
            person = data['person']
            type = "match"
        else:
            person = data
            type = "person"
        return person, type

    def download_people_data_api(self,
                                 data_list,
                                 folder_path,
                                 photos,
                                 insta,
                                 messages,
                                 rename_images,
                                 amount,
                                 force_overwrite=False,
                                 log_to_widget=True,
                                 thread_update_signal=None):
        downloaded_data = []
        if not isinstance(data_list, list):
            data_list = [data_list]
        total = len(data_list)
        if amount > 0:
            total = min(total, amount)
        for i in range(total):
            if thread_update_signal is not None:
                thread_update_signal.emit("Downloading " + str(folder_path) +
                                          ": " + str(i + 1) + "/" + str(total))
            log.i("API", "Downloading " + str(i + 1) + "/" + str(total),
                  log_to_widget)
            updated_data = self.download_person_data(data_list[i], folder_path,
                                                     photos, insta, messages,
                                                     rename_images,
                                                     force_overwrite,
                                                     log_to_widget,
                                                     thread_update_signal)
            downloaded_data.append(updated_data)
            log.i("API", "Data Downloaded!", log_to_widget)
        return downloaded_data

    def download_person_data(self,
                             data,
                             base_folder,
                             photos,
                             insta,
                             messages,
                             rename_images,
                             force_overwrite=False,
                             log_to_widget=True,
                             thread_update_signal=None):
        person_data, type = self.get_person_data(data)
        id = person_data['_id']
        name = person_data['name']
        path = base_folder + "/" + str(name) + "_" + str(id) + "/"
        person_data['path'] = str(os.path.abspath(path))
        log.i(
            "API", "Downloading " + type + ": " + name + " " + id + " to: " +
            str(person_data['path']), log_to_widget)
        if os.path.exists(path):
            log.d("API", "Person path already exists: " + person_data['path'],
                  log_to_widget)
        else:
            os.makedirs(path)
            log.d("API", "Person path created: " + person_data['path'],
                  log_to_widget)
        person_data['local_path'] = str(os.path.abspath(path))

        if insta and 'instagram' in person_data:
            self.download_instagram_photos(person_data['instagram'], path,
                                           rename_images, force_overwrite,
                                           log_to_widget, thread_update_signal)

        if photos and 'photos' in person_data:
            self.download_photos(person_data['photos'], path, rename_images,
                                 force_overwrite, log_to_widget,
                                 thread_update_signal)

        if messages and 'match' in type:
            data['messages'] = self.download_messages(data, log_to_widget,
                                                      thread_update_signal)

        data['AI_Dating_metadata'] = {}
        data['AI_Dating_metadata']['last_updated_datetime'] = str(
            datetime.now().strftime("%d-%b-%Y %H:%M:%S"))
        data['AI_Dating_metadata']['last_updated_timestamp'] = str(
            datetime.utcnow())

        self.write_data_to_file(data, path, log_to_widget,
                                thread_update_signal)
        return data

    def download_messages(self,
                          match_data,
                          log_to_widget=True,
                          thread_update_signal=None):
        log.d("API", "Downloading match messages", log_to_widget)
        messages = self.get_messages(match_data, 100, None, log_to_widget,
                                     thread_update_signal)
        log.d(
            "API", "Downloaded messages: " +
            str(match_data["_id"] + ": " + str(messages)), log_to_widget)
        if messages is not None and 'data' in messages:
            return messages['data']['messages']
        return []

    def write_data_to_file(self,
                           data,
                           base_path,
                           log_to_widget=True,
                           thread_update_signal=None):
        log.d("API", "Data written to: " + str(base_path), log_to_widget)
        with open(base_path + 'data.yaml', 'w') as fp:
            yaml.dump(data, fp)

    def download_photos(self,
                        photos_list,
                        base_path,
                        rename,
                        force_overwrite=False,
                        log_to_widget=True,
                        thread_update_signal=None):
        for i in range(len(photos_list)):
            photo = photos_list[i]
            log.d("API", "Downloading full-size photos", log_to_widget)
            filename, skipped = self.download_file(photo['url'], base_path,
                                                   rename, i, "",
                                                   force_overwrite,
                                                   log_to_widget)
            if filename is not None:
                photo['local_path'] = str(os.path.abspath(filename))
            if 'processedFiles' in photo:
                processed_files = photo['processedFiles']
                small_photo = processed_files[len(processed_files) - 1]
                log.d("API", "Downloading small photo", log_to_widget)
                filename, skipped = self.download_file(
                    small_photo['url'],
                    base_path + "/small/",
                    rename,
                    i,
                    "_small",
                    force_overwrite,
                    log_to_widget=log_to_widget)
                if filename is not None:
                    small_photo['local_path'] = str(os.path.abspath(filename))

    def download_instagram_photos(self,
                                  instagram_data,
                                  base_path,
                                  rename,
                                  force_overwrite=False,
                                  log_to_widget=True,
                                  thread_update_signal=None):
        if 'photos' not in instagram_data.keys():
            log.d("API", "NO instagram photos", log_to_widget)
            return
        log.d("API", "Downloading instagram photos", log_to_widget)
        for i in range(len(instagram_data['photos'])):
            filename, skipped = self.download_file(
                instagram_data['photos'][i]['image'], base_path + "instagram/",
                rename, i, "", force_overwrite, log_to_widget)
            if filename is not None:
                instagram_data['photos'][i]['local_path'] = str(
                    os.path.abspath(filename))

    def download_file(self,
                      url,
                      base_path,
                      rename,
                      index,
                      postfix="",
                      force_overwrite=False,
                      log_to_widget=True,
                      thread_update_signal=None):
        try:
            file_name = str(index) + postfix + ".jpg"
            if not rename:
                file_name = (url.split("/")[-1] + '.jpg').split('?')[0]
            full_filename = base_path + file_name
            if not os.path.exists(base_path):
                os.makedirs(base_path)
                log.d("API", "File path created: " + base_path, log_to_widget)
            if not os.path.exists(full_filename) or force_overwrite:
                self.browser.open(url)
                with open(full_filename, "wb") as image_file:
                    image_file.write(self.browser.response.content)
                    if force_overwrite:
                        log.d("API", "Forcing Re-Download: " + full_filename,
                              log_to_widget)
                    else:
                        log.i("API", "Downloading: " + full_filename,
                              log_to_widget)
                    return full_filename, False
            else:
                log.d(
                    "API",
                    "File already downloaded (force_overwrite=False): " +
                    full_filename, log_to_widget)
                return full_filename, True
        except Exception as e:
            log.e("API", "EXCEPTION!: " + str(e), log_to_widget)
            return None, False

    def read_data(self,
                  file_path,
                  log_to_widget=True,
                  thread_update_signal=None):
        try:
            with open(file_path, "r") as f:
                try:
                    data = json.load(f)
                except Exception as e:
                    try:
                        data = yaml.safe_load(f)
                    except Exception as e:
                        return None
                log.i("API", "Data read from file: " + str(file_path),
                      log_to_widget)
                return data
        except Exception as e:
            log.e(
                "API", "Exception reading data from file : " +
                str(os.path.abspath(file_path)) + ", Exc: " + str(e),
                log_to_widget)
            return None

    def reload_data_from_disk(self,
                              folder_path,
                              merged_filename,
                              photos,
                              insta,
                              messages,
                              force_overwrite=False,
                              log_to_widget=True,
                              thread_update_signal=None):
        list = []
        try:
            for subdir, dirs, files in os.walk(folder_path):
                total_dirs = len(dirs)
                for i in range(len(dirs)):
                    data_path = os.path.join(subdir, dirs[i]) + "/"
                    data_file_path = data_path + "data.yaml"
                    try:
                        if os.path.exists(data_file_path):
                            with open(data_file_path) as yf:
                                data = yaml.safe_load(yf)
                                person_data, type = self.get_person_data(data)
                                person_data['path'] = os.path.abspath(
                                    data_path
                                )  # Updating the data path just in case
                                if photos and 'photos' in person_data:
                                    self.download_photos(
                                        person_data['photos'],
                                        data_path,
                                        True,
                                        force_overwrite,
                                        log_to_widget=log_to_widget)

                                if insta and 'instagram' in person_data and 'photos' in person_data[
                                        'instagram']:
                                    self.download_instagram_photos(
                                        person_data['instagram'],
                                        data_path,
                                        True,
                                        force_overwrite,
                                        log_to_widget=log_to_widget)
                                if messages and 'match' in type:
                                    data['messages'] = self.download_messages(
                                        data, log_to_widget)
                                log.d("API", "Updating " + type + " data file",
                                      log_to_widget)

                                data['AI_Dating_metadata'] = {}
                                data['AI_Dating_metadata'][
                                    'last_updated_datetime'] = str(
                                        datetime.now().strftime(
                                            "%d-%b-%Y %H:%M:%S"))
                                data['AI_Dating_metadata'][
                                    'last_updated_timestamp'] = str(
                                        datetime.utcnow())

                                self.write_data_to_file(
                                    data, data_path, log_to_widget,
                                    thread_update_signal)
                                log.d("API", "Updated", log_to_widget)
                                list.append(data)
                            log.i(
                                "API",
                                str(i + 1) + "/" + str(total_dirs) + " - " +
                                str(dirs[i]) + " " + person_data['name'],
                                log_to_widget)
                        else:
                            log.i(
                                "API",
                                str(i + 1) + "/" + str(total_dirs) + " - " +
                                str(dirs[i]) + " SKIPPED", log_to_widget)
                    except Exception as e:
                        log.e("API", "Exception reloading data " + str(e),
                              log_to_widget)
                    if thread_update_signal is not None:
                        thread_update_signal.emit(
                            str(folder_path) + "\t" + str(i + 1) + "/" +
                            str(total_dirs))
                break
        except Exception as e:
            log.e("API", "Exception in reloading from disk: " + str(e),
                  log_to_widget)
        try:
            with open(merged_filename, "w+") as f:
                json.dump(list, f)
        except Exception as e:
            log.e(
                "API", "Could not save merged file " + merged_filename + ": " +
                str(e), log_to_widget)
        return list

    def get_fb_access_token(self,
                            email,
                            password,
                            log_to_widget=True,
                            thread_update_signal=None):
        token = fb_auth_token.get_fb_access_token(email, password)
        log.e("TOKEN", "Gotten token: " + str(token), log_to_widget)
        return token

    def get_fb_user_id(self,
                       fb_token,
                       log_to_widget=True,
                       thread_update_signal=None):
        fb_id = fb_auth_token.get_fb_id(fb_token)
        log.e("FB_ID", "Gotten fb user id: " + str(fb_id), log_to_widget)
        return fb_id

    def get_auth_token(self,
                       fb_auth_token,
                       fb_user_id,
                       log_to_widget=True,
                       thread_update_signal=None):
        log.d("API", "get_auth_token: " + fb_auth_token + "\t" + fb_user_id,
              log_to_widget)
        if "error" in fb_auth_token:
            return {"error": "could not retrieve fb_auth_token"}
        if "error" in fb_user_id:
            return {"error": "could not retrieve fb_user_id"}
        url = self.host + '/v2/auth/login/facebook'
        req = requests.post(url,
                            headers=self.headers,
                            data=json.dumps({
                                'token': fb_auth_token,
                                'facebook_id': fb_user_id
                            }))
        try:
            log.d("API", "Sending JSON request", log_to_widget)
            json_request = req.json()
            log.i("API",
                  "Token JSON status: " + str(json_request['meta']['status']),
                  log_to_widget)
            tinder_auth_token = json_request["data"]["api_token"]
            self.headers.update({"X-Auth-Token": tinder_auth_token})
            self.get_headers.update({"X-Auth-Token": tinder_auth_token})
            self.get_message_headers.update(
                {"X-Auth-Token": tinder_auth_token})
            log.s("API", "You have been successfully authorized!")
            return tinder_auth_token
        except Exception as e:
            log.e("API", "Error getting Tinder Token " + str(e), log_to_widget)
            return {
                "error":
                "Something went wrong. Sorry, but we could not authorize you."
            }

    def authverif(self,
                  fb_access_token,
                  fb_user_id,
                  log_to_widget=True,
                  thread_update_signal=None):
        res = self.get_auth_token(fb_access_token, fb_user_id)
        if "error" in res:
            return False
        return True

    def get_recommendations(self,
                            log_to_widget=True,
                            thread_update_signal=None):
        '''
        Returns a list of users that you can swipe on
        '''
        try:
            r = requests.get('https://api.gotinder.com/user/recs',
                             headers=self.headers)
            json = r.json()
            log.i(
                "API", "get_recommendations: Got response. Status: " +
                str(json['status']) + ": " +
                utils.error_code_to_message[json['status']], log_to_widget)
            return json
        except requests.exceptions.RequestException as e:
            log.e("API",
                  "Something went wrong with getting recomendations:" + str(e),
                  log_to_widget)

    def get_updates(self,
                    last_activity_date="",
                    log_to_widget=True,
                    thread_update_signal=None):
        '''
        Returns all updates since the given activity date.
        The last activity date is defaulted at the beginning of time.
        Format for last_activity_date: "2017-07-09T10:28:13.392Z"
        '''
        try:
            url = self.host + '/updates'
            r = requests.post(url,
                              headers=self.headers,
                              data=json.dumps(
                                  {"last_activity_date": last_activity_date}))
            return r.json()
        except requests.exceptions.RequestException as e:
            log.e("API", "Something went wrong with getting updates:" + str(e),
                  log_to_widget)
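
    # get_updates usage sketch: an empty string (the default) returns
    # everything; otherwise pass a timestamp in the documented format,
    # e.g. (hypothetical call) api.get_updates("2017-07-09T10:28:13.392Z")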

    def get_self(self, log_to_widget=True, thread_update_signal=None):
        '''
        Returns your own profile data
        '''
        try:
            url = self.host + '/profile'
            r = requests.get(url, headers=self.headers)
            return r.json()
        except requests.exceptions.RequestException as e:
            log.e("API",
                  "Something went wrong. Could not get your data:" + str(e),
                  log_to_widget)

    def change_preferences(self, **kwargs):
        '''
        ex: change_preferences(age_filter_min=30, gender=0)
        kwargs: a dictionary - whose keys become separate keyword arguments and the values become values of these arguments
        age_filter_min: 18..46
        age_filter_max: 22..55
        age_filter_min <= age_filter_max - 4
        gender: 0 == seeking males, 1 == seeking females
        distance_filter: 1..100
        discoverable: true | false
        {"photo_optimizer_enabled":false}
        '''
        try:
            url = self.host + '/profile'
            r = requests.post(url,
                              headers=self.headers,
                              data=json.dumps(kwargs))
            return r.json()
        except requests.exceptions.RequestException as e:
            log.e(
                "API",
                "Something went wrong. Could not change your preferences:" +
                str(e), log_to_widget)
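
    # change_preferences usage sketch, with hypothetical values inside the
    # ranges documented above:
    # api.change_preferences(age_filter_min=25, age_filter_max=35,
    #                        distance_filter=10, discoverable=True)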

    def get_meta(self, log_to_widget=True, thread_update_signal=None):
        '''
        Returns meta data on yourself. Including the following keys:
        ['globals', 'client_resources', 'versions', 'purchases',
        'status', 'groups', 'products', 'rating', 'tutorials',
        'travel', 'notifications', 'user']
        '''
        try:
            url = self.host + '/meta'
            r = requests.get(url, headers=self.headers)
            return r.json()
        except requests.exceptions.RequestException as e:
            log.e(
                "API",
                "Something went wrong. Could not get your metadata:" + str(e),
                log_to_widget)

    def get_meta_v2(self, log_to_widget=True, thread_update_signal=None):
        '''
        Returns meta data on yourself from V2 API. Including the following keys:
        ['account', 'client_resources', 'plus_screen', 'boost',
        'fast_match', 'top_picks', 'paywall', 'merchandising', 'places',
        'typing_indicator', 'profile', 'recs']
        '''
        try:
            url = self.host + '/v2/meta'
            r = requests.get(url, headers=self.headers)
            return r.json()
        except requests.exceptions.RequestException as e:
            log.e(
                "API",
                "Something went wrong. Could not get your metadata:" + str(e),
                log_to_widget)

    def update_location(self,
                        lat,
                        lon,
                        log_to_widget=True,
                        thread_update_signal=None):
        '''
        Updates your location to the given float inputs
        Note: Requires a passport / Tinder Plus
        '''
        try:
            url = self.host + '/passport/user/travel'
            r = requests.post(url,
                              headers=self.headers,
                              data=json.dumps({
                                  "lat": lat,
                                  "lon": lon
                              }))
            return r.json()
        except requests.exceptions.RequestException as e:
            log.e(
                "API",
                "Something went wrong. Could not update your location:" +
                str(e), log_to_widget)

    def reset_real_location(self,
                            log_to_widget=True,
                            thread_update_signal=None):
        try:
            url = self.host + '/passport/user/reset'
            r = requests.post(url, headers=self.headers)
            return r.json()
        except requests.exceptions.RequestException as e:
            log.e(
                "API",
                "Something went wrong. Could not update your location:" +
                str(e), log_to_widget)

    def get_recs_v2(self, log_to_widget=True, thread_update_signal=None):
        '''
        This works more consistently than the normal get_recommendations because it seems to check the new location
        '''
        try:
            url = self.host + '/v2/recs/core?locale=en-US'
            r = requests.get(url, headers=self.headers)
            return r.json()
        except Exception as e:
            log.e("API", 'excepted', log_to_widget)

    def set_webprofileusername(self, username):
        '''
        Sets the username for the webprofile: https://www.gotinder.com/@YOURUSERNAME
        '''
        try:
            url = self.host + '/profile/username'
            r = requests.put(url,
                             headers=self.headers,
                             data=json.dumps({"username": username}))
            return r.json()
        except requests.exceptions.RequestException as e:
            log.e(
                "API",
                "Something went wrong. Could not set webprofile username:"******"API",
                "Something went wrong. Could not delete webprofile username:"******"API",
                  "Something went wrong. Could not get that person:" + str(e),
                  log_to_widget)

    def get_messages(self,
                     match_data=None,
                     count=100,
                     page_token=None,
                     log_to_widget=True,
                     thread_update_signal=None):
        # https://api.gotinder.com/v2/matches/5e762f611d443d01005c86975ea8db0a728e280100783a6e/messages?locale=en&count=100
        # https://api.gotinder.com/v2/matches/5cae0e962d5de015002490965ea8db0a728e280100783a6e/messages?locale=en&count=100&page_token=
        try:
            path = '/v2/matches/%s/messages?locale=en&count=%s' % (
                match_data["_id"], count)

            if page_token is not None:
                path += "&page_token=%s" % page_token
            r = requests.get(self.host + path, headers=self.headers)
            print("Messages url: " + str(self.host + path))
            r_json = r.json()
            if 'next_page_token' in r_json['data']:
                new_data = self.get_messages(match_data, 100,
                                             r_json['data']['next_page_token'],
                                             log_to_widget,
                                             thread_update_signal)
                for message in new_data['data']['messages']:
                    message[
                        'page_token'] = page_token  # This will be needed to get messages
                r_json['data']['messages'] = r_json['data'][
                    'messages'] + new_data['data']['messages']
            r_json["match_id"] = match_data["_id"]
            return r_json
        except requests.exceptions.RequestException as e:
            log.e("API",
                  "Something went wrong. Could not get messages:" + str(e),
                  log_to_widget)

    def send_msg(self,
                 match_id,
                 msg,
                 log_to_widget=True,
                 thread_update_signal=None):
        try:
            url = self.host + '/user/matches/%s' % match_id
            r = requests.post(url,
                              headers=self.headers,
                              data=json.dumps({"message": msg}))
            return r.json()
        except requests.exceptions.RequestException as e:
            log.e(
                "API",
                "Something went wrong. Could not send your message:" + str(e),
                log_to_widget)

    def unmatch(self, match_id, log_to_widget=True, thread_update_signal=None):
        try:
            url = self.host + '/user/matches/%s' % match_id
            r = requests.delete(url, headers=self.headers)
            return r.json()
        except requests.exceptions.RequestException as e:
            log.e("API",
                  "Something went wrong. Could not unmatch person:" + str(e),
                  log_to_widget)

    def superlike(self,
                  person_id,
                  log_to_widget=True,
                  thread_update_signal=None):
        try:
            url = self.host + '/like/%s/super' % person_id
            r = requests.post(url, headers=self.headers)
            return r.json()
        except requests.exceptions.RequestException as e:
            log.e("API", "Something went wrong. Could not superlike:" + str(e),
                  log_to_widget)

    def like(self, person_id, log_to_widget=True, thread_update_signal=None):
        try:
            url = self.host + '/like/%s' % person_id
            r = requests.get(url, headers=self.get_headers)
            return r.json()
        except requests.exceptions.RequestException as e:
            log.e("API", "Something went wrong. Could not like:" + str(e),
                  log_to_widget)

    def dislike(self,
                person_id,
                log_to_widget=True,
                thread_update_signal=None):
        try:
            url = self.host + '/pass/%s' % person_id
            r = requests.get(url, headers=self.get_headers)
            return r.json()
        except requests.exceptions.RequestException as e:
            log.e("API", "Something went wrong. Could not dislike:" + str(e),
                  log_to_widget)

    def report(self,
               person_id,
               cause,
               explanation='',
               log_to_widget=True,
               thread_update_signal=None):
        '''
        There are three options for cause:
            0 : Other and requires an explanation
            1 : Feels like spam and no explanation
            4 : Inappropriate Photos and no explanation
        '''
        try:
            url = self.host + '/report/%s' % person_id
            r = requests.post(url,
                              headers=self.headers,
                              data={
                                  "cause": cause,
                                  "text": explanation
                              })
            return r.json()
        except requests.exceptions.RequestException as e:
            log.e("API", "Something went wrong. Could not report:" + str(e),
                  log_to_widget)
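
    # report usage sketch (the person id is a placeholder): cause 0 requires
    # an explanation, causes 1 and 4 do not.
    # api.report("<person_id>", 0, explanation="profile is a spam bot")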

    def match_info(self,
                   match_id,
                   log_to_widget=True,
                   thread_update_signal=None):
        try:
            url = self.host + '/matches/%s' % match_id
            r = requests.get(url, headers=self.headers)
            return r.json()
        except requests.exceptions.RequestException as e:
            log.e(
                "API", "Something went wrong. Could not get your match info:" +
                str(e), log_to_widget)

    def get_matches(self, log_to_widget=True, thread_update_signal=None):
        try:
            url = self.host + '/v2/matches'
            r = requests.get(url, headers=self.headers)
            return r.json()
        except requests.exceptions.RequestException as e:
            log.e(
                "API",
                "Something went wrong. Could not get your match iself.page_tokennfo:"
                + str(e), log_to_widget)

    def all_matches(self,
                    amount=60,
                    message=0,
                    page_token=None,
                    log_to_widget=True,
                    thread_update_signal=None):
        try:
            url = self.host + '/v2/matches?locale=en&count=' + str(
                amount) + '&message=' + str(message) + '&is_tinder_u=false'
            log.d("API", "All matches page: " + str(page_token), log_to_widget)
            if page_token:
                url = url + '&page_token=' + page_token
            r = requests.get(url, headers=self.headers)
            json = r.json()
            log.d("API", "All matches keys " + str(json.keys()), log_to_widget)
            log.d("API", "All matches data " + str(json['data'].keys()),
                  log_to_widget)
            log.d(
                "API", "All matches data matches  " +
                str(len(json['data']['matches'])) + " " +
                str(json['data']['matches'][0].keys()), log_to_widget)
            log.d("API", "All matches meta " + str(json['meta'].keys()),
                  log_to_widget)
            log.i(
                "API", "all_matches: Got response. Status: " +
                str(json['meta']['status']) + ": " +
                utils.error_code_to_message[json['meta']['status']],
                log_to_widget)
            if 'next_page_token' in json['data']:
                new_data = self.all_matches(amount, message,
                                            json['data']['next_page_token'])
                for match in new_data['data']['matches']:
                    match[
                        'page_token'] = page_token  # This will be needed to get messages
                json['data']['matches'] = json['data']['matches'] + new_data[
                    'data']['matches']
                self.page_token = json['data']['next_page_token']
            elif message <= 0:
                new_data = self.all_matches(amount, 1, None)
                json['data']['matches'] = json['data']['matches'] + new_data[
                    'data']['matches']
            log.i("API", "Total matches " + str(len(json['data']["matches"])),
                  log_to_widget)
            return json
        except requests.exceptions.RequestException as e:
            log.e(
                "API", "Something went wrong. Could not get your match info:" +
                str(e), log_to_widget)

    def fast_match_info(self, log_to_widget=True, thread_update_signal=None):
        try:
            url = self.host + '/v2/fast-match/preview'
            r = requests.get(url, headers=self.headers)
            count = r.headers['fast-match-count']
            # image is in the response but its in hex..
            return count
        except requests.exceptions.RequestException as e:
            log.e(
                "API",
                "Something went wrong. Could not get your fast-match count:" +
                str(e), log_to_widget)

    def trending_gifs(self,
                      limit=3,
                      log_to_widget=True,
                      thread_update_signal=None):
        try:
            url = self.host + '/giphy/trending?limit=%s' % limit
            r = requests.get(url, headers=self.headers)
            return r.json()
        except requests.exceptions.RequestException as e:
            log.e(
                "API",
                "Something went wrong. Could not get the trending gifs:" +
                str(e), log_to_widget)

    def gif_query(self,
                  query,
                  limit=3,
                  log_to_widget=True,
                  thread_update_signal=None):
        try:
            url = self.host + '/giphy/search?limit=%s&query=%s' % (limit,
                                                                   query)
            r = requests.get(url, headers=self.headers)
            return r.json()
        except requests.exceptions.RequestException as e:
            log.e("API",
                  "Something went wrong. Could not get your gifs:" + str(e),
                  log_to_widget)

    # def see_friends(self, log_to_widget=True, thread_update_signal=None):
    #     try:
    #         url = self.host + '/group/friends'
    #         r = requests.get(url, headers=self.headers)
    #         return r.json()['results']
    #     except requests.exceptions.RequestException as e:
    #         log.e("API", "Something went wrong. Could not get your Facebook friends:" +str(e), log_to_widget)
    """ FEATURES """

    def get_match_info(self, log_to_widget=True, thread_update_signal=None):
        matches = self.get_updates()['matches']
        now = datetime.utcnow()
        match_info = {}
        for match in matches[:len(matches)]:
            try:
                person = match['person']
                person_id = person['_id']  # This ID for looking up person
                name = person['name']
                id = match['id']
                msg_count = match['message_count']
                photos = self.get_photos(person)
                bio = ""
                if 'bio' in person.keys():
                    bio = person['bio']
                gender = person['gender']
                avg_succ_rate = self.get_avg_successRate(person)
                messages = match['messages']
                age = self.calculate_age(match['person']['birth_date'])
                distance = self.get_person(person_id)['results']['distance_mi']
                last_activity_date = match['last_activity_date']
                match_info[person_id] = {
                    "name": name,
                    "match_id": id,  # This ID for messaging
                    "message_count": msg_count,
                    "photos": photos,
                    "bio": bio,
                    "gender": gender,
                    "avg_successRate": avg_succ_rate,
                    "messages": messages,
                    "age": age,
                    "distance": distance,
                    "last_activity_date": last_activity_date,
                }
                log.d("API", name + "_" + id)
            except Exception as ex:
                template = "An exception of type {0} occurred. Arguments:\n{1!r}"
                message = template.format(type(ex).__name__, ex.args)
                log.e("API", message)
                # continue
        log.i("API", "All data stored in variable: match_info")
        filename = self.data_folder + 'match_info.json'
        with open(filename, 'w') as fp:
            json.dump(match_info, fp)
            log.i("API",
                  "All data dumped to file: " + str(os.path.abspath(filename)))
        return match_info

    def get_match_id_by_name(self,
                             name,
                             log_to_widget=True,
                             thread_update_signal=None):
        '''
        Returns a list_of_ids that have the same name as your input
        '''
        global match_info
        list_of_ids = []
        for match in match_info:
            if match_info[match]['name'] == name:
                list_of_ids.append(match_info[match]['match_id'])
        if len(list_of_ids) > 0:
            return list_of_ids
        return {"error": "No matches by name of %s" % name}

    def get_photos(self,
                   person,
                   log_to_widget=True,
                   thread_update_signal=None):
        '''
        Returns a list of photo urls
        '''
        photos = person['photos']
        photo_urls = []
        for photo in photos:
            photo_urls.append(photo['url'])
        return photo_urls

    def calculate_age(self,
                      birthday_string,
                      log_to_widget=True,
                      thread_update_signal=None):
        '''
        Converts from '1997-03-25T22:49:41.151Z' to an integer (age)
        '''
        birthyear = int(birthday_string[:4])
        birthmonth = int(birthday_string[5:7])
        birthday = int(birthday_string[8:10])
        today = date.today()
        return today.year - birthyear - ((today.month, today.day) <
                                         (birthmonth, birthday))

    def get_avg_successRate(self,
                            person,
                            log_to_widget=True,
                            thread_update_signal=None):
        '''
        SuccessRate is determined by Tinder for their 'Smart Photos' feature
        '''
        photos = person['photos']
        curr_avg = 0
        for photo in photos:
            try:
                photo_successRate = photo['successRate']
                curr_avg += photo_successRate
            except:
                return -1
        return curr_avg / len(photos)

    def sort_by_value(self,
                      sortType,
                      log_to_widget=True,
                      thread_update_signal=None):
        '''
        Sort options are:
            'age', 'message_count', 'gender'
        '''
        global match_info
        return sorted(match_info.items(),
                      key=lambda x: x[1][sortType],
                      reverse=True)

    def see_friends(self, log_to_widget=True, thread_update_signal=None):
        try:
            url = self.host + '/group/friends'
            r = requests.get(url, headers=self.headers)
            return r.json()['results']
        except requests.exceptions.RequestException as e:
            log.e(
                "API",
                "Something went wrong. Could not get your Facebook friends:" +
                str(e), log_to_widget)

    def see_friends_profiles(self,
                             name=None,
                             log_to_widget=True,
                             thread_update_signal=None):
        friends = self.see_friends()
        if name == None:
            return friends
        else:
            result_dict = {}
            name = name.title()  # upcases first character of each word
            for friend in friends:
                if name in friend["name"]:
                    result_dict[friend["name"]] = friend
            if result_dict == {}:
                return "No friends by that name"
            return result_dict

    def convert_from_datetime(self,
                              difference,
                              log_to_widget=True,
                              thread_update_signal=None):
        secs = difference.seconds
        days = difference.days
        m, s = divmod(secs, 60)
        h, m = divmod(m, 60)
        return ("%d days, %d hrs %02d min %02d sec" % (days, h, m, s))

    def get_last_activity_date(self,
                               now,
                               ping_time,
                               log_to_widget=True,
                               thread_update_signal=None):
        ping_time = ping_time[:len(ping_time) - 5]
        datetime_ping = datetime.strptime(ping_time, '%Y-%m-%dT%H:%M:%S')
        difference = now - datetime_ping
        since = self.convert_from_datetime(difference)
        return since

    def how_long_has_it_been(self,
                             log_to_widget=True,
                             thread_update_signal=None):
        global match_info
        now = datetime.utcnow()
        times = {}
        for person in match_info:
            name = match_info[person]['name']
            ping_time = match_info[person]['last_activity_date']
            since = self.get_last_activity_date(now, ping_time)
            times[name] = since
            log.i("API", name, "----->", since)
        return times

    def pause(self, log_to_widget=True, thread_update_signal=None):
        '''
        In order to appear as a real Tinder user using the app...
        When making many API calls, it is important to pause a...
        realistic amount of time between actions to not make Tinder...
        suspicious!
        '''
        nap_length = 3 * random()
        log.d("API", 'Napping for %f seconds...' % nap_length)
        sleep(nap_length)
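
# A minimal driver for the class above, sketched under the assumption of a
# valid Facebook login; the credentials and folder names are placeholders.
# api = TinderApi(data_folder="data/")
# fb_token = api.get_fb_access_token("you@example.com", "password")
# fb_id = api.get_fb_user_id(fb_token)
# if api.authverif(fb_token, fb_id):
#     matches = api.all_matches(amount=60)
#     api.download_people_data_api(matches['data']['matches'], "data/matches",
#                                  photos=True, insta=False, messages=True,
#                                  rename_images=True, amount=-1)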
Example #54
0
def prepare_browser():
    br = RoboBrowser(parser='html.parser', history=True)
    return br
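
# Usage sketch (the URL is illustrative): history=True lets the caller follow
# links and then call .back() on the returned browser.
# br = prepare_browser()
# br.open("https://example.com")
# print(br.find("title"))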
Example #55
0
import re
from robobrowser import RoboBrowser
import pdb #debugger
import pandas as spreadsheet
import requests
from bs4 import BeautifulSoup

##### -- GLOBALS --
page = requests.get('https://pathofexile.gamepedia.com/Prophecy#Upgrading_uniques')
soup = BeautifulSoup(page.content, "lxml")
browser = RoboBrowser()
browser.open("https://poe.trade/")
form = browser.get_form(id="search")
fated_uniques = [[],[],[]]
prices = [[0 for x in range(57)],[0 for x in range(57)],[0 for x in range(57)]]
##### -- --

##### -- FATED SEARCH / GET NAMES
fated_page = soup.find(class_='mw-parser-output')
h2 = fated_page.find(id="Fated_Uniques")
fates = h2.parent.next_sibling.next_sibling.next_sibling.next_sibling.next_sibling.next_sibling.next_sibling
fates_rows = fates.tbody.find_all('tr')
del fates_rows[0]

fated_uniques[0] = [row.td.get_text() for row in fates_rows]

count = 0
for row in fates_rows:
    acts = row.td.next_sibling.em.find_all(class_="c-item-hoverbox__activator")
    fated_uniques[1].append( acts[0].get_text() )
    fated_uniques[2].append( acts[1].get_text() )
Example #56
0
def main():
    # obtain tagtree page
    browser = RoboBrowser(history=True)
    browser.open(tagtree_url)

    # obtain list of items from html
    tagtree = browser.find("ul")

    # dump taglist to `tagtree-clickable.html`, clickable, pretty printed
    with open(tagtree_clickable_fname, 'w') as f:
        f.write(tagtree.prettify())

    # remove all <a href=> tags to facilitate parsing
    for match in tagtree.findAll('a'):
        match.replaceWithChildren()

    # dump taglist to `tagtree.html`, plain, no <a href>
    with open(tagtree_fname, 'w') as f:
        f.write(str(tagtree))

    # obtain xml data from file
    treexml = etree.parse(tagtree_fname)

    # connect to the database
    conn = sqlite3.connect(db_fname)
    c = conn.cursor()

    # create a dict from XHTML matching tags to their ancestors
    treedict = {}
    for element in treexml.getiterator():
        # ignore repeated elements
        if (str(element.text) != "None"):
            # find all ancestor elements
            ancestorlist = []
            for ancestor in element.iterancestors():
                if (str(ancestor.tag) != "ul"):  # ignore ul tags
                    ancestorlist.append(ancestor.text)

            # add element and ancestors to dictionary (if not empty)
            if len(ancestorlist) != 0:
                treedict[element.text] = ancestorlist

    # write treedict to formatted json
    with open(json_fname, 'w') as json_file:
        json_file.write(
            json.dumps(treedict,
                       sort_keys=True,
                       indent=2,
                       separators=(',', ': ')))

    # insert into SQLite database
    for tagname, ancestors in treedict.items():
        # element[0] is always the first parent, so insert that
        c.execute("""UPDATE tags SET parent = ? WHERE tagname = ?""",
                  [ancestors[0], tagname])

        # find all images with this tagname and add additional tags for it
        image_query = c.execute(
            """SELECT imageid FROM taglink WHERE tagname = ?""", [tagname])

        for img_id in image_query:
            # link current images to all ancestor tags
            # OR IGNORE used to avoid duplicating taglinks
            for tag in ancestors:
                c.execute(
                    """INSERT OR IGNORE INTO taglink (imageid, tagname) VALUES (?,?)""",
                    [img_id[0], tag])

    # Save (commit) the database changes
    conn.commit()

    # close sqlite database once finished
    conn.close()
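
# Sanity-check sketch using the same table and column names as above: list a
# few tag -> parent rows after the update has been committed.
# conn = sqlite3.connect(db_fname)
# for row in conn.execute("SELECT tagname, parent FROM tags LIMIT 5"):
#     print(row)
# conn.close()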
Example #57
0
import re
from robobrowser import RoboBrowser

browser = RoboBrowser()
browser.open("https://duckduckgo.com")
# Must find the proper id in the html
form = browser.get_form(id="search_form_homepage")
form["q"].value = "python"
browser.submit_form(form)
links = browser.get_links()
for link in links:
    print(link)
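
# get_links() returns every anchor on the results page, including navigation
# and footer links. A small, hedged refinement (the href filtering below is an
# assumption about the page markup, not something DuckDuckGo guarantees):
for link in browser.get_links():
    href = link.get("href", "")
    if re.match(r"https?://", href):  # keep only absolute external links
        print("%s -> %s" % (link.text.strip(), href))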
Example #58
#!/usr/bin/env python2
# -*- coding: iso-8859-15 -*-

import re
from robobrowser import RoboBrowser

browser = RoboBrowser()
browser.open("https://www.celio.com/")

# We will be redirected to the Celio sign-up page,
# then grab the registration form.
signup_form = browser.get_form(class_="register")

# Fill in the registration fields
signup_form["user[titleCode]"].value = "mr"
signup_form["user[lastName]"].value = "Sym"
signup_form["user[firstName]"].value = "Secure Your Mail"
signup_form["user[birthdayDay]"].value = "21"
signup_form["user[birthdayMonth]"].value = "03"
signup_form["user[birthdayYear]"].value = "1989"
signup_form["user[mobilephoneCode]"].value = "612345678"
signup_form["user[defaultAdressline1]"].value = "08 rue secureyourmail"
signup_form["user[defaultAdress.postalCode]"].value = "85123"
signup_form["user[defaultAddress.town]"].value = "fraise"
signup_form["user[defaultAdress.country.isocode]"].value = "FR"
signup_form["user[email]"].value = "*****@*****.**"
signup_form["user[emailConfirmation]"].value = "*****@*****.**"
signup_form["user[password]"].value = "secureyourmail123"
signup_form["user[passwordConfirmation]"].value = "secureyourmail123"

# Submit the form
browser.submit_form(signup_form)
Example #59
from bs4 import BeautifulSoup
from robobrowser import RoboBrowser

# Result containers; assumed to live at module level in the original script.
phone = {}
chat = {}


def main():
	browser = RoboBrowser(history=True)
	##
	# First, pull the figures from the general dashboard page:
	# presented, handled, abandoned and abandoned percentage.
	browser.open("http://support.infusiontest.com/csdashboard/general.php")
	generalresults = BeautifulSoup(browser.response.content, 'html.parser')
	phone['presented'] = generalresults.find('div', {'id': 'presented'}).find('div', {'class': 'data'}).text.strip('\r\n ')
	phone['abandoned'] = generalresults.find('div', {'id': 'queued'}).find('div', {'class': 'data'}).text.strip('\r\n ')
	phone['handled'] = generalresults.findAll('div', {'id': 'handled'})[0].find('div', {'class': 'data'}).text.strip('\r\n ')
	phone['abandonedpct'] = generalresults.findAll('div', {'id': 'handled'})[1].find('div', {'class': 'data'}).text.strip('\r\n ')
	chat['presented'] = generalresults.find('div', {'id': 'diverted'}).find('div', {'class': 'data'}).text.strip('\r\n ')
	chat['abandoned'] = generalresults.find('div', {'id': 'sla'}).find('div', {'class': 'data'}).text.strip('\r\n ')
	chat['handled'] = generalresults.find('div', {'id': 'abandoned'}).find('div', {'class': 'data'}).text.strip('\r\n ')
	chat['abandonedpct'] = generalresults.find('div', {'id': 'asa'}).find('div', {'class': 'data'}).text.strip('\r\n ')
	browser.open("http://support.infusiontest.com/csdashboard/stats.php")
	statsresults = BeautifulSoup(browser.response.content, 'html.parser')
	phone['asa'] = statsresults.find('div', {'id': 'phone'}).find('div', {'class': 'data'}).text.strip('\r\n ')
	# NOTE: the next line also reads the 'phone' div; this looks like a
	# copy-paste slip and probably should target the chat element instead.
	chat['asa'] = statsresults.find('div', {'id': 'phone'}).find('div', {'class': 'data'}).text.strip('\r\n ')
	browser.open("https://docs.google.com/forms/d/1UvD_au-S6YaDGQ-u23Lth5l-JFrrpUiQT6yVFrj64BA/viewform")
	submitform = browser.get_form()	
	submitform.fields['entry.1210668230'].value = '5pm'
	submitform.fields['entry.339838906'].value = phone['asa']
	submitform.fields['entry.335804195'].value = phone['presented']
	submitform.fields['entry.950389349'].value = phone['handled']
	submitform.fields['entry.125377286'].value = phone['abandoned']
	submitform.fields['entry.73700777'].value = phone['abandonedpct']
	submitform.fields['entry.941849183'].value = chat['asa']
	submitform.fields['entry.1083299158'].value = chat['presented']
	submitform.fields['entry.487211652'].value = chat['handled']
	submitform.fields['entry.1724578827'].value = chat['abandoned']
	submitform.fields['entry.1590181783'].value = chat['abandonedpct']
	browser.submit_form(submitform)
Example #60
from django.conf import settings  # assumption: `settings` here is the Django settings module
from robobrowser import RoboBrowser


def get_medicare_email(request, mmg):
    """

    :param request:
    :param mmg:
    :return:
    """

    mmg_back = mmg
    mmg_back['status'] = "FAIL"
    mmg_back['mmg_email'] = ""

    PARSER = settings.BS_PARSER
    if not PARSER:
        if settings.DEBUG:
            print('Default Parser for BeautifulSoup:', 'lxml')
        PARSER = 'lxml'

    # Create the browser with the chosen parser.
    # Passing the parser explicitly avoids BeautifulSoup reporting an
    # "unspecified parser" warning in the console/log.
    rb = RoboBrowser(parser=PARSER)

    target_page = "https://www.mymedicare.gov/myaccount.aspx"
    # Open the MyAccount page
    rb.open(target_page)

    # Get the parsed page content
    page = rb.parsed

    if settings.DEBUG:
        print("===============================")
        print("on page:", rb.url)
        print("MyAccount:", page)


    my_email = rb.find("div",
                       attrs={"class": "ctl00_ctl00_ContentPlaceHolder1_ctl00_ctl00_ctl00_ctl01_UserInfo_pnlEmailSettings"})

    if settings.DEBUG:
        print("What email information:", my_email)

    # Pull the address out of the "myaccount-data" block, guarding against
    # the panel or the inner div being missing.
    mail_address = ""
    if my_email is not None:
        mail_addr = my_email.find("div", attrs={"class": "myaccount-data"})
        if mail_addr is not None:
            mail_address = mail_addr.text

    mmg_back['mmg_email'] = mail_address
    if rb.url == target_page:
        mmg_back['url'] = rb.url
        mmg_back['status'] = "OK"


    if settings.DEBUG:
        print("Email:", mail_address)
        print("url:", rb.url)

    return mmg_back
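
# Hypothetical caller sketch (not part of the source): how the returned dict
# might be consumed. The mmg key layout mirrors the keys set above.
def fetch_account_email(request):
    mmg = {'status': '', 'url': '', 'mmg_email': ''}
    mmg = get_medicare_email(request, mmg)
    if mmg['status'] == "OK":
        return mmg['mmg_email']
    return None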