def login(username, password):
    """Log in and return the session cookie string needed by later requests."""
    _, headers = scraper.fetch(LOGIN_FORM, data={
        'UserName': username,
        'Password': password,
        'SUBMIT1': 'Login',
    })
    if 'set-cookie' not in headers:
        raise RuntimeError("login failed: no session cookie in response")
    cookies = headers['set-cookie']
    # Hit the confirmation page so the session becomes valid server-side.
    scraper.fetch(LOGIN_CONFIRM, headers={'Cookie': cookies})
    return cookies

def scrape_register(cookies, module, year_id):
    """Fetch the class register for a module and return its title and students."""
    page, _ = scraper.fetch(
        "%s?%s" % (SEARCH_URL, urllib.urlencode({
            'form_id': 3,
            'exclude': '',
            'year_id': year_id,
            'mnem': module,
        })),
        headers={'Cookie': cookies},
    )
    doc = scraper.parse(page)
    title = doc.find(scraper.path("h4", "a")).text
    for table in doc.findall(scraper.path("table")):
        # The register is the only table carrying a 'bordercolor' attribute.
        if 'bordercolor' in table.keys():
            headings = [
                t.text for t in table.findall(scraper.path("th", "font", "b"))
            ]
            if headings != ['Name', 'Category', 'Course', 'Misc']:
                raise ValueError("unexpected register headings: %r" % (headings,))
            students = [
                dict(zip(headings[:-1],
                         (c.text.strip()
                          for c in row.findall(scraper.path("td", "font"))
                          if c.text)))
                for row in table.findall(scraper.path("tr"))[1:]
            ]
            return {'title': title, 'students': students}

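## Hedged usage sketch (not from the original source): the username, password
## and module mnemonic below are illustrative placeholders; the year_id '000110'
## follows the value used elsewhere in this code.
def example_dump_register():
    cookies = login('s0123456', 'hunter2')
    register = scrape_register(cookies, 'inf1op', '000110')
    print register['title']
    for student in register['students']:
        print student['Name'], student['Category'], student['Course']
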
def derp():
    """Handle incoming Slack event callbacks."""
    slack_event = json.loads(request.data)

    # Slack's URL verification handshake: echo the challenge back.
    if "challenge" in slack_event:
        return make_response(slack_event["challenge"], 200,
                             {"Content-Type": "application/json"})

    # Reject requests that don't carry our verification token.
    if verification_token != slack_event.get("token"):
        message = "Invalid Slack verification token: %s\npyBot has: %s\n\n" % (
            slack_event["token"], verification_token)
        return make_response(message, 403, {"X-Slack-No-Retry": 1})

    if "event" in slack_event:
        event = slack_event["event"]
        # "!last N" posts the N most recent entries (default 1).
        if "text" in event and event["text"].startswith("!last"):
            number = 1
            split = event["text"].split(" ")
            if len(split) > 1:
                number = int(split[1])
            last = list(fetch())[:number]
            for message in entries_to_messages(last):
                send_message(message)
        return make_response("OK", 200, {})

    return make_response("[NO EVENT IN SLACK REQUEST] These are not the droids "
                         "you're looking for.", 404, {"X-Slack-No-Retry": 1})

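## Hedged wiring sketch (assumption, not from the original source): derp() reads
## flask.request directly, so it presumably backs a Flask view on the app defined
## in app.py. The route path "/slack/events" is an illustrative placeholder.
# @app.route("/slack/events", methods=["POST"])
# def slack_events():
#     return derp()
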
def on_add_node(msg):
    # Fetch the node's metadata in a greenlet and wait for it to finish.
    g = gevent.spawn(scraper.fetch, msg)
    g.join()
    node = g.value

    graph.add_node(node['id'])
    emit('add_node', {'id': node['id'],
                      'abstract': node['abstract'],
                      'authors': node['authors'],
                      'date': node['date'],
                      'title': node['title']})

    # Outgoing edges: papers this node cites.
    for c in node['citations']:
        gevent.sleep(0)  # yield so other greenlets can run
        graph.add_edge(node['id'], c)
        if c[:4] == 'doi:':
            cinfo = scraper.fetch(c[4:])
            emit('add_node', {'id': cinfo['id'],
                              'abstract': cinfo['abstract'],
                              'authors': cinfo['authors'],
                              'date': cinfo['date'],
                              'title': cinfo['title']})
        else:
            emit('add_node', {'id': c})
        emit('add_link', {'source': node['id'], 'target': c, 'value': 1})

    # Incoming edges: papers that cite this node.
    for c in node['cited by']:
        gevent.sleep(0)
        graph.add_edge(c, node['id'])
        if c[:4] == 'doi:':
            cinfo = scraper.fetch(c[4:])
            emit('add_node', {'id': cinfo['id'],
                              'abstract': cinfo['abstract'],
                              'authors': cinfo['authors'],
                              'date': cinfo['date'],
                              'title': cinfo['title']})
        else:
            emit('add_node', {'id': c})
        emit('add_link', {'source': c, 'target': node['id'], 'value': 1})

if not (dump_ascii or dump_json):
    dump_ascii = True

if "".join(map(lambda s: s.lower(), args)) in Courses:
    courses = "%0D%0A".join(map(urllib.quote_plus, Courses[args[0]]))
elif specify_courses:
    courses = "%0D%0A".join(map(urllib.quote_plus, args))

if courses:
    url = "%s;%s" % (TT_URL, COURSES_URL % {"courses": courses})
else:
    modules = "%0D%0A".join(args)
    url = "%s;%s" % (TT_URL, MODULES_URL % {"modules": modules})

if not (courses or modules):
    die_with_usage("", 1)

modules = scrape_timetable(scraper.parse(scraper.fetch(url)[0]))

if module_detail:
    for m in modules:
        data = {'year_id': '000110', 'mnem': m['code']}
        page, hdrs = scraper.fetch(MODULE_DETAIL_URL, data)
        m['detail'] = scrape_module_details(scraper.parse(page))

## dump scraped data
if dump_json:
    print json.dumps(modules)
elif dump_ascii:
    for module in modules:
        print "\x1b[0;1m%s\x1b[0m" % module['code'], "--", module['title']
        for (act, data) in sorted(module['acts'].items()):
            print "\t%-13s" % (act,), \
def test_fetch(self):
    soup = scraper.fetch('https://instagram.com/jawkneelin', False)
    self.assertEqual(type(soup), BeautifulSoup)

def post_new_entries():
    """Re-fetch the feed and post any entries that were not seen last time."""
    global new_entries
    existing_entries = new_entries[:]
    new_entries = fetch()
    for msg in get_new_entries(existing_entries, new_entries):
        send_message(msg)

import json

from flask import Flask, request, make_response
from flask_apscheduler import APScheduler

from client import send_message, verification_token
from scraper import fetch, get_new_entries, entries_to_messages

app = Flask(__name__)
new_entries = fetch()


class Config(object):
    JOBS = [
        {
            'id': 'example',
            'func': 'app:post_new_entries',
            'trigger': 'cron',
            'second': '0',
        }
    ]
    SCHEDULER_API_ENABLED = True


def post_new_entries():
    """Re-fetch the feed and post any entries that were not seen last time."""
    global new_entries
    existing_entries = new_entries[:]
    new_entries = fetch()
    for msg in get_new_entries(existing_entries, new_entries):
        send_message(msg)

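## Hedged wiring sketch (assumption, not from the original source): the snippet
## imports APScheduler and defines Config but never starts the scheduler, so
## something like the standard flask_apscheduler setup below is presumably
## needed for the cron job to fire.
app.config.from_object(Config())
scheduler = APScheduler()
scheduler.init_app(app)
scheduler.start()

if __name__ == '__main__':
    app.run()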