Example #1
0
import urllib2
from bs4 import BeautifulSoup
import requests as rq
import re

# The project models are optional. When they cannot be imported the script
# falls back to debug mode, in which nothing is written to the database.
try:
  from ModellerApp.models import BasicLensData, Catalog
except Exception:
  # Catch Exception rather than using a bare except: any import-time failure
  # still selects debug mode, but KeyboardInterrupt and SystemExit propagate.
  DEBUG = True
  # A parenthesised single-argument print produces the same output under the
  # Python 2 print statement and the Python 3 print function.
  print("running in debug mode: nothing will be added to database")
else:
  DEBUG = False

if not DEBUG:
  # Outside debug mode, build the "Masterlens" catalog record and save it.
  cat1 = Catalog(name="Masterlens", description="Masterlens Database")
  cat1.save()

# All requests below go through one shared requests session.
s = rq.Session()
r1 = s.get("http://admin.masterlens.org/member.php")

# Credentials are asked for interactively. The password is read with getpass
# so that it is not echoed to the terminal while being typed.
import getpass
user = raw_input("username: ")
pw = getpass.getpass("pw: ")

# Form fields posted to member.php.
# NOTE(review): the ipaddress value is hard-coded; confirm whether the server
# actually requires it and whether it must match the caller's address.
data = {'ipaddress':'217.162.244.23', 'member':'Login', 'password': pw, 'username':user}
r2 = s.post("http://admin.masterlens.org/member.php", data = data)
r3 = s.get("http://admin.masterlens.org/search.php?")

# Parse the search page and walk down to the <tbody> of the table inside the
# element whose id is "message-listing".
s1 = BeautifulSoup(r3.text, "html5lib")
s2 = s1.find(id="message-listing")
s3 = s2.table.tbody
Example #2
0
import urllib2
from bs4 import BeautifulSoup

from ModellerApp.models import BasicLensData, Catalog


# Two catalog records for the CASTLES survey: one for the original images and
# one for the cleaned images.
cat1 = Catalog(name="CASTLES original",
               description="CASTLES Survey with original images")
cat2 = Catalog(name="CASTLES cleaned",
               description="CASTLES Survey with cleaned images")

# Both objects are constructed before either one is saved.
cat1.save()
cat2.save()




# Download the CASTLES front page and parse it.
response = urllib2.urlopen('http://www.cfa.harvard.edu/castles/')
try:
  # NOTE(review): no parser is named, so BeautifulSoup picks whichever one is
  # installed; the table layout seen below may depend on that choice.
  soup = BeautifulSoup(response.read())
finally:
  # Close the HTTP response explicitly rather than leaving it open.
  response.close()
tables = soup.find_all('table')

# Rows of the second table on the page that carry align="CENTER".
trows = tables[1].find_all('tr', {"align":"CENTER"})

for trow in trows:
  td=trow.find_all('td')

  cat_id = int(td[0].text)
  name = td[2].a.text
  url = td[2].a.get('href')
  z_src = td[4].text