"""Log in to the Masterlens admin site and locate its lens listing table.

The script prompts for credentials, posts them to ``member.php`` through a
``requests`` session, fetches ``search.php`` and parses the result down to
the ``<tbody>`` of the table inside the element with id ``message-listing``.

When the Django models cannot be imported the script runs in debug mode
(``DEBUG = True``) and does not create the ``Catalog`` database entry.
"""
import getpass
import re
import urllib2

import requests as rq
from bs4 import BeautifulSoup

# The project models are optional: without them the script still runs but
# skips the database write. ``except Exception`` (instead of a bare
# ``except``) keeps that fallback for any import-time failure while letting
# KeyboardInterrupt / SystemExit propagate.
try:
    from ModellerApp.models import BasicLensData, Catalog
except Exception:
    DEBUG = True
    print("running in debug mode: nothing will be added to database")
else:
    DEBUG = False

if not DEBUG:
    cat1 = Catalog(
        name="Masterlens",
        description="Masterlens Database")
    cat1.save()

# One session is reused for every request so that cookies set by the server
# are carried from the login over to the search page.
s = rq.Session()

# Initial visit to the login page before posting credentials
# (presumably to obtain the session cookie -- TODO confirm).
r1 = s.get("http://admin.masterlens.org/member.php")

user = raw_input("username: ")
# getpass keeps the password from being echoed to the terminal.
pw = getpass.getpass("pw: ")

# NOTE(review): the form is sent with a hard-coded IP address and over plain
# HTTP; confirm whether the server validates 'ipaddress' and whether an
# HTTPS endpoint is available.
data = {
    'ipaddress': '217.162.244.23',
    'member': 'Login',
    'password': pw,
    'username': user,
}
r2 = s.post("http://admin.masterlens.org/member.php", data=data)

# Fetch the search page with the authenticated session.
r3 = s.get("http://admin.masterlens.org/search.php?")

# Drill down to the body of the table inside the "message-listing" element.
s1 = BeautifulSoup(r3.text, "html5lib")
s2 = s1.find(id="message-listing")
s3 = s2.table.tbody
# Register the two CASTLES catalogs in the database, then download the
# CASTLES index page and read the per-lens fields out of its second table.
import contextlib
import urllib2

from bs4 import BeautifulSoup

from ModellerApp.models import BasicLensData, Catalog

cat1 = Catalog(
    name="CASTLES original",
    description="CASTLES Survey with original images")
cat2 = Catalog(
    name="CASTLES cleaned",
    description="CASTLES Survey with cleaned images")
cat1.save()
cat2.save()

# urllib2 responses are not context managers in Python 2, so
# contextlib.closing is used to make sure the connection is released once
# the page has been read.
# NOTE(review): no parser is passed to BeautifulSoup, so the result depends
# on which parsers are installed; left unchanged because the table index
# below may rely on the current parser's output.
with contextlib.closing(
        urllib2.urlopen('http://www.cfa.harvard.edu/castles/')) as response:
    soup = BeautifulSoup(response.read())

# The lens rows are the center-aligned <tr> elements of the second table.
tables = soup.find_all('table')
trows = tables[1].find_all('tr', {"align": "CENTER"})

for trow in trows:
    td = trow.find_all('td')
    cat_id = int(td[0].text)       # first cell, converted to an integer
    name = td[2].a.text            # link text in the third cell
    url = td[2].a.get('href')      # link target in the third cell
    z_src = td[4].text             # fifth cell, kept as raw text