def apis_db_write():
    """Write every entry of the global ``apis`` list to the database.

    Each entry is a dict with the keys ``name``, ``url``, ``desc``,
    ``category`` and ``date``.  For every entry the category and the API
    row are inserted with ``INSERT OR IGNORE``, their ids are read back,
    the pair is linked in ``api_category`` and ``api_page_read`` is called
    for the API's page.  After the loop the number of rows in ``api`` is
    reported through ``cmd_p``.

    The cursor is closed in a ``finally`` clause because the loop performs
    network requests (via ``api_page_read``) that may raise.
    """
    global conn, apis
    c = conn.cursor()
    try:
        for api in apis:
            api_name = api["name"]
            api_url = api["url"]
            api_desc = api["desc"]
            api_category = api["category"]
            api_date = api["date"]

            # category: insert if missing, then read its id back
            c.execute("INSERT OR IGNORE INTO category (category_name) VALUES (?)",
                      (api_category,))
            conn.commit()
            c.execute("SELECT category_id FROM category WHERE category_name = ?",
                      (api_category,))
            category_id = c.fetchone()[0]

            # api: insert if missing, then read its id back by URL
            c.execute("INSERT OR IGNORE INTO api (api_name, api_url, api_description, api_date) VALUES (?, ?, ?, ?)",
                      (api_name, api_url, api_desc, api_date))
            conn.commit()
            c.execute("SELECT api_id FROM api WHERE api_url = ?", (api_url,))
            api_id = c.fetchone()[0]

            # api_category: link the two rows
            c.execute("INSERT OR IGNORE INTO api_category (api_id, category_id) VALUES (?, ?)",
                      (api_id, category_id))
            conn.commit()

            api_page_read(api_id, "http://www.programmableweb.com" + api_url)

        c.execute("SELECT COUNT(api_id) FROM api")
        apis_total = c.fetchone()[0]
        cmd_p("apis total: " + str(apis_total))
    finally:
        c.close()
def mashups_db_write():
    """Write every entry of the global ``mashups`` list to the database.

    Each entry is a dict with the keys ``name``, ``url`` and ``desc``.  The
    mashup row is inserted with ``INSERT OR IGNORE``, its id is read back
    by URL and ``mashup_page_read`` is called for the mashup's page.  After
    the loop the number of rows in ``mashup`` is reported through
    ``cmd_p``.

    The cursor is closed in a ``finally`` clause because the loop performs
    network requests (via ``mashup_page_read``) that may raise.
    """
    global conn, mashups
    cmd_p("writing mashup to database")
    c = conn.cursor()
    try:
        for mashup in mashups:
            mashup_name = mashup["name"]
            mashup_url = mashup["url"]
            mashup_desc = mashup["desc"]

            c.execute("INSERT OR IGNORE INTO mashup (mashup_name, mashup_url, mashup_description) VALUES (?, ?, ?)",
                      (mashup_name, mashup_url, mashup_desc))
            conn.commit()
            c.execute("SELECT mashup_id FROM mashup WHERE mashup_url = ? ", (mashup_url,))
            mashup_id = c.fetchone()[0]

            mashup_page_read("http://www.programmableweb.com" + mashup_url, mashup_id)

        c.execute("SELECT COUNT(mashup_id) FROM mashup")
        mashup_total = c.fetchone()[0]
        cmd_p("mashup total: " + str(mashup_total))
    finally:
        c.close()
def api_page_read(api_id, url):
    """Download one API page and hand its parsed soup to the API page readers.

    The page at ``url`` is requested through httplib2, the request is
    logged with ``cmd_p``, the body is parsed with BeautifulSoup, and the
    soup is passed together with ``api_id`` to the highlight, description
    and specification readers, in that order.
    """
    client = httplib2.Http()
    _response, body = client.request(url)
    cmd_p("reading page: " + url)
    page = BeautifulSoup(body)
    readers = (
        highlight_api_page_read,
        description_api_page_read,
        specification_api_page_read,
    )
    for reader in readers:
        reader(page, api_id)
def mashup_page_read(url, mashup_id):
    """Download one mashup page and hand its parsed soup to the mashup page readers.

    The page at ``url`` is requested through httplib2, the request is
    logged with ``cmd_p``, the body is parsed with BeautifulSoup, and the
    soup is passed together with ``mashup_id`` to the description, tags
    and summary readers, in that order.
    """
    client = httplib2.Http()
    _response, body = client.request(url)
    cmd_p("reading page: " + url)
    page = BeautifulSoup(body)
    readers = (
        description_mashup_page_read,
        tags_mashup_page_read,
        summary_mashup_page_read,
    )
    for reader in readers:
        reader(page, mashup_id)
def apis_pages_read():
    """Read the paginated API directory into the global ``apis`` list.

    ``apis`` is reset to an empty list, then the pages
    ``url_root + "/apis/directory/<n>"`` are read for n = 1, 2, ... until
    ``apis_directory_page_read`` returns a false value.  Each page URL is
    printed before it is read, and the final list length is reported
    through ``cmd_p``.
    """
    global conn, apis, url_root
    apis = []
    page_number = 1
    while True:
        page_url = url_root + "/apis/directory/" + str(page_number)
        print(page_url)
        if not apis_directory_page_read(page_url):
            break
        page_number += 1
    cmd_p("apis list length: " + str(len(apis)))
def sqlite_init():
    """Run the schema script and list what the database now contains.

    Executes the module-level ``sql`` script on the global connection,
    commits, then reads every row of ``SQLITE_MASTER`` and prints the row
    count followed by the first and third column of each row.
    """
    global conn
    cmd_p("start to init the database")
    c = conn.cursor()
    try:
        c.executescript(sql)
        conn.commit()
        c.execute('''SELECT * FROM SQLITE_MASTER''')
        tables = c.fetchall()
    finally:
        c.close()

    # Build a single string: with the Python 2 print statement a
    # parenthesised pair would be printed as a tuple repr instead.
    print("database tables in total: " + str(len(tables)))
    for row in tables:
        print("\t([" + row[0] + "],[" + row[2] + "])")
    cmd_p("finish init databse")
def apis_directory_page_read(url):
    """Scrape one API directory page and append new entries to ``apis``.

    The page is fetched with httplib2 and parsed with BeautifulSoup.  The
    Tag children of the first ``<table summary="API" id="apis">`` are
    walked; the first one is skipped and every later one must have exactly
    four children, otherwise the program exits.  For each row whose name
    is not yet known to ``api_exist`` a dict with the keys ``name``,
    ``url``, ``desc``, ``category`` and ``date`` is printed and appended
    to the global ``apis`` list.

    Returns True when the page contains an ``<img>`` whose ``src`` is
    ``/images/listnav_next.png``, False otherwise.
    """
    global conn, apis
    client = httplib2.Http()
    _response, body = client.request(url)
    cmd_p("reading page: " + url)
    page = BeautifulSoup(body)
    tables = page.findAll("table", attrs={"summary": "API", "id": "apis"})

    first_tag_skipped = False
    for row in tables[0].contents:
        if not isinstance(row, Tag):
            continue
        if not first_tag_skipped:
            # The first Tag child is never parsed (presumably the header row).
            first_tag_skipped = True
            continue

        cells = row.contents
        if len(cells) != 4:
            sys.exit("api length is not equal to 4")

        link = cells[0].contents[0]
        api_name = link.contents[0].strip().encode("ascii", 'ignore').lower()
        if api_exist(api_name) != -1:
            continue

        api = {
            "name": api_name,
            "url": link["href"].encode("ascii", 'ignore').strip().lower(),
            "desc": cells[1].contents[0].strip(),
            "category": cells[2].contents[0].strip().encode("ascii", 'ignore').lower(),
            "date": cells[3].contents[0].strip().encode("ascii", 'ignore').lower(),
        }
        print(api)
        apis.append(api)

    next_images = page.findAll("img", attrs={"src": "/images/listnav_next.png"})
    return len(next_images) != 0
def mashups_page_read(url):
    """Scrape one mashup listing page into the global ``mashups`` list.

    ``mashups`` is reset to an empty list on every call.  The page is
    fetched with httplib2 and parsed with BeautifulSoup.  The Tag children
    of the first ``<table summary="Web 2.0 Mashups" id="mashups">`` are
    walked; the first one is skipped and every later one must have exactly
    six children, otherwise the program exits.  For each row whose name is
    not yet known to ``mashup_exist`` a dict with the keys ``name``,
    ``url`` and ``desc`` is printed and appended to ``mashups``.

    Returns True when the page contains an ``<img>`` whose ``src`` is
    ``/images/listnav_next.png``, False otherwise.
    """
    global mashups
    mashups = []
    h = httplib2.Http()
    resp, content = h.request(url)
    cmd_p("reading page: " + url)
    soup = BeautifulSoup(content)
    tables = soup.findAll("table", attrs={"summary": "Web 2.0 Mashups", "id": "mashups"})

    first_tag_skipped = False
    for row in tables[0].contents:
        if not isinstance(row, Tag):
            continue
        if not first_tag_skipped:
            # The first Tag child is never parsed (presumably the header row).
            first_tag_skipped = True
            continue

        cells = row.contents
        if len(cells) != 6:
            # The message previously described the API table (4 cells).
            sys.exit("mashup length is not equal to 6")

        mashup_name = cells[3].contents[0].contents[0].strip().encode("ascii", 'ignore').lower()
        if mashup_exist(mashup_name) != -1:
            continue
        mashup_url = cells[1].contents[0]["href"].encode("ascii", 'ignore').strip().lower()
        mashup_desc = cells[3].contents[1].contents[0].strip()

        mashup = {
            "name": mashup_name,
            "url": mashup_url,
            "desc": mashup_desc,
        }
        print(mashup)
        mashups.append(mashup)

    pages = soup.findAll("img", attrs={"src": "/images/listnav_next.png"})
    return len(pages) != 0
def highlight_api_db_write(elements, api_id, highlight):
    """Store named elements in the ``highlight`` table and link them to an API.

    ``highlight`` is used as the table name; the columns ``<highlight>_id``
    and ``<highlight>_name`` and the link table ``<highlight>_api`` are
    derived from it.  Each name in ``elements`` is inserted with
    ``INSERT OR IGNORE``, its id is read back by name, and the pair
    ``(api_id, id)`` is inserted into the link table.  Finally the row
    count of the table is reported through ``cmd_p``.
    """
    global conn
    id_column = highlight + "_id"
    name_column = highlight + "_name"
    insert_sql = "INSERT OR IGNORE INTO " + highlight + " (" + name_column + ") VALUES (?)"
    lookup_sql = "SELECT " + id_column + " FROM " + highlight + " WHERE " + name_column + "=?"
    link_sql = ("INSERT OR IGNORE INTO " + highlight + "_api (api_id, " + id_column
                + ") VALUES (?, ?)")
    count_sql = "SELECT COUNT(" + id_column + ") FROM " + highlight

    cursor = conn.cursor()
    for name in elements:
        cursor.execute(insert_sql, (name,))
        conn.commit()
        cursor.execute(lookup_sql, (name,))
        row_id = cursor.fetchone()[0]
        cursor.execute(link_sql, (api_id, row_id))
        conn.commit()

    cursor.execute(count_sql)
    total = cursor.fetchone()[0]
    cmd_p(highlight + "s total: " + str(total))
    cursor.close()
def tags_mashup_db_write(tags, mashup_id):
    """Store tag names and link each of them to a mashup.

    Each name in ``tags`` is inserted into ``tag`` with
    ``INSERT OR IGNORE``, its ``tag_id`` is read back by name, and the pair
    ``(mashup_id, tag_id)`` is inserted into ``tag_mashup``.  Finally the
    row count of ``tag`` is reported through ``cmd_p``.
    """
    global conn
    insert_sql = "INSERT OR IGNORE INTO tag (tag_name) VALUES (?)"
    lookup_sql = "SELECT tag_id FROM tag WHERE tag_name=?"
    link_sql = "INSERT OR IGNORE INTO tag_mashup (mashup_id, tag_id) VALUES (?, ?)"

    cursor = conn.cursor()
    for name in tags:
        cursor.execute(insert_sql, (name,))
        conn.commit()
        cursor.execute(lookup_sql, (name,))
        row_id = cursor.fetchone()[0]
        cursor.execute(link_sql, (mashup_id, row_id))
        conn.commit()

    cursor.execute("SELECT COUNT(tag_id) FROM tag")
    total = cursor.fetchone()[0]
    cmd_p("tags total: " + str(total))
    cursor.close()
def description_mashup_db_write(elements, mashup_id, highlight):
    """Attach one ``highlight`` value to a mashup unless a link already exists.

    ``highlight`` is used as the table name; the columns ``<highlight>_id``
    and ``<highlight>_name`` and the link table ``<highlight>_mashup`` are
    derived from it.  ``elements`` is bound as a single SQL parameter (one
    value, not a list).

    If ``<highlight>_mashup`` has no row for ``mashup_id``, the value is
    inserted with ``INSERT OR IGNORE``, its id is looked up and the pair
    ``(mashup_id, id)`` is inserted into the link table.  The row count of
    the table is then reported through ``cmd_p``.
    """
    global conn
    id_column = highlight + "_id"
    name_column = highlight + "_name"
    c = conn.cursor()
    try:
        c.execute("SELECT " + id_column + " FROM " + highlight + "_mashup WHERE mashup_id = ?",
                  (mashup_id,))
        if c.fetchone() is None:
            c.execute("INSERT OR IGNORE INTO " + highlight + " (" + name_column + ") VALUES (?)",
                      (elements,))
            conn.commit()
            # Look the row up by value rather than taking MAX(id): when the
            # insert was ignored because the value already exists, MAX(id)
            # would point at an unrelated, more recent row.  ``IS`` also
            # matches a NULL value, and the newest match wins, so a freshly
            # inserted row is still the one that gets linked.
            c.execute("SELECT " + id_column + " FROM " + highlight
                      + " WHERE " + name_column + " IS ?"
                      + " ORDER BY " + id_column + " DESC LIMIT 1",
                      (elements,))
            row = c.fetchone()
            if row is None:
                cmd_p("error: no " + highlight + " row found for mashup(" + str(mashup_id) + ")")
            else:
                c.execute("INSERT OR IGNORE INTO " + highlight + "_mashup (mashup_id, "
                          + id_column + ") VALUES (?, ?)",
                          (mashup_id, row[0]))
                conn.commit()

        c.execute("SELECT COUNT(" + id_column + ") FROM " + highlight)
        element_total = c.fetchone()[0]
        cmd_p(highlight + "s total: " + str(element_total))
    finally:
        c.close()
def author_mashup_db_write(elements, mashup_id, highlight):
    """Store name/url records in the ``highlight`` table and link them to a mashup.

    ``highlight`` is used as the table name; the columns
    ``<highlight>_id``, ``<highlight>_name`` and ``<highlight>_url`` and
    the link table ``<highlight>_mashup`` are derived from it.  Each item
    of ``elements`` is a dict with the keys ``name`` and ``url``; items
    whose ``url`` is None are skipped.  The others are inserted with
    ``INSERT OR IGNORE``, their id is read back by URL, and the pair
    ``(mashup_id, id)`` is inserted into the link table.  Finally the row
    count of the table is reported through ``cmd_p``.
    """
    global conn
    id_column = highlight + "_id"
    c = conn.cursor()
    try:
        for element in elements:
            if element["url"] is None:
                continue
            c.execute("INSERT OR IGNORE INTO " + highlight + " (" + highlight + "_name, "
                      + highlight + "_url) VALUES (?, ?)",
                      (element["name"], element["url"]))
            conn.commit()
            c.execute("SELECT " + id_column + " FROM " + highlight
                      + " WHERE " + highlight + "_url = ?",
                      (element["url"],))
            element_id = c.fetchone()[0]
            c.execute("INSERT OR IGNORE INTO " + highlight + "_mashup (mashup_id, "
                      + id_column + ") VALUES (?, ?)",
                      (mashup_id, element_id))
            conn.commit()

        c.execute("SELECT COUNT(" + id_column + ") FROM " + highlight)
        element_total = c.fetchone()[0]
        cmd_p(highlight + "s total: " + str(element_total))
    finally:
        c.close()
def api_mashup_db_write(apis, mashup_id):
    """Link a mashup to the APIs it uses.

    ``apis`` holds API URLs.  Each URL is looked up in ``api`` by
    ``api_url``; when no row is found an error is logged through ``cmd_p``
    and the URL is skipped, otherwise ``(mashup_id, api_id)`` is inserted
    into ``mashup_api`` with ``INSERT OR IGNORE``.  Finally ``len(apis)``
    is reported through ``cmd_p``.
    """
    global conn
    c = conn.cursor()
    try:
        for api in apis:
            c.execute("SELECT api_id FROM api WHERE api_url = ?", (api,))
            result = c.fetchone()
            if result is None:
                cmd_p("error:api(" + api + ") is not exist for mashup(" + str(mashup_id) + ")")
                continue
            api_id = result[0]
            c.execute("INSERT OR IGNORE INTO mashup_api (mashup_id, api_id) VALUES (?, ?)",
                      (mashup_id, api_id))
            conn.commit()
        cmd_p("apis total: " + str(len(apis)))
    finally:
        c.close()