def main(fn):
    """Fetch one not-yet-valid page from the db and return its extracted text.

    Selects a single ``page_id`` from the ``page`` table where
    ``is_valid = 0``, hands it to ``the_db.retrieve_url`` to obtain the page
    markup, parses the markup with BeautifulSoup and runs ``find_text`` on
    the resulting DOM.

    Args:
        fn: Unused by this function; kept so existing callers keep working.

    Returns:
        Whatever ``find_text`` returns for the parsed page, or ``None`` when
        no page row with ``is_valid = 0`` exists.
    """
    the_db = db.db()

    # Find one empty page record with a url.
    f_sql = 'SELECT page_id FROM page WHERE is_valid = 0 LIMIT 1'
    one_page = the_db.cursor.execute(f_sql).fetchone()

    # fetchone() yields None when the query matches no rows; indexing it
    # would raise an opaque TypeError, so report "nothing to do" instead.
    if one_page is None:
        return None

    # Go and fill it.
    # NOTE(review): retrieve_url presumably downloads the page and stores it
    # in the db as a side effect -- confirm against the db module.
    full_txt = the_db.retrieve_url(one_page[0])
    dom = BeautifulSoup(full_txt)
    return find_text(dom)
def main(args):
    """Register every url listed in a claim file under that file's claim.

    The claim text is derived from the file name: the directory part and a
    trailing ``.url`` / ``.urls`` extension are dropped and underscores are
    replaced by spaces.  Each non-empty line of the file is then passed to
    ``the_db.add_url`` together with the claim.

    Args:
        args: Sequence whose first element is the path of the url-list file.

    Raises:
        IndexError: If ``args`` is empty.
        IOError: If the file cannot be opened.
    """
    # Create the db object.
    the_db = db.db()

    # The claim is the file name, the file a list of urls.
    cl_fn = args[0]

    # Strip only a real trailing extension.  The dot is escaped and the
    # pattern anchored so names that merely contain "url" (e.g. "curl_x")
    # are left intact.
    claim_stem = re.sub(r'\.urls?$', '', os.path.basename(cl_fn))
    # Replace underscores with spaces.
    the_claim = claim_stem.replace('_', ' ')
    # Single-argument parenthesised print gives the same output on
    # Python 2 and Python 3 (two spaces, matching the original statement).
    print('claim:  %s' % the_claim)

    # The context manager closes the file even if add_url raises.
    with open(cl_fn) as cl_fd:
        for a_url in cl_fd:
            # Drop only the line terminator; slicing off the last character
            # would truncate a final line that has no trailing newline.
            url_str = a_url.rstrip('\r\n')
            if not url_str:
                # Skip blank lines rather than storing an empty url.
                continue
            if dbg:
                print(url_str)
            # Enter the url into the db.
            the_db.add_url(url_str, the_claim)