Example #1
0
def main(fn):
    """Fetch one pending page via the database object and return its text.

    Selects a single ``page_id`` from the ``page`` table where
    ``is_valid = 0``, hands it to ``the_db.retrieve_url``, parses the result
    with BeautifulSoup and returns whatever ``find_text`` extracts from it.

    Args:
        fn: Not used by this function; kept so existing callers keep working.

    Returns:
        The result of ``find_text`` for the parsed page, or ``None`` when no
        row with ``is_valid = 0`` exists.
    """
    the_db = db.db()

    ## find one page record that is still flagged is_valid = 0
    f_sql = 'SELECT page_id FROM page WHERE is_valid = 0 LIMIT 1'
    one_page = the_db.cursor.execute(f_sql).fetchone()
    if one_page is None:
        ## Nothing pending: a DB-API fetchone() gives None on an empty result,
        ## and indexing it below would raise TypeError.
        return None

    ## Go and fill it
    page_id = one_page[0]
    full_txt = the_db.retrieve_url(page_id)
    dom = BeautifulSoup(full_txt)
    txt = find_text(dom)
    return txt
Example #2
0
def main(fn):
    """Fetch one pending page via the database object and return its text.

    Selects a single ``page_id`` from the ``page`` table where
    ``is_valid = 0``, hands it to ``the_db.retrieve_url``, parses the result
    with BeautifulSoup and returns whatever ``find_text`` extracts from it.

    Args:
        fn: Not used by this function; kept so existing callers keep working.

    Returns:
        The result of ``find_text`` for the parsed page, or ``None`` when no
        row with ``is_valid = 0`` exists.
    """
    the_db = db.db()

    ## find one page record that is still flagged is_valid = 0
    f_sql = 'SELECT page_id FROM page WHERE is_valid = 0 LIMIT 1'
    one_page = the_db.cursor.execute(f_sql).fetchone()
    if one_page is None:
        ## Nothing pending: a DB-API fetchone() gives None on an empty result,
        ## and indexing it below would raise TypeError.
        return None

    ## Go and fill it
    page_id = one_page[0]
    full_txt = the_db.retrieve_url(page_id)
    dom = BeautifulSoup(full_txt)
    txt = find_text(dom)
    return txt
Example #3
0
def main(args):

    # Create the db object
    the_db = db.db()
    
    # The claim is the file name, the file a list of urls
    cl_fn = args[0]
    cl_fd = open(cl_fn)
    cl_fn = re.sub(r'.url[s]?', '', cl_fn)
    # replace underscores with spaces.
    the_claim = re.sub('_', ' ', os.path.basename(cl_fn))
    print 'claim: ', the_claim

    for a_url in cl_fd.readlines():
        # Enter the url into the db
        url_str = a_url[:-1]
        if dbg: print url_str
        the_db.add_url(url_str, the_claim)