Example #1
import base64
import time

# Database, Parser, connect_to_tor, PATH and TORBUNDLEHEADER are defined
# elsewhere in the project.
def crawl(urloc: str) -> tuple[str, list]:
    db      = Database(PATH)
    parser  = Parser()
    session = connect_to_tor()

    # check whether this URL is already in the database
    try:
        urlindb = db.isCrawled(urloc)
        if len(urlindb) > 0:
            # URL already crawled, nothing new to return
            return urloc, []
    except Exception as e:
        print(e)


    try:
        try:
            # fetch the page through the Tor session
            r = session.get(urloc, headers=TORBUNDLEHEADER, timeout=20)
            r.raise_for_status()

        except Exception as err:
            # request failed: store the error message for this URL
            insert_data = {
                "protocol" : "Error",
                "url"      : urloc,
                "data"     : base64.b64encode(str(err).encode()),
                "lastvisit": int(time.time()),
            }

            try:
                db.insert(insert_data)
            except Exception as e:
                # URL is already in the DB: refresh the stored record instead
                if "UNIQUE constraint failed" in str(e):
                    try:
                        db.update(insert_data)
                    except Exception:
                        pass
            # no links to follow on a failed request
            return urloc, []
        else:
            # request succeeded: extract links and store the page
            urls        = parser.urlExtractor(urloc, r.text)
            protocol    = urloc.split("://")[0]
            insert_data = {
                "protocol" : protocol,
                "url"      : urloc,
                "data"     : base64.b64encode(r.content),
                "lastvisit": int(time.time()),
            }
            
            try:
                db.insert(insert_data)
            except Exception as e:
                # URL is already in the DB: refresh the stored record instead
                if "UNIQUE constraint failed" in str(e):
                    try:
                        db.update(insert_data)
                    except Exception:
                        pass

            retUrls = []
            for key, value in urls.items():
                # follow only http/https links
                if key in ("http", "https"):
                    for url in value:
                        # keep only .onion hosts
                        tld = parser.tldExtractor(url)
                        if tld == "onion":
                            retUrls.append(url)
            return urloc, retUrls

    except Exception:
        # any unexpected failure: return the URL with no new links
        return urloc, []
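
For context, here is a minimal sketch of how crawl() might be driven. The run_crawler() name, the seed list, the visited set, and the breadth-first frontier are illustrative assumptions and not part of the original code.

from collections import deque

def run_crawler(seed_urls):
    # hypothetical driver loop: feeds crawl() from a breadth-first frontier
    frontier = deque(seed_urls)   # URLs waiting to be crawled
    visited  = set()              # URLs already handed to crawl()

    while frontier:
        url = frontier.popleft()
        if url in visited:
            continue
        visited.add(url)

        # crawl() returns the URL it processed plus any new .onion links
        _, new_urls = crawl(url)
        for new_url in new_urls:
            if new_url not in visited:
                frontier.append(new_url)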