import sys

import requests


class Scrapper:
    def __init__(self, start_url, savefile, max_depth=10, max_width=100):
        """
        max_depth: maximum recursion depth to follow for each link
        max_width: maximum number of dict keys, i.e. width of the tree
        """
        self.parser = Parser()
        self.start_url = start_url
        self.saver = Saver(savefile, max_width)
        self.max_depth = max_depth
        self.saver.starting_url(self.start_url)

    def start_scrapping(self, depth=0, start_url=None):
        # Stop recursing once the maximum depth is reached.
        if depth == self.max_depth:
            return
        if start_url is None:
            start_url = self.start_url
        nested_urls = self.get_urls(start_url)
        # Abort the whole crawl if saving fails (e.g. the tree is full).
        if not self.save_data(start_url, nested_urls):
            sys.exit()
        # A failed request yields None; there is nothing further to follow.
        if nested_urls is None:
            return
        for url in nested_urls:
            self.start_scrapping(depth + 1, url)

    def get_urls(self, url):
        # Fetch the page and extract its outgoing links.
        try:
            response = requests.get(url)
            web_page = response.content
            return self.parser.get_links(web_page)
        except requests.exceptions.RequestException as re:
            print(re)
            return None

    def save_data(self, start_url, nested_urls):
        # Saver.save returns a falsy value once it cannot store more data.
        return bool(self.saver.save(start_url, nested_urls))
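

# A minimal usage sketch: it assumes the Parser and Saver classes are defined
# or imported elsewhere in the project, and the seed URL and save file below
# are hypothetical illustrative values, not part of the original code.
if __name__ == "__main__":
    scrapper = Scrapper(
        start_url="https://example.com",  # hypothetical seed URL
        savefile="links.json",            # hypothetical output file
        max_depth=3,
        max_width=50,
    )
    scrapper.start_scrapping()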