Ejemplo n.º 1
0
def get_user_search():
    """Prompt for a search phrase and return its words as a lower-case list.

    The prompt repeats until the phrase contains at least one search term
    (a run of word characters and/or apostrophes).  Entering ``-end``
    (surrounding whitespace and letter case are ignored) sets
    ``poodle.database_built`` to True and calls
    ``poodle.init_menu_options()``; if that call returns, the prompt is
    shown again.

    Returns:
        list: the lower-cased terms found in the phrase; never empty.
    """
    while True:
        user_search = raw_input(" \n What would you like to search for? (-end to exit search)\n")
        # Normalise once so the command check and the tokenising agree.
        phrase = user_search.strip().lower()

        if phrase == "-end":
            poodle.database_built = True
            poodle.init_menu_options()
            continue

        # Validate the extracted terms rather than the raw text, so input
        # made only of spaces or punctuation is rejected instead of
        # producing an empty search.
        search_terms = re.findall(r"[\w']+", phrase)
        if search_terms:
            return search_terms

        # Parenthesised single string: prints identically under the
        # Python 2 print statement and the print function.
        print(" Nothing to search for -- Try again. \n")
Ejemplo n.º 2
0
def get_url():
    """Prompt for an http(s) URL, normalise it and pass it to ``crawl_url``.

    The prompt repeats until a URL starting with ``http://`` or
    ``https://`` (checked case-insensitively) and containing a host is
    entered, e.g. https://dunluce.infc.ulst.ac.uk/d12wo/Web/B3/test_index.html
    Entering ``-end`` calls ``poodle.init_menu_options()``; if that call
    returns, the prompt is shown again.

    Only the scheme and host are lower-cased: the path and query are
    case-sensitive, so they are handed on exactly as typed.  A single
    trailing ``/`` is removed before crawling.
    """
    while True:
        entered = raw_input(" Enter a URL to crawl (-end to exit to menu) \n").strip()
        lowered = entered.lower()  # used for case-insensitive checks only

        if lowered == "-end":
            poodle.init_menu_options()
            continue

        if not lowered.startswith(("http://", "https://")):
            # Parenthesised single string: prints identically under the
            # Python 2 print statement and the print function.
            print(" URL must begin with http:// or https://\n")
            continue

        url_to_crawl = _normalise_crawl_url(entered)
        if url_to_crawl is not None:
            break
        # A bare "http://" used to slip through and be crawled as "http:/".
        print(" URL must include a host name after http:// or https://\n")

    crawl_url(url_to_crawl)


def _normalise_crawl_url(raw_url):
    """Return ``raw_url`` normalised for crawling, or None if it has no host.

    ``raw_url`` must already be known to start with an http/https scheme
    followed by ``://``.  The scheme and host[:port] are lower-cased, any
    ``user:password@`` prefix and everything after the host keep their
    original case, and one trailing ``/`` is dropped.
    """
    scheme, _, remainder = raw_url.partition("://")

    # The authority (userinfo@host:port) ends at the first "/", "?" or "#".
    cut = len(remainder)
    for delimiter in "/?#":
        position = remainder.find(delimiter)
        if position != -1:
            cut = min(cut, position)
    authority, tail = remainder[:cut], remainder[cut:]

    userinfo, at_sign, host_port = authority.rpartition("@")
    if not host_port:
        return None

    url = scheme.lower() + "://" + userinfo + at_sign + host_port.lower() + tail
    if url.endswith("/"):
        url = url[:-1]
    return url