def analyze():
    """ Analyze text from a given URL, or tokenize and parse a free-form
        sentence entered directly in the 'url' form field.

        Reads from the POSTed form:
            url      -- an http(s) URL to scrape, or a plain sentence
            noreduce -- if present, disables the parse-forest reducer
            dump     -- if present, dumps the parse forest

        Returns a JSON response containing the parse result, along with
        scraping metadata (None for plain text) and tokenize/parse timings.
    """
    url = request.form.get("url", "").strip()
    use_reducer = "noreduce" not in request.form
    dump_forest = "dump" in request.form
    metadata = None
    # Single sentence (True) or contiguous text from URL (False)?
    single = False
    keep_trees = False
    # Use a monotonic clock for elapsed-time measurement: immune to
    # system clock adjustments, unlike time.time()
    t0 = time.monotonic()
    if url.startswith(("http:", "https:")):
        # Scrape the URL, tokenize the text content and return the token list
        metadata, generator = process_url(url)
        toklist = list(generator)
        # If this is an already scraped URL, keep the parse trees and update
        # the database with the new parse
        keep_trees = Scraper.is_known_url(url)
    else:
        # Tokenize the text entered as-is and return the token list
        # In this case, there's no metadata
        toklist = list(tokenize(url))
        single = True
    tok_time = time.monotonic() - t0
    t0 = time.monotonic()
    result, trees = parse(toklist, single, use_reducer, dump_forest, keep_trees)
    # Add a name register to the result
    create_name_register(result)
    parse_time = time.monotonic() - t0
    if keep_trees:
        # Save a new parse result for an already-known URL
        if Settings.DEBUG:
            print("Storing a new parse tree for url {0}".format(url))
        Scraper.store_parse(url, result, trees)
    result["metadata"] = metadata
    result["tok_time"] = tok_time
    result["parse_time"] = parse_time
    # Return the tokens as a JSON structure to the client
    return jsonify(result=result)