def analyze():
    """ Analyze text from a given URL, or tokenize and parse a free-form
        sentence entered directly in the 'url' form field.

        Reads from the POSTed form:
            url      -- an http(s) URL to scrape, or a plain sentence
            noreduce -- if present, disables the parse-forest reducer
            dump     -- if present, dumps the parse forest

        Returns a JSON response containing the parse result, along with
        scraping metadata (None for plain text) and tokenize/parse timings.
    """
    url = request.form.get("url", "").strip()
    use_reducer = "noreduce" not in request.form
    dump_forest = "dump" in request.form
    metadata = None
    # Single sentence (True) or contiguous text from URL (False)?
    single = False
    keep_trees = False
    # Use a monotonic clock for elapsed-time measurement: immune to
    # system clock adjustments, unlike time.time()
    t0 = time.monotonic()
    if url.startswith(("http:", "https:")):
        # Scrape the URL, tokenize the text content and return the token list
        metadata, generator = process_url(url)
        toklist = list(generator)
        # If this is an already scraped URL, keep the parse trees and update
        # the database with the new parse
        keep_trees = Scraper.is_known_url(url)
    else:
        # Tokenize the text entered as-is and return the token list
        # In this case, there's no metadata
        toklist = list(tokenize(url))
        single = True
    tok_time = time.monotonic() - t0
    t0 = time.monotonic()
    result, trees = parse(toklist, single, use_reducer, dump_forest, keep_trees)
    # Add a name register to the result
    create_name_register(result)
    parse_time = time.monotonic() - t0
    if keep_trees:
        # Save a new parse result for an already-known URL
        if Settings.DEBUG:
            print("Storing a new parse tree for url {0}".format(url))
        Scraper.store_parse(url, result, trees)
    result["metadata"] = metadata
    result["tok_time"] = tok_time
    result["parse_time"] = parse_time
    # Return the tokens as a JSON structure to the client
    return jsonify(result=result)