Example #1
import time

from flask import request, jsonify

# Helpers referenced below (process_url, tokenize, parse,
# create_name_register, Scraper, Settings) are project-specific and
# assumed to be importable from the surrounding application.


def analyze():
    """ Analyze text from a given URL """

    url = request.form.get("url", "").strip()
    # Optional form flags: "noreduce" turns off the reducer,
    # "dump" requests a dump of the parse forest
    use_reducer = "noreduce" not in request.form
    dump_forest = "dump" in request.form
    metadata = None
    # Single sentence (True) or contiguous text from URL (False)?
    single = False
    keep_trees = False

    t0 = time.time()

    if url.startswith(("http:", "https:")):
        # Scrape the URL, tokenize the text content and return the token list
        metadata, generator = process_url(url)
        toklist = list(generator)
        # If this is an already scraped URL, keep the parse trees and update
        # the database with the new parse
        keep_trees = Scraper.is_known_url(url)
    else:
        # Tokenize the text entered as-is and return the token list
        # In this case, there's no metadata
        toklist = list(tokenize(url))
        single = True

    tok_time = time.time() - t0

    t0 = time.time()

    # result = profile(parse, toklist, single, use_reducer, dump_forest)
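    # Parse the token list; 'trees' contains the resulting parse trees,
    # which are stored below only when keep_trees is set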
    result, trees = parse(toklist, single, use_reducer, dump_forest, keep_trees)

    # Add a name register to the result
    create_name_register(result)

    parse_time = time.time() - t0

    if keep_trees:
        # Save a new parse result
        if Settings.DEBUG:
            print("Storing a new parse tree for url {0}".format(url))
        Scraper.store_parse(url, result, trees)

    result["metadata"] = metadata
    result["tok_time"] = tok_time
    result["parse_time"] = parse_time

    # Return the tokens as a JSON structure to the client
    return jsonify(result=result)
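
Assuming the view is registered on a route such as /analyze (the decorator is not shown above) and the server runs on the default Flask port, a client call might look like the following sketch; the route and host are placeholders, not part of the example.

import requests

# Hypothetical client call: the /analyze route and localhost:5000 host are
# assumptions, since the route registration is not shown in the example.
resp = requests.post(
    "http://localhost:5000/analyze",
    data={
        "url": "https://example.com/article",  # URL to scrape, or plain text
        "dump": "1",  # optional: request a parse forest dump
    },
)
payload = resp.json()["result"]
print(payload["tok_time"], payload["parse_time"])

The response unwraps as resp.json()["result"], matching the jsonify(result=result) call at the end of the view; the tok_time and parse_time keys are set by the view before returning.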