Python Parser.parse Examples

Programming Language: Python

Namespace/Package Name: app.parser

Class/Type: Parser

Method/Function: parse

Examples at hotexamples.com: 4

Python Parser.parse - 4 examples found. These are the top-rated real-world Python examples of app.parser.Parser.parse extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Parser(30)

clean(4)

parse(4)

decodeArray(3)

parseCommands(2)

decode(2)

getLengthFromStr(2)

idle(1)

parser_text(1)

parse_single_day_data(1)

parse_csv(1)

parseBulkString(1)

openFile(1)

list_it(1)

is_token_place_entity(1)

is_stop_word(1)

get_items(1)

get_subcategories(1)

get_parent_categories(1)

_links_domain_filter(1)

get_end_of_question(1)

getExamples(1)

find_operation(1)

extract_interesting_tokens(1)

delete_common_words(1)

decapitalize_words_list(1)

clean_sentence(1)

capitalize_words_list(1)

capitalize_text(1)

ponctuation(1)

Example #1

Show file

File: routes.py Project: GuillaumeOj/YodaPy

def process():
    """Build the JSON reply for the message posted in the request form.

    Outcomes, in the order they are checked:
      * no "message" field        -> empty JSON object
      * message contains ":help"  -> BOT.instructions
      * parser gives no "parsed_input"        -> BOT.parse_error
      * geo code reply lacks "place_name_fr"  -> BOT.not_found
      * otherwise -> a "map" entry, plus an "article" entry when the
        wiki reply carries a "url"
    """
    form = request.form

    # Nothing was posted: answer with an empty payload.
    if "message" not in form:
        return jsonify({})

    message = form["message"]

    # Display instructions if the user typed :help
    if ":help" in message:
        return jsonify(BOT.instructions)

    # Parse the user input
    parser_response = Parser().parse(message)
    if "parsed_input" not in parser_response:
        return jsonify(BOT.parse_error)

    # Send the parsed input to the geo code api
    geo_response = GeoCode().api_request(parser_response["parsed_input"])
    if "place_name_fr" not in geo_response:
        return jsonify(BOT.not_found)

    # The geo response itself becomes the "map" entry and is extended in place.
    place = geo_response
    place.update(BOT.found_place)
    content = {"map": place}

    # Send the coordinates to wikipedia
    wiki_response = WikiSearch().geo_search_article(
        place["latitude"], place["longitude"])

    if "url" in wiki_response:
        wiki_response.update(BOT.found_article)
        content["article"] = wiki_response

    return jsonify(content)

Example #2

Show file

File: run.py Project: mvodanovic/wahlin-notifier

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# vim:fileencoding=utf-8

from app.mailer import Mailer
from app.parser import Parser
from app.template import MailTemplate
from app.url_fetcher import URLFetcher

# Create the fetcher and run its fetch step; the outcome is read back from
# ``fetcher.content`` below.
fetcher = URLFetcher()
fetcher.fetch()

# Only parse when the fetcher ended up with truthy content.
if fetcher.content:
    parser = Parser(fetcher.content)
    parser.parse()

    # Send a mail built from the fetched URL and the parser's links, but only
    # when the parser flags new content.
    if parser.has_new_content:
        Mailer.send(MailTemplate(fetcher.url, parser.links))

Example #3

Show file

def test_parser_return_expected_string(sentence, sentence_parsed):
    """Check that parsing ``sentence`` yields exactly ``sentence_parsed``."""
    result = Parser(sentence).parse()

    assert result == sentence_parsed

Example #4

Show file

File: classifier.py Project: Larissa13/Mac5853

class Classifier:
    """
        A classifier based on word2vec similarity between words. It classifies
        the text of a page as belonging to a forbidden/restricted class
        (e.g. 'armas', 'cigarro', 'prostituição', 'remédios', 'serviços') or as
        permitted.

        # Attributes:
        parser (Parser): extracts the words of a page from its url.
        model (KeyedVectors object): the word2vec model used to compare words.
                     When the constructor receives 'None', the model is loaded
                     from 'wiki.pt/wiki.pt.vec'.
        status (str): the current processing stage of the classification.
    """
    def __init__(self, model=None):
        """
        Creates the parser and sets up the word2vec model.

        # Input:
            - model (KeyedVectors object, None): an already loaded model. If it
              is 'None', the model is loaded from 'wiki.pt/wiki.pt.vec'.
              Default value: None
        """
        self.parser = Parser()
        if model is None:
            #TODO download if inexistent
            self.model = KeyedVectors.load_word2vec_format(
                'wiki.pt/wiki.pt.vec')

        else:
            self.model = model

        self.status = "extracting words from website"

    def calc_dists(self, word, kws):
        """
        Calculates the similarity between a word and each keyword of a list,
        using the model's `similarity` method.

        # Input:
            - word (str): The word that will be compared to the keywords.
            - kws (list): List of Keywords (objects exposing a `word` attribute).

        # Output:
            - dists (numpy array): one similarity value per keyword, in the
              same order as kws.
        """
        return np.array([self.model.similarity(word, kw.word) for kw in kws])

    def check_in_vocab(self, word):
        """
        Checks whether a word is in the vocabulary of the model or not.

        # Input:
            - word (str or Keyword): the word to be verified. Anything that is
              not a string is expected to expose the text in a `word` attribute.

        # Output:
            - boolean value indicating if the word is part of the model's vocabulary.
        """
        # isinstance also accepts str subclasses, which `type(word) == str`
        # would wrongly send to the `.word` branch.
        text = word if isinstance(word, str) else word.word

        # NOTE(review): `.wv.vocab` looks like the pre-4.0 gensim API; confirm
        # against the gensim version this project pins.
        return text in self.model.wv.vocab

    def rm_unseen(self, words):
        """
        Given a list of words, returns a list of those that are part of the model's vocabulary.

        # Input:
            - words (list): A list of strings or Keywords.

        # Output:
            - list with the items of words that are present in the model's
              vocabulary, in their original order.
        """

        return [word for word in words if self.check_in_vocab(word)]

    def prepare_result(self, result, url, thresh, kw_result):
        """
        Prepares the answer structure to be displayed in the website for the user.

        # Input:
            - result (dict): maps from label to its score.
            - url (str): an url string.
            - thresh (float): a label is only considered when its score is
              strictly greater than this value.
            - kw_result (dict): maps from label name to a pandas series which
              maps from keywords to similarity.

        # Output:
            - answer (dict): contains 'url', 'restrict' (bool), 'reasons'
              (a message and a dict from keyword text to similarity) and
              'label' (the winning label name or 'permitted').
        """
        answer = {'url': url}

        # Keep the label with the highest score strictly above the threshold;
        # on ties the first one found wins.
        best_label, best_score = None, thresh
        for label, res in result.items():
            if res > best_score:
                best_label, best_score = label, res

        restrict = best_label is not None
        answer['restrict'] = restrict

        if restrict:
            reason = "highly correlated to " + best_label.name
            keyword_scores = {
                key.word: value
                for key, value in kw_result[best_label.name].to_dict().items()
            }
            label_name = best_label.name
        else:
            reason = "not very correlated to any restrict categories"
            keyword_scores = dict()
            label_name = 'permitted'

        answer['reasons'] = [reason, keyword_scores]
        answer['label'] = label_name

        return answer

    def classify(self, url, kws, labels, dist_thresh=0.20, kws_thresh=0.49):
        """
        Classifies an url based on the word2vec similarity of words extracted from its html content.

        This is a generator: it yields the status strings as the work
        progresses and, last, the output of the prepare_result function. When
        there are no usable words it yields "error" and stops.

        # Input:
            - url (str): an url string.
            - kws (list): a list of Keywords from Keyword database.
            - labels (list): a list of Labels from Label database. The
              `keywords` attribute of each label is replaced by the subset
              found in the model's vocabulary.

        *Optional:*
            - dist_thresh (float, 0.20): a keyword counts for a label when its
              mean similarity over the page's words is greater than this value.
            - kws_thresh (float, 0.49): a label is reported only when the
              fraction of its keywords that count is greater than this value.

        """
        kws = self.rm_unseen(kws)

        # Reset the stage so a reused instance does not start by reporting the
        # last stage of the previous run.
        self.status = "extracting words from website"
        yield self.status
        words = self.parser.parse(url)

        if len(words) == 0 or (len(words) == 1 and words[0] == ""):
            yield "error"
            return

        words = self.rm_unseen(words)
        if len(words) == 0:
            # No parsed word is known to the model: there is nothing to
            # compare, and the similarity table below cannot be built.
            yield "error"
            return

        for label in labels:
            label.keywords = self.rm_unseen(label.keywords)

        self.status = "calculating"
        yield self.status

        # One row per word, one column per keyword.
        dists = np.array([self.calc_dists(word, kws) for word in words])

        df = pd.DataFrame(dists, columns=kws)

        result = dict()
        key_results = dict()
        for label in labels:
            # Mean similarity of each of the label's keywords over all words.
            key_mean = df[label.keywords].mean(axis=0)
            key_results[label.name] = key_mean
            # Fraction of the label's keywords above dist_thresh.
            result[label] = (key_mean > dist_thresh).mean()

        self.status = "formulating answer"
        yield self.status

        yield self.prepare_result(result, url, kws_thresh, key_results)