Code Example #1
    def _request_data(self, command, tweet, orig=None):
        # same as execute_human_response
        reply_to = "@%s" % command.get('bot_name')

        if command.get('command') == "CONNECT":
            msg = '%s GETINFO' % reply_to
            status = 'data-req'
        elif command.get('command') == "PAY":
            if orig:
                amount, description = Parsers.extract_payment(orig)
            else:
                amount, description = Parsers.extract_payment(tweet)
            msg = '%s GETINVOICE %d %s' % (reply_to, amount, description)
            status = 'data-req'
        elif command.get('command') == "FUNDCHANNEL":
            # TODO: Extract channel amounts from orig
            # if orig:
            peer_id = command.get('pubkey')
            if peer_id:
                msg = self.lnrpc._fundchannel(peer_id)
                status = 'complete'
            else:
                msg = '%s GETINFO' % reply_to
                status = 'data-req'
        else:
            raise ValueError("Command not found: %s" % command.get('command'))
        sid = self._post(msg, command.get('last_sid'))
        # update status
        return self.db.commands.update_status(command.get('sid'), sid, status)
Code Example #2
 def _process_bot_response(self, command, tweet, orig=None):
     if command.get('command') == "CONNECT":
         # Connect to new peer
         uri = Parsers.extract_uri(tweet)
         pubkey, ip, port = Parsers.extract_info(uri)
         msg = self.lnrpc._connect(pubkey, ip, port)
         uid = command.get('peer_uid')
         self.db.peers.set_node(uid, pubkey, ip, port)
     elif command.get('command') == "PAY":
         # Pay invoice
         # TODO: Validate invoice amount with decodepay
         bolt11 = Parsers.extract_bolt11(tweet)
         pay_info = self.lnrpc.decodepay(bolt11)
         msg = self.lnrpc._pay(bolt11)
     elif command.get('command') == "FUNDCHANNEL":
         # Process response to GETINFO
         # TODO: Extract channel amounts from orig
         # if orig:
         uri = Parsers.extract_uri(tweet)
         pubkey, _, _ = Parsers.extract_info(uri)
         msg = self.lnrpc._fundchannel(pubkey)
     sid = self._post("%s\nDONE." % msg, command.get('last_sid'))
     # update status
     return self.db.commands.update_status(command.get('sid'), sid,
                                           'complete')
Code Example #3
def main():
    while True:
        print "\nPython Calculator: "
        line = list()
        while True:
            user_input = raw_input("Enter Expression: ")
            user_input = user_input.replace(' ', '')
            if user_input is not None and len(user_input.strip()) == 0:
                break
            user_input += " "
            line.append(user_input)
            break
        expr = line
        if len(expr) == 0:
            break
        try:
            scan = scanner.Scanner()
            tokens = list()
            tokens = scan.parseExpression(expr[0])
            printTokens(tokens)
            parsers = Parsers()
            expressionTree = parsers.parse(tokens)
            evaluator = evaluators.Evaluators()
            result = evaluator.evaluate(expressionTree)
            print '\nResult:',
            print result
        except CalcExceptions, x:
            print 'Error!:',
            print x.message
Code Example #4
def main():
    while True:
        print "\nPython Calculator: "
        line = list()
        while True:
            user_input = raw_input("Enter Expression: ")
            user_input = user_input.replace(' ', '')
            print "\n Hellooooooooooollllllllllllll"
            if user_input is not None and len(user_input.strip()) == 0:
                break
            user_input += " "
            line.append(user_input)
            break
        expr = line
        if len(expr) == 0:
            break
        try:
            scan = scanner.Scanner()
            tokens = list()
            tokens = scan.parseExpression(expr[0])
            printTokens(tokens)
            parsers = Parsers()
            expressionTree = parsers.parse(tokens)
            evaluator = evaluators.Evaluators()
            result = evaluator.evaluate(expressionTree)
            print '\nResult:',
            print result
        except CalcExceptions, x:
            print 'Error!:',
            print x.message
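
The two calculator snippets above are written for Python 2 (print statements, raw_input, the old except syntax). Below is a rough Python 3 sketch of the same read-evaluate-print loop; the project's Scanner/Parsers/Evaluators pipeline is replaced here by a small ast-based evaluator purely so the example is self-contained and runnable, not because that is how the original project works.

import ast
import operator

# Supported binary operators for the toy evaluator.
OPS = {ast.Add: operator.add, ast.Sub: operator.sub,
       ast.Mult: operator.mul, ast.Div: operator.truediv}

def evaluate(node):
    # Recursively evaluate a parsed arithmetic expression tree.
    if isinstance(node, ast.Expression):
        return evaluate(node.body)
    if isinstance(node, ast.BinOp) and type(node.op) in OPS:
        return OPS[type(node.op)](evaluate(node.left), evaluate(node.right))
    if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
        return node.value
    raise ValueError("Unsupported expression")

def main():
    print("\nPython Calculator: ")
    while True:
        user_input = input("Enter Expression: ").strip()
        if not user_input:
            break
        try:
            tree = ast.parse(user_input, mode="eval")
            print("Result:", evaluate(tree))
        except (SyntaxError, ValueError) as exc:
            print("Error!:", exc)

if __name__ == "__main__":
    main()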
Code Example #5
    def __init__(self, data):
        # Store data
        self.data_Final = data

        # Read preprocessed data
        self.data = pd.read_csv("data/w2v_processed.csv")

        # Init NLP
        self.nlp = spacy.load("en_core_web_sm", disable=["ner", "parser"])
        self.nlp.max_length = 5000000

        # Load w2v model
        self.w2v_model = KeyedVectors.load("data/utils/w2v_model.kvmodel")

        # Load contractions
        self.contractions_dict = pickle.load(
            open("data/utils/contractions_dict.p", "rb"))

        # Init parser
        self.parsers = Parsers()

        # Load id_doc2vector
        self.id_doc2vector = pickle.load(open("data/utils/id_doc2vec.p", "rb"))

        # Get query
        self.get_query(self.id_doc2vector)
Code Example #6
    def __init__(self, data):
        # Get data
        self.data_Final = data

        #Processed data
        self.data = pd.read_csv("data/d2v_processed.csv")

        #Init NLP
        self.nlp = spacy.load("en_core_web_sm", disable=["ner", "parser"])
        self.nlp.max_length = 5000000

        #Load d2v model
        self.doc2vec_model = Doc2Vec.load("data/utils/d2v_model.kvmodel")

        #Load contractions
        self.contractions_dict = pickle.load(
            open("data/utils/contractions_dict.p", "rb"))

        # Init parsers
        self.parsers = Parsers()

        # Load tag id
        self.tag_id = pickle.load(open("data/utils/tag_id.p", "rb"))

        #Load id doc2vec
        self.id_doc2vec = pickle.load(open("data/utils/id_doc2vec.p", "rb"))

        #Get query - run program
        self.get_query(self.id_doc2vec)
Code Example #7
    def test_format_c(self):
        p = Parsers(self.LINE_TYPE_C)
        data = p.get_data()

        self.assertEqual(data['first_name'], self.FIRST_NAME)
        self.assertEqual(data['last_name'], self.LAST_NAME)
        self.assertEqual(data['phone_number'], self.CLEAN_NUMBER)
        self.assertEqual(data['color'], self.COLOR)
        self.assertEqual(data['zip_code'], self.ZIPCODE)
Code Example #8
    def test_format_a_bad_number(self, log):
        p = Parsers(self.BAD_NUMBER_TYPE_A)

        self.assertEqual(p.get_data(), {})
        self.assertTrue(p.invalid)

        self.assertEqual(
            log.call_args[0][1], 'Phone number doesn\'t meet requirements: '
            'last_name, first_name, 1324152151352625, RED, 10013.')
Code Example #9
    def test_bad_zip_code_type_a(self, log):
        p = Parsers(self.BAD_ZIP_A)

        self.assertEqual(p.get_data(), {})
        self.assertTrue(p.invalid)

        self.assertEqual(
            log.call_args[0][1], 'Zip code not found in data: '
            'last_name, first_name, (703)-742-0996, RED, 121311.')
Code Example #10
    def parse(self, soup):
        """
        Determine the generation of the page's soup, and return a
        response code so that the page's soup can be parsed
        appropriately
        """
        # TODO: determine parser to use

        parse_helper = Parsers(soup)
        parse_helper.bon_apetit_2020()
        return 'response_code'
Code Example #11
 def __init__(self, data):
   # Load data
   self.data = data
   
   # Parser
   self.parsers = Parsers()
   
   # Init tf-idf
   click.echo("Creating: index and tf-idf")
   self.index, self.tf, self.df, self.idf = self.load_index_tfidf()
   click.echo("Done.\n")
   
   #Ask for query
   self.get_query()
Code Example #12
 def _execute_bot_response(self, command, tweet):
     if command.get('command') == "GETINFO":
         msg = self.lnrpc.get_uri()
     elif command.get('command') == "GETINVOICE":
         amount, description = Parsers.extract_payment(tweet)
         msg = self.lnrpc.get_invoice(amount, command.get('bot_name'),
                                      description)
     sid = self._post(msg, command.get('last_sid'))
     # update status
     return self.db.commands.update_status(command.get('sid'), sid,
                                           'complete')
Code Example #13
class word2vec(object):
    def __init__(self, data):
        # Store data
        self.data_Final = data

        # Read preprocessed data
        self.data = pd.read_csv("data/w2v_processed.csv")

        # Init NLP
        self.nlp = spacy.load("en_core_web_sm", disable=["ner", "parser"])
        self.nlp.max_length = 5000000

        # Load w2v model
        self.w2v_model = KeyedVectors.load("data/utils/w2v_model.kvmodel")

        # Load contractions
        self.contractions_dict = pickle.load(
            open("data/utils/contractions_dict.p", "rb"))

        # Init parser
        self.parsers = Parsers()

        # Load id_doc2vector
        self.id_doc2vector = pickle.load(open("data/utils/id_doc2vec.p", "rb"))

        # Get query
        self.get_query(self.id_doc2vector)

    def get_query(self, id_doc2vector):
        # Ask for query
        active = True
        while (active):
            click.echo(
                "######################################################")
            click.echo("TYPE 'X' TO EXIT.")
            click.echo("Insert query:")
            query = input()
            click.echo(
                "######################################################\n")

            if query == 'X' or query == 'x':
                click.echo("Exiting...")
                active = False
            else:
                self.search(query, self.id_doc2vector)

        return 0

    def expand_contractions(self, text, contractions_re):
        """
        Replace each contraction matched by contractions_re with its expansion
        from contractions_dict.
        """
        def replace(match):
            return self.contractions_dict[match.group(0)]

        return contractions_re.sub(replace, text)

    def clean_text(self, text):
        """
        * Remove words with digits
        * Replace newline characters with a space
        * Remove URLs
        * Replace non-English characters with a space
        """
        # Remove digits
        text = re.sub(r'\w*\d\w*', '', text)

        # Remove new Line chars
        text = re.sub('\n', ' ', text)

        #Remove links
        text = re.sub(r"http\S+", "", text)

        #Replace non-english chars
        text = re.sub('[^a-z]', ' ', text)

        return text

    def preprocessing(self, text):
        """
        Given a text string, apply preprocessing techniques:
            * Lowercase the text
            * Expand contractions
            * Clean the text
            * Remove stopwords
            * Lemmatize words
        """
        # Lower case
        text = text.lower()

        # Regular expression for finding contractions
        contractions_re = re.compile('(%s)' %
                                     '|'.join(self.contractions_dict.keys()))

        #Expand contractions
        text = self.expand_contractions(text, contractions_re)
        text = self.clean_text(text)

        #Remove added spaces
        text = re.sub(" +", " ", text)
        text = text.strip()

        #Stop words and Lemmatizing
        text = ' '.join([
            token.lemma_ for token in list(self.nlp(text))
            if (token.is_stop == False)
        ])

        return text

    def embedding_w2v(self, doc_tokens):
        """
        Return the mean word-vector representation of a list of tokens.
        """
        embeddings = []
        if len(doc_tokens) < 1:
            return np.zeros(100)
        else:
            for t in doc_tokens:
                if t in self.w2v_model.wv.vocab:
                    embeddings.append(self.w2v_model.wv.word_vec(t))
                else:
                    embeddings.append(np.random.rand(100))

        return np.mean(embeddings, axis=0)

    def w2v_collection(self, data):
        """
        Given a collection of documents, return a dict of id:vector pairs where
        the vector is the embedding representation of the document.
        """
        id_doc2v = {}
        for id, text in zip(data["id"].values, data["full_text"]):
            id_doc2v[id] = self.embedding_w2v(text)

        return id_doc2v

    def rank(self, query, id_doc2vec):
        """
        Given a query, preprocess it, embed it, and return an ordered dictionary
        of id:similarity_score pairs.
        """
        # Pre-process query
        query = self.preprocessing(query)

        # Query vector
        q_vector = self.embedding_w2v(query.split())

        #Doc query similarity
        doc_query_sim = {
            k: cosine_similarity(
                np.array(v).reshape(1, -1),
                np.array(q_vector).reshape(1, -1))
            for k, v in id_doc2vec.items()
        }

        # Sort
        doc_query_sim = {
            k: v
            for k, v in sorted(
                doc_query_sim.items(), key=lambda item: item[1], reverse=True)
        }

        return doc_query_sim

    def search(self, query, id_doc2vector, topn=20):
        """
        Search for tweets given a query and display the results.
        Arguments:
            id_doc2vector -- dict mapping id to document vector - dict
            topn -- default: 20 - top N results to display - int
        """
        # Get ranked docs
        doc_query_sim = self.rank(query, id_doc2vector)
        ids = list(doc_query_sim.keys())[:topn]

        click.echo("Results\n")

        for index, id in enumerate(ids):
            doc = self.data_Final[self.data_Final["id"] == id]
            tweet, date, author, retweets, favorites, url, hashtags = self.parsers.parser_tweet_results(
                doc)

            print("______________________________________________________")
            print(f"Tweet {index}")
            print(f"\t·Author: {author}")
            print(f"\t·Date: {date}")
            print(f"\t·Tweet: {tweet}")
            print(f"\t·Retweets: {retweets}")
            print(f"\t·Favorites: {favorites}")
            print(f"\t·Hashtags: {hashtags}")
            print(f"\t·URL: {url}")
            print("______________________________________________________\n")
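
For reference, the ranking idea behind embedding_w2v() and rank() above boils down to a few lines: embed the query as the mean of its word vectors and order documents by cosine similarity. The sketch below uses tiny hand-made vectors in place of the real gensim KeyedVectors model, purely for illustration.

import numpy as np

word_vectors = {            # toy 3-d "word vectors", invented for this example
    "bitcoin": np.array([0.9, 0.1, 0.0]),
    "lightning": np.array([0.8, 0.2, 0.1]),
    "weather": np.array([0.0, 0.9, 0.3]),
}

def embed(tokens, dim=3):
    # Mean of known word vectors; random fallback for unknown words, as above.
    vecs = [word_vectors.get(t, np.random.rand(dim)) for t in tokens]
    return np.mean(vecs, axis=0) if vecs else np.zeros(dim)

def cosine(a, b):
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

id_doc2vec = {1: embed(["bitcoin", "lightning"]), 2: embed(["weather"])}
q_vector = embed("bitcoin payment".split())
ranking = sorted(id_doc2vec.items(), key=lambda kv: cosine(kv[1], q_vector),
                 reverse=True)
print([doc_id for doc_id, _ in ranking])  # most similar document ids first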
Code Example #14
 def __init__(self, auth_cookie):
     Session.login(auth_cookie)
     self.portfolio = Parsers.get_portfolio()
     self.open_orders = self.portfolio.open_orders
Code Example #15
 def refresh_portfolio(self):
     self.portfolio = Parsers.get_portfolio()
     self.open_orders = self.portfolio.open_orders
Code Example #16
 def __init__(self, credentials):
     Session.login(credentials)
     self.portfolio = Parsers.get_portfolio()
     self.open_orders = self.portfolio.open_orders
Code Example #17
class tf_idf(object):
  
  def __init__(self, data):
    # Load data
    self.data = data
    
    # Parser
    self.parsers = Parsers()
    
    # Init tf-idf
    click.echo("Creating: index and tf-idf")
    self.index, self.tf, self.df, self.idf = self.load_index_tfidf()
    click.echo("Done.\n")
    
    #Ask for query
    self.get_query()
    
    
    
  def get_query(self):
    # Ask for query
    active = True
    while(active):
      click.echo("######################################################")
      click.echo("TYPE 'X' TO EXIT.")
      click.echo("Insert query:")
      query = input()
      click.echo("######################################################\n")
      
      if query == 'X' or query =='x':
        click.echo("Exiting...")
        active = False
      else:
        self.search(query, self.index, self.idf, self.tf) 
        
    return 0
     
  def load_index_tfidf(self):
    """
      Loads the preprocessed pickles.
      Returns:
        index -- inverted index "term": [["id", [pos1, pos2, ...]], ...]
        tf -- normalized term frequency per doc
        df -- document frequency per term
        idf -- inverse document frequency
    """

    index = pickle.load(open("data/utils/index.p", "rb"))

    # Term freq of terms in tweets      
    tf = pickle.load(open("data/utils/tf.p", "rb"))

    # Tweet freq of term in corpus
    df = pickle.load(open("data/utils/df.p", "rb"))
    
    # Inverse df
    idf = pickle.load(open("data/utils/idf.p", "rb"))
  
    return index, tf, df, idf
        
  def rankDocuments(self, terms, docs, index, idf, tf):
    """
    Computes ranking given query and collection of tweets.

    Arguments:
      terms -- query - str.
      docs -- ID list of docs - list.
      index -- inverted index - dict
      idf -- inverse document frequency - dict
      tf -- term frequency - dict
    Returns:
      resultDocs -- Ordered list of matching docs based on cosine-sim - list
    """

    # Dict with vector per docID
    docVectors = defaultdict(lambda: [0]*len(terms))

    # Vector per query
    queryVector = [0]*len(terms)

    # TF of query
    query_terms_count = collections.Counter(terms)
 
    # Norm query
    query_norm = np.linalg.norm(list(query_terms_count.values()))

    for termIndex, term in enumerate(terms):
      # Check if term exist in collection
      if term not in index:
        continue

      # Score per term-query
      queryVector[termIndex] = query_terms_count[term]/query_norm * idf[term]

      for docIndex, (doc,postings) in enumerate(index[term]):
        # check if the docID is in the list of docIDs containing the term
        if doc in docs:
          # Score per term-doc
          docVectors[doc][termIndex] = tf[term][docIndex] * idf[term]

    #Cosine similarity query-doc
    docScores = [[np.dot(curDocVec, queryVector), doc] for doc, curDocVec in docVectors.items()]

    #Sort by descending similarity
    docScores.sort(reverse=True)

    #Get IDs
    resultDocs = [x[1] for x in docScores]

    return resultDocs
  
  def search_tf_idf(self, query, index, idf, tf, topn):
    """
  Preprocess query and find docs with words in query
  Arguments:
    query -- query - str.
    index -- inverted index - dict
    idf -- inverse document frequency - dict
    tf -- term frequency - dict
    topn -- N top ranked docs to be returned - int
  Returns
    ranked_docs -- list of topn docs ranked by cosine-sim - list
    """
    # Preprocess query
    query = self.parsers.getTerms(query)
 
    # Init set of docs with terms in query
    docs = set()

    for term in query:
      try:
        # Get IDs of docs with term
        termDocs = [posting[0] for posting in index[term]]
      
        # Add new docsID
        docs = docs.union(termDocs)
    
      except KeyError:
        # term does not appear in the index
        pass
    
    docs = list(docs)

    # Rank docs with rankDocuments
    ranked_docs = self.rankDocuments(query, docs, index, idf, tf)
    ranked_docs = ranked_docs[:topn]

    return ranked_docs
  
  def search(self, query, index, idf, tf, topn=20):
    """
  Search for tweets given a query and display the results.
  Arguments:
    query -- query string - str.
    index -- inverted index - dict.
    idf -- inverse document frequency - dict.
    tf -- term frequency - dict.
    topn -- default: 20 - top N results to display - int.
    """
  
    # Get topn docs
    ranked_docs = self.search_tf_idf(query, index, idf, tf, topn)

    if len(ranked_docs) == 0:
      click.echo("No results found !\n")
      return -1
  
    click.echo("Results\n")

    for index, id in enumerate(ranked_docs):
      # Get tweet corresponding to id
      doc = self.data[self.data['id'] == id]
      tweet, date, author, retweets, favorites, url, hashtags = self.parsers.parser_tweet_results(doc)
    
      click.echo("______________________________________________________")
      click.echo(f"Tweet {index}")
      click.echo(f"\t·Author: {author}")
      click.echo(f"\t·Date: {date}")
      click.echo(f"\t·Tweet: {tweet}")
      click.echo(f"\t·Retweets: {retweets}")
      click.echo(f"\t·Favorites: {favorites}")
      click.echo(f"\t·Hashtags: {hashtags}")
      click.echo(f"\t·URL: {url}")
      click.echo("______________________________________________________\n")
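
The scoring in rankDocuments() can be illustrated with a tiny hand-built index. The two documents, term frequencies and idf values below are invented for the example; the real dictionaries come from the pickled files loaded in load_index_tfidf().

import collections
import numpy as np

index = {"bitcoin": [("d1", [0, 4]), ("d2", [2])], "price": [("d2", [5])]}
tf = {"bitcoin": [0.8, 0.3], "price": [0.5]}   # aligned with the postings order
idf = {"bitcoin": 1.2, "price": 2.0}

def rank(terms, docs):
    # Build query and document vectors over the query terms, then rank by dot product.
    doc_vectors = collections.defaultdict(lambda: [0.0] * len(terms))
    query_counts = collections.Counter(terms)
    query_norm = np.linalg.norm(list(query_counts.values()))
    query_vector = [0.0] * len(terms)
    for t_i, term in enumerate(terms):
        if term not in index:
            continue
        query_vector[t_i] = query_counts[term] / query_norm * idf[term]
        for d_i, (doc, _postings) in enumerate(index[term]):
            if doc in docs:
                doc_vectors[doc][t_i] = tf[term][d_i] * idf[term]
    scores = sorted(((np.dot(vec, query_vector), doc)
                     for doc, vec in doc_vectors.items()), reverse=True)
    return [doc for _score, doc in scores]

print(rank(["bitcoin", "price"], {"d1", "d2"}))  # expect d2 ranked first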
Code Example #18
class doc2vec(object):
    def __init__(self, data):
        # Get data
        self.data_Final = data

        #Processed data
        self.data = pd.read_csv("data/d2v_processed.csv")

        #Init NLP
        self.nlp = spacy.load("en_core_web_sm", disable=["ner", "parser"])
        self.nlp.max_length = 5000000

        #Load d2v model
        self.doc2vec_model = Doc2Vec.load("data/utils/d2v_model.kvmodel")

        #Load contractions
        self.contractions_dict = pickle.load(
            open("data/utils/contractions_dict.p", "rb"))

        # Init parsers
        self.parsers = Parsers()

        # Load tag id
        self.tag_id = pickle.load(open("data/utils/tag_id.p", "rb"))

        #Load id doc2vec
        self.id_doc2vec = pickle.load(open("data/utils/id_doc2vec.p", "rb"))

        #Get query - run program
        self.get_query(self.id_doc2vec)

    def get_query(self, id_doc2vector):
        # Ask for query
        active = True
        while (active):
            click.echo(
                "\n######################################################")
            click.echo("TYPE 'X' TO EXIT.")
            click.echo("Insert query:")
            query = input()
            click.echo(
                "######################################################\n")

            if query == 'X' or query == 'x':
                click.echo("Exiting...")
                active = False
            else:
                self.search(query, self.tag_id, self.id_doc2vec)

        return 0

    def search(self, query, tag_id, id_doc2vector, topn=20):
        """
    Search for tweets given a query and display the results.
    Arguments:
        tag_id -- dict mapping gensim document tags to tweet ids - dict
        id_doc2vector -- dict mapping id to document vector - dict
        topn -- default: 20 - top N results to display - int.
    """
        # Get ranked docs
        doc_query_sim = self.rank(query, self.tag_id)
        ids = doc_query_sim[:topn]

        click.echo("Results\n")

        for index, id in enumerate(ids):
            doc = self.data_Final[self.data_Final["id"] == id]
            tweet, date, author, retweets, favorites, url, hashtags = self.parsers.parser_tweet_results(
                doc)

            click.echo(
                "______________________________________________________")
            click.echo(f"Tweet {index}")
            click.echo(f"\t·Author: {author}")
            click.echo(f"\t·Date: {date}")
            click.echo(f"\t·Tweet: {tweet}")
            click.echo(f"\t·Retweets: {retweets}")
            click.echo(f"\t·Favorites: {favorites}")
            click.echo(f"\t·Hashtags: {hashtags}")
            click.echo(f"\t·URL: {url}")
            click.echo(
                "______________________________________________________\n")

    def rank(self, query, tag_id):
        """
      Given a query, preprocess it, embed it with Doc2Vec, and return the ids
      of the most similar documents, ordered by similarity.
      """
        # Pre-process query
        query = self.preprocessing(query)

        # Query vector
        q_vector = self.doc2vec_model.infer_vector(query.split())

        #Doc query similarity
        tag_sim = self.doc2vec_model.docvecs.most_similar([q_vector], topn=20)

        # Get Ids
        ids = [tag_id[id_[0]] for id_ in tag_sim]

        return ids

    def expand_contractions(self, text, contractions_dict, contractions_re):
        """
      Replace each contraction matched by contractions_re with its expansion
      from contractions_dict.
      """
        def replace(match):
            return contractions_dict[match.group(0)]

        return contractions_re.sub(replace, text)

    def clean_text(self, text):
        """
      * Remove words with digits
      * Replace newline characters with a space
      * Remove URLs
      * Replace non-English characters with a space
      """
        # Remove digits
        text = re.sub(r'\w*\d\w*', '', text)

        # Remove new Line chars
        text = re.sub('\n', ' ', text)

        #Remove links
        text = re.sub(r"http\S+", "", text)

        #Replace non-english chars
        text = re.sub('[^a-z]', ' ', text)

        return text

    def preprocessing(self, text):
        """
      Given a text string, apply preprocessing techniques:
          * Lowercase the text
          * Expand contractions
          * Clean the text
          * Remove stopwords
          * Lemmatize words
      """
        # Lower case
        text = text.lower()

        # Regular expression for finding contractions
        contractions_re = re.compile('(%s)' %
                                     '|'.join(self.contractions_dict.keys()))

        #Expand contractions
        text = self.expand_contractions(text, self.contractions_dict,
                                        contractions_re)
        text = self.clean_text(text)

        #Remove added spaces
        text = re.sub(" +", " ", text)
        text = text.strip()

        #Stop words and Lemmatizing
        text = ' '.join([
            token.lemma_ for token in list(self.nlp(text))
            if (token.is_stop == False)
        ])

        return text
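
As a usage sketch of the query flow in rank() above: train a tiny throwaway Doc2Vec model, infer a vector for the query, and ask for the most similar document tags. The toy corpus and hyperparameters here are invented for illustration; the class above instead loads a pre-trained model and maps gensim tags back to tweet ids through tag_id.

from gensim.models.doc2vec import Doc2Vec, TaggedDocument

# Toy corpus, invented purely for this example.
corpus = [
    TaggedDocument(words=["bitcoin", "price", "rises"], tags=["t0"]),
    TaggedDocument(words=["lightning", "network", "payment"], tags=["t1"]),
    TaggedDocument(words=["sunny", "weather", "today"], tags=["t2"]),
]
model = Doc2Vec(corpus, vector_size=20, min_count=1, epochs=50)

# Embed the query and rank documents by similarity, as rank() does above.
q_vector = model.infer_vector("bitcoin payment".split())
# .docvecs is the attribute used in the snippet; newer gensim releases expose it as .dv.
print(model.docvecs.most_similar([q_vector], topn=3))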