Python PyNLPIR Examples

Programming Language: Python

Namespace/Package Name: pynlpir

Class/Type: PyNLPIR

Examples at hotexamples.com: 2

Python PyNLPIR – 2 examples found. These are the top-rated real-world Python examples of pynlpir.PyNLPIR, extracted from open-source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

get_POS(1)

segment_weibo_status(1)

Example #1

Show file

File: recbysns.py Project: jjyao/recbysns

 def __init__(self):
     """Build the collaborators and the static configuration of the instance.

     The database handle is created first; the NLPIR wrapper, the Renren
     helper and the URL helper are then each constructed with this
     instance as their only argument.
     """
     # Keep this construction order: every helper below is handed ``self``.
     self.db = Database()
     self.nlpir = PyNLPIR(self)
     self.renren = Renren(self)
     self.url = URL(self)

     # User-Agent string, assembled from its space-separated tokens.
     ua_tokens = (
         "Mozilla/5.0",
         "(Macintosh; Intel Mac OS X 10.8; rv: 17.0)",
         "Gecko/17.0",
         "Firefox/17.0",
     )
     self.UA = " ".join(ua_tokens)

     # POS blacklist patterns, stored as a list in their original order.
     blacklist_patterns = (
         "^emoticon$", "^title$", "^ude.", "^w.*",
         "^vshi", "^vyou", "^p.*", "^ule",
         "^m.*", "^cc", "^session$",
     )
     self.pos_blacklist_regexs = list(blacklist_patterns)

Example #2

Show file

File: recbysns.py Project: jjyao/recbysns

class RecBySNS(object):
    """Application object tying together the database, NLPIR and URL helpers.

    The instance hands itself to ``PyNLPIR``, ``Renren`` and ``URL`` on
    construction. Besides wiring, it offers an interactive labelling routine
    for entities mentioned in Weibo statuses and a part-of-speech blacklist
    check.
    """

    def __init__(self):
        """Create the collaborators and the static configuration."""
        # Order matters: every helper after ``db`` receives ``self``.
        self.db = Database()
        self.nlpir = PyNLPIR(self)
        self.renren = Renren(self)
        self.url = URL(self)
        self.UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv: 17.0) Gecko/17.0 Firefox/17.0"
        # Patterns matched (via ``re.search``) against a word's POS tag in
        # ``is_blacklist_word``.
        self.pos_blacklist_regexs = [
            "^emoticon$",
            "^title$",
            "^ude.",
            "^w.*",
            "^vshi",
            "^vyou",
            "^p.*",
            "^ule",
            "^m.*",
            "^cc",
            "^session$",
        ]

    def assign_recbysns_entity_sentiment(self):
        """Interactively label the entities found in selected Weibo statuses.

        Every status returned by the ``weibo_status`` query is segmented
        into sessions. For each entity occurrence in a session the status
        text, the session text and the entity are printed, a type and a
        sentiment are read as integers from standard input, and one
        ``recbysns_entity`` row is inserted and committed.

        Raises:
            ValueError: if a line read from standard input is not an integer.
        """
        statuses = self.db.select_table(
            "weibo_status",
            "text like '%%《%%》%%' or \
                                            text like '%%http://%%' or \
                                            text like '%%https://%%'",
            # NOTE(review): presumably offset and limit -- confirm against
            # Database.select_table.
            12696,
            5,
        )
        for status in statuses:
            sessions = self.nlpir.segment_weibo_status(status["text"])
            for session_index, session in enumerate(sessions):
                # Each segment looks like "<text>/<pos>"; drop the POS suffix.
                session_text = "".join(
                    segment.rsplit("/", 1)[0] for segment in session
                )
                # Occurrence counter per entity within this session (0-based).
                positions = {}
                for entity in self._collect_entities(session):
                    position = positions.get(entity, -1) + 1
                    positions[entity] = position
                    print(status["text"])
                    print(session_text)
                    print(entity)
                    print("Type:")
                    entity_type = int(sys.stdin.readline())
                    print("Sentiment:")
                    sentiment = int(sys.stdin.readline())
                    self.db.query(
                        "INSERT INTO recbysns_entity( \
                                   entity, status_id, session, position, \
                                   type, score) \
                                   VALUES(%s, %s, %s, %s, %s, %s)",
                        (entity, status["id"], session_index, position,
                         entity_type, sentiment),
                    )
                    self.db.commit()

    def _collect_entities(self, session):
        """Return the segments of *session* that refer to a known entity.

        A ``title`` segment qualifies when its title is found as a Douban
        movie or book; a ``url`` segment qualifies when its short URL is
        known and resolves to a video URL. Segments that cannot be parsed or
        resolved are reported on stdout and skipped.
        """
        entities = []
        for segment in session:
            pos = self.nlpir.get_POS(segment)
            if pos == "title":
                match = re.match(u"《(.*?)》/title", segment)
                if match is None:
                    # A segment tagged "title" without the expected shape
                    # used to crash on ``.group``; report it like the URL
                    # branch does and move on.
                    print("###########%s###########" % segment)
                    continue
                title = match.group(1)
                if (self.db.select_douban_movie_by_title(title)
                        or self.db.select_douban_book_by_title(title)):
                    entities.append(segment)
            elif pos == "url":
                match = re.search(u"(http.*)/url", segment)
                if match is None:
                    print("###########%s###########" % segment)
                    continue
                url = self.db.select_recbysns_url_by_short_url(match.group(1))
                if url is None:
                    print("***********%s***********" % segment)
                    continue
                if self.url.is_video_url(url["origin_url"]):
                    entities.append(segment)
        return entities

    def is_blacklist_word(self, word):
        """Return True if the POS tag of *word* matches a blacklist pattern."""
        # Look the tag up once instead of once per pattern.
        pos = self.nlpir.get_POS(word)
        return any(
            re.search(pattern, pos) for pattern in self.pos_blacklist_regexs
        )