Esempio n. 1
0
 def __init__(self):
     """Create the database helper, the AipNlp client and the OpenCC converter.

     NOTE(review): the API credentials are hard-coded in source; consider
     loading them from configuration instead.
     """
     # Credentials in the order AipNlp receives them:
     # (App ID, API key, secret key).
     credentials = (
         '10362966',
         'nQWiWR6DzjXsfYjW1yyVy8TB',
         'WpjMdNWYv6TSg2psofaGt4LNW366tvnj',
     )
     self.db = Mysql_DB()
     self.aip = AipNlp(*credentials)
     # 't2s' mode: Traditional Chinese -> Simplified Chinese.
     self.trans = OpenCC('t2s')
Esempio n. 2
0
 def __init__(self):
     """Create the database helper, the AipNlp client and the OpenCC converter.

     NOTE(review): the API credentials are hard-coded in source; consider
     loading them from configuration instead.
     """
     # Credentials in the order AipNlp receives them:
     # (App ID, API key, secret key).
     credentials = (
         '10508840',
         'W9BwLsLvlPQvD9LsfWIBGX28',
         'd4wSFFDKm0VjGrPZVxWpZyGfAFYuD3AX',
     )
     self.db = Mysql_DB()
     self.aip = AipNlp(*credentials)
     # 't2s' mode: Traditional Chinese -> Simplified Chinese.
     self.trans = OpenCC('t2s')
Esempio n. 3
0
class time_aa(object):
    """Rewrite relative "N分钟前" (N minutes ago) comment times as clock times.

    Rows are read from and written back to the ``comment`` table through
    ``self.db``. The conversion treats 12:00 of the current day as the
    reference time, as the original ``'今天 11:' + str(60 - N)`` did.
    """

    def __init__(self):
        self.db = Mysql_DB()

    def Get_Sentence(self):
        """Fetch (id, Comment_Time) rows for one tweet owner and convert each.

        Any error raised while querying or converting is printed and ends
        the run; nothing is returned.
        """
        sql = "select id, Comment_Time from comment where Tweet_Owner = 1195300800 limit 1000000"
        try:
            rows = self.db.Query_MySQL(sql)  # read the rows from the database
            for row in rows:
                self.update_db(row[0], row[1])
        except Exception as e:
            print('query_db函数执行错误' + str(e))

    def update_db(self, i, timea):
        """Replace a "N分钟前" time on comment ``i`` with "今天 H:MM".

        Args:
            i: id of the comment row to update.
            timea: the stored Comment_Time text.

        Rows whose time is empty or is not of the form "N分钟前" are left
        untouched. Previously a text containing "分钟" without that exact
        form raised IndexError, which aborted the whole batch in
        ``Get_Sentence``.
        """
        if not timea or u'分钟' not in timea:
            return
        found = re.search(u'(\\d+)分钟前', timea)
        if found is None:
            return
        minutes_ago = int(found.group(1))
        # Count back from 12:00. divmod keeps the minute in 0..59 and
        # "%02d" zero-pads it, so 55 -> "11:05" (was "11:5") and
        # 0 -> "12:00" (was the invalid "11:60").
        hour, minute = divmod(12 * 60 - minutes_ago, 60)
        new_time = '今天 %d:%02d' % (hour, minute)
        changeY_sql = "update comment set over = 'YYYYY', Comment_Time = '" + new_time + "' where id = " + str(i)
        try:
            self.db.Insert_MySQL(changeY_sql)
        except Exception as e:
            print('改变YY错误' + str(e))
Esempio n. 4
0
class Emotion(object):
    """Classify the sentiment of stored comments and write the scores back.

    Comments are read from the ``comment`` table through ``self.db``,
    converted from Traditional to Simplified Chinese with OpenCC, sent to
    the AipNlp ``sentimentClassify`` call, and the resulting scores are
    stored on the same row.
    """

    def __init__(self):
        # NOTE(review): credentials are hard-coded in source; consider
        # loading them from configuration instead.
        APP_ID = '10508840'  # your App ID
        API_KEY = 'W9BwLsLvlPQvD9LsfWIBGX28'  # your Api Key
        SECRET_KEY = 'd4wSFFDKm0VjGrPZVxWpZyGfAFYuD3AX'  # your Secret Key
        self.db = Mysql_DB()
        self.aip = AipNlp(APP_ID, API_KEY, SECRET_KEY)
        self.trans = OpenCC('t2s')  # mode: Traditional -> Simplified Chinese

    def Get_Sentence(self):
        """Fetch up to 100 pending comments and mark each one as taken.

        Returns:
            The (id, Comment_Content) rows whose ``over`` column is 'YYYY',
            or None if the query failed (the error is printed).

        Every fetched row is switched to ``over = 'YY'`` before it is
        returned, i.e. before any analysis has happened.
        """
        sql = "select id, Comment_Content from comment where over = 'YYYY' limit " + str(100)
        try:
            Sentence_list = self.db.Query_MySQL(sql)  # read the rows from the database
            for row in Sentence_list:
                self.update_db(row[0])
            return Sentence_list
        except Exception as e:
            print('query_db函数执行错误' + str(e))

    def update_db(self, i):
        """Set ``over = 'YY'`` on the comment with id ``i``; errors are printed."""
        changeY_sql = "update comment set over = 'YY' where id = " + str(i)
        try:
            self.db.Insert_MySQL(changeY_sql)
        except Exception as e:
            print('改变YY错误' + str(e))

    def _store_sentiment(self, row_id, text):
        """Classify ``text`` and store the scores on comment ``row_id``.

        The stored fields come from ``result['items'][0]``:
        ``sentiment`` (polarity: 0 negative, 1 neutral, 2 positive),
        ``positive_prob``, ``negative_prob`` and ``confidence``.
        Any exception from the API call or the update propagates to the caller.
        """
        result = self.aip.sentimentClassify(text.strip().encode('utf-8', 'ignore'))
        scores = result['items'][0]
        s = str(scores['sentiment'])
        p = str(scores['positive_prob'])
        n = str(scores['negative_prob'])
        c = str(scores['confidence'])
        sql = "update comment set sentiment = %s, positive_prob = %s, negative_prob = %s, confidence = %s" % (s, p, n, c) + " where id = " + str(row_id)
        self.db.Insert_MySQL(sql)

    def Get_Analyse(self):
        """Analyse every comment returned by ``Get_Sentence``.

        Each comment is first classified as-is. If that attempt raises, it
        is retried with every character outside U+4E00..U+9FA5 replaced by
        a comma. Failures of both attempts are printed and the row is
        skipped.
        """
        # Get_Sentence returns None when the query fails; treat that as
        # "nothing to do" instead of crashing on iteration.
        sentence_list = self.Get_Sentence() or []
        # Matches everything except Chinese characters. A plain u"" literal
        # is equivalent to the former ur"" one (the \u escapes are expanded
        # either way) and is also valid Python 3 syntax.
        non_chinese = re.compile(u"[\u0000-\u4dff,\u9fa6-\uffff]")
        for row in sentence_list:
            try:
                simple = self.trans.convert(row[1])
                self._store_sentiment(row[0], simple)
            except Exception as first_error:
                print('辣鸡百度转码又TM错误了,看老子的' + str(first_error))
                try:
                    simple = self.trans.convert(row[1])
                    self._store_sentiment(row[0], non_chinese.sub(',', simple))
                except Exception as second_error:
                    print('草,老子没辙了' + str(second_error))
Esempio n. 5
0
class DoubanCrawlPipeline(object):
    """Item pipeline that stores Douban items in MySQL through ``self.db``.

    NOTE(review): the statements are assembled by string formatting with
    double-quoted values, so a scraped value containing ``"`` breaks the
    statement. Switch to parameterised queries if ``Mysql_DB`` supports
    them.
    """

    def __init__(self):
        self.db = Mysql_DB()

    def _insert_row(self, table, columns, item, error_label):
        """Insert ``item[column]`` for each of ``columns`` into ``table``.

        Every value is UTF-8 encoded (unencodable characters dropped) and
        interpolated into
        ``insert into <table> (<columns>) VALUES ("%s", ...)``.
        Any exception is printed, prefixed with ``error_label``, and
        swallowed so that one bad item does not stop the crawl.
        """
        try:
            placeholders = ', '.join(['"%s"'] * len(columns))
            template = 'insert into %s (%s) VALUES (%s)' % (
                table, ', '.join(columns), placeholders)
            values = tuple(
                item[column].encode('utf-8', 'ignore') for column in columns)
            self.db.Insert_MySQL(template % values)
        except Exception as e:
            print(error_label + str(e))

    def process_item(self, item, spider):
        """Store ``item`` in the table matching its type and return it.

        Args:
            item: a DoubanDPItem, DoubanHTItem or DoubanJPItem; any other
                type is passed through untouched.
            spider: the spider that produced the item (unused).

        Returns:
            ``item`` itself. Scrapy hands the return value to the next
            pipeline stage, so the previous implicit ``None`` dropped the
            item for every later stage.
        """
        if isinstance(item, DoubanDPItem):
            self._insert_row(
                'DoubanDP',
                ('Uname', 'Star', 'DPtime', 'Liked', 'Content'),
                item, '插入DP表错误')
        if isinstance(item, DoubanHTItem):
            # 'sign' selects which of the two HT tables the item belongs to.
            if item['sign'] == 'OUT':
                self._insert_row(
                    'DoubanHTOUT',
                    ('sign', 'HT_id', 'title', 'author', 'HT_href', 'reply',
                     'HTtime', 'Content'),
                    item, '插入HTOUT错误')
            if item['sign'] == 'INSIDE':
                self._insert_row(
                    'DoubanHTINSIDE',
                    ('sign', 'HT_id', 'Rname', 'Rtime', 'Rcontent', 'Rliked'),
                    item, '插入HTINSEIDE错误')
        if isinstance(item, DoubanJPItem):
            self._insert_row(
                'DoubanJP',
                ('JP_id', 'title', 'author', 'JPtime', 'JPstar', 'href',
                 'Content', 'JPliked', 'JPdisliked', 'reply'),
                item, '插入JP表错误')
        return item
Esempio n. 6
0
class Proxy(object):
    """Look up a proxy address in the ``proxys`` table via ``self.db``."""

    def __init__(self):
        self.db = Mysql_DB()

    def GetIP(self):
        """Return one proxy picked by the query as ``"http://<ip>"``.

        The row is chosen with a ``RAND()``-based id threshold and
        ``LIMIT 1``. Returns None, after printing the error, when the query
        fails or yields no row.
        """
        # TODO: kept in SQL for now; the plan is to manage proxies in redis.
        # Only the ip column is selected (an earlier variant also read port).
        query = "SELECT ip FROM proxys WHERE id >= ((SELECT MAX(id) FROM proxys)-(SELECT MIN(id) FROM proxys)) * RAND() + (SELECT MIN(id) FROM proxys)  LIMIT 1"
        try:
            rows = self.db.Query_MySQL(query)
            return "http://" + str(rows[0][0])
        except Exception as err:
            print('读取代理ip错误' + str(err))
            return None
Esempio n. 7
0
 def __init__(self):
     """Create the ``Mysql_DB`` helper and keep it on ``self.db``."""
     handle = Mysql_DB()
     self.db = handle
Esempio n. 8
0
class WeiboCrawlPipeline(object):
    """Item pipeline that stores Weibo items in MySQL through ``self.db``.

    NOTE(review): the statements are assembled by string formatting with
    double-quoted values, so a scraped value containing ``"`` breaks the
    statement. Switch to parameterised queries if ``Mysql_DB`` supports
    them.
    """

    # Field order of each statement, plus the fields that are interpolated
    # without UTF-8 encoding (the original code never encoded them).
    _TWEET_FIELDS = (
        "Target_ID", "Target_Name", "Tweet_Time", "Tweet_Content",
        "Tweet_Transfer", "Tweet_Liked", "Tweet_Comment", "Tweet_Platform",
        "Tweet_GPS", "Comment_Urls", "Update_Time", "Tweet_Over")
    _TWEET_RAW = ("Tweet_Transfer", "Tweet_Liked", "Tweet_Comment")

    _COMMENT_FIELDS = (
        "Comment_ID", "Comment_Name", "Comment_Content", "Comment_Time",
        "Comment_Liked", "Comment_Platform", "Comment_Personal_Url",
        "Tweet_Owner", "Tweet_Url")
    _COMMENT_RAW = ("Comment_Liked",)

    _PERSONAL_FIELDS = (
        "Personal_ID", "Personal_Name", "Personal_Tweet_Num", "Personal_Fans",
        "Personal_Follow_Num", "Personal_Sex", "Personal_City",
        "Personal_Birth", "Personal_Level", "Personal_Sentiment",
        "Personal_Introduce", "Personal_Authentication")
    _PERSONAL_RAW = ("Personal_Tweet_Num", "Personal_Fans", "Personal_Follow_Num")

    def __init__(self):
        self.db = Mysql_DB()

    @staticmethod
    def _values(item, fields, raw=()):
        """Return ``item``'s values for ``fields`` as a tuple for ``%`` formatting.

        Each value is UTF-8 encoded (unencodable characters dropped) unless
        its field is listed in ``raw``, in which case it is passed through
        unchanged.
        """
        return tuple(
            item[name] if name in raw else item[name].encode('utf-8', 'ignore')
            for name in fields)

    def process_item(self, item, spider):
        """Store ``item`` in the table matching its type and return it.

        Args:
            item: a WeiboTargetItem, WeiboCommentItem or WeiboPersonalItem;
                any other type is passed through untouched.
            spider: the spider that produced the item (unused).

        Returns:
            ``item`` itself. Scrapy hands the return value to the next
            pipeline stage, so the previous implicit ``None`` dropped the
            item for every later stage.

        Database and formatting errors are printed and swallowed.
        """
        if isinstance(item, WeiboTargetItem):
            try:
                # Tweet_Over == 'N' inserts a new row; anything else updates
                # the rows of the same Target_ID.
                is_new = item["Tweet_Over"] == 'N'
                values = self._values(item, self._TWEET_FIELDS, self._TWEET_RAW)
                if is_new:
                    sql = """insert into Tweet (Target_ID, Target_Name, Tweet_Time, Tweet_Content, Tweet_Transfer, Tweet_Liked, Tweet_Comment, Tweet_Platform, Tweet_GPS, Comment_Urls, Update_Time, Tweet_Over) VALUES ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")""" % values
                else:
                    # NOTE(review): the WHERE clause matches on Target_ID
                    # only, so every Tweet row of that target is rewritten.
                    # Confirm this is intended.
                    sql = """update Tweet set Target_ID = "%s", Target_Name = "%s", Tweet_Time = "%s", Tweet_Content = "%s", Tweet_Transfer = "%s", Tweet_Liked = "%s", Tweet_Comment = "%s", Tweet_Platform = "%s", Tweet_GPS = "%s", Comment_Urls = "%s", Update_Time = "%s", Tweet_Over = "%s" """ % values + """where Target_ID = '%s'""" % (
                        item["Target_ID"].encode('utf-8', 'ignore'))
                self.db.Insert_MySQL(sql)
            except Exception as e:
                print('插入Target数据库错误' + str(e))
        elif isinstance(item, WeiboCommentItem):
            try:
                sql = """insert into yuanli (Comment_ID, Comment_Name, Comment_Content, Comment_Time, Comment_Liked, Comment_Platform, Comment_Personal_Url, Tweet_Owner, Tweet_Url) VALUES ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")""" % self._values(
                    item, self._COMMENT_FIELDS, self._COMMENT_RAW)
                self.db.Insert_MySQL(sql)
            except Exception as e:
                print('插入Comment数据库错误' + str(e))
        elif isinstance(item, WeiboPersonalItem):
            try:
                sql = """insert into Personal (Personal_ID, Personal_Name, Personal_Tweet_Num, Personal_Fans, Personal_Follow_Num, Personal_Sex, Personal_City, Personal_Birth, Personal_Level, Personal_Sentiment, Personal_Introduce, Personal_Authentication) VALUES ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")""" % self._values(
                    item, self._PERSONAL_FIELDS, self._PERSONAL_RAW)
                self.db.Insert_MySQL(sql)
            except Exception as e:
                print('插入Personal数据库错误' + str(e))
        return item