def __init__(self):
    """Create the database handle, the AipNlp client and the OpenCC converter."""
    # NOTE(review): the AipNlp credentials are hard-coded in source; consider
    # loading them from configuration instead.
    credentials = (
        '10362966',  # App ID
        'nQWiWR6DzjXsfYjW1yyVy8TB',  # Api Key
        'WpjMdNWYv6TSg2psofaGt4LNW366tvnj',  # Secret Key
    )
    self.db = Mysql_DB()
    self.aip = AipNlp(*credentials)
    # 't2s' sets the conversion mode to Traditional -> Simplified Chinese.
    self.trans = OpenCC('t2s')
def __init__(self):
    """Create the database handle, the AipNlp client and the OpenCC converter."""
    # NOTE(review): the AipNlp credentials are hard-coded in source; consider
    # loading them from configuration instead.
    credentials = (
        '10508840',  # App ID
        'W9BwLsLvlPQvD9LsfWIBGX28',  # Api Key
        'd4wSFFDKm0VjGrPZVxWpZyGfAFYuD3AX',  # Secret Key
    )
    self.db = Mysql_DB()
    self.aip = AipNlp(*credentials)
    # 't2s' sets the conversion mode to Traditional -> Simplified Chinese.
    self.trans = OpenCC('t2s')
class time_aa(object):
    """Rewrites relative "N分钟前" comment times as absolute "今天 11:MM" values.

    Rows are read from and written back to the ``comment`` table through
    ``Mysql_DB``.
    """

    def __init__(self):
        self.db = Mysql_DB()

    def Get_Sentence(self):
        """Fetch ``(id, Comment_Time)`` rows and pass each one to ``update_db``.

        Only rows with ``Tweet_Owner = 1195300800`` are selected (at most
        1000000). Any exception is printed and ends the run; nothing is
        returned.
        """
        sql = ("select id, Comment_Time from comment "
               "where Tweet_Owner = 1195300800 limit " + str(1000000))
        try:
            Sentence_list = self.db.Query_MySQL(sql)  # read the rows from the database
            for i in Sentence_list:
                self.update_db(i[0], i[1])
        except Exception as e:
            print('query_db函数执行错误' + str(e))

    def update_db(self, i, timea):
        """Rewrite one relative timestamp and mark the row as processed.

        Args:
            i: ``id`` of the comment row.
            timea: the row's ``Comment_Time`` text.

        Rows whose time does not contain "<digits>分钟前" are left untouched.
        For the others, ``Comment_Time`` becomes "今天 11:MM" with
        ``MM = 60 - N`` (zero-padded) and ``over`` is set to ``'YYYYY'``.
        A failing write is printed, not raised.
        """
        if not timea:
            # NULL / empty Comment_Time: nothing to rewrite.
            return
        # The original indexed re.findall(...)[0] directly, so a value that
        # contained "分钟" without the full "<digits>分钟前" form raised
        # IndexError and aborted the whole loop in Get_Sentence.
        found = re.search(u'(\\d+)分钟前', timea)
        if found is None:
            return
        minute = 60 - int(found.group(1))
        # Zero-pad so that e.g. 55 minutes ago yields "11:05", not "11:5".
        new_time = '今天 11:%02d' % minute
        changeY_sql = ("update comment set over = 'YYYYY', Comment_Time = '"
                       + new_time + "' where id = " + str(i))
        try:
            self.db.Insert_MySQL(changeY_sql)
        except Exception as e:
            print('改变YY错误' + str(e))
class Emotion(object):
    """Runs sentiment classification on stored comments and saves the scores.

    Comments are read from the ``comment`` table through ``Mysql_DB``,
    converted with OpenCC, classified with the AipNlp client, and the
    resulting scores are written back to the same row.
    """

    def __init__(self):
        # NOTE(review): the AipNlp credentials are hard-coded in source;
        # consider loading them from configuration instead.
        APP_ID = '10508840'  # App ID
        API_KEY = 'W9BwLsLvlPQvD9LsfWIBGX28'  # Api Key
        SECRET_KEY = 'd4wSFFDKm0VjGrPZVxWpZyGfAFYuD3AX'  # Secret Key
        self.db = Mysql_DB()
        self.aip = AipNlp(APP_ID, API_KEY, SECRET_KEY)
        # 't2s' sets the conversion mode to Traditional -> Simplified Chinese.
        self.trans = OpenCC('t2s')

    def Get_Sentence(self):
        """Fetch up to 100 ``(id, Comment_Content)`` rows with ``over = 'YYYY'``.

        Every fetched row is immediately marked with ``over = 'YY'`` via
        ``update_db``.

        Returns:
            The rows from ``Query_MySQL``, or ``None`` if an exception was
            raised (the exception is printed).
        """
        sql = "select id, Comment_Content from comment where over = 'YYYY' limit " + str(100)
        try:
            Sentence_list = self.db.Query_MySQL(sql)  # read the rows from the database
            for i in Sentence_list:
                self.update_db(i[0])
            return Sentence_list
        except Exception as e:
            print('query_db函数执行错误' + str(e))

    def update_db(self, i):
        """Set ``over = 'YY'`` on the comment row with id ``i``; errors are printed."""
        changeY_sql = "update comment set over = 'YY' where id = " + str(i)
        try:
            self.db.Insert_MySQL(changeY_sql)
        except Exception as e:
            print('改变YY错误' + str(e))

    def _classify_and_store(self, row_id, text):
        """Classify ``text`` and write the four result fields to row ``row_id``.

        Fields read from ``result['items'][0]``:
          sentiment     -- polarity class (0 negative, 1 neutral, 2 positive)
          positive_prob -- probability of the positive class
          negative_prob -- probability of the negative class
          confidence    -- confidence of the classification
        Exceptions propagate to the caller.
        """
        result = self.aip.sentimentClassify(text.strip().encode('utf-8', 'ignore'))
        verdict = result['items'][0]
        s = str(verdict['sentiment'])
        p = str(verdict['positive_prob'])
        n = str(verdict['negative_prob'])
        c = str(verdict['confidence'])
        sql = ("update comment set sentiment = %s, positive_prob = %s, "
               "negative_prob = %s, confidence = %s" % (s, p, n, c)
               + " where id = " + str(row_id))
        self.db.Insert_MySQL(sql)

    def Get_Analyse(self):
        """Classify every row returned by ``Get_Sentence`` and store the scores.

        Each comment is first classified as-is. If that raises, it is retried
        with every character outside U+4E00..U+9FA5 (and every comma) replaced
        by ','. Both failures are printed; processing continues with the next
        row.
        """
        # Get_Sentence returns None when the query failed; the original then
        # crashed with TypeError when iterating None.
        sentence_list = self.Get_Sentence() or []
        # Matches everything except the CJK range U+4E00..U+9FA5. Same string
        # value as the original ur"" literal, which also expands \uXXXX.
        non_chinese = re.compile(u"[\u0000-\u4dff,\u9fa6-\uffff]")
        for row in sentence_list:
            try:
                simple = self.trans.convert(row[1])
                self._classify_and_store(row[0], simple)
            except Exception as first_error:
                print('辣鸡百度转码又TM错误了,看老子的' + str(first_error))
                try:
                    simple = self.trans.convert(row[1])
                    self._classify_and_store(row[0], non_chinese.sub(',', simple))
                except Exception as second_error:
                    print('草,老子没辙了' + str(second_error))
class DoubanCrawlPipeline(object):
    """Scrapy item pipeline that inserts Douban items through ``Mysql_DB``.

    Each supported item type is written to its own table; every column is
    read from the item under the same key.
    """

    # Ordered column lists; the item is read with the column name as key.
    _DP_COLUMNS = ('Uname', 'Star', 'DPtime', 'Liked', 'Content')
    _HT_OUT_COLUMNS = ('sign', 'HT_id', 'title', 'author', 'HT_href',
                       'reply', 'HTtime', 'Content')
    _HT_INSIDE_COLUMNS = ('sign', 'HT_id', 'Rname', 'Rtime', 'Rcontent',
                          'Rliked')
    _JP_COLUMNS = ('JP_id', 'title', 'author', 'JPtime', 'JPstar', 'href',
                   'Content', 'JPliked', 'JPdisliked', 'reply')

    def __init__(self):
        self.db = Mysql_DB()

    @staticmethod
    def _escape(text):
        """Return ``text`` UTF-8 encoded and safe inside a quoted SQL literal.

        Backslashes and quotes are backslash-escaped so scraped text cannot
        terminate the literal.
        NOTE(review): prefer parameterized queries if Mysql_DB offers them.
        """
        escaped = (text.replace(u'\\', u'\\\\')
                       .replace(u'"', u'\\"')
                       .replace(u"'", u"\\'"))
        return escaped.encode('utf-8', 'ignore')

    def _insert(self, table, columns, item, error_label):
        """Insert ``item``'s ``columns`` into ``table``; print errors with ``error_label``."""
        try:
            values = ', '.join('"%s"' % self._escape(item[name]) for name in columns)
            sql = 'insert into %s (%s) VALUES (%s)' % (
                table, ', '.join(columns), values)
            self.db.Insert_MySQL(sql)
        except Exception as e:
            print(error_label + str(e))

    def process_item(self, item, spider):
        """Store ``item`` in the table matching its type.

        Args:
            item: a DoubanDPItem, DoubanHTItem or DoubanJPItem; other types
                are passed through untouched.
            spider: the spider that produced the item (unused).

        Returns:
            The same ``item``, so later pipeline stages still receive it.
            The original returned None, which hands None to any following
            pipeline.
        """
        if isinstance(item, DoubanDPItem):
            self._insert('DoubanDP', self._DP_COLUMNS, item, '插入DP表错误')

        if isinstance(item, DoubanHTItem):
            # 'sign' selects the target table for this item type.
            if item['sign'] == 'OUT':
                self._insert('DoubanHTOUT', self._HT_OUT_COLUMNS, item,
                             '插入HTOUT错误')
            if item['sign'] == 'INSIDE':
                self._insert('DoubanHTINSIDE', self._HT_INSIDE_COLUMNS, item,
                             '插入HTINSEIDE错误')

        if isinstance(item, DoubanJPItem):
            self._insert('DoubanJP', self._JP_COLUMNS, item, '插入JP表错误')

        return item
class Proxy(object):
    """Looks up a proxy address from the ``proxys`` table through ``Mysql_DB``."""

    def __init__(self):
        self.db = Mysql_DB()

    def GetIP(self):
        """Return ``"http://<ip>"`` for one row of the ``proxys`` table.

        The query picks a row whose id is at or above a random point between
        MIN(id) and MAX(id). On any failure (query error, empty result) the
        error is printed and ``None`` is returned.
        """
        # Kept in SQL for now; the plan is to manage the proxies in redis
        # later. An earlier variant also selected the port and returned
        # "ip:port".
        sql = ("SELECT ip FROM proxys WHERE id >= "
               "((SELECT MAX(id) FROM proxys)-(SELECT MIN(id) FROM proxys)) * RAND() "
               "+ (SELECT MIN(id) FROM proxys) LIMIT 1")
        try:
            rows = self.db.Query_MySQL(sql)
            address = str(rows[0][0])
        except Exception as err:
            print('读取代理ip错误' + str(err))
            return None
        return "http://" + address
def __init__(self):
    """Create the ``Mysql_DB`` handle used by this object."""
    self.db = Mysql_DB()
class WeiboCrawlPipeline(object):
    """Scrapy item pipeline that stores Weibo items through ``Mysql_DB``.

    Tweets go to ``Tweet`` (insert or update), comments to ``yuanli`` and
    user profiles to ``Personal``. Every column is read from the item under
    the same key.
    """

    _TWEET_COLUMNS = ('Target_ID', 'Target_Name', 'Tweet_Time',
                      'Tweet_Content', 'Tweet_Transfer', 'Tweet_Liked',
                      'Tweet_Comment', 'Tweet_Platform', 'Tweet_GPS',
                      'Comment_Urls', 'Update_Time', 'Tweet_Over')
    _COMMENT_COLUMNS = ('Comment_ID', 'Comment_Name', 'Comment_Content',
                        'Comment_Time', 'Comment_Liked', 'Comment_Platform',
                        'Comment_Personal_Url', 'Tweet_Owner', 'Tweet_Url')
    _PERSONAL_COLUMNS = ('Personal_ID', 'Personal_Name', 'Personal_Tweet_Num',
                         'Personal_Fans', 'Personal_Follow_Num',
                         'Personal_Sex', 'Personal_City', 'Personal_Birth',
                         'Personal_Level', 'Personal_Sentiment',
                         'Personal_Introduce', 'Personal_Authentication')
    # Columns the original code formatted as-is, without .encode(); they are
    # still passed through unchanged.
    # NOTE(review): presumably numeric counters -- confirm against the items.
    _RAW_COLUMNS = frozenset((
        'Tweet_Transfer', 'Tweet_Liked', 'Tweet_Comment', 'Comment_Liked',
        'Personal_Tweet_Num', 'Personal_Fans', 'Personal_Follow_Num'))

    def __init__(self):
        self.db = Mysql_DB()

    @staticmethod
    def _escape(text):
        """Return ``text`` UTF-8 encoded and safe inside a quoted SQL literal.

        Backslashes and quotes are backslash-escaped so scraped text cannot
        terminate the literal.
        NOTE(review): prefer parameterized queries if Mysql_DB offers them.
        """
        escaped = (text.replace(u'\\', u'\\\\')
                       .replace(u'"', u'\\"')
                       .replace(u"'", u"\\'"))
        return escaped.encode('utf-8', 'ignore')

    def _sql_value(self, item, column):
        """Return ``item[column]`` ready for interpolation into a SQL literal."""
        value = item[column]
        if column in self._RAW_COLUMNS:
            return value
        return self._escape(value)

    def _insert_sql(self, table, columns, item):
        """Build the ``insert`` statement for ``columns`` of ``item``."""
        values = ', '.join(
            '"%s"' % self._sql_value(item, name) for name in columns)
        return 'insert into %s (%s) VALUES (%s)' % (
            table, ', '.join(columns), values)

    def _update_tweet_sql(self, item):
        """Build the ``update Tweet`` statement keyed on ``Target_ID``."""
        assignments = ', '.join(
            '%s = "%s"' % (name, self._sql_value(item, name))
            for name in self._TWEET_COLUMNS)
        # NOTE(review): the WHERE clause matches on Target_ID only, so every
        # Tweet row with that Target_ID is overwritten -- confirm intended.
        return "update Tweet set %s where Target_ID = '%s'" % (
            assignments, self._sql_value(item, 'Target_ID'))

    def process_item(self, item, spider):
        """Store ``item`` in the table matching its type.

        Args:
            item: a WeiboTargetItem, WeiboCommentItem or WeiboPersonalItem;
                other types are passed through untouched.
            spider: the spider that produced the item (unused).

        Returns:
            The same ``item``, so later pipeline stages still receive it.
            The original returned None, which hands None to any following
            pipeline. Database/formatting errors are printed, not raised.
        """
        if isinstance(item, WeiboTargetItem):
            try:
                # 'N' inserts a new Tweet row; anything else updates.
                if item["Tweet_Over"] == 'N':
                    sql = self._insert_sql('Tweet', self._TWEET_COLUMNS, item)
                else:
                    sql = self._update_tweet_sql(item)
                self.db.Insert_MySQL(sql)
            except Exception as e:
                print('插入Target数据库错误' + str(e))

        elif isinstance(item, WeiboCommentItem):
            try:
                sql = self._insert_sql('yuanli', self._COMMENT_COLUMNS, item)
                self.db.Insert_MySQL(sql)
            except Exception as e:
                print('插入Comment数据库错误' + str(e))

        elif isinstance(item, WeiboPersonalItem):
            try:
                sql = self._insert_sql('Personal', self._PERSONAL_COLUMNS, item)
                self.db.Insert_MySQL(sql)
            except Exception as e:
                print('插入Personal数据库错误' + str(e))

        return item