Exemple #1
0
 def getWaitCrawler(self):
     """Return the ``ConfigStart.RESULT`` column of the row fetched by the
     ``ConfigStart.SELECTCOUNTFROMLEAGUECRAWLER`` query.

     The connection wrapper is disposed even when the query raises, so a
     failed lookup no longer leaks the database resource.
     """
     mysql = Mysql()
     try:
         result = mysql.getOne(ConfigStart.SELECTCOUNTFROMLEAGUECRAWLER)
     finally:
         # Release the connection on both the success and the failure path.
         mysql.dispose()
     return result[ConfigStart.RESULT]
Exemple #2
0
 def getJifen(self, limit):
     mysql = Mysql()
     sqlAll = ConfigStart.SELECTFROMLEAGUEYEARINFOLIMIT
     resultSelect = mysql.getAll(sqlAll, limit)
     l = []
     i = 0
     if resultSelect == False:
         return
     sqlInsert = ConfigStart.UPDATELEAGUEYEARINFO_TOP
     for resultChild in resultSelect:
         webfile = urllib.urlopen(resultChild[LeagueYearInfo.p_league_url])
         webcontext = webfile.read()
         webfile.close()
         webContent = unicode(webcontext, ConfigStart.GBK)
         soup = BeautifulSoup(webContent, ConfigStart.PARSEMETHOD)
         jifenUrl = soup.find_all(href=re.compile(ConfigStart.COMPILEJIFEN))
         for getJifen in jifenUrl:
             if (getJifen.string == ConfigStart.STRINGJIFEN):
                 print getJifen[ConfigStart.HREF]
                 l.append(resultChild[LeagueYearInfo.p_id])
                 l.append(ConfigStart.STARTURL + getJifen[ConfigStart.HREF])
                 if i == ConfigStart.FALSE:
                     sqlInsert += "(%s,%s)"
                     i = 1
                 else:
                     sqlInsert += ",(%s,%s)"
         pass
     sqlInsert += ConfigStart.UPDATELEAGUEYEARINFO_BOTTOM
     print sqlInsert
     result = mysql.update(sqlInsert, l)
     print result
     pass
     mysql.dispose()
Exemple #3
0
 def startLeagueMain(self, mutex):
     #获取数据库资源
     mysql = Mysql()
     p_id, p_name, p_type, p_country, p_main_url = 0, '', 0, '', ''
     if mutex.acquire():
         sqlString = ConfigStart.SELECTFROMLEAGUECRAWLER
         result = mysql.getOne(sqlString)
         if isinstance(result, bool):
             print "分析完毕"
             mutex.release()
             mysql.dispose()
             return
             pass
         else:
             mysql.update(ConfigStart.UPDATELEAGUESETCRAWLER,
                          result[League.p_id])
             print result[League.p_name]
             p_id,p_name,p_type,p_country,p_main_url=result[League.p_id],\
                                                     result[League.p_name],result[League.p_type],result[League.p_country],result[League.p_main_url]
         mutex.release()
         pass
     #获取每个联赛现存所有赛季并得到对应的url
     print "正在获取联赛___(%s)___的所有赛季信息获取的url为:%s " % (p_name, p_main_url)
     webfile = urllib.urlopen(p_main_url)
     webContent = webfile.read()
     webfile.close()
     webContent = unicode(webContent, ConfigStart.GBK)
     soup = BeautifulSoup(webContent, ConfigStart.PARSEMETHOD)
     leagueYears = soup.find_all(class_=ConfigStart.DROPLISTCLASS)
     for leagueYear in leagueYears[0].children:
         if (type(leagueYear) == bs4.element.Tag):
             print leagueYear.a[ConfigStart.STRINGTITLE]
             print leagueYear.a[ConfigStart.HREF]
             #print re.findall(r'\b\d+\b',leagueYear.a['title'])
             l = [[
                 p_id,
                 ConfigStart.STARTURL + leagueYear.a[ConfigStart.HREF],
                 leagueYear.a[ConfigStart.STRINGTITLE], leagueYear.a.string
             ]]
             sqlInsert = ConfigStart.INSERTINTOLEAGUEYEARINFO
             result = mysql.insertMany(sqlInsert, l)
             print result
     pass
     mysql.dispose()
 def getMatchInfo(self, limit):
     mysql = Mysql()
     sqlAll = "select * from matchurl WHERE p_use=0 limit %s,10"
     resultSelect = mysql.getAll(sqlAll, limit)
     if resultSelect == False:
         return
     for resultChild in resultSelect:
         webfile = urllib.urlopen(resultChild['p_url'])
         webcontext = webfile.read()
         webfile.close()
         webContent = unicode(webcontext, 'gbk')
         soup = BeautifulSoup(webContent, ConfigStart.PARSEMETHOD)
         listInfo = soup.find_all(id='div_group_list')
         rounds = []  #当前第几回合或第几组
         stid = resultChild['p_url'].split("jifen-")[1].split("/")[0]
         c = 'score'
         a = 'getmatch'
         if (listInfo.__len__() > 0):
             for listChild in listInfo[0].children:
                 if (type(listChild) == bs4.element.Tag):
                     if (listChild['data-group'] != 'all'):
                         rounds.append(listChild['data-group'])
                         pass
                 pass
             pass
         pass
         listInfo = soup.find_all(id='match_group')
         if (listInfo.__len__() > 0):
             for listChild in listInfo[0].children:
                 if (type(listChild) == bs4.element.Tag):
                     if (listChild.a['data-group'] != 'all'):
                         rounds.append(listChild.a['data-group'])
                         pass
                 pass
             pass
         pass
         #lmb3
         listInfo = soup.find_all(class_='lmb3')
         asc = 0
         for listC in listInfo:
             asc += 1
             rounds.append(asc)
             pass
         pass
         urlInfo = "http://liansai.500.com/index.php?"
         if (rounds.__len__() == 0):
             insertContext = []
             sqlInsert = "INSERT INTO `matchinfo` (`p_leagueid`, `fid`, `ghalfscore`, `gid`, `gname`, `gscore`, `gstanding`, `gsxname`, `handline`, `hhalfscore`, `hid`, `hname`, `hscore`, `hstanding`, `hsxname`, `round`, `status`, `stime`) VALUES "
             urlInfo += "c=" + c
             urlInfo += "&a=" + a
             urlInfo += "&stid=" + stid
             jsonContext = urllib.urlopen(urlInfo)
             jsonData = jsonContext.read()
             jsonContext.close()
             jsonData = unicode(jsonData, 'gbk')
             jsonData = json.loads(jsonData)
             index = 0
             for jsonDataChild in jsonData:
                 print jsonDataChild
                 if index == 0:
                     sqlInsert += "(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
                     index = 1
                 else:
                     sqlInsert += ",(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
                     pass
                 insertContext.append(resultChild['pid'])
                 insertContext.append(jsonDataChild['fid'])
                 insertContext.append(jsonDataChild['ghalfscore'])
                 insertContext.append(jsonDataChild['gid'])
                 insertContext.append(jsonDataChild['gname'])
                 insertContext.append(jsonDataChild['gscore'])
                 insertContext.append(jsonDataChild['gstanding'])
                 insertContext.append(jsonDataChild['gsxname'])
                 insertContext.append(jsonDataChild['handline'])
                 insertContext.append(jsonDataChild['hhalfscore'])
                 insertContext.append(jsonDataChild['hid'])
                 insertContext.append(jsonDataChild['hname'])
                 insertContext.append(jsonDataChild['hscore'])
                 insertContext.append(jsonDataChild['hstanding'])
                 insertContext.append(jsonDataChild['hsxname'])
                 insertContext.append(jsonDataChild['round'])
                 insertContext.append(jsonDataChild['status'])
                 insertContext.append(jsonDataChild['stime'])
                 pass
             if index == 0:
                 continue
             resInfo = mysql.update(sqlInsert, insertContext)
             print resInfo
         else:
             for roundChild in rounds:
                 insertContext = []
                 sqlInsert = "INSERT INTO `matchinfo` (`p_leagueid`, `fid`, `ghalfscore`, `gid`, `gname`, `gscore`, `gstanding`, `gsxname`, `handline`, `hhalfscore`, `hid`, `hname`, `hscore`, `hstanding`, `hsxname`, `round`, `status`, `stime`) VALUES "
                 urlInfo += "c=" + c
                 urlInfo += "&a=" + a
                 urlInfo += "&stid=" + stid
                 urlInfo += "&round=" + str(roundChild)
                 jsonContext = urllib.urlopen(urlInfo)
                 jsonData = jsonContext.read()
                 jsonContext.close()
                 jsonData = unicode(jsonData, 'gbk')
                 jsonData = json.loads(jsonData)
                 index = 0
                 for jsonDataChild in jsonData:
                     print jsonDataChild
                     if index == 0:
                         sqlInsert += "(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
                         index = 1
                     else:
                         sqlInsert += ",(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
                         pass
                     insertContext.append(resultChild['pid'])
                     insertContext.append(jsonDataChild['fid'])
                     insertContext.append(jsonDataChild['ghalfscore'])
                     insertContext.append(jsonDataChild['gid'])
                     insertContext.append(jsonDataChild['gname'])
                     insertContext.append(jsonDataChild['gscore'])
                     insertContext.append(jsonDataChild['gstanding'])
                     insertContext.append(jsonDataChild['gsxname'])
                     insertContext.append(jsonDataChild['handline'])
                     insertContext.append(jsonDataChild['hhalfscore'])
                     insertContext.append(jsonDataChild['hid'])
                     insertContext.append(jsonDataChild['hname'])
                     insertContext.append(jsonDataChild['hscore'])
                     insertContext.append(jsonDataChild['hstanding'])
                     insertContext.append(jsonDataChild['hsxname'])
                     insertContext.append(jsonDataChild['round'])
                     insertContext.append(jsonDataChild['status'])
                     insertContext.append(jsonDataChild['stime'])
                     pass
                 if index == 0:
                     continue
                 resInfo = mysql.update(sqlInsert, insertContext)
                 print resInfo
                 pass
             pass
             #INSERT INTO `matchinfo` (`p_leagueid`, `fid`, `ghalfscore`, `gid`, `gname`, `gscore`, `gstanding`, `gsxname`, `handline`, `hhalfscore`, `hid`, `hname`, `hscore`, `hstanding`, `hsxname`, `round`, `status`, `stime`) VALUES ('1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '2017-04-21 17:24:58')
         #设置p_use已经抓取标志
         useSql = "update matchurl set p_use=1 where pid = %s"
         mysql.update(useSql, resultChild['pid'])
         print "matchurl更新成功"
     pass
     mysql.dispose()
Exemple #5
0
        # func_var = []
        # for i in range(threadCount):
        #     func_var.append(([i * ConfigStart.THREADCOUNT], None))
        #     pass
        # requests = threadPoolBase.makeRequests(jifenToMatchUrl.getMatchInfo, func_var)
        # [pool.putRequest(req) for req in requests]
        # pool.wait()
        # mysql.dispose()
        # Work out how far the crawl has already progressed.
        # NOTE(review): `mysql`, `pool` and `threadPoolBase` are defined
        # outside the visible part of this function.
        try:
            analysisData = AnalysisData()
            sqlAll = ConfigStart.SELECTFROMMATCHINFO
            resultSelect = mysql.getOne(sqlAll)
            # Number of requests: the RESULT value divided by THREADCOUNT,
            # plus one (presumably integer division under Python 2 to cover
            # a trailing partial batch - confirm THREADCOUNT is an int).
            threadCount = int(
                resultSelect[ConfigStart.RESULT]) / ConfigStart.THREADCOUNT + 1
            # NOTE(review): secondToJifen is not used in the visible code.
            secondToJifen = SecondToJifen()
            func_var = []
            for i in range(threadCount):
                # One entry per request: a one-element list holding the
                # offset i * THREADCOUNT, paired with None (presumably the
                # (args, kwargs) shape makeRequests expects - verify).
                func_var.append(([i * ConfigStart.THREADCOUNT], None))
                pass
            requests = threadPoolBase.makeRequests(
                analysisData.getDataFromMatchInfo, func_var)
            # List comprehension used only for its side effect of queueing
            # every request on the pool.
            [pool.putRequest(req) for req in requests]
            pool.wait()
            # NOTE(review): skipped when anything above raises.
            mysql.dispose()
            pass
        except Exception, e:
            # Mode 'wb+' truncates main.log, so only the most recent error
            # message is kept.
            file = open('main.log', 'wb+')
            file.write("%s\r\n" % e)
            file.close()
Exemple #6
0
 def getSecondUrl():
     # allraceMainWrap
     try:
         webfile = urllib.urlopen(ConfigStart.STARTURL)
         webContent = webfile.read()
         webfile.close()
         soup = BeautifulSoup(webContent, ConfigStart.PARSEMETHOD)
         allraceMainWrap = soup.find_all(id=ConfigStart.LEAGUESDIV)
         #获取到网页后开始分配数据库资源
         mysql = Mysql()
         #i为分区赛事
         i = -1
         # 数据模型为p_name p_type p_country p_sport_type p_main_type
         p_name = ConfigStart.NULLSTRING
         p_type = 1
         p_country = ConfigStart.NULLSTRING
         p_sport_type = 1
         p_main_type = ConfigStart.NULLSTRING
         sqlAll = ConfigStart.INSERTINTOLEAGUETABLE
         for child in allraceMainWrap[ConfigStart.DIVTOPINDEX].children:
             if (type(child) == bs4.element.Tag):
                 i = i + ConfigStart.INC
                 #print child
                 singleUrl = child.find_all(class_=[
                     ConfigStart.ALLLEAGUECLASS_1,
                     ConfigStart.ALLLEAGUECLASS_2
                 ])
                 #print singleUrl
                 for psingleUrl in singleUrl:
                     #print psingleUrl
                     for getUrlTag in psingleUrl:
                         if (type(getUrlTag) == bs4.element.Tag):
                             if (type(getUrlTag.div) == type(None)):
                                 print getUrlTag.a[ConfigStart.HREF]
                                 print getUrlTag.span.string.encode(
                                     ConfigStart.UTF8).replace(
                                         ConfigStart.SPACESTRING,
                                         ConfigStart.NULLSTRING)
                                 p_name = getUrlTag.span.string.encode(
                                     ConfigStart.UTF8).replace(
                                         ConfigStart.SPACESTRING,
                                         ConfigStart.NULLSTRING)
                                 p_type = i + 1
                                 p_country = ConfigStart.NULLSTRING
                                 p_main_type = ConfigStart.STARTURL + getUrlTag.a[
                                     ConfigStart.HREF]
                                 sqlString = ConfigStart.SELECTCOUNTFROMLEAGUETABLE
                                 lSelect = [p_name, p_type, p_country]
                                 resultSelect = mysql.getOne(
                                     sqlString, lSelect)
                                 if resultSelect[
                                         ConfigStart.
                                         RESULT] == ConfigStart.NULL:
                                     l = [[
                                         p_name, p_type, p_country,
                                         p_sport_type, p_main_type
                                     ]]
                                     result = mysql.insertMany(sqlAll, l)
                                     print result
                                     pass
                             else:
                                 #获取到国家
                                 print getUrlTag.span.string.encode(
                                     ConfigStart.UTF8).replace(
                                         ConfigStart.SPACESTRING,
                                         ConfigStart.NULLSTRING)
                                 #获取联赛及各个Url
                                 for leagueInfo in getUrlTag.div.children:
                                     if (type(leagueInfo) == bs4.element.Tag
                                         ):
                                         print leagueInfo.string
                                         print leagueInfo[ConfigStart.HREF]
                                         p_name = leagueInfo.string.encode(
                                             ConfigStart.UTF8).replace(
                                                 ConfigStart.SPACESTRING,
                                                 ConfigStart.NULLSTRING)
                                         p_main_type = ConfigStart.STARTURL + leagueInfo[
                                             ConfigStart.HREF]
                                         p_type = i + 1
                                         p_country = getUrlTag.span.string.encode(
                                             ConfigStart.UTF8).replace(
                                                 ConfigStart.SPACESTRING,
                                                 ConfigStart.NULLSTRING)
                                         sqlString = ConfigStart.SELECTCOUNTFROMLEAGUETABLE
                                         lSelect = [
                                             p_name, p_type, p_country
                                         ]
                                         resultSelect = mysql.getOne(
                                             sqlString, lSelect)
                                         if resultSelect[
                                                 ConfigStart.
                                                 RESULT] == ConfigStart.NULL:
                                             l = [[
                                                 p_name, p_type, p_country,
                                                 p_sport_type, p_main_type
                                             ]]
                                             result = mysql.insertMany(
                                                 sqlAll, l)
                                             print result
                                             pass
                                     pass
                             print ConfigStart.MATCHPARTION[i]
                     pass
                 pass
             pass
         pass
         #各洲的杯赛 lrace_bei
         allraceCup = soup.find_all(class_=ConfigStart.CPUMATCHTAG)
         for cup in allraceCup:
             i = i + 1
             print cup
             print cup.a.string
             print cup.a[ConfigStart.HREF]
             for cupChild in cup.find_all(ConfigStart.A):
                 print cupChild
                 p_name = cupChild.string.encode(ConfigStart.UTF8).replace(
                     ConfigStart.SPACESTRING, ConfigStart.NULLSTRING)
                 p_type = i + ConfigStart.INC
                 p_country = ConfigStart.NULLSTRING
                 p_main_type = ConfigStart.STARTURL + cupChild[
                     ConfigStart.HREF]
                 sqlString = ConfigStart.SELECTCOUNTFROMLEAGUETABLE
                 lSelect = [p_name, p_type, p_country]
                 resultSelect = mysql.getOne(sqlString, lSelect)
                 if resultSelect[ConfigStart.RESULT] == ConfigStart.NULL:
                     l = [[
                         p_name, p_type, p_country, p_sport_type,
                         p_main_type
                     ]]
                     result = mysql.insertMany(sqlAll, l)
                     mysql.end()
                     print result
                     pass
             pass
         mysql.dispose()
     except Exception, e:
         print Exception, ":", e
         pass