def start_requests(conn, beginDate, endDate, freq='W-FRI', timeout=30):
    """Download per-stock tables from TDCC and store them in ``share_ratio``.

    For every date produced by ``pd.date_range(beginDate, endDate, freq=freq)``
    (visited newest first) the function:

    1. selects the codes in ``own`` that have no ``share_ratio`` row for
       that date,
    2. posts one query per code to the TDCC ``QryStockAjax.do`` endpoint,
    3. inserts every returned row whose ``'股 數/單位數'`` value passes
       ``isNumer`` and commits after each insert,
    4. sleeps three seconds between codes.

    Args:
        conn: Database connection providing ``cursor()`` and ``commit()``.
            The cursor must accept ``%s`` placeholders, which the INSERT
            statement has always relied on.
        beginDate: Start of the range, in any form ``pd.date_range`` accepts.
        endDate: End of the range, in any form ``pd.date_range`` accepts.
        freq: Weekly anchor used to pick the dates. Defaults to ``'W-FRI'``,
            the previously hard-coded value.
        timeout: Seconds to wait for the HTTP response before giving up.

    Any exception raised while handling the codes of one date is printed and
    the remaining codes of that date are skipped; later dates still run.
    """
    url = 'https://www.tdcc.com.tw/smWeb/QryStockAjax.do'
    # Header value kept exactly as it was (including the embedded
    # "User-Agent:" prefix); NOTE(review): confirm whether that is intended.
    headers = {"User-Agent" : "User-Agent:Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;"}

    # The date is bound as a parameter instead of being spliced into the SQL
    # text, matching the placeholder style of the INSERT below.
    missing_sql = (
        "SELECT code "
        "FROM own "
        "where code not in ( "
        " select code "
        " from share_ratio "
        " where 1=1 "
        " and date = %s"
        ") "
        "group by code "
    )
    insert_sql = (
        "INSERT INTO share_ratio (`date`,`code`,`rank`,`number`,`person`,`rate`) "
        "VALUES (%s,%s,%s,%s,%s,%s)"
    )

    cursor = conn.cursor()
    try:
        print (beginDate)
        print (endDate)
        for day in pd.date_range(beginDate, endDate, freq=freq)[::-1]:
            sca_date = '{}{:02d}{:02d}'.format(day.year, day.month, day.day)

            cursor.execute(missing_sql, (sca_date,))
            # Materialise the codes first: the same cursor is reused for the
            # INSERT statements issued further down.
            code_list = [row[0] for row in cursor.fetchall()]

            try:
                for code in code_list:
                    payload = {
                        'scaDates': sca_date,
                        'scaDate': sca_date,
                        'SqlMethod': 'StockNo',
                        'StockNo': code,
                        'radioStockNo': code,
                        'StockName': '',
                        'REQ_OPR': 'SELECT',
                        'clkStockNo': code,
                        'clkStockName': ''
                    }
                    # A timeout keeps one stalled connection from blocking
                    # the whole run; the resulting error is reported below.
                    response = requests.post(url, data=payload, headers=headers, timeout=timeout)
                    html = response.content.decode('big5')
                    data = Extractor(html, 'table.mt:eq(1)').df(1)

                    for index, row in data.iterrows():
                        # The row labelled 15 is never stored
                        # (presumably a summary row -- TODO confirm).
                        if index == 15:
                            continue
                        if not isNumer(row['股 數/單位數']):
                            continue
                        val = (sca_date, code, row['持股/單位數分級'], row['股 數/單位數'], row['人 數'], row['占集保庫存數比例 (%)'])
                        cursor.execute(insert_sql, val)
                        conn.commit()
                        print (code, sca_date, row['持股/單位數分級'], row['股 數/單位數'], row['人 數'], row['占集保庫存數比例 (%)'])

                    # Pause between requests to the remote site.
                    time.sleep(3)
            except Exception as e:
                # Best effort: report the failure and move on to the next date.
                print (e)
    finally:
        cursor.close()
def parse(self, response):
    """Convert the ``table.mt:eq(1)`` table of a response into row items.

    The table is loaded through ``Extractor(...).df(1)``, the
    ``'持股/單位數分級'`` column is dropped, the remaining four columns are
    renamed, and the ``code`` and ``date`` values carried in
    ``response.meta`` are prepended as the first two columns.

    Args:
        response: Response object exposing ``meta`` (with ``'code'`` and
            ``'date'`` keys) and ``dom``.

    Yields:
        dict: One mapping of column name to value per table row.
    """
    m = response.meta
    data = Extractor(response.dom, 'table.mt:eq(1)').df(1)
    del data['持股/單位數分級']
    # Overwrite the '序' value of the row labelled 15 before renaming
    # (presumably the summary row -- TODO confirm against the page layout).
    data.loc[15, '序'] = 17
    data.columns = ['持股分級', '人數', '股數', '佔集保庫存數比例%']
    data.insert(0, 'code', m['code'])
    data.insert(0, 'date', m['date'])
    # Use the full orient name: the abbreviated spelling 'row' is rejected
    # by pandas 2.0 and later, while 'records' works on every version.
    for item in data.to_dict('records'):
        yield item
def setUp(self):
    """Create ``self.extractor`` from a plain two-by-two table (cells 1-4)."""
    # Adjacent literals concatenate to exactly the original markup string.
    markup = (
        " <table>"
        " <tr> <td>1</td> <td>2</td> </tr>"
        " <tr> <td>3</td> <td>4</td> </tr>"
        " </table> "
    )
    self.extractor = Extractor(markup)
def setUp(self):
    """Create ``self.extractor`` from a table mixing rowspan and colspan cells."""
    # Adjacent literals concatenate to exactly the original markup string.
    markup = (
        " <table>"
        " <tr> <td rowspan=2>1</td> <td>2</td> <td rowspan=3>3</td> </tr>"
        " <tr> <td colspan=2>4</td> </tr>"
        " <tr> <td colspan=2>5</td> </tr>"
        " </table> "
    )
    self.extractor = Extractor(markup)
def test_init_with_id(self):
    """Only the table matched by ``jquery='#wanted'`` ends up in the result."""
    # Two tables: the first carries id 'wanted', the second id 'unwanted'.
    markup = (
        " <table id='wanted'>"
        " <tr> <td>1</td> <td>2</td> </tr>"
        " <tr> <td>3</td> <td>4</td> </tr>"
        " </table>"
        " <table id='unwanted'>"
        " <tr> <td>unwanted</td> </tr>"
        " </table> "
    )
    document = pq(markup)
    parsed = Extractor(document, jquery='#wanted').parse()

    expected_rows = [[u'1', u'2'], [u'3', u'4']]
    self.assertEqual(parsed.return_list(), expected_rows)
def setUp(self):
    """Create ``self.extractor`` from a revenue table with a two-row header.

    The markup contains a first header row built from ``colspan`` groups plus
    one ``rowspan="2"`` cell, a second header row with ten ``th`` cells, seven
    company rows (codes 1101-1110) and a final totals row whose label cell is
    a ``th`` spanning two columns.
    """
    # The literal is kept verbatim, including the line break that falls
    # inside a <td ...> tag; do not re-flow it.
    html = """ <table width="100%" border="5" bordercolor="#FF6600" bgcolor="#FFFFFF"> <tbody> <tr> <th class="tt" colspan="2"> </th> <th class="tt" colspan="5">營業收入</th> <th class="tt" colspan="3">累計營業收入</th> <th rowspan="2" class="tt">備註</th> </tr> <tr> <th class="tt">公司<br>代號</th> <th class="tt">公司名稱</th> <th class="tt">當月營收</th> <th class="tt">上月營收</th> <th class="tt">去年當月營收</th> <th class="tt">上月比較<br>增減(%)</th> <th class="tt">去年同月<br>增減(%)</th> <th class="tt">當月累計營收</th> <th class="tt">去年累計營收</th> <th class="tt">前期比較<br>增減(%)</th> </tr> <tr align="right"> <td align="center">1101</td> <td align="left">台泥</td> <td nowrap=""> 10,757,628</td> <td nowrap=""> 11,539,982</td> <td nowrap=""> 7,858,569</td> <td nowrap=""> -6.77</td> <td nowrap=""> 36.89</td> <td nowrap=""> 57,500,244</td> <td nowrap=""> 45,893,851</td> <td nowrap=""> 25.28</td> <td align="center">-</td> </tr> <tr align="right"> <td align="center">1102</td> <td align="left">亞泥</td> <td nowrap=""> 7,549,925</td> <td nowrap=""> 7,698,165</td> <td nowrap=""> 5,331,442</td> <td nowrap=""> -1.92</td> <td nowrap=""> 41.61</td> <td nowrap=""> 39,010,235</td> <td nowrap=""> 28,812,149</td> <td nowrap=""> 35.39</td> <td align="center">-</td> </tr> <tr align="right"> <td align="center">1103</td> <td align="left">嘉泥</td> <td nowrap=""> 172,927</td> <td nowrap=""> 185,856</td> <td nowrap=""> 143,629</td> <td nowrap=""> -6.95</td> <td nowrap=""> 20.39</td> <td nowrap=""> 1,000,927</td> <td nowrap=""> 1,058,885</td> <td nowrap=""> -5.47</td> <td align="center">-</td> </tr> <tr align="right"> <td align="center">1104</td> <td align="left">環球水泥</td> <td nowrap=""> 337,575</td> <td nowrap=""> 426,170</td> <td nowrap=""> 318,948</td> <td nowrap=""> -20.78</td> <td nowrap=""> 5.84</td> <td nowrap=""> 2,314,855</td> <td nowrap=""> 2,159,764</td> <td nowrap=""> 7.18</td> <td align="center">-</td> </tr> <tr align="right"> <td align="center">1108</td> <td align="left">幸福水泥</td> <td nowrap=""> 276,298</td> <td 
nowrap=""> 294,581</td> <td nowrap=""> 243,699</td> <td nowrap=""> -6.20</td> <td nowrap=""> 13.37</td> <td nowrap=""> 1,684,245</td> <td nowrap=""> 1,761,992</td> <td nowrap=""> -4.41</td> <td align="center">-</td> </tr> <tr align="right"> <td align="center">1109</td> <td align="left">信大水泥</td> <td nowrap=""> 577,408</td> <td nowrap=""> 625,561</td> <td nowrap=""> 418,868</td> <td nowrap=""> -7.69</td> <td nowrap=""> 37.84</td> <td nowrap=""> 2,809,558</td> <td nowrap=""> 2,317,812</td> <td nowrap=""> 21.21</td> <td align="center">-</td> </tr> <tr align="right"> <td align="center">1110</td> <td align="left">東泥</td> <td nowrap=""> 119,405</td> <td nowrap=""> 142,543</td> <td nowrap=""> 107,913</td> <td nowrap=""> -16.23</td> <td nowrap=""> 10.64</td> <td nowrap=""> 792,195</td> <td nowrap=""> 684,515</td> <td nowrap=""> 15.73</td> <td align="center">-</td> </tr> <tr align="right"> <th class="tt" nowrap="" colspan="2" align="center">合計</th> <td nowrap=""> 19,791,166</td> <td nowrap=""> 20,912,858</td> <td nowrap=""> 14,423,068</td> <td nowrap=""> -5.36</td> <td nowrap=""> 37.21</td> <td> 105,112,259</td> <td> 82,688,968</td> <td nowrap=""> 27.11</td> <td> </td> </tr> </tbody> </table> """
    self.extractor = Extractor(html)