예제 #1
0
def start_requests(conn, beginDate, endDate):
    """Download share-distribution tables and store them in ``share_ratio``.

    For every Friday between ``beginDate`` and ``endDate`` (newest first),
    select the codes in ``own`` that have no ``share_ratio`` row for that
    date, POST one query per code to the TDCC endpoint, and insert the rows
    of the returned table.

    Args:
        conn: DB-API connection whose driver accepts ``%s`` placeholders
            (the INSERT below already relies on that).
        beginDate: Start of the range, in any form ``pd.date_range`` accepts.
        endDate: End of the range, in any form ``pd.date_range`` accepts.

    An exception raised while processing a date is printed; the remaining
    codes of that date are skipped and the loop continues with the next date.
    """
    url = 'https://www.tdcc.com.tw/smWeb/QryStockAjax.do'
    # Loop-invariant request headers, built once.
    headers = {"User-Agent" : "User-Agent:Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;"}
    # The date is bound as a parameter instead of being spliced into the SQL
    # text, matching how the INSERT passes its values.
    select_sql = ("SELECT code "
                  "FROM own "
                  "where code not in ( "
                  "    select code "
                  "    from share_ratio "
                  "    where 1=1 "
                  "    and date = %s"
                  ") "
                  "group by code ")
    insert_sql = "INSERT INTO share_ratio (`date`,`code`,`rank`,`number`,`person`,`rate`) \
                           VALUES (%s,%s,%s,%s,%s,%s)"

    cursor = conn.cursor()
    try:
        print (beginDate)
        print (endDate)
        # Weekly dates anchored on Friday, walked from newest to oldest.
        for date in pd.date_range(beginDate, endDate, freq='W-FRI')[::-1]:
            scaDate = '{}{:02d}{:02d}'.format(date.year, date.month, date.day)
            cursor.execute(select_sql, (scaDate,))
            code_list = [row[0] for row in cursor]
            try:
                for code in code_list:
                    payload = {
                        'scaDates': scaDate,
                        'scaDate': scaDate,
                        'SqlMethod': 'StockNo',
                        'StockNo': code,
                        'radioStockNo': code,
                        'StockName': '',
                        'REQ_OPR': 'SELECT',
                        'clkStockNo': code,
                        'clkStockName': ''
                    }
                    html = requests.post(url, data=payload, headers=headers).content.decode('big5')
                    data = Extractor(html, 'table.mt:eq(1)').df(1)
                    for index, row in data.iterrows():
                        # The row labelled 15 is never stored
                        # (presumably a summary row; verify against the page).
                        if index == 15:
                            continue
                        # Rows whose share count is not numeric are skipped.
                        if not isNumer(row['股  數/單位數']):
                            continue
                        val = (scaDate,code,row['持股/單位數分級'],row['股  數/單位數'],row['人  數'],row['占集保庫存數比例 (%)'])
                        cursor.execute(insert_sql, val)
                        conn.commit()
                        print (code, scaDate, row['持股/單位數分級'],row['股  數/單位數'],row['人  數'],row['占集保庫存數比例 (%)'])
                    # Pause between requests to the remote site.
                    time.sleep(3)
            except Exception as e:
                # Best-effort: report the failure and move on to the next date.
                print (e)
    finally:
        # Release the cursor even if the date loop raises.
        cursor.close()
예제 #2
0
    def parse(self, response):
        """Yield one dict per row of the table extracted from ``response``.

        The table is read with ``Extractor`` from ``response.dom`` using the
        selector ``table.mt:eq(1)``. The '持股/單位數分級' column is dropped,
        the remaining columns are renamed, and ``code`` and ``date`` from
        ``response.meta`` are prepended to every row.

        Args:
            response: Object exposing ``meta`` (with 'code' and 'date' keys)
                and ``dom``.

        Yields:
            dict: One mapping of column name to value per table row.
        """
        m = response.meta
        data = Extractor(response.dom, 'table.mt:eq(1)').df(1)
        del data['持股/單位數分級']
        # Overwrite the '序' cell of the row labelled 15 with 17
        # (presumably the summary row; confirm against the source page).
        data.loc[15, '序'] = 17
        data.columns = ['持股分級', '人數', '股數', '佔集保庫存數比例%']

        # Both inserts target position 0, so the final order is date, code, ...
        data.insert(0, 'code', m['code'])
        data.insert(0, 'date', m['date'])

        # Use the full orient name: the abbreviated 'row' spelling is rejected
        # by pandas >= 2.0, while 'records' yields the same list of row dicts.
        for item in data.to_dict('records'):
            yield item
예제 #3
0
 def setUp(self):
     """Create ``self.extractor`` from a plain table of two rows and two cells each."""
     self.extractor = Extractor("""
     <table>
         <tr>
           <td>1</td>
           <td>2</td>
         </tr>
         <tr>
           <td>3</td>
           <td>4</td>
         </tr>
     </table>
     """)
예제 #4
0
 def setUp(self):
     """Create ``self.extractor`` from a table whose cells use rowspan and colspan."""
     self.extractor = Extractor("""
     <table>
         <tr>
             <td rowspan=2>1</td>
             <td>2</td>
             <td rowspan=3>3</td>
         </tr>
         <tr>
             <td colspan=2>4</td>
         </tr>
         <tr>
             <td colspan=2>5</td>
         </tr>
     </table>
     """)
예제 #5
0
 def test_init_with_id(self):
     # Two tables are present; the '#wanted' selector should restrict the
     # extractor to the first one, so only its four cells are returned.
     document = pq("""
     <table id='wanted'>
         <tr>
           <td>1</td>
           <td>2</td>
         </tr>
         <tr>
           <td>3</td>
           <td>4</td>
         </tr>
     </table>
     <table id='unwanted'>
         <tr>
           <td>unwanted</td>
         </tr>
     </table>
     """)
     parsed = Extractor(document, jquery='#wanted').parse()
     expected_rows = [[u'1', u'2'], [u'3', u'4']]
     self.assertEqual(parsed.return_list(), expected_rows)
예제 #6
0
 def setUp(self):
     """Create ``self.extractor`` from a revenue table.

     The fixture has a two-row header that uses colspan/rowspan, seven
     company rows and a closing totals row.
     """
     self.extractor = Extractor("""
         <table width="100%" border="5" bordercolor="#FF6600" bgcolor="#FFFFFF">
             <tbody>
             <tr>
                 <th class="tt" colspan="2">&nbsp;</th>
                 <th class="tt" colspan="5">營業收入</th>
                 <th class="tt" colspan="3">累計營業收入</th>
                 <th rowspan="2" class="tt">備註</th>
             </tr>
             <tr>
                 <th class="tt">公司<br>代號</th>
                 <th class="tt">公司名稱</th>
                 <th class="tt">當月營收</th>
                 <th class="tt">上月營收</th>
                 <th class="tt">去年當月營收</th>
                 <th class="tt">上月比較<br>增減(%)</th>
                 <th class="tt">去年同月<br>增減(%)</th>
                 <th class="tt">當月累計營收</th>
                 <th class="tt">去年累計營收</th>
                 <th class="tt">前期比較<br>增減(%)</th>
             </tr>
             <tr align="right">
                 <td align="center">1101</td>
                 <td align="left">台泥</td>
                 <td nowrap=""> 10,757,628</td>
                 <td nowrap=""> 11,539,982</td>
                 <td nowrap=""> 7,858,569</td>
                 <td nowrap=""> -6.77</td>
                 <td nowrap=""> 36.89</td>
                 <td nowrap=""> 57,500,244</td>
                 <td nowrap=""> 45,893,851</td>
                 <td nowrap=""> 25.28</td>
                 <td align="center">-</td>
             </tr>
             <tr align="right">
                 <td align="center">1102</td>
                 <td align="left">亞泥</td>
                 <td nowrap=""> 7,549,925</td>
                 <td nowrap=""> 7,698,165</td>
                 <td nowrap=""> 5,331,442</td>
                 <td nowrap=""> -1.92</td>
                 <td nowrap=""> 41.61</td>
                 <td nowrap=""> 39,010,235</td>
                 <td nowrap=""> 28,812,149</td>
                 <td nowrap=""> 35.39</td>
                 <td align="center">-</td>
             </tr>
             <tr align="right">
                 <td align="center">1103</td>
                 <td align="left">嘉泥</td>
                 <td nowrap=""> 172,927</td>
                 <td nowrap=""> 185,856</td>
                 <td nowrap=""> 143,629</td>
                 <td nowrap=""> -6.95</td>
                 <td nowrap=""> 20.39</td>
                 <td nowrap=""> 1,000,927</td>
                 <td nowrap=""> 1,058,885</td>
                 <td nowrap=""> -5.47</td>
                 <td align="center">-</td>
             </tr>
             <tr align="right">
                 <td align="center">1104</td>
                 <td align="left">環球水泥</td>
                 <td nowrap=""> 337,575</td>
                 <td nowrap=""> 426,170</td>
                 <td nowrap=""> 318,948</td>
                 <td nowrap=""> -20.78</td>
                 <td nowrap=""> 5.84</td>
                 <td nowrap=""> 2,314,855</td>
                 <td nowrap=""> 2,159,764</td>
                 <td nowrap=""> 7.18</td>
                 <td align="center">-</td>
             </tr>
             <tr align="right">
                 <td align="center">1108</td>
                 <td align="left">幸福水泥</td>
                 <td nowrap=""> 276,298</td>
                 <td nowrap=""> 294,581</td>
                 <td nowrap=""> 243,699</td>
                 <td nowrap=""> -6.20</td>
                 <td nowrap=""> 13.37</td>
                 <td nowrap=""> 1,684,245</td>
                 <td nowrap=""> 1,761,992</td>
                 <td nowrap=""> -4.41</td>
                 <td align="center">-</td>
             </tr>
             <tr align="right">
                 <td align="center">1109</td>
                 <td align="left">信大水泥</td>
                 <td nowrap=""> 577,408</td>
                 <td nowrap=""> 625,561</td>
                 <td nowrap=""> 418,868</td>
                 <td nowrap=""> -7.69</td>
                 <td nowrap=""> 37.84</td>
                 <td nowrap=""> 2,809,558</td>
                 <td nowrap=""> 2,317,812</td>
                 <td nowrap=""> 21.21</td>
                 <td align="center">-</td>
             </tr>
             <tr align="right">
                 <td align="center">1110</td>
                 <td align="left">東泥</td>
                 <td nowrap=""> 119,405</td>
                 <td nowrap=""> 142,543</td>
                 <td nowrap=""> 107,913</td>
                 <td nowrap=""> -16.23</td>
                 <td nowrap=""> 10.64</td>
                 <td nowrap=""> 792,195</td>
                 <td nowrap=""> 684,515</td>
                 <td nowrap=""> 15.73</td>
                 <td align="center">-</td>
             </tr>
             <tr align="right">
                 <th class="tt" nowrap="" colspan="2" align="center">合計</th>
                 <td nowrap=""> 19,791,166</td>
                 <td nowrap=""> 20,912,858</td>
                 <td nowrap=""> 14,423,068</td>
                 <td nowrap=""> -5.36</td>
                 <td nowrap=""> 37.21</td>
                 <td> 105,112,259</td>
                 <td> 82,688,968</td>
                 <td nowrap=""> 27.11</td>
                 <td>&nbsp;</td>
             </tr>
             </tbody>
         </table>
     """)