Example #1
0
def get_lhblist(date, proxy):
    """Fetch the dragon-tiger-board (龙虎榜) stock-code list for *date*.

    Queries both the Shanghai (dateType=2) and Shenzhen (dateType=1)
    JRJ endpoints and collects every 6-digit stock code seen in either
    response.

    :param date: date string accepted by the JRJ API
    :param proxy: proxy flag forwarded to myspyder
    :return: de-duplicated list of 6-digit code strings (unordered)
    """
    url_tpl = ("http://stock.jrj.com.cn/action/lhb/getHsTodaylhb.jspa"
               "?vname=list&date=%s&dateType=%s&order=desc"
               "&sort=netvalue_value&psize=2000")
    codes = set()
    # dateType=2 is the Shanghai feed, dateType=1 the Shenzhen feed
    for date_type in (2, 1):
        html = myspyder(url_tpl % (date, date_type),
                        proxy=proxy).content.decode('utf-8')  # [9:-3]
        # raw-string pattern: '\d' in a plain string is a deprecated escape
        codes.update(re.findall(r'\d{6}', html))
    return list(codes)
Example #2
0
def zrzdt(stype, sdate):
    """Fetch JRJ limit-move statistics for *sdate* and append them to the
    `stockdata.zrzdt` table.

    :param stype: statistic-type path segment of the JRJ URL; also part of
        the JS variable name in the payload (``var zr_<stype>=``)
    :param sdate: date to fetch (anything with ``strftime``, e.g.
        ``datetime.date``)
    :return: the parsed DataFrame, or an empty DataFrame on any failure
    """
    print(sdate, stype)
    strdate = sdate.strftime("%Y%m%d")
    url = "http://home.flashdata2.jrj.com.cn/limitStatistic/%s/%s.js" % (
        stype, strdate)
    engine = conn()
    try:
        html = myspyder(url, proxy=0)
        js = html.content.decode('gbk')
        # strip the "var zr_<stype>=" JS assignment wrapper and the ';'
        js = js.replace('var zr_%s=' % (stype), '').replace(';', '')
        # NOTE(review): eval() on remote content is unsafe; the payload is
        # presumably a JS object literal, which json.loads may reject --
        # confirm against a live response before replacing eval
        js = eval(js)['Data']
        df = pd.DataFrame(js,
                          columns=[
                              'code', 'name', 'time', 'close', 'percentage',
                              'amo', 'amplitude', 'turnoverate', '5day', 'pe',
                              'concept_e', 'concept'
                          ])
        # columns not stored in the database
        del df['concept_e']
        del df['5day']
        # prepend the date so 'time' becomes a full timestamp string
        df['time'] = str(sdate) + ' ' + df['time']

        try:
            df.to_sql('zrzdt',
                      engine,
                      schema='stockdata',
                      if_exists='append',
                      index=False)
        except Exception as e:
            # DB failure is logged but the parsed frame is still returned
            print("local:%s[%s]" % (sdate, e))

        return df
    except Exception as e:
        print(sdate, e)
        return pd.DataFrame()
Example #3
0
def get_tick_detail(code):
    """Download the full tick list for *code* from EastMoney and save it
    gzip-compressed as JSON under ./data/tick/.

    :param code: stock code with a one-character suffix; the suffix is
        stripped for the output filename
    :return: None; writes ./data/tick/<code>_<today>.jz on success
    """
    url = "http://mdfm.eastmoney.com/EM_UBG_MinuteApi/Js/Get?dtype=all&id=%s&page=1&rows=10000&gtvolume=&sort=desc" % (
        code)
    html = "error!"
    times_retry = 10
    # myspyder returns the string "error!" on failure; retry until we get
    # an HTTP 200 or run out of attempts (short-circuit keeps the
    # .status_code access safe inside the condition)
    while (html == "error!" or html.status_code != 200) and times_retry != 0:
        html = myspyder(url, proxy=0)
        times_retry -= 1
    # BUG FIX: the original read html.status_code unconditionally here and
    # raised AttributeError when every retry failed
    if html != "error!" and html.status_code == 200:
        try:
            # payload is wrapped in one extra character on each side
            html = json.loads(html.content.decode('utf-8')[1:-1])
            table = [re.split(",", row) for row in html['value']['data']]
            # NOTE(review): df is built but never used or returned; kept
            # because a malformed row makes the constructor raise, which
            # skips the file write below -- confirm before removing
            df = pd.DataFrame(
                table, columns=['time', 'price', 'vol', 1, 2, 3, 4, '成交笔数'])
            with open(
                    "./data/tick/" + code[:-1] + "_" +
                    str(datetime.date.today()) + ".jz", 'wb') as f:
                jz = json.dumps(html['value']['data']).encode('utf-8')
                jz = gzip.compress(jz, compresslevel=9)
                f.write(jz)
        except Exception:
            # best-effort: malformed payloads / I/O errors are ignored
            pass
Example #4
0
def split_pg(proxy):  # 配股 (rights issue)
    """Fetch rights-issue (配股) records from the EastMoney data center.

    Retries the request up to 10 times, parses the quoted CSV records
    from the JS payload and keeps only code / ratio / price / ex-date.

    :param proxy: proxy flag forwarded positionally to myspyder
    :return: DataFrame ['股票代码', '配股比例(10配)', '配股价', '除权日'];
        empty DataFrame when the fetch or the parse fails
    """
    pg_url = 'http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx?type=NS&sty=NSA&st=6&sr=true&p=1&ps=5000'
    get_url = "error!"
    times_retry = 10
    List_Fin_pg = pd.DataFrame()
    # myspyder returns the string "error!" on failure; retry until success
    while get_url == "error!" and times_retry != 0:
        get_url = myspyder(pg_url, proxy)
        times_retry -= 1
    if get_url != "error!":
        try:
            # each record is a double-quoted CSV string inside the JS body
            return_list = re.findall(r"\"(.*?)\"", get_url.text)

            print("FTSPLIT:配股信息查找完毕,正在写入数据库!")
            pgtable = [re.split(",", record) for record in return_list]

            pglist = pd.DataFrame(pgtable,
                                  columns=[
                                      'A', 'B', '股票代码', '股票简称', '配售代码', '配售名称',
                                      '配股比例(10配)', '配股价', '配股前总股本(万股)', '配股总数',
                                      '配股后总股本(万股)', '股权登记日', '缴款起始日期',
                                      '缴款截止日期', '配股上市日', '除权日', '募资总额', '募资净额',
                                      '承销方式', '公告日', '最新价', 'C'
                                  ])
            List_Fin_pg = pglist[['股票代码', '配股比例(10配)', '配股价', '除权日']]
        except Exception:
            # malformed payload: fall through to the empty result
            pass
    return List_Fin_pg
Example #5
0
def get_news(url, proxy):
    """Scrape the stcn.com news-list page at *url* into a DataFrame.

    :param url: list-page URL
    :param proxy: proxy flag forwarded to myspyder
    :return: DataFrame ['source', 'type', 'title', 'link', 'datetime'];
        empty (same columns) when the page cannot be fetched
    """
    source = 'stcn.com'
    html = "error!"
    times_retry = 3
    # myspyder returns the string "error!" on failure; retry up to 3 times
    while html == "error!" and times_retry != 0:
        html = myspyder(url, proxy=proxy)
        times_retry = times_retry - 1
    if html == "error!":
        # BUG FIX: the original fell through and raised AttributeError on
        # html.content when every retry failed
        return pd.DataFrame(
            columns=['source', 'type', 'title', 'link', 'datetime'])
    html = html.content
    newsSoup = bs(html, 'html.parser')
    newslist = newsSoup.select(".mainlist")
    newslist = bs(str(newslist[0]), 'html.parser')
    newslist = newslist.find_all("p")
    result = []
    for item in newslist:
        news = str(item)
        stype = bs(news, 'html.parser').a.text
        title = bs(news, 'html.parser').a.next_element.next_element.text
        link = bs(news,
                  'html.parser').a.next_element.next_element.attrs['href']
        # chars 1:20 trim the surrounding decoration off the timestamp;
        # renamed from `datetime` which shadowed the module
        news_dt = str(bs(news, 'html.parser').span.text)[1:20]
        result.append((source, stype, title, link, news_dt))

    result = pd.DataFrame(
        result, columns=['source', 'type', 'title', 'link', 'datetime'])
    result['datetime'] = result['datetime'].astype('datetime64[ns]')
    return result
Example #6
0
def update_embasedata(stocklist, proxy=0):
    """Refresh company base data in the `basedata` table from EastMoney
    F9 pages.

    For each stock, downloads the company-introduction page (gp3.php)
    and the core-theme page (gp30.php), extracts the field cells and
    runs a parameterized UPDATE against `basedata`.

    :param stocklist: frame with `证券代码` and `证券简称` columns,
        accessed positionally 0..n-1
    :param proxy: proxy flag forwarded to myspyder
    :return: DataFrame of (code, name, error) rows that failed; also
        written to <path()>/error/update_basedata.csv
    """
    Errorlist = []

    engine = conn()
    for i in range(len(stocklist)):
        sqli = "UPDATE  `basedata` SET  `证券简称` = %s,  `公司名称` = %s,  `英文名称` = %s, " \
               " `曾用名` = %s,  `公司简介` = %s,  `成立日期` = %s,  `工商登记号` = %s,  `注册资本` = %s,  `法人代表` = %s, " \
               " `所属证监会行业` = %s,  `员工总数` = %s,  `总经理` = %s,  `董事会秘书` = %s,  `省份` = %s,  `城市` = %s,  " \
               "`注册地址` = %s,  `办公地址` = %s,  `邮编` = %s,  `电话` = %s,  `传真` = %s,  `电子邮件` = %s,  " \
               "`公司网站` = %s,  `审计机构` = %s,  `法律顾问` = %s,  `经营分析` = %s,  `简史` = %s,  `核心题材` = %s " \
               "WHERE `basedata`.`证券代码` = %s"
        # codes starting with '6' get suffix "01", others "02" (presumably
        # the EastMoney SH vs SZ market suffix -- confirm)
        symbol = stocklist['证券代码'][i] + "01" if stocklist['证券代码'][i][
            0] == "6" else stocklist['证券代码'][i] + "02"
        sName = stocklist['证券简称'][i]
        try:
            Intr = myspyder(
                'http://soft-f9.eastmoney.com/soft/gp3.php?code=%s' % (symbol),
                proxy=proxy).content
            Conc = myspyder(
                'http://soft-f9.eastmoney.com/soft/gp30.php?code=%s' %
                (symbol),
                proxy=proxy).content
            IntrSoup = bs(Intr, 'html5lib')
            ConcSoup = bs(Conc, 'html5lib')
            stockdata = []
            stockdata.append(sName)
            # each data field appears to sit in a width=880 cell on the
            # F9 page; order must match the SET clause above
            for tr in IntrSoup.find_all(width=880):
                stockdata.append(txt_pre(tr.text.strip()))
            point = ConcSoup.p
            # drop inline styling before storing the raw HTML fragment
            del point['style']
            stockdata.append(str(point))
            # last placeholder is the WHERE code (suffix stripped)
            stockdata.append(symbol[:-2])
            print("BASEDATA:", symbol[:-2], sName, i + 1, "/", len(stocklist),
                  round((i + 1) / (len(stocklist)) * 100, 2))

            engine.execute(sqli, tuple(stockdata))
            # polite rate limiting between requests
            sleep((random() / 10 + 1))
        except Exception as e:
            print(symbol[:-2], sName, e)
            Errorlist.append((symbol[:-2], sName, e))
    Errorlist = pd.DataFrame(Errorlist, columns=['证券代码', '证券简称', 'error'])
    Errorlist.to_csv(path() + '/error/update_basedata.csv')
    return Errorlist
Example #7
0
 def get_data(self, proxy=0):
     """Fetch every URL in ``self.urllist`` and extend ``self.htmllist``.

     Each page is GBK-decoded, newline-stripped and split on ';'.
     URLs that cannot be fetched after 3 retries are skipped.

     :param proxy: proxy flag forwarded to myspyder
     :return: the accumulated ``self.htmllist``
     """
     for url in self.urllist:
         html = "error!"
         times_retry = 3
         # myspyder returns "error!" on failure; retry up to 3 times and
         # keep a raised exception from aborting the remaining attempts
         while html == "error!" and times_retry != 0:
             try:
                 html = myspyder(url, proxy=proxy)
             except Exception:
                 pass
             times_retry -= 1
         if html == "error!":
             # BUG FIX: the original went on to call html.content here,
             # raising AttributeError when every retry failed -- skip
             continue
         text = html.content.decode('gbk')
         text = text.replace("\n", "")
         self.htmllist = self.htmllist + re.split("\;", text)
     return self.htmllist
Example #8
0
def get_lhbdetail(code,date,proxy):
    """Fetch dragon-tiger-board (龙虎榜) broker-seat detail for one stock.

    :param code: 6-digit stock code
    :param date: trade date accepted by the JRJ API
    :param proxy: proxy flag forwarded to myspyder
    :return: DataFrame of per-seat buy/sell detail rows
    """
    url = "http://stock.jrj.com.cn/action/lhb/getStockLhbDetatil.jspa?vname=detailInfo&stockcode=%s&date=%s"%(code,date)
    html = myspyder(url,proxy=proxy).content.decode('utf-8')
    # the JSON payload may itself contain ';', so splitting on ';' can cut
    # it short -- the pieces are re-joined below until the text ends in '}'
    html = re.split("\;",html)
    # drop the leading 15-char JS assignment (presumably "var detailInfo=")
    # -- TODO confirm against a live response
    json_detail = html[0][15:]
    j=1
    while json_detail[-1] != "}":
        json_detail = json_detail+html[j]
        j=j+1
    # else:
    #     json_detail =  html[0][15:]
    data = json.loads(json_detail)['data']
    df_detail =pd.DataFrame()
    for i in range(len(data)):
        # data[i][1] holds the seat rows for one ranking entry
        tmp_detail = pd.DataFrame(data[i][1],columns=['date', 'code', '买入金额', '卖出金额', '净买入金额', '净买入金额占总成交额', 'pl', '上榜原因', '买卖方向', '营业部代码', '营业部名称', '买入金额占总成交额', '卖出金额占总成交额', '上榜总成交额'])
        df_detail = pd.concat((tmp_detail,df_detail))
    return df_detail
Example #9
0
def get_is_data(date,code):
      """Fetch institutional-shareholding detail for (*date*, *code*) from
      the EastMoney data center.

      :param date: report date string accepted by the API (fd=)
      :param code: stock code
      :return: (ok, df) -- ok is False only when parsing raised; df has
          columns ['code','date','fundcode','fundname','type','vol','amo',
          'percent','percent_cir'] and is empty when the API reports no data
      """
      url="http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx?type=ZLSJ&sty=CCJGMX&p=1&ps=5000&fd=%s&code=%s&js=[(x)]"%(date,code)
      html=myspyder(url,proxy=0)
      try:
         doc = html.content.decode('utf-8')
         # the API returns the literal string '[{stats:false}]' for no data
         if doc != '[{stats:false}]':
             # NOTE(review): eval() on remote content is unsafe; payload is
             # presumably a JS list of quoted CSV strings -- confirm whether
             # json.loads would accept it before replacing eval
             doc = eval(doc)
             table = []
             for data in doc:
                table.append(re.split(',',data))
             df = pd.DataFrame(table,columns=['code','stockname','fundcode','fundname','type','vol','amo','percent','percent_cir','date'])
             # reorder / drop stockname for the caller's schema
             df = df[['code','date','fundcode','fundname','type','vol','amo','percent','percent_cir']]
             return True,df
         else:
             return True,pd.DataFrame()
      except Exception as e:
         print("%s,%s:%s"%(code,date,e))
         return False,pd.DataFrame()
Example #10
0
def zdtld(stype, sdate, ser='both'):
    """Fetch JRJ limit-move "force" statistics for *sdate* and append them
    to the `stockdata.zdt` table.

    :param stype: statistic type; the URL path uses "<stype>Force"
    :param sdate: date to fetch
    :param ser: which database(s) to write: 'local', 'server' or 'both'
    :return: the parsed DataFrame, or an empty DataFrame on failure
    """
    strdate = sdate.strftime("%Y%m%d")
    url = "http://home.flashdata2.jrj.com.cn/limitStatistic/%sForce/%s.js" % (
        stype, strdate)
    engine = conn()
    try:
        html = myspyder(url, proxy=0)
        js = html.content.decode('gbk')
        js = re.findall(r'"Data":(.*)\};', js, re.DOTALL)[0]
        # the feed emits bare `Infinity`, which json.loads cannot parse
        js = js.replace('Infinity', '0')
        js = json.loads(js)
        df = pd.DataFrame(js,
                          columns=[
                              'code', 'name', 'close', 'percentage', 'fcb',
                              'flb', 'fdmoney', 'lasttime', 'firsttime',
                              'opentimes', 'amplitude', 'force'
                          ])
        # prepend the date so the time columns become full timestamps
        df['firsttime'] = str(sdate) + ' ' + df['firsttime']
        df['lasttime'] = str(sdate) + ' ' + df['lasttime']
        if ser == 'local' or ser == 'both':
            try:
                df.to_sql('zdt',
                          engine,
                          schema='stockdata',
                          if_exists='append',
                          index=False)
            except Exception as e:
                print("local:%s[%s]" % (sdate, e))
        # BUG FIX: this gate previously re-tested ser == 'local', so
        # ser='local' wrote the same rows twice and ser='server' never wrote
        if ser == 'server' or ser == 'both':
            try:
                # NOTE(review): both branches write through the same engine;
                # presumably this one should use a server connection -- confirm
                df.to_sql('zdt',
                          engine,
                          schema='stockdata',
                          if_exists='append',
                          index=False)
            except Exception as e:
                print("server:%s[%s]" % (sdate, e))
        return df
    except Exception as e:
        print(sdate, e)
        return pd.DataFrame()
Example #11
0
def split_szfh(iLong, proxy=0):
    """Fetch bonus-share / cash-dividend (送转派) records from EastMoney.

    Queries the four most recent quarterly report periods and parses each
    announcement's CONTENT for 送 (bonus), 转 (transfer) and 派 (cash)
    ratios.

    :param iLong: page size for the API request
    :param proxy: proxy flag forwarded to myspyder
    :return: DataFrame ['code', 'date', '红股', '红利'] sorted by date desc
    """
    today = datetime.date.today()  #- datetime.timedelta(days=lastday)
    iyear = int(str(today)[0:4])
    imonth = int(str(today)[5:7])
    # List_stock = get_stocklist()
    # iLong = int((round(len(List_stock) / 1000, 0) + 1) * 1000)
    # quarter report-period codes; fall back to last year's period when the
    # current-year report cannot exist yet
    Q4 = str(iyear - 1) + '001002'
    Q3 = str(iyear - 1) + '001005' if imonth <= 9 else str(iyear) + '001005'
    Q2 = str(iyear - 1) + '001001' if imonth <= 6 else str(iyear) + '001001'
    Q1 = str(iyear - 1) + '001003' if imonth <= 3 else str(iyear) + '001003'
    result = []
    for param in (Q1, Q2, Q3, Q4):
        try:
            url = "http://emdatah5.eastmoney.com/FHSZ/V/GetAssignEffectList?SECURITYCODE=&REPORTDATE=%s&TYPE=0&ST=4&SR=2&PAGENUM=1&PAGESIZE=%s&selectedType=2" % (
                param, iLong)
            html = myspyder(url, proxy=proxy)
            js = html.json()
            # renamed from `list`, which shadowed the builtin
            records = js['Result']['List']
            for record in records:
                content = record['CONTENT']
                code = record['SECURITYCODESimple']
                date = record['EXDIVIDENDDATE']
                d1 = re.findall(u"送([0-9.]*)", content)
                d2 = re.findall(u"转([0-9.]*)", content)
                d3 = re.findall(u"派([0-9.]*)", content)

                # treat anything but exactly one match as "no ratio"
                d1 = float(d1[0]) if len(d1) == 1 else 0.0
                d2 = float(d2[0]) if len(d2) == 1 else 0.0
                d3 = float(d3[0]) if len(d3) == 1 else 0.0
                result.append([code, date, d1 + d2, d3])
        except Exception:
            # best-effort per quarter: a failed fetch/parse skips it
            pass
    result = pd.DataFrame(result,
                          columns=['code', 'date', '红股', '红利']).sort_values(
                              'date', ascending=False).reset_index(drop=True)
    result['date'] = result['date'].astype('datetime64[ns]')

    return result
Example #12
0
def update_focussql(N=5):
    """Crawl 163.com "公告汇总" (announcement digest) pages and upsert the
    per-stock notice text into `usefuldata`.`focus`.

    :param N: exclusive upper bound of list pages to crawl (pages 1..N-1)
    :return: None; writes directly to the database
    """
    engine=conn()
    urllist = []
    # page 1 has no suffix; later pages are zero-padded to two digits
    for i in range(1, N):
        if i == 1:
            urllist.append("http://money.163.com/special/002557S6/newsdata_gp_index.js")
        else:
            page = "0" + str(i) if len(str(i)) == 1 else str(i)
            urllist.append("http://money.163.com/special/002557S6/newsdata_gp_index_%s.js" % (page))
    noticelist = []
    for url in urllist:
        html = myspyder(url, proxy=0)
        doc = html.content.decode('gbk')
        # strip the JSONP wrapper; NOTE(review): eval() on remote content
        # is unsafe -- confirm whether json.loads would accept the payload
        js = doc.replace('data_callback(', '').replace(')', '')
        js = eval(js)
        for i in range(len(js)):
            # titlelist.append([js[i]['title'],js[i]['time'],js[i]['docurl']])
            # keep only announcement-digest articles
            if '公告汇总' in js[i]['title']:
                noticelist.append([js[i]['title'], js[i]['time'], js[i]['docurl']])

    sql = "select `证券代码`,`证券简称` from `basedata`"
    stocklist = pd.read_sql(sql, engine)
    final = []
    for i, [title, stime, docurl] in enumerate(noticelist):
        print(i, title, stime, docurl)
        html = myspyder(docurl, proxy=0)
        doc = html.content
        soup = bs(doc, 'html5lib')
        result = soup.select('p')
        updateresult = []

        # NOTE(review): this inner `i` shadows the enumerate index above;
        # harmless here (outer i only used in the print) but fragile
        for i in result:
            # per-stock items are emphasized with <strong>
            if "<strong>" in str(i):
                if ':' in i.text:
                    # "<name>:<text>" form -- match the name exactly
                    sname, stext = re.split(':', i.text)
                    for code, name in stocklist.values:
                        if sname == name:
                            updateresult.append([code, sname, datetime.datetime.strptime(stime[:10], '%m/%d/%Y'),
                                                 stext])
                else:
                    # no colon: match any stock name mentioned in the text
                    for code, name in stocklist.values:
                        if name in i.text:
                            updateresult.append([code, name, datetime.datetime.strptime(stime[:10], '%m/%d/%Y'),
                                                 i.text])

        df = pd.DataFrame(updateresult, columns=['code', 'name', 'time', 'text'])
        df['time'] = df['time'].astype('datetime64[ns]')

        # merge duplicate rows for the same code: concatenate distinct
        # texts, keep a single text when the first 8 chars agree
        for code in df['code'].values:
            duplicate = df[df['code'] == code].reset_index(drop=True)

            if len(df[df['code'] == code]) != 1:
                text = ''
                for i in range(len(duplicate) - 1):
                    if duplicate['text'][i][:8] == duplicate['text'][i + 1][:8]:
                        text = duplicate['text'][i]
                    else:
                        text = duplicate['text'][i] + duplicate['text'][i + 1]
                final.append([code, duplicate['name'][0], duplicate['time'][0], text])
            else:
                final.append([code, duplicate['name'][0], duplicate['time'][0], duplicate['text'][0]])
    final = pd.DataFrame(final, columns=['code', 'name', 'time', 'text'])

    # clamp notice dates to the latest trading date present in `indexdb`
    sql_lastdate="select DISTINCT `date` from `indexdb` ORDER BY `date` DESC limit 1"
    sdate = pd.read_sql(sql_lastdate, engine)['date'][0]
    checkdatelist=set(final['time'].values)
    for checkdate in checkdatelist:
        sql_check="select DISTINCT `focus` from `usefuldata` WHERE `date` = '%s'"%(str(checkdate))
        scheck = pd.read_sql(sql_check,engine)['focus']

        # only (re)write dates that have at most one focus entry so far
        if len(scheck)<=1:
            df = final[final['time']==checkdate].reset_index(drop=True).values
            for scode,sname,stime,stext in df:
                stime = datetime.date(int(str(stime)[0:4]),int(str(stime)[5:7]),int(str(stime)[8:10]))
                if stime>sdate:
                    stime=sdate
                else:
                    stime=stime
                params = (scode, str(stime), stext, stext)
                # print(params)
                sql = "insert into `usefuldata` (`code`,`date`,`focus`) VALUE (%s,%s,%s) ON DUPLICATE KEY UPDATE `focus` = %s;"
                try:

                    engine.execute(sql,params)
                except Exception as e:
                    print("local:",e)
                    print(scode,sname,str(stime),stext)
Example #13
0
def news_content():
    """Back-fill the `content` column of the `news` table.

    Selects rows whose content is NULL, or whose title is marked 更新中
    ("updating") within the last 5 days, fetches the full text from the
    stcn.com app API and updates the row when the text changed.
    Errors are collected into <path()>/error/update_newscontent.csv.
    """
    # ===================
    engine = conn()
    # re-check "updating" articles published in the last 5 days
    today = datetime.date.today() - datetime.timedelta(days=5)
    # ===================set requests================== #
    # rqs = rq.session()
    # rqs.keep_alive = False
    # ===================get news content================ #

    sql_news_null = text(
        "SELECT * FROM `news` WHERE `content` IS NULL OR (`title` LIKE :ud and `datetime`>=:dt)"
    )
    df_listurl = pd.read_sql(sql_news_null,
                             engine,
                             params={
                                 "ud": "%更新中%",
                                 "dt": str(today)
                             })

    list_url = df_listurl['link'].values
    list_title = df_listurl['title'].values

    list_content = df_listurl['content'].values
    errorlist = []
    # list_url=['http://kuaixun.stcn.com/2017/1110/13761584.shtml']
    #ip_list =pd.read_csv('ip.csv')['ip'].values

    for i in range(len(list_url)):
        # polite rate limiting between API calls
        time.sleep(random.random() / 10 + 3)
        # the article id is the filename stem of the original link
        newsid = re.split("\.", re.split("\/", list_url[i])[-1])[0]
        newurl = "http://app.stcn.com/?app=article&controller=article&action=fulltext&contentid=%s" % (
            newsid)
        try:
            # [1:-2] strips the JSONP-style wrapper characters
            html = myspyder(newurl, proxy=0).content.decode('utf-8')[1:-2]
            newscontent = json.loads(html)['content']
            # print(newscontent)
            # newsSoup = bs(html, 'html.parser')
            # newsSouptitle = newsSoup.select(".intal_tit")[0].h2.text
            # newsSoup = newsSoup.select(".txt_con")[0]
            # [s.extract() for s in newsSoup('a')]
            # [s.extract() for s in newsSoup('script')]
            # [s.extract() for s in newsSoup('div')]
            # newscontent = str(newsSoup)
            # remove |STCNTTTP|...|STCNTTTP| embedded media markers
            newscontent = "".join(
                re.split("\|STCNTTTP\|.+\|STCNTTTP\|", newscontent))
            if newscontent == list_content[i]:
                # unchanged -- skip the UPDATE
                pass
            else:
                print("NEW:", list_title[i])
                # print(newscontent)
                sql_update_newscontent = "update `news` set `content`=%s WHERE `link`=%s"
                param = (newscontent, list_url[i])

                engine.execute(sql_update_newscontent, param)

        except Exception as e:
            # print(url,e)
            errorlist.append((list_url[i], e))
    df_errorlist = pd.DataFrame(errorlist, columns=['link', 'error'])
    df_errorlist.to_csv(path() + '/error/update_newscontent.csv')
Example #14
0
def get_shareholder_data(stocklist=get_stocklist_prefix('sh', 'sz', 1)):
    """Fetch top-10 shareholder / circulating-shareholder / shareholder-count
    data from EastMoney F10 and insert it into the `shareholder`,
    `cirholder` and `shareholdernumber` tables.

    NOTE(review): the default argument calls get_stocklist_prefix at import
    time, so the default list is frozen when this module loads -- confirm
    that is intended.

    :param stocklist: iterable of prefixed stock codes (e.g. "sh600000");
        code[2:] is stored as the bare code
    :return: list of codes whose processing raised
    """
    errorlist = []
    engine = conn()
    for code in stocklist:

        url = "http://emweb.securities.eastmoney.com/PC_HSF10/ShareholderResearch/ShareholderResearchAjax?code=%s" % (
            code)
        html = myspyder(url, proxy=0)

        try:
            doc = json.loads(html.content)
            ##  get shareholder data (sdgd = top-10 shareholders)
            if len(doc['sdgd']) > 0:
                for i in range(len(doc['sdgd'])):
                    table = pd.DataFrame(doc['sdgd'][i]['sdgd'])
                    del table['bdbl']
                    table = table.rename(
                        columns={
                            'cgs': 'quantity',
                            'gdmc': 'name',
                            'gflx': 'type',
                            'mc': 'rank',
                            'rq': 'date',
                            'zj': 'change',
                            'zltgbcgbl': 'percentage'
                        })
                    table['code'] = code[2:]
                    table = table[[
                        'code', 'date', 'rank', 'name', 'quantity',
                        'percentage', 'change', 'type'
                    ]]
                    for i in range(len(table)):
                        # normalize textual change markers: 不变 = unchanged,
                        # 新进 = new entry (change equals full holding)
                        # NOTE(review): chained assignment; may hit pandas
                        # SettingWithCopy behavior -- verify writes land
                        if table['change'][i] == '不变':
                            table['change'][i] = 0
                        if table['change'][i] == '新进':
                            table['change'][i] = table['quantity'][i]

                        params = [str(param) for param in table.values[i]]
                        sql_query = "INSERT IGNORE INTO `shareholder`(`code`, `date`, `rank`, `name`, `quantity`, `percentage`, `change`, `type`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s)"
                        engine.execute(sql_query, params)

            ##  get cirholder data (sdltgd = top-10 circulating shareholders)
            if len(doc['sdltgd']) > 0:
                for j in range(len(doc['sdltgd'])):
                    table2 = pd.DataFrame(doc['sdltgd'][j]['sdltgd'])
                    del table2['bdbl']
                    table2 = table2.rename(
                        columns={
                            'cgs': 'quantity',
                            'gdmc': 'name',
                            'gdxz': 'type',
                            'gflx': 'abh',
                            'mc': 'rank',
                            'rq': 'date',
                            'zj': 'change',
                            'zltgbcgbl': 'percentage'
                        })
                    table2['code'] = code[2:]
                    table2 = table2[[
                        'code', 'date', 'rank', 'name', 'type', 'quantity',
                        'percentage', 'change', 'abh'
                    ]]
                    for i in range(len(table2)):
                        if table2['change'][i] == '不变':
                            table2['change'][i] = 0
                        if table2['change'][i] == '新进':
                            table2['change'][i] = table2['quantity'][i]

                        params = [str(param) for param in table2.values[i]]
                        sql_query = "INSERT ignore INTO `cirholder`(`code`, `date`, `rank`, `name`, `type`, `quantity`, `percentage`, `change`, `abh`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)"
                        engine.execute(sql_query, params)

            ## number of shareholders (股东人数)
            if len(doc['gdrs']) > 0:
                table3 = pd.DataFrame(doc['gdrs'])
                table3 = table3.rename(
                    columns={
                        'cmjzd': 'scr',
                        'gdrs': 'shareholders',
                        'gdrs_jsqbh': 'shschange',
                        'qsdgdcghj': 'top10',
                        'qsdltgdcghj': 'cirtop10',
                        'gj': 'close',
                        'rjcgje': 'avgamount',
                        'rjltg': 'avgcirquantity',
                        'rjltg_jsqbh': 'avgcirchange',
                        'rq': 'date'
                    })
                table3['code'] = code[2:]
                table3 = table3[[
                    'code', 'date', 'close', 'scr', 'top10', 'cirtop10',
                    'shareholders', 'shschange', 'avgamount', 'avgcirquantity',
                    'avgcirchange'
                ]]
                for i in range(len(table3)):
                    # expand 万 (10,000) suffixed values to plain numbers
                    if '万' in table3['shareholders'][i]:
                        table3['shareholders'][i] = float(
                            table3['shareholders'][i].replace('万', '')) * 10000
                    if '万' in table3['avgamount'][i]:
                        table3['avgamount'][i] = float(
                            table3['avgamount'][i].replace('万', '')) * 10000
                    if '万' in table3['avgcirquantity'][i]:
                        table3['avgcirquantity'][i] = float(
                            table3['avgcirquantity'][i].replace('万',
                                                                '')) * 10000
                    # '--' placeholders become SQL NULL
                    params = [
                        str(param) if param != '--' else None
                        for param in table3.values[i]
                    ]
                    sql_query = "INSERT IGNORE INTO `shareholdernumber`(`code`, `date`, `close`, `scr`, `top10`, `cirtop10`, `shareholders`, `shschange`, `avgamount`, `avgcirquantity`, `avgcirchange`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
                    engine.execute(sql_query, params)

        except Exception as e:
            errorlist.append(code)
            print("%s:%s" % (code, e))
        #
        # if ser == "both" or ser == "local":
        #     conn.close()
        # if ser == "both" or ser == "server":
        #     conns.close()

    return errorlist
Example #15
0
def update_stocklist(proxy=0):
    """Fetch the latest IPO list from EastMoney, insert new stocks into
    `stocklist` and back-fill IPO date/price in `basedata`.

    :param proxy: 0 close / 1 open
    :return: list of codes that failed, or a one-element failure marker
        when the feed could not be fetched/parsed at all
    """
    url = "http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx?type=NS&sty=NSSTV5&st=12&sr=true&p=1&ps=100"
    html = "error!"
    times_retry = 3
    # myspyder returns the string "error!" on failure; retry up to 3 times
    while html == "error!" and times_retry != 0:
        html = myspyder(url=url, proxy=proxy)
        times_retry -= 1

    try:
        html_doc = str(html.content, 'utf-8')
        # each record is a double-quoted CSV string inside the JS body
        return_list = re.findall(r"\"(.*?)\"", html_doc)
        xgtable = [re.split(r"\,", elem) for elem in return_list]
        xg = pd.DataFrame(xgtable)
        # keep only name / code / IPO price / IPO date columns
        xg = xg[[3, 4, 10, 13]].rename(columns={
            3: "name",
            4: "code",
            10: "ipoprice",
            13: "ipodate"
        })
        xg['ipodate'] = xg['ipodate'].astype('datetime64[ns]')
        xg['name'] = xg['name'].astype('str')
        xg['code'] = xg['code'].astype('str')
        xg = xg.dropna().reset_index(drop=True)
        print("STOCKLIST:数据获取成功,正在写入数据库...")

        engine = conn()

        stocklist = get_stocklist()
        Errorlist = []
        sql_ipocheck = "select `证券代码` from `basedata` WHERE `首发价格` is NULL"
        ipocheck = pd.read_sql(sql_ipocheck, engine)['证券代码'].values

        for i in range(len(xg)):
            code, name = xg['code'][i], xg['name'][i]
            ipoprice, ipodate = xg['ipoprice'][i], xg['ipodate'][i]
            # presumably getpinyin renders 银行 as YX; project convention
            # is YH -- confirm against getpinyin
            pinyin = getpinyin(name) if '银行' not in name else getpinyin(
                name).replace('YX', 'YH')
            market = '上海证券交易所' if code[0] == '6' else '深圳证券交易所'
            # process stocks that are new or still missing an IPO price
            if code not in stocklist or code in ipocheck:
                try:
                    sql_xg = "INSERT ignore INTO `stocklist`(`证券代码`, `证券简称`, `上市市场`,`拼音缩写`) VALUES (%s,%s,%s,%s)"
                    sql_ipo = "update `basedata` set `首发日期`=%s ,`首发价格`=%s WHERE `证券代码`=%s"

                    engine.execute(sql_xg, (code, name, market, pinyin))
                    engine.execute(sql_ipo, (str(ipodate), ipoprice, code))

                    print("STOCKLIST:", code, ":更新成功!")
                except Exception as e:
                    print("STOCKLIST:", code, ":更新失败!", e)
                    Errorlist.append(code)

        output = str(datetime.date.today()) + (" 更新完成!" if len(Errorlist) == 0
                                               else " 更新出错!请检查!")
        print("STOCKLIST:", output)
        return Errorlist
    except Exception:
        # covers the all-retries-failed case (html is still "error!")
        # as well as any parse/DB error
        return ['数据获取失败...']
Example #16
0
def get_forecast(proxy=0, lastday=0, update=1):
    """Fetch earnings-forecast (业绩预告) data from EastMoney and insert it
    into the `forecast` table.

    :param proxy: ip proxy flag, 0 off / 1 on
    :param lastday: days since the last update (0 = today)
    :param update: 1 = incremental (only rows dated >= today), anything
        else inserts everything fetched
    :return: DataFrame of errors collected during the run; also written to
        <path()>/error/update_forecast.csv
    """
    errorList = []
    today = datetime.date.today() - datetime.timedelta(days=lastday)
    print("FORECAST:", today)
    iyear = int(str(today)[0:4])
    imonth = int(str(today)[5:7])
    List_stock = get_stocklist()
    # page size: stock count rounded up to the next thousand
    iLong = int((round(len(List_stock) / 1000, 0) + 1) * 1000)
    # candidate quarterly report dates; fall back to last year's quarter
    # when the current-year forecast cannot be out yet
    Q4 = datetime.datetime(iyear-1,12,31).strftime('%Y-%m-%d') if imonth <=2 else \
        datetime.datetime(iyear,12,31).strftime('%Y-%m-%d')
    Q3 = datetime.datetime(iyear-1,9,30).strftime('%Y-%m-%d') if imonth < 8 else \
        datetime.datetime(iyear,9,30).strftime('%Y-%m-%d')
    Q2 = datetime.datetime(iyear-1,6,30).strftime('%Y-%m-%d') if imonth < 5 else \
        datetime.datetime(iyear, 6, 30).strftime('%Y-%m-%d')
    Q1 = datetime.datetime(iyear, 3, 31).strftime('%Y-%m-%d')
    List_Quarter = [Q1, Q2, Q3, Q4]
    df_forecast = pd.DataFrame()
    for Quarter in List_Quarter:
        url = 'http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx?type=SR&sty=YJYG&fd=%s&st=4&sr=true&p=1&ps=%s' \
              % (Quarter, iLong)
        content = "error!"
        times_retry = 3
        # myspyder returns "error!" on failure; retry up to 3 times
        while content == "error!" and times_retry != 0:
            content = myspyder(url, proxy=proxy).content.decode('utf-8')
            times_retry -= 1
        print("FORECAST:", Quarter, "数据抓取完毕,正在对数据进行处理...")
        try:
            # each record is a double-quoted CSV string inside the JS body
            return_list = re.findall(r"\"(.*?)\"", content)
            fctable = [re.split(r"\,", record) for record in return_list]
            fctable = pd.DataFrame(fctable,
                                   columns=['code', '股票简称', '业绩变动', '变动幅度', '预告类型', '同期净利润'
                                       , '预喜预悲', 'date', '财报日期'])
            fctable = fctable[['code', '业绩变动', '变动幅度', '预告类型', '同期净利润', 'date', '财报日期']]
            df_forecast = pd.concat((df_forecast, fctable)).sort_values('date', ascending=False).drop_duplicates()
        except Exception as e:
            errorList.append(e)
    # BUG FIX: the keyword is `errors=`, not `error=` -- the original call
    # raised TypeError; '[ns]' unit matches the rest of this module
    df_forecast['date'] = df_forecast['date'].astype('datetime64[ns]', errors='ignore')
    df_forecast['财报日期'] = df_forecast['财报日期'].astype('datetime64[ns]', errors='ignore')
    print("FORECAST: 数据处理完毕,正在更新数据库...")

    if update == 1:
        # incremental mode: only keep forecasts announced today or later
        df_forecast = df_forecast[df_forecast['date'] >= today]

    # reset to a clean 0..n-1 index so positional access below is safe
    # (after concat the index contains duplicate labels)
    df_forecast = df_forecast.reset_index(drop=True)

    engine = conn()

    for j in range(len(df_forecast)):
        try:
            # .at replaces the long-removed DataFrame.get_value accessor
            Scode = df_forecast.at[j, 'code']
            Sdate = str(df_forecast.at[j, 'date'])
            Schange = df_forecast.at[j, '业绩变动']
            Spercent = df_forecast.at[j, '变动幅度']
            Stype = df_forecast.at[j, '预告类型']
            Sprofit = df_forecast.at[j, '同期净利润']
            Srepdate = str(df_forecast.at[j, '财报日期'])
            # 变动幅度 is either "lower%~upper%", a single "upper%" or empty
            ul = re.split("~", Spercent)
            if len(ul) == 2:
                upper = ul[1].replace('%', '')
                lower = ul[0].replace('%', '')
            elif len(ul) == 1 and ul[0] != '':
                upper = ul[0].replace('%', '')
                lower = None
            else:
                upper = None
                lower = None

            sql_update = "INSERT IGNORE INTO `forecast`(`code`, `date`, `业绩变动`, `变动幅度`, `预告类型`, `同" \
                         "期净利润`, `财报日期`,`上限`,`下限`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)"
            param = (Scode, Sdate, Schange, Spercent, Stype, Sprofit, Srepdate, upper, lower)

            engine.execute(sql_update, param)

        except Exception as e:
            errorList.append(e)
    dfErrorList = pd.DataFrame({'error': errorList})
    dfErrorList.to_csv(path() + '/error/update_forecast.csv')
    print("FORECAST: 更新数据库完毕!")
    return dfErrorList
Example #17
0
def unusual():
    """Fetch the raw "unusual position changes" feed from eastmoney.com.

    Returns the response body as a UTF-8 decoded string; no parsing is done.
    """
    endpoint = ("http://nuyd.eastmoney.com/EM_UBG_PositionChangesInterface/"
                "api/js?style=top&js=[(x)]&dtformat=HH:mm:ss&ac=normal")
    response = myspyder(endpoint, 0)
    return response.content.decode('utf-8')
Example #18
0
def notices(page, ser='both', proxy=0):
    """Scrape one page of company announcements from eastmoney.com into `notice`.

    Fetches up to 1000 announcements for *page*, flattens the nested JSON
    records, keeps only rows published today, and INSERT IGNOREs those whose
    infocode is not already present in the `notice` table.

    :param page: 1-based page index of the announcements listing.
    :param ser: unused legacy switch (kept for caller compatibility).
    :param proxy: forwarded to myspyder; selects proxy vs. direct connection.
    :return: None on success, *page* on failure (so callers can retry).
    """
    today = datetime.date.today()  #- datetime.timedelta(days=2)
    # Small randomized delay to avoid hammering the server.
    sleep(random() / 10 * 2 + 0.5)
    print("NOTICE:page", page)
    engine = conn()
    try:

        url = "http://data.eastmoney.com/notices/getdata.ashx?FirstNodeType=0&CodeType=1&PageIndex=%s&PageSize=1000" % (
            page)
        html = myspyder(url, proxy=proxy).content
        # Response is wrapped (JSONP-style); strip 7 leading and 1 trailing chars.
        js = json.loads(html.decode('gbk')[7:-1])['data']

        table = pd.DataFrame()
        for i in range(len(js)):
            # Merge the first entry of each nested list into the flat record.
            output = js[i]
            output1 = js[i]['CDSY_SECUCODES'][0]
            output2 = js[i]['ANN_RELCOLUMNS'][0]
            output3 = js[i]['ANN_RELCODES'][0]
            del output['CDSY_SECUCODES'], output['ANN_RELCOLUMNS'], output[
                'ANN_RELCODES']
            output.update(output1)
            output.update(output2)
            output.update(output3)
            # Drop the trailing 6 characters from the timestamps before parsing.
            output['NOTICEDATE'] = output['NOTICEDATE'][:-6]
            output['EUTIME'] = output['EUTIME'][:-6]
            # ATTACHTYPE '0': rebuild the PDF URL from the infocode instead of
            # trusting the feed's Url field.
            output['Url'] = "http://pdf.dfcfw.com/pdf/H2_" + output[
                'INFOCODE'] + "_1.pdf" if output[
                    'ATTACHTYPE'] == '0' else output['Url']
            tmp_table = pd.DataFrame.from_dict(output, orient='index')
            table = pd.concat((tmp_table.T, table), ignore_index=True)
        table = table[[
            'NOTICEDATE', 'NOTICETITLE', 'INFOCODE', 'EUTIME', 'Url',
            'SECURITYCODE', 'SECURITYFULLNAME', 'SECURITYTYPE', 'TRADEMARKET',
            'COLUMNNAME'
        ]]
        table['NOTICEDATE'] = table['NOTICEDATE'].astype('datetime64[ns]')
        table['EUTIME'] = table['EUTIME'].astype('datetime64[ns]')
        table = table.rename(
            columns={
                'NOTICEDATE': 'date',
                'NOTICETITLE': 'title',
                'INFOCODE': 'infocode',
                'EUTIME': 'eutime',
                'Url': 'url',
                'SECURITYCODE': 'code',
                'SECURITYFULLNAME': 'name',
                'SECURITYTYPE': 'security_type',
                'TRADEMARKET': 'market',
                'COLUMNNAME': 'type'
            })
        # Keep only announcements published today or later.
        table = table[table['eutime'] >= today]
        # table.to_csv(path()+'/data/notice/'+str(today)+'.csv',encoding='utf-8')

        # Infocodes already stored since yesterday, used to skip duplicates.
        sql_check = "select `infocode` from `notice` where `eutime`>'%s'" % (
            today - datetime.timedelta(days=1))

        list_infocode = pd.read_sql(sql_check, engine)['infocode'].values

        for line in table.values:
            param = [str(ele) for ele in line]
            if param[2] not in list_infocode:
                sql_updae =" insert ignore into `notice` (`date`, `title`, `infocode`, `eutime`, `url`, `code`, `name`, " \
                           "`security_type`, `market`, `type`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"

                engine.execute(sql_updae, tuple(param))
                # conn.commit()

            else:
                pass
        # return None
    except Exception as e:
        print(e)
        return page
Example #19
0
def _cn_amount_to_number(value):
    """Convert a Chinese-unit amount string to a plain number.

    '1.2万亿' -> 1.2e12, '3.4亿' -> 3.4e8, '5.6万' -> 5.6e4.
    Non-string values and strings without a recognized unit are returned unchanged.
    """
    if not isinstance(value, str):
        return value
    if '万亿' in value:
        return float(value.replace('万亿', '')) * 1000000000000
    if '亿' in value:
        return float(value.replace('亿', '')) * 100000000
    if '万' in value:
        return float(value.replace('万', '')) * 10000
    return value


def update_mb_single(code):
    """Fetch and store the main-business (主营构成) breakdown for one stock.

    Downloads the BusinessAnalysis JSON from eastmoney.com for *code*
    (market-prefixed, e.g. 'sh600000'; code[2:] is stored as `code`),
    saves the raw payload under ./data/mainbusiness/, normalizes the
    percentage and amount columns, and upserts the rows via Database().

    :param code: market-prefixed stock code string.
    :return: *code* on failure (so callers can retry), None on success.
    """
    proxy = 0
    url = "http://emweb.securities.eastmoney.com/PC_HSF10/BusinessAnalysis/BusinessAnalysisAjax?code="

    try:
        html = myspyder(url + code, proxy=proxy)
        js = json.loads(html.content.decode('utf-8'))
        # Keep a copy of the raw response for debugging / reprocessing.
        with open("./data/mainbusiness/" + code[2:] + ".json", 'w', encoding='utf-8') as f:
            f.write(str(html.content, encoding='utf-8'))

        # Flatten product ('cp'), industry ('hy') and region ('qy') breakdowns
        # of every reporting period into one table, tagging each row's kind.
        table = []
        for line in js['zygcfx']:
            listcp = [list(line['cp'][i].values()) + ['产品'] for i in range(len(line['cp']))]
            listhy = [list(line['hy'][i].values()) + ['行业'] for i in range(len(line['hy']))]
            listqy = [list(line['qy'][i].values()) + ['地区'] for i in range(len(line['qy']))]
            table = table + listcp + listhy + listqy
        df = pd.DataFrame(table, columns=['报表日期', '主营构成', '主营收入(元)', '收入比例', '主营成本(元)', '成本比例',
                                          '主营利润(元)', '利润比例', '毛利率(%)', '9', '主营收入', '分类'])
        df['code'] = code[2:]
        df = df[['code', '报表日期', '主营构成', '主营收入', '收入比例', '主营成本(元)', '成本比例',
                 '主营利润(元)', '利润比例', '毛利率(%)', '分类']].values
        for i in range(len(df)):
            # Strip the '%' suffix from the four ratio columns.
            for col in (4, 6, 8, 9):
                if isinstance(df[i][col], str):
                    df[i][col] = df[i][col].replace('%', '')
            # Convert Chinese-unit amounts in cost (5) and profit (7).
            # BUGFIX: each column is now converted independently; previously
            # column 7 was only handled when column 5 happened to be a string
            # (and raised TypeError when column 7 was not a string).
            df[i][5] = _cn_amount_to_number(df[i][5])
            df[i][7] = _cn_amount_to_number(df[i][7])
        df = pd.DataFrame(df, columns=['code', '报表日期', '主营构成', '主营收入', '收入比例', '主营成本', '成本比例',
                                       '主营利润', '利润比例', '毛利率', '分类'])
        df['报表日期'] = pd.to_datetime(df['报表日期'])
        # Drop placeholder rows and anything dated before 2001.
        df = df[df['报表日期'] > datetime.date(2001, 1, 1)]
        df = df[df['主营构成'] != '']
        df = df[df['主营构成'] != '--']
        df = df.replace('--', '')
        df['报表日期'] = df['报表日期'].astype('str')
        # Round-trip through JSON to get plain records for the DB layer.
        params = json.loads(df.to_json(orient='records', force_ascii=False))
        db = Database()
        db.c(db.table.mainbusiness, params)
    except Exception as e:
        print("MAINBUSINESS:", code, e)
        return code
Example #20
0
def spo(proxy=0):
    """
    http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx?type=SR&sty=ZF&p=1&ps=5000&st=5

    Update the SPO (secondary public offering) data from eastmoney.com.

    Fetches the listing, keeps offerings whose 发行日期 is within the last
    100 days, snapshots them to a CSV under data/spo_done/, and INSERT
    IGNOREs each row into the `spo_done` table ('-' fields become NULL).

    :param proxy: use proxy set proxy=1 if not proxy=0, default 0
    :return: list of exceptions encountered (empty on full success)
    """
    print("SPO: Running...")
    errorlist = []
    # Only keep offerings from the last 100 days.
    cutoff = datetime.date.today() - datetime.timedelta(days=100)
    url = "http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx?type=SR&sty=ZF&p=1&ps=1000&st=5"
    html = myspyder(url, proxy=proxy).content
    # SECURITY NOTE(review): eval() on a remote payload is dangerous — the
    # response is expected to be a JS array literal of comma-joined strings.
    # Prefer ast.literal_eval (or a JSON endpoint) once the format is verified.
    table = eval(html.decode('utf-8'))
    rows = [ele.split(',') for ele in table]
    df_spo = pd.DataFrame(rows, columns=['code', 'name', '发行方式', '发行总数', '发行价格', '现价', '发行日期', '增发上市日期',
                                         '8', '增发代码', '网上发行', '中签号公布日', '中签率', '13', '14', '15', '16'])
    df_spo = df_spo[['code', 'name', '发行方式', '发行总数', '发行价格', '发行日期', '增发上市日期', '增发代码', '网上发行',
                     '中签号公布日', '中签率']].drop_duplicates()

    df_spo['发行日期'] = df_spo['发行日期'].astype('datetime64[ns]')
    spo = df_spo[df_spo['发行日期'] >= cutoff]
    spo.to_csv(path() + '/data/spo_done/spo_' + str(cutoff) + '.csv', encoding='utf-8')

    engine = conn()
    try:
        # SQL is loop-invariant; build it once.
        sql_update_spo = "INSERT IGNORE INTO `spo_done`(`code`, `name`, `发行方式`, `发行总数`, `发行价格`, " \
                         "`发行日期`, `增发上市日期`, `增发代码`, `网上发行`, `中签号公布日`, `中签率`) VALUES" \
                         " (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
        for elem in spo.values:
            # '-' marks a missing value in the feed; store NULL instead.
            params = [str(param) if param != '-' else None for param in elem]
            engine.execute(sql_update_spo, params)
        print("SPO: Done!")
    except Exception as e:
        print("SPO:", e)
        errorlist.append(e)
    return errorlist