Example #1
0
def _queryFromDB(sql):
    """Run ``sql`` through ``pd.read_sql_query`` on the shared ``ENGINE``.

    Returns whatever ``pd.read_sql_query`` produces, or ``None`` when any
    exception is raised; the exception is logged through ``FileLogger``.
    """
    try:
        return pd.read_sql_query(sql, con=ENGINE)
    except Exception as queryError:
        FileLogger.error(queryError)
        FileLogger.error("read from db error!")
    return None
Example #2
0
def readFile(filePath, encoding='utf8'):
    """Return the full text content of ``filePath``.

    Args:
        filePath: path of the file to read.
        encoding: text encoding used to decode the file.

    Returns:
        The file content as a string, or ``False`` when the file cannot be
        opened or read (the exception is logged through ``FileLogger``).
    """
    try:
        # The context manager closes the handle even when read() fails.
        with open(filePath, mode='r', encoding=encoding) as fp:
            return fp.read()
    except Exception as ex:
        FileLogger.error(ex)
        return False
Example #3
0
def _refineColumns(datadf, columns=None):
    """Return the columns to work on, defaulting to every non-text column.

    Args:
        datadf: DataFrame whose index must be named ``REPORT_DATE``.
        columns: optional list of column names. When it is omitted or empty
            it is filled with every column of ``datadf`` that is not listed
            in ``TEXTCOLUMNS``. A list supplied by the caller is filled in
            place and returned.

    Returns:
        The list of column names. When the index is not ``REPORT_DATE`` an
        error is logged and ``columns`` is returned as received.
    """
    # A fresh list per call: a shared ``[]`` default would keep the columns
    # appended by earlier calls.
    if columns is None:
        columns = []

    if datadf.index.name != 'REPORT_DATE':
        FileLogger.error("REPORT_DATE must be the index!")
    elif len(columns) == 0:
        columns.extend(col for col in datadf.columns
                       if col not in TEXTCOLUMNS)

    return columns
Example #4
0
def downloadFile(url, filePath, timeout=60):
    """Download ``url`` and store the response body in ``filePath``.

    Args:
        url: address to fetch with an HTTP GET.
        filePath: destination path, opened in binary write mode.
        timeout: seconds passed to ``requests.get`` so that a stalled server
            cannot block the caller forever.

    Returns:
        ``True`` when the body was written, ``False`` when the request or the
        write failed (both the URL and the exception are logged).
    """
    try:
        r = requests.get(url, timeout=timeout)
        # Without this check a 4xx/5xx error page would be saved as the file.
        r.raise_for_status()
        with open(filePath, "wb") as fp:
            fp.write(r.content)
        return True
    except Exception as ex:
        FileLogger.error("downloadfile error on path: %s" % url)
        FileLogger.error(ex)
        return False
Example #5
0
def readFile(filePath):
    """Return the full text content of ``filePath``.

    The file is opened in text mode with the platform default encoding.

    Returns:
        The file content as a string, or ``False`` when the file cannot be
        opened or read (the path and the exception are logged).
    """
    try:
        # The context manager closes the handle even when read() fails.
        with open(filePath, 'r') as fp:
            return fp.read()
    except Exception as ex:
        FileLogger.error("read file error on path: %s" % filePath)
        FileLogger.error(ex)
        return False
Example #6
0
def write2File(filePath, content, mode="w+") -> bool:
    """Write ``content`` to ``filePath``.

    Args:
        filePath: destination path.
        content: data handed to ``fp.write``.
        mode: mode string passed to ``open``.

    Returns:
        ``True`` on success, ``False`` when opening or writing failed (the
        path and the exception are logged).
    """
    try:
        # Leaving the ``with`` block flushes and closes the file, also when
        # write() raises, so the handle is never leaked.
        with open(filePath, mode) as fp:
            fp.write(content)
        return True
    except Exception as ex:
        FileLogger.error("write to file error on path: %s" % filePath)
        FileLogger.error(ex)
        return False
Example #7
0
def queryFromDB(sql):
    """Run ``sql`` against the local ``stock`` MySQL database.

    Returns:
        The result of ``pd.read_sql_query``, or ``None`` when connecting or
        querying raised an exception (the exception is logged).
    """
    # NOTE(review): the connection URL embeds the database password; consider
    # moving it to configuration.
    engine = None
    try:
        engine = create_engine(
            "mysql+pymysql://root:4401821211@localhost:3306/stock?charset=utf8"
        )
        return pd.read_sql_query(sql, con=engine)
    except Exception as ex:
        FileLogger.error(ex)
        FileLogger.error("read from db error!")
        return None
    finally:
        # A new engine is built on every call; dispose of it so its
        # connection pool does not stay open after the query.
        if engine is not None:
            engine.dispose()
Example #8
0
def crawlUSStocks():
    """Crawl the history of every code listed in the US stock code CSV.

    The first column of each CSV row is passed to ``crawlHistory``. A failure
    on one code is logged and the loop moves on to the next code after a
    longer pause.
    """
    # Query statement: select ts_code from usstock.stocklist;
    stockList = pd.read_csv("C:/project/Tushare/usstock/code.csv").to_numpy()

    for row in stockList:
        symbol = row[0]
        # %-formatting instead of "+" so a non-string code cannot raise
        # TypeError here, outside the try, and abort the whole run.
        FileLogger.info("running on code: %s" % symbol)
        try:
            crawlHistory(symbol)
            time.sleep(1)
        except Exception as ex:
            FileLogger.error(ex)
            # Log the code itself rather than the whole CSV row.
            FileLogger.error("crawl error on code: %s" % symbol)
            time.sleep(5)
Example #9
0
def keepOnlyQuarterData(datadf, quarter):
    """Keep only the rows whose REPORT_DATE falls on the given quarter end.

    Args:
        datadf: DataFrame whose index is named ``REPORT_DATE`` and holds date
            strings containing ``-MM-DD``.
        quarter: 1, 2, 3 or 4.

    Returns:
        The filtered DataFrame, or ``None`` when the index is not
        ``REPORT_DATE`` or ``quarter`` is not 1-4 (the latter is logged).
    """
    if datadf.index.name != 'REPORT_DATE':
        return None

    quarterEnds = {1: '-03-31', 2: '-06-30', 3: '-09-30', 4: '-12-31'}
    suffix = quarterEnds.get(quarter)
    if suffix is None:
        FileLogger.error("error quarter parameter!")
        return None

    # REPORT_DATE is required to be the index, so it is normally not a column
    # any more and datadf['REPORT_DATE'] would raise KeyError. Read the dates
    # from the index, and only use the column when one was kept as well.
    if 'REPORT_DATE' in datadf.columns:
        dates = datadf['REPORT_DATE']
    else:
        dates = datadf.index
    return datadf[dates.str.find(suffix) != -1]
Example #10
0
def gettodayStock():
    """Crawl the latest US stock list and save it under today's date.

    Retries every 60 seconds until ``crawlLatestUsStocks`` returns non-empty
    content and that content has been written to
    ``C:/project/stockdata/USDay/<YYYYMMDD>.txt``.
    """
    curDate = time.strftime("%Y%m%d", time.localtime())
    path = "C:/project/stockdata/USDay/%s.txt" % curDate

    while True:
        try:
            content = crawlLatestUsStocks()
            if content:
                # Only an explicit False from write2File counts as a failure;
                # success must not be reported when nothing was saved.
                if write2File(path, content, mode="w") is False:
                    FileLogger.error(
                        "save stock list error on path: %s, retry in 60 seconds"
                        % path)
                else:
                    FileLogger.info("crawl stock list successfully on date:" +
                                    curDate)
                    return
        except Exception as ex:
            FileLogger.error(ex)
            FileLogger.error("crawl stock list error, retry in 60 seconds")
        time.sleep(60)
Example #11
0
def crawlHistory(code) -> bool:
    """Fetch the daily k-line history of ``code`` from xueqiu and save it as CSV.

    Args:
        code: stock symbol inserted into the request URL and the CSV name.

    Returns:
        ``True`` when the response was valid (rows are saved only when at
        least one item came back), ``False`` when the response reports an
        error or lacks the ``column`` / ``item`` fields.
    """
    link = "https://stock.xueqiu.com/v5/stock/chart/kline.json?symbol=%s&begin=1616585707592&period=day&type=before&count=-100000&indicator=kline,pe,pb,ps,pcf,market_capital,agt,ggt,balance" % code
    session = HTMLSession()
    r = session.get(link, headers=HEADERS, cookies=COOKIES)

    jsonObj = json.loads(r.content)
    # A missing or null "data"/"error_code" is treated as an invalid response
    # instead of raising KeyError/AttributeError.
    data = jsonObj.get("data") or {}
    if jsonObj.get('error_code') != 0 or "column" not in data or "item" not in data:
        FileLogger.error("get content error from: %s" % code)
        return False

    columns = data["column"]
    items = data["item"]

    if len(items) > 0:
        path = "C:/project/stockdata/UShistory/%s.csv" % code
        save2csv(columns, items, path)

    FileLogger.info("get %d lines from code: %s" % (len(items), code))
    return True
Example #12
0
def generateQuarterYRateOnData(code, dataList, columnName):
    """Store the year-over-year rate of each single-quarter value in the DB.

    Args:
        code: value matched against ``ts_code`` in the UPDATE statement.
        dataList: iterable of ``(end_date, accumulated_value)`` items, where
            ``end_date`` is a ``YYYYMMDD`` string. Items whose value is
            ``None`` are skipped.
            NOTE(review): assumes ascending ``end_date`` order — confirm.
        columnName: prefix of the ``<columnName>_yrate`` column to update.

    For every item the single-quarter value is derived from the accumulated
    one; when a non-zero value exists for the same month/day one year earlier
    the percentage change is written to ``stock.incomerate``. Any exception
    stops the loop and is logged.
    """
    sqlTemplate = "update `stock`.`incomerate` set `%s_yrate` = %f where `ts_code` = '%s' and `end_date` = '%s' and `report_type` = '1'"

    # NOTE(review): the sibling quarter-rate function starts this at 0;
    # starting at 1 looks unintended but is kept — confirm.
    lastAcculatedValue = 1
    historyValues = {}
    # Defined up front so the except handler can always format its message,
    # even when the failure happens before the first statement is built.
    sql = None
    try:
        for item in dataList:
            date = item[0]
            acculatedValue = item[1]
            if acculatedValue is None:
                continue

            # To compute the quarterly ratio this quarter's own value is
            # needed first; the income report holds accumulated figures.
            value = acculatedValue - lastAcculatedValue
            historyValues[date] = value

            endDate = datetime.datetime.strptime(date, "%Y%m%d")
            lastYearDate = datetime.datetime(year=endDate.year - 1,
                                             month=endDate.month,
                                             day=endDate.day)
            lastYearValue = historyValues.get(lastYearDate.strftime("%Y%m%d"))

            # Skipped when last year's quarter is missing or zero.
            if lastYearValue:
                rate = (value / lastYearValue - 1) * 100
                sql = sqlTemplate % (columnName, rate, code, date)
                engine.execute(sql)

            # The accumulation restarts after the December report.
            if endDate.month == 12:
                lastAcculatedValue = 0
            else:
                lastAcculatedValue = acculatedValue

    except Exception as ex:
        FileLogger.error(ex)
        FileLogger.error(
            "write to DB for generateQuarterYRateOnData error on sql: %s" %
            sql)
Example #13
0
def generateQuarterRateOnData(code, dataList, columnName):
    """Store the quarter-over-quarter rate of each single-quarter value in the DB.

    Args:
        code: value matched against ``ts_code`` in the UPDATE statement.
        dataList: iterable of ``(end_date, accumulated_value)`` items, where
            ``end_date`` is a ``YYYYMMDD`` string. ``None`` and NaN values
            are skipped.
            NOTE(review): assumes ascending ``end_date`` order — confirm.
        columnName: prefix of the ``<columnName>_rate`` column to update.

    The rate is written only when the previous item is at most 135 days older
    and its single-quarter value is not zero. Any exception stops the loop
    and is logged.
    """
    sqlTemplate = "update `stock`.`incomerate` set `%s_rate` = %f where `ts_code` = '%s' and `end_date` = '%s' and `report_type` = '1'"

    lastDate = datetime.datetime.strptime('19900101', "%Y%m%d")
    lastValue = 1
    lastAcculatedValue = 0
    rate = 0
    # Defined up front so the except handler can always format its message.
    sql = None
    try:
        for item in dataList:
            date = item[0]
            acculatedValue = item[1]
            if acculatedValue is None or math.isnan(acculatedValue):
                continue
            value = acculatedValue - lastAcculatedValue

            endDate = datetime.datetime.strptime(date, "%Y%m%d")
            isAdjacent = endDate - lastDate <= datetime.timedelta(days=135)
            # No rate when the previous item is too old, or when the previous
            # quarter value is zero (the division would otherwise raise and
            # abort every remaining item).
            if isAdjacent and lastValue != 0:
                # To compute the quarterly ratio this quarter's own value is
                # needed first; the income report holds accumulated figures.
                rate = (value / lastValue - 1) * 100
                sql = sqlTemplate % (columnName, rate, code, date)
                engine.execute(sql)

            print("%d %d %d %d %f" %
                  (lastAcculatedValue, acculatedValue, lastValue, value, rate))
            lastDate = endDate
            lastValue = value
            # The accumulation restarts after the December report.
            if endDate.month == 12:
                lastAcculatedValue = 0
            else:
                lastAcculatedValue = acculatedValue

    except Exception as ex:
        FileLogger.error(ex)
        FileLogger.error(
            "write to DB for generateQuarterRateOnData error on sql: %s" % sql)
def generateQuarterData(code, dataList, columnName):
    """Store the single-quarter value of ``columnName`` for every row in the DB.

    Args:
        code: value matched against ``ts_code`` in the UPDATE statement.
        dataList: DataFrame with an ``end_date`` column of ``YYYYMMDD``
            strings and a ``columnName`` column.
        columnName: column to read; ``<columnName>_qtr`` is the column
            updated in ``stock.income``.

    For a 03-31 row the value is used as is. For 06-30, 09-30 and 12-31 rows
    the value of the previous quarter end of the same year is subtracted when
    that row exists. In every other case 0 is written. Any exception stops
    the loop and is logged.
    """
    sqlTemplate = "update `stock`.`income` set `%s_qtr` = %f where `ts_code` = '%s' and `end_date` = '%s' and `report_type` = '1'"

    # (month, day) of a quarter end -> MMDD of the quarter end before it.
    previousQuarterEnd = {(6, 30): '0331', (9, 30): '0630', (12, 31): '0930'}
    # Defined up front so the except handler can always format its message.
    sql = None
    try:
        for _, row in dataList.iterrows():
            endDate = datetime.datetime.strptime(row['end_date'], "%Y%m%d")
            quarterValue = 0

            if endDate.month == 3 and endDate.day == 31:
                quarterValue = row[columnName]
            else:
                suffix = previousQuarterEnd.get((endDate.month, endDate.day))
                if suffix is not None:
                    lastEndDate = '%d%s' % (endDate.year, suffix)
                    # Look the previous value up by column name rather than
                    # by position, so the column order does not matter.
                    lastValues = dataList.loc[
                        dataList['end_date'] == lastEndDate, columnName]
                    if len(lastValues) > 0:
                        quarterValue = row[columnName] - lastValues.iloc[0]

            print("%s %d %d" %
                  (row['end_date'], row[columnName], quarterValue))
            sql = sqlTemplate % (columnName, quarterValue, code,
                                 row['end_date'])
            engine.execute(sql)

    except Exception as ex:
        FileLogger.error(ex)
        FileLogger.error(
            "write to DB for generateQuarterData error on sql: %s" % sql)
Example #15
0
        FileLogger.info("get %d records on code: %s" % (len(records), code))

    if len(records) != 0:
        content = json.dumps(records)
        path = "C:/project/stockdata/StockNotices/%s.json" % code
        write2File(path, content)


if __name__ == "__main__":
    # Crawl the notices of every stock listed in stock.json, one JSON file
    # per stock code.
    stockList = getJsonFromFile("C:/project/stockdata/StockNotices/stock.json")
    stockList = stockList["stockList"]

    # For a single-stock debug run, replace stockList with a one-entry list
    # holding the keys orgId, category, code, pinyin and zwjc.

    for stock in stockList:
        FileLogger.info("running on stock: %s(%s)" %
                        (stock["zwjc"], stock["code"]))
        filePath = "C:/project/stockdata/StockNotices/%s.json" % stock['code']
        # A stock whose output file already exists is not crawled again.
        if os.path.exists(filePath):
            continue

        try:
            crawlStockNotices(stock["code"], stock["orgId"])
            time.sleep(1)

        except Exception as ex:
            FileLogger.error(ex)
            # This script crawls notices; the message used to say "balance".
            FileLogger.error("crawl notices error on code: %s" % stock["code"])
            time.sleep(3)
Example #16
0
if __name__ == "__main__":
    # Download the annual-report PDF of every row in annualreportlist.csv.
    # Example announcement page:
    # http://www.cninfo.com.cn/new/announcement/bulletin_detail?announceId=13519195&flag=true&announceTime=2004-01-17

    # retrieveAnualQuarterlyReport()

    # The builtin ``str`` replaces the ``np.str`` alias, which was removed
    # from NumPy; it keeps leading zeros in the codes.
    stockdf = pd.read_csv("C:/project/stockdata/StockNoticesFile/annualreportlist.csv", dtype={'code': str, 'year': str})
    # stockdf = stockdf[stockdf['code'] == '000002']
    stockList = stockdf[['code', 'name', 'year', 'announcementId', 'url']].to_numpy()

    # stockList = stockList[1:3]

    for stock in stockList:
        fileName = "[%s]%s年报-%s" % (stock[1], stock[2], stock[3])
        # The try sits inside the loop so one failing file does not abort
        # the remaining downloads.
        try:
            savePath = "C:/project/stockdata/StockNoticesFile/pdf/%s.pdf" % fileName
            # make sure it's a valid path, no \/:?*"<>|
            # NOTE(review): only "*" is stripped here; the other characters
            # listed above are left as they are.
            savePath = savePath.replace("*", "")
            unresolvedPath = "C:/project/stockdata/StockNoticesFile/unresolved/%s.pdf" % fileName

            url = stock[4]
            if os.path.exists(savePath) or os.path.exists(unresolvedPath):
                FileLogger.info("file %s exists, skip!" % fileName)
            else:
                FileLogger.info("downloading file: %s" % fileName)
                downloadFile(url, savePath)

        except Exception as ex:
            FileLogger.error(ex)
            FileLogger.error("download error on file: %s" % fileName)
            time.sleep(3)
Example #17
0
def retrieveAnualQuarterlyReport():
    """Collect the periodic-report PDF notices of each stock into the DB.

    For every stock the saved notices JSON is read, the notices whose
    ``announcementType`` marks an annual, half-year, first-quarter or
    third-quarter report and whose attachment is a PDF are turned into rows,
    and the rows are appended to the ``reportbasic`` table. A failure on one
    stock is logged and the loop continues with the next one.
    """
    stockList = getJsonFromFile("C:/project/stockdata/StockNotices/stock.json")
    stockList = stockList["stockList"]

    # NOTE(review): the list loaded above is immediately replaced by this
    # single hard-coded entry, which looks like a debugging leftover —
    # confirm before removing the override.
    stockList = [{"orgId":"9900002701","category":"A股","code":"002127","pinyin":"njds","zwjc":"南极电商"}]

    # Announcement type codes, checked in this order:
    # 01030101 annual report (full text), 01030701 third-quarter report,
    # 01030301 half-year report, 01030501 first-quarter report (full text).
    noticeTypes = (
        ("01030101", "年报"),
        ("01030701", "三季度报"),
        ("01030301", "半年报"),
        ("01030501", "一季度报"),
    )

    for stock in stockList:
        FileLogger.info("running on stock: %s(%s)" % (stock["zwjc"], stock["code"]))

        try:
            filePath = "C:/project/stockdata/StockNotices/%s.json" % stock['code']
            jsonList = getJsonFromFile(filePath)

            records = []
            for jsonObj in jsonList:
                announcementType = jsonObj['announcementType']
                fileType = jsonObj['adjunctType']

                noticeType = None
                for marker, label in noticeTypes:
                    if marker in announcementType:
                        noticeType = label
                        break

                # Accepts 'PDF', 'pdf' and 'PDF ' (trailing blank). The old
                # test compared filePath instead of fileType for the last
                # variant, so it could never match.
                isPdf = (fileType or '').strip().upper() == 'PDF'
                if noticeType is None or not isPdf:
                    continue

                FileLogger.info("downloading file: %s" % jsonObj["announcementTitle"])
                noticeDay = jsonObj['adjunctUrl'][10:20]
                url = "http://www.cninfo.com.cn/new/announcement/download?bulletinId=%s&announceTime=%s" % (jsonObj['announcementId'], noticeDay)

                # An annual report is attributed to the year before its
                # notice day; the other reports to the notice year.
                year = int(noticeDay[0:4])
                if noticeType == "年报":
                    year -= 1

                records.append({
                    'code': jsonObj['secCode'],
                    'name': jsonObj['secName'],
                    'announcementId': jsonObj['announcementId'],
                    'title': jsonObj['announcementTitle'],
                    'noticeDay': noticeDay,
                    'fileType': jsonObj['adjunctType'],
                    'url': url,
                    'Type': noticeType,
                    'year': year,
                })

            # Nothing to store: skip instead of failing on a missing frame.
            if not records:
                FileLogger.info("no periodic report notice found on code: %s" % stock["code"])
                continue

            # Built in one step; DataFrame.append no longer exists in
            # pandas 2.x.
            annualDf = pd.DataFrame(records)

            # save to DB
            # NOTE(review): the connection URL embeds the database password.
            from sqlalchemy import create_engine
            engine = create_engine("mysql+pymysql://root:4401821211@localhost:3306/eastmoney?charset=utf8")
            annualDf.to_sql(name="reportbasic", con=engine, if_exists="append")

        except Exception as ex:
            FileLogger.error(ex)
            FileLogger.error("retrieve error on code: %s" % stock["code"])
            time.sleep(3)
Example #18
0
# Runs the UPDATE built from sqlTemplate for every entry of incomeList whose
# date lies at most 135 days after the previous entry's date.
# NOTE(review): incomeList, lastDate, sqlTemplate, code and engine are not
# defined in this excerpt; they must already exist when this block runs.
try:
    for item in incomeList:
        # item[0] is parsed as a YYYYMMDD date string.
        endDate = datetime.datetime.strptime(item[0], "%Y%m%d")
        delta = endDate - lastDate
        if delta > datetime.timedelta(days=135):
            # do nothing, rate should be none
            pass
        else:
            # The statement is printed before it is executed.
            sql = sqlTemplate % (40, code, item[0])
            print(sql)
            engine.execute(sql)
        lastDate = endDate

except Exception as ex:
    FileLogger.error(ex)
    # NOTE(review): sql is only assigned inside the loop; if the failure
    # happens before the first assignment this line may raise NameError —
    # confirm that sql is defined earlier.
    FileLogger.error("write to DB error on sql: %s" % sql)



# %%
# Fetch the income data of 600176.SH through DBLib and sort it by end_date in
# ascending order (presumably a DataFrame — it is used with sort_values).
income = DBLib.getIncomeFromDB('600176.SH')
sortedIncome = income.sort_values(by='end_date', ascending=True)
    
# Keep only the end_date and total_revenue columns.
dataList = sortedIncome[['end_date', 'total_revenue']]
   
# %%
# Print the end_date of every row.
for index, row in dataList.iterrows():
    print(row['end_date'])
# %%
# Select the row(s) whose end_date equals '20130630' as a NumPy array.
row = dataList[dataList['end_date']=='20130630'].to_numpy()
Example #19
0
            gatherColumnInfo(jsonSql, jsonObjects, 'TOTAL_CURRENT_LIAB',
                             'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'TOTAL_EQUITY', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'TOTAL_LIAB_EQUITY',
                             'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'TOTAL_LIABILITIES',
                             'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'TOTAL_NONCURRENT_ASSETS',
                             'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'TOTAL_NONCURRENT_LIAB',
                             'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'TOTAL_OTHER_PAYABLE',
                             'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'TOTAL_OTHER_RECE',
                             'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'TOTAL_PARENT_EQUITY',
                             'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'TRADE_FINASSET_NOTFVTPL',
                             'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'UNASSIGN_RPOFIT', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'USERIGHT_ASSET', 'double')

            executeSql(jsonSql)

            time.sleep(0.1)

        except Exception as ex:
            FileLogger.error(ex)
            FileLogger.error("parse balance error on code: %s" % code)
            time.sleep(3)
Example #20
0
def write2File(filePath, content, mode="w+") -> bool:
    """Write ``content`` to ``filePath``.

    Args:
        filePath: destination path.
        content: data handed to ``fp.write``.
        mode: mode string passed to ``open``.

    Returns:
        ``True`` on success, ``False`` when opening or writing failed (the
        path and the exception are logged).
    """
    try:
        # Leaving the ``with`` block flushes and closes the file, also when
        # write() raises, so the handle is never leaked.
        with open(filePath, mode) as fp:
            fp.write(content)
        return True
    except Exception as ex:
        FileLogger.error("write to file error on path: %s" % filePath)
        FileLogger.error(ex)
        return False


if __name__ == "__main__":
    # Crawl the cashflow data of every ts_code in code.csv that is not
    # listed in the error file.
    # Query statement: select ts_code from usstock.stocklist;
    stockdf = pd.read_csv("C:/project/Tushare/usstock/code.csv")
    errordf = pd.read_csv("C:/project/Tushare/usstock/get_error_ts_code.csv")
    errorList = errordf['ts_code'].to_numpy()
    stockList = stockdf.loc[~stockdf['ts_code'].isin(errorList),
                            'ts_code'].to_numpy()

    for code in stockList:
        FileLogger.info("running on code: %s" % code)
        try:
            crawlCashflow(code)
        except Exception as ex:
            # Log the failure, wait a little longer, then go on.
            FileLogger.error(ex)
            FileLogger.error("crawl cashflow error on code: %s" % code)
            time.sleep(3)
        else:
            time.sleep(1)
Example #21
0
            gatherColumnInfo(jsonSql, jsonObjects, 'OPERATE_PROFIT', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'NONBUSINESS_INCOME', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'NONBUSINESS_EXPENSE', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'TOTAL_PROFIT', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'INCOME_TAX', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'NETPROFIT', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'CONTINUED_NETPROFIT', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'PARENT_NETPROFIT', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'MINORITY_INTEREST', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'DEDUCT_PARENT_NETPROFIT', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'BASIC_EPS', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'DILUTED_EPS', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'OTHER_COMPRE_INCOME', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'PARENT_OCI', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'MINORITY_OCI', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'TOTAL_COMPRE_INCOME', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'PARENT_TCI', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'MINORITY_TCI', 'double')

            executeSql(jsonSql)
                
            time.sleep(0.1)

        except Exception as ex:
            FileLogger.error(ex)
            FileLogger.error("parse income error on code: %s" % code)
            time.sleep(3)

    

Example #22
0
def write2File(filePath, content, mode="w+") -> bool:
    """Write ``content`` to ``filePath``.

    Args:
        filePath: destination path.
        content: data handed to ``fp.write``.
        mode: mode string passed to ``open``.

    Returns:
        ``True`` on success, ``False`` when opening or writing failed (the
        path and the exception are logged).
    """
    try:
        # Leaving the ``with`` block flushes and closes the file, also when
        # write() raises, so the handle is never leaked.
        with open(filePath, mode) as fp:
            fp.write(content)
        return True
    except Exception as ex:
        FileLogger.error("write to file error on path: %s" % filePath)
        FileLogger.error(ex)
        return False


if __name__ == "__main__":
    # Crawl the balance data of every ts_code in code.csv that is not
    # listed in the error file.
    # Query statement: select ts_code from usstock.stocklist;
    stockdf = pd.read_csv("C:/project/Tushare/usstock/code.csv")
    errordf = pd.read_csv("C:/project/Tushare/usstock/get_error_ts_code.csv")
    errorList = errordf['ts_code'].to_numpy()
    stockList = stockdf.loc[~stockdf['ts_code'].isin(errorList),
                            'ts_code'].to_numpy()

    for code in stockList:
        FileLogger.info("running on code: %s" % code)
        try:
            crawlBalance(code)
        except Exception as ex:
            # Log the failure, wait a little longer, then go on.
            FileLogger.error(ex)
            FileLogger.error("crawl balance error on code: %s" % code)
            time.sleep(3)
        else:
            time.sleep(1)
Example #23
0
    try:
        fp = open(filePath, mode)
        fp.write(content)
        fp.flush()
        fp.close()
        return True
    except Exception as ex:
        FileLogger.error("write to file error on path: %s" % filePath)
        FileLogger.error(ex)
        return False


if __name__ == "__main__":
    # Crawl the dividend data of every ts_code in code.csv that is not
    # listed in the error file.
    # Query statement: select ts_code from usstock.stocklist;
    stockdf = pd.read_csv("C:/project/Tushare/usstock/code.csv")
    errordf = pd.read_csv("C:/project/Tushare/usstock/get_error_ts_code.csv")
    errorList = errordf['ts_code'].to_numpy()
    stockList = stockdf[~stockdf['ts_code'].isin(errorList)]
    stockList = stockList['ts_code'].to_numpy()

    for code in stockList:
        FileLogger.info("running on code: %s" % code)
        try:
            crawlDivident(code)
            time.sleep(1)

        except Exception as ex:
            FileLogger.error(ex)
            # This script crawls dividends; the message used to say "income".
            FileLogger.error("crawl dividend error on code: %s" % code)
            time.sleep(3)
Example #24
0
    incomeBaseDF.to_sql(name='incomebase', con=usEngine, if_exists='append')


def readFile(filePath):
    """Return the full text content of ``filePath``.

    The file is opened in text mode with the platform default encoding.

    Returns:
        The file content as a string, or ``False`` when the file cannot be
        opened or read (the path and the exception are logged).
    """
    try:
        # The context manager closes the handle even when read() fails.
        with open(filePath, 'r') as fp:
            return fp.read()
    except Exception as ex:
        FileLogger.error("read file error on path: %s" % filePath)
        FileLogger.error(ex)
        return False


if __name__ == "__main__":
    # Parse the US income data of every code listed in code.csv.
    stockdf = pd.read_csv("C:/project/stockdata/USIncome/code.csv")
    stockList = stockdf.loc[:, 'code'].to_numpy()

    # parseIncomeBaseList(stockList)

    for code in stockList:
        FileLogger.info("running on code: %s" % code)
        try:
            parseUSIncome(code)
        except Exception as ex:
            # A failing code is logged and followed by a short pause.
            FileLogger.error(ex)
            FileLogger.error("write data to Database error on code: %s" % code)
            time.sleep(1)
                             'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'FINANCE_EXPENSE', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'INVEST_LOSS', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'DEFER_TAX', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'DT_ASSET_REDUCE', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'DT_LIAB_ADD', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'INVENTORY_REDUCE',
                             'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'OPERATE_RECE_REDUCE',
                             'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'OPERATE_PAYABLE_ADD',
                             'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'OPERATE_NETCASH_OTHERNOTE',
                             'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'NETCASH_OPERATENOTE',
                             'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'END_CASH', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'BEGIN_CASH', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'CCE_ADDNOTE', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'OPINION_TYPE',
                             'varchar(100)')

            executeSql(jsonSql)

            time.sleep(0.1)

        except Exception as ex:
            FileLogger.error(ex)
            FileLogger.error("parse cashflow error on code: %s" % code)
            time.sleep(3)