def combine_the_data_raw_raw():
    dir_dadan = data_dict.get("DADAN")
    df1 = combine_csv_in_folder_raw(os.path.join(dir_dadan, "2020_01_30"))
    #df1.to_csv("all.csv",index=0)
    df1.columns = ["stock_index", "stock_name",
                   "trade_time", "price", "trade_num", "trade_shou",
                   "status", "price_change_rate", "price_change_ratio", "look", "stock_date"]
    df1['stock_index'] = [str(x).zfill(6) for x in df1.stock_index.tolist()]
    df1['day'] = [x.replace("_", "-") for x in df1.stock_date.tolist()]
    tmp_dir = data_dict.get("tmp")
    df1 = df1.drop_duplicates()
Example #2
def test(loop_num):
    data_dir = data_dict.get("day_history")
    df1 = pd.read_csv(os.path.join(data_dir, "all.csv"), header=None)
    df1 = raw_data_col(df1)
    df2 = df1[df1['stock_index'] == 600999]
    #df2 = df1.sample(10000)

    X, df_K = make_data(df2)
    n_cluster = 11
    kmeans_model = get_model(n_cluster, X)

    df_K['labels'] = kmeans_model.labels_
    #df2[["open","high","low","close"]].values
    df_K['raw_array'] = df_K[["open", "high", "low", "close"]].values.tolist()
    #df_K['center'] = kmeans_model.cluster_centers_.tolist()
    df_cluster_cent_dist = cluster_center_dist(kmeans_model.cluster_centers_)
    print(df_cluster_cent_dist.iloc[0:3, ])
    df_cluster_info = pd.DataFrame(columns=["label_center", "labels"])
    df_cluster_info['label_center'] = kmeans_model.cluster_centers_.round(
        3).tolist()
    df_cluster_info['labels'] = [x for x in range(0, kmeans_model.n_clusters)]
    df_center = pd.merge(df_K, df_cluster_info,
                         on='labels')[['raw_array', "label_center", "labels"]]

    df_cluster_stat = df_center.groupby("labels").apply(cluster_stat)[[
        "labels", 'max_dist', "median_dist", 'cnt'
    ]]
    print(df_cluster_stat.sort_values("max_dist", ascending=False))
    file_name = "cluster_%s_%s.csv" % (n_cluster, loop_num)
    save_cluster(kmeans_model.cluster_centers_, file_name)
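# Hedged sketch of the scikit-learn attributes the example above relies on
# (make_data/get_model are repo-specific helpers; this only illustrates the
# KMeans fields that are read: labels_, cluster_centers_, n_clusters):
# >>> from sklearn.cluster import KMeans
# >>> import numpy as np
# >>> X = np.random.rand(100, 4)   # stand-in for the OHLC feature matrix
# >>> km = KMeans(n_clusters=11, n_init=10).fit(X)
# >>> km.labels_.shape, km.cluster_centers_.shape, km.n_clusters
# ((100,), (11, 4), 11)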
Example #3
def allKdj(now_date):
    stock_index_list = []
    j_line = []
    stock_date_list = []
    for i in stk_index_list:
        if i[0:2] == '60' or i[0] == '00':
            try:
                wy_data_dir = data_dict.get("day_history_wangyi")
                stock_index = i
                df1 = loadData(wy_data_dir,
                               stock_index).sort_values("stock_date")
                stock = DF_to_StockDataFrame(df1)
                df_stock = stock_kdj(stock)
                #df_kdj_macd['adj_close'] = df_kdj_macd["macd"]
                df_class = dayHistoryFeature.makeHistoryFeature(
                    df_stock, 10, "macdh")
                df_kdj_macd = df_class.df_out
                macd_feature_name = df_class.new_history_days_colname
                stock_date = now_date
                jjj = np.round(
                    df_kdj_macd['kdjj'][df_kdj_macd['date'] ==
                                        now_date].values[0], 3)
                macd_line = df_kdj_macd[macd_feature_name][df_kdj_macd['date']
                                                           == now_date]
                stock_index_list.append(stock_index)
                j_line.append(jjj)
                stock_date_list.append(stock_date)
            except:
                pass
    return stock_index_list, j_line, stock_date_list
def allKdj(now_date, stock_index):
    stock_index_list = []
    j_line = []
    stock_date_list = []
    rsi_6 = []
    boll_ratio = []
    try:
        wy_data_dir = data_dict.get("day_history_wangyi")
        df1 = loadData(wy_data_dir, stock_index).sort_values("stock_date")
        stock = DF_to_StockDataFrame(df1)
        df_stock, _ = stock_kdj(stock)
        df_stock["boll_ratio"] = df_stock["boll_ub"] / df_stock["boll_lb"]
        kdj_j_today = np.round(
            df_stock['kdjj'][df_stock['date'] == now_date].values[0], 3)
        rsi_today = np.round(
            df_stock['rsi_6'][df_stock['date'] == now_date].values[0], 3)
        boll_ratio_today = np.round(
            df_stock['boll_ratio'][df_stock['date'] == now_date].values[0], 3)
        stock_index_list.append(stock_index)
        j_line.append(kdj_j_today)
        rsi_6.append(rsi_today)
        boll_ratio.append(boll_ratio_today)
        stock_date_list.append(now_date)
    except:
        pass
    return stock_index_list, j_line, stock_date_list, rsi_6, boll_ratio
def download_for_stock_index(stock_index, end_year):
    dir_news_report = data_dict.get("news_report")
    stk = stock_index
    try:
        get_all_news(stock_index, os.path.join(dir_news_report, stk), end_year)
    except Exception:
        print(stock_index + ' not downloaded')
        traceback.print_exc()
        pass
    time.sleep(3)
Example #6
def getFeatture(stock_index):
    wy_data_dir = data_dict.get("day_history_wangyi")
    df1 = loadData(wy_data_dir, stock_index).sort_values("stock_date")
    stock = DF_to_StockDataFrame(df1)
    df_stock = stock_kdj(stock)
    #df_kdj_macd['adj_close'] = df_kdj_macd["macd"]
    df_class = dayHistoryFeature.makeHistoryFeature(df_stock, 10, "macdh")
    df_kdj_macd = df_class.df_out
    macd_feature_name = df_class.new_history_days_colname
    return df_kdj_macd, macd_feature_name
Example #7
def load_data(date_in=None):
    dir_yjyq = data_dict.get("YeJiYuQi")
    now_date, now_date_time = get_the_datetime()
    if date_in is None:
        date_in = now_date
    data_dir = os.path.join(dir_yjyq, date_in)
    df1 = combine_csv_in_folder_raw(data_dir)
    df1.columns = ["index", "stock_index", "stock_name", "yeji_predict", "yeji_abstract",
                   "profit_change_ratio", "profit_change", "date"]
    return df1
Example #8
def combine_with_stock_basic_info(df_input,columns_select):
    out_columns = df_input.columns.tolist() 
    out_columns = out_columns + columns_select
    basic_info_dir = data_dict.get("basic_info")
    basic_info_df = pd.read_csv(os.path.join(basic_info_dir,"stock_basic_info.csv"))
    basic_info_df['stock_index'] = basic_info_df['code']
    basic_info_df['stock_index'] = [str(x).zfill(6) for x in basic_info_df['stock_index'].tolist()]
    df_input['stock_index'] = [str(x).zfill(6) for x in df_input['stock_index'].tolist()]
    df1 = pd.merge(df_input,basic_info_df,how='left',on = ["stock_index"])
    df2 = df1[out_columns]
    return df2
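# Hedged usage sketch: the helper left-joins the repo's stock_basic_info.csv onto the
# input on a zero-padded 6-digit stock_index and keeps the requested extra columns.
# The column names below ("name", "industry") are illustrative assumptions, not confirmed:
# >>> df_in = pd.DataFrame({"stock_index": [600999, 1], "price": [25.0, 10.0]})
# >>> combine_with_stock_basic_info(df_in, ["name", "industry"]).columns.tolist()
# ['stock_index', 'price', 'name', 'industry']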
Example #9
def loop_for_download_fin_report(stock_index, year_range):
    save_dir = data_dict.get("financial_report")
    for year in year_range:
        file_name, if_file_exists = make_filename_check_if_exists(
            save_dir, stock_index, year)
        if not if_file_exists:
            with eventlet.Timeout(2.7, False):
                do_download(stock_index, year, save_dir, file_name,
                            if_file_exists)
                time.sleep(20)
Example #10
def get_all_date():
    cols = [
        "id", "id2", "owner_name", "owner_type", "stock_type", "gudong_rank",
        "stock_index", "stock_name", "change_date", "num", "chigu_ratio",
        "liutong_ratio", "report_date", "change_type", "change_ratio", "test1",
        "test2", "num_change"
    ]
    data_dir = data_dict.get("important_owner")

    df1 = pd.read_csv(os.path.join(data_dir, "zo_nyyh_zozz_2020-08-14.csv"))
    df1.columns = cols
    df1 = df1.drop_duplicates()
    df1['stock_date'] = [x[0:10] for x in df1.change_date.tolist()]
    return df1
Example #11
def get_all_date():
    cols = [
        "id", "id2", "owner_name", "owner_type", "stock_type", "gudong_rank",
        "stock_index", "stock_name", "change_date", "num", "chigu_ratio",
        "liutong_ratio", "report_date", "change_type", "change_ratio", "test1",
        "test2", "num_change"
    ]
    data_dir = data_dict.get("important_owner")

    df1 = combine_csv_in_folder_raw(data_dir)
    df1.columns = cols
    df1 = df1.drop_duplicates()
    df1['stock_date'] = [x[0:10] for x in df1.change_date.tolist()]
    return df1
Example #12
def get_all_date():
    cols = [
        "id", "id2", "owner_name", "owner_type", "stock_type", "gudong_rank",
        "stock_index", "stock_name", "change_date", "num", "chigu_ratio",
        "liutong_ratio", "report_date", "change_type", "change_ratio", "test1",
        "test2", "num_change"
    ]
    data_dir = data_dict.get("important_owner")
    file1 = "hk_central_2020_05_22.csv"
    df1 = pd.read_csv(os.path.join(data_dir, file1))
    #df1 = combine_csv_in_folder_raw(data_dir)
    df1.columns = cols
    df1 = df1.drop_duplicates()
    df1['stock_date'] = [x[0:10] for x in df1.change_date.tolist()]
    return df1
Example #13
def stockSlope(stock_index, start_date, stat_days, pred_days):
    try:
        data_dir = data_dict.get("baostock")
        df1 = pd.read_csv(
            os.path.join(data_dir, "history_data", stock_index) + '.csv')
        k1 = klineDate(start_date, stat_days, pred_days)
        stat_end_date, pred_start_date, pred_end_date = k1.make_date()
        df2 = df1[(df1["date"] >= pred_start_date)
                  & (df1["date"] <= pred_end_date)]
        slope = LinearReg.single_linear_reg(df2, "close")[0]
        #print("{},{},{},{},{}".format(stock_index,pred_start_date,pred_end_date,pred_days,slope))
    except:
        #print("{},{},{},{},{}".format(stock_index,"aa","bb",pred_days,-999))
        slope = -999
        pred_start_date = -999
        pred_end_date = -999
    return slope, pred_start_date, pred_end_date
Example #14
def main():
    now_date, now_date_time = get_the_datetime()  ## the now_date is like "2019_11_08"
    #now_date = "2020_05_22"
    dir_dadan = data_dict.get("DADAN")
    data_dir = os.path.join(dir_dadan, now_date)
    df1 = combine_csv_in_folder(data_dir)
    df1.columns = setColname().DADAN()
    ## merge the data
    df_merge1 = DADAN_diff_stat(df1)
    # save data
    now_date = now_date.replace("_", "-")
    save_dir = dailyReport.dailyReport(now_date).save_to_daily_report()
    save_file = "DADAN_200_daily_report_" + now_date + ".csv"
    save_file = os.path.join(save_dir, save_file)
    df_merge1 = changeStockIndex(df_merge1, 'stock_index')
    df_merge1.head(100).to_csv(save_file, encoding="utf_8_sig", index=0)
    return df_merge1
Example #15
def combine_clean_data():
    dir_dadan = data_dict.get("DADAN")
    folder = dir_dadan
    df_all = pd.DataFrame(columns=('0', '1', '2', '3', '4', '5', '6', '7', '8',
                                   '9', 'date'))
    for dirname, dirs, files in walk(folder):
        try:
            date_sig = dirname.split("/")[-1:][0]
            date_in = datetime.datetime.strptime(
                date_sig, "%Y_%m_%d").strftime("%Y-%m-%d")
            print(dirname)
            df1 = combine_csv_in_folder_raw(dirname)
            df1['date'] = date_in
            df_all = pd.concat([df_all, df1])
        except:
            pass
    df_out = df_all.drop_duplicates()
    df_out.to_csv("test.csv", index=0)
    return df_out
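# The folder-name-to-date conversion used above, for reference:
# >>> datetime.datetime.strptime("2020_01_30", "%Y_%m_%d").strftime("%Y-%m-%d")
# '2020-01-30'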
Example #16
def main(stock_index):
    #from dir_control.data_dir import dir_basic_info,dir_day_history,stk_index_list
    dir_day_history_insert = data_dict.get("day_history_insert")
    import os
    import pandas as pd
    import time
    import datetime
    #### ------------ para -------------------#
    ##  set start end date
    ##############################################
    ## comment it if not test
    ####
    #print(k)
    stock_index = make_stock_download_index(stock_index)
    try:
        run_download(stock_index, start_date, end_date, dir_day_history_insert)
        #print("sleep")
    except:
        print("the stock index cannot be download " + str(stock_index))
        pass
Example #17
def main():
    now_date, now_date_time = get_the_datetime()  ## the now_date is like "2019_11_08"
    #now_date = "2020_06_01"
    dir_dadan = data_dict.get("DADAN")
    data_dir = os.path.join(dir_dadan, now_date)
    df1 = combine_csv_in_folder(data_dir)
    df1.columns = setColname().DADAN()
    df2 = df1[df1['status'] == '买盘']
    df_buy = df2.groupby(
        'stock_index').count()['price'].sort_values().reset_index()
    df3 = df1[df1['status'] == '卖盘']
    df_sale = df3.groupby(
        'stock_index').count()['trade_time'].sort_values().reset_index()
    df_merge = pd.merge(df_buy, df_sale, how='left').fillna(0)
    df_merge['diff'] = df_merge['price'] - df_merge['trade_time']
    df_out = df_merge.sort_values('diff').tail(50)
    print(df_out)
    return df_merge
Example #18
def main():
    now_date, now_date_time = get_the_datetime()  ## the now_date is like "2019_11_08"
    dir_dadan = data_dict.get("DADAN")
    data_dir = os.path.join(dir_dadan, now_date)
    df1 = combine_csv_in_folder(data_dir)
    df1.columns = setColname().DADAN()
    ## merge the data
    df_merge1 = DADAN_diff_stat(df1)
    df_merge1.to_csv("DADAN_sample.csv", index=0)
    print("=" * 50)
    print(df_merge1[['stock_index', 'stock_name', 'buy_sale_diff']].head(50))
    #print(df_merge1[['stock_index','stock_name','buy_sale_diff']][df_merge1['price']<25].head(50))
    print(df_merge1.tail(50))
    ## save data
    print("=" * 50)
    save_dir = os.path.join(data_path, "DADAN_daily_report")
    create_dir_if_not_exist(save_dir)
    save_file = "DADAN_200_daily_report_" + now_date + ".csv"
    save_file = os.path.join(save_dir, save_file)
    df_merge1.to_csv(save_file, encoding="utf_8_sig")
    return df_merge1


def DADAN_diff_stat(df_input):
    ## presumably the body of the DADAN_diff_stat helper called in main() above
    df2 = df_input.drop_duplicates()
    df4 = status_sum(df2, "买盘")
    df6 = status_sum(df2, "卖盘")
    ##
    df_merge = pd.merge(df4, df6, how='left', on=["stock_index", "stock_name"])
    df_merge = df_merge.fillna(0)
    df_merge["buy_sale_diff"] = df_merge["buy_num"] - df_merge["sale_num"]
    df_merge1 = df_merge.sort_values("buy_sale_diff", ascending=False)
    #print(df_merge1)
    return df_merge1
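# A hedged, toy illustration of the buy/sell ("买盘"/"卖盘") diff computed above
# (status_sum itself lives elsewhere in this repo; here the per-status sums are done inline):
# >>> toy = pd.DataFrame({"stock_index": ["600999", "600999", "000001"],
# ...                     "stock_name": ["A", "A", "B"],
# ...                     "status": ["买盘", "卖盘", "买盘"],
# ...                     "trade_num": [3, 1, 5]})
# >>> buy = toy[toy.status == "买盘"].groupby(["stock_index", "stock_name"]).trade_num.sum()
# >>> sale = toy[toy.status == "卖盘"].groupby(["stock_index", "stock_name"]).trade_num.sum()
# >>> (buy - sale.reindex(buy.index).fillna(0)).sort_values(ascending=False)
# gives a buy_sale_diff of 5 for 000001 and 2 for 600999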


if __name__ == '__main__':
    #now_date,now_date_time = get_the_datetime()  ## the now_date is like "2019_11_08"
    now_date = "2020_01_17"
    dir_dadan = data_dict.get("DADAN")
    data_dir = os.path.join(dir_dadan, now_date)
    print(data_dir)
    df1 = combine_csv_in_folder(data_dir)
    df1.columns = ["stock_index","stock_name", \
            "trade_time","price","trade_num","trade_shou", \
            "status","price_change_rate","price_change_ratio","look","stock_date"]
    df1['stock_index'] = [str(x).zfill(6) for x in df1.stock_index.tolist()]
    df1['day'] = [x.replace("_", "-") for x in df1.stock_date.tolist()]
    tmp_dir = data_dict.get("tmp")
    df1.to_csv(os.path.join(tmp_dir, "DADAN_sample.csv"), index=0)
    #df_merge1 = DADAN_diff_stat(df1)
    #print(df_merge1.head(30))
    #print(df_merge1.tail(30))
    #df_merge2 = df_merge1[df_merge1.sale_num.isna()]
    #df_merge3 = df_merge2.sort_values("buy_num",ascending=False)

def makeDateInput():
    season = ['03-31', '06-30', '09-30', '12-31']
    year = [str(x) + '-' for x in range(2003, 2020)]
    year.reverse()
    date_input = []
    for year1 in year:
        for season1 in season:
            date_input.append(year1 + season1)
    return date_input
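# For reference, years are iterated newest-first and quarters in calendar order:
# >>> makeDateInput()[:4]
# ['2019-03-31', '2019-06-30', '2019-09-30', '2019-12-31']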


if __name__ == '__main__':
    #date_input = makeDateInput()
    save_dir = data_dict.get("fenhong")
    date_input = ['2020-06-30']
    for date_input1 in date_input:
        print(date_input1)
        get_data(date_input1, save_dir)
        time.sleep(60)
        #get_data("2019-06-30")

#response=requests.get(html1)
#values={'act':'login'}
"""
headers={
'Accept': 'text/html, */*; q=0.01',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
'Connection': 'keep-alive',
Example #21
    for row in table.find_all('tr'):
        column_marker = 0
        columns = row.find_all('td')
        for column in columns:
            new_table.iat[row_marker, column_marker] = column.get_text()
            column_marker += 1
        row_marker += 1
    #new_table = new_table[~(new_table[''].isnull())]
    new_table = new_table.dropna(axis=0)
    return new_table


def save_the_table(new_table, dir_dadan, now_date, now_date_time):
    save_dir = os.path.join(dir_dadan, now_date)
    create_dir_if_not_exist(save_dir)
    save_file = os.path.join(save_dir, now_date_time + ".csv")
    new_table.to_csv(save_file, index=0)


if __name__ == '__main__':
    dir_dadan = data_dict.get("DADAN")
    now_date, now_date_time = get_the_datetime()
    html1 = "http://app.finance.ifeng.com/hq/all_stock_bill.php"
    table, new_table_index = get_html_table(html1)
    new_table = table_to_DF(table, new_table_index)
    save_the_table(new_table, dir_dadan, now_date, now_date_time)

#print(new_table)

#airline.table = readHTMLTable(html1, header=T, which=1,stringsAsFactors=F)
Example #22
from davidyu_cfg import *
from functions.connect_url import url_opener
from functions.data_dir import data_dict

data_dir = data_dict.get("owner")
#df1 = pd.read_csv(os.path.join(data_dir,"zhongyanghuijin.csv"))
df1 = pd.read_csv(os.path.join(data_dir, "hk_central.csv"))

#df1.groupby("股票简称")


def latest_change(x):
    date_in = x["截止日期"].tolist()[0]
    change1 = x["持股比例(%)"].diff(-1).reset_index()['持股比例(%)'].tolist()[0]
    change = round(change1, 2)
    df2 = pd.DataFrame(columns=["date_latest", "latest_change"])
    df2['date_latest'] = [date_in]
    df2["latest_change"] = [change]
    return df2


aa = df1.groupby("股票简称").apply(latest_change)

a1 = aa.dropna().sort_values("latest_change")
a2 = a1[a1["date_latest"] > "2019-09-01"]
print(a2.tail(30))
'''
a1.sort_values("date_latest")
df2 = df1[df1["股票简称"]=="华峰氨纶"]

df2["持股比例(%)"].diff(-1)
Example #23
from davidyu_cfg import *
from functions.data_dir import data_dict, stk_index_list, create_dir_if_not_exist
from functions.get_datetime import *
from functions.run_combine_all_csv import *
'''
combine today's DADAN data
@time:   2020-02-03
'''

if __name__ == '__main__':
    now_date, now_date_time = get_the_datetime(
    )  ## the now_date is like "2019_11_08"
    now_date = "2021-01-15"
    dir_dadan = data_dict.get("dadan_real_time_ifeng")
    data_dir = os.path.join(dir_dadan, now_date)
    print(data_dir)
    df1 = combine_csv_in_folder_raw(data_dir)
    df1.columns = ["stock_index","stock_name", \
            "trade_time","price","trade_num","trade_shou", \
            "status","price_change_rate","price_change_ratio","look","stock_date"]
    df1['stock_index'] = [str(x).zfill(6) for x in df1.stock_index.tolist()]
    df1['dt'] = [x.replace("_", "-") for x in df1.stock_date.tolist()]
    tmp_dir = data_dict.get("tmp")
    save_data_name = "dadan_real_time_ifeng_" + now_date + ".csv"
    df1.drop_duplicates().to_csv(os.path.join(tmp_dir, save_data_name),
                                 index=0)
Example #24
from davidyu_cfg import *
from functions.data_dir import data_dict, stk_index_list, create_dir_if_not_exist
from functions.get_datetime import *
from functions.run_combine_all_csv import *
from functions.colNames import *

now_date = "2020_06_19"
dir_dadan = data_dict.get("dadan_DFCF")
data_dir = dir_dadan

df1 = pd.read_csv(os.path.join(data_dir, "2020_08_06.csv"))

#df1.columns = ["new_price","today_increase_ratio","stock_index","stock_name","zhuli_liuru",
#        "chaodadan_liuru","chaodadan_liuru_ratio","dadan_liuru","dadan_liuru_ratio",
#        "zhongdan_liuru","zhongdan_liuru_ratio","xiaodan_liuru","xiaodan_liuru_ratio","test1",
#        "zhuli_liuru_ratio","test2","test3",
#        "test4","stock_date"]

df1.columns = setColname().dadan_DFCF()
df1 = df1[df1['zhuli_liuru_ratio'] != "-"]
df1['zhuli_liuru_ratio'] = df1['zhuli_liuru_ratio'].astype(float)
df1['zhuli_liuru'] = df1['zhuli_liuru'].astype(float)

df2 = df1[[
    "stock_index", "stock_name", "new_price", "today_increase_ratio",
    'zhuli_liuru_ratio', 'zhuli_liuru'
]]
df2.sort_values("zhuli_liuru_ratio")
df2.sort_values("zhuli_liuru")
print(df2.head(20))
#df1.columns = setColname().DADAN()
Example #25
            pass
    return kline_dict, data_list_len


def clean_dict(kline_dict, data_list_len):
    #for i in kline_dict:
    data_list_len1 = [x for x in data_list_len if x > 0]
    most_common_len = Counter(data_list_len1).most_common(1)[0][0]
    logging.info("dict out")
    logging.info(kline_dict)
    kline_data_dict = clean_dict_data(kline_dict, most_common_len)
    return kline_data_dict


if __name__ == '__main__':
    data_dir = data_dict.get("day_history_insert")
    list_files = os.listdir(data_dir)
    kl_stat_day = 14
    kl_pred_day = 7

    import time
    import random
    a1 = (2000, 1, 1, 0, 0, 0, 0, 0, 0)  # start of the date range as a time tuple (2000-01-01 00:00:00)
    a2 = (2019, 12, 31, 23, 59, 59, 0, 0, 0)  # end of the date range as a time tuple (2019-12-31 23:59:59)

    start = time.mktime(a1)  # start timestamp
    end = time.mktime(a2)  # end timestamp
    for i in range(100):
        t = random.randint(start, end)  # pick a random timestamp between start and end
        date_touple = time.localtime(t)  # convert the timestamp back to a time tuple
        date = time.strftime("%Y-%m-%d",
Example #26
    @staticmethod
    def columnToFloat(df, columns):
        """
        @param: df: a dataframe
        @param: columns: a list of the colnames that need to be converted to float
        """
        for col in columns:
            df[col] = df[col].apply(lambda x: x.replace(",", "").replace("%", "")).astype(float)
        return df
    @staticmethod
    def dfColumnsToFloat(df): 
        new_table = dadanSina.columnToFloat(df,["total_trade_vol","total_trade_vol_ratio", \
                "total_trade_money","total_trade_money_ratio", \
                "avg_price","zhuli_buy_vol","zhongxing_vol","zhuli_sale_vol"])
        return new_table
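# Hedged usage sketch for columnToFloat (toy values; the real column names come from setColname):
# >>> df = pd.DataFrame({"total_trade_vol": ["1,234"], "total_trade_vol_ratio": ["56%"]})
# >>> dadanSina.columnToFloat(df, ["total_trade_vol", "total_trade_vol_ratio"]).iloc[0].tolist()
# [1234.0, 56.0]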
if __name__ == '__main__':
    from functions.dadan_sina.dadanSina import dadanSina
    dir_dadan = data_dict.get("dadan_sina_offline")
    now_date,now_date_time = get_the_datetime()
    url1 = "http://vip.stock.finance.sina.com.cn/quotes_service/view/cn_bill_sum.php?num=100000&page=1&sort=totalvolpct&asc=0&volume=40000&type=0&dpc=1"
    DF_columns = 11
    table,new_table_index = dadanSina.get_html_table(url1)
    new_table = dadanSina.table_to_DF(table,new_table_index,DF_columns)
    new_table.columns = setColname().dadan_sina()
    new_table['date_time'] = now_date_time
    new_table['stock_date'] = now_date
    new_table = dadanSina.dfColumnsToFloat(new_table)
    dadanSina.save_the_table(new_table,dir_dadan,now_date,now_date_time)



Example #27
from davidyu_cfg import *
from functions.data_dir import data_dict,stk_index_list,create_dir_if_not_exist
from functions.get_datetime import *
from functions.run_combine_all_csv import *
#from functions.pyspark_david.pyspark_functions import *
#from functions.pyspark_david.get_day_history_data import *


dir_yjyq = data_dict.get("YeJiYuQi")
#now_date,now_date_time = get_the_datetime()
now_date = "2020_01_05"
data_dir = os.path.join(dir_yjyq,now_date)
df1 = combine_csv_in_folder_raw(data_dir)

df1.columns = ["index","stock_index","stock_name","yeji_predict","yeji_abstract","profit_change_ratio",
        "profit_change","date"]
df2 = df1[(df1["yeji_predict"]=='业绩大幅上升')&(df1["date"]=="2020-01-04")]
stk_index_list = ['\''+str(x).zfill(6)+'\'' for x in df2.stock_index.tolist()]
stk_index_list = list(set(stk_index_list))
stk_index_list_str = ','.join(stk_index_list)
every_table = "stock_dev.day_history_insert"
start_date = "2020-01-04"
end_date = "2020-01-18"
df_history = get_data(every_table,start_date,end_date,stk_index_list_str)

#df_history.groupby('stock_index').apply(linear_REG)

data_vv = df_history.groupby('stock_index')
stock_ind = []
stock_slope = []
stock_row_len = []
Example #28
    #df_f2 = df_f1.merge(df_kdj, left_index=True, right_index=True)
    df_max_min = df_max_min.reset_index()
    df_max_min = rolling_regression(df_max_min,regre_window,'date',regre_col)    
    #df_max_min['slope_5'] = linear_slope['slopes']
    df_max_min = df_max_min.reset_index().dropna()
    df_max_min['stock_date'] = df_max_min.reset_index()['date'].astype(str)
    ## combine data
    cols_to_use = df_max_min.columns.difference(stock_kdj_macd.columns).tolist()+['stock_date']
    df_f2 = pd.merge(stock_kdj_macd,df_max_min[cols_to_use],on="stock_date")
    return df_f2


if __name__ == "__main__":
    from functions.data_dir import data_dict,stk_index_list,create_dir_if_not_exist
    from functions.rolling_regression import *
    data_dir = data_dict.get("test")
    file_name = "60_data.csv"
    file_in = os.path.join(data_dir,file_name)
    df1 = pd.read_csv(file_in,sep="\t").iloc[300:,:]
    df1.columns = [x.split(".")[1] for x in df1.columns.tolist()]
    #df1['close'] = df1['adj_close']
    start_date = '2019-01-01'
    end_date = '2019-12-31'
    max_min_stat_window = 5
    regre_window = 5
    regre_col = 'close'
    df_out = []
    for name,group in df1.groupby('stock_index'):
        df2 = process(group,start_date,end_date,max_min_stat_window,regre_window,regre_col)
        df_out.append(df2)
            new_table.iat[row_marker, column_marker] = column.get_text()
            column_marker += 1
        row_marker += 1
    new_table = new_table.dropna(axis=0)
    return new_table


def save_the_table(new_table, dir_dadan, now_date, page):
    save_dir = os.path.join(dir_dadan, now_date)
    create_dir_if_not_exist(save_dir)
    save_file = os.path.join(save_dir, page + ".csv")
    new_table.to_csv(save_file, index=0)


if __name__ == '__main__':
    dir_dadan = data_dict.get("dadan_real_time_ifeng_1000")
    now_date, now_date_time = get_the_datetime()
    #now_date = '2020_06_19'
    import time
    import random
    #html1 = "http://app.finance.ifeng.com/hq/all_stock_bill.php"
    DF_columns = 10
    i = sys.argv[1]
    try:
        html1 = "http://app.finance.ifeng.com/hq/all_stock_bill.php?page=%s&by=hq_time&order=desc&amount=1000" % (
            str(i))
        table, new_table_index = get_html_table(html1)
        new_table = table_to_DF(table, new_table_index, DF_columns)
        new_table['date'] = now_date
        save_the_table(new_table, dir_dadan, now_date, str(i))
    except Exception as e:
Example #30
from davidyu_cfg import *
from functions.run_combine_all_csv import *
from functions.data_dir import data_dict, stk_index_list, create_dir_if_not_exist

dir_fenhong = data_dict.get("fenhong")
save_dir_fenhong = tmp_data_dict.get("fenhong")
df1 = combine_csv_in_folder_raw(dir_fenhong)
df2 = df1[df1['除权除息日'] >= "2019-12-31"]

#df2['派息']/(df2['最新价']*100)

x1 = [float(x) if x != '停牌' else 999 for x in df2['最新价'].tolist()]
x2 = [float(x) if x != '--' else -1 for x in df2['派息'].tolist()]

df2['paixi'] = x2
df2['price'] = x1
df2['paixi_ratio'] = df2['paixi'] / df2['price']
df2['除权除息日']
df3 = df2[["股票代码", "股票简称", "除权除息日", "paixi_ratio"]]
save_file = os.path.join(save_dir_fenhong, "fenhong_data.csv")
df3.round(3).to_csv(save_file, index=0)

high_paixi_num = 30

df_sample = df3.sort_values("paixi_ratio").tail(50)
high_fenhong_stock_list1 = df_sample['股票代码']
high_fenhong_stock_list = [str(x).zfill(6) for x in high_fenhong_stock_list1]

#df2[df2['股票代码']==601216][["除权除息日","股权登记日"]]