Beispiel #1
0
from dataprocess import RawDataProcessor
from util import CommonUtil
#     price_end_time), 2))
# Session bounds handed to CommonUtil.get_next_sample_time below.
market_open_time = '09:30:00'
market_close_time = '23:30:00'

# Sample timestamps for manual experiments; only time1 feeds the call below,
# time2 is kept as a second ready-made value.
time1 = '2016/06/06  23:30:00'
time2 = '2016/06/05  09:30:00'

# Print the sample point that CommonUtil reports as following time1, using a
# step argument of 1 and the session bounds defined above.
print(
    CommonUtil.get_next_sample_time(
        CommonUtil.get_datetime_from_string(time1),
        1,
        market_open_time,
        market_close_time,
    )
)
Beispiel #2
0
def process_original_price():
    """Resample the raw price CSV onto the sampling grid and write the result.

    Rows are read from ``ORIGINAL_PRICE_PATH`` into the module-level
    ``originalPriceList``. Sample points start at ``PRICE_START_TIME`` and
    advance through ``CommonUtil.get_next_sample_time``. For every sample
    point that received at least one price, a
    ``[sample_time_string, average_price]`` entry is appended to the
    module-level ``processedPriceList``; the average is rounded to
    ``CURRENCY_PAIR_PRECISION + 2`` digits. Finally ``processedPriceList`` is
    written to ``PROCESSED_PRICE_PATH + '_' + str(PRICE_SAMPLE_MINUTE) +
    CSV_FILE_SUFFIX``.

    A price is assigned to the current sample point when the value returned
    by ``CommonUtil.get_interval_seconds(price_time, sample_time)`` lies in
    the half-open range ``[-half_window, half_window)``, where
    ``half_window`` is half the sampling period in seconds.

    NOTE(review): the single forward pass presumably relies on the CSV rows
    being in chronological order -- confirm against the data source.
    """
    global originalPriceList

    def flush_bucket(prices, sample_point):
        """Append the rounded mean of *prices* for *sample_point*, if any."""
        if not prices:
            return
        mean_price = round(sum(prices) / len(prices),
                           CURRENCY_PAIR_PRECISION + 2)
        processedPriceList.append(
            [CommonUtil.get_string_from_datetime(sample_point), mean_price])

    logger.info("In Process Original Price...")
    originalPriceList = CommonUtil.read_csv(ORIGINAL_PRICE_PATH)

    # Half of the sampling period, in seconds.
    half_window = PRICE_SAMPLE_MINUTE * 60 / 2
    sample_datetime = None
    end_datetime = None
    bucket = []

    for original_price in originalPriceList:
        logger.debug('price time: %s', original_price[0])
        price_datetime = CommonUtil.get_datetime_from_string(original_price[0])
        price_value = float(original_price[1])

        if sample_datetime is None:
            # Parse the window bounds once, when the first row is seen.
            sample_datetime = CommonUtil.get_datetime_from_string(
                PRICE_START_TIME)
            end_datetime = CommonUtil.get_datetime_from_string(PRICE_END_TIME)

        time_interval = CommonUtil.get_interval_seconds(
            price_datetime, sample_datetime)

        # Price is well before the current sample point: skip this row.
        if time_interval < -half_window:
            continue

        # Price is past the current sample window: emit the average for the
        # current sample point, then advance until the price is no longer
        # past the window.
        # NOTE(review): the lower bound is not re-checked after advancing, so
        # a price lying between two sample windows is counted toward the
        # later sample point -- confirm this is intended.
        while time_interval >= half_window:
            flush_bucket(bucket, sample_datetime)
            bucket = []
            sample_datetime = CommonUtil.get_next_sample_time(
                sample_datetime, PRICE_SAMPLE_MINUTE, MARKET_OPEN_TIME,
                MARKET_CLOSE_TIME)
            time_interval = CommonUtil.get_interval_seconds(
                price_datetime, sample_datetime)

        logger.debug('sample datetime:%s',
                     CommonUtil.get_string_from_datetime(sample_datetime))

        # The sample point moved beyond the requested end time: stop.
        if sample_datetime > end_datetime:
            break

        # The price belongs to the current sample point.
        bucket.append(price_value)

    # Emit the average for the last sample point that collected prices.
    flush_bucket(bucket, sample_datetime)

    file_path = PROCESSED_PRICE_PATH + '_' + str(
        PRICE_SAMPLE_MINUTE) + CSV_FILE_SUFFIX
    CommonUtil.write_csv(file_path, processedPriceList)
    logger.info("Process Original Price Done!")
def process_original_news_vec(seg_news_vec, st, et):
    """Aggregate per-news sentiment values onto a 60-minute sampling grid.

    Args:
        seg_news_vec: iterable of rows. ``row[0]`` is a timestamp string
            parsed with ``CommonUtil.get_datetime_from_string_``; ``row[1]``
            is used as a grouping key and ``row[2]`` as a value converted
            with ``float``.
        st: timestamp string of the first sample point.
        et: timestamp string; processing stops once the current sample point
            is later than this.

    Returns:
        The result of ``dimension89`` applied to a list with one
        ``[sample_time_string] + dict_to_vec(totals)`` row per sample point
        that received at least one news item, where ``totals`` maps each key
        to the sum of its values within that sample point.

    NOTE(review): the single forward pass presumably relies on the rows being
    in chronological order -- confirm with the caller.
    """
    market_open = '09:30:00'
    market_close = '23:30:00'
    sample_minutes = 60
    # Half of the sampling period, in seconds.
    half_window = sample_minutes * 60 / 2
    processed_news = []

    def flush_bucket(items, sample_point):
        """Append the per-key value totals of *items* for *sample_point*."""
        if not items:
            return
        totals = {}
        for news_item in items:
            key = news_item[0]
            if key in totals:
                totals[key] += float(news_item[1])
            else:
                totals[key] = float(news_item[1])
        sentiment = dict_to_vec(totals)
        sample_point_str = CommonUtil.get_string_from_datetime(sample_point)
        processed_news.append([sample_point_str] + sentiment)

    sample_datetime = None
    end_datetime = None
    bucket = []

    for original_vec in seg_news_vec:
        news_datetime = CommonUtil.get_datetime_from_string_(original_vec[0])
        news_vec = original_vec[1:]

        if sample_datetime is None:
            # Parse the window bounds once, when the first row is seen.
            sample_datetime = CommonUtil.get_datetime_from_string_(st)
            end_datetime = CommonUtil.get_datetime_from_string_(et)

        time_interval = CommonUtil.get_interval_seconds(
            news_datetime, sample_datetime)

        # News item is well before the current sample point: skip this row.
        if time_interval < -half_window:
            continue

        # News item is past the current sample window: emit the totals for
        # the current sample point, then advance until the item is no longer
        # past the window.
        # NOTE(review): the lower bound is not re-checked after advancing, so
        # an item lying between two sample windows is counted toward the
        # later sample point -- confirm this is intended.
        while time_interval >= half_window:
            flush_bucket(bucket, sample_datetime)
            bucket = []
            sample_datetime = CommonUtil.get_next_sample_time(
                sample_datetime, sample_minutes, market_open, market_close)
            time_interval = CommonUtil.get_interval_seconds(
                news_datetime, sample_datetime)

        # The sample point moved beyond the requested end time: stop.
        if sample_datetime > end_datetime:
            break

        # The news item belongs to the current sample point.
        bucket.append(news_vec)

    # Emit the totals for the last sample point that collected news items.
    flush_bucket(bucket, sample_datetime)

    # Hand the per-sample rows to dimension89 (original note: "data for the
    # 89 dimensions") and return whatever it produces.
    list_out = dimension89(processed_news)
    return list_out
def process_original_price(originalPriceList, PRICE_START_TIME,
                           PRICE_END_TIME):
    """Average raw prices onto a 60-minute sampling grid.

    Args:
        originalPriceList: iterable of rows. ``row[0]`` is a timestamp string
            parsed with ``CommonUtil.get_datetime_from_string_``; ``row[1]``
            is the price (anything ``float`` accepts) or ``''`` for a missing
            value. Rows with a blank price are reported with ``print('null')``
            and skipped instead of crashing in ``float('')``.
        PRICE_START_TIME: timestamp string of the first sample point. Mind
            the time range actually covered by the price table.
        PRICE_END_TIME: timestamp string; processing stops once the current
            sample point is later than this.

    Returns:
        A list of ``[sample_time_string, average_price]`` entries, one per
        sample point that received at least one price. Averages are rounded
        to six decimal places. An empty input yields an empty list.

    NOTE(review): the single forward pass presumably relies on the rows being
    in chronological order -- confirm with the caller.
    """
    sample_minutes = 60
    currency_pair_precision = 4
    # Market opening and closing times passed to get_next_sample_time.
    market_open = '09:30:00'
    market_close = '23:30:00'
    # Half of the sampling period, in seconds.
    half_window = sample_minutes * 60 / 2
    # Resampled output, e.g. ['2018/6/30 15:00:00', 6.6433].
    processed_prices = []

    def flush_bucket(prices, sample_point):
        """Append the rounded mean of *prices* for *sample_point*, if any."""
        if not prices:
            return
        mean_price = round(sum(prices) / len(prices),
                           currency_pair_precision + 2)
        processed_prices.append(
            [CommonUtil.get_string_from_datetime(sample_point), mean_price])

    sample_datetime = None
    end_datetime = None
    bucket = []

    for original_price in originalPriceList:
        # A blank price cell cannot be averaged; report it and move on
        # rather than letting float('') raise ValueError.
        if original_price[1] == '':
            print('null')
            continue
        price_datetime = CommonUtil.get_datetime_from_string_(
            original_price[0])
        price_value = float(original_price[1])

        if sample_datetime is None:
            # Parse the window bounds once, when the first usable row is seen.
            sample_datetime = CommonUtil.get_datetime_from_string_(
                PRICE_START_TIME)
            end_datetime = CommonUtil.get_datetime_from_string_(
                PRICE_END_TIME)

        time_interval = CommonUtil.get_interval_seconds(
            price_datetime, sample_datetime)

        # Price is well before the current sample point: skip this row.
        if time_interval < -half_window:
            continue

        # Price is past the current sample window: emit the average for the
        # current sample point, then advance until the price is no longer
        # past the window.
        # NOTE(review): the lower bound is not re-checked after advancing, so
        # a price lying between two sample windows is counted toward the
        # later sample point -- confirm this is intended.
        while time_interval >= half_window:
            flush_bucket(bucket, sample_datetime)
            bucket = []
            sample_datetime = CommonUtil.get_next_sample_time(
                sample_datetime, sample_minutes, market_open, market_close)
            time_interval = CommonUtil.get_interval_seconds(
                price_datetime, sample_datetime)

        # The sample point moved beyond the requested end time: stop.
        if sample_datetime > end_datetime:
            break

        # The price belongs to the current sample point.
        bucket.append(price_value)

    # Emit the average for the last sample point that collected prices.
    flush_bucket(bucket, sample_datetime)
    return processed_prices