예제 #1
0
def remove_temp_files():
    """Delete the log and cache directories, then request them again.

    For each of the fixed directory names the path is obtained from
    ``data_root``, the whole tree is removed, and ``data_root`` is called
    once more for the same name (per the original note, to recreate the
    directory).
    """
    for folder in ('geckordriver', 'webcache', 'download'):
        target = data_root(folder)
        try:
            shutil.rmtree(target)
        except PermissionError:
            # Possibly still in use by a background process; ignore it.
            pass
        # Then create the directory again.
        data_root(folder)
예제 #2
0
파일: base.py 프로젝트: liudengfeng/zipline
def bcolz_table_path(table_name):
    """Return the path of the bcolz file for ``table_name``.

    The path is ``<data_root('bcolz')>/<table_name>.bcolz``. The containing
    directory is created if it does not exist yet.

    Parameters
    ----------
    table_name : str
        Name of the table; used as the file stem.

    Returns
    -------
    str
        Full path of the ``.bcolz`` file (the file itself is not created).
    """
    root_dir = data_root('bcolz')
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test when several processes start at the same time.
    os.makedirs(root_dir, exist_ok=True)
    return os.path.join(root_dir, '{}.bcolz'.format(table_name))
예제 #3
0
파일: cjmx.py 프로젝트: jingmouren/cnswd
def data_path(code, date_str):
    """Return the pickle file path for ``code`` on the given date.

    The file lives at ``<data_root('cjmx')>/<code>/<YYYYMMDD>.pkl``. The
    per-code directory is created if it does not exist yet.

    Parameters
    ----------
    code : str
        Sub-directory name under the ``cjmx`` data directory.
    date_str : str or datetime-like
        Anything accepted by ``pd.Timestamp``; formatted as ``%Y%m%d``.

    Returns
    -------
    str
        Full path of the ``.pkl`` file (the file itself is not created).
    """
    name = pd.Timestamp(date_str).strftime(r'%Y%m%d')
    data_dir = os.path.join(data_root('cjmx'), code)
    # makedirs(exist_ok=True) is safe when another process creates the
    # directory between a check and the creation, and also creates any
    # missing parent directory, which os.mkdir would not.
    os.makedirs(data_dir, exist_ok=True)
    return os.path.join(data_dir, f'{name}.pkl')
예제 #4
0
import os
import sys
import pickle
import pandas as pd
from pandas.tseries.offsets import BDay, Week, MonthBegin, QuarterBegin, Hour, Minute, Second
from hashlib import md5
from six import iteritems
import logbook

from cnswd.constants import MARKET_START
from cnswd.utils import data_root

# Module-level logger named after this module.
logger = logbook.Logger(__name__)

# Path returned by data_root for the 'webcache' name; resolved at import time.
TEMP_DIR = data_root('webcache')

DEFAULT_TIME_STR = '18:00:00'  # time at which the website updates its data
DEFAULT_FREQ = 'D'


def hash_args(*args, **kwargs):
    """Return an MD5 hex digest identifying a set of representable args.

    Positional arguments are rendered with ``str`` and joined by ``'_'``;
    keyword arguments are rendered as ``key=value`` in the order they were
    passed and joined by ``'_'``. The two parts are joined by ``':'``,
    encoded as UTF-8 and hashed.

    Notes
    -----
    * Keyword order matters: ``hash_args(a=1, b=2)`` and
      ``hash_args(b=2, a=1)`` give different digests.
    * Because ``'_'`` is the separator, arguments whose string form contains
      ``'_'`` can collide with a different argument split.

    Returns
    -------
    str
        32-character hexadecimal MD5 digest.
    """
    arg_string = '_'.join(str(arg) for arg in args)
    # dict.items() replaces the Python 2 compatibility shim six.iteritems;
    # iteration order and results are identical on Python 3.
    kwarg_string = '_'.join(
        str(key) + '=' + str(value) for key, value in kwargs.items())
    combined = ':'.join([arg_string, kwarg_string])
    return md5(combined.encode('utf-8')).hexdigest()
예제 #5
0
# Earliest date constant, timezone-aware (UTC).
EARLIEST_POSSIBLE_DATE = pd.Timestamp('2002-1-4', tz='UTC')

# Column names on the database side (16 entries).
DB_COLS_NAME = [
    'm0', 'm1', 'm2', 'm3', 'm6', 'm9', 'y1', 'y3', 'y5', 'y7', 'y10', 'y15',
    'y20', 'y30', 'y40', 'y50'
]
DB_INDEX_NAME = 'date'

# Column names on the output side (16 entries).
# NOTE(review): appears to correspond positionally to DB_COLS_NAME
# ('m0' -> '0month', 'y1' -> '1year', ...) — confirm against the caller.
OUTPUT_COLS_NAME = [
    '0month', '1month', '2month', '3month', '6month', '9month', '1year',
    '3year', '5year', '7year', '10year', '15year', '20year', '30year',
    '40year', '50year'
]
OUTPUT_INDEX_NAME = 'Time Period'
DATA_DIR = data_root('treasury')  # treasury interest-rate data is stored in this directory


def read_local_data():
    """Read all local Excel files under DATA_DIR into one DataFrame.

    Walks DATA_DIR recursively, loads every file whose name ends with
    "xlsx" using the '日期' column as a date-parsed index, and returns the
    concatenation of the loaded frames in walk order.
    """
    excel_paths = (
        os.path.join(folder, filename)
        for folder, _, filenames in os.walk(DATA_DIR)
        for filename in filenames
        if filename.endswith("xlsx")
    )
    frames = [
        pd.read_excel(excel_path, index_col='日期', parse_dates=True)
        for excel_path in excel_paths
    ]
    return pd.concat(frames)


def download_last_year():
예제 #6
0
 def __init__(self, download_path=data_root('download')):
     """Set the host URL and start a headless browser with a wait helper.

     NOTE(review): ``download_path`` is not used in the visible body, and
     its default ``data_root('download')`` is evaluated once, when the
     method is defined, not on each call — confirm both are intended.
     """
     self.host_url = 'http://www.sse.com.cn'
     # Log message reads: "initializing headless browser".
     logger.notice('初始化无头浏览器......')
     self.driver = make_headless_browser()
     # Wait helper bound to the driver; MAX_WAIT_SECOND is presumably the
     # timeout in seconds — verify against the WebDriverWait in use.
     self.wait = WebDriverWait(self.driver, MAX_WAIT_SECOND)
예제 #7
0
import time
import sys
import numpy as np
import pandas as pd

from cnswd.websource.exceptions import RetryException
from cnswd.utils import data_root, loop_period_by
from cnswd.websource.cninfo.constants import DB_NAME, DB_DATE_FREQ, TS_NAME, TS_DATE_FREQ
from cnswd.websource.cninfo.data_browse import DataBrowse
from cnswd.websource.cninfo.thematic_statistics import ThematicStatistics
from cnswd.sql.base import get_engine, get_session

from .base import DB_DATE_FIELD, DB_MODEL_MAPS, TS_DATE_FIELD, TS_MODEL_MAPS
from .units import fixed_data

# Path of the 'cninfo.csv' file inside the directory returned by
# data_root('record'); resolved once at import time.
# NOTE(review): `os` is not among the imports visible in this snippet —
# confirm it is imported earlier in the file.
record_path = os.path.join(data_root('record'), 'cninfo.csv')


def get_record(index):
    try:
        return pd.read_csv(record_path, index_col=0).loc[index].to_dict()
    except FileNotFoundError:
        df = pd.DataFrame(
            {
                '完成状态': '未执行',
                '尝试次数': 0,
                '完成时间': pd.Timestamp('now'),
                '备注': ''
            },
            index=[index])
        df.to_csv(record_path)