Esempio n. 1
0
    def file_processing(csv_file_name):
        """Load one crawled CSV file, store its rows, then move the file to backup.

        The file is read from ``CRAWLING_TARGET_PATH`` with CP949 encoding, its
        first row is skipped and the columns are named by ``COLUMN_NAMES``.
        Missing values are replaced with 0, a ``date`` column is filled with
        the date taken from the first 8-digit run in the file name, ``m_type``
        is translated through ``m_type_dict`` and the ``m_dept`` column is
        dropped before the frame is handed to ``smdw.insert``.

        :param csv_file_name: file name (not a full path) that contains an
            8-digit date, e.g. ``20210414.csv``.
        :raises IndexError: if the file name contains no 8-digit run.
        :raises KeyError: if an ``m_type`` value is not a key of ``m_type_dict``.
        :return: None
        """
        source_path = os.path.join(CRAWLING_TARGET_PATH, csv_file_name)
        trading_df = pd.read_csv(source_path,
                                 delimiter=',', encoding='CP949', names=COLUMN_NAMES,
                                 skiprows=[0])

        trading_df = trading_df.fillna(0)
        # Raw string: '\d' in a normal string literal is an invalid escape
        # sequence and triggers a warning on recent Python versions.
        trading_df['date'] = parse(str(re.findall(r'\d{8}', csv_file_name)[0])).date()
        trading_df['m_type'] = trading_df['m_type']\
            .apply(lambda m_type: m_type_dict[str(m_type)])
        trading_df = trading_df.drop(['m_dept'], axis=1)

        smdw.insert(trading_df)

        # Only reached when the insert did not raise, so a failed file stays
        # in the target directory.
        shutil.move(source_path,
                    os.path.join(CRAWLING_BACKUP_PATH, csv_file_name))
Esempio n. 2
0
def insert_company_from_market():
    """Re-create company records from the market data of a single date.

    Calls ``scw.delete()`` first, then fetches the market rows for the date
    returned by ``smdw.get_date(is_add_one=False)``, converts every row with
    ``scw.make_object`` and inserts all results that are not ``None``.

    :return: None
    """
    se = StartEndLogging()

    scw.delete()
    target_date = smdw.get_date(is_add_one=False)
    market_qs = smdw.gets(date=target_date)
    log.debug(f'market data size: {len(market_qs)}')

    # Convert lazily, keeping only rows that produced an object.
    converted = (scw.make_object(row) for row in market_qs)
    scw.insert([obj for obj in converted if obj is not None])

    se.end()
Esempio n. 3
0
def get_krx_crawling_period(s_date, e_date, is_that_day=False):
    """Resolve the start and end dates of a crawling run.

    :param s_date: start date as a string understood by ``parse``, or
        ``'all'`` (case-insensitive) to start from ``BASE_TRADING_DATE``.
        If it cannot be used (wrong type, unparseable), the date returned by
        ``smdw.get_date(is_add_one=True)`` is used instead.
    :param e_date: end date as a string understood by ``parse``; falls back
        to today when it cannot be parsed.
    :param is_that_day: when False (default) an explicit start date is raised
        to at least ``smdw.get_date(is_add_one=True)``; when True it is used
        as given. Ignored for ``'all'``.
    :return: tuple ``(cs_date, ce_date)`` of ``date`` objects with
        ``ce_date >= cs_date``.
    """
    t_date = smdw.get_date(is_add_one=True)
    try:
        if s_date.lower() == 'all':
            cs_date = parse(BASE_TRADING_DATE).date()
        else:
            cs_date = parse(s_date).date()
            if not is_that_day:
                cs_date = max(cs_date, t_date)
    except Exception:
        # Best-effort fallback for bad input; unlike a bare ``except`` this
        # no longer swallows KeyboardInterrupt / SystemExit.
        cs_date = t_date

    # Sample the clock once so the date and time used below are consistent
    # even when the call happens around midnight.
    now = datetime.now()
    today = now.date()
    try:
        ce_date = parse(e_date).date()
    except Exception:
        ce_date = today

    if ce_date > today:
        # NOTE(review): a future end date is clamped to today without the
        # crawling-time check below — confirm this is intended.
        ce_date = today
    elif ce_date == today and now.time() < parse(BASE_CRAWLING_TIME).time():
        # Before BASE_CRAWLING_TIME the end date is moved back one day.
        ce_date = ce_date - timedelta(days=1)

    # Never return an end date earlier than the start date.
    ce_date = max(cs_date, ce_date)
    return cs_date, ce_date
Esempio n. 4
0
def insert(csv_file_name='20210414.csv'):
    """Load one crawled CSV file from ``CRAWLING_TARGET_PATH`` and store it.

    The original body read a hard-coded file but derived the ``date`` column
    from ``csv_file_name``, a name that was not defined in the function. The
    file name is now a parameter whose default is the previously hard-coded
    value, so the file that is read and the date that is stored always agree.

    :param csv_file_name: file name (not a full path) containing an 8-digit
        date; defaults to ``'20210414.csv'``.
    :raises IndexError: if the file name contains no 8-digit run.
    :raises KeyError: if an ``m_type`` value is not a key of ``m_type_list``.
    :return: None
    """
    trading_df = pd.read_csv(os.path.join(CRAWLING_TARGET_PATH,
                                          csv_file_name),
                             delimiter=',',
                             encoding='CP949',
                             names=COLUMN_NAMES,
                             skiprows=[0])

    trading_df = trading_df.fillna(0)
    # Raw string avoids the invalid '\d' escape warning.
    trading_df['date'] = parse(str(re.findall(r'\d{8}',
                                              csv_file_name)[0])).date()
    trading_df['m_type'] = trading_df['m_type'] \
        .apply(lambda m_type: m_type_list[str(m_type)])
    trading_df = trading_df.drop(['m_dept'], axis=1)

    smdw.insert(trading_df)
Esempio n. 5
0
    def get_normal_marketdata(com_code):
        """Return the market rows of a company from its last abnormal change on.

        The ratio of the last row's ``t_volume`` to the first row's is checked
        against ``TOTAL_CHECK_MIN_RATIO`` / ``TOTAL_CHECK_MAX_RATIO``. Only
        when it is outside that range are the rows scanned one by one; each
        row whose ``t_volume`` ratio to the previous row is outside
        ``DAY_CHECK_MIN_RATIO`` / ``DAY_CHECK_MAX_RATIO``, or whose ``volume``
        is 0, becomes the new starting row. Rows dated on or after the final
        starting row are returned.

        NOTE(review): the scan presumably relies on ``smdw.gets`` returning
        rows in ascending date order — confirm against its implementation.

        :param com_code: company code passed to ``smdw.gets``.
        :return: queryset filtered with ``date__gte`` the starting row's date;
            the unfiltered (empty) queryset when the company has no rows.
        """
        company_market_qs = smdw.gets(com_code=com_code)
        first_data = company_market_qs.first()
        if first_data is None:
            # Empty queryset: first() yields None, so there is nothing to
            # screen and attribute access below would fail.
            return company_market_qs

        yesterday_data, first_normal_data = first_data, first_data
        if first_data.t_volume != 0:
            diff_ratio = company_market_qs.last(
            ).t_volume / first_data.t_volume
        else:
            # The first t_volume is 0, so the overall change cannot be checked.
            # Set diff_ratio so that the detailed per-row check is performed.
            diff_ratio = TOTAL_CHECK_MAX_RATIO + 1.

        if diff_ratio > TOTAL_CHECK_MAX_RATIO or diff_ratio < TOTAL_CHECK_MIN_RATIO:
            for market_data in company_market_qs[
                    1:]:  # the first row was already consumed above via first()
                if yesterday_data.t_volume != 0 and market_data.t_volume != 0:
                    diff_ratio = market_data.t_volume / yesterday_data.t_volume
                    if diff_ratio > DAY_CHECK_MAX_RATIO or diff_ratio < DAY_CHECK_MIN_RATIO:
                        first_normal_data = market_data
                    # A stock with no trading volume that day is regarded as
                    # subject to a capital reduction/increase.
                    if market_data.volume == 0:
                        first_normal_data = market_data
                yesterday_data = market_data

        normal_qs = company_market_qs.filter(date__gte=first_normal_data.date)
        log.debug(f'com_code: {com_code},  modeling data size: '
                  f'total({len(company_market_qs)}), normal({len(normal_qs)})')
        return normal_qs
Esempio n. 6
0
def insert_marketdata_from_mergefile():
    """Run the market-data reload step; the merge-file import is disabled.

    As written, the only action between the start and end logging calls is
    ``smdw.delete()``. The chunked CSV import is kept below as commented-out
    code and does not run.

    :return: None
    """
    progress = StartEndLogging('insert marketdata')

    # Column layout used by the disabled merge-file import below.
    col_names = (
        'com_code com_name m_type m_dept close diff ratio open high low '
        'volume value t_value t_volume date'
    ).split()
    # merge_reader = pd.read_csv(MERGE_FILE_PATH, encoding='CP949', low_memory=False,
    #                            names=col_names, chunksize=3000, skiprows=[0])
    #
    # loop_cnt = 1
    # for merge_df in merge_reader:
    #     merge_df = merge_df.fillna(0)
    #     smdw.insert(merge_df)
    #     progress.mid(f'{loop_cnt * 3000}')
    #     loop_cnt += 1

    smdw.delete()

    progress.end()
Esempio n. 7
0
def update_company_from_market():
    """Build company objects from one date's market data and insert them.

    Market rows are fetched for the date returned by
    ``smdw.get_date(is_min=False, is_add_one=False)``. For each row the
    company frame is filtered by ``com_code``; ``scw.make_object`` receives
    only the market row when no company matches, otherwise the matching
    frame slice followed by the market row. Results that are ``None`` are
    skipped and the rest are passed to ``scw.insert`` in one call.

    :return: None
    """
    se = StartEndLogging()

    target_date = smdw.get_date(is_min=False, is_add_one=False)
    market_qs = smdw.gets(date=target_date)
    company_df = read_frame(scw.gets('id'))
    log.debug(f'market data size: {len(market_qs)}, company data size: {len(company_df)}')

    company_objects = []
    for market_data in market_qs:
        matched = company_df[company_df['com_code'] == market_data.com_code]
        # Unknown company -> build from market data alone; known -> pass both.
        make_args = (market_data,) if matched.empty else (matched, market_data)
        company_object = scw.make_object(*make_args)
        if company_object is None:
            continue
        company_objects.append(company_object)

    scw.insert(company_objects)

    se.end()
Esempio n. 8
0
def update_modelingdata_from_market():
    """
    In daily jobs, counting the number of added company records is ignored and
    market data that has not been applied yet is added in bulk.
    - This job is performed only during system initialization or weekly jobs.

    Market rows with a date later than ``smlw.get_date()`` are read into a
    frame and the ``date``, ``com_code`` and ``MODELING_COLUMNS`` columns are
    passed to ``smlw.insert``. On any error the error is logged and the
    process exits with status 1.

    :return: None
    """
    se = StartEndLogging()

    try:
        data_df = read_frame(smdw.gets('date', date__gt=smlw.get_date()))
        smlw.insert(data_df[['date', 'com_code'] + MODELING_COLUMNS])
    except Exception as e:
        log.error(e)
        # Exit with a non-zero status: a bare sys.exit() reports success (0)
        # to the calling shell/scheduler even though the update failed.
        sys.exit(1)

    se.end()
Esempio n. 9
0
def delete():
    """Delegate to ``smdw.delete()``.

    :return: None
    """
    smdw.delete()
    return None