Example 1
    def run(self):
        self.prepareData()
        if self.state == '':  # already the latest data
            return

        # fetch data from sql
        dataO = readDB(self.state, ConfigQuant)
        dataO = dataO.drop_duplicates([self.dateField])
        dataO = dataO.sort_values(self.dateField)

        data_result = pd.DataFrame(dataO[self.dateField])
        tmp_df = priceTechnicalIndicatorTimeSeries(dataO[self.valueField],
                                                   self.lags,
                                                   'PRICE')  # MA, MA_DIFF...
        data_result = data_result.join(tmp_df)

        # trim data for increment
        if self.last_update_date != '':
            data_result = data_result.loc[
                data_result[self.dateField] > self.last_update_date]

        # add timestamp
        data_result['time_stamp'] = datetime.now()

        # dump data into sql
        writeDB(self.targetTableName, data_result, ConfigQuant, self.if_exist)
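
The readDB / writeDB helpers and the ConfigQuant object used throughout these examples are never shown. Below is a minimal sketch of what they might look like, assuming ConfigQuant is a dict of MySQL connection settings (user, password, host, db, charset) like the db_config used in the later examples; the real implementations may differ.

# Hypothetical sketch only -- the real helpers are defined elsewhere.
import pandas as pd
from sqlalchemy import create_engine


def _engine(db_config):
    # db_config is assumed to carry user/password/host/db/charset keys
    return create_engine(
        'mysql+pymysql://{user}:{password}@{host}/{db}?charset={charset}'.format(**db_config))


def readDB(sql_statement, db_config):
    # run a SELECT statement and return the result as a DataFrame
    return pd.read_sql(sql_statement, _engine(db_config))


def writeDB(table_name, data, db_config, if_exists='append'):
    # dump a DataFrame into the target table; callers pass 'replace' for the
    # first chunk and 'append' afterwards
    data.to_sql(table_name, _engine(db_config), if_exists=if_exists, index=False)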
Example 2
def calFullArea(db_config,
                con_area,
                con_quote,
                con_weights,
                chunk_size,
                start_date='2007-01-01'):
    # create sql engine
    my_engine = create_engine(
        'mysql+pymysql://{user}:{password}@{host}/{db}?charset={charset}'.
        format(**db_config))

    # get area    ************ future data, need to find better solution
    sql_statement = "SELECT `{code}`, `{area}`  FROM `{tableName}`;".format(
        **con_area)
    stock_area = pd.read_sql(sql_statement, my_engine)
    stock_area = stock_area.rename(columns={
        con_area['code']: 'code',
        con_area['area']: 'area'
    })

    # get total trade dates
    trade_dates = getTradeDates(my_engine, con_quote['date'],
                                con_quote['tableName'], start_date)

    # read and process data by trade dates
    quote_fields = list(map(lambda x: '`%s`' % x, con_quote['fields']))
    quote_fields = ','.join(quote_fields)
    write_sql_method = 'replace'
    for i in range(int(trade_dates.size / chunk_size) + 1):
        tmp_trade_dates = trade_dates[i * chunk_size:(i + 1) * chunk_size]
        tmp_trade_dates = list(map(lambda x: "'%s'" % x, tmp_trade_dates))
        date_range = ','.join(tmp_trade_dates)

        # get quote data
        basic_data = getDataFromSQL(my_engine, con_quote['date'],
                                    con_quote['code'], quote_fields,
                                    con_quote['tableName'], date_range)

        # get weights
        weighted_field = '`%s`' % con_weights['weight']
        weights = getDataFromSQL(my_engine, con_weights['date'],
                                 con_weights['code'], weighted_field,
                                 con_weights['tableName'], date_range)
        weights = weights.rename(columns={con_weights['weight']: 'weight'})

        basic_data = basic_data.merge(weights,
                                      on=['date', 'code'],
                                      how='inner')
        basic_data = basic_data.merge(stock_area, on='code', how='inner')

        # calculate weighted sum index quote
        area_index_quote = calWeightedSumIndexQuote(basic_data,
                                                    con_quote['fields'],
                                                    'date', 'area', 'weight')

        # add timestamp
        area_index_quote[targetNewTimeStampField] = datetime.now()

        writeDB(targetTableName, area_index_quote, db_config, write_sql_method)
        write_sql_method = 'append'
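
calFullArea relies on getTradeDates and getDataFromSQL, which are not shown. A rough sketch follows, under the assumption that both return DataFrames with the code/date columns renamed to 'code'/'date' (the merges above join on those names); the signatures are inferred from the call sites.

# Hypothetical sketches; not the actual implementations.
import pandas as pd


def getTradeDates(engine, date_field, table_name, start_date):
    # distinct trade dates from the quote table, from start_date onward
    sql = "SELECT DISTINCT `%s` FROM `%s` WHERE `%s` >= '%s' ORDER BY `%s`;" % (
        date_field, table_name, date_field, start_date, date_field)
    return pd.read_sql(sql, engine).iloc[:, 0]


def getDataFromSQL(engine, date_field, code_field, value_fields, table_name, date_range):
    # value_fields is a pre-quoted, comma-joined column list; date_range is a
    # comma-joined list of quoted dates, matching how the callers build them
    sql = "SELECT `%s`, `%s`, %s FROM `%s` WHERE `%s` IN (%s);" % (
        date_field, code_field, value_fields, table_name, date_field, date_range)
    data = pd.read_sql(sql, engine)
    return data.rename(columns={date_field: 'date', code_field: 'code'})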
Example 3
def calIncrmArea(db_config,
                 con_area,
                 con_quote,
                 con_weights,
                 chunk_size,
                 start_date='2007-01-01'):
    # create sql engine
    my_engine = create_engine(
        'mysql+pymysql://{user}:{password}@{host}/{db}?charset={charset}'.
        format(**db_config))

    write_sql_method = 'append'

    # get quote data (trim by timestamp)
    quote_fields = list(map(lambda x: '`%s`' % x, con_quote['fields']))
    quote_fields = ','.join(quote_fields)
    basic_data = getIncrmDataFromSQL(my_engine, con_quote['date'],
                                     con_quote['code'], quote_fields,
                                     con_quote['tableName'], targetTableName,
                                     con_quote['date'], targetDateField)

    # already the latest data
    if basic_data.empty:
        return

    # get weights (trim by timestamp)
    weight_field = '`%s`' % con_weights['weight']
    weights = getIncrmDataFromSQL(my_engine, con_weights['date'],
                                  con_weights['code'], weight_field,
                                  con_weights['tableName'], targetTableName,
                                  con_weights['date'], targetDateField)
    weights = weights.rename(columns={con_weights['weight']: 'weight'})

    # get area    ************ future data, need to find better solution
    sql_statement = "SELECT `{code}`, `{area}`  FROM `{tableName}`;".format(
        **con_area)
    stock_area = pd.read_sql(sql_statement, my_engine)
    stock_area = stock_area.rename(columns={
        con_area['code']: 'code',
        con_area['area']: 'area'
    })

    basic_data = basic_data.merge(weights, on=['date', 'code'], how='inner')
    basic_data = basic_data.merge(stock_area, on='code', how='inner')

    area_index_quote = calWeightedSumIndexQuote(basic_data,
                                                con_quote['fields'], 'date',
                                                'area', 'weight')

    area_index_quote[targetNewTimeStampField] = datetime.now()

    if not area_index_quote.empty:
        writeDB(targetTableName, area_index_quote, db_config, write_sql_method)
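
getIncrmDataFromSQL is the incremental counterpart of getDataFromSQL: it only pulls source rows newer than the latest date already written to the target table. A sketch under the same naming assumptions as above:

# Hypothetical sketch; signature inferred from the call sites in calIncrmArea.
import pandas as pd


def getIncrmDataFromSQL(engine, date_field, code_field, value_fields, source_table,
                        target_table, source_date_field, target_date_field):
    # latest date already present in the target table (None if the table is empty)
    last_date = pd.read_sql(
        "SELECT MAX(`%s`) FROM `%s`;" % (target_date_field, target_table), engine).iloc[0, 0]
    sql = "SELECT `%s`, `%s`, %s FROM `%s`" % (date_field, code_field, value_fields, source_table)
    if last_date is not None:
        sql += " WHERE `%s` > '%s'" % (source_date_field, last_date)
    data = pd.read_sql(sql + ';', engine)
    return data.rename(columns={date_field: 'date', code_field: 'code'})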
Example 4
def calFullIndustry(db_config, con_industry, con_quote, con_weights, chunk_size, start_date='2007-01-01'):
    # create sql engine
    my_engine = create_engine('mysql+pymysql://{user}:{password}@{host}/{db}?charset={charset}'.format(**db_config))

    # get total trade dates
    trade_dates = getTradeDates(my_engine, con_quote['date'], con_quote['tableName'], start_date)

    # read and process data by trade dates
    quote_fields = list(map(lambda x: '`%s`' % x, con_quote['fields']))
    quote_fields = ','.join(quote_fields)
    write_sql_method = 'replace'
    for i in range(int(trade_dates.size / chunk_size) + 1):
        tmp_trade_dates = trade_dates[i*chunk_size : (i+1)*chunk_size]
        tmp_trade_dates = list(map(lambda x: "'%s'" % x, tmp_trade_dates))
        date_range = ','.join(tmp_trade_dates)

        # get quote data
        basic_data = getDataFromSQL(my_engine, con_quote['date'], con_quote['code'], quote_fields,
                                    con_quote['tableName'], date_range)
        # get weights
        weight_field = '`%s`' % con_weights['weight']
        weights = getDataFromSQL(my_engine, con_weights['date'], con_weights['code'], weight_field,
                                 con_weights['tableName'], date_range)
        weights = weights.rename(columns={con_weights['weight']: 'weight'})

        # get industry
        industry_field = '`%s`' % con_industry['industry']
        industry = getDataFromSQL(my_engine, con_industry['date'], con_industry['code'], industry_field,
                                  con_industry['tableName'], date_range)
        industry = industry.rename(columns={con_industry['industry']: 'industry'})
        # tot_ind = industry['industry'].unique()

        basic_data = basic_data.merge(weights, on=['date', 'code'], how='inner')
        basic_data = basic_data.merge(industry, on=['date', 'code'], how='inner')

        # calculate index quote
        industry_index_quote = calWeightedSumIndexQuote(basic_data, con_quote['fields'], 'date', 'industry', 'weight')

        # add timestamp
        industry_index_quote[targetTimeStampField] = datetime.now()

        # dump data to sql
        writeDB(targetTableName, industry_index_quote, db_config, write_sql_method)
        write_sql_method = 'append'
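
calWeightedSumIndexQuote is shared by the area and industry pipelines but is not defined in these snippets. A plausible sketch, assuming it builds one index quote per (date, category) group as the weight-normalised sum of each quote field:

# Hypothetical sketch; the real aggregation may differ.
import pandas as pd


def calWeightedSumIndexQuote(data, quote_fields, date_field, category_field, weight_field):
    def weighted(group):
        w = group[weight_field] / group[weight_field].sum()
        return pd.Series({field: (group[field] * w).sum() for field in quote_fields})

    return (data.groupby([date_field, category_field])
                .apply(weighted)
                .reset_index())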
Example 5
    def run(self):
        self.prepareData()
        if self.state == '':  # already the latest data
            return

        # fetch data from sql
        dataO = readDB(self.state, ConfigQuant)
        dataO = dataO.drop_duplicates([self.dateField])
        dataO = dataO.sort_values(self.dateField)  # sort by date
        dataO = dataO.fillna(method='ffill')
        # for field in [self.openField, self.highField, self.lowField, self.closeField, self.volumeField]:
        #     dataO.loc[:, field] = dataO[field].astype('float')

        data_result = pd.DataFrame(dataO[self.dateField])
        # MA, MA_DIFF...
        tmp_df = priceTechnicalIndicator(dataO[self.closeField], self.lags, '')
        data_result = data_result.join(tmp_df)

        # ADX, CCI ...
        tmp_df = priceTechnicalIndicatorOHLCV(dataO[self.openField],
                                              dataO[self.highField],
                                              dataO[self.lowField],
                                              dataO[self.closeField],
                                              dataO[self.volumeField])
        data_result = data_result.join(tmp_df)

        # VMA, VMA_DIFF..., OBV, VOLUME_RATIO...
        tmp_df = volumeTechnicalIndicators(dataO[self.closeField],
                                           dataO[self.volumeField], self.lags,
                                           'V')
        data_result = data_result.join(tmp_df)

        # trim data for increment
        if self.last_update_date != '':
            data_result = data_result.loc[
                data_result[self.dateField] > self.last_update_date]

        # add timestamp
        data_result['time_stamp'] = datetime.now()

        # dump data into sql
        writeDB(self.targetTableName, data_result, ConfigQuant, self.if_exist)
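
priceTechnicalIndicator and its siblings are imported from elsewhere. As an illustration of the "MA, MA_DIFF..." family the comments refer to, a minimal sketch (column names are assumptions):

# Hypothetical sketch of the simplest indicator family used above.
import pandas as pd


def priceTechnicalIndicator(close, lags, prefix=''):
    result = pd.DataFrame(index=close.index)
    for lag in lags:
        ma = close.rolling(lag).mean()
        result['%sMA_%d' % (prefix, lag)] = ma                # moving average
        result['%sMA_DIFF_%d' % (prefix, lag)] = close - ma   # distance from the MA
    return result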
Example 6
def calIncrmIndustry(db_config, con_industry, con_quote, con_weights, chunk_size, start_date='2007-01-01'):
    # create sql engine
    my_engine = create_engine('mysql+pymysql://{user}:{password}@{host}/{db}?charset={charset}'.format(**db_config))

    # incremental write to database
    write_sql_method = 'append'

    # get quote data (trim by timestamp)
    quote_fields = list(map(lambda x: '`%s`' % x, con_quote['fields']))
    quote_fields = ','.join(quote_fields)
    basic_data = getIncrmDataFromSQL(my_engine, con_quote['date'], con_quote['code'], quote_fields,
                                con_quote['tableName'], targetTableName, con_quote['date'], targetDateField)

    # already the latest data
    if basic_data.empty:
        return

    # get weights (trim by timestamp)
    weight_field = '`%s`' % con_weights['weight']
    weights = getIncrmDataFromSQL(my_engine, con_weights['date'], con_weights['code'], weight_field,
                             con_weights['tableName'], targetTableName, con_weights['date'], targetDateField)
    weights = weights.rename(columns={con_weights['weight']: 'weight'})

    # get industry (trim by timestamp)
    ind_field = '`%s`' % con_industry['industry']
    industry = getIncrmDataFromSQL(my_engine, con_industry['date'], con_industry['code'], ind_field,
                                  con_industry['tableName'], targetTableName, con_industry['date'], targetDateField)
    industry = industry.rename(columns={con_industry['industry']: 'industry'})

    basic_data = basic_data.merge(weights, on=['date', 'code'], how='inner')
    basic_data = basic_data.merge(industry, on=['date', 'code'], how='inner')

    # calculate index quote
    industry_index_quote = calWeightedSumIndexQuote(basic_data, con_quote['fields'], 'date', 'industry', 'weight')

    # add timestamp
    industry_index_quote[targetTimeStampField] = datetime.now()

    # dump data to sql
    writeDB(targetTableName, industry_index_quote, db_config, write_sql_method)
Example 7
    def runFull(self):
        # get total code list
        tmp_state = 'select distinct %s from %s' % (self.codeField,
                                                    self.sourceTableName)
        code_list = readDB(tmp_state, ConfigQuant).values
        code_list = code_list.T[0]

        # get trade date list from calendar
        tmp_state = 'select `date` from %s' % self.calendarTableName
        tradedates = readDB(tmp_state, ConfigQuant)['date']

        # calculate num of loop
        loop_num = int(code_list.size / self.chunkSize)
        if code_list.size > loop_num * self.chunkSize:
            loop_num = loop_num + 1

        if self.isMultiProcess:  # use multi processing
            # register pool
            pool = Pool(processes=self.processNum)
            # fetch and process data from sql by chunk
            for i in range(loop_num):
                tmp_code = code_list[i * self.chunkSize:(i + 1) *
                                     self.chunkSize]
                tmp_code_str = list(map(lambda x: "'%s'" % x, tmp_code))
                tmp_range = ','.join(tmp_code_str)
                tmp_state = self.state + "`%s` in (%s)" % (self.codeField,
                                                           tmp_range)
                dataO = readDB(tmp_state, ConfigQuant)
                dataO.loc[:, self.reportPeriodField] = dataO[
                    self.yearField] * 100 + dataO[
                        self.seasonField]  # combine report year and season
                dataO = dataO.drop_duplicates(
                    [self.codeField, self.reportPeriodField])
                dataO = dataO.sort_values(
                    self.reportPeriodField)  # sort by report period

                # process chunk data
                pool_results = []
                for code in tmp_code:
                    tmp_data = dataO.loc[dataO[self.codeField] ==
                                         code]  # dataO already sorted by date

                    if tmp_data.empty:
                        continue

                    # multiprocessing
                    tmp_procs = pool.apply_async(self.coreComputation,
                                                 (code, tmp_data, tradedates))
                    pool_results.append(tmp_procs)

                # get result from the process pool
                data_tot_result = pd.DataFrame([])
                for tmp_procs in pool_results:
                    data_result = tmp_procs.get()
                    data_tot_result = data_tot_result.append(data_result)

                # add timestamp
                data_tot_result['time_stamp'] = datetime.now()

                if data_tot_result.empty:
                    continue

                # dump chunk data into sql
                writeDB(self.targetTableName, data_tot_result, ConfigQuant,
                        self.if_exist)
                self.if_exist = 'append'

            pool.close()
        else:  # not multiprocess
            # fetch and process data from sql by chunk
            for i in range(loop_num):
                tmp_code = code_list[i * self.chunkSize:(i + 1) *
                                     self.chunkSize]
                tmp_code_str = list(map(lambda x: "'%s'" % x, tmp_code))
                tmp_range = ','.join(tmp_code_str)
                tmp_state = self.state + "`%s` in (%s)" % (self.codeField,
                                                           tmp_range)
                dataO = readDB(tmp_state, ConfigQuant)
                dataO.loc[:, self.reportPeriodField] = dataO[
                    self.yearField] * 100 + dataO[
                        self.seasonField]  # combine report year and season
                dataO = dataO.drop_duplicates(
                    [self.codeField, self.reportPeriodField])
                # dataO = dataO.sort_values(self.reportPeriodField)  # sort by report period

                data_tot_result = pd.DataFrame([])
                for code in tmp_code:
                    tmp_data = dataO.loc[dataO[self.codeField] ==
                                         code]  # dataO already sorted by date
                    tmp_data = tmp_data.sort_values(
                        self.reportPeriodField)  # sort by report period

                    if tmp_data.empty:
                        continue

                    data_result = self.coreComputation(code, tmp_data,
                                                       tradedates)

                    data_tot_result = data_tot_result.append(data_result)

                # add timestamp
                data_tot_result['time_stamp'] = datetime.now()

                if data_tot_result.empty:
                    continue

                # dump chunk data into sql
                writeDB(self.targetTableName, data_tot_result, ConfigQuant,
                        self.if_exist)
                self.if_exist = 'append'
Example 8
    def runIncrm(self):
        # fetch and process all incremental data from sql
        dataO = readDB(self.state, ConfigQuant)
        dataO.loc[:, self.reportPeriodField] = dataO[
            self.yearField] * 100 + dataO[self.seasonField]
        dataO = dataO.drop_duplicates([self.codeField, self.reportPeriodField])
        # dataO = dataO.sort_values(self.reportPeriodField) # sort by date

        # get calendar
        tmp_state = "select `date` from `%s`;" % self.calendarTableName
        trade_calendar = readDB(tmp_state, ConfigQuant)
        trade_calendar = trade_calendar['date']

        # get latest time stamp for each stock in the target table
        tmp_state = "select `%s`, max(`%s`) as %s from `%s` group by `%s`" % (
            self.codeField, self.timeStampField, self.latestTimeStamp,
            self.targetTableName, self.codeField)
        target_latest_time_stamp = readDB(tmp_state, ConfigQuant)
        target_latest_time_stamp = target_latest_time_stamp.set_index(
            self.codeField)

        # get the latest trade date of the data in target table
        tmp_state = "select max(`%s`) from `%s`" % (self.dateField,
                                                    self.targetTableName)
        target_latest_trade_date = readDB(tmp_state, ConfigQuant)
        target_latest_trade_date = target_latest_trade_date.iloc[0, 0]

        # process incremental data
        code_list = dataO[self.codeField].unique()

        data_tot_result = pd.DataFrame([])
        # use multiprocessing to reduce computation time
        if self.isMultiProcess:
            pool = Pool(processes=self.processNum)
            pool_results = []
            pool_codes = []  # track which code each async result belongs to
            pool_data_first_date = []
            no_update_code_list = []
            # build pool
            for code in code_list:
                tmp_data = dataO.loc[dataO[self.codeField] == code]
                tmp_data = tmp_data.sort_values(
                    self.reportPeriodField)  #  sorted by report period

                if tmp_data.empty:
                    continue

                # find the latest time stamp, and compare it with the raw data, delete obsolete data if there exists
                is_new_data = False
                try:
                    tmp_target_latest_time_stamp = target_latest_time_stamp.loc[
                        code, self.latestTimeStamp]
                    tmp_data_source_new = tmp_data.loc[tmp_data[
                        self.timeStampField] >= tmp_target_latest_time_stamp]
                    tmp_data_source_unexpanded = tmp_data.loc[tmp_data[
                        self.releaseDateField] > target_latest_trade_date]
                    tmp_data_source_new = tmp_data_source_new.append(
                        tmp_data_source_unexpanded)
                    tmp_data_source_new = tmp_data_source_new.drop_duplicates(
                        [self.codeField, self.yearField, self.seasonField])

                    tmp_data_source_new = tmp_data_source_new.loc[
                        ~tmp_data_source_new[self.releaseDateField].isnull()]

                    if not tmp_data_source_new.empty:  # obsolete data
                        data_new_first_data = tmp_data_source_new[
                            self.releaseDateField].min(
                            )  # find the earliest report in new update data

                        if isinstance(data_new_first_data, str):  # else data_new_first_data is nan
                            is_new_data = True
                            deleteObsoleteDataFromDB(
                                code, data_new_first_data, self.dateField,
                                self.codeField, self.targetTableName,
                                ConfigQuant
                            )  # delete obsolete data not earlier than the earliest report date
                except KeyError:
                    is_new_data = True
                    data_new_first_data = '2007-01-01'  # this stock code is new to the target table

                if is_new_data:  # have values updated or completely new
                    tmp_result = pool.apply_async(
                        self.coreComputation, (code, tmp_data, trade_calendar))
                    pool_results.append(tmp_result)
                    pool_codes.append(code)
                    data_new_first_data = min(
                        data_new_first_data, trade_calendar[
                            trade_calendar > target_latest_trade_date].iloc[0])
                    pool_data_first_date.append(data_new_first_data)
                else:  # no new data from source table to update target table
                    no_update_code_list.append(code)

            # get result from the pool
            for tmp_result, tmp_code, tmp_first_date in zip(
                    pool_results, pool_codes, pool_data_first_date):
                data_result = tmp_result.get()
                data_result = data_result.loc[
                    data_result[self.dateField] >=
                    tmp_first_date]  # slice data, from the earliest report release date
                print('%s regenerates %d rows' % (tmp_code, data_result.shape[0]))
                data_tot_result = data_tot_result.append(data_result)

            # replicate the latest records for those not updated codes
            replicate_records = self.replicateLatestRecord(
                no_update_code_list, trade_calendar, target_latest_trade_date)
            data_tot_result = data_tot_result.append(replicate_records)

        else:  # single process
            no_update_code_list = []
            for code in code_list:
                tmp_data = dataO.loc[dataO[self.codeField] == code]
                tmp_data = tmp_data.sort_values(
                    self.reportPeriodField)  # sorted by report period

                if tmp_data.empty:
                    continue

                # find the latest time stamp, and compare it with the raw data, delete obsolete data if there exists
                has_new_data = False
                try:
                    tmp_target_latest_time_stamp = target_latest_time_stamp.loc[
                        code, self.latestTimeStamp]
                    tmp_data_source_new = tmp_data[tmp_data[
                        self.timeStampField] >= tmp_target_latest_time_stamp]
                    tmp_data_source_unexpanded = tmp_data[tmp_data[
                        self.releaseDateField] > target_latest_trade_date]
                    tmp_data_source_new = tmp_data_source_new.append(
                        tmp_data_source_unexpanded)
                    tmp_data_source_new = tmp_data_source_new.drop_duplicates(
                        [self.codeField, self.yearField, self.seasonField])

                    if not tmp_data_source_new.empty:  # obsolete data
                        tmp_data_new_first_data = tmp_data_source_new[
                            self.releaseDateField].min(
                            )  # find the earliest report in new update data
                        if isinstance(tmp_data_new_first_data, str):  # else tmp_data_new_first_data is nan
                            has_new_data = True
                            deleteObsoleteDataFromDB(
                                code, tmp_data_new_first_data, self.dateField,
                                self.codeField, self.targetTableName,
                                ConfigQuant
                            )  # delete obsolete data later than the earliest report date
                except KeyError:
                    has_new_data = True  # this stock code is new to the target table
                    tmp_data_new_first_data = '2007-01-01'

                if has_new_data:
                    data_result = self.coreComputation(code, tmp_data,
                                                       trade_calendar)
                    tmp_data_new_first_data = min(
                        tmp_data_new_first_data, trade_calendar[
                            trade_calendar > target_latest_trade_date].iloc[0])
                    data_result = data_result.loc[
                        data_result[self.dateField] >= tmp_data_new_first_data]
                    print('%s regenerate %d data' %
                          (code, data_result.shape[0]))
                    data_tot_result = data_tot_result.append(data_result)
                else:  # no new data from source table to update target table
                    no_update_code_list.append(code)

            # replicate latest records for those not updated codes
            replicate_records = self.replicateLatestRecord(
                no_update_code_list, trade_calendar, target_latest_trade_date)
            data_tot_result = data_tot_result.append(replicate_records)

        if not data_tot_result.empty:
            # add timestamp
            data_tot_result['time_stamp'] = datetime.now()

            # dump chunk data into sql
            writeDB(self.targetTableName, data_tot_result, ConfigQuant,
                    self.if_exist)
            self.if_exist = 'append'
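
deleteObsoleteDataFromDB is called above to clear rows that a re-released financial report makes stale. A sketch of what such a helper might do, assuming ConfigQuant holds the same connection keys as db_config in the earlier examples:

# Hypothetical sketch only.
from sqlalchemy import create_engine, text


def deleteObsoleteDataFromDB(code, first_date, date_field, code_field, table_name, db_config):
    engine = create_engine(
        'mysql+pymysql://{user}:{password}@{host}/{db}?charset={charset}'.format(**db_config))
    sql = "DELETE FROM `%s` WHERE `%s` = '%s' AND `%s` >= '%s';" % (
        table_name, code_field, code, date_field, first_date)
    with engine.begin() as conn:  # transaction commits on exit
        conn.execute(text(sql))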
Example 9
    def run(self, startDate='2007-01-01'):
        self.prepareData()
        if self.stockState == '':  # already the latest data
            return

        # get total date list (horizontally)
        tmp_state = 'select distinct %s from %s' % (self.dateField,
                                                    self.sourceStockTableName)
        date_list = readDB(tmp_state, ConfigQuant).values
        date_list = date_list.T[0]
        if self.last_update_date == '':
            date_list = date_list[date_list > startDate]
        else:
            date_list = date_list[date_list > self.last_update_date]

        #  ******************  area is the same for all dates, i.e. this uses future data!!! needs to be fixed
        if self.categoryField == 'area':
            tmp_state = "SELECT `%s`, `%s`  FROM `%s`;" % (
                self.codeField, self.categoryField,
                self.sourceCategoryTableName)
            stock_area = readDB(tmp_state, ConfigQuant)
        else:
            stock_area = None

        # calculate num of loop
        loop_num = int(date_list.size / self.chunkSize)
        if date_list.size > loop_num * self.chunkSize:
            loop_num = loop_num + 1

        # fetch and process data from sql by chunk
        for i in range(loop_num):
            tmp_date = date_list[i * self.chunkSize:(i + 1) * self.chunkSize]
            tmp_date_str = list(map(lambda x: "'%s'" % x, tmp_date))
            tmp_range = ','.join(tmp_date_str)
            # read stock return data
            tmp_state = self.stockState + "`%s` in (%s)" % (self.dateField,
                                                            tmp_range)
            dataStockO = readDB(tmp_state, ConfigQuant)
            # read index return data
            tmp_state = self.indexState + "`%s` in (%s)" % (self.dateField,
                                                            tmp_range)
            dataIndexO = readDB(tmp_state, ConfigQuant)

            dataStockO = dataStockO.drop_duplicates(
                [self.dateField, self.codeField])
            if self.categoryField == 'HS300':
                dataIndexO = dataIndexO.drop_duplicates(self.dateField)
            else:
                dataIndexO = dataIndexO.drop_duplicates(
                    [self.dateField, self.categoryField])
            # dataStockO = dataStockO.fillna(0)
            # dataIndexO = dataIndexO.fillna(0)

            data_result = self.getStockCategory(dataStockO, dataIndexO,
                                                tmp_range, stock_area)
            new_fields = list(
                map(lambda x: 'EXCESSIVE_%s' % x, self.retSeriesField))
            for field in zip(new_fields, self.retSeriesField):
                data_result[field[0]] = data_result[field[1]] - data_result[
                    field[1] + '_index']

            tot_fields = [self.dateField, self.codeField]
            tot_fields.extend(new_fields)
            data_result = data_result[tot_fields]

            # add timestamp
            print(self.targetTableName)
            print(data_result.shape)
            data_result.loc[:, 'time_stamp'] = datetime.now()

            # dump chunk data into sql
            writeDB(self.targetTableName, data_result, ConfigQuant,
                    self.if_exist)
            self.if_exist = 'append'
Example 10
    def run(self, startDate='2007-01-01'):
        self.prepareData()
        if self.stockState == '':  # already the latest data
            return

        # get total date list (horizontally)
        tmp_state = 'select distinct %s from %s' % (
            self.dateField, self.sourceStockTechIndiTableName)
        date_list = readDB(tmp_state, ConfigQuant).values
        date_list = date_list.T[0]
        if self.last_update_date == '':
            date_list = date_list[date_list > startDate]
        else:
            date_list = date_list[date_list > self.last_update_date]

        # sort
        date_list = np.sort(date_list)

        #  ******************  area is the same for all dates, i.e. this uses future data!!! needs to be fixed
        if self.categoryField == 'area':
            tmp_state = "SELECT `%s`, `%s`  FROM `%s`;" % (
                self.codeField, self.categoryField,
                self.sourceCategoryTableName)
            stock_area = readDB(tmp_state, ConfigQuant)
        else:
            stock_area = None

        # calculate num of loop
        loop_num = int(date_list.size / self.chunkSize)
        if date_list.size > loop_num * self.chunkSize:
            loop_num = loop_num + 1

        # fetch and process data from sql by chunk
        for i in range(loop_num):
            tmp_date_set = date_list[i * self.chunkSize:(i + 1) *
                                     self.chunkSize]
            tmp_date_str = list(map(lambda x: "'%s'" % x, tmp_date_set))
            tmp_range = ','.join(tmp_date_str)
            # read stock return data
            tmp_state = self.stockState + "`%s` in (%s)" % (self.dateField,
                                                            tmp_range)
            dataStockO = readDB(tmp_state, ConfigQuant)

            dataStockO = dataStockO.drop_duplicates(
                [self.dateField, self.codeField])
            dataStockO.loc[:, self.retSeriesField] = dataStockO[
                self.retSeriesField].astype('float')
            # dataStockO = dataStockO.fillna(0)

            data_result = self.getMarketRiseRatio(dataStockO, tmp_range,
                                                  stock_area, tmp_date_set)
            new_fields = list(
                map(lambda x: 'RATIO_%s' % x, self.retSeriesField))
            rename_dict = {}
            for field in zip(new_fields, self.retSeriesField):
                rename_dict[field[1]] = field[0]
            data_result = data_result.rename(columns=rename_dict)

            # add timestamp
            data_result['time_stamp'] = datetime.now()

            # dump chunk data into sql
            writeDB(self.targetTableName, data_result, ConfigQuant,
                    self.if_exist)
            self.if_exist = 'append'
Example 11
    def runFull(self):
        # get total code list
        tmp_state = 'select distinct %s from %s' % (self.codeField,
                                                    self.sourceTableName)
        code_list = readDB(tmp_state, ConfigQuant).values
        code_list = code_list.T[0]

        # calculate num of loop
        loop_num = int(code_list.size / self.chunkSize)
        if code_list.size > loop_num * self.chunkSize:
            loop_num = loop_num + 1

        if self.isMultiProcess:  # use multi processing
            # register pool
            pool = Pool(processes=self.processNum)
            # fetch and process data from sql by chunk
            for i in range(loop_num):
                tmp_code = code_list[i * self.chunkSize:(i + 1) *
                                     self.chunkSize]
                tmp_code_str = list(map(lambda x: "'%s'" % x, tmp_code))
                tmp_range = ','.join(tmp_code_str)
                tmp_state = self.state + "`%s` in (%s)" % (self.codeField,
                                                           tmp_range)
                dataO = readDB(tmp_state, ConfigQuant)
                dataO = dataO.drop_duplicates([self.dateField, self.codeField])
                dataO = dataO.sort_values(self.dateField)  # sort by date
                # dataO = dataO.fillna(method='ffill') # **** fill with pre-close, not simply forward fill
                for field in [
                        self.openField, self.highField, self.lowField,
                        self.closeField, self.volumeField
                ]:
                    dataO.loc[:, field] = dataO[field].astype(
                        'float')  # change data type
                if self.turnoverField == '':  # no turnover field
                    self.turnoverField = 'turnover'
                if self.turnoverField not in dataO.columns:  # fill missing turnover with all nan (every chunk)
                    dataO[self.turnoverField] = np.nan

                # process chunk data
                pool_results = []
                for code in tmp_code:
                    tmp_data = dataO.loc[dataO[self.codeField] ==
                                         code]  # dataO already sorted by date

                    if tmp_data.empty:
                        continue

                    tmp_procs = pool.apply_async(self.coreComputationFull,
                                                 (tmp_data, ))
                    pool_results.append(tmp_procs)
                    # data_result = self.coreComputationFull(tmp_data)

                # get result from the pool
                data_tot_result = pd.DataFrame([])
                for tmp_procs in pool_results:
                    data_result = tmp_procs.get()
                    data_tot_result = data_tot_result.append(data_result)

                # # trim data for increment
                # if self.last_update_date != '':
                #     data_tot_result = data_tot_result.loc[data_tot_result[self.dateField] > self.last_update_date]

                # add timestamp
                data_tot_result['time_stamp'] = datetime.now()

                if data_tot_result.empty:
                    continue

                # dump chunk data into sql
                writeDB(self.targetTableName, data_tot_result, ConfigQuant,
                        self.if_exist)
                self.if_exist = 'append'
        else:
            # fetch and process data from sql by chunk
            for i in range(loop_num):
                tmp_code = code_list[i * self.chunkSize:(i + 1) *
                                     self.chunkSize]
                tmp_code_str = list(map(lambda x: "'%s'" % x, tmp_code))
                tmp_range = ','.join(tmp_code_str)
                tmp_state = self.state + "`%s` in (%s)" % (self.codeField,
                                                           tmp_range)
                dataO = readDB(tmp_state, ConfigQuant)
                dataO = dataO.drop_duplicates([self.dateField, self.codeField])
                dataO = dataO.sort_values(self.dateField)  # sort by date
                # dataO = dataO.fillna(method='ffill') # **** fill with pre-close, not simply forward fill
                for field in [
                        self.openField, self.highField, self.lowField,
                        self.closeField, self.volumeField
                ]:
                    dataO.loc[:, field] = dataO[field].astype(
                        'float')  # change data type
                if self.turnoverField == '':  # no turnover field
                    self.turnoverField = 'turnover'
                if self.turnoverField not in dataO.columns:  # fill missing turnover with all nan (every chunk)
                    dataO[self.turnoverField] = np.nan

                # process chunk data
                data_tot_result = pd.DataFrame([])
                for code in tmp_code:
                    tmp_data = dataO.loc[dataO[self.codeField] ==
                                         code]  # dataO already sorted by date

                    if tmp_data.empty:
                        continue

                    # data_result = tmp_data[[self.dateField, self.codeField]]
                    #
                    # # MA, MA_DIFF...
                    # tmp_df = priceTechnicalIndicator(tmp_data[self.closeField], self.lags, '')
                    # data_result = data_result.join(tmp_df)
                    #
                    # # ADX, CCI ...
                    # tmp_df = priceTechnicalIndicatorOHLCV(tmp_data[self.openField], tmp_data[self.highField], tmp_data[self.lowField],
                    #                                       tmp_data[self.closeField], tmp_data[self.volumeField])
                    # data_result = data_result.join(tmp_df)
                    #
                    # # VMA, VMA_DIFF..., OBV, VOLUME_RATIO...
                    # tmp_df = volumeTechnicalIndicators(tmp_data[self.closeField], tmp_data[self.volumeField], self.lags, 'V')
                    # data_result = data_result.join(tmp_df)
                    #
                    # # turnover rolling sum, amplitude
                    # tmp_df = priceTechnicalIndicatorRollingSum(tmp_data[self.openField], tmp_data[self.highField],
                    #                                            tmp_data[self.lowField],
                    #                                            tmp_data[self.closeField], tmp_data[self.volumeField],
                    #                                            tmp_data[self.turnoverField], self.lags)
                    # data_result = data_result.join(tmp_df)
                    data_result = self.coreComputationFull(tmp_data)

                    data_tot_result = data_tot_result.append(data_result)

                # # trim data for increment
                # if self.last_update_date != '':
                #     data_tot_result = data_tot_result.loc[data_tot_result[self.dateField] > self.last_update_date]

                # add timestamp
                data_tot_result['time_stamp'] = datetime.now()

                if data_tot_result.empty:
                    continue

                # dump chunk data into sql
                writeDB(self.targetTableName, data_tot_result, ConfigQuant,
                        self.if_exist)
                self.if_exist = 'append'
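
volumeTechnicalIndicators is referenced in the commented-out block above and in Example 5. A sketch of the "VMA, VMA_DIFF..., OBV, VOLUME_RATIO..." family it is said to produce; the column names and the OBV definition are assumptions:

# Hypothetical sketch only.
import numpy as np
import pandas as pd


def volumeTechnicalIndicators(close, volume, lags, prefix='V'):
    result = pd.DataFrame(index=volume.index)
    for lag in lags:
        vma = volume.rolling(lag).mean()
        result['%sMA_%d' % (prefix, lag)] = vma
        result['%sMA_DIFF_%d' % (prefix, lag)] = volume - vma
        result['VOLUME_RATIO_%d' % lag] = volume / vma
    # OBV: cumulative volume signed by the direction of the close-to-close move
    direction = np.sign(close.diff()).fillna(0)
    result['OBV'] = (volume * direction).cumsum()
    return result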
Example 12
    def runIncrm(self):
        # fetch and process all incremental data from sql
        dataO = readDB(self.state, ConfigQuant)
        dataO = dataO.drop_duplicates([self.dateField, self.codeField])
        dataO = dataO.sort_values(self.dateField)  # sort by date
        # dataO = dataO.fillna(method='ffill') # **** fill with pre-close, not simply forward fill
        # for field in [self.openField, self.highField, self.lowField, self.closeField, self.volumeField]:
        #     dataO.loc[:, field] = dataO[field].astype('float') # change data type
        if self.turnoverField == '':  # no turnover field, fill with all nan
            self.turnoverField = 'turnover'
            dataO[self.turnoverField] = np.nan

        # process incremental data
        code_list = dataO[self.codeField].unique()

        data_tot_result = pd.DataFrame([])
        # use multiprocessing to reduce computation time
        if self.isMultiProcess:
            pool = Pool(processes=self.processNum)
            pool_results = []
            # build pool
            for code in code_list:
                tmp_data = dataO.loc[dataO[self.codeField] ==
                                     code]  # dataO already sorted by date

                if tmp_data.empty:
                    continue

                tmp_result = pool.apply_async(self.coreComputation,
                                              (tmp_data, ))
                pool_results.append(tmp_result)

            # get result from the pool
            for tmp_result in pool_results:
                data_result = tmp_result.get()
                data_tot_result = data_tot_result.append(data_result)
        else:
            for code in code_list:
                tmp_data = dataO.loc[dataO[self.codeField] ==
                                     code]  # dataO already sorted by date

                if tmp_data.empty:
                    continue

                data_result = self.coreComputation(tmp_data)
                data_tot_result = data_tot_result.append(data_result)

                # data_result = tmp_data[[self.dateField, self.codeField]]
                #
                # # MA, MA_DIFF...
                # tmp_df = priceTechnicalIndicator(tmp_data[self.closeField], self.lags, '')
                # data_result = data_result.join(tmp_df)
                #
                # # ADX, CCI ...
                # tmp_df = priceTechnicalIndicatorOHLCV(tmp_data[self.openField], tmp_data[self.highField], tmp_data[self.lowField],
                #                                       tmp_data[self.closeField], tmp_data[self.volumeField])
                # data_result = data_result.join(tmp_df)
                #
                # # OBV...
                # tmp_df = volumeTechnicalIndicators(tmp_data[self.closeField], tmp_data[self.volumeField], self.lags, 'V')
                # data_result = data_result.join(tmp_df)
                #
                # # turnover rolling sum, amplitude
                # tmp_df = priceTechnicalIndicatorRollingSum(tmp_data[self.openField], tmp_data[self.highField],
                #                                            tmp_data[self.lowField],
                #                                            tmp_data[self.closeField], tmp_data[self.volumeField],
                #                                            tmp_data[self.turnoverField], self.lags)
                # data_result = data_result.join(tmp_df)
                #
                # # trim data for increment
                # data_result = data_result.loc[data_result[self.dateField] > self.last_update_date]
                #
                # data_tot_result = data_tot_result.append(data_result)

        # if self.last_update_date != '':
        #     data_tot_result = data_tot_result.loc[data_tot_result[self.dateField] > self.last_update_date]

        if not data_tot_result.empty:
            # add timestamp
            data_tot_result['time_stamp'] = datetime.now()

            # dump chunk data into sql
            writeDB(self.targetTableName, data_tot_result, ConfigQuant,
                    self.if_exist)
            self.if_exist = 'append'
Example 13
    def runFull(self):
        # get total code list
        tmp_state = 'select distinct %s from %s' % (self.codeField,
                                                    self.sourceTableName)
        code_list = readDB(tmp_state, ConfigQuant).values
        code_list = code_list.T[0]

        # get total trade dates
        tmp_state = "select `%s` from %s" % (self.dateField,
                                             self.calendarTableName)
        trade_calendar = readDB(tmp_state, ConfigQuant)
        trade_calendar = trade_calendar[self.dateField].values
        trade_calendar = trade_calendar[trade_calendar >= self.init_date]

        # calculate num of loop for time series calculation
        code_chunk_num = int(code_list.size / self.chunkSize)
        if code_list.size > code_chunk_num * self.chunkSize:
            code_chunk_num = code_chunk_num + 1

        # calculate num of loop for time horizon calculation
        date_chunk_num = int(trade_calendar.size / self.chunkSize)
        if trade_calendar.size > date_chunk_num * self.chunkSize:
            date_chunk_num = date_chunk_num + 1

        if self.isMultiProcess:  # use multi processing
            # register pool
            pool = Pool(processes=self.processNum)

            # calculate time series derivative features
            data_tot_result_time_series = pd.DataFrame([])
            for i in range(code_chunk_num):
                tmp_code = code_list[i * self.chunkSize:(i + 1) *
                                     self.chunkSize]
                tmp_code_str = list(map(lambda x: "'%s'" % x, tmp_code))
                tmp_range = ','.join(tmp_code_str)
                tmp_state = self.state + "`%s` in (%s)" % (self.codeField,
                                                           tmp_range)
                dataO = readDB(tmp_state, ConfigQuant)
                dataO = dataO.drop_duplicates([self.dateField, self.codeField])
                # dataO = dataO.sort_values(self.dateField)  # sort by date

                # process chunk data
                pool_results = []
                for code in tmp_code:
                    tmp_data = dataO.loc[dataO[self.codeField] ==
                                         code]  # dataO already sorted by date
                    tmp_data = tmp_data.sort_values(self.dateField)

                    if tmp_data.empty:
                        continue

                    # multiprocessing
                    tmp_procs = pool.apply_async(
                        self.coreComputationTimeSeries, (tmp_data, ))
                    pool_results.append(tmp_procs)
                    # data_result = self.coreComputationFull(tmp_data)

                # get result from the process pool
                for tmp_procs in pool_results:
                    data_result = tmp_procs.get()
                    data_tot_result_time_series = data_tot_result_time_series.append(
                        data_result)

            # calculate time horizon derivative features
            data_tot_result_time_horizon = pd.DataFrame([])
            for i in range(date_chunk_num):
                tmp_date = trade_calendar[i * self.chunkSize:(i + 1) *
                                          self.chunkSize]
                tmp_date_str = list(map(lambda x: "'%s'" % x, tmp_date))
                tmp_range = ','.join(tmp_date_str)
                tmp_state = self.state + "`%s` in (%s)" % (self.dateField,
                                                           tmp_range)
                dataO = readDB(tmp_state, ConfigQuant)
                dataO = dataO.drop_duplicates([self.dateField, self.codeField])
                # dataO = dataO.sort_values(self.dateField)  # sort by date

                # process chunk data
                pool_results = []
                for single_date in tmp_date:
                    tmp_data = dataO.loc[
                        dataO[self.dateField] ==
                        single_date]  # dataO already sorted by date

                    if tmp_data.empty:
                        continue

                    # multiprocessing
                    tmp_procs = pool.apply_async(
                        self.coreComputationTimeHorizon, (tmp_data, ))
                    pool_results.append(tmp_procs)
                    # data_result = self.coreComputationFull(tmp_data)

                # get result from the process pool
                for tmp_procs in pool_results:
                    data_result = tmp_procs.get()
                    data_tot_result_time_horizon = data_tot_result_time_horizon.append(
                        data_result)

            # combine time series and time horizon features
            data_tot_result = data_tot_result_time_series.merge(
                data_tot_result_time_horizon,
                on=[self.dateField, self.codeField],
                how='outer')

            # add timestamp
            data_tot_result.loc[:, 'time_stamp'] = datetime.now()

            # dump all data into database
            writeDB(self.targetTableName, data_tot_result, ConfigQuant,
                    self.if_exist)
            self.if_exist = 'append'

            pool.close()
        else:
            # calculate time series derivative features
            data_tot_result_time_series = pd.DataFrame([])
            for i in range(code_chunk_num):
                tmp_code = code_list[i * self.chunkSize:(i + 1) *
                                     self.chunkSize]
                tmp_code_str = list(map(lambda x: "'%s'" % x, tmp_code))
                tmp_range = ','.join(tmp_code_str)
                tmp_state = self.state + "`%s` in (%s)" % (self.codeField,
                                                           tmp_range)
                dataO = readDB(tmp_state, ConfigQuant)
                dataO = dataO.drop_duplicates([self.dateField, self.codeField])
                # dataO = dataO.sort_values(self.dateField)  # sort by date

                # process chunk data
                for code in tmp_code:
                    tmp_data = dataO.loc[dataO[self.codeField] ==
                                         code]  # dataO already sorted by date
                    tmp_data = tmp_data.sort_values(self.dateField)

                    if tmp_data.empty:
                        continue

                    data_result = self.coreComputationTimeSeries(tmp_data)
                    data_tot_result_time_series = data_tot_result_time_series.append(
                        data_result)

            # calculate time horizon derivative features
            data_tot_result_time_horizon = pd.DataFrame([])
            for i in range(date_chunk_num):
                tmp_date = trade_calendar[i * self.chunkSize:(i + 1) *
                                          self.chunkSize]
                tmp_date_str = list(map(lambda x: "'%s'" % x, tmp_date))
                tmp_range = ','.join(tmp_date_str)
                tmp_state = self.state + "`%s` in (%s)" % (self.dateField,
                                                           tmp_range)
                dataO = readDB(tmp_state, ConfigQuant)
                dataO = dataO.drop_duplicates([self.dateField, self.codeField])
                # dataO = dataO.sort_values(self.dateField)  # sort by date

                # process chunk data
                for single_date in tmp_date:
                    tmp_data = dataO.loc[
                        dataO[self.dateField] ==
                        single_date]  # dataO already sorted by date

                    if tmp_data.empty:
                        continue

                    # multiprocessing
                    data_result = self.coreComputationTimeHorizon(tmp_data)
                    data_tot_result_time_horizon = data_tot_result_time_horizon.append(
                        data_result)

            # combine time series and time horizon features
            data_tot_result = data_tot_result_time_series.merge(
                data_tot_result_time_horizon,
                on=[self.dateField, self.codeField],
                how='outer')

            # add timestamp
            data_tot_result.loc[:, 'time_stamp'] = datetime.now()

            # dump chunk data into database
            writeDB(self.targetTableName, data_tot_result, ConfigQuant,
                    self.if_exist)
            self.if_exist = 'append'
Example 14
    def runIncrm(self):
        # fetch and process all incremental data from sql
        dataO = readDB(self.state, ConfigQuant)
        dataO = dataO.drop_duplicates([self.dateField, self.codeField])
        # dataO = dataO.sort_values(self.dateField) # sort by date

        # process incremental data
        code_list = dataO[self.codeField].unique()
        trade_calendar = dataO[self.dateField].unique()
        trade_calendar = trade_calendar[trade_calendar > self.last_update_date]

        # ==== calculate features
        if self.isMultiProcess:
            # build pool
            pool = Pool(processes=self.processNum)

            # calculate time series features
            data_tot_result_time_series = pd.DataFrame([])
            pool_results = []
            for code in code_list:
                tmp_data = dataO.loc[dataO[self.codeField] == code]
                tmp_data = tmp_data.sort_values(self.dateField)  # sort by date

                if tmp_data.empty:
                    continue

                tmp_result = pool.apply_async(self.coreComputationTimeSeries,
                                              (tmp_data, ))
                pool_results.append(tmp_result)

            # get result from the pool (time series)
            for tmp_result in pool_results:
                data_result = tmp_result.get()
                data_tot_result_time_series = data_tot_result_time_series.append(
                    data_result)

            # calculate time horizon features
            data_tot_result_time_horizon = pd.DataFrame([])
            pool_results = []
            for trade_date in trade_calendar:
                tmp_data = dataO.loc[dataO[self.dateField] ==
                                     trade_date]  # no need to sort date

                if tmp_data.empty:
                    continue

                tmp_result = pool.apply_async(self.coreComputationTimeHorizon,
                                              (tmp_data, ))
                pool_results.append(tmp_result)

            # get result from the pool (time horizon)
            for tmp_result in pool_results:
                data_result = tmp_result.get()
                data_tot_result_time_horizon = data_tot_result_time_horizon.append(
                    data_result)

            pool.terminate()

        # single process
        else:
            # calculate time series features
            data_tot_result_time_series = pd.DataFrame([])
            for code in code_list:
                tmp_data = dataO.loc[dataO[self.codeField] == code]
                tmp_data = tmp_data.sort_values(
                    self.dateField)  # sorted by date

                if tmp_data.empty:
                    continue

                data_result = self.coreComputationTimeSeries(tmp_data)
                data_tot_result_time_series = data_tot_result_time_series.append(
                    data_result)

            # calculate time horizon features
            data_tot_result_time_horizon = pd.DataFrame([])
            for trade_date in trade_calendar:
                tmp_data = dataO.loc[
                    dataO[self.dateField] ==
                    trade_date]  # dataO already sorted by date

                if tmp_data.empty:
                    continue

                data_result = self.coreComputationTimeHorizon(tmp_data)
                data_tot_result_time_horizon = data_tot_result_time_horizon.append(
                    data_result)

        # write to sql (both multiprocess and single process)
        data_tot_result = data_tot_result_time_series.merge(
            data_tot_result_time_horizon,
            on=[self.dateField, self.codeField],
            how='outer')  # combine time series and time horizon features
        data_tot_result = data_tot_result.loc[data_tot_result[
            self.dateField] > self.last_update_date]  # truncate data

        if not data_tot_result.empty:
            # add timestamp
            data_tot_result['time_stamp'] = datetime.now()

            # dump chunk data into sql
            writeDB(self.targetTableName, data_tot_result, ConfigQuant,
                    self.if_exist)
Example 15
    def run(self, startDate='2007-01-01'):
        self.prepareData()
        if self.basicState == '':  # already the latest data
            return

        # get total date list (horizontally)
        tmp_state = 'select distinct %s from %s' % (self.dateField,
                                                    self.sourceBasicTableName)
        date_list = readDB(tmp_state, ConfigQuant).values
        date_list = date_list.T[0]
        if self.last_update_date == '':
            date_list = date_list[date_list > startDate]
        else:
            date_list = date_list[date_list > self.last_update_date]

        #  get area (if category is area) ******************  area is the same for all dates, i.e. this uses future data!!! needs to be fixed
        if self.categoryField == 'area':
            tmp_state = "SELECT `%s`, `%s`  FROM `%s`;" % (
                self.codeField, self.categoryField,
                self.sourceCategoryTableName)
            stock_area = readDB(tmp_state, ConfigQuant)
        else:
            stock_area = None

        # calculate num of loop
        loop_num = int(date_list.size / self.chunkSize)
        if date_list.size > loop_num * self.chunkSize:
            loop_num = loop_num + 1

        # fetch and process data from sql by chunk
        for i in range(loop_num):
            tmp_date = date_list[i * self.chunkSize:(i + 1) * self.chunkSize]
            tmp_date_str = list(map(lambda x: "'%s'" % x, tmp_date))
            tmp_range = ','.join(tmp_date_str)
            # read basic data
            tmp_state = self.basicState + "`%s` in (%s)" % (self.dateField,
                                                            tmp_range)
            dataBasicO = readDB(tmp_state, ConfigQuant)
            for field in self.reverseSeriesField:
                dataBasicO[field] = 1. / dataBasicO[field]
            # read technical data
            tmp_state = self.techState + "`%s` in (%s)" % (self.dateField,
                                                           tmp_range)
            dataTechO = readDB(tmp_state, ConfigQuant)
            # drop duplicates and sort by date; done explicitly for both frames,
            # since exec() cannot rebind local variables in Python 3
            dataBasicO = dataBasicO.drop_duplicates([self.dateField, self.codeField])
            dataBasicO = dataBasicO.sort_values(self.dateField)
            dataTechO = dataTechO.drop_duplicates([self.dateField, self.codeField])
            dataTechO = dataTechO.sort_values(self.dateField)
            # dataBasicO = dataBasicO.fillna(method='ffill')

            # for field in self.basicSeriesField:
            #     dataBasicO.loc[:, field] = dataBasicO[field].astype('float') # change data type

            # process chunk data
            data_result = pd.DataFrame([])

            # Fundamental data
            uni_dates = dataBasicO[self.dateField].unique()
            dataBasicO = self.getStockCategory(
                dataBasicO, uni_dates, stock_area)  # merge with stock category
            tmp_df = priceOtherIndicatorRanking(dataBasicO, uni_dates,
                                                self.dateField, self.codeField,
                                                self.basicSeriesField,
                                                self.categoryField)  # ranking
            data_result = pd.concat([data_result, tmp_df])

            # Technical data
            uni_dates = dataTechO[self.dateField].unique()
            dataTechO = self.getStockCategory(
                dataTechO, uni_dates, stock_area)  # merge with stock category
            tmp_df = priceOtherIndicatorRanking(dataTechO, uni_dates,
                                                self.dateField, self.codeField,
                                                self.techSeriesField,
                                                self.categoryField)  # ranking
            data_result = data_result.merge(
                tmp_df, on=[self.dateField, self.codeField], how='outer')

            # add timestamp
            data_result['time_stamp'] = datetime.now()

            # dump chunk data into sql
            writeDB(self.targetTableName, data_result, ConfigQuant,
                    self.if_exist)
            self.if_exist = 'append'
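
priceOtherIndicatorRanking is the last undefined helper above: it appears to rank each field cross-sectionally within its date (and, when present, category) group. A sketch under those assumptions; output column names are hypothetical:

# Hypothetical sketch only.
import pandas as pd


def priceOtherIndicatorRanking(data, dates, date_field, code_field, value_fields, category_field):
    data = data.loc[data[date_field].isin(dates)]
    group_keys = [date_field]
    if category_field in data.columns:  # rank within category when it was merged in
        group_keys.append(category_field)
    result = data[[date_field, code_field]].copy()
    for field in value_fields:
        result['%s_RANK' % field] = data.groupby(group_keys)[field].rank(pct=True)
    return result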