Esempio n. 1
0
 def __init__(self, name):
     task.Task.__init__(self, name)
     self.name = name
     #-- fetch DB info
     db_dict = get_yaml(DB_YML)
     #-- open db connection
     self.conn = get_conn(db_dict["DB"], db_dict["Username"], db_dict["Password"], db_dict["Host"], db_dict["Port"])
        try:
            download_file = self.download_to_local()
            self.save_formatted_data()
            os.remove(download_file)
            self.update_log_table(is_success=True)
        except:
            traceback.print_exc()
            self.update_log_table(is_success=False)
            raise RuntimeError('Download {stock_id} for {date} failed.'.format(
                stock_id=self.stock_id, date=self.date))
        finally:
            queue_name = self.queue.get()


if __name__ == '__main__':
    # Ad-hoc driver: download a single stock's transaction data for one date.
    # (Python 2 script: uses the print statement; Queue is the py2 module.)
    queue = Queue()
    #-- fetch DB info from the yaml config
    db_dict = get_yaml(DB_YML)
    #-- open db connection
    conn = get_conn(db_dict["DB"], db_dict["Username"], db_dict["Password"],
                    db_dict["Host"], db_dict["Port"])

    # NOTE(review): presumably Stock_trans_downloader is a Thread subclass
    # (start()/join() below) downloading Netease transaction data for stock
    # 000423 on 2016-04-15 — confirm against its definition.
    s = Stock_trans_downloader(queue, conn, 'Netease_stock_transaction',
                               '000423', '20160415')

    s.start()

    s.join()

    # NOTE(review): conn is never closed here — consider conn.close() after join.
    print 'All done.'
		print_log("There are %(num)s stock bankuai combination will be marked valid. %(combination)s" % {"num": len(codes_to_valid), "combination": codes_to_valid_str})
		upd_sql = "update dw.dim_stock_bankuai t set is_valid = 'Y', upd_time = now() where %(combinations)s" % {"combinations": codes_to_valid_str}
		cur.execute(upd_sql)
		db_conn.commit()
	else:
		print_log("No stock bankuai combinations need to be marked valid.")			

	#---- insert stocks into dim_stock_bankuai
	if len(bk_st_pairs_dict.keys()) > 0:
		values = []
		print_log("There are %(num)s stock bankuai combination will be inserted." % {"num": len(bk_st_pairs_dict.keys())})
		for pk in bk_st_pairs_dict:
			print_log(pk)
			values.append("('%(stock_id)s', '%(bankuai_id)s', now(), 'Y')" % {"stock_id": bk_st_pairs_dict[pk]["st"], "bankuai_id": bk_st_pairs_dict[pk]["bk"]} )
		values_str = ",".join(values)
		ins_sql = "insert into dw.dim_stock_bankuai(stock_id, bankuai_id, upd_time, is_valid) values %(values)s" % {"values": values_str}
		cur.execute(ins_sql)
		db_conn.commit()
	else:
		print_log("No new stock bankuai combination.")

	print_log("dw.dim_stock_bankuai has been refreshed successfully.")

		
if __name__ == "__main__":
	#-- fetch DB info from the hard-coded yaml path (Windows dev box path)
	db_dict = get_yaml('D:\\workspace\\Stock\\bin\\..\\etc\\db.yml')
	#-- open db connection with the credentials from the yaml
	conn = get_conn(db_dict["DB"], db_dict["Username"], db_dict["Password"], db_dict["Host"], db_dict["Port"])
	
	#-- load the bankuai/stock csv extract, then close the connection
	load_into_stock_bankuai(conn, 'D:\\workspace\\Stock\\bin\\..\\log\\bankuai_stock_20160104.csv')
	conn.close()
	

#-- verify param: decide which reconciliation types to run from the CLI options
if options.type is None:
	# No type given: reconcile every configured type.
	types_to_recon = file_db_recon.keys()
	# An explicit in_file only makes sense for a single type.
	if not options.in_file is None:
		error_log("type can't be null if in_file specified!")
		exit_process()
elif options.type in file_db_recon.keys():
	# NOTE(review): this branch appends to types_to_recon without a visible
	# initialization — it relies on types_to_recon being defined earlier in
	# the file (not shown here); otherwise this raises NameError. Confirm.
	types_to_recon.append(options.type)
else:
	error_log("type is not correct! [" + options.type + "]")
	exit_process()
		
#-- fetch DB info
db_dict = get_yaml(DB_YML)

#-- open db connection
conn = get_conn(db_dict["DB"], db_dict["Username"], db_dict["Password"], db_dict["Host"], db_dict["Port"])



for type in types_to_recon: 
	#-- determine the file name for reconcilation
	max_date = ""
	if options.in_file is None:
		# to fetch the latest file for reconcilation 
		for root, dirs, files in os.walk(DATA_DIR):
			for f in files:
				matcher = re.compile(r'' + os.path.basename(file_db_recon[type]["file"]).replace("$DATE", "(?P<date>\d{8})") + '')
				for m in matcher.finditer(f):
		print_log("There are %(num)s bankuais will be inserted." % {"num": len(bankuais.keys())})
		for b in bankuais:
			values.append("('%(name)s', '%(parent_bankuai_id)s', now(), 'Y')" % {"name": b, "parent_bankuai_id": bankuais[b]["parent_bankuai_id"]} )
		values_str = ",".join(values)
		ins_sql = "insert into dw.dim_bankuai(name, parent_bankuai_id, upd_time, is_valid) values %(values)s" % {"values": values_str}
		cur.execute(ins_sql)
		db_conn.commit()
	else:
		print_log("No new bankuai ids.")
	
	print_log("dw.dim_bankuai has been refreshed successfully.")

if __name__ == "__main__":
	def return_parent_bankuai_ids(db_conn):
		"""Return a dict mapping parent bankuai name (unicode) -> id,
		read from DW.DIM_PARENT_BANKUAI via the given open connection."""
		query = "SELECT ID, NAME FROM DW.DIM_PARENT_BANKUAI"
		cur = get_cur(db_conn)
		cur.execute(query)
		rows = list(cur)
		return_dict = {}
		for row in rows:
			# NOTE(review): .decode("utf-8") implies the driver returns
			# Python 2 byte strings keyed by lower-case column names — confirm.
			return_dict[row["name"].decode("utf-8")] = row["id"]
		cur.close()
		return return_dict

	#-- fetch DB info from the hard-coded yaml path and open a connection
	db_dict = get_yaml('D:\\workspace\\Stock\\bin\\..\\etc\\db.yml')
	conn = get_conn(db_dict["DB"], db_dict["Username"], db_dict["Password"], db_dict["Host"], db_dict["Port"])
	
	#-- load the bankuai csv, resolving parent ids via the lookup built above
	parent_bankuai_ids = return_parent_bankuai_ids(conn)
	load_into_bankuai(conn, 'D:\\workspace\\Stock\\bin\\..\\log\\bankuai_20160205.csv', parent_bankuai_ids)
	conn.close()
Esempio n. 6
0
    error_log,
    warn_log,
    get_date,
    recent_working_day,
    get_yaml,
    return_new_name_for_existing_file,
)
from tooling.db_func import inserter, get_query_result, psql_copy_from

# -- sys var
SEP = Sys_paths.SEP
DB_YML = Sys_paths.YML_DIR + SEP + "db.yml"
DATA_DIR = Sys_paths.DATA_STOCK_TRANSACTION
TABLE = "dw.stock_transaction"
COLS = "stock_id,biz_date,time,trans_price,price_change,volume,amount,buy_sell,source"
# Parse the DB yaml once instead of once per constant (the original called
# get_yaml(DB_YML) five times, re-reading/re-parsing the same file each time).
_db_cfg = get_yaml(DB_YML)
DB_NAME = _db_cfg["DB"]
DB_HOST = _db_cfg["Host"]
DB_PORT = _db_cfg["Port"]
DB_UNAME = _db_cfg["Username"]
DB_PWD = _db_cfg["Password"]


class Stock_trans_loader(threading.Thread):
    def __init__(self, queue, conn, log_row_id, stock_id, date, file=None, enable_copy=False):
        threading.Thread.__init__(self, name=stock_id + "-" + date)
        self.queue = queue
        self.conn = conn
        self.log_row_id = log_row_id
        self.stock_id = stock_id
        self.date = date
        self.file = DATA_DIR + SEP + date + SEP + stock_id + ".txt" if file is None else file
# It is an implementation of threading, and it put a spaceholder into queue when it gets started and remove it from queue when it is finished, so the outer caller could count on the queue size to see how many threads are running.

import random, threading, time, os, traceback, datetime
from Queue import Queue
from Sys_paths import Sys_paths
from tooling.psql import get_conn, get_cur
from tooling.common_tool import replace_vars, print_log, error_log, warn_log, get_date, recent_working_day, get_yaml, return_new_name_for_existing_file
from tooling.db_func import inserter, get_query_result, psql_copy_from

#-- sys var
SEP = Sys_paths.SEP
DB_YML = Sys_paths.YML_DIR + SEP + "db.yml"
DATA_DIR = Sys_paths.DATA_STOCK_TRANSACTION
TABLE = 'dw.stock_transaction'
COLS = 'stock_id,biz_date,time,trans_price,price_change,volume,amount,buy_sell,source'
# Parse the DB yaml once instead of once per constant (the original called
# get_yaml(DB_YML) five times, re-reading/re-parsing the same file each time).
_db_cfg = get_yaml(DB_YML)
DB_NAME = _db_cfg["DB"]
DB_HOST = _db_cfg["Host"]
DB_PORT = _db_cfg["Port"]
DB_UNAME = _db_cfg["Username"]
DB_PWD = _db_cfg["Password"]


class Stock_trans_loader(threading.Thread):
    def __init__(self,
                 queue,
                 conn,
                 log_row_id,
                 stock_id,
                 date,
                 file=None,
                 enable_copy=False):
Esempio n. 8
0
def insert_into_table(db_field_yaml, stock_obj_name, in_file, conn, log_fh, warn_fh):
    """Load one downloaded stock file into the DB table described by a yaml mapping.

    Based on the fields mapping between db and object (and the db type defined
    in the yaml), generates a per-row delete sql (on the pk fields) and an
    insert sql, and fires them to db. Could be used for any db insert if the
    yaml and object are set up properly.

    Parameters:
        db_field_yaml: path to the mapping yaml; its basename minus '.yml' is
            used as the table name.
        stock_obj_name: class name of the stock object implementation (must
            match its module name under object_impl and expose a
            get_stock_object_from_str(row) returning {stock: {date: object}}).
        in_file: text file to load, one raw record per line.
        conn: open DB connection; committed every 1000 rows and once at the end.
        log_fh: log file handle passed to print_log.
        warn_fh: warning file handle (unused in this function).

    Yaml example:
        biz_date:
          type: date
          is_pk: Y
          stock_object:
                Tengxun_stock: date

    NOTE(review): Python 2 code — relies on the builtin reduce(). The eval()
    calls and string-built SQL are safe only for trusted, locally generated
    yaml/files; do not point this at untrusted input.
    """
    from object_impl.Sina_stock import Sina_stock
    from object_impl.Tengxun_stock import Tengxun_stock
    from object_impl.Yahoo_stock import Yahoo_stock
    
    db_field_mapping = get_yaml(db_field_yaml)
    tab_name = os.path.basename(db_field_yaml).replace('.yml', '') # yml file name as table name
    tab_fields = [] # table field names
    tab_pk = [] # table pk fields, deleted on before insert
    tab_types = [] # table field types (drive quoting below)
    obj_attrs = [] # attribute names in stock object, parallel to tab_fields
    for k,v in db_field_mapping.items():
        tab_type = v['type']
        obj_attr = v['stock_object'][stock_obj_name]
        if obj_attr != None: # If None|Null is set for fields in yml, remove the fields from insertion
            tab_fields.append(k)
            if v['is_pk'] == 'Y': tab_pk.append(k) # pk, delete before insert
            tab_types.append(tab_type)
            obj_attrs.append(obj_attr)
    del_sql = 'delete from {tab_name} where 1=1 '.format(tab_name=tab_name)
    ins_sql = 'insert into {tab_name}({fields}) '.format(tab_name=tab_name, fields=','.join(tab_fields))
    # iterate each row in the file, insert into table
    num = 0
    with open(in_file) as f:
        for row in f.readlines():
            # get_stock_object_from_str is a function that should be available in all the stock objects;
            # it accepts the string returned from the website and generates a dict of stock objects
            # shaped {stock: {date: object}}.
            # dynamically import object module, class name and file name should be identical
            #exec('from object_impl.{object} import {object}'.format(object = stock_obj_name), globals())
            # NOTE(review): the row kwarg in format() is unused by the template —
            # eval picks up the local variable 'row' from this scope instead.
            stock_dict = eval('{object}.get_stock_object_from_str(row)'.format(object=stock_obj_name, row=row))
            for stock in stock_dict: # for Tengxun or sina interface, there is just one stock in one stock dict
                for date in stock_dict[stock]: # for Tengxun or sina interface, there is just one date in one stock dict
                    stock_obj = stock_dict[stock][date] # this object is stock implementation object
                    value_sql = reduce(lambda x, y: ( x if re.match(r'stock_obj', x) else 'stock_obj.' + x + ', ' ) + "stock_obj.{attr_name}, ".format(attr_name=y), obj_attrs) # add 'stock_obj.' to the first attr, and concatenate attrs to a string
                    value_sql = value_sql[0:-2] # remove the last comma and the blankspace next to it
                    value_sql = eval(value_sql) # tuple of attribute values returned
                    final_value_sql = ''
                    del_where = ''
                    for i, v in enumerate(value_sql):
                        value = "'" + v + "'" if tab_types[i] == 'date' or tab_types[i] == 'varchar' else 'Null' if len(str(v)) == 0 else str(v) # date and varchar quoted by single quote, otherwise no quote or null(if length of value is 0)
                        final_value_sql = final_value_sql + value + ', '
                        if tab_fields[i] in tab_pk: 
                            del_where = del_where + ' and {field}={value}'.format(field=tab_fields[i], value=value)
                    final_value_sql = final_value_sql[0:-2]
                    del_complete_sql = del_sql + del_where
                    ins_complete_sql = ins_sql + ' values( ' + final_value_sql + ')'
                    #print_log('Deleting [{stock},{date}] from {tab_name}...\n {sql}'.format(stock=stock,date=date,tab_name=tab_name,sql=del_complete_sql), log_fh)
                    # NOTE(review): a new cursor is opened per row and never closed — confirm driver tolerates this.
                    cur = get_cur(conn)
                    cur.execute(del_complete_sql)
                    cur.execute(ins_complete_sql)
                    print_log('Inserted [{stock},{date}] into {tab_name}.'.format(stock=stock,date=date,tab_name=tab_name), log_fh)
                    num += 1
                    if num % 1000 == 0: conn.commit() # batch commit every 1000 rows
    conn.commit()
    print_log('{num} records have been written into {tab_name}.'.format(num=num, tab_name=tab_name), log_fh)
Esempio n. 9
0
def insert_into_table(db_field_yaml, stock_obj_name, in_file, conn, log_fh,
                      warn_fh):
    """Load one downloaded stock file into the DB table described by a yaml mapping.

    Based on the fields mapping between db and object (and the db type defined
    in the yaml), generates a per-row delete sql (on the pk fields) and an
    insert sql, and fires them to db. Could be used for any db insert if the
    yaml and object are set up properly.

    Parameters:
        db_field_yaml: path to the mapping yaml; its basename minus '.yml' is
            used as the table name.
        stock_obj_name: class name of the stock object implementation (must
            match its module name under object_impl and expose a
            get_stock_object_from_str(row) returning {stock: {date: object}}).
        in_file: text file to load, one raw record per line.
        conn: open DB connection; committed every 1000 rows and once at the end.
        log_fh: log file handle passed to print_log.
        warn_fh: warning file handle (unused in this function).

    Yaml example:
        biz_date:
          type: date
          is_pk: Y
          stock_object:
                Tengxun_stock: date

    NOTE(review): Python 2 code — relies on the builtin reduce(). The eval()
    calls and string-built SQL are safe only for trusted, locally generated
    yaml/files; do not point this at untrusted input.
    """
    from object_impl.Sina_stock import Sina_stock
    from object_impl.Tengxun_stock import Tengxun_stock
    from object_impl.Yahoo_stock import Yahoo_stock

    db_field_mapping = get_yaml(db_field_yaml)
    tab_name = os.path.basename(db_field_yaml).replace(
        '.yml', '')  # yml file name as table name
    tab_fields = []  # table field names
    tab_pk = []  # table pk fields, deleted on before insert
    tab_types = []  # table field types (drive quoting below)
    obj_attrs = []  # attribute names in stock object, parallel to tab_fields
    for k, v in db_field_mapping.items():
        tab_type = v['type']
        obj_attr = v['stock_object'][stock_obj_name]
        if obj_attr != None:  # If None|Null is set for fields in yml, remove the fields from insertion
            tab_fields.append(k)
            if v['is_pk'] == 'Y': tab_pk.append(k)  # pk, delete before insert
            tab_types.append(tab_type)
            obj_attrs.append(obj_attr)
    del_sql = 'delete from {tab_name} where 1=1 '.format(tab_name=tab_name)
    ins_sql = 'insert into {tab_name}({fields}) '.format(
        tab_name=tab_name, fields=','.join(tab_fields))
    # iterate each row in the file, insert into table
    num = 0
    with open(in_file) as f:
        for row in f.readlines():
            # get_stock_object_from_str is a function that should be available in all the stock objects;
            # it accepts the string returned from the website and generates a dict of stock objects
            # shaped {stock: {date: object}}.
            # dynamically import object module, class name and file name should be identical
            #exec('from object_impl.{object} import {object}'.format(object = stock_obj_name), globals())
            # NOTE(review): the row kwarg in format() is unused by the template —
            # eval picks up the local variable 'row' from this scope instead.
            stock_dict = eval('{object}.get_stock_object_from_str(row)'.format(
                object=stock_obj_name, row=row))
            for stock in stock_dict:  # for Tengxun or sina interface, there is just one stock in one stock dict
                for date in stock_dict[
                        stock]:  # for Tengxun or sina interface, there is just one date in one stock dict
                    stock_obj = stock_dict[stock][
                        date]  # this object is stock implementation object
                    value_sql = reduce(
                        lambda x, y:
                        (x if re.match(r'stock_obj', x) else 'stock_obj.' + x +
                         ', ') + "stock_obj.{attr_name}, ".format(attr_name=y),
                        obj_attrs
                    )  # add 'stock_obj.' to the first attr, and concatenate attrs to a string
                    value_sql = value_sql[
                        0:
                        -2]  # remove the last comma and the blankspace next to it
                    value_sql = eval(value_sql)  # tuple of attribute values returned
                    final_value_sql = ''
                    del_where = ''
                    for i, v in enumerate(value_sql):
                        value = "'" + v + "'" if tab_types[
                            i] == 'date' or tab_types[
                                i] == 'varchar' else 'Null' if len(
                                    str(v)
                                ) == 0 else str(
                                    v
                                )  # date and varchar quoted by single quote, otherwise no quote or null(if length of value is 0)
                        final_value_sql = final_value_sql + value + ', '
                        if tab_fields[i] in tab_pk:
                            del_where = del_where + ' and {field}={value}'.format(
                                field=tab_fields[i], value=value)
                    final_value_sql = final_value_sql[0:-2]
                    del_complete_sql = del_sql + del_where
                    ins_complete_sql = ins_sql + ' values( ' + final_value_sql + ')'
                    #print_log('Deleting [{stock},{date}] from {tab_name}...\n {sql}'.format(stock=stock,date=date,tab_name=tab_name,sql=del_complete_sql), log_fh)
                    # NOTE(review): a new cursor is opened per row and never closed — confirm driver tolerates this.
                    cur = get_cur(conn)
                    cur.execute(del_complete_sql)
                    cur.execute(ins_complete_sql)
                    print_log(
                        'Inserted [{stock},{date}] into {tab_name}.'.format(
                            stock=stock, date=date, tab_name=tab_name), log_fh)
                    num += 1
                    if num % 1000 == 0: conn.commit()  # batch commit every 1000 rows
    conn.commit()
    print_log(
        '{num} records have been written into {tab_name}.'.format(
            num=num, tab_name=tab_name), log_fh)