def __init__(self, name):
    """Initialize the task and immediately open a DB connection from db.yml."""
    task.Task.__init__(self, name)
    self.name = name
    # Read connection settings once and connect straight away; the connection
    # is kept on the instance for later use.
    db_cfg = get_yaml(DB_YML)
    self.conn = get_conn(db_cfg["DB"], db_cfg["Username"],
                         db_cfg["Password"], db_cfg["Host"], db_cfg["Port"])
# NOTE(review): this chunk starts mid-method -- the `try` below is the tail of
# an enclosing method (presumably the thread's run()) whose `def` line is
# outside this view; indentation here is reconstructed and should be confirmed.
        try:
            download_file = self.download_to_local()
            self.save_formatted_data()
            os.remove(download_file)
            # Mark success only after the temp download file has been removed.
            self.update_log_table(is_success=True)
        except:  # NOTE(review): bare except -- consider narrowing to Exception
            traceback.print_exc()
            self.update_log_table(is_success=False)
            raise RuntimeError('Download {stock_id} for {date} failed.'.format(
                stock_id=self.stock_id, date=self.date))
        finally:
            # Pop this thread's placeholder from the queue so the caller can
            # count running threads via the queue size (see module comment).
            queue_name = self.queue.get()


if __name__ == '__main__':
    # Single-stock smoke test: download one stock's transactions for one date.
    queue = Queue()
    #-- fetch DB info
    db_dict = get_yaml(DB_YML)
    #-- open db connection
    conn = get_conn(db_dict["DB"], db_dict["Username"], db_dict["Password"],
                    db_dict["Host"], db_dict["Port"])
    s = Stock_trans_downloader(queue, conn, 'Netease_stock_transaction',
                               '000423', '20160415')
    s.start()
    s.join()
    print 'All done.'
# NOTE(review): chunk starts mid-function (inside load_into_stock_bankuai); the
# leading statements and the first `else:` belong to an `if` above this view
# (presumably `if len(codes_to_valid) > 0:`). Indentation is reconstructed.
        print_log("There are %(num)s stock bankuai combination will be marked valid. %(combination)s" % {"num": len(codes_to_valid), "combination": codes_to_valid_str})
        # NOTE(review): SQL assembled via string interpolation -- safe only
        # while the interpolated pieces come from trusted internal data.
        upd_sql = "update dw.dim_stock_bankuai t set is_valid = 'Y', upd_time = now() where %(combinations)s" % {"combinations": codes_to_valid_str}
        cur.execute(upd_sql)
        db_conn.commit()
    else:
        print_log("No stock bankuai combinations need to be marked valid.")

    #---- insert stocks into dim_stock_bankuai
    if len(bk_st_pairs_dict.keys()) > 0:
        values = []
        print_log("There are %(num)s stock bankuai combination will be inserted." % {"num": len(bk_st_pairs_dict.keys())})
        for pk in bk_st_pairs_dict:
            print_log(pk)
            # One VALUES tuple per (stock, bankuai) pair; now() stamps load time.
            values.append("('%(stock_id)s', '%(bankuai_id)s', now(), 'Y')" % {"stock_id": bk_st_pairs_dict[pk]["st"], "bankuai_id": bk_st_pairs_dict[pk]["bk"]})
        values_str = ",".join(values)
        ins_sql = "insert into dw.dim_stock_bankuai(stock_id, bankuai_id, upd_time, is_valid) values %(values)s" % {"values": values_str}
        cur.execute(ins_sql)
        db_conn.commit()
    else:
        print_log("No new stock bankuai combination.")

    print_log("dw.dim_stock_bankuai has been refreshed successfully.")


if __name__ == "__main__":
    db_dict = get_yaml('D:\\workspace\\Stock\\bin\\..\\etc\\db.yml')
    conn = get_conn(db_dict["DB"], db_dict["Username"], db_dict["Password"], db_dict["Host"], db_dict["Port"])
    load_into_stock_bankuai(conn, 'D:\\workspace\\Stock\\bin\\..\\log\\bankuai_stock_20160104.csv')
    conn.close()
#-- verify param if options.type is None: types_to_recon = file_db_recon.keys() if not options.in_file is None: error_log("type can't be null if in_file specified!") exit_process() elif options.type in file_db_recon.keys(): types_to_recon.append(options.type) else: error_log("type is not correct! [" + options.type + "]") exit_process() #-- fetch DB info db_dict = get_yaml(DB_YML) #-- open db connection conn = get_conn(db_dict["DB"], db_dict["Username"], db_dict["Password"], db_dict["Host"], db_dict["Port"]) for type in types_to_recon: #-- determine the file name for reconcilation max_date = "" if options.in_file is None: # to fetch the latest file for reconcilation for root, dirs, files in os.walk(DATA_DIR): for f in files: matcher = re.compile(r'' + os.path.basename(file_db_recon[type]["file"]).replace("$DATE", "(?P<date>\d{8})") + '') for m in matcher.finditer(f):
# NOTE(review): chunk starts mid-function -- the statements below sit inside an
# `if` whose header (presumably `if len(bankuais) > 0:`) and the `values = []`
# initialization are above this view. Indentation is reconstructed.
        print_log("There are %(num)s bankuais will be inserted." % {"num": len(bankuais.keys())})
        for b in bankuais:
            # One VALUES tuple per bankuai; now() stamps the load time.
            values.append("('%(name)s', '%(parent_bankuai_id)s', now(), 'Y')" % {"name": b, "parent_bankuai_id": bankuais[b]["parent_bankuai_id"]})
        values_str = ",".join(values)
        # NOTE(review): SQL assembled by string interpolation; assumes bankuai
        # names contain no single quotes -- verify upstream sanitization.
        ins_sql = "insert into dw.dim_bankuai(name, parent_bankuai_id, upd_time, is_valid) values %(values)s" % {"values": values_str}
        cur.execute(ins_sql)
        db_conn.commit()
    else:
        print_log("No new bankuai ids.")

    print_log("dw.dim_bankuai has been refreshed successfully.")


if __name__ == "__main__":

    def return_parent_bankuai_ids(db_conn):
        # Map parent bankuai name (decoded to unicode) -> id from the dimension
        # table; closes its cursor before returning.
        query = "SELECT ID, NAME FROM DW.DIM_PARENT_BANKUAI"
        cur = get_cur(db_conn)
        cur.execute(query)
        rows = list(cur)
        return_dict = {}
        for row in rows:
            return_dict[row["name"].decode("utf-8")] = row["id"]
        cur.close()
        return return_dict

    db_dict = get_yaml('D:\\workspace\\Stock\\bin\\..\\etc\\db.yml')
    conn = get_conn(db_dict["DB"], db_dict["Username"], db_dict["Password"], db_dict["Host"], db_dict["Port"])
    parent_bankuai_ids = return_parent_bankuai_ids(conn)
    load_into_bankuai(conn, 'D:\\workspace\\Stock\\bin\\..\\log\\bankuai_20160205.csv', parent_bankuai_ids)
    conn.close()
# NOTE(review): chunk starts mid-statement -- the names below close a
# `from tooling.common_tool import (...)` opened above this view.
    error_log, warn_log, get_date, recent_working_day, get_yaml,
    return_new_name_for_existing_file,
)
from tooling.db_func import inserter, get_query_result, psql_copy_from

# -- sys var
SEP = Sys_paths.SEP
DB_YML = Sys_paths.YML_DIR + SEP + "db.yml"
DATA_DIR = Sys_paths.DATA_STOCK_TRANSACTION
TABLE = "dw.stock_transaction"
COLS = "stock_id,biz_date,time,trans_price,price_change,volume,amount,buy_sell,source"
# NOTE(review): get_yaml(DB_YML) is parsed five times here; loading the dict
# once would avoid repeated file reads -- left as-is to preserve behavior.
DB_NAME = get_yaml(DB_YML)["DB"]
DB_HOST = get_yaml(DB_YML)["Host"]
DB_PORT = get_yaml(DB_YML)["Port"]
DB_UNAME = get_yaml(DB_YML)["Username"]
DB_PWD = get_yaml(DB_YML)["Password"]


class Stock_trans_loader(threading.Thread):
    # Worker thread that loads one stock's transaction file for one date.
    def __init__(self, queue, conn, log_row_id, stock_id, date, file=None, enable_copy=False):
        # NOTE(review): parameter `file` shadows the Python 2 builtin.
        threading.Thread.__init__(self, name=stock_id + "-" + date)
        self.queue = queue
        self.conn = conn
        self.log_row_id = log_row_id
        self.stock_id = stock_id
        self.date = date
        # Default data file path: <DATA_DIR>/<date>/<stock_id>.txt
        self.file = DATA_DIR + SEP + date + SEP + stock_id + ".txt" if file is None else file
        # NOTE(review): chunk ends here -- `enable_copy` is presumably stored
        # by code past this view; confirm against the full file.
# It is an implementation of threading, and it put a spaceholder into queue when it gets started and remove it from queue when it is finished, so the outer caller could count on the queue size to see how many threads are running. import random, threading, time, os, traceback, datetime from Queue import Queue from Sys_paths import Sys_paths from tooling.psql import get_conn, get_cur from tooling.common_tool import replace_vars, print_log, error_log, warn_log, get_date, recent_working_day, get_yaml, return_new_name_for_existing_file from tooling.db_func import inserter, get_query_result, psql_copy_from #-- sys var SEP = Sys_paths.SEP DB_YML = Sys_paths.YML_DIR + SEP + "db.yml" DATA_DIR = Sys_paths.DATA_STOCK_TRANSACTION TABLE = 'dw.stock_transaction' COLS = 'stock_id,biz_date,time,trans_price,price_change,volume,amount,buy_sell,source' DB_NAME = get_yaml(DB_YML)["DB"] DB_HOST = get_yaml(DB_YML)["Host"] DB_PORT = get_yaml(DB_YML)["Port"] DB_UNAME = get_yaml(DB_YML)["Username"] DB_PWD = get_yaml(DB_YML)["Password"] class Stock_trans_loader(threading.Thread): def __init__(self, queue, conn, log_row_id, stock_id, date, file=None, enable_copy=False):
def insert_into_table(db_field_yaml, stock_obj_name, in_file, conn, log_fh, warn_fh):
    # Based on the fields mapping between db and object (db type defined in
    # yaml), generate delete sql and insert sql, and fire them to the db.
    # This function could be used for any db insert, if yaml and object are
    # set up properly.
    #
    # Yaml example
    #   biz_date:
    #     type: date
    #     is_pk: Y
    #     stock_object:
    #       Tengxun_stock: date
    #
    # Params:
    #   db_field_yaml  - yaml describing table fields; its base name (minus
    #                    .yml) is used as the table name
    #   stock_obj_name - name of the stock implementation class; must be one
    #                    of the classes imported below
    #   in_file        - text file with one website response row per line
    #   conn           - open DB connection; committed every 1000 rows and at
    #                    the end
    #   log_fh         - log file handle passed to print_log
    #   warn_fh        - unused here; kept for interface compatibility
    from object_impl.Sina_stock import Sina_stock
    from object_impl.Tengxun_stock import Tengxun_stock
    from object_impl.Yahoo_stock import Yahoo_stock

    # Explicit dispatch table instead of eval() on the class name.
    impl_classes = {
        'Sina_stock': Sina_stock,
        'Tengxun_stock': Tengxun_stock,
        'Yahoo_stock': Yahoo_stock,
    }
    stock_cls = impl_classes[stock_obj_name]

    db_field_mapping = get_yaml(db_field_yaml)
    tab_name = os.path.basename(db_field_yaml).replace('.yml', '')  # yml file name as table name
    tab_fields = []  # table field names
    tab_pk = []      # table pk fields, deleted before insert
    tab_types = []   # table field types
    obj_attrs = []   # attribute names in stock object
    for k, v in db_field_mapping.items():
        obj_attr = v['stock_object'][stock_obj_name]
        if obj_attr is not None:  # None|Null in the yml excludes the field from insertion
            tab_fields.append(k)
            if v['is_pk'] == 'Y':
                tab_pk.append(k)
            tab_types.append(v['type'])
            obj_attrs.append(obj_attr)

    del_sql = 'delete from {tab_name} where 1=1 '.format(tab_name=tab_name)
    ins_sql = 'insert into {tab_name}({fields}) '.format(tab_name=tab_name, fields=','.join(tab_fields))

    def _resolve(obj, attr_path):
        # Fetch a (possibly dotted) attribute path from the stock object.
        # Replaces the previous reduce+eval('stock_obj.<attr>') construction,
        # which also mis-sliced the expression when only one field was mapped.
        for part in attr_path.split('.'):
            obj = getattr(obj, part)
        return obj

    # iterate each row in the file, insert into table
    num = 0
    cur = get_cur(conn)  # one cursor for the whole load instead of one per record
    with open(in_file) as f:
        for row in f:  # stream the file instead of materializing readlines()
            # get_stock_object_from_str accepts the string returned from the
            # website and yields a dict shaped like {stock: {date: object}}
            stock_dict = stock_cls.get_stock_object_from_str(row)
            for stock in stock_dict:  # Tengxun/Sina responses carry one stock per dict
                for date in stock_dict[stock]:  # ... and one date per stock
                    stock_obj = stock_dict[stock][date]
                    row_values = tuple(_resolve(stock_obj, attr) for attr in obj_attrs)
                    value_parts = []
                    del_where = ''
                    for i, v in enumerate(row_values):
                        # date and varchar are single-quoted; everything else is
                        # rendered bare, or Null when the value is empty
                        if tab_types[i] == 'date' or tab_types[i] == 'varchar':
                            value = "'" + v + "'"
                        elif len(str(v)) == 0:
                            value = 'Null'
                        else:
                            value = str(v)
                        value_parts.append(value)
                        if tab_fields[i] in tab_pk:
                            del_where = del_where + ' and {field}={value}'.format(field=tab_fields[i], value=value)
                    final_value_sql = ', '.join(value_parts)
                    cur.execute(del_sql + del_where)  # delete by pk before insert
                    cur.execute(ins_sql + ' values( ' + final_value_sql + ')')
                    print_log('Inserted [{stock},{date}] into {tab_name}.'.format(stock=stock, date=date, tab_name=tab_name), log_fh)
                    num += 1
                    if num % 1000 == 0:  # batch commits to cut round-trips
                        conn.commit()
    conn.commit()
    print_log('{num} records have been written into {tab_name}.'.format(num=num, tab_name=tab_name), log_fh)
def insert_into_table(db_field_yaml, stock_obj_name, in_file, conn, log_fh, warn_fh):
    """Generate delete/insert SQL from the yaml field mapping and fire it to the DB.

    The yaml maps each table field to its type, pk flag and the attribute name
    on the stock object, e.g.::

        biz_date:
          type: date
          is_pk: Y
          stock_object:
            Tengxun_stock: date

    Reusable for any table insert when yaml and object are set up accordingly.
    """
    from object_impl.Sina_stock import Sina_stock
    from object_impl.Tengxun_stock import Tengxun_stock
    from object_impl.Yahoo_stock import Yahoo_stock

    mapping = get_yaml(db_field_yaml)
    # the yml file name doubles as the table name
    tab_name = os.path.basename(db_field_yaml).replace('.yml', '')
    tab_fields = []   # table field names
    tab_pk = []       # primary-key fields (deleted before insert)
    tab_types = []    # table field types
    obj_attrs = []    # attribute names on the stock object
    for field, spec in mapping.items():
        attr = spec['stock_object'][stock_obj_name]
        # a None/Null mapping in the yml excludes the field from the insert
        if attr != None:
            tab_fields.append(field)
            if spec['is_pk'] == 'Y':
                tab_pk.append(field)
            tab_types.append(spec['type'])
            obj_attrs.append(attr)

    del_sql = 'delete from {tab_name} where 1=1 '.format(tab_name=tab_name)
    ins_sql = 'insert into {tab_name}({fields}) '.format(tab_name=tab_name, fields=','.join(tab_fields))

    num = 0
    with open(in_file) as f:
        for row in f.readlines():
            # every stock object class exposes get_stock_object_from_str, which
            # turns a website response line into a dict {stock: {date: object}}
            stock_dict = eval('{object}.get_stock_object_from_str(row)'.format(object=stock_obj_name, row=row))
            for stock in stock_dict:
                # Tengxun/Sina responses carry a single stock and a single date
                for date in stock_dict[stock]:
                    stock_obj = stock_dict[stock][date]
                    # build 'stock_obj.a, stock_obj.b, ...' and eval it into a tuple
                    value_sql = reduce(lambda x, y: (x if re.match(r'stock_obj', x) else 'stock_obj.' + x + ', ') + "stock_obj.{attr_name}, ".format(attr_name=y), obj_attrs)
                    value_sql = value_sql[0:-2]  # drop the trailing ', '
                    value_sql = eval(value_sql)
                    parts = []
                    del_where = ''
                    for i, v in enumerate(value_sql):
                        # date/varchar values are single-quoted; anything else
                        # is rendered bare, or Null when the value is empty
                        if tab_types[i] == 'date' or tab_types[i] == 'varchar':
                            rendered = "'" + v + "'"
                        elif len(str(v)) == 0:
                            rendered = 'Null'
                        else:
                            rendered = str(v)
                        parts.append(rendered)
                        if tab_fields[i] in tab_pk:
                            del_where += ' and {field}={value}'.format(field=tab_fields[i], value=rendered)
                    final_value_sql = ', '.join(parts)
                    del_complete_sql = del_sql + del_where
                    ins_complete_sql = ins_sql + ' values( ' + final_value_sql + ')'
                    cur = get_cur(conn)
                    cur.execute(del_complete_sql)
                    cur.execute(ins_complete_sql)
                    print_log('Inserted [{stock},{date}] into {tab_name}.'.format(stock=stock, date=date, tab_name=tab_name), log_fh)
                    num += 1
                    if num % 1000 == 0:
                        conn.commit()
    conn.commit()
    print_log('{num} records have been written into {tab_name}.'.format(num=num, tab_name=tab_name), log_fh)