def download_log_checker(conn, start_date=options.start_date, end_date=options.end_date, stock_id=options.stock_id):
    """Report stocks whose latest download attempt failed in a date range.

    For each (biz_date, stock_id) only the most recent attempt (largest
    download_end_time, NULLs last) is considered; rows whose latest attempt has
    is_download_success = 'N' are written to the error log.

    Args:
        conn: open DB connection; get_cur(conn) must yield a dict-style cursor.
        start_date: inclusive lower bound, 'YYYYMMDD' string.
        end_date: inclusive upper bound, 'YYYYMMDD' string.
        stock_id: optional single stock id to restrict the check to.

    Returns:
        int: number of (biz_date, stock_id) pairs that failed to download.
    """
    start_date_dt = datetime.datetime.strptime(start_date, '%Y%m%d')
    end_date_dt = datetime.datetime.strptime(end_date, '%Y%m%d')

    # Rank all attempts per (biz_date, stock_id); rankid = 1 is the latest one.
    chk_sql = '''
    select t.biz_date, 
      t.stock_id
    from (
    select 
      biz_date, 
      stock_id, 
      is_download_success, 
      row_number() over(partition by biz_date, stock_id order by download_end_time desc nulls last) rankid
    from dw.log_stock_transaction
    where biz_date between '{start_date}' and '{end_date}' 
    ) t where t.rankid = 1
    and t.is_download_success = 'N' '''.format(start_date=start_date_dt, end_date=end_date_dt)
    # NOTE(review): stock_id is spliced into SQL text; acceptable for trusted
    # internal values, but a parameterized query would be safer.
    if stock_id is not None:
        chk_sql = chk_sql + " and t.stock_id = '" + stock_id + "'"

    cur = get_cur(conn)
    cur.execute(chk_sql)
    rows = list(cur)
    if len(rows) == 0:
        print_log('All the stocks have been downloaded successfully.')
    else:
        for row in rows:
            error_log(str(row['biz_date']) + ':' + row['stock_id'] + ' failed to download.')
    return len(rows)
Example #2
0
def get_query_result(conn, query):
    """Execute *query*; return fetched rows, or a status string for DML.

    For statements starting with insert/update/delete (case-insensitive) a
    confirmation message is returned; for anything else the cursor's rows are
    returned as a list of dict-style records.
    """
    cur = get_cur(conn)
    cur.execute(query)
    if re.match(r'^(insert|update|delete)', query.strip(), re.IGNORECASE):
        # Fixed typo in the status message ("exeucted" -> "executed").
        return 'Command executed successfully.'
    else:
        return list(cur)
Example #3
0
def get_query_result(conn, query):
    """Run a SQL statement and return either its rows or a DML status string.

    DML statements (insert/update/delete prefix, case-insensitive) yield a
    confirmation message; queries yield the full result set as a list.
    """
    cur = get_cur(conn)
    cur.execute(query)
    if re.match(r'^(insert|update|delete)', query.strip(), re.IGNORECASE):
        # Fixed typo in the status message ("exeucted" -> "executed").
        return 'Command executed successfully.'
    else:
        return list(cur)
Example #4
0
def recent_working_day(in_date='today',
                       is_skip_holiday=False,
                       conn=None):  # date=yyyymmdd
    """Return the most recent working day on or before *in_date* as 'YYYYMMDD'.

    Weekends are always skipped; with is_skip_holiday=True, dates listed in
    dw.holiday are skipped as well (a live DB connection is then required).

    Args:
        in_date: 'YYYYMMDD' string, or a keyword understood by get_date
            (e.g. 'today').
        is_skip_holiday: also step over holidays found in dw.holiday.
        conn: DB connection, mandatory when is_skip_holiday is True.

    Raises:
        RuntimeError: if is_skip_holiday is True but conn is None.
    """
    holidays = []
    # Raw string for the regex avoids the invalid \d escape warning.
    if re.match(r"^\d{8}$", in_date):
        date_date = datetime.datetime.strptime(in_date, '%Y%m%d')
    else:
        date_date = get_date(in_date, to_date=True)

    if is_skip_holiday:
        if conn is None:
            raise RuntimeError(
                'connection is None which must be available when skip_holiday mode is on.'
            )
        else:
            cur = get_cur(conn)
            cur.execute('select date from dw.holiday')  # dates stored as yyyymmdd
            rows = list(cur)
            for row in rows:
                holidays.append(row['date'])
            cur.close()

    # Walk backwards one day at a time while on a weekend (isoweekday 6/7)
    # or a listed holiday.
    while date_date.isoweekday() >= 6 or date_date.strftime(
            '%Y%m%d') in holidays:
        date_date = date_date + datetime.timedelta(-1)

    return date_date.strftime('%Y%m%d')
 def get_row_id(self):
     """Fetch the next value of dw.seq_log_stock_trans_row_id and cache it on self."""
     cur = get_cur(self.conn)
     cur.execute("select nextval('dw.seq_log_stock_trans_row_id') as row_id")
     self.row_id = list(cur)[0]['row_id']
     return self.row_id
 def get_row_id(self):
     """Obtain a fresh row id from the dw sequence, remember and return it."""
     seq_sql = "select nextval('dw.seq_log_stock_trans_row_id') as row_id"
     cursor = get_cur(self.conn)
     cursor.execute(seq_sql)
     first_row = list(cursor)[0]
     self.row_id = first_row['row_id']
     return self.row_id
def load_log_checker(conn,
                     start_date=options.start_date,
                     end_date=options.end_date,
                     stock_id=options.stock_id):
    """Report stocks that downloaded successfully but failed (or never ran) the load step.

    A row counts as a load failure when is_download_success = 'Y' and
    is_load_success is 'N' or NULL within the date range.

    Args:
        conn: open DB connection; get_cur(conn) must yield a dict-style cursor.
        start_date: inclusive lower bound, 'YYYYMMDD' string.
        end_date: inclusive upper bound, 'YYYYMMDD' string.
        stock_id: optional single stock id to restrict the check to.

    Returns:
        int: number of (biz_date, stock_id) pairs that failed to load.
    """
    start_date_dt = datetime.datetime.strptime(start_date, '%Y%m%d')
    end_date_dt = datetime.datetime.strptime(end_date, '%Y%m%d')

    chk_sql = '''
    select biz_date, stock_id
    from dw.log_stock_transaction
    where biz_date between '{start_date}' and '{end_date}'
    and is_download_success = 'Y'
    and (is_load_success = 'N' or is_load_success is null)
    '''.format(start_date=start_date_dt, end_date=end_date_dt)
    # NOTE(review): stock_id is spliced into SQL text; fine for trusted input,
    # but a parameterized query would be safer.
    if stock_id is not None:
        chk_sql = chk_sql + " and stock_id = '" + stock_id + "'"

    cur = get_cur(conn)
    cur.execute(chk_sql)
    rows = list(cur)
    if len(rows) == 0:
        print_log('All the stocks have been loaded successfully.')
    else:
        for row in rows:
            error_log(
                str(row['biz_date']) + ':' + row['stock_id'] +
                ' failed to load.')
    return len(rows)
 def update_log_table(self, is_success=True):
     """Mark this run's log row as finished: stamp end time and success flag ('Y'/'N')."""
     success_flag = 'Y' if is_success else 'N'
     upd_sql = '''update dw.log_stock_transaction 
     set download_end_time = '{end_time}', is_download_success = '{is_success}'
     where row_id = {row_id}
     '''.format(row_id=self.row_id, end_time=time.ctime(), is_success=success_flag)
     cur = get_cur(self.conn)
     cur.execute(upd_sql)
     self.conn.commit()
	def return_parent_bankuai_ids(db_conn):
		"""Map parent-bankuai names (decoded from utf-8) to their numeric ids."""
		cur = get_cur(db_conn)
		cur.execute("SELECT ID, NAME FROM DW.DIM_PARENT_BANKUAI")
		name_to_id = {}
		for db_row in list(cur):
			name_to_id[db_row["name"].decode("utf-8")] = db_row["id"]
		cur.close()
		return name_to_id
 def return_parent_bankuai_ids(db_conn):
     """Return {parent bankuai name: id} read from DW.DIM_PARENT_BANKUAI."""
     cursor = get_cur(db_conn)
     cursor.execute("SELECT ID, NAME FROM DW.DIM_PARENT_BANKUAI")
     mapping = {rec["name"].decode("utf-8"): rec["id"] for rec in list(cursor)}
     cursor.close()
     return mapping
def get_stock_list(conn):
    """Return every stock id from dw.dim_stock, excluding the placeholder '000000'."""
    cur = get_cur(conn)
    cur.execute("select id from dw.dim_stock where id <> '000000'")
    return [row['id'] for row in list(cur)]
 def insert_log_table(self):
     """Write the start-of-download log row for this stock/date into dw.log_stock_transaction."""
     cur = get_cur(self.conn)
     ins_sql = '''insert into dw.log_stock_transaction ( row_id, biz_date, stock_id, download_start_time, download_source ) values ( {row_id}, '{date}', '{stock}', '{start_time}', '{stock_trans_obj_name}' )
     '''.format(row_id=self.row_id, date=self.date, stock=self.stock_id,
                start_time=time.ctime(),
                stock_trans_obj_name=self.stock_trans_obj_name)
     cur.execute(ins_sql)
     self.conn.commit()
def get_stock_list(conn):
    """Fetch all real stock ids (the dummy id '000000' is filtered out)."""
    query = "select id from dw.dim_stock where id <> '000000'"
    cursor = get_cur(conn)
    cursor.execute(query)
    return [rec["id"] for rec in list(cursor)]
 def update_log_table(self, is_success=True):
     """Record download completion ('Y'/'N') on the log row keyed by self.row_id."""
     sql_template = '''update dw.log_stock_transaction 
     set download_end_time = '{end_time}', is_download_success = '{is_success}'
     where row_id = {row_id}
     '''
     stmt = sql_template.format(row_id=self.row_id,
                                end_time=time.ctime(),
                                is_success='Y' if is_success else 'N')
     cursor = get_cur(self.conn)
     cursor.execute(stmt)
     self.conn.commit()
def get_stock_list(conn, biz_date, stock_id):
    """List stocks still pending download for *biz_date*.

    The candidate set is every valid stock in dw.dim_stock (dummy id '000000'
    excluded); stocks already logged as successfully downloaded for biz_date
    are removed via EXCEPT. When stock_id is given, only that stock is checked.

    Returns:
        list: pending stock ids (possibly empty).
    """
    if stock_id is not None:
        sel_query = '''
            select id from dw.dim_stock where id <> '000000' and is_valid = 'Y' and id = '{stock_id}'
            except 
            select stock_id from dw.log_stock_transaction where biz_date = '{biz_date}' and is_download_success = 'Y' and stock_id = '{stock_id}'
            '''.format(stock_id=stock_id, biz_date=biz_date)
    else:
        sel_query = '''
            select id from dw.dim_stock where id <> '000000' and is_valid = 'Y'
            except 
            select stock_id from dw.log_stock_transaction where biz_date = '{biz_date}' and is_download_success = 'Y' 
            '''.format(biz_date=biz_date)
    cur = get_cur(conn)
    cur.execute(sel_query)
    return [row['id'] for row in list(cur)]
def get_stock_list(conn, biz_date, stock_id):
    """Return the stock ids that have not yet downloaded successfully for *biz_date*.

    All valid stocks (optionally restricted to stock_id) minus those whose log
    row already shows is_download_success = 'Y' for the date, computed with a
    SQL EXCEPT.

    Returns:
        list: pending stock ids (possibly empty).
    """
    if stock_id is not None:
        sel_query = '''
            select id from dw.dim_stock where id <> '000000' and is_valid = 'Y' and id = '{stock_id}'
            except 
            select stock_id from dw.log_stock_transaction where biz_date = '{biz_date}' and is_download_success = 'Y' and stock_id = '{stock_id}'
            '''.format(stock_id=stock_id, biz_date=biz_date)
    else:
        sel_query = '''
            select id from dw.dim_stock where id <> '000000' and is_valid = 'Y'
            except 
            select stock_id from dw.log_stock_transaction where biz_date = '{biz_date}' and is_download_success = 'Y' 
            '''.format(biz_date=biz_date)
    cur = get_cur(conn)
    cur.execute(sel_query)
    return [row['id'] for row in list(cur)]
def load_log_checker(conn, start_date=options.start_date, end_date=options.end_date, stock_id=options.stock_id):
    """Flag stocks whose files downloaded but were not loaded into the warehouse.

    Load failures are rows with is_download_success = 'Y' whose is_load_success
    is 'N' or NULL within [start_date, end_date].

    Args:
        conn: open DB connection; get_cur(conn) must yield a dict-style cursor.
        start_date: inclusive lower bound, 'YYYYMMDD' string.
        end_date: inclusive upper bound, 'YYYYMMDD' string.
        stock_id: optional single stock id to restrict the check to.

    Returns:
        int: number of failed (biz_date, stock_id) pairs.
    """
    start_date_dt = datetime.datetime.strptime(start_date, '%Y%m%d')
    end_date_dt = datetime.datetime.strptime(end_date, '%Y%m%d')

    chk_sql = '''
    select biz_date, stock_id
    from dw.log_stock_transaction
    where biz_date between '{start_date}' and '{end_date}'
    and is_download_success = 'Y'
    and (is_load_success = 'N' or is_load_success is null)
    '''.format(start_date=start_date_dt, end_date=end_date_dt)
    # NOTE(review): stock_id is spliced into SQL text; parameterize if the
    # value can ever come from an untrusted source.
    if stock_id is not None:
        chk_sql = chk_sql + " and stock_id = '" + stock_id + "'"

    cur = get_cur(conn)
    cur.execute(chk_sql)
    rows = list(cur)
    if len(rows) == 0:
        print_log('All the stocks have been loaded successfully.')
    else:
        for row in rows:
            error_log(str(row['biz_date']) + ':' + row['stock_id'] + ' failed to load.')
    return len(rows)
def download_log_checker(conn,
                         start_date=options.start_date,
                         end_date=options.end_date,
                         stock_id=options.stock_id):
    """Identify stocks whose most recent download attempt in the range failed.

    Attempts are ranked per (biz_date, stock_id) by download_end_time (NULLs
    last); only the latest attempt counts. Failures are logged via error_log.

    Args:
        conn: open DB connection; get_cur(conn) must yield a dict-style cursor.
        start_date: inclusive lower bound, 'YYYYMMDD' string.
        end_date: inclusive upper bound, 'YYYYMMDD' string.
        stock_id: optional single stock id to restrict the check to.

    Returns:
        int: number of failed (biz_date, stock_id) pairs.
    """
    start_date_dt = datetime.datetime.strptime(start_date, '%Y%m%d')
    end_date_dt = datetime.datetime.strptime(end_date, '%Y%m%d')

    # get stock ids which is_download_success=N (latest attempt only)
    chk_sql = '''
    select t.biz_date, 
      t.stock_id
    from (
    select 
      biz_date, 
      stock_id, 
      is_download_success, 
      row_number() over(partition by biz_date, stock_id order by download_end_time desc nulls last) rankid
    from dw.log_stock_transaction
    where biz_date between '{start_date}' and '{end_date}' 
    ) t where t.rankid = 1
    and t.is_download_success = 'N' '''.format(start_date=start_date_dt,
                                               end_date=end_date_dt)
    # NOTE(review): stock_id is spliced into SQL text; parameterize if the
    # value can ever come from an untrusted source.
    if stock_id is not None:
        chk_sql = chk_sql + " and t.stock_id = '" + stock_id + "'"

    cur = get_cur(conn)
    cur.execute(chk_sql)
    rows = list(cur)
    if len(rows) == 0:
        print_log('All the stocks have been downloaded successfully.')
    else:
        for row in rows:
            error_log(
                str(row['biz_date']) + ':' + row['stock_id'] +
                ' failed to download.')
    return len(rows)
Example #19
0
def recent_working_day(in_date='today', is_skip_holiday=False, conn=None): # date=yyyymmdd
	"""Return the most recent working day on or before *in_date* as 'YYYYMMDD'.

	Weekends are always skipped; with is_skip_holiday=True, dates present in
	dw.holiday are skipped too (requires a live DB connection).

	Args:
		in_date: 'YYYYMMDD' string, or a keyword understood by get_date.
		is_skip_holiday: also step over holidays from dw.holiday.
		conn: DB connection, mandatory when is_skip_holiday is True.

	Raises:
		RuntimeError: if is_skip_holiday is True but conn is None.
	"""
	holidays = []
	# Raw string for the regex avoids the invalid \d escape warning.
	if re.match(r"^\d{8}$", in_date):
		date_date = datetime.datetime.strptime(in_date, '%Y%m%d')
	else:
		date_date = get_date(in_date, to_date=True)

	if is_skip_holiday:
		if conn is None: 
			raise RuntimeError('connection is None which must be available when skip_holiday mode is on.')
		else:
			cur = get_cur(conn)
			cur.execute('select date from dw.holiday') # dates stored as yyyymmdd
			rows = list(cur)
			for row in rows:
				holidays.append(row['date'])
			cur.close()

	# Step back a day at a time while on a weekend (isoweekday 6/7) or holiday.
	while date_date.isoweekday() >= 6 or date_date.strftime('%Y%m%d') in holidays:
		date_date = date_date + datetime.timedelta(-1)

	return date_date.strftime('%Y%m%d')
def load_into_dim_stock_bankuai(db_conn, file ):
	"""Synchronize dw.dim_stock_bankuai with the (bankuai, stock) pairs in a CSV file.

	Reads (bankuai name, stock id) pairs from the GBK-encoded CSV, maps bankuai
	names to ids via dw.dim_bankuai, then diffs against dw.dim_stock_bankuai:
	pairs present and valid in both are untouched; pairs only in the db are
	marked is_valid='N'; previously invalidated pairs reappearing in the CSV are
	re-marked 'Y'; brand-new pairs are inserted. Commits after each batch.

	Args:
		db_conn: open DB connection; get_cur(db_conn) must yield a dict-style cursor.
		file: path to the GBK-encoded CSV exported from the bankuai source.
	"""
	#-- load CSV
	csvf = open(file)
	csvr = csv.DictReader(csvf)
	bk_st_pairs = []
	bk_st_pairs_dict = {}
	bk_id_dict = {}
	
	codes_to_valid = []
	codes_to_invalid = []
	
	# CSV layout (GBK): bankuai | sub-bankuai | bankuai name | stock code | stock name
	# example row:      bankuai | concept     | <name>       | 600587     | Xinhua Medical
	for row in csvr:
		# DictReader keys are the GBK-encoded header bytes; decode values to unicode.
		bk_name = row[u'板块名称'.encode("gbk")].decode("gbk")
		st_id = row[u'股票代码'.encode("gbk")].decode("gbk")
		bk_st_pairs.append([bk_name, st_id])
	csvf.close()
	print_log("%(num)s records have been read from %(fname)s." % {"num": len(bk_st_pairs), "fname": file})
	
	#---- get bankuai_id from dim_bankuai
	select_sql = "select t.id, t.name from dw.dim_bankuai t"
	cur = get_cur(db_conn)
	cur.execute(select_sql)
	db_rows = list(cur)
	for db_row in db_rows:
		db_name = db_row["name"].decode("utf-8")
		db_id = db_row["id"]
		bk_id_dict[db_name] = db_id
	
	#---- convert to dict 
	# Replace each bankuai name with its id and build "bkid-stockid" as a PK string.
	for i in range(len(bk_st_pairs)):
		bk_st_pairs[i][0] = bk_id_dict[bk_st_pairs[i][0]]
		bk_st_pairs[i].append(str(bk_st_pairs[i][0]) + "-" + str(bk_st_pairs[i][1])) # as PK
		bk_st_pairs_dict[bk_st_pairs[i][2]] = {"bk": bk_st_pairs[i][0], "st": bk_st_pairs[i][1]}
		
	#---- get bk_id, st_id from db, seach the combination in csv dict
	select_sql = "select t.stock_id, t.bankuai_id, t.is_valid from dw.dim_stock_bankuai t"
	cur.execute(select_sql)
	db_rows = list(cur)
	for db_row in db_rows:
		db_bk_id = db_row["bankuai_id"]
		db_st_id = db_row["stock_id"]
		db_pk = str(db_bk_id) + "-" + db_st_id
		db_is_valid = db_row["is_valid"]
		
		if db_pk in bk_st_pairs_dict and db_is_valid == "Y":
			# In csv and already valid in db: nothing to do, drop from the to-insert dict.
			del bk_st_pairs_dict[db_pk]
		elif db_pk in bk_st_pairs_dict and db_is_valid == "N":
			# Reappeared in csv after being invalidated: re-validate instead of inserting.
			codes_to_valid.append(" ( bankuai_id = " + str(db_bk_id) + " and stock_id = '" + str(db_st_id) + "' ) ")
			del bk_st_pairs_dict[db_pk]
		elif db_is_valid == "N":
			# not in csv file and it's already invalid in db, do nothing
			pass
		else:
			# not in csv, but in db it's valid, mark it to invalid
			codes_to_invalid.append(" ( bankuai_id = " + str(db_bk_id) + " and stock_id = '" + str(db_st_id) + "' ) ")
			
	#---- mark is_valid=N
	if len(codes_to_invalid) > 0:
		codes_to_invalid_str = " or ".join(codes_to_invalid)
		print_log("There are %(num)s stock bankuai combination will be marked invalid. %(combination)s" % {"num": len(codes_to_invalid), "combination": codes_to_invalid_str})
		upd_sql = "update dw.dim_stock_bankuai t set is_valid = 'N', upd_time = now() where %(combinations)s" % {"combinations": codes_to_invalid_str}
		cur.execute(upd_sql)
		db_conn.commit()
	else:
		print_log("No stock bankuai combinations need to be marked invalid.")			

	#---- mark is_valid=Y
	if len(codes_to_valid) > 0:
		codes_to_valid_str = " or ".join(codes_to_valid)
		print_log("There are %(num)s stock bankuai combination will be marked valid. %(combination)s" % {"num": len(codes_to_valid), "combination": codes_to_valid_str})
		upd_sql = "update dw.dim_stock_bankuai t set is_valid = 'Y', upd_time = now() where %(combinations)s" % {"combinations": codes_to_valid_str}
		cur.execute(upd_sql)
		db_conn.commit()
	else:
		print_log("No stock bankuai combinations need to be marked valid.")			

	#---- insert stocks into dim_stock_bankuai
	# Whatever remains in bk_st_pairs_dict was never seen in the db: insert it.
	if len(bk_st_pairs_dict.keys()) > 0:
		values = []
		print_log("There are %(num)s stock bankuai combination will be inserted." % {"num": len(bk_st_pairs_dict.keys())})
		for pk in bk_st_pairs_dict:
			print_log(pk)
			values.append("('%(stock_id)s', '%(bankuai_id)s', now(), 'Y')" % {"stock_id": bk_st_pairs_dict[pk]["st"], "bankuai_id": bk_st_pairs_dict[pk]["bk"]} )
		values_str = ",".join(values)
		ins_sql = "insert into dw.dim_stock_bankuai(stock_id, bankuai_id, upd_time, is_valid) values %(values)s" % {"values": values_str}
		cur.execute(ins_sql)
		db_conn.commit()
	else:
		print_log("No new stock bankuai combination.")

	print_log("dw.dim_stock_bankuai has been refreshed successfully.")
	# based on the list of recon_fields_in_file, read the corresponding fields in csv and concatenate them together as a PK
	print_log("Start to read %(file)s..." % {"file": file_to_recon})
	for row in csvr:
		key = []
		for i in range(len(file_db_recon[type]["recon_fields_in_file"])):
			field = file_db_recon[type]["recon_fields_in_file"][i]
			key.append(row[field.encode("gbk")].decode("gbk"))
		csv_dict["-".join(key)] = ""
	print_log("%(num)s records loaded, dict for csv done." % {"num": len(csv_dict.keys()) })
	csvf.close()

	#-- building dict for db
	# based on the list of recon_fields_in_db, read the corresponding fields in db and concatenate them together as a PK
	print_log("Start to read db...")
	select_sql = file_db_recon[type]["sql"]
	cur = get_cur(conn)
	cur.execute(select_sql)
	db_rows = list(cur)
	for row in db_rows:
		key = []
		for i in range(len(file_db_recon[type]["recon_fields_in_db"])):
			field = file_db_recon[type]["recon_fields_in_db"][i]
			key.append(row[field].decode("utf-8"))
		dbsql_dict["-".join(key)] = ""
	print_log("%(num)s records loaded, dict for db done." % {"num": len(csv_dict.keys()) })


	#------------------------------------------- RECONing
	print_log("Recon starting >>>")
	csv_dict_keys = csv_dict.keys()
	# iterate keys in csv dict, if it is found in db dict, remove it from both dict
def loader(queue, conn, start_date=options.start_date, end_date=options.end_date, stock_id=options.stock_id, merge_before_copy=options.merge_before_copy, enable_copy=options.enable_copy):
    """Load downloaded transaction files into dw.stock_transaction, one biz date at a time.

    Two modes per date:
      * merge_before_copy=True: concatenate all per-stock files of the date into
        one file and bulk-load it via psql COPY, toggling is_load_success N->Y
        in the log table around the load.
      * merge_before_copy=False: start a Stock_trans_loader worker per pending
        (row_id, stock_id, biz_date) log row, throttled by the bounded queue.

    Args:
        queue: bounded queue that throttles concurrent Stock_trans_loader workers.
        conn: open DB connection; committed after each step/worker start.
        start_date, end_date: inclusive 'YYYYMMDD' range to process.
        stock_id: optional single-stock restriction (applied to the pending query).
        merge_before_copy: select bulk COPY mode vs per-stock workers.
        enable_copy: forwarded to Stock_trans_loader.

    Blocks until the worker queue drains before returning.
    """
    cur_date_dt = datetime.datetime.strptime(start_date,'%Y%m%d')
    end_date_dt = datetime.datetime.strptime(end_date,'%Y%m%d')
    
    # Pending work = downloaded but not (successfully) loaded yet.
    stock_list_sql = '''
    select row_id, biz_date, stock_id
    from dw.log_stock_transaction
    where biz_date = '{biz_date}'
    and is_download_success = 'Y'
    and (is_load_success = 'N' or is_load_success is null)
    '''
    if not stock_id is None: stock_list_sql = stock_list_sql + ' and stock_id = \'' + stock_id + '\''
    
    cur = get_cur(conn)
    while cur_date_dt <= end_date_dt:  
        if merge_before_copy:
            # since load files one by one into table is taking too much time, the solution to boost the procedure is to merge all the pieces of files into one file and load the merge file into table, this takes less than 5 mins to complete.
            cur_date_str = cur_date_dt.strftime('%Y%m%d')
            working_dir = data_dir + SEP + cur_date_str
            file_merged = os.path.join(working_dir, "file_merged.csv")
            if os.path.exists(file_merged):
                warn_log('Removing old file: ' + file_merged)
                os.remove(file_merged)
            #-- Starting to merge files: every "<digits>*.txt" under working_dir is appended.
            with open(file_merged, "a") as dest:
                i=0
                for _, _, filenames in os.walk(working_dir):
                    for filename in fnmatch.filter(filenames, "[0-9]*.txt"):
                        with open(os.path.join(working_dir, filename)) as src:
                            shutil.copyfileobj(src, dest)
                        i+=1
                        print_log('Merged ' + str(i) + ' files.')
            #-- Deleting records from db so the COPY does not duplicate the date
            del_sql = '''delete from dw.stock_transaction where biz_date = '{}' '''.format(cur_date_str)
            get_query_result(conn, del_sql)
            conn.commit()
            print_log('Deletion for biz_date {} completed successfully.'.format(cur_date_str))
            #-- Updating is_load_success to N in log table
            upd_sql = '''update dw.log_stock_transaction set is_load_success = 'N' where biz_date = '{}' and is_download_success = 'Y' '''.format(cur_date_str)
            get_query_result(conn, upd_sql)
            conn.commit()
            print_log('is_load_success is updated to N')

            #++++++++ Starting to load the merged file into table
            psql_copy_from(DB_HOST, DB_NAME, DB_UNAME, 'dw.stock_transaction', file_merged, DB_PORT, args=' with (encoding \'GBK\')')
            print_log('Successfully loaded {} into table.'.format(file_merged))
            
            #-- Updating is_load_success to Y in log table
            upd_sql = '''update dw.log_stock_transaction set is_load_success = 'Y' where biz_date = '{}' and is_download_success = 'Y' '''.format(cur_date_str)
            get_query_result(conn, upd_sql)
            conn.commit()
            print_log('is_load_success is updated to Y')

            #-- Cleaning up working dir
            os.remove(file_merged)
            
            cur_date_dt = cur_date_dt + datetime.timedelta(1)
            
        else:
            # NOTE(review): biz_date is substituted as a full datetime string here;
            # presumably the db casts it when comparing to the date column — confirm.
            stock_list_sql_var_replaced = stock_list_sql.format(biz_date=cur_date_dt)
            cur.execute(stock_list_sql_var_replaced)
            rows = list(cur)
            for row in rows:
                row_id = row['row_id']
                biz_date = str(row['biz_date']).replace('-','')
                # NOTE(review): this rebinds the stock_id parameter inside the loop;
                # harmless here because stock_list_sql was built before the loop.
                stock_id = row['stock_id']
                while queue.full():
                    print_log('=================> queue is full, wait for 1 second...')
                    time.sleep(1)
                s = Stock_trans_loader(queue, conn, row_id, stock_id, biz_date, enable_copy=enable_copy )
                s.start()
                print_log('-----> queue size: ' + str(queue.qsize()))
                conn.commit()
                    
            cur_date_dt = cur_date_dt + datetime.timedelta(1)

    # Wait for all outstanding workers to finish before returning.
    while not queue.empty():
        print_log('=================> queue is not empty yet, wait for 1 second...')
        time.sleep(1)
    print_log("Start to read %(file)s..." % {"file": file_to_recon})
    for row in csvr:
        key = []
        for i in range(len(file_db_recon[type]["recon_fields_in_file"])):
            field = file_db_recon[type]["recon_fields_in_file"][i]
            key.append(row[field.encode("gbk")].decode("gbk"))
        csv_dict["-".join(key)] = ""
    print_log("%(num)s records loaded, dict for csv done." %
              {"num": len(csv_dict.keys())})
    csvf.close()

    #-- building dict for db
    # based on the list of recon_fields_in_db, read the corresponding fields in db and concatenate them together as a PK
    print_log("Start to read db...")
    select_sql = file_db_recon[type]["sql"]
    cur = get_cur(conn)
    cur.execute(select_sql)
    db_rows = list(cur)
    for row in db_rows:
        key = []
        for i in range(len(file_db_recon[type]["recon_fields_in_db"])):
            field = file_db_recon[type]["recon_fields_in_db"][i]
            key.append(row[field].decode("utf-8"))
        dbsql_dict["-".join(key)] = ""
    print_log("%(num)s records loaded, dict for db done." %
              {"num": len(csv_dict.keys())})

    #------------------------------------------- RECONing
    print_log("Recon starting >>>")
    csv_dict_keys = csv_dict.keys()
    # iterate keys in csv dict, if it is found in db dict, remove it from both dict
def loader(queue,
           conn,
           start_date=options.start_date,
           end_date=options.end_date,
           stock_id=options.stock_id,
           merge_before_copy=options.merge_before_copy,
           enable_copy=options.enable_copy):
    """Load downloaded transaction files into dw.stock_transaction, one biz date at a time.

    Two modes per date:
      * merge_before_copy=True: concatenate all per-stock files of the date into
        one file and bulk-load it via psql COPY, toggling is_load_success N->Y
        in the log table around the load.
      * merge_before_copy=False: start a Stock_trans_loader worker per pending
        (row_id, stock_id, biz_date) log row, throttled by the bounded queue.

    Args:
        queue: bounded queue that throttles concurrent Stock_trans_loader workers.
        conn: open DB connection; committed after each step/worker start.
        start_date, end_date: inclusive 'YYYYMMDD' range to process.
        stock_id: optional single-stock restriction (applied to the pending query).
        merge_before_copy: select bulk COPY mode vs per-stock workers.
        enable_copy: forwarded to Stock_trans_loader.

    Blocks until the worker queue drains before returning.
    """
    cur_date_dt = datetime.datetime.strptime(start_date, '%Y%m%d')
    end_date_dt = datetime.datetime.strptime(end_date, '%Y%m%d')

    # Pending work = downloaded but not (successfully) loaded yet.
    stock_list_sql = '''
    select row_id, biz_date, stock_id
    from dw.log_stock_transaction
    where biz_date = '{biz_date}'
    and is_download_success = 'Y'
    and (is_load_success = 'N' or is_load_success is null)
    '''
    if not stock_id is None:
        stock_list_sql = stock_list_sql + ' and stock_id = \'' + stock_id + '\''

    cur = get_cur(conn)
    while cur_date_dt <= end_date_dt:
        if merge_before_copy:
            # since load files one by one into table is taking too much time, the solution to boost the procedure is to merge all the pieces of files into one file and load the merge file into table, this takes less than 5 mins to complete.
            cur_date_str = cur_date_dt.strftime('%Y%m%d')
            working_dir = data_dir + SEP + cur_date_str
            file_merged = os.path.join(working_dir, "file_merged.csv")
            if os.path.exists(file_merged):
                warn_log('Removing old file: ' + file_merged)
                os.remove(file_merged)
            #-- Starting to merge files: every "<digits>*.txt" under working_dir is appended.
            with open(file_merged, "a") as dest:
                i = 0
                for _, _, filenames in os.walk(working_dir):
                    for filename in fnmatch.filter(filenames, "[0-9]*.txt"):
                        with open(os.path.join(working_dir, filename)) as src:
                            shutil.copyfileobj(src, dest)
                        i += 1
                        print_log('Merged ' + str(i) + ' files.')
            #-- Deleting records from db so the COPY does not duplicate the date
            del_sql = '''delete from dw.stock_transaction where biz_date = '{}' '''.format(
                cur_date_str)
            get_query_result(conn, del_sql)
            conn.commit()
            print_log(
                'Deletion for biz_date {} completed successfully.'.format(
                    cur_date_str))
            #-- Updating is_load_success to N in log table
            upd_sql = '''update dw.log_stock_transaction set is_load_success = 'N' where biz_date = '{}' and is_download_success = 'Y' '''.format(
                cur_date_str)
            get_query_result(conn, upd_sql)
            conn.commit()
            print_log('is_load_success is updated to N')

            #++++++++ Starting to load the merged file into table
            psql_copy_from(DB_HOST,
                           DB_NAME,
                           DB_UNAME,
                           'dw.stock_transaction',
                           file_merged,
                           DB_PORT,
                           args=' with (encoding \'GBK\')')
            print_log('Successfully loaded {} into table.'.format(file_merged))

            #-- Updating is_load_success to Y in log table
            upd_sql = '''update dw.log_stock_transaction set is_load_success = 'Y' where biz_date = '{}' and is_download_success = 'Y' '''.format(
                cur_date_str)
            get_query_result(conn, upd_sql)
            conn.commit()
            print_log('is_load_success is updated to Y')

            #-- Cleaning up working dir
            os.remove(file_merged)

            cur_date_dt = cur_date_dt + datetime.timedelta(1)

        else:
            # NOTE(review): biz_date is substituted as a full datetime string here;
            # presumably the db casts it when comparing to the date column — confirm.
            stock_list_sql_var_replaced = stock_list_sql.format(
                biz_date=cur_date_dt)
            cur.execute(stock_list_sql_var_replaced)
            rows = list(cur)
            for row in rows:
                row_id = row['row_id']
                biz_date = str(row['biz_date']).replace('-', '')
                # NOTE(review): this rebinds the stock_id parameter inside the loop;
                # harmless here because stock_list_sql was built before the loop.
                stock_id = row['stock_id']
                while queue.full():
                    print_log(
                        '=================> queue is full, wait for 1 second...'
                    )
                    time.sleep(1)
                s = Stock_trans_loader(queue,
                                       conn,
                                       row_id,
                                       stock_id,
                                       biz_date,
                                       enable_copy=enable_copy)
                s.start()
                print_log('-----> queue size: ' + str(queue.qsize()))
                conn.commit()

            cur_date_dt = cur_date_dt + datetime.timedelta(1)

    # Wait for all outstanding workers to finish before returning.
    while not queue.empty():
        print_log(
            '=================> queue is not empty yet, wait for 1 second...')
        time.sleep(1)
Example #25
0
def insert_into_table(db_field_yaml, stock_obj_name, in_file, conn, log_fh,
                      warn_fh):
    """Generic loader: parse each line of *in_file* into stock objects and insert them
    into the table described by *db_field_yaml* (yaml file name = table name).

    Each yaml entry maps a table column to its type, PK flag, and the attribute
    name on the stock object for each supported source. For every parsed record
    the PK rows are deleted first, then a fresh row is inserted. Commits every
    1000 rows and once at the end.

    Args:
        db_field_yaml: path to the column-mapping yaml; basename names the table.
        stock_obj_name: class name of the stock implementation (e.g. 'Tengxun_stock').
        in_file: text file whose lines the stock class can parse.
        conn: open DB connection.
        log_fh, warn_fh: file handles passed through to the logging helpers.
    """
    # based on the fields mapping between db and object, db type defined in yaml, generate delete sql and insert sql, and fire to db
    # this function could be used for any db insert, if yaml and object are setup properly
    # Yaml example
    # biz_date:
    #   type: date
    #   is_pk: Y
    #   stock_object:
    #         Tengxun_stock: date
    from object_impl.Sina_stock import Sina_stock
    from object_impl.Tengxun_stock import Tengxun_stock
    from object_impl.Yahoo_stock import Yahoo_stock

    db_field_mapping = get_yaml(db_field_yaml)
    tab_name = os.path.basename(db_field_yaml).replace(
        '.yml', '')  # yml file name as table name
    tab_fields = []  # table field names
    tab_pk = []  # table pk
    tab_types = []  # table field types
    obj_attrs = []  # attribute names in stock object
    for k, v in db_field_mapping.items():
        tab_type = v['type']
        obj_attr = v['stock_object'][stock_obj_name]
        if obj_attr != None:  # If None|Null is set for fields in yml, remove the fields from insertion
            tab_fields.append(k)
            if v['is_pk'] == 'Y': tab_pk.append(k)  # pk, delete before insert
            tab_types.append(tab_type)
            obj_attrs.append(obj_attr)
    del_sql = 'delete from {tab_name} where 1=1 '.format(tab_name=tab_name)
    ins_sql = 'insert into {tab_name}({fields}) '.format(
        tab_name=tab_name, fields=','.join(tab_fields))
    # iterate each row in the file, insert into table
    num = 0
    with open(in_file) as f:
        for row in f.readlines():
            # get_stock_object_from_str is a function should be available in all the stock objects
            # this function accepts the string returned from website and generate a dict for stock object
            # the dict is like {stock: {date: object}}
            # dynamically import object module, class name and file name should be identical
            #exec('from object_impl.{object} import {object}'.format(object = stock_obj_name), globals())
            # NOTE(review): eval on constructed text — safe only because
            # stock_obj_name comes from internal config, never user input.
            stock_dict = eval('{object}.get_stock_object_from_str(row)'.format(
                object=stock_obj_name, row=row))
            for stock in stock_dict:  # for Tengxun or sina interface, there is just one stock in one stock dict
                for date in stock_dict[
                        stock]:  # for Tengxun or sina interface, there is just one date in one stock dict
                    stock_obj = stock_dict[stock][
                        date]  # this object is stock implementation object
                    value_sql = reduce(
                        lambda x, y:
                        (x if re.match(r'stock_obj', x) else 'stock_obj.' + x +
                         ', ') + "stock_obj.{attr_name}, ".format(attr_name=y),
                        obj_attrs
                    )  # add 'stock_obj.' to the first attr, and concatenate attrs to a string
                    value_sql = value_sql[
                        0:
                        -2]  # remove the last comma and the blankspace next to it
                    value_sql = eval(value_sql)  # tupe returned
                    final_value_sql = ''
                    del_where = ''
                    for i, v in enumerate(value_sql):
                        value = "'" + v + "'" if tab_types[
                            i] == 'date' or tab_types[
                                i] == 'varchar' else 'Null' if len(
                                    str(v)
                                ) == 0 else str(
                                    v
                                )  # date and varchar quoted by single quote, otherwise no quote or null(if length of value is 0)
                        final_value_sql = final_value_sql + value + ', '
                        if tab_fields[i] in tab_pk:
                            del_where = del_where + ' and {field}={value}'.format(
                                field=tab_fields[i], value=value)
                    final_value_sql = final_value_sql[0:-2]
                    del_complete_sql = del_sql + del_where
                    ins_complete_sql = ins_sql + ' values( ' + final_value_sql + ')'
                    #print_log('Deleting [{stock},{date}] from {tab_name}...\n {sql}'.format(stock=stock,date=date,tab_name=tab_name,sql=del_complete_sql), log_fh)
                    cur = get_cur(conn)
                    cur.execute(del_complete_sql)
                    cur.execute(ins_complete_sql)
                    print_log(
                        'Inserted [{stock},{date}] into {tab_name}.'.format(
                            stock=stock, date=date, tab_name=tab_name), log_fh)
                    num += 1
                    if num % 1000 == 0: conn.commit()  # commit in batches of 1000
    conn.commit()
    print_log(
        '{num} records have been written into {tab_name}.'.format(
            num=num, tab_name=tab_name), log_fh)
Example #26
0
def load_into_bankuai(db_conn, file, biz_date=None):
    """Reload one day of bankuai (sector) statistics into dw.bankuai.

    Reads an eastmoney CSV export (gbk encoded), resolves each bankuai name
    to its id via dw.dim_bankuai, deletes any existing rows for the business
    date, then re-inserts the day's figures, committing once at the end.

    Parameters:
        db_conn  -- open DB connection; cursors are obtained via get_cur()
        file     -- path to the CSV; expected gbk columns include 板块名称,
                    涨跌幅, 总市值(亿), 换手率, 上涨家数, 下跌家数,
                    领涨股票代码, 领涨股票涨跌幅
        biz_date -- 'YYYYMMDD' string; when None it is parsed from the file
                    name (first 8-digit run before '.csv')

    Raises:
        RuntimeError -- biz_date malformed, or not derivable from the name
        KeyError     -- a CSV bankuai name is missing from dw.dim_bankuai

    Fix vs previous version: the delete statement no longer embeds a stray
    trailing blank inside the quoted date literal, and the date is rendered
    as plain 'YYYY-MM-DD' rather than a full datetime repr, so delete and
    insert use the same literal.
    NOTE(review): values are interpolated straight into SQL text -- assumes
    trusted CSV content (no quotes in the data).
    """
    # target table dw.bankuai:
    #   biz_date date, bankuai_id integer (PK with biz_date), rise varchar(16),
    #   market_value_in_million decimal(12,2), turnover_rate decimal(5,2),
    #   num_of_rise integer, num_of_drop integer, leading_stock_id varchar(6),
    #   rise_of_leading_stock decimal(10,2)

    #-- build bankuai name -> id lookup from dw.dim_bankuai
    bk_id_dict = {}
    cur = get_cur(db_conn)
    cur.execute('select t.name, t.id from dw.dim_bankuai t')
    for db_row in list(cur):
        bk_id_dict[db_row["name"].decode("utf-8")] = db_row["id"]
    print_log("There are %(num)s records read from %(name)s" % {"num": len(bk_id_dict.keys()), "name": 'dw.dim_bankuai'})

    #-- load the CSV (gbk-encoded headers and values)
    csv_data = []
    csvf = open(file)
    for row in csv.DictReader(csvf):
        bk_id = bk_id_dict[row[u'板块名称'.encode("gbk")].decode("gbk")]
        csv_data.append({bk_id: {
            "rise": row[u'涨跌幅'.encode("gbk")].decode("gbk"),
            "market_value_in_million": row[u'总市值(亿)'.encode("gbk")],
            "turnover_rate": row[u'换手率'.encode("gbk")],
            "num_of_rise": row[u'上涨家数'.encode("gbk")],
            "num_of_drop": row[u'下跌家数'.encode("gbk")],
            "leading_stock_id": row[u'领涨股票代码'.encode("gbk")],
            "rise_of_leading_stock": row[u'领涨股票涨跌幅'.encode("gbk")],
        }})
    csvf.close()
    print_log("%(num)s records have been read from %(name)s." % {"num": len(csv_data), "name": file})

    #-- determine biz_date: explicit argument wins, else parse the file name
    if biz_date is not None:
        if re.search(r'\d{8}', biz_date):
            v_biz_date = biz_date
        else:
            raise RuntimeError(biz_date + " is not a valid date format, the date should be like YYYYMMDD.")
    else:
        m = re.search(r'.*(?P<date>\d{8})\.csv', file)
        if not m:
            raise RuntimeError('Can not determine biz_date, please check if file name has date included or pass biz_date when calling the function.')
        v_biz_date = m.group("date")
    # validate and normalise to an ISO date for the SQL literals
    v_biz_date_iso = datetime.datetime.strptime(v_biz_date, '%Y%m%d').strftime('%Y-%m-%d')

    #-- delete the day being reloaded (fixed: no trailing blank in the literal)
    cur.execute("delete from dw.bankuai where biz_date = '%(date)s'" % {'date': v_biz_date_iso})
    db_conn.commit()
    print_log("Deleted records from dw.bankuai where biz_date = '%(biz_date)s'." % {"biz_date": v_biz_date})

    #-- insert the day's rows
    num_inserted = 0
    for r in csv_data:
        k = list(r)[0]  # single-key dict: {bankuai_id: measures}
        num_inserted += 1
        ins_sql = '''insert into dw.bankuai(
            biz_date,
            bankuai_id,
            rise,
            market_value_in_million,
            turnover_rate,
            num_of_rise,
            num_of_drop,
            leading_stock_id,
            rise_of_leading_stock) values(
            '%(biz_date)s',
            %(bankuai_id)s,
            '%(rise)s',
            %(market_value_in_million)s,
            %(turnover_rate)s,
            %(num_of_rise)s,
            %(num_of_drop)s,
            '%(leading_stock_id)s',
            %(rise_of_leading_stock)s
            )''' % {
            'biz_date': v_biz_date_iso,
            'bankuai_id': k,
            'rise': r[k]['rise'],
            'market_value_in_million': r[k]['market_value_in_million'],
            'turnover_rate': r[k]['turnover_rate'],
            'num_of_rise': r[k]['num_of_rise'],
            'num_of_drop': r[k]['num_of_drop'],
            # eastmoney sometimes returns '-' instead of a stock id; map it to
            # the '000000' placeholder so the varchar(6) column stays sensible
            'leading_stock_id': r[k]['leading_stock_id'] if r[k]['leading_stock_id'] != '-' else '000000',
            'rise_of_leading_stock': r[k]['rise_of_leading_stock']
            }
        cur.execute(ins_sql)

    db_conn.commit()
    print_log(str(num_inserted) + " inserted into dw.bankuai.")
    print_log("dw.bankuai has been refreshed successfully.")
Example #27
0
def insert_into_table(db_field_yaml, stock_obj_name, in_file, conn, log_fh, warn_fh):
    """Delete-then-insert rows parsed from in_file into the table described by db_field_yaml.

    The yaml file name (minus '.yml') is used as the target table name.  Each
    yaml entry maps one table column to its db type, whether it belongs to the
    primary key, and which attribute of the stock object class (named by
    stock_obj_name) supplies its value.  For every line of in_file the stock
    object is built, the PK row is deleted, then the fresh values inserted;
    the connection is committed every 1000 rows and once at the end.

    NOTE(review): row data flows through eval() and is string-interpolated
    into SQL -- only safe for trusted input files.
    """
    # based on the fields mapping between db and object, db type defined in yaml, generate delete sql and insert sql, and fire to db
    # this function could be used for any db insert, if yaml and object are setup properly
    # Yaml example
    # biz_date: 
    #   type: date
    #   is_pk: Y
    #   stock_object: 
    #         Tengxun_stock: date
    from object_impl.Sina_stock import Sina_stock
    from object_impl.Tengxun_stock import Tengxun_stock
    from object_impl.Yahoo_stock import Yahoo_stock
    
    db_field_mapping = get_yaml(db_field_yaml)
    tab_name = os.path.basename(db_field_yaml).replace('.yml', '') # yml file name as table name
    tab_fields = [] # table field names
    tab_pk = [] # table pk columns (used to build the delete predicate)
    tab_types = [] # table field types
    obj_attrs = [] # attribute names in stock object
    for k,v in db_field_mapping.items():
        tab_type = v['type']
        obj_attr = v['stock_object'][stock_obj_name]
        if obj_attr != None: # If None|Null is set for fields in yml, remove the fields from insertion
            tab_fields.append(k)
            if v['is_pk'] == 'Y': tab_pk.append(k) # pk, delete before insert
            tab_types.append(tab_type)
            obj_attrs.append(obj_attr)
    del_sql = 'delete from {tab_name} where 1=1 '.format(tab_name=tab_name)
    ins_sql = 'insert into {tab_name}({fields}) '.format(tab_name=tab_name, fields=','.join(tab_fields))
    # iterate each row in the file, insert into table
    num = 0
    with open(in_file) as f:
        for row in f.readlines():
            # get_stock_object_from_str is a function should be available in all the stock objects
            # this function accepts the string returned from website and generate a dict for stock object
            # the dict is like {stock: {date: object}}
            # dynamically import object module, class name and file name should be identical
            #exec('from object_impl.{object} import {object}'.format(object = stock_obj_name), globals())
            stock_dict = eval('{object}.get_stock_object_from_str(row)'.format(object=stock_obj_name, row=row))
            for stock in stock_dict: # for Tengxun or sina interface, there is just one stock in one stock dict
                for date in stock_dict[stock]: # for Tengxun or sina interface, there is just one date in one stock dict
                    stock_obj = stock_dict[stock][date] # this object is stock implementation object
                    # build the text 'stock_obj.attr1, stock_obj.attr2, ...' then eval it to get the value tuple
                    value_sql = reduce(lambda x, y: ( x if re.match(r'stock_obj', x) else 'stock_obj.' + x + ', ' ) + "stock_obj.{attr_name}, ".format(attr_name=y), obj_attrs) # add 'stock_obj.' to the first attr, and concatenate attrs to a string
                    value_sql = value_sql[0:-2] # remove the last comma and the blankspace next to it
                    value_sql = eval(value_sql) # tuple returned
                    final_value_sql = ''
                    del_where = ''
                    for i, v in enumerate(value_sql):
                        value = "'" + v + "'" if tab_types[i] == 'date' or tab_types[i] == 'varchar' else 'Null' if len(str(v)) == 0 else str(v) # date and varchar quoted by single quote, otherwise no quote or null(if length of value is 0)
                        final_value_sql = final_value_sql + value + ', '
                        if tab_fields[i] in tab_pk: 
                            del_where = del_where + ' and {field}={value}'.format(field=tab_fields[i], value=value)
                    final_value_sql = final_value_sql[0:-2]
                    del_complete_sql = del_sql + del_where
                    ins_complete_sql = ins_sql + ' values( ' + final_value_sql + ')'
                    #print_log('Deleting [{stock},{date}] from {tab_name}...\n {sql}'.format(stock=stock,date=date,tab_name=tab_name,sql=del_complete_sql), log_fh)
                    cur = get_cur(conn)
                    cur.execute(del_complete_sql)
                    cur.execute(ins_complete_sql)
                    print_log('Inserted [{stock},{date}] into {tab_name}.'.format(stock=stock,date=date,tab_name=tab_name), log_fh)
                    num += 1
                    if num % 1000 == 0: conn.commit()  # periodic commit keeps transactions small
    conn.commit()
    print_log('{num} records have been written into {tab_name}.'.format(num=num, tab_name=tab_name), log_fh)
def load_into_dim_bankuai(db_conn, file, parent_bankuai_ids={u"概念板块": 1, u"地域板块": 2, u"行业板块": 3}):
    """Bring dw.dim_bankuai in line with the bankuai names listed in *file*.

    Reads the gbk-encoded CSV into {name: {"parent_bankuai_id": id}}, then
    compares it with the currently valid rows of the table: rows that no
    longer match the CSV are flipped to is_valid='N', and names new to the
    table are inserted.  Commits after each DML statement.
    """
    # ---- collect (bankuai name -> parent id) pairs from the CSV
    csv_bankuais = {}
    stale_ids = []
    fh = open(file)
    for rec in csv.DictReader(fh):
        bk_name = rec[u"板块名称".encode("gbk")].decode("gbk")
        parent_name = rec[u"子版块".encode("gbk")].decode("gbk")
        csv_bankuais[bk_name] = {"parent_bankuai_id": parent_bankuai_ids[parent_name]}
    fh.close()
    print_log("%(num)s records have been read from %(fname)s." % {"num": len(csv_bankuais.keys()), "fname": file})

    # ---- compare valid db rows against the CSV; collect ids gone stale
    cur = get_cur(db_conn)
    cur.execute("select t.parent_bankuai_id, t.name, t.id from dw.dim_bankuai t where t.is_valid = 'Y'")
    for db_row in list(cur):
        db_name = db_row["name"].decode("utf-8")
        if db_name not in csv_bankuais:
            stale_ids.append(str(db_row["id"]))
        elif csv_bankuais[db_name]["parent_bankuai_id"] != db_row["parent_bankuai_id"]:
            stale_ids.append(str(db_row["id"]))
        else:
            # already present and unchanged -- nothing left to insert
            del csv_bankuais[db_name]

    # ---- flip stale rows to is_valid='N'
    if stale_ids:
        stale_ids_str = ",".join(stale_ids)
        print_log("Invalid bankuai ids: " + stale_ids_str)
        upd_sql = "update dw.dim_bankuai t set is_valid = 'N', upd_time = now() where t.id in (%(ids)s)" % {
            "ids": stale_ids_str
        }
        cur.execute(upd_sql)
        db_conn.commit()
    else:
        print_log("No invalid bankuai ids.")

    # ---- insert names the table does not have yet
    if csv_bankuais:
        print_log("There are %(num)s bankuais will be inserted." % {"num": len(csv_bankuais.keys())})
        value_rows = [
            "('%(name)s', '%(parent_bankuai_id)s', now(), 'Y')"
            % {"name": n, "parent_bankuai_id": csv_bankuais[n]["parent_bankuai_id"]}
            for n in csv_bankuais
        ]
        ins_sql = "insert into dw.dim_bankuai(name, parent_bankuai_id, upd_time, is_valid) values %(values)s" % {
            "values": ",".join(value_rows)
        }
        cur.execute(ins_sql)
        db_conn.commit()
    else:
        print_log("No new bankuai ids.")

    print_log("dw.dim_bankuai has been refreshed successfully.")
def load_into_bankuai(db_conn, file, biz_date=None):
    """Reload one day of bankuai (sector) statistics into dw.bankuai.

    Reads an eastmoney CSV export (gbk encoded), resolves each bankuai name
    to its id via dw.dim_bankuai, deletes any existing rows for the business
    date, then re-inserts the day's figures, committing once at the end.

    Parameters:
        db_conn  -- open DB connection; cursors are obtained via get_cur()
        file     -- path to the CSV; expected gbk columns include 板块名称,
                    涨跌幅, 总市值(亿), 换手率, 上涨家数, 下跌家数,
                    领涨股票代码, 领涨股票涨跌幅
        biz_date -- 'YYYYMMDD' string; when None it is parsed from the file
                    name (first 8-digit run before '.csv')

    Raises:
        RuntimeError -- biz_date malformed, or not derivable from the name
        KeyError     -- a CSV bankuai name is missing from dw.dim_bankuai

    Fix vs previous version: the delete statement no longer embeds a stray
    trailing blank inside the quoted date literal, and the date is rendered
    as plain 'YYYY-MM-DD' rather than a full datetime repr, so delete and
    insert use the same literal.
    NOTE(review): values are interpolated straight into SQL text -- assumes
    trusted CSV content (no quotes in the data).
    """
    # target table dw.bankuai:
    #   biz_date date, bankuai_id integer (PK with biz_date), rise varchar(16),
    #   market_value_in_million decimal(12,2), turnover_rate decimal(5,2),
    #   num_of_rise integer, num_of_drop integer, leading_stock_id varchar(6),
    #   rise_of_leading_stock decimal(10,2)

    #-- build bankuai name -> id lookup from dw.dim_bankuai
    bk_id_dict = {}
    cur = get_cur(db_conn)
    cur.execute('select t.name, t.id from dw.dim_bankuai t')
    for db_row in list(cur):
        bk_id_dict[db_row["name"].decode("utf-8")] = db_row["id"]
    print_log("There are %(num)s records read from %(name)s" % {"num": len(bk_id_dict.keys()), "name": 'dw.dim_bankuai'})

    #-- load the CSV (gbk-encoded headers and values)
    csv_data = []
    csvf = open(file)
    for row in csv.DictReader(csvf):
        bk_id = bk_id_dict[row[u'板块名称'.encode("gbk")].decode("gbk")]
        csv_data.append({bk_id: {
            "rise": row[u'涨跌幅'.encode("gbk")].decode("gbk"),
            "market_value_in_million": row[u'总市值(亿)'.encode("gbk")],
            "turnover_rate": row[u'换手率'.encode("gbk")],
            "num_of_rise": row[u'上涨家数'.encode("gbk")],
            "num_of_drop": row[u'下跌家数'.encode("gbk")],
            "leading_stock_id": row[u'领涨股票代码'.encode("gbk")],
            "rise_of_leading_stock": row[u'领涨股票涨跌幅'.encode("gbk")],
        }})
    csvf.close()
    print_log("%(num)s records have been read from %(name)s." % {"num": len(csv_data), "name": file})

    #-- determine biz_date: explicit argument wins, else parse the file name
    if biz_date is not None:
        if re.search(r'\d{8}', biz_date):
            v_biz_date = biz_date
        else:
            raise RuntimeError(biz_date + " is not a valid date format, the date should be like YYYYMMDD.")
    else:
        m = re.search(r'.*(?P<date>\d{8})\.csv', file)
        if not m:
            raise RuntimeError('Can not determine biz_date, please check if file name has date included or pass biz_date when calling the function.')
        v_biz_date = m.group("date")
    # validate and normalise to an ISO date for the SQL literals
    v_biz_date_iso = datetime.datetime.strptime(v_biz_date, '%Y%m%d').strftime('%Y-%m-%d')

    #-- delete the day being reloaded (fixed: no trailing blank in the literal)
    cur.execute("delete from dw.bankuai where biz_date = '%(date)s'" % {'date': v_biz_date_iso})
    db_conn.commit()
    print_log("Deleted records from dw.bankuai where biz_date = '%(biz_date)s'." % {"biz_date": v_biz_date})

    #-- insert the day's rows
    num_inserted = 0
    for r in csv_data:
        k = list(r)[0]  # single-key dict: {bankuai_id: measures}
        num_inserted += 1
        ins_sql = '''insert into dw.bankuai(
            biz_date,
            bankuai_id,
            rise,
            market_value_in_million,
            turnover_rate,
            num_of_rise,
            num_of_drop,
            leading_stock_id,
            rise_of_leading_stock) values(
            '%(biz_date)s',
            %(bankuai_id)s,
            '%(rise)s',
            %(market_value_in_million)s,
            %(turnover_rate)s,
            %(num_of_rise)s,
            %(num_of_drop)s,
            '%(leading_stock_id)s',
            %(rise_of_leading_stock)s
            )''' % {
            'biz_date': v_biz_date_iso,
            'bankuai_id': k,
            'rise': r[k]['rise'],
            'market_value_in_million': r[k]['market_value_in_million'],
            'turnover_rate': r[k]['turnover_rate'],
            'num_of_rise': r[k]['num_of_rise'],
            'num_of_drop': r[k]['num_of_drop'],
            # eastmoney sometimes returns '-' instead of a stock id; map it to
            # the '000000' placeholder so the varchar(6) column stays sensible
            'leading_stock_id': r[k]['leading_stock_id'] if r[k]['leading_stock_id'] != '-' else '000000',
            'rise_of_leading_stock': r[k]['rise_of_leading_stock']
            }
        cur.execute(ins_sql)

    db_conn.commit()
    print_log(str(num_inserted) + " inserted into dw.bankuai.")
    print_log("dw.bankuai has been refreshed successfully.")
Example #30
0
def load_into_dim_stock_bankuai(db_conn, file):
    """Bring dw.dim_stock_bankuai in line with the (bankuai, stock) pairs in *file*.

    Reads the gbk-encoded CSV, resolves bankuai names to ids through
    dw.dim_bankuai, then diffs against the table: pairs missing from the CSV
    are marked is_valid='N', pairs that reappeared are re-marked 'Y', and
    brand-new pairs are inserted.  Commits after each DML statement.
    """
    # CSV layout (gbk):
    # 板块	子版块		板块名称	股票代码	股票名称
    pairs = []
    fh = open(file)
    for rec in csv.DictReader(fh):
        pairs.append((rec[u'板块名称'.encode("gbk")].decode("gbk"),
                      rec[u'股票代码'.encode("gbk")].decode("gbk")))
    fh.close()
    print_log("%(num)s records have been read from %(fname)s." % {
        "num": len(pairs),
        "fname": file
    })

    #---- bankuai name -> id lookup from dw.dim_bankuai
    cur = get_cur(db_conn)
    cur.execute("select t.id, t.name from dw.dim_bankuai t")
    name_to_id = {}
    for db_row in list(cur):
        name_to_id[db_row["name"].decode("utf-8")] = db_row["id"]

    #---- key every CSV pair by the table PK "<bankuai_id>-<stock_id>"
    pending = {}
    for bk_name, st_id in pairs:
        bk_id = name_to_id[bk_name]
        pending[str(bk_id) + "-" + str(st_id)] = {"bk": bk_id, "st": st_id}

    #---- diff against the current table content
    to_valid = []
    to_invalid = []
    cur.execute("select t.stock_id, t.bankuai_id, t.is_valid from dw.dim_stock_bankuai t")
    for db_row in list(cur):
        bk_id = db_row["bankuai_id"]
        st_id = db_row["stock_id"]
        pk = str(bk_id) + "-" + st_id
        valid = db_row["is_valid"]
        cond = " ( bankuai_id = " + str(bk_id) + " and stock_id = '" + str(st_id) + "' ) "
        if pk in pending and valid in ("Y", "N"):
            if valid == "N":
                to_valid.append(cond)  # pair came back: revalidate it
            del pending[pk]            # present in db, nothing to insert
        elif valid != "N":
            to_invalid.append(cond)    # valid in db but gone from the CSV

    #---- mark vanished pairs is_valid='N'
    if to_invalid:
        to_invalid_str = " or ".join(to_invalid)
        print_log(
            "There are %(num)s stock bankuai combination will be marked invalid. %(combination)s"
            % {"num": len(to_invalid), "combination": to_invalid_str})
        upd_sql = "update dw.dim_stock_bankuai t set is_valid = 'N', upd_time = now() where %(combinations)s" % {
            "combinations": to_invalid_str
        }
        cur.execute(upd_sql)
        db_conn.commit()
    else:
        print_log("No stock bankuai combinations need to be marked invalid.")

    #---- re-validate pairs that reappeared
    if to_valid:
        to_valid_str = " or ".join(to_valid)
        print_log(
            "There are %(num)s stock bankuai combination will be marked valid. %(combination)s"
            % {"num": len(to_valid), "combination": to_valid_str})
        upd_sql = "update dw.dim_stock_bankuai t set is_valid = 'Y', upd_time = now() where %(combinations)s" % {
            "combinations": to_valid_str
        }
        cur.execute(upd_sql)
        db_conn.commit()
    else:
        print_log("No stock bankuai combinations need to be marked valid.")

    #---- insert brand-new pairs
    if pending:
        print_log(
            "There are %(num)s stock bankuai combination will be inserted." %
            {"num": len(pending.keys())})
        value_rows = []
        for pk in pending:
            print_log(pk)
            value_rows.append("('%(stock_id)s', '%(bankuai_id)s', now(), 'Y')" % {
                "stock_id": pending[pk]["st"],
                "bankuai_id": pending[pk]["bk"]
            })
        ins_sql = "insert into dw.dim_stock_bankuai(stock_id, bankuai_id, upd_time, is_valid) values %(values)s" % {
            "values": ",".join(value_rows)
        }
        cur.execute(ins_sql)
        db_conn.commit()
    else:
        print_log("No new stock bankuai combination.")

    print_log("dw.dim_stock_bankuai has been refreshed successfully.")
Example #31
0
def load_into_dim_stock(db_conn, file):
    """Bring dw.dim_stock in line with the stock code/name pairs in *file*.

    Reads the gbk-encoded CSV into {code: name} and diffs it against the
    table: unchanged rows are left alone, renamed stocks get their name
    updated, codes that reappeared are re-marked is_valid='Y', codes missing
    from the CSV are marked 'N', and new codes are inserted.  Commits after
    each DML statement.
    """
    # CSV layout (gbk):
    # 板块	子版块		板块名称	股票代码	股票名称
    new_codes = {}
    fh = open(file)
    for rec in csv.DictReader(fh):
        code = rec[u'股票代码'.encode("gbk")].decode("gbk")
        new_codes[code] = rec[u'股票名称'.encode("gbk")].decode("gbk")
    fh.close()
    print_log("%(num)s records have been read from %(fname)s." % {
        "num": len(new_codes.keys()),
        "fname": file
    })

    #---- diff the CSV content against the whole table (valid and invalid rows)
    renames = {}
    to_valid = []
    to_invalid = []
    cur = get_cur(db_conn)
    cur.execute("select t.id, t.name, t.is_valid from dw.dim_stock t /*where t.is_valid = 'Y'*/")
    for db_row in list(cur):
        sid = db_row["id"]
        db_name = db_row["name"].decode("utf-8")
        valid = db_row["is_valid"]
        if sid in new_codes and valid in ("Y", "N"):
            if valid == "Y":
                if db_name != new_codes[sid]:
                    renames[sid] = new_codes[sid]      # stock was renamed
            else:
                to_valid.append("'" + str(sid) + "'")  # delisted code came back
            del new_codes[sid]                         # known code, no insert
        elif valid != "N":
            # valid in db but absent from the CSV: mark it invalid below
            to_invalid.append("'" + str(sid) + "'")

    #---- mark vanished stocks is_valid='N'
    if to_invalid:
        to_invalid_str = ",".join(to_invalid)
        print_log(
            "There are %(num)s stocks will be marked invalid. %(stocks)s" % {
                "num": len(to_invalid),
                "stocks": to_invalid_str
            })
        upd_sql = "update dw.dim_stock t set is_valid = 'N', upd_time = now() where t.id in (%(ids)s)" % {
            "ids": to_invalid_str
        }
        cur.execute(upd_sql)
        db_conn.commit()
    else:
        print_log("No stocks need to be marked invalid.")

    #---- re-validate stocks that reappeared
    if to_valid:
        to_valid_str = ",".join(to_valid)
        print_log("There are %(num)s stocks will be marked valid. %(stocks)s" %
                  {
                      "num": len(to_valid),
                      "stocks": to_valid_str
                  })
        upd_sql = "update dw.dim_stock t set is_valid = 'Y', upd_time = now() where t.id in (%(ids)s)" % {
            "ids": to_valid_str
        }
        cur.execute(upd_sql)
        db_conn.commit()
    else:
        print_log("No stocks need to be marked valid.")

    #---- apply name changes
    if renames:
        print_log("There are %(num)s stocks will be updated." %
                  {"num": len(renames.keys())})
        for sid in renames:
            print_log(sid)
            upd_sql = "update dw.dim_stock t set name = '%(name)s', upd_time = now() where t.id = '%(id)s'" % {
                "id": sid,
                "name": renames[sid]
            }
            cur.execute(upd_sql)
        db_conn.commit()
    else:
        print_log("No stocks need to be updated.")

    #---- insert brand-new stocks
    if new_codes:
        value_rows = []
        print_log("There are %(num)s stocks will be inserted." %
                  {"num": len(new_codes.keys())})
        for code in new_codes:
            print_log(code)
            value_rows.append("('%(id)s', '%(name)s', now(), 'Y')" % {
                "id": code,
                "name": new_codes[code]
            })
        ins_sql = "insert into dw.dim_stock(id, name, upd_time, is_valid) values %(values)s" % {
            "values": ",".join(value_rows)
        }
        cur.execute(ins_sql)
        db_conn.commit()
    else:
        print_log("No new stock ids.")

    print_log("dw.dim_stock has been refreshed successfully.")
 def insert_log_table(self):
     """Write a download-start audit row to dw.log_stock_transaction.

     Inserts row_id, biz_date, stock_id, the current wall-clock time
     (time.ctime()) as download_start_time, and the transaction object name
     as download_source, then commits immediately.
     NOTE(review): values are format()-ed straight into the SQL text --
     assumes they never contain quote characters.
     """
     ins_sql = '''insert into dw.log_stock_transaction ( row_id, biz_date, stock_id, download_start_time, download_source ) values ( {row_id}, '{date}', '{stock}', '{start_time}', '{stock_trans_obj_name}' )
     '''.format(row_id=self.row_id, date=self.date, stock=self.stock_id, start_time=time.ctime(), stock_trans_obj_name=self.stock_trans_obj_name)
     cur = get_cur(self.conn)
     cur.execute(ins_sql)
     self.conn.commit()
def load_into_dim_bankuai(db_conn, file, parent_bankuai_ids={u'概念板块': 1, u'地域板块': 2, u'行业板块': 3} ):
	"""Synchronise dw.dim_bankuai with the bankuai (sector) names in *file*.

	Reads the gbk-encoded CSV into (bankuai name -> parent bankuai id)
	pairs, marks currently-valid db rows that no longer match the CSV as
	is_valid='N', and inserts names new to the table.  Commits after each
	DML statement.
	NOTE(review): the mutable default dict is shared across calls; it is
	only read here, so this is safe as long as callers never mutate it.
	NOTE(review): names are interpolated into SQL -- assumes no quotes in
	the data.
	"""
	#-- load CSV
	csvf = open(file)
	csvr = csv.DictReader(csvf)
	bankuais = {}
	invalid_bankuai_ids = []

	#---- get parent_bankuai_id, bankuai_name from csv
	for row in csvr:
		bankuai = row[u'板块名称'.encode("gbk")].decode("gbk")
		parent_bankuai = row[u'子版块'.encode("gbk")].decode("gbk")
		parent_bankuai_id = parent_bankuai_ids[parent_bankuai]
		bankuais[bankuai] = {}
		bankuais[bankuai]["parent_bankuai_id"] = parent_bankuai_id
		#bankuais[bankuai].setdefault("parent_bankuai_id", parent_bankuai_id)
	csvf.close()
	print_log("%(num)s records have been read from %(fname)s." % {"num": len(bankuais.keys()), "fname": file})
	
	#---- get parent_bankuai_id, bankuai_name from db, search the combination in csv dict, if it doesn't exist, add to invalid_bankuai_ids
	select_sql = "select t.parent_bankuai_id, t.name, t.id from dw.dim_bankuai t where t.is_valid = 'Y'"
	cur = get_cur(db_conn)
	cur.execute(select_sql)
	db_rows = list(cur)

	for db_row in db_rows:
		db_bankuai = db_row["name"].decode("utf-8")
		db_parent_bankuai_id = db_row["parent_bankuai_id"]
		db_id = db_row["id"]
		
		if db_bankuai in bankuais:
			if db_parent_bankuai_id == bankuais[db_bankuai]["parent_bankuai_id"]:
				#delete from bankuais if it's already in the table and is_valid=Y
				del bankuais[db_bankuai]
			else: 
				# same name but a different parent: treat the db row as stale
				invalid_bankuai_ids.append(str(db_id))
		else:
			# name no longer appears in the CSV at all
			invalid_bankuai_ids.append(str(db_id))

	#---- mark bankuais is_valid=N
	if len(invalid_bankuai_ids) > 0:
		invalid_bankuai_ids_str = ",".join(invalid_bankuai_ids)
		print_log("Invalid bankuai ids: " + invalid_bankuai_ids_str)
		upd_sql = "update dw.dim_bankuai t set is_valid = 'N', upd_time = now() where t.id in (%(ids)s)" % {"ids": invalid_bankuai_ids_str}
		cur.execute(upd_sql)
		db_conn.commit()
	else:
		print_log("No invalid bankuai ids.")
		
	#---- insert bankuais into dim_bankuai
	if len(bankuais.keys()) > 0:
		values = []
		print_log("There are %(num)s bankuais will be inserted." % {"num": len(bankuais.keys())})
		for b in bankuais:
			values.append("('%(name)s', '%(parent_bankuai_id)s', now(), 'Y')" % {"name": b, "parent_bankuai_id": bankuais[b]["parent_bankuai_id"]} )
		values_str = ",".join(values)
		ins_sql = "insert into dw.dim_bankuai(name, parent_bankuai_id, upd_time, is_valid) values %(values)s" % {"values": values_str}
		cur.execute(ins_sql)
		db_conn.commit()
	else:
		print_log("No new bankuai ids.")
	
	print_log("dw.dim_bankuai has been refreshed successfully.")
def load_into_dim_stock(db_conn, file ):
	"""Synchronise dw.dim_stock with the stock code/name pairs in *file*.

	Reads the gbk-encoded CSV into {code: name}, then diffs against the
	current table: unchanged rows are skipped, renamed stocks are updated,
	codes that reappeared are re-marked is_valid='Y', codes missing from
	the CSV are marked 'N', and brand-new codes are inserted.  Commits
	after each DML statement.
	NOTE(review): values are interpolated into SQL -- assumes trusted CSV
	content (no quotes in the data).
	"""
	#-- load CSV
	csvf = open(file)
	csvr = csv.DictReader(csvf)
	codes = {}
	codes_to_update = {}
	codes_to_valid = []
	codes_to_invalid = []

	# CSV layout (gbk):
	# 板块	子版块		板块名称	股票代码	股票名称
	# 板块	概念板块	送转预期	600587		新华医疗
	for row in csvr:
		code = row[u'股票代码'.encode("gbk")].decode("gbk")
		name = row[u'股票名称'.encode("gbk")].decode("gbk")
		codes[code] = name
	csvf.close()
	print_log("%(num)s records have been read from %(fname)s." % {"num": len(codes.keys()), "fname": file})
	
	
	#---- get id, name from db, search the combination in csv dict
	# if id exists but different name, update
	# if id doesn't exist, mark is_valid=N
	select_sql = "select t.id, t.name, t.is_valid from dw.dim_stock t /*where t.is_valid = 'Y'*/"
	cur = get_cur(db_conn)
	cur.execute(select_sql)
	db_rows = list(cur)

	for db_row in db_rows:
		db_name = db_row["name"].decode("utf-8")
		db_id = db_row["id"]
		db_is_valid = db_row["is_valid"]
		if db_id in codes and db_is_valid == "Y":
			if db_name == codes[db_id]:
				#delete from codes if it's already in the table and name is not changed.
				del codes[db_id]
			else: 
				#delete from codes, we will use codes_to_update dict to update the name 
				codes_to_update[db_id] = codes[db_id]
				del codes[db_id]
		elif db_id in codes and db_is_valid == "N":
			# delisted code reappeared in the CSV: revalidate it
			codes_to_valid.append("'" + str(db_id) + "'")
			del codes[db_id]
		elif db_is_valid == "N":
			# not in csv file and it's already invalid in db, do nothing
			pass
		else:
			# not in csv, but in db it's valid, mark it to invalid
			codes_to_invalid.append("'" + str(db_id) + "'")
			
	#---- mark stocks is_valid=N
	if len(codes_to_invalid) > 0:
		codes_to_invalid_str = ",".join(codes_to_invalid)
		print_log("There are %(num)s stocks will be marked invalid. %(stocks)s" % {"num": len(codes_to_invalid), "stocks": codes_to_invalid_str})
		upd_sql = "update dw.dim_stock t set is_valid = 'N', upd_time = now() where t.id in (%(ids)s)" % {"ids": codes_to_invalid_str}
		cur.execute(upd_sql)
		db_conn.commit()
	else:
		print_log("No stocks need to be marked invalid.")

	#---- mark stocks is_valid=Y
	if len(codes_to_valid) > 0:
		codes_to_valid_str = ",".join(codes_to_valid)
		print_log("There are %(num)s stocks will be marked valid. %(stocks)s" % {"num": len(codes_to_valid), "stocks": codes_to_valid_str})
		upd_sql = "update dw.dim_stock t set is_valid = 'Y', upd_time = now() where t.id in (%(ids)s)" % {"ids": codes_to_valid_str}
		cur.execute(upd_sql)
		db_conn.commit()
	else:
		print_log("No stocks need to be marked valid.")
		
	#---- update stock names in dim_stock
	if len(codes_to_update.keys()) > 0:
		print_log("There are %(num)s stocks will be updated." % {"num": len(codes_to_update.keys())})
		for id in codes_to_update:
			print_log(id)
			upd_sql = "update dw.dim_stock t set name = '%(name)s', upd_time = now() where t.id = '%(id)s'" % {"id": id, "name": codes_to_update[id]}
			cur.execute(upd_sql)
		db_conn.commit()
	else:
		print_log("No stocks need to be updated.")
	
	#---- insert stocks into dim_stock
	if len(codes.keys()) > 0:
		values = []
		print_log("There are %(num)s stocks will be inserted." % {"num": len(codes.keys())})
		for b in codes:
			print_log(b)
			values.append("('%(id)s', '%(name)s', now(), 'Y')" % {"id": b, "name": codes[b]} )
		values_str = ",".join(values)
		ins_sql = "insert into dw.dim_stock(id, name, upd_time, is_valid) values %(values)s" % {"values": values_str}
		cur.execute(ins_sql)
		db_conn.commit()
	else:
		print_log("No new stock ids.")
	
	print_log("dw.dim_stock has been refreshed successfully.")