def city_id_loc(self):
    """Geocode Chinese cities and insert them into the city_location table.

    Reads city_index.csv to build a city-name -> claim_temperature index,
    then for every cityidloc.csv row whose columns 1 and 2 agree and whose
    city appears in the index, queries the Baidu geocoder for the row's
    lat/lng to obtain a cityCode and inserts one row into city_location.

    NOTE(review): SQL is built by string interpolation; if the CSV content
    is ever untrusted this is injectable -- switch to parameterized queries
    if Mysqldb supports them.
    """
    # Map city name -> claim_temperature index; skip the CSV header row.
    # First occurrence of a name wins (duplicates are ignored).
    city_index = {}
    with open('city_index.csv', 'r', encoding='utf8') as csvfile:
        for row in csv.reader(csvfile):
            if row[0] == 'city_name':
                continue
            if row[0] not in city_index:
                city_index[row[0]] = int(row[1])

    db = Mysqldb(config.MySqlHost, config.MySqlUser, config.MySqlPasswd,
                 config.MySqlDb, config.MySqlPort)
    with open('cityidloc.csv', 'r', encoding='utf8') as csvfile:
        for row in csv.reader(csvfile):
            # Only process rows where columns 1 and 2 agree and the city
            # has a claim_temperature entry in the index.
            if row[1] != row[2]:
                continue
            if city_index.get(row[1], '') == '':
                continue
            url = (f'http://api.map.baidu.com/geocoder/v2/?location={row[4]},'
                   f'{row[5]}&output=json&pois=0&ak=lS5SlxcGqXfkuj3pcwRGBv90')
            res_data = common_fun.get_url_json(url)
            city_code = res_data['result']['cityCode']
            insert_str = (
                "INSERT INTO city_location (country, city, latitude, longitude, "
                "altitude, is_use, city_code, claim_temperature)"
                f"VALUES ('China', '{row[1]}',{row[4]},{row[5]}, 0, 0,"
                f"{city_code},{city_index[row[1]]})"
            )
            db.insert(insert_str)
def foreign_data_to_sql(self):
    """Import manually-curated foreign city locations into city_foreign.

    Reads foreign_loc_index_shougong.csv (skipping the header row and rows
    without lat/lng), resolves each row's cityCode via the Baidu geocoder,
    derives the country from a fixed slug list matched against column 2,
    and inserts one row per city into the city_foreign table.

    NOTE(review): SQL is built by string interpolation; if the CSV content
    is ever untrusted this is injectable -- switch to parameterized queries
    if Mysqldb supports them.
    """
    db = Mysqldb(config.MySqlHost, config.MySqlUser, config.MySqlPasswd,
                 config.MySqlDb, config.MySqlPort)
    county_list = ['united-states', 'japan', 'south-korea', 'singapore']
    with open('foreign_loc_index_shougong.csv', 'r', encoding='utf8') as csvfile:
        for row in csv.reader(csvfile):
            # Skip the header row (column 0 holds the literal header text).
            if row[0] == '城市名':
                continue
            # Skip rows missing latitude or longitude.
            if row[3] == '' or row[4] == '':
                continue
            url = (f'http://api.map.baidu.com/geocoder/v2/?location={row[3]},'
                   f'{row[4]}&output=json&pois=0&ak=lS5SlxcGqXfkuj3pcwRGBv90')
            res_data = common_fun.get_url_json(url)
            city_code = res_data['result']['cityCode']
            # Last matching slug wins (original behaviour preserved);
            # country stays '' when no slug matches.
            country = ''
            for county_str in county_list:
                if county_str in row[2]:
                    country = county_str
            insert_str = (
                "INSERT INTO city_foreign (country, city, latitude, longitude, "
                "altitude, is_use, city_code, claim_temperature)"
                f"VALUES ('{country}', '{row[0]}',{row[3]},{row[4]}, 0, 0,"
                f"{city_code},{row[1]})"
            )
            db.insert(insert_str)
class TxDataPro():
    """Scrape per-token market figures from each token's fxh_address page
    and store one snapshot row per token in erc20_tx_data."""

    def __init__(self):
        self.db = Mysqldb(config.MySqlHost, config.MySqlUser, config.MySqlPasswd,
                          config.MySqlDb, config.MySqlPort)

    def get_tx_data(self):
        """For every token_base row with an fxh_address, scrape prices,
        volumes and 24h figures from the page and insert one erc20_tx_data
        row stamped with this run's epoch time.

        Insert failures are logged per token and do not abort the run.
        """
        sel_str = "SELECT id, fxh_address from token_base WHERE fxh_address <> ''"
        db_res = self.db.select(sel_str)
        timestamps = int(time.time())
        recordDate = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        print('get_tx_data', recordDate)
        for token_bace in db_res:
            res_html = common_fun.get_url_html(token_bace[1])
            price_list = res_html.xpath(
                '//*[@id="baseInfo"]//*[@class="coinprice"]//text()')
            # '?' placeholders on the page mean "no data"; map them to 0.
            price_cny = price_list[0].split('¥')[1].replace(',', '').replace(
                '?', '0')
            gains_24h = price_list[1].replace('%', '')
            # Hoisted: the original ran this identical xpath query twice.
            sub_texts = res_html.xpath(
                '//*[@id="baseInfo"]//*[@class="sub"]//text()')
            price_usdt = sub_texts[0].replace('≈', '').split('$')[1].replace(
                ',', '')
            price_btc = sub_texts[2].replace('≈', '').split('BTC')[0].replace(
                ',', '')
            flow_num = res_html.xpath(
                '//*[@id="baseInfo"]/div[1]/div[3]/div[2]/text()')[0].split(
                    ' ')[0].replace(',', '')
            tx_amount_24h = res_html.xpath(
                '//*[@id="baseInfo"]/div[1]/div[4]/div[2]/text()')[0].replace(
                    ',', '')
            # BUGFIX: the original tested `tx_amount_24h == '?' or
            # tx_amount_24h == '?'` -- the same comparison twice. A single
            # test is equivalent. (If the second literal was once a different
            # placeholder character, restore it here -- TODO confirm.)
            if tx_amount_24h != '?':
                tx_amount_24h = tx_amount_24h.split('¥')[1].replace(',', '')
            issued_num = res_html.xpath(
                '//*[@id="baseInfo"]/div[1]/div[3]/div[4]/text()')[0].split(
                    ' ')[0].replace(',', '')
            change_rate_24h = res_html.xpath(
                '//*[@id="baseInfo"]/div[1]/div[4]/div[5]/div/span/text()'
            )[0].replace('%', '')
            insert_str = (
                "INSERT INTO erc20_tx_data (token_id, get_data_time, issued_num, "
                "flow_num, tx_amount_24h, change_rate_24h, gains_24h, price_cny, "
                "price_usdt, price_btc, created_at)"
                f"VALUES ({token_bace[0]},{timestamps},'{issued_num}','{flow_num}',"
                f"'{tx_amount_24h}','{change_rate_24h}','{gains_24h}','{price_cny}',"
                f"'{price_usdt}','{price_btc}','{recordDate}')"
            )
            try:
                self.db.insert(insert_str)
            except Exception:
                print(insert_str)
                print('insert err, token = ', token_bace[0])
        recordDate = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        print('get_tx_data', recordDate)
class Huobi(object):
    """Thin client for the public Huobi Pro REST API that stores merged
    tickers in the huobi_tickers_copy MySQL table."""

    def __init__(self):
        self.db = Mysqldb(config.MySqlHost, config.MySqlUser, config.MySqlPasswd,
                          config.MySqlDb, config.MySqlPort)
        self.base_url = 'https://api.huobipro.com'
        self.headers = {
            "Content-type": "application/x-www-form-urlencoded",
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36'
        }
        # Populated by symbol_data(); a list of
        # {'base-currency': ..., 'quote-currency': ...} dicts.
        self.symbols = []

    def kline_data(self, symbol, date_type):
        """Fetch the last 200 klines for one symbol at the given period.

        NOTE(review): the response is tagged with tx_name but never returned
        or stored -- this looks unfinished; confirm before relying on it.
        """
        tx_name = symbol['base-currency'] + '_' + symbol['quote-currency']
        request_url = (self.base_url + '/market/history/kline?period='
                       + date_type + '&size=200&symbol='
                       + symbol['base-currency'] + symbol['quote-currency'])
        print(request_url)
        res_json = common_fun.get_url_json(request_url)
        res_json['tx_name'] = tx_name

    def all_kline_datas(self, date_type):
        """Fetch klines for every known symbol at the given period."""
        print(len(self.symbols))
        for symbol in self.symbols:
            self.kline_data(symbol, date_type)

    def ticker(self, symbol):
        """Fetch the merged ticker for one symbol and insert it into
        huobi_tickers_copy; insert failures are logged, not raised."""
        tx_name = symbol['base-currency'] + '_' + symbol['quote-currency']
        request_url = (self.base_url + '/market/detail/merged?symbol='
                       + symbol['base-currency'] + symbol['quote-currency'])
        res_json = common_fun.get_url_json(request_url)
        tick = res_json['tick']
        # The merged endpoint carries no explicit 'last' price here, hence
        # the -1 placeholder in that column.
        insert_str = (
            "INSERT INTO huobi_tickers_copy (date_time, currency_pair, high, "
            "low, last, sell, buy, vol)"
            f"VALUES ({res_json['ts'] / 1000},'{tx_name}',{tick['high']},"
            f"{tick['low']},-1,{tick['ask'][0]},{tick['bid'][0]},"
            f"{tick['amount']});"
        )
        try:
            self.db.insert(insert_str)
        # BUGFIX: was a bare `except:` which also swallowed KeyboardInterrupt.
        except Exception:
            print(insert_str)
            print('insert_list tickers err tx_name = ', tx_name)

    def tickers(self):
        """Fetch and store a ticker for every known symbol, logging the
        wall-clock start and end of the pass."""
        recordDate = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        print('start', recordDate)
        for symbol in self.symbols:
            print(symbol)
            self.ticker(symbol)
        recordDate = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        print('end', recordDate)

    def symbol_data(self):
        """Load the exchange's symbol list into self.symbols, keeping only
        the base/quote currency of each entry."""
        request_url = self.base_url + '/v1/common/symbols'
        res_json = common_fun.get_url_json(request_url)
        for symbol in res_json['data']:
            self.symbols.append({
                'base-currency': symbol['base-currency'],
                'quote-currency': symbol['quote-currency'],
            })
class Erc20Data():
    """Scrape per-token holder/transfer statistics from the token pages at
    config.eth_token_url + contract address, using headless Firefox.

    Because of network problems the data fetch is frequently interrupted, so
    the work is split into several steps (data is in principle refreshed once
    per day; the script is started at 00:00 each day):
      1. erc20_data_key()  -- generate a unique (token_id, get_data_time) key
         per token and mark each row as not-yet-fetched;
      2. get_erc20_data()  -- fill holder/transfer counts for unfetched rows;
      3. get_top100_hold() -- store the top-100 holder breakdown per token.
    """

    def __init__(self):
        self.db = Mysqldb(config.MySqlHost, config.MySqlUser, config.MySqlPasswd,
                          config.MySqlDb, config.MySqlPort)
        #self.driver = webdriver.Chrome(executable_path = config.chromedriver)

    def erc20_data_key(self):
        """Insert one placeholder erc20_data row per token that has an ERC20
        contract, keyed by (token_id, current epoch time), with
        top100_detail=0 meaning "top-100 breakdown not fetched yet"."""
        recordDate = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        print('erc20_data_key', recordDate)
        timestamps = int(time.time())
        sel_str = "SELECT id, erc20_contract from token_base WHERE erc20_contract <> ''"
        res_sel = self.db.select(sel_str)
        for token_bace in res_sel:
            insert_str = "INSERT INTO erc20_data (token_id, get_data_time, top100_detail)"
            insert_str += "VALUES (" + str(
                token_bace[0]) + "," + str(timestamps) + "," + '0' + ")"
            try:
                self.db.insert(insert_str)
            except Exception as e:
                # DB errors (e.g. duplicate key) are logged and skipped.
                print(insert_str)
                print('INSERT err internet_data, token_id = ', token_bace[0])
                continue

    def open_driver(self):
        """Start a headless, maximized Firefox with a 20 s page-load timeout
        and return the driver."""
        options = webdriver.FirefoxOptions()
        options.add_argument('--headless')
        driver = webdriver.Firefox(options=options)
        driver.set_page_load_timeout(20)
        driver.maximize_window()
        return driver

    def get_erc20_data(self):
        """Fill token_hold_num / token_tx_num for every erc20_data row whose
        counters are still at the -1 sentinel; loops, re-selecting, until no
        unfetched rows remain (failed pages stay at -1 and are retried)."""
        recordDate = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        print('get_erc20_data', recordDate)
        sel_str = "SELECT a.token_id, a.get_data_time, b.erc20_contract from erc20_data as a, token_base as b WHERE (a.token_hold_num = -1 or a.token_tx_num = -1)AND a.token_id = b.id"
        while True:
            res_sel = self.db.select(sel_str)
            print(len(res_sel))
            if len(res_sel) <= 0:
                break
            # Fresh browser per pass; it is closed at the end of the pass.
            driver = self.open_driver()
            #driver = webdriver.Chrome(executable_path = config.chromedriver)
            for token_bace in res_sel:
                url = config.eth_token_url + token_bace[2]
                try:
                    driver.get(url)
                except TimeoutException:
                    # Keep whatever rendered before the timeout.
                    driver.execute_script('window.stop()')
                try:
                    token_holders = driver.find_element_by_xpath(
                        '//*[@id="ContentPlaceHolder1_divSummary"]/div[1]/table/tbody/tr[3]/td[2]'
                    ).text
                    # Cell text presumably reads like "<count> addresses";
                    # keep the leading number only -- TODO confirm format.
                    token_holders = token_holders.strip().split(' ')[0]
                    tx_num = driver.find_element_by_xpath(
                        '//*[@id="totaltxns"]').text
                    updata_str = "UPDATE erc20_data SET token_hold_num=" + token_holders + ", token_tx_num=" + str(
                        tx_num)
                    updata_str += " where token_id =" + str(
                        token_bace[0]) + " and get_data_time=" + str(
                            token_bace[1])
                    try:
                        self.db.update(updata_str)
                    except Exception as e:
                        print(updata_str)
                        print('UPDATE err internet_data, token_id = ',
                              token_bace[0])
                        continue
                except:
                    # Missing elements leave the row at -1 for the next pass.
                    print('xpath err', url)
            driver.close()
            driver.service.stop()
        recordDate = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        print('get_erc20_data', recordDate)

    def get_top100_hold(self):
        """Scrape the top-100 holder table (two pages of 50 rows) for every
        token whose top100_detail flag is 0, inserting one hold_top_100 row
        per holder; processes up to 30 tokens per browser session and loops
        until no flagged tokens remain."""
        sel_str = "SELECT a.token_id, a.get_data_time, b.erc20_contract, a.token_hold_num from erc20_data as a, token_base as b WHERE top100_detail = 0 AND a.token_id = b.id LIMIT 30"
        while True:
            res_sel = self.db.select(sel_str)
            recordDate = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            print(len(res_sel), recordDate)
            if len(res_sel) <= 0:
                break
            driver = self.open_driver()
            #driver = webdriver.Chrome(executable_path = config.chromedriver)
            for token_bace in res_sel:
                url = config.eth_token_url + token_bace[2]
                try:
                    driver.get(url)
                except TimeoutException:
                    driver.execute_script('window.stop()')
                try:
                    # Open the holders ("Balances") tab.
                    driver.find_element_by_xpath(
                        '//*[@id="ContentPlaceHolder1_li_balances"]/a').click(
                        )
                except:
                    print('click err',
                          time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
                    time.sleep(5)
                    continue
                try:
                    # Dismiss a possible alert.  NOTE(review):
                    # switch_to_alert() is deprecated in modern Selenium
                    # (use driver.switch_to.alert instead).
                    driver.switch_to_alert().accept()
                except:
                    pass
                driver.switch_to.frame('tokeholdersiframe')
                # First data cell of the holders table; used to wait for
                # page 1 (rank '1') and page 2 (rank '51') to render.
                xpath_str = '//*[@id="maintable"]/table/tbody/tr[2]/td[1]'
                try:
                    element_present = EC.text_to_be_present_in_element(
                        (By.XPATH, xpath_str), '1')
                    WebDriverWait(driver, 20, 1).until(element_present)
                    recordDate = time.strftime("%Y-%m-%d %H:%M:%S",
                                               time.localtime())
                    if len(
                            driver.find_elements_by_xpath(
                                '//*[@id="maintable"]/table/tbody/tr')) < 2:
                        print('rank data len less')
                        continue
                    insert_list = []
                    # Page 1: holders ranked 1-50 (plus the header row, which
                    # fails the td parse and is skipped by the except).
                    for element in driver.find_elements_by_xpath(
                            '//*[@id="maintable"]/table/tbody/tr'):
                        try:
                            rank = element.find_element_by_xpath(
                                './td[1]').text.replace(',', '')
                            address = element.find_element_by_xpath(
                                './td/span').text.replace(',', '')
                            quantity = element.find_element_by_xpath(
                                './td[3]').text.replace(',', '')
                            percentage = element.find_element_by_xpath(
                                './td[4]').text.replace(',', '').replace(
                                    '%', '')
                            insert_str = "INSERT INTO hold_top_100 (token_id, get_data_time, rank, address, quantity, percentage, creat_at)"
                            insert_str += "VALUES (" + str(
                                token_bace[0]
                            ) + "," + str(token_bace[1]) + "," + str(
                                rank) + ",'" + str(address) + "'," + str(
                                    quantity) + "," + str(
                                        percentage) + ",'" + recordDate + "');"
                            insert_list.append(insert_str)
                        except:
                            continue
                    # Page 2 only exists when the token has more than 50
                    # holders (token_bace[3] is token_hold_num).
                    if token_bace[3] > 50:
                        driver.find_element_by_xpath(
                            '//*[@id="PagingPanel"]/a[3]').click()
                        element_present = EC.text_to_be_present_in_element(
                            (By.XPATH, xpath_str), '51')
                        WebDriverWait(driver, 20, 1).until(element_present)
                        for element in driver.find_elements_by_xpath(
                                '//*[@id="maintable"]/table/tbody/tr'):
                            try:
                                rank = element.find_element_by_xpath(
                                    './td[1]').text.replace(',', '')
                                address = element.find_element_by_xpath(
                                    './td/span').text.replace(',', '')
                                quantity = element.find_element_by_xpath(
                                    './td[3]').text.replace(',', '')
                                percentage = element.find_element_by_xpath(
                                    './td[4]').text.replace(',', '').replace(
                                        '%', '')
                                insert_str = "INSERT INTO hold_top_100 (token_id, get_data_time, rank, address, quantity, percentage, creat_at)"
                                insert_str += "VALUES (" + str(
                                    token_bace[0]) + "," + str(
                                        token_bace[1]
                                    ) + "," + str(rank) + ",'" + str(
                                        address
                                    ) + "'," + str(quantity) + "," + str(
                                        percentage) + ",'" + recordDate + "');"
                                insert_list.append(insert_str)
                            except Exception as e:
                                continue
                    # Only commit a complete capture: exactly 100 rows, or
                    # exactly as many rows as the token has holders.
                    if len(insert_list) == 100 or len(
                            insert_list) == token_bace[3]:
                        try:
                            self.db.execute_list(insert_list)
                            updata_str = "UPDATE erc20_data SET top100_detail=" + '1'
                            updata_str += " where token_id =" + str(
                                token_bace[0]) + " and get_data_time=" + str(
                                    token_bace[1])
                            try:
                                self.db.update(updata_str)
                            except Exception as e:
                                print(updata_str)
                                print('UPDATE err updata_str, token_id = ',
                                      token_bace[0])
                        except Exception as e:
                            print('INSERT err internet_data, >50 token_id = ',
                                  token_bace[0])
                except:
                    print('Timed out waiting for page to load. token_id:',
                          token_bace[0])
            driver.close()
            driver.service.stop()
            time.sleep(5)
        recordDate = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        print('get_top100_hold', recordDate)
class GithubDataPro():
    """Collect GitHub metadata: repositories per token (get_gitaddress),
    placeholder detail rows (init_git_detail_data), and per-repository
    star/watch/fork/commit/branch/release/contributor counts scraped with
    headless Firefox (get_detail_data)."""

    def __init__(self):
        self.db = Mysqldb(config.MySqlHost, config.MySqlUser, config.MySqlPasswd,
                          config.MySqlDb, config.MySqlPort)
        # Sample org page; not referenced by the methods below.
        self.git_url = 'https://github.com/eosio'

    def get_gitaddress(self):
        """For every token with a github_address, parse its org-repositories
        page and insert any repository not already present in github_base."""
        sel_str = "SELECT project_name FROM github_base"
        project_name_list = self.db.select(sel_str)
        sel_str = "SELECT id, github_address FROM token_base WHERE github_address <>''"
        res_sel = self.db.select(sel_str)
        for token_bace in res_sel:
            res_html = common_fun.get_url_html(token_bace[1])
            # common_fun.get_url_html signals failures as 404 / 500 / ''.
            if res_html == 404:
                print('404:', token_bace[1])
                continue
            elif res_html == 500:
                print('500', token_bace[1])
                continue
            elif res_html == '':
                print('err:', token_bace[1])
                continue
            for pro_data_xpath in res_html.xpath(
                    '//*[@id="org-repositories"]/div[1]/div/li'):
                project_name = pro_data_xpath.xpath(
                    'div[1]/h3/a/text()')[0].strip()
                project_address = pro_data_xpath.xpath(
                    'div[1]/h3/a/@href')[0].strip()
                # select() rows are 1-tuples, hence the (name,) membership test.
                if (project_name, ) not in project_name_list:
                    project_address = 'https://github.com' + project_address
                    insert_str = (
                        "INSERT INTO github_base (token_id, project_name, project_address)"
                        f"VALUES ({token_bace[0]},'{project_name}','{project_address}')"
                    )
                    try:
                        self.db.insert(insert_str)
                    except Exception:
                        print(insert_str)
                        print('INSERT err internet_data, token_id = ',
                              token_bace[0])
                        continue

    def open_driver(self):
        """Start a headless, maximized Firefox with a 20 s page-load timeout
        and return the driver."""
        options = webdriver.FirefoxOptions()
        options.add_argument('--headless')
        driver = webdriver.Firefox(options=options)
        driver.set_page_load_timeout(20)
        driver.maximize_window()
        return driver

    def init_git_detail_data(self):
        """Insert one github_detail_data placeholder row per known project,
        keyed by (project_id, current epoch time)."""
        sel_str = "SELECT id FROM github_base WHERE project_address <> ''"
        res_sel = self.db.select(sel_str)
        timestamps = int(time.time())
        for git_base in res_sel:
            insert_str = (
                "INSERT INTO github_detail_data (project_id, get_data_time)"
                f"VALUES ({git_base[0]},{timestamps})"
            )
            try:
                self.db.insert(insert_str)
            except Exception:
                print(insert_str)
                print('INSERT err internet_data, project_id = ', git_base[0])
                continue

    def _detail_update_sql(self, git_base, star, watch, fork, commits,
                           branches, releases, contributors):
        """Build the UPDATE statement for one github_detail_data row
        (byte-identical to the statements the original code concatenated)."""
        return (
            f"UPDATE github_detail_data SET star_num={star}, "
            f"watch_num={watch}, fork_num={fork}, commits_num={commits},"
            f"branches_num={branches}, releases_num={releases}, "
            f"contributors_num={contributors}"
            f" where project_id ={git_base[0]} and get_data_time={git_base[1]}"
        )

    def get_detail_data(self):
        """Scrape repo statistics for every github_detail_data row still at
        the -1 sentinel. Retries up to 5 passes; whatever remains unfetched
        afterwards is marked -2 so it is never retried."""
        sel_str = "SELECT b.project_id, b.get_data_time, a.project_address from github_base as a, github_detail_data as b WHERE (b.star_num = -1 or b.watch_num = -1 or b.fork_num = -1 or b.commits_num = -1 or b.branches_num = -1 or b.releases_num = -1 or b.contributors_num = -1)AND a.id = b.project_id"
        retry_times = 5
        while True:
            res_sel = self.db.select(sel_str)
            # BUGFIX: the original never stopped on an empty result set and
            # kept launching empty browser sessions until retries ran out.
            if len(res_sel) <= 0:
                break
            retry_times = retry_times - 1
            if retry_times < 0:
                # Give up: mark every remaining row with -2.
                for git_base in res_sel:
                    updata_str = self._detail_update_sql(
                        git_base, -2, -2, -2, -2, -2, -2, -2)
                    try:
                        self.db.update(updata_str)
                    except Exception:
                        print(updata_str)
                        print('UPDATE err retry_times < 0, gitaddress = ',
                              git_base[2])
                        continue
                break
            driver = self.open_driver()
            for git_base in res_sel:
                try:
                    driver.get(git_base[2])
                except TimeoutException:
                    # Keep whatever rendered before the timeout.
                    driver.execute_script('window.stop()')
                try:
                    watch_num = driver.find_element_by_xpath(
                        '//*[@id="js-repo-pjax-container"]/div[1]/div/ul/li[1]/a[2]'
                    ).text.replace(',', '')
                    star_num = driver.find_element_by_xpath(
                        '//*[@id="js-repo-pjax-container"]/div[1]/div/ul/li[2]/a[2]'
                    ).text.replace(',', '')
                    fork_num = driver.find_element_by_xpath(
                        '//*[@class="pagehead-actions"]/li[3]/a[2]'
                    ).text.replace(',', '')
                    commit_num = driver.find_element_by_xpath(
                        '/html/body/div[4]/div/div/div[2]/div[1]/div[3]/div/div/ul/li[1]/a/span'
                    ).text.replace(',', '')
                    branches_num = driver.find_element_by_xpath(
                        '/html/body/div[4]/div/div/div[2]/div[1]/div[3]/div/div/ul/li[2]/a/span'
                    ).text.replace(',', '')
                    releases_num = driver.find_element_by_xpath(
                        '/html/body/div[4]/div/div/div[2]/div[1]/div[3]/div/div/ul/li[3]/a/span'
                    ).text.replace(',', '')
                    contributors_num = driver.find_element_by_xpath(
                        '/html/body/div[4]/div/div/div[2]/div[1]/div[3]/div/div/ul/li[4]/a/span'
                    ).text.replace(',', '')
                    updata_str = self._detail_update_sql(
                        git_base, star_num, watch_num, fork_num, commit_num,
                        branches_num, releases_num, contributors_num)
                    try:
                        self.db.update(updata_str)
                    except Exception:
                        print(updata_str)
                        print('UPDATE err, git_address = ', git_base[2],
                              '-----project_id = ', git_base[0])
                        continue
                # BUGFIX: was a bare `except:`; any missing element simply
                # leaves the row at -1 for the next pass.
                except Exception:
                    pass
            driver.close()
            driver.service.stop()