Example #1
0
    def city_id_loc(self):
        """Insert the Chinese cities listed in ``cityidloc.csv`` into ``city_location``.

        ``city_index.csv`` provides a city-name -> integer mapping: the
        ``city_name`` header row is skipped and the first occurrence of a
        name wins. A row of ``cityidloc.csv`` is processed only when its
        columns 1 and 2 are equal and column 1 is a key of that mapping.
        For each such row the coordinates (columns 4 and 5) are sent to the
        Baidu geocoder endpoint, ``result.cityCode`` is read from the reply,
        and one INSERT statement is executed.
        """
        geocode_tpl = ('http://api.map.baidu.com/geocoder/v2/?location={},{}'
                       '&output=json&pois=0&ak=lS5SlxcGqXfkuj3pcwRGBv90')
        # NOTE: there is intentionally no space before VALUES; the statement
        # text is kept exactly as it has always been sent.
        insert_tpl = (
            "INSERT INTO city_location (country, city, latitude, longitude, altitude, is_use, city_code, claim_temperature)"
            "VALUES ('China', '{}',{},{}, 0, 0,{},{})")

        index_by_city = {}
        with open('city_index.csv', 'r', encoding='utf8') as index_file:
            for row in csv.reader(index_file):
                name = row[0]
                if name == 'city_name' or name in index_by_city:
                    continue
                index_by_city[name] = int(row[1])

        database = Mysqldb(config.MySqlHost, config.MySqlUser,
                           config.MySqlPasswd, config.MySqlDb,
                           config.MySqlPort)

        with open('cityidloc.csv', 'r', encoding='utf8') as location_file:
            for row in csv.reader(location_file):
                city = row[1]
                if city != row[2] or city not in index_by_city:
                    continue

                latitude = row[4]
                longitude = row[5]
                geo_reply = common_fun.get_url_json(
                    geocode_tpl.format(latitude, longitude))
                city_code = geo_reply['result']['cityCode']

                database.insert(
                    insert_tpl.format(city, latitude, longitude, city_code,
                                      index_by_city[city]))
    def foreign_data_to_sql(self):
        """Insert the rows of ``foreign_loc_index_shougong.csv`` into ``city_foreign``.

        The header row (first cell ``城市名``) and rows whose column 3 or
        column 4 is empty are skipped. For every other row the coordinates
        (columns 3 and 4) are sent to the Baidu geocoder endpoint to read
        ``result.cityCode``. The country is the last entry of the known
        country list that occurs as a substring of column 2, or an empty
        string when none matches. One INSERT statement is executed per row.
        """
        geocode_tpl = ('http://api.map.baidu.com/geocoder/v2/?location={},{}'
                       '&output=json&pois=0&ak=lS5SlxcGqXfkuj3pcwRGBv90')
        # NOTE: there is intentionally no space before VALUES; the statement
        # text is kept exactly as it has always been sent.
        insert_tpl = (
            "INSERT INTO city_foreign (country, city, latitude, longitude, altitude, is_use, city_code, claim_temperature)"
            "VALUES ('{}', '{}',{},{}, 0, 0,{},{})")
        known_countries = ('united-states', 'japan', 'south-korea',
                           'singapore')

        database = Mysqldb(config.MySqlHost, config.MySqlUser,
                           config.MySqlPasswd, config.MySqlDb,
                           config.MySqlPort)

        with open('foreign_loc_index_shougong.csv', 'r',
                  encoding='utf8') as source_file:
            for row in csv.reader(source_file):
                if row[0] == '城市名':
                    continue
                if row[3] == '' or row[4] == '':
                    continue

                geo_reply = common_fun.get_url_json(
                    geocode_tpl.format(row[3], row[4]))
                city_code = geo_reply['result']['cityCode']

                # When several names match, the one listed last is used.
                matches = [name for name in known_countries if name in row[2]]
                country = matches[-1] if matches else ''

                database.insert(
                    insert_tpl.format(country, row[0], row[3], row[4],
                                      city_code, row[1]))
class TxDataPro():
    """Scrapes per-token market figures and stores them in ``erc20_tx_data``."""

    def __init__(self):
        """Open the MySQL connection described by ``config``."""
        self.db = Mysqldb(config.MySqlHost, config.MySqlUser,
                          config.MySqlPasswd, config.MySqlDb, config.MySqlPort)

    def get_tx_data(self):
        """Scrape every token page listed in ``token_base`` and insert one row each.

        Tokens are the ``token_base`` rows with a non-empty ``fxh_address``.
        All rows written by one call share the same ``get_data_time`` (epoch
        seconds) and ``created_at`` (local time string), both taken before
        the first page is fetched. A failing INSERT is printed and skipped;
        a page that does not have the expected layout raises.
        """
        token_rows = self.db.select(
            "SELECT id, fxh_address from token_base WHERE fxh_address <> ''")
        fetched_at = int(time.time())
        started = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        print('get_tx_data', started)

        coinprice_path = '//*[@id="baseInfo"]//*[@class="coinprice"]//text()'
        sub_path = '//*[@id="baseInfo"]//*[@class="sub"]//text()'
        insert_head = "INSERT INTO erc20_tx_data (token_id, get_data_time, issued_num, flow_num, tx_amount_24h, change_rate_24h, gains_24h, price_cny, price_usdt, price_btc, created_at)"

        for row in token_rows:
            token_id = row[0]
            page = common_fun.get_url_html(row[1])

            coinprice = page.xpath(coinprice_path)
            cny_text = coinprice[0].split('¥')[1]
            price_cny = cny_text.replace(',', '').replace('?', '0')
            gains_24h = coinprice[1].replace('%', '')

            usdt_text = page.xpath(sub_path)[0].replace('≈', '')
            btc_text = page.xpath(sub_path)[2].replace('≈', '')
            price_usdt = usdt_text.split('$')[1].replace(',', '')
            price_btc = btc_text.split('BTC')[0].replace(',', '')

            flow_text = page.xpath(
                '//*[@id="baseInfo"]/div[1]/div[3]/div[2]/text()')[0]
            flow_num = flow_text.split(' ')[0].replace(',', '')

            tx_amount_24h = page.xpath(
                '//*[@id="baseInfo"]/div[1]/div[4]/div[2]/text()')[0]
            tx_amount_24h = tx_amount_24h.replace(',', '')
            # A bare '?' is stored as-is; anything else must carry a '¥'.
            if tx_amount_24h != '?':
                tx_amount_24h = tx_amount_24h.split('¥')[1].replace(',', '')

            issued_text = page.xpath(
                '//*[@id="baseInfo"]/div[1]/div[3]/div[4]/text()')[0]
            issued_num = issued_text.split(' ')[0].replace(',', '')

            rate_text = page.xpath(
                '//*[@id="baseInfo"]/div[1]/div[4]/div[5]/div/span/text()'
            )[0]
            change_rate_24h = rate_text.replace('%', '')

            quoted_values = "','".join([
                issued_num,
                str(flow_num),
                str(tx_amount_24h),
                str(change_rate_24h),
                str(gains_24h),
                str(price_cny),
                str(price_usdt),
                str(price_btc),
                str(started),
            ])
            insert_sql = (insert_head + "VALUES (" + str(token_id) + "," +
                          str(fetched_at) + ",'" + quoted_values + "')")

            try:
                self.db.insert(insert_sql)
            except Exception:
                print(insert_sql)
                print('insert err, token = ', token_id)

        finished = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        print('get_tx_data', finished)
Example #4
0
class Huobi(object):
    """Small client for the public Huobi REST API that stores tickers in MySQL.

    ``self.symbols`` starts empty and is filled by ``symbol_data``; the
    ``all_kline_datas`` and ``tickers`` loops iterate over it.
    """

    def __init__(self):
        """Open the MySQL connection and set the API base URL and headers."""
        self.db = Mysqldb(config.MySqlHost, config.MySqlUser,
                          config.MySqlPasswd, config.MySqlDb, config.MySqlPort)
        self.base_url = 'https://api.huobipro.com'
        self.headers = {
            "Content-type":
            "application/x-www-form-urlencoded",
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36'
        }
        self.symbols = []

    def kline_data(self, symbol, date_type):
        """Fetch up to 200 k-line records for one trading pair.

        Args:
            symbol: mapping with ``'base-currency'`` and ``'quote-currency'``.
            date_type: value sent as the ``period`` query parameter.

        Returns:
            The decoded JSON reply with an added ``'tx_name'`` key holding
            ``"<base>_<quote>"``. Previously the reply was built and then
            discarded, so the method had no observable result.
        """
        base = symbol['base-currency']
        quote = symbol['quote-currency']
        request_url = (self.base_url + '/market/history/kline?period=' +
                       date_type + '&size=200&symbol=' + base + quote)
        print(request_url)
        res_json = common_fun.get_url_json(request_url)

        res_json['tx_name'] = base + '_' + quote
        return res_json

    def all_kline_datas(self, date_type):
        """Call ``kline_data`` for every known symbol; results are not kept."""
        print(len(self.symbols))
        for symbol in self.symbols:
            self.kline_data(symbol, date_type)

    def ticker(self, symbol):
        """Fetch the merged ticker of one pair and insert it into ``huobi_tickers_copy``.

        The ``last`` column is always written as ``-1``; ``sell``/``buy`` are
        the first elements of the reply's ``ask``/``bid`` lists. A failing
        INSERT is printed and swallowed; a malformed reply raises.
        """
        base = symbol['base-currency']
        quote = symbol['quote-currency']
        tx_name = base + '_' + quote
        request_url = (self.base_url + '/market/detail/merged?symbol=' + base +
                       quote)
        res_json = common_fun.get_url_json(request_url)
        tick = res_json['tick']

        insert_str = (
            "INSERT INTO huobi_tickers_copy (date_time, currency_pair, high, low, last, sell, buy, vol)"
            "VALUES ({},'{}',{},{},-1,{},{},{});").format(
                res_json['ts'] / 1000, tx_name, tick['high'], tick['low'],
                tick['ask'][0], tick['bid'][0], tick['amount'])

        try:
            self.db.insert(insert_str)
        except Exception:
            # Narrowed from a bare ``except:`` so Ctrl-C / SystemExit still
            # stop a long ticker run instead of being logged as insert errors.
            print(insert_str)
            print('insert_list tickers err  tx_name = ', tx_name)

    def tickers(self):
        """Store a ticker for every known symbol, printing start and end times."""
        print('start', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        for symbol in self.symbols:
            print(symbol)
            self.ticker(symbol)
        print('end', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

    def symbol_data(self):
        """Download the symbol list and append each pair to ``self.symbols``.

        Entries are appended, never replaced, so calling this twice leaves
        every pair in the list twice.
        """
        request_url = self.base_url + '/v1/common/symbols'
        res_json = common_fun.get_url_json(request_url)

        for symbol in res_json['data']:
            self.symbols.append({
                'base-currency': symbol['base-currency'],
                'quote-currency': symbol['quote-currency'],
            })
Example #5
0
class Erc20Data():
    """Collects per-token holder and transaction statistics into MySQL.

    Because of network problems, data fetching is frequently interrupted, so
    the work is split into several steps. In principle the data is refreshed
    once a day, with the script started at 0:00 each day:

    1. ``erc20_data_key`` creates one ``erc20_data`` row per token; token id
       plus timestamp form the row's unique key, and the row starts out
       marked as not fetched.
    2. ``get_erc20_data`` and ``get_top100_hold`` select the rows that are
       still unfetched together with their ``erc20_contract`` and fill them in.
    """

    _HOLDER_ROWS_XPATH = '//*[@id="maintable"]/table/tbody/tr'

    def __init__(self):
        """Open the MySQL connection described by ``config``."""
        self.db = Mysqldb(config.MySqlHost, config.MySqlUser,
                          config.MySqlPasswd, config.MySqlDb, config.MySqlPort)

    def erc20_data_key(self):
        """Insert one ``erc20_data`` row per token that has an ERC-20 contract.

        All rows of one call share the same ``get_data_time`` and are written
        with ``top100_detail = 0``. A failing INSERT is printed and skipped.
        """
        print('erc20_data_key',
              time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        timestamps = int(time.time())
        tokens = self.db.select(
            "SELECT id, erc20_contract from token_base WHERE erc20_contract <> ''"
        )
        insert_tpl = ("INSERT INTO erc20_data (token_id, get_data_time, top100_detail)"
                      "VALUES ({},{},0)")

        for token in tokens:
            insert_str = insert_tpl.format(token[0], timestamps)
            try:
                self.db.insert(insert_str)
            except Exception:
                print(insert_str)
                print('INSERT err internet_data, token_id = ', token[0])

    def open_driver(self):
        """Return a headless, maximised Firefox driver with a 20 s page-load timeout."""
        options = webdriver.FirefoxOptions()
        options.add_argument('--headless')
        driver = webdriver.Firefox(options=options)

        driver.set_page_load_timeout(20)
        driver.maximize_window()
        return driver

    def get_erc20_data(self):
        """Fill ``token_hold_num`` / ``token_tx_num`` for rows where either is -1.

        The select is repeated, with a fresh browser per pass, until it
        returns no rows. NOTE(review): a token whose page never parses keeps
        being selected, so this loop has no upper bound on passes.
        """
        print('get_erc20_data',
              time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        sel_str = "SELECT a.token_id, a.get_data_time, b.erc20_contract from erc20_data as a, token_base as b WHERE (a.token_hold_num = -1 or a.token_tx_num = -1)AND a.token_id = b.id"
        while True:
            pending = self.db.select(sel_str)
            print(len(pending))
            if len(pending) <= 0:
                break

            driver = self.open_driver()
            try:
                for token in pending:
                    self._update_token_counts(driver, token)
            finally:
                # Always release the browser, even if a page raised something
                # unexpected, so no Firefox process is left behind.
                driver.close()
                driver.service.stop()

        print('get_erc20_data',
              time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

    def _update_token_counts(self, driver, token):
        """Scrape holder and transaction counts for one token and store them."""
        url = config.eth_token_url + token[2]
        try:
            driver.get(url)
        except TimeoutException:
            # Work with whatever has loaded so far.
            driver.execute_script('window.stop()')

        try:
            holders_text = driver.find_element_by_xpath(
                '//*[@id="ContentPlaceHolder1_divSummary"]/div[1]/table/tbody/tr[3]/td[2]'
            ).text
            tx_text = driver.find_element_by_xpath(
                '//*[@id="totaltxns"]').text
        except Exception:
            print('xpath err', url)
            return

        # Both values go into the statement unquoted, so thousands separators
        # are stripped like for every other scraped number in this file.
        token_holders = holders_text.strip().split(' ')[0].replace(',', '')
        tx_num = str(tx_text).replace(',', '')

        updata_str = ("UPDATE erc20_data SET token_hold_num=" + token_holders +
                      ", token_tx_num=" + tx_num + " where token_id =" +
                      str(token[0]) + " and get_data_time=" + str(token[1]))
        try:
            self.db.update(updata_str)
        except Exception:
            print(updata_str)
            print('UPDATE err internet_data, token_id = ', token[0])

    def get_top100_hold(self):
        """Store the top holders of every row with ``top100_detail = 0``.

        Rows are processed in batches of at most 30 (the query's LIMIT), one
        browser per batch with a 5 s pause between batches, until the select
        returns nothing. NOTE(review): a token that never yields a complete
        list stays at ``top100_detail = 0`` and keeps being selected.
        """
        sel_str = "SELECT a.token_id, a.get_data_time, b.erc20_contract, a.token_hold_num from erc20_data as a, token_base as b WHERE top100_detail = 0 AND a.token_id = b.id LIMIT 30"

        while True:
            batch = self.db.select(sel_str)
            print(len(batch),
                  time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
            if len(batch) <= 0:
                break

            driver = self.open_driver()
            try:
                for token in batch:
                    self._store_top_holders(driver, token)
            finally:
                driver.close()
                driver.service.stop()
            time.sleep(5)

        print('get_top100_hold',
              time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

    def _store_top_holders(self, driver, token):
        """Scrape up to two pages of holders for one token and persist them.

        ``token`` is ``(token_id, get_data_time, erc20_contract,
        token_hold_num)``. Rows are written, and ``top100_detail`` set to 1,
        only when exactly 100 rows or exactly ``token_hold_num`` rows were
        collected.
        """
        url = config.eth_token_url + token[2]
        try:
            driver.get(url)
        except TimeoutException:
            driver.execute_script('window.stop()')

        try:
            driver.find_element_by_xpath(
                '//*[@id="ContentPlaceHolder1_li_balances"]/a').click()
        except Exception:
            print('click err',
                  time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
            time.sleep(5)
            return

        try:
            driver.switch_to.alert.accept()
        except Exception:
            # Best effort: usually there is no alert to dismiss.
            pass

        first_rank_cell = (By.XPATH,
                           '//*[@id="maintable"]/table/tbody/tr[2]/td[1]')
        try:
            # Switching frames is inside the guarded section so that a page
            # without the iframe skips this token instead of aborting the run.
            driver.switch_to.frame('tokeholdersiframe')
            WebDriverWait(driver, 20, 1).until(
                EC.text_to_be_present_in_element(first_rank_cell, '1'))

            created_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            if len(driver.find_elements_by_xpath(
                    self._HOLDER_ROWS_XPATH)) < 2:
                print('rank data len less')
                return

            insert_list = self._holder_row_inserts(driver, token, created_at)
            if token[3] > 50:
                # More than one page of holders: open the next page and wait
                # until its first row shows rank 51.
                driver.find_element_by_xpath(
                    '//*[@id="PagingPanel"]/a[3]').click()
                WebDriverWait(driver, 20, 1).until(
                    EC.text_to_be_present_in_element(first_rank_cell, '51'))
                insert_list.extend(
                    self._holder_row_inserts(driver, token, created_at))
        except Exception:
            print('Timed out waiting for page to load. token_id:', token[0])
            return

        if len(insert_list) != 100 and len(insert_list) != token[3]:
            # Incomplete list: leave the row unflagged so it is retried.
            return

        try:
            self.db.execute_list(insert_list)
        except Exception:
            print('INSERT err internet_data, >50 token_id = ', token[0])
            return

        updata_str = ("UPDATE erc20_data SET top100_detail=1" +
                      " where token_id =" + str(token[0]) +
                      " and get_data_time=" + str(token[1]))
        try:
            self.db.update(updata_str)
        except Exception:
            print(updata_str)
            print('UPDATE err updata_str, token_id = ', token[0])

    def _holder_row_inserts(self, driver, token, created_at):
        """Return one INSERT statement per parsable row of the holder table."""
        insert_tpl = (
            "INSERT INTO hold_top_100 (token_id, get_data_time, rank, address, quantity, percentage, creat_at)"
            "VALUES ({},{},{},'{}',{},{},'{}');")
        statements = []
        for row in driver.find_elements_by_xpath(self._HOLDER_ROWS_XPATH):
            try:
                rank = row.find_element_by_xpath('./td[1]').text.replace(
                    ',', '')
                address = row.find_element_by_xpath('./td/span').text.replace(
                    ',', '')
                quantity = row.find_element_by_xpath('./td[3]').text.replace(
                    ',', '')
                percentage = row.find_element_by_xpath(
                    './td[4]').text.replace(',', '').replace('%', '')
            except Exception:
                # Rows lacking the expected cells (presumably the header row)
                # are skipped.
                continue
            statements.append(
                insert_tpl.format(token[0], token[1], rank, address, quantity,
                                  percentage, created_at))
        return statements
Example #6
0
class GithubDataPro():
    """Discovers GitHub repositories for tokens and records their counters."""

    # (column, xpath) for every counter read from a repository page, in the
    # order the elements are looked up.
    _COUNT_XPATHS = (
        ('watch_num',
         '//*[@id="js-repo-pjax-container"]/div[1]/div/ul/li[1]/a[2]'),
        ('star_num',
         '//*[@id="js-repo-pjax-container"]/div[1]/div/ul/li[2]/a[2]'),
        ('fork_num', '//*[@class="pagehead-actions"]/li[3]/a[2]'),
        ('commits_num',
         '/html/body/div[4]/div/div/div[2]/div[1]/div[3]/div/div/ul/li[1]/a/span'),
        ('branches_num',
         '/html/body/div[4]/div/div/div[2]/div[1]/div[3]/div/div/ul/li[2]/a/span'),
        ('releases_num',
         '/html/body/div[4]/div/div/div[2]/div[1]/div[3]/div/div/ul/li[3]/a/span'),
        ('contributors_num',
         '/html/body/div[4]/div/div/div[2]/div[1]/div[3]/div/div/ul/li[4]/a/span'),
    )
    _UPDATE_TEMPLATE = (
        "UPDATE github_detail_data SET star_num={star_num}, watch_num={watch_num}, "
        "fork_num={fork_num}, commits_num={commits_num},branches_num={branches_num}, "
        "releases_num={releases_num}, contributors_num={contributors_num}"
        " where project_id ={project_id} and get_data_time={get_data_time}")

    def __init__(self):
        """Open the MySQL connection described by ``config``."""
        self.db = Mysqldb(config.MySqlHost, config.MySqlUser,
                          config.MySqlPasswd, config.MySqlDb, config.MySqlPort)
        self.git_url = 'https://github.com/eosio'

    def get_gitaddress(self):
        """Insert repositories found on each token's GitHub page into ``github_base``.

        A repository is inserted only when its name was not in ``github_base``
        at the moment this method started; names inserted during the run are
        not added to that snapshot. Pages reported by the fetch helper as
        ``404``, ``500`` or ``''`` are logged and skipped.
        """
        project_name_list = self.db.select(
            "SELECT project_name FROM github_base")
        tokens = self.db.select(
            "SELECT id, github_address FROM token_base WHERE github_address <>''"
        )
        for token in tokens:
            res_html = common_fun.get_url_html(token[1])
            if res_html == 404:
                print('404:', token[1])
                continue
            if res_html == 500:
                print('500', token[1])
                continue
            if res_html == '':
                print('err:', token[1])
                continue

            for repo in res_html.xpath(
                    '//*[@id="org-repositories"]/div[1]/div/li'):
                project_name = repo.xpath('div[1]/h3/a/text()')[0].strip()
                project_address = repo.xpath('div[1]/h3/a/@href')[0].strip()
                if (project_name, ) in project_name_list:
                    continue

                project_address = 'https://github.com' + project_address
                insert_str = (
                    "INSERT INTO github_base (token_id, project_name, project_address)"
                    "VALUES ({},'{}','{}')").format(token[0], project_name,
                                                    project_address)
                try:
                    self.db.insert(insert_str)
                except Exception:
                    print(insert_str)
                    print('INSERT err internet_data, token_id = ', token[0])

    def open_driver(self):
        """Return a headless, maximised Firefox driver with a 20 s page-load timeout."""
        options = webdriver.FirefoxOptions()
        options.add_argument('--headless')
        driver = webdriver.Firefox(options=options)
        driver.set_page_load_timeout(20)
        driver.maximize_window()
        return driver

    def init_git_detail_data(self):
        """Insert one ``github_detail_data`` row per project that has an address.

        All rows of one call share the same ``get_data_time``. A failing
        INSERT is printed and skipped.
        """
        projects = self.db.select(
            "SELECT id FROM github_base WHERE project_address <> ''")
        timestamps = int(time.time())

        for git_base in projects:
            insert_str = (
                "INSERT INTO github_detail_data (project_id, get_data_time)"
                "VALUES ({},{})").format(git_base[0], timestamps)
            try:
                self.db.insert(insert_str)
            except Exception:
                print(insert_str)
                print('INSERT err internet_data, project_id = ', git_base[0])

    def get_detail_data(self):
        """Fill the counters of every ``github_detail_data`` row that still has a -1.

        Up to five scraping passes are made, each with its own browser. Rows
        still pending after the fifth pass get all seven counters set to -2.
        The method returns as soon as the select finds nothing pending, so no
        browser is started when there is no work.
        """
        sel_str = "SELECT b.project_id, b.get_data_time, a.project_address from github_base as a, github_detail_data as b WHERE (b.star_num = -1 or b.watch_num = -1 or b.fork_num = -1 or b.commits_num = -1 or b.branches_num = -1 or b.releases_num = -1 or b.contributors_num = -1)AND a.id = b.project_id"

        retry_times = 5
        while True:
            pending = self.db.select(sel_str)
            if not pending:
                break

            retry_times = retry_times - 1
            if retry_times < 0:
                self._mark_given_up(pending)
                break

            driver = self.open_driver()
            try:
                for git_base in pending:
                    self._update_repo_counts(driver, git_base)
            finally:
                # Always release the browser, even on an unexpected error.
                driver.close()
                driver.service.stop()

    def _mark_given_up(self, rows):
        """Set every counter of the given rows to -2 (retries exhausted)."""
        given_up = {column: -2 for column, _ in self._COUNT_XPATHS}
        for git_base in rows:
            updata_str = self._UPDATE_TEMPLATE.format(
                project_id=git_base[0], get_data_time=git_base[1], **given_up)
            try:
                self.db.update(updata_str)
            except Exception:
                print(updata_str)
                print('UPDATE err retry_times < 0, gitaddress = ',
                      git_base[2])

    def _update_repo_counts(self, driver, git_base):
        """Scrape the seven counters of one repository page and store them."""
        try:
            driver.get(git_base[2])
        except TimeoutException:
            # Work with whatever has loaded so far.
            driver.execute_script('window.stop()')

        try:
            counts = {
                column:
                driver.find_element_by_xpath(xpath).text.replace(',', '')
                for column, xpath in self._COUNT_XPATHS
            }
        except Exception:
            # The row keeps its -1 values and is picked up by the next pass;
            # the failure is logged instead of being dropped silently.
            print('xpath err', git_base[2])
            return

        updata_str = self._UPDATE_TEMPLATE.format(
            project_id=git_base[0], get_data_time=git_base[1], **counts)
        try:
            self.db.update(updata_str)
        except Exception:
            print(updata_str)
            print('UPDATE err, git_address = ', git_base[2],
                  '-----project_id = ', git_base[0])