def main():
    # initialise an instance of the API client
    api = CoinGeckoAPI()

    # load a list containing the symbols of all the coins we're interested in
    symbols = getDesiredCoinSymbols()

    # find the ids of the coins we're interested in
    allCoins = api.get_coins_list()
    names = getDesiredCoinNames(allCoins, symbols)
    ids = getDesiredCoinIds(allCoins, symbols)
    print(len(ids))

    # fetch and save historical data for each coin (starting from index 70,
    # presumably to resume a previous run)
    for i in range(70, len(ids)):
        id = ids[i]
        data = getHistoricalData(api, id)
        jsonPath = baseDir + "/Data/CoingeckoData/" + id + ".json"
        with open(jsonPath, "w+", encoding="utf-8") as dest:
            json.dump(data, dest, ensure_ascii=False, indent=4)
        json2Csv(jsonPath)
        stdout.write("\r%d coins done" % i)
        stdout.flush()
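# json2Csv is called throughout these scripts but is not defined in this section.
# A minimal sketch of what such a helper might look like, assuming each JSON file
# holds a flat list of records and that the optional csvPath argument overrides the
# default destination; this pandas-based version is an illustration, not the
# original helper.
import pandas as pd

def json2Csv(jsonPath, csvPath=None):
    # by default, write the CSV alongside the source JSON file
    if csvPath is None:
        csvPath = jsonPath.rsplit(".json", 1)[0] + ".csv"
    df = pd.read_json(jsonPath)
    df.to_csv(csvPath, index=False)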
def getDataForAllTokens():
    with open("/Users/maxtaylordavies/BlockchainResearch/Scripts/ids.json") as tokens:
        tokens = json.load(tokens)

    # start from index 975, presumably to resume a previous run
    for i in range(975, len(tokens)):
        token = tokens[i]
        name = token["name"]
        contract = token["id"]

        stdout.write("\r%d tokens done" % i)
        stdout.flush()

        holders = getTopHoldersForToken(contract)
        # info = getTokenInfo(contract)
        # analytics = getTokenAnalytics(contract)

        with open(baseDir + "/Data/TokenData/" + name + "/top_holders.json", "w+", encoding="utf-8") as dest:
            json.dump(holders, dest, ensure_ascii=False, indent=4)

        # with open(baseDir + "/Data/TokenData/" + name + "/info.json", "w+", encoding="utf-8") as dest:
        #     json.dump(info, dest, ensure_ascii=False, indent=4)

        # with open(baseDir + "/Data/TokenData/" + name + "/analytics.json", "w+", encoding="utf-8") as dest:
        #     json.dump(analytics, dest, ensure_ascii=False, indent=4)

        json2Csv(baseDir + "/Data/TokenData/" + name + "/top_holders.json")
def getTopWalletsForChain(chain, headers, outputDir):
    url = "https://chainz.cryptoid.info/explorer/index.wallets.dws?coin=" + chain
    req = urllib.request.Request(url, headers=headers)
    response = urllib.request.urlopen(req)
    data = json.load(response)["wallets"]

    with open(outputDir + "top_wallets.json", "w+", encoding="utf-8") as dest:
        json.dump(data, dest, ensure_ascii=False, indent=4)
    json2Csv(outputDir + "top_wallets.json")
def getRichListForChain(chain, headers, outputDir):
    url = "https://chainz.cryptoid.info/explorer/index.stats.dws?coin=" + chain
    req = urllib.request.Request(url, headers=headers)
    response = urllib.request.urlopen(req)
    data = json.load(response)["largestAddresses"]

    with open(outputDir + "rich_list.json", "w+", encoding="utf-8") as dest:
        json.dump(data, dest, ensure_ascii=False, indent=4)
    json2Csv(outputDir + "rich_list.json")
def getLast1000BlocksForChain(chain, headers, outputDir):
    url = "https://chainz.cryptoid.info/explorer/index.data.dws?coin=" + chain + "&v=1&n=1000"
    req = urllib.request.Request(url, headers=headers)
    response = urllib.request.urlopen(req)
    data = json.load(response)["blocks"]

    with open(outputDir + "blocks.json", "w+", encoding="utf-8") as dest:
        json.dump(data, dest, ensure_ascii=False, indent=4)
    json2Csv(outputDir + "blocks.json")
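# A possible driver for the three chainz.cryptoid.info helpers above. The chain
# codes, User-Agent string and output layout are assumptions for illustration only;
# the explorer may require a non-default User-Agent header.
def fetchAllChainzData():  # hypothetical driver, not part of the original scripts
    headers = {"User-Agent": "Mozilla/5.0"}
    for chain in ["btc", "ltc", "dash"]:  # hypothetical chain codes
        outputDir = baseDir + "/Data/ChainzData/" + chain + "/"
        os.makedirs(outputDir, exist_ok=True)
        getTopWalletsForChain(chain, headers, outputDir)
        getRichListForChain(chain, headers, outputDir)
        getLast1000BlocksForChain(chain, headers, outputDir)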
def convertJsonFiles():
    commitsDir = "/Users/maxtaylordavies/Dropbox/SHARED BLOCKCHAIN PROJECT - DATA/Max Taylor-Davies/Data/GithubData/Commits"
    for (dirpath, dirnames, filenames) in os.walk(commitsDir):
        for fn in filenames:
            fp = os.path.join(dirpath, fn)
            print("converting", fn)
            json2Csv(fp)

    forksDir = "/Users/maxtaylordavies/Dropbox/SHARED BLOCKCHAIN PROJECT - DATA/Max Taylor-Davies/Data/GithubData/Forks"
    for (dirpath, dirnames, filenames) in os.walk(forksDir):
        for fn in filenames:
            fp = os.path.join(dirpath, fn)
            json2Csv(fp)
def main():
    with open("./repos.json") as f:
        coins = json.load(f)

    # start from index 46, presumably to resume a previous run
    for coin in coins[46:]:
        dirpath = os.path.join(baseDir, "Data", "GithubData", "Activity", coin["name"])
        if not os.path.exists(dirpath):
            os.makedirs(dirpath)
        logFilePath = os.path.join(dirpath, "log.txt")

        for repo in coin["repos"]:
            print("\nmining activity data for repo %s....." % repo)
            activity = getHistoricalActivityOnRepo(coin["org"], repo, logFilePath)
            destpath = os.path.join(dirpath, repo + ".json")
            with open(destpath, "w") as dest:
                json.dump(activity, dest, ensure_ascii=False, indent=4)
            json2Csv(destpath)
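# Illustrative shape of one entry in repos.json, inferred from the fields accessed
# above (the values shown are hypothetical):
# {
#     "name": "Bitcoin",
#     "org": "bitcoin",
#     "repos": ["bitcoin", "bips"]
# }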
def scrapeAllForkedBlocks():
    forkedBlocks = []
    for p in range(1, 1113):
        forkedBlocks += scrapePageOfForkedBlocks(p)
        stdout.write("\r%d pages of forked blocks scraped" % p)
        stdout.flush()

        # checkpoint progress to disk every 100 pages
        if p % 100 == 0:
            print("saving")
            with open(baseDir + "/Data/EtherscanData/Scraping/forked_blocks.json", "w+", encoding="utf-8") as dest:
                json.dump(forkedBlocks, dest, ensure_ascii=False, indent=4)
            json2Csv(baseDir + "/Data/EtherscanData/Scraping/forked_blocks.json")

        # brief pause between pages to avoid hammering the server
        time.sleep(0.05)
def scrapeAllTransactions():
    transactions = []
    for p in range(1, 5001):
        transactions += scrapePageOfTransactions(p)
        stdout.write("\r%d pages of transactions scraped" % p)
        stdout.flush()

        # checkpoint progress to disk every 100 pages
        if p % 100 == 0:
            print("saving")
            with open(baseDir + "/Data/EtherscanData/Scraping/transactions.json", "w+", encoding="utf-8") as dest:
                json.dump(transactions, dest, ensure_ascii=False, indent=4)
            json2Csv(baseDir + "/Data/EtherscanData/Scraping/transactions.json")

        # brief pause between pages to avoid hammering the server
        time.sleep(0.5)
def getDevStatsOnCoin(api, id):
    coin = api.get_coin_by_id(id, localization=False, market_data=False)
    commits = coin["developer_data"]["last_4_weeks_commit_activity_series"]

    # label each commit count with a date, walking backwards one day at a time from today
    data = []
    d = date.today()
    for c in commits:
        data.append({"date": str(d), "commits": c})
        d -= timedelta(days=1)

    jsonPath = "../Data/CoingeckoData/Developer/JSON/" + id + ".json"
    csvPath = "../Data/CoingeckoData/Developer/CSV/" + id + ".csv"
    with open(jsonPath, "w+", encoding="utf-8") as dest:
        json.dump(data, dest, ensure_ascii=False, indent=4)
    json2Csv(jsonPath, csvPath=csvPath)
def main():
    with open(baseDir + "/Scripts/ids.json") as tokens:
        tokens = json.load(tokens)

    for i in range(12, 50):
        token = tokens[i]
        folder = baseDir + "/Data/TokenData/" + token["name"]
        os.mkdir(folder)

        # fetch subreddit submission numbers if the token has a subreddit
        if token["subreddit"] != "":
            redditStats = getSubmissionNumbersForSubreddit(token["subreddit"])
            with open(folder + "/reddit.json", "w+", encoding="utf-8") as dest:
                json.dump(redditStats, dest, ensure_ascii=False, indent=4)
            json2Csv(folder + "/reddit.json")

        # fetch github activity if the token has a repo
        if token["repo"] != "":
            [repoOwner, repoName] = token["repo"].split("/")
            githubStats = getHistoricalActivityOnRepo(repoOwner, repoName, apiKey)
            with open(folder + "/github.json", "w+", encoding="utf-8") as dest:
                json.dump(githubStats, dest, ensure_ascii=False, indent=4)
            json2Csv(folder + "/github.json")

        stdout.write("\rgot stats on %d tokens" % (i + 1))
        stdout.flush()
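# Illustrative shape of one entry in ids.json, inferred from the fields these
# scripts access (name and id in getDataForAllTokens; subreddit and repo here);
# the values shown are hypothetical:
# {
#     "name": "SomeToken",
#     "id": "0x0000000000000000000000000000000000000000",
#     "subreddit": "sometoken",
#     "repo": "some-org/some-repo"
# }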
def getMarketDataOnCoin(api, id):
    data = []
    hist = api.get_coin_market_chart_by_id(id=id, vs_currency="usd", days="max")

    prices = list(map(lambda x: x[1], hist["prices"]))
    marketCaps = list(map(lambda x: x[1], hist["market_caps"]))
    totalVolumes = list(map(lambda x: x[1], hist["total_volumes"]))
    timestamps = list(map(lambda x: x[0], hist["prices"]))

    # timestamps are in milliseconds, so divide by 1000 before formatting
    for i in range(len(timestamps)):
        data.append({
            "Date": time.strftime("%d-%m-%Y", time.localtime(timestamps[i] / 1000)),
            "Price (USD)": prices[i],
            "Market cap": marketCaps[i],
            "Total volume": totalVolumes[i]
        })

    jsonPath = "../Data/CoingeckoData/Market/JSON/" + id + ".json"
    csvPath = "../Data/CoingeckoData/Market/CSV/" + id + ".csv"
    with open(jsonPath, "w+", encoding="utf-8") as dest:
        json.dump(data, dest, ensure_ascii=False, indent=4)
    json2Csv(jsonPath, csvPath=csvPath)
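# A possible driver combining the two per-coin fetchers above, assuming the same
# CoinGeckoAPI client used elsewhere in these scripts; the coin ids listed are
# purely illustrative.
def fetchDevAndMarketStats():  # hypothetical driver, not part of the original scripts
    api = CoinGeckoAPI()
    for id in ["bitcoin", "ethereum", "litecoin"]:  # hypothetical ids
        getDevStatsOnCoin(api, id)
        getMarketDataOnCoin(api, id)
        stdout.write("\rfetched stats for %s" % id)
        stdout.flush()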
def main():
    # stats()
    rootDir = os.path.join(baseDir, "Data", "GithubData", "Activity")

    for coin in os.listdir(rootDir):
        # for coin in ["Bytecoin", "Clams", "Worldcoin", "Novacoin", "Startcoin", "Fastcoin", "Bullion"]:
        if coin == ".DS_Store":
            continue

        # get github data
        fNames = os.listdir(os.path.join(rootDir, coin))
        githubFNames = [
            fn for fn in fNames
            if fn.endswith("json") and not fn.startswith("difficulty") and not fn.startswith("addresses")
        ]
        # githubFNames = [coin.lower() + ".json"]
        if not githubFNames:
            continue
        githubFPath = os.path.join(rootDir, coin, githubFNames[0])
        with open(githubFPath) as f:
            githubData = json.load(f)
        githubData = githubData[::-1]

        # get difficulty data if available
        difficultyData = []
        if "difficulty.json" in fNames:
            difficultyFPath = os.path.join(rootDir, coin, "difficulty.json")
            with open(difficultyFPath) as f:
                difficultyData = json.load(f)

        # get active address data if available
        activeAddressData = []
        if "addresses.json" in fNames:
            addressesFPath = os.path.join(rootDir, coin, "addresses.json")
            with open(addressesFPath) as f:
                activeAddressData = json.load(f)

        # find the range of dates for which we have all the stats
        githubStart = githubData[0]["Date"]
        githubEnd = githubData[-1]["Date"]
        difficultyStart = difficultyData[0]["Date"] if difficultyData else githubStart
        difficultyEnd = difficultyData[-1]["Date"] if difficultyData else githubEnd
        addressesStart = activeAddressData[0]["Date"] if activeAddressData else githubStart
        addressesEnd = activeAddressData[-1]["Date"] if activeAddressData else githubEnd
        start = max([
            datetime.strptime(d, "%Y-%m-%d")
            for d in (githubStart, difficultyStart, addressesStart)
        ])
        end = min([
            datetime.strptime(d, "%Y-%m-%d")
            for d in (githubEnd, difficultyEnd, addressesEnd)
        ])

        # remove entries outside these dates from all the data arrays
        githubData = [
            x for x in githubData
            if start <= datetime.strptime(x["Date"], "%Y-%m-%d") <= end
        ]
        difficultyData = [
            x for x in difficultyData
            if start <= datetime.strptime(x["Date"], "%Y-%m-%d") <= end
        ]
        activeAddressData = [
            x for x in activeAddressData
            if start <= datetime.strptime(x["Date"], "%Y-%m-%d") <= end
        ]

        # merge
        for i in range(len(difficultyData)):
            githubData[i]["Average difficulty"] = difficultyData[i]["Average difficulty"]
        for i in range(len(activeAddressData)):
            githubData[i]["Active addresses"] = activeAddressData[i]["Active addresses"]

        outputPath = os.path.join(baseDir, "Data", "github_difficulty_addresses", coin + ".json")
        with open(outputPath, "w+") as f:
            json.dump(githubData, f, ensure_ascii=False, indent=4)
        json2Csv(outputPath)