def main():
    print("Generating address statistics")
    sc, sql_sc = hf.initiateSpark()
    dft, dfb, dfc, dfvi, dfvo, dfvj = hf.loadDataframesFromParquet(
        sql_sc, conf.pathresearch)
    dfb.unpersist()
    hf.generateAllStats(sc, dfvi, dfvo, dfc, dft, dfvj, conf.pathresearch)
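
# Hedged sketch, not the project's code: hf.loadDataframesFromParquet is assumed
# to read back the six parquet files written by the conversion scripts below and
# return them in a fixed order. The file names mirror those used in
# createDataframes at the bottom of this listing; "vin.parquet" is an assumption.
def loadDataframesFromParquetSketch(sql_sc, path):
    dft = sql_sc.read.parquet(path + "transaction.parquet")
    dfb = sql_sc.read.parquet(path + "block.parquet")
    dfc = sql_sc.read.parquet(path + "coingen.parquet")
    dfvi = sql_sc.read.parquet(path + "vin.parquet")  # assumed file name
    dfvo = sql_sc.read.parquet(path + "vout.parquet")
    dfvj = sql_sc.read.parquet(path + "vjoinsplit.parquet")
    return dft, dfb, dfc, dfvi, dfvo, dfvj
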
def main():
    print("Generating initial statistics")
    sc, sql_sc = hf.initiateSpark()
    dft, dfb, dfc, dfvi, dfvo, dfvj = hf.loadDataframesFromParquet(
        sql_sc, conf.pathresearch)
    print "Performing initial analysis"
    results = 'Initial Analysis : \n'
    # count blocks
    print "Obtaining Block counts"
    blocknum = dfb.count()
    results += 'Blocks: ' + str(blocknum) + '\n'
    # count transactions
    total = dft.count()
    print "Obtaining transaction statistics"
    results += 'Transactions: ' + str(total) + '\n'
    # types of txs
    types = [(r['t_type'], r['count'])
             for r in dft.groupBy('t_type').count().collect()]
    results += "Transactions by type \nType \tAmount \tPercentage \n"
    for tx_type, count in types:
        results += tx_type + ' \t' + str(count) + '\t' + (
            '%.2f' % (float(count) / float(total) * 100)) + '\n'
    # write results to disk
    print "Writing results to file %s" % "/root/research/basic_analysis.txt"
    resultsfile = open(conf.pathresearch + "initial_analysis.txt", "w")
    resultsfile.writelines(results)
    resultsfile.close()
    print(results)
    print "Complete"
def main():
    print("Generated parquest files from csv")
    path = conf.pathresearch
    sc, sql_sc = hf.initiateSpark()
    t = path + 'public.transaction.csv'
    b = path + 'public.block.csv'
    cg = path + 'public.coingen.csv'
    vi = path + 'public.vin.csv'
    vo = path + 'public.vout.csv'
    vj = path + 'public.vjoinsplit.csv'
    sparkClassPath = os.getenv('SPARK_CLASSPATH',
                               '/opt/spark/postgresql-42.2.2.jar')
    sc._conf.set('spark.jars', 'file:%s' % sparkClassPath)
    sc._conf.set('spark.executor.extraClassPath', sparkClassPath)
    sc._conf.set('spark.driver.extraClassPath', sparkClassPath)
    print("Reading Data from CSV files")
    t, b, c, vi, vo, vj = hf.getRdds(sc=sc,
                                     file_tx=t,
                                     file_blocks=b,
                                     file_coingen=cg,
                                     file_vin=vi,
                                     file_vout=vo,
                                     file_vjoinsplit=vj)
    print("Converting to dataframes")
    dft, dfb, dfc, dfvi, dfvo, dfvj = hf.convertToDataframes(
        sql_sc, t, b, c, vi, vo, vj, conf.blockheight)
    print("Saving data to disk")
    hf.saveDataframeToParquet(path, dft, dfb, dfc, dfvi, dfvo, dfvj)
    print("created parquet files")
def main():
    sc, sql_sc = hf.initiateSpark()
    dft, dfb, dfc, dfvi, dfvo, dfvj = hf.loadDataframesFromParquet(sql_sc, conf.pathresearch)
    txs = generateSmallTXlist(dft, dfvi, dfvo, dfvj)
    clusters, graph, distinctnodeandtxs = generateClusters(txs)
    addressStats = convertAddressStats(conf.pathresearch)
    clustersWithStats = clusterStats(txs, clusters, graph, addressStats, distinctnodeandtxs, True, conf.pathresearch)
    writeToDisk(clusters, graph, clustersWithStats, conf.pathresearch)
    print("Clustering script complete")
def main():
    print("Generated parquest files from csv")
    path = '/root/research/'
    sc, sql_sc = hf.initiateSpark()
    # make the PostgreSQL JDBC driver jar visible to the driver and executors
    sparkClassPath = os.getenv('SPARK_CLASSPATH', '/opt/spark/postgresql-42.2.2.jar')
    sc._conf.set('spark.jars', 'file:%s' % sparkClassPath)
    sc._conf.set('spark.executor.extraClassPath', sparkClassPath)
    sc._conf.set('spark.driver.extraClassPath', sparkClassPath)
    url = 'jdbc:postgresql://zcashpostgres:5432/zcashdb?user=postgres'
    properties = {"driver": 'org.postgresql.Driver'}
    dft, dfb, dfc, dfvi, dfvo, dfvj = hf.readDataframeFromPostgres(sql_sc, url, properties, conf.blockheight)
    hf.saveDataframeToParquet(path, dft, dfb, dfc, dfvi, dfvo, dfvj)
    print("created parquet files")
def main():

    print("Connecting to Zcash node")
    sc, sql_sc = hf.initiateSpark()
    path = conf.pathresearch
    checkZcashCLI()
    blocklimit = conf.blockheight
    rpc = rpcConnection()
    latestBlock = int(rpc.getblockcount())
    if blocklimit > latestBlock:
        blocklimit = latestBlock
    dft, dfb, dfc, dfvi, dfvo, dfvj = fillDataFrames(sc, sql_sc, blocklimit)
    hf.saveDataframeToParquet(path, dft, dfb, dfc, dfvi, dfvo, dfvj)
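
# Hedged sketch only: rpcConnection and checkZcashCLI are not defined in this
# excerpt. Zcash exposes a Bitcoin-compatible JSON-RPC interface, so a
# connection could look like the following using the python-bitcoinrpc package;
# the credentials, host and port are placeholder assumptions (8232 is the
# default Zcash mainnet RPC port).
def rpcConnectionSketch(user="rpcuser", password="rpcpassword",
                        host="127.0.0.1", port=8232):
    from bitcoinrpc.authproxy import AuthServiceProxy
    return AuthServiceProxy("http://%s:%s@%s:%d" % (user, password, host, port))
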
from __future__ import print_function
import os
from founders import trivialFounders
from pools import pools
from pyspark.sql.functions import array_contains, col, size
from collections import Counter
import matplotlib as mpl

mpl.use('Agg')
from plotGraphs import *
import config as conf
import helpfulFunctions as hf
import sys

sc, sql_sc = hf.initiateSpark()


def dumpToCsv(filename, data):
    with open(conf.pathresearch + filename + ".csv", "w") as fw:
        fw.write("addresses\n")
        for element in data:
            fw.write("{}\n".format(element))


def createDataframes():
    dirx = conf.pathresearch
    dfvout = sql_sc.read.parquet(dirx + "vout.parquet")
    dfc = sql_sc.read.parquet(dirx + "coingen.parquet")
    dft = sql_sc.read.parquet(dirx + "transaction.parquet")
    dfb = sql_sc.read.parquet(dirx + "block.parquet")
    dfvj = sql_sc.read.parquet(dirx + "vjoinsplit.parquet")