Ejemplo n.º 1
0
# Features graded against pre-computed scaling files: position in
# ``user_input`` -> (file-name stem, whether log10 is applied before binning).
_ICO_SCALED_FEATURES = {
    2: ('team', False),
    3: ('hardcap', True),
    4: ('price', True),
    5: ('telegram', True),
    7: ('twitter', True),
    8: ('dailyviews', True),
    9: ('dailytime', True),
}

# (accepted values, grade) pairs; anything not listed is graded 0.
_ICO_REGION_GRADES = (
    ((7,), 5),
    ((1, 5), 4),
    ((4, 6), 3),
    ((2, 3), 2),
    ((8, 9), 1),
)
_ICO_INDUSTRY_GRADES = (
    (('blockchain',), 5),
    (('fintech',), 4),
    (('saas',), 3),
    (('gaming',), 2),
    (('insurance services', 'telecommunications'), 1),
)


def _ico_table_grade(value, table):
    """Return the grade paired with ``value`` in ``table``, or 0 if absent."""
    for members, grade in table:
        if value in members:
            return grade
    return 0


def _ico_bin_group(bins, feature_in):
    """Return the 1-based bin (1..5) that holds ``feature_in``, or 0 if none.

    Bin ``k`` is the half-open interval ``[bins[k-1], bins[k])``. A value
    below ``bins[0]``, above ``bins[5]`` or exactly equal to ``bins[5]`` falls
    in no bin and yields 0.
    """
    group = 0
    for k in range(5):
        # No ``break``: if the edges were not increasing, the last matching
        # interval wins, exactly as a chain of independent ``if`` tests would.
        if bins[k] <= feature_in < bins[k + 1]:
            group = k + 1
    if (feature_in < bins[0]) or (feature_in > bins[5]):
        group = 0
    return group


def ico_rank_semiannual(user_input, ico_end_date):
    """Rate an ICO on a 0..1 scale using period-specific scaling files.

    Every entry of ``user_input`` is turned into a grade between 0 and 5; the
    result is the mean grade divided by 5 and rounded to two decimals.

    Args:
        user_input: Sequence of at most 10 entries, read by position:
            0 - region code, as a string that is evaluated to a number;
            1 - industry text, normalised through ``func_industry``;
            2, 3, 4, 5, 7, 8, 9 - numeric strings for the ``team``,
            ``hardcap``, ``price``, ``telegram``, ``twitter``, ``dailyviews``
            and ``dailytime`` scaling files (log10 is applied to all of them
            except ``team``);
            6 - never read; see the note in the loop below.
        ico_end_date: ICO end date such as ``'15 Feb 2017'`` (spaces are
            optional). Its calendar quarter and two-digit year select the
            ``scaling_dataset/<feature>_{z,bin}_sa_<quarter>_<yy>.npy`` files.

    Returns:
        The rating, rounded to two decimals (``nan`` for an empty
        ``user_input``, as produced by ``np.mean``).

    Raises:
        ValueError: If ``ico_end_date`` does not match ``%d%b%Y``.
        IndexError: If ``user_input`` has more than 10 entries.
        OSError: If a required scaling file is missing (propagated from
            ``np.loadtxt``).
    """
    end_date = datetime.strptime(ico_end_date.replace(" ", ""), '%d%b%Y')

    # Jan-Mar -> sa_1_, Apr-Jun -> sa_2_, Jul-Sep -> sa_3_, Oct-Dec -> sa_4_.
    sa_str = 'sa_%d_' % ((end_date.month - 1) // 3 + 1)
    # Two-digit year for any year, not only the previously hard-coded
    # 2016-2018 (other years used to fail on an unbound local).
    yr_str = '%02d' % (end_date.year % 100)

    ranks = []
    grade = 0
    for i in range(len(user_input)):
        if i == 0:
            # SECURITY NOTE(review): eval() runs arbitrary expressions. It is
            # kept so callers passing expression strings keep working, but it
            # must never see untrusted text.
            grade = _ico_table_grade(eval(user_input[0]), _ICO_REGION_GRADES)
        elif i == 1:
            grade = _ico_table_grade(
                func_industry(user_input[1]), _ICO_INDUSTRY_GRADES)
        elif i == 6:
            # NOTE(review): position 6 is not graded on its own; the grade
            # computed for position 5 is carried over and counted again in
            # the mean. Preserved as-is - confirm whether this is intended.
            pass
        else:
            if i not in _ICO_SCALED_FEATURES:
                raise IndexError(
                    'user_input has no feature defined at position %d' % i)
            stem, use_log10 = _ICO_SCALED_FEATURES[i]

            # SECURITY NOTE(review): see the eval() remark above.
            feature_in = eval(user_input[i])
            if use_log10:
                feature_in = np.log10(feature_in)

            z = np.loadtxt('scaling_dataset/%s_z_%s%s.npy' %
                           (stem, sa_str, yr_str))
            bins = np.loadtxt('scaling_dataset/%s_bin_%s%s.npy' %
                              (stem, sa_str, yr_str))

            # The grade is the 1-based position of the value's bin once the
            # bins are ordered by ascending z.
            order = np.argsort(z)
            location = _ico_bin_group(bins, feature_in) - 1
            try:
                grade = np.where(order == location)[0][0] + 1
            except IndexError:
                # The value fell outside every bin (location == -1).
                grade = 0

        ranks.append(grade)

    ico_rating = np.mean(ranks)
    return round(ico_rating / 5.0, 2)
Ejemplo n.º 2
0
def ico_data_collector(input_vector, bitcoin, top10s):
    """Collect market and ICO data for one coin, write it to CSV and return it.

    The function
      1. downloads the coin's historical price table from coinmarketcap.com
         and derives daily returns, Sharpe ratios and alpha/beta against the
         ``bitcoin`` and ``top10s`` return series,
      2. queries the ``func_icobench``, ``func_icodrops``,
         ``func_tokenmarket``, ``func_icorating``, ``func_icomarks``,
         ``func_icobazaar`` and ``func_googletwitter`` helpers,
      3. consolidates the per-source values into one value per field, and
      4. writes three single-row CSV files under ``outdata/``:
         ``ico_data_full_single.csv``, ``ico_data_reduced_single.csv`` and
         ``ico_data_reduced_wratings_single.csv``.

    Values that cannot be obtained are represented by the string ``'N/A'``.

    Args:
        input_vector: Indexable with at least three items: ``[0]`` the
            currency name (passed to the helper functions), ``[1]`` the token
            (passed to ``func_icorating``), ``[2]`` the slug inserted into the
            coinmarketcap.com URL.
        bitcoin: Sequence of Bitcoin returns used as regressor for
            ``beta_btc`` / ``alpha_btc``.
        top10s: Sequence of returns used as regressor for ``beta_top10`` /
            ``alpha_top10`` and as benchmark for ``sharpe_yr2``.

    Returns:
        A tuple ``(full, reduced)`` of two lists: ``full`` holds the
        per-source values followed by the market metrics; ``reduced`` holds
        the consolidated values, the hype/risk/bazaar ratings and the market
        metrics.
    """

    currency = input_vector[0]
    token = input_vector[1]
    website_str = input_vector[2]

    # Bitcoin returns
    rbtc = bitcoin
    # Average returns of Top 10 coins
    rt10 = top10s

    # Today's date as YYYYMMDD; used as the end of the requested price range
    # and later for the age calculation.
    today = dt.datetime.now().date().isoformat().replace('-', "")

    # Everything in this try block is best-effort: any exception (network,
    # parsing, arithmetic) falls through to the bare except below, which
    # replaces all market metrics with 'N/A'.
    try:
        response2 = requests.get('https://coinmarketcap.com/currencies/' +
                                 website_str +
                                 '/historical-data/?start=20130428&end=' +
                                 today)
        txt2 = response2.text
        soup = BeautifulSoup(txt2, 'html.parser')

        # Turn the first <table> of the page into a list of rows, each row
        # being a list of stripped cell strings.
        table = soup.find("table")
        rows = table.findAll('tr')
        data2 = [[td.findChildren(text=True) for td in tr.findAll("td")]
                 for tr in rows]
        data2 = [[u"".join(d).strip() for d in l] for l in data2]

        date = []
        o = []
        h = []
        l = []
        c = []
        vol = []
        mc = []
        j = -1
        # Row 0 is skipped (presumably the header row - verify against the
        # page layout). Columns 0..6 are read as date, open, high, low,
        # close, volume and market cap.
        for i in range(1, len(data2)):
            j = j + 1

            # Placeholders are appended first and overwritten right below.
            date.append(j)
            o.append(j)
            h.append(j)
            l.append(j)
            c.append(j)
            vol.append(j)
            mc.append(j)

            # SECURITY NOTE(review): eval() is applied to text scraped from a
            # remote web page; a numeric parser would be safer.
            date[j] = data2[i][0]
            o[j] = eval(data2[i][1])
            h[j] = eval(data2[i][2])
            l[j] = eval(data2[i][3])
            c[j] = eval(data2[i][4])
            # Thousands separators are removed and every "-" is replaced by
            # "0.0" before evaluation.
            vol[j] = eval(data2[i][5].replace(",", "").replace("-", "0.0"))
            mc[j] = eval((data2[i][6].replace(",", "")).replace("-", "0.0"))

        # Reverse every series (presumably the page lists the newest day
        # first, so index 0 becomes the first trading day - confirm).
        [date, o, h, l, c, vol, mc] = [
            date[::-1], o[::-1], h[::-1], l[::-1], c[::-1], vol[::-1], mc[::-1]
        ]

        # Daily returns: open-to-close for the first entry, close-to-close
        # afterwards, each rounded to 3 decimals.

        r = [
            round((c[0] - o[0]) / o[0], 3),
        ]
        for i in range(1, len(c)):
            r.append(i)
            r[i] = round((c[i] - c[i - 1]) / c[i - 1], 3)

        # Calculate average returns and standard deviation of average returns
        r_av = np.mean(r)
        r_std = np.std(r)

        # Calculate 1-month, 3-month and annualized Sharpe ratios
        wd_month = 21
        wd_month3 = 3 * wd_month
        wd_annual = 252

        # NOTE(review): r_av and r_std are overwritten by the windowed values
        # in the ">=" branches, so the 3-month and annual figures below are
        # computed from whichever window was applied last, not from the full
        # series.
        if len(c) < wd_month:
            s_1 = round(r_av * wd_month / (r_std * np.sqrt(wd_month)), 3)
        if len(c) >= wd_month:
            r_av = np.mean(r[0:wd_month])
            r_std = np.std(r[0:wd_month])
            s_1 = round(r_av / r_std, 3)

        if len(c) < wd_month3:
            s_3 = round(r_av * wd_month3 / (r_std * np.sqrt(wd_month3)), 3)
        if len(c) >= wd_month3:
            r_av = np.mean(r[0:wd_month3])
            r_std = np.std(r[0:wd_month3])
            s_3 = round(r_av / r_std, 3)

        s_annual = round(r_av * wd_annual / (r_std * np.sqrt(wd_annual)), 3)
        rav10 = np.mean(rt10)
        rstd10 = np.std(rt10)
        # Annualized ratio of the excess return over the top-10 mean return.
        s_annual2 = round(
            (r_av - rav10) * wd_annual / (r_std * np.sqrt(wd_annual)), 3)

        # Calculation of coin beta based on BTC daily returns (~1/3 of market dominance)
        # Keep the last len(r) BTC returns so both series have equal length
        # (assumes len(rbtc) >= len(r) - TODO confirm).
        displacement = len(rbtc) - len(r)
        rbtc_2 = rbtc[displacement:len(rbtc)]
        beta_btc = round(stats.linregress(rbtc_2, r)[0], 3)
        alpha_btc = round(stats.linregress(rbtc_2, r)[1], 3)

        # Calculation of coin beta based on mean return of top 10 coins on coinmarketcap.com (~80% of cumulative market dominance)
        if len(rt10) > len(r):
            displacement2 = len(rt10) - len(r)
            rt10 = rt10[displacement2:len(rt10)]
            beta_top10 = round(stats.linregress(rt10, r)[0], 3)
            alpha_top10 = round(stats.linregress(rt10, r)[1], 3)

        # After the trim above len(rt10) == len(r), so this branch runs in
        # that case too and recomputes the same values with displacement2 == 0.
        if len(rt10) <= len(r):
            displacement2 = len(r) - len(rt10)
            r = r[displacement2:len(r)]
            beta_top10 = round(stats.linregress(rt10, r)[0], 3)
            alpha_top10 = round(stats.linregress(rt10, r)[1], 3)

    except:
        # Any failure above: fall back to 'N/A' for every market metric.
        c = ['N/A']
        vol = ['N/A']
        [s_1, s_3, s_annual, s_annual2] = ['N/A', 'N/A', 'N/A', 'N/A']
        [beta_btc, beta_top10, alpha_btc,
         alpha_top10] = ['N/A', 'N/A', 'N/A', 'N/A']

    # Query every ICO data source. Index 0 of each result is not used here.
    res = func_icobench(currency)
    res2 = func_icodrops(currency)
    res3 = func_tokenmarket(currency)
    res4 = func_icorating(currency, token)
    res5 = func_icomarks(currency)
    res6 = func_icobazaar(currency)
    res7 = func_googletwitter(currency)

    # Unpack the per-source fields; the numeric suffix identifies the source
    # (1 ICOBench, 2 ICODrops, 3 TokenMarket, 4 ICORating, 5 ICOMarks,
    # 6 ICOBazaar).
    [
        start1, end1, duration1, country1, industry1, team1, raised1, hardcap1,
        success1, price1, telegram1
    ] = [
        res[1], res[2], res[3], res[4], res[5], res[6], res[7], res[8], res[9],
        res[10], res[11]
    ]
    [
        country2, industry2, team2, raised2, hardcap2, success2, price2,
        telegram2
    ] = [
        res2[1], res2[2], res2[3], res2[4], res2[5], res2[6], res2[7], res2[8]
    ]
    [start3, end3, duration3, country3,
     team3] = [res3[1], res3[2], res3[3], res3[4], res3[5]]
    [
        start4, end4, duration4, industry4, team4, raised4, hardcap4, success4,
        price4, telegram4, hype4, risk4
    ] = [
        res4[1], res4[2], res4[3], res4[4], res4[5], res4[6], res4[7], res4[8],
        res4[9], res4[10], res4[11], res4[12]
    ]
    [start5, end5, duration5, country5, team5, raised5, hardcap5, price5] = [
        res5[1], res5[2], res5[3], res5[4], res5[5], res5[6], res5[7], res5[8]
    ]
    #[start6,end6,duration6,team6,hardcap6,price6,bazaar_rating6]=[res6[1],res6[2],res6[3],res6[4],res6[5],res6[6],res6[7]]
    [team6, hardcap6, price6,
     bazaar_rating6] = [res6[4], res6[5], res6[6], res6[7]]
    # Note the index order: res7[2] is the Google-news count, res7[1] the
    # Twitter count.
    [N_google_news, N_twitter, N_daily_views,
     N_daily_time] = [res7[2], res7[1], res7[3], res7[4]]

    # c[0] rounded to 3 decimals; rounding the 'N/A' placeholder raises and
    # yields 'N/A'.
    try:
        ret_day1a = round(c[0], 3)
    except:
        ret_day1a = 'N/A'

    [
        ret_day1, vol_day1, sharpe_1, sharpe_3, sharpe_yr, sharpe_yr2,
        beta_btc, beta_top10, alpha_btc, alpha_top10
    ] = [
        ret_day1a, vol[0], s_1, s_3, s_annual, s_annual2, beta_btc, beta_top10,
        alpha_btc, alpha_top10
    ]

    # Calculation of age since last day of ICO, in days (one decimal), per
    # source. End dates that do not parse as '%d %b %Y' give 'N/A'.
    today_a = datetime.strptime(today, '%Y%m%d')
    try:
        age1 = today_a - datetime.strptime(end1, '%d %b %Y')
        age1 = round(age1.total_seconds() / 86400., 1)
    except:
        age1 = 'N/A'
    try:
        age3 = today_a - datetime.strptime(end3, '%d %b %Y')
        age3 = round(age3.total_seconds() / 86400., 1)
    except:
        age3 = 'N/A'
    try:
        age4 = today_a - datetime.strptime(end4, '%d %b %Y')
        age4 = round(age4.total_seconds() / 86400., 1)
    except:
        age4 = 'N/A'
    try:
        age5 = today_a - datetime.strptime(end5, '%d %b %Y')
        age5 = round(age5.total_seconds() / 86400., 1)
    except:
        age5 = 'N/A'

    #GENERIC OUTPUT-----------------------------------------------------------------
    #1 - ICOBench, 2 - ICODrops, 3 - TokenMarket, 4 - ICORating, 5 - ICOMarks

    # The header line is written by hand; only the data row goes through
    # csv.writer. N_daily_views / N_daily_time are not part of this file.
    columnTitles = "coin,start1,end1,duration1,age1,start3,end3,duration3,age3,start4,end4,duration4,age4,start5,end5,duration5,age5,country1,country2,country3,country5,industry1,industry2,industry4,team1,team2,team3,team4,team5,team6,raised1,raised2,raised4,raised5,hardcap1,hardcap2,hardcap4,hardcap5,hardcap6,success1,success2,success4,price1,price2,price4,price5,price6,telegram1,telegram2,telegram4,N_google_news,N_twitter,hype4,risk4,bazaar_rating6,ret_day1,vol_day1,sharpe_1,sharpe_3,sharpe_yr,sharpe_yr2,beta_btc,beta_top10,alpha_btc,alpha_top10\n"

    with open('outdata/ico_data_full_single.csv', 'w') as csvfile1:
        csvfile1.write(columnTitles)
        writer = csv.writer(csvfile1, delimiter=',')
        writer.writerow([
            currency, start1, end1, duration1, age1, start3, end3, duration3,
            age3, start4, end4, duration4, age4, start5, end5, duration5, age5,
            country1, country2, country3, country5, industry1, industry2,
            industry4, team1, team2, team3, team4, team5, team6, raised1,
            raised2, raised4, raised5, hardcap1, hardcap2, hardcap4, hardcap5,
            hardcap6, success1, success2, success4, price1, price2, price4,
            price5, price6, telegram1, telegram2, telegram4, N_google_news,
            N_twitter, hype4, risk4, bazaar_rating6, ret_day1, vol_day1,
            sharpe_1, sharpe_3, sharpe_yr, sharpe_yr2, beta_btc, beta_top10,
            alpha_btc, alpha_top10
        ])

    #--------------------------------------------------------------------------------
    #--------------------------------------------------------------------------------
    #--------------------------------------------------------------------------------
    #HIERARCHICAL OUTPUT (No Ratings by ICOrating.com)-------------------------------
    #1 - ICOBench, 2 - ICODrops, 3 - TokenMarket, 4 - ICORating, 5 - ICOMarks

    # Per-field candidate lists, one entry per source that reports the field.
    start_date_all = [start1, start3, start4, start5]
    end_date_all = [end1, end3, end4, end5]
    duration_all = [duration1, duration3, duration4, duration5]
    age_all = [age1, age3, age4, age5]
    country_all = [country1, country2, country3, country5]
    industry_all = [industry1, industry2, industry4]
    team_all = [team1, team2, team3, team4, team5, team6]
    raised_all = [raised1, raised2, raised4, raised5]
    hardcap_all = [hardcap1, hardcap2, hardcap4, hardcap5, hardcap6]
    success_all = [success1, success2, success4]
    price_all = [price1, price2, price4, price5, price6]
    telegram_all = [telegram1, telegram2, telegram4]

    #1. Determining start date, end date and duration
    #1A) First remove all 'N/A' occurrences
    # Each list is filtered independently, so the four lists may end up with
    # different lengths.
    duration_all = [item for item in duration_all if item != 'N/A']
    start_date_all = [item for item in start_date_all if item != 'N/A']
    end_date_all = [item for item in end_date_all if item != 'N/A']
    age_all = [item for item in age_all if item != 'N/A']

    #1B) If length of array is zero after removals, then final values are 'N/A'
    # Only duration_all is checked; when it is empty all four lists are reset.
    if len(duration_all) == 0:
        duration = 'N/A'
        start = 'N/A'
        end = 'N/A'
        age = 'N/A'
        [duration_all, start_date_all, end_date_all, age_all] = [['N/A'],
                                                                 ['N/A'],
                                                                 ['N/A'],
                                                                 ['N/A']]

    #1C) If all values are the same then adopt that value as the final value
    if duration_all.count(duration_all[0]) == len(duration_all):
        duration = duration_all[0]
        start = start_date_all[0]
        end = end_date_all[0]
        try:
            age = age_all[0]
        except:
            age = 'N/A'

    #1D) If values are not the same then adopt the value that appears most times. If failure adopt first elements.
    # NOTE(review): `counts` comes from np.unique(..., return_counts=True) and
    # is therefore a NumPy array; NumPy arrays have no .count() method, so
    # the first `counts.count(...)` line appears to always raise
    # AttributeError and this section always ends in the except branch
    # (first elements adopted). The same pattern recurs in sections 2, 3, 4,
    # 5, 9 and 10 below - confirm and fix together.
    try:
        if duration_all.count(duration_all[0]) != len(duration_all):
            (values, counts) = np.unique(duration_all, return_counts=True)
            (values2, counts2) = np.unique(start_date_all, return_counts=True)
            (values3, counts3) = np.unique(end_date_all, return_counts=True)
            (values4, counts4) = np.unique(age_all, return_counts=True)
            if counts.count(counts[0]) == len(counts):
                duration = duration_all[0]
            if counts2.count(counts2[0]) == len(counts2):
                start = start_date_all[0]
            if counts3.count(counts3[0]) == len(counts3):
                end = end_date_all[0]
            if counts4.count(counts4[0]) == len(counts4):
                age = age_all[0]
            ind = np.argmax(counts)
            ind2 = np.argmax(counts2)
            ind3 = np.argmax(counts3)
            ind4 = np.argmax(counts4)
            duration = values[ind]
            start = values2[ind2]
            end = values3[ind3]
            age = values4[ind4]
    except:
        duration = duration_all[0]
        start = start_date_all[0]
        end = end_date_all[0]
        age = age_all[0]

    # Make a negative duration positive.
    try:
        if duration < 0:
            duration = -duration

    except:
        # NOTE(review): this is a comparison, not an assignment, so it has no
        # effect; `duration` keeps whatever value it had.
        duration == 'N/A'

    #2. Determining geographical region
    #2A) First remove all 'N/A' occurrences
    country_all = [item for item in country_all if item != 'N/A']

    #2B) If length of array is zero after removals, then final values are 'N/A'
    if len(country_all) == 0:
        region = 'N/A'
        country_all = ['N/A']

    #2C) If all values are the same then adopt that value as the final value
    # This also runs for the ['N/A'] placeholder set in 2B, so func_region is
    # then called with 'N/A'.
    if country_all.count(country_all[0]) == len(country_all):
        region = country_all[0]
        region = func_region(region)

    #2D) If values are not the same then adopt the value that appears most times. If failure adopt first element.
    try:
        if country_all.count(country_all[0]) != len(country_all):
            (values, counts) = np.unique(country_all, return_counts=True)
            if counts.count(counts[0]) == len(counts):
                region = func_region(country_all[0])
            ind = np.argmax(counts)
            region = values[ind]
            region = func_region(region)
    except:
        region = func_region(country_all[0])

    #3. Determining industry category
    #3A) First remove all 'N/A' occurrences
    industry_all = [item for item in industry_all if item != 'N/A']

    #3B) If length of array is zero after removals, then final values are 'N/A'
    if len(industry_all) == 0:
        industry = 'N/A'
        industry_all = ['N/A']

    #3C) If all values are the same then adopt that value as the final value
    if industry_all.count(industry_all[0]) == len(industry_all):
        industry_a = industry_all[0]
        industry = func_industry(industry_a)

    #3D) If values are not the same then adopt the value that appears most times. If failure adopt last element (ICOrating/ICOdrops/ICObench).
    # NOTE(review): the except branch below adopts the FIRST element
    # (industry_all[0]), not the last one as the comment above states.
    try:
        if industry_all.count(industry_all[0]) != len(industry_all):
            (values, counts) = np.unique(industry_all, return_counts=True)
            if counts.count(counts[0]) == len(counts):
                industry = func_industry(industry_all[len(industry_all) - 1])
            ind = np.argmax(counts)
            industry_a = values[ind]
            industry = func_industry(industry_a)
    except:
        industry = func_industry(industry_all[0])

    #4. Determining team size

    #4A) Check if data available from tokenmarket or icomarks. If yes adopt this as team size value
    # team_all[2] is team3 (TokenMarket), team_all[4] is team5 (ICOMarks);
    # when the two differ, the ICOMarks value is taken.

    if (team_all[2] != 'N/A') or (team_all[4] != 'N/A'):
        if team_all[2] == team_all[4]:
            team = team_all[2]
        if team_all[2] != team_all[4]:
            team = team_all[4]

    #4B) In the opposite case, proceed as before

    #4C) First remove all 'N/A' occurrences

    # `team` is only evaluated here when team_all[4] != 'N/A' (short-circuit
    # `or`), and in that case the branch above has already assigned it.
    if (team_all[4] == 'N/A') or (team == 'N/A'):
        team_all = [item for item in team_all if item != 'N/A']

        #4D) If length of array is zero after removals, then final values are 'N/A'
        if len(team_all) == 0:
            team = 'N/A'
            team_all = ['N/A']

        #4E) If all values are the same then adopt that value as the final value
        if team_all.count(team_all[0]) == len(team_all):
            team = team_all[0]

        #4F) If values are not the same then adopt the value that appears most times if failure adopt first element
        try:
            if team_all.count(team_all[0]) != len(team_all):
                (values, counts) = np.unique(team_all, return_counts=True)
                if counts.count(counts[0]) == len(counts):
                    team = team_all[0]
                ind = np.argmax(counts)
                team = values[ind]
        except:
            team = team_all[0]

    #5. Determining success, money raised, hardcap

    success = 'N/A'

    ###

    # The unfiltered raised_all / hardcap_all / success_all lists are kept
    # because they are indexed by source position further below.
    raised_all2 = [item for item in raised_all if item != 'N/A']
    hardcap_all2 = [item for item in hardcap_all if item != 'N/A']

    if len(raised_all2) == 0:
        raised_all2 = ['N/A']
        raised = raised_all2[0]

    if len(hardcap_all2) == 0:
        hardcap_all2 = ['N/A']
        hardcap = hardcap_all2[0]

    if raised_all2.count(raised_all2[0]) == len(raised_all2):
        raised = raised_all2[0]

    if hardcap_all2.count(hardcap_all2[0]) == len(hardcap_all2):
        hardcap = hardcap_all2[0]

    try:
        if raised_all2.count(raised_all2[0]) != len(raised_all2):
            (values, counts) = np.unique(raised_all2, return_counts=True)
            ind = np.argmax(counts)
            raised = values[ind]

            if counts.count(counts[0]) == len(counts):
                raised = raised_all2[0]

    except:
        raised = raised_all2[0]

    try:
        if hardcap_all2.count(hardcap_all2[0]) != len(hardcap_all2):
            (values, counts) = np.unique(hardcap_all2, return_counts=True)
            ind = np.argmax(counts)
            hardcap = values[ind]

            if counts.count(counts[0]) == len(counts):
                hardcap = hardcap_all2[0]

    except:
        hardcap = hardcap_all2[0]

    # `success` was set to 'N/A' just above, so this condition is always true
    # at this point: success is first estimated as raised / hardcap, capped
    # at 1.0.
    if success == 'N/A':
        try:
            success = min(round(raised / hardcap, 2), 1.0)
        except:
            success = 'N/A'

    success_all2 = [item for item in success_all if item != 'N/A']

    if len(success_all2) == 0:
        success_all2 = ['N/A']
        success = success_all2[0]

    # Pick success / raised / hardcap from a single source, depending on
    # which of success1 (index 0), success2 (index 1) and success4 (index 2)
    # are available. Only four availability patterns are handled; in every
    # other pattern the values computed above are kept.
    if (success_all[0] != 'N/A') and (success_all[1]
                                      == 'N/A') and (success_all[2] == 'N/A'):
        success = min(success_all[0], 1.0)
        raised = raised_all[0]
        hardcap = hardcap_all[0]

    if (success_all[0]
            == 'N/A') and (success_all[1] != 'N/A') and (success_all[2]
                                                         == 'N/A'):
        success = min(success_all[1], 1.0)
        raised = raised_all[1]
        hardcap = hardcap_all[1]

    if (success_all[0] != 'N/A') and (success_all[1] !=
                                      'N/A') and (success_all[2] == 'N/A'):
        success = min(success_all[1], 1.0)
        raised = raised_all[1]
        hardcap = hardcap_all[1]

    if (success_all[0] == 'N/A') and (success_all[1]
                                      == 'N/A') and (success_all[2] != 'N/A'):
        success = min(success_all[2], 1.0)
        raised = raised_all[2]
        hardcap = hardcap_all[2]

    if success == 'N/A':
        try:
            success = min(round(raised / hardcap, 2), 1.0)
        except:
            success = 'N/A'

    try:
        hardcap = round(hardcap, 0)
    except:
        hardcap = 'N/A'

    # When both raised and success are known, hardcap is overwritten with
    # raised / success.
    if raised != 'N/A' and success != 'N/A':
        try:
            hardcap = round(raised / success, 0)
        except:
            hardcap = 'N/A'

    if team == 0:
        team = 'N/A'

    #9. Determining ICO token price
    #9A) Check if data available from icobench or icodrops or icomarks. If yes adopt this money raised value
    # price_all[0] is price1, price_all[1] is price2, price_all[3] is price5;
    # the first of them that can be rounded is used.
    if (price_all[0] != 'N/A') or (price_all[1] != 'N/A') or (price_all[3] !=
                                                              'N/A'):
        try:
            price = round(price_all[0], 5)
        except:
            try:
                price = round(price_all[1], 5)
            except:
                price = round(price_all[3], 5)

    #9B) In the opposite case, proceed as before
    # NOTE(review): this condition is not the logical opposite of 9A (that
    # would need `and`). It is true whenever ANY of the three values is
    # 'N/A', so this block can run after 9A and replace its result - confirm
    # the intent.
    if (price_all[0] == 'N/A') or (price_all[1] == 'N/A') or (price_all[3]
                                                              == 'N/A'):

        #9C) First remove all 'N/A' occurrences
        price_all = [item for item in price_all if item != 'N/A']

        #9D) If length of array is zero after removals, then final values are 'N/A'
        if len(price_all) == 0:
            price_all = ['N/A']
            price = 'N/A'

        #9E) If all values are the same then adopt that value as the final value
        if price_all.count(price_all[0]) == len(price_all):
            try:
                price = round(price_all[0], 5)
            except:
                price = 'N/A'

        #9F) If values are not the same then adopt the value that appears most times. If failure adopt first element.
        try:
            if price_all.count(price_all[0]) != len(price_all):
                (values, counts) = np.unique(price_all, return_counts=True)
                if counts.count(counts[0]) == len(counts):
                    price = round(price_all[0], 5)
                ind = np.argmax(counts)
                price = round(values[ind], 5)
        except:
            try:
                price = round(price_all[0], 5)
            except:
                try:
                    price = round(price_all[1], 5)
                except:
                    price = 'N/A'

    #10. Determining telegram follower count

    #10A) First remove all 'N/A' occurrences
    telegram_all = [item for item in telegram_all if item != 'N/A']

    #10B) If length of array is zero after removals, then final values are 'N/A'
    if len(telegram_all) == 0:
        telegram = 'N/A'
        telegram_all = ['N/A']

    #10C) If all values are the same then adopt that value as the final value
    if telegram_all.count(telegram_all[0]) == len(telegram_all):
        telegram = telegram_all[0]

    #10D) If values are not the same then adopt the value that appears most times. If failure adopt first element.
    try:
        if telegram_all.count(telegram_all[0]) != len(telegram_all):
            (values, counts) = np.unique(telegram_all, return_counts=True)
            if counts.count(counts[0]) == len(counts):
                telegram = telegram_all[0]
            ind = np.argmax(counts)
            telegram = values[ind]
    except:
        telegram = telegram_all[0]

    #11) Calculating first day exchange returns compared to ICO token price

    try:
        ret_icoday1 = round((c[0] - price) / price, 5)
    except:
        ret_icoday1 = 'N/A'

    # Self-assignment; has no effect.
    ret_icoday1 = ret_icoday1

    columnTitles2 = "coin,start,end,duration,age,region,industry,team,raised,hardcap,success,price,telegram,N_google_news,N_twitter,N_daily_views,N_daily_time,ret_ico_to_day_one,vol_day1,sharpe_1,sharpe_3,sharpe_yr,sharpe_yr2,beta_btc,beta_top10,alpha_btc,alpha_top10\n"

    with open('outdata/ico_data_reduced_single.csv', 'w') as csvfile2:
        csvfile2.write(columnTitles2)
        writer = csv.writer(csvfile2, delimiter=',')
        writer.writerow([
            currency, start, end, duration, age, region, industry, team,
            raised, hardcap, success, price, telegram, N_google_news,
            N_twitter, N_daily_views, N_daily_time, ret_icoday1, vol_day1,
            sharpe_1, sharpe_3, sharpe_yr, sharpe_yr2, beta_btc, beta_top10,
            alpha_btc, alpha_top10
        ])

    #--------------------------------------------------------------------------------
    #--------------------------------------------------------------------------------
    #--------------------------------------------------------------------------------
    #HIERARCHICAL OUTPUT (Ratings by ICOrating.com and ICOBazaar.com included)-------
    #1 - ICOBench, 2 - ICODrops, 3 - TokenMarket, 4 - ICORating, 5 - ICOMarks

    hype = hype4
    risk = risk4
    bazaar = bazaar_rating6

    columnTitles3 = "coin,start,end,duration,age,region,industry,team,raised,hardcap,success,price,telegram,N_google_news,N_twitter,N_daily_views,N_daily_time,hype,risk,bazaar-rate,ret_ico_to_day_one,vol_day1,sharpe_1,sharpe_3,sharpe_yr,sharpe_yr2,beta_btc,beta_top10,alpha_btc,alpha_top10\n"

    with open('outdata/ico_data_reduced_wratings_single.csv', 'w') as csvfile3:
        csvfile3.write(columnTitles3)
        writer = csv.writer(csvfile3, delimiter=',')
        writer.writerow([
            currency, start, end, duration, age, region, industry, team,
            raised, hardcap, success, price, telegram, N_google_news,
            N_twitter, N_daily_views, N_daily_time, hype, risk, bazaar,
            ret_icoday1, vol_day1, sharpe_1, sharpe_3, sharpe_yr, sharpe_yr2,
            beta_btc, beta_top10, alpha_btc, alpha_top10
        ])

    #--------------------------------------------------------------------------------
    #--------------------------------------------------------------------------------
    #--------------------------------------------------------------------------------
    # Two lists are returned (as a tuple): the per-source values, then the
    # consolidated values with ratings. Unlike the first CSV file, the first
    # list also contains N_daily_views and N_daily_time.
    return [
        currency, start1, end1, duration1, age1, start3, end3, duration3, age3,
        start4, end4, duration4, age4, start5, end5, duration5, age5, country1,
        country2, country3, country5, industry1, industry2, industry4, team1,
        team2, team3, team4, team5, team6, raised1, raised2, raised4, raised5,
        hardcap1, hardcap2, hardcap4, hardcap5, hardcap6, success1, success2,
        success4, price1, price2, price4, price5, price6, telegram1, telegram2,
        telegram4, N_google_news, N_twitter, N_daily_views, N_daily_time,
        hype4, risk4, bazaar_rating6, ret_day1, vol_day1, sharpe_1, sharpe_3,
        sharpe_yr, sharpe_yr2, beta_btc, beta_top10, alpha_btc, alpha_top10
    ], [
        currency, start, end, duration, age, region, industry, team, raised,
        hardcap, success, price, telegram, N_google_news, N_twitter,
        N_daily_views, N_daily_time, hype, risk, bazaar, ret_icoday1, vol_day1,
        sharpe_1, sharpe_3, sharpe_yr, sharpe_yr2, beta_btc, beta_top10,
        alpha_btc, alpha_top10
    ]
Ejemplo n.º 3
0
def ico_rank_full(user_input):
    """Grade an ICO feature vector and return its normalised rating.

    Every position of ``user_input`` receives an integer grade between 0
    and 5; the mean of those grades divided by 5 is returned, rounded to
    two decimals.

    Positions of ``user_input`` (each entry is a string):
        0     region code; evaluated and graded from a fixed table
        1     industry; normalised by ``func_industry`` and graded from a
              fixed table
        2     team (used as-is)
        3-5   hardcap, price, telegram (log10 is taken before grading)
        6     not graded here -- see the NOTE(review) in the loop
        7-9   twitter, daily views, daily time (log10 is taken)

    Numeric features are graded against ``scaling_dataset/<name>_z.npy``
    (per-bin values) and ``scaling_dataset/<name>_bin.npy`` (six bin
    edges). Both are read with ``np.loadtxt``, i.e. they are text files
    despite the ``.npy`` extension. The grade is the 1-based position of
    the input's bin when the bins are sorted by ascending ``z``, or 0 if
    the input falls outside the bin edges.

    Args:
        user_input: sequence of feature strings laid out as above.

    Returns:
        The mean grade divided by 5.0, rounded to 2 decimals.

    Raises:
        IndexError: if ``user_input`` has an entry at an index for which
            no scaling data is defined (index 10 and above).
    """
    # index -> (scaling file stem, whether log10 is applied to the input)
    scaled_features = {
        2: ('team', False),
        3: ('hardcap', True),
        4: ('price', True),
        5: ('telegram', True),
        7: ('twitter', True),
        8: ('dailyviews', True),
        9: ('dailytime', True),
    }
    # (region codes, grade); codes not listed (including 10-12) grade 0.
    region_grades = (
        ((7,), 5),
        ((1, 5), 4),
        ((4, 6), 3),
        ((2, 3), 2),
        ((8, 9), 1),
    )
    industry_grades = {
        'blockchain': 5,
        'fintech': 4,
        'saas': 3,
        'gaming': 2,
        'insurance services': 1,
        'telecommunications': 1,
    }

    ranks = []
    for i, raw in enumerate(user_input):
        if i == 0:
            # NOTE(security): eval() executes arbitrary expressions; only
            # feed this function trusted strings.
            region_in = eval(raw)
            grade = 0
            for codes, value in region_grades:
                if region_in in codes:
                    grade = value
        elif i == 1:
            grade = industry_grades.get(func_industry(raw), 0)
        elif i == 6:
            # NOTE(review): index 6 has no scaling data, so the grade of
            # the previous feature (index 5) is carried over and counted
            # a second time in the mean. Kept as-is because changing it
            # would alter every rating; confirm whether this is intended.
            pass
        else:
            if i not in scaled_features:
                raise IndexError(
                    'no scaling data for feature index ' + str(i))
            stem, use_log = scaled_features[i]

            # NOTE(security): see the eval() remark above.
            feature_in = eval(raw)
            if use_log:
                feature_in = np.log10(feature_in)
            z = np.loadtxt('scaling_dataset/' + stem + '_z.npy')
            bins = np.loadtxt('scaling_dataset/' + stem + '_bin.npy')

            # Locate the half-open bin [bins[k], bins[k + 1]) holding the
            # input; 0 means "outside every bin".
            group = 0
            for k in range(5):
                if bins[k] <= feature_in < bins[k + 1]:
                    group = k + 1
            if (feature_in < bins[0]) or (feature_in > bins[5]):
                group = 0

            # Grade = 1-based rank of that bin when sorted by ascending z.
            order = np.argsort(z)
            try:
                grade = np.where(order == group - 1)[0][0] + 1
            except IndexError:
                # No bin matched (group == 0), so there is nothing to rank.
                grade = 0

        ranks.append(grade)

    ico_rating = np.mean(ranks)
    return round(ico_rating / 5.0, 2)
Ejemplo n.º 4
0
def ico_rank(ico_inp,ico_inp_token):
    """Interactively grade one ICO, feature by feature, against the CSV dataset.

    The ICO's feature values are fetched through ``ico_data_collector``; any
    value reported as 'N/A' is requested from the user on stdin. For every
    feature in ``features_vec`` the function then re-reads
    ``ico_data_reduced.csv``, prints summary statistics, shows matplotlib
    histograms (each ``plt.show()`` call is a side effect), and assigns an
    integer grade between 0 and 5. The mean grade and its value divided by 5
    are printed at the end.

    Args:
        ico_inp: passed as the first and third element of the list handed to
            ``ico_data_collector``.
        ico_inp_token: passed as the second element of that list.

    Returns:
        A 2-tuple ``('Normalized BloxVerse Rating: ', ico_rating0)`` where
        ``ico_rating0`` is the mean grade divided by 5.0 (not rounded).

    Note:
        Calls ``sys.exit()`` if a feature name is not found in the CSV header.
    """


    #Here we allow the user to import the features of the ICO that is under investigation
    # Order matters: features_vec[i] labels user_input[i] built further down.
    features_vec = ['region','industry','team','hardcap','price','telegram','N_google_news','N_twitter']
    #user_input = ['united states', 'fintech', '4', '10000000', '0.20', '3210', '1', '5632']

    bitcoin = func_btc()
    top10s = func_top10()
    # Only the second element of the collector's return value is used.
    ico_data = ico_data_collector([ico_inp,ico_inp_token,ico_inp],bitcoin,top10s)[1]
    #revert to verbose region variable
    # NOTE(review): if ico_data[5] is not one of 1..12 (e.g. 'N/A'), `reg` is
    # never assigned and the append below fails with an unbound-name error.
    if ico_data[5] == 1:
        reg = 'usa'
    if ico_data[5] == 2:
        reg = 'russia'
    if ico_data[5] == 3:
        reg = 'china'
    if ico_data[5] == 4:
        reg = 'uk'
    if ico_data[5] == 5:
        reg = 'estonia'
    if ico_data[5] == 6:
        reg = 'switzerland'
    if ico_data[5] == 7:
        reg = 'singapore'
    if ico_data[5] == 8:
        reg = 'japan'
    if ico_data[5] == 9:
        reg = 'australia'
    if ico_data[5] == 10:
        reg = 'brazil'
    if ico_data[5] == 11:
        reg = 'south africa'
    if ico_data[5] == 12:
        reg = 'mongolia'

    # Build the feature vector in features_vec order from selected positions
    # of ico_data (6, 7, 9, 11, 12, 13, 14), with the region name first.
    user_input = []
    user_input.append(reg)
    user_input.append(ico_data[6])
    user_input.append(ico_data[7])
    user_input.append(ico_data[9])
    user_input.append(ico_data[11])
    user_input.append(ico_data[12])
    user_input.append(ico_data[13])
    user_input.append(ico_data[14])

    # Known values are stringified (they are eval()'d later); missing ones
    # are asked for interactively.
    for i in range(0,len(user_input)):
        if user_input[i] != 'N/A':
            user_input[i] = str(user_input[i])
        if user_input[i] == 'N/A':
            user_input[i] = input("Enter ICO feature: "+features_vec[i]+"\n")

#user_input = []
#for i in range(0,len(features_vec)):
#    user_input.append(i)
#    user_input[i] = input("Enter ICO "+features_vec[i]+"\n")

    # ranks[kk] receives the grade of the kk-th feature; the appended value
    # is only a placeholder that is overwritten at the end of each iteration.
    kk = -1
    ranks = []
    for feature in features_vec:
        kk = kk + 1
        ranks.append(kk)


        success_threshold = 0.7

        # NOTE(review): the CSV is re-read from disk on every feature
        # iteration although its content does not depend on `feature`.
        with open("ico_data_reduced.csv") as f:
            reader = csv.reader(f)
            data = [r for r in reader]

        data = np.asarray(data)

        # Rows whose column 10 (used as the success value) is present.
        # The first hit is dropped -- presumably the header row, since
        # data[0,:] is used as the header below.
        indices, = np.where(data[:,10] != 'N/A')
        indices = np.delete(indices,0)

        # NOTE(review): `success` is not read anywhere below.
        success = [eval(data[i][10]) for i in indices]

        # Locate the feature's column via the header row; abort the whole
        # process if the name is unknown.
        try:
            ind_feature = np.where(data[0,:]==feature)[0][0]
        except:
            print('ERROR: This feature does not exist in this dataset')
            sys.exit()

        # Normalise the ' N/A' spelling (leading space) for these columns.
        # Neither name is in features_vec, so this branch is not reached
        # with the current feature list.
        if feature in ['hype','risk']:
            for i in range(0,len(data)):
                if data[i,ind_feature] == ' N/A':
                    data[i,ind_feature] = 'N/A'

        # Rows where the feature itself is present (first hit dropped as above).
        indices_f, = np.where(data[:,ind_feature] != 'N/A')
        indices_f = np.delete(indices_f,0)

        # Intersection of the two index sets, in the order of `indices`:
        # rows that have both a success value and a feature value.
        indices_over = []
        k = -1
        for i in range(0,len(indices)):
            for j in range(0,len(indices_f)):
                if indices_f[j] == indices[i]:
                    k = k + 1
                    indices_over.append(k)
                    indices_over[k] = indices[i]

        # NOTE(review): eval() is applied to raw CSV cells here and below;
        # the CSV must be trusted.
        success0 = [eval(data[i][10]) for i in indices_over]
        variable0 = [data[i][ind_feature] for i in indices_over]

        #Feature Controls
        # Convert the string cells to numbers; which transform is applied
        # depends on the feature (absolute value and/or log10).
        if feature in ['team','N_google_news']:
            for i in range(0,len(variable0)):
                variable0[i] = abs(eval(variable0[i]))

        if feature == 'hardcap':
            for i in range(0,len(variable0)):
                variable0[i] = abs(eval(variable0[i]))
                variable0[i] = np.log10(variable0[i])


        if feature in ['price','telegram','N_twitter']:
            for i in range(0,len(variable0)):
                # A literal '0' is replaced by '1' so that log10 yields 0
                # instead of -inf.
                if variable0[i] == '0':
                    variable0[i] = str(1)
                variable0[i] = np.log10(eval(variable0[i]))
                #print(variable0[i])

        # 'bazaar-rate' is not in features_vec, so this is not reached with
        # the current feature list.
        if feature == 'bazaar-rate':
            for i in range(0,len(variable0)):
                variable0[i] = eval(variable0[i])



        #Now reduce feature array to > 70% success values
        success0 = np.asarray(success0)
        indices2, = np.where(success0 > success_threshold)

        # success0b / variable0b: the subset of rows above the threshold,
        # in original order.
        success0b = []
        variable0b = []
        k = -1
        for i in range(0,len(success0)):
            if i in indices2:
                k = k + 1
                success0b.append(k)
                variable0b.append(k)
                success0b[k] = success0[i]
                variable0b[k] = variable0[i]

        # Print summary statistics for the full and the successful sample.
        # Any failure in the numeric reductions falls through to the except
        # branch, which only prints the sample sizes (the message indicates
        # this is the expected path for categorical features).
        try:
            print('-------BASIC FEATURE STATISTICS: FULL SAMPLE-------')
            print('For feature:',feature.upper(),'the sample size is: ',len(variable0))
            print('For feature:',feature.upper(),'the max value is: ',np.max(variable0))
            print('For feature:',feature.upper(),'the min value is: ',np.min(variable0))
            print('For feature:',feature.upper(),'the mean value is: ',round(np.mean(variable0),3))
            print('For feature:',feature.upper(),'the median value is: ',round(np.median(variable0),3))
            print('For feature:',feature.upper(),'the standard deviation is: ',round(np.std(variable0),3))
            print('---------------------------------------------------')
            print('---------------------------------------------------')
            print('-------BASIC FEATURE STATISTICS: SUCCESSFUL SAMPLE-------')
            print('For feature:',feature.upper(),'the sample size is: ',len(variable0b))
            print('For feature:',feature.upper(),'the max value is: ',np.max(variable0b))
            print('For feature:',feature.upper(),'the min value is: ',np.min(variable0b))
            print('For feature:',feature.upper(),'the mean value is: ',round(np.mean(variable0b),3))
            print('For feature:',feature.upper(),'the median value is: ',round(np.median(variable0b),3))
            print('For feature:',feature.upper(),'the standard deviation is: ',round(np.std(variable0b),3))
            print('---------------------------------------------------------')
            print('---------------------------------------------------------')
        except:
            print('Statistics Are Not Done for Categorical Features')
            print('For feature:',feature.upper(),'the sample size is: ',len(variable0))
            print('For feature:',feature.upper(),'the sample size is: ',len(variable0b))


        # Histogram of the full sample. z/bins are overwritten by the
        # histograms of the successful sample below before they are used.
        z,bins,p3 = plt.hist(variable0, bins = 'auto', rwidth=0.9, facecolor = 'blue')
        plt.xlabel(feature,fontsize=17)
        plt.ylabel('Number',fontsize=15)
        plt.xticks(size = 15)
        plt.yticks(size = 15)
        plt.show()

        #fig, ax = plt.subplots()
        #plt.xlabel(feature)
        #plt.ylabel('success')
        #ax.scatter(variable0, success0, c='k')
        #plt.show()
        #fig, ax = plt.subplots()
        #plt.xlabel(feature)
        #plt.ylabel('success')
        #ax.scatter(variable0b, success0b, c='k')
        #plt.show()

        # Histogram of the successful sample with its median marked.
        # If anything in the try block raises, the except branch redraws a
        # plain histogram with a feature-specific bin count.
        # NOTE(review): `stdev` and `f` are only referenced by the
        # commented-out lines; `f` also reuses the name of the closed CSV
        # file handle.
        try:
            avg = np.median(variable0b)
            stdev = np.std(variable0b)
            f = [0.5,1.0,1.5,2.0,2.5]

            z,bins,p3 = plt.hist(variable0b, bins = 5, rwidth=0.9, facecolor = 'blue')
            plt.axvline(x=avg,linestyle = "-", linewidth = 2, color = 'k')
        #    for i in f:
        #        plt.axvline(x=avg+i*stdev,linestyle = ":", linewidth = 1, color = 'r')
        #        plt.axvline(x=avg-i*stdev,linestyle = ":", linewidth = 1, color = 'r')

            plt.xlabel(feature,fontsize=17)
            plt.ylabel('Number',fontsize=15)
            plt.xticks(size = 15)
            plt.yticks(size = 15)
            plt.show()
        except:
            if feature not in ['region','industry']:
                print('Statistics Are Not Done for Categorical Features')
                z,bins,p3 = plt.hist(variable0b, bins = 5, rwidth=0.9, facecolor = 'blue')
                plt.xlabel(feature,fontsize=17)
                plt.ylabel('Number',fontsize=15)
                plt.xticks(size = 15)
                plt.yticks(size = 15)
                plt.show()
            if feature == 'region':
                print('Statistics Are Not Done for Categorical Features')
                z,bins,p3 = plt.hist(variable0b, bins = 12, rwidth=0.9, facecolor = 'blue')
                plt.xlabel(feature,fontsize=17)
                plt.ylabel('Number',fontsize=15)
                plt.xticks(size = 15)
                plt.yticks(size = 15)
                plt.show()
            if feature == 'industry':
                print('Statistics Are Not Done for Categorical Features')
                z,bins,p3 = plt.hist(variable0b, bins = 14, rwidth=0.9, facecolor = 'blue')
                plt.xlabel(feature,fontsize=17)
                plt.ylabel('Number',fontsize=15)
                plt.xticks(size = 15)
                plt.xticks(rotation=90)
                plt.tick_params(labelsize=7)
                plt.yticks(size = 15)
                plt.show()

    #Perform grading based on simple stat comparisons (std from median)
    #First Determine Input Data bin

        # Numeric features: grade by the population rank of the histogram
        # bin (5-bin histogram of the successful sample) that contains the
        # ICO's own value.
        if feature not in ['region','industry']:

            # Apply the same transform to the input as was applied to the
            # dataset column above (log10 for hardcap/price/telegram/N_twitter).
            if feature == 'team':
                feature_in = eval(user_input[2])
            if feature == 'hardcap':
                feature_in = np.log10(eval(user_input[3]))
            if feature == 'price':
                feature_in = np.log10(eval(user_input[4]))
            if feature == 'telegram':
                feature_in = np.log10(eval(user_input[5]))
            if feature == 'N_google_news':
                feature_in = eval(user_input[6])
            if feature == 'N_twitter':
                feature_in = np.log10(eval(user_input[7]))

            # order[r] is the index of the bin with the (r+1)-th smallest count.
            order = np.argsort(z)

            # group = 1..5 for the half-open bin containing feature_in,
            # 0 if it lies outside the edges. The loop body does not depend
            # on `i`, so the five passes all compute the same result.
            group = 0
            for i in range(0,5):
                if (bins[0] <= feature_in < bins[1]):
                    group = 1
                if (bins[1] <= feature_in < bins[2]):
                    group = 2
                if (bins[2] <= feature_in < bins[3]):
                    group = 3
                if (bins[3] <= feature_in < bins[4]):
                    group = 4
                if (bins[4] <= feature_in < bins[5]):
                    group = 5
                if (feature_in < bins[0]) or (feature_in > bins[5]):
                    group = 0

            location = group - 1

            # Grade = 1-based position of the bin in ascending-count order
            # (the most populated bin gets the highest grade); 0 when no
            # bin matched and the lookup raises.
            try:
                grade = np.where(order == location)[0][0] + 1
            except:
                grade = 0

            print('The ICO under investigation receives the grade: ',grade,' for feature: ',feature.upper())

            # Redraw the successful-sample histogram with the sample median
            # (black) and the ICO's value (red).
            # NOTE(review): `avg` is only assigned inside the try block
            # above; this line relies on that block having succeeded.
            z,bins,p3 = plt.hist(variable0b, bins = 5, rwidth=0.9, facecolor = 'blue')
            plt.axvline(x=avg,linestyle = "-", linewidth = 2, color = 'k')
            plt.axvline(x=feature_in,linestyle = "-", linewidth = 2, color = 'r')
            plt.xlabel(feature,fontsize=17)
            plt.ylabel('Number',fontsize=15)
            plt.xticks(size = 15)
            plt.yticks(size = 15)
            plt.show()

        # Region: fixed lookup from the numeric region code to a grade.
        if feature == 'region':

            region_in = func_region(user_input[0])

            grade = 0

            if region_in == 7:
                grade = 5
            if region_in in [1,5]:
                grade = 4
            if region_in in [4,6]:
                grade = 3
            if region_in in [2,3]:
                grade = 2
            if region_in in [8,9]:
                grade = 1
            if region_in in [10,11,12]:
                grade = 0

            print('The ICO under investigation receives the grade: ',grade,' for feature: ',feature.upper())

            z,bins,p3 = plt.hist(variable0b, bins = 12, rwidth=0.9, facecolor = 'blue')
            # The marker position is passed as a string -- presumably to
            # match the string-valued region cells plotted above; verify.
            plt.axvline(x=str(region_in), linestyle = "-", linewidth = 2, color = 'r')
            plt.xlabel(feature,fontsize=17)
            plt.ylabel('Number',fontsize=15)
            plt.xticks(size = 15)
            plt.yticks(size = 15)
            plt.show()

        # Industry: fixed lookup from the normalised industry name to a grade.
        if feature == 'industry':

            industry_in = func_industry(user_input[1])

            # `tag` is only used as the x position of the red marker in the
            # plot below; unknown industries keep the default of 1.
            tag = 1

            if industry_in == 'blockchain':
                tag = 0
            if industry_in == 'other':
                tag = 1
            if industry_in  == 'saas':
                tag = 2
            if industry_in == 'fintech':
                tag = 3
            if industry_in == 'gaming':
                tag = 4
            if industry_in == 'social services':
                tag = 5
            if industry_in == 'energy':
                tag = 6
            if industry_in == 'insurance services':
                tag = 7
            if industry_in == 'telecommunications':
                tag = 8
            if industry_in == 'transportation':
                tag = 9
            if industry_in == 'real estate':
                tag = 10

            grade = 0

            if industry_in in ['blockchain']:
                grade = 5
            if industry_in in  ['fintech']:
                grade = 4
            if industry_in in  ['saas']:
                grade = 3
            if industry_in in  ['gaming']:
                grade = 2
            if industry_in in  ['insurance services','telecommunications']:
                grade = 1
            if industry_in not in ['blockchain','fintech','saas','gaming','insurance services','telecommunications']:
                grade = 0

            print('The ICO under investigation receives the grade: ',grade,' for feature: ',feature.upper())

            z,bins,p3 = plt.hist(variable0b, bins = 14, rwidth=0.9, facecolor = 'blue')
            plt.xlabel(feature,fontsize=17)
            plt.ylabel('Number',fontsize=15)
            plt.axvline(x=tag, linestyle = "-", linewidth = 2, color = 'r')
            plt.xticks(size = 15)
            plt.xticks(rotation=90)
            plt.tick_params(labelsize=7)
            plt.yticks(size = 15)
            plt.show()

        # Replace the placeholder with this feature's grade.
        ranks[kk] = grade

    # Mean grade over all features, and the same value scaled by 1/5.
    ico_rating = np.mean(ranks)
    ico_rating0 = ico_rating/5.0

    print('The average BloxVerse ranking for this ICO is: ',round(ico_rating,2))
    print('In the 0-1 scale this is equivalent to: ',round(ico_rating0,2))

    # Returns a (label, value) tuple; the value is not rounded.
    return 'Normalized BloxVerse Rating: ',ico_rating0