def main():
    """Fit an ElasticNet model predicting a rank-derived "price" for .bit
    ("d/") names from lexical and popularity features, and print the
    cross-validated R^2 scores.
    """
    with open("nameDict.dat", "rb") as name_file:
        # NOTE(review): pickle.load on a local data file; never point this
        # at untrusted input.
        nameDict = pickle.load(name_file)
    bitNames = getDictSubset(nameDict,
                             lambda record: record.namespace() == "d")
    max_height = getMaxHeight(nameDict)
    dotBitAlexa = alexaRanks()

    # Feature word lists.  Use `with` so the handles are closed
    # deterministically (the original leaked them via bare open()).
    with open('dirty.txt', 'r') as dirty_file:
        dirtyWords = [word.strip() for word in dirty_file if " " not in word]
    with open('/usr/share/dict/words', 'r') as words_file:
        # NOTE(review): unused below — kept in case later code relies on it.
        dictWords = set(word.strip() for word in words_file)
    bitWordList = {"coin", "satoshi", "wallet", "crypto", "currency",
                   "btc", "nmc", "blockchain"}
    with open("name_lists/surnames.csv", "r") as surnames_file:
        reader = DictReader(surnames_file)
        # NOTE(review): also unused below.
        surnamesSet = set(line["name"].lower() for line in reader)

    # Per-name rank tables used to synthesize the regression target.
    valueChangeRank = rankNumberOfValueChanges(bitNames, max_height)
    aliveRank = rankIsAlive(bitNames, max_height)
    validJSONRank = rankJSONDict(bitNames, max_height)
    validDNSRank = rankValidDNSDict(bitNames, max_height)
    timeActiveRank = rankByTimeActive(bitNames, max_height)
    maxRank = len(bitNames)

    xData = []
    yData = []
    for name in bitNames:
        yData.append(
            price([valueChangeRank[name], aliveRank[name],
                   validJSONRank[name], validDNSRank[name],
                   timeActiveRank[name]], maxRank))
        xData.append([
            int(dotBitAlexa[name]) + 1,                          # alexaRank
            int(len(wordnet.synsets(name[2:])) >= 1) + 1,        # inDict
            int(any(dirtyWord in name.lower()
                    for dirtyWord in dirtyWords)) + 1,           # inDirty
            int(set(name[2:]).issubset(set("0123456789"))) + 1,  # isNumber
            len(name),                                           # length
            int(any(word in name.lower() for word in bitWordList)
                or name.startswith("d/bit")) + 1,                # coinRelated
            int(set(name[2:]).issubset(
                set("abcdefghijklmnopqrstuvwxyz"))) + 1,         # isAlpha
            SegmentString().string_segments(name[2:]),           # word segments
        ])

    alpha = 0.1
    enet = ElasticNet(alpha=alpha)
    # BUG FIX: the scorer string is 'r2', not 'r2_score' — the original
    # made cross_val_score raise ValueError before fitting anything.
    score = cross_val_score(enet, xData, yData, scoring='r2')
    print(score)
def main():
    """Cross-validate an ElasticNet regression of a rank-derived "price"
    for .bit ("d/") names against lexical and popularity features.
    """
    with open("nameDict.dat", "rb") as name_file:
        name_records = pickle.load(name_file)
    d_names = getDictSubset(name_records,
                            lambda record: record.namespace() == "d")
    height = getMaxHeight(name_records)
    alexa_by_name = alexaRanks()

    # Word lists feeding the lexical features.
    profanity = [w.strip() for w in open('dirty.txt', 'r') if " " not in w]
    english_words = set(w.strip()
                        for w in open('/usr/share/dict/words', 'r'))
    crypto_terms = set(["coin", "satoshi", "wallet", "crypto", "currency",
                        "btc", "nmc", "blockchain"])
    with open("name_lists/surnames.csv", "r") as surnames_file:
        surname_rows = DictReader(surnames_file)
        surnames = set(row["name"].lower() for row in surname_rows)

    # Rank tables, in the order the price() helper expects them.
    rank_tables = [
        rankNumberOfValueChanges(d_names, height),
        rankIsAlive(d_names, height),
        rankJSONDict(d_names, height),
        rankValidDNSDict(d_names, height),
        rankByTimeActive(d_names, height),
    ]
    total_names = len(d_names)

    digits = set("0123456789")
    letters = set("abcdefghijklmnopqrstuvwxyz")
    features = []
    targets = []
    for name in d_names:
        targets.append(price([table[name] for table in rank_tables],
                             total_names))
        bare = name[2:]
        lowered = name.lower()
        features.append([
            int(alexa_by_name[name]) + 1,                       # alexaRank
            int(len(wordnet.synsets(bare)) >= 1) + 1,           # inDict
            int(any(bad in lowered for bad in profanity)) + 1,  # inDirty
            int(set(bare).issubset(digits)) + 1,                # isNumber
            len(name),                                          # length
            int(any(term in lowered for term in crypto_terms)
                or name.startswith("d/bit")) + 1,               # coinRelated
            int(set(bare).issubset(letters)) + 1,
            SegmentString().string_segments(bare),
        ])

    model = ElasticNet(alpha=0.1)
    result = cross_val_score(model, features, targets, scoring='r2_score')
    print(result)
# Compare Alexa-ranked domains against currently registered .bit names and
# smooth the registered/not-registered signal with a moving average.
alexa_ranks = alexaRanks()

# Load the full name history, then narrow it to the "d/" names that are
# still valid at the current chain height.  Each intermediate dict is
# dropped (rebound to None) as soon as it is consumed to limit peak memory.
with open("nameDict.dat", "rb") as pickle_file:
    name_history = pickle.load(pickle_file)
max_height = getMaxHeight(name_history)
valid_names = getDictSubset(
    name_history, lambda record: record.isValidAtHeight(max_height))
name_history = None
active_bit_names = getDictSubset(
    valid_names, lambda record: record.namespace() == "d")
valid_names = None
names = set(name for name in active_bit_names.keys())
active_bit_names = None

# One boolean per Alexa domain (in Alexa iteration order): is it a .bit?
registered = [alexa_name in names for alexa_name in alexa_ranks]
averaged = variable_window_moving_average(registered)

# Plot styling.
rc('font', serif='Helvetica Neue')
rc('text', usetex='true')
# Build the registered-in-.bit indicator series over the Alexa ranking and
# smooth it with a variable-window moving average.
alexa_ranks = alexaRanks()

with open("nameDict.dat", "rb") as pickle_file:
    name_history = pickle.load(pickle_file)
max_height = getMaxHeight(name_history)

# Filter in two passes — valid at current height, then "d/" namespace —
# releasing each intermediate dict immediately to keep memory low.
valid_names = getDictSubset(
    name_history, lambda record: record.isValidAtHeight(max_height))
name_history = None
active_bit_names = getDictSubset(
    valid_names, lambda record: record.namespace() == "d")
valid_names = None
names = set(name for name in active_bit_names.keys())
active_bit_names = None

# Boolean per Alexa-ranked domain: registered as a .bit name?
registered = [alexa_name in names for alexa_name in alexa_ranks]
averaged = variable_window_moving_average(registered)

rc('font', serif='Helvetica Neue')
import pickle
import pdb
from re import match  # BUG FIX: `match` is called below but was never imported

from nltk.util import ngrams
from common import getDictSubset
from nameHistory import getMaxHeight
from segment_string import SegmentString

# Lowercase LDH label: leading letter, then letters/digits/hyphens with no
# trailing hyphen.  NOTE(review): {0,62} allows labels up to 64 chars; DNS
# limits labels to 63 — confirm whether this is intentional.
DOMAIN_REGEX = r"^[a-z]([a-z0-9-]{0,62}[a-z0-9])?$"


def valid_domain_name(name, has_prefix=True):
    """Return True if the record's name, minus its "d/" prefix, is a
    syntactically valid lowercase domain label.

    `has_prefix` is kept for interface compatibility but is unused here.
    """
    return bool(match(DOMAIN_REGEX, name.name()[2:]))


# Load the name history, keep only currently-valid "d/" names with valid
# domain syntax, then dump the sorted per-name word-segment counts.
with open("nameDict.dat", "rb") as pickle_file:
    names_dict = pickle.load(pickle_file)
max_height = getMaxHeight(names_dict)
names_dict = getDictSubset(names_dict,
                           lambda record: record.isValidAtHeight(max_height))
names_dict = getDictSubset(names_dict,
                           lambda record: record.namespace() == "d")
names_dict = getDictSubset(names_dict, valid_domain_name)

segment_counts = sorted(SegmentString().string_segments(name[2:])
                        for name in names_dict.keys())

with open("segment_counts.pickle", "wb") as output_file:
    pickle.dump(segment_counts, output_file)
import pickle  # BUG FIX: pickle.load/pickle.dump are used below but pickle was never imported
from re import match  # BUG FIX: `match` is called below but was never imported

from nltk.util import ngrams
from common import getDictSubset
from nameHistory import getMaxHeight
from segment_string import SegmentString

# Lowercase LDH label: leading letter, then letters/digits/hyphens with no
# trailing hyphen.  NOTE(review): {0,62} allows labels up to 64 chars; DNS
# limits labels to 63 — confirm whether this is intentional.
DOMAIN_REGEX = r"^[a-z]([a-z0-9-]{0,62}[a-z0-9])?$"


def valid_domain_name(name, has_prefix=True):
    """Return True if the record's name, minus its "d/" prefix, is a
    syntactically valid lowercase domain label.

    `has_prefix` is kept for interface compatibility but is unused here.
    """
    return bool(match(DOMAIN_REGEX, name.name()[2:]))


# Load the name history, keep only currently-valid "d/" names with valid
# domain syntax, then dump the sorted per-name word-segment counts.
with open("nameDict.dat", "rb") as pickle_file:
    names_dict = pickle.load(pickle_file)
max_height = getMaxHeight(names_dict)
names_dict = getDictSubset(names_dict,
                           lambda record: record.isValidAtHeight(max_height))
names_dict = getDictSubset(names_dict,
                           lambda record: record.namespace() == "d")
names_dict = getDictSubset(names_dict, valid_domain_name)

segment_counts = sorted(
    SegmentString().string_segments(name[2:]) for name in names_dict.keys())

with open("segment_counts.pickle", "wb") as output_file:
    pickle.dump(segment_counts, output_file)