def fix_experiment(file_loc):
    full_path = os.path.join(wd, file_loc)
    if 'params.experiment' in file_loc:
        exp_params = load_obj(full_path)
        if 'stimuli_sets' in exp_params:
            exp_params.pop('stimuli_sets')
            save_obj(exp_params, full_path)
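# The snippets in this file rely on load_obj/save_obj helpers that are never
# shown. A minimal pickle-based sketch follows; the names and the (obj, path)
# argument order are taken from fix_experiment above, but note that the
# stock-data getters further down call save_obj(path, obj), so their module
# evidently uses the reversed convention. Everything below is an assumption,
# not the original implementation.
import pickle

def save_obj(obj, path):
    # Serialize obj to path with pickle.
    with open(path, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)

def load_obj(path):
    # Load and return a pickled object from path.
    with open(path, 'rb') as f:
        return pickle.load(f)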
def __init__(self):
    self.data = tools.load_from_csv(cfg.CSV_PATH)
    self.all_json = tools.load_obj(cfg.SAVING_FILE)
    self.mapping = {}
    self.stats = defaultdict(tuple)
    self.max_freq = 0
    self.max_area = 0
    self.max_shared_logos = 0
    plt.rc('font', size=8)
def get_name2code(wgt_data):
    # Map stock_name -> ts_code, cached on disk at config.name2code.
    file_path = config.name2code
    if os.path.exists(file_path):
        return load_obj(file_path)
    name2code = {}
    all_code = wgt_data['ts_code'].drop_duplicates().to_list()
    for code in all_code:
        stock_name = wgt_data[wgt_data.ts_code == code]['stock_name'].iloc[0]
        name2code[stock_name] = code
    save_obj(file_path, name2code)
    return name2code
def get_gics2name():
    # Map gics_code -> industry_name, cached on disk at config.gics2name.
    file_path = config.gics2name
    if os.path.exists(file_path):
        return load_obj(file_path)
    hs300_industry_df = prepare_hs300_industry_data_df()
    industry_name = hs300_industry_df['industry_name'].to_list()
    gics_code = hs300_industry_df['gics_code'].to_list()
    # Dedupe (gics_code, industry_name) pairs before building the mapping.
    gics2name = dict(set(zip(gics_code, industry_name)))
    save_obj(file_path, gics2name)
    return gics2name
def get_code2gics():
    # Map ts_code -> gics_code, cached on disk at config.code2gics.
    file_path = config.code2gics
    if os.path.exists(file_path):
        return load_obj(file_path)
    hs300_industry_df = prepare_hs300_industry_data_df()
    code = hs300_industry_df['ts_code'].to_list()
    gics_code = hs300_industry_df['gics_code'].to_list()
    # Dedupe (ts_code, gics_code) pairs before building the mapping.
    code2gics = dict(set(zip(code, gics_code)))
    save_obj(file_path, code2gics)
    return code2gics
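# The three getters above repeat the same load-or-build-then-cache pattern.
# A hedged refactor sketch (cached_mapping and build_name2code are my names,
# not from the source; save_obj here follows this module's (path, obj) order):
import os

def cached_mapping(file_path, build_fn):
    # Return the mapping cached at file_path, building and saving it on a miss.
    if os.path.exists(file_path):
        return load_obj(file_path)
    mapping = build_fn()
    save_obj(file_path, mapping)
    return mapping

# Usage sketch:
# name2code = cached_mapping(config.name2code, lambda: build_name2code(wgt_data))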
data_file = open("player_stats2.tab", "a")
"""data_file.write("wards placed/min\twards killed/min\tkill participation\tdragon kills/min\tbaron kills/min\t")
data_file.write("games played\tminion kills/game\tn.m. kills/game\tturrets killed/game\t")
data_file.write("different champions/game\tchamp mastery 5\tchamp mastery 10\tchamp mastery 20\t")
data_file.write("".join(["champ winrate " + str(i) + "\t" for i in range(5)]))
data_file.write("tier\n")"""
read_files = 0
pulled_summoners = list()


def write_attribute(file, value):
    file.write(str(value) + '\t')


while os.path.isfile('../data/game_data_{0}.zip'.format(read_files)):
    game = load_obj('../data/game_data_{0}.zip'.format(read_files))
    # Only keep games from patches 6.9 and 6.10.
    if not game or (game['matchVersion'][:4] != '6.10' and game['matchVersion'][:3] != '6.9'):
        read_files += 1
        print(str(read_files) + "/" + str(6000))
        continue
    for summoner_stats in game['participantStats']:
        summoner_id = summoner_stats['summonerId']
        if summoner_id not in pulled_summoners and summoner_id in game['participantTier']:
            wards_placed = wards_killed = kill_participation = dragon_kills = baron_kills = total_games = 0
            summoner_match_history = game['participantMatchHistory'][summoner_id]
            for g in summoner_match_history:
                if g['matchId'] == -1:
                    continue
                # check if we actually need to open another file to access this match
            break
        except Exception as e:
            print("Error occurred when trying to get match history details for "
                  + str(match_id) + " data: " + str(e))
            time.sleep(1)
            continue
    if num_try >= 11:
        if match_id in newer_match_ids:
            return [{'matchId': -1, 'saved_file': -1}]
    return matches_data


# restore program state
while os.path.isfile('game_data_{0}.zip'.format(saved_files)):
    with zipfile.ZipFile('game_data_{0}.zip'.format(saved_files), 'r') as myzip:
        myzip.extractall()
    match = load_obj('game_data_{0}.pkl'.format(saved_files))
    pulled_matches[match['matchId']] = saved_files
    pulled_matches_count += 1
    os.remove('game_data_{0}.pkl'.format(saved_files))
    saved_files += 1

while True:
    if len(unpulled_summoners) == 0:
        # if we run out of summoners, hopefully older summoners have already played new matches
        unpulled_summoners = random.sample(pulled_summoners, 15)
        pulled_summoners = list()
    current_summoner_id = unpulled_summoners.pop(0)
    try:
def get_winner(game):
    team = game['teams'][0]
    if team['teamId'] == BLUE:
        blue_team_winner = team['winner']
    else:
        blue_team_winner = not team['winner']
    if blue_team_winner:
        return BLUE
    else:
        return RED


while os.path.isfile('data/game_data_{0}.zip'.format(read_files)):
    game = load_obj('data/game_data_{0}.zip'.format(read_files))
    if not game or (game['matchVersion'][:4] != '6.10' and game['matchVersion'][:3] != '6.9'):
        print(str(read_files) + "/" + str(6000))
        read_files += 1
        continue
    m_id = game['matchId']
    # lists of all attributes to be written to file
    blue_attributes = list()
    red_attributes = list()
    # map summoner ids to team ids and summoner ids to champion ids - commonly needed
    summoner_team = dict()
    summoner_champion = dict()
    summoner_p_id = dict()
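# A quick sanity check for get_winner. The team-id constants are assumptions
# based on the usual Riot match-API convention (100 = blue side, 200 = red),
# and the toy game dicts are illustrative only.
BLUE, RED = 100, 200

game = {'teams': [{'teamId': RED, 'winner': True},
                  {'teamId': BLUE, 'winner': False}]}
assert get_winner(game) == RED  # red side listed first and winning

game = {'teams': [{'teamId': BLUE, 'winner': True}]}
assert get_winner(game) == BLUE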
def main():
    # Setup champion matrices
    champId_to_champName = load_obj('champion_id_to_name.pkl')
    champs = [str(champId_to_champName[key]) for key in champId_to_champName]
    champs.sort()
    champName_to_champID = {v: k for k, v in champId_to_champName.items()}

    # Champ ID --> matrix index
    champID_to_matrixIndex = {}
    for i, c in enumerate(champs):
        champID_to_matrixIndex[champName_to_champID[c]] = i
    # Matrix index --> Champ ID
    matrixIndex_to_champID = {v: k for k, v in champID_to_matrixIndex.items()}
    matrixIndex_to_champName = {}
    for champid, index in champID_to_matrixIndex.items():
        matrixIndex_to_champName[index] = champId_to_champName[champid]
    champName_to_matrixIndex = {v: k for k, v in matrixIndex_to_champName.items()}
    #print(champName_to_matrixIndex)

    print("Generating Affinity Matrix...")
    affinity = [[0.0 for i in range(len(champs))] for j in range(len(champs))]
    winnersfile = 'winners_temp.txt'  #sys.argv[1]
    losersfile = 'losers_temp.txt'  #sys.argv[2]
    winners = open(winnersfile).readlines()
    losers = open(losersfile).readlines()
    winners = [set([int(x) for x in line.strip().split(",")]) for line in winners]
    losers = [set([int(x) for x in line.strip().split(",")]) for line in losers]
    stop = len(winners)
    composition = winners + losers
    # You can use the line above or the line below; in theory the one below
    # should work better, but it is up to you. Try both and look at the results.
    #composition = [winners[i] | losers[i] for i in range(stop)]
    stop = len(composition)

    total = 0.0
    count = 0.0
    for i in range(stop):
        if champName_to_champID['Lucian'] in composition[i]:
            total += 1.0
        if champName_to_champID['Ziggs'] in composition[i]:
            count += 1.0
        for champ in composition[i]:
            for other in composition[i]:
                affinity[champID_to_matrixIndex[champ]][champID_to_matrixIndex[other]] += 0.5
                affinity[champID_to_matrixIndex[other]][champID_to_matrixIndex[champ]] += 0.5

    for c1 in range(len(champs)):
        for c2 in range(len(champs)):
            if c1 == c2:
                continue
            affinity[c1][c2] = affinity[c1][c2] / affinity[c2][c2]  #(0.5*affinity[c1][c1]+0.5*affinity[c2][c2])
    for c1 in range(len(champs)):
        affinity[c1][c1] = 0

    averages = [sum(row) / len(row) for row in affinity]
    #print(sum(affinity[champName_to_matrixIndex['Urgot']])/len(affinity[champName_to_matrixIndex['Urgot']]))
    for i in range(len(affinity)):
        affinity[i] = [-x / averages[i] for x in affinity[i]]

    #affinity = open('temp.txt').readlines()
    #affinity = [line.strip().split() for line in affinity]
    #affinity = [[float(x) for x in line] for line in affinity]
    affinity = np.array(affinity)
    print(affinity)
    #return 0

    # Check which values of preference result in 5 clusters.
    # I am trying values between -10 and 10; you can choose others
    # if you think they could result in 5 clusters.
    preferenceList = []
    preferenceTest = True
    if preferenceTest:
        #for i in range(-500, 500):
        for i in range(-10, 10):
            af = AffinityPropagation(affinity='precomputed', preference=i).fit(affinity)
            cluster_centers_indices = af.cluster_centers_indices_
            if cluster_centers_indices is not None:
                n_clusters_ = len(cluster_centers_indices)
                if n_clusters_ != len(champs):
                    print(n_clusters_, i)
                if n_clusters_ == 5:
                    preferenceList.append(i)

    # Set a role dictionary by hand to give labels to the clusters.
    # Hopefully these will never change.
    roles = {
        "Top": ['Irelia', 'Renekton'],
        "Mid": ['Ahri', 'Ziggs'],
        "Jungle": ['Amumu', 'Nautilus'],
        "Adc": ['Draven', 'Caitlyn'],
        "Support": ['Taric', 'Braum']}

    # Compute Affinity Propagation.
    # Set preferenceList to the values you found above that result in 5 clusters.
    if not preferenceTest:
        preferenceList = [-3, -4, -5, -6]
    clusterLists = [{} for i in range(len(preferenceList))]
    for ind, preference in enumerate(preferenceList):
        print("Computing Affinity Propagation with preference={0}...".format(preference))
        af = AffinityPropagation(affinity='precomputed', preference=preference).fit(affinity)
        cluster_centers_indices = af.cluster_centers_indices_
        labels = af.labels_
        n_clusters_ = len(cluster_centers_indices)
        print("\nCenter of {0} clusters:".format(n_clusters_))
        print([matrixIndex_to_champName[x] for x in cluster_centers_indices])
        clusters = {}
        for i in range(n_clusters_):
            clusters[i] = []
        for i, cnum in enumerate(labels):
            clusters[cnum].append(i)
        print("\nClusters: {0}".format([len(x) for key, x in clusters.items()]))
        for key in clusters:
            lane = None
            for role, c in roles.items():
                if all([champName_to_matrixIndex[x] in clusters[key] for x in c]):
                    lane = role
            if lane is None:
                raise Exception("You need to redefine the roles dictionary. "
                                "The canonical champions I picked are not correct for this dataset.")
            clusterLists[ind][lane] = [matrixIndex_to_champName[x] for x in clusters[key]]
            print(lane + ':\t' + ', '.join(clusterLists[ind][lane]))
        print('')

    # Compare clusters for different values of "preference".
    # You can pick any of these; I use the last one because that is what the
    # other variables (e.g. cluster_centers_indices) have been calculated for.
    # You can choose a different one, but you have to manually specify it
    # and change some code.
    table = [['Champion'], ['changed from lane'], ['to lane:']]
    for cluster in clusterLists[1:]:  # Compare each i+1 list to the i=0 list
        for key in roles:
            for champ in cluster[key]:
                if champ not in clusterLists[0][key]:
                    fromLane = None
                    for role in clusterLists[0]:
                        if champ in clusterLists[0][role]:
                            fromLane = role
                    #print("Champion {0}\tchanged from\t{2}\tto {1}".format(champ, key, fromLane))
                    table[0].append(champ)
                    table[1].append(fromLane)
                    table[2].append(key)
    table = printTable(table)
    print(table)
    print('')

    # Calculate and save/print the final table of data that you want to see.
    if True:
        order = []
        for c in cluster_centers_indices:
            for role in roles:
                if matrixIndex_to_champName[c] in clusterLists[-1][role]:
                    order.append(role)
        table = [['Role Matrix']]  #, ['Jungle'], ['Top'], ['Support'], ['Adc'], ['Mid']]
        table.append(['Primary Role'])
        for role in order:
            table.append([role])
        f = open('output.txt', 'w')
        for champID, champName in matrixIndex_to_champName.items():
            cat_percents = {}
            dists = {}
            # We have a specific champion given by the for loop above. We
            # calculate the affinity of that champion to all champions in
            # each role. E.g.
            # if champName == 'Sivir', then we look at each role (assume
            # role == 'Jungle' for now) and calculate the affinity between
            # 'Sivir' and every champion identified as 'Jungle'. I then average
            # those affinity values together, and that is the "affinity" of
            # 'Sivir' to the 'Jungle' role. Each of these averages is stored in
            # a dictionary called 'cat_percents', short for category percents.
            # You can think of the 'affinity' as the distance from one champ to
            # another: the smaller the distance, the more likely they are to
            # play the same role.
            for role in clusterLists[-1]:
                dists[role] = []
                for c in clusterLists[-1][role]:
                    c = champName_to_matrixIndex[c]
                    dists[role].append((-affinity[c, champID] - affinity[champID, c]) / 2.0)
                avg_dist = sum(dists[role]) / len(dists[role])
                cat_percents[role] = avg_dist
            # Normalize the outputs of cat_percents so the numbers fall between 0 and 1.
            cat_min = min(cat_percents.values())
            for role in cat_percents:
                cat_percents[role] = cat_percents[role] - cat_min
            cat_max = max(cat_percents.values())
            for role in cat_percents:
                cat_percents[role] = cat_percents[role] / cat_max
                cat_percents[role] = 1 - cat_percents[role]
            cat_sum = sum(cat_percents.values())
            for role in cat_percents:
                cat_percents[role] = cat_percents[role] / cat_sum * 100
                cat_percents[role] = round(cat_percents[role], 2)
            table[0].append("".join(champName.split()))  # Champion name with whitespace removed
            table[2].append(cat_percents[order[0]])  # order[0] == e.g. Jungle
            table[3].append(cat_percents[order[1]])
            table[4].append(cat_percents[order[2]])
            table[5].append(cat_percents[order[3]])
            table[6].append(cat_percents[order[4]])
            lane = None
            for role in clusterLists[-1]:
                if champName in clusterLists[-1][role]:
                    lane = role
            table[1].append(lane)  # Primary Role
        table = printTable(table)
        print(table)
        f.write(table)
        f.close()
        print('')
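# The min-max-invert-then-renormalize step buried in the loop above is easier
# to verify in isolation. A sketch of the same arithmetic (the function name is
# mine; like the original, it assumes not all role distances are equal):
def role_percentages(avg_dists):
    # avg_dists: {role: average affinity distance}; smaller means a closer fit.
    lo = min(avg_dists.values())
    shifted = {r: d - lo for r, d in avg_dists.items()}      # best role -> 0
    hi = max(shifted.values())
    inverted = {r: 1 - d / hi for r, d in shifted.items()}   # closest scores highest
    total = sum(inverted.values())
    return {r: round(100 * v / total, 2) for r, v in inverted.items()}

# e.g. role_percentages({'Top': 0.2, 'Mid': 0.9, 'Jungle': 0.5,
#                        'Adc': 0.8, 'Support': 0.7})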
# mysql_bdd = MysqlBdd("localhost", "bde", "Coco", "0386479877")
#
# bde_admins = mysql_bdd.get_bde_admin()
#
# # Convert the array of tuples into an array of ids (int)
# temp = []
# for admin in bde_admins:
#     temp.append(admin[0])
# bde_admins = temp
#
# users = mysql_bdd.get_users()
#
# save_obj(bde_admins, "bde_admins.pkl")
# save_obj(users, "users.pkl")

bde_admins = load_obj("bde_admins.pkl")
users = load_obj("users.pkl")

neo4j_bdd.clear_database()
id_bde = neo4j_bdd.add_union('BDE')
id_cercle = neo4j_bdd.add_union('Cercle')

for user in users:
    # Check whether firstname and lastname are defined; fall back to the
    # login ("firstname.lastname") when they are missing.
    if user[2] == '':
        firstname = user[1].split('.')[0]
        try:
            lastname = user[1].split('.')[1]
        except IndexError:
            lastname = ''
def main():
    load_data_from_pickles = True

    if not load_data_from_pickles:  # If False, skip to just loading the matrix from file
        modelfiles = sys.argv[1:]
        modelfiles = [file for file in modelfiles if '.xyz' in file]
        modelfiles = natsort.natsorted(modelfiles)
        modelfiles = modelfiles[0:5000]
        new_modelfiles = [file for file in modelfiles if not os.path.isfile(file[:-4] + '_VP.txt')]
        cutoff = 3.6
        #print(modelfiles)
        print("Calculating VP for all models...")
        calculate_and_save_vp(new_modelfiles, cutoff)

        print("Finding all transformed VPs...")
        model_indexes = []
        for model in modelfiles:
            indexes = open(model[:-4] + '_VP.txt').readlines()
            indexes = [eval(ind.strip()) for ind in indexes]
            model_indexes.append(copy.copy(indexes))
        d_vpIndex_to_matIndex, d_matIndex_to_vpIndex, matrix = analyze(model_indexes)
        save_obj(d_vpIndex_to_matIndex, 'd_vpIndex_to_matIndex.pkl')
        save_obj(d_matIndex_to_vpIndex, 'd_matIndex_to_vpIndex.pkl')
        save_obj(matrix, 'matrix.pkl')
        f = open('output.txt', 'w')
        for row in matrix:
            row = ','.join([str(x) for x in row]) + '\n'
            f.write(row)
        f.close()

    # Post analysis
    if load_data_from_pickles:
        d_vpIndex_to_matIndex = load_obj('d_vpIndex_to_matIndex.pkl')
        d_matIndex_to_vpIndex = load_obj('d_matIndex_to_vpIndex.pkl')
        matrix = load_obj('matrix.pkl')

    normalized_matrix = copy.copy(matrix)
    for i, row in enumerate(normalized_matrix):
        normalized_matrix[i][i] = 0
    row_total = [1.0 * sum(row) for row in normalized_matrix]
    #print(row_total)
    normalized_matrix = [[x / row_total[i] for x in row] for i, row in enumerate(normalized_matrix)]

    if True:
        count = 0
        to_print = [[] for row in normalized_matrix]
        for i, row in enumerate(normalized_matrix):
            for j, x in enumerate(row):
                #if i == j: continue
                if x > .01:
                    line = "{0}:\t{1} -> {2}".format(round(100.0 * x, 3),
                                                     d_matIndex_to_vpIndex[i],
                                                     d_matIndex_to_vpIndex[j])
                    to_print[i].append(tuple([row_total[i], line]))
                    count += x * row_total[i] / 100.0
        to_print = natsort.natsorted(to_print)
        to_print = [natsort.natsorted(row) for row in to_print]
        for row in to_print:
            for x, line in row:
                print(line + '\t' + str(100.0 * x))
            print('')
        print("Total transformations: {0}".format(count))

    # Find shortest paths to (0, 0, 12, 0) and (0, 6, 0, 8)
    ico_index = (0, 0, 12, 0, 0, 0, 0, 0)
    bcc_index = (0, 6, 0, 8, 0, 0, 0, 0)
    graph = nx.Graph()
    for i, row in enumerate(normalized_matrix):
        for j, x in enumerate(row):
            if i == j:
                continue
            if x > 0.00:
                #graph.add_edge(d_matIndex_to_vpIndex[i], d_matIndex_to_vpIndex[j])
                #graph[d_matIndex_to_vpIndex[i]][d_matIndex_to_vpIndex[j]]['weight'] = x
                graph.add_edge(d_matIndex_to_vpIndex[j], d_matIndex_to_vpIndex[i])
                graph[d_matIndex_to_vpIndex[j]][d_matIndex_to_vpIndex[i]]['weight'] = 1 - x

    #test = []
    bcc_dist = {}
    ico_dist = {}
    for ind in d_vpIndex_to_matIndex.keys():
        try:
            path = nx.shortest_path(graph, source=ind, target=ico_index, weight='weight')
            dist = 1.0
            for i in range(len(path) - 1):
                dist = dist * (1 - graph[path[i]][path[i + 1]]['weight'])
            ico_dist[ind] = dist
        except nx.exception.NetworkXNoPath:
            ico_dist[ind] = 0.0
        #test.append(tuple([dist * 100, ind, len(path)]))
        try:
            path = nx.shortest_path(graph, source=ind, target=bcc_index, weight='weight')
            dist = 1.0
            for i in range(len(path) - 1):
                dist = dist * (1 - graph[path[i]][path[i + 1]]['weight'])
            bcc_dist[ind] = dist
        except nx.exception.NetworkXNoPath:
            bcc_dist[ind] = 0.0
    #test.sort()
    #for t in test:
    #    print(t)

    test = []
    for key in ico_dist:
        #print(key, ico_dist[key], bcc_dist[key], ico_dist[key]/bcc_dist[key], sum(matrix[d_vpIndex_to_matIndex[key]]))
        # Guard against bcc_dist[key] == 0.0 (no path found to the bcc index).
        ratio = ico_dist[key] / bcc_dist[key] if bcc_dist[key] else float('inf')
        test.append([key, ico_dist[key], bcc_dist[key], ratio,
                     sum(matrix[d_vpIndex_to_matIndex[key]])])
    test.sort(key=operator.itemgetter(3))
    test.reverse()
    for t in test:
        t = [str(x) for x in t]
        t = '$'.join(t)
        print(t)
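# Why weight = 1 - x: nx.shortest_path minimizes the sum of (1 - x), so it
# prefers chains of high-transformation-rate edges, and multiplying the
# recovered x values along the path gives a rough transition probability.
# A toy illustration under those assumptions (the graph and rates are made up):
import networkx as nx

g = nx.Graph()
for a, b, x in [('A', 'B', 0.9), ('B', 'C', 0.8), ('A', 'C', 0.1)]:
    g.add_edge(a, b, weight=1 - x)  # x = fraction of transformations on this edge

path = nx.shortest_path(g, 'A', 'C', weight='weight')
prob = 1.0
for u, v in zip(path, path[1:]):
    prob *= 1 - g[u][v]['weight']  # recover x for each hop
print(path, prob)  # ['A', 'B', 'C'], ~0.72: two strong hops beat one weak edge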
def compute_factor(data_df, date_list, industry_list, config):
    size_factor_weight = config.size_factor_weight
    value_factor_weight = config.value_factor_weight
    quality_factor_weight = config.quality_factor_weight
    momenta_factor_weight = config.momenta_factor_weight
    rsi_factor_weight = config.rsi_factor_weight
    volatility_factor_weight = config.volatility_factor_weight
    gics2name = load_obj(config.gics2name)
    raw_factor = pd.DataFrame()
    raw_factor_path = config.raw_factor_path
    for i in industry_list:
        print('current industry:', gics2name[i], '\n')
        for j in tqdm(date_list):
            raw_file1 = data_df[data_df.trade_date == j]
            raw_file1 = raw_file1[raw_file1.gics_code == i]
            if raw_file1.empty:
                continue
            # Size
            raw_file1["size_factor"] = compute(raw_file1['Log_mkt_Cap'], nagtive=True)
            # Volatility (trading volume, used to validate volatility)
            raw_file1["volatility_factor"] = compute(raw_file1['Volatility'], nagtive=True)
            # Idiosyncratic volatility
            # raw_file1['idioVolatility_Factor'] = compute(raw_file1['idio_vol'], nagtive=True)
            # RSI: over the past n=14 days, how many days fell vs. rose
            raw_file1['RSI_factor'] = compute(raw_file1['RSI'], nagtive=True)
            # Momentum
            raw_file1["momentum_factor"] = compute(raw_file1['last_1mon_pricechange'], nagtive=True)
            # Quality
            sd_roa = stand(raw_file1['Rev_Over_mktCap'], 0.05)  # return on assets
            roa_cdf = ECDF(sd_roa)
            sd_acc = stand(raw_file1["q_opincome"], 0.05)  # accruals / cash flow
            acc_cdf = ECDF(sd_acc)
            sd_nocfod = stand(raw_file1['NOCF_Over_Debt'], 0.05)
            nocfod_cdf = ECDF(sd_nocfod)
            raw_file1["quality_factor"] = 0.25 * roa_cdf(sd_roa) \
                + 0.25 * acc_cdf(sd_acc) \
                + 0.5 * nocfod_cdf(sd_nocfod)
            # Value
            sd_cashval = stand(raw_file1['Cash_Over_MktCap'], 0.05)  # cash over market cap
            sd_sd_cashval = std_winsor(sd_cashval)
            cashval_cdf = ECDF(sd_sd_cashval)
            sd_roa = stand(raw_file1['Rev_Over_mktCap'], 0.05)  # revenue over market cap
            sd_sd_roa = std_winsor(sd_roa)
            roa_cdf = ECDF(sd_sd_roa)
            sd_bp = stand(raw_file1['pb'], 0.05)  # price-to-book ratio
            sd_sd_bp = std_winsor(sd_bp)
            bp_cdf = ECDF(sd_sd_bp)
            raw_file1["value_factor"] = 1 / 3 * cashval_cdf(sd_sd_cashval) \
                + 1 / 3 * roa_cdf(sd_sd_roa) \
                + 1 / 3 * bp_cdf(sd_sd_bp)
            raw_file1["overall_factor"] = raw_file1["size_factor"] * size_factor_weight \
                + raw_file1["volatility_factor"] * volatility_factor_weight \
                + raw_file1["RSI_factor"] * rsi_factor_weight \
                + raw_file1["momentum_factor"] * momenta_factor_weight \
                + raw_file1["quality_factor"] * quality_factor_weight \
                + raw_file1["value_factor"] * value_factor_weight
            if raw_factor.empty:
                raw_factor = raw_file1
            else:
                raw_factor = pd.concat([raw_factor, raw_file1], axis=0)
    raw_factor.to_csv(raw_factor_path, mode='w', header=True)
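# compute, stand, and std_winsor are not shown anywhere in this file. Judging
# from how quality_factor and value_factor are assembled (winsorize, then score
# through an empirical CDF), a plausible sketch follows. This is an assumption,
# not the original code; the misspelled 'nagtive' keyword is kept only so the
# calls above would still match.
from statsmodels.distributions.empirical_distribution import ECDF

def stand(series, limit):
    # Winsorize: clip the series to its [limit, 1 - limit] quantiles.
    lo, hi = series.quantile(limit), series.quantile(1 - limit)
    return series.clip(lo, hi)

def compute(series, nagtive=False):
    # Winsorize, then map through the empirical CDF to a [0, 1] score;
    # invert when a smaller raw value should score higher.
    sd = stand(series, 0.05)
    score = ECDF(sd)(sd)
    return 1 - score if nagtive else score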