def fix_experiment(file_loc):
    """Re-save an experiment parameter file that contains stimulus sets.

    ``file_loc`` is joined onto the module-level working directory ``wd``.
    The file is only loaded when ``file_loc`` contains the text
    ``'params.experiment'``, and the loaded object is written back to the
    same path only when ``'stimuli_sets'`` is found in it.  Every other
    input is left untouched.  Nothing is returned.
    """
    full_path = os.path.join(wd, file_loc)
    if 'params.experiment' not in file_loc:
        return
    exp_params = load_obj(full_path)
    if 'stimuli_sets' not in exp_params:
        return
    save_obj(exp_params, full_path)
Esempio n. 2
 # NOTE(review): the header line below is malformed -- the text after
 # ``def __init__(self):`` starts with ``=``, so the target of the
 # ``tools.load_from_csv(cfg.CSV_PATH)`` assignment appears to have been
 # lost.  Restore it from the upstream source before using this constructor.
 def __init__(self): = tools.load_from_csv(cfg.CSV_PATH)
     # Object loaded from the file named by cfg.SAVING_FILE.
     self.all_json = tools.load_obj(cfg.SAVING_FILE)
     # Starts out as an empty dict; filled elsewhere.
     self.mapping = {}
     # Missing keys default to an empty tuple.
     self.stats = defaultdict(tuple)
     # Counters that all start at zero.
     self.max_freq = 0
     self.max_area = 0
     self.max_shared_logos = 0
     # Sets size=8 for the 'font' rc group (presumably matplotlib.pyplot -- confirm).
     plt.rc('font', size=8)
Esempio n. 3
def get_name2code(wgt_data):
    """Return a mapping from stock name to ``ts_code``, cached on disk.

    If the file at ``config.name2code`` exists, the mapping stored there is
    loaded and returned without looking at ``wgt_data``.  Otherwise the
    mapping is built from ``wgt_data``, saved to that path and returned.

    Args:
        wgt_data: DataFrame with ``ts_code`` and ``stock_name`` columns.

    Returns:
        dict mapping the ``stock_name`` of the first row of each distinct
        ``ts_code`` to that code.  If two codes share a name, the code that
        appears later in ``wgt_data`` wins.
    """
    file_path = config.name2code
    if os.path.exists(file_path):
        return load_obj(file_path)
    # Keep the first row per code in a single pass instead of re-filtering
    # the whole frame once for every code, which was quadratic in its size.
    first_rows = wgt_data.drop_duplicates(subset='ts_code')
    name2code = dict(zip(first_rows['stock_name'].to_list(),
                         first_rows['ts_code'].to_list()))
    save_obj(file_path, name2code)
    return name2code
Esempio n. 4
def get_gics2name():
    """Return a mapping from GICS code to industry name, cached on disk.

    If the file at ``config.gics2name`` exists, the mapping stored there is
    loaded and returned.  Otherwise it is built from the ``gics_code`` and
    ``industry_name`` columns of ``prepare_hs300_industry_data_df()``,
    saved to that path and returned.

    Returns:
        dict mapping ``gics_code`` to ``industry_name``.  If one code is
        listed with several names, the last row wins.
    """
    file_path = config.gics2name
    if os.path.exists(file_path):
        return load_obj(file_path)
    hs300_industry_df = prepare_hs300_industry_data_df()
    # Build the dict straight from the two columns.  Routing the pairs
    # through a set only removed exact duplicates (which a dict does anyway)
    # and made the winner among conflicting names for one code depend on set
    # iteration order; the result is now deterministic.
    gics2name = dict(zip(hs300_industry_df['gics_code'].to_list(),
                         hs300_industry_df['industry_name'].to_list()))
    save_obj(file_path, gics2name)
    return gics2name
Esempio n. 5
def get_code2gics():
    """Return a mapping from ``ts_code`` to GICS code, cached on disk.

    If the file at ``config.code2gics`` exists, the mapping stored there is
    loaded and returned.  Otherwise it is built from the ``ts_code`` and
    ``gics_code`` columns of ``prepare_hs300_industry_data_df()``, saved to
    that path and returned.

    Returns:
        dict mapping ``ts_code`` to ``gics_code``.  If one stock code is
        listed with several GICS codes, the last row wins.
    """
    file_path = config.code2gics
    if os.path.exists(file_path):
        return load_obj(file_path)
    hs300_industry_df = prepare_hs300_industry_data_df()
    # Build the dict straight from the two columns.  Routing the pairs
    # through a set only removed exact duplicates (which a dict does anyway)
    # and made the winner among conflicting GICS codes for one stock depend
    # on set iteration order; the result is now deterministic.
    code2gics = dict(zip(hs300_industry_df['ts_code'].to_list(),
                         hs300_industry_df['gics_code'].to_list()))
    save_obj(file_path, code2gics)
    return code2gics
# Output file for the tab-separated attribute table, opened in append mode.
# NOTE(review): the path is an empty string, which open() cannot resolve;
# the real file name appears to have been removed -- restore it.
# NOTE(review): the triple quote opening the next line is never closed in
# the visible text, so the header writes below (and everything after them)
# are swallowed by one unterminated string literal.  Presumably the header
# lines were meant to be disabled and a closing triple quote was lost -- confirm.
data_file = open("","a")
"""data_file.write("wards placed/min\twards killed/min\tkill participation\tdragon kills/min\tbaron kills/min\t")
data_file.write("games played\tminion kills/game\tn.m. kills/game\tturrets killed/game\t")
data_file.write("diffrent champions/game\tchamp mastery 5\tchamp mastery 10\tchamp mastery 20\t")
data_file.write("".join(["champ winrate " + str(i)+ "\t" for i in range(5)]))

read_files = 0
pulled_summoners = list()

def write_attribute(file, value):
    """Write ``str(value)`` to ``file`` followed by a single tab character."""
    cell = '{0}\t'.format(str(value))
    file.write(cell)

while os.path.isfile('../data/game_data_{0}.zip'.format(read_files)):
    game = load_obj('../data/game_data_{0}.zip'.format(read_files))
    if not game or (game['matchVersion'][:4] != '6.10' and game['matchVersion'][:3] != '6.9') :
        read_files += 1
        print(str(read_files) + "/" + str(6000))

    for summoner_stats in game['participantStats']:
        summoner_id = summoner_stats['summonerId']
        if summoner_id not in pulled_summoners and summoner_id in game['participantTier']:
            summoner_match_history = game['participantMatchHistory'][summoner_id]
            for g in summoner_match_history:
                if g['matchId'] == -1:

                # check if we actually need to open another file to access this match
Esempio n. 7
                except Exception, e:
                    print("Error occured when trying to get match history details for " + str(match_id) + " data: " + str(e))
            if num_try >=11:
                if match_id in newer_match_ids:
                    return [{'matchId':-1, 'saved_file':-1}]
    return matches_data

# Restore program state: walk the consecutively numbered game_data_<n>.zip
# archives already on disk and record, for each stored match id, the number
# of the file that holds it.
while os.path.isfile('game_data_{0}.zip'.format(saved_files)):
    with zipfile.ZipFile('game_data_{0}.zip'.format(saved_files), 'r') as myzip:
        # Fix: the ``with`` statement had no body (a syntax error) and the
        # archive was never unpacked although the .pkl is read right after.
        # NOTE(review): assumes the archive holds game_data_<n>.pkl and that
        # it should be unpacked into the working directory -- confirm.
        myzip.extractall()
    match = load_obj('game_data_{0}.pkl'.format(saved_files))
    pulled_matches[match['matchId']] = saved_files
    pulled_matches_count += 1

    saved_files += 1

while True:
    if len(unpulled_summoners) == 0:
        #if we run out of summoners, hopefully older summoners already played new matches
        unpulled_summoners = random.sample(pulled_summoners, 15)
        pulled_summoners = list()

    current_summoner_id = unpulled_summoners.pop(0)

Esempio n. 8
def get_winner(game):
    """Return the side that won ``game``: ``BLUE`` or ``RED``.

    Only the first entry of ``game['teams']`` is inspected.  Its ``winner``
    flag says whether that team won; if that team is not BLUE, the blue
    result is taken to be the negation of the flag.
    NOTE(review): assumes exactly two teams, one of which is BLUE -- confirm.

    Args:
        game: mapping with a ``'teams'`` sequence whose items carry
            ``'teamId'`` and ``'winner'``.

    Returns:
        ``BLUE`` if the blue team won, otherwise ``RED``.
    """
    team = game['teams'][0]
    # Fix: the ``else`` branches were missing.  The flag was overwritten with
    # its negation when the first team was BLUE, left unbound otherwise, and
    # the RED return was unreachable.
    if team['teamId'] == BLUE:
        blue_team_winner = team['winner']
    else:
        blue_team_winner = not team['winner']

    return BLUE if blue_team_winner else RED

# Walk the consecutively numbered game files until the first missing one.
while os.path.isfile('data/game_data_{0}.zip'.format(read_files)):
    game = load_obj('data/game_data_{0}.zip'.format(read_files))
    if not game or (game['matchVersion'][:4] != '6.10'
                    and game['matchVersion'][:3] != '6.9'):
        # Nothing loaded, or a match version other than 6.9 / 6.10: report
        # progress, move on to the next file and skip this one.
        print(str(read_files) + "/" + str(6000))
        read_files += 1
        # Fix: without this the code below indexed a falsy ``game`` (or
        # processed a match with an unwanted version).
        continue
    m_id = game['matchId']

    # list of all attributes to be written to the file
    blue_attributes = list()
    red_attributes = list()

    # map summoner ids to team ids, champion ids and participant ids - commonly needed
    summoner_team = dict()
    summoner_champion = dict()
    summoner_p_id = dict()
Esempio n. 9
def main():
	# Groups champions into roles: builds a champion-by-champion affinity
	# matrix from the team compositions listed in winners_temp.txt and
	# losers_temp.txt, clusters it with AffinityPropagation, labels the
	# clusters through the hand-written ``roles`` dictionary and prints
	# tables via printTable().
	# NOTE(review): this function does not parse as it stands -- several
	# lines appear to be missing (see the notes below).  Restore them from
	# the upstream source; the notes only describe what is visible here.

	# Setup champion matrices
	# NOTE(review): this first load is overwritten two lines below.
	champs = load_obj('champion_id_to_name.pkl')
	champId_to_champName = load_obj('champion_id_to_name.pkl')
	champs = [str(champId_to_champName[key]) for key in champId_to_champName]
	champs = [str(x) for x in champs]
	champName_to_champID = {v: k for k,v in champId_to_champName.items()}

	# Champ ID --> matrix index
	champID_to_matrixIndex = {}
	for i,c in enumerate(champs):
		champID_to_matrixIndex[champName_to_champID[c]] = i;
	# Matrix index --> Champ ID
	matrixIndex_to_champID = {v: k for k,v in champID_to_matrixIndex.items()}

	matrixIndex_to_champName = {}
	for champid,index in champID_to_matrixIndex.items():
		matrixIndex_to_champName[index] = champId_to_champName[champid]
	champName_to_matrixIndex = {v: k for k,v in matrixIndex_to_champName.items()}

	print("Generating Affinity Matrix...")
	affinity = [ [0.0 for i in range(len(champs))] for j in range(len(champs)) ]
	winnersfile = 'winners_temp.txt' #sys.argv[1]
	losersfile = 'losers_temp.txt' #sys.argv[2]
	# Each line is a comma-separated list of integer champion ids.
	# NOTE(review): both files are opened without being closed.
	winners = open(winnersfile).readlines()
	losers = open(losersfile).readlines()
	winners = [set([int(x) for x in line.strip().split(",")]) for line in winners]
	losers =  [set([int(x) for x in line.strip().split(",")]) for line in losers]
	stop = len(winners)
	composition = winners + losers # You can use this line or the below line; in theory I think this should work better, but it is up to you. Try both and look at the results.
	#composition = [winners[i] | losers[i] for i in range(stop)]
	stop = len(composition)

	# ``total`` counts compositions containing Lucian and ``count`` those
	# that also contain Ziggs; neither is read again in the visible code.
	total = 0.0
	count = 0.0
	for i in range(stop):
		if(champName_to_champID['Lucian'] in composition[i]):
			total += 1.0
			if(champName_to_champID['Ziggs'] in composition[i]):
				count += 1.0
		# Every ordered pair is visited, so each pair of distinct champions
		# gains 1.0 per shared composition and each diagonal entry gains
		# 1.0 per composition that contains the champion.
		for champ in composition[i]:
			for other in composition[i]:
				affinity[champID_to_matrixIndex[champ]][champID_to_matrixIndex[other]] += 0.5
				affinity[champID_to_matrixIndex[other]][champID_to_matrixIndex[champ]] += 0.5
	# Divide each off-diagonal entry by the diagonal entry of its column.
	# NOTE(review): raises ZeroDivisionError if champion c2 never appears in
	# any composition (its diagonal entry is then still 0.0).
	for c1 in range(len(champs)):
		for c2 in range(len(champs)):
			if(c1 == c2): continue
			affinity[c1][c2] = affinity[c1][c2] / (affinity[c2][c2]) #(0.5*affinity[c1][c1]+0.5*affinity[c2][c2])

	for c1 in range(len(champs)):
		affinity[c1][c1] = 0

	# Row means.  The list starts out holding its own indices and is
	# overwritten in place while being iterated; each slot is read before it
	# is replaced, so every row is still visited once.
	averages = [i for i in range(len(champs))]
	for i in averages:
		averages[i] = sum(affinity[i])/len(affinity[i])
	# Negate and scale every row by its mean.
	# NOTE(review): divides by zero for a row whose mean is 0.
	for i in range(len(affinity)):
		affinity[i] = [-x/averages[i] for x in affinity[i]]

	#affinity = open('temp.txt').readlines()
	#affinity = [line.strip().split() for line in affinity]
	#affinity = [[float(x) for x in line] for line in affinity]
	affinity = np.array(affinity)
	#return 0

	# Check which values of preference result in 5 clusters
	# I am trying values between -10 and 10, you can choose others
	# if you think they could result in 5 clusters.
	preferenceList = []
	preferenceTest = True
	# NOTE(review): the loop below is indented one level deeper than the
	# line above it; a guarding line (presumably a test on preferenceTest)
	# appears to be missing.
		#for i in range(-500,500):
		for i in range(-10,10):
			af = AffinityPropagation(affinity='precomputed',preference=i).fit(affinity)
			cluster_centers_indices = af.cluster_centers_indices_
			# NOTE(review): ``!= None`` on what is presumably a NumPy array
			# compares element-wise; ``is not None`` looks intended -- confirm.
			if(cluster_centers_indices != None):
				n_clusters_ = len(cluster_centers_indices)
				if(n_clusters_ != len(champs)): print(n_clusters_,i)
				if(n_clusters_== 5): preferenceList.append(i)

	# Set a role dictionary by hand to give labels to the clusters
	# Hopefully these will never change
	roles = {
		"Top":     ['Irelia', 'Renekton'],
		"Mid":     ['Ahri',   'Ziggs'],
		"Jungle":  ['Amumu',  'Nautilus'],
		"Adc":     ['Draven', 'Caitlyn'],
		"Support": ['Taric',  'Braum']}

	# Compute Affinity Propagation
	# Set preferenceList to the values you found above that result in 5 clusters
	if(not preferenceTest):	preferenceList = [-3,-4,-5,-6]
	clusterLists = [{} for i in range(len(preferenceList))]
	for ind,preference in enumerate(preferenceList):
		print("Computing Affinity Propagation with preference={0}...".format(preference))
		af = AffinityPropagation(affinity='precomputed',preference=preference).fit(affinity)
		cluster_centers_indices = af.cluster_centers_indices_
		labels = af.labels_
		n_clusters_ = len(cluster_centers_indices)

		print("\nCenter of {0} clusters:".format(n_clusters_))
		print([matrixIndex_to_champName[x] for x in cluster_centers_indices])

		clusters = {}
		for i in range(n_clusters_):
			clusters[i] = []
		# NOTE(review): the loop below has no body; the line that files each
		# matrix index under its cluster label appears to be missing.
		for i,cnum in enumerate(labels):
		print("\nClusters: {0}".format([len(x) for key,x in clusters.items()]))
		# A cluster gets the role whose two reference champions both fall
		# inside it; if no role matches, an exception is raised.
		for key in clusters:
			lane = None
			for role,c in roles.items():
				if(all([champName_to_matrixIndex[x] in clusters[key] for x in c])):
					lane = role
			if(lane == None): raise Exception("You need to redefine the roles dictionary. The canonical champions I picked are not correct for this dataset.")
			clusterLists[ind][lane] = [matrixIndex_to_champName[x] for x in clusters[key]]
			print( lane + ':\t' + ', '.join(clusterLists[ind][lane]))

	# Compare clusters for different values of "preference"
	# You can pick to use any of these.
	# I use the last one because that one is what the other variables
	# (e.g. cluster_centers_indices) have been calculated for.
	# You can choose a different one, but you have to manually specify it
	# and change some code.
	# NOTE(review): ``fromLane`` is computed below but nothing is added to
	# ``table`` in the visible code; the append lines appear to be missing.
	table = [ ['Champion'], ['changed from lane'], ['to lane:'] ]
	for cluster in clusterLists[1:]: # Compare each i+1 list to the i=0 list
		for key in roles:
			for champ in cluster[key]:
				if(champ not in clusterLists[0][key]):
					fromLane = None
					for role in clusterLists[0]:
						if(champ in clusterLists[0][role]): fromLane = role
					#print("Champion {0}\tchanged from\t{2}\tto {1}".format(champ,key,fromLane))
	table = printTable(table)

	# Calculate and save/print the final table of data that you want to see.
	# NOTE(review): everything from here on is indented one level deeper
	# than the statement above; an enclosing line appears to be missing.
		order = []
		# NOTE(review): the ``if`` below has no body (presumably it recorded
		# the role in ``order``), and neither has the ``for role in order``
		# loop further down (presumably it added one table row per role).
		for c in cluster_centers_indices:
			for role in roles:
				if(matrixIndex_to_champName[c] in clusterLists[-1][role]):
		table = [ ['Role Matrix'] ]#, ['Jungle'], ['Top'], ['Support'], ['Adc'], ['Mid'] ]
		table.append(['Primary Role'])
		for role in order:
		# NOTE(review): ``f`` is neither written to nor closed in the visible code.
		f = open('output.txt','w')
		for champID,champName in matrixIndex_to_champName.items():
			cat_percents = {}
			dists = {}
			# We have a specific champion specified by the above for loop
			# We will calculate the affinity of that champion to all champions in each
			# role. E.g. if champName=='Sivir' then we will look at all the roles (let's
			# assume role=='Jungle' for now) and calculate the affinity between 'Siver'
			# and all the champions who have been identified as 'Jungle'. Then I average
			# all these affinity values together and that is the "affinity" for 'Sivir'
			# to the 'Jungle' role. I store each of these average values into a dictionary
			# called 'cat_percents' which stands for category percents.
			# You can think of the 'affinity' as the distance from one champ to another.
			# The smaller the distance, the more likely they are to play the same role.
			# NOTE(review): nothing is ever appended to dists[role] in the
			# visible code, so the average below divides by zero; the line
			# collecting the affinity values appears to be missing.
			for role in clusterLists[-1]:
				dists[role] = []
				for c in clusterLists[-1][role]:
					c = champName_to_matrixIndex[c]
					avg_dist = sum(dists[role])/len(dists[role])
				cat_percents[role] = avg_dist

			# Do a bunch of normalization on the outputs of cat_percents to get the numbers to be between 0 and 1.
			# Step 1: shift so the smallest value is 0.
			cat_min = min(cat_percents.values())
			for role in cat_percents:
				cat_percents[role] = cat_percents[role] - cat_min

			# Step 2: scale by the maximum and invert, so the smallest
			# distance becomes 1 and the largest becomes 0.
			# NOTE(review): divides by zero when all values are equal.
			cat_max = max(cat_percents.values())
			for role in cat_percents:
				cat_percents[role] = cat_percents[role]/cat_max
				cat_percents[role] = 1 - cat_percents[role]
			# Step 3: express each value as a percentage of the total,
			# rounded to two decimals.
			cat_sum = sum(cat_percents.values())
			for role in cat_percents:
				cat_percents[role] = cat_percents[role]/cat_sum*100
				cat_percents[role] = round(cat_percents[role],2)

			# NOTE(review): rows 2..6 of ``table`` and the five entries of
			# ``order`` are not created anywhere in the visible code.
			table[0].append("".join(champName.split())) # Champion name, remove all whitespace
			table[2].append(cat_percents[order[0]]) # Order[0] == e.g Jungle
			table[3].append(cat_percents[order[1]]) # Order[1]
			table[4].append(cat_percents[order[2]]) # Order[2]
			table[5].append(cat_percents[order[3]]) # Order[3]
			table[6].append(cat_percents[order[4]]) # Order[4]

			lane = None
			for role in clusterLists[-1]:
				if(champName in clusterLists[-1][role]):
					lane = role
			table[1].append(lane) # Primary Role
		table = printTable(table)
Esempio n. 10
    # mysql_bdd = MysqlBdd("localhost", "bde", "Coco", "0386479877")
    # bde_admins = mysql_bdd.get_bde_admin()
    # # Convert array of tuple in array of id (int)
    # temp=[]
    # for admin in bde_admins:
    #     temp.append(admin[0])
    # bde_admins = temp
    # users = mysql_bdd.get_users()
    # save_obj(bde_admins, "bde_admins.pkl")
    # save_obj(users, "users.pkl")

    bde_admins = load_obj("bde_admins.pkl")
    users = load_obj("users.pkl")


    id_bde = neo4j_bdd.add_union('BDE')
    id_cercle = neo4j_bdd.add_union('Cercle')

    for user in users:
        # Check if firstname and lastname are defined
        if user[2] == '':
            firstname = user[1].split('.')[0]
                lastname = user[1].split('.')[1]
                lastname = ''
def main():
    # Builds (or reloads from pickles) a matrix of transitions between VP
    # indexes, normalizes each row, turns it into a networkx graph whose
    # edge weight is 1 - x, and for every VP index multiplies (1 - weight)
    # along the shortest path to the ``ico_index`` and ``bcc_index`` targets.
    # NOTE(review): this function does not parse as it stands -- several
    # lines (``else:``, ``try:``, some appends/writes) appear to be missing.
    # Restore them from the upstream source; the notes below only describe
    # what is visible here.
    load_data_from_pickles = True
    if(not load_data_from_pickles): # If false, will skip to just load the matrix from file
        # Model files come from the command line: keep names containing
        # '.xyz', sort them naturally and use at most the first 5000.
        modelfiles = sys.argv[1:]
        modelfiles = [file for file in modelfiles if '.xyz' in file]
        modelfiles = natsort.natsorted(modelfiles)
        modelfiles = modelfiles[0:5000]
        # Models without a '<name>_VP.txt' file next to them.
        # NOTE(review): neither ``new_modelfiles`` nor ``cutoff`` is used in
        # the visible code; the VP calculation step appears to be missing.
        new_modelfiles = [file for file in modelfiles if(not os.path.isfile(file[:-4]+'_VP.txt'))]

        cutoff = 3.6
        print("Calculating VP for all models...")

        print("Finding all transformed VPs...")
        model_indexes = []
        for model in modelfiles:
            # NOTE(review): the file is never closed, and eval() executes
            # whatever each line contains -- only safe for trusted files.
            # NOTE(review): ``indexes`` is never added to ``model_indexes``
            # in the visible code, so analyze() receives an empty list.
            indexes = open(model[:-4]+'_VP.txt').readlines()
            indexes = [eval(ind.strip()) for ind in indexes]
        d_vpIndex_to_matIndex, d_matIndex_to_vpIndex, matrix = analyze(model_indexes)
        save_obj(d_vpIndex_to_matIndex, 'd_vpIndex_to_matIndex.pkl')
        save_obj(d_matIndex_to_vpIndex, 'd_matIndex_to_vpIndex.pkl')
        save_obj(matrix, 'matrix.pkl')
        # NOTE(review): each row is formatted but never written, and ``f``
        # is never closed in the visible code.
        f = open('output.txt','w')
        for row in matrix:
            row = ','.join([str(x) for x in row]) + '\n'

    # Post analysis
        # NOTE(review): these loads sit inside the ``if`` branch above, so
        # with load_data_from_pickles = True nothing defines ``matrix``; an
        # ``else:`` line appears to be missing.
        d_vpIndex_to_matIndex = load_obj('d_vpIndex_to_matIndex.pkl')
        d_matIndex_to_vpIndex = load_obj('d_matIndex_to_vpIndex.pkl')
        matrix = load_obj('matrix.pkl')

    # Zero the diagonal, then divide every row by its sum.
    # NOTE(review): copy.copy is shallow; if ``matrix`` is a list of row
    # lists, zeroing the diagonal here also changes ``matrix`` -- confirm.
    # NOTE(review): a row that sums to zero makes the division fail.
    normalized_matrix = copy.copy(matrix)
    for i,row in enumerate(normalized_matrix):
        normalized_matrix[i][i] = 0
    row_total = [ 1.0*sum(row) for row in normalized_matrix ]
    normalized_matrix = [ [x/row_total[i] for x in row] for i,row in enumerate(normalized_matrix) ]

        # NOTE(review): this section is indented one level deeper than the
        # statement above; an enclosing line appears to be missing.
        count = 0
        to_print = [[] for row in normalized_matrix]
        for i,row in enumerate(normalized_matrix):
            for j,x in enumerate(row):
                #if(i==j): continue
                if(x > .01):
                    # NOTE(review): ``line`` is never stored in ``to_print``
                    # in the visible code, although the loop below unpacks
                    # (x, line) pairs from it.
                    line = "{0}:\t{1} -> {2}".format(round(100.0*x,3), d_matIndex_to_vpIndex[i], d_matIndex_to_vpIndex[j])
                    count += x*row_total[i]/100.0
        to_print = natsort.natsorted(to_print)
        to_print = [natsort.natsorted(row) for row in to_print]
        for row in to_print:
            for x,line in row:
                print(line + '\t' + str(100.0*x))
        print("Total transformations: {0}".format(count))

    # Find shortest paths to (0, 0, 12, 0) and (0, 6, 0, 8)
    ico_index = (0, 0, 12, 0, 0, 0, 0, 0)
    bcc_index = (0, 6,  0, 8, 0, 0, 0, 0)
    # Edge j -- i carries weight 1 - x for every positive normalized entry
    # x at row i, column j.  The graph is undirected, so a later assignment
    # for the same pair of nodes overwrites an earlier one.
    graph = nx.Graph()
    for i,row in enumerate(normalized_matrix):
        for j,x in enumerate(row):
            if(i==j): continue
            if(x > 0.00):
                #graph.add_edge( d_matIndex_to_vpIndex[i], d_matIndex_to_vpIndex[j] )
                #graph[d_matIndex_to_vpIndex[i]][d_matIndex_to_vpIndex[j]]['weight'] = x
                graph.add_edge( d_matIndex_to_vpIndex[j], d_matIndex_to_vpIndex[i] )
                graph[d_matIndex_to_vpIndex[j]][d_matIndex_to_vpIndex[i]]['weight'] = 1-x
    #test = []
    bcc_dist = {}
    ico_dist = {}
    for ind in d_vpIndex_to_matIndex.keys():
            # NOTE(review): both ``except`` clauses below have no matching
            # ``try:`` line; those lines appear to be missing.
            # The stored value is the product of (1 - weight) over the edges
            # of the shortest path, or 0.0 when no path exists.
            path = nx.shortest_path(graph, source=ind, target=ico_index, weight='weight')
            dist = 1.0
            for i in range(len(path)-1):
                dist = dist * (1-graph[path[i]][path[i+1]]['weight'])
            ico_dist[ind] = dist
        except nx.exception.NetworkXNoPath:
            ico_dist[ind] = 0.0
            #test.append(tuple([ dist*100,ind, len(path) ]))

            path = nx.shortest_path(graph, source=ind, target=bcc_index, weight='weight')
            dist = 1.0
            for i in range(len(path)-1):
                dist = dist * (1-graph[path[i]][path[i+1]]['weight'])
            bcc_dist[ind] = dist
        except nx.exception.NetworkXNoPath:
            bcc_dist[ind] = 0.0
    #for t in test:
    #    print(t)
    test = []
    for key in ico_dist:
        #print(key, ico_dist[key], bcc_dist[key], ico_dist[key]/bcc_dist[key], sum(matrix[d_vpIndex_to_matIndex[key]))
        # NOTE(review): bcc_dist[key] is 0.0 whenever no path to bcc_index
        # exists, which makes this ratio raise ZeroDivisionError.
        test.append([key, ico_dist[key], bcc_dist[key], ico_dist[key]/bcc_dist[key], sum(matrix[d_vpIndex_to_matIndex[key]])])
    # NOTE(review): each row is joined with '$' but the result is discarded
    # in the visible code; an output line appears to be missing.
    for t in test:
        t = [str(x) for x in t]
        t = '$'.join(t)
Esempio n. 12
def compute_factor(data_df, date_list, industry_list, config):
    # For every industry in ``industry_list`` and every date in
    # ``date_list``, take the rows of ``data_df`` with that ``gics_code``
    # and ``trade_date``, derive the size, volatility, RSI, momentum,
    # quality and value factor columns plus their weighted sum
    # (``overall_factor``, weights read from ``config``), and finally write
    # the collected frame to ``config.raw_factor_path`` as CSV.
    # NOTE(review): this function does not parse as it stands -- three
    # calls are left unclosed and at least one ``else:`` appears to be
    # missing (see the notes below).  Restore them from the upstream source.
    size_factor_weight = config.size_factor_weight
    value_factor_weight = config.value_factor_weight
    quality_factor_weight = config.quality_factor_weight
    momenta_factor_weight = config.momenta_factor_weight
    rsi_factor_weight = config.rsi_factor_weight
    volatility_factor_weight = config.volatility_factor_weight
    # Mapping loaded from disk; only used to print the industry name below.
    gics2name = load_obj(config.gics2name)
    raw_factor = pd.DataFrame()
    raw_factor_path = config.raw_factor_path
    for i in industry_list:
        print('current industry is : ', gics2name[i], '\n')
        for j in tqdm(date_list):
            # Rows for trade date j within industry i.
            raw_file1 = data_df[data_df.trade_date == j]
            raw_file1 = raw_file1[raw_file1.gics_code == i]
            # NOTE(review): as written the factors are computed only when
            # the slice is EMPTY; a ``continue`` (or ``else:``) line appears
            # to be missing after this test.
            if raw_file1.empty:
                # size
                # NOTE(review): this call is never closed; its remaining
                # argument(s) appear to be missing.
                raw_file1["size_factor"] = compute(raw_file1['Log_mkt_Cap'],
                # Volatility (trading volume - used to validate the volatility)
                raw_file1["volatility_factor"] = compute(
                    raw_file1['Volatility'], nagtive=True)
                # Idiosyncratic volatility
                # raw_file1['idioVolatility_Factor'] = compute(raw_file1['idio_vol'], nagtive=True)
                # RSI: how many of the past n (14) days fell and how many rose
                # NOTE(review): this call is never closed either.
                raw_file1['RSI_factor'] = compute(raw_file1['RSI'],
                # Momentum
                raw_file1["momentum_factor"] = compute(
                    raw_file1['last_1mon_pricechange'], nagtive=True)
                # Quality
                sd_roa = stand(raw_file1['Rev_Over_mktCap'], 0.05)  # return on assets
                roa_cdf = ECDF(sd_roa)
                sd_acc = stand(raw_file1["q_opincome"], 0.05)  # accrual? cash flow
                acc_cdf = ECDF(sd_acc)
                sd_nocfod = stand(raw_file1['NOCF_Over_Debt'], 0.05)
                nocfod_cdf = ECDF(sd_nocfod)
                # Weighted mix of the three empirical CDF values (0.25 / 0.25 / 0.5).
                # NOTE(review): the last call is never closed; its argument
                # (presumably sd_nocfod) appears to be missing.
                raw_file1["quality_factor"] = 0.25 * roa_cdf(
                    sd_roa) + 0.25 * acc_cdf(sd_acc) + 0.5 * nocfod_cdf(
                # Value
                sd_cashval = stand(raw_file1['Cash_Over_MktCap'],
                                   0.05)  # cash divided by market cap
                sd_sd_cashval = std_winsor(sd_cashval)
                cashval_cdf = ECDF(sd_sd_cashval)
                sd_roa = stand(raw_file1['Rev_Over_mktCap'], 0.05)  # earnings divided by market cap
                sd_sd_roa = std_winsor(sd_roa)
                roa_cdf = ECDF(sd_sd_roa)
                sd_bp = stand(raw_file1['pb'], 0.05)  # price-to-book ratio
                sd_sd_bp = std_winsor(sd_bp)
                bp_cdf = ECDF(sd_sd_bp)

                # Equal-weight average of the three empirical CDF values.
                raw_file1["value_factor"] = 1 / 3 * cashval_cdf(sd_sd_cashval) \
                                            + 1 / 3 * roa_cdf(sd_sd_roa) \
                                            + 1 / 3 * bp_cdf(sd_sd_bp)

                # Weighted sum of the six factors using the config weights.
                raw_file1["overall_factor"] = raw_file1["size_factor"] * size_factor_weight \
                                              + raw_file1["volatility_factor"] * volatility_factor_weight \
                                              + raw_file1["RSI_factor"] * rsi_factor_weight \
                                              + raw_file1["momentum_factor"] * momenta_factor_weight \
                                              + raw_file1["quality_factor"] * quality_factor_weight \
                                              + raw_file1["value_factor"] * value_factor_weight
                # NOTE(review): as written the concat runs only while
                # ``raw_factor`` is empty and immediately replaces the
                # assignment above it; an ``else:`` line appears to be missing.
                if raw_factor.empty:
                    raw_factor = raw_file1
                    raw_factor = pd.concat([raw_factor, raw_file1], axis=0)
    # Overwrite the output file, with a header row.
    raw_factor.to_csv(raw_factor_path, mode='w', header=True)
Esempio n. 13
                        + str(match_id) + " data: " + str(e))
            if num_try >= 11:
                if match_id in newer_match_ids:
                    return [{'matchId': -1, 'saved_file': -1}]

    return matches_data

# Restore program state: walk the consecutively numbered game_data_<n>.zip
# archives already on disk and record, for each stored match id, the number
# of the file that holds it.
while os.path.isfile('game_data_{0}.zip'.format(saved_files)):
    with zipfile.ZipFile('game_data_{0}.zip'.format(saved_files),
                         'r') as myzip:
        # Fix: the ``with`` statement had no body (a syntax error) and the
        # archive was never unpacked although the .pkl is read right after.
        # NOTE(review): assumes the archive holds game_data_<n>.pkl and that
        # it should be unpacked into the working directory -- confirm.
        myzip.extractall()
    match = load_obj('game_data_{0}.pkl'.format(saved_files))
    pulled_matches[match['matchId']] = saved_files
    pulled_matches_count += 1

    saved_files += 1

while True:
    if len(unpulled_summoners) == 0:
        #if we run out of summoners, hopefully older summoners already played new matches
        unpulled_summoners = random.sample(pulled_summoners, 15)
        pulled_summoners = list()

    current_summoner_id = unpulled_summoners.pop(0)
