コード例 #1
0
ファイル: CFpipe.py プロジェクト: jphcoi/MLMTC


def edges_list_reverse(edges):
	"""Flatten an adjacency mapping into a dict keyed by (origin, destination, t).

	Args:
		edges: mapping whose keys are indexable as (origin, t) and whose
			values are iterables of items indexable as (destination, weight).

	Returns:
		A dict mapping (origin, destination, t) to the weight. If the same
		triple is produced more than once, the last weight seen wins.
	"""
	distance_champ = {}
	# .items() works on both Python 2 and Python 3 dicts; .iteritems() is
	# Python 2 only and raises AttributeError on Python 3.
	for key, neighbours in edges.items():
		ori = key[0]
		t = key[1]
		for neighbour in neighbours:
			distance_champ[(ori, neighbour[0], t)] = neighbour[1]
	return distance_champ

# Module-level setup: build the term lexicon, fetch the term distance matrix
# and define the per-level parameter vectors used further down the script.
print 'on recupere le lexique'	
dico_termes=fonctions.lexique(termsandblogs)# build the dictionary of terms
#print dico_termes
# NOTE: imported here rather than at the top of the file; the attribute read
# on the next line comes from this module.
import context_process
dist_mat = context_process.dist_mat# fetch the distance matrix between terms
#p_cooccurrences=context_process.p_cooccurrences# fetch the co-occurrence matrix between terms
fini=1
niveau=0
# NOTE(review): the vectors below presumably hold one value per level
# (indexed by niveau) - TODO confirm. CF_weight_v and seuil_net_champ_v have
# 5 entries while taillemin_v, taillemax_v and kmin_v have 6.
CF_weight_v = [0.5,0.2,0.5,0.5,0.5]
#CF_weight_v = [0.4,0.2,0.5,0.5,0.5]

	
seuil_net_champ_v = [0.,0.,0.,0.,0.]
taillemin_v=[3,3,3,3,3,3]
taillemax_v=[40,40,40,40,40,40]
# NOTE: this first kmin_v value is overwritten by the assignment right after
# it, so only [3,3,3,3,3,3] is ever in effect.
kmin_v = [5,5,5,5,5,5]
kmin_v = [3,3,3,3,3,3]
コード例 #2
0
ファイル: tubes.py プロジェクト: jphcoi/MLMTC
def load_data(orphan_number):
	"""Read the tables from the database and reshape them into nested dicts.

	Relies on module-level names defined outside this function: name_bdd,
	years_bins, fonctions, fonctions_bdd and add_link.

	Args:
		orphan_number: a 'cluster' row is kept only when its
			nb_fathers + nb_sons is greater than or equal to this value.

	Returns:
		A tuple (dico_termes, clusters, dist_mat, res_termes) where:
		- dico_termes is the result of fonctions.lexique();
		- clusters maps id_cluster_univ to a dict with the keys 'periode'
		  (position of the row's period start in the list of first elements
		  of years_bins), 'label', 'nb_fathers', 'nb_sons', 'concepts' (list
		  of concepts, one per matching row), a 'syn' entry (defaulting to
		  {}) and whatever add_link stores;
		- dist_mat is loaded with fonctions.dumpingout from the
		  'dist_mat_10' dump named after years_bins;
		- res_termes[periode][concept_a][concept_b] holds distance0 for
		  (concept1, concept2) and distance1 for (concept2, concept1), for
		  strictly positive distances only.

	Raises:
		ValueError: if the period start of a 'cluster' row is not the first
			element of any entry of years_bins.
	"""
	# The 'maps' and 'phylo' tables are read with the same column list.
	champs_liens = ['id_cluster_1','periode_1','id_cluster_1_univ','id_cluster_2','periode_2','id_cluster_2_univ','strength']
	res_maps = fonctions_bdd.select_bdd_table_champ_complet(name_bdd,'maps',','.join(champs_liens))
	res_phylo = fonctions_bdd.select_bdd_table_champ_complet(name_bdd,'phylo',','.join(champs_liens))
	champs=['id_cluster','periode','id_cluster_univ','label_1','label_2','level','concept','nb_fathers','nb_sons','label']
	res_cluster = fonctions_bdd.select_bdd_table_champ_complet(name_bdd,'cluster',','.join(champs))
	champs=['jours','concepts_id']
	occurrences_concepts = fonctions_bdd.select_bdd_table_champ_complet(name_bdd,'billets',','.join(champs))
	champs=['concept1','concept2','periode','distance0','distance1']
	reseau_termes = fonctions_bdd.select_bdd_table_champ_complet(name_bdd,'sem_weighted',','.join(champs))
	dico_termes=fonctions.lexique()# build the dictionary of terms

	# Restructure the raw rows into nested dicts that are easier to use.
	years_bins_first = [years[0] for years in years_bins]

	# res_termes[periode][source][target] = distance, positive distances only.
	res_termes={}
	for concept1,concept2,periode,distance0,distance1 in reseau_termes:
		if distance0>0:
			res_termes.setdefault(periode,{}).setdefault(concept1,{})[concept2] = distance0
		if distance1>0:
			# Both directions are stored: distance1 is the reverse link.
			res_termes.setdefault(periode,{}).setdefault(concept2,{})[concept1] = distance1

	# clusters[id_cluster_univ]['periode'/'label'/'nb_fathers'/'nb_sons'/'concepts']
	clusters={}
	for id_cluster,periode,id_cluster_univ,label_1,label_2,level,concept,nb_fathers,nb_sons,label in res_cluster:
		# The period is converted before the filter below, so an unknown
		# period start raises ValueError even for rows that would be dropped.
		periode = years_bins_first.index(int(str(periode).split(' ')[0]))
		if nb_fathers+nb_sons >= orphan_number:
			if id_cluster_univ in clusters:
				# Later rows of a known cluster only contribute their concept.
				clusters[id_cluster_univ]['concepts'].append(concept)
			else:
				# 'label' holds the row's label column; an earlier store of
				# [label_1,label_2] under the same key was always overwritten
				# and has been dropped.
				clusters[id_cluster_univ] = {
					'periode': periode,
					'label': label,
					'nb_fathers': nb_fathers,
					'nb_sons': nb_sons,
					'concepts': [concept],
				}

	add_link(clusters,res_phylo,'dia')
	add_link(clusters,res_maps,'syn')
	# Make sure every cluster has a 'syn' entry.
	for cluster in clusters.values():
		if 'syn' not in cluster:
			cluster['syn']={}

	# Build the temporal occurrence table: occs[concept][year] = count.
	# NOTE(review): occs is never returned or used afterwards; it is kept so
	# the query and parsing (and any error they raise) still happen. Confirm
	# whether it can be dropped.
	occs = {}
	for occ in occurrences_concepts:
		year = occ[0]
		if len(occ[1])>2:
			# Drop the first and last characters and split on ', '
			# (presumably a stringified list such as '[1, 2]' - TODO confirm).
			concept_list = list(map(int,occ[1][1:-1].split(', ')))
		else:
			concept_list=[]
		for conc in concept_list:
			occs_conc = occs.setdefault(conc,{})
			occs_conc[year] = 1 + occs_conc.get(year,0)

	# Load dist_mat, the network of distances between terms.
	# NOTE(review): the third component uses years_bins[1][0]; confirm this is
	# intended rather than years_bins[-1][0]. It also requires years_bins to
	# hold at least two entries.
	name_date = str(years_bins[0][0]) + '_' + str(years_bins[0][-1]) + '_'+ str(years_bins[1][0])+ '_'+str(years_bins[-1][-1])
	# Long, exact version:
	#dist_mat = fonctions.dumpingout('dist_mat'+name_date)
	# Fast, approximate version:
	dist_mat = fonctions.dumpingout('dist_mat_10'+name_date)
	return dico_termes,clusters,dist_mat,res_termes