/
sourcecode.py
509 lines (390 loc) · 21.9 KB
/
sourcecode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
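# File converter between tabular files (csv, xlsx) and graph file formats.
# Accepted inputs: csv, xlsx, gexf, gml, pickle, graphML, YAML, LEDA, SparseGraph6, Pajek, GISShapefile.
# Available outputs: gexf, gml, pickle, graphML, YAML, SparseGraph6, Pajek, GISShapefile (csv only when the input is xlsx).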
#!/usr/bin/python
import csv
import xlsx2csv as X
import networkx as nx
import sys
import os.path
import pickle
#function1-----start!
def add_attributes_to_nodes(G,rj,nodes_coloumn,att_name,att_values):
    """Set a single attribute on the node named in one CSV row.

    G             -- graph object exposing ``add_node(node, **attributes)``
    rj            -- mapping for the current row (column name -> cell text)
    nodes_coloumn -- key of ``rj`` whose value is the node identifier
    att_name      -- name of the attribute to store on the node
    att_values    -- value stored under ``att_name``

    Returns None; the graph is modified in place.
    """
    # The attribute is passed as a keyword argument rather than through
    # ``attr_dict=``: networkx 2.x dropped the ``attr_dict`` parameter, so the
    # old call stored one attribute literally named "attr_dict" instead of
    # ``att_name``.  The keyword form is accepted by networkx 1.x and 2.x.
    G.add_node(rj[nodes_coloumn], **{att_name: att_values})
#function1 -----end!
#function2-----start!
def add_attributes_to_edges(G,rj,nodes_coloumn,edges_coloumn,att_name,attr_values):
    """Set one attribute on every edge listed in one CSV row.

    G             -- graph object exposing ``add_edge(u, v, **attributes)``
    rj            -- mapping for the current row (column name -> cell text)
    nodes_coloumn -- key of ``rj`` holding the source node identifier
    edges_coloumn -- key of ``rj`` holding the comma-separated target nodes
    att_name      -- name of the edge attribute to store
    attr_values   -- comma-separated values, matched to the targets by position

    Returns None; the graph is modified in place.
    """
    source = rj[nodes_coloumn]
    targets = rj[edges_coloumn].split(",")
    values = attr_values.split(",")
    for position, target in enumerate(targets):
        if not target:
            # An empty edge cell splits to [''], which used to create an edge
            # to a node named ''.  Empty entries are skipped; positions still
            # line up with ``values`` because the index is not compacted.
            continue
        if position < len(values):
            # Keyword form instead of ``attr_dict=``: networkx 2.x removed
            # that parameter and would store an attribute called "attr_dict".
            G.add_edge(source, target, **{att_name: values[position]})
        else:
            # Fewer values than targets used to raise IndexError; the edge is
            # still added, just without this attribute.
            G.add_edge(source, target)
#function2-----end!
#start1----------file format INPUT--BLOCK------#
# Greeting followed by a summary of every supported format.
print("\n\n WELCOME ")
print("\nThe following File Formats are accpeted here as input and output \nINPUT FILE TYPES:csv , xlsx , gexf , gml , pickle , graphML,YAML , LEDA , SparseGraph6 , Pajek , GISShapefile\nOUTPUT FILE TYPES: csv , gexf , gml , pickle , graphML , YAML , SparseGraph6 , Pajek , GISShapefile\n\nNOTE:-case sensitive.\nyou can only get [csv] as output if you enter [xlsx] file as input \n")

# Format names the user may give for the input file (matched case-sensitively).
accepted_input_type_set = [
    'gexf', 'gml', 'pickle', 'graphML', 'YAML', 'LEDA', 'SparseGraph6',
    'Pajek', 'GISShapefile', 'csv', 'xlsx',
]
# Format names the user may give for the output file.
program_output_file_type = [
    'csv', 'gexf', 'gml', 'pickle', 'graphML', 'YAML', 'SparseGraph6',
    'Pajek', 'GISShapefile',
]

# Keep asking until the answer is one of the accepted input formats.
input_file_in_set_bool_value = False
while not input_file_in_set_bool_value:
    input_file_type = raw_input("Enter the input file format type: \n")
    if input_file_type in accepted_input_type_set:
        input_file_in_set_bool_value = True
    else:
        print("The file format is not accpetable or you entered a inccorect format please enter a file format again\n")

# Same procedure for the output format.
output_file_in_set_bool_value = False
while not output_file_in_set_bool_value:
    output_file_type = raw_input("Enter the output file format:\n")
    if output_file_type in program_output_file_type:
        output_file_in_set_bool_value = True
    else:
        print("The file format is not accpetable or you entered a inccorect format please enter a file format again\n")
#end1-----------file format Input block ------#
#start2-----------input output file paths block-----#
# Ask for the input file until it exists and its extension equals the input
# format name chosen earlier (the comparison is case-sensitive).
while True:
    file_path = raw_input("Enter the input file path:\n")
    # os.path.splitext() inspects only the final extension.  The previous
    # file_path.split(".")[1] raised IndexError for a path without a dot and
    # picked the wrong piece for paths such as "./dir/file.csv" or "a.b.csv".
    # ``os`` is bound by the file's ``import os.path``.
    extension = os.path.splitext(file_path)[1][1:]
    if os.path.isfile(file_path) and extension == input_file_type:
        print("\nFile path exixts\nAnd type is compactible....continue\n")
        break
    print("\nFile doesnt exits or not compactable with input file type please try again \n")

# The output location is taken as entered; it is not validated here.
output_file_path = raw_input("Now enter the output file path:\n")
#end2-------------input output file paths block-----#
#start3------------xlsx to csv first -----#
# An xlsx workbook that must become a graph is first exported to csv; the
# resulting csv path is what the graph-building step reads afterwards.
if input_file_type == 'xlsx' and output_file_type != 'csv':
    converted_csv_file_path = raw_input("\n Now .....Enter the folder path to store [csv] file \nNOTE:-It should be a new folder\n")
    X.Xlsx2csv(file_path).convert(converted_csv_file_path, sheetid=0)
    # Build the path with os.path.join instead of a hard-coded "/" so it is
    # valid on every platform (``os`` is bound by the file's ``import os.path``).
    # NOTE(review): this still assumes the exported sheet is called "Sheet1";
    # a workbook whose sheet has another name will not be found - confirm.
    converted_csv_file_path = os.path.join(converted_csv_file_path, "Sheet1.csv")
#end3--------------xlsx to csv............#
#start4 ----------creating a graph G-------#
# Input format name -> name of the networkx reader function.  Names are kept
# as strings and resolved with getattr() only for the selected format, so a
# reader that the installed networkx lacks matters only when it is requested.
# NOTE(security): the pickle reader (and, depending on the networkx version,
# the YAML reader) can execute code embedded in the file - only open files
# from a trusted source.
_GRAPH_READERS = {
    'gexf': 'read_gexf',
    'gml': 'read_gml',
    'pickle': 'read_gpickle',
    'graphML': 'read_graphml',
    'LEDA': 'read_leda',
    'YAML': 'read_yaml',
    'Pajek': 'read_pajek',
    'GISShapefile': 'read_shp',
}


def _read_graph_with_retry(reader_name, path):
    """Load a graph via ``nx.<reader_name>``; on IOError ask for another path.

    The earlier retry loops repeated the identical call forever after an
    IOError because nothing changed between attempts.  Only IOError is
    handled; any other exception from the reader propagates.
    """
    while True:
        try:
            return getattr(nx, reader_name)(path)
        except IOError:
            print("Error while READING the file ")
            path = raw_input("Enter the input file path:\n")


def _ask_for_column(fieldnames, prompt):
    """Prompt until a valid column index is entered; return that column name."""
    while True:
        answer = raw_input(prompt)
        try:
            return fieldnames[int(answer)]
        except ValueError:
            # The answer was not an integer.
            print("oops no such column in csv file ..please enter a correct column\n")
        except IndexError:
            # The integer does not address an existing column.
            print("Error :index error \nsee the index and enter again\n")


def _ask_for_columns(fieldnames, prompt):
    """Prompt for comma-separated column indices; return the column names.

    The whole line is asked for again when any entry is not an integer or
    does not address an existing column.
    """
    while True:
        answer = raw_input(prompt)
        try:
            return [fieldnames[int(token)] for token in answer.split(",")]
        except ValueError:
            print("oops no such column in csv file ..please enter a correct column\n")
        except IndexError:
            print("Error :index error \nsee the index and enter again\n")


if input_file_type in _GRAPH_READERS:
    G = _read_graph_with_retry(_GRAPH_READERS[input_file_type], file_path)
elif input_file_type == 'SparseGraph6':
    # raw_input() instead of input(): on Python 2 input() evaluates whatever
    # the user types as an expression.
    a = raw_input("enter 1.for Sparse6 \n2.for graph6 format")
    if a.strip() == "1":
        G = _read_graph_with_retry('read_sparse6', file_path)
    else:
        G = _read_graph_with_retry('read_graph6', file_path)
elif (input_file_type == 'csv' or input_file_type == 'xlsx') and output_file_type != 'csv':
    if input_file_type == 'xlsx':
        # The workbook was exported to csv earlier; read that export instead.
        file_path = converted_csv_file_path

    graph_kind = raw_input("please enter the graph type \n0.Directed \n1.Undirected graph\n")
    # "0" selects a directed graph; any other answer gives an undirected one.
    if graph_kind == "0":
        G = nx.DiGraph()
    else:
        G = nx.Graph()

    # 'rU' (universal newlines) is kept from the original Python 2 code.
    with open(file_path, 'rU') as csvfile:
        reader = csv.DictReader(csvfile)

        print("\nHere are the column names with indices")
        for z, column_name in enumerate(reader.fieldnames):
            print(str(z) + " " + column_name)

        nodes_coloumn = _ask_for_column(reader.fieldnames, "Enter the nodes column indices in csv file \n")
        edges_coloumn = _ask_for_column(reader.fieldnames, "Enter the edgescolumn in csv file types\n")
        attr_coloumn = _ask_for_column(reader.fieldnames, "Enter the weight/attribute column in csv file \n")

        # Optional extra attribute columns; both lists stay empty unless the
        # user opts in, which makes the row loop below uniform.
        node_attribute_columns = []
        edge_attribute_columns = []
        more_atrributes = raw_input("Do your data has more attributes to be added to the nodes or edges [y/n] \n")
        if more_atrributes == 'y':
            nd_atrributes = raw_input("Do you want to add node atrributes [y/n]\n")
            if nd_atrributes == 'y':
                node_attribute_columns = _ask_for_columns(reader.fieldnames, "Enter all the indices of NODE ATTRIBUTES in a line with commas between them and hit ENTER\n")
            ed_atrributes = raw_input("Do you want to add any edge atrributes [y/n]\n")
            if ed_atrributes == 'y':
                edge_attribute_columns = _ask_for_columns(reader.fieldnames, "Enter all the indices of EDGES ATTRIBUTES in a line with commmas between them and hit ENTER\n")

        for rj in reader:
            node = rj[nodes_coloumn]
            if not node:
                # Rows without a node identifier are ignored.
                continue

            G.add_node(node)
            for column in node_attribute_columns:
                add_attributes_to_nodes(G, rj, nodes_coloumn, column, rj[column])

            # One cell may list several comma-separated targets; the weight
            # cell is matched to them by position.  DictReader yields None for
            # cells missing from a short row, hence the ``or ""``.
            edge_cell = rj[edges_coloumn] or ""
            weight_cell = rj[attr_coloumn] or ""
            targets = edge_cell.split(',')
            weights = weight_cell.split(',')
            for position, target in enumerate(targets):
                if not target:
                    # An empty edge cell used to create an edge to a node
                    # named ''; such entries are skipped.
                    continue
                if position < len(weights):
                    # Weights are stored exactly as read (text), as before.
                    G.add_edge(node, target, weight=weights[position])
                else:
                    # Fewer weights than targets used to raise IndexError;
                    # the edge is kept without a weight.
                    G.add_edge(node, target)

            if edge_cell:
                for column in edge_attribute_columns:
                    add_attributes_to_edges(G, rj, nodes_coloumn, edges_coloumn, column, rj[column] or "")
elif input_file_type == 'xlsx' and output_file_type == 'csv':
    # Plain spreadsheet export: no graph is built for this combination.
    X.Xlsx2csv(file_path).convert(output_file_path, sheetid=0)
#end4 ----------creating a graph G---------#
#start5---------writing graphs in the required formats------------#
# Output format name -> name of the networkx writer function, resolved with
# getattr() only for the format that is actually selected.
_GRAPH_WRITERS = {
    'gexf': 'write_gexf',
    'gml': 'write_gml',
    'pickle': 'write_gpickle',
    'graphML': 'write_graphml',
    'YAML': 'write_yaml',
    'Pajek': 'write_pajek',
    'GISShapefile': 'write_shp',
}

# xlsx -> csv has already been written by the spreadsheet conversion, so there
# is no graph to write; previously this case fell into the "enter file type
# again" prompt.  Evaluated once so that a later re-entered type cannot skip
# the write.
already_converted = (input_file_type == 'xlsx' and output_file_type == 'csv')

# NOTE(review): writing a graph as csv is not implemented; choosing csv for any
# other input leads to the prompt for a different output type below.
while not already_converted:
    writer_name = _GRAPH_WRITERS.get(output_file_type)
    if output_file_type == 'SparseGraph6':
        # raw_input() instead of input(): on Python 2 input() evaluates
        # whatever the user types as an expression.
        a = raw_input("enter 1.for Sparse6 \n2.for graph6 format\n")
        writer_name = 'write_sparse6' if a.strip() == "1" else 'write_graph6'

    if writer_name is not None:
        try:
            # The writer's return value is not assigned back to G: the old
            # code rebound G to it, discarding the graph.
            getattr(nx, writer_name)(G, output_file_path)
            break
        except IOError:
            print("The out put type:"+output_file_type+"please select another output file path\n")

    # Reached after a failed write or for a type that has no writer.
    output_file_type = raw_input("Now enter file type again other than-> " +output_file_type+" \nE to exit")
    if output_file_type == 'E':
        # raw_input() strips the trailing newline, so the former comparison
        # with 'E\n' never matched and the exit option could not be used.
        # Leaving the program here also avoids reporting a conversion that
        # did not happen.
        sys.exit(0)
    output_file_path = raw_input("Now enter the output file path again\n")
#end5---------writing graphs in the required formats------------#
# Closing summary: names both formats and the location of the written file.
message_parts = [
    "\nThe Graph is converted successfulliy from [",
    input_file_type,
    "] to [",
    output_file_type,
    "]\n Check the location ",
    output_file_path,
    " for the required output file ",
]
print("".join(message_parts))
print("\n THANK YOU\n\n\n")