/
create_tables_fragmdb.py
618 lines (554 loc) · 23.1 KB
/
create_tables_fragmdb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
#This code creates tables for database
#Python 2.7
import os
import sys
import time
import collections
import fileinput
import base
import nrpdb
import Graph
def organizeFiles(models_dir, mults_dir) :
#Decompose the BasePairs file to the set of files containing pairs by model
#e.g. 1.txt contains pairs of 1 model
#in : name of future directory for files
#read BasePairs file
base = open("BasePairs.txt", 'r')
#create a directory for models if it doesn't exist
if not os.path.exists(models_dir):
os.makedirs(models_dir)
if not os.path.exists(mults_dir):
os.makedirs(mults_dir)
#work in models directory
os.chdir(models_dir)
#list of files in the directory
filelist = [ f for f in os.listdir('.')]
#delete them
for f in filelist:
os.remove(f)
#start to fill the directory with models files
cur_model = '1'
#create a file for the first model
out = open( str(cur_model) + ".txt", 'w')
#walk througth the pairs
for line in base:
model = line.split('\t')[1]
#write pairs of the same model
if model!=cur_model:
out.close()
cur_model = model
out = open( cur_model + ".txt", 'w')
out.write(line)
#return to the home directory
os.chdir("..")
#close all the files
out.close()
base.close()
return
def nuclStems( ModelPairs,M ):
#Create dictionary {nucleotide: stems} for every model
#in : pairs data of the model
#out : dictionary {nucleotide: stems}
dNuclStems = {}
for line in ModelPairs :
line_list = line.split('\t')
stems = []
fstem = line_list[14]
rstem = line_list[15]
nucl1 = line_list[5]
nucl2 = line_list[7]
#add different types of stems into stem list
if fstem == '\\N' and rstem == '\\N' :
continue
if fstem != '\\N' :
stems.append(int(fstem))
if rstem != '\\N' :
stems.append(M+int(rstem))
#list(set(...)) to delete similar elements
#build dictionary {nucleotide: stems}
if nucl1 in dNuclStems:
dNuclStems[nucl1] += stems
else:
dNuclStems[nucl1] = stems
if(nucl2 in dNuclStems):
dNuclStems[nucl2]=list(set(dNuclStems[nucl2]+ stems))
else:
dNuclStems[nucl2] = stems
return dNuclStems
def stemLinks( ModelPairs,dStemLinks, dNuclStems):
#Create dictionary {stem: links} for every model
#in : pairs data of the model
#out : dictionary {stem: links} , stemGraph - adjacency list
stemGraph = {}
for line in ModelPairs :
line_list = line.split('\t')
stems = []
fstem = line_list[14]
rstem = line_list[15]
link = line_list[17]
#if pair doesnt belong to any stem
if fstem == '\\N' and rstem == '\\N':
nucl1 = line_list[5]
nucl2 = line_list[7]
#but if these nucleotides are paired with other nucleotides from stems
if(nucl1 in dNuclStems and nucl2 in dNuclStems) :
#merge their stems' connections
dNuclStems[nucl1] = list(set(dNuclStems[nucl1]+ dNuclStems[nucl2]))
dNuclStems[nucl2] = dNuclStems[nucl1][:]
#build dictionary {stem: links}
for st in dNuclStems[nucl1]:
if st in dStemLinks:
dStemLinks[st] += [link]
else:
dStemLinks[st] = [link]
#delete the same elements
#dStemLinks[st] = list(set(dStemLinks[st]))
#build adjacency list
for nucl in dNuclStems:
#get all combinationsof nStem-1 length
adj = []
for st in dNuclStems[nucl]:
vertex_stem = st
adj = list(set(dNuclStems[nucl]) - set([vertex_stem]))
if len(adj) > 0:
if(vertex_stem in stemGraph):
stemGraph[ vertex_stem]=list(set(adj+stemGraph[ vertex_stem]))
else:
stemGraph[ vertex_stem] = adj
return stemGraph
def dfs (v,G,marked):
#Depth-first search
#in: vertex to start traverse
if v in marked: # if v has been already visited
return
marked.add(v) # else mark v as visited now
for i in G[v]: # check all adjacent vertices
if not i in marked: # if i hasn't been already visited
dfs(i,G,marked) # visit it by run dfs
def componentsFind(G, marked):
#Find all connected components of the graph stemGraph
#out : lComponents - list of connected components ( list of lists )
lComponents = []
component = set() #current component
for i in G : # for every vertex in graph
if not i in marked:# check whether it is marked
marked_prev = set(marked)# save the list of marked vertices before dfs
dfs(i,G,marked)
component = marked-marked_prev# compare with the list after dfs
#and get component
if(len(component)>1):#if it contains more than one vertex
lComponents.append(list(component))#add it to the common list
return lComponents
def writeMults(f,M,dStemLinks, lComponents, ModelPairs, MultsData,FragData, WingsData,EdgeData,ModelNucls, mults_dir, stop_line,statistics):
#Write obtained multiplets in files, form fragments and edges
#in : model file f , lComponents - list of components, basepairs of the model, multiplets directory, stop line, statistics,
# multiplets table MultsData,fragments table FragData, wings table WingsData, edges table EdgeData, nucls of the model table
#out : num of fragments, num of nucleotides involved in more that 1 fragment(errors in BasePairs.txt file)
# num of multiplets with nucleotides with a letter in the number
path = mults_dir + '/' + f
op = open(path, 'w+')
nFragTotal = 0
counter = 0
counterCharNum = 0
for comp in lComponents: #for every multiplet in the model
multid = str(len(MultsData)+1)
modelid = f.strip('.txt')
fstem = []
rstem = []
links = []
for st in comp:
if st<M:
fstem.append(str(st))
else:
rstem.append(str(st-M))
if st in dStemLinks:
links+=dStemLinks[st]
links = list(set(links))
op.write(' '.join([str(st) for st in fstem])+'\n')
op.write(' '.join([str(st) for st in rstem])+'\n')
op.write(' '.join([str(l) for l in links])+'\n')
MultPairs = [] #BasePairs of the multiplet
indexes = []
for line in ModelPairs:
line_list = line.split('\t')
ln_fstem = line_list[14]
ln_rstem = line_list[15]
ln_link = line_list[17]
#construct a list of BaseRairs for the multiplet
if ln_fstem in fstem or ln_rstem in rstem or ln_link in links:
MultPairs.append(line)
indexes.append(ModelPairs.index(line))
#get fragments of the multiplet
dStemNucls, Pairs, flagIFcharNum = markWings(MultPairs,M) #get dStemNucls{stem: [left][rigth wing]} vocabulary
#get fragments
lFragments = mergeWings(getWings(dStemNucls,len(WingsData)),Pairs,len(FragData), WingsData,ModelNucls,modelid,multid,M)
#statistics counting
if(flagIFcharNum):
counterCharNum +=1
nFrag = len(lFragments) #num of fragments
if nFrag not in statistics:
statistics[nFrag] = 1
else:
statistics[nFrag] += 1
nFragTotal+= nFrag
#write fragments and form fragments table
adjList = {} # vocabulary {fragid: [adjacency list]}
for fragment in lFragments: #for every frgament
fragid = fragment['ID']
adjList[fragid] = []
op.write(' '.join([nucl for nucl in fragment['SEQ']])+'\n')
fragstr = base.aFragment(fragid, f.strip('.txt'), multid, fragment['SEQ'], fragment['CON'], fragment['NWINGS'], fragment['INDNUCLS'], fragment['BPSNUM'],ModelNucls[fragment['SEQ'][0]],ModelNucls[fragment['SEQ'][-1]])
#add to the frag table
FragData.append(fragstr)
#get edges
invariant,nEdges,counterMult = getEdges(EdgeData, modelid, multid,MultPairs, lFragments,adjList)
counter+=counterMult
#write BasePairs
for line in MultPairs:
op.write(line)
op.write(stop_line)
#form Multiplets table
for i in xrange(0, len(indexes)):
ModelPairs[indexes[i]] = MultPairs[i]
multstr= base.aMultiplet(multid,modelid,nFrag,nEdges,links,fstem,rstem, MultPairs, invariant)
MultsData.append(multstr)
op.close()
return nFragTotal,counter, counterCharNum
def getEdges(EdgeData, modelid, multid,MultPairs, lFragments,adjList):
#get edges of the multiplet (form edges table) and invariant of the multiplet
# in : Edges table, id of the model, id of the multiplet, basepairs of the multiplet, list of fragments,
# empty vocabulary {fragid: [adjacency list]} with only keys - ids of all the fragments for the multiplet
# out: invariant of the multiplet grapth ( nodes are fragments, edges are bonds between (nulceotides related to) them),
# num of edges of the multiplet, num of fragments, num of nucleotides involved in more that 1 fragment(errors in BasePairs.txt file)
counter = 0
edgeid = len(EdgeData)+1
dEdges = {} #vocabulary {(frag1id, frag2id): [bond type]}
for i in xrange(0,len(MultPairs)):
line_list = MultPairs[i].split('\t')
nucl1 = line_list[5]
nucl2 = line_list[7]
bond = line_list[8]
if len([f['ID'] for f in lFragments if nucl1 in f['SEQ']])>1:
counter +=1
if len([f['ID'] for f in lFragments if nucl2 in f['SEQ']])>1:
counter +=1
fragid1 = [f['ID'] for f in lFragments if nucl1 in f['SEQ']][0]
fragid2 = [f['ID'] for f in lFragments if nucl2 in f['SEQ']][0]
#suppose the fragment with a bigger id to be on the first place in the edge
edge =(fragid1,fragid2) if fragid1>fragid2 else (fragid2,fragid1)
if edge not in dEdges:
dEdges[edge] = [bond]
else:
dEdges[edge].append(bond)
#table of multiplet pairs replenish
MultPairs[i] +='\t'+'\t'.join([str(el) for el in [multid, edgeid,fragid1, fragid2]])
#form edges table
for e in dEdges:
edge_str = base.aEdge(edgeid, modelid, multid, e[0], e[1], dEdges[e])
EdgeData.append(edge_str)
edgeid +=1
#create adjacency list
adjList = getAdjList(adjList, dEdges.keys())
return Graph.invariant(adjList),len(dEdges),counter
def getAdjList(dAdj, edges):
#create adjacency list, filling in empty vocabulary {fragid: [edges]} with only fragments' ids (nodes labels)
for e in edges:
if e[1] not in dAdj[e[0]]:
dAdj[e[0]].append(e[1])
if e[0] not in dAdj[e[1]]:
dAdj[e[1]].append(e[0])
return dAdj
def markWings(MultPairs,M):
#build dictionary dStemNucls containing wings : {stem : [[left wing][rigth wing]]} and pairs of nucleotides of ledt-rigth wings
#in : Base pairs of a multiplet
#out : dictionarydStemNucls, list of pairs (nucl1,nucl2), related to left and rigth wings accordingly
dStemNucls={}
Pairs = []
flagIFcharNum = False
for line in MultPairs:
line_list = line.split('\t') #split the pair
fstem = line_list[14]
rstem = line_list[15]
nucl1 = line_list[5]
nucl2 = line_list[7]
flagIFcharNum = True if (len(nucl1.split('.')[3]) or len(nucl2.split('.')[3])) else False
#construct dictionary dStemNucls = {stem : [[left wing][rigth wing]]}
#fullstems:
if fstem!= '\\N' :
fstem = int(fstem)
if(fstem not in dStemNucls):
dStemNucls[fstem]=[[nucl1],[nucl2]]
else:
dStemNucls[fstem][0].append(nucl1)
dStemNucls[fstem][1] = [nucl2] + dStemNucls[fstem][1]
#revstems:
if rstem!= '\\N' :
rstem = M + int(rstem)
if(rstem not in dStemNucls):
dStemNucls[rstem]=[[nucl1],[nucl2]]
else:
dStemNucls[rstem][0].append(nucl1)
dStemNucls[rstem][1].append(nucl2)
Pairs.append((nucl1,nucl2))
return dStemNucls, Pairs, flagIFcharNum
def overlap(a, b):
#get sequence ab from sequences a and b without their intersection
#in : list of strings a and b
#out : maximal amount of common symbols (maxn) and merged line
maxn = -1
for n in xrange(1, 1 + min(len(a), len(b))):
suffix = a[-n:]
prefix = b[:n]
if prefix == suffix:
maxn = n
return maxn,a + b[maxn:]
def contains(a, b):
#return bigger list (a or b) that contains another
#in : a and b lists
#out : bigger list if it contains another, one of them if they are equal and -1 otherwise
sa = str(a).strip('[').strip(']')
sb = str(b).strip('[').strip(']')
if(sa==sb): return a
if len(sa)>len(sb):
if sa.find(sb)>=0 :
return a
else:
if sb.find(sa)>=0 :
return b
return -1
def overlap2side(seq1,seq2):
#get minimal sequence (ab or ba) from sequences a and b without their intersection
#in : list of strings seq1 and seq2
#out : minimal sequence from subsequences or -1 if there are no intersection
result = contains(seq1,seq2)
if result!=-1:
return result
nx, rx = overlap(seq1, seq2)
ny, ry = overlap(seq2,seq1)
if nx==-1 and ny==-1:
result = -1 # no intersection
else:
result = rx if nx > ny else ry
return result
def serial(a,b):
#chech if b nucleotide follows nucleotide a
#in : list of strings (nucleotides) a and b
#out : True / False
nucl2 = b[0].split('.')
nucl1 = a[-1].split('.')
if nucl1[0] == nucl2[0]:
# A.U.270.; A.G.271.
if int(nucl2[2])-1==int(nucl1[2]) and not len(nucl1[3]) and not len(nucl2[3]):
return True
# A.U.270.Z; A.G.271.
if int(nucl2[2])-1==int(nucl1[2]) and len(nucl1[3]) and not len(nucl2[3]) and nucl1[3]=='Z':
return True
if int(nucl2[2])==int(nucl1[2]) and len(nucl2[3]):
#A.A.270.A, A.A.270.B
if len(nucl1[3]):
if (ord(nucl2[3])-ord(nucl1[3]))==1:
return True
else:
if(nucl2[3]=='A'):
return True
return False
def serial2side(seq1,seq2, ):
#chech if seq1 follows seq2 or otherwise
#in : list of strings (nucleotides) seq2 and seq2
#out : whole sequence or -1, if sequences are not consecutive
#check both sides
if serial(seq1,seq2):
return [seq1+seq2, (seq1[-1], seq2[0])]
else:
if serial(seq2,seq1):
return [seq2+seq1, (seq2[-1], seq1[0])] #merge
else: #are not consecutive
return [-1, (0, 0)]
def getWings(dStemNucls,nWingsData):
#return merged wings (fragments)
#in : dictionary of wings dStemNucls
#out : list of Wings, WingsData
lWings = []
wingid = nWingsData+1
for st in dStemNucls:
# wingstr1 = base.aWing(wingid, modelid, multid, "", 0, st, dStemNucls[st], M)
# wingstr2 = base.aWing(wingid+1, modelid, multid, "", 1, st, dStemNucls[st], M)
# WingsData.append(wingstr1)
# WingsData.append(wingstr2)
lWings.append({'ID' : wingid, 'SEQ' : dStemNucls[st][0],'FRAGS': 0, 'TYPE': 0, 'ST' : st})
lWings.append({'ID' : wingid+1, 'SEQ' : dStemNucls[st][1], 'FRAGS' : 0, 'TYPE': 1, 'ST' : st })
wingid+=2
return lWings
def sortByFirstNucl(wing):
return int(wing['SEQ'][0].split('.')[2])
def mergeWings(lWings,Pairs,nFrag,WingsData,NuclsData,modelid,multid,M):
#return merged wings (fragments)
#in : list of wings dStemNucls
#out : list of dictionaries containing fragments
lFragments = []
lWingsTemp = []
id = nFrag +1
for wing in lWings:
lWingsTemp.append({'SEQ': wing['SEQ'], 'WINGS': [wing['ID']], 'INDNUCLS': []})
while True: #while there are fragments possible to merge
for wing in lWingsTemp:
flag = True #True - the wing is not consecutive and does not intersect
if not len(lFragments):
lFragments.append({'ID': -1, 'SEQ' : wing['SEQ'],'WINGS': wing['WINGS'], 'NWINGS': len(wing['WINGS']), 'CON' : [], 'INDNUCLS' : wing['INDNUCLS'] , 'BPSNUM' : 0 })
else:
for j in range(0, len(lFragments)):
frag = lFragments[j]
#check if they are consecutive
[seq,(nuc1,nuc2)] = serial2side(wing['SEQ'],frag['SEQ'])
if seq!=-1: # consecutive
flag = False
frag['WINGS']+=wing['WINGS']
frag['SEQ'] = seq #replace wing1 with merged fragment
frag['INDNUCLS']+=wing['INDNUCLS']
frag['INDNUCLS'].append((nuc1,nuc2))
lFragments[j] = frag
break
else: #not consecutive
#check if they intersect
inter = overlap2side(wing['SEQ'],frag['SEQ'])
if inter!=-1 : #intersect
flag = False
frag['WINGS']+=wing['WINGS']
frag['SEQ'] = inter
frag['INDNUCLS'] += wing['INDNUCLS']
if frag['INDNUCLS']:
for i in range(0, len(frag['INDNUCLS'])):
nucls = frag['INDNUCLS'][i]
if contains([nucls[0],nucls[1]], wing['SEQ']):
frag['INDNUCLS'][i] = -1
frag['INDNUCLS']= [x for x in frag['INDNUCLS'] if x != -1]
lFragments[j] = frag
if flag:
lFragments.append({'ID': -1, 'SEQ' : wing['SEQ'],'WINGS' : wing['WINGS'], 'NWINGS': len(wing['WINGS']), 'CON' : [], 'INDNUCLS' : wing['INDNUCLS'], 'BPSNUM' : 0 })
#for f in lFragments:
# print f
if(len(lWingsTemp)> len(lFragments)):
lWingsTemp = lFragments[:]
lFragments = []
else:
break
for fr in lFragments:
fr['ID'] = id
id+=1
if fr['INDNUCLS']:
fr['CON'] = len(fr['INDNUCLS'])
fr['INDNUCLS'] = [fr['SEQ'].index(p[0]) for p in fr['INDNUCLS']]
else:
fr['CON'] = 0
indexes = set()
for nucl in fr['SEQ']:
indexes = set( list(indexes) + filter(lambda i: Pairs[i][0] == nucl or Pairs[i][1] == nucl, xrange(0, len(Pairs))))
fr['BPSNUM'] = len(indexes)
for w in lWings:
w['FRAGS'] = [frag['ID'] for frag in lFragments if w['ID'] in frag['WINGS']][0]
wingstr = base.aWing(w['ID'], modelid, multid, w['FRAGS'], w['TYPE'], w['ST'], w['SEQ'], NuclsData[w['SEQ'][0]],NuclsData[w['SEQ'][-1]], M)
WingsData.append(wingstr)
return lFragments
#MAIN
def generateMults():
start = time.time()
M = 1000000 #if stems[i]>M then it is revStem and his id =stem[i]-M
marked = set()
stemGraph = {}
dStemLinks = {}
dNuclStems = {}
dStemNucls = {}
MultsData = []
FragData = []
WingsData = []
EdgeData = []
NuclsData = {}
#file with models
ModelsData = open('models.txt').readlines()
for i in range(0,len(ModelsData)):
modelstr = ModelsData[i].strip('\n')
filepdb = modelstr.split('\t')[2]
if filepdb in nrpdb.files:
filepdb = 1
else:
filepdb = 0
ModelsData[i] = modelstr + '\t'.join([str(el) for el in [filepdb, 0,0]]) + '\n'
for line in open('nucls.txt').readlines():
l = line.split('\t')
model = int(l[2])
if model not in NuclsData:
NuclsData[model] = {}
NuclsData[model][l[1]] = int(l[10])
modelstr = "" #string for current model
#directory to store files by models:
models_dir = 'models'
#directory to store result:
mults_dir = 'mults'
stop_line = '\n*****\n'
#-----------FIND MULTIPLETS
#1. Decompose the BasePairs file to the set of files containing pairs by model
#e.g. 1.txt contains pairs of 1 model
organizeFiles(models_dir, mults_dir)
#work in 'models' directory
filelist = [ f for f in os.listdir(models_dir)]
statistics = {} #{num of vertices (containing stems): num of multiplets}
statistics2 = {} #{num of vertices (containing fragments): num of multiplets}
nTotalMults = 0
idPairs = 1
counter = 0
counterCharNum = 0
for f in filelist:
curmodelnum = int(f.strip('.txt'))
path = models_dir + '/' + f
dNuclStems.clear()
dStemLinks.clear()
stemGraph.clear()
inp = file(path, 'r')
#read file and store content
ModelPairs = inp.readlines()
inp.close()
#2. create dictionary {nucleotide: stems} for every model
dNuclStems = nuclStems( ModelPairs, M )
#print {nucleotide: stems}
#print dNuclStems
#3. create dictionary {stem: links} and adjacency list for every model
#example
#result {1: [100], 2: [3, 101], 3: [2, 101], 100: [1], 101: [2, 3]}
stemGraph = stemLinks( ModelPairs, dStemLinks, dNuclStems)
#4. find connected components
marked.clear() #set of marked vertices
lComponents = componentsFind(stemGraph, marked) #get the list of components
#get statistics
nMults = len(lComponents) #num of multiplets in the model
nTotalMults += nMults
for comp in lComponents:
if len(comp) not in statistics:
statistics[len(comp)] = 1
else:
statistics[len(comp)] += 1
#5. Create output files
nFrag = 0
if len(lComponents)>0:
ModelNucls = NuclsData[curmodelnum]
nFrag,counterModel, counterCharNumModel = writeMults(f,M, dStemLinks, lComponents, ModelPairs,MultsData,FragData, WingsData,EdgeData,ModelNucls , mults_dir, stop_line, statistics2)
counter+=counterModel
counterCharNum+=counterCharNumModel
idPairs = base.createBasePairs(ModelPairs, idPairs)
ModelsData[curmodelnum-1] = ModelsData[curmodelnum-1].replace('\t'.join([str(el) for el in [0,0]]) + '\n', '\t'.join([str(el) for el in [nMults,nFrag]]) + '\n')
print statistics
print statistics2
print counter
print counterCharNum
print 'Total = ',nTotalMults
base.createModels(ModelsData)
base.createMultiplets(MultsData)
base.createFragments(FragData)
base.createWings(WingsData)
base.createEdges(EdgeData)
#print taken time
finish1 = time.time()
d = int(finish1 - start)
minutes, seconds = d/60, d % 60
print minutes,'min ',seconds,'sec\n'
return
generateMults()