def fromListToAdjMatrix(_list: List):
    """Convert an adjacency-list structure into an ``AdjMatrix``.

    A new ``AdjMatrix`` is created with ``_list.tops`` as its constructor
    argument. Every cell ``_list.matrix[row][col]`` with both indices in
    ``range(_list.tops)`` is inspected; a non-zero value ``v`` sets
    ``matrix[row][v - 1]`` of the result to 1. Zero cells are skipped, so the
    corresponding result cells keep whatever ``AdjMatrix`` initialised them to.

    NOTE(review): presumably the cells hold 1-based vertex numbers with 0 used
    as padding -- confirm against the adjacency-list class.

    Args:
        _list: Object exposing ``tops`` (an int) and ``matrix`` (indexable as
            ``matrix[row][col]``).

    Returns:
        The populated ``AdjMatrix`` instance.
    """
    size = _list.tops
    adjMatrix = AdjMatrix(size)
    for row in range(size):
        source_row = _list.matrix[row]
        for col in range(size):
            neighbour = source_row[col]
            if neighbour == 0:
                # Zero entries carry no edge; nothing to record.
                continue
            adjMatrix.matrix[row][neighbour - 1] = 1
    return adjMatrix
def _is_token_subsequence(needle, haystack):
    """Return True if the tokens of *needle* occur, in order, in *haystack*.

    The match does not need to be contiguous. An empty *needle* never matches.
    """
    if not needle:
        return False
    matched = 0
    for token in haystack:
        if token == needle[matched]:
            matched += 1
            if matched == len(needle):
                return True
    return False


def _remove_if_present(items, value):
    """Remove the first occurrence of *value* from *items*, if there is one."""
    if value in items:
        items.remove(value)


def create_graph(text, filename):
    """Build the term graph of ``text.tokens`` over the document in *filename*.

    The document is read, lower-cased and tokenised. For every term in
    ``text.tokens`` its position entries in the document are collected via
    ``get_positions`` (single-token terms) or ``get_phrasepos`` (multi-token
    terms). Terms that are redundant with respect to a longer term containing
    them are dropped, the remaining position lists are merged with ``merge``
    and the result is handed to ``AdjMatrix.createMatrix``.

    Each position entry is indexed as ``entry[0]`` to obtain the term it
    belongs to; the rest of its layout is defined by the helper functions.

    Args:
        text: Object with a ``tokens`` attribute holding the candidate terms.
            On return ``text.tokens`` is replaced by the sorted list of terms
            that survived filtering.
        filename: Path of the base document to read.

    Returns:
        The ``AdjMatrix`` instance after ``createMatrix(terms, merged, False)``.
    """
    # Kept function-local, as in the original code.
    from textmodel import Text
    from adjmatrix import AdjMatrix

    word_pattern = r"[\w']+"

    # Context manager so the file handle is always closed.
    with open(filename) as source:
        basetext = source.read().lower()

    btext = Text(basetext)
    btext.tokens = nltk.regexp_tokenize(basetext, word_pattern)

    if text is None:
        # NOTE(review): execution continues and fails on ``text.tokens`` just
        # below with AttributeError; behaviour kept as in the original.
        print("Grrrrrrrrrr")

    txt = list(text.tokens)
    positions_by_term = {}  # first-entry term -> list of position entries
    discarded = []  # terms to drop from txt once positions are collected

    # Collect the position entries of every term.
    for term in txt:
        term_tokens = nltk.regexp_tokenize(term, word_pattern)
        if len(term_tokens) == 1:
            pos = list(get_positions(btext.tokens, term_tokens[0], txt))
        elif len(term_tokens) > 1:
            pos = list(get_phrasepos(btext.tokens, term, txt))
        else:
            pos = []
            discarded.append(term)
        if not pos:
            continue

        key = pos[0][0]
        if key in positions_by_term:
            known = positions_by_term[key]
            for entry in pos:
                if entry not in known:
                    known.append(entry)
        else:
            positions_by_term[key] = pos
        if key != term:
            # The positions were filed under a different term; drop this one.
            discarded.append(term)

    # Tokenise every key once instead of re-tokenising inside nested loops.
    tokens_of = {term: nltk.word_tokenize(term) for term in positions_by_term}

    # Remove unnecessary sub-terms, e.g. "linked" when every occurrence of it
    # is part of "linked list" (both have the same number of positions).
    redundant = []
    for shorter in positions_by_term:
        for longer in positions_by_term:
            if shorter == longer or shorter not in longer:
                continue
            if not _is_token_subsequence(tokens_of[shorter], tokens_of[longer]):
                continue
            same_count = (
                len(positions_by_term[shorter]) == len(positions_by_term[longer])
            )
            if same_count and shorter not in redundant:
                redundant.append(shorter)
    for term in redundant:
        del positions_by_term[term]
        # The key may differ from the spelling stored in txt, so do not assume
        # it is present.
        _remove_if_present(txt, term)

    # Occurrences of a term that are not accounted for by a longer term that
    # contains it. Multi-word terms are processed first so that single-word
    # terms subtract the already-adjusted counts.
    standalone = {term: len(found) for term, found in positions_by_term.items()}
    phrases = [term for term in positions_by_term if len(tokens_of[term]) > 1]
    words = [term for term in positions_by_term if len(tokens_of[term]) == 1]
    for term in phrases + words:
        for other in positions_by_term:
            if term != other and term in other:
                standalone[term] -= standalone[other]

    exhausted = [term for term in positions_by_term if standalone[term] == 0]
    for term in exhausted:
        _remove_if_present(txt, term)
        del positions_by_term[term]
    for term in discarded:
        _remove_if_present(txt, term)

    poslist = list(positions_by_term.values())

    # mlist is the merge of all position lists in sorted order.
    # NOTE(review): with exactly one position list nothing is merged and mlist
    # stays empty, which removes every term below -- confirm this is intended.
    mlist = []
    if len(poslist) > 1:
        mlist = merge(poslist[0], poslist[1])
        for positions in poslist[2:]:
            if positions:
                mlist = merge(mlist, positions)

    # Keep only the terms that actually occur in the merged list.
    txt = [term for term in txt if any(term == entry[0] for entry in mlist)]

    # Now create the adjacency matrix for the graph.
    txt.sort()
    text.tokens = txt
    amatrix = AdjMatrix()
    amatrix.createMatrix(txt, mlist, False)
    return amatrix