def getContext(wordref, i, window, stopwords, toporef):
    """Collect the context entries surrounding the toponym at position ``i``.

    The result always starts with ``[wordref[i], "MainTopo", 0]``. Positions
    before ``i`` are then scanned nearest-first, followed by positions after
    ``i`` nearest-first. Each retained position ``j`` contributes one
    ``[token, kind, i - j]`` entry, so the offset is positive for preceding
    tokens and negative for following ones.

    Args:
        wordref: Indexable collection of tokens. Positions ``1`` through
            ``len(wordref)`` may be accessed, so this is presumably a
            1-based mapping -- TODO confirm against the caller.
        i: Position of the main toponym.
        window: Context width. Preceding positions must satisfy
            ``j > i - window``; following ones ``j <= i + window``.
        stopwords: Container of tokens to leave out of the context.
        toporef: Container of positions that hold toponyms.

    Returns:
        A list of ``[token, kind, offset]`` lists, where ``kind`` is
        ``"MainTopo"``, ``"OtherTopo"`` or ``"Word"``.

    NOTE(review): the window is asymmetric -- at most ``window - 1``
    positions before ``i`` are visited but up to ``window`` after it. This
    is kept as-is because callers may rely on it.
    NOTE(review): this file defines ``getContext`` more than once; a later
    definition shadows this one.
    """
    contextlist = [[wordref[i], "MainTopo", 0]]

    def add_entry(j):
        # Append the entry for position j unless it is filtered out.
        token = wordref[j]
        offset = i - j
        if j in toporef:
            # Multi-word toponyms are joined with "|" so that they stay a
            # single space-free token.
            if " " in token:
                token = token.strip().replace(" ", "|")
            contextlist.append([token, "OtherTopo", offset])
            return
        if token in stopwords:
            return
        try:
            is_punctuation = (
                len(token) == 1
                and UnicodeBlocks.block(token) == "General Punctuation"
            )
        except Exception:
            # Best effort: a token that cannot be classified is skipped.
            # Catching Exception rather than using a bare except lets
            # KeyboardInterrupt and SystemExit propagate.
            return
        if not is_punctuation:
            contextlist.append([token, "Word", offset])

    # Preceding positions, nearest first; position 1 is the lowest visited.
    j = i - 1
    while j >= 1 and j > i - window:
        add_entry(j)
        j -= 1

    # Following positions, nearest first; len(wordref) is the highest visited.
    j = i + 1
    while j <= len(wordref) and j <= i + window:
        add_entry(j)
        j += 1

    return contextlist
def getContext(wordref, i, window, stopwords, toporef):
    """Collect the context entries surrounding the toponym at position ``i``.

    The result always starts with ``[wordref[i], "MainTopo", 0]``. Positions
    before ``i`` are then scanned nearest-first, followed by positions after
    ``i`` nearest-first. Each retained position ``j`` contributes one
    ``[token, kind, i - j]`` entry, so the offset is positive for preceding
    tokens and negative for following ones.

    Args:
        wordref: Indexable collection of tokens. Positions ``1`` through
            ``len(wordref)`` may be accessed, so this is presumably a
            1-based mapping -- TODO confirm against the caller.
        i: Position of the main toponym.
        window: Context width. Preceding positions must satisfy
            ``j > i - window``; following ones ``j <= i + window``.
        stopwords: Container of tokens to leave out of the context.
        toporef: Container of positions that hold toponyms.

    Returns:
        A list of ``[token, kind, offset]`` lists, where ``kind`` is
        ``"MainTopo"``, ``"OtherTopo"`` or ``"Word"``.

    NOTE(review): the window is asymmetric -- at most ``window - 1``
    positions before ``i`` are visited but up to ``window`` after it. This
    is kept as-is because callers may rely on it.
    NOTE(review): an earlier definition named ``getContext`` exists in this
    file and is shadowed by this one; consider removing the duplicate.
    """
    contextlist = [[wordref[i], "MainTopo", 0]]

    def add_entry(j):
        # Append the entry for position j unless it is filtered out.
        token = wordref[j]
        offset = i - j
        if j in toporef:
            # Multi-word toponyms are joined with "|" so that they stay a
            # single space-free token.
            if " " in token:
                token = token.strip().replace(" ", "|")
            contextlist.append([token, "OtherTopo", offset])
            return
        if token in stopwords:
            return
        try:
            is_punctuation = (
                len(token) == 1
                and UnicodeBlocks.block(token) == "General Punctuation"
            )
        except Exception:
            # Best effort: a token that cannot be classified is skipped.
            # Catching Exception rather than using a bare except lets
            # KeyboardInterrupt and SystemExit propagate.
            return
        if not is_punctuation:
            contextlist.append([token, "Word", offset])

    # Preceding positions, nearest first; position 1 is the lowest visited.
    j = i - 1
    while j >= 1 and j > i - window:
        add_entry(j)
        j -= 1

    # Following positions, nearest first; len(wordref) is the highest visited.
    j = i + 1
    while j <= len(wordref) and j <= i + window:
        add_entry(j)
        j += 1

    return contextlist