def reindex(ipt, begin=0):
    """Build an Index from the first two integer fields of every line in a file.

    Each line of ``ipt`` is split on whitespace. Lines with fewer than two
    fields are ignored; for the others the first two fields are converted
    with ``int`` and added to the index. ``index()`` is called on the result
    before it is returned.

    Args:
        ipt: path of the text file to read.
        begin: forwarded unchanged to the ``Index`` constructor
            (presumably the first index value to hand out -- confirm
            against ``Index``).

    Returns:
        The populated ``Index`` instance.

    Raises:
        ValueError: if one of the first two fields of a kept line is not an
            integer literal.
    """
    idx_vertex = Index(begin)
    # Stream the file inside a context manager so the handle is always
    # closed, even when int() raises on a malformed line.
    with open(ipt, 'r') as src:
        for raw in src:
            fields = raw.split()
            if len(fields) >= 2:
                idx_vertex.add(int(fields[0]))
                idx_vertex.add(int(fields[1]))
    idx_vertex.index()
    return idx_vertex
def sample_index():
    """Return an Index filled from a small table of characters and their names.

    Every name is broken into words (hyphens count as word separators) and
    each word is added to the index together with the character it names.
    """
    named_chars = (
        ("7", "DIGIT SEVEN"),
        ("8", "DIGIT EIGHT"),
        ("9", "DIGIT NINE"),
        (":", "COLON"),
        (";", "SEMICOLON"),
        ("<", "LESS-THAN SIGN"),
        ("=", "EQUALS SIGN"),
        (">", "GREATER-THAN SIGN"),
        ("≥", "GREATER-THAN OR EQUAL TO"),
    )
    index = Index()
    for symbol, name in named_chars:
        dehyphenated = name.replace("-", " ")
        for token in dehyphenated.split():
            index.add(token, symbol)
    return index
def load_cluster_info(g, ipt):
    """Read expected cluster labels from a file and attach them to graph nodes.

    Each usable line of ``ipt`` holds at least two whitespace-separated
    integer fields: a node key and a cluster label. Lines whose node key is
    not the ``name`` of a node in ``g`` are ignored, as are lines with fewer
    than two fields. The labels are collected in a fresh ``Index``; every
    matching node then gets ``expected`` set to the index of its label.

    Args:
        g: graph object providing ``n()``, ``nodes()`` and
            ``index.get_idx_by_key``.
        ipt: path of the text file to read.

    Returns:
        The ``Index`` built from the cluster labels.

    Raises:
        ValueError: if a needed field is not an integer literal.
    """
    from index import Index

    idx_clst = Index()
    with open(ipt, 'r') as src:  # close the handle deterministically
        rows = [raw.split() for raw in src]

    nodes = g.nodes()
    set_vtx = set(nodes[i].name for i in range(g.n()))

    # The rows are walked twice below, so they must be a real list: a lazy
    # filter() object (Python 3) would be exhausted after the first pass and
    # no node would ever receive its ``expected`` value.
    rows = [
        row for row in rows
        if len(row) >= 2 and int(row[0]) in set_vtx
    ]

    for row in rows:
        idx_clst.add(int(row[1]))
    idx_clst.index()

    for row in rows:
        nodeidx = g.index.get_idx_by_key(int(row[0]))
        clst = idx_clst.get_idx_by_key(int(row[1]))
        nodes[nodeidx].expected = clst
    return idx_clst
def load_graph(g, A, list_clst):
    """Fill graph ``g`` from adjacency matrix ``A`` and label its nodes.

    Vertices ``0 .. n-1`` are added and indexed, an edge is added for every
    strictly positive entry of ``A``, and each node's ``expected`` attribute
    is set to the index of the corresponding entry of ``list_clst``.

    Returns the ``Index`` built from ``list_clst``.
    """
    _, size = A.shape

    for vertex in range(size):
        g.add(vertex)
    g.indexing()

    for row in range(size):
        for col in range(size):
            if A.item(row, col) > 0:
                g.add_edge(row, col)

    # Cluster labels: index them, then store the index on each node.
    from index import Index
    cluster_index = Index()
    for label in list_clst:
        cluster_index.add(label)
    cluster_index.index()

    for position, label in enumerate(list_clst):
        g.nodes()[position].expected = cluster_index.get_idx_by_key(label)
    return cluster_index
class MailingList:
    """
    Mailing list read from an mbox, indexed in memory and then published
    """

    def __init__(self, config, lang=None):
        """
        Constructor method

        @param config: configuration object, read through config.get(...)
        @param lang: language; only stored on the instance here
        """
        self.config = config
        self.lang = lang
        self.subscribers = Subscribers(config)
        self.index = Index(self.config)

    def __createDir(self):
        """
        Create the directory named by the 'dir' setting if it is missing
        """
        path = self.config.get('dir')
        if not os.path.exists(path):
            os.mkdir(path)

    def __parse(self):
        """
        Parse mailing list and load all indexes into memory

        Every message is indexed, registered with the subscribers, linked
        to its parent (when the In-Reply-To target is already indexed) and
        chained to the previously read message. The number of messages
        read is stored in self.messages.
        """
        previous = None
        mbox = Mbox(self.config.get('mbox'))
        messages = 0

        message = mbox.nextMessage()
        while message is not None:
            # first load the message
            messages += 1
            msg = Message(message, self.config)

            # index it
            self.index.add(msg)
            self.subscribers.add(msg)
            subscriber = self.subscribers.get(msg.getFromMail())
            msg.setSender(subscriber)

            # parent message (refactor)
            inReplyTo = msg.getInReplyTo()
            if inReplyTo is not None:
                parent = self.index.get(inReplyTo)
                if parent is not None:
                    msg.setParent(parent)  # link child with parent
                    parent.addChild(msg)   # and parent with child

            # and previous and next by date
            if previous is not None:
                previous.setNextByDate(msg)
                msg.setPreviousByDate(previous)
            previous = msg

            # and continue with next message
            message = mbox.nextMessage()

        self.messages = messages

    def __mismatchReport(self, position, messageId, msg):
        """
        Build the warning printed when an mbox message does not match the
        indexed message at the same position

        @param position: ordinal number of the message in the mbox
        @param messageId: Message-Id header read from the mbox (may be None)
        @param msg: indexed message for that position, or None
        @return: the warning text
        """
        # msg is None when the index has nothing at this position; asking
        # it for its id would raise and abort the whole publish loop.
        indexedId = None
        if msg is not None:
            indexedId = msg.getMessageId()
        return 'Someone was wrong with message %s with ID %s (%s)' % (
            position, messageId, indexedId)

    def publish(self):
        """
        Publish the messages

        Runs a first pass that indexes the mbox, then a second pass that
        reads the mbox again and exports every message whose Message-Id
        agrees with the index.

        @return: number of messages processed in the second pass
        """
        self.__createDir()

        # first lap
        self.__parse()

        # and second lap
        mbox = Mbox(self.config.get('mbox'))
        messages = 0

        message = mbox.nextMessage()

        try:
            while message is not None:
                messages += 1
                messageId = message['Message-Id']
                msg = self.index.getMessage(messages)

                if msg is not None and msg.getMessageId() == messageId:
                    msg.setBody('FIXME')
                    msg.toRDF()
                else:
                    print(self.__mismatchReport(messages, messageId, msg))

                message = mbox.nextMessage()

            # NOTE(review): __toRDF is not defined in the visible part of
            # this class -- confirm it exists in the full file.
            self.__toRDF()

            self.subscribers.process()
            self.subscribers.export()

        except Exception as detail:
            # Deliberate best effort: report the problem and fall through
            # to the consistency check below.
            print(str(detail))

        if self.messages != messages:
            print('Something was wrong: ' + str(self.messages)
                  + ' parsed, but ' + str(messages) + ' processed')

        return messages
class Graph:
    '''Undirected graph whose nodes are addressed through an Index.

    Node names are first collected with add(); indexing() then creates one
    Node per indexed name. Neighbours are stored on the nodes as indices.
    '''

    def __init__(self):
        self.__n = 0        # number of nodes, set by indexing()
        self.__nodes = []   # Node objects, position == node index
        self.index = Index()

    def add(self, name):
        '''Register a node name with the index.'''
        self.index.add(name)

    def indexing(self):
        '''Create the Node objects for every indexed name.

        Does nothing when self.index.index() returns a false value.
        '''
        if not self.index.index():
            return
        self.__n = self.index.num_indices()
        for i in range(self.__n):
            node = Node(self.index.get_key_by_idx(i))
            self.__nodes.append(node)

    def add_edge(self, a, b):
        '''Connect the nodes named a and b in both directions.'''
        a = self.index.get_idx_by_key(a)
        b = self.index.get_idx_by_key(b)
        self.__nodes[a].add_neighbor(b)
        self.__nodes[b].add_neighbor(a)

    def nodes(self):
        '''Return the internal list of nodes (not a copy).'''
        return self.__nodes

    def n(self):
        '''Return the number of nodes.'''
        return self.__n

    def adjmatrix(self):
        '''Return the 0/1 adjacency matrix as a numpy matrix.'''
        m = [[0] * self.__n for _ in range(self.__n)]
        for i in range(self.__n):
            for j in self.__nodes[i].neighbor:
                m[i][j] = 1
        return np.asmatrix(m)

    def tryoutliers(self, cluster):
        '''Spread candidate clusters from settled nodes to unsettled ones.

        Nodes whose extra['status'] is 0 seed a breadth-first pass. Every
        neighbour with a negative status absorbs the seed's clusters; it is
        marked 1 once all of its neighbours have visited it, 2 once it
        holds `cluster` candidate clusters, and is queued again otherwise.

        Afterwards nodes with exactly one candidate get it as `actual`;
        nodes with none are given every cluster in range(cluster).

        Args:
            cluster: total number of clusters.

        Returns:
            List of indices of the nodes left with more than one candidate.
        '''
        from collections import deque

        q = deque()
        for i in range(self.__n):
            if self.__nodes[i].extra['status'] == 0:
                q.append(i)

        while q:
            p = q.popleft()
            pclusters = self.__nodes[p].clusters
            for i in self.__nodes[p].neighbor:
                node = self.__nodes[i]
                if node.extra['status'] >= 0 or p in node.extra['visited']:
                    continue
                node.clusters |= pclusters
                node.extra['visited'].append(p)
                if len(node.extra['visited']) == node.get_degree():
                    node.extra['status'] = 1
                elif len(node.clusters) == cluster:
                    node.extra['status'] = 2
                else:
                    q.append(i)

        list_multiple = []
        for i in range(self.__n):
            node = self.__nodes[i]
            if len(node.clusters) == 1:
                node.actual = list(node.clusters)[0]
            elif len(node.clusters) > 1:
                list_multiple.append(i)
            else:
                # No candidate reached this node: fall back to all clusters
                # instead of failing.
                node.clusters |= set(range(cluster))
                list_multiple.append(i)
        log.info('%d nodes have at least two clusters.' % len(list_multiple))
        return list_multiple

    def modularity(self):
        '''Return the modularity of the partition stored in node.actual.

        M is the sum of all node degrees. For every ordered pair (i, j) in
        the same cluster the term A_ij - deg(i) * deg(j) / M is summed and
        the total is divided by M.

        Returns:
            The modularity as a float; 0.0 when the graph has no edges
            (M == 0), where the quotient is undefined.
        '''
        n = self.n()
        M = 0.0
        for i in range(n):
            M += self.__nodes[i].get_degree()
        if M == 0:
            # Empty or edgeless graph: avoid dividing by zero below.
            return 0.0

        Q = 0.0
        for i in range(n):
            node_i = self.__nodes[i]
            deg_i = node_i.get_degree()
            # True for every j that is NOT a neighbour of i.
            non_neighbor = [True] * n
            for j in node_i.neighbor:
                non_neighbor[j] = False
                if node_i.actual == self.__nodes[j].actual:
                    Q += 1 - deg_i * self.__nodes[j].get_degree() * 1.0 / M
            for j in range(n):
                if non_neighbor[j] and node_i.actual == self.__nodes[j].actual:
                    Q -= deg_i * self.__nodes[j].get_degree() * 1.0 / M
        return Q / M

    def dump(self, opt):
        '''Write the graph's attributes to the file `opt` as indented JSON.

        Sets are written as sorted lists; Index and Node objects are
        written as their attribute dictionaries.
        '''
        import json

        def dft(obj):
            if isinstance(obj, set):
                return sorted(obj)
            elif isinstance(obj, (Index, Node)):
                return obj.__dict__
            # Any other unsupported object is written as null, exactly as
            # the implicit fall-through did before.
            return None

        # Context manager so the file is flushed and closed even when
        # serialization fails part-way.
        with open(opt, 'w') as out:
            json.dump(self.__dict__, out, default=dft, indent=4,
                      sort_keys=True)
class MailingList:
    """
    Mailing List abstraction
    """

    def __init__(self, config, lang=None):
        """
        Constructor method

        @param config: configuration
        @param lang: language
        """
        self.config = config
        self.lang = lang
        self.subscribers = Subscribers(config)
        self.index = Index(self.config)

    def __createDir(self):
        """
        Create the necessary directory

        The path comes from the 'dir' setting; nothing happens when it
        already exists.
        """
        path = self.config.get('dir')
        if not os.path.exists(path):
            os.mkdir(path)

    def __parse(self):
        """
        Parse mailing list and load all indexes into memory

        Every message is indexed, registered with the subscribers, linked
        to its parent (when the In-Reply-To target is already indexed) and
        chained to the previously read message. The number of messages
        read is stored in self.messages.
        """
        previous = None
        mbox = Mbox(self.config.get('mbox'))
        messages = 0

        message = mbox.nextMessage()
        while message is not None:
            # first load the message
            messages += 1
            msg = Message(message, self.config)

            # index it
            self.index.add(msg)
            self.subscribers.add(msg)
            subscriber = self.subscribers.get(msg.getFromMail())
            msg.setSender(subscriber)

            # parent message (refactor)
            inReplyTo = msg.getInReplyTo()
            if inReplyTo is not None:
                parent = self.index.get(inReplyTo)
                if parent is not None:
                    msg.setParent(parent)  # link child with parent
                    parent.addChild(msg)   # and parent with child

            # and previous and next by date
            if previous is not None:
                previous.setNextByDate(msg)
                msg.setPreviousByDate(previous)
            previous = msg

            # and continue with next message
            message = mbox.nextMessage()

        self.messages = messages

    def __mismatchReport(self, position, messageId, msg):
        """
        Build the warning printed when an mbox message does not match the
        indexed message at the same position

        @param position: ordinal number of the message in the mbox
        @param messageId: Message-Id header read from the mbox (may be None)
        @param msg: indexed message for that position, or None
        @return: the warning text
        """
        # msg is None when the index has nothing at this position; asking
        # it for its id would raise and abort the whole publish loop.
        indexedId = None
        if msg is not None:
            indexedId = msg.getMessageId()
        return 'Someone was wrong with message %s with ID %s (%s)' % (
            position, messageId, indexedId)

    def publish(self):
        """
        Publish the messages

        Runs a first pass that indexes the mbox, then a second pass that
        reads the mbox again, attaches each body and exports every message
        whose Message-Id agrees with the index.

        @return: number of messages processed in the second pass
        """
        self.__createDir()

        # first lap
        self.__parse()

        # and second lap
        mbox = Mbox(self.config.get('mbox'))
        messages = 0

        message = mbox.nextMessage()

        try:
            while message is not None:
                messages += 1
                messageId = message['Message-Id']
                msg = self.index.getMessage(messages)

                if msg is not None and msg.getMessageId() == messageId:
                    msg.setBody(message.fp.read())
                    msg.toRDF()
                else:
                    print(self.__mismatchReport(messages, messageId, msg))

                message = mbox.nextMessage()

            # NOTE(review): __toRDF is not defined in the visible part of
            # this class -- confirm it exists in the full file.
            self.__toRDF()

            if self.config.get('foaf'):
                self.subscribers.process()

            # NOTE(review): assumed to run regardless of the 'foaf'
            # setting; the original layout was ambiguous -- confirm.
            self.subscribers.export()

        except Exception as detail:
            # Deliberate best effort: report the problem and fall through
            # to the consistency check below.
            print(str(detail))

        if self.messages != messages:
            print('Something was wrong: ' + str(self.messages)
                  + ' parsed, but ' + str(messages) + ' processed')

        return messages
# NOTE(review): this excerpt starts mid-script. The leading exit() presumably
# closes a guard (for example a usage/argument check) that begins above the
# visible text, and `index` / `filename` are defined there as well -- confirm
# the layout against the full file.
exit()

# Counter handed to index.add(); presumably the 1-based number of the line
# being read -- confirm.
count = 1
keywords = index.get_keywords()
keywords.sort()

with open(filename) as f:
    # Use an efficient method to check whether a word read from the text
    # belongs to the index.
    # TODO: Improve 2 loops
    for line in f:
        words = line.split()
        # Every keyword is tested against the words of the current line.
        for i in keywords:
            if index.verify(i, words):
                index.add(i, count)
        count += 1

# Ask for a word, look it up, then print the index.
word = input( "\nSet a word to search in this index: " )
print('------------------------\n')
index.search(word)
print('------------------------\n')
index.show()
print('------------------------\n')
def test_unique_entry():
    """A key added once maps to a set holding exactly its one value."""
    index = Index()
    index.add("COLON", ":")
    expected = {":"}
    assert index["COLON"] == expected
def test_three_occurrences():
    """Values added under the same key accumulate into one set."""
    digits = ("7", "8", "9")
    index = Index()
    for digit in digits:
        index.add("DIGIT", digit)
    assert index["DIGIT"] == {"7", "8", "9"}