예제 #1
0
def reindex(ipt, begin=0):
    """Collect the vertex ids of an edge-list file into an Index.

    Every line of ``ipt`` is split on whitespace.  Lines with at least two
    fields contribute their first two fields, converted with ``int``;
    shorter lines (including blank ones) are ignored.

    :param ipt: path of the text file to read.
    :param begin: forwarded unchanged to ``Index(begin)``
        (presumably the first index value to hand out -- TODO confirm).
    :returns: the ``Index`` after ``index()`` has been called on it.
    :raises ValueError: if one of the first two fields is not an integer.
    """
    idx_vertex = Index(begin)
    # The with-block closes the file even when int() raises; iterating the
    # handle avoids loading the whole file into memory first.
    with open(ipt, 'r') as src:
        for raw in src:
            fields = raw.split()
            if len(fields) < 2:
                continue
            idx_vertex.add(int(fields[0]))
            idx_vertex.add(int(fields[1]))

    idx_vertex.index()
    return idx_vertex
예제 #2
0
def sample_index():
    """Return an Index filled from a small table of character names.

    Each name is split into words (hyphens count as word separators) and
    every word is added to the index together with its character.
    """
    idx = Index()
    entries = (
        ("7", "DIGIT SEVEN"),
        ("8", "DIGIT EIGHT"),
        ("9", "DIGIT NINE"),
        (":", "COLON"),
        (";", "SEMICOLON"),
        ("<", "LESS-THAN SIGN"),
        ("=", "EQUALS SIGN"),
        (">", "GREATER-THAN SIGN"),
        ("≥", "GREATER-THAN OR EQUAL TO"),
    )
    for character, name in entries:
        tokens = name.replace("-", " ").split()
        for token in tokens:
            idx.add(token, character)
    return idx
예제 #3
0
def load_cluster_info(g, ipt):
    """Read expected cluster labels from a file and attach them to nodes.

    Each usable line of ``ipt`` holds a vertex id followed by a cluster
    label, both integers, separated by whitespace.  Lines with fewer than
    two fields, and lines whose vertex id is not the ``name`` of one of the
    first ``g.n()`` nodes of ``g``, are ignored.

    :param g: graph object; this function uses ``g.n()``, ``g.nodes()`` and
        ``g.index.get_idx_by_key``.
    :param ipt: path of the text file to read.
    :returns: the ``Index`` built from the cluster labels that were kept.
    :raises ValueError: if a vertex id, or the cluster label of a kept
        line, is not an integer.
    """
    from index import Index
    idx_clst = Index()

    nodes = g.nodes()
    set_vtx = set(nodes[i].name for i in range(g.n()))

    # Keep the rows in a list: they are walked twice below, and a lazy
    # filter()/map() object would already be exhausted on the second pass.
    pairs = []
    with open(ipt, 'r') as src:
        for raw in src:
            fields = raw.split()
            if len(fields) < 2:
                continue
            vertex = int(fields[0])
            if vertex in set_vtx:
                pairs.append((vertex, int(fields[1])))

    # First pass: register every label so index() can number them.
    for _, label in pairs:
        idx_clst.add(label)
    idx_clst.index()

    # Second pass: store the label's index on the matching node.
    for vertex, label in pairs:
        nodeidx = g.index.get_idx_by_key(vertex)
        nodes[nodeidx].expected = idx_clst.get_idx_by_key(label)
    return idx_clst
예제 #4
0
def load_graph(g, A, list_clst):
    """Populate ``g`` from matrix ``A`` and label its nodes from ``list_clst``.

    Nodes are named 0..n-1, where n is the second dimension of ``A.shape``.
    An edge (i, j) is added for every entry of ``A`` that is greater than
    zero.  Each node's ``expected`` attribute is set to the index of the
    label found at the same position in ``list_clst``.

    Returns the Index built from the labels.
    """
    from index import Index

    _, n = A.shape  # shape must unpack into exactly two values

    for vertex in range(n):
        g.add(vertex)
    g.indexing()

    for row in range(n):
        for col in range(n):
            if not (A.item(row, col) > 0):
                continue
            g.add_edge(row, col)

    # load cluster
    idx_clst = Index()
    for label in list_clst:
        idx_clst.add(label)
    idx_clst.index()

    for nodeidx, label in enumerate(list_clst):
        g.nodes()[nodeidx].expected = idx_clst.get_idx_by_key(label)
    return idx_clst
예제 #5
0
class MailingList:
    """
    Mailing list read from an mbox and published message by message
    """

    def __init__(self, config, lang=None):
        """
        Constructor method

        @param config: configuration object, read through config.get(...)
        @param lang: language, stored unchanged on the instance
        """

        self.config = config
        self.lang = lang
        self.subscribers = Subscribers(config)
        self.index = Index(self.config)

    def __createDir(self):
        """
        Create the directory named by the 'dir' setting if it is missing
        """

        if not (os.path.exists(self.config.get('dir'))):
            os.mkdir(self.config.get('dir'))

    def __parse(self):
        """
        Parse mailing list and load all
        indexes into memory

        Every message of the mbox is wrapped in a Message, added to the
        index and to the subscribers, linked to its parent (when the
        In-Reply-To target is already indexed) and chained to the message
        read just before it. The number of messages read is stored in
        self.messages.
        """

        previous = None

        mbox = Mbox(self.config.get('mbox'))
        messages = 0
        message = mbox.nextMessage()

        while message is not None:
            #first load message
            messages += 1
            msg = Message(message, self.config)

            #index it
            self.index.add(msg)
            self.subscribers.add(msg)
            subscriber = self.subscribers.get(msg.getFromMail())
            msg.setSender(subscriber)

            #parent message (refactor)
            inReplyTo = msg.getInReplyTo()
            if inReplyTo is not None:
                parent = self.index.get(inReplyTo)
                if parent is not None:
                    msg.setParent(parent) #link child with parent
                    parent.addChild(msg) #and parent with child

            #and previous and next by date
            if previous is not None:
                previous.setNextByDate(msg)
                msg.setPreviousByDate(previous)

            previous = msg

            #and continue with next message
            message = mbox.nextMessage()

        self.messages = messages

    def publish(self):
        """
        Publish the messages

        Runs __parse() over the mbox, then reads the mbox a second time
        and exports each message whose Message-Id matches the message
        stored at the same position in the index.

        @return: number of messages read during the second lap
        """

        self.__createDir()

        #first lap
        self.__parse()

        #and second lap
        mbox = Mbox(self.config.get('mbox'))
        messages = 0

        message = mbox.nextMessage()

        # print(...) with one argument and "except ... as" are valid on
        # both Python 2.6+ and Python 3.
        try:

            while message is not None:
                messages += 1
                messageId = message['Message-Id']
                msg = self.index.getMessage(messages)

                if msg is not None and msg.getMessageId() == messageId:
                    msg.setBody('FIXME')
                    msg.toRDF()
                    #msg.toHTML()
                    #self.index.delete(messageId)
                else:
                    # msg is None when the index has no message at this
                    # position; report it instead of dereferencing None,
                    # which used to abort the whole lap via the except below.
                    if msg is not None:
                        indexedId = msg.getMessageId()
                    else:
                        indexedId = None
                    print('Someone was wrong with message ' + str(messages) + ' with ID ' + str(messageId) + ' (' + str(indexedId) + ')')

                message = mbox.nextMessage()

            # NOTE(review): __toRDF is not defined in the part of the class
            # visible here -- confirm it exists elsewhere.
            self.__toRDF()

            self.subscribers.process()
            self.subscribers.export()

        except Exception as detail:
            # Best-effort: any failure is printed and publishing stops.
            print(str(detail))


        if (self.messages != messages):
            print('Something was wrong: ' + str(self.messages) + ' parsed, but ' + str(messages) + ' processed')

        return messages
예제 #6
0
class Graph:
    '''Undirected graph whose nodes are addressed through an Index.

    Names are registered with add(), turned into Node objects by
    indexing(), and connected with add_edge().
    '''
    def __init__(self):
        self.__n = 0  # number of nodes, set by indexing()
        self.__nodes = []  # Node objects; list position is the node index
        self.index = Index()

    def add(self, name):
        '''Register a node name with the index; no Node is created yet.'''
        self.index.add(name)

    def indexing(self):
        '''Build the index and create one Node per index entry.

        Does nothing when self.index.index() returns a falsy value.
        '''
        if not self.index.index():
            return
        self.__n = self.index.num_indices()
        for i in range(self.__n):
            node = Node(self.index.get_key_by_idx(i))
            self.__nodes.append(node)

    def add_edge(self, a, b):
        '''Connect the nodes named a and b in both directions.'''
        a = self.index.get_idx_by_key(a)
        b = self.index.get_idx_by_key(b)
        self.__nodes[a].add_neighbor(b)
        self.__nodes[b].add_neighbor(a)

    def nodes(self):
        '''Return the internal list of Node objects (not a copy).'''
        return self.__nodes

    def n(self):
        '''Return the number of nodes created by indexing().'''
        return self.__n

    def adjmatrix(self):
        '''Return the n x n 0/1 adjacency matrix as a numpy matrix.'''
        m = [[0] * self.__n for _ in range(self.__n)]
        for i in range(self.__n):
            for j in self.__nodes[i].neighbor:
                m[i][j] = 1
        return np.asmatrix(m)

    def tryoutliers(self, cluster):
        '''Spread candidate clusters from settled nodes to unsettled ones.

        Nodes whose extra['status'] is 0 seed a breadth-first walk.  A
        neighbour with a negative status receives the candidate clusters
        of the node being expanded.  Its status becomes 1 once it has been
        reached from as many nodes as its degree, or 2 once it holds
        `cluster` candidates; otherwise it is queued for expansion too.

        Afterwards a node with exactly one candidate gets it as `actual`,
        and a node with no candidate is given all of range(cluster).

        cluster: compared with the size of a candidate set and used as
            range(cluster) -- appears to be the total number of clusters
            (TODO confirm).

        Returns the indices of the nodes that do not have exactly one
        candidate cluster.
        '''
        from collections import deque

        nodes = self.__nodes
        q = deque()
        for i in range(self.__n):
            if nodes[i].extra['status'] == 0:
                q.append(i)

        while q:
            p = q.popleft()
            pclusters = nodes[p].clusters
            for i in nodes[p].neighbor:
                node = nodes[i]
                # Skip nodes that are already settled or that p has
                # already contributed to.
                if node.extra['status'] >= 0 or p in node.extra['visited']:
                    continue
                node.clusters |= pclusters
                node.extra['visited'].append(p)
                if len(node.extra['visited']) == node.get_degree():
                    node.extra['status'] = 1
                elif len(node.clusters) == cluster:
                    node.extra['status'] = 2
                else:
                    q.append(i)

        list_multiple = []
        for i in range(self.__n):
            node = nodes[i]
            if len(node.clusters) == 1:
                node.actual = list(node.clusters)[0]
            elif len(node.clusters) > 1:
                list_multiple.append(i)
            else:
                # No candidate reached this node: allow every cluster.
                node.clusters |= set(range(cluster))
                list_multiple.append(i)
        log.info('%d nodes have at least two clusters.' % len(list_multiple))
        return list_multiple

    def modularity(self):
        '''Return the modularity of the partition stored in `actual`.

        With M the sum of all node degrees, every ordered pair (i, j) of
        nodes sharing the same `actual` value contributes -k_i*k_j/M, plus
        1 when j is listed as a neighbour of i; the total is divided by M.

        Returns 0.0 when M is zero (no nodes or no edges) instead of
        dividing by zero.
        '''
        count = self.n()
        nodes = self.__nodes
        # Degrees are looked up once instead of inside the double loop.
        degrees = [nodes[i].get_degree() for i in range(count)]
        M = sum(degrees, 0.0)
        if M == 0:
            return 0.0

        Q = 0.0
        for i in range(count):
            not_adjacent = [True] * count
            for j in nodes[i].neighbor:
                not_adjacent[j] = False
                if nodes[i].actual == nodes[j].actual:
                    Q += 1 - degrees[i] * degrees[j] * 1.0 / M
            for j in range(count):
                if not_adjacent[j] and nodes[i].actual == nodes[j].actual:
                    Q -= degrees[i] * degrees[j] * 1.0 / M
        return Q / M

    def dump(self, opt):
        '''Write the graph's attributes to the file `opt` as indented JSON.

        Sets are written as sorted lists and Index/Node objects as their
        attribute dictionaries.
        '''
        def dft(obj):
            if isinstance(obj, set):
                return sorted(obj)
            elif isinstance(obj, (Index, Node)):
                return obj.__dict__
            # Any other type falls through and is written as null.

        import json
        # The with-block closes (and therefore flushes) the output file.
        with open(opt, 'w') as out:
            json.dump(self.__dict__,
                      out,
                      default=dft,
                      indent=4,
                      sort_keys=True)
예제 #7
0
class MailingList:
    """
    Mailing List abstraction
    """
    def __init__(self, config, lang=None):
        """
        Constructor method
        
        @param config: configuration
        @param lang: language
        """

        self.config = config
        self.lang = lang
        self.subscribers = Subscribers(config)
        self.index = Index(self.config)

    def __createDir(self):
        """
        Create the necessary directory (the 'dir' setting) if it is missing
        """

        if not (os.path.exists(self.config.get('dir'))):
            os.mkdir(self.config.get('dir'))

    def __parse(self):
        """
        Parse mailing list and load all indexes into memory

        Every message of the mbox is wrapped in a Message, added to the
        index and to the subscribers, linked to its parent (when the
        In-Reply-To target is already indexed) and chained to the message
        read just before it. The number of messages read is stored in
        self.messages.
        """

        previous = None

        mbox = Mbox(self.config.get('mbox'))
        messages = 0
        message = mbox.nextMessage()

        while message is not None:
            #first load message
            messages += 1
            msg = Message(message, self.config)

            #index it
            self.index.add(msg)
            self.subscribers.add(msg)
            subscriber = self.subscribers.get(msg.getFromMail())
            msg.setSender(subscriber)

            #parent message (refactor)
            inReplyTo = msg.getInReplyTo()
            if inReplyTo is not None:
                parent = self.index.get(inReplyTo)
                if parent is not None:
                    msg.setParent(parent)  #link child with parent
                    parent.addChild(msg)  #and parent with child

            #and previous and next by date
            if previous is not None:
                previous.setNextByDate(msg)
                msg.setPreviousByDate(previous)

            previous = msg

            #and continue with next message
            message = mbox.nextMessage()

        self.messages = messages

    def publish(self):
        """
        Publish the messages

        Runs __parse() over the mbox, then reads the mbox a second time
        and exports each message whose Message-Id matches the message
        stored at the same position in the index.

        @return: number of messages read during the second lap
        """

        self.__createDir()

        #first lap
        self.__parse()

        #and second lap
        mbox = Mbox(self.config.get('mbox'))
        messages = 0

        message = mbox.nextMessage()

        # print(...) with one argument and "except ... as" are valid on
        # both Python 2.6+ and Python 3.
        try:

            while message is not None:
                messages += 1
                messageId = message['Message-Id']
                msg = self.index.getMessage(messages)

                if msg is not None and msg.getMessageId() == messageId:
                    msg.setBody(message.fp.read())
                    msg.toRDF()
                    #msg.toHTML()
                    #self.index.delete(messageId)
                else:
                    # msg is None when the index has no message at this
                    # position; report it instead of dereferencing None,
                    # which used to abort the whole lap via the except below.
                    if msg is not None:
                        indexedId = msg.getMessageId()
                    else:
                        indexedId = None
                    print('Someone was wrong with message ' + str(
                        messages) + ' with ID ' + str(messageId) + ' (' + str(
                            indexedId) + ')')

                message = mbox.nextMessage()

            # NOTE(review): __toRDF is not defined in the part of the class
            # visible here -- confirm it exists elsewhere.
            self.__toRDF()

            if (self.config.get('foaf')):
                self.subscribers.process()

            self.subscribers.export()

        except Exception as detail:
            # Best-effort: any failure is printed and publishing stops.
            print(str(detail))

        if (self.messages != messages):
            print('Something was wrong: ' + str(
                self.messages) + ' parsed, but ' + str(messages) + ' processed')

        return messages
예제 #8
0
        exit()

    count = 1

    keywords = index.get_keywords()
    keywords.sort()

    with open(filename) as f:
        # Use an efficient method to check whether a word read
        # from the text belongs to the index.
        # TODO: Improve 2 loops

        for line in f:
            words = line.split()
            for i in keywords:
                if index.verify(i, words):
                    index.add(i, count)

            count += 1

    word = input(
        "\nSet a word to search in this index: "
    )

    print('------------------------\n')
    index.search(word)

    print('------------------------\n')
    index.show()
    print('------------------------\n')
예제 #9
0
def test_unique_entry():
    """One add() makes the key map to a set holding just that value."""
    index = Index()
    index.add("COLON", ":")
    expected = {":"}
    assert index["COLON"] == expected
예제 #10
0
def test_three_occurrences():
    """Three values added under the same key are all kept in its set."""
    idx = Index()
    for digit in ("7", "8", "9"):
        idx.add("DIGIT", digit)
    expected = {"7", "8", "9"}
    assert idx["DIGIT"] == expected