Esempio n. 1
0
class Repository(object):
    '''
    The git repository.

    Wraps a workspace directory: the staging index stored at
    ``<workspace>/.git/index``, the configuration, and the content of the
    ref file that ``.git/HEAD`` currently names.
    '''

    # Name of the repository metadata directory inside the workspace.
    GIT_DIR = '.git'

    # Directory names, listed parent-before-child.
    # NOTE(review): presumably created below GIT_DIR when a repository is
    # initialised -- the code consuming this list is not in this class.
    INIT_DIR = [
        'branches',
        'hooks',
        'info',
        'objects',
        'objects/info',
        'objects/pack',
        'refs',
        'refs/heads',
        'refs/tags',
    ]

    # [relative path, content] pairs.
    # NOTE(review): presumably the files written on initialisation -- confirm
    # against the code that consumes this list.
    INIT_FILE = [
        ['HEAD', 'ref: refs/heads/master'],
        ['description', 'Unnamed repository'],
        ['info/exclude', ''],
    ]

    def __init__(self, workspace):
        '''
        Open the repository located in ``workspace``.

        ``head_tree`` is set to the stripped content of the current branch's
        ref file, or to None when that file does not exist.
        '''
        self.workspace = workspace
        self.index = Index(os.path.join(workspace, self.GIT_DIR, 'index'))
        self.config = Config(workspace)
        self.head_path = self._get_head_path()
        self.head_tree = None
        if os.path.exists(self.head_path):
            self.head_tree = read_file(self.head_path).strip()

    def _get_head_path(self):
        '''
        Return the path of the ref file named by ``.git/HEAD``.

        Only the text after the last '/' of HEAD is used as the branch name.
        NOTE(review): a branch name that itself contains '/' would be
        truncated to its last component here -- confirm this is acceptable.
        '''
        head_file = os.path.join(self.workspace, self.GIT_DIR, 'HEAD')
        branch_name = read_file(head_file).strip('\n').rsplit('/', 1)[-1]
        return os.path.join(self.workspace, self.GIT_DIR, 'refs', 'heads',
                            branch_name)

    def stage(self, files):
        '''
        Add ``files`` to the index and write the index out.

        For every file a Blob is built from its content, the blob is written
        unless ``blob.path`` already exists, and an index entry carrying the
        file's stat metadata and the blob's sha1 is added. Any exception is
        caught and reported on stdout; nothing is re-raised.
        '''
        # Pre-bound so the error message below never hits an unbound name
        # (e.g. when ``files`` is empty and writing the index fails).
        path = None
        try:
            for path in files:
                # NOTE(review): the content is read from ``path`` as given,
                # while the stat call below joins it onto the workspace --
                # confirm how read_file resolves relative paths.
                content = read_file(path)
                blob = Blob(self.workspace, content)
                if not os.path.exists(blob.path):
                    write_object_to_file(blob.path, blob.content)
                info = os.stat(os.path.join(self.workspace, path))
                self.index.add_entry(
                    path,
                    ctime=info.st_ctime,
                    mtime=info.st_mtime,
                    dev=info.st_dev,
                    ino=info.st_ino,
                    mode=cal_mode(info.st_mode),
                    uid=info.st_uid,
                    gid=info.st_gid,
                    size=info.st_size,
                    sha1=blob.sha1,
                    flags=0,
                )
            self.index.write_to_file()

        except Exception as e:
            # Parenthesised single argument: same output on Python 2 and 3.
            print('stage file %s error: %s' % (path, e))
Esempio n. 2
0
class Repository(object):
    '''
    The git repository.

    Wraps a workspace directory: the staging index stored at
    ``<workspace>/.git/index``, the configuration, and the content of the
    ref file that ``.git/HEAD`` currently names.
    '''

    # Name of the repository metadata directory inside the workspace.
    GIT_DIR = '.git'

    # Directory names, listed parent-before-child.
    # NOTE(review): presumably created below GIT_DIR when a repository is
    # initialised -- the code consuming this list is not in this class.
    INIT_DIR = [
        'branches',
        'hooks',
        'info',
        'objects',
        'objects/info',
        'objects/pack',
        'refs',
        'refs/heads',
        'refs/tags',
    ]

    # [relative path, content] pairs.
    # NOTE(review): presumably the files written on initialisation -- confirm
    # against the code that consumes this list.
    INIT_FILE = [
        ['HEAD', 'ref: refs/heads/master'],
        ['description', 'Unnamed repository'],
        ['info/exclude', ''],
    ]

    def __init__(self, workspace):
        '''
        Open the repository located in ``workspace``.

        ``head_tree`` is set to the stripped content of the current branch's
        ref file, or to None when that file does not exist.
        '''
        self.workspace = workspace
        self.index = Index(os.path.join(workspace, self.GIT_DIR, 'index'))
        self.config = Config(workspace)
        self.head_path = self._get_head_path()
        self.head_tree = None
        if os.path.exists(self.head_path):
            self.head_tree = read_file(self.head_path).strip()

    def _get_head_path(self):
        '''
        Return the path of the ref file named by ``.git/HEAD``.

        Only the text after the last '/' of HEAD is used as the branch name.
        NOTE(review): a branch name that itself contains '/' would be
        truncated to its last component here -- confirm this is acceptable.
        '''
        head_file = os.path.join(self.workspace, self.GIT_DIR, 'HEAD')
        branch_name = read_file(head_file).strip('\n').rsplit('/', 1)[-1]
        return os.path.join(self.workspace, self.GIT_DIR, 'refs', 'heads',
                            branch_name)

    def stage(self, files):
        '''
        Add ``files`` to the index and write the index out.

        For every file a Blob is built from its content, the blob is written
        unless ``blob.path`` already exists, and an index entry carrying the
        file's stat metadata and the blob's sha1 is added. Any exception is
        caught and reported on stdout; nothing is re-raised.
        '''
        # Pre-bound so the error message below never hits an unbound name
        # (e.g. when ``files`` is empty and writing the index fails).
        path = None
        try:
            for path in files:
                # NOTE(review): the content is read from ``path`` as given,
                # while the stat call below joins it onto the workspace --
                # confirm how read_file resolves relative paths.
                content = read_file(path)
                blob = Blob(self.workspace, content)
                if not os.path.exists(blob.path):
                    write_object_to_file(blob.path, blob.content)
                info = os.stat(os.path.join(self.workspace, path))
                self.index.add_entry(
                    path,
                    ctime=info.st_ctime,
                    mtime=info.st_mtime,
                    dev=info.st_dev,
                    ino=info.st_ino,
                    mode=cal_mode(info.st_mode),
                    uid=info.st_uid,
                    gid=info.st_gid,
                    size=info.st_size,
                    sha1=blob.sha1,
                    flags=0,
                )
            self.index.write_to_file()

        except Exception as e:
            # Parenthesised single argument: same output on Python 2 and 3.
            print('stage file %s error: %s' % (path, e))
Esempio n. 3
0
class Repository(object):
    """
    The git repository.

    Wraps a workspace directory together with its staging index (stored at
    ``<workspace>/.git/index``) and its configuration.
    """

    # Name of the repository metadata directory inside the workspace.
    GIT_DIR = ".git"

    # Directory names, listed parent-before-child.
    # NOTE(review): presumably created below GIT_DIR when a repository is
    # initialised -- the code consuming this list is not in this class.
    INIT_DIR = [
        "branches",
        "hooks",
        "info",
        "objects",
        "objects/info",
        "objects/pack",
        "refs",
        "refs/heads",
        "refs/tags",
    ]

    # [relative path, content] pairs.
    # NOTE(review): presumably the files written on initialisation -- confirm
    # against the code that consumes this list.
    INIT_FILE = [
        ["HEAD", "ref: refs/heads/master"],
        ["description", "Unnamed repository"],
        ["info/exclude", ""],
    ]

    def __init__(self, workspace):
        """Open the repository located in ``workspace``."""
        self.workspace = workspace
        self.index = Index(os.path.join(workspace, self.GIT_DIR, "index"))
        self.config = Config(workspace)

    def stage(self, files):
        """
        Add ``files`` to the index and write the index out.

        For every file a Blob is built from its content, the blob is written
        unless ``blob.path`` already exists, and an index entry carrying the
        file's stat metadata and the blob's sha1 is added. Any exception is
        caught and reported on stdout; nothing is re-raised.
        """
        # Pre-bound so the error message below never hits an unbound name
        # (e.g. when ``files`` is empty and writing the index fails).
        path = None
        try:
            for path in files:
                # NOTE(review): the content is read from ``path`` as given,
                # while the stat call below joins it onto the workspace --
                # confirm how read_file resolves relative paths.
                content = read_file(path)
                blob = Blob(self.workspace, content)
                if not os.path.exists(blob.path):
                    write_object_to_file(blob.path, blob.content)
                info = os.stat(os.path.join(self.workspace, path))
                self.index.add_entry(
                    path,
                    ctime=info.st_ctime,
                    mtime=info.st_mtime,
                    dev=info.st_dev,
                    ino=info.st_ino,
                    mode=cal_mode(info.st_mode),
                    uid=info.st_uid,
                    gid=info.st_gid,
                    size=info.st_size,
                    sha1=blob.sha1,
                    flags=0,
                )
            self.index.write_to_file()

        except Exception as e:
            # Parenthesised single argument: same output on Python 2 and 3.
            print("stage file %s error: %s" % (path, e))
Esempio n. 4
0
        def crawl(self, restart, seed=None, max_pages=1000):
            """
            Crawl the web starting from the seed url and using a potentially already
            dumped list of urls to crawl.

            A seed url is optional when restarting a crawl session, however, when
            setting up a new crawling session (restart being False), a seed url is
            mandatory.

            ``max_pages`` caps how many pages are fetched and indexed in this
            call; it defaults to 1000, the limit that used to be hard-coded.

            A StopIteration raised while crawling or indexing ends the session
            quietly (presumably raised by the crawler once no urls remain --
            confirm against Crawler.crawl).
            """
            # Build a Crawler that will start crawling from the seed.
            crawler = Crawler(restart, seed)

            # Build an Index. If restart is True, this will append data to the
            # existing index in the current directory.
            index = Index(restart)
            try:
                # range() instead of xrange(): valid on both Python 2 and 3.
                for _ in range(max_pages):
                    index.add_entry(crawler.crawl())
            except StopIteration:
                pass
Esempio n. 5
0
        def crawl(self, restart, seed=None, max_pages=1000):
            """
            Crawl the web starting from the seed url and using a potentially already
            dumped list of urls to crawl.

            A seed url is optional when restarting a crawl session, however, when
            setting up a new crawling session (restart being False), a seed url is
            mandatory.

            ``max_pages`` caps how many pages are fetched and indexed in this
            call; it defaults to 1000, the limit that used to be hard-coded.

            A StopIteration raised while crawling or indexing ends the session
            quietly (presumably raised by the crawler once no urls remain --
            confirm against Crawler.crawl).
            """
            # Build a Crawler that will start crawling from the seed.
            crawler = Crawler(restart, seed)

            # Build an Index. If restart is True, this will append data to the
            # existing index in the current directory.
            index = Index(restart)
            try:
                # range() instead of xrange(): valid on both Python 2 and 3.
                for _ in range(max_pages):
                    index.add_entry(crawler.crawl())
            except StopIteration:
                pass
Esempio n. 6
0
File: peer.py Progetto: qkniep/naxos
class Peer(Thread):
    """Overlay network peer maintaining a network node, paxos instances, and an index.

    Serves as an index server to the clients.
    Coordinates with peers through paxos to maintain a consistent index.
    """

    SELECT_TIMEOUT = 1  # seconds one selector.select() call may block in run()
    KEEPALIVE_TIMEOUT = 5  # seconds between own keepalives; 3x this counts as a timeout
    VERSION = '0.3.0'

    def __init__(self, first=True, addr=None):
        """Create the peer's queue, selector, cache, index and network node.

        Args:
            first: When true, a PaxosNode is created right away. Otherwise the
                peer starts without one, asks ``addr`` to join and queues a
                'first_connection' command for the main loop.
            addr: Address handed to ``connect_to_paxos`` when ``first`` is
                false; not used otherwise.
        """
        super().__init__()  # Thread constructor

        self.queue = util.PollableQueue()
        self.selector = selectors.DefaultSelector()
        self.cache = Cache()
        self.index = Index()
        self.network = NetworkNode(self.selector, self.cache)
        if first:
            self.paxos = PaxosNode(self.network)
            paxos_header = '- This peer is now operating Paxos node %s' % self.paxos.node_id()
        else:
            self.paxos = None
            self.connect_to_paxos(addr)
            self.queue.put(('first_connection', {}))
            paxos_header = '- This peer is not yet operating as a Paxos node.'
        self.last_keepalive = time.time()
        self.peer_keepalives = {}  # node_id -> time.time() of the last keepalive seen
        self._running = True
        self.periodic_runner = PeriodicRunner()
        self.periodic_runner.start()

        # Startup banner, framed by '=' rules as wide as the longest line.
        naxos_header = '- Running Naxos v%s' % self.VERSION
        net_header = '- This peer is listening for incoming connections on: %s:%s' % self.network.listen_addr
        mid = '=' * max(len(naxos_header), len(net_header), len(paxos_header))

        print('\n'.join((mid, naxos_header, net_header, paxos_header, mid)))
        self.selector.register(self.queue, selectors.EVENT_READ)

    def run(self):  # called by Thread.start()
        """Main loop: Handles incoming messages and commands sent from main thread."""
        try:
            while self._running:
                events = self.selector.select(timeout=self.SELECT_TIMEOUT)
                if self.paxos is not None:
                    self.check_keepalive_timeouts()
                for key, mask in events:
                    if key.fileobj is self.queue:
                        self.handle_queue()
                    elif key.fileobj is self.network.listen_sock:
                        self.network.accept_incoming_connection()
                    else:
                        self._service_socket(key.fileobj, mask)
                time.sleep(0.0001)  # performance
        finally:
            log.info('Shutting down this peer...')
            self.periodic_runner.stop()
            self.network.reset()

    def _service_socket(self, sock, mask):
        """Process every message the network layer yields for ``sock``."""
        for msg in self.network.service_connection(sock, mask):
            # Resolve the remote address once per message, so a socket that
            # gets closed while the message is handled cannot make a later
            # getpeername() call raise and take the main loop down.
            peer_addr = sock.getpeername()
            self.cache.update_routing(peer_addr, msg)
            self.handle_message(self.network.get_connection(peer_addr), msg)
            self.cache.processed(peer_addr, msg)

    def check_keepalive_timeouts(self):
        """Check if any peer timed out long enough to assume they are offline.

        A leader proposes a 'leave' for every peer silent for more than three
        keepalive intervals; a non-leader starts an election when the leader
        has been silent that long. Afterwards this peer's own keepalive is
        sent if one interval has passed. Requires ``self.paxos`` to be set.
        """
        current_time = time.time()
        leader_id = self.paxos.current_leader
        if self.paxos.is_leader():
            # Iterate a snapshot so the dict may change size while rounds
            # are being started without breaking the iteration.
            for node_id, last_keepalive in list(self.peer_keepalives.items()):
                if current_time - last_keepalive > 3 * self.KEEPALIVE_TIMEOUT:
                    self.paxos.start_paxos_round({
                        'change': 'leave',
                        'node_id': node_id,
                    })
                    # Push the timestamp forward so the same peer is not
                    # reported again on the very next pass.
                    self.peer_keepalives[node_id] = current_time - self.KEEPALIVE_TIMEOUT
        elif leader_id in self.peer_keepalives:
            last_leader_keepalive = self.peer_keepalives[leader_id]
            if current_time - last_leader_keepalive > 3 * self.KEEPALIVE_TIMEOUT:  # TODO: randomize timeout
                self.paxos.start_election()
                self.peer_keepalives[leader_id] = current_time - self.KEEPALIVE_TIMEOUT

        if current_time - self.last_keepalive > self.KEEPALIVE_TIMEOUT:
            keepalive = {
                'do': 'keepalive',
                'node_id': self.paxos.node_id()
            }
            if self.paxos.is_leader():
                self.network.broadcast(keepalive)
            else:
                self.network.send(leader_id, keepalive)
            self.last_keepalive = current_time

    def handle_queue(self):
        """Handles the next incoming message on the thread-safe queue.
        The message was sent either by this thread or the CLI (stdin) thread.

        Raises:
            ValueError: If the command is not one of the known commands.
        """
        cmd, payload = self.queue.get()
        if cmd == 'connect':
            self.network.connect_to_node(payload['addr'], 'paxos_join_request')
            # TODO: add to self.paxos.peer_addresses
        elif cmd == 'first_connection':
            # self.network.connect_to_node(payload['addr'])
            self.network.broadcast({
                'do': 'ping',
            })
            self.periodic_runner.register(watch_address_pool, {
                'queue': self.queue,
                'network': self.network,
                'old': set(),
            }, 0.5)
        elif cmd == 'connect_to_sampled':
            picked = payload['picked']
            log.debug("picked: %s", picked)
            for addr in picked:
                self.network.connect_to_node(addr)
            self.periodic_runner.unregister(watch_address_pool)
        elif cmd == 'start_paxos':
            if self.paxos is not None:
                self.paxos.start_paxos_round(payload['value'])
        else:
            raise ValueError('Unknown command: %s' % cmd)

    def handle_message(self, conn, msg):
        """Handles the Message msg, which arrived on the connection conn.
        The message might have been sent by another paxos peer or a client.

        Messages already seen by the cache are skipped; messages addressed to
        another peer are forwarded; everything else is dispatched on
        ``msg['do']``.
        """
        log.info('[IN]:\t%s', msg)
        if self.cache.seen(msg):
            log.info(
                "Skip handling message since it has already been handled.")
            return

        sock = conn.sock
        cmd = msg['do']
        to = msg['to']
        src = msg['from']

        # forward messages that are not for this peer.
        # if it is a broadcast, cmd handling should call broadcast.
        if to not in (self.network.unique_id_from_own_addr(), 'broadcast'):
            log.info("Forward message")
            self.network.send(to, msg)
            return

        # ping broadcast to discover peers in the network
        if cmd == 'ping':
            self.network.send(src, {
                'do': 'ping_response',
                'addr': self.network.listen_addr,
            })
            self.network.broadcast(msg, sock)

        # response to a ping broadcast to the one who initially sent the ping, delivering own addr
        elif cmd == 'ping_response':
            # has to be for this peer, since it would have been forwarded otherwise.
            self.network.address_pool.add(tuple(
                msg['addr']))  # remember this addr

        # broadcast to analyze the overlay structure
        elif cmd == 'discover_overlay':
            self.network.send(
                src, {
                    'do':
                    'discover_overlay_response',
                    'neighbours': [
                        neighbour.get_identifier()
                        for neighbour in self.network.connections.values()
                        if not neighbour.is_client()
                    ]
                })
            self.network.broadcast(msg, sock)

        # NOTE(review): this starts a second, independent if/elif chain, so a
        # ping / ping_response / discover_overlay handled above still reaches
        # the ``self.paxos is None`` branch below and may trigger a
        # 'try_other_peer' reply. Kept as is -- confirm whether this ``if``
        # was meant to be ``elif``.
        if cmd == 'paxos_join_confirm':
            self.paxos = PaxosNode(self.network, msg['leader'])
            self.network.send(self.paxos.current_leader, {
                'do': 'keepalive',
                'node_id': self.paxos.node_id()
            })

        elif self.paxos is None:  # do not handle other Messages if not yet part of paxos
            if self.network.connections:
                self.network.send(
                    src, {
                        'do': 'try_other_peer',
                        'addr': self.network.get_random_listen_addr(),
                    })

        elif cmd == 'hello':
            self.network.set_remote_listen_addr(sock,
                                                tuple(msg['listen_addr']))
        elif cmd == 'client_hello':
            self.network.set_http_addr(sock, tuple(msg['http_addr']))
        elif cmd == 'keepalive':
            self.peer_keepalives[msg['node_id']] = time.time()
            log.debug(self.peer_keepalives)

        elif cmd == 'paxos_join_request':
            self.network.set_remote_listen_addr(sock,
                                                tuple(msg['listen_addr']))
            index = self.paxos.get_last_applied_value_index()
            # A group of one admits the joiner directly, without a paxos round.
            if self.paxos.group_sizes[index] == 1:
                self.paxos.group_sizes[index] += 1
                self.send_paxos_join_confirmation(src)
            else:
                self.run_paxos({
                    'change': 'join',
                    'node_id': src,
                    'listen_addr': tuple(msg['listen_addr']),
                })
        elif cmd == 'paxos_relay':
            self.paxos.start_paxos_round(msg['value'])
        elif cmd == 'paxos_prepare':
            self.paxos.handle_prepare(src, tuple(msg['proposal_id']))
        elif cmd == 'paxos_promise':
            self.paxos.handle_promise(tuple(msg['proposal_id']), msg['acc_id'],
                                      msg['accepted'], msg['majority'])
        elif cmd == 'paxos_propose':
            self.paxos.handle_propose(src, tuple(msg['proposal_id']),
                                      msg['index'], msg['value'])
        elif cmd == 'paxos_accept':
            index, chosen_value = self.paxos.handle_accept(
                tuple(msg['proposal_id']), msg['index'])
            if chosen_value is not None:
                self.update_paxos_log_and_apply_value(index)
        elif cmd == 'paxos_learn':  # TODO: handle duplicate learns
            if len(self.paxos.chosen) <= msg['index'] or not self.paxos.chosen[
                    msg['index']]:
                self.paxos.handle_learn(msg['index'], msg['value'])
                self.update_paxos_log_and_apply_value(msg['index'])
        elif cmd == 'paxos_fill_log_hole':
            self.network.send(
                src, {
                    'do': 'paxos_learn',
                    'index': msg['index'],
                    'value': self.paxos.log[msg['index']],
                })

        elif cmd == 'index_search':
            self.network.send(
                src,
                {
                    'do': 'index_search_result',
                    'query':
                    msg['filename'],  # in case of multiple searches, out of order...
                    'addr': self.index.search_entry(msg['filename']),
                })
        elif cmd == 'index_add':
            addr = self.network.get_http_addr(sock)
            log.debug(self.paxos.log)
            # if self.paxos.group_sizes[len(self.paxos.log)] == 1:
            # A group of one updates its index directly, without a paxos round.
            if self.paxos.group_sizes[
                    self.paxos.get_last_applied_value_index()] == 1:
                self.index.add_entry(msg['filename'], addr)
            else:
                self.run_paxos({
                    'change': 'add',
                    'entry': msg['filename'],
                    'addr': addr,
                })
        elif cmd == 'index_remove':
            if self.paxos.group_sizes[
                    self.paxos.get_last_applied_value_index()] == 1:
                self.index.remove_entry(msg['filename'])
            else:
                self.run_paxos({
                    'change': 'remove',
                    'entry': msg['filename'],
                })

    def stop(self):
        """Stop this Peer's thread from running, initiating clean shutdown."""
        self._running = False

    def connect_to_paxos(self, addr):
        """Connect to the node at ``addr``, announcing a 'paxos_join_request'."""
        self.network.connect_to_node(addr, 'paxos_join_request')

    def run_paxos(self, value):
        """Start a paxos round for ``value``; does nothing without a paxos node."""
        if self.paxos is not None:
            self.paxos.start_paxos_round(value)

    def update_paxos_log_and_apply_value(self, index):
        """Apply the chosen log entries from ``index`` onwards, in order.

        Nothing is applied unless ``self.paxos.fix_log_holes(index)`` returns
        a true value; applying stops at the first entry not marked as chosen.
        """
        if self.paxos.fix_log_holes(index):
            while len(self.paxos.chosen) > index and self.paxos.chosen[index]:
                self.apply_chosen_value(index, self.paxos.log[index])
                index += 1

    def apply_chosen_value(self, index, value):
        """Applies the changes needed after selecting value through paxos.
        Might handle these changes differently based on whether we started the paxos round.

        Appends the group size resulting from ``value`` to
        ``self.paxos.group_sizes`` (one more for 'join', one less for 'leave',
        unchanged otherwise) and then performs the change itself.
        """
        assert len(self.paxos.group_sizes) > index
        change = value['change']
        sizes = self.paxos.group_sizes
        if change == 'join':
            sizes.append(sizes[index] + 1)
            if self.paxos.is_leader():
                self.send_paxos_join_confirmation(value['node_id'])
        elif change == 'leave':
            sizes.append(sizes[index] - 1)
            if self.paxos.is_leader():
                self.peer_keepalives.pop(value['node_id'], None)
        else:
            sizes.append(sizes[index])
            if change == 'add':
                self.index.add_entry(value['entry'], value['addr'])
            elif change == 'remove':
                self.index.remove_entry(value['entry'])

    def send_paxos_join_confirmation(self, node_id):
        """Sends a confirmation for joining the paxos overlay network to node_id.
        The message carries this peer's node id and the current leader.
        """
        self.network.send(
            node_id, {
                'do': 'paxos_join_confirm',
                'my_node_id': self.paxos.node_id(),
                'leader': self.paxos.current_leader,
            })

    def connect(self, addr):
        """Queue a 'first_connection' command carrying ``addr`` for the main loop.

        NOTE(review): handle_queue's 'first_connection' branch does not read
        the 'addr' payload, and a separate 'connect' command exists -- confirm
        which command this method is meant to queue.
        """
        self.queue.put(('first_connection', {
            'addr': addr,
        }))