Example #1
0
        if verbose == 1:
            print(
                '\'{}\': {} matched links (total: {}), {} pages in the history.'
                .format(cur_url, len(matched_links), matched_link_count,
                        len(history)))
        elif verbose > 1:
            print('{} matched links (total: {}), {} pages in the history.'.
                  format(len(matched_links), matched_link_count, len(history)))

        if fname != None:
            cur_file = cur_url.split('/')[-1].encode(sys.stdout.encoding,
                                                     errors='replace')[:197]
            cur_file = utility.remove_illegal_path_characters(
                re.sub(r'[^\x00-\x7F]+', '-', urllib2.unquote(cur_file)))

            with open('{}/{}.txt'.format(fname, cur_file), 'w') as outfile:
                for link in matched_links:
                    outfile.write('{}\n'.format(link.href))

                matched_link_count += len(matched_links)
                matched_links = []
        else:
            matched_link_count = len(matched_links)

    return history, matched_links


if __name__ == '__main__':
    # Script entry point: turn the raw process arguments into the parsed
    # form and hand them to the command-line driver.
    parsed_args = utility.command_line_args(sys.argv)
    command_line(parsed_args)
Example #2
0
    def handle_response(self, mode='get', updated=0):
        """Read one response line from the server and act on it.

        The line is tokenised with ``shlex.split`` and parsed by
        ``utility.command_line_args``.

        Args:
            mode: ``'update'`` to process file-update responses; ``'get'``
                to receive a work file into ``self.work_file``.  Any other
                value only performs the error handling and socket shutdown.
            updated: Number of files already received during the current
                update exchange; used for reporting and to decide whether a
                restart is required.

        Raises:
            FatalException: The response contains both ``error`` and
                ``fatal``.
            EOFError: The connection was closed before the ``<EOF>`` marker
                of a file transfer was received.
            Exception: An update exchange finished and at least one file was
                updated, so the program must be restarted.
        """

        def receive_file(path):
            # Copy lines from the connection into `path` until the '<EOF>'
            # marker line; return how many lines were written.
            count = 0
            with open(path, 'w') as out:
                while True:
                    next_line = self.f.readline()

                    # An empty read means the peer closed the connection;
                    # without this check the loop would never terminate.
                    if not next_line:
                        raise EOFError(
                            'Connection closed before <EOF> was received.')

                    if next_line == '<EOF>\n':
                        break

                    out.write(next_line)
                    count += 1
            return count

        response_str = self.f.readline()
        response = utility.command_line_args(shlex.split(response_str))

        print('Received: {}'.format(response_str))

        error = 'error' in response

        if error:
            if 'fatal' in response:
                if 'reason' in response:
                    raise FatalException(
                        'Fatal error occurred because: {}'.format(
                            response['reason']))
                raise FatalException(
                    'Fatal error occurred for an unknown reason.')

            if 'reason' in response:
                # The reason lives in the parsed response; there is no local
                # variable called `reason`.
                print('Error: {}'.format(response['reason']))
            else:
                print('Error: An unknown error has occurred.')

        if mode == 'update':
            if 'done' in response:
                print('{} files updated.'.format(updated))
                self.s.shutdown(socket.SHUT_WR)
                self.s.close()

                if updated > 0:
                    raise Exception(
                        'Files were updated, please restart this program.')
            elif 'update' in response:
                receive_name = response['fname']

                print('Receiving file \'{}\'.'.format(receive_name))

                lines = receive_file(receive_name)

                print('Received {} lines.'.format(lines))

                # Keep processing responses until the server reports 'done'.
                self.handle_response(mode='update', updated=updated + 1)
        elif mode == 'get':
            self.fname = response['fname']

            print('Receiving file.')

            lines = receive_file(self.work_file)

            print('Received {} lines.'.format(lines))

        # In update mode the socket is closed by the 'done' branch above, so
        # only close here for other modes or after a non-fatal error.
        if error or mode != 'update':
            self.s.shutdown(socket.SHUT_WR)
            self.s.close()
Example #3
0
    ip = args.get('ip', 'localhost')
    port = int(args.get('port', 60000))
    directory = args.get('directory', 'allpages/')
    temp_directory = args.get('temp_directory', 'temp/')
    finished_directory = args.get('finished_directory', 'completed/')
    target = args.get('target', 'Philosophy')
    mode = args.get('mode', 'new')
    noupdate = args.get('noupdate', False)
    nowrite = args.get('nowrite', False)

    if 'start' in args:
        s = WikiServer(ip,
                       port,
                       directory=directory,
                       temp_directory=temp_directory,
                       finished_directory=finished_directory,
                       mode=mode)
        s.start()
    elif 'join' in args:
        client = WikiClient(ip,
                            port,
                            noupdate=noupdate,
                            nowrite=nowrite,
                            target=target)

        client.start()


if __name__ == '__main__':
    # Script entry point: parse the command line with the utility helper's
    # defaults and dispatch to the server/client driver.
    parsed_args = utility.command_line_args()
    command_line(parsed_args)
Example #4
0
    def handle(self):
        """Serve requests from one connected client until it disconnects.

        Each request is a single line, tokenised with ``shlex.split`` and
        parsed by ``utility.command_line_args``.  Every request must carry a
        ``client`` entry; a request without one is answered with a fatal
        error and ends the session.  Recognised commands:

        * ``check_files`` - compare the client's ``checksums`` with the
          server-side checksums of ``check_files`` and stream every file
          whose checksum differs, then reply ``done``.
        * ``next_file`` - reply with the file name returned by
          ``self.server.get_next_file`` and stream that file from the
          server's temp directory.
        * ``finished`` - pass ``fname`` and the evaluated ``results`` to
          ``self.server.finish_file`` and reply ``success``.

        Any other request is answered with a non-fatal error.
        """

        def send_file(path):
            # Stream the file one line per message, followed by the '<EOF>'
            # marker.  The file is read completely before anything is sent.
            with open(path, 'r') as source:
                contents = source.readlines()

            for line in contents:
                # Strip only the line terminator: slicing off the last
                # character would corrupt a final line that has no newline.
                self.send(line.rstrip('\n'), noprint=True)

            self.send('<EOF>', noprint=True)

        print('Client connected.')
        while True:
            request_str = self.rfile.readline()

            # An empty read means the client closed the connection.
            if not request_str:
                print('Client disconnected.')
                return

            request = utility.command_line_args(shlex.split(request_str))

            if 'client' not in request:
                self.send('error fatal reason="no client id supplied!"')
                return
            client_id = request['client']

            print('Client: {}'.format(request_str))

            if 'check_files' in request:
                if 'checksums' not in request:
                    self.send('error reason="no checksums in request."')
                    continue

                print(request['checksums'])
                # literal_eval only accepts Python literals, so the
                # client-supplied text is never executed as code.
                checksums = ast.literal_eval(request['checksums'])
                # check_files holds (client_path, server_path) pairs; the
                # checksum is taken from the server-side copy.
                server_checksums = [
                    utility.md5(path) for _, path in check_files
                ]

                for client_checksum, server_checksum, (
                        client_path, server_path) in zip(
                            checksums, server_checksums, check_files):
                    if client_checksum != server_checksum:
                        self.send('update fname="{}"'.format(client_path))
                        send_file(server_path)

                self.send('done')
            elif 'next_file' in request:
                fname = self.server.get_next_file(client_id)

                self.send('success fname="{}"'.format(fname))
                send_file(self.server.temp_directory + fname)
            elif 'finished' in request:
                if 'fname' not in request:
                    self.send(
                        'error reason="no filename included in request."')
                    continue

                if 'results' not in request:
                    self.send('error reason="no result included in request."')
                    continue

                self.server.finish_file(
                    client_id, request['fname'],
                    ast.literal_eval(request['results']))

                self.send('success')
            else:
                self.send('error reason="no recognized command in request."')
Example #5
0
        href = source

        while href != self.target:
            links = valid_links(href)

            for link in links:
                open_list[link] = href

            # Score each page and choose the best one
            for link in open_list:
                return

def command_line(args):
    """Build a ``Finder`` from parsed command-line options and print its result.

    Options read from ``args`` (a mapping with ``get``): ``source``,
    ``target``, ``verbose``, ``n`` and ``depth``; each falls back to a
    default when absent.  The numeric options are converted with ``int``,
    so a non-numeric value raises ``ValueError``.
    """
    start_page = args.get('source', '/wiki/Special:Random')
    goal_page = args.get('target', '/wiki/Philosophy')
    # The verbosity level is converted but not used below; the conversion is
    # kept so an invalid value is still rejected.
    _verbosity = int(args.get('verbose', '1'))
    sample_size = int(args.get('n', '25'))
    max_depth = int(args.get('depth', '2'))

    finder = Finder(start_page, goal_page, max_depth, sample_size)
    print(finder.find())

if __name__ == '__main__':
    # Script entry point.  The parsed arguments must stay bound to the
    # module-level name `args`, because the profiled statement below is a
    # string that refers to that name.
    args = utility.command_line_args()
    if 'profile' not in args:
        command_line(args)
    else:
        # Run under the profiler, reporting sorted by total time.
        cProfile.run('command_line(args)', sort='tottime')