if verbose == 1: print( '\'{}\': {} matched links (total: {}), {} pages in the history.' .format(cur_url, len(matched_links), matched_link_count, len(history))) elif verbose > 1: print('{} matched links (total: {}), {} pages in the history.'. format(len(matched_links), matched_link_count, len(history))) if fname != None: cur_file = cur_url.split('/')[-1].encode(sys.stdout.encoding, errors='replace')[:197] cur_file = utility.remove_illegal_path_characters( re.sub(r'[^\x00-\x7F]+', '-', urllib2.unquote(cur_file))) with open('{}/{}.txt'.format(fname, cur_file), 'w') as outfile: for link in matched_links: outfile.write('{}\n'.format(link.href)) matched_link_count += len(matched_links) matched_links = [] else: matched_link_count = len(matched_links) return history, matched_links if __name__ == '__main__': command_line(utility.command_line_args(sys.argv))
def handle_response(self, mode='get', updated=0):
    """Read one response line from the server and act on it.

    The response line is read from ``self.f``, tokenised with
    ``shlex.split`` and handed to ``utility.command_line_args``; the result
    is only used here through ``in`` tests and ``[...]`` lookups.

    Args:
        mode: 'update' to process file-update responses, 'get' to receive a
            work file into ``self.work_file``. Any other value only reports
            errors and closes the socket.
        updated: Number of files already received during this update
            exchange; incremented on each recursive call.

    Raises:
        FatalException: The response contains both 'error' and 'fatal', or
            the stream ended before a file's '<EOF>' marker arrived.
        Exception: In 'update' mode, after 'done' is received and at least
            one file was updated (the program must be restarted).
    """

    def receive_into(path):
        """Write lines from ``self.f`` to `path` until the '<EOF>' line.

        Returns the number of payload lines written.
        """
        count = 0
        with open(path, 'w') as out:
            while True:
                next_line = self.f.readline()
                if not next_line:
                    # readline() yields an empty value once the stream is
                    # exhausted; without this check the loop never ends
                    # when the connection drops mid-transfer.
                    raise FatalException(
                        'Fatal error occurred because: connection closed '
                        'before <EOF> was received.')
                if next_line == '<EOF>\n':
                    break
                out.write(next_line)
                count += 1
        return count

    response_str = self.f.readline()
    response = utility.command_line_args(shlex.split(response_str))
    print('Received: {}'.format(response_str))

    error = 'error' in response
    if error:
        if 'fatal' in response:
            if 'reason' in response:
                raise FatalException(
                    'Fatal error occurred because: {}'.format(
                        response['reason']))
            raise FatalException(
                'Fatal error occurred for an unknown reason.')
        if 'reason' in response:
            # Previously formatted an undefined name `reason` (NameError).
            print('Error: {}'.format(response['reason']))
        else:
            print('Error: An unknown error has occurred.')

    if mode == 'update':
        if 'done' in response:
            print('{} files updated.'.format(updated))
            self.s.shutdown(socket.SHUT_WR)
            self.s.close()
            if updated > 0:
                raise Exception(
                    'Files were updated, please restart this program.')
        elif 'update' in response:
            receive_name = response['fname']
            print('Receiving file \'{}\'.'.format(receive_name))
            lines = receive_into(receive_name)
            print('Received {} lines.'.format(lines))
            # Keep consuming responses until the server sends 'done'.
            self.handle_response(mode='update', updated=updated + 1)
    elif mode == 'get':
        # NOTE(review): a non-fatal error response also reaches this lookup;
        # confirm whether such responses are expected to carry 'fname'.
        self.fname = response['fname']
        print('Receiving file.')
        lines = receive_into(self.work_file)
        print('Received {} lines.'.format(lines))

    # In 'update' mode the socket is closed by the 'done' branch above,
    # unless an error was reported, in which case it is closed here.
    if error or mode != 'update':
        self.s.shutdown(socket.SHUT_WR)
        self.s.close()
ip = args.get('ip', 'localhost') port = int(args.get('port', 60000)) directory = args.get('directory', 'allpages/') temp_directory = args.get('temp_directory', 'temp/') finished_directory = args.get('finished_directory', 'completed/') target = args.get('target', 'Philosophy') mode = args.get('mode', 'new') noupdate = args.get('noupdate', False) nowrite = args.get('nowrite', False) if 'start' in args: s = WikiServer(ip, port, directory=directory, temp_directory=temp_directory, finished_directory=finished_directory, mode=mode) s.start() elif 'join' in args: client = WikiClient(ip, port, noupdate=noupdate, nowrite=nowrite, target=target) client.start() if __name__ == '__main__': command_line(utility.command_line_args())
def handle(self):
    """Serve one connected client until it disconnects or a fatal error.

    Each request is one line read from ``self.rfile``, tokenised with
    ``shlex.split`` and passed to ``utility.command_line_args``. Every
    request must contain 'client'; the command is then selected by the
    first of these keys present in the request:

    * 'check_files' -- compare the client's 'checksums' with the MD5 of
      each server file in the module-level ``check_files`` pairs and send
      every file whose checksum differs, followed by 'done'.
    * 'next_file'   -- send the file returned by
      ``self.server.get_next_file`` from ``self.server.temp_directory``.
    * 'finished'    -- pass the client's 'results' for 'fname' to
      ``self.server.finish_file``.

    File payloads are sent line by line and terminated with a '<EOF>' line.
    NOTE(review): a file that itself contains a line consisting of '<EOF>'
    would presumably end the transfer early on the receiving side.
    """

    def send_file(path):
        """Send the contents of `path` line by line, then the EOF marker."""
        with open(path, 'r') as f:
            for line in f:
                # Strip only the newline: slicing with [:-1] also removed
                # the last real character when the final line had none.
                self.send(line.rstrip('\n'), noprint=True)
        self.send('<EOF>', noprint=True)

    print('Client connected.')
    client_id = ''
    while True:
        request_str = self.rfile.readline()
        if not request_str:
            # An empty read means the peer closed the connection.
            print('Client disconnected.')
            return

        request = utility.command_line_args(shlex.split(request_str))
        if 'client' not in request:
            self.send('error fatal reason="no client id supplied!"')
            return
        client_id = request['client']
        print('Client: {}'.format(request_str))

        if 'check_files' in request:
            if 'checksums' not in request:
                self.send('error reason="no checksums in request."')
                continue
            print(request['checksums'])
            # literal_eval only accepts Python literals, so it does not
            # execute client-supplied code; malformed input still raises.
            checksums = ast.literal_eval(request['checksums'])
            # zip() stops at the shorter sequence, so files beyond the
            # number of checksums supplied are not compared.
            for client_checksum, (client_path, server_path) in zip(
                    checksums, check_files):
                if client_checksum != utility.md5(server_path):
                    self.send('update fname="{}"'.format(client_path))
                    send_file(server_path)
            self.send('done')
        elif 'next_file' in request:
            fname = self.server.get_next_file(client_id)
            self.send('success fname="{}"'.format(fname))
            send_file(self.server.temp_directory + fname)
        elif 'finished' in request:
            if 'fname' not in request:
                self.send(
                    'error reason="no filename included in request."')
                continue
            if 'results' not in request:
                self.send('error reason="no result included in request."')
                continue
            self.server.finish_file(
                client_id, request['fname'],
                ast.literal_eval(request['results']))
            self.send('success')
        else:
            self.send('error reason="no recognized command in request."')
href = source while href != self.target: links = valid_links(href) for link in links: open_list[link] = href # Score each page and choose the best one for link in open_list: return def command_line(args): op = 'find' source = args.get('source', '/wiki/Special:Random') target = args.get('target', '/wiki/Philosophy') verbose = int(args.get('verbose', '1')) sample_n = int(args.get('n', '25')) depth = int(args.get('depth', '2')) f = Finder(source, target, depth, sample_n) res = f.find() print(res) if __name__ == '__main__': args = utility.command_line_args() if 'profile' in args: cProfile.run('command_line(args)', sort='tottime') else: command_line(args)