def map_hosts(filename):
    '''
    Reads IPs from a file created on ip.urand0m.com and stores them in memory,
    then cleans the file and rewrites it with one IP per line.
    '''
    MAP_HOSTS = []
    with open(filename, 'r') as scanfile:
        lines = scanfile.readlines()
    if ',' in lines[0]:
        with open(filename, 'w') as scanfile:
            for ip in lines[0].split(','):
                ip = ip.strip()  # drop stray whitespace/newlines around each address
                MAP_HOSTS.append((ip, '443'))
                scanfile.write("{}\n".format(ip))
        logging.info(
            "[SYSTEM] file {} has been cleaned and organised by line".format(
                filename))
    else:
        for ip in lines:
            MAP_HOSTS.append((ip.rstrip(), '443'))
        logging.info(
            "[SYSTEM] file {} is already organised by line".format(filename))
    return MAP_HOSTS
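# A minimal usage sketch (not part of the scanner) of how map_hosts behaves on
# the two input shapes it handles; the filename 'hosts.txt' and the sample
# addresses are hypothetical:
#
#   with open('hosts.txt', 'w') as f:
#       f.write("198.51.100.7,198.51.100.8\n")   # comma-separated dump
#   hosts = map_hosts('hosts.txt')
#   # hosts == [('198.51.100.7', '443'), ('198.51.100.8', '443')]
#   # hosts.txt is rewritten with one address per line, so a second call
#   # takes the already-organised branch and returns the same tuples.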
def init_global_model(self, scheduler_client):
    logging.info("initializing global model")
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(self.startup_program_)
    logging.info("finish initializing global model")
    global_param_dict = self.input_model_.get_global_param_dict()
    scheduler_client.update_global_params(global_param_dict)
def __init__(self):
    """Initialize db class variables"""
    if not os.path.isfile(Datastore.__DB_LOCATION):
        # touch the file so sqlite3 connects to a known location;
        # the `with` block closes it, no explicit close() needed
        with open(Datastore.__DB_LOCATION, "w+") as datastore:
            logging.info("[DB] created new database file: {}".format(
                Datastore.__DB_LOCATION))
    self.connection = sqlite3.connect(
        Datastore.__DB_LOCATION, check_same_thread=False)
    self.cur = self.connection.cursor()
def send_one_patch(self, patch, date):
    for line in patch:
        group = line.strip().split("\t")
        if len(group) != 3:
            continue
        data = data_server_pb2.Data()
        data.uid = group[0]
        data.date = date
        data.line = line.strip()
        stub_idx = self.uid_shard(data.uid)
        if stub_idx == -1:
            logging.info("send_one_patch continue for uid: %s" % data.uid)
            continue
        call_future = self.stub_list[stub_idx].SendData.future(data)
        u_num = call_future.result()  # block until the shard acknowledges
def start(self, max_workers=1000, concurrency=100, endpoint=""):
    if endpoint == "":
        logging.info("You should specify endpoint in start function")
        return
    server = grpc.server(
        futures.ThreadPoolExecutor(max_workers=max_workers),
        options=[('grpc.max_send_message_length', 1024 * 1024 * 1024),
                 ('grpc.max_receive_message_length', 1024 * 1024 * 1024)],
        maximum_concurrent_rpcs=concurrency)
    scheduler_server_pb2_grpc.add_SchedulerServerServicer_to_server(
        SchedulerServerServicer(), server)
    # print("SchedulerServer add endpoint: ", '[::]:{}'.format(endpoint))
    server.add_insecure_port('[::]:{}'.format(endpoint))
    server.start()
    logging.info("server started")
    # block until another process creates the shutdown sentinel file
    os.system("rm _shutdown_scheduler")
    while not os.path.isfile("_shutdown_scheduler"):
        time.sleep(10)
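# The serving loop above blocks until a sentinel file appears in the working
# directory. A hedged shutdown sketch (presumably stop_scheduler_server on the
# client side creates this file via RPC; here it is created directly):
#
#   import pathlib
#   pathlib.Path("_shutdown_scheduler").touch()   # start() sees this and returns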
def global_shuffle_by_patch(self, data_patch, date, concurrency):
    # integer division: `/` would yield a float in Python 3 and break range()
    shuffle_time = len(data_patch) // concurrency + 1
    for i in range(shuffle_time):
        if i * concurrency >= len(data_patch):
            break
        pros = []
        end = min((i + 1) * concurrency, len(data_patch))
        patch_list = data_patch[i * concurrency:end]
        width = len(patch_list)
        for j in range(width):
            p = Process(
                target=self.send_one_patch, args=(patch_list[j], date))
            pros.append(p)
        for p in pros:
            p.start()
        for p in pros:
            p.join()
        logging.info("shuffle round {} done.".format(i))
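# A minimal sketch of the round arithmetic above, with illustrative numbers:
# 250 patches at concurrency 100 give shuffle_time = 250 // 100 + 1 = 3, i.e.
# rounds of 100, 100, and 50 sender processes. The early `break` guards the
# exact-multiple case: with 200 patches, rounds 0 and 1 run and round 2 stops
# immediately because 2 * 100 >= 200.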
def UpdateUserInstNum(self, request, context):
    shard_num = request.shard_num
    date = request.date
    if date not in self.uid_inst_num_dict:
        self.uid_inst_num_dict[date] = {}
    if date not in self.shard_id_dict:
        self.shard_id_dict[date] = {}
    for user in request.inst_nums:
        shard_id = self.uid_shard(user.uid, shard_num)
        if shard_id == -1:
            logging.info("UpdateUserInstNum continue")
            continue
        if user.uid in self.uid_inst_num_dict[date]:
            self.uid_inst_num_dict[date][user.uid] += user.inst_num
        else:
            self.uid_inst_num_dict[date][user.uid] = user.inst_num
        if shard_id not in self.shard_id_dict[date]:
            self.shard_id_dict[date][shard_id] = [user.uid]
        else:
            self.shard_id_dict[date][shard_id].append(user.uid)
    res = scheduler_server_pb2.Res()
    res.err_code = 0
    return res
def tls_scan(host, port):
    logging.info("[TLSSCANNER] starting tls-scan for Host: {}".format(host))
    tls_scanner = ("tools/tls-scan/tls-scan -b 1 --no-parallel-enum -V "
                   "--cacert=tools/tls-scan/ca-bundle.crt -c {}:{} ").format(
                       host, port)
    args = shlex.split(tls_scanner)
    proc = subprocess.Popen(
        args, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
    (output, err) = proc.communicate()
    if err:
        logging.critical("[TLSSCANNER] Unable to scan {} error: {}".format(
            host, err))
    ret_code = proc.wait()
    if ret_code == 0 and len(output) > 5:
        try:
            return json.loads(output.decode('utf-8').strip())
        except json.decoder.JSONDecodeError:
            return False
    return False
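# Hedged usage sketch: tls_scan shells out to the bundled tls-scan binary and
# returns the parsed JSON report, or False on any failure. The host below is
# hypothetical, and tools/tls-scan/tls-scan must exist relative to the cwd:
#
#   report = tls_scan('198.51.100.7', '443')
#   if report:
#       print(report['tlsVersion'], report.get('verifyCertResult'))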
def main():
    db_session = initialize_db()
    subscribe_ses = partial(subscribe, db_session=db_session)
    unsubscribe_ses = partial(unsubscribe, db_session=db_session)
    plot_fun = partial(plot, db_session=db_session)
    dispatcher = updater.dispatcher

    # Add handler for start command
    start_handler = CommandHandler('start', start)
    dispatcher.add_handler(start_handler)

    # Commands
    logging.debug("Adding handlers.")
    for unit, commands in UNITS.items():
        get_unit = partial(get_kursi, unit=unit)
        for command in commands:
            dispatcher.add_handler(CommandHandler(command, get_unit))

    dispatcher.add_handler(CommandHandler('subscribe', subscribe_ses))
    dispatcher.add_handler(CommandHandler('unsubscribe', unsubscribe_ses))
    dispatcher.add_handler(CommandHandler('plot', plot_fun))

    # Start infinite loop to respond to requests
    logging.info('Starting polling...')
    updater.start_polling()
def infer_one_user(arg_dict, trainer_config):
    """
    Infer a model with global params and user params.
    input: global_param, user_params, infer_program, user_data
    output: [sample_count, top1]
    """
    # run startup program, set params
    uid = arg_dict["uid"]
    batch_size = trainer_config["batch_size"]
    startup_program = fluid.Program.parse_from_string(
        trainer_config["startup_program_desc"])
    infer_program = fluid.Program.parse_from_string(
        trainer_config["infer_program_desc"])
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.global_scope()
    if startup_program is None:
        logging.error("startup_program is None")
        exit()
    if infer_program is None:
        logging.error("infer_program is None")
        exit()
    exe.run(startup_program)
    data_client = DataClient()
    data_client.set_data_server_endpoints(arg_dict["data_endpoints"])
    # user params are not needed here; only global params are set
    set_global_param_dict(arg_dict["global_param_names"],
                          arg_dict["global_params"], scope)

    # reader
    date = arg_dict["date"]
    user_data = data_client.get_data_by_uid(uid, date)
    infer_reader = reader.infer_reader(user_data)
    infer_reader = paddle.batch(infer_reader, batch_size=batch_size)

    # run infer program
    os.mkdir(arg_dict["infer_result_dir"])
    # pred_file = open(arg_dict["infer_result_dir"] + '/' + "pred_file", "w")
    feeder = fluid.DataFeeder(
        feed_list=trainer_config["input_names"],
        place=place,
        program=infer_program)
    fetch_list = trainer_config["target_names"]
    # logging.info("fetch_list: {}".format(fetch_list))
    num_layers = trainer_config["num_layers"]
    hidden_size = trainer_config["n_hidden"]
    tot_correct, tot_loss = 0, 0
    tot_samples, tot_batches = 0, 0
    init_hidden, init_cell = generate_init_data(batch_size, num_layers,
                                                hidden_size)
    for data in infer_reader():
        feed_data, input_lengths = prepare_input(batch_size, data,
                                                 init_hidden, init_cell)
        fetch_res = exe.run(infer_program,
                            feed=feeder.feed(feed_data),
                            fetch_list=fetch_list)
        loss, last_hidden, last_cell, correct = fetch_res
        # carry the LSTM state across batches
        init_hidden = np.array(last_hidden)
        init_cell = np.array(last_cell)
        tot_loss += np.array(loss)
        tot_correct += np.array(correct)
        tot_samples += np.sum(input_lengths)
        tot_batches += 1
    loss = tot_loss / tot_batches
    acc = float(tot_correct) / tot_samples
    logging.info("infer acc: {}".format(acc))
    with open(arg_dict["infer_result_dir"] + "/res", "w") as f:
        f.write("%d\t%f\n" % (1, acc))
def insert(self, id, host, port, tls_version, secure_renegotiation,
           tls_supported, issuer, subject, subjectCN, subjectAltName,
           pubkeysize, expired, notAfter, cert_valid, signatureAlg):
    # use a parameterized query so quoting in certificate fields
    # cannot break (or inject into) the SQL statement
    insertdb = 'INSERT INTO certgrabbers VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)'
    self.cur.execute(insertdb,
                     (id, host, port, tls_version, secure_renegotiation,
                      tls_supported, issuer, subject, subjectCN,
                      subjectAltName, pubkeysize, expired, notAfter,
                      cert_valid, signatureAlg))
    logging.info("[DB] Inserted to DB: {}".format(host))
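# Hedged usage sketch: the cursor binds each value, so quotes inside
# certificate subjects are stored verbatim. `db` below is a hypothetical
# Datastore instance and all field values are illustrative:
#
#   db.insert(1, '198.51.100.7', '443', 'TLSv1.2', 'yes', 'TLSv1.2,TLSv1.3',
#             "CN=Example CA", "CN=O'Brien Ltd", 'example.com',
#             'DNS:example.com', '2048', 'no', 'Jan  1 00:00:00 2031 GMT',
#             'success', 'sha256WithRSAEncryption')
#   db.commit()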
def callback_results(host, scan_results):
    '''
    Where all the magic happens: checks results from nmap_scan and stores
    certificate information in the db when the port is open.
    '''
    nmap = scan_results['nmap']
    port = nmap['scaninfo']['tcp']['services']
    scan = scan_results['scan']
    state = scan[host]['tcp'][int(port)]['state']
    if state != 'open':
        return
    logging.info('[NMAP-{}] Host {} open!'.format(port, host))
    tls_scanner_output = tls_scan(host, port)
    if tls_scanner_output:
        tls_version = tls_scanner_output['tlsVersion']
        secure_renegotiation = tls_scanner_output['secureRenego']
        tls_supported = tls_scanner_output['tlsVersions']
        # only the leaf certificate (first entry of the chain) is stored;
        # fields missing from the report default to "None"
        leaf = tls_scanner_output['certificateChain'][0]
        subjectCN = leaf.get('subjectCN', "None")
        subjectAltName = leaf.get('subjectAltName', "None")
        pubkeysize = leaf.get('publicKeySize', "None")
        expired = leaf.get('expired', "None")
        subject = leaf.get('subject', "None")
        issuer = leaf.get('issuer', "None")
        notAfter = leaf.get('notAfter', "None")
        signatureAlg = leaf.get('signatureAlg', "None")
        cert_valid = tls_scanner_output.get('verifyCertResult', "None")
        id = len(db.getdata().fetchall()) + 1
        logging.info(
            "[COUNTER] There are {} other hosts left to complete".format(
                counter))
        db.insert(id, host, port, tls_version, secure_renegotiation,
                  tls_supported, issuer, subject, subjectCN, subjectAltName,
                  pubkeysize, expired, notAfter, cert_valid, signatureAlg)
        db.commit()
    else:
        with open('failed-hosts.txt', 'a') as failed:
            logging.critical(
                "[TLSSCANNER] check host {}: the report was empty. Logged "
                "host to failed-hosts.txt".format(host))
            failed.write("{}\n".format(host))
        'pad_symbol': vocab['<PAD>']
    }


def save_vocab(filename, vocab):
    with open(filename, "w") as f:
        f.write(json.dumps(vocab))


def load_vocab(filename):
    with open(filename) as f:
        return json.loads(f.read())


if os.path.exists(VOCAB_PATH):
    logging.info("load vocab from: {}".format(VOCAB_PATH))
    VOCAB = load_vocab(VOCAB_PATH)
else:
    # TODO: singleton
    logging.info("build vocab from: {}".format(TRAIN_DATA_PATH))
    VOCAB = build_vocab(TRAIN_DATA_PATH)
    logging.info("save vocab into: {}".format(VOCAB_PATH))
    save_vocab(VOCAB_PATH, VOCAB)

if VOCAB is None:
    logging.error("load vocab error")
    raise Exception("load vocab error")


def train_reader(lines):
    def local_iter():
        seg_id = 0
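# Hedged round-trip sketch for the save_vocab/load_vocab helpers above (the
# path and contents are hypothetical; the helpers only need a
# JSON-serializable dict):
#
#   vocab = {'<PAD>': 0, '<UNK>': 1, 'hello': 2}
#   save_vocab('/tmp/vocab.json', vocab)
#   assert load_vocab('/tmp/vocab.json') == vocab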
if __name__ == '__main__':
    if os.getuid() != 0:
        logging.critical("You need to run as root user!")
        sys.exit()
    if not which('nmap'):
        logging.critical(
            "no nmap binary found in path. (Examples - MacOS: brew install "
            "nmap, Linux: apt-get install nmap)")
        sys.exit()
    MAP = map_hosts(sys.argv[1])
    if os.path.exists('failed-hosts.txt'):
        os.remove('failed-hosts.txt')
        logging.info(
            "File failed-hosts.txt from previous scan has been removed")
    counter = len(MAP)
    logging.info("[SCANNER] Total number of IPs in list: {}".format(counter))
    try:
        for ip, port in MAP:
            counter -= 1
            pool.add_task(nmap_scan, ip, port)
    except KeyboardInterrupt:
        print("[SYSTEM] Pressed CTRL+C, gracefully exiting from the "
              "application...")
    pool.wait_completion()
def exists(self):
    check = self.execute(
        "SELECT count(*) FROM sqlite_master "
        "WHERE type='table' AND name='gggitrepos';")
    found = check.fetchone()[0]
    if found:
        logging.info("[DB] {} already exists.".format(self.__DB_LOCATION))
    return found
def create_table(self):
    """create a database table if it does not exist already"""
    # columns other than id rely on SQLite's dynamic typing
    self.cur.execute('''CREATE TABLE IF NOT EXISTS certgrabbers(
        id integer, host, port, tls_version, secure_renegotiation,
        tls_supported, issuer, subject, subjectCN, subjectAltName,
        pubkeysize, expired, notAfter, cert_valid, signatureAlg)''')
    logging.info("[DB] Created: certgrabbers")
def run_simulation(self,
                   base_path,
                   dates,
                   fs_upload_path=None,
                   sim_num_everyday=1,
                   do_test=False,
                   test_skip_day=6):
    if not self.role_maker.is_simulator():
        pass
    data_services = self._get_data_services()
    for service in data_services:
        service.start()
    self._barrier_simulators()
    self.data_client = DataClient()
    self.data_client.set_load_data_into_patch_func(
        self.trainer.get_load_data_into_patch_func())
    self.data_client.set_data_server_endpoints(
        self.role_maker.get_data_server_endpoints())
    self.scheduler_client = SchedulerClient()
    self.scheduler_client.set_data_server_endpoints(
        self.role_maker.get_data_server_endpoints())
    self.scheduler_client.set_scheduler_server_endpoints(
        [self.role_maker.get_global_scheduler_endpoint()])
    logging.info("trainer config: {}".format(self.trainer.trainer_config))
    self.trainer.prepare(do_test=do_test)
    if self.role_maker.simulator_idx() == 0:
        self.trainer.init_global_model(self.scheduler_client)
    self._barrier_simulators()
    for date_idx, date in enumerate(dates):
        if date_idx > 0:
            self.do_profile = False
            self.profile_file.close()
        logging.info("reading data for date: %s" % date)
        local_files = self._profile(self.data_client.get_local_files,
                                    base_path, date,
                                    self.role_maker.simulator_idx(),
                                    self.role_maker.simulator_num(),
                                    hdfs_configs=self.hdfs_configs)
        logging.info("loading data into patch for date: %s" % date)
        data_patch, local_user_dict = self._profile(
            self.data_client.load_data_into_patch, local_files, 10000)
        logging.info("shuffling data for date: %s" % date)
        self._profile(self.data_client.global_shuffle_by_patch, data_patch,
                      date, 30)
        logging.info("updating user inst num for date: %s" % date)
        self._profile(self.scheduler_client.update_user_inst_num, date,
                      local_user_dict)
        self.role_maker.barrier_simulator()
        if do_test and date_idx != 0 and date_idx % test_skip_day == 0:
            self._barrier_simulators()
            self._profile(self._test, date)
            self._barrier_simulators()
            self._profile(self._save_and_upload, date, fs_upload_path)
        self._run_sim(date, sim_num_everyday=sim_num_everyday)
        self.role_maker.barrier_simulator()
        logging.info("clear user data for date: %s" % date)
        self.data_client.clear_user_data(date)
        self._barrier_simulators()
    logging.info("training done for all dates.")
    logging.info("stopping scheduler")
    self.scheduler_client.stop_scheduler_server()
    for pro in data_services:
        pro.terminate()
    logging.info("terminated all data servers.")
def _test(self, date):
    if self.trainer.infer_one_user_func is None:
        pass
    logging.info("doing test...")
    if self.test_sampler is None:
        logging.error("self.test_sampler should not be None when testing")
    sim_idx = self.role_maker.simulator_idx()
    sim_num = self.role_maker.simulator_num()
    user_info_dict = self.test_sampler.sample_user_list(
        self.scheduler_client, date, sim_idx,
        len(self.data_client.stub_list), sim_num)
    if self.do_profile:
        print("test user info_dict: ", user_info_dict)
    global_param_dict = self.scheduler_client.get_global_params()

    def divide_chunks(l, n):
        # yield successive n-sized chunks from l
        for i in range(0, len(l), n):
            yield l[i:i + n]

    # at most 50 processes for testing
    chunk_size = 50
    # at most 100 uids for testing
    max_test_uids = 100
    # keys() must be materialized into a list before slicing in Python 3
    uid_chunks = divide_chunks(list(user_info_dict.keys()), chunk_size)
    os.system("rm -rf _test_result")
    os.system("mkdir _test_result")
    tested_uids = 0
    for uids in uid_chunks:
        if tested_uids >= max_test_uids:
            break
        processes = []
        for user in uids:
            arg_dict = {
                "uid": str(user),
                "date": date,
                "data_endpoints":
                self.role_maker.get_data_server_endpoints(),
                "global_params": global_param_dict,
                "user_param_names": self.trainer.get_user_param_names(),
                "global_param_names":
                self.trainer.get_global_param_names(),
                "infer_result_dir": "_test_result/uid-%s" % user,
            }
            p = Process(
                target=self.trainer.infer_one_user_func,
                args=(arg_dict, self.trainer.trainer_config))
            p.start()
            processes.append(p)
        if self.do_profile:
            logging.info("wait test processes to close")
        for i, p in enumerate(processes):
            processes[i].join()
        tested_uids += chunk_size
    infer_results = []
    # only support one test metric now
    for uid in os.listdir("_test_result"):
        with open("_test_result/" + uid + "/res", 'r') as f:
            sample_count, metric = f.readlines()[0].strip('\n').split('\t')
            infer_results.append((int(sample_count), float(metric)))
    if sum([x[0] for x in infer_results]) == 0:
        logging.info("infer results: 0.0")
    else:
        count = sum([x[0] for x in infer_results])
        metric = sum([x[0] * x[1] for x in infer_results]) / count
        logging.info("infer results: %f" % metric)
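# A minimal sketch of divide_chunks, if hoisted to module level (the uid
# strings are illustrative): chunking 7 uids with n=3 yields three chunks.
#
#   list(divide_chunks(['u0', 'u1', 'u2', 'u3', 'u4', 'u5', 'u6'], 3))
#   # -> [['u0', 'u1', 'u2'], ['u3', 'u4', 'u5'], ['u6']]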
def _run_sim(self, date, sim_num_everyday=1):
    sim_idx = self.role_maker.simulator_idx()
    sim_num = self.role_maker.simulator_num()
    sim_all_trainer_run_time = 0
    sim_read_param_and_optimize = 0
    for sim in range(sim_num_everyday):
        logging.info("sim id: %d" % sim)
        # sampler algorithm
        user_info_dict = self._profile(
            self.sampler.sample_user_list, self.scheduler_client, date,
            sim_idx, len(self.data_client.stub_list), sim_num)
        if self.do_profile:
            print("sim_idx: ", sim_idx)
            print("shard num: ", len(self.data_client.stub_list))
            print("sim_num: ", sim_num)
            print("user_info_dict: ", user_info_dict)
        global_param_dict = self._profile(
            self.scheduler_client.get_global_params)
        processes = []
        os.system("rm -rf _global_param")
        os.system("mkdir _global_param")
        start = time.time()
        for idx, user in enumerate(user_info_dict):
            arg_dict = {
                "uid": str(user),
                "date": date,
                "data_endpoints":
                self.role_maker.get_data_server_endpoints(),
                "global_params": global_param_dict,
                "user_param_names": self.trainer.get_user_param_names(),
                "global_param_names":
                self.trainer.get_global_param_names(),
                "write_global_param_file": "_global_param/process_%d" % idx,
            }
            p = Process(
                target=self.trainer.train_one_user_func,
                args=(arg_dict, self.trainer.trainer_config))
            p.start()
            processes.append(p)
        if self.do_profile:
            logging.info("wait processes to close")
        for i, p in enumerate(processes):
            processes[i].join()
        end = time.time()
        sim_all_trainer_run_time += (end - start)
        start = time.time()
        new_global_param_by_user = {}
        training_sample_by_user = {}
        for i, p in enumerate(processes):
            param_dir = "_global_param/process_%d/" % i
            # pickle and numpy dumps must be read back in binary mode
            with open(param_dir + "/_info", "rb") as f:
                user, train_sample_num = pickle.load(f)
            param_dict = {}
            for f_name in os.listdir(os.path.join(param_dir, "params")):
                f_path = os.path.join(param_dir, "params", f_name)
                if os.path.isdir(f_path):
                    # layer directory: one file per parameter
                    for layer_param in os.listdir(f_path):
                        layer_param_path = os.path.join(f_path, layer_param)
                        with open(layer_param_path, "rb") as f:
                            param_dict["{}/{}".format(
                                f_name, layer_param)] = np.load(f)
                else:
                    with open(f_path, "rb") as f:
                        param_dict[f_name] = np.load(f)
            new_global_param_by_user[user] = param_dict
            training_sample_by_user[user] = train_sample_num
        self.fl_optimizer.update(training_sample_by_user,
                                 new_global_param_by_user,
                                 global_param_dict, self.scheduler_client)
        end = time.time()
        sim_read_param_and_optimize += (end - start)
    if self.do_profile:
        self.profile_file.write(
            "sim_all_trainer_run_time\t\t%f s\n" % sim_all_trainer_run_time)
        self.profile_file.write(
            "sim_read_param_and_optimize\t\t%f s\n" %
            sim_read_param_and_optimize)
    logging.info("training done for date %s." % date)
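# A hedged sketch of what a sample-weighted aggregation step could look like.
# The real logic lives in self.fl_optimizer.update, whose internals are not
# shown in this file; the function and variable names below are hypothetical:
#
#   def fed_avg(training_sample_by_user, new_global_param_by_user):
#       total = float(sum(training_sample_by_user.values()))
#       merged = {}
#       for user, params in new_global_param_by_user.items():
#           w = training_sample_by_user[user] / total
#           for name, value in params.items():
#               merged[name] = merged.get(name, 0.0) + w * value
#       return merged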
def save_and_upload(arg_dict, trainer_config, dfs_upload_path):
    # intentionally a no-op: saving and uploading is disabled
    logging.info("do not save and upload...")
    return
def removedb(self):
    # remove the file first so the success message is only logged
    # if os.remove does not raise
    os.remove(Datastore.__DB_LOCATION)
    logging.info("[DB] {} successfully removed".format(
        Datastore.__DB_LOCATION))
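# A hedged end-to-end sketch of the Datastore pieces above (constructor,
# create_table, insert, removedb). `Datastore` is assumed to be the class
# these methods belong to, and commit() a thin wrapper over
# self.connection.commit(); all values are illustrative:
#
#   db = Datastore()
#   db.create_table()
#   db.insert(1, '203.0.113.5', '443', 'TLSv1.3', 'yes', 'TLSv1.2,TLSv1.3',
#             'CN=Example CA', 'CN=example.com', 'example.com',
#             'DNS:example.com', '2048', 'no', 'Jan  1 00:00:00 2031 GMT',
#             'success', 'sha256WithRSAEncryption')
#   db.commit()
#   db.removedb()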