Example #1
def map_hosts(filename):
    '''
    Reads IPs from a file generated by ip.urand0m.com into memory, then
    rewrites the file with one IP per line if it was comma-separated.
    '''

    MAP_HOSTS = []

    with open(filename, 'r') as scanfile:
        split = scanfile.readlines()
        if ',' in split[0]:
            with open(filename, 'w') as scanfile:
                for ip in split[0].split(','):
                    MAP_HOSTS.append((str(ip), '443'))
                    scanfile.write("{}\n".format(ip))
            logging.info(
                "[SYSTEM] file {} has been cleaned and organised by line".
                format(filename))
        else:
            for ip in split:
                MAP_HOSTS.append((ip.rstrip(), '443'))
            logging.info(
                "[SYSTEM] file {} is already organised by line".format(
                    filename))

    return MAP_HOSTS
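A minimal usage sketch (the filename and addresses are hypothetical):

hosts = map_hosts("scan-targets.txt")
# e.g. [('10.0.0.1', '443'), ('10.0.0.2', '443')]; every host is paired
# with port '443' for the scanner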
Example #2
    def init_global_model(self, scheduler_client):
        logging.info("initializing global model")
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(self.startup_program_)
        logging.info("finish initializing global model")

        global_param_dict = self.input_model_.get_global_param_dict()
        scheduler_client.update_global_params(global_param_dict)
Example #3
    def __init__(self):
        """Initialize db class variables"""

        if not os.path.isfile(Datastore.__DB_LOCATION):
            # opening in "w+" creates the file; the context manager closes it
            with open(Datastore.__DB_LOCATION, "w+"):
                logging.info("[DB] created new database file: {}".format(Datastore.__DB_LOCATION))

        self.connection = sqlite3.connect(Datastore.__DB_LOCATION, check_same_thread=False)
        self.cur = self.connection.cursor()
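Because the connection is opened with check_same_thread=False, it may be shared across threads, but sqlite3 does not serialize concurrent writes for you; a lock is a common safeguard (a sketch, with the lock and wrapper names being hypothetical):

import threading

db_lock = threading.Lock()  # hypothetical guard for the shared connection

def locked_insert(db, *args):
    # serialize writes so threads cannot interleave statements
    with db_lock:
        db.insert(*args)
        db.commit()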
Example #4
    def send_one_patch(self, patch, date):
        for line in patch:
            group = line.strip().split("\t")
            if len(group) != 3:
                continue
            data = data_server_pb2.Data()
            data.uid = group[0]
            data.date = date
            data.line = line.strip()
            stub_idx = self.uid_shard(data.uid)
            if stub_idx == -1:
                logging.info("send_one_patch continue for uid: %s" % data.uid)
                continue
            call_future = self.stub_list[stub_idx].SendData.future(data)
            u_num = call_future.result()  # block until the shard acknowledges
Example #5
    def start(self, max_workers=1000, concurrency=100, endpoint=""):
        if endpoint == "":
            logging.error("You must specify an endpoint in the start function")
            return
        server = grpc.server(
            futures.ThreadPoolExecutor(max_workers=max_workers),
            options=[('grpc.max_send_message_length', 1024 * 1024 * 1024),
                     ('grpc.max_receive_message_length', 1024 * 1024 * 1024)],
            maximum_concurrent_rpcs=concurrency)
        scheduler_server_pb2_grpc.add_SchedulerServerServicer_to_server(
            SchedulerServerServicer(), server)
        # print("SchedulerServer add endpoint: ", '[::]:{}'.format(endpoint))
        server.add_insecure_port('[::]:{}'.format(endpoint))
        server.start()
        logging.info("server started")
        # block until another process creates the _shutdown_scheduler file
        os.system("rm _shutdown_scheduler")
        while not os.path.isfile("_shutdown_scheduler"):
            time.sleep(10)
Example #6
    def global_shuffle_by_patch(self, data_patch, date, concurrency):
        # integer division (//): range() needs an int round count
        shuffle_time = len(data_patch) // concurrency + 1
        for i in range(shuffle_time):
            if i * concurrency >= len(data_patch):
                break
            pros = []
            end = min((i + 1) * concurrency, len(data_patch))
            patch_list = data_patch[i * concurrency:end]
            width = len(patch_list)
            for j in range(width):
                p = Process(target=self.send_one_patch,
                            args=(patch_list[j], date))
                pros.append(p)
            for p in pros:
                p.start()
            for p in pros:
                p.join()
            logging.info("shuffle round {} done.".format(i))
Example #7
    def UpdateUserInstNum(self, request, context):
        shard_num = request.shard_num
        date = request.date
        if date not in self.uid_inst_num_dict:
            self.uid_inst_num_dict[date] = {}
        if date not in self.shard_id_dict:
            self.shard_id_dict[date] = {}
        for user in request.inst_nums:
            shard_id = self.uid_shard(user.uid, shard_num)
            if shard_id == -1:
                logging.info("UpdateUserInstNum continue")
                continue
            if user.uid in self.uid_inst_num_dict[date]:
                self.uid_inst_num_dict[date][user.uid] += user.inst_num
            else:
                self.uid_inst_num_dict[date][user.uid] = user.inst_num
            if shard_id not in self.shard_id_dict[date]:
                self.shard_id_dict[date][shard_id] = [user.uid]
            else:
                self.shard_id_dict[date][shard_id].append(user.uid)
        res = scheduler_server_pb2.Res()
        res.err_code = 0
        return res
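Examples #4 and #7 both delegate to a uid_shard helper that is not shown (the client variant takes one argument, the server variant two); a minimal sketch of the server-side version, assuming uids are numeric strings hashed modulo the shard count:

def uid_shard(self, uid, shard_num):
    # hypothetical helper: map a uid to a shard index; -1 means "skip"
    try:
        return int(uid) % shard_num
    except ValueError:
        return -1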
Example #8
def tls_scan(host, port):

    logging.info("[TLSSCANNER] starting tls-scan for Host: {}".format(host))
    tls_scanner = "tools/tls-scan/tls-scan -b 1 --no-parallel-enum -V --cacert=tools/tls-scan/ca-bundle.crt -c {}:{} ".format(
        host, port)
    args = shlex.split(tls_scanner)
    proc = subprocess.Popen(args,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.DEVNULL)
    (output, err) = proc.communicate()

    if err:
        logging.critical("[TLSSCANNER] Unable to scan {} error: {}".format(
            host, err))
    ret_code = proc.wait()
    if ret_code == 0 and len(output) > 5:
        try:
            output = json.loads(output.decode('utf-8').strip())
            return output

        except json.decoder.JSONDecodeError:
            return False
    else:
        return False
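A hedged usage sketch (the target address is hypothetical); tls_scan returns the parsed JSON report on success and False otherwise:

report = tls_scan("192.0.2.10", "443")
if report:
    print(report["tlsVersion"])  # field consumed by callback_results below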
Example #9
def main():
    db_session = initialize_db()
    subscribe_ses = partial(subscribe, db_session=db_session)
    unsubscribe_ses = partial(unsubscribe, db_session=db_session)
    plot_fun = partial(plot, db_session=db_session)

    dispatcher = updater.dispatcher
    # Add handler for start command
    start_handler = CommandHandler('start', start)
    dispatcher.add_handler(start_handler)
    # Commands
    logging.debug("Adding handlers.")
    for unit, commands in UNITS.items():
        get_unit = partial(get_kursi, unit=unit)
        for command in commands:
            dispatcher.add_handler(CommandHandler(command, get_unit))

    dispatcher.add_handler(CommandHandler('subscribe', subscribe_ses))
    dispatcher.add_handler(CommandHandler('unsubscribe', unsubscribe_ses))
    dispatcher.add_handler(CommandHandler('plot', plot_fun))

    # Start an infinite loop to respond to requests
    logging.info('Starting polling...')
    updater.start_polling()
Example #10
def infer_one_user(arg_dict, trainer_config):
    """
    infer a model with global_param and user params
    input:
        global_param
        user_params
        infer_program
        user_data
    output:
        [sample_cout, top1] 
    """
    # run startup program, set params
    uid = arg_dict["uid"]
    batch_size = trainer_config["batch_size"]
    startup_program = fluid.Program.parse_from_string(
        trainer_config["startup_program_desc"])
    infer_program = fluid.Program.parse_from_string(
        trainer_config["infer_program_desc"])
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.global_scope()

    if startup_program is None:
        logging.error("startup_program is None")
        exit()
    if infer_program is None:
        logging.error("infer_program is None")
        exit()

    exe.run(startup_program)

    data_client = DataClient()
    data_client.set_data_server_endpoints(arg_dict["data_endpoints"])

    # get user param
    # logging.debug("do not need to get user params")

    set_global_param_dict(arg_dict["global_param_names"],
                          arg_dict["global_params"], scope)

    # reader
    date = arg_dict["date"]
    global_param_dict = arg_dict["global_params"]
    user_data = data_client.get_data_by_uid(uid, date)
    infer_reader = reader.infer_reader(user_data)
    infer_reader = paddle.batch(infer_reader, batch_size=batch_size)

    # run infer program
    os.mkdir(arg_dict["infer_result_dir"])
    #pred_file = open(arg_dict["infer_result_dir"] + '/' + "pred_file", "w")
    feeder = fluid.DataFeeder(feed_list=trainer_config["input_names"],
                              place=place,
                              program=infer_program)

    fetch_list = trainer_config["target_names"]
    #logging.info("fetch_list: {}".format(fetch_list))
    fetch_res = []
    sample_count = 0

    num_layers = trainer_config["num_layers"]
    hidden_size = trainer_config["n_hidden"]
    tot_correct, tot_loss = 0, 0
    tot_samples, tot_batches = 0, 0
    init_hidden, init_cell = generate_init_data(batch_size, num_layers,
                                                hidden_size)
    for data in infer_reader():
        feed_data, input_lengths = prepare_input(batch_size, data, init_hidden,
                                                 init_cell)
        fetch_res = exe.run(infer_program,
                            feed=feeder.feed(feed_data),
                            fetch_list=fetch_list)
        loss, last_hidden, last_cell, correct = fetch_res

        cost_eval = np.array(loss)
        init_hidden = np.array(last_hidden)
        init_cell = np.array(last_cell)
        correct_val = np.array(correct)
        tot_loss += cost_eval
        tot_correct += correct_val
        tot_samples += np.sum(input_lengths)
        tot_batches += 1

    loss = tot_loss / tot_batches
    acc = float(tot_correct) / tot_samples
    logging.info("infer acc: {}".format(acc))
    with open(arg_dict["infer_result_dir"] + "/res", "w") as f:
        f.write("%d\t%f\n" % (1, acc))
Example #11
    def insert(self, id, host, port, tls_version, secure_renegotiation,
               tls_supported, issuer, subject, subjectCN, subjectAltName,
               pubkeysize, expired, notAfter, cert_valid, signatureAlg):
        # placeholders let sqlite3 quote the values itself; string
        # formatting breaks on quotes and invites SQL injection
        insertdb = 'INSERT INTO certgrabbers VALUES ({})'.format(', '.join(['?'] * 15))
        self.cur.execute(insertdb, (id, host, port, tls_version, secure_renegotiation,
                                    tls_supported, issuer, subject, subjectCN, subjectAltName,
                                    pubkeysize, expired, notAfter, cert_valid, signatureAlg))
        logging.info("[DB] Inserted to DB: {}".format(host))
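For reference, a minimal call with hypothetical values; the placeholder form also copes with values that contain quotes (such as the issuer below):

db.insert(1, "example.com", "443", "TLSv1.2", "True", "TLSv1.2,TLSv1.3",
          "Let's Encrypt", "CN=example.com", "example.com",
          "DNS:example.com", "2048", "False", "Jan  1 00:00:00 2026 GMT",
          "True", "sha256WithRSAEncryption")
db.commit()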
Example #12
def callback_results(host, scan_results):
    '''
    Where all the magic happens: checks the results from nmap_scan and
    stores the information in the db when the port is open.
    '''
    nmap = scan_results['nmap']
    port = nmap['scaninfo']['tcp']['services']
    scan = scan_results['scan']
    state = scan[host]['tcp'][int(port)]['state']

    if state == 'open':
        logging.info('[NMAP-{}] Host {} open!'.format(port, host))

        tls_scanner_output = tls_scan(host, port)

        if tls_scanner_output:

            tls_version = tls_scanner_output['tlsVersion']
            secure_renegotiation = tls_scanner_output['secureRenego']
            tls_supported = tls_scanner_output['tlsVersions']
            for certchain in range(len(
                    tls_scanner_output['certificateChain'])):
                if certchain == 0:
                    # read the leaf certificate once; .get() returns "None"
                    # for any field missing from the report instead of
                    # raising KeyError and leaving the name unbound
                    leaf = tls_scanner_output['certificateChain'][0]
                    subjectCN = leaf.get('subjectCN', "None")
                    subjectAltName = leaf.get('subjectAltName', "None")
                    pubkeysize = leaf.get('publicKeySize', "None")
                    expired = leaf.get('expired', "None")
                    cert_valid = tls_scanner_output.get(
                        'verifyCertResult', "None")
                    subject = leaf.get('subject', "None")
                    issuer = leaf.get('issuer', "None")
                    notAfter = leaf.get('notAfter', "None")
                    signatureAlg = leaf.get('signatureAlg', "None")

                id = len(db.getdata().fetchall())
                id += 1
                logging.info(
                    "[COUNTER] {} hosts remaining".format(counter))
                db.insert(id, host, port, tls_version, secure_renegotiation,
                          tls_supported, issuer, subject, subjectCN,
                          subjectAltName, pubkeysize, expired, notAfter,
                          cert_valid, signatureAlg)
                db.commit()
        else:
            with open('failed-hosts.txt', 'a') as failed:
                logging.critical(
                    "[TLSSCANNER] report for host {} was empty; "
                    "logged it to failed-hosts.txt".format(host))
                failed.write("{}\n".format(host))
Example #13
        'pad_symbol': vocab['<PAD>']
    }


def save_vocab(filename, vocab):
    with open(filename, "w") as f:
        f.write(json.dumps(vocab))


def load_vocab(filename):
    with open(filename) as f:
        return json.loads(f.read())


if os.path.exists(VOCAB_PATH):
    logging.info("load vocab form: {}".format(VOCAB_PATH))
    VOCAB = load_vocab(VOCAB_PATH)
else:
    #TODO: singleton
    logging.info("build vocab form: {}".format(TRAIN_DATA_PATH))
    VOCAB = build_vocab(TRAIN_DATA_PATH)
    logging.info("save vocab into: {}".format(VOCAB_PATH))
    save_vocab(VOCAB_PATH, VOCAB)
if VOCAB is None:
    logging.error("load vocab error")
    raise Exception("load vocab error")


def train_reader(lines):
    def local_iter():
        seg_id = 0
Example #14
if __name__ == '__main__':
    if os.getuid() != 0:
        logging.critical("You need to run as root user!")
        sys.exit()

    if not which('nmap'):
        logging.critical(
            "No nmap binary found in PATH. (Examples - MacOS: brew install nmap, Linux: apt-get install nmap)"
        )
        sys.exit()

    MAP = map_hosts(sys.argv[1])
    if os.path.exists('failed-hosts.txt'):
        os.remove('failed-hosts.txt')
        logging.info(
            "File failed-hosts.txt from previous scan has been removed")

    counter = len(MAP)
    logging.info("[SCANNER] Total Number of IP in list: {}".format(counter))

    try:
        for ip, port in MAP:
            counter -= 1
            pool.add_task(nmap_scan, ip, port)

    except KeyboardInterrupt:
        print(
            "[SYSTEM] Pressed CTRL+C, gracefully exiting from the application..."
        )
        pool.wait_completion()
Example #15
    def exists(self):
        check = self.execute("SELECT count(*) FROM sqlite_master WHERE type='table' AND name='gggitrepos';")
        found = check.fetchone()[0]
        if found:
            logging.info("[DB] {} already exists.".format(self.__DB_LOCATION))
        return found
Example #16
    def create_table(self):
        """Create the certgrabbers table if it does not already exist."""
        self.cur.execute('''CREATE TABLE IF NOT EXISTS certgrabbers(
            id integer, host, port, tls_version, secure_renegotiation,
            tls_supported, issuer, subject, subjectCN, subjectAltName,
            pubkeysize, expired, notAfter, cert_valid, signatureAlg)''')
        logging.info("[DB] Created: certgrabbers")
Example #17
    def run_simulation(self,
                       base_path,
                       dates,
                       fs_upload_path=None,
                       sim_num_everyday=1,
                       do_test=False,
                       test_skip_day=6):
        if not self.role_maker.is_simulator():
            return  # only simulator roles run the simulation loop
        data_services = self._get_data_services()
        for service in data_services:
            service.start()
        self._barrier_simulators()
        self.data_client = DataClient()
        self.data_client.set_load_data_into_patch_func(
            self.trainer.get_load_data_into_patch_func())
        self.data_client.set_data_server_endpoints(
            self.role_maker.get_data_server_endpoints())
        self.scheduler_client = SchedulerClient()
        self.scheduler_client.set_data_server_endpoints(
            self.role_maker.get_data_server_endpoints())
        self.scheduler_client.set_scheduler_server_endpoints(
            [self.role_maker.get_global_scheduler_endpoint()])
        logging.info("trainer config: ", self.trainer.trainer_config)
        self.trainer.prepare(do_test=do_test)

        if self.role_maker.simulator_idx() == 0:
            self.trainer.init_global_model(self.scheduler_client)
        self._barrier_simulators()

        for date_idx, date in enumerate(dates):
            if date_idx > 0:
                self.do_profile = False
                self.profile_file.close()
            logging.info("reading data for date: %s" % date)
            local_files = self._profile(
                self.data_client.get_local_files,
                base_path,
                date,
                self.role_maker.simulator_idx(),
                self.role_maker.simulator_num(),
                hdfs_configs=self.hdfs_configs)

            logging.info("loading data into patch for date: %s" % date)
            data_patch, local_user_dict = self._profile(
                self.data_client.load_data_into_patch, local_files, 10000)
            logging.info("shuffling data for date: %s" % date)
            self._profile(self.data_client.global_shuffle_by_patch, data_patch,
                          date, 30)

            logging.info("updating user inst num for date: %s" % date)
            self._profile(self.scheduler_client.update_user_inst_num, date,
                          local_user_dict)
            self.role_maker.barrier_simulator()

            if do_test and date_idx != 0 and date_idx % test_skip_day == 0:
                self._barrier_simulators()
                self._profile(self._test, date)
                self._barrier_simulators()
                self._profile(self._save_and_upload, date, fs_upload_path)

            self._run_sim(date, sim_num_everyday=sim_num_everyday)
            self.role_maker.barrier_simulator()
            logging.info("clear user data for date: %s" % date)
            self.data_client.clear_user_data(date)

        self._barrier_simulators()
        logging.info("training done all date.")
        logging.info("stoping scheduler")
        self.scheduler_client.stop_scheduler_server()
        for pro in data_services:
            pro.terminate()
        logging.info("after terminate for all server.")
Example #18
    def _test(self, date):
        if self.trainer.infer_one_user_func is None:
            return  # nothing to test without an infer function
        logging.info("doing test...")
        if self.test_sampler is None:
            logging.error("self.test_sampler should not be None when testing")
            return

        sim_idx = self.role_maker.simulator_idx()
        sim_num = self.role_maker.simulator_num()
        user_info_dict = self.test_sampler.sample_user_list(
            self.scheduler_client,
            date,
            sim_idx,
            len(self.data_client.stub_list),
            sim_num, )
        if self.do_profile:
            print("test user info_dict: ", user_info_dict)
        global_param_dict = self.scheduler_client.get_global_params()

        def divide_chunks(l, n):
            for i in range(0, len(l), n):
                yield l[i:i + n]

        # at most 50 process for testing
        chunk_size = 50
        # at most 100 uid for testing
        max_test_uids = 100
        # materialize the keys so divide_chunks can slice them
        uid_chunks = divide_chunks(list(user_info_dict.keys()), chunk_size)
        os.system("rm -rf _test_result")
        os.system("mkdir _test_result")

        tested_uids = 0
        for uids in uid_chunks:
            if tested_uids >= max_test_uids:
                break
            processes = []
            for user in uids:
                arg_dict = {
                    "uid": str(user),
                    "date": date,
                    "data_endpoints":
                    self.role_maker.get_data_server_endpoints(),
                    "global_params": global_param_dict,
                    "user_param_names": self.trainer.get_user_param_names(),
                    "global_param_names":
                    self.trainer.get_global_param_names(),
                    "infer_result_dir": "_test_result/uid-%s" % user,
                }
                p = Process(
                    target=self.trainer.infer_one_user_func,
                    args=(arg_dict, self.trainer.trainer_config))
                p.start()
                processes.append(p)
            if self.do_profile:
                logging.info("wait test processes to close")
            for i, p in enumerate(processes):
                processes[i].join()
            tested_uids += chunk_size

        infer_results = []
        # only support one test metric now
        for uid in os.listdir("_test_result"):
            with open("_test_result/" + uid + "/res", 'r') as f:
                sample_count, metric = f.readlines()[0].strip('\n').split('\t')
                infer_results.append((int(sample_count), float(metric)))
        if sum([x[0] for x in infer_results]) == 0:
            logging.info("infer results: 0.0")
        else:
            count = sum([x[0] for x in infer_results])
            metric = sum([x[0] * x[1] for x in infer_results]) / count
            logging.info("infer results: %f" % metric)
Example #19
    def _run_sim(self, date, sim_num_everyday=1):
        sim_idx = self.role_maker.simulator_idx()
        sim_num = self.role_maker.simulator_num()
        sim_all_trainer_run_time = 0
        sim_read_param_and_optimize = 0
        for sim in range(sim_num_everyday):
            logging.info("sim id: %d" % sim)
            # sampler algorithm
            user_info_dict = self._profile(
                self.sampler.sample_user_list, self.scheduler_client, date,
                sim_idx, len(self.data_client.stub_list), sim_num)

            if self.do_profile:
                print("sim_idx: ", sim_idx)
                print("shard num: ", len(self.data_client.stub_list))
                print("sim_num: ", sim_num)
                print("user_info_dict: ", user_info_dict)

            global_param_dict = self._profile(
                self.scheduler_client.get_global_params)
            processes = []
            os.system("rm -rf _global_param")
            os.system("mkdir _global_param")
            start = time.time()
            for idx, user in enumerate(user_info_dict):
                arg_dict = {
                    "uid": str(user),
                    "date": date,
                    "data_endpoints":
                    self.role_maker.get_data_server_endpoints(),
                    "global_params": global_param_dict,
                    "user_param_names": self.trainer.get_user_param_names(),
                    "global_param_names":
                    self.trainer.get_global_param_names(),
                    "write_global_param_file":
                    "_global_param/process_%d" % idx,
                }
                p = Process(
                    target=self.trainer.train_one_user_func,
                    args=(arg_dict, self.trainer.trainer_config))
                p.start()
                processes.append(p)
            if self.do_profile:
                logging.info("wait processes to close")
            for i, p in enumerate(processes):
                processes[i].join()
            end = time.time()
            sim_all_trainer_run_time += (end - start)

            start = time.time()
            train_result = []
            new_global_param_by_user = {}
            training_sample_by_user = {}
            for i, p in enumerate(processes):
                param_dir = "_global_param/process_%d/" % i
                # pickle payloads must be read in binary mode
                with open(param_dir + "/_info", "rb") as f:
                    user, train_sample_num = pickle.load(f)
                param_dict = {}
                for f_name in os.listdir(os.path.join(param_dir, "params")):
                    f_path = os.path.join(param_dir, "params", f_name)
                    if os.path.isdir(f_path):  # layer
                        for layer_param in os.listdir(f_path):
                            layer_param_path = os.path.join(f_path,
                                                            layer_param)
                            # np.load can read directly from a file path
                            param_dict["{}/{}".format(
                                f_name, layer_param)] = np.load(layer_param_path)
                    else:
                        param_dict[f_name] = np.load(f_path)
                new_global_param_by_user[user] = param_dict
                training_sample_by_user[user] = train_sample_num

            self.fl_optimizer.update(training_sample_by_user,
                                     new_global_param_by_user,
                                     global_param_dict, self.scheduler_client)
            end = time.time()
            sim_read_param_and_optimize += (end - start)
        if self.do_profile:
            self.profile_file.write("sim_all_trainer_run_time\t\t%f s\n" %
                                    sim_all_trainer_run_time)
            self.profile_file.write("sim_read_praram_and_optimize\t\t%f s\n" %
                                    sim_read_praram_and_optimize)

        logging.info("training done for date %s." % date)
Example #20
def save_and_upload(arg_dict, trainer_config, dfs_upload_path):
    logging.info("do not save and upload...")
    return
Example #21
    def removedb(self):
        # remove first so the success message is only logged if it worked
        os.remove(Datastore.__DB_LOCATION)
        logging.info("[DB] {} successfully removed".format(self.__DB_LOCATION))