def __init__(self,
             cuckoo_analysis_dir=None,
             report_dir_name=None,
             report_dir_path=None,
             report_file_name=None):
    """Set up configuration and the data-preparation helper.

    Args:
        cuckoo_analysis_dir: Forwarded unchanged to ``PrepareData``.
        report_dir_name: Forwarded to ``load_args``.
        report_dir_path: Forwarded to ``load_args``.
        report_file_name: Forwarded to ``load_args``.
    """
    # load_args is expected to populate self.args, which is read right below.
    self.load_args(report_dir_name, report_dir_path, report_file_name)

    preparer = PrepareData(self.args,
                           cuckoo_analysis_dir=cuckoo_analysis_dir)
    self.prep_data = preparer

    # No application object exists until one is created later.
    self.app = None
def __init__(self,
             task_ids=None,
             cuckoo_analysis_dir=None,
             report_dir_name=None,
             report_dir_path=None,
             report_file_name=None):
    """Remember the task ids, load configuration and build the data helper.

    Args:
        task_ids: Stored on the instance and forwarded to ``load_args``.
        cuckoo_analysis_dir: Forwarded unchanged to ``PrepareData``.
        report_dir_name: Forwarded to ``load_args``.
        report_dir_path: Forwarded to ``load_args``.
        report_file_name: Forwarded to ``load_args``.
    """
    self.task_ids = task_ids

    # load_args is expected to populate self.args, which is read right below.
    self.load_args(task_ids, report_dir_name, report_dir_path,
                   report_file_name)

    preparer = PrepareData(self.args,
                           cuckoo_analysis_dir=cuckoo_analysis_dir)
    self.prep_data = preparer
def load_dataset(args, cuda):
    """Load the dataset through ``PrepareData`` and optionally move it to CUDA.

    ``PrepareData`` is imported from ``prep_data_n`` found under
    ``config_params['prep_path']`` when that entry exists and is not None;
    otherwise the bundled ``utils.prep_data_n`` module is used.

    Args:
        args: Parameters handed to ``PrepareData``.
        cuda: The data is passed through ``to_cuda`` only when this is
            exactly ``True``.

    Returns:
        Whatever ``PrepareData.load_data()`` produced, possibly after
        ``to_cuda``.
    """
    custom_prep_path = (config_params['prep_path']
                        if 'prep_path' in config_params else None)

    if custom_prep_path is None:
        from utils.prep_data_n import PrepareData
    else:
        # Put the override directory first so its prep_data_n module wins.
        sys.path.insert(0, custom_prep_path)
        print('*** prep path', custom_prep_path)
        from prep_data_n import PrepareData

    loader = PrepareData(args, cuckoo_analysis_dir='api_tasks/data_report')
    dataset = loader.load_data()

    if cuda is True:
        dataset = to_cuda(dataset)
    return dataset
class HAN_module: def __init__(self, task_ids=None): self.load_args(task_ids) self.prep_data = PrepareData(self.args) def load_args(self, task_ids=None): self.args = read_params(CONFIG_PATH, verbose=False) self.args['config_fpath'] = CONFIG_PATH if task_ids is not None: task_ids = [str(tid) for tid in task_ids] batch_task_name = '-'.join(task_ids) self.args["input_report_folder"] = cf.__ROOT__+'/api_tasks/data_report' self.args["input_data_folder"] = cf.__ROOT__+'/api_tasks/data_json/{}'.format(batch_task_name) self.args["input_pickle_folder"] = cf.__ROOT__+'/api_tasks/data_pickle/{}'.format(batch_task_name) self.args["mapping_path"] = cf.__ROOT__+'/'+self.args["mapping_path"] self.args["train_embedding_path"] = cf.__ROOT__+'/'+self.args["train_embedding_path"] self.args["vocab_path"] = cf.__ROOT__+'/'+self.args["vocab_path"] self.args["graph_viz_dir"] = cf.__ROOT__+'/data/graphviz' self.args["from_pickle"] = False self.args["from_report_folder"] = False self.args["from_data_json"] = False self.args["prepare_word_embedding"] = True self.args["train_embedder"] = False self.args["reverse_edge"] = __REVERSE_EDGE__ del self.args['train_list_file'] del self.args['test_list_file'] self.args['do_draw'] = __DO_DRAW__ # for predict fconfig_name = self.args['config_fpath'].split('/')[-1] odir = self.args['config_fpath'].split(fconfig_name)[0] self.model_config = self.args['model_configs'] self.args['checkpoint_file'] = odir+'/checkpoint' def prepare_files(self, task_ids=None, cuda=True): data = self.prep_data.load_data_files(task_ids) if data is None: return None, self.args data = to_cuda(data) if cuda is True else data return data, self.args def predict_files(self, data, self.args, cuda=True):
def prepare_files(task_ids=None, cuda=True):
    """Read the config, adapt it for inference and load data for the tasks.

    Args:
        task_ids: Optional iterable of task identifiers. When given, the ids
            are stringified and joined with ``-`` to name the per-batch
            json/pickle folders.
        cuda: The loaded data is passed through ``to_cuda`` only when this
            is exactly ``True``.

    Returns:
        A ``(data, args)`` tuple; ``data`` is ``None`` when
        ``PrepareData.load_data_files`` returned ``None``.
    """
    args = read_params(CONFIG_PATH, verbose=False)
    args['config_fpath'] = CONFIG_PATH
    root = cf.__ROOT__

    # NOTE(review): the extent of this branch is reconstructed from the
    # parallel if/else in HAN_module.load_args — confirm against the
    # original layout.
    if task_ids is not None:
        task_ids = [str(tid) for tid in task_ids]
        batch_task_name = '-'.join(task_ids)
        api_root = root + '/api_tasks'
        args["input_report_folder"] = api_root + '/data_report'
        args["input_data_folder"] = api_root + '/data_json/' + batch_task_name
        args["input_pickle_folder"] = (
            api_root + '/data_pickle/' + batch_task_name)

    # Paths from the config file are relative to the project root.
    for path_key in ("mapping_path", "train_embedding_path", "vocab_path"):
        args[path_key] = root + '/' + args[path_key]
    args["graph_viz_dir"] = root + '/data/graphviz'

    # Fixed switches applied on top of the config.
    overrides = (
        ("from_pickle", False),
        ("from_report_folder", False),
        ("from_data_json", False),
        ("prepare_word_embedding", True),
        ("train_embedder", False),
        ("reverse_edge", __REVERSE_EDGE__),
    )
    for flag, value in overrides:
        args[flag] = value

    for unused_key in ('train_list_file', 'test_list_file'):
        del args[unused_key]
    args['do_draw'] = __DO_DRAW__

    loaded = PrepareData(args).load_data_files(task_ids)
    if loaded is None:
        return None, args
    if cuda is True:
        loaded = to_cuda(loaded)
    return loaded, args
class HAN_module:
    """Wrapper that prepares report data and runs prediction on it.

    The configuration is read from ``CONFIG_PATH`` and adjusted in
    ``load_args``; data loading is delegated to ``PrepareData`` and
    prediction to ``App``.
    """

    def __init__(self, task_ids=None, cuckoo_analysis_dir=None,
                 report_dir_name=None, report_dir_path=None,
                 report_file_name=None):
        """Store the task ids, load the configuration and build ``PrepareData``.

        Args:
            task_ids: Optional iterable of task identifiers.
            cuckoo_analysis_dir: Forwarded unchanged to ``PrepareData``.
            report_dir_name: Names the json/pickle folders when ``task_ids``
                is None; also forwarded to the loader in ``prepare_files``.
            report_dir_path: Forwarded to the loader in ``prepare_files``.
            report_file_name: Forwarded to the loader in ``prepare_files``.
        """
        self.task_ids = task_ids
        self.load_args(task_ids, report_dir_name, report_dir_path,
                       report_file_name)
        self.prep_data = PrepareData(self.args,
                                     cuckoo_analysis_dir=cuckoo_analysis_dir)

    def load_args(self, task_ids=None, report_dir_name=None,
                  report_dir_path=None, report_file_name=None):
        """Read the config file and rewrite it for prediction.

        Sets ``self.args``, ``self.odir``, ``self.model_config`` and the three
        ``self.report_*`` attributes.

        Args:
            task_ids: When given, the stringified ids joined with ``-`` name
                the json/pickle folders.
            report_dir_name: Used as that folder name when ``task_ids`` is
                None.
            report_dir_path: Stored on the instance.
            report_file_name: Stored on the instance.

        Raises:
            KeyError: If the config lacks ``train_list_file`` or
                ``test_list_file`` (both are deleted unconditionally), or any
                of the path keys read below.
        """
        self.args = read_params(CONFIG_PATH, verbose=False)
        self.report_dir_name = report_dir_name
        self.report_dir_path = report_dir_path
        self.report_file_name = report_file_name
        self.args['config_fpath'] = CONFIG_PATH

        # Both input modes use the same folder layout; only the name of the
        # per-batch sub-folder differs.
        if task_ids is not None:
            folder_name = '-'.join(str(tid) for tid in task_ids)
        else:
            folder_name = report_dir_name
        api_root = cf.__ROOT__ + '/api_tasks'
        self.args["input_report_folder"] = api_root + '/data_report'
        self.args["input_data_folder"] = (
            api_root + '/data_json/{}'.format(folder_name))
        self.args["input_pickle_folder"] = (
            api_root + '/data_pickle/{}'.format(folder_name))

        # Paths from the config file are relative to the project root.
        for path_key in ("mapping_path", "train_embedding_path", "vocab_path"):
            self.args[path_key] = cf.__ROOT__ + '/' + self.args[path_key]

        # graph_viz_dir and the from_* switches are intentionally not
        # overridden here; whatever the config file holds is kept.
        self.args["prepare_word_embedding"] = True
        self.args["train_embedder"] = False
        self.args["reverse_edge"] = __REVERSE_EDGE__
        del self.args['train_list_file']
        del self.args['test_list_file']
        self.args['do_draw'] = __DO_DRAW__

        # for predict: the checkpoint sits next to the config file.
        fconfig_name = self.args['config_fpath'].split('/')[-1]
        self.odir = self.args['config_fpath'].split(fconfig_name)[0]
        self.model_config = self.args['model_configs']
        self.args['checkpoint_file'] = self.odir + '/checkpoint'
        print('\t [load_args] self.args', self.args)
        print('\t [load_args] self.odir', self.odir)

    def prepare_files(self, cuda=True):
        """Load the data for this instance's tasks or report.

        Args:
            cuda: The loaded data is passed through ``to_cuda`` only when
                this is exactly ``True``.

        Returns:
            The loaded data, or ``None`` when the loader returned ``None``.
            (The previous ``(None, self.args)`` tuple on failure was
            inconsistent with the success path and defeated ``is None``
            checks; the arguments remain available as ``self.args``.)
        """
        if self.task_ids is None:
            # No task ids: locate the report through the report_* settings.
            data = self.prep_data.load_data_files(
                self.task_ids,
                report_dir_path=self.report_dir_path,
                report_dir_name=self.report_dir_name,
                report_file_name=self.report_file_name)
        else:
            data = self.prep_data.load_data_files(self.task_ids)

        if data is None:
            return None
        if cuda is True:
            data = to_cuda(data)
        return data

    def predict_files(self, data, cuda=True):
        """Run the pretrained model on ``data`` and return ``App.predict``'s result.

        Args:
            data: Data as returned by ``prepare_files``; ``data[GNAMES][0]``
                is read when drawing is enabled.
            cuda: Passed to ``App`` through its learning config.
        """
        print('\n*** Start testing ***\n')
        learning_config = {'cuda': cuda}

        # The graphviz output mirrors the pickle folder layout.
        graphviz_dir_path = self.args["input_pickle_folder"].replace(
            'data_pickle', 'data_graphviz')
        if self.args['do_draw'] is False:
            gdot_path = None
        else:
            gdot_path = '{}/{}'.format(graphviz_dir_path, data[GNAMES][0])

        app = App(data,
                  model_config=self.model_config,
                  learning_config=learning_config,
                  pretrained_weight=self.args['checkpoint_file'],
                  early_stopping=True,
                  patience=20,
                  json_path=self.args['input_data_folder'],
                  pickle_folder=self.args['input_pickle_folder'],
                  vocab_path=self.args['vocab_path'],
                  mapping_path=self.args['mapping_path'],
                  model_src_path=self.odir,
                  append_nid_eid=__APPEND_NID_EID__,
                  gdot_path=gdot_path)
        return app.predict(self.args['checkpoint_file'])
def __init__(self, task_ids=None):
    """Load configuration for the given tasks and build the data helper.

    Args:
        task_ids: Forwarded to ``load_args``.
    """
    # load_args is expected to populate self.args, which is read right below.
    self.load_args(task_ids)

    preparer = PrepareData(self.args)
    self.prep_data = preparer