예제 #1
0
 def __init__(self,
              cuckoo_analysis_dir=None,
              report_dir_name=None,
              report_dir_path=None,
              report_file_name=None):
     """Load the arguments, build the data-preparation helper, reset ``app``.

     Args:
         cuckoo_analysis_dir: Passed through to ``PrepareData`` unchanged.
         report_dir_name: Forwarded to ``self.load_args``.
         report_dir_path: Forwarded to ``self.load_args``.
         report_file_name: Forwarded to ``self.load_args``.
     """
     self.load_args(report_dir_name, report_dir_path, report_file_name)
     # ``self.args`` is presumably populated by ``load_args`` above
     # (its definition is not visible here).
     data_preparer = PrepareData(self.args,
                                 cuckoo_analysis_dir=cuckoo_analysis_dir)
     self.prep_data = data_preparer
     # No application object exists until one is created later.
     self.app = None
예제 #2
0
 def __init__(self,
              task_ids=None,
              cuckoo_analysis_dir=None,
              report_dir_name=None,
              report_dir_path=None,
              report_file_name=None):
     """Remember the task ids, load the arguments and build the data helper.

     Args:
         task_ids: Stored on the instance and forwarded to ``load_args``.
         cuckoo_analysis_dir: Passed through to ``PrepareData`` unchanged.
         report_dir_name: Forwarded to ``self.load_args``.
         report_dir_path: Forwarded to ``self.load_args``.
         report_file_name: Forwarded to ``self.load_args``.
     """
     self.task_ids = task_ids
     self.load_args(task_ids,
                    report_dir_name,
                    report_dir_path,
                    report_file_name)
     # ``self.args`` is presumably populated by ``load_args`` above
     # (its definition is not visible here).
     data_preparer = PrepareData(self.args,
                                 cuckoo_analysis_dir=cuckoo_analysis_dir)
     self.prep_data = data_preparer
예제 #3
0
def load_dataset(args, cuda):
    """Load the dataset through ``PrepareData`` and optionally pass it to ``to_cuda``.

    ``PrepareData`` is imported from ``prep_data_n`` found under
    ``config_params['prep_path']`` when that entry exists and is not None;
    otherwise it comes from ``utils.prep_data_n``.

    Args:
        args: Forwarded to ``PrepareData`` as its first argument.
        cuda: The data goes through ``to_cuda`` only when this is exactly
            ``True`` (identity check, not truthiness).

    Returns:
        Whatever ``PrepareData.load_data`` produced, possibly converted by
        ``to_cuda``.
    """
    custom_prep_path = (config_params['prep_path']
                        if 'prep_path' in config_params else None)

    if custom_prep_path is None:
        from utils.prep_data_n import PrepareData
    else:
        # Put the configured directory first so its module wins the import.
        sys.path.insert(0, custom_prep_path)
        print('*** prep path', custom_prep_path)
        from prep_data_n import PrepareData

    loader = PrepareData(args, cuckoo_analysis_dir='api_tasks/data_report')
    dataset = loader.load_data()
    if cuda is True:
        dataset = to_cuda(dataset)
    return dataset
예제 #4
0
class HAN_module:
    def __init__(self, task_ids=None):
        """Load the run arguments and build the data-preparation helper.

        Args:
            task_ids: Optional iterable of task identifiers, forwarded to
                ``load_args``.
        """
        self.load_args(task_ids=task_ids)
        # ``load_args`` has just populated ``self.args``.
        data_preparer = PrepareData(self.args)
        self.prep_data = data_preparer
    
    def load_args(self, task_ids=None):
        """Read the parameters from ``CONFIG_PATH`` and adapt them for prediction.

        Populates ``self.args`` and ``self.model_config``.

        Args:
            task_ids: Optional iterable of task identifiers. When given, the
                input report/json/pickle folders are redirected to
                ``api_tasks`` sub-folders named after the ids joined by ``-``.

        Raises:
            KeyError: If ``mapping_path``, ``train_embedding_path``,
                ``vocab_path``, ``train_list_file``, ``test_list_file`` or
                ``model_configs`` is missing from the loaded parameters.
        """
        self.args = read_params(CONFIG_PATH, verbose=False)
        params = self.args
        root = cf.__ROOT__

        params['config_fpath'] = CONFIG_PATH

        if task_ids is not None:
            batch_task_name = '-'.join([str(tid) for tid in task_ids])
            api_root = root + '/api_tasks'
            params["input_report_folder"] = api_root + '/data_report'
            params["input_data_folder"] = api_root + '/data_json/' + batch_task_name
            params["input_pickle_folder"] = api_root + '/data_pickle/' + batch_task_name

        # These entries are stored without the project root; prefix them.
        for path_key in ("mapping_path", "train_embedding_path", "vocab_path"):
            params[path_key] = root + '/' + params[path_key]

        params["graph_viz_dir"] = root + '/data/graphviz'

        fixed_flags = (
            ("from_pickle", False),
            ("from_report_folder", False),
            ("from_data_json", False),
            ("prepare_word_embedding", True),
            ("train_embedder", False),
        )
        for flag_name, flag_value in fixed_flags:
            params[flag_name] = flag_value

        params["reverse_edge"] = __REVERSE_EDGE__

        # The train/test list entries are removed from the arguments.
        for stale_key in ('train_list_file', 'test_list_file'):
            del params[stale_key]

        params['do_draw'] = __DO_DRAW__

        # for predict: everything before the config file name is used as the
        # output directory (it keeps its trailing separator).
        config_path = params['config_fpath']
        fconfig_name = config_path.rsplit('/', 1)[-1]
        odir = config_path.partition(fconfig_name)[0]

        self.model_config = params['model_configs']

        params['checkpoint_file'] = odir + '/checkpoint'


    def prepare_files(self, task_ids=None, cuda=True):
        """Load the data for ``task_ids`` and pair it with the current arguments.

        Args:
            task_ids: Forwarded to ``self.prep_data.load_data_files``.
            cuda: The data goes through ``to_cuda`` only when this is exactly
                ``True`` (identity check, not truthiness).

        Returns:
            A ``(data, self.args)`` tuple; ``data`` is ``None`` when the
            loader returned ``None``.
        """
        loaded = self.prep_data.load_data_files(task_ids)
        if loaded is not None and cuda is True:
            loaded = to_cuda(loaded)
        return loaded, self.args


    def predict_files(self, data, self.args, cuda=True):
예제 #5
0
def prepare_files(task_ids=None, cuda=True):
    """Build the prediction arguments from ``CONFIG_PATH`` and load the data.

    Args:
        task_ids: Optional iterable of task identifiers. When given, the ids
            are converted to strings, the input report/json/pickle folders
            are redirected to ``api_tasks`` sub-folders named after the ids
            joined by ``-``, and the stringified ids are what the loader
            receives.
        cuda: The data goes through ``to_cuda`` only when this is exactly
            ``True`` (identity check, not truthiness).

    Returns:
        A ``(data, args)`` tuple; ``data`` is ``None`` when the loader
        returned ``None``.

    Raises:
        KeyError: If ``mapping_path``, ``train_embedding_path``,
            ``vocab_path``, ``train_list_file`` or ``test_list_file`` is
            missing from the loaded parameters.
    """
    params = read_params(CONFIG_PATH, verbose=False)
    root = cf.__ROOT__

    params['config_fpath'] = CONFIG_PATH

    if task_ids is not None:
        # Rebind on purpose: the loader below receives the string ids.
        task_ids = [str(tid) for tid in task_ids]
        batch_task_name = '-'.join(task_ids)
        api_root = root + '/api_tasks'
        params["input_report_folder"] = api_root + '/data_report'
        params["input_data_folder"] = api_root + '/data_json/' + batch_task_name
        params["input_pickle_folder"] = api_root + '/data_pickle/' + batch_task_name

    # These entries are stored without the project root; prefix them.
    for path_key in ("mapping_path", "train_embedding_path", "vocab_path"):
        params[path_key] = root + '/' + params[path_key]

    params["graph_viz_dir"] = root + '/data/graphviz'

    fixed_flags = (
        ("from_pickle", False),
        ("from_report_folder", False),
        ("from_data_json", False),
        ("prepare_word_embedding", True),
        ("train_embedder", False),
    )
    for flag_name, flag_value in fixed_flags:
        params[flag_name] = flag_value

    params["reverse_edge"] = __REVERSE_EDGE__

    # The train/test list entries are removed from the arguments.
    for stale_key in ('train_list_file', 'test_list_file'):
        del params[stale_key]

    params['do_draw'] = __DO_DRAW__

    loaded = PrepareData(params).load_data_files(task_ids)
    if loaded is not None and cuda is True:
        loaded = to_cuda(loaded)
    return loaded, params
예제 #6
0
class HAN_module:
    """Load the run configuration, prepare input data and run prediction.

    Construction reads the parameters from ``CONFIG_PATH`` (see
    ``load_args``) and builds a ``PrepareData`` helper. ``prepare_files``
    loads the data and ``predict_files`` hands it to ``App``.
    """

    def __init__(self,
                 task_ids=None,
                 cuckoo_analysis_dir=None,
                 report_dir_name=None,
                 report_dir_path=None,
                 report_file_name=None):
        """Store the task ids, load the arguments and build the data helper.

        Args:
            task_ids: Optional iterable of task identifiers. When ``None``
                the ``report_*`` parameters are used instead.
            cuckoo_analysis_dir: Passed through to ``PrepareData`` unchanged.
            report_dir_name: Names the json/pickle input folders when
                ``task_ids`` is ``None``; also passed to the loader.
            report_dir_path: Passed to the loader when ``task_ids`` is
                ``None``.
            report_file_name: Passed to the loader when ``task_ids`` is
                ``None``.
        """
        self.task_ids = task_ids
        self.load_args(task_ids, report_dir_name, report_dir_path,
                       report_file_name)
        self.prep_data = PrepareData(self.args,
                                     cuckoo_analysis_dir=cuckoo_analysis_dir)

    def load_args(self,
                  task_ids=None,
                  report_dir_name=None,
                  report_dir_path=None,
                  report_file_name=None):
        """Read the parameters from ``CONFIG_PATH`` and adapt them for prediction.

        Populates ``self.args``, ``self.odir``, ``self.model_config`` and the
        three ``self.report_*`` attributes, then prints the resulting
        arguments and output directory.

        Args:
            task_ids: Optional iterable of task identifiers; when given, the
                json/pickle input folders are named after the ids joined by
                ``-``.
            report_dir_name: Used as the json/pickle folder name when
                ``task_ids`` is ``None``.
            report_dir_path: Stored on the instance for ``prepare_files``.
            report_file_name: Stored on the instance for ``prepare_files``.

        Raises:
            KeyError: If ``mapping_path``, ``train_embedding_path``,
                ``vocab_path``, ``train_list_file``, ``test_list_file`` or
                ``model_configs`` is missing from the loaded parameters.
        """
        self.args = read_params(CONFIG_PATH, verbose=False)

        self.report_dir_name = report_dir_name
        self.report_dir_path = report_dir_path
        self.report_file_name = report_file_name

        self.args['config_fpath'] = CONFIG_PATH

        # The json/pickle folders are named after the joined task ids when
        # ids are given, otherwise after the report directory name.
        # NOTE: if both are None the folder name ends up as the text 'None'.
        if task_ids is not None:
            folder_name = '-'.join([str(tid) for tid in task_ids])
        else:
            folder_name = report_dir_name

        api_root = cf.__ROOT__ + '/api_tasks'
        self.args["input_report_folder"] = api_root + '/data_report'
        self.args["input_data_folder"] = (
            api_root + '/data_json/{}'.format(folder_name))
        self.args["input_pickle_folder"] = (
            api_root + '/data_pickle/{}'.format(folder_name))

        # These entries are stored without the project root; prefix them.
        for path_key in ("mapping_path", "train_embedding_path", "vocab_path"):
            self.args[path_key] = cf.__ROOT__ + '/' + self.args[path_key]

        self.args["prepare_word_embedding"] = True
        self.args["train_embedder"] = False

        self.args["reverse_edge"] = __REVERSE_EDGE__

        # The train/test list entries are removed from the arguments.
        del self.args['train_list_file']
        del self.args['test_list_file']

        self.args['do_draw'] = __DO_DRAW__

        # for predict: everything before the config file name is used as the
        # output directory (it keeps its trailing separator).
        fconfig_name = self.args['config_fpath'].split('/')[-1]
        self.odir = self.args['config_fpath'].split(fconfig_name)[0]

        self.model_config = self.args['model_configs']

        self.args['checkpoint_file'] = self.odir + '/checkpoint'

        print('\t [load_args] self.args', self.args)
        print('\t [load_args] self.odir', self.odir)

    def prepare_files(self, cuda=True):
        """Load the input data, optionally passing it through ``to_cuda``.

        Args:
            cuda: The data goes through ``to_cuda`` only when this is exactly
                ``True`` (identity check, not truthiness).

        Returns:
            The loaded data, or ``None`` when the loader returned ``None``.
        """
        if self.task_ids is None:
            # No task ids: locate the input through the report parameters.
            data = self.prep_data.load_data_files(
                self.task_ids,
                report_dir_path=self.report_dir_path,
                report_dir_name=self.report_dir_name,
                report_file_name=self.report_file_name)
        else:
            data = self.prep_data.load_data_files(self.task_ids)

        if data is None:
            # Return a bare None so the failure case has the same shape as
            # the success case; a (None, args) tuple here would defeat a
            # caller's ``is None`` check.
            return None

        return to_cuda(data) if cuda is True else data

    def predict_files(self, data, cuda=True):
        """Run prediction on ``data`` using the configured checkpoint.

        Args:
            data: Data as returned by ``prepare_files``. It is indexed with
                ``GNAMES`` when drawing is enabled.
            cuda: Forwarded to ``App`` inside its ``learning_config``.

        Returns:
            Whatever ``App.predict`` returns for the checkpoint file.
        """
        print('\n*** Start testing ***\n')
        learning_config = {'cuda': cuda}

        # The graphviz output directory mirrors the pickle folder's path.
        graphviz_dir_path = self.args["input_pickle_folder"].replace(
            'data_pickle', 'data_graphviz')
        gdot_path = None if self.args['do_draw'] is False else '{}/{}'.format(
            graphviz_dir_path, data[GNAMES][0])

        app = App(data,
                  model_config=self.model_config,
                  learning_config=learning_config,
                  pretrained_weight=self.args['checkpoint_file'],
                  early_stopping=True,
                  patience=20,
                  json_path=self.args['input_data_folder'],
                  pickle_folder=self.args['input_pickle_folder'],
                  vocab_path=self.args['vocab_path'],
                  mapping_path=self.args['mapping_path'],
                  model_src_path=self.odir,
                  append_nid_eid=__APPEND_NID_EID__,
                  gdot_path=gdot_path)
        return app.predict(self.args['checkpoint_file'])
예제 #7
0
 def __init__(self, task_ids=None):
     """Load the run arguments and build the data-preparation helper.

     Args:
         task_ids: Forwarded to ``self.load_args``.
     """
     self.load_args(task_ids)
     # ``self.args`` is presumably populated by ``load_args`` above
     # (its definition is not visible here).
     data_preparer = PrepareData(self.args)
     self.prep_data = data_preparer