def train(repository: FunctionRepository, params: Asm2VecParams) -> None:
    """Train asm2vec over every function in *repository* using *params*.

    Each function is trained concurrently on a thread pool sized by
    ``params.jobs``; a shared atomic counter tracks progress for logging.

    Raises:
        RuntimeError: if any training task fails or does not complete.
    """
    context = TrainingContext(repository, params)
    context.add_counter(TrainingContext.TOKENS_HANDLED_COUNTER)
    asm2vec_logger().debug('Total number of functions: %d',
                           len(context.repo().funcs()))

    # Shared progress counter; starts at 1 so the first completed function
    # logs as 1/N.
    progress = Atomic(1)

    def train_function(fn: VectorizedFunction):
        # Worker body: train every sequence of one function, then bump the
        # shared progress counter under its lock.
        for seq in fn.sequential().sequences():
            _train_sequence(fn, seq, context)
        asm2vec_logger().debug(
            'Function "%s" trained, progress: %f%%', fn.sequential().name(),
            progress.value() / len(context.repo().funcs()) * 100)
        with progress.lock() as prog_proxy:
            prog_proxy.set(prog_proxy.value() + 1)

    # Use a context manager so worker threads are always joined, even when a
    # task raises (the original code never shut the executor down).
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=context.params().jobs) as executor:
        futures = [executor.submit(train_function, f)
                   for f in context.repo().funcs()]
        done, not_done = concurrent.futures.wait(
            futures, return_when=concurrent.futures.FIRST_EXCEPTION)

    # A failed future lands in `done`, not `not_done`, so both conditions
    # must be checked — the original only checked `not_done` and could
    # silently ignore a task exception.
    if not_done or any(f.exception() is not None for f in done):
        raise RuntimeError('Train failed due to one or more failed task.')
def train_function(fn: VectorizedFunction):
    """Train all sequences of a single function and advance progress.

    Relies on enclosing-scope names: ``context`` (training state),
    ``progress`` (shared atomic counter), ``_train_sequence`` and
    ``asm2vec_logger``.
    """
    for sequence in fn.sequential().sequences():
        _train_sequence(fn, sequence, context)

    total = len(context.repo().funcs())
    asm2vec_logger().debug(
        'Function "%s" trained, progress: %f%%',
        fn.sequential().name(),
        progress.value() / total * 100)

    # Increment the shared counter while holding its lock.
    with progress.lock() as proxy:
        proxy.set(proxy.value() + 1)
def func_handler(f: Function):
    """Generate random-walk sequences for one function and collect tokens.

    Appends a vectorized wrapper of *f* to the shared function list, merges
    *f*'s tokens into the shared vocabulary (counting duplicates), then
    advances the shared progress counter. Relies on enclosing-scope names:
    ``vec_funcs_atomic``, ``vocab_atomic``, ``progress``, ``funcs``,
    ``num_of_rnd_walks`` and ``dim``.
    """
    with vec_funcs_atomic.lock() as holder:
        holder.value().append(
            VectorizedFunction(
                make_sequential_function(f, num_of_rnd_walks)))

    for token in _get_function_tokens(f, dim):
        # Each token is merged under the vocabulary lock: bump the count of
        # a known token, otherwise register a fresh entry.
        with vocab_atomic.lock() as vocab:
            if token.name() in vocab.value():
                vocab.value()[token.name()].count += 1
            else:
                vocab.value()[token.name()] = Token(token)

    asm2vec_logger().debug(
        'Sequence generated for function "%s", progress: %f%%',
        f.name(),
        progress.value() / len(funcs) * 100)

    with progress.lock() as counter:
        counter.set(counter.value() + 1)