import os
import random
import time

import torch

import log            # project-local logger module (import path assumed)
import interface_lm   # project-local LM interface module (import path assumed)
# The bare names used below (build_language_model, load_language_model,
# load_proof_steps_into_lm, get_config, load_exprs, Constructor) come from
# project-local modules whose import paths depend on the repository layout.


def worker(args, queue, worker_id):
    """Generation worker: build/load a language model and a Constructor,
    then generate expressions forever, pushing encoded training examples
    onto the shared queue."""
    if args.partial_lm:
        lm = build_language_model(args.num_props, new=args.gen_lm, _all=False)
    else:
        lm = load_language_model(_all=False, new=args.gen_lm)
    if args.data_sampling > 0:
        load_proof_steps_into_lm(lm, ['train'], args.data_sampling)
    config = get_config(lm)

    # Pin this worker to a GPU; random generation uses no model and
    # defaults to the first listed device.
    device_idx = args.num_gpus - 1 - (worker_id // args.num_workers)
    if args.random_generate:
        device_idx = 0
    device_id = args.gpu_list[device_idx]
    print('build a worker on gpu %d' % device_id)
    torch.cuda.set_device(device_id)
    args.device = torch.device('cuda:' + str(device_id))

    interface = None if args.random_generate else interface_lm.LMInterface(args, lm)
    generator = Constructor(args, config, interface)
    generator.initialize()

    _logger = log.get_logger('worker%d' % worker_id, args, append=True)
    _logger.info('worker %d initialize', worker_id)

    tt = 0
    cnt = 0
    while True:
        t = time.time()
        if args.random_generate:
            expr = generator.random_generate()
        else:
            expr = generator.parameterized_generate()
        tt += time.time() - t

        # Only enqueue expressions that carry a training signal for the task.
        if expr is not None:
            if args.task == 'pred' and len(expr.prop.e) > 0:
                queue.put(generator.encode_pred_tasks([expr]))
            if args.task == 'gen' and len(expr.unconstrained) > 0:
                queue.put(generator.encode_gen_tasks([expr]))

        # Cap the expression pool: once it grows num_cons_exprs past the
        # initial set, drop the generated expressions and start over.
        if len(generator.expressions_list) > args.num_cons_exprs + generator.num_initial_expr:
            generator.reinitialize_expressions()
            _logger.info('worker %d reinitialize expressions', worker_id)

        if cnt == 5000:
            _logger.info('worker %d generate time per expr %s seconds',
                         worker_id, tt / cnt)
            cnt = 0
            tt = 0
        cnt += 1
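# Illustrative sketch (not from the original source): one plausible way to fan
# `worker` processes out across GPUs behind a shared queue. The use of
# torch.multiprocessing with the 'spawn' start method and the queue capacity
# are assumptions; the repository's real launcher may differ.
def spawn_generation_workers(args):
    import torch.multiprocessing as mp
    ctx = mp.get_context('spawn')    # CUDA requires the spawn start method
    queue = ctx.Queue(maxsize=1000)  # assumed capacity; tune to memory budget
    procs = []
    # worker() maps worker_id // args.num_workers to a GPU slot, so one
    # process per (gpu, worker) pair covers every listed device.
    for worker_id in range(args.num_gpus * args.num_workers):
        p = ctx.Process(target=worker, args=(args, queue, worker_id), daemon=True)
        p.start()
        procs.append(p)
    return queue, procs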
def worker_pre(args, queue, batch_size, ii):
    """Pre-generated-expression worker: load the ii-th shard of saved
    expressions from args.exprs_pre and push randomly sampled encoded
    batches of size batch_size onto the shared queue."""
    _logger = log.get_logger('worker_pre', args, append=True)
    _logger.info('worker_pre initialize')

    if args.partial_lm:
        lm = build_language_model(args.num_props, new=args.gen_lm, _all=False)
    else:
        lm = load_language_model(_all=False, new=args.gen_lm, iset=args.iset)
    config = get_config(lm)

    fl = os.listdir(args.exprs_pre)
    if args.data_sampling > 0:
        # Seed the generator with the initial expression list before
        # attaching the searcher.
        exprs = load_exprs(args.expr_list[0], lm)
        generator = Constructor(args, config)
        generator.initialize_prop()
        generator.expressions_list = exprs
        for e in exprs:
            generator.expressions[e.id] = e
        generator.num_initial_expr = len(generator.expressions)
        generator.initialize_searcher()
    else:
        generator = Constructor(args, config)
        generator.initialize()

    print('--loading pre exprs--')
    exprs_pre = load_exprs(os.path.join(args.exprs_pre, fl[ii]), lm)
    print('--done--')
    generator.expressions_list += exprs_pre
    for e in exprs_pre:
        generator.expressions[e.id] = e
    _logger.info('load %d exprs', len(generator.expressions))

    i = 0
    while True:
        # Sample a batch, skipping expressions that carry no training
        # signal for the current task.
        _exprs = []
        while len(_exprs) < batch_size:
            expr = random.choice(exprs_pre)
            if args.task == 'gen' and len(expr.unconstrained) == 0:
                continue
            if args.task == 'pred' and len(expr.prop.e) == 0:
                continue
            _exprs.append(expr)
        if args.task == 'pred':
            data = generator.encode_pred_tasks(_exprs)
        else:
            data = generator.encode_gen_tasks(_exprs)
        queue.put(data)
        i += 1
        # Unless training straight from the queue, stop after covering
        # roughly a fifth of the shard.
        if (i >= len(exprs_pre) // batch_size // 5
                and not args.train_from_queue
                and not args.cons_pre_one):
            break
    print('finish processing current exprs')
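# Illustrative sketch (not from the original source): minimal consumer-side
# wiring for `worker_pre`. `parse_args` is a hypothetical stand-in for the
# repository's real CLI parser, and the training step itself is elided.
if __name__ == '__main__':
    import torch.multiprocessing as mp
    ctx = mp.get_context('spawn')
    args = parse_args()              # hypothetical: the repository's argument parser
    queue = ctx.Queue(maxsize=100)   # assumed capacity
    p = ctx.Process(target=worker_pre,
                    args=(args, queue, args.batch_size, 0),  # shard 0; batch_size assumed on args
                    daemon=True)
    p.start()
    while True:
        batch = queue.get()  # blocks until an encoded batch is ready
        # ... run one training step on `batch` here ...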