def prepare_crop(self, _csv_file):
    root_dir = os.path.dirname(_csv_file)
    csv_name = os.path.basename(_csv_file)
    name_list = csv_name.split(".")
    csv_name = name_list[0]
    if csv_name.startswith("ext-"):
        csv_name = csv_name[4:]
    package_index = csv_name
    with open(_csv_file, "r") as f:
        line_str = f.readline()
        while line_str:
            image, label = line_str.split(",")
            image = image.strip()
            label = label.strip()
            if not image.startswith("ext-") or not label.startswith("ext-"):
                line_str = f.readline()
                continue
            if not os.path.exists(os.path.join(root_dir, label)):
                line_str = f.readline()
                self.logger.error(
                    "package:{}, label missing:[{}]=>[{}]".format(
                        package_index, image, label))
                continue
            if not os.path.exists(os.path.join(root_dir, image)):
                line_str = f.readline()
                self.logger.error(
                    "package:{}, image missing:[{}]=>[{}]".format(
                        package_index, image, label))
                continue
            origin_image = os.path.join(root_dir, image[4:])
            _src = os.path.join(root_dir, image)
            _dest_image = os.path.join(root_dir, origin_image)
            task = Task(package_index, _src, _dest_image, None)
            global_queue.remote_cut_queue.put(task)
            _src = os.path.join(root_dir, label)
            _dest_id2 = label[4:]
            _dest_label = os.path.join(root_dir, _dest_id2)
            _dest_label = _dest_label.strip()
            task = Task(package_index, _src, _dest_label, None)
            global_queue.remote_cut_queue.put(task)
            line_str = f.readline()
def train(**kwargs):
    config.parse(kwargs)
    if os.path.exists(config.filename + '_' + str(config.split_ratio) + 'SineData.pkl'):
        train_data = pickle.load(
            open(config.filename + '_' + str(config.split_ratio) + 'SineData.pkl', 'rb'))
        print('exists SineData.pkl, load it!')
    else:
        train_data = SineData(config.filename, split_ratio=config.split_ratio)
        pickle.dump(
            train_data,
            open(config.filename + '_' + str(config.split_ratio) + 'SineData.pkl', 'wb'))
    config.N = train_data.G.g.number_of_nodes() + 1
    model = getattr(models, config.model)(config)  # .eval()
    if torch.cuda.is_available():
        model.cuda()
        config.CUDA = True
    train_dataloader = DataLoader(train_data,
                                  config.batch_size,
                                  shuffle=True,
                                  num_workers=config.num_workers)
    # optimizer = torch.optim.SGD(model.parameters(), lr=config.lr, weight_decay=config.weight_decay)
    optimizer = torch.optim.Adadelta(model.parameters(),
                                     lr=1.0,
                                     rho=0.95,
                                     weight_decay=config.weight_decay)
    task = Task(train_data.G)
    # model.train()
    for epoch in range(config.epochs):
        total_loss = 0.0
        for idx, data in enumerate(train_dataloader):
            # if config.CUDA:
            #     data = map(lambda x: Variable(x.cuda()), data)
            # else:
            #     data = map(lambda x: Variable(x), data)
            optimizer.zero_grad()
            loss = model(data)
            loss.backward()
            optimizer.step()
            if config.CUDA:
                total_loss += loss.cpu().data.numpy()
            else:
                total_loss += loss.data.numpy()
        print('epoch {0}, loss: {1}'.format(epoch, total_loss))
    task.link_sign_prediction_split(model.get_embedding())
def prepare_extend(self, _csv_file):
    root_dir = os.path.dirname(_csv_file)
    file_name = os.path.basename(_csv_file)
    package_index = file_name.split(".")[0]
    ext_list = []
    ext_csv = "ext-" + file_name
    ext_csv = os.path.join(root_dir, ext_csv)
    with open(_csv_file, "r") as f:
        line_str = f.readline()
        while line_str:
            image, label = line_str.split(",")
            _src = os.path.join(root_dir, image)
            _dest_id1 = "ext-" + image
            _dest_image = os.path.join(root_dir, _dest_id1)
            _src = _src.strip()
            _dest_image = _dest_image.strip()
            task = Task(package_index, _src, _dest_image, None)
            global_queue.divide_queue.put(task)
            _src = os.path.join(root_dir, label)
            _src = _src.strip()
            _dest_id2 = "ext-" + label
            _dest_label = os.path.join(root_dir, _dest_id2)
            _dest_label = _dest_label.strip()
            _out_str = _dest_id1 + "," + _dest_id2
            ext_list.append(_out_str)
            task = Task(package_index, _src, _dest_label, None)
            global_queue.divide_queue.put(task)
            line_str = f.readline()
    ext_list.sort()
    with open(ext_csv, "w") as f:
        for _out_str in ext_list:
            f.write(_out_str)
    # rename *.csv => *.txt
    name_list = file_name.split(".")
    new_file_name = name_list[0] + ".txt"
    new_file_path = os.path.join(root_dir, new_file_name)
    os.rename(_csv_file, new_file_path)
async def task_status_handler(request):
    task_id = request.match_info['id']
    try:
        task = Task(app.loop, task_id)
        status = await task.get_status()
        resp = {"task_status": status}
    except:
        resp = {"task_status": "Not found"}
    return web.json_response(resp)
def alg_list():
    with open(sys.argv[1], 'r') as file:
        n = load_line(file)[0]
        tasks = [Task(*load_line(file), i) for i in range(n)]
    tasks = sorted(tasks, key=lambda task: task.r)
    schedules = [[], [], [], []]
    timers = [0, 0, 0, 0]
    criterium = 0
    awaiting = []
    counter = 0
    it = 0
    while True:
        timerId = argmin(timers)
        if it != n:
            for i in range(it, n):
                if tasks[i].r <= timers[timerId]:
                    awaiting += [tasks[i]]
                else:
                    it = i
                    break
            else:
                it = n
        if not awaiting:
            timers[timerId] = tasks[it].r
            for i in range(it, n):
                if tasks[i].r <= timers[timerId]:
                    awaiting += [tasks[i]]
                else:
                    it = i
                    break
            else:
                it = n
        awaiting.sort(key=lambda task: (min(0, timers[timerId] + task.p - task.d), -task.p))
        popped = awaiting.pop()
        schedules[timerId] += [popped]
        timers[timerId] = max(timers[timerId], popped.r) + popped.p
        criterium += max(0, timers[timerId] - popped.d)
        counter += 1
        if counter == n:
            break
    with open(sys.argv[2], 'w') as output:
        output.write(
            str(criterium) + '\n' + '\n'.join([
                ' '.join([str(task.i + 1) for task in schedule])
                for schedule in schedules
            ]))
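# Hypothetical helper sketch (an assumption, not from the original source):
# `load_line` is used by alg_list but not shown above. It is assumed to parse
# one whitespace-separated line of integers, so the instance file begins with
# n and is followed by n lines of "r p d" (release time, processing time,
# due date), matching the task.r / task.p / task.d fields read by the rule.
def load_line(f):
    # read one line and split it into a list of integers
    return [int(x) for x in f.readline().split()]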
def test(**kwargs):
    snap_root = kwargs['snap_root']
    config_file = snap_root + '/config.pkl'
    config = pickle.load(open(config_file, 'rb'))
    model_file = snap_root + '/{}.model'.format(config.model)
    dataset_name = 'kTupleDataV1'
    if os.path.exists(config.filename + '_' + str(config.split_ratio) +
                      '_{}.pkl'.format(dataset_name)):
        train_data = pickle.load(open(snap_root + '/data.pkl', 'rb'))
        print('exists {}.pkl, load it!'.format(dataset_name))
        print(train_data.G.g.number_of_nodes(),
              train_data.G.g.number_of_edges())
    else:
        raise Exception('Data module does not exist!')
    model = getattr(models, config.model)(config)  # .eval()
    if torch.cuda.is_available():
        model.cuda()
        config.CUDA = True
    model.load_state_dict(torch.load(model_file))
    task = Task(train_data.G, config)
    task.link_sign_prediction_split(utils.cat_neighbor(train_data.G.g,
                                                       model.get_embedding(),
                                                       method='null'),
                                    method='concatenate')
def get_tasks_from_service(self):
    tasks = []
    events = self.get_events_from_service()
    # colors = self.service.colors().get(fields="event").execute()
    for event in events:
        start = self.get_event_start_time(event)
        end = self.get_event_end_time(event)
        if "colorId" in event:
            # color_hexcode = colors["event"][event["colorId"]]["background"]
            color_id = event["colorId"]
            color = COLOR_ID_TO_COLOR_DICT[color_id]
        else:
            color = "OTHER"
        task = Task(color, start, end)
        tasks.append(task)
    return tasks
async def main(loop):
    connection = await aio_pika.connect_robust(settings.RABBITMQ_HOST, loop=loop)
    queue_name = settings.PRECESS_IMG_QUEUE
    async with connection:
        # Creating channel
        channel = await connection.channel()
        # Declaring queue
        queue = await channel.declare_queue(queue_name, auto_delete=True)
        async for message in queue:
            with message.process():
                data = json.loads(message.body.decode('utf-8'))
                aws_client = AWSWrapper()
                task_id = data['task_id']
                task = Task(loop, id=task_id)
                await task.set_status('Started downloading')
                try:
                    file_path = await aws_client.get_file(**data['aws_data'])
                except Exception as e:
                    await task.set_status("Download error")
                    raise e
                else:
                    await task.set_status("Started image processing")
                    try:
                        result_future = canny_task.delay(file_path)
                        canny_img_path = await wait_for_done(result_future)
                    except Exception as e:
                        await task.set_status("Processing error")
                        raise e
                    else:
                        delete_file(file_path)
                        data['aws_data']['file_path'] = canny_img_path
                        data['key'] = canny_img_path.split('/')[-1]
                        await task.set_status("Started uploading canny image")
                        try:
                            resp = await aws_client.upload_huge(**data['aws_data'])
                            await task.set_status("Canny edge image uploaded")
                        except Exception as e:
                            await task.set_status("Error uploading")
                            raise e
                        delete_file(canny_img_path)
def extract_task_from_line(line: str) -> Task:
    """Translate one line from the txt file into arguments for execution:
    instance, process, parameters.

    :param line: Arguments for execution.
        E.g. instance="tm1srv01" process="Bedrock.Server.Wait" pWaitSec=2
    :return: Task built from instance_name, process_name and the remaining parameters
    """
    line_arguments = dict()
    for pair in shlex.split(line):
        param, value = pair.split("=")
        # if instance or process, needs to be case insensitive
        if param.lower() == "process" or param.lower() == "instance":
            line_arguments[param.lower()] = value.strip('"').strip()
        # parameters (e.g. pWaitSec) are case sensitive in TM1 REST API !
        else:
            line_arguments[param] = value.strip('"').strip()
    return Task(instance_name=line_arguments.pop("instance"),
                process_name=line_arguments.pop("process"),
                parameters=line_arguments)
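# Usage sketch (hypothetical, for illustration only): the sample line mirrors
# the format shown in the docstring above; the printed fields assume the Task
# type keeps instance_name, process_name and parameters as attributes.
if __name__ == "__main__":
    sample = 'instance="tm1srv01" process="Bedrock.Server.Wait" pWaitSec=2'
    task = extract_task_from_line(sample)
    print(task.instance_name)  # tm1srv01
    print(task.process_name)   # Bedrock.Server.Wait
    print(task.parameters)     # {'pWaitSec': '2'}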
def get_glue_task(task_name: str, data_dir: str = None):
    """Return a GLUE task object

    Args:
        task_name (str): name of GLUE task
        data_dir (str, optional): path to dataset, if not provided will be
            taken from GLUE_DIR env. variable
    """
    task_name = task_name.lower()
    if task_name not in processors:
        raise ValueError("Task not found: {}".format(task_name))
    task_processor = processors[task_name]()
    if data_dir is None:
        try:
            data_dir = os.path.join(os.environ["GLUE_DIR"],
                                    DEFAULT_FOLDER_NAMES[task_name])
        except Exception:
            data_dir = None
    task_type = output_modes[task_name]
    return Task(task_name, task_processor, data_dir, task_type)
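# Usage sketch (hypothetical, for illustration only): "sst-2" is assumed to be
# a key in the module's `processors` registry, and GLUE_DIR is assumed to point
# at the downloaded GLUE data when data_dir is not passed explicitly.
# os.environ["GLUE_DIR"] = "/path/to/glue"   # placeholder path, not a real one
# sst2_task = get_glue_task("sst-2")         # -> Task("sst-2", processor, data_dir, output_mode)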
async def task_handler(request):
    data = await request.json()
    try:
        aws_data = {
            'bucket': data['BUCKET'],
            'key': data['file_path'],
            'AWS_SECRET_ACCESS_KEY': data['SECRET_ACCESS_KEY'],
            'AWS_ACCESS_KEY_ID': data['ACCESS_KEY_ID']
        }
        task = Task(app.loop)
        await task.set_status("Pending")
        data = {"aws_data": aws_data, "task_id": task.id}
        serialized_data = json.dumps(data)
        await rabbit_pub(app.loop, settings.PRECESS_IMG_QUEUE, serialized_data)
    except KeyError:
        raise web.HTTPForbidden()
    resp = {"task_id": task.id}
    return web.json_response(resp)
def get(self, *args, **kwargs):
    time1 = time.time()
    self.set_header('Access-Control-Allow-Origin', '*')
    self.set_header('Content-type', 'application/json')
    # Request arguments:
    #   "images": source image directory, defaults to ./images
    #   "pixel":  border size in pixels, defaults to 50
    try:
        _ver = self.get_argument("version", "all")
        self.src_dir = os.path.join(self.src_dir, _ver)
        self.temp_dir = os.path.join(self.temp_dir, _ver)
        if not os.path.exists(self.src_dir):
            task_count = 0
            err_code = 1
        else:
            if not os.path.exists(self.temp_dir):
                os.makedirs(self.temp_dir)
            else:
                # clean this directory
                tmp_dirs = os.listdir(self.temp_dir)
                for tmp_dir in tmp_dirs:
                    if not tmp_dir.isdigit():
                        continue
                    tmp_path = os.path.join(self.temp_dir, tmp_dir)
                    if os.path.isfile(tmp_path):
                        os.remove(tmp_path)
                    else:
                        shutil.rmtree(tmp_path)
            err_code = 0
            for dir in range(max_packages):
                csv_name = "ext-" + str(dir) + ".csv"
                csv_file = os.path.join(self.src_dir, str(dir), csv_name)
                if not os.path.exists(csv_file):
                    continue
                self.clean_dir(_csv_file=csv_file)
                self.prepare_crop(_csv_file=csv_file)
            task_count = self.queue.qsize()
            # start to process
            task = Task(None, None, None, None, True)
            self.queue.put(task)
            process = multiprocessing.Process(target=self.do_work)
            process.start()
            self.logger.info(str(process.pid) + ", start")
            process.join()
            self.logger.info(str(process.pid) + ", join")
            for i in range(max_packages):
                _sub_dir = os.path.join(self.temp_dir, str(i))
                if not os.path.exists(_sub_dir):
                    continue
                dest_dir = os.path.join(self.temp_dir, str(i))
                if not os.path.exists(dest_dir):
                    os.mkdir(dest_dir)
                origin_list = os.listdir(_sub_dir)
                for _image in origin_list:
                    _image = _image.strip()
                    if not _image.startswith("label-"):
                        continue
                    name_list = _image.split('.')
                    if len(name_list) < 2:
                        continue
                    ext_name = name_list[1]
                    if ext_name != 'png' and ext_name != 'jpg':
                        continue
                    # strip the leading "label-"
                    label_file = name_list[0]
                    if label_file.startswith("label-"):
                        label_file = label_file[6:]
                    anna_file = label_file + ".png"
                    origin_name = label_file + ".jpg"
                    image_path = os.path.join(_sub_dir, _image)
                    origin_image = os.path.join(_sub_dir, origin_name)
                    if not os.path.exists(origin_image):
                        os.remove(image_path)
                        self.logger.error(
                            "package:{}, file missing:[{}]=>[{}]".format(
                                str(i), origin_name, _image))
                        continue
                    result_path = os.path.join(self.temp_dir, str(i), anna_file)
                    task = Task(str(i), image_path, result_path, None, False)
                    self.task_queue.put(task)
            for i in range(20):
                task = Task(None, None, None, None, True)
                self.task_queue.put(task)
            all_processes = []
            for i in range(20):
                process = multiprocessing.Process(target=self.transform)
                all_processes.append(process)
            for process in all_processes:
                process.start()
                self.logger.info(str(process.pid) + ", start")
            for process in all_processes:
                process.join()
                self.logger.info(str(process.pid) + ", join")
            # first copy into the "all" directory
            if _ver != "all":
                temp_dir_list = os.listdir(self.temp_dir)
                for temp_dir in temp_dir_list:
                    if not temp_dir.isdigit():
                        continue
                    src_temp = os.path.join(self.temp_dir, temp_dir)
                    dest_temp = os.path.join(
                        os.path.dirname(self.temp_dir), "all", temp_dir)
                    if os.path.exists(dest_temp):
                        shutil.rmtree(dest_temp)
                    shutil.copytree(src_temp, dest_temp)
            # copy to the destination directory
            copy_dir = os.path.join(os.path.dirname(self.temp_dir), "all")
            dir_list = os.listdir(copy_dir)
            for _dir in dir_list:
                old_src = os.path.join(copy_dir, _dir)
                new_dest = os.path.join(self.dest_dir, "all", _dir)
                shutil.copytree(old_src, new_dest)
        time2 = time.time()
        result_obj = {
            "count": str(task_count),
            "time": str(time2 - time1) + " s"
        }
        resp = ServerResponse(err_code=err_code,
                              err_info=None,
                              result=result_obj)
        resp_str = resp.generate_response()
        self.logger.info(resp_str)
        self.write(resp_str)
    except Exception as err:
        err_info = repr(err)
        json_res = {"code": "99", "msg": str(err_info)}
        self.logger.error(json.dumps(json_res))
        self.write(json.dumps(json_res))
    except:
        self.write('{"code": "99", "msg": "unknown exception"}')
        self.logger.error('{"code": "99", "msg": "unknown exception"}')
    self.finish()
def train(**kwargs): """use the triplet like transE """ config.parse(kwargs) dataset_name = 'kTupleDataV1' train_data = kTupleDataV1(config.filename, split_ratio=config.split_ratio, neg_num=config.neg_num) #feature? feature = sp.csr_matrix(train_data.G.pos_adjmatrix, dtype=np.float32).T #feature = train_data.pos_feature #feature = sp.csr_matrix(np.array(train_data.feature), dtype=np.float32).T #print(feature) #feature_pos = sp.csr_matrix(np.array(train_data.pos_feature), dtype=np.float32).T feature_pos = sp.csr_matrix(train_data.G.pos_adjmatrix, dtype=np.float32).T #print(feature_pos.shape()) #feature_neg = sp.csr_matrix(np.array(train_data.neg_feature), dtype=np.float32).T feature_neg = sp.csr_matrix(train_data.G.neg_adjmatrix, dtype=np.float32).T print(feature_neg) #print(feature_neg.shape()) #feature = train_data.G.adj_matrix #print(feature.todense()) ''' feature = np.matrix([ [i, -i] for i in range(train_data.adj_pos.shape[0]) ], dtype=float) ''' #adj = train_data.adj_matrix adj = train_data.G.all_matrix adj_pos = train_data.adj_pos #print(adj_pos.shape) adj_neg = train_data.adj_neg graph_pos = nx.from_scipy_sparse_matrix(adj_pos, create_using=nx.DiGraph()) graph_neg = nx.from_scipy_sparse_matrix(adj_neg, create_using=nx.DiGraph()) #数据预处理 #数据预处理 #对结果进行处理-》执行gcn # build symmetric adjacency matrix 构建一个对称的邻接矩阵 # 目的将有向图的邻接矩阵变成无向图的邻接矩阵 adj_pos = adj_pos + adj_pos.T.multiply( adj_pos.T > adj_pos) - adj_pos.multiply(adj_pos.T > adj_pos) adj_neg = adj_neg + adj_neg.T.multiply( adj_neg.T > adj_neg) - adj_neg.multiply(adj_neg.T > adj_neg) adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj) #print(adj_pos.todense()) #特征进行归一化处理 feature = normalize(feature) feature_pos = normalize(feature_pos) feature_neg = normalize(feature_neg) #print(np.array(feature.todense()).shape) #print(feature.todense()) #邻接矩阵进行归一化处理 adj_pos = normalize(adj_pos + sp.eye(adj_pos.shape[0])) adj_neg = normalize(adj_neg + sp.eye(adj_neg.shape[0])) adj = normalize(adj + sp.eye(adj.shape[0])) #print(adj_pos) adj = sparse_mx_to_torch_sparse_tensor(adj) adj_pos = sparse_mx_to_torch_sparse_tensor(adj_pos) #print(adj_pos) adj_neg = sparse_mx_to_torch_sparse_tensor(adj_neg) #这三个参数自己调试 idx_train = range(2) #训练集 idx_val = range(2) #评估集 idx_test = range(500, 1500) #测试集、 idx_train = torch.LongTensor(idx_train) idx_val = torch.LongTensor(idx_val) idx_test = torch.LongTensor(idx_test) features = torch.FloatTensor(np.array(feature.todense())) features_pos = torch.FloatTensor(np.array(feature_pos.todense())) features_neg = torch.FloatTensor(np.array(feature_neg.todense())) #符号预测 #使用one-hot encode 来获取标签,如果没有标签,默认fromnode,to_node来进行表示, #在matlab进行实验分类时使用find去除了背景类0,所以所有的类别从1开始,在matlab进行分类的时候没问题 #但是Pytorch有个要求,在使用CrossEntropyLoss这个函数进行验证时label必须是以0开始的,所以会报错 labels = [0, 1] #labels = encode_onehot(labels) #features = torch.FloatTensor(feature) #print(data.features) labels = torch.LongTensor(labels) #print(data.labels) #mask = torch.ByteTensor(data.train_mask) #g = data.graph # add self loop #g.remove_edges_from(g.selfloop_edges()) g_pos = DGLGraph(graph_pos) g_neg = DGLGraph(graph_neg) #g.add_edges(g.nodes(), g.nodes()) #train_dataloader = DataLoader(train_data, config.batch_size, shuffle=True, num_workers=config.num_workers) #shujuxunlian #g, features, labels, mask = load_data(train_data) optimizer = torch.optim.Adam(net.parameters(), lr=1e-3) best_eval_dict = {'f1-micro': 0.0, 'f1-macro': 0.0} #''' model = GCN(nfeat=features_pos.shape[1], nhid=args.hidden, nclass=64, dropout=args.dropout) #''' #''' model2 = 
nGCN(nfeat=features_neg.shape[1], nhid=args.hidden, nclass=64, dropout=args.dropout) #''' ''' model3 = GAT(nfeat=features_pos.shape[1], nhid=args.hidden, nclass=64, dropout=args.dropout, nheads=args.nb_heads, alpha=args.alpha) ''' optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) optimizer1 = torch.optim.Adam(model2.parameters(), lr=args.lr, weight_decay=args.weight_decay) #optimizer2 = torch.optim.Adam(model3.parameters(), #lr=args.lr, weight_decay=args.weight_decay) task = Task(train_data.G, config) #features,features_pos,features_neg,adj,labels = Variable(features),Variable(features_pos),Variable(features_neg), Variable(adj), Variable(labels) #cuda config #if args.cuda: #model.cuda() #features = features.cuda() #adj = adj.cuda() #labels = labels.cuda() #idx_train = idx_train.cuda() #idx_val = idx_val.cuda() #idx_test = idx_test.cuda() #pygat #for epoch in range(config.epochs): #t = time.time() #model3.train() #optimizer2.zero_grad() #output = model3(features, adj) #loss_train = F.nll_loss(output[idx_train], labels[idx_train]) #acc_train = accuracy(output[idx_train], labels[idx_train]) #loss_train.backward() #optimizer.step() #loss_val = F.nll_loss(output[idx_val], labels[idx_val]) #acc_val = accuracy(output[idx_val], labels[idx_val]) #print('Epoch: {:04d}'.format(epoch+1), #'loss_train: {:.4f}'.format(loss_train.data.item()), #'acc_train: {:.4f}'.format(acc_train.data.item()), #'loss_val: {:.4f}'.format(loss_val.data.item()), #'acc_val: {:.4f}'.format(acc_val.data.item()), #'time: {:.4f}s'.format(time.time() - t)) #pytorch for epoch in range(config.epochs): #for epoch in range(config.epochs): #for epoch in range(60): t = time.time() model.train() optimizer.zero_grad() optimizer1.zero_grad() #__import__('pdb').set_trace() output_pos = model(features_pos, adj_pos) #var.detach().numpy().savetxt("output_pos.txt",output_pos) # __import__('pdb').set_trace() #print(feature_neg) #print(adj_neg) #output_neg = model2(features_neg, adj) output_neg = model2(features_neg, adj) #__import__('pdb').set_trace() #var.detach().numpy().savetxt("output_neg.txt",output_neg) # print(output_pos) # print(output_neg) #output = model(features,adj_neg) loss_train = F.nll_loss(output_pos[idx_train], labels[idx_train]) loss_neg_train = F.nll_loss(output_neg[idx_train], labels[idx_train]) #acc_train = accuracy(output_pos[idx_train], labels[idx_train]) loss_train.backward() loss_neg_train.backward() #print(loss_train) optimizer.step() optimizer1.step() #if not args.fastmode: # Evaluate validation set performance separately, # deactivates dropout during validation run. 
#model.eval() #output = model(features, adj) #对节点符号进行符号预测 #如ou何对节点符号进行embedding: eval_dict = task.link_sign_pre_con( utils.cat_neighbor_new(train_data.G.g, output_pos, method='cat_neg'), utils.cat_neighbor_new(train_data.G.g, output_neg, method='cat_neg'), idx_train, idx_val, idx_test, method='concatenate') #print(np.all(model.get_embedding())) # task.link_sign_prediction_ktuple(model.get_embedding()) #print(eval_dict) if config.snapshoot: #print('epoch {0}, loss: {1}, time: {2}'.format(epoch, total_loss, train_time), file=fout) print( "link_sign_prediction auc: {:.3f}, f1: {:.3f}, f1-micro: {:.3f}, f1-macro: {:.3f}" .format(eval_dict['auc'], eval_dict['f1'], eval_dict['f1-micro'], eval_dict['f1-macro'])) for key in best_eval_dict: if eval_dict[key] > best_eval_dict[key]: for key in best_eval_dict: best_eval_dict[key] = eval_dict[key] #model.save(snap_root + '/{}.model'.format(config.model)) #model.save('/{}.model'.format(config.model)) break #loss_val = F.nll_loss(output[idx_val], labels[idx_val]) #acc_val = accuracy(output[idx_val], labels[idx_val]) #print('Epoch: {:04d}'.format(epoch+1), # 'loss_train: {:.4f}'.format(loss_train.item()), # 'acc_train: {:.4f}'.format(acc_train.item()), # 'loss_val: {:.4f}'.format(loss_val.item()), # 'acc_val: {:.4f}'.format(acc_val.item()), # 'time: {:.4f}s'.format(time.time() - t)) #pygcn #''' ''' #dgl dur = [] for epoch in range(30): if epoch >=3: t0 = time.time() logits_pos = net(g_pos, features) logits_neg = net(g_neg, features) #print(logits_pos) #logits_neg = net(g_neg, features) logp = F.log_softmax(logits_pos, 1) logn = F.log_softmax(logits_neg, 1) print('logp:{}'.format(logp)) print('logn:{}'.format(logn)) #logp = F.log_softmax(logits, 1) #np.savetxt("logp.txt",logits_pos.get_embedding()) loss_train = F.nll_loss(logits_pos[idx_train], labels[idx_train]) acc_train = accuracy(logits_pos[idx_train], labels[idx_train]) loss = F.nll_loss(logits_pos[idx_val], labels[idx_val]) optimizer.zero_grad() #print(logits) loss.backward() optimizer.step() if epoch >=3: dur.append(time.time() - t0) eval_dict = task.link_sign_prediction_split(utils.cat_neighbor( train_data.G.g, logp, method='cat_pos'), method='concatenate') #print(np.all(model.get_embedding())) # task.link_sign_prediction_ktuple(model.get_embedding()) #print(eval_dict) if config.snapshoot: #print('epoch {0}, loss: {1}, time: {2}'.format(epoch, total_loss, train_time), file=fout) #print("link_sign_prediction auc: {:.3f}, f1: {:.3f}, f1-micro: {:.3f}, f1-macro: {:.3f}".format( #eval_dict['auc'], eval_dict['f1'], eval_dict['f1-micro'], eval_dict['f1-macro']), file=fout) for key in best_eval_dict: if eval_dict[key] > best_eval_dict[key]: for key in best_eval_dict: best_eval_dict[key] = eval_dict[key] #model.save(snap_root + '/{}.model'.format(config.model)) output.save('/{}.model'.format(config.model)) break loss_val = F.nll_loss(logits_pos[idx_val], labels[idx_val]) acc_val = accuracy(logits_pos[idx_val], labels[idx_val]) print('Epoch: {:04d}'.format(epoch+1), 'loss_train: {:.4f}'.format(loss_train.item()), 'acc_train: {:.4f}'.format(acc_train.item()), 'loss_val: {:.4f}'.format(loss_val.item()), 'acc_val: {:.4f}'.format(acc_val.item())) print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f}| acc {:.4f} ".format( epoch, loss.item(), np.mean(dur))) ''' #train_dataloader = DataLoader(train_data, config.batch_size, shuffle=True, num_workers=config.num_workers) #print(train_dataloader) #pytorch traindataloader #创建DataLoader,batch_size设置为2,shuffle=False不打乱数据顺序,num_workers= 4使用4个子进程: #train_dataloader = 
DataLoader(train_data, config.batch_size, shuffle=True, num_workers=config.num_workers) #保存train_data文件 ''' if os.path.exists(config.filename + '_' + str(config.split_ratio) + '_{}.pkl'.format(dataset_name)) and not config.save_dataset: #将数据编译成pkl格式 train_data = pickle.load(open(config.filename + '_' + str(config.split_ratio) + '_{}.pkl'.format(dataset_name), 'rb')) print(config.filename + '_' + str(config.split_ratio)) print(train_data) #<data.kTupleDataV1.kTupleDataV1 object at 0x7f96e2ddc5c0> print('exists {}.pkl, load it!'.format(dataset_name)) print(train_data.G.g.number_of_nodes(), train_data.G.g.number_of_edges()) else: train_data = kTupleDataV1(config.filename, split_ratio=config.split_ratio, neg_num=config.neg_num) #list数据写入 with open('train_data_ktuple','w') as f: f.write(str(train_data.sign_tuple)) pickle.dump(train_data, open( config.filename + '_' + str(config.split_ratio) + '_{}.pkl'.format(dataset_name), 'wb')) print('success save {}.pkl'.format(dataset_name)) ''' ''' #3333 config.N = train_data.G.g.number_of_nodes() model = getattr(models, config.model)(config) # .eval() #''' if torch.cuda.is_available(): model.cuda() config.CUDA = True ''' train_dataloader = DataLoader(train_data, config.batch_size, shuffle=True, num_workers=config.num_workers) optimizer = torch.optim.Adadelta(model.parameters(), lr=1.0, rho=0.95) #print(optimizer) task = Task(train_data.G, config) best_eval_dict = {'f1-micro': 0.0, 'f1-macro': 0.0} ''' ''' #if config.snapshoot: #snapshoot_time = datetime.strftime(datetime.now(), '%y-%m-%d_%H:%M:%S') #best_eval_dict = {'f1-micro': 0.0, 'f1-macro': 0.0} #fout, snap_root = utils.init_snapshoot(config.filename, snapshoot_time) #config.save(fout) ''' ''' #config.show() # model.train() #print(range(config.epochs)) for epoch in range(config.epochs): total_loss = 0.0 start_time = datetime.now() for idx, data in enumerate(train_dataloader): negs = data[-1] data = data[:-1] neg_r, neg_t = zip(*negs) #transpose只能操作2D矩阵的转置 neg_r = torch.cat(neg_r).view(len(negs), -1).transpose(0, 1) neg_t = torch.cat(neg_t).view(len(negs), -1).transpose(0, 1) data.extend([neg_r, neg_t]) data = map(lambda x: Variable(x), data) if config.CUDA: data = map(lambda x: Variable(x.cuda()), data) else: data = map(lambda x: Variable(x), data) #optimizer.zero_grad()意思是把梯度置零,也就是把loss关于weight的导数变成0. 
optimizer.zero_grad() loss = model(data) loss.backward() #optimizer.step()通常用在每个mini-batch之中,而scheduler.step()通常用在epoch里面,但是不绝对,可以根据具体的需求来做。 #只有用了optimizer.step(),模型才会更新,而scheduler.step()是对lr进行调整 #更新模型 optimizer.step() #计算损失函数-》2 if config.CUDA: total_loss += loss.cpu().data.numpy() else: total_loss += loss.data.numpy() train_time = (datetime.now() - start_time).seconds print('epoch {0}, loss: {1}, time: {2}'.format(epoch, total_loss, train_time)) if (epoch > 30 or config.speedup): if config.speedup: if epoch % config.speedup != 0: continue if epoch % 5 != 0: continue eval_dict = task.link_sign_prediction_split(utils.cat_neighbor( train_data.G.g, model.get_embedding(), method='cat_neg'), method='concatenate') print(np.all(model.get_embedding())) # task.link_sign_prediction_ktuple(model.get_embedding()) print(eval_dict) if config.snapshoot: #print('epoch {0}, loss: {1}, time: {2}'.format(epoch, total_loss, train_time), file=fout) #print("link_sign_prediction auc: {:.3f}, f1: {:.3f}, f1-micro: {:.3f}, f1-macro: {:.3f}".format( #eval_dict['auc'], eval_dict['f1'], eval_dict['f1-micro'], eval_dict['f1-macro']), file=fout) for key in best_eval_dict: if eval_dict[key] > best_eval_dict[key]: for key in best_eval_dict: best_eval_dict[key] = eval_dict[key] #model.save(snap_root + '/{}.model'.format(config.model)) model.save('/{}.model'.format(config.model)) break #333333 ''' ''' if config.snapshoot: fout.write('best result:' + str(best_eval_dict) + '\n') fout.close() config.save(open(snap_root + '/{:.3f}.config'.format(best_eval_dict['f1-micro']), 'w')) pickle.dump(config, open(snap_root + '/config.pkl', 'wb')) if config.save_dataset: pickle.dump(train_data, open(snap_root + '/data.pkl', 'wb')) ''' '''
def alg_list():
    env = {'start': time(), 'break': False}
    with open(sys.argv[1], 'r') as file:
        n = load_line(file)[0]
        env['originalTasks'] = [Task(*load_line(file), i) for i in range(n)]
    tasks = sorted(env['originalTasks'], key=lambda x: x.r)
    schedules = [[], [], [], []]
    started = [[], [], [], []]
    timers = [0, 0, 0, 0]
    criterium = 0
    awaiting = []
    counter = 0
    it = 0
    while True:
        timer_id = argmin(timers)
        if it != n:
            for i in range(it, n):
                if tasks[i].r <= timers[timer_id]:
                    awaiting += [tasks[i]]
                else:
                    it = i
                    break
            else:
                it = n
        if not awaiting:
            timers[timer_id] = tasks[it].r
            for i in range(it, n):
                if tasks[i].r <= timers[timer_id]:
                    awaiting += [tasks[i]]
                else:
                    it = i
                    break
            else:
                it = n
        awaiting.sort(
            key=lambda x: (min(0, timers[timer_id] + x.p - x.d), -x.p))
        popped = awaiting.pop()
        schedules[timer_id] += [popped]
        started[timer_id] += [max(timers[timer_id], popped.r)]
        timers[timer_id] = max(timers[timer_id], popped.r) + popped.p
        criterium += max(0, timers[timer_id] - popped.d)
        counter += 1
        if counter == n:
            break
    env.update({
        'bestSchedules': deepcopy(schedules),
        'bestCriterium': criterium,
        'schedules': schedules,
        'started': started,
        'counter': counter,
        'timers': timers,
        'awaiting': [],
        'n': n
    })
    while True:
        for _ in range(env['n']):
            popped_id = argmax(j[-1] if j else -1 for j in env['started'])
            popped = env['schedules'][popped_id].pop()
            env['started'][popped_id].pop()
            env['awaiting'] += [popped]
            if env['started'][popped_id]:
                env['timers'][popped_id] = env['started'][popped_id][-1] + \
                    env['schedules'][popped_id][-1].p
            else:
                env['timers'][popped_id] = 0
            env['counter'] -= 1
            alg_adv(popped_id, env)
            if env['break']:
                env['break'] = False
                break
            if (time() - env['start']) * 100 > env['n']:
                save_results(env)
                return
        else:
            save_results(env)
            return
def get(self, *args, **kwargs):
    time1 = time.time()
    self.set_header('Access-Control-Allow-Origin', '*')
    self.set_header('Content-type', 'application/json')
    # Request arguments:
    #   "images": source image directory, defaults to ./images
    #   "pixel":  border size in pixels, defaults to 50
    try:
        _ver = self.get_argument("version", "all")
        self.src_dir = os.path.join(self.src_dir, _ver)
        self.temp_dir = os.path.join(self.temp_dir, _ver)
        if not os.path.exists(self.src_dir):
            task_count = 0
            err_code = 1
        else:
            if not os.path.exists(self.temp_dir):
                os.makedirs(self.temp_dir)
            # else:
            #     # clean this directory
            #     tmp_dirs = os.listdir(self.temp_dir)
            #     for tmp_dir in tmp_dirs:
            #         if not tmp_dir.isdigit():
            #             continue
            #         tmp_path = os.path.join(self.temp_dir, tmp_dir)
            #         if os.path.isfile(tmp_path):
            #             os.remove(tmp_path)
            #         else:
            #             shutil.rmtree(tmp_path)
            err_code = 0
            for dir in range(max_packages):
                csv_name = "ext-" + str(dir) + ".csv"
                csv_file = os.path.join(self.src_dir, str(dir), csv_name)
                if not os.path.exists(csv_file):
                    continue
                self.clean_dir(_csv_file=csv_file)
                self.prepare_crop(_csv_file=csv_file)
            task_count = global_queue.remote_cut_queue.qsize()
            task_count = task_count // 2
            # start to process
            task = Task(None, None, None, None, True)
            global_queue.remote_cut_queue.put(task)
            self.do_work()
            for i in range(max_packages):
                _sub_dir = os.path.join(self.src_dir, str(i))
                if not os.path.exists(_sub_dir):
                    continue
                dest_dir = os.path.join(self.temp_dir, str(i))
                if not os.path.exists(dest_dir):
                    os.makedirs(dest_dir)
                origin_list = os.listdir(_sub_dir)
                for _image in origin_list:
                    _image = _image.strip()
                    if not _image.startswith("label-"):
                        continue
                    if not _image.endswith("png"):
                        continue
                    # strip the leading "label-"
                    anna_file = _image[6:]
                    origin_name = anna_file[:-3] + "jpg"
                    image_path = os.path.join(_sub_dir, _image)
                    origin_image = os.path.join(_sub_dir, origin_name)
                    if not os.path.exists(origin_image):
                        os.remove(image_path)
                        self.logger.error(
                            "package:{}, file missing:[{}]=>[{}]".format(
                                str(i), origin_name, _image))
                        continue
                    result_path = os.path.join(self.temp_dir, str(i), anna_file)
                    task = Task(str(i), image_path, result_path, origin_image, False)
                    global_queue.remote_process_queue.put(task)
            for i in range(20):
                task = Task(None, None, None, None, True)
                global_queue.remote_process_queue.put(task)
            all_processes = []
            for i in range(20):
                process = multiprocessing.Process(target=self.transform)
                process.daemon = True
                all_processes.append(process)
            for process in all_processes:
                process.start()
                self.logger.info(str(process.pid) + ", start")
            for process in all_processes:
                process.join()
                self.logger.info(str(process.pid) + ", join")
            # copy the results
            if _ver != "all":
                cur_day = time.strftime("lane-aug-%Y%m%d", time.localtime())
            else:
                cur_day = time.strftime("lane-all-%Y%m%d", time.localtime())
            copy_dir = self.temp_dir
            dir_list = os.listdir(copy_dir)
            if self.dest_scp_ip != global_variables.model_host.value:
                files = self.dest_sftp.listdir(path=self.dest_dir)
                self.dest_dir = os.path.join(self.dest_dir, cur_day)
                # if cur_day in files:
                #     self.rm(self.dest_dir)
                if cur_day not in files:
                    self.dest_sftp.mkdir(self.dest_dir)
                for _dir in dir_list:
                    old_src = os.path.join(copy_dir, _dir)
                    files = self.dest_sftp.listdir(path=self.dest_dir)
                    if _dir in files:
                        self.rm(os.path.join(self.dest_dir, _dir))
                    self.dest_scp.put(old_src, self.dest_dir, recursive=True)
        time2 = time.time()
        result_obj = {
            "count": str(task_count),
            "time": str(time2 - time1) + " s"
        }
        resp = ServerResponse(err_code=err_code,
                              err_info=None,
                              result=result_obj)
        resp_str = resp.generate_response()
        self.logger.info(resp_str)
        self.write(resp_str)
    except Exception as err:
        err_info = repr(err)
        json_res = {"code": "99", "msg": str(err_info)}
        self.logger.error(json.dumps(json_res))
        self.write(json.dumps(json_res))
    self.finish()
def prepare_task(self, src_dir, dest_dir, start_package, cnt_per_package):
    # generate annotation task packages
    src_len = len(src_dir)
    get_file(src_dir, self.file_list, src_len)
    total_count = len(self.file_list)
    file_index = 0
    total_index = 0
    package_index = start_package
    package_list = {}
    for _file_path in self.file_list:
        total_index += 1
        _file_id = os.path.basename(_file_path)
        package_dir = os.path.join(dest_dir, str(package_index))
        if not os.path.exists(package_dir):
            os.makedirs(package_dir)
        image_file = _file_id
        _file_name = _file_id.split(".")
        _file_name = _file_name[0]
        label_file = "label-" + _file_name + ".png"
        package_list[image_file] = label_file
        src_path = os.path.join(src_dir, _file_path)
        _image = cv2.imread(src_path)
        if _image is None:
            print(src_path)
            continue
        dest_path = os.path.join(package_dir, _file_id)
        dest_label = os.path.join(package_dir, label_file)
        _task = Task(package_index=str(package_index),
                     src_path=src_path,
                     dest_path=dest_path,
                     dest_label=dest_label)
        global_queue.extend_queue.put(_task)
        file_index += 1
        if file_index == cnt_per_package:
            dest_file = str(package_index) + ".csv"
            dest_file_path = os.path.join(dest_dir, str(package_index), dest_file)
            with open(dest_file_path, "w") as f:
                for _image, _label in package_list.items():
                    _str = "{},{}\n".format(_image, _label)
                    f.write(_str)
            package_list = {}
            file_index = 0
            package_index += 1
        elif total_index == total_count:
            dest_file = str(package_index) + ".csv"
            dest_file_path = os.path.join(dest_dir, str(package_index), dest_file)
            with open(dest_file_path, "w") as f:
                for _image, _label in package_list.items():
                    _str = "{},{}\n".format(_image, _label)
                    f.write(_str)
    return
def get(self, *args, **kwargs):
    time_start = time.time()
    self.set_header('Access-Control-Allow-Origin', '*')
    self.set_header('Content-type', 'application/json')
    query_path = self.request.path
    if query_path != "/task":
        print(query_path)
    # Request arguments:
    #   "images": source image directory, defaults to ./images
    #   "step":   number of images per task package, defaults to 20
    #   "pixel":  border size in pixels, defaults to 50
    #   "start":  first package index, recorded automatically by default
    try:
        step = self.get_argument("step", "20")
        step = int(step)
        self.step = step
        if not os.path.exists(self.src_dir):
            task_count = 0
            err_code = 1
        else:
            err_code = 0
            dir_list = os.listdir(self.dest_dir)
            cur_max = 0
            for _dir in dir_list:
                if _dir.isdigit():
                    if int(_dir) > cur_max:
                        cur_max = int(_dir)
            if self.start <= cur_max:
                self.start = cur_max + 1
            self.prepare_task(src_dir=self.src_dir,
                              dest_dir=self.dest_dir,
                              start_package=self.start,
                              cnt_per_package=self.step)
            task_count = global_queue.extend_queue.qsize()
            # start to process
            _task = Task(package_index=None,
                         src_path=None,
                         dest_path=None,
                         dest_label=None,
                         exit_flag=True)
            global_queue.extend_queue.put(_task)
            process = multiprocessing.Process(target=self.do)
            process.daemon = True
            process.start()
            process.join()
            # start adding the border
            for dir in range(self.start, max_packages):
                time1 = time.time()
                csv_name = str(dir) + ".csv"
                csv_file = os.path.join(self.dest_dir, str(dir), csv_name)
                if not os.path.exists(csv_file):
                    continue
                self.prepare_extend(_csv_file=csv_file)
                task = Task(None, None, None, None, True)
                global_queue.divide_queue.put(task)
                process = multiprocessing.Process(target=self.do_work)
                process.daemon = True
                process.start()
                process.join()
                time2 = time.time()
                self.logger.info("process[{}] in {} s".format(dir, time2 - time1))
        time_end = time.time()
        result_obj = {
            "count": str(task_count),
            "time": str(time_end - time_start) + " s"
        }
        resp = ServerResponse(err_code=err_code,
                              err_info=None,
                              result=result_obj)
        resp_str = resp.generate_response()
        self.logger.info(resp_str)
        self.write(resp_str)
    except Exception as err:
        err_info = err.args[0]
        json_res = {"code": "99", "msg": str(err_info)}
        self.write(json.dumps(json_res))
        self.logger.error(json.dumps(json_res))
    except:
        self.write('{"code": "99", "msg": "unknown exception"}')
        self.logger.error('{"code": "99", "msg": "unknown exception"}')
    self.finish()
def train(self): print("Training the model...") self.entity_idxs = {d.entities[i]: i for i in range(len(d.entities))} self.relation_idxs = { d.relations[i]: i for i in range(len(d.relations)) } self.idx2entity = {v: k for k, v in self.entity_idxs.items()} self.idx2relation = {v: k for k, v in self.relation_idxs.items()} if self.add_constraint == True: # constrain types Output_mask = torch.ones([ len(self.idx2relation.keys()), len(self.idx2entity.keys()), len(self.idx2entity.keys()) ], dtype=torch.float) if self.cuda: Output_mask = Output_mask.cuda() # gather objects, properties, and affordances task_names = ['situated-OP', 'situated-OA', 'situated-AP'] task_mapping = defaultdict() for name in task_names: task_mapping[name] = Task(TASK_REV_MEDIUMHAND[name]) objects, properties, affordances = get_entity_sets(task_mapping) for k in range(len(self.idx2relation.keys())): relation = self.idx2relation[k] for i in range(len(self.idx2entity.keys())): for j in range(len(self.idx2entity.keys())): e1 = self.idx2entity[i] e2 = self.idx2entity[j] if 'situated-OP' in relation: if ((e1 in objects and e2 in properties) or (e2 in objects and e1 in properties)): Output_mask[k, i, j] = 0.0 if 'situated-OA' in relation: if ((e1 in objects and e2 in affordances) or (e2 in objects and e1 in affordances)): Output_mask[k, i, j] = 0.0 if 'situated-AP' in relation: if ((e1 in properties and e2 in affordances) or (e2 in properties and e1 in affordances)): Output_mask[k, i, j] = 0.0 train_data_idxs = self.get_data_idxs(d.train_data) print("Number of training data points: %d" % len(train_data_idxs)) if self.add_dropout_bn == True: model = TuckER(d, self.ent_vec_dim, self.rel_vec_dim, **self.kwargs) else: model = TuckERNoDropoutBN(d, self.ent_vec_dim, self.rel_vec_dim, self.cuda) if self.cuda: model.cuda() model.init() opt = torch.optim.Adam(model.parameters(), lr=self.learning_rate) if self.decay_rate: scheduler = ExponentialLR(opt, self.decay_rate) er_vocab = self.get_er_vocab(train_data_idxs) er_vocab_pairs = list(er_vocab.keys()) print("Starting training...") start_time = timeit.default_timer() for it in range(1, self.num_iterations + 1): model.train() losses = [] np.random.shuffle(er_vocab_pairs) for j in range(0, len(er_vocab_pairs), self.batch_size): data_batch, targets = self.get_batch(er_vocab, er_vocab_pairs, j) opt.zero_grad() e1_idx = torch.tensor(data_batch[:, 0]) if self.do_link_prediction == True: r_idx = torch.tensor(data_batch[:, 1]) else: e2_idx = torch.tensor(data_batch[:, 1]) if self.cuda: e1_idx = e1_idx.cuda() if self.do_link_prediction == True: r_idx = r_idx.cuda() else: e2_idx = e2_idx.cuda() if self.do_link_prediction == True: outputs = model.forward_lp(e1_idx, r_idx) else: outputs = model.forward(e1_idx, e2_idx) if self.add_dropout_bn == True: predictions = outputs else: predictions = outputs[0] W = outputs[1] E = outputs[2] R = outputs[3] if self.label_smoothing: targets = ((1.0 - self.label_smoothing) * targets) + (1.0 / targets.size(1)) loss = model.loss(predictions, targets) if self.add_constraint == True: reg = self.reg d1_want = R.size(0) d2_want = E.size(0) d3_want = d2_want d1_in = R.size(1) d2_in = E.size(1) d3_in = d2_in W_mat = torch.mm(E, W.view(d3_in, -1)) W_mat = W_mat.view(d1_in, d2_in, d3_want) W_mat = torch.mm(E, W_mat.view(d2_in, -1)) W_mat = W_mat.view(d1_in, d2_want, d3_want) W_mat = torch.mm(R, W_mat.view(d1_in, -1)) Output = W_mat.view(d1_want, d2_want, d3_want) type_constraint = ((Output * Output_mask)**2).mean() loss += (reg * type_constraint) loss.backward() opt.step() 
losses.append(loss.item()) if self.decay_rate: scheduler.step() print(it) stop_time = timeit.default_timer() print('training time: {}'.format( (stop_time - start_time) / self.num_iterations)) if self.do_link_prediction == True: model.eval() with torch.no_grad(): print("Test:") self.evaluate_link_prediction(model, d.test_data) if not os.path.exists(self.saved_model_path): os.makedirs(self.saved_model_path) torch.save(model, os.path.join(self.saved_model_path, 'model.pt')) joblib.dump([self.entity_idxs, self.relation_idxs], os.path.join(self.saved_model_path, 'dic.pkl'))
def prepare_crop(self, _csv_file):
    root_dir = os.path.dirname(_csv_file)
    csv_name = os.path.basename(_csv_file)
    name_list = csv_name.split(".")
    csv_name = name_list[0]
    if csv_name.startswith("ext-"):
        csv_name = csv_name[4:]
    package_index = csv_name
    list_file = csv_name + ".csv"
    list_path = os.path.join(root_dir, list_file)
    temp_dir = os.path.join(self.temp_dir, package_index)
    if not os.path.exists(temp_dir):
        os.mkdir(temp_dir)
    dest_list_path = os.path.join(temp_dir, list_file)
    if os.path.exists(list_path):
        shutil.copy(list_path, dest_list_path)
    else:
        list_file = csv_name + ".txt"
        list_path = os.path.join(root_dir, list_file)
        dest_list_path = os.path.join(temp_dir, list_file)
        if os.path.exists(list_path):
            shutil.copy(list_path, dest_list_path)
    with open(_csv_file, "r") as f:
        line_str = f.readline()
        while line_str:
            image, label = line_str.split(",")
            image = image.strip()
            label = label.strip()
            if not image.startswith("ext-") or not label.startswith("ext-"):
                line_str = f.readline()
                continue
            origin_image = os.path.join(root_dir, image[4:])
            if not os.path.exists(origin_image):
                line_str = f.readline()
                self.logger.error(
                    "package:{}, file missing:[{}]=>[{}]".format(
                        package_index, image, label))
                continue
            else:
                dest_origin_image = os.path.join(temp_dir, image[4:])
                shutil.copy(origin_image, dest_origin_image)
            if not os.path.exists(os.path.join(root_dir, label)):
                line_str = f.readline()
                self.logger.error(
                    "package:{}, file missing:[{}]=>[{}]".format(
                        package_index, image, label))
                continue
            _src = os.path.join(root_dir, label)
            _dest_id2 = label[4:]
            _dest_label = os.path.join(temp_dir, _dest_id2)
            _dest_label = _dest_label.strip()
            task = Task(package_index, _src, _dest_label, None)
            self.queue.put(task)
            line_str = f.readline()
                   map_location=torch.device('cpu'))
if args.cuda:
    model = model.cuda()
entity_idxs = dic[0]
relation_idxs = dic[1]
entity_reverse_idxs = {i: name for name, i in entity_idxs.items()}
relation_reverse_idxs = {i: name for name, i in relation_idxs.items()}
model.eval()
# gather objects, properties, and affordances
task_names = ['situated-OP', 'situated-OA', 'situated-AP']
task_mapping = defaultdict()
for name in task_names:
    task_mapping[name] = Task(TASK_REV_MEDIUMHAND[name])
objects, properties, affordances = get_entity_sets(task_mapping)

def e12_type(e1, e2):
    if (e1 in objects and e2 in properties):
        return 'situated-OP'
    elif (e1 in objects and e2 in affordances):
        return 'situated-OA'
    elif (e1 in affordances and e2 in properties):
        return 'situated-AP'
    else:
        return 'nothing'
            # style = style_name.split(" ")[1]
            style = level + module + list_type + language
        else:
            level = '1'
            list_type = ''
            [upParaId, no] = getUpId_no(paraId, 0)
            style = level + module + list_type + language
        para = Paragraph_(paraId, upParaId, style, no, content).__dict__
        paras.append(para)
        i += 1
    return paras, articleTittle, stuName, teacherName, majorName, gradTime


if __name__ == '__main__':
    paragraphs, articleTittle, stuName, teacherName, majorName, gradTime = read_word("demo3.docx")
    task = Task("e6c26921-3ec6-48b6-bb73-efd48cef969f",
                "428d81a2-30b7-4960-9535-c1c0e74e9677",
                "366775a1-f341-4e0b-ae45-382199d6c978",
                paragraphs, articleTittle, stuName, teacherName,
                majorName, gradTime)
    f = open("./test.json", 'w')
    f.write(json.dumps(task, default=task_2_json, ensure_ascii=False,
                       sort_keys=True, indent=4, separators=(',', ': ')))
    f.close()
    model = DistMult(ent_tot=len(ent_list), rel_tot=len(rel_list), dim=200)
elif args.model == 'complex':
    model = ComplEx(ent_tot=len(ent_list), rel_tot=len(rel_list), dim=200)
elif args.model == 'simple':
    model = SimplE(ent_tot=len(ent_list), rel_tot=len(rel_list), dim=200)
model = model.cpu()
start_time = timeit.default_timer()
task_names = ['situated-OP', 'situated-OA', 'situated-AP']
for task_name in task_names:
    print('{} task'.format(task_name))
    task = Task(TASK_REV_MEDIUMHAND[task_name])
    samples = task.get_test_examples()
    y_hat = []
    y = []
    names = []
    for sample in samples:
        names.append(sample.name)
        head, tail = sample.name.split('/')
        if task_name == 'situated-OP':
            res = openke_predict(model,
                                 np.array(ent_list[head + '-o']),
                                 np.array(ent_list[tail + '-p']),
                                 np.array([[0], [1], [2]]), 0)
        elif task_name == 'situated-OA':
            res = openke_predict(model,
                                 np.array(ent_list[head + '-o']),
                                 np.array(ent_list[tail + '-a']),
def get(self, *args, **kwargs):
    time1 = time.time()
    self.set_header('Access-Control-Allow-Origin', '*')
    self.set_header('Content-type', 'application/json')
    # Request arguments:
    #   "images": source image directory, defaults to ./images
    #   "pixel":  border size in pixels, defaults to 50
    try:
        _ver = self.get_argument("version", "all")
        self.src_dir = os.path.join(self.src_dir, _ver)
        if not os.path.exists(self.src_dir):
            task_count = 0
            err_code = 1
            check_result = "dir[{}] does not exist".format(self.src_dir)
        else:
            if not os.path.exists(self.temp_dir):
                os.mkdir(self.temp_dir)
            else:
                # clean this directory
                tmp_dirs = os.listdir(self.temp_dir)
                for tmp_dir in tmp_dirs:
                    if not tmp_dir.isdigit():
                        continue
                    tmp_path = os.path.join(self.temp_dir, tmp_dir)
                    if os.path.isfile(tmp_path):
                        os.remove(tmp_path)
                    else:
                        shutil.rmtree(tmp_path)
            err_code = 0
            for dir in range(max_packages):
                csv_name = "ext-" + str(dir) + ".csv"
                csv_file = os.path.join(self.src_dir, str(dir), csv_name)
                if not os.path.exists(csv_file):
                    continue
                self.clean_dir(_csv_file=csv_file)
                self.prepare_crop(_csv_file=csv_file)
            task_count = self.queue.qsize()
            # start to process
            task = Task(None, None, None, None, True)
            self.queue.put(task)
            process = multiprocessing.Process(target=self.do_work)
            process.start()
            self.logger.info(str(process.pid) + ", start")
            process.join()
            self.logger.info(str(process.pid) + ", join")
            for i in range(max_packages):
                _sub_dir = os.path.join(self.temp_dir, str(i))
                if not os.path.exists(_sub_dir):
                    continue
                dest_dir = os.path.join(self.temp_dir, str(i))
                if not os.path.exists(dest_dir):
                    os.mkdir(dest_dir)
                origin_list = os.listdir(_sub_dir)
                for _image in origin_list:
                    _image = _image.strip()
                    if not _image.startswith("label-"):
                        continue
                    name_list = _image.split('.')
                    if len(name_list) < 2:
                        continue
                    ext_name = name_list[1]
                    if ext_name != 'png' and ext_name != 'jpg':
                        continue
                    # strip the leading "label-"
                    label_file = name_list[0]
                    if label_file.startswith("label-"):
                        label_file = label_file[6:]
                    anna_file = label_file + ".png"
                    origin_name = label_file + ".jpg"
                    image_path = os.path.join(_sub_dir, _image)
                    origin_image = os.path.join(_sub_dir, origin_name)
                    if not os.path.exists(origin_image):
                        os.remove(image_path)
                        self.logger.error(
                            "package:{}, file missing:[{}]=>[{}]".format(
                                str(i), origin_name, _image))
                        continue
                    result_path = os.path.join(self.temp_dir, str(i), anna_file)
                    task = Task(str(i), image_path, result_path, None, False)
                    self.task_queue.put(task)
            for i in range(20):
                task = Task(None, None, None, None, True)
                self.task_queue.put(task)
            all_processes = []
            for i in range(20):
                process = multiprocessing.Process(target=self.transform)
                all_processes.append(process)
            for process in all_processes:
                process.start()
                self.logger.info(str(process.pid) + ", start")
            for process in all_processes:
                process.join()
                self.logger.info(str(process.pid) + ", join")
            # # copy
            # cur_day = time.strftime("lane-%Y%m%d", time.localtime())
            # dir_list = os.listdir(self.temp_dir)
            #
            # # create the remote directory
            # if self.dest_scp_ip != host_ip:
            #     files = self.dest_sftp.listdir(path=self.dest_dir)
            #     self.dest_dir = os.path.join(self.dest_dir, cur_day)
            #     if cur_day in files:
            #         self.rm(self.dest_dir)
            #     self.dest_sftp.mkdir(self.dest_dir)
            # validation
            check_model = CheckData(gpu_id=0)
            check_result = check_model.run()
        time2 = time.time()
        result_obj = {
            "count": str(task_count),
            "time": str(time2 - time1) + " s",
            "doubt": str(len(check_result)),
            "message": check_result
        }
        resp = ServerResponse(err_code=err_code,
                              err_info=None,
                              result=result_obj)
        resp_str = resp.generate_response()
        self.logger.info(resp_str)
        self.write(resp_str)
    except Exception as err:
        err_info = repr(err)
        json_res = {"code": "99", "msg": str(err_info)}
        self.logger.error(json.dumps(json_res))
        self.write(json.dumps(json_res))
    except:
        self.write('{"code": "99", "msg": "unknown exception"}')
        self.logger.error('{"code": "99", "msg": "unknown exception"}')
    self.finish()
        utils.printf(f'\tValid PPL: {ppl: 6.2f}\n')

    def auto_test(self):
        test_input = ['what is your name ?', 'how old are you ?']
        for input in test_input:
            bs_ret = self.beam_search.decode(input.split())
            print(input)
            for sentence in bs_ret[:5]:
                print(' '.join(self.task.dec_vocab.ids2word(sentence['ids'])),
                      sentence['prob'])
            print('==')
            print('\n')


if __name__ == '__main__':
    task = Task(config)
    if len(sys.argv) > 2:
        # load checkpoint
        task.load(mode='train', ckpt_path=sys.argv[1], model_name=sys.argv[2])
    else:
        # retrain from scratch
        if config['train']['silence']:
            # run in the background: redirect stdout to a log file
            sys.stdout = open('train.log', 'w')
        task.load(mode='train')
    trainer = Trainer(task)
    trainer.train()
    # task.save('./ckpt')
data2 = [(0, [(1, 5)]), (1, [(1, 7)]), (2, [(1, 6)])]
datas = []
for _ in range(5):
    job = (random.randint(1, 5), [])
    for _ in range(3):
        job[1].append((random.randint(1, 3), random.randint(1, 5)))
    datas.append(job)
data = datas

if __name__ == '__main__':
    # Task(machine_id, duration, order)
    jobs = [Job(id=i + 1, arrival_time=row[0]) for i, row in enumerate(data)]
    for i, row in enumerate(data):
        for col in row[1]:
            jobs[i].add_task(Task(machine_id=col[0], duration=col[1]))

    def simulate(jobs, rule):
        simulator = JobShopSimulator(env=simpy.Environment(), jobs=jobs, rule=rule)
        simulator.run(until=50)
        simulator.plot()

    simulate(jobs, 'FIFO')
    simulate(jobs, 'LIFO')
    simulate(jobs, 'SPT')
def eval_tc(saved_model_path):
    batch_size = 128
    cuda = True if torch.cuda.is_available() else False
    add_constraint = True
    add_dropout_bn = False
    dic = joblib.load(os.path.join(saved_model_path, 'dic.pkl'))
    model = torch.load(os.path.join(saved_model_path, 'model.pt'),
                       map_location=torch.device('cpu'))
    if cuda:
        model = model.cuda()
    entity_idxs = dic[0]
    relation_idxs = dic[1]
    entity_reverse_idxs = {i: name for name, i in entity_idxs.items()}
    relation_reverse_idxs = {i: name for name, i in relation_idxs.items()}
    model.eval()
    # gather objects, properties, and affordances
    task_names = ['situated-OP', 'situated-OA', 'situated-AP']
    task_mapping = defaultdict()
    for name in task_names:
        task_mapping[name] = Task(TASK_REV_MEDIUMHAND[name])
    objects, properties, affordances = get_entity_sets(task_mapping)

    def e12_type(e1, e2):
        if (e1 in objects and e2 in properties):
            return 'situated-OP'
        elif (e1 in objects and e2 in affordances):
            return 'situated-OA'
        elif (e1 in affordances and e2 in properties):
            return 'situated-AP'
        else:
            return 'nothing'

    res = []
    for task_name, task in task_mapping.items():
        print('{} task'.format(task_name))
        y_hat = []
        y = []
        names = []
        input1 = []
        input2 = []
        test_samples = task.get_test_examples()
        for th, sample in enumerate(test_samples):
            names.append(sample.name)
            e12 = sample.name.split('/')
            input1.append(entity_idxs[e12[0]])
            input2.append(entity_idxs[e12[1]])
            if (th + 1) % batch_size == 0 or th + batch_size >= len(test_samples):
                input1 = torch.tensor(input1)
                input2 = torch.tensor(input2)
                if cuda:
                    input1 = input1.cuda()
                    input2 = input2.cuda()
                outputs = model.forward(input1, input2)
                if add_dropout_bn == True:
                    predictions = outputs
                else:
                    predictions = outputs[0]
                    W = outputs[1]
                    E = outputs[2]
                    R = outputs[3]
                if cuda:
                    predictions = predictions.cpu()
                for k, e12 in enumerate(zip(input1, input2)):
                    e1 = e12[0].cpu().item()
                    e2 = e12[1].cpu().item()
                    r = e12_type(entity_reverse_idxs[e1], entity_reverse_idxs[e2])
                    if add_constraint == True:
                        if r != 'nothing':
                            v1 = predictions[k, relation_idxs[r]]
                            v2 = predictions[k, relation_idxs['NOT-' + r]]
                            if v1 > v2:
                                y_hat.append(1)
                            else:
                                y_hat.append(0)
                        else:
                            y_hat.append(0)
                    else:
                        if r != 'nothing':
                            v = predictions[k, relation_idxs[r]].item()
                            if v >= 0.5:
                                y_hat.append(1)
                            else:
                                y_hat.append(0)
                        else:
                            y_hat.append(0)
                input1 = []
                input2 = []
            y.append(int(sample.label))
        y = np.array(y)
        y_hat = np.array(y_hat)
        acc, micro_f1, macro_f11, macro_f12 = metrics.report_more4cv(
            y_hat, y, names, TASK_LABELS[TASK_REV_MEDIUMHAND[task_name]])
        res.append((acc, micro_f1, macro_f11, macro_f12))
    return res