def prepare_crop(self, _csv_file):
        root_dir = os.path.dirname(_csv_file)
        csv_name = os.path.basename(_csv_file)

        name_list = csv_name.split(".")
        csv_name = name_list[0]
        if csv_name.startswith("ext-"):
            csv_name = csv_name[4:]
        package_index = csv_name

        with open(_csv_file, "r") as f:
            line_str = f.readline()

            while line_str:
                image, label = line_str.split(",")
                image = image.strip()
                label = label.strip()

                if not image.startswith("ext-") or not label.startswith(
                        "ext-"):
                    line_str = f.readline()
                    continue

                if not os.path.exists(os.path.join(root_dir, label)):
                    line_str = f.readline()
                    self.logger.error(
                        "package:{}, label missing:[{}]=>[{}]".format(
                            package_index, image, label))
                    continue

                if not os.path.exists(os.path.join(root_dir, image)):
                    line_str = f.readline()
                    self.logger.error(
                        "package:{}, image missing:[{}]=>[{}]".format(
                            package_index, image, label))
                    continue

                _src = os.path.join(root_dir, image)
                _dest_image = os.path.join(root_dir, image[4:])
                task = Task(package_index, _src, _dest_image, None)
                global_queue.remote_cut_queue.put(task)

                _src = os.path.join(root_dir, label)
                _dest_id2 = label[4:]
                _dest_label = os.path.join(root_dir, _dest_id2)
                _dest_label = _dest_label.strip()

                task = Task(package_index, _src, _dest_label, None)
                global_queue.remote_cut_queue.put(task)

                line_str = f.readline()
Example 2
def train(**kwargs):
    config.parse(kwargs)
    if os.path.exists(config.filename + '_' + str(config.split_ratio) +
                      'SineData.pkl'):
        train_data = pickle.load(
            open(config.filename + '_' + str(config.split_ratio) +
                 'SineData.pkl', 'rb'))
        print('exists SineData.pkl, load it!')
    else:
        train_data = SineData(config.filename, split_ratio=config.split_ratio)
        pickle.dump(
            train_data,
            open(
                config.filename + '_' + str(config.split_ratio) +
                'SineData.pkl', 'wb'))
    config.N = train_data.G.g.number_of_nodes() + 1
    model = getattr(models, config.model)(config)  # .eval()
    if torch.cuda.is_available():
        model.cuda()
        config.CUDA = True
    train_dataloader = DataLoader(train_data,
                                  config.batch_size,
                                  shuffle=True,
                                  num_workers=config.num_workers)
    #  optimizer = torch.optim.SGD(model.parameters(),lr = config.lr, weight_decay = config.weight_decay)
    optimizer = torch.optim.Adadelta(model.parameters(),
                                     lr=1.0,
                                     rho=0.95,
                                     weight_decay=config.weight_decay)
    task = Task(train_data.G)
    # model.train()
    for epoch in range(config.epochs):
        total_loss = 0.0
        for idx, data in enumerate(train_dataloader):
            #  if config.CUDA:
            #  data = map(lambda x: Variable(x.cuda()), data)
            #  else:
            #  data = map(lambda x: Variable(x), data)
            optimizer.zero_grad()
            loss = model(data)
            loss.backward()
            optimizer.step()
            if config.CUDA:
                total_loss += loss.cpu().data.numpy()
            else:
                total_loss += loss.data.numpy()
        print('epoch {0}, loss: {1}'.format(epoch, total_loss))
        task.link_sign_prediction_split(model.get_embedding())
Example 3
    def prepare_extend(self, _csv_file):
        root_dir = os.path.dirname(_csv_file)
        file_name = os.path.basename(_csv_file)

        package_index = file_name.split(".")[0]

        ext_list = []
        ext_csv = "ext-" + file_name
        ext_csv = os.path.join(root_dir, ext_csv)
        with open(_csv_file, "r") as f:
            line_str = f.readline()

            while line_str:
                image, label = line_str.split(",")

                _src = os.path.join(root_dir, image)
                _dest_id1 = "ext-" + image
                _dest_image = os.path.join(root_dir, _dest_id1)

                _src = _src.strip()
                _dest_image = _dest_image.strip()
                task = Task(package_index, _src, _dest_image, None)
                global_queue.divide_queue.put(task)

                _src = os.path.join(root_dir, label)
                _src = _src.strip()
                _dest_id2 = "ext-" + label
                _dest_label = os.path.join(root_dir, _dest_id2)
                _dest_label = _dest_label.strip()

                _out_str = _dest_id1 + "," + _dest_id2
                ext_list.append(_out_str)

                task = Task(package_index, _src, _dest_label, None)
                global_queue.divide_queue.put(task)

                line_str = f.readline()
        ext_list.sort()
        with open(ext_csv, "w") as f:
            for ext_line in ext_list:
                f.write(ext_line)

        # modify *.csv=>*.txt
        name_list = file_name.split(".")
        new_file_name = name_list[0] + ".txt"
        new_file_path = os.path.join(root_dir, new_file_name)
        os.rename(_csv_file, new_file_path)
Example 4
async def task_status_handler(request):
    task_id = request.match_info['id']
    try:
        task = Task(app.loop, task_id)
        status = await task.get_status()
        resp = {"task_status": status}
    except:
        resp = {"task_status": "Not found"}
    return web.json_response(resp)
Example 5
def alg_list():
    with open(sys.argv[1], 'r') as file:
        n = load_line(file)[0]
        tasks = [Task(*load_line(file), i) for i in range(n)]
    tasks = sorted(tasks, key=lambda task: task.r)

    schedules = [[], [], [], []]
    timers = [0, 0, 0, 0]
    criterium = 0
    awaiting = []
    counter = 0
    it = 0

    while True:
        timerId = argmin(timers)
        if it != n:
            for i in range(it, n):
                if tasks[i].r <= timers[timerId]:
                    awaiting += [tasks[i]]
                else:
                    it = i
                    break
            else:
                it = n
        if not awaiting:
            timers[timerId] = tasks[it].r
            for i in range(it, n):
                if tasks[i].r <= timers[timerId]:
                    awaiting += [tasks[i]]
                else:
                    it = i
                    break
            else:
                it = n
        awaiting.sort(key=lambda task:
                      (min(0, timers[timerId] + task.p - task.d), -task.p))
        popped = awaiting.pop()
        schedules[timerId] += [popped]
        timers[timerId] = max(timers[timerId], popped.r) + popped.p
        criterium += max(0, timers[timerId] - popped.d)
        counter += 1
        if counter == n:
            break

    with open(sys.argv[2], 'w') as output:
        output.write(
            str(criterium) + '\n' + '\n'.join([
                ' '.join([str(task.i + 1) for task in schedule])
                for schedule in schedules
            ]))
Example 6
def test(**kwargs):
    snap_root = kwargs['snap_root']
    config_file = snap_root + '/config.pkl'
    config = pickle.load(open(config_file, 'rb'))
    model_file = snap_root + '/{}.model'.format(config.model)
    dataset_name = 'kTupleDataV1'
    if os.path.exists(config.filename + '_' + str(config.split_ratio) +
                      '_{}.pkl'.format(dataset_name)):
        train_data = pickle.load(open(snap_root + '/data.pkl', 'rb'))
        print('exists {}.pkl, load it!'.format(dataset_name))
        print(train_data.G.g.number_of_nodes(),
              train_data.G.g.number_of_edges())
    else:
        raise Exception('Data Module not exists!')
    model = getattr(models, config.model)(config)  # .eval()
    if torch.cuda.is_available():
        model.cuda()
        config.CUDA = True
    model.load_state_dict(torch.load(model_file))
    task = Task(train_data.G, config)
    task.link_sign_prediction_split(utils.cat_neighbor(train_data.G.g,
                                                       model.get_embedding(),
                                                       method='null'),
                                    method='concatenate')
Example 7

    def get_tasks_from_service(self):
        tasks = []
        events = self.get_events_from_service()
        # colors = self.service.colors().get(fields="event").execute()
        for event in events:
            start = self.get_event_start_time(event)
            end = self.get_event_end_time(event)
            if "colorId" in event:
                # color_hexcode = colors["event"][event["colorId"]]["background"]
                color_id = event["colorId"]
                color = COLOR_ID_TO_COLOR_DICT[color_id]
            else:
                color = "OTHER"
            task = Task(color, start, end)
            tasks.append(task)
        return tasks
Example 8
async def main(loop):
    connection = await aio_pika.connect_robust(settings.RABBITMQ_HOST,
                                               loop=loop)

    queue_name = settings.PRECESS_IMG_QUEUE

    async with connection:
        # Creating channel
        channel = await connection.channel()

        # Declaring queue
        queue = await channel.declare_queue(queue_name, auto_delete=True)

        async for message in queue:
            with message.process():
                data = json.loads(message.body.decode('utf-8'))
                aws_client = AWSWrapper()
                task_id = data['task_id']
                task = Task(loop, id=task_id)

                await task.set_status('Started downloading')
                try:
                    file_path = await aws_client.get_file(**data['aws_data'])
                except Exception as e:
                    await task.set_status("Download error")
                    raise e
                else:
                    await task.set_status("Started image processing")
                    try:
                        result_future = canny_task.delay(file_path)
                        canny_img_path = await wait_for_done(result_future)
                    except Exception as e:
                        await task.set_status("Processing error")
                        raise e
                    else:
                        delete_file(file_path)
                        data['aws_data']['file_path'] = canny_img_path
                        data['key'] = canny_img_path.split('/')[-1]
                        await task.set_status("Started uploading canny image")
                        try:
                            resp = await aws_client.upload_huge(
                                **data['aws_data'])
                            await task.set_status("Canny edge image uploaded")
                        except Exception as e:
                            await task.set_status("Error uploading")
                            raise e
                        delete_file(canny_img_path)
Example 9
def extract_task_from_line(line: str) -> Task:
    """ Translate one line from txt file into arguments for execution: instance, process, parameters

    :param line: Arguments for execution. E.g. instance="tm1srv01" process="Bedrock.Server.Wait" pWaitSec=2
    :return: instance_name, process_name, parameters
    """
    line_arguments = dict()
    for pair in shlex.split(line):
        param, value = pair.split("=")
        # if instance or process, needs to be case insensitive
        if param.lower() == "process" or param.lower() == "instance":
            line_arguments[param.lower()] = value.strip('"').strip()
        # parameters (e.g. pWaitSec) are case sensitive in TM1 REST API !
        else:
            line_arguments[param] = value.strip('"').strip()
    return Task(instance_name=line_arguments.pop("instance"),
                process_name=line_arguments.pop("process"),
                parameters=line_arguments)
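
A minimal usage sketch for the parser above, based on the example line in its docstring; the attribute access at the end assumes Task simply stores the keyword arguments it receives, which is not shown here:

line = 'instance="tm1srv01" process="Bedrock.Server.Wait" pWaitSec=2'
task = extract_task_from_line(line)
# assumed attributes, mirroring the constructor arguments:
# task.instance_name  -> "tm1srv01"
# task.process_name   -> "Bedrock.Server.Wait"
# task.parameters     -> {"pWaitSec": "2"}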
Example 10
def get_glue_task(task_name: str, data_dir: str = None):
    """Return a GLUE task object
    Args:
        task_name (str): name of GLUE task
        data_dir (str, optional): path to dataset, if not provided will be taken from
            GLUE_DIR env. variable
    """
    task_name = task_name.lower()
    if task_name not in processors:
        raise ValueError("Task not found: {}".format(task_name))
    task_processor = processors[task_name]()
    if data_dir is None:
        try:
            data_dir = os.path.join(os.environ["GLUE_DIR"], DEFAULT_FOLDER_NAMES[task_name])
        except Exception:
            data_dir = None
    task_type = output_modes[task_name]
    return Task(task_name, task_processor, data_dir, task_type)
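
A hypothetical call to the helper above; "mrpc" is assumed to be one of the keys registered in the processors dict, and GLUE_DIR is assumed to point at the downloaded GLUE data:

os.environ["GLUE_DIR"] = "/data/glue"   # hypothetical location of the GLUE datasets
task = get_glue_task("mrpc")            # data_dir resolved from GLUE_DIR and DEFAULT_FOLDER_NAMES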
Example 11
async def task_handler(request):
    data = await request.json()
    try:
        aws_data = {
            'bucket': data['BUCKET'],
            'key': data['file_path'],
            'AWS_SECRET_ACCESS_KEY': data['SECRET_ACCESS_KEY'],
            'AWS_ACCESS_KEY_ID': data['ACCESS_KEY_ID']
        }
        task = Task(app.loop)
        await task.set_status("Pending")
        data = {"aws_data": aws_data, "task_id": task.id}
        serialized_data = json.dumps(data)
        await rabbit_pub(app.loop, settings.PRECESS_IMG_QUEUE, serialized_data)
    except KeyError:
        raise web.HTTPForbidden()
    resp = {"task_id": task.id}
    return web.json_response(resp)
Example 12
    def get(self, *args, **kwargs):
        time1 = time.time()

        self.set_header('Access-Control-Allow-Origin', '*')
        self.set_header('Content-type', 'application/json')

        # {
        #     "images": source image directory; defaults to the images directory under the current directory,
        #     "pixel": outer border size in pixels; defaults to 50,
        # }

        try:
            _ver = self.get_argument("version", "all")
            self.src_dir = os.path.join(self.src_dir, _ver)
            self.temp_dir = os.path.join(self.temp_dir, _ver)

            if not os.path.exists(self.src_dir):
                task_count = 0
                err_code = 1
            else:
                if not os.path.exists(self.temp_dir):
                    os.makedirs(self.temp_dir)
                else:
                    # clean this directory
                    tmp_dirs = os.listdir(self.temp_dir)
                    for tmp_dir in tmp_dirs:
                        if not tmp_dir.isdigit():
                            continue
                        tmp_path = os.path.join(self.temp_dir, tmp_dir)
                        if os.path.isfile(tmp_path):
                            os.remove(tmp_path)
                        else:
                            shutil.rmtree(tmp_path)

                err_code = 0
                for dir in range(max_packages):
                    csv_name = "ext-" + str(dir) + ".csv"
                    csv_file = os.path.join(self.src_dir, str(dir), csv_name)
                    if not os.path.exists(csv_file):
                        continue

                    self.clean_dir(_csv_file=csv_file)
                    self.prepare_crop(_csv_file=csv_file)

                task_count = self.queue.qsize()
                # start to process
                task = Task(None, None, None, None, True)
                self.queue.put(task)

                process = multiprocessing.Process(target=self.do_work)
                process.start()
                self.logger.info(str(process.pid) + ", start")
                process.join()
                self.logger.info(str(process.pid) + ", join")

                for i in range(max_packages):
                    _sub_dir = os.path.join(self.temp_dir, str(i))
                    if not os.path.exists(_sub_dir):
                        continue

                    dest_dir = os.path.join(self.temp_dir, str(i))
                    if not os.path.exists(dest_dir):
                        os.mkdir(dest_dir)

                    origin_list = os.listdir(_sub_dir)

                    for _image in origin_list:
                        _image = _image.strip()
                        if not _image.startswith("label-"):
                            continue

                        name_list = _image.split('.')
                        if len(name_list) < 2:
                            continue

                        ext_name = name_list[1]
                        if ext_name != 'png' and ext_name != 'jpg':
                            continue

                        # start with label-
                        label_file = name_list[0]
                        if label_file.startswith("label-"):
                            label_file = label_file[6:]
                        anna_file = label_file + ".png"

                        origin_name = label_file + ".jpg"
                        image_path = os.path.join(_sub_dir, _image)
                        origin_image = os.path.join(_sub_dir, origin_name)
                        if not os.path.exists(origin_image):
                            os.remove(image_path)
                            self.logger.error(
                                "package:{}, file missing:[{}]=>[{}]".format(
                                    str(i), origin_name, _image))
                            continue

                        result_path = os.path.join(self.temp_dir, str(i),
                                                   anna_file)

                        task = Task(str(i), image_path, result_path, None,
                                    False)
                        self.task_queue.put(task)
                for i in range(20):
                    task = Task(None, None, None, None, True)
                    self.task_queue.put(task)

                all_processes = []
                for i in range(20):
                    process = multiprocessing.Process(target=self.transform)
                    all_processes.append(process)

                for process in all_processes:
                    process.start()
                    self.logger.info(str(process.pid) + ", start")

                for process in all_processes:
                    process.join()
                    self.logger.info(str(process.pid) + ", join")

                # copy into the "all" directory first
                if _ver != "all":
                    temp_dir_list = os.listdir(self.temp_dir)
                    for temp_dir in temp_dir_list:
                        if not temp_dir.isdigit():
                            continue
                        src_temp = os.path.join(self.temp_dir, temp_dir)
                        dest_temp = os.path.join(
                            os.path.dirname(self.temp_dir), "all", temp_dir)
                        if os.path.exists(dest_temp):
                            shutil.rmtree(dest_temp)
                        shutil.copytree(src_temp, dest_temp)

                # copy
                copy_dir = os.path.join(os.path.dirname(self.temp_dir), "all")
                dir_list = os.listdir(copy_dir)

                for _dir in dir_list:
                    old_src = os.path.join(copy_dir, _dir)
                    new_dest = os.path.join(self.dest_dir, "all", _dir)
                    shutil.copytree(old_src, new_dest)

            time2 = time.time()
            result_obj = {
                "count": str(task_count),
                "time": str(time2 - time1) + " s"
            }
            resp = ServerResponse(err_code=err_code,
                                  err_info=None,
                                  result=result_obj)
            resp_str = resp.generate_response()
            self.logger.info(resp_str)

            self.write(resp_str)
        except Exception as err:
            err_info = repr(err)
            json_res = {"code": "99", "msg": str(err_info)}
            self.logger.error(json.dumps(json_res))
            self.write(json.dumps(json_res))
        except:
            self.write('{"code": "99", "msg": "unknown exception"}')
            self.logger.error('{"code": "99", "msg": "unknown exception"}')

        self.finish()
Example 13
def train(**kwargs):
    """use the triplet like transE
    """
    config.parse(kwargs)
    dataset_name = 'kTupleDataV1'
    train_data = kTupleDataV1(config.filename,
                              split_ratio=config.split_ratio,
                              neg_num=config.neg_num)

    #feature?
    feature = sp.csr_matrix(train_data.G.pos_adjmatrix, dtype=np.float32).T
    #feature = train_data.pos_feature
    #feature = sp.csr_matrix(np.array(train_data.feature), dtype=np.float32).T
    #print(feature)
    #feature_pos = sp.csr_matrix(np.array(train_data.pos_feature), dtype=np.float32).T
    feature_pos = sp.csr_matrix(train_data.G.pos_adjmatrix, dtype=np.float32).T
    #print(feature_pos.shape())
    #feature_neg = sp.csr_matrix(np.array(train_data.neg_feature), dtype=np.float32).T

    feature_neg = sp.csr_matrix(train_data.G.neg_adjmatrix, dtype=np.float32).T
    print(feature_neg)
    #print(feature_neg.shape())
    #feature = train_data.G.adj_matrix
    #print(feature.todense())
    '''
    feature = np.matrix([
            [i, -i]
            for i in range(train_data.adj_pos.shape[0])
        ], dtype=float)
    '''
    #adj = train_data.adj_matrix
    adj = train_data.G.all_matrix
    adj_pos = train_data.adj_pos
    #print(adj_pos.shape)
    adj_neg = train_data.adj_neg
    graph_pos = nx.from_scipy_sparse_matrix(adj_pos, create_using=nx.DiGraph())
    graph_neg = nx.from_scipy_sparse_matrix(adj_neg, create_using=nx.DiGraph())
    # data preprocessing
    # process the results => run GCN
    # build a symmetric adjacency matrix:
    # the goal is to turn the directed graph's adjacency matrix into an undirected one
    adj_pos = adj_pos + adj_pos.T.multiply(
        adj_pos.T > adj_pos) - adj_pos.multiply(adj_pos.T > adj_pos)
    adj_neg = adj_neg + adj_neg.T.multiply(
        adj_neg.T > adj_neg) - adj_neg.multiply(adj_neg.T > adj_neg)
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
    #print(adj_pos.todense())
    # normalize the features
    feature = normalize(feature)
    feature_pos = normalize(feature_pos)
    feature_neg = normalize(feature_neg)
    #print(np.array(feature.todense()).shape)
    #print(feature.todense())
    # normalize the adjacency matrices
    adj_pos = normalize(adj_pos + sp.eye(adj_pos.shape[0]))
    adj_neg = normalize(adj_neg + sp.eye(adj_neg.shape[0]))
    adj = normalize(adj + sp.eye(adj.shape[0]))
    #print(adj_pos)
    adj = sparse_mx_to_torch_sparse_tensor(adj)
    adj_pos = sparse_mx_to_torch_sparse_tensor(adj_pos)
    #print(adj_pos)
    adj_neg = sparse_mx_to_torch_sparse_tensor(adj_neg)
    # these three index ranges need to be tuned manually
    idx_train = range(2)  # training set
    idx_val = range(2)  # validation set
    idx_test = range(500, 1500)  # test set

    idx_train = torch.LongTensor(idx_train)
    idx_val = torch.LongTensor(idx_val)
    idx_test = torch.LongTensor(idx_test)

    features = torch.FloatTensor(np.array(feature.todense()))
    features_pos = torch.FloatTensor(np.array(feature_pos.todense()))
    features_neg = torch.FloatTensor(np.array(feature_neg.todense()))
    # sign prediction
    # use one-hot encoding to obtain the labels; if there are no labels, represent them with from_node/to_node by default,
    # the MATLAB classification experiments used find() to drop the background class 0, so all classes start from 1 there, which is fine in MATLAB,
    # but PyTorch requires labels to start from 0 when validating with CrossEntropyLoss, so it would raise an error otherwise
    labels = [0, 1]
    #labels = encode_onehot(labels)
    #features = torch.FloatTensor(feature)

    #print(data.features)
    labels = torch.LongTensor(labels)
    #print(data.labels)
    #mask = torch.ByteTensor(data.train_mask)
    #g = data.graph
    # add self loop
    #g.remove_edges_from(g.selfloop_edges())
    g_pos = DGLGraph(graph_pos)
    g_neg = DGLGraph(graph_neg)
    #g.add_edges(g.nodes(), g.nodes())

    #train_dataloader = DataLoader(train_data, config.batch_size, shuffle=True, num_workers=config.num_workers)
    # data training
    #g, features, labels, mask = load_data(train_data)
    # `net` is not defined at this point; the optimizers created below are the ones actually used
    # optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)

    best_eval_dict = {'f1-micro': 0.0, 'f1-macro': 0.0}

    #'''
    model = GCN(nfeat=features_pos.shape[1],
                nhid=args.hidden,
                nclass=64,
                dropout=args.dropout)
    #'''
    #'''
    model2 = nGCN(nfeat=features_neg.shape[1],
                  nhid=args.hidden,
                  nclass=64,
                  dropout=args.dropout)
    #'''
    '''
    model3 = GAT(nfeat=features_pos.shape[1], 
                nhid=args.hidden, 
                nclass=64, 
                dropout=args.dropout, 
                nheads=args.nb_heads, 
                alpha=args.alpha)
    '''
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    optimizer1 = torch.optim.Adam(model2.parameters(),
                                  lr=args.lr,
                                  weight_decay=args.weight_decay)
    #optimizer2 = torch.optim.Adam(model3.parameters(),
    #lr=args.lr, weight_decay=args.weight_decay)
    task = Task(train_data.G, config)
    #features,features_pos,features_neg,adj,labels = Variable(features),Variable(features_pos),Variable(features_neg), Variable(adj), Variable(labels)

    #cuda config
    #if args.cuda:
    #model.cuda()
    #features = features.cuda()
    #adj = adj.cuda()
    #labels = labels.cuda()
    #idx_train = idx_train.cuda()
    #idx_val = idx_val.cuda()
    #idx_test = idx_test.cuda()
    #pygat
    #for epoch in range(config.epochs):
    #t = time.time()
    #model3.train()
    #optimizer2.zero_grad()
    #output = model3(features, adj)
    #loss_train = F.nll_loss(output[idx_train], labels[idx_train])
    #acc_train = accuracy(output[idx_train], labels[idx_train])
    #loss_train.backward()
    #optimizer.step()
    #loss_val = F.nll_loss(output[idx_val], labels[idx_val])
    #acc_val = accuracy(output[idx_val], labels[idx_val])
    #print('Epoch: {:04d}'.format(epoch+1),
    #'loss_train: {:.4f}'.format(loss_train.data.item()),
    #'acc_train: {:.4f}'.format(acc_train.data.item()),
    #'loss_val: {:.4f}'.format(loss_val.data.item()),
    #'acc_val: {:.4f}'.format(acc_val.data.item()),
    #'time: {:.4f}s'.format(time.time() - t))

    #pytorch
    for epoch in range(config.epochs):
        #for epoch in range(config.epochs):
        #for epoch in range(60):
        t = time.time()
        model.train()
        optimizer.zero_grad()
        optimizer1.zero_grad()
        #__import__('pdb').set_trace()
        output_pos = model(features_pos, adj_pos)
        #var.detach().numpy().savetxt("output_pos.txt",output_pos)
        # __import__('pdb').set_trace()

        #print(feature_neg)
        #print(adj_neg)
        #output_neg = model2(features_neg, adj)
        output_neg = model2(features_neg, adj)
        #__import__('pdb').set_trace()
        #var.detach().numpy().savetxt("output_neg.txt",output_neg)
        # print(output_pos)
        # print(output_neg)
        #output = model(features,adj_neg)
        loss_train = F.nll_loss(output_pos[idx_train], labels[idx_train])
        loss_neg_train = F.nll_loss(output_neg[idx_train], labels[idx_train])
        #acc_train = accuracy(output_pos[idx_train], labels[idx_train])
        loss_train.backward()
        loss_neg_train.backward()
        #print(loss_train)
        optimizer.step()
        optimizer1.step()

        #if not args.fastmode:
        # Evaluate validation set performance separately,
        # deactivates dropout during validation run.
        #model.eval()
        #output = model(features, adj)

        # perform sign prediction on the node signs
        # how to embed the node signs:

        eval_dict = task.link_sign_pre_con(
            utils.cat_neighbor_new(train_data.G.g,
                                   output_pos,
                                   method='cat_neg'),
            utils.cat_neighbor_new(train_data.G.g,
                                   output_neg,
                                   method='cat_neg'),
            idx_train,
            idx_val,
            idx_test,
            method='concatenate')
        #print(np.all(model.get_embedding()))
        # task.link_sign_prediction_ktuple(model.get_embedding())
        #print(eval_dict)
        if config.snapshoot:
            #print('epoch {0}, loss: {1}, time: {2}'.format(epoch, total_loss, train_time), file=fout)
            print(
                "link_sign_prediction auc: {:.3f}, f1: {:.3f}, f1-micro: {:.3f}, f1-macro: {:.3f}"
                .format(eval_dict['auc'], eval_dict['f1'],
                        eval_dict['f1-micro'], eval_dict['f1-macro']))
            for key in best_eval_dict:
                if eval_dict[key] > best_eval_dict[key]:
                    for key in best_eval_dict:
                        best_eval_dict[key] = eval_dict[key]

                    #model.save(snap_root + '/{}.model'.format(config.model))
                    #model.save('/{}.model'.format(config.model))
                    break

        #loss_val = F.nll_loss(output[idx_val], labels[idx_val])
        #acc_val = accuracy(output[idx_val], labels[idx_val])
        #print('Epoch: {:04d}'.format(epoch+1),
        #      'loss_train: {:.4f}'.format(loss_train.item()),
        #      'acc_train: {:.4f}'.format(acc_train.item()),
        #      'loss_val: {:.4f}'.format(loss_val.item()),
        #      'acc_val: {:.4f}'.format(acc_val.item()),
        #      'time: {:.4f}s'.format(time.time() - t))

    #pygcn
    #'''
    '''
    #dgl
    dur = []
    for epoch in range(30):
        if epoch >=3:
            t0 = time.time()
        logits_pos = net(g_pos, features)
        logits_neg = net(g_neg, features)
        #print(logits_pos)
        #logits_neg = net(g_neg, features)
        logp = F.log_softmax(logits_pos, 1)
        logn = F.log_softmax(logits_neg, 1)
        print('logp:{}'.format(logp))
        print('logn:{}'.format(logn))
        #logp = F.log_softmax(logits, 1)
        #np.savetxt("logp.txt",logits_pos.get_embedding())

        loss_train = F.nll_loss(logits_pos[idx_train], labels[idx_train])
        acc_train = accuracy(logits_pos[idx_train], labels[idx_train])
        loss = F.nll_loss(logits_pos[idx_val], labels[idx_val])
        optimizer.zero_grad()
        #print(logits)
        loss.backward()
        optimizer.step()

        if epoch >=3:
            dur.append(time.time() - t0)

        eval_dict = task.link_sign_prediction_split(utils.cat_neighbor(
            train_data.G.g, logp, method='cat_pos'), method='concatenate')
        #print(np.all(model.get_embedding()))
        # task.link_sign_prediction_ktuple(model.get_embedding())
        #print(eval_dict)
        if config.snapshoot:
            #print('epoch {0}, loss: {1}, time: {2}'.format(epoch, total_loss, train_time), file=fout)
            #print("link_sign_prediction auc: {:.3f}, f1: {:.3f}, f1-micro: {:.3f}, f1-macro: {:.3f}".format(
                #eval_dict['auc'], eval_dict['f1'], eval_dict['f1-micro'], eval_dict['f1-macro']), file=fout)
            for key in best_eval_dict:
                if eval_dict[key] > best_eval_dict[key]:
                    for key in best_eval_dict:
                        best_eval_dict[key] = eval_dict[key]

                    #model.save(snap_root + '/{}.model'.format(config.model))
                    output.save('/{}.model'.format(config.model))
                    break
        loss_val = F.nll_loss(logits_pos[idx_val], labels[idx_val])
        acc_val = accuracy(logits_pos[idx_val], labels[idx_val])
        print('Epoch: {:04d}'.format(epoch+1),
              'loss_train: {:.4f}'.format(loss_train.item()),
              'acc_train: {:.4f}'.format(acc_train.item()),
              'loss_val: {:.4f}'.format(loss_val.item()),
              'acc_val: {:.4f}'.format(acc_val.item()))

        print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f}| acc {:.4f} ".format(
                epoch,
                loss.item(),
                np.mean(dur)))


    '''

    #train_dataloader = DataLoader(train_data, config.batch_size, shuffle=True, num_workers=config.num_workers)
    #print(train_dataloader)
    #pytorch traindataloader
    # create the DataLoader: batch_size=2, shuffle=False keeps the data order, num_workers=4 uses 4 worker subprocesses:
    #train_dataloader = DataLoader(train_data, config.batch_size, shuffle=True, num_workers=config.num_workers)
    # save the train_data file
    '''
    if os.path.exists(config.filename + '_' + str(config.split_ratio)
                      + '_{}.pkl'.format(dataset_name)) and not config.save_dataset:
        # serialize the data into pkl format
        train_data = pickle.load(open(config.filename + '_' + str(config.split_ratio) + '_{}.pkl'.format(dataset_name), 'rb'))
        print(config.filename + '_' + str(config.split_ratio))
        print(train_data) #<data.kTupleDataV1.kTupleDataV1 object at 0x7f96e2ddc5c0>
        print('exists {}.pkl, load it!'.format(dataset_name))
        print(train_data.G.g.number_of_nodes(), train_data.G.g.number_of_edges())
    else:
        train_data = kTupleDataV1(config.filename, split_ratio=config.split_ratio, neg_num=config.neg_num)
        # write the list data to a file
        with open('train_data_ktuple','w') as f:
            f.write(str(train_data.sign_tuple))
        pickle.dump(train_data, open(
            config.filename + '_' + str(config.split_ratio) + '_{}.pkl'.format(dataset_name), 'wb'))
        print('success save {}.pkl'.format(dataset_name))
    '''
    '''
    #3333
    config.N = train_data.G.g.number_of_nodes()
    model = getattr(models, config.model)(config)   # .eval()
    #'''
    if torch.cuda.is_available():
        model.cuda()
        config.CUDA = True
    '''
    train_dataloader = DataLoader(train_data, config.batch_size, shuffle=True, num_workers=config.num_workers)

    optimizer = torch.optim.Adadelta(model.parameters(), lr=1.0, rho=0.95)
    #print(optimizer)
    task = Task(train_data.G, config)

    best_eval_dict = {'f1-micro': 0.0, 'f1-macro': 0.0}
    
    '''
    '''
    #if config.snapshoot:
        #snapshoot_time = datetime.strftime(datetime.now(),  '%y-%m-%d_%H:%M:%S')
        #best_eval_dict = {'f1-micro': 0.0, 'f1-macro': 0.0}
        #fout, snap_root = utils.init_snapshoot(config.filename, snapshoot_time)
        #config.save(fout)
    '''
    '''
    #config.show()

    # model.train()
    #print(range(config.epochs))
    for epoch in range(config.epochs):
        total_loss = 0.0
        start_time = datetime.now()
        for idx, data in enumerate(train_dataloader):
            negs = data[-1]
            data = data[:-1]
            neg_r, neg_t = zip(*negs)
            # transpose only works for 2-D matrices
            neg_r = torch.cat(neg_r).view(len(negs), -1).transpose(0, 1)
            neg_t = torch.cat(neg_t).view(len(negs), -1).transpose(0, 1)
            data.extend([neg_r, neg_t])
            data = map(lambda x: Variable(x), data)
            if config.CUDA:
                data = map(lambda x: Variable(x.cuda()), data)
            else:
                data = map(lambda x: Variable(x), data)
            # optimizer.zero_grad() zeroes the gradients, i.e. sets the derivatives of the loss w.r.t. the weights to 0.
            optimizer.zero_grad()
            loss = model(data)

            loss.backward()
            # optimizer.step() is usually called per mini-batch, while scheduler.step() is usually called per epoch; this is not absolute and depends on the specific needs.
            # only optimizer.step() updates the model; scheduler.step() only adjusts the learning rate
            # update the model
            optimizer.step()
            # compute the loss
            if config.CUDA:
                total_loss += loss.cpu().data.numpy()
            else:
                total_loss += loss.data.numpy()
        train_time = (datetime.now() - start_time).seconds
        print('epoch {0}, loss: {1}, time: {2}'.format(epoch, total_loss, train_time))
        if (epoch > 30 or config.speedup):
            if config.speedup:
                if epoch % config.speedup != 0:
                    continue
            if epoch % 5 != 0:
                continue
        eval_dict = task.link_sign_prediction_split(utils.cat_neighbor(
            train_data.G.g, model.get_embedding(), method='cat_neg'), method='concatenate')
        print(np.all(model.get_embedding()))
        # task.link_sign_prediction_ktuple(model.get_embedding())
        print(eval_dict)
        if config.snapshoot:
            #print('epoch {0}, loss: {1}, time: {2}'.format(epoch, total_loss, train_time), file=fout)
            #print("link_sign_prediction auc: {:.3f}, f1: {:.3f}, f1-micro: {:.3f}, f1-macro: {:.3f}".format(
                #eval_dict['auc'], eval_dict['f1'], eval_dict['f1-micro'], eval_dict['f1-macro']), file=fout)
            for key in best_eval_dict:
                if eval_dict[key] > best_eval_dict[key]:
                    for key in best_eval_dict:
                        best_eval_dict[key] = eval_dict[key]

                    #model.save(snap_root + '/{}.model'.format(config.model))
                    model.save('/{}.model'.format(config.model))
                    break
    #333333
    '''
    '''
    if config.snapshoot:
        fout.write('best result:' + str(best_eval_dict) + '\n')
        fout.close()
        config.save(open(snap_root + '/{:.3f}.config'.format(best_eval_dict['f1-micro']), 'w'))
        pickle.dump(config, open(snap_root + '/config.pkl', 'wb'))
        if config.save_dataset:
            pickle.dump(train_data, open(snap_root + '/data.pkl', 'wb'))
    '''
Example 14
def alg_list():
    env = {'start': time(), 'break': False}
    with open(sys.argv[1], 'r') as file:
        n = load_line(file)[0]
        env['originalTasks'] = [Task(*load_line(file), i) for i in range(n)]
    tasks = sorted(env['originalTasks'], key=lambda x: x.r)

    schedules = [[], [], [], []]
    started = [[], [], [], []]
    timers = [0, 0, 0, 0]
    criterium = 0
    awaiting = []
    counter = 0
    it = 0

    while True:
        timer_id = argmin(timers)
        if it != n:
            for i in range(it, n):
                if tasks[i].r <= timers[timer_id]:
                    awaiting += [tasks[i]]
                else:
                    it = i
                    break
            else:
                it = n
        if not awaiting:
            timers[timer_id] = tasks[it].r
            for i in range(it, n):
                if tasks[i].r <= timers[timer_id]:
                    awaiting += [tasks[i]]
                else:
                    it = i
                    break
            else:
                it = n
        awaiting.sort(
            key=lambda x: (min(0, timers[timer_id] + x.p - x.d), -x.p))
        popped = awaiting.pop()
        schedules[timer_id] += [popped]
        started[timer_id] += [max(timers[timer_id], popped.r)]
        timers[timer_id] = max(timers[timer_id], popped.r) + popped.p
        criterium += max(0, timers[timer_id] - popped.d)
        counter += 1
        if counter == n:
            break
    env.update({
        'bestSchedules': deepcopy(schedules),
        'bestCriterium': criterium,
        'schedules': schedules,
        'started': started,
        'counter': counter,
        'timers': timers,
        'awaiting': [],
        'n': n
    })

    while True:
        for _ in range(env['n']):
            popped_id = argmax(j[-1] if j else -1 for j in env['started'])
            popped = env['schedules'][popped_id].pop()
            env['started'][popped_id].pop()
            env['awaiting'] += [popped]
            if env['started'][popped_id]:
                env['timers'][popped_id] = env['started'][popped_id][-1] + env[
                    'schedules'][popped_id][-1].p
            else:
                env['timers'][popped_id] = 0
            env['counter'] -= 1
            alg_adv(popped_id, env)
            if env['break']:
                env['break'] = False
                break
            if (time() - env['start']) * 100 > env['n']:
                save_results(env)
                return
        else:
            save_results(env)
            return
Example 15

    def get(self, *args, **kwargs):
        time1 = time.time()

        self.set_header('Access-Control-Allow-Origin', '*')
        self.set_header('Content-type', 'application/json')

        # {
        #     "images": source image directory; defaults to the images directory under the current directory,
        #     "pixel": outer border size in pixels; defaults to 50,
        # }

        try:
            _ver = self.get_argument("version", "all")
            self.src_dir = os.path.join(self.src_dir, _ver)
            self.temp_dir = os.path.join(self.temp_dir, _ver)

            if not os.path.exists(self.src_dir):
                task_count = 0
                err_code = 1
            else:
                if not os.path.exists(self.temp_dir):
                    os.makedirs(self.temp_dir)
                # else:
                #     # clean this directory
                #     tmp_dirs = os.listdir(self.temp_dir)
                #     for tmp_dir in tmp_dirs:
                #         if not tmp_dir.isdigit():
                #             continue
                #         tmp_path = os.path.join(self.temp_dir, tmp_dir)
                #         if os.path.isfile(tmp_path):
                #             os.remove(tmp_path)
                #         else:
                #             shutil.rmtree(tmp_path)

                err_code = 0
                for dir in range(max_packages):
                    csv_name = "ext-" + str(dir) + ".csv"
                    csv_file = os.path.join(self.src_dir, str(dir), csv_name)
                    if not os.path.exists(csv_file):
                        continue

                    self.clean_dir(_csv_file=csv_file)
                    self.prepare_crop(_csv_file=csv_file)

                task_count = global_queue.remote_cut_queue.qsize()
                task_count = task_count // 2
                # start to process
                task = Task(None, None, None, None, True)
                global_queue.remote_cut_queue.put(task)

                self.do_work()

                for i in range(max_packages):
                    _sub_dir = os.path.join(self.src_dir, str(i))
                    if not os.path.exists(_sub_dir):
                        continue

                    dest_dir = os.path.join(self.temp_dir, str(i))
                    if not os.path.exists(dest_dir):
                        os.makedirs(dest_dir)

                    origin_list = os.listdir(_sub_dir)

                    for _image in origin_list:
                        _image = _image.strip()
                        if not _image.startswith("label-"):
                            continue

                        if not _image.endswith("png"):
                            continue

                        # start with label-
                        anna_file = _image[6:]
                        origin_name = anna_file[:-3] + "jpg"
                        image_path = os.path.join(_sub_dir, _image)
                        origin_image = os.path.join(_sub_dir, origin_name)
                        if not os.path.exists(origin_image):
                            os.remove(image_path)
                            self.logger.error(
                                "package:{}, file missing:[{}]=>[{}]".format(
                                    str(i), origin_name, _image))
                            continue

                        result_path = os.path.join(self.temp_dir, str(i),
                                                   anna_file)

                        task = Task(str(i), image_path, result_path,
                                    origin_image, False)
                        global_queue.remote_process_queue.put(task)
                for i in range(20):
                    task = Task(None, None, None, None, True)
                    global_queue.remote_process_queue.put(task)

                all_processes = []
                for i in range(20):
                    process = multiprocessing.Process(target=self.transform)
                    process.daemon = True
                    all_processes.append(process)

                for process in all_processes:
                    process.start()
                    self.logger.info(str(process.pid) + ", start")

                for process in all_processes:
                    process.join()
                    self.logger.info(str(process.pid) + ", join")

                # copy
                if _ver != "all":
                    cur_day = time.strftime("lane-aug-%Y%m%d",
                                            time.localtime())
                else:
                    cur_day = time.strftime("lane-all-%Y%m%d",
                                            time.localtime())
                copy_dir = self.temp_dir
                dir_list = os.listdir(copy_dir)

                if self.dest_scp_ip != global_variables.model_host.value:
                    files = self.dest_sftp.listdir(path=self.dest_dir)
                    self.dest_dir = os.path.join(self.dest_dir, cur_day)
                    # if cur_day in files:
                    #     self.rm(self.dest_dir)
                    if cur_day not in files:
                        self.dest_sftp.mkdir(self.dest_dir)

                for _dir in dir_list:
                    old_src = os.path.join(copy_dir, _dir)
                    files = self.dest_sftp.listdir(path=self.dest_dir)
                    if _dir in files:
                        self.rm(os.path.join(self.dest_dir, _dir))
                    self.dest_scp.put(old_src, self.dest_dir, recursive=True)

            time2 = time.time()
            result_obj = {
                "count": str(task_count),
                "time": str(time2 - time1) + " s"
            }
            resp = ServerResponse(err_code=err_code,
                                  err_info=None,
                                  result=result_obj)
            resp_str = resp.generate_response()
            self.logger.info(resp_str)

            self.write(resp_str)
        except Exception as err:
            err_info = repr(err)
            json_res = {"code": "99", "msg": str(err_info)}
            self.logger.error(json.dumps(json_res))
            self.write(json.dumps(json_res))

        self.finish()
Example 16
    def prepare_task(self, src_dir, dest_dir, start_package, cnt_per_package):
        # generate the annotation task packages
        src_len = len(src_dir)
        get_file(src_dir, self.file_list, src_len)

        total_count = len(self.file_list)
        file_index = 0
        total_index = 0
        package_index = start_package
        package_list = {}
        for _file_path in self.file_list:
            total_index += 1

            _file_id = os.path.basename(_file_path)

            package_dir = os.path.join(dest_dir, str(package_index))
            if not os.path.exists(package_dir):
                os.makedirs(package_dir)

            image_file = _file_id

            _file_name = _file_id.split(".")
            _file_name = _file_name[0]
            label_file = "label-" + _file_name + ".png"
            package_list[image_file] = label_file

            src_path = os.path.join(src_dir, _file_path)

            _image = cv2.imread(src_path)
            if _image is None:
                print(src_path)
                continue

            dest_path = os.path.join(package_dir, _file_id)
            dest_label = os.path.join(package_dir, label_file)

            _task = Task(
                package_index=str(package_index),
                src_path=src_path,
                dest_path=dest_path,
                dest_label=dest_label
            )
            global_queue.extend_queue.put(_task)

            file_index += 1
            if file_index == cnt_per_package:
                dest_file = str(package_index) + ".csv"
                dest_file_path = os.path.join(dest_dir, str(package_index), dest_file)

                with open(dest_file_path, "w") as f:
                    for _image, _label in package_list.items():
                        _str = "{},{}\n".format(_image, _label)
                        f.write(_str)

                package_list = {}
                file_index = 0
                package_index += 1
            elif total_index == total_count:
                dest_file = str(package_index) + ".csv"
                dest_file_path = os.path.join(dest_dir, str(package_index), dest_file)

                with open(dest_file_path, "w") as f:
                    for _image, _label in package_list.items():
                        _str = "{},{}\n".format(_image, _label)
                        f.write(_str)
        return
Example 17
    def get(self, *args, **kwargs):
        time_start = time.time()

        self.set_header('Access-Control-Allow-Origin', '*')
        self.set_header('Content-type', 'application/json')

        query_path = self.request.path
        if query_path != "/task":
            print(query_path)

        # {
        #     "images": source image directory; defaults to the images directory under the current directory,
        #     "step": number of items per task package; defaults to 20,
        #     "pixel": outer border size in pixels; defaults to 50,
        #     "start": starting task package index; recorded automatically by default,
        # }

        try:
            step = self.get_argument("step", "20")
            step = int(step)
            self.step = step

            if not os.path.exists(self.src_dir):
                task_count = 0
                err_code = 1
            else:
                err_code = 0
                dir_list = os.listdir(self.dest_dir)
                cur_max = 0
                for _dir in dir_list:
                    if _dir.isdigit():
                        if int(_dir) > cur_max:
                            cur_max = int(_dir)

                if self.start <= cur_max:
                    self.start = cur_max + 1

                self.prepare_task(
                    src_dir=self.src_dir,
                    dest_dir=self.dest_dir,
                    start_package=self.start,
                    cnt_per_package=self.step
                )

                task_count = global_queue.extend_queue.qsize()

                # start to process
                _task = Task(
                    package_index=None,
                    src_path=None,
                    dest_path=None,
                    dest_label=None,
                    exit_flag=True
                )
                global_queue.extend_queue.put(_task)

                process = multiprocessing.Process(target=self.do)
                process.daemon = True
                process.start()
                process.join()

                # start adding the outer borders
                for dir in range(self.start, max_packages):
                    time1 = time.time()

                    csv_name = str(dir) + ".csv"
                    csv_file = os.path.join(self.dest_dir, str(dir), csv_name)
                    if not os.path.exists(csv_file):
                        continue

                    self.prepare_extend(_csv_file=csv_file)

                    task = Task(None, None, None, None, True)
                    global_queue.divide_queue.put(task)

                    process = multiprocessing.Process(target=self.do_work)
                    process.daemon = True
                    process.start()
                    process.join()

                    time2 = time.time()

                    self.logger.info("process[{}] in {} s".format(dir, time2 - time1))

            time_end = time.time()
            result_obj = {
                "count": str(task_count),
                "time": str(time_end - time_start) + " s"
            }
            resp = ServerResponse(err_code=err_code, err_info=None, result=result_obj)
            resp_str = resp.generate_response()
            self.logger.info(resp_str)

            self.write(resp_str)
        except Exception as err:
            err_info = err.args[0]
            json_res = {"code": "99", "msg": str(err_info)}
            self.write(json.dumps(json_res))
            self.logger.error(json.dumps(json_res))
        except:
            self.write('{"code": "99", "msg": "unknown exception"}')
            self.logger.error('{"code": "99", "msg": "unknown exception"}')

        self.finish()
Example 18

    def train(self):
        print("Training the model...")
        self.entity_idxs = {d.entities[i]: i for i in range(len(d.entities))}
        self.relation_idxs = {
            d.relations[i]: i
            for i in range(len(d.relations))
        }
        self.idx2entity = {v: k for k, v in self.entity_idxs.items()}
        self.idx2relation = {v: k for k, v in self.relation_idxs.items()}

        if self.add_constraint == True:
            # constrain types
            Output_mask = torch.ones([
                len(self.idx2relation.keys()),
                len(self.idx2entity.keys()),
                len(self.idx2entity.keys())
            ],
                                     dtype=torch.float)
            if self.cuda:
                Output_mask = Output_mask.cuda()

            # gather objects, properties, and affordances
            task_names = ['situated-OP', 'situated-OA', 'situated-AP']
            task_mapping = defaultdict()

            for name in task_names:
                task_mapping[name] = Task(TASK_REV_MEDIUMHAND[name])

            objects, properties, affordances = get_entity_sets(task_mapping)

            for k in range(len(self.idx2relation.keys())):
                relation = self.idx2relation[k]
                for i in range(len(self.idx2entity.keys())):
                    for j in range(len(self.idx2entity.keys())):
                        e1 = self.idx2entity[i]
                        e2 = self.idx2entity[j]
                        if 'situated-OP' in relation:
                            if ((e1 in objects and e2 in properties)
                                    or (e2 in objects and e1 in properties)):
                                Output_mask[k, i, j] = 0.0
                        if 'situated-OA' in relation:
                            if ((e1 in objects and e2 in affordances)
                                    or (e2 in objects and e1 in affordances)):
                                Output_mask[k, i, j] = 0.0
                        if 'situated-AP' in relation:
                            if ((e1 in properties and e2 in affordances) or
                                (e2 in properties and e1 in affordances)):
                                Output_mask[k, i, j] = 0.0

        train_data_idxs = self.get_data_idxs(d.train_data)
        print("Number of training data points: %d" % len(train_data_idxs))

        if self.add_dropout_bn == True:
            model = TuckER(d, self.ent_vec_dim, self.rel_vec_dim,
                           **self.kwargs)
        else:
            model = TuckERNoDropoutBN(d, self.ent_vec_dim, self.rel_vec_dim,
                                      self.cuda)
        if self.cuda:
            model.cuda()
        model.init()
        opt = torch.optim.Adam(model.parameters(), lr=self.learning_rate)
        if self.decay_rate:
            scheduler = ExponentialLR(opt, self.decay_rate)

        er_vocab = self.get_er_vocab(train_data_idxs)
        er_vocab_pairs = list(er_vocab.keys())

        print("Starting training...")
        start_time = timeit.default_timer()
        for it in range(1, self.num_iterations + 1):
            model.train()
            losses = []
            np.random.shuffle(er_vocab_pairs)
            for j in range(0, len(er_vocab_pairs), self.batch_size):
                data_batch, targets = self.get_batch(er_vocab, er_vocab_pairs,
                                                     j)
                opt.zero_grad()
                e1_idx = torch.tensor(data_batch[:, 0])
                if self.do_link_prediction == True:
                    r_idx = torch.tensor(data_batch[:, 1])
                else:
                    e2_idx = torch.tensor(data_batch[:, 1])
                if self.cuda:
                    e1_idx = e1_idx.cuda()
                    if self.do_link_prediction == True:
                        r_idx = r_idx.cuda()
                    else:
                        e2_idx = e2_idx.cuda()
                if self.do_link_prediction == True:
                    outputs = model.forward_lp(e1_idx, r_idx)
                else:
                    outputs = model.forward(e1_idx, e2_idx)

                if self.add_dropout_bn == True:
                    predictions = outputs
                else:
                    predictions = outputs[0]
                    W = outputs[1]
                    E = outputs[2]
                    R = outputs[3]
                if self.label_smoothing:
                    targets = ((1.0 - self.label_smoothing) *
                               targets) + (1.0 / targets.size(1))

                loss = model.loss(predictions, targets)

                if self.add_constraint == True:
                    reg = self.reg
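                    # Reconstruct the full (relation x entity x entity) score tensor by
                    # contracting the core tensor W with E over both entity modes and
                    # with R over the relation mode (Tucker-style mode products).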

                    d1_want = R.size(0)
                    d2_want = E.size(0)
                    d3_want = d2_want
                    d1_in = R.size(1)
                    d2_in = E.size(1)
                    d3_in = d2_in

                    W_mat = torch.mm(E, W.view(d3_in, -1))
                    W_mat = W_mat.view(d1_in, d2_in, d3_want)

                    W_mat = torch.mm(E, W_mat.view(d2_in, -1))
                    W_mat = W_mat.view(d1_in, d2_want, d3_want)

                    W_mat = torch.mm(R, W_mat.view(d1_in, -1))
                    Output = W_mat.view(d1_want, d2_want, d3_want)

                    type_constraint = ((Output * Output_mask)**2).mean()
                    loss += (reg * type_constraint)
                loss.backward()
                opt.step()
                losses.append(loss.item())
            if self.decay_rate:
                scheduler.step()

            print('iteration {}: mean loss {:.4f}'.format(it, np.mean(losses)))
        stop_time = timeit.default_timer()
        print('average training time per iteration: {}'.format(
            (stop_time - start_time) / self.num_iterations))

        if self.do_link_prediction == True:
            model.eval()
            with torch.no_grad():
                print("Test:")
                self.evaluate_link_prediction(model, d.test_data)

        if not os.path.exists(self.saved_model_path):
            os.makedirs(self.saved_model_path)
        torch.save(model, os.path.join(self.saved_model_path, 'model.pt'))
        joblib.dump([self.entity_idxs, self.relation_idxs],
                    os.path.join(self.saved_model_path, 'dic.pkl'))
Example 19
    def prepare_crop(self, _csv_file):
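        """Copy the listing file and the original images of one ext- package into a
        per-package temp directory, and queue the ext- label files for processing."""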
        root_dir = os.path.dirname(_csv_file)
        csv_name = os.path.basename(_csv_file)

        name_list = csv_name.split(".")
        csv_name = name_list[0]
        if csv_name.startswith("ext-"):
            csv_name = csv_name[4:]
        package_index = csv_name

        list_file = csv_name + ".csv"
        list_path = os.path.join(root_dir, list_file)

        temp_dir = os.path.join(self.temp_dir, package_index)
        if not os.path.exists(temp_dir):
            os.mkdir(temp_dir)

        dest_list_path = os.path.join(temp_dir, list_file)
        if os.path.exists(list_path):
            shutil.copy(list_path, dest_list_path)
        else:
            list_file = csv_name + ".txt"
            list_path = os.path.join(root_dir, list_file)
            dest_list_path = os.path.join(temp_dir, list_file)
            if os.path.exists(list_path):
                shutil.copy(list_path, dest_list_path)

        with open(_csv_file, "r") as f:
            line_str = f.readline()

            while line_str:
                image, label = line_str.split(",")
                image = image.strip()
                label = label.strip()

                if not image.startswith("ext-") or not label.startswith("ext-"):
                    line_str = f.readline()
                    continue

                origin_image = os.path.join(root_dir, image[4:])
                if not os.path.exists(origin_image):
                    line_str = f.readline()
                    self.logger.error("package:{}, file missing:[{}]=>[{}]".format(package_index, image, label))
                    continue
                else:
                    dest_origin_image = os.path.join(temp_dir, image[4:])
                    shutil.copy(origin_image, dest_origin_image)

                if not os.path.exists(os.path.join(root_dir, label)):
                    line_str = f.readline()
                    self.logger.error("package:{}, file missing:[{}]=>[{}]".format(package_index, image, label))
                    continue

                _src = os.path.join(root_dir, label)
                _dest_id2 = label[4:]
                _dest_label = os.path.join(temp_dir, _dest_id2)
                _dest_label = _dest_label.strip()

                task = Task(package_index, _src, _dest_label, None)
                self.queue.put(task)

                line_str = f.readline()
Example 20
                   map_location=torch.device('cpu'))

if args.cuda:
    model = model.cuda()
entity_idxs = dic[0]
relation_idxs = dic[1]
entity_reverse_idxs = {i: name for name, i in entity_idxs.items()}
relation_reverse_idxs = {i: name for name, i in relation_idxs.items()}
model.eval()

# gather objects, properties, and affordances
task_names = ['situated-OP', 'situated-OA', 'situated-AP']
task_mapping = defaultdict()

for name in task_names:
    task_mapping[name] = Task(TASK_REV_MEDIUMHAND[name])

objects, properties, affordances = get_entity_sets(task_mapping)


def e12_type(e1, e2):
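    """Return the situated relation ('situated-OP', 'situated-OA' or 'situated-AP')
    that the entity pair can form, or 'nothing' if the types do not match."""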
    if (e1 in objects and e2 in properties):
        return 'situated-OP'
    elif (e1 in objects and e2 in affordances):
        return 'situated-OA'
    elif (e1 in affordances and e2 in properties):
        return 'situated-AP'
    else:
        return 'nothing'

Example 21
                # style = style_name.split(" ")[1]
                style = level + module + list_type + language
            else:
                level = '1'
                list_type = ''
                [upParaId, no] = getUpId_no(paraId, 0)
                style = level + module + list_type + language
            para = Paragraph_(paraId, upParaId, style, no, content).__dict__
            paras.append(para)
            i += 1

    return paras, articleTittle, stuName, teacherName, majorName, gradTime


if __name__ == '__main__':
    paragraphs, articleTittle, stuName, teacherName, majorName, gradTime = read_word("demo3.docx")

    task = Task("e6c26921-3ec6-48b6-bb73-efd48cef969f",
                "428d81a2-30b7-4960-9535-c1c0e74e9677",
                "366775a1-f341-4e0b-ae45-382199d6c978",
                paragraphs,
                articleTittle, stuName, teacherName, majorName, gradTime)

    f = open("./test.json", 'w')
    f.write(json.dumps(task, default=task_2_json,
                       ensure_ascii=False,
                       sort_keys=True,
                       indent=4,
                       separators=(',', ': ')))
    f.close()
Example 22
    model = DistMult(ent_tot=len(ent_list), rel_tot=len(rel_list), dim=200)
elif args.model == 'complex':
    model = ComplEx(ent_tot=len(ent_list), rel_tot=len(rel_list), dim=200)
elif args.model == 'simple':
    model = SimplE(ent_tot=len(ent_list), rel_tot=len(rel_list), dim=200)

model = model.cpu()

start_time = timeit.default_timer()

task_names = ['situated-OP', 'situated-OA', 'situated-AP']

for task_name in task_names:
    print('{} task'.format(task_name))

    task = Task(TASK_REV_MEDIUMHAND[task_name])
    samples = task.get_test_examples()

    y_hat = []
    y = []
    names = []
    for sample in samples:
        names.append(sample.name)
        head, tail = sample.name.split('/')
        if task_name == 'situated-OP':
            res = openke_predict(model, np.array(ent_list[head + '-o']),
                                 np.array(ent_list[tail + '-p']),
                                 np.array([[0], [1], [2]]), 0)
        elif task_name == 'situated-OA':
            res = openke_predict(model, np.array(ent_list[head + '-o']),
                                 np.array(ent_list[tail + '-a']),
Example 23
    def get(self, *args, **kwargs):
        time1 = time.time()

        self.set_header('Access-Control-Allow-Origin', '*')
        self.set_header('Content-type', 'application/json')

        # Expected request parameters:
        # {
        #     "images": "source image directory; defaults to the images directory under the current directory",
        #     "pixel": outer border size in pixels; defaults to 50,
        # }
        try:
            _ver = self.get_argument("version", "all")
            self.src_dir = os.path.join(self.src_dir, _ver)

            if not os.path.exists(self.src_dir):
                task_count = 0
                err_code = 1
                check_result = "dir[{}] is not exist".format(self.src_dir)
            else:
                if not os.path.exists(self.temp_dir):
                    os.mkdir(self.temp_dir)
                else:
                    # clean this directory
                    tmp_dirs = os.listdir(self.temp_dir)
                    for tmp_dir in tmp_dirs:
                        if not tmp_dir.isdigit():
                            continue
                        tmp_path = os.path.join(self.temp_dir, tmp_dir)
                        if os.path.isfile(tmp_path):
                            os.remove(tmp_path)
                        else:
                            shutil.rmtree(tmp_path)

                err_code = 0
                for dir in range(max_packages):
                    csv_name = "ext-" + str(dir) + ".csv"
                    csv_file = os.path.join(self.src_dir, str(dir), csv_name)
                    if not os.path.exists(csv_file):
                        continue

                    self.clean_dir(_csv_file=csv_file)
                    self.prepare_crop(_csv_file=csv_file)

                task_count = self.queue.qsize()
                # start to process
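                # A Task with the final flag set to True is presumably a stop sentinel
                # that tells the do_work loop to exit once the real tasks are drained.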
                task = Task(None, None, None, None, True)
                self.queue.put(task)

                process = multiprocessing.Process(target=self.do_work)
                process.start()
                self.logger.info(str(process.pid) + ", start")
                process.join()
                self.logger.info(str(process.pid) + ", join")

                for i in range(max_packages):
                    _sub_dir = os.path.join(self.temp_dir, str(i))
                    if not os.path.exists(_sub_dir):
                        continue

                    dest_dir = os.path.join(self.temp_dir, str(i))
                    if not os.path.exists(dest_dir):
                        os.mkdir(dest_dir)

                    origin_list = os.listdir(_sub_dir)

                    for _image in origin_list:
                        _image = _image.strip()
                        if not _image.startswith("label-"):
                            continue

                        name_list = _image.split('.')
                        if len(name_list) < 2:
                            continue

                        ext_name = name_list[1]
                        if ext_name != 'png' and ext_name != 'jpg':
                            continue

                        # start with label-
                        label_file = name_list[0]
                        if label_file.startswith("label-"):
                            label_file = label_file[6:]
                        anna_file = label_file + ".png"

                        origin_name = label_file + ".jpg"
                        image_path = os.path.join(_sub_dir, _image)
                        origin_image = os.path.join(_sub_dir, origin_name)
                        if not os.path.exists(origin_image):
                            os.remove(image_path)
                            self.logger.error("package:{}, file missing:[{}]=>[{}]".format(str(i), origin_name, _image))
                            continue

                        result_path = os.path.join(self.temp_dir, str(i), anna_file)

                        task = Task(str(i), image_path, result_path, None, False)
                        self.task_queue.put(task)
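                # enqueue one stop sentinel per transform worker (20 workers are started below)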
                for i in range(20):
                    task = Task(None, None, None, None, True)
                    self.task_queue.put(task)

                all_processes = []
                for i in range(20):
                    process = multiprocessing.Process(target=self.transform)
                    all_processes.append(process)

                for process in all_processes:
                    process.start()
                    self.logger.info(str(process.pid) + ", start")

                for process in all_processes:
                    process.join()
                    self.logger.info(str(process.pid) + ", join")

                # # copy
                # cur_day = time.strftime("lane-%Y%m%d", time.localtime())
                # dir_list = os.listdir(self.temp_dir)
                #
                # # create the remote directory
                # if self.dest_scp_ip != host_ip:
                #     files = self.dest_sftp.listdir(path=self.dest_dir)
                #     self.dest_dir = os.path.join(self.dest_dir, cur_day)
                #     if cur_day in files:
                #         self.rm(self.dest_dir)
                #     self.dest_sftp.mkdir(self.dest_dir)

                # validation
                check_model = CheckData(gpu_id=0)
                check_result = check_model.run()

            time2 = time.time()
            result_obj = {
                "count": str(task_count),
                "time": str(time2-time1)+" s",
                "doubt": str(len(check_result)),
                "message": check_result
            }
            resp = ServerResponse(err_code=err_code, err_info=None, result=result_obj)
            resp_str = resp.generate_response()
            self.logger.info(resp_str)

            self.write(resp_str)
        except Exception as err:
            err_info = repr(err)
            json_res = {"code": "99", "msg": str(err_info)}
            self.logger.error(json.dumps(json_res))
            self.write(json.dumps(json_res))
        except:
            self.write('{"code": "99", "msg": "unknown exception"}')
            self.logger.error('{"code": "99", "msg": "unknown exception"}')

        self.finish()
Example 24
        utils.printf(f'\tValid PPL: {ppl: 6.2f}\n')

    def auto_test(self):
        test_input = ['what is your name ?', 'how old are you ?']
        for input in test_input:
            bs_ret = self.beam_search.decode(input.split())
            print(input)
            for sentence in bs_ret[:5]:
                print(' '.join(self.task.dec_vocab.ids2word(sentence['ids'])),
                      sentence['prob'])
            print('==')
        print('\n')


if __name__ == '__main__':
    task = Task(config)

    if len(sys.argv) > 2:
        # load checkpoint
        task.load(mode='train', ckpt_path=sys.argv[1], model_name=sys.argv[2])
    else:
        # retrain from scratch
        if config['train']['silence']:
            # run silently: redirect stdout to a log file
            sys.stdout = open('train.log', 'w')
        task.load(mode='train')

    trainer = Trainer(task)
    trainer.train()
    # task.save('./ckpt')
Example 25
data2 = [(0, [(1, 5)]), (1, [(1, 7)]), (2, [(1, 6)])]

datas = []
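# build 5 random jobs: each job is (arrival_time, [(machine_id, duration), ...])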
for _ in range(5):
    job = (random.randint(1, 5), [])

    for _ in range(3):
        job[1].append((random.randint(1, 3), random.randint(1, 5)))
    datas.append(job)

data = datas

if __name__ == '__main__':
    # Task(machine_id, duration, order)
    jobs = [Job(id=i + 1, arrival_time=row[0]) for i, row in enumerate(data)]

    for i, row in enumerate(data):
        for col in row[1]:
            jobs[i].add_task(Task(machine_id=col[0], duration=col[1]))

    def simulate(jobs, rule):
        simulator = JobShopSimulator(env=simpy.Environment(),
                                     jobs=jobs,
                                     rule=rule)
        simulator.run(until=50)
        simulator.plot()

    simulate(jobs, 'FIFO')
    simulate(jobs, 'LIFO')
    simulate(jobs, 'SPT')
Example 26
def eval_tc(saved_model_path):
    batch_size = 128
    cuda = True if torch.cuda.is_available() else False
    add_constraint = True
    add_dropout_bn = False

    dic = joblib.load(os.path.join(saved_model_path, 'dic.pkl'))
    model = torch.load(os.path.join(saved_model_path, 'model.pt'),
                       map_location=torch.device('cpu'))

    if cuda:
        model = model.cuda()
    entity_idxs = dic[0]
    relation_idxs = dic[1]
    entity_reverse_idxs = {i: name for name, i in entity_idxs.items()}
    relation_reverse_idxs = {i: name for name, i in relation_idxs.items()}
    model.eval()

    # gather objects, properties, and affordances
    task_names = ['situated-OP', 'situated-OA', 'situated-AP']
    task_mapping = defaultdict()

    for name in task_names:
        task_mapping[name] = Task(TASK_REV_MEDIUMHAND[name])

    objects, properties, affordances = get_entity_sets(task_mapping)

    def e12_type(e1, e2):
        if (e1 in objects and e2 in properties):
            return 'situated-OP'
        elif (e1 in objects and e2 in affordances):
            return 'situated-OA'
        elif (e1 in affordances and e2 in properties):
            return 'situated-AP'
        else:
            return 'nothing'

    res = []
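    # For each test pair, score all relations; when add_constraint is set the pair
    # is labeled positive if its relation outscores the corresponding NOT- relation,
    # otherwise a fixed 0.5 threshold on the relation score is used.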
    for task_name, task in task_mapping.items():
        print('{} task'.format(task_name))

        y_hat = []
        y = []
        names = []
        input1 = []
        input2 = []
        test_samples = task.get_test_examples()

        for th, sample in enumerate(test_samples):
            names.append(sample.name)

            e12 = sample.name.split('/')
            input1.append(entity_idxs[e12[0]])
            input2.append(entity_idxs[e12[1]])

            if (th + 1) % batch_size == 0 or th + batch_size >= len(
                    test_samples):
                input1 = torch.tensor(input1)
                input2 = torch.tensor(input2)
                if cuda:
                    input1 = input1.cuda()
                    input2 = input2.cuda()

                outputs = model.forward(input1, input2)
                if add_dropout_bn == True:
                    predictions = outputs
                else:
                    predictions = outputs[0]
                    W = outputs[1]
                    E = outputs[2]
                    R = outputs[3]

                if cuda:
                    predictions = predictions.cpu()

                for k, e12 in enumerate(zip(input1, input2)):
                    e1 = e12[0].cpu().item()
                    e2 = e12[1].cpu().item()
                    r = e12_type(entity_reverse_idxs[e1],
                                 entity_reverse_idxs[e2])
                    if add_constraint == True:
                        if r != 'nothing':
                            v1 = predictions[k, relation_idxs[r]]
                            v2 = predictions[k, relation_idxs['NOT-' + r]]
                            if v1 > v2:
                                y_hat.append(1)
                            else:
                                y_hat.append(0)
                        else:
                            y_hat.append(0)
                    else:
                        if r != 'nothing':
                            v = predictions[k, relation_idxs[r]].item()
                            if v >= 0.5:
                                y_hat.append(1)
                            else:
                                y_hat.append(0)
                        else:
                            y_hat.append(0)

                input1 = []
                input2 = []
            y.append(int(sample.label))
        y = np.array(y)
        y_hat = np.array(y_hat)
        acc, micro_f1, macro_f11, macro_f12 = metrics.report_more4cv(
            y_hat, y, names, TASK_LABELS[TASK_REV_MEDIUMHAND[task_name]])
        res.append((acc, micro_f1, macro_f11, macro_f12))

    return res