Example #1
def generateParams(filters: list):
    paramsStr = ''

    # Convert the filters into a list of key strings
    strList = arrays.mapcat(filters, lambda item: item['key'])

    # Get the groups of indexes for similar keys
    indexs = DataUtils.enumSimilarityGroup(strList)

    # Walk each group; if the key looks like a time field, keep its index group, e.g. [[1, 2], [4, 5]]
    tempList = []
    for index in range(len(indexs)):
        key = strList[indexs[index][0]].lower()
        if 'time' in key or 'date' in key:
            tempList.append(indexs[index])

    tarList = DataUtils.convertTimeGroup(filters, tempList)

    for item in tarList:
        # Input type
        if item['type'] == 'string':
            paramsStr = paramsStr + generateInput(item)
        # Date/time type
        elif item['type'] in ['integer', 'long', 'number'] and DataUtils.isLikeDate(item['key']):
            paramsStr = paramsStr + generateDate(item)
        # Select type
        elif DataUtils.isSelectType(item):
            paramsStr = paramsStr + generateSelect(item)
        # Unknown types are all rendered as inputs
        else:
            paramsStr = paramsStr + generateInput(item)

    return paramsStr
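generateParams above walks a list of filter descriptors and concatenates one TSX fragment per field. A minimal usage sketch, assuming filters is a list of dicts carrying at least the 'key' and 'type' entries the function reads (the field names below are made up for illustration, and the actual output depends on the DataUtils / generateInput / generateDate / generateSelect helpers, which are not shown):

# Hypothetical filter descriptors; only the 'key' and 'type' fields used above are assumed.
filters = [
    {'key': 'userName', 'type': 'string'},
    {'key': 'createTimeStart', 'type': 'long'},
    {'key': 'createTimeEnd', 'type': 'long'},
]
params_tsx = generateParams(filters)
print(params_tsx)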
Example #2
    def __init__(self, args):
        self.args = args
        with open(args.config, 'r') as stream:
            config = yaml.load(stream, Loader=yaml.SafeLoader)
            self.config = config[self.args.task]
        if args.model_type == 'transformer':
            self.data_utils = DataUtils(self.config, args.train, args.task)
        elif args.model_type == 'bert':
            assert args.task == 'seq2seq'
            self.data_utils = bert_utils(self.config, args.train, args.task)
        if args.train and args.save_checkpoints:
            self.model_dir = make_save_dir(
                os.path.join(args.model_dir, args.task, args.exp_name))
        self._disable_comet = args.disable_comet
        self._model_type = args.model_type
        self._save_checkpoints = args.save_checkpoints

        ###### loading .... ######
        print("====================")
        print("start to build model")
        print('====================')
        vocab_size = self.data_utils.vocab_size
        print("Vocab Size: %d" % (vocab_size))
        self.model = self.make_model(src_vocab=vocab_size,
                                     tgt_vocab=vocab_size,
                                     config=self.config['model'])
Example #3
 def on_pushButton_2_clicked(self):
     # Save path
     savePath = self.lineEdit_2.text()
     # Generate the TSX for TableContent
     fieldsIndex = self.tableWidget_3.selectedIndexes()
     fieldsIndex = DataUtils.getSelectIndexs(fieldsIndex, 2)
     fieldsData = DataUtils.getSelectFilter(fieldsIndex, self.fields)
     fieldsData = DataUtils.convertSelectFields(fieldsData)
     CreaterTools.generateContent(fieldsData, savePath)
     self.label_7.setText(u'状态:生成Content成功!')
     self.label_7.repaint()
Example #4
 def on_pushButton_3_clicked(self):
     # Save path
     savePath = self.lineEdit_2.text()
     # Generate the TSX for the manage section
     paramsItems = self.tableWidget_2.selectedIndexes()
     paramsIndexs = DataUtils.getSelectIndexs(paramsItems)
     filteredData = DataUtils.getSelectFilter(paramsIndexs, self.params)
     filteredData = DataUtils.convertSelectFilter(filteredData)
     CreaterTools.generateManage(savePath, filteredData, self.currentItem)
     self.label_7.setText('状态:生成Manage成功!')
     self.label_7.repaint()
Example #5
    def on_pushButton_clicked(self):
        # Save path
        savePath = self.lineEdit_2.text()

        # Generate the TSX form for the query-parameter filter section
        paramsItems = self.tableWidget_2.selectedIndexes()
        paramsIndexs = DataUtils.getSelectIndexs(paramsItems)
        filteredData = DataUtils.getSelectFilter(paramsIndexs, self.params)
        filteredData = DataUtils.convertSelectFilter(filteredData)
        CreaterTools.generateFilterForm(filteredData, savePath)
        self.label_7.setText(u'状态:生成FilterForm成功!')
        self.label_7.repaint()
Example #6
def getInterfaceCount(tags):
    rowIndex = 0
    for row in range(len(tags)):
        for childIndex in range(len(tags[row]['child'])):
            rowData = tags[row]['child'][childIndex]
            methodTypes = DataUtils.getValidMethod(rowData)
            rowIndex += len(methodTypes)
    return rowIndex
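getInterfaceCount sums, over every child entry of every tag, the number of HTTP methods that DataUtils.getValidMethod reports as valid. A small sketch of the input shape the loop assumes (the concrete keys below are illustrative only):

# Illustrative tag structure; only the 'child' list and per-method keys are assumed here.
tags = [
    {'name': 'user', 'child': [
        {'path': '/user', 'get': {'summary': 'list users'}, 'post': {'summary': 'create user'}},
    ]},
]
# If getValidMethod returns ['get', 'post'] for the entry above, the count is 2.
print(getInterfaceCount(tags))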
Example #7
def train(args):
    graph_file = './data/%s/%s.npz' % (args.name, args.name)
    graph_file = graph_file.replace('.npz', '_train.npz')
    data_loader = DataUtils(graph_file)

    n = args.n_trials
    res_hom, res_het = [0] * n, [0] * n
    tm = [0] * n
    for i in range(n):
        tm[i] = TrialManager(args=copy.deepcopy(args),
                             ind=i,
                             data_loader=data_loader)
    import tensorflow
    tf = tensorflow.compat.v1

    sess = tf.Session()
    tf.global_variables_initializer().run(session=sess)
    losses = []

    with sess.as_default():
        for b in range(1, args.num_batches + 1):
            fd = {}
            to_comp = []
            for to_comp1, fd1 in map(train_batch_command, tm):
                to_comp.extend(to_comp1)
                for k, v in fd1.items():
                    fd[k] = v
            res = sess.run(to_comp, feed_dict=fd)
            losses.append(res[0::2])
            if (b % 25) == 0:
                losses = np.array(losses)
                for i in range(n):
                    res, val_hom_auc = tm[i].test()
                    best_test_hom_auc, best_test_het_auc = res['hom'], res[
                        'het']
                    res_hom[i], res_het[
                        i] = best_test_hom_auc * 100, best_test_het_auc * 100
                    print(
                        f'batch:{b:8} - '
                        f'time:{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())} - '
                        f'loss:{np.mean(losses[:, i]):.4f} - '
                        f'val(hom):{val_hom_auc*100:.4f} - '
                        f'test(by best val):[hom:{best_test_hom_auc:.4f},het:{best_test_het_auc:.4f}]'
                    )
                losses = []
        print('finished')

    def stats(x):
        return f'{np.mean(x):.2f}, {np.std(x) / np.sqrt(len(x)):.2f}'

    print('hom', stats(res_hom), [f'{xx:.2f}' for xx in res_hom])
    print('het', stats(res_het), [f'{xx:.2f}' for xx in res_het])
Example #8
 def _distance_edge_server_base_station(self, edge_server: EdgeServer,
                                        base_station: BaseStation) -> float:
     """
     Calculate distance between given edge server and base station
     
     :param edge_server: 
     :param base_station: 
     :return: distance(km)
     """
     if edge_server.base_station_id:
         return self.distances[edge_server.base_station_id][base_station.id]
     return DataUtils.calc_distance(edge_server.latitude,
                                    edge_server.longitude,
                                    base_station.latitude,
                                    base_station.longitude)
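The method returns a precomputed distance when the edge server is already attached to a base station and otherwise falls back to DataUtils.calc_distance. The implementation of calc_distance is not shown; below is a minimal sketch of the great-circle (haversine) distance in kilometres that such a helper typically computes from the latitude/longitude pairs used above (an assumption, not the library's actual code):

import math

def haversine_km(lat1, lng1, lat2, lng2):
    # Great-circle distance in kilometres between two latitude/longitude points.
    earth_radius_km = 6371.0
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    d_phi = math.radians(lat2 - lat1)
    d_lambda = math.radians(lng2 - lng1)
    a = math.sin(d_phi / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(d_lambda / 2) ** 2
    return 2 * earth_radius_km * math.asin(math.sqrt(a))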
Example #9
def caculateImportance(source_id):
    # Connect to the database and fetch the information for this source_id
    user = "******"
    password = '******'
    write_db = 'pp_conf'
    host = 'localhost'
    # source_id = getSourceId()
    conn_conf = DataUtils.getConfigDBConn()
    sql = "select name, url, sqld, user, pwd from isf_data_source_conf where uuid='" + str(
        source_id) + "'"
    df_conf = pd.read_sql(sql, con=conn_conf)
    df_conf.head()

    name = df_conf.name[0]
    url = df_conf.url[0]
    sql = df_conf.sqld[0]
    print("name is: ", name, "url is: ", url)

    url_parts = url.split('/')
    host = url_parts[0].split(":")[0]
    port = int(url_parts[0].split(":")[1])
    db = url_parts[1]
    conf = df_conf.head(1)
    user = df_conf.user[0]
    password = df_conf.pwd[0]
    print('host is: ', host, 'port is: ', port, 'db: ', db, 'user: ', user)
    # The connection call is masked ('******') in the source; a MySQL connection with
    # charset='utf8' is implied. pymysql is assumed here purely for illustration.
    conn = pymysql.connect(host=host, port=port, user=user, password=password, db=db, charset='utf8')
    df = pd.read_sql(sql, con=conn)
    df = df.drop(["REPORT_DATE"], axis=1)
    df = df.astype(float)
    json = df.corr()['QLI'].to_json()

    # update the factor_importance to config database
    insert_sql = "update isf_forecast_factor set factor_impact='" + json + "' where ds_conf_id=" + str(
        source_id)
    print(insert_sql)
    cursor = conn_conf.cursor()
    cursor.execute(insert_sql)
    # Commit so the UPDATE persists (assuming autocommit is not enabled on this connection).
    conn_conf.commit()
Example #10
def get_data(csv_fname,
             video_fname,
             avg_fname,
             num_frames=None,
             start_frame=0,
             OBJECTS=['person'],
             resol=(50, 50),
             center=True,
             dtype='float32',
             train_ratio=0.6):
    def print_class_numbers(Y, nb_classes):
        classes = np_utils.probas_to_classes(Y)
        for i in xrange(nb_classes):
            print 'class %d: %d' % (i, np.sum(classes == i))

    print '\tParsing %s, extracting %s' % (csv_fname, str(OBJECTS))
    all_counts = DataUtils.get_binary(csv_fname,
                                      limit=num_frames,
                                      OBJECTS=OBJECTS,
                                      start=start_frame)  # binary matrix: whether each frame contains the target objects
    print '\tRetrieving all frames from %s' % video_fname
    all_frames = VideoUtils.get_all_frames(len(all_counts),
                                           video_fname,
                                           scale=resol,
                                           start=start_frame)
    print '\tSplitting data into training and test sets'
    X_train, X_test, Y_train, Y_test = to_test_train(avg_fname, all_frames,
                                                     all_counts)

    nb_classes = all_counts.max() + 1
    print '(train) positive examples: %d, total examples: %d' % \
     (np.count_nonzero(np_utils.probas_to_classes(Y_train)),
      len(Y_train))
    print_class_numbers(Y_train, nb_classes)
    print '(test) positive examples: %d, total examples: %d' % \
     (np.count_nonzero(np_utils.probas_to_classes(Y_test)),
      len(Y_test))
    print_class_numbers(Y_test, nb_classes)

    print 'shape of image: ' + str(all_frames[0].shape)
    print 'number of classes: %d' % (nb_classes)

    data = (X_train, Y_train, X_test, Y_test)
    return data, nb_classes
Example #11
def buildListData(tags, keyWord=''):
    listData = []

    for row in range(len(tags)):
        for childIndex in range(len(tags[row]['child'])):
            rowData = tags[row]['child'][childIndex]

            path = rowData['path']
            methodTypes = DataUtils.getValidMethod(rowData)

            for methodType in methodTypes:
                itemData = objects.clone_deep(
                    tags[row]['child'][childIndex][methodType])
                itemData = objects.assign(itemData, {
                    'path': path,
                    'type': methodType
                })
                listData.append(itemData)

    return listData
Example #12
class Solver():
    '''
    Do training, validation and testing.
    '''
    def __init__(self, args):
        self.args = args
        with open(args.config, 'r') as stream:
            config = yaml.load(stream, Loader=yaml.SafeLoader)
            self.config = config[self.args.task]
        if args.model_type == 'transformer':
            self.data_utils = DataUtils(self.config, args.train, args.task)
        elif args.model_type == 'bert':
            assert args.task == 'seq2seq'
            self.data_utils = bert_utils(self.config, args.train, args.task)
        if args.train and args.save_checkpoints:
            self.model_dir = make_save_dir(
                os.path.join(args.model_dir, args.task, args.exp_name))
        self._disable_comet = args.disable_comet
        self._model_type = args.model_type
        self._save_checkpoints = args.save_checkpoints

        ###### loading .... ######
        print("====================")
        print("start to build model")
        print('====================')
        vocab_size = self.data_utils.vocab_size
        print("Vocab Size: %d" % (vocab_size))
        self.model = self.make_model(src_vocab=vocab_size,
                                     tgt_vocab=vocab_size,
                                     config=self.config['model'])

    def make_model(self, src_vocab, tgt_vocab, config):

        "Helper: Construct a model from hyperparameters."
        if self._model_type == 'transformer':
            model = make_transformer_model(src_vocab, tgt_vocab, config)
        elif self._model_type == 'bert':
            tokenizer = BertTokenizer.from_pretrained('bert-base-chinese',
                                                      padding_side='left')
            num_added_tokens = tokenizer.add_tokens(self.data_utils.all_tokens)
            print('We have added %d tokens to the bert tokenizer.' %
                  num_added_tokens)
            self.data_utils.set_tokenizer(tokenizer)
            model = BERT(BertModel.from_pretrained('bert-base-chinese'),
                         self.config['max_len'], config['d_bert'],
                         self.data_utils.vocab_size)

        return model.cuda()

    def train(self):
        if not self._disable_comet:
            # logging
            COMET_PROJECT_NAME = 'weibo-stc'
            COMET_WORKSPACE = 'timchen0618'

            self.exp = Experiment(
                project_name=COMET_PROJECT_NAME,
                workspace=COMET_WORKSPACE,
                auto_output_logging='simple',
                auto_metric_logging=None,
                display_summary=False,
            )

            self.exp.add_tag(self.args.task)
            if self.args.task != 'pure_seq2seq':
                if self.args.processed:
                    self.exp.add_tag('processed')
                else:
                    self.exp.add_tag('unprocessed')
            if self.args.sampler_label != 'none':
                self.exp.add_tag(self.args.sampler_label)
            if self._model_type == 'bert':
                self.exp.add_tag('BERT')

            self.exp.set_name(self.args.exp_name)
            self.exp.log_parameters(self.config)
            self.exp.log_parameters(self.config['model'])

        # if finetune, load pretrain
        if self.args.task == 'finetune':
            lr = 5e-7
            state_dict = torch.load(self.args.load_model)['state_dict']
            print('loading model from %s ...' % self.args.load_model)
            self.model.load_state_dict(state_dict)
        else:
            lr = self.config['lr_init']
            if self.args.load_model is not None:
                state_dict = torch.load(self.args.load_model,
                                        map_location='cuda:%d' %
                                        self.args.gpuid)['state_dict']
                print('loading model from %s ...' % self.args.load_model)
                self.model.load_state_dict(state_dict)

        if self.args.pretrain_embedding:
            self.model.load_embedding(self.args.pretrain_embedding)

        # Optimizer and some info for logging.
        if self.config['optimizer'] == 'adam':
            optim = torch.optim.Adam(self.model.parameters(),
                                     lr=lr,
                                     betas=(0.9, 0.98),
                                     eps=1e-9,
                                     weight_decay=0)
        elif self.config['optimizer'] == 'adamw':
            optim = torch.optim.AdamW(self.model.parameters(),
                                      lr=lr,
                                      betas=(0.9, 0.98),
                                      eps=1e-9)
        else:
            raise NotImplementedError

        total_loss = []
        p_gen_list = []
        start = time.time()
        step = self.args.start_step
        print('starting from step %d' % step)

        for epoch in range(self.config['num_epoch']):
            self.model.train()
            train_data = self.data_utils.data_yielder(valid=False)

            for batch in train_data:
                # print('-'*30)
                # Whether do noam learning rate scheduling
                if self.config['noam_decay']:
                    if step % 5 == 1:
                        lr = self.config['lr'] * (
                            1 / (self.config['model']['d_model']**0.5)) * min(
                                (1 / (step)**0.5), (step) *
                                (1 / (self.config['warmup_steps']**1.5)))
                        if self.args.task == 'finetune':
                            lr /= self.config['lr_decay']
                        for param_group in optim.param_groups:
                            param_group['lr'] = lr

                tgt_mask = batch['tgt_mask'].long()
                y = batch['y'].long()

                if self._model_type == 'bert':
                    inp = batch['src']['input_ids'].cuda()
                    out = self.model.forward(inp)
                    pred = tens2np(out.topk(1, dim=-1)[1].squeeze())
                    p_gen_list.append(0.0)
                else:
                    tgt = batch['tgt'].long()
                    src = batch['src'].long()
                    src_mask = batch['src_mask'].long()

                    # Forwarding (with mask or not)
                    if self.config['pos_masking']:
                        out, p_gen = self.model.forward_with_mask(
                            src, tgt, src_mask, tgt_mask, batch['posmask'])
                    elif self.args.task == 'joint_gen' and self.config[
                            'greedy']:
                        out = self.model.forward_with_ss(
                            src, src_mask, tgt, self.config['max_decode_step'],
                            self.data_utils.bos)
                        # print('out', out.size())
                        p_gen = torch.zeros((1, 1))
                    else:
                        out, p_gen = self.model.forward(
                            src, tgt, src_mask, tgt_mask)

                    # Info for printing
                    pred = tens2np(out.topk(1, dim=-1)[1].squeeze())
                    p_gen = p_gen.mean()
                    p_gen_list.append(p_gen.item())

                loss = self.model.loss_compute(out, y, self.data_utils.pad)
                loss.backward()

                optim.step()
                optim.zero_grad()
                total_loss.append(tens2np(loss))

                # print out info
                if step % self.config['print_every_step'] == 0:
                    elapsed = time.time() - start
                    print(
                        "Epoch Step: %d Loss: %f  P_gen:%f Time: %f Lr: %4.6f"
                        % (step, np.mean(total_loss),
                           sum(p_gen_list) / len(p_gen_list), elapsed, lr))

                    if self._model_type == 'bert':
                        source_text = tens2np(inp.long())
                        target_text = tens2np(batch['y'].long())
                    elif self._model_type == 'transformer':
                        source_text = tens2np(batch['src'].long())
                        target_text = tens2np(batch['tgt'].long())

                    print('src:', self.data_utils.id2sent(source_text[0]))
                    print('tgt:', self.data_utils.id2sent(target_text[0]))
                    print('pred:', self.data_utils.id2sent(pred[0]))

                    # If using transformer, we want to see greedy decoding result
                    if self._model_type == 'transformer':
                        if self.config['pos_masking']:
                            greedy_text = self.model.greedy_decode(
                                src.long()[:1], src_mask[:1],
                                self.config['max_len'], self.data_utils.bos,
                                batch['posmask'][:1])
                        else:
                            greedy_text = self.model.greedy_decode(
                                src.long()[:1], src_mask[:1],
                                self.config['max_len'], self.data_utils.bos)
                        greedy_text = tens2np(greedy_text)
                        print('pred_greedy:',
                              self.data_utils.id2sent(greedy_text[0]))

                    # logging statistics
                    if not self._disable_comet:
                        self.exp.log_metric('Train Loss',
                                            np.mean(total_loss),
                                            step=step)
                        self.exp.log_metric('Lr', lr, step=step)
                    print()
                    start = time.time()
                    total_loss = []
                    p_gen_list = []

                # Do validation
                if step % self.config['valid_every_step'] == self.config[
                        'valid_every_step'] - 1:
                    self.validate(step)

                step += 1

    @torch.no_grad()
    def validate(self, step):
        print('*********************************')
        print('            Validation           ')
        print('*********************************')
        fw = open(self.args.w_valid_file, 'w')
        val_yielder = self.data_utils.data_yielder(valid=True)
        self.model.eval()
        total_loss = []

        # Validate one batch, writing valid hypothesis to file
        for batch in val_yielder:
            if self._model_type == 'bert':
                inp = batch['src']['input_ids'].cuda()
                out = self.model.forward(inp)
            else:
                # model is transformer
                batch['src'] = batch['src'].long()
                batch['tgt'] = batch['tgt'].long()

                if self.config['pos_masking']:
                    out, _ = self.model.forward_with_mask(
                        batch['src'], batch['tgt'], batch['src_mask'],
                        batch['tgt_mask'], batch['posmask'])
                else:
                    out, _ = self.model.forward(batch['src'], batch['tgt'],
                                                batch['src_mask'],
                                                batch['tgt_mask'])

            loss = self.model.loss_compute(out, batch['y'].long(),
                                           self.data_utils.pad)
            total_loss.append(loss.item())

            if self.config['pos_masking']:
                out = self.model.greedy_decode(batch['src'].long(),
                                               batch['src_mask'],
                                               self.config['max_len'],
                                               self.data_utils.bos,
                                               batch['posmask'])
            else:
                out = self.model.greedy_decode(batch['src'].long(),
                                               batch['src_mask'],
                                               self.config['max_len'],
                                               self.data_utils.bos)

            # Writing sentences to hypothesis file
            for l in out:
                sentence = self.data_utils.id2sent(l[1:], True)
                fw.write(sentence)
                fw.write("\n")

        fw.close()

        # Calculate BLEU score and log to comet if needed
        bleus = cal_bleu(self.args.w_valid_file, self.args.w_valid_tgt_file)
        if not self._disable_comet:
            self.exp.log_metric('BLEU-1', bleus[0], step=step)
            self.exp.log_metric('BLEU-2', bleus[1], step=step)
            self.exp.log_metric('BLEU-3', bleus[2], step=step)
            self.exp.log_metric('BLEU-4', bleus[3], step=step)

            self.exp.log_metric('Valid Loss',
                                sum(total_loss) / len(total_loss),
                                step=step)

        print('=============================================')
        print('Validation Result -> Loss : %6.6f' %
              (sum(total_loss) / len(total_loss)))
        print('=============================================')
        self.model.train()

        # Saving model checkpoints
        if self._save_checkpoints:
            print('saving!!!!')

            model_name = str(int(
                step / 1000)) + 'k_' + '%6.6f__%4.4f_%4.4f_' % (
                    sum(total_loss) / len(total_loss), bleus[0],
                    bleus[3]) + 'model.pth'
            state = {'step': step, 'state_dict': self.model.state_dict()}
            torch.save(state, os.path.join(self.model_dir, model_name))

    @torch.no_grad()
    def test(self):
        # Prepare model
        path = self.args.load_model
        state_dict = torch.load(path)['state_dict']

        self.model.load_state_dict(state_dict)

        # file path for prediction
        pred_dir = make_save_dir(self.args.pred_dir)
        filename = self.args.filename
        outfile = open(os.path.join(pred_dir, self.args.task, filename), 'w')

        # Start decoding
        data_yielder = self.data_utils.data_yielder()
        total_loss = []
        start = time.time()

        # If beam search, create sequence generator object
        self._beam_search = self.config['eval']['beam_size'] > 1
        # self._beam_search = True
        if self._beam_search:
            seq_gen = SequenceGenerator(
                self.model,
                self.data_utils,
                beam_size=self.config['eval']['beam_size'],
                no_repeat_ngram_size=self.config['eval']['block_ngram'])

        self.model.eval()
        step = 0

        # Run one batch
        for batch in data_yielder:
            step += 1
            if step % 10 == 1:
                print('Step ', step)

            # Decoding according to scheme
            if self._beam_search:
                out = seq_gen.generate(batch,
                                       pos_masking=self.config['pos_masking'],
                                       bos_token=self.data_utils.bos)
            else:
                max_length = self.config['max_len']
                if self.config['pos_masking']:
                    out = self.model.greedy_decode(batch['src'].long(),
                                                   batch['src_mask'],
                                                   max_length,
                                                   self.data_utils.bos,
                                                   batch['posmask'])
                else:
                    if self.args.task == 'joint_gen':
                        max_length = self.config['max_decode_step']
                    out = self.model.greedy_decode(batch['src'].long(),
                                                   batch['src_mask'],
                                                   max_length,
                                                   self.data_utils.bos)

            # Write sentences to file
            for l in out:
                if self._beam_search:
                    sentence = self.data_utils.id2sent(l[0]['tokens'][:-1],
                                                       True)
                else:
                    sentence = self.data_utils.id2sent(l[1:], True)
                outfile.write(sentence)
                outfile.write("\n")

        outfile.close()
Example #13
def train(args):
    graph_file = '/Users/bhagya/PycharmProjects/Old data/line-master data/%s/%s.npz' % (args.name, args.name)
    graph_file = graph_file.replace('.npz', '_train.npz') if not args.is_all else graph_file
    data_loader = DataUtils(graph_file, args.is_all)

    suffix = args.proximity
    args.X = data_loader.X if args.suf != 'oh' else sp.identity(data_loader.X.shape[0])
    if not args.is_all:
        args.val_edges = data_loader.val_edges
        args.val_ground_truth = data_loader.val_ground_truth

    m = args.model
    name = m + '_' + args.name
    if m == 'lace':
        model = LACE(args)
    elif 'glace' == m:
        model = GLACE(args)

    with tf.Session() as sess:
        print('-------------------------- ' + m + ' --------------------------')
        if model.val_set:
            print('batches\tloss\tval_auc\tval_ap\tsampling time\ttraining_time\tdatetime')
        else:
            print('batches\tloss\tsampling time\ttraining_time\tdatetime')

        tf.global_variables_initializer().run()
        sampling_time, training_time = 0, 0

        for b in range(args.num_batches):
            t1 = time.time()
            u_i, u_j, label, w = data_loader.fetch_next_batch(batch_size=args.batch_size, K=args.K)
            feed_dict = {model.u_i: u_i, model.u_j: u_j, model.label: label}
            t2 = time.time()
            sampling_time += t2 - t1

            loss, _ = sess.run([model.loss, model.train_op], feed_dict=feed_dict)

            training_time += time.time() - t2

            if model.val_set:
                if b % 50 == 0:
                    val_energy = sess.run(model.neg_val_energy)
                    val_auc, val_ap = score_link_prediction(data_loader.val_ground_truth, val_energy)
                    print('%d\t%f\t%f\t%f\t%0.2f\t%0.2f\t%s' % (b, loss, val_auc, val_ap, sampling_time, training_time,
                                                                    time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
                    sampling_time, training_time = 0, 0
            else:
                if b % 50 == 0:
                    print('%d\t%f\t%0.2f\t%0.2f\t%s' % (b, loss, sampling_time, training_time,
                                                        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
                    sampling_time, training_time = 0, 0

            if b % 50 == 0 or b == (args.num_batches - 1):
                if m == 'glace':
                    mu, sigma = sess.run([model.embedding, model.sigma])
                    pickle.dump({'mu': data_loader.embedding_mapping(mu),
                                 'sigma': data_loader.embedding_mapping(sigma)},
                                open('emb/%s%s_embedding_%s.pkl' % (name, '_all' if args.is_all else '', suffix), 'wb'))
                    # if model.val_set:
                    #     r = kl_link_pred(mu, sigma, test_edges)
                    #     print('{:.4f}, {:.4f}'.format(r[0], r[1]))
                else:
                    embedding = sess.run(model.embedding)
                    pickle.dump(data_loader.embedding_mapping(embedding),
                                open('emb/%s%s_embedding_%s.pkl' % (name, '_all' if args.is_all else '', suffix), 'wb'))
Example #14
import logging

from algorithms import *
from utils import DataUtils

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)

    data = DataUtils('data/基站经纬度.csv', 'data/上网信息输出表(日表)6月15号之后.csv')
    mip_placer = MIPServerPlacer(data.base_stations, data.distances)
    mip_placer.place_server(10, 2)
    print(mip_placer.objective_latency(), mip_placer.objective_workload())
    # kmeans_placer = KMeansServerPlacement(data.base_stations, data.distances)
    # kmeans_placer.place_server(300, 30)
    # print(kmeans_placer.objective_latency(), kmeans_placer.objective_workload())
    # top_k_placer = TopKServerPlacement(data.base_stations, data.distances)
    # top_k_placer.place_server(300, 30)
    # print(top_k_placer.objective_latency(), top_k_placer.objective_workload())
    # random_placer = RandomServerPlacement(data.base_stations, data.distances)
    # random_placer.place_server(300, 30)
    # print(random_placer.objective_latency(), random_placer.objective_workload())
    pass
Example #15
 def load_data(self):
     du = DataUtils(self.cfg)
     self.train = du.train
     self.dev = du.dev
Example #16
def generateManage(path: str, filters, apiItem):
    fileName = path + os.sep + 'Manage.tsx'

    contentTpl = FileTools.readFile(tplPaths['manage'])

    # Find the index groups of time fields among the filters
    # Convert the filters into a list of key strings
    strList = arrays.mapcat(filters, lambda item: item['key'])

    # Get the groups of indexes for similar keys
    indexs = DataUtils.enumSimilarityGroup(strList)

    # Walk each group; if the key looks like a time field, keep its index group, e.g. [[1, 2], [4, 5]]
    tempList = []
    for index in range(len(indexs)):
        key = strList[indexs[index][0]].lower()
        if 'time' in key or 'date' in key:
            tempList.append(indexs[index])

    # Build the mapping between the similar index groups and the actual form keys
    # (the correspondence between formKey and filterKey)
    keyMaps = DataUtils.buildMaps(tempList, filters)

    # Build the replacement code fragments in a loop
    timeTpl = ''
    for formKey in keyMaps:
        tempKey = keyMaps[formKey][0].lower()
        if 'to' in tempKey or 'end' in tempKey:
            timeTpl += 'filterDump.' + keyMaps[formKey][
                1] + '= getValue(filterDump,\'' + formKey + '.startTime\',undefined);'
            timeTpl += 'filterDump.' + keyMaps[formKey][0] + '= getValue(filterDump,\'' + formKey + '.endTime\',undefined);'
        else:
            timeTpl += 'filterDump.' + keyMaps[formKey][
                0] + '= getValue(filterDump,\'' + formKey + '.startTime\',undefined);'
            timeTpl += 'filterDump.' + keyMaps[formKey][1] + '= getValue(filterDump,\'' + formKey + '.endTime\',undefined);'

        timeTpl += 'delete filterDump.' + formKey + ';'

    print(timeTpl)

    # filter.contractStartDate = filter.signDate && filter.signDate.startTime;
    #   filter.contractEndDate = filter.signDate && filter.signDate.endTime;

    # Perform the replacement

    # Find the pagination parameters

    # Perform the replacement

    # # Manage filter-condition section replacement
    # REPLACE_MANAGE_FILTER = '##REPLACE_MANAGE_FILTER##'
    # # Manage api export name
    # REPLACE_MANAGE_API = '##REPLACE_MANAGE_API##'
    # # Manage api method name
    # REPLACE_MANAGE_API_METHOD = '##REPLACE_MANAGE_API_METHOD##'
    # # Pagination parameter - page no
    # MANAGE_PAGE_NO = '##MANAGE_PAGE_NO##'
    # # Pagination parameter - page size
    # MANAGE_PAGE_SIZE = '##MANAGE_PAGE_SIZE##'

    # Pass the 'filters' parameter, not the built-in 'filter'.
    contentTpl = DataUtils.replaceManageTpl(contentTpl, filters, apiItem, timeTpl)

    FileTools.writeFile(fileName, contentTpl)

    contentTpl = PrettierTools.format(fileName)

    FileTools.writeFile(fileName, contentTpl)
Example #17
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--csv_in', required=True, help='CSV input filename')
    parser.add_argument('--csv_out_base',
                        required=True,
                        help='CSV output filename--do NOT confuse with csv_in')
    parser.add_argument('--video_in',
                        required=True,
                        help='Video input filename')
    parser.add_argument(
        '--num_frames',
        type=int,
        default=1000,
        help='Number of frames to use to form training and test set. \
                    Default: 1000')
    parser.add_argument(
        '--frame_delay',
        type=int,
        default=15,
        help='Delta between current frame and previous frame to compare \
                    against. Must be greater than 0. Default: 15')
    parser.add_argument('--object', required=True, help='Object to detect.')
    parser.add_argument(
        '--scale',
        type=float,
        default=0.1,
        help='Scale factor applied to each frame. Default: 0.1')
    parser.add_argument(
        '--features',
        default='hog',
        help='Type of features: HOG (hog), SIFT (sift), Color Histogram (ch), \
                    or raw images (raw). Multiple values must be separated by comma (e.g., hog,ch). Default: hog'
    )
    args = parser.parse_args()
    csv_out_base = args.csv_out_base
    video_in = args.video_in
    csv_in = args.csv_in
    if args.frame_delay <= 0:
        import sys
        print '--frame_delay must be greater than 0'
        sys.exit(1)
    print args
    features_to_try = args.features.strip().split(',')
    args_dict = args.__dict__
    del (args_dict['features'])
    del (args_dict['csv_out_base'])
    del (args_dict['video_in'])
    del (args_dict['csv_in'])
    init_header, init_row = zip(*sorted(list(args_dict.iteritems())))
    init_header, init_row = list(init_header), list(init_row)

    print 'Retrieving %d frames from %s' % (args.num_frames, video_in)
    video_frames = VideoUtils.get_all_frames(args.num_frames,
                                             video_in,
                                             scale=args.scale,
                                             interval=1)

    print 'Retrieving %d labels from %s' % (args.num_frames, csv_in)
    # 0 represents no difference between 2 frames, 1 represents difference
    Y_truth = DataUtils.get_differences(csv_in,
                                        args.object,
                                        limit=args.num_frames,
                                        interval=1,
                                        delay=args.frame_delay)

    header = init_header + [
        'feature', 'distance metric', 'threshold', 'filtration',
        'true positive ratio'
    ]
    rows = []
    for feature_type in features_to_try:
        row_with_feat = init_row[:]
        row_with_feat.append(feature_type)
        print feature_type
        feature_fn, get_distance_fn, dist_metrics_to_try = get_feature_and_dist_fns(
            feature_type)
        features = get_features(feature_fn, video_frames)
        for dist_metric in dist_metrics_to_try:
            recorder = StatsUtils.OutputRecorder(
                '%s_%s_%s.csv' % (csv_out_base, feature_type, dist_metric))
            row = row_with_feat[:]
            row.append(dist_metric)
            print dist_metric
            dists = get_distances(get_distance_fn(dist_metric), features,
                                  args.frame_delay)
            prev_thresh = None
            prev_metrics = None
            best_Y_preds = None

            thresholds_to_try = np.linspace(np.min(dists), np.max(dists), 250)
            for thresh in thresholds_to_try[1:]:
                Y_preds = dists > thresh
                metrics = evaluate_model(Y_preds, Y_truth)
                if metrics['false negative ratio'] > 0.01:
                    break
                prev_metrics = metrics
                prev_thresh = thresh
                best_Y_preds = Y_preds

            if not prev_metrics:
                prev_thresh = 0.0
                prev_metrics = {'filtration': 0.0, 'true positive ratio': 0.0}

            print prev_thresh, prev_metrics['filtration'], prev_metrics[
                'true positive ratio']
            _row = row[:]
            _row.append(prev_thresh)
            for key in ['filtration', 'true positive ratio']:
                val = prev_metrics[key]
                _row.append(val)
            rows.append(_row)
            for i in xrange(args.frame_delay):
                recorder.add_row(False, args.object)
            if best_Y_preds is not None:
                for pred in best_Y_preds:
                    recorder.add_row(pred, args.object)
                recorder.output_csv()
    StatsUtils.output_csv(csv_out_base + '_summary.csv', np.array(rows),
                          np.array(header))