Example #1
    def graph_node_cluster(self,
                           node2hcel_infn,
                           node2lab_outfn,
                           hcel2lab_outfn=None,
                           strictness=4,
                           verbose=True):
        loader = Loader()

        ndidx_hcel = loader.load_pt2pos(node2hcel_infn)
        outliers_marker = -1
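        # label each histogram cell; cells left unassigned at this strictness
        # presumably get the outlier marker (-1), matching the fallback below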
        hcls_labs, _ = self._hcels_labeling_(self._hcel, outliers_marker,
                                             strictness)
        hcel2lab = dict(zip(map(tuple, self._hcel), hcls_labs))

        if hcel2lab_outfn is not None:
            np.savetxt(hcel2lab_outfn,
                       np.vstack((self._hcel.T, hcls_labs)).T,
                       '%i',
                       delimiter=',',
                       header='#hcel: {}, #label: {}'.format(
                           self._total, len(np.unique(hcls_labs))))

        with open(node2lab_outfn, 'w') as ofp:
            ofp.write("# #pt: {}, #label: {}\n".format(
                len(ndidx_hcel), len(np.unique(hcls_labs))))
            for k in range(len(ndidx_hcel)):
                ndidx, pos = ndidx_hcel[k, 0], tuple(ndidx_hcel[k, 1:])
                ndlab = hcel2lab.get(pos, -1)
                ofp.write("{},{}\n".format(ndidx, ndlab))

        if verbose:
            print("clustering done!")
Example #2
    def __init__(self,
                 data_json,
                 dets_json,
                 visual_feats_dir,
                 opt,
                 tree_pth=None):
        # parent loader instance, see loader.py
        Loader.__init__(self, data_json)

        self.opt = opt
        self.batch_size = opt.batch_size
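        # visual feature dimensionality: 2048 + 512 + 512 = 3072 (presumably concatenated feature blocks)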
        self.vis_dim = 2048 + 512 + 512

        # img_iterators for each split
        self.split_ix = {}
        self.iterators = {}

        # prepare dets
        with open(dets_json) as f:
            self.dets = json.load(f)
        self.Dets = {det['det_id']: det for det in self.dets}

        # add dets to image
        for image in self.images:
            image['det_ids'] = []
        for det in self.dets:
            image = self.Images[det['image_id']]
            image['det_ids'] += [det['det_id']]

        # load visual feats
        print('loading visual feats ...')
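        # the features were presumably pickled under Python 2; decoding bytes as
        # latin1 lets torch.load unpickle them under Python 3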
        pickle.load = partial(pickle.load, encoding="latin1")
        pickle.Unpickler = partial(pickle.Unpickler, encoding="latin1")
        self.visual_feats = torch.load(
            visual_feats_dir,
            map_location=lambda storage, loc: storage,
            pickle_module=pickle)
        print('loaded')

        if tree_pth:
            self.trees = torch.load(tree_pth)
        else:
            self.trees = None

        for image_id, image in self.Images.items():
            split = self.Refs[image['ref_ids'][0]]['split']

            if split not in self.split_ix:
                self.split_ix[split] = []
                self.iterators[split] = 0

            # collect this image's sentence ids across its refs
            sent_ids = []
            for ref_id in self.Images[image_id]['ref_ids']:
                sent_ids += self.Refs[ref_id]['sent_ids']
            self.split_ix[split].append(image_id)

        for k, v in self.split_ix.items():
            print('assigned %d images to split %s' % (len(v), k))
Example #3
def main():
    utils.globals.WINDOW = arcade.Window(utils.globals.WIDTH,
                                         utils.globals.HEIGHT,
                                         utils.globals.TITLE)
    loader = Loader()
    loader.load()

    utils.globals.WINDOW.show_view(utils.views.main_menu)
    arcade.run()
Example #4
def test(model, test_data):
	batch_size = pool_size = 3000
	test_loader = Loader(batch_size, pool_size, (1, 8), test_data)
	vloss_list, ploss_list, acc_list = [], [], []
	tot = 0

	print('Testing.')
	while True:
		test_loader.next(training=False)
		remain = test_loader.remain()
		if not remain:
			break

		test_loader.sample()
		test_loader.update(model.calc_rnn_states)
		vloss, ploss, acc = test_loader(model.loss)
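		# each batch's metrics are weighted by its sample count so the sums below reduce to per-sample means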

		tot += remain
		vloss_list.append(vloss * remain)
		ploss_list.append(ploss * remain)
		acc_list.append(acc * remain)
		print('test: %.4f  %.4f  %.2f%%' % (vloss, ploss, acc * 100))
	vloss = float(np.sum(vloss_list)) / tot
	ploss = float(np.sum(ploss_list)) / tot
	acc = float(np.sum(acc_list)) / tot
	return vloss, ploss, acc
Example #5
    def load_histogram(self, infn_hist):
        loader = Loader()
        shape, ticks, hist_arr = loader.load_histogram(infn_hist)
        self._hcel = hist_arr[:, :-1]
        self._count = hist_arr[:, -1]
        n_cel, n_dim = self._hcel.shape
        if n_dim != self.mode:
            raise ValueError(
                "Input histogram dimension does not match the initial dimension."
            )
        self._total = n_cel
        self.shape = shape
Example #6
    def count_commits(self):
        loader = Loader(self.file, type='txt')
        data = loader.start()
        count = 0
        for line in data:
            if re.match('^(commit )', line):
                count += 1
            elif re.match('^(Date: )', line):
                self.dates.append(line.replace("Date:", "").strip())
        return count, self.dates
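For reference, count_commits keys on raw `git log` output, where commit headers start with "commit " and date lines with "Date: " (illustrative excerpt):

    commit 1a2b3c4d...
    Author: Jane Doe <jane@example.com>
    Date:   Mon Mar 17 21:52:11 2008 -0700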
Example #7
def NGramLangModel():
    cl = Loader(MAIN_DIR + DS_DIR)
    # alternative corpora: tb_berita_onlinemedia, tb_kota_bywiki
    f = cl.loadLarge('tb_kota_bywiki.txt', lazy_load=True)
    w = cl.processRaw(f, to_lower=True)
    r = cl.rawForLangmodel(w, punct_remove=True, to_token=True)

    lms = NGramModels(ngram=2)
    # the njump parameter cannot yet be used with the modkn optimizer
    models = lms.train(r, optimizer='modkn',
                       separate=False, njump=0, verbose=False)

    print("##########################################################")
Example #8
def histogram_view(histogram_infn, xlabel, ylabel, outfn=None):
    loader = Loader()
    _shape_, ticks_vec, hist_arr = loader.load_multi_histogram(histogram_infn)
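    # build a sparse 2-d histogram: row/col indices from the first two columns, counts from the last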
    csr_mat = csr_matrix((hist_arr[:, -1], (hist_arr[:, 0], hist_arr[:, 1])),
                         shape=_shape_,
                         dtype=int)
    plot_heatmap(ticks_vec[1],
                 ticks_vec[0],
                 csr_mat.toarray(),
                 xlabel=xlabel,
                 ylabel=ylabel,
                 outfn=outfn)
    # plot_heatmap_graphlab_pgrk(ticks_vec[1], ticks_vec[0], csr_mat.toarray(), xlabel=xlabel, ylabel=ylabel, outfn=outfn)
    # plot_heatmap_2discretes(ticks_vec[1], ticks_vec[0], csr_mat.toarray(), xlabel=xlabel, ylabel=ylabel, outfn=outfn)
    print('Histogram view done!')
Example #9
def histogram_view(ins_hist, x_lab, y_lab, outs_viz=None):
    loader = Loader()
    _shape_, ticks_vec, hist_arr = loader.load_multi_histogram(ins_hist)
    csr_mat = csr_matrix((hist_arr[:, -1], (hist_arr[:, 0], hist_arr[:, 1])),
                         shape=_shape_,
                         dtype=int)
    plot_heatmap(ticks_vec[1],
                 ticks_vec[0],
                 csr_mat.toarray(),
                 xlabel=x_lab,
                 ylabel=y_lab,
                 outfn=outs_viz)
    # plot_heatmap_graphlab_pgrk(ticks_vec[1], ticks_vec[0], csr_mat.toarray(), xlabel=x_lab, ylabel=y_lab, outfn=outs_viz)
    # plot_heatmap_2discretes(ticks_vec[1], ticks_vec[0], csr_mat.toarray(), xlabel=x_lab, ylabel=y_lab, outfn=outs_viz)
    print('done!')
Example #10
class Network:
    def __init__(self, net_name):
        self.loader = Loader(net_name)
        self.activate = {}

    def conv2d(self, input_tensor, block_num, conv_num):
        with tf.variable_scope('conv{}'.format(conv_num)):
            w, b = self.loader.get_weights(block_num, conv_num)
            #weights = tf.get_variable('W', shape=w.shape, dtype=tf.float32, initializer=tf.constant_initializer(w), trainable=False)
            #bias = tf.get_variable('b', shape=b.shape, dtype=tf.float32, initializer=tf.constant_initializer(b), trainable=False)
            conv_out = tf.nn.conv2d(input_tensor,
                                    w,
                                    strides=[1, 1, 1, 1],
                                    padding='SAME')
            conv_out = tf.nn.bias_add(conv_out, b)
            conv_out = tf.nn.relu(conv_out)
            self.activate['conv{}_{}'.format(block_num, conv_num)] = conv_out
            return conv_out

    def pool(self, input_tensor, block_num):
        return tf.nn.max_pool(input_tensor,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME',
                              name='pool{}'.format(block_num))
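A hedged usage sketch of this VGG-style wrapper (the weight-file name and input shape are assumptions, not part of the example):

    import tensorflow as tf  # TF 1.x API, matching the snippet above

    net = Network('vgg16')  # hypothetical net_name
    images = tf.placeholder(tf.float32, [None, 224, 224, 3])
    h = net.conv2d(images, block_num=1, conv_num=1)  # cached as net.activate['conv1_1']
    h = net.pool(h, block_num=1)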
Example #11
def NGramLangModel():
    cl = Loader('C:\\BimaNLP\\dataset\\')
    # alternative corpora: tb_berita_onlinemedia, tb_kota_bywiki
    f = cl.loadLarge('tb_kota_bywiki.txt', lazy_load=True)
    w = cl.processRaw(f, to_lower=True)
    r = cl.rawForLangmodel(w, punct_remove=True, to_token=True)

    dataset = [['saya', 'suka', 'kamu'],
               ['kamu', 'suka', 'saya'],
               ['saya', 'tidak', 'suka', 'jika', 'kamu', 'pergi', 'dengan', 'dia']]

    lms = NGramModels(ngram=2)
    # the njump parameter cannot yet be used with the modkn optimizer
    models = lms.train(dataset, optimizer='modkn',
                       separate=False, njump=0, verbose=True)

    print("##########################################################")
Example #12
def demo():
    mode = 2
    outs = '../output/'
    ins_gfeat = '../example/outd2hub_feature'
    ofn_hist = 'histogram.out'
    ofn_node2hcel = 'node2hcel.out'
    ofn_hcel2avgfeat = 'hcel2avgfeat.out'
    ofn_heatmap = 'heatmap.png'
    x_lab, y_lab = [
        "Hubness", "Out-degree"
    ]  # ["Authority", "In-degree"], ["PageRank", "Degree"], ["Degree", "Triangles"]

    loader = Loader()
    m, _, gfts = loader.load_features(ins_gfeat, float)
    histogram_construct(gfts[:m], 1, outs + ofn_hist, outs + ofn_node2hcel,
                        outs + ofn_hcel2avgfeat, mode)
    histogram_view(outs + ofn_hist, x_lab, y_lab, outs + ofn_heatmap)
Example #13
def describe_view(ins_hist, ins_desc, desc_voc, xlab, ylab, outs):
    assert desc_voc in VALID_DESCVOCS
    loader = Loader()
    desc = DTMNormDescribe if desc_voc == 'dtmnorm' else GaussianDescribe
    desc_parms = loader.load_describes_parms(ins_desc, desc, mode=2)
    h_shape, ticks_vec, hist_arr = loader.load_histogram(ins_hist)
    csr_mat = csr_matrix((hist_arr[:, -1], (hist_arr[:, 0], hist_arr[:, 1])),
                         shape=h_shape,
                         dtype=int)
    # plot_heatmap(ticks_vec[1], ticks_vec[0], csr_mat.toarray(), xlabel=xlabel, ylabel=ylabel, outfn=outfn)
    plot_heatmap_ellipse_covs(ticks_vec[1],
                              ticks_vec[0],
                              csr_mat.toarray(),
                              desc_parms,
                              base=10,
                              scales=(1.5, 3),
                              xlabel=xlab,
                              ylabel=ylab,
                              outfn=outs)
Example #14
def load_hcel_weights(in_hist,
                      in_hcel2avgfeat,
                      mode=2,
                      wtcol_index=1,
                      sep=','):
    loader = Loader()
    _, _, hist_arr = loader.load_histogram(in_hist)

    nhcubes = len(hist_arr)
    hcube2index = dict(zip(map(tuple, hist_arr[:, :2]), range(nhcubes)))
    hcube_weight = np.zeros(nhcubes)  #np.empty((nhcubes, nfeat))
    with open(in_hcel2avgfeat, 'r') as fp:
        for line in fp:
            if line.startswith('#'):
                continue
            tok = line.strip().split(sep)
            pos = tuple(map(int, tok[:mode]))
            hcube_weight[hcube2index[pos]] = float(tok[wtcol_index + mode - 1])

    return hcube_weight
Example #15
class RawDataReader:
    def __init__(self):
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)
        self.cleaner = Cleaner()
        self.loader = Loader()

    def read_from_gz(self, input_file):
        logging.info(
            "reading file {0}...this may take a while".format(input_file))
        with gzip.open(input_file, 'rt', errors='ignore',
                       encoding='utf-8') as f:
            for i, line in enumerate(f):
                line = self.cleaner.clean(line)
                if i % 20000 == 0:
                    logging.info("read {0} medical texts".format(i))
                    print(line)
                # setting max_len is important to avoid missing words
                yield gensim.utils.simple_preprocess(line, max_len=5000)

    def get_docs_from_gz(self, gz_file):
        # data_file = os.path.join(abspath, gzip)
        data_file = gz_file
        documents = list(self.read_from_gz(data_file))
        logging.info("Done reading data file")
        return documents

    def get_docs_from_file(self, text_file):
        with open(text_file, 'r', errors='ignore') as file:
            yield file.read()

    def get_docs_from_directory(self, directory):
        files = self.loader.list_files(directory)
        documents = [doc for file in files for doc in self.get_docs_from_file(file)]
        return documents

    def get_docs(self, path):
        import pathlib
        print(pathlib.Path(path).suffixes)
        if '.gz' in pathlib.Path(path).suffixes:
            return self.get_docs_from_gz(path)
        elif os.path.isfile(path):
            return self.get_docs_from_file(path)
        else:
            # read files from a directory
            return self.get_docs_from_directory(path)
Example #16
def train(model, train_data, test_data, save_interval, batch_size):
	vloss, ploss, acc = test(model, test_data)
	# vloss, ploss, acc = 100.0, 100.0, 0.0
	min_loss = vloss + ploss

	it, epoch = 0, 0
	__time = time()

	log('epoch: %d  %.5f  %.5f  %.5f  %.3f%%' % (epoch, vloss, ploss, min_loss, acc * 100))
	pool_size = batch_size * 16
	train_loader = Loader(batch_size, pool_size, (14, 18), train_data)
	while True:
		it += 1
		# from utils import watch4
		# watch4.reset()
		if train_loader.next(training=True):
			epoch += 1
			test_data.reload()  # new epoch, reload train data & test data
			best_model = Model(config.name)
			best_model.restore()
			vloss, ploss, acc = test(best_model, test_data)
			min_loss = vloss + ploss
			log('epoch: %d  %.5f  %.5f  %.5f  %.3f%%' % (epoch, vloss, ploss, min_loss, acc * 100))
		train_loader.sample()
		train_loader.update(model.calc_rnn_states)
		# watch4.print('update', reset=True)
		vloss, ploss, acc = train_loader(model.learn)
		# watch4.print('learn', reset=True)
		
		if it % 20 == 0:
			model.push_down()
		
		print('%6d: %.4f  %.4f  %.2f%%  %.2fs' % (it, vloss, ploss, acc * 100, time() - __time))
		__time = time()

		if it % save_interval == 0:
			model.save(model.name + '_t')
			vloss, ploss, acc = test(model, test_data)
			if vloss + ploss < min_loss:
				min_loss = vloss + ploss
				model.save()
			log('%d: %.5f  %.5f  %.5f  %.3f%%' % ((it + 1) // save_interval, vloss, ploss, min_loss, acc * 100))
Example #17
                          verbose=False,
                          outfn=os.path.join(outpath, tiny_blobs))

    print("Refine tree structure.")
    print("a). tree contract")
    tree.tree_contract(VERBOSE)
    tree.save_leveltree(os.path.join(outpath, contracttree))

    print("b). tree pruned")
    tree.tree_prune(alpha=0.8, verbose=VERBOSE)
    tree.save_leveltree(os.path.join(outpath, prunetree))

    print("c). tree node expand")
    tree.tree_node_expand(VERBOSE)
    tree.save_leveltree(os.path.join(outpath, refinetree))

    tree.dump()


if __name__ == '__main__':
    path = '../output/'
    histogram_infn = 'histogram.out'

    loader = Loader()
    print("load data")
    shape, ticks_vec, hist_arr = loader.load_multi_histogram(
        os.path.join(path, histogram_infn))
    mode = len(shape)
    print("Info: mode:{} shape:{}".format(mode, shape))
    waterleveltree(hist_arr, path)
    print("done!")
Example #18
    validation_loss = sum_loss.item() / len(dataloader)
    validation_accuracy = sum_accuracy.item() / len(dataloader)
    print(
        f"Test Loss: {validation_loss}, Test Accuracy: {validation_accuracy}")


if __name__ == '__main__':
    flags = FLAGS()
    dim = (flags.height, flags.width)

    # datasets
    test_dataset = NCaltech101(flags.test_dataset)
    datasetClasses = test_dataset.getClasses()

    # construct loader, responsible for streaming data to gpu
    test_loader = Loader(test_dataset, flags, flags.device)

    # model, load and put to device
    if DEBUG > 0:
        model = Classifier(device=flags.device, dimension=dim)
        model.setMode(1)
    else:
        model = Classifier()
    ckpt = torch.load(flags.checkpoint, map_location=flags.device)
    model.load_state_dict(ckpt["state_dict"])
    model = model.to(flags.device)

    model = model.eval()

    anim = FuncAnimation(fig, updateImg, frames=2000, interval=1)
    events_prediction = get_events_and_predict(model, flags.device,
Example #19
    def __init__(self):
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)
        self.cleaner = Cleaner()
        self.loader = Loader()
Example #20
    arguments = parser.parse_args()
    shader_path = arguments.shader
    model_path = os.path.join(arguments.dataset, "3dmodels", arguments.object)
    sequence_path = os.path.join(arguments.dataset, "sequences", arguments.object)
    save_path = arguments.save
    load_path = arguments.load

    model_geo_path = os.path.join(model_path, "geometry.ply")
    model_ao_path = os.path.join(model_path, "ao.ply")
    fps = 30

    loaded_result = None
    if load_path != "":
        loaded_result = np.load(load_path)

    dataset = Loader(sequence_path)

    vpRender = ModelRenderer(model_geo_path, shader_path, dataset.camera, [(dataset.camera.width, dataset.camera.height)])
    vpRender.load_ambiant_occlusion_map(model_ao_path)

    gt_parameters = []
    print("Sequence length: {}".format(len(dataset.data_pose)))
    for i, (frame, pose) in enumerate(dataset.data_pose):
        rgb, depth = frame.get_rgb_depth(sequence_path)

        # save data as 6 parameters (tx, ty, tz, rx, ry, rz)
        gt_parameters.append(pose.to_parameters())

        # use loaded parameters if available, else fall back to the dataset ground truth
        if loaded_result is not None:
            loaded_pose = Transform.from_parameters(*loaded_result[i])
Example #21
        if shuffle:
            self._shuffle()

        train_size = int(self._images_train.shape[0] * train_rate)
        data_size = len(self._images_train)

        train_set = self._perm(0, train_size)
        test_set = self._perm(train_size, data_size)

        return train_set, test_set

    def _shuffle(self):
        index = np.arange(self._images_train.shape[0])
        np.random.shuffle(index)
        self._images_train, self._images_mask = \
            self._images_train[index], self._images_mask[index]

    def _perm(self, start, end):
        end = min(end, len(self._images_train))
        return Dataset(self._images_train[start:end],
                       self._images_mask[start:end], self._palette)


if __name__ == "__main__":
    data_loader = Loader(dir_train=Path(r'.\src\datasets\train'),
                         dir_masks=Path(r'.\src\datasets\train_masks'))
    my_dataset = Dataset(data_loader.images_train, data_loader.images_mask,
                         data_loader.palette)
    train, valid = my_dataset.train_valid_split(train_rate=0.8, shuffle=True)
    print(my_dataset)
Example #22
def eval_gt_split_by_length(loader, model, crit, split, opt, is_dump_json=False):
    # initialize
    model.eval()
    loader.reset_iterator(split)

    data_json = 'data/feats/refcocog_umd/data_plain.json'
    ori_loader = Loader(data_json)

    # collect the predicted result for each sentence
    pred_sent = {}
    total_loss = 0.0
    iterations = 0.0
    while True:
        data = loader.get_data(split)

        # forward
        scores = model(data)
        if crit:
            loss = crit(scores, data['gts'])
            total_loss += loss.data.cpu().numpy()

        iterations += 1

        scores = scores.data.cpu().numpy()
        pred_ix = np.argmax(scores, axis=1)

        # record the predicted candidate for each sentence
        ann_ids = data['ann_ids']
        for ix, sent_id in enumerate(data['sent_ids']):
            pred_sent[sent_id] = {'sent_id': sent_id,
                                  'ann_id': ann_ids[pred_ix[ix]],
                                  'candidates': ann_ids,
                                  'box': loader.Anns[ann_ids[pred_ix[ix]]]['box']}

        # if used up
        if data['bounds']['wrapped']:
            break

    # compute accuracy
    n = {}
    acc = {}
    for _, ref in loader.Refs.items():
        if ref['split'] == split:
            for sent_id in ref['sent_ids']:
                sent_len = len(ori_loader.Sentences[sent_id]['tokens'])
                n[sent_len] = n.get(sent_len, 0) + 1
                if pred_sent[sent_id]['ann_id'] == ref['ann_id']:
                    acc[sent_len] = acc.get(sent_len, 0) + 1
                # check that the candidate sets match, for a fair accuracy comparison
                assert loader.Images[ref['image_id']]['ann_ids'] == pred_sent[sent_id]['candidates']
        else:
            continue

    # save the prediction results if requested
    if is_dump_json:
        checkpoint_dir = osp.join(opt['checkpoint_path'], opt['dataset_split_by'] + '_' + opt['id'])
        json.dump(pred_sent, open(osp.join(checkpoint_dir, split+'_gt_res.json'), 'w'))

    # restore the model to train
    model.train()

    return acc, n, total_loss/iterations
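Since the returned acc and n map sentence length to correct and total counts, per-length accuracy follows directly (a minimal sketch using the names above):

    per_len_acc = {length: acc.get(length, 0) / n[length] for length in sorted(n)}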
Example #23
        self.down = False
        self.space = False


TITLE = "Raiden Py"
WINDOW = None


WIDTH = 600
HEIGHT = 600
SCREEN_WIDTH = WIDTH
SCREEN_HEIGHT = HEIGHT
bullets = []
enemies = []

l = Loader()
print("load Start")
l.load()
print("load End")

enemyBullets = arcade.SpriteList()
playerBullets = arcade.SpriteList()
enemies = arcade.SpriteList()
explosions = arcade.SpriteList()
playerKills = 0


def getPlayerKills():
    return playerKills

Example #24
    def __init__(self, net_name):
        self.loader = Loader(net_name)
        self.activate = {}
Example #25
    # datasets, add augmentation to the training set
    #training_dataset = NCaltech101(flags.training_dataset, augmentation=True)
    #validation_dataset = NCaltech101(flags.validation_dataset)

    training_dataset = RostrosDatasetEnorme(flags.training_dataset,
                                            split='train',
                                            augmentation=True)
    validation_dataset = RostrosDatasetEnorme(flags.validation_dataset,
                                              split='test',
                                              augmentation=False)
    print("Train: {}".format(len(training_dataset)))
    print("Test: {}".format(len(validation_dataset)))

    # construct loader, handles data streaming to gpu
    training_loader = Loader(training_dataset, flags, device=flags.device)
    validation_loader = Loader(validation_dataset, flags, device=flags.device)

    # model, and put to device
    model = Classifier(num_classes=2,
                       voxel_dimension=(18, 180, 240),
                       pretrained=False)

    if flags.checkpoint != "":
        ckpt = torch.load(flags.checkpoint)
        model.load_state_dict(ckpt["state_dict"])
        print("Loaded model {}".format(flags.checkpoint))

    # model parameters
    pytorch_total_params = sum(p.numel() for p in model.parameters())
    print("Total Parameters: {}".format(pytorch_total_params))
Example #26
def fit(sentence,
        method,
        dataset_folder=None,
        json_data=None,
        json_ent=None,
        verbose=False):
    if dataset_folder:
        train_dir = dataset_folder + 'classifier/'

    # Read the data
    train_data = []
    train_labels = []
    classes = []
    test_data = [sentence]
    test_labels = ['NONE']

    if verbose:
        print("Begin training to classify the sentence...")

    if json_data:
        classes = json_data.keys()

        if verbose:
            print("Using json as training data, so some new classes were added; classes become:")
    else:
        classes = ['int_greetings', 'int_ask']

    ## As of 17 October 2016:
    ##  if the user adds the special character @ to the trained data, the system
    ##  automatically repeats that word once for each of the referenced entities
    regex = re.compile(r'@\w+')

    for z in classes:
        if json_data:
            if z.lower() != 'none':  # don't process the 'none' class
                f = json_data[z]["trained_data"]
            else:
                pass
        else:
            ld = Loader(train_dir)
            f = ld.loadLarge(z + '.txt', lazy_load=True)

        if z.lower() != 'none':
            label = z

            ttl = len(f)
            i = 0

            txtre = []
            for x in f:
                i += 1

                #### sub-routine to auto-generate additional training data = v0.1 ####
                regex_string = re.search(regex, x.lower())

                if regex_string:
                    xx = list(set(re.findall(regex, x.lower())))
                    ents = defaultdict(list)
                    for ii in range(len(xx)):
                        ent, ent_type = intentReqParamLoader(xx[ii][1:], json_ent)
                        for k, v in ent.items():
                            for it in v:
                                if it not in ents[xx[ii][1:]]:
                                    ents[xx[ii][1:]].append(it)

                    for ii in ents.keys():
                        for iii in range(len(ents[ii])):
                            random.shuffle(ents[ii])

                            train_data.append(
                                re.sub(r'@' + ii, ents[ii][0], x))
                            train_labels.append(label)

                ##### End Sub ####
                else:
                    if verbose:
                        msg = "Processing train data {} of {}".format(i, ttl)
                        sys.stdout.write("\r {:<10}".format(msg))
                        sys.stdout.flush()

                    sen = x

                    if len(sen) >= 1:
                        train_data.append(sen.lower())
                        train_labels.append(label)
        if verbose:
            print("\n")

    ######################## Begin Training to Classify Data ########################
    print("solving intent using classifier:", method)
    model = IntentClassifier(solver_algo=method)

    models = model.train(train_data, train_labels, max_df=1.0, minword=1)
    predicted_label = [models.predict(test_data)[0]]

    from operator import itemgetter
    predict_proba = sorted(zip(models.clf.classes_,
                               models.predict_proba(test_data)[0]),
                           key=itemgetter(1),
                           reverse=True)
    ####################################################################################

    if verbose:
        print("Classification result for sentence: %s is: %s" % (sentence,
                                                                 predicted_label))
        print("\n")

    return predicted_label, predict_proba
Example #27
    csr_mat = csr_matrix((hist_arr[:, -1], (hist_arr[:, 0], hist_arr[:, 1])),
                         shape=_shape_,
                         dtype=int)
    plot_heatmap(ticks_vec[1],
                 ticks_vec[0],
                 csr_mat.toarray(),
                 xlabel=xlabel,
                 ylabel=ylabel,
                 outfn=outfn)
    # plot_heatmap_graphlab_pgrk(ticks_vec[1], ticks_vec[0], csr_mat.toarray(), xlabel=xlabel, ylabel=ylabel, outfn=outfn)
    # plot_heatmap_2discretes(ticks_vec[1], ticks_vec[0], csr_mat.toarray(), xlabel=xlabel, ylabel=ylabel, outfn=outfn)
    print('Histogram view done!')


if __name__ == '__main__':
    ins_gfeat = '../example/outd2hub_feature'
    outs = '../output/'
    outs_hist = 'histogram.out'
    ofn_node2hcel = 'node2hcel.out'
    ofn_hcel2avgfeat = 'hcel2avgfeat.out'
    ofn_heatmap = 'heatmap.png'
    x_lab, y_lab = ["Hubness", "Out-degree"]
    # ["Authoritativeness", "In-degree"], ["PageRank", "Degree"], ["Degree", "Triangles"]

    mode = 2
    loader = Loader()
    m, _, gfts = loader.load_features(ins_gfeat, float)
    histogram_construct(gfts[:m], 1, outs + outs_hist, outs + ofn_node2hcel,
                        outs + ofn_hcel2avgfeat, mode)
    histogram_view(outs + outs_hist, x_lab, y_lab, outs + ofn_heatmap)