Ejemplo n.º 1
0
def _parallel_explain_subset(procnum, chosen_data, classifier,
                             explainer, save_dir, num_features=10,
                             n_neighbor_samples=5000, top_labels=1):
    """
    Explain one subset of the data and persist the explanation.

    The explanation is produced by ``explain_subset`` and written out with
    ``dump_data``; nothing is returned to the caller.

    Parameters
    ----------
    procnum: int
        Identifier of the parallel worker. It is appended to the name of
        the saved file.
    chosen_data: numpy array
        Subset of voxels to be interpreted. The features.
    classifier: sklearn trained classifier
        Trained classifier.
    explainer: LIME object
        LIME explainer for interpretation.
    save_dir:
        Forwarded to ``dump_data`` as ``save_dir``.
    num_features: int
        Number of features to be selected by LIME.
    n_neighbor_samples: int
        Number of neighbouring samples to be synthesized by LIME.
    top_labels: int
        Forwarded unchanged to ``explain_subset``.

    Returns
    -------
    None
    """
    explain_options = dict(num_features=num_features,
                           n_neighbor_samples=n_neighbor_samples,
                           top_labels=top_labels)
    result = explain_subset(chosen_data, classifier, explainer,
                            **explain_options)

    # One output file per worker, keyed by the worker identifier.
    output_name = 'explainer_proc_id_' + str(procnum)
    dump_data(result, save_dir=save_dir, save_name=output_name)
Ejemplo n.º 2
0
	def addStudent(self):
		"""Validate the add-student form and store the new student record.

		The first failing validation rule decides the message shown in the
		``studentSaved`` label. On success the record is added to
		``self.databag['students']`` under the campus ID, persisted with
		``function.dump_data`` and the form is cleared.
		"""
		form = self.ui
		surname = str(form.surname.text())
		others = str(form.othernames.text())
		department = str(form.studentDept.currentText())
		campusID = str(form.campusID.text())
		avatar = str(form.studentImage.text())

		# Form validation: pick the message of the first rule that fails.
		if surname == '':
			problem = "Surname field is required"
		elif others == '':
			problem = "Other names field is required"
		elif campusID == '':
			problem = "Campus ID is required"
		elif campusID in self.databag['students']:
			problem = "A student exists with that ID"
		else:
			problem = None

		if problem is not None:
			form.studentSaved.setText(problem)
			form.studentSaved.show()
			return

		# Record layout: [full name, department, avatar, '']; the last slot
		# starts out empty.
		full_name = surname + ' ' + others
		self.databag['students'][campusID] = [full_name, department, avatar, '']
		function.talk('Student has been added')
		form.studentSaved.setText("Student Saved")
		form.studentSaved.show()
		function.dump_data(self.databag)
		for field in (form.surname, form.othernames, form.campusID,
				form.studentImage):
			field.clear()
		# Hide the confirmation again after three seconds.
		QtCore.QTimer.singleShot(1000 * 3, form.studentSaved.hide)
Ejemplo n.º 3
0
def _start(hp, model, train_data, test_data, dev_data):
    """Train ``model`` for ``hp['EPOCH']`` epochs and log one row per epoch.

    Every epoch visits the training indices in random order and, for each
    index, performs one word-segmentation update followed by one
    POS-tagging update. After the epoch the model is saved to
    ``hp['EPOCH_MODEL']`` and evaluated on ``dev_data``. When the dev
    word-segmentation score improves, the model is also saved to
    ``hp['MODEL']`` and evaluated on ``test_data``. After ``hp['DECAY']``
    consecutive epochs without improvement the learning rate is halved.

    Parameters
    ----------
    hp : dict
        Hyper-parameters. Keys read here: 'HYPERPARAMS', 'EPOCH',
        'EPOCH_MODEL', 'MODEL' and 'DECAY'.
    model
        Object exposing ``encode_ws``, ``score_sentence``, ``forward``,
        ``get_POStagging_loss``, ``trainer`` and ``model``.
    train_data
        Object with ``ws_data`` and ``pos_data`` sequences of (X, Y) pairs,
        both indexed with the same indices.
    test_data, dev_data
        Passed to ``_evaluation``.
    """
    for k, v in hp.items():
        logging.info('[nagisa] {}: {}'.format(k, v))

    logs = ['Epoch', 'LR', 'Loss', 'Time', 'DevWS',
            'DevPOS', 'TestWS', 'TestPOS']
    logging.info('\t'.join(logs))

    utils.dump_data(hp, hp['HYPERPARAMS'])

    decay_counter = 0
    best_dev_score = 0.
    # The test scores are only refreshed when the dev score improves.
    # Define them up front so the per-epoch log line cannot hit an unbound
    # name when the first epoch(s) do not beat the initial best score.
    test_ws_f = test_pos_f = 0.
    indice = list(range(len(train_data.ws_data)))
    for e in range(1, hp['EPOCH']+1):
        t = time.time()
        losses = 0.
        random.shuffle(indice)
        for i in indice:
            # Word Segmentation
            X = train_data.ws_data[i][0]
            Y = train_data.ws_data[i][1]

            obs = model.encode_ws(X, train=True)
            gold_score = model.score_sentence(obs, Y)
            forward_score = model.forward(obs)
            loss = forward_score-gold_score
            # Update
            loss.backward()
            model.trainer.update()
            losses += loss.value()

            # POS-tagging
            X = train_data.pos_data[i][0]
            Y = train_data.pos_data[i][1]
            loss = model.get_POStagging_loss(X, Y)
            losses += loss.value()
            # Update
            loss.backward()
            model.trainer.update()

        model.model.save(hp['EPOCH_MODEL'])
        dev_ws_f, dev_pos_f = _evaluation(hp, fn_model=hp['EPOCH_MODEL'], data=dev_data)

        if dev_ws_f > best_dev_score:
            # New best dev score: keep this model and refresh test scores.
            best_dev_score = dev_ws_f
            decay_counter = 0
            model.model.save(hp['MODEL'])
            test_ws_f, test_pos_f = _evaluation(hp, fn_model=hp['MODEL'], data=test_data)
        else:
            decay_counter += 1
            if decay_counter >= hp['DECAY']:
                model.trainer.learning_rate = model.trainer.learning_rate/2
                decay_counter = 0

        losses = losses/len(indice)
        logs = [e, model.trainer.learning_rate, losses, (time.time()-t)/60,
                dev_ws_f, dev_pos_f, test_ws_f, test_pos_f]

        # Each column is truncated to five characters to keep rows compact.
        log_text = '\t'.join([log[:5] for log in map(str, logs)])
        logging.info(log_text)
Ejemplo n.º 4
0
def build_knns(knn_prefix,
               feats,
               knn_method,
               k,
               num_process=None,
               is_rebuild=False):
    """Build the k-nearest-neighbour result for ``feats``, or load a cached one.

    The result is cached at ``<knn_prefix>/<knn_method>_k_<k>.npz``. It is
    rebuilt when that file is missing or ``is_rebuild`` is true; otherwise
    it is read back with ``load_data``.

    Parameters
    ----------
    knn_prefix : str
        Directory under which the cache and index files are placed.
    feats
        Features handed to the selected index builder.
    knn_method : str
        One of 'hnsw', 'faiss' or 'faiss_gpu'.
    k : int
        Number of neighbours; also part of the cache file name.
    num_process : optional
        Passed as ``omp_num_threads`` to ``knn_faiss`` and as
        ``num_process`` to ``knn_faiss_gpu``; unused for 'hnsw'.
    is_rebuild : bool
        Force a rebuild even when the cache file exists.

    Returns
    -------
    The knns returned by ``index.get_knns()`` or by ``load_data``.

    Raises
    ------
    KeyError
        If a rebuild is needed and ``knn_method`` is not supported.
    """
    knn_prefix = os.path.join(knn_prefix, '{}_k_{}'.format(knn_method, k))
    mkdir_if_no_exists(knn_prefix)
    knn_path = knn_prefix + '.npz'
    if not os.path.isfile(knn_path) or is_rebuild:
        index_path = knn_prefix + '.index'
        with Timer('build index'):
            if knn_method == 'hnsw':
                index = knn_hnsw(feats, k, index_path)
            elif knn_method == 'faiss':
                index = knn_faiss(feats,
                                  k,
                                  index_path,
                                  omp_num_threads=num_process)
            elif knn_method == 'faiss_gpu':
                index = knn_faiss_gpu(feats,
                                      k,
                                      index_path,
                                      num_process=num_process)
            else:
                # Adjacent literals instead of a backslash continuation
                # inside the string, which embedded the source indentation
                # in the message.
                raise KeyError(
                    'Unsupported method({}). '
                    'Only support hnsw, faiss and faiss_gpu currently'.format(
                        knn_method))
            knns = index.get_knns()
        with Timer('dump knns to {}'.format(knn_path)):
            dump_data(knn_path, knns, force=True)
    else:
        print('read knn from {}'.format(knn_path))
        knns = load_data(knn_path)
    return knns
Ejemplo n.º 5
0
 def addStudent(self):
     """Validate the add-student form and store the new student record.

     The first failing validation rule decides the message shown in the
     ``studentSaved`` label. On success the record is added to
     ``self.databag['students']`` under the campus ID, persisted with
     ``function.dump_data`` and the form is cleared.
     """
     form = self.ui
     surname = str(form.surname.text())
     others = str(form.othernames.text())
     department = str(form.studentDept.currentText())
     campusID = str(form.campusID.text())
     avatar = str(form.studentImage.text())

     # Form validation: pick the message of the first rule that fails.
     if surname == '':
         problem = "Surname field is required"
     elif others == '':
         problem = "Other names field is required"
     elif campusID == '':
         problem = "Campus ID is required"
     elif campusID in self.databag['students']:
         problem = "A student exists with that ID"
     else:
         problem = None

     if problem is not None:
         form.studentSaved.setText(problem)
         form.studentSaved.show()
         return

     # Record layout: [full name, department, avatar, '']; the last slot
     # starts out empty.
     full_name = surname + ' ' + others
     self.databag['students'][campusID] = [
         full_name, department, avatar, ''
     ]
     function.talk('Student has been added')
     form.studentSaved.setText("Student Saved")
     form.studentSaved.show()
     function.dump_data(self.databag)
     for field in (form.surname, form.othernames, form.campusID,
                   form.studentImage):
         field.clear()
     # Hide the confirmation again after three seconds.
     QtCore.QTimer.singleShot(1000 * 3, form.studentSaved.hide)
Ejemplo n.º 6
0
 def deleteDepartment(self):
     """Delete the department currently selected in ``deptEditSelect``.

     The department is removed from ``self.databag['departments']``, the
     databag is persisted, the update notice is shown for ten seconds and
     the entry is removed from the combo box.
     """
     selector = self.ui.deptEditSelect
     position = selector.currentIndex()
     dept_name = str(selector.currentText())

     del self.databag['departments'][dept_name]
     function.dump_data(self.databag)

     notice = self.ui.editUpdateNotif
     notice.show()
     function.talk('department deleted')
     QtCore.QTimer.singleShot(1000 * 10, notice.hide)
     selector.removeItem(position)
Ejemplo n.º 7
0
	def deleteDepartment(self):
		"""Delete the department currently selected in ``deptEditSelect``.

		The department is removed from ``self.databag['departments']``, the
		databag is persisted, the update notice is shown for ten seconds and
		the entry is removed from the combo box.
		"""
		selector = self.ui.deptEditSelect
		position = selector.currentIndex()
		dept_name = str(selector.currentText())

		del self.databag['departments'][dept_name]
		function.dump_data(self.databag)

		notice = self.ui.editUpdateNotif
		notice.show()
		function.talk('department deleted')
		QtCore.QTimer.singleShot(1000 * 10, notice.hide)
		selector.removeItem(position)
def dump_features(name, extractor):
    """Extract features and labels for the three data splits and dump them.

    ``extractor`` is called as ``extractor(AUDIO_DIR, split)`` for TRAIN,
    VAL and TEST, in that order, and must return a ``(features, labels)``
    pair. The per-split features and labels are grouped into two tuples and
    passed to ``u.dump_data`` with the file names ``features_<name>`` and
    ``labels_<name>``.
    """
    print('Extracting %s' % name)
    fnames = ['features_' + name, 'labels_' + name]

    features = []
    labels = []
    for split in (TRAIN, VAL, TEST):
        split_features, split_labels = extractor(AUDIO_DIR, split)
        features.append(split_features)
        labels.append(split_labels)

    data = [tuple(features), tuple(labels)]
    u.dump_data(SAVE_DIR, data, fnames)
Ejemplo n.º 9
0
	def editDept(self):
		"""Rename the department currently selected in ``deptEditSelect``.

		The entry in ``self.databag['departments']`` is moved to the name
		typed in ``DeptEdit``, the databag is persisted, the update notice is
		shown for ten seconds and the combo box entry is relabelled.
		"""
		selector = self.ui.deptEditSelect
		position = selector.currentIndex()
		old_name = str(selector.currentText())
		new_name = str(self.ui.DeptEdit.text())

		# NOTE(review): when new_name is already another department, that
		# department's value is overwritten here — confirm this is intended.
		departments = self.databag['departments']
		departments[new_name] = departments.pop(old_name)
		function.dump_data(self.databag)
		function.talk('department name changed')

		notice = self.ui.editUpdateNotif
		notice.show()
		QtCore.QTimer.singleShot(1000 * 10, notice.hide)
		selector.setItemText(position, new_name)
Ejemplo n.º 10
0
	def deleteCourse(self):
		"""Delete the selected course from the selected department.

		The course is removed from the department's list in
		``self.databag['departments']``, the databag is persisted, the update
		notice is shown for ten seconds and the course is removed from
		``courseEditSelect``.
		"""
		course_selector = self.ui.courseEditSelect
		position = course_selector.currentIndex()
		department = str(self.ui.deptEditSelect.currentText())
		course = str(course_selector.currentText())

		self.databag['departments'][department].remove(course)
		function.dump_data(self.databag)

		notice = self.ui.editUpdateNotif
		notice.show()
		function.talk('course has been deleted')
		QtCore.QTimer.singleShot(1000 * 10, notice.hide)
		course_selector.removeItem(position)
Ejemplo n.º 11
0
 def deleteCourse(self):
     """Delete the selected course from the selected department.

     The course is removed from the department's list in
     ``self.databag['departments']``, the databag is persisted, the update
     notice is shown for ten seconds and the course is removed from
     ``courseEditSelect``.
     """
     course_selector = self.ui.courseEditSelect
     position = course_selector.currentIndex()
     department = str(self.ui.deptEditSelect.currentText())
     course = str(course_selector.currentText())

     self.databag['departments'][department].remove(course)
     function.dump_data(self.databag)

     notice = self.ui.editUpdateNotif
     notice.show()
     function.talk('course has been deleted')
     QtCore.QTimer.singleShot(1000 * 10, notice.hide)
     course_selector.removeItem(position)
Ejemplo n.º 12
0
 def editDept(self):
     """Rename the department currently selected in ``deptEditSelect``.

     The entry in ``self.databag['departments']`` is moved to the name
     typed in ``DeptEdit``, the databag is persisted, the update notice is
     shown for ten seconds and the combo box entry is relabelled.
     """
     selector = self.ui.deptEditSelect
     position = selector.currentIndex()
     old_name = str(selector.currentText())
     new_name = str(self.ui.DeptEdit.text())

     # NOTE(review): when new_name is already another department, that
     # department's value is overwritten here — confirm this is intended.
     departments = self.databag['departments']
     departments[new_name] = departments.pop(old_name)
     function.dump_data(self.databag)
     function.talk('department name changed')

     notice = self.ui.editUpdateNotif
     notice.show()
     QtCore.QTimer.singleShot(1000 * 10, notice.hide)
     selector.setItemText(position, new_name)
Ejemplo n.º 13
0
	def courseUpdates(self):
		"""Save the ticked course check boxes for the student in ``studentID``.

		The numbers (1..15) of the ``courseCheck<N>`` widgets whose
		``checkState()`` equals 2 are joined with ':' and stored in slot 3 of
		the student's record. The record is read from 'data.json' through
		``function.dict_object`` and written back with ``function.dump_data``.
		"""
		# range() replaces the Python-2-only xrange(); it yields the same
		# 1..15 sequence on both Python 2 and Python 3.
		# NOTE(review): 2 presumably is Qt's "checked" state — confirm.
		checked = [
			str(x) for x in range(1, 16)
			if getattr(self.ui, 'courseCheck{}'.format(x)).checkState() == 2
		]
		offered = ':'.join(checked)
		studentIndex = str(self.ui.studentID.text())
		databag = function.dict_object('data.json')
		databag['students'][studentIndex][3] = offered
		function.dump_data(databag)
		function.talk('Your courses have been updated')
		self.ui.courseStatus.setText('Courses updated')
Ejemplo n.º 14
0
 def courseUpdates(self):
     """Save the ticked course check boxes for the student in ``studentID``.

     The numbers (1..15) of the ``courseCheck<N>`` widgets whose
     ``checkState()`` equals 2 are joined with ':' and stored in slot 3 of
     the student's record. The record is read from 'data.json' through
     ``function.dict_object`` and written back with ``function.dump_data``.
     """
     # range() replaces the Python-2-only xrange(); it yields the same
     # 1..15 sequence on both Python 2 and Python 3.
     # NOTE(review): 2 presumably is Qt's "checked" state — confirm.
     checked = [
         str(x) for x in range(1, 16)
         if getattr(self.ui, 'courseCheck{}'.format(x)).checkState() == 2
     ]
     offered = ':'.join(checked)
     studentIndex = str(self.ui.studentID.text())
     databag = function.dict_object('data.json')
     databag['students'][studentIndex][3] = offered
     function.dump_data(databag)
     function.talk('Your courses have been updated')
     self.ui.courseStatus.setText('Courses updated')
Ejemplo n.º 15
0
 def addCourses(self):
     """Append the typed course to the department chosen in the combo box.

     An empty course name only triggers the 'Empty Input' announcement.
     Otherwise the course is appended to the department's list, the
     databag is persisted, the input is cleared and the ``courseAdded``
     notice is shown for three seconds.
     """
     course_name = str(self.ui.addCourse.text())
     if course_name == '':
         function.talk('Empty Input')
         return

     department = str(self.ui.courseAddDeptList.currentText())
     self.databag['departments'][department].append(course_name)
     function.dump_data(self.databag)
     self.ui.addCourse.clear()

     added_notice = self.ui.courseAdded
     added_notice.show()
     function.talk("Course added")
     QtCore.QTimer.singleShot(1000 * 3, added_notice.hide)
Ejemplo n.º 16
0
	def addCourses(self):
		"""Append the typed course to the department chosen in the combo box.

		An empty course name only triggers the 'Empty Input' announcement.
		Otherwise the course is appended to the department's list, the
		databag is persisted, the input is cleared and the ``courseAdded``
		notice is shown for three seconds.
		"""
		course_name = str(self.ui.addCourse.text())
		if course_name == '':
			function.talk('Empty Input')
			return

		department = str(self.ui.courseAddDeptList.currentText())
		self.databag['departments'][department].append(course_name)
		function.dump_data(self.databag)
		self.ui.addCourse.clear()

		added_notice = self.ui.courseAdded
		added_notice.show()
		function.talk("Course added")
		QtCore.QTimer.singleShot(1000 * 3, added_notice.hide)
Ejemplo n.º 17
0
def build_knns(
        knn_prefix,
        feats,
        knn_method,
        k,
        num_process=16,  # default None
        is_rebuild=False,
        feat_create_time=None):
    """Build the k-nearest-neighbour result for ``feats``, or load a cached one.

    The result is cached at ``<knn_prefix>/<knn_method>_k_<k>.npz``. A
    rebuild happens when the cache file is missing, when ``is_rebuild`` is
    true, or when ``feat_create_time`` is given and the cache file's
    modification time is not newer than it.

    Raises ``KeyError`` when a rebuild is needed and ``knn_method`` is not
    one of 'hnsw', 'faiss' or 'faiss_gpu'.
    """
    knn_prefix = os.path.join(knn_prefix, '{}_k_{}'.format(knn_method, k))
    mkdir_if_no_exists(knn_prefix)
    knn_path = knn_prefix + '.npz'

    # An existing cache that is not newer than the features is stale.
    check_staleness = (os.path.isfile(knn_path) and not is_rebuild
                       and feat_create_time is not None)
    if check_staleness:
        knn_create_time = os.path.getmtime(knn_path)
        if knn_create_time <= feat_create_time:
            print('[warn] knn is created before feats ({} vs {})'.format(
                format_time(knn_create_time), format_time(feat_create_time)))
            is_rebuild = True

    # Fast path: a usable cache exists.
    if os.path.isfile(knn_path) and not is_rebuild:
        print('read knn from {}'.format(knn_path))
        return load_data(knn_path)

    index_path = knn_prefix + '.index'
    with Timer('build index'):
        if knn_method == 'hnsw':
            index = knn_hnsw(feats, k, index_path)
        elif knn_method == 'faiss':
            index = knn_faiss(feats,
                              k,
                              index_path,
                              omp_num_threads=num_process,
                              rebuild_index=True)
        elif knn_method == 'faiss_gpu':
            index = knn_faiss_gpu(feats,
                                  k,
                                  index_path,
                                  num_process=num_process)
        else:
            raise KeyError(
                'Only support hnsw and faiss currently ({}).'.format(
                    knn_method))
        knns = index.get_knns()
    with Timer('dump knns to {}'.format(knn_path)):
        dump_data(knn_path, knns, force=True)
    return knns
Ejemplo n.º 18
0
	def changePass(self):
		"""Change the administrator's password.

		An empty input only reports the problem. Otherwise the hash computed
		by ``function.computeHash`` is stored under ``self.databag['auth']``
		and the databag is persisted.
		"""
		status = self.ui.passwordChanged
		entered = str(self.ui.passwordChange.text())

		if entered == '':
			status.setText('Password Field cannot be left blank')
			status.show()
			function.talk('password empty!')
			return

		self.databag['auth'] = str(function.computeHash(entered))
		function.dump_data(self.databag)
		status.setText('Password Updated')
		status.show()
		self.ui.passwordChange.clear()
		function.talk('password updated')
Ejemplo n.º 19
0
 def changePass(self):
     """Change the administrator's password.

     An empty input only reports the problem. Otherwise the hash computed
     by ``function.computeHash`` is stored under ``self.databag['auth']``
     and the databag is persisted.
     """
     status = self.ui.passwordChanged
     entered = str(self.ui.passwordChange.text())

     if entered == '':
         status.setText('Password Field cannot be left blank')
         status.show()
         function.talk('password empty!')
         return

     self.databag['auth'] = str(function.computeHash(entered))
     function.dump_data(self.databag)
     status.setText('Password Updated')
     status.show()
     self.ui.passwordChange.clear()
     function.talk('password updated')
Ejemplo n.º 20
0
	def addDepartment(self):
		"""Create a new, empty department from the ``addDept`` input.

		Empty input and names that already exist are rejected with a notice.
		Otherwise the department is stored with an empty list in
		``self.databag['departments']``, the databag is persisted and the name
		is added to every department combo box.
		"""
		deptInput = str(self.ui.addDept.text())
		if not deptInput:
			self.ui.addDeptNotice.setText("Empty Input")
			function.talk('Empty Input')
		elif deptInput in self.databag['departments']:
			# Re-adding an existing name would replace its list with [] and
			# add duplicate combo entries, so reject it the way addStudent
			# rejects an existing ID.
			self.ui.addDeptNotice.setText("A department exists with that name")
			function.talk('Department already exists')
		else:
			self.databag['departments'][deptInput] = []
			function.dump_data(self.databag)
			self.ui.addDeptNotice.setText('Department Saved')
			self.ui.addDept.clear()
			# Keep every department selector in sync with the databag.
			for combo in (self.ui.courseAddDeptList, self.ui.deptEditSelect,
					self.ui.studentDept, self.ui.ed_department):
				combo.addItem(deptInput)
			function.talk("Department saved")
Ejemplo n.º 21
0
 def addDepartment(self):
     """Create a new, empty department from the ``addDept`` input.

     Empty input and names that already exist are rejected with a notice.
     Otherwise the department is stored with an empty list in
     ``self.databag['departments']``, the databag is persisted and the name
     is added to every department combo box.
     """
     deptInput = str(self.ui.addDept.text())
     if not deptInput:
         self.ui.addDeptNotice.setText("Empty Input")
         function.talk('Empty Input')
     elif deptInput in self.databag['departments']:
         # Re-adding an existing name would replace its list with [] and
         # add duplicate combo entries, so reject it the way addStudent
         # rejects an existing ID.
         self.ui.addDeptNotice.setText("A department exists with that name")
         function.talk('Department already exists')
     else:
         self.databag['departments'][deptInput] = []
         function.dump_data(self.databag)
         self.ui.addDeptNotice.setText('Department Saved')
         self.ui.addDept.clear()
         # Keep every department selector in sync with the databag.
         for combo in (self.ui.courseAddDeptList, self.ui.deptEditSelect,
                       self.ui.studentDept, self.ui.ed_department):
             combo.addItem(deptInput)
         function.talk("Department saved")
Ejemplo n.º 22
0
def main(min_record_threshold, min_support_threshold, derive_trends,
         save_trends):
    """Build transactions, obtain patterns, then derive and save trends.

    Patterns are either loaded from a previous run (when the module-level
    ``use_stale_patterns`` flag is set) or freshly extracted and dumped.
    ``derive_trends`` and ``save_trends`` are callables supplied by the
    caller.
    """
    transactions, trans_with_dt = create_transactions(min_record_threshold)

    if use_stale_patterns:
        # Reuse previously dumped patterns (useful for debugging).
        patterns = load_data()
    else:
        # Extract new patterns and keep a copy (useful for debugging).
        patterns = extract_patterns(transactions, min_support_threshold)
        dump_data(patterns)

    save_trends(derive_trends(trans_with_dt, patterns))
Ejemplo n.º 23
0
def graph(name = None):
    """Render the graph page, optionally loading or creating graph ``name``.

    On GET, the stored data for ``name`` is loaded when ``existed(name)`` is
    true. On POST, the ``data`` form field is parsed as comma-separated
    integers and stored under ``name`` unless it already exists; any failure
    while parsing or storing redirects to ``'/' + name``.

    The local variables (``name``, ``graphs`` and, when set, ``data``) are
    handed to the template through ``**locals()``, so the local names in this
    function are part of the template's contract.
    """
    # Not referenced below, but reaches the template via **locals().
    graphs = list_graphs()
    if name is not None:
        if request.method == 'GET':
            if existed(name):
                data = load_data(name)
        elif request.method == 'POST':
            try:
                data = request.forms.get('data', None)
                data = [int(v.strip()) for v in data.split(',')]
                # NOTE(review): always true here — data is a list by now; a
                # missing form field makes data.split raise above instead.
                if data is not None:
                    # Existing graphs are never overwritten.
                    if not existed(name):
                        dump_data(name, data)
            # NOTE(review): bare except also catches non-Exception errors.
            except:
                return redirect('/' + name)
    return template('templates/graph', **locals())
Ejemplo n.º 24
0
	def updateStudent(self):
		"""Apply the edit-student form to the record being edited.

		When the ID, name or department field is empty, only the
		``studentRecorded`` notice is shown for ten seconds. Otherwise the
		record stored under the ID typed in ``matricEditEntry`` is rebuilt
		from the form (keeping slot 3 of the old record), moved to the ID
		from ``ed_studentID`` and the databag is persisted.
		"""
		studentID = str(self.ui.ed_studentID.text())
		name = str(self.ui.ed_studentName.text())
		studentDept = str(self.ui.ed_department.currentText())
		studentImage = str(self.ui.ed_image.text())

		if not (studentID and name and studentDept):
			self.ui.studentRecorded.show()
			QtCore.QTimer.singleShot(1000 * 10, self.ui.studentRecorded.hide)
			return

		students = self.databag['students']
		enteredId = str(self.ui.matricEditEntry.text())
		# Slot 3 is carried over from the old record unchanged.
		keptSlot = students[enteredId][3]
		students.pop(enteredId)
		students[studentID] = [name, studentDept, studentImage, keptSlot]

		self.ui.matricEditEntry.clear()
		self.ui.dataEditContainer.hide()
		self.ui.studentRecordSuccess.show()
		QtCore.QTimer.singleShot(1000 * 10, self.ui.studentRecordSuccess.hide)
		function.dump_data(self.databag)
Ejemplo n.º 25
0
def label_classification(file_name):
    """Add a ``"label"`` entry to every question and entity of a dataset.

    The dataset is read with ``load_data(file_name)``. For each record:

    * every item of ``record['q_et']`` is labelled from its ``"text"``
      (with ``multi_class=True``), and each entry of its ``'entity'`` list
      from ``'first_sent'``;
    * the positive candidate ``record['pos_et']`` is labelled from
      ``'first_sent'`` with ``multi_class=False``;
    * each negative candidate in ``record['neg_ets']`` is labelled from
      ``'first_sent'``.

    Records are modified in place, printed, and finally dumped to
    ``file_name + "_labeled"``.
    """
    dataset = load_data(file_name)
    classifier, labels_list = init_model()

    def classify(text, **options):
        # Shared call into the classifier with the model loaded above.
        return named_entity_classifier(classifier, labels_list, text,
                                       **options)

    labeled = []
    for record in dataset:
        # Question parts and the entities mentioned in them.
        for question_part in record['q_et']:
            question_text = question_part["text"]
            question_part["label"] = classify(question_text,
                                              multi_class=True)
            for mention in question_part['entity']:
                mention["label"] = classify(mention['first_sent'])

        # Positive candidate entity.
        positive = record['pos_et']
        positive["label"] = classify(positive['first_sent'],
                                     multi_class=False)

        # Negative candidate entities.
        for negative in record['neg_ets']:
            negative["label"] = classify(negative['first_sent'])

        print(record)
        print("#######################################")
        labeled.append(record)

    dump_data(file_name + "_labeled", labeled)
 def get_active_users(self, username, dir_to_save):
     """Return a dict of active-user id -> count for ``username``.

     The result is cached in
     ``<dir_to_save>/<username>/cache_active_users.tsv``. When that file
     exists, its first line is read as whitespace-separated keys and its
     second line as the matching values. Otherwise the counts are gathered
     from ``get_active_users_by_media_id`` for every media item, ordered by
     descending count and written to the cache with ``dump_data``.
     """
     # TODO: make w/o get_user_medias UPD: probably, unimplementable
     media = self.bot.get_user_medias(username, filtration=False)
     cache_path = os.path.join(dir_to_save, username,
                               'cache_active_users.tsv')
     if os.path.exists(cache_path):
         with open(cache_path) as cache:
             keys = cache.readline().split()
             values = cache.readline().split()
         return {int(k): int(v) for (k, v) in zip(keys, values)}

     counter = Counter()
     for media_id in media:
         counter.update(self.get_active_users_by_media_id(media_id))
     # Iterating a Counter yields keys only, so the old
     # sorted(list(counter), key=lambda x: -x[1]) indexed into the key
     # instead of reading its count. most_common() returns (key, count)
     # pairs ordered by descending count.
     set_counter = dict(counter.most_common())
     dump_data(set_counter, path=cache_path)
     return set_counter
def save_proposals(clusters, knns, ofolder, force=False):
    """Dump the nodes and intra-cluster knn edges of every cluster.

    For cluster number ``lb`` two files are written into ``ofolder`` through
    ``dump_data``: ``<lb>_node.npz`` with the cluster's nodes and
    ``<lb>_edge.npz`` with ``[idx1, idx2, dist]`` edges (``idx1 < idx2``).
    Only edges whose two ends both belong to the cluster are kept, and each
    node pair is stored once.
    """
    for lb, cluster in enumerate(tqdm(clusters)):
        members = set(cluster)
        edges = []
        seen_pairs = set()
        # Collect edges from the knn lists of the cluster's members.
        for idx in members:
            neighbours, distances = knns[idx]
            for n, dist in zip(neighbours, distances):
                if n != idx and n in members:
                    # Order the pair so (a, b) and (b, a) share one key.
                    idx1, idx2 = (idx, n) if idx < n else (n, idx)
                    pair_key = '{}-{}'.format(idx1, idx2)
                    if pair_key in seen_pairs:
                        continue
                    seen_pairs.add(pair_key)
                    edges.append([idx1, idx2, dist])
        # Save to npz files.
        node_path = os.path.join(ofolder, '{}_node.npz'.format(lb))
        edge_path = os.path.join(ofolder, '{}_edge.npz'.format(lb))
        dump_data(node_path, data=list(members), force=force)
        dump_data(edge_path, data=edges, force=force)
Ejemplo n.º 28
0
 def updateStudent(self):
     """Apply the edit-student form to the record being edited.

     When the ID, name or department field is empty, only the
     ``studentRecorded`` notice is shown for ten seconds. Otherwise the
     record stored under the ID typed in ``matricEditEntry`` is rebuilt
     from the form (keeping slot 3 of the old record), moved to the ID
     from ``ed_studentID`` and the databag is persisted.
     """
     studentID = str(self.ui.ed_studentID.text())
     name = str(self.ui.ed_studentName.text())
     studentDept = str(self.ui.ed_department.currentText())
     studentImage = str(self.ui.ed_image.text())

     if not (studentID and name and studentDept):
         self.ui.studentRecorded.show()
         QtCore.QTimer.singleShot(1000 * 10, self.ui.studentRecorded.hide)
         return

     students = self.databag['students']
     enteredId = str(self.ui.matricEditEntry.text())
     # Slot 3 is carried over from the old record unchanged.
     keptSlot = students[enteredId][3]
     students.pop(enteredId)
     students[studentID] = [name, studentDept, studentImage, keptSlot]

     self.ui.matricEditEntry.clear()
     self.ui.dataEditContainer.hide()
     self.ui.studentRecordSuccess.show()
     QtCore.QTimer.singleShot(1000 * 10,
                              self.ui.studentRecordSuccess.hide)
     function.dump_data(self.databag)
Ejemplo n.º 29
0
def save_answer(user, question, answer):
	"""Normalise ``answer`` and store it as an ``Answer`` for ``question``.

	``answer`` is a JSON string. It is decoded, its chunks are renumbered
	with ``utils.renumber_chunks`` and it is re-serialised with
	``utils.dump_data`` before being compared and saved.

	Returns True when the answer was saved. Returns False when the
	normalised answer equals ``question.question`` or when creating/saving
	the ``Answer`` fails.
	"""
	chunk_dict = json.loads(answer)
	utils.renumber_chunks(chunk_dict)
	answer = utils.dump_data(chunk_dict)
	# An answer identical to the question is rejected.
	if answer == question.question:
		return False
	try:
		final = Answer(question=question, answer=answer, user=user)
		final.save()
	except Exception:
		# Narrowed from a bare except so KeyboardInterrupt/SystemExit
		# are no longer swallowed; failures still report False.
		return False
	return True
Ejemplo n.º 30
0
def main(argv):
    """Train an RNN tagger and report dev/test results after every epoch.

    Python 2 code (print statements, ``xrange``). The steps are: print the
    configuration, load the CoNLL corpora, load the initial embeddings,
    convert words to ids, build samples, compile the Theano train/test
    functions and run ``argv.epoch`` training epochs. When the dev F score
    improves, the parameters are optionally saved and dev results are
    written; the test results of that same epoch are written as well.

    ``argv`` is an object with the attributes read below: ``unit``,
    ``batch``, ``epoch``, ``save``, ``init_emb``, ``emb``, ``hidden``,
    ``layer``, ``opt``, ``lr1``, ``lr2``, ``reg``, ``train_data``,
    ``dev_data``, ``test_data`` and ``train_dir``.
    """
    print '\nSYSTEM START'
    print '\nMODE: Training'
    print '\nRECURRENT HIDDEN UNIT: %s\n' % argv.unit

    print '\tTRAINING\t\tBatch: %d  Epoch: %d  Parameters Save: %s' % (
        argv.batch, argv.epoch, argv.save)
    print '\tINITIAL EMBEDDING\t %s' % argv.init_emb
    print '\tNETWORK STRUCTURE\tEmb Dim: %d  Hidden Dim: %d  Layers: %d' % (
        argv.emb, argv.hidden, argv.layer)
    print '\tOPTIMIZATION\t\tMethod: %s  Learning Rate: %f %f  L2 Reg: %f' % (
        argv.opt, argv.lr1, argv.lr2, argv.reg)
    """ load corpus"""
    print '\n\tCorpus Preprocessing...'

    train_corpus = load_conll(argv.train_data, exclude=True)
    print '\tTrain Sentences: %d' % len(train_corpus)

    # Dev and test corpora are optional; every later use is guarded by the
    # same argv.dev_data / argv.test_data checks.
    if argv.dev_data:
        dev_corpus = load_conll(argv.dev_data)
        print '\tDev   Sentences: %d' % len(dev_corpus)

    if argv.test_data:
        test_corpus = load_conll(argv.test_data)
        print '\tTest  Sentences: %d' % len(test_corpus)
    """ load initial embedding file """
    print '\n\tInitial Embedding Loading...'
    init_emb, vocab_word = load_init_emb(init_emb=argv.init_emb)
    print '\tVocabulary Size: %d' % vocab_word.size()
    """ convert words into ids """
    print '\n\tConverting Words into IDs...'

    tr_id_sents, tr_id_ctx, tr_marks, tr_prds, train_y, arg_dict = get_id_samples(
        train_corpus, vocab_word=vocab_word, sort=True)

    # The label dictionary from the training set is passed on for dev/test.
    if argv.dev_data:
        dev_id_sents, dev_id_ctx, dev_marks, dev_prds, dev_y, dev_arg_dict =\
            get_id_samples(dev_corpus, vocab_word=vocab_word, a_dict=arg_dict)
    if argv.test_data:
        te_id_sents, te_id_ctx, te_marks, te_prds, test_y, test_arg_dict =\
            get_id_samples(test_corpus, vocab_word=vocab_word, a_dict=arg_dict)

    print '\tLabel size: %d' % arg_dict.size()
    dump_data(data=arg_dict,
              fn=argv.train_dir + 'arg_dict-%d' % (arg_dict.size()))
    """ convert formats for theano """
    print '\n\tCreating Training/Dev/Test Samples...'

    train_sample_x, train_sample_y = convert_data(tr_id_sents, tr_prds,
                                                  tr_id_ctx, tr_marks, train_y,
                                                  init_emb)
    print '\tTrain Samples: %d' % len(train_sample_x)

    if argv.dev_data:
        dev_sample_x, dev_sample_y = convert_data_test(dev_id_sents, dev_prds,
                                                       dev_id_ctx, dev_marks,
                                                       dev_y, init_emb)
        print '\tDev Samples: %d' % len(dev_sample_x)

    if argv.test_data:
        test_sample_x, test_sample_y = convert_data_test(
            te_id_sents, te_prds, te_id_ctx, te_marks, test_y, init_emb)
        print '\tTest Samples: %d' % len(test_sample_x)
    """symbol definition"""
    x = T.ftensor3()
    d = T.imatrix()

    n_in = init_emb.shape[1]
    n_h = argv.hidden
    n_y = arg_dict.size()
    reg = argv.reg
    batch = argv.batch
    """ Model Setup """
    print '\nTheano Code Compiling...'

    tagger = RNN(unit=argv.unit,
                 x=x,
                 d=d,
                 n_layers=argv.layer,
                 n_in=n_in,
                 n_h=n_h,
                 n_y=n_y,
                 reg=reg)

    # train_model applies tagger.updates; test_model only predicts.
    train_model = theano.function(inputs=[x, d],
                                  outputs=[tagger.nll, tagger.errors],
                                  updates=tagger.updates,
                                  mode='FAST_RUN')

    test_model = theano.function(inputs=[x, d],
                                 outputs=[tagger.y_pred, tagger.errors],
                                 mode='FAST_RUN')
    """ Training """
    print '\nTRAIN START'

    best_dev_f = 0.0
    best_test_f = 0.0
    best_epoch = -1
    # Set when the dev score improved in the current epoch, so that the test
    # result of the same epoch is recorded as the best one.
    flag = False

    for epoch in xrange(argv.epoch):
        _train_sample_x, _train_sample_y = shuffle(train_sample_x,
                                                   train_sample_y)

        print '\nEpoch: %d' % (epoch + 1)
        print '\tIndex: ',
        start = time.time()

        losses = []
        errors = []

        sample_index = 0
        for index in xrange(len(train_sample_x)):
            batch_x = _train_sample_x[index]
            batch_y = _train_sample_y[index]

            # The "+ 1" covers a trailing partial mini-batch; empty slices
            # are skipped below. Assumes argv.batch is an int (Python 2
            # integer division) — TODO confirm.
            for b_index in xrange(len(batch_x) / batch + 1):
                sample_index += 1
                # Progress indicator every 100 mini-batches.
                if sample_index % 100 == 0:
                    print '%d' % sample_index,
                    sys.stdout.flush()

                sample_x = batch_x[b_index * batch:(b_index + 1) * batch]
                sample_y = batch_y[b_index * batch:(b_index + 1) * batch]

                if len(sample_x) == 0:
                    continue

                loss, error = train_model(sample_x, sample_y)

                losses.append(loss)
                errors.extend(error)

        end = time.time()
        avg_loss = np.mean(losses)
        total, correct = count_correct(errors)

        print '\tTime: %f seconds' % (end - start)
        print '\tAverage Negative Log Likelihood: %f' % avg_loss
        # NOTE(review): if count_correct returns ints, this is Python 2
        # integer division and prints 0 or 1 — confirm.
        print '\tTrain Accuracy: %f' % (correct / total)
        """ Check model performance """
        if argv.dev_data:
            dev_f, predicts = test(test_model, dev_sample_x, dev_sample_y,
                                   dev_arg_dict, 'Dev')
            if best_dev_f < dev_f:
                best_dev_f = dev_f
                best_epoch = epoch
                """ Save Parameters """
                if argv.save:
                    # NOTE(review): 'Dim-%d' is filled with argv.hidden, the
                    # same value as 'Hidden-%d'; argv.emb may have been
                    # intended — confirm.
                    fn = 'Layer-%d_Dim-%d_Batch-%d_Hidden-%d_Reg-%f_Epoch-%d' % (
                        argv.layer, argv.hidden, argv.batch, argv.hidden,
                        argv.reg, epoch)
                    dump_data(data=tagger, fn=argv.train_dir + fn)
                """ Output Results """
                output_results(
                    dev_corpus, dev_prds, arg_dict, predicts, argv.train_dir +
                    'Dev-result.layer%d.batch%d.hidden%d.opt-%s.reg-%f.epoch%d.txt'
                    % (argv.layer, argv.batch, argv.hidden, argv.opt, argv.reg,
                       epoch))
                flag = True
            print '\t### Best Dev F Score: %f  Epoch: %d ###' % (
                best_dev_f, best_epoch + 1)

        if argv.test_data:
            test_f, predicts = test(test_model, test_sample_x, test_sample_y,
                                    test_arg_dict, 'Test')
            # Only record/write test results for epochs where dev improved.
            if flag:
                best_test_f = test_f
                flag = False
                output_results(
                    test_corpus, te_prds, arg_dict, predicts, argv.train_dir +
                    'Test-result.layer%d.batch%d.hidden%d.opt-%s.reg-%f.epoch%d.txt'
                    % (argv.layer, argv.batch, argv.hidden, argv.opt, argv.reg,
                       epoch))
            if argv.dev_data:
                print '\t### Best Test F Score: %f  Epoch: %d ###' % (
                    best_test_f, best_epoch + 1)
Ejemplo n.º 31
0
    try:
        participants = app.send(
            functions.channels.GetParticipants(
                channel=app.resolve_peer(target),
                filter=types.ChannelParticipantsSearch(
                    ""),  # Filter by empty string (search for all)
                offset=offset,
                limit=limit,
                hash=0))
    except FloodWait as e:
        # Very large channels will trigger FloodWait.
        # When happens, wait X seconds before continuing
        time.sleep(e.x)
        continue

    if not participants.participants:
        break  # No more participants left

    for i in participants.users:
        full_user = app.send(
            functions.users.GetFullUser(app.resolve_peer(i.id)))
        username = full_user.user.username
        identifier = f'@{username}' if username else full_user.user.id
        users.append(
            USER(identifier, full_user.user.first_name, full_user.about))
        time.sleep(1)
    offset += limit

utils.dump_data(users)
app.stop()
Ejemplo n.º 32
0
 def save(self):
     """Dump the model parameters and the embedding values.

     Both are written with ``utils.dump_data``; the file names encode the
     number of layers, the input size and the vocabulary size.
     """
     params = self.dump_params()
     params_fn = 'layers%d.emb%d.vocab%d' % (
         self.n_layers, self.n_in, self.vocab_size)
     utils.dump_data(data=params, fn=params_fn)

     emb_values = self.emb.get_value()
     emb_fn = 'emb%d.vocab%d.layers%d' % (
         self.n_in, self.vocab_size, self.n_layers)
     utils.dump_data(data=emb_values, fn=emb_fn)
Ejemplo n.º 33
0
def generate_proposals(oprefix,
                       feats,
                       feat_dim=256,
                       knn_method='hnsw',
                       k=80,
                       th_knn=0.6,
                       th_step=0.05,
                       min_size=3,
                       max_size=300,
                       is_rebuild=False,
                       is_save_proposals=False):
    """
    Build (or reload) the kNN result and derive cluster proposals from it.

    Both stages cache their output on disk and are only recomputed when
    the cached file is missing or ``is_rebuild`` is set.

    Parameters
    ----------
    oprefix: str
        Output directory. All files are written below
        ``<oprefix>/<knn_method>_k_<k>*``.
    feats:
        Features handed to the kNN index builder.
    feat_dim: int
        Not used by this function; kept for interface compatibility.
    knn_method: str
        kNN backend, either ``'hnsw'`` or ``'faiss'``.
    k: int
        Number of neighbours; passed to the index builder and to
        ``super_vertex``.
    th_knn: float
        Threshold passed to ``super_vertex``.
    th_step: float
        Threshold step passed to ``super_vertex``.
    min_size: int
        Passed to ``filter_clusters`` to filter the resulting clusters.
    max_size: int
        Maximum size passed to ``super_vertex``.
    is_rebuild: bool
        Recompute the kNN result and the clustering even if cached files
        exist.
    is_save_proposals: bool
        When true, additionally save the filtered clusters with
        ``save_proposals`` into ``<ofolder>/proposals``.

    Raises
    ------
    KeyError
        If the kNN result has to be built and ``knn_method`` is neither
        ``'hnsw'`` nor ``'faiss'``.
    """
    print('k={}, th_knn={}, th_step={}, max_size={}, is_rebuild={}'.format(
        k, th_knn, th_step, max_size, is_rebuild))

    ## knn retrieval
    oprefix = os.path.join(oprefix, '{}_k_{}'.format(knn_method, k))
    knn_fn = oprefix + '.npz'
    if not os.path.isfile(knn_fn) or is_rebuild:
        index_fn = oprefix + '.index'
        with Timer('build index'):
            # Backends are imported lazily so that only the selected one
            # has to be importable.
            if knn_method == 'hnsw':
                from proposals import knn_hnsw
                index = knn_hnsw(feats, k, index_fn)
            elif knn_method == 'faiss':
                from proposals import knn_faiss
                index = knn_faiss(feats, k, index_fn)
            else:
                # Adjacent literals instead of a backslash continuation
                # inside the string, which would embed the source
                # indentation in the message.
                raise KeyError(
                    'Unsupported method({}). '
                    'Only support hnsw and faiss currently'.format(
                        knn_method))
            knns = index.get_knns()
        with Timer('dump knns to {}'.format(knn_fn)):
            dump_data(knn_fn, knns, force=True)
    else:
        print('read knn from {}'.format(knn_fn))
        knns = load_data(knn_fn)

    # obtain cluster proposals
    ofolder = oprefix + '_th_{}_step_{}_minsz_{}_maxsz_{}_iter0'.format(
        th_knn, th_step, min_size, max_size)
    ofn_pred_labels = os.path.join(ofolder, 'pred_labels.txt')
    if not os.path.exists(ofolder):
        os.makedirs(ofolder)
    if not os.path.isfile(ofn_pred_labels) or is_rebuild:
        with Timer('build super vertices'):
            clusters = super_vertex(knns, k, th_knn, th_step, max_size)
        with Timer('dump clustering to {}'.format(ofn_pred_labels)):
            labels = clusters2labels(clusters)
            write_meta(ofn_pred_labels, labels)
    else:
        print('read clusters from {}'.format(ofn_pred_labels))
        lb2idxs, _ = read_meta(ofn_pred_labels)
        clusters = labels2clusters(lb2idxs)
    # The min_size filter is applied after caching, so the cached labels
    # are the unfiltered clustering.
    clusters = filter_clusters(clusters, min_size)

    # output cluster proposals
    if is_save_proposals:
        ofolder = os.path.join(ofolder, 'proposals')
        print('saving cluster proposals to {}'.format(ofolder))
        if not os.path.exists(ofolder):
            os.makedirs(ofolder)
        save_proposals(clusters, knns, ofolder=ofolder, force=True)
def dump_sample(name, extractor):
    """
    Extract features and labels with ``extractor`` and dump them.

    Parameters
    ----------
    name: str
        Suffix used for progress output and for the two dump names
        ``features_<name>`` and ``labels_<name>``.
    extractor: callable
        Called as ``extractor(AUDIO_DIR, SAMPLE)``; must return a
        ``(features, labels)`` pair.
    """
    print('Extracting %s' % name)
    fnames = [prefix + name for prefix in ('features_', 'labels_')]
    features, labels = extractor(AUDIO_DIR, SAMPLE)
    # Data and names are passed as parallel lists: features first, labels second.
    u.dump_data(SAVE_DIR, [features, labels], fnames)
Ejemplo n.º 35
0
def create_vocabs_from_trainset(trainset,
                                threshold=2,
                                fn_dictionary=None,
                                save_vocabs=True,
                                fn_vocabs=None,
                                oov=OOV,
                                pad=PAD):
    """
    Build the unigram, bigram, word and POS vocabularies of a training set.

    Parameters
    ----------
    trainset: str
        Path of the training file. Every line is either ``word<TAB>pos``
        or the sentence terminator ``EOS``.
    threshold: int
        Threshold passed to ``cut_by_threshold`` for the unigram, bigram
        and word tables.
    fn_dictionary: str
        Path of a dictionary file with one ``word<TAB>postag`` per line.
        It is opened unconditionally, so the ``None`` default is not usable.
    save_vocabs: bool
        Only when exactly ``True``, the result is dumped with
        ``utils.dump_data`` to ``fn_vocabs``.
    fn_vocabs: str
        Destination used when ``save_vocabs`` is ``True``.
    oov, pad:
        Keys reserved for out-of-vocabulary and padding entries.

    Returns
    -------
    list
        ``[uni2id, bi2id, word2id, pos2id, word2postags]``.
    """
    # Word -> list of POS tags, read from the dictionary file.
    word2postags = {}
    with open(fn_dictionary, 'r') as dict_file:
        for line in dict_file:
            line = utils.utf8rstrip(line)
            entry, postag = line.split('\t')
            # lower setting: 1
            entry = utils.normalize(entry).lower()
            word2postags.setdefault(entry, []).append(postag)

    # Word <-> index tables over the dictionary words (plus oov and pad).
    dictionary = {oov: 0, pad: 1}
    for entry in word2postags:
        dictionary[entry] = len(dictionary)
    id2word = dict((idx, w) for w, idx in dictionary.items())

    # Collect unigram, bigram, word and POS tables from the training set.
    # NOTE(review): update_dict is defined elsewhere; presumably it counts
    # occurrences that cut_by_threshold later filters -- confirm.
    sentence_words = []
    uni2id, bi2id, word2id = {}, {}, {}
    pos2id = {oov: 0}
    with open(trainset, 'r') as train_file:
        for line in train_file:
            line = utils.utf8rstrip(line)

            if line != 'EOS':
                # Ordinary token line: remember the word for the current
                # sentence and register word and POS tag.
                token, pos = line.split('\t')
                token = utils.normalize(token)
                token = token.replace(' ', ' ')
                # lower setting: 2
                token = token.lower()
                sentence_words.append(token)
                word2id = update_dict(token, word2id)
                pos2id = update_dict(pos, pos2id)
                continue

            # End of sentence: process the concatenated sentence string.
            sent = ''.join(sentence_words)
            for uni in utils.get_unigram(sent):
                uni2id = update_dict(uni, uni2id)

            for bi in utils.get_bigram(sent):
                bi2id = update_dict(bi, bi2id)

            # Also register every dictionary word found in the sentence.
            candidates = utils.get_words_starting_at_i(sent, dictionary)
            candidates += utils.get_words_ending_at_i(sent, dictionary)
            for word_ids in candidates:
                for wid in word_ids:
                    word2id = update_dict(id2word[wid], word2id)
            sentence_words = []

    # Cut keys by frequency threshold.
    uni2id = cut_by_threshold(uni2id, oov, pad, threshold)
    bi2id = cut_by_threshold(bi2id, oov, pad, threshold)
    word2id = cut_by_threshold(word2id, oov, pad, threshold)

    # Re-index the POS tags and translate the dictionary tags to those ids
    # (duplicates per word are removed).
    # NOTE(review): a dictionary tag that is not a key of pos2id would
    # raise KeyError here -- confirm that cannot happen.
    pos2id = {tag: idx for idx, tag in enumerate(pos2id.keys())}
    word2postags = {
        entry: [pos2id[tag] for tag in set(tags)]
        for entry, tags in word2postags.items()
    }

    vocabs = [uni2id, bi2id, word2id, pos2id, word2postags]
    if save_vocabs is True:
        utils.dump_data(vocabs, fn_vocabs)

    return vocabs
Ejemplo n.º 36
0
	def addSchool(self):
		"""Store the school name from the form, dump the databag and notify the user.

		The ``schoolSaved`` widget is shown and scheduled to be hidden
		again after a 5000 ms single-shot timer.
		"""
		school_name = str(self.ui.schoolName.text())
		self.databag['school'] = school_name
		function.dump_data(self.databag)

		saved_notice = self.ui.schoolSaved
		saved_notice.show()
		QtCore.QTimer.singleShot(5 * 1000, saved_notice.hide)
		function.talk('School name saved')
Ejemplo n.º 37
0
import os
import time
import numpy as np

url = 'https://investir.lesechos.fr/traders/forex/'

if __name__ == '__main__':

    # Create the dump directory if it is missing (it is not tracked by git).
    data_dir = '../../data/'
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    csv_path = data_dir + 'forex.csv'

    # Initial scrape: provides the header and the first block of data.
    soup, now = utils.make_the_soup(url)
    header, data = utils.get_currency(soup, now)

    with open(csv_path, 'wb') as csv_file:
        np.savetxt(csv_file, data, header=header)
        try:
            # Poll every three minutes until the user interrupts, passing
            # each new scrape and the open file to utils.dump_data.
            while True:
                soup, now = utils.make_the_soup(url)
                header, data = utils.get_currency(soup, now)
                utils.dump_data(csv_file, data)
                print('Forex currency data loaded at ', now.hour, ':',
                      now.minute, ':', now.second)
                time.sleep(3 * 60)

        except KeyboardInterrupt:
            print('Manual break by user')
Ejemplo n.º 38
0
 def addSchool(self):
     """Save the entered school name and give visual and spoken feedback.

     The name is read from the ``schoolName`` widget, stored under
     ``databag['school']`` and the databag is passed to
     ``function.dump_data``. The ``schoolSaved`` widget is then shown and
     scheduled to be hidden by a 5000 ms single-shot timer.
     """
     entered_name = self.ui.schoolName.text()
     self.databag['school'] = str(entered_name)
     function.dump_data(self.databag)

     notice = self.ui.schoolSaved
     notice.show()
     hide_delay_ms = 5 * 1000
     QtCore.QTimer.singleShot(hide_delay_ms, notice.hide)
     function.talk('School name saved')