def _parallel_explain_subset(procnum,
                             chosen_data,
                             classifier,
                             explainer,
                             save_dir,
                             num_features=10,
                             n_neighbor_samples=5000,
                             top_labels=1):
    """
    Runs the interpretation algorithm in parallel.

    Parameters
    ----------
    procnum: int
        Identifier of the parallel process (used in the dump file name).
    chosen_data: numpy array
        Subset of voxels to be interpreted. The features.
    classifier: sklearn trained classifier
        Trained classifier.
    explainer: LIME object
        LIME explainer for interpretation.
    save_dir: str
        Directory where the explanation is dumped.
    num_features: int
        Number of features to be selected by LIME.
    n_neighbor_samples: int
        Number of neighboring samples to be synthesized by LIME.
    top_labels: int
        Number of top labels to explain.

    Returns
    -------
    None
        The explanation (features selected by LIME) is dumped to
        ``save_dir`` rather than returned.
    """
    explanation = explain_subset(chosen_data,
                                 classifier,
                                 explainer,
                                 num_features=num_features,
                                 n_neighbor_samples=n_neighbor_samples,
                                 top_labels=top_labels)
    dump_data(explanation,
              save_dir=save_dir,
              save_name='explainer_proc_id_' + str(procnum))
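A minimal driver sketch for the helper above, assuming explain_subset, dump_data, and _parallel_explain_subset are importable from the same module and that the feature matrix splits cleanly across workers; this driver is not part of the original source.

from multiprocessing import Process

import numpy as np


def explain_in_parallel(data, classifier, explainer, save_dir, n_procs=4):
    # One worker per chunk; each dumps its own 'explainer_proc_id_<i>' file.
    chunks = np.array_split(data, n_procs)
    procs = [Process(target=_parallel_explain_subset,
                     args=(i, chunk, classifier, explainer, save_dir))
             for i, chunk in enumerate(chunks)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()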
def _start(hp, model, train_data, test_data, dev_data):
    for k, v in hp.items():
        logging.info('[nagisa] {}: {}'.format(k, v))
    logs = ['Epoch', 'LR', 'Loss', 'Time', 'DevWS', 'DevPOS',
            'TestWS', 'TestPOS']
    logging.info('\t'.join(logs))
    utils.dump_data(hp, hp['HYPERPARAMS'])

    decay_counter = 0
    best_dev_score = 0.
    indice = [i for i in range(len(train_data.ws_data))]
    for e in range(1, hp['EPOCH'] + 1):
        t = time.time()
        losses = 0.
        random.shuffle(indice)
        for i in indice:
            # Word segmentation
            X = train_data.ws_data[i][0]
            Y = train_data.ws_data[i][1]
            obs = model.encode_ws(X, train=True)
            gold_score = model.score_sentence(obs, Y)
            forward_score = model.forward(obs)
            loss = forward_score - gold_score
            # Update
            loss.backward()
            model.trainer.update()
            losses += loss.value()

            # POS tagging
            X = train_data.pos_data[i][0]
            Y = train_data.pos_data[i][1]
            loss = model.get_POStagging_loss(X, Y)
            losses += loss.value()
            # Update
            loss.backward()
            model.trainer.update()

        model.model.save(hp['EPOCH_MODEL'])
        dev_ws_f, dev_pos_f = _evaluation(hp, fn_model=hp['EPOCH_MODEL'],
                                          data=dev_data)
        if dev_ws_f > best_dev_score:
            best_dev_score = dev_ws_f
            decay_counter = 0
            model.model.save(hp['MODEL'])
            test_ws_f, test_pos_f = _evaluation(hp, fn_model=hp['MODEL'],
                                                data=test_data)
        else:
            decay_counter += 1
            if decay_counter >= hp['DECAY']:
                model.trainer.learning_rate = model.trainer.learning_rate / 2
                decay_counter = 0

        losses = losses / len(indice)
        logs = [e, model.trainer.learning_rate, losses,
                (time.time() - t) / 60,
                dev_ws_f, dev_pos_f, test_ws_f, test_pos_f]
        log_text = '\t'.join([log[:5] for log in map(str, logs)])
        logging.info(log_text)
def build_knns(knn_prefix,
               feats,
               knn_method,
               k,
               num_process=None,
               is_rebuild=False):
    knn_prefix = os.path.join(knn_prefix, '{}_k_{}'.format(knn_method, k))
    mkdir_if_no_exists(knn_prefix)
    knn_path = knn_prefix + '.npz'
    if not os.path.isfile(knn_path) or is_rebuild:
        index_path = knn_prefix + '.index'
        with Timer('build index'):
            if knn_method == 'hnsw':
                index = knn_hnsw(feats, k, index_path)
            elif knn_method == 'faiss':
                index = knn_faiss(feats,
                                  k,
                                  index_path,
                                  omp_num_threads=num_process)
            elif knn_method == 'faiss_gpu':
                index = knn_faiss_gpu(feats, k, index_path,
                                      num_process=num_process)
            else:
                raise KeyError('Unsupported method ({}). '
                               'Only hnsw and faiss are currently '
                               'supported.'.format(knn_method))
        knns = index.get_knns()
        with Timer('dump knns to {}'.format(knn_path)):
            dump_data(knn_path, knns, force=True)
    else:
        print('read knn from {}'.format(knn_path))
        knns = load_data(knn_path)
    return knns
def addStudent(self):
    surname = str(self.ui.surname.text())
    others = str(self.ui.othernames.text())
    department = str(self.ui.studentDept.currentText())
    campusID = str(self.ui.campusID.text())
    avatar = str(self.ui.studentImage.text())
    # Form validation
    if surname == '':
        self.ui.studentSaved.setText("Surname field is required")
        self.ui.studentSaved.show()
    elif others == '':
        self.ui.studentSaved.setText("Other names field is required")
        self.ui.studentSaved.show()
    elif campusID == '':
        self.ui.studentSaved.setText("Campus ID is required")
        self.ui.studentSaved.show()
    elif campusID in self.databag['students']:
        self.ui.studentSaved.setText("A student exists with that ID")
        self.ui.studentSaved.show()
    else:
        self.databag['students'][campusID] = [
            surname + ' ' + others, department, avatar, ''
        ]
        function.talk('Student has been added')
        self.ui.studentSaved.setText("Student Saved")
        self.ui.studentSaved.show()
        function.dump_data(self.databag)
        self.ui.surname.clear()
        self.ui.othernames.clear()
        self.ui.campusID.clear()
        self.ui.studentImage.clear()
        QtCore.QTimer.singleShot(1000 * 3, self.ui.studentSaved.hide)
def deleteDepartment(self):
    currentIndex = self.ui.deptEditSelect.currentIndex()
    currentDept = str(self.ui.deptEditSelect.currentText())
    self.databag['departments'].pop(currentDept)
    function.dump_data(self.databag)
    self.ui.editUpdateNotif.show()
    function.talk('department deleted')
    QtCore.QTimer.singleShot(1000 * 10, self.ui.editUpdateNotif.hide)
    self.ui.deptEditSelect.removeItem(currentIndex)
def dump_features(name, extractor):
    print('Extracting %s' % name)
    fnames = ['features_' + name, 'labels_' + name]
    train_features, train_labels = extractor(AUDIO_DIR, TRAIN)
    val_features, val_labels = extractor(AUDIO_DIR, VAL)
    test_features, test_labels = extractor(AUDIO_DIR, TEST)
    data = [(train_features, val_features, test_features),
            (train_labels, val_labels, test_labels)]
    u.dump_data(SAVE_DIR, data, fnames)
def deleteCourse(self):
    currentIndex = self.ui.courseEditSelect.currentIndex()
    currentDept = str(self.ui.deptEditSelect.currentText())
    currentCourse = str(self.ui.courseEditSelect.currentText())
    self.databag['departments'][currentDept].remove(currentCourse)
    function.dump_data(self.databag)
    self.ui.editUpdateNotif.show()
    function.talk('course has been deleted')
    QtCore.QTimer.singleShot(1000 * 10, self.ui.editUpdateNotif.hide)
    self.ui.courseEditSelect.removeItem(currentIndex)
def editDept(self):
    currentIndex = self.ui.deptEditSelect.currentIndex()
    currentDept = str(self.ui.deptEditSelect.currentText())
    value = str(self.ui.DeptEdit.text())
    self.databag['departments'][value] = self.databag['departments'].pop(
        currentDept)
    function.dump_data(self.databag)
    function.talk('department name changed')
    self.ui.editUpdateNotif.show()
    QtCore.QTimer.singleShot(1000 * 10, self.ui.editUpdateNotif.hide)
    self.ui.deptEditSelect.setItemText(currentIndex, value)
def courseUpdates(self):
    offered = ''
    for x in xrange(1, 16, 1):
        if getattr(self.ui, 'courseCheck{}'.format(x)).checkState() == 2:
            offered += str(x) + ':'
    offered = offered.rstrip(':')
    studentIndex = str(self.ui.studentID.text())
    databag = function.dict_object('data.json')
    databag['students'][studentIndex][3] = offered
    function.dump_data(databag)
    function.talk('Your courses have been updated')
    self.ui.courseStatus.setText('Courses updated')
def addCourses(self):
    courseName = str(self.ui.addCourse.text())
    if courseName == '':
        function.talk('Empty Input')
    else:
        selectedDepartment = str(self.ui.courseAddDeptList.currentText())
        self.databag['departments'][selectedDepartment].append(courseName)
        function.dump_data(self.databag)
        self.ui.addCourse.clear()
        self.ui.courseAdded.show()
        function.talk("Course added")
        QtCore.QTimer.singleShot(1000 * 3, self.ui.courseAdded.hide)
def build_knns(knn_prefix,
               feats,
               knn_method,
               k,
               num_process=16,  # default was None in the earlier variant
               is_rebuild=False,
               feat_create_time=None):
    knn_prefix = os.path.join(knn_prefix, '{}_k_{}'.format(knn_method, k))
    mkdir_if_no_exists(knn_prefix)
    knn_path = knn_prefix + '.npz'
    if os.path.isfile(
            knn_path) and not is_rebuild and feat_create_time is not None:
        knn_create_time = os.path.getmtime(knn_path)
        if knn_create_time <= feat_create_time:
            # Stale cache: the knn file predates the features it indexes.
            print('[warn] knn is created before feats ({} vs {})'.format(
                format_time(knn_create_time), format_time(feat_create_time)))
            is_rebuild = True
    if not os.path.isfile(knn_path) or is_rebuild:
        index_path = knn_prefix + '.index'
        with Timer('build index'):
            if knn_method == 'hnsw':
                index = knn_hnsw(feats, k, index_path)
            elif knn_method == 'faiss':
                index = knn_faiss(feats,
                                  k,
                                  index_path,
                                  omp_num_threads=num_process,
                                  rebuild_index=True)
            elif knn_method == 'faiss_gpu':
                index = knn_faiss_gpu(feats,
                                      k,
                                      index_path,
                                      num_process=num_process)
            else:
                raise KeyError('Only hnsw and faiss are currently '
                               'supported ({}).'.format(knn_method))
        knns = index.get_knns()
        with Timer('dump knns to {}'.format(knn_path)):
            dump_data(knn_path, knns, force=True)
    else:
        print('read knn from {}'.format(knn_path))
        knns = load_data(knn_path)
    return knns
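A hedged usage sketch covering either build_knns variant above; the feature shape and cache directory are illustrative, and knn_hnsw/knn_faiss are assumed to be provided by the surrounding module.

import numpy as np

# Hypothetical call; shapes and paths are illustrative only.
feats = np.random.rand(10000, 256).astype('float32')
knns = build_knns('./knn_cache', feats, knn_method='faiss', k=80)
# Each entry pairs neighbor ids with distances, the layout that
# save_proposals below unpacks as `ners, dists = knns[idx]`.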
def changePass(self):
    """Changes the administrator's password."""
    newpass = str(self.ui.passwordChange.text())
    if newpass == '':
        self.ui.passwordChanged.setText(
            'Password Field cannot be left blank')
        self.ui.passwordChanged.show()
        function.talk('password empty!')
    else:
        self.databag['auth'] = str(function.computeHash(newpass))
        function.dump_data(self.databag)
        self.ui.passwordChanged.setText('Password Updated')
        self.ui.passwordChanged.show()
        self.ui.passwordChange.clear()
        function.talk('password updated')
def addDepartment(self):
    deptInput = str(self.ui.addDept.text())
    if not deptInput:
        self.ui.addDeptNotice.setText("Empty Input")
        function.talk('Empty Input')
    else:
        self.databag['departments'][deptInput] = []
        function.dump_data(self.databag)
        self.ui.addDeptNotice.setText('Department Saved')
        self.ui.addDept.clear()
        self.ui.courseAddDeptList.addItem(deptInput)
        self.ui.deptEditSelect.addItem(deptInput)
        self.ui.studentDept.addItem(deptInput)
        self.ui.ed_department.addItem(deptInput)
        function.talk("Department saved")
def main(min_record_threshold, min_support_threshold, derive_trends,
         save_trends):
    transactions, trans_with_dt = create_transactions(min_record_threshold)
    if not use_stale_patterns:
        # Create new patterns.
        patterns = extract_patterns(transactions, min_support_threshold)
        # Save patterns (useful for debugging).
        dump_data(patterns)
    else:
        # Load preexisting patterns (useful for debugging).
        patterns = load_data()
    trends = derive_trends(trans_with_dt, patterns)
    save_trends(trends)
def graph(name=None):
    graphs = list_graphs()
    if name is not None:
        if request.method == 'GET':
            if existed(name):
                data = load_data(name)
        elif request.method == 'POST':
            try:
                data = request.forms.get('data', None)
                data = [int(v.strip()) for v in data.split(',')]
                if data is not None:
                    if not existed(name):
                        dump_data(name, data)
            except:
                return redirect('/' + name)
    return template('templates/graph', **locals())
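A hypothetical client for what appears to be a Bottle route above, assuming the app is served locally on port 8080 and mounted at /<name>; the comma-separated payload matches the parsing in the POST branch.

import requests

# Hypothetical host, port, and graph name.
requests.post('http://localhost:8080/mygraph', data={'data': '1, 2, 3'})
resp = requests.get('http://localhost:8080/mygraph')
print(resp.status_code)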
def label_classification(file_name):
    dataset = load_data(file_name)
    classifier, labels_list = init_model()
    modified = []
    # For each question line.
    for question in dataset:
        for sub_ques_et in question['q_et']:
            # 0. Question labels.
            ques = sub_ques_et["text"]
            sub_ques_et["label"] = named_entity_classifier(classifier,
                                                           labels_list,
                                                           ques,
                                                           multi_class=True)
            # 1. Question entity labels.
            sub_question = sub_ques_et['entity']
            for entity in sub_question:
                entity["label"] = named_entity_classifier(
                    classifier, labels_list, entity['first_sent'])
        # 2. Candidate entity labels.
        # 2.1 Positive candidate entity.
        sub_cand_pos_et = question['pos_et']
        sub_cand_pos_et["label"] = named_entity_classifier(
            classifier, labels_list, sub_cand_pos_et['first_sent'],
            multi_class=False)
        # 2.2 Negative candidate entities.
        sub_cand_neg_ets = question['neg_ets']
        for sub_cand_neg_et in sub_cand_neg_ets:
            sub_cand_neg_et["label"] = named_entity_classifier(
                classifier, labels_list, sub_cand_neg_et['first_sent'])
        print(question)
        print("#######################################")
        modified.append(question)
    dump_data(file_name + "_labeled", modified)
def get_active_users(self, username, dir_to_save):
    # TODO: make w/o get_user_medias. UPD: probably unimplementable.
    media = self.bot.get_user_medias(username, filtration=False)
    cache_path = os.path.join(dir_to_save, username,
                              'cache_active_users.tsv')
    set_counter = {}
    if os.path.exists(cache_path):
        with open(cache_path) as cache:
            keys = cache.readline().split()
            values = cache.readline().split()
            set_counter = {int(k): int(v) for (k, v) in zip(keys, values)}
    else:
        counter = Counter()
        for media_id in media:
            counter.update(self.get_active_users_by_media_id(media_id))
        # Sort users by activity count. (The original iterated over the
        # Counter's keys alone, which drops the counts.)
        set_counter = dict(counter.most_common())
        dump_data(set_counter, path=cache_path)
    return set_counter
def save_proposals(clusters, knns, ofolder, force=False):
    for lb, nodes in enumerate(tqdm(clusters)):
        nodes = set(nodes)
        edges = []
        visited = set()
        # Get edges from knn.
        for idx in nodes:
            ners, dists = knns[idx]
            for n, dist in zip(ners, dists):
                if n == idx or n not in nodes:
                    continue
                idx1, idx2 = (idx, n) if idx < n else (n, idx)
                key = '{}-{}'.format(idx1, idx2)
                if key not in visited:
                    visited.add(key)
                    edges.append([idx1, idx2, dist])
        # Save to npz files.
        opath_node = os.path.join(ofolder, '{}_node.npz'.format(lb))
        opath_edge = os.path.join(ofolder, '{}_edge.npz'.format(lb))
        nodes = list(nodes)
        dump_data(opath_node, data=nodes, force=force)
        dump_data(opath_edge, data=edges, force=force)
def updateStudent(self):
    studentID = str(self.ui.ed_studentID.text())
    name = str(self.ui.ed_studentName.text())
    studentDept = str(self.ui.ed_department.currentText())
    studentImage = str(self.ui.ed_image.text())
    if not studentID or not name or not studentDept:
        self.ui.studentRecorded.show()
        QtCore.QTimer.singleShot(1000 * 10, self.ui.studentRecorded.hide)
    else:
        enteredId = str(self.ui.matricEditEntry.text())
        currentCourses = self.databag['students'][enteredId][3]
        self.databag['students'][enteredId] = [
            name, studentDept, studentImage, currentCourses
        ]
        self.databag['students'][studentID] = self.databag['students'].pop(
            enteredId)
        self.ui.matricEditEntry.clear()
        self.ui.dataEditContainer.hide()
        self.ui.studentRecordSuccess.show()
        QtCore.QTimer.singleShot(1000 * 10,
                                 self.ui.studentRecordSuccess.hide)
        function.dump_data(self.databag)
def save_answer(user, question, answer):
    chunk_dict = json.loads(answer)
    utils.renumber_chunks(chunk_dict)
    answer = utils.dump_data(chunk_dict)
    # if not compare.compare(utils.sent_breakdown(question.question),
    #                        utils.sent_breakdown(answer)):
    #     return False
    if answer == question.question:
        return False
    try:
        final = Answer(question=question, answer=answer, user=user)
        final.save()
    except:
        return False
    return True
def main(argv):
    print '\nSYSTEM START'
    print '\nMODE: Training'
    print '\nRECURRENT HIDDEN UNIT: %s\n' % argv.unit

    print '\tTRAINING\t\tBatch: %d  Epoch: %d  Parameters Save: %s' % (
        argv.batch, argv.epoch, argv.save)
    print '\tINITIAL EMBEDDING\t %s' % argv.init_emb
    print '\tNETWORK STRUCTURE\tEmb Dim: %d  Hidden Dim: %d  Layers: %d' % (
        argv.emb, argv.hidden, argv.layer)
    print '\tOPTIMIZATION\t\tMethod: %s  Learning Rate: %f %f  L2 Reg: %f' % (
        argv.opt, argv.lr1, argv.lr2, argv.reg)

    """ Load corpus """
    print '\n\tCorpus Preprocessing...'
    train_corpus = load_conll(argv.train_data, exclude=True)
    print '\tTrain Sentences: %d' % len(train_corpus)

    if argv.dev_data:
        dev_corpus = load_conll(argv.dev_data)
        print '\tDev Sentences: %d' % len(dev_corpus)

    if argv.test_data:
        test_corpus = load_conll(argv.test_data)
        print '\tTest Sentences: %d' % len(test_corpus)

    """ Load initial embedding file """
    print '\n\tInitial Embedding Loading...'
    init_emb, vocab_word = load_init_emb(init_emb=argv.init_emb)
    print '\tVocabulary Size: %d' % vocab_word.size()

    """ Convert words into ids """
    print '\n\tConverting Words into IDs...'
    tr_id_sents, tr_id_ctx, tr_marks, tr_prds, train_y, arg_dict = \
        get_id_samples(train_corpus, vocab_word=vocab_word, sort=True)
    if argv.dev_data:
        dev_id_sents, dev_id_ctx, dev_marks, dev_prds, dev_y, dev_arg_dict = \
            get_id_samples(dev_corpus, vocab_word=vocab_word, a_dict=arg_dict)
    if argv.test_data:
        te_id_sents, te_id_ctx, te_marks, te_prds, test_y, test_arg_dict = \
            get_id_samples(test_corpus, vocab_word=vocab_word, a_dict=arg_dict)

    print '\tLabel size: %d' % arg_dict.size()
    dump_data(data=arg_dict,
              fn=argv.train_dir + 'arg_dict-%d' % (arg_dict.size()))

    """ Convert formats for theano """
    print '\n\tCreating Training/Dev/Test Samples...'
    train_sample_x, train_sample_y = convert_data(tr_id_sents, tr_prds,
                                                  tr_id_ctx, tr_marks,
                                                  train_y, init_emb)
    print '\tTrain Samples: %d' % len(train_sample_x)

    if argv.dev_data:
        dev_sample_x, dev_sample_y = convert_data_test(
            dev_id_sents, dev_prds, dev_id_ctx, dev_marks, dev_y, init_emb)
        print '\tDev Samples: %d' % len(dev_sample_x)

    if argv.test_data:
        test_sample_x, test_sample_y = convert_data_test(
            te_id_sents, te_prds, te_id_ctx, te_marks, test_y, init_emb)
        print '\tTest Samples: %d' % len(test_sample_x)

    """ Symbol definition """
    x = T.ftensor3()
    d = T.imatrix()
    n_in = init_emb.shape[1]
    n_h = argv.hidden
    n_y = arg_dict.size()
    reg = argv.reg
    batch = argv.batch

    """ Model setup """
    print '\nTheano Code Compiling...'
    tagger = RNN(unit=argv.unit, x=x, d=d, n_layers=argv.layer,
                 n_in=n_in, n_h=n_h, n_y=n_y, reg=reg)
    train_model = theano.function(inputs=[x, d],
                                  outputs=[tagger.nll, tagger.errors],
                                  updates=tagger.updates,
                                  mode='FAST_RUN')
    test_model = theano.function(inputs=[x, d],
                                 outputs=[tagger.y_pred, tagger.errors],
                                 mode='FAST_RUN')

    """ Training """
    print '\nTRAIN START'

    best_dev_f = 0.0
    best_test_f = 0.0
    best_epoch = -1
    flag = False

    for epoch in xrange(argv.epoch):
        _train_sample_x, _train_sample_y = shuffle(train_sample_x,
                                                   train_sample_y)
        print '\nEpoch: %d' % (epoch + 1)
        print '\tIndex: ',
        start = time.time()

        losses = []
        errors = []
        sample_index = 0
        for index in xrange(len(train_sample_x)):
            batch_x = _train_sample_x[index]
            batch_y = _train_sample_y[index]
            for b_index in xrange(len(batch_x) / batch + 1):
                sample_index += 1
                if sample_index % 100 == 0:
                    print '%d' % sample_index,
                    sys.stdout.flush()

                sample_x = batch_x[b_index * batch:(b_index + 1) * batch]
                sample_y = batch_y[b_index * batch:(b_index + 1) * batch]
                if len(sample_x) == 0:
                    continue

                loss, error = train_model(sample_x, sample_y)
                losses.append(loss)
                errors.extend(error)

        end = time.time()
        avg_loss = np.mean(losses)
        total, correct = count_correct(errors)

        print '\tTime: %f seconds' % (end - start)
        print '\tAverage Negative Log Likelihood: %f' % avg_loss
        print '\tTrain Accuracy: %f' % (correct / total)

        """ Check model performance """
        if argv.dev_data:
            dev_f, predicts = test(test_model, dev_sample_x, dev_sample_y,
                                   dev_arg_dict, 'Dev')
            if best_dev_f < dev_f:
                best_dev_f = dev_f
                best_epoch = epoch

                """ Save parameters """
                if argv.save:
                    fn = 'Layer-%d_Dim-%d_Batch-%d_Hidden-%d_Reg-%f_Epoch-%d' % (
                        argv.layer, argv.hidden, argv.batch, argv.hidden,
                        argv.reg, epoch)
                    dump_data(data=tagger, fn=argv.train_dir + fn)

                """ Output results """
                output_results(
                    dev_corpus, dev_prds, arg_dict, predicts,
                    argv.train_dir +
                    'Dev-result.layer%d.batch%d.hidden%d.opt-%s.reg-%f.epoch%d.txt'
                    % (argv.layer, argv.batch, argv.hidden, argv.opt,
                       argv.reg, epoch))
                flag = True

            print '\t### Best Dev F Score: %f  Epoch: %d ###' % (
                best_dev_f, best_epoch + 1)

        if argv.test_data:
            test_f, predicts = test(test_model, test_sample_x, test_sample_y,
                                    test_arg_dict, 'Test')
            if flag:
                best_test_f = test_f
                flag = False
                output_results(
                    test_corpus, te_prds, arg_dict, predicts,
                    argv.train_dir +
                    'Test-result.layer%d.batch%d.hidden%d.opt-%s.reg-%f.epoch%d.txt'
                    % (argv.layer, argv.batch, argv.hidden, argv.opt,
                       argv.reg, epoch))
            if argv.dev_data:
                print '\t### Best Test F Score: %f  Epoch: %d ###' % (
                    best_test_f, best_epoch + 1)
try:
    participants = app.send(
        functions.channels.GetParticipants(
            channel=app.resolve_peer(target),
            # Filter by empty string (search for all).
            filter=types.ChannelParticipantsSearch(''),
            offset=offset,
            limit=limit,
            hash=0))
except FloodWait as e:
    # Very large channels will trigger FloodWait.
    # When that happens, wait e.x seconds before continuing.
    time.sleep(e.x)
    continue

if not participants.participants:
    break  # No more participants left.

for i in participants.users:
    full_user = app.send(
        functions.users.GetFullUser(app.resolve_peer(i.id)))
    username = full_user.user.username
    identifier = f'@{username}' if username else full_user.user.id
    users.append(
        USER(identifier, full_user.user.first_name, full_user.about))
    time.sleep(1)

offset += limit

utils.dump_data(users)
app.stop()
def save(self):
    utils.dump_data(data=self.dump_params(),
                    fn='layers%d.emb%d.vocab%d' %
                    (self.n_layers, self.n_in, self.vocab_size))
    utils.dump_data(data=self.emb.get_value(),
                    fn='emb%d.vocab%d.layers%d' %
                    (self.n_in, self.vocab_size, self.n_layers))
def generate_proposals(oprefix,
                       feats,
                       feat_dim=256,
                       knn_method='hnsw',
                       k=80,
                       th_knn=0.6,
                       th_step=0.05,
                       min_size=3,
                       max_size=300,
                       is_rebuild=False,
                       is_save_proposals=False):
    print('k={}, th_knn={}, th_step={}, max_size={}, is_rebuild={}'.format(
        k, th_knn, th_step, max_size, is_rebuild))

    # knn retrieval
    oprefix = os.path.join(oprefix, '{}_k_{}'.format(knn_method, k))
    knn_fn = oprefix + '.npz'
    if not os.path.isfile(knn_fn) or is_rebuild:
        index_fn = oprefix + '.index'
        with Timer('build index'):
            if knn_method == 'hnsw':
                from proposals import knn_hnsw
                index = knn_hnsw(feats, k, index_fn)
            elif knn_method == 'faiss':
                from proposals import knn_faiss
                index = knn_faiss(feats, k, index_fn)
            else:
                raise KeyError('Unsupported method ({}). '
                               'Only hnsw and faiss are currently '
                               'supported.'.format(knn_method))
        knns = index.get_knns()
        with Timer('dump knns to {}'.format(knn_fn)):
            dump_data(knn_fn, knns, force=True)
    else:
        print('read knn from {}'.format(knn_fn))
        knns = load_data(knn_fn)

    # Obtain cluster proposals.
    ofolder = oprefix + '_th_{}_step_{}_minsz_{}_maxsz_{}_iter0'.format(
        th_knn, th_step, min_size, max_size)
    ofn_pred_labels = os.path.join(ofolder, 'pred_labels.txt')
    if not os.path.exists(ofolder):
        os.makedirs(ofolder)
    if not os.path.isfile(ofn_pred_labels) or is_rebuild:
        with Timer('build super vertices'):
            clusters = super_vertex(knns, k, th_knn, th_step, max_size)
        with Timer('dump clustering to {}'.format(ofn_pred_labels)):
            labels = clusters2labels(clusters)
            write_meta(ofn_pred_labels, labels)
    else:
        print('read clusters from {}'.format(ofn_pred_labels))
        lb2idxs, _ = read_meta(ofn_pred_labels)
        clusters = labels2clusters(lb2idxs)
    clusters = filter_clusters(clusters, min_size)

    # Output cluster proposals.
    if is_save_proposals:
        ofolder = os.path.join(ofolder, 'proposals')
        print('saving cluster proposals to {}'.format(ofolder))
        if not os.path.exists(ofolder):
            os.makedirs(ofolder)
        save_proposals(clusters, knns, ofolder=ofolder, force=True)
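A hedged invocation sketch for the pipeline above; the feature path and output prefix are assumptions, and load_data is the same helper the function itself falls back on.

# Hypothetical invocation; paths and dimensions are illustrative.
feats = load_data('data/features.npz')
generate_proposals('./outputs', feats, feat_dim=256, knn_method='hnsw',
                   k=80, th_knn=0.6, is_save_proposals=True)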
def dump_sample(name, extractor):
    print('Extracting %s' % name)
    fnames = ['features_' + name, 'labels_' + name]
    samp_features, samp_labels = extractor(AUDIO_DIR, SAMPLE)
    data = [samp_features, samp_labels]
    u.dump_data(SAVE_DIR, data, fnames)
def create_vocabs_from_trainset(trainset,
                                threshold=2,
                                fn_dictionary=None,
                                save_vocabs=True,
                                fn_vocabs=None,
                                oov=OOV,
                                pad=PAD):
    # Create a word-to-POStags dictionary.
    word2postags = {}
    with open(fn_dictionary, 'r') as texts:
        for text in texts:
            text = utils.utf8rstrip(text)
            word, postag = text.split('\t')
            word = utils.normalize(word)
            # lower setting: 1
            word = word.lower()
            if word in word2postags:
                word2postags[word].append(postag)
            else:
                word2postags[word] = [postag]

    # Create a word-to-index dictionary and an index-to-word dictionary.
    dictionary = {oov: 0, pad: 1}
    for word in word2postags.keys():
        dictionary[word] = len(dictionary)
    id2word = {i: w for w, i in dictionary.items()}

    # Create a unigram-to-index dictionary and a bigram-to-index dictionary,
    # and reconstruct the word-to-index dictionary.
    words = []
    uni2id = {}
    bi2id = {}
    word2id = {}
    pos2id = {oov: 0}
    with open(trainset, 'r') as texts:
        for text in texts:
            text = utils.utf8rstrip(text)
            if text == 'EOS':
                sent = ''.join(words)
                unis = utils.get_unigram(sent)
                for uni in unis:
                    uni2id = update_dict(uni, uni2id)
                bis = utils.get_bigram(sent)
                for bi in bis:
                    bi2id = update_dict(bi, bi2id)
                words_at_i = utils.get_words_starting_at_i(sent, dictionary)
                words_at_i += utils.get_words_ending_at_i(sent, dictionary)
                for words in words_at_i:
                    for wid in words:
                        word = id2word[wid]
                        word2id = update_dict(word, word2id)
                words = []
            else:
                word, pos = text.split('\t')
                word = utils.normalize(word)
                # Replace full-width spaces with half-width ones
                # (the full-width character was garbled in extraction).
                word = word.replace('　', ' ')
                # lower setting: 2
                word = word.lower()
                words.append(word)
                word2id = update_dict(word, word2id)
                pos2id = update_dict(pos, pos2id)

    # Cut keys by frequency threshold.
    uni2id = cut_by_threshold(uni2id, oov, pad, threshold)
    bi2id = cut_by_threshold(bi2id, oov, pad, threshold)
    word2id = cut_by_threshold(word2id, oov, pad, threshold)

    # Create a POStag-to-index dictionary.
    pos2id = {k: i for i, k in enumerate(pos2id.keys())}
    word2postags = {
        k: [pos2id[p] for p in list(set(v))]
        for k, v in word2postags.items()
    }

    vocabs = [uni2id, bi2id, word2id, pos2id, word2postags]
    if save_vocabs is True:
        utils.dump_data(vocabs, fn_vocabs)
    return vocabs
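The parsing loop above implies a two-column trainset format: one word<TAB>POS pair per line, with sentences terminated by a bare EOS line. A hypothetical fragment inferred from that logic; the file names and Japanese tokens are illustrative, and the dictionary file is assumed to use the same two-column layout.

# Hypothetical trainset fragment inferred from the parser above.
trainset_fragment = '猫\t名詞\nが\t助詞\nEOS\n'
with open('trainset.tsv', 'w', encoding='utf-8') as f:
    f.write(trainset_fragment)
vocabs = create_vocabs_from_trainset('trainset.tsv',
                                     fn_dictionary='dictionary.tsv',
                                     fn_vocabs='vocabs.bin')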
import os
import time

import numpy as np

import utils  # project-local helper module; import was missing

url = 'https://investir.lesechos.fr/traders/forex/'

if __name__ == '__main__':
    # Create the dump file if not created (not tracked by git).
    if not os.path.exists('../../data/'):
        os.makedirs('../../data/')
    filename = '../../data/forex.csv'

    soup, now = utils.make_the_soup(url)
    header, data = utils.get_currency(soup, now)
    with open(filename, 'wb') as f:
        np.savetxt(f, data, header=header)
        try:
            while True:
                soup, now = utils.make_the_soup(url)
                header, data = utils.get_currency(soup, now)
                utils.dump_data(f, data)
                print('Forex currency data loaded at ', now.hour, ':',
                      now.minute, ':', now.second)
                time.sleep(180)
        except KeyboardInterrupt:
            print('Manual break by user')
def addSchool(self):
    self.databag['school'] = str(self.ui.schoolName.text())
    function.dump_data(self.databag)
    self.ui.schoolSaved.show()
    QtCore.QTimer.singleShot(1000 * 5, self.ui.schoolSaved.hide)
    function.talk('School name saved')