def get_samples_list_by_category_2(self, category_positive):
    """Split the samples of one level-1 category into positive and unlabeled ids.

    Only samples whose level-1 category (as returned by
    ``Categories.get_category_1_id``) equals that of ``category_positive``
    are considered; every other sample is skipped.

    Args:
        category_positive: category id whose exact matches count as positive.

    Returns:
        ``(positive_samples_list, unlabeled_samples_list)`` of sample ids, in
        the iteration order of ``self.sm_matrix``.
    """
    target_category_1 = Categories.get_category_1_id(category_positive)
    positive_ids = []
    unlabeled_ids = []
    for sample_id in self.sm_matrix:
        category_id, _sample_terms, _term_map = self.sm_matrix[sample_id]
        if Categories.get_category_1_id(category_id) != target_category_1:
            continue
        # Exact category match -> positive; same level-1 family only -> unlabeled.
        bucket = positive_ids if category_id == category_positive else unlabeled_ids
        bucket.append(sample_id)
    return positive_ids, unlabeled_ids
def __init__(self):
    """Load the category and alphabet data, then start the first round."""
    self.categories = categories = Categories()
    categories.load()
    self.alphabet = alphabet = Alphabet()
    alphabet.load()
    self.responses = []
    self.nextRound()
def __init__(self, mainwindow):
    """Build the dialog and wire up its category and transaction widgets.

    Args:
        mainwindow: stored on ``self.mainwindow``; not otherwise used here.
    """
    super().__init__()
    self.ui = Ui_Dialog()
    self.ui.setupUi(self)
    self.mainwindow = mainwindow

    # ---- category widgets ----
    self.ui.expense_radioButton.setChecked(True)
    self.ui.back_pushButton.clicked.connect(self.close)
    self.ui.savecategory_pushButton.clicked.connect(self.save_category)
    self.ui.income_radioButton.clicked.connect(self.refresh_categories)
    self.ui.expense_radioButton.clicked.connect(self.refresh_categories)
    self.category_manager = Categories()
    self.refresh_categories()

    # ---- transaction widgets (the ``t_``-prefixed controls) ----
    self.ui.t_expense_radioButton.setChecked(True)
    self.ui.t_expense_radioButton.clicked.connect(
        self.refresh_transaction_categories)
    self.ui.t_income_radioButton.clicked.connect(
        self.refresh_transaction_categories)
    # Take a single snapshot of today's date: three separate date.today()
    # calls could disagree with each other if the day rolls over in between.
    today = date.today()
    self.ui.t_dateEdit.setDate(QDate(today.year, today.month, today.day))
    self.ui.t_savebutton.clicked.connect(self.save_transaction)
    self.transaction_manager = Transactions()
    self.refresh_transaction_categories()
class Round():
    """A single round: a randomly picked category plus a letter constraint."""

    def __init__(self):
        """Load the category and alphabet data and pick the first round."""
        self.categories = Categories()
        self.categories.load()
        self.alphabet = Alphabet()
        self.alphabet.load()
        self.responses = []
        self.nextRound()

    def allResponses(self):
        """Return the ``'response'`` value of every recorded response, in order."""
        collected = []
        for entry in self.responses:
            collected.append(entry['response'])
        return collected

    def getResponse(self, ptn):
        """Return the first recorded response whose ``'tn'`` equals ``ptn``.

        When nothing matches (or scanning the responses fails) a placeholder
        dict marked invalid with response ``'UNK'`` is returned instead.
        """
        log('getResponse for ' + ptn)
        fallback = {'tn': ptn, 'valid': False, 'response': 'UNK'}
        try:
            matching = [entry for entry in self.responses if entry['tn'] == ptn]
        except Exception:
            return fallback
        return matching[0] if matching else fallback

    def nextRound(self):
        """Pick new random category/alphabet indices and clear the responses."""
        last_category = len(self.categories.data) - 1
        self.cat_index = randint(0, last_category)
        log(self.cat_index)
        last_letter = len(self.alphabet.data) - 1
        self.alpha_index = randint(0, last_letter)
        log(self.alpha_index)
        self.responses = []

    def describe(self):
        """Return '<category> that <position> <letter>' for the current round."""
        alpha = self.alphabet.data[self.alpha_index]
        category_name = self.categories.data[self.cat_index]['category']
        position = alpha['position'].lower()
        return " ".join((category_name, "that", position, alpha['letter']))
def __init__(self):
    """Create zero-filled tables for expenses, incomes and per-day expense details."""
    self.kategorie = Categories()
    summary_columns = (
        'Planowane',
        'Rzeczywiste',
        'Różnica',
        'St. realizacji budżetu (%)',
    )

    # Expense summary: one row per expense category.
    exp_names = self.kategorie.exp_categories
    exp_zeros = [0] * len(exp_names)
    self.df_exp = pd.DataFrame(
        {column: exp_zeros for column in summary_columns}, index=exp_names)

    # Income summary: one row per income category.
    inc_names = self.kategorie.inc_categories
    inc_zeros = [0] * len(inc_names)
    self.df_inc = pd.DataFrame(
        {column: inc_zeros for column in summary_columns}, index=inc_names)

    # Expense details: one column per expense category, rows indexed 1..31.
    day_zeros = [0] * 31
    detail_columns = {name: day_zeros for name in exp_names}
    self.df_details = pd.DataFrame(detail_columns, index=list(range(1, 32)))
def list_categories():
    """Interactively list the stored categories, optionally with their products.

    The menu is shown until a usable option is entered:

    * ``1`` prints every category.
    * ``2`` prints every category followed by the products whose category
      name matches it.
    * ``3`` returns without listing anything.

    Any other entry, including non-numeric text, is passed to
    ``error_handler()`` and the menu is shown again.
    """
    while True:
        try:
            option_list_categories = int(
                input(
                    "List the categories only or the categories with products?\n1. Categories only\n2. Categories and products\n3. Go back\n"
                ))
        except ValueError:
            # Non-numeric input previously crashed the menu; treat it like
            # any other invalid option.
            option_list_categories = None

        if option_list_categories == 3:
            print("Going back...\n")
            return
        if option_list_categories not in (1, 2):
            error_handler()
            continue

        try:
            categories = Categories.load_categories()
            # Products are only needed (and only loaded) for option 2.
            products = Products.load_products() if option_list_categories == 2 else []
            for index, cat in enumerate(categories, start=1):
                print(f"{index}. {cat.name}")
                for prod in products:
                    if prod.get_category_name() == cat.name:
                        print(f"\t{prod}")
            input("Press enter key in order to continue\n")
        except JSONDecodeError:
            input(
                "Error on retrieving the categories. Press enter key in order to continue\n"
            )
        return
def __init__(self, parent=None):
    """Assemble the main window: views, category dock, player, timer and overlay.

    Args:
        parent: optional parent forwarded to the base-class constructor.
    """
    super().__init__(parent)

    # View classes and one instance of each, all parented to this window.
    self.youtube_class = YouTubeView
    self.youtube = YouTubeView(self)
    self.tv_class = TVView
    self.tv = TVView(self)
    self.onetv = OneTvView(self)
    self.onetv_class = OneTvView
    self.folder = FolderView(self)
    self.folder_class = FolderView
    self.views = ('youtube', 'tv', 'onetv', 'folder')

    # Central scroll area that hosts the content.
    scroll_area = QScrollArea()
    self.container = scroll_area
    scroll_area.setWidgetResizable(True)
    scroll_area.setStyleSheet(""" QWidget { background-color: rgb(50,50,50); color: #fff; margin: 0; } """)
    self.setCentralWidget(scroll_area)

    # Category list docked on the left; it receives the initial focus.
    self.categories = Categories(scroll_area)
    self.addDockWidget(Qt.LeftDockWidgetArea, self.categories, Qt.Vertical)
    self.categories.setFocus()

    # Playback controls docked at the bottom.
    self.play_control = QDockWidget()
    self.addDockWidget(Qt.BottomDockWidgetArea, self.play_control)

    # The player is given the scroll area's window id as a string.
    window_id = str(int(scroll_area.winId()))
    self.player = Player(window_id, self.play_control)

    # Invoke self._timer once per second.
    self.timer = QTimer()
    self.timer.timeout.connect(self._timer)
    self.timer.start(1000)

    # Overlay on top of the central widget, hidden until needed.
    self.overlay = Overlay(self.centralWidget())
    self.overlay.resize(1920, 1080)
    self.overlay.hide()
def crawl_categories():
    """Crawl the category list for the ``url`` query parameter and return it as JSON.

    Any exception is reported as a JSON body carrying ``errcode`` 500 and the
    error text, with HTTP status 500.
    """
    try:
        crawler = Categories()
        result = crawler.go(request.args.get('url'))
        return jsonify(result)
    except Exception as e:
        error_body = {'errcode': 500, 'errmsg': '抓取分类列表异常: ' + str(e)}
        resp = jsonify(error_body)
        resp.status_code = 500
        return resp
def __init__(self, entry: dict, mainwindow):
    """Show the dialog for ``entry`` and load its data into the form.

    Args:
        entry: mapping read here only through its ``'type'`` key, which
            selects the matching list from ``Categories().category_list``.
        mainwindow: stored on ``self.mainwindow``.
    """
    super().__init__()
    ui = Ui_Dialog()
    self.ui = ui
    ui.setupUi(self)
    self.show()

    ui.back_pushButton.clicked.connect(self.close)
    ui.back_pushButton_2.clicked.connect(self.edit)

    self.entry = entry
    self.mainwindow = mainwindow
    entry_type = self.entry['type']
    self.categories = Categories().category_list[entry_type]
    self.transactions = Transactions()
    self.load_entry()
def add_expense(user_id: int, raw_message: str):
    """Insert one expense row per parsed message, creating missing categories.

    Args:
        user_id: owner of the expenses and categories.
        raw_message: text handed to ``_parse_input``; each parsed message
            supplies ``category_name``, ``ammount`` and ``created``.
    """
    for message in _parse_input(raw_message):
        name = message.category_name
        # Fall back to creating the category when the lookup yields nothing.
        category = (Categories().get_category(user_id, name)
                    or Categories().add_category(user_id, name))
        row = {
            "user_id": user_id,
            "category_id": category.id,
            "ammount": message.ammount,
            "created": message.created,
        }
        db.insert("expenses", row)
def __init__(self, transaction, mainwindow):
    """Open the edit dialog for ``transaction`` and disable the main window.

    Args:
        transaction: mapping read here through its ``'type'`` key, which
            selects the matching list from ``Categories().category_list``.
        mainwindow: stored on ``self.mainwindow`` and disabled via
            ``setEnabled(False)``.
    """
    super().__init__()
    ui = Ui_edit_transaction_dialog()
    self.ui = ui
    ui.setupUi(self)

    self.mainwindow = mainwindow
    self.transaction = transaction
    mainwindow.setEnabled(False)

    self.transaction_manager = Transactions()
    transaction_type = self.transaction['type']
    self.categories = Categories().category_list[transaction_type]
    self.laod_transaction()
    self.show()

    ui.back_pushButton.clicked.connect(self.close)
    ui.savebutton.clicked.connect(self.edit)
    print(transaction)
async def categories_list(message: types.Message):
    """Send the sender's expense categories, each with its aliases."""
    usr_id = int(message.from_user.id)
    categories = Categories(user_id=usr_id).get_all_categories()
    entries = []
    for category in categories:
        aliases = ", ".join(category.aliases)
        entries.append(category.name + ' (' + aliases + ')')
    answer_message = "Категории(группы) трат:\n\n* " + "\n\n* ".join(entries)
    await message.answer(answer_message)
def update(self):
    """Reload the outgoings of ``self.day``/``self.month``/``self.year`` into the tree.

    Each row read from ``outgones`` becomes a child item under its
    category's top-level item (created on first use and remembered in
    ``self.categories``), and its total is added to ``self.info``.

    Raises:
        DatabaseError: if the connection cannot be opened, or the executed
            statement is not reported as a SELECT.
    """
    self.info.clear()
    if not conn.isOpen() and not conn.open():
        raise DatabaseError

    query = QSqlQuery(conn)
    # Bind the date parts instead of formatting them into the SQL text.
    query.prepare(
        'SELECT name, price, count, category FROM outgones '
        'WHERE (day = ? and month = ? and year = ?)'
    )
    for value in (self.day, self.month, self.year):
        query.addBindValue(value)
    query.exec_()
    if not query.isSelect():
        raise DatabaseError

    query.first()
    while query.isValid():
        name = query.value('name')
        price = query.value('price')
        count = query.value('count')
        category = query.value('category')
        if category not in self.categories:
            parent = QTreeWidgetItem(self.tree)
            parent.setText(0, Categories(category).name)
            self.categories[category] = parent
        parent = self.categories[category]
        total = price * count
        child = QTreeWidgetItem(parent)
        child.setText(0, '{}: {}x{} = {}'.format(name, price, count, total))
        self.info.addData(category, total)
        query.next()
def get_categories_1_weight_matrix(self):
    """Build category and sample feature matrices for every level-1 category.

    For each entry of ``self.get_categories().categories_1`` the positive
    and unlabeled samples are fetched from ``self.tsm``, the per-term
    positive degree is computed, and:

    * the category's features (term id -> ``pd_word``) are stored in a
      ``CategoryFeatureMatrix``;
    * every positive sample is recorded in a ``SampleFeatureMatrix`` with
      its level-1 category and the ``pd_word`` of each of its terms that
      has a positive degree.

    Returns:
        ``(cfm, sfm)``: the category and sample feature matrices.
    """
    tsm = self.tsm
    cfm = CategoryFeatureMatrix()
    sfm = SampleFeatureMatrix()
    categories = self.get_categories()
    for category_name in categories.categories_1:
        category_id = categories.categories_1[category_name]
        positive_samples_list, unlabeled_samples_list = \
            tsm.get_samples_list_by_category_1(category_id)
        # Parenthesised single-argument print runs on Python 2 and 3 alike.
        print("\n%s(%d) Positive Samples: %d Unlabeled Samples: %d" % (
            category_name, category_id,
            len(positive_samples_list), len(unlabeled_samples_list)))

        terms_positive_degree = get_terms_positive_degree_by_category(
            tsm, positive_samples_list, unlabeled_samples_list)

        features = {}
        for term_id in terms_positive_degree:
            pd_word, _speciality, _popularity = terms_positive_degree[term_id]
            features[term_id] = pd_word
        cfm.set_features(category_id, features)

        for sample_id in positive_samples_list:
            sample_category, _sample_terms, term_map = tsm.get_sample_row(sample_id)
            category_1_id = Categories.get_category_1_id(sample_category)
            sfm.set_sample_category(sample_id, category_1_id)
            for term_id in term_map:
                if term_id in terms_positive_degree:
                    pd_word, _speciality, _popularity = terms_positive_degree[term_id]
                    sfm.add_sample_feature(sample_id, term_id, pd_word)

    return cfm, sfm
def to_sklearn_data(self):
    """Convert ``self.sm_matrix`` into a CSR matrix plus label list.

    Each sample becomes one row whose columns are dense term indices and
    whose values are taken from the sample's term map. The label of a row
    is a dense index assigned to ``level-1 category id // 1000000``.

    Returns:
        ``(X, y, terms, category_map)`` where ``X`` is the CSR matrix, ``y``
        the list of label indices (one per sample), ``terms`` maps term id
        to column index and ``category_map`` maps the reduced category id to
        label index.
    """
    indptr = [0]
    indices = []
    data = []
    categories = []
    terms = {}
    category_map = {}

    for sample_id in self.sm_matrix:
        category_id, _sample_terms, term_map = self.sm_matrix[sample_id]
        category_1_id = Categories.get_category_1_id(category_id)
        # Floor division keeps the integer result the Python 2 `/` produced
        # here; under Python 3 a plain `/` would yield a float key.
        category_id_1 = category_1_id // 1000000
        category_idx = category_map.setdefault(category_id_1, len(category_map))
        categories.append(category_idx)

        for term_id in term_map:
            term_idx = terms.setdefault(term_id, len(terms))
            indices.append(term_idx)
            data.append(term_map[term_id])
        # Close this sample's row in the CSR index pointer.
        indptr.append(len(indices))

    rows = len(self.sm_matrix)
    cols = len(terms)
    # Parenthesised single-argument print runs on Python 2 and 3 alike.
    print("%s %s" % (rows, cols))

    X = csr_matrix((np.array(data), np.array(indices), np.array(indptr)),
                   shape=(rows, cols))
    y = categories
    return X, y, terms, category_map
def get_categories() -> str:
    """Return a text listing of all categories together with their aliases."""
    entries = []
    for category in Categories().get_all_categories():
        aliases = ", ".join(category.aliases)
        entries.append(category.name + ' (' + aliases + ')\n')
    return "Категории трат:\n\n- " + "\n- ".join(entries)
def treeItems():
    """Fill ``tree`` (a name taken from the enclosing scope) from ``Categories().data``.

    Every key becomes a top-level item; every entry stored under that key
    becomes a child of it.
    """
    data = Categories().data
    for group_name in data:
        group_node = QTreeWidgetItem(tree)
        group_node.setText(0, group_name)
        for entry in data[group_name]:
            entry_node = QTreeWidgetItem(group_node)
            entry_node.setText(0, entry)
def __init__(self):
    """Build the category dialog, load its categories, show it and wire the buttons."""
    super().__init__()
    ui = Ui_Dialog()
    self.ui = ui
    ui.setupUi(self)
    ui.expense_radioButton.setChecked(True)

    self.categories_manager = Categories()
    self.load_categories()
    print(self.categories_manager)
    self.show()

    # Button actions.
    ui.add_pushButton.clicked.connect(self.add_button_click)
    ui.rename_pushButton.clicked.connect(self.rename_button_click)
    ui.delete_pushButton.clicked.connect(self.delete_button_click)
    ui.back_pushButton.clicked.connect(self.close)

    # Radio buttons.
    ui.expense_radioButton.toggled.connect(self.expense_radiobtn_active)
    ui.income_radioButton.toggled.connect(self.income_radiobtn_active)
def generate_password_list(individual, words_to_list, plugins):
    """Generate the password list for ``individual`` and pass it to the YAML output processor."""
    category_processor = Categories(individual, words_to_list, plugins)
    individuals_passwords = category_processor.process_categories()
    ProcessOutputYaml().output_processor(individuals_passwords)
def create_category(update: Update, context: CallbackContext) -> int:
    """Create a category named after the message text and end the conversation.

    Replies "Success" when ``add_category`` returns a truthy value, otherwise
    reports that the category already exists.
    """
    raw_message = update.message.text
    created = Categories().add_category(update.effective_user.id, raw_message)
    reply = "Success" if created else "This category already exists"
    update.message.reply_text(reply)
    return ConversationHandler.END
def categories_list(update, context: CallbackContext):
    """Show the category selection keyboard and remember all categories.

    The message is edited to the category prompt with the markup from
    ``categories_markup()``, and ``Categories().all_categories`` is stored
    under ``context.user_data['categories']``.

    Returns:
        ``3`` — presumably the next conversation state; confirm against the
        handler's state table.
    """
    reply_markup = categories_markup()
    message = 'Виберіть категорію:'
    query_edit_message(update, message=message, reply_markup=reply_markup)
    # A single write is enough: the original stored the value and then copied
    # it back onto the same key.
    context.user_data['categories'] = Categories().all_categories
    return 3
def delete_category(update: Update, context: CallbackContext) -> int:
    """Delete the category named in the message, together with its expenses.

    The lower-cased message text is compared with each category's ``name``.
    On a match the category and its expenses are deleted and the
    conversation ends; otherwise the user is asked for another name.
    """
    categories = Categories().get_all_categories()
    text = update.message.text.lower()
    target = next((c for c in categories if text == c.name), None)

    if target is None:
        update.message.reply_text(
            "Category you are trying to delete does not exist\n"
            "Specify a different name\n")
        return State.REPLYING_CATEGORY_NAME_DELETE

    Categories().del_category(target)
    expenses.delete_category(target)
    update.message.reply_text(
        "Category and associated expenses were successfully deleted\n")
    return ConversationHandler.END
def new_product(product_name):
    """Interactively classify a new product and insert it into the ``product`` table.

    The user picks a great category, then a category and, for
    ``"nourriture"``, also a sub-category, a conditioned weight and whether
    the food is processed. Answering ``'r'`` at any prompt restarts the
    whole dialogue.

    Args:
        product_name: name stored for the new product.

    Returns:
        ``(product_id, conditionned_weight, food)`` where ``product_id`` is
        the highest ``id`` in ``product`` after the insert.
    """
    # NOTE(review): `food` is never set to True, not even in the
    # "nourriture" branch — confirm whether callers expect that.
    processed_food = food = False
    conditionned_weight = sub_category = category = great_category = None
    print("pour le new product gret, category et sub: ", great_category,
          category, sub_category)

    # Every 'r' answer returns the result of the restarted dialogue. The
    # original discarded it and fell through to insert a second row holding
    # the 'r' placeholder.
    great_category = Categories.choice_list(rank=1)
    if great_category == 'r':
        print("vous devez choisir une grande catégorie !")
        return Product.new_product(product_name)

    if great_category != "nourriture":
        category = Categories.choice_list(great_category=great_category, rank=2)
        if category == 'r':
            return Product.new_product(product_name)
    else:
        category = Categories.choice_list(great_category=great_category,
                                          rank=2,
                                          food=True)
        if category == 'r':
            return Product.new_product(product_name)
        sub_category = Categories.choice_list(
            great_category=great_category, category=category, rank=3)
        if sub_category == 'r':
            return Product.new_product(product_name)
        conditionned_weight = Product.cond_weight()
        rep = Check.check_yn(
            "Est_ce de la nourriture transformée? ")
        if rep == "o":
            processed_food = True

    datas = (product_name, great_category, category, sub_category,
             processed_food, conditionned_weight)
    with Connection.get_cursor() as cur:
        sql = (
            """INSERT INTO product(product_name, great_category, category, sub_category, processed_food, conditionned_weight) VALUES (%s, %s, %s, %s, %s, %s);""")
        cur.execute(sql, datas)
    # NOTE(review): MAX(id) is read in a separate cursor, so a concurrent
    # insert could return another row's id — confirm this is single-user.
    with Connection.get_cursor() as cur:
        cur.execute("SELECT MAX(id) FROM product")
        product_id = cur.fetchone()
    return product_id[0], conditionned_weight, food
def _create_categories():
    """Return a ``Categories`` object with the three lemma categories created."""
    categories = Categories()
    for category_name in (SAME_LEMMA, DIFF_LEMMA, NOT_IN_BIN):
        categories.create_category(category_name)
    return categories
async def list_categories(message: types.Message):
    """Send the user a Markdown-formatted list of categories with their aliases."""
    categories_list = Categories().get_all_categories()
    blocks = []
    for category in categories_list:
        title = bold(f"\"{category.category_name.capitalize()}\"\n")
        blocks.append(text(title + f"Теги: {category.aliases}\n"))
    answer_text = "\n".join(blocks)
    header = bold("Список категорий\n\n")
    await message.answer(text(header + answer_text),
                         parse_mode=ParseMode.MARKDOWN)
def add_expense(raw_message: str) -> Expense:
    """Parse ``raw_message``, store it as an expense row and return the expense.

    Args:
        raw_message: text handed to ``_parse_message``.

    Returns:
        An ``Expense`` with ``id=None``, the parsed amount and the resolved
        category name.
    """
    parsed = _parse_message(raw_message)
    category = Categories().get_category(parsed.category_text)
    row = {
        "amount": parsed.amount,
        "created": _get_now_formatted(),
        "category_name": category.name,
        "raw_text": raw_message,
    }
    db.insert("expense", row)
    return Expense(id=None,
                   amount=parsed.amount,
                   category_name=category.name)
def simulation(self):
    """Add ``self.n`` random transactions to ``self.budzet`` and print its reports.

    Each transaction is an income with weight 0.2 or an expense with weight
    0.8, with a random type (``'r'``/``'p'``), day (1-31), category and
    amount. Salary-like incomes and housing expenses draw from higher
    amount ranges than the other categories.
    """
    kategorie = Categories()
    kinds = ['inc', 'exp']
    weights = [0.2, 0.8]
    categories_exp = kategorie.show_exp_categories()
    categories_inc = kategorie.show_inc_categories()
    types = ['r', 'p']
    days = list(range(1, 32))

    for _ in range(self.n):
        tr = random.choices(kinds, weights)[0]
        t = random.choice(types)
        d = random.choice(days)
        if tr == 'inc':
            cat = random.choice(categories_inc)
            if cat in ('Wynagrodzenie', 'Premia'):
                low, high = 1500, 8000
            else:
                low, high = 10, 1000
            self.budzet.add_income(cat, t, random.randint(low, high), d)
        elif tr == 'exp':
            cat = random.choice(categories_exp)
            if cat == 'Mieszkanie':
                low, high = 500, 2000
            else:
                low, high = 5, 500
            self.budzet.add_expense(cat, t, random.randint(low, high), d)

    print('\n')
    print(self.budzet.show_transactions() + '\n')
    print(self.budzet.real_budget() + '\n' + self.budzet.plan_budget())
    print(self.budzet.show_df_exp())
    print('\n')
    print(self.budzet.show_df_inc())
    print('\n')
    print(self.budzet.show_df_details())
def add_expense(raw_message: str) -> Expense:
    """Add a new expense.

    Takes the text of a message received by the bot, stores it as an
    expense row and returns the resulting ``Expense``.
    """
    parsed = _parse_message(raw_message)
    category = Categories().get_category(parsed.category_text)
    row = {
        "amount": parsed.amount,
        "created": _get_now_formatted(),
        "category_codename": category.codename,
        "raw_text": raw_message,
    }
    # The value returned by db.insert is not used.
    db.insert("expense", row)
    return Expense(amount=parsed.amount, category_name=category.name)
def add_expense(raw_message: str) -> Expense:
    """Add a new expense.

    Takes the text of a message received by the bot, stores it as an
    expense row and returns the resulting ``Expense`` (with ``id=None``).
    """
    parsed = _parse_message(raw_message)
    category = Categories().get_category(parsed.category_text)
    row = {
        "amount": parsed.amount,
        "created": _get_now_formatted(),
        "category_codename": category.codename,
        "raw_text": raw_message,
    }
    # The value returned by db.insert is not used.
    db.insert("expense", row)
    return Expense(id=None,
                   amount=parsed.amount,
                   category_name=category.name)
def update_table(self, item):
    """Fill this table widget from ``Categories().tables[item]``.

    The first row of the selected table supplies the column headers; every
    later row becomes a data row.
    """
    table = Categories().tables
    stylesheet = "::section{Background-color:rgb(176,224,230);border-radius:2px;}"
    self.setColumnCount(len(table[item][0]))
    self.setRowCount(len(table[item]) - 1)

    for row_index, row in enumerate(table[item]):
        for col_index, cell in enumerate(row):
            cell_item = QTableWidgetItem(cell)
            if row_index == 0:
                self.setHorizontalHeaderItem(col_index, cell_item)
            else:
                # Data rows are shifted up by one because row 0 is the header.
                self.setItem(row_index - 1, col_index, cell_item)

    header = self.horizontalHeader()
    header.setStyleSheet(stylesheet)
    self.resizeColumnsToContents()
    self.horizontalHeader().setMaximumHeight(200)
def remove_category():
    """Interactively remove a category together with all of its products.

    After a confirmation prompt the user enters a category name. If the
    category exists, every product whose category name matches is removed,
    then the category itself. If it does not exist the user may retry
    (enter 1) or return. Invalid menu entries, including non-numeric text,
    go through ``error_handler()`` and the prompt is shown again.
    """
    while True:
        try:
            option_remove_category = int(
                input(
                    "Warning! Deleting a category will also delete all the products inside of it.\n1. Continue\n2. Go back\n"
                ))
        except ValueError:
            # Non-numeric input previously crashed the menu; treat it like
            # any other invalid option.
            option_remove_category = None

        if option_remove_category == 2:
            print("Going back...\n")
            return
        if option_remove_category != 1:
            error_handler()
            continue

        category_to_remove = Category(
            input("Introduce the name of the category to be removed:\n"))
        try:
            categories = Categories.load_categories()
            if categories.count(category_to_remove) > 0:
                products = Products.load_products()
                for prod in products:
                    if prod.get_category_name() == category_to_remove.name:
                        Products.remove_product(prod)
                Categories.remove_category(category_to_remove)
                input(
                    "Category -" + str(category_to_remove) +
                    "- and all its products were removed successfully.\nPress enter key in order to continue\n"
                )
                return

            try:
                category_option = int(
                    input(
                        "This category does not exist in the list. Input 1 to try entering another category or any other number to return to the store menu:\n"
                    ))
            except ValueError:
                # Anything that is not the number 1 means "return".
                category_option = None
            if category_option == 1:
                continue
        except JSONDecodeError:
            input(
                "Error on retrieving the categories. Press enter key in order to continue\n"
            )
        return
def add_expense(raw_message: str) -> Expense:
    """Add a new expense.

    Takes the text of a message received by the bot, stores it as an
    expense row and returns the resulting ``Expense`` (with ``id=None``).
    """
    parsed = _parse_message(raw_message)
    category = Categories().get_category(parsed.category_text)
    row = {
        "amount": parsed.amount,
        "created": _get_now_formatted(),
        "category_codename": category.codename,
        "raw_text": raw_message,
    }
    # The value returned by db.insert is not used.
    db.insert("expense", row)
    return Expense(id=None,
                   amount=parsed.amount,
                   category_name=category.name)
def _generate_rows(self) -> [[str]]: rows = list() for item in self._result.results: authors = self._process_authors(item.authors) topics = self._process_set(item.topics) src_info = item.source.data rows.append( self._clean_list([ src_info.date, src_info.year, authors, item.title, item.category, Categories().instance.name_for_key(self._cat_id), self._ref_type, topics, self._journal_title(), src_info.volume, src_info.number, src_info.page, src_info.doi, item.pdf ])) return None if len(rows) < 1 else rows
def open(self, corpus_dir):
    """Point this object at ``corpus_dir`` and load its vocabulary and categories.

    ``corpus_dir`` and its ``samples`` subdirectory are created when
    missing. The loaded categories are printed.
    """
    self.root_dir = corpus_dir
    if not path.isdir(corpus_dir):
        os.mkdir(corpus_dir)

    root = self.root_dir
    self.meta_dir = "/".join((root, "meta"))

    self.samples_dir = "/".join((root, "samples"))
    if not path.isdir(self.samples_dir):
        os.mkdir(self.samples_dir)

    self.vocabulary_dir = "/".join((root, "vocabulary"))
    self.vocabulary = Vocabulary(self.vocabulary_dir)

    self.categories_dir = "/".join((root, "categories"))
    categories = Categories(self.categories_dir)
    self.categories = categories
    categories.load_categories()
    categories.print_categories()
def multicategories_predict(samples_test, model_name, result_dir):
    """Train a classifier from saved feature matrices and predict on ``samples_test``.

    The train matrices are loaded from ``<model_name>.sfm`` and
    ``<model_name>.cfm`` (inside ``result_dir`` when given). A test sample
    feature matrix is built from ``samples_test`` using the features stored
    for category ``2000000``. The classifier is trained on the train matrix
    and asked to predict the test matrix.

    Args:
        samples_test: object providing ``tsm`` and ``get_categories()``.
        model_name: base name of the model files; must be non-empty.
        result_dir: directory holding the model files, or ``None`` for the
            current directory. It is created when missing.

    Returns:
        ``None``. The function returns early, after logging, when
        ``model_name`` is empty or ``result_dir`` cannot be created.
    """
    if model_name is None or len(model_name) == 0:
        # logging.warn is a deprecated alias of logging.warning.
        logging.warning(Logger.warn("model_name must not be NULL."))
        return

    if result_dir is None:
        cfm_file = "%s.cfm" % (model_name)
        sfm_file = "%s.sfm" % (model_name)
    else:
        if not os.path.isdir(result_dir):
            try:
                os.mkdir(result_dir)
            except OSError:
                logging.error(Logger.error("mkdir %s failed." % (result_dir)))
                return
        cfm_file = "%s/%s.cfm" % (result_dir, model_name)
        sfm_file = "%s/%s.sfm" % (result_dir, model_name)

    # This progress message used Logger.error inside a debug log call; it
    # now matches the other debug messages.
    logging.debug(Logger.debug("Loading train sample feature matrix ..."))
    sfm_train = SampleFeatureMatrix()
    sfm_train.load(sfm_file)

    logging.debug(Logger.debug("Loading train category feature matrix ..."))
    cfm_train = CategoryFeatureMatrix()
    cfm_train.load(cfm_file)

    logging.debug(Logger.debug("Making sample feature matrix for test data ..."))
    # NOTE(review): hard-coded category whose features drive the test matrix
    # — confirm this id is intended for every model.
    category_id = 2000000
    sfm_test = SampleFeatureMatrix(sfm_train.get_category_id_map(),
                                   sfm_train.get_feature_id_map())
    features = cfm_train.get_features(category_id)

    for sample_id in samples_test.tsm.sample_matrix():
        sample_category, _sample_terms, term_map = \
            samples_test.tsm.get_sample_row(sample_id)
        category_1_id = Categories.get_category_1_id(sample_category)
        sfm_test.set_sample_category(sample_id, category_1_id)
        for feature_id in features:
            if feature_id in term_map:
                feature_weight = features[feature_id]
                sfm_test.add_sample_feature(sample_id, feature_id, feature_weight)

    logging.debug(Logger.debug(
        "train sample feature matrix - features:%d categories:%d" % (
            sfm_train.get_num_features(), sfm_train.get_num_categories())))
    X_train, y_train = sfm_train.to_sklearn_data()

    logging.debug(Logger.debug(
        "test sample feature matrix - features:%d categories:%d" % (
            sfm_test.get_num_features(), sfm_test.get_num_categories())))
    X_test, y_test = sfm_test.to_sklearn_data()

    clf = Classifier()

    logging.debug(Logger.debug("Classifier training ..."))
    clf.train(X_train, y_train)

    logging.debug(Logger.debug("Classifier predicting ..."))

    # Build display names "name(id)" for the level-1 categories, ordered by
    # sorted_dict over their index in the test matrix.
    categories = samples_test.get_categories()
    categories_1_names = []
    categories_1_idx_map = {}
    for cat_id in categories.get_categories_1_idlist():
        cat_idx = sfm_test.get_category_idx(cat_id)
        cat_name = categories.get_category_name(cat_id)
        categories_1_idx_map[cat_idx] = (cat_id, cat_name)
    categories_1_idx_list = sorted_dict(categories_1_idx_map)
    for (_cat_idx, (cat_id, cat_name)) in categories_1_idx_list:
        categories_1_names.append("%s(%d)" % (cat_name, cat_id))

    clf.predict(X_test, y_test, categories_1_names)
class Corpus():
    """A corpus directory with sample, vocabulary, category and metadata stores."""

    # ---------------- __init__() ----------------
    def __init__(self, corpus_dir):
        """Create the metadata lock and open the corpus at ``corpus_dir``."""
        self.lock_meta = Lock()
        self.open(corpus_dir)

    # ---------------- __del__() ----------------
    def __del__(self):
        self.close()

    # ---------------- open_db_meta() ----------------
    def open_db_meta(self):
        """Open and return the LevelDB database stored in ``self.meta_dir``."""
        logging.debug(Logger.debug("Corpus open_db_meta() %s" % (self.meta_dir)))
        db_meta = leveldb.LevelDB(self.meta_dir)
        return db_meta

    # ---------------- close_db_meta() ----------------
    def close_db_meta(self, db_meta):
        """Drop this method's reference to ``db_meta``.

        Only the local name is rebound; the caller's handle is unaffected.
        """
        db_meta = None

    def lock(self):
        """Acquire the metadata lock."""
        self.lock_meta.acquire()

    def unlock(self):
        """Release the metadata lock."""
        self.lock_meta.release()

    # ---------------- open() ----------------
    def open(self, corpus_dir):
        """Point this corpus at ``corpus_dir`` and load vocabulary and categories.

        ``corpus_dir`` and its ``samples`` subdirectory are created when
        missing. The loaded categories are printed.
        """
        self.root_dir = corpus_dir
        if not path.isdir(corpus_dir):
            os.mkdir(corpus_dir)

        self.meta_dir = self.root_dir + "/meta"

        self.samples_dir = self.root_dir + "/samples"
        if not path.isdir(self.samples_dir):
            os.mkdir(self.samples_dir)

        self.vocabulary_dir = self.root_dir + "/vocabulary"
        self.vocabulary = Vocabulary(self.vocabulary_dir)

        self.categories_dir = self.root_dir + "/categories"
        self.categories = Categories(self.categories_dir)
        self.categories.load_categories()
        self.categories.print_categories()

    # ---------------- close() ----------------
    def close(self):
        """Do nothing; kept as the counterpart of ``open()``."""
        pass

    # ---------------- acquire_sample_id() ----------------
    def acquire_sample_id(self, num_samples):
        """Reserve ``num_samples`` consecutive sample ids under the metadata lock.

        Returns:
            The first reserved id (the stored maximum before this call).
        """
        self.lock()
        try:
            sample_id = self.get_sample_maxid()
            self.set_sample_maxid(sample_id + num_samples)
        finally:
            # Release the lock even if the metadata read/write fails, so
            # later callers are not blocked forever.
            self.unlock()
        return sample_id

    def get_sample_maxid(self):
        """Return the stored maximum sample id, initialising it to 0 if absent."""
        sample_maxid = 0
        db_meta = self.open_db_meta()
        try:
            str_maxid = db_meta.Get("__sample_maxid__")
            sample_maxid = int(str_maxid)
        except KeyError:
            db_meta.Put("__sample_maxid__", "0")
        self.close_db_meta(db_meta)
        return sample_maxid

    def set_sample_maxid(self, sample_maxid):
        """Store ``sample_maxid`` as the maximum sample id."""
        db_meta = self.open_db_meta()
        db_meta.Put("__sample_maxid__", str(sample_maxid))
        self.close_db_meta(db_meta)

    # ---------------- export_svm_file() ----------------
    def export_svm_file(self, samples_name, svm_file):
        """Load the TF-IDF matrix of ``samples_name`` and save it to ``svm_file``."""
        samples = Samples(self, samples_name)
        logging.debug(Logger.debug("Export svm file..."))
        tm_tfidf = samples.load_tfidf_matrix()
        save_term_matrix_as_svm_file(tm_tfidf, svm_file)

    # ---------------- transform_sensitive_terms() ----------------
    def transform_sensitive_terms(self, sensitive_words, vocabulary):
        """Map a ``{word: weight}`` dict to ``{term_id: weight}`` via ``vocabulary``.

        Returns an empty dict when ``sensitive_words`` is ``None``.
        """
        sensitive_terms = {}
        if sensitive_words is not None:
            for word in sensitive_words:
                weight = sensitive_words[word]
                sensitive_terms[vocabulary.get_term_id(word)] = weight
        return sensitive_terms

    # ---------------- query_by_id() ----------------
    def query_by_id(self, samples_positive, samples_unlabeled, sample_id):
        """Print the metadata, terms and keyword scores of one unlabeled sample.

        The sample's stored content is segmented and its term counts are
        printed; then every term of its sample-matrix row is printed with
        its positive degree, computed against the positive and unlabeled
        term-sample matrices. A missing sample is reported per store.
        """
        tsm_positive = samples_positive.tsm
        tsm_unlabeled = samples_unlabeled.tsm

        # No sensitive words are configured at the moment.
        sensitive_words = {}
        sensitive_terms = self.transform_sensitive_terms(sensitive_words,
                                                         self.vocabulary)

        # Parenthesised single-argument prints run on Python 2 and 3 alike.
        try:
            sample_content = samples_unlabeled.db_content.Get(str(sample_id))
            (_, category, date, title, key, url, msgext) = \
                decode_sample_meta(sample_content)
            (_version, content, (_cat1, _cat2, _cat3)) = msgext

            print("sample id: %d" % (sample_id))
            print("category: %d" % (category))
            print("key: %s" % (key))
            print("url: %s" % (url))
            print("date: %s" % (date))
            print("title: %s" % (title))
            print("---------------- content ----------------")

            sample_terms, term_map = self.vocabulary.seg_content(content)
            print("sample_terms: %d terms_count: %d" % (sample_terms,
                                                        len(term_map)))
            terms_list = sorted_dict_by_values(term_map, reverse=True)
            for (term_id, term_used_in_sample) in terms_list:
                term_text = self.vocabulary.get_term_text(term_id)
                print("%s(%d): %d" % (term_text, term_id, term_used_in_sample))
        except KeyError:
            print("Sample %d not found in db_content." % (sample_id))

        db_sm = tsm_unlabeled.open_db_sm()
        try:
            str_sample_info = db_sm.Get(str(sample_id))
            (category, sample_terms, term_map) = msgpack.loads(str_sample_info)
            print("")
            print("---------------- keywords ----------------")
            print("")
            terms = {}
            for term_id in term_map:
                term_text = self.vocabulary.get_term_text(term_id)
                term_used = term_map[term_id]
                (pd_word, speciality, popularity) = calculate_term_positive_degree(
                    term_id, tsm_positive, tsm_unlabeled, sensitive_terms)
                terms[term_id] = (pd_word, speciality, popularity, term_used,
                                  term_text)

            terms_list = sorted_dict_by_values(terms, reverse=True)
            for (term_id, (pd_word, speciality, popularity, term_used,
                           term_text)) in terms_list:
                print("%s\t%d\t[%.6f,%.6f,%.6f]\t(id:%d)" % (
                    term_text, term_used, pd_word, speciality, popularity,
                    term_id))
        except KeyError:
            print("Sample %d not found in db_sm." % (sample_id))
        finally:
            # Close the handle on unexpected errors too, not only on the
            # success and KeyError paths.
            tsm_unlabeled.close_db(db_sm)
def new_category():
    """Create and save a category from the request payload and return it as JSON.

    The payload is passed to ``loginmanager.verify_token`` before the
    category is created.
    """
    payload = helpers.get_response(request)
    loginmanager.verify_token(payload)
    created = Categories().new(payload)
    created.save()
    return jsonify({'category': created._to_json()})