Beispiel #1
0
    def get_samples_list_by_category_2(self, category_positive):
        """Split the samples of one level-1 category into positive and unlabeled.

        Only samples whose level-1 category id (as computed by
        ``Categories.get_category_1_id``) matches that of ``category_positive``
        are considered.  Of those, a sample is positive when its category id
        equals ``category_positive``; every other one is unlabeled.

        Returns a ``(positive_samples_list, unlabeled_samples_list)`` pair of
        sample-id lists.
        """
        wanted_category_1 = Categories.get_category_1_id(category_positive)

        positives = []
        unlabeled = []
        for sample_id in self.sm_matrix:
            category_id, _sample_terms, _term_map = self.sm_matrix[sample_id]

            # Samples from another level-1 category are ignored entirely.
            if Categories.get_category_1_id(category_id) != wanted_category_1:
                continue

            bucket = positives if category_id == category_positive else unlabeled
            bucket.append(sample_id)

        return positives, unlabeled
Beispiel #2
0
 def __init__(self):
     """Create and load the category and alphabet data, then call nextRound()."""
     # Both data holders are populated through their own load() before use.
     self.categories = Categories()
     self.categories.load()
     self.alphabet = Alphabet()
     self.alphabet.load()
     # Responses start out empty.
     self.responses = []
     self.nextRound()
Beispiel #3
0
    def __init__(self, mainwindow):
        """Build the dialog UI and wire up its category and transaction widgets.

        mainwindow is stored on the instance as ``self.mainwindow``.
        """
        super().__init__()
        self.ui = Ui_Dialog()
        self.ui.setupUi(self)
        self.mainwindow = mainwindow
        # "Expense" is the initially selected type for the category widgets.
        self.ui.expense_radioButton.setChecked(True)
        self.ui.back_pushButton.clicked.connect(self.close)
        self.ui.savecategory_pushButton.clicked.connect(self.save_category)
        # Clicking either type radio button refreshes the category list.
        self.ui.income_radioButton.clicked.connect(self.refresh_categories)
        self.ui.expense_radioButton.clicked.connect(self.refresh_categories)
        self.category_manager = Categories()
        self.refresh_categories()

        ####################################
        # transaction widgets (the ones prefixed with t_)
        # NOTE(review): this banner originally read "categories tab part";
        # confirm which tab these widgets actually live on.
        ####################################
        self.ui.t_expense_radioButton.setChecked(True)
        self.ui.t_expense_radioButton.clicked.connect(
            self.refresh_transaction_categories)
        self.ui.t_income_radioButton.clicked.connect(
            self.refresh_transaction_categories)
        # The date editor starts at today's date.
        self.ui.t_dateEdit.setDate(
            QDate(date.today().year,
                  date.today().month,
                  date.today().day))
        self.ui.t_savebutton.clicked.connect(self.save_transaction)
        self.transaction_manager = Transactions()
        self.refresh_transaction_categories()
Beispiel #4
0
class Round():
    """A single round: one randomly chosen category and alphabet entry,
    together with the responses collected for it."""

    def __init__(self):
        """Load the category and alphabet data and pick the first round."""
        self.categories = Categories()
        self.categories.load()
        self.alphabet = Alphabet()
        self.alphabet.load()
        self.responses = []
        self.nextRound()

    def allResponses(self):
        """Return the 'response' value of every stored response, in order."""
        collected = []
        for entry in self.responses:
            collected.append(entry['response'])
        return collected

    def getResponse(self, ptn):
        """Return the first stored response whose 'tn' equals ptn.

        If there is none (or the lookup fails for any reason), a placeholder
        dict marked invalid with response 'UNK' is returned instead.
        """
        log( 'getResponse for ' + ptn )
        try:
            return [entry for entry in self.responses if entry['tn'] == ptn][0]
        except Exception:
            return { 'tn': ptn, 'valid': False, 'response': 'UNK' }

    def nextRound(self):
        """Pick new random category/alphabet indexes and clear the responses."""
        last_category = len(self.categories.data) - 1
        self.cat_index = randint(0, last_category)
        log(self.cat_index)

        last_alpha = len(self.alphabet.data) - 1
        self.alpha_index = randint(0, last_alpha)
        log(self.alpha_index)

        self.responses = []

    def describe(self):
        """Return the round as text: '<category> that <position> <letter>'."""
        alpha = self.alphabet.data[self.alpha_index]
        category = self.categories.data[self.cat_index]['category']
        return "".join(
            (category, " that ", alpha['position'].lower(), " ", alpha['letter']))
Beispiel #5
0
    def __init__(self):
        """Create zero-filled expense, income and per-day detail tables.

        The expense and income tables are indexed by the category names taken
        from ``Categories`` and share the same four columns; the detail table
        has one column per expense category and rows indexed 1..31.
        """
        self.kategorie = Categories()
        summary_columns = ('Planowane', 'Rzeczywiste', 'Różnica',
                           'St. realizacji budżetu (%)')

        # Expense table
        exp_zeros = [0] * len(self.kategorie.exp_categories)
        self.df_exp = pd.DataFrame(
            {column: exp_zeros for column in summary_columns},
            index=self.kategorie.exp_categories)

        # Income table
        inc_zeros = [0] * len(self.kategorie.inc_categories)
        self.df_inc = pd.DataFrame(
            {column: inc_zeros for column in summary_columns},
            index=self.kategorie.inc_categories)

        # Detailed expense data: one row per index 1..31
        day_zeros = [0] * 31
        self.df_details = pd.DataFrame(
            dict.fromkeys(self.kategorie.exp_categories, day_zeros),
            index=list(range(1, 32)))
Beispiel #6
0
def list_categories():
    """Ask for a listing mode and print the categories, optionally with products.

    Menu options:
      1 - print the numbered category names;
      2 - print the numbered category names, each followed by the products
          whose category name matches;
      3 - go back.
    Anything else, including non-numeric input, is reported through
    ``error_handler()`` and the menu is shown again.
    """
    try:
        option_list_categories = int(
            input(
                "List the categories only or the categories with products?\n1. Categories only\n2. Categories and products\n3. Go back\n"
            ))
    except ValueError:
        # Non-numeric input used to crash here; treat it as an invalid option.
        option_list_categories = None

    if option_list_categories in (1, 2):
        with_products = option_list_categories == 2
        try:
            categories = Categories.load_categories()
            # Products are only loaded when they are going to be printed.
            products = Products.load_products() if with_products else []
            for index, cat in enumerate(categories, start=1):
                print(f"{index}. {cat.name}")
                for prod in products:
                    if prod.get_category_name() == cat.name:
                        print(f"\t{prod}")
            input("Press enter key in order to continue\n")
        except JSONDecodeError:
            input(
                "Error on retrieving the categories. Press enter key in order to continue\n"
            )
    elif option_list_categories == 3:
        print("Going back...\n")
    else:
        error_handler()
        list_categories()
Beispiel #7
0
    def __init__(self, parent=None):
        """Build the main window: views, central scroll area, docks, player,
        timer and overlay.

        parent is forwarded unchanged to the base-class constructor.
        """
        super().__init__(parent)

        # For every view both an instance and its class are kept on self.
        self.youtube_class = YouTubeView
        self.youtube = YouTubeView(self)
        self.tv_class = TVView
        self.tv = TVView(self)
        self.onetv = OneTvView(self)
        self.onetv_class = OneTvView
        self.folder = FolderView(self)
        self.folder_class = FolderView

        # Names of the view attributes created above.
        self.views = ('youtube', 'tv', 'onetv', 'folder')

        #self.container = MainWidget(self)
        # The central widget is a resizable scroll area with a dark style.
        self.container = QScrollArea()
        #self.container.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOn)
        self.container.setWidgetResizable(True)
        self.container.setStyleSheet("""
            QWidget {
                background-color: rgb(50,50,50);
                color: #fff;
                margin: 0;
            }
        """)
        #self.container.setWidget(MainWidget())

        self.setCentralWidget(self.container)
        #self.container.setAttribute(Qt.WA_DontCreateNativeAncestors)
        #self.container.setAttribute(Qt.WA_NativeWindow)

        # The categories widget is docked on the left and receives focus.
        self.categories = Categories(self.container)
        self.addDockWidget(Qt.LeftDockWidgetArea, self.categories, Qt.Vertical)
        self.categories.setFocus()

        # Bottom dock handed to the Player below.
        self.play_control = QDockWidget()
#        self.play_control.setLayout(QVBoxLayout())
#        self.play_control.layout().setContentsMargins(0, 0, 0, 0)
#        self.play_control.layout().setSpacing(0)
#        self.play_control.setFixedHeight(50)
#        self.play_control.setAllowedAreas(Qt.BottomDockWidgetArea)
#        self.play_control.setFeatures(self.play_control.NoDockWidgetFeatures)
        self.addDockWidget(Qt.BottomDockWidgetArea, self.play_control)

        # The player receives the container's window id as a string.
        self.player = Player(str(int(self.container.winId())), self.play_control)

        #player = mpv.MPV(wid=str(int(self.container.winId())),
        #        vo='vdpau', # You may not need this
        #        log_handler=print)
        #player.play(sys.argv[1])

        # self._timer is connected to the timer's timeout signal; the timer is
        # started with an interval of 1000.
        self.timer = QTimer()
        self.timer.timeout.connect(self._timer)
        self.timer.start(1000)
        # The overlay is created over the central widget and starts hidden.
        # NOTE(review): the 1920x1080 size is hard-coded — confirm it is intended.
        self.overlay = Overlay(self.centralWidget())
        self.overlay.resize(1920, 1080)
        self.overlay.hide()
Beispiel #8
0
def crawl_categories():
    """Crawl the categories for the ``url`` query argument and return them as JSON.

    Any exception is turned into a JSON error body with status code 500.
    """
    try:
        crawler = Categories()
        return jsonify(crawler.go(request.args.get('url')))
    except Exception as e:
        error_body = {'errcode': 500, 'errmsg': '抓取分类列表异常: ' + str(e)}
        resp = jsonify(error_body)
        resp.status_code = 500
        return resp
Beispiel #9
0
 def __init__(self, entry: dict, mainwindow):
     """Build the dialog for one entry, show it and load the entry into it.

     entry is a dict; its 'type' key selects which category list is used.
     mainwindow is stored on the instance as ``self.mainwindow``.
     """
     super().__init__()
     self.ui = Ui_Dialog()
     self.ui.setupUi(self)
     # The dialog is shown before the entry data is loaded below.
     self.show()
     self.ui.back_pushButton.clicked.connect(self.close)
     # back_pushButton_2 triggers self.edit.
     self.ui.back_pushButton_2.clicked.connect(self.edit)
     self.entry = entry
     self.mainwindow = mainwindow
     # Only the categories registered under this entry's type are kept.
     self.categories = Categories().category_list[self.entry['type']]
     self.transactions = Transactions()
     self.load_entry()
Beispiel #10
0
def add_expense(user_id: int, raw_message: str):
    """Insert one expense row per parsed message for the given user.

    The category named in each message is looked up for the user and created
    when the lookup returns nothing.
    """
    for message in _parse_input(raw_message):
        category = (
            Categories().get_category(user_id, message.category_name)
            or Categories().add_category(user_id, message.category_name)
        )
        expense_row = {
            "user_id": user_id,
            "category_id": category.id,
            "ammount": message.ammount,
            "created": message.created
        }
        db.insert("expenses", expense_row)
Beispiel #11
0
    def __init__(self, transaction, mainwindow):
        """Build the edit dialog for one transaction and show it.

        transaction is indexed by 'type' to pick the matching category list.
        mainwindow is stored on the instance and disabled here.
        """
        super().__init__()
        self.ui = Ui_edit_transaction_dialog()
        self.ui.setupUi(self)
        self.mainwindow = mainwindow
        self.transaction = transaction
        # The main window is disabled here.
        # NOTE(review): nothing in this constructor re-enables it — confirm
        # that closing the dialog does.
        self.mainwindow.setEnabled(False)
        self.transaction_manager = Transactions()
        self.categories = Categories().category_list[self.transaction['type']]
        # NOTE(review): "laod" looks like a typo for "load"; the method is
        # defined elsewhere, so the name is left untouched here.
        self.laod_transaction()
        self.show()

        self.ui.back_pushButton.clicked.connect(self.close)
        self.ui.savebutton.clicked.connect(self.edit)
        # Prints the transaction to stdout (looks like leftover debug output).
        print(transaction)
Beispiel #12
0
async def categories_list(message: types.Message):
    """Send the list of expense categories, each followed by its aliases."""
    usr_id = int(message.from_user.id)
    categories = Categories(user_id=usr_id).get_all_categories()

    entries = []
    for c in categories:
        entries.append(c.name+' ('+", ".join(c.aliases)+')')

    answer_message = "Категории(группы) трат:\n\n* " + "\n\n* ".join(entries)
    await message.answer(answer_message)
Beispiel #13
0
 def update(self):
     """Reload the rows for this object's day/month/year into the tree and info.

     Each category met for the first time gets a top-level tree item; each row
     becomes a child item beneath its category, and its total
     (price * count) is added to ``self.info``.

     Raises DatabaseError when the connection cannot be opened or the executed
     query is not a SELECT.
     """
     self.info.clear()
     if not (conn.isOpen() or conn.open()):
         raise DatabaseError

     query = QSqlQuery(conn)
     # NOTE(review): the values are formatted straight into the SQL text;
     # consider bound parameters if they can ever come from user input.
     query.prepare('SELECT name, price, count, category FROM outgones WHERE\
         (day = {} and month = {} and year = {})'.format(
             self.day, self.month, self.year
     ))
     query.exec_()
     if not query.isSelect():
         raise DatabaseError

     query.first()
     while query.isValid():
         name = query.value('name')
         price = query.value('price')
         count = query.value('count')
         category = query.value('category')

         if category in self.categories:
             group_item = self.categories[category]
         else:
             # First row of this category: create its top-level item.
             group_item = QTreeWidgetItem(self.tree)
             group_item.setText(0, Categories(category).name)
             self.categories[category] = group_item

         total = price * count
         row_item = QTreeWidgetItem(group_item)
         row_item.setText(0, '{}:  {}x{} = {}'.format(name, price, count, total))
         self.info.addData(category, total)
         query.next()
Beispiel #14
0
    def get_categories_1_weight_matrix(self):
        """Build the category and sample feature matrices for all level-1 categories.

        For every entry of ``categories.categories_1`` the positive/unlabeled
        sample split is fetched from ``self.tsm`` and the per-term positive
        degrees are computed.  The first component of each degree tuple is
        stored as the term's feature weight for the category, and is also
        added as a sample feature for every positive sample containing that
        term.

        Returns a ``(cfm, sfm)`` tuple: the CategoryFeatureMatrix and the
        SampleFeatureMatrix that were filled in.
        """
        tsm = self.tsm
        cfm = CategoryFeatureMatrix()
        sfm = SampleFeatureMatrix()

        categories = self.get_categories()
        for category_name in categories.categories_1:
            category_id = categories.categories_1[category_name]
            positive_samples_list, unlabeled_samples_list = tsm.get_samples_list_by_category_1(category_id)

            # Parenthesised single-argument form: prints the same text under
            # Python 2 and is valid syntax under Python 3.
            print("\n%s(%d) Positive Samples: %d Unlabeled Samples: %d" % (category_name, category_id, len(positive_samples_list), len(unlabeled_samples_list)))

            terms_positive_degree = get_terms_positive_degree_by_category(tsm, positive_samples_list, unlabeled_samples_list)

            # Category features: term id -> first component of its degree tuple.
            features = {}
            for term_id in terms_positive_degree:
                (pd_word, _speciality, _popularity) = terms_positive_degree[term_id]
                features[term_id] = pd_word
            cfm.set_features(category_id, features)

            # Sample features: only terms that received a positive degree.
            for sample_id in positive_samples_list:
                (sample_category, _sample_terms, term_map) = tsm.get_sample_row(sample_id)
                category_1_id = Categories.get_category_1_id(sample_category)
                sfm.set_sample_category(sample_id, category_1_id)
                for term_id in term_map:
                    if term_id in terms_positive_degree:
                        (pd_word, _speciality, _popularity) = terms_positive_degree[term_id]
                        sfm.add_sample_feature(sample_id, term_id, pd_word)

        return cfm, sfm
Beispiel #15
0
    def to_sklearn_data(self):
        """Convert ``self.sm_matrix`` into a CSR matrix plus label list.

        Returns ``(X, y, terms, category_map)`` where

        * ``X`` is a ``csr_matrix`` with one row per sample and one column per
          distinct term id, holding the values stored in each sample's term map;
        * ``y`` is the list of category indexes, one per sample, in row order;
        * ``terms`` maps term id -> column index;
        * ``category_map`` maps (level-1 category id // 1000000) -> category index.
        """
        indptr = [0]
        indices = []
        data = []
        categories = []
        terms = {}
        category_map = {}
        for sample_id in self.sm_matrix:
            (category_id, _sample_terms, term_map) = self.sm_matrix[sample_id]

            category_1_id = Categories.get_category_1_id(category_id)
            # Floor division keeps the key an integer on Python 3 as well;
            # plain "/" would turn it into a float there.
            category_id_1 = category_1_id // 1000000
            category_idx = category_map.setdefault(category_id_1, len(category_map))
            categories.append(category_idx)

            for term_id in term_map:
                # Column indexes are handed out in first-seen order.
                term_idx = terms.setdefault(term_id, len(terms))
                indices.append(term_idx)
                data.append(term_map[term_id])
            indptr.append(len(indices))

        rows = len(self.sm_matrix)
        cols = len(terms)
        # Same "rows cols" output as before, in a form valid on Python 2 and 3.
        print("%d %d" % (rows, cols))
        X = csr_matrix((np.array(data), np.array(indices), np.array(indptr)), shape = (rows, cols))
        y = categories

        return X, y, terms, category_map
Beispiel #16
0
def get_categories() -> str:
    """Return the list of categories with their aliases as one message string."""
    entries = []
    for c in Categories().get_all_categories():
        entries.append(c.name + ' (' + ", ".join(c.aliases) + ')\n')
    return "Категории трат:\n\n- " + "\n- ".join(entries)
Beispiel #17
0
 def treeItems():
     """Fill ``tree`` with one top-level item per category key and one child
     item per entry stored under that key."""
     data = Categories().data
     for group_name in data:
         group_item = QTreeWidgetItem(tree)
         group_item.setText(0, group_name)
         for entry in data[group_name]:
             entry_item = QTreeWidgetItem(group_item)
             entry_item.setText(0, entry)
    def __init__(self):
        """Build the dialog, load the categories, show it and connect its buttons."""
        super().__init__()
        self.ui = Ui_Dialog()
        self.ui.setupUi(self)
        # "Expense" is the initially selected type.
        self.ui.expense_radioButton.setChecked(True)
        self.categories_manager = Categories()
        self.load_categories()
        # Prints the manager object to stdout (looks like leftover debug output).
        print(self.categories_manager)
        self.show()

        # Button and radio-button signal wiring.
        self.ui.add_pushButton.clicked.connect(self.add_button_click)
        self.ui.rename_pushButton.clicked.connect(self.rename_button_click)
        self.ui.delete_pushButton.clicked.connect(self.delete_button_click)
        self.ui.back_pushButton.clicked.connect(self.close)
        self.ui.expense_radioButton.toggled.connect(
            self.expense_radiobtn_active)
        self.ui.income_radioButton.toggled.connect(self.income_radiobtn_active)
Beispiel #19
0
def generate_password_list(individual, words_to_list, plugins):
    """
    Process the categories for the given individual, word list and plugins,
    then hand the result to the YAML output processor.
    """
    categories = Categories(individual, words_to_list, plugins)
    individuals_passwords = categories.process_categories()
    ProcessOutputYaml().output_processor(individuals_passwords)
def create_category(update: Update, context: CallbackContext) -> int:
    """Create a category named by the message text and end the conversation.

    Replies "Success" when ``add_category`` returns a truthy value and
    "This category already exists" otherwise.
    """
    raw_message = update.message.text
    created = Categories().add_category(update.effective_user.id, raw_message)
    reply = "Success" if created else "This category already exists"
    update.message.reply_text(reply)
    return ConversationHandler.END
def categories_list(update, context: CallbackContext):
    """Show the category keyboard and store all categories in the user data.

    Returns 3.
    NOTE(review): 3 is presumably a conversation state id — confirm against
    the handler that registers this callback.
    """
    query_edit_message(update,
                       message='Виберіть категорію:',
                       reply_markup=categories_markup())

    session = context.user_data
    session['categories'] = Categories().all_categories
    # Written back through the context as well, exactly as before.
    context.user_data['categories'] = session['categories']
    return 3
def delete_category(update: Update, context: CallbackContext) -> int:
    """Delete the category named by the (lower-cased) message text.

    When a category with that name exists, it and its expenses are deleted
    and the conversation ends.  Otherwise the user is asked for another name
    and the delete-name state is returned again.
    """
    categories = Categories().get_all_categories()
    text = update.message.text.lower()

    target = next((c for c in categories if text == c.name), None)
    if target is None:
        update.message.reply_text(
            "Category you are trying to delete does not exist\n"
            "Specify a different name\n")
        return State.REPLYING_CATEGORY_NAME_DELETE

    Categories().del_category(target)
    expenses.delete_category(target)
    update.message.reply_text(
        "Category and associated expenses were successfully deleted\n")
    return ConversationHandler.END
 def new_product(product_name):
     """Interactively classify a new product, insert it and return its id.

     The user picks a great category, then a category and - only for the
     "nourriture" great category - a sub category, the conditioned weight and
     whether the food is processed.  Whenever a choice comes back as 'r' the
     whole dialogue is restarted and the result of that new attempt is
     returned.

     Returns a ``(product_id, conditionned_weight, food)`` tuple.
     """
     processed_food = food = False
     conditionned_weight = sub_category = category = great_category = None
     print("pour le new product gret, category  et sub: ", great_category,
           category, sub_category)
     great_category = Categories.choice_list(rank=1)
     # Every restart below is *returned*.  Without the return the outer call
     # used to continue after the retry and insert a second row that had 'r'
     # stored as one of its categories.
     if great_category == 'r':
         print("vous devez choisir une grande catégorie !")
         return Product.new_product(product_name)
     elif great_category != "nourriture":
         category = Categories.choice_list(great_category=great_category,
                                           rank=2)
         if category == 'r':
             return Product.new_product(product_name)
     else:
         # NOTE(review): `food` is never set to True, not even in this
         # "nourriture" branch, so the third returned value is always False -
         # confirm whether that is intended.
         category = Categories.choice_list(great_category=great_category,
                                           rank=2,
                                           food=True)
         if category == 'r':
             return Product.new_product(product_name)
         sub_category = Categories.choice_list(
             great_category=great_category, category=category, rank=3)
         if sub_category == 'r':
             return Product.new_product(product_name)
         conditionned_weight = Product.cond_weight()
         rep = Check.check_yn(
             "Est_ce de la nourriture transformée? ")
         if rep == "o":
             processed_food = True
     datas = product_name, great_category, category, sub_category, processed_food, conditionned_weight
     with Connection.get_cursor() as cur:
         sql = (
             """INSERT INTO product(product_name, great_category, category, sub_category, processed_food, conditionned_weight)
                VALUES (%s, %s, %s, %s, %s, %s);""")
         cur.execute(sql, datas)
     # The new row's id is read back as the highest id in the table.
     with Connection.get_cursor() as cur:
         cur.execute("SELECT MAX(id) FROM product")
         product_id = cur.fetchone()
     return product_id[0], conditionned_weight, food
Beispiel #24
0
def _create_categories():
    """Return a Categories object with the SAME_LEMMA, DIFF_LEMMA and
    NOT_IN_BIN categories created, in that order."""
    categories = Categories()
    for category_name in (SAME_LEMMA, DIFF_LEMMA, NOT_IN_BIN):
        categories.create_category(category_name)
    return categories
Beispiel #25
0
async def list_categories(message: types.Message):
    """
    Answer the message with a Markdown list of all categories and their aliases
    """
    categories_strs = []
    for c in Categories().get_all_categories():
        heading = bold(f"\"{c.category_name.capitalize()}\"\n")
        categories_strs.append(text(heading + f"Теги: {c.aliases}\n"))

    answer_text = "\n".join(categories_strs)
    full_text = text(bold("Список категорий\n\n") + answer_text)
    await message.answer(full_text, parse_mode=ParseMode.MARKDOWN)
def add_expense(raw_message: str) -> Expense:
    """Parse the raw message, store it as an expense row and return the Expense.

    The row is stored with the resolved category's name; the returned Expense
    has ``id=None``.
    """
    parsed_message = _parse_message(raw_message)
    category = Categories().get_category(parsed_message.category_text)
    expense_row = {
        "amount": parsed_message.amount,
        "created": _get_now_formatted(),
        "category_name": category.name,
        "raw_text": raw_message
    }
    db.insert("expense", expense_row)
    return Expense(id=None,
                   amount=parsed_message.amount,
                   category_name=category.name)
Beispiel #27
0
    def simulation(self):
        """Add ``self.n`` random transactions to the budget and print the reports.

        Each transaction is an income with weight 0.2 or an expense with weight
        0.8, with a random type ('r' or 'p'), day (1..31) and category.  The
        amount range depends on the category: 1500..8000 for the incomes
        'Wynagrodzenie'/'Premia' and 10..1000 for other incomes; 500..2000 for
        the expense 'Mieszkanie' and 5..500 for other expenses.
        """
        kategorie = Categories()
        trans = ['inc', 'exp']
        p = [0.2, 0.8]
        categories_exp = kategorie.show_exp_categories()
        categories_inc = kategorie.show_inc_categories()
        types = ['r', 'p']
        days = list(range(1, 32))

        for _ in range(self.n):
            # The order of the random draws is kept: kind, type, day,
            # category, amount.
            tr = random.choices(trans, p)[0]
            t = random.choice(types)
            d = random.choice(days)
            if tr == 'inc':
                cat = random.choice(categories_inc)
                if cat in ('Wynagrodzenie', 'Premia'):
                    low, high = 1500, 8000
                else:
                    low, high = 10, 1000
                self.budzet.add_income(cat, t, random.randint(low, high), d)
            elif tr == 'exp':
                cat = random.choice(categories_exp)
                if cat == 'Mieszkanie':
                    low, high = 500, 2000
                else:
                    low, high = 5, 500
                self.budzet.add_expense(cat, t, random.randint(low, high), d)

        print('\n')
        print(self.budzet.show_transactions() + '\n')
        print(self.budzet.real_budget() + '\n' + self.budzet.plan_budget())
        print(self.budzet.show_df_exp())
        print('\n')
        print(self.budzet.show_df_inc())
        print('\n')
        print(self.budzet.show_df_details())
Beispiel #28
0
def add_expense(raw_message: str) -> Expense:
    """Add a new expense.

    Takes the text of a message that arrived at the bot, stores the parsed
    expense and returns it."""
    parsed_message = _parse_message(raw_message)
    category = Categories().get_category(
        parsed_message.category_text)
    amount = parsed_message.amount
    # The id of the inserted row is not used.
    db.insert("expense", {
        "amount": amount,
        "created": _get_now_formatted(),
        "category_codename": category.codename,
        "raw_text": raw_message
    })
    return Expense(amount=amount,
                   category_name=category.name)
Beispiel #29
0
def add_expense(raw_message: str) -> Expense:
    """Add a new expense from the text of a message received by the bot."""
    parsed_message = _parse_message(raw_message)
    category = Categories().get_category(parsed_message.category_text)
    new_row = {
        "amount": parsed_message.amount,
        "created": _get_now_formatted(),
        "category_codename": category.codename,
        "raw_text": raw_message
    }
    # The id of the inserted row is not used.
    db.insert("expense", new_row)
    return Expense(id=None,
                   amount=parsed_message.amount,
                   category_name=category.name)
Beispiel #30
0
 def update_table(self, item):
     """Show the table stored under ``item`` in ``Categories().tables``.

     The first row of that table supplies the horizontal header labels; every
     following row becomes one row of cells.
     """
     rows = Categories().tables[item]
     self.setColumnCount(len(rows[0]))
     self.setRowCount(len(rows) - 1)

     for row_index, row in enumerate(rows):
         for column_index, cell in enumerate(row):
             if row_index == 0:
                 self.setHorizontalHeaderItem(column_index,
                                              QTableWidgetItem(cell))
             else:
                 # Data rows are shifted up by one because of the header row.
                 self.setItem(row_index - 1, column_index,
                              QTableWidgetItem(cell))

     header = self.horizontalHeader()
     header.setStyleSheet(
         "::section{Background-color:rgb(176,224,230);border-radius:2px;}")
     self.resizeColumnsToContents()
     self.horizontalHeader().setMaximumHeight(200)
Beispiel #31
0
def remove_category():
    """Interactively remove a category together with all of its products.

    Menu options: 1 continues with the removal, 2 goes back.  Anything else,
    including non-numeric input, is reported through ``error_handler()`` and
    the menu is shown again.

    When the entered category is not in the loaded list the user may enter 1
    to try another name; any other answer returns to the caller.
    """
    try:
        option_remove_category = int(
            input(
                "Warning! Deleting a category will also delete all the products inside of it.\n1. Continue\n2. Go back\n"
            ))
    except ValueError:
        # Non-numeric input used to crash here; treat it as an invalid option.
        option_remove_category = None

    if option_remove_category == 1:
        category_to_remove = Category(
            input("Introduce the name of the category to be removed:\n"))
        try:
            categories = Categories.load_categories()
            if category_to_remove in categories:
                # Remove the category's products first, then the category.
                products = Products.load_products()
                for prod in products:
                    if prod.get_category_name() == category_to_remove.name:
                        Products.remove_product(prod)
                Categories.remove_category(category_to_remove)
                input(
                    "Category -" + str(category_to_remove) +
                    "- and all its products were removed successfully.\nPress enter key in order to continue\n"
                )
            else:
                try:
                    category_option = int(
                        input(
                            "This category does not exist in the list. Input 1 to try entering another category or any other number to return to the store menu:\n"
                        ))
                except ValueError:
                    # Anything that is not the number 1 returns to the menu.
                    category_option = None
                if category_option == 1:
                    remove_category()
        except JSONDecodeError:
            input(
                "Error on retrieving the categories. Press enter key in order to continue\n"
            )
    elif option_remove_category == 2:
        print("Going back...\n")
    else:
        error_handler()
        remove_category()
Beispiel #32
0
def add_expense(raw_message: str) -> Expense:
    """Add a new expense.

    Takes the text of a message that arrived at the bot."""
    parsed_message = _parse_message(raw_message)
    category = Categories().get_category(parsed_message.category_text)
    amount = parsed_message.amount
    record = {
        "amount": amount,
        "created": _get_now_formatted(),
        "category_codename": category.codename,
        "raw_text": raw_message
    }
    # The id of the inserted row is not used.
    db.insert("expense", record)
    return Expense(id=None, amount=amount, category_name=category.name)
Beispiel #33
0
 def _generate_rows(self) -> [[str]]:
     """Build one cleaned row per result item; return None when there are none."""
     rows = []
     for item in self._result.results:
         authors = self._process_authors(item.authors)
         topics = self._process_set(item.topics)
         src_info = item.source.data
         raw_row = [
             src_info.date, src_info.year, authors, item.title,
             item.category,
             Categories().instance.name_for_key(self._cat_id),
             self._ref_type, topics,
             self._journal_title(), src_info.volume, src_info.number,
             src_info.page, src_info.doi, item.pdf
         ]
         rows.append(self._clean_list(raw_row))
     return rows or None
Beispiel #34
0
    def open(self, corpus_dir):
        """Open the corpus rooted at ``corpus_dir``.

        The root and its "samples" directory are created when they are
        missing.  The vocabulary and categories objects are built on their
        sub-directories, and the categories are loaded and printed.
        """
        root = corpus_dir
        self.root_dir = root
        if not path.isdir(root):
            os.mkdir(root)

        # The meta directory is only recorded here, not created.
        self.meta_dir = root + "/meta"

        samples_dir = root + "/samples"
        self.samples_dir = samples_dir
        if not path.isdir(samples_dir):
            os.mkdir(samples_dir)

        vocabulary_dir = root + "/vocabulary"
        self.vocabulary_dir = vocabulary_dir
        self.vocabulary = Vocabulary(vocabulary_dir)

        categories_dir = root + "/categories"
        self.categories_dir = categories_dir
        categories = Categories(categories_dir)
        self.categories = categories
        categories.load_categories()
        categories.print_categories()
Beispiel #35
0
def multicategories_predict(samples_test, model_name, result_dir):
    """Train a classifier on a saved model's matrices and predict the test samples.

    The train sample/category feature matrices are loaded from
    ``<model_name>.sfm`` / ``<model_name>.cfm`` (inside ``result_dir`` when it
    is given; the directory is created if missing).  A test sample feature
    matrix is built from ``samples_test`` using the features stored for one
    fixed category id, the classifier is trained on the train matrix and
    asked to predict the test matrix.

    Returns None in every case; a warning/error is logged and the function
    returns early when ``model_name`` is empty or ``result_dir`` cannot be
    created.
    """
    if model_name is None or len(model_name) == 0:
        # logging.warn is a deprecated alias of logging.warning.
        logging.warning(Logger.warn("model_name must not be NULL."))
        return

    if result_dir is None:
        cfm_file = "%s.cfm" % (model_name)
        sfm_file = "%s.sfm" % (model_name)
    else:
        if not os.path.isdir(result_dir):
            try:
                os.mkdir(result_dir)
            except OSError:
                logging.error(Logger.error("mkdir %s failed." % (result_dir)))
                return
        cfm_file = "%s/%s.cfm" % (result_dir, model_name)
        sfm_file = "%s/%s.sfm" % (result_dir, model_name)

    # This is a debug message, so it is formatted with Logger.debug like its
    # neighbours (it used Logger.error before).
    logging.debug(Logger.debug("Loading train sample feature matrix ..."))
    sfm_train = SampleFeatureMatrix()
    sfm_train.load(sfm_file)
    logging.debug(Logger.debug("Loading train category feature matrix ..."))
    cfm_train = CategoryFeatureMatrix()
    cfm_train.load(cfm_file)

    logging.debug(Logger.debug("Making sample feature matrix for test data ..."))
    # NOTE(review): the features are always taken from this hard-coded
    # category id - confirm that is intended.  It has its own name so it is
    # not confused with the loop variable further down.
    feature_category_id = 2000000
    sfm_test = SampleFeatureMatrix(sfm_train.get_category_id_map(), sfm_train.get_feature_id_map())

    features = cfm_train.get_features(feature_category_id)

    for sample_id in samples_test.tsm.sample_matrix():
        (sample_category, _sample_terms, term_map) = samples_test.tsm.get_sample_row(sample_id)

        category_1_id = Categories.get_category_1_id(sample_category)

        sfm_test.set_sample_category(sample_id, category_1_id)
        # Only features that occur in the sample's term map are added.
        for feature_id in features:
            if feature_id in term_map:
                feature_weight = features[feature_id]
                sfm_test.add_sample_feature(sample_id, feature_id, feature_weight)

    logging.debug(Logger.debug("train sample feature matrix - features:%d categories:%d" % (sfm_train.get_num_features(), sfm_train.get_num_categories())))
    X_train, y_train = sfm_train.to_sklearn_data()

    logging.debug(Logger.debug("test sample feature matrix - features:%d categories:%d" % (sfm_test.get_num_features(), sfm_test.get_num_categories())))
    X_test, y_test = sfm_test.to_sklearn_data()

    clf = Classifier()

    logging.debug(Logger.debug("Classifier training ..."))
    clf.train(X_train, y_train)

    logging.debug(Logger.debug("Classifier predicting ..."))

    categories = samples_test.get_categories()

    # Display names "name(id)" for the level-1 categories, ordered through
    # sorted_dict() over their index in the test matrix.
    categories_1_names = []

    categories_1_idx_map = {}
    categories_1_idlist = categories.get_categories_1_idlist()
    for category_id in categories_1_idlist:
        category_idx = sfm_test.get_category_idx(category_id)
        category_name = categories.get_category_name(category_id)
        categories_1_idx_map[category_idx] = (category_id, category_name)
    categories_1_idx_list = sorted_dict(categories_1_idx_map)
    for (category_idx, (category_id, category_name)) in categories_1_idx_list:
        categories_1_names.append("%s(%d)" % (category_name, category_id))

    clf.predict(X_test, y_test, categories_1_names)
Beispiel #36
0
class Corpus():
    """A text corpus stored under a single root directory.

    Below the root directory this class uses:
      * ``meta``       - a LevelDB holding corpus-wide counters (currently
                         only the ``__sample_maxid__`` key),
      * ``samples``    - a directory created on open,
      * ``vocabulary`` - handed to ``Vocabulary``,
      * ``categories`` - handed to ``Categories``.
    """

    # ---------------- __init__() ----------------
    def __init__(self, corpus_dir):
        """Create the meta lock and open the corpus rooted at ``corpus_dir``."""
        # Serialises the read-modify-write of the sample id counter.
        self.lock_meta = Lock()
        self.open(corpus_dir)

    # ---------------- __del__() ----------------
    def __del__(self):
        """Close the corpus when the object is finalised."""
        self.close()

    # ---------------- open_db_meta() ----------------
    def open_db_meta(self):
        """Open and return a LevelDB handle on ``self.meta_dir``."""
        logging.debug(Logger.debug("Corpus open_db_meta() %s" % (self.meta_dir) ))
        return leveldb.LevelDB(self.meta_dir)

    # ---------------- close_db_meta() ----------------
    def close_db_meta(self, db_meta):
        """Drop this method's reference to a handle from ``open_db_meta()``.

        No explicit close call is made: only the local name is released, so
        the caller's own reference is unaffected.
        NOTE(review): presumably the database is actually released once the
        caller drops its last reference - confirm against the leveldb binding.
        """
        del db_meta

    def lock(self):
        """Acquire the meta lock."""
        self.lock_meta.acquire()

    def unlock(self):
        """Release the meta lock."""
        self.lock_meta.release()

    # ---------------- open() ----------------
    def open(self, corpus_dir):
        """Set up directory attributes and load vocabulary and categories.

        Creates ``corpus_dir`` and its ``samples`` sub-directory when they do
        not exist yet (single level only, via ``os.mkdir``).
        """
        self.root_dir = corpus_dir
        if not path.isdir(corpus_dir):
            os.mkdir(corpus_dir)

        self.meta_dir = self.root_dir + "/meta"

        self.samples_dir = self.root_dir + "/samples"
        if not path.isdir(self.samples_dir):
            os.mkdir(self.samples_dir)

        self.vocabulary_dir = self.root_dir + "/vocabulary"
        self.vocabulary = Vocabulary(self.vocabulary_dir)

        self.categories_dir = self.root_dir + "/categories"
        self.categories = Categories(self.categories_dir)
        self.categories.load_categories()
        self.categories.print_categories()

    # ---------------- close() ----------------
    def close(self):
        """Close the corpus. Currently nothing needs to be released."""
        pass

    # ---------------- acquire_sample_id() ----------------
    def acquire_sample_id(self, num_samples):
        """Reserve ``num_samples`` sample ids, unique across the whole corpus.

        The stored counter is read and advanced by ``num_samples`` while the
        meta lock is held.

        Returns:
            The counter value before the advance, i.e. the first reserved id.
        """
        self.lock()
        try:
            first_id = self.get_sample_maxid()
            self.set_sample_maxid(first_id + num_samples)
        finally:
            # Release even when the meta DB raises; otherwise the lock stays
            # held and every later caller would block forever.
            self.unlock()

        return first_id

    def get_sample_maxid(self):
        """Return the stored sample id counter, initialising it to 0 if absent."""
        sample_maxid = 0
        db_meta = self.open_db_meta()
        try:
            sample_maxid = int(db_meta.Get("__sample_maxid__"))
        except KeyError:
            # Key not stored yet: persist the initial counter value.
            db_meta.Put("__sample_maxid__", "0")
        finally:
            self.close_db_meta(db_meta)

        return sample_maxid

    def set_sample_maxid(self, sample_maxid):
        """Store ``sample_maxid`` as the new sample id counter."""
        db_meta = self.open_db_meta()
        try:
            db_meta.Put("__sample_maxid__", str(sample_maxid))
        finally:
            self.close_db_meta(db_meta)

    # ---------------- export_svm_file() ----------------
    def export_svm_file(self, samples_name, svm_file):
        """Load the TF-IDF matrix of ``samples_name`` and save it to ``svm_file``."""
        samples = Samples(self, samples_name)

        logging.debug(Logger.debug("Export svm file..."))
        tm_tfidf = samples.load_tfidf_matrix()

        save_term_matrix_as_svm_file(tm_tfidf, svm_file)

    # ---------------- transform_sensitive_terms() ----------------
    def transform_sensitive_terms(self, sensitive_words, vocabulary):
        """Re-key a ``{word: weight}`` mapping by vocabulary term id.

        Args:
            sensitive_words: mapping from word to weight, or None.
            vocabulary: object providing ``get_term_id(word)``.

        Returns:
            A new ``{term_id: weight}`` dict; empty when ``sensitive_words``
            is None or empty.
        """
        if sensitive_words is None:
            return {}

        sensitive_terms = {}
        for word in sensitive_words:
            weight = sensitive_words[word]
            sensitive_terms[vocabulary.get_term_id(word)] = weight
        return sensitive_terms

    # ---------------- query_by_id() ----------------
    def query_by_id(self, samples_positive, samples_unlabeled, sample_id):
        """Print a diagnostic dump of one unlabeled sample to stdout.

        Two sections are printed:
          1. The sample's metadata from ``samples_unlabeled.db_content`` and
             the terms obtained by segmenting its content, in the order
             returned by ``sorted_dict_by_values(..., reverse=True)``.
          2. For every term stored for the sample in the term-sample DB, its
             use count and the positive degree / speciality / popularity
             returned by ``calculate_term_positive_degree``.

        A ``KeyError`` raised inside either section is reported as
        "not found" for that section and does not abort the other one.

        Single-argument ``print(...)`` is used throughout so the output is the
        same as with the Python 2 print statement.
        """
        tsm_positive = samples_positive.tsm
        tsm_unlabeled = samples_unlabeled.tsm

        # Optional per-word weights; all entries are currently disabled.
        sensitive_words = {
            # u"立案": 3.0,      # case filed
            # u"获刑": 3.0,      # sentenced
            # u"受贿": 3.0,      # taking bribes
            # u"有期徒刑": 3.0,  # fixed-term imprisonment
            # u"宣判": 3.0,      # verdict pronounced
            # u"审计": 2.0,      # audit
            # u"调查": 2.0,      # investigation
        }

        sensitive_terms = self.transform_sensitive_terms(sensitive_words, self.vocabulary)

        # ---- section 1: metadata and segmented content ----
        try:
            sample_content = samples_unlabeled.db_content.Get(str(sample_id))

            (_, category, date, title, key, url, msgext) = decode_sample_meta(sample_content)
            # Only the content is used; the unpack still checks the shape.
            (_version, content, (_cat1, _cat2, _cat3)) = msgext

            print("sample id: %d" % (sample_id))
            print("category: %d" % (category))
            print("key: %s" % (key))
            print("url: %s" % (url))
            print("date: %s" % (date))
            print("title: %s" % (title))
            print("---------------- content ----------------")

            sample_terms, term_map = self.vocabulary.seg_content(content)
            print("sample_terms: %d terms_count: %d" % (sample_terms, len(term_map)))
            terms_list = sorted_dict_by_values(term_map, reverse=True)
            for (term_id, term_used_in_sample) in terms_list:
                term_text = self.vocabulary.get_term_text(term_id)
                print("%s(%d): %d" % (term_text, term_id, term_used_in_sample))

        except KeyError:
            print("Sample %d not found in db_content." % (sample_id))

        # ---- section 2: per-term keyword statistics ----
        db_sm = tsm_unlabeled.open_db_sm()
        try:
            str_sample_info = db_sm.Get(str(sample_id))
            (_category, _sample_terms, term_map) = msgpack.loads(str_sample_info)
            print("")
            print("---------------- keywords ----------------")
            print("")
            terms = {}
            for term_id in term_map:
                term_text = self.vocabulary.get_term_text(term_id)
                term_used = term_map[term_id]
                (pd_word, speciality, popularity) = calculate_term_positive_degree(term_id, tsm_positive, tsm_unlabeled, sensitive_terms)
                terms[term_id] = (pd_word, speciality, popularity, term_used, term_text)

            terms_list = sorted_dict_by_values(terms, reverse=True)
            for (term_id, (pd_word, speciality, popularity, term_used, term_text)) in terms_list:
                print("%s\t%d\t[%.6f,%.6f,%.6f]\t(id:%d)" % (term_text, term_used, pd_word, speciality, popularity, term_id))

        except KeyError:
            print("Sample %d not found in db_sm." % (sample_id))
        finally:
            # Close the handle on every exit path, not only on success or
            # KeyError.
            tsm_unlabeled.close_db(db_sm)
Beispiel #37
0
def new_category():
	"""Build a category from the current request, save it and return it as JSON.

	The request payload is obtained through ``helpers.get_response`` and passed
	to ``loginmanager.verify_token`` before the category is created.
	NOTE(review): the return value of ``verify_token`` is ignored, so it
	presumably rejects bad tokens by raising/aborting - confirm.

	Returns:
		The ``jsonify`` response wrapping ``{'category': <category json>}``.
	"""
	payload = helpers.get_response(request)
	loginmanager.verify_token(payload)

	created = Categories().new(payload)
	created.save()

	body = {'category': created._to_json()}
	return jsonify(body)