def users():
    """Render the account users page with all users sorted by username."""
    all_users = utils.sorted_dict(db.get_users(), 'username')
    return render_template('accounts/users.html', users=all_users)
Example #2
0
def users():
    """List every account, ordered by username, on the users template."""
    user_map = db.get_users()
    context = dict(users=utils.sorted_dict(user_map, 'username'))
    return render_template('accounts/users.html', **context)
Example #3
0
 def get_categories_1_names(self):
     """Return the level-1 category names, ordered by category id.

     ``self.categories_1`` maps name -> id; this inverts it and relies on
     ``sorted_dict`` to produce (id, name) pairs in id order.
     """
     id_to_name = {cid: cname for cname, cid in self.categories_1.items()}
     return [cname for _cid, cname in sorted_dict(id_to_name)]
Example #4
0
def compute_cos(X, users_ratings, N):
    """Score X against every user's ratings by cosine similarity.

    Returns the first N entries of the sorted_dict ordering of
    {user_key: cos(X, ratings)}.
    """
    similarities = {key: cos(X, ratings)
                    for key, ratings in users_ratings.items()}
    return utils.sorted_dict(similarities)[:N]
Example #5
0
    def export_categories_to_xls(self, categories_useinfo, xls_file):
        """Write category usage info to an xls workbook.

        categories_useinfo: mapping of category_id -> usage count
            (presumably sample counts; verify against caller).
        xls_file: destination path; silently does nothing when None.

        Category ids encode a 3-level hierarchy in decimal: level-1 ids are
        multiples of 1,000,000, level-2 ids are multiples of 1,000, and all
        other ids are level-3 (established by the modulo checks below).
        """
        categories = self

        if xls_file is None:
            return

        wb = xlwt.Workbook(encoding='utf-8')
        ws = wb.add_sheet("categories")

        # Header row.
        ws.write(0, 0, 'ID')
        ws.write(0, 1, 'CAT1')
        ws.write(0, 2, 'CAT2')
        ws.write(0, 3, 'CAT3')
        ws.write(0, 4, 'SAMPLES')

        rowidx = 1
        # sorted_dict presumably yields (category_id, usage) pairs in id
        # order — TODO confirm its contract.
        categories_list = sorted_dict(categories_useinfo)
        for (category_id, category_used) in categories_list:
            # Decode the ancestor ids for this row; -1 marks "no such level".
            if category_id % 1000 == 0:
                if category_id % 1000000 == 0:
                    # Level-1 id: no level-2/3 ancestors.
                    category_3 = -1
                    category_2 = -1
                    category_1 = category_id
                else:
                    # Level-2 id: derive its level-1 parent.
                    category_3 = -1
                    category_2 = category_id
                    category_1 = int(category_id / 1000000) * 1000000
            else:
                # Level-3 id: derive both ancestors.
                category_3 = category_id
                category_2 = int(category_id / 1000) * 1000
                category_1 = int(category_id / 1000000) * 1000000

            category_1_name = categories.get_category_name(category_1)
            category_2_name = categories.get_category_name(category_2)
            category_3_name = categories.get_category_name(category_3)

            logging.debug(Logger.debug("id:%d 1:%d 2:%d 3:%d" % ( category_id, category_1, category_2, category_3)))

            # One data row per category id.
            ws.write(rowidx, 0, category_id)
            ws.write(rowidx, 1, category_1_name)
            ws.write(rowidx, 2, category_2_name)
            ws.write(rowidx, 3, category_3_name)
            ws.write(rowidx, 4, category_used)

            rowidx += 1

        wb.save(xls_file)
        logging.debug(Logger.debug("Export categories to xls file %s" % (xls_file)))
Example #6
0
    def print_categories(self):
        """Log every known category as "name - id", ordered by category id."""
        logging.info(Logger.notice("--------------- categories -----------------"))
        # Invert all three name->id maps into one id->name map.  Iteration
        # order (level 1, then 2, then 3) matches the original, so on any
        # duplicate id the later level's name wins, exactly as before.
        id_to_name = {}
        for name_to_id in (self.categories_1, self.categories_2, self.categories_3):
            for cat_name, cat_id in name_to_id.items():
                id_to_name[cat_id] = cat_name

        for (cat_id, cat_name) in sorted_dict(id_to_name):
            logging.info(Logger.notice("%s - %d" % (cat_name, cat_id)))
Example #7
0
def index():
    """Admin landing page: list workflows and, on POST, launch one.

    GET renders the sorted workflow list.  POST looks up the selected
    workflow by name, logs the request, substitutes form values into the
    command template, and queues the task; the rendered page then carries
    the job handle and a timestamp-based result key for polling.
    """
    workflows = config.get_workflows()
    job = None
    result_key = None
    workflow_id = None
    workflows = utils.sorted_dict(workflows, 'name')
    if request.method == 'POST':
        form = request.form
        workflow_id = form.get('workflow_id')
        if workflow_id:
            workflow = [x for x in workflows
                        if x.get('name') == workflow_id][0]
            username = session.get('user', {}).get('username')
            db.log({
                'ip': request.remote_addr,
                'user': username,
                'command': 'Workflow: {0}'.format(workflow_id),
            })
            args = workflow.get('arguments')
            # substitute declared args into the command template
            task = workflow.get('command')
            if args:
                for a in args:
                    arg_name = a.get('name')
                    # Fix: reuse the single lookup (the original fetched the
                    # value, ignored it, then fetched again) and skip missing
                    # fields — str.replace(..., None) raises TypeError.
                    arg_val = form.get(arg_name, None)
                    if arg_val is not None:
                        task = task.replace('<{0}>'.format(arg_name), arg_val)
            # add proxy_user to get launchpad user
            task = task.replace('<proxy_user>', username)
            # generate result_key
            result_key = str(int(time.time()))
            # run command
            job = queue_task(ops.run_fabric_task, task, result_key,
                             workflow.get('notify'))
    ctx = {
        'workflows': workflows,
        'job': job,
        'result_key': result_key,
    }
    return render_template('admin/index.html', **ctx)
Example #8
0
def index():
    """Admin landing page: list workflows and, on POST, launch one.

    GET renders the sorted workflow list.  POST looks up the selected
    workflow by name, logs the request, substitutes form values into the
    command template, and queues the task; the rendered page then carries
    the job handle and a timestamp-based result key for polling.
    """
    workflows = config.get_workflows()
    job = None
    result_key = None
    workflow_id = None
    workflows = utils.sorted_dict(workflows, 'name')
    if request.method == 'POST':
        form = request.form
        workflow_id = form.get('workflow_id')
        if workflow_id:
            workflow = [x for x in workflows if x.get('name') == workflow_id][0]
            username = session.get('user', {}).get('username')
            db.log({
                'ip': request.remote_addr,
                'user': username,
                'command': 'Workflow: {0}'.format(workflow_id),
            })
            args = workflow.get('arguments')
            # substitute declared args into the command template
            task = workflow.get('command')
            if args:
                for a in args:
                    arg_name = a.get('name')
                    # Fix: reuse the single lookup (the original fetched the
                    # value, ignored it, then fetched again) and skip missing
                    # fields — str.replace(..., None) raises TypeError.
                    arg_val = form.get(arg_name, None)
                    if arg_val is not None:
                        task = task.replace('<{0}>'.format(arg_name), arg_val)
            # add proxy_user to get launchpad user
            task = task.replace('<proxy_user>', username)
            # generate result_key
            result_key = str(int(time.time()))
            # run command
            job = queue_task(ops.run_fabric_task, task, result_key,
                workflow.get('notify'))
    ctx = {
        'workflows': workflows,
        'job': job,
        'result_key': result_key,
    }
    return render_template('admin/index.html', **ctx)
Example #9
0
def main() -> None:
    """
    Main method of Seddit. Processes arguments, performs search, and returns the result
    """

    # ========================================================= Read input

    # Load config file
    config = load_config(DEFAULT_CONFIG_FILE)

    # Read command line arguments
    args = load_params()

    # Load config file from CLI argument
    # (values from --config are read on top of the defaults loaded above)
    if args.config:
        config.read(args.config)
    """ Convert arguments to local variables """

    # The name of the subreddit being scraped
    sub_name = args.subreddit

    # Whether to force each feed to update regardless of cache validity
    # CLI flag wins; otherwise the [DEFAULT] config section decides.
    force = True if args.force else config["DEFAULT"].getboolean(
        "force", fallback=False)
    # Whether to display a graph of the most popular terms
    show_graph = True if args.graph else config["DEFAULT"].getboolean(
        "show_graph", fallback=False)
    # The post limit for refreshing feeds
    try:
        feed_limit = args.feed_limit if args.feed_limit else config[
            "DEFAULT"].getint("feed_limit")
        if feed_limit < 0:  # Change non-positive feed limit to "None"
            feed_limit = None
    except TypeError:
        # getint() returns None for a missing key, making "< 0" raise
        feed_limit = None  # Convert any non-number limit to None
    # Method for scoring: "count" or "score" are the only accepted values
    scoring = args.scoring.lower()
    if scoring == "count":
        method = PostCache.COUNT
    elif scoring == "score":
        method = PostCache.SCORE
    else:
        raise ValueError(f"Unrecognized scoring method '{scoring}'")
    # Read frequency threshold
    threshold = args.threshold if args.threshold else config.getint(
        "Filters", "threshold")
    """ Ingest CSV files """

    # Read search terms from CSV (None when no path is configured)
    search_term_path = args.search_terms if args.search_terms else config[
        "Files"]["search_terms"]
    search_terms = utils.ingest_csv(
        search_term_path) if search_term_path else None

    # Read term groups from CSV
    term_groups_path = args.term_groups if args.term_groups else config[
        "Files"]["term_groups"]
    terms_list = utils.ingest_csv(
        term_groups_path) if term_groups_path else None
    term_groups = data.TermGroups(terms_list)

    # Read in word filter CSV files and flatten to 1D list
    # Precedence: CLI > config (space-separated paths) > none.
    word_set = set()
    if args.word_filter:
        word_filters = args.word_filter
    elif config["Files"].get("word_filters"):
        word_filters = config["Files"].get("word_filters").split(" ")
    else:
        word_filters = []

    for path in word_filters:
        word_array = utils.ingest_csv(path)
        # Add every word in file to set (deduplicates across files)
        for row in word_array:
            for word in row:
                word_set.add(word.strip())

    filtered_words = list(word_set) if word_set else None

    # ========================================================= Load Data

    # Create PRAW reddit object
    reddit = praw.Reddit(client_id=config["PRAW"]["client_id"],
                         client_secret=config["PRAW"]["client_secret"],
                         user_agent=config["PRAW"]["user_agent"])

    # Load cache from file; one JSON cache file per subreddit
    cache_path = f"{config['Cache']['dir_path']}{os.path.sep}{sub_name.lower()}.json"
    cache = PostCache(sub_name, cache_path, reddit,
                      config["Cache"].getint("ttl_hot"),
                      config["Cache"].getint("ttl_new"),
                      config["Cache"].getint("ttl_top"))

    # Refresh cache; save only when refresh() reports changes
    if cache.refresh(force=force, limit=feed_limit):
        cache.save()

    # ========================================================= Perform search

    # Perform search term result if provided, otherwise perform word count
    if search_terms:
        result_dict = cache.search_terms(
            search_terms,
            ignore_title_regex=config["Regex"]["ignore_title"],
            require_title_regex=config["Regex"]["require_title"],
            method=method)
    else:
        result_dict = cache.count_words(
            term_group=term_groups,
            ignore_title_regex=config["Regex"]["ignore_title"],
            require_title_regex=config["Regex"]["require_title"],
            method=method)
    """ Filter results """

    # Remove filtered words
    # NOTE(review): appears to mutate result_dict in place — the return
    # value is not used, unlike value_filter_dict below; confirm in utils.
    if filtered_words:
        utils.list_filter_dict(result_dict, filtered_words)

    # Filter low-frequency words if not using search terms
    if threshold is not None and not search_terms:
        result_dict = utils.value_filter_dict(result_dict, threshold)

    # Filter words by regex
    if config["Regex"]["require_word"] or config["Regex"]["ignore_word"]:
        utils.regex_filter_dict(result_dict,
                                require=config["Regex"]["require_word"],
                                remove=config["Regex"]["ignore_word"])
    """ Sort words by frequency """

    # sorted_dict presumably yields (term, score) pairs, highest first —
    # TODO confirm ordering in utils.
    sorted_tuples = utils.sorted_dict(result_dict)

    # ========================================================== Display Findings
    """ Print rankings to stdout """

    print("===============================================")
    print("================  RESULTS  ====================")
    print("===============================================\n")

    print("Popularity score:\n")
    num = 1
    for name, count in sorted_tuples:
        print(f"{num}) {name} - {count}")
        num += 1
    """ Present graph if requested """

    if show_graph:
        sorted_tuples = sorted_tuples[:config["Filters"].getint(
            "rank_cutoff")]  # Trim results list
        utils.show_bar_chart(
            sorted_tuples,
            "Top {} Results for /r/{}".format(len(sorted_tuples), sub_name))
Example #10
0
def multicategories_predict(samples_test, model_name, result_dir):
    """Predict level-1 categories for test samples with a trained model.

    Loads the trained sample/category feature matrices named after
    ``model_name`` (from ``result_dir`` when given, else the cwd), builds a
    matching feature matrix for ``samples_test``, trains a Classifier on the
    train data, and runs prediction with human-readable category labels.

    samples_test: test sample set exposing ``tsm`` and ``get_categories()``.
    model_name:   basename of the ``.cfm``/``.sfm`` model files; required.
    result_dir:   directory holding the model files; created if missing.
    """
    if model_name is None or len(model_name) == 0:
        # Fix: logging.warn is a deprecated alias of logging.warning.
        logging.warning(Logger.warn("model_name must not be NULL."))
        return

    if result_dir is None:
        cfm_file = "%s.cfm" % (model_name)
        sfm_file = "%s.sfm" % (model_name)
    else:
        if not os.path.isdir(result_dir):
            try:
                os.mkdir(result_dir)
            except OSError:
                logging.error(Logger.error("mkdir %s failed." % (result_dir)))
                return
        cfm_file = "%s/%s.cfm" % (result_dir, model_name)
        sfm_file = "%s/%s.sfm" % (result_dir, model_name)

    # Fix: this is a debug message; the original wrapped it in Logger.error,
    # inconsistent with the parallel Logger.debug lines below.
    logging.debug(Logger.debug("Loading train sample feature matrix ..."))
    sfm_train = SampleFeatureMatrix()
    sfm_train.load(sfm_file)
    logging.debug(Logger.debug("Loading train category feature matrix ..."))
    cfm_train = CategoryFeatureMatrix()
    cfm_train.load(cfm_file)

    logging.debug(Logger.debug("Making sample feature matrix for test data ..."))
    # NOTE(review): hard-coded root category id — presumably the model was
    # trained for this category; confirm against the training pipeline.
    category_id = 2000000
    # Share the train matrices' id maps so feature/category indices align.
    sfm_test = SampleFeatureMatrix(sfm_train.get_category_id_map(), sfm_train.get_feature_id_map())

    features = cfm_train.get_features(category_id)

    # Project each test sample onto the trained feature set.
    for sample_id in samples_test.tsm.sample_matrix():
        (sample_category, sample_terms, term_map) = samples_test.tsm.get_sample_row(sample_id)

        category_1_id = Categories.get_category_1_id(sample_category)

        sfm_test.set_sample_category(sample_id, category_1_id)
        for feature_id in features:
            if feature_id in term_map:
                feature_weight = features[feature_id]
                sfm_test.add_sample_feature(sample_id, feature_id, feature_weight)

    logging.debug(Logger.debug("train sample feature matrix - features:%d categories:%d" % (sfm_train.get_num_features(), sfm_train.get_num_categories())))
    X_train, y_train = sfm_train.to_sklearn_data()

    logging.debug(Logger.debug("test sample feature matrix - features:%d categories:%d" % (sfm_test.get_num_features(), sfm_test.get_num_categories())))
    X_test, y_test = sfm_test.to_sklearn_data()

    clf = Classifier()

    logging.debug(Logger.debug("Classifier training ..."))
    clf.train(X_train, y_train)

    logging.debug(Logger.debug("Classifier predicting ..."))

    categories = samples_test.get_categories()

    # Build "name(id)" labels ordered by the classifier's category index.
    categories_1_names = []

    categories_1_idx_map = {}
    categories_1_idlist = categories.get_categories_1_idlist()
    for category_id in categories_1_idlist:
        category_idx = sfm_test.get_category_idx(category_id)
        category_name = categories.get_category_name(category_id)
        categories_1_idx_map[category_idx] = (category_id, category_name)
    categories_1_idx_list = sorted_dict(categories_1_idx_map)
    for (category_idx, (category_id, category_name)) in categories_1_idx_list:
        categories_1_names.append("%s(%d)" % (category_name, category_id))

    clf.predict(X_test, y_test, categories_1_names)