Example no. 1
def main(new_users):
    reddit = helpers.initialize_reddit()

    user_list = helpers.load_data("user_list")

    new_users = [user for user in new_users if user not in user_list]

    if not new_users:
        helpers.write_log_trash(
            "All re-adds already on the memberlist {}".format(helpers.date_string()),
            "",
        )
        sys.exit(1)

    daddy.add_users(new_users, reddit)
    daddy.flair_users(
        new_users, reddit, config.flair_normal, number_adjustment=len(user_list)
    )

    insert_users_to_userlist(new_users)
    user_list = helpers.load_data("user_list")

    title, body = build_post(new_users, len(user_list) - len(new_users) + 1)
    daddy.make_post(title, body, reddit, distinguish=True, sticky=False)

    if config.update_sidebar:
        updates.update_sidebar(user_list)
Example no. 2
def main():
    if config.forward_user:
        forward_messages.forward(config.forward_user)

    reddit = helpers.initialize_reddit()
    participated = set(helpers.load_data("participated"))
    stats = helpers.load_data("stats")

    participated = participated.union(
        get_participants(reddit, stats["last_full_run"]))
    helpers.write_data("participated", list(participated))

    regulars = config.regulars_list
    warn = []

    for i in regulars:
        if i not in participated:
            warn.append(i)

    if warn:
        msg = ("These regulars haven't participated this week: " +
               ", ".join(warn))
        print(msg)

    print("Amount of participants this week:",
          len(set(participated) - set(config.mods_list)))

    stats["last_daily_run"] = (
        time.time() - 60)  # to cover accidental gaps due to execution time
    helpers.write_data("stats", stats)
Example no. 3
def replace(old_un, new_un):
    users = helpers.load_data("user_list")
    users[users.index(old_un)] = new_un
    helpers.write_data("user_list", users)

    reddit = helpers.initialize_reddit()
    if not config.testing:
        try:
            reddit.subreddit(config.target_subreddit).flair.set(
                redditor=old_un, text="Moved to /u/{}".format(new_un)
            )
            reddit.subreddit(config.target_subreddit).contributor.remove(old_un)
        except (praw.exceptions.PRAWException, prawcore.PrawcoreException):
            # Deleted user, most likely
            pass
        main.flair_users(
            [new_un], reddit, config.flair_normal, number_adjustment=users.index(new_un)
        )
        main.add_users([new_un], reddit)
        participated = set(helpers.load_data("participated"))
        if old_un in participated:
            participated.add(new_un)
            helpers.write_data("participated", list(participated))
    else:
        print(
            "Flaired and removed /u/{}; Flaired and added /u/{}".format(old_un, new_un)
        )

    if config.update_sidebar:
        updates.update_sidebar(users)
Example no. 4
    def _load_topic(self, topic_id):
        ''' Loads topic data given the topic ID '''

        # Check if the topic is already loaded
        if self.topic_id == topic_id:
            return

        # Otherwise, load the topic
        self.topic_id = topic_id

        # Here we check if we want the data encoded or not
        if self.encoded:
            topic = load_data(os.path.join(self.base_data_dir,
                                           self.dataset_id),
                              topic_id,
                              encoded=True)
        else:
            topic = load_data(self.base_data_dir,
                              self.dataset_id,
                              encoded=False)[self.topic_id]

        # Extract the topic data
        (self.documents, self.summaries, self.indices, self.pyr_scores,
         self.summary_ids) = extract(topic)

        print(f'Loaded data from topic {topic_id}')
Example no. 5
def main():
    user_list = helpers.load_data("user_list")
    reddit = helpers.initialize_reddit()
    stats = helpers.load_data("stats")

    if user_list and ("--ignore-active-community"
                      not in sys.argv):  # checks if the user-list is non-empty
        msg = "Userlist is non-empty. Exiting. Call with --ignore-active-community to run anyway"
        helpers.write_log_trash("Failed {}".format(helpers.date_string()), msg)
        raise ActiveCommunity(msg)

    new_users, new_user_urls = helpers.load_data("potential_adds")
    helpers.write_log_trash("New users {}".format(helpers.date_string()),
                            new_users)

    post_text_items = [daddy.build_new_text(new_users, 1), "\n"]

    if config.entry_comments:
        post_text_items.append("[Comments for entry]({})".format(
            daddy.build_and_post_gist(new_users, new_user_urls)))
    if config.stats_section:
        post_text_items.append("# Info:\n\n")
        post_text_items.append("- {} users added".format(len(new_users)))
        diff = len(new_users)
        change = "+{}".format(diff) if diff >= 0 else str(diff)
        post_text_items.append("- Membercap: {} ({})".format(
            len(new_users), change))

    post_text = "\n".join(post_text_items)

    title = config.main_log_title
    if config.title_date:
        title = helpers.date_string() + " - " + title
    if config.title_number:
        stats["log_count"] += 1
        title += " #{}".format(stats["log_count"])

    daddy.make_post(title, post_text, reddit)

    if config.change_title:
        updates.change_title()

    daddy.add_users(new_users, reddit)
    daddy.flair_users(new_users, reddit, config.flair_new)

    if config.update_sidebar:
        updates.update_sidebar(new_users)

    stats["last_full_run"] = time.time()
    helpers.write_data("stats", stats)
    helpers.write_data("user_list", new_users)
    helpers.write_data("participated", [])
Example no. 6
def main():
    # load the data
    df = helpers.load_data()
    df['body_wakati'] = df.body.apply(helpers.fetch_tokenize)

    # build the input features and ground-truth labels
    X = df.body_wakati.values
    le = LabelEncoder()
    y = le.fit_transform(df.category)

    # build the pipeline and run a grid search
    pipe = make_pipeline(BoW(), PCA(n_components=100),
                         SVC(random_state=0, probability=True))
    param_range = [0.1, 1, 10, 100]
    param_grid = [{
        'C': param_range,
        'kernel': 'linear'
    }, {
        'C': param_range,
        'gamma': param_range,
        'kernel': 'rbf'
    }]
    best_score, best_model = evaluator.grid_search(estimator=pipe,
                                                   params=param_grid,
                                                   X=X,
                                                   y=y)

    # save the score and the model
    save_dir = './models/bow'
    helpers.mkdir(save_dir)
    np.savetxt(save_dir + '/accuracy.txt', np.array(best_score).reshape(1, 1))
    joblib.dump(best_model, save_dir + '/model.pkl')
Example no. 7
def main():
    # load the data
    df = helpers.load_data()
    df['body_wakati'] = df.body.apply(helpers.fetch_tokenize)

    # build the input features and ground-truth labels
    X = df.body_wakati.values
    le = LabelEncoder()
    y = le.fit_transform(df.category)

    # train doc2vec
    print('training doc2vec')
    training_data = [TaggedDocument(words=tokenize_texts, tags=[idx])
                     for idx, tokenize_texts in enumerate(X)]
    doc2vec = Doc2Vec(training_data, vector_size=100, workers=4)
    print('finish training doc2vec')

    # build the pipeline and run a grid search
    pipe = make_pipeline(Doc2Vec_feature(model=doc2vec),
                         SVC(random_state=0, probability=True))
    param_range = [0.1, 1, 10, 100]
    param_grid = [
        {'C': param_range, 'kernel': 'linear'},
        {'C': param_range, 'gamma': param_range, 'kernel': 'rbf'}
    ]
    best_score, best_model = evaluator.grid_search(estimator=pipe,
                                                   params=param_grid,
                                                   X=X,
                                                   y=y)
    print(best_score)
    print(best_model.get_params())

    # save the score and the model
    save_dir = './models/doc2vec'
    helpers.mkdir(save_dir)
    np.savetxt(save_dir + '/accuracy.txt', np.array(best_score).reshape(1, 1))
    joblib.dump(best_model, save_dir + '/model.pkl')
Example no. 8
    def load_data(self, **kwargs):
        self.data = load_data(**kwargs)
        self.XCols = np.sum(self.data.idxcols)
        self.seed = self.data.seed

        # wrap each data split in a theano shared variable
        XU, XL, XT = [
            theano.shared(floatX(xx)) for xx in [
                self.data.train_x,
                self.data.small_x,
                self.data.test_x,
            ]
        ]
        YL, YLh, YT = [
            theano.shared(floatX(xx)) for xx in [
                self.data.small_y,
                self.data.small_yh,
                self.data.test_y,
            ]
        ]
        self.Xu = XU
        self.Xl = XL
        self.Yl = T.cast(YL, "int32")
        self.Ylh = YLh
        self.Xt = XT
        self.Yt = T.cast(YT, "int32")
Example no. 9
def clean_fixtures_data():
    """
    Cleans the fixtures data and filters it.
    """

    # load the fixtures data
    all_fixtures = load_data("fixtures.json", "data/original")

    # define headers that we need to keep
    headers = [
        "event", "finished", "team_a", "team_a_difficulty", "team_h",
        "team_h_difficulty"
    ]

    # list to store the filtered information
    filtered_fixtures = []

    # iterate over all the teams and remove unwanted information
    for fixture in all_fixtures:
        if fixture["event"] is not None:
            # remove unwanted keys from the fixture's data
            fixture = {header: fixture[header] for header in headers}
            filtered_fixtures.append(fixture)

    # only retain the fixtures that are yet to take place
    filtered_fixtures = [
        fixture for fixture in filtered_fixtures
        if fixture['event'] >= next_event
    ]

    # save the data in a JSON file
    save_data(filtered_fixtures, "filtered_fixtures.json", "data")
Example no. 10
def load_saved_data():  # sends JSON containing saved data
    global results
    results = helpers.load_data("csv/ahs_air_output_saved.csv")
    print("data to be sent: ")
    print(results)
    return jsonify(results)
Example no. 11
def write_sgd_prediction(user_features,
                         item_features,
                         input_path='../data/sample_submission.csv',
                         output_path='../data/predictions/sgd_prediction.csv',
                         verbose=False):
    """
    Writes a prediction based on matrix factorization given in argument
    :param user_features: sparse matrix of shape (num_features, num_users)
    :param item_features: sparse matrix of shape (num_features, num_items)
    :param input_path: path to the sample submission provided by Kaggle
    :param output_path: path to output the submission
    :param verbose: if True, details about computation are printed
    """
    test = load_data(input_path, verbose=verbose)
    nnz_row, nnz_col = test.nonzero()
    nnz_test = list(zip(nnz_row, nnz_col))
    with open(output_path, 'w') as output:
        output.write('Id,Prediction\n')
        for row, col in nnz_test:
            item_info = item_features[:, row]
            user_info = user_features[:, col]
            prediction = user_info.T.dot(item_info)
            prediction = min(5, prediction)
            prediction = max(1, prediction)
            output.write('r{}_c{},{}\n'.format(row + 1, col + 1, prediction))
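A minimal call sketch for the function above. The factor matrices here are random placeholders standing in for the output of a real SGD factorization step, and the default input/output paths are assumed to exist:

import numpy as np

# hypothetical 20-feature factorization: 10000 items, 1000 users
item_features = np.random.rand(20, 10000)
user_features = np.random.rand(20, 1000)

write_sgd_prediction(user_features, item_features, verbose=True)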
Example no. 12
def run_with_dataset(directory: Union[str, 'pathlib.Path'],
                     dataset: str,
                     hidden: List[int] = [91],
                     dropout: float = 0.6449297033170698,
                     learning_rate: float = 0.011888866964052763,
                     weight_decay: float = 0.0005959130002875904,
                     epochs: int = 200,
                     verbose: bool = True) -> 'GCN':
    """Runs training with a given dataset

    Args:
        directory: Path to datasets 
        dataset: dataset to run on 
        hidden: Hidden Layer sizes
        dropout: Dropout Rate
        learning_rate: Learning Rate 
        weight_decay: Weight decay
        epochs: Number of epochs to train for
        verbose: If True, prints messages during training time. \
            Defaults to true
    """
    gcn = GCN(*load_data(directory, dataset))
    gcn.train(hidden=hidden,
              dropout=dropout,
              learning_rate=learning_rate,
              weight_decay=weight_decay,
              epochs=epochs,
              verbose=verbose)
    return gcn
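A brief usage sketch, assuming a local directory 'data/' containing a dataset named 'cora' (both placeholder arguments):

gcn = run_with_dataset('data/', 'cora', epochs=50, verbose=False)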
Example no. 13
def build_post(new_users, number):
    title = "User re-add"
    if config.title_date:
        title = helpers.date_string() + " - " + title
    if config.title_number:
        stats = helpers.load_data("stats")
        stats["re-add count"] += 1
        readd_count = stats["re-add count"]
        helpers.write_data("stats", stats)
        title += " #{}".format(readd_count)

    lines = []
    for user in new_users:
        lines.append(r"- \#{} /u/{}".format(number, user))
        number += 1

    if config.stats_section:
        cap = number - 1
        diff = len(new_users)
        lines.append(
            "\n# Info:\n\n- 0 users kicked\n- {} users added\n- Membercap: {} (+{})".format(
                diff, cap, diff
            )
        )

    body = "  \n".join(lines)

    return title, body
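A minimal call sketch, assuming config.title_date, config.title_number, and config.stats_section are all disabled:

title, body = build_post(["alice", "bob"], number=42)
# title == "User re-add"
# body  == "- \#42 /u/alice  \n- \#43 /u/bob"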
Example no. 14
def add_awards():
    '''
    Add the processed awards to the processed users.
    '''

    awards = load_data('awards.json')
    users = load_data('processed_users.json')

    for user in users:
        for u in awards:
            if u['id'] == user['id']:
                user['awards'] = u['awards']
                break

    save_data(users, 'processed_users.json')

    return users
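The nested loop above is quadratic in the number of users. An equivalent linear-time variant using a dict lookup (a sketch, reusing the same load_data/save_data helpers):

def add_awards_fast():
    awards = load_data('awards.json')
    users = load_data('processed_users.json')

    # index the awards by user id for O(1) lookup
    awards_by_id = {u['id']: u['awards'] for u in awards}

    for user in users:
        if user['id'] in awards_by_id:
            user['awards'] = awards_by_id[user['id']]

    save_data(users, 'processed_users.json')
    return users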
Example no. 15
 def update_time_message(self):
     msg_id = load_data(self.tpath).get(self.chat_id)
     # "Последняя проверка" means "Last check" (bot status message)
     template = "Последняя проверка:\n{}".format(
         datetime.datetime.now().strftime("%d %b %H:%M:%S"))
     if msg_id:
         self.bot.editMessageText(template, self.chat_id, msg_id)
     else:
         self.send_time_message(template)
Example no. 16
 def send_time_message(self, template):
     timemsg = load_data(self.tpath)
     data = self.bot.send_message(self.chat_id,
                                  template,
                                  disable_notification=True)
     mid = data['message_id']
     timemsg[self.chat_id] = mid
     dump_data(timemsg, self.tpath)
Example no. 17
def update_floor():
    global results
    os.system("pgrep python3 > process_id.txt")
    file = open("process_id.txt", 'r')
    process_id = file.read()
    print(process_id)
    file.close()
    if process_id != "":
        # kill any other queries, there can be only one
        os.system("kill -9 " + str(process_id))

    floor = request.args.get("floor")
    os.system('python3 query_specific.py -h 10.12.4.98 -p 8000 -w A -f ' +
              floor + ' ; wait')
    area_results = helpers.load_data("csv/ahs_air_specific_output.csv")
    results = helpers.update_data(results, area_results,
                                  ord('A') - ord('A'), int(floor))

    print("A", results)

    os.system('python3 query_specific.py -h 10.12.4.98 -p 8000 -w B -f ' +
              floor + ' ; wait')
    area_results = helpers.load_data("csv/ahs_air_specific_output.csv")
    results = helpers.update_data(results, area_results,
                                  ord('B') - ord('A'), int(floor))

    print("B", results)

    os.system('python3 query_specific.py -h 10.12.4.98 -p 8000 -w C -f ' +
              floor + ' ; wait')
    area_results = helpers.load_data("csv/ahs_air_specific_output.csv")
    results = helpers.update_data(results, area_results,
                                  ord('C') - ord('A'), int(floor))

    print("C", results)

    os.system('python3 query_specific.py -h 10.12.4.98 -p 8000 -w D -f ' +
              floor + ' ; wait')
    area_results = helpers.load_data("csv/ahs_air_specific_output.csv")
    results = helpers.update_data(results, area_results,
                                  ord('D') - ord('A'), int(floor))

    print("D", results)

    return jsonify(results)
Example no. 18
def dump_tracks(readPath='data/train/', writePath='./json/'):
    if not os.path.exists(writePath):
        os.mkdir(writePath)
    files = glob.glob(readPath + '*.json')

    for name in files:
        data = helpers.load_data(name)
        tracks = helpers.extract_tracks(data)
        tracks.to_json(writePath + os.path.relpath(name, readPath),
                       orient='table')
Example no. 19
def run():
    parser = argparse.ArgumentParser(description='Banded matrix visualization')
    parser.add_argument(
        'mode',
        metavar='MODE',
        type=str,
        choices=['clusters', 'rules'],
        help='Choose mode (coloring clusters or displaying rules)')
    parser.add_argument('folder',
                        metavar='FOLDER',
                        type=str,
                        help='Folder containing the data')
    parser.add_argument('-s',
                        '--steps',
                        metavar='STEPS',
                        type=int,
                        help='Number of iterations of the algorithm',
                        default=100)
    parser.add_argument('-a',
                        '--algorithm',
                        metavar='ALGORITHM',
                        type=str,
                        choices=['biMBA', 'barycentric', 'MBA'],
                        help='Algorithm to use',
                        default='biMBA')

    parser.add_argument(
        '-o',
        '--output',
        metavar='OUTPUT',
        type=str,
        help='Name of output file (leave blank to display in window)',
        default=None)

    args = parser.parse_args()
    step = args.steps
    method = args.algorithm
    folder = args.folder
    y_label = folder.capitalize()
    save_to = args.output
    dna_f, band_f, clus_f, rules_f, hierarchy_f = [
        folder + '/data.txt', folder + '/column_names.txt',
        folder + '/row_labels.txt', folder + '/rules.txt',
        folder + '/hierarchy.txt'
    ]
    matrix, column_names, clusters = load_data(dna_f, band_f, clus_f)
    if args.mode == 'clusters':
        rows(matrix, column_names, clusters, y_label, method, step, save_to)
    else:
        hierarchy = load_hierarchy(hierarchy_f)
        new_hierarchy = {}
        for band in column_names:
            new_hierarchy[band] = hierarchy[band] if band in hierarchy else []
        rules = load_rules(rules_f, new_hierarchy, column_names, top_k_rules=5)
        columns(matrix, column_names, clusters, rules, save_to, y_label, step)
Example no. 20
def new_sub():
    reddit = helpers.initialize_reddit()
    main.check_permissions(reddit)
    user_list = helpers.load_data("user_list")
    main.flair_users(user_list, reddit, config.flair_normal)
    if config.change_title:
        updates.change_title()
    if config.update_sidebar:
        updates.update_sidebar(user_list)
    main.add_users(user_list, reddit)
    helpers.write_data("participated", [])
Example no. 21
def update():  # sends JSON containing the updated data
    global results
    print("Updating")
    #os.system('python3 ahs_air.py -h 10.12.4.98 -p 8000 ; wait')

    results = helpers.load_data("csv/ahs_air_output.csv")

    # save the temporary values into permanent storage
    os.system('cp ahs_air_output.csv ahs_air_output_saved.csv ; wait')

    return jsonify(results)
Example no. 22
 def __init__(self):
     self.batch_idx = 0
     self.questions = []
     self.responses = []
     self.labels = []
     self.embeddings = helpers.load_embeddings()
     data = helpers.load_data()
     for item in data:
         self.questions.append(item[0])
         self.responses.append(item[1])
         self.labels.append(item[2])
     del data
Example no. 23
def user_details(user_name):

    users = load_data('sorted_users_acc_days_30.json')
    for user in users:
        if user['user_name'] == user_name:
            break
    else:  # for-else: reached only if no matching user broke the loop
        return 'Not Found.<br> If you think this link is broken, please file an <a href="https://github.com/ravgeetdhillon/gnome-hackers/issues">issue</a>.'

    data = {'page': {'user': user}, 'site': SITE_CONFIG}

    return render_template('user.html', data=data)
Example no. 24
def index():

    users = load_data('sorted_users_acc_days_1.json')
    users_days_1 = users[:10]
    users = load_data('sorted_users_acc_days_7.json')
    users_days_7 = users[:10]
    users = load_data('sorted_users_acc_days_15.json')
    users_days_15 = users[:10]
    users = load_data('sorted_users_acc_days_30.json')
    users_days_30 = users[:10]

    data = {
        'page': {
            'stats': [
                {
                    'type': 'Today',
                    'key': 'days_1',
                    'users': users_days_1,
                },
                {
                    'type': 'Week',
                    'key': 'days_7',
                    'users': users_days_7,
                },
                {
                    'type': 'Fortnight',
                    'key': 'days_15',
                    'users': users_days_15,
                },
                {
                    'type': 'Month',
                    'key': 'days_30',
                    'users': users_days_30,
                },
            ]
        },
        'site': SITE_CONFIG
    }

    return render_template('index.html', data=data)
Example no. 25
def process_awards():
    '''
    Process the awards for top 10 users.
    '''

    try:
        # awards = load_data('awards.json')
        awards = requests.get(
            'https://raw.githubusercontent.com/ravgeetdhillon/gnome-hackers/website/artifacts/data/awards.json'
        )
        awards = json.loads(awards.text)
    except Exception:
        # network failure or malformed JSON: start with an empty awards list
        awards = []

    # sort the data for each criterion and save the results in their respective json files
    criteria = ['days_1', 'days_7', 'days_15', 'days_30']
    for key in criteria:

        users = load_data('processed_users.json')
        users = sorted(users, key=lambda k: k['points'][key],
                       reverse=True)[:10]

        for user in users:
            for u in awards:
                if user['id'] == u['id']:
                    break
            else:
                awards.append({
                    'id': user['id'],
                    'awards': {
                        'gold': 0,
                        'silver': 0,
                        'bronze': 0,
                        'top10': 0,
                    }
                })

        for u in awards:
            for index, user in enumerate(users, start=1):
                if u['id'] == user['id']:

                    if index == 1:
                        u['awards']['gold'] += 1
                    elif index == 2:
                        u['awards']['silver'] += 1
                    elif index == 3:
                        u['awards']['bronze'] += 1
                    u['awards']['top10'] += 1

                    break

    save_data(awards, 'awards.json')
Example no. 26
def algorithm_test(path_dataset, kwargs):
    '''Test the algorithm selected in kwargs on the given dataset.'''
    ratings = load_data(path_dataset)

    train, test = split_data(ratings)

    alg = kwargs['algorithms']
    n_features = kwargs['k_range']
    lambda_user = kwargs['lambda_u']
    lambda_item = kwargs['lambda_i']

    if alg[0].lower() in ('als', 'als_ours'):
        X, RMSE_test, RMSE_train = get_ALS_predictions(ratings,
                                                       train,
                                                       test,
                                                       n_features,
                                                       lambda_user,
                                                       lambda_item,
                                                       kwargs=kwargs)

    elif alg[0].lower() == 'sgd':
        X, RMSE_test, RMSE_train = get_SGD_predictions(ratings,
                                                       train,
                                                       test,
                                                       n_features,
                                                       lambda_user,
                                                       lambda_item,
                                                       kwargs=kwargs)

    elif alg[0].lower() in ('svd', 'knn', 'cluster'):

        X, RMSE_test, RMSE_train = get_splib_predictions(alg[0].lower(),
                                                         train,
                                                         test,
                                                         kwargs=kwargs)

    else:
        print('Algorithm', alg, 'is not supported in this project!')
        sys.exit(1)

    return X, RMSE_test, RMSE_train
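A hedged invocation sketch; the keys match the lookups inside the function, while the values and the dataset path are placeholders:

kwargs = {
    'algorithms': ['als'],  # or 'sgd', 'svd', 'knn', 'cluster'
    'k_range': [20],        # number(s) of latent features (placeholder)
    'lambda_u': 0.1,
    'lambda_i': 0.01,
}
X, rmse_test, rmse_train = algorithm_test('data/ratings.csv', kwargs)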
Example no. 27
def test_process_order():
    kitchen = setup_kitchen()
    order = load_data(kitchen.app.config["JSON_FILE"])[0]
    order["id"] = "1"
    order["expirationAge"] = "1"
    order["temp"] = "hot"
    order["shelfLife"] = 100
    order["decayRate"] = .5
    kitchen.process_order(order)

    processed_order = kitchen.redis_store.hgetall(order['temp'] + ':' +
                                                  order['id'])
    assert processed_order is not None
    assert 'redisKey' in processed_order
Example no. 28
def k_fold_nn(n, X_total, y_total, iqr=True):
    mini_batch_size = 10

    # 4-fold cross-validation
    mse_storage = []
    mae_storage = []
    r2_storage = []

    # Load the data; outliers are removed from each fold's training split below
    X, y = hl.load_data(n, X_total, y_total)

    kf = KFold(n_splits=4, shuffle=True)

    for idx, (train_index, test_index) in enumerate(kf.split(X)):
        print("FOLD {}".format(idx + 1))
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        if iqr:
            X_train, y_train = outliers.IQR_y_outliers(X_train, y_train)

        train_input = torch.Tensor(X_train)
        test_input = torch.Tensor(X_test)
        train_target = torch.Tensor(y_train.reshape(len(y_train), 1))
        test_target = torch.Tensor(y_test.reshape(len(y_test), 1))

        nb_input_neurons = train_input.shape[1]

        model = Net_3(nb_input_neurons)
        losses = train_model(model,
                             train_input,
                             train_target,
                             mini_batch_size,
                             monitor_loss=True)

        # Make predictions
        y_hat = compute_pred(model, test_input)

        # Compute score
        mse_nn, mae_nn, r2_nn = compute_score(y_test, y_hat.detach().numpy())

        mse_storage.append(mse_nn)
        mae_storage.append(mae_nn)
        r2_storage.append(r2_nn)

        print('MSE: {:0.2f} \nMAE: {:0.2f} \nr2: {:0.2f}'.format(
            mse_nn, mae_nn, r2_nn))

    return mse_storage, mae_storage, r2_storage
Example no. 29
def main():
    '''
    Main function for the process.py.
    '''

    # initialize the users array to store data about users contributing to GNOME
    users = []

    # load the commits, merge requests and issues
    commits = load_data('commits.json')
    merge_requests = load_data('merge_requests.json')
    issues = load_data('issues.json')
    all_users = load_data('users.json')

    # process the commits, merge requests and issues and generate points for the users
    users = process_issues(users, issues)
    users = process_merge_requests(users, merge_requests)
    users = process_commits(users, commits)
    users = process_users(users, all_users)

    # download the avatar image from each user
    fetch_images(users)

    save_data(users, 'processed_users.json')
Example no. 30
def main():
    potential_adds = helpers.load_data("potential_adds", {
        "users": [],
        "urls": []
    })
    potential_adds_copy = deepcopy(potential_adds)
    users_to_remove = [
        3, 4, 5, 8, 9, 11, 12, 13, 14, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34,
        35, 36, 37, 38, 39, 40, 41, 44, 49, 55, 56, 57, 58, 59, 60, 64
    ]
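    # NOTE: the hard-coded numbers above appear to be 2-based positions in
    # the original listing, hence the "i - 2" index below (assumption).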
    for i in users_to_remove:
        config.redditor_blacklist += potential_adds_copy["users"][i - 2]
        potential_adds["users"].remove(potential_adds_copy["users"][i - 2])
        potential_adds["urls"].remove(potential_adds_copy["urls"][i - 2])
    helpers.write_data("potential_adds", potential_adds)
    print("Amount remaining: " + str(len(potential_adds["users"])))
Example no. 31
import helpers as H
import cnn_model as CNN

# debug
from ipdb import set_trace as bp

# initialization
args         = H.parse_args()                          # Function for parsing command-line arguments
train_params = {
     'do' : float(args.do) if args.do else 0.5,        # Dropout Parameter
     'a'  : float(args.a) if args.a else 0.3,          # Conv Layers LeakyReLU alpha param [if alpha set to 0 LeakyReLU is equivalent with ReLU]
     'k'  : int(args.k) if args.k else 4,              # Feature maps k multiplier
     's'  : float(args.s) if args.s else 1,            # Input Image rescale factor
     'pf' : float(args.pf) if args.pf else 1,          # Percentage of the pooling layer: [0,1]
     'pt' : args.pt if args.pt else 'Avg',             # Pooling type: Avg, Max
     'fp' : args.fp if args.fp else 'proportional',    # Feature maps policy: proportional, static
     'cl' : int(args.cl) if args.cl else 5,            # Number of Convolutional Layers
     'opt': args.opt if args.opt else 'Adam',          # Optimizer: SGD, Adagrad, Adam
     'obj': args.obj if args.obj else 'ce',            # Minimization Objective: mse, ce
     'patience' : args.pat if args.pat else 200,       # Patience parameter for early stopping
     'tolerance': args.tol if args.tol else 1.005,     # Tolerance parameter for early stopping [default: 1.005, checks if > 0.5%]
     'res_alias': args.csv if args.csv else 'res'      # csv results filename alias
}

# loading mnist data as example
(X_train, y_train), (X_val, y_val) = H.load_data()

# train a CNN model
model = CNN.train(X_train, y_train, X_val, y_val, train_params)
Example no. 32
import helpers as h
import numpy as np

datasets = h.create_datasets(h.load_data())

training_statistics = h.class_statistics(datasets['training'])

# pair each test sample with its prediction; materialized as a list so the
# metric helpers below can each scan it
results = list(zip(datasets['test'],
                   map(lambda x: h.classify(training_statistics, x),
                       datasets['test'])))

tp = h.true_positives(results)
tn = h.true_negatives(results)
fp = h.false_positives(results)
fn = h.false_negatives(results)

print "True positives:", tp
print "True negatives:", tn
print "False positives:", fp
print "False negatives:", fn

print "Accuracy:", float(tp + tn) / len(datasets['test'])
print "Precision:", float(tp) / (tp + fp)
print "Recall:", float(tp) / (tp + fn)
Example no. 33
parser.add_argument("--n-jobs", metavar="N", type=int, default=int(os.getenv("NSLOTS",1)))
parser.add_argument("--save-model", metavar="FILE")
parser.add_argument("--save-predictions", metavar="FILE")
args = parser.parse_args()
vargs = vars(args)


# load data
patches = {}
labels = {}
src = {}
for purpose in ("train", "validation", "test"):
  this_patches, this_labels, this_src = \
    helpers.load_data(vargs["%s_set"%purpose],
                      vargs["%s_patches"%purpose],
                      vargs["%s_labels"%purpose],
                      patches_dtype="float32",
                      labels_dtype="int32",
                      return_src=True)

  if len(this_patches) > 0:
    patches[purpose] = this_patches.reshape([this_patches.shape[0],-1])
    this_labels = this_labels.reshape([-1])
    if len(this_labels) > 0:
      labels[purpose] = this_labels
    if this_src is not None and len(this_src) > 0:
      src[purpose] = this_src

    print "%s patches:" % purpose, patches[purpose].shape
    if len(labels[purpose]) > 0:
      print "%s labels:" % purpose, labels[purpose].shape
    if this_src is not None and len(this_src) > 0: