Example #1

import os
import json

from tc_main import TopCoder


def clean_tech_lst(top_n=5):
    """ Clean up technology list of challenges."""
    tc = TopCoder()  # trying to reduce the length of variable name here
    filt_cha_info = tc.get_filtered_challenge_basic_info(
    )  # it's readable for me anyway ;-)
    most_popular_tech = tc.get_tech_popularity().head(
        top_n).tech_name.to_list()

    with open(os.path.join(os.curdir, 'data', 'tech_by_challenge.json')) as f:
        tech_by_cha_rough = {
            cha['challenge_id']: cha['tech_lst']
            for cha in json.load(f)
            if cha['challenge_id'] in filt_cha_info.index
        }

    print(f'Top {top_n} most popular technologies', most_popular_tech)

    tech_by_cha = []
    for cha_id, tech_lst in tech_by_cha_rough.items():
        cleaned_tech_lst = [
            'angularjs' if 'angular' in tech.lower() else tech.lower()
            for tech in tech_lst
        ]
        filtered_tech_lst = [
            tech for tech in cleaned_tech_lst if tech in most_popular_tech
        ]
        if filtered_tech_lst:
            tech_by_cha.append({
                'challenge_id': cha_id,
                'tech_lst': filtered_tech_lst
            })

    print(f'Challenges with tech after filtering: {len(tech_by_cha)}')

    with open(os.path.join(os.curdir, 'data', 'tech_by_challenge_clean.json'),
              'w') as f:
        json.dump(tech_by_cha, f, indent=4)
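
A minimal usage sketch for the function above, assuming data/tech_by_challenge.json and the TopCoder data used by tc_main are available in the working directory; the call below is illustrative and not part of the original example.

if __name__ == '__main__':
    # Keep only technologies among the 5 most popular ones per challenge and
    # write the cleaned list to data/tech_by_challenge_clean.json.
    clean_tech_lst(top_n=5)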

Example #2

""" Train KNN model with hand-picked challenges by price range."""
import os
import json
from collections import defaultdict

import numpy as np
import pandas as pd

from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold

from tc_main import TopCoder

TOPCODER = TopCoder()
DOC_VEC_SIZE = 100  # dimensionality chosen empirically
DOC_VEC_PATH = os.path.join(os.curdir, 'pricing_model_0', 'develop_track', 'document_vec', f'document_vec_{DOC_VEC_SIZE}D.json')

HANDPICKED_CHALLENGES = TOPCODER.get_handpick_dev_cha_id()

def get_challenge_meta_data():
    """ Return challenge meta data of handpicked challenges."""
    cbi_df = TOPCODER.challenge_basic_info.loc[TOPCODER.challenge_basic_info.index.isin(HANDPICKED_CHALLENGES)]

    challenge_duration = (cbi_df.submission_end_date - cbi_df.registration_start_date).apply(lambda td: td.days)

    meta_data = pd.concat(
        [
            cbi_df.reindex(['subtrack'], axis=1).astype('category').apply(lambda c: c.cat.codes),
            cbi_df.reindex(['number_of_platforms'], axis=1),
"""

import os
import json
from collections import defaultdict

import numpy as np
import pandas as pd

from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold

from tc_main import TopCoder

TOPCODER = TopCoder()
ACTUAL_PRIZE = TOPCODER.challenge_basic_info.total_prize[TOPCODER.challenge_basic_info.total_prize != 0]

def get_path_by_track_and_dimension(track, dimension):
    """ Get document vector path by track and doc vec dimension."""
    return os.path.join(os.curdir, 'pricing_model_0', f'{track}_track', 'document_vec', f'document_vec_{dimension}D.json')

def get_path_handpick_challenge(no_overlap, with_phrase):
    """ Get document vector path from pricing model 4"""
    return os.path.join(os.curdir, 'pricing_model_4', 'document_vec', f'document_vec_{str(no_overlap)[0]}{str(with_phrase)[0]}_600D.json')
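
As a worked illustration (not in the original source), the two boolean flags are shortened to their first letters in the file name, since str(True)[0] is 'T' and str(False)[0] is 'F':

# Illustrative call: resolves to ./pricing_model_4/document_vec/document_vec_TF_600D.json
# with a POSIX path separator.
print(get_path_handpick_challenge(no_overlap=True, with_phrase=False))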

def get_challenge_meta_data():
    """ Return challenge meta data in pandas DataFrame."""
    cha_basic_info = TOPCODER.challenge_basic_info
    challenge_duration = (cha_basic_info.submission_end_date - cha_basic_info.registration_start_date).apply(lambda td: td.days)

Example #4

import os
import json

import numpy as np
import pandas as pd

from imblearn.under_sampling import ClusterCentroids, RandomUnderSampler

from tc_main import TopCoder

PP_PATH = os.path.join(os.curdir, 'pricing_model_6', 'preprocess_data')
PP_DATA = {
    'splt_cha': os.path.join(PP_PATH, 'split_challenges.json'),
}

XY_PATH = {
    'X': os.path.join(os.curdir, 'pricing_model_6', 'round1', 'X_{}.json'),
    'y': os.path.join(os.curdir, 'pricing_model_6', 'round1', 'y_{}.json')
}
RESULT_PATH = os.path.join(os.curdir, 'pricing_model_6', 'round1_res')
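
The {} placeholder in the XY_PATH templates is meant to be filled via str.format; the 'train' tag below is a hypothetical placeholder, purely to show the mechanism.

# Hypothetical tag for illustration only.
x_path = XY_PATH['X'].format('train')  # ./pricing_model_6/round1/X_train.json
y_path = XY_PATH['y'].format('train')  # ./pricing_model_6/round1/y_train.json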

TC = TopCoder()
FILT_CHA_INFO = TC.get_filtered_challenge_basic_info()

def split_challenges():
    """ Split challenges into 10 equal part randomly, with proportionally divided challenges by subtrack.
        It's randomly splited but consistant with a fixed random_state param.
    """
    cha_id_sr = pd.Series(FILT_CHA_INFO.index)
    split_cha_id = [splt_ids.to_list() for splt_ids in np.array_split(cha_id_sr.sample(frac=1, random_state=0), 10)]
    with open(PP_DATA['splt_cha'], 'w') as fwrite:
        json.dump(split_cha_id, fwrite, indent=4)
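
Because sample(frac=1, random_state=0) shuffles the challenge ids deterministically, rerunning split_challenges always yields the same 10 chunks. The check below is an illustrative addition (not part of the original module) that reloads the chunks and verifies they cover every filtered challenge exactly once.

def check_split_coverage():
    """ Illustrative sanity check: the 10 chunks partition the filtered challenge ids."""
    with open(PP_DATA['splt_cha']) as fread:
        split_cha_id = json.load(fread)
    all_ids = [cha_id for chunk in split_cha_id for cha_id in chunk]
    assert len(all_ids) == len(FILT_CHA_INFO.index)
    assert set(all_ids) == set(FILT_CHA_INFO.index)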

def get_train_test_Xy(X: pd.DataFrame, y: pd.DataFrame, chunk_idx: int):
    """ Get train X, test X, train y, test y for given chunk of challenge ids."""
    with open(PP_DATA['splt_cha']) as fread:
        split_cha_id = json.load(fread)

Example #5

import os
import json
import itertools
from datetime import datetime

import numpy as np
import pandas as pd

from tc_main import TopCoder
from tc_pricing_models import cosine_similarity

DATA_PATH = os.path.join(os.curdir, 'pricing_model_6',
                         'training_data_segments')
TRAINING_DATA_PATH = os.path.join(os.curdir, 'pricing_model_6', 'round1')

TOPCODER = TopCoder()
FILTERED_CHALLENGE_INFO = TOPCODER.get_filtered_challenge_basic_info()
CHALLENGE_ID_COMBINATION = lambda: itertools.combinations(
    FILTERED_CHALLENGE_INFO.index, 2)

SUBTRACK_COMB = [
    sorted(subtrack_comb)
    for subtrack_comb in itertools.combinations_with_replacement(
        FILTERED_CHALLENGE_INFO.subtrack.unique(), 2)
]
TECH_COMB = \
    [sorted(tech_comb) for tech_comb in itertools.combinations_with_replacement(TOPCODER.get_tech_popularity().head(5).tech_name, 2)] +\
    TOPCODER.get_tech_popularity().head(5).tech_name.to_list()
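
For intuition, combinations_with_replacement enumerates every unordered pair including self-pairs, which is what SUBTRACK_COMB and TECH_COMB rely on above. A tiny self-contained sketch with made-up labels:

import itertools

labels = ['assembly', 'first2finish', 'ui_prototype']  # made-up labels for illustration
pairs = [sorted(comb) for comb in itertools.combinations_with_replacement(labels, 2)]
# pairs == [['assembly', 'assembly'], ['assembly', 'first2finish'],
#           ['assembly', 'ui_prototype'], ['first2finish', 'first2finish'],
#           ['first2finish', 'ui_prototype'], ['ui_prototype', 'ui_prototype']]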

TOP5_SUBTRACK = list(
    FILTERED_CHALLENGE_INFO.subtrack.value_counts().sort_values(

Example #6
""" Build pricing model with random forest."""
import os
import json
import pickle
from datetime import datetime

import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier

from tc_main import TopCoder
from pair_cha_imbl_learning import PP_DATA

TOPCODER = TopCoder()
FILT_CHA_INFO = TOPCODER.get_filtered_challenge_basic_info()

# Round 1


def prz_estimation_from_prob(y_prob_path, target_ids: list):
    """ Estimate challenge prize from top most confident predictions."""
    prob_df = pd.read_json(y_prob_path,
                           orient='records').set_index(['l0', 'l1'])

    prz_estimation = []
    for cha_id in target_ids:
        cha_pair = prob_df.loc[
            (prob_df.index.get_level_values(0) == cha_id) |
            (prob_df.index.get_level_values(1) == cha_id)].copy()
        cha_pair.index = cha_pair.index.map(
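
The visible selection step in prz_estimation_from_prob pulls out, for a target challenge, every (l0, l1) pair that contains it, no matter which side of the pair the id sits on. A toy sketch of that selection with made-up ids and probabilities (not part of the original code):

import pandas as pd

toy_prob_df = pd.DataFrame(
    {'l0': [1, 1, 2], 'l1': [2, 3, 3], 'prob': [0.9, 0.4, 0.7]}
).set_index(['l0', 'l1'])

cha_id = 3
cha_pair = toy_prob_df.loc[
    (toy_prob_df.index.get_level_values(0) == cha_id) |
    (toy_prob_df.index.get_level_values(1) == cha_id)]
# cha_pair keeps the (1, 3) and (2, 3) rows, i.e. every pair involving challenge 3.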