Example #1
0
def t_typecast(sep=','):
    """Smoke-test `convert_dtype` on the 'mrn' column of a lab data file.

    Loads the csv twice (once to inspect the raw index values, once to
    convert), casts the index column to int64 and reports whether every
    numeric value that survives is an integer.

    Parameters
    ----------
    sep : str
        Field separator passed through to `load_df`.
    """
    import numbers
    import predicate

    index_field = 'mrn'
    f = '/phi/proj/poc7002/bulk_training/data-exp/cdr/lab/cerner/cerner_microbio_112.0_tset_bt_100.csv'
    df0 = load_df(_file=f, from_csv=True, sep=sep, verbose=False)
    idx = set(df0[index_field].values)
    if all(predicate.isNumber2(e) for e in idx):
        print('> normal.')
    else:
        print('> non-numerical values found.')
    df = load_df(_file=f, from_csv=True, sep=sep, verbose=False)

    # A direct cast cannot succeed while some entries are NaN; convert_dtype
    # is the helper under test for that situation.
    df = convert_dtype(df, col=index_field, typ='int64', _debug=1)

    s0 = set(df[index_field].values)
    s = set(e for e in df[index_field].values if predicate.isNumber(e))
    print('> filter nan rows after convert_dtype: %d >=? %d' %
          (len(s0), len(s)))
    idx2 = set(s)
    div(message='> size of idx: %d' % len(idx2))
    # Values pulled out of an int64 column are numpy integer scalars, which
    # are not `int` instances on Python 3; numbers.Integral covers both.
    if all(isinstance(e, numbers.Integral) for e in idx2):
        print('> normal.')
        print('> examples: %s' % list(idx2)[:5])
    else:
        print('> non int found.')
        print('> examples: %s' % list(idx2)[:5])

    return
Example #2
0
def log(model, i):
    """Print (and optionally write to tensorboard) metrics for iteration `i`.

    For each of the three module-level loaders the model is run through
    `infer`, the confusion counts are normalised by the number of samples,
    and accuracy / precision / recall / the configured metric are derived.

    NOTE(review): `p1` is not assigned anywhere in this function, so it is
    read from an enclosing scope — confirm where it is defined.
    """
    rows = []
    for loader in (a_loader, b_loader, c_loader):
        y, y_bar = infer(loader, model)

        tp = utils.tp(y, y_bar) / len(y)
        fp = utils.fp(y, y_bar) / len(y)
        fn = utils.fn(y, y_bar) / len(y)
        tn = utils.tn(y, y_bar) / len(y)

        accuracy = tp + tn
        precision = utils.div(tp, tp + fp)
        recall = utils.div(tp, p1)
        score = metric(p1, fn, fp)
        rows.append([tp, fp, fn, tn, accuracy, precision, recall, score])

    tags = ['tp', 'fp', 'fn', 'tn', 'a', 'p', 'r', args.metric]

    # Left-pad the iteration number with zeros to the width of args.ni.
    padding = '0' * (len(str(args.ni)) - len(str(i)))
    # Transpose: one "a/b/c" string per metric, one value per loader.
    columns = []
    for per_loader in zip(*rows):
        columns.append('/'.join(['%0.2f' % value for value in per_loader]))
    summary = ' | '.join('%s %s' % (tag, column)
                         for tag, column in zip(tags, columns))
    print('[iteration %s%d]%s' % (padding, i, summary))

    if args.tb:
        for writer, row in zip([a_writer, b_writer, c_writer], rows):
            for tag, value in zip(tags, row):
                writer.add_scalar(tag, value, i)
Example #3
0
    def __add__(self, rhs):
        """Add two points.

        Following the description from
        https://en.wikipedia.org/wiki/Elliptic_curve#The_group_law

        All coordinate comparisons are done modulo ``ECpoint.p``: results of
        this method are reduced mod p, so the inverse of ``(x, y)`` is stored
        as ``(x, p - y)`` and a plain ``yp == -yq`` test would never match.

        Returns a new ECpoint; neither operand is modified.
        """

        p = ECpoint.p
        a = ECpoint.a

        ## Addition of identity
        if rhs.isInfty():
            return ECpoint(self.x, self.y)
        if self.isInfty():
            return ECpoint(rhs.x, rhs.y)

        ## Following notation from wikipedia
        xp, yp = self.x, self.y
        xq, yq = rhs.x, rhs.y
        if (xp - xq) % p != 0:
            # Distinct x-coordinates: chord through the two points.
            s = div((yp - yq), (xp - xq), p)
            xr = s**2 - xp - xq
            yr = s * (xp - xr) - yp
            return ECpoint(xr % p, yr % p)
        # Same x-coordinate: either the points are inverses of each other
        # (this also covers doubling a point with y == 0) ...
        if (yp + yq) % p == 0:
            return ECpoint.infty()
        # ... or the same point: tangent (doubling) formula.
        s = div((3 * xp**2 + a), (yp << 1), p)
        xr = s**2 - (xp << 1)
        yr = s * (xp - xr) - yp
        return ECpoint(xr % p, yr % p)
Example #4
0
def display(adict, title=None, msg_per_entry=None):
    """Print every key/value pair of `adict`, one `[key] value` line each.

    If `title` is truthy a banner is emitted first through `div`.  If
    `msg_per_entry` is truthy it is appended in parentheses to every line.
    """
    if title:
        div(message='icd9utils: %s' % title, symbol='*')
    for key, value in adict.items():
        line = '[%s] %s' % (key, value)
        if msg_per_entry:
            line = '%s (%s)' % (line, msg_per_entry)
        print(line)
    return
Example #5
0
def t_hierarchy2():
    """Check the annotated gold codes against a fixed code list and sample roots.

    Reads the '|'-separated gold candidate files under 'data-gold', verifies
    every annotated ICD-9 code appears in the hard-coded `codes` list, then
    runs `evalRoot` on that list and samples from the resulting table.
    """
    import utils

    # get annotated codes
    gfiles = [
        'gold_candidates_neg_random_gh.csv',
        'gold_candidates_pos_random_gh.csv'
    ]
    acodes = set()
    for f in gfiles:
        fp = os.path.join('data-gold', f)
        # `error_bad_lines=True` was the default (raise on malformed rows) and
        # the keyword no longer exists in pandas 2.x, so it is simply omitted.
        df = pd.read_csv(fp,
                         sep='|',
                         header=0,
                         index_col=False,
                         dtype={'icd9': str})
        acodes.update(df['icd9'].values)
    n_annotated = len(acodes)
    print('info> n_annotated: %d' % n_annotated)  # 54

    codes = [
        '112.3', '047.9', '038.10', '038.11', '112.5', '031.0', '038.19',
        '031.9', '031.8', '090.9', '041.09', '135', '041.9', '041.6', '090.1',
        '138', '033.9', '049.9', '031.2', '003.0', '001.1', '017.00', '011.93',
        '041.00', '079.0', '079.6', '123.1', '079.4', '112.4', '009.0',
        '112.2', '070.51', '034.0', '007.1', '061', '070.32', '070.30',
        '054.79', '054.2', '054.3', '054.10', '046.3', '052.7', '038.42',
        '038.40', '088.81', '053.19', '010.10', '133.0', '110.0', '110.3',
        '137.0', '040.82', '008.45', '098.0', '075', '057.9', '112.89',
        '041.7', '112.84', '027.0', '097.1', '078.5', '136.9', '078.0',
        '009.1', '070.70', '131.01', '070.71', '099.9', '041.89', '127.4',
        '041.85', '097.9', '005.9', '054.13', '053.9', '054.11', '047.8',
        '009.3', '083.2', '054.19', '481', '117.3', '091.3', '117.5', '130.7',
        '038.8', '117.9', '036.0', '094.9', '130.0', '136.3', '008.69',
        '053.79', '087.9', '041.10', '041.11', '008.61', '111.9'
    ]

    # Every annotated code must be covered by the list above.
    missing = acodes - set(codes)
    assert len(missing) == 0, 'annotated codes not in list: %s' % sorted(missing)

    print('info> size: %d' % len(codes))

    cur, freespots = evalRoot(codes, scope=None, verbose=True)
    print('> n_roots:%d, current roots:\n%s\n' % (len(cur), cur.keys()))
    utils.div()
    display(cur)
    n = 100  # setting too high may take time for UpSetR to finish
    candidates = utils.sample_hashtable(cur, n_sample=n)
    print('> sample existing %d=?=%d candidates:\n%s\n' %
          (n, len(candidates), list(candidates)))

    return
Example #6
0
def train(model, loss_func, dictionary, epoch, train_data, dev_data,
          identity_mat, stop_counter):
    """Train for one pass over `train_data`, then evaluate and checkpoint.

    After the pass the model is scored with `evaluate` on `dev_data`.  When
    the dev loss improves on the module-level `best_dev_loss` the model is
    saved and the counter is reset; otherwise every learning rate of the
    module-level `optimizer` is multiplied by 0.2 and, if `EARLY_STOP` is
    non-zero, the counter is incremented.

    Returns the updated `stop_counter`.
    """
    global best_dev_loss, best_acc
    model.train()
    total_loss = 0
    batches = utils.getBatch(data=train_data,
                             dictionary=dictionary,
                             maxlen=MAX_LEN,
                             batch_size=BATCH_SIZE)
    for texts, labels, masks, bsz in batches:
        hidden = model.init_hidden(bsz)
        _fc, _outh, pred, attention = model.forward(sents=texts,
                                                    mask=masks,
                                                    init_hc=hidden)

        loss = loss_func(pred.view(texts.size(0), -1), labels)
        if USE_ATTENTION:
            # Penalise the difference between attention @ attention^T and
            # the (batch-sliced) identity matrix.
            attention_t = torch.transpose(attention, 1, 2).contiguous()
            gram = torch.bmm(attention, attention_t)
            penalty = Frobenius(gram - identity_mat[:attention.size(0)])
            loss += PENALIZATION_COEFF * penalty

        optimizer.zero_grad()
        loss.backward()

        nn.utils.clip_grad_norm_(model.parameters(), CLIP)
        optimizer.step()

        total_loss += loss.data

    report, dev_loss, acc = evaluate(model, loss_func, dictionary, dev_data)
    print(report)
    utils.saveLog(LOG_PATH, report)

    summary = 'epoch: %d, dev loss: %f, acc: %f' % (epoch + 1, dev_loss, acc)
    print(summary)
    utils.saveLog(LOG_PATH, summary)
    utils.div('-')

    if not best_dev_loss or dev_loss < best_dev_loss:
        # New best: checkpoint the whole model and reset the patience counter.
        with open(MODEL_PATH % (dev_loss, acc), 'wb') as checkpoint:
            torch.save(model, checkpoint)
        best_dev_loss = dev_loss
        return 0

    # No improvement: decay the learning rate of every parameter group.
    for group in optimizer.param_groups:
        group['lr'] = group['lr'] * 0.2
    if EARLY_STOP != 0:
        stop_counter += 1
    return stop_counter
Example #7
0
def t_select():
    """Demonstrate several ways of selecting DataFrame rows.

    Memo
    ----
    1. df.iloc[i] returns the ith row of df. i does not refer to the index value, i is a 0-based index.
       In contrast, the attribute index is returning index values.
    """
    def show(df, prompt=''):
        # Print a frame, optionally preceded by a prompt line.
        if prompt:
            print('> %s\n%s\n' % (prompt, df))
        else:
            print('\n%s\n' % df)
        return

    div(message='Use case #1: select rows using row indices')
    df = pd.DataFrame({'BoolCol': [True, False, False, True, True]},
                      index=[10, 20, 30, 40, 50])
    idx = df[df['BoolCol'] == True].index.tolist()
    # select the rows (function-call form so the block parses on Python 3 too)
    print(df.loc[idx])

    div(message='Use case #2: select rows where columns match certain values.')
    df = pd.DataFrame({
        'A': 'foo bar foo bar foo bar foo foo'.split(),
        'B': 'one one two three two two one three'.split(),
        'C': np.arange(8),
        'D': np.arange(8) * 2
    })
    show(df)
    show(df.loc[df['A'] == 'foo'])
    print('\nVS\n')
    criteria = {'A': 'foo'}
    show(match(df, criteria))

    print('\n> for multiple values?')
    show(df.loc[df['B'].isin(['one', 'three'])])

    print('> another way')
    df = df.set_index(['B'])
    show(df.loc['one'])

    print('> multiple values ...')
    show(df.loc[df.index.isin(['one', 'two'])])

    return
Example #8
0
def t_dtype(sep=','):
    """Show column dtypes before and after casting a string column to float.

    First prints a few values of the 'mrn' column from a lab data file, then
    builds a small all-string frame and converts its 'two' column.
    """
    import predicate
    index_field = 'mrn'
    f = '/phi/proj/poc7002/bulk_training/data-exp/cdr/lab/cerner/cerner_urine_009.2_tset_bt_77.csv'
    df0 = load_df(_file=f, from_csv=True, sep=sep, verbose=False)
    idx = set(df0[index_field].values)
    print("> Should expect to see integer types for idx: %s" % list(idx)[:5])

    def report(frame):
        # Dump the frame followed by its per-column dtypes.
        print("> df:\n%s\n" % frame)
        print("> dtypes:\n%s" % frame.dtypes)

    rows = [['a', '1.2', '4.2'], ['b', '70', '0.03'], ['x', '5', '0']]
    df = pd.DataFrame(rows, columns=['one', 'two', 'three'])
    report(df)
    div()
    # Only 'two' is converted; 'three' is deliberately left as strings.
    df[['two']] = df[['two']].astype(float)
    report(df)

    return
 def log_r_X_z(self):
     """Build the symbolic expression named 'log_rX_z'.

     The difference between `self.Xz` and `self.tau` is reshaped into a
     (B*R, 1) column; the result combines a log2pi term, a `logDetTau` term
     and a quadratic form in that column.  With `Tau_isDiagonal` the column
     is divided by `self.Tau`, otherwise it is multiplied by `self.iTau`.
     NOTE(review): this has the shape of a Gaussian log-density — confirm.
     """
     residual = T.reshape(minus(self.Xz, self.tau), [self.B * self.R, 1])
     residual.name = 'X_m_tau_vec'
     if self.Tau_isDiagonal:
         weighted = div(residual, self.Tau)
     else:
         weighted = dot(self.iTau, residual)
     log_rX_z = (-0.5 * self.R * self.B * log2pi
                 - 0.5 * self.R * self.logDetTau
                 - 0.5 * trace(dot(residual.T, weighted)))
     log_rX_z.name = 'log_rX_z'
     return log_rX_z
Example #10
0
def make_1d_E_B_plots(bfrange,
                      y_databdl,
                      colors,
                      mu_pos=None,
                      enrange=None,
                      figsize=DEFAULT_FIGURE_SIZE,
                      linewidth=DEFAULT_LW,
                      ax=None,
                      plotrange=None,
                      legend=True):
    """Plot energy curves (divided by `e0`) against `bfrange`, one colour per band.

    Parameters
    ----------
    bfrange : list
        x values shared by every curve.
    y_databdl : list of list
        One entry per band; each entry holds the curves of that band.
    colors : list
        One colour per band (same length as `y_databdl`).
    mu_pos : sequence, optional
        Extra curve drawn in black when its length matches `bfrange`.
    enrange : sequence, optional
        y-limits are set to its min/max divided by `e0`.
    figsize : tuple
        Canvas size used when `ax` is None.
    linewidth : line width for every curve.
    ax : existing axes to draw on; a new canvas is made when None.
    plotrange : accepted but not used by this function.
    legend : bool
        Whether to draw the legend.

    Returns
    -------
    The axes that were drawn on.

    Raises
    ------
    TypeError, ValueError on malformed arguments.
    """
    if not isinstance(bfrange, list) or not isinstance(
            y_databdl, list) or not isinstance(colors, list):
        raise TypeError('either bfrange or y_databdl or colors is not list')
    if not len(y_databdl) == len(colors):
        raise ValueError('y_databdl, colors are not of the same length')
    if not any(isinstance(el, list) for el in y_databdl):
        raise TypeError('y_databdl is not nested list')
    if not isinstance(figsize, tuple):
        raise TypeError('figsize should be a tuple like (10,10)')
    if ax is None:
        ax = make_canvas(figsize=figsize)
    for n_band, (y_data, color) in enumerate(zip(y_databdl, colors)):
        line = None
        for y in y_data:
            line, = ax.plot(bfrange,
                            div(y, e0),
                            linewidth=linewidth,
                            color=color)
        # Only the last curve of a band carries the legend label.  A band
        # with no curves has nothing to label (previously this hit an unbound
        # name or relabelled the previous band's line).
        if line is not None:
            line.set_label(f'Band{n_band}')
    if legend:
        ax.legend(loc=DEFAULT_LEGEND_LOC, bbox_to_anchor=DEFAULT_LEGEND_POS)
    # Explicit None/length test: plain truthiness raises for numpy arrays.
    if (mu_pos is not None and len(mu_pos) > 0
            and len(mu_pos) == len(bfrange)):
        ax.plot(bfrange, div(mu_pos, e0), linewidth=linewidth, color='k')
    if enrange is not None:
        ax.set_ylim(min(enrange) / e0, max(enrange) / e0)

    ax.set_xlabel(DEFAULT_XLABEL)
    ax.set_ylabel(DEFAULT_EBPLOT_YLABEL)
    return ax
Example #11
0
def test_query():
    """Exercise the code-query helpers and print what they return.

    Uses the parenthesised single-argument print form throughout, which
    prints the same text on Python 2 and parses on Python 3.
    """
    codes = getInfectiousParasiticCodes()
    print("how many? %d" % len(codes))
    print("max? %s" % Code.max(codes))
    print("min? %s" % Code.min(codes))

    except_ = [481, '005', '039.1', 'V42.0', '010.2', '010.01']  # 3 not valid
    print("min max of except_: %s << %s" % (Code.min(except_),
                                            Code.max(except_)))
    codes = getInfectiousParasiticCodes(diff=except_, verbose=True)
    print("how many? %d" % len(codes))
    print("max? %s" % Code.max(codes))
    print("min? %s" % Code.min(codes))

    n = 100
    print('> randomly select %d infectious diseases' % n)
    codes = getInfectiousParasiticCodes(n_samples=n)
    print("how many? %d | they are: %s" % (len(codes), codes))
    print("max? %s" % Code.max(codes))
    print("min? %s" % Code.min(codes))
    print("type: %s" % type(codes))
    div(message='> mapping from codes to names ...')
    for i, code in enumerate(codes):
        print("[%d] %s -> %s" % (i, code, getName(code)))

    print("-" * 60)
    regex = 'meningitis'  # 'mening.*'
    print("Getting %s-related codes ..." % regex)
    codes = getCode(regex)
    div(message='Found %d codes with %s as a keyword.' % (len(codes), regex))
    for code in codes:
        print("  + %s -> %s" % (code, getName(code)))

    print("-" * 60)
    codes = getInfectiousParasiticCodes(filter_=isTuberculosis)
    codes2 = getCode('tubercu')
    print("> size %d =?= %d" % (len(codes), len(codes2)))
Example #12
0
def t_preproc(**kargs):
    """Run `preproc_code` on a fixed code string and look up the first codes.

    The string is processed twice, once as-is and once with
    `no_ve_code=True`, and both results are printed.  `lookup2` is then
    called on at most the first 20 codes of the first result.
    """

    ### input
    # alternative inputs kept for reference:
    # code_str = '24900 25000 25001 7902 79021 79022 79029 7915 7916 V4585 V5391 V6546'
    # code_str += ' ' + """24901 24910 24911 24920 24921 24930 24931 24940 24941 24950 24951 24960 24961 24970 24971 24980 24981 24990 24991 25002
    #     25003 25010 25011 25012 25013 25020 25021 25022 25023 25030 25031 25032 25033 25040 25041 25042 25043 25050 25051 25052
    #     25053 25060 25061 25062 25063 25070 25071 25072 25073 25080 25081 25082 25083 25090 25091 25092 25093"""
    # code_str += ' ' + "64800 64801 64802 64803 64804 64880 64881 64882 64883 64884"

    code_str = '585 5851 5852 5853 5854 5855 5856 5859 7925 V420 V451 V4511 V4512 V560 V561 V562 V5631 V5632 V568'

    # [params]
    base_only = False
    report = '> n_codes: %d\n> codes:\n%s\n'

    codes = preproc_code(code_str, base_only=base_only)
    print(report % (len(codes), codes))

    codes_minus_ve = preproc_code(code_str,
                                  base_only=base_only,
                                  no_ve_code=True)
    print(report % (len(codes_minus_ve), codes_minus_ve))

    # [status] ok -- a base-only run used to be printed here
    # (logged code set: '791', '790', 'V65', 'V45', 'V53', '648', '250', '249')

    div(message='Now, testing lookup ...')
    n_limit = 100
    for position, code in enumerate(codes[:20]):
        print('+ code: %s => %s' % (code, lookup2(code)))
        if position >= n_limit:
            break

    return
Example #13
0
def test_manipulate(**kargs):
    """Load a training-set csv, profile it and poke at feature indexing.

    Keyword `file_` overrides the default data path under `ProjDir`.

    Log
    ---
    * ./data-exp/cdr/lab/cerner/cerner_blood_481_tset_mixed.csv
    * 'data-lab/cerner/cerner_blood_481_tset_mixed.csv'
    * 'data-meds/cerner_antibiotic_481_tset_mixed.csv'

    Data
    ----
    * ./data-exp/cdr/lab/cerner/cerner_microbio_tset_mixed_infections_bt.csv

    """
    import os
    from utils import div
    from learner import Group, Feature
    from pprint import pprint

    default_path = os.path.join(
        ProjDir,
        'data-exp/cdr/lab/cerner/cerner_microbio_tset_mixed_infections_bt.csv')
    file_ = kargs.get('file_', default_path)
    print('test> path: %s' % file_)
    df = load_df(_file=file_, from_csv=True, sep=',')

    # profiling, framed by two dividers
    params = profile(df)
    div()
    pprint(params)
    div()

    # df.columns is an index object
    df = Group.canonicalize(df)  # [w1][1]
    fg = Feature(df.columns)
    n_total = len(fg.total())
    n_active = len(fg.active())
    total_type = type(fg.total())
    print("> number of features: %d =?= %d, type: %s" %
          (n_total, n_active, total_type))
    print("> number of columns:%s type: %s, examples: %s" %
          (len(df.columns), type(df.columns), df.columns))
    div()

    # check support and indexing: positional list vs. boolean mask
    columns = Series(list(df.columns[:10]))
    print("> ncols: %d, type: %s, ex: %s" %
          (len(columns), type(columns), columns))
    idx = [1, 3, 5]
    support = [False] * len(columns)
    for position in idx:
        support[position] = True
    print("> idx: %s -> features:\n %s" % (idx, columns[idx]))
    print("> support: %s -> features:\n %s" % (support, columns[support]))

    return
Example #14
0
def t_filter():
    """Demonstrate dropping / filtering DataFrame rows that contain NaN.

    Part (1) injects NaNs into a random frame and drops the affected rows.
    Part (2) removes rows whose 'name' is an N/A-style marker, applies a
    `dropna(thresh=...)` filter and then drops rows with a null name in two
    equivalent ways.
    """
    from numpy.random import randn
    div('(1) demo dropping rows with nan ...', border=1)
    df = DataFrame(randn(10, 3),
                   columns=list('ABC'),
                   index=pd.date_range('20130101', periods=10))
    # `.ix` no longer exists in pandas; address rows by position through the
    # index and columns by label.
    for row_pos, col in [(6, 'A'), (6, 'B'), (2, 'A'), (4, 'B')]:
        df.loc[df.index[row_pos], col] = np.nan
    print("> show row 0-5:\n%s\n" % df.iloc[0:6])
    df2 = df.iloc[0:6].dropna()
    print("> after dropping rows with nan:\n%s\n" % df2)

    div('(2) filtering out data without NaN ...', border=1)
    df = pd.DataFrame({
        'movie': ['thg', 'thg', 'mol', 'mol', 'lob', 'lob'],
        'rating': [3., 4., 5., np.nan, np.nan, np.nan],
        'name': ['John', np.nan, 'N/A', 'Graham', np.nan, np.nan]
    })
    print("> df:\n%s\n" % df)
    # standardize nan data; expand=False keeps the result a Series so it can
    # be compared element-wise with the 'name' column.
    nbs = df['name'].str.extract('^(N/A|NA|na|n/a)', expand=False)
    nms = df[(df['name'] != nbs)]
    print('> nms:\n%s\n' % nms)
    thresh = 2
    # dropna(thresh=k) keeps the rows that have at least k non-NaN values.
    nms = nms.dropna(thresh=thresh)
    print('> after keeping only rows with at least %d non-nan values:\n%s\n' %
          (thresh, nms))
    div()
    nms01 = nms[nms.name.notnull()]
    print('> dropped rows with na in name col:\n%s\n' % nms01)
    # np.isfinite(nms['name']) does not work here: the ufunc is not supported
    # for object (string) columns.
    nms03 = nms[pd.notnull(nms['name'])]
    print('> dropped rows with na in name col:\n%s\n' % nms03)

    return
Example #15
0
def plot_from_csv(path, ax=None, cmap=None, legend=True):
    """Re-plot band data stored in a csv file and save it as '<name>-replot'.

    The kind of plot is chosen from the columns present:
    'System([band density])' -> scatter of 'den'; otherwise a line plot of
    the first of 'den', 'E', 'dos_at_mu' that exists.  Rows are grouped into
    curves by the 'Band' column (and by 'N' when that column exists); x
    values come from column 'B'.

    Parameters
    ----------
    path : str
        Path of the csv file.
    ax : axes to draw on; a new canvas is created when None.
    cmap : colour map passed to `make_n_colors`; `DEFAULT_CMAP` when None.
    legend : bool
        Draw a legend (never drawn for the scatter type).

    Returns
    -------
    None.  On an unusable input a message is written to stderr and the
    function returns without plotting (it used to go on and fail with a
    NameError).
    """
    if not isinstance(path, str):
        sys.stderr.write(f'the path {path} is not a string\n')
        return
    if not os.path.isfile(path):
        sys.stderr.write(f'the file {path} does not exist\n')
        return
    if not path.endswith('.csv'):
        # Warning only: a csv-formatted file with another extension still loads.
        sys.stderr.write(f'the file {path} is not a csv file\n')
    try:
        df = pd.read_csv(path)
    except Exception:
        sys.stderr.write('Failed to read the csv file\n')
        return

    has_n = 'N' in df.columns
    is_system = 'System([band density])' in df.columns

    # define plot parameter and type
    if is_system:
        ind, plottype = 'den', 'scatter'
    elif 'den' in df.columns:
        ind, plottype = 'den', 'plot'
    elif 'E' in df.columns:
        ind, plottype = 'E', 'plot'
    elif 'dos_at_mu' in df.columns:
        ind, plottype = 'dos_at_mu', 'plot'
    else:
        sys.stderr.write('This file is not readable by toybands\n')
        return

    if cmap is None:
        cmap = DEFAULT_CMAP
    colors = make_n_colors(len(df.Band.unique()), cmap, DEFAULT_CMAP_VMIN,
                           DEFAULT_CMAP_VMAX)
    if ax is None:
        ax = make_canvas()
    x, y, iBand = [], [], df.iloc[0].Band
    if has_n:
        N = df.iloc[0].N

    def scaled(values):
        # Energies of N-resolved files are drawn divided by e0.
        # NOTE(review): files without an 'N' column plot 'E' unscaled, as
        # before — confirm whether that is intended.
        return div(values, e0) if (has_n and ind == 'E') else values

    # plot: accumulate points and flush a curve whenever N or Band changes
    for i in range(len(df)):
        row = df.iloc[i]
        if has_n:
            if row.N != N or row.Band != iBand:
                N = row.N
                if plottype == 'plot':
                    line, = ax.plot(x,
                                    scaled(y),
                                    color=colors[int(iBand)],
                                    linewidth=DEFAULT_LW)
                else:
                    ax.scatter(x, y, color=colors[int(iBand)])
                if row.Band != iBand:
                    # label only the last curve drawn for the finished band
                    if plottype == 'plot':
                        line.set_label(f'Band{int(iBand)}')
                    iBand = row.Band
                x, y = [], []
        elif row.Band != iBand:
            line, = ax.plot(x,
                            y,
                            color=colors[int(iBand)],
                            linewidth=DEFAULT_LW)
            line.set_label(f'Band{int(iBand)}')
            iBand = row.Band
            x, y = [], []
        x.append(row.B)
        y.append(row[ind])

    # plot the stored last curve (scaled like the earlier ones; it used to be
    # the only 'E' curve drawn without the e0 division)
    if is_system:
        ax.scatter(x, y, color=colors[int(iBand)])
    else:
        line, = ax.plot(x,
                        scaled(y),
                        color=colors[int(iBand)],
                        linewidth=DEFAULT_LW)
        line.set_label(f'Band{int(iBand)}')

    # label and legend
    ax.set_xlabel(DEFAULT_XLABEL)
    if ind == 'den':
        ax.set_ylabel(DEFAULT_NBPLOT_YLABEL)
    elif ind == 'E':
        ax.set_ylabel(DEFAULT_EBPLOT_YLABEL)
    elif ind == 'dos_at_mu':
        ax.set_ylabel(DEFAULT_DOSBPLOT_YLABEL)
    if legend and not is_system:
        ax.legend(loc=DEFAULT_LEGEND_LOC, bbox_to_anchor=DEFAULT_LEGEND_POS)

    # File name without directory and extension; works for dotted names and
    # for the platform's path separator.
    stem = os.path.splitext(os.path.basename(path))[0]
    super_save(filename=stem + '-replot')
Example #16
0
def leaderboard(request):
    """Render the leaderboard page.

    The context holds the top entries per category (words, rounds played,
    total score, average score) and, for an authenticated user, that user's
    own statistics and ranks.

    Words need 10+ rounds to be listed.  The average-score board only
    contains players with 10+ rounds whose last login is at most 30 days
    away from today.
    """
    context = dict()
    rnds_played_set = UserStat.objects.order_by('-rounds_played')
    total_score_set = UserStat.objects.order_by('-total_score')
    word_score_set = WordStat.objects.order_by('avg_score')
    # Only words played 10+ times are allowed on the leaderboard
    word_score_list = [w for w in word_score_set if w.rounds_played >= 10]

    # Only players who have played 10+ rounds are allowed on the leaderboard
    # Furthermore players who have not played in a month are not ranked for avg score.
    # Built as a real list: on Python 3 `filter` returns an iterator, which
    # supports neither .sort() nor len().
    today = date.today()
    average_score_list = sorted(
        (s for s in UserStat.objects.all()
         if s.rounds_played >= 10 and abs((today - s.last_login).days) <= 30),
        key=lambda s: utils.div(s.total_score, s.rounds_played),
        reverse=True)

    # `is_authenticated` is a method on old Django versions and a plain
    # attribute on newer ones; accept both.
    is_authenticated = request.user.is_authenticated
    if callable(is_authenticated):
        is_authenticated = is_authenticated()

    # If the user is authenticated, gather data on them in addition to just top players
    if is_authenticated:
        user_stat = utils.get_or_create_user_stat(request.user)
        context['rounds_played'] = user_stat.rounds_played
        context['total_score'] = round(user_stat.total_score, 2)
        context['avg_score'] = round(
            utils.div(user_stat.total_score, user_stat.rounds_played), 2)
        rounds_played_rank = utils.rank(rnds_played_set, user_stat,
                                        lambda x: x.rounds_played, 0,
                                        len(rnds_played_set) - 1)
        total_score_rank = utils.rank(total_score_set, user_stat,
                                      lambda x: x.total_score, 0,
                                      len(total_score_set) - 1)
        avg_score_rank = utils.rank(
            average_score_list, user_stat,
            lambda x: utils.div(x.total_score, x.rounds_played), 0,
            len(average_score_list) - 1)
        # Ranks are shown 1-based; a negative rank is displayed as '-'.
        context['total_score_rank'] = str(total_score_rank +
                                          1) if total_score_rank >= 0 else '-'
        context['avg_score_rank'] = str(avg_score_rank +
                                        1) if avg_score_rank >= 0 else '-'
        context['rounds_played_rank'] = str(
            rounds_played_rank + 1) if rounds_played_rank >= 0 else '-'
        context['num_players'] = len(total_score_set)

    # Adding top x scores per category to context
    for rank, stat in enumerate(word_score_list[:5], start=1):
        context['word_rank%d' % rank] = stat.word
        context['word_score%d' % rank] = round(stat.avg_score, 2)
        if stat.sem_rel == 'synonyms':
            relation = "Synonyms of "
        elif stat.sem_rel == 'antonyms':
            relation = "Antonyms of "
        else:
            relation = (rel_a_map[stat.sem_rel] +
                        utils.get_det(stat.word, determiners))
        context['sem_rel%d' % rank] = relation
    for rank, stat in enumerate(rnds_played_set[:5], start=1):
        context['rnd_rank%d' % rank] = stat.user.username
        context['rnd_score%d' % rank] = stat.rounds_played
    for rank, stat in enumerate(total_score_set[:5], start=1):
        context['ttl_rank%d' % rank] = stat.user.username
        context['ttl_score%d' % rank] = round(stat.total_score, 2)
    for rank, stat in enumerate(average_score_list[:10], start=1):
        context['avg_rank%d' % rank] = stat.user.username
        val = 0
        if stat.rounds_played != 0:
            val = stat.total_score / stat.rounds_played
        context['avg_score%d' % rank] = round(val, 2)
    return render(request, 'leaderboard.html', context)
Example #17
0
        df[col] = df[col].astype(typ)
        return df
    except KeyError, e:
        msg = "convert_dtype> column %s does not exist" % col
        if not no_op:
            raise KeyError, msg
    except Exception, e:
        try:
            try:
                n0 = df.shape[0]
                msg = "convert_dtype> 'df[%s]' may contain NaN values. Try removing rows with NaNs\n" % col
                df = df[pd.notnull(df[col])]
                msg += "convert_dtype> size of df %d -> %d\n" % (n0,
                                                                 df.shape[0])
                df[col] = df[col].astype(typ)
                if _debug: div(message=msg, symbol='*', adaptive=False)
                return df
            except:
                msg = "convert_dtype> Could not cast 'df[%s]' to type: %s\n" % (
                    col, typ)
                # if _debug > 0:
                msg += "  + %s\n" % e
                msg += "  + value:\n%s" % df.head(3)

        except Exception, e:
            msg = "convert_dtype> %s\n" % e
            msg += "  + Invalid dataframe:\n%s" % str(df.head())
    if _debug: print msg
    if no_op:
        # do nothing
        return df
Example #18
0
def subset3(path,
            n_samples=None,
            balanced=True,
            ratio=0.8,
            min_ratio=0.05,
            sep=',',
            shuffle=True,
            verbose=False,
            save_=True,
            verify_=False):
    """
    Take a (balanced) subset of a training set data.

    The file at `path` is loaded and profiled, each label's rows are
    (optionally shuffled and) truncated to a common size, and the pieces are
    concatenated.  The common size is `n_samples / n_labels`, or the smallest
    label count when `balanced` is set and some label cannot reach that.

    `n_samples` defaults to `floor(nrow * ratio)` and is raised to
    `floor(nrow * min_ratio)` when smaller.  With `verify_` the result is
    profiled again and, if `balanced`, equal label counts are asserted.  With
    `save_` the result is written next to the input as
    '<name>-<n_samples><ext>'.  Returns the new frame.
    """
    from utils import div
    import sys
    from pprint import pprint

    if sep != Params.sep_tset:
        warning = ("Warning: a training set data are usually '%s'-separated."
                   % Params.sep_tset)
        div(message=warning, symbol='~')

    root, fname = os.path.dirname(path), os.path.basename(path)
    df = load_df(_file=path, from_csv=True, sep=sep)
    params = profile(df)
    nrow, ncol = params['nrow'], params['ncol']
    assert (nrow, ncol) == df.shape and nrow >= 2

    # determine n_samples
    n_total = nrow
    if n_samples is None:
        n_samples = math.floor(n_total * ratio)
    if min_ratio is not None:
        min_samples = math.floor(n_total * min_ratio)
    else:
        min_samples = 0
    if n_samples < min_samples:
        if verbose:
            print('subset3> n_samples:%d may be too small; adjusting to %d' %
                  (n_samples, min_samples))
        n_samples = min_samples

    # how many instances should a label have?
    labels = params['labels']
    parts = []
    label_counts = {}
    min_nrow, max_nrow = (np.inf, -np.inf)
    # each label should have this many instances if obtainable
    n_avg = int(n_samples / float(len(labels)))
    use_min_nrow = False
    for label in labels:
        count = params[str(label)]
        label_counts[str(label)] = count  # each label has `count` instances
        # if any label falls short of the average, an 'optimal' balanced set
        # is not possible and the smallest label count is used instead
        if balanced and count < n_avg:
            use_min_nrow = True
            if verbose:
                msg = 'Warning: %s-labeled data are not sufficient to reach a balanced data set given n_samples=%d' % (
                    label, n_samples)
                div(message=msg, symbol='%')
        if min_nrow >= count:
            min_nrow = count
        if max_nrow <= count:
            max_nrow = count
    if verbose:
        msg = "subset3> n_avg: %d, min_nrow: %d, max_nrow: %d" % (
            n_avg, min_nrow, max_nrow)
        div(message=msg, symbol='~')

    for label in labels:
        try:
            subdf = df[df[Params.target_field] == label]
        except:
            print('subset3> label %s is not in %s?' %
                  (label, [c for c in df.columns]))
            sys.exit(1)
        if verbose:
            print(
                'subset3> prior to slicing, subset of df with label=%s has dim: %s'
                % (label, str(subdf.shape)))
        if shuffle:
            subdf = subdf.reindex(np.random.permutation(subdf.index))
        limit = min_nrow if use_min_nrow else n_avg
        parts.append(subdf[:limit])

    df = pd.concat(parts, ignore_index=True)
    if shuffle:
        df = df.reindex(np.random.permutation(df.index))
    if verbose:
        print('subset3> slicing and combining completed, dim of new tset: %s' %
              str(df.shape))

    if verify_:
        params = profile(df)
        print("subset3> profile of new training set:")
        div()
        pprint(params)
        div()
        if balanced:
            nref = params[str(labels[0])]
            for label in labels[1:]:
                assert nref == params[str(
                    label)], "imbalanced training data. see profile above."

    if save_:
        # rename file: insert '-<n_samples>' before the extension
        fname_prefix, ext = os.path.splitext(fname)
        fname = '%s-%s%s' % (fname_prefix, n_samples, ext)
        path = os.path.join(root, fname)
        if verbose:
            print('subset3> saving new tset of dim %s to %s' %
                  (str(df.shape), path))
        save_df(df, _file=path, is_data=True, to_csv=True, sep=Params.sep_tset)

    return df
Example #19
0
def t_hierarchy():
    """Exploratory driver for choosing extra ICD-9 target codes.

    In order, this routine:
      1. prints the sample pairs for which ``isA`` holds;
      2. reads the 'icd9' column of the two gold candidate files under
         ``data-gold`` and reports how many distinct codes they contain and
         which codes appear in both files;
      3. removes those annotated codes from ``configure.Params.code_set`` and
         passes the remainder to ``evalRoot``;
      4. samples from the result with ``utils.sample_hashtable`` and prints
         the union of annotated and sampled codes;
      5. feeds the hard-coded ``otra`` codes to ``assignRoot`` and prints a
         sample of 10 from what it returns.

    Everything is reported on stdout; the function returns None.
    """
    import utils
    import configure

    # spot check: pair each sample with the entry at the same position
    samples = [481.01, '112', '130.09']
    references = [481.0, '112.0', '130.9']
    for sample, reference in zip(samples, references):
        if isA(sample, reference):
            print('> %s is a %s' % (sample, reference))

    # root analysis
    # (an earlier hard-coded target list, kept for reference)
    #     targets = ['047.8','112.2','038.10','038.11','112.5','047.9','038.19','090.9','135','041.9','041.6',
    # '090.1','138','041.3','001.1','017.00','011.93','112.4','003.0','094.9','008.45',
    # '054.2','070.71','052.7','088.81','041.7','027.0','131.01','041.89','041.85','049.9',
    # '046.3','009.2','009.3','009.0','009.1','038.2','117.3','038.0','091.3','117.5',
    # '038.8','117.9','054.10','041.19','136.3','041.10','041.11','031.2','031.0','031.9',
    # '031.8','112.3','033.9','041.02','041.01','041.00','079.0','079.6','041.09','079.4',
    # '054.13','070.51','007.1','070.32','070.30','038.3','038.49','038.43','038.42','038.40',
    # '054.79','053.19','110.0','110.3','137.0','075','057.9','112.89','112.84','097.9',
    # '097.1','078.5','078.0','070.70','054.3','099.9','127.4','005.9','136.9','053.9',
    # '054.11','083.2','054.19','481','130.7','036.0','130.0','008.69','053.79','087.9',
    # '008.61','111.9']

    otra = '112.1,112.0,112.9,072.9,096,056.9,041.8,098.86,041.4,041.5,041.2,041.0,011.12,091.0,026.9,001.9,091.9,123.1,003.1,074.0,003.9,074.8,077.99,098.0,008.6,098.2,054.0,054.6,008.8,099.40,099.41,052.9,129,088.82,057.0,039.9,008.43,010.10,131.9,039.1,133.0,079.53,040.82,099.50,099.53,099.55,099.54,039.8,090.2,035,092.9,010.01,010.00,041.1,094.0,131.00,079.51,079.83,041.86,131.09,079.88,079.89,049.8,048,042,038.1,038.9,094.89,136.1,136.8,031.1,079.98,066.3,139.8,033.0,070.54,041.04,041.03,074.3,079.2,079.1,070.22,054.40,054.43,007.4,045.90,007.2,070.59,061,078.19,077.8,070.31,078.10,078.11,004.9,046.1,038.44,038.41,058.10,053.12,053.11,084.0,084.6,110.1,070.41,110.2,110.5,110.4,110.9,110.8,054.8,134.0,054.9,010.90,057.8,078.89,078.88,040.0,055.9,112.81,078.8,097.0,078.2,078.1,111.0,002.0,127.2,099.1,099.0,099.3,054.12,053.21,070.3,053.0,034.0,034.1,130.9,111.8,036.2,132.9,088.8,008.62,132.2,132.1,132.0,088.0'.split(',')

    # get annotated codes
    gold_dir = 'data-gold'
    gold_files = [
        'gold_candidates_neg_random_gh.csv',
        'gold_candidates_pos_random_gh.csv',
    ]
    codes_per_file = []  # position matches gold_files
    acodes = set()
    for gold_file in gold_files:
        frame = pd.read_csv(os.path.join(gold_dir, gold_file),
                            sep='|',
                            header=0,
                            index_col=False,
                            error_bad_lines=True,
                            dtype={'icd9': str})
        icd9_codes = frame['icd9'].values
        codes_per_file.append(icd9_codes)
        acodes.update(icd9_codes)

    n_annotated = len(acodes)
    print('info> n_annotated: %d' % n_annotated)  # 54
    overlap = set(codes_per_file[0]).intersection(codes_per_file[1])
    print('info> overlap? size: %d, %s' % (len(overlap), overlap))

    total_set = configure.Params.code_set
    print('info> size of total targets: %d' % len(total_set))
    targets = list(set(total_set) - set(acodes))
    n_targets = 100
    n_to_draw = n_targets - n_annotated
    print('info> number of remaining: %d but only need %d' %
          (len(targets), n_to_draw))
    # otra = configure.Params.otra

    cur, freespots = evalRoot(targets, scope=None, verbose=True)
    print('> n_roots:%d, current roots:\n%s\n' % (len(cur), cur.keys()))
    utils.div()
    display(cur)

    # setting the sample size too high may take time for UpSetR to finish
    drawn = utils.sample_hashtable(cur, n_sample=n_to_draw)
    print('> sample existing %d=?=%d candidates:\n%s\n' %
          (n_to_draw, len(drawn), list(drawn)))

    rule = '-' * 100
    print(rule)
    acodes.update(drawn)
    wanted = list(acodes)
    print('info> %d candidates:\n%s\n' % (len(wanted), wanted))
    print(rule)

    newcodes = assignRoot(otra, freespots)
    print('> suggested pick:\n%s\n' % newcodes)

    n_new = 10
    drawn_new = utils.sample_hashtable(newcodes, n_sample=n_new)
    print('> sample %d=?=%d candidates:\n%s\n' %
          (n_new, len(drawn_new), list(drawn_new)))
Example #20
0
def random_forest(N, M, F, table, attr_indexes, attr_domains, class_index,
                  strat_index):
    """Train N bootstrapped trees, keep the M best, and score them on a test split.

    Args:
        N: number of bootstrap samples (and therefore trees) to build.
        M: number of top-scoring trees kept for voting. When M exceeds the
            number of trees built, all trees are kept.
        F: passed to ``utils.rand_attributes`` together with ``attr_indexes``
            to pick the attribute subset for each tree.
        table: list of instances; it is shuffled IN PLACE before splitting.
        attr_indexes: candidate attribute indexes.
        attr_domains: forwarded unchanged to ``train_test_tree``.
        class_index: position of the class label inside an instance.
        strat_index: forwarded to ``test_remainder_stratified``.

    Returns:
        A list of ``[group_key, accuracy, n_correct, n_instances]`` rows, one
        per group returned by ``utils.groupBy(test, 1)``, sorted by group key.
        The same figures and the overall accuracy are printed to stdout.
    """
    random.shuffle(table)
    test, remainder = test_remainder_stratified(table, strat_index)

    # Phase 1: draw every attribute subset and bootstrap sample up front so
    # the sequence of random draws does not interleave with tree training.
    boot_samples = []
    attr_subsets = []
    for _ in range(N):
        attr_subsets.append(utils.rand_attributes(attr_indexes, F))
        boot = utils.bootstrap(remainder)
        # validation set: remainder instances that the bootstrap did not draw
        valid = [item for item in remainder if item not in boot]
        boot_samples.append([boot, valid])

    # Phase 2: train one tree per sample and score it on its validation set.
    trees = []
    for (boot, valid), attr_subset in zip(boot_samples, attr_subsets):
        pred, tree = train_test_tree(boot, valid, attr_subset, attr_domains,
                                     class_index)
        correct = sum(1 for j, row in enumerate(valid)
                      if row[class_index] == pred[j])
        trees.append([tree, utils.div(correct, len(valid))])

    # Keep the M best trees. The start index is clamped at 0: a negative
    # start (M > number of trees) would be read as "count from the end" and
    # silently drop trees instead of keeping them all.
    trees.sort(key=lambda scored: scored[1])
    mtrees = trees[max(len(trees) - M, 0):]

    #predict and determine accuracy
    print("     grouping test set")
    minutes, groups = utils.groupBy(test, 1)
    print("     running classifier")
    accuracies = []
    overall_correct = 0
    total_instance = len(test)

    for idx, minute in enumerate(minutes):
        group = groups[idx]
        correct = 0
        for item in group:
            votes = [classify_tdidt(tree[0], item) for tree in mtrees]
            if item[class_index] == utils.majority_vote(votes):
                correct += 1
        overall_correct += correct
        accuracies.append([minute, correct / len(group), correct, len(group)])

    print("Sorting accuracies")
    accuracies.sort(key=lambda row: row[0])
    for minute, accuracy, n_correct, n_instances in accuracies:
        print('Minute: ', minute)
        print('     Accuracy: ', accuracy)
        print('     Correct: ', n_correct)
        print('     Instances: ', n_instances)
        print()

    # An empty test split has no meaningful accuracy; report 0.0 rather than
    # raising ZeroDivisionError after all the training work is done.
    overall_accuracy = (overall_correct / total_instance
                        if total_instance else 0.0)
    print("Overll Accurracy: ", overall_accuracy)
    print("Instances: ", total_instance)
    print("Correct: ", overall_correct)

    return accuracies
Example #21
0
def single_match(raw, mid):
    """Build a ``Match`` and its ``PlayerMatch`` entries from a raw payload.

    ``raw`` is read positionally:
      * ``raw[0][0]`` -- the 'officl' / 'cas' flags that select the mode;
      * ``raw[1]``    -- per-player rows holding 'slot_1'..'slot_6';
      * ``raw[2]``    -- per-player statistic rows;
      * ``raw[3][0]`` -- match summary ('version', 'map', 'time_played', 'mdt').

    ``match_incr()`` is called once, right after the ``Match`` is created.

    Args:
        raw: the raw match payload described above.
        mid: id given to the new ``Match``.

    Returns:
        The populated ``Match`` instance.
    """
    m = Match(id=mid)
    match_incr()

    # Mode codes: 2 = "cs", 1 = "rnk", 3 = "acc" (every other combination).
    flags = raw[0][0]
    mode = 3
    if flags['officl'] == "1":
        casual = flags['cas']
        if casual == "1":
            mode = 2
        elif casual == "0":
            mode = 1
    m.mode = mode

    summary = raw[3][0]
    m.version = summary['version']
    m.map_used = summary['map']
    m.length = summary['time_played']
    # 'mdt' looks like '2014-07-27 01:31:18'
    naive_date = datetime.strptime(summary['mdt'], '%Y-%m-%d %H:%M:%S')
    m.date = utc.localize(naive_date)

    # account_id -> item ids taken from the non-empty inventory slots
    slot_keys = ['slot_' + str(slot) for slot in range(1, 7)]
    inventories = {}
    for inv in raw[1]:
        inventories[inv['account_id']] = [
            int(inv[key]) for key in slot_keys if inv[key]
        ]

    for stats in raw[2]:
        # players without an inventory row get an empty item list
        held_items = inventories.setdefault(stats['account_id'], [])
        m.players.append(PlayerMatch(
            player_id=int(stats['account_id']),
            nickname=stats['nickname'],
            clan_id=int(stats['clan_id']),
            hero_id=int(stats['hero_id']),
            position=int(stats['position']),
            items=held_items,
            team=int(stats['team']),
            level=int(stats['level']),
            win=bool(int(stats['wins'])),
            concedes=int(stats['concedes']),
            concedevotes=int(stats['concedevotes']),
            buybacks=int(stats['buybacks']),
            discos=int(stats['discos']),
            kicked=int(stats['kicked']),
            mmr_change=float(stats['amm_team_rating']),
            herodmg=int(stats['herodmg']),
            kills=int(stats['herokills']),
            assists=int(stats['heroassists']),
            deaths=int(stats['deaths']),
            kdr=div(stats['herokills'], stats['deaths']),
            goldlost2death=int(stats['goldlost2death']),
            secs_dead=int(stats['secs_dead']),
            cs=int(stats['teamcreepkills']) + int(stats['neutralcreepkills']),
            # NOTE(review): the next three are passed through uncast, unlike
            # the int(...) fields around them -- confirm that is intended.
            bdmg=stats['bdmg'],
            denies=stats['denies'],
            exp_denied=stats['exp_denied'],
            gpm=divmin(stats['gold'], m.length),
            xpm=divmin(stats['exp'], m.length),
            apm=divmin(stats['actions'], m.length),
            consumables=int(stats['consumables']),
            wards=int(stats['wards'])
        ))
    return m
Example #22
0
def _refresh_mode_stats(p, raw, prefix, mmr_key):
    """Recompute the derived ``<prefix>_*`` statistics on ``p`` for one mode.

    Reads the raw ``<prefix>_*`` counters already stored on ``p`` and writes
    ``<prefix>_mmr``, the ``<prefix>_avg_*`` averages, ``<prefix>_winpercent``,
    ``<prefix>_kdr``, ``<prefix>_kadr`` and ``<prefix>_tsr``.

    Args:
        p: player object carrying the raw counters as attributes.
        raw: the statistics payload; only ``raw[mmr_key]`` is read here.
        prefix: mode prefix, e.g. 'rnk', 'cs' or 'acc'.
        mmr_key: key in ``raw`` holding this mode's rating.
    """
    def stat(name):
        return getattr(p, prefix + '_' + name)

    def store(name, value):
        setattr(p, prefix + '_' + name, value)

    store('mmr', float(raw[mmr_key]))

    games = stat('games_played')
    kills = stat('herokills')
    deaths = stat('deaths')
    assists = stat('heroassists')
    team_creeps = stat('teamcreepkills')
    denies = stat('denies')
    wards = stat('wards')
    wins = stat('wins')
    losses = stat('losses')
    gold = stat('gold')
    exp = stat('exp')
    secs = stat('secs')
    earning_secs = stat('time_earning_exp')

    store('avg_kills', div(kills, games))
    store('avg_deaths', div(deaths, games))
    store('avg_assists', div(assists, games))
    store('avg_creeps', div((stat('neutralcreepkills') + team_creeps), games))
    store('avg_denies', div(denies, games))
    minutes = div(secs, 60)
    store('avg_xpm', div(exp, minutes))
    store('avg_apm', div(stat('actions'), minutes))
    store('avg_gpm', div(gold, minutes))
    store('avg_consumables', div(stat('consumables'), games))
    store('avg_time', div(minutes, games))
    store('winpercent', div(wins, games))
    store('kdr', div(kills, deaths))
    store('avg_wards', div(wards, games))
    store('kadr', div((kills + assists), deaths))

    # TSR: weighted sum of kill, assist, win, gold and exp terms plus a
    # farming term scaled by average game length. Operator order is kept
    # exactly as in the original formula so results do not change.
    try:
        kill_term = (kills / deaths / 1.15) * 0.65
        assist_term = (assists / deaths / 1.55) * 1.20
        win_term = ((wins / (wins + losses)) / 0.55) * 0.9
        gold_term = ((gold / secs * 60) / 230) * 0.35
        exp_term = ((exp / earning_secs * 60) / 380) * 0.40
        farm_term = (((denies / games) / 12) * 0.70
                     + ((team_creeps / games) / 93) * 0.50
                     + (wards / games) / 1.45 * 0.30)
        pace = 37.5 / (secs / games / 60)
        tsr = (kill_term + assist_term + win_term + gold_term + exp_term
               + farm_term * pace)
    except ZeroDivisionError:
        # Any zero denominator (no deaths, no games, no play time, ...)
        # means the rating is undefined; it is stored as 0.
        tsr = 0
    store('tsr', tsr)


def update_player(nickname, p=None):
    """Fetch a player's statistics by nickname and store them on a Player.

    Args:
        nickname: nickname appended to the '/player_statistics/all/nickname/'
            endpoint passed to ``get_json``.
        p: existing player object to refresh. When None, a new ``Player`` is
            created from the returned account id, added to the session and
            committed, after which ``avatar.delay`` and ``player_incr`` are
            called.

    Returns:
        The updated player, or None when ``get_json`` returns None or the
        returned account id is 0.
    """
    raw = get_json('/player_statistics/all/nickname/' + nickname)
    if raw is None or int(raw['account_id']) == 0:
        return None

    is_new = p is None
    if is_new:
        p = Player(id=int(raw['account_id']))
    p.nickname = raw['nickname'].lower()

    # Raw counters copied verbatim from the payload as ints.
    # NOTE(review): '<mode>_teamcreepexp' is listed twice per mode, the second
    # time where '<mode>_neutralcreepexp' would be expected between
    # neutralcreepdmg and neutralcreepgold -- confirm against the API schema.
    var = [
        'rnk_games_played', 'rnk_wins', 'rnk_losses', 'rnk_concedes', 'rnk_concedevotes', 'rnk_buybacks', 'rnk_discos', 'rnk_kicked', 'rnk_herokills', 'rnk_herodmg', 'rnk_heroexp', 'rnk_herokillsgold', 'rnk_heroassists', 'rnk_deaths', 'rnk_goldlost2death', 'rnk_secs_dead', 'rnk_teamcreepkills', 'rnk_teamcreepdmg', 'rnk_teamcreepexp', 'rnk_teamcreepgold', 'rnk_neutralcreepkills', 'rnk_neutralcreepdmg', 'rnk_teamcreepexp', 'rnk_neutralcreepgold', 'rnk_bdmg', 'rnk_razed', 'rnk_bgold', 'rnk_denies', 'rnk_exp_denied', 'rnk_gold', 'rnk_gold_spent', 'rnk_exp', 'rnk_actions', 'rnk_secs', 'rnk_consumables', 'rnk_wards', 'rnk_level', 'rnk_level_exp', 'rnk_time_earning_exp', 'rnk_bloodlust', 'rnk_doublekill', 'rnk_triplekill', 'rnk_quadkill', 'rnk_annihilation', 'rnk_ks3', 'rnk_ks4', 'rnk_ks5', 'rnk_ks6', 'rnk_ks7', 'rnk_ks8', 'rnk_ks9', 'rnk_ks10', 'rnk_ks15', 'rnk_smackdown', 'rnk_humiliation', 'rnk_nemesis', 'rnk_retribution', 'cs_games_played', 'cs_wins', 'cs_losses', 'cs_concedes', 'cs_concedevotes', 'cs_buybacks', 'cs_discos', 'cs_kicked', 'cs_herokills', 'cs_herodmg', 'cs_heroexp', 'cs_herokillsgold', 'cs_heroassists', 'cs_deaths', 'cs_goldlost2death', 'cs_secs_dead', 'cs_teamcreepkills', 'cs_teamcreepdmg', 'cs_teamcreepexp', 'cs_teamcreepgold', 'cs_neutralcreepkills', 'cs_neutralcreepdmg', 'cs_teamcreepexp', 'cs_neutralcreepgold',
        'cs_bdmg', 'cs_bdmgexp', 'cs_razed', 'cs_bgold', 'cs_denies', 'cs_exp_denied', 'cs_gold', 'cs_gold_spent', 'cs_exp', 'cs_actions', 'cs_secs', 'cs_consumables', 'cs_wards', 'cs_level', 'cs_level_exp', 'cs_time_earning_exp', 'cs_bloodlust', 'cs_doublekill', 'cs_triplekill', 'cs_quadkill', 'cs_annihilation', 'cs_ks3', 'cs_ks4', 'cs_ks5', 'cs_ks6', 'cs_ks7', 'cs_ks8', 'cs_ks9', 'cs_ks10', 'cs_ks15', 'cs_smackdown', 'cs_humiliation', 'cs_nemesis', 'cs_retribution', 'acc_games_played', 'acc_wins', 'acc_losses', 'acc_concedes', 'acc_concedevotes', 'acc_buybacks', 'acc_discos', 'acc_kicked', 'acc_herokills', 'acc_herodmg', 'acc_heroexp', 'acc_herokillsgold', 'acc_heroassists', 'acc_deaths', 'acc_goldlost2death', 'acc_secs_dead', 'acc_teamcreepkills', 'acc_teamcreepdmg', 'acc_teamcreepexp', 'acc_teamcreepgold', 'acc_neutralcreepkills', 'acc_neutralcreepdmg', 'acc_teamcreepexp', 'acc_neutralcreepgold', 'acc_bdmg', 'acc_bdmgexp', 'acc_razed', 'acc_bgold', 'acc_denies', 'acc_exp_denied', 'acc_gold', 'acc_gold_spent', 'acc_exp', 'acc_actions', 'acc_secs', 'acc_consumables', 'acc_wards', 'acc_time_earning_exp', 'acc_bloodlust', 'acc_doublekill', 'acc_triplekill', 'acc_quadkill', 'acc_annihilation', 'acc_ks3', 'acc_ks4', 'acc_ks5', 'acc_ks6', 'acc_ks7', 'acc_ks8', 'acc_ks9', 'acc_ks10', 'acc_ks15', 'acc_smackdown', 'acc_humiliation', 'acc_nemesis', 'acc_retribution'
    ]
    for v in var:
        setattr(p, v, int(raw[v]))
    p.updated = datetime.utcnow()

    # Same derivations for each mode; only the prefix and rating key differ.
    for prefix, mmr_key in (('rnk', 'rnk_amm_team_rating'),
                            ('cs', 'cs_amm_team_rating'),
                            ('acc', 'acc_pub_skill')):
        _refresh_mode_stats(p, raw, prefix, mmr_key)

    if is_new:
        db.session.add(p)
        # committed before avatar.delay(p.id) is invoked
        db.session.commit()
        avatar.delay(p.id)
        player_incr()
    db.session.commit()
    return p