Exemple #1
0
def temporal_join(u, v, v_name, key, time_col):
    """Attach rolling-window aggregates of ``v`` to the rows of ``u``.

    The time and key columns of ``u`` are stacked on top of ``v`` (outer
    index labels ``'u'`` and ``'v'``), the stack is sorted by ``time_col``
    and every selected column of ``v`` is aggregated over a rolling window.
    The aggregate rows labelled ``'u'`` are then concatenated column-wise
    onto ``u``.

    Args:
        u: Main table; must contain the ``time_col`` and ``key`` columns.
        v: Related table whose columns are aggregated.  The key column and
            columns starting with ``CONSTANT.TIME_PREFIX`` or
            ``CONSTANT.MULTI_CAT_PREFIX`` are skipped.
        v_name: Name of ``v``; embedded in the generated column names.
        key: Key column name, or a one-element list holding it.  In this
            variant the key is only selected and excluded from
            aggregation; rows are not grouped by it.
        time_col: Column the stacked rows are sorted by.

    Returns:
        ``u`` with the aggregate columns appended, or ``u`` itself when the
        aggregate frame is empty.
    """
    timer = Timer()

    # The window scales with the size of u but never drops below the
    # configured minimum.
    scaled_window = len(u) * CONSTANT.WINDOW_RATIO
    if scaled_window < CONSTANT.WINDOW_SIZE:
        window_size = CONSTANT.WINDOW_SIZE
    else:
        window_size = int(scaled_window)

    if isinstance(key, list):
        assert len(key) == 1
        key = key[0]

    tmp_u = u[[time_col, key]]
    timer.check("select")

    tmp_u = pd.concat([tmp_u, v], keys=['u', 'v'], sort=False)
    timer.check("concat")

    tmp_u.sort_values(time_col, inplace=True)
    timer.check("sort")

    agg_funcs = {
        col: Config.aggregate_op(col)
        for col in v if col != key and not col.startswith(CONSTANT.TIME_PREFIX)
        and not col.startswith(CONSTANT.MULTI_CAT_PREFIX)
    }

    tmp_u = tmp_u.rolling(window=window_size).agg(agg_funcs)
    timer.check("group & rolling & agg")

    # The mapper expects every column label to be a
    # (source column, aggregation name) pair.
    # NOTE(review): the label says ROLLING5 although the window is
    # window_size; the name is kept because other code may match on it.
    tmp_u.columns = tmp_u.columns.map(
        lambda a:
        f"{CONSTANT.NUMERICAL_PREFIX}{a[1].upper()}_ROLLING5({v_name}.{a[0]})")

    if tmp_u.empty:
        log("empty tmp_u, return u")
        return u

    ret = pd.concat([u, tmp_u.loc['u']], axis=1, sort=False)
    timer.check("final concat")

    return ret
Exemple #2
0
class JumpTimer(object):
	'''Holds an optional countdown; jumping is allowed for as long as a countdown is held.

	set() starts a countdown, update() retires it once it reports that its
	time has run out, and unset() discards it immediately.'''
	def __init__(self):
		object.__init__(self)
		self.wait_timer = None
		self.timer = None

	def jump_allowed(self):
		'''Return True while a countdown is held, False otherwise.'''
		if self.timer is None:
			return False
		return True

	def set(self):
		'''Start a fresh countdown whose length is the Physics/jump_time config value.'''
		jump_time = config.getint('Physics','jump_time')
		self.timer = Timer(jump_time)

	def unset(self):
		'''Discard the current countdown, if any.'''
		self.timer = None

	def update(self, ms):
		'''Pass ms to the held countdown and drop the countdown when its check() is truthy.'''
		if not self.timer:
			return
		if self.timer.check(ms):
			self.unset()
    except KeyError:
        Glof = None

    # Load the missense burdens
    try:
        Gmiss = bloader_missense.genotypes_by_id(gene).astype(float)
    except KeyError:
        Gmiss = None

    call(Glof, 'pLOF')
    call(Gmiss, 'missense')

    if (Glof is None) or (Gmiss is None):
        call(None, 'mrg')
    else:
        G = np.maximum(Glof, Gmiss)
        call(G, 'mrg')

    return pval_dict


# Run test_gene on every entry of `genes`, build a table from the returned
# records and index it by the 'gene' field.
timer = Timer()
results = pd.DataFrame.from_dict([test_gene(gene)
                                  for gene in genes]).set_index('gene')

# Report the number of rows and the elapsed time; timer.check() is divided
# by 60, so it presumably returns seconds -- TODO confirm against Timer.
t = timer.check()
logging.info('{} genes tested in {:.2f} minutes.'.format(
    len(results), t / 60.))

# Write the table tab-separated, including the 'gene' index; missing values
# are written as '.'.
results.to_csv(snakemake.output.results_tsv, sep='\t', index=True, na_rep='.')
Exemple #4
0
def temporal_join(u, v, v_name, key, time_col):
    """Append per-bucket rolling aggregates of ``v`` to ``u``.

    The time and key columns of ``u`` are stacked with ``v`` (outer index
    labels ``'u'`` and ``'v'``).  Each row is assigned to a bucket computed
    as ``hash(key value) % CONSTANT.HASH_MAX``, the stack is sorted by
    ``time_col``, and the selected columns of ``v`` are aggregated over a
    rolling window of 5 rows inside every bucket.

    Args:
        u: Main table; must contain ``time_col`` and ``key``.
        v: Related table.  Its key column and columns starting with
            ``CONSTANT.TIME_PREFIX`` or ``CONSTANT.MULTI_CAT_PREFIX`` are
            not aggregated.
        v_name: Name of ``v``; embedded in the generated column names.
        key: Key column name; for a list only its first element is used.
        time_col: Column the stacked rows are sorted by.

    Returns:
        ``u`` with the aggregate rows labelled ``'u'`` concatenated
        column-wise, or ``u`` itself when the aggregate frame is empty.
    """
    timer = Timer()

    key = key[0] if isinstance(key, list) else key

    key_times = u[[time_col, key]]
    timer.check("select")

    stacked = pd.concat([key_times, v], keys=['u', 'v'], sort=False)
    timer.check("concat")

    bucket_col = f'rehash_{key}'

    def bucket_of(value):
        # Bucket id of a single key value.
        return hash(value) % CONSTANT.HASH_MAX

    stacked[bucket_col] = stacked[key].apply(bucket_of)
    timer.check("rehash_key")

    stacked = stacked.sort_values(time_col)
    timer.check("sort")

    # One aggregation spec per column of v that is neither the key, a time
    # column nor a multi-category column.
    agg_funcs = {}
    for col in v:
        if col == key:
            continue
        if col.startswith(CONSTANT.TIME_PREFIX) or col.startswith(
                CONSTANT.MULTI_CAT_PREFIX):
            continue
        agg_funcs[col] = Config.aggregate_op(col)

    rolled = stacked.groupby(bucket_col).rolling(5).agg(agg_funcs)
    timer.check("group & rolling & agg")

    # Drop the bucket level that groupby placed at the front of the index.
    rolled = rolled.reset_index(0, drop=True)
    timer.check("reset_index")

    def rename_func(x):
        # x is expected to be a (source column, aggregation name) pair.
        op_name = x[1].upper()
        if op_name == 'MODE':
            dtype = CONSTANT.CATEGORY_PREFIX
        else:
            dtype = CONSTANT.NUMERICAL_PREFIX
        return f"{dtype}{op_name}_ROLLING10({v_name}.{x[0]})"

    rolled.columns = rolled.columns.map(rename_func)

    if rolled.empty:
        log("empty tmp_u, return u")
        return u

    ret = pd.concat([u, rolled.loc['u']], axis=1, sort=False)
    timer.check("final concat")

    return ret
                G1 = np.concatenate([G1, G2], axis=1)
                call_score(G1, 'linwcollapsed_cLOF')
                call_lrt(G1, 'linwcollapsed_cLOF')

        pval_dict['nCarrier'] = ncarrier
        pval_dict['cumMAC'] = cummac.sum()
        pval_dict['n_homo'] = n_homo
        pval_dict['n_snp'] = len(vids)
        pval_dict['n_cluster'] = len(set(clusters))

        # sanity check
        assert pval_dict['cumMAC'] >= pval_dict['nCarrier'], 'Error: something is broken.'
        
        return pval_dict, sim_dict

    logging.info('loaders for chromosome {} initialized in {:.1f} seconds.'.format(chromosome, timer.check()))
    # run tests for all genes on the chromosome
    for _, region in regions.iterrows():

        try:
            gene_stats, sims = test_gene(region, i_gene)
        except GotNone:
            continue

        stats.append(gene_stats)
        simulations.append(sims)
        i_gene += 1
        if (i_gene % 100) == 0:
            logging.info('tested {} genes...'.format(i_gene))

    logging.info('all tests for chromosome {} performed in {:.2f} minutes.'.format(chromosome, timer.check()/60.))
            call_score(G2, 'LOF')
            call_lrt(G2, 'LOF')

            # merged (single variable)
            G1_burden_mrg = np.maximum(G2, G1_burden)
            call_score(G1_burden_mrg, 'linwb_mrgLOF')
            call_lrt(G1_burden_mrg, 'linwb_mrgLOF')

            # concatenated
            call_score(np.concatenate([G1, G2], axis=1), 'linwcollapsed_cLOF')
            call_lrt(np.concatenate([G1, G2], axis=1), 'linwcollapsed_cLOF')
            pval_dict['linwcollapsed_cLOF']['cluster_id'] = sorted(set(clusters)) + [-1] # -1 indicates the LOF cluster

        return pval_dict, called

    logging.info('loaders for chromosome {} initialized in {:.1f} seconds.'.format(chromosome, timer.check()))
    # run tests for all genes on the chromosome
    for _, region in regions.iterrows():

        try:
            gene_stats, called = test_gene(region, i_gene)
        except GotNone:
            continue


        # build the single-variant datafame
        single_var_columns = ['gene','vid','weights','MAC','nCarrier','pos','ref','alt','cosine_similarity','cluster_id']
        sv_df = pd.DataFrame.from_dict({k: gene_stats[k] for k in single_var_columns})

        sv_df['pv_score'] = gene_stats['variant_pvals']['pv_score'] # single-variant p-values estimated independently
        sv_df['coef_random'] = gene_stats['variant_pvals']['coef_random']  # single-variant coefficients estimated jointly after weighting
Exemple #7
0
                G1 = np.concatenate([G1[:, keep], G2], axis=1)
                call_score(G1,
                           'linw_cLOF',
                           vids=np.array(vids[keep].tolist() + [-1]))
                call_lrt(G1, 'linw_cLOF')
            else:
                logging.info(
                    'All Splice-AI variants for gene {} where already identified by the Ensembl variant effect predictor'
                    .format(interval['name']))

        return pval_dict, called

    logging.info(
        'loaders for chromosome {} initialized in {:.1f} seconds.'.format(
            chromosome, timer.check()))
    # run tests for all genes on the chromosome
    for _, region in regions.iterrows():

        try:
            gene_stats, called = test_gene(region, i_gene)
        except GotNone:
            continue

        # build the single-variant datafame
        single_var_columns = [
            'gene', 'vid', 'weights', 'MAC', 'nCarrier', 'not_LOF', 'DS_AG',
            'DS_AL', 'DS_DG', 'DS_DL', 'DP_AG', 'DP_AL', 'DP_DG', 'DP_DL'
        ]
        sv_df = pd.DataFrame.from_dict(
            {k: gene_stats[k]
            pval_dict['n_snp_notLOF'] = keep.sum()
            pval_dict['cumMAC_notLOF'] = cummac[keep].sum()
            pval_dict['nCarrier_notLOF'] = ncarrier_notLOF

            pval_dict['flag1'] = flag1
            pval_dict['flag2'] = flag2

            # sanity check
            assert pval_dict['cumMAC'] >= pval_dict[
                'nCarrier'], 'Error: something is broken.'

            return pval_dict, sim_dict

        logging.info(
            'loaders for chromosome {}, strand "{}" initialized in {:.1f} seconds.'
            .format(chromosome, strand, timer.check()))
        # run tests for all genes on the chromosome / strand
        for _, region in regions.iterrows():

            try:
                gene_stats, sims = test_gene(region, i_gene)
            except GotNone:
                continue

            stats.append(gene_stats)
            simulations.append(sims)
            i_gene += 1
            if (i_gene % 100) == 0:
                logging.info('tested {} genes...'.format(i_gene))
            # print(i_gene)
Exemple #9
0
def temporal_join(u, v, v_name, key, time_col):
    """Append per-bucket rolling aggregates of ``v`` to ``u``, with progress prints.

    The time and key columns of ``u`` are stacked with ``v`` (outer index
    labels ``'u'`` and ``'v'``).  Rows are bucketed by
    ``hash(key value) % CONSTANT.HASH_MAX``, sorted by ``time_col`` and
    aggregated over a rolling window of 5 rows (``min_periods=1``) inside
    every bucket.  Marker lines and column counts are printed to stdout
    along the way.

    Args:
        u: Main table; must contain ``time_col`` and ``key``.
        v: Related table.  Its key column and columns starting with
            ``CONSTANT.TIME_PREFIX`` or ``CONSTANT.MULTI_CAT_PREFIX`` are
            not aggregated.
        v_name: Name of ``v``; embedded in the generated column names.
        key: Key column name, or a one-element list holding it.
        time_col: Column the stacked rows are sorted by.

    Returns:
        ``u`` with the aggregate rows labelled ``'u'`` concatenated
        column-wise, or ``u`` itself when the aggregate frame is empty.
    """
    timer = Timer()

    if isinstance(key, list):
        assert len(key) == 1
        key = key[0]

    print("-----tmp_u--------")

    key_times = u[[time_col, key]]
    print("------Number of columns before concatenation---")
    print(len(key_times.columns))
    timer.check("select")

    stacked = pd.concat([key_times, v], keys=['u', 'v'], sort=False)
    print(len(stacked.columns))
    timer.check("concat")

    bucket_col = f'rehash_{key}'

    def bucket_of(value):
        # Bucket id of a single key value.
        return hash(value) % CONSTANT.HASH_MAX

    stacked[bucket_col] = stacked[key].apply(bucket_of)
    timer.check("rehash_key")

    stacked = stacked.sort_values(time_col)
    print("----after sorting----")
    print("----after sorting----")
    timer.check("sort")

    # One aggregation spec per column of v that is neither the key, a time
    # column nor a multi-category column.
    agg_funcs = {}
    for col in v:
        if col == key:
            continue
        if col.startswith(CONSTANT.TIME_PREFIX) or col.startswith(
                CONSTANT.MULTI_CAT_PREFIX):
            continue
        agg_funcs[col] = Config.aggregate_op(col)

    print("-----after group by operation-----")
    rolled = stacked.groupby(bucket_col).rolling(5, min_periods=1).agg(agg_funcs)
    print("-----after group by operation------")

    print("-----tmp_u--------")
    timer.check("group & rolling & agg")

    # Drop the bucket level that groupby placed at the front of the index.
    rolled = rolled.reset_index(0, drop=True)
    timer.check("reset_index")

    def feature_name(pair):
        # pair is expected to be (source column, aggregation name).
        return f"{CONSTANT.NUMERICAL_PREFIX}{pair[1].upper()}_ROLLING5({v_name}.{pair[0]})"

    rolled.columns = rolled.columns.map(feature_name)

    if rolled.empty:
        log("empty tmp_u, return u")
        return u

    ret = pd.concat([u, rolled.loc['u']], axis=1, sort=False)
    timer.check("final concat")

    return ret
        pval_dict['nCarrier_notLOF'] = ncarrier_notLOF
        pval_dict['cumMAC_notLOF'] = cummac[keep].sum()

        # 0.5 is the recommended spliceAI cutoff
        pval_dict['n_greater_05'] = np.sum(weights >= 0.5)
        pval_dict['n_greater_05_notLOF'] = np.sum(weights[keep] >= 0.5)

        # sanity check
        assert pval_dict['cumMAC'] >= pval_dict[
            'nCarrier'], 'Error: something is broken.'

        return pval_dict, sim_dict

    logging.info(
        'loaders for chromosome {} initialized in {:.1f} seconds.'.format(
            chromosome, timer.check()))
    # run tests for all genes on the chromosome
    for _, region in regions.iterrows():

        try:
            gene_stats, sims = test_gene(region, i_gene)
        except GotNone:
            continue

        stats.append(gene_stats)
        simulations.append(sims)
        i_gene += 1
        if (i_gene % 100) == 0:
            logging.info('tested {} genes...'.format(i_gene))
        # print(i_gene)
Exemple #11
0
def temporal_join(u, v, v_name, key, time_col, type_):
    """Join ``v`` onto ``u``, forward-filling in time for "many" relations.

    The third ``_``-separated token of ``type_`` selects the strategy:

    * anything other than ``'many'``: ``v`` is indexed by ``key``, its
      columns are renamed to ``<prefix>_<v_name>_TOJOIN.<column>`` and it
      is joined onto ``u`` with ``u.join(v, on=key)``.
    * ``'many'``: ``v`` and the time/key columns of ``u`` are stacked,
      bucketed by a hash of the key, sorted by ``time_col`` and
      forward-filled inside each bucket, so every ``u`` row picks up the
      most recent preceding values found in its bucket.  The filled
      columns are renamed to ``<prefix>_<v_name>_TMJOIN.<column>``.

    Args:
        u: Main table; must contain ``key`` (and ``time_col`` for 'many').
        v: Related table.
        v_name: Name of ``v``; embedded in the generated column names.
        key: Key column name.  A one-element list is accepted in the
            'many' branch only.
        time_col: Column the stacked rows are sorted by ('many' branch).
        type_: Relation descriptor with at least three ``_``-separated
            tokens.

    Returns:
        ``u`` with the new columns attached, or ``u`` itself when the
        'many' branch produces an empty frame.
    """
    if type_.split("_")[2] != 'many':
        v = v.set_index(key)
        v.columns = v.columns.map(
            lambda a: f"{a.split('_', 1)[0]}_{v_name}_TOJOIN.{a}")
        return u.join(v, on=key)

    timer = Timer()

    if isinstance(key, list):
        assert len(key) == 1
        key = key[0]

    tmp_u = u[[time_col, key]]
    timer.check("select")

    # v is stacked ahead of u.
    tmp_u = pd.concat([v, tmp_u], keys=['v', 'u'], sort=False)
    timer.check("concat")

    rehash_key = f'rehash_{key}'

    # Bucket count: a tenth of the distinct keys (capped at 3000) for large
    # key sets, otherwise the distinct-key count; both rounded down to a
    # multiple of 100.
    n_keys = tmp_u[key].nunique()
    if n_keys > 3000:
        capped = min(int(n_keys / 10), 3000)
        hash_max = capped - capped % 100
    else:
        hash_max = n_keys - n_keys % 100
    # With fewer than 100 distinct keys the rounding yields 0 and the
    # modulo below would raise ZeroDivisionError; fall back to one bucket
    # per distinct key (and never fewer than one bucket).
    if hash_max < 1:
        hash_max = max(n_keys, 1)

    print("#" * 20)
    print(hash_max)
    print("#" * 20)

    tmp_u[rehash_key] = tmp_u[key].apply(lambda x: hash(x) % hash_max)
    timer.check("rehash_key")

    tmp_u.sort_values(time_col, inplace=True)
    timer.check("sort")

    # NOTE(review): fillna(method=...) is deprecated in recent pandas in
    # favour of groupby(...).ffill(); left as-is because the set of columns
    # returned may differ between pandas versions -- verify before changing.
    tmp_u = tmp_u.groupby(rehash_key).fillna(method='ffill')
    timer.check("group & ffill")

    # Keep only the rows that came from u, back in index order.
    tmp_u = tmp_u.loc['u'].sort_index()
    needed_cols = [
        col for col in tmp_u
        if col != key and col != rehash_key and col != time_col
    ]
    tmp_u = tmp_u[needed_cols]
    tmp_u.columns = tmp_u.columns.map(
        lambda a: f"{a.split('_', 1)[0]}_{v_name}_TMJOIN.{a}")
    timer.check("get tmp_u to cnocat")

    if tmp_u.empty:
        log("empty tmp_u, return u")
        return u

    ret = pd.concat([u, tmp_u], axis=1, sort=False)
    timer.check("final concat")

    return ret
Exemple #12
0
def temporal_join(u, v, v_name, key, time_col):
    """Attach per-key aggregates of ``v`` to the rows of ``u``.

    The time and key columns of ``u`` are stacked with ``v``, sorted by
    ``time_col`` and grouped by ``key``; every selected column of ``v`` is
    aggregated per key and the result is joined back onto ``u`` by key.

    Earlier revisions built the joined frame and then returned ``u``
    (discarding the result), left a helper ``"index"`` column on the
    caller's ``u``, and called ``pd.merge`` with both ``on=`` and
    ``left_index=True`` (a combination recent pandas releases reject).
    The join below returns the combined frame and leaves ``u`` untouched.

    Args:
        u: Main table; must contain ``time_col`` and ``key``.
        v: Related table.  Its key column, columns starting with
            ``CONSTANT.TIME_PREFIX`` or ``CONSTANT.MULTI_CAT_PREFIX`` and
            columns containing ``'mul_feature_'`` are not aggregated.
        v_name: Name of ``v``; used as prefix of the generated columns.
        key: Key column name, or a one-element list holding it.
        time_col: Column the stacked rows are sorted by before grouping.

    Returns:
        ``u`` with the aggregate columns attached (same index and row order
        as ``u``), or ``u`` itself when the aggregate frame is empty.
    """
    timer = Timer()

    if isinstance(key, list):
        assert len(key) == 1
        key = key[0]

    tmp_u = u[[time_col, key]]
    timer.check("select")

    tmp_u = pd.concat([tmp_u, v], keys=['u', 'v'], sort=False)
    timer.check("concat")

    # Sort first so that any order-sensitive aggregation sees each key's
    # rows in time order.
    tmp_u.sort_values(time_col, inplace=True)
    timer.check("sort")

    agg_funcs = {
        col: Config.aggregate_op(col)
        for col in v
        if col != key and not col.startswith(CONSTANT.TIME_PREFIX)
        and not col.startswith(CONSTANT.MULTI_CAT_PREFIX)
        and 'mul_feature_' not in col
    }

    tmp_u = tmp_u.groupby(key).agg(agg_funcs)
    timer.check("group & rolling & agg")

    if tmp_u.empty:
        log("empty tmp_u, return u")
        return u

    # NOTE(review): the generated name ends with an unmatched ')'; kept
    # byte-for-byte in case other code matches on it -- confirm and tidy.
    tmp_u.columns = tmp_u.columns.map(lambda a: f"{v_name}.{a})")

    # tmp_u is indexed by key after the groupby, so a join on the key
    # column keeps u's index and row order without a helper column.
    ret = u.join(tmp_u, on=key)
    timer.check("final concat")

    return ret
Exemple #13
0
def temporal_join_jinnian(u, v, v_name, key, time_col, type_):
    """Join ``v`` onto ``u``: per-key aggregates for "many", a key lookup otherwise.

    The third ``_``-separated token of ``type_`` selects the strategy:

    * anything other than ``'many'``: multi-category columns of ``v`` are
      split on ``","`` into ``mul_feature_<col>`` list columns, the column
      itself is replaced by the integer value of its first element, and
      ``v`` (indexed by ``key``, columns renamed to
      ``<prefix>_<v_name>.<column>``) is joined onto ``u``.
    * ``'many'``: the time/key columns of ``u`` are stacked with ``v``,
      missing values are filled with 0, the selected columns of ``v`` are
      aggregated per key and the result is merged back onto ``u``.

    Side effects: ``v`` is modified in place (category columns are cast to
    int in the 'many' branch; multi-category columns are rewritten and
    ``mul_feature_*`` columns added in the other branch).  ``u`` is not
    modified.

    Args:
        u: Main table; must contain ``key`` (and ``time_col`` for 'many').
        v: Related table.
        v_name: Name of ``v``; embedded in the generated column names.
        key: Key column name, or a one-element list holding it.
        time_col: Time column of ``u``; it is selected alongside the key
            but this variant does not sort by it.
        type_: Relation descriptor with at least three ``_``-separated
            tokens.

    Returns:
        ``u`` with the new columns attached, or ``u`` itself when the
        'many' branch produces an empty aggregate frame.
    """
    if isinstance(key, list):
        assert len(key) == 1
        key = key[0]

    if type_.split("_")[2] != 'many':
        # Multi-cat features will be processed in the main function.
        # The column list is materialised first because the loop adds
        # columns to v.
        for c in [c for c in v if c.startswith(CONSTANT.MULTI_CAT_PREFIX)]:
            # Assign the result back: an in-place fillna on the selected
            # column is not guaranteed to reach v (it does not under pandas
            # Copy-on-Write).
            v[c] = v[c].fillna("0")
            v["mul_feature_" + c] = v[c].apply(lambda x: str(x).split(","))
            v[c] = v["mul_feature_" + c].apply(lambda x: int(x[0]))

        v = v.set_index(key)
        v.columns = v.columns.map(
            lambda a: f"{a.split('_', 1)[0]}_{v_name}.{a}")

        return u.join(v, on=key)

    timer = Timer()

    tmp_u = u[[time_col, key]]
    timer.check("select")

    # Category columns are cast to int before stacking (modifies v).
    for c in v.columns:
        if c != key and c.startswith(CONSTANT.CATEGORY_PREFIX):
            v[c] = v[c].apply(lambda x: int(x))
    tmp_u = pd.concat([tmp_u, v], sort=False)
    timer.check("concat")

    # No rehash or sort step runs in this variant; the checkpoints are kept
    # so the sequence of timer labels is unchanged.
    timer.check("rehash_key")
    timer.check("sort")

    agg_funcs = {
        col: Config.aggregate_op(col)
        for col in v
        if col != key and not col.startswith(CONSTANT.TIME_PREFIX)
        and not col.startswith(CONSTANT.MULTI_CAT_PREFIX)
        and 'mul_feature_' not in col
    }
    tmp_u = tmp_u.fillna(0).groupby(key).agg(agg_funcs)

    timer.check("group & rolling & agg")
    timer.check("reset_index")

    # The mapper expects every column label to be a
    # (source column, aggregation name) pair.
    tmp_u.columns = tmp_u.columns.map(
        lambda a:
        f"{CONSTANT.NUMERICAL_PREFIX}{a[1].upper()}_ROLLING5({v_name}.{a[0]})"
    )

    # Keep only the first COUNT column and drop every later one.
    seen_count_col = False
    count = 0
    for c in tmp_u.columns:
        if 'COUNT_ROLLING5' not in c:
            continue
        if seen_count_col:
            tmp_u.drop(c, axis=1, inplace=True)
            count += 1
        else:
            seen_count_col = True
    print("There are %d duplicated columns in temporal join!" % count)

    # Turn the key from the groupby index back into a regular column.
    tmp_u.reset_index(0, inplace=True)

    if tmp_u.empty:
        log("empty tmp_u, return u")
        return u

    # The merge does not preserve u's index, so a positional helper column
    # is used to restore row order afterwards.  It is added to a copy so
    # that it never appears on the caller's u.
    index_tmp = u.index
    ordered_u = u.assign(index=list(range(0, len(index_tmp))))
    ret = pd.merge(ordered_u, tmp_u, on=[key])
    ret.sort_values("index", inplace=True)
    ret.index = index_tmp
    ret.drop("index", axis=1, inplace=True)

    timer.check("final concat")

    return ret
Exemple #14
0
class Level(object):
	'''This object is responsible for drawing the level and everything in it'''
	def __init__(self, filename, screen_width, screen_height):
		'''Load the map named by filename and set up player, camera and renderer.

		filename is resolved through level_path(); screen_width and
		screen_height size both the camera rectangle and the renderer camera.'''
		object.__init__(self)
		filename = level_path(filename)
		self.player = Player((50, 50))
		self.camera = pygame.Rect((50,50), (screen_width, screen_height))
		self.world_map = tiledtmxloader.TileMapParser().parse_decode(filename)
		resources = ResourceLoaderPygame()
		resources.load(self.world_map)
		self.renderer = RendererPygame(resources)
		self.renderer.set_camera_position_and_size(0, 0, screen_width, screen_height)
		self.renderer.set_camera_margin(0, 0, 0, 0)
		self.screen_width = screen_width
		self.screen_height = screen_height
		self.jump_timer = JumpTimer()
		self.jump_wait_timer = None
		self.input_state = None
		self._img_cache = {}
		self._img_cache["hits"] = 0

	def restart(self):
		'''Restart the player.'''
		self.player.restart()

	def tile_magic(self, tile, allow_break=True):
		'''Handle special tiles

		If tile equals the configured id of 'grow', 'shrink', 'narrow' or
		'widen', the player method of the same name is called.
		allow_break is accepted but not used here.'''
		#debug(tile)
		for special_tile in ('grow', 'shrink', 'narrow', 'widen'):
			if config.getint('Tiles', special_tile) == tile:
				getattr(self.player, special_tile)()
				return

	def collide_walls(self, ms):
		'''Push the player out of tiles on every tile layer and update the jump timer.

		Raises OutOfBounds (via get_tile) when a collision point lies outside a layer.'''
		grounded = False

		for layer in self.world_map.layers:
			if layer.is_object_group:
				continue

			floor_tile = None
			for pos in self.player.bottom_collide_pts:
				tile = self.get_tile(pos, layer)
				if tile:
					# Move above this tile.
					# This assumes we are not completely overlapping a tile.
					# This should never happen if we limit movement to < 16px per frame
					floor_tile = tile
					self.player.displace(0, -(self.player.collide_rect.bottom % 16))
					grounded = True
					break
			if floor_tile:
				self.tile_magic(floor_tile)

				# A heavy player turns 'crack' tiles into 'break' tiles and removes 'break' tiles.
				# pos is still the collision point that hit the floor.
				if floor_tile == config.getint('Tiles', 'crack') and self.player.heavy:
					self.set_platform(pos, layer, config.getint('Tiles', 'break'))
				elif floor_tile == config.getint('Tiles', 'break') and self.player.heavy:
					self.set_platform(pos, layer, 0)

				# All floor tiles stop the players fall
				self.player.velocity = (self.player.velocity[0],0)

			top_points = self.player.top_collide_pts
			for pos in top_points:
				# Ceiling hits are ignored once the player has been grounded.
				if grounded: break
				if self.get_tile(pos, layer):
					debug('collide top %s', pos)
					self.player.displace(0, 16) # move one tile down
					self.player.displace(0, -(self.player.collide_rect.top % 16)) # move to the top of the tile
					self.player.velocity = (self.player.velocity[0],0)
					break

			left_points = self.player.left_collide_pts
			for pos in left_points:
				if self.get_tile(pos, layer):
					debug('collide left %s', pos)
					self.player.displace(16, 0)
					self.player.displace(-(self.player.collide_rect.left % 16), 0)
					break

			right_points = self.player.right_collide_pts
			for pos in right_points:
				if self.get_tile(pos, layer):
					debug('collide right %s', pos)
					debug(self.get_tile(pos,layer))
					self.player.displace(-(self.player.collide_rect.right % 16), 0)
					break

		# Update the jump timer which determines if it's possible to jump
		if grounded:
			self.jump_timer.set()
		else:
			self.jump_timer.update(ms)

	def get_tile(self, pos, layer):
		'''Return the tile id stored in layer at pixel position pos.

		Raises OutOfBounds when pos lies outside the layer's pixel area.'''
		x,y = pos
		if x < 0 or y < 0  or x >= layer.pixel_width or y >= layer.pixel_height:
			raise OutOfBounds()
		# Pixel -> tile coordinates (16 px per tile). Floor division keeps the
		# values integral on Python 3 too; true division would produce a float
		# that cannot be used as an index.
		x //= 16
		y //= 16
		tile = layer.decoded_content[x + y*layer.width]
		return tile

	def set_tile(self, pos, layer, tile):
		'''Store tile in layer at pixel position pos and regenerate the layer's 2D data.

		Raises OutOfBounds when pos lies outside the layer's pixel area.'''
		x,y = pos
		if x < 0 or y < 0  or x >= layer.pixel_width or y >= layer.pixel_height:
			raise OutOfBounds()
		# Same pixel -> tile conversion as get_tile.
		x //= 16
		y //= 16
		layer.decoded_content[x + y*layer.width] = tile
		layer._gen_2D()

	def set_platform(self, pos, layer, tile):
		'''Set tile and horizontal neighbours

		Neighbours one tile to the left and right are changed only when they
		currently hold the same tile id as the tile at pos.'''
		x,y = pos
		current_tile = self.get_tile(pos, layer)
		platform = [pos]
		if x>=16:
			platform.append((x-16,y))
		if x<layer.pixel_width-16:
			platform.append((x+16,y))
		for pos2 in platform:
			if self.get_tile(pos2, layer) == current_tile:
				self.set_tile(pos2, layer, tile)

	def input_changed(self, action, state):
		'''Apply a new input state: set the player's direction and remember the state.

		Releasing 'up' clears the wait between jumps. action is not used here.'''
		x = 0
		if state['left']:
			x = -1
		elif state['right']:
			x = 1
		self.player.set_direction(x)

		self.input_state = state

		if not state['up']:
			self.jump_wait_timer = None

	def tick(self, ms):
		'''Advance the level by ms: move, collide, handle jumping and follow with the camera.

		Returns True when the player has reached the right edge of the map
		(otherwise falls through and returns None). Raises FellOffMap when a
		collision point leaves the map.'''
		self.player.move(ms)

		if self.player.collide_rect.left >= self.world_map.pixel_width - 16:
			debug('Level complete')
			return True

		# Handle collisions with walls/platforms
		try:
			self.collide_walls(ms)
		except OutOfBounds:
			debug('%s out of bounds', self.player.collide_rect)
			raise FellOffMap()

		# Check for jump every frame, in case user is holding down the button
		if not self.jump_wait_timer and self.input_state and self.input_state['up'] and self.jump_timer.jump_allowed():
			debug('jump')
			self.jump_timer.unset()
			self.jump_wait_timer = Timer(config.getint('Physics','jump_wait_time')) # wait a bit between jumps
			self.player.start_jump()
		elif self.jump_wait_timer:
			if self.jump_wait_timer.check(ms):
				self.jump_wait_timer = None

		# Center camera on player
		self.camera.center = self.player.rect.center

		# Constrain camera to the level
		self.camera.right = min(self.camera.right, self.world_map.pixel_width)
		self.camera.bottom = min(self.camera.bottom, self.world_map.pixel_height)
		self.camera.left = max(self.camera.left, 0)
		self.camera.top = max(self.camera.top, 0)
		self.renderer.set_camera_position(self.camera.centerx, self.camera.centery)
		self.renderer.set_camera_margin(0, 0, 0, 0) # something is resetting the margin to 16px each frame... grrr

	def debug(self):
		'''Log the camera corners and the renderer state for every sprite layer.'''
		debug('camera %s %s', self.camera.topleft,self.camera.bottomright)
		sprite_layers = self.renderer.get_layers_from_map()
		for sprite_layer in sprite_layers:
			debug('layer %s: margin %s, cam rect %s, render cam rect %s, paralax factor %s,%s',
					id(sprite_layer),
					self.renderer._margin,
					self.renderer._cam_rect,
					self.renderer._render_cam_rect,
					sprite_layer.paralax_factor_x,
					sprite_layer.paralax_factor_y)

	def screen_coordinates(self, pos):
		'''Convert world coordinates into screen coordinates'''
		x,y = pos
		return (x-self.camera.left, y-self.camera.top)

	def draw(self, screen):
		'''Clear screen to white, draw all layers and the player, then flip the display.

		With config.options.debug set, the player's collision rectangle is outlined in red.'''
		screen.fill((255,255,255))
		sprite_layers = self.renderer.get_layers_from_map()
		for sprite_layer in sprite_layers:
			if sprite_layer.is_object_group:
				self._draw_obj_group(screen, sprite_layer, self.camera.left, self.camera.top)
			else:
				self.renderer.render_layer(screen, sprite_layer)

		screen.blit(self.player.image, self.player.rect.move((-self.camera.left, -self.camera.y)).topleft)

		if config.options.debug:
			pygame.draw.rect(screen, (255,0,0), self.player.collide_rect.move((-self.camera.left, -self.camera.y)), 1)

		pygame.display.flip()

	def _draw_obj_group(self, screen, obj_group, cam_world_pos_x, cam_world_pos_y, font=None):
		'''Draw every object of obj_group relative to the given camera position.

		Objects with an image source are drawn as that image scaled to the
		object's size; the others as a yellow outline. Objects of size (0, 0)
		are drawn 16x16. font is not used here.'''
		goffx = obj_group.x
		goffy = obj_group.y
		for map_obj in obj_group.objects:
			debug1('object at %d,%d', map_obj.x, map_obj.y)
			debug1('source %s, image %s', map_obj.image_source, map_obj.image)
			size = (map_obj.width, map_obj.height)
			if size == (0, 0):
				size = (16,16)
			if map_obj.image_source:
				# NOTE(review): the image is loaded from disk on every call and
				# self._img_cache is never consulted -- consider caching.
				surf = pygame.image.load(map_obj.image_source)
				surf = pygame.transform.scale(surf, size)
				screen.blit(surf, (goffx + map_obj.x - cam_world_pos_x, \
					goffy + map_obj.y - cam_world_pos_y))
			else:
				r = pygame.Rect(\
					(goffx + map_obj.x - cam_world_pos_x, \
					goffy + map_obj.y - cam_world_pos_y),\
					size)
				pygame.draw.rect(screen, (255, 255, 0), r, 1)