Example #1
0
def main():
    """Attach the chosen louvain clustering to a UMAP plot and derived tables.

    Driven entirely by the global ``snakemake`` object (inputs/params/outputs).
    Writes: a labelled UMAP figure, a merged id/cluster table, a viz table with
    one-hot cluster columns, a morphology table, and a z-score-normalised
    morphology table — all tab-separated.

    Raises:
        ValueError: if either chosen_k or chosen_res is 0 (i.e. never chosen).
    """
    # 2D UMAP embedding loaded from tab-separated text.
    umap = np.loadtxt(snakemake.input.umap, delimiter='\t')

    # Chosen louvain parameters; 0 is the sentinel for "not chosen".
    chosen_k = int(snakemake.params.chosen_k)
    chosen_res = float(snakemake.params.chosen_res)

    if chosen_k == 0:
        raise ValueError('No chosen number of neighbours - k - for louvain')

    elif chosen_res == 0:
        raise ValueError('No chosen resolution for louvain')

    # Pickled per-item cluster assignments.
    # NOTE(review): pickle.load is only safe on trusted pipeline output.
    with open(snakemake.input.cluster_file, 'rb') as f:
        clustering = pickle.load(f)

    # Plot the clustering on the embedding; the helper also reports cluster
    # and singleton counts, which go into the title alongside the parameters.
    plot, no_clusters, no_singletons = plot_clusters_on_umap(umap,
                                                             clustering,
                                                             label=True)
    plot.set_title('k_%s_res_%s__no_clusters__%s__no_singletons__%s' %
                   (chosen_k, chosen_res, no_clusters, no_singletons),
                   fontsize=20)
    plt.savefig(snakemake.output.fig)

    # Attach cluster labels to the filtered table.
    # Assumes `clustering` is ordered row-for-row like this table — TODO confirm.
    table = pd.read_csv(snakemake.input.filtered, sep='\t')
    col_name = 'chosen_k_%s_res_%s' % (chosen_k, chosen_res)
    table[col_name] = clustering

    # just keep label id and clusters
    if 'label_id' in table.columns:
        table = table[['label_id', col_name]]
    elif 'unique_id' in table.columns:
        table = table[['unique_id', col_name]]
    table.to_csv(snakemake.output.merged_table, index=False, sep='\t')

    # make morph and viz tables
    viz_table = pd.read_csv(snakemake.input.viz_table, sep='\t')
    full_with_ids = pd.read_csv(snakemake.input.full_with_ids, sep='\t')
    merged_morph = pd.read_csv(snakemake.input.merged_morph, sep='\t')
    merged_morph = filter_texture_fails(merged_morph)

    if 'unique' not in snakemake.params.gene_assign:
        # Non-unique assignment: clustering presumably aligns row-for-row
        # with both viz_table and full_with_ids — verify upstream.
        viz_table['clusters'] = clustering

        # make table with label id & clusters for all cells
        full_with_ids['clusters'] = clustering
        table = full_with_ids[['label_id', 'clusters']]

    else:
        # Unique assignment: map clusters back to label_ids through the
        # label_id <-> unique_id correspondence in full_with_ids.
        # make table with label id & clusters for all cells
        cut = full_with_ids[['label_id', 'unique_id']]
        table = cut.join(table.set_index('unique_id'),
                         on='unique_id',
                         how='left')
        table = table[['label_id', col_name]]
        table.columns = ['label_id', 'clusters']

        viz_table = viz_table.join(table.set_index('label_id'),
                                   on='label_id',
                                   how='left')

    # Expand clusters into binary columns and write the viz table output.
    make_binary_columns(viz_table, 'clusters', snakemake.output.viz_table)
    # Inner join: keep only label_ids that survived the texture-fail filter.
    morph_table = table.join(merged_morph.set_index('label_id'),
                             on='label_id',
                             how='inner')
    morph_table.to_csv(snakemake.output.morph_table, index=False, sep='\t')

    # and a normalised version (z-scored feature columns)
    just_morph = morph_table.drop(columns=['label_id', 'clusters'])
    col_names = just_morph.columns.tolist()
    just_morph = StandardScaler().fit_transform(just_morph)
    just_morph = pd.DataFrame(data=just_morph)
    just_morph.columns = col_names

    # can have issues with index matches producing nan values - reset indices here
    just_morph.reset_index(drop=True, inplace=True)
    morph_table.reset_index(drop=True, inplace=True)

    just_morph.insert(0, 'clusters', morph_table['clusters'])
    just_morph.insert(0, 'label_id', morph_table['label_id'])
    just_morph.to_csv(snakemake.output.morph_table_normalised,
                      index=False,
                      sep='\t')
Example #2
0
###############################################################################
# Backfill Volume data
###############################################################################
if 1:

    # Pull historical volume data for the configured currencies/window,
    # promote the index to a 'timestamp' column, and persist it.
    volume, rat_volume = backfill_volume_with_singular(
        currencies, granularity, _from, _to)
    volume = volume.reset_index().rename(columns={'index': 'timestamp'})
    volume.to_pickle(volume_path)

    # Standardized copy of the volume columns, timestamp re-attached up
    # front; jpy/hkd are excluded from the scaled frame.
    scaled = StandardScaler().fit_transform(volume.iloc[:, 1:])
    ss = pd.DataFrame(scaled, columns=volume.columns[1:], index=volume.index)
    ss.insert(0, 'timestamp', volume.timestamp)
    ss = ss.drop(columns=['jpy', 'hkd'])

    # Same backfill-and-relabel treatment for the currency data and its
    # ratio companion; only the currency frame is pickled here.
    cu, ratios = backfill_with_singular(currencies, granularity, _from, _to)
    cu = cu.reset_index().rename(columns={'index': 'timestamp'})
    cu.to_pickle(cu_small_path)

    ratios = ratios.reset_index().rename(columns={'index': 'timestamp'})

###############################################################################
# Update Volume and Diff
###############################################################################
if 0: