# Example 1
# --- Neural-network hyperparameters ---
max_len = 1500        # maximum sequence length fed to the model
sample_n = 10         # rows sampled per annotation group (see groupby().sample below)
embed_size = 256      # embedding dimension
batch_size = 100      # training batch size
epochs = 100          # number of training epochs
# Location of the CNN helper scripts used by this pipeline.
cnn_fun_path = 'scripts/python/tmr/'
seq_type = 'aa'       # sequence alphabet ('aa' — presumably amino acids; confirm)
num_letters = 26      # alphabet size used when encoding sequences
seq_resize = True     # whether sequences are resized/padded to max_len — TODO confirm

#%%
# Generate datasets for fitting: load the sequence dataframe, assign an
# integer class id ('ydata') to every distinct annotation, and split the
# rows into clustered vs. noise subsets.
if new_model:  # fixed: avoid `== True` comparison (PEP 8 / E712)
    seq_df = cf.load_seq_dataframe(data_path)
    # One integer label per unique annotation value.
    uniq_anno = seq_df.annotation.unique()
    num_classes = len(uniq_anno)
    annotation_ydata_df = pd.DataFrame({
        'ydata': range(num_classes),
        'annotation': uniq_anno
    })
    # Inner merge attaches the 'ydata' column to every sequence row.
    seq_df = pd.merge(seq_df, annotation_ydata_df, on='annotation')
    # Cluster == -1 presumably marks clustering noise (DBSCAN-style) — confirm.
    seq_cluster = seq_df.loc[seq_df['Cluster'] > -1]
    seq_cluster_noise = seq_df.loc[seq_df['Cluster'] == -1]
    # NOTE(review): alias, not a copy — seq_cluster_a refers to the same
    # DataFrame object as seq_cluster.
    seq_cluster_a = seq_cluster

    #%%
    # Generate training data for the annotation dataset: draw sample_n rows
    # from every annotation group.
    train_a = seq_cluster_a.groupby(['annotation']).sample(n=sample_n)
# Example 2
# Output locations for the density-sampling run.
tmp_save_path = 'data/density_sample/tmp/'    # scratch / intermediate output
final_save_path = 'data/density_sample/KDE'   # final KDE results

# Sampling parameters
rep = 3                        # repetitions per setting
val_sample = range(1, 11, 1)   # validation sample sizes to sweep (1..10); start on 18
test_n = 5                     # held-out test examples per cluster

#%%
# Generate datasets for fitting: load the sequence dataframe and attach an
# integer class id ('ydata') for every distinct annotation value.

seq_df = cf.load_seq_dataframe(data_path)
uniq_anno = seq_df['annotation'].unique()
num_classes = len(uniq_anno)
annotation_ydata_df = pd.DataFrame(
    {'ydata': range(num_classes), 'annotation': uniq_anno}
)
seq_df = seq_df.merge(annotation_ydata_df, on='annotation')
seq_df = seq_df.reset_index(drop=True)
# Record each row's post-merge position for later bookkeeping.
seq_df['o_index'] = seq_df.index

# Sweep over validation sample sizes. NOTE(review): the loop body appears to
# continue beyond this excerpt; only its first statement is visible here.
for s in val_sample:
    # With an 80/20 split in mind, (s - 0.2*s)/0.2 works out to 4*s training
    # samples for every s validation samples — TODO confirm intended ratio.
    train_sample = round((s - 0.2 * s) / 0.2)