예제 #1
0
# Keep only the rows whose 'time_for_the_event' is strictly below 30.
before_event_mask = dilated_df['time_for_the_event'].lt(30)
dilated_df = dilated_df[before_event_mask]

# Optional clustering on the event time (currently disabled):
# cluster_based_on_time(dilated_df['time_for_the_event'], k=2)

# Beta values to evaluate; a sweep such as np.logspace(-3, 3, 50) can be
# substituted for the single value.
betas = [13]

# Result containers; censored_time is keyed by beta in the loop that follows.
betas_values, censored_time, removed_rows = {}, {}, {}
# For every beta: build K via calculate_distance, attach the event times,
# and store in censored_time[beta] the per-column ratio of the time-scaled
# column sum to the plain column sum.
for beta in betas:
    print('beta = ' + str(beta))
    # Both inputs are restricted to the columns labelled 0..19.
    # NOTE(review): other call sites in this file unpack two return values
    # and pass visualize=False; confirm what this call returns.
    K = calculate_distance(
        dilated_df[list(range(20))], {
            subject_id: subject_data[list(range(20))]
            for subject_id, subject_data in censored_data.items()
        }, beta)
    # Transpose K and attach dilated_df's 'time_for_the_event'
    # (presumably an index-aligned DataFrame.join -- confirm K's type).
    K_t_time = K.transpose().join(dilated_df['time_for_the_event'])

    def multiply_by_time_for_the_event(col):
        """Multiply each entry of ``col`` by ``col['time_for_the_event']``.

        Applied below with ``axis=1``, so ``col`` is one row of K_t_time.
        """
        return col.apply(lambda x: x * col['time_for_the_event'])

    K_t_time_multiplied_time_for_the_event = K_t_time.apply(
        multiply_by_time_for_the_event, axis=1)

    # Sums over rows, one value per column; 'time_for_the_event' is itself
    # one of the columns in both sums.
    denominator = K_t_time.sum()
    nominator = K_t_time_multiplied_time_for_the_event.sum()
    censored_time[beta] = nominator / denominator

    censored_data_with_time = OtuMf.mapping_file.loc[
예제 #2
0
            # Group each subject's input under its predicted class. zip()
            # pairs class_prediction and inputs positionally, so this assumes
            # both mappings iterate subjects in the same order -- TODO confirm.
            for (subject_id, class_predicted), subject_data in zip(
                    class_prediction.items(), inputs.values()):
                inputs_per_class[class_predicted].update(
                    {subject_id: subject_data})

            # Per-class intermediates, keyed by class number, plus the frame
            # that accumulates the per-class results.
            K = {}
            K_t_time = {}
            K_t_time_multiplied_time_for_the_event = {}
            censored_data_with_time = pd.DataFrame()
            for class_number in range(number_of_classes):
                # Rows of dilated_df selected with a mask computed from
                # dilated_df_clusterd; presumably both share the same index
                # -- verify.
                a = dilated_df.loc[dilated_df_clusterd['cluster_number'] ==
                                   class_number]
                # Only the first item returned by calculate_distance is kept;
                # the reference rows are restricted to columns 0..19.
                K[class_number], _ = calculate_distance(
                    a[list(range(20))],
                    inputs_per_class[class_number],
                    beta,
                    visualize=False)
                # Transpose and attach 'time_for_the_event' from the full
                # dilated_df (not from the class subset `a`).
                K_t_time[class_number] = K[class_number].transpose().join(
                    dilated_df['time_for_the_event'])
                # Row by row, scale every entry by that row's
                # 'time_for_the_event' value.
                K_t_time_multiplied_time_for_the_event[
                    class_number] = K_t_time[class_number].apply(
                        multiply_by_time_for_the_event, axis=1)
                # Per-column ratio of the time-scaled sum to the plain sum.
                denominator = K_t_time[class_number].sum()
                nominator = K_t_time_multiplied_time_for_the_event[
                    class_number].sum()
                tmp = nominator / denominator
                # index[:-1] drops the last label of tmp -- presumably the
                # joined 'time_for_the_event' entry, which is not a subject
                # (TODO confirm).
                censored_data_with_time_per_class = OtuMf.mapping_file.loc[
                    tmp.index[:-1].tolist()]
                censored_data_with_time_per_class['time_for_the_event'] = tmp
                # NOTE(review): the statement that follows relies on
                # DataFrame.append, which was removed in pandas 2.0
                # (pd.concat is the replacement).
                censored_data_with_time = censored_data_with_time.append(
# Single beta value used for the run below.
beta = 9

# Result containers; censored_time is keyed by beta further down.
betas_values, censored_time, removed_rows = {}, {}, {}

print('beta = ', beta, sep='')

def multiply_by_time_for_the_event(col):
    """Scale every entry of ``col`` by its own 'time_for_the_event' entry.

    Meant to be passed to ``DataFrame.apply(..., axis=1)``, in which case
    ``col`` is a single row (a Series) that carries a 'time_for_the_event'
    label.

    Args:
        col: Series containing a 'time_for_the_event' entry.

    Returns:
        A Series with the same index as ``col`` in which each value has been
        multiplied by ``col['time_for_the_event']`` (so that entry itself
        ends up squared).

    Raises:
        KeyError: if ``col`` has no 'time_for_the_event' label.
    """
    # Vectorised scalar multiplication: same result as applying a Python
    # lambda to each element, without the per-element call overhead.
    return col * col['time_for_the_event']


# Restrict each censored subject to the columns labelled 0..n_components-1.
inputs = {
    sid: features[list(range(n_components))]
    for sid, features in censored_data.items()
}

# calculate_distance returns a pair; only its first item (K) is used here.
K, _ = calculate_distance(
    dilated_df[list(range(n_components))],
    inputs,
    beta,
    visualize=False,
)

# Transpose K and attach dilated_df's 'time_for_the_event'
# (presumably an index-aligned DataFrame.join -- confirm K's type).
K_t_time = K.transpose().join(dilated_df['time_for_the_event'])

# Row by row, scale every entry by that row's 'time_for_the_event' value.
K_t_time_multiplied_time_for_the_event = K_t_time.apply(
    multiply_by_time_for_the_event, axis='columns')

# Per-column ratio of the time-scaled sum to the plain sum.
denominator = K_t_time.sum()
nominator = K_t_time_multiplied_time_for_the_event.sum()
censored_time[beta] = nominator.div(denominator)

# Mapping-file rows for every label of the result except the last one
# (presumably the joined 'time_for_the_event' entry -- confirm), with the
# estimated value stored in the 'time_for_the_event' column.
censored_data_with_time = OtuMf.mapping_file.loc[
    censored_time[beta].index[:-1].tolist()
]
censored_data_with_time['time_for_the_event'] = censored_time[beta]

# Attach the columns of otu_after_pca_wo_taxonomy and remember the row count
# before filtering.
censored_data_with_time = censored_data_with_time.join(
    otu_after_pca_wo_taxonomy)
number_of_rows_before_removal = len(censored_data_with_time)

# Remove subjects with no microbiome data (null in column 0).
censored_data_with_time = censored_data_with_time[
    censored_data_with_time[0].notna()]