Esempio n. 1
0
    print("Only one input file detected; no downsampling")

# --- Transformation ---
# The literature recommends an arcsinh cofactor of 5 for CyTOF data; the value
# actually applied is whatever `cofactor` holds.
arc, cols = arcsinh_transf(cofactor, no_arc)
# `cols` (the marker columns) is kept because it is reused below.

# ~~~~~~~~~~~~~ Define the markers used for UMAP calculation ~~~~~~~~~~~~~~~ #

# Group the dataframe columns by type of measurement: the marker columns
# themselves, and every other column of the transformed dataframe.
all_markers_cols = cols.copy()
not_markers_cols = [name for name in arc.columns if name not in cols]

# Markers that drive the UMAP calculation (vs_markers_cols); they are read
# from a .csv file in ./input.
not_these = []  # columns to be excluded for umap calculation
vs_markers_cols = read_marker_csv(input_dir)
print(vs_markers_cols)

# Save the markers used for UMAP in the output folder, with rows numbered
# from 1 instead of 0.
df_vs_markers_cols = pd.DataFrame(vs_markers_cols, columns=['marker'])
df_vs_markers_cols.index = np.arange(1, len(df_vs_markers_cols) + 1)
df_vs_markers_cols.to_csv(
    f"{output_dir}/{info_run}/markers_for_{info_run}.csv")

# Marker columns that were NOT selected for the UMAP calculation.
no_vs_markers_cols = [
    name for name in all_markers_cols if name not in vs_markers_cols
]

print(arc.columns)
# Keep only the columns ('v's) needed for the UMAP calculation; `.copy()`
# gives an independent dataframe (all_together_vs_marks).
all_together_vs_marks = arc.loc[:, vs_markers_cols].copy()
print(all_together_vs_marks)

print(f"Markers used for UMAP calculation: \n")
Esempio n. 2
0
            compare_to = read_rFCS(denom_path)[0]
    else:
        sys.exit(
            "ERROR: Reference not recognised.\nPlease state exact and full name of file to be used as denominator!"
        )
    input_files = filelist
else:
    denominator = 'concatenated-inputs'
    print(
        'Concatenated input files will be used as the reference distribution')
    compare_to, input_files = concatenate_fcs(
        input_dir)  #compare_from=inputfile

# Keep only selected markers.
# A column is treated as a marker channel when its name starts with a digit.
# Marker columns that are not listed by read_marker_csv() are removed; columns
# whose name does not start with a digit are always kept.
if filter_markers:
    selected_markers = read_marker_csv(input_dir)
    print(compare_to.columns)
    file_cols = compare_to.columns
    # Collect every unselected marker column first and drop them in a single
    # call, instead of rebuilding the dataframe once per dropped column.
    unselected_marker_cols = [
        x for x in file_cols
        if x[0].isdigit() and x not in selected_markers
    ]
    compare_to = compare_to.drop(columns=unselected_marker_cols)
    print(compare_to.columns)

# compare_to = downsample_data(compare_to, info_run, output_dir) #Customtest_1
compare_to_arc, marker_list = arcsinh_transf(cofactor,
                                             compare_to)  # Leave as default
# compare_to_arc = compare_to #Customtest_2
# marker_list = [x for x in compare_to_arc.columns if x[0].isdigit()] #Customtest_2
Esempio n. 3
0
        if "emd" in file.lower():
            emd_file.append(file)
# Exactly one candidate EMD file must have been collected above; its name is
# then turned into a path inside the input directory and loaded as a
# tab-separated table.
if len(emd_file) != 1:
    sys.exit("ERROR: Please have only ONE .txt file with 'emd' in its name!")
emd_file = f"{input_dir}/{emd_file[0]}"
df = pd.read_csv(emd_file, sep='\t')

# `marker_list` is set earlier in the script (the marker csv file is created
# if it doesn't exist); the user has to flag the markers used for PCA with
# 'Y' in that file.
# NOTE(review): the `== False` comparison is kept on purpose -- switching to
# `not marker_list` would also abort on other falsy values.
if marker_list == False:
    sys.exit(
        "ERROR: Please select markers for PCA in the panel_markers.csv file!")

# List of markers used for PCA.
markers_pca = read_marker_csv(input_dir)

info_run = input("Write PCA info (using no spaces!): ")

# Empty dataframes for the per-condition and the all-conditions data.
df_one_cond = pd.DataFrame()
df_all_cond = pd.DataFrame()

# Reformat the data for PCA: order rows by originating file, keep only the
# EMD information, and expose the score under the name "EMD_score".
cols_to_keep = ["EMD_no_norm_arc", "marker", "file_origin"]
df = (
    df.sort_values(by=["file_origin"])
    .loc[:, cols_to_keep]
    .copy()
    .rename(columns={"EMD_no_norm_arc": "EMD_score"})
)

i = 0  # counter for conditions