Beispiel #1
0
# Fall back to the shared "UNNAMED" run label when no run name was supplied.
if not info_run:
    print("No info run given. Saving results in UNNAMED")
    info_run = "UNNAMED"

# Results of a run are stored in <output_dir>/<info_run>. A missing folder is
# created; an existing folder aborts the script unless it is the "UNNAMED"
# fallback, which is the only run name that may be reused.
run_dir = f"{output_dir}/{info_run}"
if not os.path.isdir(run_dir):
    os.makedirs(run_dir)
elif info_run != "UNNAMED":
    sys.exit(
        "ERROR: You already used this name for a previous run. \nUse a different name!"
    )
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

#~~~~~~~~~~~~~~~~~~~Preliminary steps and transformation~~~~~~~~~~~~~~~~~~~~~~#
#Concatenate#
# concatenate_fcs also does the sanity check of the files in input_dir.
no_arc, input_files = concatenate_fcs(input_dir)

#Downsampling#
# Test length of input files -> go with the minimum denominator -> select, at
# random, that number of cells from the other files.
# Only offered when the concatenated data holds more than one file_origin.
cells_per_file = no_arc["file_origin"].value_counts()
if cells_per_file.size > 1:
    downs_inputs = yes_or_NO(
        "Multiple input files detected. Would you like to donwsample the number of cells?",
        default="YES")
    if not downs_inputs:
        print("Multiple input files; no downsampling")
    else:
        print("Downsampling taking place. Check output folder for more info")
        # Show the per-file cell counts before and after downsampling.
        print(cells_per_file)
        no_arc = downsample_data(no_arc, info_run, output_dir)
        print(no_arc["file_origin"].value_counts())
Beispiel #2
0
            if len(pnn_extracted) != 0:
                raise fcsparser.api.ParserFeatureNotImplementedError
        except fcsparser.api.ParserFeatureNotImplementedError:
            print("WARNING: Non-standard .fcs file detected: ", denominator)
            #use rpy2 to read the files and load into python
            compare_to = read_rFCS(denom_path)[0]
    else:
        sys.exit(
            "ERROR: Reference not recognised.\nPlease state exact and full name of file to be used as denominator!"
        )
    input_files = filelist
else:
    denominator = 'concatenated-inputs'
    print(
        'Concatenated input files will be used as the reference distribution')
    compare_to, input_files = concatenate_fcs(
        input_dir)  #compare_from=inputfile

#Keep only selected markers
# Columns whose name starts with a digit are treated as marker columns; any
# of them that is not listed in the marker selection read from input_dir is
# removed. Columns that do not start with a digit are always kept.
if filter_markers:
    selected_markers = read_marker_csv(input_dir)
    print(compare_to.columns)
    file_cols = compare_to.columns
    # x[:1] (rather than x[0]) keeps an empty column name from raising.
    unselected_cols = [
        x for x in file_cols
        if x[:1].isdigit() and x not in selected_markers
    ]
    # Drop everything in one call instead of copying the frame per column.
    compare_to = compare_to.drop(unselected_cols, axis=1)
    print(compare_to.columns)

# compare_to = downsample_data(compare_to, info_run, output_dir) #Customtest_1
compare_to_arc, marker_list = arcsinh_transf(cofactor,
Beispiel #3
0
# Collect the .txt files in input_dir; abort when there are none.
filelist = [
    entry for entry in os.listdir(input_dir) if entry.endswith(".txt")
]
if not filelist:
    sys.exit(f"ERROR: There are no .txt files in {input_dir}!")
# Show the user which files were picked up, one per line.
print("Downsample script supports only .txt files. Input files:")
print("\n".join(filelist))
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

# Announce what the script does, then ask for the run label; the label is
# passed to downsample_data and later used in the output paths.
print("This script now downsamples multiple files on the input "
      "to the condition with less cells")
info_run = input("Write downsampling info run (using no spaces!): ")

# Only the first item returned by concatenate_fcs (the concatenated data) is
# needed here.
concatenated_df = concatenate_fcs(input_dir)[0]
downsampled_conc_df = downsample_data(
    concatenated_df,
    info_run,
    output_dir,
)

# Write one tab-separated .txt file per "file_identifier" group into
# <output_dir>/<info_run>/.
# Make sure the target folder exists before writing; this is a no-op when it
# is already there (NOTE(review): downsample_data may already create it --
# confirm).
os.makedirs(f"{output_dir}/{info_run}", exist_ok=True)

# reset_index(drop=True) discards the index left by downsample_data (the
# original note says file_origin is also an index at that point) so that the
# grouping works on plain columns. The row index is not written to the file
# (index=False), so no further per-group index reset is needed.
for name, group in downsampled_conc_df.reset_index(
        drop=True).groupby("file_identifier"):
    print(name)
    print(group)
    group.to_csv(f"{output_dir}/{info_run}/{name}_downsample_{info_run}.txt",
                 index=False,
                 sep='\t')

print(f"Downsampled files saved in {output_dir}")