예제 #1
0
파일: 3-emd.py 프로젝트: TAPE-Lab/CyGNAL
        input_dir)  #compare_from=inputfile

# Keep only the selected markers.
# A column counts as a marker when its name starts with a digit. Marker
# columns that are not in the list returned by read_marker_csv(input_dir) are
# removed; columns whose name does not start with a digit are always kept.
if filter_markers:
    selected_markers = read_marker_csv(input_dir)
    print(compare_to.columns)  # columns before filtering
    file_cols = compare_to.columns
    # Gather every unwanted marker first and drop them in a single call, so
    # the dataframe is rebuilt once rather than once per dropped column.
    unselected_markers = [
        x for x in file_cols
        if x[0].isdigit() and x not in selected_markers
    ]
    compare_to = compare_to.drop(unselected_markers, axis=1)
    print(compare_to.columns)  # columns after filtering

# Disabled experiment (Customtest_1): downsample compare_to first.
# compare_to = downsample_data(compare_to, info_run, output_dir)
# Default path (leave enabled): arcsinh_transf hands back the transformed
# data together with the list of marker names.
compare_to_arc, marker_list = arcsinh_transf(cofactor, compare_to)
# Disabled experiment (Customtest_2): use the data untransformed.
# compare_to_arc = compare_to
# marker_list = [x for x in compare_to_arc.columns if x[0].isdigit()]

# Report the run set-up on stdout: sample files, reference, markers.
print('Sample files:')
sample_listing = '\n'.join(input_files)
print(sample_listing)
print(f'\nReference:\n{denominator}')
print('\nMarkers:')
marker_listing = '\n'.join(marker_list)
print(marker_listing)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~Perform EMD~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Calculate the EMD scores and save them to the output folder (together with
# the denominator and run info). Per the original note, each EMD score is
# signed by the difference of medians between compare_from and compare_to.
# Both containers start empty here; the code that fills them is not part of
# this section.

emd_df = pd.DataFrame()  # empty dataframe for the EMD results
emd_infodict = {}  # empty dict for per-comparison information
예제 #2
0
파일: 4-dremi.py 프로젝트: TAPE-Lab/CyGNAL
            data = fcsparser.parse(file_path, meta_data_only=False)[1]
            # Detect whether, despite the flag, any column name still matches
            # the PnN pattern: one or more digits followed by "Di" at the very
            # end of the name.
            # Raw string: "\d" inside a plain literal is an invalid escape
            # sequence and triggers a warning on current Python versions.
            reg_pnn = re.compile(r"(\d+Di$)")
            pnn_extracted = [
                n for n in data.columns.values.tolist() if reg_pnn.search(n)
            ]
            if pnn_extracted:
                # Any match means the file is treated as non-standard; raising
                # here hands control to the matching except clause.
                raise fcsparser.api.ParserFeatureNotImplementedError
        except fcsparser.api.ParserFeatureNotImplementedError:
            print("WARNING: Non-standard .fcs file detected: ", f)
            #use rpy2 to read the files and load into python
            data = read_rFCS(file_path)[0]
    # Optional marker filter: keep only the columns named by
    # read_marker_csv(input_dir) (per the original note, often PTMs).
    if filter_markers:
        selected_markers = read_marker_csv(input_dir)
        data = data.loc[:, selected_markers]
    # arcsinh_transf hands back the transformed data and the marker names.
    data_arc, markers = arcsinh_transf(cofactor, data)

    # Every ordered pair of two markers -- (x, y) as well as (y, x) -- for the
    # DREMI calculation.
    marker_pairs = list(permutations(markers, 2))
    for marker_x, marker_y in marker_pairs:
        df_info_dict = {}
        df_info_dict["file_origin"] = filename
        df_info_dict["marker_x"] = marker_x
        df_info_dict["marker_y"] = marker_y
        df_info_dict["marker_x_marker_y"] = marker_x + '_' + marker_y
        df_info_dict["num_of_cells"] = data.shape[0]

        if plot == True:
            if os.path.isdir(
                    f'{output_dir}/{info_run}/plots/x={marker_x}-y={marker_y}'
            ) == False:
예제 #3
0
###~Co-factor~###
# Ask whether the default cofactor (alpha = 5) used for the transformation
# should be replaced by a user-supplied value.
cofactor = 5
user_cofactor = yes_or_NO(
    "Using alpha=5 for the transformation. Would you like to change this value?"
)
if user_cofactor:
    # Keep asking until the answer parses as an integer, instead of aborting
    # the whole script with a ValueError on a mistyped value.
    while True:
        try:
            cofactor = int(input("Enter the new alpha to use (5=default): "))
            break
        except ValueError:
            print("Invalid alpha: please enter a whole number.")
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~Perform transformation~~~~~~~~~~~~~~~~~~~~~~~~~~#
# For every input file: read it as tab-separated text, report its marker
# columns, transform it and write the result to output_dir under the same
# file name prefixed with "arcsinhTRANSF_".
for input_file in filelist:
    dataset = pd.read_csv(f"{input_dir}/{input_file}", sep='\t')
    print("Data read!")
    # Marker columns are those whose name starts with a digit. The list is
    # only reported here; arcsinh_transf receives the full dataset.
    markers = [x for x in dataset.columns if x[0].isdigit()]
    print("Processed columns")
    print("Columns identified as markers: \n", markers)
    print("Start transform (might take around 10min with larger datasets)")
    # Only the first element of the returned pair (the transformed data) is
    # needed here.
    normalised_dataset = arcsinh_transf(cofactor, dataset)[0]
    print("Finished transform")
    print(
        "Start writing results to file (might take some time with larger datasets)"
    )
    normalised_dataset.to_csv(f"{output_dir}/arcsinhTRANSF_{input_file}",
                              index=False,
                              sep='\t')
예제 #4
0
    # Several input files were found: let the user decide whether to
    # downsample (the prompt is issued with default="YES").
    downs_inputs = yes_or_NO(
        "Multiple input files detected. Would you like to downsample the number of cells?",
        default="YES")
    if downs_inputs:
        print("Downsampling taking place. Check output folder for more info")
        # Show the number of rows per file_origin before and after the
        # downsampling step so its effect is visible in the log.
        print(no_arc["file_origin"].value_counts())
        no_arc = downsample_data(no_arc, info_run, output_dir)
        print(no_arc["file_origin"].value_counts())
    else:
        print("Multiple input files; no downsampling")
else:
    print("Only one input file detected; no downsampling")

#Transformation#
# Literature recommends a cofactor of 5 for CyTOF data.
# `cols` (the marker columns returned alongside the transformed data) is kept
# for later use below.
arc, cols = arcsinh_transf(cofactor, no_arc)

#~~~~~~~~~~~~~~~Define the markers used for UMAP calculation~~~~~~~~~~~~~~~~~~#

# Split the dataframe columns by type of measurement: marker columns versus
# everything else.
all_markers_cols = cols.copy()
not_markers_cols = [name for name in arc.columns if name not in cols]

# Markers (the "v's") used for the UMAP calculation, read from a .csv file
# in ./input.
not_these = []  # columns to be excluded from the UMAP calculation
vs_markers_cols = read_marker_csv(input_dir)
print(vs_markers_cols)
# One-column table of those markers, with rows numbered from 1 instead of 0.
df_vs_markers_cols = pd.DataFrame(vs_markers_cols, columns=['marker'])
df_vs_markers_cols.index = np.arange(1, df_vs_markers_cols.shape[0] + 1)
예제 #5
0
        input_dir)  #compare_from=inputfile

# Keep only the selected markers.
# A column counts as a marker when its name starts with a digit. Marker
# columns that are not in the list returned by read_marker_csv(input_dir) are
# removed; columns whose name does not start with a digit are always kept.
if filter_markers:
    selected_markers = read_marker_csv(input_dir)
    print(compare_to.columns)  # columns before filtering
    file_cols = compare_to.columns
    # Gather every unwanted marker first and drop them in a single call, so
    # the dataframe is rebuilt once rather than once per dropped column.
    unselected_markers = [
        x for x in file_cols
        if x[0].isdigit() and x not in selected_markers
    ]
    compare_to = compare_to.drop(unselected_markers, axis=1)
    print(compare_to.columns)  # columns after filtering

# Disabled option: downsample compare_to before the transformation.
# compare_to = downsample_data(compare_to, info_run, output_dir)
# arcsinh_transf hands back the transformed data together with the list of
# marker names.
compare_to_arc, marker_list = arcsinh_transf(cofactor, compare_to)

# Report the run set-up on stdout: sample files, reference, markers.
print('Sample files:')
sample_listing = '\n'.join(input_files)
print(sample_listing)
print(f'\nReference:\n{denominator}')
print('\nMarkers:')
marker_listing = '\n'.join(marker_list)
print(marker_listing)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~Perform EMD~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Calculate the EMD scores and save them to the output folder (together with
# the denominator and run info). Per the original note, each EMD score is
# signed by the difference of medians between compare_from and compare_to.

# Empty results table, plus an info record that starts out holding only the
# denominator.
emd_df = pd.DataFrame()
emd_infodict = {"denominator": denominator}