def create_bucket_from_clusterinfo(cluster_info_filename, param_filename,
                                   clusterinfosummary_filename,
                                   output_filename):
    """Write a tab-separated table of summed precursor intensity per cluster.

    Each row is one cluster index and each column is one input file. A cell
    holds the sum of the ``#PrecIntensity`` values of the cluster-info rows
    that belong to that cluster and whose ``#Filename`` basename equals the
    column's mangled file name.

    If the first ``CREATE_CLUSTER_BUCKETS`` value in the parameter file is
    not ``"1"``, the output file contains only the text ``No Output``.

    Args:
        cluster_info_filename: Table with ``#ClusterIdx``, ``#Filename`` and
            ``#PrecIntensity`` columns.
        param_filename: Workflow parameter XML file.
        clusterinfosummary_filename: Cluster summary loaded into the
            molecular network; rows whose cluster the network does not know
            (``get_cluster_index`` returns ``None``) are skipped.
        output_filename: Path of the table to write.

    Raises:
        KeyError: If a row's file basename is not a key of the mangled file
            mapping, or if a required column/parameter is missing.
    """
    # NOTE(review): assumes parse_xml_file reads the handle eagerly, so the
    # file can be closed as soon as it returns -- confirm against the library.
    with open(param_filename, "r") as param_file:
        param_object = ming_proteosafe_library.parse_xml_file(param_file)

    # The context manager guarantees the output is flushed and closed on
    # every exit path, including the early return and any exception.
    with open(output_filename, "w") as output_file:
        if param_object["CREATE_CLUSTER_BUCKETS"][0] != "1":
            output_file.write("No Output")
            return

        test_network = molecular_network_library.MolecularNetwork()
        test_network.load_clustersummary(clusterinfosummary_filename)

        line_counts, table_data = ming_fileio_library.parse_table_with_headers(
            cluster_info_filename)

        mangled_mapping = ming_proteosafe_library.get_mangled_file_mapping(
            param_object)

        # cluster index -> {mangled file name -> summed precursor intensity}
        cluster_index_to_file_map = {}
        for i in range(line_counts):
            cluster_number = table_data["#ClusterIdx"][i]
            if test_network.get_cluster_index(cluster_number) is None:
                continue

            if cluster_number not in cluster_index_to_file_map:
                # Start every known file at zero so each row has all columns.
                cluster_index_to_file_map[cluster_number] = dict.fromkeys(
                    mangled_mapping, 0.0)

            mangled_filename_only = os.path.basename(
                table_data["#Filename"][i])
            cluster_index_to_file_map[cluster_number][
                mangled_filename_only] += float(
                    table_data["#PrecIntensity"][i])

        # Every line keeps a trailing tab before the newline, matching the
        # format this function has always produced.
        header_cells = ["#OTU ID"]
        header_cells.extend(
            os.path.basename(mangled_mapping[name])
            for name in mangled_mapping)
        output_file.write("\t".join(header_cells) + "\t\n")

        for cluster_idx, intensities in cluster_index_to_file_map.items():
            row_cells = [str(cluster_idx)]
            row_cells.extend(str(intensities[name])
                             for name in mangled_mapping)
            output_file.write("\t".join(row_cells) + "\t\n")
# Example no. 2
# 0
def create_ili_output_from_clusterinfo(cluster_info_filename, param_filename, clusterinfosummary_filename, filename_coordinate_mapping, output_filename):
    """Write a comma-separated table of per-file coordinates and cluster intensities.

    The header is ``filename,X,Y,Z,radius`` followed by one column per
    cluster index. Each data row describes one input file: its real file
    name, its ``x``/``y``/``z``/``radius`` values from
    ``filename_coordinate_mapping``, and, per cluster, the sum of the
    ``#PrecIntensity`` values of that cluster's rows coming from the file.

    Only files whose mangled name contains ``"spec"`` and whose real-name
    basename is a key of ``filename_coordinate_mapping`` get a row.

    Args:
        cluster_info_filename: Table with ``#ClusterIdx``, ``#Filename`` and
            ``#PrecIntensity`` columns.
        param_filename: Workflow parameter XML file.
        clusterinfosummary_filename: Cluster summary loaded into the
            molecular network; rows whose cluster the network does not know
            (``get_cluster_index`` returns ``None``) are skipped.
        filename_coordinate_mapping: Maps a file basename to a mapping with
            ``"x"``, ``"y"``, ``"z"`` and ``"radius"`` entries.
        output_filename: Path of the CSV file to write.

    Raises:
        KeyError: If a row's file basename is not a key of the mangled file
            mapping, or if a required column/coordinate entry is missing.
    """
    # The context manager closes the output on every exit path; previously
    # an exception anywhere below left the handle open.
    with open(output_filename, "w") as output_file:
        test_network = molecular_network_library.MolecularNetwork()
        test_network.load_clustersummary(clusterinfosummary_filename)
        line_counts, table_data = ming_fileio_library.parse_table_with_headers(cluster_info_filename)

        # NOTE(review): assumes parse_xml_file reads the handle eagerly, so
        # it can be closed right after parsing -- confirm against the library.
        with open(param_filename, "r") as param_file:
            param_object = ming_proteosafe_library.parse_xml_file(param_file)
        mangled_mapping = ming_proteosafe_library.get_mangled_file_mapping(param_object)

        # cluster index -> {mangled file name -> summed precursor intensity}
        cluster_index_to_file_map = {}
        for i in range(line_counts):
            cluster_number = table_data["#ClusterIdx"][i]
            if test_network.get_cluster_index(cluster_number) is None:
                continue

            if cluster_number not in cluster_index_to_file_map:
                # Start every known file at zero so each cluster has a value
                # for every sample.
                cluster_index_to_file_map[cluster_number] = dict.fromkeys(mangled_mapping, 0.0)

            mangled_filename_only = os.path.basename(table_data["#Filename"][i])
            cluster_index_to_file_map[cluster_number][mangled_filename_only] += float(table_data["#PrecIntensity"][i])

        all_headers = ["filename", "X", "Y", "Z", "radius"]
        all_headers.extend(str(cluster_idx) for cluster_idx in cluster_index_to_file_map)
        output_file.write(",".join(all_headers) + "\n")

        for sample_name in mangled_mapping:
            if "spec" not in sample_name:
                continue
            real_filename = mangled_mapping[sample_name]
            real_basename = os.path.basename(real_filename)
            if real_basename not in filename_coordinate_mapping:
                continue

            coordinate_object = filename_coordinate_mapping[real_basename]
            line_output = [real_filename]
            # str() keeps the join below from failing when coordinates are
            # stored as numbers rather than strings.
            line_output.extend(str(coordinate_object[key]) for key in ("x", "y", "z", "radius"))
            # Diagnostic output kept from the original implementation.
            print(line_output, coordinate_object)
            line_output.extend(
                str(cluster_index_to_file_map[cluster_idx][sample_name])
                for cluster_idx in cluster_index_to_file_map)
            output_file.write(",".join(line_output) + "\n")
# Example no. 3
# 0
def get_molecular_network_obj(job_obj):
    """Load the molecular network for a job, or return ``None`` on failure.

    Looks up the job's cluster-summary and network-edge result files (the
    first path returned for each) and loads them into a new
    ``MolecularNetwork``.

    Args:
        job_obj: Job description; its ``"task"`` entry identifies the task
            whose result files are loaded.

    Returns:
        The loaded ``MolecularNetwork``, or ``None`` if any step raised an
        ordinary exception (missing key, missing result file, load error).
    """
    try:
        # Diagnostic output kept from the original implementation.
        print(job_obj)
        path_to_clusterinfosummary = ming_proteosafe_library.get_proteosafe_result_file_path(
            job_obj["task"], "continuous",
            "clusterinfosummarygroup_attributes_withIDs")[0]
        path_to_pairs = ming_proteosafe_library.get_proteosafe_result_file_path(
            job_obj["task"], "continuous", "networkedges_selfloop")[0]

        molecular_network = molecular_network_library.MolecularNetwork()
        molecular_network.load_network(path_to_clusterinfosummary,
                                       path_to_pairs)
        return molecular_network
    except Exception:
        # Best effort: callers treat None as "network unavailable".
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate instead of being
        # swallowed.
        return None