Python LCAComputation.get_a_Valid_ID 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: libs.python_modules.taxonomy

클래스/타입: LCAComputation

메소드/함수: get_a_Valid_ID

hotexamples.com에서의 예제들: 4

Python LCAComputation.get_a_Valid_ID 예제 4개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 libs.python_modules.taxonomy.LCAComputation.get_a_Valid_ID의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

LCAComputation(3)

clear_cells(2)

get_a_Valid_ID(2)

get_lca(2)

setParameters(2)

translateIdToName(2)

wtd(2)

compute_min_support_tree(1)

load_accession_to_taxon_map(1)

set_results_dictionary(1)

예제 #1

파일 보기

파일: MetaPathways_run_pathologic.py 프로젝트: fw1121/metapathways2

def ExtractPathway_WTD(options):
    """Write the sample's pathway table, optionally annotated with WTD scores.

    Steps, in order:
      1. Obtain MetaCyc's expected taxonomic range(s) per pathway, either
         from Pathway Tools (then cached as a pickle under /tmp) or from
         that cache.
      2. Read the optional NCBI-to-MEGAN name map and, from the annotation
         table, the first column (ORF) and ninth column (taxon annotation).
      3. Ask Pathway Tools for the ORFs, common name and reaction info of
         every pathway of the sample database.
      4. Compute one LCA per pathway from the taxa of its ORFs and score it
         against the expected range(s) with ``lca.wtd``.
      5. Write the tab-separated table to ``options.table_out`` through a
         ``.tmp`` file that is renamed into place.

    Args:
        options: parsed options object.  The attributes read here are
            ``wtd``, ``ptoolsExec``, ``ncbi_megan_map``, ``annotation_table``,
            ``sample_name``, ``ncbi_tree`` and ``table_out``.

    Returns:
        None.  Returns early (after printing a message) when the output
        file cannot be opened.

    Raises:
        Exception: whatever made steps 1-2 fail is re-raised after the
            diagnostic message has been printed.
    """
    printf('INFO\tEntering the WTD calculations!\n')
    # NOTE(review): the cache sits at a fixed, predictable path in /tmp and
    # is unpickled below.  pickle must only be fed trusted data -- confirm
    # that /tmp is not writable by untrusted users where this runs.
    serialized_metacyc_taxa_ranges = "/tmp/metacyc_pwy_taxa_range.pk"
    serialized_metacyc_taxa_ranges_tmp = "/tmp/metacyc_pwy_taxa_range.pk.tmp"
    try:
        if options.wtd and not path.isfile(serialized_metacyc_taxa_ranges):
            # No cache yet: query MetaCyc through Pathway Tools and cache it.
            printf('INFO\tGetting MetaCyc Expected Taxonomic Range(s)\n')
            pythonCyc = startPathwayTools('meta', options.ptoolsExec, True)

            pwy_taxa_range = {}  # pathway -> expected taxonomic range(s)
            for pwy in pythonCyc.getAllPathways():
                pwy_taxa_range[pwy] = pythonCyc.getExpectedTaxonomicRange(pwy)

            # pickle needs a binary stream; the temporary file is renamed
            # only once complete so a failed run leaves no truncated cache.
            with open(serialized_metacyc_taxa_ranges_tmp, "wb") as cache_out:
                pickle.dump(pwy_taxa_range, cache_out)
            StopPathwayTools()
            rename(serialized_metacyc_taxa_ranges_tmp,
                   serialized_metacyc_taxa_ranges)
        else:
            # read expected taxonomic range from the serialized file
            with open(serialized_metacyc_taxa_ranges, "rb") as cache_in:
                pwy_taxa_range = pickle.load(cache_in)

        # create mapping of preferred NCBI to MEGAN taxonomy
        megan_map = {}
        if options.ncbi_megan_map:
            with open(options.ncbi_megan_map) as megan_map_file:
                for line in megan_map_file:
                    # A real list (not map()) so indexing works on Python 3.
                    fields = [field.strip() for field in line.split("\t")]
                    if len(fields) < 2:
                        continue  # blank or malformed line
                    megan_map[fields[0]] = fields[1]

        # get ORF to taxa map from annotation_table
        printf("INFO\tGetting ORF to Taxa Map from AnnotationTable\n")
        orf_lca = {}
        with open(options.annotation_table) as f:
            for line in f:
                fields = line.split("\t")
                if len(fields) < 9:
                    continue  # blank or truncated row: no taxon column
                orf_lca[fields[0].strip()] = fields[8].strip()

        # get pathway ORFs and Rxns
        pwy_to_orfs = {}
        pwy_to_long = {}
        pwy_to_rxns = {}
        try:
            pythonCyc = startPathwayTools(options.sample_name.lower(),
                                          options.ptoolsExec, True)
            for pwy in pythonCyc.getAllPathways():
                genes = pythonCyc.getPathwayORFs(pwy)
                rxns = pythonCyc.getPathwayReactionInfo(pwy)
                long_name = cleanup(
                    pythonCyc.get_slot_value(pwy, "common-name"))
                # Record a pathway only once all three lookups succeeded, so
                # the output loop never meets a half-filled entry.
                pwy_to_orfs[pwy] = genes
                pwy_to_long[pwy] = long_name
                pwy_to_rxns[pwy] = rxns
            StopPathwayTools()

        except Exception:
            # Best effort: keep whatever pathways were collected so far.
            print("""
            Problem connecting to Pathway Tools. Check the /tmp/ptools-socket file.
            """)
    except Exception:
        print("""
        Problem calculating WTD via Pathway Tools. Check the /tmp/ptools-socket file.
        """)
        # The dictionaries used below were never bound, so continuing could
        # only end in a NameError; surface the real cause instead.
        raise

    # get LCA per pathway
    pwy_lca = {}
    # load NCBI taxonomy map
    printf("INFO\tLoading NCBI Taxonomy Map\n")
    lca = LCAComputation([options.ncbi_tree])

    # Annotations of the form "<name>(<digits>)" already carry the taxon id.
    taxon_id_pattern = re.compile(r"(.+?)\(([0-9]+?)\)")

    for pwy in pwy_to_orfs:
        taxa_ids = []
        for orf in pwy_to_orfs[pwy]:
            if orf not in orf_lca:
                continue
            res = taxon_id_pattern.search(orf_lca[orf])
            if res:
                taxon_id = res.group(2)
            else:
                taxon_id = lca.get_a_Valid_ID([orf_lca[orf]])
            taxa_ids.append(taxon_id)
        pwy_lca_id = lca.get_lca(taxa_ids, True)
        lca.clear_cells(taxa_ids)

        pwy_lca[pwy] = [pwy_lca_id, lca.translateIdToName(pwy_lca_id)]

    # calculate weighted taxonomic distance
    pwy_to_wtd = {}
    for pwy in pwy_lca:
        observed_id = pwy_lca[pwy][0]

        C = []  # list of distances
        C_taxa = []  # parallel list of [expected, observed] taxa pairs

        # .get(): a sample pathway may be absent from the MetaCyc range map.
        expected_ranges = pwy_taxa_range.get(pwy)
        if expected_ranges:
            for expected in expected_ranges:
                dist = lca.wtd(expected[0], observed_id)
                if dist or dist == 0:
                    # valid distance
                    C.append(dist)
                    C_taxa.append([expected[0], observed_id])
                else:
                    print("Not a valid distance")
        else:
            # no expected taxonomy, set to root
            min_taxa = "1"
            C.append(lca.wtd(min_taxa, observed_id))
            C_taxa.append([min_taxa, observed_id])

        if not C:
            # Every expected range gave an invalid distance; max() would
            # raise on the empty list.  The pathway is reported as NA.
            continue

        # find index with max distance (original author's note: closest to
        # expected taxonomy)
        max_index, max_dist = max(enumerate(C), key=operator.itemgetter(1))
        max_taxa = C_taxa[max_index]

        # remap to preferred names
        observed = get_preferred_taxa_name(max_taxa[1], megan_map,
                                           lca.id_to_name)
        expected = get_preferred_taxa_name(max_taxa[0], megan_map,
                                           lca.id_to_name)

        pwy_to_wtd[pwy] = [max_dist, observed, expected]

    # write out pathway table
    table_out_tmp = options.table_out + ".tmp"
    try:
        out = open(table_out_tmp, "w")
    except (IOError, OSError):
        print("Had problems opening file: " + options.table_out)
        return  # nothing to write to

    # write appropriate header
    if options.wtd:
        header = "SAMPLE\tPWY_NAME\tPWY_COMMON_NAME\tNUM_REACTIONS\tNUM_COVERED_REACTIONS\tORF_COUNT\tWTD\tOBSERVED\tEXPECTED\tORFS\n"
    else:
        header = "SAMPLE\tPWY_NAME\tPWY_COMMON_NAME\tNUM_REACTIONS\tNUM_COVERED_REACTIONS\tORF_COUNT\tORFS\n"
    out.write(header)

    sample = options.sample_name  # sample name
    for pwy in pwy_to_orfs:
        # generate output line
        line = [
            sample,  # sample name
            pwy,  # pathway name
            pwy_to_long[pwy],  # pathway longname
            pwy_to_rxns[pwy][0],  # pathway num reactions
            pwy_to_rxns[pwy][1],  # pathway covered reactions
            len(pwy_to_orfs[pwy]),  # num orfs
        ]
        if options.wtd:
            # wtd, observed taxa, expected taxa -- or NA when unscored
            line.extend(pwy_to_wtd.get(pwy, ["NA", "NA", "NA"]))
        line.append("[" + ",".join(pwy_to_orfs[pwy]) + "]")  # list of ORFs

        out.write("\t".join(str(field) for field in line) + "\n")
    try:
        out.close()  # close file
        rename(table_out_tmp, options.table_out)
    except (IOError, OSError):
        print("Had problems closing file: " + options.table_out)

예제 #2

파일 보기

파일: MetaPathways_run_pathologic.py 프로젝트: kishori82/MetaPathways_Python.3.0

def ExtractPathway_WTD(options):
    """Write the sample's pathway table, optionally annotated with WTD scores.

    Steps, in order:
      1. If ``options.wtd`` is set and no cache exists, fetch MetaCyc's
         expected taxonomic range(s) per pathway from Pathway Tools and
         pickle them under /tmp; then load the ranges from that cache.
      2. Read the optional NCBI-to-MEGAN name map and, from the annotation
         table, the first column (ORF) and ninth column (taxon annotation).
      3. Ask Pathway Tools for the ORFs, common name and reaction info of
         every pathway of the sample database.
      4. Compute one LCA per pathway from the taxa of its ORFs and score it
         against the expected range(s) with ``lca.wtd``.
      5. Write the tab-separated table to ``options.table_out`` through a
         ``.tmp`` file that is renamed into place.

    Failures are additionally recorded with ``insert_error(9)``.

    Args:
        options: parsed options object.  The attributes read here are
            ``wtd``, ``ptoolsExec``, ``ncbi_megan_map``, ``annotation_table``,
            ``sample_name``, ``ncbi_tree`` and ``table_out``.

    Returns:
        None.  Returns early (after reporting) when the output file cannot
        be opened.

    Raises:
        Exception: whatever made steps 1-2 fail is re-raised after it has
            been reported.
    """
    printf('\n')
    printf('INFO\tEntering the WTD calculations!\n')
    # NOTE(review): the cache sits at a fixed, predictable path in /tmp and
    # is unpickled below.  pickle must only be fed trusted data -- confirm
    # that /tmp is not writable by untrusted users where this runs.
    serialized_metacyc_taxa_ranges = "/tmp/metacyc_pwy_taxa_range.pk"
    serialized_metacyc_taxa_ranges_tmp = serialized_metacyc_taxa_ranges + ".tmp"

    try:
        if options.wtd and not path.isfile(serialized_metacyc_taxa_ranges):
            # get MetaCyc's expected taxonomic range(s) and serialize for
            # later use in /tmp
            printf('INFO\tGetting MetaCyc Expected Taxonomic Range(s)\n')
            pythonCyc = startPathwayTools('meta', options.ptoolsExec, True)

            pwy_taxa_range = {}  # pathway -> expected taxonomic range(s)
            for pwy in pythonCyc.getAllPathways():
                printf(" " + pwy)
                pwy_taxa_range[pwy] = pythonCyc.getExpectedTaxonomicRange(pwy)

            # Write to a temporary file in binary mode and rename it only
            # once complete: opening the final path up front left an empty
            # cache behind whenever the loop above failed, and every later
            # run then choked on unpickling it.
            with open(serialized_metacyc_taxa_ranges_tmp, "wb") as cache_out:
                pickle.dump(pwy_taxa_range, cache_out)
            rename(serialized_metacyc_taxa_ranges_tmp,
                   serialized_metacyc_taxa_ranges)
            StopPathwayTools()

        # read expected taxonomic range from serialized file
        with open(serialized_metacyc_taxa_ranges, "rb") as cache_in:
            pwy_taxa_range = pickle.load(cache_in)

        # create mapping of preferred NCBI to MEGAN taxonomy
        megan_map = {}
        if options.ncbi_megan_map:
            with open(options.ncbi_megan_map) as megan_map_file:
                for line in megan_map_file:
                    # A real list (not map()) so indexing works on Python 3.
                    fields = [field.strip() for field in line.split("\t")]
                    if len(fields) < 2:
                        continue  # blank or malformed line
                    megan_map[fields[0]] = fields[1]

        # get ORF to taxa map from annotation_table
        printf("INFO\tGetting ORF to Taxa Map from AnnotationTable\n")
        orf_lca = {}
        with open(options.annotation_table) as f:
            for line in f:
                fields = line.split("\t")
                if len(fields) < 9:
                    continue  # blank or truncated row: no taxon column
                orf_lca[fields[0].strip()] = fields[8].strip()

        # get pathway ORFs and Rxns
        pwy_to_orfs = {}
        pwy_to_long = {}
        pwy_to_rxns = {}
        try:
            pythonCyc = startPathwayTools(options.sample_name.lower(), options.ptoolsExec, True)
            for pwy in pythonCyc.getAllPathways():
                printf(" " + pwy)
                genes = pythonCyc.getPathwayORFs(pwy)
                rxns = pythonCyc.getPathwayReactionInfo(pwy)
                long_name = cleanup(pythonCyc.get_slot_value(pwy, "common-name"))
                # Record a pathway only once all three lookups succeeded, so
                # the output loop never meets a half-filled entry.
                pwy_to_orfs[pwy] = genes
                pwy_to_long[pwy] = long_name
                pwy_to_rxns[pwy] = rxns
            StopPathwayTools()

        except Exception:
            # Best effort: keep whatever pathways were collected so far.
            insert_error(9)
            print("""
            Problem connecting to Pathway Tools. Check the /tmp/ptools-socket file.
            """)
    except Exception:
        print("""
        Problem calculating WTD via Pathway Tools. Check the /tmp/ptools-socket file.
        """)
        insert_error(9)
        # The dictionaries used below were never bound, so continuing could
        # only end in a NameError; surface the real cause instead.
        raise

    # get LCA per pathway
    pwy_lca = {}
    # load NCBI taxonomy map
    printf("\nINFO\tLoading NCBI Taxonomy Map\n")
    lca = LCAComputation([options.ncbi_tree])

    # Annotations of the form "<name>(<digits>)" already carry the taxon id.
    taxon_id_pattern = re.compile(r"(.+?)\(([0-9]+?)\)")

    for pwy in pwy_to_orfs:
        taxa_ids = []
        for orf in pwy_to_orfs[pwy]:
            if orf not in orf_lca:
                continue
            res = taxon_id_pattern.search(orf_lca[orf])
            if res:
                taxon_id = res.group(2)
            else:
                taxon_id = lca.get_a_Valid_ID([orf_lca[orf]])
            taxa_ids.append(taxon_id)
        pwy_lca_id = lca.get_lca(taxa_ids, True)
        lca.clear_cells(taxa_ids)

        pwy_lca[pwy] = [pwy_lca_id, lca.translateIdToName(pwy_lca_id)]

    # calculate weighted taxonomic distance
    pwy_to_wtd = {}
    printf("INFO\tCalculating WTD\n")

    for pwy in pwy_lca:
        observed_id = pwy_lca[pwy][0]

        C = []  # list of distances
        C_taxa = []  # parallel list of [expected, observed] taxa pairs

        expected_ranges = pwy_taxa_range.get(pwy)
        if expected_ranges:
            for expected in expected_ranges:
                dist = lca.wtd(expected[0], observed_id)
                if dist or dist == 0:
                    # valid distance
                    C.append(dist)
                    C_taxa.append([expected[0], observed_id])
                else:
                    print("Not a valid distance")
        else:
            # no expected taxonomy, set to root
            min_taxa = "1"
            C.append(lca.wtd(min_taxa, observed_id))
            C_taxa.append([min_taxa, observed_id])

        if not C:
            # Every expected range gave an invalid distance; max() would
            # raise on the empty list.  The pathway is reported as NA.
            continue

        # find index with max distance (original author's note: closest to
        # expected taxonomy)
        max_index, max_dist = max(enumerate(C), key=operator.itemgetter(1))
        max_taxa = C_taxa[max_index]

        # remap to preferred names
        observed = get_preferred_taxa_name(max_taxa[1], megan_map, lca.id_to_name)
        expected = get_preferred_taxa_name(max_taxa[0], megan_map, lca.id_to_name)

        pwy_to_wtd[pwy] = [max_dist, observed, expected]

    # write out pathway table
    table_out_tmp = options.table_out + ".tmp"
    try:
        out = open(table_out_tmp, "w")
    except (IOError, OSError):
        print("Had problems opening file: " + options.table_out)
        insert_error(9)
        return  # nothing to write to

    # write appropriate header
    if options.wtd:
        header = "SAMPLE\tPWY_NAME\tPWY_COMMON_NAME\tNUM_REACTIONS\tNUM_COVERED_REACTIONS\tORF_COUNT\tWTD\tOBSERVED\tEXPECTED\tORFS\n"
    else:
        header = "SAMPLE\tPWY_NAME\tPWY_COMMON_NAME\tNUM_REACTIONS\tNUM_COVERED_REACTIONS\tORF_COUNT\tORFS\n"
    out.write(header)

    sample = options.sample_name  # sample name
    for pwy in pwy_to_orfs:
        # generate output line
        line = [
            sample,  # sample name
            pwy,  # pathway name
            pwy_to_long[pwy],  # pathway longname
            pwy_to_rxns[pwy][0],  # pathway num reactions
            pwy_to_rxns[pwy][1],  # pathway covered reactions
            len(pwy_to_orfs[pwy]),  # num orfs
        ]
        if options.wtd:
            # wtd, observed taxa, expected taxa -- or NA when unscored
            line.extend(pwy_to_wtd.get(pwy, ["NA", "NA", "NA"]))
        line.append("[" + ",".join(pwy_to_orfs[pwy]) + "]")  # list of ORFs

        out.write("\t".join(str(field) for field in line) + "\n")
    try:
        out.close()  # close file
        rename(table_out_tmp, options.table_out)
    except (IOError, OSError):
        print("Had problems closing file: " + options.table_out)
        insert_error(9)

예제 #3

파일 보기

파일: MetaPathways_extract_pathways.py 프로젝트: ar0ch/metapathways2

def main(argv):
    """Extract a PGDB's pathways and write them, with optional WTD, to a table.

    Steps, in order:
      1. Parse and validate the command line (prints ``usage`` and exits
         with status 0 when ``check_arguments`` rejects it).
      2. Load MetaCyc's expected taxonomic range(s) per pathway from the
         pickle cache under /tmp, or -- when ``opts.wtd`` is set and no
         cache exists -- fetch them from Pathway Tools and create the cache.
      3. Read the optional NCBI-to-MEGAN name map and, from the annotation
         table, the first column (ORF) and ninth column (taxon annotation).
      4. Ask Pathway Tools for the ORFs, common name and reaction info of
         every pathway of ``opts.pgdb_name``.
      5. Compute one LCA per pathway and score it against the expected
         range(s) with ``lca.wtd``; pathways without a usable score are
         written as "NA".
      6. Write the tab-separated table to ``opts.table_out``.

    Args:
        argv: not read by this function; ``parser.parse_args()`` is called
            without arguments on the module-level ``parser``.

    Returns:
        None.  Returns early (after printing a message) when the output
        file cannot be opened.
    """
    global parser
    (opts, args) = parser.parse_args()

    if not check_arguments(opts, args):
        print(usage)
        sys.exit(0)

    # place to store list of expected taxonomic range(s)
    # NOTE(review): the cache sits at a fixed, predictable path in /tmp and
    # is unpickled below.  pickle must only be fed trusted data -- confirm
    # that /tmp is not writable by untrusted users where this runs.
    serialized_metacyc_taxa_ranges = "/tmp/metacyc_pwy_taxa_range.pk"

    # Bound up front so a Pathway Tools failure (or a run without --wtd and
    # without a cache) degrades to "NA" scores instead of a NameError/IOError.
    pwy_taxa_range = {}  # pathway -> expected taxonomic range(s)
    if os.path.isfile(serialized_metacyc_taxa_ranges):
        # read expected taxonomic range from serialized file
        with open(serialized_metacyc_taxa_ranges, "rb") as cache_in:
            pwy_taxa_range = pickle.load(cache_in)
    elif opts.wtd:
        # get MetaCyc's expected taxonomic range(s) and serialize for later
        # use in /tmp
        try:
            print("Getting MetaCyc Expected Taxonomic Range(s)")

            # connect to Pathway Tools
            cyc = PythonCyc()
            cyc.setOrganism("meta")
            cyc.setPToolsExec(opts.pathway_tools)
            cyc.startPathwayTools()

            # get expected taxonomic ranges for each pathway
            for pwy in cyc.getAllPathways():
                pwy_taxa_range[pwy] = cyc.getExpectedTaxonomicRange(pwy)

            # Binary mode is what pickle expects; the temporary file is
            # renamed only once complete so a failed run never leaves a
            # truncated cache that later runs would try to load.
            cache_tmp = serialized_metacyc_taxa_ranges + ".tmp"
            with open(cache_tmp, "wb") as cache_out:
                pickle.dump(pwy_taxa_range, cache_out)
            os.rename(cache_tmp, serialized_metacyc_taxa_ranges)

            # close Pathway Tools
            cyc.stopPathwayTools()
        except Exception:
            print("""
            Problem connecting to Pathway Tools. Check the /tmp/ptools-socket file.
            """)

    # create mapping of preferred NCBI to MEGAN taxonomy
    megan_map = {}
    if opts.ncbi_megan_map:
        with open(opts.ncbi_megan_map) as megan_map_file:
            for line in megan_map_file:
                # A real list (not map()) so indexing works on Python 3.
                fields = [field.strip() for field in line.split("\t")]
                if len(fields) < 2:
                    continue  # blank or malformed line
                megan_map[fields[0]] = fields[1]

    # get ORF to taxa map from annotation_table
    print("Getting ORF to Taxa Map from AnnotationTable")
    orf_lca = {}
    with open(opts.annotation_table) as f:
        for line in f:
            fields = line.split("\t")
            if len(fields) < 9:
                continue  # blank or truncated row: no taxon column
            orf_lca[fields[0].strip()] = fields[8].strip()

    # get pathway ORFs and Rxns
    pwy_to_orfs = {}
    pwy_to_long = {}
    pwy_to_rxns = {}
    try:
        cyc = PythonCyc()
        cyc.setOrganism(opts.pgdb_name)
        cyc.setPToolsExec(opts.pathway_tools)
        cyc.startPathwayTools()
        for pwy in cyc.getAllPathways():
            genes = cyc.getPathwayORFs(pwy)
            rxns = cyc.getPathwayReactionInfo(pwy)
            long_name = cleanup(cyc.get_slot_value(pwy, "common-name"))
            # Record a pathway only once all three lookups succeeded, so the
            # output loop never meets a half-filled entry.
            pwy_to_orfs[pwy] = genes
            pwy_to_long[pwy] = long_name
            pwy_to_rxns[pwy] = rxns

        cyc.stopPathwayTools()
    except Exception:
        # Best effort: keep whatever pathways were collected so far.
        print("""
        Problem connecting to Pathway Tools. Check the /tmp/ptools-socket file.
        """)

    # get LCA per pathway
    pwy_lca = {}
    # load NCBI taxonomy map
    print("Loading NCBI Taxonomy Map")
    lca = LCAComputation([opts.ncbi_tree])
    lca.setParameters(opts.lca_min_score, opts.lca_top_percent, opts.lca_min_support)

    for pwy in pwy_to_orfs:
        taxa_ids = [
            lca.get_a_Valid_ID([orf_lca[orf]])
            for orf in pwy_to_orfs[pwy]
            if orf in orf_lca
        ]
        pwy_lca_id = lca.get_lca(taxa_ids, True)
        lca.clear_cells(taxa_ids)

        pwy_lca[pwy] = [pwy_lca_id, lca.translateIdToName(pwy_lca_id)]

    # calculate weighted taxonomic distance
    pwy_to_wtd = {}
    for pwy in pwy_lca:
        if pwy not in pwy_taxa_range:
            continue  # no expectation recorded: written as NA

        observed_id = pwy_lca[pwy][0]
        C = []  # list of distances
        C_taxa = []  # parallel list of [expected, observed] taxa pairs

        if pwy_taxa_range[pwy]:
            for expected in pwy_taxa_range[pwy]:
                dist = lca.wtd(expected[0], observed_id)
                if dist or dist == 0:
                    # valid distance
                    C.append(dist)
                    C_taxa.append([expected[0], observed_id])
                else:
                    print("Not a valid distance")
        else:
            # no expected taxonomy, set to root
            min_taxa = "1"
            C.append(lca.wtd(min_taxa, observed_id))
            C_taxa.append([min_taxa, observed_id])

        if not C:
            # Every expected range gave an invalid distance; max() would
            # raise on the empty list.  The pathway is written as NA.
            continue

        # find index with max distance (original author's note: closest to
        # expected taxonomy)
        max_index, max_dist = max(enumerate(C), key=operator.itemgetter(1))
        max_taxa = C_taxa[max_index]

        # remap to preferred names
        observed = get_preferred_taxa_name(max_taxa[1], megan_map, lca.id_to_name)
        expected = get_preferred_taxa_name(max_taxa[0], megan_map, lca.id_to_name)

        pwy_to_wtd[pwy] = [max_dist, observed, expected]

    # write out pathway table
    try:
        out = open(opts.table_out, "w")
    except (IOError, OSError):
        print("Had problems opening file: " + opts.table_out)
        return  # nothing to write to

    # write appropriate header
    if opts.wtd:
        header = "SAMPLE\tPWY_NAME\tPWY_COMMON_NAME\tNUM_REACTIONS\tNUM_COVERED_REACTIONS\tORF_COUNT\tWTD\tOBSERVED\tEXPECTED\tORFS\n"
    else:
        header = "SAMPLE\tPWY_NAME\tPWY_COMMON_NAME\tNUM_REACTIONS\tNUM_COVERED_REACTIONS\tORF_COUNT\tORFS\n"
    out.write(header)

    sample = opts.pgdb_name  # sample name
    for pwy in pwy_to_orfs:
        # generate output line
        line = [
            sample,  # sample name
            pwy,  # pathway name
            pwy_to_long[pwy],  # pathway longname
            pwy_to_rxns[pwy][0],  # pathway num reactions
            pwy_to_rxns[pwy][1],  # pathway covered reactions
            len(pwy_to_orfs[pwy]),  # num orfs
        ]
        if opts.wtd:
            # wtd, observed taxa, expected taxa -- or NA when unscored
            line.extend(pwy_to_wtd.get(pwy, ["NA", "NA", "NA"]))
        line.append("[" + ",".join(pwy_to_orfs[pwy]) + "]")  # list of ORFs

        out.write("\t".join(str(field) for field in line) + "\n")
    try:
        out.close()  # close file
    except (IOError, OSError):
        print("Had problems closing file: " + opts.table_out)

예제 #4

파일 보기

파일: MetaPathways_extract_pathways.py 프로젝트: ariahahn/MetaPathways_Python.3.0

def main(argv):
    """Extract a PGDB's pathways and write them, with optional WTD, to a table.

    Steps, in order:
      1. Parse and validate the command line (prints ``usage`` and exits
         with status 0 when ``check_arguments`` rejects it).
      2. Load MetaCyc's expected taxonomic range(s) per pathway from the
         pickle cache under /tmp, or -- when ``opts.wtd`` is set and no
         cache exists -- fetch them from Pathway Tools and create the cache.
      3. Read the optional NCBI-to-MEGAN name map and, from the annotation
         table, the first column (ORF) and ninth column (taxon annotation).
      4. Ask Pathway Tools for the ORFs, common name and reaction info of
         every pathway of ``opts.pgdb_name``.
      5. Compute one LCA per pathway and score it against the expected
         range(s) with ``lca.wtd``; pathways without a usable score are
         written as "NA".
      6. Write the tab-separated table to ``opts.table_out``.

    Args:
        argv: not read by this function; ``parser.parse_args()`` is called
            without arguments on the module-level ``parser``.

    Returns:
        None.  Returns early (after printing a message) when the output
        file cannot be opened.
    """
    global parser
    (opts, args) = parser.parse_args()

    if not check_arguments(opts, args):
        print(usage)
        sys.exit(0)

    # place to store list of expected taxonomic range(s)
    # NOTE(review): the cache sits at a fixed, predictable path in /tmp and
    # is unpickled below.  pickle must only be fed trusted data -- confirm
    # that /tmp is not writable by untrusted users where this runs.
    serialized_metacyc_taxa_ranges = "/tmp/metacyc_pwy_taxa_range.pk"

    # Bound up front so a Pathway Tools failure (or a run without --wtd and
    # without a cache) degrades to "NA" scores instead of a NameError/OSError.
    pwy_taxa_range = {}  # pathway -> expected taxonomic range(s)
    if os.path.isfile(serialized_metacyc_taxa_ranges):
        # read expected taxonomic range from serialized file; pickle streams
        # are bytes, so text mode fails on Python 3
        with open(serialized_metacyc_taxa_ranges, "rb") as cache_in:
            pwy_taxa_range = pickle.load(cache_in)
    elif opts.wtd:
        # get MetaCyc's expected taxonomic range(s) and serialize for later
        # use in /tmp
        try:
            print('Getting MetaCyc Expected Taxonomic Range(s)')

            # connect to Pathway Tools
            cyc = PythonCyc()
            cyc.setOrganism('meta')
            cyc.setPToolsExec(opts.pathway_tools)
            cyc.startPathwayTools()

            # get expected taxonomic ranges for each pathway
            for pwy in cyc.getAllPathways():
                pwy_taxa_range[pwy] = cyc.getExpectedTaxonomicRange(pwy)

            # The temporary file is renamed only once complete so a failed
            # run never leaves a truncated cache that later runs would try
            # to load.
            cache_tmp = serialized_metacyc_taxa_ranges + ".tmp"
            with open(cache_tmp, "wb") as cache_out:
                pickle.dump(pwy_taxa_range, cache_out)
            os.rename(cache_tmp, serialized_metacyc_taxa_ranges)

            # close Pathway Tools
            cyc.stopPathwayTools()
        except Exception:
            print("""
            Problem connecting to Pathway Tools. Check the /tmp/ptools-socket file.
            """)

    # create mapping of preferred NCBI to MEGAN taxonomy
    megan_map = {}
    if opts.ncbi_megan_map:
        with open(opts.ncbi_megan_map) as megan_map_file:
            for line in megan_map_file:
                # map() returns a non-indexable iterator on Python 3, so
                # build a real list before subscripting.
                fields = [field.strip() for field in line.split("\t")]
                if len(fields) < 2:
                    continue  # blank or malformed line
                megan_map[fields[0]] = fields[1]

    # get ORF to taxa map from annotation_table
    print("Getting ORF to Taxa Map from AnnotationTable")
    orf_lca = {}
    with open(opts.annotation_table) as f:
        for line in f:
            fields = line.split("\t")
            if len(fields) < 9:
                continue  # blank or truncated row: no taxon column
            orf_lca[fields[0].strip()] = fields[8].strip()

    # get pathway ORFs and Rxns
    pwy_to_orfs = {}
    pwy_to_long = {}
    pwy_to_rxns = {}
    try:
        cyc = PythonCyc()
        cyc.setOrganism(opts.pgdb_name)
        cyc.setPToolsExec(opts.pathway_tools)
        cyc.startPathwayTools()
        for pwy in cyc.getAllPathways():
            genes = cyc.getPathwayORFs(pwy)
            rxns = cyc.getPathwayReactionInfo(pwy)
            long_name = cleanup(cyc.get_slot_value(pwy, "common-name"))
            # Record a pathway only once all three lookups succeeded, so the
            # output loop never meets a half-filled entry.
            pwy_to_orfs[pwy] = genes
            pwy_to_long[pwy] = long_name
            pwy_to_rxns[pwy] = rxns

        cyc.stopPathwayTools()
    except Exception:
        # Best effort: keep whatever pathways were collected so far.
        print("""
        Problem connecting to Pathway Tools. Check the /tmp/ptools-socket file.
        """)

    # get LCA per pathway
    pwy_lca = {}
    # load NCBI taxonomy map
    print("Loading NCBI Taxonomy Map")
    lca = LCAComputation([opts.ncbi_tree])
    lca.setParameters(opts.lca_min_score, opts.lca_top_percent, opts.lca_min_support)

    for pwy in pwy_to_orfs:
        taxa_ids = [
            lca.get_a_Valid_ID([orf_lca[orf]])
            for orf in pwy_to_orfs[pwy]
            if orf in orf_lca
        ]
        pwy_lca_id = lca.get_lca(taxa_ids, True)
        lca.clear_cells(taxa_ids)

        pwy_lca[pwy] = [pwy_lca_id, lca.translateIdToName(pwy_lca_id)]

    # calculate weighted taxonomic distance
    pwy_to_wtd = {}
    for pwy in pwy_lca:
        if pwy not in pwy_taxa_range:
            continue  # no expectation recorded: written as NA

        observed_id = pwy_lca[pwy][0]
        C = []  # list of distances
        C_taxa = []  # parallel list of [expected, observed] taxa pairs

        if pwy_taxa_range[pwy]:
            for expected in pwy_taxa_range[pwy]:
                dist = lca.wtd(expected[0], observed_id)
                if dist or dist == 0:
                    # valid distance
                    C.append(dist)
                    C_taxa.append([expected[0], observed_id])
                else:
                    print("Not a valid distance")
        else:
            # no expected taxonomy, set to root
            min_taxa = "1"
            C.append(lca.wtd(min_taxa, observed_id))
            C_taxa.append([min_taxa, observed_id])

        if not C:
            # Every expected range gave an invalid distance; max() would
            # raise on the empty list.  The pathway is written as NA.
            continue

        # find index with max distance (original author's note: closest to
        # expected taxonomy)
        max_index, max_dist = max(enumerate(C), key=operator.itemgetter(1))
        max_taxa = C_taxa[max_index]

        # remap to preferred names
        observed = get_preferred_taxa_name(max_taxa[1], megan_map, lca.id_to_name)
        expected = get_preferred_taxa_name(max_taxa[0], megan_map, lca.id_to_name)

        pwy_to_wtd[pwy] = [max_dist, observed, expected]

    # write out pathway table
    try:
        out = open(opts.table_out, "w")
    except (IOError, OSError):
        print("Had problems opening file: " + opts.table_out)
        return  # nothing to write to

    # write appropriate header
    if opts.wtd:
        header = "SAMPLE\tPWY_NAME\tPWY_COMMON_NAME\tNUM_REACTIONS\tNUM_COVERED_REACTIONS\tORF_COUNT\tWTD\tOBSERVED\tEXPECTED\tORFS\n"
    else:
        header = "SAMPLE\tPWY_NAME\tPWY_COMMON_NAME\tNUM_REACTIONS\tNUM_COVERED_REACTIONS\tORF_COUNT\tORFS\n"
    out.write(header)

    sample = opts.pgdb_name  # sample name
    for pwy in pwy_to_orfs:
        # generate output line
        line = [
            sample,  # sample name
            pwy,  # pathway name
            pwy_to_long[pwy],  # pathway longname
            pwy_to_rxns[pwy][0],  # pathway num reactions
            pwy_to_rxns[pwy][1],  # pathway covered reactions
            len(pwy_to_orfs[pwy]),  # num orfs
        ]
        if opts.wtd:
            # wtd, observed taxa, expected taxa -- or NA when unscored
            line.extend(pwy_to_wtd.get(pwy, ["NA", "NA", "NA"]))
        line.append("[" + ",".join(pwy_to_orfs[pwy]) + "]")  # list of ORFs

        out.write("\t".join(str(field) for field in line) + "\n")
    try:
        out.close()  # close file
    except (IOError, OSError):
        print("Had problems closing file: " + opts.table_out)