Ejemplo n.º 1
0
def main():
    """Print per-class totals for the genes listed in an FSL file.

    Expects exactly one command-line argument (the FSL file); otherwise a
    usage line is printed and the process exits with -1.  The value that
    ``read_fsl`` maps each gene to is added to every class that
    ``ontology_common.get_class`` returns for that gene.  The sums are then
    printed in two sections: classes whose name contains 'resistance gene',
    followed by all remaining classes, each section ending with its total.
    """
    if len(sys.argv) != 2:
        print("Usage: bait_topmatch.py file.fsl")
        sys.exit(-1)
    count_by_gene = read_fsl(sys.argv[1])
    terms = ontology_common.parse_obo('new_combined.obo')

    baits_for_class = collections.defaultdict(int)
    for gene, count in count_by_gene.items():
        for class_id in ontology_common.get_class(gene, terms):
            baits_for_class[class_id] += count

    # Section 1: classes whose name marks them as a resistance gene.
    gene_class_total = 0
    for class_id, count in baits_for_class.items():
        label = terms[class_id]['name'][0]
        if 'resistance gene' in label:
            print(label, count)
            gene_class_total += count
    print("Total counts for gene class ", gene_class_total)
    print()

    # Section 2: every other class, reported under "mechanism".
    mechanism_total = 0
    for class_id, count in baits_for_class.items():
        label = terms[class_id]['name'][0]
        if 'resistance gene' not in label:
            print(label, count)
            mechanism_total += count
    print("Total counts for mechanism ", mechanism_total)
Ejemplo n.º 2
0
def main():
    """Count, per ontology class, the distinct second fields of FSL hits.

    Expects two command-line arguments (FSL file, FASTA file); otherwise a
    usage line is printed and the process exits with -1.  For every FASTA
    name present in the FSL mapping, each hit's first element is resolved to
    its classes via ``ontology_common.get_class`` and the hit's second
    element is added to that class's set.  Names without hits are printed
    with a '0'.  Set sizes are then printed in two sections: classes whose
    name contains 'resistance gene', followed by all remaining classes.
    """
    if len(sys.argv) != 3:
        print("Usage: bait_frequency.py file.fsl genes.fa")
        sys.exit(-1)
    target_to_id = read_fsl(sys.argv[1])
    genes, _name_map = readers.read_fasta(
        sys.argv[2], shorten=True, max_length_shorten=False)
    terms = ontology_common.parse_obo('new_combined.obo')

    baits_for_class = collections.defaultdict(set)
    for name in genes:
        if name not in target_to_id:
            print(name, '0')
            continue
        for hit in target_to_id[name]:
            for class_id in ontology_common.get_class(hit[0], terms):
                baits_for_class[class_id].add(hit[1])
    print()

    # Section 1: classes whose name marks them as a resistance gene.
    gene_class_total = 0
    for class_id, members in baits_for_class.items():
        label = terms[class_id]['name'][0]
        if 'resistance gene' in label:
            print(label, len(members))
            gene_class_total += len(members)
    print("Total counts for gene class ", gene_class_total)
    print()

    # Section 2: every other class, reported under "mechanism".
    mechanism_total = 0
    for class_id, members in baits_for_class.items():
        label = terms[class_id]['name'][0]
        if 'resistance gene' not in label:
            print(label, len(members))
            mechanism_total += len(members)
    print("Total counts for mechanism ", mechanism_total)
Ejemplo n.º 3
0
def main():
    """Print per-class totals for the genes listed in an FSL file.

    Takes the FSL file as the single command-line argument; with any other
    argument count a usage line is printed and the process exits with -1.
    Each gene's value from ``read_fsl`` is accumulated into every class
    returned by ``ontology_common.get_class`` for that gene.  Output comes in
    two sections, each closed by its total: classes whose name contains
    'resistance gene' first, then the rest.
    """
    if len(sys.argv) != 2:
        print("Usage: bait_topmatch.py file.fsl")
        sys.exit(-1)
    per_gene = read_fsl(sys.argv[1])
    terms = ontology_common.parse_obo('new_combined.obo')

    baits_for_class = collections.defaultdict(int)
    for gene, amount in per_gene.items():
        for class_id in ontology_common.get_class(gene, terms):
            baits_for_class[class_id] += amount

    # Resistance-gene classes.
    running_total = 0
    for class_id, amount in baits_for_class.items():
        class_name = terms[class_id]['name'][0]
        if 'resistance gene' in class_name:
            print(class_name, amount)
            running_total += amount
    print("Total counts for gene class ", running_total)
    print()

    # All remaining classes.
    running_total = 0
    for class_id, amount in baits_for_class.items():
        class_name = terms[class_id]['name'][0]
        if 'resistance gene' not in class_name:
            print(class_name, amount)
            running_total += amount
    print("Total counts for mechanism ", running_total)
def main():
    """Classify each row of mismatch.csv by the ancestor its two IDs share.

    Every data row is split into ``expected ID, found ID, gene name`` and
    passed to ``determine_ancestor``.  Rows are then handled as follows:

    * ancestor equals the found ID: counted as "more general";
    * ancestor is None: printed with a trailing "none" column;
    * ancestor contains "ARO" and is not 'ARO:3000000': counted as
      "shared grouping";
    * anything else: printed as a four-column row.

    The two counters are printed once the file has been consumed.
    """
    terms = ontology_common.parse_obo('new_combined.obo')
    more_general = 0
    shared_ancestor = 0
    with open('mismatch.csv') as f:
        f.readline()  # discard the header row
        print(
            "Gene,Expected ID,Found ID,Common Class,Common Class Description")
        for line in f:
            expected, actual, name = line.strip().split(',', 2)
            ancestor = determine_ancestor(expected, actual, terms)
            if ancestor == actual:
                more_general += 1
                continue
            if ancestor is None:
                print('%s,%s,%s,%s,%s' % (name, expected, actual, ancestor, "none"))
                continue
            if "ARO" in ancestor and ancestor != 'ARO:3000000':
                shared_ancestor += 1
            else:
                print('%s,%s,%s,%s' % (name, expected, actual, ancestor))

    print("More general:%d, Shared grouping:%d" %
          (more_general, shared_ancestor))
Ejemplo n.º 5
0
def hyperbolic_tree():
    """Print ``code_hierarchy_data = `` followed by ``root`` as indented JSON.

    NOTE(review): ``root`` is never assigned inside this function, and the
    ``terms`` and ``ids`` values computed below are not used afterwards.
    Presumably a tree-building step is missing here, or ``root`` is a
    module-level name -- confirm before relying on this function.
    """
    # outputs json hierarchy used by hyperbolic tree
    terms = ontology_common.parse_obo('../new_combined.obo')
    ids = get_ids(sys.argv[1])

    print("code_hierarchy_data = ")
    print(json.dumps(root, indent=4, separators=(',', ': ')))
Ejemplo n.º 6
0
def main():
    """Print per-class totals of the counts read from a parsed TSV file.

    Expects exactly one command-line argument (the count file); otherwise a
    usage line is printed and the process exits with -1.  Each gene name is
    mapped to its group through 'grouping.csv', the group is resolved to its
    classes with ``ontology_common.get_class``, and the gene's count is added
    to each of them.  Sums are printed in two sections: classes whose name
    contains 'resistance gene', followed by all remaining classes.
    """
    if len(sys.argv) != 2:
        print("Usage: bait_count.py master_amr_parsed.tsv")
        sys.exit(-1)
    name_to_count = read_count(sys.argv[1])
    groups = readers.read_grouping(
        'grouping.csv', short=True, map_name=True, strip_colon=False)
    terms = ontology_common.parse_obo('new_combined.obo')

    baits_for_class = collections.defaultdict(int)
    for gene, count in name_to_count.items():
        for class_id in ontology_common.get_class(groups[gene], terms):
            baits_for_class[class_id] += count
    print()

    # Section 1: classes whose name marks them as a resistance gene.
    gene_class_total = 0
    for class_id, class_count in baits_for_class.items():
        label = terms[class_id]['name'][0]
        if 'resistance gene' in label:
            print(label, class_count)
            gene_class_total += class_count
    print("Total counts for gene class ", gene_class_total)
    print()

    # Section 2: every other class, reported under "mechanism".
    mechanism_total = 0
    for class_id, class_count in baits_for_class.items():
        label = terms[class_id]['name'][0]
        if 'resistance gene' not in label:
            print(label, class_count)
            mechanism_total += class_count
    print("Total counts for mechanism ", mechanism_total)
def main():
    """Report how the expected and found IDs in mismatch.csv are related.

    Each data row holds ``expected ID, found ID, gene name``.  The ancestor
    returned by ``determine_ancestor`` decides what happens to the row:
    it is tallied as "more general" when the ancestor is the found ID,
    printed with a "none" column when there is no ancestor, tallied as
    "shared grouping" when the ancestor contains "ARO" but is not
    'ARO:3000000', and printed as a four-column row otherwise.  Both tallies
    are printed at the end.
    """
    terms = ontology_common.parse_obo('new_combined.obo')
    more_general = 0
    shared_ancestor = 0
    with open('mismatch.csv') as f:
        f.readline()  # the first line is a header and carries no data
        print("Gene,Expected ID,Found ID,Common Class,Common Class Description")
        for row in f:
            expected, actual, name = row.strip().split(',', 2)
            ancestor = determine_ancestor(expected, actual, terms)
            if ancestor == actual:
                more_general += 1
                continue
            if ancestor is None:
                print('%s,%s,%s,%s,%s' % (name, expected, actual, ancestor, "none"))
                continue
            if "ARO" in ancestor and ancestor != 'ARO:3000000':
                shared_ancestor += 1
            else:
                print('%s,%s,%s,%s' % (name, expected, actual, ancestor))

    print("More general:%d, Shared grouping:%d" % (more_general, shared_ancestor))
Ejemplo n.º 8
0
def doughnut():
    """Print the count hierarchy as JSON for the doughnut graph.

    Reads counts from the file named by the first command-line argument,
    builds the tree with ``build_tree`` and ``fill_children``, and prints it
    as indented JSON after a ``code_hierarchy_data = `` line.
    """
    ontology = ontology_common.parse_obo('../new_combined.obo')
    tree_root, node_index = build_tree(get_counts(sys.argv[1]), ontology)
    fill_children(tree_root, node_index, ontology)
    print("code_hierarchy_data = ")
    serialized = json.dumps(tree_root, indent=4, separators=(',', ': '))
    print(serialized)
Ejemplo n.º 9
0
def main():
    """Count, per ontology class, the distinct second fields of FSL hits.

    Takes an FSL file and a FASTA file on the command line; with any other
    argument count a usage line is printed and the process exits with -1.
    For each FASTA name found in the FSL mapping, every hit contributes its
    second element to the set of each class that
    ``ontology_common.get_class`` returns for the hit's first element.
    Names with no entry are printed with a '0'.  The set sizes are printed in
    two sections, each closed by its total: classes whose name contains
    'resistance gene' first, then the rest.
    """
    if len(sys.argv) != 3:
        print("Usage: bait_frequency.py file.fsl genes.fa")
        sys.exit(-1)
    target_to_id = read_fsl(sys.argv[1])
    genes, _unused_name_map = readers.read_fasta(sys.argv[2],
                                                 shorten=True,
                                                 max_length_shorten=False)
    terms = ontology_common.parse_obo('new_combined.obo')

    baits_for_class = collections.defaultdict(set)
    for name in genes:
        if name not in target_to_id:
            print(name, '0')
            continue
        for hit in target_to_id[name]:
            for class_id in ontology_common.get_class(hit[0], terms):
                baits_for_class[class_id].add(hit[1])
    print()

    # Resistance-gene classes.
    running_total = 0
    for class_id, members in baits_for_class.items():
        class_name = terms[class_id]['name'][0]
        if 'resistance gene' in class_name:
            print(class_name, len(members))
            running_total += len(members)
    print("Total counts for gene class ", running_total)
    print()

    # All remaining classes.
    running_total = 0
    for class_id, members in baits_for_class.items():
        class_name = terms[class_id]['name'][0]
        if 'resistance gene' not in class_name:
            print(class_name, len(members))
            running_total += len(members)
    print("Total counts for mechanism ", running_total)
def main():
    """Compare ontology-derived resistance with each gene's named antibiotic.

    Usage: functional_compare.py [-fsl] [-protein] results.scan seq.fasta

    For every FASTA entry that has hits in the results file, the hits are
    tried from highest to lowest ``hit[1]``.  A hit counts as a true positive
    when a drug it resolves to through the ontology shares a lineage entry
    with the antibiotic encoded in the gene name (second '_'-separated field,
    looked up in ``antibiotic_code``).  Entries with hits but no such match
    are printed and counted as false positives; entries with no hits at all
    are counted as false negatives.  The three tallies are printed at the end.

    Exits with -1 after printing the usage line when the argument count does
    not fit the chosen mode.
    """
    usage = "Usage: functional_compare.py [-fsl] [-protein] results.scan seq.fasta"
    argc = len(sys.argv)
    if argc < 3 or argc > 4:
        print(usage)
        sys.exit(-1)
    fsl = sys.argv[1] == '-fsl'
    protein = sys.argv[1] == '-protein'
    if (fsl or protein) and argc != 4:
        # A flag occupies argv[1], so both file names must still follow it;
        # without this check argv[3] below would raise IndexError.
        print(usage)
        sys.exit(-1)
    if fsl or protein:
        results_file, fasta_file = sys.argv[2], sys.argv[3]
    else:
        results_file, fasta_file = sys.argv[1], sys.argv[2]

    # NOTE(review): the result is used as a mapping below (``.keys()``);
    # confirm read_fasta returns one when called without ``shorten``.
    genes = readers.read_fasta(fasta_file)
    if fsl:
        target_to_id = read_fsl(results_file)
    else:
        _id_to_target, target_to_id = readers.read_scan_results(
            0, results_file, protein=protein)
    if protein:
        readers.change_RF_to_ARO(target_to_id)
    # We don't want to double count if we have seen the same gene
    already_seen_protein = set()

    terms = ontology_common.parse_obo('new_combined.obo')
    # Lineage entries that never count as agreement between two drugs.
    ignored_lineage = ('ARO:1000001', 'ARO:1000003', 'Unknown')
    false_positive = 0
    true_positive = 0
    false_negative = 0
    for gene in genes.keys():
        if protein:
            names = gene.split('>')
            gene = names[1].strip()
            name = names[0].strip()
            if gene in already_seen_protein:
                continue
            already_seen_protein.add(gene)
        else:
            name = gene

        if name not in target_to_id:
            false_negative += 1
            continue

        antibiotic = gene.split('_')[1]
        functional_antibiotic = antibiotic_code[antibiotic]
        results = target_to_id[name]
        results.sort(key=lambda l: l[1], reverse=True)

        # Reset per-gene state so an empty hit list is reported as
        # "not identified" instead of raising NameError or silently reusing
        # the verdict and details left over from the previous gene.
        identified = False
        hit_id = None
        classes = []
        drugs = set()
        for result in results:
            hit_id = result[0]
            # remove formatting used by hmm
            if 's' in hit_id:
                hit_id = hit_id.replace('ARO', 'ARO:')
                hit_id = hit_id.split('s')[0]
            if ';' in hit_id:
                # resfams can have a list of ids associated with a gene
                classes = [
                    terms[p]['name'] for i in hit_id.split(';')
                    for p in ontology_common.get_class(i, terms)
                ]
                drugs = set()
                for i in hit_id.split(';'):
                    drugs |= ontology_common.get_resistance(
                        ontology_common.get_lineage(i, terms), terms)
            else:
                classes = [
                    terms[p]['name']
                    for p in ontology_common.get_class(hit_id, terms)
                ]
                drugs = ontology_common.get_resistance(
                    ontology_common.get_lineage(hit_id, terms), terms)

            # The hit agrees when any drug lineage entry also appears in the
            # lineage of the antibiotic named by the gene.
            identified = False
            for drug in drugs:
                for d in ontology_common.get_lineage(drug, terms):
                    for fd in ontology_common.get_lineage(
                            functional_antibiotic[1], terms):
                        if d == fd and d not in ignored_lineage:
                            identified = True

            if identified:
                true_positive += 1
                break

        if not identified:
            print(gene, functional_antibiotic, hit_id, classes, drugs)
            false_positive += 1

    print('False negative: %d; False Positive:%d; True Positive:%d' %
          (false_negative, false_positive, true_positive))
def main():
    """Print, one per line, the lineage of the ontology ID given as argv[1]."""
    ontology = ontology_common.parse_obo('new_combined.obo')
    lineage = ontology_common.get_lineage(sys.argv[1], ontology)
    for entry in lineage:
        print(entry)
# This file prints out all of the CARD entries that are children of the root entry

import ontology_common


def find_children(parent):
    """Print every entry of the global ``terms`` that names *parent* in 'is_a'.

    An entry matches when the first whitespace-separated token of one of its
    'is_a' values equals *parent*.  Each match is printed as
    ``'<term id>': <name>,`` -- once per matching 'is_a' value.
    """
    for term_id, entry in terms.items():
        if 'is_a' not in entry:
            continue
        for relation in entry['is_a']:
            if relation.split()[0] == parent:
                print("'%s': %s," % (term_id, entry['name']))

# Module-level driver: ``terms`` must stay a global because find_children()
# reads it directly rather than taking it as a parameter.
terms = ontology_common.parse_obo('../aro.obo')
# Print the entries whose 'is_a' points at ARO:3000000.
find_children('ARO:3000000')
# This file prints out all of the CARD entries that are children of the root entry

import ontology_common


def find_children(parent):
    """Print the entries of the global ``terms`` whose 'is_a' names *parent*.

    The first whitespace-separated token of each 'is_a' value is compared
    with *parent*; every match prints one ``'<term id>': <name>,`` line.
    Entries without an 'is_a' key are skipped.
    """
    for term_id, entry in terms.items():
        if 'is_a' not in entry:
            continue
        for relation in entry['is_a']:
            target = relation.split()[0]
            if target == parent:
                print("'%s': %s," % (term_id, entry['name']))


# Module-level driver: ``terms`` must stay a global because find_children()
# reads it directly rather than taking it as a parameter.
terms = ontology_common.parse_obo('../aro.obo')
# Print the entries whose 'is_a' points at ARO:3000000.
find_children('ARO:3000000')
Ejemplo n.º 14
0
def main():
    """Print each lineage entry of the ontology ID passed as argv[1]."""
    ontology = ontology_common.parse_obo('new_combined.obo')
    requested_id = sys.argv[1]
    for entry in ontology_common.get_lineage(requested_id, ontology):
        print(entry)
Ejemplo n.º 15
0
def main():
    """Parse 'new_combined.obo' and pass the result to ``categories``."""
    categories(ontology_common.parse_obo('new_combined.obo'))
Ejemplo n.º 16
0
def main():
    """Load the ontology from 'new_combined.obo' and run ``categories`` on it."""
    ontology = ontology_common.parse_obo('new_combined.obo')
    categories(ontology)
def main():
    """Compare ontology-derived resistance with each gene's named antibiotic.

    Usage: functional_compare.py [-fsl] [-protein] results.scan seq.fasta

    For every FASTA entry that has hits in the results file, the hits are
    tried from highest to lowest ``hit[1]``.  A hit counts as a true positive
    when a drug it resolves to through the ontology shares a lineage entry
    with the antibiotic encoded in the gene name (second '_'-separated field,
    looked up in ``antibiotic_code``).  Entries with hits but no such match
    are printed and counted as false positives; entries with no hits at all
    are counted as false negatives.  The three tallies are printed at the end.

    Exits with -1 after printing the usage line when the argument count does
    not fit the chosen mode.
    """
    usage = "Usage: functional_compare.py [-fsl] [-protein] results.scan seq.fasta"
    argc = len(sys.argv)
    if argc < 3 or argc > 4:
        print(usage)
        sys.exit(-1)
    fsl = sys.argv[1] == '-fsl'
    protein = sys.argv[1] == '-protein'
    if (fsl or protein) and argc != 4:
        # A flag occupies argv[1], so both file names must still follow it;
        # without this check argv[3] below would raise IndexError.
        print(usage)
        sys.exit(-1)
    if fsl or protein:
        results_file, fasta_file = sys.argv[2], sys.argv[3]
    else:
        results_file, fasta_file = sys.argv[1], sys.argv[2]

    # NOTE(review): the result is used as a mapping below (``.keys()``);
    # confirm read_fasta returns one when called without ``shorten``.
    genes = readers.read_fasta(fasta_file)
    if fsl:
        target_to_id = read_fsl(results_file)
    else:
        _id_to_target, target_to_id = readers.read_scan_results(0, results_file, protein=protein)
    if protein:
        readers.change_RF_to_ARO(target_to_id)
    # We don't want to double count if we have seen the same gene
    already_seen_protein = set()

    terms = ontology_common.parse_obo('new_combined.obo')
    # Lineage entries that never count as agreement between two drugs.
    ignored_lineage = ('ARO:1000001', 'ARO:1000003', 'Unknown')
    false_positive = 0
    true_positive = 0
    false_negative = 0
    for gene in genes.keys():
        if protein:
            names = gene.split('>')
            gene = names[1].strip()
            name = names[0].strip()
            if gene in already_seen_protein:
                continue
            already_seen_protein.add(gene)
        else:
            name = gene

        if name not in target_to_id:
            false_negative += 1
            continue

        antibiotic = gene.split('_')[1]
        functional_antibiotic = antibiotic_code[antibiotic]
        results = target_to_id[name]
        results.sort(key=lambda l: l[1], reverse=True)

        # Reset per-gene state so an empty hit list is reported as
        # "not identified" instead of raising NameError or silently reusing
        # the verdict and details left over from the previous gene.
        identified = False
        hit_id = None
        classes = []
        drugs = set()
        for result in results:
            hit_id = result[0]
            # remove formatting used by hmm
            if 's' in hit_id:
                hit_id = hit_id.replace('ARO', 'ARO:')
                hit_id = hit_id.split('s')[0]
            if ';' in hit_id:
                # resfams can have a list of ids associated with a gene
                classes = [terms[p]['name'] for i in hit_id.split(';') for p in ontology_common.get_class(i, terms)]
                drugs = set()
                for i in hit_id.split(';'):
                    drugs |= ontology_common.get_resistance(ontology_common.get_lineage(i, terms), terms)
            else:
                classes = [terms[p]['name'] for p in ontology_common.get_class(hit_id, terms)]
                drugs = ontology_common.get_resistance(ontology_common.get_lineage(hit_id, terms), terms)

            # The hit agrees when any drug lineage entry also appears in the
            # lineage of the antibiotic named by the gene.
            identified = False
            for drug in drugs:
                for d in ontology_common.get_lineage(drug, terms):
                    for fd in ontology_common.get_lineage(functional_antibiotic[1], terms):
                        if d == fd and d not in ignored_lineage:
                            identified = True

            if identified:
                true_positive += 1
                break

        if not identified:
            print(gene, functional_antibiotic, hit_id, classes, drugs)
            false_positive += 1

    print('False negative: %d; False Positive:%d; True Positive:%d' % (false_negative, false_positive, true_positive))