Ejemplo n.º 1
0
def get_ham_treemap_from_row(row, tree, level=None):
    """Build pyHAM tree-profile treemap(s) for one HOG family row.

    :param row: a two-item ``(fam, orthoxml)`` sequence. ``orthoxml`` is first
        passed through ``switch_name_ncbi_id`` and then given to pyHAM as an
        orthoXML string; ``fam`` is not used here.
    :param tree: species tree, passed to ``pyham.Ham`` as its first argument.
    :param level: optional name of an ancestral genome. When ``None`` the
        profile of the first top-level HOG is built; otherwise one profile is
        built for every sub-HOG of that HOG found at ``level``.
    :return: the ``treemap`` of the first top-level HOG when ``level`` is
        ``None``; a list with one ``treemap`` per sub-HOG when ``level`` is
        given; ``None`` when a ``TypeError`` or ``AttributeError`` was raised
        while building the profile(s).
    """
    _fam, orthoxml = row
    orthoxml = switch_name_ncbi_id(orthoxml)
    try:
        # The Ham object is built the same way whether or not a level is given.
        ham_obj = pyham.Ham(tree,
                            orthoxml,
                            type_hog_file="orthoxml",
                            use_internal_name=True,
                            orthoXML_as_string=True)
        if level is None:
            tp = ham_obj.create_tree_profile(
                hog=ham_obj.get_list_top_level_hogs()[0])
            return tp.treemap

        # Sub-HOGs at the requested level. The previous code computed these
        # profiles but never returned them, so callers always received None.
        ancestral_genome = ham_obj.get_ancestral_genome_by_name(level)
        sub_hogs = ham_obj.get_list_top_level_hogs()[0].get_at_level(
            ancestral_genome)
        return [
            ham_obj.create_tree_profile(hog=sub_hog).treemap
            for sub_hog in sub_hogs
        ]

    except TypeError as err:
        print('Type error:', err)
        return None
    except AttributeError as err:
        print('Attribute error:', err)
        return None
Ejemplo n.º 2
0
    def _load_ogs(self):
        """
        Gather the amino-acid and DNA records of every OG in ``self.ogs`` and write them to disk.

        The DNA source is chosen from the text of ``self.args.dna_reference``:
        a name containing '.fa' is indexed as a fasta file, a name containing '.h5' is opened
        as a database, anything else falls back to the REST api. One fasta file per OG is
        written into '01_ref_ogs_aa' and '01_ref_ogs_dna' below ``self.args.output_path``.

        :return: Dictionary with og name as key and an ``OG`` object (``.aa`` / ``.dna`` records) as value
        """
        dna_reference = self.args.dna_reference

        # '.fa' is a substring of '.fasta', so this single test covers both extensions.
        if '.fa' in dna_reference:
            print('--- Load ogs and find their corresponding DNA seq from {} ---'.format(dna_reference))
            print(
                'Loading {} into memory. This might take a while . . . '.format(dna_reference.split("/")[-1]))
            self._db = SeqIO.index(dna_reference, "fasta")
            self._db_source = 'fa'
        elif '.h5' in dna_reference:
            print('--- Load ogs and find their corresponding DNA seq from {} ---'.format(dna_reference))
            self._db = db.Database(dna_reference)
            self._db_id_map = db.OmaIdMapper(self._db)
            self._db_source = 'h5'
        else:
            print('--- Load ogs and find their corresponding DNA seq using the REST api ---')
            self._db_source = 'REST_api'

        # Compare by value: `is` on a string literal tests object identity, which only
        # works by accident of interning and raises a SyntaxWarning on Python >= 3.8.
        if self.oma.mode == 'standalone':
            self._og_orthoxml = os.path.join(self.oma_output_path, 'OrthologousGroups.orthoxml')
            self._tree_str = os.path.join(self.oma_output_path, 'EstimatedSpeciesTree.nwk')
            self._ham_analysis = pyham.Ham(self._tree_str, self._og_orthoxml, use_internal_name=False)

        ogs = {}

        # exist_ok avoids the check-then-create race of a separate os.path.exists() test.
        orthologous_groups_aa = os.path.join(self.args.output_path, "01_ref_ogs_aa")
        os.makedirs(orthologous_groups_aa, exist_ok=True)

        orthologous_groups_dna = os.path.join(self.args.output_path, "01_ref_ogs_dna")
        os.makedirs(orthologous_groups_dna, exist_ok=True)

        for name, records in tqdm(self.ogs.items(), desc='Loading OGs', unit=' OGs'):
            og = OG()
            ogs[name] = og
            og.aa = self._get_aa_records(name, records)
            output_file_aa = os.path.join(orthologous_groups_aa, name + ".fa")
            output_file_dna = os.path.join(orthologous_groups_dna, name + ".fa")

            if self._db_source:
                og.dna = self._get_dna_records(og.aa, name)
            else:
                print("DNA reference was not provided. Only amino acid sequences gathered!")
            # Both files are written for every OG, DNA first.
            self._write(output_file_dna, og.dna)
            self._write(output_file_aa, og.aa)

        return ogs
Ejemplo n.º 3
0
def orthoxml_parsing(orthoxml, nwk, species_list, gene_hog_dict):
    """Record, for every gene of the given species, which top-level HOG it belongs to.

    A pyHAM analysis is built from ``nwk`` and ``orthoxml``. For each gene of
    each extant genome named in ``species_list``, the first space-separated
    token of the gene's ``protId`` cross-reference is used as the key.

    :param orthoxml: orthoXML input, passed to ``pyham.Ham`` as second argument.
    :param nwk: species tree, passed to ``pyham.Ham`` as first argument.
    :param species_list: iterable of extant genome names to process.
    :param gene_hog_dict: dictionary updated in place; each key maps to the
        string ``"singleton"`` when the gene's top-level HOG reports
        ``is_singleton()``, otherwise to that HOG's ``hog_id``.
    :return: ``None``; results are delivered through ``gene_hog_dict``.
    """
    ham_analysis = pyham.Ham(nwk, orthoxml)

    for species in species_list:
        extant_genome = ham_analysis.get_extant_genome_by_name(species)

        for gene in extant_genome.genes:
            xref = gene.get_dict_xref()['protId'].split(" ")[0]
            # Fetch the top-level HOG once and reuse it for both the
            # singleton test and the id lookup.
            hog = gene.get_top_level_hog()
            gene_hog_dict[xref] = "singleton" if hog.is_singleton() else hog.hog_id
Ejemplo n.º 4
0
#  pyHam (the HAM package) provides the Ham analysis object used below
import pyham

#  OPTIONAL: configure logging only if you want the logger output printed
import logging
logging.basicConfig(
    format="%(asctime)s %(name)-12s %(levelname)-8s %(message)s",
    level=logging.INFO)

# Build the pyHam analysis from OMA data, starting from one query gene id
query = 'HUMAN12'
pyham_analysis = pyham.Ham(use_data_from='oma', query_database=query)

# The HOG that both html outputs below are generated for
hog = pyham_analysis.get_hog_by_id('HOG:0359282')

# Write the iHam view of the HOG into an html file
output_name = "iHam{}.html".format(hog.hog_id)
pyham_analysis.create_iHam(outfile=output_name, hog=hog)

# Write the tree profile of the same HOG into a second html file
output_name = "TreeProfile{}.html".format(hog.hog_id)
pyham_analysis.create_tree_profile(outfile=output_name, hog=hog)
Ejemplo n.º 5
0
#  pyHam (the HAM package) provides the Ham analysis object used below
import pyham

#  OPTIONAL: configure logging only if you want the logger output printed
import logging
logging.basicConfig(
    format="%(asctime)s %(name)-12s %(levelname)-8s %(message)s",
    level=logging.INFO)

# Input files: a species tree in phyloxml format and the HOGs in orthoXML
phyloxml_path = "simpleEx.phyloxml"
orthoxml_path3 = "simpleEx.orthoxml"

# Build the pyHam analysis from both files, declaring the tree format explicitly
pyham_analysis = pyham.Ham(
    phyloxml_path,
    orthoxml_path3,
    tree_format='phyloxml',
    use_internal_name=True,
)

# The HOG that the html output below is generated for
hog = pyham_analysis.get_hog_by_id('HOG:0355161')

# Write the iHam view of the HOG into an html file
output_name = "iHam{}.html".format(hog.hog_id)
pyham_analysis.create_iHam(outfile=output_name, hog=hog)
Ejemplo n.º 6
0
                second=second_ancestor,
                results_tag=results_tag), 'a') as output_txt:
        for xref in results_xref:
            output_txt.write("{xref}\n".format(xref=xref))


if __name__ == "__main__":
    # Filtered subsets of the ancestral gene sets (left empty here):
    digenea_ancestor_filtered = []
    platy_ancestor_filtered = []
    # Gene id buckets for the vertical comparison (left empty here):
    retained_gene_ids = []
    duplicated_gene_ids = []
    gained_gene_ids = []

    # Run pyHAM on the species tree and the orthoXML file:
    ham_analysis = pyham.Ham(nwk_file, orthoxml_file, use_internal_name=True)

    # Look up the two ancestral genomes by name:
    digenea_ancestor_genome = ham_analysis.get_ancestral_genome_by_name(
        "Digenea")
    platy_ancestor_genome = ham_analysis.get_ancestral_genome_by_name(
        "Platyhelminthes")

    # Every gene pyHAM places in each ancestral genome, copied into a list:
    digenea_ancestor_all = list(digenea_ancestor_genome.genes)
    digenea_report = (
        "According to pyHAM, Digenea ancestor genome model includes {num} genes"
    )
    print(digenea_report.format(num=len(digenea_ancestor_all)))

    platy_ancestor_all = list(platy_ancestor_genome.genes)
    platy_report = (
        "According to pyHAM, Platyhelminthes ancestor genome model includes {num} genes"
    )
    print(platy_report.format(num=len(platy_ancestor_all)))