Example #1
0
 def clean_genome(self):
     """Resolve the submitted 'genome' field through entrez_utils.

     The value stored under cleaned_data['genome'] is stripped of
     surrounding whitespace and handed to entrez_utils.get_genome; whatever
     that call returns becomes the cleaned value of the field.

     Raises forms.ValidationError when entrez_utils.EntrezException is
     raised by the lookup.
     """
     accession = self.cleaned_data['genome'].strip()
     try:
         return entrez_utils.get_genome(accession)
     except entrez_utils.EntrezException:
         raise forms.ValidationError("Invalid RefSeq accession number.")
Example #2
0
 def clean_genome(self):
     """Resolve the submitted 'genome' field through entrez_utils.

     The value stored under cleaned_data['genome'] is stripped of
     surrounding whitespace and handed to entrez_utils.get_genome; whatever
     that call returns becomes the cleaned value of the field.

     Raises forms.ValidationError when entrez_utils.EntrezException is
     raised by the lookup.
     """
     accession = self.cleaned_data['genome'].strip()
     try:
         return entrez_utils.get_genome(accession)
     except entrez_utils.EntrezException:
         raise forms.ValidationError("Invalid RefSeq accession number.")
Example #3
0
def fill_gene_type_field():
    """Populates the gene_type field for all genes.

    For every Genome row, the genome record is fetched with
    entrez_utils.get_genome and its genes are listed with
    entrez_utils.get_genes. Each Gene row of that genome whose locus_tag
    appears in the listing gets its gene_type overwritten and is saved;
    the others are reported on stdout and left untouched.

    Progress (the accession of each genome being processed) is printed to
    stdout. Returns None.
    """
    for genome in models.Genome.objects.all():
        # Single-argument print() emits the same text on Python 2 and 3,
        # unlike the Python-2-only print statement.
        print(genome.genome_accession)
        rec = entrez_utils.get_genome(genome.genome_accession)
        gene_types = {gene['locus_tag']: gene['gene_type']
                      for gene in entrez_utils.get_genes(rec)}
        # filter() already returns a queryset; a trailing .all() is redundant.
        for gene in models.Gene.objects.filter(genome=genome):
            if gene.locus_tag not in gene_types:
                print("Missing locus tag %s" % (gene.locus_tag,))
                continue
            gene.gene_type = gene_types[gene.locus_tag]
            gene.save()
Example #4
0
    def clean_genome_accession_helper(self, genome_accession):
        """Validate a genome accession, importing the genome if it is new.

        The accession is stripped of surrounding whitespace and must contain
        a '.' (version number) and start with 'NC_'. When no Genome row with
        that accession exists, the genome record, its organism taxon and its
        gene list are requested through entrez_utils, after which
        new_taxonomy and new_genome are called with the fetched data.

        Called for every genome accession field.

        Returns the stripped accession. Raises forms.ValidationError when a
        format check fails or an entrez_utils call raises EntrezException.
        """
        genome_accession = genome_accession.strip()

        has_version = '.' in genome_accession
        if not has_version:
            raise forms.ValidationError("""
                Please enter RefSeq accession number with the version
                number.""")
        if not genome_accession.startswith('NC_'):
            raise forms.ValidationError(
                "RefSeq genome accession number should start with 'NC_'")

        try:
            Genome.objects.get(genome_accession=genome_accession)
        except Genome.DoesNotExist:
            pass
        else:
            # Already stored: nothing has to be downloaded.
            return genome_accession

        # Genome record.
        try:
            ncbi_record = entrez_utils.get_genome(genome_accession)
        except entrez_utils.EntrezException:
            raise forms.ValidationError("""
                Can not fetch genome record from NCBI. Check accession number.
                """)

        # Taxonomy record: called only for its possible EntrezException,
        # the return value is discarded.
        try:
            entrez_utils.get_organism_taxon(ncbi_record)
        except entrez_utils.EntrezException:
            raise forms.ValidationError(
                "Can not fetch strain taxonomy information.")

        # Gene list.
        try:
            gene_list = entrez_utils.get_genes(ncbi_record)
        except entrez_utils.EntrezException:
            raise forms.ValidationError("""
                Can't retrieve list of genes. Check genome accession
                number.""")

        # Create the taxonomy first, then the genome object and its genes.
        taxon = new_taxonomy(ncbi_record)
        new_genome(ncbi_record, gene_list, taxon)
        return genome_accession
Example #5
0
    def clean_genome_accession_helper(self, genome_accession):
        """Validate a genome accession, importing the genome if it is new.

        The accession is stripped of surrounding whitespace and must contain
        a '.' (version number) and start with 'NC_'. When no Genome row with
        that accession exists, the genome record, its organism taxon and its
        gene list are requested through entrez_utils, after which
        new_taxonomy and new_genome are called with the fetched data.

        Called for every genome accession field.

        Returns the stripped accession. Raises forms.ValidationError when a
        format check fails or an entrez_utils call raises EntrezException.
        """
        genome_accession = genome_accession.strip()

        has_version = '.' in genome_accession
        if not has_version:
            raise forms.ValidationError("""
                Please enter RefSeq accession number with the version
                number.""")
        if not genome_accession.startswith('NC_'):
            raise forms.ValidationError(
                "RefSeq genome accession number should start with 'NC_'")

        try:
            Genome.objects.get(genome_accession=genome_accession)
        except Genome.DoesNotExist:
            pass
        else:
            # Already stored: nothing has to be downloaded.
            return genome_accession

        # Genome record.
        try:
            ncbi_record = entrez_utils.get_genome(genome_accession)
        except entrez_utils.EntrezException:
            raise forms.ValidationError("""
                Can not fetch genome record from NCBI. Check accession number.
                """)

        # Taxonomy record: called only for its possible EntrezException,
        # the return value is discarded.
        try:
            entrez_utils.get_organism_taxon(ncbi_record)
        except entrez_utils.EntrezException:
            raise forms.ValidationError(
                "Can not fetch strain taxonomy information.")

        # Gene list.
        try:
            gene_list = entrez_utils.get_genes(ncbi_record)
        except entrez_utils.EntrezException:
            raise forms.ValidationError("""
                Can't retrieve list of genes. Check genome accession
                number.""")

        # Create the taxonomy first, then the genome object and its genes.
        taxon = new_taxonomy(ncbi_record)
        new_genome(ncbi_record, gene_list, taxon)
        return genome_accession
Example #6
0
def batch_refseq_accession(genome_accession):
    """Retrieves product NP/YP/WP accession numbers of the given genome.

    The genome record is fetched with entrez_utils.get_genome and its genes
    are listed with entrez_utils.get_genes. Only genes whose 'protein_id'
    list holds exactly one entry contribute; that entry is stored as
    returned, without modification.

    Returns dictionary {locus_tag: RefSeq_acc}. Every entry of a gene's
    'old_locus_tag' list is added as an extra key mapping to the same
    accession. Prints a "Downloading ..." progress line to stdout.
    """
    # Single-argument print() with %-formatting emits the same text on
    # Python 2 and 3, unlike the Python-2-only print statement.
    print("Downloading %s" % (genome_accession,))
    genome_rec = entrez_utils.get_genome(genome_accession)
    gene_dict = {}
    for gene in entrez_utils.get_genes(genome_rec):
        protein_ids = gene['protein_id']
        # Skip genes with no protein id or with more than one.
        if not protein_ids or len(protein_ids) != 1:
            continue
        protein_id = protein_ids[0]
        gene_dict[gene['locus_tag']] = protein_id
        for old_locus_tag in gene['old_locus_tag']:
            gene_dict[old_locus_tag] = protein_id
    return gene_dict
Example #7
0
def batch_refseq_accession(genome_accession):
    """Retrieves product NP/YP/WP accession numbers of the given genome.

    The genome record is fetched with entrez_utils.get_genome and its genes
    are listed with entrez_utils.get_genes. Only genes whose 'protein_id'
    list holds exactly one entry contribute; that entry is stored as
    returned, without modification.

    Returns dictionary {locus_tag: RefSeq_acc}. Every entry of a gene's
    'old_locus_tag' list is added as an extra key mapping to the same
    accession. Prints a "Downloading ..." progress line to stdout.
    """
    # Single-argument print() with %-formatting emits the same text on
    # Python 2 and 3, unlike the Python-2-only print statement.
    print("Downloading %s" % (genome_accession,))
    genome_rec = entrez_utils.get_genome(genome_accession)
    gene_dict = {}
    for gene in entrez_utils.get_genes(genome_rec):
        protein_ids = gene['protein_id']
        # Skip genes with no protein id or with more than one.
        if not protein_ids or len(protein_ids) != 1:
            continue
        protein_id = protein_ids[0]
        gene_dict[gene['locus_tag']] = protein_id
        for old_locus_tag in gene['old_locus_tag']:
            gene_dict[old_locus_tag] = protein_id
    return gene_dict