Example #1
    def _load_go_terms(self, go_terms, feature_id, analysis_id, go_db_id, skip_missing):
        for go_id in go_terms:
            term = go_id
            term_sp = term.split(':')
            if len(term_sp) != 2:
                self.session.rollback()
                raise Exception("Cannot parse GO term {}".format(go_id))
            term_db = term_sp[0]
            term_acc = term_sp[1]

            try:
                goterm_id = self.ci.get_cvterm_id(term_acc, term_db)
            except chado.RecordNotFoundError:
                goterm_id = None

            if not goterm_id:
                if skip_missing:
                    warn('Could not find term with name "%s", skipping it', term_acc)
                    continue
                else:
                    raise Exception('Could not find term with name "%s"' % term_acc)

            # Insert the GO term into the feature_cvterm table, using the
            # default pub_id of 1 (the null publication), but only if the
            # association is not already there
            self._add_feat_cvterm_with_id(feature_id, goterm_id)

            # Insert Go terms into the analysisfeatureprop table but only if it
            # doesn't already exist
            self._add_analysis_feature(feature_id, analysis_id, goterm_id, term_acc)
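
The DB:accession convention this helper relies on can be checked in isolation; a minimal sketch with an illustrative term:

go_id = "GO:0008150"
# A GO identifier is expected as "<db>:<accession>"
term_db, term_acc = go_id.split(':')
assert (term_db, term_acc) == ("GO", "0008150")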
Example #2
    def _parse_interpro_xml5(self, analysis_id, organism_id, xml, parse_go, re_name, query_type, skip_missing):
        res = self.session.query(self.model.db).filter_by(name="GO")
        if res.count():
            go_db_id = res.one().db_id
        else:
            warn("Goterm loading was requested but the GO schema is not installed in chado, skipping")
            go_db_id = False

        total_count = 0
        for entity in xml:
            total_count += 1
            for child in entity:
                child_name = child.tag
                if child_name == "xref":
                    seq_id = child.get('id')
                    try:
                        feature_id = self._match_feature(seq_id, re_name, query_type, organism_id, skip_missing=False)  # force an exception on failure so we can fall back to matching by name
                    except RecordNotFoundError:
                        seq_name = child.get('name', "")
                        feature_id = self._match_feature(seq_name, re_name, query_type, organism_id, skip_missing)
                    if skip_missing and feature_id is None:
                        continue
                    analysisfeature_id = self._add_analysis_feature_ipr(feature_id, analysis_id, entity)
                    if not analysisfeature_id:
                        continue
                    ipr_array = self._parse_feature_xml(entity, feature_id)
                    ipr_terms = ipr_array["iprterms"]
                    self._load_ipr_terms(ipr_terms, feature_id, analysis_id, skip_missing)

                    if parse_go and go_db_id:
                        self._load_go_terms(ipr_array["goterms"], feature_id, analysis_id, go_db_id, skip_missing)
        return total_count
Example #3
    def _load_ipr_terms(self, ipr_terms, feature_id, analysis_id, skip_missing):
        for ipr_id, ipr_term in ipr_terms.items():
            if (ipr_term["ipr_name"] and ipr_term["ipr_name"] != 'noIPR'):
                # currently there is no InterPro Ontology OBO file so we can't
                # load the IPR terms that way, we need to just add them
                # as we encounter them. If the term already exists
                # we do not want to update it.

                # Check using IPRnumber (in case ipr_name changed at some point in time)
                if ipr_id in self._interpro_cache:
                    cvterm_id = self._interpro_cache[ipr_id]
                else:
                    cvterm_id = self.ci.create_cvterm(ipr_term['ipr_name'], 'INTERPRO', 'INTERPRO', term_definition=ipr_term['ipr_desc'], accession=ipr_id)
                    if not cvterm_id:
                        if skip_missing:
                            warn('Could not find cvterm %s %s, skipping it', ipr_id, ipr_term['ipr_name'])
                            continue
                        else:
                            raise Exception('Could not find cvterm %s %s' % (ipr_id, ipr_term['ipr_name']))
                    self._interpro_cache[ipr_id] = cvterm_id

                # Insert IPR terms into the feature_cvterm table
                # the default pub_id of 1 (NULL) is used. if the cvterm already exists then just skip adding it
                self._add_feat_cvterm_with_id(feature_id, cvterm_id)

                # Insert IPR terms into the analysisfeatureprop table but only if it
                # doesn't already exist
                self._add_analysis_feature(feature_id, analysis_id, cvterm_id, ipr_id)
Example #4
    def _match_feature(self,
                       feature_id,
                       re_name,
                       query_type,
                       organism_id,
                       skip_missing=False):

        seqterm = self.ci.get_cvterm_id(query_type, 'sequence')

        if re_name:
            re_res = re.search(re_name, feature_id)
            if re_res:
                feature_id = re_res.group(1)

        cache_id = (feature_id, organism_id, seqterm)

        if cache_id not in self._feature_cache:
            if skip_missing:
                warn('Could not find feature with name "%s", skipping it',
                     feature_id)
                return None
            else:
                raise RecordNotFoundError(
                    'Could not find feature with name "%s"' % feature_id)

        return self._feature_cache[cache_id]['feature_id']
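
How the re_name extraction behaves can be sketched on its own (the pattern and feature name below are hypothetical):

import re

# The first capturing group of re_name becomes the lookup key
re_name = r'^(.*?)\.\d+$'      # hypothetical: strip a ".1" version suffix
feature_name = 'scaffold42.1'
m = re.search(re_name, feature_name)
if m:
    feature_name = m.group(1)
print(feature_name)  # -> scaffold42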
Example #5
def cli(ctx, url=None, api_key=None, admin=False, **kwds):
    """Help initialize global configuration (in home directory)
    """

    click.echo("""Welcome to Chado's Chakin! (茶巾)""")
    if os.path.exists(config.global_config_path()):
        info(
            "Your chakin configuration already exists. Please edit it instead: %s"
            % config.global_config_path())
        return 0

    while True:
        # Check environment
        dbhost = click.prompt("PGHOST")
        dbname = click.prompt("PGDATABASE")
        dbuser = click.prompt("PGUSER")
        dbpass = click.prompt("PGPASS", hide_input=True)
        dbport = click.prompt("PGPORT")
        schema = click.prompt("PGSCHEMA")

        info("Testing connection...")
        try:
            instance = ChadoInstance(dbhost=dbhost,
                                     dbname=dbname,
                                     dbuser=dbuser,
                                     dbpass=dbpass,
                                     dbport=dbport,
                                     dbschema=schema)
            # We do a connection test during startup.
            info("Ok! Everything looks good.")
            break
        except Exception as e:
            warn(
                "Error, we could not access the configuration data for your instance: %s",
                e)
            should_break = click.prompt(
                "Continue despite inability to contact this instance? [y/n]")
            if should_break in ('Y', 'y'):
                break

    config_path = config.global_config_path()
    if os.path.exists(config_path):
        warn("File %s already exists, refusing to overwrite." % config_path)
        return -1

    with open(config_path, "w") as f:
        f.write(
            CONFIG_TEMPLATE % {
                'dbhost': dbhost,
                'dbname': dbname,
                'dbuser': dbuser,
                'dbpass': dbpass,
                'dbport': dbport,
                'schema': schema,
            })
        info(SUCCESS_MESSAGE)
Example #6
    def _parse_interpro_xml4(self, analysis_id, organism_id, xml,
                             interpro_file, parse_go, re_name, query_type,
                             skip_missing):
        # If there is an EBI header, skip it and use the second element of the
        # array as our proteins list; this occurs when the results were
        # generated with the online InterProScan tool. If the XML starts
        # directly with the results, InterProScan was run on the command line
        # and we can use the object as-is.
        res = self.session.query(self.model.db).filter_by(name="GO")
        if res.count():
            go_db_id = res.one().db_id
        else:
            warn(
                "Goterm loading was requested but the GO schema is not installed in chado, skipping"
            )
            go_db_id = False

        total_count = 0
        if re.search("^EBIInterProScanResults", xml.tag):
            proteins = xml[1]
        elif re.search("^interpro_matches", xml.tag):
            proteins = xml

        for protein in proteins:
            total_count += 1
            # match the protein id with the feature name
            feature_id = 0
            seqid = protein.get('id')
            # Remove the _ORF suffix from the sequence name, if present
            match = re.search(r'^(.+)_\d+_ORF\d+.*', seqid)
            if match:
                seqid = match.group(1)
            # match the name of the feature in the XML file to a feature in Chado
            feature_id = self._match_feature(seqid, re_name, query_type,
                                             organism_id, skip_missing)
            if not feature_id:
                continue
            # Create an entry in the analysisfeature table and add the XML for this feature
            # to the analysisfeatureprop table
            analysisfeature_id = self._add_analysis_feature_ipr(
                feature_id, analysis_id, protein)
            if not analysisfeature_id:
                continue

            # parse the xml
            ipr_array = self._parse_feature_xml(protein, feature_id)
            ipr_terms = ipr_array['iprterms']
            # Add IPR terms
            self._load_ipr_terms(ipr_terms, feature_id, analysis_id,
                                 skip_missing)

            if parse_go and go_db_id:
                self._load_go_terms(ipr_array["goterms"], feature_id,
                                    analysis_id, go_db_id, skip_missing)
        return total_count
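
The _ORF suffix handling above can be exercised standalone (the sequence names are made up):

import re

# InterProScan 4 names ORFs like "<seq>_<n>_ORF<m>"; keep only <seq>
for seqid in ("contig7_1_ORF3", "plain_name"):
    match = re.search(r'^(.+)_\d+_ORF\d+.*', seqid)
    print(match.group(1) if match else seqid)
# -> contig7
# -> plain_name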
Example #7
    def _create_biomaterial(self, biomaterial_name, organism_id, analysis_id=None,
                            biosourceprovider_id=None, dbxref_id=None, description=None):

        # Check if the biomaterial already exists
        res_biomaterial = self.session.query(self.model.biomaterial).filter_by(name=biomaterial_name)
        biomaterial_id = ""
        if res_biomaterial.count():
            biomaterial_id = res_biomaterial.one().biomaterial_id
            # Do not update if not set and existing in DB
            if not description:
                description = res_biomaterial.one().description
            if not dbxref_id:
                dbxref_id = res_biomaterial.one().dbxref_id
            if not biosourceprovider_id:
                biosourceprovider_id = res_biomaterial.one().biosourceprovider_id

        analysis_name = ""
        if analysis_id:
            res_analysis = self.session.query(self.model.analysis).filter_by(analysis_id=analysis_id)
            if res_analysis.count():
                analysis_name = res_analysis.one().name
            else:
                warn("Analysis not found: will ignore")

        if (not description and analysis_name):
            description = 'This biomaterial: ' + biomaterial_name + ', was created for the analysis: ' + analysis_name

        if not biomaterial_id:
            biomat = self.model.biomaterial()
            biomat.name = biomaterial_name
            biomat.description = description
            biomat.taxon_id = organism_id
            biomat.biosourceprovider_id = biosourceprovider_id
            biomat.dbxref_id = dbxref_id
            self.session.add(biomat)
            self.session.flush()
            self.session.refresh(biomat)
            biomaterial_id = biomat.biomaterial_id
        else:
            self.session.query(self.model.biomaterial).filter_by(biomaterial_id=biomaterial_id).update({
                'description': description,
                'biosourceprovider_id': biosourceprovider_id,
                'dbxref_id': dbxref_id
            })

        return biomaterial_id
Example #8
    def _add_target(self, feat, target_str):

        target = target_str.split(' ')
        if len(target) != 3 and len(target) != 4:
            warn('Malformed Target value: {}, skipping'.format(target_str))
            return

        strand = 1
        if len(target) == 4:
            if target[3] == '+':
                strand = 1
            elif target[3] == '-':
                strand = -1
            else:
                warn('Malformed Target value (bad strand): {}, skipping'.format(target_str))
                return

        landmark_str = target[0]
        landmark = None
        start = int(target[1])
        end = int(target[2])
        rank = 0
        if feat in self._featureloc_cache:
            rank = len(self._featureloc_cache[feat])

        for x in self._feature_cache:
            if x[0] == landmark_str:
                landmark = self._feature_cache[x]['feature_id']
                break

        if landmark is None:
            warn('Malformed Target value (unknown target): {}, skipping'.format(target_str))
            return

        self._do_add_featureloc(landmark, feat, rank, start, end, strand)
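
For reference, the GFF3 Target attribute parsed here has the form "target_id start end [strand]"; a standalone sketch of the same checks (values are hypothetical):

# Hypothetical Target attribute values
for target_str in ("EST123 1 210", "EST123 1 210 -", "broken"):
    parts = target_str.split(' ')
    if len(parts) not in (3, 4) or (len(parts) == 4 and parts[3] not in ('+', '-')):
        print("Malformed Target value: {}, skipping".format(target_str))
        continue
    strand = -1 if parts[3:] == ['-'] else 1
    print(parts[0], int(parts[1]), int(parts[2]), strand)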
Example #9
    def go(self, input, organism_id, analysis_id, query_type='polypeptide', match_on_name=False,
           name_column=2, go_column=5, re_name=None, skip_missing=False):
        """
        Load GO annotation from a tabular file, in the same way as the tripal_analysis_go module does

        :type input: str
        :param input: Path to the input tabular file to load

        :type organism_id: int
        :param organism_id: Organism ID

        :type analysis_id: int
        :param analysis_id: Analysis ID

        :type query_type: str
        :param query_type: The feature type (e.g. 'gene', 'mRNA', 'polypeptide', 'contig') of the query. It must be a valid Sequence Ontology term.

        :type match_on_name: bool
        :param match_on_name: Match features using their name instead of their uniquename

        :type name_column: int
        :param name_column: Column containing the feature identifiers (2, 3, 10 or 11; default=2).

        :type go_column: int
        :param go_column: Column containing the GO id (default=5).

        :type re_name: str
        :param re_name: Regular expression to extract the feature name from the input file (first capturing group will be used).

        :type skip_missing: bool
        :param skip_missing: Skip lines with unknown features or GO id instead of aborting everything.

        :rtype: dict
        :return: Dictionary with the number of inserted GO terms ('inserted' key)
        """

        if analysis_id and len(self.ci.analysis.get_analyses(analysis_id=analysis_id)) != 1:
            raise Exception("Could not find analysis with id '{}'".format(analysis_id))

        if len(self.ci.organism.get_organisms(organism_id=organism_id)) != 1:
            raise Exception("Could not find organism with id '{}'".format(organism_id))

        seqterm = self.ci.get_cvterm_id(query_type, 'sequence')

        # Cache all possibly existing features
        self._reset_cache()
        self._init_feature_cache(organism_id, seqterm, match_on_name)

        # Cache analysisfeature content for given analysis_id
        self._init_analysisfeature_cache(analysis_id)

        self._init_featcvterm_cache()

        # Cache all existing cvterms from GO cv
        db = 'GO'
        self.ci._preload_dbxref2cvterms(db)

        count_ins = 0

        # Parse the tab file
        with open(input) as in_gaf:
            rd = csv.reader(in_gaf, delimiter=str("\t"))
            for row in rd:
                if row[0] and row[0][0] in ('!', '#'):
                    # skip header
                    continue

                term = row[go_column - 1]
                term_sp = term.split(':')
                if len(term_sp) != 2:
                    raise Exception('Malformed term "%s"' % term)
                term_db = term_sp[0]
                term_acc = term_sp[1]

                feat_id = row[name_column - 1]

                feat_id = self._match_feature(feat_id, re_name, query_type, organism_id, skip_missing)
                if skip_missing and feat_id is None:
                    continue

                try:
                    term_id = self.ci.get_cvterm_id(term_acc, term_db)
                except chado.RecordNotFoundError:
                    term_id = None

                if not term_id:
                    if skip_missing:
                        warn('Could not find term with name "%s", skipping it', term_acc)
                        continue
                    else:
                        raise Exception('Could not find term with name "%s"' % term_acc)

                # Add feature<->cvterm association
                self._add_feat_cvterm_with_id(feat_id, term_id)

                # Associate the feature to the analysis
                self._add_analysis_feature(feat_id, analysis_id, term_id, term)

                count_ins += 1

        self.session.commit()

        self._reset_cache()

        return {'inserted': count_ins}
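
A minimal usage sketch for this loader, assuming it is exposed as the load.go method of a connected ChadoInstance (the connection details, IDs and file path are hypothetical):

from chado import ChadoInstance

# Hypothetical connection parameters
ci = ChadoInstance(dbhost="localhost", dbname="chado", dbuser="chado",
                   dbpass="chado", dbport=5432, dbschema="public")

# Load GO IDs found in column 5 of a tab-separated annotation file,
# matching polypeptide features and skipping unknown ones
res = ci.load.go("annotations.tsv", organism_id=1, analysis_id=3,
                 query_type='polypeptide', skip_missing=True)
print(res)  # e.g. {'inserted': 42}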
Example #10
    def _load_gff_feature_with_children(self, rec, f, analysis_id, organism_id, re_protein_capture, re_protein, protein_id_attr, parent=None, no_seq_compute=False):

        # Be tolerant for proteins (shameless hard coding)
        if f.type == 'protein':
            f.type = 'polypeptide'

        if f.type in self._blacklisted_cvterms:
            if 'ID' in f.qualifiers and f.qualifiers['ID']:
                warn("WARNING: skipping feature %s of unknown type %s" % (f.qualifiers['ID'][0], f.type))
            else:
                warn("WARNING: skipping feature of unknown type %s" % (f.type))
            return

        full_transcript_seq = None
        if f.type == 'mRNA':
            seq_exons = []
            seq_cds = []
            min_cds = None
            max_cds = None

            detected_protein_id = None
            if protein_id_attr:
                if protein_id_attr in f.qualifiers and f.qualifiers[protein_id_attr]:
                    detected_protein_id = f.qualifiers[protein_id_attr][0]

            # To compute mRNA and polypeptide
            for subrna in f.sub_features:
                if subrna.type == 'CDS':
                    seq_cds.append(rec.seq[subrna.location.nofuzzy_start:subrna.location.nofuzzy_end])

                    if min_cds is None or subrna.location.start < min_cds:
                        min_cds = subrna.location.start
                    if max_cds is None or subrna.location.end > max_cds:
                        max_cds = subrna.location.end

                    if protein_id_attr and not detected_protein_id:
                        if protein_id_attr in subrna.qualifiers and subrna.qualifiers[protein_id_attr]:
                            detected_protein_id = subrna.qualifiers[protein_id_attr][0]
                if subrna.type == 'exon':
                    seq_exons.append(rec.seq[subrna.location.nofuzzy_start:subrna.location.nofuzzy_end])

            if not no_seq_compute and len(rec.seq) > 0 and str(rec.seq)[0:10] != "??????????":
                if seq_exons:
                    full_transcript_seq = reduce(operator.add, seq_exons)
                elif seq_cds:
                    full_transcript_seq = reduce(operator.add, seq_cds)
                if full_transcript_seq is not None and f.strand == -1:
                    full_transcript_seq = full_transcript_seq.reverse_complement()

        if full_transcript_seq is not None:
            added_feat = self._add_feature_with_attr(rec, f, analysis_id, organism_id, residues=str(full_transcript_seq), parent=parent)
        else:
            added_feat = self._add_feature_with_attr(rec, f, analysis_id, organism_id, parent=parent)

        mrna_has_polypeptide = False
        for subf in f.sub_features:

            self._load_gff_feature_with_children(rec, subf, analysis_id, organism_id, re_protein_capture, re_protein, protein_id_attr, parent=added_feat['feature_id'], no_seq_compute=no_seq_compute)

            if f.type == 'mRNA':
                mrna_has_polypeptide = mrna_has_polypeptide or (subf.type == 'polypeptide')

        # Create a polypeptide feature
        if f.type == 'mRNA' and not mrna_has_polypeptide and min_cds is not None and max_cds is not None:

            if re_protein:
                pep_uname = re.sub(re_protein_capture, re_protein, added_feat['uniquename'])
            elif detected_protein_id:
                pep_uname = detected_protein_id
            else:
                pep_uname = added_feat['uniquename'] + '-protein'
            polypeptide = SeqFeature(FeatureLocation(min_cds, max_cds), type="polypeptide", strand=f.location.strand, qualifiers={'ID': [pep_uname], 'Name': [added_feat['name']]})
            if 'source' in subrna.qualifiers:
                polypeptide.qualifiers['source'] = subrna.qualifiers['source']

            protein_seq = None
            if not no_seq_compute and len(rec.seq) > 0 and str(rec.seq)[0:10] != "??????????":
                full_cds_seq = reduce(operator.add, seq_cds)
                if f.strand == -1:
                    full_cds_seq = full_cds_seq.reverse_complement()
                protein_seq = str(full_cds_seq.translate())

            self._add_feature_with_attr(rec, polypeptide, analysis_id, organism_id, residues=protein_seq, parent=added_feat['feature_id'], parent_rel='derives_from')
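
The re_protein / re_protein_capture naming scheme used above can be sketched alone, with the hypothetical pattern given in the load_gff docstring below:

import re

# Derive a polypeptide uniquename from an mRNA uniquename
re_protein_capture = r"^(.*?)-R([A-Z]+)$"   # capture pattern (hypothetical)
re_protein = r"\1-P\2"                      # replacement (hypothetical)
print(re.sub(re_protein_capture, re_protein, "gene0001-RA"))  # -> gene0001-PA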
Example #11
    def load_gff(self, gff, analysis_id, organism_id, landmark_type=None, re_protein=None, re_protein_capture="^(.*?)$", fasta=None, no_seq_compute=False, quiet=False, add_only=False, protein_id_attr=None):
        """
        Load features from a gff file

        :type gff: str
        :param gff: Path to the GFF file to load

        :type analysis_id: int
        :param analysis_id: Analysis ID

        :type organism_id: int
        :param organism_id: Organism ID

        :type landmark_type: str
        :param landmark_type: Type of the landmarks (e.g. contig); must be a Sequence Ontology term. Providing it speeds up loading.

        :type re_protein: str
        :param re_protein: Replacement string for the protein name using capturing groups defined by --re_protein_capture

        :type re_protein_capture: str
        :param re_protein_capture: Regular expression to capture groups in mRNA name to use in --re_protein (e.g. "^(.*?)-R([A-Z]+)$", default="^(.*?)$")

        :type protein_id_attr: str
        :param protein_id_attr: Attribute containing the protein uniquename. It is searched at the mRNA level, and if not found at CDS level.

        :type fasta: str
        :param fasta: Path to a Fasta file containing sequences for some features. When creating a feature, its sequence is loaded from this file if present; otherwise, for mRNAs and polypeptides, it is computed from the genome sequence when available, or left empty.

        :type no_seq_compute: bool
        :param no_seq_compute: Disable the computation of mRNA and polypeptides sequences based on genome sequence and positions.

        :type quiet: bool
        :param quiet: Hide progress information

        :type add_only: bool
        :param add_only: Use this flag if you're not updating existing features, but just adding new features to the selected analysis and organism. It will speed up loading and reduce memory usage, but may produce errors if a feature already exists.

        :rtype: dict
        :return: Dictionary with the number of inserted features ('inserted' key)
        """

        if len(self.ci.analysis.get_analyses(analysis_id=analysis_id)) != 1:
            raise Exception("Could not find analysis with id '{}'".format(analysis_id))

        if len(self.ci.organism.get_organisms(organism_id=organism_id)) != 1:
            raise Exception("Could not find organism with id '{}'".format(organism_id))

        if protein_id_attr and re_protein:
            raise Exception("--protein_id_attr and --re_protein cannot be used at the same time.")

        self.cache_existing = not add_only

        # Get possible landmarks
        landmarks = self.session.query(self.model.feature.name, self.model.feature.uniquename, self.model.feature.feature_id, self.model.feature.type_id, self.model.feature.organism_id) \
            .filter_by(organism_id=organism_id)
        if landmark_type:
            # Filter by landmark type if provided (else we look for all features)
            landmark_type_id = self.ci.get_cvterm_id(landmark_type, 'sequence')
            landmarks = landmarks.filter(self.model.feature.type_id == landmark_type_id)

        self._landmark_cache = {}
        for lm in landmarks:
            if lm.name not in self._landmark_cache:
                self._landmark_cache[lm.name] = []
            if lm.feature_id not in self._landmark_cache[lm.name]:
                self._landmark_cache[lm.name].append(lm.feature_id)  # There may be multiple landmarks with the same name

            # Also look for uniquename
            if lm.uniquename not in self._landmark_cache:
                self._landmark_cache[lm.uniquename] = []
            if lm.feature_id not in self._landmark_cache[lm.uniquename]:
                self._landmark_cache[lm.uniquename].append(lm.feature_id)

        examiner = GFF.GFFExaminer()
        gff_handle = open(gff)
        gff_limits = examiner.available_limits(gff_handle)
        gff_handle.close()

        # Check that we have all the cvterms in the db
        self._blacklisted_cvterms = []
        for feat_type in gff_limits['gff_type']:
            type_to_check = feat_type[0]
            # Be tolerant for proteins (shameless hard coding)
            if type_to_check == 'protein':
                type_to_check = 'polypeptide'

            # Will raise an exception if not present + keep value in cache
            try:
                self.ci.get_cvterm_id(type_to_check, 'sequence', True)
            except chado.RecordNotFoundError:
                if type_to_check not in self._blacklisted_cvterms:
                    warn("WARNING: will skip features of unknown type: %s", type_to_check)
                    self._blacklisted_cvterms.append(type_to_check)

        # Read optional fasta file
        self._fasta_sequence_cache = {}
        if fasta:
            for record in SeqIO.parse(fasta, "fasta"):
                self._fasta_sequence_cache[record.id] = str(record.seq)

        # Check that all landmarks are there
        for seq_id in gff_limits['gff_id']:
            seq_id = seq_id[0]
            if seq_id not in self._landmark_cache:
                if landmark_type:
                    # Landmark does not exist yet, but we know how to create it
                    lm = SeqFeature(FeatureLocation(0, 1), type=landmark_type, qualifiers={'ID': [seq_id], 'Name': [seq_id]})
                    if seq_id in self._fasta_sequence_cache:
                        added_feat = self._add_feature_with_attr(None, lm, analysis_id, organism_id, have_loc=False, residues=self._fasta_sequence_cache[seq_id])
                    else:
                        added_feat = self._add_feature_with_attr(None, lm, analysis_id, organism_id, have_loc=False)
                    self._landmark_cache[seq_id] = [added_feat['feature_id']]
                else:
                    raise Exception("Could not find landmark named '{}', add --landmark_type to create it".format(seq_id))
            elif len(self._landmark_cache[seq_id]) > 1:
                raise Exception("Found {} landmarks with same name '{}'".format(len(self._landmark_cache[seq_id]), seq_id))

        count_ins = 0

        for rec in GFF.parse(gff):

            # Preload landmark seq to compute some seqs on it
            # We compare to ????... as the gff parser will populate rec.seq with a fake sequence based on the size from "sequence-region" header
            if not no_seq_compute:
                if rec.id in self._fasta_sequence_cache:
                    rec.seq = Seq.Seq(self._fasta_sequence_cache[rec.id])
                    del self._fasta_sequence_cache[rec.id]  # Save a little memory
                elif len(rec.seq) == 0 or str(rec.seq)[0:10] == "??????????":
                    seq_res = self.session.query(self.model.feature.residues) \
                        .filter(self.model.feature.uniquename == rec.id)

                    if landmark_type:
                        seq_res = seq_res.filter(self.model.feature.type_id == landmark_type_id)

                    seq_res = seq_res.all()

                    if len(seq_res) == 1 and seq_res[0].residues:
                        rec.seq = Seq.Seq(seq_res[0].residues)

            # Set a custom attr to store the chado feature_id
            rec._chado_feature_id = self._landmark_cache[rec.id][0]
            if not quiet:
                print("Loading features on {}".format(rec.id))

            for f in rec.features:

                self._load_gff_feature_with_children(rec, f, analysis_id, organism_id, re_protein_capture, re_protein, protein_id_attr, no_seq_compute=no_seq_compute)
                count_ins += 1

                if not quiet:
                    print("Inserted feature #{}".format(count_ins))

        self._update_rel_ranks()

        self.session.commit()

        self._reset_cache()

        return {'inserted': count_ins}
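
As with the GO loader, a minimal usage sketch, assuming this method is reachable from a connected ChadoInstance (the attribute path, file paths and IDs are hypothetical):

from chado import ChadoInstance

# Hypothetical connection parameters
ci = ChadoInstance(dbhost="localhost", dbname="chado", dbuser="chado",
                   dbpass="chado", dbport=5432, dbschema="public")

# Load a GFF3 file against an existing analysis and organism, creating
# missing landmarks as contigs and naming polypeptides from mRNA names
res = ci.feature.load_gff("genes.gff3", analysis_id=3, organism_id=1,
                          landmark_type="contig", fasta="genome.fa",
                          re_protein_capture=r"^(.*?)-R([A-Z]+)$",
                          re_protein=r"\1-P\2")
print(res)  # e.g. {'inserted': 1234}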