# Collect, for every non-'source' feature in the input records, the first
# value of each qualifier that is not explicitly excluded, and remember
# which taxonomy id each (feature type, qualifier, value) combination was
# seen in.
#
# Results:
#   feature_dict: '<feature type>.<qualifier>' -> list of first qualifier
#       values, one entry per occurrence (duplicates are kept).
#   taxa_dict: '<feature type>.<qualifier>.<value>' -> list of the ids
#       returned by krncbi.get_ncbi_tax_id_for_record, one entry per
#       occurrence (duplicates are kept).

# NOTE(review): 'gb' presumably selects GenBank format - confirm against
# krbioio.read_sequence_file.
records = krbioio.read_sequence_file(input_file, 'gb', ret_type='list')

# Qualifiers that are never collected.
excluded_qualifiers = [
    'translation',
    'db_xref',
    'exception',
    'rpt_unit_seq',
    'gene_synonym',
    'rpt_type',
    'satellite',
    'transl_table',
    'replace',
    'rpt_unit_range',
    'protein_id',
    'codon_recognized',
    'EC_number',
    'function',
    'estimated_length',
    'mobile_element_type',
    'codon_start',
    'transl_except',
    'number',
    'standard_name',
    'allele',
    'inference',
]

# The list above is kept as-is for any later code that reads it; the
# frozenset is a loop-invariant copy so that the membership test in the
# innermost loop is O(1) instead of a linear scan.
excluded_qualifier_set = frozenset(excluded_qualifiers)

feature_dict = dict()
taxa_dict = dict()

for record in records:
    txid = krncbi.get_ncbi_tax_id_for_record(record)

    for feature in record.features:
        # 'source' features are skipped entirely.
        if feature.type == 'source':
            continue

        for qualifier in feature.qualifiers:
            if qualifier in excluded_qualifier_set:
                continue

            key = feature.type + '.' + qualifier
            # Only the first value stored under a qualifier is used.
            qualifier_label = feature.qualifiers[qualifier][0]
            qualifier_label_key = key + '.' + qualifier_label

            # setdefault creates the list on first sight of a key, keeping
            # both containers plain dicts.
            feature_dict.setdefault(key, []).append(qualifier_label)
            taxa_dict.setdefault(qualifier_label_key, []).append(txid)