Ejemplo n.º 1
0
 def __init__(self, **kwargs):
     """
     Initializes the name-tracking state from keyword arguments.

     Recognized keywords (each is popped from ``kwargs``):

     - ``is_case_sensitive`` (default ``False``): when true, plain
       dictionaries are used for the name maps; otherwise
       ``OrderedCaselessDict`` instances are used.
     - ``is_fail_on_extra_tree_lineages`` (default ``True``).
     - ``is_fail_on_extra_configuration_lineages`` (default ``True``).
     - ``logger`` (required): raises ``KeyError`` if not given.
     """
     self.tree_lineage_names = None
     self.config_lineage_names = None
     self.is_case_sensitive = kwargs.pop("is_case_sensitive", False)
     self.is_fail_on_extra_tree_lineages = kwargs.pop(
         "is_fail_on_extra_tree_lineages", True)
     self.is_fail_on_extra_configuration_lineages = kwargs.pop(
         "is_fail_on_extra_configuration_lineages", True)
     self.logger = kwargs.pop("logger")
     self.original_to_normalized_lineage_name_map = {}
     self.config_name_normalization_report = {}
     self.preanalysis_constrained_species_lineages_map = {}
     if self.is_case_sensitive:
         self.normalized_tree_lineage_names = {}
         self.normalized_config_lineage_names = {}
         # Both branches must define ``normalized_species_names``;
         # previously only ``species_names`` was created here, leaving
         # the attribute undefined in case-sensitive mode.
         self.normalized_species_names = {}
         # Old attribute name kept as an alias of the same dictionary.
         self.species_names = self.normalized_species_names
         self.preanalysis_constrained_lineage_species_map = {}
     else:
         self.normalized_tree_lineage_names = OrderedCaselessDict()
         self.normalized_config_lineage_names = OrderedCaselessDict()
         self.normalized_species_names = OrderedCaselessDict()
         self.preanalysis_constrained_lineage_species_map = OrderedCaselessDict(
         )
     self.extra_tree_lineage_names = []
     self.extra_configuration_lineages = []
Ejemplo n.º 2
0
 def read_configuration_table_species(self,
         conf_lineage_species_map,
         conf_constrained_lineages):
     """
     Records the species assignment of every constrained lineage.

     Only lineages of ``conf_lineage_species_map`` that also appear in
     ``conf_constrained_lineages`` are considered. Species names are
     mapped through ``self.normalized_species_names`` (first spelling
     seen is the one stored), lineage names through
     ``self.original_to_normalized_lineage_name_map``. Unknown lineages
     and lineages assigned twice are reported with
     ``utility.error_exit``. A summary report is logged at the end.
     """
     # Membership table for the constrained lineages; its type decides
     # whether the lookup is case-sensitive.
     constrained = {} if self.is_case_sensitive else OrderedCaselessDict()
     for constrained_name in conf_constrained_lineages:
         constrained[constrained_name] = True
     species_by_lineage = self.preanalysis_constrained_lineage_species_map
     lineages_by_species = self.preanalysis_constrained_species_lineages_map
     for lineage_name in conf_lineage_species_map:
         if lineage_name not in constrained:
             continue
         species_name = conf_lineage_species_map[lineage_name]
         if species_name in self.normalized_species_names:
             # Reuse the spelling registered earlier.
             species_name = self.normalized_species_names[species_name]
         else:
             self.normalized_species_names[species_name] = species_name
         try:
             normalized_lineage_name = self.original_to_normalized_lineage_name_map[lineage_name]
         except KeyError:
             utility.error_exit(
                     msg="Lineage '{}' not defined (missing on tree?)".format(lineage_name),
                     logger=self.logger)
         if normalized_lineage_name in species_by_lineage:
             utility.error_exit(
                     msg="Duplicate lineage species assignment: '{}'".format(normalized_lineage_name),
                     logger=self.logger)
         species_by_lineage[normalized_lineage_name] = species_name
         if species_name in lineages_by_species:
             lineages_by_species[species_name].add(normalized_lineage_name)
         else:
             lineages_by_species[species_name] = {normalized_lineage_name}
     self.preanalysis_constrained_species_report()
Ejemplo n.º 3
0
 def __init__(self, citation=None):
     """
     Sets up internal dictionary of BibTeX fields, and initializes
     if argument is given.

     ``citation`` may be:

     - another ``BibTexEntry``: its fields, ``bibtype`` and ``citekey``
       are copied;
     - a ``dict``: keys are lower-cased and stored as fields, and the
       ``"bibtype"`` / ``"citekey"`` entries (if any) are also used to
       set the corresponding attributes;
     - ``None`` (the default): an empty entry is created;
     - anything else: passed to ``parse_text`` as BibTeX source text.
     """
     self.bibtype = None
     self.citekey = None
     if isinstance(citation, BibTexEntry):
         self._entry_dict = OrderedCaselessDict(citation._entry_dict)
         # A copy must keep the identity of the entry, not just its fields.
         self.bibtype = citation.bibtype
         self.citekey = citation.citekey
     elif isinstance(citation, dict):
         self._entry_dict = OrderedCaselessDict()
         for k, v in citation.items():
             self._entry_dict[k.lower()] = v
         self.bibtype = self._entry_dict.get("bibtype", None)
         self.citekey = self._entry_dict.get("citekey", None)
     else:
         self._entry_dict = OrderedCaselessDict()
         # The default ``None`` means "empty entry"; there is nothing to parse.
         if citation is not None:
             self.parse_text(citation)
Ejemplo n.º 4
0
 def __init__(self, citation=None):
     """
     Sets up internal dictionary of BibTeX fields, and initializes
     if argument is given.

     ``citation`` may be:

     - another ``BibTexEntry``: its fields, ``bibtype`` and ``citekey``
       are copied;
     - a ``dict``: keys are lower-cased and stored as fields, and the
       ``"bibtype"`` / ``"citekey"`` entries (if any) are also used to
       set the corresponding attributes;
     - ``None`` (the default): an empty entry is created;
     - anything else: passed to ``parse_text`` as BibTeX source text.
     """
     self.bibtype = None
     self.citekey = None
     if isinstance(citation, BibTexEntry):
         self._entry_dict = OrderedCaselessDict(citation._entry_dict)
         # A copy must keep the identity of the entry, not just its fields.
         self.bibtype = citation.bibtype
         self.citekey = citation.citekey
     elif isinstance(citation, dict):
         self._entry_dict = OrderedCaselessDict()
         for k, v in citation.items():
             self._entry_dict[k.lower()] = v
         self.bibtype = self._entry_dict.get("bibtype", None)
         self.citekey = self._entry_dict.get("citekey", None)
     else:
         self._entry_dict = OrderedCaselessDict()
         # The default ``None`` means "empty entry"; there is nothing to parse.
         if citation is not None:
             self.parse_text(citation)
Ejemplo n.º 5
0
class Registry(object):
    """
    Reconciles lineage names found on a tree with lineage names found in
    a configuration, and tracks species assignments of lineages.

    Callers are expected to set ``tree_lineage_names`` and
    ``config_lineage_names`` before calling the normalization, validation
    or reporting methods.
    """

    def __init__(self, **kwargs):
        """
        Initializes the name-tracking state from keyword arguments.

        Recognized keywords (each is popped from ``kwargs``):

        - ``is_case_sensitive`` (default ``False``): when true, plain
          dictionaries are used for the name maps; otherwise
          ``OrderedCaselessDict`` instances are used.
        - ``is_fail_on_extra_tree_lineages`` (default ``True``).
        - ``is_fail_on_extra_configuration_lineages`` (default ``True``).
        - ``logger`` (required): raises ``KeyError`` if not given.
        """
        self.tree_lineage_names = None
        self.config_lineage_names = None
        self.is_case_sensitive = kwargs.pop("is_case_sensitive", False)
        self.is_fail_on_extra_tree_lineages = kwargs.pop(
            "is_fail_on_extra_tree_lineages", True)
        self.is_fail_on_extra_configuration_lineages = kwargs.pop(
            "is_fail_on_extra_configuration_lineages", True)
        self.logger = kwargs.pop("logger")
        self.original_to_normalized_lineage_name_map = {}
        self.config_name_normalization_report = {}
        self.preanalysis_constrained_species_lineages_map = {}
        if self.is_case_sensitive:
            self.normalized_tree_lineage_names = {}
            self.normalized_config_lineage_names = {}
            # Both branches must define ``normalized_species_names``: the
            # species methods below read it regardless of case mode.
            self.normalized_species_names = {}
            # Old attribute name kept as an alias of the same dictionary.
            self.species_names = self.normalized_species_names
            self.preanalysis_constrained_lineage_species_map = {}
        else:
            self.normalized_tree_lineage_names = OrderedCaselessDict()
            self.normalized_config_lineage_names = OrderedCaselessDict()
            self.normalized_species_names = OrderedCaselessDict()
            self.preanalysis_constrained_lineage_species_map = OrderedCaselessDict(
            )
        self.extra_tree_lineage_names = []
        self.extra_configuration_lineages = []

    def normalize_lineage_names(self):
        """
        Maps every configuration lineage name onto the spelling used on
        the tree, then logs a normalization report.

        Configuration names with no counterpart on the tree are only
        noted here ("(NOT FOUND ON TREE)"); no error is raised by this
        method.
        """
        # tree lineages give the canonical orthography
        for lineage in self.tree_lineage_names:
            self.normalized_tree_lineage_names[lineage] = lineage
            self.original_to_normalized_lineage_name_map[lineage] = lineage
        normalized_configuration_lineages = {}
        extra_configuration_lineages = set()
        for lineage in self.config_lineage_names:
            self.normalized_config_lineage_names[lineage] = lineage
            try:
                normalized_name = self.normalized_tree_lineage_names[lineage]
            except KeyError:
                # This is a serious error: it means that the configuration file
                # has a taxon that is not on the tree. But we handle this issue
                # later so a full report can be shown
                self.config_name_normalization_report[
                    lineage] = "(NOT FOUND ON TREE)"
                extra_configuration_lineages.add(lineage)
                continue
            self.original_to_normalized_lineage_name_map[
                lineage] = normalized_name
            if normalized_name != lineage:
                self.config_name_normalization_report[
                    lineage] = "(NORMALIZED TO: '{}')".format(normalized_name)
                normalized_configuration_lineages[lineage] = normalized_name
            else:
                self.config_name_normalization_report[lineage] = ""
        self.normalization_report(
            normalized_configuration_lineages=normalized_configuration_lineages,
            extra_configuration_lineages=extra_configuration_lineages)

    def read_configuration_table_species(self, conf_lineage_species_map,
                                         conf_constrained_lineages):
        """
        Records the species assignment of every constrained lineage.

        Only lineages of ``conf_lineage_species_map`` that also appear in
        ``conf_constrained_lineages`` are considered. Unknown lineages and
        lineages assigned twice are reported with ``utility.error_exit``.
        A summary report is logged at the end.
        """
        if self.is_case_sensitive:
            nccl = {}
        else:
            nccl = OrderedCaselessDict()
        for ln in conf_constrained_lineages:
            nccl[ln] = True
        for lineage_name in conf_lineage_species_map:
            if lineage_name not in nccl:
                continue
            species_name = conf_lineage_species_map[lineage_name]
            # The first spelling seen for a species is the one kept.
            if species_name not in self.normalized_species_names:
                self.normalized_species_names[species_name] = species_name
            else:
                species_name = self.normalized_species_names[species_name]
            try:
                normalized_lineage_name = self.original_to_normalized_lineage_name_map[
                    lineage_name]
            except KeyError:
                utility.error_exit(
                    msg="Lineage '{}' not defined (missing on tree?)".format(
                        lineage_name),
                    logger=self.logger)
            if normalized_lineage_name in self.preanalysis_constrained_lineage_species_map:
                utility.error_exit(
                    msg="Duplicate lineage species assignment: '{}'".format(
                        normalized_lineage_name),
                    logger=self.logger)
            self.preanalysis_constrained_lineage_species_map[
                normalized_lineage_name] = species_name
            try:
                self.preanalysis_constrained_species_lineages_map[
                    species_name].add(normalized_lineage_name)
            except KeyError:
                self.preanalysis_constrained_species_lineages_map[
                    species_name] = set([normalized_lineage_name])
        self.preanalysis_constrained_species_report()

    def compile_configuration_species_groupings(self,
                                                species_leafset_constraints):
        """
        Assigns each group of lineages in ``species_leafset_constraints``
        to a generated species name ("ConstrainedSp001", "ConstrainedSp002",
        ...), then logs a summary report.

        Lineages not known to the name map are reported with
        ``utility.error_exit``.
        """
        for spi, sp in enumerate(species_leafset_constraints):
            species_name = "ConstrainedSp{:03d}".format(spi + 1)
            self.normalized_species_names[species_name] = species_name
            for lineage_name in sp:
                try:
                    normalized_lineage_name = self.original_to_normalized_lineage_name_map[
                        lineage_name]
                except KeyError:
                    utility.error_exit(
                        msg="Lineage '{}' not defined (missing on tree?)".
                        format(lineage_name),
                        logger=self.logger)
                self.preanalysis_constrained_lineage_species_map[
                    normalized_lineage_name] = species_name
                try:
                    self.preanalysis_constrained_species_lineages_map[
                        species_name].add(normalized_lineage_name)
                except KeyError:
                    self.preanalysis_constrained_species_lineages_map[
                        species_name] = set([normalized_lineage_name])
        self.preanalysis_constrained_species_report()

    def preanalysis_constrained_species_report(self):
        """
        Logs three tables: the constrained species with their lineage
        counts, the constrained lineages with their species, and the tree
        lineages that have no species constraint.
        """
        species_names = sorted(
            self.preanalysis_constrained_species_lineages_map.keys())
        num_lineages = [
            "({} lineages)".format(
                len(self.preanalysis_constrained_species_lineages_map[n]))
            for n in species_names
        ]
        stbl = utility.compose_table(
            columns=[
                species_names,
                num_lineages,
            ],
            prefixes=["", ""],
            quoted=[True, False],
            is_indexed=True,
            indent="    ")
        self.logger.info(
            "{} species defined in configuration constraints, with {} lineages assigned:\n{}"
            .format(
                len(species_names),
                len(self.preanalysis_constrained_lineage_species_map),
                stbl,
            ))
        # Ordered by species first, then by lineage name within a species.
        constrained_lineages = sorted(
            self.preanalysis_constrained_lineage_species_map.keys(),
            key=lambda n:
            (self.preanalysis_constrained_lineage_species_map[n], n))
        species_assignments = [
            "(SPECIES: '{}')".format(
                self.preanalysis_constrained_lineage_species_map[n])
            for n in constrained_lineages
        ]
        lntbl = utility.compose_table(
            columns=[
                constrained_lineages,
                species_assignments,
            ],
            prefixes=["", ""],
            quoted=[True, False],
            is_indexed=True,
            indent="    ")
        self.logger.info(
            "{} out of {} lineages assigned by constraints to {} species:\n{}".
            format(
                len(constrained_lineages),
                len(self.tree_lineage_names),
                len(species_names),
                lntbl,
            ))
        unconstrained_lineages = sorted(
            n for n in self.tree_lineage_names
            if n not in self.preanalysis_constrained_lineage_species_map)
        lntbl = utility.compose_table(
            columns=[
                unconstrained_lineages,
            ],
            prefixes=[""],
            quoted=[True],
            is_indexed=True,
            indent="    ")
        self.logger.info(
            "{} out of {} lineages not constrained by species assignments:\n{}"
            .format(
                len(unconstrained_lineages),
                len(self.tree_lineage_names),
                lntbl,
            ))
        # Sanity check: every tree lineage is either constrained or not.
        assert len(unconstrained_lineages) + len(constrained_lineages) == len(
            self.tree_lineage_names)

    def normalization_report(self, normalized_configuration_lineages,
                             extra_configuration_lineages):
        """
        Logs the tree lineages and how the configuration lineages relate
        to them.

        ``normalized_configuration_lineages`` maps configuration names to
        the differing tree spelling they were normalized to;
        ``extra_configuration_lineages`` holds configuration names not
        found on the tree. When the latter is non-empty, the full
        configuration list is shown with per-name notes; otherwise only
        the normalized names (if any) are listed.
        """
        treetbl = utility.compose_table(
            columns=[
                self.tree_lineage_names,
                [
                    "(NOT FOUND IN CONFIGURATION)"
                    if lineage not in self.normalized_config_lineage_names
                    else "" for lineage in self.tree_lineage_names
                ],
            ],
            prefixes=["", ""],
            quoted=[True, False],
            is_indexed=True,
            indent="    ")
        self.logger.info("{} lineages found on population tree:\n{}".format(
            len(self.tree_lineage_names),
            treetbl,
        ))
        if extra_configuration_lineages:
            cfntbl = utility.compose_table(
                columns=[
                    self.config_lineage_names,
                    [
                        self.config_name_normalization_report[n]
                        for n in self.config_lineage_names
                    ]
                ],
                prefixes=["", ""],
                quoted=[True, False],
                is_indexed=True,
                indent="    ")
            self.logger.info(
                "{} lineages found in configuration file:\n{}".format(
                    len(self.config_lineage_names),
                    cfntbl,
                ))
        elif normalized_configuration_lineages:
            n1 = list(normalized_configuration_lineages.keys())
            n2 = [normalized_configuration_lineages[k] for k in n1]
            cfntbl = utility.compose_table(
                columns=[
                    n1,
                    n2,
                ],
                prefixes=["", "NORMALIZED TO: "],
                quoted=[True, True],
                is_indexed=True,
                indent="    ")
            self.logger.info(
                "{} lineages found in configuration file, with the following normalized for concordance with tree lineages:\n{}"
                .format(
                    len(self.config_lineage_names),
                    cfntbl,
                ))
        else:
            self.logger.info(
                "{} lineages found in configuration file fully concordant with tree lineages"
                .format(len(self.config_lineage_names), ))

    def validate_lineage_names(self):
        """
        Finds lineages present on only one side (tree or configuration)
        and logs them.

        Each kind of mismatch is logged as an error or as a warning
        depending on the matching ``is_fail_on_extra_*`` flag. If any
        mismatch is logged as an error, ``utility.error_exit`` is called.
        The names found are appended to ``extra_configuration_lineages``
        and ``extra_tree_lineage_names``.
        """
        for lineage in self.config_lineage_names:
            if lineage not in self.normalized_tree_lineage_names:
                self.extra_configuration_lineages.append(lineage)
        for lineage in self.tree_lineage_names:
            if lineage not in self.normalized_config_lineage_names:
                self.extra_tree_lineage_names.append(lineage)
        if self.extra_tree_lineage_names:
            s1_error_msg = [
                "{}: {} lineages found on tree but not in configuration data:".
                format(
                    "ERROR"
                    if self.is_fail_on_extra_tree_lineages else "WARNING",
                    len(self.extra_tree_lineage_names))
            ]
            s1_error_msg.append(
                self.compose_name_list(self.extra_tree_lineage_names))
            s1_error_msg = "\n".join(s1_error_msg)
        else:
            s1_error_msg = ""

        if self.extra_configuration_lineages:
            s2_error_msg = [
                "{}: {} lineages found in configuration data but not on tree:".
                format(
                    "ERROR" if self.is_fail_on_extra_configuration_lineages
                    else "WARNING", len(self.extra_configuration_lineages))
            ]
            s2_error_msg.append(
                self.compose_name_list(self.extra_configuration_lineages))
            s2_error_msg = "\n".join(s2_error_msg)
        else:
            s2_error_msg = ""

        # Collects the identifiers ("1": tree, "2": configuration) of the
        # mismatch kinds that are treated as fatal.
        is_fail = []
        if self.extra_tree_lineage_names and self.is_fail_on_extra_tree_lineages:
            self.logger.error(s1_error_msg)
            is_fail.append("1")
        elif s1_error_msg:
            self.logger.warning(s1_error_msg)
        if self.extra_configuration_lineages and self.is_fail_on_extra_configuration_lineages:
            self.logger.error(s2_error_msg)
            is_fail.append("2")
        elif s2_error_msg:
            self.logger.warning(s2_error_msg)
        if is_fail:
            utility.error_exit(
                msg="Lineage identity errors found ({})".format(
                    ", ".join(is_fail)),
                logger=self.logger)

    def compose_name_list(self, names):
        """
        Returns ``names`` rendered by ``utility.compose_table`` as a
        single quoted, indexed, indented column.
        """
        s = utility.compose_table(columns=[names],
                                  prefixes=[""],
                                  quoted=[True],
                                  is_indexed=True,
                                  indent="    ")
        return s

    def compose_report(self):
        """
        Collects summary lines with the number of tree lineages and the
        number of configuration lineages.
        """
        # NOTE(review): as visible here the collected lines are neither
        # returned nor logged -- the method may be truncated; confirm
        # against the full source.
        msg = []
        msg.append("{} terminal lineages on population tree".format(
            len(self.tree_lineage_names)))
        msg.append("{} lineages described in configuration file".format(
            len(self.config_lineage_names)))
Ejemplo n.º 6
0
class BibTexEntry(object):
    """
    Tracks a single BibTeX entry.

    Fields are kept in an internal dictionary (``_entry_dict``) and are
    exposed as object attributes through ``__getattr__``, ``__setattr__``
    and ``__delattr__``. ``bibtype`` and ``citekey`` are stored as
    ordinary instance attributes.
    """
    decompose_pattern = re.compile(r'^@(\w*)\s*{\s*([\w|\:|\-]*),(.*)}')
    # works, but misses last field
    field_pattern = re.compile(
        r'\s*([\w|\-]*?)\s*=\s*(.*?),(?=\s*[\w|\-]*\s*\=)')
    # get the last field
    last_field_pattern = re.compile(r'\s*([\w|\-]*?)\s*=\s*(.*?)\s*[,]*\s*$')

    def __init__(self, citation=None):
        """
        Sets up internal dictionary of BibTeX fields, and initializes
        if argument is given.

        ``citation`` may be:

        - another ``BibTexEntry``: its fields, ``bibtype`` and ``citekey``
          are copied;
        - a ``dict``: keys are lower-cased and stored as fields, and the
          ``"bibtype"`` / ``"citekey"`` entries (if any) are also used to
          set the corresponding attributes;
        - ``None`` (the default): an empty entry is created;
        - anything else: passed to ``parse_text`` as BibTeX source text.
        """
        self.bibtype = None
        self.citekey = None
        if isinstance(citation, BibTexEntry):
            self._entry_dict = OrderedCaselessDict(citation._entry_dict)
            # A copy must keep the identity of the entry, not just its
            # fields.
            self.bibtype = citation.bibtype
            self.citekey = citation.citekey
        elif isinstance(citation, dict):
            self._entry_dict = OrderedCaselessDict()
            for k, v in citation.items():
                self._entry_dict[k.lower()] = v
            self.bibtype = self._entry_dict.get("bibtype", None)
            self.citekey = self._entry_dict.get("citekey", None)
        else:
            self._entry_dict = OrderedCaselessDict()
            # The default ``None`` means "empty entry"; nothing to parse.
            if citation is not None:
                self.parse_text(citation)

    def __getattr__(self, name):
        """
        Allows bibtex fields (and any additional ones) to be treated
        like object attributes.

        Only invoked when normal attribute lookup fails. Returns the
        field value if set, ``""`` for an unset name listed in
        ``BIBTEX_FIELDS``, and raises ``AttributeError`` otherwise.
        """
        entry_dict = self._get_entry_dict()
        if name == '_entry_dict' or name == '_BibTexEntry_entry_dict':
            return entry_dict
        instance_dict = object.__getattribute__(self, '__dict__')
        if name == '__dict__':
            return instance_dict
        # Read the instance dictionary directly: ``hasattr(self, name)``
        # here would call back into this method.
        if name in ('bibtype', 'citekey') and name in instance_dict:
            return instance_dict[name]
        if name in entry_dict:
            return entry_dict[name]
        if name in BIBTEX_FIELDS:
            return ""
        raise AttributeError(name)

    def __setattr__(self, name, value):
        """
        Allows bibtex fields (and any additional ones) to be treated
        like object attributes.

        ``_entry_dict``, ``bibtype`` and ``citekey`` are stored on the
        instance itself; every other name is stored as a field.
        """
        if name == '_entry_dict' or name == '_BibTexEntry_entry_dict':
            # Must go through ``object.__setattr__``; rebinding a local
            # name would silently discard the new dictionary.
            object.__setattr__(self, '_entry_dict', value)
        elif name == 'bibtype' or name == 'citekey':
            object.__setattr__(self, name, value)
        else:
            self._get_entry_dict()[name] = value

    def __delattr__(self, name):
        """
        Allows bibtex fields (and any additional ones) to be treated
        like object attributes.

        Deleting an unset name listed in ``BIBTEX_FIELDS`` is a no-op;
        deleting any other unknown name raises ``AttributeError``.
        """
        entry_dict = self._get_entry_dict()
        if name == '_entry_dict' or name == '_BibTexEntry_entry_dict':
            object.__delattr__(self, '_entry_dict')
        elif name in entry_dict:
            del entry_dict[name]
        elif name in BIBTEX_FIELDS:
            pass
        elif name in object.__getattribute__(self, '__dict__'):
            object.__delattr__(self, name)
        else:
            raise AttributeError(name)

    def __str__(self):
        """
        String representation of self.
        """
        return self.as_bibtex()

    def __repr__(self):
        """
        Internal representation of self: the ``repr`` of a dictionary
        holding ``bibtype``, ``citekey`` and all fields.
        """
        repr_dict = {}
        repr_dict['bibtype'] = self.bibtype
        repr_dict['citekey'] = self.citekey
        repr_dict.update(self.fields_as_dict())
        # ``__repr__`` must return a string, not the dictionary itself.
        return repr(repr_dict)

    def _get_entry_dict(self):
        """
        Returns the internal field dictionary, creating it first if
        necessary.
        """
        # Look in the instance dictionary directly: ``hasattr`` would fall
        # through to ``__getattr__``, which calls this method again.
        instance_dict = object.__getattribute__(self, '__dict__')
        if '_entry_dict' not in instance_dict:
            object.__setattr__(self, '_entry_dict', {})
        return instance_dict['_entry_dict']

    def _get_fields(self):
        """
        Returns list of populated fields in order (does not include
        bibtype and citekey).

        Names listed in ``BIBTEX_FIELDS`` come first, in that order,
        followed by any other populated fields.
        """
        fields = []
        for field in BIBTEX_FIELDS:
            if field in self._entry_dict:
                fields.append(field)
        for key in self._entry_dict:
            if key not in fields:
                fields.append(key)
        return fields

    fields = property(_get_fields)

    def parse_text(self, text):
        """
        Parses a BibTeX text entry.

        Sets ``bibtype`` and ``citekey`` and stores every field found
        (names lower-cased, values passed through ``_clean_parsed_text``).
        Raises ``ValueError`` if the text does not have the overall
        ``@type{key, ...}`` shape.
        """
        text = text.replace("\n", "")
        self.bibtype = None
        self.citekey = None
        text = text.strip()
        decompose_match = self.decompose_pattern.match(text)
        if decompose_match is None:
            raise ValueError("Failed to parse bibtype: {}".format(text))
        self.bibtype = decompose_match.group(1)
        self.citekey = decompose_match.group(2)
        remaining = decompose_match.group(3)
        field_match = self.field_pattern.match(remaining)
        while field_match:
            field_name = field_match.group(1).lower()
            field_value = _clean_parsed_text(field_match.group(2))
            self._entry_dict[field_name] = field_value
            # Drop only the matched prefix; removing every occurrence of
            # the matched text could also eat identical text further on.
            remaining = remaining[field_match.end():]
            field_match = self.field_pattern.match(remaining)
        # ``field_pattern`` needs a following field, so the final one is
        # picked up separately -- if anything is left at all.
        if remaining:
            last_field_match = self.last_field_pattern.match(remaining)
            if last_field_match:
                field_name = last_field_match.group(1).lower()
                field_value = _clean_parsed_text(last_field_match.group(2))
                self._entry_dict[field_name] = field_value

    def fields_as_dict(self):
        """
        Returns the fields (i.e., all public attributes except for
        bibtype and citekey as a dictionary).
        """
        return dict(self._entry_dict)

    def as_bibtex(self, wrap_width=78):
        """
        Composes entry in BibTex format, one field per line.

        Field names are padded to the length of the longest name in
        ``BIBTEX_FIELDS``; every field except ``url`` is formatted with
        wrapping enabled.
        """
        entry = []
        sep = "  =  "
        entry.append('@{}{{{},'.format(self.bibtype, self.citekey))
        fields = self.fields
        maxlen = max([len(field) for field in BIBTEX_FIELDS])
        for field in fields:
            wrap = field != 'url'
            field_header = field.ljust(maxlen)
            field_value = _format_bibtex_value(self._entry_dict[field],
                                               wrap=wrap,
                                               width=wrap_width - maxlen -
                                               len(sep) + 2,
                                               col_start=maxlen + len(sep) + 2)
            entry.append("  {}{}{},".format(field_header, sep, field_value))
        entry.append('}')
        return '\n'.join(entry)

    def as_compact_bibtex(self):
        """
        Composes entry in BibTex format on a single line, without
        wrapping or padding.
        """
        entry = []
        entry.append('@{}{{{},'.format(self.bibtype, self.citekey))
        fields = self.fields
        for field in fields:
            field_value = _format_bibtex_value(self._entry_dict[field],
                                               wrap=False,
                                               width=None,
                                               col_start=1)
            entry.append("{}={},".format(field, field_value))
        entry.append('}')
        return ''.join(entry)
Ejemplo n.º 7
0
class BibTexEntry(object):
    """
    Tracks a single BibTeX entry.

    Fields are kept in an internal dictionary (``_entry_dict``) and are
    exposed as object attributes through ``__getattr__``, ``__setattr__``
    and ``__delattr__``. ``bibtype`` and ``citekey`` are stored as
    ordinary instance attributes.
    """
    decompose_pattern = re.compile(r'^@(\w*)\s*{\s*([\w|\:|\-]*),(.*)}')
    # works, but misses last field
    field_pattern = re.compile(r'\s*([\w|\-]*?)\s*=\s*(.*?),(?=\s*[\w|\-]*\s*\=)')
    # get the last field
    last_field_pattern = re.compile(r'\s*([\w|\-]*?)\s*=\s*(.*?)\s*[,]*\s*$')

    def __init__(self, citation=None):
        """
        Sets up internal dictionary of BibTeX fields, and initializes
        if argument is given.

        ``citation`` may be:

        - another ``BibTexEntry``: its fields, ``bibtype`` and ``citekey``
          are copied;
        - a ``dict``: keys are lower-cased and stored as fields, and the
          ``"bibtype"`` / ``"citekey"`` entries (if any) are also used to
          set the corresponding attributes;
        - ``None`` (the default): an empty entry is created;
        - anything else: passed to ``parse_text`` as BibTeX source text.
        """
        self.bibtype = None
        self.citekey = None
        if isinstance(citation, BibTexEntry):
            self._entry_dict = OrderedCaselessDict(citation._entry_dict)
            # A copy must keep the identity of the entry, not just its
            # fields.
            self.bibtype = citation.bibtype
            self.citekey = citation.citekey
        elif isinstance(citation, dict):
            self._entry_dict = OrderedCaselessDict()
            for k, v in citation.items():
                self._entry_dict[k.lower()] = v
            self.bibtype = self._entry_dict.get("bibtype", None)
            self.citekey = self._entry_dict.get("citekey", None)
        else:
            self._entry_dict = OrderedCaselessDict()
            # The default ``None`` means "empty entry"; nothing to parse.
            if citation is not None:
                self.parse_text(citation)

    def __getattr__(self, name):
        """
        Allows bibtex fields (and any additional ones) to be treated
        like object attributes.

        Only invoked when normal attribute lookup fails. Returns the
        field value if set, ``""`` for an unset name listed in
        ``BIBTEX_FIELDS``, and raises ``AttributeError`` otherwise.
        """
        entry_dict = self._get_entry_dict()
        if name == '_entry_dict' or name == '_BibTexEntry_entry_dict':
            return entry_dict
        instance_dict = object.__getattribute__(self, '__dict__')
        if name == '__dict__':
            return instance_dict
        # Read the instance dictionary directly: ``hasattr(self, name)``
        # here would call back into this method.
        if name in ('bibtype', 'citekey') and name in instance_dict:
            return instance_dict[name]
        if name in entry_dict:
            return entry_dict[name]
        if name in BIBTEX_FIELDS:
            return ""
        raise AttributeError(name)

    def __setattr__(self, name, value):
        """
        Allows bibtex fields (and any additional ones) to be treated
        like object attributes.

        ``_entry_dict``, ``bibtype`` and ``citekey`` are stored on the
        instance itself; every other name is stored as a field.
        """
        if name == '_entry_dict' or name == '_BibTexEntry_entry_dict':
            # Must go through ``object.__setattr__``; rebinding a local
            # name would silently discard the new dictionary.
            object.__setattr__(self, '_entry_dict', value)
        elif name == 'bibtype' or name == 'citekey':
            object.__setattr__(self, name, value)
        else:
            self._get_entry_dict()[name] = value

    def __delattr__(self, name):
        """
        Allows bibtex fields (and any additional ones) to be treated
        like object attributes.

        Deleting an unset name listed in ``BIBTEX_FIELDS`` is a no-op;
        deleting any other unknown name raises ``AttributeError``.
        """
        entry_dict = self._get_entry_dict()
        if name == '_entry_dict' or name == '_BibTexEntry_entry_dict':
            object.__delattr__(self, '_entry_dict')
        elif name in entry_dict:
            del entry_dict[name]
        elif name in BIBTEX_FIELDS:
            pass
        elif name in object.__getattribute__(self, '__dict__'):
            object.__delattr__(self, name)
        else:
            raise AttributeError(name)

    def __str__(self):
        """
        String representation of self.
        """
        return self.as_bibtex()

    def __repr__(self):
        """
        Internal representation of self: the ``repr`` of a dictionary
        holding ``bibtype``, ``citekey`` and all fields.
        """
        repr_dict = {}
        repr_dict['bibtype'] = self.bibtype
        repr_dict['citekey'] = self.citekey
        repr_dict.update(self.fields_as_dict())
        # ``__repr__`` must return a string, not the dictionary itself.
        return repr(repr_dict)

    def _get_entry_dict(self):
        """
        Returns the internal field dictionary, creating it first if
        necessary.
        """
        # Look in the instance dictionary directly: ``hasattr`` would fall
        # through to ``__getattr__``, which calls this method again.
        instance_dict = object.__getattribute__(self, '__dict__')
        if '_entry_dict' not in instance_dict:
            object.__setattr__(self, '_entry_dict', {})
        return instance_dict['_entry_dict']

    def _get_fields(self):
        """
        Returns list of populated fields in order (does not include
        bibtype and citekey).

        Names listed in ``BIBTEX_FIELDS`` come first, in that order,
        followed by any other populated fields.
        """
        fields = []
        for field in BIBTEX_FIELDS:
            if field in self._entry_dict:
                fields.append(field)
        for key in self._entry_dict:
            if key not in fields:
                fields.append(key)
        return fields

    fields = property(_get_fields)

    def parse_text(self, text):
        """
        Parses a BibTeX text entry.

        Sets ``bibtype`` and ``citekey`` and stores every field found
        (names lower-cased, values passed through ``_clean_parsed_text``).
        Raises ``ValueError`` if the text does not have the overall
        ``@type{key, ...}`` shape.
        """
        text = text.replace("\n", "")
        self.bibtype = None
        self.citekey = None
        text = text.strip()
        decompose_match = self.decompose_pattern.match(text)
        if decompose_match is None:
            raise ValueError("Failed to parse bibtype: {}".format(text))
        self.bibtype = decompose_match.group(1)
        self.citekey = decompose_match.group(2)
        remaining = decompose_match.group(3)
        field_match = self.field_pattern.match(remaining)
        while field_match:
            field_name = field_match.group(1).lower()
            field_value = _clean_parsed_text(field_match.group(2))
            self._entry_dict[field_name] = field_value
            # Drop only the matched prefix; removing every occurrence of
            # the matched text could also eat identical text further on.
            remaining = remaining[field_match.end():]
            field_match = self.field_pattern.match(remaining)
        # ``field_pattern`` needs a following field, so the final one is
        # picked up separately -- if anything is left at all.
        if remaining:
            last_field_match = self.last_field_pattern.match(remaining)
            if last_field_match:
                field_name = last_field_match.group(1).lower()
                field_value = _clean_parsed_text(last_field_match.group(2))
                self._entry_dict[field_name] = field_value

    def fields_as_dict(self):
        """
        Returns the fields (i.e., all public attributes except for
        bibtype and citekey as a dictionary).
        """
        return dict(self._entry_dict)

    def as_bibtex(self, wrap_width=78):
        """
        Composes entry in BibTex format, one field per line.

        Field names are padded to the length of the longest name in
        ``BIBTEX_FIELDS``; every field except ``url`` is formatted with
        wrapping enabled.
        """
        entry = []
        sep = "  =  "
        entry.append('@{}{{{},'.format(self.bibtype, self.citekey))
        fields = self.fields
        maxlen = max([len(field) for field in BIBTEX_FIELDS])
        for field in fields:
            wrap = field != 'url'
            field_header = field.ljust(maxlen)
            field_value = _format_bibtex_value(self._entry_dict[field],
                                      wrap=wrap,
                                      width=wrap_width - maxlen - len(sep) + 2,
                                      col_start=maxlen + len(sep) + 2)
            entry.append("  {}{}{},".format(field_header, sep, field_value))
        entry.append('}')
        return '\n'.join(entry)

    def as_compact_bibtex(self):
        """
        Composes entry in BibTex format on a single line, without
        wrapping or padding.
        """
        entry = []
        entry.append('@{}{{{},'.format(self.bibtype, self.citekey))
        fields = self.fields
        for field in fields:
            field_value = _format_bibtex_value(self._entry_dict[field],
                                      wrap=False,
                                      width=None,
                                      col_start=1)
            entry.append("{}={},".format(field, field_value))
        entry.append('}')
        return ''.join(entry)