Ejemplo n.º 1
0
    def parse(self, buff, delimiter=DELIMITER, separator=SEPARATOR):
        """
        Parse a FIX message.

        The message may be a bytestring or unicode text. The output is a dictionary-like
        object built by ``self._frg_class`` (the ``fragment_class`` constructor argument).
        Tags are converted with ``int_or_str``: keys are ``int`` and, if there is a
        non-int tag in the message, it is stored as a key in its original format.
        When ``self.spec`` knows the message type (tag 35 among the first four pairs) and
        groups are enabled, tags that start a repeating group are stored as repeating
        groups built by ``self._process_group``.

        Decoding of bytestring input:

        * if ``self.decode_all_as_347`` is set and tag 347 is found in the header, every
          value is decoded with the encoding named by tag 347;
        * otherwise, if ``self.encoding`` is set, values are decoded with it, except for
          tags in ``ENCODED_TAG_SET`` which use the tag 347 encoding when present;
        * otherwise, on Python 3, values are decoded as UTF-8 (same exception for
          ``ENCODED_TAG_SET``).

        Unicode input is not decoded; a warning is emitted if ``self.encoding`` or
        ``self.decode_all_as_347`` is set, since both are ignored in that case.

        :param buff: Buffer to parse
        :type buff:  ``bytestr`` or ``unicode``
        :param delimiter: The character that separates key and value inside the FIX message.
          Generally '='. Note the type: because of the way the buffer is tokenised, this
          needs to be unicode (or ``str`` in python 2.7*).
        :type delimiter: ``unicode``
        :param separator: The character that separates key+value pairs inside the FIX
          message. Generally the SOH control character (byte 0x01). See type observations
          above.
        :type separator: ``unicode``
        :return: the parsed message, an instance of ``self._frg_class``
        :raises ValueError: if ``buff`` is neither text nor bytes
        :raises AssertionError: if ``delimiter`` or ``separator`` is alphanumeric
        """
        def pushback_generator(iterator):
            """
            Generator which allows to push back a previously picked item
            for example:
            gen = pushback_generator(range(10))
            print(next(gen))
            print(next(gen))
            v = next(gen)
            print(v)
            gen.send(v)
            print(next(gen))
            :param iterator: the iterable to wrap
            :return: a generator yielding the items of ``iterator``
            """
            for value in iterator:
                back = yield value
                if back is not None:
                    # The first yield is consumed as the return value of ``send()``,
                    # the second one is what the next ``next()`` call receives.
                    yield back
                    yield back

        # NOTE(review): this check disappears under ``python -O``; it is kept as an
        # assertion so callers relying on AssertionError are not affected.
        assert not (delimiter.isalnum() or separator.isalnum())

        encoding, encoding_347 = self.encoding, None
        input_in_unicode = False
        msg_type = None

        # The regex must be of the same string type as the buffer it tokenises.
        if isinstance(buff, six.text_type):
            input_in_unicode = True
            custom_r = re.compile(
                six.ensure_text(FIX_REGEX_STRING.format(
                    d=re.escape(delimiter), s=re.escape(separator)),
                                encoding='ascii'), re.DOTALL)
            if self.encoding is not None:
                encoding = None  # No need to decode
                warnings.warn(
                    'Processing a unicode message and ignore the argument "decode_as={}"'
                    .format(self.encoding))
            if self.decode_all_as_347:
                warnings.warn(
                    'Processing a unicode message and ignore the argument "decode_all_as_347={}"'
                    .format(self.decode_all_as_347))
        elif isinstance(buff, bytes):
            custom_r = re.compile(
                six.ensure_binary(FIX_REGEX_STRING.format(
                    d=re.escape(delimiter), s=re.escape(separator)),
                                  encoding='ascii'), re.DOTALL)
        else:
            raise ValueError('Unsupported type of input: {}'.format(
                type(buff)))

        tagvals = custom_r.findall(buff)

        if not self._no_groups and self.spec is not None:
            # Only the first four pairs are inspected for the message type. Slicing
            # (rather than indexing 0..3) keeps short or empty messages from raising
            # IndexError. No early exit: the last match among those pairs wins, as before.
            for raw_tag, raw_val in tagvals[:4]:
                if raw_tag in (b'35', u'35'):
                    msg_type = self.spec.msg_types.get(raw_val)

        if not input_in_unicode:
            # Look for tag 347 in the header only; stop at the first body tag.
            for tag, val in tagvals:
                if int_or_str(tag) == 347:
                    encoding_347 = six.ensure_str(val)
                    break
                if six.ensure_str(
                        tag
                ) not in HEADER_TAGS_SET:  # already enter the message body
                    break

        if self.decode_all_as_347 and encoding_347:
            tagvals = ((int_or_str(tval[0], encoding_347),
                        six.ensure_text(tval[1], encoding_347))
                       for tval in tagvals)
        elif encoding:
            tagvals = ((int_or_str(tval[0], encoding),
                        six.ensure_text(
                            tval[1],
                            (encoding_347 if encoding_347 and tval[0].decode()
                             in ENCODED_TAG_SET else encoding)))
                       for tval in tagvals)
        elif not input_in_unicode and six.PY3:
            tagvals = ((int_or_str(tval[0], 'ascii'),
                        six.ensure_text(
                            tval[1],
                            (encoding_347 if encoding_347 and tval[0].decode()
                             in ENCODED_TAG_SET else 'UTF-8')))
                       for tval in tagvals)
        elif input_in_unicode and six.PY2:
            tagvals = ((int_or_str(six.ensure_binary(tval[0]), 'ascii'),
                        six.ensure_binary(
                            tval[1],
                            (encoding_347 if encoding_347 and tval[0].encode()
                             in ENCODED_TAG_SET else 'UTF-8')))
                       for tval in tagvals)
        else:
            tagvals = ((int_or_str(tval[0]), tval[1]) for tval in tagvals)

        if self._no_groups or self.spec is None or msg_type is None:
            # no groups can be found without a spec, so no point looking up the msg type.
            return self._frg_class(tagvals)
        msg = self._frg_class()
        groups = msg_type.groups
        tagvals = pushback_generator(tagvals)
        for tag, value in tagvals:
            if tag not in groups:
                msg[tag] = value
            else:
                if value in (b'0', u'0'):
                    # Empty group: nothing to consume from the stream.
                    msg[tag] = RepeatingGroup.create_repeating_group(tag)
                else:
                    contents, last_tagval = self._process_group(
                        tag, tagvals, msg_type=msg_type, group=groups[tag])
                    msg[tag] = contents
                    if last_tagval:
                        # The group parser read one pair too far; hand it back so this
                        # loop sees it on its next iteration.
                        tagvals.send(last_tagval)
        return msg
Ejemplo n.º 2
0
 def _process_group(self, identifying_tag, enumerator, msg_type, group):
     """
     Recursively process a repeating group.

     Pairs are consumed from ``enumerator`` until a tag that does not belong to the
     group is met or the stream is exhausted. The first tag read is taken as the
     group's first tag: each later occurrence of it starts a new member.

     :param identifying_tag: the tag that introduced the group; stored as
       ``number_tag`` on the returned group
     :param enumerator: iterator of ``(tag, value)`` pairs, positioned just after the
       pair that introduced the group. It is shared with the caller and with nested
       calls, so it must be a one-shot iterator (e.g. a generator)
     :param msg_type: message type definition, passed through to nested calls
     :param group: group definition; ``group.tags`` holds the member tags and
       ``group.groups`` the nested group definitions keyed by their introducing tag
     :return: ``(rep_group, last_tagval)`` where ``rep_group`` is the populated
       ``RepeatingGroup`` and ``last_tagval`` is the ``(tag, value)`` pair that was read
       but does not belong to the group, or ``None`` at the end of the message
     """
     rep_group = RepeatingGroup()
     rep_group.number_tag = identifying_tag
     member = self._frg_class()
     first_tag = None
     inner_groups = group.groups
     valid_tags = group.tags
     # ``iter()`` of a generator is the generator itself, so nested calls keep
     # consuming the very same stream.
     stream = iter(enumerator)
     # Pair handed back by a nested group; it is dispatched before reading further.
     pending = None
     while True:
         item = pending if pending else next(stream, None)
         pending = None
         if item is None:
             break
         tag, value = item
         if first_tag is None:
             # handle first tag: we expect all the members of the group to start with this tag
             first_tag = tag
             rep_group.first_tag = tag
             member[tag] = value
         elif first_tag == tag:
             # we start a new member: store the current one and begin an empty one with this tag
             rep_group.append(member)
             member = self._frg_class()
             member[tag] = value
         elif tag in valid_tags:
             # tag is a member, we just add
             member[tag] = value
         elif tag in inner_groups:
             if value in (b'0', u'0'):
                 # Empty sub group: recursing would swallow the next, unrelated tag.
                 member[tag] = RepeatingGroup.create_repeating_group(tag)
             else:
                 # tag is starting a new sub group, we recurse
                 contents, pending = self._process_group(
                     tag, stream, msg_type, inner_groups[tag])
                 member[tag] = contents
                 # ``pending`` (if any) goes through the same dispatch on the next
                 # iteration: it may start a new member, belong to this member, start
                 # another sub group, or end this group.
         else:
             # we're out of the group.
             rep_group.append(member)
             return rep_group, (tag, value)
     # we are reaching the end of the message, so complete, no further tags to pass on
     rep_group.append(member)
     return rep_group, None
Ejemplo n.º 3
0
    def parse(self, buff, delimiter=DELIMITER, separator=SEPARATOR):
        """
        Parse a FIX message.

        The FIX message is expected to be a bytestring (the tokenising regex is compiled
        as bytes). The output is a dictionary-like object built by ``self._frg_class``
        (the ``fragment_class`` constructor argument). Tags are converted with
        ``int_or_str``: keys are ``int`` and, if there is a non-int tag in the message,
        it is stored as a key in its original format (i.e. bytestring).

        Decoding of values:

        * if ``self.encoding`` is set, every value is decoded with it;
        * otherwise, if ``self.decode_all_as_347`` is set and tag 347 is present, every
          value is decoded with the encoding named by tag 347;
        * otherwise values are left as bytestrings.

        When ``self.spec`` knows the message type (tag 35 among the first four pairs) and
        groups are enabled, tags that start a repeating group are stored as repeating
        groups built by ``self._process_group``.

        :param buff: Buffer to parse
        :type buff: ``bytestr``
        :param delimiter: The character that separates key and value inside the FIX message.
          Generally '='. Note the type: because of the way the buffer is tokenised, this
          needs to be unicode (or ``str`` in python 2.7*).
        :type delimiter: ``unicode``
        :param separator: The character that separates key+value pairs inside the FIX
          message. Generally the SOH control character (byte 0x01). See type observations
          above.
        :type separator: ``unicode``
        :return: the parsed message, an instance of ``self._frg_class``
        """
        def pushback_generator(iterator):
            """
            Generator which allows to push back a previously picked item
            for example:
            gen = pushback_generator(range(10))
            print(next(gen))
            print(next(gen))
            v = next(gen)
            print(v)
            gen.send(v)
            print(next(gen))
            :param iterator: the iterable to wrap
            :return: a generator yielding the items of ``iterator``
            """
            for value in iterator:
                back = yield value
                if back is not None:
                    # The first yield is consumed as the return value of ``send()``,
                    # the second one is what the next ``next()`` call receives.
                    yield back
                    yield back

        custom_r = re.compile(
            FIX_REGEX_STRING.format(d=re.escape(delimiter),
                                    s=re.escape(separator)).encode('UTF-8'),
            re.DOTALL)
        tagvals = custom_r.findall(buff)

        msg_type = None
        if not self._no_groups and self.spec is not None:
            # Only the first four pairs are inspected for the message type. Slicing
            # (rather than indexing 0..3) keeps short or empty messages from raising
            # IndexError. No early exit: the last match among those pairs wins, as before.
            for raw_tag, raw_val in tagvals[:4]:
                if raw_tag == b'35':
                    msg_type = self.spec.msg_types.get(raw_val)

        if self.encoding is not None:
            tagvals = ((int_or_str(tval[0], self.encoding),
                        tval[1].decode(self.encoding)) for tval in tagvals)
        elif self.decode_all_as_347:
            # Materialised as a list because it is scanned once for tag 347 and then
            # iterated again to build the result.
            tagvals = [(int_or_str(tval[0]), tval[1]) for tval in tagvals]
            encoding = None
            for tag, val in tagvals:
                if tag == 347:
                    encoding = val.decode('UTF-8')
                    break
            if encoding is not None:
                tagvals = ((t[0], t[1].decode(encoding)) for t in tagvals)
        else:
            tagvals = ((int_or_str(tval[0]), tval[1]) for tval in tagvals)
        # TODO: add logic to parse Encoded* tags according to 347

        if self._no_groups or self.spec is None or msg_type is None:
            # no groups can be found without a spec, so no point looking up the msg type.
            return self._frg_class(tagvals)
        msg = self._frg_class()
        groups = msg_type.groups
        tagvals = pushback_generator(tagvals)
        for tag, value in tagvals:
            if tag not in groups:
                msg[tag] = value
            else:
                # Values are bytes when nothing was decoded and text otherwise, so
                # both spellings of a zero count must be recognised.
                if value in (b'0', u'0'):
                    msg[tag] = RepeatingGroup.create_repeating_group(tag)
                else:
                    contents, last_tagval = self._process_group(
                        tag, tagvals, msg_type=msg_type, group=groups[tag])
                    msg[tag] = contents
                    if last_tagval:
                        # The group parser read one pair too far; hand it back so this
                        # loop sees it on its next iteration.
                        tagvals.send(last_tagval)
        return msg