Example #1
0
 def _etree2botstree(self, xmlnode):
     """Convert an etree element, and recursively its record children, to a node.

     Each child element of ``xmlnode`` is classified with ``self._entitytype``:
     a falsy result is treated as a field of the new node, ``1`` as a child
     record known to the grammar (converted recursively), and anything else
     as a record that is not in the grammar.

     Returns the node built for ``xmlnode``.
     """
     newnode = node.Node(record=self._etreenode2botstreenode(xmlnode))
     for child in xmlnode:
         kind = self._entitytype(child)

         if not kind:
             # A field, or something unknown that looks like a field: its text
             # is stored only when the element actually has content.
             if child.text:
                 newnode.record[child.tag] = child.text
             # Every non-empty xml-attribute of the field becomes a field too,
             # named <tag><attributemarker><attribute name>.
             for attr_name, attr_value in child.items():
                 if attr_value:
                     field_name = (child.tag +
                                   self.ta_info["attributemarker"] +
                                   attr_name)
                     newnode.record[field_name] = attr_value
             continue

         if kind == 1:
             # Record according to the grammar: recurse, attach the resulting
             # subtree, then drop the handled element from the stack.
             newnode.append(self._etree2botstree(child))
             self.stack.pop()
             continue

         # A record that the grammar does not know; only reported on request.
         if self.ta_info["checkunknownentities"]:
             self.add2errorlist(
                 _('[S02]%(linpos)s: Unknown xml-tag "%(recordunkown)s" (within "%(record)s") in message.\n'
                   ) % {
                       "linpos": newnode.linpos(),
                       "recordunkown": child.tag,
                       "record": newnode.record["BOTSID"],
                   })
     return newnode
Example #2
0
 def putloop(self, *mpaths):
     """Add a loop for ``mpaths`` and return what the underlying call returns.

     When the root already has a record the call is delegated to
     ``self.root.putloop``. When the root is still empty (a dummy root) exactly
     one mpath is accepted: a new node with that mpath as its record is
     appended to the root and returned.

     Raises:
         MappingRootError: root is empty and more or fewer than one mpath is given.
     """
     if self.root.record:
         return self.root.putloop(*mpaths)

     # No input yet and the mapping starts with a putloop(): root stays a dummy.
     if len(mpaths) != 1:
         raise MappingRootError(
             _("putloop(%(mpath)s): mpath too long???"),
             {"mpath": mpaths})
     self.root.append(node.Node(record=mpaths[0]))
     return self.root.children[-1]
Example #3
0
    def initfromfile(self):
        """Read a JSON edi file, build the node tree in ``self.root`` and check it.

        Accepted top-level shapes:
        - a list: every element becomes a child of an empty root node;
        - an object with a single key whose value is an object: the key is
          used as the root id;
        - an object with a single key whose value is a list: the root id comes
          from ``self._getrootid()`` and the list fills the root's children;
        - any other object: converted as a whole under ``self._getrootid()``.

        Raises:
            InMessageError: no usable content, or the top level is neither a
                list nor an object.
        """
        self.messagegrammarread(typeofgrammarfile="grammars")
        self._readcontent_edifile()

        parsed = simplejson.loads(self.rawinput)
        del self.rawinput

        if isinstance(parsed, list):
            self.root = node.Node()  # empty root; the list items are its children
            self.root.children = self._dojsonlist(parsed, self._getrootid())
            for message in self.root.children:
                # sanity test: every child must carry content
                if not message.record:
                    raise InMessageError(_("[J51]: No usable content."))
                self.checkmessage(message, self.defmessage)
                self.ta_info.update(message.queries)
            return

        if not isinstance(parsed, dict):
            # root in JSON is neither dict nor list
            raise InMessageError(
                _('[J53]: Content must be a "list" or "object".'))

        # Only a one-entry object can name the root; otherwise both stay None
        # and the generic conversion below is used.
        sole_key = sole_value = None
        if len(parsed) == 1:
            ((sole_key, sole_value),) = parsed.items()

        if isinstance(sole_value, dict):
            # best structure: {rootid:{id2:<dict, list>}}
            self.root = self._dojsonobject(sole_value, sole_key)
        elif isinstance(sole_value, list):
            # root dict has no name; rootID is taken from the grammar
            self.root = node.Node(record={"BOTSID": self._getrootid()})
            self.root.children = self._dojsonlist(sole_value, sole_key)
        else:
            self.root = self._dojsonobject(parsed, self._getrootid())

        if not self.root:
            raise InMessageError(_("[J52]: No usable content."))
        self.checkmessage(self.root, self.defmessage)
        self.ta_info.update(self.root.queries)
Example #4
0
    def initfromfile(self):
        """Initialise from an edi file: read, sniff, lex, parse and check it.

        On success ``self.root`` is the root of the parsed node tree and
        ``self.ta_info`` has been updated with the queries of the root (or, when
        the root has no record, of its first child).

        Raises:
            InMessageError: ``_parse`` returned leftover data.
        """
        self.messagegrammarread(typeofgrammarfile="grammars")

        # --- charset errors, lex errors ---
        self._readcontent_edifile()
        # hard-coded examination of the edi file; ta_info can be overruled by
        # syntax-parameters found in the file
        self._sniff()
        self._lex()

        # optional user exit (from syntax) that preprocesses the lexed records
        lex_hook = self.ta_info.get("preprocess_lex", False)
        if callable(lex_hook):
            lex_hook(lex=self.lex_records, ta_info=self.ta_info)
        if hasattr(self, "rawinput"):
            del self.rawinput

        # --- breaking parser errors ---
        self.root = node.Node()
        self.iternext_lex_record = iter(self.lex_records)
        unparsed = self._parse(structure_level=self.defmessage.structure,
                               inode=self.root)
        if unparsed:
            # probably not reached with edifact/x12 because of mailbag processing
            message_text = _(
                "[A50] line %(line)s pos %(pos)s: Found non-valid data at end of edi file; probably a problem with separators or message structure."
            )
            raise InMessageError(
                message_text,
                {
                    "line": unparsed[0][LIN],
                    "pos": unparsed[0][POS]
                },
            )
        del self.lex_records
        # self.root is now the root of a tree of nodes

        # --- non-breaking parser errors ---
        self.checkenvelope()
        self.checkmessage(self.root, self.defmessage)

        # queries-dict of the parsed message; used to update the database
        if self.root.record:
            self.ta_info.update(self.root.queries)
            return
        try:
            first_child = next(iter(self.root.children))
        except StopIteration:
            return  # no children: nothing to take queries from
        self.ta_info.update(first_child.queries)
Example #5
0
 def _dojsonobject(self, jsonobject, name):
     """Convert a JSON object into a node whose record has BOTSID ``name``.

     Per key/value pair:
     - ``None`` is skipped;
     - a string becomes a field, unless it is empty or whitespace only;
     - a dict becomes a child node (recursively), if that yields a node;
     - a list is converted by ``self._dojsonlist`` and added to the children;
     - an int/float becomes a field holding ``str(value)``;
     - anything else raises when ``checkunknownentities`` is set, otherwise
       it is stored as ``str(value)``.

     Returns the node, or None when the node is considered empty.

     Raises:
         InMessageError: unsupported value type while ``checkunknownentities`` is set.
     """
     result = node.Node(record={"BOTSID": name})
     for field, content in jsonobject.items():
         if content is None:
             continue

         if isinstance(content, str):
             # strings without real content do not become a field
             if content and not content.isspace():
                 result.record[field] = content
             continue

         if isinstance(content, dict):
             child = self._dojsonobject(content, field)
             if child:
                 result.append(child)
             continue

         if isinstance(content, list):
             result.children.extend(self._dojsonlist(content, field))
             continue

         # Numbers are always accepted; other types only when unknown entities
         # are not being checked. Either way the value is stored as text.
         if (not isinstance(content, (int, float))
                 and self.ta_info["checkunknownentities"]):
             raise InMessageError(
                 _('[J55]: Key "%(key)s" value "%(value)s": is not string, list or dict.'
                   ),
                 {
                     "key": field,
                     "value": content
                 },
             )
         result.record[field] = str(content)

     # NOTE(review): "empty" is a record of exactly two entries and no
     # children; presumably Node adds one entry next to BOTSID - confirm.
     is_empty = len(result.record) == 2 and not result.children
     return None if is_empty else result
Example #6
0
 def nextmessage(self):
     """Yield each 'message' of this file, ready to be passed to the mapping script.

     How the tree is split depends on the grammar (``self.defmessage``):
     - ``nextmessage`` set: one message per occurrence of that mpath; when
       ``nextmessage2`` is set as well, a second pass yields its occurrences
       (edifact uses nextmessage2 for UNB-UNG);
     - ``nextmessageblock`` set (csv/fixed): consecutive children of the root
       sharing the same value for the indicated field(s) form one message;
     - neither: the whole tree is one message when the root has a record or
       ``pass_all`` is set, otherwise each child of the root is a message.

     Every message receives its own copy of ``self.ta_info``, extended with
     the queries of the message and with ``message_number`` (starting at 1).

     Yields:
         Whatever ``self._initmessagefromnode`` returns for each message.
     """
     # node preprocessing via user exit indicated in syntax
     preprocess_nodes = self.ta_info.get("preprocess_nodes", False)
     if callable(preprocess_nodes):
         preprocess_nodes(thisnode=self)

     if self.defmessage.nextmessage is not None:
         # nextmessage defined in grammar: split up messages.
         self.ta_info["total_number_of_messages"] = self.getcountoccurrences(
             *self.defmessage.nextmessage)
         self.root.processqueries({}, len(self.defmessage.nextmessage))
         # eachmessage is a list: [mpath, mpath, etc, node]
         for count, eachmessage in enumerate(
                 self.getloop_including_mpath(*self.defmessage.nextmessage),
                 start=1):
             ta_info = self.ta_info.copy()
             ta_info.update(eachmessage[-1].queries)
             ta_info["message_number"] = count
             # give mappingscript access to envelope
             ta_info["bots_accessenvelope"] = self.root
             yield self._initmessagefromnode(eachmessage[-1], ta_info,
                                             eachmessage[:-1])

         if self.defmessage.nextmessage2 is not None:
             # edifact uses nextmessage2 for UNB-UNG
             self.ta_info[
                 "total_number_of_messages"] = self.getcountoccurrences(
                     *self.defmessage.nextmessage2)
             self.root.processqueries({}, len(self.defmessage.nextmessage2))
             # eachmessage is a list: [mpath, mpath, etc, node]
             for count, eachmessage in enumerate(
                     self.getloop_including_mpath(
                         *self.defmessage.nextmessage2),
                     start=1):
                 ta_info = self.ta_info.copy()
                 # The node is the LAST element of the list; the queries live
                 # on that node (a list has no 'queries' attribute).
                 ta_info.update(eachmessage[-1].queries)
                 ta_info["message_number"] = count
                 # give mappingscript access to envelope
                 ta_info["bots_accessenvelope"] = self.root
                 yield self._initmessagefromnode(eachmessage[-1], ta_info,
                                                 eachmessage[:-1])

     elif self.defmessage.nextmessageblock is not None:
         # for csv/fixed: nextmessageblock indicates which field(s) determine
         # a message; as long as the field(s) keep the same value it is the
         # same message. Note there is only one recordtype (as checked in
         # grammar.py).
         # first: count number of messages (= number of value changes + 1)
         count = 0
         for line in self.root.children:
             kriterium = line.enhancedget(self.defmessage.nextmessageblock)
             if not count or kriterium != oldkriterium:
                 count += 1
                 oldkriterium = kriterium
         self.ta_info["total_number_of_messages"] = count

         # then: yield the messages. 'count' is the number of the block that
         # is currently being collected in 'newroot'.
         count = 0
         for line in self.root.children:
             kriterium = line.enhancedget(self.defmessage.nextmessageblock)
             if not count:
                 count = 1
                 oldkriterium = kriterium
                 newroot = node.Node()  # make new empty root node.
             elif kriterium != oldkriterium:
                 # The collected block is complete: hand it out under its own
                 # number before starting (and counting) the next block.
                 ta_info = self.ta_info.copy()
                 # update ta_info with information from the previous line
                 ta_info.update(oldline.queries)
                 ta_info["message_number"] = count
                 yield self._initmessagefromnode(newroot, ta_info)
                 count += 1
                 oldkriterium = kriterium
                 newroot = node.Node()  # make new empty root node.
             newroot.append(line)
             oldline = line
         if count:  # not if there are no lines at all
             ta_info = self.ta_info.copy()
             # update ta_info with information from the last line
             ta_info.update(line.queries)
             ta_info["message_number"] = count
             yield self._initmessagefromnode(newroot, ta_info)

     else:
         # no split up is indicated in grammar.
         if self.root.record or self.ta_info.get("pass_all", False):
             # contains root-record or explicitly indicated (csv): pass whole tree
             ta_info = self.ta_info.copy()
             ta_info.update(self.root.queries)
             ta_info["total_number_of_messages"] = 1
             ta_info["message_number"] = 1
             # give mappingscript access to envelope
             ta_info["bots_accessenvelope"] = self.root
             yield self._initmessagefromnode(self.root, ta_info)
         else:
             # pass nodes under root one by one
             total_number_of_messages = len(self.root.children)
             for count, child in enumerate(self.root.children, start=1):
                 ta_info = self.ta_info.copy()
                 ta_info.update(child.queries)
                 ta_info["total_number_of_messages"] = total_number_of_messages
                 ta_info["message_number"] = count
                 # give mappingscript access to envelope
                 ta_info["bots_accessenvelope"] = self.root
                 yield self._initmessagefromnode(child, ta_info)
Example #7
0
 def _parse(self, structure_level, inode):
     """ This is the heart of the parsing of incoming messages (but not for xml, json)
         Read the lex_records one by one (self.iternext_lex_record, is an iterator)
         - parse the records.
         - identify record (lookup in structure)
         - identify fields in the record (use the record_definition from the grammar).
         - add grammar-info to records: field-tag,mpath.
         Parameters:
         - structure_level: current grammar/segmentgroup of the grammar-structure.
         - inode: parent node; all parsed records are added as children of inode
         Returns:
         - None when there are no more lex_records, else the first lex_record
           that could not be matched in this structure_level (the caller goes
           on matching it one level up).
         Raises:
         - InMessageError: a mandatory record is missing ([S51]) or a record
           is not allowed at this place ([S50]).
         - TranslationNotFoundError: the SUBTRANSLATION field has no value, or
           no (valid) grammar is found for the messagetype.
         2x recursive: SUBTRANSLATION and segmentgroups
     """
     structure_index = 0  # keep track of where we are in the structure_level
     countnrofoccurences = 0  # number of occurences of current record in structure
     structure_end = len(structure_level)
     # indicate if the next record should be fetched, or if the current_lex_record is still being parsed.
     get_next_lex_record = True
     # it might seem logical to test here 'current_lex_record is None', but
     # this is already used to indicate 'no more records'.
     while True:
         if get_next_lex_record:
             try:
                 current_lex_record = next(self.iternext_lex_record)
             except StopIteration:  # catch when no more lex_record.
                 current_lex_record = None
             get_next_lex_record = False
         if (current_lex_record is None
                 or structure_level[structure_index][ID] !=
                 current_lex_record[ID][VALUE]):
             # if record is required in structure_level, and countnrofoccurences==0: error;
             if structure_level[structure_index][
                     MIN] and not countnrofoccurences:
                 # enough check here; message is
                 # validated more accurate later
                 if current_lex_record is None:
                     # ran out of records (eg when no UNZ in edifact): there is
                     # no offending record to report, only a missing one.
                     raise InMessageError(
                         self.messagetypetxt +
                         _('[S51]: Missing mandatory record "%(record)s".'),
                         {
                             "record":
                             self.mpathformat(
                                 structure_level[structure_index][MPATH])
                         },
                     )
                 raise InMessageError(
                     self.messagetypetxt +
                     _('[S50]: Line:%(line)s pos:%(pos)s record:"%(record)s": message has an error in its structure; this record is not allowed here. Scanned in message definition until mandatory record: "%(looked)s".'
                       ),
                     {
                         "record":
                         current_lex_record[ID][VALUE],
                         "line":
                         current_lex_record[ID][LIN],
                         "pos":
                         current_lex_record[ID][POS],
                         "looked":
                         self.mpathformat(
                             structure_level[structure_index][MPATH]),
                     },
                 )
             structure_index += 1
             if (structure_index == structure_end
                 ):  # current_lex_record is not in this level. Go level up
                 # if on 'first level': give specific error
                 if (current_lex_record is not None
                         and structure_level == self.defmessage.structure):
                     raise InMessageError(
                         self.messagetypetxt +
                         _('[S50]: Line:%(line)s pos:%(pos)s record:"%(record)s": message has an error in its structure; this record is not allowed here. Scanned in message definition until mandatory record: "%(looked)s".'
                           ),
                         {
                             "record":
                             current_lex_record[ID][VALUE],
                             "line":
                             current_lex_record[ID][LIN],
                             "pos":
                             current_lex_record[ID][POS],
                             "looked":
                             self.mpathformat(
                                 structure_level[structure_index -
                                                 1][MPATH]),
                         },
                     )
                 # return either None (no more lex_records to parse) or the last
                 # current_lex_record (the last current_lex_record is not found in this
                 # level)
                 return current_lex_record
             countnrofoccurences = 0
             continue  # continue while-loop: get_next_lex_record is false as no match with structure is made; go and look at next record of structure
         # record is found in grammar
         countnrofoccurences += 1
         newnode = node.Node(
             record=self._parsefields(current_lex_record,
                                      structure_level[structure_index]),
             linpos_info=(current_lex_record[0][LIN],
                          current_lex_record[0][POS]),
         )  # make new node
         inode.append(
             newnode
         )  # succes! append new node as a child to current (parent)node
         if SUBTRANSLATION in structure_level[structure_index]:
             # start a SUBTRANSLATION; find the right messagetype, etc
             messagetype = newnode.enhancedget(
                 structure_level[structure_index][SUBTRANSLATION])
             if not messagetype:
                 raise TranslationNotFoundError(
                     _('Could not find SUBTRANSLATION "%(sub)s" in (sub)message.'
                       ),
                     {
                         "sub":
                         structure_level[structure_index][SUBTRANSLATION]
                     },
                 )
             messagetype = self._manipulatemessagetype(messagetype, inode)
             try:
                 defmessage = grammar.grammarread(
                     self.__class__.__name__,
                     messagetype,
                     typeofgrammarfile="grammars",
                 )
             except BotsImportError:
                 # No grammar for this messagetype: give the user script
                 # 'getmessagetype' (if present) a chance to supply another one.
                 raisenovalidmapping_error = True
                 if hasattr(self.defmessage.module, "getmessagetype"):
                     messagetype2 = runscript(
                         self.defmessage.module,
                         self.defmessage.grammarname,
                         "getmessagetype",
                         editype=self.__class__.__name__,
                         messagetype=messagetype,
                     )
                     if messagetype2:
                         try:
                             defmessage = grammar.grammarread(
                                 self.__class__.__name__,
                                 messagetype2,
                                 typeofgrammarfile="grammars",
                             )
                             raisenovalidmapping_error = False
                         except BotsImportError:
                             pass
                 if raisenovalidmapping_error:
                     raise TranslationNotFoundError(
                         _('No (valid) grammar for editype "%(editype)s" messagetype "%(messagetype)s".'
                           ),
                         {
                             "editype": self.__class__.__name__,
                             "messagetype": messagetype,
                         },
                     )
             self.messagecount += 1
             # Translate the template first and fill it in afterwards; when the
             # text is formatted before the lookup, no translation can match.
             self.messagetypetxt = _(
                 "Message nr %(count)s, type %(type)s, ") % {
                     "count": self.messagecount,
                     "type": messagetype
                 }
             current_lex_record = self._parse(
                 structure_level=defmessage.structure[0][LEVEL],
                 inode=newnode)
             # copy messagetype into 1st segment of subtranslation (eg UNH, ST)
             newnode.queries = {"messagetype": messagetype}
             newnode.queries.update(defmessage.syntax)
             self.checkmessage(newnode, defmessage, subtranslation=True
                               )  # check the results of the subtranslation
             # ~ end SUBTRANSLATION
             self.messagetypetxt = ""
             # get_next_lex_record is still False; we are trying to match the last (not
             # matched) record from the SUBTRANSLATION (named 'current_lex_record').
         else:
             if (LEVEL in structure_level[structure_index]
                 ):  # if header, go parse segmentgroup (recursive)
                 current_lex_record = self._parse(
                     structure_level=structure_level[structure_index]
                     [LEVEL],
                     inode=newnode,
                 )
                 # get_next_lex_record is still False; the current_lex_record that was not
                 # matched in lower segmentgroups is still being parsed.
             else:
                 get_next_lex_record = True
             # accomodate for UNS = UNS construction: a record that occurs
             # exactly MIN == MAX times is finished; move on to the next record
             # in the structure, unless this is already the last one.
             if (structure_level[structure_index][MIN] ==
                     structure_level[structure_index][MAX] ==
                     countnrofoccurences):
                 if structure_index + 1 != structure_end:
                     structure_index += 1
                     countnrofoccurences = 0
Example #8
0
    def initfromfile(self):
        """ initialisation from an excel file.
            The file is read via python module xlrd, written to an in-memory
            csv (using the separators/quoting from ta_info), and that csv text
            is then lexed and parsed like a csv edi file.

            Raises:
            - ImportError: python library xlrd is not available.
            - InMessageError: the file can not be read as excel, or the parser
              returned leftover data.
        """
        try:
            self.xlrd = botsbaseimport("xlrd")
        except ImportError:
            raise ImportError(
                _('Dependency failure: editype "excel" requires python library "xlrd".'
                  ))
        import csv as csvlib

        # Prefer the StringIO module where it exists; otherwise use io.
        try:
            import StringIO
        except ImportError:
            import io as StringIO

        self.messagegrammarread(typeofgrammarfile="grammars")
        self.ta_info["charset"] = self.defmessage.syntax[
            "charset"]  # always use charset of edi file.
        # with an escape character, quotes are escaped instead of doubled
        doublequote = not self.ta_info["escape"]

        logger.debug('Read edi file "%(filename)s".', self.ta_info)
        # xlrd reads excel file; python's csv module writes this to file-like
        # StringIO; read StringIO as self.rawinput.
        infilename = abspathdata(self.ta_info["filename"])
        try:
            xlsdata = self.read_xls(infilename)
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit are not reported as a broken excel file.
            txt = txtexc()
            logger.error(
                _("Excel extraction failed, may not be an Excel file? Error:\n%(txt)s"
                  ),
                {"txt": txt},
            )
            raise InMessageError(
                _("Excel extraction failed, may not be an Excel file? Error:\n%(txt)s"
                  ),
                {"txt": txt},
            )
        rawinputfile = StringIO.StringIO()
        csvout = csvlib.writer(
            rawinputfile,
            quotechar=self.ta_info["quote_char"],
            delimiter=self.ta_info["field_sep"],
            doublequote=doublequote,
            escapechar=self.ta_info["escape"],
        )
        csvout.writerows(map(self.utf8ize, xlsdata))
        rawinputfile.seek(0)
        self.rawinput = rawinputfile.read()
        rawinputfile.close()
        # A text buffer (io.StringIO) already returns decoded text, which has
        # no decode(); only a byte string still has to be decoded (utf-8->str).
        if isinstance(self.rawinput, bytes):
            self.rawinput = self.rawinput.decode("utf-8")
        # start lexing and parsing as csv
        self._lex()
        if hasattr(self, "rawinput"):
            del self.rawinput
        self.root = node.Node()  # make root Node None.
        self.iternext_lex_record = iter(self.lex_records)
        leftover = self._parse(structure_level=self.defmessage.structure,
                               inode=self.root)
        if leftover:
            raise InMessageError(
                _('[A52]: Found non-valid data at end of excel file: "%(leftover)s".'
                  ),
                {"leftover": leftover},
            )
        del self.lex_records
        self.checkmessage(self.root, self.defmessage)
Example #9
0
 def __init__(self, ta_info):
     """Initialise via the parent class, then start with an empty message tree.

     ``ta_info`` is handed unchanged to the parent initialiser.
     """
     super(OutMessage, self).__init__(ta_info)
     # no output stream has been assigned yet
     self._outstream: EdiFile = None
     # Root of the message tree, which the mappingscript fills via the
     # put()-interface; its record starts out as an empty dict.
     empty_record = {}
     self.root = node.Node(record=empty_record)