Code example #1
def botsunzip(ta_from, endstatus, password=None, pass_non_zip=False, **argv):
    ''' unzip file;
        editype & messagetype are unchanged.
    '''
    try:
        z = zipfile.ZipFile(botslib.abspathdata(filename=ta_from.filename),
                            mode='r')
    except zipfile.BadZipfile:
        botsglobal.logger.debug(_(u'File is not a zip-file.'))
        if pass_non_zip:  #just pass the file
            botsglobal.logger.debug(
                _(u'"pass_non_zip" is True, just pass the file.'))
            ta_to = ta_from.copyta(status=endstatus, statust=OK)
            return
        raise botslib.InMessageError(_(u'File is not a zip-file.'))

    if password:
        z.setpassword(password)
    for f in z.infolist():
        if f.filename[-1] == '/':  #check if this is a dir; if so continue
            continue
        ta_to = ta_from.copyta(status=endstatus)
        tofilename = str(ta_to.idta)
        tofile = botslib.opendata(tofilename, 'wb')
        tofile.write(z.read(f.filename))
        tofile.close()
        ta_to.update(
            statust=OK,
            filename=tofilename)  #update outmessage transaction with ta_info;
        botsglobal.logger.debug(_(u'        File written: "%s".'), tofilename)
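
botsunzip() relies on the bots framework for its file handling (botslib.abspathdata, botslib.opendata, ta_from.copyta). As a point of comparison, a minimal standalone sketch of the same zipfile pattern, with hypothetical names (unzip_members, out_dir) and no transaction bookkeeping, might look like this:

import os
import zipfile

def unzip_members(zip_path, out_dir, password=None):
    # Same pattern as botsunzip() above, without the bots transaction handling.
    # zipfile.BadZipfile is raised by ZipFile() for non-zip input, as above.
    archive = zipfile.ZipFile(zip_path, mode='r')
    if password:
        archive.setpassword(password)   # bytes in Python 3, str in Python 2
    written = []
    for info in archive.infolist():
        if info.filename.endswith('/'):   # directory entry: nothing to extract
            continue
        target = os.path.join(out_dir, os.path.basename(info.filename))
        with open(target, 'wb') as handle:
            handle.write(archive.read(info.filename))
        written.append(target)
    archive.close()
    return written
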
Code example #2
def mailbag(ta_from, endstatus, **argv):
    ''' 2 main functions:
        -   recognizes and distinguishes several edi types: x12 edifact tradacoms xml
            ('mailbag' in, correct editype out)
        -   split up interchanges (edifact, x12, tradacoms)
        details:
        - edifact, x12 and tradacoms can be mixed,
        - recognizes xml files; but messagetype 'xml' has a special handling when reading xml-files: xpath identifiers are used.
          this is needed for using xml in mailbag!
        - when more interchanges in one file: strongly recommended to mailbag/split these.
        - handle multiple UNA in one file, including different charsets.
        - handle multiple x12 separators in one file.
    '''
    edifile = botslib.readdata(filename=ta_from.filename)  #read as binary...
    startpos = 0
    while (1):
        found = HEADER.search(edifile[startpos:])
        if found is None:
            if startpos:  #ISA/UNB have been found in file; no new ISA/UNB is found. So all processing is done.
                break
            #guess if this is an xml file.....
            sniffxml = edifile[:25]
            sniffxml = sniffxml.lstrip(' \t\n\r\f\v\xFF\xFE\xEF\xBB\xBF\x00')  #to find first 'real' data; some chars are because of BOM, UTF-16 etc.
            if sniffxml and sniffxml[0] == '<':
                ta_to = ta_from.copyta(
                    status=endstatus,
                    statust=OK,
                    filename=ta_from.filename,
                    editype='xml',
                    messagetype='mailbag'
                )  #make transaction for translated message; gets ta_info of ta_frommes
                #~ ta_tomes.update(status=STATUSTMP,statust=OK,filename=ta_set_for_processing.filename,editype='xml') #update outmessage transaction with ta_info;
                break
            else:
                raise botslib.InMessageError(
                    _(u'Found no content in mailbag.'))
        elif found.group(1):
            editype = 'x12'
            headpos = startpos + found.start(2)
            count = 0
            for char in edifile[headpos:headpos + 120]:  #search first 120 characters to find separators
                if char in '\r\n' and count != 105:
                    continue
                count += 1
                if count == 4:
                    field_sep = char
                elif count == 106:
                    record_sep = char
                    break
            #~ foundtrailer = re.search(re.escape(record_sep)+'\s*IEA'+re.escape(field_sep)+'.+?'+re.escape(record_sep),edifile[headpos:],re.DOTALL)
            foundtrailer = re.search(
                re.escape(record_sep) + '\s*I\s*E\s*A\s*' +
                re.escape(field_sep) + '.+?' + re.escape(record_sep),
                edifile[headpos:], re.DOTALL)
        elif found.group(3):
            editype = 'edifact'
            if found.group(4):
                field_sep = edifile[startpos + found.start(4) + 4]
                record_sep = edifile[startpos + found.start(4) + 8]
                headpos = startpos + found.start(4)
            else:
                field_sep = '+'
                record_sep = "'"
                headpos = startpos + found.start(5)
            foundtrailer = re.search(
                re.escape(record_sep) + '\s*U\s*N\s*Z\s*' +
                re.escape(field_sep) + '.+?' + re.escape(record_sep),
                edifile[headpos:], re.DOTALL)
        elif found.group(8):
            editype = 'tradacoms'
            headpos = startpos + found.start(9)
            field_sep = '='  #the tradacoms 'after-segment-tag-separator'
            record_sep = "'"
            foundtrailer = re.search(
                re.escape(record_sep) + '\s*E\s*N\s*D\s*' +
                re.escape(field_sep) + '.+?' + re.escape(record_sep),
                edifile[headpos:], re.DOTALL)
        if not foundtrailer:
            raise botslib.InMessageError(
                _(u'Found no valid envelope trailer in mailbag.'))
        endpos = headpos + foundtrailer.end()
        #so: interchange is from headpos until endpos
        #~ if HEADER.search(edifile[headpos+25:endpos]):   #check if there is another header in the interchange
        #~ raise botslib.InMessageError(u'Error in mailbag format: found no valid envelope trailer.')
        ta_to = ta_from.copyta(
            status=endstatus
        )  #make transaction for translated message; gets ta_info of ta_frommes
        tofilename = str(ta_to.idta)
        tofile = botslib.opendata(tofilename, 'wb')
        tofile.write(edifile[headpos:endpos])
        tofile.close()
        ta_to.update(
            statust=OK,
            filename=tofilename,
            editype=editype,
            messagetype=editype)  #update outmessage transaction with ta_info;
        startpos = endpos
        botsglobal.logger.debug(_(u'        File written: "%s".'), tofilename)
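
The x12 branch above depends on the ISA segment being fixed-length: counting 106 significant characters from 'ISA', character 4 is the data element separator and character 106 the segment terminator. A small sketch of that scan outside the framework; x12_separators and the sample header are hypothetical, with the ISA assembled from the standard element widths so the positions line up:

def x12_separators(data):
    # Mirrors the scan in mailbag(): count 106 significant characters of the
    # fixed-length ISA segment, skipping stray CR/LF except at position 106.
    count = 0
    field_sep = record_sep = None
    for char in data[:120]:
        if char in '\r\n' and count != 105:
            continue
        count += 1
        if count == 4:
            field_sep = char      # data element separator, e.g. '*'
        elif count == 106:
            record_sep = char     # segment terminator, e.g. '~'
            break
    return field_sep, record_sep

# hypothetical ISA built from the fixed element widths (3 + 16 separators + 86 + 1 = 106 chars)
elements = ['00', ' ' * 10, '00', ' ' * 10, 'ZZ', 'SENDER'.ljust(15),
            'ZZ', 'RECEIVER'.ljust(15), '110101', '1253', 'U', '00401',
            '000000001', '0', 'P', '>']
isa = 'ISA*' + '*'.join(elements) + '~'
print(x12_separators(isa))   # -> ('*', '~')
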
Code example #3
def extractexcel(ta_from, endstatus, **argv):
    ''' extract excel file.
        editype & messagetype are unchanged.
    '''

    #***functions used by extractexcel
    #-------------------------------------------------------------------------------
    def read_xls(infilename):
        # Read excel first sheet into a 2-d array
        book = xlrd.open_workbook(infilename)
        sheet = book.sheet_by_index(0)
        formatter = lambda (t, v): format_excelval(book, t, v, False)
        xlsdata = []
        for row in range(sheet.nrows):
            (types, values) = (sheet.row_types(row), sheet.row_values(row))
            xlsdata.append(map(formatter, zip(types, values)))
        return xlsdata

    #-------------------------------------------------------------------------------
    def dump_csv(xlsdata, tofilename):
        stream = botslib.opendata(tofilename, 'wb')
        csvout = csv.writer(stream,
                            quotechar=quotechar,
                            delimiter=field_sep,
                            doublequote=doublequote,
                            escapechar=escape)
        csvout.writerows(map(utf8ize, xlsdata))
        stream.close()

    #-------------------------------------------------------------------------------
    def format_excelval(book, type, value, wanttupledate):
        #  Clean up the incoming excel data for some data types
        returnrow = []
        if type == 2:
            if value == int(value):
                value = int(value)
        elif type == 3:
            datetuple = xlrd.xldate_as_tuple(value, book.datemode)
            value = datetuple if wanttupledate else tupledate_to_isodate(
                datetuple)
        elif type == 5:
            value = xlrd.error_text_from_code[value]
        return value

    #-------------------------------------------------------------------------------
    def tupledate_to_isodate(tupledate):
        # Turns a gregorian (year, month, day, hour, minute, nearest_second) into a
        # standard YYYY-MM-DDTHH:MM:SS ISO date.
        (y, m, d, hh, mm, ss) = tupledate
        nonzero = lambda n: n != 0
        date = "%04d-%02d-%02d" % (y, m, d) if filter(nonzero,
                                                      (y, m, d)) else ''
        time = "T%02d:%02d:%02d" % (hh, mm, ss) if filter(
            nonzero, (hh, mm, ss)) or not date else ''
        return date + time

    #-------------------------------------------------------------------------------
    def utf8ize(l):
        # Make string-like things into utf-8, leave other things alone
        return [
            unicode(s).encode(charset) if hasattr(s, 'encode') else s
            for s in l
        ]

    #***end functions used by extractexcel
    import xlrd
    import csv
    #get parameters for csv-format; defaults are as the csv defaults (in grammar.py)
    charset = argv.get('charset', "utf-8")
    quotechar = argv.get('quotechar', "'")
    field_sep = argv.get('field_sep', ':')
    escape = argv.get('escape', '')
    if escape:
        doublequote = False
    else:
        doublequote = True
    try:
        infilename = botslib.abspathdata(ta_from.filename)
        xlsdata = read_xls(infilename)
        ta_to = ta_from.copyta(status=endstatus)
        tofilename = str(ta_to.idta)
        dump_csv(xlsdata, tofilename)
        ta_to.update(
            statust=OK,
            filename=tofilename)  #update outmessage transaction with ta_info;
        botsglobal.logger.debug(_(u'        File written: "%s".'), tofilename)
    except:
        txt = botslib.txtexc()
        botsglobal.logger.error(
            _(u'Excel extraction failed, may not be an Excel file? Error:\n%s'
              ), txt)
        raise botslib.InMessageError(
            _(u'Excel extraction failed, may not be an Excel file? Error:\n$error'),
            error=txt)
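
format_excelval() switches on xlrd's cell type codes (2 is a number cell, 3 a date cell, 5 an error cell) and hands date cells to tupledate_to_isodate(). A standalone sketch of that date conversion, with the filter() tests rewritten as any() but otherwise the same logic:

def tupledate_to_isodate(tupledate):
    # Same behaviour as the helper above: emit a date part only when y/m/d are
    # non-zero, and a time part when h/m/s are non-zero or there is no date part.
    (y, m, d, hh, mm, ss) = tupledate
    date = "%04d-%02d-%02d" % (y, m, d) if any((y, m, d)) else ''
    time = "T%02d:%02d:%02d" % (hh, mm, ss) if any((hh, mm, ss)) or not date else ''
    return date + time

print(tupledate_to_isodate((2011, 12, 14, 0, 0, 0)))   # -> 2011-12-14
print(tupledate_to_isodate((0, 0, 0, 9, 30, 0)))       # -> T09:30:00
print(tupledate_to_isodate((2011, 12, 14, 9, 30, 0)))  # -> 2011-12-14T09:30:00
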
Code example #4
def extractpdf(ta_from, endstatus, **argv):
    ''' Try to extract text content of a PDF file to a csv.
        You know this is not a great idea, right? But we'll do the best we can anyway!
        Page and line numbers are added to each row.
        Columns and rows are based on the x and y coordinates of each text element within tolerance allowed.
        Multiple text elements may combine to make one field, some PDFs have every character separated!
        You may need to experiment with x_group and y_group values, but defaults seem ok for most files.
        Output csv is UTF-8 encoded - The csv module doesn't directly support reading and writing Unicode
        If the PDF is just an image, all bets are off. Maybe try OCR, good luck with that!
        Mike Griffin 14/12/2011
    '''
    from pdfminer.pdfinterp import PDFResourceManager, process_pdf
    from pdfminer.converter import TextConverter
    from pdfminer.layout import LAParams, LTContainer, LTText, LTTextBox
    import csv

    class CsvConverter(TextConverter):
        def __init__(self, *args, **kwargs):
            TextConverter.__init__(self, *args, **kwargs)

        def receive_layout(self, ltpage):

            # recursively get every text element and its coordinates
            def render(item):
                if isinstance(item, LTContainer):
                    for child in item:
                        render(child)
                elif isinstance(item, LTText):
                    (_, _, x, y) = item.bbox

                    # group the y values (rows) within group tolerance
                    for v in yv:
                        if y > v - y_group and y < v + y_group:
                            y = v
                    yv.append(y)

                    line = lines[int(-y)]
                    line[x] = item.get_text().encode('utf-8')

            from collections import defaultdict
            lines = defaultdict(lambda: {})

            yv = []
            render(ltpage)

            lineid = 0
            for y in sorted(lines.keys()):
                line = lines[y]
                lineid += 1
                csvdata = [ltpage.pageid,
                           lineid]  # first 2 columns are page and line numbers

                # group the x values (fields) within group tolerance
                p = 0
                field_txt = ''
                for x in sorted(line.keys()):
                    gap = x - p
                    if p > 0 and gap > x_group:
                        csvdata.append(field_txt)
                        field_txt = ''
                    field_txt += line[x]
                    p = x
                csvdata.append(field_txt)
                csvout.writerow(csvdata)
            if lineid == 0:
                raise botslib.InMessageError(
                    _(u'PDF text extraction failed, it may contain just image(s)?'
                      ))

    #get some optional parameters
    x_group = argv.get('x_group', 10)  # group text closer than this as one field
    y_group = argv.get('y_group', 5)  # group lines closer than this as one line
    password = argv.get('password', '')
    quotechar = argv.get('quotechar', '"')
    field_sep = argv.get('field_sep', ',')
    escape = argv.get('escape', '\\')
    charset = argv.get('charset', 'utf-8')
    if not escape:
        doublequote = True
    else:
        doublequote = False

    try:
        pdf_stream = botslib.opendata(ta_from.filename, 'rb')
        ta_to = ta_from.copyta(status=endstatus)
        tofilename = str(ta_to.idta)
        csv_stream = botslib.opendata(tofilename, 'wb')
        csvout = csv.writer(csv_stream,
                            quotechar=quotechar,
                            delimiter=field_sep,
                            doublequote=doublequote,
                            escapechar=escape)

        # Process PDF
        rsrcmgr = PDFResourceManager(caching=True)
        device = CsvConverter(rsrcmgr, csv_stream, codec=charset)
        process_pdf(rsrcmgr,
                    device,
                    pdf_stream,
                    pagenos=set(),
                    password=password,
                    caching=True,
                    check_extractable=True)

        device.close()
        pdf_stream.close()
        csv_stream.close()
        ta_to.update(
            statust=OK,
            filename=tofilename)  #update outmessage transaction with ta_info;
        botsglobal.logger.debug(_(u'        File written: "%s".'), tofilename)
    except:
        txt = botslib.txtexc()
        botsglobal.logger.error(
            _(u'PDF extraction failed, may not be a PDF file? Error:\n%s'),
            txt)
        raise botslib.InMessageError(
            _(u'PDF extraction failed, may not be a PDF file? Error:\n$error'),
            error=txt)
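
The conversion above hinges on two tolerances: y coordinates within y_group of an already-seen value are snapped to it, so those text elements land on one csv row, and a horizontal gap wider than x_group starts a new field. A standalone sketch of that grouping over hypothetical (x, y, text) tuples, without pdfminer or the csv writer:

def group_text(items, x_group=10, y_group=5):
    # items: iterable of (x, y, text) tuples (e.g. taken from a PDF layout).
    # Returns a list of rows, each row a list of field strings.
    lines = {}
    yv = []
    for (x, y, text) in items:
        for v in yv:                  # snap y to an earlier value within tolerance
            if v - y_group < y < v + y_group:
                y = v
        yv.append(y)
        lines.setdefault(int(-y), {})[x] = text   # -y so the top of the page sorts first
    rows = []
    for key in sorted(lines):
        line = lines[key]
        fields, field_txt, p = [], '', 0
        for x in sorted(line):
            if p > 0 and x - p > x_group:         # wide horizontal gap: new field
                fields.append(field_txt)
                field_txt = ''
            field_txt += line[x]
            p = x
        fields.append(field_txt)
        rows.append(fields)
    return rows

print(group_text([(10, 700, 'Invoice '), (18, 702, 'no.'), (120, 701, '12345'),
                  (10, 650, 'Total'), (120, 649, '99.00')]))
# -> [['Invoice no.', '12345'], ['Total', '99.00']]
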
Code example #5
def mailbag(ta_from,endstatus,frommessagetype,**argv):
    ''' 2 main functions:
        -   recognizes and distinguishes several edi types: x12 edifact tradacoms ('mailbag' in, correct editype out)
        -   split up interchanges (edifact, x12, tradacoms)
        details:
        - edifact, x12 and tradacoms can be mixed
        - handle multiple UNA in one file, including different charsets.
        - handle multiple ISA's with different separators in one file
        in bots > 3.0.0 all mailbag, edifact, x12 and tradacoms go via mailbag.
    '''
    edifile = botslib.readdata(filename=ta_from.filename)
    startpos = 0
    nr_interchanges = 0
    while (1):
        found = HEADER.match(edifile[startpos:])
        if found is None:
            if edifile[startpos:].strip(string.whitespace+'\x1A\x00'):  #there is content...but not valid
                if nr_interchanges:    #found interchanges, but remainder is not valid
                    raise botslib.InMessageError(_(u'[M50]: Found data not in a valid interchange at position %(pos)s.'),{'pos':startpos})                
                else:   #no interchanges found, content is not a valid edifact/x12/tradacoms interchange
                    if frommessagetype == 'mailbag':    #if indicated 'mailbag': guess if this is an xml file.....
                        sniffxml = edifile[:25]
                        sniffxml = sniffxml.lstrip(' \t\n\r\f\v\xFF\xFE\xEF\xBB\xBF\x00')       #to find first 'real' data; some chars are because of BOM, UTF-16 etc.
                        if sniffxml and sniffxml[0] == '<':
                            #is an xml file; inmessage.py can determine the right xml messagetype via xpath.
                            filesize = len(edifile)
                            ta_to = ta_from.copyta(status=endstatus,statust=OK,filename=ta_from.filename,editype='xml',messagetype='mailbag',filesize=filesize)
                            return
                    raise botslib.InMessageError(_(u'[M51]: Edi file does not start with a valid interchange.'))
            else:   #no parseble content
                if nr_interchanges:    #OK: there are interchanges, but no new interchange is found.
                    return
                else:   #no edifact/x12/tradacoms envelope at all
                    raise botslib.InMessageError(_(u'[M52]: Edi file contains only whitespace.'))
        elif found.group('x12'):
            editype = 'x12'
            headpos = startpos + found.start('x12')
            #determine field_sep and record_sep
            count = 0
            for char in edifile[headpos:headpos+120]:  #search first 120 characters to determine separators
                if char in '\r\n' and count != 105:
                    continue
                count += 1
                if count == 4:
                    field_sep = char
                elif count in [7,18,21,32,35,51,54,70]:   #extra checks for fixed ISA. 
                    if char != field_sep:
                        raise botslib.InMessageError(_(u'[M53]: Non-valid ISA header at position %(pos)s; position %(pos_element)s of ISA is "%(foundchar)s", expect here element separator "%(field_sep)s".'),
                                                        {'pos':headpos,'pos_element':unicode(count),'foundchar':char,'field_sep':field_sep})
                elif count == 106:
                    record_sep = char
                    break
            foundtrailer = re.search('''%(record_sep)s
                                        \s*
                                        I[\n\r]*E[\n\r]*A
                                        .+?
                                        %(record_sep)s
                                        '''%{'record_sep':re.escape(record_sep)},
                                        edifile[headpos:],re.DOTALL|re.VERBOSE)
            if not foundtrailer:
                foundtrailer2 = re.search('''%(record_sep)s
                                            \s*
                                            I[\n\r]*E[\n\r]*A
                                            '''%{'record_sep':re.escape(record_sep)},
                                            edifile[headpos:],re.DOTALL|re.VERBOSE)
                if foundtrailer2:
                    raise botslib.InMessageError(_(u'[M60]: Found no segment terminator for IEA trailer at position %(pos)s.'),{'pos':foundtrailer2.start()})
                else:
                    raise botslib.InMessageError(_(u'[M54]: Found no valid IEA trailer for the ISA header at position %(pos)s.'),{'pos':headpos})
        elif found.group('edifact'):
            editype = 'edifact'
            headpos = startpos + found.start('edifact')
            #parse UNA. valid UNA: UNA:+.? '
            if found.group('UNA'):
                count = 0
                for char in found.group('UNAstring'):
                    if char in '\r\n':
                        continue
                    count += 1
                    if count == 2:
                        field_sep = char
                    elif count == 4:
                        escape = char
                    elif count == 6:
                        record_sep = char
                if count != 6 and len(found.group('UNAstring').rstrip()) != 6:
                    raise botslib.InMessageError(_(u'[M55]: Non-valid UNA-segment at position %(pos)s. UNA-segment should be 6 positions.'),{'pos':headpos})
                if found.group('field_sep') != field_sep:
                    raise botslib.InMessageError(_(u'[M56]: Data element separator used in edifact file differs from value indicated in UNA-segment.'))
            else:   #no UNA, interpret UNB
                if found.group('field_sep') == '+':
                    record_sep = "'"
                    escape = '?'
                elif found.group('field_sep') == '\x1D':        #according to std this was the preferred way...probably quite theoretical...but does no harm
                    record_sep = '\x1C'
                    escape = ''
                else:
                    raise botslib.InMessageError(_(u'[M57]: Edifact file with non-standard separators. UNA segment should be used.'))
            #search trailer
            foundtrailer = re.search('''[^%(escape)s\n\r]       #char that is not escape or cr/lf
                                        [\n\r]*?                #maybe some cr/lf's
                                        %(record_sep)s          #segment separator
                                        \s*                     #whitespace between segments
                                        U[\n\r]*N[\n\r]*Z       #UNZ
                                        .+?                     #any chars
                                        [^%(escape)s\n\r]       #char that is not escape or cr/lf
                                        [\n\r]*?                #maybe some cr/lf's
                                        %(record_sep)s          #segment separator
                                        '''%{'escape':escape,'record_sep':re.escape(record_sep)},
                                        edifile[headpos:],re.DOTALL|re.VERBOSE)
            if not foundtrailer:
                raise botslib.InMessageError(_(u'[M58]: Found no valid UNZ trailer for the UNB header at position %(pos)s.'),{'pos':headpos})
        elif found.group('tradacoms'):
            editype = 'tradacoms'
            #~ field_sep = '='     #the tradacoms 'after-segment-tag-separator'
            record_sep = "'"
            escape = '?'
            headpos = startpos + found.start('STX')
            foundtrailer = re.search('''[^%(escape)s\n\r]       #char that is not escape or cr/lf
                                        [\n\r]*?                #maybe some cr/lf's
                                        %(record_sep)s          #segment separator
                                        \s*                     #whitespace between segments
                                        E[\n\r]*N[\n\r]*D
                                        .+?
                                        [^%(escape)s\n\r]       #char that is not escape or cr/lf
                                        [\n\r]*?                #maybe some cr/lf's
                                        %(record_sep)s          #segment separator
                                        '''%{'escape':escape,'record_sep':re.escape(record_sep)},
                                        edifile[headpos:],re.DOTALL|re.VERBOSE)
            if not foundtrailer:
                raise botslib.InMessageError(_(u'[M59]: Found no valid END trailer for the STX header at position %(pos)s.'),{'pos':headpos})
        #so: found an interchange (from headerpos until endpos)
        endpos = headpos + foundtrailer.end()
        ta_to = ta_from.copyta(status=endstatus)  #make transaction for translated message; gets ta_info of ta_frommes
        tofilename = unicode(ta_to.idta)
        filesize = len(edifile[headpos:endpos])
        tofile = botslib.opendata(tofilename,'wb')
        tofile.write(edifile[headpos:endpos])
        tofile.close()
        #editype is now either edifact, x12 or tradacoms
        #frommessagetype is the original frommessagetype (from route).
        #frommessagetype would normally be edifact, x12, tradacoms or mailbag, but could also be eg ORDERSD96AUNEAN007.
        #If so, we want to preserve that.
        if frommessagetype != 'mailbag' and frommessagetype != editype:
            messagetype = frommessagetype
        else:
            messagetype = editype
        ta_to.update(statust=OK,filename=tofilename,editype=editype,messagetype=messagetype,filesize=filesize) #update outmessage transaction with ta_info;
        startpos = endpos
        nr_interchanges += 1
        botsglobal.logger.debug(_(u'        File written: "%(tofilename)s".'),{'tofilename':tofilename})
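
The UNA branch reads fixed positions of the service string that follows 'UNA': position 2 is the data element separator, 4 the release (escape) character and 6 the segment terminator (positions 1, 3 and 5 hold the component separator, decimal mark and a reserved space). A sketch with a hypothetical parse_una helper and the standard UNA:+.? ' service string advice:

def parse_una(una_string):
    # una_string: the characters after the 'UNA' tag (what found.group('UNAstring')
    # captures above); CR/LF inside the service string are skipped, as in mailbag().
    field_sep = escape = record_sep = None
    count = 0
    for char in una_string:
        if char in '\r\n':
            continue
        count += 1
        if count == 2:
            field_sep = char      # data element separator
        elif count == 4:
            escape = char         # release character
        elif count == 6:
            record_sep = char     # segment terminator
    return field_sep, escape, record_sep

print(parse_una(":+.? '"))   # -> ('+', '?', "'")
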
Code example #6
File: transform.py Project: jbellomy/bots.v202
def splitmailbag(startstatus=MAILBAG,endstatus=TRANSLATE,idroute=''):
    ''' splits 'mailbag' files to separate files each containing one interchange (ISA-IEA or UNA/UNB-UNZ).
        handles x12 and edifact; these can be mixed.
    '''
    header = re.compile('(\s*(ISA))|(\s*(UNA.{6})?\s*(U\s*N\s*B)s*.{1}(.{4}).{1}(.{1}))',re.DOTALL)
    #           group:    1   2       3  4            5        6         7
    for row in botslib.query(u'''SELECT idta,filename,charset
                                FROM  ta
                                WHERE   idta>%(rootidta)s
                                AND     status=%(status)s
                                AND     statust=%(statust)s
                                AND     idroute=%(idroute)s
                                ''',
                                {'status':startstatus,'statust':OK,'idroute':idroute,'rootidta':botslib.get_minta4query()}):
        try:
            ta_org=botslib.OldTransaction(row['idta'])
            ta_intermediate = ta_org.copyta(status=MAILBAGPARSED)
            edifile = botslib.readdata(filename=row['filename'])    #read as binary...
            botsglobal.logmap.debug(u'Start parsing mailbag file "%s".',row['filename'])
            startpos=0
            while (1):
                found = header.search(edifile[startpos:])
                if found is None:
                    if startpos:    #ISA/UNB have been found in file; no new ISA/UNB is found. So all processing is done.
                        break
                    #guess if this is an xml file.....
                    sniffxml = edifile[:25]
                    sniffxml = sniffxml.lstrip(' \t\n\r\f\v\xFF\xFE\xEF\xBB\xBF\x00')       #to find first 'real' data; some chars are because of BOM, UTF-16 etc.
                    if sniffxml and sniffxml[0]=='<':
                        ta_tomes=ta_intermediate.copyta(status=endstatus)  #make transaction for translated message; gets ta_info of ta_frommes
                        ta_tomes.update(status=STATUSTMP,statust=OK,filename=row['filename'],editype='xml') #update outmessage transaction with ta_info;
                        break
                    else:
                        raise botslib.InMessageError(_(u'Found no content in mailbag.'))
                elif found.group(1):
                    editype='x12'
                    headpos=startpos+ found.start(2)
                    count=0
                    for c in edifile[headpos:headpos+120]:  #search first 120 characters to find separators
                        if c in '\r\n' and count!=105:
                            continue
                        count +=1
                        if count==4:
                            field_sep = c
                        elif count==106:
                            record_sep = c
                            break
                    foundtrailer = re.search(re.escape(record_sep)+'\s*IEA'+re.escape(field_sep)+'.+?'+re.escape(record_sep),edifile[headpos:],re.DOTALL)
                elif found.group(3):
                    editype='edifact'
                    if found.group(4):
                        field_sep = edifile[startpos + found.start(4) + 4]
                        record_sep = edifile[startpos + found.start(4) + 8]
                        headpos=startpos+ found.start(4)
                    else:
                        field_sep = '+'
                        record_sep = "'"
                        headpos=startpos+ found.start(5)
                    foundtrailer = re.search(re.escape(record_sep)+'\s*U\s*N\s*Z\s*'+re.escape(field_sep)+'.+?'+re.escape(record_sep),edifile[headpos:],re.DOTALL)
                if not foundtrailer:
                    raise botslib.InMessageError(_(u'Found no valid envelope trailer in mailbag.'))
                endpos = headpos+foundtrailer.end()
                #so: interchange is from headpos until endpos
                #~ if header.search(edifile[headpos+25:endpos]):   #check if there is another header in the interchange
                    #~ raise botslib.InMessageError(u'Error in mailbag format: found no valid envelope trailer.')
                ta_tomes=ta_intermediate.copyta(status=endstatus)  #make transaction for translated message; gets ta_info of ta_frommes
                tofilename = str(ta_tomes.idta)
                tofile = botslib.opendata(tofilename,'wb')
                tofile.write(edifile[headpos:endpos])
                tofile.close()
                ta_tomes.update(status=STATUSTMP,statust=OK,filename=tofilename,editype=editype,messagetype=editype) #update outmessage transaction with ta_info;
                startpos=endpos
        except:
            txt=botslib.txtexc()
            ta_intermediate.failure()
            ta_intermediate.update(statust=ERROR,errortext=txt)
        else:
            botsglobal.logmap.debug(u'OK Parsing mailbag file "%s".',row['filename'])
            ta_org.update(statust=DONE)
            ta_intermediate.succes(endstatus)
            ta_intermediate.update(statust=DONE)
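
The group numbering noted under the compiled header pattern is easiest to read with a quick probe: groups 1 and 2 fire for an x12 ISA header, group 3 for an edifact header, with group 4 holding the optional UNA string. A small check against hypothetical interchange fragments, reusing the same pattern:

import re

header = re.compile('(\s*(ISA))|(\s*(UNA.{6})?\s*(U\s*N\s*B)s*.{1}(.{4}).{1}(.{1}))', re.DOTALL)

x12_sample = 'ISA*00*...'                                          # hypothetical fragments
edifact_sample = "UNA:+.? 'UNB+UNOA:2+SENDER+RECEIVER+110101:1253+1'"

print(header.search(x12_sample).group(1, 3))       # -> ('ISA', None): x12 branch
m = header.search(edifact_sample)
print((m.group(4), m.group(5), m.group(6)))        # -> ("UNA:+.? '", 'UNB', 'UNOA')
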