예제 #1
0
def botszip(ta_from,endstatus,**argv):
    ''' Zip the file of ta_from into a new archive;
        editype & messagetype are unchanged.
        A copy of ta_from is made with status endstatus; the archive is named after
        the idta of that copy and contains the original file under its own filename.
        Extra keyword arguments (argv) are accepted but not used.
    '''
    ta_to = ta_from.copyta(status=endstatus)
    tofilename = unicode(ta_to.idta)
    archive = zipfile.ZipFile(botslib.abspathdata(filename=tofilename), 'w', zipfile.ZIP_DEFLATED)
    try:
        archive.write(botslib.abspathdata(filename=ta_from.filename),ta_from.filename)
    finally:
        #always release the archive handle, also when adding the file fails
        archive.close()
    ta_to.update(statust=OK,filename=tofilename) #update outmessage transaction with ta_info;
예제 #2
0
def botsunzip(ta_from,endstatus,password=None,pass_non_zip=False,**argv):
    ''' Unzip file: every file in the archive becomes a new transaction with status endstatus;
        editype & messagetype are unchanged.
        password: if given, used to read encrypted archive members.
        pass_non_zip: if True a file that is not a zip-file is passed on as is;
                      otherwise botslib.InMessageError is raised for such a file.
    '''
    try:
        z = zipfile.ZipFile(botslib.abspathdata(filename=ta_from.filename),mode='r')
    except zipfile.BadZipfile:
        botsglobal.logger.debug(_(u'File is not a zip-file.'))
        if pass_non_zip:        #just pass the file
            botsglobal.logger.debug(_(u'"pass_non_zip" is True, just pass the file.'))
            ta_from.copyta(status=endstatus,statust=OK)     #the copy itself is the result; no further handling needed
            return
        raise botslib.InMessageError(_(u'File is not a zip-file.'))

    try:
        if password:
            z.setpassword(password)
        for f in z.infolist():
            if f.filename.endswith('/'):    #check if this is a dir; if so continue (endswith is also safe for an empty name)
                continue
            ta_to = ta_from.copyta(status=endstatus)
            tofilename = str(ta_to.idta)
            tofile = botslib.opendata(tofilename,'wb')
            try:
                tofile.write(z.read(f.filename))
            finally:
                tofile.close()      #do not leak the output file when reading the archive member fails
            ta_to.update(statust=OK,filename=tofilename) #update outmessage transaction with ta_info;
            botsglobal.logger.debug(_(u'        File written: "%s".'),tofilename)
    finally:
        z.close()       #the archive was never closed before; release it in all cases
예제 #3
0
def botsunzip(ta_from, endstatus, password=None, pass_non_zip=False, **argv):
    ''' Unzip file: every file in the archive becomes a new transaction with status endstatus;
        editype & messagetype are unchanged.
        password: if given, used to read encrypted archive members.
        pass_non_zip: if True a file that is not a zip-file is passed on as is;
                      otherwise botslib.InMessageError is raised for such a file.
    '''
    try:
        z = zipfile.ZipFile(botslib.abspathdata(filename=ta_from.filename),
                            mode='r')
    except zipfile.BadZipfile:
        botsglobal.logger.debug(_(u'File is not a zip-file.'))
        if pass_non_zip:  #just pass the file
            botsglobal.logger.debug(
                _(u'"pass_non_zip" is True, just pass the file.'))
            #the copy itself is the result; no further handling needed
            ta_from.copyta(status=endstatus, statust=OK)
            return
        raise botslib.InMessageError(_(u'File is not a zip-file.'))

    try:
        if password:
            z.setpassword(password)
        for f in z.infolist():
            #check if this is a dir; if so continue (endswith is also safe for an empty name)
            if f.filename.endswith('/'):
                continue
            ta_to = ta_from.copyta(status=endstatus)
            tofilename = str(ta_to.idta)
            tofile = botslib.opendata(tofilename, 'wb')
            try:
                tofile.write(z.read(f.filename))
            finally:
                #do not leak the output file when reading the archive member fails
                tofile.close()
            ta_to.update(
                statust=OK,
                filename=tofilename)  #update outmessage transaction with ta_info;
            botsglobal.logger.debug(_(u'        File written: "%s".'), tofilename)
    finally:
        #the archive was never closed before; release it in all cases
        z.close()
예제 #4
0
 def initfromfile(self):
     ''' Parse the xml file named in ta_info['filename'] and store the resulting bots node tree in self.root.
         Also sets ta_info['attributemarker'] to '__'.
     '''
     ta_info = self.ta_info
     xmlpath = botslib.abspathdata(ta_info['filename'])
     ta_info['attributemarker'] = '__'
     xmlparser = ET.XMLParser()
     #ElementTree lexes and parses the file; parse() returns the root element of the resulting etree
     xmlroot = ET.ElementTree().parse(xmlpath, xmlparser)
     #an etree resembles a bots node tree, but a conversion is still needed
     self.root = self._etree2botstree(xmlroot)
예제 #5
0
파일: transform.py 프로젝트: alexproca/bots
def handle_out_message(out_translated,ta_translated):
    ''' Finish an outgoing message after translation.
        If the ta_info of out_translated has statust DONE, no file is written and the message is marked as DISCARD;
        otherwise the message is written and its filesize is stored in ta_info.
        In both cases ta_translated is updated with the ta_info of out_translated.
    '''
    discard_requested = out_translated.ta_info['statust'] == DONE
    if not discard_requested:
        botsglobal.logger.debug(_(u'Start writing output file editype "%(editype)s" messagetype "%(messagetype)s".'),out_translated.ta_info)
        out_translated.writeall()   #write result of translation.
        written_path = botslib.abspathdata(out_translated.ta_info['filename'])
        out_translated.ta_info['filesize'] = os.path.getsize(written_path)
    else:
        #the mappingscript set statust to DONE: discard the message, there is no output file
        botsglobal.logger.debug(_(u'No output file because mappingscript explicitly indicated this.'))
        out_translated.ta_info['filename'] = ''
        out_translated.ta_info['status'] = DISCARD
    ta_translated.update(**out_translated.ta_info)  #pass all ta_info values to the transaction of the output message
예제 #6
0
def write_outgoing(run):
    ''' Move every outgoing file without error to the output directory (run.outpath).
        The name in the output directory is made by filename_formatter from run.outfilename.
        On success 'outfilename' is set in the outgoing entry; on failure 'error' is set to the exception text.
    '''
    targetdir = botslib.join(run.outpath)
    botslib.dirshouldbethere(targetdir)
    for message in run.outgoing:
        if message['error']:
            continue    #an earlier step failed for this message; nothing to write
        try:
            destination = botslib.join(targetdir,filename_formatter(run.outfilename,message))
            source = botslib.abspathdata(message['filename'])
            shutil.move(source,destination)
        except:
            #any failure is registered per message; the other messages are still handled
            message.update({'error':botslib.txtexc()})
        else:
            message.update({'outfilename':destination})
예제 #7
0
def read_incoming(run):
    ''' Copy every file matching run.inpath/run.infilename to a bots data file and register it in run.incoming.
        Each entry in run.incoming is a dict with keys: infilename, filename, error, editype, messagetype.
        'error' is an empty string on success, otherwise the exception text;
        'filename' is an empty string if no name for the bots data file could be obtained.
    '''
    pattern = botslib.join(run.inpath,run.infilename)
    filelist = sorted(path for path in glob.iglob(pattern) if os.path.isfile(path))
    for infilename in filelist:
        filename = ''   #reset per file: a failure must never register the filename of another file
        try:
            filename = transform.unique('bots_file_name')
            abs_filename = botslib.abspathdata(filename)
            shutil.copy(infilename,abs_filename)          #move if to be deleted
        except:
            #any failure is registered per file; the other files are still handled
            txt = botslib.txtexc()
        else:
            txt = ''    #no errors
        run.incoming.append({'infilename':infilename,'filename':filename,'error':txt,'editype':run.translation['editype'],'messagetype':run.translation['messagetype']})
예제 #8
0
def write_outgoing(run):
    ''' Move every outgoing file without error to the output directory (run.outpath).
        The name in the output directory is made by filename_formatter from run.outfilename.
        On success 'outfilename' is set in the outgoing entry; on failure 'error' is set to the exception text.
    '''
    targetdir = botslib.join(run.outpath)
    botslib.dirshouldbethere(targetdir)
    for message in run.outgoing:
        if message['error']:
            #an earlier step failed for this message; nothing to write
            continue
        try:
            destination = botslib.join(
                targetdir, filename_formatter(run.outfilename, message))
            source = botslib.abspathdata(message['filename'])
            shutil.move(source, destination)
        except:
            #any failure is registered per message; the other messages are still handled
            message.update({'error': botslib.txtexc()})
        else:
            message.update({'outfilename': destination})
예제 #9
0
def handle_out_message(out_translated, ta_translated):
    ''' Finish an outgoing message after translation.
        If the ta_info of out_translated has statust DONE, no file is written and the message is marked as DISCARD;
        otherwise the message is written and its filesize is stored in ta_info.
        In both cases ta_translated is updated with the ta_info of out_translated.
    '''
    discard_requested = out_translated.ta_info['statust'] == DONE
    if not discard_requested:
        botsglobal.logger.debug(
            _(u'Start writing output file editype "%(editype)s" messagetype "%(messagetype)s".'),
            out_translated.ta_info)
        out_translated.writeall()  #write result of translation.
        written_path = botslib.abspathdata(out_translated.ta_info['filename'])
        out_translated.ta_info['filesize'] = os.path.getsize(written_path)
    else:
        #the mappingscript set statust to DONE: discard the message, there is no output file
        botsglobal.logger.debug(
            _(u'No output file because mappingscript explicitly indicated this.'))
        out_translated.ta_info['filename'] = ''
        out_translated.ta_info['status'] = DISCARD
    #pass all ta_info values to the transaction of the output message
    ta_translated.update(**out_translated.ta_info)
예제 #10
0
def read_incoming(run):
    ''' Copy every file matching run.inpath/run.infilename to a bots data file and register it in run.incoming.
        Each entry in run.incoming is a dict with keys: infilename, filename, error, editype, messagetype.
        'error' is an empty string on success, otherwise the exception text;
        'filename' is an empty string if no name for the bots data file could be obtained.
    '''
    pattern = botslib.join(run.inpath, run.infilename)
    filelist = sorted(
        path for path in glob.iglob(pattern) if os.path.isfile(path))
    for infilename in filelist:
        #reset per file: a failure must never register the filename of another file
        filename = ''
        try:
            filename = transform.unique('bots_file_name')
            abs_filename = botslib.abspathdata(filename)
            shutil.copy(infilename, abs_filename)  #move if to be deleted
        except:
            #any failure is registered per file; the other files are still handled
            txt = botslib.txtexc()
        else:
            txt = ''  #no errors
        run.incoming.append({
            'infilename': infilename,
            'filename': filename,
            'error': txt,
            'editype': run.translation['editype'],
            'messagetype': run.translation['messagetype']
        })
예제 #11
0
def extractexcel(ta_from, endstatus, **argv):
    ''' Extract the first sheet of an excel file to a csv file.
        editype & messagetype are unchanged.
        A copy of ta_from is made with status endstatus; the csv file is named after the idta of that copy.
        Optional parameters in argv for the csv output: charset, quotechar, field_sep, escape.
        Raises botslib.InMessageError if anything fails during extraction.
    '''
    import xlrd
    import csv

    #***functions used by extractexcel
    #-------------------------------------------------------------------------------
    def read_xls(infilename):
        # Read excel first sheet into a 2-d array
        book = xlrd.open_workbook(infilename)
        sheet = book.sheet_by_index(0)
        xlsdata = []
        for row in range(sheet.nrows):
            (types, values) = (sheet.row_types(row), sheet.row_values(row))
            xlsdata.append([
                format_excelval(book, celltype, value, False)
                for (celltype, value) in zip(types, values)
            ])
        return xlsdata

    #-------------------------------------------------------------------------------
    def dump_csv(xlsdata, tofilename):
        # Write the 2-d array as csv; the file is closed even when writing fails
        stream = botslib.opendata(tofilename, 'wb')
        try:
            csvout = csv.writer(stream,
                                quotechar=quotechar,
                                delimiter=field_sep,
                                doublequote=doublequote,
                                escapechar=escape)
            csvout.writerows([utf8ize(row) for row in xlsdata])
        finally:
            stream.close()

    #-------------------------------------------------------------------------------
    def format_excelval(book, celltype, value, wanttupledate):
        #  Clean up the incoming excel data for some data types
        #  (xlrd cell types: 2 is number, 3 is date, 5 is error)
        if celltype == 2:
            if value == int(value):  #whole numbers: drop the '.0' of the float
                value = int(value)
        elif celltype == 3:
            datetuple = xlrd.xldate_as_tuple(value, book.datemode)
            value = datetuple if wanttupledate else tupledate_to_isodate(
                datetuple)
        elif celltype == 5:
            value = xlrd.error_text_from_code[value]
        return value

    #-------------------------------------------------------------------------------
    def tupledate_to_isodate(tupledate):
        # Turns a gregorian (year, month, day, hour, minute, nearest_second) into a
        # standard YYYY-MM-DDTHH:MM:SS ISO date.
        # The date part is left out if it is all zeros; the time part is left out
        # if it is all zeros, unless there is no date part either.
        (y, m, d, hh, mm, ss) = tupledate
        has_date = any(n != 0 for n in (y, m, d))
        has_time = any(n != 0 for n in (hh, mm, ss))
        date = "%04d-%02d-%02d" % (y, m, d) if has_date else ''
        time = "T%02d:%02d:%02d" % (hh, mm, ss) if has_time or not date else ''
        return date + time

    #-------------------------------------------------------------------------------
    def utf8ize(l):
        # Encode string-like things using charset, leave other things alone
        return [
            unicode(s).encode(charset) if hasattr(s, 'encode') else s
            for s in l
        ]

    #***end functions used by extractexcel
    #get parameters for csv-format; defaults are as the csv defaults (in grammar.py)
    charset = argv.get('charset', "utf-8")
    quotechar = argv.get('quotechar', "'")
    field_sep = argv.get('field_sep', ':')
    escape = argv.get('escape', '')
    doublequote = not escape  #without an escape character quotes are doubled
    try:
        infilename = botslib.abspathdata(ta_from.filename)
        xlsdata = read_xls(infilename)
        ta_to = ta_from.copyta(status=endstatus)
        tofilename = str(ta_to.idta)
        dump_csv(xlsdata, tofilename)
        ta_to.update(
            statust=OK,
            filename=tofilename)  #update outmessage transaction with ta_info;
        botsglobal.logger.debug(_(u'        File written: "%s".'), tofilename)
    except:
        #every failure is reported as an error in the incoming message
        txt = botslib.txtexc()
        botsglobal.logger.error(
            _(u'Excel extraction failed, may not be an Excel file? Error:\n%s'
              ), txt)
        raise botslib.InMessageError(_(
            u'Excel extraction failed, may not be an Excel file? Error:\n$error'
        ),
                                     error=txt)
예제 #12
0
def mergemessages(startstatus, endstatus, idroute, rootidta=None):
    ''' Merges and/or envelopes one or more messages to one file;
        In db-ta: attribute 'merge' indicates message should be merged with similar messages; 'merge' is generated in translation from messagetype-grammar
        If merge is False: 1 message per envelope - no merging, else append all similar messages to one file
        Implementation as separate loops: first one for enveloping only, then one for merge&envelope
        db-ta status TRANSLATED---->FILEOUT
        Parameters:
        - startstatus: status of the ta rows that are selected;
        - endstatus: status given to the ta of each enveloped file;
        - idroute: only ta rows of this route are selected by the two outer queries;
        - rootidta: only ta rows with a higher idta are selected; if None, botsglobal.currentrun.get_minta4query() is used.
        Exceptions raised while enveloping are caught; the error text is stored in the ta of the enveloped file (statust ERROR).
    '''
    if rootidta is None:
        rootidta = botsglobal.currentrun.get_minta4query()
    #**********for messages only to envelope (no merging)
    for row in botslib.query(
            u'''SELECT editype,messagetype,frompartner,topartner,testindicator,charset,contenttype,envelope,nrmessages,idroute,merge,idta,filename,rsrv3
                                FROM ta
                                WHERE idta>%(rootidta)s
                                AND status=%(status)s
                                AND statust=%(statust)s
                                AND merge=%(merge)s
                                AND idroute=%(idroute)s
                                ORDER BY idta
                                ''', {
                'rootidta': rootidta,
                'status': startstatus,
                'statust': OK,
                'merge': False,
                'idroute': idroute
            }):
        try:
            ta_info = dict(row)
            ta_fromfile = botslib.OldTransaction(
                ta_info['idta'])  #edi message to envelope
            ta_tofile = ta_fromfile.copyta(
                status=endstatus
            )  #edifile for enveloped message; attributes of not-enveloped message are copied...
            ta_info['filename'] = unicode(
                ta_tofile.idta)  #create filename for enveloped message
            botsglobal.logger.debug(
                u'Envelope 1 message editype: %(editype)s, messagetype: %(messagetype)s.',
                ta_info)
            #envelope gets the filename of the not-enveloped message (from row); ta_info['filename'] is the file to write
            envelope(ta_info, [row['filename']])
            ta_info['filesize'] = os.path.getsize(
                botslib.abspathdata(ta_info['filename']))  #get filesize
        except:
            #NOTE(review): if OldTransaction() or copyta() raised above, ta_tofile (and ta_fromfile in the finally)
            #is unbound for the first row or still refers to the transaction of the previous row - TODO confirm this cannot happen
            txt = botslib.txtexc()
            ta_tofile.update(statust=ERROR, errortext=txt)
        else:
            ta_tofile.update(
                statust=OK,
                **ta_info)  #selection is used to update enveloped message;
        finally:
            #the not-enveloped message is set to DONE, whether enveloping succeeded or not
            ta_fromfile.update(statust=DONE)

    #**********for messages to merge & envelope
    #one row per group of messages that have the same envelope characteristics
    for row in botslib.query(
            u'''SELECT editype,messagetype,frompartner,topartner,testindicator,charset,contenttype,envelope,rsrv3,sum(nrmessages) as nrmessages
                                FROM ta
                                WHERE idta>%(rootidta)s
                                AND status=%(status)s
                                AND statust=%(statust)s
                                AND merge=%(merge)s
                                AND idroute=%(idroute)s
                                GROUP BY editype,messagetype,frompartner,topartner,testindicator,charset,contenttype,envelope,rsrv3
                                ORDER BY editype,messagetype,frompartner,topartner,testindicator,charset,contenttype,envelope,rsrv3
                                ''', {
                'rootidta': rootidta,
                'status': startstatus,
                'statust': OK,
                'merge': True,
                'idroute': idroute
            }):
        try:
            ta_info = dict(row)
            ta_tofile = botslib.NewTransaction(
                status=endstatus,
                idroute=idroute)  #edifile for enveloped messages
            ta_info.update({
                'idroute': idroute,
                'merge': False,
                'filename': unicode(ta_tofile.idta)
            })  #SELECT/GROUP BY gives only values that are the grouped
            filename_list = []
            #gather individual idta and filenames
            #explicitly allow frompartner/topartner to be None/NULL
            #NOTE(review): 'rsrv3' is passed as parameter but the query text has no rsrv3 condition; the query also has no
            #condition on idroute, contenttype or envelope although the outer query selects/groups on these - TODO confirm this is intended
            for row2 in botslib.query(
                    u'''SELECT idta, filename
                                            FROM ta
                                            WHERE idta>%(rootidta)s
                                            AND status=%(status)s
                                            AND statust=%(statust)s
                                            AND merge=%(merge)s
                                            AND editype=%(editype)s
                                            AND messagetype=%(messagetype)s
                                            AND (frompartner=%(frompartner)s OR frompartner IS NULL)
                                            AND (topartner=%(topartner)s OR topartner IS NULL)
                                            AND testindicator=%(testindicator)s
                                            AND charset=%(charset)s
                                            ORDER BY idta
                                            ''', {
                        'rootidta': rootidta,
                        'status': startstatus,
                        'statust': OK,
                        'merge': True,
                        'editype': ta_info['editype'],
                        'messagetype': ta_info['messagetype'],
                        'frompartner': ta_info['frompartner'],
                        'topartner': ta_info['topartner'],
                        'testindicator': ta_info['testindicator'],
                        'charset': ta_info['charset'],
                        'rsrv3': ta_info['rsrv3']
                    }):
                ta_fromfile = botslib.OldTransaction(
                    row2['idta'])  #edi message to be merged/envelope
                #the message is set to DONE here, so before envelope() has run
                ta_fromfile.update(
                    child=ta_tofile.idta,
                    statust=DONE)  #set child because of n->1 relation
                filename_list.append(row2['filename'])
            botsglobal.logger.debug(
                u'Merge and envelope: editype: %(editype)s, messagetype: %(messagetype)s, %(nrmessages)s messages',
                ta_info)
            envelope(ta_info, filename_list)
            ta_info['filesize'] = os.path.getsize(
                botslib.abspathdata(ta_info['filename']))  #get filesize
        except:
            #NOTE(review): if NewTransaction() raised, ta_tofile is unbound or refers to an earlier transaction - TODO confirm
            txt = botslib.txtexc()
            ta_tofile.update(statust=ERROR, errortext=txt)
        else:
            ta_tofile.update(statust=OK, **ta_info)
예제 #13
0
def mergemessages(startstatus,endstatus,idroute,rootidta=None):
    ''' Merges and/or envelopes one or more messages to one file;
        In db-ta: attribute 'merge' indicates message should be merged with similar messages; 'merge' is generated in translation from messagetype-grammar
        If merge is False: 1 message per envelope - no merging, else append all similar messages to one file
        Implementation as separate loops: first one for enveloping only, then one for merge&envelope
        db-ta status TRANSLATED---->FILEOUT
        Parameters:
        - startstatus: status of the ta rows that are selected;
        - endstatus: status given to the ta of each enveloped file;
        - idroute: only ta rows of this route are selected by the two outer queries;
        - rootidta: only ta rows with a higher idta are selected; if None, botsglobal.currentrun.get_minta4query() is used.
        Exceptions raised while enveloping are caught; the error text is stored in the ta of the enveloped file (statust ERROR).
    '''
    if rootidta is None:
        rootidta = botsglobal.currentrun.get_minta4query()
    #**********for messages only to envelope (no merging)
    for row in botslib.query(u'''SELECT editype,messagetype,frompartner,topartner,testindicator,charset,contenttype,envelope,nrmessages,idroute,merge,idta,filename,rsrv3
                                FROM ta
                                WHERE idta>%(rootidta)s
                                AND status=%(status)s
                                AND statust=%(statust)s
                                AND merge=%(merge)s
                                AND idroute=%(idroute)s
                                ORDER BY idta
                                ''',
                                {'rootidta':rootidta,'status':startstatus,'statust':OK,'merge':False,'idroute':idroute}):
        try:
            ta_info = dict(row)
            ta_fromfile = botslib.OldTransaction(ta_info['idta'])    #edi message to envelope
            ta_tofile = ta_fromfile.copyta(status=endstatus)  #edifile for enveloped message; attributes of not-enveloped message are copied...
            ta_info['filename'] = unicode(ta_tofile.idta)   #create filename for enveloped message
            botsglobal.logger.debug(u'Envelope 1 message editype: %(editype)s, messagetype: %(messagetype)s.',ta_info)
            #envelope gets the filename of the not-enveloped message (from row); ta_info['filename'] is the file to write
            envelope(ta_info,[row['filename']])
            ta_info['filesize'] = os.path.getsize(botslib.abspathdata(ta_info['filename']))    #get filesize
        except:
            #NOTE(review): if OldTransaction() or copyta() raised above, ta_tofile (and ta_fromfile in the finally)
            #is unbound for the first row or still refers to the transaction of the previous row - TODO confirm this cannot happen
            txt = botslib.txtexc()
            ta_tofile.update(statust=ERROR,errortext=txt)
        else:
            ta_tofile.update(statust=OK,**ta_info)  #selection is used to update enveloped message;
        finally:
            #the not-enveloped message is set to DONE, whether enveloping succeeded or not
            ta_fromfile.update(statust=DONE)

    #**********for messages to merge & envelope
    #one row per group of messages that have the same envelope characteristics
    for row in botslib.query(u'''SELECT editype,messagetype,frompartner,topartner,testindicator,charset,contenttype,envelope,rsrv3,sum(nrmessages) as nrmessages
                                FROM ta
                                WHERE idta>%(rootidta)s
                                AND status=%(status)s
                                AND statust=%(statust)s
                                AND merge=%(merge)s
                                AND idroute=%(idroute)s
                                GROUP BY editype,messagetype,frompartner,topartner,testindicator,charset,contenttype,envelope,rsrv3
                                ORDER BY editype,messagetype,frompartner,topartner,testindicator,charset,contenttype,envelope,rsrv3
                                ''',
                                {'rootidta':rootidta,'status':startstatus,'statust':OK,'merge':True,'idroute':idroute}):
        try:
            ta_info = dict(row)
            ta_tofile = botslib.NewTransaction(status=endstatus,idroute=idroute)  #edifile for enveloped messages
            ta_info.update({'idroute':idroute,'merge':False,'filename':unicode(ta_tofile.idta)})       #SELECT/GROUP BY gives only values that are the grouped
            filename_list = []
            #gather individual idta and filenames
            #explicitly allow frompartner/topartner to be None/NULL
            #NOTE(review): 'rsrv3' is passed as parameter but the query text has no rsrv3 condition; the query also has no
            #condition on idroute, contenttype or envelope although the outer query selects/groups on these - TODO confirm this is intended
            for row2 in botslib.query(u'''SELECT idta, filename
                                            FROM ta
                                            WHERE idta>%(rootidta)s
                                            AND status=%(status)s
                                            AND statust=%(statust)s
                                            AND merge=%(merge)s
                                            AND editype=%(editype)s
                                            AND messagetype=%(messagetype)s
                                            AND (frompartner=%(frompartner)s OR frompartner IS NULL)
                                            AND (topartner=%(topartner)s OR topartner IS NULL)
                                            AND testindicator=%(testindicator)s
                                            AND charset=%(charset)s
                                            ORDER BY idta
                                            ''',
                                            {'rootidta':rootidta,'status':startstatus,'statust':OK,'merge':True,
                                            'editype':ta_info['editype'],'messagetype':ta_info['messagetype'],'frompartner':ta_info['frompartner'],
                                            'topartner':ta_info['topartner'],'testindicator':ta_info['testindicator'],'charset':ta_info['charset'],
                                            'rsrv3':ta_info['rsrv3']}):
                ta_fromfile = botslib.OldTransaction(row2['idta'])      #edi message to be merged/envelope
                #the message is set to DONE here, so before envelope() has run
                ta_fromfile.update(child=ta_tofile.idta,statust=DONE)                #set child because of n->1 relation
                filename_list.append(row2['filename'])
            botsglobal.logger.debug(u'Merge and envelope: editype: %(editype)s, messagetype: %(messagetype)s, %(nrmessages)s messages',ta_info)
            envelope(ta_info,filename_list)
            ta_info['filesize'] = os.path.getsize(botslib.abspathdata(ta_info['filename']))    #get filesize
        except:
            #NOTE(review): if NewTransaction() raised, ta_tofile is unbound or refers to an earlier transaction - TODO confirm
            txt = botslib.txtexc()
            ta_tofile.update(statust=ERROR,errortext=txt)
        else:
            ta_tofile.update(statust=OK,**ta_info)
예제 #14
0
def extractpdf(ta_from,endstatus,**argv):
    ''' Try to extract text content of a PDF file to a csv.
        You know this is not a great idea, right? But we'll do the best we can anyway!
        Page and line numbers are added to each row.
        Columns and rows are based on the x and y coordinates of each text element within tolerance allowed.
        Multiple text elements may combine to make one field, some PDFs have every character separated!
        You may need to experiment with x_group and y_group values, but defaults seem ok for most files.
        Output csv is UTF-8 encoded - The csv module doesn't directly support reading and writing Unicode
        If the PDF is just an image, all bets are off. Maybe try OCR, good luck with that!
        Mike Griffin 14/12/2011

        A copy of ta_from is made with status endstatus; the csv file is named after the idta of that copy.
        Optional parameters in argv: x_group, y_group, password, quotechar, field_sep, escape, charset.
        Raises botslib.InMessageError if anything fails during extraction.
    '''
    from pdfminer.pdfinterp import PDFResourceManager, process_pdf
    from pdfminer.converter import TextConverter
    from pdfminer.layout import LTContainer, LTText
    from collections import defaultdict
    import csv

    class CsvConverter(TextConverter):
        ''' pdfminer device: writes the text of each page it receives as rows to csvout.'''

        def receive_layout(self, ltpage):
            lines = defaultdict(dict)   #per (grouped) y position: dict of x position -> text
            yv = []                     #y positions seen so far on this page

            # recursively get every text element and its coordinates
            def render(item):
                if isinstance(item, LTContainer):
                    for child in item:
                        render(child)
                elif isinstance(item, LTText):
                    (unused1,unused2,x,y) = item.bbox

                    # group the y values (rows) within group tolerance
                    for v in yv:
                        if v-y_group < y < v+y_group:
                            y = v
                    yv.append(y)

                    # key is -y, so sorting the keys ascending runs through the rows in descending y
                    line = lines[int(-y)]
                    line[x] = item.get_text().encode('utf-8')

            render(ltpage)

            lineid = 0
            for y in sorted(lines.keys()):
                line = lines[y]
                lineid += 1
                csvdata = [ltpage.pageid,lineid] # first 2 columns are page and line numbers

                # group the x values (fields) within group tolerance
                p = 0
                field_txt = ''
                for x in sorted(line.keys()):
                    gap = x - p
                    if p > 0 and gap > x_group:     #gap is big enough: start a new field
                        csvdata.append(field_txt)
                        field_txt = ''
                    field_txt += line[x]
                    p = x
                csvdata.append(field_txt)
                csvout.writerow(csvdata)
            if lineid == 0:
                raise botslib.InMessageError(_(u'PDF text extraction failed, it may contain just image(s)?'))


    #get some optional parameters
    x_group = argv.get('x_group',10) # group text closer than this as one field
    y_group = argv.get('y_group',5)  # group lines closer than this as one line
    password = argv.get('password','')
    quotechar = argv.get('quotechar','"')
    field_sep = argv.get('field_sep',',')
    escape = argv.get('escape','\\')
    charset = argv.get('charset','utf-8')
    doublequote = not escape    #without an escape character quotes are doubled

    pdf_stream = None
    csv_stream = None
    try:
        try:
            pdf_stream = botslib.opendata(ta_from.filename, 'rb')
            ta_to = ta_from.copyta(status=endstatus)
            tofilename = unicode(ta_to.idta)
            csv_stream = botslib.opendata(tofilename,'wb')
            csvout = csv.writer(csv_stream, quotechar=quotechar, delimiter=field_sep, doublequote=doublequote, escapechar=escape)

            # Process PDF
            rsrcmgr = PDFResourceManager(caching=True)
            device = CsvConverter(rsrcmgr, csv_stream, codec=charset)
            process_pdf(rsrcmgr, device, pdf_stream, pagenos=set(), password=password, caching=True, check_extractable=True)

            device.close()
        finally:
            #always release both files, also when extraction fails halfway
            for stream in (pdf_stream,csv_stream):
                if stream is not None:
                    stream.close()
        filesize = os.path.getsize(botslib.abspathdata(tofilename))
        ta_to.update(statust=OK,filename=tofilename,filesize=filesize) #update outmessage transaction with ta_info;
        botsglobal.logger.debug(_(u'        File written: "%(tofilename)s".'),{'tofilename':tofilename})
    except:
        #every failure is reported as an error in the incoming message
        txt = botslib.txtexc()
        botsglobal.logger.error(_(u'PDF extraction failed, may not be a PDF file? Error:\n%(txt)s'),{'txt':txt})
        raise botslib.InMessageError(_(u'PDF extraction failed, may not be a PDF file? Error:\n%(txt)s'),{'txt':txt})
예제 #15
0
def extractexcel(ta_from,endstatus,**argv):
    ''' Extract the first sheet of an excel file to a csv file.
        editype & messagetype are unchanged.
        A copy of ta_from is made with status endstatus; the csv file is named after the idta of that copy.
        Optional parameters in argv for the csv output: charset, quotechar, field_sep, escape.
        Raises botslib.InMessageError if anything fails during extraction.
    '''
    import xlrd
    import csv
    #***functions used by extractexcel
    #-------------------------------------------------------------------------------
    def read_xls(infilename):
        # Read excel first sheet into a 2-d array
        book = xlrd.open_workbook(infilename)
        sheet = book.sheet_by_index(0)
        xlsdata = []
        for row in range(sheet.nrows):
            (types, values) = (sheet.row_types(row), sheet.row_values(row))
            xlsdata.append([format_excelval(book,celltype,value,False) for (celltype,value) in zip(types, values)])
        return xlsdata
    #-------------------------------------------------------------------------------
    def dump_csv(xlsdata, tofilename):
        # Write the 2-d array as csv; the file is closed even when writing fails
        stream = botslib.opendata(tofilename, 'wb')
        try:
            csvout = csv.writer(stream, quotechar=quotechar, delimiter=field_sep, doublequote=doublequote, escapechar=escape)
            csvout.writerows([utf8ize(row) for row in xlsdata])
        finally:
            stream.close()
    #-------------------------------------------------------------------------------
    def format_excelval(book, celltype, value, wanttupledate):
        #  Clean up the incoming excel data for some data types
        #  (xlrd cell types: 2 is number, 3 is date, 5 is error)
        if celltype == 2:
            if value == int(value):     #whole numbers: drop the '.0' of the float
                value = int(value)
        elif celltype == 3:
            datetuple = xlrd.xldate_as_tuple(value, book.datemode)
            value = datetuple if wanttupledate else tupledate_to_isodate(datetuple)
        elif celltype == 5:
            value = xlrd.error_text_from_code[value]
        return value
    #-------------------------------------------------------------------------------
    def tupledate_to_isodate(tupledate):
        # Turns a gregorian (year, month, day, hour, minute, nearest_second) into a
        # standard YYYY-MM-DDTHH:MM:SS ISO date.
        # The date part is left out if it is all zeros; the time part is left out
        # if it is all zeros, unless there is no date part either.
        (y,m,d, hh,mm,ss) = tupledate
        has_date = any(n != 0 for n in (y,m,d))
        has_time = any(n != 0 for n in (hh,mm,ss))
        date = "%04d-%02d-%02d"  % (y,m,d)    if has_date             else ''
        time = "T%02d:%02d:%02d" % (hh,mm,ss) if has_time or not date else ''
        return date+time
    #-------------------------------------------------------------------------------
    def utf8ize(l):
        # Encode string-like things using charset, leave other things alone
        return [unicode(s).encode(charset) if hasattr(s,'encode') else s for s in l]
    #***end functions used by extractexcel
    #get parameters for csv-format; defaults are as the csv defaults (in grammar.py)
    charset = argv.get('charset',"utf-8")
    quotechar = argv.get('quotechar',"'")
    field_sep = argv.get('field_sep',':')
    escape = argv.get('escape','')
    doublequote = not escape    #without an escape character quotes are doubled
    try:
        infilename = botslib.abspathdata(ta_from.filename)
        xlsdata = read_xls(infilename)
        ta_to = ta_from.copyta(status=endstatus)
        tofilename = str(ta_to.idta)
        dump_csv(xlsdata,tofilename)
        ta_to.update(statust=OK,filename=tofilename) #update outmessage transaction with ta_info;
        botsglobal.logger.debug(_(u'        File written: "%s".'),tofilename)
    except:
        #every failure is reported as an error in the incoming message
        txt = botslib.txtexc()
        botsglobal.logger.error(_(u'Excel extraction failed, may not be an Excel file? Error:\n%s'),txt)
        raise botslib.InMessageError(_(u'Excel extraction failed, may not be an Excel file? Error:\n$error'),error=txt)
예제 #16
0
 def filelist2absolutepaths(self):
     """ utility function: return the absolute path of every filename in self.ta_list;
         some classes need absolute filenames eg for xml-including.
     """
     absolutepaths = []
     for relativename in self.ta_list:
         absolutepaths.append(botslib.abspathdata(relativename))
     return absolutepaths
예제 #17
0
 def filelist2absolutepaths(self):
     ''' utility function: return the absolute path of every filename in self.ta_list;
         some classes need absolute filenames eg for xml-including.
     '''
     absolutepaths = []
     for relativename in self.ta_list:
         absolutepaths.append(botslib.abspathdata(relativename))
     return absolutepaths