Esempi in Python per scrub

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: scrubber

Metodo/funzione: scrub

Esempi su hotexamples.com: 4

scrub in Python: 4 esempi trovati. Questi sono i migliori esempi reali in Python per scrubber.scrub, estratti da progetti open source. Puoi valutarli per aiutarci a migliorare la qualità dei nostri esempi.

Esempio n. 1

Mostra file

File: __init__.py Progetto: Frihet/djangomail

def scrub_html_email(text, cid_mapping=None):
    """Rewrite ``cid:`` references in an HTML email, then scrub the markup.

    Args:
        text: HTML source of the email body, handed to BeautifulSoup.
        cid_mapping: Optional mapping from a content id (the part of a
            ``src`` attribute after the ``cid:`` prefix) to the value that
            should replace the whole ``src`` attribute. Defaults to an
            empty mapping.

    Returns:
        Whatever ``scrubber.scrub`` returns for the rewritten markup.
    """
    from BeautifulSoup import BeautifulSoup

    # A None default avoids sharing one dict object between calls.
    if cid_mapping is None:
        cid_mapping = {}

    soup = BeautifulSoup(text)

    for tag in soup.findAll(True):
        src = dict(tag.attrs).get('src')
        if src and src.startswith('cid:'):
            cid = src[4:]
            # Leave references we have no mapping for untouched instead of
            # failing the whole message with a KeyError.
            if cid in cid_mapping:
                tag['src'] = cid_mapping[cid]

    mapped = soup.renderContents()

    scrubber = tuit.scrubber.Scrubber(autolink=False)

    # The scrubber removes complete html documents out of the box, so the
    # document-level tags are registered as "drop the tag, keep the content".
    for tag_name in ('html', 'body', 'xml', 'doctype'):
        scrubber.disallowed_tags_save_content.add(tag_name)
    scrubber.allowed_attributes.add('color')

    return scrubber.scrub(mapped)

Esempio n. 2

Mostra file

def getOFX(account, interval):
    """Download an OFX statement for one account and sanity-check the file.

    Args:
        account: Indexable record read as (sitename, account number,
            account type, user, password). Anything after the first ':' in
            the account number is stripped before the query is built.
        interval: Number of days of history to request. Replaced by the
            site's 'mininterval' value when that is defined and larger.

    Returns:
        A ``(status, ofxFileName)`` tuple. ``status`` is False when the
        file is missing or any step inside the download/validation section
        raised. ``(False, '')`` is returned when the debug prompt is
        answered with 'N' or when ``client.status`` is falsy after the
        query.
    """
    sitename = account[0]
    _acct_num = account[1]  # account value defined in sites.dat
    acct_type = account[2]
    user = account[3]
    password = account[4]
    acct_num = _acct_num.split(':')[0]  # bank account# (stripped of :xxx version)

    # get site and other user-defined data
    site = userdat.sites[sitename]

    # set the interval (days); a site may define an (optional) minimum
    minInterval = FieldVal(site, 'mininterval')
    if minInterval:
        interval = max(minInterval, interval)  # use the longer of the two

    # set the start date/time
    dtstart = time.strftime("%Y%m%d",
                            time.localtime(time.time() - interval * 86400))
    dtnow = time.strftime("%Y%m%d%H%M%S", time.localtime())

    client = OFXClient(site, user, password)
    print('%s : %s : Getting records since:  %s' % (sitename, acct_num, dtstart))

    status = True
    # we'll place ofx data transfers in xfrdir (defined in control2.py).
    # check to see if we have this directory.  if not, create it
    if not os.path.exists(xfrdir):
        try:
            os.mkdir(xfrdir)
        except OSError:
            print('** Error.  Could not create %s' % xfrdir)
            # NOTE(review): `system` is not defined in the visible code;
            # presumably this is meant to terminate the program -- confirm.
            system.exit()

    # remove illegal WinFile characters from the file name (in case someone
    # included them in the sitename).  Also, the os.system() call doesn't
    # allow the '&' char, so we'll drop it too.  First char is a space.
    sitename = ''.join(a for a in sitename if a not in ' &\\/:*?"<>|()')

    # random 6-digit suffix; integer bounds, since randrange expects ints
    ofxFileSuffix = str(random.randrange(100000, 1000000)) + ".ofx"
    ofxFileName = xfrdir + sitename + dtnow + ofxFileSuffix

    try:
        if acct_num == '':
            # 19700101000000 is just a default DTSTART date/time string
            query = client.acctQuery("19700101000000")
        else:
            caps = FieldVal(site, "CAPS")
            if "CCSTMT" in caps:
                query = client.ccQuery(acct_num, dtstart)
            elif "INVSTMT" in caps:
                # if we have a brokerid, use it.  Otherwise, try the fiorg value.
                orgID = FieldVal(site, 'BROKERID')
                if orgID == '':
                    orgID = FieldVal(site, 'FIORG')
                if orgID == '':
                    raise Exception(
                        '** Error: Site %s does not have a (REQUIRED) '
                        'BrokerID or FIORG value defined.' % sitename)
                query = client.invstQuery(orgID, acct_num, dtstart)
            elif "BASTMT" in caps:
                bankid = FieldVal(site, "BANKID")
                if bankid == '':
                    raise Exception(
                        '** Error: Site %s does not have a (REQUIRED) '
                        'BANKID value defined.' % sitename)
                query = client.baQuery(bankid, acct_num, dtstart, acct_type)
            else:
                # Without this branch `query` would be unbound below and the
                # user would only see an opaque UnboundLocalError.
                raise Exception(
                    '** Error: Site %s does not list a supported statement '
                    'type (CCSTMT, INVSTMT or BASTMT) in CAPS.' % sitename)

        if Debug:
            print(query)
            print('')
            ask = raw_input(
                'DEBUG:  Send request to bank server (y/n)?').upper()
            if ask == 'N':
                return False, ''

        # do the deed
        client.doQuery(query, ofxFileName)
        if not client.status:
            return False, ''

        # check the ofx file and make sure it looks valid
        if glob.glob(ofxFileName) == []:
            status = False  # no ofx file?
        else:
            # `with` guarantees the handle is closed before the file is
            # reopened for writing below.
            with open(ofxFileName, 'r') as f:
                content = f.read().upper()

            if acct_num != _acct_num:
                # replace bank account number w/ value defined in sites.dat
                # (note: the text written back is the upper-cased copy)
                content = content.replace('<ACCTID>' + acct_num,
                                          '<ACCTID>' + _acct_num)
                with open(ofxFileName, 'w') as f:
                    f.write(content)

            # strip newlines & spaces
            content = ''.join(a for a in content if a not in '\r\n ')

            # NOTE(review): this only rejects the file when ALL three
            # markers are missing; kept as-is to preserve behavior.
            if (content.find('OFXHEADER:') < 0
                    and content.find('<OFX>') < 0
                    and content.find('</OFX>') < 0):
                raise Exception("Invalid OFX statement.")

            # look for <SEVERITY>ERROR code... rlc*2013
            if content.find('<SEVERITY>ERROR') > 0:
                raise Exception("OFX message contains ERROR condition")

            # attempted debug of a Vanguard issue... rlc*2010
            # An investment statement must contain a <SECLIST> section when a
            # <INVPOSLIST> section exists.  Some Vanguard statements have been
            # missing this when there are no transactions, causing Money to
            # crash.  It may be necessary to match every investment position
            # with a security entry, but we'll try to just verify the
            # existence of these section pairs. rlc*9/2010
            if content.find('<INVPOS>') > -1 and content.find('<SECLIST>') < 0:
                raise Exception(
                    "OFX statement is missing required <SECLIST> section.")

            # cleanup the file if needed
            scrubber.scrub(ofxFileName, site)

    except Exception as inst:
        status = False
        print(inst)
        if glob.glob(ofxFileName) != []:
            print('**  Review %s for possible clues...' % ofxFileName)
        if Debug:
            traceback.print_exc()

    return status, ofxFileName

Esempio n. 3

Mostra file

File: ofx.py Progetto: NolanT/ynab-qfx

def getOFX(account, interval):
    """Fetch an OFX statement for a single account and validate the result.

    Args:
        account: Indexable record read as (sitename, account number,
            account type, user, password). The account number is cut at
            the first ':' before it is used in the query.
        interval: Days of history to request; the site's 'mininterval'
            value is used instead when it is defined and larger.

    Returns:
        A ``(status, ofxFileName)`` tuple. ``status`` is False when no file
        was produced or an exception was raised while querying/validating.
        ``(False, '')`` is returned when the debug prompt is answered with
        'N' or when ``client.status`` is falsy after the query.
    """
    sitename = account[0]
    _acct_num = account[1]  # account value defined in sites.dat
    acct_type = account[2]
    user = account[3]
    password = account[4]
    acct_num = _acct_num.split(':')[0]  # bank account# (stripped of :xxx version)

    # get site and other user-defined data
    site = userdat.sites[sitename]

    # set the interval (days); minimum interval is optional per site
    minInterval = FieldVal(site, 'mininterval')
    if minInterval:
        interval = max(minInterval, interval)  # use the longer of the two

    # set the start date/time
    dtstart = time.strftime("%Y%m%d", time.localtime(time.time() - interval * 86400))
    dtnow = time.strftime("%Y%m%d%H%M%S", time.localtime())

    client = OFXClient(site, user, password)
    print('%s : %s : Getting records since:  %s' % (sitename, acct_num, dtstart))

    status = True
    # we'll place ofx data transfers in xfrdir (defined in control2.py).
    # check to see if we have this directory.  if not, create it
    if not os.path.exists(xfrdir):
        try:
            os.mkdir(xfrdir)
        except OSError:
            print('** Error.  Could not create %s' % xfrdir)
            # NOTE(review): `system` is not defined in the visible code;
            # presumably this is meant to terminate the program -- confirm.
            system.exit()

    # remove illegal WinFile characters from the file name (in case someone
    # included them in the sitename).  Also, the os.system() call doesn't
    # allow the '&' char, so we'll drop it too.  First char is a space.
    sitename = ''.join(a for a in sitename if a not in ' &\\/:*?"<>|()')

    # random 6-digit suffix; integer bounds, since randrange expects ints
    ofxFileSuffix = str(random.randrange(100000, 1000000)) + ".ofx"
    ofxFileName = xfrdir + sitename + dtnow + ofxFileSuffix

    try:
        if acct_num == '':
            # 19700101000000 is just a default DTSTART date/time string
            query = client.acctQuery("19700101000000")
        else:
            caps = FieldVal(site, "CAPS")
            if "CCSTMT" in caps:
                query = client.ccQuery(acct_num, dtstart)
            elif "INVSTMT" in caps:
                # if we have a brokerid, use it.  Otherwise, try the fiorg value.
                orgID = FieldVal(site, 'BROKERID')
                if orgID == '':
                    orgID = FieldVal(site, 'FIORG')
                if orgID == '':
                    raise Exception(
                        '** Error: Site %s does not have a (REQUIRED) '
                        'BrokerID or FIORG value defined.' % sitename)
                query = client.invstQuery(orgID, acct_num, dtstart)
            elif "BASTMT" in caps:
                bankid = FieldVal(site, "BANKID")
                if bankid == '':
                    raise Exception(
                        '** Error: Site %s does not have a (REQUIRED) '
                        'BANKID value defined.' % sitename)
                query = client.baQuery(bankid, acct_num, dtstart, acct_type)
            else:
                # Without this branch `query` would be unbound below and the
                # user would only see an opaque UnboundLocalError.
                raise Exception(
                    '** Error: Site %s does not list a supported statement '
                    'type (CCSTMT, INVSTMT or BASTMT) in CAPS.' % sitename)

        if Debug:
            print(query)
            print('')
            ask = raw_input('DEBUG:  Send request to bank server (y/n)?').upper()
            if ask == 'N':
                return False, ''

        # do the deed
        client.doQuery(query, ofxFileName)
        if not client.status:
            return False, ''

        # check the ofx file and make sure it looks valid
        if glob.glob(ofxFileName) == []:
            status = False  # no ofx file?
        else:
            # `with` guarantees the handle is closed before the file is
            # reopened for writing below.
            with open(ofxFileName, 'r') as f:
                content = f.read().upper()

            if acct_num != _acct_num:
                # replace bank account number w/ value defined in sites.dat
                # (note: the text written back is the upper-cased copy)
                content = content.replace('<ACCTID>' + acct_num, '<ACCTID>' + _acct_num)
                with open(ofxFileName, 'w') as f:
                    f.write(content)

            # strip newlines & spaces
            content = ''.join(a for a in content if a not in '\r\n ')

            # NOTE(review): this only rejects the file when ALL three
            # markers are missing; kept as-is to preserve behavior.
            if (content.find('OFXHEADER:') < 0
                    and content.find('<OFX>') < 0
                    and content.find('</OFX>') < 0):
                raise Exception("Invalid OFX statement.")

            # look for <SEVERITY>ERROR code... rlc*2013
            if content.find('<SEVERITY>ERROR') > 0:
                raise Exception("OFX message contains ERROR condition")

            # attempted debug of a Vanguard issue... rlc*2010
            # An investment statement must contain a <SECLIST> section when a
            # <INVPOSLIST> section exists.  Some Vanguard statements have been
            # missing this when there are no transactions, causing Money to
            # crash.  It may be necessary to match every investment position
            # with a security entry, but we'll try to just verify the
            # existence of these section pairs. rlc*9/2010
            if content.find('<INVPOS>') > -1 and content.find('<SECLIST>') < 0:
                raise Exception("OFX statement is missing required <SECLIST> section.")

            # cleanup the file if needed
            scrubber.scrub(ofxFileName, site)

    except Exception as inst:
        status = False
        print(inst)
        if glob.glob(ofxFileName) != []:
            print('**  Review %s for possible clues...' % ofxFileName)
        if Debug:
            traceback.print_exc()

    return status, ofxFileName

Esempio n. 4

Mostra file

                # Scan importdir for statement files to import: every entry
                # whose name matches '*.*' is read and tested.
                # NOTE(review): importdir is concatenated directly with the
                # pattern, so it presumably ends with a path separator -- confirm.
                print('Searching %s for statements to import' % importdir)
                for f in glob.glob(importdir + '*.*'):
                    fname = os.path.basename(f)  #full base filename.extension
                    bname = os.path.splitext(fname)[0]  #basename w/o extension
                    bext = os.path.splitext(fname)[1]  #file extension
                    with open(f) as ifile:
                        dat = ifile.read()

                    #only import if it looks like an ofx file
                    # (validOFX returning '' is treated as "valid" here;
                    # presumably it returns an error message otherwise -- confirm)
                    if validOFX(dat) == '':
                        print("Importing %s" % fname)
                        # the marker is only looked for in the first 200 characters
                        if 'NEWFILEUID:PSIMPORT' not in dat[:200]:
                            #only scrub if it hasn't already been imported (and hence, scrubbed)
                            site = getSite(dat)
                            scrubber.scrub(f, site)

                        #set NEWFILEUID:PSIMPORT to flag the file as having already been imported/scrubbed
                        #don't want to accidentally scrub twice
                        # re-read the file, since scrubber.scrub may have rewritten it
                        with open(f) as ifile:
                            ofx = ifile.read()
                        # replaces every 'NEWFILEUID:' occurrence through the end of its line
                        p = re.compile(r'NEWFILEUID:.*')
                        ofx2 = p.sub('NEWFILEUID:PSIMPORT', ofx)
                        # only write back when the substituted text is non-empty
                        if ofx2:
                            with open(f, 'w') as ofile:
                                ofile.write(ofx2)
                        #preserve original file type but save w/ ofx extension
                        # ('.ofx' is appended unless the extension is already exactly '.ofx')
                        outname = xfrdir + fname + ('' if bext == '.ofx' else
                                                    '.ofx')
                        # move the file into xfrdir and record it in ofxList
                        os.rename(f, outname)
                        ofxList.append(['import file', '', outname])