Ejemplo n.º 1
0
 def _clean_content(self, content):
     """Scrub ``content`` and strip every ``!important`` marker from it.

     The input is passed through ``Scrubber().scrub``. When the scrubbed
     result is shorter than 1% of the input's length, it is discarded and
     the unscrubbed input is used instead. In both cases every occurrence
     of the literal ``'!important'`` is removed before returning.
     """
     scrubbed = Scrubber().scrub(content)
     # Guard against a scrub that removed nearly everything: below 1% of
     # the original length, fall back to the raw input.
     too_short = len(scrubbed) < len(content) * 0.01
     cleaned = content if too_short else scrubbed
     return cleaned.replace('!important', '')
Ejemplo n.º 2
0
# Preprocessing script: load the delimited input file, apply the initial
# formatting, extract the target concept, scrub sparse (null) columns and
# classify the remaining columns by type.
#
# Every print below is called with a single parenthesised argument, so the
# output is the same on Python 2 and the script also parses on Python 3.

# NOTE(review): '\031' is an octal escape, i.e. the single character with
# code point 25 -- not decimal 31. Confirm this is the delimiter the input
# file really uses.
df = pd.read_csv(ppeFinalFile, sep='\031')

# Initial formatting
print('Initial Formatting...')
df = format.initial_format(df)
# TODO: do initial scrubbing here as well

# Target concept
print('Extracting target concept..')
tc = TargetConcept(df)
df_target_concept = tc.target_concept(config.has_label)
print('\nTarget concept: ' + str(df_target_concept.columns))

# Null column scrubbing
print('Scrubbing sparse features..')
scrubber = Scrubber(df)
scrubber.initial_nullscrubber_percent()
print('\nNull scrubbed features: ' + str(scrubber.scrubbed_list))

# Column typing
print('\nLoad column typer keywords...')
ct = Typer(df)
master_list = ct.column_typer()
# Presumably removes the null-scrubbed columns from each typed column
# list -- verify against Scrubber.remove.
scrubber.remove(scrubber.scrubbed_list, master_list['cat_list'],
                master_list['num_list'], master_list['date_list'],
                master_list['zip_list'])
print('\nDates: ' + str(master_list['date_list']))
print('\nGeos: ' + str(master_list['zip_list']))

# Initial scrubbing
print('Initial scrubbing...')
# NOTE(review): the call below is commented out, so at this point only the
# progress message is printed.
#scrubber.initial_scrubber_abs()
Ejemplo n.º 3
0
 def clean_content(self, content):
     """Scrub ``content`` and drop every ``!important`` marker.

     The input is passed through ``Scrubber().scrub`` and all occurrences
     of the literal ``'!important'`` are then removed from the result.
     """
     return Scrubber().scrub(content).replace('!important', '')
Ejemplo n.º 4
0
 def clean_content(self, content):
     """Return ``content`` after passing it through ``Scrubber().scrub``."""
     cleaned = Scrubber().scrub(content)
     return cleaned