        }
        yield d


# Runs the main part of the pipeline. Errors will be tagged; clean politicians will
# continue on to BQ.
pol = (
    p
    | 'Read from CSV' >> beam.io.ReadFromText(
        '{0}/tmp/senate_members/*.csv'.format(os.path.expanduser('~')),
        skip_header_lines=1)
    | 'Split Values' >> beam.ParDo(SplitFn())
    # | 'Isolate Attributes' >> beam.ParDo(pt.IsolateAttrFn())
    | 'Scrub First Name' >> beam.ParDo(pt.ScrubFnameFn(), keep_suffix=True)
    | 'Fix Nicknames' >> beam.ParDo(
        pt.FixNicknameFn(), n_tbl=nickname_tbl, keep_nickname=True)
    | 'Scrub Last Name' >> beam.ParDo(pt.ScrubLnameFn())
    | 'Fix Nones' >> beam.ParDo(pt.FixNoneFn())
    | 'Tag Errors' >> beam.ParDo(pt.TagErrorsFn()).with_outputs('error_tag'))

error_pols = pol.error_tag
clean_pols = pol[None]

# A new Politician will only be published if they are not already contained in the
# Politicians table. If they are new, they will be properly uploaded. If they are not
# new, then they will be ignored in this pipeline.
# new_pol = (
#     clean_pols
#     | 'Filter Existing Pols' >> beam.ParDo(pt.NewPolsOnlyFn(), pol_tbl=pols_tbl)
#     | 'Filter Pol Keys' >> beam.ParDo(pt.FilterKeysFn(), attr_lst=pol_attr_lst)
#     | 'Write Pol to BQ' >> beam.io.WriteToBigQuery(
#         table=pol_spec,
#         write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
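# Minimal sketch (not the real implementation): pt.TagErrorsFn lives in the shared `pt`
# module, so the exact validation rule below is an assumption. It only illustrates how a
# DoFn can route bad records to the 'error_tag' output used above while letting clean
# records fall through to the main (untagged) output accessed as pol[None].
import apache_beam as beam
from apache_beam import pvalue


class TagErrorsFnSketch(beam.DoFn):
    """Hypothetical stand-in for pt.TagErrorsFn, shown for illustration only."""

    def process(self, element):
        # Assumed rule: any missing or empty value marks the record as an error.
        if any(v in (None, '', 'None') for v in element.values()):
            yield pvalue.TaggedOutput('error_tag', element)
        else:
            # Untagged yields go to the main output of .with_outputs('error_tag').
            yield element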
            'district': district,
            'date': date
        }
        yield d


# Runs the main part of the pipeline. Errors will be tagged; clean politicians will
# continue on to BQ.
pol = (
    p
    | 'Read from CSV' >> beam.io.ReadFromText(
        df_blob,
        # 'gs://{0}/tmp/house_members/*.csv'.format(os.path.expanduser('~')),
        skip_header_lines=1)
    | 'Split Values' >> beam.ParDo(SplitFn())
    # | 'Isolate Attributes' >> beam.ParDo(pt.IsolateAttrFn())
    | 'Scrub First Name' >> beam.ParDo(pt.ScrubFnameFn(), keep_suffix=True)
    | 'Fix Nicknames' >> beam.ParDo(
        pt.FixNicknameFn(), n_tbl=nickname_tbl, keep_nickname=True)
    | 'Scrub Last Name' >> beam.ParDo(pt.ScrubLnameFn(), keep_suffix=True)
    | 'Map States' >> beam.ParDo(StateMapFn(), tbl=state_tbl)
    | 'Fix Nones' >> beam.ParDo(pt.FixNoneFn())
    | 'Tag Errors' >> beam.ParDo(pt.TagErrorsFn()).with_outputs('error_tag'))

error_pols = pol.error_tag
clean_pols = pol[None]

# A new Politician will only be published if they are not already contained in the
# Politicians table. If they are new, they will be properly uploaded. If they are not
# new, then they will be ignored in this pipeline.
new_pol = (
    clean_pols
    | 'Filter Existing Pols' >> beam.ParDo(pt.NewPolsOnlyFn(), pol_tbl=pols_tbl)
    | 'Filter Keys' >> beam.ParDo(pt.FilterKeysFn(), attr_lst=pol_attr_lst)
    | 'Write Pol to BQ' >> beam.io.WriteToBigQuery(
        table=pol_spec,
        write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
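# Minimal sketch (not the real implementation): StateMapFn is defined elsewhere in this
# pipeline, so the lookup below is an assumption based on its name and the `tbl=state_tbl`
# argument. It illustrates a DoFn that swaps the scraped state value for its canonical
# form using a lookup table passed as a keyword argument through beam.ParDo.
import apache_beam as beam


class StateMapFnSketch(beam.DoFn):
    """Hypothetical stand-in for StateMapFn, shown for illustration only."""

    def process(self, element, tbl):
        # `tbl` is assumed to be a dict-like mapping, e.g. {'Ohio': 'OH', ...}.
        state = element.get('state')
        if state in tbl:
            element['state'] = tbl[state]
        yield element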