Example #1
0
 def run(self):
     """Write one (id, term) TSV row per term found in the 'dump' input.

     ``marctotsv`` is run over the 'dump' input to extract fields 001 and
     653.a into an intermediate TSV. Each 653.a value is then split on
     "|" and every resulting piece on "--"; each term is stripped of
     surrounding whitespace and written together with the record id.
     """
     dump_path = self.input().get('dump').path
     extracted = shellout('marctotsv -k -s "|" {input} 001 653.a > {output}',
                          input=dump_path)
     source = luigi.File(extracted, format=TSV)
     with source.open() as reader, self.output().open('w') as writer:
         for row in reader.iter_tsv(cols=('id', 'terms')):
             # Flatten both split levels into a single lazy stream of terms.
             pieces = (piece
                       for subfield in row.terms.split('|')
                       for piece in subfield.split('--'))
             for piece in pieces:
                 writer.write_tsv(row.id, piece.strip())
Example #2
0
 def run(self):
     """Fetch ``self.url`` with a quiet wget and move the file to the output path."""
     downloaded = shellout('wget -q "{url}" -O {output}', url=self.url)
     fetched = luigi.File(downloaded)
     fetched.move(self.output().path)
Example #3
0
 def run(self):
     """Count distinct lines of the input, ignoring the first field.

     The shell pipeline drops the first tab-separated field (``cut -f 2-``),
     counts identical remaining lines (``sort | uniq -c``) and orders them
     by descending count (``sort -nr``). The result is moved to the
     task's output path.
     """
     source_path = self.input().path
     counted = shellout("cut -f 2- {input}| sort | uniq -c | sort -nr > {output}",
                        input=source_path)
     result = luigi.File(counted)
     result.move(self.output().path)
Example #4
0
 def run(self):
     """Download the bzip2-compressed MARC catalog and store it decompressed.

     The archive is fetched quietly with wget, decompressed to a second
     file with ``bunzip2 -c``, and that decompressed file is moved to the
     task's output path.
     """
     catalog_url = "http://gutenberg.readingroo.ms/cache/generated/feeds/catalog.marc.bz2"
     archive = shellout('wget -q "{url}" -O {output}', url=catalog_url)
     unpacked = shellout('bunzip2 {input} -c > {output}', input=archive)
     result = luigi.File(unpacked)
     result.move(self.output().path)
Example #5
0
 def run(self):
     """Retrieve ``self.url`` via wget in quiet mode, then move it into place.

     The downloaded file ends up at the path of this task's output.
     """
     tmp_path = shellout('wget -q "{url}" -O {output}', url=self.url)
     tmp_file = luigi.File(tmp_path)
     tmp_file.move(self.output().path)