Example #1
0
 def run(self):
     """Create the output directory, then extract and clean every input.

     The path of each target returned by ``self.input()`` is passed to
     ``download_util.extract_and_clean`` together with the literals
     'ISO-8859-1//TRANSLIT', 'UTF-8' and 'txt' (presumably source
     encoding, target encoding and file extension -- confirm against
     ``download_util``).
     """
     output_dir = self.output().path
     common.shell_cmd('mkdir -p %s', output_dir)
     # Walk the input targets directly rather than re-calling self.input()
     # and indexing into it on every pass.
     for input_target in self.input():
         download_util.extract_and_clean(input_target.path,
                                         'ISO-8859-1//TRANSLIT',
                                         'UTF-8', 'txt')
Example #2
0
 def run(self):
   """Create the output directory, then extract and clean every input.

   The path of each target returned by ``self.input()`` is passed to
   ``download_util.extract_and_clean`` together with the literals
   'ISO-8859-1//TRANSLIT', 'UTF-8' and 'txt' (presumably source
   encoding, target encoding and file extension -- confirm against
   ``download_util``).
   """
   output_dir = self.output().path
   common.shell_cmd('mkdir -p %s', output_dir)
   # Walk the input targets directly rather than re-calling self.input()
   # and indexing into it on every pass.
   for input_target in self.input():
     download_util.extract_and_clean(input_target.path,
                                     'ISO-8859-1//TRANSLIT',
                                     'UTF-8',
                                     'txt')
Example #3
0
    def run(self):
        """Extract the first input and write a remapped registration listing.

        Steps, in order: create the output directory, run
        ``download_util.extract_and_clean`` on the first input's path, then
        call ``remap_supplemental_files`` for registration_listing.txt using
        the copy under the output directory and the copy under the second
        input's path. The result is written to a "remapped_"-prefixed file in
        the output directory.
        """
        target_dir = self.output().path
        common.shell_cmd("mkdir -p %s", target_dir)
        primary_dir = self.input()[0].path
        extras_dir = self.input()[1].path
        download_util.extract_and_clean(primary_dir, "ISO-8859-1", "UTF-8", "txt")

        # The registration listing holds a single submission_number column that
        # must become two columns (pma_number and k_number), chosen by prefix.
        listing_name = "registration_listing.txt"
        extracted_listing = join(target_dir, listing_name)
        supplemental_listing = join(extras_dir, listing_name)
        remapped_listing = join(target_dir, "remapped_" + listing_name)
        remap_supplemental_files(extracted_listing, supplemental_listing, remapped_listing)
Example #4
0
  def run(self):
    """Extract the first input and write a remapped registration listing.

    Steps, in order: create the output directory, run
    ``download_util.extract_and_clean`` on the first input's path, then
    call ``remap_supplemental_files`` for registration_listing.txt using
    the copy under the output directory and the copy under the second
    input's path. The result is written to a 'remapped_'-prefixed file in
    the output directory.
    """
    target_dir = self.output().path
    common.shell_cmd('mkdir -p %s', target_dir)
    primary_dir = self.input()[0].path
    extras_dir = self.input()[1].path
    download_util.extract_and_clean(primary_dir, 'ISO-8859-1', 'UTF-8', 'txt')

    # The registration listing holds a single submission_number column that
    # must become two columns (pma_number and k_number), chosen by prefix.
    listing_name = 'registration_listing.txt'
    extracted_listing = join(target_dir, listing_name)
    supplemental_listing = join(extras_dir, listing_name)
    remapped_listing = join(target_dir, 'remapped_' + listing_name)
    remap_supplemental_files(extracted_listing,
                             supplemental_listing,
                             remapped_listing)
Example #5
0
  def run(self):
    """Extract the first input, remap the listing, and fix float keys.

    Steps, in order:
      1. Create the output directory.
      2. Run ``download_util.extract_and_clean`` on the first input's path.
      3. Call ``remap_supplemental_files`` for registration_listing.txt,
         writing a 'remapped_'-prefixed file in the output directory.
      4. Rewrite each file named in ``self.problem_files`` (looked up under
         the output directory) in place, dropping a '.0' suffix from
         numbers.
    """
    output_dir = self.output().path
    common.shell_cmd('mkdir -p %s', output_dir)
    input_dir = self.input()[0].path
    supplemental_dir = self.input()[1].path
    download_util.extract_and_clean(input_dir, 'ISO-8859-1', 'UTF-8', 'txt')

    # One of the files needs to be remapped from one column (submission_number)
    # to two columns (pma_number and k_number) depending on the prefix.
    file_name = 'registration_listing.txt'
    output_file = join(output_dir, 'remapped_' + file_name)
    remap_supplemental_files(join(output_dir, file_name),
                             join(supplemental_dir, file_name),
                             output_file)

    # There are a handful of files with floats for keys
    # This step can be removed once it is fixed on the source system.
    # Only a '.0' that terminates a number is removed ('123.0' -> '123').
    # A bare r'\.0' would also eat the '.0' inside values such as '10.05',
    # silently turning them into '105'.
    float_suffix = re.compile(r'(?<=\d)\.0(?!\d)')
    for fix_file in self.problem_files:
      fix_path = join(output_dir, fix_file)
      # Read everything first: the same path is reopened for writing below.
      with open(fix_path, 'r') as needs_fixing:
        lines = needs_fixing.readlines()
      with open(fix_path, 'w') as gets_fixing:
        gets_fixing.writelines(float_suffix.sub('', line) for line in lines)
Example #6
0
 def run(self):
   """Create the output directory, then extract and clean the input path.

   ``download_util.extract_and_clean`` receives the single input target's
   path plus the literals 'ISO-8859-1', 'UTF-8' and 'txt'.
   """
   target_dir = self.output().path
   common.shell_cmd('mkdir -p %s', target_dir)
   download_util.extract_and_clean(self.input().path,
                                   'ISO-8859-1', 'UTF-8', 'txt')
Example #7
0
 def run(self):
     """Create the output directory, then extract and clean the input path.

     ``download_util.extract_and_clean`` receives the single input target's
     path plus the literals 'ISO-8859-1', 'UTF-8' and 'txt'.
     """
     target_dir = self.output().path
     common.shell_cmd('mkdir -p %s', target_dir)
     download_util.extract_and_clean(self.input().path,
                                     'ISO-8859-1', 'UTF-8', 'txt')