def run(self):
    """Create the output directory and clean every input directory.

    Reads the output location from ``self.output().path``, creates it with
    ``mkdir -p``, then calls ``download_util.extract_and_clean`` once per
    entry returned by ``self.input()``, passing that entry's ``path``.
    """
    output_dir = self.output().path
    common.shell_cmd('mkdir -p %s', output_dir)

    # Resolve the inputs once and iterate them directly instead of
    # re-evaluating self.input() for every index.
    for input_target in self.input():
        # NOTE(review): the arguments look like (directory, source encoding,
        # target encoding, file extension); '//TRANSLIT' is presumably the
        # iconv transliteration suffix -- confirm against download_util.
        download_util.extract_and_clean(input_target.path,
                                        'ISO-8859-1//TRANSLIT',
                                        'UTF-8',
                                        'txt')
def run(self):
    """Clean the primary input and remap the registration listing file.

    Creates the directory named by ``self.output().path``, runs
    ``download_util.extract_and_clean`` on the first input's path, and then
    calls ``remap_supplemental_files`` for ``registration_listing.txt``
    using the second input's path as the supplemental directory.
    """
    target_dir = self.output().path
    common.shell_cmd("mkdir -p %s", target_dir)

    primary_dir = self.input()[0].path
    extra_dir = self.input()[1].path
    # NOTE(review): arguments appear to be (directory, source encoding,
    # target encoding, file extension) -- confirm against download_util.
    download_util.extract_and_clean(primary_dir, "ISO-8859-1", "UTF-8", "txt")

    # One of the files needs to be remapped from one column
    # (submission_number) to two columns (pma_number and k_number)
    # depending on the prefix.
    listing_name = "registration_listing.txt"
    remapped_path = join(target_dir, "remapped_" + listing_name)
    remap_supplemental_files(
        join(target_dir, listing_name),
        join(extra_dir, listing_name),
        remapped_path,
    )
def run(self):
    """Prepare the output directory, clean input 0, and remap one file.

    Steps, in order:
      1. ``mkdir -p`` the path given by ``self.output()``.
      2. Call ``download_util.extract_and_clean`` on ``self.input()[0].path``.
      3. Call ``remap_supplemental_files`` for ``registration_listing.txt``,
         writing to ``remapped_registration_listing.txt`` in the output
         directory; ``self.input()[1].path`` supplies the supplemental copy.
    """
    destination = self.output().path
    common.shell_cmd('mkdir -p %s', destination)

    source_dir = self.input()[0].path
    supplemental_source = self.input()[1].path
    # NOTE(review): presumably re-encodes 'txt' files from ISO-8859-1 to
    # UTF-8 -- verify against download_util.extract_and_clean.
    download_util.extract_and_clean(source_dir, 'ISO-8859-1', 'UTF-8', 'txt')

    # The single submission_number column of this file has to become two
    # columns (pma_number and k_number), chosen by the value's prefix.
    listing = 'registration_listing.txt'
    remapped_listing = join(destination, 'remapped_' + listing)
    original_listing = join(destination, listing)
    supplemental_listing = join(supplemental_source, listing)
    remap_supplemental_files(original_listing,
                             supplemental_listing,
                             remapped_listing)
def run(self):
    """Clean input 0, remap the registration listing, and patch float keys.

    Creates the directory at ``self.output().path``, runs
    ``download_util.extract_and_clean`` on the first input, calls
    ``remap_supplemental_files`` for ``registration_listing.txt`` (the
    second input provides the supplemental directory), and finally rewrites
    every file named in ``self.problem_files`` in place with each ``.0``
    occurrence removed.
    """
    target_dir = self.output().path
    common.shell_cmd('mkdir -p %s', target_dir)

    extracted_dir = self.input()[0].path
    extra_dir = self.input()[1].path
    # NOTE(review): arguments appear to be (directory, source encoding,
    # target encoding, file extension) -- confirm against download_util.
    download_util.extract_and_clean(extracted_dir, 'ISO-8859-1', 'UTF-8', 'txt')

    # One of the files needs to be remapped from one column
    # (submission_number) to two columns (pma_number and k_number)
    # depending on the prefix.
    listing_name = 'registration_listing.txt'
    remapped_path = join(target_dir, 'remapped_' + listing_name)
    remap_supplemental_files(join(target_dir, listing_name),
                             join(extra_dir, listing_name),
                             remapped_path)

    # A handful of files carry keys formatted as floats. This clean-up can
    # be dropped once the source system is fixed.
    # NOTE(review): the pattern strips every '.0' in a line, not only one
    # that terminates a key -- confirm no other field can contain '.0'.
    for broken_name in self.problem_files:
        broken_path = join(target_dir, broken_name)
        # Read everything first: the same path is truncated for writing next.
        with open(broken_path, 'r') as reader:
            original_rows = reader.readlines()
        with open(broken_path, 'w') as writer:
            writer.writelines(re.sub(r'\.0', '', row) for row in original_rows)
def run(self):
    """Create the output directory and clean the single input directory.

    ``self.output().path`` is created with ``mkdir -p``; then
    ``download_util.extract_and_clean`` is invoked on ``self.input().path``.
    """
    destination = self.output().path
    common.shell_cmd('mkdir -p %s', destination)

    source_dir = self.input().path
    # NOTE(review): presumably re-encodes 'txt' files from ISO-8859-1 to
    # UTF-8 -- verify against download_util.extract_and_clean.
    download_util.extract_and_clean(source_dir, 'ISO-8859-1', 'UTF-8', 'txt')