def process_s3_file(self, file_name, original_file_name):
    """Download an uploaded file from S3, convert it with fix15, and upload
    the converted CSV back to S3, reporting task progress along the way.

    :param file_name: GUID-uniqued key of the uploaded file in the bucket;
        also used as the local download path.
    :param original_file_name: the user's original file name, used to build
        a friendly download name in the final task metadata.
    """
    bucket = boto3.resource('s3').Bucket(app.config['BUCKET_NAME'])
    # Pull down the input file from S3.
    # Input file name will have been uniqued to a GUID.
    bucket.download_file(file_name, file_name)
    # Size of the downloaded file, used to compute fractional progress.
    # BUG FIX: stat_result exposes the size as `st_size` (lowercase);
    # `ST_SIZE` is an index constant in the `stat` module, so the original
    # raised AttributeError here.
    file_size = os.stat(file_name).st_size
    # Generate a unique converted file name.
    # BUG FIX: uuid4() returns a UUID object, which cannot be concatenated
    # with a str; convert it first.
    uniqued_name = str(uuid.uuid4()) + '.csv'
    # BUG FIX: NamedTemporaryFile defaults to binary mode ('w+b'), which
    # rejects the `encoding` argument with ValueError; open in text mode.
    with tempfile.NamedTemporaryFile(mode='w+', encoding='utf-8') as output_file:
        with open(file_name, 'rb') as input_file:
            fix15.process_file(
                input_file,
                output_file,
                skip_headers=True,
                progress=lambda bytes_read: self.update_state(
                    'PROGRESS', meta={'progress': bytes_read / file_size}))
        # Flush buffered output before upload_file re-opens the temp file
        # by name, otherwise the tail of the converted data may be missing.
        output_file.flush()
        bucket.upload_file(output_file.name, 'outputs/' + uniqued_name)
    self.update_state(
        'SUCCESS',
        meta={
            'result_url': 'https://%s.s3.amazonaws.com/outputs/%s' %
            (app.config['BUCKET_NAME'], uniqued_name),
            'download_name':
            os.path.splitext(original_file_name)[0] + '.fix15.csv'
        })
def test_process_file_column_mode(self):
    """Columns identified by header name are converted correctly."""
    source = io.StringIO(test_file)
    converted = io.StringIO(newline=None)

    fix15.process_file(source, converted, ['id', 'accountid'])

    self.assertEqual(test_file_converted.strip(),
                     converted.getvalue().strip())
def test_process_file_index_mode(self):
    """Columns identified by zero-based index are converted correctly."""
    source = io.StringIO(test_file)
    converted = io.StringIO(newline=None)

    fix15.process_file(source, converted, [0, 3], skip_headers=True)

    self.assertEqual(test_file_converted.strip(),
                     converted.getvalue().strip())
def main():
    """Command-line entry point: parse arguments and run the conversion.

    Returns 0 on success; exits with a usage message when neither column
    names nor column indices are supplied.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c', '--column',
        dest='column_names', action='append', type=str,
        help='The name of a column to be converted from a 15 to 18 '
             'character Salesforce Id')
    parser.add_argument(
        '-n', '--column-index',
        dest='column_indices', action='append', type=int,
        help='The index (starting with 0) of a column to be converted from '
             'a 15 to 18 character Salesforce Id')
    parser.add_argument(
        '-i', '--input',
        dest='infile', type=argparse.FileType('r'), default=sys.stdin,
        help='The input file. The default is standard input.')
    parser.add_argument(
        '-o', '--output',
        dest='outfile', type=argparse.FileType('w'), default=sys.stdout,
        help='The output file. The default is standard output.')
    # BUG FIX: -s was store_true with default=True, which made the flag a
    # no-op and header-skipping impossible to turn off. Keep -s and the
    # True default for backward compatibility (index mode requires headers
    # to be skipped) and add --no-skip-headers to disable it explicitly.
    parser.add_argument(
        '-s', '--skip-headers',
        dest='skip_headers', action='store_true', default=True,
        help='Skip over a header row, if using column indices rather than '
             'names.')
    parser.add_argument(
        '--no-skip-headers',
        dest='skip_headers', action='store_false',
        help='Treat the first row as data rather than a header row.')
    args = parser.parse_args()

    # Column indices take precedence when both modes are supplied.
    if args.column_indices:
        columns = args.column_indices
    elif args.column_names:
        columns = args.column_names
    else:
        # Neither mode given: nothing to convert, so show usage and bail.
        parser.print_usage()
        sys.exit(-1)

    process_file(args.infile, args.outfile,
                 columns=columns,
                 skip_headers=args.skip_headers)
    return 0
def test_process_file_progress(self):
    """The progress callback fires with in-range byte offsets per row."""
    source = io.StringIO(test_file)
    converted = io.StringIO(newline=None)
    offsets = []

    def record(offset):
        # Each reported offset is strictly inside the input's byte range.
        self.assertTrue(0 < offset < len(test_file))
        offsets.append(offset)

    fix15.process_file(source, converted, ['id', 'accountid'],
                       progress=record)

    # One callback per processed row: the fixture has five.
    self.assertEqual(5, len(offsets))
def test_process_file_index_mode_no_skip_header(self):
    """Index mode without skip_headers is rejected with ValueError."""
    source = io.StringIO(test_file)
    converted = io.StringIO(newline=None)

    with self.assertRaises(ValueError):
        fix15.process_file(source, converted, [0, 3], skip_headers=False)