def run_lexmapr(input_file):
    """Run LexMapr package with specified arguments."""
    # Need to convert InMemoryUploadedFile values to actual files
    with TemporaryDirectory() as tmp_dir:
        tmp_input_path = tmp_dir + "/" + str(input_file)
        tmp_output_path = tmp_dir + "/output.tsv"

        with open(tmp_input_path, "ab") as tmp_input_fp:
            for input_file_chunk in input_file.chunks():
                tmp_input_fp.write(input_file_chunk)

        try:
            run(Namespace(input_file=tmp_input_path,
                          config="envo_foodon_config.json",
                          format="basic",
                          output=tmp_output_path,
                          version=False,
                          bucket=True))
        except Exception:
            return "Oops! Something went wrong"

        with open(tmp_output_path, "r") as tmp_output_fp:
            ret = tmp_output_fp.read()

        return ret
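# A minimal usage sketch for the helper above, assuming it lives in a Django
# project: the view name, the "input_file" form field, and the HttpResponse
# wrapper are illustrative assumptions, not part of the original code. The
# helper itself is assumed to have argparse.Namespace, lexmapr.pipeline.run
# and tempfile.TemporaryDirectory imported at module level.
from django.http import HttpResponse


def lexmapr_upload_view(request):
    # For small uploads, request.FILES["input_file"] is an
    # InMemoryUploadedFile, which is why run_lexmapr() copies it chunk by
    # chunk into a real temporary file before handing a path to the pipeline.
    result_tsv = run_lexmapr(request.FILES["input_file"])
    return HttpResponse(result_tsv, content_type="text/tab-separated-values")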
def run_lexmapr(job_id):
    """Execute ``PipelineJob`` object.

    This means running the original LexMapr pipeline using parameters
    set by the object, and in this function.

    If the execution succeeds, the object's ``complete`` field is set
    to ``True``.

    :param str job_id: ``id`` value of ``PipelineJob`` object
    """
    job = PipelineJob.objects.get(id=job_id)
    try:
        run(Namespace(input_file=job.input_file.path,
                      config=None,
                      full=None,
                      output=job.output_file.path,
                      version=False,
                      bucket=False,
                      no_cache=False,
                      profile="ifsac"))
    except Exception as e:
        job.err = True
        job.err_msg = str(e)
    job.complete = True
    job.save()
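# Hedged sketch of how this job-based variant might be driven, e.g. from a
# view or a background worker. The PipelineJob model with its input_file /
# output_file fields and err / err_msg / complete flags comes from the
# function above; fetching and inspecting the job like this is an
# illustrative assumption.
def report_job_result(job_id):
    run_lexmapr(job_id)
    job = PipelineJob.objects.get(id=job_id)
    if job.err:
        print("LexMapr run failed:", job.err_msg)
    elif job.complete:
        print("Results written to", job.output_file.path)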
def run_pipeline_with_args(bucket=None):
    """Run pipeline with some default arguments."""
    # Path to input file used in all tests
    small_simple_path = os.path.join(ROOT, "tests", "test_input",
                                     "small_simple.csv")
    pipeline.run(argparse.Namespace(input_file=small_simple_path,
                                    config=None,
                                    full=None,
                                    output=None,
                                    version=False,
                                    bucket=bucket,
                                    no_cache=False,
                                    profile=None))
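# Illustrative use of the helper above in a unittest-style smoke test. The
# module-level ROOT constant and the pipeline/argparse imports are assumed to
# exist as in the helper; the test class and method names are placeholders.
import unittest


class TestPipelineSmoke(unittest.TestCase):

    def test_runs_with_and_without_bucket(self):
        # Both invocations should complete without raising; output=None
        # leaves output handling to the pipeline's default behaviour.
        run_pipeline_with_args()
        run_pipeline_with_args(bucket=True)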
def run_pipeline_with_args(input_file, config=None):
    """Run pipeline with some default arguments.

    input_file must be specified. config can be specified, but is
    ``None`` by default.
    """
    pipeline.run(argparse.Namespace(input_file=input_file,
                                    config=config,
                                    format="basic",
                                    output=None,
                                    version=False))
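# A small hedged example of calling the helper above with and without a
# custom config; the file paths and config name are placeholders, not files
# that necessarily exist in the repository.
run_pipeline_with_args("tests/input/small_simple.csv")
run_pipeline_with_args("tests/input/small_simple.csv",
                       config="tests/config/custom_config.json")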
def test_pipeline_with_files(self):
    """Compares actual pipeline.run outputs to expected outputs.

    For each expected output and input pair in self.test_files, we
    compare the contents of the actual output of pipeline.run (when
    given input) to the contents of the expected output.

    This function raises a single assertion error that lists all
    failed assertions.
    """
    # This will be a list of all expected outputs that are not equal
    # to their actual outputs.
    failures = []
    # Iterate over all expected outputs
    for expected_output in self.test_files:
        # Path of expected output file
        expected_output_path = os.path.join(
            os.path.dirname(__file__), "output/" + expected_output + ".tsv")
        # Path of input file
        input = self.test_files[expected_output][0]
        input_path = os.path.join(
            os.path.dirname(__file__), "input/" + input + ".csv")
        # Format value
        format = self.test_files[expected_output][1]
        # Temporary file path to store actual output of input file
        actual_output_path = tempfile.mkstemp()[1]
        # Run pipeline.run using input_path and actual_output_path
        pipeline.run(
            type("", (object,), {
                "input_file": input_path,
                "output": actual_output_path,
                "format": format,
                "config": None
            })())
        # Get actual_output_path contents
        with open(actual_output_path, "r") as actual_output_file:
            actual_output_contents = actual_output_file.read()
        # Get expected_output_path contents
        with open(expected_output_path, "r") as expected_output_file:
            expected_output_contents = expected_output_file.read()
        try:
            # Compare expected output with actual output
            self.assertMultiLineEqual(expected_output_contents,
                                      actual_output_contents)
        except AssertionError as e:
            print(e)
            failures += [expected_output]
    if failures:
        print("Failed files:")
        for failure in failures:
            print(failure)
        raise AssertionError
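# For context, a hedged sketch of the self.test_files fixture this older test
# indexes with [0] and [1]: keys are expected-output file stems under the
# tests' output/ directory, and values are (input file stem, format) pairs.
# The concrete names below are illustrative only.
test_files_example = {
    "small_simple_basic": ("small_simple", "basic"),
    "small_simple_full": ("small_simple", "full"),
}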
def test_pipeline_with_files(self):
    """Compares actual pipeline.run outputs to expected outputs.

    For each expected output and input pair in self.test_files, we
    compare the contents of the actual output of pipeline.run (when
    given input) to the contents of the expected output.

    This function raises a single assertion error that lists all
    failed assertions.
    """
    # This will be a list of all expected outputs that are not equal
    # to their actual outputs.
    failures = []
    # Iterate over all expected outputs
    for expected_output_filename, pipeline_args in self.test_files.items():
        # Path of expected output file
        expected_output_path = os.path.join(ROOT, "tests", "test_output",
                                            expected_output_filename + ".tsv")
        # File path to store actual output of input file
        actual_output_path = os.path.join(self.tmp_dir, "actual_output.tsv")
        # Run pipeline.run using the merged arguments and actual_output_path
        default_args = {"full": True, "bucket": False}
        default_args.update(pipeline_args)
        pipeline.run(argparse.Namespace(input_file=default_args["input"],
                                        config=None,
                                        full=default_args["full"],
                                        output=actual_output_path,
                                        version=False,
                                        bucket=default_args["bucket"],
                                        no_cache=False,
                                        profile=None))
        # Get actual_output_path contents
        with open(actual_output_path, "r") as actual_output_file:
            actual_output_contents = actual_output_file.read()
        # Get expected_output_path contents
        with open(expected_output_path, "r") as expected_output_file:
            expected_output_contents = expected_output_file.read()
        try:
            # Compare expected output with actual output
            self.assertMultiLineEqual(expected_output_contents,
                                      actual_output_contents)
        except AssertionError as e:
            print(e)
            failures += [expected_output_path]
    if failures:
        print("Failed files:")
        for failure in failures:
            print(failure)
        raise AssertionError
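# Correspondingly, a hedged sketch of what self.test_files might look like for
# the newer test above: each value is a dict of per-case overrides that must
# contain "input" (a full path) and may override "full" or "bucket". The keys
# and paths below are illustrative, not taken from the repository.
test_files_example = {
    "small_simple_default": {
        "input": os.path.join(ROOT, "tests", "test_input", "small_simple.csv"),
    },
    "small_simple_bucket": {
        "input": os.path.join(ROOT, "tests", "test_input", "small_simple.csv"),
        "full": False,
        "bucket": True,
    },
}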