def test_validate_errors_with_no_files(capsys):
    with pytest.raises(SystemExit) as pytest_e:
        sys.argv = [""]
        main()
    out, err = capsys.readouterr()
    assert "No files given to process" in err
    # The original `assert a, b == [...]` asserted a tuple (always truthy,
    # with the comparison as the assert message); compare both values instead.
    assert [pytest_e.type, pytest_e.value.code] == [SystemExit, 1]
def main():
    args = parse_args()
    source = args.input
    results = []
    for root, dirname, filenames in os.walk(source):
        error_counter = 0
        for files in filenames:
            if files.endswith('_manifest.md5'):
                if os.path.basename(root) != 'logs':
                    manifest = os.path.join(root, files)
                    print(manifest)
                    if os.path.isfile(manifest):
                        if args.y:
                            error_counter = validate.main([manifest, '-y'])
                        else:
                            error_counter = validate.main([manifest])
                        if error_counter == 0:
                            results.append([root, 'success'])
                        else:
                            results.append([root, 'failure'])
                else:
                    continue
    # report once, after the whole tree has been walked (the original also
    # re-printed the accumulated list after every manifest, which was redundant)
    for result in results:
        print(result)
def train_translation_model(data_dir, arch, extra_flags=None, task='translation'):
    train_parser = options.get_training_parser()
    train_args = options.parse_args_and_arch(
        train_parser,
        [
            '--task', task,
            data_dir,
            '--save-dir', data_dir,
            '--arch', arch,
            '--lr', '0.05',
            '--max-tokens', '500',
            '--max-epoch', '1',
            '--no-progress-bar',
            '--distributed-world-size', '1',
            '--source-lang', 'in',
            '--target-lang', 'out',
        ] + (extra_flags or []),
    )
    train.main(train_args)

    # test validation
    validate_parser = options.get_validation_parser()
    validate_args = options.parse_args_and_arch(
        validate_parser,
        [
            '--task', task,
            data_dir,
            '--path', os.path.join(data_dir, 'checkpoint_last.pt'),
            '--valid-subset', 'valid',
            '--max-tokens', '500',
            '--no-progress-bar',
        ],
    )
    validate.main(validate_args)
def test_validate_fails_invalid_json(prep_data, capsys):
    with pytest.raises(SystemExit) as pytest_e:
        sys.argv = ["", os.path.join(prep_data["dir"], "invalid_json.log")]
        main()
    out, err = capsys.readouterr()
    assert "invalid_json.log: invalid" in err
    # the original checked "invalid.log.validated", which doesn't match the
    # input file name; corrected to "invalid_json.log.validated"
    assert not os.path.isfile(
        os.path.join(prep_data["dir"], "invalid_json.log.validated"))
    assert [pytest_e.type, pytest_e.value.code] == [SystemExit, 0]
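# The `prep_data` fixture these tests rely on is not part of this collection.
# A minimal sketch of what it could look like, assuming it drops the sample
# log files into a temp directory (the file contents here are illustrative
# assumptions, not the project's real fixture data):
import json
import pytest

@pytest.fixture
def prep_data(tmp_path):
    (tmp_path / "valid.log").write_text(json.dumps({"id": 1}))
    (tmp_path / "invalid_json.log").write_text("{not valid json")
    (tmp_path / "duplicate.log").write_text(
        json.dumps({"id": 1}) + "\n" + json.dumps({"id": 1}))
    return {"dir": str(tmp_path)}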
def train_language_model(data_dir, arch, extra_flags=None, run_validation=False):
    train_parser = options.get_training_parser()
    train_args = options.parse_args_and_arch(
        train_parser,
        [
            "--task", "language_modeling",
            data_dir,
            "--arch", arch,
            "--optimizer", "adam",
            "--lr", "0.0001",
            "--criterion", "adaptive_loss",
            "--adaptive-softmax-cutoff", "5,10,15",
            "--max-tokens", "500",
            "--tokens-per-sample", "500",
            "--save-dir", data_dir,
            "--max-epoch", "1",
            "--no-progress-bar",
            "--distributed-world-size", "1",
            "--ddp-backend", "no_c10d",
        ] + (extra_flags or []),
    )
    train.main(train_args)

    if run_validation:
        # test validation
        validate_parser = options.get_validation_parser()
        validate_args = options.parse_args_and_arch(
            validate_parser,
            [
                "--task", "language_modeling",
                data_dir,
                "--path", os.path.join(data_dir, "checkpoint_last.pt"),
                "--valid-subset", "valid",
                "--max-tokens", "500",
                "--no-progress-bar",
            ],
        )
        validate.main(validate_args)
import io
import sys
from contextlib import redirect_stdout

def executeMainAndCaptureOutput(dummyStdinFileName):
    # https://stackoverflow.com/questions/6271947/how-can-i-simulate-input-to-stdin-for-pyunit
    sys.stdin = open(dummyStdinFileName, 'r')
    # https://eli.thegreenplace.net/2015/redirecting-all-kinds-of-stdout-in-python/
    f = io.StringIO()
    with redirect_stdout(f):
        validate.main()
    sys.stdin.close()
    sys.stdin = sys.__stdin__  # restore the real stdin so later tests aren't left with a closed file
    return f.getvalue()
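# Example use of the helper above, assuming `dummy_stdin.txt` holds the
# responses validate.main() expects to read from stdin (the file name and
# the asserted text are illustrative assumptions):
def test_main_with_scripted_stdin():
    output = executeMainAndCaptureOutput('dummy_stdin.txt')
    assert 'valid' in output.lower()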
def test_validate_will_not_overwrite(prep_data, capsys):
    shutil.copy(
        os.path.join(prep_data["dir"], "valid.log"),
        os.path.join(prep_data["dir"], "valid.log.validated"),
    )
    with pytest.raises(SystemExit) as pytest_e:
        sys.argv = ["", os.path.join(prep_data["dir"], "valid.log")]
        main()
    out, err = capsys.readouterr()
    assert "valid.log: valid" not in err
    assert [pytest_e.type, pytest_e.value.code] == [SystemExit, 0]
def test_validate_respects_dry_run(prep_data, capsys):
    with pytest.raises(SystemExit) as pytest_e:
        sys.argv = ["", os.path.join(prep_data["dir"], "valid.log"), "--dry-run"]
        main()
    out, err = capsys.readouterr()
    assert "valid.log: valid" in err
    assert not os.path.isfile(
        os.path.join(prep_data["dir"], "valid.log.validated"))
    assert os.path.isfile(os.path.join(prep_data["dir"], "valid.log"))
    assert [pytest_e.type, pytest_e.value.code] == [SystemExit, 0]
def train_translation_model(data_dir, arch, extra_flags=None, task="translation",
                            run_validation=False):
    train_parser = options.get_training_parser()
    train_args = options.parse_args_and_arch(
        train_parser,
        [
            "--task", task,
            data_dir,
            "--save-dir", data_dir,
            "--arch", arch,
            "--lr", "0.05",
            "--max-tokens", "500",
            "--max-epoch", "1",
            "--no-progress-bar",
            "--distributed-world-size", "1",
            "--source-lang", "in",
            "--target-lang", "out",
        ] + (extra_flags or []),
    )
    train.main(train_args)

    if run_validation:
        # test validation
        validate_parser = options.get_validation_parser()
        validate_args = options.parse_args_and_arch(
            validate_parser,
            [
                "--task", task,
                data_dir,
                "--path", os.path.join(data_dir, "checkpoint_last.pt"),
                "--valid-subset", "valid",
                "--max-tokens", "500",
                "--no-progress-bar",
            ],
        )
        validate.main(validate_args)
def test_validate_fails_duplicate_id(prep_data, capsys):
    with pytest.raises(SystemExit) as pytest_e:
        sys.argv = ["", os.path.join(prep_data["dir"], "duplicate.log"), "--verbose"]
        main()
    out, err = capsys.readouterr()
    assert "Duplicate" in err
    assert "duplicate.log: invalid" in err
    assert not os.path.isfile(
        os.path.join(prep_data["dir"], "duplicate.log.validated"))
    assert [pytest_e.type, pytest_e.value.code] == [SystemExit, 0]
def test_validate_handles_success_and_failure(prep_data, capsys):
    with pytest.raises(SystemExit) as pytest_e:
        sys.argv = [
            "",
            os.path.join(prep_data["dir"], "valid.log"),
            os.path.join(prep_data["dir"], "duplicate.log"),
            os.path.join(prep_data["dir"], "invalid_json.log"),
        ]
        main()
    out, err = capsys.readouterr()
    assert ": invalid" in err
    assert ": valid" in err
    assert [pytest_e.type, pytest_e.value.code] == [SystemExit, 0]
def test_upload_xml_no_iiif_deposit(self):
    "api returns valid article-json with valid values for image widths and heights when iiif returns a 404"
    xml_fname = 'elife-24271-v1.xml'
    xml_fixture = join(self.fixtures_dir, xml_fname)
    expected_lax_resp = {
        'status': conf.VALIDATED,
        'override': {},
        'ajson': base.load_ajson(xml_fixture + '.json')['article'],
        'message': None  # this should trigger an error when logged naively by api.py but doesn't...
    }
    # don't call lax
    with patch('adaptor.call_lax', return_value=expected_lax_resp):
        # also, don't call iiif
        no_iiif_info = {}
        with patch('iiif.iiif_info', return_value=no_iiif_info):
            resp = self.client.post('/xml', **{
                'buffered': True,
                'content_type': 'multipart/form-data',
                'data': {
                    'xml': (open(xml_fixture, 'rb'), xml_fname),
                },
            })
    # ensure ajson validated
    expected_ajson_path = join(self.temp_dir, xml_fname) + '.json'
    success, _ = validate.main(open(expected_ajson_path, 'rb'))
    self.assertTrue(success)
    self.assertEqual(resp.status_code, 200)
def test_upload_valid_xml(self):
    "the response we expect when everything happens perfectly"
    xml_fname = 'elife-16695-v1.xml'
    xml_fixture = join(self.fixtures_dir, xml_fname)
    expected_lax_resp = {
        'status': conf.VALIDATED,
        'override': {},
        'ajson': base.load_ajson(xml_fixture + '.json')['article'],
        'message': None  # this should trigger an error when logged naively by api.py but doesn't...
    }
    with patch('adaptor.call_lax', return_value=expected_lax_resp):
        resp = self.client.post('/xml', **{
            'buffered': True,
            'content_type': 'multipart/form-data',
            'data': {
                'xml': (open(xml_fixture, 'rb'), xml_fname),
            },
        })

    # ensure xml uploaded
    expected_xml_path = join(self.temp_dir, xml_fname)
    self.assertTrue(os.path.exists(expected_xml_path), "uploaded xml cannot be found")

    # ensure ajson scraped
    expected_ajson_path = join(self.temp_dir, xml_fname) + '.json'
    self.assertTrue(os.path.exists(expected_ajson_path), "scraped ajson not found")

    # ensure scraped ajson is identical to what we're expecting
    expected_ajson = base.load_ajson(
        join(self.fixtures_dir, 'elife-16695-v1.xml.json'))
    actual_ajson = base.load_ajson(expected_ajson_path)
    self.assertEqual(actual_ajson, expected_ajson)

    # ensure ajson validated
    success, _ = validate.main(open(expected_ajson_path, 'rb'))
    self.assertTrue(success)

    # ensure ajson is successfully sent to lax
    self.assertEqual(resp.status_code, 200)
    del resp.json['ajson']['-meta']  # remove the -meta key from the response.
    self.assertEqual(resp.json, expected_lax_resp)
def job(path):
    strbuffer = StringIO()
    try:
        fname = os.path.basename(path)
        dirname = os.path.dirname(path)
        strbuffer.write("%s => " % fname)
        doc = open(path, 'r')
        valid, article_with_placeholders = validate.main(doc, quiet=True)
        if valid:
            strbuffer.write("success")
            os.symlink(path, join(dirname, VALIDDIR, fname))
        else:
            strbuffer.write("failed")
            os.symlink(path, join(dirname, INVALIDDIR, fname))
    except BaseException as err:
        strbuffer.write("error (%s)" % err)
    finally:
        log = conf.multiprocess_log('validation.log', __name__)
        log.info(strbuffer.getvalue())
def main(): usage = "Usage: vcfPytools.py [tool] [options]\n\n" + \ "Available tools:\n" + \ " annotate:\n\tAnnotate the vcf file with membership in other vcf files.\n" + \ " extract:\n\tExtract vcf records from a region.\n" + \ " filter:\n\tFilter the vcf file.\n" + \ " indel:\n\tIndel manipulation tools.\n" + \ " intersect:\n\tGenerate the intersection of two vcf files.\n" + \ " merge:\n\tMerge a list of vcf files.\n" + \ " multi:\n\tFind the intersections and unique fractions of multiple vcf files.\n" + \ " sort:\n\tSort a vcf file.\n" + \ " stats:\n\tGenerate statistics from a vcf file.\n" + \ " union:\n\tGenerate the union of two vcf files.\n" + \ " unique:\n\tGenerate the unique fraction from two vcf files.\n" + \ " validate:\n\tValidate the input vcf file.\n\n" + \ "vcfPytools.py [tool] --help for information on a specific tool." # Determine the requested tool. if len(sys.argv) > 1: tool = sys.argv[1] else: print >> sys.stderr, usage exit(1) if tool == "annotate": import annotate success = annotate.main() elif tool == "extract": import extract success = extract.main() elif tool == "filter": import filter success = filter.main() elif tool == "intersect": import intersect success = intersect.main() elif tool == "indel": import indel success = indel.main() elif tool == "multi": import multi success = multi.main() elif tool == "merge": import merge success = merge.main() elif tool == "sort": import sort success = sort.main() elif tool == "stats": import stats success = stats.main() elif tool == "union": import union success = union.main() elif tool == "unique": import unique success = unique.main() elif tool == "test": import test success = test.main() elif tool == "validate": import validate success = validate.main() elif tool == "--help" or tool == "-h" or tool == "?": print >> sys.stderr, usage else: print >> sys.stderr, "Unknown tool: ",tool print >> sys.stderr, "\n", usage exit(1) # If program completed properly, terminate. if success == 0: exit(0)
def post_xml():
    "upload jats xml, generate xml, validate, send to lax as a dry run"
    http_ensure('xml' in request.files, "xml file required", 400)
    try:
        override = scraper.deserialize_overrides(request.form.getlist('override'))
    except ValueError:
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.BAD_OVERRIDES,
            'message': 'an error occurred attempting to parse your given overrides.',
            'trace': sio.getvalue()
        }, 400

    # upload
    try:
        xml = request.files['xml']
        filename = os.path.basename(xml.filename)
        http_ensure(os.path.splitext(filename)[1] == '.xml', "file doesn't look like xml")
        path = join(upload_folder(), filename)
        xml.save(path)
    except Exception:
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.BAD_UPLOAD,
            'message': 'an error occurred uploading the article xml to be processed',
            'trace': sio.getvalue(),
        }, 400  # shouldn't this be a 500? everything is always the client's fault.

    # generate
    try:
        article_json = scraper.main(path, {
            'override': override,
            'fill-missing-image-dimensions': True
        })
        json_filename = filename + '.json'
        json_path = join(upload_folder(), json_filename)
        open(json_path, 'w').write(article_json)
    except Exception as err:
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.BAD_SCRAPE,
            'message': str(err),
            'trace': sio.getvalue()
        }, 400

    # validate
    try:
        conf.API_PRE_VALIDATE and ajson_validate.main(open(json_path, 'r'))
    except jsonschema.ValidationError as err:
        return {
            'status': conf.INVALID,
            'code': conf.ERROR_INVALID,
            'message': 'the generated article-json failed validation, see trace for details.',
            'trace': str(err),  # todo: any good?
        }, 400
    except Exception:
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.ERROR_VALIDATING,
            'message': 'an error occurred attempting to validate the generated article-json',
            'trace': sio.getvalue()
        }, 400  # TODO: shouldn't this be a 500?

    # send to lax
    try:
        # msid, version = utils.version_from_path(filename)
        msid = request.args['id']
        version = request.args['version']
        token = str(uuid.uuid4())
        args = {
            # the *most* important parameter. don't modify lax.
            'dry_run': True,
            # a forced ingest by default
            'action': conf.INGEST,
            'force': True,
            # article details
            'msid': msid,
            'version': int(version),
            'article_json': article_json,
            'token': token,
        }
        lax_resp = adaptor.call_lax(**args)
        context = utils.renkeys(lax_resp, [("message", "lax-message")])
        LOG.info("lax response", extra=context)
        api_resp = utils.subdict(lax_resp, ['status', 'code', 'message', 'trace'])

        if api_resp['status'] in [conf.INVALID, conf.ERROR]:
            # failure
            return api_resp, 400

        # success
        # 'code', 'message' and 'trace' are not returned by lax on success, just 'status'
        api_resp['ajson'] = json.loads(article_json)['article']
        api_resp['override'] = override
        return api_resp, 200
    except Exception:
        # lax returned something indecipherable
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.ERROR_COMMUNICATING,
            'message': "lax responded with something that couldn't be decoded",
            'trace': sio.getvalue(),
        }, 400  # TODO: shouldn't this be a 500?
def main(): """Run the validation & generation for all vector issuer icons.""" validate.main() search_data = {} manifest_data = {} output_count = 0 search_denylist = [ 'text', 'monochrome', 'filled', 'circle', 'rounded', 'square', 'icon', 'symbol', 'alt', 'alternative', 'main', 'second', 'secondary', 'light', 'dark', 'white', 'black' ] try: vectors = glob.glob('./vectors/*/*.svg') except: vectors = [] # Generate a PNG for every SVG for vector in vectors: print('Processing {}...'.format(vector)) domain = vector[10:].split('/')[0] filename = vector[10:].split('/')[1][0:-4] output_directory = './dist/{}'.format(domain) output_file = '{}/{}.png'.format(output_directory, filename) safe_output_directory = re.sub(r'[^a-z0-9-\./]+', '', output_directory) safe_output_file = re.sub(r'[^a-z0-9-\./]+', '', output_file) safe_input_file = re.sub(r'[^a-z0-9-\./]+', '', vector) subprocess.check_call(['mkdir', '-p', safe_output_directory]) subprocess.check_call([ 'cairosvg', safe_input_file, '-f', 'png', '-W', '200', '-H', '200', '-o', safe_output_file ]) output_count = output_count + 1 # MANIFEST if domain not in manifest_data.keys(): manifest_data[domain] = { 'domain': domain, 'additional_search_terms': [], 'icons': [] } manifest_data[domain]['icons'].append('{}/{}.png'.format( domain, filename)) # SEARCH for issuer_part in filename.split('-'): if issuer_part in search_denylist: continue if len(issuer_part) <= 1: continue if issuer_part not in search_data.keys(): search_data[issuer_part] = [ '{}/{}.png'.format(domain, filename) ] elif '{}/{}.png'.format(domain, filename) not in search_data[issuer_part]: search_data[issuer_part].append('{}/{}.png'.format( domain, filename)) with open('dist/manifest.json', 'w') as manifest_file: json.dump(manifest_data, manifest_file) print('Manifest JSON generation done!') with open('dist/search.json', 'w') as search_file: json.dump(search_data, search_file) print('Generation finished, everything looks good! Generated {} icons.'. format(output_count))
#!/usr/bin/python
import calibrate
import validate
import validate_for_plot
import shutoff
from time import sleep

if __name__ == '__main__':
    validate.main()
    validate_for_plot.main()
def main(): usage = "Usage: vcfPytools.py [tool] [options]\n\n" + \ "Available tools:\n" + \ " annotate:\n\tAnnotate the vcf file with membership in other vcf files.\n" + \ " extract:\n\tExtract vcf records from a region.\n" + \ " filter:\n\tFilter the vcf file.\n" + \ " indel:\n\tIndel manipulation tools.\n" + \ " intersect:\n\tGenerate the intersection of two vcf files.\n" + \ " merge:\n\tMerge a list of vcf files.\n" + \ " multi:\n\tFind the intersections and unique fractions of multiple vcf files.\n" + \ " sort:\n\tSort a vcf file.\n" + \ " stats:\n\tGenerate statistics from a vcf file.\n" + \ " union:\n\tGenerate the union of two vcf files.\n" + \ " unique:\n\tGenerate the unique fraction from two vcf files.\n" + \ " validate:\n\tValidate the input vcf file.\n\n" + \ "vcfPytools.py [tool] --help for information on a specific tool." # Determine the requested tool. if len(sys.argv) > 1: tool = sys.argv[1] else: print >> sys.stderr, usage exit(1) if tool == "annotate": import annotate success = annotate.main() elif tool == "extract": import extract success = extract.main() elif tool == "filter": import filter success = filter.main() elif tool == "intersect": import intersect success = intersect.main() elif tool == "indel": import indel success = indel.main() elif tool == "multi": import multi success = multi.main() elif tool == "merge": import merge success = merge.main() elif tool == "sort": import sort success = sort.main() elif tool == "stats": import stats success = stats.main() elif tool == "union": import union success = union.main() elif tool == "unique": import unique success = unique.main() elif tool == "test": import test success = test.main() elif tool == "validate": import validate success = validate.main() elif tool == "--help" or tool == "-h" or tool == "?": print >> sys.stderr, usage else: print >> sys.stderr, "Unknown tool: ", tool print >> sys.stderr, "\n", usage exit(1) # If program completed properly, terminate. if success == 0: exit(0)
#!/usr/bin/env python3
import os
import sys

import validate

source = sys.argv[1]
results = []
for root, dirname, filenames in os.walk(source):
    error_counter = 0
    for files in filenames:
        if files.endswith('_manifest.md5'):
            if os.path.basename(root) != 'logs':
                manifest = os.path.join(root, files)
                print(manifest)
                if os.path.isfile(manifest):
                    error_counter = validate.main([manifest])
                    if error_counter == 0:
                        results.append([root, 'success'])
                    else:
                        results.append([root, 'failure'])
            else:
                continue

# report once, after the whole tree has been walked (the original also
# re-printed the accumulated list after every manifest, which was redundant)
for result in results:
    print(result)
def test_main_bootstrap(self):
    "valid output is returned"
    valid, results = validate.main(open(self.doc_json, 'r'))
    self.assertTrue(isinstance(results, dict))
    self.assertTrue(isinstance(valid, bool))