"json_field": str } GITHUB_SOURCE = {"provider": 'github', "properties": GITHUB_SOURCE_PROPS} # S3 Source S3_SOURCE_PROPS = { "account_id": And(Use(int), lambda n: len(str(n)) == 12), "bucket_name": str, "object_key": str } S3_SOURCE = {"provider": 's3', "properties": S3_SOURCE_PROPS} # CodeBuild CODEBUILD_PROPS = { Optional("image"): str, Optional("size"): Or('small', 'medium', 'large'), Optional("spec_filename"): str, Optional("environment_variables"): { Optional(str): Or(str, bool, int, object) }, Optional("role"): str, Optional("timeout"): int, Optional("privileged"): bool, Optional("spec_inline"): str } DEFAULT_CODEBUILD_BUILD = { Optional("provider"): 'codebuild', Optional("enabled"): bool, Optional("properties"): CODEBUILD_PROPS } STAGE_CODEBUILD_BUILD = {
def spook_schema(cls) -> dict: return {"a": Or(None, int), "b": Or(None, str), "c": Or(None, bool)}
import logging from concurrent.futures import ThreadPoolExecutor from schema import Or from tornado import httpclient from tornado.web import RequestHandler from tornado import gen from error_code import ERR_UNKNOWN, ERR_NO_CONTENT, ERR_ARG, ERR_MULTIPLE_OBJ_RETURNED, ERR_DUPLICATE_ENTRY from tools_lib.gtornado.http_code import ( HTTP_200_OK, HTTP_204_NO_CONTENT, HTTP_201_CREATED, HTTP_400_BAD_REQUEST, HTTP_422_UNPROCESSABLE_ENTITY, HTTP_403_FORBIDDEN, HTTP_500_INTERNAL_SERVER_ERROR, HTTP_401_UNAUTHORIZED) from . import async_requests STR_OR_UNICODE = Or(str, unicode) executor = ThreadPoolExecutor(8) class RedirectedHTTPResponse(object): def __init__(self, response): self.request = response.request self.code = response.code self.reason = response.reason self.headers = response.headers self.effective_url = response.effective_url self.buffer = response.buffer self.body = response.body self.error = response.error self.request_time = response.request_time self.time_info = response.time_info
class OutputBase(object): IS_DEPENDENCY = False REMOTE = None PARAM_PATH = 'path' PARAM_CACHE = 'cache' PARAM_METRIC = 'metric' PARAM_METRIC_TYPE = 'type' PARAM_METRIC_XPATH = 'xpath' METRIC_SCHEMA = Or( None, bool, { Optional(PARAM_METRIC_TYPE): Or(str, None), Optional(PARAM_METRIC_XPATH): Or(str, None) }) DoesNotExistError = OutputDoesNotExistError IsNotFileOrDirError = OutputIsNotFileOrDirError def __init__(self, stage, path, info=None, remote=None, cache=True, metric=False): self.stage = stage self.project = stage.project self.url = path self.info = info self.remote = remote or self.REMOTE(self.project, {}) self.use_cache = False if self.IS_DEPENDENCY else cache self.metric = False if self.IS_DEPENDENCY else metric if self.use_cache and getattr(self.project.cache, self.REMOTE.scheme) is None: raise DvcException( "no cache location setup for '{}' outputs.".format( self.REMOTE.scheme)) def __repr__(self): return "{class_name}: '{url}'".format(class_name=type(self).__name__, url=(self.url or 'No url')) def __str__(self): return self.url @classmethod def match(cls, url): return re.match(cls.REMOTE.REGEX, url) def group(self, name): match = self.match(self.url) if not match: return None return match.group(name) @classmethod def supported(cls, url): return cls.match(url) is not None @property def scheme(self): return self.REMOTE.scheme @property def path(self): return self.path_info['path'] @property def sep(self): return '/' @property def exists(self): return self.remote.exists(self.path_info) def changed(self): if not self.exists: return True if not self.use_cache: return self.info != self.remote.save_info(self.path_info) return getattr(self.project.cache, self.scheme).changed(self.path_info, self.info) def status(self): if self.changed(): # FIXME better msgs return {str(self): 'changed'} return {} def save(self): if not self.use_cache: self.info = self.remote.save_info(self.path_info) else: self.info = getattr(self.project.cache, self.scheme).save(self.path_info) def dumpd(self): ret = self.info.copy() ret[self.PARAM_PATH] = self.url if self.IS_DEPENDENCY: return ret ret[self.PARAM_CACHE] = self.use_cache if isinstance(self.metric, dict): if self.PARAM_METRIC_XPATH in self.metric \ and not self.metric[self.PARAM_METRIC_XPATH]: del self.metric[self.PARAM_METRIC_XPATH] ret[self.PARAM_METRIC] = self.metric return ret def download(self, to_info): self.remote.download([self.path_info], [to_info]) def checkout(self, force=False): if not self.use_cache: return getattr(self.project.cache, self.scheme).checkout(self.path_info, self.info, force=force) def remove(self, ignore_remove=False): self.remote.remove(self.path_info) if self.scheme != 'local': return if ignore_remove and self.use_cache and self.is_local: self.project.scm.ignore_remove(self.path) def move(self, out): if self.scheme == 'local' and self.use_cache and self.is_local: self.project.scm.ignore_remove(self.path) self.remote.move(self.path_info, out.path_info) self.url = out.url self.path_info = out.path_info self.save() if self.scheme == 'local' and self.use_cache and self.is_local: self.project.scm.ignore(self.path)
def nullable(schema): """ Create new schema that allows the supported schema or None. """ return Or(schema, None)
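# Hedged usage sketch (added, not from the original source): shows how the
# `nullable` helper above composes with Schema; the color value is illustrative.
from schema import Optional, Schema

assert Schema(nullable(str)).validate(None) is None
assert Schema(nullable(str)).validate('#126f9a') == '#126f9a'
# Typical use: an optional key whose value may be explicitly null.
assert Schema({Optional('line'): nullable(str)}).validate({'line': None}) == {'line': None}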
app = Flask(__name__) CORS(app) model = pickle.load(open('model.pkl', 'rb')) PREDICT_SCHEMA = Schema({ 'CHARGE_COUNT': int, 'CHARGE_DISPOSITION': And(str, len), 'OFFENSE_CATEGORY': And(str, len), 'PRIMARY_CHARGE_FLAG': bool, 'DISPOSITION_CHARGED_OFFENSE_TITLE': And(str, len), 'DISPOSITION_CHARGED_CLASS': And(str, len), 'SENTENCE_JUDGE': And(str, len), 'SENTENCE_PHASE': And(str, len), 'COMMITMENT_TERM': And(str, len), 'COMMITMENT_UNIT': And(str, len), 'LENGTH_OF_CASE_in_Days': Or(float, int), 'AGE_AT_INCIDENT': Or(float, int), 'RACE': And(str, len), 'GENDER': And(str, len), 'INCIDENT_CITY': And(str, len), 'LAW_ENFORCEMENT_AGENCY': And(str, len), 'LAW_ENFORCEMENT_UNIT': And(str, len), 'SENTENCE_TYPE': And(str, len) }) PREDICT_KEYS = [ 'OFFENSE_CATEGORY', 'PRIMARY_CHARGE_FLAG', 'DISPOSITION_CHARGED_OFFENSE_TITLE', 'CHARGE_COUNT', 'DISPOSITION_CHARGED_CLASS', 'CHARGE_DISPOSITION', 'SENTENCE_JUDGE', 'SENTENCE_PHASE', 'AGE_AT_INCIDENT', 'GENDER', 'LAW_ENFORCEMENT_AGENCY' ]
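# Hedged sketch (added): validating one request payload against PREDICT_SCHEMA
# before calling the model; every field value below is made up for illustration.
from schema import SchemaError

sample_payload = {
    'CHARGE_COUNT': 1, 'CHARGE_DISPOSITION': 'Plea Of Guilty',
    'OFFENSE_CATEGORY': 'Narcotics', 'PRIMARY_CHARGE_FLAG': True,
    'DISPOSITION_CHARGED_OFFENSE_TITLE': 'POSSESSION OF CONTROLLED SUBSTANCE',
    'DISPOSITION_CHARGED_CLASS': '4', 'SENTENCE_JUDGE': 'Jane Doe',
    'SENTENCE_PHASE': 'Original Sentencing', 'COMMITMENT_TERM': '2',
    'COMMITMENT_UNIT': 'Year(s)', 'LENGTH_OF_CASE_in_Days': 120,
    'AGE_AT_INCIDENT': 34, 'RACE': 'White', 'GENDER': 'Male',
    'INCIDENT_CITY': 'Chicago', 'LAW_ENFORCEMENT_AGENCY': 'CHICAGO PD',
    'LAW_ENFORCEMENT_UNIT': 'District 1', 'SENTENCE_TYPE': 'Prison',
}
try:
    validated = PREDICT_SCHEMA.validate(sample_payload)
except SchemaError as err:
    print({'error': str(err)})  # e.g. what a Flask view might return with status 400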
from schema import Schema, And, Or, Optional, Const CONFIGURATION_PATH = "configuration/config.yaml" # All messages need to have a source address and a destination address. # These addresses should resolve using cluster DNS. # Messages may optionally include a list of headers as string key/value pairs, # and an optional body. How these headers and bodies are included in new values # depends on where the message is defined in the schema - `matchRequest` messages # behave differently than `onFailure` messages. HTTP_REQUEST_SCHEMA = Schema( { "method": lambda t: t in ["GET", "HEAD", "PUT", "PATCH", "DELETE", "POST"], "url": str, Optional("headers"): Schema(Or({str: str}, {})), Optional("body"): str, }, ignore_extra_keys=True, ) HTTP_RESPONSE_SCHEMA = Schema( { "status-code": int, Optional("headers"): Schema(Or({str: str}, {})), Optional("body"): str, }, ignore_extra_keys=True, ) # Some messages are part of a transaction. Such transactions need to specify a timeout,
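# Hedged sketch (added): a message that HTTP_REQUEST_SCHEMA accepts. The URL and
# headers are made up; the extra "note" key passes because ignore_extra_keys=True.
request_msg = {
    "method": "POST",
    "url": "http://echo.test.svc.cluster.local/api",
    "headers": {"content-type": "application/json"},
    "body": '{"ping": true}',
    "note": "extra keys are ignored, not rejected",
}
HTTP_REQUEST_SCHEMA.validate(request_msg)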
tier_schema = Schema({ "data": [vectorised_data_schema], "images": { str: { "imagesFilename": str, # config.imagesFile(tier) "imgsInfoFilename": str, # config.imgsInfoFile(tier) "imgsSceneGraph": str # config.sceneGraphsFile(tier) } }, "train": bool }) dataset_schema = Schema({ "evalTrain": tier_schema, "test": tier_schema, "train": Or(None, tier_schema), "val": tier_schema, }) data_schema = Schema({ "main": Or(None, dataset_schema), "extra": Or(None, dataset_schema) }) # np.ndarray (the class) gives an isinstance check; np.array is a factory function, # which schema would call as a predicate and fail on multi-element arrays. separate_embeddings_schema = Schema({ 'a': Or(None, np.ndarray), 'q': np.ndarray, 'scene': Or(None, np.ndarray) }) shared_embeddings_schema = Schema({
#!/var/www/lisb/venv/bin/python import json import os import sys import boto3 import tarfile from datetime import datetime from schema import Schema, Optional, And, Or from common_functions import encrypt_file # BACKUPS SCHEMA command_schema = Schema({ Optional("--to-backup"): And([Or("conf", "data", "logs")], lambda l: 0 < len(l) <= 3), Optional("--s3"): [And(str, lambda bucket_str: len(bucket_str.split("/", maxsplit=1)) == 2)], Optional("--encrypted"): And(list, lambda l: len(l) == 0) }) def create_backup(options): # Create backups directory if necessary base_path = "/var/www/lisb/" backups_path = "/var/www/lisb/backups/" if not os.path.exists(backups_path): os.makedirs(backups_path) # Create GZ-compressed local TAR backup file of information specified by '--to-backup' to_backup = ['conf', 'data', 'logs'] if '--to-backup' not in options else options['--to-backup'] backup_name = "backup" + datetime.utcnow().strftime("%Y%m%d%H%M%S") + ".tar.gz"
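# Hedged sketch (added): the docopt-style options dict that command_schema
# expects. All keys are Optional, so any subset validates; values are made up.
command_schema.validate({
    "--to-backup": ["conf", "data"],     # one to three of "conf"/"data"/"logs"
    "--s3": ["my-bucket/lisb-backups"],  # each entry must split into bucket/key
    "--encrypted": [],                   # bare flag arrives as an empty list
})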
Optional('searchSpacePath'): os.path.exists, Optional('multiPhase'): bool, 'useAnnotation': bool, 'tuner': Or( { 'builtinTunerName': Or('TPE', 'Random', 'Anneal', 'Evolution', 'SMAC', 'BatchTuner'), 'classArgs': { 'optimize_mode': Or('maximize', 'minimize'), Optional('speed'): int }, Optional('gpuNum'): And(int, lambda x: 0 <= x <= 99999), }, { 'codeDir': os.path.exists, 'classFileName': str, 'className': str, Optional('classArgs'): dict, Optional('gpuNum'): And(int, lambda x: 0 <= x <= 99999), }), Optional('assessor'): Or( { 'builtinAssessorName': lambda x: x in ['Medianstop'], 'classArgs': { 'optimize_mode': lambda x: x in ['maximize', 'minimize'] },
class Lucene(HoaxyCommand): """ usage: hoaxy lucene --index [--mode=<mode>] hoaxy lucene --search --query=<q> [--top=<n>] hoaxy lucene -h | --help Use Apache Lucene to build an index from the parsed articles, and provide a simple interface to query the indexed articles. --index Create, append and update index. --search Do lucene search Options: --mode=<mode> Mode for create index, available choices are: create_or_append, create, append [default: create_or_append] --query=<q> String to query. --top=<n> Number of top results to show. [default: 5] -h --help Show help. Examples: 1. Create index of all non-indexed documents hoaxy lucene --index --mode=create_or_append 2. If you want to replace the old indexes and create a new one: hoaxy lucene --index --mode=create 3. Search top 5 most relevant articles containing keyword 'trump' hoaxy lucene --search --query=trump """ name = 'lucene' short_description = 'Lucene Indexing and Searching' args_schema = Schema({ '--query': Or(None, lambda s: len(s) > 0), '--mode': Or( None, And(Use(str.lower), lambda s: s in ('create_or_append', 'create', 'append'))), '--top': Or(None, And(Use(int), lambda x: x > 0)), object: object }) @classmethod def prepare_article(cls, article_data): article_id, group_id, canonical_url, title, meta, content,\ date_published, domain, site_type = article_data article = dict(article_id=article_id, group_id=group_id, canonical_url=canonical_url, title=title, content=content, date_published=date_published, domain=domain, site_type=site_type) article['meta'] = unicode(meta) article['uq_id_str'] = unicode(group_id) + title if article['content'] is None: article['content'] = u'NULL' return article @classmethod def index(cls, session, mode, articles_iter, mgid): lucene.initVM() index_dir = cls.conf['lucene']['index_dir'] indexer = Indexer(index_dir, mode, date_format=cls.conf['lucene']['date_format']) article = None for i, data in enumerate(articles_iter): article = cls.prepare_article(data) indexer.index_one(article) if i % cls.conf['window_size'] == 1: logger.info('Indexed %s articles', i) indexer.close() if article is not None: mgid.value = str(article['group_id']) session.commit() logger.info('Indexed article pointer updated!') else: logger.warning('No new articles are found!') logger.info('Done!') @classmethod def search(cls, query, n): lucene.initVM() index_dir = cls.conf['lucene']['index_dir'] searcher = Searcher(index_dir) rs = searcher.search(query, n) pprint.pprint(rs) @classmethod def run(cls, args): try: # print(args) args = cls.args_schema.validate(args) except SchemaError as e: sys.exit(e) session = Session() # make sure lucene is initialized lucene.initVM() lucene.getVMEnv().attachCurrentThread() if args['--index'] is True: configure_logging('lucene.index', console_level='INFO') mgid = get_or_create_m( session, MetaInfo, data=dict( name='article_group_id_lucene_index', value='0', value_type='int', description='article.group_id used for lucene index'), fb_uk='name') if args['--mode'] == 'create': mgid.set_value(0) session.commit() q = """ SELECT DISTINCT ON (a.group_id) a.id, a.group_id, a.canonical_url, a.title, a.meta, a.content, coalesce(a.date_published, a.date_captured) AS pd, s.domain, s.site_type FROM article AS a JOIN site AS s ON s.id=a.site_id WHERE a.site_id IS NOT NULL AND s.is_enabled IS TRUE AND a.group_id>:gid ORDER BY group_id, pd ASC """ articles_iter = session.execute( sqlalchemy.text(q).bindparams(gid=mgid.get_value())) cls.index(session, args['--mode'], articles_iter, mgid) elif args['--search'] is True: 
configure_logging('lucene.search', console_level='INFO') cls.search(args['--query'], args['--top']) else: print("Unrecognized command!") sys.exit(2)
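# Hedged sketch (added): why args_schema above ends with `object: object` --
# docopt returns every CLI flag in one dict, and the catch-all entry lets keys
# the command does not validate (e.g. '--index') pass through unchanged.
from schema import And, Or, Schema, Use

demo_schema = Schema({
    '--top': Or(None, And(Use(int), lambda x: x > 0)),
    object: object,  # accept any other docopt key as-is
})
demo_args = demo_schema.validate({'--top': '5', '--index': True})
assert demo_args['--top'] == 5        # converted by Use(int)
assert demo_args['--index'] is True   # passed through by the catch-all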
def bayesitc_mcmc_parser(argv=sys.argv[1:]): __usage__ = """Analyze ITC data using Markov chain Monte Carlo (MCMC). Uses MicroCal .itc files, or custom format .yml files for modeling experiments. When running the program you can select one of two options: competitive A competitive binding model. Requires multiple experiments to be specified. twocomponent A twocomponent binding model. Analyzes only a single experiment Usage: bayesitc_mcmc.py twocomponent <datafile> <heatsfile> [-v | -vv | -vvv] [--cc=<c_cell>] [--cs=<c_syringe> ] [--dc=<dc_cell>] [--ds=<dc_syringe>] [options] bayesitc_mcmc.py competitive (<datafile> <heatsfile>)... (-r <receptor> | --receptor <receptor>) [-v | -vv | -vvv] [options] bayesitc_mcmc.py (-h | --help) bayesitc_mcmc.py --license bayesitc_mcmc.py --version Options: -h, --help Show this screen --version Show version --license Show license -l <logfile>, --log=<logfile> File to write logs to. Will be placed in workdir. --cc <c_cell> Concentration of component in cell in mM. Defaults to value in input file --cs <c_syringe> Concentration of component in syringe in mM. Defaults to value in input file --dc <dc_cell> Relative uncertainty in cell concentration [default: 0.1] --ds <dc_syringe> Relative uncertainty in syringe concentration [default: 0.1] -v, Verbose output level. Multiple flags increase verbosity. -w <workdir>, --workdir <workdir> Directory for output files [default: ./] -r <receptor> | --receptor <receptor> The name of the receptor for a competitive binding model. -n <name>, --name <name> Name for the experiment. Will be used for output files. Defaults to input file name. -i <ins>, --instrument <ins> The name of the instrument used for the experiment. Overrides .itc file instrument. --nfit=<n> No. of iterations for maximum a posteriori fit [default: 20000] --niters=<n> No. of iterations for mcmc sampling [default: 2000000] --nburn=<n> No. 
of Burn-in iterations for mcmc sampling [default: 500000] --nthin=<n> Thinning period for mcmc sampling [default: 500] """ arguments = docopt(__usage__, argv=argv, version='bayesitc_mcmc.py, pre-alpha') schema = Schema({ '--help': bool, # True or False are accepted '--license': bool, # True or False are accepted # integer between 0 and 3 '-v': And(int, lambda n: 0 <= n <= 3), 'twocomponent': bool, 'competitive': bool, '--nfit': And(Use(int), lambda n: n > 0), # Convert str to int, make sure that it is larger than 0 '--nburn': And(Use(int), lambda n: n > 0), # Convert str to int, make sure that it is larger than 0 '--niters': And(Use(int), lambda n: n > 0), # Convert str to int, make sure that it is larger than 0 '--nthin': And(Use(int), lambda n: n > 0), # Convert str to int, make sure that it is larger than 0 '--name': Or(None, And(str, len)), # Not an empty string '--instrument': Or(None, And(str, lambda m: m in known_instruments)), # None, or str and found in this dict '--version': bool, # True or False are accepted '--receptor': Or(None, str), # str or None '--workdir': str, # str # list: every entry must be an existing file; then convert to absolute paths '<datafile>': And( list, lambda inpfiles: all(os.path.isfile(inpfile) for inpfile in inpfiles), Use(lambda inpfiles: [os.path.abspath(inpfile) for inpfile in inpfiles])), # list: every entry must be an existing file; then convert to absolute paths '<heatsfile>': And( list, lambda inpfiles: all(os.path.isfile(inpfile) for inpfile in inpfiles), Use(lambda inpfiles: [os.path.abspath(inpfile) for inpfile in inpfiles])), '--log': Or(None, str), # Don't use, or str '--cc': Or(None, And(Use(float), lambda n: n > 0.0)), # Not specified, or a float greater than 0 '--cs': Or(None, And(Use(float), lambda n: n > 0.0)), # Not specified, or a float greater than 0 '--dc': And(Use(float), lambda n: n > 0.0), # a float greater than 0 '--ds': And(Use(float), lambda n: n > 0.0), # a float greater than 0 }) return schema.validate(arguments)
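# Hedged sketch (added): docopt hands back strings, so the recurring
# And(Use(int), lambda n: n > 0) idiom above converts first, then range-checks;
# validate() returns the converted value. The number below is illustrative.
from schema import And, Schema, Use

count_schema = Schema({'--niters': And(Use(int), lambda n: n > 0)})
assert count_schema.validate({'--niters': '2000000'}) == {'--niters': 2000000}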
def bayesitc_util_parser(argv=sys.argv[1:]): __usage__ = """ Bayesian analysis of ITC data. Uses MicroCal .itc files, or custom format .yml files for analysing experiments. Usage: ITC.py <datafiles>... [-w <workdir> | --workdir=<workdir>] [-n <name> | --name=<name>] [-q <file> | --heats=<file>] [-i <ins> | --instrument=<ins> ] [-v | -vv | -vvv] [-r <file> | --report=<file>] [ -l <logfile> | --log=<logfile>] ITC.py mcmc <datafiles>... (-m <model> | --model=<model>) [-w <workdir> | --workdir=<workdir>] [ -r <receptor> | --receptor=<receptor>] [-n <name> | --name=<name>] [-q <file> | --heats=<file>] [-i <ins> | --instrument=<ins> ] [ -l <logfile> | --log=<logfile>] [-v | -vv | -vvv] [--report=<file>] [options] ITC.py (-h | --help) ITC.py --license ITC.py --version Options: -h, --help Show this screen --version Show version --license Show license -l <logfile>, --log=<logfile> File to write logs to. Will be placed in workdir. -v, Verbose output level. Multiple flags increase verbosity. <datafiles> Datafile(s) to perform the analysis on, .itc, .yml -w <workdir>, --workdir=<workdir> Directory for output files [default: ./] -r <receptor> | --receptor=<receptor> The name of the receptor for a Competitive Binding model. -n <name>, --name=<name> Name for the experiment. Will be used for output files. Defaults to input file name. -i <ins>, --instrument=<ins> The name of the instrument used for the experiment. Overrides .itc file instrument. -q <file>, --heats=<file> Origin format integrated heats file. (From NITPIC use .dat file) -m <model>, --model=<model> Model to use for mcmc sampling [default: TwoComponent] --nfit=<n> No. of iterations for maximum a posteriori fit [default: 20000] --niters=<n> No. of iterations for mcmc sampling [default: 6000] --nburn=<n> No. of Burn-in iterations for mcmc sampling [default: 1000] --nthin=<n> Thinning period for mcmc sampling [default: 5] --report=<file> Output file with summary in markdown """ arguments = docopt(__usage__, argv=argv, version='ITC.py, pre-alpha') schema = Schema({ '--heats': Or(None, And(str, os.path.isfile, Use(os.path.abspath))), # str, verify that it exists '--help': bool, # True or False are accepted '--license': bool, # True or False are accepted # integer between 0 and 3 '-v': And(int, lambda n: 0 <= n <= 3), # str and found in this dict '--model': And(str, lambda m: m in known_models), '--nfit': And(Use(int), lambda n: n > 0), # Convert str to int, make sure that it is larger than 0 '--nburn': And(Use(int), lambda n: n > 0), # Convert str to int, make sure that it is larger than 0 '--niters': And(Use(int), lambda n: n > 0), # Convert str to int, make sure that it is larger than 0 '--nthin': And(Use(int), lambda n: n > 0), # Convert str to int, make sure that it is larger than 0 '--name': Or(None, And(str, len)), # Not an empty string '--instrument': Or(None, And(str, lambda m: m in known_instruments)), # None, or str and found in this dict '--version': bool, # True or False are accepted '--receptor': Or(None, str), # str or None '--workdir': str, # str # list: every entry must be an existing file; then convert to absolute paths '<datafiles>': And( list, lambda inpfiles: all(os.path.isfile(inpfile) for inpfile in inpfiles), Use(lambda inpfiles: [os.path.abspath(inpfile) for inpfile in inpfiles])), 'mcmc': bool, # True or False are accepted '--report': Or(None, Use(lambda f: open(f, 'w'))), # Don't use, or open file with writing permissions '--log': Or(None, str), # Don't use, or str }) return schema.validate(arguments)
def integrate_parser(argv=sys.argv[1:]): __usage__ = """ Integrate ITC data using Gaussian process regression. Uses MicroCal .itc files, or custom format .yml files for analysing experiments. Usage: bayesitc_integrate.py <datafiles>... [-w <workdir> | --workdir=<workdir>] [-v | -vv | -vvv] [options] bayesitc_integrate.py (-h | --help) bayesitc_integrate.py --license bayesitc_integrate.py --version Options: -h, --help Show this screen --version Show version --license Show license -l <logfile>, --log=<logfile> File to write logs to. Will be placed in workdir. -v, Verbose output level. Multiple flags increase verbosity. <datafiles> Datafile(s) to perform the analysis on, .itc, .yml -w <workdir>, --workdir=<workdir> Directory for output files [default: ./] -n <name>, --name=<name> Name for the experiment. Will be used for output files. Defaults to input file name. -i <ins>, --instrument=<ins> The name of the instrument used for the experiment. Overrides .itc file instrument. -f <frac>, --fraction=<frac> The fraction of the injection to fit, measured from the end [default: 0.2] --theta0=<theta0> The parameters in the autocorrelation model. [default: 5.0] --nugget=<nugget> Size of nugget effect to allow smooth predictions from noisy data. [default: 1.0] --plot Generate plots of the baseline fit """ arguments = docopt(__usage__, argv=argv, version='bayesitc_integrate.py, pre-alpha') schema = Schema({ '--help': bool, # True or False are accepted '--license': bool, # True or False are accepted # integer between 0 and 3 '-v': And(int, lambda n: 0 <= n <= 3), # Float greater than 0 '--fraction': And(Use(float), lambda n: 0 < n <= 1.0), '--nugget': And(Use(float), lambda n: n > 0), '--theta0': And(Use(float), lambda n: n > 0), '--name': Or(None, And(str, len)), # Not an empty string '--instrument': Or(None, And(str, lambda m: m in known_instruments)), # None, or str and found in this dict '--version': bool, # True or False are accepted '--plot': bool, # True or False are accepted '--workdir': str, # str # list: every entry must be an existing file; then convert to absolute paths '<datafiles>': And( list, lambda inpfiles: all(os.path.isfile(inpfile) for inpfile in inpfiles), Use(lambda inpfiles: [os.path.abspath(inpfile) for inpfile in inpfiles])), '--log': Or(None, str), # Don't use, or str }) return schema.validate(arguments)
def parse_args(): from schema import Schema, Use, And, Or args = docopt.docopt(__doc__, version='SAGA Project Ver 4.2') requirements = { '--phone' : And(Use(str), lambda x: x in ('galaxy', 'pixel', 'stratux'), error='Phone type only support \'galaxy\', \'pixel\' and \'stratux\''), '--threshold' : And(Use(int), lambda x: x > 0, error='Roll threshold should be integer > 0'), '--freq' : Or(None, And(Use(str), lambda x: x in ('haar',), error='Wavelet only support \'haar\'')), '--batch-size': And(Use(int), lambda x: x > 0, error='Batch size should be integer > 0'), '--win' : Or(None, And(Use(int), lambda x: x > 0), error='Feature window length should be integer > 0'), '--offset' : Or(None, And(Use(int), lambda x: x > 0), error='Feature window offset should be integer > 0'), '--rate' : Or(None, And(Use(float), lambda x: (0 < x) & (x <= 1)), error='Feature window offset rate should be float in (0, 1]'), '--limit' : Or(None, And(Use(int), lambda x: x > 0), error='Limit should be integer > 0'), '--model' : And(Use(str), lambda x: x in nnet.model_list, error='Model not available'), '--epochs' : And(Use(int), lambda x: x >= 0, error='Number of epochs should be integer >= 0'), '--lr' : And(Use(float), lambda x: x > 0, error='Learning rate should be float > 0'), '--print-freq': And(Use(int), lambda x: x > 0, error='Print frequency should be integer > 0'), '--device' : And(Use(int), lambda x: x >= 0, error='CUDA device ID should be integer >= 0'), '--keyword' : And(Use(str), lambda x: x in ('pitch', 'roll', 'heading', 'hazard'), error='Only predict \'pitch\' or \'roll\' or \'heading\' or \'hazard\''), '--stratux' : Or(None, And(Use(int), lambda x: x >= 0, error='Stratux input level should be integer >= 0')), '--try' : And(Use(int), lambda x: x > 0, error='Number of trials should be integer > 0'), object : object, } args = Schema(requirements).validate(args) # midterm prediction must be fixed assert not (args['--trig'] and args['--diff'] and args['--direct']) assert args['--keyword'] != 'hazard' or (args['--direct'] and args['--no-normal']) assert args['--phone'] != 'stratux' or args['--stratux'] is not None global WINDOW_CONFIG if args['--win'] is not None: WINDOW_CONFIG = { 'input': { 'length': args['--win'], 'offset_length': args['--offset'], 'offset_rate': args['--rate'], 'padding': 'repeat_base', }, 'target': { 'length': args['--win'], 'offset_length': args['--offset'], 'offset_rate': args['--rate'], 'padding': 'repeat_base', }, } else: WINDOW_CONFIG = constant.WINDOW_CONFIG return args
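# Hedged sketch (added): the error= strings above replace schema's default
# failure message, which keeps CLI errors readable. Standalone illustration:
from schema import And, Schema, SchemaError, Use

threshold_schema = Schema({
    '--threshold': And(Use(int), lambda x: x > 0,
                       error='Roll threshold should be integer > 0'),
})
try:
    threshold_schema.validate({'--threshold': '-3'})
except SchemaError as err:
    assert 'Roll threshold' in str(err)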
def query_latest_articles(): """Handle API request '/latest-articles'. API Request Parameters ---------------------- past_hours : int Number of hours back from now that count as 'latest'. domains : object If None, return all articles in the latest hours; If str, should be one of {'fact_checking', 'claim', 'fake'}: if 'fact_checking', return fact checking articles, if 'claim', return claim articles, if 'fake', return selected fake articles, which is a subset of claim, selected by us. If an array of domains, return articles belonging to these domains. domains_file : str When `domains` is 'fake', the actual domains used are loaded from file `domains_file`. If this file doesn't exist, then `claim` type domains would be used. API Response Keys ----------------- status : string num_of_entries : int articles : dict keys are: canonical_url : string date_published : string formatted datetime domain : string id : int site_type : {'claim', 'fact_checking'} title : string """ lucene.getVMEnv().attachCurrentThread() # Validate input of request q_articles_schema = Schema({ 'past_hours': And(Use(int), lambda x: x > 0, error='Invalid value of `past_hours`'), Optional('domains', default=None): Or(lambda s: s in ('fact_checking', 'claim', 'fake'), Use(flask.json.loads, error='Not valid values nor JSON string of `domains`')) }) q_kwargs = copy_req_args(request.args) try: q_kwargs = q_articles_schema.validate(q_kwargs) domains_file = CONF['api'].get('selected_fake_domains_path') df = db_query_latest_articles( engine, domains_file=domains_file, **q_kwargs) if len(df) == 0: raise APINoResultError('No articles found!') response = dict( status='OK', num_of_entries=len(df), articles=flask.json.loads(df.to_json(**TO_JSON_KWARGS))) except SchemaError as e: response = dict(status='Parameter error', error=str(e)) except APIParseError as e: response = dict(status='Invalid query', error=str(e)) except APINoResultError as e: response = dict(status='No result error', error=str(e)) except Exception as e: logger.exception(e) response = dict(status='ERROR', error='Server error, query failed!') return flask.jsonify(response)
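# Hedged sketch (added): the `domains` rule above accepts either one of the
# three keywords or a JSON array, because Or() tries the literal predicate
# first and only falls back to Use(json.loads) when it fails. This standalone
# version uses the stdlib json instead of flask.json.
import json
from schema import Or, Schema, Use

domains_rule = Schema(Or(lambda s: s in ('fact_checking', 'claim', 'fake'),
                         Use(json.loads)))
assert domains_rule.validate('fake') == 'fake'
assert domains_rule.validate('["example.com"]') == ['example.com']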
class Stage(object): STAGE_FILE = "Dvcfile" STAGE_FILE_SUFFIX = ".dvc" PARAM_MD5 = "md5" PARAM_CMD = "cmd" PARAM_WDIR = "wdir" PARAM_DEPS = "deps" PARAM_OUTS = "outs" PARAM_LOCKED = "locked" SCHEMA = { Optional(PARAM_MD5): Or(str, None), Optional(PARAM_CMD): Or(str, None), Optional(PARAM_WDIR): Or(str, None), Optional(PARAM_DEPS): Or(And(list, Schema([dependency.SCHEMA])), None), Optional(PARAM_OUTS): Or(And(list, Schema([output.SCHEMA])), None), Optional(PARAM_LOCKED): bool, } def __init__( self, repo, path=None, cmd=None, wdir=os.curdir, deps=None, outs=None, md5=None, locked=False, ): if deps is None: deps = [] if outs is None: outs = [] self.repo = repo self.path = path self.cmd = cmd self.wdir = wdir self.outs = outs self.deps = deps self.md5 = md5 self.locked = locked def __repr__(self): return "Stage: '{path}'".format( path=self.relpath if self.path else "No path") @property def relpath(self): return os.path.relpath(self.path) @property def is_data_source(self): """Whether the stage file was created with `dvc add` or `dvc import`""" return self.cmd is None @staticmethod def is_valid_filename(path): return (path.endswith(Stage.STAGE_FILE_SUFFIX) or os.path.basename(path) == Stage.STAGE_FILE) @staticmethod def is_stage_file(path): return os.path.isfile(path) and Stage.is_valid_filename(path) def changed_md5(self): return self.md5 != self._compute_md5() @property def is_callback(self): """ A callback stage is always considered as changed, so it runs on every `dvc repro` call. """ return not self.is_data_source and len(self.deps) == 0 @property def is_import(self): """Whether the stage file was created with `dvc import`.""" return not self.cmd and len(self.deps) == 1 and len(self.outs) == 1 def _changed_deps(self): if self.locked: return False if self.is_callback: logger.warning( "Dvc file '{fname}' is a 'callback' stage " "(has a command and no dependencies) and thus always " "considered as changed.".format(fname=self.relpath)) return True for dep in self.deps: if dep.changed(): logger.warning( "Dependency '{dep}' of '{stage}' changed.".format( dep=dep, stage=self.relpath)) return True return False def _changed_outs(self): for out in self.outs: if out.changed(): logger.warning("Output '{out}' of '{stage}' changed.".format( out=out, stage=self.relpath)) return True return False def _changed_md5(self): if self.changed_md5(): logger.warning("Dvc file '{}' changed.".format(self.relpath)) return True return False def changed(self): ret = any( [self._changed_deps(), self._changed_outs(), self._changed_md5()]) if ret: msg = "Stage '{}' changed.".format(self.relpath) color = "yellow" else: msg = "Stage '{}' didn't change.".format(self.relpath) color = "green" logger.info(logger.colorize(msg, color)) return ret def remove_outs(self, ignore_remove=False): """ Used mainly for `dvc remove --outs` """ for out in self.outs: out.remove(ignore_remove=ignore_remove) def unprotect_outs(self): for out in self.outs: if out.scheme != "local" or not out.exists: continue self.repo.unprotect(out.path) def remove(self): self.remove_outs(ignore_remove=True) os.unlink(self.path) def reproduce(self, force=False, dry=False, interactive=False, no_commit=False): if not self.changed() and not force: return None if (self.cmd or self.is_import) and not self.locked and not dry: # Removing outputs only if we actually have command to reproduce self.remove_outs(ignore_remove=False) msg = ("Going to reproduce '{stage}'. 
" "Are you sure you want to continue?".format(stage=self.relpath)) if interactive and not prompt.confirm(msg): raise DvcException("reproduction aborted by the user") logger.info("Reproducing '{stage}'".format(stage=self.relpath)) self.run(dry=dry, no_commit=no_commit, force=force) logger.debug("'{stage}' was reproduced".format(stage=self.relpath)) return self @staticmethod def validate(d, fname=None): from dvc.utils import convert_to_unicode try: Schema(Stage.SCHEMA).validate(convert_to_unicode(d)) except SchemaError as exc: raise StageFileFormatError(fname, exc) @classmethod def _stage_fname(cls, fname, outs, add): if fname: return fname if not outs: return cls.STAGE_FILE out = outs[0] path_handler = out.remote.ospath fname = path_handler.basename(out.path) + cls.STAGE_FILE_SUFFIX fname = Stage._expand_to_path_on_add_local(add, fname, out, path_handler) return fname @staticmethod def _expand_to_path_on_add_local(add, fname, out, path_handler): if (add and out.is_local and not contains_symlink_up_to(out.path, out.repo.root_dir)): fname = path_handler.join(path_handler.dirname(out.path), fname) return fname @staticmethod def _check_stage_path(repo, path): assert repo is not None real_path = os.path.realpath(path) if not os.path.exists(real_path): raise StagePathNotFoundError(path) if not os.path.isdir(real_path): raise StagePathNotDirectoryError(path) proj_dir = os.path.realpath(repo.root_dir) + os.path.sep if not (real_path + os.path.sep).startswith(proj_dir): raise StagePathOutsideError(path) @property def is_cached(self): """ Checks if this stage has been already ran and stored """ from dvc.remote.local import RemoteLOCAL from dvc.remote.s3 import RemoteS3 old = Stage.load(self.repo, self.path) if old._changed_outs(): return False # NOTE: need to save checksums for deps in order to compare them # with what is written in the old stage. for dep in self.deps: dep.save() old_d = old.dumpd() new_d = self.dumpd() # NOTE: need to remove checksums from old dict in order to compare # it to the new one, since the new one doesn't have checksums yet. old_d.pop(self.PARAM_MD5, None) new_d.pop(self.PARAM_MD5, None) outs = old_d.get(self.PARAM_OUTS, []) for out in outs: out.pop(RemoteLOCAL.PARAM_CHECKSUM, None) out.pop(RemoteS3.PARAM_CHECKSUM, None) return old_d == new_d @staticmethod def create( repo=None, cmd=None, deps=None, outs=None, outs_no_cache=None, metrics=None, metrics_no_cache=None, fname=None, cwd=None, wdir=None, locked=False, add=False, overwrite=True, ignore_build_cache=False, remove_outs=False, validate_state=True, ): if outs is None: outs = [] if deps is None: deps = [] if outs_no_cache is None: outs_no_cache = [] if metrics is None: metrics = [] if metrics_no_cache is None: metrics_no_cache = [] # Backward compatibility for `cwd` option if wdir is None and cwd is not None: if fname is not None and os.path.basename(fname) != fname: raise StageFileBadNameError( "stage file name '{fname}' may not contain subdirectories" " if '-c|--cwd' (deprecated) is specified. 
Use '-w|--wdir'" " along with '-f' to specify stage file path and working" " directory.".format(fname=fname)) wdir = cwd else: wdir = os.curdir if wdir is None else wdir stage = Stage(repo=repo, wdir=wdir, cmd=cmd, locked=locked) stage.outs = output.loads_from(stage, outs, use_cache=True) stage.outs += output.loads_from(stage, metrics, use_cache=True, metric=True) stage.outs += output.loads_from(stage, outs_no_cache, use_cache=False) stage.outs += output.loads_from(stage, metrics_no_cache, use_cache=False, metric=True) stage.deps = dependency.loads_from(stage, deps) stage._check_circular_dependency() stage._check_duplicated_arguments() fname = Stage._stage_fname(fname, stage.outs, add=add) wdir = os.path.abspath(wdir) if cwd is not None: path = os.path.join(wdir, fname) else: path = os.path.abspath(fname) Stage._check_stage_path(repo, wdir) Stage._check_stage_path(repo, os.path.dirname(path)) stage.wdir = wdir stage.path = path # NOTE: remove outs before we check build cache if remove_outs: stage.remove_outs(ignore_remove=False) logger.warning("Build cache is ignored when using --remove-outs.") ignore_build_cache = True else: stage.unprotect_outs() if validate_state: if os.path.exists(path): if not ignore_build_cache and stage.is_cached: logger.info("Stage is cached, skipping.") return None msg = ( "'{}' already exists. Do you wish to run the command and " "overwrite it?".format(stage.relpath)) if not overwrite and not prompt.confirm(msg): raise StageFileAlreadyExistsError(stage.relpath) os.unlink(path) return stage @staticmethod def _check_dvc_filename(fname): if not Stage.is_valid_filename(fname): raise StageFileBadNameError( "bad stage filename '{}'. Stage files should be named" " 'Dvcfile' or have a '.dvc' suffix (e.g. '{}.dvc').".format( os.path.relpath(fname), os.path.basename(fname))) @staticmethod def _check_file_exists(fname): if not os.path.exists(fname): raise StageFileDoesNotExistError(fname) @staticmethod def load(repo, fname): Stage._check_file_exists(fname) Stage._check_dvc_filename(fname) if not Stage.is_stage_file(fname): raise StageFileIsNotDvcFileError(fname) d = load_stage_file(fname) Stage.validate(d, fname=os.path.relpath(fname)) path = os.path.abspath(fname) stage = Stage( repo=repo, path=path, wdir=os.path.abspath( os.path.join(os.path.dirname(path), d.get(Stage.PARAM_WDIR, "."))), cmd=d.get(Stage.PARAM_CMD), md5=d.get(Stage.PARAM_MD5), locked=d.get(Stage.PARAM_LOCKED, False), ) stage.deps = dependency.loadd_from(stage, d.get(Stage.PARAM_DEPS, [])) stage.outs = output.loadd_from(stage, d.get(Stage.PARAM_OUTS, [])) return stage def dumpd(self): from dvc.remote.local import RemoteLOCAL return { key: value for key, value in { Stage.PARAM_MD5: self.md5, Stage.PARAM_CMD: self.cmd, Stage.PARAM_WDIR: RemoteLOCAL.unixpath( os.path.relpath(self.wdir, os.path.dirname(self.path))), Stage.PARAM_LOCKED: self.locked, Stage.PARAM_DEPS: [d.dumpd() for d in self.deps], Stage.PARAM_OUTS: [o.dumpd() for o in self.outs], }.items() if value } def dump(self): fname = self.path self._check_dvc_filename(fname) logger.info("Saving information to '{file}'.".format( file=os.path.relpath(fname))) d = self.dumpd() with open(fname, "w") as fd: yaml.safe_dump(d, fd, default_flow_style=False) self.repo.scm.track_file(os.path.relpath(fname)) def _compute_md5(self): from dvc.output.local import OutputLOCAL d = self.dumpd() # NOTE: removing md5 manually in order to not affect md5s in deps/outs if self.PARAM_MD5 in d.keys(): del d[self.PARAM_MD5] # Ignore the wdir default value. 
In this case a stage file w/o # wdir has the same md5 as one with the default value specified. # It's important for backward compatibility with pipelines that # didn't have WDIR in their stage files. if d.get(self.PARAM_WDIR) == ".": del d[self.PARAM_WDIR] # NOTE: excluding parameters that don't affect the state of the # pipeline. Not excluding `OutputLOCAL.PARAM_CACHE`, because if # it has changed, we might not have that output in our cache. m = dict_md5(d, exclude=[self.PARAM_LOCKED, OutputLOCAL.PARAM_METRIC]) logger.debug("Computed stage '{}' md5: '{}'".format(self.relpath, m)) return m def save(self): for dep in self.deps: dep.save() for out in self.outs: out.save() self.md5 = self._compute_md5() @staticmethod def _changed_entries(entries): ret = [] for entry in entries: if entry.checksum and entry.changed_checksum(): ret.append(entry.rel_path) return ret def check_can_commit(self, force): changed_deps = self._changed_entries(self.deps) changed_outs = self._changed_entries(self.outs) if changed_deps or changed_outs or self.changed_md5(): msg = ("dependencies {}".format(changed_deps) if changed_deps else "") msg += " and " if (changed_deps and changed_outs) else "" msg += "outputs {}".format(changed_outs) if changed_outs else "" msg += "md5" if not (changed_deps or changed_outs) else "" msg += " of '{}' changed. Are you sure you want to commit it?".format( self.relpath) if not force and not prompt.confirm(msg): raise StageCommitError( "unable to commit changed '{}'. Use `-f|--force` to " "force.".format(self.relpath)) self.save() def commit(self): for out in self.outs: out.commit() def _check_missing_deps(self): missing = [dep for dep in self.deps if not dep.exists] if any(missing): raise MissingDep(missing) @staticmethod def _warn_if_fish(executable): # pragma: no cover if (executable is None or os.path.basename(os.path.realpath(executable)) != "fish"): return logger.warning( "DVC detected that you are using fish as your default " "shell. Be aware that it might cause problems by overwriting " "your current environment variables with values defined " "in '.fishrc', which might affect your command. See " "https://github.com/iterative/dvc/issues/1307. 
") def _check_circular_dependency(self): from dvc.exceptions import CircularDependencyError circular_dependencies = set(d.path for d in self.deps) & set( o.path for o in self.outs) if circular_dependencies: raise CircularDependencyError(circular_dependencies.pop()) def _check_duplicated_arguments(self): from dvc.exceptions import ArgumentDuplicationError from collections import Counter path_counts = Counter(edge.path for edge in self.deps + self.outs) for path, occurrence in path_counts.items(): if occurrence > 1: raise ArgumentDuplicationError(path) def _run(self): self._check_missing_deps() executable = os.getenv("SHELL") if os.name != "nt" else None self._warn_if_fish(executable) p = subprocess.Popen( self.cmd, cwd=self.wdir, shell=True, env=fix_env(os.environ), executable=executable, ) p.communicate() if p.returncode != 0: raise StageCmdFailedError(self) def run(self, dry=False, resume=False, no_commit=False, force=False): if self.locked: logger.info("Verifying outputs in locked stage '{stage}'".format( stage=self.relpath)) if not dry: self.check_missing_outputs() elif self.is_import: logger.info("Importing '{dep}' -> '{out}'".format( dep=self.deps[0].path, out=self.outs[0].path)) if not dry: if self._already_cached() and not force: self.outs[0].checkout() else: self.deps[0].download(self.outs[0].path_info, resume=resume) elif self.is_data_source: msg = "Verifying data sources in '{}'".format(self.relpath) logger.info(msg) if not dry: self.check_missing_outputs() else: logger.info("Running command:\n\t{}".format(self.cmd)) if not dry: if (not force and not self.is_callback and self._already_cached()): self.checkout() else: self._run() if not dry: self.save() if not no_commit: self.commit() def check_missing_outputs(self): paths = [ out.path if out.scheme != "local" else out.rel_path for out in self.outs if not out.exists ] if paths: raise MissingDataSource(paths) def checkout(self, force=False, progress_callback=None): for out in self.outs: out.checkout(force=force, progress_callback=progress_callback) @staticmethod def _status(entries): ret = {} for entry in entries: ret.update(entry.status()) return ret def status(self): ret = [] if not self.locked: deps_status = self._status(self.deps) if deps_status: ret.append({"changed deps": deps_status}) outs_status = self._status(self.outs) if outs_status: ret.append({"changed outs": outs_status}) if self.changed_md5(): ret.append("changed checksum") if self.is_callback: ret.append("always changed") if ret: return {self.relpath: ret} return {} def _already_cached(self): return (not self.changed_md5() and all(not dep.changed() for dep in self.deps) and all(not out.changed_cache() if out. use_cache else not out.changed() for out in self.outs)) def get_all_files_number(self): return sum(out.get_files_number() for out in self.outs)
class ScriptSchema(Schema): """Extends `Schema` adapting it to PA scripts validation strategies. Adds predefined schemata as class variables to be used in scripts' validation schemas, as well as a `validate_user_input` method which acts as `Schema.validate` but returns a dictionary with converted keys ready to be used as function keyword arguments, e.g. validated arguments {"--foo": bar, "<baz>": qux} will be converted to {"foo": bar, "baz": qux}. Additional conversion rules may be added as a dictionary passed to `validate_user_input` :method: as `conversions` :param:. Use :method:`ScriptSchema.validate_user_input` to obtain kwarg dictionary.""" # class variables are used in task scripts schemata: boolean = Or(None, bool) hour = Or(None, And(Use(int), lambda h: 0 <= h <= 23), error="--hour has to be in 0..23") id_multi = Or([], And(lambda y: all(x.isdigit() for x in y), error="<id> has to be integer")) id_required = And(Use(int), error="<id> has to be an integer") minute_required = And(Use(int), lambda m: 0 <= m <= 59, error="--minute has to be in 0..59") minute = Or(None, minute_required) string = Or(None, str) tabulate_format = Or( None, And(str, lambda f: f in tabulate_formats), error=f"--format should match one of: {', '.join(tabulate_formats)}", ) replacements = {"--": "", "<": "", ">": ""} def convert(self, string): """Removes cli argument notation characters ('--', '<', '>' etc.). :param string: cli argument key to be converted to fit Python argument syntax.""" for key, value in self.replacements.items(): string = string.replace(key, value) return string def validate_user_input(self, arguments, *, conversions=None): """Calls `Schema.validate` on provided `arguments`. Returns dictionary with keys converted by the `ScriptSchema.convert` :method: to be later used as kwarg arguments. Universal rules for conversion are stored in the `replacements` class variable and may be updated using the `conversions` kwarg. Use the optional `conversions` :param: to add custom replacement rules. :param arguments: dictionary of cli arguments provided by (e.g.) `docopt` :param conversions: dictionary of additional rules to `self.replacements`""" if conversions: self.replacements.update(conversions) try: self.validate(arguments) return {self.convert(key): val for key, val in arguments.items()} except SchemaError as e: logger.warning(snakesay(str(e))) sys.exit(1)
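# Hedged sketch (added): how a task script might call validate_user_input; the
# option names are made up. Note the returned dict has converted *keys* but the
# original *values* (validate()'s converted result is discarded above).
script_schema = ScriptSchema({'--hour': ScriptSchema.hour,
                              '<id>': ScriptSchema.id_required})
kwargs = script_schema.validate_user_input({'--hour': '12', '<id>': '42'})
assert kwargs == {'hour': '12', 'id': '42'}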
import yaml from schema import Schema, And, Or, Optional from hyperlink import parse def check_url(url_str): url = parse(unicode(url_str)) assert url.scheme in ('http', 'https') return url IN_SCHEMA = Schema( {'projects': [{ 'name': str, Or('url', 'gh_url'): check_url }]}, ignore_extra_keys=True) def redundant(src, key=None, distinct=False, sort=True): """The complement of unique(), returns non-unique values. Pass distinct=True to get a list of the *first* redundant value for each key. Results are sorted by default. >>> redundant(range(5)) [] >>> redundant([1, 2, 3, 2, 3, 3]) [[2, 2], [3, 3, 3]] >>> redundant([1, 2, 3, 2, 3, 3], distinct=True) [2, 3]
def main(argv, session): args = docopt(__doc__, argv=argv) # Validate args. s = Schema({ six.text_type: bool, '<identifier>': list, '--modify': list, '--append': list, '--spreadsheet': Or( None, And(lambda f: os.path.exists(f), error='<file> should be a readable file or directory.')), '--target': Or(None, str), '--priority': Or(None, Use(int, error='<priority> should be an integer.')), }) try: args = s.validate(args) except SchemaError as exc: print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)), file=sys.stderr) sys.exit(1) formats = set() responses = [] for i, identifier in enumerate(args['<identifier>']): item = session.get_item(identifier) # Check existence of item. if args['--exists']: if item.exists: responses.append(True) print('{0} exists'.format(identifier)) else: responses.append(False) print('{0} does not exist'.format(identifier), file=sys.stderr) if (i + 1) == len(args['<identifier>']): if all(r is True for r in responses): sys.exit(0) else: sys.exit(1) # Modify metadata. elif args['--modify'] or args['--append']: metadata_args = args['--modify'] if args['--modify'] else args[ '--append'] metadata = get_args_dict(metadata_args) responses.append(modify_metadata(item, metadata, args)) if (i + 1) == len(args['<identifier>']): if all(r.status_code == 200 for r in responses): sys.exit(0) else: for r in responses: if r.status_code == 200: continue # We still want to exit 0 if the non-200 is a # "no changes to xml" error. elif 'no changes' in r.content.decode('utf-8'): continue else: sys.exit(1) # Get metadata. elif args['--formats']: for f in item.get_files(): formats.add(f.format) if (i + 1) == len(args['<identifier>']): print('\n'.join(formats)) # Dump JSON to stdout. else: metadata = json.dumps(item.item_metadata) print(metadata) # Edit metadata for items in bulk, using a spreadsheet as input. if args['--spreadsheet']: if not args['--priority']: args['--priority'] = -5 with io.open(args['--spreadsheet'], 'rU', newline='', encoding='utf-8') as csvfp: spreadsheet = csv.DictReader(csvfp) responses = [] for row in spreadsheet: if not row['identifier']: continue item = session.get_item(row['identifier']) if row.get('file'): del row['file'] metadata = dict((k.lower(), v) for (k, v) in row.items() if v) responses.append(modify_metadata(item, metadata, args)) if all(r.status_code == 200 for r in responses): sys.exit(0) else: for r in responses: if r.status_code == 200: continue # We still want to exit 0 if the non-200 is a # "no changes to xml" error. elif 'no changes' in r.content.decode('utf-8'): continue else: sys.exit(1)
def main(argv, session): args = docopt(__doc__, argv=argv) # Validation error messages. destdir_msg = '--destdir must be a valid path to a directory.' itemlist_msg = '--itemlist must be a valid path to an existing file.' # Validate args. s = Schema({ str: Use(bool), '--destdir': Or([], And(Use(lambda d: d[0]), dir_exists), error=destdir_msg), '--format': list, '--glob': Use(lambda l: l[0] if l else None), '<file>': list, '--search': Or(str, None), '--itemlist': Or(None, And(lambda f: os.path.isfile(f)), error=itemlist_msg), '<identifier>': Or(str, None), '--retries': Use(lambda x: x[0]), }) # Filenames should be unicode literals. Support PY2 and PY3. if six.PY2: args['<file>'] = [f.decode('utf-8') for f in args['<file>']] try: args = s.validate(args) except SchemaError as exc: sys.stderr.write('{0}\n{1}\n'.format(str(exc), printable_usage(__doc__))) sys.exit(1) retries = int(args['--retries']) if args['--itemlist']: ids = [x.strip() for x in open(args['--itemlist'])] total_ids = len(ids) elif args['--search']: _search = search_items(args['--search']) total_ids = _search.num_found ids = search_ids(args['--search']) # Download specific files. if args['<identifier>'] and args['<identifier>'] != '-': if '/' in args['<identifier>']: identifier = args['<identifier>'].split('/')[0] files = ['/'.join(args['<identifier>'].split('/')[1:])] else: identifier = args['<identifier>'] files = args['<file>'] total_ids = 1 ids = [identifier] elif args['<identifier>'] == '-': total_ids = 1 ids = sys.stdin files = None else: files = None errors = list() for i, identifier in enumerate(ids): identifier = identifier.strip() if total_ids > 1: item_index = '{0}/{1}'.format((i + 1), total_ids) else: item_index = None try: item = session.get_item(identifier) except Exception as exc: print('{0}: failed to retrieve item metadata: {1}'.format(identifier, exc)) continue # Otherwise, download the entire item. _errors = item.download(files=files, formats=args['--format'], glob_pattern=args['--glob'], dry_run=args['--dry-run'], verbose=args['--verbose'], silent=args['--silent'], ignore_existing=args['--ignore-existing'], checksum=args['--checksum'], destdir=args['--destdir'], no_directory=args['--no-directories'], retries=retries, item_index=item_index, ignore_errors=True) if _errors: errors.append(_errors) if errors: # TODO: add option for a summary/report. sys.exit(1) else: sys.exit(0)
def get_options(cls): """ Schema for options validation and assignment of default values. """ return { 'hosts': dict, ConfigOption('abort_signals', default=[signal.SIGINT, signal.SIGTERM]): [int], ConfigOption('worker_type', default=RemoteWorker): object, ConfigOption('pool_type', default='thread'): str, ConfigOption('host', default=cls.default_hostname): str, ConfigOption('port', default=0): int, ConfigOption('copy_cmd', default=copy_cmd): lambda x: callable(x), ConfigOption('link_cmd', default=link_cmd): lambda x: callable(x), ConfigOption('ssh_cmd', default=ssh_cmd): lambda x: callable(x), ConfigOption('workspace', default=cls.default_workspace_root): str, ConfigOption('workspace_exclude', default=[]): Or(list, None), ConfigOption('remote_workspace', default=None): Or(str, None), ConfigOption('copy_workspace_check', default=remote_filepath_exists): Or(lambda x: callable(x), None), ConfigOption('env', default=None): Or(dict, None), ConfigOption('setup_script', default=None): Or(list, None), ConfigOption('push', default=[]): Or(list, None), ConfigOption('push_exclude', default=[]): Or(list, None), ConfigOption('push_relative_dir', default=None): Or(str, None), ConfigOption('delete_pushed', default=False): bool, ConfigOption('pull', default=[]): Or(list, None), ConfigOption('pull_exclude', default=[]): Or(list, None), ConfigOption('remote_mkdir', default=['/bin/mkdir', '-p']): list, ConfigOption('testplan_path', default=None): Or(str, None), ConfigOption('worker_heartbeat', default=30): Or(int, float, None) }
""" Create new schema that allows the supported schema or None. """ return Or(schema, None) # #126f9a == $m-blue-d3 in variables.scss. It's rgb(18,111,154) main_color = '#126f9a' # Same as main_color. Almost like openedx's #0075b4 == rgb(0, 117, 180) link_color = '#126f9a' # openedx also uses white by default header_bg_color = '#ffffff' # openedx also uses white by default footer_bg_color = '#ffffff' text_color_options = Or('light', 'dark', color) main_color_options = Or('accent', 'main', color) button_color_schema = { Optional('main'): main_color_options, Optional('text'): text_color_options, Optional('line'): nullable(main_color_options), Optional('hover-main'): main_color_options, Optional('hover-text'): text_color_options, Optional('hover-line'): nullable(main_color_options), } theme_schema_v0 = Schema({ 'version': 0, # This is used as the primary color in your theme palette. It is used as filler for buttons.
from schema import Schema, Use, Optional, Regex, Or # Packed data schema; packed data is stored in FmsFormat object attribute '_packeddata' packed_fms_structure = { 'schema': Schema({ 'config': { 'num_channels': Use(int), 'num_frames': Use(int), 'speed': Use(float), Optional('meta'): Use(dict), }, 'props': [Use(str)], 'data': { # channels Regex('^channel[0-9]+$'): [ # frames [ # position-indexed properties Or([Use(float)], dict) ] ], }, }), 'default': { 'config': { 'num_channels': 1, 'num_frames': 0, 'speed': 0.0, }, 'props': [], 'data': { 'channel0': [], }, },
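# Hedged sketch (added): validating a tiny packed-data dict against the schema
# above; two frames with two position-indexed properties each, values made up.
packed_fms_structure['schema'].validate({
    'config': {'num_channels': 1, 'num_frames': 2, 'speed': 30.0},
    'props': ['x', 'y'],
    'data': {'channel0': [[[0.0, 1.0], [2.0, 3.0]],
                          [[4.0, 5.0], [6.0, 7.0]]]},
})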
setChoice('logLevel', 'trace', 'debug', 'info', 'warning', 'error', 'fatal'), Optional('logCollection'): setChoice('logCollection', 'http', 'none'), 'useAnnotation': setType('useAnnotation', bool), Optional('tuner'): AlgoSchema('tuner'), Optional('advisor'): AlgoSchema('advisor'), Optional('assessor'): AlgoSchema('assessor'), Optional('localConfig'): { Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'), Optional('maxTrialNumPerGpu'): setType('maxTrialNumPerGpu', int), Optional('useActiveGpu'): setType('useActiveGpu', bool) } } common_trial_schema = { 'trial': { 'command': setType('command', str), 'codeDir': setPathCheck('codeDir'), Optional('gpuNum'):
def main(argv, session): args = docopt(__doc__, argv=argv) # Validation error messages. invalid_id_msg = ( '<identifier> should be between 3 and 80 characters in length, and ' 'can only contain alphanumeric characters, underscores ( _ ), or ' 'dashes ( - )') # Validate args. s = Schema({ six.text_type: Use(lambda x: bool(x)), '<file>': And(list, Use(lambda x: convert_str_list_to_unicode(x) if six.PY2 else x)), '--format': list, '--glob': list, 'delete': bool, '<identifier>': Or(None, And(str, validate_ia_identifier, error=invalid_id_msg)), '--retries': Use(lambda i: int(i[0])), }) try: args = s.validate(args) except SchemaError as exc: print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)), file=sys.stderr) sys.exit(1) verbose = True if not args['--quiet'] else False item = session.get_item(args['<identifier>']) if not item.exists: print('{0}: skipping, item doesn\'t exist.'.format(args['<identifier>'])) # Files that cannot be deleted via S3. no_delete = ['_meta.xml', '_files.xml', '_meta.sqlite'] if verbose: sys.stdout.write('Deleting files from {0}\n'.format(item.identifier)) if args['--all']: files = [f for f in item.get_files()] args['--cascade'] = True elif args['--glob']: files = item.get_files(glob_pattern=args['--glob']) elif args['--format']: files = item.get_files(formats=args['--format']) else: fnames = [] if args['<file>'] == ['-']: if six.PY2: fnames = convert_str_list_to_unicode( [f.strip() for f in sys.stdin]) else: fnames = [f.strip() for f in sys.stdin] else: fnames = [f.strip() for f in args['<file>']] files = list(item.get_files(fnames)) if not files: sys.stderr.write(' warning: no files found, nothing deleted.\n') sys.exit(1) errors = False for f in files: if not f: if verbose: sys.stderr.write(' error: "{0}" does not exist\n'.format( f.name)) errors = True if any(f.name.endswith(s) for s in no_delete): continue if args['--dry-run']: sys.stdout.write(' will delete: {0}/{1}\n'.format( item.identifier, f.name.encode('utf-8'))) continue try: resp = f.delete(verbose=verbose, cascade_delete=args['--cascade'], retries=args['--retries']) except requests.exceptions.RetryError as e: print(' error: max retries exceeded for {0}'.format(f.name), file=sys.stderr) errors = True continue if resp.status_code != 204: errors = True msg = get_s3_xml_text(resp.content) print(' error: {0} ({1})'.format(msg, resp.status_code), file=sys.stderr) continue if errors is True: sys.exit(1)
from bentoml.configuration import config from bentoml.exceptions import BentoMLConfigException from bentoml.utils.ruamel_yaml import YAML LOGGER = logging.getLogger(__name__) SCHEMA = Schema({ "api_server": { "port": And(int, lambda port: port > 0), "enable_microbatch": bool, "run_with_ngrok": bool, "enable_swagger": bool, "enable_metrics": bool, "enable_feedback": bool, "max_request_size": And(int, lambda size: size > 0), "workers": Or(And(int, lambda workers: workers > 0), None), "timeout": And(int, lambda timeout: timeout > 0), }, "marshal_server": { "max_batch_size": Or(And(int, lambda size: size > 0), None), "max_latency": Or(And(int, lambda latency: latency > 0), None), "workers": Or(And(int, lambda workers: workers > 0), None), "request_header_flag": str, }, "yatai": { "url": Or(str, None) }, "tracing": { "type": Or(And(str, Use(str.lower), lambda s: s in ('zipkin', 'jaeger')), None),
"query_required_fields": [str], "stable_past_days": int, "stable_min_threshold": float, "stable_max_threshold": float, "event_raise_max_thershold": float, "event_raise_min_thershold": float, }) feature_selection_params_schema = Schema({ "feature_names": [str], }) single_estimator_grid_search_params_schema = Schema({ "estimator_name": str, "param_grid": { str: Or(float, int, list) }, "scoring_name": str }) grid_search_params_schema = Schema({ "estimators": [single_estimator_grid_search_params_schema], "cv": int, "verbose": int }) label_producing_params_schema = Schema({ "method": Or("binary_min_threshold_classification", "binary_max_threshold_classification", "regression",
def oformat(self): return self._format @property def precision(self): return self._precision @property def filedata(self): return self._filedata argvalidators = { '--amodetag': Or(None, And(str, lambda s: s.upper() in params._amodetagChoices), error='--amodetag must be in ' + str(params._amodetagChoices)), '--beamenergy': Or(None, And(Use(int), lambda n: n > 0), error='--beamenergy should be a positive number'), '--xingMin': Or(None, And(Use(float), lambda n: n > 0), error='--xingMin should be a positive number'), '--xingTr': Or(None, And(Use(float), lambda n: (n > 0 and n <= 1)), error='--xingTr should be a number in (0,1]'), '--xingId': Or(None,