# for complete details.

import threading
import types
import typing
import warnings

import cryptography
from cryptography import utils
from cryptography.exceptions import InternalError
from cryptography.hazmat.bindings._openssl import ffi, lib
from cryptography.hazmat.bindings.openssl._conditional import CONDITIONAL_NAMES

_OpenSSLErrorWithText = typing.NamedTuple(
    "_OpenSSLErrorWithText",
    [("code", int), ("lib", int), ("reason", int), ("reason_text", bytes)],
)


class _OpenSSLError:
    def __init__(self, code: int, lib: int, reason: int):
        self._code = code
        self._lib = lib
        self._reason = reason

    def _lib_reason_match(self, lib: int, reason: int) -> bool:
        return lib == self.lib and reason == self.reason

    @property
    def code(self) -> int:
        return self._code
import typing as ty  # the snippet relies on the "ty" alias for typing


def testfunc(
    a: float,
) -> ty.NamedTuple("Output", [("fractional", float), ("integer", int)]):
    import math

    return math.modf(a)
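# A minimal usage sketch (not from the original source): the NamedTuple in the
# return annotation is declarative only. math.modf returns a plain tuple of
# two floats, so the "integer" component is a float despite the int annotation.
fractional, integer = testfunc(3.5)
assert fractional == 0.5 and integer == 3.0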
import typing
import unittest

Horse = typing.NamedTuple("Horse", [("k", int), ("s", int)])


class Solution:
    def solve(self, d: int, n: int, horses) -> float:
        # A horse starting at position k with speed s reaches the finish at
        # time (d - k) / s; the fastest speed that never overtakes any horse
        # is the full distance divided by the latest arrival time.
        times = map(lambda horse: (d - horse.k) / horse.s, horses)
        speed = d / max(times)
        return speed


class TestSolution(unittest.TestCase):
    def test_example_1(self):
        d = 2525
        n = 1
        horses = [Horse(2400, 5)]
        expected = 101
        actual = Solution().solve(d, n, horses)
        self.assertAlmostEqual(expected, actual)

    def test_example_2(self):
        d = 300
        n = 2
        horses = [Horse(120, 60), Horse(60, 90)]
        expected = 100
        actual = Solution().solve(d, n, horses)
        self.assertAlmostEqual(expected, actual)

    def test_example_3(self):
import typing

from apache_beam.transforms.external import BeamJarExpansionService
from apache_beam.transforms.external import NamedTupleBasedPayloadBuilder
from apache_beam.typehints.schemas import typing_to_runner_api

__all__ = [
    'WriteToJdbc',
    'ReadFromJdbc',
]


def default_io_expansion_service():
  return BeamJarExpansionService(
      ':sdks:java:extensions:schemaio-expansion-service:shadowJar')


JdbcConfigSchema = typing.NamedTuple(
    'JdbcConfigSchema',
    [('location', str), ('config', bytes)],
)

Config = typing.NamedTuple(
    'Config',
    [
        ('driver_class_name', str),
        ('jdbc_url', str),
        ('username', str),
        ('password', str),
        ('connection_properties', typing.Optional[str]),
        ('connection_init_sqls', typing.Optional[typing.List[str]]),
        ('write_statement', typing.Optional[str]),
        ('read_query', typing.Optional[str]),
        ('fetch_size', typing.Optional[int]),
        ('output_parallelization', typing.Optional[bool]),
import re
import logging
import os
import itertools
import subprocess
import tempfile
import typing

# We recommend a maximum line length of 80 characters but allow up to 100
# where the developer deems it necessary. Lines exceeding that limit are
# wrapped automatically after 80 characters.
LINE_LENGTH_THRESHOLD = 100
BREAK_BEFORE = 80

Line = typing.NamedTuple(
    "Line", [("sourcefile", str), ("number", int), ("text", str)]
)

LineGenerator = typing.Generator[Line, None, None]

FileLines = typing.NamedTuple(
    "FileLines",
    [("filename", str), ("lines", typing.Sequence[typing.Tuple[int, int]])],
)


def get_git_added_lines() -> LineGenerator:
    proc = subprocess.run(
        ["git", "diff", "--cached", "--unified=0"],
        capture_output=True,
        text=True,
    )
    proc.check_returncode()
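# A minimal sketch (not part of the original file) showing how the Line tuple
# and LINE_LENGTH_THRESHOLD above might be combined; find_long_lines and the
# sample data are hypothetical.
def find_long_lines(added_lines: LineGenerator) -> LineGenerator:
    for line in added_lines:
        if len(line.text) > LINE_LENGTH_THRESHOLD:
            yield line


for long_line in find_long_lines(iter([Line("example.py", 3, "x" * 120)])):
    print(f"{long_line.sourcefile}:{long_line.number} exceeds the limit")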
)
def test_is_dict_like(the_type: t.Any, expected_result: bool) -> None:
    actual_result = axion_types.is_dict_like(the_type)
    assert expected_result == actual_result


@pytest.mark.parametrize(
    'the_type,expected_result',
    (
        (None, False),
        (type(None), True),
        (int, False),
        (te.Literal[None], False),
        (te.Literal[None, 204], False),
        (t.Dict[str, str], False),
        (t.NamedTuple('NT', x=int), False),
    ),
    ids=lambda x: repr(x),
)
def test_is_none_type(the_type: t.Any, expected_result: bool) -> None:
    actual_result = axion_types.is_none_type(the_type)
    assert expected_result == actual_result


@pytest.mark.parametrize(
    'the_type,expected_types',
    (
        (
            te.Literal[1, 204],
            {int},
        ),
"typ,coll_type,instance_of", [ (typing.Set[int], set, int), (typing.FrozenSet[int], frozenset, int), (typing.Dict[int, int], dict, int), (typing.KeysView[int], type({}.keys()), int), (typing.ValuesView[int], type({}.values()), int), (typing.List[int], list, int), (typing.Tuple[int], tuple, int), (typing.Tuple[int, ...], tuple, int), (typing.Iterator[int], typing.Iterator, int), (typing.Sequence[int], typing.Sequence, int), (typing.Iterable[int], typing.Iterable, int), (typing.Mapping[int, None], typing.Mapping, int), (typing.Container[int], typing.Container, int), (typing.NamedTuple("A_NamedTuple", (("elem", int),)), tuple, int), ], ) def test_specialised_collection_types(typ, coll_type, instance_of): @given(from_type(typ)) def inner(ex): if sys.version_info[:2] >= (3, 6): assume(ex) assert isinstance(ex, coll_type) assert all(isinstance(elem, instance_of) for elem in ex) try: inner() except (ResolutionFailed, AssertionError): if sys.version_info[:2] < (3, 6): pytest.skip("Hard-to-reproduce bug (early version of typing?)")
import typing

import flytekit


def my_task(a: int) -> typing.NamedTuple("OutputsBC", b=int, c=str):
    ctx = flytekit.current_context()
    assert ctx.execution_id == "ex:local:local:local"
    return a + 2, "hello world"
import typing


def t1(a: int) -> typing.NamedTuple("OutputsBC", t1_int_output=int, c=str):
    a = a + 2
    return a, "world-" + str(a)
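# A short illustration (not from the original source) of what the functional
# NamedTuple syntax in the annotation above describes: frameworks such as
# Flyte use it to give each output a name, while the function itself still
# returns a plain tuple.
OutputsBC = typing.NamedTuple("OutputsBC", t1_int_output=int, c=str)
out = OutputsBC(*t1(3))
assert out.t1_int_output == 5 and out.c == "world-5"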
import typing
from collections import namedtuple
from urllib.parse import urlparse

PodcastData = typing.NamedTuple(
    'PodcastData',
    [
        ('title', str),  # title
        ('subtitle', str),  # subtitle
        ('published', float),  # time published
        # string of the audio url, or None if we couldn't find one
        ('audio_link', typing.Optional[str]),
    ])

# Podcast states
UnmergedStatus = typing.NamedTuple('UnmergedStatus', [])
RequestedStatus = typing.NamedTuple('RequestedStatus', [])
CancelledStatus = typing.NamedTuple('CancelledStatus', [])
NewStatus = typing.NamedTuple('NewStatus', [])
StartedStatus = typing.NamedTuple('StartedStatus', [])
FinishedStatus = typing.NamedTuple('FinishedStatus', [])
DeletedStatus = typing.NamedTuple('DeletedStatus', [])

Podcast = typing.NamedTuple(
    'Podcast',
    [('data', PodcastData),
     ('status', typing.Union[UnmergedStatus,
                             RequestedStatus,
                             CancelledStatus,
                             NewStatus,
                             StartedStatus,
                             FinishedStatus,
                             DeletedStatus,
                             ])])
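# A brief sketch (not in the original) of how these empty NamedTuples act as
# tagged states: the Union on "status" lets callers dispatch with isinstance.
# The sample values are made up.
podcast = Podcast(
    data=PodcastData('Talk', 'Ep. 1', 1700000000.0, None),
    status=NewStatus(),
)
if isinstance(podcast.status, NewStatus):
    print('queued for download:', podcast.data.title)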
import typing

from shutil import rmtree, which
from subprocess import CalledProcessError, DEVNULL, PIPE, Popen
from tempfile import mkdtemp
from threading import Thread
from time import sleep, time

from telepresence.runner.background import (
    Background, BackgroundThread, BackgroundProcess, TrackedBG
)
from telepresence.runner.cache import Cache
from telepresence.output import Output
from telepresence.runner.span import Span
from telepresence.utilities import str_command

_CleanupItem = typing.NamedTuple("_CleanupItem", [
    ("name", str),
    ("callable", typing.Callable),
    ("args", typing.Tuple),
    ("kwargs", typing.Dict[str, typing.Any]),
])


class Runner(object):
    """Context for running subprocesses."""

    def __init__(self, output: Output, kubeinfo, verbose: bool) -> None:
        """
        :param output: The Output instance for the session
        :param kubeinfo: How to run kubectl or equivalent
        :param verbose: Whether subcommands should run in verbose mode.
        """
        self.output = output
        self.kubectl = kubeinfo
        self.verbose = verbose
import logging
import typing as t
from asyncio import iscoroutinefunction

from croniter import croniter

from aiomisc import Service
from aiomisc.cron import CronCallback

log = logging.getLogger(__name__)

ExceptionsType = t.Tuple[t.Type[Exception], ...]

StoreItem = t.NamedTuple(
    "StoreItem",
    (
        ("callback", CronCallback),
        ("spec", str),
        ("shield", bool),
        ("suppress_exceptions", ExceptionsType),
    ),
)


class CronService(Service):
    def __init__(self, **kwargs: t.Any):
        super(CronService, self).__init__(**kwargs)
        self._callbacks_storage = set()  # type: t.Set[StoreItem]

    def register(
        self,
        function: t.Callable,
        spec: str,
        shield: bool = False,
import typing
from itertools import chain

from six import iterkeys  # py2/3 compat, implied by the type comments


def get_properties(swagger_spec, schema):
    # type: (Spec, typing.Optional[typing.Mapping[typing.Text, typing.Any]]) -> typing.Optional[typing.Set[typing.Text]]
    if schema is None or determine_object_type(schema) != ObjectType.SCHEMA:
        return None
    required, not_required = get_collapsed_properties_type_mappings(
        definition=schema,
        deref=swagger_spec.deref)
    return set(chain(iterkeys(required), iterkeys(not_required)))


StatusCodeSchema = typing.NamedTuple(
    'StatusCodeSchema',
    (
        ('status_code', typing.Text),
        ('mapping', EntityMapping[typing.Optional[typing.Mapping[typing.Text, typing.Any]]]),
    ),
)


def iterate_on_responses_status_codes(
    old_operation,  # type: typing.Mapping[typing.Text, typing.Any]
    new_operation,  # type: typing.Mapping[typing.Text, typing.Any]
):
    # type: (...) -> typing.Generator[StatusCodeSchema, None, None]
    old_status_code_schema_mapping = old_operation.get('responses') or {}
    new_status_code_schema_mapping = new_operation.get('responses') or {}
    common_response_codes = set(
import logging
import typing

import apache_beam as beam
from apache_beam.runners.portability import job_server
from apache_beam.runners.portability import portable_runner
from apache_beam.runners.portability import portable_runner_test
from apache_beam.testing.util import assert_that
from apache_beam.testing.util import equal_to
from apache_beam.transforms import userstate
from apache_beam.transforms.sql import SqlTransform

# Run as
#
# pytest flink_runner_test.py[::TestClass::test_case] \
#     --test-pipeline-options="--environment_type=LOOPBACK"

_LOGGER = logging.getLogger(__name__)

Row = typing.NamedTuple("Row", [("col1", int), ("col2", unicode)])
beam.coders.registry.register_coder(Row, beam.coders.RowCoder)


class FlinkRunnerTest(portable_runner_test.PortableRunnerTest):
  _use_grpc = True
  _use_subprocesses = True

  conf_dir = None
  expansion_port = None
  flink_job_server_jar = None

  def __init__(self, *args, **kwargs):
    super(FlinkRunnerTest, self).__init__(*args, **kwargs)
    self.environment_type = None
    self.environment_config = None
import asyncio
import datetime
import logging
import typing

from ib_async.errors import UnsupportedFeature, ApiException
from ib_async.order import Order, Action, OrderType, TimeInForce, OrderOrigin
from ib_async.instrument import Instrument, UnderlyingComponent
from ib_async.messages import Outgoing
from ib_async.protocol import ProtocolInterface, IncomingMessage, ProtocolVersion
from ib_async.utils import wrap_immediate_future

LOG = logging.getLogger(__name__)

OrderEvent = typing.NamedTuple("OrderEvent",
                               [('instrument', Instrument),
                                ('size', float),
                                ('average_cost', typing.Optional[float])])


def _dummy_handler__for_get_orders(PositionEvent):
    # This event handler is just used as a dummy to trigger the event
    # subscribe/unsubscribe mechanisms.
    pass


class OrdersMixin(ProtocolInterface):
    def __init__(self):
        super().__init__()

        self.__orders = {}  # type: typing.Dict[int, Order]
        self._next_order_id = 1
        self.__submitted_future = {}
        self.__open_orders_future = None
import typing


def t1(a: int) -> typing.NamedTuple("OutputsBC", t1_int_output=int, c=str):
    return a + 2, "world"
        ret = self.func(*pargs)
        if ret is None and self.returntype is None:
            return
        typ = mitmproxy.types.CommandTypes.get(self.returntype)
        if not typ.is_valid(self.manager, typ, ret):
            raise exceptions.CommandError(
                "%s returned unexpected data - expected %s" % (
                    self.path, typ.display
                )
            )
        return ret


ParseResult = typing.NamedTuple(
    "ParseResult",
    [
        ("value", str),
        ("type", typing.Type),
        ("valid", bool),
    ],
)


class CommandManager(mitmproxy.types._CommandBase):
    def __init__(self, master):
        self.master = master
        self.commands = {}  # type: typing.Dict[str, Command]

    def collect_commands(self, addon):
        for i in dir(addon):
            if not i.startswith("__"):
                o = getattr(addon, i)
                if hasattr(o, "command_path"):
# limitations under the License.
#

"""Package for SqlTransform and related classes."""

# pytype: skip-file

import typing

from apache_beam.transforms.external import BeamJarExpansionService
from apache_beam.transforms.external import ExternalTransform
from apache_beam.transforms.external import NamedTupleBasedPayloadBuilder

__all__ = ['SqlTransform']

SqlTransformSchema = typing.NamedTuple(
    'SqlTransformSchema',
    [('query', str), ('dialect', typing.Optional[str])])


class SqlTransform(ExternalTransform):
  """A transform that can translate a SQL query into PTransforms.

  Input PCollections must have a schema. Currently, there are two ways to
  define a schema for a PCollection:

  1) Register a `typing.NamedTuple` type to use RowCoder, and specify it as
     the output type. For example::

      Purchase = typing.NamedTuple('Purchase',
                                   [('item_name', unicode), ('price', float)])
      coders.registry.register_coder(Purchase, coders.RowCoder)
def concat(parts):
  if len(parts) > 1:
    return pd.concat(parts)
  elif len(parts) == 1:
    return parts[0]
  else:
    return None


def df_equal_to(expected):
  return lambda actual: check_correct(expected, concat(actual))


AnimalSpeed = typing.NamedTuple(
    'AnimalSpeed', [('Animal', str), ('Speed', int)])
coders.registry.register_coder(AnimalSpeed, coders.RowCoder)

Nested = typing.NamedTuple(
    'Nested', [('id', int), ('animal_speed', AnimalSpeed)])
coders.registry.register_coder(Nested, coders.RowCoder)


class TransformTest(unittest.TestCase):
  def run_scenario(self, input, func):
    expected = func(input)

    empty = input.iloc[0:0]
    input_placeholder = expressions.PlaceholderExpression(empty)
    input_deferred = frame_base.DeferredFrame.wrap(input_placeholder)
    actual_deferred = func(input_deferred)._expr.evaluate_at(
        expressions.Session({input_placeholder: input}))
# """Google Pub/Sub Lite sources and sinks. This API is currently under development and is subject to change. """ # pytype: skip-file import typing from apache_beam.transforms.external import BeamJarExpansionService from apache_beam.transforms.external import ExternalTransform from apache_beam.transforms.external import NamedTupleBasedPayloadBuilder _ReadSchema = typing.NamedTuple( '_ReadSchema', [('subscription_path', str), ('deduplicate', bool)]) def _default_io_expansion_service(): return BeamJarExpansionService( 'sdks:java:io:google-cloud-platform:expansion-service:shadowJar') class _ReadExternal(ExternalTransform): """ An external PTransform which reads from Pub/Sub Lite and returns a SequencedMessage as serialized bytes. This transform is not part of the public API. Experimental; no backwards-compatibility guarantees.
from algos.sma_algo import MovingAverageAlgo
from utilities.graphing import *
import settings
import typing
from dataclasses import dataclass
from tqdm import tqdm
import statistics
import pprint

'''
Declaring global-scope data types (really just named tuples); Python has no
'typedef' equivalent, so named tuples serve that role here.
'''
signal_tuple = typing.NamedTuple('signal_2', [('action', str),
                                              ('signal_str', float),
                                              ('currency', str),
                                              ('price', float),
                                              ('quantity', float),
                                              ('liquidate', bool)])

generated_data = typing.NamedTuple('rdata', [('price_data', dict),
                                             ('equity_history', list),
                                             ('signal_data', dict),
                                             ('universe', list)])


class Account:
    equity = 0
    cash = 0
    trades = {'buys': 0, 'sells': 0}
    '''
    equity = cash + market value of all active holdings
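# A small illustration (not from the original code) of constructing one of
# the named tuples declared above; the values are made up.
buy_signal = signal_tuple(action='buy', signal_str=0.8, currency='EURUSD',
                          price=1.0752, quantity=1000.0, liquidate=False)
print(buy_signal.action, buy_signal.price)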
def test_diabetes():
    # Since we are working with a specific dataset, we will create a strictly
    # typed schema for the dataset. If we wanted a generic data splitter, we
    # could use a generic schema without any column type and name information.
    # Example file:
    # https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv
    # CSV Columns
    # 1. Number of times pregnant
    # 2. Plasma glucose concentration at 2 hours in an oral glucose tolerance test
    # 3. Diastolic blood pressure (mm Hg)
    # 4. Triceps skin fold thickness (mm)
    # 5. 2-Hour serum insulin (mu U/ml)
    # 6. Body mass index (weight in kg/(height in m)^2)
    # 7. Diabetes pedigree function
    # 8. Age (years)
    # 9. Class variable (0 or 1)
    # Example Row: 6,148,72,35,0,33.6,0.627,50,1

    # the input dataset schema
    DATASET_COLUMNS = OrderedDict({
        "#preg": int,
        "pgc_2h": int,
        "diastolic_bp": int,
        "tricep_skin_fold_mm": int,
        "serum_insulin_2h": int,
        "bmi": float,
        "diabetes_pedigree": float,
        "age": int,
        "class": int,
    })
    # the first 8 columns are features
    FEATURE_COLUMNS = OrderedDict(
        {k: v for k, v in DATASET_COLUMNS.items() if k != "class"})
    # the last column is the class
    CLASSES_COLUMNS = OrderedDict({"class": int})

    MODELSER_JOBLIB = typing.TypeVar("joblib.dat")

    class XGBoostModelHyperparams(object):
        """
        These are the xgboost hyper parameters available in the scikit-learn
        library.
        """

        def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100,
                     objective="binary:logistic", booster="gbtree",
                     n_jobs=1, **kwargs):
            self.n_jobs = int(n_jobs)
            self.booster = booster
            self.objective = objective
            self.n_estimators = int(n_estimators)
            self.learning_rate = learning_rate
            self.max_depth = int(max_depth)

        def to_dict(self):
            return self.__dict__

        @classmethod
        def from_dict(cls, d):
            return cls(**d)

    # load data
    # Example file:
    # https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv
    @task(cache_version="1.0", cache=True, limits=Resources(mem="200Mi"))
    def split_traintest_dataset(
        dataset: FlyteFile[typing.TypeVar("csv")], seed: int,
        test_split_ratio: float
    ) -> (
        FlyteSchema[FEATURE_COLUMNS],
        FlyteSchema[FEATURE_COLUMNS],
        FlyteSchema[CLASSES_COLUMNS],
        FlyteSchema[CLASSES_COLUMNS],
    ):
        """
        Retrieves the training dataset from the given blob location, splits it
        using the given split ratio, and returns the result.

        This splitter is only for datasets formatted like the example CSV: the
        last column is assumed to be the class, and all other columns (0-8)
        the features.

        The data is returned as a schema, which gets converted to a parquet
        file in the back.
        """
        column_names = [k for k in DATASET_COLUMNS.keys()]
        df = pd.read_csv(dataset, names=column_names)

        # Select all features
        x = df[column_names[:8]]
        # Select only the classes
        y = df[[column_names[-1]]]

        # We will fake the train-test split. Just return the same dataset
        # multiple times.
        return x, x, y, y

    nt = typing.NamedTuple("Outputs", model=FlyteFile[MODELSER_JOBLIB])

    @task(cache_version="1.0", cache=True, limits=Resources(mem="200Mi"))
    def fit(x: FlyteSchema[FEATURE_COLUMNS], y: FlyteSchema[CLASSES_COLUMNS],
            hyperparams: dict) -> nt:
        """
        This function takes the given input features and their corresponding
        classes to train an XGBClassifier.
        NOTE: We have simplified the number of hyper parameters we take for
        demo purposes.
        """
        x_df = x.open().all()
        print(x_df)
        y_df = y.open().all()
        print(y_df)

        hp = XGBoostModelHyperparams.from_dict(hyperparams)
        print(hp)

        # fit model (no training data; we fake the fit)
        fname = "model.joblib.dat"
        with open(fname, "w") as f:
            f.write("Some binary data")
        return nt(model=fname)

    @task(cache_version="1.0", cache=True, limits=Resources(mem="200Mi"))
    def predict(
        x: FlyteSchema[FEATURE_COLUMNS],
        model_ser: FlyteFile[MODELSER_JOBLIB]
    ) -> FlyteSchema[CLASSES_COLUMNS]:
        """
        Given any trained model serialized using joblib (this method can be
        shared!) and features, this method returns predictions.
        """
        # make predictions for test data
        x_df = x.open().all()
        print(x_df)

        col = [k for k in CLASSES_COLUMNS.keys()]
        y_pred_df = pd.DataFrame(
            data=[{col[0]: [0, 1]}], columns=col, dtype="int64")
        y_pred_df.round(0)
        return y_pred_df

    @task(cache_version="1.0", cache=True, limits=Resources(mem="200Mi"))
    def score(predictions: FlyteSchema[CLASSES_COLUMNS],
              y: FlyteSchema[CLASSES_COLUMNS]) -> float:
        """
        Compares the predictions with the actuals and returns the accuracy
        score.
        """
        pred_df = predictions.open().all()
        print(pred_df)
        y_df = y.open().all()
        print(y_df)
        # evaluate predictions
        return 0.2

    @workflow
    def diabetes_xgboost_model(
        dataset: FlyteFile[typing.TypeVar("csv")],
        # = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv",
        test_split_ratio: float = 0.33,
        seed: int = 7,
    ) -> typing.NamedTuple(
            "Outputs", model=FlyteFile[MODELSER_JOBLIB], accuracy=float):
        """
        This pipeline trains an XGBoost model for any given dataset that
        matches the schema specified in
        https://github.com/jbrownlee/Datasets/blob/master/pima-indians-diabetes.names.
        """
        x_train, x_test, y_train, y_test = split_traintest_dataset(
            dataset=dataset, seed=seed, test_split_ratio=test_split_ratio)
        model = fit(
            x=x_train,
            y=y_train,
            hyperparams=XGBoostModelHyperparams(max_depth=4).to_dict())
        predictions = predict(x=x_test, model_ser=model.model)
        return model.model, score(predictions=predictions, y=y_test)
# Licensed Materials - Property of IBM
# Copyright IBM Corp. 2016

import unittest
import sys
import typing
import random
import time

from streamsx.topology.topology import Topology
from streamsx.topology.tester import Tester
from streamsx.topology.context import submit
from streamsx.topology.schema import StreamSchema, CommonSchema, _normalize

SensorReading = typing.NamedTuple(
    'SensorReading', [('sensor_id', int), ('ts', int), ('reading', float)])


class P(object):
    pass


class S(P):
    pass


def s_none():
    pass


def s_int() -> typing.Iterable[int]:
# Adapted from
# https://gist.github.com/JeffPaine/3145490/revisions#diff-6c74585fc93fb54ed50f85e4166e2fb4
import getpass
import json
import typing
from pathlib import Path

import requests

GitHubIdentity = typing.NewType('GitHubIdentity', str)

Issue = typing.NamedTuple('Issue', (
    ('id', int),
    ('url', str),
    ('assignees', typing.List[GitHubIdentity]),
    ('title', str),
))


def get_credentials() -> typing.Tuple[str, str]:
    config_file = Path(__file__).parent.parent / '.config.json'
    data = {}  # type: typing.Dict[str, str]
    username = None
    password = None

    if config_file.exists():
        with config_file.open() as f:
            data = json.load(f)
        username = data['username']
        password = data.get('password')
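# A hedged sketch (not in the original script) of building Issue tuples from
# the GitHub REST API; fetch_issues is hypothetical, and the response fields
# used here ("number", "html_url", "assignees", "title") are assumptions
# about the v3 API shape.
def fetch_issues(repo: str, auth: typing.Tuple[str, str]) -> typing.List[Issue]:
    response = requests.get(
        f'https://api.github.com/repos/{repo}/issues', auth=auth)
    response.raise_for_status()
    return [
        Issue(
            id=item['number'],
            url=item['html_url'],
            assignees=[GitHubIdentity(a['login']) for a in item['assignees']],
            title=item['title'],
        )
        for item in response.json()
    ]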
import typing as ty  # the snippet relies on the "ty" alias for typing


def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out1", float)]):
    return a + b
import utils
import typing

Point = typing.NamedTuple('Point', [('x', int), ('y', int)])
Slice = typing.NamedTuple('Slice', [('start', Point), ('end', Point)])


def get_pizza(lines: list):
    first_line = lines[0].split(' ')
    L = int(first_line[2])
    H = int(first_line[3])
    mapping = {'T': 0, 'M': 1}
    return [[mapping[i] for i in line[:-1]] for line in lines[1:]], L, H


def size_of_slice(s: Slice):
    return ((s.end.x - s.start.x) + 1) * ((s.end.y - s.start.y) + 1)


def score(slices: list):
    return sum(map(size_of_slice, slices))


def is_slice_valid(pizza: list, already_cut: list, s: Slice, L, H):
    if size_of_slice(s) > H:
        return False
    if not can_be_cut(already_cut, s):
        return False
    num_tomato, num_mushroom = 0, 0
    for y in range(s.start.y, s.end.y + 1):
        for x in range(s.start.x, s.end.x + 1):
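# A quick worked example (not in the original): a slice from (0, 0) to (2, 1)
# covers (2 - 0 + 1) * (1 - 0 + 1) = 6 cells.
assert size_of_slice(Slice(Point(0, 0), Point(2, 1))) == 6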
import typing

from yamlformat.validator import base_lib
from yamlformat.validator import entity_type_lib
from yamlformat.validator import entity_type_manager
from yamlformat.validator import field_lib
from yamlformat.validator import findings_lib
from yamlformat.validator import namespace_validator
from yamlformat.validator import parse_config_lib as parse
from yamlformat.validator import state_lib
from yamlformat.validator import subfield_lib
from yamlformat.validator import unit_lib

# Define namedtuple Config to store the different kinds of config files.
# All attributes should be tuples.
# Each property is a tuple of base_lib.PathParts tuples.
Config = typing.NamedTuple('Config', [('fields', tuple),
                                      ('subfields', tuple),
                                      ('states', tuple),
                                      ('type_defs', tuple),
                                      ('units', tuple)])


class ConfigUniverse(findings_lib.Findings):
  """Helper class to represent the defined universe of ontology configuration.

  Contains all valid components of the ontology.

  Args:
    entity_type_universe: config for entity types
    field_universe: config for fields
    subfield_universe: config for subfields
    state_universe: config for states
    unit_universe: config for units
  """
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Test for Beam type compatibility library."""

from __future__ import absolute_import

import typing
import unittest

from apache_beam.typehints import native_type_compatibility
from apache_beam.typehints import typehints

_TestNamedTuple = typing.NamedTuple(
    '_TestNamedTuple', [('age', int), ('name', bytes)])
_TestFlatAlias = typing.Tuple[bytes, float]
_TestNestedAlias = typing.List[_TestFlatAlias]


class _TestClass(object):
  pass


class NativeTypeCompatibilityTest(unittest.TestCase):

  def test_convert_to_beam_type(self):
    test_cases = [
        ('raw bytes', bytes, bytes),
        ('raw int', int, int),
        ('raw float', float, float),
        ('any', typing.Any, typehints.Any),
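# A brief sketch (not from the test file) of the conversion these tests
# exercise: convert_to_beam_type maps typing aliases onto Beam's own
# typehints types.
assert (native_type_compatibility.convert_to_beam_type(typing.List[int])
        == typehints.List[int])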
import typing as t
from abc import ABCMeta, abstractmethod
from dataclasses import dataclass

import mbae_resources
from mbae_resources import resources

_Tuple_mapping = t.Tuple[t.Tuple[str, str]]

Boundaries = t.NamedTuple('boundaries', [('start', int), ('end', int)])


@dataclass
class _Constants:
    """
    Data class holding constant values required for data preparation.
    """
    alignment_tool: str = 'mafft'
    # seq and msa are paths; threads is the thread count passed to mafft
    alignment_command: t.Callable[[str, str, int], str] = (
        lambda seq, msa, threads:
        f'mafft --add {seq} --keeplength --anysymbol --thread {threads} {msa}')
    alignment_profile_path: str = './mbae_resources/binding_regions.fsa'
    available_sources: t.Tuple = ('iedb', 'bdata')
    iedb_url: str = "https://www.iedb.org/downloader.php?file_name=doc/mhc_ligand_full_single_file.zip"
    bdata_url: str = "http://tools.iedb.org/static/main/binding_data_2013.zip"
    ipd_history_url: str = "https://raw.githubusercontent.com/ANHIG/IPDMHC/Latest/MHC.xml"
    ipd_sequences_url: str = "https://raw.githubusercontent.com/ANHIG/IPDMHC/Latest/MHC_prot.fasta"
    imgt_history_url: str = "https://raw.githubusercontent.com/ANHIG/IMGTHLA/Latest/Allelelist_history.txt"
    imgt_sequences_url: str = "https://raw.githubusercontent.com/ANHIG/IMGTHLA/Latest/hla_prot.fasta"
    peptide_length: Boundaries = Boundaries(6, 16)
    rare_threshold: int = 100
    train_fraction: float = 0.8
import logging
import typing
import unittest

import pytest

import apache_beam as beam
from apache_beam import coders
from apache_beam.options.pipeline_options import StandardOptions
from apache_beam.testing.test_pipeline import TestPipeline
from apache_beam.testing.util import assert_that
from apache_beam.testing.util import equal_to
from apache_beam.transforms.sql import SqlTransform

SimpleRow = typing.NamedTuple(
    "SimpleRow", [("id", int), ("str", str), ("flt", float)])
coders.registry.register_coder(SimpleRow, coders.RowCoder)

Enrich = typing.NamedTuple("Enrich", [("id", int), ("metadata", str)])
coders.registry.register_coder(Enrich, coders.RowCoder)

Shopper = typing.NamedTuple(
    "Shopper", [("shopper", str), ("cart", typing.Mapping[str, int])])
coders.registry.register_coder(Shopper, coders.RowCoder)


@pytest.mark.xlang_sql_expansion_service
@unittest.skipIf(
    TestPipeline().get_pipeline_options().view_as(StandardOptions).runner is
    None,
    "Must be run with a runner that supports staging java artifacts.")
class SqlTransformTest(unittest.TestCase):
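# A hedged sketch (not part of this test module) of how the registered rows
# above feed SqlTransform; running it requires a runner that can expand the
# Java SQL transform, so treat it as illustrative only. filter_rows is
# hypothetical.
def filter_rows(p):
  return (
      p
      | beam.Create([SimpleRow(1, "foo", 3.14), SimpleRow(2, "bar", 1.414)])
      | SqlTransform("SELECT id, str FROM PCOLLECTION WHERE flt > 2"))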