Esempio n. 1
0
import json
from collections import namedtuple
from dotmap import DotMap
import mysql.connector
from config import Config

# Row shapes for the company / job-description records handled by this module.
Company = namedtuple('COMPANY', 'CompanyName Modified HiringStatus Comment')
JobDescription = namedtuple(
    'JOBDESCRIPTION',
    'JobTitle Salary JobResponsibilities TimeCommitment TypeOfContract '
    'DateUpload ContractStart ContractDuration Link CompanyName')

# Dot-accessible view of the SQL connection settings from config.Config.
_sql = DotMap(Config.sql)


class _Global:
    # Module-level namespace that make_cursor() populates with the shared
    # DB connection (`db`) and cursor (`cursor`); never instantiated.
    pass


def make_cursor():
    """Open a MySQL connection from the shared config and return a cursor.

    The connection and cursor are also stashed on ``_Global`` so callers
    can reach them later (e.g. to commit or close).
    """
    connection = mysql.connector.connect(
        host=_sql.server,
        user=_sql.username,
        passwd=_sql.password,
        database=_sql.database,
        ssl_ca='cert.pem',
    )
    cur = connection.cursor()
    _Global.db = connection
    _Global.cursor = cur
    return cur

Esempio n. 2
0
def question_generation(_input):
    """Generate questions for pre-processed paragraphs with a GPT-2 model.

    ``_input`` is a ``(metadata, output)`` pair: ``metadata`` carries
    pipeline settings (only ``settings["top_p"]`` is read here) and
    ``output`` is the pre-processed instance data fed to the model.

    NOTE(review): this function currently returns ``{"break": "point"}``
    early (a debug breakpoint); everything after that return is
    unreachable.  The intended return value is the SQuAD-style
    ``final_output_dict`` built at the bottom.
    """
    metadata, output = _input
    args = DotMap()
    # Dead code: the original argparse-based CLI setup, kept as a no-op
    # string literal.
    """
    parser = ArgumentParser()
    parser.add_argument("--model_type", type=str, default="gpt", help="gpt or gpt2")
    parser.add_argument("--model_checkpoint", type=str, default="", help="Path, url or short name of the model")
    parser.add_argument("--device", type=str, default="cuda" if torch.cuda.is_available() else "cpu",
                        help="Device (cuda or cpu)")
    parser.add_argument("--filename", type=str, default="data/instances_dev.pkl", help="File to use for decoding")
    parser.add_argument("--no_sample", action='store_true', help="Set to use greedy decoding instead of sampling")
    parser.add_argument("--max_length", type=int, default=50, help="Maximum length of the output utterances")
    parser.add_argument("--min_length", type=int, default=1, help="Minimum length of the output utterances")
    parser.add_argument("--seed", type=int, default=42, help="Seed")
    parser.add_argument("--temperature", type=int, default=0.7, help="Sampling softmax temperature")
    parser.add_argument("--top_k", type=int, default=0, help="Filter top-k tokens before sampling (<=0: no filtering)")
    parser.add_argument("--top_p", type=float, default=0.9,
                        help="Nucleus filtering (top-p) before sampling (<=0.0: no filtering)")

    # While using SQUASH in the pipeline mode, prefer using the --key flag
    parser.add_argument("--key", type=str, default=None,
                        help="Override the default settings if the key is set, used in pipeline mode")
    args = parser.parse_args()
    """
    # Dead code: the original --key based file/metadata loading, also kept
    # as a no-op string literal.
    """
    if args.key is not None:
        # Override some the filename and top_p default settings if args.key is set
        # This is done when the question generation module is being used in the SQUASH pipeline mode
        args.filename = "squash/temp/%s/input.pkl" % args.key

        with open("squash/temp/%s/metadata.json" % args.key, "r") as f:
            metadata = json.loads(f.read())
        args.top_p = metadata["settings"]["top_p"]
    args.filename = "squash/temp/%s/input.pkl" % args.key

    with open("squash/temp/%s/metadata.json" % args.key, "r") as f:
        metadata = json.loads(f.read())

    args.top_p = metadata["settings"]["top_p"]
    """
    # NOTE(review): the next two lines are redundant duplicates -- both set
    # args.top_p to the same value.
    setattr(args, "top_p", metadata["settings"]["top_p"])
    args.top_p = metadata["settings"]["top_p"]

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__file__)
    logger.info(pformat(args))

    # Fixed seed so sampling is reproducible across runs.
    args.seed = 42
    random.seed(args.seed)
    torch.random.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    logger.info("Get pretrained model and tokenizer")

    # NEW BLOCK
    # NOTE(review): model_checkpoint is reassigned several times below; only
    # the final relative "question_generation/..." path takes effect.
    model_checkpoint = "question_generation/gpt2_corefs_question_generation"
    model_checkpoint = "/home/gpt2_corefs_question_generation"
    model_type = "gpt2"
    #model_checkpoint = "https://storage.cloud.google.com/ds-playground/squash/gpt2_qa.tar.gz"
    SAVED_MODEL_DIR = "gpt2_corefs_question_generation"
    dir_path = os.path.dirname(os.path.realpath(__file__))
    model_checkpoint = os.path.join(dir_path, SAVED_MODEL_DIR)
    model_checkpoint = "question_generation/gpt2_corefs_question_generation"

    # NOTE(review): this tokenizer/model pair is discarded -- both are
    # rebuilt below from the SparkFiles-distributed artifacts.
    tokenizer = GPT2Tokenizer.from_pretrained(model_checkpoint)
    model = GPT2LMHeadModel.from_pretrained(model_checkpoint)
    """ OLD BLOCK
    if args.model_type == 'gpt2':
        tokenizer = GPT2Tokenizer.from_pretrained(args.model_checkpoint)
        model = GPT2LMHeadModel.from_pretrained(args.model_checkpoint)
    else:
        tokenizer = OpenAIGPTTokenizer.from_pretrained(args.model_checkpoint)
        model = OpenAIGPTLMHeadModel.from_pretrained(args.model_checkpoint)
    """

    # Hard-coded Colab paths; immediately overridden by the SparkFiles
    # lookups just below.
    output_config_file = "/content/squash-generation/question_generation/gpt2_corefs_question_generation/config.json"
    output_model_file = "/content/squash-generation/question_generation/gpt2_corefs_question_generation/pytorch_model.bin"
    output_vocab_file = "/content/squash-generation/question_generation/gpt2_corefs_question_generation/vocab.json"
    merges_file = "/content/squash-generation/question_generation/gpt2_corefs_question_generation/merges.txt"

    # Resolve the model files distributed to workers via SparkContext.addFile.
    output_config_file = SparkFiles.get("config.json")
    output_model_file = SparkFiles.get("pytorch_model.bin")
    output_vocab_file = SparkFiles.get("vocab.json")
    merges_file = SparkFiles.get("merges.txt")

    # Rebuild model and tokenizer from the raw artifact files (CPU only).
    config = GPT2Config.from_json_file(output_config_file)
    model = GPT2LMHeadModel(config)
    state_dict = torch.load(output_model_file,
                            map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)
    tokenizer = GPT2Tokenizer(output_vocab_file, merges_file=merges_file)
    model.to("cpu")
    model.eval()
    args.device = "cpu"

    # NOTE(review): args.device assignment and model.to()/.eval() are
    # duplicated here -- redundant but harmless.
    args.device = "cpu"
    model.to(args.device)
    model.eval()

    # NOTE(review): debug early return -- all code below is unreachable.
    return {"break": "point"}
    #data = get_positional_dataset_from_file(tokenizer, args.filename)
    data = get_positional_dataset_from_file(tokenizer, output)
    final_output_dict = {"version": "squash-2.0", "data": [{"paragraphs": []}]}
    question_number = 0

    # Cache of the last paragraph's encoder hidden states, reused across
    # questions that share a paragraph.
    para_cache = {"index": None, "hidden_states": None}

    for inst in tqdm.tqdm(data):
        with torch.no_grad():
            para_index = inst["para_index"]
            # Questions from the same paragraph all appear together
            # We can re-use the paragraph hidden representations for different questions in the same paragraph
            if para_index != para_cache["index"]:
                # Since we have moved to a new paragraph, generate its cache
                para_cache["hidden_states"] = None
                # Ignore the answer and question while building the input
                instance, _ = build_para_only_input_from_segments(
                    inst, tokenizer)
                input_ids = torch.tensor(instance['input_ids'],
                                         device=args.device).unsqueeze(0)
                token_type_ids = torch.tensor(instance['token_type_ids'],
                                              device=args.device).unsqueeze(0)

                # Run a forward pass to generate the para caches
                _, para_cache["hidden_states"] = model(
                    input_ids, token_type_ids=token_type_ids)

            # Sample a question using the paragraph cache
            output = sample_sequence(inst, tokenizer, model, args, para_cache)

        original_paragraph = tokenizer.decode(output['paragraph'])
        generated_question = tokenizer.decode(output['question'],
                                              skip_special_tokens=True)
        original_answer = tokenizer.decode(output['answer'],
                                           skip_special_tokens=True)
        para_index = inst['para_index']
        para_cache["index"] = inst['para_index']

        # verify whether the answer position is correct, since this will be utilized for filtering
        original_ans_position = output["answer_position"]
        if original_paragraph[
                output["answer_position"]:output["answer_position"] +
                len(original_answer)] != original_answer:
            # This should never be executed, only used as a last resort
            logger.info("Answer mismatch!")
            original_ans_position = original_paragraph.index(original_answer)

        # Output in a SQUAD-like format with questions clumped together under their parent paragraph
        if len(final_output_dict["data"][0]["paragraphs"]) > para_index:
            # verify whether the paragraph text is identical
            assert original_paragraph == final_output_dict["data"][0][
                "paragraphs"][para_index]['context']
            # append the question answer pair
            final_output_dict["data"][0]["paragraphs"][para_index][
                'qas'].append({
                    'id':
                    'question_%d' % question_number,
                    'question':
                    generated_question,
                    'answers': [{
                        'text': original_answer,
                        'answer_start': original_ans_position,
                    }],
                    'class':
                    output['class'],
                    'algorithm':
                    output['algorithm'],
                    'is_impossible':
                    False
                })
        else:
            # add a new question to the list of QA pairs
            final_output_dict['data'][0]['paragraphs'].append({
                'context':
                original_paragraph,
                'qas': [{
                    'id':
                    'question_%d' % question_number,
                    'question':
                    generated_question,
                    'answers': [{
                        'text': original_answer,
                        'answer_start': original_ans_position,
                    }],
                    'class':
                    output['class'],
                    'algorithm':
                    output['algorithm'],
                    'is_impossible':
                    False
                }]
            })

        question_number += 1

    #with open("squash/temp/%s/generated_questions.json" % args.key, "w") as f:
    #    f.write(json.dumps(final_output_dict))

    return final_output_dict
Esempio n. 3
0
    def _validate(
        self,
        input,
        seq_lens,
        span_token_ids,
        target,
        target_lens,
        decode_strategy,
        definition=None,
        definition_lens=None,
        sentence_mask=None,
    ):
        """Run one validation forward pass and decode predictions.

        Encodes ``input``, builds a span (or [CLS]) representation,
        optionally applies the variational definition pathway, computes
        the cross-entropy loss / perplexity against ``target``, and runs
        decoding via ``decode_strategy``.

        Returns a DotMap with keys ``predictions``, ``logits``, ``loss``,
        ``perplexity`` and ``kl`` (``kl`` is None unless
        ``self.variational`` is set).

        NOTE(review): ``input`` shadows the builtin.  Tensor shapes below
        are taken from the inline comments -- confirm against the encoder.
        """
        batch_size, tgt_len = target.shape

        # (batch_size,seq_len,hidden_size), (batch_size,hidden_size), (num_layers,batch_size,seq_len,hidden_size)
        last_hidden_layer, pooled_representation, all_hidden_layers = self.encoder(
            input, attention_mask=sequence_mask(seq_lens), token_type_ids=sentence_mask
        )

        KLD = None
        mu_prime = None
        if self.aggregator == "cls":
            # Use the [CLS] position as the span representation.
            cls_hidden = last_hidden_layer[:, 0]
            cls_hidden_forwarded = self.cls_feed_forward(cls_hidden)
            span_representation = cls_hidden_forwarded
            hidden_states = last_hidden_layer
        else:
            # Locate the span tokens within each batch item, then aggregate
            # their hidden states into a single vector per example.
            span_ids = self._id_extractor(
                tokens=span_token_ids, batch=input, lens=seq_lens
            )

            span_representation, hidden_states = self._span_aggregator(
                all_hidden_layers if self.scalar_mix is not None else last_hidden_layer,
                sequence_mask(seq_lens),
                span_ids,
            )

            span_representation = self.context_feed_forward(span_representation)
        if self.variational:
            # Variational path: encode the gold definition and use it to
            # parameterize the posterior; the prior sees only the span.
            (
                definition_last_hidden_layer,
                _,
                definition_all_hidden_layers,
            ) = self.definition_encoder(
                definition, attention_mask=sequence_mask(definition_lens)
            )
            definition_representation = self.definition_feed_forward(
                definition_last_hidden_layer[:, 0]
            )

            post_project = self.w_z_post(
                torch.cat([span_representation, definition_representation], -1)
            )
            prior_project = self.w_z_prior(span_representation)

            mu = self.mean_layer(post_project)
            logvar = self.logvar_layer(post_project)

            mu_prime = self.mean_prime_layer(prior_project)
            logvar_prime = self.logvar_prime_layer(prior_project)

            hidden_states = last_hidden_layer

            # KL(posterior || prior), summed over latent dims, averaged over
            # the batch.
            KLD = (
                kl_divergence(
                    Normal(mu, torch.exp(logvar * 0.5)),
                    Normal(mu_prime, torch.exp(logvar_prime * 0.5)),
                )
                .sum(1)
                .mean()
            )

            # At validation time, decode from the prior mean.
            span_representation = self.z_project(mu_prime)
        # Encoder states serve as attention memory only if the decoder attends.
        memory_bank = hidden_states if self.decoder.attention else None
        _, logits = self.decoder(
            target, target_lens, span_representation, memory_bank, seq_lens,
        )

        # Teacher-forced loss: predict target[1:] from shifted inputs.
        loss = F.cross_entropy(
            logits.view(batch_size * (tgt_len - 1), -1),
            target[:, 1:].contiguous().view(-1),
            ignore_index=self.embeddings.tgt.padding_idx,
        )

        ppl = loss.exp()
        beam_results = self._strategic_decode(
            target,
            target_lens,
            decode_strategy,
            memory_bank,
            seq_lens,
            span_representation,
        )
        return DotMap(
            {
                "predictions": beam_results["predictions"],
                "logits": logits.view(batch_size * (tgt_len - 1), -1),
                "loss": loss,
                "perplexity": ppl,
                "kl": KLD,
            }
        )
    def process_search_by_coordinates(cls, x, y, pictures=False):
        """
        Searches by coordinate from XML and processes the result.
        :param x: longitude
        :param y: latitude
        :param pictures: True if we want house plan pictures to be scrapped
        :return: List of CadasterEntry objects
        """

        results = []

        xml_dict_map = ScrapperXML.scrap_coord(x, y)

        # Missing XML nodes come back as fresh empty DotMap() instances, so
        # every "absent" check must use equality (==).  Identity checks
        # (`is DotMap()`) are always False because DotMap() builds a new
        # object each call -- several such checks below were fixed to `==`.
        pc1 = None
        pc2 = None
        if xml_dict_map.consulta_coordenadas.coordenadas.coord.pc != DotMap():
            pc1 = xml_dict_map.consulta_coordenadas.coordenadas.coord.pc.pc1
            if pc1 == DotMap():
                pc1 = None

            pc2 = xml_dict_map.consulta_coordenadas.coordenadas.coord.pc.pc2
            if pc2 == DotMap():
                pc2 = None

        if pc1 is not None and pc2 is not None:

            entry = ScrapperXML.get_cadaster_entries_by_cadaster(
                '', '', ''.join([pc1, pc2]))
            picture = None
            if entry.consulta_dnp.bico.bi.dt.loine != DotMap():
                # Parcela (single parcel)
                prov_num = entry.consulta_dnp.bico.bi.dt.loine.cp
                city_num = entry.consulta_dnp.bico.bi.dt.cmc

                if pictures and prov_num != DotMap() and city_num != DotMap():
                    picture = Scrapper.scrap_site_picture(
                        prov_num, city_num, ''.join([pc1, pc2]))
                cadaster_entry = CadasterEntryXML(entry, x, y, picture)
                cadaster_entry.to_elasticsearch()
                results.append(cadaster_entry)
            elif entry.consulta_dnp.lrcdnp.rcdnp != DotMap():
                # Multiparcela (several parcels share the coordinate)
                parcelas = entry.consulta_dnp.lrcdnp.rcdnp
                if not isinstance(parcelas, list):
                    parcelas = [parcelas]

                for parcela in parcelas:

                    prov_num = parcela.dt.loine.cp
                    city_num = parcela.dt.cmc

                    # Assemble the cadaster reference from whichever parts
                    # are present in the XML.
                    cadaster = parcela.rc.pc1 if parcela.rc.pc1 != DotMap(
                    ) else ''
                    cadaster += parcela.rc.pc2 if parcela.rc.pc2 != DotMap(
                    ) else ''
                    cadaster += parcela.rc.car if parcela.rc.car != DotMap(
                    ) else ''
                    cadaster += parcela.rc.cc1 if parcela.rc.cc1 != DotMap(
                    ) else ''
                    cadaster += parcela.rc.cc2 if parcela.rc.cc2 != DotMap(
                    ) else ''

                    if pictures and prov_num != DotMap(
                    ) and city_num != DotMap():
                        picture = Scrapper.scrap_site_picture(
                            prov_num, city_num, cadaster)

                    try:
                        # Try to get info by complete cadaster num
                        sub_entry = ScrapperXML.get_cadaster_entries_by_cadaster(
                            '', '', cadaster)
                    except Exception:
                        # Cadastro did not return anything by cadaster entry (error? bug?)
                        # Try to get it by complete address
                        prov_name = parcela.dt.np
                        if prov_name == DotMap():
                            continue
                        # NOTE(review): city_name reads the same field
                        # (dt.np) as prov_name -- looks like a copy/paste
                        # mistake; confirm the right field against the XML
                        # schema before changing it.
                        city_name = parcela.dt.np
                        if city_name == DotMap():
                            continue
                        # NOTE(review): `parcela.ldt...` here vs
                        # `parcela.dt...` for loint below -- verify which
                        # path is correct.
                        tv = parcela.ldt.locs.lous.lourb.dir.tv
                        if tv == DotMap():
                            tv = ''
                        nv = parcela.ldt.locs.lous.lourb.dir.nv
                        if nv == DotMap():
                            nv = ''
                        num = parcela.ldt.locs.lous.lourb.dir.pnp
                        if num == DotMap():
                            num = ''

                        loint = parcela.dt.locs.lous.lourb.loint
                        if loint == DotMap():
                            continue
                        bl = loint.bl
                        if bl == DotMap():
                            bl = ''
                        es = loint.es
                        if es == DotMap():
                            es = ''
                        pt = loint.pt
                        # FIX: previously tested `es` again (copy/paste bug).
                        if pt == DotMap():
                            pt = ''
                        pu = loint.pu
                        # FIX: previously tested `es` again (copy/paste bug).
                        if pu == DotMap():
                            pu = ''
                        sub_entry = ScrapperXML.get_cadaster_entries_by_address(
                            prov_name, city_name, tv, nv, num, bl, es, pt, pu)

                    cadaster_entry = CadasterEntryXML(sub_entry, x, y, picture)
                    cadaster_entry.to_elasticsearch()

                    results.append(cadaster_entry)
        return results
Esempio n. 5
0
def nested_mapping_to_line(nested_mapping, keys):
    # type: (typing.Mapping, typing.Sequence[typing.Text]) -> typing.Text
    """Render the values at the dotted `keys` of `nested_mapping` as one CSV line."""
    dotted = DotMap(nested_mapping)
    values = (recursive_getattr(dotted, key) for key in keys)
    return ','.join(str(value) for value in values)
Esempio n. 6
0
# Strategy deployment settings keyed by sett family (native, pickle,
# harvest, uni, sushi).  Fee params are presumably in basis points
# (e.g. 1000 = 10%, withdrawalFee 75 = 0.75%) -- TODO confirm against the
# strategy contracts.  Token/gauge/vault addresses come from the registry
# objects (pools, curve, registry, pickle) defined elsewhere in this file.
sett_config = DotMap(
    native=DotMap(
        badger=DotMap(
            strategyName="StrategyBadgerRewards",
            params=DotMap(
                # want = Badger token
                # geyser = Special Geyser
                performanceFeeStrategist=0,
                performanceFeeGovernance=0,
                withdrawalFee=0,
            ),
        ),
        uniBadgerWbtc=DotMap(
            strategyName="StrategyBadgerLpMetaFarm",
            params=DotMap(
                # Note: Will not be able to be deployed until the LP token is created
                # want = Uni Badger<>Wbtc LP
                performanceFeeStrategist=0,
                performanceFeeGovernance=0,
                withdrawalFee=0,
            ),
        ),
        sbtcCrv=DotMap(
            strategyName="StrategyCurveGauge",
            params=DotMap(
                want=pools.sbtcCrv.token,
                gauge=pools.sbtcCrv.gauge,
                swap=pools.sbtcCrv.swap,
                minter=curve.minter,
                lpComponent=registry.tokens.wbtc,
                performanceFeeStrategist=1000,
                performanceFeeGovernance=1000,
                withdrawalFee=75,
                keepCRV=0,
            ),
        ),
        renCrv=DotMap(
            strategyName="StrategyCurveGauge",
            params=DotMap(
                want=pools.renCrv.token,
                gauge=pools.renCrv.gauge,
                swap=pools.renCrv.swap,
                minter=curve.minter,
                lpComponent=registry.tokens.wbtc,
                performanceFeeStrategist=1000,
                performanceFeeGovernance=1000,
                withdrawalFee=75,
                keepCRV=0,
            ),
        ),
        tbtcCrv=DotMap(
            strategyName="StrategyCurveGauge",
            params=DotMap(
                want=pools.tbtcCrv.token,
                gauge=pools.tbtcCrv.gauge,
                swap=pools.tbtcCrv.swap,
                minter=curve.minter,
                lpComponent=registry.tokens.wbtc,
                performanceFeeStrategist=1000,
                performanceFeeGovernance=1000,
                withdrawalFee=75,
                keepCRV=0,
            ),
        ),
    ),
    pickle=DotMap(renCrv=DotMap(
        strategyName="StrategyPickleMetaFarm",
        params=DotMap(
            want=pools.renCrv.token,
            pickleJar=pickle.jars.renCrv,
            curveSwap=registry.curve.pool.renCrv.swap,
            lpComponent=registry.tokens.wbtc,
            pid=pickle.pids.pRenCrv,
            performanceFeeStrategist=1000,
            performanceFeeGovernance=1000,
            withdrawalFee=75,
        ),
    ), ),
    harvest=DotMap(
        renCrv=DotMap(
            # Unfinished
            strategyName="StrategyHarvestMetaFarm",
            params=DotMap(
                want=pools.renCrv.token,
                harvestVault=registry.harvest.vaults.renCrv,
                vaultFarm=registry.harvest.farms.fRenCrv,
                metaFarm=registry.harvest.farms.farm,
                badgerTree=registry.harvest.badgerTree,
                performanceFeeStrategist=1000,
                performanceFeeGovernance=1000,
                withdrawalFee=75,
            ),
        ), ),
    uni=DotMap(
        uniDiggWbtc=DotMap(
            # Unfinished
            strategyName="StrategyDiggLpMetaFarm",
            params=DotMap(
                performanceFeeStrategist=0,
                performanceFeeGovernance=0,
                withdrawalFee=0,
            ),
        ), ),
    sushi=DotMap(
        sushiBadgerWBtc=DotMap(
            # Unfinished
            strategyName="StrategySushiBadgerWbtc",
            params=DotMap(
                # want=pools.renCrv.token,
                performanceFeeStrategist=1000,
                performanceFeeGovernance=1000,
                withdrawalFee=0,
            ),
        ),
        sushiWethWBtc=DotMap(
            # Unfinished
            strategyName="StrategySushiBadgerWbtc",
            params=DotMap(
                # want=pools.renCrv.token,
                performanceFeeStrategist=1000,
                performanceFeeGovernance=1000,
                withdrawalFee=50,
            ),
        ),
        sushiDiggWBtc=DotMap(
            # Unfinished
            strategyName="StrategySushiDiggWbtcLpOptimizer",
            params=DotMap(
                performanceFeeStrategist=1000,
                performanceFeeGovernance=1000,
                withdrawalFee=0,
            ),
        ),
    ),
)
    def process_search_by_provinces(cls,
                                    prov_list,
                                    pictures=False,
                                    start_from='',
                                    matches=None):
        """
        Searches by province from XML and processes the result.

        :param prov_list: List of province names
        :param start_from: Name of the city of the first province to start from
        :param pictures: True if we want house plan pictures to be scrapped
        :param matches: Max number of matches (for debugging purposes mainly)
        :return: Flat list of CadasterEntry objects
        """
        times = 0  # number of successful (non-empty) scrapes so far
        results = []  # list of per-address result lists; flattened on return

        for prov_name, prov_num, city_name, city_num, address, tv, nv in Scrapper.get_address_iter(
                prov_list, start_from):
            # Street type/name missing in the iterator output: skip address.
            if tv == DotMap() or nv == DotMap():
                continue

            if ElasticSearchUtils.check_if_address_present(
                    "{} {}".format(tv, nv), city_name, prov_name):
                logger.debug(
                    "Skipping {} {} {} {} because it's been already scrapped.".
                    format(tv, nv, prov_name, city_name))
                continue

            # Walk street numbers upward until 10 consecutive failures
            # (empty results or errors) suggest the street is exhausted.
            num_scrapping_fails = 10
            counter = 1
            while num_scrapping_fails > 0:
                try:
                    cadaster = ScrapperXML.get_cadaster_by_address(
                        prov_name, city_name, tv, nv, counter)
                    res = cls.parse_xml_by_address(cadaster, prov_name,
                                                   city_name, tv, nv, counter,
                                                   pictures)
                    if len(res) < 1:
                        num_scrapping_fails -= 1
                    else:
                        num_scrapping_fails = 10  # success: reset the budget
                        times += 1
                        results.append(res)
                        if matches is not None and times >= matches:
                            return ListUtils.flat(results)

                except urllib.error.HTTPError as e:
                    logger.error("ERROR AT ADDRESS {} {} {} {} {}".format(
                        tv, nv, counter, prov_name, city_name))
                    logger.error(
                        "=============================================")
                    logger.error(e, exc_info=True)
                    logger.error("...sleeping due to connection reset...")
                    logger.debug("...sleeping due to connection reset...")
                    logger.error(
                        "=============================================")
                    ''' Could be a service Unavailable or denegation of service'''
                    num_scrapping_fails -= 1

                except Exception as e:
                    logger.error("ERROR AT ADDRESS {} {} {} {} {}".format(
                        tv, nv, counter, prov_name, city_name))
                    logger.error(
                        "=============================================")
                    logger.error(e, exc_info=True)
                    logger.error(
                        "=============================================")
                    num_scrapping_fails -= 1

                counter += 1

        # FIX: the early return above flattens with ListUtils.flat(); do the
        # same here so callers always receive a flat list of CadasterEntry
        # objects rather than a list of lists.
        return ListUtils.flat(results)
Esempio n. 8
0
def test_space():
    """Return a deliberately tiny search space for smoke-testing the pipeline."""
    space = DotMap()
    space.nP = [100, 200]  # number of requests per sim time
    return space
import time
import os

from . import sl_constants
from . import sl_helpers
from . import sl_console
from pathlib import Path
from dotmap import DotMap
from netaddr import IPAddress
from pandas.io.json import json_normalize

__author__ = "Richard Gold"
# Let every click command accept -h as well as --help.
CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])
# Credentials profile file under the user's home directory.
PROFILE_FILE = os.path.join(str(Path.home()), '.shadowline', 'profile')

# Global, mutable application settings shared across commands.
settings = DotMap()


@click.group(context_settings=CONTEXT_SETTINGS)
@click.option('--profile',
              default='DEFAULT',
              help="Name of profile to use. 'DEFAULT' if not specified.")
@click.pass_context
def main(ctx, profile):
    """
    ShadowLine: A command-line API for Digital Shadows SearchLight
    """
    if ctx.invoked_subcommand == 'setup_profile':
        # create_profile method can be called without existing credentials
        pass
    else:
Esempio n. 10
0
# Load the WBTC contract ABI shipped as a build artifact.
with open("dependency-artifacts/wbtc/wbtc.json") as f:
    wbtc = json.load(f)

# Registry of third-party contract artifacts used by the deployment
# scripts, grouped by upstream project.  The bare names (Agent, Vault,
# UniswapV2Factory, ...) are presumably contract containers imported
# elsewhere in this file -- TODO confirm.
artifacts = DotMap(
    aragon=DotMap(
        Agent=Agent,
        CompanyTemplate=CompanyTemplate,
        Vault=Vault,
        Voting=Voting,
        Finance=Finance,
        TokenManager=TokenManager,
        MiniMeToken=MiniMeToken,
    ),
    gnosis_safe=DotMap(MasterCopy=MasterCopy,
                       ProxyFactory=ProxyFactory,
                       GnosisSafe=GnosisSafe),
    open_zeppelin=DotMap(
        ProxyAdmin=ProxyAdmin,
        AdminUpgradeabilityProxy=AdminUpgradeabilityProxy,
        TokenTimelock=TokenTimelock,
    ),
    uniswap=DotMap(
        UniswapV2Factory=UniswapV2Factory,
        UniswapV2Router=UniswapV2Router,
        UniswapV2Pair=UniswapV2Pair,
    ),
    multicall=DotMap(multicall={"abi": Multicall}),
    wbtc=DotMap(wbtc={"abi": wbtc}),
)
Esempio n. 11
0
import logging

# start = time.time()

# Root logger at DEBUG so both the optimizer and this script are captured.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# To log file
fh = logging.FileHandler('example.log')
fh.setLevel(logging.DEBUG)
logger.addHandler(fh)

# Task and stop criteria come from the opto package (imported elsewhere
# in this file); budget of 10k evaluations.
task = Quadratic()
stopCriteria = opto.opto.classes.StopCriteria(maxEvals=10000)

# Random-search optimizer with verbose progress output.
p = DotMap()
p.verbosity = 1
opt = opto.RandomSearch(parameters=p, task=task, stopCriteria=stopCriteria)
opt.optimize()
logs = opt.get_logs()
print('Number evaluations: %d' % logs.get_n_evals())
print('Optimization completed in %f[s]' % (logs.get_final_time()))
# print(logs.get_parameters())

# Parameters
# Interactive mode off so figures only render when explicitly shown.
plt.ioff()
# plt.figure()
logs.plot_parameters()

#
plt.figure()
Esempio n. 12
0
    def train_dataloader(self):
        """Return a shuffled DataLoader over the training dataset."""
        return DataLoader(
            self.train_dataset,
            batch_size=self.arg.batch_size,
            shuffle=True,
        )

    def val_dataloader(self):
        """Return an ordered (unshuffled) DataLoader over the test dataset."""
        return DataLoader(
            self.test_dataset,
            batch_size=self.arg.batch_size,
            shuffle=False,
        )


def main(arg):
    """Train a PLModel with W&B logging on two GPUs using DDP.

    `arg` is a DotMap of CLI arguments; `arg.name` labels the W&B run and
    the full arg set is logged as hyperparameters.
    """
    seed_everything(42)  # deterministic training runs
    model = PLModel(arg)
    run_logger = WandbLogger(project="Bachelorarbeit", name=arg.name)
    run_logger.watch(model)
    run_logger.log_hyperparams(arg)
    trainer = Trainer(
        gpus=2,
        logger=run_logger,
        distributed_backend='ddp',
        deterministic=True,
        auto_select_gpus=True,
        num_sanity_val_steps=0,
    )
    trainer.fit(model)


if __name__ == '__main__':
    # Wrap parsed CLI args in a DotMap so attributes are dot-accessible.
    arg = DotMap(vars(parse_args()))
    main(arg)
Esempio n. 13
0
    yamls = yaml.load(crfile)
    crfile.close()
    yamls['data']['azure_resource_prefix'] = base64.b64encode(
        bytes(resource_group, 'utf-8')).decode('utf-8')
    yamls['data']['azure_resourcegroup'] = base64.b64encode(
        bytes(resource_group, 'utf-8')).decode('utf-8')
    with open('./gw/openshift/99_cloud-creds-secret.yaml', 'w') as crout:
        yaml.dump(yamls, crout, default_flow_style=False)
        crout.close()

# Rewrite the OpenShift cloud-provider config so it points at the Azure
# resources created for this cluster.
with open('./gw/manifests/cloud-provider-config.yaml') as f:
    # FIX: yaml.load() without an explicit Loader is unsafe and raises a
    # TypeError on PyYAML >= 6; safe_load handles this plain-data file.
    yamlx = yaml.safe_load(f)
    # The provider config is a JSON string embedded in the YAML; parse it
    # preserving key order so the rewritten file stays diff-friendly.
    jsondata = yamlx['data']['config']
    jsonx = json.loads(jsondata, object_pairs_hook=OrderedDict)
    config = DotMap(jsonx)
    config.resourceGroup = resource_group
    config.vnetName = "openshiftVnet"
    config.vnetResourceGroup = resource_group
    config.subnetName = "masterSubnet"
    config.securityGroupName = "master1nsg"
    config.routeTableName = ""
    config.azure_resourcegroup = resource_group
    # Serialize back with tab indentation, matching the original format.
    jsondata = json.dumps(config.toDict(), indent='\t')
    jsonstr = str(jsondata)
    yamlx['data']['config'] = jsonstr + '\n'
    yamlx['metadata']['creationTimestamp'] = None
    # Quote every scalar, then undo the quoting of the literal null.
    yamlstr = yaml.dump(yamlx, default_style='\"', width=4096)
    yamlstr = yamlstr.replace('!!null "null"', 'null')
    with open('./gw/manifests/cloud-provider-config.yaml', 'w') as outfile:
        outfile.write(yamlstr)
Esempio n. 14
0
from dotmap import DotMap

# Baseline configuration; reset() restores `app` to exactly these values.
config = dict()

app = DotMap(config)  # app globals


def reset():
    app.clear()
    app.update(config)
Esempio n. 15
0
    def __init__(self, settings):
        """Build the channel-index lookup tables for the input tensor.

        Populates:
          * ``self.channels`` — top-level channel layout of the input tensor
            (rgb span, trajectory, agent and semantic channels);
          * ``self.semantic_channels`` — per-class semantic channel indices,
            whose layout depends on ``settings.new_carla_net``;
          * ``self.labels_indx`` — cityscapes label id -> semantic channel.

        :param settings: configuration object; ``new_carla_net`` selects the
            semantic layout and ``carla`` is stored as-is.
        """
        # FIX: a duplicate `self.channels` initialization that used to sit
        # here was dead code — it was overwritten verbatim at the bottom of
        # this method before any read. Only the final assignment remains.
        self.semantic_channels = DotMap()
        self.labels_indx = {}
        # Local sem mapping
        if settings.new_carla_net:
            self.semantic_channels.building = 0
            self.semantic_channels.fence = 1
            self.semantic_channels.static = 2
            self.semantic_channels.vegetation = 3
            self.semantic_channels.traffic_sign = 4
            self.semantic_channels.traffic_light = 5
            self.semantic_channels.road = 6
            self.semantic_channels.terrain = 7
            self.semantic_channels.crosswalk = 8
            self.semantic_channels.sidewalk = 9
            self.semantic_channels.sidewalk_extra = 10

            #
            # Buildings and large structures
            self.labels_indx[cityscapes_labels_dict[
                'building']] = self.semantic_channels.building
            self.labels_indx[cityscapes_labels_dict[
                'bridge']] = self.semantic_channels.building
            self.labels_indx[cityscapes_labels_dict[
                'tunnel']] = self.semantic_channels.building

            # Fences and guard and individual walls
            self.labels_indx[
                cityscapes_labels_dict['fence']] = self.semantic_channels.fence
            self.labels_indx[cityscapes_labels_dict[
                'guard rail']] = self.semantic_channels.fence
            self.labels_indx[
                cityscapes_labels_dict['wall']] = self.semantic_channels.fence

            # All non-moving obstacles
            self.labels_indx[cityscapes_labels_dict[
                'static']] = self.semantic_channels.static
            self.labels_indx[cityscapes_labels_dict[
                'dynamic']] = self.semantic_channels.static
            self.labels_indx[
                cityscapes_labels_dict['pole']] = self.semantic_channels.static
            self.labels_indx[cityscapes_labels_dict[
                'polegroup']] = self.semantic_channels.static

            # Vegetation
            self.labels_indx[cityscapes_labels_dict[
                'vegetation']] = self.semantic_channels.vegetation

            # Traffic sign
            self.labels_indx[cityscapes_labels_dict[
                'traffic sign']] = self.semantic_channels.traffic_sign

            # Traffic light
            self.labels_indx[cityscapes_labels_dict[
                'traffic light']] = self.semantic_channels.traffic_light

            # Road, areas where car drives
            self.labels_indx[
                cityscapes_labels_dict['ground']] = self.semantic_channels.road
            self.labels_indx[
                cityscapes_labels_dict['road']] = self.semantic_channels.road
            self.labels_indx[cityscapes_labels_dict[
                'parking']] = self.semantic_channels.road

            # Terrain & rail track
            self.labels_indx[cityscapes_labels_dict[
                'terrain']] = self.semantic_channels.terrain
            self.labels_indx[cityscapes_labels_dict[
                'rail track']] = self.semantic_channels.terrain

            # Crosswalk
            self.labels_indx[cityscapes_labels_dict[
                'crosswalk']] = self.semantic_channels.crosswalk

            # Sidewalk
            self.labels_indx[cityscapes_labels_dict[
                'sidewalk']] = self.semantic_channels.sidewalk

            # Sidewalk extra
            self.labels_indx[cityscapes_labels_dict[
                'sidewalk_extra']] = self.semantic_channels.sidewalk_extra

        else:
            # Legacy (pre new_carla_net) layout: fewer, coarser classes.
            self.semantic_channels.building = 0
            self.semantic_channels.fence = 1
            self.semantic_channels.static = 2
            self.semantic_channels.pole = 3
            self.semantic_channels.road = 4
            self.semantic_channels.sidewalk = 5
            self.semantic_channels.vegetation = 6
            self.semantic_channels.wall = 7
            self.semantic_channels.traffic_sign = 8

            self.labels_indx[cityscapes_labels_dict[
                'building']] = self.semantic_channels.building
            self.labels_indx[
                cityscapes_labels_dict['fence']] = self.semantic_channels.fence
            self.labels_indx[cityscapes_labels_dict[
                'static']] = self.semantic_channels.static
            self.labels_indx[cityscapes_labels_dict[
                'bridge']] = self.semantic_channels.static
            self.labels_indx[cityscapes_labels_dict[
                'tunnel']] = self.semantic_channels.static
            self.labels_indx[cityscapes_labels_dict[
                'dynamic']] = self.semantic_channels.static
            self.labels_indx[cityscapes_labels_dict[
                'guard rail']] = self.semantic_channels.static
            self.labels_indx[
                cityscapes_labels_dict['pole']] = self.semantic_channels.pole
            self.labels_indx[cityscapes_labels_dict[
                'polegroup']] = self.semantic_channels.pole
            self.labels_indx[
                cityscapes_labels_dict['ground']] = self.semantic_channels.road
            self.labels_indx[
                cityscapes_labels_dict['road']] = self.semantic_channels.road
            self.labels_indx[cityscapes_labels_dict[
                'parking']] = self.semantic_channels.road
            self.labels_indx[cityscapes_labels_dict[
                'rail track']] = self.semantic_channels.road
            self.labels_indx[cityscapes_labels_dict[
                'sidewalk']] = self.semantic_channels.sidewalk
            self.labels_indx[cityscapes_labels_dict[
                'vegetation']] = self.semantic_channels.vegetation
            self.labels_indx[cityscapes_labels_dict[
                'terrain']] = self.semantic_channels.vegetation
            self.labels_indx[
                cityscapes_labels_dict['wall']] = self.semantic_channels.wall
            self.labels_indx[cityscapes_labels_dict[
                'traffic light']] = self.semantic_channels.traffic_sign
            self.labels_indx[cityscapes_labels_dict[
                'traffic sign']] = self.semantic_channels.traffic_sign

        self.carla = settings.carla

        # Top-level channel layout of the input tensor.
        self.channels = DotMap()
        self.channels.rgb = [0, 3]
        self.channels.pedestrian_trajectory = 3
        self.channels.cars_trajectory = 4
        self.channels.pedestrians = 5
        self.channels.cars = 6
        self.channels.semantic = 7
        self.set_nbr_channels()
        super(Seg_2d_min, self).__init__(settings)
            intersection_polygon = intersection_polygon.buffer(
                intersection_buffer + 0.5)
            intersection_safe = not Point(
                turning_x, turning_y).within(intersection_polygon)

            write_data = DotMap()
            write_data.turning.theta = turning_th
            write_data.turning.x = turning_x
            write_data.turning.y = turning_y
            write_data.braking_info.safe = intersection_safe
            write_data.braking_info.dist = ego_y - destination

            pickle.dump(write_data, open(par_dir + '/data_ego.pickle', 'wb'))

        return


# Networking / simulation constants for the Webots client.
PORT = 8888
BUFSIZE = 4096
N_SIM_STEPS = 300
supervisor = Supervisor()
# Bundle everything the client task needs into one dot-accessible object.
simulation_data = DotMap()
simulation_data.port = PORT
simulation_data.bufsize = BUFSIZE
simulation_data.task = scenic_intersection(N_SIM_STEPS=N_SIM_STEPS,
                                           supervisor=supervisor)
client_task = ClientWebots(simulation_data)
# run_client() returning falsy ends the scenario-generation session.
if not client_task.run_client():
    print("End of accident scenario generation")
    supervisor.simulationQuit(True)
Esempio n. 17
0
    return digg_shares(value)


def as_wei(value):
    """Identity formatter: the value is already denominated in wei."""
    return value


def as_original(value):
    """No-op formatter that hands back the raw value unchanged."""
    return value


# ERC-20 function signature strings in "name(argTypes)(returnTypes)" form
# (presumably consumed by a seth/cast-style low-level caller — confirm
# against the call sites).
erc20 = DotMap(
    balanceOf="balanceOf(address)(uint256)",
    totalSupply="totalSupply()(uint256)",
    transfer="transfer(address,uint256)()",
    safeTransfer="safeTransfer(address,uint256)()",
    name="name()(string)",
    symbol="symbol()(string)",
    decimals="decimals()(uint256)",
)
# Sett (vault) view/admin function signatures in the same format.
sett = DotMap(
    getPricePerFullShare="getPricePerFullShare()(uint256)",
    available="available()(uint256)",
    balance="balance()(uint256)",
    controller="controller()(address)",
    governance="governance()(address)",
    strategist="strategist()(address)",
    keeper="keeper()(address)",
    shares="shares()(uint256)",
)
strategy = DotMap(
Esempio n. 18
0
 def __init__(self):
     # Backing store: an empty dot-accessible container for this object's data.
     self._data = DotMap()
Esempio n. 19
0
                'PHENO_predicted.tsv.gz',
                'PHENO_metagenome_out/pred_metagenome_unstrat.tsv.gz',
            ],
        ),
        metacyc=dict(
            name='MetaCyc',
            title='MetaCyc',
            relfp=[
                'pathways_out/path_abun_predictions.tsv.gz',
                'pathways_out/path_abun_unstrat.tsv.gz',
            ],
        ),
    ),
)

# _dynamic=False: unknown attribute access raises instead of silently
# creating empty DotMap children.
Var = DotMap(_config, _dynamic=False)  # app-wide globals container


def reset_Var():
    """Restore the ``Var`` globals container to the pristine ``_config`` contents."""
    Var.clear()
    Var.update(_config)


'''
TSVs are:

'pathways_out/path_abun_unstrat.tsv', # most important to workflow
'pathways_out/path_abun_unstrat_per_seq.tsv',
'pathways_out/path_abun_predictions.tsv',
'EC_predicted.tsv', # 100M
'KO_predicted.tsv', # 358M (Ginormo)
Esempio n. 20
0
 def __init__(self, melodic_model, timbre_model, hparams):
     """Store the melodic and timbre models and the hyperparameters.

     :param melodic_model: model (type ``Model``) for the melodic task.
     :param timbre_model: model (type ``Model``) for the timbre task.
     :param hparams: hyperparameter mapping; when ``None`` the defaults from
         ``get_default_hparams()`` are used, wrapped in a DotMap.
     """
     if hparams is None:
         hparams = DotMap(get_default_hparams())
     self.hparams = hparams
     self.melodic_model: Model = melodic_model
     self.timbre_model: Model = timbre_model
Esempio n. 21
0
    def parse_xml_by_address(cls,
                             numerero_map,
                             prov_name,
                             city_name,
                             tv,
                             nv,
                             num,
                             pictures=False):
        """
        Parses an XML and creates CadasterEntry objects.
        :param numerero_map: DotMap obtained from a previous call with information about the address to parse
        :param prov_name: Province Name
        :param city_name: City Name
        :param tv: Kind of way (Tipo de Via) - CL (calle), AV (Avenida) ...
        :param nv: Street name (Nombre de via)
        :param num: Street number (Numero de via)
        :param pictures: True if we want to scrap also house plan pictures. False otherwise.
        :return: List of CadasterEntry objects
        """
        results = []
        # Missing XML nodes surface as empty DotMap() values, hence all the
        # `== DotMap()` emptiness checks below.
        if numerero_map.consulta_numerero.lerr.err.cod != DotMap():
            return results

        numps = numerero_map.consulta_numerero.numerero.nump

        if not isinstance(numps, list):
            numps = [numps]

        for nump in numps:
            if nump.num.pnp == DotMap():
                continue

            num = nump.num.pnp

            if nump.pc == DotMap():
                continue

            if nump.pc.pc1 == DotMap() or nump.pc.pc2 == DotMap():
                continue

            cadaster_num = nump.pc.pc1 + nump.pc.pc2

            coords_map = ScrapperXML.get_coords_from_cadaster(
                prov_name, city_name, cadaster_num)
            lon = coords_map.consulta_coordenadas.coordenadas.coord.geo.xcen
            if lon == DotMap():
                lon = None

            lat = coords_map.consulta_coordenadas.coordenadas.coord.geo.ycen
            if lat == DotMap():
                lat = None
            ''' Adding to tracking file'''
            logger.info('{},{}'.format(lon, lat))

            entry_map = ScrapperXML.get_cadaster_entries_by_address(
                prov_name, city_name, tv, nv, num)
            picture = None
            if entry_map.consulta_dnp.bico != DotMap():

                prov_num = entry_map.consulta_dnp.bico.bi.dt.loine.cp
                city_num = entry_map.consulta_dnp.bico.bi.dt.loine.cm

                if pictures and prov_num != DotMap() and city_num != DotMap():
                    picture = Scrapper.scrap_site_picture(
                        prov_num, city_num, cadaster_num)

                # Parcela
                cadaster_entry = CadasterEntryXML(entry_map, lon, lat, picture)
                results.append(cadaster_entry)
                cadaster_entry.to_elasticsearch()

            elif entry_map.consulta_dnp.lrcdnp.rcdnp != DotMap():
                # Multiparcela
                for site in entry_map.consulta_dnp.lrcdnp.rcdnp:
                    site_map = DotMap(site)

                    if site_map.rc == DotMap():
                        continue

                    # Multiparcela
                    parcelas = entry_map.consulta_dnp.lrcdnp.rcdnp
                    if not isinstance(parcelas, list):
                        parcelas = [parcelas]
                    for parcela in parcelas:
                        cadaster = parcela.rc.pc1 if parcela.rc.pc1 != DotMap(
                        ) else ''
                        cadaster += parcela.rc.pc2 if parcela.rc.pc2 != DotMap(
                        ) else ''
                        cadaster += parcela.rc.car if parcela.rc.car != DotMap(
                        ) else ''
                        cadaster += parcela.rc.cc1 if parcela.rc.cc1 != DotMap(
                        ) else ''
                        cadaster += parcela.rc.cc2 if parcela.rc.cc2 != DotMap(
                        ) else ''

                        prov_num = parcela.dt.loine.cp
                        city_num = parcela.dt.cmc

                        if pictures and prov_num != DotMap(
                        ) and city_num != DotMap():
                            picture = Scrapper.scrap_site_picture(
                                prov_num, city_num, cadaster)

                        try:
                            # Try to get info by complete cadaster num
                            sub_entry = ScrapperXML.get_cadaster_entries_by_cadaster(
                                prov_name, city_name, cadaster)
                        except Exception:
                            # BUGFIX: narrowed from a bare `except:` so
                            # KeyboardInterrupt / SystemExit still propagate.
                            # Cadastro did not return anything by cadaster entry (error? bug?)
                            # Try to get it by complete address
                            loint = parcela.dt.locs.lous.lourb.loint
                            # BUGFIX: was `loint is DotMap()`, an identity
                            # comparison against a fresh object that is always
                            # False; use equality like every other check here.
                            if loint == DotMap():
                                continue
                            bl = loint.bl
                            if bl == DotMap():
                                bl = ''
                            es = loint.es
                            if es == DotMap():
                                es = ''
                            pt = loint.pt
                            # BUGFIX: previously tested `es` (copy-paste).
                            if pt == DotMap():
                                pt = ''
                            pu = loint.pu
                            # BUGFIX: previously tested `es` (copy-paste).
                            if pu == DotMap():
                                pu = ''
                            sub_entry = ScrapperXML.get_cadaster_entries_by_address(
                                prov_name, city_name, tv, nv, num, bl, es, pt,
                                pu)

                        cadaster_entry = CadasterEntryXML(
                            sub_entry, lon, lat, picture)
                        cadaster_entry.to_elasticsearch()

                        results.append(cadaster_entry)

        return results
Esempio n. 22
0
    def train(self, *, device, n_total_steps, n_warm_up_steps, record_freq,
              record, model_training_freq, policy_training_freq, eval_freq,
              task_name, model_training_n_batches, train_reward):
        """A single step of interaction with the environment.

        Acts in the environment (random agent during warm-up, learned agent
        afterwards), stores the transition in the buffer, periodically
        (re)trains the dynamics/reward models and the policy, and evaluates.

        :return: DotMap with ``done`` (experiment finished), the last average
            eval score, the mean absolute action and the current step index.
        """
        self._setup_if_new()

        ex.step_i += 1

        behavioral_agent = self.random_agent if ex.step_i <= n_warm_up_steps else self.agent
        with torch.no_grad():
            action = behavioral_agent.get_action(self.env_loop.state,
                                                 deterministic=False).to('cpu')
        prev_state = self.env_loop.state.clone().to(device)
        if record and (ex.step_i == 1 or ex.step_i % record_freq == 0):
            self.env_loop.record_next_episode()
        state, next_state, done = self.env_loop.step(
            to_np(action), video_file_suffix=ex.step_i)
        reward = self.exploitation_task(state, action, next_state).item()
        # FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24;
        # np.float64 is exactly what the old alias resolved to.
        self.buffer.add(state, action, next_state,
                        torch.from_numpy(np.array([[reward]],
                                                  dtype=np.float64)))
        self.stats.add(state, action, next_state, done)
        if done:
            log_last_episode(self.stats)

        # FIX: loop variable renamed so it no longer shadows the `task_name`
        # parameter (the old f'{...}' key is equivalent to str()).
        tasks_rewards = {
            str(name): self.stats.get_recent_reward(name)
            for name in self.eval_tasks
        }
        step_stats = dict(
            step=ex.step_i,
            done=done,
            action_abs_mean=action.abs().mean().item(),
            reward=self.exploitation_task(state, action, next_state).item(),
            action_value=self.agent.get_action_value(prev_state,
                                                     action).item(),
        )
        ex.mlog.add_scalars('main_loop', {**step_stats, **tasks_rewards})

        # (Re)train the model on the current buffer
        if model_training_freq is not None and model_training_n_batches > 0 and ex.step_i % model_training_freq == 0:
            self.model.setup_normalizer(self.buffer.normalizer)
            self.reward_model.setup_normalizer(self.buffer.normalizer)
            timed(train_model)(self.model,
                               self.model_optimizer,
                               self.buffer,
                               mode='train')
            if train_reward:
                task = self.exploitation_task
                timed(train_reward_model)(self.reward_model,
                                          self.reward_model_optimizer,
                                          self.buffer,
                                          mode='train',
                                          task=task)

        # (Re)train the policy using current buffer and model
        if ex.step_i >= n_warm_up_steps and ex.step_i % policy_training_freq == 0:
            task = self.exploitation_task
            self.agent.setup_normalizer(self.buffer.normalizer)
            self.agent = timed(train_agent)(self.agent,
                                            self.model,
                                            self.reward_model,
                                            self.buffer,
                                            task=task,
                                            task_name=task_name,
                                            is_done=self.is_done,
                                            mode='train',
                                            context_i={})

        # Evaluate the agent
        if eval_freq is not None and ex.step_i % eval_freq == 0:
            self.last_avg_eval_score = evaluate_on_tasks(agent=self.agent,
                                                         model=self.model,
                                                         buffer=self.buffer,
                                                         task_name=task_name,
                                                         context='eval')

        experiment_finished = ex.step_i >= n_total_steps
        return DotMap(
            done=experiment_finished,
            avg_eval_score=self.last_avg_eval_score,
            action_abs_mean=action.abs().mean().item(
            ),  # This is just for regression tests
            step_i=ex.step_i)
Esempio n. 23
0
def sample_to_features_squad(
    sample, tokenizer, max_seq_len, doc_stride, max_query_length, tasks,
):
    """Convert one SQuAD-style sample into BERT-style input feature dicts.

    Long documents are split into overlapping windows (sliding by
    ``doc_stride``), so a single sample may yield several feature dicts.

    :param sample: sample whose ``clear_text`` mapping holds the question
        text, doc tokens and (when training) the answer-span fields.
    :param tokenizer: word-piece tokenizer providing ``tokenize`` and
        ``convert_tokens_to_ids``.
    :param max_seq_len: fixed total sequence length after zero-padding.
    :param doc_stride: stride between consecutive document windows.
    :param max_query_length: the question is truncated to this many tokens.
    :param tasks: unused in this function; kept for signature compatibility.
    :return: list of feature dicts with input_ids, padding_mask, segment_ids,
        start/end positions and the is_impossible flag.
    """
    sample.clear_text = DotMap(sample.clear_text, _dynamic=False)
    is_training = sample.clear_text.is_training

    unique_id = 1000000000
    features = []

    query_tokens = tokenizer.tokenize(sample.clear_text.question_text)

    if len(query_tokens) > max_query_length:
        query_tokens = query_tokens[0:max_query_length]

    # Map whole-word token indices to word-piece indices in both directions.
    tok_to_orig_index = []
    orig_to_tok_index = []
    all_doc_tokens = []
    for (i, token) in enumerate(sample.clear_text.doc_tokens):
        orig_to_tok_index.append(len(all_doc_tokens))
        sub_tokens = tokenizer.tokenize(token)
        for sub_token in sub_tokens:
            tok_to_orig_index.append(i)
            all_doc_tokens.append(sub_token)

    # Translate the word-level answer span to word-piece positions.
    tok_start_position = None
    tok_end_position = None
    if is_training and sample.clear_text.is_impossible:
        tok_start_position = -1
        tok_end_position = -1
    if is_training and not sample.clear_text.is_impossible:
        tok_start_position = orig_to_tok_index[sample.clear_text.start_position]
        if sample.clear_text.end_position < len(sample.clear_text.doc_tokens) - 1:
            tok_end_position = orig_to_tok_index[sample.clear_text.end_position + 1] - 1
        else:
            tok_end_position = len(all_doc_tokens) - 1
        (tok_start_position, tok_end_position) = _SQUAD_improve_answer_span(
            all_doc_tokens,
            tok_start_position,
            tok_end_position,
            tokenizer,
            sample.clear_text.orig_answer_text,
        )

    # The -3 accounts for [CLS], [SEP] and [SEP]
    max_tokens_for_doc = max_seq_len - len(query_tokens) - 3

    # We can have documents that are longer than the maximum sequence length.
    # To deal with this we do a sliding window approach, where we take chunks
    # of the up to our max length with a stride of `doc_stride`.
    _DocSpan = collections.namedtuple(  # pylint: disable=invalid-name
        "DocSpan", ["start", "length"]
    )
    doc_spans = []
    start_offset = 0
    while start_offset < len(all_doc_tokens):
        length = len(all_doc_tokens) - start_offset
        if length > max_tokens_for_doc:
            length = max_tokens_for_doc
        doc_spans.append(_DocSpan(start=start_offset, length=length))
        if start_offset + length == len(all_doc_tokens):
            break
        start_offset += min(length, doc_stride)

    # Build one feature dict per window: [CLS] query [SEP] doc_chunk [SEP].
    for (doc_span_index, doc_span) in enumerate(doc_spans):
        tokens = []
        token_to_orig_map = {}
        token_is_max_context = {}
        segment_ids = []
        tokens.append("[CLS]")
        segment_ids.append(0)
        for token in query_tokens:
            tokens.append(token)
            segment_ids.append(0)
        tokens.append("[SEP]")
        segment_ids.append(0)

        for i in range(doc_span.length):
            split_token_index = doc_span.start + i
            token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index]

            is_max_context = _SQUAD_check_is_max_context(
                doc_spans, doc_span_index, split_token_index
            )
            token_is_max_context[len(tokens)] = is_max_context
            tokens.append(all_doc_tokens[split_token_index])
            segment_ids.append(1)
        tokens.append("[SEP]")
        segment_ids.append(1)

        input_ids = tokenizer.convert_tokens_to_ids(tokens)

        # The mask has 1 for real tokens and 0 for padding tokens. Only real
        # tokens are attended to.
        padding_mask = [1] * len(input_ids)

        # Zero-pad up to the sequence length.
        while len(input_ids) < max_seq_len:
            input_ids.append(0)
            padding_mask.append(0)
            segment_ids.append(0)

        assert len(input_ids) == max_seq_len
        assert len(padding_mask) == max_seq_len
        assert len(segment_ids) == max_seq_len

        start_position = 0
        end_position = 0
        if is_training and not sample.clear_text.is_impossible:
            # For training, if our document chunk does not contain an annotation
            # we throw it out, since there is nothing to predict.
            doc_start = doc_span.start
            doc_end = doc_span.start + doc_span.length - 1
            out_of_span = False
            if not (tok_start_position >= doc_start and tok_end_position <= doc_end):
                out_of_span = True
            if out_of_span:
                start_position = 0
                end_position = 0
            else:
                doc_offset = len(query_tokens) + 2
                start_position = tok_start_position - doc_start + doc_offset
                end_position = tok_end_position - doc_start + doc_offset
        if is_training and sample.clear_text.is_impossible:
            start_position = 0
            end_position = 0

        inp_feat = {}
        inp_feat["input_ids"] = input_ids
        inp_feat["padding_mask"] = padding_mask  # attention_mask
        inp_feat["segment_ids"] = segment_ids  # token_type_ids
        inp_feat["start_position"] = start_position
        inp_feat["end_position"] = end_position
        inp_feat["is_impossible"] = sample.clear_text.is_impossible
        features.append(inp_feat)
        unique_id += 1

    return features
Esempio n. 24
0
from helpers.registry.ChainRegistry import ChainRegistry
from dotmap import DotMap
from helpers.registry.WhaleRegistryAction import WhaleRegistryAction

# Gnosis Safe deployment addresses (proxy factory + master copy).
gnosis_safe_registry = DotMap(addresses=DotMap(
    proxyFactory="0x76E2cFc1F5Fa8F6a5b3fC4c8F4788F0116861F9B",
    masterCopy="0x34CfAC646f301356fAa8B21e94227e3583Fe3F5F",
), )
# PancakeSwap contract addresses plus the LP pairs / MasterChef pool ids
# this project interacts with (presumably BSC mainnet — confirm).
pancake_registry = DotMap(
    cake="0x0e09fabb73bd3ade0a17ecc321fd13a19e81ce82",
    symbol="Cake",
    syrup="0x009cF7bC57584b7998236eff51b98A168DceA9B0",
    masterChef="0x73feaa1eE314F8c655E354234017bE2193C9E24E",
    factoryV2="0xBCfCcbde45cE874adCB698cC183deBcF17952812",
    routerV2="0x05fF2B0DB69458A0750badebc4f9e13aDd608C7F",
    smartChefs="0xe4dD0C50fb314A8B2e84D211546F5B57eDd7c2b9",
    chefPairs=DotMap(
        bnbBtcb="0x7561EEe90e24F3b348E1087A005F78B4c8453524",
        bBadgerBtcb="0x10f461ceac7a17f59e249954db0784d42eff5db5",
        bDiggBtcb="0xE1E33459505bB3763843a426F7Fd9933418184ae",
    ),
    chefPids=DotMap(
        bnbBtcb=15,
        bBadgerBtcb=0,
        bDiggBtcb=104,
    ),
)
# Multicall aggregator contract address.
multicall_registry = DotMap(
    multicall="0xE1dDc30f691CA671518090931e3bFC1184BFa4Aa", )
sushi_registry = DotMap(
    sushiToken="",
Esempio n. 25
0
 def _equal_with(_large_rect, _rect2):
     """Return True iff the two rect mappings have the same ``x`` coordinate."""
     _large_rect, rect2 = DotMap(_large_rect), DotMap(_rect2)
     # Idiom fix: return the comparison directly instead of the
     # `if cond: return True / else: return False` anti-pattern.
     return _large_rect.x == rect2.x
Esempio n. 26
0
cosmos_container_client = cosmos_database_client.get_container_client(
    getenv("AZURE_COSMOS_CONTAINER", default="images")
)

while True:

    print("Receiving messages from queue")
    
    batches = queue_client.receive_messages(
        messages_per_page=getenv("AZURE_STORAGE_QUEUE_MSG_COUNT", default="10")
    )

    for batch in batches.by_page():
        for message in batch:
            message_json = DotMap(json.loads(message.content))

            print("Message received: " + message_json.id)

            print("Extracting text from image")
            fr_poller = fr_client.begin_recognize_content_from_url(message_json.url)
            fr_result = fr_poller.result()

            message_json.text = " ".join([line.text for page in fr_result for line in page.lines])

            print("Image text: " + message_json.text)

            if message_json.text:
                print("Analyzing text sentiment")
                ta_response = ta_client.analyze_sentiment([message_json.text])
Esempio n. 27
0
    def forward(
        self,
        input,
        seq_lens,
        span_token_ids,
        target,
        target_lens,
        definition=None,
        definition_lens=None,
        classification_labels=None,
        sentence_mask=None,
    ):
        """Encode the context, aggregate the target span and decode a definition.

        :param input: context token id batch fed to the encoder.
        :param seq_lens: context sequence lengths (used to build masks).
        :param span_token_ids: token ids of the span to locate in ``input``
            (only used when ``self.aggregator`` != "cls").
        :param target: gold definition token ids, shape (batch_size, tgt_len).
        :param target_lens: lengths of the gold definitions.
        :param definition: definition token ids for the posterior encoder
            (used only when ``self.variational`` is truthy).
        :param definition_lens: lengths for ``definition``.
        :param classification_labels: unused in this method.
        :param sentence_mask: passed to the encoder as token_type_ids.
        :return: DotMap with predictions, logits, loss, perplexity and the
            variational terms (``fake_kl``, ``kl``; None when not variational).
        """
        batch_size, tgt_len = target.shape

        # (batch_size,seq_len,hidden_size), (batch_size,hidden_size), (num_layers,batch_size,seq_len,hidden_size)
        last_hidden_layer, sentence_representation, all_hidden_layers = self.encoder(
            input, attention_mask=sequence_mask(seq_lens), token_type_ids=sentence_mask
        )

        cosine_loss = None
        loss = None
        KLD = None
        fake_loss_kl = None
        if self.aggregator == "cls":
            # Use the [CLS]-position vector as the span representation.
            cls_hidden = last_hidden_layer[:, 0, :].squeeze(1)
            span_representation = self.context_feed_forward(cls_hidden)
            hidden_states = last_hidden_layer

        else:
            # Locate the span inside the batch, then pool over its positions.
            span_ids = self._id_extractor(
                tokens=span_token_ids, batch=input, lens=seq_lens
            )

            span_representation, hidden_states = self._span_aggregator(
                all_hidden_layers if self.scalar_mix is not None else last_hidden_layer,
                sequence_mask(seq_lens),
                span_ids,
            )
            span_representation = self.context_feed_forward(span_representation)
        if self.variational:
            # Posterior projects [span; definition]; prior projects span only.
            (
                definition_last_hidden_layer,
                _,
                definition_all_hidden_layers,
            ) = self.definition_encoder(
                definition, attention_mask=sequence_mask(definition_lens)
            )
            definition_representation = self.definition_feed_forward(
                definition_last_hidden_layer[:, 0]
            )

            post_project = self.w_z_post(
                torch.cat([span_representation, definition_representation], -1)
            )
            prior_project = self.w_z_prior(span_representation)

            mu = self.mean_layer(post_project)
            logvar = self.logvar_layer(post_project)

            mu_prime = self.mean_prime_layer(prior_project)
            logvar_prime = self.logvar_prime_layer(prior_project)

            # Reparameterization trick, then project z back to decoder space.
            z = mu + torch.exp(logvar * 0.5) * torch.randn_like(logvar)
            span_representation = self.z_project(z)
            KLD = kl_divergence(
                Normal(mu, torch.exp(logvar * 0.5)),
                Normal(mu_prime, torch.exp(logvar_prime * 0.5)),
            )
            # Free-bits style masking: only KL above the per-dim target counts.
            kl_mask = (KLD > (self.target_kl / self.latent_size)).float()
            fake_loss_kl = (kl_mask * KLD).sum(dim=1)

        predictions, logits = self.decoder(
            target, target_lens, span_representation, hidden_states, seq_lens,
        )

        if self.variational:
            # Per-example summed NLL (reduction="none" then sum over time).
            loss = (
                F.cross_entropy(
                    logits.view(batch_size * (tgt_len - 1), -1),
                    target[:, 1:].contiguous().view(-1),
                    ignore_index=self.embeddings.tgt.padding_idx,
                    reduction="none",
                )
                .view(batch_size, tgt_len - 1)
                .sum(1)
            )

            perplexity = F.cross_entropy(
                logits.view(batch_size * (tgt_len - 1), -1),
                target[:, 1:].contiguous().view(-1),
                ignore_index=self.embeddings.tgt.padding_idx,
                reduction="mean",
            ).exp()
        else:
            loss = F.cross_entropy(
                logits.view(batch_size * (tgt_len - 1), -1),
                target[:, 1:].contiguous().view(-1),
                ignore_index=self.embeddings.tgt.padding_idx,
            )
            perplexity = loss.exp()
        return DotMap(
            {
                "predictions": predictions,
                "logits": logits,
                "loss": loss,
                "perplexity": perplexity,
                "fake_kl": fake_loss_kl,
                "kl": KLD,
                "cosine_loss": cosine_loss,
            }
        )
Esempio n. 28
0
        'red_high': HSVColorRange([165, 60, 60], [179, 255, 255]),
        'green': HSVColorRange([15, 20, 30], [60, 225, 225]),
    },
    'processing': {
        'rectify_shape': (1920, int(1920 * 0.7)),
        'gaussian_blur': (3, 3),
        'img_threshold': (100, 255),
        'area_threshold': 5000,
        'contours_size_threshold': 100,
        'polygon_tolerance': 1,
        'line_width': 1,
        'out_size_inches': (7.87402, 5.51181),  # TODO: fix scaling
    },
    'pdf_output': {
        'plate_width_cm': 80,
        'plate_height_cm': 50,
        'draw_area_width_cm': 20,
        'draw_area_height_cm': 14
    },
    'data': {
        'overwrite_output': False,
        'output_location': 'desktop'  # can be either 'desktop' or 'repository'
    }
}

# DotMap: This enables config access like...
#     from planvec.config import CONFIG
#     x_blur, y_blur = CONFIG.processing.gaussian_blur
# TODO: This is not really satisfactory
planvec_config = DotMap(config_dict)
Esempio n. 29
0
    def __init__(
        self,
        encoder,
        encoder_pretrained,
        encoder_frozen,
        decoder_hidden,
        embeddings,
        max_layer=12,
        src_pad_idx=0,
        encoder_hidden=None,
        variational=None,
        latent_size=None,
        scalar_mix=False,
        aggregator="mean",
        teacher_forcing_p=0.3,
        classification=None,
        attentional=False,
        definition_encoder=None,
        word_dropout_p=None,
        decoder_num_layers=None,
    ):
        """Encoder + LSTM-decoder model that generates definitions for spans.

        :param encoder: contextual encoder; when ``encoder_pretrained`` its
            ``config.hidden_size`` overrides ``encoder_hidden``.
        :param encoder_pretrained: take the hidden size from the encoder config.
        :param encoder_frozen: if True, disable gradients on all encoder params.
        :param decoder_hidden: hidden size of the LSTM decoder.
        :param embeddings: embedding holder; ``embeddings.tgt`` feeds the decoder.
        :param max_layer: highest encoder layer used by the scalar mix.
        :param src_pad_idx: source-vocabulary padding index.
        :param variational: if truthy, build the VDM (variational) components.
        :param latent_size: size of the latent z (variational only).
        :param scalar_mix: learn a mix over encoder layers 0..max_layer.
        :param aggregator: span aggregation strategy; "span" adds a
            self-attentive span extractor.
        :param teacher_forcing_p: teacher-forcing probability for the decoder.
        :param classification: unused here; kept for caller compatibility.
        :param attentional: if True, the decoder uses "general" attention.
        :param definition_encoder: separate encoder for definitions (variational).
        :param word_dropout_p: word dropout probability inside the decoder.
        :param decoder_num_layers: number of LSTM decoder layers.
        """
        super(DefinitionProbing, self).__init__()

        self.embeddings = embeddings
        self.variational = variational
        self.encoder_hidden = encoder_hidden
        self.decoder_hidden = decoder_hidden
        self.decoder_num_layers = decoder_num_layers
        self.encoder = encoder
        self.latent_size = latent_size
        self.src_pad_idx = src_pad_idx
        if encoder_pretrained:
            self.encoder_hidden = self.encoder.config.hidden_size
        if encoder_frozen:
            for param in self.encoder.parameters():
                param.requires_grad = False
        self.max_layer = max_layer
        self.aggregator = aggregator
        if self.aggregator == "span":
            self.span_extractor = SelfAttentiveSpanExtractor(self.encoder_hidden)
        self.context_feed_forward = nn.Linear(self.encoder_hidden, self.encoder_hidden)
        self.scalar_mix = None
        if scalar_mix:
            self.scalar_mix = ScalarMix(self.max_layer + 1)
        # Beam-search scorer with average length penalty.
        self.global_scorer = GNMTGlobalScorer(
            alpha=2, beta=None, length_penalty="avg", coverage_penalty=None
        )

        self.decoder = LSTM_Decoder(
            embeddings.tgt,
            hidden=self.decoder_hidden,
            encoder_hidden=self.encoder_hidden,
            num_layers=self.decoder_num_layers,
            word_dropout=word_dropout_p,
            teacher_forcing_p=teacher_forcing_p,
            attention="general" if attentional else None,
            dropout=DotMap({"input": 0.5, "output": 0.5}),
            decoder="VDM" if self.variational else "LSTM",
            variational=self.variational,
            latent_size=self.latent_size,
        )

        self.target_kl = 1.0
        if self.variational:
            # Posterior/prior projection heads for the latent z.
            self.definition_encoder = definition_encoder
            self.definition_feed_forward = nn.Linear(
                self.encoder_hidden, self.encoder_hidden
            )
            self.mean_layer = nn.Linear(self.latent_size, self.latent_size)
            self.logvar_layer = nn.Linear(self.latent_size, self.latent_size)
            self.w_z_post = nn.Sequential(
                nn.Linear(self.encoder_hidden * 2, self.latent_size), nn.Tanh()
            )
            self.mean_prime_layer = nn.Linear(self.latent_size, self.latent_size)
            self.logvar_prime_layer = nn.Linear(self.latent_size, self.latent_size)
            self.w_z_prior = nn.Sequential(
                nn.Linear(self.encoder_hidden, self.latent_size), nn.Tanh()
            )
            self.z_project = nn.Sequential(
                nn.Linear(self.latent_size, self.decoder_hidden), nn.Tanh()
            )
Esempio n. 30
0
def nested_mapping_to_line(nested_mapping: typing.Mapping,
                           keys: typing.Sequence[typing.Text]) -> typing.Text:
    """Flatten selected dotted-path values of a nested mapping into a CSV line.

    :param nested_mapping: arbitrarily nested mapping to read values from.
    :param keys: dotted attribute paths, resolved via ``recursive_getattr``.
    :return: comma-joined string of the stringified values, in ``keys`` order.
    """
    dotted = DotMap(nested_mapping)
    values = (str(recursive_getattr(dotted, key)) for key in keys)
    return ",".join(values)