def listfiles(path, ext_list=None):
    "returns a sorted list of file basenames in the given dir, optionally filtered by extension"
    path_list = lmap(lambda fname: os.path.abspath(join(path, fname)), os.listdir(path))
    if ext_list:
        path_list = lfilter(lambda path: os.path.splitext(path)[1] in ext_list, path_list)
    path_list = sorted(filter(os.path.isfile, path_list))
    return lmap(os.path.basename, path_list)
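# The snippets in this section lean on small list-returning wrappers around map/filter.
# A minimal sketch of those helpers, assumed rather than copied from the original code:
def lmap(func, *iterables):
    "like map(), but returns a list instead of a lazy iterator (Python 3)"
    return list(map(func, *iterables))

def lfilter(func, iterable):
    "like filter(), but returns a list instead of a lazy iterator (Python 3)"
    return list(filter(func, iterable))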
def decode_pred(pred: EvalPrediction) -> Tuple[List[str], List[str]]:
    pred_str = tokenizer.batch_decode(pred.predictions, skip_special_tokens=True)
    label_str = tokenizer.batch_decode(pred.label_ids, skip_special_tokens=True)
    pred_str = lmap(str.strip, pred_str)
    label_str = lmap(str.strip, label_str)
    return pred_str, label_str
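# Hedged usage sketch: decode_pred reads `tokenizer` from its enclosing scope and is
# typically wired into a Hugging Face Trainer as the compute_metrics callback (which
# receives an EvalPrediction). calculate_rouge stands in for whatever metric helper
# the surrounding project provides.
def compute_metrics(pred: EvalPrediction) -> dict:
    pred_str, label_str = decode_pred(pred)
    return calculate_rouge(pred_str, label_str)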
def read_data(self, data_path):
    with open(data_path, "rt") as file_:
        func = lambda line: [1] + lmap(float, line.split())
        data = lmap(func, file_.readlines())
    return data
def main():
    m = {'#': 1, '.': 0}
    l = lmap(lambda x: [m[i] for i in x[:-1]], read_lines())
    ast = []
    for i in range(len(l)):
        for j in range(len(l[0])):
            if l[i][j]:
                ast.append((j, i))
    s = []
    for i in ast:
        angles = set()
        for j in ast:
            if i != j:
                k = compute_k(i, j)
                angles.add(k)
        s.append(len(angles))
    i = np.argmax(s)
    print(s[i])
    best = ast[i]
    dd = collections.defaultdict(list)
    for i in ast:
        if i != best:
            k = compute_k(i, best)
            dd[k].append(i)
    d = dict(dd)
    s = sorted([j for i in d.values() for j in compute_angles_group(i, best)])
    print(s[199][1])
def expand_videos(msid, video):
    gc_data = metadata(msid)  # cached on first hit
    gc_id_str = ", ".join(gc_data.keys())
    v_id = video['id']

    ensure(gc_data, "glencoe doesn't know %r, it doesn't have any media" % msid)
    ensure(v_id in gc_data, "glencoe doesn't know %r, only %r" % (v_id, gc_id_str))

    video_data = gc_data[v_id]
    video_data = utils.subdict(video_data, ['jpg_href', 'width', 'height'])
    video_data = utils.renkeys(video_data, [('jpg_href', 'image')])

    func = lambda mtype: OrderedDict([
        ('mediaType', SOURCES[mtype]),
        ('uri', gc_data[v_id][mtype + "_href"])
    ])
    video_data['sources'] = lmap(func, SOURCES)

    video.update(video_data)
    del video['uri']  # returned by elife-tools, not part of spec

    # Add placeholder, the video thumbnail image
    video["placeholder"] = OrderedDict()
    video["placeholder"]["uri"] = video["image"].split('/')[-1]
    video["placeholder"]["alt"] = ""

    return video
def main():
    line = input()
    intcode = lmap(int, line.split(','))
    rv = run_intcode(intcode, 1)
    print('1.)', rv)
    rv = run_intcode(intcode, 5)
    print('2.)', rv)
def expand_videos(msid, video):
    gc_data = metadata(msid)  # cached on first hit
    gc_id_str = ", ".join(gc_data.keys())
    v_id = video['id']

    ensure(v_id in gc_data, "glencoe doesn't know %r, only %r" % (v_id, gc_id_str))

    video_data = gc_data[v_id]
    video_data = utils.subdict(video_data, ['jpg_href', 'width', 'height'])
    video_data = utils.renkeys(video_data, [('jpg_href', 'image')])

    func = lambda mtype: {
        'mediaType': SOURCES[mtype],
        'uri': gc_data[v_id][mtype + "_href"]
    }
    video_data['sources'] = lmap(func, SOURCES)

    video.update(video_data)
    del video['uri']  # returned by elife-tools, not part of spec

    # Add placeholder, the video thumbnail image
    video["placeholder"] = {}
    video["placeholder"]["uri"] = video["image"].split('/')[-1]
    video["placeholder"]["alt"] = ""

    return video
def _generative_step(self, batch: dict) -> dict:
    t0 = time.time()
    # generated_ids = self.model.generate(
    #     batch["input_ids"],
    #     attention_mask=batch["attention_mask"],
    #     use_cache=True,
    #     decoder_start_token_id=self.decoder_start_token_id,
    # )
    generated_ids = self.model.generate(
        batch["input_ids"],
        attention_mask=batch["attention_mask"],
        encoder_answer_relevance_atten=batch['answer_relevance_atten'],
        use_cache=True,
        decoder_start_token_id=self.decoder_start_token_id,
    )
    gen_time = (time.time() - t0) / batch["input_ids"].shape[0]
    preds: List[str] = self.ids_to_clean_text(generated_ids)
    target: List[str] = self.ids_to_clean_text(batch["labels"])
    loss_tensors = self._step(batch)
    base_metrics = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
    rouge: Dict = self.calc_generative_metrics(preds, target)
    summ_len = np.mean(lmap(len, generated_ids))
    base_metrics.update(gen_time=gen_time, gen_len=summ_len, preds=preds, target=target, **rouge)
    return base_metrics
def _generative_step(self, batch: dict) -> dict:
    start_time = time.time()
    generated_ids = self.model.generate(
        batch["input_ids"],
        attention_mask=batch["attention_mask"],
        do_deduplication=False,  # rag specific parameter
        use_cache=True,
        min_length=1,
        max_length=self.target_lens["val"],
    )
    gen_time = (time.time() - start_time) / batch["input_ids"].shape[0]
    preds: List[str] = self.ids_to_clean_text(generated_ids)
    target: List[str] = self.ids_to_clean_text(batch["decoder_input_ids"])
    loss_tensors = self._step(batch)
    base_metrics = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
    gen_metrics: Dict = self.calc_generative_metrics(preds, target)
    summ_len = np.mean(lmap(len, generated_ids))
    base_metrics.update(gen_time=gen_time, gen_len=summ_len, preds=preds, target=target, **gen_metrics)
    return base_metrics
def _generative_step(self, batch: dict) -> dict:
    pad_token_id = self.tokenizer.pad_token_id
    source_ids, source_mask, y = SummarizationDataset.trim_seq2seq_batch(batch, pad_token_id)
    t0 = time.time()
    generated_ids = self.model.generate(
        input_ids=source_ids,
        attention_mask=source_mask,
        use_cache=True,
        decoder_start_token_id=self.decoder_start_token_id,
    )
    gen_time = (time.time() - t0) / source_ids.shape[0]
    preds = self.ids_to_clean_text(generated_ids)
    target = self.ids_to_clean_text(y)
    loss_tensors = self._step(batch)
    base_metrics = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
    rouge: Dict = self.calc_generative_metrics(preds, target)
    summ_len = np.mean(lmap(len, generated_ids))
    base_metrics.update(gen_time=gen_time, summ_len=summ_len, preds=preds, target=target, **rouge)
    return base_metrics
def _generative_step(self, batch: dict) -> dict:
    t0 = time.time()
    # parser.add_argument('--eval_max_gen_length', type=int, default=None, help='never generate more than n tokens')
    generated_ids = self.model.generate(
        batch["input_ids"],
        attention_mask=batch["attention_mask"],
        use_cache=True,
        decoder_start_token_id=self.decoder_start_token_id,
        num_beams=self.eval_beams,
        max_length=self.eval_max_length,
    )
    gen_time = (time.time() - t0) / batch["input_ids"].shape[0]
    preds: List[str] = self.ids_to_clean_text(generated_ids)
    target: List[str] = self.ids_to_clean_text(batch["labels"])
    loss_tensors = self._step(batch)
    base_metrics = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
    rouge: Dict = self.calc_generative_metrics(preds, target)
    summ_len = np.mean(lmap(len, generated_ids))
    base_metrics.update(gen_time=gen_time, gen_len=summ_len, preds=preds, target=target, **rouge)
    return base_metrics
def _generative_step(self, batch: dict) -> dict:
    t0 = time.time()
    # TODO(LISA)
    # write the prompt generation from self.model.
    # parser.add_argument('--eval_max_gen_length', type=int, default=None, help='never generate more than n tokens')
    # get the prompt:
    bsz = batch["input_ids"].size(0)
    prefix_prompt = self.model.get_prompt(bsz=bsz, sample_size=self.eval_beams)
    # print(prefix_prompt)
    generated_ids = self.seq2seq_model.generate(
        batch["input_ids"],
        past_key_values=prefix_prompt,
        attention_mask=batch["attention_mask"],
        use_cache=True,
        length_penalty=self.hparams.length_penalty,
        use_prefix=True,
        decoder_start_token_id=self.decoder_start_token_id,
        num_beams=self.eval_beams,
        min_length=self.eval_min_length,
        max_length=self.eval_max_length,
    )
    gen_time = (time.time() - t0) / batch["input_ids"].shape[0]
    preds: List[str] = self.ids_to_clean_text(generated_ids)
    target: List[str] = self.ids_to_clean_text(batch["labels"])
    loss_tensors = self._step(batch)
    base_metrics = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
    # print('INPUT:', self.ids_to_clean_text(batch["input_ids"]))
    # print(preds, target)
    rouge: Dict = self.calc_generative_metrics(preds, target)
    summ_len = np.mean(lmap(len, generated_ids))
    base_metrics.update(gen_time=gen_time, gen_len=summ_len, preds=preds, target=target, **rouge)
    return base_metrics
def test_distill_checkpointing_with_teacher(self):
    updates = dict(
        student_encoder_layers=2,
        student_decoder_layers=1,
        max_epochs=4,
        val_check_interval=0.25,
        alpha_hid=2.0,
        model_name_or_path="IGNORE_THIS_IT_DOESNT_GET_USED",
    )
    model = self._test_distiller_cli(updates, check_contents=False)

    ckpts = list(Path(model.output_dir).glob("*.ckpt"))
    self.assertEqual(1, len(ckpts))
    transformer_ckpts = list(Path(model.output_dir).glob("**/*.bin"))
    self.assertEqual(len(transformer_ckpts), 2)
    examples = lmap(str.strip, model.hparams.data_dir.joinpath("test.source").open().readlines())
    out_path = tempfile.mktemp()
    generate_summaries_or_translations(examples, out_path, str(model.output_dir / "best_tfmr"))
    self.assertTrue(Path(out_path).exists())

    out_path_new = tempfile.mkdtemp()
    convert_pl_to_hf(ckpts[0], transformer_ckpts[0].parent, out_path_new)
    assert os.path.exists(os.path.join(out_path_new, "pytorch_model.bin"))
def main(xml_dir, json_output_dir):
    paths = lmap(lambda fname: join(xml_dir, fname), os.listdir(xml_dir))
    paths = lfilter(lambda path: path.lower().endswith('.xml'), paths)
    paths = sorted(paths, reverse=True)
    num_processes = 2
    Parallel(n_jobs=num_processes)(delayed(render)(path, json_output_dir) for path in paths)
    print('see scrape.log for errors')
def main():
    line = input()
    intcode = lmap(int, line.split(','))
    intcode.extend([0] * 1000)
    O, cost = task1(intcode.copy())
    print("1.)", cost)
    cnt = task2(intcode, O)
    print("2.)", cnt)
def main():
    line = input()
    intcode = lmap(int, line.split(','))
    intcode.extend([0] * 10000)
    r = task1(intcode)
    print("1.)", r)
    r = task2(intcode)
    print("2.)", r)
def main(xml_dir, json_output_dir, num=None):
    paths = lmap(lambda fname: join(xml_dir, fname), os.listdir(xml_dir))
    paths = lfilter(lambda path: path.lower().endswith('.xml'), paths)
    paths = sorted(paths, reverse=True)
    if num:
        paths = paths[:num]  # only scrape first n articles
    num_processes = -1
    Parallel(n_jobs=num_processes)(delayed(render)(path, json_output_dir) for path in paths)
    print('see scrape.log for errors')
def read_labels(self, labels_path):
    with open(labels_path, "rt") as file_:
        func = lambda line: lmap(int, reversed(line.split()))
        labels = dict(map(func, file_.readlines()))
    return labels
def serialize_overrides(override_map):
    def serialize(pair):
        key, val = pair
        ensure(isinstance(key, basestring), "key must be a string")
        ensure('|' not in key, "key must not contain a pipe")
        key = key.strip()
        ensure(key, "key must not be empty")
        return '|'.join([key, json.dumps(val)])
    return lmap(serialize, override_map.items())
def pdf_uri(triple):
    """predict an article's pdf url.
    some article types don't have a PDF (like corrections) and some older articles
    that should have a pdf, don't. this function doesn't concern itself with those
    latter exceptions."""
    content_type, msid, version = triple
    if content_type and any(lmap(lambda type: type in ['Correction', 'Retraction'], content_type)):
        return EXCLUDE_ME
    filename = "elife-%s-v%s.pdf" % (utils.pad_msid(msid), version)  # ll: elife-09560-v1.pdf
    return cdnlink(msid, filename)
def figures_pdf_uri(triple):
    graphics, msid, version = triple
    filename_match = '-figsupp'

    if any(lmap(lambda graphic: graphic.get('xlink_href') and filename_match in graphic.get('xlink_href'), graphics)):
        filename = "elife-%s-figures-v%s.pdf" % (utils.pad_msid(msid), version)  # ll: elife-09560-figures-v1.pdf
        figures_pdf_cdnlink = cdnlink(msid, filename)
        return cdn.url_exists(figures_pdf_cdnlink, msid)
    else:
        return None
def deserialize_overrides(override_list):
    def splitter(string):
        if isinstance(string, list):
            pair = string  # already split into pairs, return what we have
            return pair
        ensure('|' in string, "override key and value must be separated by a pipe '|'")
        first, rest = string.split('|', 1)
        ensure(rest.strip(), "a value must be provided. use 'null' without quotes to use an empty value")
        return first, rest
    pairs = lmap(splitter, override_list)
    return OrderedDict([(key, utils.json_loads(val)) for key, val in pairs])
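# A hedged round-trip sketch for serialize_overrides/deserialize_overrides above. It
# assumes ensure() raises when its first argument is falsy, basestring is aliased to
# str on Python 3, and utils.json_loads behaves like json.loads; the values are
# illustrative only.
overrides = OrderedDict([("title", "A new title"), ("volume", 7)])
serialized = serialize_overrides(overrides)   # ['title|"A new title"', 'volume|7']
assert deserialize_overrides(serialized) == overrides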
def get_spectrograms(self, path, folder_name, pattern='', ignore_names=None):
    """
    The base_loader allows the most flexibility in structuring your files, though it is
    probably overly complicated at the moment. Other, simpler loaders inherit from the
    base loader and override this method.

    :param path: Base path containing folders that can contain events and noise.
        Example:
            /path/California
            /path/Oklahoma
            /path/Hawaii
    :param folder_name: Could be "noise" or "quakes" if following the recommended file
        organization. Will return paths within this folder.
        Example:
            /path/California/noise
            /path/California/quakes
            ...
    :param pattern: Pattern used to access spectrogram folders within folder_name. The
        default setting assumes the files are set up as something like:
            /path/California/quakes/1/first_component.png
            /path/California/quakes/1/second_component.png
            /path/California/quakes/1/vertical_component.png
            ...
        where the names of the components can be anything; there should be 3 components
        per folder. Custom patterns are passed as a regex string and allow specifying
        the location of the spectrograms within the subfolders of the path:
            /path/California/pattern/quakes/1/first_component.png
    :param ignore_names: Folder names under path to skip entirely.
    :return: Sorted list of component file paths.
    """
    ignore_names = ignore_names or []  # guard against the default None
    folders = lmap(os.path.basename, glob.glob(os.path.join(path, '*')))
    folders = [f for f in folders if f not in ignore_names]

    def get_file_paths(folder_path):
        folders_path = os.path.join(path, folder_path, pattern, folder_name, '*/')
        subfolder_paths = glob.glob(folders_path)
        return self.get_components(subfolder_paths)

    file_paths = []
    for folder in folders:
        file_paths += get_file_paths(folder)

    # Maintain the same order each time, guaranteed with sorting
    file_paths.sort()
    return file_paths
def main():
    line = input()
    intcode = lmap(int, line.split(','))
    intcode.extend([0] * 100000)
    x = IntCodeProgram(intcode)
    code = x.run_intcode(iter([1]))
    print(code)
    x = IntCodeProgram(intcode)
    code = x.run_intcode(iter([2]))
    print(code)
def main():
    line = input()
    intcode = lmap(int, line.split(','))
    r = run_intcode(intcode)
    print('1.)', r)
    for n, v in itertools.product(range(100), range(100)):
        r = run_intcode(intcode, n, v)
        if r == 19690720:
            print('2.)', n * 100 + v)
            break
def main():
    intcode = lmap(int, input().split(','))
    intcode.extend([0] * 1000)
    computers = []
    for ip in range(50):
        pc = IntCodeProgram(intcode)
        pc.add_input(ip)
        computers.append(pc)
    print("1.)", task1(deepcopy(computers)))
    print("2.)", task2(computers))
def pdf_uri(triple):
    """predict an article's pdf url.
    some article types don't have a PDF (like corrections) and some older articles
    that should have a pdf, don't. this function doesn't concern itself with those
    latter exceptions."""
    content_type, msid, version = triple
    if content_type and any(lmap(lambda type: type in ['Correction'], content_type)):
        return EXCLUDE_ME
    filename = "elife-%s-v%s.pdf" % (utils.pad_msid(msid), version)  # ll: elife-09560-v1.pdf
    return cdnlink(msid, filename)
def main(args=None):
    target = first(args) or conf.JSON_DIR
    if os.path.isdir(target):
        paths = lmap(lambda fname: join(target, fname), os.listdir(target))
        paths = sorted(paths, reverse=True)
    else:
        paths = [os.path.abspath(target)]
    paths = lfilter(lambda path: path.lower().endswith('.json'), paths)
    print('jobs %d' % len(paths))
    Parallel(n_jobs=-1)(delayed(job)(path) for path in paths)
    print('see validate.log for errors')
def task2(intcode):
    intcode[0] = 2
    prog = IntCodeProgram(intcode)
    out = prog.run()
    out = ''.join(lmap(chr, out))
    robot_pos = out.find('^')
    out = lmap(ord, out[:out.find('\n\n') + 1])
    i = out.index(ord('\n'))
    a = np.array(out).reshape(-1, i + 1)
    robot_pos = robot_pos // a.shape[1] + 1, robot_pos % a.shape[1] + 1
    moves = find_moves(np.pad(a, 1, constant_values=ord('.')), robot_pos)
    # solution found by hand using vim
    m = ('C,A,C,B,C,A,B,C,A,B\n'
         'R,6,L,8,R,10\n'
         'L,8,R,4,R,4,R,6\n'
         'R,12,R,4,R,10,R,12\n'
         'n\n')
    for i in m:
        prog.add_input(ord(i))
    out = prog.run()
    return out[-1]
def _generative_step(self, batch: dict) -> dict:
    t0 = time.time()
    generated_ids = self.model.generate(
        batch["input_ids"],
        attention_mask=batch["attention_mask"],
        use_cache=True,
        decoder_start_token_id=self.decoder_start_token_id,
        num_beams=self.eval_beams,
        no_repeat_ngram_size=0,
        min_length=0,
        max_length=self.eval_max_length,
        length_penalty=1.0,
    )
    gen_time = (time.time() - t0) / batch["input_ids"].shape[0]
    preds: List[str] = self.ids_to_clean_text(generated_ids)
    target: List[str] = self.ids_to_clean_text(batch["labels"])

    y = batch["labels"]
    decoder_input_ids = y[:, :-1].contiguous()
    lm_labels = y[:, 1:].clone()

    a = self.tokenizer.batch_decode(batch["input_ids"].tolist())
    # b = self.tokenizer.batch_decode(batch["labels"].tolist())
    b = self.tokenizer.batch_decode(lm_labels.tolist())
    c = self.tokenizer.batch_decode(generated_ids)
    pad_token_id = self.tokenizer.pad_token_id
    tgt_ids = batch["labels"]
    # if isinstance(self.model, T5ForConditionalGeneration):
    #     decoder_input_ids = self.model._shift_right(tgt_ids)
    # else:
    #     decoder_input_ids = shift_tokens_right(tgt_ids, pad_token_id)
    e = self.tokenizer.batch_decode(decoder_input_ids.tolist())

    loss_tensors = self._step(batch)
    base_metrics = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
    rouge: Dict = self.calc_generative_metrics(preds, target)
    summ_len = np.mean(lmap(len, generated_ids))
    base_metrics.update(gen_time=gen_time, gen_len=summ_len, preds=preds, target=target, a=a, b=b, c=c, e=e, **rouge)
    return base_metrics
def figures_pdf_uri(triple):
    graphics, msid, version = triple
    filename_match = '-figsupp'

    if any(lmap(lambda graphic: graphic.get('xlink_href') and filename_match in graphic.get('xlink_href'), graphics)):
        filename = "elife-%s-figures-v%s.pdf" % (utils.pad_msid(msid), version)  # ll: elife-09560-figures-v1.pdf
        figures_pdf_cdnlink = cdnlink(msid, filename)
        return cdn.url_exists(figures_pdf_cdnlink, msid)
    else:
        return None
def mixed_citation_to_related_articles(mixed_citation_list):
    # ll: [{'article': {'authorLine': 'R Straussman et al',
    #                   'authors': [{'given': u'R', 'surname': u'Straussman'}, ...],
    #                   'doi': u'10.1038/nature11183', 'pub-date': [2014, 2, 28], 'title': u'Pants-Party'},
    #       'journal': {'volume': u'487', 'lpage': u'504', 'name': u'Nature', 'fpage': u'500'}}]
    def et(struct):
        return OrderedDict([
            ('type', 'external-article'),
            ('articleTitle', p(struct, 'article.title')),
            ('journal', p(struct, 'journal.name')),
            ('authorLine', p(struct, 'article.authorLine')),
            ('uri', 'https://doi.org/%s' % p(struct, 'article.doi')),
        ])
    return lmap(et, mixed_citation_list)
def _generative_step(self, batch):
    pad_token_id = self.tokenizer.pad_token_id
    source_ids, source_mask, y = SummarizationDataset.trim_seq2seq_batch(batch, pad_token_id)
    # TODO(SS): task specific params
    t0 = time.time()
    generated_ids = self.model.generate(
        input_ids=source_ids,
        attention_mask=source_mask,
        use_cache=True,
    )
    gen_time = time.time() - t0
    preds = self.ids_to_clean_text(generated_ids)
    target = self.ids_to_clean_text(y)
    loss_tensors = self._step(batch)
    base_metrics = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
    rouge: Dict = calculate_rouge(preds, target)
    summ_len = np.mean(lmap(len, generated_ids))
    base_metrics.update(gen_time=gen_time, summ_len=summ_len, preds=preds, target=target, **rouge)
    return base_metrics
def main():
    img = lmap(int, input())
    img = np.array(img)
    img.shape = (-1, 6, 25)
    nz = np.sum(img == 0, axis=1).sum(axis=1)
    a = img[np.argmin(nz)]
    print('1.)', np.equal(a, 1).sum() * np.equal(a, 2).sum())

    a = np.ones(img.shape[1:]) * 2
    mask = np.zeros(img.shape[1:]).astype(bool)
    for i in img:
        a[mask] = i[mask]
        mask = a == 2
    plt.imshow(a)
    plt.show()
def deserialize_overrides(override_list):
    def splitter(string):
        if isinstance(string, list):
            pair = string  # already split into pairs, return what we have
            return pair
        ensure('|' in string, "override key and value must be separated by a pipe '|'")
        first, rest = string.split('|', 1)
        ensure(rest.strip(), "a value must be provided. use 'null' without quotes to use an empty value")
        return first, rest
    pairs = lmap(splitter, override_list)
    return {key: json.loads(val) for key, val in pairs}
def mixed_citation_to_related_articles(mixed_citation_list):
    # ll: [{'article': {'authorLine': 'R Straussman et al',
    #                   'authors': [{'given': u'R', 'surname': u'Straussman'}, ...],
    #                   'doi': u'10.1038/nature11183', 'pub-date': [2014, 2, 28], 'title': u'Pants-Party'},
    #       'journal': {'volume': u'487', 'lpage': u'504', 'name': u'Nature', 'fpage': u'500'}}]
    def et(struct):
        return {
            'type': 'external-article',
            'articleTitle': p(struct, 'article.title'),
            'journal': p(struct, 'journal.name'),
            'authorLine': p(struct, 'article.authorLine'),
            'uri': 'https://doi.org/%s' % p(struct, 'article.doi'),
        }
    return lmap(et, mixed_citation_list)
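# Hedged illustration only: assuming p() is a dotted-path getter into the nested dict,
# the example citation in the comment above would map to roughly this entry:
expected_entry = {
    'type': 'external-article',
    'articleTitle': 'Pants-Party',
    'journal': 'Nature',
    'authorLine': 'R Straussman et al',
    'uri': 'https://doi.org/10.1038/nature11183',
}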
def main(args):
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--dry-run', action='store_true')
    parser.add_argument('paths', nargs="*")
    args = parser.parse_args(args)

    # read any filenames that were passed in as arguments
    paths = args.paths

    # failing that, try reading from stdin
    if not paths:
        paths = read_from_stdin()

    try:
        paths = lmap(json.loads, paths)
    except ValueError:
        # assume filenames.
        pass

    return do_paths(paths, dry_run=args.dry_run)