def do_quest(self, player_state: PlayerState):
    while player_state.inventory[Items.AntiqueMachete] < 1:
        park.resolve_turn(player_state)
    utils.unlock(player_state)
    while player_state.locations[apartment].progress < 1:
        apartment.resolve_turn(player_state)
    utils.unlock(player_state)
    while player_state.locations[office].progress < 1:
        office.resolve_turn(player_state)
    utils.unlock(player_state)
    while player_state.locations[hospital].progress < 1:
        hospital.resolve_turn(player_state)
    utils.unlock(player_state)
    while player_state.locations[alley].progress < 5:
        alley.resolve_turn(player_state)
    utils.unlock(player_state)
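The quest loop above leans on a game-state API the excerpt does not include. A minimal sketch of the assumed pieces (these PlayerState, LocationState, and Items definitions are illustrative guesses, not the project's actual types):

# Illustrative stand-ins for the types the quest loop assumes: guesses, not
# the project's actual definitions.
from dataclasses import dataclass, field
from enum import Enum, auto

class Items(Enum):
    AntiqueMachete = auto()

@dataclass
class LocationState:
    progress: int = 0

@dataclass
class PlayerState:
    inventory: dict = field(default_factory=dict)   # Items -> count owned
    locations: dict = field(default_factory=dict)   # location -> LocationState

Each location object (park, apartment, ...) would then expose a resolve_turn(player_state) method that advances its LocationState.progress or grants items, with utils.unlock(player_state) opening the next area between gates.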
Example #2
def add_urls(urls, oauth_token, oauth_secret):
    rdd = readability.oauth(
        settings.API_KEY,
        settings.API_SECRET,
        token=(oauth_token, oauth_secret))

    for url in urls:
        try:
            logging.info('Adding url %s', url.get('url'))
            rdd.add_bookmark(url=url.get('url'), archive=url.get('archive', False))

        except Exception as e:
            logging.error(str(e))

        time.sleep(settings.ARTICLE_THROTTLE)

    logging.info('Removing lock for %s', oauth_token)
    unlock(oauth_token)
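The helper above ends by releasing a per-token lock it never acquired, so a caller presumably takes the lock first. A minimal sketch of such a caller, assuming lock() is the token-keyed counterpart of the unlock() used above (hypothetical, not from the source project):

# Hypothetical driver: assumes lock(token) is the counterpart of the
# unlock(token) call that add_urls makes when the batch is finished.
def process_batch(batch, oauth_token, oauth_secret):
    lock(oauth_token)  # add_urls releases this lock once all urls are added
    add_urls(batch, oauth_token, oauth_secret)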
Example #3
#     parser.add_argument('--data', required=True, help='dataset path')
#     parser.add_argument('--epoch', default=1, help='epochs')
#     return parser.parse_args()

if __name__ == '__main__':
    try:
        logger.info("------ start ------")
        utils.lock()

        # args = parse_args()
        # if not os.path.exists(args.data):
        #     raise LunaExcepion(config.inputerr)

        # here write some logic

    except (KeyboardInterrupt, SystemExit):
        utils.unlock()
        utils.error(config.syserr)
    except LunaExcepion as e:
        utils.error(e.value)
        if e.value == config.locked:
            # another instance holds the lock; leave it in place and stop
            logger.info("------ end ------")
            exit()
    except Exception as e:
        logger.error(e)
        logger.error(traceback.format_exc())
        utils.error(config.syserr)
    utils.unlock()
    logger.info("------ end ------")
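None of these examples show utils.lock/utils.unlock themselves. A minimal sketch of the single-instance pattern they suggest, assuming a plain lock-file implementation (illustrative only, not the actual utils module of any project here):

# Illustrative lock-file helpers: an assumption about what utils.lock() and
# utils.unlock() might do, not the actual implementation.
import os

LOCK_FILE = "/tmp/app.lock"

def lock(path=LOCK_FILE):
    # O_CREAT | O_EXCL fails atomically if the file already exists, so only
    # one process can hold the lock at a time
    fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    os.write(fd, str(os.getpid()).encode())
    os.close(fd)

def unlock(path=LOCK_FILE):
    if os.path.exists(path):
        os.remove(path)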
Example #4
def unlockInstance(self):
    if self.lock_file is not None:
        utils.unlock(self.lock_file)
Example #6
#!/usr/bin/env python

# To change this template, choose Tools | Templates
# and open the template in the editor.

__author__ = "Filip"
__date__ = "$12-Nov-2010 17:06:00$"

import utils
import time

if __name__ == "__main__":
    utils.lock()

    for i in range(10):
        print "Sleeping: ", i
        time.sleep(1)

    utils.unlock()

    for i in range(10):
        print "Unlocked: ", i
        time.sleep(1)
Example #7
def main(args, subparsers):
    print(args)
    print("Started experiment!")
    utils.print_args(args)
    utils.set_seed(args.seed)

    ###################################################################################
    ################################# Initialization ##################################
    ###################################################################################
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = transformers.AutoModelForCausalLM.from_pretrained(
        args.pretrained_class, local_files_only=True).eval()
    # register special tokens
    # num_added_tokens = tokenizer.add_special_tokens({"bos_token": "<BOS>", "eos_token": "<EOS>",
    # "pad_token": "<PAD>"})
    model.to(device)

    tokenizer = transformers.AutoTokenizer.from_pretrained(
        args.pretrained_class, local_files_only=True)
    sampler_args = vars(subparsers[args.sampler].parse_known_args()[0])
    sampler_args_items = "-".join(
        [f"{k}:{v}" for k, v in sampler_args.items()])

    tokenizer_args = f"tokenizer:{tokenizer.__class__.__name__}"
    args.pretrained_class = args.pretrained_class.replace("/", "_")
    if args.pretrained_class == "ctrl":
        tokenizer_args += f"-ctrl_code:{args.ctrl_code}"
    elif "gpt2" in args.pretrained_class:
        tokenizer.pad_token = tokenizer.eos_token

    pretrained_class = args.pretrained_class.replace("-", "_")
    sampler_name = args.sampler
    if args.sampler == "NegativeSampler":
        print(sampler_args)
        sampler_name += "_Negative_" + sampler_args['negative_base']
    output_file = f"model:{model.__class__.__name__}-model_class:{pretrained_class}-{tokenizer_args}-sampler:{sampler_name}-temperature:{args.temperature}-seq_length:{args.max_seq_length}-ngram:{args.gram}-{sampler_args_items}.txt"

    results_file = os.path.join("results/", args.pretrained_class,
                                args.results_file)

    # if our results file exists
    if os.path.exists(results_file):
        with open(results_file, "r+") as f:
            current = json.load(f)
        key = output_file[:-4]  # drop the ".txt" extension
        # check if we have already run this
        if key in current:
            raise Exception("We've already computed the result!" + " " +
                            results_file)

    print("Using", args.prefix_file, "as the prefix file!")
    if not args.prefix_file:
        if args.pretrained_class == "ctrl":
            input_tokens = [tokenizer.control_codes[args.ctrl_code]]
        else:
            input_tokens = [tokenizer.bos_token_id]
        input_tokens = torch.tensor(input_tokens).to(device).unsqueeze(0)
    else:
        with open(args.prefix_file, "r") as f:
            # drop empty lines and lines too short to cover the prefix
            lines = []
            for line in f.readlines():
                if line.strip() and line.count(" ") > args.prefix_length:
                    lines.append(line)

            # shuffle to ensure we have some diversity
            random.shuffle(lines)
            # truncate to the number of sentences we are generating
            lines = lines[:args.num_sentences]
            input_tokens = tokenizer.batch_encode_plus(
                lines,
                add_special_tokens=False,
                truncation=True,
                max_length=args.prefix_length,
                padding="max_length",
                return_tensors="pt")
            attention_mask = input_tokens['attention_mask']
            input_tokens = input_tokens['input_ids']
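            # prepend a BOS column to both the token ids and the attention
            # mask so each prefixed sequence starts from beginning-of-text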
            attn_token = torch.tensor([1]).unsqueeze(0).repeat(
                args.num_sentences, 1)
            attention_mask = torch.cat((attn_token, attention_mask), dim=1)
            assert tokenizer.bos_token_id not in input_tokens[0]
            bos_token = torch.tensor([tokenizer.bos_token_id
                                      ]).unsqueeze(0).repeat(
                                          args.num_sentences, 1)
            input_tokens = torch.cat((bos_token, input_tokens), dim=1)

    print("Input Tokens:", input_tokens.shape)

    all_sentences = []
    k_primes, p_primes, entropy_primes = [], [], []
    num_sentences_left = args.num_sentences
    sentences_per_batch = args.generation_batch_size
    all_logprobs = []

    with torch.no_grad():
        for idx in range(ceil(args.num_sentences / sentences_per_batch)):
            batch_size = min(sentences_per_batch, num_sentences_left)

            schedule = getattr(sampler, args.sampler)(**sampler_args)
            if input_tokens.shape[0] == 1:
                num_return_sequences = 1
                input_ids = input_tokens
            else:
                # step through input_tokens one whole batch at a time; a
                # plain [idx:idx + batch_size] slice would overlap batches
                start = idx * sentences_per_batch
                input_ids = input_tokens[start:start + batch_size].to(device)
                num_return_sequences = 1

            num_sentences_left -= batch_size

            sentences, model_logits, transformed_logits = filtering.generate(
                model=model,
                input_ids=input_ids,
                max_length=args.max_seq_length,
                do_sample=True,
                num_beams=None,
                temperature=args.temperature,
                schedule=schedule,
                repetition_penalty=1.0,
                bos_token_id=tokenizer.bos_token_id,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
                num_return_sequences=num_return_sequences,
                dry_run=args.dry_run)
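            # (inferred from the names: sentences are the generated token
            # ids, model_logits the raw per-step logits, transformed_logits
            # the logits after the sampler's filtering schedule)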

            #########################################################################
            ############################### K Prime #################################
            #########################################################################
            sz = list(transformed_logits.size())
            # careful: make sure this mask makes sense for the tokenizer in use
            mask = (sentences[:, -sz[1]:] > 0).to(device)

            distro = torch.softmax(
                transformed_logits.view(-1, sz[-1]).to(device), dim=-1)
            # cast the bool comparison to float before summing
            k_prime = torch.sum(
                (distro > (1.0 / transformed_logits.size(-1))).float(),
                dim=-1).view(sz[0], sz[1])
            k_prime = torch.masked_select(k_prime, mask)
            assert torch.min(k_prime).item() > 0
            k_prime = torch.log(k_prime)
            k_primes.extend(k_prime.cpu().tolist())

            ori_distro = torch.softmax(
                model_logits[:, -sz[1]:, :].contiguous().view(-1, sz[-1]).to(device),
                dim=-1)
            ori_distro = ori_distro * (distro > (1.0 / transformed_logits.size(-1)))
            p_prime = torch.sum(ori_distro, dim=-1).view(sz[0], sz[1])
            p_prime = torch.log(torch.masked_select(p_prime, mask).float())
            p_primes.extend(p_prime.cpu().tolist())

            entropy = -torch.sum(distro * torch.log(distro + 1e-10),
                                 dim=-1).view(sz[0], sz[1])
            entropy = torch.masked_select(entropy, mask)
            entropy_primes.extend(entropy.cpu().tolist())

            ##################################################################
            ######################### K Prime Ends ##########################
            ##################################################################

            transformed_logits = transformed_logits.to(device)
            model_logits = model_logits.to(device)
            sentences = sentences.to(device)
            logprobs = utils.calculate_logprobs(
                sentences,
                transformed_logits,
                model_logits,
                args.prefix_length,
                0,
                interpolate_ratio=args.filter_weight,
                batch_size=args.generation_batch_size)
            del model_logits
            del transformed_logits
            gc.collect()

            all_logprobs.append(logprobs.cpu().detach())
            all_sentences.append(sentences.cpu().detach())

    all_sentences = torch.cat(all_sentences, dim=0)
    all_logprobs = torch.cat(all_logprobs, dim=0)
    k_prime, p_prime, entropy_prime = np.mean(k_primes), np.mean(
        p_primes), np.mean(entropy_primes)
    print('Entropy Prime:', entropy_prime, 'K Prime:', k_prime, 'P Prime:',
          p_prime)
    results = {
        'k_prime': k_prime,
        'p_prime': p_prime,
        'entropy_prime': entropy_prime
    }

    del model
    print("Final shapes:", all_sentences.shape, all_logprobs.shape)
    # all_text_sentences excludes the prefix
    all_text_sentences = []
    # prefixed_text_sentences includes the prefix
    prefixed_text_sentences = []
    for idx in range(all_sentences.shape[0]):  # iterate over the batch dimension
        idx_offset = 1 if args.pretrained_class == "ctrl" else 0
        prefixed_sentence = all_sentences[idx, idx_offset:].tolist()
        idx_offset += args.prefix_length
        sentence = all_sentences[idx, idx_offset:].tolist()

        decoded_sentence = tokenizer.decode(sentence,
                                            skip_special_tokens=True,
                                            clean_up_tokenization_spaces=True)
        prefixed_decoded_sentence = tokenizer.decode(
            prefixed_sentence,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True)
        # strip trailing "!" characters left over from padding; use a fresh
        # loop variable so we don't clobber the outer batch index
        j = -1
        for j in reversed(range(len(decoded_sentence))):
            if decoded_sentence[j] != "!":
                break

        decoded_sentence = decoded_sentence[:j + 1]

        j = -1
        for j in reversed(range(len(prefixed_decoded_sentence))):
            if prefixed_decoded_sentence[j] != "!":
                break

        prefixed_decoded_sentence = prefixed_decoded_sentence[:j + 1]

        # all_text excludes the prefix; prefixed includes it.
        all_text_sentences.append(decoded_sentence)
        prefixed_text_sentences.append(prefixed_decoded_sentence)

    ###################################################################################
    ############################ Score the Generated Texts ############################
    ###################################################################################
    results_file = os.path.join("results/", args.pretrained_class,
                                args.results_file)
    results_basename = os.path.basename(results_file).replace(".json", "")
    results_dir = os.path.dirname(results_file)
    if not os.path.isdir(results_dir):
        os.makedirs(results_dir)

    #results = {}  # moved to k/p/ent_prime
    scores = {}
    files = os.path.join("saved_generations/", results_basename,
                         args.pretrained_class, args.output_dir, output_file)
    files_dir = os.path.dirname(files)
    if not os.path.isdir(files_dir):
        os.makedirs(files_dir)

    print(f"Writing generated sentences to {files}.")
    utils.write_sentences(all_text_sentences, files)

    preprocessed_files = os.path.join("preprocessed_generations/",
                                      results_basename, args.pretrained_class,
                                      args.output_dir, output_file)
    preprocessed_files_dir = os.path.dirname(preprocessed_files)
    if not os.path.isdir(preprocessed_files_dir):
        os.makedirs(preprocessed_files_dir)

    print(f"Writing preprocessed sentences to {preprocessed_files}.")
    preprocessed_sentences, filtered_indices, filtered_lengths = utils.preprocess_text(
        prefixed_text_sentences,
        tokenizer,
        lmin=args.preprocessed_min,
        lmax=args.preprocessed_max)
    utils.write_sentences(preprocessed_sentences, preprocessed_files)

    # update the reference file to be chunked to our size
    reference_file = args.eval_text
    chunked_reference_file = f"{reference_file}_seq:{args.max_seq_length}_min:{args.preprocessed_min}_max:{args.preprocessed_max}_prefix:{args.prefix_length}_model:{args.pretrained_class.replace('models/', '')}"
    if not os.path.exists(chunked_reference_file):
        utils.lock(chunked_reference_file)
        print("Reference lock acquired!")
        # begin critical section!
        utils.chunk_and_prefix_file(reference_file,
                                    tokenizer,
                                    args.preprocessed_min,
                                    args.preprocessed_max,
                                    chunked_reference_file,
                                    prefix_length=args.prefix_length)
        # end critical section!
        utils.unlock(chunked_reference_file)

    filtered_tokenizations = []
    filtered_logprobs = []
    for idx in filtered_indices:
        filtered_tokenizations.append(all_sentences[idx])
        filtered_logprobs.append(all_logprobs[idx])
    filtered_tokenizations = torch.stack(filtered_tokenizations, dim=0)
    filtered_logprobs = torch.stack(filtered_logprobs, dim=0)

    del all_logprobs
    gc.collect()

    if args.eval_method == "BLEU":
        # use BLEU calculation
        smoothing_method = {"nist": SmoothingFunction().method3}
        for name, method in smoothing_method.items():
            scores[name] = utils.evaluate_bleu(
                files,
                chunked_reference_file,
                num_real_sentences=args.num_sentences,
                num_generated_sentences=args.num_sentences,
                gram=args.gram,
                smoothing_method=method,
                chunk_size=15)
            print()

        for name in smoothing_method.keys():
            results[name] = {}
            results[name]['scores'] = scores[name]

        # bleu5 is stored negated in the results file (presumably so that
        # lower-is-better curves plot consistently); flip it back for display
        results['nist']['scores']['bleu5'] = results['nist']['scores']['bleu5'] * -1.0
        bleu = results['nist']['scores']['bleu5'] * -1.0
        sbleu = results['nist']['scores']['self-bleu5']
    else:
        raise Exception("We don't support other automatic metrics!")

    print("Results:", bleu, sbleu)

    ###################################################################################
    ############################# Result Reporting Section ############################
    ###################################################################################

    if not args.dry_run:
        results_file = os.path.join("results/", args.pretrained_class,
                                    args.results_file)
        results_dir = os.path.dirname(results_file)
        if not os.path.isdir(results_dir):
            os.makedirs(results_dir)
        utils.lock(results_file)
        print("Lock acquired!")

        # begin critical section!
        if os.path.exists(results_file):
            with open(results_file, "r+") as f:
                current = json.load(f)
        else:
            current = {}

        key = output_file[:-4]  # drop the ".txt" extension
        current[key] = results
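        # write to a file with a random name, then rename it over the results
        # file; the rename is atomic on POSIX, so concurrent readers never
        # see a half-written JSON document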
        random_file = ''.join(
            random.SystemRandom().choice(string.ascii_uppercase +
                                         string.digits) for _ in range(10))
        random_file = os.path.join("results/", args.pretrained_class,
                                   random_file)
        with open(random_file, "w+") as f:
            json.dump(current, f)

        os.rename(random_file, results_file)

        # save generations
        saved_tokens_file = os.path.join("tokens/", results_basename,
                                         args.pretrained_class,
                                         args.output_dir, output_file)
        saved_tokens_dir = os.path.dirname(saved_tokens_file)
        if not os.path.isdir(saved_tokens_dir):
            os.makedirs(saved_tokens_dir)

        saved_tokens = {}
        saved_tokens['args'] = [
            vars(args),
            vars(subparsers[args.sampler].parse_known_args()[0])
        ]
        idx_offset = 1 if args.pretrained_class == "ctrl" else 0
        saved_tokens['with_prefix'] = all_sentences[:, idx_offset:].tolist()
        idx_offset += args.prefix_length
        saved_tokens['without_prefix'] = all_sentences[:, idx_offset:].tolist()

        with open("saved_tokens_file", "w+") as f:
            json.dump(saved_tokens, f)

        # save log probabilities
        preprocessed_logits = os.path.join("preprocessed_logprobs/",
                                           results_basename,
                                           args.pretrained_class,
                                           args.output_dir, output_file)
        preprocessed_logits_dir = os.path.dirname(preprocessed_logits)
        if not os.path.isdir(preprocessed_logits_dir):
            os.makedirs(preprocessed_logits_dir)

        d = {}
        print(filtered_logprobs.shape)
        for idx in range(filtered_logprobs.shape[0]):
            sent_id = hashlib.sha256(
                preprocessed_sentences[idx].encode()).hexdigest()
            # d is keyed by the sentence hash, so test the hash for duplicates
            if sent_id in d:
                raise Exception("Duplicate sentences found!")
            d[sent_id] = {
                "model_score": filtered_logprobs[idx].item(),
                "lengths": filtered_lengths[idx] - args.prefix_length,
                "sentence": preprocessed_sentences[idx]
            }
        print("Avg log probabilities:",
              (filtered_logprobs /
               (torch.tensor(filtered_lengths) - args.prefix_length)).mean(
                   dim=0))

        with open(preprocessed_logits, "w") as f:
            json.dump(d, f)

        # create plot
        plots_file = os.path.join("plots/", args.pretrained_class,
                                  args.results_file)
        plots_dir = os.path.dirname(plots_file)
        if not os.path.isdir(plots_dir):
            os.makedirs(plots_dir)

        plot = plotter.Plotter(results_file)
        plot.plot_curves()
        if args.plot_gold:
            params = {
                "eval_method": args.eval_method,
                "chunk": args.max_seq_length,
                "ngram": args.gram,
                "knn": args.knn,
                "num_sentences": args.num_sentences
            }
            result = plot.plot_gold(params)
            if not result:
                # We don't have a proper score for our reference file, so let's go ahead and create it.
                params['gold_file'] = chunked_reference_file.replace(
                    "test", "valid")
                print(
                    f"Evaluating gold point on {params['gold_file']} with KNN={args.knn}"
                )
                params['num_sentences'] = args.num_sentences
                params['reference_corpus'] = chunked_reference_file
                params['chunk'] = args.max_seq_length
                params['eval_method'] = args.eval_method
                params['knn'] = args.knn
                params['gram'] = args.gram
                params['device'] = device
                score_gold(params)
                result = plot.plot_gold(params)

        plot.save(plots_file.replace(".json", ""))
        # end critical section!
        utils.unlock(results_file)
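For reference, a hypothetical entry point for main above (how subparsers maps each sampler name to its own parser is an assumption, not shown in the source; the flag names mirror those the function reads):

# Hypothetical wiring for main(args, subparsers): assumes subparsers maps
# each sampler name to its own ArgumentParser, matching the
# subparsers[args.sampler].parse_known_args() lookups in main.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--pretrained_class", required=True)
    parser.add_argument("--sampler", default="TopKSampler")
    parser.add_argument("--seed", type=int, default=0)
    # ... the remaining flags referenced in main() ...
    args, _ = parser.parse_known_args()

    subparsers = {
        name: argparse.ArgumentParser()
        for name in ("TopKSampler", "NegativeSampler")
    }
    main(args, subparsers)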