Example no. 1
0
 def lock_until_qi_nonempty(qi, qo, timeout_sec):
     """Child-process helper: acquire the global lockfile and hold it.

     Reports progress to the parent via queue *qo* ('timed out',
     'acquired', 'released') and blocks on *qi* until told to 'release'.
     Exits the process with status 0 once done.
     """
     got_lock = True
     try:
         utils.lock(lockfile, timeout_sec)
     except utils.BinWifiException:
         got_lock = False
     if not got_lock:
         # Could not take the lock within timeout_sec; tell the parent.
         qo.put('timed out')
         return
     qo.put('acquired')
     # Hold the lock until the parent signals us to let go.
     wvtest.WVPASSEQ(qi.get(), 'release')
     qo.put('released')
     sys.exit(0)
        def atomic_save():
            """Serialize *self* to ``self._path`` as pretty-printed JSON.

            The dump happens under a file lock and through an atomic
            write, so concurrent readers never observe a partial file.
            (The ``encoding`` kwarg to json.dump exists on Python 2 only
            -- presumably this targets Python 2; TODO confirm.)

            Returns:
                True if the snapshot was written, False on any OS/IO error.
            """
            try:
                # Copy self (assumed mapping-like) into a plain dict first.
                snapshot = {}
                snapshot.update(self)

                with lock(self._path), \
                        atomic_write(self._path, 'wb') as handle:
                    json.dump(snapshot, handle,
                              sort_keys=True, indent=2, encoding='utf-8')
                return True
            except (OSError, IOError):
                return False
Example no. 3
0
def upload(request):
    """Django view: ingest an uploaded CSV of URLs and queue them for saving.

    Each CSV row is expected to hold a URL in column 0 and, optionally, an
    article type in column 3 ('Archive' marks the URL for archiving).
    Invalid URLs and blank rows are skipped.  The whole batch is handed to
    the add_urls Celery task, the session is locked against re-submission,
    and the user is redirected to the 'done' page.
    """
    if is_locked(request):
        return HttpResponseRedirect(reverse('index'))

    # 'lines' instead of 'file': avoid shadowing the builtin.
    lines = request.FILES['file'].readlines()
    file_csv = csv.reader(lines)
    urls_to_add = []

    for row in file_csv:
        if not row:
            # Blank line in the CSV: row[0] below would raise IndexError.
            continue
        url = row[0]
        try:
            validate(url)
        except Exception:
            # Not a valid URL -- skip this row entirely.
            continue

        # If we are here, we have a valid url
        try:
            article_type = row[3].strip()
        except Exception:
            # Could be an IndexError (short row) or AttributeError (None).
            article_type = ''

        urls_to_add.append({
            'url': url,
            'archive': article_type == 'Archive',
        })

    add_urls.delay(
        urls_to_add,
        request.session['oauth_token'],
        request.session['oauth_secret'])

    lock(request.session['oauth_token'])

    request.session['len_urls'] = len(urls_to_add)

    return HttpResponseRedirect(reverse('done'))
        def atomic_save():
            """Write a JSON snapshot of *self* to ``self._path``.

            The copy is dumped under a file lock via an atomic write, so a
            concurrent reader never sees a partially written file.
            (``encoding='utf-8'`` on json.dump exists on Python 2 only --
            presumably this code targets Python 2; TODO confirm.)

            Returns:
                True on success, False on any OS/IO failure.
            """
            try:
                # Copy self (assumed mapping-like -- TODO confirm) into a
                # plain dict so json.dump serializes predictably.
                data = {}
                data.update(self)

                with lock(self._path):
                    with atomic_write(self._path, 'wb') as handle:
                        json.dump(data,
                                  handle,
                                  sort_keys=True,
                                  indent=2,
                                  encoding='utf-8')

                return True
            except (OSError, IOError):
                return False
Example no. 5
0
# Configure logging from the external config file and grab the root logger.
logging.config.fileConfig("logging.conf")
logger = logging.getLogger()
#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'


# def parse_args():
#     parser = argparse.ArgumentParser()
#     parser.add_argument('--model', default='vgg', help='vgg or resnet')
#     parser.add_argument('--data', required=True, help='dataset path')
#     parser.add_argument('--epoch', default=1, help='epoches')
#     return parser.parse_args()

if __name__ == '__main__':
    try:
        logger.info("------ start ------")
        # Acquire the single-instance lock; presumably utils.lock() raises
        # LunaExcepion(config.locked) when another run holds it -- TODO confirm.
        utils.lock()

        # args = parse_args()
        # if not os.path.exists(args.data):
        #     raise LunaExcepion(config.inputerr)

        # here write some logic


    except (KeyboardInterrupt, SystemExit):
        # Interrupted: release the lock before reporting a system error.
        utils.unlock()
        utils.error(config.syserr)
    except LunaExcepion as e:
        utils.error(e.value)
        # NOTE(review): the lock is never released on a successful run, nor
        # on a LunaExcepion other than config.locked -- verify whether
        # utils.unlock() belongs in a finally block instead.
        if (e.value == config.locked):
            exit()
Example no. 6
0
def _run(argv):
    """Runs a wifi command.

    This is the main entry point of the script, and is called by the main
    function and also by commands which need to run other commands
    (e.g. restore).

    Args:
      argv: A list containing a command and a series of options, e.g.
        sys.argv[1:].

    Returns:
      Whether the command succeeded.

    Raises:
      BinWifiException: On file write failures.
    """
    global lockfile_taken

    serial = 'UNKNOWN'
    try:
        # NOTE(review): ('serial') is just the string 'serial', not a tuple;
        # check_output accepts a bare program name, so this runs an external
        # "serial" helper -- a trailing comma was probably intended.
        serial = subprocess.check_output(('serial')).strip()
    except subprocess.CalledProcessError:
        utils.log('Could not get serial number')

    # Build the option spec with this script's basename and a default SSID
    # derived from the serial number.
    optspec = _OPTSPEC_FORMAT.format(bin=__file__.split('/')[-1],
                                     ssid='%s_TestWifi' % serial)
    parser = options.Options(optspec)
    opt, _, extra = parser.parse(argv)
    stringify_options(opt)

    if not extra:
        # parser.fatal presumably terminates; the return is a safety net.
        parser.fatal('Must specify a command (see usage for details).')
        return 1

    command = extra[0]

    # set and setclient have a different default for -b.
    if command.startswith('set') and ' ' in opt.band:
        opt.band = '2.4'

    # 'off'/'stop*' default to acting on ALL interface suffixes unless the
    # caller explicitly asked for NONE.
    if command == 'off' or command.startswith('stop'):
        if not opt.interface_suffix:
            opt.interface_suffix = 'ALL'
        elif opt.interface_suffix == 'NONE':
            opt.interface_suffix = ''

    # Dispatch table: command name -> handler taking the parsed options.
    try:
        function = {
            'set': set_wifi,
            'stop': stop_wifi,
            'off': stop_wifi,
            'restore': restore_wifi,
            'show': show_wifi,
            'setclient': set_client_wifi,
            'stopclient': stop_client_wifi,
            'stopap': stop_ap_wifi,
            'scan': scan_wifi,
        }[command]
    except KeyError:
        parser.fatal('Unrecognized command %s' % command)

    read_only_commands = ('show', )

    # Mutating commands serialize on a lockfile: take it at most once per
    # process and register the release to run at interpreter exit.
    if command not in read_only_commands:
        if not lockfile_taken:
            utils.lock(_LOCKFILE, int(opt.lock_timeout))
            atexit.register(utils.unlock, _LOCKFILE)
            lockfile_taken = True

    success = function(opt)

    if success:
        if command in ('set', 'setclient'):
            # Persist the successful invocation so it can be replayed by
            # 'restore': always in /tmp, and in /fiber/config when -P is set.
            if command == 'set':
                program = 'hostapd'
                interface_type = iw.INTERFACE_TYPE.ap
            else:
                program = 'wpa_supplicant'
                interface_type = iw.INTERFACE_TYPE.client
            interface = iw.find_interface_from_band(opt.band, interface_type,
                                                    opt.interface_suffix)
            if opt.persist:
                # Save in /fiber/config.
                persist.save_options(program, interface, argv, False)
            # Save in /tmp.
            persist.save_options(program, interface, argv, True)

    return success
Example no. 7
0
# Merge every per-job catalog.json under *root* into one master catalog.
files = glob.glob(osp.join(root, '*/catalog.json'))
catalog_path = osp.join(root, 'catalog.json')
lock_path = catalog_path + '.lock'

catalog = {}
for f in files:
    try:
        with open(f) as fh:
            f_json = json.load(fh)
    except (IOError, OSError, ValueError):
        # Unreadable file or invalid JSON: log and skip. (The original bare
        # except fell through and re-merged the previous iteration's f_json,
        # or raised NameError when the very first file failed.)
        logging.error('loading file %s failed, ignoring' % f)
        continue
    catalog.update(f_json)

# add blank fields if not present
for i in catalog:
    catalog[i]['processed_utc'] = catalog[i].get('processed_utc', None)
    catalog[i]['run_utc'] = catalog[i].get('run_utc', None)
    catalog[i]['kml_url'] = catalog[i].get('kml_url', None)
    catalog[i]['kml_size'] = catalog[i].get('kml_size', None)
    catalog[i]['job_id'] = i

# Newest job ids first.
catalog_sorted = collections.OrderedDict(sorted(catalog.items(), reverse=True))

# Write the merged catalog under the lock; release it even if the dump
# fails so a crash cannot wedge subsequent runs.
l = lock(lock_path)
l.acquire()
try:
    with open(catalog_path, 'w') as out:
        json.dump(catalog_sorted, out, indent=1, separators=(',', ':'))
finally:
    l.release()

logging.info('Created catalog at %s', catalog_path)
Example no. 8
0
#!/usr/bin/env python

# To change this template, choose Tools | Templates
# and open the template in the editor.

__author__ = "Filip"
__date__ = "$12-Nov-2010 17:06:00$"

import utils
import time

if __name__ == "__main__":
    # Demo of utils.lock()/utils.unlock() (Python 2 script): hold the lock
    # for ~10 seconds, release it, then keep running another ~10 seconds so
    # a second instance can be started to observe both phases.
    utils.lock()

    for i in range(10):
        print "Sleeping: ", i
        time.sleep(1)

    utils.unlock()

    for i in range(10):
        print "Unlocked: ", i
        time.sleep(1)
Example no. 9
0
# Merge each per-job catalog in *files* into a single master catalog.
catalog_path = osp.join(root, 'catalog.json')
lock_path = catalog_path + '.lock'

catalog = {}
for f in files:
    try:
        with open(f) as fh:
            f_json = json.load(fh)
    except (IOError, OSError, ValueError):
        # Unreadable file or invalid JSON: log and skip. (The original bare
        # except fell through and re-merged the stale f_json from the prior
        # iteration, or raised NameError if the first file failed.)
        logging.error('loading file %s failed, ignoring' % f)
        continue
    catalog.update(f_json)

# add blank fields if not present
for i in catalog:
    catalog[i]['processed_utc'] = catalog[i].get('processed_utc', None)
    catalog[i]['run_utc'] = catalog[i].get('run_utc', None)
    catalog[i]['kml_url'] = catalog[i].get('kml_url', None)
    catalog[i]['kml_size'] = catalog[i].get('kml_size', None)
    catalog[i]['job_id'] = i

# Newest job ids first.
catalog_sorted = collections.OrderedDict(sorted(catalog.items(), reverse=True))

# Write the merged catalog under the lock and guarantee the release runs
# even if the dump raises.
l = lock(lock_path)
l.acquire()
try:
    with open(catalog_path, 'w') as out:
        json.dump(catalog_sorted,
                  out,
                  indent=1,
                  separators=(',', ':'))
finally:
    l.release()

logging.info('Created catalog at %s', catalog_path)
def main(args, subparsers):
    print(args)
    print("Started experiment!")
    utils.print_args(args)
    utils.set_seed(args.seed)

    ###################################################################################
    ################################# Intialization ###################################
    ###################################################################################
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = transformers.AutoModelForCausalLM.from_pretrained(
        args.pretrained_class, local_files_only=True).eval()
    # register special tokens
    # num_added_tokens = tokenizer.add_special_tokens({"bos_token": "<BOS>", "eos_token": "<EOS>",
    # "pad_token": "<PAD>"})
    model.to(device)

    tokenizer = transformers.AutoTokenizer.from_pretrained(
        args.pretrained_class, local_files_only=True)
    sampler_args = vars(subparsers[args.sampler].parse_known_args()[0])
    sampler_args_items = "-".join(
        [f"{k}:{v}" for k, v in sampler_args.items()])

    tokenizer_args = f"tokenizer:{tokenizer.__class__.__name__}"
    args.pretrained_class = args.pretrained_class.replace("/", "_")
    if args.pretrained_class == "ctrl":
        tokenizer_args += f"-ctrl_code:{args.ctrl_code}"
    elif "gpt2" in args.pretrained_class:
        tokenizer.pad_token = tokenizer.eos_token

    pretrained_class = args.pretrained_class.replace("-", "_")
    sampler_name = args.sampler
    if (args.sampler == "NegativeSampler"):
        print(sampler_args)
        sampler_name += "_Negative_" + sampler_args['negative_base']
    output_file = f"model:{model.__class__.__name__}-model_class:{pretrained_class}-{tokenizer_args}-sampler:{sampler_name}-temperature:{args.temperature}-seq_length:{args.max_seq_length}-ngram:{args.gram}-{sampler_args_items}.txt"

    results_file = os.path.join("results/", args.pretrained_class,
                                args.results_file)

    # if our results file eixsts
    if os.path.exists(results_file):
        with open(results_file, "r+") as f:
            current = json.load(f)
        key = output_file[:-4]
        # check if we have already ran this
        if key in current:
            raise Exception("We've already computed the result!" + " " +
                            results_file)

    print("Using", args.prefix_file, "as the prefix file!")
    if not args.prefix_file:
        if args.pretrained_class == "ctrl":
            input_tokens = [tokenizer.control_codes[args.ctrl_code]]
        else:
            input_tokens = [tokenizer.bos_token_id]
        input_tokens = torch.tensor(input_tokens).to(device).unsqueeze(0)
    else:
        with open(args.prefix_file, "r") as f:
            # remove lines that are empty
            lines = []
            for line in f.readlines():
                if line.strip() and line.count(" ") > args.prefix_length:
                    lines.append(line)

            # shuffle to ensure we have some diversity
            random.shuffle(lines)
            # truncate to number of the sentences that we are generating
            lines = lines[:args.num_sentences]
            input_tokens = tokenizer.batch_encode_plus(
                lines,
                add_special_tokens=False,
                truncation=True,
                max_length=args.prefix_length,
                padding="max_length",
                return_tensors="pt")
            attention_mask = input_tokens['attention_mask']
            input_tokens = input_tokens['input_ids']
            attn_token = torch.tensor([1]).unsqueeze(0).repeat(
                args.num_sentences, 1)
            attention_mask = torch.cat((attn_token, attention_mask), dim=1)
            assert tokenizer.bos_token_id not in input_tokens[0]
            bos_token = torch.tensor([tokenizer.bos_token_id
                                      ]).unsqueeze(0).repeat(
                                          args.num_sentences, 1)
            input_tokens = torch.cat((bos_token, input_tokens), dim=1)

    print("Input Tokens:", input_tokens.shape)

    all_sentences = []
    k_primes, p_primes, entropy_primes = [], [], []
    num_sentences_left = args.num_sentences
    sentences_per_batch = args.generation_batch_size
    all_logprobs = []

    with torch.no_grad():
        for idx in range(ceil(args.num_sentences / sentences_per_batch)):
            batch_size = None
            if num_sentences_left > sentences_per_batch:
                batch_size = sentences_per_batch
            else:
                batch_size = num_sentences_left

            schedule = getattr(sampler, args.sampler)(**sampler_args)
            if input_tokens.shape[0] == 1:
                num_return_sequences = 1
                input_ids = input_tokens
            else:
                input_ids = input_tokens[idx:idx + batch_size].to(device)
                num_return_sequences = 1

            num_sentences_left -= batch_size

            sentences, model_logits, transformed_logits = filtering.generate(
                model=model,
                input_ids=input_ids,
                max_length=args.max_seq_length,
                do_sample=True,
                num_beams=None,
                temperature=args.temperature,
                schedule=schedule,
                repetition_penalty=1.0,
                bos_token_id=tokenizer.bos_token_id,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
                num_return_sequences=num_return_sequences,
                dry_run=args.dry_run)

            #########################################################################
            ############################### K Prime #################################
            #########################################################################
            sz = list(transformed_logits.size())
            mask = (sentences[:, -sz[1]:] >
                    0).cuda()  #careful! make sure this mask makes sense

            distro = torch.softmax(transformed_logits.view(-1, sz[-1]).cuda(),
                                   dim=-1)
            #use .float() for Bool to avoid bug!!!
            k_prime = torch.sum((distro >
                                 (1.0 / transformed_logits.size(-1))).float(),
                                dim=-1).view(sz[0], sz[1])
            k_prime = torch.masked_select(k_prime, mask)
            assert (torch.min(k_prime).item() > 0)
            k_prime = torch.log(k_prime)

            k_primes.extend(k_prime.cpu().tolist())
            #print('k_primes:', np.mean(k_primes))
            e_distro = torch.softmax(model_logits.contiguous().view(
                -1, sz[-1]).cuda(),
                                     dim=-1)

            ori_distro = torch.softmax(
                model_logits[:, -sz[1]:, :].contiguous().view(-1,
                                                              sz[-1]).cuda(),
                dim=-1)

            distro = torch.softmax(transformed_logits.view(-1, sz[-1]).cuda(),
                                   dim=-1)

            ori_distro = ori_distro * (distro >
                                       (1.0 / transformed_logits.size(-1)))
            p_prime = torch.sum(ori_distro, dim=-1).view(sz[0], sz[1])
            p_prime = torch.log(torch.masked_select(p_prime, mask).float())
            p_primes.extend(p_prime.cpu().tolist())

            distro = torch.softmax(transformed_logits.view(-1, sz[-1]).cuda(),
                                   dim=-1)
            entropy = -torch.sum(distro * torch.log(distro + 1e-10),
                                 dim=-1).view(sz[0], sz[1])
            entropy = torch.masked_select(entropy, mask)
            entropy_primes.extend(entropy.cpu().tolist())

            ##################################################################
            ############################ K Prime Ends ##########################
            ##################################################################

            transformed_logits = transformed_logits.to(device)
            model_logits = model_logits.to(device)
            sentences = sentences.to(device)
            logprobs = utils.calculate_logprobs(
                sentences,
                transformed_logits,
                model_logits,
                args.prefix_length,
                0,
                interpolate_ratio=args.filter_weight,
                batch_size=args.generation_batch_size)
            del model_logits
            del transformed_logits
            gc.collect()

            all_logprobs.append(logprobs.cpu().detach())
            all_sentences.append(sentences.cpu().detach())

    all_sentences = torch.cat(all_sentences, dim=0)
    all_logprobs = torch.cat(all_logprobs, dim=0)
    k_prime, p_prime, entropy_prime = np.mean(k_primes), np.mean(
        p_primes), np.mean(entropy_primes)
    print('Entropy Prime:', entropy_prime, 'K Prime:', k_prime, 'P Prime:',
          p_prime)
    results = {
        'k_prime': k_prime,
        'p_prime': p_prime,
        'entropy_prime': entropy_prime
    }

    del model
    print("Final shapes:", all_sentences.shape, all_logprobs.shape)
    # all text includes the prefix
    all_text_sentences = []
    # prefixed_text_sentences excludes the prefix
    prefixed_text_sentences = []
    for idx in range(
            all_sentences.shape[0]):  # iterate over the batch dimension
        # sentence_id = sentence[0]
        idx_offset = 1 if args.pretrained_class == "ctrl" else 0
        prefixed_sentence = all_sentences[idx, idx_offset:].tolist()
        idx_offset += args.prefix_length
        sentence = all_sentences[idx, idx_offset:].tolist()

        decoded_sentence = tokenizer.decode(sentence,
                                            skip_special_tokens=True,
                                            clean_up_tokenization_spaces=True)
        prefixed_decoded_sentence = tokenizer.decode(
            prefixed_sentence,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True)
        for idx in range(len(decoded_sentence))[::-1]:
            if decoded_sentence[idx] != "!":
                break

        decoded_sentence = decoded_sentence[:idx + 1]

        for idx in range(len(prefixed_decoded_sentence))[::-1]:
            if prefixed_decoded_sentence[idx] != "!":
                break

        prefixed_decoded_sentence = prefixed_decoded_sentence[:idx + 1]

        # all_text is without the prefix, prefixed is including the prefix.
        all_text_sentences.append(decoded_sentence)
        prefixed_text_sentences.append(prefixed_decoded_sentence)

    ###################################################################################
    ############################ Score the Generated Texts ############################
    ###################################################################################
    results_file = os.path.join("results/", args.pretrained_class,
                                args.results_file)
    results_basename = os.path.basename(results_file).replace(".json", "")
    results_dir = os.path.dirname(results_file)
    if not os.path.isdir(results_dir):
        os.makedirs(results_dir)

    #results = {}  # moved to k/p/ent_prime
    scores = {}
    files = os.path.join("saved_generations/", results_basename,
                         args.pretrained_class, args.output_dir, output_file)
    files_dir = os.path.dirname(files)
    if not os.path.isdir(files_dir):
        os.makedirs(files_dir)

    print(f"Writing generated sentences to {files}.")
    utils.write_sentences(all_text_sentences, files)

    preprocessed_files = os.path.join("preprocessed_generations/",
                                      results_basename, args.pretrained_class,
                                      args.output_dir, output_file)
    preprocessed_files_dir = os.path.dirname(preprocessed_files)
    if not os.path.isdir(preprocessed_files_dir):
        os.makedirs(preprocessed_files_dir)

    print(f"Writing preprocessed sentences to {preprocessed_files}.")
    preprocessed_sentences, filtered_indicies, filtered_lengths = utils.preprocess_text(
        prefixed_text_sentences,
        tokenizer,
        lmin=args.preprocessed_min,
        lmax=args.preprocessed_max)
    utils.write_sentences(preprocessed_sentences, preprocessed_files)

    # update the reference file to be chunked to our size
    reference_file = args.eval_text
    chunked_reference_file = f"{reference_file}_seq:{args.max_seq_length}_min:{args.preprocessed_min}_max:{args.preprocessed_max}_prefix:{args.prefix_length}_model:{args.pretrained_class.replace('models/', '')}"
    if not os.path.exists(chunked_reference_file):
        utils.lock(chunked_reference_file)
        print("Reference lock acquired!")
        # begin critical section!
        utils.chunk_and_prefix_file(reference_file,
                                    tokenizer,
                                    args.preprocessed_min,
                                    args.preprocessed_max,
                                    chunked_reference_file,
                                    prefix_length=args.prefix_length)
        # end critical section!
        utils.unlock(chunked_reference_file)

    filtered_tokenizations = []
    filtered_logprobs = []
    for idx in filtered_indicies:
        filtered_tokenizations.append(all_sentences[idx])
        filtered_logprobs.append(all_logprobs[idx])
    filtered_tokenizations = torch.stack(filtered_tokenizations, dim=0)
    filtered_logprobs = torch.stack(filtered_logprobs, dim=0)

    del all_logprobs
    gc.collect()

    if args.eval_method == "BLEU":
        # use BLEU calculation
        smoothing_method = {"nist": SmoothingFunction().method3}
        for name, method in smoothing_method.items():
            scores[name] = utils.evaluate_bleu(
                files,
                chunked_reference_file,
                num_real_sentences=args.num_sentences,
                num_generated_sentences=args.num_sentences,
                gram=args.gram,
                smoothing_method=method,
                chunk_size=15)
            print()

        for name in smoothing_method.keys():
            results[name] = {}
            results[name]['scores'] = scores[name]

        results['nist']['scores'][
            'bleu5'] = results['nist']['scores']['bleu5'] * -1.0
        bleu = results['nist']['scores']['bleu5'] * -1.0
        sbleu = results['nist']['scores']['self-bleu5']
    else:
        raise Exception("We don't support other automatic metrics!")

    print("Results:", bleu, sbleu)

    ###################################################################################
    ############################# Result Reporting Section ############################
    ###################################################################################

    if not args.dry_run:
        results_file = os.path.join("results/", args.pretrained_class,
                                    args.results_file)
        results_dir = os.path.dirname(results_file)
        if not os.path.isdir(results_dir):
            os.makedirs(results_dir)
        utils.lock(results_file)
        print("Lock acquired!")

        # begin critical section!
        if os.path.exists(results_file):
            with open(results_file, "r+") as f:
                current = json.load(f)
        else:
            current = {}

        key = output_file[:-4]
        current[key] = results
        random_file = ''.join(
            random.SystemRandom().choice(string.ascii_uppercase +
                                         string.digits) for _ in range(10))
        random_file = os.path.join("results/", args.pretrained_class,
                                   random_file)
        with open(random_file, "w+") as f:
            json.dump(current, f)

        os.rename(random_file, results_file)

        # save generations
        saved_tokens_file = os.path.join("tokens/", results_basename,
                                         args.pretrained_class,
                                         args.output_dir, output_file)
        saved_tokens_dir = os.path.dirname(saved_tokens_file)
        if not os.path.isdir(saved_tokens_dir):
            os.makedirs(saved_tokens_dir)

        saved_tokens = {}
        saved_tokens['args'] = [
            vars(args),
            vars(subparsers[args.sampler].parse_known_args()[0])
        ]
        idx_offset = 1 if args.pretrained_class == "ctrl" else 0
        saved_tokens['with_prefix'] = all_sentences[:, idx_offset:].tolist()
        idx_offset += args.prefix_length
        saved_tokens['without_prefix'] = all_sentences[:, idx_offset:].tolist()

        with open("saved_tokens_file", "w+") as f:
            json.dump(saved_tokens, f)

        # save log probabilities
        preprocessed_logits = os.path.join("preprocessed_logprobs/",
                                           results_basename,
                                           args.pretrained_class,
                                           args.output_dir, output_file)
        preprocessed_logits_dir = os.path.dirname(preprocessed_logits)
        if not os.path.isdir(preprocessed_logits_dir):
            os.makedirs(preprocessed_logits_dir)

        d = {}
        print(filtered_logprobs.shape)
        for idx in range(filtered_logprobs.shape[0]):
            if preprocessed_sentences[idx] in d:
                raise Exception("Duplicate sentences found!")
            sent_id = hashlib.sha256(
                preprocessed_sentences[idx].encode()).hexdigest()
            d[sent_id] = {
                "model_score": filtered_logprobs[idx].item(),
                "lengths": filtered_lengths[idx] - args.prefix_length,
                "sentence": preprocessed_sentences[idx]
            }
        print("Avg log probabilities:",
              (filtered_logprobs /
               (torch.tensor(filtered_lengths) - args.prefix_length)).mean(
                   dim=0))

        with open(preprocessed_logits, "w") as f:
            json.dump(d, f)

        # create plot
        plots_file = os.path.join("plots/", args.pretrained_class,
                                  args.results_file)
        plots_dir = os.path.dirname(plots_file)
        if not os.path.isdir(plots_dir):
            os.makedirs(plots_dir)

        plot = plotter.Plotter(results_file)
        plot.plot_curves()
        if args.plot_gold:
            params = {
                "eval_method": args.eval_method,
                "chunk": args.max_seq_length,
                "ngram": args.gram,
                "knn": args.knn,
                "num_sentences": args.num_sentences
            }
            result = plot.plot_gold(params)
            if not result:
                # We don't have a proper score for our reference file, so let's go ahead and create it.
                params['gold_file'] = chunked_reference_file.replace(
                    "test", "valid")
                print(
                    f"Evaluating gold point on {params['gold_file']} with KNN={args.knn}"
                )
                params['num_sentences'] = args.num_sentences
                params['reference_corpus'] = chunked_reference_file
                params['chunk'] = args.max_seq_length
                params['eval_method'] = args.eval_method
                params['knn'] = args.knn
                params['gram'] = args.gram
                params['device'] = device
                score_gold(params)
                result = plot.plot_gold(params)

        plot.save(plots_file.replace(".json", ""))
        # end critical section!
        utils.unlock(results_file)