Example #1
def loadReceipeModel():
    ingrs_vocab = pickle.load(
        open(os.path.join(data_dir, 'ingr_vocab.pkl'), 'rb'))
    vocab = pickle.load(open(os.path.join(data_dir, 'instr_vocab.pkl'), 'rb'))
    title = ''
    ingr_vocab_size = len(ingrs_vocab)
    instrs_vocab_size = len(vocab)
    output_dim = instrs_vocab_size
    t = time.time()
    # Clear argv so argparse inside get_parser() ignores the host process's arguments
    import sys
    sys.argv = ['']
    del sys
    args = get_parser()
    args.maxseqlen = 15
    args.ingrs_only = False
    model = get_model(args, ingr_vocab_size, instrs_vocab_size)
    # Load the trained model parameters
    model_path = os.path.join(data_dir, 'modelbest.ckpt')
    model.load_state_dict(torch.load(model_path, map_location=map_loc))
    model.to(device)
    model.eval()
    model.ingrs_only = False
    model.recipe_only = False
    print('loaded model')
    print("Elapsed time:", time.time() - t)
    return model
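This function leans on module-level names defined elsewhere in the inversecooking project (data_dir, device, map_loc, get_parser, get_model). A minimal sketch of the surrounding setup it assumes, with an illustrative local 'data' directory:

import os
import pickle
import time
import torch
from args import get_parser   # inversecooking's argument parser
from model import get_model   # inversecooking's model factory

data_dir = 'data'  # assumed path to the downloaded vocab/checkpoint files
use_gpu = torch.cuda.is_available()
device = torch.device('cuda' if use_gpu else 'cpu')
map_loc = None if use_gpu else 'cpu'

model = loadReceipeModel()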
Example #2
def main():
    """
    Utility to perform actions on HTML

    Required arguments:
        -a | --action <action_name> : Action to perform. 2 valid options exist so far:
                             'retrieve-content' or 'strip-tags'
        -u | --url <url> : URL for the HTML content
    """
    exit_code = 0
    logs.init(logfile='html')
    ap = args_util.get_parser()
    ap.add_argument('-a',
                    '--action',
                    type=str,
                    help='Action to perform. 2 valid options exist so far: '
                    '\'retrieve-content\' or \'strip-tags\'')
    ap.add_argument('-u', '--url', type=str, help='URL for the HTML content')
    ap.add_argument('-t',
                    '--tag',
                    type=str,
                    default=None,
                    help='Optional HTML tag to strip and return the content')
    ap.add_argument('--text',
                    type=str,
                    default=True,
                    help='Optional text within HTML tags to strip')
    args = ap.parse_args()
    assert args.action, ('--action is required. The valid options are: '
                         '\'retrieve-content\' or \'strip-tags\'')
    assert args.url, '--url is required'

    try:
        html = retrieve_html(args.url)

        if html is None:
            print 'HTML was not retrieved'
            exit_code = 1
            exit(exit_code)

        if args.action == 'retrieve-content':
            contents = extract_content_and_language(html, url=args.url)

            if contents is None:
                print 'Content could not be retrieved. Check log.'
            else:
                for content in contents:
                    print content, ' = ', contents[content]
        elif args.action == 'strip-tags':
            print strip_tags(html, node_text=args.text, tag=args.tag)
        else:
            log.error('Invalid action provided: \'%s\'' % args.action)
            print 'Invalid action provided: \'%s\'' % args.action
            print 'Must be either \'retrieve-content\' or \'strip-tags\''
            exit_code = 2
    except Exception as e:
        log.exception('Exception encountered: %s' % e)
        exit_code = 1

    exit(exit_code)
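args_util.get_parser is project-specific and not shown here; presumably it returns an argparse.ArgumentParser pre-loaded with flags shared by these utilities. A hypothetical minimal stand-in:

import argparse

def get_parser():
    # Assumed shape only: the real args_util likely also wires up
    # logging and other options common to the project's tools.
    parser = argparse.ArgumentParser()
    parser.add_argument('--verbose', action='store_true',
                        help='Enable verbose output.')
    return parser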
Example #3
def main():
    import warnings
    warnings.filterwarnings("ignore")
    #set_seeds()
    args = get_parser()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda_index
    print('CUDA', args.cuda_index)

    datasets = ('ENZYMES', 'DD', 'REDDIT-MULTI-12K', 'COLLAB', 'PROTEINS_full', 'REDDIT-BINARY')
    benchmark = datasets[0]
    args.dataset = benchmark

    now = datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
    args.logdir = f'logs/{benchmark}/{now}'

    if not os.path.exists(f'{args.logdir}/checkpoints'):
        os.makedirs(f'{args.logdir}/checkpoints')

    ds = Dataset(name=benchmark, max_nodes=1000, num_folds=10)
    args.input_dim = ds.feat_dim
    args.output_dim = args.input_dim
    args.num_classes = ds.num_class
    args.num_centroids = [int(x) for x in args.num_centroids.split(',') if x.strip().isdigit()]

    val_accs = train(ds, args)
    args.mean_validation_accuracy = np.mean(val_accs)
    args.std_validation_accuracy = np.std(val_accs)
    args.best_fold = int(np.argmax(val_accs))
    args.best_validation_accuracy = np.max(val_accs)
    args.validation_accuracies = val_accs

    with open(f'{args.logdir}/summary.json', 'w') as f:
        json.dump(args.__dict__, f, indent=2)
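Note that --num_centroids arrives as a comma-separated string and is coerced into a list of ints before training, for example:

num_centroids = '32,8,4'   # illustrative command-line value
parsed = [int(x) for x in num_centroids.split(',') if x.strip().isdigit()]
print(parsed)              # -> [32, 8, 4]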
Example #4
def load_model():
    global model
    args = get_parser()
    args.maxseqlen = 15
    args.ingrs_only = False
    model = get_model(args, ingr_vocab_size, instrs_vocab_size)
    model.load_state_dict(torch.load(MODEL_PATH, map_location=map_loc))
    model.to(device)
    model.eval()
    model.ingrs_only = False
    model.recipe_only = False
    logger.debug("Loaded model")
Example #6
def test():
    import args
    p = args.get_parser()
    a = p.parse_args()
    d = init_domain(args=a)
    print d.name
    for i in d.select("select count(*) from %s" % (d.name,)):
        print i

    d = init_domain(domain='gsr_ingest', args=a)
    print d.name
    for i in d.select("select count(*) from %s" % (d.name,)):
        print i
Example #7
def imread_indexed(filename):
  """ Load image given filename."""
  # Dataset configuration initialization
  parser = get_parser()
  args = parser.parse_args()

  if args.dataset == 'kittimots':
    im = Image.open(filename).convert('P')
  else:
    im = Image.open(filename)

  annotation = np.atleast_3d(im)[...,0]
  reshape = np.array(im.getpalette()).reshape(-1, 3)

  return annotation, reshape
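The returned pair is the per-pixel label map plus the image's colour palette. A hypothetical call (the filename is illustrative; note the function also expects its dataset arguments on the command line):

annotation, palette = imread_indexed('00000.png')
print(annotation.shape)  # (H, W) array of palette indices
print(palette.shape)     # (N, 3) RGB palette rows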
Example #8
    def __init__(self, ):

        transf_list = []
        transf_list.append(transforms.Resize(256))
        transf_list.append(transforms.CenterCrop(224))
        self.transform = transforms.Compose(transf_list)

        transf_list_batch = []
        transf_list_batch.append(transforms.ToTensor())
        transf_list_batch.append(
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)))
        self.to_input_transf = transforms.Compose(transf_list_batch)

        data_dir = "data"
        # Run on GPU if available and use_gpu is True; otherwise run on CPU
        use_gpu = False
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() and use_gpu else "cpu")
        map_loc = None if torch.cuda.is_available() and use_gpu else "cpu"

        self.ingrs_vocab = pickle.load(
            open(os.path.join(data_dir, "ingr_vocab.pkl"), "rb"))
        self.vocab = pickle.load(
            open(os.path.join(data_dir, "instr_vocab.pkl"), "rb"))

        ingr_vocab_size = len(self.ingrs_vocab)
        instrs_vocab_size = len(self.vocab)
        # output_dim = instrs_vocab_size

        args = get_parser()
        args.maxseqlen = 15
        args.ingrs_only = False
        model = get_model(args, ingr_vocab_size, instrs_vocab_size)
        # Load the trained model parameters
        model_path = os.path.join(data_dir, "modelbest.ckpt")
        model.load_state_dict(torch.load(model_path, map_location=map_loc))
        model.to(self.device)
        model.eval()
        model.ingrs_only = False
        model.recipe_only = False
        self.model = model
        print("loaded model")

        self.ingr_co2 = pickle.load(
            open(os.path.join(data_dir, "ingr_co2.pkl"), "rb"))
        self.ingr_alternatives = pickle.load(
            open(os.path.join(data_dir, "ingr_alt.pkl"), "rb"))
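Putting this __init__ together, inference is a two-stage transform followed by a forward sample. A sketch assuming the class is named InverseCooking (hypothetical) and using the sample API shown in Example #20:

import torch
from PIL import Image

ic = InverseCooking()
img = Image.open('dish.jpg').convert('RGB')   # illustrative input image
img = ic.transform(img)                       # resize + center crop to 224x224
batch = ic.to_input_transf(img).unsqueeze(0).to(ic.device)
with torch.no_grad():
    outputs = ic.model.sample(batch, greedy=True, temperature=1.0,
                              beam=-1, true_ingrs=None)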
Example #9
def g_model():
	data_dir = 'M:/Final Project/code/inversecooking-master/data'
	ingrs_vocab = pickle.load(open(os.path.join(data_dir, 'ingr_vocab.pkl'), 'rb'))
	vocab = pickle.load(open(os.path.join(data_dir, 'instr_vocab.pkl'), 'rb'))
	ingr_vocab_size = len(ingrs_vocab)
	instrs_vocab_size = len(vocab)
	output_dim = instrs_vocab_size


	import sys; sys.argv=['']; del sys
	args = get_parser()
	args.maxseqlen = 15
	args.ingrs_only=False
	model = get_model(args, ingr_vocab_size, instrs_vocab_size)
	# Load the trained model parameters
	model_path = os.path.join(data_dir, 'modelbest.ckpt')
	model.load_state_dict(torch.load(model_path, map_location='cpu'))
	model.to('cpu')
	model.eval()
	model.ingrs_only = False
	model.recipe_only = False
	return model
Example #10
File: test.py Project: sintocos/CIB
import torch
from sklearn.manifold import TSNE
from torch.autograd import Variable
import PIL.ImageOps
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

#import self-built function
from LoadData import FaceData
from Net import MLP, VAE  #import network
from Train import ModelTrain, AverageMeter  #import trainer
from Helper import plotfunc, TraverseDataset  #import some helper function
from args import get_parser  #import args
#==================================================
mp = get_parser()
opts = mp.parse_args()
#==================================================


def test(test_loader, MLPModel, AEModel):
    losses = AverageMeter()
    top1 = AverageMeter()
    print("=> loading checkpoint '{}'".format(opts.model_path))
    checkpoint = torch.load(opts.model_path)
    opts.start_epoch = checkpoint['epoch']
    MLPModel.load_state_dict(checkpoint['MLPstate_dict'])
    AEModel.load_state_dict(checkpoint['AEstate_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(
        opts.model_path, checkpoint['epoch']))
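The checkpoint consumed here is a plain dict keyed by 'epoch', 'MLPstate_dict', and 'AEstate_dict'; the matching save side, hypothetical but inferred from those keys, would be:

torch.save({
    'epoch': epoch,
    'MLPstate_dict': MLPModel.state_dict(),
    'AEstate_dict': AEModel.state_dict(),
}, opts.model_path)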
Example #11
def main():

    import args
    import logs
    # preliminary stab at some useful command line functionality (only cluster is currently needed)
    ap = args.get_parser()
    ap.add_argument('-c',
                    '--cluster',
                    action="store_true",
                    help='Switch to output names of cluster nodes.')
    ap.add_argument(
        '--get',
        metavar='REQUEST',
        nargs='?',
        type=str,
        help=
        'Information to get from embers.conf [service|data|cluster|host|input|output]'
    )
    ap.add_argument('--s3only',
                    action="store_true",
                    help="Write debug messages.")
    ap.add_argument('--s3onlyprod',
                    action="store_true",
                    help="Write debug messages.")
    ap.add_argument('--prefixes',
                    action="store_true",
                    help="Write debug messages.")
    ap.add_argument('--prefixpairs',
                    action="store_true",
                    help="Write debug messages.")
    group = ap.add_mutually_exclusive_group()
    group.add_argument('--host',
                       metavar='HOST',
                       nargs='?',
                       type=str,
                       help='Name of the host to get information about')
    group.add_argument(
        '--data',
        metavar='DATA',
        nargs='?',
        type=str,
        help='Name of the data (queue, etc.) to get information about')

    arg = ap.parse_args()
    logs.init(arg)
    init(arg)

    if arg.get:
        assert arg.get in ('service', 'data', 'cluster', 'host', 'inputs',
                           'outputs', 'services'), 'Improper get request'
    try:
        if arg.s3only or arg.s3onlyprod:
            if arg.s3only:
                prodOnly = False
            else:
                prodOnly = True
            plst = get_all_s3only(prodOnly)
            if plst:
                print string.join(plst)
            return

        if arg.prefixpairs:
            plst, qlst = get_all_prefixpairs()
            for i in range(len(plst)):
                print "%s %s" % (qlst[i], plst[i])
            return

        if arg.prefixes:
            plst = get_all_prefixes()
            for p in plst:
                print p
            return

        if arg.host:
            assert arg.host in conf.get(
                'cluster'), 'Host is not listed in embers.conf'
            if arg.get == 'services':
                print conf.get('cluster')[arg.host]['services']
            if arg.get == 'data':
                print[
                    data for service in conf['cluster'][arg.host]['services']
                    for data in conf['services'][service]['outputs']
                ]
        if arg.data:
            assert arg.data in conf.get(
                'data'), 'Data is not listed in embers.conf'
            (host, service) = get_host_and_service(arg.data)
            if arg.get == 'host':
                print host
            if arg.get == 'service':
                print service
        if arg.service:
            assert arg.service in conf.get(
                'services'), 'Service is not listed in embers.conf'
            if arg.get == 'host':
                print[
                    host for host in conf['cluster']
                    if arg.service in conf['cluster'][host]['services']
                ]
            if arg.get == 'inputs':
                print conf['services'][arg.service]['inputs']
            if arg.get == 'outputs':
                print conf['services'][arg.service]['outputs']

        if arg.cluster or arg.get == 'cluster':
            print ' '.join(conf.get('cluster', ''))
        if arg.get == 'data' and not (arg.service or arg.data or arg.host):
            print ' '.join(conf.get('data', ''))
        if arg.get == 'host' and not (arg.service or arg.data or arg.host):
            print ' '.join(conf.get('cluster', ''))
        if arg.get == 'service' and not (arg.service or arg.data or arg.host):
            print ' '.join(conf.get('services', ''))

    except Exception, e:
        log.error("Requested information not in embers.conf, error %s", e)
Example #12
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
import torch.backends.cudnn as cudnn
from data_loader import ImagerLoader 
from trijoint import im2recipe

# =============================================================================
import args
parser = args.get_parser()
opts = parser.parse_args()
data_path, snapshots, logdir = args.show_train_opts(opts) # create suffix-added path
# =============================================================================
from torch.utils.tensorboard import SummaryWriter
dt_now = snapshots.split('/')[-1]
train_writer = SummaryWriter(os.path.join(logdir, "train_" + dt_now))
val_writer = SummaryWriter(os.path.join(logdir, "val_" + dt_now))
# =============================================================================

if not(torch.cuda.device_count()):
    device = torch.device(*('cpu',0))
else:
    torch.cuda.manual_seed(opts.seed)
    device = torch.device(*('cuda',0))
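The starred call torch.device(*('cpu', 0)) simply unpacks to torch.device('cpu', 0); an equivalent, plainer spelling of the same selection:

device = torch.device('cuda', 0) if torch.cuda.device_count() else torch.device('cpu', 0)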
Example #13
import numpy as np
import os
import time

import torch
from torch import nn, optim
from tensorboard_logger import configure, log_value

from ntm import NTM
from ntm.datasets import CopyDataset, RepeatCopyDataset, AssociativeDataset, NGram, PrioritySort
from args import get_parser
from marnn import *
from dnc import DNC
from dnc.sam import SAM

args = get_parser().parse_args()
print("args:\n",args)

configure("runs/")
print('name:',args.name)

# ----------------------------------------------------------------------------
# -- initialize datasets, model, criterion and optimizer
# ----------------------------------------------------------------------------



if args.task=='copy':
    args.task_json = 'ntm/tasks/copy.json'
Example #14
import os
import re
import subprocess
import sys
import threading

from args import get_parser

with open("./run_and_start.sh", "r") as f:
    command = re.sub(
        r'python.*?\n', '', f.read()
    )  # TODO is this really the best way to be compliant with the fork's parent?


def roscore():
    proc = subprocess.Popen(command,
                            shell=True,
                            executable='/bin/bash',
                            universal_newlines=True)
    proc.wait()
    print("\n\n!!!\nROSCORE CRASHED. EXITING NOW!\n!!!\n\n", file=sys.stderr)
    exit(-128)


thread = threading.Thread(target=roscore)
thread.start()

parsed = get_parser().parse_args()
print(parsed)

if get_parser().parse_args().test:
    os.environ["SOLUTION_TEST"] = "1"
    print("\n\n\n\n\n\n\n\nTESTING\n\n\n\n\n\n\n\n")
    import test
else:
    print("\n\n\n\n\n\n\n\nTRAINING\n\n\n\n\n\n\n\n")
    os.environ["SOLUTION_TEST"] = "0"
    import train
Example #15
def davis_toolbox_evaluation(output_dir, eval_split, skip_F=1, title=None):
    """
    Use the default DAVIS toolbox to evaluate performance (J and F measures),
    print the results as a table, and save them to a YAML file.

    Args:
        output_dir: the folder includes all final annotations
    """
    from dmm.dataloader import db_eval, Segmentation
    from dmm.dataloader.youtubeVOS import YoutubeVOSLoader
    from dmm.misc.config_youtubeVOS import phase, cfg
    from args import get_parser

    phase = phase.VAL

    parser = get_parser()
    args = parser.parse_args()
    db = YoutubeVOSLoader(args, split=eval_split)

    print('Loading video segmentations from: {}'.format(output_dir))
    # Load segmentation
    segmentations = [
        Segmentation('trainval', osp.join(output_dir, s), False)
        for s in db.iternames()
    ]
    # Evaluate results
    if skip_F:
        evaluation = db_eval(db, segmentations, ['J'])
        JF = ['J']
    else:
        evaluation = db_eval(db, segmentations, ['J', 'F'])
        JF = ['J', 'F']
    # Print results
    table = PrettyTable(['Method'] + [
        p[0] + '_' + p[1]
        for p in itertools.product(JF, ['mean', 'recall', 'decay'])
    ])
    table.add_row([osp.basename(output_dir)] + [
        "%.3f" % np.round(evaluation['dataset'][metric][statistic], 3) for
        metric, statistic in itertools.product(JF, ['mean', 'recall', 'decay'])
    ])
    print(str(table) + "\n")
    # Save results into yaml file
    with open(osp.join(output_dir, 'davis_eval_results.yaml'), 'w') as f:
        yaml.dump(evaluation, f)
    tim = '{}'.format(time.strftime('%m-%d-%H'))
    print(tim)

    headers = ['Method'] + [
        p[0] + '_' + p[1]
        for p in itertools.product(JF, ['mean', 'recall', 'decay'])
    ]
    if title is None:
        title = output_dir
    tables = ["{}".format(title)] + [
        "%.3f" % np.round(evaluation['dataset'][metric][statistic], 3) for
        metric, statistic in itertools.product(JF, ['mean', 'recall', 'decay'])
    ]
    print(tables)
    info = '\n{}\n'.format(tabulate([tables], headers, tablefmt="github"))
    print(info)
    f = open("EXPERIMENT.md", "a")
    f.write(info)
    f.close()
Example #16
def main():
    parser = get_parser("generation", MODEL_CLASSES, ALL_MODELS)
    args = parser.parse_args()

    if args.local_rank == -1:
        args.device = torch.device("cuda" if torch.cuda.is_available()
                                   and not args.no_cuda else "cpu")
        args.n_gpu = torch.cuda.device_count()
    else:
        torch.cuda.set_device(args.local_rank)
        args.device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend='nccl')
        args.n_gpu = 1

    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN)
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s",
        args.local_rank, args.device, args.n_gpu, bool(args.local_rank != -1))

    set_seed(args)

    if args.local_rank not in [-1, 0]:
        torch.distributed.barrier()

    args.model_type = args.model_type.lower()
    model_class, tokenizer_class = MODEL_CLASSES[args.model_type]

    gpt2_model, tokenizer = init_gpt2_model(
        checkpoint_dir=args.model_name_or_path,
        args=args,
        model_class=model_class,
        tokenizer_class=tokenizer_class)
    gpt2_model.eval()

    if args.local_rank == 0:
        torch.distributed.barrier()

    config = gpt2_model.gpt2.config

    if args.length < 0 and config.max_position_embeddings > 0:
        args.length = config.max_position_embeddings
    elif 0 < config.max_position_embeddings < args.length:
        args.length = config.max_position_embeddings  # No generation bigger than model size
    elif args.length < 0:
        args.length = MAX_LENGTH  # avoid infinite loop

    logger.info(args)

    if args.local_rank not in [-1, 0]:
        # Barrier so only the first process in distributed training processes
        # the dataset; the others will use the cache.
        torch.distributed.barrier()

    eval_dataset = load_and_cache_examples(args, tokenizer)

    if args.local_rank == 0:
        torch.distributed.barrier()

    eval_sampler = SequentialSampler(
        eval_dataset) if args.local_rank == -1 else DistributedSampler(
            eval_dataset, shuffle=False)
    eval_dataloader = DataLoader(eval_dataset,
                                 sampler=eval_sampler,
                                 batch_size=args.per_gpu_eval_batch_size)

    output_log = {
        "true_text": [],
        "generated_text": [],
        "context": [],
        "recall_score": [],
        "context_suffix_styles": [],
        "original_styles": [],
        "metadata": []
    }

    for batch in tqdm(eval_dataloader,
                      desc="Evaluating",
                      disable=args.local_rank not in [-1, 0]):
        sentences = batch["sentence"].to(args.device)
        segments = batch["segment"].to(args.device)
        global_dense_vectors = batch["global_dense_vectors"].to(args.device)
        suffix_styles = batch["suffix_style"]
        original_styles = batch["original_style"]
        metadata = batch["metadata"]

        # Assume init_context_size is same for all examples in minibatch
        init_context_size = batch["init_context_size"][0].item()

        out, dense_length, scores = gpt2_model.generate(
            gpt2_sentences=sentences,
            segments=segments,
            eos_token_id=tokenizer.eos_token_id,
            global_dense_vectors=global_dense_vectors,
            init_context_size=init_context_size)

        for sent_num in range(sentences.shape[0]):
            output_sequence = out[sent_num][init_context_size:].tolist()

            if tokenizer.eos_token_id in output_sequence:
                output_sequence = output_sequence[:output_sequence.index(
                    tokenizer.eos_token_id)]

            true_text = tokenizer.decode(
                sentences[sent_num, init_context_size:].tolist(),
                clean_up_tokenization_spaces=True,
                skip_special_tokens=True)
            generated_text = tokenizer.decode(
                output_sequence,
                clean_up_tokenization_spaces=True,
                skip_special_tokens=True)

            context = tokenizer.decode(
                sentences[sent_num, :init_context_size].tolist(),
                clean_up_tokenization_spaces=True,
                skip_special_tokens=True)
            recall_score = recall(true_text, context)

            output_log["true_text"].append(true_text)
            output_log["generated_text"].append(generated_text)
            output_log["context"].append(context)
            output_log["recall_score"].append("%.4f" % recall_score)
            output_log["metadata"].append(metadata[sent_num])

            if hasattr(eval_dataset, "reverse_label_dict"):
                output_log["context_suffix_styles"].append(
                    class_number_to_str(eval_dataset, suffix_styles[sent_num]))
                output_log["original_styles"].append(
                    class_number_to_str(eval_dataset,
                                        original_styles[sent_num]))
            else:
                output_log["context_suffix_styles"].append("<none>")
                output_log["original_styles"].append("<none>")

    with open(
            os.path.join(args.generation_output_dir,
                         "reference_%d.txt" % max(args.local_rank, 0)),
            "w") as f:
        f.write("\n".join(output_log["true_text"]) + "\n")

    with open(
            os.path.join(args.generation_output_dir,
                         "generated_%d.txt" % max(args.local_rank, 0)),
            "w") as f:
        f.write("\n".join(output_log["generated_text"]) + "\n")

    with open(
            os.path.join(args.generation_output_dir,
                         "context_%d.txt" % max(args.local_rank, 0)),
            "w") as f:
        f.write("\n".join(output_log["context"]) + "\n")

    with open(
            os.path.join(args.generation_output_dir,
                         "recall_score_%d.txt" % max(args.local_rank, 0)),
            "w") as f:
        f.write("\n".join(output_log["recall_score"]) + "\n")

    with open(
            os.path.join(
                args.generation_output_dir,
                "context_suffix_styles_%d.txt" % max(args.local_rank, 0)),
            "w") as f:
        f.write("\n".join(output_log["context_suffix_styles"]) + "\n")

    with open(
            os.path.join(args.generation_output_dir,
                         "original_styles_%d.txt" % max(args.local_rank, 0)),
            "w") as f:
        f.write("\n".join(output_log["original_styles"]) + "\n")

    with open(
            os.path.join(args.generation_output_dir,
                         "metadata_%d.txt" % max(args.local_rank, 0)),
            "w") as f:
        f.write("\n".join(output_log["metadata"]) + "\n")
Example #17
from torchvision import transforms
from torch.autograd import Variable
import numpy as np
from PIL import Image
import torchvision.transforms.functional as TF
import random

import transformers

import h5py
from refer.refer import REFER

from args import get_parser

# Dataset configuration initialization
parser = get_parser()
args = parser.parse_args()


class ReferDataset(data.Dataset):
    def __init__(self,
                 args,
                 input_size,
                 image_transforms=None,
                 target_transforms=None,
                 split='train',
                 eval_mode=False):

        self.classes = []
        self.input_size = input_size
        self.image_transforms = image_transforms
Example #18
    status = proc.wait()
    if status != 0:
        print(proc.stdout.read().decode("ascii").strip().split("\n")[-1])


def login(username):
    password = getpass.getpass()
    url = urls.auth()
    args = {"response_type": "token", "client_id": clientid(), "redirect_uri": "http://localhost", "scope": "user_read"}
    response = requests.get(url, params=args)
    print(response.status_code)
    print(response.headers)
    print(response.text)


parser = args.get_parser()
args = parser.parse_args()
print(args)
if args.top:
    printtopgames(args.top)
elif args.game:
    printgamestreams(args.game)
elif args.stream:
    openlivestreamer(args.stream)
elif args.follows:
    print(args.follows)
elif args.user:
    login(args.user)
else:
    parser.print_usage()
Example #19
#-*- coding:UTF-8 -*-

import torch
import torch.nn as nn
import torch.optim as optim
import os
import shutil
import time
import numpy as np
from torch.autograd import Variable

from model import imvstxt
from args import get_parser

#=====================================================
parse = get_parser()
opts = parse.parse_args()


#=====================================================
def adjust_learning_rate(optimizer, epoch, opts):
    """Switching between modalities"""
    # parameters corresponding to the rest of the network
    optimizer.param_groups[0]['lr'] = opts.lr * opts.freeRecipe
    # parameters corresponding to visionMLP
    optimizer.param_groups[1]['lr'] = opts.lr * opts.freeVision

    print('Initial base params lr: %f' % optimizer.param_groups[0]['lr'])
    print('Initial vision lr: %f' % optimizer.param_groups[1]['lr'])

    # after first modality change we set patience to 3
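adjust_learning_rate presupposes an optimizer built with two parameter groups, the base network first and visionMLP second. A minimal sketch of that construction; the visionMLP attribute mirrors the im2recipe-style models and is assumed here:

import torch.optim as optim

base_params = [p for n, p in model.named_parameters()
               if not n.startswith('visionMLP')]
optimizer = optim.Adam([
    {'params': base_params},                   # param_groups[0]: rest of the network
    {'params': model.visionMLP.parameters()},  # param_groups[1]: visionMLP
], lr=opts.lr)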
Example #20
def main(dir_file, image_folder, demo_path, lights):
    use_gpu = True

    device = torch.device(
        'cuda' if torch.cuda.is_available() and use_gpu else 'cpu')
    map_loc = None if torch.cuda.is_available() and use_gpu else 'cpu'

    ingrs_vocab = pickle.load(
        open(os.path.join(dir_file, 'recipe1m_vocab_unit.pkl'), 'rb'))
    ingr_vocab_size = len(ingrs_vocab)

    t = time.time()
    import sys
    sys.argv = ['']
    del sys
    args = get_parser()
    args.maxseqlen = 15
    args.ingrs_only = False
    model = get_model(args, ingr_vocab_size)

    # Load the trained model parameters
    model_dir = '/home/r8v10/git/InvCo/dataset/new_model/inversecooking/model/checkpoints'
    #     model_dir = F'{ROOT_DIR}/dataset/model/inversecooking/model/checkpoints'
    model_path = os.path.join(model_dir, 'modelbest.ckpt')
    model.load_state_dict(torch.load(model_path, map_location=map_loc))
    model.to(device)
    model.eval()
    model.ingrs_only = False
    model.recipe_only = False
    print('loaded model')
    print("Elapsed time:", time.time() - t)

    transf_list_batch = []
    transf_list_batch.append(transforms.ToTensor())
    transf_list_batch.append(
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)))
    to_input_transf = transforms.Compose(transf_list_batch)

    # set to true to load images from demo_urls instead of those in test_imgs folder
    use_urls = False
    #if True, it will show the recipe even if it's not valid
    show_anyways = True

    if use_urls:
        response = requests.get(demo_path)
        image = Image.open(BytesIO(response.content))
    else:
        image_path = os.path.join(image_folder, demo_path)
        image = Image.open(image_path).convert('RGB')

    # print('Data path:', image_path)

    transf_list = []
    transf_list.append(transforms.Resize(256))
    transf_list.append(transforms.CenterCrop(224))
    transform = transforms.Compose(transf_list)

    image_transf = transform(image)
    image_tensor = to_input_transf(image_transf).unsqueeze(0).to(device)

    num_valid = 1
    temperature = 1.0
    # greedy = [True, False, False, False]
    # beam = [-1, -1, -1, -1]

    while True:
        with torch.no_grad():
            outputs = model.sample(image_tensor,
                                   greedy=False,
                                   temperature=temperature,
                                   beam=-1,
                                   true_ingrs=None)

        recipe_ids = outputs['recipe_ids'].cpu().numpy()

        outs, valid = prepare_output(recipe_ids[0], ingrs_vocab)
        num_valid += 1

        if valid['is_valid'] or show_anyways:
            if valid['reason'] == 'All ok.':
                # print ('RECIPE', num_valid)

                # BOLD = '\033[1m'
                # END = '\033[0m'
                # print (BOLD + '\nTitle:' + END,outs['title'])

                # print (BOLD + '\nInstructions:'+END)
                # print ('-'+'\n-'.join(outs['recipe']))
                # print ('='*20)

                #print ("Reason: ", valid['reason'])
                break
    recommend_id, recommend_lights = search(dir_file, outs['recipe'], lights)
    recommend_title, recommend_url = get_recipe(dir_file, recommend_id)

    # print('Recommendation of recipe:', recommend_id)
    # print('Title:', recommend_title)
    # print('Lights:', recommend_lights)
    # print('url:', recommend_url)

    return outs['title'], outs[
        'recipe'], recommend_lights, recommend_title, recommend_url
Example #21
#-*- coding:UTF-8 -*-
import time
import torch
import shutil
import numpy as np 
import torch.nn as nn
import torchvision 
import torch.utils.data 
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F 
from args import get_parser
from Deidentification import Scrambling #
from Loss import CenterLoss,RankingLoss
#================================================
myparser=get_parser()
opts=myparser.parse_args()
#================================================

class AverageMeter(object):
    """Compute and store the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val=0
        self.avg=0
        self.sum=0
        self.count=0

    def update(self,val,n=1):
        self.val=val
        self.sum+=val*n
        self.count+=n
        self.avg=self.sum/self.count
Example #22
def main():
    """
    A little utility to handle reading and writing streams
    to and from a queue.
    --pub <queue> : publish what's read from stdin to <queue>
    --sub <queue> : read from <queue> and write the messages to stdout
    --cat         : when used with --pub, write all published messages to stdout
    --clean       : check in incoming and outgoing messages.
                    Verify the message is correct JSON and add
                    an embersId if needed.
    --log_file    : Path to write the log file to
    --log_level   : Logging level
    Other standard EMBERS options (e.g. --verbose).
    """
    import args
    import message
    global log

    ap = args.get_parser()
    ap.add_argument(
        '--clean',
        action="store_true",
        help='Verify message format and add standard fields such as embersId.')
    ap.add_argument('--addfeed',
                    action="store_true",
                    help='Add feed and feedPath fields to published message.')
    ap.add_argument('--cat',
                    action="store_true",
                    help='Write all published messages to stdout.')
    ap.add_argument('--rm', nargs="+", help="delete queue")
    arg = ap.parse_args()
    log = logs.getLogger(log_name=arg.log_file)
    logs.init(arg, l=arg.log_level, logfile=arg.log_file)
    init(arg)

    if arg.rm and not arg.sub:
        for queue in arg.rm:
            print "Deleting", queue,
            queue = ikqueue.Queue(queue)
            queue.maybe_bind(connect())
            queue.delete()
            print "."
        return
    try:
        # need to use the raw/utf handler unless we are doing clean
        marshal = UnicodeMarshal()
        if arg.clean or arg.addfeed:
            marshal = JsonMarshal()

        if arg.sub is None and os.environ.get('UPSTART_JOB') is None:
            arg.sub = '-'  # stdin

        subq = open(
            arg.sub,
            'r')  #, marshal=marshal, ssh_key=arg.ssh_key, ssh_conn=arg.tunnel)

        if arg.pub is None and os.environ.get('UPSTART_JOB') is None:
            arg.pub = '-'  # stdout

        pubq = open(arg.pub, 'w', capture=arg.cat, marshal=marshal)
    except Exception as e:
        log.exception("Exception opening queues: %s" % e)

    # "Human-readable" queue name can be retrieved as
    #
    # sname = subq.get_name()
    # pname = pubq.get_name()
    rc = 0
    try:
        it = subq.__iter__()
        while True:
            m = ''
            try:
                m = it.next()
                if arg.clean:
                    m = message.clean(m)

                if m:
                    if arg.addfeed:
                        m = message.add_embers_ids(m,
                                                   feed=pubq.get_name(),
                                                   feedPath=pubq.get_name())
                    pubq.write(m)
            except StopIteration:
                break
            except KeyboardInterrupt:
                break
            except Exception as e:
                rc += 1
                if m:
                    log.exception('Could not process message %s: %s' % (m, e))
                else:
                    log.exception('Unknown processing error %s' % e)
    except KeyboardInterrupt:
        pass
    except Exception as e:
        rc = 1
        log.exception('Top level exception %s' % e)

    return rc
Example #23
File: main.py Project: facss/CIB
def main():
    mp = get_parser()  #argv[0]:dataset. argv[1]:scramble type
    opts = mp.parse_args()
    #pltfunc=plotfunc()#plot image

    ###################################### 0.Image Scrambling and preprocess  images ###########################
    if opts.dataset_name == 'Yale':
        dataParams = YaleParams
        Scrambleimglist, class_number = TraverseDataset(
            dataParams.dataset_dir,
            opts.n).preprocessYaleDataset(opts.scramble_method)  # Yale
    elif opts.dataset_name == 'ORL':
        dataParams = ORLParams
        Scrambleimglist, class_number = TraverseDataset(
            dataParams.dataset_dir,
            opts.n).preprocessORLDataset(opts.scramble_method)  #ORL
    elif opts.dataset_name == 'CMUPIE':
        dataParams = CMUPIEParams
        Scrambleimglist, class_number = TraverseDataset(
            dataParams.dataset_dir,
            opts.n).preprocessCMUPIEDataset(opts.scramble_method)  #CMUPIE
    else:
        dataParams = PUBFIGParams
        Scrambleimglist, class_number = TraverseDataset(
            dataParams.dataset_dir,
            opts.n).preprocessPUBFIG83Dataset(opts.scramble_method)  # PUBFIG

    opts.num_classes = class_number
    print('Data Image Scrambling transforms done. Class number is :{}'.format(
        class_number))

    ###################################### 1.Data Loader#################################################

    VAE_traindata = FaceData(
        Scrambleimglist,
        class_number,
        dataParams,
        mode='VAEtrain',
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([.5, .5, .5],
                                 [.5, .5, .5]),  #normalization        
        ]),
        should_invert=False)
    print('VAE train data loaded done')

    MLP_traindata = FaceData(Scrambleimglist,
                             class_number,
                             dataParams,
                             mode='mlptrain',
                             transform=transforms.Compose([
                                 transforms.ToTensor(),
                                 transforms.Normalize([.5, .5, .5],
                                                      [.5, .5, .5]),
                             ]),
                             should_invert=False)
    print('MLP train data loaded done')

    MLP_valdata = FaceData(Scrambleimglist,
                           class_number,
                           dataParams,
                           mode='mlpvalidate',
                           transform=transforms.Compose([
                               transforms.ToTensor(),
                               transforms.Normalize([.5, .5, .5],
                                                    [.5, .5, .5]),
                           ]),
                           should_invert=False)
    print('MLP validate data loaded done')

    MLP_testdata = FaceData(Scrambleimglist,
                            class_number,
                            dataParams,
                            mode='mlptest',
                            transform=transforms.Compose([
                                transforms.ToTensor(),
                                transforms.Normalize([.5, .5, .5],
                                                     [.5, .5, .5]),
                            ]),
                            should_invert=False)
    print('MLP test data loaded done')

    VAE_train_loader = DataLoader(VAE_traindata,
                                  shuffle=True,
                                  num_workers=opts.num_workers,
                                  batch_size=opts.VAE_training_batch_size)
    MLP_train_loader = DataLoader(MLP_traindata,
                                  shuffle=True,
                                  num_workers=opts.num_workers,
                                  batch_size=opts.MLP_training_batch_size)
    MLP_val_loader = DataLoader(MLP_valdata,
                                shuffle=True,
                                num_workers=opts.num_workers,
                                batch_size=opts.MLP_validate_batch_size)
    MLP_test_loader = DataLoader(MLP_testdata,
                                 shuffle=True,
                                 num_workers=opts.num_workers,
                                 batch_size=opts.MLP_test_batch_size)
    ###################################### 2. Model ############################################################
    opts.cuda = torch.cuda.is_available()

    inputsize = Scrambleimglist[0][0].size[0]  #image size
    MLPModel = MLP(
        inputsize * int(inputsize / 16), dataParams.num_classes
    )  #Yale is 15,ORL face class number is 40,CMUPIE class number is 68,PUBFIG is 83
    VAEModel = VAE(inputsize)
    print(VAEModel)

    torch.manual_seed(opts.seed)
    if opts.cuda:
        MLPModel = MLPModel.cuda()
        VAEModel = VAEModel.cuda()
        torch.cuda.manual_seed(opts.seed)

    MLPModel.apply(weights_init)  #weight initial
    VAEModel.apply(weights_init)  #weight initial

    print('model done.')
    ####################################### 3.Optimizer ################################################################
    MLPOptimizer = optim.Adam(MLPModel.parameters(),
                              lr=opts.lr)  # optimizer for MLP
    VAEOptimizer = optim.Adam(VAEModel.parameters(),
                              lr=opts.lr)  # optimizer for VAE
    best_val = 0
    print('optimizer done.')

    #################################################### 4.training#####################################################
    modelTrain = ModelTrain(opts, VAEModel, MLPModel, VAE_train_loader,
                            MLP_train_loader, MLP_val_loader, MLP_test_loader,
                            VAEOptimizer, MLPOptimizer)

    VAEcount = modelTrain.VAEtrain()
    mlpcount = modelTrain.mlptrain(best_val)
Example #25
def main():
    """A little utility to handle reading and writing streams 
    to and from a queue.
    --pub <queue> : publish what's read from stdin to <queue>
    --sub <queue> : read from <queue> and write the messages to stdout
    --cat         : when used with --pub, write all published messages to stdout
    --clean       : check in incoming and outgoing messages. 
                    Verify the message is correct JSON and add
                    an embersId if needed.
    Other standard EMBERS options (e.g. --verbose).
    """
    import args
    import logs
    import message

    ap = args.get_parser()
    ap.add_argument('--clean', action="store_true",
                    help='Verify message format and add standard fields such as embersId.')
    ap.add_argument('--cat', action="store_true",
                    help='Write all published messages to stdout.')
    arg = ap.parse_args()
    logs.init(arg)
    init(arg)
    assert arg.sub or arg.pub, "Need to subscribe or publish to something."

    # need to use the raw/utf handler unless we are doing clean
    marshal = UnicodeMarshal()
    if arg.clean:
        marshal = JsonMarshal()
    
    subq = None
    if arg.sub: # read a queue
        subq = open(arg.sub, 'sub', marshal=marshal, ssh_key=arg.ssh_key, ssh_conn=arg.tunnel)
    else: # read stdin
        subq = StreamQueue(sys.stdin, marshal=marshal)

    pubq = None
    if arg.pub: # send to queue
        pubq = open(arg.pub, 'pub', capture=arg.cat, marshal=marshal)
    else: # send to stdout
        pubq = StreamQueue(sys.stdout, mode='w', marshal=marshal)

    rc = 0
    try:
        it = subq.__iter__()
        while True:
            m = ''
            try:
                m = it.next()
                if arg.clean:
                    m = message.clean(m)
                    
                if m:
                    pubq.write(m)
                    
            except StopIteration:
                break

            except KeyboardInterrupt:
                break

            except:
                rc += 1
                if m:
                    log.exception('Could not process message %s' % (m,))
                else:
                    log.exception('Unknown processing error')

    except KeyboardInterrupt:
        pass

    except:
        rc = 1
        log.exception('Top level exception')

    return rc
Example #26
def main():
    parser = get_parser("finetuning")
    args = parser.parse_args()

    if (os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train and not args.overwrite_output_dir):
        raise ValueError("Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome.".format(args.output_dir))

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        args.n_gpu = torch.cuda.device_count()
    else:  # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend='nccl')
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN)
    logger.warning("Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
                   args.local_rank, device, args.n_gpu, bool(args.local_rank != -1), args.fp16)

    # Set seed
    set_seed(args)

    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        # Barrier to make sure only the first process in distributed training downloads model & vocab
        torch.distributed.barrier()

    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    config = config_class.from_pretrained(args.config_name if args.config_name else args.model_name_or_path,
                                          cache_dir=args.cache_dir if args.cache_dir else None)
    # Adding an extra embedding dimension for style/content vectors
    config.extra_embedding_dim = args.extra_embedding_dim
    tokenizer = tokenizer_class.from_pretrained(args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
                                                do_lower_case=args.do_lower_case,
                                                cache_dir=args.cache_dir if args.cache_dir else None)

    model = model_class.from_pretrained(args.model_name_or_path,
                                        from_tf=bool('.ckpt' in args.model_name_or_path),
                                        config=config,
                                        cache_dir=args.cache_dir if args.cache_dir else None)
    tokenizer.add_special_tokens(SPECIAL_TOKENS)
    model.resize_token_embeddings(len(tokenizer))

    model.to(args.device)

    gpt2_model = GPT2ParentModule(args=args, gpt2=model)

    if args.local_rank == 0:
        torch.distributed.barrier()  # End of barrier to make sure only the first process in distributed training downloads model & vocab

    logger.info("Training/evaluation parameters %s", args)

    # Training
    if args.do_train:
        if args.local_rank not in [-1, 0]:
            torch.distributed.barrier()  # Barrier to make sure only the first process in distributed training processes the dataset, and the others will use the cache

        train_dataset = load_and_cache_examples(args, tokenizer, evaluate=False)

        if args.local_rank == 0:
            torch.distributed.barrier()

        global_step, tr_loss = train(args, gpt2_model, train_dataset, tokenizer)
        logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)

    if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):

        output_dir = os.path.join(args.output_dir, 'checkpoint-{}'.format(global_step))
        if not os.path.exists(output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(output_dir)
        save_model(gpt2_model, output_dir, args, global_step, tokenizer)

        gpt2_model, tokenizer = init_gpt2_model(checkpoint_dir=args.output_dir,
                                                args=args,
                                                model_class=model_class,
                                                tokenizer_class=tokenizer_class)

    # Evaluation
    if args.do_eval and args.local_rank in [-1, 0]:
        eval_done = False
        all_results = {}
        top_checkpoint = None
        patience = 0

        while not eval_done:
            checkpoints = []
            if not args.evaluate_specific:
                checkpoints = list(os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + '/checkpoint-*/' + WEIGHTS_NAME, recursive=True)))
                logging.getLogger("transformers.modeling_utils").setLevel(logging.WARN)  # Reduce logging
                # Sort checkpoints according to the step number
                if len(checkpoints) > 0:
                    checkpoints.sort(key=lambda x: int(x.split("-")[-1]))
            else:
                checkpoints.append(args.evaluate_specific)

            checkpoints = [x for x in checkpoints if x not in all_results]

            # Count the number of while loop iterations no new checkpoints were found
            if len(checkpoints) == 0:
                patience += 1
            else:
                patience = 0

            logger.info("Evaluate the following checkpoints: %s", checkpoints)
            for checkpoint in checkpoints:
                prefix = checkpoint.split('/')[-1] if checkpoint.find('checkpoint') != -1 else ""

                gpt2_model, _ = init_gpt2_model(checkpoint_dir=checkpoint,
                                                args=args,
                                                model_class=model_class)

                result = evaluate(args, gpt2_model, tokenizer, prefix=prefix)
                all_results[checkpoint] = result["perplexity"]

            sorted_results = [(k, v) for k, v in all_results.items()]
            sorted_results.sort(key=lambda x: x[1].item())

            if not args.evaluate_specific and args.do_delete_old and len(sorted_results) > args.save_total_limit:
                logger.info("Deleting worse checkpoints...")
                # delete all but the top save_total_limit checkpoints
                for res in sorted_results[args.save_total_limit:]:
                    if os.path.exists(res[0]):
                        logger.info("Deleting {}...".format(res[0]))
                        shutil.rmtree(res[0])

            # move top checkpoint to root directory
            if not args.evaluate_specific and len(sorted_results) > 0 and sorted_results[0][0] != top_checkpoint:
                command = "cp {}/* {}".format(sorted_results[0][0], args.output_dir)
                logger.info("executing {}...".format(command))
                subprocess.check_output(command, shell=True)
                top_checkpoint = sorted_results[0][0]

            sorted_results_summary = "\n".join(["{} = {:.4f}".format(x[0], x[1]) for x in sorted_results])
            logger.info("Top checkpoints:\n{}".format(sorted_results_summary))

            if args.eval_frequency_min == 0 or args.evaluate_specific or patience > args.eval_patience:
                eval_done = True
            else:
                logger.info("Sleeping for {:d} minutes...zzzz...".format(args.eval_frequency_min))
                time.sleep(args.eval_frequency_min * 60)

    return all_results
Example #28
        if best_at_checkpoint_metric:
            curr_pat = 0
        else:
            curr_pat += 1

        args.current_epoch = epoch + 1  # Save the epoch at which the model needs to start
        save_checkpoint(model, optimizer, args, es_best, epoch_best, 0,
                        curr_pat, checkpoint_filename)
        stdout_logger.info('Saved checkpoint for epoch {}.'.format(epoch))

        if curr_pat > args.patience:
            break

    # Mark job as finished
    f = open(HALT_filename, 'w')
    for metric in es_best.keys():
        f.write('{}:{}\n'.format(metric, es_best[metric]))
    f.close()

    if args.tensorboard:
        logger.close()


if __name__ == '__main__':
    args = get_parser()
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    random.seed(args.seed)
    np.random.seed(args.seed)
    main(args)
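save_checkpoint is not shown in this fragment; a hypothetical implementation consistent with its call site in the loop above:

import torch

def save_checkpoint(model, optimizer, args, es_best, epoch_best,
                    iteration, curr_pat, filename):
    # Assumed payload layout, inferred from the arguments passed above.
    torch.save({
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'args': vars(args),
        'es_best': es_best,
        'epoch_best': epoch_best,
        'iteration': iteration,
        'curr_pat': curr_pat,
    }, filename)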