def main():
    args = get_parser().parse_args()
    print(args)

    # Make datasets
    train_dir = os.path.join(args.datadir, 'train')
    val_dir = os.path.join(args.datadir, 'val')
    print('loading train dataset')
    train_loader = get_dataloader(train_dir, args.batch_size, args.pretrained, args.augmented)
    print('loading val dataset')
    val_loader = get_dataloader(val_dir, args.batch_size, args.pretrained, False)
    args.num_class = 2  # np.unique(train_loader[1])
    args.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Initialise the model
    model = Classifier(args=args)

    while model.counter['epochs'] < args.epochs:
        train(model=model, dataloader=train_loader)
        val(model=model, dataloader=val_loader)
        if model.early_stopping.early_stop:
            break

    if model.writer:
        model.writer.close()
def main():
    args = get_parser().parse_args()

    # Arguments set by hand
    args.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    args.target_name = "LST_status"

    table = pd.read_csv(args.table_data)
    list_wsi = os.listdir(args.wsi)
    list_lst = [
        table[table['ID'] == x][args.target_name].item() for x in list_wsi
    ]
    list_dataset = []

    ## Initialise the model
    model = Classifier(args=args)

    ## Create the datasets
    for path in list_wsi:
        args.wsi = os.path.join(args.wsi, path)
        list_dataset.append(dataset(args))
        args.wsi = os.path.dirname(args.wsi)
    list_dataset = np.array(list_dataset)

    ## K-fold validation
    splitter = StratifiedKFold(n_splits=3)
    for r_eval, (id_train, id_val) in enumerate(splitter.split(list_lst, list_lst)):
        model.name = 'repeat_val_{}'.format(r_eval)
        dataset_train = list_dataset[id_train]
        dataset_val = list_dataset[id_val]
        for db in dataset_train:
            db.transform = get_transform(train=True)
        for db in dataset_val:
            db.transform = get_transform(train=False)
        dataset_train = torch.utils.data.ConcatDataset(dataset_train)
        dataset_val = torch.utils.data.ConcatDataset(dataset_val)
        dataloader_train = DataLoader(dataset=dataset_train,
                                      batch_size=args.batch_size,
                                      num_workers=24)
        dataloader_val = DataLoader(dataset=dataset_val,
                                    batch_size=args.batch_size,
                                    num_workers=24)
        # Initialize dataloaders. This creates 2 datasets: careful, if I want to
        # load everything in memory I'll have to change that to keep only one dataset.
        dataloader_train, dataloader_val = make_loaders(args=args)

        while model.counter['epochs'] < args.epochs:
            print("Begin training")
            train(model=model, dataloader=dataloader_train)
            val(model=model, dataloader=dataloader_val)
            if model.early_stopping.early_stop:
                break

    model.writer.close()
def convert_to_arguments(self, configuration):
    """
    Convert from the configuration dict to a valid dict to send as arguments to the AWS API

    :param configuration: dict
    :return: dict
    """
    configuration_converted = {}
    for key in configuration:
        new_key = self.__to_camel_case(key)
        if arguments.parser_exists(key):
            configuration_converted.update(
                arguments.get_parser(key).parse(configuration[key]))
        else:
            configuration_converted[new_key] = configuration[key]
    return configuration_converted
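The private __to_camel_case helper is referenced above but not shown. A minimal sketch of what such a helper could look like, assuming plain snake_case keys and no special handling for acronyms; this is an illustration, not the project's actual implementation:

def __to_camel_case(self, key):
    # Hypothetical sketch: 'instance_type' -> 'InstanceType', since AWS API
    # arguments are typically UpperCamelCase.
    return ''.join(part.capitalize() for part in key.split('_'))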
def main():
    parser = get_parser()
    args = parser.parse_args()
    update_dir(args)
    print('===================== Parameters =======================')
    print('Device: {}'.format(args.device))
    print('Batch size: {}'.format(args.batch_size))
    print('Epoch number: {}'.format(args.epochs))
    print('Learning rate: {} || Decay rate: {}'.format(args.lr_rate, args.weight_decay))
    print('===================== Starting modified VecRoad =======================')

    env = Environment(args)
    if args.teacher_forcing_number > 0:
        args.epochs = 1
        args.lr_rate = 0.005

    if args.test:
        time_start = time.time()
        env.network.val_mode()
        run_val(env, 0)
        print('Testing time usage: {}h'.format(
            round((time.time() - time_start) / 3600, 3)))
    else:
        for epoch in range(args.epochs):
            env.epoch_counter += 1
            for iCurb_image_index, data in enumerate(env.network.dataloader_train):
                # Load a single tiff image and run training on it.
                # Validation mode
                if (iCurb_image_index % 1000) == 0 and iCurb_image_index:
                    env.network.val_mode()
                    f1 = run_val(env, iCurb_image_index)
                    if f1 > env.network.best_f1:
                        env.network.best_f1 = f1
                        env.network.save_checkpoints(iCurb_image_index)
                # Training mode
                env.network.train_mode()
                run_train(env, data, iCurb_image_index)
                if env.training_step > args.teacher_forcing_number and args.teacher_forcing_number > 0:
                    break
            if env.training_step > args.teacher_forcing_number and args.teacher_forcing_number > 0:
                break
def main(): print("Running") torch.set_default_dtype(torch.float64) parser = get_parser() args = parser.parse_args(sys.argv[1:]) if args.dataset is None: print("No Dataset given, make argument for dataset path") return -1 # build saving folder save_dir = args.save_dir try: os.makedirs(save_dir, exist_ok=True) except OSError: pass morl.run(args)
def main():
    torch.set_default_dtype(torch.float64)

    # ppo parameters
    args_list = ['--lr', '3e-4',
                 '--use-linear-lr-decay',
                 '--gamma', '0.995',
                 '--use-gae',
                 '--gae-lambda', '0.95',
                 '--entropy-coef', '0',
                 '--value-loss-coef', '0.5',
                 '--num-steps', '2048',
                 '--num-processes', '4',
                 '--ppo-epoch', '10',
                 '--num-mini-batch', '32',
                 '--use-proper-time-limits',
                 '--ob-rms',
                 '--obj-rms',
                 '--raw']
    solve_argv_conflict(args_list)
    parser = get_parser()
    args = parser.parse_args(args_list + sys.argv[1:])

    # build saving folder
    save_dir = args.save_dir
    try:
        os.makedirs(save_dir, exist_ok=True)
    except OSError:
        pass

    # output arguments
    fp = open(os.path.join(save_dir, 'args.txt'), 'w')
    fp.write(str(args_list + sys.argv[1:]))
    fp.close()

    logfile = open(os.path.join(args.save_dir, 'log.txt'), 'w')
    sys.stdout = Logger(sys.stdout, logfile)

    morl.run(args)

    logfile.close()
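solve_argv_conflict is applied to the preset list before the real command-line arguments are appended, but its body is not shown in this snippet. A minimal sketch, under the assumption that it simply removes preset flags (and their values) that the user also passes explicitly so the command line wins; this is not the repository's actual implementation:

import sys

def solve_argv_conflict(args_list):
    # Hypothetical sketch: drop a preset flag and its value from args_list
    # whenever the same flag also appears on the command line.
    user_flags = {arg for arg in sys.argv[1:] if arg.startswith('--')}
    i = 0
    while i < len(args_list):
        if args_list[i] in user_flags:
            has_value = i + 1 < len(args_list) and not args_list[i + 1].startswith('--')
            del args_list[i:i + (2 if has_value else 1)]
        else:
            i += 1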
def parse_cl_args():
    # TODO: do away with this function, in theory.
    '''
    Takes arguments from the command line and ignores as many of them as possible.
    '''
    # Assume the user passes no args; these are defaults/dummy values.
    # TODO: trash all of this
    parser = get_parser()
    parser.add_argument('--non-det', action='store_true', default=False,
                        help='whether to use a non-deterministic policy')
    parser.add_argument('--active-column', default=None, type=int,
                        help='Run only one vertical column of a fractal model '
                             'to see what it has learnt independently')
    parser.add_argument('--evaluate', action='store_true', default=False,
                        help="record trained network's performance")
    # Add any experiment-specific args here.
    args = parser.parse_args()
    args.im_render = True
    # args.render = True
    args.random_terrain = False
    args.random_builds = False
    return args
import imageio
from PIL import Image, ImageDraw
import numpy as np
from multiprocessing import Pool
import subprocess
from scipy.spatial import cKDTree
import os
from skimage import measure
import pickle
import random
import json
import time
from arguments import get_parser, update_dir_candidate_train

parser = get_parser()
args = parser.parse_args()
update_dir_candidate_train(args)
tiff_dir = args.image_dir
gt_instance_mask_dir = args.instance_mask_dir
gt_mask_dir = args.mask_dir
skeleton_dir = './records/skeleton/train'
with open('./dataset/data_split.json', 'r') as jf:
    images = json.load(jf)['train']


class Graph():
    def __init__(self):
        self.curbs = []
        self.all_vertices = []
def main():
    # Create parser
    parser = arguments.get_parser()
    # Initialize global iteration counter
    n_iter = 0
    # cuda configuration
    cudnn.benchmark = True
    # Initialize training from arguments and create the args namespace
    training_writer, args = prepare_train.initialize_train(parser)
    # Create data loaders
    train_loader, val_loader, args = prepare_train.create_dataloaders(args)
    # Create invrepnet model
    invrep_net = prepare_train.create_model(args, device)
    # Create optimizer
    optimizer = prepare_train.create_optimizer(args, invrep_net)

    # Train loop by epoch
    for epoch in range(args.epochs):
        # Train for one epoch
        train_loss, n_iter = run_epoch.train_epoch(args, train_loader, invrep_net,
                                                   optimizer, args.epoch_size,
                                                   training_writer, epoch, n_iter)
        print(' * Epoch: {}, Avg Train Loss : {:.3f}'.format(epoch, train_loss))

        # Evaluate the loss on the validation set
        if ((epoch + 1) % args.validation_freq == 0) and not args.skip_validation:
            val_loss, data_time, batch_time = run_epoch.validate_epoch(
                args, val_loader, invrep_net, args.epoch_size, training_writer, epoch)
            print('Val: Epoch={}, AlgTime={:.3f}, DataTime={:.3f}, AvgLoss={:.4f}'
                  .format(epoch, batch_time, data_time, val_loss))

        # Save a checkpoint of the model
        utils.save_checkpoint(args.save_path, {
            'epoch': epoch + 1,
            'state_dict': invrep_net.module.state_dict()
        }, args.checkpoint_freq, epoch + 1)

        # Patch-matching task validation
        if args.with_task_val:
            if (epoch + 1) % args.checkpoint_freq == 0 or (epoch + 1) == args.epochs:
                # Inference
                print('==> Starting Inference\n')
                args.pretrained_model = os.path.join(args.save_path,
                                                     'invrep_checkpoint.pth.tar')
                args.output = os.path.join(args.save_path,
                                           'inference_epoch_{}'.format(epoch + 1))
                run_inf(args)
                print('==> Finished Inference\n')

                # Directories for image saving
                args.task_image_dirs = os.path.join(args.output, 'img')
                args.task_invrep_dirs = os.path.join(args.output, 'rep')

                print('==> Starting Template Matching\n')
                pmres, methods = run_pm(pm_mode=args.pm_mode, args=args)
                for rr, m_res in enumerate(pmres):
                    training_writer.add_scalar('val-task/pm_{}_32'.format(methods[rr]),
                                               np.around(m_res[0], decimals=4), epoch + 1)
                    training_writer.add_scalar('val-task/pm_{}_64'.format(methods[rr]),
                                               np.around(m_res[1], decimals=4), epoch + 1)
                    training_writer.add_scalar('val-task/pm_{}_128'.format(methods[rr]),
                                               np.around(m_res[2], decimals=4), epoch + 1)
                print('==> Finished Template Matching\n')
def main():
    clear_loss()
    parser = get_parser()
    parsed = parser.parse_args()
    assert ((parsed.output_dir is None and parsed.save_step is None) or
            (parsed.output_dir is not None and parsed.save_step is not None)), \
        "Save step and output directory must be null at the same time or not null at the same time"
    ds_type = parsed.dataset

    if ds_type == 'cifar10':
        dataset = CIFAR10Dataset()
        dataset.process()
        img_shape = [32, 32, 3]
    elif ds_type == 'mnist':
        dataset = MNISTDataset()
        dataset.process()
        img_shape = [28, 28, 1]
    elif parsed.dataset == 'fashion':
        dataset = FashionDataset()
        dataset.process()
        img_shape = [28, 28, 1]
    elif parsed.dataset == 'stl10':
        dataset = STLDataset(is_ae=True)
        dataset.process()
        img_shape = [96, 96, 3]
    else:
        print("Unknown dataset")
        exit()

    layers = parse_layers(parsed.layer_str)
    fc_size = parsed.fc_layers
    sess = tf.Session()

    swwae = SWWAE(sess, img_shape, 'autoencode', layers,
                  learning_rate=parsed.learning_rate,
                  lambda_rec=parsed.lambda_rec,
                  lambda_M=parsed.lambda_M,
                  dtype=tf.float32,
                  tensorboard_id=parsed.tensorboard_id,
                  encoder_train=True,
                  rep_size=fc_size,
                  batch_size=parsed.batch_size,
                  sparsity=parsed.sparsity,
                  beta=parsed.beta)

    if parsed.rest_dir is not None:
        swwae.restore(parsed.rest_dir)

    X_test, _ = dataset.get_batches(parsed.batch_size, train=False)
    test_steps = len(X_test)

    print("Preprocessing")
    datagen = ImageDataGenerator(
        featurewise_center=False,             # set input mean to 0 over the dataset
        samplewise_center=False,              # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,   # divide each input by its std
        zca_whitening=False,                  # apply ZCA whitening
        rotation_range=0.0,                   # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,                # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,               # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,                 # randomly flip images
        vertical_flip=False)                  # randomly flip images
    datagen.fit(dataset.training_data)

    train_steps = int(len(dataset.training_data) / parsed.batch_size)
    print("Started training.\nTrain steps: {}".format(train_steps))

    for e in range(parsed.num_epochs):
        total_loss = 0.0
        epoch_loss = 0.0
        batches = 0
        for x_batch in datagen.flow(dataset.training_data, batch_size=parsed.batch_size):
            loss, global_step = swwae.train(x_batch)
            batches += 1
            total_loss += loss
            epoch_loss += loss

            if (batches + 1) % parsed.info_step == 0:
                avg_loss = total_loss / parsed.info_step
                save_loss(avg_loss)
                for test_step in range(test_steps):
                    X_test_step = X_test[test_step]
                    swwae.eval(input=X_test_step)
                # print("Train epoch {}:\n\tstep {}\n\tavg. L2 Loss: {}".format(e + 1, step + 1, avg_loss),
                #       flush=True)
                total_loss = 0.0

            if parsed.save_step is not None:
                if (global_step + 1) % parsed.save_step == 0:
                    swwae.save(path=parsed.output_dir)

            if batches >= train_steps:
                break

        print("Train epoch {}: avg. loss: {}".format(e + 1, epoch_loss / train_steps), flush=True)

    if parsed.output_dir is not None:
        swwae.save(path=parsed.output_dir)

    print("Starting test..")
    total_loss = 0.0
    for test_step in range(test_steps):
        X_test_step = X_test[test_step]
        loss = swwae.eval(input=X_test_step)
        total_loss += loss

    print("Test average loss: {}".format(total_loss / test_steps))
from ray.rllib.agents.ppo import PPOTrainer
from ray.rllib.agents.ppo.ppo_torch_policy import PPOTorchPolicy
from ray.rllib.env import BaseEnv
from ray.rllib.evaluation import MultiAgentEpisode, RolloutWorker
from ray.rllib.models import ModelCatalog
from ray.rllib.policy import Policy

import arguments
import constants
from MinerTraining import Metrics
from exploration_annealing import ExplorationAnnealing
from models import TorchRNNModel, SecondModel, ThirdModel, FourthModel, FifthModel, SixthModel, SeventhModel
from rllib_envs import v0
from utils import policy_mapping

parser = arguments.get_parser()
args = parser.parse_args()
params = vars(args)


class MinerCallbacks(DefaultCallbacks):
    def __init__(self):
        super().__init__()
        self.training_policies = [f"policy_{i}" for i in range(8)]

    def on_episode_end(self, worker: RolloutWorker, base_env: BaseEnv,
                       policies: Dict[str, Policy],
                       episode: MultiAgentEpisode, **kwargs):
        for (agent_name, policy), v in episode.agent_rewards.items():
def main():
    parser = arguments.get_parser()
    cudnn.benchmark = True
    args = prepare_train.initialize_test(parser)

    # Regular invrepnet and original images mode
    if not args.additional_rep:
        # LOADING ARGS FROM COMMAND LINE
        pretrained_model = args.pretrained_model
        use_cpu = args.use_cpu
        data_dir = args.data
        pm_mode = args.pm_mode

        # LOADING ARGS FROM FILE
        args_file = (args.pretrained_model).rsplit('/', 1)[0]
        with open(os.path.join(args_file, 'args_in_run.txt')) as fp:
            for line in fp:
                if line.startswith('--'):
                    tokens = line[2:].strip().split()
                    if tokens[1].isdigit():
                        tokens[1] = int(tokens[1])
                    if tokens[1] == "True":
                        tokens[1] = True
                    if tokens[1] == "False":
                        tokens[1] = False
                    if tokens[1] == "None":
                        tokens[1] = None
                    if tokens[1] == "[]":
                        tokens[1] = []
                    if tokens[1] != 'None':
                        # print('arg.{}={}'.format(tokens[0], tokens[1]))
                        vars(args)[tokens[0]] = tokens[1]

        # EVAL ARGS
        args.freeze_model = True
        args.batch_size = 1

        # OVERRIDE ARGS FROM FILE WITH ARGS FROM COMMAND LINE
        args.pretrained_model = pretrained_model
        args.data = data_dir
        args.use_cpu = use_cpu
        args.pm_mode = pm_mode

        # Inference
        args.output = args.pretrained_model.replace(".pth.tar", "_patchmatching_images")
        if not os.path.exists(args.output):
            os.makedirs(args.output)
        print('\n=> Starting Inference\n')
        run_inf(args)
        print('=> Finished Inference\n')

        # Tasks
        args.task_image_dirs = os.path.join(args.output, 'img')
        args.task_invrep_dirs = os.path.join(args.output, 'rep')

        # Start patch-matching eval after inference
        print('=> Starting Template Matching\n')
        run_pm(pm_mode=args.pm_mode, args=args)
        print('=> Finished Template Matching\n')
    else:
        args.task_invrep_dirs = args.data
        args.task_image_dirs = args.data
        args.pm_mode = 2
        print('=> Starting --ADDITIONAL REPRESENTATION-- Patch Matching\n')
        run_pm(pm_mode=args.pm_mode, args=args)
        print('=> Finished --ADDITIONAL REPRESENTATION-- Patch Matching\n')
    try:
        table.insert(TO_BE_INSERTED, continue_on_error=True)
    except cm.pymongo.errors.DuplicateKeyError:
        pass
    except cm.pymongo.errors.OperationFailure as e:
        print(e, e.code)
    del TO_BE_INSERTED[:]


if __name__ == '__main__':
    # pylint: disable=C0103
    import sys
    import arguments
    from glob import glob
    sources = sorted(glob('all_*.csv'))
    parser = arguments.get_parser()
    args = parser.parse_args()
    print(args)
    db = cm.connect_to_db('foursquare', args.host, args.port)[0]
    TABLE = db['checkin']
    TABLE.ensure_index([('loc', cm.pymongo.GEOSPHERE),
                        ('city', cm.pymongo.ASCENDING)])
    csv.field_size_limit(sys.maxsize)
    total, unmatched = 0, 0
    for fn in sources:
        with open(fn, 'rb') as f:
            reader = csv.DictReader(f, delimiter=';')
            for i, row in enumerate(reader):
                checkin = reformat(row)
                total += 1
                if checkin:
def main():
    parser = get_parser(MODEL_TYPES, ALL_MODELS)
    args = parser.parse_args()

    if args.doc_stride >= args.max_seq_length - args.max_query_length:
        logger.warning(
            "WARNING - You've set a doc stride which may be superior to the document length in some "
            "examples. This could result in errors when building features from the examples. Please reduce the doc "
            "stride or increase the maximum length to ensure the features are correctly built."
        )

    if (os.path.exists(args.output_dir) and os.listdir(args.output_dir)
            and args.do_train and not args.overwrite_output_dir):
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_dir))

    # Setup distant debugging if needed
    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd

        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True)
        ptvsd.wait_for_attach()

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count()
    else:  # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        args.local_rank,
        device,
        args.n_gpu,
        bool(args.local_rank != -1),
        args.fp16,
    )

    # Set seed
    set_seed(args)

    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        # Make sure only the first process in distributed training will download model & vocab
        torch.distributed.barrier()

    args.model_type = args.model_type.lower()
    config = AutoConfig.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path,
        cache_dir=args.cache_dir if args.cache_dir else None,
        output_attentions=args.uncertainty_model)
    tokenizer = AutoTokenizer.from_pretrained(
        args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
        do_lower_case=args.do_lower_case,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )
    model = AutoModelForQuestionAnswering.from_pretrained(
        args.model_name_or_path,
        from_tf=bool(".ckpt" in args.model_name_or_path),
        config=config,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )

    if args.uncertainty_model:
        uncertainty_inp_size = config.num_hidden_layers * config.num_attention_heads
        uncertainty_model = torch.nn.Sequential(
            torch.nn.Linear(uncertainty_inp_size, uncertainty_inp_size // 2),
            torch.nn.Dropout(0.1),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.1),
            torch.nn.Linear(uncertainty_inp_size // 2, 2))
        uncertainty_model.to(args.device)

    if args.local_rank == 0:
        # Make sure only the first process in distributed training will download model & vocab
        torch.distributed.barrier()

    model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)

    # Before we do anything with models, we want to ensure that we get fp16 execution of torch.einsum
    # if args.fp16 is set. Otherwise it'll default to "promote" mode, and we'll get fp32 operations.
    # Note that running `--fp16_opt_level="O2"` will remove the need for this code, but it is still valid.
    if args.fp16:
        try:
            import apex

            apex.amp.register_half_function(torch, "einsum")
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )

    # Training
    if args.do_train:
        train_dataset = load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False)
        global_step, tr_loss = train(
            args, train_dataset, model, tokenizer,
            uncertainty_model if args.uncertainty_model else None)
        logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)

    # Save the trained model and the tokenizer
    if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
        # Create output directory if needed
        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(args.output_dir)

        logger.info("Saving model checkpoint to %s", args.output_dir)
        # Save a trained model, configuration and tokenizer using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`
        # Take care of distributed/parallel training
        model_to_save = model.module if hasattr(model, "module") else model
        model_to_save.save_pretrained(args.output_dir)
        tokenizer.save_pretrained(args.output_dir)

        # Good practice: save your training arguments together with the trained model
        torch.save(args, os.path.join(args.output_dir, "training_args.bin"))

        # Load a trained model and vocabulary that you have fine-tuned
        model = AutoModelForQuestionAnswering.from_pretrained(args.output_dir)  # , force_download=True)
        tokenizer = AutoTokenizer.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
        model.to(args.device)
        if args.uncertainty_model:
            uncertainty_model.to(args.device)

    # Evaluation - we can ask to evaluate all the checkpoints (sub-directories) in a directory
    results = {}
    if args.do_eval and args.local_rank in [-1, 0]:
        if args.do_train:
            logger.info("Loading checkpoints saved during training for evaluation")
            checkpoints = [args.output_dir]
            if args.eval_all_checkpoints:
                checkpoints = list(
                    os.path.dirname(c) for c in sorted(
                        glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True)))
                logging.getLogger("transformers.modeling_utils").setLevel(logging.WARN)  # Reduce model loading logs
        else:
            logger.info("Loading checkpoint %s for evaluation", args.model_name_or_path)
            checkpoints = [args.model_name_or_path]

        logger.info("Evaluate the following checkpoints: %s", checkpoints)

        for checkpoint in checkpoints:
            # Reload the model
            global_step = checkpoint.split("-")[-1] if len(checkpoints) > 1 else ""
            model = AutoModelForQuestionAnswering.from_pretrained(checkpoint)  # , force_download=True)
            model.to(args.device)

            # Evaluate
            result = evaluate(args, model, tokenizer, prefix=global_step)

            result = dict(
                (k + ("_{}".format(global_step) if global_step else ""), v)
                for k, v in result.items())
            results.update(result)

    logger.info("Results: {}".format(results))

    return results
        tak, tkl = DO_CLUSTER(sval, num_cluster)
        current_disto = vf.get_distorsion(tak, tkl, sval)
        if current_disto < min_disto:
            min_disto, ak, kl = current_disto, tak, tkl
    std_ord = np.argsort((np.argsort(ak)), 0)[:, -1]
    # vf.draw_classes(ak[std_ord, :], shift, chunk)
    # vf.plt.title('{}, {} venues'.format(city, len(enough)))
    # vf.plt.ylim([0, 0.28 if weekly else 0.9])
    city = 'times/' + city
    city += '_weekly' if weekly else '_daily'
    sio.savemat(city + '_time', {'t': ak[std_ord, :]}, do_compression=True)
    # vf.plt.savefig(city+'_time.png', dpi=160, transparent=False, frameon=False,
    #                bbox_inches='tight', pad_inches=0.1)
    # vf.plt.clf()


if __name__ == '__main__':
    # pylint: disable=C0103
    import arguments
    import persistent as p
    args = arguments.get_parser().parse_args()
    DB, CLIENT = xp.cm.connect_to_db('foursquare', args.host, args.port)
    res = {}
    for city in reversed(xp.cm.cities.SHORT_KEY):
        print(city)
        plot_city(city, weekly=False, clusters=5)
        # plot_city(ciy, weekly=True, clusters=)
        # venue_visits = xp.get_visits(CLIENT, xp.Entity.venue, city)
        # res.update({k: len(v) for k, v in venue_visits.iteritems()})
    # p.save_var('venue_visits', res)
locale.setlocale(locale.LC_ALL, '')


def ordered(counts, cities, threshold=10):
    """Return `counts` ordered by cities."""
    as_dict = {v['_id']: v['count'] for v in counts}
    count = [as_dict.get(city, 0) for city in cities]
    count.append(sum(count))
    fmt = lambda v: locale.format('%d', v, grouping=True)
    return [fmt(c) if c > threshold else '' for c in count]


if __name__ == '__main__':
    # pylint: disable=C0103
    import arguments
    args = arguments.get_parser().parse_args()
    foursquare, client = cm.connect_to_db('foursquare', args.host, args.port)
    checkins = foursquare.checkin
    venues = foursquare.venue
    photos = client.world.photos
    newer = dt(2001, 2, 1)
    t = pt.PrettyTable()
    t.junction_char = '|'
    # checkin = checkins.aggregate([{'$match': {'time': {'$lt': newer}}},
    #                               {'$project': {'city': 1}},
    #                               {'$group': {'_id': '$city',
    #                                           'count': {'$sum': 1}}},
    #                               {'$sort': {'count': -1}}])
    located = checkins.aggregate([{'$match': {'lid': {'$ne': None},
                                              'time': {'$lt': newer}}},
                                  {'$project': {'city': 1}},