Example #1
    def test_write_read_files(self):
        '''test_write_read_files will test the functions write_file and read_file
        '''
        print("Testing utils.write_file...")
        from utils import write_file
        import json
        tmpfile = tempfile.mkstemp()[1]
        os.remove(tmpfile)
        write_file(tmpfile, "hello!")
        self.assertTrue(os.path.exists(tmpfile))

        print("Testing utils.read_file...")
        from utils import read_file
        content = read_file(tmpfile)[0]
        self.assertEqual("hello!", content)

        from utils import write_json
        print("Testing utils.write_json...")
        print("Case 1: Providing bad json")
        bad_json = {"Wakkawakkawakka'}": [{True}, "2", 3]}
        tmpfile = tempfile.mkstemp()[1]
        os.remove(tmpfile)
        with self.assertRaises(TypeError) as cm:
            write_json(bad_json, tmpfile)

        print("Case 2: Providing good json")
        good_json = {"Wakkawakkawakka": [True, "2", 3]}
        tmpfile = tempfile.mkstemp()[1]
        os.remove(tmpfile)
        write_json(good_json, tmpfile)
        content = json.load(open(tmpfile, 'r'))
        self.assertTrue(isinstance(content, dict))
        self.assertTrue("Wakkawakkawakka" in content)
Example #2
 def get_seeding(self) -> None:
     """
         Get seeding links from Common Crawl using the seeding details
     """
     self.get_seeding_links(self.links)
     write_json(self.final_seed, "seeding_final_data.json")
     print("Done saving seeding file !!!")
Example #3
    def __init__(self, config_name):
        self.config_name = config_name
        self.config = read_json(self.config_name)

        # set save_dir where trained models and log will be saved
        save_dir = Path(self.config['trainer']['save_dir'])

        experiment_name = self.config['name']
        run_id = datetime.now().strftime(r'%m%d_%H%M%S')
        self.save_dir = save_dir / 'models' / experiment_name / run_id
        self.log_dir = save_dir / 'logs' / experiment_name / run_id

        # make directory for saving checkpoints and log
        exist_ok = False
        self.save_dir.mkdir(parents=True, exist_ok=exist_ok)
        self.log_dir.mkdir(parents=True, exist_ok=exist_ok)

        # save updated config file to the checkpoint dir
        write_json(self.config, self.save_dir / 'config.json')

        # configure logging module
        setup_logging(self.log_dir)
        self.log_levels = {
            0: logging.WARNING,
            1: logging.INFO,
            2: logging.DEBUG
        }
Example #4
 def _gen_vocab(self):
     word_counter = Counter()
     for sent in self.data['question']:
         word_counter.update(
             [word for word in jieba.cut(sent) if regex.search(word)])
     write_json(word_counter, self.config['vocab_path'])
     return word_counter
Example #5
def create():
    print("==========\nArgs:{}\n==========".format(args))
    print(
        "Goal: randomly split data for {} times, {:.1%} for training and the rest for testing"
        .format(args.num_splits, args.train_percent))
    print("Loading dataset from {}".format(args.dataset))
    dataset = h5py.File(args.dataset, 'r')
    keys = dataset.keys()
    num_videos = len(keys)
    num_train = int(math.ceil(num_videos * args.train_percent))
    num_test = num_videos - num_train
    print(
        "Split breakdown: # total videos {}. # train videos {}. # test videos {}"
        .format(num_videos, num_train, num_test))
    splits = []

    for split_idx in range(args.num_splits):
        train_keys, test_keys = split_random(keys, num_videos, num_train)
        splits.append({
            'train_keys': train_keys,
            'test_keys': test_keys,
        })

    saveto = osp.join(args.save_dir, args.save_name + '.json')
    write_json(splits, saveto)
    print("Splits saved to {}".format(saveto))

    dataset.close()
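split_random is used above but not defined in this snippet. A plausible sketch, assuming it simply shuffles the keys and cuts the list at num_train (the real helper may differ):

import random


def split_random(keys, num_videos, num_train):
    # Hypothetical helper: shuffle the keys and split them into
    # train/test partitions of size num_train and num_videos - num_train.
    keys = list(keys)  # copy, since h5py may hand back a view rather than a list
    random.shuffle(keys)
    train_keys = keys[:num_train]
    test_keys = keys[num_train:]
    assert len(test_keys) == num_videos - num_train
    return train_keys, test_keys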
Example #6
def get_token(**kwargs):
    token = kwargs.get("token") or read_json(PLEX_TOKEN_PATH).get("token")
    if token:
        return token
    token = plex_token_auth(kwargs["login"], kwargs["password"])
    write_json({"token": token}, PLEX_TOKEN_PATH)
    return token
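get_token reads the token cache before a token file necessarily exists, so the read_json used here presumably tolerates a missing file. A hedged sketch of such a reader, plus a usage example with placeholder credentials:

import json
import os


def read_json(path):
    # Assumed behaviour: return an empty dict when the cache file is missing,
    # so read_json(PLEX_TOKEN_PATH).get("token") is safe on the first run.
    if not os.path.exists(path):
        return {}
    with open(path) as fp:
        return json.load(fp)


# First call authenticates and caches the token; later calls reuse the cache.
token = get_token(login="user@example.com", password="secret")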
Example #7
    def save_configuration_overrides(self):
        """
        Save the configuration file, including any updates that have been made
        to either the integration (output) location or the logging location.
        Users can opt in to this so that the next time the UI is loaded, these
        values will be loaded by default.
        """
        _logging_location = self.configuration_widgets.logging_location_label.text(
        ).replace('Logging Location: ', '')
        _output_location = self.configuration_widgets.integrate_location_label.text(
        ).replace('Output Location: ', '')
        _DEFAULT_CONFIG = {
            'loggingLocation':
            self.configuration_widgets.logging_location_label.text().replace(
                'Logging Location: ', ''),
            'outputLocation':
            self.configuration_widgets.integrate_location_label.text().replace(
                'Output Location: ', ''),
            'loggingStatus':
            'True'
            if self.configuration_widgets.logging_status_checkBox.isChecked()
            else 'False'
        }

        write_json(_DEFAULT_CONFIG)
Example #8
def main(args):
    output_dir = args.output_dir
    mkdir_if_missing(osp.join(args.output_dir, 'cam_0'))
    mkdir_if_missing(osp.join(args.output_dir, 'cam_1'))
    num_identities = 971
    identities = [0] * num_identities
    for i in xrange(num_identities):
        p_images = [[], []]
        for j in xrange(4):
            cam_id = j // 2
            src_file = '{:04d}{:03d}.png'.format(i + 1, j + 1)
            tgt_file = 'cam_{}/{:05d}_{:05d}.png'.format(cam_id, i, j % 2)
            shutil.copy(osp.join(args.cuhk01_dir, src_file),
                        osp.join(args.output_dir, tgt_file))
            p_images[cam_id].append(tgt_file)
        identities[i] = p_images
    # Save meta information into a json file
    meta = {'name': 'cuhk01', 'shot': 'multiple', 'num_cameras': 2}
    meta['identities'] = identities
    write_json(meta, osp.join(output_dir, 'meta.json'))
    # Randomly create a training and test split
    num = len(identities)
    pids = np.random.permutation(num)
    trainval_pids = sorted(pids[:num // 2])
    test_pids = sorted(pids[num // 2:])
    split = {
        'trainval': trainval_pids,
        'test_probe': test_pids,
        'test_gallery': test_pids
    }
    write_json(split, osp.join(output_dir, 'split.json'))
Example #9
    def post(self):
        config_data = utils.read_json_data(self.config_file)
        if not config_data:
            abort(404, message="No valid config file found.")

        args = config_parser.parse_args()
        args_dict = dict(args)

        for key in args_dict:
            if not args_dict[key]:
                continue

            if not key in config_data:
                abort(404, message="Category {} is not valid.".format(key))

            pair = args_dict[key].split(':')
            if not len(pair) == 2:
                abort(
                    404,
                    message=
                    "No valid config value provided. Format is str(parameter:value)."
                )

            config_data[key][pair[0]] = pair[1]

        utils.write_json(config_data, self.config_file)

        return config_data, 200
Example #10
def main():
    config = utils.read_json("config/config.json")

    artist_name = config["artist_name"]
    mongo_driver = MongoDBDriver(config["mongodb"])
    chord_collector = UltimateGuitarCollector(config["ultimate_guitar"])
    genius_collector = GeniusCollector(config["genius"])
    spotify_collector = SpotifyCollector(config["spotify"])

    genius_df = genius_collector.collect(config["artist_id"], store_json=True)
    spotify_df = spotify_collector.collect(artist_name,
                                           config["album_blacklist"],
                                           store_json=True)

    left_key = spotify_df["name"].apply(utils.simplify_str)
    right_key = genius_df["title"].apply(utils.simplify_str)

    df = pd.merge(spotify_df,
                  genius_df,
                  how='left',
                  left_on=left_key,
                  right_on=right_key,
                  suffixes=("_spotify",
                            "_genius")).drop(columns=["key_0", "title"])
    df = df.fillna("")
    df["artist"] = artist_name
    df["chords"] = df.apply(
        lambda x: chord_collector.collect(artist_name, x["name"]), axis=1)

    # mongo_driver.clean_db()
    # mongo_driver.add_discography(artist_name, df)
    utils.write_json("data/data.json", df.to_dict("records"))
Example #11
    def __init__(self, config, resume=None, run_id=None):
        """
        class to parse configuration json file. Handles hyperparameters for training, initializations of modules,
        checkpoint saving and logging module.
        :param config: Dict containing configurations, hyperparameters for training. contents of `config.json` file
         for example.
        :param resume: String, path to the checkpoint being loaded.
        :param run_id: Unique Identifier for training processes. Used to save checkpoints and training log.
         Timestamp is being used as default
        """
        self._config = config
        self.resume = resume

        if self.config['trainer']['save_dir'] == '':
            return

        self.name = self.config['name']
        self.run_id = run_id
        if run_id is None:
            self.run_id = datetime.now().strftime(
                r'%m%d_%H%M%S')  # use timestamp as default run-id
        self.result_dir = Path(self.config['trainer']['save_dir'])
        self.save_dir.mkdir(parents=True, exist_ok=True)

        # save updated config file to the checkpoint dir
        write_json(self.config, self.save_dir / 'config.json')

        # configure logging module
        setup_logging(self.log_dir, log_config='src/logger/logger_config.json')
Example #12
async def create(ctx, game, time, *args):
    if game not in game_format:
        await ctx.send(em.get("select_game"))
        return
    elif not (pattern.match(time)):
        await ctx.send(em.get("time"))
        return
    elif len(args) != len(set(lowercase_players(args))):
        await ctx.send(em.get("duplicate_player"))
        return
    elif len(args) >= max_players.get(game):
        await ctx.send(em.get("too_many_players"))
        return
    server_id = str(ctx.message.guild.id)
    data = read_json("src/teams.json")
    players = [ctx.author.name]
    if len(args) > 0:
        for p in args:
            players.append(p)
    if server_id in data:
        server_teams = data.get(server_id)
        count = len(server_teams)
        id = int(server_teams[count - 1].get("id")) + 1 if count > 0 else 1
        new_team = {"id": id, "game": game, "time": time, "players": players}
        server_teams.append(new_team)
    else:
        id = 1
        new_team = {"id": 1, "game": game, "time": time, "players": players}
        data[server_id] = [new_team]

    write_json(data)
    await ctx.send("Created Team {}.".format(id))
    await ctx.send(embed=embed_team(new_team))
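Note that write_json is called with only the data here, so in this bot it is presumably bound to the same src/teams.json store that read_json loads. A minimal sketch under that assumption:

import json

TEAMS_PATH = "src/teams.json"  # assumed to match the path passed to read_json


def read_json(path):
    with open(path) as fp:
        return json.load(fp)


def write_json(data, path=TEAMS_PATH):
    # Persist the in-memory teams dict back to the JSON store.
    with open(path, "w") as fp:
        json.dump(data, fp, indent=2)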
Example #13
    def __init__(self, config, resume=None, modification=None, run_id=None):
        """
        class to parse configuration json file. Handles hyperparameters for training, initializations of modules, checkpoint saving
        and logging module.
        :param config: Dict containing configurations, hyperparameters for training. contents of `config.json` file for example.
        :param resume: String, path to the checkpoint being loaded.
        :param modification: Dict keychain:value, specifying position values to be replaced from config dict.
        :param run_id: Unique Identifier for training processes. Used to save checkpoints and training log. Timestamp is being used as default
        """
        # load config file and apply modification
        self._config = _update_config(config, modification)
        self.resume = resume

        # set save_dir where trained model and log will be saved.
        save_dir = Path(self.config['save_dir'])

        exper_name = self.config['name']
        if run_id is None:  # use timestamp as default run-id
            run_id = datetime.now().strftime(r'%m%d_%H%M%S')
        if resume is None:
            self._save_dir = save_dir / 'models' / exper_name / run_id
        else:
            self._save_dir = resume.parent

        self._log_dir = save_dir / 'log' / exper_name / run_id

        self.save_dir.mkdir(parents=True, exist_ok=True)
        # save updated config file to the checkpoint dir
        write_json(self.config, self.save_dir / 'config.json')

        self.log_levels = {
            0: logging.WARNING,
            1: logging.INFO,
            2: logging.DEBUG
        }
Example #14
def main():
    parser = argparse.ArgumentParser(description="Available Parameters:")
    parser.add_argument("--n_hidden_units", default=64, type=int)
    parser.add_argument("--n_hidden_layers", default=1, type=int)
    parser.add_argument("--train_epochs", default=100, type=int)
    parser.add_argument("--write_output", default=True, type=bool)
    args = parser.parse_args()

    torch.manual_seed(0)
    np.random.seed(0)

    profiles = pd.read_csv("../data/new_profiles_200t.csv")
    comments = pd.read_csv("../data/new_comments_200t.csv")

    comments = comments.drop_duplicates()
    profiles = preprocessing.categorical_to_numerical(profiles, col="category_1")
    all_users = set(profiles.profile_username.values)

    data = preprocessing.scale(profiles.drop(columns=["category_1", "profile_username"]).values)
    name_to_record = {name: record for name, record in zip(all_users, data)}

    input_dim, output_dim = data.shape[1], len(profiles.category_1.unique()) + 1
    user_to_label = {user: category for user, category in profiles[["profile_username", "category_1"]].values}

    K = 5
    skf = StratifiedKFold(n_splits=K)
    models_metrics, models_histories = defaultdict(dict), defaultdict(list)
    for kth_fold, (train_idx, test_idx) in enumerate(skf.split(profiles.profile_username.values, profiles.category_1.values), start=1):
        print("Starting {}th Fold".format(kth_fold))

        authors = profiles.profile_username.values
        username_to_index = utils.get_users_indices(authors)
        interactions = utils.get_interactions(comments, username_to_index)
        edge_index = utils.get_edge_index(interactions)
        
        x = utils.get_x(authors, name_to_record, input_dim=input_dim)
        y = utils.get_y(user_to_label, authors)

        train_mask = [True if i in train_idx else False for i in range(len(x))]
        test_mask = [True if i in test_idx else False for i in range(len(x))]
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        data = Data(x=x, y=y, edge_index=edge_index, train_mask=train_mask, test_mask=test_mask).to(device)

        assert len(x)==len(y), "Train Input and Output tensor do not have the same dimensions"

        models = utils.get_models(data.num_nodes, input_dim, output_dim, args.n_hidden_units, args.n_hidden_layers, device=device, lr=0.005)
        histories = utils.train(data, models, epochs=args.train_epochs)
        models_histories = utils.update_histories(models_histories, histories)

        current_metrics = utils.test(data, models)
        utils.update_metrics_dict(models_metrics, current_metrics)

        print('\n')
        
    models_histories = {model: list(history/K) for model, history in models_histories.items()} # Get mean traces
    models_metrics = utils.calculate_statistics(models_metrics)

    if args.write_output:
        utils.write_json("../data/results/models_metrics_{}e_{}l_{}u.json".format(args.train_epochs, args.n_hidden_layers, args.n_hidden_units), models_metrics)
        utils.write_json("../data/results/models_histories_{}e_{}l_{}u.json".format(args.train_epochs, args.n_hidden_layers, args.n_hidden_units), models_histories)
Example #15
def parse_organism_ppi(org, ppi_path, node_path, edge_path):
    with open(ppi_path, 'r') as index:
        # list of all protein codes
        nodes = set()
        # list of all protein interactions
        edges = []

        while True:
            line = index.readline()
            if line == '':
                break
            elif line[:(len(org))] != org:
                continue
            line = line.strip()
            words = line.split()
            prot1 = words[0]
            prot2 = words[1]
            score = words[2]
            if int(score) > cs.INTERACTION_THR:
                nodes.add(prot1)
                nodes.add(prot2)
                edges.append(words)

        utils.write_json(list(nodes), node_path)

        utils.write_json(edges, edge_path)
Example #16
def final_grades(ctx, term):
    mytru.login(ctx)

    classes = mytru.extract_final_grades(ctx, term)

    old = read_json('final_grades')
    diff = jsondiff.diff(old, json.loads(json.dumps(classes)))

    if diff:
        write_json('final_grades', classes)
        click.echo("Changes detected! Current standings:\n{}".format(
            mytru.format_final_grades(classes)))

        if ctx.obj.email:
            # Email mode detected
            if not ctx.obj.sendgrid_api_key:
                logging.error(
                    "No api key provided for SendGrid! Please specify "
                    "a --sendgrid-api-key")
                end(ctx, status=1)
            response = sendgrid_send_email(
                ctx.obj.sendgrid_api_key, ctx.obj.sender, ctx.obj.email,
                'mytruCLI: Changes in Final Grades detected',
                "Current results:\n {}\n\n Difference:\n {}".format(
                    mytru.format_final_grades(classes), diff))
    else:
        click.echo('No changes detected.')

    end(ctx, status=0)
Example #17
    def _process_dir(self, dir_path, json_path, relabel):
        if osp.exists(json_path):
            print("=> {} generated before, awesome!".format(json_path))
            split = read_json(json_path)
            return split['tracklets']

        print(
            "=> Automatically generating split (might take a while for the first time, have a coffee)"
        )
        pdirs = glob.glob(osp.join(dir_path, '*'))  # avoid .DS_Store
        print("Processing '{}' with {} person identities".format(
            dir_path, len(pdirs)))

        pid_container = set()
        for pdir in pdirs:
            pid = int(osp.basename(pdir))
            pid_container.add(pid)
        pid2label = {pid: label for label, pid in enumerate(pid_container)}

        tracklets = []
        for pdir in pdirs:
            pid = int(osp.basename(pdir))
            if relabel: pid = pid2label[pid]
            tdirs = glob.glob(osp.join(pdir, '*'))
            for tdir in tdirs:
                raw_img_paths = glob.glob(osp.join(tdir, '*.jpg'))
                num_imgs = len(raw_img_paths)

                if num_imgs < self.min_seq_len:
                    continue

                img_paths = []
                for img_idx in range(num_imgs):
                    # some tracklet starts from 0002 instead of 0001
                    img_idx_name = 'F' + str(img_idx + 1).zfill(4)
                    res = glob.glob(
                        osp.join(tdir, '*' + img_idx_name + '*.jpg'))
                    if len(res) == 0:
                        print(
                            "Warn: index name {} in {} is missing, jump to next"
                            .format(img_idx_name, tdir))
                        continue
                    img_paths.append(res[0])
                img_name = osp.basename(img_paths[0])
                if img_name.find('_') == -1:
                    # old naming format: 0001C6F0099X30823.jpg
                    camid = int(img_name[5]) - 1
                else:
                    # new naming format: 0001_C6_F0099_X30823.jpg
                    camid = int(img_name[6]) - 1
                img_paths = tuple(img_paths)
                tracklets.append((img_paths, pid, camid))

        print("Saving split to {}".format(json_path))
        split_dict = {
            'tracklets': tracklets,
        }
        write_json(split_dict, json_path)

        return tracklets
Example #18
def outputs_to_json(file_path, dataset, classif, c_value=None):
    """ Create json file for visualization"""

    file_path = file_path.format(dataset, classif)

    results = np.load(file_path).item()

    if c_value is not None and not isinstance(c_value, list):
        c_value = [c_value]
    elif c_value is None:
        c_value = results.keys()


    for c_val in c_value:
        conf_mats = results[c_val]["confusions"]["train"]

        to_json = {"points":[], "P":int(conf_mats[0][1].sum()), "N":int(conf_mats[0][0].sum()),
                   "beta":1.0, "C":c_val, "dataset":dataset, "classif":classif}

        for conf_i, conf_mat in enumerate(conf_mats):
            to_json["points"].append({"t":float(results[c_val]["t_values"][conf_i]),
                                      "fp":int(conf_mat[0, 1]),
                                      "fn":int(conf_mat[1, 0])})

        utils.write_json("%s_C%s.json"%(file_path.replace(".npy", ""), str(c_val).replace(".", "")), to_json)
Example #19
    def test_write_read_files(self):
        '''test_write_read_files will test the functions write_file and read_file
        '''
        print("Testing utils.write_file...")
        from utils import write_file
        import json
        tmpfile = tempfile.mkstemp()[1]
        os.remove(tmpfile)
        write_file(tmpfile,"hello!")
        self.assertTrue(os.path.exists(tmpfile))        

        print("Testing utils.read_file...")
        from utils import read_file
        content = read_file(tmpfile)[0]
        self.assertEqual("hello!",content)

        from utils import write_json
        print("Testing utils.write_json...")
        print("Case 1: Providing bad json")
        bad_json = {"Wakkawakkawakka'}":[{True},"2",3]}
        tmpfile = tempfile.mkstemp()[1]
        os.remove(tmpfile)        
        with self.assertRaises(TypeError) as cm:
            write_json(bad_json,tmpfile)

        print("Case 2: Providing good json")        
        good_json = {"Wakkawakkawakka":[True,"2",3]}
        tmpfile = tempfile.mkstemp()[1]
        os.remove(tmpfile)
        write_json(good_json,tmpfile)
        content = json.load(open(tmpfile,'r'))
        self.assertTrue(isinstance(content,dict))
        self.assertTrue("Wakkawakkawakka" in content)
Example #20
async def add(ctx, id, *args):
    server_id = str(ctx.message.guild.id)
    data = read_json("src/teams.json")
    for t in data[server_id]:
        if str(t.get("id")) == id:
            players = t.get("players")
            lc_players = lowercase_players(players)
            if ctx.author.name.lower() not in lc_players:
                await ctx.send(em.get("non_member"))
            elif len(players) == max_players.get(t.get("game")):
                await ctx.send(em.get("team_full"))
            elif len(args) != len(set(lowercase_players(args))):
                await ctx.send(em.get("duplicate_player"))
            elif len(args) + len(players) > max_players.get(t.get("game")):
                await ctx.send(em.get("too_many_players"))
            else:
                new_players = []
                for p in args:
                    if p.lower() not in lc_players:
                        players.append(p)
                        new_players.append(p)
                    else:
                        await ctx.send("{} is already part of the team!".format(p))
                write_json(data)
                if len(new_players) > 0:
                    await ctx.send(
                        "{} have been added to Team {}.".format(
                            ", ".join(new_players), id
                        )
                    ) if len(new_players) > 1 else await ctx.send(
                        "{} has been added to Team {}.".format(new_players[0], id)
                    )
                    await ctx.send(embed=embed_team(t))
            return
    await ctx.send(em.get("team_not_found"))
Example #21
def parse_organism_seq(org, in_path=cs.STRING_PATH, out_path=cs.JSON_PATH):
    seq_name = '{}.protein.sequences.v10.5.fa'.format(org)
    pseq_name = '{}_parsed_sequences.json'.format(org)

    seq_path = utils.join_path(in_path, seq_name)
    pseq_path = utils.join_path(out_path, pseq_name)

    with open(seq_path, 'r') as index:

        # list of all protein sequences
        proteins = []
        prt = ''
        seq = ''

        while True:
            line = index.readline()
            line = line.strip()
            if line == '':
                proteins.append({'code': prt, 'sequence': seq})
                break
            elif line[0] == '>':
                proteins.append({'code': prt, 'sequence': seq})
                prt = line[1:]
                seq = ''
            else:
                seq += line

        utils.write_json(proteins, pseq_path)
Example #22
def process_images():
    hists = read_json()
    latest = int(max(hists.keys() if hists.keys() else 0, 1))
    max_ = max(10001, latest + 1000)
    for i in range(latest, max_):
        hists[i] = list(test_main(f'data/corel10K/{i}.jpg'))
        write_json(hists)
Example #23
    def _validate_and_prepare_dir(self, app_dir_path, undo_ops, python_exe):
        results = run_installed_tests_as_subprocess(app_dir_path,
                                                    self.django_settings_module,
                                                    python_exe_path=python_exe,
                                                    read_config_file=False)
        if len(results.error_messages)>0:
            for error in results.error_messages:
                logger.error("ERROR>> %s" % error)
        if len(results.warning_messages)>0:
            for warning in results.warning_messages:
                logger.warning("WARNING>> %s" % warning)
        results.print_final_status_message(logger)
        if results.get_return_code() != SUCCESS_RC:
            return results.get_return_code()
        config = django_config_from_validation_results(self.django_settings_module,
                                                       VERSION,
                                                       results)
        write_json(config.to_json(), os.path.join(app_dir_path, DJANGO_CFG_FILENAME))

        # undo the changes we made
        for (op_fun, args) in undo_ops:
            op_fun(*args)
        # delete all the .pyc files
        find_files(app_dir_path, "\.pyc$", os.remove)
        # (re)create the archive
        if self.archive_type == "zip":
            archive = ZipfileHandler(self.archive_file, "w")
        else:
            archive = TarfileHandler(self.archive_file, "w:gz")
        archive.create_new_from_dir(app_dir_path)
        archive.close()
        return SUCCESS_RC
Example #24
    def __init__(self, args, options='', timestamp=True):
        # parse default and custom cli options
        for opt in options:
            args.add_argument(*opt.flags, default=None, type=opt.type)
        args = args.parse_args()
        self._name = None

        if hasattr(args, 'device'):
            if args.device:
                os.environ["CUDA_VISIBLE_DEVICES"] = args.device

        self.cfg_fname = None
        if hasattr(args, 'resume'):
            if args.resume:
                self.resume = Path(args.resume)
                if hasattr(args, 'config') and args.config is not None:
                    self.cfg_fname = Path(args.config)
                else:
                    self.cfg_fname = self.resume.parent / 'config.json'

        if self.cfg_fname is None:
            if hasattr(args, 'config'):
                msg_no_cfg = "Configuration file needs to be specified. Add '-c config.json', for example."
                assert args.config is not None, msg_no_cfg
                self.resume = None
                self.cfg_fname = Path(args.config)

        if hasattr(args, 'name'):
            if args.name:
                self._name = str(args.name)

        # load config file and apply custom cli options
        config = read_json(self.cfg_fname)
        self._config = _update_config(config, options, args)

        # set save_dir where trained model and log will be saved.
        save_dir = Path(self.config['trainer']['save_dir'])
        timestamp = datetime.now().strftime(
            r'%d%m%y_%H%M%S') if timestamp else ''

        exper_name = self.config['name']
        self._save_dir = save_dir / 'models' / exper_name / timestamp
        self._log_dir = save_dir / 'log' / exper_name / timestamp
        self._temp_dir = save_dir / 'temp' / exper_name / timestamp

        self.save_dir.mkdir(parents=True, exist_ok=True)
        self.log_dir.mkdir(parents=True, exist_ok=True)
        self.temp_dir.mkdir(parents=True, exist_ok=True)

        # save updated config file to the checkpoint dir
        write_json(self.config, self.save_dir / 'config.json')

        # configure logging module
        setup_logging(self.log_dir)
        self.log_levels = {
            0: logging.WARNING,
            1: logging.INFO,
            2: logging.DEBUG
        }
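A usage sketch for this constructor. The flag names simply mirror the attributes the constructor reads (config, resume, device, name), and the CustomArgs shape mirrors what the options loop expects; both, as well as the class name, are assumptions rather than this project's actual CLI:

import argparse
import collections

CustomArgs = collections.namedtuple('CustomArgs', 'flags type target')

args = argparse.ArgumentParser(description='Training')
args.add_argument('-c', '--config', default=None, type=str)
args.add_argument('-r', '--resume', default=None, type=str)
args.add_argument('-d', '--device', default=None, type=str)
args.add_argument('--name', default=None, type=str)

# Each option only needs .flags and .type for the loop above; .target is
# presumably consumed later by _update_config.
options = [CustomArgs(['--lr', '--learning_rate'], type=float, target='optimizer;args;lr')]

# 'ConfigParser' stands in for the class that owns the __init__ above.
config = ConfigParser(args, options)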
Example #25
    def __init__(self, config, resume=None, modification=None, run_id=None):
        """
        class to parse configuration json file. Handles hyperparameters for training, initializations of modules, checkpoint saving
        and logging module.
        :param config: Dict containing configurations, hyperparameters for training. contents of `config.json` file for example.
        :param resume: String, path to the checkpoint being loaded.
        :param modification: Dict keychain:value, specifying position values to be replaced from config dict.
        :param run_id: Unique Identifier for training processes. Used to save checkpoints and training log. Timestamp is being used as default
        """
        # load config file and apply modification
        self._config = _update_config(config, modification)
        self.resume = resume

        # set save_dir where trained model and log will be saved.
        save_dir = Path(self.config['trainer']['save_dir'])

        exper_name = self.config['name']
        if run_id is None:  # use timestamp as default run-id
            run_id = datetime.now().strftime(r'%m%d_%H%M%S')
        else:
            run_id = datetime.now().strftime(r'%m%d_%H%M%S') + '_' + run_id
        print("save dir name : {}".format(run_id))

        # ZIP code [ default choice ]
        from utils.ZIPCODE import ZIPCODE
        import time
        import os
        target_path = './saved/code/'
        if not os.path.isdir(target_path):
            os.mkdir(target_path)
        target_name = run_id + '.zip'
        source_path = './'
        except_dir = ['saved', '__pycache__', '.ipynb_checkpoints', '.git']
        except_file = ['pth']
        ZIPCODE(target_path, target_name, source_path, except_dir, except_file)

        self._save_dir = save_dir / 'models' / exper_name / run_id
        self._log_dir = save_dir / 'log' / exper_name / run_id
        self._obj_dir = save_dir / 'obj' / exper_name / run_id

        # Save 3D obj
        # path = os.path.join('./saved','obj',exper_name,run_id)

        # make directory for saving checkpoints and log.
        exist_ok = run_id == ''
        self.save_dir.mkdir(parents=True, exist_ok=exist_ok)
        self.log_dir.mkdir(parents=True, exist_ok=exist_ok)
        self.obj_dir.mkdir(parents=True, exist_ok=exist_ok)

        # save updated config file to the checkpoint dir
        write_json(self.config, self.save_dir / 'config.json')

        # configure logging module
        setup_logging(self.log_dir)
        self.log_levels = {
            0: logging.WARNING,
            1: logging.INFO,
            2: logging.DEBUG
        }
Example #26
def _create_extract(dataset_id):
    '''Given the unique identifier for a TweetSets dataset, submits a task to generate the extracts (ID\'s, JSON, CSV) for the whole dataset and saves to local storage.'''
    def get_file_limits():
        '''Returns the limits for each type of extract, as set in environment variables, or uses the default.'''
        return (app.config['MAX_PER_TXT_FILE'],
                app.config['MAX_PER_JSON_FILE'],
                app.config['MAX_PER_CSV_FILE'])

    def construct_path(dataset_id):
        '''Constructs the path to save the extract, given its unique ID, creating the directory if necessary.'''
        path = _dataset_path(dataset_id, full_dataset=True)
        if not os.path.exists(path):
            os.makedirs(path)
        return path

    def get_tweet_count(dataset_params):
        '''Retrieve total count of Tweets in dataset from ES index, using the dataset ID provided as a field in dataset_params.'''
        search = dataset_params_to_search(dataset_params, skip_aggs=True)
        search_response = search.execute()
        return search_response.hits.total.value

    # Dataset path
    dataset_path = construct_path(dataset_id)
    # Create the minimally necessary params for this extract
    dataset_params = {
        'tweet_type_original': 'true',
        'tweet_type_quote': 'true',
        'tweet_type_retweet': 'true',
        'tweet_type_reply': 'true'
    }
    dataset_params.update({
        'source_dataset':
        dataset_id,
        'dataset_name':
        'full-extract-of-{}'.format(dataset_id)
    })
    # Save to disk
    write_json(os.path.join(dataset_path, 'dataset_params.json'),
               dataset_params)
    # Create the task definitions
    keys = ['tweet_ids', 'tweet_json', 'tweet_csv']
    # File limits
    task_defs = {
        key: {
            'max_per_file': value
        }
        for key, value in zip(keys, get_file_limits())
    }
    # Email for notification
    task_defs['requester_email'] = app.config['ADMIN_EMAIL']
    # URL for extract
    task_defs['dataset_url'] = app.config['HOST']
    # Total tweets in index
    total_tweets = get_tweet_count(dataset_params)
    task = _generate_tasks.delay(task_defs, dataset_params, total_tweets,
                                 dataset_path)
    generate_tasks_filepath = os.path.join(dataset_path, 'generate_tasks.json')
    write_json(generate_tasks_filepath, {'id': task.id})
    return
Example #27
 def get_timetable(self, force_parse=False):
     json_file = self.tt_file.with_suffix(".json")
     if not json_file.exists() or force_parse:
         self._timetable = parse_files(self.tt_file, self.midsem_file)
         write_json(json_file, self._timetable)
     else:
         self._timetable = read_json(json_file)
     return self._timetable
Example #28
 async def on_message_delete(self, message):
     id = str(message.author.id)
     if not message.author.bot and not user_info[id][
             'isAdmin'] and random.uniform(0, 1) < user_info[id]['luck']:
         await message.channel.send(
             f"{message.author}, did something suspicious")
         user_info[id]['lastmsg'] = message.content
         write_json('users.json', user_info)
Example #29
def record_changed_files(pr_event, test_name):
    pr_processor = PrProcessor(pr_event)
    resp = pr_processor.setup()
    changed_files = pr_processor.get_changed_files()
    out_filename = Mockable.get_test_filename_from_test_name(
        test_name, CHANGED_FILES_FILENAME)
    write_json(changed_files, out_filename)
    print('Wrote changed files to %s' % out_filename)
Example #30
def write_all_features(path):
    features = {}
    for config_type in config_file:
        features[config_type] = {}
        for feature_type in config_file[config_type].feature_type:
            features[config_type][feature_type] = list(config_file[config_type].feature_type[feature_type])

    write_json(features, f'{path}config_features.json')
Example #31
def main():
    data = get_api_data(API_URL)
    articles = get_news_body(data.keys())

    for k, v in articles.items():
        data[k]['body'] = v

    write_json('api_news', data)
Example #32
    def execute(self):
        """
        Loops through Trakt lists and adds each video to a json file with each collection, holiday, oscar status and
        tag appropriate for the video. This makes it easier to match Plex videos for updates.

        Example:
            {
              "The Lord of the Rings: The Fellowship of the Ring": {
                "collections": ["Middle Earth"],
                "nominees": ["Oscars Best Picture"]
              },
              ...
            }
        """
        movies = {}
        shows = {}

        lists = self.get_user_lists()
        groups = self.generate_grouped_lists(lists=lists)

        video_lists = groups["videos"]

        for list_id, list_name in video_lists.items():
            list_details = self.get_list(list_id=list_id)

            for video in list_details:
                video_type = video["type"]
                title = "{title} ({year})".format(
                    title=video[video_type]['title'],
                    year=video[video_type]['year'])

                for trakt_tag, tag_config in settings.TRAKT_TAGS.items():

                    if trakt_tag != "PEOPLE":

                        if list_name.startswith(trakt_tag):
                            short_list_name = list_name.replace(
                                "{trakt_tag} - ".format(trakt_tag=trakt_tag),
                                "")

                            if video_type == "movie":
                                movies[title] = self._add_to_collection_dict(
                                    video=movies,
                                    title=title,
                                    list_name=short_list_name,
                                    group=tag_config["group"])

                            elif video_type == "show":
                                shows[title] = self._add_to_collection_dict(
                                    video=shows,
                                    title=title,
                                    list_name=short_list_name,
                                    group=tag_config["group"])

                    # TODO add people process

        utils.write_json(group="movies", data=movies)
        utils.write_json(group="shows", data=shows)
Example #33
def crawl_blog_posts_for_query_per_date(query, date, db_pool=None):

    def get_keys_from_page(query, date, pagenum):
        root = html.parse(listurl % (query, date, date, pagenum))
        items = root.xpath('//ul[@class="list_type_1 search_list"]')[0]

        blog_ids = items.xpath('./input[@name="blogId"]/@value')
        log_nos = items.xpath('./input[@name="logNo"]/@value')
        times = [utils.format_datetime(utils.parse_datetime(time))\
            for time in items.xpath('./li/div[@class="list_data"]/span[@class="date"]/text()')]

        return {(b, l): t for b, l, t in zip(blog_ids, log_nos, times)}

    if db_pool is None:
        # make directories
        subdir = '/'.join([DATADIR, query, date.split('-')[0]])
        utils.checkdir(subdir)
        if REMOTE:
            rsubdir = '/'.join([REMOTE['dir'], query, date.split('-')[0]])
            utils.rcheckdir(sftp, rsubdir)

    # check number of items
    try:
        nitems = get_nitems_for_query(query, date, date)
    except IndexError:
        print query, date, 'None'
        return

    # crawl items
    for pagenum in range(int(nitems/10.)):
        keys = get_keys_from_page(query, date, pagenum + 1)
        tags = get_tags_for_items(keys)
        for (blog_id, log_no), written_time in keys.items():
            try:
                info = crawl_blog_post(blog_id, log_no, tags, written_time, verbose=False)
                if db_pool is None:
                    localpath = '%s/%s.json' % (subdir, log_no)
                    utils.write_json(info, localpath)
                    if REMOTE:
                        remotepath = '%s/%s.json' % (rsubdir, log_no)
                        sftp.put(localpath, remotepath)
                else:
                    db_pool.insert_blog_to_db(info)
            except IndexError:
                print Exception(\
                    'Crawl failed for http://blog.naver.com/%s/%s' % (blog_id, log_no))

            time.sleep(SLEEP)

    overwrite_queries(query, date)
    print query, date, nitems
Example #34
def write_routes(routes):
    route_list = list(routes.values())
    route_list.sort(key=lambda r: (r['date'], r['name']))
    data = {
        'routes': route_list,
    }
    write_json(os.path.join(exec_root, '..', 'preprocessor', 'input.json'), data, {
        'indent': '  ',
        'item_sort_key': lambda i: [
            'name', 'date', 'link', 'terrain', 'length', 'ascent',
            'difficulty', 'strenuousness', 'duration', 'water', 'food',
            'terrains', 'traces', 'routes', 'trailhead'].index(i[0]),
        'ensure_ascii': False,
    })
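item_sort_key is a simplejson-specific option, so the write_json used here presumably forwards its third argument to simplejson.dump as keyword arguments. A minimal sketch under that assumption:

import simplejson


def write_json(path, data, options=None):
    # Forward the caller-supplied options (indent, item_sort_key,
    # ensure_ascii, ...) straight to simplejson.dump.
    with open(path, 'w', encoding='utf-8') as fp:
        simplejson.dump(data, fp, **(options or {}))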
Example #35
def process(sources, output, force):
    """Download sources and process the file to the output directory.

    \b
    SOURCES: Source JSON file or directory of files. Required.
    OUTPUT: Destination directory for generated data. Required.
    """
    for path in utils.get_files(sources):
        pathparts = utils.get_path_parts(path)
        pathparts[0] = output.strip(os.sep)
        pathparts[-1] = pathparts[-1].replace('.json', '.geojson')

        outdir = os.sep.join(pathparts[:-1])
        outfile = os.sep.join(pathparts)

        source = utils.read_json(path)
        urlfile = urlparse(source['url']).path.split('/')[-1]

        if not hasattr(adapters, source['filetype']):
            utils.error('Unknown filetype', source['filetype'], '\n')
            continue

        if os.path.isfile(outfile) and not force:
            utils.error('Skipping', path, 'since generated file exists.',
                        'Use --force to regenerate.', '\n')
            continue

        utils.info('Downloading', source['url'])

        try:
            fp = utils.download(source['url'])
        except IOError:
            utils.error('Failed to download', source['url'], '\n')
            continue

        utils.info('Reading', urlfile)

        try:
            geojson = getattr(adapters, source['filetype']).read(fp, source['properties'])
        except IOError:
            utils.error('Failed to read', urlfile)
            continue
        finally:
            os.remove(fp.name)

        utils.make_sure_path_exists(outdir)
        utils.write_json(outfile, geojson)

        utils.success('Done. Processed to', outfile, '\n')
Example #36
def parse_page(assembly_id, bill_id, meta, directory):

    fn = '%s/%s.json' % (directory, bill_id)

    d = extract_specifics(assembly_id, bill_id, meta)
    d['proposers']      = extract_proposers(assembly_id, bill_id)
    d['summaries']      = extract_summaries(assembly_id, bill_id)
    d['withdrawers']    = extract_withdrawers(assembly_id, bill_id)
    d['proposed_date']  = include(meta, bill_id, 'proposed_date')
    d['decision_date']  = include(meta, bill_id, 'decision_date')
    d['link_id']        = include(meta, bill_id, 'link_id')
    d['proposer_type']  = include(meta, bill_id, 'proposer_type')
    d['status']         = "계류" if include(meta, bill_id, 'status')==1 else "처리"

    utils.write_json(d, fn)
Example #37
def crawl_movie(movie_id):
    items = []
    for page_num in range(10):  # limit to 100 recent ratings per movie
        url = MOVIEURL % (movie_id, page_num + 1)
        page_items, npages = crawl_rating_page(url)
        items.extend(page_items)
        if len(items)==0:
            return []
        if page_num >= npages - 1:
            break
    if items:
        utils.write_json(items, '%s/%s.json' % (DATADIR, movie_id))
        return items
    else:
        return []
Example #38
 def run(self):
     if self.django_settings_module:
         ds_mod = self.django_settings_module
         django_config = None
     else:
         config_file = os.path.join(self.app_dir_path, DJANGO_CFG_FILENAME)
         if not os.path.exists(config_file):
             raise FileMissingError("Missing configuration file %s" % config_file)
         with open(config_file, "rb") as fileobj:
             django_config = django_config_from_json(fileobj.read(), COMPATIBLE_PACKAGER_VERSION)
         ds_mod = django_config.django_settings_module
     results = validate_settings(self.app_dir_path, ds_mod,
                                 django_config)
     write_json(results.to_json(), self.results_file)
     results.print_final_status_message(logger)
     return results.get_return_code()    
Example #39
def identify(fondz_dir):
    """
    Will run identification over the source bags, and write out the
    format report to the js/formats.json file in the fondz directory.
    """
    results = []
    src_dir = os.path.join(fondz_dir, "originals")
    for f in os.listdir(src_dir):
        data_dir = os.path.join(src_dir, f, 'data')
        if os.path.isdir(data_dir):
            results.extend(identify_dir(data_dir))

    for f in results:
        f['path'] = os.path.relpath(f['path'], fondz_dir)

    formats_file = os.path.join(fondz_dir, "js", "formats.json")
    write_json(results, formats_file)

    return results
Example #40
def main(filename):
    """
    Normalize each entry found in csv filename file into standard JSON.
    Write to file result.out
    """
    abspath = os.path.abspath(filename)
    with open(abspath) as infile:
        entries = []
        errors = []
        line_count = 0
        for line in infile:
            try:
                entries.append(produce_entry(line))
            except DataValueError as e:
                logging.error(e.err_type+"\t"+e.value)
                errors.append(line_count)
            finally:
                line_count += 1
    entries.sort(key=lambda x: (x['lastname'], x['firstname']))
    result = {'entries': entries, 'errors': errors}
    write_json(result, 'result.out')
    return entries
Example #41
def run(options):
    # can limit it to one chamber
    chamber = options.get("chamber", None)
    if chamber and (chamber in ("house", "senate")):
        chambers = chamber
    else:
        chambers = ("house", "senate")

    load_by = options.get("load_by", None)

    # Load the committee metadata from the congress-legislators repository and make a
    # mapping from thomas_id and house_id to the committee dict. For each committee,
    # replace the subcommittees list with a dict from thomas_id to the subcommittee.
    utils.require_congress_legislators_repo()
    committees = {}
    for c in utils.yaml_load("congress-legislators/committees-current.yaml"):
        committees[c["thomas_id"]] = c
        if "house_committee_id" in c:
            committees[c["house_committee_id"] + "00"] = c
        c["subcommittees"] = dict((s["thomas_id"], s) for s in c.get("subcommittees", []))

    if "senate" in chambers:
        print "Fetching Senate meetings..."
        meetings = fetch_senate_committee_meetings(committees, options)
        print "Writing Senate meeting data to disk."
        utils.write_json(meetings, output_for("senate"))

    if "house" in chambers:
        if load_by == None:
            print "Fetching House meetings..."
            meetings = fetch_house_committee_meetings(committees, options)
        else:
            print "Fetching House meetings by event_id..."
            meetings = fetch_meeting_from_event_id(committees, options, load_by)

        print "Writing House meeting data to disk."
        utils.write_json(meetings, output_for("house"))
Example #42
    def _validate_and_prepare_dir_old(self, app_dir_path, undo_ops):
        """Called by the subclass run() method after setting up the directory.
        """
        results = validate_settings(app_dir_path, self.django_settings_module)
        if results.get_return_code() != SUCCESS_RC:
            results.print_final_status_message(logger)
            return results.get_return_code()
        config = django_config_from_validation_results(self.django_settings_module,
                                                       VERSION, results)
        write_json(config.to_json(), os.path.join(app_dir_path, DJANGO_CFG_FILENAME))

        # undo the changes we made
        for (op_fun, args) in undo_ops:
            op_fun(*args)
        # delete all the .pyc files
        find_files(app_dir_path, "\.pyc$", os.remove)
        # (re)create the archive
        if self.archive_type == "zip":
            archive = ZipfileHandler(self.archive_file, "w")
        else:
            archive = TarfileHandler(self.archive_file, "w:gz")
        archive.create_new_from_dir(app_dir_path)
        archive.close()
        return SUCCESS_RC
Example #43
def export_place(opts, place, count):

    exportdir = os.path.join(opts.outdir, place)

    if not os.path.exists(exportdir):
        os.makedirs(exportdir)
        
    logging.info("export %s (%s records)" % (place, count))

    dump = "%s.txt" % place
    path_dump = os.path.join(exportdir, dump)

    fh = open(path_dump, 'w')

    missing = "%s-nogeo.csv" % place
    path_missing = os.path.join(exportdir, missing)

    writer = csv.writer(open(path_missing, 'w'))
    writer.writerow(('woeid', 'name', 'iso'))


    point_features = {}
    poly_features = {}

    solr = pysolr.Solr(opts.solr)
    query = 'placetype:%s' % place

    start = 0
    rows = 10000

    while start <= count:

        args = {
            'q': query,
            'fq': '-woeid_superseded_by:*',
            'rows': rows,
            'start': start
            }

        rsp = solr.search(**args)

        for doc in rsp.docs:

            io = StringIO.StringIO()
            utils.write_json(doc, io)

            io.seek(0)
            fh.write(io.read() + "\n")

            woeid = doc['woeid']
            parent = doc.get('woeid_parent', -1)

            name = doc['name'].encode('utf8')
            iso = doc.get('iso', 'ZZ')	# mainly aerotrpolii - needs to be fixed (20130317/straup)

            centroid = doc.get('centroid', None)

            if not centroid:
                writer.writerow((woeid, name, iso))
                continue

            props = {
                'name': name,
                'woeid': woeid,
                'parent': parent,
                }

            # Make compatible props for https://github.com/mattb/flickrgeocoder-java
            # at least until I can patch it to generate GeoJSON... (20130319/straup)

            if opts.compat:
                props['place_type'] = place
                props['woe_id'] = woeid
                props['label'] = name
                # place_id
                # place_ty_1 (place_type_id)

            lat,lon = map(float, centroid.split(','))

            point = {
                'type': 'Feature',
                'properties': props,
                'geometry': { 'type': 'Point', 'coordinates': [ lon, lat ] },
                'id': woeid
                }

            if point_features.get(iso, False):
                point_features[iso].append(point)
            else:
                point_features[iso] = [ point ]

            geometry = None

            if doc.get('geometry_default', False):
                geometry = json.loads(doc['geometry_default'])

            elif doc.get('sw_corner', False):

                swlat,swlon = map(float, doc['sw_corner'].split(','))  
                nelat,nelon = map(float, doc['ne_corner'].split(','))  

                coords = [[
                        [swlon, swlat],
                        [swlon, nelat],
                        [nelon, nelat],
                        [nelon, swlat],
                        [swlon, swlat],
                        ]]

                geometry = {
                    'type': 'Polygon',
                    'coordinates': coords
                    }

            else:
                pass

            if not geometry:
                continue

            poly = {
                'type': 'Feature',
                'properties': props,
                'geometry': geometry,
                'id': woeid
                }
            
            if poly_features.get(iso, False):
                poly_features[iso].append(poly)
            else:
                poly_features[iso] = [ poly ]
          
        start += rows
    
    # Write points (by country)

    for iso, features in point_features.items():

        geojson = {
            'type': 'FeatureCollection',
            'features': features
            }

        fname = "%s-%s-centroid.json" % (place, iso)
        path = os.path.join(exportdir, fname)

        fh = open(path, 'w')

        logging.info("write %s" % fname)
        utils.write_json(geojson, fh)

    # Write all points

    all_points = []

    for ignore, features in point_features.items():
        all_points.extend(features)
        
    geojson = {
        'type': 'FeatureCollection',
        'features': all_points
        }

    fname = "%s-centroid.json" % (place)
    path = os.path.join(exportdir, fname)

    fh = open(path, 'w')

    logging.info("write %s" % fname)
    utils.write_json(geojson, fh)

    # Write polys by country

    for iso, features in poly_features.items():

        geojson = {
            'type': 'FeatureCollection',
            'features': features
            }

        fname = "%s-%s-poly.json" % (place, iso)
        path = os.path.join(exportdir, fname)

        fh = open(path, 'w')

        logging.info("write %s" % fname)
        utils.write_json(geojson, fh)

    # Write all polys

    all_polys = []

    for ignore, features in poly_features.items():
        all_polys.extend(features)
        
    geojson = {
        'type': 'FeatureCollection',
        'features': all_polys
        }

    fname = "%s-poly.json" % (place)
    path = os.path.join(exportdir, fname)

    fh = open(path, 'w')

    logging.info("write %s" % fname)
    utils.write_json(geojson, fh)
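In this project write_json takes an already-open file-like object (a StringIO earlier in the function, plain file handles here) rather than a path. A hedged sketch of such a wrapper:

import json


def write_json(data, fh, indent=None):
    # Assumed signature: dump straight to the supplied file-like object;
    # the caller is responsible for opening and closing it.
    json.dump(data, fh, indent=indent)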
Example #44
if __name__ == "__main__":

    # Name of the JSON file we want to operate on
    json_filename = '../database_new.json'

    # File containing the new URLs we want to add to the JSON
    input_filename = 'music_urls.txt'

    # Read the JSON file; if it does not exist, an empty list is returned
    database_array = read_json(json_filename)

    # Read the URLs file
    urls_array = read_urls_file(input_filename) # the desired input goes here

    # Alternatively, read the URLs from user input
    # urls_array = read_urls_input()

    # Call the lyrics crawler, which returns a list of dictionaries
    lyrics_array = crawl_lyrics(urls_array)

    # Append the newly classified lyrics to the songs already in the database
    database_array.extend(classify_lyrics(lyrics_array))

    # Write the result back to the JSON file
    write_json(json_filename, database_array)

    # Print the total number of songs per sentiment in the JSON file
    sentiment_count(json_filename)

Example #45
 def save(self):
     data = [{'name': self._name, 'avatar_name': self._avatar_name, 'colour': self._colour}]
     path = os.path.join("players", "%s.json" % self._name)
     u.write_json(path, data, sort_keys=True)
Example #46
                geom = f['geometry']
                geom = shapely.geometry.asShape(geom)

                f['bbox'] = geom.bounds

            geojson = {
                'type': 'FeatureCollection',
                'features': [ f ]
            }

            woeid = f['id']

            tree = utils.woeid2path(woeid)
            fname = "%s.json" % woeid

            root = os.path.join(opts.outdir, tree)
            path = os.path.join(root, fname)

            if not os.path.exists(root):
                logging.info("create %s" % root)
                os.makedirs(root)

            logging.info("write %s" % path)
        
            out = open(path, 'w')
            utils.write_json(geojson, out)

    logging.debug("done")
    sys.exit()
Example #47
        woeid = tmp[iso]
        ports = countries[woeid]

        logging.info("%s (%s) : %s airports" % (iso, woeid, len(ports)))
        writer.writerow((iso, woeid, len(ports)))

    csv_fh.close()

    # Now generate files for each country

    for woeid, features in countries.items():

        iso = iso_codes[woeid]

        collection = {
            'type': 'FeatureCollection',
            'features': features,
            'properties': {
                'woe:country': woeid,
                'iso': iso
                }
            }

        co_path = os.path.join(metadir, "%s.json" % iso)
        logging.info("write %s" % co_path)

        co_fh = open(co_path, 'w')
        utils.write_json(collection, co_fh)
        co_fh.close()
Beispiel #48
0
  cmd.append('')  # we can leave the root path empty since our file list contains absolute paths
  cmd.append(list_file)
  cmd.append(out_dir)
  utils.run_command(cmd)


if __name__ == '__main__':
  print('\nConverting labels into SSD format...')
  convert_labels()

  print('\nMaking image list files...')
  make_train_test_file_lists()

  print('\nMaking training lmdb...')
  make_lmdb(TRAIN_FILE_LIST, TRAIN_LMDB)

  print('\nMaking testing lmdb...')
  make_lmdb(TEST_FILE_LIST, TEST_LMDB)

  # Save some info about the prepared data so it can be reused by the training script.
  utils.write_json('info.json', {
    'num_classes': len(LABEL_MAP),
    'img_width': TARGET_IMG_W,
    'img_height': TARGET_IMG_H,
    'train_img_count': TRAIN_IMG_COUNT,
    'test_img_count': TEST_IMG_COUNT
  })

  print('\nOK')
  print('Use "train" command key to start training session')
Beispiel #49
0
            props[k] = v

        p = f['geometry']
        p = shapely.geometry.asShape(p)
        bbox = p.bounds

        f['type'] = 'Feature'
        f['properties'] = props
        f['bbox'] = bbox
        f['id'] = props['woe_id']

        geojson = {
            'type': 'FeatureCollection',
            'features': [ f ]
            }

        tree = utils.woeid2path(f['id'])
        fname = "%s.json" % f['id']

        root = os.path.join(jsondir, tree)
        path = os.path.join(root, fname)

        if not os.path.exists(root):
            os.makedirs(root)
            
        fh = open(path, 'w')
        indent = None

        utils.write_json(geojson, fh, indent)
        print(path)
Beispiel #50
0
#     census_2000_energy = {}
#     for row in reader:
#         census_2000_energy[row[2]] = \
#             [float(row[3]), float(row[6]), float(row[4])]

# # test to see how much overlap in census blocks and print out a
# # message about matching between spreadsheet block IDs and census
# # block IDs
# spreadsheet_blocks = set(census_2000_energy.keys())
# chicago_blocks = set(CENSUS_BLOCK_2000.keys())
# print >> sys.stderr, "Unknown blocks in spreadsheet: %s" % \
#     spreadsheet_blocks.difference(chicago_blocks)
# print >> sys.stderr, "# of Missing blocks in spreadsheet: %s" % \
#     len(chicago_blocks.difference(spreadsheet_blocks))

# --------------------------------------------- maybe read in census information


# --------------------------------------- aggregate metrics and write JSON files
for zoom_level in [
        NEIGHBORHOOD,
    ]:
    aggregate_dict = aggregate_metrics(
        zoom_level,
        CENSUS_BLOCK_2010,
        CENSUS_BLOCK_2010_INDEX,
        # census_2000_energy,
    )
    write_json(aggregate_dict, None)
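
Note that write_json is called with None as the second argument; presumably the project's helper falls back to stdout (or a default path) when no target is given. A sketch under the stdout assumption:

import json
import sys

def write_json(data, path=None):
    # Write `data` as JSON to `path`, or to stdout when no path is given.
    # The stdout fallback is an assumption based on the call above.
    if path is None:
        json.dump(data, sys.stdout, indent=2)
        sys.stdout.write('\n')
    else:
        with open(path, 'w') as fh:
            json.dump(data, fh, indent=2)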

Beispiel #51
0
                    failures.append(path)
                    continue
                finally:
                    os.remove(fp.name)
                if len(geojson['features']) == 0:
                    utils.error("Result contained no features for " + path)
                    continue
                excluded_keys = ['filetype', 'url', 'properties', 'filter', 'filenameInZip']
                properties = {k:v for k,v in source.iteritems() if k not in excluded_keys}
                properties['source_url'] = source['url']
                properties['feature_count'] = len(geojson['features'])
                
                geojson['properties'] = properties
    
                utils.make_sure_path_exists(outdir)
                utils.write_json(outfile, geojson)

                utils.info("Generating label points")
                label_geojson = geoutils.get_label_points(geojson)
                label_pathparts = list(pathparts)
                label_pathparts[-1] = label_pathparts[-1].replace('.geojson', '.labels.geojson')
                label_path = os.sep.join(label_pathparts)
                utils.write_json(label_path, label_geojson)

                utils.success('Done. Processed to', outfile, '\n')
    
            properties['path'] = "/".join(pathparts[path_parts_to_skip:])
            catalog_entry = {
                'type': 'Feature',
                'properties': properties,
                'geometry': geoutils.get_union(geojson)
Beispiel #52
0
 def cleanup():
     with open(op.join(path, "maskedfreqs.json"), mode="w") as f:
         utils.write_json(maskedfreqs, f)
Beispiel #53
0
from collections import Counter

import regex
from gensim import corpora

from utils import write_json, chk_dir
from ..utils.counter import get_words

def make_dictionary(documents, stoplist, dictfile, jsonfile, option='batch'):

    if option=='batch':
        texts = [get_words(document, stoplist) for document in documents]
        tokens = [k for k, v in Counter(sum(texts, [])).items() if v>1]
        texts = [[word for word in text if word in tokens] for text in texts]
        dictionary = corpora.Dictionary(texts)

    elif option=='online':
        words = [[word for word in regex.findall(ur'[\p{Hangul}\p{Latin}\p{Han}]+', doc.lower()) if len(word)>1] for doc in documents]
        dictionary = corpora.Dictionary(words)

        stop_ids = [dictionary.token2id[stopword] for stopword in stoplist
                if stopword in dictionary.token2id]
        once_ids = [tokenid for tokenid, docfreq in dictionary.dfs.iteritems()\
                if docfreq==1]

        dictionary.filter_tokens(stop_ids + once_ids)
        dictionary.compactify()

    else:
        dictionary = None

    dictionary.save(dictfile)
    write_json(dictionary.token2id, jsonfile)

    return dictionary
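
A hypothetical call, with toy documents and invented file names purely for illustration (it still relies on the project's get_words helper and the imports above):

documents = ["the cat sat on the mat", "the dog sat on the log"]
stoplist = ["the", "on", "a"]
dictionary = make_dictionary(documents, stoplist,
                             dictfile='corpus.dict',
                             jsonfile='token2id.json',
                             option='batch')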
Beispiel #54
0
	def start(self):
		self.status.set("Estado: -")
		self.leng.set("Iteración: -/- y Número: -/-")
		self.totaltime.set("Tiempo total: -")
		self.ones.set("Total de unos: -")
		self.types.set("Progreso: -")
		self.startButton.config(state = 'disabled')
		self.browseButton.config(state = 'disabled')
		self.cancelButton.config(state = 'normal')
		self.maxnumberSpinbox.config(state = 'disabled')
		self.complexSpinbox.config(state = 'disabled')
		if int(self.complexSpinbox.get()) in (1,2,3,4,5) and int(self.maxnumberSpinbox.get()) in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21) and self.completeName != "":
			start_time = time.time()
			if self.name.get().split('.')[1] == 'csv':
				self.g = generator.Generator(self.maxnumberSpinbox.get(), self.complexSpinbox.get(), utils.read_csv(self.completeName), self.cancelButton, self.types)
			else:
				self.g = generator.Generator(self.maxnumberSpinbox.get(), self.complexSpinbox.get(), utils.read_json(self.completeName), self.cancelButton, self.types)
			self.g.count_one()
			self.ones.set("Total de unos: {0}".format(len(self.g.table_uno)))
			i = 0
			while self.g.maxim > 1:
				i += 1
				self.leng.set("Iteración: {0}/{1} y Número: {2}".format(i, self.complexSpinbox.get(), self.g.maxim))
				self.status.set("Estado: Generando puzzle...")
				self.g.step_one()
				tim = utils.sec_to(int(time.time() - start_time))
				self.totaltime.set("Tiempo total: {0}h:{1}m:{2}s".format(tim[0], tim[1], tim[2]))
				self.status.set("Estado: Aplicando condición uno...")
				self.g.cond_dos(1)
				tim = utils.sec_to(int(time.time() - start_time))
				self.totaltime.set("Tiempo total: {0}h:{1}m:{2}s".format(tim[0], tim[1], tim[2]))
				self.status.set("Estado: Aplicando condición dos...")
				self.g.cond_dos(2)
				tim = utils.sec_to(int(time.time() - start_time))
				self.totaltime.set("Tiempo total: {0}h:{1}m:{2}s".format(tim[0], tim[1], tim[2]))
				
				if self.g.maxim >= 4:
					self.status.set("Estado: Aplicando condición tres...")
					self.g.cond_dos(3)
					tim = utils.sec_to(int(time.time() - start_time))
					self.totaltime.set("Tiempo total: {0}h:{1}m:{2}s".format(tim[0], tim[1], tim[2]))
				
				self.g.count_one()
				self.ones.set("Total de unos: {0}".format(len(self.g.table_uno)))
				if i == self.g.iters:
					self.g.maxim -= 1
					i = 0
			if self.name.get().split('.')[1] == 'csv':
				utils.write_csv(self.g.table_all)
			else:
				utils.write_json(self.g.table_all)

			if self.g.cancel:
				self.status.set("Estado: Cancelado")
			else:
				self.status.set("Estado: Completado")
			self.g = None
		self.startButton.config(state = 'normal')
		self.browseButton.config(state = 'normal')
		self.cancelButton.config(state = 'disabled')
		self.maxnumberSpinbox.config(state = 'normal')
		self.complexSpinbox.config(state = 'normal')
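
The GUI above relies on a utils.sec_to helper that converts an elapsed number of seconds into something indexable as hours, minutes and seconds (tim[0], tim[1], tim[2]). A minimal sketch under that assumption:

def sec_to(seconds):
    # Convert a number of seconds into an (hours, minutes, seconds) tuple.
    # Name and return shape are assumed from the calls above.
    hours, rest = divmod(int(seconds), 3600)
    minutes, secs = divmod(rest, 60)
    return hours, minutes, secs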